Files
multi-agent-mux/.agents/skills/lib.sh
T

821 lines
30 KiB
Bash

#!/usr/bin/env bash
# lib.sh — shared library for the multi-agent-mux-* skills.
#
# Single source of truth for the four things that were inconsistently
# re-implemented across create/resume/delete/monitor (REVIEW.md §4.1):
# - derive_session_name : the tmux session slug (P0-A)
# - atomic_dump_yaml : SQLite db transaction + temp+rename + .bak + validate (P0-B)
# - env_python : env-safe Python (no heredoc injection) (P0-B / P1-B)
# - find_workspace_uuid : workspace-SCOPED resume id lookup (P0-C)
#
# Source it from each script with a path computed from the script location:
# source "$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)/lib.sh"
#
# HARD RULE: the agent-sessions.yaml file is only ever written through
# atomic_dump_yaml. Never `open(yaml_path, 'w')` anywhere else.
# Directory containing this library, and the workspace root two levels above it.
SKILL_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
WORKSPACE_ROOT="$(cd "$SKILL_DIR/../.." && pwd)"
AGENT_SESSIONS_YAML="${AGENT_SESSIONS_YAML:-$WORKSPACE_ROOT/.mam/agent-sessions.yaml}"
# Per-user locations, each overridable from the environment (Phase Z).
# HOME_DIR is resolved first and the remaining defaults derive from it, so a
# caller that overrides only HOME_DIR relocates every per-user path together
# (the embedded Python helpers already build their other paths from HOME_DIR).
HOME_DIR="${HOME_DIR:-$HOME}"
CLAUDE_PROJECT_DIR="${CLAUDE_PROJECT_DIR:-$HOME_DIR/.claude/projects}"
LOCAL_BIN="${LOCAL_BIN:-$HOME_DIR/.local/bin}"
# ---------------------------------------------------------------------------
# Tmux Server Isolation support
# ---------------------------------------------------------------------------
# Substrings that mark shim/wrapper directories; any PATH entry (or cached
# tmux path) containing one of them is skipped when locating the real tmux.
: "${_TMUX_SHIM_DIR_PATTERN:=/multi-agent-tmux-shim/}"
: "${_TMUX_SKILLS_BIN_PATTERN:=/.agents/skills/.bin}"
# Name of the tmux server to talk to; "default" means no -L isolation.
: "${TMUX_SERVER_NAME:=default}"
# _resolve_real_tmux_path
#
# Locate the real tmux binary and export it as _REAL_TMUX_PATH.
# A previously resolved value is kept unless it is empty or points into a
# shim/wrapper directory (matched by _TMUX_SHIM_DIR_PATTERN or
# _TMUX_SKILLS_BIN_PATTERN). Otherwise PATH is scanned in order, skipping
# shim directories, and the first executable "tmux" wins. When nothing is
# found the bare command name "tmux" is used.
_resolve_real_tmux_path() {
  if [ -n "${_REAL_TMUX_PATH:-}" ] \
    && [[ "$_REAL_TMUX_PATH" != *"${_TMUX_SHIM_DIR_PATTERN}"* ]] \
    && [[ "$_REAL_TMUX_PATH" != *"${_TMUX_SKILLS_BIN_PATTERN}"* ]]; then
    return 0
  fi

  local dir
  local -a path_dirs=()
  # Split PATH with a command-scoped IFS: the caller's IFS (including an
  # *unset* IFS) is left untouched and PATH entries are never glob-expanded.
  IFS=: read -r -a path_dirs <<<"$PATH"

  _REAL_TMUX_PATH=""
  # The ${arr[@]+...} form keeps an empty array safe under `set -u` on old bash.
  for dir in ${path_dirs[@]+"${path_dirs[@]}"}; do
    if [[ "$dir" != *"${_TMUX_SHIM_DIR_PATTERN}"* ]] \
      && [[ "$dir" != *"${_TMUX_SKILLS_BIN_PATTERN}"* ]] \
      && [ -x "$dir/tmux" ]; then
      _REAL_TMUX_PATH="$dir/tmux"
      break
    fi
  done

  if [ -z "$_REAL_TMUX_PATH" ]; then
    _REAL_TMUX_PATH="tmux"
  fi
  export _REAL_TMUX_PATH
}
# _init_tmux_isolation
#
# Make plain `tmux` invocations by child processes follow TMUX_SERVER_NAME.
#   - Isolation on (TMUX_SERVER_NAME set and not "default"): write a wrapper
#     script named tmux under ${TMPDIR:-/tmp}<shim pattern><server name> and
#     prepend that directory to PATH. The wrapper adds "-L <server>" unless
#     TMUX_SERVER_NAME is empty/"default" in its own environment.
#   - Isolation off: strip every shim/wrapper directory from PATH.
# Always resolves _REAL_TMUX_PATH first.
_init_tmux_isolation() {
  _resolve_real_tmux_path
  local dir

  if [ -n "${TMUX_SERVER_NAME:-}" ] && [ "$TMUX_SERVER_NAME" != "default" ]; then
    local wrapper_dir="${TMPDIR:-/tmp}${_TMUX_SHIM_DIR_PATTERN}${TMUX_SERVER_NAME}"
    if [[ ":$PATH:" != *":$wrapper_dir:"* ]]; then
      # Shell-quote the real path so quotes, '$' or spaces in it cannot break
      # (or inject into) the generated wrapper.
      local real_tmux_q
      printf -v real_tmux_q '%q' "$_REAL_TMUX_PATH"
      # Only put the directory on PATH once the wrapper was really written;
      # otherwise PATH would gain a directory this process does not control.
      if mkdir -p "$wrapper_dir" \
        && printf '%s\n' \
          '#!/usr/bin/env bash' \
          'if [ -z "${TMUX_SERVER_NAME:-}" ] || [ "$TMUX_SERVER_NAME" = "default" ]; then' \
          "exec ${real_tmux_q} \"\$@\"" \
          'else' \
          "exec ${real_tmux_q} -L \"\$TMUX_SERVER_NAME\" \"\$@\"" \
          'fi' > "$wrapper_dir/tmux" \
        && chmod +x "$wrapper_dir/tmux"; then
        export PATH="$wrapper_dir:$PATH"
      else
        echo "WARNING: could not create tmux wrapper in $wrapper_dir; PATH left unchanged" >&2
      fi
    fi
  else
    # Isolation disabled: clean up automatically by dropping shim dirs from PATH.
    local new_path=""
    local -a path_dirs=()
    # Command-scoped IFS: the caller's IFS (even an unset one) is preserved and
    # PATH entries are not glob-expanded.
    IFS=: read -r -a path_dirs <<<"$PATH"
    for dir in ${path_dirs[@]+"${path_dirs[@]}"}; do
      if [[ "$dir" != *"${_TMUX_SHIM_DIR_PATTERN}"* ]] \
        && [[ "$dir" != *"${_TMUX_SKILLS_BIN_PATTERN}"* ]]; then
        if [ -z "$new_path" ]; then
          new_path="$dir"
        else
          new_path="$new_path:$dir"
        fi
      fi
    done
    export PATH="$new_path"
  fi
}
# _tmux <tmux args...>
#
# Run the real tmux binary with the given arguments, adding "-L <server>"
# when TMUX_SERVER_NAME names a non-default server. Refreshes the isolation
# setup (shim + PATH) on every call.
_tmux() {
  _init_tmux_isolation
  local -a server_opt=()
  case "${TMUX_SERVER_NAME:-}" in
    "" | default) ;;
    *) server_opt=(-L "$TMUX_SERVER_NAME") ;;
  esac
  "$_REAL_TMUX_PATH" ${server_opt[@]+"${server_opt[@]}"} "$@"
}
# tmux <args...> — shadows the tmux command inside shells that source this
# library, so every call is routed through _tmux.
tmux() { _tmux "$@"; }
# ---------------------------------------------------------------------------
# resolve_tmux_server <session_name>
#
# Query agent-sessions.yaml to find the tmux_server associated with a session.
# Fallback to TMUX_SERVER_NAME or 'default' if not registered or field is missing.
# Prints the resolved server name on stdout.
# ---------------------------------------------------------------------------
resolve_tmux_server() {
  local session_name="$1"
  # TMUX_SERVER_NAME is passed explicitly: it may be a plain (non-exported)
  # shell variable, in which case the Python child would otherwise not see it
  # and the fallback would always print "default".
  SESSION_NAME="$session_name" TMUX_SERVER_NAME="${TMUX_SERVER_NAME:-default}" \
    env_python "$AGENT_SESSIONS_YAML" <<'PYEOF'
import os, sqlite3, json

name = os.environ['SESSION_NAME']
yaml_path = os.environ['YAML_PATH']
db_path = os.path.splitext(yaml_path)[0] + '.db'


def server_from_list(entries):
    """Return tmux_server of the first matching entry that has one, else None."""
    for s in entries or []:
        if isinstance(s, dict) and s.get('name') == name and s.get('tmux_server'):
            return s['tmux_server']
    return None


def lookup():
    if os.path.exists(db_path):
        conn = sqlite3.connect(db_path, timeout=10.0)
        try:
            try:
                row = conn.execute('SELECT data FROM sessions WHERE name=?', (name,)).fetchone()
                if row:
                    server = json.loads(row[0]).get('tmux_server')
                    if server:
                        return server
            except sqlite3.OperationalError:
                # No usable sessions table: fall through to the state row.
                pass
            row = conn.execute('SELECT data FROM state WHERE id=1').fetchone()
            if row:
                return server_from_list(json.loads(row[0]).get('tmux_sessions'))
            return None
        finally:
            conn.close()
    if os.path.exists(yaml_path):
        # PyYAML is only needed on this path, so import it lazily.
        import yaml
        with open(yaml_path) as f:
            doc = yaml.safe_load(f) or {}
        return server_from_list(doc.get('tmux_sessions'))
    return None


try:
    server = lookup()
except Exception:
    # Best effort: any lookup failure degrades to the fallback below.
    server = None

print(server or os.environ.get('TMUX_SERVER_NAME') or 'default')
PYEOF
}
# ---------------------------------------------------------------------------
# derive_session_name <workspace> <agent>
#
# THE single source of truth for the tmux session name. Rule:
# slug = the two trailing path components of the absolute workspace,
# '_' -> '-', lowercased, joined with '-'
# name = "<slug>-creator-<agent>"
#
# Resolved relative to the workspace root. Example:
# $WORKSPACE_ROOT/landing_page/refer_landing_page + claude
# -> landing-page-refer-landing-page-creator-claude
#
# Decision (REVIEW P0-A): the actual workspace basename (refer_landing_page)
# IS included. The hand-written historical entry that dropped it
# (lab-landing-page-creator-claude) was the bug, not the convention.
# Every script and SKILL.md must use exactly this rule.
# ---------------------------------------------------------------------------
derive_session_name() {
  local workspace="$1" agent="$2"
  local resolved parent_name leaf_name

  # Use the canonical directory when it exists; otherwise the path as given.
  if ! resolved="$(cd "$workspace" 2>/dev/null && pwd)"; then
    resolved="$workspace"
  fi

  parent_name="$(basename "$(dirname "$resolved")" 2>/dev/null || echo "")"
  leaf_name="$(basename "$resolved" 2>/dev/null || echo "root")"

  # Degenerate components (filesystem root, ".", empty) get fixed placeholders.
  case "$parent_name" in
    "" | "/" | ".") parent_name="workspace" ;;
  esac
  case "$leaf_name" in
    "" | "/" | ".") leaf_name="root" ;;
  esac

  # Lowercase, '_' -> '-', then drop everything except letters, digits and '-'.
  printf '%s-creator-%s' \
    "$(printf '%s-%s' "$parent_name" "$leaf_name" \
      | tr '[:upper:]' '[:lower:]' \
      | tr '_' '-' \
      | tr -cd 'a-zA-Z0-9-')" \
    "$agent"
}
# ---------------------------------------------------------------------------
# env_python <yaml_path> [KEY=VALUE ...] (Python source read from stdin)
#
# Run python3 with the source supplied on stdin via a *quoted* heredoc, so the
# shell never interpolates the source. All values are passed through the
# environment (YAML_PATH plus any KEY=VALUE pairs). Untrusted data (workspace
# paths, capture-pane text) must travel as env vars and be read via os.environ
# inside the script — never spliced into the source. Read-only by convention;
# use atomic_dump_yaml when you need to write the YAML.
# ---------------------------------------------------------------------------
# _validate_env_key <name>
#
# Return 0 when <name> is a syntactically valid environment variable name that
# is not on the blocklist of loader/interpreter variables; otherwise print an
# error on stderr and return 1.
_validate_env_key() {
  local key="$1"
  local name_re='^[a-zA-Z_][a-zA-Z0-9_]*$'
  local blocked

  if ! [[ "$key" =~ $name_re ]]; then
    echo "ERROR: Invalid environment variable name: $key" >&2
    return 1
  fi

  # Variables that would change how the dynamic loader or Python itself starts.
  for blocked in LD_PRELOAD LD_LIBRARY_PATH PYTHONPATH PYTHONHOME PYTHONINSPECT PYTHONSTARTUP; do
    if [ "$key" = "$blocked" ]; then
      echo "ERROR: Blocked environment variable: $key" >&2
      return 1
    fi
  done
  return 0
}
env_python() {
  local yaml_path="$1"
  shift
  # Baseline environment every embedded script receives.
  local -a child_env=(
    "YAML_PATH=$yaml_path"
    "HOME_DIR=$HOME_DIR"
    "CLAUDE_PROJECT_DIR=$CLAUDE_PROJECT_DIR"
    "LOCAL_BIN=$LOCAL_BIN"
  )
  local key

  # Leading KEY=VALUE arguments extend the environment; the first argument
  # without '=' ends the list and is handed to the script as argv.
  while [ $# -gt 0 ] && [[ "$1" == *=* ]]; do
    key="${1%%=*}"
    _validate_env_key "$key" || return 1
    child_env+=("$1")
    shift
  done

  # "python3 -" reads the program from stdin (the caller's heredoc).
  env "${child_env[@]}" python3 - "$@"
}
# ---------------------------------------------------------------------------
# atomic_dump_yaml <yaml_path> [KEY=VALUE ...] (mutation source from stdin)
#
# The ONLY sanctioned way to write agent-sessions.yaml. It:
# 1. takes an exclusive SQLite BEGIN IMMEDIATE transaction lock on
# agent-sessions.db (serialises all writers)
# 2. loads the current state into `d` (seeds from YAML if DB is empty)
# 3. exec()s the caller's mutation source (sees d, yaml, os, datetime,
# timezone, glob, subprocess; reads values via os.environ). The mutation
# may print and may `raise SystemExit(n)` to abort *without* writing.
# 4. validates the resulting schema
# 5. commits the new state to SQLite, then — only when the set of finished
# sessions (stopped / terminated / archived) changed — backs up the YAML to
# <yaml_path>.bak and rewrites it atomically (temp file + os.replace)
#
# The mutation source is passed via env and exec()'d — it is never string
# spliced and untrusted data never lands in Python source (P0-B / P1-B).
# ---------------------------------------------------------------------------
# _check_is_nfs <path>
#
# Return 0 when <path> lives on a network filesystem (NFS, CIFS or SSHFS) and
# 1 otherwise, including when the filesystem type cannot be determined.
# atomic_dump_yaml uses the answer to choose the SQLite journal mode.
_check_is_nfs() {
  local target="$1" fstype mountpoint

  # Preferred: GNU df reports the type of the filesystem holding the path, so
  # no pattern matching against unrelated mount lines is needed.
  fstype="$(df --output=fstype -- "$target" 2>/dev/null | tail -n 1)"
  if [ -n "$fstype" ]; then
    case "$fstype" in
      nfs* | cifs | fuse.sshfs) return 0 ;;
    esac
    return 1
  fi

  # Fallback for df implementations without --output: take the mount point
  # from POSIX df, then find the mount entry whose mount point is *exactly*
  # that path and inspect only that entry's type.
  mountpoint="$(df -P -- "$target" 2>/dev/null | awk 'NR == 2 { print $NF }')"
  [ -n "$mountpoint" ] || return 1
  mount 2>/dev/null | awk -v mp="$mountpoint" '
    {
      needle = " on " mp " "
      pos = index($0, needle)
      if (pos == 0) next
      rest = substr($0, pos + length(needle))
      # Linux prints "type nfs4 (...)"; BSD/macOS prints "(nfs, ...)".
      if (rest ~ /^(type )?\(?(nfs|cifs|fuse\.sshfs)/) found = 1
    }
    END { exit !found }
  '
}
# atomic_dump_yaml <yaml_path> [KEY=VALUE ...]   (Python mutation source on stdin)
#
# Runs the caller's mutation against the session state `d` inside a SQLite
# BEGIN IMMEDIATE transaction on <yaml_path with its extension replaced by .db>,
# validates the result, stores it in the state/sessions tables and commits.
# The YAML file is rewritten (after a .bak copy) only when the set of
# stopped/terminated/archived sessions changed.
# Leading KEY=VALUE arguments are validated with _validate_env_key and exported
# to the Python process; an invalid key makes the function return 1.
atomic_dump_yaml() {
local yaml_path="$1"; shift
# Probe for a network filesystem only when MAM_IS_NFS is not already set; the
# exported result is reused by later calls and inherited by child processes.
if [ -z "${MAM_IS_NFS:-}" ]; then
if _check_is_nfs "$(dirname "$yaml_path")"; then
export MAM_IS_NFS="true"
echo "WARNING: $(dirname "$yaml_path") appears to be a network filesystem (NFS/CIFS/SSHFS)." >&2
echo "WARNING: SQLite journal_mode automatically falls back to DELETE." >&2
else
export MAM_IS_NFS="false"
fi
fi
# Environment handed to python3: fixed paths plus the NFS flag, then any
# caller-supplied KEY=VALUE pairs (parsing stops at the first non-pair argument).
local -a envs=("YAML_PATH=$yaml_path" "HOME_DIR=$HOME_DIR" "CLAUDE_PROJECT_DIR=$CLAUDE_PROJECT_DIR" "LOCAL_BIN=$LOCAL_BIN" "MAM_IS_NFS=$MAM_IS_NFS")
while [ $# -gt 0 ]; do
case "$1" in
*=*)
local key="${1%%=*}"
_validate_env_key "$key" || return 1
envs+=("$1")
shift
;;
*)
break
;;
esac
done
# Read the whole mutation source from this function's stdin. It travels to
# Python in the AGENT_SESSIONS_MUTATION environment variable because python3's
# own stdin is occupied by the driver script in the heredoc below.
local mutation; mutation="$(cat)"
env "${envs[@]}" AGENT_SESSIONS_MUTATION="$mutation" python3 - <<'PYEOF'
import os, sys, tempfile, shutil, glob, subprocess, json, sqlite3
from datetime import datetime, timezone
import yaml
yaml_path = os.environ['YAML_PATH']
db_path = os.path.splitext(yaml_path)[0] + '.db'
def _validate(d):
if not isinstance(d, dict):
raise SystemExit("VALIDATE: top-level is not a mapping")
sessions = d.get('tmux_sessions', [])
if not isinstance(sessions, list):
raise SystemExit("VALIDATE: tmux_sessions is not a list")
valid = {'running', 'terminated', 'archived', 'stopped'}
for i, s in enumerate(sessions):
if not isinstance(s, dict):
raise SystemExit(f"VALIDATE: tmux_sessions[{i}] not a mapping")
if not s.get('name') or not s.get('status'):
raise SystemExit(f"VALIDATE: tmux_sessions[{i}] missing name/status")
if s['status'] not in valid:
raise SystemExit(f"VALIDATE: tmux_sessions[{i}] {s.get('name')!r} bad status {s['status']!r}")
if not isinstance(s.get('pane'), dict):
raise SystemExit(f"VALIDATE: tmux_sessions[{i}] {s.get('name')!r} missing pane")
def get_terminal_set(d):
return {s.get('name'): s.get('status') for s in d.get('tmux_sessions', []) if s.get('status') in ('stopped', 'terminated', 'archived')}
os.makedirs(os.path.dirname(db_path) or '.', exist_ok=True)
conn = sqlite3.connect(db_path, timeout=60.0)
for f in [db_path, db_path + '-wal', db_path + '-shm']:
if os.path.exists(f):
try:
os.chmod(f, 0o600)
except Exception:
pass
is_nfs = os.environ.get('MAM_IS_NFS') == 'true'
if is_nfs:
conn.execute('PRAGMA journal_mode=DELETE')
else:
conn.execute('PRAGMA journal_mode=WAL')
try:
# Disable auto-commit by explicitly starting a transaction with BEGIN IMMEDIATE
# This prevents the read-modify-write lost update race condition.
conn.execute('BEGIN IMMEDIATE')
conn.execute('CREATE TABLE IF NOT EXISTS state (id INTEGER PRIMARY KEY, data TEXT)')
conn.execute('CREATE TABLE IF NOT EXISTS sessions (name TEXT PRIMARY KEY, status TEXT, pane_cwd TEXT, data JSON)')
conn.execute('CREATE INDEX IF NOT EXISTS idx_sessions_pane_cwd ON sessions(pane_cwd)')
row = conn.execute('SELECT data FROM state WHERE id=1').fetchone()
if row:
d = json.loads(row[0])
else:
# Seed from YAML
if os.path.exists(yaml_path):
with open(yaml_path) as f:
d = yaml.safe_load(f) or {}
else:
d = {}
# Assemble d['tmux_sessions'] from sessions table if table contains data
db_sessions = []
cursor = conn.execute('SELECT name, status, pane_cwd, data FROM sessions')
for s_row in cursor.fetchall():
s_data = json.loads(s_row[3])
s_data['name'] = s_row[0]
s_data['status'] = s_row[1]
if 'pane' not in s_data:
s_data['pane'] = {}
s_data['pane']['cwd'] = s_row[2]
db_sessions.append(s_data)
if db_sessions:
d['tmux_sessions'] = db_sessions
elif 'tmux_sessions' not in d:
d['tmux_sessions'] = []
old_terminals = get_terminal_set(d)
# --- caller mutation (module scope: sees d, yaml, os, glob, subprocess) ---
exec(compile(os.environ['AGENT_SESSIONS_MUTATION'], '<mutation>', 'exec'), globals())
_validate(d)
# Separate globals and sessions for normalization
d_state = {k: v for k, v in d.items() if k != 'tmux_sessions'}
conn.execute('REPLACE INTO state (id, data) VALUES (1, ?)', (json.dumps(d_state),))
current_names = []
for s in d.get('tmux_sessions', []):
name = s.get('name')
status = s.get('status')
pane_cwd = (s.get('pane') or {}).get('cwd', '')
conn.execute('REPLACE INTO sessions (name, status, pane_cwd, data) VALUES (?, ?, ?, ?)',
(name, status, pane_cwd, json.dumps(s)))
current_names.append(name)
if current_names:
placeholders = ','.join('?' for _ in current_names)
conn.execute(f'DELETE FROM sessions WHERE name NOT IN ({placeholders})', current_names)
else:
conn.execute('DELETE FROM sessions')
new_terminals = get_terminal_set(d)
conn.commit()
# Write to YAML ONLY when a session transitions to a finished state
# (Moved after conn.commit() per Claude's feedback)
if new_terminals != old_terminals:
if os.path.exists(yaml_path):
try:
shutil.copy2(yaml_path, yaml_path + '.bak')
except Exception:
pass
dir_ = os.path.dirname(yaml_path) or '.'
fd, tmp = tempfile.mkstemp(dir=dir_, prefix='.agent-sessions.', suffix='.tmp')
try:
with os.fdopen(fd, 'w') as f:
yaml.safe_dump(d, f, default_flow_style=False, sort_keys=False,
allow_unicode=True, width=4096)
os.replace(tmp, yaml_path)
except Exception:
if os.path.exists(tmp):
os.remove(tmp)
raise
try:
conn.execute('PRAGMA wal_checkpoint(TRUNCATE)')
except Exception:
pass
except Exception:
conn.rollback()
raise
finally:
conn.close()
# H3: Re-apply chmod 0600 after close to cover newly created -wal / -shm files
try:
os.chmod(db_path, 0o600)
wal = db_path + '-wal'
if os.path.exists(wal): os.chmod(wal, 0o600)
shm = db_path + '-shm'
if os.path.exists(shm): os.chmod(shm, 0o600)
except Exception:
pass
PYEOF
}
# ---------------------------------------------------------------------------
# find_workspace_uuid <workspace> <agent>
#
# Workspace-SCOPED resolution of the resume UUID (P0-C). It NEVER returns a
# global agent_identities id unless that id's project_cwd matches THIS
# workspace. Resolution order:
# 1) tmux_sessions[] row whose pane.cwd == this workspace -> per-row own id
# (claude_session_id_own / agy_conversation_id_own)
# 2) on-disk scan scoped to this workspace
# (claude: ~/.claude/projects/<key>/*.jsonl ; agy: last_conversations.json[cwd])
# 3) agent_identities cache, ONLY when its project_cwd == this workspace
# Prints the UUID on stdout (empty line if none). Always exits 0.
# ---------------------------------------------------------------------------
find_workspace_uuid() {
  local workspace="$1" agent="$2"
  local abs
  # Use the canonical directory when it exists so it can be compared with the
  # stored cwd values; otherwise fall back to the path exactly as given.
  abs="$(cd "$workspace" 2>/dev/null && pwd)" || abs="$workspace"
  WS_ABS="$abs" AGENT="$agent" env_python "$AGENT_SESSIONS_YAML" <<'PYEOF'
import os, json, glob, sqlite3

ws = os.environ['WS_ABS']
agent = os.environ['AGENT']
home = os.environ['HOME_DIR']
yaml_path = os.environ['YAML_PATH']
db_path = os.path.splitext(yaml_path)[0] + '.db'
claude_project_dir = os.environ.get('CLAUDE_PROJECT_DIR', f"{home}/.claude/projects")


def load_yaml_doc():
    # PyYAML is imported lazily: lookups served from the SQLite DB or from
    # disk must keep working (and keep exiting 0) when it is not installed.
    import yaml
    with open(yaml_path) as f:
        return yaml.safe_load(f) or {}


def jsonl_exists(uuid):
    key = ws.replace('/', '-').replace('_', '-')
    return os.path.exists(f"{claude_project_dir}/{key}/{uuid}.jsonl")


def db_exists(uuid):
    return os.path.exists(f"{home}/.gemini/antigravity-cli/conversations/{uuid}.db")


def hermes_exists(uuid):
    hdb = f"{home}/.hermes/state.db"
    if not os.path.exists(hdb):
        return False
    try:
        conn = sqlite3.connect(hdb)
        try:
            r = conn.execute("SELECT 1 FROM sessions WHERE id=?", (uuid,)).fetchone()
        finally:
            conn.close()
        return r is not None
    except Exception:
        return False


def cline_exists(uuid):
    return os.path.exists(f"{home}/.cline/data/sessions/{uuid}/{uuid}.json")


def emit(u):
    print(u)
    raise SystemExit(0)


def in_this_workspace(s):
    return isinstance(s, dict) and (s.get('pane') or {}).get('cwd') == ws


# agent -> (per-row "own id" field, existence check for that agent's store)
OWN_ID = {
    'claude': ('claude_session_id_own', jsonl_exists),
    'agy': ('agy_conversation_id_own', db_exists),
    'hermes': ('hermes_conversation_id_own', hermes_exists),
    'cline': ('cline_conversation_id_own', cline_exists),
}

# 1) per-row own id for THIS workspace (direct sqlite query when the db exists)
sessions = []
try:
    if os.path.exists(db_path):
        conn = sqlite3.connect(db_path, timeout=10.0)
        try:
            has_sessions_table = False
            try:
                cursor = conn.execute('SELECT data FROM sessions WHERE pane_cwd=?', (ws,))
                for row in cursor.fetchall():
                    sessions.append(json.loads(row[0]))
                has_sessions_table = True
            except sqlite3.OperationalError:
                pass
            if not has_sessions_table or not sessions:
                # Fall back to a tmux_sessions list kept inside the state row.
                row = conn.execute('SELECT data FROM state WHERE id=1').fetchone()
                if row:
                    d = json.loads(row[0])
                    sessions.extend(s for s in d.get('tmux_sessions', []) if in_this_workspace(s))
        finally:
            conn.close()
    elif os.path.exists(yaml_path):
        d = load_yaml_doc()
        sessions.extend(s for s in d.get('tmux_sessions', []) if in_this_workspace(s))
except Exception:
    pass

if agent in OWN_ID:
    own_field, own_exists = OWN_ID[agent]
    for s in sessions:
        if not isinstance(s, dict):
            continue
        name = s.get('name') or ''
        if name.endswith(f'-creator-{agent}'):
            cand = s.get(own_field)
            if cand and own_exists(cand):
                emit(cand)

# 2) disk scan scoped to THIS workspace
if agent == 'claude':
    key = ws.replace('/', '-').replace('_', '-')
    proj = f"{claude_project_dir}/{key}"
    if os.path.isdir(proj):
        # Newest transcript first.
        for j in sorted(glob.glob(f"{proj}/*.jsonl"), key=os.path.getmtime, reverse=True):
            sid = None
            try:
                with open(j) as f:
                    first = f.readline().strip()
                if first:
                    sid = json.loads(first).get('sessionId')
            except Exception:
                sid = None
            # Fall back to the file name without its ".jsonl" suffix.
            cand = sid or os.path.basename(j)[:-6]
            if cand and jsonl_exists(cand):
                emit(cand)
elif agent == 'agy':
    lc = f"{home}/.gemini/antigravity-cli/cache/last_conversations.json"
    if os.path.exists(lc):
        cand = None
        try:
            with open(lc) as f:
                cand = json.load(f).get(ws)
        except Exception:
            cand = None
        if cand and db_exists(cand):
            emit(cand)
elif agent == 'hermes':
    hdb = f"{home}/.hermes/state.db"
    if os.path.exists(hdb):
        cand = None
        try:
            conn = sqlite3.connect(hdb)
            try:
                r = conn.execute("SELECT id FROM sessions WHERE cwd=? ORDER BY started_at DESC LIMIT 1", (ws,)).fetchone()
            finally:
                conn.close()
            if r:
                cand = r[0]
        except Exception:
            cand = None
        if cand:
            emit(cand)
elif agent == 'cline':
    sessions_dir = f"{home}/.cline/data/sessions"
    if os.path.isdir(sessions_dir):
        candidates = []
        for session_folder in glob.glob(f"{sessions_dir}/*"):
            if os.path.isdir(session_folder):
                folder_name = os.path.basename(session_folder)
                json_file = f"{session_folder}/{folder_name}.json"
                if os.path.exists(json_file):
                    candidates.append(json_file)
        candidates.sort(key=os.path.getmtime, reverse=True)
        for j in candidates:
            try:
                with open(j) as f:
                    sdata = json.load(f)
                if sdata.get('cwd') == ws or sdata.get('workspace_root') == ws:
                    sid = sdata.get('session_id')
                    if sid:
                        emit(sid)
            except Exception:
                pass

# 3) agent_identities cache, ONLY when its project_cwd == this workspace
ai = {}
try:
    if os.path.exists(db_path):
        conn = sqlite3.connect(db_path, timeout=10.0)
        try:
            row = conn.execute('SELECT data FROM state WHERE id=1').fetchone()
        finally:
            conn.close()
        if row:
            ai = (json.loads(row[0]) or {}).get('agent_identities') or {}
    elif os.path.exists(yaml_path):
        ai = load_yaml_doc().get('agent_identities') or {}
except Exception:
    pass
if not isinstance(ai, dict):
    ai = {}
ai_agent = ai.get(agent) or {}
if isinstance(ai_agent, dict) and ai_agent.get('project_cwd') == ws:
    # Every id is read from THIS agent's entry. Reading it from the top-level
    # agent_identities mapping would bypass the project_cwd check above.
    if agent == 'claude':
        cand = ai_agent.get('session_id')
    elif agent == 'agy':
        cand = ai_agent.get('conversation_id')
    elif agent in ('hermes', 'cline'):
        cand = ai_agent.get('session_id') or ai_agent.get('conversation_id')
    else:
        cand = None
    if cand and agent in OWN_ID and OWN_ID[agent][1](cand):
        emit(cand)

print('')
PYEOF
}
# ---------------------------------------------------------------------------
# capture_conversation_id <agent> <workdir>
#
# Thin wrapper over find_workspace_uuid: resolves THIS workspace's conversation
# id (claude jsonl sessionId / agy db uuid) and prints it on stdout (empty line
# if none). find_workspace_uuid is already a workspace-scoped, 3-tier, race-free
# resolver (per-row own id -> workspace-scoped disk scan -> cwd-matched cache),
# so recording its result into the row before kill guarantees tier-1 on the next
# resume. Always exits 0.
# ---------------------------------------------------------------------------
capture_conversation_id() {
  # Same lookup as find_workspace_uuid; only the argument order differs
  # (<agent> <workdir> here, <workspace> <agent> there).
  find_workspace_uuid "$2" "$1"
}
# ---------------------------------------------------------------------------
# is_already_stopped <session_name>
#
# Exits 0 if the row's status is 'stopped' (printing "stopped_at=<ts>" on
# stdout), 1 otherwise (including not-found). Used for idempotency: a second
# stop on an already-stopped session is a no-op.
# ---------------------------------------------------------------------------
is_already_stopped() {
  local session_name="$1"
  SESSION_NAME="$session_name" env_python "$AGENT_SESSIONS_YAML" <<'PYEOF'
import os, yaml, sqlite3, json

target = os.environ['SESSION_NAME']
yaml_path = os.environ['YAML_PATH']
db_path = os.path.splitext(yaml_path)[0] + '.db'


def report_stopped(entry):
    # Success path: print the timestamp and leave with status 0.
    print(f"stopped_at={entry.get('stopped_at', '?')}")
    raise SystemExit(0)


def scan(doc):
    for entry in doc.get('tmux_sessions', []):
        if entry.get('name') == target and entry.get('status') == 'stopped':
            report_stopped(entry)


try:
    if os.path.exists(db_path):
        conn = sqlite3.connect(db_path, timeout=10.0)
        sessions_table_ok = False
        try:
            row = conn.execute('SELECT status, data FROM sessions WHERE name=?', (target,)).fetchone()
            if row and row[0] == 'stopped':
                report_stopped(json.loads(row[1]))
            sessions_table_ok = True
        except sqlite3.OperationalError:
            pass
        if not sessions_table_ok:
            # The sessions table could not be queried: look in the state row.
            state_row = conn.execute('SELECT data FROM state WHERE id=1').fetchone()
            if state_row:
                scan(json.loads(state_row[0]))
        conn.close()
        raise SystemExit(1)
    elif os.path.exists(yaml_path):
        with open(yaml_path) as f:
            doc = yaml.safe_load(f) or {}
        scan(doc)
except Exception:
    # SystemExit is not an Exception, so the exits above pass through.
    pass
raise SystemExit(1)
PYEOF
}
# ---------------------------------------------------------------------------
# multi-agent-mux-delegate-job integration helpers
#
# All paths are resolved relative to lib.sh's own location (BASH_SOURCE), so the
# skill tree is relocatable — no hardcoded absolute paths (review item 6).
# ---------------------------------------------------------------------------
# _delegate_py_bin — print the Python interpreter for delegate-job scripts:
# the first .venv/bin/python found walking up from this file's directory,
# otherwise python3.
_delegate_py_bin() {
  # Reuse AGENT_PYTHON_BIN while it still names an executable. It is assigned
  # as a plain shell variable here and never exported by this function.
  if [ -z "${AGENT_PYTHON_BIN:-}" ] || [ ! -x "$AGENT_PYTHON_BIN" ]; then
    local here venv_python=""
    here="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
    # Climb towards the filesystem root; "/" itself is not inspected.
    while [ -n "$here" ] && [ "$here" != "/" ]; do
      if [ -x "$here/.venv/bin/python" ]; then
        venv_python="$here/.venv/bin/python"
        break
      fi
      here="$(dirname "$here")"
    done
    AGENT_PYTHON_BIN="${venv_python:-$(command -v python3 || echo python3)}"
  fi
  printf '%s\n' "$AGENT_PYTHON_BIN"
}
# _delegate_script <name> — print the path of a multi-agent-mux-delegate-job
# script, looked up relative to this file's directory. Prints an empty line
# when nothing is found.
_delegate_script() {
  local name="$1"
  local root direct first_hit
  root="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
  direct="$root/multi-agent-mux-delegate-job/scripts/$name"
  if [ -f "$direct" ]; then
    printf '%s\n' "$direct"
    return 0
  fi
  # Not at the conventional location: take the first match anywhere below.
  first_hit="$(find "$root" -name "$name" 2>/dev/null | head -n 1 || true)"
  printf '%s\n' "$first_hit"
}
# delegate_submit_job <prompt> <agent> <agent_session>
#
# Register a job by running registry.py's "register" sub-command; whatever the
# script writes to stdout (the new JID) is passed through. Returns 1 with an
# error on stderr when registry.py cannot be located.
delegate_submit_job() {
  local prompt="$1" agent="$2" session="$3"
  local py_bin registry_py
  py_bin="$(_delegate_py_bin)"
  registry_py="$(_delegate_script registry.py)"

  if [[ -z "$registry_py" || ! -f "$registry_py" ]]; then
    echo "ERROR: multi-agent-mux-delegate-job registry.py not found under .agents/skills/" >&2
    return 1
  fi

  # Build the command as an array so each value stays a single argument.
  local -a cmd=(
    "$py_bin" "$registry_py" register
    --prompt "$prompt"
    --agent "$agent"
    --agent-session "$session"
  )
  "${cmd[@]}"
}
# delegate_publish_event <job_id> <event> [detail]
#
# Publish a lifecycle event by running publish_event.py. Non-fatal by
# contract: an empty job id, a missing script or a failing publisher all
# leave the return status at 0 so the caller is never aborted.
delegate_publish_event() {
  local job_id="$1" event="$2" detail="${3:-}"
  if [ -z "$job_id" ]; then
    return 0
  fi

  local py_bin pub
  py_bin="$(_delegate_py_bin)"
  pub="$(_delegate_script publish_event.py)"
  if [ -z "$pub" ] || [ ! -f "$pub" ]; then
    return 0
  fi

  if ! "$py_bin" "$pub" --job "$job_id" --event "$event" --detail "$detail"; then
    : # publisher failure is deliberately ignored
  fi
  return 0
}
# start_watchdog <job_id> [workdir]
#
# Ensure the monitor subscriber for <workdir> (default: current directory) is
# running and print its PID on stdout.
#   - If a process matching "bash <workdir>/.agents/skills/multi-agent-mux-monitor/
#     scripts/reconcile.sh --subscribe" already exists, pgrep's output for it is
#     printed and nothing is started.
#   - Otherwise reconcile.sh is started in the background with
#     "--subscribe --idle-timeout 0", cwd set to <workdir>, and output appended
#     to <workdir>/.mam/multi-agent-mux-monitor.log.
# <job_id> is accepted for interface compatibility; this function does not use it.
# Returns 1 when the monitor script is missing or the log directory cannot be
# created.
start_watchdog() {
  local job_id="$1"
  local workdir="${2:-$PWD}"
  local monitor_script="$workdir/.agents/skills/multi-agent-mux-monitor/scripts/reconcile.sh"
  local log_file="$workdir/.mam/multi-agent-mux-monitor.log"

  if [ ! -f "$monitor_script" ]; then
    echo "ERROR: monitor script not found: $monitor_script" >&2
    return 1
  fi

  # Check if reconcile.sh --subscribe is already running on this workspace.
  local pid
  pid=$(pgrep -f "bash $monitor_script --subscribe" || true)
  if [ -z "$pid" ]; then
    # The log redirection below fails if .mam does not exist yet.
    if ! mkdir -p "$(dirname "$log_file")"; then
      echo "ERROR: cannot create log directory for $log_file" >&2
      return 1
    fi
    # Start the subscriber with --idle-timeout 0 (never idle out). The cd runs
    # inside the background subshell, so the caller's working directory is
    # never touched and a failing cd cannot start the daemon in the wrong
    # place. exec keeps the PID captured by $! pointing at the daemon itself.
    (
      cd "$workdir" || exit 1
      exec nohup bash "$monitor_script" --subscribe --idle-timeout 0
    ) >> "$log_file" 2>&1 &
    pid=$!
  fi
  echo "$pid"
}