refactor(skills): cleanup dead code + full workflow A→B→C→D integration
Cleanup: - Remove unused validate_yaml() helper from lib.sh - Remove USER_MANUAL.html + mqtt-broker-setup.html (no refs found) Workflow A (create_session ↔ delegate-job): - Add --submit-job <prompt> option to create_session.sh - Auto-register session in delegate-job registry, store delegate_job_id in YAML Workflow B (push-based monitor): - Migrate reconcile.sh to MQTT subscriber mode (polling fallback preserved) Workflow C (unified status): - status.sh now shows session + delegate-job state in single column Workflow D (audit log + perms): - JSON job files chmod 600 - create/delete/resume now publish lifecycle events to delegate-job
This commit is contained in:
@@ -21,18 +21,141 @@ STATE_DIR="${AGENT_SESSIONS_STATE_DIR:-$HOME/.cache/agent-sessions-monitor}"
|
||||
ONCE=0
|
||||
EMIT_DIFF=0
|
||||
DRY_RUN=0
|
||||
SUBSCRIBE=0
|
||||
|
||||
while [ $# -gt 0 ]; do
|
||||
case "$1" in
|
||||
--once) ONCE=1; shift ;;
|
||||
--emit-diff) EMIT_DIFF=1; shift ;;
|
||||
--dry-run) DRY_RUN=1; shift ;;
|
||||
-h|--help) echo "Usage: $0 [--once] [--emit-diff] [--dry-run]"; exit 0 ;;
|
||||
--subscribe) SUBSCRIBE=1; shift ;;
|
||||
-h|--help) echo "Usage: $0 [--once] [--emit-diff] [--dry-run] [--subscribe]"; exit 0 ;;
|
||||
*) echo "ERROR: unknown arg: $1" >&2; exit 2 ;;
|
||||
esac
|
||||
done
|
||||
|
||||
[ -f "$AGENT_SESSIONS_YAML" ] || { echo "ERROR: $AGENT_SESSIONS_YAML not found" >&2; exit 1; }
|
||||
|
||||
if [ "$SUBSCRIBE" = "1" ]; then
|
||||
SUBSCRIBE_MODE=1 env_python "$AGENT_SESSIONS_YAML" <<'PYEOF'
|
||||
import os, sys, json, fcntl, tempfile, subprocess
|
||||
from datetime import datetime, timezone
|
||||
import yaml
|
||||
|
||||
yaml_path = os.environ['YAML_PATH']
|
||||
home = os.environ['HOME_DIR']
|
||||
|
||||
# Add skills/delegate-job/scripts to path to import mqtt_common
|
||||
script_dir = os.path.dirname(os.path.abspath(__file__)) if '__file__' in globals() else os.getcwd()
|
||||
path_candidate = os.path.join('/home/godopu16/PuKi/laa/canary_projects/advanced_multi_agent', 'skills', 'delegate-job', 'scripts')
|
||||
if os.path.isdir(path_candidate):
|
||||
sys.path.append(path_candidate)
|
||||
else:
|
||||
d = script_dir
|
||||
found = False
|
||||
while d != '/' and d:
|
||||
p = os.path.join(d, 'skills', 'delegate-job', 'scripts')
|
||||
if os.path.isdir(p):
|
||||
sys.path.append(p)
|
||||
found = True
|
||||
break
|
||||
p2 = os.path.join(d, 'delegate-job', 'scripts')
|
||||
if os.path.isdir(p2):
|
||||
sys.path.append(p2)
|
||||
found = True
|
||||
break
|
||||
d = os.path.dirname(d)
|
||||
|
||||
import mqtt_common
|
||||
|
||||
cfg = mqtt_common.broker_config_from_env()
|
||||
client = mqtt_common.make_client("monitor_sub", cfg)
|
||||
|
||||
def on_message(client, userdata, msg):
|
||||
try:
|
||||
payload = json.loads(msg.payload.decode("utf-8"))
|
||||
jid = payload.get("job_id")
|
||||
event = payload.get("event")
|
||||
if not jid or not event:
|
||||
return
|
||||
|
||||
if event in ("completed", "error"):
|
||||
print(f"MQTT Monitor: received terminal event {event} for job {jid}", flush=True)
|
||||
update_session_by_job(jid, event)
|
||||
except Exception as e:
|
||||
print(f"MQTT Monitor error parsing message: {e}", flush=True)
|
||||
|
||||
def update_session_by_job(jid, event):
|
||||
lock_path = yaml_path + '.lock'
|
||||
lock_fh = open(lock_path, 'w')
|
||||
fcntl.flock(lock_fh, fcntl.LOCK_EX)
|
||||
try:
|
||||
if os.path.exists(yaml_path):
|
||||
with open(yaml_path) as f:
|
||||
d_local = yaml.safe_load(f) or {}
|
||||
else:
|
||||
d_local = {}
|
||||
|
||||
sessions = d_local.setdefault('tmux_sessions', [])
|
||||
updated = False
|
||||
for s in sessions:
|
||||
if s.get('delegate_job_id') == jid and s.get('status') == 'running':
|
||||
s['status'] = 'terminated'
|
||||
now_iso = datetime.now(timezone.utc).strftime('%Y-%m-%dT%H:%M:%SZ')
|
||||
s['terminated_at'] = now_iso
|
||||
s['terminated_at_epoch'] = int(datetime.now(timezone.utc).timestamp())
|
||||
s['termination_mode'] = f"auto-detected (MQTT {event})"
|
||||
name = s.get('name')
|
||||
srv = s.get('tmux_server') or 'default'
|
||||
kill_tmux_session(name, srv)
|
||||
updated = True
|
||||
|
||||
if updated:
|
||||
dir_ = os.path.dirname(yaml_path) or '.'
|
||||
fd, tmp = tempfile.mkstemp(dir=dir_, prefix='.agent-sessions.', suffix='.tmp')
|
||||
try:
|
||||
with os.fdopen(fd, 'w') as f:
|
||||
yaml.safe_dump(d_local, f, default_flow_style=False, sort_keys=False,
|
||||
allow_unicode=True, width=4096)
|
||||
os.replace(tmp, yaml_path)
|
||||
print(f"MQTT Monitor: updated YAML for job {jid} to terminated", flush=True)
|
||||
except Exception as e:
|
||||
if os.path.exists(tmp):
|
||||
os.remove(tmp)
|
||||
print(f"MQTT Monitor error writing YAML: {e}", flush=True)
|
||||
finally:
|
||||
fcntl.flock(lock_fh, fcntl.LOCK_UN)
|
||||
lock_fh.close()
|
||||
|
||||
def kill_tmux_session(name, srv):
|
||||
try:
|
||||
cmd = ['tmux']
|
||||
if srv != 'default':
|
||||
cmd += ['-L', srv]
|
||||
cmd += ['kill-session', '-t', name]
|
||||
subprocess.run(cmd, capture_output=True)
|
||||
print(f"MQTT Monitor: killed tmux session {name} on server {srv}", flush=True)
|
||||
except Exception as e:
|
||||
print(f"MQTT Monitor error killing tmux: {e}", flush=True)
|
||||
|
||||
client.on_message = on_message
|
||||
|
||||
def on_connect(_c, _u, _flags, reason_code, _props):
|
||||
rc = mqtt_common.reason_code_value(reason_code)
|
||||
if rc == 0:
|
||||
_c.subscribe("python/mqtt/jobs/+/events", qos=1)
|
||||
print("MQTT Monitor: subscribed to python/mqtt/jobs/+/events", flush=True)
|
||||
else:
|
||||
print(f"MQTT Monitor connection failed: {rc}", flush=True)
|
||||
|
||||
client.on_connect = on_connect
|
||||
print(f"MQTT Monitor: connecting to {cfg.host}:{cfg.port} (TLS={cfg.tls})...", flush=True)
|
||||
client.connect(cfg.host, cfg.port, cfg.keepalive)
|
||||
client.loop_forever()
|
||||
PYEOF
|
||||
exit 0
|
||||
fi
|
||||
|
||||
mkdir -p "$STATE_DIR"
|
||||
|
||||
# 모든 비교 로직을 단일 소스로 둔다. dry-run 은 env_python(읽기전용), 그 외엔
|
||||
|
||||
Reference in New Issue
Block a user