diff --git a/Messaging_System_REPORT.md b/Messaging_System_REPORT.md index 33da112..4692940 100644 --- a/Messaging_System_REPORT.md +++ b/Messaging_System_REPORT.md @@ -153,11 +153,13 @@ Every event payload must adhere to the following schema structure: --- -### 2.4 Integrity and Authentication Verification (Bearer Auth) +### 2.4 Integrity and Authentication Verification (HMAC-SHA256 Signatures) To prevent unauthorized users from hijacking or spoofing events on public brokers: 1. When a job is registered, a cryptographic token (`auth_token`) is generated (`secrets.token_urlsafe(32)`). -2. The publisher reads this token from the local job file and injects it into `data.auth_token` for all outgoing messages. -3. The subscriber (`job_subscriber.py`) reads the expected `auth_token` from the local registry and performs a plaintext bearer-token check on all received messages. Mismatched or missing tokens are discarded immediately. +2. The publisher reads this token and signs the JSON payload. Specifically, the publisher calculates an HMAC-SHA256 signature using the `auth_token` as the secret key over the serialized payload (with the `data.hmac_sig` field excluded). +3. The signature is attached as `data.hmac_sig` on the wire. +4. The subscriber (`job_subscriber.py`) reads the expected `auth_token` from the local registry and verifies the HMAC signature. Any message with a missing, invalid, or mismatched signature is discarded immediately with an "HMAC verify failed" log. +5. Because the plaintext `auth_token` is never transmitted on the wire (to prevent token interception), all publishers and subscribers must be updated simultaneously during deployment rollout to avoid dropped events. 
--- diff --git a/skills/tmux-agent-orchestrate-delegate-job/SKILL.md b/skills/tmux-agent-orchestrate-delegate-job/SKILL.md index 336811b..06e40a4 100644 --- a/skills/tmux-agent-orchestrate-delegate-job/SKILL.md +++ b/skills/tmux-agent-orchestrate-delegate-job/SKILL.md @@ -349,6 +349,10 @@ has been verified (2026-06-21, 6-batch refactoring sprint): 5. **Batch grouping** — group 2-3 FW items per batch when they touch different files (no file overlap). This amortises the dispatch overhead. Items touching the same file must be in separate batches to avoid conflicts. +6. **Pane Snapshots & Truncation Prevention** — to prevent long agent responses from being scrolled out and truncated due to TUI viewport limitations, enforce the following snapshotting pattern: + - Immediately after dispatching a brief, capture the pre-brief pane buffer via `capture-pane -S -200`. + - During long execution, run a background loop taking incremental snapshots (e.g. every 30 seconds, appending to `/tmp/pane-snap.txt`). + - Immediately after job termination, capture the entire final pane state to ensure no terminal logs are lost. 
## Verification Checklist diff --git a/skills/tmux-agent-orchestrate-delegate-job/scripts/job_subscriber.py b/skills/tmux-agent-orchestrate-delegate-job/scripts/job_subscriber.py index 5d5a995..1e73a86 100755 --- a/skills/tmux-agent-orchestrate-delegate-job/scripts/job_subscriber.py +++ b/skills/tmux-agent-orchestrate-delegate-job/scripts/job_subscriber.py @@ -85,7 +85,7 @@ class _Watcher: # --- production auth check: data.auth_token must match if expected --- expected_token = self.tokens.get(jid) if not mqtt_common.verify_hmac(payload, expected_token): - logger.warning("drop event for job %s: auth_token mismatch", jid) + logger.warning("drop event for job %s: HMAC verify failed", jid) return # Persistent audit log from the *subscriber's* vantage point: every event # that survives defensive parsing is recorded here, including ones a diff --git a/skills/tmux-agent-orchestrate-delegate-job/scripts/publish_event.py b/skills/tmux-agent-orchestrate-delegate-job/scripts/publish_event.py index b47c934..cef585d 100755 --- a/skills/tmux-agent-orchestrate-delegate-job/scripts/publish_event.py +++ b/skills/tmux-agent-orchestrate-delegate-job/scripts/publish_event.py @@ -75,11 +75,9 @@ def build_payload( "detail": detail, "data": dict(data) if data else {}, } - # Production: carry the per-job auth token so the subscriber can verify the - # publisher. The token is compared in plain text (bearer-token style) by the - # subscriber — NOT an HMAC. See SKILL.md "Auth token" and PLAN 8.2. The - # registry stores the per-job token in `auth_token`; only include it on the - # wire when set so the public broker (no auth) doesn't leak anything. + # Production: carry the per-job HMAC-SHA256 signature in `data.hmac_sig` so + # the subscriber can verify the publisher without exposing the secret token. + # The signature is calculated over the entire payload (with `data.hmac_sig` excluded). 
if auth_token: sign_payload = {k: v for k, v in payload.items() if k != "data"} sign_payload["data"] = {k: v for k, v in payload.get("data", {}).items() if k != "hmac_sig"} diff --git a/skills/tmux-agent-orchestrate-monitor/SKILL.md b/skills/tmux-agent-orchestrate-monitor/SKILL.md index 5f983aa..6d79daa 100644 --- a/skills/tmux-agent-orchestrate-monitor/SKILL.md +++ b/skills/tmux-agent-orchestrate-monitor/SKILL.md @@ -107,7 +107,7 @@ bash skills/tmux-agent-orchestrate-monitor/scripts/reconcile.sh --subscribe --id bash skills/tmux-agent-orchestrate-monitor/scripts/reconcile.sh --subscribe --idle-timeout 0 ``` -Flags: `--once` (single pass), `--emit-diff` (print JSON), `--dry-run` (P1-E — no mutation), `--subscribe` (push-based MQTT subscription monitoring). `--subscribe` sub-flags: `--timeout N` (exit after N seconds of wall-clock; `0` = no limit, default), `--idle-timeout N` (exit after N seconds with no message; default `600`, `0` = never idle-out). On a broker connection failure (connect error **or** non-zero CONNACK), `--subscribe` falls back to a polling loop that re-runs `--once --emit-diff` every `RECONCILE_POLL_INTERVAL` (default 15) seconds until `--timeout`. Terminal-event YAML updates are written through `lib.sh::atomic_dump_yaml` (flock + schema-validate + `.bak`). There are **no** `--workspace` / `--agent` / `--comment-card` flags; the worker turns the emitted JSON `drifts[]` into `kanban_comment` calls itself. +Flags: `--once` (single pass), `--emit-diff` (print JSON), `--dry-run` (P1-E — no mutation), `--subscribe` (push-based MQTT subscription monitoring). `--subscribe` sub-flags: `--timeout N` (exit after N seconds of wall-clock; `0` = no limit, default), `--idle-timeout N` (exit after N seconds with no message; default `3600`, `0` = never idle-out). 
On a broker connection failure (connect error **or** non-zero CONNACK), `--subscribe` falls back to a polling loop that re-runs `--once --emit-diff` every `RECONCILE_POLL_INTERVAL` (default 15) seconds until `--timeout`. Terminal-event YAML updates are written through `lib.sh::atomic_dump_yaml` (flock + schema-validate + `.bak`). There are **no** `--workspace` / `--agent` / `--comment-card` flags; the worker turns the emitted JSON `drifts[]` into `kanban_comment` calls itself. ## Drift classes (what the script handles) diff --git a/skills/tmux-agent-orchestrate-monitor/scripts/reconcile.sh b/skills/tmux-agent-orchestrate-monitor/scripts/reconcile.sh index a3ccab4..d91ca20 100755 --- a/skills/tmux-agent-orchestrate-monitor/scripts/reconcile.sh +++ b/skills/tmux-agent-orchestrate-monitor/scripts/reconcile.sh @@ -22,10 +22,10 @@ ONCE=0 EMIT_DIFF=0 DRY_RUN=0 SUBSCRIBE=0 -# --subscribe controls (review item 4): 0 = no overall timeout; idle default 600s -# (raised from the old hardcoded 120s); idle 0 = never idle-out. +# --subscribe controls (review item 4): 0 = no overall timeout; idle default 3600s +# (raised from 600s to align with job timeout defaults); idle 0 = never idle-out. SUB_TIMEOUT=0 -SUB_IDLE_TIMEOUT=600 +SUB_IDLE_TIMEOUT=3600 POLL_INTERVAL="${RECONCILE_POLL_INTERVAL:-15}" while [ $# -gt 0 ]; do @@ -63,7 +63,7 @@ import os, sys, json, time, subprocess lib_sh = os.environ.get('LIB_SH', '') skills_dir = os.environ.get('SKILLS_DIR', '') timeout = int(os.environ.get('SUB_TIMEOUT', '0') or '0') # 0 = no overall timeout -idle_timeout = int(os.environ.get('SUB_IDLE_TIMEOUT', '600') or '0') # 0 = no idle timeout +idle_timeout = int(os.environ.get('SUB_IDLE_TIMEOUT', '3600') or '0') # 0 = no idle timeout # Locate skills/tmux-agent-orchestrate-delegate-job/scripts to import mqtt_common — relative first, then # an upward walk from cwd. No hardcoded absolute path (review item 6).