fix(delegate-job): unify .env loading in Python scripts (FW-04) + trap agent bootstrap errors (FW-06)

FW-04: mqtt_common.py now loads .env at module import via _load_dotenv().
  Walks up from script dir to find workspace .env, sets vars not already
  in os.environ (OS env takes precedence). Uses stdlib only — no
  python-dotenv dependency.

FW-06: the bash wrapper installs an EXIT trap before `tmux new-session` so
  that an error event is published if agent bootstrap fails (non-zero exit).
  The trap is cleared after successful session creation. It is only installed
  when both job_id and PY are set.
This commit is contained in:
2026-06-21 06:35:17 +00:00
parent 155c6e8d5c
commit 2cffcc46c5
2 changed files with 42 additions and 0 deletions
@@ -33,6 +33,40 @@ import paho.mqtt.client as mqtt
logger = logging.getLogger("delegate_job.mqtt_common") logger = logging.getLogger("delegate_job.mqtt_common")
def _load_dotenv(workspace_dir: "str | None" = None) -> None:
    """Load variables from the workspace ``.env`` file into ``os.environ``.

    This gives Python scripts the same environment variables as the shell
    wrapper scripts that ``source`` the ``.env`` file. A variable that is
    already present in ``os.environ`` is never overwritten, so the OS
    environment takes precedence over the file.

    Parsing rules (stdlib only, deliberately simple):

    * Blank lines and lines starting with ``#`` are skipped.
    * Lines without ``=`` are skipped.
    * A leading ``export`` keyword (``export KEY=value``) is dropped, so
      files written for shell sourcing yield the key ``KEY``.
    * Whitespace around the key and value is trimmed.
    * One pair of matching surrounding quotes (``"..."`` or ``'...'``) is
      removed from the value; unbalanced or inner quotes are kept verbatim.

    Args:
        workspace_dir: Directory expected to contain ``.env``. When ``None``,
            the directory of this script and up to five of its ancestors are
            searched, and the first one containing a ``.env`` file is used.

    Returns:
        None. Silently does nothing when no ``.env`` file is found.
    """
    import os

    if workspace_dir is None:
        # Walk up from this script's directory to find the workspace root.
        # If the loop ends without a hit, `root` is the fifth ancestor and
        # the isfile() check below decides whether it holds a .env file.
        root = os.path.dirname(os.path.abspath(__file__))
        for _ in range(5):
            if os.path.isfile(os.path.join(root, ".env")):
                break
            root = os.path.dirname(root)
    else:
        root = workspace_dir

    env_path = os.path.join(root, ".env")
    if not os.path.isfile(env_path):
        return

    # Explicit encoding so parsing does not depend on the process locale.
    with open(env_path, "r", encoding="utf-8") as f:
        for raw_line in f:
            line = raw_line.strip()
            if not line or line.startswith("#"):
                continue

            key, sep, val = line.partition("=")
            if not sep:
                continue

            key = key.strip()
            # Shell-style files often say `export KEY=value`; without this
            # the variable would be stored under the key "export KEY".
            words = key.split(None, 1)
            if len(words) == 2 and words[0] == "export":
                key = words[1].strip()

            val = val.strip()
            # Remove exactly one matching pair of surrounding quotes, so a
            # value like "'x'" keeps its inner quotes.
            if len(val) >= 2 and val[0] == val[-1] and val[0] in ('"', "'"):
                val = val[1:-1]

            if key and key not in os.environ:
                os.environ[key] = val
_load_dotenv()
# -------------------------------------------------------------------------- # --------------------------------------------------------------------------
# Constants # Constants
# -------------------------------------------------------------------------- # --------------------------------------------------------------------------
@@ -199,9 +199,17 @@ run_agent() {
echo " kill the stale one first: tmux kill-session -t $sess" >&2 echo " kill the stale one first: tmux kill-session -t $sess" >&2
return 1 return 1
fi fi
# Before launching the agent, set up error trap to publish error event
if [ -n "${job_id:-}" ] && [ -n "${PY:-}" ]; then
local pub_script="$SCRIPT_DIR/scripts/publish_event.py"
trap 'rc=$?; if [ $rc -ne 0 ]; then "$PY" "$pub_script" --job "$job_id" --event error --detail "agent bootstrap failed (exit $rc)"; fi' EXIT
fi
tmux new-session -d -s "$sess" -c "$WORKDIR" \ tmux new-session -d -s "$sess" -c "$WORKDIR" \
"printf '%s' \"$instructions\" | $bin --dangerously-skip-permissions; echo; echo '--- agent exited (job $job_id); press enter to close ---'; read" "printf '%s' \"$instructions\" | $bin --dangerously-skip-permissions; echo; echo '--- agent exited (job $job_id); press enter to close ---'; read"
echo "agent launched in tmux session: $sess (attach with: tmux attach -t $sess)" echo "agent launched in tmux session: $sess (attach with: tmux attach -t $sess)"
trap - EXIT
} }
cmd_status() { cmd_status() {