feat(delegate-job): add subscriber auto-reconnect (FW-01) + HMAC-SHA256 event signing (FW-05)
FW-01: job_subscriber.py now has an on_disconnect callback (5-arg paho v2 signature), reconnect_delay_set(min_delay=1, max_delay=16) for exponential backoff, and a with_retry-wrapped initial connect (5 attempts). paho's loop_start() handles auto-reconnect internally. FW-05: publish_event.py signs payloads with HMAC-SHA256 using auth_token as the key (replacing the plaintext token on the wire). mqtt_common.py adds a verify_hmac() helper that uses hmac.compare_digest (timing-safe). job_subscriber.py validates incoming events via verify_hmac. PoC mode (auth_token=None) skips verification, so it remains backward compatible. Reviewed by agy-existing (PASS) and claude-existing (FAIL: on_disconnect had a 4-arg signature → fixed to 5-arg to match paho v2 CallbackAPIVersion).
This commit is contained in:
@@ -84,11 +84,9 @@ class _Watcher:
|
||||
return
|
||||
# --- production auth check: data.auth_token must match if expected ---
|
||||
expected_token = self.tokens.get(jid)
|
||||
if expected_token is not None:
|
||||
got = (payload.get("data") or {}).get("auth_token")
|
||||
if got != expected_token:
|
||||
logger.warning("drop event for job %s: auth_token mismatch", jid)
|
||||
return
|
||||
if not mqtt_common.verify_hmac(payload, expected_token):
|
||||
logger.warning("drop event for job %s: auth_token mismatch", jid)
|
||||
return
|
||||
# Persistent audit log from the *subscriber's* vantage point: every event
|
||||
# that survives defensive parsing is recorded here, including ones a
|
||||
# different host published. This is the external-observer record that
|
||||
@@ -170,8 +168,18 @@ def main(argv=None) -> int:
|
||||
_c.subscribe(topic, qos=1)
|
||||
logger.info("subscribed to %s", topic)
|
||||
|
||||
def on_disconnect(_c, _u, _flags, reason_code, _props):
|
||||
rc = mqtt_common.reason_code_value(reason_code)
|
||||
if rc != 0:
|
||||
logger.warning("broker disconnected (rc=%s); will retry reconnect", reason_code)
|
||||
|
||||
client.on_connect = on_connect
|
||||
client.connect(config.host, config.port, config.keepalive)
|
||||
client.on_disconnect = on_disconnect
|
||||
client.reconnect_delay_set(min_delay=1, max_delay=16)
|
||||
mqtt_common.with_retry(
|
||||
lambda: client.connect(config.host, config.port, config.keepalive),
|
||||
attempts=5, base_delay=1.0, max_delay=16.0
|
||||
)()
|
||||
client.loop_start()
|
||||
|
||||
terminal: Dict[str, str] = {} # job_id -> "completed"/"error"
|
||||
|
||||
Reference in New Issue
Block a user