From 009863cf2baf70294929cb7b55537098f7604e87 Mon Sep 17 00:00:00 2001
From: Rob Colbert
Date: Mon, 11 May 2026 16:02:57 -0400
Subject: [PATCH] fix: resolve drone heartbeat timeouts and JWT expiration bugs

This commit addresses two interrelated issues causing drones to
de-register and users to be forcibly signed out:

## Heartbeat Timeout Fixes

1. Move heartbeat interval to a Web Worker (not subject to browser tab
   throttling). Chrome throttles setInterval in background tabs to
   ~1/min, which causes the 19s heartbeat to miss the drone's timeout
   timer. The Web Worker fires reliably regardless of tab visibility.

2. Add visibilitychange handler: when the tab becomes visible again,
   send an immediate heartbeat to reset the drone's timer after any
   throttling that may have occurred. The handler is a no-op unless a
   session heartbeat is currently running, so it cannot re-arm the
   drone's timer after the heartbeat has been stopped.

3. Fix onReleaseSessionLock to clear the heartbeat timer. Previously,
   releasing the lock left the 60s timer running, causing a spurious
   timeout and status emit after the lock was already released.

4. Increase drone heartbeat timeout from 60s to 120s. With the Web
   Worker fix, heartbeats should be reliable, but doubling the timeout
   provides a generous safety margin.

5. Add socket disconnect/reconnect handlers on the drone side. On
   disconnect, clear the heartbeat timer. On reconnect, re-emit drone
   status so the platform knows the drone is alive. The reconnect
   listener is attached to the Manager (socket.io), which is where
   Socket.IO v3+ emits that event.

6. Configure Socket.IO pingInterval/pingTimeout explicitly (25s/60s)
   instead of relying on defaults.

## JWT Expiration Fixes

1. Increase WebToken DB record expiration from 1 hour to 7 days. The
   1-hour expiration was the real session lifetime gate (the JWT crypto
   exp was already 24h), and it was far too aggressive for a dev tool.

2. Fix web /auth/renew-token endpoint to use req.user from the session
   cookie instead of verifyJsonWebToken(req.body.token). This
   eliminates the catch-22 where an expired token cannot be used to
   request its own renewal.

3. Fix token refresh response parsing. The API v1 renew-token endpoint
   returns { success: true, token } at the top level, but the frontend
   was looking for json.data?.token, causing every refresh to fail.

4. Add proactive token refresh: check the JWT exp claim before each
   request and refresh if expiring within 5 minutes. The payload is
   decoded as base64url (the JWT encoding), so a payload containing
   '-' or '_' is not misread as undecodable and refreshed on every
   request. This avoids unnecessary 401 errors and the resulting socket
   disconnections.

5. Update socket JWT on token renewal via a callback registered in
   App.tsx. This ensures that future socket reconnections use the new
   token instead of the expired one.

## Files Modified

- gadget-code/frontend/src/workers/heartbeat.worker.ts (NEW)
- gadget-code/frontend/src/lib/socket.ts
- gadget-code/frontend/src/lib/api.ts
- gadget-code/frontend/src/App.tsx
- gadget-code/src/services/session.ts
- gadget-code/src/controllers/auth.ts
- gadget-code/src/services/socket.ts
- gadget-drone/src/gadget-drone.ts
---
 gadget-code/frontend/src/App.tsx              |  10 +-
 gadget-code/frontend/src/lib/api.ts           |  80 +++++++++-
 gadget-code/frontend/src/lib/socket.ts        | 137 ++++++++++++++++--
 .../frontend/src/workers/heartbeat.worker.ts  |  46 ++++++
 gadget-code/src/controllers/auth.ts           |  13 +-
 gadget-code/src/services/session.ts           |   2 +-
 gadget-code/src/services/socket.ts            |   2 +
 gadget-drone/src/gadget-drone.ts              |  33 ++++-
 8 files changed, 300 insertions(+), 23 deletions(-)
 create mode 100644 gadget-code/frontend/src/workers/heartbeat.worker.ts

diff --git a/gadget-code/frontend/src/App.tsx b/gadget-code/frontend/src/App.tsx
index 6c7be43..064ec3d 100644
--- a/gadget-code/frontend/src/App.tsx
+++ b/gadget-code/frontend/src/App.tsx
@@ -1,6 +1,6 @@
 import { useState, useEffect, createContext, useContext } from 'react';
 import { Routes, Route, Navigate, useNavigate } from 'react-router-dom';
-import { User } from './lib/api';
+import { User, setOnTokenRefreshed } from './lib/api';
 import { socketClient } from './lib/socket';
 import Header from './components/Header';
 import StatusBar from './components/StatusBar';
@@ -79,6 +79,14 @@ export default function App() {
     if (storedProject) {
       setCurrentProject(storedProject);
     }
+
+    // Register callback so the socket client updates its JWT whenever
+    // the API client refreshes the token. This ensures that future
+    // socket reconnections use the new token instead of the expired one.
+    setOnTokenRefreshed((newToken: string) => {
+      socketClient.updateJwt(newToken);
+    });
+
     setLoading(false);
   }, []);
 
diff --git a/gadget-code/frontend/src/lib/api.ts b/gadget-code/frontend/src/lib/api.ts
index fb3ef5e..0065d13 100644
--- a/gadget-code/frontend/src/lib/api.ts
+++ b/gadget-code/frontend/src/lib/api.ts
@@ -6,6 +6,21 @@ const USER_KEY = "dtp_user";
 let isRefreshing = false;
 let refreshPromise: Promise | null = null;
 
+/**
+ * Callback invoked after a successful token refresh so that other
+ * modules (e.g., the socket client) can update their stored JWT.
+ * Set via `setOnTokenRefreshed()`.
+ */
+let onTokenRefreshedCallback: ((newToken: string) => void) | null = null;
+
+/**
+ * Register a callback to be invoked whenever the JWT is refreshed.
+ * Used by the socket client to update its auth token for reconnections.
+ */
+export function setOnTokenRefreshed(cb: (newToken: string) => void): void {
+  onTokenRefreshedCallback = cb;
+}
+
 export interface ApiResponse {
   success: boolean;
   message?: string;
@@ -49,6 +64,24 @@ function signOut(): void {
   window.location.href = "/";
 }
 
+/**
+ * Check if the current JWT's `exp` claim is within the refresh threshold.
+ * Decodes the base64url payload (mapped to base64 for `atob`) without
+ * cryptographic verification; true if it expires within `marginMs` ms.
+ */
+function isTokenExpiringSoon(token: string, marginMs = 5 * 60 * 1000): boolean {
+  try {
+    const parts = token.split(".");
+    if (parts.length < 2) return true;
+    const payload = JSON.parse(atob(parts[1].replace(/-/g, "+").replace(/_/g, "/")));
+    if (!payload.exp) return true;
+    const expiresAt = payload.exp * 1000; // seconds → ms
+    return Date.now() > (expiresAt - marginMs);
+  } catch {
+    return true; // if we can't decode it, treat it as expiring
+  }
+}
+
 async function refreshAuthToken(): Promise {
   const response = await fetch(`${API_BASE}/api/v1/auth/renew-token`, {
     method: "POST",
@@ -64,12 +97,22 @@ async function refreshAuthToken(): Promise {
 
   const text = await response.text();
   try {
-    const json = JSON.parse(text) as ApiResponse<{ token: string }>;
-    if (!json.success || !json.data?.token) {
+    const json = JSON.parse(text) as ApiResponse & { token?: string; data?: { token?: string } };
+    if (!json.success) {
       throw new Error(json.message || "Token refresh failed");
     }
-    return json.data.token;
-  } catch {
+    /*
+     * The API v1 renew-token endpoint returns { success: true, token: "..." }
+     * at the top level (not nested under `data`). Handle both formats for
+     * robustness: check json.token first, then fall back to json.data?.token.
+     */
+    const newToken = json.token ?? json.data?.token;
+    if (!newToken) {
+      throw new Error("Token refresh response missing token");
+    }
+    return newToken;
+  } catch (err) {
+    if (err instanceof Error && err.message.includes("Token refresh")) throw err;
     throw new Error(`Invalid refresh response: ${text.slice(0, 200)}`);
   }
 }
@@ -80,7 +123,33 @@ async function request(
   body?: Record,
   retryCount = 0,
 ): Promise {
-  const token = getToken();
+  let token = getToken();
+
+  /*
+   * Proactive token refresh: if the JWT's `exp` claim shows it will expire
+   * within 5 minutes, refresh it before making the request. This avoids
+   * unnecessary 401 errors and the resulting socket disconnections.
+   */
+  if (token && isTokenExpiringSoon(token)) {
+    try {
+      if (!isRefreshing) {
+        isRefreshing = true;
+        refreshPromise = refreshAuthToken();
+      }
+      token = await refreshPromise;
+      setToken(token);
+      onTokenRefreshedCallback?.(token);
+      isRefreshing = false;
+      refreshPromise = null;
+    } catch {
+      isRefreshing = false;
+      refreshPromise = null;
+      // Don't sign out on proactive refresh failure — the token may still
+      // be valid at the DB level even if the JWT exp is close. Let the
+      // reactive 401 handler below deal with it if needed.
+    }
+  }
+
   const headers: Record = {
     "Content-Type": "application/json",
   };
@@ -110,6 +179,7 @@ async function request(
 
       const newToken = await refreshPromise;
       setToken(newToken);
+      onTokenRefreshedCallback?.(newToken);
       isRefreshing = false;
       refreshPromise = null;
 
diff --git a/gadget-code/frontend/src/lib/socket.ts b/gadget-code/frontend/src/lib/socket.ts
index 505d8ac..ea4b2fb 100644
--- a/gadget-code/frontend/src/lib/socket.ts
+++ b/gadget-code/frontend/src/lib/socket.ts
@@ -2,6 +2,19 @@ import { createContext } from "react";
 import { io, Socket } from "socket.io-client";
 import type { ChatSession } from "./api";
 
+/**
+ * Web Worker for heartbeat timing — avoids browser tab throttling.
+ * Chrome throttles setInterval in background tabs to ~1/min, which
+ * would cause heartbeat timeouts against the drone's 120s timer.
+ */
+let HeartbeatWorker: typeof Worker | null = null;
+try {
+  // Capture the global constructor; the reference throws where it is undefined
+  HeartbeatWorker = Worker;
+} catch {
+  // Web Workers not available (extremely rare in modern browsers)
+}
+
 const SOCKET_URL = "";
 
 export interface ServerToClientEvents {
@@ -104,7 +117,24 @@ class SocketClient {
   private reconnectAttempts = 0;
   private maxReconnectAttempts = 5;
   private jwt: string | null = null;
-  private heartbeatInterval: ReturnType | null = null;
+
+  /**
+   * Web Worker for heartbeat timing. Runs the 19-second interval in a
+   * worker thread to avoid browser tab throttling, which would cause
+   * heartbeat timeouts against the drone's 120-second timer.
+   */
+  private heartbeatWorker: Worker | null = null;
+
+  /**
+   * Fallback setInterval for environments where Web Workers are unavailable.
+   * This is subject to browser tab throttling but is better than nothing.
+   */
+  private heartbeatFallbackInterval: ReturnType<typeof setInterval> | null = null;
+
+  /**
+   * Bound visibility handler so we can remove it on disconnect.
+   */
+  private boundVisibilityHandler: (() => void) | null = null;
 
   get connected(): boolean {
     return this._socket?.connected ?? false;
@@ -203,6 +233,7 @@ class SocketClient {
 
   disconnect(): void {
     this.stopSessionHeartbeat();
+    this.removeVisibilityHandler();
     if (this._socket) {
       this._socket.disconnect();
       this._socket = null;
@@ -328,23 +359,103 @@ class SocketClient {
     });
   }
 
+  /**
+   * Sends a single sessionHeartbeat event to the server (which relays
+   * it to the drone). The drone resets its 120-second timeout timer
+   * on each heartbeat.
+   */
+  private sendHeartbeat(): void {
+    if (this._socket?.connected) {
+      this._socket.emit("sessionHeartbeat", (ack: boolean) => {
+        if (!ack) {
+          console.warn("sessionHeartbeat: drone did not acknowledge");
+        }
+      });
+    }
+  }
+
   startSessionHeartbeat(): void {
-    if (this.heartbeatInterval) return;
-    this.heartbeatInterval = setInterval(() => {
-      if (this._socket) {
-        this._socket.emit("sessionHeartbeat", (ack: boolean) => {
-          if (!ack) {
-            console.warn("sessionHeartbeat: drone did not acknowledge");
-          }
-        });
-      }
+    // Already running?
+    if (this.heartbeatWorker || this.heartbeatFallbackInterval) return;
+
+    if (HeartbeatWorker) {
+      // Use a Web Worker for the heartbeat interval — not subject to
+      // browser tab throttling, which can delay setInterval to 1/min
+      // in background tabs and cause drone heartbeat timeouts.
+      this.heartbeatWorker = new HeartbeatWorker(
+        new URL("../workers/heartbeat.worker.ts", import.meta.url),
+        { type: "module" },
+      );
+      this.heartbeatWorker.onmessage = (event: MessageEvent) => {
+        if (event.data?.type === "tick") {
+          this.sendHeartbeat();
+        }
+      };
+      this.heartbeatWorker.onerror = (err) => {
+        console.error("heartbeat worker error, falling back to setInterval", err);
+        this.stopSessionHeartbeat();
+        this.startFallbackHeartbeat();
+      };
+      this.heartbeatWorker.postMessage({ type: "start" });
+    } else {
+      // Fallback: use setInterval (subject to tab throttling)
+      this.startFallbackHeartbeat();
+    }
+
+    // Install visibility handler: send an immediate heartbeat when
+    // the tab becomes visible again, to reset the drone's timer
+    // after any throttling that may have occurred.
+    this.installVisibilityHandler();
+  }
+
+  private startFallbackHeartbeat(): void {
+    if (this.heartbeatFallbackInterval) return;
+    this.heartbeatFallbackInterval = setInterval(() => {
+      this.sendHeartbeat();
     }, 19000);
   }
 
   stopSessionHeartbeat(): void {
-    if (this.heartbeatInterval) {
-      clearInterval(this.heartbeatInterval);
-      this.heartbeatInterval = null;
+    if (this.heartbeatWorker) {
+      this.heartbeatWorker.postMessage({ type: "stop" });
+      this.heartbeatWorker.terminate();
+      this.heartbeatWorker = null;
+    }
+    if (this.heartbeatFallbackInterval) {
+      clearInterval(this.heartbeatFallbackInterval);
+      this.heartbeatFallbackInterval = null;
+    }
+  }
+
+  private installVisibilityHandler(): void {
+    if (this.boundVisibilityHandler) return; // already installed
+    this.boundVisibilityHandler = () => {
+      // The listener outlives stopSessionHeartbeat() (only disconnect()
+      // removes it), so act only while a heartbeat source is running.
+      const active = this.heartbeatWorker ?? this.heartbeatFallbackInterval;
+      if (active && document.visibilityState === "visible") {
+        // Tab is visible again — reset the drone's timeout timer now.
+        this.sendHeartbeat();
+      }
+    };
+    document.addEventListener("visibilitychange", this.boundVisibilityHandler);
+  }
+
+  private removeVisibilityHandler(): void {
+    if (this.boundVisibilityHandler) {
+      document.removeEventListener("visibilitychange", this.boundVisibilityHandler);
+      this.boundVisibilityHandler = null;
+    }
+  }
+
+  /**
+   * Update the stored JWT used for socket authentication.
+   * Called after a successful token renewal so that future socket
+   * reconnections use the new token instead of the expired one.
+   */
+  updateJwt(newToken: string): void {
+    this.jwt = newToken;
+    if (this._socket) {
+      this._socket.auth = { ...this._socket.auth, token: newToken };
     }
   }
 }
diff --git a/gadget-code/frontend/src/workers/heartbeat.worker.ts b/gadget-code/frontend/src/workers/heartbeat.worker.ts
new file mode 100644
index 0000000..fb05051
--- /dev/null
+++ b/gadget-code/frontend/src/workers/heartbeat.worker.ts
@@ -0,0 +1,46 @@
+/**
+ * Heartbeat Web Worker
+ *
+ * Runs the session heartbeat interval in a Web Worker to avoid browser
+ * tab throttling. Chrome throttles `setInterval` in background tabs to
+ * ~1 execution per minute, which causes the 19-second heartbeat interval
+ * to miss the 120-second drone timeout. Web Workers are NOT subject to
+ * this throttling and will fire reliably regardless of tab visibility.
+ *
+ * Protocol:
+ * - Main thread sends { type: "start" } to begin the heartbeat interval
+ * - Main thread sends { type: "stop" } to stop the interval
+ * - Worker posts { type: "tick" } to the main thread on each interval
+ */
+
+const HEARTBEAT_INTERVAL_MS = 19_000; // 19 seconds
+
+let intervalId: ReturnType<typeof setInterval> | null = null;
+
+function start(): void {
+  if (intervalId !== null) return; // already running
+  intervalId = setInterval(() => {
+    self.postMessage({ type: "tick" });
+  }, HEARTBEAT_INTERVAL_MS);
+}
+
+function stop(): void {
+  if (intervalId !== null) {
+    clearInterval(intervalId);
+    intervalId = null;
+  }
+}
+
+self.onmessage = (event: MessageEvent) => {
+  const { type } = event.data;
+  switch (type) {
+    case "start":
+      start();
+      break;
+    case "stop":
+      stop();
+      break;
+    default:
+      console.warn(`heartbeat.worker: unknown message type "${type}"`);
+  }
+};
diff --git a/gadget-code/src/controllers/auth.ts b/gadget-code/src/controllers/auth.ts
index 7a06205..0ee1234 100644
--- a/gadget-code/src/controllers/auth.ts
+++ b/gadget-code/src/controllers/auth.ts
@@ -77,8 +77,17 @@ export class AuthController extends DtpController {
 
   async postRenewToken(req: Request, res: Response): Promise {
     try {
-      const user = await SessionService.verifyJsonWebToken(req.body.token);
-      const token = await SessionService.createJsonWebToken(user);
+      /*
+       * Use req.user (set by restoreUserSession from the session cookie)
+       * instead of verifying the expired JWT in the request body.
+       * This eliminates the catch-22 where an expired token cannot be
+       * used to request its own renewal.
+       */
+      if (!req.user) {
+        res.status(401).json({ success: false, message: "No valid session found" });
+        return;
+      }
+      const token = await SessionService.createJsonWebToken(req.user);
       req.session.token = token;
       res.status(200).json({ success: true, token });
     } catch (error) {
diff --git a/gadget-code/src/services/session.ts b/gadget-code/src/services/session.ts
index 8b31073..d89dc27 100644
--- a/gadget-code/src/services/session.ts
+++ b/gadget-code/src/services/session.ts
@@ -52,7 +52,7 @@ class SessionService extends DtpService {
     const NOW = new Date();
     const webToken = new WebToken();
     webToken.created = NOW;
-    webToken.expires = dayjs(NOW).add(1, "hour").toDate();
+    webToken.expires = dayjs(NOW).add(7, "day").toDate();
     webToken.user = user._id;
 
     const payload: UserWebToken = {
diff --git a/gadget-code/src/services/socket.ts b/gadget-code/src/services/socket.ts
index e2be8a2..8f10e22 100644
--- a/gadget-code/src/services/socket.ts
+++ b/gadget-code/src/services/socket.ts
@@ -73,6 +73,8 @@ class SocketService extends DtpService {
       SocketData
     >(httpServer, {
       maxHttpBufferSize: env.socket.maxHttpBufferSize,
+      pingInterval: 25000, // 25s between pings (matches default)
+      pingTimeout: 60000, // 60s before disconnect (generous; default is 20s)
       cors: {
         origin: "*",
         methods: ["GET", "POST"],
diff --git a/gadget-drone/src/gadget-drone.ts b/gadget-drone/src/gadget-drone.ts
index 75d2584..267859f 100644
--- a/gadget-drone/src/gadget-drone.ts
+++ b/gadget-drone/src/gadget-drone.ts
@@ -252,6 +252,31 @@ class GadgetDrone extends GadgetProcess {
         "requestTermination",
         this.onRequestTermination.bind(this),
       );
+
+      /*
+       * Handle socket disconnect: clear the heartbeat timer to prevent
+       * spurious timeout firing while disconnected.
+       */
+      this.socket.on("disconnect", (reason) => {
+        this.log.info("socket disconnected from platform", { reason });
+        if (this.heartbeatTimer) {
+          clearTimeout(this.heartbeatTimer);
+          this.heartbeatTimer = null;
+        }
+      });
+
+      /*
+       * Handle reconnect: re-emit drone status so the platform knows the
+       * drone is alive. Socket.IO v3+ emits "reconnect" on the Manager.
+       */
+      this.socket.io.on("reconnect", (attemptNumber) => {
+        this.log.info("socket reconnected to platform", { attemptNumber });
+        if (this.sessionLock) {
+          this.socket?.emit("status", "session lock active (reconnected)");
+        } else {
+          this.socket?.emit("status", "available (reconnected)");
+        }
+      });
     });
   }
 
@@ -394,6 +419,12 @@ class GadgetDrone extends GadgetProcess {
       chatSession: { _id: chatSession._id, name: chatSession.name },
     });
 
+    // Clear the heartbeat timer to prevent spurious timeout after release
+    if (this.heartbeatTimer) {
+      clearTimeout(this.heartbeatTimer);
+      this.heartbeatTimer = null;
+    }
+
     this.sessionLock = undefined;
     this.workspaceMode = WorkspaceMode.Syncing;
     this.socket?.emit("status", "session lock released");
@@ -414,7 +445,7 @@ class GadgetDrone extends GadgetProcess {
         "session lock released due to heartbeat timeout",
       );
       this.heartbeatTimer = null;
-    }, 60000);
+    }, 120000);
 
     cb(true);
   }