diff --git a/docs/session-heartbeat.md b/docs/session-heartbeat.md new file mode 100644 index 0000000..559cd1c --- /dev/null +++ b/docs/session-heartbeat.md @@ -0,0 +1,227 @@ +# Session Heartbeat & Lock Release + +## Problem + +A drone could be left permanently locked if the IDE disconnects or +navigates away without explicitly releasing the session lock. Once locked, +the drone rejects all new lock requests until it is restarted. + +Two mechanisms solve this: + +1. **`releaseSessionLock`** — An explicit message to unlock a drone from a + chat session. Sent deliberately by the IDE on view cleanup, and as a + fallback by the backend on socket disconnect. +2. **`sessionHeartbeat`** — A periodic keepalive from IDE → drone. The + drone starts a 60-second timer on each heartbeat. If no heartbeat + arrives within 60 seconds, the drone automatically releases its lock + and returns to `Syncing` state. + +--- + +## Protocol + +Two new messages, both flowing `IDE → Web → Drone`: + +``` +┌──────────────┐ releaseSessionLock ┌──────────────┐ releaseSessionLock ┌──────────────┐ +│ │ ────────────────────────► │ │ ────────────────────────► │ │ +│ IDE │ sessionHeartbeat │ Web │ sessionHeartbeat │ Drone │ +│ (Browser) │ ────────────────────────► │ (Backend) │ ────────────────────────► │ (Worker) │ +│ │ ◄──────────────────────── │ │ ◄──────────────────────── │ │ +│ │ cb(ack) │ │ cb(ack) │ │ +└──────────────┘ └──────────────┘ └──────────────┘ +``` + +### `releaseSessionLock` + +| Direction | Type | Purpose | +|-----------|------|---------| +| IDE → Web | `ClientToServerEvents.releaseSessionLock` | IDE releases a held lock | +| Web → Drone | `ServerToClientEvents.releaseSessionLock` | Web forwards to drone | + +**Signature:** +```typescript +type ReleaseSessionLockMessage = ( + registration: IDroneRegistration, + project: IProject, + chatSession: IChatSession, + cb: (success: boolean) => void, +) => void; +``` + +The callback is simpler than `requestSessionLock` — just a boolean 
+success, no payload needed. + +### `sessionHeartbeat` + +| Direction | Type | Purpose | +|-----------|------|---------| +| IDE → Web | `ClientToServerEvents.sessionHeartbeat` | Periodic keepalive | +| Web → Drone | `ServerToClientEvents.sessionHeartbeat` | Forwarded to drone | + +**Signature:** +```typescript +type SessionHeartbeatMessage = (cb: (ack: boolean) => void) => void; +``` + +--- + +## Implementation Layer by Layer + +### 1. Shared Types — `packages/api/src/messages/ide.ts` + +Defines `ReleaseSessionLockCallback`, `ReleaseSessionLockMessage`, +`SessionHeartbeatCallback`, and `SessionHeartbeatMessage`. + +### 2. Socket Event Maps — `packages/api/src/messages/socket.ts` + +Both messages are registered in `ClientToServerEvents` (IDE → Web) and +`ServerToClientEvents` (Web → Drone). + +### 3. Frontend Socket Client — `gadget-code/frontend/src/lib/socket.ts` + +```typescript +class SocketClient { + private heartbeatInterval: ReturnType<typeof setInterval> | null = null; + + releaseSessionLock(registration, project, chatSession): Promise<boolean>; + + startSessionHeartbeat(): void; + stopSessionHeartbeat(): void; +} +``` + +- `releaseSessionLock()` wraps `socket.emit("releaseSessionLock", ...)` in + a Promise. +- `startSessionHeartbeat()` starts a `setInterval` at 19 seconds that + emits `sessionHeartbeat` with an ack callback. +- `stopSessionHeartbeat()` clears the interval. +- `disconnect()` automatically calls `stopSessionHeartbeat()`. + +### 4. ChatSessionView — `gadget-code/frontend/src/pages/ChatSessionView.tsx` + +- On mount after `session` and `project` are loaded: starts heartbeat. +- On unmount: stops heartbeat, then sends `releaseSessionLock` using the + drone registration from `localStorage` (`dtp_drone_registration`). +- Uses `sessionRef` / `projectRef` to capture latest state for the + unmount closure. + +### 5. Backend CodeSession — `gadget-code/src/lib/code-session.ts` + +**`onReleaseSessionLock(registration, project, chatSession, cb)`:** +1. 
Looks up `DroneSession` via `SocketService.getDroneSession(registration)`. +2. Forwards `releaseSessionLock` to the drone socket. +3. On success callback: calls `SocketService.unregisterChatSession()`, + clears `droneSession.chatSessionId`, clears local `selectedDrone`, + `chatSession`, `project`. +4. Calls `cb(success)`. + +**`onSessionHeartbeat(cb)`:** +1. Guards `this.selectedDrone` — returns `cb(false)` if not set. +2. Looks up `DroneSession` via `SocketService.getDroneSession()`. +3. Forwards heartbeat to drone socket with the ack callback. + +### 6. Backend Disconnect — `gadget-code/src/services/socket.ts` + +When a `CodeSession` disconnects: + +1. Retrieve the `CodeSession` from `codeSessions` **before** deleting + (fixes an existing bug where the session was read after deletion). +2. Call `disconnectingCodeSession.selectedDroneId` getter to check if a + drone was selected. +3. If yes, look up the `DroneSession` in `droneRegistrationIndex`. +4. Emit `releaseSessionLock` to the drone (fire-and-forget, no callback + needed since the socket is already going away). +5. Clean up `codeSessionUserIndex` and `chatSessionIndex`. +6. Delete from `codeSessions` map. + +This is a safety net for cases where the IDE closes without sending a +deliberate release (browser crash, tab close, network failure). + +### 7. Drone — `gadget-drone/src/gadget-drone.ts` + +**State:** +```typescript +private heartbeatTimer: ReturnType<typeof setTimeout> | null = null; +``` + +**`onReleaseSessionLock(registration, project, chatSession, cb)`:** +1. Validates registration (must match self). +2. If no lock held: `cb(true)` — nothing to do. +3. If lock held by different session: logs warning but still releases + (caller knows what it's doing). +4. Clears `sessionLock`, sets `workspaceMode = Syncing`. +5. Emits `"session lock released"` status. +6. `cb(true)`. + +**`onSessionHeartbeat(cb)`:** +1. Clears existing `heartbeatTimer` if set. +2. 
Sets new 60-second `heartbeatTimer` that: clears `sessionLock`, sets + `workspaceMode = Syncing`, emits status about heartbeat timeout. +3. Guards `isShuttingDown` in the timeout handler. +4. `cb(true)` (immediately acknowledges). + +**Shutdown:** Heartbeat timer is cleared in `stop()` so it doesn't fire +during graceful shutdown. + +--- + +## Timing + +| Parameter | Value | Rationale | +|-----------|-------|-----------| +| Heartbeat interval | 19 seconds | ~3 heartbeats per minute, stays well within timeout | +| Heartbeat timeout | 60 seconds | Tolerates 2 missed heartbeats + network jitter | + +--- + +## Edge Cases + +| Scenario | Behavior | +|----------|----------| +| User navigates from ChatSession to Project Manager | `releaseSessionLock` sent on unmount, heartbeat stopped | +| User closes browser tab | Socket disconnect fires backend-initiated `releaseSessionLock` | +| User closes browser entirely | Socket disconnect fires backend-initiated `releaseSessionLock` | +| Network drops, socket reconnects | Heartbeat resumes normally, drone timer resets each heartbeat | +| Network drops for >60 seconds | Drone auto-releases lock, IDE detects socket disconnect | +| Backend process restarts | Drone detects socket disconnect (reconnection), eventually heartbeat timeout fires | +| Drone crashes | IDE heartbeat callbacks stop firing → IDE detects socket disconnect | +| Multiple rapid session switches | Cleanup fires per-session, old lock released before new one acquired | +| No lock held, release requested | All handlers return `cb(true)` — successful no-op | +| Wrong session tries to release | Drone logs warning but still releases (disconnect path may not carry full session context) | +| Heartbeat arrives with no lock | Drone resets timer anyway — harmless | +| Deliberate release + disconnect race | Both paths emit `releaseSessionLock` — duplicate is handled gracefully (second release finds no lock, returns `true`) | + +--- + +## Always Release Held Locks + +Every code 
path that acquires a `sessionLock` must also release it: + +| Lock acquired | Must release here | Mechanism | +|---------------|-------------------|-----------| +| `ProjectManager.tsx` creates session + locks drone | `ChatSessionView` unmounts | `releaseSessionLock` in cleanup effect | +| `ProjectManager.tsx` opens existing session | `ChatSessionView` unmounts | `releaseSessionLock` in cleanup effect | +| Backend re-lock on socket reconnect | Backend disconnect handler | `releaseSessionLock` in `SocketService.onSocketDisconnect` | +| Any path (heartbeat fails) | Drone auto-release | 60-second `heartbeatTimer` timeout | + +**Rule:** If you add a new code path that calls `requestSessionLock`, you +must also ensure a corresponding `releaseSessionLock` path exists. The +heartbeat timeout is the last resort — never rely on it as the primary +release mechanism. + +--- + +## Verification Checklist + +- [ ] `releaseSessionLock` message defined in `ide.ts`, registered in `socket.ts` +- [ ] `sessionHeartbeat` message defined in `ide.ts`, registered in `socket.ts` +- [ ] Frontend `SocketClient` has `releaseSessionLock()`, `startSessionHeartbeat()`, `stopSessionHeartbeat()` +- [ ] `ChatSessionView` starts heartbeat on load, stops + releases on unmount +- [ ] `CodeSession` registers and handles both messages +- [ ] `SocketService.onSocketDisconnect` sends `releaseSessionLock` when a code session drops +- [ ] Existing bug in disconnect handler (reading session after delete) is fixed +- [ ] `GadgetDrone` registers and handles both messages +- [ ] Drone clears `sessionLock` and resets to `Syncing` on release or heartbeat timeout +- [ ] Heartbeat timer is cleaned up during `stop()` +- [ ] All packages build without errors diff --git a/gadget-code/frontend/src/lib/socket.ts b/gadget-code/frontend/src/lib/socket.ts index aa35365..7b9f72d 100644 --- a/gadget-code/frontend/src/lib/socket.ts +++ b/gadget-code/frontend/src/lib/socket.ts @@ -34,6 +34,13 @@ export interface 
/**
 * Asks the backend to release the drone lock held for a chat session.
 *
 * Emits `releaseSessionLock` with an acknowledgement callback and resolves
 * with the boolean the server acknowledges. Resolves `false` immediately
 * when the socket is not connected, and also resolves `false` when no
 * acknowledgement arrives within `timeoutMs`, so the promise always
 * settles even if the connection drops after the emit.
 *
 * @param registration Drone registration the lock was taken on.
 * @param project Project the chat session belongs to.
 * @param chatSession Chat session that holds the lock.
 * @param timeoutMs Maximum time to wait for the acknowledgement.
 * @returns `true` only when the release was acknowledged as successful.
 */
releaseSessionLock(
  registration: unknown,
  project: unknown,
  chatSession: unknown,
  timeoutMs: number = 5000,
): Promise<boolean> {
  return new Promise<boolean>((resolve) => {
    const socket = this._socket;
    if (!socket?.connected) {
      resolve(false);
      return;
    }

    // Whichever of ack / timeout fires first wins; the other is ignored.
    let settled = false;
    const settle = (success: boolean): void => {
      if (settled) return;
      settled = true;
      clearTimeout(ackTimer);
      resolve(success);
    };
    const ackTimer = setTimeout(() => settle(false), timeoutMs);

    socket.emit(
      "releaseSessionLock",
      registration,
      project,
      chatSession,
      settle,
    );
  });
}

/**
 * Starts the periodic `sessionHeartbeat` keepalive.
 *
 * No-op when a heartbeat interval is already running. Each tick emits only
 * while the socket is connected and logs a warning when the acknowledgement
 * is negative. The first heartbeat is sent one full interval after the
 * call, not immediately.
 */
startSessionHeartbeat(): void {
  if (this.heartbeatInterval) return;

  const heartbeatIntervalMs = 19000;
  this.heartbeatInterval = setInterval(() => {
    if (!this._socket?.connected) return;
    this._socket.emit("sessionHeartbeat", (ack: boolean) => {
      if (!ack) {
        console.warn("sessionHeartbeat: drone did not acknowledge");
      }
    });
  }, heartbeatIntervalMs);
}

/**
 * Stops the periodic keepalive started by `startSessionHeartbeat()`.
 * Safe to call when no heartbeat is running.
 */
stopSessionHeartbeat(): void {
  if (this.heartbeatInterval) {
    clearInterval(this.heartbeatInterval);
    this.heartbeatInterval = null;
  }
}
// Mirror the latest session/project state into refs so the unmount-only
// cleanup below reads current values instead of the ones captured when
// the effect was first created.
useEffect(() => {
  sessionRef.current = session;
}, [session]);

useEffect(() => {
  projectRef.current = project;
}, [project]);

// Heartbeat lifecycle. Keyed on "both loaded" rather than on the objects
// themselves, so ordinary state updates to `session` / `project` neither
// restart the interval nor trigger any lock release.
const lockContextLoaded = Boolean(session && project);
useEffect(() => {
  if (lockContextLoaded) {
    socketClient.startSessionHeartbeat();
  }
  return () => {
    socketClient.stopSessionHeartbeat();
  };
}, [lockContextLoaded]);

// Lock release runs exactly once, when the view unmounts. The empty
// dependency list is deliberate: releasing from an effect that depends on
// `session` / `project` would drop the drone lock on every state change
// while the view is still on screen.
useEffect(() => {
  return () => {
    socketClient.stopSessionHeartbeat();

    const droneJson = localStorage.getItem('dtp_drone_registration');
    const activeSession = sessionRef.current;
    const activeProject = projectRef.current;
    if (!droneJson || !activeSession || !activeProject) return;

    try {
      // JSON.parse can throw on a corrupted localStorage entry.
      const registration: unknown = JSON.parse(droneJson);
      // Fire-and-forget: the component is going away, nothing awaits this.
      void socketClient.releaseSessionLock(
        registration,
        activeProject,
        activeSession,
      );
    } catch (err) {
      console.error('Failed to release session lock', err);
    }
  };
}, []);
/**
 * Called when the IDE sends a releaseSessionLock event to release a
 * previously-acquired session lock on a gadget-drone instance.
 *
 * Forwards the request to the drone socket. When the drone acknowledges
 * success, the chat session is unregistered from SocketService, the drone
 * session's chatSessionId is cleared, and this session's selected drone,
 * chat session, and project are reset. The drone's answer is always relayed
 * to the IDE through `cb`.
 *
 * If the drone session cannot be resolved or the emit throws, the IDE is
 * answered with `cb(false)` instead of letting the exception escape the
 * socket handler and leaving the acknowledgement pending.
 */
onReleaseSessionLock(
  registration: IDroneRegistration,
  project: IProject,
  chatSession: IChatSession,
  cb: (success: boolean) => void,
) {
  try {
    const droneSession = SocketService.getDroneSession(registration);
    droneSession.socket.emit(
      "releaseSessionLock",
      registration,
      project,
      chatSession,
      (success: boolean) => {
        if (success) {
          SocketService.unregisterChatSession(chatSession._id);
          droneSession.chatSessionId = undefined;
          this.selectedDrone = undefined;
          this.chatSession = undefined;
          this.project = undefined;
        }
        cb(success);
      },
    );
  } catch {
    // Drone session unavailable: report failure rather than never acking.
    cb(false);
  }
}

/**
 * Called when the IDE sends a sessionHeartbeat event to keep the session
 * lock alive. Forwards to the drone which maintains a timeout.
 *
 * Answers `cb(false)` when no drone is selected on this session, or when
 * the drone session cannot be resolved or the emit throws; otherwise relays
 * the drone's acknowledgement unchanged.
 */
onSessionHeartbeat(cb: (ack: boolean) => void) {
  if (!this.selectedDrone) {
    return cb(false);
  }
  try {
    const droneSession = SocketService.getDroneSession(this.selectedDrone);
    droneSession.socket.emit("sessionHeartbeat", (ack: boolean) => {
      cb(ack);
    });
  } catch {
    // Drone session unavailable: a negative ack lets the IDE notice.
    cb(false);
  }
}
case SocketSessionType.Code: {
  // Braces give the lexical declarations below their own scope instead of
  // leaking them into the other cases of the switch.
  this.log.info("closing code socket session", { id: socket.id });

  // Read the session BEFORE removing it from the map; everything below
  // needs its state.
  const disconnectingCodeSession = this.codeSessions.get(socket.id);
  if (disconnectingCodeSession) {
    // If this session held a drone lock, release it on disconnect.
    const droneRegId = disconnectingCodeSession.selectedDroneId;
    const chatSess = disconnectingCodeSession.activeChatSession;
    const proj = disconnectingCodeSession.activeProject;
    if (droneRegId && chatSess && proj) {
      try {
        const droneSession = this.droneRegistrationIndex.get(droneRegId);
        if (droneSession) {
          droneSession.socket.emit(
            "releaseSessionLock",
            droneSession.registration,
            proj,
            chatSess,
            () => {
              /* fire-and-forget */
            },
          );
          // Keep the drone session's bookkeeping in step with the explicit
          // release path, which also clears chatSessionId.
          droneSession.chatSessionId = undefined;
        }
      } catch (error) {
        this.log.error(
          "failed to release session lock on code disconnect",
          { error },
        );
      }
    }

    // Clean up user index
    this.codeSessionUserIndex.delete(disconnectingCodeSession.user._id);

    // Clean up chat session index
    if (chatSess) {
      this.unregisterChatSession(chatSess._id);
    }
  }

  this.codeSessions.delete(socket.id);
  return;
}
/**
 * Handles a releaseSessionLock request forwarded by the backend.
 *
 * Answers `cb(false)` when this drone has no registration or the request
 * names a different registration. Otherwise any pending heartbeat timer is
 * cancelled, the lock (if one is held) is dropped, the workspace returns to
 * Syncing, and the request is acknowledged with `cb(true)`. Releasing when
 * no lock is held is a successful no-op. A release naming a different chat
 * session than the lock holder is logged as a warning but still honoured.
 */
async onReleaseSessionLock(
  registration: IDroneRegistration,
  project: IProject,
  chatSession: IChatSession,
  cb: (success: boolean) => void,
) {
  if (!this.registration) {
    return cb(false);
  }
  if (registration._id !== this.registration._id) {
    return cb(false);
  }

  // Cancel the keepalive countdown along with the lock. A timer left
  // running here would fire later and drop a lock that another session
  // acquired in the meantime.
  if (this.heartbeatTimer) {
    clearTimeout(this.heartbeatTimer);
    this.heartbeatTimer = null;
  }

  if (!this.sessionLock) {
    this.log.info("releaseSessionLock: no lock held, nothing to release");
    return cb(true);
  }

  if (chatSession._id !== this.sessionLock.session._id) {
    this.log.warn("releaseSessionLock: session mismatch", {
      requested: chatSession._id,
      current: this.sessionLock.session._id,
    });
  }

  this.log.info("releasing session lock", {
    project: { _id: project._id, slug: project.slug },
    chatSession: { _id: chatSession._id, name: chatSession.name },
  });

  this.sessionLock = undefined;
  this.workspaceMode = WorkspaceMode.Syncing;
  this.socket?.emit("status", "session lock released");
  cb(true);
}

/**
 * Handles a sessionHeartbeat keepalive.
 *
 * Restarts the 60-second countdown and acknowledges immediately with
 * `cb(true)`. If the countdown expires without another heartbeat while a
 * lock is held, the lock is dropped, the workspace returns to Syncing, and
 * a status message is emitted. Expiry does nothing during shutdown or when
 * no lock is held.
 */
async onSessionHeartbeat(cb: (ack: boolean) => void) {
  const heartbeatTimeoutMs = 60000;

  if (this.heartbeatTimer) {
    clearTimeout(this.heartbeatTimer);
  }

  this.heartbeatTimer = setTimeout(() => {
    this.heartbeatTimer = null;
    if (this.isShuttingDown) return;
    // Nothing to release: avoid resetting the workspace mode and emitting
    // a misleading status when no lock is held.
    if (!this.sessionLock) return;

    this.log.warn("heartbeat timeout: releasing session lock");
    this.sessionLock = undefined;
    this.workspaceMode = WorkspaceMode.Syncing;
    this.socket?.emit(
      "status",
      "session lock released due to heartbeat timeout",
    );
  }, heartbeatTimeoutMs);

  cb(true);
}
interface ServerToClientEvents { requestSessionLock: RequestSessionLockMessage; requestWorkspaceMode: RequestWorkspaceModeMessage; + releaseSessionLock: ReleaseSessionLockMessage; + sessionHeartbeat: SessionHeartbeatMessage; processWorkOrder: ProcessWorkOrderMessage; crashRecoveryResponse: CrashRecoveryResponseMessage; requestTermination: RequestTerminationMessage;