// ai-interviewer/server/src/interview/interview.gateway.ts

import {
WebSocketGateway,
WebSocketServer,
SubscribeMessage,
MessageBody,
ConnectedSocket,
OnGatewayInit,
OnGatewayConnection,
OnGatewayDisconnect,
} from '@nestjs/websockets';
import { Logger } from '@nestjs/common';
import { Server, Socket } from 'socket.io';
import { OrchestratorService } from './services/orchestrator.service';
import { FaceAuthService } from '../face-auth/face-auth.service';
import { CandidateService } from '../candidate/candidate.service';
/**
* InterviewGateway — real-time WebSocket entry point for AI interviews.
*
* Handles:
* - Room joining (linking a socket to a session)
* - Streaming audio chunks → STT (real-time via WebSocket)
* - VAD events are emitted automatically by the STT provider
* - TTS audio chunks are streamed back as they are generated
* - Manual end-of-speech fallback for providers without VAD
* - Face verification (single frame from webcam)
*/
@WebSocketGateway({
  cors: { origin: '*' },
  namespace: '/interview',
})
export class InterviewGateway
  implements OnGatewayInit, OnGatewayConnection, OnGatewayDisconnect {
  @WebSocketServer()
  server: Server;

  private readonly logger = new Logger(InterviewGateway.name);

  /** Map socket.id → sessionId so resources can be released on disconnect. */
  private readonly socketSessions = new Map<string, string>();

  constructor(
    private readonly orchestrator: OrchestratorService,
    private readonly faceAuth: FaceAuthService,
    private readonly candidateService: CandidateService,
  ) {}

  afterInit() {
    this.logger.log('Interview WebSocket Gateway initialized');
  }

  handleConnection(client: Socket) {
    this.logger.log(`Client connected: ${client.id}`);
  }

  /**
   * On disconnect, end any session tied to this socket.
   * The mapping is removed *before* the async cleanup so a racing
   * 'end-interview' cannot end the same session twice.
   */
  handleDisconnect(client: Socket) {
    const sessionId = this.socketSessions.get(client.id);
    if (sessionId) {
      this.logger.log(
        `Client ${client.id} disconnected — cleaning up session ${sessionId}`,
      );
      this.socketSessions.delete(client.id);
      this.orchestrator.endSession(sessionId).catch((err) => {
        this.logger.error(`Cleanup error: ${err}`);
      });
    }
  }

  // ──────────────── Socket Events ────────────────

  /**
   * Client joins an interview session room.
   * Initializes the streaming STT + TTS connections for this session.
   *
   * Payload: { candidateId: string }
   *
   * On failure after the room join, all partial state is rolled back
   * (room membership, socket→session mapping, orchestrator session) so a
   * half-initialized session does not leak and disconnect cleanup does not
   * later try to tear down a broken session.
   */
  @SubscribeMessage('join-room')
  async handleJoinRoom(
    @ConnectedSocket() client: Socket,
    @MessageBody() data: { candidateId: string },
  ) {
    let sessionId: string | undefined;
    try {
      if (!data?.candidateId) {
        client.emit('error', { message: 'Failed to join room: candidateId is required' });
        return;
      }
      const session = await this.orchestrator.createSession(data.candidateId);
      sessionId = session._id.toString();
      client.join(sessionId);
      this.socketSessions.set(client.id, sessionId);
      // Initialize streaming with the socket directly
      await this.orchestrator.initStreaming(sessionId, client);
      this.logger.log(
        `Client ${client.id} joined session ${sessionId} — streaming ready`,
      );
      client.emit('session-created', {
        sessionId,
        stage: session.currentStage,
      });
    } catch (err) {
      // Roll back anything set up before the failure.
      if (sessionId) {
        client.leave(sessionId);
        this.socketSessions.delete(client.id);
        this.orchestrator.endSession(sessionId).catch((cleanupErr) => {
          this.logger.error(`Join rollback error: ${cleanupErr}`);
        });
      }
      client.emit('error', { message: `Failed to join room: ${err}` });
    }
  }

  /**
   * Receive an audio chunk from the client's microphone and pipe it
   * directly to the STT streaming WebSocket (no buffering).
   *
   * Accepted payload shapes (Socket.io may deliver any of these):
   * - Buffer / ArrayBuffer (binary frame) → re-encoded to base64
   * - string (already base64)
   * - { audio: string } (base64 wrapped in an object)
   */
  @SubscribeMessage('audio-chunk')
  handleAudioChunk(
    @ConnectedSocket() client: Socket,
    @MessageBody() data: any,
  ) {
    const sessionId = this.socketSessions.get(client.id);
    if (!sessionId) {
      client.emit('error', { message: 'Not in a session' });
      return;
    }
    let audioBase64: string;
    if (Buffer.isBuffer(data)) {
      // It's a binary buffer from Socket.io
      audioBase64 = data.toString('base64');
    } else if (data instanceof ArrayBuffer) {
      audioBase64 = Buffer.from(data).toString('base64');
    } else if (typeof data === 'string') {
      audioBase64 = data;
    } else if (data && typeof data.audio === 'string') {
      audioBase64 = data.audio;
    } else {
      this.logger.warn(`Unknown audio chunk format received: ${typeof data}`);
      return;
    }
    // Print dot to let us know chunks are flowing without spamming logs
    process.stdout.write('.');
    this.orchestrator.streamAudioChunk(sessionId, audioBase64);
  }

  /**
   * Manual end-of-speech signal (fallback for providers without VAD).
   * Triggers the STT → Brain → TTS pipeline with accumulated transcript.
   * On failure the client is returned to the 'listening' state so the UI
   * does not get stuck waiting for a response.
   */
  @SubscribeMessage('end-of-speech')
  async handleEndOfSpeech(@ConnectedSocket() client: Socket) {
    const sessionId = this.socketSessions.get(client.id);
    if (!sessionId) return;
    try {
      await this.orchestrator.triggerPipeline(sessionId);
    } catch (err) {
      this.logger.error(`Pipeline error: ${err}`);
      client.emit('error', { message: 'Pipeline processing failed' });
      client.emit('ai-state', { state: 'listening' });
    }
  }

  /**
   * Receive a video frame (JPEG buffer) for face verification.
   * Payload: { candidateId: string, frame: Buffer }
   *
   * First frame with no stored reference descriptor becomes the reference;
   * subsequent frames are compared against it. A mismatch is recorded on
   * the session (faceVerified = false) for the final report.
   */
  @SubscribeMessage('face-verify')
  async handleFaceVerify(
    @ConnectedSocket() client: Socket,
    @MessageBody() data: { candidateId: string; frame: Buffer },
  ) {
    try {
      const candidate = await this.candidateService.findById(data.candidateId);
      // Extract descriptor from the incoming frame
      const frameBuffer = Buffer.from(data.frame);
      const incomingDescriptor =
        await this.faceAuth.extractDescriptorFromBuffer(frameBuffer);
      if (!incomingDescriptor) {
        client.emit('face-result', {
          verified: false,
          message: 'No face detected in frame',
        });
        return;
      }
      // If no stored descriptor, save this one as reference
      if (
        !candidate.faceDescriptor ||
        candidate.faceDescriptor.length === 0
      ) {
        candidate.faceDescriptor = Array.from(incomingDescriptor);
        candidate.captureFaceOnCall = false;
        await candidate.save();
        client.emit('face-result', {
          verified: true,
          message: 'Reference face captured and saved',
        });
        return;
      }
      // Compare with stored descriptor
      const result = this.faceAuth.verifyFace(
        incomingDescriptor,
        candidate.faceDescriptor,
      );
      // Update session verification status
      const sessionId = this.socketSessions.get(client.id);
      if (sessionId) {
        const session = await this.orchestrator.getSession(sessionId);
        session.faceVerified = result.match;
        await session.save();
      }
      client.emit('face-result', {
        verified: result.match,
        distance: result.distance,
        message: result.match
          ? 'Face verified successfully'
          : 'Face mismatch — flagged in report',
      });
    } catch (err) {
      this.logger.error(`Face verify error: ${err}`);
      client.emit('face-result', {
        verified: false,
        message: `Verification error: ${err}`,
      });
    }
  }

  /**
   * End the interview session — closes streaming connections.
   *
   * The socket→session mapping is removed before awaiting endSession so a
   * disconnect during the await cannot trigger a second cleanup of the same
   * session. 'interview-ended' is always emitted so the client UI can exit,
   * even if backend teardown failed (the error is logged server-side).
   */
  @SubscribeMessage('end-interview')
  async handleEndInterview(@ConnectedSocket() client: Socket) {
    const sessionId = this.socketSessions.get(client.id);
    if (!sessionId) return;
    this.socketSessions.delete(client.id);
    try {
      await this.orchestrator.endSession(sessionId);
    } catch (err) {
      this.logger.error(`End-interview cleanup error: ${err}`);
    }
    client.emit('interview-ended', { sessionId });
  }
}