feature done: AI interviewer

This commit is contained in:
Joydeep Pandey 2026-02-20 13:00:50 +05:30
commit f6295196be
59 changed files with 16112 additions and 0 deletions

30
.env.example Normal file
View File

@ -0,0 +1,30 @@
# ── MongoDB ──
MONGODB_URI=mongodb://localhost:27017/ai-interviewer
# ── LLM (Cerebras) ──
CEREBRAS_API_KEY=
# ── Voice Provider: "sarvam" or "deepgram" ──
VOICE_PROVIDER=sarvam
# ── Sarvam AI ──
SARVAM_API_KEY=
SARVAM_STT_LANGUAGE=en-IN
SARVAM_STT_MODEL=saarika:v1
SARVAM_TTS_LANGUAGE=en-IN
SARVAM_TTS_MODEL=bulbul:v2
SARVAM_TTS_SPEAKER=anushka
SARVAM_TTS_PACE=1.0
SARVAM_TTS_TEMPERATURE=0.6
SARVAM_TTS_CODEC=wav
# ── Deepgram (alternative) ──
DEEPGRAM_API_KEY=
DEEPGRAM_STT_LANGUAGE=en-US
DEEPGRAM_STT_MODEL=nova-2
DEEPGRAM_TTS_MODEL=aura-asteria-en
DEEPGRAM_TTS_CODEC=wav
# ── Server ──
PORT=3001
CORS_ORIGIN=http://localhost:5173

7
.gitignore vendored Normal file
View File

@ -0,0 +1,7 @@
node_modules
.env
.vscode
.idea
.DS_Store
dist
uploads

73
README.md Normal file
View File

@ -0,0 +1,73 @@
# AI Interviewer
A real-time AI-powered interview platform with audio streaming, face verification, and intelligent evaluation.
## Architecture
```
ai_interview_assistant/
├── server/ # NestJS backend
│ └── src/
│ ├── candidate/ # Profile, resume upload, OCR
│ ├── face-auth/ # face-api.js face verification
│ ├── interview/ # WebSocket gateway, orchestrator, voice providers
│ └── brain/ # Cerebras LLM integration, evaluation
├── client/ # React + Vite frontend
│ └── src/
│ ├── components/ # Avatar
│ ├── pages/ # Onboarding, InterviewRoom
│ ├── hooks/ # useSocket, useAudioRecorder
│ └── services/ # REST API client
└── .env.example # Environment variables template
```
## Quick Start
### 1. Environment Setup
```bash
cp .env.example .env
# Fill in your API keys: DEEPGRAM_API_KEY, SARVAM_API_KEY, CEREBRAS_API_KEY
```
### 2. Backend
```bash
cd server
npm install
npm run start:dev # http://localhost:3001
```
> **Note:** `face-api.js` requires model weight files in `server/face-models/`. Download them from [face-api.js models](https://github.com/justadudewhohacks/face-api.js/tree/master/weights).
### 3. Frontend
```bash
cd client
npm install
npm run dev # http://localhost:5173
```
### 4. MongoDB
Ensure MongoDB is running on `localhost:27017` (or update `MONGODB_URI` in `.env`).
## Tech Stack
| Layer | Technology |
| -------- | ---------------------------------------- |
| Backend | NestJS, Mongoose, Socket.io, face-api.js |
| Frontend | React 19, Vite, TailwindCSS, socket.io |
| LLM | Cerebras (llama-4-scout) |
| STT/TTS | Sarvam (default), Deepgram (alternative) |
| OCR | pdf-parse + tesseract.js |
| Database | MongoDB |
## Key Features
- **Real-time audio streaming** via WebSocket (Socket.io)
- **Face verification** on interview start (flag-only, non-blocking)
- **Resume OCR** supporting text PDFs and scanned images
- **Stage-aware interviewing** (Intro → Technical → Behavioral → Wrap-up)
- **Structured evaluation** with per-dimension ratings and reviews
- **Swappable voice providers** (Deepgram ↔ Sarvam via env config)

22
client/index.html Normal file
View File

@ -0,0 +1,22 @@
<!doctype html>
<html lang="en">
<head>
<meta charset="UTF-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<meta
name="description"
content="AI-powered interview platform with real-time voice interaction, face verification, and intelligent evaluation."
/>
<title>AI Interviewer</title>
<link rel="preconnect" href="https://fonts.googleapis.com" />
<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin />
<link
href="https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600;700;800&display=swap"
rel="stylesheet"
/>
</head>
<body class="bg-surface-950 text-white antialiased">
<div id="root"></div>
<script type="module" src="/src/main.tsx"></script>
</body>
</html>

2890
client/package-lock.json generated Normal file

File diff suppressed because it is too large Load Diff

28
client/package.json Normal file
View File

@ -0,0 +1,28 @@
{
"name": "ai-interviewer-client",
"private": true,
"version": "1.0.0",
"type": "module",
"scripts": {
"dev": "vite",
"build": "tsc -b && vite build",
"preview": "vite preview"
},
"dependencies": {
"react": "^19.0.0",
"react-dom": "^19.0.0",
"react-router-dom": "^7.1.0",
"react-webcam": "^7.2.0",
"socket.io-client": "^4.8.0"
},
"devDependencies": {
"@types/react": "^19.0.0",
"@types/react-dom": "^19.0.0",
"@vitejs/plugin-react": "^4.3.0",
"autoprefixer": "^10.4.20",
"postcss": "^8.4.49",
"tailwindcss": "^3.4.16",
"typescript": "^5.7.0",
"vite": "^6.0.0"
}
}

6
client/postcss.config.js Normal file
View File

@ -0,0 +1,6 @@
// PostCSS pipeline for the client build: Tailwind generates utilities
// first, then Autoprefixer adds vendor prefixes to the emitted CSS.
const config = {
  plugins: {
    tailwindcss: {},
    autoprefixer: {},
  },
};

export default config;

31
client/src/App.tsx Normal file
View File

@ -0,0 +1,31 @@
import { BrowserRouter, Routes, Route } from "react-router-dom";
import CandidateOnboarding from "./pages/CandidateOnboarding";
import InterviewRoom from "./pages/InterviewRoom";

/**
 * Root component: a fixed translucent header plus the two client-side
 * routes — onboarding at "/" and the interview room at
 * "/interview/:sessionId".
 */
export default function App() {
  return (
    <BrowserRouter>
      <div className="min-h-screen bg-surface-950">
        {/* Fixed glass header bar */}
        <header className="fixed top-0 left-0 right-0 z-50 glass border-b border-surface-700/30">
          <div className="max-w-7xl mx-auto px-6 py-4 flex items-center justify-between">
            <h1 className="text-xl font-bold gradient-text tracking-tight">
              AI Interviewer
            </h1>
            <span className="text-xs text-surface-200/40 font-mono">v1.0</span>
          </div>
        </header>
        {/* Route outlet, padded down so content clears the fixed header */}
        <main className="pt-20">
          <Routes>
            <Route path="/" element={<CandidateOnboarding />} />
            <Route path="/interview/:sessionId" element={<InterviewRoom />} />
          </Routes>
        </main>
      </div>
    </BrowserRouter>
  );
}

View File

@ -0,0 +1,85 @@
import { useEffect, useState } from "react";
import type { AIState } from "../hooks/useSocket";
// You can replace these with actual avatar assets from your public folder
// You can replace these with actual avatar assets from your public folder
const IMG_STATIC =
  "https://api.dicebear.com/7.x/bottts/svg?seed=Felix&mouth=smile01";
const IMG_TALK_1 =
  "https://api.dicebear.com/7.x/bottts/svg?seed=Felix&mouth=smile02";
const IMG_TALK_2 =
  "https://api.dicebear.com/7.x/bottts/svg?seed=Felix&mouth=smile03";
const FRAMES = [IMG_STATIC, IMG_TALK_1, IMG_TALK_2];

interface AvatarProps {
  state: AIState;
}

/**
 * Animated AI avatar.
 *
 * Renders one of the FRAMES images inside a state-colored ring plus a
 * textual state badge. While `state === "speaking"` the frames are cycled
 * every 200 ms to fake mouth movement; any other state snaps back to the
 * static first frame.
 */
export default function Avatar({ state }: AvatarProps) {
  const [frameIndex, setFrameIndex] = useState(0);

  // Cycle frames when speaking to simulate mouth movement
  useEffect(() => {
    if (state !== "speaking") {
      setFrameIndex(0); // Show static frame when not speaking
      return;
    }
    const interval = setInterval(() => {
      // Wrap on FRAMES.length (not a hard-coded 3) so adding/removing
      // frames above can never index out of bounds.
      setFrameIndex((prev) => (prev + 1) % FRAMES.length);
    }, 200);
    return () => clearInterval(interval);
  }, [state]);

  // Human-readable label for each AI state shown under the avatar.
  const stateLabels: Record<AIState, string> = {
    idle: "Ready",
    listening: "Listening…",
    thinking: "Thinking…",
    speaking: "Speaking…",
  };

  return (
    <div className="flex flex-col items-center gap-4">
      {/* Avatar container */}
      <div className="relative">
        {/* Outer pulse ring (listening) */}
        {state === "listening" && (
          <div className="absolute inset-[-12px] rounded-full pulse-ring bg-emerald-500/20" />
        )}
        {/* Main Image Frame */}
        <div
          className={`
            w-48 h-48 rounded-full flex items-center justify-center overflow-hidden
            bg-surface-700 border-4 shadow-2xl transition-all duration-300
            ${state === "speaking" ? "border-primary-500 shadow-primary-500/40" : ""}
            ${state === "listening" ? "border-emerald-500 shadow-emerald-500/40" : ""}
            ${state === "thinking" ? "border-amber-500 shadow-amber-500/40" : ""}
            ${state === "idle" ? "border-surface-600 shadow-none" : ""}
          `}
        >
          <img
            src={FRAMES[frameIndex]}
            alt="AI Avatar"
            className="w-full h-full object-cover"
          />
        </div>
      </div>
      {/* State label */}
      <div
        className={`
          px-4 py-1.5 rounded-full text-sm font-medium
          ${state === "listening" ? "bg-emerald-500/20 text-emerald-300" : ""}
          ${state === "thinking" ? "bg-amber-500/20 text-amber-300" : ""}
          ${state === "speaking" ? "bg-primary-500/20 text-primary-300" : ""}
          ${state === "idle" ? "bg-surface-700/30 text-surface-200/60" : ""}
          transition-all duration-300
        `}
      >
        {stateLabels[state]}
      </div>
    </div>
  );
}

View File

@ -0,0 +1,116 @@
import { useState, useRef, useCallback } from 'react';

interface UseAudioRecorderOptions {
  onChunk?: (chunk: ArrayBuffer) => void;
  onSilence?: () => void;
  silenceThreshold?: number;
  silenceTimeout?: number;
}

/**
 * Captures microphone audio as 16 kHz mono int16 PCM chunks and detects
 * trailing silence via an RMS threshold.
 *
 * onChunk          — receives each ~256 ms chunk as an int16 PCM ArrayBuffer.
 * onSilence        — fired once after `silenceTimeout` ms of continuous silence.
 * silenceThreshold — RMS level (0..1) below which audio counts as silence.
 * silenceTimeout   — ms of sustained silence before onSilence fires.
 */
export function useAudioRecorder(options: UseAudioRecorderOptions = {}) {
  const {
    onChunk,
    onSilence,
    silenceThreshold = 0.01,
    silenceTimeout = 1500,
  } = options;
  const [isRecording, setIsRecording] = useState(false);
  const audioCtxRef = useRef<AudioContext | null>(null);
  const mediaStreamRef = useRef<MediaStream | null>(null);
  const processorRef = useRef<ScriptProcessorNode | null>(null);
  const silenceTimerRef = useRef<ReturnType<typeof setTimeout> | null>(null);
  // True from the moment startRecording is entered until stop/failure.
  // Guards re-entry across the async getUserMedia await.
  const startingRef = useRef(false);

  const startRecording = useCallback(async () => {
    // Double-start guard: a second call would overwrite the refs and leak
    // the first AudioContext / MediaStream without ever closing them.
    if (startingRef.current || audioCtxRef.current) return;
    startingRef.current = true;
    try {
      const stream = await navigator.mediaDevices.getUserMedia({
        audio: {
          echoCancellation: true,
          noiseSuppression: true,
          sampleRate: 16000,
        },
      });
      mediaStreamRef.current = stream;
      // Enforce passing a known sampleRate when supported
      const AudioContext = window.AudioContext || (window as any).webkitAudioContext;
      const audioCtx = new AudioContext({ sampleRate: 16000 });
      audioCtxRef.current = audioCtx;
      const source = audioCtx.createMediaStreamSource(stream);
      // Buffer size of 4096 is ~256ms at 16000Hz
      const processor = audioCtx.createScriptProcessor(4096, 1, 1);
      processorRef.current = processor;
      // Install the handler BEFORE wiring the graph so the earliest
      // buffers cannot be dispatched while the callback is still unset.
      processor.onaudioprocess = (e) => {
        const float32Array = e.inputBuffer.getChannelData(0);
        // Check for silence (RMS)
        let sum = 0;
        for (let i = 0; i < float32Array.length; i++) {
          sum += float32Array[i] * float32Array[i];
        }
        const rms = Math.sqrt(sum / float32Array.length);
        if (rms < silenceThreshold) {
          // Arm the silence timer once; it fires after silenceTimeout ms
          // of uninterrupted quiet.
          if (!silenceTimerRef.current) {
            silenceTimerRef.current = setTimeout(() => {
              onSilence?.();
              silenceTimerRef.current = null;
            }, silenceTimeout);
          }
        } else {
          // Voice detected — cancel any pending silence notification.
          if (silenceTimerRef.current) {
            clearTimeout(silenceTimerRef.current);
            silenceTimerRef.current = null;
          }
        }
        if (onChunk) {
          // Convert float32 [-1, 1] samples to int16 (PCM).
          const int16Buffer = new Int16Array(float32Array.length);
          for (let i = 0; i < float32Array.length; i++) {
            let s = Math.max(-1, Math.min(1, float32Array[i]));
            int16Buffer[i] = s < 0 ? s * 0x8000 : s * 0x7FFF;
          }
          onChunk(int16Buffer.buffer);
        }
      };
      source.connect(processor);
      processor.connect(audioCtx.destination);
      setIsRecording(true);
    } catch (err) {
      // Mic permission denied or device failure — recording never started.
      startingRef.current = false;
      console.error('Failed to start recording:', err);
    }
  }, [onChunk, onSilence, silenceThreshold, silenceTimeout]);

  // Tears down the audio graph, releases the mic, and cancels any pending
  // silence timer. Safe to call when not recording.
  const stopRecording = useCallback(() => {
    if (processorRef.current) {
      processorRef.current.disconnect();
      processorRef.current = null;
    }
    if (audioCtxRef.current) {
      audioCtxRef.current.close();
      audioCtxRef.current = null;
    }
    if (mediaStreamRef.current) {
      mediaStreamRef.current.getTracks().forEach((t) => t.stop());
      mediaStreamRef.current = null;
    }
    if (silenceTimerRef.current) {
      clearTimeout(silenceTimerRef.current);
      silenceTimerRef.current = null;
    }
    startingRef.current = false;
    setIsRecording(false);
  }, []);

  return { isRecording, startRecording, stopRecording };
}

View File

@ -0,0 +1,126 @@
import { useEffect, useRef, useCallback, useState } from 'react';
import { io, Socket } from 'socket.io-client';

const SOCKET_URL = import.meta.env.VITE_SOCKET_URL || 'http://localhost:3001';

export type AIState = 'idle' | 'listening' | 'thinking' | 'speaking';

interface UseSocketOptions {
  onTranscript?: (text: string) => void;
  onAudioResponse?: (audio: ArrayBuffer) => void;
  onStateChange?: (state: AIState) => void;
  onSessionCreated?: (data: { sessionId: string; stage: string }) => void;
  onFaceResult?: (data: { verified: boolean; message: string }) => void;
  onError?: (err: { message: string }) => void;
  onInterviewEnded?: (data: { sessionId: string }) => void;
}

/**
 * Custom hook managing the Socket.io connection to the interview gateway.
 * Handles audio chunk emission, TTS playback, and all socket events.
 *
 * The socket is created exactly once on mount. Callers may re-render with
 * new option callbacks at any time; the latest ones are always invoked
 * (see optionsRef below).
 */
export function useSocket(options: UseSocketOptions = {}) {
  const socketRef = useRef<Socket | null>(null);
  const [connected, setConnected] = useState(false);
  const [aiState, setAiState] = useState<AIState>('idle');
  const audioContextRef = useRef<AudioContext | null>(null);

  // The connect effect below runs only on mount, so reading `options`
  // directly inside it would freeze the first render's callbacks forever
  // (stale closure). Route every callback through a ref that is refreshed
  // on each render instead.
  const optionsRef = useRef(options);
  optionsRef.current = options;

  // ── Audio playback ──
  const playAudio = useCallback(async (audioData: ArrayBuffer) => {
    try {
      // Lazily create a single AudioContext and reuse it for every clip.
      if (!audioContextRef.current) {
        audioContextRef.current = new AudioContext();
      }
      const ctx = audioContextRef.current;
      // decodeAudioData detaches its input buffer, so hand it a copy.
      const audioBuffer = await ctx.decodeAudioData(audioData.slice(0));
      const source = ctx.createBufferSource();
      source.buffer = audioBuffer;
      source.connect(ctx.destination);
      source.start(0);
    } catch (err) {
      console.error('Audio playback failed:', err);
    }
  }, []);

  // ── Connect on mount ──
  useEffect(() => {
    const socket = io(`${SOCKET_URL}/interview`, {
      transports: ['websocket'],
      autoConnect: true,
    });
    socketRef.current = socket;
    socket.on('connect', () => setConnected(true));
    socket.on('disconnect', () => setConnected(false));
    socket.on('session-created', (data) => {
      optionsRef.current.onSessionCreated?.(data);
    });
    socket.on('ai-transcript', (data: { text: string }) => {
      optionsRef.current.onTranscript?.(data.text);
    });
    socket.on('ai-audio', (audioData: ArrayBuffer) => {
      optionsRef.current.onAudioResponse?.(audioData);
      playAudio(audioData);
    });
    socket.on('ai-state', (data: { state: AIState }) => {
      setAiState(data.state);
      optionsRef.current.onStateChange?.(data.state);
    });
    socket.on('face-result', (data) => {
      optionsRef.current.onFaceResult?.(data);
    });
    socket.on('interview-ended', (data) => {
      optionsRef.current.onInterviewEnded?.(data);
    });
    socket.on('error', (err) => {
      optionsRef.current.onError?.(err);
    });
    return () => {
      socket.disconnect();
    };
    // playAudio is a stable useCallback, so this still runs once on mount.
  }, [playAudio]);

  // ── Emit helpers ──
  const joinRoom = useCallback((candidateId: string) => {
    socketRef.current?.emit('join-room', { candidateId });
  }, []);

  const sendAudioChunk = useCallback((chunk: ArrayBuffer) => {
    socketRef.current?.emit('audio-chunk', chunk);
  }, []);

  const signalEndOfSpeech = useCallback(() => {
    socketRef.current?.emit('end-of-speech');
  }, []);

  const sendFaceFrame = useCallback(
    (candidateId: string, frame: ArrayBuffer) => {
      socketRef.current?.emit('face-verify', { candidateId, frame });
    },
    [],
  );

  const endInterview = useCallback(() => {
    socketRef.current?.emit('end-interview');
  }, []);

  return {
    connected,
    aiState,
    joinRoom,
    sendAudioChunk,
    signalEndOfSpeech,
    sendFaceFrame,
    endInterview,
  };
}

120
client/src/index.css Normal file
View File

@ -0,0 +1,120 @@
@tailwind base;
@tailwind components;
@tailwind utilities;
/* ─── Base Styles ─── */
* {
margin: 0;
padding: 0;
box-sizing: border-box;
}
body {
font-family: "Inter", system-ui, sans-serif;
background: #020617;
color: #f8fafc;
min-height: 100vh;
}
/* ─── Scrollbar ─── */
::-webkit-scrollbar {
width: 6px;
}
::-webkit-scrollbar-track {
background: #0f172a;
}
::-webkit-scrollbar-thumb {
background: #334155;
border-radius: 3px;
}
/* ─── Glass Card ─── */
.glass {
background: rgba(15, 23, 42, 0.6);
backdrop-filter: blur(20px);
border: 1px solid rgba(99, 102, 241, 0.15);
border-radius: 16px;
}
/* ─── Avatar Animation ─── */
@keyframes avatar-speak {
0%,
100% {
transform: scale(1);
opacity: 1;
}
50% {
transform: scale(1.02);
opacity: 0.9;
}
}
@keyframes spin-slow {
from {
transform: rotate(0deg);
}
to {
transform: rotate(360deg);
}
}
.avatar-speaking {
animation: avatar-speak 0.4s ease-in-out infinite;
}
.animate-spin-slow {
animation: spin-slow 8s linear infinite;
}
/* ─── Pulse Ring (Listening State) ─── */
@keyframes pulse-ring {
0% {
box-shadow: 0 0 0 0 rgba(99, 102, 241, 0.5);
}
70% {
box-shadow: 0 0 0 20px rgba(99, 102, 241, 0);
}
100% {
box-shadow: 0 0 0 0 rgba(99, 102, 241, 0);
}
}
.pulse-ring {
animation: pulse-ring 2s ease-out infinite;
}
/* ─── Gradient Text ─── */
.gradient-text {
background: linear-gradient(135deg, #818cf8, #6366f1, #a78bfa);
-webkit-background-clip: text;
-webkit-text-fill-color: transparent;
background-clip: text;
}
/* ─── Button Styles ─── */
.btn-primary {
@apply px-6 py-3 bg-primary-600 hover:bg-primary-500 text-white font-semibold
rounded-xl transition-all duration-200 shadow-lg shadow-primary-600/25
hover:shadow-primary-500/40 active:scale-[0.98];
}
.btn-danger {
@apply px-6 py-3 bg-red-600 hover:bg-red-500 text-white font-semibold
rounded-xl transition-all duration-200 shadow-lg shadow-red-600/25
hover:shadow-red-500/40 active:scale-[0.98];
}
/* ─── Input Styles ─── */
.input-field {
@apply w-full px-4 py-3 bg-surface-900/50 border border-surface-700/50
rounded-xl text-white placeholder-surface-200/30 focus:outline-none
focus:ring-2 focus:ring-primary-500/50 focus:border-primary-500/50
transition-all duration-200;
}
/* ─── File Upload ─── */
.file-upload {
@apply w-full px-4 py-8 border-2 border-dashed border-surface-700/50
rounded-xl text-center cursor-pointer hover:border-primary-500/50
hover:bg-primary-500/5 transition-all duration-200;
}

10
client/src/main.tsx Normal file
View File

@ -0,0 +1,10 @@
import React from "react";
import ReactDOM from "react-dom/client";
import App from "./App";
import "./index.css";

// Application entry point: mount <App/> under #root.
// StrictMode is dev-only; it double-invokes renders to surface side effects.
const rootElement = document.getElementById("root");

ReactDOM.createRoot(rootElement!).render(
  <React.StrictMode>
    <App />
  </React.StrictMode>,
);

View File

@ -0,0 +1,215 @@
import { useState, useRef, type FormEvent, type ChangeEvent } from "react";
import { useNavigate } from "react-router-dom";
import { createCandidate } from "../services/api";

/**
 * CandidateOnboarding page collects candidate info, resume, and photo
 * before starting the interview.
 *
 * On submit, the fields are posted as multipart/form-data via
 * createCandidate(); on success we navigate to the interview room keyed
 * by the new candidate's id.
 */
export default function CandidateOnboarding() {
  const navigate = useNavigate();
  const [name, setName] = useState("");
  const [email, setEmail] = useState("");
  const [experienceSummary, setExperienceSummary] = useState("");
  const [resumeFile, setResumeFile] = useState<File | null>(null);
  const [photoFile, setPhotoFile] = useState<File | null>(null);
  const [loading, setLoading] = useState(false);
  const [error, setError] = useState("");
  // Hidden <input type="file"> elements, opened by clicking the dropzones.
  const resumeInputRef = useRef<HTMLInputElement>(null);
  const photoInputRef = useRef<HTMLInputElement>(null);

  const handleSubmit = async (e: FormEvent) => {
    e.preventDefault();
    setError("");
    setLoading(true);
    try {
      const formData = new FormData();
      formData.append("name", name);
      formData.append("email", email);
      // Optional fields are only appended when present.
      if (experienceSummary)
        formData.append("experienceSummary", experienceSummary);
      if (resumeFile) formData.append("resume", resumeFile);
      if (photoFile) formData.append("profilePicture", photoFile);
      const candidate = await createCandidate(formData);
      // Navigate to interview room with candidate data
      navigate(`/interview/${candidate._id}`, {
        state: { candidateId: candidate._id, candidateName: candidate.name },
      });
    } catch (err) {
      // Caught value is `unknown` under strict TS — narrow before reading
      // `.message`, and keep the generic fallback for empty/non-Error throws.
      setError(
        err instanceof Error && err.message
          ? err.message
          : "Something went wrong",
      );
    } finally {
      setLoading(false);
    }
  };

  // Curried change handler shared by both file inputs.
  const handleFileChange =
    (setter: (f: File | null) => void) =>
    (e: ChangeEvent<HTMLInputElement>) => {
      setter(e.target.files?.[0] || null);
    };

  return (
    <div className="min-h-[calc(100vh-5rem)] flex items-center justify-center px-4 py-12">
      <div className="glass w-full max-w-lg p-8">
        {/* ── Header ── */}
        <div className="text-center mb-8">
          <h2 className="text-3xl font-bold gradient-text mb-2">
            Welcome, Candidate
          </h2>
          <p className="text-surface-200/60 text-sm">
            Fill in your details to start the AI-powered interview
          </p>
        </div>
        {/* ── Form ── */}
        <form onSubmit={handleSubmit} className="space-y-5">
          {/* Name */}
          <div>
            <label className="block text-sm font-medium text-surface-200/80 mb-1.5">
              Full Name *
            </label>
            <input
              id="input-name"
              type="text"
              className="input-field"
              placeholder="John Doe"
              value={name}
              onChange={(e) => setName(e.target.value)}
              required
            />
          </div>
          {/* Email */}
          <div>
            <label className="block text-sm font-medium text-surface-200/80 mb-1.5">
              Email *
            </label>
            <input
              id="input-email"
              type="email"
              className="input-field"
              placeholder="john@example.com"
              value={email}
              onChange={(e) => setEmail(e.target.value)}
              required
            />
          </div>
          {/* Experience Summary */}
          <div>
            <label className="block text-sm font-medium text-surface-200/80 mb-1.5">
              Experience Summary
            </label>
            <textarea
              id="input-experience"
              className="input-field min-h-[80px] resize-y"
              placeholder="Brief summary of your experience..."
              value={experienceSummary}
              onChange={(e) => setExperienceSummary(e.target.value)}
              rows={3}
            />
          </div>
          {/* Resume Upload */}
          <div>
            <label className="block text-sm font-medium text-surface-200/80 mb-1.5">
              Resume (PDF or Image)
            </label>
            <div
              className="file-upload"
              onClick={() => resumeInputRef.current?.click()}
            >
              <input
                ref={resumeInputRef}
                type="file"
                accept=".pdf,.png,.jpg,.jpeg"
                className="hidden"
                onChange={handleFileChange(setResumeFile)}
              />
              {resumeFile ? (
                <span className="text-primary-400 font-medium">
                  📄 {resumeFile.name}
                </span>
              ) : (
                <span className="text-surface-200/40">
                  Click to upload resume
                </span>
              )}
            </div>
          </div>
          {/* Photo Upload */}
          <div>
            <label className="block text-sm font-medium text-surface-200/80 mb-1.5">
              Profile Photo (optional)
            </label>
            <div
              className="file-upload"
              onClick={() => photoInputRef.current?.click()}
            >
              <input
                ref={photoInputRef}
                type="file"
                accept=".png,.jpg,.jpeg"
                className="hidden"
                onChange={handleFileChange(setPhotoFile)}
              />
              {photoFile ? (
                <span className="text-primary-400 font-medium">
                  📷 {photoFile.name}
                </span>
              ) : (
                <span className="text-surface-200/40">
                  Click to upload photo
                </span>
              )}
            </div>
          </div>
          {/* Error */}
          {error && (
            <div className="text-red-400 text-sm bg-red-500/10 px-4 py-2 rounded-lg">
              {error}
            </div>
          )}
          {/* Submit */}
          <button
            id="btn-start-interview"
            type="submit"
            disabled={loading || !name || !email}
            className="btn-primary w-full disabled:opacity-40 disabled:cursor-not-allowed"
          >
            {loading ? (
              <span className="flex items-center justify-center gap-2">
                <svg className="animate-spin h-5 w-5" viewBox="0 0 24 24">
                  <circle
                    className="opacity-25"
                    cx="12"
                    cy="12"
                    r="10"
                    stroke="currentColor"
                    strokeWidth="4"
                    fill="none"
                  />
                  <path
                    className="opacity-75"
                    fill="currentColor"
                    d="M4 12a8 8 0 018-8V0C5.373 0 0 5.373 0 12h4z"
                  />
                </svg>
                Processing
              </span>
            ) : (
              "Start Interview →"
            )}
          </button>
        </form>
      </div>
    </div>
  );
}

View File

@ -0,0 +1,315 @@
import { useEffect, useRef, useState, useCallback } from "react";
import { useParams, useLocation, useNavigate } from "react-router-dom";
import Webcam from "react-webcam";
import Avatar from "../components/Avatar";
// NOTE(review): the `AIState` type import appears unused in this file.
import { useSocket, type AIState } from "../hooks/useSocket";
import { useAudioRecorder } from "../hooks/useAudioRecorder";
import { evaluateInterview } from "../services/api";

// One chat bubble in the running transcript.
interface TranscriptLine {
  role: "user" | "ai";
  text: string;
}

/**
 * InterviewRoom the core interview experience.
 *
 * Left: User's webcam feed
 * Right: AI Avatar + transcript
 *
 * Audio is captured from the mic, streamed via socket as binary chunks,
 * and TTS audio is played back through the Web Audio API.
 */
export default function InterviewRoom() {
  const { sessionId } = useParams<{ sessionId: string }>();
  const location = useLocation();
  const navigate = useNavigate();
  // Prefer the id passed via navigation state; fall back to the URL param
  // (e.g. on a page refresh where router state is lost).
  const candidateId = (location.state as any)?.candidateId || sessionId || "";
  const [transcript, setTranscript] = useState<TranscriptLine[]>([]);
  const [currentSessionId, setCurrentSessionId] = useState("");
  const [faceStatus, setFaceStatus] = useState<string>("");
  const [interviewEnded, setInterviewEnded] = useState(false);
  // Shape assumed to be Record<dimension, { rating, review_message }> —
  // TODO confirm against the server's evaluation payload.
  const [evaluation, setEvaluation] = useState<any>(null);
  const webcamRef = useRef<Webcam>(null);
  const transcriptEndRef = useRef<HTMLDivElement>(null);
  // ── Socket connection ──
  const {
    connected,
    aiState,
    joinRoom,
    sendAudioChunk,
    signalEndOfSpeech,
    sendFaceFrame,
    endInterview,
  } = useSocket({
    onSessionCreated: (data) => {
      setCurrentSessionId(data.sessionId);
      // Send face verification frame once session is established
      // NOTE(review): the 2s delay presumably lets the webcam warm up, and
      // this timeout is never cleared on unmount — confirm acceptable.
      setTimeout(() => captureAndVerifyFace(), 2000);
    },
    onTranscript: (text) => {
      setTranscript((prev) => [...prev, { role: "ai", text }]);
    },
    onFaceResult: (data) => {
      setFaceStatus(data.message);
    },
    onInterviewEnded: async (data) => {
      setInterviewEnded(true);
      try {
        const result = (await evaluateInterview(data.sessionId)) as any;
        setEvaluation(result.evaluation);
      } catch {
        // Evaluation fetch failed — non-critical
      }
    },
    onError: (err) => {
      console.error("Socket error:", err.message);
    },
  });
  // ── Audio recording with silence detection ──
  const { isRecording, startRecording, stopRecording } = useAudioRecorder({
    onChunk: (chunk) => {
      sendAudioChunk(chunk);
    },
    onSilence: () => {
      // User stopped speaking — trigger pipeline
      signalEndOfSpeech();
      setTranscript((prev) => {
        // Add a placeholder for user speech (will be refined by STT)
        if (prev.length === 0 || prev[prev.length - 1].role === "ai") {
          return [...prev, { role: "user", text: "(processing speech…)" }];
        }
        return prev;
      });
    },
  });
  // ── Auto-scroll transcript ──
  useEffect(() => {
    transcriptEndRef.current?.scrollIntoView({ behavior: "smooth" });
  }, [transcript]);
  // ── Join room on mount ──
  useEffect(() => {
    if (connected && candidateId) {
      joinRoom(candidateId);
    }
  }, [connected, candidateId, joinRoom]);
  // ── Face verification ──
  // Grabs the current webcam frame as a JPEG and ships it over the socket.
  // Silently no-ops if the webcam/canvas is not ready yet.
  const captureAndVerifyFace = useCallback(() => {
    if (!webcamRef.current || !candidateId) return;
    const canvas = webcamRef.current.getCanvas();
    if (!canvas) return;
    canvas.toBlob((blob) => {
      if (!blob) return;
      blob.arrayBuffer().then((buffer) => {
        sendFaceFrame(candidateId, buffer);
      });
    }, "image/jpeg");
  }, [candidateId, sendFaceFrame]);
  // ── End interview handler ──
  const handleEndInterview = () => {
    stopRecording();
    endInterview();
  };
  // ── Render: Evaluation results ──
  if (interviewEnded && evaluation) {
    return (
      <div className="min-h-[calc(100vh-5rem)] flex items-center justify-center px-4 py-12">
        <div className="glass w-full max-w-2xl p-8">
          <h2 className="text-2xl font-bold gradient-text mb-6 text-center">
            Interview Complete
          </h2>
          <div className="grid grid-cols-2 gap-4 mb-8">
            {/* One card per evaluation dimension. Assumes each entry has
                { rating, review_message } — TODO confirm server shape. */}
            {Object.entries(evaluation).map(([key, val]: [string, any]) => (
              <div key={key} className="glass p-4">
                <h3 className="text-sm font-semibold text-primary-300 uppercase tracking-wider mb-1">
                  {key}
                </h3>
                <div className="flex items-baseline gap-2 mb-2">
                  <span className="text-3xl font-bold text-white">
                    {val.rating}
                  </span>
                  <span className="text-surface-200/40 text-sm">/ 10</span>
                </div>
                <p className="text-surface-200/60 text-sm leading-relaxed">
                  {val.review_message}
                </p>
              </div>
            ))}
          </div>
          <button onClick={() => navigate("/")} className="btn-primary w-full">
            Back to Home
          </button>
        </div>
      </div>
    );
  }
  // ── Render: Interview room ──
  return (
    <div className="min-h-[calc(100vh-5rem)] px-4 py-6">
      <div className="max-w-7xl mx-auto grid grid-cols-1 lg:grid-cols-2 gap-6 h-[calc(100vh-8rem)]">
        {/* ─── LEFT: User Video ─── */}
        <div className="glass p-4 flex flex-col">
          <div className="flex items-center justify-between mb-3">
            <h3 className="text-sm font-semibold text-surface-200/60 uppercase tracking-wider">
              Your Camera
            </h3>
            <div className="flex items-center gap-2">
              <div
                className={`w-2 h-2 rounded-full ${
                  connected ? "bg-emerald-400" : "bg-red-400"
                }`}
              />
              <span className="text-xs text-surface-200/40">
                {connected ? "Connected" : "Connecting…"}
              </span>
            </div>
          </div>
          <div className="flex-1 rounded-xl overflow-hidden bg-surface-900/50 relative">
            <Webcam
              ref={webcamRef}
              audio={false}
              mirrored
              className="w-full h-full object-cover"
              videoConstraints={{
                facingMode: "user",
                width: 640,
                height: 480,
              }}
            />
            {/* Face status badge */}
            {faceStatus && (
              <div className="absolute bottom-3 left-3 right-3">
                {/* Badge color keys off substrings of the server message —
                    keep in sync with the face-auth responses. */}
                <div
                  className={`text-xs px-3 py-1.5 rounded-lg backdrop-blur-md ${
                    faceStatus.includes("verified") ||
                    faceStatus.includes("captured")
                      ? "bg-emerald-500/20 text-emerald-300"
                      : "bg-amber-500/20 text-amber-300"
                  }`}
                >
                  {faceStatus}
                </div>
              </div>
            )}
          </div>
          {/* Controls */}
          <div className="flex items-center justify-center gap-4 mt-4">
            <button
              id="btn-toggle-mic"
              onClick={isRecording ? stopRecording : startRecording}
              className={`
                w-14 h-14 rounded-full flex items-center justify-center
                transition-all duration-200
                ${
                  isRecording
                    ? "bg-red-500 shadow-lg shadow-red-500/30 hover:bg-red-400"
                    : "bg-primary-600 shadow-lg shadow-primary-600/30 hover:bg-primary-500"
                }
              `}
            >
              {isRecording ? (
                // Stop icon
                <svg
                  className="w-6 h-6 text-white"
                  fill="currentColor"
                  viewBox="0 0 24 24"
                >
                  <rect x="6" y="6" width="12" height="12" rx="2" />
                </svg>
              ) : (
                // Mic icon
                <svg
                  className="w-6 h-6 text-white"
                  fill="none"
                  stroke="currentColor"
                  strokeWidth="2"
                  viewBox="0 0 24 24"
                >
                  <path
                    strokeLinecap="round"
                    strokeLinejoin="round"
                    d="M12 1a3 3 0 00-3 3v8a3 3 0 006 0V4a3 3 0 00-3-3z"
                  />
                  <path
                    strokeLinecap="round"
                    strokeLinejoin="round"
                    d="M19 10v2a7 7 0 01-14 0v-2"
                  />
                  <line x1="12" y1="19" x2="12" y2="23" />
                  <line x1="8" y1="23" x2="16" y2="23" />
                </svg>
              )}
            </button>
            <button
              id="btn-end-interview"
              onClick={handleEndInterview}
              className="btn-danger text-sm"
            >
              End Interview
            </button>
          </div>
        </div>
        {/* ─── RIGHT: Avatar + Transcript ─── */}
        <div className="glass p-4 flex flex-col">
          {/* Avatar */}
          <div className="flex justify-center py-6">
            <Avatar state={aiState} />
          </div>
          {/* Transcript */}
          <div className="flex-1 overflow-y-auto space-y-3 px-2">
            <h3 className="text-sm font-semibold text-surface-200/60 uppercase tracking-wider mb-2">
              Transcript
            </h3>
            {transcript.length === 0 && (
              <p className="text-center text-surface-200/30 text-sm py-8">
                Start speaking to begin the interview
              </p>
            )}
            {transcript.map((line, i) => (
              <div
                key={i}
                className={`flex ${
                  line.role === "user" ? "justify-end" : "justify-start"
                }`}
              >
                <div
                  className={`max-w-[80%] px-4 py-2.5 rounded-2xl text-sm leading-relaxed ${
                    line.role === "user"
                      ? "bg-primary-600/30 text-primary-100 rounded-br-md"
                      : "bg-surface-700/40 text-surface-200/80 rounded-bl-md"
                  }`}
                >
                  {line.text}
                </div>
              </div>
            ))}
            {/* Sentinel the auto-scroll effect scrolls into view. */}
            <div ref={transcriptEndRef} />
          </div>
        </div>
      </div>
    </div>
  );
}

View File

@ -0,0 +1,53 @@
const API_URL = import.meta.env.VITE_API_URL || 'http://localhost:3001';

/**
 * Thin wrapper around fetch for backend REST calls.
 *
 * Sends JSON by default; rejects with an Error carrying the HTTP status
 * and response body text on any non-2xx response.
 */
async function request<T>(
  path: string,
  options?: RequestInit,
): Promise<T> {
  const res = await fetch(`${API_URL}${path}`, {
    // Spread options FIRST: previously `...options` came last, so any
    // caller-supplied `headers` object replaced the merged headers and
    // silently dropped the JSON Content-Type.
    ...options,
    headers: { 'Content-Type': 'application/json', ...options?.headers },
  });
  if (!res.ok) {
    const error = await res.text();
    throw new Error(`API error ${res.status}: ${error}`);
  }
  return res.json();
}
/* ─── Candidates ─── */
export const createCandidate = async (formData: FormData) => {
  // Deliberately bypasses the JSON `request` helper: the browser must set
  // the multipart boundary Content-Type itself.
  const res = await fetch(`${API_URL}/candidates`, {
    method: 'POST',
    body: formData, // multipart — no Content-Type header
  });
  if (!res.ok) throw new Error(`Create candidate failed: ${res.status}`);
  return res.json();
};

export const getCandidate = (id: string) => request(`/candidates/${id}`);

/* ─── Interviews ─── */
export const createInterview = (candidateId: string) =>
  request('/interviews', {
    method: 'POST',
    body: JSON.stringify({ candidateId }),
  });

export const getInterview = (id: string) => request(`/interviews/${id}`);

export const evaluateInterview = (id: string) =>
  request(`/interviews/${id}/evaluate`, { method: 'POST' });

1
client/src/vite-env.d.ts vendored Normal file
View File

@ -0,0 +1 @@
/// <reference types="vite/client" />

40
client/tailwind.config.js Normal file
View File

@ -0,0 +1,40 @@
/** @type {import('tailwindcss').Config} */
export default {
  content: ["./index.html", "./src/**/*.{js,ts,jsx,tsx}"],
  theme: {
    extend: {
      colors: {
        // Indigo brand ramp (buttons, focus rings, gradient text).
        primary: {
          50: "#eef2ff",
          100: "#e0e7ff",
          200: "#c7d2fe",
          300: "#a5b4fc",
          400: "#818cf8",
          500: "#6366f1",
          600: "#4f46e5",
          700: "#4338ca",
          800: "#3730a3",
          900: "#312e81",
          950: "#1e1b4b",
        },
        // Slate-based neutrals for backgrounds, borders, and muted text.
        // Shades 300–600 were missing, so classes like `border-surface-600`
        // (used by the Avatar component) generated no CSS at all; filled in
        // with Tailwind's standard slate values.
        surface: {
          50: "#f8fafc",
          100: "#f1f5f9",
          200: "#e2e8f0",
          300: "#cbd5e1",
          400: "#94a3b8",
          500: "#64748b",
          600: "#475569",
          700: "#334155",
          800: "#1e293b",
          900: "#0f172a",
          950: "#020617",
        },
      },
      animation: {
        "pulse-slow": "pulse 3s cubic-bezier(0.4, 0, 0.6, 1) infinite",
        "bounce-subtle": "bounce 2s ease-in-out infinite",
      },
      fontFamily: {
        sans: ["Inter", "system-ui", "sans-serif"],
      },
    },
  },
  plugins: [],
};

25
client/tsconfig.json Normal file
View File

@ -0,0 +1,25 @@
{
"compilerOptions": {
"target": "ES2020",
"useDefineForClassFields": true,
"lib": ["ES2020", "DOM", "DOM.Iterable"],
"module": "ESNext",
"skipLibCheck": true,
"moduleResolution": "bundler",
"allowImportingTsExtensions": true,
"isolatedModules": true,
"moduleDetection": "force",
"noEmit": true,
"jsx": "react-jsx",
"strict": true,
"noUnusedLocals": false,
"noUnusedParameters": false,
"noFallthroughCasesInSwitch": true,
"forceConsistentCasingInFileNames": true,
"baseUrl": ".",
"paths": {
"@/*": ["src/*"]
}
},
"include": ["src"]
}

View File

@ -0,0 +1 @@
{"root":["./src/app.tsx","./src/main.tsx","./src/vite-env.d.ts","./src/components/avatar.tsx","./src/hooks/useaudiorecorder.ts","./src/hooks/usesocket.ts","./src/pages/candidateonboarding.tsx","./src/pages/interviewroom.tsx","./src/services/api.ts"],"version":"5.9.3"}

22
client/vite.config.ts Normal file
View File

@ -0,0 +1,22 @@
import { defineConfig } from 'vite';
import react from '@vitejs/plugin-react';
import path from 'path';

/**
 * Vite configuration for the client.
 * - '@' alias points at ./src (keep in sync with tsconfig "paths").
 * - The dev-server proxy strips the '/api' prefix and forwards to the
 *   NestJS backend on :3001, avoiding CORS during local development.
 */
export default defineConfig({
  plugins: [react()],
  resolve: {
    alias: {
      '@': path.resolve(__dirname, './src'),
    },
  },
  server: {
    port: 5173,
    proxy: {
      '/api': {
        target: 'http://localhost:3001',
        changeOrigin: true,
        // Parameter renamed from `path`: the original shadowed the
        // node 'path' import above, which is confusing and lint-flagged.
        rewrite: (requestPath) => requestPath.replace(/^\/api/, ''),
      },
    },
  },
});

39
package-lock.json generated Normal file
View File

@ -0,0 +1,39 @@
{
"name": "ai_interview_assistant",
"lockfileVersion": 3,
"requires": true,
"packages": {
"": {
"devDependencies": {
"@types/pdf-parse": "^1.1.5"
}
},
"node_modules/@types/node": {
"version": "25.3.0",
"resolved": "https://registry.npmjs.org/@types/node/-/node-25.3.0.tgz",
"integrity": "sha512-4K3bqJpXpqfg2XKGK9bpDTc6xO/xoUP/RBWS7AtRMug6zZFaRekiLzjVtAoZMquxoAbzBvy5nxQ7veS5eYzf8A==",
"dev": true,
"license": "MIT",
"dependencies": {
"undici-types": "~7.18.0"
}
},
"node_modules/@types/pdf-parse": {
"version": "1.1.5",
"resolved": "https://registry.npmjs.org/@types/pdf-parse/-/pdf-parse-1.1.5.tgz",
"integrity": "sha512-kBfrSXsloMnUJOKi25s3+hRmkycHfLK6A09eRGqF/N8BkQoPUmaCr+q8Cli5FnfohEz/rsv82zAiPz/LXtOGhA==",
"dev": true,
"license": "MIT",
"dependencies": {
"@types/node": "*"
}
},
"node_modules/undici-types": {
"version": "7.18.2",
"resolved": "https://registry.npmjs.org/undici-types/-/undici-types-7.18.2.tgz",
"integrity": "sha512-AsuCzffGHJybSaRrmr5eHr81mwJU3kjw6M+uprWvCXiNeN9SOGwQ3Jn8jb8m3Z6izVgknn1R0FTCEAP2QrLY/w==",
"dev": true,
"license": "MIT"
}
}
}

5
package.json Normal file
View File

@ -0,0 +1,5 @@
{
"devDependencies": {
"@types/pdf-parse": "^1.1.5"
}
}

BIN
server/eng.traineddata Normal file

Binary file not shown.

8
server/nest-cli.json Normal file
View File

@ -0,0 +1,8 @@
{
"$schema": "https://json.schemastore.org/nest-cli",
"collection": "@nestjs/schematics",
"sourceRoot": "src",
"compilerOptions": {
"deleteOutDir": true
}
}

9457
server/package-lock.json generated Normal file

File diff suppressed because it is too large Load Diff

76
server/package.json Normal file
View File

@ -0,0 +1,76 @@
{
"name": "ai-interviewer",
"version": "1.0.0",
"description": "AI Interview Platform — NestJS Backend",
"private": true,
"scripts": {
"build": "nest build",
"start": "nest start",
"start:dev": "nest start --watch",
"start:debug": "nest start --debug --watch",
"start:prod": "node dist/main",
"lint": "eslint \"{src,test}/**/*.ts\"",
"test": "jest",
"test:watch": "jest --watch",
"test:cov": "jest --coverage"
},
"dependencies": {
"@nestjs/common": "^10.4.0",
"@nestjs/config": "^3.3.0",
"@nestjs/core": "^10.4.0",
"@nestjs/mongoose": "^10.1.0",
"@nestjs/platform-express": "^10.4.0",
"@nestjs/platform-socket.io": "^10.4.0",
"@nestjs/websockets": "^10.4.0",
"class-validator": "^0.14.1",
"class-transformer": "^0.5.1",
"mongoose": "^8.9.0",
"multer": "^1.4.5-lts.1",
"pdf-parse": "^1.1.1",
"reflect-metadata": "^0.2.2",
"rxjs": "^7.8.1",
"sarvamai": "^1.0.0",
"@deepgram/sdk": "^3.0.0",
"ws": "^8.18.0",
"socket.io": "^4.8.0",
"tesseract.js": "^5.1.1",
"uuid": "^11.0.0"
},
"optionalDependencies": {
"face-api.js": "^0.22.2",
"canvas": "^2.11.2"
},
"devDependencies": {
"@types/ws": "^8.5.0",
"@nestjs/cli": "^10.4.0",
"@nestjs/schematics": "^10.2.0",
"@nestjs/testing": "^10.4.0",
"@types/express": "^5.0.0",
"@types/jest": "^29.5.14",
"@types/multer": "^1.4.12",
"@types/node": "^22.10.0",
"@types/uuid": "^10.0.0",
"jest": "^29.7.0",
"ts-jest": "^29.2.0",
"ts-loader": "^9.5.0",
"ts-node": "^10.9.2",
"typescript": "^5.7.0"
},
"jest": {
"moduleFileExtensions": [
"js",
"json",
"ts"
],
"rootDir": "src",
"testRegex": ".*\\.spec\\.ts$",
"transform": {
"^.+\\.(t|j)s$": "ts-jest"
},
"collectCoverageFrom": [
"**/*.(t|j)s"
],
"coverageDirectory": "../coverage",
"testEnvironment": "node"
}
}

29
server/src/app.module.ts Normal file
View File

@ -0,0 +1,29 @@
import { Module } from '@nestjs/common';
import { ConfigModule, ConfigService } from '@nestjs/config';
import { MongooseModule } from '@nestjs/mongoose';
import { CandidateModule } from './candidate/candidate.module';
import { FaceAuthModule } from './face-auth/face-auth.module';
import { InterviewModule } from './interview/interview.module';
import { BrainModule } from './brain/brain.module';

/** Application root module: wires configuration, MongoDB and feature modules. */
@Module({
  imports: [
    // Load .env from the project root and expose ConfigService globally.
    ConfigModule.forRoot({
      isGlobal: true,
      envFilePath: '../.env',
    }),
    // MongoDB connection. Resolved lazily through ConfigService rather than
    // reading process.env at decorator-evaluation time: the original only
    // worked because ConfigModule.forRoot() happened to appear earlier in
    // this array, which silently breaks if the import order changes.
    MongooseModule.forRootAsync({
      inject: [ConfigService],
      useFactory: (config: ConfigService) => ({
        uri: config.get<string>(
          'MONGODB_URI',
          'mongodb://localhost:27017/ai-interviewer',
        ),
      }),
    }),
    // Feature modules
    CandidateModule,
    FaceAuthModule,
    InterviewModule,
    BrainModule,
  ],
})
export class AppModule { }

View File

@ -0,0 +1,25 @@
import { Module } from '@nestjs/common';
import { MongooseModule } from '@nestjs/mongoose';
import {
  Candidate,
  CandidateSchema,
} from '../candidate/schemas/candidate.schema';
import {
  ConversationState,
  ConversationStateSchema,
} from '../interview/schemas/conversation-state.schema';
import { BrainService } from './brain.service';
import { EvaluationService } from './evaluation.service';

/**
 * BrainModule bundles the LLM-facing services: BrainService (drives the
 * live conversation) and EvaluationService (scores the final transcript).
 * Both are exported for use by the interview gateway/orchestrator.
 */
@Module({
  imports: [
    // BrainService needs access to the Candidate collection for resume
    // context and to ConversationState for the per-session strategy machine.
    MongooseModule.forFeature([
      { name: Candidate.name, schema: CandidateSchema },
      { name: ConversationState.name, schema: ConversationStateSchema },
    ]),
  ],
  providers: [BrainService, EvaluationService],
  exports: [BrainService, EvaluationService],
})
export class BrainModule { }

View File

@ -0,0 +1,239 @@
import { Injectable, Logger } from '@nestjs/common';
import { InjectModel } from '@nestjs/mongoose';
import { Model } from 'mongoose';
import { Candidate, CandidateDocument } from '../candidate/schemas/candidate.schema';
import { TranscriptEntry } from '../interview/schemas/interview-session.schema';
import { ConversationState, ConversationStateDocument } from '../interview/schemas/conversation-state.schema';

/**
 * BrainService generates the interviewer's side of the conversation.
 *
 * It keeps a per-session strategy state machine persisted in MongoDB
 * (INTRODUCTION → EXPERIENCE_DEEP_DIVE → TECHNICAL_CORE → ODD_SKILL_PROBE →
 * BEHAVIORAL → CONCLUSION) and asks the Cerebras chat-completions API to
 * phrase each question. When CEREBRAS_API_KEY is unset, all calls return
 * canned fallback text instead of failing.
 */
@Injectable()
export class BrainService {
  private readonly logger = new Logger(BrainService.name);
  // Read once at construction — assumes .env has been loaded by the time
  // Nest instantiates this provider.
  private readonly apiKey = process.env.CEREBRAS_API_KEY || '';

  constructor(
    @InjectModel(Candidate.name)
    private readonly candidateModel: Model<CandidateDocument>,
    @InjectModel(ConversationState.name)
    private readonly stateModel: Model<ConversationStateDocument>,
  ) { }

  /**
   * Produce the AI's next utterance for a session.
   *
   * @param sessionId   interview session id (keys the ConversationState doc)
   * @param candidateId Mongo id of the candidate being interviewed
   * @param userText    the candidate's latest transcribed answer
   * @param history     full transcript so far (user and ai entries)
   * @returns the next question / statement to speak to the candidate
   */
  async generateResponse(
    sessionId: string,
    candidateId: string,
    userText: string,
    history: TranscriptEntry[],
  ): Promise<string> {
    const candidate = await this.candidateModel.findById(candidateId).exec();
    if (!candidate) return "I'm sorry, I cannot find your profile.";
    // Lazily create the per-session state document on first turn.
    let state = await this.stateModel.findOne({ sessionId }).exec();
    if (!state) {
      state = new this.stateModel({ sessionId, currentStrategy: 'INTRODUCTION' });
      await state.save();
    }
    // 1. generateChainedQuestion(): If previous user answer was very short or vague, ask "Why?" or probe deeper.
    // (word-count < 10 is the "vague answer" heuristic; skipped during intro
    // and conclusion so greetings/goodbyes aren't probed)
    if (history.length > 0 && userText.split(' ').length < 10 && state.currentStrategy !== 'INTRODUCTION' && state.currentStrategy !== 'CONCLUSION') {
      return this.generateChainedQuestion(userText, history);
    }
    let responseText = '';
    // Dynamic thresholds based on MAX_INTERVIEW_QUESTIONS (clamped to 5..25).
    const maxEnv = parseInt(process.env.MAX_INTERVIEW_QUESTIONS || '15', 10);
    const maxQuestions = Math.min(Math.max(maxEnv, 5), 25);
    // At least 1 intro turn (which usually translates to 2 history items: user + ai greeting)
    // We measure turns by history.length / 2.
    const currentTurn = Math.floor(history.length / 2);
    // Phase budget: ~10% intro, 30% experience, 30% technical, 15% odd-skill,
    // remainder behavioral. Each limit is cumulative.
    const introLimit = Math.max(1, Math.floor(maxQuestions * 0.10));
    const expLimit = introLimit + Math.max(1, Math.floor(maxQuestions * 0.30));
    const techLimit = expLimit + Math.max(1, Math.floor(maxQuestions * 0.30));
    const oddLimit = techLimit + Math.max(1, Math.floor(maxQuestions * 0.15));
    const behavLimit = maxQuestions; // The rest
    // Switch based on current strategy lifecycle. Note: the question for the
    // CURRENT phase is generated first, then the phase pointer may advance,
    // so each transition takes effect on the following turn.
    switch (state.currentStrategy) {
      case 'INTRODUCTION':
        responseText = await this.handleIntro(candidate, history);
        if (currentTurn >= introLimit) {
          state.currentStrategy = 'EXPERIENCE_DEEP_DIVE';
        }
        break;
      case 'EXPERIENCE_DEEP_DIVE':
        responseText = await this.askExperienceBased(candidate, history);
        if (currentTurn >= expLimit) {
          state.currentStrategy = 'TECHNICAL_CORE';
        }
        break;
      case 'TECHNICAL_CORE':
        responseText = await this.askTechnicalAndCore(state, candidate, history);
        if (currentTurn >= techLimit) {
          state.currentStrategy = 'ODD_SKILL_PROBE';
        }
        break;
      case 'ODD_SKILL_PROBE':
        responseText = await this.detectAndAskOddSkill(state, candidate, history);
        if (currentTurn >= oddLimit) {
          state.currentStrategy = 'BEHAVIORAL';
        }
        break;
      case 'BEHAVIORAL':
        responseText = await this.handleBehavioral(candidate, history);
        if (currentTurn >= behavLimit - 1) { // Minus 1 because the next turn will be conclusion wrapper
          state.currentStrategy = 'CONCLUSION';
        }
        break;
      case 'CONCLUSION':
      default:
        responseText = await this.handleConclusion(candidate, history);
        break;
    }
    state.lastQuestionAsked = responseText;
    await state.save();
    return responseText;
  }

  /**
   * Low-level Cerebras chat-completions call. Returns canned fallback text
   * when the API key is missing or the request fails, so the interview can
   * continue degraded instead of crashing.
   * NOTE(review): response.ok is not checked — an HTTP error page would fall
   * through to the "Can you elaborate?" fallback via optional chaining.
   */
  private async callCerebras(messages: Array<{ role: string, content: string }>, maxTokens = 150): Promise<string> {
    if (!this.apiKey) {
      this.logger.warn('CEREBRAS_API_KEY missing. Mocking response.');
      return "That's interesting. Please tell me more.";
    }
    try {
      const response = await fetch('https://api.cerebras.ai/v1/chat/completions', {
        method: 'POST',
        headers: {
          Authorization: `Bearer ${this.apiKey}`,
          'Content-Type': 'application/json',
        },
        body: JSON.stringify({
          model: 'llama3.1-8b',
          messages,
          max_tokens: maxTokens,
          temperature: 0.7,
        }),
      });
      const data = await response.json();
      return data?.choices?.[0]?.message?.content || "Can you elaborate?";
    } catch (err) {
      this.logger.error(`Cerebras error: ${err}`);
      return "I missed that, could you repeat your point?";
    }
  }

  /** Map transcript entries to OpenAI-style chat roles ('ai' → 'assistant'). */
  private formatHistory(history: TranscriptEntry[]): Array<{ role: string; content: string }> {
    return history.map(h => ({
      role: h.role === 'ai' ? 'assistant' : 'user',
      content: h.text
    }));
  }

  // 1. Chained Question — one-sentence "why / give an example" follow-up,
  // used when the candidate's answer was too short.
  private async generateChainedQuestion(userText: string, history: TranscriptEntry[]): Promise<string> {
    const messages = [
      { role: 'system', content: 'The candidate just gave a very brief or vague answer. Politely ask them to elaborate, explain "why", or provide a specific example. Keep the prompt exactly 1 sentence.' },
      ...this.formatHistory(history.slice(-3)),
      { role: 'user', content: userText }
    ];
    return this.callCerebras(messages, 50);
  }

  // 2. Odd Skill Probe — pick the most unusual skill from the resume and
  // ask a targeted question about it.
  private async detectAndAskOddSkill(state: ConversationStateDocument, candidate: CandidateDocument, history: TranscriptEntry[]): Promise<string> {
    const skills = candidate.skills?.join(', ') || 'general web development';
    const messages = [
      { role: 'system', content: `You are a technical interviewer. The candidate listed these skills: ${skills}. Find the most unusual, niche, or distinct skill in that list. Ask a highly specific, challenging question about their experience with that exact skill. 1-2 sentences max.` },
      ...this.formatHistory(history.slice(-4))
    ];
    return this.callCerebras(messages);
  }

  // 3. Experience Deep Dive — probe past projects using the resume summary.
  private async askExperienceBased(candidate: CandidateDocument, history: TranscriptEntry[]): Promise<string> {
    const messages = [
      { role: 'system', content: `You are an interviewer focusing on the candidate's past projects. Here is their experience summary: "${candidate.experienceSummary || 'Candidate has a background in software engineering.'}". Ask a probing, role-specific question about the impact or architecture of one of those projects. 1-2 sentences max.` },
      ...this.formatHistory(history.slice(-4))
    ];
    return this.callCerebras(messages);
  }

  // 4. Technical Core — alternates between CS fundamentals and
  // resume-framework questions via a counter persisted on the state doc.
  private async askTechnicalAndCore(state: ConversationStateDocument, candidate: CandidateDocument, history: TranscriptEntry[]): Promise<string> {
    state.technicalQuestionsAsked += 1;
    const type = state.technicalQuestionsAsked % 2 === 0 ? 'Computer Science core concepts (e.g., Big O, memory, concurrency)' : 'frameworks mentioned in their resume';
    const messages = [
      { role: 'system', content: `You are a technical interviewer. Ask a technical question focusing on ${type}. Base it on their skills: ${candidate.skills?.join(', ') || 'programming'}. Make it challenging but fair. 1-2 sentences max.` },
      ...this.formatHistory(history.slice(-4))
    ];
    return this.callCerebras(messages);
  }

  // 5. Behavioral — STAR-style soft-skill scenario question.
  private async handleBehavioral(candidate: CandidateDocument, history: TranscriptEntry[]): Promise<string> {
    const messages = [
      { role: 'system', content: `You are a behavioral interviewer. Switch the tone to soft skills. Ask a tough scenario-based question (STAR method) regarding conflict resolution, tight deadlines, or difficult teamwork. 1-2 sentences max.` },
      ...this.formatHistory(history.slice(-4))
    ];
    return this.callCerebras(messages);
  }

  // Introduction — greeting; only phase that sends the FULL history.
  private async handleIntro(candidate: CandidateDocument, history: TranscriptEntry[]): Promise<string> {
    const messages = [
      { role: 'system', content: `You are the AI interviewer. The candidate's name is ${candidate.name}. Greet them, confirm their name, and ask for a very brief introduction. Be extremely welcoming. 2 sentences max.` },
      ...this.formatHistory(history)
    ];
    return this.callCerebras(messages);
  }

  // Conclusion — thank the candidate and invite final questions.
  private async handleConclusion(candidate: CandidateDocument, history: TranscriptEntry[]): Promise<string> {
    const messages = [
      { role: 'system', content: `Wrap up the interview. Thank the candidate ${candidate.name} for their time, summarize briefly that they did well, and ask if they have any final questions. 2 sentences max.` },
      ...this.formatHistory(history.slice(-2))
    ];
    return this.callCerebras(messages);
  }

  /**
   * Final Report Generator — ask the LLM for a structured JSON evaluation of
   * the whole transcript (JSON-mode request, low temperature for stability).
   * Returns null on any failure; callers must handle that.
   * NOTE(review): this duplicates most of callCerebras() and, unlike
   * EvaluationService, uses a different key naming scheme
   * (communication_skills / behavior) — confirm which schema consumers expect.
   */
  async generateFinalReport(transcript: TranscriptEntry[]): Promise<any> {
    const messages = [
      {
        role: 'system', content: `You are an expert HR Interview Evaluator. Read the following interview transcript and output strictly a JSON object evaluating the candidate.
Return ONLY JSON in this exact structure:
{
"communication_skills": { "rating": 5, "review_message": "..." },
"technical": { "rating": 8, "review_message": "..." },
"behavior": { "rating": 7, "review_message": "..." },
"previous_experience_and_contribution": { "rating": 6, "review_message": "..." }
}` },
      { role: 'user', content: JSON.stringify(transcript) }
    ];
    try {
      const response = await fetch('https://api.cerebras.ai/v1/chat/completions', {
        method: 'POST',
        headers: {
          Authorization: `Bearer ${this.apiKey}`,
          'Content-Type': 'application/json',
        },
        body: JSON.stringify({
          model: 'llama3.1-8b',
          messages,
          temperature: 0.2,
          response_format: { type: 'json_object' }
        }),
      });
      const data = await response.json();
      const content = data?.choices?.[0]?.message?.content;
      if (content) {
        return JSON.parse(content);
      }
    } catch (err) {
      this.logger.error(`Error generating final report: ${err}`);
    }
    return null;
  }
}

View File

@ -0,0 +1,97 @@
import { Injectable, Logger } from '@nestjs/common';
import { TranscriptEntry } from '../interview/schemas/interview-session.schema';
/**
 * Evaluation output schema as specified in requirements.
 * Each axis carries a 0-10 rating (0 = not assessed, per the prompt's
 * rating scale) plus a short free-text justification from the LLM.
 */
export interface EvaluationResult {
  communication: { rating: number; review_message: string };
  technical: { rating: number; review_message: string };
  behaviour: { rating: number; review_message: string };
  experience: { rating: number; review_message: string };
}
/**
 * EvaluationService analyses the full interview transcript via Cerebras LLM
 * and produces a structured JSON evaluation.
 *
 * Fails soft: when the API key is missing, the HTTP call errors, or the
 * model output cannot be parsed, a zeroed default evaluation is returned.
 */
@Injectable()
export class EvaluationService {
  private readonly logger = new Logger(EvaluationService.name);
  // Read once at construction; empty string triggers the default path.
  private readonly apiKey = process.env.CEREBRAS_API_KEY || '';

  /**
   * Evaluate the entire interview transcript.
   * @param transcript Full conversation history
   * @returns Structured evaluation with ratings and review messages
   */
  async evaluate(transcript: TranscriptEntry[]): Promise<EvaluationResult> {
    const formattedTranscript = transcript
      .map(
        (entry) =>
          `[${entry.role.toUpperCase()}]: ${entry.text}`,
      )
      .join('\n');
    const systemPrompt = `You are an expert interview evaluator. Analyze the following interview transcript and provide a structured evaluation.
You MUST respond with ONLY valid JSON in this exact format no markdown, no explanation:
{
"communication": { "rating": <0-10>, "review_message": "<brief review>" },
"technical": { "rating": <0-10>, "review_message": "<brief review>" },
"behaviour": { "rating": <0-10>, "review_message": "<brief review>" },
"experience": { "rating": <0-10>, "review_message": "<brief review>" }
}
Rating scale: 0 = not assessed, 1-3 = below expectations, 4-6 = meets expectations, 7-9 = exceeds expectations, 10 = exceptional.`;
    if (!this.apiKey) {
      this.logger.warn(
        'CEREBRAS_API_KEY not set — returning default evaluation',
      );
      return this.defaultEvaluation();
    }
    try {
      const response = await fetch(
        'https://api.cerebras.ai/v1/chat/completions',
        {
          method: 'POST',
          headers: {
            Authorization: `Bearer ${this.apiKey}`,
            'Content-Type': 'application/json',
          },
          body: JSON.stringify({
            model: 'llama-4-scout-17b-16e-instruct',
            messages: [
              { role: 'system', content: systemPrompt },
              { role: 'user', content: formattedTranscript },
            ],
            max_tokens: 500,
            temperature: 0.3,
          }),
        },
      );
      // Fix: surface HTTP-level failures (rate limits, auth errors) instead
      // of attempting to parse an error body as an evaluation.
      if (!response.ok) {
        throw new Error(`Cerebras HTTP ${response.status}`);
      }
      const data = await response.json();
      const content: string = data?.choices?.[0]?.message?.content || '';
      // Fix: models often wrap JSON in markdown fences despite instructions;
      // extract the outermost {...} before parsing so such replies still work.
      const evaluation: EvaluationResult = JSON.parse(
        this.extractJsonObject(content),
      );
      this.logger.log('Evaluation generated successfully');
      return evaluation;
    } catch (err) {
      this.logger.error(`Evaluation failed: ${err}`);
      return this.defaultEvaluation();
    }
  }

  /**
   * Return the substring spanning the first '{' to the last '}' so that
   * fenced or prefixed model output still parses; falls back to the raw
   * content (letting JSON.parse throw into the caller's catch).
   */
  private extractJsonObject(content: string): string {
    const start = content.indexOf('{');
    const end = content.lastIndexOf('}');
    if (start !== -1 && end > start) {
      return content.slice(start, end + 1);
    }
    return content;
  }

  /** Zeroed evaluation used whenever a real one cannot be produced. */
  private defaultEvaluation(): EvaluationResult {
    return {
      communication: { rating: 0, review_message: 'Not evaluated' },
      technical: { rating: 0, review_message: 'Not evaluated' },
      behaviour: { rating: 0, review_message: 'Not evaluated' },
      experience: { rating: 0, review_message: 'Not evaluated' },
    };
  }
}

View File

@ -0,0 +1,70 @@
import {
Controller,
Post,
Get,
Param,
Body,
UseInterceptors,
UploadedFiles,
} from '@nestjs/common';
import { FileFieldsInterceptor } from '@nestjs/platform-express';
import { diskStorage } from 'multer';
import { v4 as uuidv4 } from 'uuid';
import * as path from 'path';
import { CandidateService } from './candidate.service';
import { CreateCandidateDto } from './dto/create-candidate.dto';
/**
 * Multer disk-storage configuration: saves uploads to ../../uploads relative
 * to the compiled file, renaming each file to a UUID while keeping the
 * original extension — avoids collisions and ignores the (untrusted)
 * client-supplied filename.
 */
const storage = diskStorage({
  destination: path.join(__dirname, '..', '..', 'uploads'),
  filename: (_req, file, cb) => {
    const ext = path.extname(file.originalname);
    cb(null, `${uuidv4()}${ext}`);
  },
});
/** REST endpoints for candidate onboarding and lookup. */
@Controller('candidates')
export class CandidateController {
  constructor(private readonly candidateService: CandidateService) { }

  /**
   * POST /candidates
   * Accepts multipart form data with optional resume and profile picture.
   * NOTE(review): no fileFilter or size limits are configured on the
   * interceptor — any file type/size is accepted; consider restricting.
   */
  @Post()
  @UseInterceptors(
    FileFieldsInterceptor(
      [
        { name: 'resume', maxCount: 1 },
        { name: 'profilePicture', maxCount: 1 },
      ],
      { storage },
    ),
  )
  async create(
    @Body() dto: CreateCandidateDto,
    @UploadedFiles()
    files: {
      resume?: Express.Multer.File[];
      profilePicture?: Express.Multer.File[];
    },
  ) {
    // Each field is an array (FileFieldsInterceptor); pass the single file.
    return this.candidateService.create(
      dto,
      files?.resume?.[0],
      files?.profilePicture?.[0],
    );
  }

  /** GET /candidates/:id — one candidate; service throws NotFoundException. */
  @Get(':id')
  async findById(@Param('id') id: string) {
    return this.candidateService.findById(id);
  }

  /** GET /candidates — all candidates. */
  @Get()
  async findAll() {
    return this.candidateService.findAll();
  }
}

View File

@ -0,0 +1,20 @@
import { Module, forwardRef } from '@nestjs/common';
import { MongooseModule } from '@nestjs/mongoose';
import { Candidate, CandidateSchema } from './schemas/candidate.schema';
import { CandidateController } from './candidate.controller';
import { CandidateService } from './candidate.service';
import { OcrService } from './services/ocr.service';
import { FaceAuthModule } from '../face-auth/face-auth.module';

/**
 * CandidateModule: candidate CRUD plus resume OCR. FaceAuthModule is pulled
 * in via forwardRef to break a circular module dependency while still giving
 * CandidateService access to FaceAuthService for descriptor extraction.
 */
@Module({
  imports: [
    MongooseModule.forFeature([
      { name: Candidate.name, schema: CandidateSchema },
    ]),
    forwardRef(() => FaceAuthModule),
  ],
  controllers: [CandidateController],
  providers: [CandidateService, OcrService],
  exports: [CandidateService],
})
export class CandidateModule { }

View File

@ -0,0 +1,137 @@
import { Injectable, Logger, NotFoundException } from '@nestjs/common';
import { InjectModel } from '@nestjs/mongoose';
import { Model } from 'mongoose';
import { Candidate, CandidateDocument } from './schemas/candidate.schema';
import { CreateCandidateDto } from './dto/create-candidate.dto';
import { OcrService } from './services/ocr.service';
import { FaceAuthService } from '../face-auth/face-auth.service';

/**
 * CandidateService: persists candidate profiles and runs the onboarding
 * pipeline — resume OCR, LLM skill/summary extraction, and face-descriptor
 * extraction from the profile picture. Every enrichment step is best-effort:
 * failures are logged and the candidate is saved anyway.
 */
@Injectable()
export class CandidateService {
  private readonly logger = new Logger(CandidateService.name);

  constructor(
    @InjectModel(Candidate.name)
    private readonly candidateModel: Model<CandidateDocument>,
    private readonly ocrService: OcrService,
    private readonly faceAuthService: FaceAuthService,
  ) { }

  /**
   * Create a new candidate, process their resume and profile picture.
   *
   * @param dto            validated name/email/summary form fields
   * @param resumeFile     optional uploaded resume (PDF or image)
   * @param profilePicture optional uploaded photo for face verification
   * @returns the saved candidate document (with enrichment where it succeeded)
   */
  async create(
    dto: CreateCandidateDto,
    resumeFile?: Express.Multer.File,
    profilePicture?: Express.Multer.File,
  ): Promise<CandidateDocument> {
    const candidate = new this.candidateModel({
      name: dto.name,
      email: dto.email,
      experienceSummary: dto.experienceSummary || '',
      resumePath: resumeFile?.path || '',
      profilePicturePath: profilePicture?.path || '',
      captureFaceOnCall: !profilePicture, // No photo → capture during call
    });
    // ── OCR: extract resume text (best-effort; errors only logged) ──
    if (resumeFile?.path) {
      try {
        candidate.resumeText = await this.ocrService.extractText(
          resumeFile.path,
        );
        this.logger.log(
          `Extracted ${candidate.resumeText.length} chars from resume`,
        );
        // Use Cerebras to extract structured skills and summary
        if (candidate.resumeText) {
          // NOTE(review): duplicate of the log emitted inside
          // extractResumeMetadata itself.
          this.logger.log(`Extracting metadata from resume via Cerebras`);
          const metadata = await this.extractResumeMetadata(candidate.resumeText);
          candidate.skills = metadata.skills;
          // A user-provided summary takes precedence over the LLM's.
          if (!candidate.experienceSummary) {
            candidate.experienceSummary = metadata.experienceSummary;
          }
        }
      } catch (err) {
        this.logger.error(`Resume OCR or Metadata Extraction failed: ${err}`);
      }
    }
    // ── Face descriptor extraction; on failure, fall back to capturing a
    // face frame during the call instead ──
    if (profilePicture?.path) {
      try {
        const descriptor = await this.faceAuthService.extractDescriptor(
          profilePicture.path,
        );
        if (descriptor) {
          candidate.faceDescriptor = Array.from(descriptor);
          candidate.captureFaceOnCall = false;
        }
      } catch (err) {
        this.logger.error(`Face descriptor extraction failed: ${err}`);
        candidate.captureFaceOnCall = true;
      }
    }
    return candidate.save();
  }

  /** Look up one candidate; throws NotFoundException when absent. */
  async findById(id: string): Promise<CandidateDocument> {
    const candidate = await this.candidateModel.findById(id).exec();
    if (!candidate) {
      throw new NotFoundException(`Candidate ${id} not found`);
    }
    return candidate;
  }

  /** All candidates, unfiltered. */
  async findAll(): Promise<CandidateDocument[]> {
    return this.candidateModel.find().exec();
  }

  /**
   * Ask Cerebras (JSON mode, low temperature) to pull a skills list and a
   * two-sentence summary out of raw resume text. Only the first 5000 chars
   * are sent to bound prompt size. Returns empty values on any failure.
   */
  private async extractResumeMetadata(resumeText: string): Promise<{ skills: string[], experienceSummary: string }> {
    this.logger.log(`Extracting metadata from resume via Cerebras`);
    const apiKey = process.env.CEREBRAS_API_KEY;
    if (!apiKey || !resumeText) return { skills: [], experienceSummary: '' };
    try {
      const prompt = `Extract the core technical skills and a brief professional experience summary from this resume.
Return strictly a JSON object with this exact structure, nothing else:
{
"skills": ["skill1", "skill2"],
"experienceSummary": "A concise 2-sentence summary of their professional background."
}
Resume Text:
${resumeText.substring(0, 5000)}`;
      const response = await fetch('https://api.cerebras.ai/v1/chat/completions', {
        method: 'POST',
        headers: {
          'Authorization': `Bearer ${apiKey}`,
          'Content-Type': 'application/json',
        },
        body: JSON.stringify({
          model: 'llama3.1-8b',
          messages: [{ role: 'user', content: prompt }],
          temperature: 0.1,
          response_format: { type: 'json_object' }
        }),
      });
      const data = await response.json();
      const content = data?.choices?.[0]?.message?.content;
      if (content) {
        const parsed = JSON.parse(content);
        return {
          skills: parsed.skills || [],
          experienceSummary: parsed.experienceSummary || ''
        };
      }
    } catch (err) {
      this.logger.error(`Failed to extract metadata from resume via Cerebras: ${err}`);
    }
    return { skills: [], experienceSummary: '' };
  }
}

View File

@ -0,0 +1,18 @@
import { IsEmail, IsNotEmpty, IsOptional, IsString } from 'class-validator';

/**
 * DTO for creating a new candidate.
 * Resume and profile picture are handled via Multer file uploads, so only
 * the plain text form fields appear here.
 */
export class CreateCandidateDto {
  // Candidate's display name; required, non-empty.
  @IsNotEmpty()
  @IsString()
  name: string;

  // Must be a syntactically valid email address.
  @IsEmail()
  email: string;

  // Optional self-written summary; when absent, one is generated from the
  // resume by the LLM in CandidateService.
  @IsOptional()
  @IsString()
  experienceSummary?: string;
}

View File

@ -0,0 +1,39 @@
import { Prop, Schema, SchemaFactory } from '@nestjs/mongoose';
import { Document } from 'mongoose';

export type CandidateDocument = Candidate & Document;

/**
 * Candidate profile persisted in MongoDB. Created at onboarding and then
 * enriched in stages: OCR'd resume text, LLM-extracted skills/summary,
 * face descriptor, and finally the interview evaluation.
 * `timestamps: true` adds createdAt/updatedAt automatically.
 */
@Schema({ timestamps: true })
export class Candidate {
  @Prop({ required: true, trim: true })
  name: string;

  // Stored lowercased; NOTE(review): no unique index, so duplicate emails
  // are currently allowed — confirm whether that is intended.
  @Prop({ required: true, trim: true, lowercase: true })
  email: string;

  // Full plain text extracted from the uploaded resume (OCR or pdf-parse).
  @Prop({ default: '' })
  resumeText: string;

  // 128-dim face-api.js descriptor of the profile photo; empty until extracted.
  @Prop({ type: [Number], default: [] })
  faceDescriptor: number[];

  // True when no usable photo was provided — the face should be captured
  // from the webcam during the interview call instead.
  @Prop({ default: false })
  captureFaceOnCall: boolean;

  // Disk paths of the Multer-saved uploads ('' when not provided).
  @Prop({ default: '' })
  profilePicturePath: string;

  @Prop({ default: '' })
  resumePath: string;

  // User-provided or LLM-generated professional summary.
  @Prop({ default: '' })
  experienceSummary: string;

  // Skills extracted from the resume by the LLM.
  @Prop({ type: [String], default: [] })
  skills: string[];

  // Post-interview evaluation report (free-form JSON from the evaluator).
  @Prop({ type: Object, default: {} })
  evaluation: any;
}

export const CandidateSchema = SchemaFactory.createForClass(Candidate);

View File

@ -0,0 +1,94 @@
import { Injectable, Logger } from '@nestjs/common';
import * as fs from 'fs';
import * as path from 'path';

/**
 * OcrService extracts text from uploaded resume files.
 *
 * Strategy:
 * 1. If the file is a PDF, try `pdf-parse` first (fast, works on text PDFs).
 * 2. If pdf-parse returns little/no text, fall back to `tesseract.js` OCR.
 * 3. If the file is an image, go straight to tesseract.
 *
 * tesseract.js runs in its own worker, so the NestJS event loop is not blocked.
 *
 * NOTE(review): step 2 is effectively a no-op — extractFromImage refuses
 * '.pdf' inputs and returns '', so scanned PDFs currently yield empty text.
 * Rasterizing PDF pages to images would be needed to make it real.
 */
@Injectable()
export class OcrService {
  private readonly logger = new Logger(OcrService.name);

  /**
   * Extract text from a resume file.
   * @param filePath absolute path to the uploaded file
   * @returns the extracted plain text ('' on any failure)
   */
  async extractText(filePath: string): Promise<string> {
    const ext = path.extname(filePath).toLowerCase();
    if (ext === '.pdf') {
      return this.extractFromPdf(filePath);
    }
    // Image files (.png, .jpg, .jpeg, .bmp, .tiff)
    return this.extractFromImage(filePath);
  }

  // ──────────────────── PDF extraction ────────────────────
  private async extractFromPdf(filePath: string): Promise<string> {
    try {
      // Dynamic import to keep the module optional if not installed
      const pdfParseModule = await import('pdf-parse');
      // Support both CommonJS and ESM-style imports
      const pdfParse = (pdfParseModule as any).default || pdfParseModule;
      const buffer = fs.readFileSync(filePath);
      const data = await pdfParse(buffer);
      // If meaningful text was extracted, return it
      // (>50 chars is the "not a scanned PDF" heuristic)
      if (data.text && data.text.trim().length > 50) {
        this.logger.log(
          `pdf-parse extracted ${data.text.length} chars from ${filePath}`,
        );
        return data.text.trim();
      }
      // Scanned PDF → fall back to OCR
      // (see class-level NOTE: this call currently returns '' for PDFs)
      this.logger.warn(
        'pdf-parse returned little text; falling back to tesseract OCR',
      );
      return this.extractFromImage(filePath);
    } catch (error) {
      this.logger.error(`pdf-parse failed: ${error}; falling back to OCR`);
      return this.extractFromImage(filePath);
    }
  }

  // ──────────────────── Image / OCR extraction ────────────────────
  private async extractFromImage(filePath: string): Promise<string> {
    const ext = path.extname(filePath).toLowerCase();
    // Guard: tesseract.js cannot read PDF containers, only raster images.
    if (ext === '.pdf') {
      this.logger.warn(`Tesseract OCR cannot process PDF files directly: ${filePath}`);
      return '';
    }
    try {
      const { createWorker } = await import('tesseract.js');
      // 'eng' selects the English trained data (eng.traineddata).
      const worker = await createWorker('eng');
      const {
        data: { text },
      } = await worker.recognize(filePath);
      // Terminate the worker to free its resources after a single use.
      await worker.terminate();
      this.logger.log(
        `tesseract.js extracted ${text.length} chars from ${filePath}`,
      );
      return text.trim();
    } catch (error) {
      this.logger.error(`Tesseract OCR failed: ${error}`);
      return '';
    }
  }
}

View File

@ -0,0 +1,8 @@
import { Module } from '@nestjs/common';
import { FaceAuthService } from './face-auth.service';

/** Thin module exposing FaceAuthService (face-api.js face verification). */
@Module({
  providers: [FaceAuthService],
  exports: [FaceAuthService],
})
export class FaceAuthModule { }

View File

@ -0,0 +1,123 @@
import { Injectable, Logger, OnModuleInit } from '@nestjs/common';
import * as path from 'path';

/**
 * FaceAuthService handles face descriptor extraction and verification
 * using face-api.js running on the Node.js canvas backend.
 *
 * On module init it loads the required neural-network models from disk.
 * If the models aren't present yet, it logs a warning and works in
 * "passthrough" mode (all verifications return true).
 */
@Injectable()
export class FaceAuthService implements OnModuleInit {
  private readonly logger = new Logger(FaceAuthService.name);
  // Lazily-resolved face-api.js module (null until models load succeeds).
  private faceapi: any = null;
  private modelsLoaded = false;

  /** Directory where face-api.js model weight files live */
  // Resolved relative to the compiled file — assumes face-models/ sits two
  // levels above (e.g. next to dist/); TODO confirm for the deploy layout.
  private readonly modelsPath = path.join(
    __dirname,
    '..',
    '..',
    'face-models',
  );

  async onModuleInit() {
    try {
      // face-api.js requires the canvas package in Node
      const canvas = await import('canvas');
      const faceapi = await import('face-api.js');
      // Patch face-api.js to use node-canvas instead of browser DOM classes
      const { Canvas, Image, ImageData } = canvas;
      faceapi.env.monkeyPatch({
        Canvas: Canvas as any,
        Image: Image as any,
        ImageData: ImageData as any,
      });
      // Load detection + recognition models (detector, landmarks, embedder)
      await faceapi.nets.ssdMobilenetv1.loadFromDisk(this.modelsPath);
      await faceapi.nets.faceLandmark68Net.loadFromDisk(this.modelsPath);
      await faceapi.nets.faceRecognitionNet.loadFromDisk(this.modelsPath);
      this.faceapi = faceapi;
      this.modelsLoaded = true;
      this.logger.log('face-api.js models loaded successfully');
    } catch (err) {
      // Optional deps (canvas/face-api.js) or model weights missing —
      // degrade to passthrough instead of failing app startup.
      this.logger.warn(
        `face-api.js initialization failed (models may be missing): ${err}. ` +
        'Face verification will run in passthrough mode.',
      );
    }
  }

  /**
   * Extract a 128-dimensional face descriptor from an image file.
   * Returns null if no face is detected or models aren't loaded.
   */
  async extractDescriptor(imagePath: string): Promise<Float32Array | null> {
    if (!this.modelsLoaded || !this.faceapi) {
      this.logger.warn('Models not loaded skipping descriptor extraction');
      return null;
    }
    const canvas = await import('canvas');
    const img = await canvas.loadImage(imagePath);
    const detection = await this.faceapi
      .detectSingleFace(img as any)
      .withFaceLandmarks()
      .withFaceDescriptor();
    if (!detection) {
      this.logger.warn(`No face detected in ${imagePath}`);
      return null;
    }
    return detection.descriptor;
  }

  /**
   * Extract a face descriptor from a raw image buffer (e.g. a webcam frame).
   * Returns null when models are unavailable or no face is found.
   */
  async extractDescriptorFromBuffer(
    buffer: Buffer,
  ): Promise<Float32Array | null> {
    if (!this.modelsLoaded || !this.faceapi) return null;
    const canvas = await import('canvas');
    const img = await canvas.loadImage(buffer);
    const detection = await this.faceapi
      .detectSingleFace(img as any)
      .withFaceLandmarks()
      .withFaceDescriptor();
    return detection?.descriptor ?? null;
  }

  /**
   * Compare two face descriptors. Returns match result and euclidean distance.
   * Threshold of 0.6 is the standard for face-api.js.
   */
  verifyFace(
    incoming: Float32Array | number[],
    stored: Float32Array | number[],
    threshold = 0.6,
  ): { match: boolean; distance: number } {
    if (!this.modelsLoaded || !this.faceapi) {
      // Passthrough mode — assume match (see class doc)
      return { match: true, distance: 0 };
    }
    const distance = this.faceapi.euclideanDistance(
      Array.from(incoming),
      Array.from(stored),
    );
    return {
      match: distance < threshold,
      distance,
    };
  }
}

View File

@ -0,0 +1,162 @@
import { Logger } from '@nestjs/common';
import { Socket } from 'socket.io';
import { WebSocket } from 'ws';
import { BrainService } from '../brain/brain.service';
import { InterviewSessionDocument } from './schemas/interview-session.schema';
/**
 * DeepgramClientHandler — per-socket voice pipeline using Deepgram.
 *
 * Flow: client mic audio → Deepgram live STT → silence-gated transcript
 * buffer → BrainService (LLM) → Deepgram REST TTS → audio streamed back
 * to the client socket.
 */
export class DeepgramClientHandler {
  private readonly logger = new Logger(DeepgramClientHandler.name);
  private readonly sttApiKey = process.env.DEEPGRAM_API_KEY || '';
  private sttWs!: WebSocket;
  private active = false;
  // Accumulates FINALIZED STT segments until the silence timer fires.
  private transcriptBuffer = '';
  private silenceTimer?: NodeJS.Timeout;
  // How long (ms) after the last final segment before we treat the
  // utterance as complete and run the LLM pipeline.
  private readonly SILENCE_DELAY = 1000;

  constructor(
    private readonly socket: Socket,
    private readonly sessionId: string,
    private readonly session: InterviewSessionDocument,
    private readonly brainService: BrainService,
  ) { }

  /** Open the Deepgram live-transcription WebSocket and wire up handlers. */
  async init() {
    this.active = true;
    try {
      const sttUrl = 'wss://api.deepgram.com/v1/listen?encoding=linear16&sample_rate=16000&channels=1&interim_results=true&endpointing=300';
      this.sttWs = new WebSocket(sttUrl, {
        headers: {
          Authorization: `Token ${this.sttApiKey}`
        }
      });
      this.sttWs.on('open', () => {
        this.logger.log(`Deepgram STT ready for session ${this.sessionId}`);
      });
      this.sttWs.on('message', async (data: any) => {
        if (!this.active) return;
        try {
          const response = JSON.parse(data.toString());
          const transcript = response.channel?.alternatives?.[0]?.transcript?.trim();
          const isFinal = response.is_final;
          const speechFinal = response.speech_final;
          if (transcript) {
            this.logger.log(`[${this.sessionId}] Deepgram STT returned: "${transcript}"`);
            // BUGFIX: only accumulate FINAL segments. With interim_results=true
            // Deepgram re-sends growing partials ("hi", "hi there", ...), so
            // appending every message duplicated words in the buffer handed
            // to the LLM.
            if (isFinal) {
              this.transcriptBuffer += (this.transcriptBuffer ? ' ' : '') + transcript;
            }
            // Still forward interim text so the UI can render live captions.
            this.socket.emit('ai-transcript', { text: transcript });
          }
          if (speechFinal || (isFinal && transcript)) {
            this.resetSilenceTimer();
          }
        } catch (e) {
          this.logger.error(`Error parsing Deepgram message: ${e}`);
        }
      });
      this.sttWs.on('error', (err) => this.logger.error(`Deepgram STT error: ${err}`));
    } catch (err) {
      this.logger.error(`Failed to initialize Deepgram connections: ${err}`);
    }
  }

  /** (Re)arm the end-of-utterance timer; fires the pipeline after silence. */
  private resetSilenceTimer() {
    if (this.silenceTimer) clearTimeout(this.silenceTimer);
    this.silenceTimer = setTimeout(async () => {
      if (this.transcriptBuffer.trim()) {
        await this.processUserInput(this.transcriptBuffer.trim());
        this.transcriptBuffer = '';
      }
    }, this.SILENCE_DELAY);
  }

  /**
   * Run the full turn: persist the user utterance, ask the LLM for a reply,
   * persist the reply, and stream synthesized speech back to the client.
   */
  async processUserInput(text: string) {
    this.logger.log(`[${this.sessionId}] User said: "${text.slice(0, 100)}..."`);
    this.socket.emit('ai-state', { state: 'thinking' });
    try {
      this.session.transcriptLogs.push({
        role: 'user',
        text,
        timestamp: new Date()
      });
      const aiResponse = await this.brainService.generateResponse(
        this.sessionId,
        this.session.candidateId.toString(),
        text,
        this.session.transcriptLogs,
      );
      this.session.transcriptLogs.push({
        role: 'ai',
        text: aiResponse,
        timestamp: new Date()
      });
      await this.session.save();
      this.socket.emit('ai-transcript', { text: aiResponse });
      this.socket.emit('ai-state', { state: 'speaking' });
      await this.sendToTts(aiResponse);
      // Brief grace period before flipping back to listening.
      setTimeout(() => {
        this.socket.emit('ai-state', { state: 'listening' });
      }, 500);
    } catch (err) {
      this.logger.error(`Pipeline error: ${err}`);
      this.socket.emit('error', { message: 'Pipeline processing failed' });
      this.socket.emit('ai-state', { state: 'listening' });
    }
  }

  /** Synthesize `text` via Deepgram's REST TTS and emit the audio buffer. */
  private async sendToTts(text: string) {
    try {
      this.logger.log(`[${this.sessionId}] Calling Deepgram TTS for response...`);
      const url = `https://api.deepgram.com/v1/speak?model=aura-asteria-en`;
      const response = await fetch(url, {
        method: 'POST',
        headers: {
          Authorization: `Token ${this.sttApiKey}`,
          'Content-Type': 'application/json'
        },
        body: JSON.stringify({ text })
      });
      if (!response.ok) throw new Error(`Deepgram TTS failed: ${response.statusText}`);
      const arrayBuffer = await response.arrayBuffer();
      this.logger.log(`[${this.sessionId}] Deepgram TTS returned audio arraybuffer of length: ${arrayBuffer.byteLength}`);
      this.socket.emit('ai-audio', Buffer.from(arrayBuffer));
    } catch (err) {
      this.logger.error(`Failed to stream Deepgram TTS: ${err}`);
    }
  }

  /** Pipe one base64 mic chunk straight into the STT socket. */
  handleAudioChunk(audioBase64: string) {
    // readyState 1 === WebSocket.OPEN
    if (!this.active || !this.sttWs || this.sttWs.readyState !== 1) return;
    try {
      const buffer = Buffer.from(audioBase64, 'base64');
      this.sttWs.send(buffer);
    } catch (err) {
      this.logger.error(`Transcribe error: ${err}`);
    }
  }

  /** Idempotent teardown: stop timers and close the STT socket. */
  cleanup() {
    if (!this.active) return;
    this.active = false;
    if (this.silenceTimer) clearTimeout(this.silenceTimer);
    try {
      this.sttWs?.close?.();
      this.logger.log(`Cleaned up Deepgram session ${this.sessionId}`);
    } catch (err) {
      this.logger.warn(`Cleanup error: ${err}`);
    }
  }
}

View File

@ -0,0 +1,47 @@
import { Controller, Post, Param, Get, Body } from '@nestjs/common';
import { OrchestratorService } from './services/orchestrator.service';
import { BrainService } from '../brain/brain.service';
/**
 * REST endpoints for interview session management (non-realtime operations).
 */
@Controller('interviews')
export class InterviewController {
  constructor(
    private readonly orchestrator: OrchestratorService,
    private readonly brainService: BrainService,
  ) { }

  /** POST /interviews — create a new interview session */
  @Post()
  async createSession(@Body() body: { candidateId: string }) {
    const { candidateId } = body;
    return this.orchestrator.createSession(candidateId);
  }

  /** GET /interviews/:id — get session details */
  @Get(':id')
  async getSession(@Param('id') id: string) {
    const session = await this.orchestrator.getSession(id);
    return session;
  }

  /**
   * POST /interviews/:id/evaluate — end the interview and trigger
   * LLM-based evaluation of the full transcript.
   */
  @Post(':id/evaluate')
  async evaluate(@Param('id') id: string) {
    const session = await this.orchestrator.getSession(id);
    await this.orchestrator.endSession(id);
    const report = await this.brainService.generateFinalReport(
      session.transcriptLogs,
    );
    // Persist the evaluation and mark the session finished.
    session.evaluation = report;
    session.status = 'completed';
    await session.save();
    return { sessionId: id, evaluation: report };
  }
}

View File

@ -0,0 +1,242 @@
import {
WebSocketGateway,
WebSocketServer,
SubscribeMessage,
MessageBody,
ConnectedSocket,
OnGatewayInit,
OnGatewayConnection,
OnGatewayDisconnect,
} from '@nestjs/websockets';
import { Logger } from '@nestjs/common';
import { Server, Socket } from 'socket.io';
import { OrchestratorService } from './services/orchestrator.service';
import { FaceAuthService } from '../face-auth/face-auth.service';
import { CandidateService } from '../candidate/candidate.service';
/**
 * InterviewGateway — real-time WebSocket entry point for AI interviews.
 *
 * Handles:
 * - Room joining (linking a socket to a session)
 * - Streaming audio chunks → STT (real-time via WebSocket)
 * - VAD events are emitted automatically by the STT provider
 * - TTS audio chunks are streamed back as they are generated
 * - Manual end-of-speech fallback for providers without VAD
 * - Face verification (single frame from webcam)
 */
@WebSocketGateway({
  cors: { origin: '*' },
  namespace: '/interview',
})
export class InterviewGateway
  implements OnGatewayInit, OnGatewayConnection, OnGatewayDisconnect {
  @WebSocketServer()
  server: Server;

  private readonly logger = new Logger(InterviewGateway.name);

  /** Map socket.id → sessionId so sessions can be torn down on disconnect. */
  private socketSessions: Map<string, string> = new Map();

  constructor(
    private readonly orchestrator: OrchestratorService,
    private readonly faceAuth: FaceAuthService,
    private readonly candidateService: CandidateService,
  ) { }

  afterInit() {
    this.logger.log('Interview WebSocket Gateway initialized');
  }

  handleConnection(client: Socket) {
    this.logger.log(`Client connected: ${client.id}`);
  }

  handleDisconnect(client: Socket) {
    const sessionId = this.socketSessions.get(client.id);
    if (sessionId) {
      this.logger.log(
        `Client ${client.id} disconnected — cleaning up session ${sessionId}`,
      );
      // Fire-and-forget: the socket is already gone, so only log failures.
      this.orchestrator.endSession(sessionId).catch((err) => {
        this.logger.error(`Cleanup error: ${err}`);
      });
      this.socketSessions.delete(client.id);
    }
  }

  // ──────────────── Socket Events ────────────────

  /**
   * Client joins an interview session room.
   * Initializes the streaming STT + TTS connections for this session.
   *
   * Payload: { candidateId: string }
   */
  @SubscribeMessage('join-room')
  async handleJoinRoom(
    @ConnectedSocket() client: Socket,
    @MessageBody() data: { candidateId: string },
  ) {
    try {
      const session = await this.orchestrator.createSession(data.candidateId);
      const sessionId = session._id.toString();
      client.join(sessionId);
      this.socketSessions.set(client.id, sessionId);
      // Initialize streaming with the socket directly
      await this.orchestrator.initStreaming(sessionId, client);
      this.logger.log(
        `Client ${client.id} joined session ${sessionId} — streaming ready`,
      );
      client.emit('session-created', {
        sessionId,
        stage: session.currentStage,
      });
    } catch (err) {
      client.emit('error', { message: `Failed to join room: ${err}` });
    }
  }

  /**
   * Receive a base64-encoded audio chunk from the client's microphone.
   * Piped directly to the STT streaming WebSocket (no buffering).
   *
   * Payload: { audio: string } (base64) or Buffer (binary array from Socket.io)
   */
  @SubscribeMessage('audio-chunk')
  handleAudioChunk(
    @ConnectedSocket() client: Socket,
    @MessageBody() data: any,
  ) {
    const sessionId = this.socketSessions.get(client.id);
    if (!sessionId) {
      client.emit('error', { message: 'Not in a session' });
      return;
    }
    // Socket.io may deliver the chunk in several shapes — normalize to base64.
    let audioBase64: string;
    if (Buffer.isBuffer(data)) {
      // It's a binary buffer from Socket.io
      audioBase64 = data.toString('base64');
    } else if (data instanceof ArrayBuffer) {
      audioBase64 = Buffer.from(data).toString('base64');
    } else if (typeof data === 'string') {
      audioBase64 = data;
    } else if (data && data.audio) {
      audioBase64 = data.audio;
    } else {
      this.logger.warn(`Unknown audio chunk format received: ${typeof data}`);
      return;
    }
    // Print dot to let us know chunks are flowing without spamming logs
    process.stdout.write('.');
    this.orchestrator.streamAudioChunk(sessionId, audioBase64);
  }

  /**
   * Manual end-of-speech signal (fallback for providers without VAD).
   * Triggers the STT → Brain → TTS pipeline with accumulated transcript.
   */
  @SubscribeMessage('end-of-speech')
  async handleEndOfSpeech(@ConnectedSocket() client: Socket) {
    const sessionId = this.socketSessions.get(client.id);
    if (!sessionId) return;
    try {
      await this.orchestrator.triggerPipeline(sessionId);
    } catch (err) {
      this.logger.error(`Pipeline error: ${err}`);
      client.emit('error', { message: 'Pipeline processing failed' });
      client.emit('ai-state', { state: 'listening' });
    }
  }

  /**
   * Receive a video frame (JPEG buffer) for face verification.
   * First frame becomes the stored reference; later frames are compared
   * against it.
   *
   * Payload: { candidateId: string, frame: Buffer }
   */
  @SubscribeMessage('face-verify')
  async handleFaceVerify(
    @ConnectedSocket() client: Socket,
    @MessageBody() data: { candidateId: string; frame: Buffer },
  ) {
    try {
      const candidate = await this.candidateService.findById(data.candidateId);
      // BUGFIX: guard against an unknown candidate id so the client gets a
      // clear failure instead of "Verification error: TypeError ...".
      if (!candidate) {
        client.emit('face-result', {
          verified: false,
          message: 'Candidate not found',
        });
        return;
      }
      // Extract descriptor from the incoming frame
      const frameBuffer = Buffer.from(data.frame);
      const incomingDescriptor =
        await this.faceAuth.extractDescriptorFromBuffer(frameBuffer);
      if (!incomingDescriptor) {
        client.emit('face-result', {
          verified: false,
          message: 'No face detected in frame',
        });
        return;
      }
      // If no stored descriptor, save this one as reference
      if (
        !candidate.faceDescriptor ||
        candidate.faceDescriptor.length === 0
      ) {
        candidate.faceDescriptor = Array.from(incomingDescriptor);
        candidate.captureFaceOnCall = false;
        await candidate.save();
        client.emit('face-result', {
          verified: true,
          message: 'Reference face captured and saved',
        });
        return;
      }
      // Compare with stored descriptor
      const result = this.faceAuth.verifyFace(
        incomingDescriptor,
        candidate.faceDescriptor,
      );
      // Update session verification status
      const sessionId = this.socketSessions.get(client.id);
      if (sessionId) {
        const session = await this.orchestrator.getSession(sessionId);
        session.faceVerified = result.match;
        await session.save();
      }
      client.emit('face-result', {
        verified: result.match,
        distance: result.distance,
        message: result.match
          ? 'Face verified successfully'
          : 'Face mismatch — flagged in report',
      });
    } catch (err) {
      this.logger.error(`Face verify error: ${err}`);
      client.emit('face-result', {
        verified: false,
        message: `Verification error: ${err}`,
      });
    }
  }

  /**
   * End the interview session — closes streaming connections.
   */
  @SubscribeMessage('end-interview')
  async handleEndInterview(@ConnectedSocket() client: Socket) {
    const sessionId = this.socketSessions.get(client.id);
    if (!sessionId) return;
    await this.orchestrator.endSession(sessionId);
    client.emit('interview-ended', { sessionId });
    this.socketSessions.delete(client.id);
  }
}

View File

@ -0,0 +1,37 @@
import { Module, forwardRef } from '@nestjs/common';
import { MongooseModule } from '@nestjs/mongoose';
import { ConfigModule } from '@nestjs/config';
import {
InterviewSession,
InterviewSessionSchema,
} from './schemas/interview-session.schema';
import {
ConversationState,
ConversationStateSchema,
} from './schemas/conversation-state.schema';
import { InterviewGateway } from './interview.gateway';
import { InterviewController } from './interview.controller';
import { OrchestratorService } from './services/orchestrator.service';
import { BrainModule } from '../brain/brain.module';
import { FaceAuthModule } from '../face-auth/face-auth.module';
import { CandidateModule } from '../candidate/candidate.module';
/**
 * InterviewModule — wires up the realtime interview feature:
 * Mongoose schemas, the WebSocket gateway, the REST controller and the
 * session orchestrator.
 */
@Module({
  imports: [
    // Register the interview collections on the default Mongo connection.
    MongooseModule.forFeature([
      { name: InterviewSession.name, schema: InterviewSessionSchema },
      { name: ConversationState.name, schema: ConversationStateSchema },
    ]),
    ConfigModule,
    BrainModule,
    FaceAuthModule,
    // forwardRef breaks the CandidateModule <-> InterviewModule import cycle.
    forwardRef(() => CandidateModule),
  ],
  controllers: [InterviewController],
  providers: [
    InterviewGateway,
    OrchestratorService,
  ],
  // Exported so other modules (e.g. candidate flows) can drive sessions.
  exports: [OrchestratorService],
})
export class InterviewModule { }

View File

@ -0,0 +1,79 @@
import { Injectable, Logger } from '@nestjs/common';
import { ConfigService } from '@nestjs/config';
import { createClient, LiveTranscriptionEvents } from '@deepgram/sdk';
import { IStreamingSTT } from './voice-provider.interface';
/**
 * Deepgram streaming STT provider.
 * Uses Deepgram's live transcription WebSocket (Nova-2 model).
 *
 * Env: DEEPGRAM_API_KEY, DEEPGRAM_STT_LANGUAGE (default: en-US), DEEPGRAM_STT_MODEL (default: nova-2)
 */
@Injectable()
export class DeepgramSttProvider implements IStreamingSTT {
  readonly name = 'deepgram-stt';
  private readonly logger = new Logger(DeepgramSttProvider.name);
  private connection: any = null;
  private transcriptCb?: (text: string) => void;
  private readonly apiKey: string;
  private readonly language: string;
  private readonly model: string;

  constructor(private readonly config: ConfigService) {
    this.apiKey = this.config.get<string>('DEEPGRAM_API_KEY', '');
    this.language = this.config.get<string>('DEEPGRAM_STT_LANGUAGE', 'en-US');
    this.model = this.config.get<string>('DEEPGRAM_STT_MODEL', 'nova-2');
  }

  /**
   * Open the live-transcription socket. Resolves once the connection is
   * open; a missing API key is a logged no-op.
   */
  async connect(): Promise<void> {
    if (!this.apiKey) {
      this.logger.warn('DEEPGRAM_API_KEY not set — STT will not work');
      return;
    }
    const dg = createClient(this.apiKey);
    this.connection = dg.listen.live({
      model: this.model,
      language: this.language,
      smart_format: true,
      interim_results: false,
    });
    // BUGFIX: register the Transcript listener BEFORE awaiting Open so a
    // result delivered immediately after the handshake is never dropped.
    this.connection.on(LiveTranscriptionEvents.Transcript, (data: any) => {
      const transcript = data.channel?.alternatives?.[0]?.transcript;
      if (transcript) {
        this.transcriptCb?.(transcript);
      }
    });
    await new Promise<void>((resolve, reject) => {
      this.connection!.on(LiveTranscriptionEvents.Open, resolve);
      this.connection!.on(LiveTranscriptionEvents.Error, reject);
    });
    this.logger.log('Deepgram STT WebSocket connected');
  }

  /** Forward one base64 audio chunk; silently dropped when not connected. */
  transcribe(audioBase64: string): void {
    if (!this.connection) return;
    const buffer = Buffer.from(audioBase64, 'base64');
    this.connection.send(buffer as any);
  }

  /** Register the callback invoked with each transcript string. */
  onTranscript(cb: (text: string) => void): void {
    this.transcriptCb = cb;
  }

  /** Deepgram doesn't natively emit VAD events in the same way — no-op */
  onVadEvent(_cb: (event: any) => void): void {
    // Not supported by Deepgram live transcription
  }

  /** Request a graceful close of the live connection. */
  close(): void {
    this.connection?.requestClose?.();
    this.logger.log('Deepgram STT WebSocket closed');
  }
}

View File

@ -0,0 +1,80 @@
import { Injectable, Logger } from '@nestjs/common';
import { ConfigService } from '@nestjs/config';
import { createClient as createDeepgramClient } from '@deepgram/sdk';
import { IStreamingTTS } from './voice-provider.interface';
/**
 * Deepgram streaming TTS provider.
 * Uses Deepgram's speak.live WebSocket for real-time audio generation.
 *
 * Env: DEEPGRAM_API_KEY, DEEPGRAM_TTS_MODEL (default: aura-asteria-en),
 *      DEEPGRAM_TTS_CODEC (default: wav)
 */
@Injectable()
export class DeepgramTtsProvider implements IStreamingTTS {
  readonly name = 'deepgram-tts';
  private readonly logger = new Logger(DeepgramTtsProvider.name);
  private conn?: any;
  private audioCb?: (audioBase64: string) => void;
  private errorCb?: (error: Error) => void;
  private readonly apiKey: string;
  private readonly model: string;
  private readonly codec: string;

  constructor(private readonly config: ConfigService) {
    this.apiKey = this.config.get<string>('DEEPGRAM_API_KEY', '');
    this.model = this.config.get<string>('DEEPGRAM_TTS_MODEL', 'aura-asteria-en');
    this.codec = this.config.get<string>('DEEPGRAM_TTS_CODEC', 'wav');
  }

  /**
   * Open the speak.live socket. Resolves once open; a missing API key is
   * a logged no-op.
   */
  async connect(): Promise<void> {
    if (!this.apiKey) {
      this.logger.warn('DEEPGRAM_API_KEY not set — TTS will not work');
      return;
    }
    const client = createDeepgramClient(this.apiKey);
    this.conn = client.speak.live({
      model: this.model,
      encoding: this.codec as any,
      sample_rate: 8000,
    });
    // BUGFIX: attach the audio handler BEFORE awaiting 'open' so no chunk
    // emitted right after the handshake can be missed.
    this.conn.on('data', (chunk: ArrayBuffer) => {
      try {
        const audioBase64 = Buffer.from(chunk).toString('base64');
        this.audioCb?.(audioBase64);
      } catch (err) {
        this.errorCb?.(err as Error);
      }
    });
    await new Promise<void>((resolve, reject) => {
      this.conn!.on('open', resolve);
      this.conn!.on('error', reject);
    });
    this.logger.log('Deepgram TTS WebSocket connected');
  }

  /** Queue text for synthesis; audio arrives via the onAudio callback. */
  async speak(text: string): Promise<void> {
    if (!this.conn) throw new Error('Deepgram TTS not connected');
    this.conn.sendText(text);
  }

  onAudio(cb: (audioBase64: string) => void): void {
    this.audioCb = cb;
  }

  onError(cb: (error: Error) => void): void {
    this.errorCb = cb;
  }

  /** Request a graceful close of the speak.live connection. */
  close(): void {
    this.conn?.requestClose?.();
    this.logger.log('Deepgram TTS WebSocket closed');
  }
}

View File

@ -0,0 +1,3 @@
// This file has been replaced by deepgram-stt.provider.ts and deepgram-tts.provider.ts
// It can be safely deleted.
// NOTE(review): kept as an empty module so any stale import fails at type-check
// rather than at runtime — remove once nothing references this path.
export { };

View File

@ -0,0 +1,121 @@
import { Injectable, Logger } from '@nestjs/common';
import { ConfigService } from '@nestjs/config';
import { SarvamAIClient } from 'sarvamai';
import { SpeechToTextStreamingSocket } from 'sarvamai/dist/cjs/api/resources/speechToTextStreaming/client/Socket';
import { IStreamingSTT } from './voice-provider.interface';
/**
 * Sarvam AI streaming STT provider.
 * Uses Sarvam's `speechToTextStreaming` WebSocket with VAD signals enabled.
 *
 * Env: SARVAM_API_KEY, SARVAM_STT_LANGUAGE (default: en-IN), SARVAM_STT_MODEL (default: saarika:v2.5)
 */
@Injectable()
export class SarvamSttProvider implements IStreamingSTT {
  readonly name = 'sarvam-stt';
  private readonly logger = new Logger(SarvamSttProvider.name);
  private ws!: SpeechToTextStreamingSocket;
  private transcriptCb?: (text: string) => void;
  private vadCb?: (event: any) => void;
  private connected = false;
  private readonly apiKey: string;
  private readonly language: string;
  private readonly model: string;

  constructor(private readonly config: ConfigService) {
    this.apiKey = this.config.get<string>('SARVAM_API_KEY', '');
    this.language = this.config.get<string>('SARVAM_STT_LANGUAGE', 'en-IN');
    this.model = this.config.get<string>('SARVAM_STT_MODEL', 'saarika:v2.5');
  }

  /**
   * Connect the streaming socket and wire transcript/VAD handlers.
   * A missing API key is a logged no-op; connection failures rethrow.
   */
  async connect(): Promise<void> {
    if (!this.apiKey) {
      this.logger.warn('SARVAM_API_KEY not set — STT will not work');
      return;
    }
    try {
      const client = new SarvamAIClient({ apiSubscriptionKey: this.apiKey });
      this.logger.log('Connecting to Sarvam STT WebSocket...');
      this.ws = await client.speechToTextStreaming.connect({
        'Api-Subscription-Key': this.apiKey,
        'language-code': this.language as any,
        model: this.model as any,
        high_vad_sensitivity: 'false',
        vad_signals: 'true',
      });
      // BUGFIX: attach listeners BEFORE waitForOpen so frames delivered
      // immediately after the handshake are not dropped.
      this.ws.on('message', (msg: any) => {
        try {
          const data = typeof msg === 'string' ? JSON.parse(msg) : msg;
          this.logger.debug(`STT message received: ${JSON.stringify(data).slice(0, 200)}`);
          // Transcript events
          if (data?.data?.transcript) {
            this.transcriptCb?.(data.data.transcript);
          }
          // VAD events (speech_start / speech_end / etc.)
          if (data?.type === 'events') {
            this.vadCb?.(data.data);
          }
        } catch (parseErr) {
          this.logger.error(`STT message parse error: ${parseErr}`);
        }
      });
      this.ws.on('error', (err: any) => {
        this.logger.error(`Sarvam STT WebSocket error: ${err}`);
        this.connected = false;
      });
      this.ws.on('close', () => {
        this.logger.warn('Sarvam STT WebSocket closed unexpectedly');
        this.connected = false;
      });
      await this.ws.waitForOpen();
      this.connected = true;
      this.logger.log('Sarvam STT WebSocket connected');
    } catch (err) {
      this.logger.error(`Failed to connect Sarvam STT: ${err}`);
      this.connected = false;
      throw err;
    }
  }

  /** Send one base64 chunk; Sarvam expects WAV-encoded payloads at 16 kHz. */
  transcribe(audioBase64: string): void {
    if (!this.connected || !this.ws) {
      this.logger.warn('STT not connected — dropping audio chunk');
      return;
    }
    try {
      this.ws.transcribe({
        audio: audioBase64,
        encoding: 'audio/wav',
        sample_rate: 16000,
      });
    } catch (err) {
      this.logger.error(`STT transcribe error: ${err}`);
    }
  }

  onTranscript(cb: (text: string) => void): void {
    this.transcriptCb = cb;
  }

  onVadEvent(cb: (event: any) => void): void {
    this.vadCb = cb;
  }

  /** Mark disconnected first so in-flight handlers stop forwarding. */
  close(): void {
    this.connected = false;
    this.ws?.close?.();
    this.logger.log('Sarvam STT WebSocket closed');
  }
}

View File

@ -0,0 +1,104 @@
import { Injectable, Logger } from '@nestjs/common';
import { ConfigService } from '@nestjs/config';
import { SarvamAIClient } from 'sarvamai';
import { TextToSpeechStreamingSocket } from 'sarvamai/dist/cjs/api/resources/textToSpeechStreaming/client/Socket';
import { IStreamingTTS } from './voice-provider.interface';
/**
 * Sarvam AI streaming TTS provider — **Bulbul V3**.
 *
 * Key V3 changes vs V2:
 * - `pitch` and `loudness` are REMOVED
 * - `pace` (0.5–2.0) controls speech rate
 * - `temperature` (0.01–2.0, default 0.6) controls expressiveness
 * - preprocessing is always enabled
 *
 * Env: SARVAM_API_KEY, SARVAM_TTS_LANGUAGE, SARVAM_TTS_SPEAKER, SARVAM_TTS_PACE,
 *      SARVAM_TTS_TEMPERATURE, SARVAM_TTS_CODEC
 */
@Injectable()
export class SarvamTtsProvider implements IStreamingTTS {
  readonly name = 'sarvam-tts';
  private readonly logger = new Logger(SarvamTtsProvider.name);
  private client!: SarvamAIClient;
  private ws?: TextToSpeechStreamingSocket;
  private audioCb?: (audioBase64: string) => void;
  private errorCb?: (error: Error) => void;
  private readonly apiKey: string;
  private readonly language: string;
  private readonly speaker: string;
  private readonly pace: number;
  private readonly temperature: number;
  private readonly codec: string;

  constructor(private readonly config: ConfigService) {
    this.apiKey = this.config.get<string>('SARVAM_API_KEY', '');
    this.language = this.config.get<string>('SARVAM_TTS_LANGUAGE', 'en-IN');
    this.speaker = this.config.get<string>('SARVAM_TTS_SPEAKER', 'anushka');
    this.pace = this.config.get<number>('SARVAM_TTS_PACE', 1.0);
    this.temperature = this.config.get<number>('SARVAM_TTS_TEMPERATURE', 0.6);
    this.codec = this.config.get<string>('SARVAM_TTS_CODEC', 'wav');
  }

  /**
   * Open the TTS socket, attach the audio listener, then configure the
   * voice. A missing API key is a logged no-op.
   */
  async connect(): Promise<void> {
    if (!this.apiKey) {
      this.logger.warn('SARVAM_API_KEY not set — TTS will not work');
      return;
    }
    this.client = new SarvamAIClient({ apiSubscriptionKey: this.apiKey });
    // Connect with Bulbul V3 model
    this.ws = await this.client.textToSpeechStreaming.connect({
      'Api-Subscription-Key': this.apiKey,
      model: 'bulbul:v3' as any,
    });
    // BUGFIX: attach the audio listener BEFORE waiting for open /
    // configuring, so no chunk emitted early in the session is missed.
    this.ws.on('message', (msg: any) => {
      try {
        if (msg.type === 'audio' && msg.data?.audio) {
          this.audioCb?.(msg.data.audio);
        }
      } catch (err) {
        this.errorCb?.(err as Error);
      }
    });
    await this.ws.waitForOpen();
    this.logger.log('Sarvam TTS WebSocket connected (bulbul:v3)');
    // Configure connection with V3-specific parameters
    this.ws.configureConnection({
      target_language_code: this.language as any,
      speaker: this.speaker as any,
      pace: this.pace,
      // V3: no pitch, no loudness — temperature controls expressiveness
      min_buffer_size: 160,
      output_audio_bitrate: '32k' as any,
      speech_sample_rate: 8000,
      max_chunk_length: 50,
      output_audio_codec: this.codec as any,
    } as any);
  }

  /** Queue text for synthesis and flush so audio starts streaming. */
  async speak(text: string): Promise<void> {
    if (!this.ws) throw new Error('Sarvam TTS not connected');
    this.ws.convert(text);
    this.ws.flush();
  }

  onAudio(cb: (audioBase64: string) => void): void {
    this.audioCb = cb;
  }

  onError(cb: (error: Error) => void): void {
    this.errorCb = cb;
  }

  close(): void {
    this.ws?.close();
    this.logger.log('Sarvam TTS WebSocket closed');
  }
}

View File

@ -0,0 +1,3 @@
// This file has been replaced by sarvam-stt.provider.ts and sarvam-tts.provider.ts
// It can be safely deleted.
// NOTE(review): kept as an empty module so any stale import fails at type-check
// rather than at runtime — remove once nothing references this path.
export { };

View File

@ -0,0 +1,49 @@
/**
 * Streaming STT interface — WebSocket-based real-time transcription.
 * Providers maintain an open connection and stream transcripts back.
 * Implemented by SarvamSttProvider and DeepgramSttProvider.
 */
export interface IStreamingSTT {
  /** Stable provider identifier, e.g. 'sarvam-stt', used for logging. */
  readonly name: string;
  /** Open the WebSocket connection to the STT service */
  connect(): Promise<void>;
  /** Send a base64-encoded audio chunk to be transcribed */
  transcribe(audioBase64: string): void;
  /** Register callback for final transcript text */
  onTranscript(cb: (text: string) => void): void;
  /**
   * Register callback for VAD events (speech_start, speech_end, etc.).
   * Optional — providers without native VAD (e.g. Deepgram here) may no-op.
   */
  onVadEvent?(cb: (event: any) => void): void;
  /** Gracefully close the connection */
  close(): void;
}

/**
 * Streaming TTS interface — WebSocket-based real-time speech synthesis.
 * Providers stream audio chunks back as they are generated.
 * Implemented by SarvamTtsProvider and DeepgramTtsProvider.
 */
export interface IStreamingTTS {
  /** Stable provider identifier, e.g. 'sarvam-tts', used for logging. */
  readonly name: string;
  /** Open the WebSocket connection to the TTS service */
  connect(): Promise<void>;
  /** Send text to be converted to speech */
  speak(text: string): Promise<void>;
  /** Register callback for receiving audio chunks (base64) */
  onAudio(cb: (audioBase64: string) => void): void;
  /** Register callback for errors */
  onError?(cb: (error: Error) => void): void;
  /** Gracefully close the connection */
  close(): void;
}

/** Injection tokens for binding the configured provider in the DI container. */
export const STT_PROVIDER = 'STT_PROVIDER';
export const TTS_PROVIDER = 'TTS_PROVIDER';

View File

@ -0,0 +1,192 @@
import { Logger } from '@nestjs/common';
import { Socket } from 'socket.io';
import { SarvamAIClient } from 'sarvamai';
import { SpeechToTextStreamingSocket } from 'sarvamai/dist/cjs/api/resources/speechToTextStreaming/client/Socket';
import { TextToSpeechStreamingSocket } from 'sarvamai/dist/cjs/api/resources/textToSpeechStreaming/client/Socket';
import { BrainService } from '../brain/brain.service';
import { InterviewSessionDocument } from './schemas/interview-session.schema';
import { pcm16ToWav } from '../utils/audio.utils';
/**
 * SarvamClientHandler — per-socket voice pipeline using Sarvam AI.
 *
 * Streams mic audio (wrapped as WAV) into Sarvam streaming STT, runs the
 * LLM on each transcript, and speaks replies via Sarvam streaming TTS
 * (bulbul:v3). TTS audio is dropped while the user is speaking, giving a
 * simple barge-in behaviour driven by Sarvam's VAD signals.
 */
export class SarvamClientHandler {
  private readonly logger = new Logger(SarvamClientHandler.name);
  private readonly apiKey = process.env.SARVAM_API_KEY || '';
  private sttWs!: SpeechToTextStreamingSocket;
  private ttsWs!: TextToSpeechStreamingSocket;
  private active = false;
  // True between START_SPEECH and END_SPEECH VAD signals.
  private userSpeaking = false;

  constructor(
    private readonly socket: Socket,
    private readonly sessionId: string,
    private readonly session: InterviewSessionDocument,
    private readonly brainService: BrainService,
  ) { }

  /** Open STT + TTS sockets, wire handlers, and configure the TTS voice. */
  async init() {
    this.active = true;
    const sarvam = new SarvamAIClient({ apiSubscriptionKey: this.apiKey });
    try {
      this.sttWs = await sarvam.speechToTextStreaming.connect({
        'Api-Subscription-Key': this.apiKey,
        'language-code': 'en-IN',
        // NOTE(review): 'saaras' is Sarvam's speech-translation family while
        // the standalone STT provider uses 'saarika' — confirm intended model.
        model: 'saaras:v2.5' as any,
        high_vad_sensitivity: 'false',
        vad_signals: 'true',
      } as any);
      this.ttsWs = await sarvam.textToSpeechStreaming.connect({
        'Api-Subscription-Key': this.apiKey,
        model: 'bulbul:v3' as any,
      } as any);
      await Promise.all([this.sttWs.waitForOpen(), this.ttsWs.waitForOpen()]);
      this.logger.log(`STT/TTS sockets ready for session ${this.sessionId}`);
      this.setupSttHandlers();
      this.setupTtsHandlers();
      // Bulbul V3 voice configuration (no pitch/loudness in V3).
      this.ttsWs.configureConnection({
        target_language_code: 'en-IN',
        speaker: 'anushka' as any,
        pace: 1,
        min_buffer_size: 160,
        output_audio_bitrate: '32k' as any,
        speech_sample_rate: 8000,
        max_chunk_length: 50,
        output_audio_codec: 'wav' as any,
      } as any);
    } catch (err) {
      this.logger.error(`Failed to initialize Sarvam connections: ${err}`);
    }
  }

  /** Handle VAD signals and transcripts arriving from the STT socket. */
  private setupSttHandlers() {
    this.sttWs.on('message', async (msg: any) => {
      if (!this.active) return;
      try {
        const data = typeof msg === 'string' ? JSON.parse(msg) : msg;
        if (data.type === 'events') {
          const signal = data?.data?.signal_type;
          if (signal === 'START_SPEECH') {
            this.userSpeaking = true;
            this.logger.debug(`User started speaking [${this.sessionId}]`);
          } else if (signal === 'END_SPEECH') {
            this.userSpeaking = false;
            this.logger.debug(`User ended speaking [${this.sessionId}]`);
          }
        }
        if (!data?.data?.transcript) return;
        const transcript = data.data.transcript;
        this.logger.log(`[${this.sessionId}] Sarvam STT returned transcript: "${transcript}"`);
        this.socket.emit('ai-transcript', { text: transcript });
        await this.processUserInput(transcript);
      } catch (err) {
        // BUGFIX: a malformed frame previously threw JSON.parse out of this
        // async handler, surfacing as an unhandled promise rejection.
        this.logger.error(`STT message handling error: ${err}`);
      }
    });
    this.sttWs.on('error', (err) => this.logger.error(`STT error: ${err}`));
  }

  /** Forward TTS audio chunks to the client unless the user barged in. */
  private setupTtsHandlers() {
    this.ttsWs.on('message', (msg: any) => {
      if (!this.active || msg.type !== 'audio') return;
      if (this.userSpeaking) {
        return; // Drop TTS audio if user is speaking
      }
      try {
        if (msg.data?.audio) {
          this.logger.log(`[${this.sessionId}] Sarvam TTS returned audio chunk of length: ${msg.data.audio.length}`);
          this.socket.emit('ai-audio', Buffer.from(msg.data.audio, 'base64'));
        }
      } catch (err) {
        this.logger.error(`TTS audio parsing error: ${err}`);
      }
    });
    this.ttsWs.on('error', (err) => this.logger.error(`TTS error: ${err}`));
  }

  /**
   * Run the full turn: persist the user utterance, ask the LLM for a reply,
   * persist the reply, and speak it (unless the user is speaking again).
   */
  async processUserInput(text: string) {
    this.logger.log(`[${this.sessionId}] User said: "${text.slice(0, 100)}..."`);
    this.socket.emit('ai-state', { state: 'thinking' });
    try {
      this.session.transcriptLogs.push({
        role: 'user',
        text,
        timestamp: new Date()
      });
      const aiResponse = await this.brainService.generateResponse(
        this.sessionId,
        this.session.candidateId.toString(),
        text,
        this.session.transcriptLogs,
      );
      this.session.transcriptLogs.push({
        role: 'ai',
        text: aiResponse,
        timestamp: new Date()
      });
      await this.session.save();
      this.socket.emit('ai-transcript', { text: aiResponse });
      this.socket.emit('ai-state', { state: 'speaking' });
      if (!this.userSpeaking) {
        this.logger.log(`[${this.sessionId}] Calling Sarvam TTS for response...`);
        this.ttsWs.convert(aiResponse);
        this.ttsWs.flush();
      } else {
        this.logger.log(`[${this.sessionId}] Dropped TTS call because user is speaking.`);
      }
      // Brief grace period before flipping back to listening.
      setTimeout(() => {
        this.socket.emit('ai-state', { state: 'listening' });
      }, 500);
    } catch (err) {
      this.logger.error(`Pipeline error: ${err}`);
      this.socket.emit('error', { message: 'Pipeline processing failed' });
      this.socket.emit('ai-state', { state: 'listening' });
    }
  }

  /** Wrap a raw PCM16 chunk in a WAV header and feed it to Sarvam STT. */
  handleAudioChunk(audioBase64: string) {
    // readyState 1 === WebSocket.OPEN
    if (!this.active || !this.sttWs || this.sttWs.readyState !== 1) return;
    try {
      // Sarvam STT expects audio/wav payload strings
      // Our frontend streams raw PCM. Wrap it in a WAV header per chunk!
      const rawPcm = Buffer.from(audioBase64, 'base64');
      const wavBuffer = pcm16ToWav(rawPcm, 16000);
      this.sttWs.transcribe({
        audio: wavBuffer.toString('base64'),
        encoding: 'audio/wav',
        sample_rate: 16000,
      });
    } catch (err) {
      this.logger.error(`Transcribe error: ${err}`);
    }
  }

  /** Idempotent teardown: close both sockets and stop forwarding. */
  cleanup() {
    if (!this.active) return;
    this.active = false;
    try {
      this.sttWs?.close?.();
      this.ttsWs?.close?.();
      this.logger.log(`Cleaned up Sarvam session ${this.sessionId}`);
    } catch (err) {
      this.logger.warn(`Cleanup error: ${err}`);
    }
  }
}

View File

@ -0,0 +1,28 @@
import { Prop, Schema, SchemaFactory } from '@nestjs/mongoose';
import { Document, Types } from 'mongoose';
export type ConversationStateDocument = ConversationState & Document;

/**
 * Per-session interviewing strategy state, kept separate from the
 * InterviewSession transcript so it can be updated between turns.
 */
@Schema({ timestamps: true })
export class ConversationState {
  // One state document per interview session (unique index).
  @Prop({ type: Types.ObjectId, ref: 'InterviewSession', required: true, unique: true })
  sessionId: Types.ObjectId;

  // Current interview stage; values mirror InterviewSession's stage enum.
  @Prop({
    required: true,
    enum: ['INTRODUCTION', 'EXPERIENCE_DEEP_DIVE', 'TECHNICAL_CORE', 'ODD_SKILL_PROBE', 'BEHAVIORAL', 'CONCLUSION'],
    default: 'INTRODUCTION',
  })
  currentStrategy: string;

  // Unusual resume skills flagged for probing — presumably populated by the
  // brain service; confirm against the orchestrator.
  @Prop({ type: [String], default: [] })
  oddSkillsDetected: string[];

  // Count of technical questions asked so far in this session.
  @Prop({ default: 0 })
  technicalQuestionsAsked: number;

  // Verbatim text of the most recent question, to avoid repeats.
  @Prop({ default: '' })
  lastQuestionAsked: string;
}

export const ConversationStateSchema = SchemaFactory.createForClass(ConversationState);

View File

@ -0,0 +1,72 @@
import { Prop, Schema, SchemaFactory } from '@nestjs/mongoose';
import { Document, Types } from 'mongoose';
/** A single turn in the conversation transcript (one user utterance or one AI reply). */
export class TranscriptEntry {
// Speaker for this turn: the candidate ('user') or the interviewer ('ai').
@Prop({ required: true, enum: ['user', 'ai'] })
role: 'user' | 'ai';
// Transcribed (user) or generated (ai) text of the turn.
@Prop({ required: true })
text: string;
// When the turn was recorded; defaults to creation time.
@Prop({ default: () => new Date() })
timestamp: Date;
// NOTE(review): this embedded class has no @Schema() decorator and no
// SchemaFactory call — confirm @nestjs/mongoose picks up these @Prop()s when
// used as @Prop({ type: [TranscriptEntry] }) in InterviewSession.
}
/** Structured evaluation rating for a single dimension (score plus reviewer text). */
export class EvaluationDimension {
// Numeric score on a fixed 0–10 scale.
@Prop({ min: 0, max: 10, default: 0 })
rating: number;
// Free-text justification for the rating.
@Prop({ default: '' })
review_message: string;
}
/**
 * Final evaluation output: one EvaluationDimension per assessed axis.
 * Each dimension defaults to an empty object so partially-filled evaluations
 * still validate.
 */
export class Evaluation {
@Prop({ type: EvaluationDimension, default: () => ({}) })
communication: EvaluationDimension;
@Prop({ type: EvaluationDimension, default: () => ({}) })
technical: EvaluationDimension;
@Prop({ type: EvaluationDimension, default: () => ({}) })
behaviour: EvaluationDimension;
@Prop({ type: EvaluationDimension, default: () => ({}) })
experience: EvaluationDimension;
}
// Hydrated mongoose document type for InterviewSession.
export type InterviewSessionDocument = InterviewSession & Document;
/**
 * One AI interview run for a candidate: lifecycle status, stage progression,
 * the full turn-by-turn transcript, and the final evaluation.
 */
@Schema({ timestamps: true })
export class InterviewSession {
// Candidate being interviewed.
@Prop({ type: Types.ObjectId, ref: 'Candidate', required: true })
candidateId: Types.ObjectId;
// Session lifecycle; OrchestratorService creates sessions as 'active' and
// marks them 'completed' in endSession.
@Prop({
required: true,
enum: ['waiting', 'active', 'completed', 'cancelled'],
default: 'waiting',
})
status: string;
// Current interview stage; advanced linearly by OrchestratorService.advanceStage.
@Prop({
required: true,
enum: ['INTRODUCTION', 'EXPERIENCE_DEEP_DIVE', 'TECHNICAL_CORE', 'ODD_SKILL_PROBE', 'BEHAVIORAL', 'CONCLUSION'],
default: 'INTRODUCTION',
})
currentStage: string;
// Ordered conversation history (user and ai turns).
// NOTE(review): TranscriptEntry is a plain class without @Schema()/SchemaFactory —
// confirm @nestjs/mongoose generates a subdocument schema from its @Prop()s here.
@Prop({ type: [TranscriptEntry], default: [] })
transcriptLogs: TranscriptEntry[];
// Set once the candidate passes face verification.
@Prop({ default: false })
faceVerified: boolean;
// Final structured evaluation; defaults to empty dimensions until scored.
@Prop({ type: Evaluation, default: () => ({}) })
evaluation: Evaluation;
}
export const InterviewSessionSchema =
SchemaFactory.createForClass(InterviewSession);

View File

@ -0,0 +1,106 @@
import { Injectable, Logger } from '@nestjs/common';
import { InjectModel } from '@nestjs/mongoose';
import { ConfigService } from '@nestjs/config';
import { Model } from 'mongoose';
import { Socket } from 'socket.io';
import {
InterviewSession,
InterviewSessionDocument,
} from '../schemas/interview-session.schema';
import { BrainService } from '../../brain/brain.service';
import { SarvamClientHandler } from '../sarvam.handler';
import { DeepgramClientHandler } from '../deepgram.handler';
/**
 * Owns the lifecycle of interview sessions and their real-time voice pipelines.
 *
 * Session documents are persisted in MongoDB; each active session additionally
 * holds one in-memory voice handler (Sarvam or Deepgram, chosen once at boot
 * via the VOICE_PROVIDER env var) that bridges the socket.io connection to the
 * provider's STT/TTS websockets.
 */
@Injectable()
export class OrchestratorService {
  private readonly logger = new Logger(OrchestratorService.name);

  /** Active voice-pipeline handlers, keyed by interview session id. */
  private sessions: Map<string, SarvamClientHandler | DeepgramClientHandler> = new Map();

  /** Voice provider resolved at construction: 'sarvam' or 'deepgram' (default). */
  private readonly providerName: string;

  constructor(
    @InjectModel(InterviewSession.name)
    private readonly sessionModel: Model<InterviewSessionDocument>,
    private readonly configService: ConfigService,
    private readonly brainService: BrainService,
  ) {
    this.providerName = this.configService.get<string>('VOICE_PROVIDER', 'deepgram');
  }

  /** Creates and persists a new active session (stage INTRODUCTION) for the candidate. */
  async createSession(candidateId: string): Promise<InterviewSessionDocument> {
    const session = new this.sessionModel({
      candidateId,
      status: 'active',
      currentStage: 'INTRODUCTION',
    });
    return session.save();
  }

  /**
   * Loads a session by id.
   * @throws Error when no session with that id exists.
   */
  async getSession(sessionId: string): Promise<InterviewSessionDocument> {
    const session = await this.sessionModel.findById(sessionId).exec();
    if (!session) throw new Error(`Session ${sessionId} not found`);
    return session;
  }

  /**
   * Attaches a provider-specific streaming handler (STT/TTS websockets) to the
   * given socket.io connection for this session.
   */
  async initStreaming(
    sessionId: string,
    socket: Socket,
  ): Promise<void> {
    const session = await this.getSession(sessionId);
    // Fix: if a handler already exists for this session (e.g. the client
    // reconnected), tear it down before replacing it — previously the old
    // handler was silently overwritten, leaking its STT/TTS websockets.
    const existing = this.sessions.get(sessionId);
    if (existing) {
      existing.cleanup();
      this.sessions.delete(sessionId);
    }
    let handler: SarvamClientHandler | DeepgramClientHandler;
    if (this.providerName === 'sarvam') {
      handler = new SarvamClientHandler(socket, sessionId, session, this.brainService);
    } else {
      handler = new DeepgramClientHandler(socket, sessionId, session, this.brainService);
    }
    await handler.init();
    this.sessions.set(sessionId, handler);
    this.logger.log(`[${sessionId}] Streaming initialized with ${this.providerName}`);
  }

  /** Forwards a base64 audio chunk from the client to the session's STT pipeline. */
  streamAudioChunk(sessionId: string, audioBase64: string): void {
    const handler = this.sessions.get(sessionId);
    if (!handler) {
      this.logger.warn(`[${sessionId}] No active streaming session`);
      return;
    }
    handler.handleAudioChunk(audioBase64);
  }

  /** Logging-only hook: the provider handlers drive the pipeline via their own websockets. */
  async triggerPipeline(sessionId: string): Promise<void> {
    const handler = this.sessions.get(sessionId);
    if (!handler) return;
    // Since the pipeline handles variables itself via websocket, manual trigger might not be needed
    // but if the frontend sends "end-of-speech", we let Deepgram or Sarvam's silence handle it
    this.logger.log(`Manual trigger pipeline called on ${sessionId}`);
  }

  /** Advances the session to the next fixed stage, clamping at CONCLUSION. */
  async advanceStage(sessionId: string): Promise<string> {
    const session = await this.getSession(sessionId);
    const stages = ['INTRODUCTION', 'EXPERIENCE_DEEP_DIVE', 'TECHNICAL_CORE', 'ODD_SKILL_PROBE', 'BEHAVIORAL', 'CONCLUSION'];
    const currentIdx = stages.indexOf(session.currentStage);
    const nextStage = stages[Math.min(currentIdx + 1, stages.length - 1)];
    session.currentStage = nextStage;
    await session.save();
    this.logger.log(`[${sessionId}] Advanced to stage: ${nextStage}`);
    return nextStage;
  }

  /** Tears down the voice handler (if any) and marks the session completed. */
  async endSession(sessionId: string): Promise<void> {
    const handler = this.sessions.get(sessionId);
    if (handler) {
      handler.cleanup();
      this.sessions.delete(sessionId);
    }
    const session = await this.getSession(sessionId);
    session.status = 'completed';
    await session.save();
    this.logger.log(`[${sessionId}] Session completed`);
  }
}

28
server/src/main.ts Normal file
View File

@ -0,0 +1,28 @@
import { NestFactory } from '@nestjs/core';
import { ValidationPipe } from '@nestjs/common';
import { AppModule } from './app.module';
/**
 * Application entry point: boots the Nest app with global DTO validation and
 * CORS for the frontend, then listens on PORT (default 3001).
 */
async function bootstrap(): Promise<void> {
  const app = await NestFactory.create(AppModule);
  // Global validation pipe for DTO validation: strip unknown properties,
  // reject payloads containing them, and coerce primitives to DTO types.
  app.useGlobalPipes(
    new ValidationPipe({
      whitelist: true,
      forbidNonWhitelisted: true,
      transform: true,
    }),
  );
  // CORS for frontend (Vite dev server by default)
  app.enableCors({
    origin: process.env.CORS_ORIGIN || 'http://localhost:5173',
    credentials: true,
  });
  const port = process.env.PORT || 3001;
  await app.listen(port);
  console.log(`🚀 AI Interviewer server running on http://localhost:${port}`);
}
// Fix: the promise was previously floating — a startup failure (bad Mongo URI,
// port in use) became an unhandled rejection. Surface it and exit non-zero.
bootstrap().catch((err) => {
  console.error('Fatal error during bootstrap:', err);
  process.exit(1);
});

2
server/src/types.d.ts vendored Normal file
View File

@ -0,0 +1,2 @@
declare module 'pdf-parse';
declare module 'canvas';

View File

@ -0,0 +1,29 @@
/**
 * Wraps a raw little-endian, signed 16-bit, mono PCM buffer in a minimal
 * 44-byte RIFF/WAVE header so consumers that require `audio/wav` payloads
 * (e.g. the Sarvam STT websocket) can decode it.
 *
 * The input buffer is not modified; a new Buffer of length 44 + payload is
 * returned.
 *
 * @param pcm16Buffer - Raw PCM16LE mono samples.
 * @param sampleRate  - Sample rate in Hz (defaults to 16 kHz).
 * @returns WAV header followed by the untouched PCM payload.
 */
export function pcm16ToWav(pcm16Buffer: Buffer, sampleRate = 16000): Buffer {
  const numChannels = 1;
  const bitsPerSample = 16;
  const byteRate = (sampleRate * numChannels * bitsPerSample) / 8;
  const blockAlign = (numChannels * bitsPerSample) / 8;
  const wavBuffer = Buffer.alloc(44 + pcm16Buffer.length);
  // RIFF header: chunk id, total size after this field (36 header bytes + data), format tag.
  wavBuffer.write('RIFF', 0);
  wavBuffer.writeUInt32LE(36 + pcm16Buffer.length, 4);
  wavBuffer.write('WAVE', 8);
  // fmt chunk: 16-byte PCM subchunk describing the sample layout.
  wavBuffer.write('fmt ', 12);
  wavBuffer.writeUInt32LE(16, 16); // Subchunk1Size (PCM)
  wavBuffer.writeUInt16LE(1, 20); // AudioFormat 1 = uncompressed PCM
  wavBuffer.writeUInt16LE(numChannels, 22);
  wavBuffer.writeUInt32LE(sampleRate, 24);
  wavBuffer.writeUInt32LE(byteRate, 28);
  wavBuffer.writeUInt16LE(blockAlign, 32);
  wavBuffer.writeUInt16LE(bitsPerSample, 34);
  // data chunk: payload size followed by the raw samples.
  wavBuffer.write('data', 36);
  wavBuffer.writeUInt32LE(pcm16Buffer.length, 40);
  pcm16Buffer.copy(wavBuffer, 44);
  return wavBuffer;
}

View File

@ -0,0 +1,4 @@
{
"extends": "./tsconfig.json",
"exclude": ["node_modules", "test", "dist", "**/*spec.ts"]
}

24
server/tsconfig.json Normal file
View File

@ -0,0 +1,24 @@
{
"compilerOptions": {
"module": "commonjs",
"declaration": true,
"removeComments": true,
"emitDecoratorMetadata": true,
"experimentalDecorators": true,
"allowSyntheticDefaultImports": true,
"target": "ES2021",
"sourceMap": true,
"outDir": "./dist",
"baseUrl": "./",
"incremental": true,
"skipLibCheck": true,
"strictNullChecks": true,
"noImplicitAny": true,
"strictBindCallApply": true,
"forceConsistentCasingInFileNames": true,
"noFallthroughCasesInSwitch": true,
"paths": {
"@/*": ["src/*"]
}
}
}