41 lines
1.2 KiB
Python
41 lines
1.2 KiB
Python
import functools
import inspect
import logging
import time

import httpx

from app.core.config import settings
|
|
|
|
logging.basicConfig(level=logging.INFO)
|
|
|
|
def log_latency(func):
|
|
def wrapper(*args, **kwargs):
|
|
start_time = time.time()
|
|
result = func(*args, **kwargs)
|
|
end_time = time.time()
|
|
latency_ms = (end_time - start_time) * 1000
|
|
logging.info(f"Latency for {func.__name__}: {latency_ms:.2f} ms")
|
|
return result
|
|
return wrapper
|
|
|
|
@log_latency
|
|
async def chat_completion(prompt: str) -> str:
|
|
"""Get completion from OpenRouter API."""
|
|
headers = {
|
|
"Authorization": f"Bearer {settings.OPENROUTER_API_KEY}",
|
|
"Content-Type": "application/json"
|
|
}
|
|
data = {
|
|
"model": "google/gemma-3-27b-it:free",
|
|
"messages": [
|
|
{"role": "system", "content": "You are a helpful assistant."},
|
|
{"role": "user", "content": prompt}
|
|
],
|
|
"temperature": 0.1,
|
|
}
|
|
async with httpx.AsyncClient(timeout=120) as client:
|
|
r = await client.post(
|
|
"https://openrouter.ai/api/v1/chat/completions",
|
|
headers=headers,
|
|
json=data
|
|
)
|
|
r.raise_for_status()
|
|
return r.json()["choices"][0]["message"]["content"]
|
|
|