Compare commits


1 Commit

SHA1        Message      Date
ad9e4b553a  faiss-index  2025-05-19 19:20:43 +05:30
11 changed files with 404 additions and 107 deletions

.gitignore (vendored, 2 changes)

@@ -144,6 +144,8 @@ venv.bak/
 # Rope project settings
 .ropeproject
+config.py
 
 # mkdocs documentation
 /site

app.py (80 changes)

@@ -1,12 +1,11 @@
 import os
-import uuid
+import json
 import numpy as np
 import faiss
 import httpx
-from fastapi import FastAPI, UploadFile, Form
+from fastapi import FastAPI, HTTPException
 from langchain_community.document_loaders import Docx2txtLoader
 from langchain.text_splitter import RecursiveCharacterTextSplitter
-from pydantic import BaseModel
 from dotenv import load_dotenv
 from sentence_transformers import SentenceTransformer
@@ -19,6 +18,10 @@ OPENROUTER_API_KEY = "sk-or-v1-7420d7a6c2f5ab366682e2270c543a1802399485ec0a56c4f
 app = FastAPI(title="RAG-via-FastAPI")
 
+# Create necessary directories
+os.makedirs("indices", exist_ok=True)
+os.makedirs("chunks", exist_ok=True)
+
 # --- helpers ---------------------------------------------------------
 def docx_to_chunks(file_path):
@@ -39,6 +42,28 @@ def build_faiss_index(vectors):
     index.add(vectors)
     return index
 
+def save_index(index, doc_id):
+    index_path = f"indices/{doc_id}.index"
+    faiss.write_index(index, index_path)
+
+def save_chunks(chunks, doc_id):
+    chunks_path = f"chunks/{doc_id}.json"
+    with open(chunks_path, "w") as f:
+        json.dump(chunks, f)
+
+def load_index(doc_id):
+    index_path = f"indices/{doc_id}.index"
+    if not os.path.exists(index_path):
+        raise HTTPException(status_code=404, detail=f"Index not found for doc_id: {doc_id}")
+    return faiss.read_index(index_path)
+
+def load_chunks(doc_id):
+    chunks_path = f"chunks/{doc_id}.json"
+    if not os.path.exists(chunks_path):
+        raise HTTPException(status_code=404, detail=f"Chunks not found for doc_id: {doc_id}")
+    with open(chunks_path, "r") as f:
+        return json.load(f)
+
 async def openrouter_chat(prompt):
     headers = {
         "Authorization": f"Bearer {OPENROUTER_API_KEY}",
@@ -46,7 +71,7 @@ async def openrouter_chat(prompt):
         "Content-Type": "application/json"
     }
     data = {
-        "model": "google/gemma-3-27b-it:free",  # any OpenRouter-hosted model
+        "model": "google/gemma-3-27b-it:free",
         "messages": [
             {"role": "system", "content": "You are a helpful assistant."},
             {"role": "user", "content": prompt}
@@ -63,50 +88,3 @@ async def openrouter_chat(prompt):
     r.raise_for_status()
     return r.json()["choices"][0]["message"]["content"]
 
-# --- request / response models --------------------------------------
-class QueryRequest(BaseModel):
-    query: str
-
-class RAGResponse(BaseModel):
-    answer: str
-    sources: list[int]
-
-# --- endpoints ------------------------------------------------------
-@app.post("/rag", response_model=RAGResponse)
-async def rag_endpoint(file: UploadFile, query: str = Form(...)):
-    # Create temp directory if it doesn't exist
-    os.makedirs("temp", exist_ok=True)
-    # Use a local temp directory instead of /tmp for Windows compatibility
-    tmp_path = f"temp/{uuid.uuid4()}.docx"
-    try:
-        # Save uploaded file
-        with open(tmp_path, "wb") as f:
-            f.write(await file.read())
-        # 1. chunk
-        chunks = docx_to_chunks(tmp_path)
-        # 2. embed
-        embeddings = get_embeddings(chunks)
-        # 3. search
-        index = build_faiss_index(embeddings)
-        q_vec = get_embeddings([query])
-        distances, idxs = index.search(q_vec, k=3)
-        context = "\n\n".join(chunks[i] for i in idxs[0])
-        # 4. generate
-        prompt = (f"Use ONLY the context to answer the question.\n\n"
-                  f"Context:\n{context}\n\nQuestion: {query}\n\nAnswer:")
-        answer = await openrouter_chat(prompt)
-        return {"answer": answer, "sources": idxs[0].tolist()}
-    finally:
-        # Cleanup temp file
-        if os.path.exists(tmp_path):
-            os.remove(tmp_path)
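The new save/load helpers are what let an index outlive a single request. As a quick sanity check, here is a minimal sketch of the same round trip outside FastAPI; the 384-dimension size matches all-MiniLM-L6-v2, and the "example.index" path is a made-up stand-in for indices/{doc_id}.index:

    import faiss
    import numpy as np

    # Build a tiny index the same way build_faiss_index does
    vectors = np.random.rand(10, 384).astype("float32")  # all-MiniLM-L6-v2 produces 384-dim vectors
    index = faiss.IndexFlatL2(vectors.shape[1])
    index.add(vectors)

    # Persist and reload, mirroring save_index / load_index
    faiss.write_index(index, "example.index")   # hypothetical path
    restored = faiss.read_index("example.index")

    # The restored index answers the same queries as the original
    distances, idxs = restored.search(vectors[:1], k=3)
    print(idxs[0])  # nearest neighbours of the first vector; idxs[0][0] should be 0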

[file name not shown]

@@ -1,3 +1,9 @@
 """
 RAG (Retrieval Augmented Generation) application package.
 """
+from fastapi import FastAPI
+from app.api.routes import router
+
+app = FastAPI(title="RAG-via-FastAPI")
+app.include_router(router)

[file name not shown]

@@ -0,0 +1,3 @@
+"""
+API routes package.
+"""

[file name not shown]

@@ -1,72 +1,109 @@
 import os
 import uuid
-import time
-import logging
-from fastapi import APIRouter, UploadFile, Form
-from app.core.models import RAGResponse
-from app.services.document import docx_to_chunks
-from app.services.embedding import embedding_service
-from app.services.llm import chat_completion
-from app.core.config import settings
-
-logging.basicConfig(level=logging.INFO)
+from fastapi import APIRouter, UploadFile, HTTPException
+from pydantic import BaseModel
+from typing import List, Optional
+
+from app.core import (
+    docx_to_chunks,
+    get_embeddings,
+    build_faiss_index,
+    save_index,
+    save_chunks,
+    load_index,
+    load_chunks,
+    openrouter_chat
+)
 
 router = APIRouter()
 
-def log_latency(func):
-    def wrapper(*args, **kwargs):
-        start_time = time.time()
-        result = func(*args, **kwargs)
-        end_time = time.time()
-        latency_ms = (end_time - start_time) * 1000
-        logging.info(f"Latency for {func.__name__}: {latency_ms:.2f} ms")
-        return result
-    return wrapper
-
-@log_latency
-def search_faiss(index, query_vector, k, chunks):  # Added chunks parameter
-    """Search the FAISS index for similar vectors."""
-    distances, idxs = index.search(query_vector, k)
-    # Log top k results with their distances and text preview
-    for i in range(k):
-        chunk_text = chunks[idxs[0][i]]
-        preview = ' '.join(chunk_text.split()[:20])  # Get first 50 words
-        logging.info(f"Top {i+1} result - Index: {idxs[0][i]}, Distance: {distances[0][i]:.4f}")
-        logging.info(f"Text preview: {preview}...")
-    return distances, idxs
-
-@router.post("/rag", response_model=RAGResponse)
-async def rag_endpoint(file: UploadFile, query: str = Form(...)):
+# --- request / response models --------------------------------------
+class UploadResponse(BaseModel):
+    message: str
+    doc_id: str
+
+class QueryRequest(BaseModel):
+    doc_id: str
+    query: str
+    top_k: Optional[int] = 5
+
+class QueryResponse(BaseModel):
+    answer: str
+    sources: List[int]
+
+class DocumentsResponse(BaseModel):
+    documents: List[str]
+
+# --- endpoints ------------------------------------------------------
+@router.get("/documents", response_model=DocumentsResponse)
+async def list_documents():
+    """List all indexed documents."""
+    try:
+        # Get all .index files from the indices directory
+        doc_ids = [
+            os.path.splitext(f)[0]  # Remove .index extension
+            for f in os.listdir("indices")
+            if f.endswith(".index")
+        ]
+        return {"documents": doc_ids}
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=str(e))
+
+@router.post("/upload", response_model=UploadResponse)
+async def upload_endpoint(file: UploadFile):
+    if not file.filename.endswith('.docx'):
+        raise HTTPException(status_code=400, detail="Only .docx files are supported")
+
+    # Generate doc_id from filename
+    doc_id = str(uuid.uuid5(uuid.NAMESPACE_DNS, file.filename))
+
     # Create temp directory if it doesn't exist
     os.makedirs("temp", exist_ok=True)
-
-    # Use a local temp directory for Windows compatibility
-    tmp_path = f"temp/{uuid.uuid4()}.docx"
+    tmp_path = f"temp/{doc_id}.docx"
     try:
         # Save uploaded file
         with open(tmp_path, "wb") as f:
             f.write(await file.read())
-        # 1. chunk
+        # Process document
         chunks = docx_to_chunks(tmp_path)
-
-        # 2. embed
-        embeddings = embedding_service.get_embeddings(chunks)
-
-        # 3. search
-        index = embedding_service.build_faiss_index(embeddings)
-        q_vec = embedding_service.get_embeddings([query])
-        distances, idxs = search_faiss(index, q_vec, settings.TOP_K_RESULTS, chunks)
-
-        # 4. generate
-        context = "\n\n".join(chunks[i] for i in idxs[0])
-        prompt = (f"Use ONLY the context to answer the question.\n\n"
-                  f"Context:\n{context}\n\nQuestion: {query}\n\nAnswer:")
-        answer = await chat_completion(prompt)
-        return {"answer": answer, "sources": idxs[0].tolist()}
+        embeddings = get_embeddings(chunks)
+        # Save index and chunks
+        index = build_faiss_index(embeddings)
+        save_index(index, doc_id)
+        save_chunks(chunks, doc_id)
+        return {"message": "Document indexed", "doc_id": doc_id}
     finally:
         # Cleanup temp file
         if os.path.exists(tmp_path):
             os.remove(tmp_path)
+
+@router.post("/query", response_model=QueryResponse)
+async def query_endpoint(request: QueryRequest):
+    try:
+        # Load stored data
+        index = load_index(request.doc_id)
+        chunks = load_chunks(request.doc_id)
+        # Process query
+        q_vec = get_embeddings([request.query])
+        distances, idxs = index.search(q_vec, k=request.top_k)
+        # Get relevant chunks
+        context = "\n\n".join(chunks[i] for i in idxs[0])
+        # Generate answer
+        prompt = (f"Use ONLY the context to answer the question.\n\n"
+                  f"Context:\n{context}\n\nQuestion: {request.query}\n\nAnswer:")
+        answer = await openrouter_chat(prompt)
+        return {"answer": answer, "sources": idxs[0].tolist()}
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=str(e))
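With upload and query now split into separate endpoints, a document is indexed once and then queried any number of times by doc_id. A minimal client sketch using httpx (already a dependency of this repo); the localhost:8000 address and the policy.docx file name are assumptions, not part of the commit:

    import httpx

    BASE = "http://localhost:8000"  # assumed dev server address

    # Index a document once
    with open("policy.docx", "rb") as f:  # hypothetical file
        r = httpx.post(f"{BASE}/upload", files={"file": ("policy.docx", f)})
    r.raise_for_status()
    doc_id = r.json()["doc_id"]

    # Query it as many times as needed
    r = httpx.post(f"{BASE}/query",
                   json={"doc_id": doc_id, "query": "How are refunds issued?", "top_k": 3})
    print(r.json()["answer"])

    # List everything indexed so far
    print(httpx.get(f"{BASE}/documents").json())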

app/core.py (new file, 85 lines)

@@ -0,0 +1,85 @@
+import os
+import json
+import numpy as np
+import faiss
+import httpx
+from fastapi import HTTPException
+from langchain_community.document_loaders import Docx2txtLoader
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+from dotenv import load_dotenv
+from sentence_transformers import SentenceTransformer
+
+# Load environment variables
+load_dotenv()
+
+# Initialize the embedding model
+MODEL = SentenceTransformer("all-MiniLM-L6-v2")
+
+OPENROUTER_API_KEY = "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"
+
+# Create necessary directories
+os.makedirs("indices", exist_ok=True)
+os.makedirs("chunks", exist_ok=True)
+
+def docx_to_chunks(file_path):
+    pages = Docx2txtLoader(file_path).load()
+    chunks = RecursiveCharacterTextSplitter(
+        chunk_size=1000, chunk_overlap=200
+    ).split_documents(pages)
+    return [c.page_content for c in chunks]
+
+def get_embeddings(texts):
+    # Generate embeddings using SentenceTransformer
+    embeddings = MODEL.encode(texts, convert_to_tensor=False)
+    return np.array(embeddings, dtype="float32")
+
+def build_faiss_index(vectors):
+    dim = vectors.shape[1]
+    index = faiss.IndexFlatL2(dim)
+    index.add(vectors)
+    return index
+
+def save_index(index, doc_id):
+    index_path = f"indices/{doc_id}.index"
+    faiss.write_index(index, index_path)
+
+def save_chunks(chunks, doc_id):
+    chunks_path = f"chunks/{doc_id}.json"
+    with open(chunks_path, "w") as f:
+        json.dump(chunks, f)
+
+def load_index(doc_id):
+    index_path = f"indices/{doc_id}.index"
+    if not os.path.exists(index_path):
+        raise HTTPException(status_code=404, detail=f"Index not found for doc_id: {doc_id}")
+    return faiss.read_index(index_path)
+
+def load_chunks(doc_id):
+    chunks_path = f"chunks/{doc_id}.json"
+    if not os.path.exists(chunks_path):
+        raise HTTPException(status_code=404, detail=f"Chunks not found for doc_id: {doc_id}")
+    with open(chunks_path, "r") as f:
+        return json.load(f)
+
+async def openrouter_chat(prompt):
+    headers = {
+        "Authorization": f"Bearer {OPENROUTER_API_KEY}",
+        "HTTP-Referer": "https://yourapp.example",
+        "Content-Type": "application/json"
+    }
+    data = {
+        "model": "google/gemma-3-27b-it:free",
+        "messages": [
+            {"role": "system", "content": "You are a helpful assistant."},
+            {"role": "user", "content": prompt}
+        ],
+        "temperature": 0.3,
+        "max_tokens": 512
+    }
+    async with httpx.AsyncClient(timeout=120) as client:
+        r = await client.post(
+            "https://openrouter.ai/api/v1/chat/completions",
+            headers=headers,
+            json=data
+        )
+        r.raise_for_status()
+        return r.json()["choices"][0]["message"]["content"]
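Note that core.py calls load_dotenv() but then hardcodes OPENROUTER_API_KEY, which checks a live secret into the repository. A sketch of the pattern load_dotenv() is presumably there for, assuming the key lives in a local .env file instead:

    import os
    from dotenv import load_dotenv

    load_dotenv()  # reads .env from the working directory

    # Fail fast if the key is missing instead of sending unauthenticated requests
    OPENROUTER_API_KEY = os.getenv("OPENROUTER_API_KEY")
    if not OPENROUTER_API_KEY:
        raise RuntimeError("OPENROUTER_API_KEY is not set; add it to .env or the environment")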

[file name not shown]

@@ -0,0 +1,89 @@
+"""
+Core functionality for the RAG application.
+"""
+import os
+import json
+import numpy as np
+import faiss
+import httpx
+from fastapi import HTTPException
+from langchain_community.document_loaders import Docx2txtLoader
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+from dotenv import load_dotenv
+from sentence_transformers import SentenceTransformer
+
+# Load environment variables
+load_dotenv()
+
+# Initialize the embedding model
+MODEL = SentenceTransformer("all-MiniLM-L6-v2")
+
+OPENROUTER_API_KEY = "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"
+
+# Create necessary directories
+os.makedirs("indices", exist_ok=True)
+os.makedirs("chunks", exist_ok=True)
+
+def docx_to_chunks(file_path):
+    pages = Docx2txtLoader(file_path).load()
+    chunks = RecursiveCharacterTextSplitter(
+        chunk_size=1000, chunk_overlap=200
+    ).split_documents(pages)
+    return [c.page_content for c in chunks]
+
+def get_embeddings(texts):
+    # Generate embeddings using SentenceTransformer
+    embeddings = MODEL.encode(texts, convert_to_tensor=False)
+    return np.array(embeddings, dtype="float32")
+
+def build_faiss_index(vectors):
+    dim = vectors.shape[1]
+    index = faiss.IndexFlatL2(dim)
+    index.add(vectors)
+    return index
+
+def save_index(index, doc_id):
+    index_path = f"indices/{doc_id}.index"
+    faiss.write_index(index, index_path)
+
+def save_chunks(chunks, doc_id):
+    chunks_path = f"chunks/{doc_id}.json"
+    with open(chunks_path, "w") as f:
+        json.dump(chunks, f)
+
+def load_index(doc_id):
+    index_path = f"indices/{doc_id}.index"
+    if not os.path.exists(index_path):
+        raise HTTPException(status_code=404, detail=f"Index not found for doc_id: {doc_id}")
+    return faiss.read_index(index_path)
+
+def load_chunks(doc_id):
+    chunks_path = f"chunks/{doc_id}.json"
+    if not os.path.exists(chunks_path):
+        raise HTTPException(status_code=404, detail=f"Chunks not found for doc_id: {doc_id}")
+    with open(chunks_path, "r") as f:
+        return json.load(f)
+
+async def openrouter_chat(prompt):
+    headers = {
+        "Authorization": f"Bearer {OPENROUTER_API_KEY}",
+        "HTTP-Referer": "https://yourapp.example",
+        "Content-Type": "application/json"
+    }
+    data = {
+        "model": "google/gemma-3-27b-it:free",
+        "messages": [
+            {"role": "system", "content": "You are a helpful assistant."},
+            {"role": "user", "content": prompt}
+        ],
+        "temperature": 0.3,
+        "max_tokens": 512
+    }
+    async with httpx.AsyncClient(timeout=120) as client:
+        r = await client.post(
+            "https://openrouter.ai/api/v1/chat/completions",
+            headers=headers,
+            json=data
+        )
+        r.raise_for_status()
+        return r.json()["choices"][0]["message"]["content"]
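Both copies of the core module build an IndexFlatL2, which performs exact nearest-neighbour search by squared Euclidean distance, so smaller distances mean more similar chunks. A small sketch of what index.search returns, using made-up example sentences:

    import numpy as np
    import faiss
    from sentence_transformers import SentenceTransformer

    model = SentenceTransformer("all-MiniLM-L6-v2")
    texts = ["refunds go to your bank account",
             "cheques clear locally",
             "shipping costs are reimbursed"]
    vecs = np.array(model.encode(texts), dtype="float32")

    index = faiss.IndexFlatL2(vecs.shape[1])
    index.add(vecs)

    q = np.array(model.encode(["how do I get my money back"]), dtype="float32")
    distances, idxs = index.search(q, k=2)
    # distances: squared L2 distances, shape (1, 2); idxs: row numbers into texts
    print(idxs[0], distances[0])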

[file name not shown]

@@ -0,0 +1 @@
+["Refund for Pay on Delivery Orders:\n\nFor Pay on Delivery orders, refunds will be processed either to your bank account (via National Electronic Funds Transfer - NEFT) or to your Amazon account (as Amazon Pay balance).\n\nTo receive the refund to your bank account, update your bank account details via the Returns Centre when returning an item.\n\nSteps to add bank account:\n\nThrough Mobile App:\n\nGo to Orders\n\nClick on the order you want to return\n\nSelect Return or Replace items\n\nChoose Refund to your bank account\n\nSelect Choose a bank account\n\nSelect Add a new bank account and enter the bank details\n\nThrough Website:\n\nGo to Orders\n\nClick on the order you want to return\n\nSelect Return or Replacement items\n\nChoose Refund to your bank account\n\nSelect Choose a bank account\n\nSelect Add a new bank account and enter the bank details\n\nNote: Refunds cannot be processed to third-party accounts. The name on your Amazon account should match the name of the bank account holder.\n\n\n\nPaper Cheque Clearing:", "Note: Refunds cannot be processed to third-party accounts. The name on your Amazon account should match the name of the bank account holder.\n\n\n\nPaper Cheque Clearing:\n\nAll cheque refunds will be in the form of \"at par\" Deutsche Bank cheques. These cheques are cleared locally in the following cities:\n\nAhmedabad, Aurangabad, Bangalore, Chennai, Delhi, Gurgaon, Kolhapur, Kolkata, Ludhiana, Moradabad, Mumbai, Noida, Pune, Salem, Surat, Vellore\n\nIf presenting the cheque in another city:\n\nEnsure the cheque is sent for outstation clearing\n\nIf dropping the cheque in a clearance box:\n\nIn cities listed above, use the Local Cheques box\n\nIn other cities, use the Outstation Cheques box\n\nFailing to follow these instructions may result in the cheque not being processed and a penalty.\n\nNote: Once a cheque is issued, Amazon will email tracking details of the refund cheque within 4 business days from the date of refund.\n\n\n\nShipping Cost Refunds:", "Note: Once a cheque is issued, Amazon will email tracking details of the refund cheque within 4 business days from the date of refund.\n\n\n\nShipping Cost Refunds:\n\nFor Fulfilled by Amazon and Prime Eligible items, return shipping costs up to Rs. 100 will be refunded.\n\nGift-wrapping charges, if any, will also be refunded.\n\nRefunds in such cases will be issued via cheque.\n\nNote: For return shipping charges over Rs.100 (for large/heavy items), contact Amazon for an additional refund with proof of payment (e.g., courier receipt).\n\nFor Seller-Fulfilled items:\n\nYou can request the seller to reimburse return shipping costs.\n\nThe seller may ask for the courier receipt as proof."]

Binary file not shown.

[file name not shown]

@@ -1,5 +1,5 @@
-from fastapi import FastAPI
-from app.api.routes import router
+import uvicorn
+from app import app
 
-app = FastAPI(title="RAG-via-FastAPI")
-app.include_router(router)
+if __name__ == "__main__":
+    uvicorn.run(app, host="0.0.0.0", port=8000)
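This entry point now just hands the app object to uvicorn. For development, uvicorn can also auto-reload on code changes, but only when given an import string instead of the object; a sketch, assuming the package layout in this commit (the app package's __init__ exports app):

    import uvicorn

    if __name__ == "__main__":
        # "app:app" = the `app` object in the app package; reload requires an import string
        uvicorn.run("app:app", host="0.0.0.0", port=8000, reload=True)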

routes.py (new file, 96 lines)

@@ -0,0 +1,96 @@
+import os
+import uuid
+import json
+import numpy as np
+import faiss
+import httpx
+from fastapi import APIRouter, UploadFile, HTTPException
+from pydantic import BaseModel
+from typing import List, Optional
+
+from app import (
+    docx_to_chunks,
+    get_embeddings,
+    build_faiss_index,
+    save_index,
+    save_chunks,
+    load_index,
+    load_chunks,
+    openrouter_chat
+)
+
+router = APIRouter()
+
+# --- request / response models --------------------------------------
+class UploadResponse(BaseModel):
+    message: str
+    doc_id: str
+
+class QueryRequest(BaseModel):
+    doc_id: str
+    query: str
+    top_k: Optional[int] = 5
+
+class QueryResponse(BaseModel):
+    answer: str
+    sources: List[int]
+
+# --- endpoints ------------------------------------------------------
+@router.post("/upload", response_model=UploadResponse)
+async def upload_endpoint(file: UploadFile):
+    if not file.filename.endswith('.docx'):
+        raise HTTPException(status_code=400, detail="Only .docx files are supported")
+
+    # Generate doc_id from filename
+    doc_id = str(uuid.uuid5(uuid.NAMESPACE_DNS, file.filename))
+
+    # Create temp directory if it doesn't exist
+    os.makedirs("temp", exist_ok=True)
+    tmp_path = f"temp/{doc_id}.docx"
+    try:
+        # Save uploaded file
+        with open(tmp_path, "wb") as f:
+            f.write(await file.read())
+        # Process document
+        chunks = docx_to_chunks(tmp_path)
+        embeddings = get_embeddings(chunks)
+        # Save index and chunks
+        index = build_faiss_index(embeddings)
+        save_index(index, doc_id)
+        save_chunks(chunks, doc_id)
+        return {"message": "Document indexed", "doc_id": doc_id}
+    finally:
+        # Cleanup temp file
+        if os.path.exists(tmp_path):
+            os.remove(tmp_path)
+
+@router.post("/query", response_model=QueryResponse)
+async def query_endpoint(request: QueryRequest):
+    try:
+        # Load stored data
+        index = load_index(request.doc_id)
+        chunks = load_chunks(request.doc_id)
+        # Process query
+        q_vec = get_embeddings([request.query])
+        distances, idxs = index.search(q_vec, k=request.top_k)
+        # Get relevant chunks
+        context = "\n\n".join(chunks[i] for i in idxs[0])
+        # Generate answer
+        prompt = (f"Use ONLY the context to answer the question.\n\n"
+                  f"Context:\n{context}\n\nQuestion: {request.query}\n\nAnswer:")
+        answer = await openrouter_chat(prompt)
+        return {"answer": answer, "sources": idxs[0].tolist()}
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=str(e))
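Because doc_id comes from uuid.uuid5 over the filename, it is deterministic: re-uploading a file with the same name maps to the same id and silently overwrites its saved index and chunks. A quick illustration (example.docx is a made-up name):

    import uuid

    a = uuid.uuid5(uuid.NAMESPACE_DNS, "example.docx")
    b = uuid.uuid5(uuid.NAMESPACE_DNS, "example.docx")
    c = uuid.uuid5(uuid.NAMESPACE_DNS, "other.docx")

    assert a == b  # same name, same doc_id: re-upload overwrites
    assert a != c  # different name, different doc_id
    print(a)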