import functools
import logging
import time

from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import Docx2txtLoader

from app.core.config import settings

logging.basicConfig(level=logging.INFO)


def log_latency(func):
    """Decorate *func* so that each successful call logs its duration.

    The wrapped callable forwards all positional and keyword arguments to
    *func* unchanged and returns its result. After *func* returns, an INFO
    record with the elapsed time in milliseconds is emitted on the root
    logger. If *func* raises, the exception propagates and nothing is logged.
    """

    # functools.wraps keeps the decorated function's __name__, __doc__ and
    # other metadata intact instead of exposing the inner "wrapper".
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        # perf_counter is monotonic, so the measured interval cannot be
        # skewed by system clock adjustments the way time.time() can.
        started = time.perf_counter()
        result = func(*args, **kwargs)
        latency_ms = (time.perf_counter() - started) * 1000
        # Lazy %-style arguments render the same message as before while
        # deferring formatting to the logging machinery.
        logging.info("Latency for %s: %.2f ms", func.__name__, latency_ms)
        return result

    return wrapper


@log_latency
def docx_to_chunks(file_path: str) -> list[str]:
    """Convert a DOCX file to text chunks.

    The file at *file_path* is loaded with ``Docx2txtLoader`` and the
    resulting documents are split by a ``RecursiveCharacterTextSplitter``
    configured with ``settings.CHUNK_SIZE`` and ``settings.CHUNK_OVERLAP``.

    Args:
        file_path: Path to the DOCX file to load.

    Returns:
        The ``page_content`` of every chunk, in the order produced by the
        splitter.
    """
    documents = Docx2txtLoader(file_path).load()
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=settings.CHUNK_SIZE,
        chunk_overlap=settings.CHUNK_OVERLAP,
    )
    chunks = splitter.split_documents(documents)
    return [chunk.page_content for chunk in chunks]