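"""Chainlit RAG chat app.

Hybrid (dense + sparse) retrieval from a Qdrant collection, Cohere reranking,
and a streaming OpenAI chat model, composed with LangChain (LCEL).

Typically started with `chainlit run <this file>`.
"""
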
import os
from operator import itemgetter

import chainlit as cl
from dotenv import load_dotenv
from langchain.retrievers.contextual_compression import ContextualCompressionRetriever
from langchain_cohere import CohereRerank
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnableLambda
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from langchain_openai import ChatOpenAI
from langchain_qdrant import FastEmbedSparse, QdrantVectorStore, RetrievalMode
from qdrant_client import QdrantClient
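
# Expects QDRANT_URL and QDRANT_API_KEY in the environment (or a .env file);
# the LangChain integrations below read their own API keys (e.g. GOOGLE_API_KEY,
# COHERE_API_KEY, OPENAI_API_KEY) from the environment as well.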
load_dotenv()

client = QdrantClient(
    url=os.environ["QDRANT_URL"], api_key=os.environ["QDRANT_API_KEY"]
)
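
# Dense embeddings for semantic similarity search.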
embeddings = GoogleGenerativeAIEmbeddings(model="models/gemini-embedding-exp-03-07")
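
# Hybrid retrieval: the dense vectors above combined with sparse BM25 vectors,
# fused by Qdrant. The vector names must match those used when the collection
# "collection_v_0_3" was created.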
vectorstore = QdrantVectorStore(
    client=client,
    collection_name="collection_v_0_3",
    embedding=embeddings,
    sparse_embedding=FastEmbedSparse(model_name="Qdrant/bm25"),
    retrieval_mode=RetrievalMode.HYBRID,
    vector_name="dense",
    sparse_vector_name="sparse",
)
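
# Over-fetch 20 candidates so the reranker has enough material to choose from.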
retriever = vectorstore.as_retriever(search_kwargs={"k": 20})
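# Rerank with Cohere and keep only the passages most relevant to the query.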
compressor = CohereRerank(model="rerank-v3.5")
compression_retriever = ContextualCompressionRetriever(
    base_compressor=compressor, base_retriever=retriever
)

prompt = """\
You are a helpful and kind assistant. Use the context provided below to answer the question.
If you do not know the answer, or are unsure, say you don't know.

Query:
{question}

Context:
{context}
"""


def format_docs(docs):
    # Join the retrieved documents into a single string so the prompt receives
    # plain text rather than a Python list repr.
    return "\n\n".join(doc.page_content for doc in docs)


llm_model = ChatOpenAI(model="gpt-4.1-mini", temperature=0, streaming=True)
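
# LCEL chain: route the question both through the retriever/reranker (to build
# the context) and straight into the prompt, then stream from the chat model.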
chain = (
    {
        "context": itemgetter("question")
        | compression_retriever
        | RunnableLambda(format_docs),
        "question": itemgetter("question"),
    }
    | ChatPromptTemplate.from_template(prompt)
    | llm_model
)


@cl.on_chat_start
async def start_chat():
    await cl.Message(content="Ask your questions...").send()


@cl.on_message
async def main(message: cl.Message):
    response = cl.Message(content="")
    # Stream tokens into the UI as they arrive rather than re-sending the
    # whole message on every chunk.
    async for chunk in chain.astream({"question": message.content}):
        await response.stream_token(chunk.content)
    await response.send()