"""Chainlit RAG chat app.

Pipeline: hybrid (dense + BM25 sparse) retrieval from Qdrant, Cohere
reranking as contextual compression, then a streaming OpenAI chat model
answering from the retrieved context.
"""

import os
from operator import itemgetter

import chainlit as cl
from dotenv import load_dotenv
from langchain.retrievers.contextual_compression import ContextualCompressionRetriever
from langchain_cohere import CohereRerank
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnableLambda
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from langchain_openai import ChatOpenAI
from langchain_qdrant import FastEmbedSparse, QdrantVectorStore, RetrievalMode
from qdrant_client import QdrantClient

load_dotenv()

# Qdrant connection details come from the environment (.env via load_dotenv).
client = QdrantClient(
    url=os.environ["QDRANT_URL"], api_key=os.environ["QDRANT_API_KEY"]
)

embeddings = GoogleGenerativeAIEmbeddings(model="models/gemini-embedding-exp-03-07")

# Hybrid retrieval: named dense vector (Google embeddings) + named sparse
# vector (BM25 via FastEmbed) over the same collection.
vectorstore = QdrantVectorStore(
    client=client,
    collection_name="collection_v_0_3",
    embedding=embeddings,
    sparse_embedding=FastEmbedSparse(model_name="Qdrant/bm25"),
    retrieval_mode=RetrievalMode.HYBRID,
    vector_name="dense",
    sparse_vector_name="sparse",
)

# Pull a wide candidate set (k=20) and let the Cohere reranker compress it
# down to the most relevant documents.
retriever = vectorstore.as_retriever(search_kwargs={"k": 20})
compressor = CohereRerank(model="rerank-v3.5")
compression_retriever = ContextualCompressionRetriever(
    base_compressor=compressor, base_retriever=retriever
)

prompt = """\
You are a helpful and kind assistant. Use the context provided below to answer the question. If you do not know the answer, or are unsure, say you don't know. 
Query: {question} Context: {context} """


def format_docs(docs):
    """Join retrieved documents into one context string for the prompt.

    Fix: the previous version returned a list, which the prompt template
    rendered as a Python list repr (quotes, brackets, escaped newlines)
    instead of clean context text.
    """
    return "\n\n".join(doc.page_content for doc in docs)


llm_model = ChatOpenAI(model="gpt-4.1-mini", temperature=0, streaming=True)

# LCEL graph: route the user's question both to the retriever (-> context)
# and straight through (-> question), then format the prompt and generate.
chain = (
    {
        "context": itemgetter("question")
        | compression_retriever
        | RunnableLambda(format_docs),
        "question": itemgetter("question"),
    }
    | ChatPromptTemplate.from_template(prompt)
    | llm_model
)


@cl.on_chat_start
async def start_chat():
    """Greet the user when a new chat session opens."""
    await cl.Message(content="Ask your questions...").send()


@cl.on_message
async def main(message: cl.Message):
    """Answer a user message by streaming the RAG chain's output.

    Fix: use Chainlit's `stream_token` API instead of mutating
    `response.content` and calling `update()` per chunk — `stream_token`
    pushes only the delta to the UI, while the old pattern re-rendered the
    entire message on every token.
    """
    response = cl.Message(content="")
    async for chunk in chain.astream({"question": message.content}):
        await response.stream_token(chunk.content)
    await response.send()