"""Chainlit RAG chat app: hybrid Qdrant retrieval, Cohere rerank, OpenAI answer."""
import os
from operator import itemgetter

import chainlit as cl
from dotenv import load_dotenv
from langchain.retrievers.contextual_compression import ContextualCompressionRetriever
from langchain_cohere import CohereRerank
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnableLambda
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from langchain_openai import ChatOpenAI
from langchain_qdrant import FastEmbedSparse, QdrantVectorStore, RetrievalMode
from qdrant_client import QdrantClient
# .env must provide QDRANT_URL and QDRANT_API_KEY; the Google, Cohere, and
# OpenAI clients below read their own credentials from the environment too.
load_dotenv()

# Client for the hosted Qdrant instance.
client = QdrantClient(
    url=os.environ["QDRANT_URL"],
    api_key=os.environ["QDRANT_API_KEY"],
)

# Dense embeddings from Google's experimental Gemini embedding model.
embeddings = GoogleGenerativeAIEmbeddings(model="models/gemini-embedding-exp-03-07")

# Hybrid store: dense Gemini vectors plus BM25 sparse vectors, both stored
# under named vectors ("dense" / "sparse") in the existing collection.
vectorstore = QdrantVectorStore(
    client=client,
    collection_name="collection_v_0_3",
    embedding=embeddings,
    sparse_embedding=FastEmbedSparse(model_name="Qdrant/bm25"),
    retrieval_mode=RetrievalMode.HYBRID,
    vector_name="dense",
    sparse_vector_name="sparse",
)

# Over-fetch 20 candidates, then let Cohere's reranker prune them down.
retriever = vectorstore.as_retriever(search_kwargs={"k": 20})
compressor = CohereRerank(model="rerank-v3.5")
compression_retriever = ContextualCompressionRetriever(
    base_compressor=compressor,
    base_retriever=retriever,
)
# System/template prompt; {question} and {context} are filled in by
# ChatPromptTemplate when the chain runs.
prompt = """\
You are a helpful and kind assistant. Use the context provided below to answer the question.
If you do not know the answer, or are unsure, say you don't know.
Query:
{question}
Context:
{context}
"""
def format_docs(docs):
    """Render retrieved documents as one context string for the prompt.

    Args:
        docs: iterable of objects exposing a ``page_content`` str attribute
            (LangChain ``Document``s in practice).

    Returns:
        The documents' contents joined by blank lines. The previous version
        returned a list, whose Python repr (brackets, quotes) leaked into
        the prompt when the template stringified it.
    """
    return "\n\n".join(doc.page_content for doc in docs)
# Streaming chat model; temperature 0 keeps answers deterministic.
llm_model = ChatOpenAI(model="gpt-4.1-mini", temperature=0, streaming=True)

# LCEL pipeline. The incoming {"question": ...} dict is fanned out to:
#   - "context": question -> reranking retriever -> formatted doc text
#   - "question": passed through unchanged
# then the prompt template is filled and streamed through the LLM.
_inputs = {
    "context": itemgetter("question")
    | compression_retriever
    | RunnableLambda(format_docs),
    "question": itemgetter("question"),
}
chain = _inputs | ChatPromptTemplate.from_template(prompt) | llm_model
@cl.on_chat_start
async def start_chat():
    """Greet the user when a new Chainlit session opens.

    NOTE(review): the original had no ``@cl.on_chat_start`` decorator, so
    Chainlit never registered (and never invoked) this handler.
    """
    await cl.Message(content="Ask your questions...").send()
@cl.on_message
async def main(message: cl.Message):
    """Answer one user message by streaming the RAG chain's output.

    Args:
        message: the incoming Chainlit message; its ``content`` is the
            user's question fed to the chain.

    NOTE(review): the original had no ``@cl.on_message`` decorator, so
    Chainlit never routed messages here.
    """
    # Send an empty message first so tokens can be appended to it live.
    response = cl.Message(content="")
    await response.send()

    async for chunk in chain.astream({"question": message.content}):
        # stream_token appends the delta and pushes it to the UI in one
        # call, instead of rewriting the whole message on every chunk.
        await response.stream_token(chunk.content)

    # Finalize the message once the stream is exhausted.
    await response.update()