# NOTE: removed scraped Hugging Face Spaces status text ("Spaces: / Sleeping /
# Sleeping") — UI residue from extraction, not part of the program.
import dspy
from dotenv import load_dotenv

# Load credentials (e.g. OPENAI_API_KEY) from a local .env file before any
# OpenAI-backed client below is constructed.
load_dotenv()

from langchain_community.vectorstores import Chroma
from langchain_openai import OpenAIEmbeddings

# Directory where a Chroma vector store was previously persisted.
persist_directory = "embeddings_db"

# Route all dspy LM calls through OpenAI's gpt-4o-mini.
lm = dspy.LM("openai/gpt-4o-mini")
dspy.configure(lm=lm)

# NOTE(review): the query-time embedding model must match the one used when
# the store in `embeddings_db` was built — assumed to be
# text-embedding-3-small; confirm against the indexing script.
embedding_function = OpenAIEmbeddings(model="text-embedding-3-small")
vectordb = Chroma(
    persist_directory=persist_directory, embedding_function=embedding_function
)
# Default retriever over the persisted store; used by retrieve() below.
retriever = vectordb.as_retriever()
def retrieve(inputs):
    """Fetch documents relevant to the question in *inputs*.

    Args:
        inputs: mapping with a "question" key holding the query string.

    Returns:
        Whatever the module-level ``retriever`` returns for that query
        (a list of LangChain documents).
    """
    query = inputs["question"]
    return retriever.invoke(query)
def get_source_pages(docs):
    """Build human-readable "section - page" labels for retrieved documents.

    Args:
        docs: documents whose ``metadata["source"]`` is a "/"-separated path;
            the last two components are taken as section and page, with the
            page's file extension dropped (e.g. "speech/ch1/p3.txt" ->
            "ch1 - p3").

    Returns:
        De-duplicated labels in first-seen order. (The previous
        ``list(set(...))`` de-duplication returned the labels in
        nondeterministic order; ``dict.fromkeys`` keeps insertion order.)
    """
    labels = []
    for doc in docs:
        # Split once and reuse (the original split the same string twice).
        parts = doc.metadata["source"].split("/")
        section = parts[-2]
        page = parts[-1].split(".")[0]  # strip the file extension
        labels.append(f"{section} - {page}")
    return list(dict.fromkeys(labels))
class COT_RAG(dspy.Module):
    """Chain-of-thought RAG module: retrieve context, then answer via dspy.

    The prompt built in ``forward`` frames every answer as coming from
    Senator Cory Booker's speech.
    """

    def __init__(self):
        # Bug fix: dspy.Module subclasses must call super().__init__() so the
        # base class can register sub-modules (the ChainOfThought predictor).
        super().__init__()
        self.respond = dspy.ChainOfThought("context, question -> response")

    def forward(self, question):
        """Answer *question* using retrieved context.

        Args:
            question: the user's question as a plain string.

        Returns:
            The dspy prediction from the ChainOfThought predictor; its
            ``response`` attribute holds the answer text.
        """
        # Wrap the raw question so the model attributes its answer correctly.
        question_ = "Given the context from Cory Booker's speech, please answer the question below."
        question_ += f"\n\nQuestion: {question}\n\nStart your answer by specifying this was from Senator Booker."
        docs = retrieve({"question": question})
        # Keep the raw docs so callers can inspect sources after a call
        # (e.g. via get_source_pages(rag.docs)).
        self.docs = docs
        context = [doc.page_content for doc in docs]
        return self.respond(context=context, question=question_)
def answer_question(question):
    """Run a one-shot RAG query and return the answer text.

    Args:
        question: the user's question as a plain string.

    Returns:
        The ``response`` field of the dspy prediction.
    """
    rag = COT_RAG()
    # Fix: call the module instead of rag.forward(question) —
    # dspy.Module.__call__ wraps forward() with tracing/history bookkeeping,
    # and calling forward() directly bypasses it.
    answer = rag(question)
    return answer.response