import dspy
from dotenv import load_dotenv
from langchain_community.vectorstores import Chroma
from langchain_openai import OpenAIEmbeddings

# Load OPENAI_API_KEY (and any other secrets) from a local .env file.
load_dotenv()

persist_directory = "embeddings_db"

# Configure DSPy to use gpt-4o-mini as the language model.
lm = dspy.LM("openai/gpt-4o-mini")
dspy.configure(lm=lm)

# Open the persisted Chroma store with the same embedding model
# that was used to build it.
embedding_function = OpenAIEmbeddings(model="text-embedding-3-small")
vectordb = Chroma(
    persist_directory=persist_directory,
    embedding_function=embedding_function,
)
retriever = vectordb.as_retriever()


def retrieve(inputs):
    """Fetch the documents most similar to the question."""
    docs = retriever.invoke(inputs["question"])
    return docs


def get_source_pages(docs):
    """Extract deduplicated 'section - page' labels from each document's source path."""
    source_pages = []
    for doc in docs:
        section = doc.metadata["source"].split("/")[-2]
        page = doc.metadata["source"].split("/")[-1].split(".")[0]
        source_pages.append(f"{section} - {page}")
    return list(set(source_pages))


class COT_RAG(dspy.Module):
    def __init__(self):
        super().__init__()
        self.respond = dspy.ChainOfThought("context, question -> response")

    def forward(self, question):
        question_ = "Given the context from Cory Booker's speech, please answer the question below."
        question_ += f"\n\nQuestion: {question}\n\nStart your answer by specifying this was from Senator Booker."
        docs = retrieve({"question": question})
        self.docs = docs  # keep the retrieved docs so callers can cite sources
        context = [doc.page_content for doc in docs]
        return self.respond(context=context, question=question_)


def answer_question(question):
    rag = COT_RAG()
    answer = rag(question)  # calling the module runs forward() via dspy.Module.__call__
    return answer.response
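
# A minimal usage sketch, assuming the Chroma store in embeddings_db was built
# beforehand from the speech transcript and that OPENAI_API_KEY is set in .env.
# The sample question is hypothetical.
if __name__ == "__main__":
    rag = COT_RAG()
    answer = rag("What did the senator say about healthcare?")
    print(answer.response)
    # The module kept the retrieved docs, so citation labels can be surfaced too
    # (this assumes each doc's "source" metadata is a section/page file path).
    print(get_source_pages(rag.docs))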