# qa_system.py
import pinecone
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain_pinecone import PineconeVectorStore
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate


class PineconeQA:
    def __init__(self, pinecone_api_key, openai_api_key, index_name):
        # Initialize the Pinecone client and connect to the target index
        self.pc = pinecone.Pinecone(api_key=pinecone_api_key)
        self.index = self.pc.Index(index_name)

        # Embedding model used to embed queries against the index
        self.embeddings = OpenAIEmbeddings(openai_api_key=openai_api_key)

        # Wrap the Pinecone index as a LangChain vector store
        self.vectorstore = PineconeVectorStore(
            index=self.index,
            embedding=self.embeddings,
        )

        # Chat model; a low temperature keeps answers close to the context
        self.llm = ChatOpenAI(
            openai_api_key=openai_api_key,
            model="gpt-4o",
            temperature=0.2,
        )

        # History of question/answer pairs produced by ask()
        self.conversation_history = []

        # Build the retrieval-augmented generation (RAG) chain once;
        # ask() reuses it instead of rebuilding it on every call
        self._create_rag_chain()

    def _create_rag_chain(self):
        # System prompt: answer from the retrieved PubMed Open Access
        # context, and say so explicitly when the context is insufficient
        system_prompt = (
            "You are an expert assistant for biomedical question-answering tasks. "
            "You will be provided with context retrieved from medical literature, "
            "specifically PubMed Open Access articles. "
            "Use the provided context to answer the question as accurately and "
            "concisely as possible. "
            "If the context does not provide sufficient information, state that "
            "the specific details are not available in the context. "
            "Do not include statements about limitations of the context in your "
            "response. Your answer should sound authoritative and professional, "
            "tailored for a medical audience."
            "\n\n"
            "Context:\n{context}\n"
        )

        # Chat prompt template: system instructions plus the user question
        prompt = ChatPromptTemplate.from_messages([
            ("system", system_prompt),
            ("human", "{input}"),
        ])

        # "Stuff" chain: inserts the retrieved documents into {context}
        question_answer_chain = create_stuff_documents_chain(self.llm, prompt)

        # Retrieval chain; MMR search diversifies the retrieved chunks
        self.rag_chain = create_retrieval_chain(
            self.vectorstore.as_retriever(search_type="mmr"),
            question_answer_chain,
        )

    def merge_relevant_chunks(self, retrieved_docs, question, max_tokens=1500):
        """
        Concatenate retrieved chunks, in retriever order, until an
        approximate token budget (counted as whitespace-separated words)
        is reached.
        """
        merged_context = ""
        current_tokens = 0
        for doc in retrieved_docs:
            tokens = doc.page_content.split()
            if current_tokens + len(tokens) <= max_tokens:
                merged_context += doc.page_content + "\n"
                current_tokens += len(tokens)
            else:
                break
        return merged_context

    def ask(self, question):
        """
        Ask a question and return the answer together with the retrieved
        source context.
        """
        try:
            results = self.rag_chain.invoke({"input": question})
            self.conversation_history.append(
                {"question": question, "answer": results["answer"]}
            )
            return {
                "answer": results["answer"],
                "context": results["context"],
            }
        except Exception as e:
            return {"error": str(e)}