# NOTE(review): the three lines below were Hugging Face Spaces page residue
# ("Spaces: / Sleeping / Sleeping") left by a scraper — converted to a comment
# so the module parses.
import openai
import gradio as gr
from PyPDF2 import PdfReader
from langchain.chains import RetrievalQA
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import PyPDFLoader
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.llms import OpenAI
from langchain.vectorstores import FAISS
# Helper: read an uploaded PDF into LangChain document objects.
def load_pdf(file):
    """Load the uploaded PDF with LangChain's PyPDFLoader.

    Args:
        file: Uploaded file object exposing a ``.name`` filesystem path.

    Returns:
        The list of page documents produced by ``PyPDFLoader.load()``.
    """
    pdf_loader = PyPDFLoader(file.name)
    return pdf_loader.load()
# Summarization function using GPT-4o over a RetrievalQA chain.
def summarize_pdf(file, openai_api_key):
    """Summarize an uploaded PDF document.

    Args:
        file: Uploaded file object exposing a ``.name`` filesystem path.
        openai_api_key: User-supplied OpenAI API key.

    Returns:
        str: The model's summary of the document.
    """
    # BUG FIX: the original hard-coded a (now-leaked) secret API key here.
    # Never commit keys to source — use the user-supplied key, exactly as
    # the sibling query_pdf already does.
    openai.api_key = openai_api_key
    # Load and process the PDF into page documents.
    documents = load_pdf(file)
    # Create embeddings for the documents.
    embeddings = OpenAIEmbeddings(openai_api_key=openai_api_key)
    # FAISS vector store to index and search the embeddings.
    vector_store = FAISS.from_documents(documents, embeddings)
    # BUG FIX: model id was 'gpt-40' (digit zero) — the correct id is
    # 'gpt-4o', matching the second query_pdf definition in this file.
    llm = ChatOpenAI(model="gpt-4o", openai_api_key=openai_api_key)
    # BUG FIX: class name was misspelled 'RetrivalQA' (NameError at runtime).
    qa_chain = RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=vector_store.as_retriever(),
    )
    # Ask the chain for a whole-document summary.
    response = qa_chain.run("Summarize the content of the research paper.")
    return response
# Function to handle user queries and provide answers from the document.
# NOTE(review): this definition is immediately shadowed by an identical
# (corrected) re-definition of query_pdf below — consider deleting one copy.
def query_pdf(file, user_query, openai_api_key):
    """Answer ``user_query`` from the uploaded PDF via a RetrievalQA chain.

    Args:
        file: Uploaded file object exposing a ``.name`` filesystem path.
        user_query: The question to answer from the document.
        openai_api_key: User-supplied OpenAI API key.

    Returns:
        str: The model's answer.
    """
    # Set the OpenAI API key dynamically.
    openai.api_key = openai_api_key
    # Load and process the PDF.
    documents = load_pdf(file)
    # Create embeddings for the documents.
    embeddings = OpenAIEmbeddings(openai_api_key=openai_api_key)
    # FAISS vector store to index and search the embeddings.
    vector_store = FAISS.from_documents(documents, embeddings)
    # BUG FIX: model id was 'gpt-40' (digit zero) — correct id is 'gpt-4o'.
    llm = ChatOpenAI(model="gpt-4o", openai_api_key=openai_api_key)
    # BUG FIX: class name was misspelled 'RetrivalQA' (NameError at runtime).
    qa_chain = RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=vector_store.as_retriever(),
    )
    # Run the user's question through the chain.
    response = qa_chain.run(user_query)
    return response
# Answer a user's question against the uploaded document.
def query_pdf(file, user_query, openai_api_key):
    """Answer ``user_query`` from the uploaded PDF via a RetrievalQA chain.

    Args:
        file: Uploaded file object exposing a ``.name`` filesystem path.
        user_query: The question to answer from the document.
        openai_api_key: User-supplied OpenAI API key.

    Returns:
        str: The model's answer.
    """
    # Make the user-supplied key active for the OpenAI client.
    openai.api_key = openai_api_key
    # Build a searchable FAISS index over the document's pages.
    pages = load_pdf(file)
    page_embeddings = OpenAIEmbeddings(openai_api_key=openai_api_key)
    index = FAISS.from_documents(pages, page_embeddings)
    # Wire GPT-4o to the retriever and run the question through the chain.
    chat_model = ChatOpenAI(model="gpt-4o", openai_api_key=openai_api_key)
    chain = RetrievalQA.from_chain_type(
        llm=chat_model,
        chain_type="stuff",
        retriever=index.as_retriever(),
    )
    return chain.run(user_query)
# Define the Gradio interface for summarization and Q&A.
def create_gradio_interface():
    """Build the two-tab Gradio Blocks UI (Summarize PDF / Ask Questions).

    Returns:
        gr.Blocks: The assembled (not yet launched) Gradio app.
    """
    with gr.Blocks() as demo:
        gr.Markdown("### ChatPDF and Research Paper Summarizer using GPT-4 and LangChain")
        # Input field for the API key, shared by both tabs.
        with gr.Row():
            openai_api_key_input = gr.Textbox(
                label="Enter OpenAI API Key",
                type="password",
                placeholder="Enter your OpenAI API key here",
            )
        with gr.Tab("Summarize PDF"):
            with gr.Row():
                pdf_file = gr.File(label="Upload PDF Document")
                summarize_btn = gr.Button("Summarize")
            summary_output = gr.Textbox(label="Summary", interactive=False)
            clear_btn_summary = gr.Button("Clear Response")
            # Summarize button logic.
            summarize_btn.click(
                summarize_pdf,
                inputs=[pdf_file, openai_api_key_input],
                outputs=summary_output,
            )
            # Clear-response button logic for the summary tab.
            clear_btn_summary.click(lambda: "", inputs=[], outputs=summary_output)
        with gr.Tab("Ask Questions"):
            with gr.Row():
                pdf_file_q = gr.File(label="Upload PDF Document")
                user_input = gr.Textbox(label="Enter your question")
            answer_output = gr.Textbox(label="Answer", interactive=False)
            clear_btn_answer = gr.Button("Clear Response")
            # Submit-question logic: pressing Enter runs the query.
            user_input.submit(
                query_pdf,
                inputs=[pdf_file_q, user_input, openai_api_key_input],
                outputs=answer_output,
            )
            # Clear-response button logic for the answer tab.
            clear_btn_answer.click(lambda: "", inputs=[], outputs=answer_output)
            # BUG FIX: the original also registered
            #   user_input.submit(None, None, answer_output)
            # on the same submit event, which cleared answer_output on every
            # submit — wiping the answer the query_pdf handler had just
            # produced. That handler is removed; the Clear button above is
            # the supported way to reset the answer.
    return demo
# Entry point: build and serve the Gradio app when run directly.
if __name__ == "__main__":
    app = create_gradio_interface()
    app.launch(debug=True)