Spaces:
Sleeping
Sleeping
File size: 5,388 Bytes
3521cd8 d4deb71 3521cd8 d4deb71 d2ae2fe 3521cd8 563dd93 3521cd8 0e10e9c 3521cd8 f440171 3521cd8 f440171 3521cd8 f440171 3521cd8 f440171 3521cd8 f440171 3521cd8 f440171 3521cd8 f440171 3521cd8 f440171 3521cd8 f440171 3521cd8 f440171 3521cd8 f440171 3521cd8 f440171 3521cd8 f440171 3521cd8 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 |
import openai
import gradio as gr
from langchain.chains import RetrievalQA
from langchain.llms import OpenAI
from langchain.document_loaders import PyPDFLoader
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import FAISS
from langchain.chat_models import ChatOpenAI
from PyPDF2 import PdfReader
# Helper: turn an uploaded PDF into LangChain documents
def load_pdf(file):
    """Read *file* (a Gradio upload object with a ``.name`` path) and return
    the list of LangChain documents produced by PyPDFLoader (one per page)."""
    # PyPDFLoader handles opening and page-splitting the PDF for us.
    return PyPDFLoader(file.name).load()
# Summarization function using GPT-4o
def summarize_pdf(file, openai_api_key):
    """Summarize an uploaded PDF with a RetrievalQA chain over a FAISS index.

    Args:
        file: Gradio file object for the uploaded PDF.
        openai_api_key: OpenAI API key entered by the user in the UI.

    Returns:
        The model's summary of the document, as a string.
    """
    # SECURITY FIX: use the user-supplied key. The original hard-coded a live
    # API key in source (a leaked secret that must be revoked) and silently
    # ignored the ``openai_api_key`` parameter.
    openai.api_key = openai_api_key
    # Load and process the PDF
    documents = load_pdf(file)
    # Create embeddings for the documents
    embeddings = OpenAIEmbeddings(openai_api_key=openai_api_key)
    # Use LangChain's FAISS vector store to store and search the embeddings
    vector_store = FAISS.from_documents(documents, embeddings)
    # FIX: model is "gpt-4o" (letter o), not "gpt-40" — the latter is not a
    # valid OpenAI model name and the API call would fail.
    llm = ChatOpenAI(model="gpt-4o", openai_api_key=openai_api_key)
    # FIX: class is RetrievalQA (as imported at the top of the file);
    # "RetrivalQA" raised NameError at runtime.
    qa_chain = RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=vector_store.as_retriever(),
    )
    # Query the model for a summary of the document
    response = qa_chain.run("Summarize the content of the research paper.")
    return response
# Function to handle user queries and provide answers from the document
# NOTE(review): an identical ``query_pdf`` is defined again below and shadows
# this one; consider deleting one of the two copies.
def query_pdf(file, user_query, openai_api_key):
    """Answer *user_query* from the uploaded PDF via a RetrievalQA chain.

    Args:
        file: Gradio file object for the uploaded PDF.
        user_query: The question to answer from the document.
        openai_api_key: OpenAI API key entered by the user in the UI.

    Returns:
        The model's answer, as a string.
    """
    # Set the OpenAI API key dynamically
    openai.api_key = openai_api_key
    # Load and process the PDF
    documents = load_pdf(file)
    # Create embeddings for the documents
    embeddings = OpenAIEmbeddings(openai_api_key=openai_api_key)
    # Use LangChain's FAISS vector store to store and search the embeddings
    vector_store = FAISS.from_documents(documents, embeddings)
    # FIX: "gpt-4o" (letter o), not "gpt-40" — the latter is not a valid model.
    llm = ChatOpenAI(model="gpt-4o", openai_api_key=openai_api_key)
    # FIX: RetrievalQA (as imported); "RetrivalQA" raised NameError at runtime.
    qa_chain = RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=vector_store.as_retriever(),
    )
    # Query the model for the user query
    response = qa_chain.run(user_query)
    return response
# Answer free-form questions against the uploaded document
def query_pdf(file, user_query, openai_api_key):
    """Answer *user_query* from the uploaded PDF using RetrievalQA over FAISS.

    Args:
        file: Gradio file object for the uploaded PDF.
        user_query: The question to answer from the document.
        openai_api_key: OpenAI API key entered by the user in the UI.

    Returns:
        The model's answer, as a string.
    """
    # Make the user's key the active one for this request.
    openai.api_key = openai_api_key
    # Index the document pages in a FAISS vector store for retrieval.
    docs = load_pdf(file)
    store = FAISS.from_documents(docs, OpenAIEmbeddings(openai_api_key=openai_api_key))
    # Wire GPT-4o into a "stuff"-type RetrievalQA chain over that index.
    chain = RetrievalQA.from_chain_type(
        llm=ChatOpenAI(model="gpt-4o", openai_api_key=openai_api_key),
        chain_type="stuff",
        retriever=store.as_retriever(),
    )
    return chain.run(user_query)
# Define Gradio interface for the summarization
def create_gradio_interface():
    """Build the two-tab Gradio UI (Summarize PDF / Ask Questions).

    Returns:
        The assembled ``gr.Blocks`` demo, ready to ``launch()``.
    """
    with gr.Blocks() as demo:
        gr.Markdown("### ChatPDF and Research Paper Summarizer using GPT-4 and LangChain")
        # Input field for API Key
        with gr.Row():
            openai_api_key_input = gr.Textbox(label="Enter OpenAI API Key", type="password", placeholder="Enter your OpenAI API key here")
        with gr.Tab("Summarize PDF"):
            with gr.Row():
                pdf_file = gr.File(label="Upload PDF Document")
                summarize_btn = gr.Button("Summarize")
            summary_output = gr.Textbox(label="Summary", interactive=False)
            clear_btn_summary = gr.Button("Clear Response")
            # Summarize Button Logic
            summarize_btn.click(summarize_pdf, inputs=[pdf_file, openai_api_key_input], outputs=summary_output)
            # Clear Response Button Logic for Summary Tab
            clear_btn_summary.click(lambda: "", inputs=[], outputs=summary_output)
        with gr.Tab("Ask Questions"):
            with gr.Row():
                pdf_file_q = gr.File(label="Upload PDF Document")
                user_input = gr.Textbox(label="Enter your question")
            answer_output = gr.Textbox(label="Answer", interactive=False)
            clear_btn_answer = gr.Button("Clear Response")
            # Submit Question Logic
            user_input.submit(query_pdf, inputs=[pdf_file_q, user_input, openai_api_key_input], outputs=answer_output)
            # Clear Response Button Logic for Answer Tab
            clear_btn_answer.click(lambda: "", inputs=[], outputs=answer_output)
            # FIX: removed a second ``user_input.submit(None, None, answer_output)``
            # handler — it fired on the same submit event as the query handler
            # above and blanked ``answer_output``, clobbering the freshly
            # computed answer. Clearing is already available via the button.
    return demo
# Script entry point: build the UI and start the Gradio server.
if __name__ == "__main__":
    app = create_gradio_interface()
    # debug=True surfaces tracebacks in the console while developing.
    app.launch(debug=True)
|