nsultan5 committed
Commit c38223a · verified · 1 Parent(s): 6cb9908

Create app.py

Files changed (1)
app.py +110 -0
app.py ADDED
@@ -0,0 +1,110 @@
+ import openai
+ import gradio as gr
+ from langchain.chains import RetrievalQA
+ from langchain.llms import OpenAI
+ from langchain.document_loaders import PyPDFLoader
+ from langchain.embeddings.openai import OpenAIEmbeddings
+ from langchain.vectorstores import FAISS
+ from langchain.chat_models import ChatOpenAI
+ from PyPDF2 import PdfReader
+
+ # Function to load and process multiple PDFs
+ def load_pdfs(files):
+     documents = []
+     for file in files:
+         loader = PyPDFLoader(file.name)
+         documents.extend(loader.load())  # Append documents from each file
+     return documents
+
+ # Summarization function using GPT-4 for multiple PDFs
+ def summarize_pdfs(files, openai_api_key):
+     openai.api_key = openai_api_key  # Set OpenAI API key
+
+     # Load and process the PDFs
+     documents = load_pdfs(files)
+
+     # Create embeddings for the documents
+     embeddings = OpenAIEmbeddings(openai_api_key=openai_api_key)
+
+     # Use LangChain's FAISS vector store to store and search the embeddings
+     vector_store = FAISS.from_documents(documents, embeddings)
+
+     # Create a RetrievalQA chain for summarization
+     llm = ChatOpenAI(model="gpt-4o", openai_api_key=openai_api_key)
+     qa_chain = RetrievalQA.from_chain_type(
+         llm=llm,
+         chain_type="stuff",
+         retriever=vector_store.as_retriever()
+     )
+
+     # Query the model for a summary of all PDFs
+     response = qa_chain.run("Summarize the content of the research papers.")
+     return response
+
+ # Function to handle user queries for multiple PDFs
+ def query_pdfs(files, user_query, openai_api_key):
+     openai.api_key = openai_api_key  # Set OpenAI API key
+
+     # Load and process the PDFs
+     documents = load_pdfs(files)
+
+     # Create embeddings for the documents
+     embeddings = OpenAIEmbeddings(openai_api_key=openai_api_key)
+
+     # Use LangChain's FAISS vector store to store and search the embeddings
+     vector_store = FAISS.from_documents(documents, embeddings)
+
+     # Create a RetrievalQA chain for querying the documents
+     llm = ChatOpenAI(model="gpt-4o", openai_api_key=openai_api_key)
+     qa_chain = RetrievalQA.from_chain_type(
+         llm=llm,
+         chain_type="stuff",
+         retriever=vector_store.as_retriever()
+     )
+
+     # Query the model with the user's question
+     response = qa_chain.run(user_query)
+     return response
+
+ # Define the Gradio interface for handling multiple PDFs
+ def create_gradio_interface():
+     with gr.Blocks() as demo:
+         gr.Markdown("### Multi-PDF Chat and Research Paper Summarizer using GPT-4 and LangChain")
+
+         # Input field for the API key
+         with gr.Row():
+             openai_api_key_input = gr.Textbox(label="Enter OpenAI API Key", type="password", placeholder="Enter your OpenAI API key here")
+
+         with gr.Tab("Summarize PDFs"):
+             with gr.Row():
+                 # file_count="multiple" is Gradio's keyword for multi-file uploads
+                 pdf_files = gr.File(label="Upload PDF Documents", file_types=[".pdf"], file_count="multiple")
+                 summarize_btn = gr.Button("Summarize")
+             summary_output = gr.Textbox(label="Summary", interactive=False)
+             clear_btn_summary = gr.Button("Clear Response")
+
+             # Summarize button logic
+             summarize_btn.click(summarize_pdfs, inputs=[pdf_files, openai_api_key_input], outputs=summary_output)
+
+             # Clear Response button logic for the Summary tab
+             clear_btn_summary.click(lambda: "", inputs=[], outputs=summary_output)
+
+         with gr.Tab("Ask Questions"):
+             with gr.Row():
+                 pdf_files_q = gr.File(label="Upload PDF Documents", file_types=[".pdf"], file_count="multiple")
+                 user_input = gr.Textbox(label="Enter your question")
+             answer_output = gr.Textbox(label="Answer", interactive=False)
+             query_btn = gr.Button("Ask")
+             clear_btn_answer = gr.Button("Clear Response")
+
+             # Submit Question logic
+             query_btn.click(query_pdfs, inputs=[pdf_files_q, user_input, openai_api_key_input], outputs=answer_output)
+
+             # Clear Response button logic for the Answer tab
+             clear_btn_answer.click(lambda: "", inputs=[], outputs=answer_output)
+
+     return demo
+
+ # Run the Gradio app
+ if __name__ == "__main__":
+     demo = create_gradio_interface()
+     demo.launch(debug=True)
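
This commit adds only app.py. For the imports above to resolve on a Space, a companion requirements.txt (not part of this change) would presumably also be needed; a minimal sketch, with package names inferred from the imports and langchain held below 0.2 because the legacy langchain.* paths used here were removed in later releases:

    openai
    gradio
    langchain<0.2
    pypdf
    PyPDF2
    faiss-cpu
    tiktoken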