nsultan5 committed on
Commit
62b0098
·
verified ·
1 Parent(s): a09f38b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +15 -90
app.py CHANGED
@@ -1,110 +1,35 @@
1
- import openai
2
  import gradio as gr
3
- from langchain.chains import RetrievalQA
4
- from langchain.llms import OpenAI
5
  from langchain.document_loaders import PyPDFLoader
6
  from langchain.embeddings.openai import OpenAIEmbeddings
7
  from langchain.vectorstores import FAISS
8
  from langchain.chat_models import ChatOpenAI
9
- from PyPDF2 import PdfReader
10
 
11
- # Function to load and process multiple PDFs
12
- def load_pdfs(files):
13
- documents = []
14
  for file in files:
15
  loader = PyPDFLoader(file.name)
16
- documents.extend(loader.load()) # Append documents from each file
17
- return documents
18
-
19
- # Summarization function using GPT-4 for multiple PDFs
20
- def summarize_pdfs(files, openai_api_key):
21
- openai.api_key = openai_api_key # Set OpenAI API key
22
-
23
- # Load and process the PDFs
24
- documents = load_pdfs(files)
25
-
26
- # Create embeddings for the documents
27
- embeddings = OpenAIEmbeddings(openai_api_key=openai_api_key)
28
-
29
- # Use Langchain's FAISS Vector Store to store and search the embeddings
30
- vector_store = FAISS.from_documents(documents, embeddings)
31
-
32
- # Create a RetrievalQA chain for summarization
33
- llm = ChatOpenAI(model='gpt-4o', openai_api_key=openai_api_key)
34
- qa_chain = RetrievalQA.from_chain_type(
35
- llm=llm,
36
- chain_type="stuff",
37
- retriever=vector_store.as_retriever()
38
- )
39
-
40
- # Query the model for a summary of all PDFs
41
- response = qa_chain.run("Summarize the content of the research papers.")
42
- return response
43
-
44
- # Function to handle user queries for multiple PDFs
45
- def query_pdfs(files, user_query, openai_api_key):
46
- openai.api_key = openai_api_key # Set OpenAI API key
47
 
48
- # Load and process the PDFs
49
- documents = load_pdfs(files)
50
-
51
- # Create embeddings for the documents
52
- embeddings = OpenAIEmbeddings(openai_api_key=openai_api_key)
53
-
54
- # Use LangChain's FAISS Vector Store to store and search the embeddings
55
- vector_store = FAISS.from_documents(documents, embeddings)
56
-
57
- # Create a RetrievalQA chain for querying the documents
58
- llm = ChatOpenAI(model="gpt-4o", openai_api_key=openai_api_key)
59
- qa_chain = RetrievalQA.from_chain_type(
60
- llm=llm,
61
- chain_type="stuff",
62
- retriever=vector_store.as_retriever()
63
- )
64
-
65
- # Query the model for the user query
66
- response = qa_chain.run(user_query)
67
- return response
68
-
69
- # Define Gradio interface for handling multiple PDFs
70
  def create_gradio_interface():
 
71
  with gr.Blocks() as demo:
72
- gr.Markdown("### Multi-PDF Chat and Research Paper Summarizer using GPT-4 and LangChain")
73
 
74
- # Input field for API Key
75
- with gr.Row():
76
- openai_api_key_input = gr.Textbox(label="Enter OpenAI API Key", type="password", placeholder="Enter your OpenAI API key here")
77
 
78
- with gr.Tab("Summarize PDFs"):
79
- with gr.Row():
80
- pdf_files = gr.File(label="Upload PDF Documents", file_types=[".pdf"])
81
- summarize_btn = gr.Button("Summarize")
82
- summary_output = gr.Textbox(label="Summary", interactive=False)
83
- clear_btn_summary = gr.Button("Clear Response")
84
 
85
- # Summarize Button Logic
86
- summarize_btn.click(summarize_pdfs, inputs=[pdf_files, openai_api_key_input], outputs=summary_output)
87
 
88
- # Clear Response Button Logic for Summary Tab
89
- clear_btn_summary.click(lambda: "", inputs=[], outputs=summary_output)
90
-
91
- with gr.Tab("Ask Questions"):
92
- with gr.Row():
93
- pdf_files_q = gr.File(label="Upload PDF Documents", file_types=[".pdf"], multiple=True)
94
- user_input = gr.Textbox(label="Enter your question")
95
- answer_output = gr.Textbox(label="Answer", interactive=False)
96
- query_btn = gr.Button("Ask")
97
- clear_btn_answer = gr.Button("Clear Response")
98
-
99
- # Submit Question Logic
100
- query_btn.click(query_pdfs, inputs=[pdf_files_q, user_input, openai_api_key_input], outputs=answer_output)
101
-
102
- # Clear Response Button Logic for Answer Tab
103
- clear_btn_answer.click(lambda: "", inputs=[], outputs=answer_output)
104
 
105
  return demo
106
 
107
- # Run Gradio app
108
  if __name__ == "__main__":
109
  demo = create_gradio_interface()
110
- demo.launch(debug=True)
 
 
 
1
  import gradio as gr
 
 
2
  from langchain.document_loaders import PyPDFLoader
3
  from langchain.embeddings.openai import OpenAIEmbeddings
4
  from langchain.vectorstores import FAISS
5
  from langchain.chat_models import ChatOpenAI
6
+ from langchain.llms import OpenAI
7
 
8
+ def process_pdfs(files):
9
+ """Process uploaded PDFs and return extracted text."""
10
+ texts = []
11
  for file in files:
12
  loader = PyPDFLoader(file.name)
13
+ docs = loader.load()
14
+ texts.append("\n".join([doc.page_content for doc in docs]))
15
+ return "\n\n".join(texts)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17
  def create_gradio_interface():
18
+ """Create and launch the Gradio interface."""
19
  with gr.Blocks() as demo:
20
+ gr.Markdown("# PDF Text Extractor")
21
 
22
+ pdf_files = gr.Files(label="Upload PDF Documents", type="file") # Fixed multiple file issue
 
 
23
 
24
+ output_text = gr.Textbox(label="Extracted Text", lines=10)
 
 
 
 
 
25
 
26
+ extract_button = gr.Button("Extract Text")
 
27
 
28
+ extract_button.click(process_pdfs, inputs=[pdf_files], outputs=[output_text])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
 
30
  return demo
31
 
 
32
  if __name__ == "__main__":
33
  demo = create_gradio_interface()
34
+ demo.launch()
35
+