File size: 5,388 Bytes
3521cd8
 
 
d4deb71
3521cd8
 
 
d4deb71
d2ae2fe
3521cd8
563dd93
 
3521cd8
 
 
 
 
 
 
 
 
 
0e10e9c
3521cd8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f440171
 
 
 
3521cd8
f440171
 
3521cd8
f440171
 
3521cd8
f440171
 
3521cd8
f440171
 
 
 
 
 
 
3521cd8
f440171
 
 
3521cd8
f440171
 
 
 
 
 
 
 
3521cd8
f440171
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3521cd8
f440171
 
3521cd8
f440171
 
3521cd8
f440171
 
3521cd8
 
 
f440171
3521cd8
 
 
f440171
3521cd8
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
import openai
import gradio as gr
from langchain.chains import RetrievalQA
from langchain.llms import OpenAI
from langchain.document_loaders import PyPDFLoader
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import FAISS
from langchain.chat_models import ChatOpenAI
from PyPDF2 import PdfReader



# Function to load and parse the uploaded PDF document
def load_pdf(file):
    """Parse the uploaded PDF into LangChain documents.

    Args:
        file: Uploaded file object exposing a ``.name`` path (as Gradio files do).

    Returns:
        list: One LangChain ``Document`` per PDF page, via PyPDFLoader.
    """
    return PyPDFLoader(file.name).load()

# Summarization function using GPT-4
def summarize_pdf(file, openai_api_key):
    """Summarize an uploaded PDF using a RetrievalQA chain over a FAISS index.

    Args:
        file: Uploaded file object exposing a ``.name`` path (as Gradio files do).
        openai_api_key: OpenAI API key supplied by the user at runtime.

    Returns:
        str: The model's summary of the document.
    """
    # Set the OpenAI API key dynamically from the user-supplied value.
    # SECURITY FIX: the previous version hard-coded a live secret key here;
    # that key must be considered compromised and rotated.
    openai.api_key = openai_api_key

    # Load and process the PDF
    documents = load_pdf(file)

    # Create embeddings for the documents
    embeddings = OpenAIEmbeddings(openai_api_key=openai_api_key)

    # Use LangChain's FAISS vector store to store and search the embeddings
    vector_store = FAISS.from_documents(documents, embeddings)

    # Create a RetrievalQA chain for summarization.
    # BUG FIXES: model is "gpt-4o" (was the typo "gpt-40"), and the class is
    # RetrievalQA (was the undefined name "RetrivalQA", a NameError at runtime).
    llm = ChatOpenAI(model="gpt-4o", openai_api_key=openai_api_key)
    qa_chain = RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=vector_store.as_retriever()
    )

    # Query the model for a summary of the document
    response = qa_chain.run("Summarize the content of the research paper.")
    return response


# Function to handle user queries and provide answers from the document.
# NOTE(review): this definition is shadowed by an identical, corrected
# `query_pdf` defined later in the file; the fixes below make the two
# definitions agree so behavior is the same regardless of which survives.
def query_pdf(file, user_query, openai_api_key):
    """Answer a user question about an uploaded PDF via a RetrievalQA chain.

    Args:
        file: Uploaded file object exposing a ``.name`` path (as Gradio files do).
        user_query: The question to ask about the document.
        openai_api_key: OpenAI API key supplied by the user at runtime.

    Returns:
        str: The model's answer to the query.
    """
    # Set the OpenAI API key dynamically
    openai.api_key = openai_api_key

    # Load and process the PDF
    documents = load_pdf(file)

    # Create embeddings for the documents
    embeddings = OpenAIEmbeddings(openai_api_key=openai_api_key)

    # Use LangChain's FAISS vector store to store and search the embeddings
    vector_store = FAISS.from_documents(documents, embeddings)

    # Create a RetrievalQA chain for querying the document.
    # BUG FIXES: model is "gpt-4o" (was the typo "gpt-40"), and the class is
    # RetrievalQA (was the undefined name "RetrivalQA", a NameError at runtime).
    llm = ChatOpenAI(model="gpt-4o", openai_api_key=openai_api_key)
    qa_chain = RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=vector_store.as_retriever()
    )

    # Query the model for the user query
    response = qa_chain.run(user_query)
    return response

# Function to handle user queries and provide answers from the document
def query_pdf(file, user_query, openai_api_key):
    """Answer a user question about an uploaded PDF via a RetrievalQA chain.

    Args:
        file: Uploaded file object exposing a ``.name`` path (as Gradio files do).
        user_query: The question to ask about the document.
        openai_api_key: OpenAI API key supplied by the user at runtime.

    Returns:
        str: The model's answer to the query.
    """
    # Make the user-supplied key available to the OpenAI client.
    openai.api_key = openai_api_key

    # Build a searchable FAISS index from the parsed PDF pages.
    pages = load_pdf(file)
    index = FAISS.from_documents(
        pages,
        OpenAIEmbeddings(openai_api_key=openai_api_key),
    )

    # Wire GPT-4o to the retriever and run the question through the chain.
    chain = RetrievalQA.from_chain_type(
        llm=ChatOpenAI(model="gpt-4o", openai_api_key=openai_api_key),
        chain_type="stuff",
        retriever=index.as_retriever(),
    )
    return chain.run(user_query)

# Define Gradio interface for summarization and Q&A over an uploaded PDF
def create_gradio_interface():
    """Build the Gradio Blocks UI with a Summarize tab and an Ask Questions tab.

    Returns:
        gr.Blocks: The assembled (not yet launched) Gradio app.
    """
    with gr.Blocks() as demo:
        gr.Markdown("### ChatPDF and Research Paper Summarizer using GPT-4 and LangChain")

        # Input field for API Key (password-masked; forwarded to both tabs)
        with gr.Row():
            openai_api_key_input = gr.Textbox(label="Enter OpenAI API Key", type="password", placeholder="Enter your OpenAI API key here")

        with gr.Tab("Summarize PDF"):
            with gr.Row():
                pdf_file = gr.File(label="Upload PDF Document")
                summarize_btn = gr.Button("Summarize")
                summary_output = gr.Textbox(label="Summary", interactive=False)
                clear_btn_summary = gr.Button("Clear Response")

            # Summarize Button Logic
            summarize_btn.click(summarize_pdf, inputs=[pdf_file, openai_api_key_input], outputs=summary_output)

            # Clear Response Button Logic for Summary Tab
            clear_btn_summary.click(lambda: "", inputs=[], outputs=summary_output)

        with gr.Tab("Ask Questions"):
            with gr.Row():
                pdf_file_q = gr.File(label="Upload PDF Document")
                user_input = gr.Textbox(label="Enter your question")
                answer_output = gr.Textbox(label="Answer", interactive=False)
                clear_btn_answer = gr.Button("Clear Response")

            # Submit Question Logic
            user_input.submit(query_pdf, inputs=[pdf_file_q, user_input, openai_api_key_input], outputs=answer_output)

            # Clear Response Button Logic for Answer Tab
            clear_btn_answer.click(lambda: "", inputs=[], outputs=answer_output)

            # BUG FIX: removed a second `user_input.submit(None, None, answer_output)`
            # listener — it fired on the same submit event as the query handler
            # above and cleared/raced the freshly computed answer.

    return demo

# Launch the Gradio app only when executed as a script (not on import)
if __name__ == "__main__":
    create_gradio_interface().launch(debug=True)