lakshmi324 committed on
Commit
203e2cd
·
1 Parent(s): e866a92

initial upload

Browse files
Files changed (1) hide show
  1. app.py +61 -0
app.py ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import os
3
+ from langchain.chains import RetrievalQA
4
+ from langchain.llms import OpenAI
5
+ from langchain.document_loaders import PyPDFLoader
6
+ from langchain.text_splitter import CharacterTextSplitter
7
+ from langchain.embeddings import OpenAIEmbeddings
8
+ from langchain.vectorstores import Chroma
9
+
10
def qa_system(pdf_file, openai_key, prompt, chain_type, k):
    """Answer a question about a PDF with a retrieval-augmented QA chain.

    The PDF is loaded, split into chunks of roughly 1000 characters,
    embedded with OpenAI embeddings and indexed in a Chroma vector store.
    The ``k`` most similar chunks are retrieved and handed to a
    ``RetrievalQA`` chain of the requested ``chain_type``.

    Args:
        pdf_file: The uploaded PDF. Either a filesystem path or an object
            exposing the path as ``.name`` (e.g. a temp-file wrapper).
        openai_key: OpenAI API key used for both embeddings and the LLM.
        prompt: The question to answer.
        chain_type: Combine-documents strategy passed to
            ``RetrievalQA.from_chain_type`` (the UI offers ``stuff``,
            ``map_reduce``, ``refine`` and ``map_rerank``).
        k: Number of chunks to retrieve. Anything ``int()`` accepts is
            allowed; fractional values are truncated.

    Returns:
        A ``(answer, sources)`` tuple where ``answer`` is the chain's
        ``result`` string and ``sources`` is the list of ``page_content``
        strings of the retrieved source documents.

    Raises:
        ValueError: If an input is missing/blank, ``k`` is not a positive
            integer, or no text could be extracted from the PDF.
    """
    # Local import: only needed for the per-request collection name.
    import uuid

    # Validate everything up front so the user gets a clear message instead
    # of an AttributeError/TypeError from deep inside a library call.
    if pdf_file is None:
        raise ValueError("Please upload a PDF file.")
    if not openai_key or not openai_key.strip():
        raise ValueError("Please enter your OpenAI API key.")
    if not prompt or not prompt.strip():
        raise ValueError("Please enter a question.")
    if not chain_type:
        raise ValueError("Please select a chain type.")

    # UI sliders may deliver floats (e.g. 2.0); the retriever needs an
    # integer number of results.
    try:
        top_k = int(k)
    except (TypeError, ValueError) as err:
        raise ValueError("Number of relevant chunks must be a number.") from err
    if top_k < 1:
        raise ValueError("Number of relevant chunks must be at least 1.")

    # The key is passed explicitly to each client rather than written to
    # os.environ: an environment variable is process-global and would stay
    # set (and be visible to later requests) after this call returns.
    api_key = openai_key.strip()

    # Accept both a plain path and an object carrying the path in ``.name``.
    pdf_path = os.fspath(getattr(pdf_file, "name", pdf_file))

    # Load the document.
    documents = PyPDFLoader(pdf_path).load()

    # Split the document into chunks.
    text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
    texts = text_splitter.split_documents(documents)
    if not texts:
        raise ValueError("No extractable text was found in the PDF.")

    # Select which embeddings we want to use.
    embeddings = OpenAIEmbeddings(openai_api_key=api_key)

    # Create the vector store to use as the index. A unique collection name
    # keeps chunks from different requests apart.
    # NOTE(review): whether the default collection is shared between calls
    # depends on the installed chromadb version -- a unique name is safe
    # either way.
    db = Chroma.from_documents(
        texts,
        embeddings,
        collection_name=f"pdf-qa-{uuid.uuid4().hex}",
    )
    try:
        # Expose this index through a retriever interface.
        retriever = db.as_retriever(
            search_type="similarity",
            search_kwargs={"k": top_k},
        )

        # Create a chain to answer questions.
        qa = RetrievalQA.from_chain_type(
            llm=OpenAI(openai_api_key=api_key),
            chain_type=chain_type,
            retriever=retriever,
            return_source_documents=True,
        )

        # Get the result.
        result = qa({"query": prompt})
    finally:
        # Drop the per-request collection so indexes do not pile up.
        db.delete_collection()

    return result["result"], [doc.page_content for doc in result["source_documents"]]
37
+
38
# Define the Gradio interface.
#
# Components come from the top-level ``gr`` namespace: the legacy
# ``gr.inputs`` / ``gr.outputs`` modules are deprecated, and their
# ``default=`` keyword is spelled ``value=`` on the current components.
input_file = gr.File(label="PDF File")
# Mask the key so it is not shown in clear text on screen.
openai_key = gr.Textbox(label="OpenAI API Key", type="password")
prompt = gr.Textbox(label="Question Prompt")
# Pre-select a chain type so the handler never receives ``None``.
chain_type = gr.Radio(
    ["stuff", "map_reduce", "refine", "map_rerank"],
    value="stuff",
    label="Chain Type",
)
# ``step=1`` restricts the slider to whole numbers of chunks.
k = gr.Slider(
    minimum=1,
    maximum=5,
    value=1,
    step=1,
    label="Number of Relevant Chunks",
)

output_text = gr.Textbox(label="Answer")
output_docs = gr.Textbox(label="Relevant Source Text")

demo = gr.Interface(
    qa_system,
    inputs=[input_file, openai_key, prompt, chain_type, k],
    outputs=[output_text, output_docs],
    title="Question Answering with PDF File and OpenAI",
    description="Upload a PDF file, enter your OpenAI API key, type a question prompt, select a chain type, and choose the number of relevant chunks to use for the answer.",
)

# Only start the server when executed as a script, so importing this
# module (e.g. from a test) does not launch a web server.
if __name__ == "__main__":
    demo.launch(debug=True)
51
+
52
+
53
+
54
+
55
+
56
+
57
+
58
+
59
+
60
+
61
+