lakshmi324 committed on
Commit
5234b7a
·
1 Parent(s): 2665d9f

Upload app.py

Browse files

updated the gradio inputs

Files changed (1) hide show
  1. app.py +30 -37
app.py CHANGED
@@ -1,5 +1,5 @@
1
  import gradio as gr
2
- import os
3
  from langchain.chains import RetrievalQA
4
  from langchain.llms import OpenAI
5
  from langchain.document_loaders import PyPDFLoader
@@ -7,55 +7,48 @@ from langchain.text_splitter import CharacterTextSplitter
7
  from langchain.embeddings import OpenAIEmbeddings
8
  from langchain.vectorstores import Chroma
9
 
10
- def qa_system(pdf_file, openai_key, prompt, chain_type, k):
11
  os.environ["OPENAI_API_KEY"] = openai_key
12
-
13
- # load document
14
- loader = PyPDFLoader(pdf_file.name)
15
- documents = loader.load()
16
-
17
- # split the documents into chunks
18
- text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
19
- texts = text_splitter.split_documents(documents)
20
-
 
 
 
 
21
  # select which embeddings we want to use
22
  embeddings = OpenAIEmbeddings()
23
-
24
  # create the vector store to use as the index
25
  db = Chroma.from_documents(texts, embeddings)
26
-
27
  # expose this index in a retriever interface
28
  retriever = db.as_retriever(search_type="similarity", search_kwargs={"k": k})
29
-
30
- # create a chain to answer questions
31
  qa = RetrievalQA.from_chain_type(
32
  llm=OpenAI(), chain_type=chain_type, retriever=retriever, return_source_documents=True)
33
-
34
  # get the result
35
  result = qa({"query": prompt})
36
- return result['result'], ''.join(doc.page_content for doc in result["source_documents"])
37
 
38
  # define the Gradio interface
39
- input_file = gr.inputs.File(label="PDF File")
40
- openai_key = gr.inputs.Textbox(label="OpenAI API Key", type="password")
41
- prompt = gr.inputs.Textbox(label="Question Prompt")
42
- chain_type = gr.inputs.Radio(['stuff', 'map_reduce', "refine", "map_rerank"],default = 'map_reduce',label="Chain Type")
43
- k = gr.inputs.Slider(minimum=1, maximum=5, default=2, label="Number of Relevant Chunks")
44
 
45
- output_text = gr.outputs.Textbox(label="Answer")
46
- output_docs = gr.outputs.Textbox(label="Relevant Source Text")
47
 
48
- gr.Interface(qa_system, inputs=[input_file, openai_key, prompt, chain_type, k], outputs=[output_text, output_docs],
49
- title="Question Answering with PDF File and OpenAI",
50
  description="Upload a PDF file, enter your OpenAI API key, type a question prompt, select a chain type, and choose the number of relevant chunks to use for the answer.").launch(debug = True)
51
-
52
-
53
-
54
-
55
-
56
-
57
-
58
-
59
-
60
-
61
-
 
1
  import gradio as gr
2
+ import os
3
  from langchain.chains import RetrievalQA
4
  from langchain.llms import OpenAI
5
  from langchain.document_loaders import PyPDFLoader
 
7
  from langchain.embeddings import OpenAIEmbeddings
8
  from langchain.vectorstores import Chroma
9
 
10
def qa_system(pdf_files, openai_key, prompt, chain_type, k):
    """Answer a question using the content of the uploaded PDF files.

    Every PDF is loaded, split into chunks, embedded with OpenAI embeddings
    and indexed in a Chroma vector store; a RetrievalQA chain then answers
    ``prompt`` from the ``k`` most similar chunks.

    Args:
        pdf_files: Iterable of uploaded file objects; each must expose the
            path of the file on disk as ``.name``.
        openai_key: OpenAI API key. It is exported through the
            ``OPENAI_API_KEY`` environment variable for the OpenAI clients.
        prompt: The question to answer.
        chain_type: Chain type name forwarded to
            ``RetrievalQA.from_chain_type``.
        k: Number of chunks the retriever should return.

    Returns:
        A ``(answer, source_text)`` tuple, where ``source_text`` is the
        concatenated page content of the retrieved source chunks.
    """
    os.environ["OPENAI_API_KEY"] = openai_key

    # The splitter does not depend on the file, so build it once.
    text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)

    # Load every PDF and collect the chunks of all of them in one list.
    texts = []
    for pdf_file in pdf_files:
        documents = PyPDFLoader(pdf_file.name).load()
        texts.extend(text_splitter.split_documents(documents))

    # select which embeddings we want to use
    embeddings = OpenAIEmbeddings()

    # create the vector store to use as the index
    db = Chroma.from_documents(texts, embeddings)

    # expose this index in a retriever interface
    # NOTE(review): the UI slider may deliver k as a float; a result count
    # has to be a whole number, hence the int() coercion.
    retriever = db.as_retriever(
        search_type="similarity", search_kwargs={"k": int(k)})

    # create a chain to answer questions
    qa = RetrievalQA.from_chain_type(
        llm=OpenAI(), chain_type=chain_type, retriever=retriever,
        return_source_documents=True)

    # get the result
    result = qa({"query": prompt})

    # Fixed: the sources were read from the undefined name `rest`, which
    # raised NameError on every call.
    source_text = ''.join(
        doc.page_content for doc in result["source_documents"])
    return result['result'], source_text
41
 
42
  # define the Gradio interface
43
# Input components. The top-level component classes (gr.Radio, gr.Slider, ...)
# take their initial value through `value=`; `default=` belonged to the
# legacy `gr.inputs.*` classes.
input_file = gr.File(file_count="multiple", label="PDF File")
openai_key = gr.Textbox(label="OpenAI API Key", type="password")
prompt = gr.Textbox(label="Question Prompt")
chain_type = gr.Radio(
    ['stuff', 'map_reduce', "refine", "map_rerank"],
    label="Chain Type",
    value='map_reduce',
)
# step=1 keeps the number of chunks a whole number.
k = gr.Slider(minimum=1, maximum=5, value=2, step=1,
              label="Number of Relevant Chunks")

# Output components: the answer and the source text it was derived from.
output_text = gr.Textbox(label="Answer")
output_docs = gr.Textbox(label="Relevant Source Text")

# Build the UI around qa_system and start the server.
gr.Interface(
    qa_system,
    inputs=[input_file, openai_key, prompt, chain_type, k],
    outputs=[output_text, output_docs],
    title="DocuAI",
    description="Upload a PDF file, enter your OpenAI API key, type a question prompt, select a chain type, and choose the number of relevant chunks to use for the answer.",
).launch(debug=True)