AI-RESEARCHER-2024 committed on
Commit
1be0ed7
·
verified ·
1 Parent(s): a2118c4

Create main.py

Browse files
Files changed (1) hide show
  1. main.py +120 -0
main.py ADDED
@@ -0,0 +1,120 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from PyPDF2 import PdfReader
3
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
4
+ import os
5
+ from langchain_google_genai import GoogleGenerativeAIEmbeddings
6
+ from langchain_community.vectorstores import FAISS
7
+ from langchain_google_genai import ChatGoogleGenerativeAI
8
+ from langchain.chains.question_answering import load_qa_chain
9
+ from langchain.prompts import PromptTemplate
10
+ #from dotenv import load_dotenv
11
+
12
+
13
+ #load_dotenv()
14
+ API_KEYS = [os.getenv("APIKEY1"), os.getenv("APIKEY2")]
15
+ current_key_index = -1
16
+
17
+
18
+
19
+
20
+ def switch_api_key():
21
+ global current_key_index
22
+ current_key_index = (current_key_index + 1) % len(API_KEYS)
23
+ return API_KEYS[current_key_index]
24
+
25
+
26
+
27
+
28
+ def get_pdf_text(pdf_docs):
29
+ text = ""
30
+ for pdf in pdf_docs:
31
+ pdf_reader = PdfReader(pdf)
32
+ for page in pdf_reader.pages:
33
+ text += page.extract_text()
34
+ return text
35
+
36
+
37
+
38
+
39
+ def get_text_chunks(text):
40
+ text_splitter = RecursiveCharacterTextSplitter(chunk_size=10000, chunk_overlap=1000)
41
+ return text_splitter.split_text(text)
42
+
43
+
44
+
45
+
46
+ def get_vector_store(text_chunks):
47
+ api_key = switch_api_key()
48
+ embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001", google_api_key=api_key)
49
+ vector_store = FAISS.from_texts(text_chunks, embedding=embeddings)
50
+ vector_store.save_local("faiss_index")
51
+
52
+
53
+
54
+
55
+ def get_conversational_chain():
56
+ api_key = switch_api_key()
57
+ prompt_template = """
58
+ You are a helpful assistant that only answers based on the context provided from the PDF documents.
59
+ Do not use any external knowledge or assumptions. If the answer is not found in the context below, reply with "I don't know."
60
+
61
+
62
+ Context:
63
+ {context}
64
+
65
+
66
+ Question:
67
+ {question}
68
+
69
+
70
+ Answer:
71
+ """
72
+ model = ChatGoogleGenerativeAI(model="gemini-2.0-flash", temperature=0, google_api_key=api_key)
73
+ prompt = PromptTemplate(template=prompt_template, input_variables=["context", "question"])
74
+ chain = load_qa_chain(model, chain_type="stuff", prompt=prompt)
75
+ return chain
76
+
77
+
78
+
79
+
80
+ def user_input(user_question):
81
+ api_key = switch_api_key()
82
+ embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001", google_api_key=api_key)
83
+ new_db = FAISS.load_local("faiss_index", embeddings, allow_dangerous_deserialization=True)
84
+ docs = new_db.similarity_search(user_question)
85
+ chain = get_conversational_chain()
86
+
87
+
88
+ response = chain({"input_documents": docs, "question": user_question}, return_only_outputs=True)
89
+ st.write("Reply: ", response["output_text"])
90
+
91
+
92
+
93
+
94
+ # Streamlit application
95
+ def main():
96
+ st.set_page_config("Chat PDF")
97
+ st.header("CSC 121: Computers and Scientific Thinking (Chatbot)")
98
+ st.subheader("Ask a question ONLY from the CSC 121 textbook of Dr. Reed")
99
+
100
+
101
+ user_question = st.text_input("Ask a question")
102
+
103
+
104
+ if user_question:
105
+ user_input(user_question)
106
+
107
+
108
+ pdf_docs = st.file_uploader("Upload PDF files", accept_multiple_files=True)
109
+ if st.button("Submit & Process"):
110
+ with st.spinner("Processing..."):
111
+ raw_text = get_pdf_text(pdf_docs)
112
+ text_chunks = get_text_chunks(raw_text)
113
+ get_vector_store(text_chunks)
114
+ st.success("Done")
115
+
116
+
117
+
118
+
119
+ if __name__ == "__main__":
120
+ main()