Manasa1 committed
Commit 4e886c9 · verified · 1 Parent(s): e7eb78a

Update app.py

Files changed (1)
  1. app.py +25 -32
app.py CHANGED
@@ -9,24 +9,17 @@ from langchain.memory import ConversationBufferMemory
 from langchain.chains import ConversationalRetrievalChain
 import os
 import nltk
-
-# Load environment variables
-load_dotenv()
-
-# Download necessary NLTK data
-nltk.download('punkt')
-nltk.download('averaged_perceptron_tagger')
-
+nltk.download('punkt_tab')
+nltk.download('averaged_perceptron_tagger_eng')
 # Install Poppler and Tesseract in the runtime environment
 os.system("apt-get update && apt-get install -y poppler-utils tesseract-ocr")
 
-# Retrieve API key
 secret = os.getenv('GROQ_API_KEY')
 
-# Get the working directory
 working_dir = os.path.dirname(os.path.abspath(__file__))
 
 def load_documents(file_path):
+    # Specify poppler_path and tesseract_path to ensure compatibility
     loader = UnstructuredPDFLoader(
         file_path,
         poppler_path="/usr/bin",
@@ -38,7 +31,7 @@ def load_documents(file_path):
 def setup_vectorstore(documents):
     embeddings = HuggingFaceEmbeddings()
     text_splitter = CharacterTextSplitter(
-        separator="\n",
+        separator="/n",
         chunk_size=1000,
         chunk_overlap=200
     )
@@ -54,6 +47,8 @@ def create_chain(vectorstores):
     )
     retriever = vectorstores.as_retriever()
     memory = ConversationBufferMemory(
+        llm=llm,
+        output_key="answer",
         memory_key="chat_history",
         return_messages=True
     )
@@ -68,7 +63,7 @@ def create_chain(vectorstores):
 # Streamlit page configuration
 st.set_page_config(
     page_title="Chat with your documents",
-    page_icon="📁",
+    page_icon="📑",
     layout="centered"
 )
 
@@ -91,23 +86,21 @@ if uploaded_file:
     if "conversation_chain" not in st.session_state:
         st.session_state.conversation_chain = create_chain(st.session_state.vectorstores)
 
-    # Display chat history
-    for message in st.session_state.chat_history:
-        with st.chat_message(message["role"]):
-            st.markdown(message["content"])
-
-    # User input handling
-    user_input = st.chat_input("Ask any questions relevant to uploaded pdf")
-
-    if user_input:
-        st.session_state.chat_history.append({"role": "user", "content": user_input})
-        with st.chat_message("user"):
-            st.markdown(user_input)
-
-        with st.chat_message("assistant"):
-            response = st.session_state.conversation_chain({"question": user_input})
-            assistant_response = response["answer"]
-            st.markdown(assistant_response)
-            st.session_state.chat_history.append({"role": "assistant", "content": assistant_response})
-else:
-    st.info("Please upload a PDF to start the conversation.")
+    # Display chat history
+    for message in st.session_state.chat_history:
+        with st.chat_message(message["role"]):
+            st.markdown(message["content"])
+
+    # User input handling
+    user_input = st.chat_input("Ask any questions relevant to uploaded pdf")
+
+    if user_input:
+        st.session_state.chat_history.append({"role": "user", "content": user_input})
+        with st.chat_message("user"):
+            st.markdown(user_input)
+
+        with st.chat_message("assistant"):
+            response = st.session_state.conversation_chain({"question": user_input})
+            assistant_response = response["answer"]
+            st.markdown(assistant_response)
+            st.session_state.chat_history.append({"role": "assistant", "content": assistant_response})
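The import hunk replaces nltk.download('punkt') and nltk.download('averaged_perceptron_tagger') with the 'punkt_tab' and 'averaged_perceptron_tagger_eng' resources used by recent NLTK releases. A minimal sketch of how those downloads could be guarded so Streamlit reruns do not re-fetch them (the try/except guard is an assumption, not part of this commit):

import nltk

# Hypothetical guard: fetch the renamed resources only if they are missing.
for name, path in [
    ("punkt_tab", "tokenizers/punkt_tab"),
    ("averaged_perceptron_tagger_eng", "taggers/averaged_perceptron_tagger_eng"),
]:
    try:
        nltk.data.find(path)        # raises LookupError when the resource is absent
    except LookupError:
        nltk.download(name, quiet=True)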
 
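The setup_vectorstore hunk only changes the CharacterTextSplitter separator; the rest of the function sits outside the diff. For context, a sketch of how such a function is typically completed, assuming a FAISS index and a literal newline separator (the vector store class actually used in app.py is not visible here):

from langchain.embeddings import HuggingFaceEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import FAISS  # assumption: app.py may use a different store

def setup_vectorstore(documents):
    embeddings = HuggingFaceEmbeddings()
    text_splitter = CharacterTextSplitter(
        separator="\n",        # assumption: split on newline characters
        chunk_size=1000,
        chunk_overlap=200,
    )
    chunks = text_splitter.split_documents(documents)
    return FAISS.from_documents(chunks, embeddings)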
 
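The create_chain hunk adds llm= and output_key="answer" to ConversationBufferMemory; that output key matches the response["answer"] lookup in the chat handler at the bottom of the diff. A sketch of how the full function might be assembled around that memory, assuming a Groq-hosted chat model via langchain_groq (the llm construction and the chain call are outside the hunks shown, so ChatGroq and the model name are assumptions):

from langchain.chains import ConversationalRetrievalChain
from langchain.memory import ConversationBufferMemory
from langchain_groq import ChatGroq  # assumption, suggested by the GROQ_API_KEY lookup above

def create_chain(vectorstores):
    llm = ChatGroq(
        groq_api_key=secret,           # GROQ_API_KEY read at module level in app.py
        model="llama-3.1-8b-instant",  # hypothetical model name
        temperature=0,
    )
    retriever = vectorstores.as_retriever()
    memory = ConversationBufferMemory(
        llm=llm,                       # as added in this commit
        output_key="answer",           # memory reads the same key as response["answer"]
        memory_key="chat_history",
        return_messages=True,
    )
    return ConversationalRetrievalChain.from_llm(
        llm=llm,
        retriever=retriever,
        memory=memory,
    )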