NomiDecent committed
Commit ada8437 · verified · 1 Parent(s): 3608487

Create app.py

Files changed (1):
  app.py +155 -0
app.py ADDED
@@ -0,0 +1,155 @@
+ # Adapted from https://docs.streamlit.io/knowledge-base/tutorials/build-conversational-apps#build-a-simple-chatbot-gui-with-streaming
+ import os
+
+ import base64
+ import gc
+ import random
+ import tempfile
+ import time
+ import uuid
+
+ from IPython.display import Markdown, display
+
+ import streamlit as st
+
+ import torch
+ import numpy as np
+ from tqdm import tqdm
+ from pdf2image import convert_from_path
+
+ from rag_code import EmbedData, QdrantVDB_QB, Retriever, RAG
+
+ collection_name = "multimodal_rag_with_deepseek-new"
+
+ if "id" not in st.session_state:
+     st.session_state.id = uuid.uuid4()
+     st.session_state.file_cache = {}
+
+ session_id = st.session_state.id
+
+ def reset_chat():
+     st.session_state.messages = []
+     st.session_state.context = None
+     gc.collect()
+
+
+ def display_pdf(file):
+     # Opening file from file path
+
+     st.markdown("### PDF Preview")
+     base64_pdf = base64.b64encode(file.read()).decode("utf-8")
+
+     # Embedding PDF in HTML
+     pdf_display = f"""<iframe src="data:application/pdf;base64,{base64_pdf}" width="400" height="100%" type="application/pdf"
+                         style="height:100vh; width:100%"
+                     >
+                     </iframe>"""
+
+     # Displaying File
+     st.markdown(pdf_display, unsafe_allow_html=True)
+
+
+ with st.sidebar:
+     st.header("Add your documents!")
+
+     uploaded_file = st.file_uploader("Choose your `.pdf` file", type="pdf")
+
+     if uploaded_file:
+         try:
+             with tempfile.TemporaryDirectory() as temp_dir:
+                 file_path = os.path.join(temp_dir, uploaded_file.name)
+
+                 with open(file_path, "wb") as f:
+                     f.write(uploaded_file.getvalue())
+
+                 file_key = f"{session_id}-{uploaded_file.name}"
+                 st.write("Indexing your document...")
+
+                 if file_key not in st.session_state.get('file_cache', {}):
+
+                     # Convert each PDF page to a PIL image with convert_from_path
+                     images = convert_from_path(file_path)
+
+                     os.makedirs("images", exist_ok=True)  # ensure the output directory exists
+                     for i in range(len(images)):
+
+                         # Save pages as images in the pdf
+                         images[i].save('./images/page' + str(i) + '.jpg', 'JPEG')
+
+                     # embed data
+                     embeddata = EmbedData()
+                     embeddata.embed(images)
+
+                     # set up vector database
+                     qdrant_vdb = QdrantVDB_QB(collection_name=collection_name,
+                                               vector_dim=128)
+                     qdrant_vdb.define_client()
+                     qdrant_vdb.create_collection()
+                     qdrant_vdb.ingest_data(embeddata=embeddata)
+
+                     # set up retriever
+                     retriever = Retriever(vector_db=qdrant_vdb, embeddata=embeddata)
+
+                     # set up rag
+                     query_engine = RAG(retriever=retriever)
+
+                     st.session_state.file_cache[file_key] = query_engine
+                 else:
+                     query_engine = st.session_state.file_cache[file_key]
+
+                 # Inform the user that the file is processed and display the uploaded PDF
+                 st.success("Ready to Chat!")
+                 display_pdf(uploaded_file)
+         except Exception as e:
+             st.error(f"An error occurred: {e}")
+             st.stop()
+
+ col1, col2 = st.columns([6, 1])
+
+ with col1:
+     # st.header("""
+     # # Agentic RAG powered by <img src="data:image/png;base64,{}" width="170" style="vertical-align: -3px;">
+     # """.format(base64.b64encode(open("assets/deep-seek.png", "rb").read()).decode()))
+     st.markdown("""
+     # Multimodal RAG powered by <img src="data:image/png;base64,{}" width="170" style="vertical-align: -3px;"> Janus""".format(base64.b64encode(open("assets/deep-seek.png", "rb").read()).decode()), unsafe_allow_html=True)
+
+
+ with col2:
+     st.button("Clear ↺", on_click=reset_chat)
+
+ # Initialize chat history
+ if "messages" not in st.session_state:
+     reset_chat()
+
+
+ # Display chat messages from history on app rerun
+ for message in st.session_state.messages:
+     with st.chat_message(message["role"]):
+         st.markdown(message["content"])
+
+
+ # Accept user input
+ if prompt := st.chat_input("What's up?"):
+     # Add user message to chat history
+     st.session_state.messages.append({"role": "user", "content": prompt})
+     # Display user message in chat message container
+     with st.chat_message("user"):
+         st.markdown(prompt)
+
+     # Display assistant response in chat message container
+     with st.chat_message("assistant"):
+
+         message_placeholder = st.empty()
+         full_response = ""
+
+         streaming_response = query_engine.query(prompt)
+
+         for chunk in streaming_response:
+             full_response += chunk
+             message_placeholder.markdown(full_response + "▌")
+
+             time.sleep(0.01)
+         message_placeholder.markdown(full_response)
+
+     # Add assistant response to chat history
+     st.session_state.messages.append({"role": "assistant", "content": full_response})
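Note: app.py only calls into rag_code, which is not part of this diff. The sketch below is a hedged reconstruction of the interface those calls assume; the class and method names are taken directly from the usage above, while the docstrings and placeholder bodies are assumptions, not the actual implementation in rag_code.py.

# Sketch of the rag_code interface that app.py relies on (placeholder bodies only).

class EmbedData:
    def embed(self, images):
        """Compute embeddings for a list of PIL page images (one call per indexed PDF)."""
        ...

class QdrantVDB_QB:
    def __init__(self, collection_name, vector_dim):
        ...

    def define_client(self): ...
    def create_collection(self): ...
    def ingest_data(self, embeddata): ...

class Retriever:
    def __init__(self, vector_db, embeddata):
        ...

class RAG:
    def __init__(self, retriever):
        ...

    def query(self, prompt):
        """Return an iterable of text chunks; app.py streams these into the chat UI."""
        ...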