BaRiDo committed on
Commit
0527179
·
verified ·
1 Parent(s): ac88d86

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +70 -52
app.py CHANGED
@@ -5,6 +5,7 @@ import sentence_transformers
5
  import streamlit as st
6
 
7
  VECTOR_DB ="bbf2ef09-875b-4737-a793-499409a108b0"
 
8
 
9
  IBM_API_KEY = os.getenv("IBM_API_KEY")
10
 
@@ -13,8 +14,8 @@ IBM_URL_CHAT = "https://us-south.ml.cloud.ibm.com/ml/v1/text/chat?version=2023-1
13
 
14
  if "messages" not in st.session_state:
15
  st.session_state.messages = []
16
- if "user_input" not in st.session_state:
17
- st.session_state.user_input = ""
18
 
19
  # Load the banner image from the same directory
20
  st.image("banner_policy.jpg", use_container_width=True)
@@ -47,8 +48,8 @@ def IBM_chat (messages):
47
  "project_id": os.getenv("IBM_PROJECT_ID"),
48
  "messages": messages,
49
  "max_tokens": 10000,
50
- "temperature": 0.3,
51
- "time_limit": 20000
52
  }
53
  headers = {
54
  "Accept": "application/json",
@@ -80,13 +81,14 @@ if "client" not in st.session_state:
80
  with st.spinner("⏳ Waking the wizard ..."):
81
  IBM_token()
82
  wml_credentials = get_credentials()
83
- st.session_state.client = APIClient(credentials=wml_credentials, project_id=os.getenv("IBM_PROJECT_ID"))
84
- vector_index_details = st.session_state.client.data_assets.get_details(VECTOR_DB)
85
- st.session_state.vector_index_properties = vector_index_details["entity"]["vector_index"]
86
-
87
- st.session_state.top_n = 20 if st.session_state.vector_index_properties["settings"].get("rerank") else int(st.session_state.vector_index_properties["settings"]["top_k"])
88
- st.session_state.emb = SentenceTransformerEmbeddings('sentence-transformers/all-MiniLM-L6-v2')
89
 
 
 
 
 
 
 
90
 
91
  def rerank( client, documents, query, top_n ):
92
  from ibm_watsonx_ai.foundation_models import Rerank
@@ -121,11 +123,16 @@ import random
121
  import string
122
 
123
  def hydrate_chromadb():
124
- data = st.session_state.client.data_assets.get_content(VECTOR_DB)
125
- content = gzip.decompress(data)
126
- stringified_vectors = str(content, "utf-8")
127
- vectors = json.loads(stringified_vectors)
128
-
 
 
 
 
 
129
  chroma_client = chromadb.PersistentClient(path="./chroma_db")
130
 
131
  # make sure collection is empty if it already existed
@@ -143,20 +150,23 @@ def hydrate_chromadb():
143
 
144
  for vector in vectors:
145
  vector_embeddings.append(vector["embedding"])
146
- vector_documents.append(vector["content"])
147
- metadata = vector["metadata"]
148
- lines = metadata["loc"]["lines"]
 
149
  clean_metadata = {}
150
- clean_metadata["asset_id"] = metadata["asset_id"]
151
- clean_metadata["asset_name"] = metadata["asset_name"]
152
- clean_metadata["url"] = metadata["url"]
153
- clean_metadata["from"] = lines["from"]
154
- clean_metadata["to"] = lines["to"]
 
155
  vector_metadatas.append(clean_metadata)
156
- asset_id = vector["metadata"]["asset_id"]
 
157
  random_string = ''.join(random.choices(string.ascii_uppercase + string.digits, k=10))
158
- id = "{}:{}-{}-{}".format(asset_id, lines["from"], lines["to"], random_string)
159
- vector_ids.append(id)
160
 
161
  collection.add(
162
  embeddings=vector_embeddings,
@@ -180,57 +190,65 @@ def proximity_search( question ):
180
 
181
  documents = list(reversed(query_result["documents"][0]))
182
 
183
- if st.session_state.vector_index_properties["settings"].get("rerank"):
184
- documents = rerank(st.session_state.client, documents, question, st.session_state.vector_index_properties["settings"]["top_k"])
185
 
186
  return "\n".join(documents)
187
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
188
  # Streamlit UI
189
  st.title("🔍 Synergy Scroll")
190
  st.subheader("AI-Powered Project & Policy Matching")
191
  st.write("Explore the Lab Lab Library to find relevant past projects that align with your policy or new initiative.")
192
 
 
 
193
  # Suggested search queries as buttons
194
  col1, col2, col3 = st.columns(3)
195
 
196
  with col1:
197
- q = "Projects to link with Solarpunk"
198
  if st.button(q):
199
- st.session_state["user_input"] = q
200
 
201
  with col2:
202
- q = "Projects to implement DEI"
203
  if st.button(q):
204
- st.session_state["user_input"] = q
205
 
206
  with col3:
207
- q = "Projects with decentral focus"
208
  if st.button(q):
209
- st.session_state["user_input"] = q
210
 
211
  # User input in Streamlit
212
- user_input = st.text_input("Describe your policy or project to find relevant Lab Lab projects...", value=st.session_state.user_input, key="text_input")
213
- if user_input:
214
- st.session_state["user_input"] = user_input
215
 
216
  # Display chat history
217
  #for message in st.session_state.messages:
218
  # with st.chat_message(message["role"]):
219
  # st.markdown(message["content"])
220
 
221
- if st.session_state["user_input"]:
222
-
223
- # add the submissions as context (only in prompt, not in history)
224
- grounding = proximity_search(st.session_state["user_input"])
225
- prompt = st.session_state["user_input"] + ". For a project share the image as markdown and mention the url as well. The context for the question: " + grounding;
226
- messages = st.session_state.messages.copy()
227
- messages.append({"role": "user", "content": prompt})
228
- st.session_state.messages.append({"role": "user", "content": st.session_state["user_input"]})
229
-
230
- # Get response from IBM
231
- with st.spinner("Thinking..."):
232
- assistant_reply = IBM_chat(messages)
233
 
234
- # Display assistant message
235
- st.chat_message("assistant").markdown(assistant_reply)
236
- st.session_state.messages.append({"role": "assistant", "content": assistant_reply})
 
5
  import streamlit as st
6
 
7
  VECTOR_DB ="bbf2ef09-875b-4737-a793-499409a108b0"
8
+ JSON_DB = "f49e274a-b5c3-4573-81a2-32df8f96e97b"
9
 
10
  IBM_API_KEY = os.getenv("IBM_API_KEY")
11
 
 
14
 
15
  if "messages" not in st.session_state:
16
  st.session_state.messages = []
17
+ if "query" not in st.session_state:
18
+ st.session_state.query = ""
19
 
20
  # Load the banner image from the same directory
21
  st.image("banner_policy.jpg", use_container_width=True)
 
48
  "project_id": os.getenv("IBM_PROJECT_ID"),
49
  "messages": messages,
50
  "max_tokens": 10000,
51
+ "temperature": 0.7,
52
+ "time_limit": 30000
53
  }
54
  headers = {
55
  "Accept": "application/json",
 
81
  with st.spinner("⏳ Waking the wizard ..."):
82
  IBM_token()
83
  wml_credentials = get_credentials()
84
+ st.session_state.client = APIClient(credentials=wml_credentials, project_id=os.getenv("IBM_PROJECT_ID"))
 
 
 
 
 
85
 
86
+ #vector_index_details = st.session_state.client.data_assets.get_details(VECTOR_DB)
87
+ #st.session_state.vector_index_properties = vector_index_details["entity"]["vector_index"]
88
+ #st.session_state.top_n = 20 if st.session_state.vector_index_properties["settings"].get("rerank") else int(st.session_state.vector_index_properties["settings"]["top_k"])
89
+
90
+ st.session_state.emb = SentenceTransformerEmbeddings('sentence-transformers/all-MiniLM-L6-v2')
91
+ st.session_state.top_n = 10
92
 
93
  def rerank( client, documents, query, top_n ):
94
  from ibm_watsonx_ai.foundation_models import Rerank
 
123
  import string
124
 
125
  def hydrate_chromadb():
126
+ #data = st.session_state.client.data_assets.get_content(JSON_DB)
127
+ #stringified_vectors = str(content, "utf-8")
128
+ with open("lablab - json.txt", "r", encoding="utf-8") as f:
129
+ #with open("lablab.gzip", "rb") as f:
130
+ gz = f.read()
131
+ #content = gzip.decompress(gz)
132
+ #stringified_vectors = str(content, "utf-8")
133
+
134
+ vectors = json.loads(gz)
135
+
136
  chroma_client = chromadb.PersistentClient(path="./chroma_db")
137
 
138
  # make sure collection is empty if it already existed
 
150
 
151
  for vector in vectors:
152
  vector_embeddings.append(vector["embedding"])
153
+ vector_documents.append(vector["content"]
154
+ )
155
+ #metadata = vector["metadata"]
156
+ #lines = metadata["loc"]["lines"]
157
  clean_metadata = {}
158
+ clean_metadata["source"] = "Lablab website"
159
+ #clean_metadata["asset_id"] = metadata["asset_id"]
160
+ #clean_metadata["asset_name"] = metadata["asset_name"]
161
+ #clean_metadata["url"] = metadata["url"]
162
+ #clean_metadata["from"] = lines["from"]
163
+ #clean_metadata["to"] = lines["to"]
164
  vector_metadatas.append(clean_metadata)
165
+
166
+ #asset_id = vector["metadata"]["asset_id"]
167
  random_string = ''.join(random.choices(string.ascii_uppercase + string.digits, k=10))
168
+ #id = "{}:{}-{}-{}".format(asset_id, lines["from"], lines["to"], random_string)
169
+ vector_ids.append(random_string)
170
 
171
  collection.add(
172
  embeddings=vector_embeddings,
 
190
 
191
  documents = list(reversed(query_result["documents"][0]))
192
 
193
+ #if st.session_state.vector_index_properties["settings"].get("rerank"):
194
+ # documents = rerank(st.session_state.client, documents, question, 10) # st.session_state.vector_index_properties["settings"]["top_k"])
195
 
196
  return "\n".join(documents)
197
 
198
def do_query(query):
    """Answer *query* grounded in the Lab Lab project vector store.

    Retrieves relevant documents via proximity_search, builds a one-shot
    prompt (the retrieved context is placed only in the prompt sent to the
    model, never in any persisted chat history), calls IBM_chat, and renders
    the assistant's reply in the Streamlit UI.

    Side effects: Streamlit output only (spinner + chat message); returns None.
    """
    # Add the retrieved submissions as context (only in the prompt, not in history).
    grounding = proximity_search(query)
    prompt = (
        query
        + ". For a project share the image as markdown and mention the url as well. The context for the question: "
        + grounding
    )
    messages = [{"role": "user", "content": prompt}]

    # Get response from IBM
    with st.spinner("Thinking..."):
        assistant_reply = IBM_chat(messages)

    # Display assistant message
    st.chat_message("assistant").markdown(assistant_reply)
+
216
+
217
# Streamlit UI
st.title("🔍 Synergy Scroll")
st.subheader("AI-Powered Project & Policy Matching")
st.write("Explore the Lab Lab Library to find relevant past projects that align with your policy or new initiative.")

# Query chosen via a suggestion button this rerun; empty when none was clicked.
query = ""

# Suggested search queries as buttons
col1, col2, col3 = st.columns(3)

with col1:
    q = "Projects with a link with Solarpunk"
    if st.button(q):
        query = q

with col2:
    q = "DEI aware projects"
    if st.button(q):
        query = q

with col3:
    q = "Decentral projects"
    if st.button(q):
        query = q

# User input in Streamlit
user_input = st.text_input("Describe your policy or project to find relevant Lab Lab projects...", "")

# Run at most one query per rerun. A freshly clicked suggestion button takes
# precedence over the text input, which persists across reruns — previously
# both branches could fire on the same rerun and issue two model calls.
if query:
    do_query(query)
elif user_input:
    do_query(user_input)