Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -5,6 +5,7 @@ import sentence_transformers
|
|
5 |
import streamlit as st
|
6 |
|
7 |
VECTOR_DB ="bbf2ef09-875b-4737-a793-499409a108b0"
|
|
|
8 |
|
9 |
IBM_API_KEY = os.getenv("IBM_API_KEY")
|
10 |
|
@@ -13,8 +14,8 @@ IBM_URL_CHAT = "https://us-south.ml.cloud.ibm.com/ml/v1/text/chat?version=2023-1
|
|
13 |
|
14 |
if "messages" not in st.session_state:
|
15 |
st.session_state.messages = []
|
16 |
-
if "
|
17 |
-
st.session_state.
|
18 |
|
19 |
# Load the banner image from the same directory
|
20 |
st.image("banner_policy.jpg", use_container_width=True)
|
@@ -47,8 +48,8 @@ def IBM_chat (messages):
|
|
47 |
"project_id": os.getenv("IBM_PROJECT_ID"),
|
48 |
"messages": messages,
|
49 |
"max_tokens": 10000,
|
50 |
-
"temperature": 0.
|
51 |
-
"time_limit":
|
52 |
}
|
53 |
headers = {
|
54 |
"Accept": "application/json",
|
@@ -80,13 +81,14 @@ if "client" not in st.session_state:
|
|
80 |
with st.spinner("⏳ Waking the wizard ..."):
|
81 |
IBM_token()
|
82 |
wml_credentials = get_credentials()
|
83 |
-
st.session_state.client = APIClient(credentials=wml_credentials, project_id=os.getenv("IBM_PROJECT_ID"))
|
84 |
-
vector_index_details = st.session_state.client.data_assets.get_details(VECTOR_DB)
|
85 |
-
st.session_state.vector_index_properties = vector_index_details["entity"]["vector_index"]
|
86 |
-
|
87 |
-
st.session_state.top_n = 20 if st.session_state.vector_index_properties["settings"].get("rerank") else int(st.session_state.vector_index_properties["settings"]["top_k"])
|
88 |
-
st.session_state.emb = SentenceTransformerEmbeddings('sentence-transformers/all-MiniLM-L6-v2')
|
89 |
|
|
|
|
|
|
|
|
|
|
|
|
|
90 |
|
91 |
def rerank( client, documents, query, top_n ):
|
92 |
from ibm_watsonx_ai.foundation_models import Rerank
|
@@ -121,11 +123,16 @@ import random
|
|
121 |
import string
|
122 |
|
123 |
def hydrate_chromadb():
|
124 |
-
data = st.session_state.client.data_assets.get_content(
|
125 |
-
|
126 |
-
|
127 |
-
|
128 |
-
|
|
|
|
|
|
|
|
|
|
|
129 |
chroma_client = chromadb.PersistentClient(path="./chroma_db")
|
130 |
|
131 |
# make sure collection is empty if it already existed
|
@@ -143,20 +150,23 @@ def hydrate_chromadb():
|
|
143 |
|
144 |
for vector in vectors:
|
145 |
vector_embeddings.append(vector["embedding"])
|
146 |
-
vector_documents.append(vector["content"]
|
147 |
-
|
148 |
-
|
|
|
149 |
clean_metadata = {}
|
150 |
-
clean_metadata["
|
151 |
-
clean_metadata["
|
152 |
-
clean_metadata["
|
153 |
-
clean_metadata["
|
154 |
-
clean_metadata["
|
|
|
155 |
vector_metadatas.append(clean_metadata)
|
156 |
-
|
|
|
157 |
random_string = ''.join(random.choices(string.ascii_uppercase + string.digits, k=10))
|
158 |
-
id = "{}:{}-{}-{}".format(asset_id, lines["from"], lines["to"], random_string)
|
159 |
-
vector_ids.append(
|
160 |
|
161 |
collection.add(
|
162 |
embeddings=vector_embeddings,
|
@@ -180,57 +190,65 @@ def proximity_search( question ):
|
|
180 |
|
181 |
documents = list(reversed(query_result["documents"][0]))
|
182 |
|
183 |
-
if st.session_state.vector_index_properties["settings"].get("rerank"):
|
184 |
-
|
185 |
|
186 |
return "\n".join(documents)
|
187 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
188 |
# Streamlit UI
|
189 |
st.title("🔍 Synergy Scroll")
|
190 |
st.subheader("AI-Powered Project & Policy Matching")
|
191 |
st.write("Explore the Lab Lab Library to find relevant past projects that align with your policy or new initiative.")
|
192 |
|
|
|
|
|
193 |
# Suggested search queries as buttons
|
194 |
col1, col2, col3 = st.columns(3)
|
195 |
|
196 |
with col1:
|
197 |
-
q = "Projects
|
198 |
if st.button(q):
|
199 |
-
|
200 |
|
201 |
with col2:
|
202 |
-
q = "
|
203 |
if st.button(q):
|
204 |
-
|
205 |
|
206 |
with col3:
|
207 |
-
q = "
|
208 |
if st.button(q):
|
209 |
-
|
210 |
|
211 |
# User input in Streamlit
|
212 |
-
user_input = st.text_input("Describe your policy or project to find relevant Lab Lab projects...",
|
213 |
-
if user_input:
|
214 |
-
st.session_state["user_input"] = user_input
|
215 |
|
216 |
# Display chat history
|
217 |
#for message in st.session_state.messages:
|
218 |
# with st.chat_message(message["role"]):
|
219 |
# st.markdown(message["content"])
|
220 |
|
221 |
-
if
|
222 |
-
|
223 |
-
# add the submissions as context (only in prompt, not in history)
|
224 |
-
grounding = proximity_search(st.session_state["user_input"])
|
225 |
-
prompt = st.session_state["user_input"] + ". For a project share the image as markdown and mention the url as well. The context for the question: " + grounding;
|
226 |
-
messages = st.session_state.messages.copy()
|
227 |
-
messages.append({"role": "user", "content": prompt})
|
228 |
-
st.session_state.messages.append({"role": "user", "content": st.session_state["user_input"]})
|
229 |
-
|
230 |
-
# Get response from IBM
|
231 |
-
with st.spinner("Thinking..."):
|
232 |
-
assistant_reply = IBM_chat(messages)
|
233 |
|
234 |
-
|
235 |
-
|
236 |
-
st.session_state.messages.append({"role": "assistant", "content": assistant_reply})
|
|
|
5 |
import streamlit as st
|
6 |
|
7 |
VECTOR_DB ="bbf2ef09-875b-4737-a793-499409a108b0"
|
8 |
+
JSON_DB = "f49e274a-b5c3-4573-81a2-32df8f96e97b"
|
9 |
|
10 |
IBM_API_KEY = os.getenv("IBM_API_KEY")
|
11 |
|
|
|
14 |
|
15 |
if "messages" not in st.session_state:
|
16 |
st.session_state.messages = []
|
17 |
+
if "query" not in st.session_state:
|
18 |
+
st.session_state.query = ""
|
19 |
|
20 |
# Load the banner image from the same directory
|
21 |
st.image("banner_policy.jpg", use_container_width=True)
|
|
|
48 |
"project_id": os.getenv("IBM_PROJECT_ID"),
|
49 |
"messages": messages,
|
50 |
"max_tokens": 10000,
|
51 |
+
"temperature": 0.7,
|
52 |
+
"time_limit": 30000
|
53 |
}
|
54 |
headers = {
|
55 |
"Accept": "application/json",
|
|
|
81 |
with st.spinner("⏳ Waking the wizard ..."):
|
82 |
IBM_token()
|
83 |
wml_credentials = get_credentials()
|
84 |
+
st.session_state.client = APIClient(credentials=wml_credentials, project_id=os.getenv("IBM_PROJECT_ID"))
|
|
|
|
|
|
|
|
|
|
|
85 |
|
86 |
+
#vector_index_details = st.session_state.client.data_assets.get_details(VECTOR_DB)
|
87 |
+
#st.session_state.vector_index_properties = vector_index_details["entity"]["vector_index"]
|
88 |
+
#st.session_state.top_n = 20 if st.session_state.vector_index_properties["settings"].get("rerank") else int(st.session_state.vector_index_properties["settings"]["top_k"])
|
89 |
+
|
90 |
+
st.session_state.emb = SentenceTransformerEmbeddings('sentence-transformers/all-MiniLM-L6-v2')
|
91 |
+
st.session_state.top_n = 10
|
92 |
|
93 |
def rerank( client, documents, query, top_n ):
|
94 |
from ibm_watsonx_ai.foundation_models import Rerank
|
|
|
123 |
import string
|
124 |
|
125 |
def hydrate_chromadb():
|
126 |
+
#data = st.session_state.client.data_assets.get_content(JSON_DB)
|
127 |
+
#stringified_vectors = str(content, "utf-8")
|
128 |
+
with open("lablab - json.txt", "r", encoding="utf-8") as f:
|
129 |
+
#with open("lablab.gzip", "rb") as f:
|
130 |
+
gz = f.read()
|
131 |
+
#content = gzip.decompress(gz)
|
132 |
+
#stringified_vectors = str(content, "utf-8")
|
133 |
+
|
134 |
+
vectors = json.loads(gz)
|
135 |
+
|
136 |
chroma_client = chromadb.PersistentClient(path="./chroma_db")
|
137 |
|
138 |
# make sure collection is empty if it already existed
|
|
|
150 |
|
151 |
for vector in vectors:
|
152 |
vector_embeddings.append(vector["embedding"])
|
153 |
+
vector_documents.append(vector["content"]
|
154 |
+
)
|
155 |
+
#metadata = vector["metadata"]
|
156 |
+
#lines = metadata["loc"]["lines"]
|
157 |
clean_metadata = {}
|
158 |
+
clean_metadata["source"] = "Lablab website"
|
159 |
+
#clean_metadata["asset_id"] = metadata["asset_id"]
|
160 |
+
#clean_metadata["asset_name"] = metadata["asset_name"]
|
161 |
+
#clean_metadata["url"] = metadata["url"]
|
162 |
+
#clean_metadata["from"] = lines["from"]
|
163 |
+
#clean_metadata["to"] = lines["to"]
|
164 |
vector_metadatas.append(clean_metadata)
|
165 |
+
|
166 |
+
#asset_id = vector["metadata"]["asset_id"]
|
167 |
random_string = ''.join(random.choices(string.ascii_uppercase + string.digits, k=10))
|
168 |
+
#id = "{}:{}-{}-{}".format(asset_id, lines["from"], lines["to"], random_string)
|
169 |
+
vector_ids.append(random_string)
|
170 |
|
171 |
collection.add(
|
172 |
embeddings=vector_embeddings,
|
|
|
190 |
|
191 |
documents = list(reversed(query_result["documents"][0]))
|
192 |
|
193 |
+
#if st.session_state.vector_index_properties["settings"].get("rerank"):
|
194 |
+
# documents = rerank(st.session_state.client, documents, question, 10) # st.session_state.vector_index_properties["settings"]["top_k"])
|
195 |
|
196 |
return "\n".join(documents)
|
197 |
|
198 |
+
def do_query(query):
    """Answer one search query and render the reply on the Streamlit page.

    The query is combined with the text returned by ``proximity_search`` and
    sent to ``IBM_chat`` as a single user message; the reply is rendered as
    an assistant chat message.

    Args:
        query: The search text (a suggestion-button label or the text typed
            into the input box).
    """
    # The retrieved context is added to the prompt only. Every request is
    # stateless: no earlier turns are sent along and nothing is appended to
    # st.session_state.messages.
    instruction = ". For a project share the image as markdown and mention the url as well. The context for the question: "
    context = proximity_search(query)
    chat_payload = [{"role": "user", "content": query + instruction + context}]

    # Get response from IBM
    with st.spinner("Thinking..."):
        answer = IBM_chat(chat_payload)

    # Display assistant message
    st.chat_message("assistant").markdown(answer)
|
215 |
+
|
216 |
+
|
217 |
# Streamlit UI
|
218 |
st.title("🔍 Synergy Scroll")
|
219 |
st.subheader("AI-Powered Project & Policy Matching")
|
220 |
st.write("Explore the Lab Lab Library to find relevant past projects that align with your policy or new initiative.")
|
221 |
|
222 |
+
# Query picked via a suggestion button during this rerun ("" when none was clicked).
query = ""

# Suggested search queries as buttons, one per column
col1, col2, col3 = st.columns(3)
suggested_queries = (
    "Projects with a link with Solarpunk",
    "DEI aware projects",
    "Decentral projects",
)
for column, q in zip((col1, col2, col3), suggested_queries):
    with column:
        if st.button(q):
            query = q

# User input in Streamlit
user_input = st.text_input("Describe your policy or project to find relevant Lab Lab projects...", "")

# Display chat history
#for message in st.session_state.messages:
#    with st.chat_message(message["role"]):
#        st.markdown(message["content"])

# Run exactly one query per rerun. The text box keeps its value across
# reruns, so clicking a suggestion button while text is still present would
# otherwise trigger two retrievals and two model calls. The button that was
# just clicked takes precedence over the lingering text.
active_query = query or user_input
if active_query:
    do_query(active_query)
|
|