import os
import json
import random
import string

import requests
import chromadb
import streamlit as st

# Data-asset IDs in the watsonx.ai project (the vector index and its JSON export)
VECTOR_DB = "bbf2ef09-875b-4737-a793-499409a108b0"
JSON_DB = "f49e274a-b5c3-4573-81a2-32df8f96e97b"

IBM_API_KEY = os.getenv("IBM_API_KEY")
IBM_URL_TOKEN = "https://iam.cloud.ibm.com/identity/token"
IBM_URL_CHAT = "https://us-south.ml.cloud.ibm.com/ml/v1/text/chat?version=2023-10-25"

# Initialize per-session state
if "messages" not in st.session_state:
    st.session_state.messages = []
if "query" not in st.session_state:
    st.session_state.query = ""
if "extended_query" not in st.session_state:
    st.session_state.extended_query = ""

##############################################
##
## IBM API
##
##############################################

def IBM_token():
    """Exchange the API key for an IAM access token and cache it in the session."""
    headers = {"Content-Type": "application/x-www-form-urlencoded"}
    data = {
        "grant_type": "urn:ibm:params:oauth:grant-type:apikey",
        "apikey": IBM_API_KEY,
    }
    response = requests.post(IBM_URL_TOKEN, headers=headers, data=data)
    response.raise_for_status()
    st.session_state.IBM_ACCESS_TOKEN = response.json().get("access_token", "")

def IBM_chat(messages, temperature=0.7):
    """Send a chat request to IBM granite-3-8b-instruct and return the reply text."""
    body = {
        "model_id": "ibm/granite-3-8b-instruct",
        "project_id": os.getenv("IBM_PROJECT_ID"),
        "messages": messages,
        "max_tokens": 10000,
        "temperature": temperature,
        "time_limit": 40000,
    }
    headers = {
        "Accept": "application/json",
        "Content-Type": "application/json",
        "Authorization": "Bearer " + st.session_state.IBM_ACCESS_TOKEN,
    }
    response = requests.post(IBM_URL_CHAT, headers=headers, json=body)
    if response.status_code != 200:
        raise Exception("Non-200 response: " + str(response.text))
    return response.json()["choices"][0]["message"]["content"]

def IBM_query(prompt, temperature=0.7):
    """Convenience wrapper: single-turn chat with one user prompt."""
    messages = [{"role": "user", "content": prompt}]
    return IBM_chat(messages, temperature)

def get_credentials():
    return {
        "url": "https://us-south.ml.cloud.ibm.com",
        "apikey": os.getenv("IBM_API_KEY"),
    }

##############################################
##
## Vector DB
##
##############################################

from ibm_watsonx_ai.client import APIClient
from ibm_watsonx_ai.foundation_models.embeddings.sentence_transformer_embeddings import SentenceTransformerEmbeddings

def rerank(client, documents, query, top_n):
    """Reorder documents by relevance to the query with a watsonx.ai cross-encoder."""
    from ibm_watsonx_ai.foundation_models import Rerank

    reranker = Rerank(
        model_id="cross-encoder/ms-marco-minilm-l-12-v2",
        api_client=client,
        params={
            "return_options": {"top_n": top_n},
            "truncate_input_tokens": 512,
        },
    )
    reranked_results = reranker.generate(query=query, inputs=documents)["results"]
    return [documents[result["index"]] for result in reranked_results]

def hydrate_chromadb():
    """Load the exported embeddings and rebuild a local Chroma collection."""
    # The vectors were originally fetched as a gzipped data asset (JSON_DB) via
    # st.session_state.client.data_assets.get_content(); here they are read
    # from a local JSON export instead.
    with open("lablab - json.txt", "r", encoding="utf-8") as f:
        vectors = json.load(f)

    chroma_client = chromadb.PersistentClient(path="./chroma_db")

    # Make sure the collection is empty if it already existed.
    collection_name = "my_collection"
    try:
        chroma_client.delete_collection(name=collection_name)
    except Exception:
        print("Collection didn't exist - nothing to do.")
    collection = chroma_client.create_collection(name=collection_name)

    vector_embeddings = []
    vector_documents = []
    vector_metadatas = []
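    # Assumption about the export format: each record is expected to carry at
    # least an "embedding" (list of floats) and a "content" (chunk text) field;
    # any other metadata in the dump is ignored below.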
    vector_ids = []
    for vector in vectors:
        vector_embeddings.append(vector["embedding"])
        vector_documents.append(vector["content"])
        # The original asset metadata (asset_id, asset_name, url, line range)
        # is dropped; only a fixed source label is kept.
        clean_metadata = {"source": "Lablab website"}
        vector_metadatas.append(clean_metadata)
        # Chroma needs a unique ID per document; a random string is enough here.
        random_string = "".join(random.choices(string.ascii_uppercase + string.digits, k=10))
        vector_ids.append(random_string)

    collection.add(
        embeddings=vector_embeddings,
        documents=vector_documents,
        metadatas=vector_metadatas,
        ids=vector_ids,
    )
    return collection

def proximity_search(question):
    """Embed the question and return the top_n nearest chunks, joined as one string."""
    query_vectors = st.session_state.emb.embed_query(question)
    query_result = st.session_state.chroma_collection.query(
        query_embeddings=query_vectors,
        n_results=st.session_state.top_n,
        include=["documents", "metadatas", "distances"],
    )
    # Reverse so the most relevant chunk ends up closest to the question in the prompt.
    documents = list(reversed(query_result["documents"][0]))
    # Optionally, the rerank() helper above could reorder the documents here
    # before they are joined into the grounding text.
    return "\n".join(documents)

def do_query(query):
    # Add the retrieved chunks as context (only in the prompt, not in the history).
    grounding = proximity_search(query)
    prompt = (
        query
        + ". For a project share the image as markdown and mention the url as well."
        + " The context for the question: "
        + grounding
    )
    messages = [{"role": "user", "content": prompt}]

    # Get response from IBM; temperature 0 - no creativity here, just searching.
    with st.spinner("Thinking..."):
        assistant_reply = IBM_chat(messages, 0)

    # Display the assistant message (chat history is intentionally not persisted).
    st.chat_message("assistant").markdown(assistant_reply)

############################
##
## UI
##
############################

# Load the banner image from the same directory
st.image("banner_policy.jpg", use_container_width=True)

# Set up the sidebar
st.sidebar.title("🧙 Synergy Scrolling")
st.sidebar.write(
    "Synergy Scrolling analyzes policies and finds relevant past projects. "
    "This tool helps match your policy or business idea with projects from "
    "previous LabLab hackathons."
)
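# One-time initialization: Streamlit reruns this script on every interaction,
# so the expensive resources (API client, embedding model, Chroma collection)
# are created once and cached in st.session_state.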
################ INIT
if "client" not in st.session_state:
    with st.spinner("⏳ Waking the wizard ..."):
        IBM_token()
        wml_credentials = get_credentials()
        st.session_state.client = APIClient(credentials=wml_credentials, project_id=os.getenv("IBM_PROJECT_ID"))
        # The top_n / rerank settings were originally read from the vector
        # index properties (VECTOR_DB); they are fixed values here.
        st.session_state.emb = SentenceTransformerEmbeddings("sentence-transformers/all-MiniLM-L6-v2")
        st.session_state.top_n = 10

if "chroma_collection" not in st.session_state:
    with st.spinner("⏳ Dusting off the scroll books ..."):
        st.session_state.chroma_collection = hydrate_chromadb()

query = ""

################ main UI
st.title("🔮 Policy Scroll")
st.subheader("AI-Powered Project & Policy Matching")
st.write("Explore the Lab Lab Library to find relevant past projects that align with your policy or new initiative.")

################ sidebar UI
policy_input = st.sidebar.text_area("📝 Enter Your Policy or Business Idea:")

if st.sidebar.button("🔗 Analyze with IBM Granite"):
    if policy_input.strip():
        # Let Granite turn the policy into search criteria for the vector search.
        prompt = f"Define search criteria for projects to implement: {policy_input}"
        with st.spinner("Analyzing..."):
            result = IBM_query(prompt, 0.7)
        st.session_state["extended_query"] = "Find 3 projects that best match and explain why, with these criteria: " + result
    else:
        st.sidebar.warning("Please enter a policy or business idea first!")

# Display the AI-generated search criteria in another text area
st.sidebar.text_area("💡 Extended query:", value=st.session_state.get("extended_query", ""), height=150)

if st.sidebar.button("🔍 Search for synergy"):
    query = st.session_state.get("extended_query", "")

# Suggested search queries as buttons
col1, col2, col3 = st.columns(3)
with col1:
    q = "Projects with a link with Solarpunk"
    if st.button(q):
        query = q
with col2:
    q = "DEI aware projects"
    if st.button(q):
        query = q
with col3:
    q = "Decentral projects"
    if st.button(q):
        query = q

# Free-text input in Streamlit
user_input = st.text_input("Describe your policy or project to find relevant Lab Lab projects...", "")

if user_input:
    do_query(user_input)
if query:
    do_query(query)
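# Running the app - a minimal sketch, assuming the filenames used above
# (the script name "app.py" is a placeholder):
#   export IBM_API_KEY=...        # IBM Cloud API key
#   export IBM_PROJECT_ID=...     # watsonx.ai project ID
#   streamlit run app.py
# "banner_policy.jpg" and "lablab - json.txt" (the embedding export) must sit
# in the working directory; the Chroma index is persisted under ./chroma_db.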