Spaces:

BaRiDo
/

IBMHackRAG

Sleeping

App Files Files Community

IBMHackRAG / app.py

BaRiDo

Update app.py

3bc66ce verified 2 months ago

raw

history blame

5.22 kB

	import os
	import getpass

	import streamlit as st

	def get_credentials() -> dict:
	    """Return the connection settings used for the watsonx.ai services.

	    Returns:
	        A dict with two keys: ``"url"``, the fixed us-south service
	        endpoint, and ``"apikey"``, the value of the ``IBM_API_KEY``
	        environment variable (``None`` when the variable is not set).
	    """
	    return {
	        "url": "https://us-south.ml.cloud.ibm.com",
	        "apikey": os.getenv("IBM_API_KEY"),
	    }

	# Identifier of the foundation model this app is configured for.
	model_id = "ibm/granite-3-8b-instruct"

	# Generation settings handed to the model: greedy decoding, at most 900
	# new tokens, no minimum, and a neutral repetition penalty.
	parameters = dict(
	    decoding_method="greedy",
	    max_new_tokens=900,
	    min_new_tokens=0,
	    repetition_penalty=1,
	)

	# Project / space identifiers are read from the environment; each is None
	# when its variable is unset.
	project_id = os.environ.get("IBM_PROJECT_ID")
	space_id = os.environ.get("IBM_SPACE_ID")

	from ibm_watsonx_ai.foundation_models import ModelInference

	# Inference handle for the configured model, scoped to the project.
	# The space-scoped alternative (space_id) is intentionally not passed.
	model = ModelInference(
	    model_id=model_id,
	    params=parameters,
	    credentials=get_credentials(),
	    project_id=project_id,
	)

	from ibm_watsonx_ai.client import APIClient

	# API client bound to the same project as the model (space_id is not used).
	wml_credentials = get_credentials()
	client = APIClient(credentials=wml_credentials, project_id=project_id)

	# Fetch the details of the vector-index data asset and keep the
	# "vector_index" section, which holds the retrieval settings.
	vector_index_id = "14c14504-5f45-4e6c-8f0f-25f2378a1d99"
	vector_index_details = client.data_assets.get_details(vector_index_id)
	vector_index_properties = vector_index_details["entity"]["vector_index"]

	# Number of neighbours to retrieve: a fixed pool of 20 candidates when
	# reranking is enabled in the index settings, otherwise the configured top_k.
	if vector_index_properties["settings"].get("rerank"):
	    top_n = 20
	else:
	    top_n = int(vector_index_properties["settings"]["top_k"])

	def rerank(client, documents, query, top_n):
	    """Reorder *documents* by relevance to *query* using a cross-encoder.

	    Args:
	        client: API client handed to ``Rerank`` as ``api_client``.
	        documents: Candidate passages to be scored.
	        query: The question the passages are ranked against.
	        top_n: Number of results requested from the reranker.

	    Returns:
	        A new list with the entries of *documents* in the order the
	        reranker returned them. An empty *documents* yields ``[]`` without
	        contacting the service.
	    """
	    # Nothing to rank: skip the remote call entirely.
	    if not documents:
	        return []

	    from ibm_watsonx_ai.foundation_models import Rerank

	    reranker = Rerank(
	        model_id="cross-encoder/ms-marco-minilm-l-12-v2",
	        api_client=client,
	        params={
	            "return_options": {"top_n": top_n},
	            "truncate_input_tokens": 512,
	        },
	    )

	    reranked_results = reranker.generate(query=query, inputs=documents)["results"]

	    # Each result refers back to its passage by position in the submitted list.
	    return [documents[result["index"]] for result in reranked_results]

	from ibm_watsonx_ai.foundation_models.embeddings.sentence_transformer_embeddings import SentenceTransformerEmbeddings

	# Embedding model used to turn a user question into a query vector
	# (see proximity_search).
	# NOTE(review): presumably this must be the same model that produced the
	# embeddings stored in the vector index - confirm.
	emb = SentenceTransformerEmbeddings('sentence-transformers/all-MiniLM-L6-v2')

	import subprocess
	import gzip
	import json
	import chromadb
	import random
	import string

	def hydrate_chromadb():
	    """Load the vector index asset into a freshly created Chroma collection.

	    Downloads the content of the data asset ``vector_index_id`` through
	    ``client``, gunzips and JSON-decodes it, and adds every entry
	    (embedding, text and a trimmed-down metadata dict) to a collection
	    named ``"my_collection"``. Any previous collection of that name is
	    dropped first.

	    Returns:
	        The Chroma collection holding the index contents.
	    """
	    data = client.data_assets.get_content(vector_index_id)
	    vectors = json.loads(gzip.decompress(data).decode("utf-8"))

	    chroma_client = chromadb.Client()

	    # make sure collection is empty if it already existed
	    collection_name = "my_collection"
	    try:
	        chroma_client.delete_collection(name=collection_name)
	    except Exception:
	        # NOTE(review): the exception chromadb raises for a missing
	        # collection appears to differ between releases, so this stays
	        # broad - but no longer swallows KeyboardInterrupt/SystemExit.
	        print("Collection didn't exist - nothing to do.")
	    collection = chroma_client.create_collection(name=collection_name)

	    vector_embeddings = []
	    vector_documents = []
	    vector_metadatas = []
	    vector_ids = []

	    for vector in vectors:
	        metadata = vector["metadata"]
	        lines = metadata["loc"]["lines"]
	        asset_id = metadata["asset_id"]

	        vector_embeddings.append(vector["embedding"])
	        vector_documents.append(vector["content"])
	        # Keep only flat fields in the stored metadata (the nested "loc"
	        # structure is reduced to its from/to line numbers).
	        vector_metadatas.append({
	            "asset_id": asset_id,
	            "asset_name": metadata["asset_name"],
	            "url": metadata["url"],
	            "from": lines["from"],
	            "to": lines["to"],
	        })

	        # A random suffix keeps ids distinct when two entries share the
	        # same asset and line range.
	        random_string = ''.join(random.choices(string.ascii_uppercase + string.digits, k=10))
	        vector_id = "{}:{}-{}-{}".format(asset_id, lines["from"], lines["to"], random_string)
	        vector_ids.append(vector_id)

	    # An empty index has nothing to insert; skip the call rather than
	    # passing empty lists to the collection.
	    if vector_ids:
	        collection.add(
	            embeddings=vector_embeddings,
	            documents=vector_documents,
	            metadatas=vector_metadatas,
	            ids=vector_ids,
	        )
	    return collection

	# Populate the collection when the script runs; proximity_search queries it.
	chroma_collection = hydrate_chromadb()

	def proximity_search(question):
	    """Retrieve the grounding text for *question* from the vector index.

	    Embeds the question with ``emb``, queries ``chroma_collection`` for the
	    ``top_n`` nearest entries and, when the index settings enable
	    reranking, passes the retrieved passages through ``rerank`` with the
	    configured ``top_k``.

	    Args:
	        question: The user's question as plain text.

	    Returns:
	        The selected passages joined with newlines into a single string.
	    """
	    settings = vector_index_properties["settings"]

	    query_vectors = emb.embed_query(question)
	    query_result = chroma_collection.query(
	        query_embeddings=query_vectors,
	        n_results=top_n,
	        include=["documents", "metadatas", "distances"],
	    )

	    # Only the first (and only) query's documents are used, in reversed order.
	    # NOTE(review): presumably reversed so the closest matches end up last,
	    # next to the question in the prompt - confirm. When reranking is on,
	    # the reranker's ordering replaces this one.
	    documents = list(reversed(query_result["documents"][0]))

	    if settings.get("rerank"):
	        documents = rerank(client, documents, question, settings["top_k"])

	    return "\n".join(documents)

	# Streamlit UI
	st.title("🔍 IBM Watson RAG Chatbot")

	# User input in Streamlit
	question = st.text_input("Enter your question:")

	# Nothing is retrieved or rendered until the user has typed a question.
	if question:
	    # Retrieve relevant grounding context
	    grounding = proximity_search(question)

	    # Format the question with retrieved context using the model's role
	    # markers. The markers must be written as plain "<|...|>": a "\|"
	    # inside the literal is an invalid escape and leaves a stray backslash
	    # in the prompt. Built from explicit pieces so source indentation can
	    # never leak into the prompt text.
	    formatted_question = (
	        "<|start_of_role|>user<|end_of_role|>"
	        "Use the following pieces of context to answer the question.\n"
	        "\n"
	        f"{grounding}\n"
	        "\n"
	        f"Question: {question}<|end_of_text|>\n"
	        "<|start_of_role|>assistant<|end_of_role|>"
	    )

	    # Placeholder for a prompt input (Optional)
	    prompt_input = ""  # Set this dynamically if needed
	    prompt = f"{prompt_input}{formatted_question}"

	    # Simulated AI response.
	    # TODO(review): replace with an actual call on `model`; until then the
	    # app only echoes the assembled prompt.
	    generated_response = f"AI Response based on: {prompt}"

	    # Display results
	    st.subheader("📌 Retrieved Context")
	    st.write(grounding)

	    st.subheader("🤖 AI Response")
	    st.write(generated_response)