Spaces:

BaRiDo
/

IBMHackRAG

Sleeping

File size: 5,216 Bytes

import os
import getpass

import streamlit as st

def get_credentials():
    """Return the connection settings used for the watsonx.ai services.

    The endpoint is fixed to ``https://us-south.ml.cloud.ibm.com``. The API
    key comes from the ``IBM_API_KEY`` environment variable and is ``None``
    when that variable is not set.

    Returns:
        A dict with the keys ``"url"`` and ``"apikey"``.
    """
    api_key = os.getenv("IBM_API_KEY")
    return {"url": "https://us-south.ml.cloud.ibm.com", "apikey": api_key}

# Identifier of the foundation model handed to ModelInference.
model_id = "ibm/granite-3-8b-instruct"

# Text-generation parameters handed to ModelInference as ``params``.
parameters = dict(
    decoding_method="greedy",
    max_new_tokens=900,
    min_new_tokens=0,
    repetition_penalty=1,
)

# Project and space identifiers come from the environment (None when unset).
# Only project_id is passed to the clients; space_id is kept for the
# commented-out space-scoped alternative.
project_id = os.environ.get("IBM_PROJECT_ID")
space_id = os.environ.get("IBM_SPACE_ID")

from ibm_watsonx_ai.foundation_models import ModelInference

# Inference handle for the configured foundation model, scoped to the project.
# (Passing space_id=space_id instead would scope it to a space.)
model = ModelInference(
    model_id=model_id,
    params=parameters,
    credentials=get_credentials(),
    project_id=project_id,
)

from ibm_watsonx_ai.client import APIClient

# API client for the same project; it reads the vector index asset and is
# also handed to the reranker. (space_id=space_id is the space-scoped variant.)
wml_credentials = get_credentials()
client = APIClient(credentials=wml_credentials, project_id=project_id)

# Look up the vector index data asset and pull out its index properties.
vector_index_id = "14c14504-5f45-4e6c-8f0f-25f2378a1d99"
vector_index_details = client.data_assets.get_details(vector_index_id)
vector_index_properties = vector_index_details["entity"]["vector_index"]

# Number of passages to fetch from the vector store: a fixed 20 candidates
# when reranking is enabled in the index settings, otherwise the index's
# configured top_k.
if vector_index_properties["settings"].get("rerank"):
    top_n = 20
else:
    top_n = int(vector_index_properties["settings"]["top_k"])

def rerank(client, documents, query, top_n):
    """Reorder ``documents`` according to a cross-encoder reranker.

    Args:
        client: API client passed to ``Rerank`` as ``api_client``.
        documents: Candidate passages; indexed by the positions the
            reranker reports in its results.
        query: The text the passages are ranked against.
        top_n: Number of results requested through ``return_options``.

    Returns:
        A new list containing the documents in the order of the reranker's
        ``"results"`` entries.
    """
    from ibm_watsonx_ai.foundation_models import Rerank

    rerank_params = {
        "return_options": {"top_n": top_n},
        "truncate_input_tokens": 512,
    }
    reranker = Rerank(
        model_id="cross-encoder/ms-marco-minilm-l-12-v2",
        api_client=client,
        params=rerank_params,
    )

    response = reranker.generate(query=query, inputs=documents)

    # Each result carries the index of the document it refers to.
    return [documents[hit["index"]] for hit in response["results"]]

from ibm_watsonx_ai.foundation_models.embeddings.sentence_transformer_embeddings import SentenceTransformerEmbeddings

# Sentence-transformer embedder; proximity_search uses it to embed the question.
# NOTE(review): presumably this must be the same model that produced the
# embeddings stored in the vector index — confirm.
emb = SentenceTransformerEmbeddings('sentence-transformers/all-MiniLM-L6-v2')

import subprocess
import gzip
import json
import chromadb
import random
import string

def hydrate_chromadb():
    """Load the vector index data asset into a fresh Chroma collection.

    Downloads the asset content for ``vector_index_id`` through ``client``,
    gunzips and JSON-decodes it, then inserts every vector (embedding,
    document text, flattened metadata and a generated id) into a collection
    named ``"my_collection"`` on a default ``chromadb.Client()``. A collection
    of that name left over from an earlier run is dropped first.

    Returns:
        The populated Chroma collection.
    """
    data = client.data_assets.get_content(vector_index_id)
    vectors = json.loads(gzip.decompress(data).decode("utf-8"))

    chroma_client = chromadb.Client()

    # Make sure the collection is empty if it already existed.
    collection_name = "my_collection"
    try:
        chroma_client.delete_collection(name=collection_name)
    except Exception:
        # The exception chromadb raises for a missing collection differs
        # between releases, so catch Exception; a bare ``except`` would also
        # swallow KeyboardInterrupt and SystemExit.
        print("Collection didn't exist - nothing to do.")
    collection = chroma_client.create_collection(name=collection_name)

    vector_embeddings = []
    vector_documents = []
    vector_metadatas = []
    vector_ids = []
    id_alphabet = string.ascii_uppercase + string.digits

    for vector in vectors:
        metadata = vector["metadata"]
        lines = metadata["loc"]["lines"]
        asset_id = metadata["asset_id"]

        vector_embeddings.append(vector["embedding"])
        vector_documents.append(vector["content"])
        # Keep only flat metadata fields; the nested "loc" structure is
        # reduced to its line range.
        vector_metadatas.append(
            {
                "asset_id": asset_id,
                "asset_name": metadata["asset_name"],
                "url": metadata["url"],
                "from": lines["from"],
                "to": lines["to"],
            }
        )
        # The random suffix keeps ids distinct when two chunks share the
        # same asset and line range.
        random_suffix = "".join(random.choices(id_alphabet, k=10))
        vector_ids.append(
            "{}:{}-{}-{}".format(asset_id, lines["from"], lines["to"], random_suffix)
        )

    collection.add(
        embeddings=vector_embeddings,
        documents=vector_documents,
        metadatas=vector_metadatas,
        ids=vector_ids,
    )
    return collection

# Build the vector store once at module load; proximity_search queries it.
# NOTE(review): as a top-level statement this re-runs whenever the script is
# executed again — confirm whether caching is wanted under Streamlit.
chroma_collection = hydrate_chromadb()

def proximity_search(question):
    """Retrieve grounding text for ``question`` from the Chroma collection.

    The question is embedded with ``emb``, the ``top_n`` nearest entries are
    fetched from ``chroma_collection``, and their order is reversed. When the
    index settings enable reranking, the passages are then reordered by
    ``rerank`` using the index's ``top_k``.

    Args:
        question: The user's question text.

    Returns:
        The selected passages joined with newlines into a single string.
    """
    hits = chroma_collection.query(
        query_embeddings=emb.embed_query(question),
        n_results=top_n,
        include=["documents", "metadatas", "distances"],
    )

    # Only one query was sent, so take its result list and flip the order.
    documents = list(hits["documents"][0])
    documents.reverse()

    settings = vector_index_properties["settings"]
    if settings.get("rerank"):
        documents = rerank(client, documents, question, settings["top_k"])

    return "\n".join(documents)

# Streamlit UI: take a question, retrieve grounding passages, generate an answer.
st.title("🔍 IBM Watson RAG Chatbot")

# User input in Streamlit. Surrounding whitespace is stripped so that a
# whitespace-only entry does not trigger a retrieval and generation round trip.
question = (st.text_input("Enter your question:") or "").strip()

if question:
    # Retrieve relevant grounding context
    grounding = proximity_search(question)

    # Format the question with retrieved context, using the role markers the
    # prompt template expects.
    formatted_question = f"""<|start_of_role|>user<|end_of_role|>Use the following pieces of context to answer the question.

{grounding}

Question: {question}<|end_of_text|>
<|start_of_role|>assistant<|end_of_role|>"""

    # Placeholder for a prompt prefix (optional)
    prompt_input = ""  # Set this dynamically if needed
    prompt = f"""{prompt_input}{formatted_question}"""

    # Generate the answer with the configured model. Previously the raw
    # prompt was echoed back as a stand-in response and the model was never
    # called.
    generated_response = model.generate_text(prompt=prompt)

    # Display results
    st.subheader("📌 Retrieved Context")
    st.write(grounding)

    st.subheader("🤖 AI Response")
    st.write(generated_response)