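# Streamlit app: upload an HTML file, build a llama_index vector index over it,
# and query it for product information using HuggingFace Inference API models.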
import uuid
from io import StringIO

import streamlit as st
from llama_index import ServiceContext, VectorStoreIndex
from llama_index.embeddings import HuggingFaceInferenceAPIEmbedding
from llama_index.llms import HuggingFaceInferenceAPI
from llama_index.schema import Document
from llama_index.vector_stores.types import MetadataFilters, ExactMatchFilter
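
# The Inference API token is read from Streamlit secrets. A minimal
# .streamlit/secrets.toml entry would look like (hypothetical value):
#   INFRERENCE_API_TOKEN = "hf_xxx"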
inference_api_key = st.secrets["INFRERENCE_API_TOKEN"]
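
# LLM: Mistral-7B-Instruct served remotely through the HuggingFace Inference API.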
llm = HuggingFaceInferenceAPI(
    model_name="mistralai/Mistral-7B-Instruct-v0.2", token=inference_api_key)
embed_model = HuggingFaceInferenceAPIEmbedding(
    model_name="Gooly/gte-small-en-fine-tuned-e-commerce",
    token=inference_api_key,
    model_kwargs={"device": ""},
    encode_kwargs={"normalize_embeddings": True},
)
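
# Bundle the LLM and embedding model into a ServiceContext so the index and
# query engine share the same models.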
service_context = ServiceContext.from_defaults(
    embed_model=embed_model, llm=llm)
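
# Let the user upload the HTML document to index.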
html_file = st.file_uploader("Upload an HTML file", type=["html"])
if html_file is not None:
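    # Decode the uploaded bytes and show the raw HTML in a collapsible expander.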
    stringio = StringIO(html_file.getvalue().decode("utf-8"))
    string_data = stringio.read()

    with st.expander("Uploaded HTML"):
        st.write(string_data)

    # Wrap the HTML in a Document tagged with a unique id and a source label so
    # queries can be restricted to this upload via a metadata filter.
    document_id = str(uuid.uuid4())
    document = Document(text=string_data)
    document.metadata["id"] = document_id
    document.metadata["source"] = "HTML"
    documents = [document]

    filters = MetadataFilters(
        filters=[ExactMatchFilter(key="id", value=document_id)])

    # Build the vector index and a query engine scoped to the uploaded document.
    index = VectorStoreIndex.from_documents(
        documents, show_progress=True, service_context=service_context)
    query_engine = index.as_query_engine(
        filters=filters, service_context=service_context)

    response = query_engine.query("Get me all the information about the product")
    st.write(response)
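
# To run locally (assuming this file is saved as app.py and the token is set in
# .streamlit/secrets.toml): streamlit run app.py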