import uuid
from io import StringIO

import streamlit as st
from llama_index import ServiceContext, VectorStoreIndex
from llama_index.embeddings import HuggingFaceInferenceAPIEmbedding
from llama_index.llms import HuggingFaceInferenceAPI
from llama_index.schema import Document
from llama_index.vector_stores.types import ExactMatchFilter, MetadataFilters

# Hugging Face Inference API token, read from Streamlit secrets (secrets.toml)
inference_api_key = st.secrets["INFERENCE_API_TOKEN"]

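# Remote LLM served through the Hugging Face Inference API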
llm = HuggingFaceInferenceAPI(
    model_name="mistralai/Mistral-7B-Instruct-v0.2", token=inference_api_key)

# Embedding model, also served through the Hugging Face Inference API
embed_model = HuggingFaceInferenceAPIEmbedding(
    model_name="Gooly/gte-small-en-fine-tuned-e-commerce",
    token=inference_api_key,
)

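# Bundle the LLM and embedding model so the index and query engine use them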
service_context = ServiceContext.from_defaults(
    embed_model=embed_model, llm=llm)

html_file = st.file_uploader("Upload an HTML file", type=["html"])

if html_file is not None:
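    # Decode the uploaded HTML to a string and show it for inspection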
    stringio = StringIO(html_file.getvalue().decode("utf-8"))
    string_data = stringio.read()
    with st.expander("Uploaded HTML"):
        st.write(string_data)

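    # Give this upload its own id so it can be told apart in the index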
    document_id = str(uuid.uuid4())

    document = Document(text=string_data)
    document.metadata["id"] = document_id
    document.metadata["source"] = "HTML"
    documents = [document]

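    # Metadata filter restricting later queries to this document's chunks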
    filters = MetadataFilters(
        filters=[ExactMatchFilter(key="id", value=document_id)])

    # Embed the document and build an in-memory vector index
    index = VectorStoreIndex.from_documents(
        documents, show_progress=True, service_context=service_context)

    retriever = index.as_retriever()

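    # Preview retrieval: fetch the top-scoring chunks for a generic product query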
    ranked_nodes = retriever.retrieve(
        "Get me all the information about the product")

    with st.expander("Ranked Nodes"):
        for node in ranked_nodes:
            st.write(node.node.get_content(), "-> Score:", node.score)

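    # Query engine restricted to this document via the metadata filter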
    query_engine = index.as_query_engine(
        filters=filters, service_context=service_context)

    response = query_engine.query(
        "Get me all the information about the product")

    st.write(response)