Update app.py
app.py CHANGED
@@ -18,6 +18,7 @@ import re
 from gradio_client import Client
 from simple_salesforce import Salesforce, SalesforceLogin
 from llama_index.llms.huggingface import HuggingFaceInferenceAPI
+from llama_index.llms.huggingface import HuggingFaceLLM
 
 
 # Define Pydantic model for incoming request body
@@ -28,11 +29,7 @@ class MessageRequest(BaseModel):
 # model=repo_id,
 # token=os.getenv("HF_TOKEN"),
 # )
-
-llm_client = InferenceClient(
-    model=repo_id,
-    token=os.getenv("HF_TOKEN")
-)
+
 
 os.environ["HF_TOKEN"] = os.getenv("HF_TOKEN")
 username = os.getenv("username")
@@ -84,12 +81,14 @@ templates = Jinja2Templates(directory="static")
 # max_new_tokens=512,
 # generate_kwargs={"temperature": 0.1},
 # )
-
-
-
-
-
-
+# Configure Llama index settings
+Settings.llm = HuggingFaceLLM(
+    model_name="google/flan-t5-small",
+    tokenizer_name="google/flan-t5-small",
+    context_window=512,  # flan-t5-small has a max context window of 512 tokens
+    max_new_tokens=256,
+    generate_kwargs={"temperature": 0.1, "do_sample": True},
+    device_map="auto"  # Automatically use GPU if available, else CPU
 )
 Settings.embed_model = HuggingFaceEmbedding(
     model_name="BAAI/bge-small-en-v1.5"