redfernstech committed on
Commit
c2dfc52
·
verified ·
1 Parent(s): 94d6668

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +10 -11
app.py CHANGED
@@ -18,6 +18,7 @@ import re
18
  from gradio_client import Client
19
  from simple_salesforce import Salesforce, SalesforceLogin
20
  from llama_index.llms.huggingface import HuggingFaceInferenceAPI
 
21
 
22
 
23
  # Define Pydantic model for incoming request body
@@ -28,11 +29,7 @@ class MessageRequest(BaseModel):
28
  # model=repo_id,
29
  # token=os.getenv("HF_TOKEN"),
30
  # )
31
- repo_id = "mistralai/Mistral-7B-Instruct-v0.1"
32
- llm_client = InferenceClient(
33
- model=repo_id,
34
- token=os.getenv("HF_TOKEN")
35
- )
36
 
37
  os.environ["HF_TOKEN"] = os.getenv("HF_TOKEN")
38
  username = os.getenv("username")
@@ -84,12 +81,14 @@ templates = Jinja2Templates(directory="static")
84
  # max_new_tokens=512,
85
  # generate_kwargs={"temperature": 0.1},
86
  # )
87
- Settings.llm = HuggingFaceInferenceAPI(
88
- model_name="mistralai/Mistral-7B-Instruct-v0.1",
89
- token=os.getenv("HF_TOKEN"), # Your Hugging Face API token
90
- context_window=4096, # Mistral-7B’s context window
91
- max_new_tokens=512,
92
- generate_kwargs={"temperature": 0.1}
 
 
93
  )
94
  Settings.embed_model = HuggingFaceEmbedding(
95
  model_name="BAAI/bge-small-en-v1.5"
 
18
  from gradio_client import Client
19
  from simple_salesforce import Salesforce, SalesforceLogin
20
  from llama_index.llms.huggingface import HuggingFaceInferenceAPI
21
+ from llama_index.llms.huggingface import HuggingFaceLLM
22
 
23
 
24
  # Define Pydantic model for incoming request body
 
29
  # model=repo_id,
30
  # token=os.getenv("HF_TOKEN"),
31
  # )
32
+
 
 
 
 
33
 
34
  os.environ["HF_TOKEN"] = os.getenv("HF_TOKEN")
35
  username = os.getenv("username")
 
81
  # max_new_tokens=512,
82
  # generate_kwargs={"temperature": 0.1},
83
  # )
84
+ # Configure Llama index settings
85
+ Settings.llm = HuggingFaceLLM(
86
+ model_name="google/flan-t5-small",
87
+ tokenizer_name="google/flan-t5-small",
88
+ context_window=512, # flan-t5-small has a max context window of 512 tokens
89
+ max_new_tokens=256,
90
+ generate_kwargs={"temperature": 0.1, "do_sample": True},
91
+ device_map="auto" # Automatically use GPU if available, else CPU
92
  )
93
  Settings.embed_model = HuggingFaceEmbedding(
94
  model_name="BAAI/bge-small-en-v1.5"