removed the space config
app.py CHANGED
@@ -1,7 +1,7 @@
 import torch
 import gradio as gr
 from transformers import AutoTokenizer, AutoModelForCausalLM, TextIteratorStreamer
-from huggingface_hub import login
+from huggingface_hub import login
 import os
 from threading import Thread
 from openai import OpenAI
@@ -10,46 +10,6 @@ TOKEN = os.getenv('HF_AUTH_TOKEN')
 login(token=TOKEN,
       add_to_git_credential=False)
 
-# Authenticate with hf api
-api = HfApi()
-
-# switch hardware function
-def space_hardware_config(instance_size: str="gpu",
-                          instance_type: str="1xL4",
-                          vcpus: int=8,
-                          memory: int=30):
-    """
-    This will manually select what hardware we'll use in the space.
-    """
-
-    api = HfApi()
-    token = HfFolder.get_token()
-    if token is None:
-        raise ValueError("Hugging Face token not found. Please log in using huggingface-cli or set the token manually.")
-
-    space_id = os.getenv("SPACE_ID")
-    if not space_id:
-        raise ValueError("SPACE_ID environment variable not found.")
-
-    space_info = api.repo_info(repo_id=space_id, repo_type="space", token=token)
-    print(space_info)
-
-    # # Hardware Configuration
-    # space.config["compute"] = {
-    #     "instance_type": instance_type,
-    #     "instance_size": instance_size,
-    #     "disk_size": 50,
-    #     "vcpus": vcpus, # number of virtual CPU's
-    #     "memory": memory # amount of memory in gb
-    # }
-
-    # # Save updated space config
-    # api.push_to_hub(space)
-    # print("Hardware configuration successfull. Check the cuda command.")
-
-# Automatically place to the standard config we need for loki
-space_hardware_config()
-
 # Open ai api key
 API_KEY = os.getenv('OPEN_AI_API_KEY')
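Note on the removed block: the commented-out space.config["compute"] dictionary was never a supported huggingface_hub API, and the live code only printed repo_info, so the function never actually switched hardware. The supported route for what it attempted is HfApi.request_space_hardware. A minimal sketch, assuming the same SPACE_ID and HF_AUTH_TOKEN environment variables this file already uses (the "t4-small" tier is just an illustrative choice):

import os
from huggingface_hub import HfApi

def request_hardware(hardware: str = "t4-small"):
    """Sketch: move the current Space onto the requested hardware tier."""
    space_id = os.getenv("SPACE_ID")
    if not space_id:
        raise ValueError("SPACE_ID environment variable not found.")
    api = HfApi(token=os.getenv("HF_AUTH_TOKEN"))
    # Schedules a restart of the Space on the new hardware tier.
    api.request_space_hardware(repo_id=space_id, hardware=hardware)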
@@ -68,7 +28,6 @@ terminators = [
     llama_tokenizer.convert_tokens_to_ids("<|eot_id|>")
 ]
 
-
 # The output
 def output_list(output: list):
     """
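The terminators list, which includes Llama 3's <|eot_id|> token, pairs with the TextIteratorStreamer and Thread imports kept at the top of the file. A rough sketch of how those pieces typically combine, reusing the file's llama_model and llama_tokenizer names (the generation arguments here are assumptions):

from threading import Thread
from transformers import TextIteratorStreamer

def stream_llama(prompt: str):
    """Sketch: run generate() on a worker thread and yield text as it decodes."""
    inputs = llama_tokenizer(prompt, return_tensors="pt").to(llama_model.device)
    streamer = TextIteratorStreamer(llama_tokenizer,
                                    skip_prompt=True,
                                    skip_special_tokens=True)
    generation_kwargs = dict(**inputs,
                             streamer=streamer,
                             max_new_tokens=512,
                             eos_token_id=terminators)
    Thread(target=llama_model.generate, kwargs=generation_kwargs).start()
    for chunk in streamer:  # blocks until the next decoded chunk is ready
        yield chunk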
@@ -122,10 +81,6 @@ def loki_generation(input_text: str,
     """
     Pass input texts, tokenize, output and back to text.
     """
-    space_hardware_config(instance_size="gpu",
-                          instance_type="1xL4",
-                          vcpus=8,
-                          memory=30)
     if mode == "llama":
         conversation = []
         for user, assistant in history:
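The if mode == "llama": branch that follows rebuilds Gradio's (user, assistant) history pairs into the role/content message list a chat template expects. A sketch of that standard pattern under the same naming assumptions (the function name is hypothetical):

def build_llama_inputs(input_text: str, history: list):
    """Sketch: convert history pairs into chat-template input IDs."""
    conversation = []
    for user, assistant in history:
        conversation.append({"role": "user", "content": user})
        conversation.append({"role": "assistant", "content": assistant})
    conversation.append({"role": "user", "content": input_text})
    return llama_tokenizer.apply_chat_template(conversation,
                                               add_generation_prompt=True,
                                               return_tensors="pt")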
@@ -259,10 +214,6 @@ def bot_comms(input_text: str,
         yield "".join(outputs)
 
     if llm_mode == "switch to gpt-4o":
-        space_hardware_config(instance_size="cpu",
-                              instance_type="basic",
-                              vcpus=2,
-                              memory=16)
         stream = gpt_generation(input=input_text,
                                 llama_output="",
                                 mode="gpt-4o")
@@ -275,10 +226,6 @@ def bot_comms(input_text: str,
         yield "".join(outputs)
 
     if llm_mode == "switch to gpt-3.5-turbo":
-        space_hardware_config(instance_size="cpu",
-                              instance_type="basic",
-                              vcpus=2,
-                              memory=16)
         stream = gpt_generation(input=input_text,
                                 llama_output="",
                                 mode="gpt-3.5-turbo")
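With the hardware switching gone, both GPT branches call gpt_generation directly. The diff only shows its call sites, but given the from openai import OpenAI import and API_KEY at the top of the file, a streaming body would plausibly look like this (the implementation below is an assumption; only the signature appears in the diff):

from openai import OpenAI

client = OpenAI(api_key=API_KEY)

def gpt_generation(input: str, llama_output: str, mode: str):
    """Sketch: stream a chat completion; mode is the model name, e.g. "gpt-4o"."""
    messages = [{"role": "user", "content": input}]
    if llama_output:
        # Pass the prior Llama answer along as assistant context.
        messages.insert(0, {"role": "assistant", "content": llama_output})
    # stream=True returns an iterator of chunks with incremental delta content.
    return client.chat.completions.create(model=mode,
                                          messages=messages,
                                          stream=True)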