Spaces:
Running
on
Zero
Running
on
Zero
Update app.py
Browse files
app.py
CHANGED
@@ -3,6 +3,7 @@ import tempfile
|
|
3 |
from pathlib import Path
|
4 |
|
5 |
import gradio as gr
|
|
|
6 |
from llama_cookbook.inference.model_utils import load_model as load_model_llamarecipes
|
7 |
from llama_cookbook.inference.model_utils import load_peft_model
|
8 |
from transformers import AutoTokenizer
|
@@ -28,7 +29,7 @@ inference_model = None
|
|
28 |
|
29 |
LLAMA_CKPT_PATH = "meta-llama/Meta-Llama-3.1-8B-Instruct"
|
30 |
|
31 |
-
|
32 |
def load_base_model():
|
33 |
"""Load the base Llama model and tokenizer once at startup."""
|
34 |
global base_model, tokenizer
|
@@ -58,7 +59,7 @@ def load_base_model():
|
|
58 |
tokenizer.pad_token = tokenizer.eos_token
|
59 |
print("Base model loaded successfully")
|
60 |
|
61 |
-
|
62 |
class FastLlamaInference:
|
63 |
def __init__(
|
64 |
self,
|
@@ -116,7 +117,7 @@ class FastLlamaInference:
|
|
116 |
|
117 |
return inference(**params)
|
118 |
|
119 |
-
|
120 |
def load_peft(model_name: str = "asr-10k"):
|
121 |
"""Load or switch PEFT model while reusing the base model."""
|
122 |
global base_model, current_peft_model, inference_model
|
@@ -189,6 +190,7 @@ def download_from_url(url, output_path):
|
|
189 |
return False, error_msg
|
190 |
|
191 |
|
|
|
192 |
def process_video(
|
193 |
video_file, video_url, model_name: str = "asr-10k", do_sample: bool = False
|
194 |
):
|
|
|
3 |
from pathlib import Path
|
4 |
|
5 |
import gradio as gr
|
6 |
+
import spaces
|
7 |
from llama_cookbook.inference.model_utils import load_model as load_model_llamarecipes
|
8 |
from llama_cookbook.inference.model_utils import load_peft_model
|
9 |
from transformers import AutoTokenizer
|
|
|
29 |
|
30 |
LLAMA_CKPT_PATH = "meta-llama/Meta-Llama-3.1-8B-Instruct"
|
31 |
|
32 |
+
@spaces.GPU
|
33 |
def load_base_model():
|
34 |
"""Load the base Llama model and tokenizer once at startup."""
|
35 |
global base_model, tokenizer
|
|
|
59 |
tokenizer.pad_token = tokenizer.eos_token
|
60 |
print("Base model loaded successfully")
|
61 |
|
62 |
+
@spaces.GPU
|
63 |
class FastLlamaInference:
|
64 |
def __init__(
|
65 |
self,
|
|
|
117 |
|
118 |
return inference(**params)
|
119 |
|
120 |
+
@spaces.GPU
|
121 |
def load_peft(model_name: str = "asr-10k"):
|
122 |
"""Load or switch PEFT model while reusing the base model."""
|
123 |
global base_model, current_peft_model, inference_model
|
|
|
190 |
return False, error_msg
|
191 |
|
192 |
|
193 |
+
@spaces.GPU
|
194 |
def process_video(
|
195 |
video_file, video_url, model_name: str = "asr-10k", do_sample: bool = False
|
196 |
):
|