lucas-ventura committed on
Commit
debd89e
·
verified ·
1 Parent(s): 676bad7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +5 -3
app.py CHANGED
@@ -3,6 +3,7 @@ import tempfile
3
  from pathlib import Path
4
 
5
  import gradio as gr
 
6
  from llama_cookbook.inference.model_utils import load_model as load_model_llamarecipes
7
  from llama_cookbook.inference.model_utils import load_peft_model
8
  from transformers import AutoTokenizer
@@ -28,7 +29,7 @@ inference_model = None
28
 
29
  LLAMA_CKPT_PATH = "meta-llama/Meta-Llama-3.1-8B-Instruct"
30
 
31
-
32
  def load_base_model():
33
  """Load the base Llama model and tokenizer once at startup."""
34
  global base_model, tokenizer
@@ -58,7 +59,7 @@ def load_base_model():
58
  tokenizer.pad_token = tokenizer.eos_token
59
  print("Base model loaded successfully")
60
 
61
-
62
  class FastLlamaInference:
63
  def __init__(
64
  self,
@@ -116,7 +117,7 @@ class FastLlamaInference:
116
 
117
  return inference(**params)
118
 
119
-
120
  def load_peft(model_name: str = "asr-10k"):
121
  """Load or switch PEFT model while reusing the base model."""
122
  global base_model, current_peft_model, inference_model
@@ -189,6 +190,7 @@ def download_from_url(url, output_path):
189
  return False, error_msg
190
 
191
 
 
192
  def process_video(
193
  video_file, video_url, model_name: str = "asr-10k", do_sample: bool = False
194
  ):
 
3
  from pathlib import Path
4
 
5
  import gradio as gr
6
+ import spaces
7
  from llama_cookbook.inference.model_utils import load_model as load_model_llamarecipes
8
  from llama_cookbook.inference.model_utils import load_peft_model
9
  from transformers import AutoTokenizer
 
29
 
30
  LLAMA_CKPT_PATH = "meta-llama/Meta-Llama-3.1-8B-Instruct"
31
 
32
+ @spaces.GPU
33
  def load_base_model():
34
  """Load the base Llama model and tokenizer once at startup."""
35
  global base_model, tokenizer
 
59
  tokenizer.pad_token = tokenizer.eos_token
60
  print("Base model loaded successfully")
61
 
62
+ @spaces.GPU
63
  class FastLlamaInference:
64
  def __init__(
65
  self,
 
117
 
118
  return inference(**params)
119
 
120
+ @spaces.GPU
121
  def load_peft(model_name: str = "asr-10k"):
122
  """Load or switch PEFT model while reusing the base model."""
123
  global base_model, current_peft_model, inference_model
 
190
  return False, error_msg
191
 
192
 
193
+ @spaces.GPU
194
  def process_video(
195
  video_file, video_url, model_name: str = "asr-10k", do_sample: bool = False
196
  ):