Spaces:

lucas-ventura
/

chapter-llama

Running on Zero

lucas-ventura committed on Apr 3

Commit

debd89e

verified ·

1 Parent(s): 676bad7

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -3,6 +3,7 @@ import tempfile
 from pathlib import Path
 import gradio as gr
 from llama_cookbook.inference.model_utils import load_model as load_model_llamarecipes
 from llama_cookbook.inference.model_utils import load_peft_model
 from transformers import AutoTokenizer
@@ -28,7 +29,7 @@ inference_model = None
 LLAMA_CKPT_PATH = "meta-llama/Meta-Llama-3.1-8B-Instruct"
 def load_base_model():
     """Load the base Llama model and tokenizer once at startup."""
     global base_model, tokenizer
@@ -58,7 +59,7 @@ def load_base_model():
         tokenizer.pad_token = tokenizer.eos_token
         print("Base model loaded successfully")
 class FastLlamaInference:
     def __init__(
         self,
@@ -116,7 +117,7 @@ class FastLlamaInference:
         return inference(**params)
 def load_peft(model_name: str = "asr-10k"):
     """Load or switch PEFT model while reusing the base model."""
     global base_model, current_peft_model, inference_model
@@ -189,6 +190,7 @@ def download_from_url(url, output_path):
         return False, error_msg
 def process_video(
     video_file, video_url, model_name: str = "asr-10k", do_sample: bool = False
 ):

 from pathlib import Path
 import gradio as gr
+import spaces
 from llama_cookbook.inference.model_utils import load_model as load_model_llamarecipes
 from llama_cookbook.inference.model_utils import load_peft_model
 from transformers import AutoTokenizer
 LLAMA_CKPT_PATH = "meta-llama/Meta-Llama-3.1-8B-Instruct"
+@spaces.GPU
 def load_base_model():
     """Load the base Llama model and tokenizer once at startup."""
     global base_model, tokenizer
         tokenizer.pad_token = tokenizer.eos_token
         print("Base model loaded successfully")
+@spaces.GPU
 class FastLlamaInference:
     def __init__(
         self,
         return inference(**params)
+@spaces.GPU
 def load_peft(model_name: str = "asr-10k"):
     """Load or switch PEFT model while reusing the base model."""
     global base_model, current_peft_model, inference_model
         return False, error_msg
+@spaces.GPU
 def process_video(
     video_file, video_url, model_name: str = "asr-10k", do_sample: bool = False
 ):