Update app.py

app.py CHANGED
@@ -1,6 +1,7 @@
 import os
 import tempfile
 from pathlib import Path
+import subprocess
 
 import gradio as gr
 import spaces
@@ -14,6 +15,7 @@ from src.models.llama_inference import inference
 from src.test.vidchapters import get_chapters
 from tools.download.models import download_base_model, download_model
 
+
 # Set up proxies
 # from urllib.request import getproxies
 # proxies = getproxies()
@@ -29,6 +31,23 @@ inference_model = None
 
 LLAMA_CKPT_PATH = "meta-llama/Meta-Llama-3.1-8B-Instruct"
 
+def install_cudnn():
+    """Installs specific versions of libcudnn and configures torch for TF32."""
+
+    try:
+        subprocess.run(["apt-get", "update"], check=True)
+        subprocess.run(["apt-get", "install", "-y", "libcudnn8=8.9.2.26-1+cuda12.1"], check=True)
+        subprocess.run(["apt-get", "install", "-y", "libcudnn8-dev=8.9.2.26-1+cuda12.1"], check=True)
+        subprocess.run(["python", "-c", "import torch; torch.backends.cuda.matmul.allow_tf32 = True; torch.backends.cudnn.allow_tf32 = True"], check=True)
+        subprocess.run(["ln", "-s", "/usr/lib/x86_64-linux-gnu/libcudnn_ops_infer.so", "/usr/lib/x86_64-linux-gnu/libcudnn_ops_infer.so.8"], check=True)
+        print("cuDNN installation and configuration successful.")
+    except subprocess.CalledProcessError as e:
+        print(f"Error during cuDNN installation: {e}")
+    except FileNotFoundError:
+        print("apt-get or python not found. Ensure they are in your PATH.")
+
+install_cudnn()
+
 @spaces.GPU
 def load_base_model():
     """Load the base Llama model and tokenizer once at startup."""
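A note on the TF32 step in this change: the subprocess.run(["python", "-c", ...]) call sets the backend flags in a short-lived child interpreter, so they never take effect in the Space's own process. A minimal in-process sketch (assuming torch is importable at module scope in app.py) would be:

import torch

# Enable TF32 in the running process; a child interpreter spawned via
# subprocess does not share these backend flags with its parent.
torch.backends.cuda.matmul.allow_tf32 = True
torch.backends.cudnn.allow_tf32 = True

Two smaller caveats, offered as assumptions about the Spaces runtime rather than verified behavior of this commit: apt-get usually requires root, and on Hugging Face Spaces the pinned libcudnn8 packages can instead be listed in a packages.txt at the repo root so they are installed at image build time; and the ln -s call will raise CalledProcessError on a restart if the symlink already exists (ln -sf would make the step idempotent).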