Spaces:
Sleeping
Sleeping
File size: 592 Bytes
2f28750 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 |
from llama_cpp import Llama
from huggingface_hub import hf_hub_download
def load_model() -> Llama:
    """Download the Phi-3-mini GGUF model from the Hugging Face Hub and load it.

    Returns:
        Llama: The loaded llama.cpp model instance (CPU-only, 4k context).

    Raises:
        RuntimeError: If the download or model initialization fails; the
            original exception is chained as the cause.
    """
    try:
        model = Llama(
            model_path=hf_hub_download(
                repo_id="microsoft/Phi-3-mini-4k-instruct-gguf",
                filename="Phi-3-mini-4k-instruct-q4.gguf",
            ),
            n_ctx=4096,      # match the model's 4k-token context window
            n_threads=8,
            n_gpu_layers=0,  # 0 = pure CPU inference
            # NOTE(review): the original passed stop=["\n", " Q:"] here, but
            # `stop` is a per-completion parameter (create_completion / __call__),
            # not a Llama constructor parameter — it was silently ignored.
            # Pass it at generation time instead.
        )
        return model
    except Exception as e:
        # Chain the cause so the original traceback is preserved for debugging.
        # RuntimeError subclasses Exception, so existing callers still catch it.
        raise RuntimeError(f"Failed to load model: {e}") from e
|