# NOTE: Hugging Face Spaces status text ("Spaces: Sleeping") removed —
# it was page-scrape residue, not part of the program.
from huggingface_hub import hf_hub_download
from llama_cpp import Llama
def load_model() -> Llama:
    """Download the Phi-3-mini GGUF model from the Hugging Face Hub and load it.

    Returns:
        Llama: a llama-cpp-python model instance running on CPU
        (``n_gpu_layers=0``) with a 4096-token context window.

    Raises:
        RuntimeError: if the download or model initialization fails.

    Note:
        Stop sequences (e.g. ``["\\n", " Q:"]``) are *generation* parameters,
        not constructor parameters — pass them to ``model(...)`` /
        ``model.create_completion(..., stop=[...])`` at inference time.
        The original code passed ``stop=`` to ``Llama(...)``, where
        llama-cpp-python ignores it as an unknown kwarg.
    """
    try:
        # hf_hub_download caches the file locally and returns its path.
        model_path = hf_hub_download(
            repo_id="microsoft/Phi-3-mini-4k-instruct-gguf",
            filename="Phi-3-mini-4k-instruct-q4.gguf",
        )
        return Llama(
            model_path=model_path,
            n_ctx=4096,       # matches the model's 4k context window
            n_threads=8,      # CPU inference threads
            n_gpu_layers=0,   # CPU-only: no layers offloaded to GPU
        )
    except Exception as e:
        # Chain the original cause so the underlying failure stays visible.
        raise RuntimeError(f"Failed to load model: {e}") from e