# Hugging Face Space: streaming chat demo over a local llama.cpp model (Qwen3-14B GGUF).
import os

import gradio as gr
from huggingface_hub import hf_hub_download
from llama_cpp import Llama

# Download the GGUF weights from the Hub (cached after the first run) and load
# them into llama.cpp. Repo and file are overridable via environment variables
# so the Space can be repointed without a code change.
model = Llama(
    model_path=hf_hub_download(
        repo_id=os.environ.get("REPO_ID", "unsloth/Qwen3-14B-GGUF"),
        filename=os.environ.get("MODEL_FILE", "Qwen3-14B-UD-Q4_K_XL.gguf"),
    )
)
def infer(message, history, temperature=0.7, max_tokens=1024, top_p=0.95):
    """Stream a chat completion for *message*, yielding the growing reply.

    Generator used as the ``fn`` of ``gr.ChatInterface``: each yield is the
    full text accumulated so far, which Gradio renders as a live-updating
    chat bubble.

    Args:
        message: The user's latest message.
        history: Prior chat turns supplied by Gradio (currently unused —
            only the latest message is sent to the model).
        temperature: Sampling temperature. Was previously an undefined
            global, which raised ``NameError`` on every call.
        max_tokens: Maximum number of tokens to generate.
        top_p: Nucleus-sampling probability mass.

    Yields:
        str: The response text accumulated so far.
    """
    accumulated = ""
    response = model.create_chat_completion(
        messages=[{"role": "user", "content": message}],
        temperature=temperature,
        max_tokens=max_tokens,
        top_p=top_p,
        stream=True,
    )
    for chunk in response:
        # Streaming chunks carry incremental text under "delta"; the first
        # chunk may hold only a role marker, so "content" can be absent.
        delta = chunk["choices"][0].get("delta", {})
        accumulated += delta.get("content", "")
        yield accumulated
# Assemble the UI: a single ChatInterface wired to the streaming generator,
# then start the Gradio server (blocks until shutdown).
with gr.Blocks() as app:
    chat = gr.ChatInterface(fn=infer)

app.launch()