# Hugging Face Space: streaming chat demo for Qwen3-8B (GGUF) using llama-cpp-python and Gradio.
import os

import gradio as gr
from huggingface_hub import hf_hub_download
from llama_cpp import Llama

# Enable the Rust-based hf_transfer downloader for much faster model pulls.
# Must be set before the first hf_hub_download call.
os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"
# Download the GGUF weights from the Hub (cached locally after the first run)
# and load them into llama.cpp.
# NOTE(review): no n_ctx / n_gpu_layers / chat_format passed, so llama.cpp
# defaults apply — confirm the default context window fits the use case.
model = Llama(
    model_path=hf_hub_download(
        repo_id="unsloth/Qwen3-8B-GGUF",
        filename="Qwen3-8B-UD-Q8_K_XL.gguf",
    )
)
def infer(message, history, temperature=0.7, max_tokens=1024, top_p=0.95):
    """Stream a chat completion for *message*, yielding the reply so far.

    Args:
        message: The user's latest chat message.
        history: Prior conversation turns supplied by gr.ChatInterface.
            NOTE(review): currently ignored — each turn is sent without prior
            context, so the bot has no multi-turn memory. Wire it into
            ``messages`` if that is wanted.
        temperature: Sampling temperature forwarded to llama.cpp.
        max_tokens: Cap on generated tokens.
        top_p: Nucleus-sampling threshold.

    Yields:
        The accumulated response text after each streamed chunk, which is
        what gr.ChatInterface expects from a streaming generator.
    """
    # BUG FIX: temperature / max_tokens / top_p were referenced but never
    # defined anywhere, so the first message raised NameError. They are now
    # keyword parameters with sensible defaults (backward compatible:
    # ChatInterface still calls infer(message, history)).
    accumulated = ""
    response = model.create_chat_completion(
        messages=[{"role": "user", "content": message}],
        temperature=temperature,
        max_tokens=max_tokens,
        top_p=top_p,
        stream=True,
    )
    for chunk in response:
        delta = chunk["choices"][0].get("delta", {})
        # Role-only chunks may carry content=None; `or ""` keeps += safe.
        accumulated += delta.get("content", "") or ""
        yield accumulated
# Wrap the streaming generator in a chat UI and serve it.
# (The unused `chat` local that previously held the ChatInterface is dropped.)
with gr.Blocks() as app:
    gr.ChatInterface(fn=infer)

app.launch()