gguf-test-2 / app.py
broadfield-dev's picture
Update app.py
c281c55 verified
raw
history blame contribute delete
832 Bytes
import os
import gradio as gr
from llama_cpp import Llama
from huggingface_hub import hf_hub_download
# Enable the hf_transfer backend for faster model downloads.
os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"

# Fetch the quantized GGUF weights (cached by huggingface_hub after the
# first download), then load them into llama.cpp.
_model_path = hf_hub_download(
    repo_id="unsloth/Qwen3-8B-GGUF",
    filename="Qwen3-8B-UD-Q8_K_XL.gguf",
)
model = Llama(model_path=_model_path)
def infer(message, history, temperature=0.7, max_tokens=1024, top_p=0.95):
    """Stream a chat completion for *message*, yielding the growing reply.

    Bug fix: the original referenced ``temperature``, ``max_tokens`` and
    ``top_p`` without defining them anywhere, so every chat turn raised a
    NameError. They are now keyword parameters with sensible defaults,
    which keeps the (message, history) call made by gr.ChatInterface
    working unchanged and lets them be wired to ``additional_inputs``
    later if desired.

    Args:
        message: The user's latest chat message.
        history: Prior turns supplied by gr.ChatInterface. Currently
            ignored — only the latest message is sent to the model.
            NOTE(review): consider folding history into ``messages`` for
            true multi-turn chat; confirm the history item format for the
            installed Gradio version first.
        temperature: Sampling temperature.
        max_tokens: Maximum number of tokens to generate.
        top_p: Nucleus-sampling probability mass.

    Yields:
        The accumulated response text after each streamed chunk, as
        gr.ChatInterface expects from a streaming generator.
    """
    accumulated = ""
    response = model.create_chat_completion(
        messages=[{"role": "user", "content": message}],
        temperature=temperature,
        max_tokens=max_tokens,
        top_p=top_p,
        stream=True,
    )
    for streamed in response:
        delta = streamed["choices"][0].get("delta", {})
        # "content" can be absent (role-only first chunk) or explicitly
        # None; ``or ""`` covers both without crashing the concatenation.
        accumulated += delta.get("content") or ""
        yield accumulated
# Assemble the UI: a single chat pane driven by the streaming generator.
with gr.Blocks() as app:
    gr.ChatInterface(fn=infer)
app.launch()