gguf-test-2 / app.py
broadfield-dev's picture
Update app.py
c281c55 verified
raw
history blame contribute delete
832 Bytes
import os
import gradio as gr
from llama_cpp import Llama
from huggingface_hub import hf_hub_download
# Enable the hf_transfer backend for faster model downloads.
os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"

# Fetch the quantized GGUF weights (cached by huggingface_hub after the
# first download), then load them into llama.cpp.
_model_path = hf_hub_download(
    repo_id="unsloth/Qwen3-8B-GGUF",
    filename="Qwen3-8B-UD-Q8_K_XL.gguf",
)
model = Llama(model_path=_model_path)
def infer(message, history, temperature=0.7, max_tokens=1024, top_p=0.95):
    """Stream a chat completion for *message*, yielding the growing reply.

    Bug fix: the original referenced ``temperature``, ``max_tokens`` and
    ``top_p`` without defining them anywhere, so every chat turn raised a
    NameError. They are now keyword parameters with sensible defaults,
    which keeps the (message, history) call made by gr.ChatInterface
    working unchanged and lets them be wired to ``additional_inputs``
    later if desired.

    Args:
        message: The user's latest chat message.
        history: Prior turns supplied by gr.ChatInterface. Currently
            ignored — only the latest message is sent to the model.
            NOTE(review): consider folding history into ``messages`` for
            true multi-turn chat; confirm the history item format for the
            installed Gradio version first.
        temperature: Sampling temperature.
        max_tokens: Maximum number of tokens to generate.
        top_p: Nucleus-sampling probability mass.

    Yields:
        The accumulated response text after each streamed chunk, as
        gr.ChatInterface expects from a streaming generator.
    """
    accumulated = ""
    response = model.create_chat_completion(
        messages=[{"role": "user", "content": message}],
        temperature=temperature,
        max_tokens=max_tokens,
        top_p=top_p,
        stream=True,
    )
    for streamed in response:
        delta = streamed["choices"][0].get("delta", {})
        # "content" can be absent (role-only first chunk) or explicitly
        # None; ``or ""`` covers both without crashing the concatenation.
        accumulated += delta.get("content") or ""
        yield accumulated
# Assemble the UI: a single chat pane driven by the streaming generator.
with gr.Blocks() as app:
    gr.ChatInterface(fn=infer)
app.launch()