Spaces:

ThongCoding
/

Gradio

Runtime error

App Files Files Community

Gradio / app.py

ThongCoding

Update app.py

6f0dccd verified about 1 month ago

raw

history blame

1.85 kB

	import os
	import gradio as gr
	from llama_cpp import Llama
	from huggingface_hub import hf_hub_download

	# --- Model configuration ---
	# Hub repository and the GGUF file inside it that the app downloads.
	REPO_ID, FILENAME = "TheBloke/phi-2-GGUF", "phi-2.Q4_K_M.gguf"
	# Access token taken from the environment; None when the variable is unset.
	HF_TOKEN = os.environ.get("HF_AUTH_TOKEN")

	# --- Automatic model download ---
	# Fetch the configured file from the Hub and keep the path it returns for
	# the model loader.
	model_path = hf_hub_download(REPO_ID, FILENAME, token=HF_TOKEN)

	# --- Load the model with RAM-friendly settings ---
	llm = Llama(
	    model_path,
	    verbose=False,
	    n_gpu_layers=0,  # Hugging Face CPU Space, so nothing is offloaded to a GPU
	    n_batch=512,  # moderate batch size to keep RAM usage down
	    n_threads=os.cpu_count(),  # use every available CPU core
	    n_ctx=2048,
	)

	# --- Chat handler ---
	def _history_to_messages(history):
	    """Convert Gradio chat history into role/content message dicts.

	    Accepts both history layouts Gradio may pass to a chat handler:
	    a list of ``(user_msg, bot_msg)`` pairs, or a list of dicts that carry
	    ``"role"`` and ``"content"`` keys. ``None`` or empty history yields an
	    empty list.
	    """
	    messages = []
	    for turn in history or []:
	        if isinstance(turn, dict):
	            role = turn.get("role")
	            content = turn.get("content")
	            # Only plain-text user/assistant turns can be forwarded to the model.
	            if role in ("user", "assistant") and isinstance(content, str):
	                messages.append({"role": role, "content": content})
	            continue
	        user_msg, bot_msg = turn
	        # Either side of a pair may be None (e.g. a turn without a reply yet).
	        if user_msg is not None:
	            messages.append({"role": "user", "content": user_msg})
	        if bot_msg is not None:
	            messages.append({"role": "assistant", "content": bot_msg})
	    return messages


	def chat_fn(message, history):
	    """Generate the assistant's reply to ``message``.

	    Args:
	        message: The text the user just sent.
	        history: Earlier turns supplied by ``gr.ChatInterface`` (may be
	            ``None`` or empty). It is read only, never modified.

	    Returns:
	        The reply text as a string. ``gr.ChatInterface`` expects the handler
	        to return only the new reply and keeps the conversation history
	        itself, so the history must not be appended to or returned here.
	    """
	    messages = [
	        {"role": "system", "content": "Bạn là một trợ lý AI giao tiếp bằng tiếng Việt, trả lời tự nhiên và thân thiện."}
	    ]
	    messages.extend(_history_to_messages(history))
	    messages.append({"role": "user", "content": message})

	    response = llm.create_chat_completion(
	        messages=messages,
	        max_tokens=512,
	        temperature=0.7,
	        # Plain pipes: the former "\|" escapes put literal backslashes into the
	        # stop strings, so they could never match the generated text.
	        stop=["<|user|>", "<|assistant|>"],
	    )
	    reply = response["choices"][0]["message"]["content"]
	    # Guard against a missing content field so .strip() cannot fail.
	    return (reply or "").strip()


	# --- Gradio user interface ---
	# Chat display widget with a fixed height of 450.
	chat_window = gr.Chatbot(height=450)

	# Chat UI that routes every user message through chat_fn.
	demo = gr.ChatInterface(
	    fn=chat_fn,
	    chatbot=chat_window,
	    title="🤖 Chatbot Phi-2 (Tiếng Việt)",
	    description="Trợ lý AI tiếng Việt chạy bằng Phi-2 - GGUF (nhẹ, nhanh, tiết kiệm RAM)",
	    theme="soft",
	)

	# Start the web app.
	demo.launch()