Spaces:

WorldRWKV
/

WorldRWKV-0.4B-G1-SigLIP2

Running

App Files Files Community

WorldRWKV-0.4B-G1-SigLIP2 / app.py

Alic-Li

Update app.py

ec90bb6 verified 26 days ago

raw

history blame contribute delete

6.62 kB

	import gradio as gr
	from infer.worldmodel import Worldinfer
	from PIL import Image
	from huggingface_hub import hf_hub_download, snapshot_download
	import re
	import html
	# Resolve the model assets through the Hugging Face Hub helpers: the RWKV
	# checkpoint file is placed under ./model_weights/, the vision encoder is
	# fetched as a repository snapshot.
	llm_path = hf_hub_download(
	    repo_id="WorldRWKV/RWKV7-0.4B-G1-SigLIP2-ColdStart",
	    filename="rwkv-0.pth",
	    local_dir="./model_weights/",
	)
	encoder_path = snapshot_download(repo_id="google/siglip2-base-patch16-384")
	# Local-path alternative kept from development (uncomment to override):
	# llm_path = "/mnt/B8E84E9EE84E5B30/rwkv-models/world_rwkv/world_weights/rwkv-0"
	# encoder_path = "/mnt/B8E84E9EE84E5B30/rwkv-models/world_rwkv/siglip2-base-patch16-384/"
	encoder_type = 'siglip'

	# Build the inference wrapper once, at import time.
	model = Worldinfer(
	    model_path=llm_path,
	    encoder_type=encoder_type,
	    encoder_path=encoder_path,
	)

	# Module-level conversation state shared by all handlers below.
	current_image = None    # image currently attached to the conversation
	current_state = None    # state object returned by the last model.generate call
	first_question = False  # set to True by update_image; cleared after a successful turn

	# Core logic for handling one user turn.

	# Patterns for the reasoning / answer sections of a model reply; compiled
	# once here instead of on every call.
	_THINK_PATTERN = re.compile(r'<think>(.*?)</think>', re.DOTALL)
	_ANSWER_PATTERN = re.compile(r'<answer>(.*?)</answer>', re.DOTALL)


	def _format_response(raw_response):
	    """Turn a raw model reply into the HTML-safe text shown in the chatbot.

	    Each ``<think>`` block becomes a collapsible ``<details>`` element and
	    each ``<answer>`` block is emitted under an "Answer" heading; the inner
	    text is HTML-escaped. When the reply has no ``<answer>`` block, whatever
	    text remains outside the ``<think>`` blocks is escaped and appended, so a
	    reply that does not use the tags is still displayed instead of being
	    silently dropped.
	    """
	    thoughts = _THINK_PATTERN.findall(raw_response)
	    answers = _ANSWER_PATTERN.findall(raw_response)

	    parts = [
	        f"<details><summary>Think 🤔 </summary>{html.escape(thought)}</details>"
	        for thought in thoughts
	    ]
	    parts.extend("Answer 💡" + "\n" + html.escape(answer) for answer in answers)

	    if not answers:
	        leftover = _THINK_PATTERN.sub("", raw_response).strip()
	        if leftover:
	            parts.append(html.escape(leftover))

	    return "".join(parts)


	def chat_fn(user_input, chat_history, image=None):
	    """Answer one user message about the current image.

	    Args:
	        user_input: The question typed by the user.
	        chat_history: List of ``(user, bot)`` tuples; the new exchange is
	            appended to it. ``None`` is treated as an empty history.
	        image: Optional image supplied with this call. When given, it
	            replaces the stored ``current_image``.

	    Returns:
	        A ``("", chat_history)`` tuple: the empty string clears the input
	        box and the list refreshes the chat display.
	    """
	    global current_image, current_state, first_question

	    if chat_history is None:
	        chat_history = []

	    # An image passed with this call becomes the active image.
	    if image is not None:
	        current_image = image

	    # Without any image there is nothing to ask about.
	    if current_image is None:
	        chat_history.append((user_input, "请先上传一张图片！"))
	        return "", chat_history

	    # Make sure the image is a PIL Image. The string check comes first so
	    # that an array-valued image is never compared against 'none' directly.
	    is_placeholder = isinstance(current_image, str) and current_image == 'none'
	    if not isinstance(current_image, Image.Image) and not is_placeholder:
	        current_image = Image.fromarray(current_image)

	    # Build the prompt text.
	    prompt = f'\x16User: {user_input}\x17Assistant:'

	    try:
	        # Send the image whenever there is no saved state to continue from:
	        # on the first question after an upload, and also after an error or
	        # a reset cleared the state. Otherwise the follow-up would run with
	        # neither the image nor a state and lose the image context.
	        if first_question or current_state is None:
	            result, state = model.generate(prompt, current_image, state=None)
	        else:
	            # Follow-up turn: 'none' is passed in place of the image and the
	            # saved state is reused (presumably the state already carries
	            # the image context; confirm against Worldinfer.generate).
	            result, state = model.generate(prompt, 'none', state=current_state)

	        first_question = False
	        current_state = state
	        bot_response = _format_response(result)

	    except Exception as e:
	        # UI boundary: report the failure in the chat rather than crashing
	        # the handler, and start over from the image on the next turn.
	        bot_response = f"生成回复时出错: {str(e)}"
	        current_state = None
	        first_question = True

	    # Record the exchange.
	    chat_history.append((user_input, bot_response))

	    # Clear the input box and refresh the chat log.
	    return "", chat_history
	# Handle image upload / change events.
	def update_image(image):
	    """Store the new image and reset the conversation state.

	    Args:
	        image: The new value of the image component, or ``None`` when the
	            image was removed.

	    Returns:
	        The status message to show: an "uploaded" message for a real image,
	        or a "cleared" message when ``image`` is ``None``. The change event
	        also fires when the component is cleared, and that must not be
	        reported as a successful upload.
	    """
	    global current_image, current_state, first_question
	    current_image = image
	    current_state = None   # a new (or removed) image invalidates the old state
	    first_question = True  # the next question must be asked together with the image
	    if image is None:
	        return "图片已清除，请上传新图片。"
	    return "图片已上传成功！可以开始提问了。"

	# Remove the current image.
	def clear_image():
	    """Forget the stored image and the model state.

	    Returns:
	        ``(None, message)``: ``None`` for the image component and the status
	        text for the status box.
	    """
	    global current_image, current_state
	    current_image = None
	    current_state = None  # the saved state belongs to the removed image
	    return None, "图片已清除，请上传新图片。"

	# Clear both the chat history and the image.
	def clear_all():
	    """Forget the stored image and model state and return cleared UI values.

	    Returns:
	        ``([], "", message)``: an empty chat history, an empty input string
	        and the status text.
	    """
	    global current_image, current_state
	    current_image = None
	    current_state = None  # nothing left to continue from
	    return [], "", "图片和对话已清空，请重新上传图片。"

	# Chat entry point that does not take an image argument.
	def chat_without_image_update(user_input, chat_history):
	    """Run ``chat_fn`` without an image argument, so the stored image is used.

	    Args:
	        user_input: The question typed by the user.
	        chat_history: The chat history passed through to ``chat_fn``.

	    Returns:
	        Whatever ``chat_fn`` returns.
	    """
	    return chat_fn(user_input, chat_history)

	# UI layout and event wiring.
	# NOTE(review): the nesting below was reconstructed from the section comments
	# and scale values after the original indentation was lost; confirm that the
	# control row belongs inside the right-hand column.
	with gr.Blocks(title="WORLD RWKV", theme=gr.themes.Soft()) as demo:
	    gr.Markdown("# WORLD RWKV")
	    gr.Markdown("上传一张图片，然后可以进行多轮提问")

	    with gr.Row():
	        # Left side: image upload area.
	        with gr.Column(scale=2):
	            image_input = gr.Image(
	                type="pil",
	                label="上传图片",
	                height=400
	            )

	            # Image status and actions.
	            with gr.Row():
	                image_status = gr.Textbox(
	                    label="图片状态",
	                    value="请上传图片",
	                    interactive=False
	                )
	                clear_img_btn = gr.Button("删除图片")

	        # Right side: conversation area.
	        with gr.Column(scale=3):
	            chatbot = gr.Chatbot(
	                label="对话记录",
	                bubble_full_width=False,
	                height=500
	            )

	            # Controls.
	            with gr.Row():
	                # Question input.
	                user_input = gr.Textbox(
	                    placeholder="请输入问题...",
	                    scale=7,
	                    container=False,
	                    label="问题输入"
	                )

	                # Action buttons.
	                with gr.Column(scale=1):
	                    submit_btn = gr.Button("发送", variant="primary")
	                    clear_btn = gr.Button("清空所有")

	    # Event bindings.
	    # Image changed: store it and reset the conversation state.
	    image_input.change(
	        fn=update_image,
	        inputs=[image_input],
	        outputs=[image_status]
	    )

	    # "Delete image" button: the lambda returns one value per output, in the
	    # same order as `outputs`.
	    clear_img_btn.click(
	        fn=lambda: (None, "图片已清除，请上传新图片。"),
	        inputs=None,
	        outputs=[image_input, image_status]
	    )

	    # "Send" button: passes the image component along with the question.
	    submit_btn.click(
	        fn=chat_fn,
	        inputs=[user_input, chatbot, image_input],
	        outputs=[user_input, chatbot]
	    )

	    # Enter in the text box: uses the handler that takes no image argument.
	    user_input.submit(
	        fn=chat_without_image_update,
	        inputs=[user_input, chatbot],
	        outputs=[user_input, chatbot]
	    )

	    # "Clear all" button: empties chat, input box and image, and sets the
	    # status text; one return value per output.
	    clear_btn.click(
	        fn=lambda: ([], "", "图片和对话已清空，请重新上传图片。", None),
	        inputs=None,
	        outputs=[chatbot, user_input, image_status, image_input],
	        queue=False
	    )

	# Launch the app when the file is run as a script.
	if __name__ == "__main__":
	    demo.launch()