# Test-03 / app.py
import gradio as gr
import mediapipe as mp
import numpy as np
from PIL import Image
import torch
from transformers import AutoProcessor, AutoTokenizer, AutoModelForCausalLM
from posture_utils import analyze_posture_by_keypoints
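
# analyze_posture_by_keypoints (defined in posture_utils.py, uploaded alongside this file)
# is expected to turn MediaPipe's landmarks into a short Chinese text summary of the
# posture problems (hunched back, lowered head, slouched chest) named in the UI text below.
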
model_id = "Qwen/Qwen-VL-Chat"
tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(model_id, trust_remote_code=True, device_map="auto").eval()
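# device_map="auto" requires the accelerate package; it spreads the multi-billion-parameter
# Qwen-VL-Chat weights across whatever GPU/CPU memory is available. On a CPU-only Space,
# loading and generation will be very slow.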
mp_pose = mp.solutions.pose
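# MediaPipe Pose yields 33 body landmarks per image, each with normalized x/y/z
# coordinates and a visibility score.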
def process(image: Image.Image):
    # Guard against an empty upload from the Gradio widget.
    if image is None:
        return "❗ 请先上传一张坐姿照片。"  # "Please upload a sitting-posture photo first."
    # MediaPipe Pose expects an RGB array, and np.array(PIL image) is already RGB,
    # so no cv2 colour conversion is needed here.
    np_image = np.array(image.convert("RGB"))
    with mp_pose.Pose(static_image_mode=True) as pose:
        results = pose.process(np_image)
    if not results.pose_landmarks:
        return "❗ 无法检测到人体,请上传包含上半身的清晰坐姿照片。"  # "No person detected; please upload a clear upper-body photo."
    # Rule-based summary (hunched back, lowered head, slouched chest) from the landmarks.
    posture_analysis = analyze_posture_by_keypoints(results.pose_landmarks)
    # ChatML-style prompt asking Qwen-VL for a bilingual (Chinese/English) reminder.
    prompt = f"<|im_start|>user\n请根据以下坐姿描述生成中英文提醒:\n{posture_analysis}\n<|im_end|>\n<|im_start|>assistant\n"
    inputs = processor(images=image, text=prompt, return_tensors="pt").to(model.device)
    output = model.generate(**inputs, max_new_tokens=512)
    # Note: if the tokenizer treats <|im_start|> as a special token, skip_special_tokens=True
    # strips it, and the split below simply returns the full decoded text.
    result = tokenizer.decode(output[0], skip_special_tokens=True)
    return result.split("<|im_start|>assistant\n")[-1].strip()
# Gradio UI (labels are in Chinese): input "upload your sitting-posture photo",
# output "Chinese/English posture analysis", title "posture-monitoring fusion assistant".
demo = gr.Interface(
    fn=process,
    inputs=gr.Image(type="pil", label="上传你的坐姿照片"),
    outputs=gr.Textbox(label="中英文坐姿分析结果"),
    title="🪑 坐姿监测融合助手",
    description="上传坐姿图像,先通过 Mediapipe 判断是否驼背、低头、含胸,再交由 Qwen-VL 生成提醒语。",
    # Description: "Upload a posture image; MediaPipe first checks for hunching, a lowered
    # head, or a slouched chest, then Qwen-VL generates the reminder."
    theme="soft",
    allow_flagging="never",
)
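# allow_flagging="never" hides Gradio's built-in "Flag" button (the parameter is renamed
# flagging_mode in newer Gradio releases).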
if __name__ == "__main__":
    demo.launch()
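# Running `python app.py` locally starts the web server; Hugging Face Spaces executes the
# script the same way, so demo.launch() also fires there.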