|
import gradio as gr |
|
import mediapipe as mp |
|
import cv2 |
|
import numpy as np |
|
from PIL import Image |
|
import torch |
|
from transformers import AutoProcessor, AutoTokenizer, AutoModelForCausalLM |
|
from posture_utils import analyze_posture_by_keypoints |
|
|
|
# Hugging Face Hub identifier of the vision-language chat checkpoint.
model_id = "Qwen/Qwen-VL-Chat"

# trust_remote_code=True lets transformers execute the Python code shipped
# in the checkpoint repository, so only point model_id at a trusted repo.
tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True)

# device_map="auto" delegates weight placement to the library.
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    trust_remote_code=True,
    device_map="auto",
)
# Inference only: switch the module to evaluation mode.
model.eval()

# Short alias for the MediaPipe pose-estimation solution used by process().
mp_pose = mp.solutions.pose
|
|
|
def process(image: "Image.Image | None") -> str:
    """Analyse a sitting-posture photo and return a bilingual reminder.

    The photo is first run through MediaPipe Pose; the detected landmarks
    are summarised by ``analyze_posture_by_keypoints`` and that summary is
    handed, together with the photo, to the vision-language model, which
    writes the reminder text.

    Args:
        image: The uploaded photo as a PIL image, or ``None`` when the user
            submitted the form without uploading anything.

    Returns:
        The model-generated reminder, or a fixed warning message when no
        image was supplied or no person could be detected in it.
    """
    no_person_msg = "❗ 无法检测到人体,请上传包含上半身的清晰坐姿照片。"

    # Gradio passes None when the form is submitted without an upload.
    if image is None:
        return no_person_msg

    # Normalise to 3-channel RGB so RGBA / grayscale / palette uploads work.
    rgb_image = image.convert("RGB")
    np_image = np.array(rgb_image)

    # MediaPipe expects RGB input, which is exactly what PIL provides, so the
    # array is passed as-is (an RGB->BGR swap here would feed it the wrong
    # channel order).
    with mp_pose.Pose(static_image_mode=True) as pose:
        results = pose.process(np_image)

    if not results.pose_landmarks:
        return no_person_msg

    posture_analysis = analyze_posture_by_keypoints(results.pose_landmarks)
    prompt = (
        "<|im_start|>user\n"
        "请根据以下坐姿描述生成中英文提醒:\n"
        f"{posture_analysis}\n"
        "<|im_end|>\n"
        "<|im_start|>assistant\n"
    )

    inputs = processor(images=rgb_image, text=prompt, return_tensors="pt").to(model.device)
    output = model.generate(**inputs, max_new_tokens=512)

    # A causal LM returns prompt + completion. Decoding with
    # skip_special_tokens=True strips the "<|im_start|>" marker, so splitting
    # on it cannot remove the prompt; drop the prompt tokens by position.
    prompt_len = inputs["input_ids"].shape[-1]
    result = tokenizer.decode(output[0][prompt_len:], skip_special_tokens=True)

    # Kept as a harmless fallback in case the marker survives decoding.
    return result.split("<|im_start|>assistant\n")[-1].strip()
|
|
|
# Widgets are built up front so the Interface call below stays compact.
# type="pil" makes Gradio hand the callback a PIL image.
_photo_input = gr.Image(type="pil", label="上传你的坐姿照片")
_report_output = gr.Textbox(label="中英文坐姿分析结果")

# Single-function web UI: one image in, one text report out.
# NOTE(review): newer Gradio releases rename allow_flagging to flagging_mode;
# confirm against the pinned Gradio version before upgrading.
demo = gr.Interface(
    fn=process,
    inputs=_photo_input,
    outputs=_report_output,
    title="🪑 坐姿监测融合助手",
    description="上传坐姿图像,先通过 Mediapipe 判断是否驼背、低头、含胸,再交由 Qwen-VL 生成提醒语。",
    theme="soft",
    allow_flagging="never",
)


if __name__ == "__main__":
    # Start the web server only when executed as a script, not on import.
    demo.launch()