import time
import gradio as gr
from PIL import Image
from transformers import AutoTokenizer, AutoProcessor, AutoModelForCausalLM

# Hugging Face Hub identifier of the checkpoint used by this demo.
model_id = "Qwen/Qwen-VL-Chat"

# Text tokenizer and multimodal processor come from the same repository.
# NOTE(review): trust_remote_code=True lets the repository supply its own
# Python code for these classes -- only use it with sources you trust.
tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True)

# device_map="auto" lets the loader place the weights across the available
# GPU/CPU devices automatically (intended to avoid out-of-memory failures).
model = AutoModelForCausalLM.from_pretrained(
    model_id, trust_remote_code=True, device_map="auto"
)
# Switch to inference mode; eval() returns the same module instance.
model = model.eval()

def analyze_posture(image: "Image.Image") -> str:
    """Ask the vision-language model whether the pictured sitting posture is good.

    Args:
        image: The uploaded photo as a PIL image, or ``None`` when the user
            submitted the form without uploading anything.

    Returns:
        A user-facing message. One of: a request to upload a photo, a
        low-resolution warning, the model's answer prefixed with the elapsed
        inference time, or an error message if inference raised.
    """
    if image is None:
        return "❗ Please upload a clear photo showing your sitting posture.请上传一张坐姿清晰的照片。"

    # Reject images where either side is below 300 px.
    width, height = image.size
    if width < 300 or height < 300:
        return "⚠️ 图像分辨率过低，建议上传更清晰的坐姿照片（宽高 > 300px）"

    question = "这个人坐姿是否良好？是否驼背？用简洁中文回答，再用英文总结。"
    # NOTE(review): the prompt carries no image placeholder, and the
    # Qwen-VL-Chat model card documents tokenizer.from_list_format() plus
    # model.chat() as the entry point -- confirm that the AutoProcessor path
    # used below actually feeds the image to this checkpoint.
    prompt = f"<|im_start|>user\n{question}<|im_end|>\n<|im_start|>assistant\n"

    # perf_counter() is monotonic, so the elapsed time cannot go negative if
    # the wall clock is adjusted while the model is running.
    start = time.perf_counter()
    try:
        inputs = processor(images=image, text=prompt, return_tensors="pt").to(model.device)
        outputs = model.generate(**inputs, max_new_tokens=512)
        # For a causal LM, generate() returns the prompt tokens followed by
        # the newly generated ones. Decoding with skip_special_tokens=True
        # asks the tokenizer to drop special tokens such as "<|im_start|>",
        # so splitting the decoded text on that marker cannot be relied on to
        # remove the prompt. Slice the prompt off by token count instead.
        prompt_length = inputs["input_ids"].shape[-1]
        generated = outputs[0][prompt_length:]
        final = tokenizer.decode(generated, skip_special_tokens=True).strip()
    except Exception as e:
        # UI boundary: report the failure to the user instead of crashing
        # the request handler.
        return f"❌ 出现错误：{str(e)}\n\n💡 建议：确认图片清晰，或稍后重试。"

    elapsed = time.perf_counter() - start
    return f"⏱️ 回答时间：{round(elapsed, 2)} 秒\n\n{final}"

# UI configuration: a single image input wired to analyze_posture, with the
# model's reply shown in a text box.
demo = gr.Interface(
    # Presentation
    title="🪑 Posture Monitoring Demo  坐姿监测Demo",
    description="Upload a photo to detect whether your sitting posture is good or if you have a hunchback issue. Automatically generates reminders in both Chinese and English (powered by Qwen-VL).上传照片，识别你是否坐姿良好或有驼背问题，自动生成中英文提醒（由 Qwen-VL 支持）",
    theme="soft",
    # Wiring: the image is handed to the callback as a PIL object.
    fn=analyze_posture,
    inputs=gr.Image(type="pil", label="上传你的坐姿照片"),
    outputs=gr.Textbox(label="Analysis Result (Chinese + English Reminder) 分析结果（中文+英文提醒）"),
    # Flagging is turned off for this demo.
    allow_flagging="never",
)

# Launch the Gradio interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    demo.launch()