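# Qwen-VL-Chat-Int4 "hunchback detection" demo: capture or upload a sitting photo,
# ask the quantized Qwen-VL-Chat model to judge the posture, and read the verdict
# aloud with edge-tts. Requires: gradio, torch, opencv-python, pillow, auto-gptq,
# transformers, and the edge-tts CLI.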
import gradio as gr
import torch
import cv2
from PIL import Image
from auto_gptq import AutoGPTQForCausalLM
from processing_qwen_vl import QWenVLProcessor
import os

# Load the processor and the GPTQ-quantized Qwen-VL-Chat model (GPU if available).
model_id = "Qwen/Qwen-VL-Chat-Int4"
processor = QWenVLProcessor.from_pretrained(model_id, trust_remote_code=True)
model = AutoGPTQForCausalLM.from_quantized(
    model_id,
    device="cuda" if torch.cuda.is_available() else "cpu",
    trust_remote_code=True
).eval()

def capture_photo(filename="sitting.jpg"):
    """Grab a single frame from the default webcam and save it to disk."""
    cap = cv2.VideoCapture(0)
    ret, frame = cap.read()
    cap.release()
    if ret:
        cv2.imwrite(filename, frame)
        return filename
    return None

def speak_text(text, lang="zh"):
    """Synthesize speech with the edge-tts CLI and play it (Windows 'start' / macOS 'afplay')."""
    voice = "zh-CN-XiaoxiaoNeural" if lang == "zh" else "en-US-AriaNeural"
    os.system(f'edge-tts --text "{text}" --voice "{voice}" --write-media output.mp3')
    os.system('start output.mp3' if os.name == 'nt' else 'afplay output.mp3')

def analyze_posture(image=None, auto_capture=False):
    """Run the model on an uploaded or freshly captured photo and speak the verdict."""
    if auto_capture:
        image_path = capture_photo()
        if image_path is None:
            return "❌ Unable to open the webcam", None
        image = Image.open(image_path)
    elif image is None:
        return "❌ Please upload an image or enable auto capture", None

    # Prompt (Chinese): "Judge whether this person's sitting posture is poor, e.g.
    # hunched back, leaning forward, or slouching to one side; answer in Chinese and English."
    question = "請判斷這個人是否坐姿不良，如駝背、前傾或歪斜，用中英文回答。"
    inputs = processor(text=question, images=image, return_tensors="pt").to(model.device)
    outputs = model.generate(**inputs, max_new_tokens=512)
    answer = processor.batch_decode(outputs, skip_special_tokens=True)[0].strip()

    # Crude language check: speak the reply in Chinese and/or English depending on its content.
    if "請" in answer:
        speak_text(answer, lang="zh")
    if "please" in answer.lower():
        speak_text(answer, lang="en")
    return answer, image

def run_auto_capture():
    return analyze_posture(auto_capture=True)

# Gradio UI: auto-capture button, manual upload, and the model's verdict.
with gr.Blocks(title="Hunchback Detection Assistant") as demo:
    gr.Markdown("## Qwen-VL-Chat-Int4 Hunchback Detection Demo")
    with gr.Row():
        with gr.Column():
            auto_btn = gr.Button("📷 Capture from webcam and analyze")
            image_input = gr.Image(type="pil", label="Or upload an image manually")
            submit_btn = gr.Button("📤 Upload and analyze")
        with gr.Column():
            output_text = gr.Textbox(label="🧠 Model verdict", lines=6)
            output_image = gr.Image(type="pil", label="Analyzed image")

    auto_btn.click(fn=run_auto_capture, outputs=[output_text, output_image])
    submit_btn.click(fn=analyze_posture, inputs=[image_input], outputs=[output_text, output_image])

demo.launch(share=True)