import base64
import io

import gradio as gr
import requests
from PIL import Image

# Local GPU inference server endpoint.
LOCAL_SERVER_URL = "http://169.233.7.2:5000/infer"

# Upper bound on one inference round-trip; prevents the UI from hanging
# forever when the GPU server is down or overloaded.
REQUEST_TIMEOUT_SECONDS = 120


def image_to_base64(image):
    """Encode a PIL Image as a base64 string of its PNG bytes."""
    buffer = io.BytesIO()
    image.save(buffer, format="PNG")
    return base64.b64encode(buffer.getvalue()).decode("utf-8")


def llava_infer(image, text):
    """Send the user's image + text prompt to the remote inference server.

    Parameters:
        image: PIL Image from the Gradio image widget (or None).
        text:  prompt string from the Gradio textbox.

    Returns:
        The model's answer string on success, or a user-facing error
        message (in Chinese, matching the UI language) on bad input or
        server failure. Never raises — errors are rendered in the UI.
    """
    if image is None or text.strip() == "":
        return "请提供图片和文本输入"

    payload = {"image": image_to_base64(image), "text": text}
    try:
        response = requests.post(
            LOCAL_SERVER_URL, json=payload, timeout=REQUEST_TIMEOUT_SECONDS
        )
        # Surface HTTP-level failures (4xx/5xx) instead of trying to parse
        # an error page as a model answer.
        response.raise_for_status()
        response_data = response.json()
        # Guard against an unexpected payload shape instead of a raw KeyError.
        if "response" not in response_data:
            return "服务器错误: 响应缺少 'response' 字段"
        return response_data["response"]
    except Exception as e:
        return f"服务器错误: {e}"


# Gradio Web UI: image + prompt on the left, model output on the right.
with gr.Blocks(title="LLaVA Remote Web UI") as demo:
    gr.Markdown("# 🌋 LLaVA Web Interface (Remote Inference)")
    gr.Markdown("上传图片并输入文本,LLaVA 将在远程 GPU 服务器推理")

    with gr.Row():
        with gr.Column(scale=3):
            image_input = gr.Image(type="pil", label="上传图片")
            text_input = gr.Textbox(placeholder="输入文本...", label="输入文本")
            submit_button = gr.Button("提交")
        with gr.Column(scale=7):
            chatbot_output = gr.Textbox(label="LLaVA 输出", interactive=False)

    # Must be registered inside the Blocks context.
    submit_button.click(
        fn=llava_infer,
        inputs=[image_input, text_input],
        outputs=chatbot_output,
    )


if __name__ == "__main__":
    # Guarded so importing this module (e.g. for tests) does not start a server.
    demo.launch(server_name="0.0.0.0", server_port=7860, share=True)