File size: 7,422 Bytes
62a9d23
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43db9df
62a9d23
 
69b835c
43db9df
 
 
 
 
 
 
 
 
 
62a9d23
 
 
 
 
 
 
 
 
43db9df
62a9d23
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
import gradio as gr
import requests
import mimetypes
import json, os
import asyncio
import aiohttp

# Service configuration comes from the environment; either value is None when
# its variable is unset. LLM_API is sent as the Bearer token and LLM_URL is the
# base URL the endpoint paths are appended to.
LLM_API = os.getenv("LLM_API")
LLM_URL = os.getenv("LLM_URL")
# Fixed identifier sent as the "user" field of every request (placeholder).
USER_ID = "HuggingFace Space"

def _extract_answer(raw_line):
    """Return the answer carried by one streamed response line, or None.

    Args:
        raw_line: One line of the response body as bytes.

    Returns:
        The truthy value found at ``data.outputs.answer`` in the line's JSON
        payload, or None when the line is blank, is not valid UTF-8/JSON, or
        does not contain such a value.
    """
    try:
        text = raw_line.decode("utf-8").strip()
        if not text:
            # Blank separator lines between events carry no payload.
            return None
        # Strip only a leading "data:" marker; a blanket str.replace would also
        # delete the same text if it appeared inside the payload itself.
        if text.startswith("data:"):
            text = text[len("data:"):]
        event = json.loads(text)
    except (UnicodeDecodeError, json.JSONDecodeError) as e:
        print("Error parsing line:", e)  # Debug: report the parse failure
        return None

    print("Line data:", event)  # Debug: dump every parsed event

    # Any level may be missing, null, or a non-dict; treat all as "no answer".
    if not isinstance(event, dict):
        return None
    data = event.get("data")
    outputs = data.get("outputs") if isinstance(data, dict) else None
    answer = outputs.get("answer") if isinstance(outputs, dict) else None
    return answer or None


async def send_chat_message(LLM_URL, LLM_API, user_input, file_id):
    """Post a chat query with an uploaded image and return the first answer.

    Sends a streaming request to ``{LLM_URL}/chat-messages`` and scans the
    response line by line until an event containing ``data.outputs.answer``
    is found.

    Args:
        LLM_URL: Base URL of the service (the endpoint path is appended).
        LLM_API: API key sent as a Bearer token.
        user_input: Text of the query.
        file_id: ID of a previously uploaded file, attached as a local image.

    Returns:
        The stripped answer text, or a string starting with ``"Error:"`` when
        the request fails or no answer is present in the stream.
    """
    payload = {
        "inputs": {},
        "query": user_input,
        "response_mode": "streaming",
        "conversation_id": "",
        "user": USER_ID,
        "files": [
            {
                "type": "image",
                "transfer_method": "local_file",
                "upload_file_id": file_id
            }
        ]
    }
    print("Sending chat message payload:", payload)  # Debug information
    endpoint = f"{LLM_URL}/chat-messages"
    async with aiohttp.ClientSession() as session:
        async with session.post(
            endpoint,
            headers={"Authorization": f"Bearer {LLM_API}"},
            json=payload
        ) as response:
            print("Request URL:", endpoint)
            print("Response status code:", response.status)

            if response.status == 404:
                return "Error: Endpoint not found (404)"
            if response.status >= 400:
                # Other failures (auth, server errors) have no answer stream;
                # report the status instead of a misleading "no answer" error.
                print("Error response body:", await response.text())
                return f"Error: Request failed with status {response.status}"

            async for line in response.content:
                answer = _extract_answer(line)
                if answer:
                    # Stop reading as soon as the first answer is found.
                    return str(answer).strip()

            return "Error: No thought or answer found in the response"


async def upload_file(LLM_URL, LLM_API, file_path, user_id):
    """Upload a local file to ``{LLM_URL}/files/upload``.

    Args:
        LLM_URL: Base URL of the service (the endpoint path is appended).
        LLM_API: API key sent as a Bearer token.
        file_path: Path of the file to upload; may be None or empty.
        user_id: Value sent in the ``user`` form field.

    Returns:
        The parsed JSON response (a dict containing a truthy ``"id"``) on
        success, otherwise a string starting with ``"Error:"``.
    """
    # A missing path (None/empty) must be rejected before os.path.exists,
    # which raises TypeError when given None.
    if not file_path or not os.path.exists(file_path):
        return f"Error: File {file_path} not found"
    mime_type, _ = mimetypes.guess_type(file_path)
    with open(file_path, 'rb') as f:
        async with aiohttp.ClientSession() as session:
            form_data = aiohttp.FormData()
            # Send only the base name: the server needs the name/extension,
            # not the local directory layout.
            form_data.add_field(
                'file',
                f,
                filename=os.path.basename(file_path),
                content_type=mime_type,
            )
            form_data.add_field('user', user_id)

            async with session.post(
                f"{LLM_URL}/files/upload",
                headers={"Authorization": f"Bearer {LLM_API}"},
                data=form_data
            ) as response:
                print("Upload response status code:", response.status)  # Debug information
                if response.status == 404:
                    return "Error: Endpoint not found (404)"

                response_text = await response.text()
                print("Raw upload response text:", response_text)  # Debug information

                if response.status >= 400:
                    # Surface auth/server failures with their actual status.
                    return f"Error: Upload failed with status {response.status}"

                try:
                    response_json = json.loads(response_text)
                except json.JSONDecodeError:
                    return "Error: Invalid JSON response"

                # Valid JSON that is not an object (list, string, ...) has no "id".
                if not isinstance(response_json, dict) or not response_json.get("id"):
                    return "Error: No file ID returned in upload response"
                return response_json

async def handle_input(file_path, user_input):
    """Upload the given image, then ask the chat endpoint about it.

    Args:
        file_path: Local path of the image to upload; may be None or empty
            when no image was supplied.
        user_input: Text of the query sent with the image.

    Returns:
        The chat answer, or a string starting with ``"Error:"`` describing
        which step failed.
    """
    # Fail fast with a readable message instead of raising further down.
    if not file_path:
        return "Error: No image file provided"
    if not LLM_URL:
        return "Error: LLM_URL environment variable is not set"

    upload_response = await upload_file(LLM_URL, LLM_API, file_path, USER_ID)
    print("Upload response:", upload_response)  # Debug information
    # upload_file signals failure by returning a message string instead of a dict.
    if isinstance(upload_response, str):
        return upload_response
    file_id = upload_response.get("id")  # Extract file ID from the response
    if not file_id:
        return "Error: No file ID returned from upload"

    chat_response = await send_chat_message(LLM_URL, LLM_API, user_input, file_id)
    print("Chat response:", chat_response)  # Debug information
    return chat_response

# Static HTML shown at the top of the page: title, author line, and a block
# of reference links (each row ends with <br>).
TITLE = """<h1>Multimodal RAG Playground 💬 輸入工地照片,生成工地場景及相關法規和缺失描述</h1>"""
SUBTITLE = """<h2><a href='https://www.twman.org' target='_blank'>TonTon Huang Ph.D. @ 2024/11 </a><br></h2>"""
# Link text fix: "Stable Diffuision" -> "Stable Diffusion" (URLs are unchanged).
LINKS = """
<a href='https://github.com/Deep-Learning-101' target='_blank'>Deep Learning 101 Github</a> | <a href='http://deeplearning101.twman.org' target='_blank'>Deep Learning 101</a> | <a href='https://www.facebook.com/groups/525579498272187/' target='_blank'>台灣人工智慧社團 FB</a> | <a href='https://www.youtube.com/c/DeepLearning101' target='_blank'>YouTube</a><br>
<a href='https://reurl.cc/g6GlZX' target='_blank'>手把手帶你一起踩AI坑</a> | <a href='https://blog.twman.org/2024/11/diffusion.html' target='_blank'>ComfyUI + Stable Diffusion</a><br>
<a href='https://blog.twman.org/2024/08/LLM.html' target='_blank'>白話文手把手帶你科普 GenAI</a> | <a href='https://blog.twman.org/2024/09/LLM.html' target='_blank'>大型語言模型直接就打完收工?</a><br>
<a href='https://blog.twman.org/2023/04/GPT.html' target='_blank'>什麼是大語言模型,它是什麼?想要嗎?</a> | <a href='https://blog.twman.org/2024/07/RAG.html' target='_blank'>那些檢索增強生成要踩的坑 </a><br>
<a href='https://blog.twman.org/2021/04/ASR.html' target='_blank'>那些語音處理 (Speech Processing) 踩的坑</a> | <a href='https://blog.twman.org/2021/04/NLP.html' target='_blank'>那些自然語言處理 (Natural Language Processing, NLP) 踩的坑</a><br>
<a href='https://blog.twman.org/2024/02/asr-tts.html' target='_blank'>那些ASR和TTS可能會踩的坑</a> | <a href='https://blog.twman.org/2024/02/LLM.html' target='_blank'>那些大模型開發會踩的坑</a><br>
<a href='https://blog.twman.org/2023/07/wsl.html' target='_blank'>用PPOCRLabel來幫PaddleOCR做OCR的微調和標註</a> | <a href='https://blog.twman.org/2023/07/HugIE.html' target='_blank'>基於機器閱讀理解和指令微調的統一信息抽取框架之診斷書醫囑資訊擷取分析</a><br>
"""

# Gradio components: an image picker that yields a file path, a pre-filled
# question box, and a four-line result box.
file_input = gr.Image(type='filepath', label='圖片上傳')
user_input = gr.Textbox(
    label='輸入問題描述',
    placeholder="請輸入您的問題描述...",
    value="分析一下這張工地場景照片",
)
output_text = gr.Textbox(lines=4, label="結果輸出")

# Example data: each row is [image path, text-box value].
# NOTE(review): the second column looks like a case identifier rather than a
# question — confirm this is what should be pre-filled into the text box.
examples = [
    list(row)
    for row in (
        ('DEMO/DEMO_0004.jpg', '0004-51'),
        ('DEMO/DEMO_0005.jpg', '0005-92'),
        ('DEMO/DEMO_0006.jpg', '0006-281'),
        ('DEMO/DEMO_0008.jpg', '0008-281'),
        ('DEMO/DEMO_0011.jpg', '0011-108'),
        ('DEMO/DEMO_0013.jpg', '0013-108'),
        ('DEMO/DEMO_0014.jpg', '0014-108'),
        ('DEMO/DEMO_0015.jpg', '0015-108'),
    )
]

# Assemble the page: the three static HTML header blocks, then the
# image + question form wired to handle_input.
with gr.Blocks() as iface:
    gr.HTML(TITLE)
    gr.HTML(SUBTITLE)
    gr.HTML(LINKS)
    gr.Interface(
        fn=handle_input,
        inputs=[file_input, user_input],
        # Use the configured result box; the generic "text" shortcut ignored
        # the label and line count set on output_text.
        outputs=output_text,
        examples=examples,
        flagging_mode="never",  # replaces the older flagging option name
    )

iface.launch()