3v324v23 committed on
Commit
ae03257
·
1 Parent(s): 9cb4e4d

加入imagetotext.py

Browse files
Files changed (1) hide show
  1. text2image.py +55 -0
text2image.py ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import logging
3
+ from io import BytesIO
4
+ from PIL import Image
5
+ import gradio as gr
6
+ from google import genai
7
+ from google.genai import types
8
+
9
# Configure logging: append INFO-and-above records to app.log.
logging.basicConfig(
    filename='app.log',
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    # Log messages in this file contain non-ASCII (Chinese) text; pin the
    # file encoding so writes do not depend on the platform locale encoding.
    encoding='utf-8',
)

# Initialise the Gemini API client with the key read from the environment.
GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY")
client = genai.Client(api_key=GEMINI_API_KEY)
19
+
20
# "Image -> text" explanation feature.
def explain_image(image: "Image.Image | None") -> str:
    """Ask Gemini to describe an image in Traditional Chinese.

    Args:
        image: The PIL image to describe, or ``None`` when the handler is
            triggered without an image (e.g. nothing was uploaded).

    Returns:
        The description text produced by the model, or a short user-facing
        notice when no image was supplied or the model returned no text.

    Raises:
        Exception: Any error raised by the Gemini API call is logged and
            re-raised unchanged. Errors from encoding the image propagate
            as-is.
    """
    # Guard clause: without an image there is nothing to encode or send.
    if image is None:
        logging.warning("未提供圖片,略過 Gemini 呼叫。")
        return "請先上傳圖片。"

    # Serialise the PIL image to PNG bytes so it can be sent inline.
    buffered = BytesIO()
    image.save(buffered, format="PNG")
    image_data = buffered.getvalue()

    # Build the request parts: the image bytes followed by the text prompt.
    # The SDK's part type is `types.Part`; `types.ContentPart` does not exist.
    contents = [
        types.Part.from_bytes(data=image_data, mime_type="image/png"),
        types.Part.from_text(text="請用繁體中文說明這張圖片的內容。"),
    ]

    # Call the Gemini model; log failures before letting them propagate.
    try:
        response = client.models.generate_content(
            model="gemini-1.5-flash",
            contents=contents,
            config=types.GenerateContentConfig(response_modalities=["TEXT"]),
        )
    except Exception:
        logging.exception("Gemini 圖片說明請求失敗。")
        raise

    # `response.text` is the SDK's accessor for the text of the first
    # candidate; it is empty/None when the model produced no text, which
    # direct indexing into candidates/parts would turn into a crash.
    explanation = response.text
    if not explanation:
        logging.warning("Gemini 未回傳任何文字內容。")
        return "模型未回傳任何說明。"

    logging.info("圖片說明成功取得。")
    return explanation
44
+
45
+ # Gradio 介面
46
+ with gr.Blocks() as demo:
47
+ gr.Markdown("## 🧠 Gemini 圖片解釋器(圖 ➜ 文)")
48
+ image_input = gr.Image(type="pil", label="上傳圖片")
49
+ explain_button = gr.Button("解釋圖片")
50
+ output_text = gr.Textbox(label="圖片說明", lines=5)
51
+
52
+ explain_button.click(fn=explain_image, inputs=image_input, outputs=output_text)
53
+
54
+ if __name__ == "__main__":
55
+ demo.launch()