wensjheng committed on
Commit
c78f550
·
verified ·
1 Parent(s): 53212d7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +35 -2
app.py CHANGED
@@ -1,6 +1,7 @@
1
  import gradio as gr
2
  import requests
3
  import json
 
4
 
import os

# SECURITY: the literal below is an API key committed to a public file and
# should be treated as exposed. Prefer supplying the key through the
# GEMINI_API_KEY environment variable; the literal is kept only as a fallback
# so existing deployments keep working until the key is rotated and removed.
GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY") or "AIzaSyAZ3WSpx_o53zpmhIJRzR0JMsiBOTnttbg"

# Full generateContent endpoint; the key is passed as the `key` query parameter.
API_URL = f"https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash:generateContent?key={GEMINI_API_KEY}"
@@ -50,21 +51,53 @@ Your task is to provide scientifically sound health education. Please follow the
50
  else:
51
  return f"[API 錯誤 {response.status_code}]: {response.text}"
52
 
 
 
 
 
 
 
 
 
 
53
  with gr.Blocks(title="氫鬆聊 - 專屬於您的健康諮詢夥伴") as demo:
54
  gr.HTML(INTRO_TEXT)
55
  chatbot = gr.Chatbot(height=400)
56
- msg = gr.Textbox(label="請輸入您的問題", placeholder="例如:我正在接受__治療,可以吃___?",show_microphone=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
57
  with gr.Row():
58
  ask = gr.Button("提問")
59
  clear = gr.Button("清除對話")
60
-
61
  def respond(message, history):
62
  reply = call_gemini(message)
63
  history.append((message, reply))
64
  return "", history
65
 
 
 
 
 
66
  msg.submit(respond, [msg, chatbot], [msg, chatbot])
67
  ask.click(respond, [msg, chatbot], [msg, chatbot])
68
  clear.click(lambda: [], None, chatbot)
 
69
 
70
  demo.launch()
 
1
  import gradio as gr
2
  import requests
3
  import json
4
+ import speech_recognition as sr
5
 
import os

# SECURITY: the literal below is an API key committed to a public file and
# should be treated as exposed. Prefer supplying the key through the
# GEMINI_API_KEY environment variable; the literal is kept only as a fallback
# so existing deployments keep working until the key is rotated and removed.
GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY") or "AIzaSyAZ3WSpx_o53zpmhIJRzR0JMsiBOTnttbg"

# Full generateContent endpoint; the key is passed as the `key` query parameter.
API_URL = f"https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash:generateContent?key={GEMINI_API_KEY}"
 
51
  else:
52
  return f"[API 錯誤 {response.status_code}]: {response.text}"
53
 
def transcribe_audio(audio_file, lang_code):
    """Transcribe an audio file to text via ``recognizer.recognize_google``.

    Args:
        audio_file: Path of the audio file to read. May be ``None`` or empty
            when the caller has no recording to pass.
        lang_code: Language tag forwarded as ``language`` to the recognizer,
            e.g. ``"zh-TW"``.

    Returns:
        The recognized text on success; otherwise a string beginning with
        ``"[語音辨識失敗]:"`` that describes the failure. This function does
        not raise.
    """
    # Nothing was recorded: report it instead of letting sr.AudioFile fail.
    if not audio_file:
        return "[語音辨識失敗]:未收到錄音"

    # Opening/reading the file can fail too (e.g. unsupported format), so the
    # whole pipeline is guarded, not just the recognition request. This is the
    # UI boundary, hence the deliberately broad catch.
    try:
        recognizer = sr.Recognizer()
        with sr.AudioFile(audio_file) as source:
            audio = recognizer.record(source)
        return recognizer.recognize_google(audio, language=lang_code)
    except Exception as e:
        return f"[語音辨識失敗]:{str(e)}"
62
+
# Gradio UI: chat window, text/voice input row, action buttons, and the event
# wiring between them. The layout nesting below is reconstructed from a
# whitespace-stripped listing.
with gr.Blocks(title="氫鬆聊 - 專屬於您的健康諮詢夥伴") as demo:
    gr.HTML(INTRO_TEXT)
    chatbot = gr.Chatbot(height=400)

    # Question box next to the speech-recognition language selector.
    with gr.Row():
        msg = gr.Textbox(label="請輸入您的問題", placeholder="例如:我正在接受__治療,可以吃___?")
        lang_select = gr.Dropdown(
            label="語音語言",
            choices=[
                ("中文(台灣)", "zh-TW"),
                ("English (US)", "en-US"),
                ("日本語", "ja-JP"),
                ("한국어", "ko-KR"),
                ("Bahasa Indonesia", "id-ID"),
                ("Tiếng Việt", "vi-VN"),
                ("Français", "fr-FR"),
                ("Deutsch", "de-DE"),
            ],
            value="zh-TW",
        )

    # Microphone recorder plus the button that converts the recording to text.
    with gr.Row():
        # NOTE(review): `source=` is the Gradio 3.x keyword; Gradio 4+ expects
        # `sources=["microphone"]`. Confirm against the pinned Gradio version.
        audio_input = gr.Audio(source="microphone", type="filepath", label="🎙 speak")
        voice_to_text = gr.Button("🎤 speak")

    with gr.Row():
        ask = gr.Button("提問")
        clear = gr.Button("清除對話")

    def respond(message, history):
        """Send ``message`` to ``call_gemini`` and append the exchange.

        Returns a pair ``("", history)``: the empty string clears the
        textbox, and ``history`` is the updated list of
        ``(message, reply)`` tuples for the chatbot.
        """
        history = history or []
        # Don't spend an API call (and show an API error) on an empty prompt.
        if not message or not message.strip():
            return "", history
        reply = call_gemini(message)
        history.append((message, reply))
        return "", history

    def handle_audio(audio_path, lang_code):
        """Return the text produced by ``transcribe_audio`` for the recording."""
        text = transcribe_audio(audio_path, lang_code)
        return text

    # Enter key and the ask button both submit the question.
    msg.submit(respond, [msg, chatbot], [msg, chatbot])
    ask.click(respond, [msg, chatbot], [msg, chatbot])
    # Reset the chatbot to an empty history.
    clear.click(lambda: [], None, chatbot)
    # Put the transcription into the question box so it can be reviewed/sent.
    voice_to_text.click(handle_audio, [audio_input, lang_select], msg)

demo.launch()