Spaces:

hohobio
/

HydroX

Running

App Files Files Community

wensjheng committed on Apr 9

Commit

c78f550

verified ·

1 Parent(s): 53212d7

Update app.py

Browse files

Files changed (1) hide show

app.py +35 -2

app.py CHANGED Viewed

@@ -1,6 +1,7 @@
 import gradio as gr
 import requests
 import json
 # NOTE(review): the API key is hard-coded in source and is also embedded in
 # the request URL's query string below. Anyone who can read this file can
 # reuse it -- consider rotating the key and loading it from configuration
 # that is not committed (e.g. an environment variable).
 GEMINI_API_KEY = "AIzaSyAZ3WSpx_o53zpmhIJRzR0JMsiBOTnttbg"
 # generateContent endpoint for the gemini-2.0-flash model, authenticated via
 # the ?key= query parameter.
 API_URL = f"https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash:generateContent?key={GEMINI_API_KEY}"
@@ -50,21 +51,53 @@ Your task is to provide scientifically sound health education. Please follow the
     else:
         return f"[API 錯誤 {response.status_code}]: {response.text}"
 # Chat UI as it was before this commit: intro HTML, a chat history panel,
 # one text box, and two buttons.
 with gr.Blocks(title="氫鬆聊 - 專屬於您的健康諮詢夥伴") as demo:
     # INTRO_TEXT is defined outside the visible part of the file.
     gr.HTML(INTRO_TEXT)
     chatbot = gr.Chatbot(height=400)
-    msg = gr.Textbox(label="請輸入您的問題", placeholder="例如：我正在接受＿＿治療，可以吃＿＿＿？",show_microphone=True)
     with gr.Row():
         ask = gr.Button("提問")
         clear = gr.Button("清除對話")
     # Event handler: sends only the new message to call_gemini, appends the
     # (message, reply) pair to the chat history, and returns "" as the first
     # output so the text box (msg) is cleared.
     def respond(message, history):
         reply = call_gemini(message)
         history.append((message, reply))
         return "", history
     # Pressing Enter in the text box and clicking the ask button both run
     # respond with the same inputs and outputs.
     msg.submit(respond, [msg, chatbot], [msg, chatbot])
     ask.click(respond, [msg, chatbot], [msg, chatbot])
     # Replace the chat history with an empty list.
     clear.click(lambda: [], None, chatbot)
 demo.launch()

 import gradio as gr
 import requests
 import json
+import speech_recognition as sr
 # NOTE(review): the API key is hard-coded in source and is also embedded in
 # the request URL's query string below. Anyone who can read this file can
 # reuse it -- consider rotating the key and loading it from configuration
 # that is not committed (e.g. an environment variable).
 GEMINI_API_KEY = "AIzaSyAZ3WSpx_o53zpmhIJRzR0JMsiBOTnttbg"
 # generateContent endpoint for the gemini-2.0-flash model, authenticated via
 # the ?key= query parameter.
 API_URL = f"https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash:generateContent?key={GEMINI_API_KEY}"
     else:
         return f"[API 錯誤 {response.status_code}]: {response.text}"
def transcribe_audio(audio_file, lang_code):
    """Transcribe a recorded audio file to text.

    Args:
        audio_file: Path of the recording to transcribe. May be falsy
            (``None`` or ``""``) when no recording is available.
        lang_code: Language tag handed to the recognizer, e.g. ``"zh-TW"``.

    Returns:
        The recognized text, or a string starting with ``[語音辨識失敗]``
        when there is no recording or when reading / recognizing it fails.
        This function reports failures through its return value rather than
        by raising.
    """
    # Nothing to transcribe: report it the same way as any other failure
    # instead of handing an empty value to sr.AudioFile.
    if not audio_file:
        return "[語音辨識失敗]：未收到音訊"
    recognizer = sr.Recognizer()
    try:
        # Opening and reading the file can fail as well (missing file,
        # unsupported format), so it belongs inside the try together with
        # the recognition request.
        with sr.AudioFile(audio_file) as source:
            audio = recognizer.record(source)
        return recognizer.recognize_google(audio, language=lang_code)
    except Exception as e:
        # Deliberately broad: this is the UI boundary and the caller shows
        # the returned text to the user.
        return f"[語音辨識失敗]：{str(e)}"
 # Chat UI after this commit: intro HTML, a chat history panel, a text box
 # with a speech-language selector, a microphone recorder with a
 # transcribe button, and the ask / clear buttons.
 with gr.Blocks(title="氫鬆聊 - 專屬於您的健康諮詢夥伴") as demo:
     # INTRO_TEXT is defined outside the visible part of the file.
     gr.HTML(INTRO_TEXT)
     chatbot = gr.Chatbot(height=400)
+    with gr.Row():
+        msg = gr.Textbox(label="請輸入您的問題", placeholder="例如：我正在接受＿＿治療，可以吃＿＿＿？")
         # The selected value is passed to handle_audio as lang_code (see the
         # voice_to_text.click wiring below). Each entry is presumably a
         # (display label, language tag) pair -- confirm against the Gradio
         # version in use.
+        lang_select = gr.Dropdown(
+            label="語音語言",
+            choices=[
+                ("中文（台灣）", "zh-TW"),
+                ("English (US)", "en-US"),
+                ("日本語", "ja-JP"),
+                ("한국어", "ko-KR"),
+                ("Bahasa Indonesia", "id-ID"),
+                ("Tiếng Việt", "vi-VN"),
+                ("Français", "fr-FR"),
+                ("Deutsch", "de-DE"),
+            ],
+            value="zh-TW"
+        )
+    with gr.Row():
         # The component's value is passed to handle_audio as audio_path.
         # NOTE(review): the `source=` keyword depends on the installed
         # Gradio version -- confirm it is still accepted.
+        audio_input = gr.Audio(source="microphone", type="filepath", label="🎙 speak")
+        voice_to_text = gr.Button("🎤 speak")
     with gr.Row():
         ask = gr.Button("提問")
         clear = gr.Button("清除對話")
     # Event handler: sends only the new message to call_gemini, appends the
     # (message, reply) pair to the chat history, and returns "" as the first
     # output so the text box (msg) is cleared.
     def respond(message, history):
         reply = call_gemini(message)
         history.append((message, reply))
         return "", history
     # Event handler: returns whatever transcribe_audio returns for the
     # recording and the selected language; the result is written into msg,
     # so a transcription failure message also ends up in the text box.
+    def handle_audio(audio_path, lang_code):
+        text = transcribe_audio(audio_path, lang_code)
+        return text
     # Pressing Enter in the text box and clicking the ask button both run
     # respond with the same inputs and outputs.
     msg.submit(respond, [msg, chatbot], [msg, chatbot])
     ask.click(respond, [msg, chatbot], [msg, chatbot])
     # Replace the chat history with an empty list.
     clear.click(lambda: [], None, chatbot)
     # Transcribe the recording and place the text in the question box; the
     # user still has to submit it.
+    voice_to_text.click(handle_audio, [audio_input, lang_select], msg)
 demo.launch()