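# Speech-to-text demo: records from the microphone in a background thread,
# streams a countdown to the Gradio UI, and fills the textbox with the
# recognized text.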
import gradio as gr
import speech_recognition as sr
from time import time, sleep
import threading
from pydub import AudioSegment
from pydub.generators import Sine
from pydub.playback import play

# Global recording state shared between the worker thread and the UI loop
is_recording = False

# Short start/stop beeps: 200 ms of silence crossfaded into a generated sine tone
start_beep = AudioSegment.silent(duration=200).append(Sine(880).to_audio_segment(duration=200), crossfade=100)
end_beep = AudioSegment.silent(duration=200).append(Sine(440).to_audio_segment(duration=200), crossfade=100)
def play_start_sound():
    # Playback is best-effort; ignore failures (e.g. no audio output device)
    try:
        play(start_beep)
    except Exception:
        pass

def play_end_sound():
    try:
        play(end_beep)
    except Exception:
        pass
def start_recording(audio_time_limit):
    global is_recording
    is_recording = True
    recognizer = sr.Recognizer()
    microphone = sr.Microphone()
    play_start_sound()
    with microphone as source:
        # Calibrate for ambient noise, then capture a single phrase
        recognizer.adjust_for_ambient_noise(source, duration=0.5)
        try:
            audio = recognizer.listen(source, timeout=3, phrase_time_limit=audio_time_limit)
            # Google's free web speech API; needs an internet connection
            text = recognizer.recognize_google(audio)
            return text
        except sr.WaitTimeoutError:
            return ""
        except sr.UnknownValueError:
            return ""
        except Exception as e:
            print(f"Error: {str(e)}")
            return ""
        finally:
            play_end_sound()
            is_recording = False
def transcribe_audio(audio_time_limit=10):
    def execute_recording():
        nonlocal result
        result = start_recording(audio_time_limit)

    result = ""
    recording_thread = threading.Thread(target=execute_recording)
    recording_thread.start()

    start_time = time()
    # Stream a countdown to the UI while the recording thread is still running
    while recording_thread.is_alive() and (time() - start_time) < audio_time_limit:
        time_left = max(0, audio_time_limit - (time() - start_time))
        yield gr.update(value=f"🎤 Recording... {time_left:.1f}s left", visible=True), gr.update(value="", visible=True)
        sleep(0.1)

    recording_thread.join()
    yield gr.update(value="✅ Done!", visible=True), gr.update(value=result, visible=True)
def create_ui():
    css = """
    .mic-button {
        background: linear-gradient(45deg, #FF3366, #BA265D) !important;
        border: none !important;
        color: white !important;
        padding: 12px !important;
        border-radius: 50% !important;
        height: 50px !important;
        width: 50px !important;
        margin-left: 10px !important;
    }
    .mic-button:hover {
        transform: scale(1.05) !important;
    }
    .input-with-mic {
        display: flex !important;
        align-items: center !important;
        gap: 10px !important;
    }
    .status-message {
        font-style: italic;
        color: #666;
        margin-top: 5px;
    }
    """
    with gr.Blocks(css=css) as demo:
        gr.Markdown("## 🎤 Speech to Text Converter")

        with gr.Group():
            with gr.Row(elem_classes=["input-with-mic"]):
                text_input = gr.Textbox(
                    label="Your Input",
                    placeholder="Click the mic button and speak...",
                    elem_classes=["input-box"],
                    scale=9,
                )
                mic_button = gr.Button(
                    "🎤",
                    elem_classes=["mic-button"],
                    scale=1,
                )
            status_display = gr.Textbox(
                label="Status",
                visible=False,
                interactive=False,
                elem_classes=["status-message"],
            )

        # Explicit slider component so the recording limit is visible in the layout
        time_limit = gr.Slider(5, 30, value=10, label="Recording time limit (seconds)")

        mic_button.click(
            fn=transcribe_audio,
            inputs=[time_limit],
            outputs=[status_display, text_input],
            show_progress="hidden",
        )

        gr.Examples(
            examples=["Hello world", "How are you today?", "Please convert my speech to text"],
            inputs=text_input,
            label="Try these examples:",
        )

    return demo
if __name__ == "__main__":
    demo = create_ui()
    demo.launch(debug=True)
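# Dependency note (assumptions; a requirements file is not shown here): this
# script expects gradio, SpeechRecognition, pydub, and PyAudio (needed by
# sr.Microphone) to be installed, plus ffmpeg/ffplay or simpleaudio on the host
# so that pydub.playback.play can emit the start/stop beeps.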