Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
File size: 2,756 Bytes
650a7ec bd507ca 650a7ec c2be063 650a7ec bd507ca 650a7ec bd507ca 650a7ec f5c4474 bd507ca f5c4474 650a7ec bd507ca 650a7ec bd507ca 650a7ec bd507ca 650a7ec bd507ca 650a7ec 0a71e28 bd507ca 0a71e28 650a7ec 0a71e28 650a7ec f5c4474 0a71e28 f5c4474 0a71e28 650a7ec |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 |
import os
from pathlib import Path
from httpx import AsyncClient
import gradio as gr
import numpy as np
from dotenv import load_dotenv
from fastrtc import (
AdditionalOutputs,
ReplyOnPause,
Stream,
audio_to_bytes,
get_turn_credentials_async,
get_turn_credentials,
)
from gradio.utils import get_space
from languages import LANGUAGES
# Directory containing this file (available for locating bundled assets such as AV_Huggy.png).
cur_dir = Path(__file__).parent
# Load secrets (HF_TOKEN is read later by transcribe_file) from a local .env into os.environ.
# Must run before any code reads those environment variables.
load_dotenv()
# Single shared async HTTP client reused for every transcription request;
# 30 s timeout allows for slow endpoint cold starts / long clips.
client = AsyncClient(timeout=30)
async def transcribe_file(audio: tuple[int, np.ndarray], language: str):
    """POST an audio clip to the hosted Whisper endpoint and return the transcript.

    Args:
        audio: ``(sample_rate, samples)`` tuple as produced by gradio/fastrtc.
        language: language code forwarded to the endpoint.

    Returns:
        The endpoint's response body as plain text (best-effort: the body is
        returned even on a non-2xx status).
    """
    # HF_TOKEN comes from the environment (populated by load_dotenv at import time).
    auth = {"Authorization": f"Bearer {os.getenv('HF_TOKEN')}"}
    form = {"response_format": "text", "language": language}
    resp = await client.post(
        url="https://douatiz8x2itm3yn.us-east-1.aws.endpoints.huggingface.cloud/api/v1/audio/transcriptions",
        headers=auth,
        files={"file": audio_to_bytes(audio)},
        data=form,
    )
    return resp.text
async def transcribe(audio: tuple[int, np.ndarray], transcript: str, language: str):
    """Streaming handler: transcribe the latest pause-delimited utterance and
    yield the running transcript (with the new text appended) as an
    AdditionalOutputs update for the UI."""
    utterance = await transcribe_file(audio, language)
    yield AdditionalOutputs(f"{transcript} {utterance}")
# Shared textbox: fed back in as an input (current transcript) and updated as an
# output (transcript with the newest utterance appended).
transcript = gr.Textbox(label="Transcript")

stream = Stream(
    # FIX: WebRTC audio is sampled at 48 kHz; 48_100 was a typo (no standard
    # audio rate is 48,100 Hz — it conflates 44_100 and 48_000).
    ReplyOnPause(transcribe, input_sample_rate=48_000),
    modality="audio",
    mode="send",  # audio flows client -> server only; text returns via AdditionalOutputs
    additional_inputs=[transcript, gr.Dropdown(choices=LANGUAGES, label="Language")],
    additional_outputs=[transcript],
    # Keep the freshly produced transcript (b); discard the stale input value (a).
    additional_outputs_handler=lambda a, b: b,
    rtc_configuration=get_turn_credentials_async,
    server_rtc_configuration=get_turn_credentials(ttl=604_800),  # 7-day TURN credential TTL
    concurrency_limit=20 if get_space() else None,  # cap sessions only when hosted on Spaces
    time_limit=300,  # max seconds per streaming session
    ui_args={"title": ""},
)
# One-shot (non-streaming) UI: upload or record a clip, pick a language,
# and get back the full transcript in a single request.
_file_inputs = [
    gr.Audio(label="Upload Audio", sources=["upload", "microphone"]),
    gr.Dropdown(choices=LANGUAGES, label="Language"),
]
iface = gr.Interface(
    fn=transcribe_file,
    inputs=_file_inputs,
    outputs=gr.Textbox(label="Transcript"),
)
# Page layout: banner + subtitle, then two tabs (live streaming vs. file upload).
with gr.Blocks() as demo:
    gr.HTML(
        """
    <h1 style='text-align: center; display: flex; align-items: center; justify-content: center;'>
    <img src="/gradio_api/file=AV_Huggy.png" alt="Streaming Huggy" style="height: 50px; margin-right: 10px"> Really Fast Whisper
    </h1>
    """
    )
    gr.HTML(
        """
    <h2 style='text-align: center'>
    Powered by <a href="https://huggingface.co/hfendpoints/whisper-large-v3">HF Inference Endpoints</a> and <a href="https://fastrtc.org/">FastRTC</a>
    </h2>
    """
    )
    with gr.Tabs():
        with gr.Tab("Streaming"):
            gr.Markdown(
                "Grant access to the microphone and speak naturally. The transcript will be updated as you pause."
            )
            # Embed the auto-generated fastrtc streaming UI inside this tab.
            stream.ui.render()
        with gr.Tab("File Upload"):
            iface.render()

if __name__ == "__main__":
    # allowed_paths lets gradio serve the banner image referenced in the header HTML.
    demo.launch(allowed_paths=["AV_Huggy.png"])
|