Spaces:

sofdog
/

live-transcription-docker

Sleeping

App Files Community

Sofia Casadei committed 5 days ago

Commit

437ed2e

1 Parent(s): 7338a56

fix: use hf-cloudflare turn server

Browse files

Files changed (1) hide show

main.py +4 -14

main.py CHANGED Viewed

@@ -29,8 +29,6 @@ from transformers.utils import is_flash_attn_2_available
 from utils.logger_config import setup_logging
 from utils.device import get_device, get_torch_and_np_dtypes
-from utils.turn_server import get_credential_function, get_rtc_credentials
 load_dotenv()
 setup_logging()
@@ -40,7 +38,6 @@ logger = logging.getLogger(__name__)
 UI_MODE = os.getenv("UI_MODE", "fastapi").lower() # gradio | fastapi
 UI_TYPE = os.getenv("UI_TYPE", "base").lower() # base | screen
 APP_MODE = os.getenv("APP_MODE", "local").lower() # local | deployed
-TURN_SERVER_PROVIDER = os.getenv("TURN_SERVER_PROVIDER", "hf-cloudflare").lower() # hf-cloudflare | cloudflare | hf | twilio
 MODEL_ID = os.getenv("MODEL_ID", "openai/whisper-large-v3-turbo")
 LANGUAGE = os.getenv("LANGUAGE", "english").lower()
@@ -48,7 +45,6 @@ device = get_device(force_cpu=False)
 torch_dtype, np_dtype = get_torch_and_np_dtypes(device, use_bfloat16=False)
 logger.info(f"Using device: {device}, torch_dtype: {torch_dtype}, np_dtype: {np_dtype}")
 attention = "flash_attention_2" if is_flash_attn_2_available() else "sdpa"
 logger.info(f"Using attention: {attention}")
@@ -87,7 +83,6 @@ warmup_audio = np.zeros((16000,), dtype=np_dtype)  # 1s of silence
 transcribe_pipeline(warmup_audio)
 logger.info("Model warmup complete")
 async def transcribe(audio: tuple[int, np.ndarray]):
     sample_rate, audio_array = audio
     logger.info(f"Sample rate: {sample_rate}Hz, Shape: {audio_array.shape}")
@@ -104,11 +99,6 @@ async def transcribe(audio: tuple[int, np.ndarray]):
     )
     yield AdditionalOutputs(outputs["text"].strip())
-async def get_credentials():
-    return await get_cloudflare_turn_credentials_async(hf_token=os.getenv("HF_TOKEN"))
-server_credentials = get_cloudflare_turn_credentials(ttl=360_000) if APP_MODE == "deployed" else None
 logger.info("Initializing FastRTC stream")
 stream = Stream(
     handler=ReplyOnPause(
@@ -146,8 +136,7 @@ stream = Stream(
         gr.Textbox(label="Transcript"),
     ],
     additional_outputs_handler=lambda current, new: current + " " + new,
-    rtc_configuration=get_credentials,
-    server_rtc_configuration=server_credentials,
     concurrency_limit=6
 )
@@ -162,8 +151,9 @@ async def index():
     elif UI_TYPE == "screen":
         html_content = open("static/index-screen.html").read()
-    # Use the same server credentials for the client
-    return HTMLResponse(content=html_content.replace("__RTC_CONFIGURATION__", json.dumps(server_credentials)))
 @app.get("/transcript")
 def _(webrtc_id: str):

 from utils.logger_config import setup_logging
 from utils.device import get_device, get_torch_and_np_dtypes
 load_dotenv()
 setup_logging()
 UI_MODE = os.getenv("UI_MODE", "fastapi").lower() # gradio | fastapi
 UI_TYPE = os.getenv("UI_TYPE", "base").lower() # base | screen
 APP_MODE = os.getenv("APP_MODE", "local").lower() # local | deployed
 MODEL_ID = os.getenv("MODEL_ID", "openai/whisper-large-v3-turbo")
 LANGUAGE = os.getenv("LANGUAGE", "english").lower()
 torch_dtype, np_dtype = get_torch_and_np_dtypes(device, use_bfloat16=False)
 logger.info(f"Using device: {device}, torch_dtype: {torch_dtype}, np_dtype: {np_dtype}")
 attention = "flash_attention_2" if is_flash_attn_2_available() else "sdpa"
 logger.info(f"Using attention: {attention}")
 transcribe_pipeline(warmup_audio)
 logger.info("Model warmup complete")
 async def transcribe(audio: tuple[int, np.ndarray]):
     sample_rate, audio_array = audio
     logger.info(f"Sample rate: {sample_rate}Hz, Shape: {audio_array.shape}")
     )
     yield AdditionalOutputs(outputs["text"].strip())
 logger.info("Initializing FastRTC stream")
 stream = Stream(
     handler=ReplyOnPause(
         gr.Textbox(label="Transcript"),
     ],
     additional_outputs_handler=lambda current, new: current + " " + new,
+    rtc_configuration=get_cloudflare_turn_credentials_async(hf_token=os.getenv("HF_TOKEN")) if APP_MODE == "deployed" else None,
     concurrency_limit=6
 )
     elif UI_TYPE == "screen":
         html_content = open("static/index-screen.html").read()
+    rtc_configuration = get_cloudflare_turn_credentials_async(hf_token=os.getenv("HF_TOKEN")) if APP_MODE == "deployed" else None
+    html_content = html_content.replace("__RTC_CONFIGURATION__", json.dumps(rtc_configuration))
+    return HTMLResponse(content=html_content)
 @app.get("/transcript")
 def _(webrtc_id: str):