bcci committed on
Commit
06526ee
·
verified ·
1 Parent(s): c8231f4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +6 -1
app.py CHANGED
@@ -72,6 +72,7 @@ async def websocket_endpoint(websocket: WebSocket):
72
  recording = False
73
  last_partial_time = time.time()
74
  current_model = transcriber_tiny # Default to tiny model
 
75
 
76
  try:
77
  while True:
@@ -88,6 +89,7 @@ async def websocket_endpoint(websocket: WebSocket):
88
  # Convert the 16-bit PCM data to float32.
89
  chunk = pcm16_to_float32(data["bytes"])
90
  speech = np.concatenate((speech, chunk))
 
91
  if not recording:
92
  # Retain only the last few chunks when not recording.
93
  speech = speech[-lookback_size:]
@@ -95,6 +97,7 @@ async def websocket_endpoint(websocket: WebSocket):
95
  # Process VAD on the current chunk.
96
  vad_result = vad_iterator(chunk)
97
  current_time = time.time()
 
98
  if vad_result:
99
  # If VAD signals the start of speech and we're not already recording.
100
  if "start" in vad_result and not recording:
@@ -125,7 +128,9 @@ async def websocket_endpoint(websocket: WebSocket):
125
  # Send partial transcription updates periodically.
126
  if (current_time - last_partial_time) > MIN_REFRESH_SECS:
127
  text = current_model(speech)
128
- await websocket.send_json({"type": "partial", "transcript": text})
 
 
129
  last_partial_time = current_time
130
  except WebSocketDisconnect:
131
  # If the client disconnects, send any final transcript if available.
 
72
  recording = False
73
  last_partial_time = time.time()
74
  current_model = transcriber_tiny # Default to tiny model
75
+ last_output = ""
76
 
77
  try:
78
  while True:
 
89
  # Convert the 16-bit PCM data to float32.
90
  chunk = pcm16_to_float32(data["bytes"])
91
  speech = np.concatenate((speech, chunk))
92
+
93
  if not recording:
94
  # Retain only the last few chunks when not recording.
95
  speech = speech[-lookback_size:]
 
97
  # Process VAD on the current chunk.
98
  vad_result = vad_iterator(chunk)
99
  current_time = time.time()
100
+
101
  if vad_result:
102
  # If VAD signals the start of speech and we're not already recording.
103
  if "start" in vad_result and not recording:
 
128
  # Send partial transcription updates periodically.
129
  if (current_time - last_partial_time) > MIN_REFRESH_SECS:
130
  text = current_model(speech)
131
+ if last_output != text:
132
+ last_output = text
133
+ await websocket.send_json({"type": "partial", "transcript": text})
134
  last_partial_time = current_time
135
  except WebSocketDisconnect:
136
  # If the client disconnects, send any final transcript if available.