awacke1 committed on
Commit
7be7ac1
·
verified ·
1 Parent(s): d437815

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +74 -0
app.py ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
import asyncio
import io

import numpy as np
import soundfile as sf
import streamlit as st
import streamlit.components.v1 as components
import torch
import websockets
from transformers import Wav2Vec2ForCTC, Wav2Vec2Tokenizer
# Load the pre-trained ASR model and its tokenizer once at import time so
# every websocket connection reuses the same instances.
_MODEL_NAME = "facebook/wav2vec2-base-960h"
tokenizer = Wav2Vec2Tokenizer.from_pretrained(_MODEL_NAME)
model = Wav2Vec2ForCTC.from_pretrained(_MODEL_NAME)
async def recognize_speech(websocket):
    """Serve one websocket client: transcribe each binary audio message.

    Every incoming message is treated as a complete audio file readable by
    soundfile (e.g. WAV/FLAC); the transcription text is sent back on the
    same connection.

    NOTE(review): the browser client sends 'audio/webm' chunks, which
    libsndfile cannot decode -- confirm the client encodes to a supported
    container, or transcode server-side.
    """
    async for message in websocket:
        try:
            wf, samplerate = sf.read(io.BytesIO(message))
        except RuntimeError:
            # Fix: an undecodable chunk used to raise and tear down the whole
            # connection.  Report it and keep serving subsequent messages.
            # (soundfile's LibsndfileError subclasses RuntimeError.)
            await websocket.send("[error: undecodable audio chunk]")
            continue
        # Downmix multi-channel audio: sf.read returns a 2-D (frames, channels)
        # array for stereo input, but the tokenizer expects a 1-D signal.
        if wf.ndim > 1:
            wf = wf.mean(axis=1)
        # NOTE(review): wav2vec2-base-960h was trained on 16 kHz audio; input
        # at other sample rates should be resampled for accurate output.
        input_values = tokenizer(wf, return_tensors="pt").input_values
        with torch.no_grad():  # inference only -- skip autograd bookkeeping
            logits = model(input_values).logits
        # Greedy CTC decode: most likely token per frame, then collapse.
        predicted_ids = torch.argmax(logits, dim=-1)
        transcription = tokenizer.decode(predicted_ids[0])
        await websocket.send(transcription)
async def main_logic():
    """Run the speech-recognition websocket server on localhost:8000 forever."""
    server = websockets.serve(recognize_speech, "localhost", 8000)
    async with server:
        # Park on a future that never completes; the server stays up until
        # the surrounding task is cancelled.
        await asyncio.Future()
# --- Streamlit interface -------------------------------------------------
st.title("Real-Time ASR with Transformers.js")

# NOTE(review): this page only renders the browser client; the websocket
# server (main_logic) is never started here and must run separately.
#
# Fix: st.markdown(..., unsafe_allow_html=True) injects HTML via innerHTML,
# and browsers do not execute <script> tags inserted that way, so the
# original recorder code never ran.  components.html renders inside an
# iframe where scripts DO execute; the transcript <div> lives in the same
# snippet so document.getElementById resolves within that iframe.
components.html(
    """
    <div id="transcription">Your transcriptions will appear here:</div>
    <script>
    const handleAudio = async (stream) => {
        const websocket = new WebSocket('ws://localhost:8000');
        const mediaRecorder = new MediaRecorder(stream, {mimeType: 'audio/webm'});
        const audioChunks = [];

        mediaRecorder.addEventListener("dataavailable", event => {
            console.log('dataavailable:', event.data);
            audioChunks.push(event.data);
            websocket.send(event.data);
        });

        websocket.onmessage = (event) => {
            const transcription = event.data;
            const transcriptionDiv = document.getElementById("transcription");
            transcriptionDiv.innerHTML = transcriptionDiv.innerHTML + transcription + "<br/>";
            console.log('Received:', transcription);
        };

        mediaRecorder.start(1000);

        websocket.onopen = () => {
            console.log('Connected to WebSocket');
        };

        websocket.onerror = (error) => {
            console.error('WebSocket Error:', error);
        };

        websocket.onclose = () => {
            console.log('WebSocket Closed');
        };
    };

    navigator.mediaDevices.getUserMedia({ audio: true })
        .then(handleAudio)
        .catch(error => console.error('getUserMedia Error:', error));
    </script>
    """,
    height=300,
)