File size: 8,842 Bytes
0ebb77f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
faed9d7
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="UTF-8"/>
  <meta name="viewport" content="width=device-width, initial-scale=1.0"/>
  <title>Audio Transcription</title>
  <style>
    body {
      font-family: 'Inter', sans-serif;
      margin: 20px;
      text-align: center;
    }
    #recordButton {
      width: 80px;
      height: 80px;
      font-size: 36px;
      border: none;
      border-radius: 50%;
      background-color: white;
      cursor: pointer;
      box-shadow: 0 0px 10px rgba(0, 0, 0, 0.2);
      transition: background-color 0.3s ease, transform 0.2s ease;
    }
    #recordButton.recording {
      background-color: #ff4d4d;
      color: white;
    }
    #recordButton:active {
      transform: scale(0.95);
    }
    #status {
      margin-top: 20px;
      font-size: 16px;
      color: #333;
    }
    .settings-container {
      display: flex;
      justify-content: center;
      align-items: center;
      gap: 15px;
      margin-top: 20px;
    }
    .settings {
      display: flex;
      flex-direction: column;
      align-items: flex-start;
      gap: 5px;
    }
    #chunkSelector,
    #websocketInput {
      font-size: 16px;
      padding: 5px;
      border-radius: 5px;
      border: 1px solid #ddd;
      background-color: #f9f9f9;
    }
    #websocketInput {
      width: 200px;
    }
    #chunkSelector:focus,
    #websocketInput:focus {
      outline: none;
      border-color: #007bff;
    }
    label {
      font-size: 14px;
    }
    /* Speaker-labeled transcript area */
    #linesTranscript {
      margin: 20px auto;
      max-width: 600px;
      text-align: left;
      font-size: 16px;
    }
    #linesTranscript p {
      margin: 5px 0;
    }
    #linesTranscript strong {
      color: #333;
    }
    /* Grey buffer styling */
    .buffer {
      color: rgb(180, 180, 180);
      font-style: italic;
      margin-left: 4px;
    }
  </style>
</head>
<body>

  <div class="settings-container">
    <button id="recordButton">🎙️</button>
    <div class="settings">
      <div>
        <label for="chunkSelector">Chunk size (ms):</label>
        <select id="chunkSelector">
          <option value="500" selected>500 ms</option>
          <option value="1000">1000 ms</option>
          <option value="2000">2000 ms</option>
          <option value="3000">3000 ms</option>
          <option value="4000">4000 ms</option>
          <option value="5000">5000 ms</option>
        </select>
      </div>
      <div>
        <label for="websocketInput">WebSocket URL:</label>
        <input id="websocketInput" type="text" value="wss://atpeak-realtime-stt-translation.hf.space/asr" />
      </div>
    </div>
  </div>

  <p id="status"></p>

  <!-- Speaker-labeled transcript -->
  <div id="linesTranscript"></div>

  <script>
    // DOM handles for the controls and output areas used throughout this script.
    const statusText = document.getElementById("status");
    const recordButton = document.getElementById("recordButton");
    const chunkSelector = document.getElementById("chunkSelector");
    const websocketInput = document.getElementById("websocketInput");
    const linesTranscriptDiv = document.getElementById("linesTranscript");

    // Session state shared by the functions below.
    let isRecording = false;
    let websocket = null;
    let recorder = null;
    // Seed both settings from the controls so the values actually used always
    // match what the page displays; the literals are fallbacks only.
    let chunkDuration = Number.parseInt(chunkSelector.value, 10) || 500;
    let websocketUrl = websocketInput.value.trim() || "ws://localhost:8000/asr";
    let userClosing = false; // true while a close requested by stopRecording() is pending
    let lines = []; // transcript lines accumulated on the front end
    let audioQueue = []; // audio URLs waiting to be played
    let isPlayingAudio = false; // whether queued audio is currently being played
    let currentAudio = null; // the Audio object that is currently playing, if any

    // Keep chunkDuration in sync with the dropdown. The value is read when
    // recorder.start(chunkDuration) is called, i.e. on the next recording start.
    chunkSelector.addEventListener("change", () => {
      // Explicit radix, and never store NaN / non-positive values.
      const parsed = Number.parseInt(chunkSelector.value, 10);
      if (Number.isFinite(parsed) && parsed > 0) {
        chunkDuration = parsed;
      }
    });

    // Accept a new server URL from the text field, but only when it uses a
    // WebSocket scheme; otherwise keep the previous URL and report the problem.
    websocketInput.addEventListener("change", () => {
      const candidate = websocketInput.value.trim();
      const hasWsScheme = ["ws://", "wss://"].some((scheme) => candidate.startsWith(scheme));
      if (hasWsScheme) {
        websocketUrl = candidate;
        statusText.textContent = "WebSocket URL updated. Ready to connect.";
      } else {
        statusText.textContent = "Invalid WebSocket URL (must start with ws:// or wss://)";
      }
    });

    /**
     * Open a WebSocket to `websocketUrl`, store it in the shared `websocket`
     * variable and install its open/close/error/message handlers.
     *
     * Incoming messages are expected to be JSON objects such as
     *   { "line": {"speaker": 0, "text": "Hello."}, "buffer": "...", "audio_url": "..." }
     * A truthy `line` is appended to `lines` and the transcript is re-rendered;
     * an audio URL (on the line, or at the top level of the message) is queued
     * for playback.
     *
     * @returns {Promise<void>} Resolves when the connection opens. Rejects when
     *   the WebSocket constructor throws or the socket reports an error.
     */
    function setupWebSocket() {
      return new Promise((resolve, reject) => {
        let socket;
        try {
          socket = new WebSocket(websocketUrl);
        } catch (error) {
          statusText.textContent = "Invalid WebSocket URL. Please check and try again.";
          reject(error);
          return;
        }
        websocket = socket;

        socket.onopen = () => {
          statusText.textContent = "Connected to server.";
          resolve();
        };

        socket.onclose = () => {
          if (userClosing) {
            statusText.textContent = "WebSocket closed by user.";
          } else {
            statusText.textContent =
              "Disconnected from the WebSocket server. (Check logs if model is loading.)";
          }
          userClosing = false;
        };

        socket.onerror = () => {
          statusText.textContent = "Error connecting to WebSocket.";
          // No effect if the promise has already been resolved by onopen.
          reject(new Error("Error connecting to WebSocket"));
        };

        // Handle messages from server
        socket.onmessage = (event) => {
          let data;
          try {
            data = JSON.parse(event.data);
          } catch (err) {
            // One malformed frame must not throw out of the handler.
            console.error("Ignoring malformed WebSocket message:", err);
            return;
          }
          if (data === null || typeof data !== "object") {
            return;
          }

          // No destructuring default for `line`: defaults do not apply to an
          // explicit null, so the checks below guard against it instead.
          const { line, buffer = "" } = data;
          if (line) {
            lines.push(line); // accumulate the new line
            renderLinesWithBuffer(lines, buffer);
          }

          // The URL was previously read from the line only, while the documented
          // message shape carries it at the top level; accept either location.
          const audioUrl = (line && line.audio_url) || data.audio_url;
          if (audioUrl) {
            audioQueue.push(audioUrl);
            if (!isPlayingAudio) {
              playNextAudio();
            }
          }
        };
      });
    }

    /**
     * Escape the characters that are significant in HTML so that a value can be
     * placed inside markup as plain text.
     *
     * @param {*} value - Value to escape; converted with String().
     * @returns {string} The escaped text.
     */
    function escapeHtml(value) {
      return String(value)
        .replace(/&/g, "&amp;")
        .replace(/</g, "&lt;")
        .replace(/>/g, "&gt;")
        .replace(/"/g, "&quot;")
        .replace(/'/g, "&#39;");
    }

    /**
     * Render the transcript into #linesTranscript, one paragraph per line, with
     * the pending buffer shown in grey after the last line.
     *
     * Speaker, text and buffer arrive from the server, so they are escaped
     * before being written through innerHTML.
     *
     * @param {Array<{speaker: *, text: *}>} lines - Lines to display, in order.
     * @param {string} buffer - Not-yet-final text appended to the last line when non-empty.
     */
    function renderLinesWithBuffer(lines, buffer) {
      const lastIndex = lines.length - 1;
      const linesHtml = lines.map((item, idx) => {
        // A missing text renders as empty rather than the string "undefined".
        let textContent = escapeHtml(item.text == null ? "" : item.text);
        if (idx === lastIndex && buffer) {
          textContent += `<span class="buffer">${escapeHtml(buffer)}</span>`;
        }
        return `<p><strong>Speaker ${escapeHtml(item.speaker)}:</strong> ${textContent}</p>`;
      }).join("");

      linesTranscriptDiv.innerHTML = linesHtml;
    }

    /**
     * Play the next URL in `audioQueue`, then chain to the following one when
     * the clip ends or fails. Clears `isPlayingAudio` once the queue is empty.
     */
    function playNextAudio() {
      if (audioQueue.length === 0) {
        isPlayingAudio = false;
        return;
      }
      isPlayingAudio = true;

      const audio = new Audio(audioQueue.shift()); // take the next queued URL
      currentAudio = audio;

      // A clip that fails can both fire the error event and reject the play()
      // promise. Advance at most once per clip so the queue is never skipped
      // ahead and two clips never start at the same time.
      let advanced = false;
      const advance = () => {
        if (advanced) {
          return;
        }
        advanced = true;
        playNextAudio();
      };
      const fail = (err) => {
        console.error("Error playing audio:", err);
        advance(); // a failed clip must not stall the rest of the queue
      };

      audio.onended = advance;
      audio.onerror = fail;
      audio.play().catch(fail);
    }

    /**
     * Request the microphone, start a MediaRecorder that emits a chunk every
     * `chunkDuration` ms, and forward each non-empty chunk over the open
     * WebSocket. Sets `isRecording` and refreshes the UI on success.
     *
     * On failure the error is logged, any acquired microphone stream and the
     * already-opened socket are released, and a status message is shown. The
     * promise still resolves; errors are not rethrown to the caller.
     *
     * @returns {Promise<void>}
     */
    async function startRecording() {
      let stream = null;
      try {
        stream = await navigator.mediaDevices.getUserMedia({ audio: true });
        // NOTE(review): the container is fixed to audio/webm; presumably the
        // server expects it. Confirm behaviour on browsers without webm support.
        recorder = new MediaRecorder(stream, { mimeType: "audio/webm" });
        recorder.ondataavailable = (e) => {
          const hasAudio = Boolean(e.data) && e.data.size > 0;
          if (hasAudio && websocket && websocket.readyState === WebSocket.OPEN) {
            websocket.send(e.data);
          }
        };
        recorder.start(chunkDuration);
        isRecording = true;
        updateUI();
      } catch (err) {
        console.error("Could not start recording:", err);

        // Release the microphone if it was granted before the failure.
        if (stream) {
          stream.getTracks().forEach((track) => track.stop());
        }
        recorder = null;

        // The socket was opened for this attempt; without a recorder it would
        // stay open unused. Detach onclose first so it does not overwrite the
        // status message set below.
        if (websocket) {
          websocket.onclose = null;
          websocket.close();
          websocket = null;
        }

        statusText.textContent = "Error accessing microphone. Please allow microphone access.";
      }
    }

    /**
     * End the current session: stop the recorder and release the microphone,
     * close the WebSocket, silence and reset audio playback, then refresh the UI.
     */
    function stopRecording() {
      if (recorder) {
        // stop() throws on a recorder that is already inactive.
        if (recorder.state !== "inactive") {
          recorder.stop();
        }
        // Stopping the recorder does not end the capture; stop the tracks so
        // the microphone is actually released.
        if (recorder.stream) {
          recorder.stream.getTracks().forEach((track) => track.stop());
        }
        recorder = null;
      }
      isRecording = false;

      if (websocket) {
        // Only flag a user-initiated close when a close event is still to come;
        // otherwise the flag would linger and mislabel a later disconnect.
        if (websocket.readyState !== WebSocket.CLOSED) {
          userClosing = true;
        }
        websocket.close();
        websocket = null;
      }

      if (currentAudio) {
        currentAudio.onended = null;
        currentAudio.onerror = null;
        currentAudio.pause();
        currentAudio = null;
      }
      // A paused clip never fires "ended", so reset the playback state here;
      // otherwise isPlayingAudio stays true and later audio is never started.
      audioQueue = [];
      isPlayingAudio = false;

      updateUI();
    }

    // True while a start attempt (connect, then microphone) is still in flight.
    let isStarting = false;

    /**
     * Click handler for the record button: stops the session when recording,
     * otherwise clears the transcript, connects the WebSocket and starts
     * recording.
     *
     * `isRecording` only becomes true after both awaits finish, so clicks that
     * arrive during a start attempt are ignored rather than opening a second
     * socket.
     *
     * @returns {Promise<void>}
     */
    async function toggleRecording() {
      if (isRecording) {
        stopRecording();
        return;
      }
      if (isStarting) {
        return;
      }

      isStarting = true;
      linesTranscriptDiv.innerHTML = "";
      lines = []; // start every session with an empty transcript
      try {
        await setupWebSocket();
        await startRecording();
      } catch (err) {
        console.error("Could not start transcription:", err);
        statusText.textContent = "Could not connect to WebSocket or access mic. Aborted.";
      } finally {
        isStarting = false;
      }
    }

    /**
     * Reflect `isRecording` in the page: toggle the button's "recording" class
     * and set the status line to the matching prompt.
     */
    function updateUI() {
      const { classList } = recordButton;
      classList.toggle("recording", isRecording);

      if (isRecording) {
        statusText.textContent = "Recording...";
      } else {
        statusText.textContent = "Click to start transcription";
      }
    }

    // Clicking the microphone button starts a session when idle and stops it when recording.
    recordButton.addEventListener("click", toggleRecording);
  </script>
</body>
</html>