Steveeeeeeen HF Staff committed on
Commit
77dbc9a
·
verified ·
1 Parent(s): 1e991ac

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +63 -21
app.py CHANGED
@@ -46,12 +46,15 @@ def transcribe(
46
 
47
  tf_input = [d for d in transformers_chat]
48
 
 
49
  output = pipe(
50
  {"audio": audio_sr, "turns": tf_input, "sampling_rate": target_sr},
51
  max_new_tokens=512,
52
  )
 
53
  transcription = whisper({"array": audio_sr.squeeze(), "sampling_rate": target_sr})
54
 
 
55
  conversation.append({"role": "user", "content": transcription["text"]})
56
  conversation.append({"role": "assistant", "content": output})
57
  transformers_chat.append({"role": "user", "content": transcription["text"]})
@@ -60,32 +63,61 @@ def transcribe(
60
  yield AdditionalOutputs(transformers_chat, conversation)
61
 
62
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
63
  with gr.Blocks() as demo:
64
  gr.HTML(
65
  """
66
- <h1 style='text-align: center'>
67
- Talk to Smolvox Smollm2(Powered by WebRTC ⚡️)
68
- </h1>
69
- <p style='text-align: center'>
70
- Once you grant access to your microphone, you can talk naturally to Ultravox.
71
- When you stop talking, the audio will be sent for processing.
72
- </p>
73
- <p style='text-align: center'>
74
- Each conversation is limited to 90 seconds. Once the time limit is up you can rejoin the conversation.
75
- </p>
76
- """
77
  )
 
 
 
 
 
 
 
 
 
 
78
  with gr.Row():
79
- transformers_chat = gr.State(
80
- value=[
81
- {
82
- "role": "system",
83
- "content": "You are a friendly and helpful character. You love to answer questions for people.",
84
- }
85
- ]
86
- )
87
- with gr.Group():
88
- transcript = gr.Chatbot(label="transcript", type="messages")
89
  audio = WebRTC(
90
  rtc_configuration=rtc_configuration,
91
  label="Stream",
@@ -93,6 +125,7 @@ with gr.Blocks() as demo:
93
  modality="audio",
94
  )
95
 
 
96
  audio.stream(
97
  ReplyOnPause(transcribe),
98
  inputs=[audio, transformers_chat, transcript],
@@ -106,5 +139,14 @@ with gr.Blocks() as demo:
106
  show_progress="hidden",
107
  )
108
 
 
 
 
 
 
 
 
 
 
109
  if __name__ == "__main__":
110
  demo.launch()
 
46
 
47
  tf_input = [d for d in transformers_chat]
48
 
49
+ # Generate response from the pipeline using the audio input
50
  output = pipe(
51
  {"audio": audio_sr, "turns": tf_input, "sampling_rate": target_sr},
52
  max_new_tokens=512,
53
  )
54
+ # Transcribe the audio using Whisper
55
  transcription = whisper({"array": audio_sr.squeeze(), "sampling_rate": target_sr})
56
 
57
+ # Update both conversation histories
58
  conversation.append({"role": "user", "content": transcription["text"]})
59
  conversation.append({"role": "assistant", "content": output})
60
  transformers_chat.append({"role": "user", "content": transcription["text"]})
 
63
  yield AdditionalOutputs(transformers_chat, conversation)
64
 
65
 
66
def respond_text(
    user_text: str,
    transformers_chat: list[dict],
    conversation: list[dict],
):
    """Handle a typed chat message and return the updated histories.

    Args:
        user_text: Message typed into the textbox. ``None``, an empty
            string, or a whitespace-only string is treated as "nothing to
            send".
        transformers_chat: Turn history that is passed to the model
            pipeline under the ``"turns"`` key. Extended in place.
        conversation: Turn history shown in the transcript widget.
            Extended in place.

    Returns:
        A ``(transformers_chat, conversation)`` tuple. When there is nothing
        to send, both lists are returned untouched.
    """
    # Guard against None as well as blank text: calling .strip() on None
    # would raise AttributeError instead of quietly doing nothing.
    if not user_text or not user_text.strip():
        return transformers_chat, conversation

    # Record the user's turn in both histories (separate dicts so the two
    # lists never alias the same object).
    conversation.append({"role": "user", "content": user_text})
    transformers_chat.append({"role": "user", "content": user_text})

    # NOTE(review): the "text" key is an assumption carried over from the
    # original code -- confirm the pipeline actually consumes it. The typed
    # message is already the last entry of "turns" at this point.
    # The pipeline gets a shallow copy of the history, as the audio handler
    # does with its own copy, so it cannot alter the stored state list.
    output = pipe(
        {"text": user_text, "turns": list(transformers_chat)},
        max_new_tokens=512,
    )

    conversation.append({"role": "assistant", "content": output})
    transformers_chat.append({"role": "assistant", "content": output})
    return transformers_chat, conversation
86
+
87
+
88
  with gr.Blocks() as demo:
89
  gr.HTML(
90
  """
91
+ <h1 style='text-align: center'>
92
+ Talk to Smolvox Smollm2 (Powered by WebRTC ⚡️)
93
+ </h1>
94
+ <p style='text-align: center'>
95
+ Once you grant access to your microphone, you can talk naturally to Ultravox.
96
+ When you stop talking, the audio will be sent for processing.
97
+ </p>
98
+ <p style='text-align: center'>
99
+ Each conversation is limited to 90 seconds. Once the time limit is up you can rejoin the conversation.
100
+ </p>
101
+ """
102
  )
103
+ # Shared conversation state
104
+ transformers_chat = gr.State(
105
+ value=[
106
+ {
107
+ "role": "system",
108
+ "content": "You are a friendly and helpful character. You love to answer questions for people.",
109
+ }
110
+ ]
111
+ )
112
+
113
  with gr.Row():
114
+ with gr.Column(scale=1):
115
+ transcript = gr.Chatbot(label="Transcript", type="messages")
116
+ text_input = gr.Textbox(
117
+ placeholder="Type your message here...", label="Your Message"
118
+ )
119
+ send_button = gr.Button("Send")
120
+ with gr.Column(scale=1):
 
 
 
121
  audio = WebRTC(
122
  rtc_configuration=rtc_configuration,
123
  label="Stream",
 
125
  modality="audio",
126
  )
127
 
128
+ # Audio stream: when you stop speaking, process the audio input.
129
  audio.stream(
130
  ReplyOnPause(transcribe),
131
  inputs=[audio, transformers_chat, transcript],
 
139
  show_progress="hidden",
140
  )
141
 
142
+ # Text input: when you click "Send", process the typed message.
143
+ send_button.click(
144
+ respond_text,
145
+ inputs=[text_input, transformers_chat, transcript],
146
+ outputs=[transformers_chat, transcript],
147
+ )
148
+ # Optionally clear the text box after sending:
149
+ send_button.click(lambda: "", inputs=[], outputs=[text_input])
150
+
151
  if __name__ == "__main__":
152
  demo.launch()