nightey3s committed
Commit 8cf5bd1 · unverified · 1 Parent(s): 984bc80

Fix compatibility for ZeroGPU
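In short, the fix loads every model on the CPU at import time and acquires a GPU only inside functions decorated with @spaces.GPU, moving the weights back to the CPU when the call ends. Below is a minimal sketch of that pattern, assuming the Hugging Face `spaces` package; the checkpoint name and function are illustrative placeholders, not the app's real ones:

import os
import torch
import spaces  # Hugging Face `spaces` package; the decorator is a no-op outside ZeroGPU
from transformers import AutoModelForSequenceClassification, AutoTokenizer

# Same environment probe the commit uses to detect a ZeroGPU Space
IS_ZEROGPU = os.environ.get("SPACE_RUNTIME_STATELESS", "0") == "1"
device = torch.device("cpu" if IS_ZEROGPU or not torch.cuda.is_available() else "cuda")

# Load on CPU in the main process: a ZeroGPU worker must not touch CUDA here
tokenizer = AutoTokenizer.from_pretrained("org/some-classifier")  # hypothetical checkpoint
model = AutoModelForSequenceClassification.from_pretrained(
    "org/some-classifier",   # hypothetical checkpoint
    device_map=None,         # stay on CPU for now
    low_cpu_mem_usage=True,  # requires the `accelerate` package
)

@spaces.GPU  # a GPU is attached only for the duration of this call
def classify(text: str) -> float:
    # Inside the decorated call CUDA is available on ZeroGPU: hop over, compute, hop back
    run_device = torch.device("cuda") if IS_ZEROGPU and torch.cuda.is_available() else device
    model.to(run_device)
    try:
        inputs = tokenizer(text, return_tensors="pt", truncation=True).to(run_device)
        with torch.no_grad():
            logits = model(**inputs).logits
        return torch.softmax(logits, dim=-1)[0, -1].item()
    finally:
        if IS_ZEROGPU:
            model.to("cpu")  # release GPU memory so the next request can claim it

The deleted compatibility notes also mention custom durations for longer jobs; that presumably refers to the decorator's duration argument, e.g.:

@spaces.GPU(duration=120)  # seconds of GPU time to request for a long-running call
def transcribe(audio_path: str):
    ...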

profanity_detector.py CHANGED
@@ -24,19 +24,12 @@ logging.basicConfig(
 )
 logger = logging.getLogger('profanity_detector')
 
-# ZeroGPU COMPATIBILITY NOTES:
-# The @spaces.GPU decorators throughout this code enable compatibility with Hugging Face ZeroGPU.
-# - They request GPU resources only when needed and release them after function completion
-# - They have no effect when running in local environments or standard GPU Spaces
-# - Custom durations can be specified for functions requiring longer processing times
-# - For local development, you'll need: pip install huggingface_hub[spaces]
-
 # Detect if we're running in a ZeroGPU environment
 IS_ZEROGPU = os.environ.get("SPACE_RUNTIME_STATELESS", "0") == "1"
 
 # Define device strategy that works in both environments
 if IS_ZEROGPU:
-    # In ZeroGPU: initialize on CPU, will use GPU only in @spaces.GPU functions
+    # In ZeroGPU: always initialize on CPU, will use GPU only in @spaces.GPU functions
     device = torch.device("cpu")
     logger.info("ZeroGPU environment detected. Using CPU for initial loading.")
 else:
@@ -44,10 +37,6 @@ else:
     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
     logger.info(f"Local environment. Using device: {device}")
 
-# Define device at the top of the script (global scope)
-device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-logger.info(f"Using device: {device}")
-
 # Global variables for models
 profanity_model = None
 profanity_tokenizer = None
@@ -77,79 +66,73 @@ def load_models():
     profanity_tokenizer = AutoTokenizer.from_pretrained(PROFANITY_MODEL)
 
     # Load model without moving to CUDA directly
-    if IS_ZEROGPU:
-        logger.info("ZeroGPU mode: Loading model without CUDA initialization")
-        # For ZeroGPU, use device_map='auto' or just stay on CPU
-        profanity_model = AutoModelForSequenceClassification.from_pretrained(
-            PROFANITY_MODEL,
-            device_map=None,  # Explicitly stay on CPU
-            low_cpu_mem_usage=True
-        )
-    else:
-        # For local runs, normal loading with CUDA if available
-        profanity_model = AutoModelForSequenceClassification.from_pretrained(PROFANITY_MODEL)
-        if torch.cuda.is_available():
-            profanity_model = profanity_model.to(device)
-            try:
-                profanity_model = profanity_model.half()
-                logger.info("Successfully converted profanity model to half precision")
-            except Exception as e:
-                logger.warning(f"Could not convert to half precision: {str(e)}")
+    profanity_model = AutoModelForSequenceClassification.from_pretrained(
+        PROFANITY_MODEL,
+        device_map=None,  # Stay on CPU for now
+        low_cpu_mem_usage=True
+    )
+
+    # Only move to device if NOT in ZeroGPU mode
+    if not IS_ZEROGPU and torch.cuda.is_available():
+        profanity_model = profanity_model.to(device)
+        try:
+            profanity_model = profanity_model.half()
+            logger.info("Successfully converted profanity model to half precision")
+        except Exception as e:
+            logger.warning(f"Could not convert to half precision: {str(e)}")
 
-    # Apply similar changes to all other model loading...
     logger.info("Loading detoxification model...")
     T5_MODEL = "s-nlp/t5-paranmt-detox"
     t5_tokenizer = AutoTokenizer.from_pretrained(T5_MODEL)
 
-    if IS_ZEROGPU:
-        t5_model = AutoModelForSeq2SeqLM.from_pretrained(
-            T5_MODEL,
-            device_map=None,
-            low_cpu_mem_usage=True
-        )
-    else:
-        t5_model = AutoModelForSeq2SeqLM.from_pretrained(T5_MODEL)
-        if torch.cuda.is_available():
-            t5_model = t5_model.to(device)
-            try:
-                t5_model = t5_model.half()
-                logger.info("Successfully converted T5 model to half precision")
-            except Exception as e:
-                logger.warning(f"Could not convert to half precision: {str(e)}")
+    t5_model = AutoModelForSeq2SeqLM.from_pretrained(
+        T5_MODEL,
+        device_map=None,  # Stay on CPU for now
+        low_cpu_mem_usage=True
+    )
+
+    # Only move to device if NOT in ZeroGPU mode
+    if not IS_ZEROGPU and torch.cuda.is_available():
+        t5_model = t5_model.to(device)
+        try:
+            t5_model = t5_model.half()
+            logger.info("Successfully converted T5 model to half precision")
+        except Exception as e:
+            logger.warning(f"Could not convert to half precision: {str(e)}")
 
     logger.info("Loading Whisper speech-to-text model...")
-    if IS_ZEROGPU:
-        # For ZeroGPU, stay on CPU in the main process
-        whisper_model = whisper.load_model("medium", device="cpu")
-    else:
-        whisper_model = whisper.load_model("large")
-        if torch.cuda.is_available():
-            whisper_model = whisper_model.to(device)
+    # Always load on CPU in ZeroGPU mode
+    #whisper_model = whisper.load_model("medium" if IS_ZEROGPU else "large", device="cpu")
+    whisper_model = whisper.load_model("large-v2", device="cpu")
+
+    # Only move to device if NOT in ZeroGPU mode
+    if not IS_ZEROGPU and torch.cuda.is_available():
+        whisper_model = whisper_model.to(device)
 
     logger.info("Loading Text-to-Speech model...")
     TTS_MODEL = "microsoft/speecht5_tts"
     tts_processor = SpeechT5Processor.from_pretrained(TTS_MODEL)
 
-    if IS_ZEROGPU:
-        tts_model = SpeechT5ForTextToSpeech.from_pretrained(
-            TTS_MODEL,
-            device_map=None,
-            low_cpu_mem_usage=True
-        )
-        vocoder = SpeechT5HifiGan.from_pretrained(
-            "microsoft/speecht5_hifigan",
-            device_map=None,
-            low_cpu_mem_usage=True
-        )
-    else:
-        tts_model = SpeechT5ForTextToSpeech.from_pretrained(TTS_MODEL)
-        vocoder = SpeechT5HifiGan.from_pretrained("microsoft/speecht5_hifigan")
-        if torch.cuda.is_available():
-            tts_model = tts_model.to(device)
-            vocoder = vocoder.to(device)
+    tts_model = SpeechT5ForTextToSpeech.from_pretrained(
+        TTS_MODEL,
+        device_map=None,  # Stay on CPU for now
+        low_cpu_mem_usage=True
+    )
+
+    vocoder = SpeechT5HifiGan.from_pretrained(
+        "microsoft/speecht5_hifigan",
+        device_map=None,  # Stay on CPU for now
+        low_cpu_mem_usage=True
+    )
+
+    # Only move to device if NOT in ZeroGPU mode
+    if not IS_ZEROGPU and torch.cuda.is_available():
+        tts_model = tts_model.to(device)
+        vocoder = vocoder.to(device)
 
     # Speaker embeddings - always on CPU for ZeroGPU
     speaker_embeddings = torch.zeros((1, 512))
+    # Only move to device if NOT in ZeroGPU mode
    if not IS_ZEROGPU and torch.cuda.is_available():
         speaker_embeddings = speaker_embeddings.to(device)
 
@@ -182,8 +165,17 @@ def detect_profanity(text: str, threshold: float = 0.5):
     try:
         # Detect profanity and score
         inputs = profanity_tokenizer(text, return_tensors="pt", truncation=True, max_length=512)
-        if torch.cuda.is_available():
-            inputs = inputs.to(device)
+
+        # In ZeroGPU, move to GPU here inside the spaces.GPU function
+        # For local environments, it might already be on the correct device
+        current_device = device
+        if IS_ZEROGPU and torch.cuda.is_available():
+            current_device = torch.device("cuda")
+            inputs = inputs.to(current_device)
+            # Only in ZeroGPU mode, we need to move the model to GPU inside the function
+            profanity_model.to(current_device)
+        elif torch.cuda.is_available():  # Local environment with CUDA
+            inputs = inputs.to(current_device)
 
         with torch.no_grad():
             outputs = profanity_model(**inputs).logits
@@ -201,7 +193,7 @@ def detect_profanity(text: str, threshold: float = 0.5):
 
             word_inputs = profanity_tokenizer(word, return_tensors="pt", truncation=True, max_length=512)
             if torch.cuda.is_available():
-                word_inputs = word_inputs.to(device)
+                word_inputs = word_inputs.to(current_device)
 
             with torch.no_grad():
                 word_outputs = profanity_model(**word_inputs).logits
@@ -211,6 +203,10 @@ def detect_profanity(text: str, threshold: float = 0.5):
             if word_score > threshold:
                 profane_words.append(word.lower())
 
+        # Move model back to CPU if in ZeroGPU mode - to free GPU memory
+        if IS_ZEROGPU and torch.cuda.is_available():
+            profanity_model.to(torch.device("cpu"))
+
         # Create highlighted version of the text
         highlighted_text = create_highlighted_text(text, profane_words)
 
@@ -225,6 +221,12 @@ def detect_profanity(text: str, threshold: float = 0.5):
     except Exception as e:
         error_msg = f"Error in profanity detection: {str(e)}"
         logger.error(error_msg)
+        # Make sure model is on CPU if in ZeroGPU mode - to free GPU memory
+        if IS_ZEROGPU and torch.cuda.is_available():
+            try:
+                profanity_model.to(torch.device("cpu"))
+            except:
+                pass
         return {"error": error_msg, "text": text, "score": 0, "profanity": False}
 
 def create_highlighted_text(text, profane_words):
@@ -255,8 +257,16 @@ def rephrase_profanity(text):
     try:
         # Rephrase using the detoxification model
         inputs = t5_tokenizer(text, return_tensors="pt", truncation=True, max_length=512)
-        if torch.cuda.is_available():
-            inputs = inputs.to(device)
+
+        # In ZeroGPU, move to GPU here inside the spaces.GPU function
+        current_device = device
+        if IS_ZEROGPU and torch.cuda.is_available():
+            current_device = torch.device("cuda")
+            inputs = inputs.to(current_device)
+            # Only in ZeroGPU mode, we need to move the model to GPU inside the function
+            t5_model.to(current_device)
+        elif torch.cuda.is_available():  # Local environment with CUDA
+            inputs = inputs.to(current_device)
 
         # Use more conservative generation settings with error handling
         try:
@@ -275,6 +285,10 @@ def rephrase_profanity(text):
                 logger.warning(f"T5 model produced unusable output: '{rephrased_text}'")
                 return text  # Return original if output is too short
 
+            # Move model back to CPU if in ZeroGPU mode - to free GPU memory
+            if IS_ZEROGPU and torch.cuda.is_available():
+                t5_model.to(torch.device("cpu"))
+
             return rephrased_text.strip()
 
         except RuntimeError as e:
@@ -289,6 +303,11 @@ def rephrase_profanity(text):
                     early_stopping=True
                 )
                 rephrased_text = t5_tokenizer.decode(outputs[0], skip_special_tokens=True)
+
+                # Move model back to CPU if in ZeroGPU mode - to free GPU memory
+                if IS_ZEROGPU and torch.cuda.is_available():
+                    t5_model.to(torch.device("cpu"))
+
                 return rephrased_text.strip()
             else:
                 raise e  # Re-raise if it's not a memory issue
@@ -296,6 +315,12 @@ def rephrase_profanity(text):
     except Exception as e:
         error_msg = f"Error in rephrasing: {str(e)}"
         logger.error(error_msg)
+        # Make sure model is on CPU if in ZeroGPU mode - to free GPU memory
+        if IS_ZEROGPU and torch.cuda.is_available():
+            try:
+                t5_model.to(torch.device("cpu"))
+            except:
+                pass
         return text  # Return original text if rephrasing fails
 
 @spaces.GPU
@@ -312,19 +337,37 @@ def text_to_speech(text):
 
         # Process the text input
         inputs = tts_processor(text=text, return_tensors="pt")
-        if torch.cuda.is_available():
-            inputs = inputs.to(device)
+
+        # In ZeroGPU, move to GPU here inside the spaces.GPU function
+        current_device = device
+        if IS_ZEROGPU and torch.cuda.is_available():
+            current_device = torch.device("cuda")
+            inputs = inputs.to(current_device)
+            # Only in ZeroGPU mode, we need to move the models to GPU inside the function
+            tts_model.to(current_device)
+            vocoder.to(current_device)
+            speaker_embeddings_local = speaker_embeddings.to(current_device)
+        elif torch.cuda.is_available():  # Local environment with CUDA
+            inputs = inputs.to(current_device)
+            speaker_embeddings_local = speaker_embeddings
+        else:
+            speaker_embeddings_local = speaker_embeddings
 
         # Generate speech with a fixed speaker embedding
         speech = tts_model.generate_speech(
             inputs["input_ids"],
-            speaker_embeddings,
+            speaker_embeddings_local,
             vocoder=vocoder
         )
 
         # Convert from PyTorch tensor to NumPy array
         speech_np = speech.cpu().numpy()
 
+        # Move models back to CPU if in ZeroGPU mode - to free GPU memory
+        if IS_ZEROGPU and torch.cuda.is_available():
+            tts_model.to(torch.device("cpu"))
+            vocoder.to(torch.device("cpu"))
+
         # Save as WAV file (sampling rate is 16kHz for SpeechT5)
         write_wav(temp_file, 16000, speech_np)
 
@@ -332,6 +375,13 @@ def text_to_speech(text):
     except Exception as e:
         error_msg = f"Error in text-to-speech conversion: {str(e)}"
         logger.error(error_msg)
+        # Make sure models are on CPU if in ZeroGPU mode - to free GPU memory
+        if IS_ZEROGPU and torch.cuda.is_available():
+            try:
+                tts_model.to(torch.device("cpu"))
+                vocoder.to(torch.device("cpu"))
+            except:
+                pass
         return None
 
 def text_analysis(input_text, threshold=0.5):
@@ -402,10 +452,19 @@ def analyze_audio(audio_path, threshold=0.5):
         return "No audio provided.", None, None
 
     try:
+        # In ZeroGPU mode, models need to be moved to GPU
+        if IS_ZEROGPU and torch.cuda.is_available():
+            current_device = torch.device("cuda")
+            whisper_model.to(current_device)
+
         # Transcribe audio
         result = whisper_model.transcribe(audio_path, fp16=torch.cuda.is_available())
         text = result["text"]
 
+        # Move whisper model back to CPU if in ZeroGPU mode
+        if IS_ZEROGPU and torch.cuda.is_available():
+            whisper_model.to(torch.device("cpu"))
+
         # Detect profanity with user-defined threshold
        analysis = detect_profanity(text, threshold=threshold)
 
@@ -432,6 +491,12 @@ def analyze_audio(audio_path, threshold=0.5):
     except Exception as e:
         error_msg = f"Error in audio analysis: {str(e)}\n{traceback.format_exc()}"
         logger.error(error_msg)
+        # Make sure models are on CPU if in ZeroGPU mode
+        if IS_ZEROGPU and torch.cuda.is_available():
+            try:
+                whisper_model.to(torch.device("cpu"))
+            except:
+                pass
         return error_msg, None, None
 
 # Global variables to store streaming results
@@ -497,10 +562,19 @@ def process_stream_chunk(audio_chunk):
             stream_results["profanity_info"] = "Error: Failed to create audio file for processing"
             return stream_results["transcript"], stream_results["profanity_info"], stream_results["clean_text"], stream_results["audio_output"]
 
+        # In ZeroGPU mode, move whisper model to GPU
+        if IS_ZEROGPU and torch.cuda.is_available():
+            current_device = torch.device("cuda")
+            whisper_model.to(current_device)
+
        # Process with Whisper
        result = whisper_model.transcribe(temp_file, fp16=torch.cuda.is_available())
        transcript = result["text"].strip()
 
+        # Move whisper model back to CPU if in ZeroGPU mode
+        if IS_ZEROGPU and torch.cuda.is_available():
+            whisper_model.to(torch.device("cpu"))
+
        # Skip processing if transcript is empty
        if not transcript:
            # Clean up temp file if we created it
@@ -554,6 +628,17 @@ def process_stream_chunk(audio_chunk):
         error_msg = f"Error processing streaming audio: {str(e)}\n{traceback.format_exc()}"
         logger.error(error_msg)
 
+        # Make sure all models are on CPU if in ZeroGPU mode
+        if IS_ZEROGPU and torch.cuda.is_available():
+            try:
+                whisper_model.to(torch.device("cpu"))
+                profanity_model.to(torch.device("cpu"))
+                t5_model.to(torch.device("cpu"))
+                tts_model.to(torch.device("cpu"))
+                vocoder.to(torch.device("cpu"))
+            except:
+                pass
+
         # Update profanity info with error message
         stream_results["profanity_info"] = f"Error: {str(e)}"
 
requirements.txt CHANGED
@@ -7,4 +7,5 @@ torch
 transformers
 pillow
 sentencepiece
-spaces
+spaces
+accelerate
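The new `accelerate` entry presumably pairs with the `low_cpu_mem_usage=True` arguments introduced above: transformers refuses that flag (and `device_map`) unless Accelerate is installed, failing at load time with a message asking for `pip install accelerate`. So the dependency belongs to the new from_pretrained calls rather than to anything ZeroGPU-specific.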
temp_tts_output_1742102180.wav ADDED
Binary file (217 kB).