preston-cell committed on
Commit
1154d5a
·
verified ·
1 Parent(s): 8c3caa4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +3 -3
app.py CHANGED
@@ -47,9 +47,9 @@ doge_generation_config = GenerationConfig(
47
  repetition_penalty=1.0
48
  )
49
 
50
- # Load speaker embedding for SpeechT5
51
  embeddings_dataset = load_dataset("Matthijs/cmu-arctic-xvectors", split="validation")
52
- speaker_embedding = torch.tensor(embeddings_dataset[7306]["xvector"]).unsqueeze(0)
53
 
54
 
55
  def process_image(image):
@@ -113,4 +113,4 @@ iface = gr.Interface(
113
  description="Upload an image to generate a caption, extract text (OCR), generate context using Doge, and turn it into speech using SpeechT5."
114
  )
115
 
116
- iface.launch()
 
47
  repetition_penalty=1.0
48
  )
49
 
50
+ # Load and trim speaker embedding for SpeechT5 (must be size 600)
51
  embeddings_dataset = load_dataset("Matthijs/cmu-arctic-xvectors", split="validation")
52
+ speaker_embedding = torch.tensor(embeddings_dataset[7306]["xvector"]).unsqueeze(0)[:, :600]
53
 
54
 
55
  def process_image(image):
 
113
  description="Upload an image to generate a caption, extract text (OCR), generate context using Doge, and turn it into speech using SpeechT5."
114
  )
115
 
116
+ iface.launch()