Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -47,9 +47,9 @@ doge_generation_config = GenerationConfig(
|
|
47 |
repetition_penalty=1.0
|
48 |
)
|
49 |
|
50 |
-
# Load speaker embedding for SpeechT5
|
51 |
embeddings_dataset = load_dataset("Matthijs/cmu-arctic-xvectors", split="validation")
|
52 |
-
speaker_embedding = torch.tensor(embeddings_dataset[7306]["xvector"]).unsqueeze(0)
|
53 |
|
54 |
|
55 |
def process_image(image):
|
@@ -113,4 +113,4 @@ iface = gr.Interface(
|
|
113 |
description="Upload an image to generate a caption, extract text (OCR), generate context using Doge, and turn it into speech using SpeechT5."
|
114 |
)
|
115 |
|
116 |
-
iface.launch()
|
|
|
47 |
repetition_penalty=1.0
|
48 |
)
|
49 |
|
50 |
+
# Load and trim speaker embedding for SpeechT5 (must be size 600)
|
51 |
embeddings_dataset = load_dataset("Matthijs/cmu-arctic-xvectors", split="validation")
|
52 |
+
speaker_embedding = torch.tensor(embeddings_dataset[7306]["xvector"]).unsqueeze(0)[:, :600]
|
53 |
|
54 |
|
55 |
def process_image(image):
|
|
|
113 |
description="Upload an image to generate a caption, extract text (OCR), generate context using Doge, and turn it into speech using SpeechT5."
|
114 |
)
|
115 |
|
116 |
+
iface.launch()
|