Add sampling_rate arg to score_waveforms
Browse files
app.py
CHANGED
@@ -70,7 +70,7 @@ def texttoaudio(prompt, neg_prompt, seed, inf_steps, guidance_scale, n_candidate
|
|
70 |
return (16000, waveform)
|
71 |
|
72 |
def score_waveforms(text, waveforms):
|
73 |
-
inputs = processor(text=text, audios=list(waveforms), return_tensors="pt", padding=True)
|
74 |
inputs = {key: inputs[key].to(device) for key in inputs}
|
75 |
with torch.no_grad():
|
76 |
logits_per_text = clap_model(**inputs).logits_per_text # this is the audio-text similarity score
|
|
|
70 |
return (16000, waveform)
|
71 |
|
72 |
def score_waveforms(text, waveforms):
|
73 |
+
inputs = processor(text=text, audios=list(waveforms), return_tensors="pt", padding=True, sampling_rate=16000)
|
74 |
inputs = {key: inputs[key].to(device) for key in inputs}
|
75 |
with torch.no_grad():
|
76 |
logits_per_text = clap_model(**inputs).logits_per_text # this is the audio-text similarity score
|