import gradio as gr
from huggingface_hub import hf_hub_download
from audio_index import AudioEmbeddingSystem
from search import search, get_prompt
import pandas as pd
import numpy as np
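
# Download the SQLite audio database and FAISS index used for similarity search from the Hub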
db_file = hf_hub_download(
    repo_id="freddyaboulton/common-voice-english-audio",
    filename="audio_db_full.sqlite",
    repo_type="dataset",
)
index_file = hf_hub_download(
    repo_id="freddyaboulton/common-voice-english-audio",
    filename="audio_faiss_full.index",
    repo_type="dataset",
)
audio_embedding_system = AudioEmbeddingSystem(db_path=db_file, index_path=index_file)
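

# Return a styled table of the most and least similar Common Voice clips for the recorded/uploaded sample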
def audio_search(audio_tuple, prompt: str):
    if audio_tuple is None:
        return gr.skip()
    sample_rate, array = audio_tuple
    # Normalize int16 PCM samples to float32 in [-1, 1]
    if array.dtype == np.int16:
        array = array.astype(np.float32) / 32768.0
    # Query the index for the closest matches, then the farthest ones, and show both
    rows = audio_embedding_system.search((sample_rate, array))
    least_similar = audio_embedding_system.search((sample_rate, array), least_similar=True)
    rows += least_similar
    # Look up the matching Common Voice rows to attach the transcript and a playable clip
    orig_rows = search(rows)
    for row in rows:
        path = row["path"]
        for orig in orig_rows:
            orig_row = orig["row"]
            if orig_row["path"] == path:
                row["sentence"] = orig_row["sentence"]
                row["audio"] = [
                    '<audio src="' + orig_row["audio"][0]["src"] + '" controls />'
                ]
    df = pd.DataFrame(rows)[["path", "audio", "sentence", "distance"]].sort_values(
        by="distance", ascending=True
    )

    # Style the 'path' column: most similar rows green, least similar rows red
    def style_path_column(col):
        n = len(col)
        styles = [''] * n
        for i in range(n):
            if i < 5:
                # First 5 rows: green background with opacity
                styles[i] = 'background-color: rgba(0, 255, 0, 0.3)'
            else:
                # Remaining (least similar) rows: red background with opacity
                styles[i] = 'background-color: rgba(255, 0, 0, 0.3)'
        return styles

    # Apply the styling to the 'path' column and return the Styler object
    return df.style.apply(style_path_column, subset=['path'])
sample_text = gr.Textbox(
    label="Suggested Prompt",
    info="Unsure what to record? Use this prompt. Hit Enter to get a new one from the Common Voice dataset.",
    value=get_prompt(),
)
iface = gr.Interface(
    fn=audio_search,
    inputs=[
        gr.Audio(
            label="Record or upload a clip of your voice",
            sources=["microphone", "upload"],
        ),
        sample_text,
    ],
    outputs=gr.Dataframe(
        show_label=False,
        headers=["path", "audio", "sentence", "distance"],
        datatype=["str", "html", "str", "number"],
    ),
)
with gr.Blocks() as demo:
    gr.HTML(
        """
        <h1 style='text-align: center; display: flex; align-items: center; justify-content: center;'>
            <img src="/gradio_api/file=Karaoke_Huggy.png" alt="Voice Match" style="height: 100px; margin-right: 10px"> Voice Match
        </h1>
        """
    )
    gr.HTML(
        """
        <h2 style='text-align: center'>
            Powered by <a href="https://huggingface.co/rimelabs/rimecaster">RimeCaster</a>
        </h2>
        """
    )
    gr.Markdown(
        """
        <div style='text-align: center'>
            Record or upload an English clip of your voice and we'll find the most similar (and dissimilar) voices in the <a href="https://huggingface.co/datasets/mozilla-foundation/common_voice_17_0">Common Voice dataset</a>.
        </div>
        """
    )
    iface.render()
    sample_text.submit(fn=get_prompt, inputs=None, outputs=sample_text)
demo.launch(allowed_paths=["Karaoke_Huggy.png"])