Spaces:
Sleeping
Sleeping
File size: 6,783 Bytes
3c8c320 4497e1b f35f09f 3c8c320 f35f09f 3c8c320 4497e1b f35f09f 3c8c320 4497e1b 06efaec a5451af 3c8c320 cc08a1c 3c8c320 f35f09f cc08a1c f35f09f cc08a1c f35f09f 3c8c320 f35f09f 3c8c320 4497e1b f35f09f 4497e1b f35f09f 4497e1b cc08a1c f35f09f 1c933d8 f35f09f 4497e1b f35f09f a2da443 f35f09f cc08a1c a2da443 cc08a1c f35f09f a2da443 cc08a1c 1c933d8 3c8c320 cc08a1c f35f09f cc08a1c 4497e1b f35f09f cc08a1c 3c8c320 a2da443 f35f09f cc08a1c f35f09f 3c8c320 cc08a1c 3c8c320 f35f09f cc08a1c f35f09f cc08a1c f35f09f cc08a1c f35f09f 3c8c320 f35f09f 3c8c320 f35f09f 3c8c320 f35f09f 3c8c320 f35f09f cc08a1c f35f09f cc08a1c f35f09f 3c8c320 f35f09f cc08a1c f35f09f 3c8c320 f35f09f cc08a1c 3c8c320 f35f09f cc08a1c 3c8c320 f35f09f 3c8c320 f35f09f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 |
import json
import os
import uuid
from datetime import datetime, timezone

import gradio as gr
import numpy as np
import soundfile as sf
import xxhash
from datasets import Audio
from dotenv import load_dotenv
from huggingface_hub import upload_file, HfApi
# Read settings such as HF_TOKEN into the process environment
load_dotenv()

# Recordings are written under outputs/ before being uploaded
os.makedirs("outputs", exist_ok=True)

# Hugging Face client (not referenced by the rest of the code shown here)
hf_api = HfApi(token=os.environ.get("HF_TOKEN"))

# Upload destination: dataset repository and the folder inside it
DATASET_REPO = "alisartazkhan/audioLLM_judge"
CATEGORY = "pilot_tempo_control7"

MAX_RECORDINGS = 5  # Number of prompts to record
COMPLETION_CODE = "CEO4RWQ6"  # Shown to the participant after the last recording

# 16 kHz audio feature (not referenced by the rest of the code shown here)
resampler = Audio(sampling_rate=16000)

# ====== MODIFY THIS SECTION TO CHANGE INSTRUCTIONS AND PROMPT ======
# Markdown instructions displayed above the recorder
USER_INSTRUCTIONS = """
## Recording Instructions:
Please record yourself reading your instruction clearly and naturally, speaking into the microphone in a quiet environment.
"""

# Text the participant is asked to record; also used to build file names
RECORDING_PROMPT = ""
# ================================================================
# Create a JSON database to track uploads
class UploadTracker:
    """JSON-backed log of the recordings submitted through this app.

    The log is a list of dicts kept in ``self.data`` and mirrored to a local
    JSON file. After every new entry the file is re-uploaded to the Hugging
    Face dataset repository under the ``CATEGORY`` folder.
    """

    def __init__(self, filename="recording_tracker.json"):
        """Load the tracker file, creating or resetting it when necessary.

        A missing file is created with an empty list. A file that is empty,
        is not valid JSON, or does not hold a list is moved aside to
        ``<filename>.corrupt`` and replaced by a fresh empty log, so a
        damaged file cannot prevent the app from starting.

        Args:
            filename: Path of the local JSON file backing the tracker.
        """
        self.filename = filename
        self.data = []

        try:
            with open(filename, "r", encoding="utf-8") as f:
                loaded = json.load(f)
        except FileNotFoundError:
            # First run: start an empty log on disk
            self._save()
            return
        except ValueError as e:
            # json.JSONDecodeError and UnicodeDecodeError are both ValueErrors
            self._reset_unreadable(f"not valid JSON ({e})")
            return

        if not isinstance(loaded, list):
            # add_recording() appends to self.data, so anything else is unusable
            self._reset_unreadable("top-level value is not a list")
            return

        self.data = loaded

    def _save(self):
        """Write the in-memory log to the local JSON file."""
        with open(self.filename, "w", encoding="utf-8") as f:
            json.dump(self.data, f, indent=2)

    def _reset_unreadable(self, reason):
        """Move an unusable tracker file aside and start a new empty log."""
        backup = f"{self.filename}.corrupt"
        # Keep the old content for manual inspection instead of overwriting it
        os.replace(self.filename, backup)
        print(
            f"Tracker file {self.filename} is unreadable: {reason}; "
            f"moved to {backup} and starting a new log"
        )
        self.data = []
        self._save()

    def add_recording(self, audio_hash, filename):
        """Add a record of an uploaded recording.

        The record is appended to the log, the log is saved locally and then
        uploaded to Hugging Face.

        Args:
            audio_hash: Hash identifying the recorded audio.
            filename: Path of the recording inside the dataset repository.

        Returns:
            The dict that was appended, with keys ``prompt``, ``audio_hash``,
            ``filename`` and ``timestamp`` (ISO-8601 time in UTC).
        """
        record = {
            "prompt": RECORDING_PROMPT,
            "audio_hash": audio_hash,
            "filename": filename,
            # A real creation time; this field previously held a random UUID
            "timestamp": datetime.now(timezone.utc).isoformat(),
        }
        self.data.append(record)
        self._save()
        # Keep the remote copy of the log in sync with the local one
        self.upload_tracker()
        return record

    def upload_tracker(self):
        """Upload the tracker JSON to Hugging Face.

        Returns:
            True when the upload succeeded, False when it raised. Failures
            are printed rather than propagated (best-effort sync).
        """
        try:
            upload_file(
                path_or_fileobj=self.filename,
                path_in_repo=f"{CATEGORY}/{self.filename}",
                repo_id=DATASET_REPO,
                repo_type="dataset",
                token=os.getenv("HF_TOKEN"),
            )
            print("Uploaded tracker file to Hugging Face")
            return True
        except Exception as e:
            print(f"Error uploading tracker file: {e}")
            return False
# Module-level tracker used by on_submit(); backed by the default
# "recording_tracker.json" file, which is loaded or created here at import time
tracker = UploadTracker()
def upload_to_hf(local_path, repo_path):
    """Push one local file into the Hugging Face dataset repository.

    Args:
        local_path: Path of the file on disk.
        repo_path: Destination path inside the dataset repository.

    Returns:
        True if the upload went through, False if anything raised (the
        error is printed instead of being propagated).
    """
    succeeded = False
    try:
        request = {
            "path_or_fileobj": local_path,
            "path_in_repo": repo_path,
            "repo_id": DATASET_REPO,
            "repo_type": "dataset",
            "token": os.getenv("HF_TOKEN"),
        }
        upload_file(**request)
        print(f"Uploaded file: {local_path} to Hugging Face at {repo_path}")
        succeeded = True
    except Exception as e:
        print(f"Error uploading file to HF: {e}")
    return succeeded
def on_submit(audio_input, recording_count):
    """Save, upload and log one recording, then advance to the next screen.

    The samples are peak-normalized, written to ``outputs/`` as a WAV file
    and uploaded to the dataset repository. The recording is added to the
    tracker only when the upload succeeded; on failure the local file is
    kept and the failure is printed.

    Args:
        audio_input: Value of the audio component, unpacked here as a
            ``(sample_rate, samples)`` pair, or ``None`` when nothing was
            recorded.
        recording_count: Number of recordings completed before this one.

    Returns:
        The 7-tuple produced by ``next_prompt(recording_count)``.
    """
    if audio_input is None:
        return next_prompt(recording_count)

    sr, y = audio_input
    # Hash the raw samples before conversion so the hash identifies the capture
    audio_hash = xxhash.xxh32(bytes(y)).hexdigest()

    # Peak-normalize; an all-zero signal is left untouched to avoid dividing by 0
    y = y.astype(np.float32)
    peak = np.max(np.abs(y))
    if peak > 0:
        y /= peak

    unique_id = str(uuid.uuid4())[:8]
    # Keep only characters that are safe in a file name: spaces become
    # underscores, anything that is not alphanumeric, "_" or "-" is dropped
    # (this covers "." and "," as well as "/" or "?", which would break the path)
    underscored = RECORDING_PROMPT.replace(" ", "_")
    clean_prompt = "".join(
        ch for ch in underscored if ch.isalnum() or ch in "_-"
    )[:20]

    base_name = f"{clean_prompt}_{audio_hash}_{unique_id}.wav"
    local_filename = f"outputs/{base_name}"
    sf.write(local_filename, y, sr, format="wav")

    hf_path = f"{CATEGORY}/{base_name}"
    if upload_to_hf(local_filename, hf_path):
        tracker.add_recording(audio_hash, hf_path)
    else:
        # Do not log a repository path that does not exist
        print(
            f"Upload failed; {local_filename} was kept locally "
            "and was not added to the tracker"
        )

    # Move on to the next prompt regardless, so the participant is not blocked
    return next_prompt(recording_count)
def next_prompt(recording_count):
    """Advance the session by one recording and build the screen to show.

    Args:
        recording_count: Number of recordings completed so far.

    Returns:
        A 7-tuple: title, instructions, prompt text, audio recorder, submit
        button, next button, and the incremented recording count.
    """
    completed = recording_count + 1

    if completed < MAX_RECORDINGS:
        # Prompts remain: show a cleared recorder with a disabled submit button
        return (
            gr.Markdown(f"# Recording {completed + 1}/{MAX_RECORDINGS}"),
            gr.Markdown(USER_INSTRUCTIONS),
            gr.Markdown(f"### \"{RECORDING_PROMPT}\""),
            gr.Audio(value=None, label="Record your response", sources=["microphone"]),
            gr.Button("Submit Recording", interactive=False),
            gr.Button("Next Recording", visible=False),
            completed,
        )

    # Every prompt has been handled: show the completion code and hide controls
    return (
        gr.Markdown(f"# All recordings complete! Completion code: {COMPLETION_CODE}"),
        gr.Markdown("## Thank you for your participation."),
        gr.Markdown("### You have completed all recordings."),
        gr.Audio(visible=False),
        gr.Button(visible=False),
        gr.Button(visible=False),
        completed,
    )
def enable_submit_button(audio_input):
    """Return the submit button, clickable only when a recording is present."""
    has_recording = audio_input is not None
    return gr.Button("Submit Recording", interactive=has_recording)
# Soft theme with a blue / indigo / slate palette
theme = gr.themes.Soft(primary_hue="blue", secondary_hue="indigo", neutral_hue="slate")

# Build the single-page recording interface
with gr.Blocks(theme=theme, css="footer {visibility: hidden}") as demo:
    # Per-session counter of completed recordings
    recording_count = gr.State(0)

    title = gr.Markdown(f"# Recording 1/{MAX_RECORDINGS}")
    instructions = gr.Markdown(USER_INSTRUCTIONS)
    prompt_text = gr.Markdown(f"### \"{RECORDING_PROMPT}\"")

    audio_input = gr.Audio(label="Record your response", sources=["microphone"], streaming=False)

    with gr.Row():
        submit_btn = gr.Button("Submit Recording", interactive=False)
        next_btn = gr.Button("Next Recording", visible=False)

    # Components refreshed by both on_submit and next_prompt, in return order
    screen_outputs = [
        title,
        instructions,
        prompt_text,
        audio_input,
        submit_btn,
        next_btn,
        recording_count,
    ]

    # Unlock the submit button once the recorder holds audio
    audio_input.change(enable_submit_button, inputs=[audio_input], outputs=[submit_btn])

    # Submitting stores the recording and shows the next screen
    submit_btn.click(on_submit, inputs=[audio_input, recording_count], outputs=screen_outputs)

    # The next button skips straight to the following screen
    next_btn.click(next_prompt, inputs=[recording_count], outputs=screen_outputs)
# Launch the app
if __name__ == "__main__":
    # Start the Gradio server only when run as a script; share=True is passed
    # through to launch() to request a public share link.
    demo.launch(share=True)