Spaces:

alisartazkhan
/

tempo_control2

Running

App Files Files Community

tempo_control2 / talk_arena /audio_collection.py

alisartazkhan

Update talk_arena/audio_collection.py

06efaec verified 7 days ago

raw

history blame contribute delete

6.78 kB

	import os
	import uuid
	import json
	import numpy as np
	import gradio as gr
	import soundfile as sf
	import xxhash
	from huggingface_hub import upload_file, HfApi
	from dotenv import load_dotenv
	from datasets import Audio

	# Load environment variables so the os.getenv("HF_TOKEN") lookups in this
	# module can see them (presumably from a local .env file — confirm deployment).
	load_dotenv()

	# Ensure the outputs directory exists; on_submit writes each WAV file there
	# before uploading it.
	os.makedirs("outputs", exist_ok=True)

	# Initialize Hugging Face API client
	# NOTE(review): hf_api is not referenced by any code visible in this file;
	# uploads go through upload_file(...) directly.
	hf_api = HfApi(token=os.getenv("HF_TOKEN"))
	# Dataset repository that receives every upload (repo_type="dataset").
	DATASET_REPO = "alisartazkhan/audioLLM_judge"
	# Folder prefix used for all paths inside the dataset repository.
	CATEGORY = "pilot_tempo_control7"
	MAX_RECORDINGS = 5 # Number of prompts to record
	# Code shown to the participant on the final "all recordings complete" screen.
	COMPLETION_CODE = "CEO4RWQ6"
	# NOTE(review): resampler is not used by any code visible in this file —
	# confirm whether 16 kHz resampling was meant to be applied before saving.
	resampler = Audio(sampling_rate=16_000)

	# ====== MODIFY THIS SECTION TO CHANGE INSTRUCTIONS AND PROMPT ======
	# Instructions for the user
	# Rendered as Markdown on every recording screen (initial layout and next_prompt).
	USER_INSTRUCTIONS = """
	## Recording Instructions:
	Please record yourself reading your instruction clearly and naturally, speaking into the microphone in a quiet environment.
	"""

	# The prompt that users will record
	# Shown in quotes on the recording screen, stored with each tracker entry, and
	# used (sanitized, first 20 characters) as the prefix of saved file names.
	RECORDING_PROMPT = ""
	# ================================================================

	# Create a JSON database to track uploads
	class UploadTracker:
	    """JSON-file-backed log of the recordings handed to the dataset repository.

	    The log is a list of dicts kept in memory (``self.data``), mirrored to a
	    local JSON file after every addition, and pushed to the Hugging Face
	    dataset repository under ``CATEGORY``.
	    """

	    def __init__(self, filename="recording_tracker.json"):
	        """Load the tracker from ``filename`` or create an empty one.

	        Args:
	            filename: Path of the local JSON file holding the log.

	        A file that exists but cannot be parsed as a JSON list (empty,
	        truncated, or a different JSON type) no longer aborts start-up: it
	        is renamed to ``<filename>.corrupt`` so its contents stay available
	        for inspection, and a fresh empty log is started.
	        """
	        self.filename = filename
	        self.data = []

	        if os.path.exists(filename):
	            loaded = None
	            try:
	                with open(filename, "r", encoding="utf-8") as f:
	                    loaded = json.load(f)
	            except ValueError as e:
	                # json.JSONDecodeError and UnicodeDecodeError are both ValueErrors.
	                print(f"Tracker file {filename} is unreadable: {e}")

	            if isinstance(loaded, list):
	                self.data = loaded
	                return

	            # Keep the damaged file instead of overwriting it on the next save.
	            backup = f"{filename}.corrupt"
	            os.replace(filename, backup)
	            print(f"Moved unusable tracker file to {backup}; starting a new log")

	        # No usable file on disk: create one holding an empty list.
	        with open(filename, "w", encoding="utf-8") as f:
	            json.dump([], f)

	    def add_recording(self, audio_hash, filename):
	        """Add a record of an uploaded recording.

	        Args:
	            audio_hash: Hash string identifying the audio content.
	            filename: Path of the recording inside the dataset repository.

	        Returns:
	            The dict that was appended to the log. Its ``"timestamp"`` field
	            is the UTC time of the call in ISO-8601 form (it used to hold a
	            random UUID, which carried no time information).
	        """
	        # Imported here so the block does not depend on a new file-level import.
	        from datetime import datetime, timezone

	        record = {
	            "prompt": RECORDING_PROMPT,
	            "audio_hash": audio_hash,
	            "filename": filename,
	            "timestamp": datetime.now(timezone.utc).isoformat(),
	        }
	        self.data.append(record)

	        # Save to file
	        with open(self.filename, "w", encoding="utf-8") as f:
	            json.dump(self.data, f, indent=2)

	        # Upload tracker file to HF (best effort; failures are only logged)
	        self.upload_tracker()

	        return record

	    def upload_tracker(self):
	        """Upload the tracker JSON to Hugging Face.

	        Returns:
	            True when the upload call completed, False when it raised; the
	            exception is printed rather than propagated.
	        """
	        try:
	            upload_file(
	                path_or_fileobj=self.filename,
	                path_in_repo=f"{CATEGORY}/{self.filename}",
	                repo_id=DATASET_REPO,
	                repo_type="dataset",
	                token=os.getenv("HF_TOKEN"),
	            )
	            print("Uploaded tracker file to Hugging Face")
	            return True
	        except Exception as e:
	            print(f"Error uploading tracker file: {e}")
	            return False

	# Initialize the tracker: one module-level instance, backed by the default
	# "recording_tracker.json" file, that on_submit appends to.
	tracker = UploadTracker()

	def upload_to_hf(local_path, repo_path):
	    """Push one local file into the Hugging Face dataset repository.

	    Args:
	        local_path: Path of the file on disk.
	        repo_path: Destination path inside the dataset repository.

	    Returns:
	        True if the upload went through, False if anything raised along the
	        way (the error is printed, never propagated).
	    """
	    try:
	        request = {
	            "path_or_fileobj": local_path,
	            "path_in_repo": repo_path,
	            "repo_id": DATASET_REPO,
	            "repo_type": "dataset",
	            "token": os.getenv("HF_TOKEN"),
	        }
	        upload_file(**request)
	        print(f"Uploaded file: {local_path} to Hugging Face at {repo_path}")
	    except Exception as err:
	        print(f"Error uploading file to HF: {err}")
	        return False
	    return True

	def on_submit(audio_input, recording_count):
	    """Handle the submission of a recorded audio prompt.

	    Saves the recording as a peak-normalized WAV under ``outputs/``, uploads
	    it to the dataset repository, logs it in the tracker, and then advances
	    to the next screen.

	    Args:
	        audio_input: ``(sample_rate, samples)`` pair from the audio component,
	            or None when nothing was recorded. ``samples`` is used as a NumPy
	            array.
	        recording_count: Number of recordings completed before this one.

	    Returns:
	        Whatever ``next_prompt(recording_count)`` returns (the screen update).
	    """
	    if audio_input is None:
	        return next_prompt(recording_count)

	    # Process the audio
	    sr, y = audio_input
	    if y.size == 0:
	        # A zero-length recording has no peak (np.max would raise); handle it
	        # exactly like a missing recording.
	        return next_prompt(recording_count)

	    # Hash the raw samples before they are converted and rescaled.
	    audio_hash = xxhash.xxh32(bytes(y)).hexdigest()
	    y = y.astype(np.float32)
	    peak = np.max(np.abs(y))
	    if peak > 0:
	        # Peak-normalize; an all-zero signal is left untouched.
	        y /= peak

	    unique_id = str(uuid.uuid4())[:8]
	    clean_prompt = RECORDING_PROMPT.replace(" ", "_").replace(".", "").replace(",", "")[:20]
	    wav_name = f"{clean_prompt}_{audio_hash}_{unique_id}.wav"
	    local_filename = f"outputs/{wav_name}"
	    sf.write(local_filename, y, sr, format="wav")

	    hf_path = f"{CATEGORY}/{wav_name}"
	    if upload_to_hf(local_filename, hf_path):
	        # Only log recordings that actually reached the repository, so the
	        # tracker never points at a file that does not exist there.
	        tracker.add_recording(audio_hash, hf_path)
	    else:
	        print(f"Upload failed for {local_filename}; not adding it to the tracker")

	    # Move to the next prompt either way; the local copy in outputs/ remains.
	    return next_prompt(recording_count)

	def next_prompt(recording_count):
	    """Advance the session by one recording and build the matching screen.

	    Args:
	        recording_count: Number of recordings completed so far.

	    Returns:
	        A 7-tuple in the order the click handlers list their outputs: title,
	        instructions, prompt text, audio component, submit button, next
	        button, and the incremented recording count.
	    """
	    recording_count += 1
	    finished = recording_count >= MAX_RECORDINGS

	    if not finished:
	        # Prompts remain: show a fresh recorder with the submit button disabled.
	        heading = gr.Markdown(f"# Recording {recording_count + 1}/{MAX_RECORDINGS}")
	        guidance = gr.Markdown(USER_INSTRUCTIONS)
	        prompt_md = gr.Markdown(f"### \"{RECORDING_PROMPT}\"")
	        recorder = gr.Audio(value=None, label="Record your response", sources=["microphone"])
	        submit = gr.Button("Submit Recording", interactive=False)
	        advance = gr.Button("Next Recording", visible=False)
	        return heading, guidance, prompt_md, recorder, submit, advance, recording_count

	    # Every prompt has been handled: show the completion code and hide the controls.
	    heading = gr.Markdown(f"# All recordings complete! Completion code: {COMPLETION_CODE}")
	    thanks = gr.Markdown("## Thank you for your participation.")
	    summary = gr.Markdown("### You have completed all recordings.")
	    recorder = gr.Audio(visible=False)
	    submit = gr.Button(visible=False)
	    advance = gr.Button(visible=False)
	    return heading, thanks, summary, recorder, submit, advance, recording_count

	def enable_submit_button(audio_input):
	    """Return the submit button, clickable only while a recording is present.

	    Args:
	        audio_input: Current value of the audio component; None when empty.
	    """
	    has_audio = audio_input is not None
	    return gr.Button("Submit Recording", interactive=has_audio)

	# Visual theme for the interface
	theme = gr.themes.Soft(primary_hue="blue", secondary_hue="indigo", neutral_hue="slate")

	# Build the interface: header texts, a microphone recorder, and two buttons
	with gr.Blocks(theme=theme, css="footer {visibility: hidden}") as demo:
	    # Per-session counter of completed recordings
	    recording_count = gr.State(0)

	    title = gr.Markdown(f"# Recording 1/{MAX_RECORDINGS}")
	    instructions = gr.Markdown(USER_INSTRUCTIONS)
	    prompt_text = gr.Markdown(f"### \"{RECORDING_PROMPT}\"")

	    audio_input = gr.Audio(label="Record your response", sources=["microphone"], streaming=False)

	    with gr.Row():
	        submit_btn = gr.Button("Submit Recording", interactive=False)
	        next_btn = gr.Button("Next Recording", visible=False)

	    # Both screen-changing handlers refresh the same seven outputs, in the
	    # order of the tuple returned by next_prompt.
	    screen_outputs = [
	        title,
	        instructions,
	        prompt_text,
	        audio_input,
	        submit_btn,
	        next_btn,
	        recording_count,
	    ]

	    # The submit button becomes clickable once a recording exists
	    audio_input.change(fn=enable_submit_button, inputs=[audio_input], outputs=[submit_btn])

	    # Submitting saves/uploads the recording and moves on
	    submit_btn.click(fn=on_submit, inputs=[audio_input, recording_count], outputs=screen_outputs)

	    # The next button moves on without saving anything
	    next_btn.click(fn=next_prompt, inputs=[recording_count], outputs=screen_outputs)

	# Start the app when executed as a script
	if __name__ == "__main__":
	    demo.launch(share=True)