File size: 6,783 Bytes
3c8c320
4497e1b
f35f09f
3c8c320
f35f09f
3c8c320
 
4497e1b
f35f09f
 
3c8c320
4497e1b
 
 
 
 
 
 
 
 
06efaec
a5451af
 
3c8c320
 
cc08a1c
 
 
 
 
 
 
 
 
 
3c8c320
f35f09f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
cc08a1c
f35f09f
 
cc08a1c
f35f09f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3c8c320
f35f09f
 
3c8c320
4497e1b
f35f09f
4497e1b
 
 
 
 
 
 
 
f35f09f
4497e1b
 
 
 
 
cc08a1c
f35f09f
 
1c933d8
 
f35f09f
4497e1b
f35f09f
 
 
a2da443
f35f09f
cc08a1c
 
a2da443
 
cc08a1c
f35f09f
a2da443
cc08a1c
1c933d8
 
 
3c8c320
cc08a1c
f35f09f
cc08a1c
4497e1b
f35f09f
cc08a1c
3c8c320
a2da443
f35f09f
cc08a1c
f35f09f
3c8c320
 
cc08a1c
3c8c320
f35f09f
cc08a1c
f35f09f
cc08a1c
 
 
f35f09f
 
cc08a1c
 
f35f09f
3c8c320
f35f09f
 
 
 
 
3c8c320
f35f09f
3c8c320
f35f09f
 
 
3c8c320
 
f35f09f
 
cc08a1c
f35f09f
 
cc08a1c
 
f35f09f
 
 
 
 
3c8c320
f35f09f
 
 
cc08a1c
f35f09f
 
 
 
 
 
3c8c320
f35f09f
 
 
cc08a1c
 
 
3c8c320
f35f09f
 
 
 
cc08a1c
 
3c8c320
 
f35f09f
3c8c320
f35f09f
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
import os
import uuid
import json
import numpy as np
import gradio as gr
import soundfile as sf
import xxhash
from huggingface_hub import upload_file, HfApi
from dotenv import load_dotenv
from datasets import Audio

# Load environment variables (HF_TOKEN is read from the environment below
# and again inside each upload call).
load_dotenv()

# Ensure the outputs directory exists; on_submit writes each recording to
# outputs/<name>.wav before uploading it.
os.makedirs("outputs", exist_ok=True)

# Initialize Hugging Face API client
# NOTE(review): hf_api is not referenced anywhere else in this file as shown;
# the uploads below call huggingface_hub.upload_file directly.
hf_api = HfApi(token=os.getenv("HF_TOKEN"))
# Dataset repository that receives the recordings and the tracker file
# (passed as repo_id with repo_type="dataset").
DATASET_REPO = "alisartazkhan/audioLLM_judge"
# Folder prefix used for every path_in_repo inside DATASET_REPO.
CATEGORY = "pilot_tempo_control7"
MAX_RECORDINGS = 5  # Number of prompts to record
# Code shown to the participant on the final "all recordings complete" screen.
COMPLETION_CODE = "CEO4RWQ6"
# NOTE(review): resampler is not referenced anywhere else in this file as
# shown; recordings are written with the sample rate supplied by Gradio.
resampler = Audio(sampling_rate=16_000)

# ====== MODIFY THIS SECTION TO CHANGE INSTRUCTIONS AND PROMPT ======
# Instructions for the user (rendered as Markdown above the recorder)
USER_INSTRUCTIONS = """
## Recording Instructions:
Please record yourself reading your instruction clearly and naturally, speaking into the microphone in a quiet environment.
"""

# The prompt that users will record. It is displayed in quotes in the UI,
# stored in every tracker record, and (after cleaning, first 20 characters)
# used as the prefix of each uploaded file name. It is currently empty.
RECORDING_PROMPT = ""
# ================================================================

# Create a JSON database to track uploads
class UploadTracker:
    """JSON-file-backed log of the recordings uploaded to the dataset repo.

    The records live in memory in ``self.data`` (a list of dicts). After every
    addition the whole list is rewritten to ``self.filename`` and that file is
    uploaded to the Hugging Face dataset repository.
    """

    def __init__(self, filename="recording_tracker.json"):
        """Load the tracker file, creating it when it does not exist yet.

        A file that exists but cannot be parsed as a JSON list (for example an
        empty or truncated file) no longer aborts start-up: a warning is
        printed and the tracker starts with an empty record list.

        Args:
            filename: Path of the local JSON file that stores the records.
        """
        self.filename = filename
        self.data = []

        # Create file if it doesn't exist
        if not os.path.exists(filename):
            self._save()
            return

        # Load existing data
        try:
            with open(filename, "r", encoding="utf-8") as f:
                loaded = json.load(f)
        except json.JSONDecodeError as e:
            print(f"Tracker file {filename} is not valid JSON, starting empty: {e}")
            return

        if isinstance(loaded, list):
            self.data = loaded
        else:
            # add_recording() appends to self.data, so anything but a list
            # would fail later in a less obvious place.
            print(f"Tracker file {filename} does not contain a list, starting empty")

    def _save(self):
        """Write the current record list to ``self.filename`` as JSON."""
        with open(self.filename, "w", encoding="utf-8") as f:
            json.dump(self.data, f, indent=2)

    def add_recording(self, audio_hash, filename):
        """Add a record of an uploaded recording.

        The record is appended to ``self.data``, the tracker file is rewritten
        and then uploaded to Hugging Face.

        Args:
            audio_hash: Hash string identifying the recorded audio.
            filename: Path of the recording inside the dataset repository.

        Returns:
            The dict that was appended, with the keys ``prompt``,
            ``audio_hash``, ``filename`` and ``timestamp``.
        """
        # Local import keeps this block self-contained.
        from datetime import datetime, timezone

        record = {
            "prompt": RECORDING_PROMPT,
            "audio_hash": audio_hash,
            "filename": filename,
            # Previously a random UUID was stored under this key; store the
            # actual creation time (ISO 8601, UTC) as the name promises.
            "timestamp": datetime.now(timezone.utc).isoformat(),
        }
        self.data.append(record)

        # Save to file
        self._save()

        # Upload tracker file to HF
        self.upload_tracker()

        return record

    def upload_tracker(self):
        """Upload the tracker JSON to Hugging Face.

        Best effort: any exception raised during the upload is printed and
        reported through the return value instead of propagating.

        Returns:
            True when the upload call completed, False when it raised.
        """
        try:
            upload_file(
                path_or_fileobj=self.filename,
                # Use only the file name so a tracker stored under a local
                # directory does not leak that path into the repository.
                path_in_repo=f"{CATEGORY}/{os.path.basename(self.filename)}",
                repo_id=DATASET_REPO,
                repo_type="dataset",
                token=os.getenv("HF_TOKEN")
            )
            print("Uploaded tracker file to Hugging Face")
            return True
        except Exception as e:
            print(f"Error uploading tracker file: {e}")
            return False

# Initialize the tracker
# Module-level instance used by on_submit. Constructing it with the default
# filename creates or loads recording_tracker.json relative to the current
# working directory at import time.
tracker = UploadTracker()

def upload_to_hf(local_path, repo_path):
    """Push one local file into the Hugging Face dataset repository.

    Best effort: any exception raised while uploading is printed and turned
    into a ``False`` return value rather than propagated.

    Args:
        local_path: Path of the file on disk to upload.
        repo_path: Destination path of the file inside the repository.

    Returns:
        True when the upload completed, False when it raised.
    """
    succeeded = False
    try:
        request = {
            "path_or_fileobj": local_path,
            "path_in_repo": repo_path,
            "repo_id": DATASET_REPO,
            "repo_type": "dataset",
            "token": os.getenv("HF_TOKEN"),
        }
        upload_file(**request)
        print(f"Uploaded file: {local_path} to Hugging Face at {repo_path}")
        succeeded = True
    except Exception as upload_error:
        print(f"Error uploading file to HF: {upload_error}")
    return succeeded

def on_submit(audio_input, recording_count):
    """Handle the submission of a recorded audio prompt.

    The recording is hashed, peak-normalised, written to ``outputs/`` as a WAV
    file and uploaded to the dataset repository. It is added to the tracker
    only when the upload succeeded. The UI then advances to the next prompt
    in every case.

    Args:
        audio_input: ``(sample_rate, samples)`` pair from the audio component,
            or None when nothing was recorded. ``samples`` is used as a NumPy
            array (``.size``, ``.tobytes()``, ``.astype``).
        recording_count: Number of recordings completed before this one.

    Returns:
        The tuple of UI updates produced by ``next_prompt(recording_count)``.
    """
    if audio_input is None:
        return next_prompt(recording_count)

    # Process the audio
    sr, y = audio_input
    if y.size == 0:
        # np.max raises ValueError on a zero-size array; treat an empty
        # capture the same way as a missing recording.
        return next_prompt(recording_count)

    # Hash the raw samples, before normalisation changes them.
    audio_hash = xxhash.xxh32(y.tobytes()).hexdigest()

    # Peak-normalise to [-1, 1]; silent audio (peak 0) is left unscaled to
    # avoid a division by zero.
    y = y.astype(np.float32)
    peak = np.max(np.abs(y))
    if peak > 0:
        y /= peak

    unique_id = str(uuid.uuid4())[:8]
    clean_prompt = RECORDING_PROMPT.replace(" ", "_").replace(".", "").replace(",", "")[:20]
    base_name = f"{clean_prompt}_{audio_hash}_{unique_id}.wav"
    local_filename = f"outputs/{base_name}"
    sf.write(local_filename, y, sr, format="wav")

    hf_path = f"{CATEGORY}/{base_name}"
    if upload_to_hf(local_filename, hf_path):
        # The tracker is a log of uploaded recordings, so only record files
        # that actually reached the repository.
        tracker.add_recording(audio_hash, hf_path)
    else:
        print(f"Skipping tracker entry for {hf_path}: upload failed")

    # Move to the next prompt regardless of the upload outcome so the
    # participant is never stuck on a screen.
    return next_prompt(recording_count)

def next_prompt(recording_count):
    """Advance the session by one recording and build the matching UI state.

    Args:
        recording_count: Number of recordings completed before this step.

    Returns:
        A 7-tuple matching the handler outputs: title, instructions, prompt
        text, audio component, submit button, next button, and the
        incremented recording count.
    """
    completed = recording_count + 1

    if completed < MAX_RECORDINGS:
        # More recordings remain: show a cleared recorder and a disabled
        # submit button for the next one.
        heading = gr.Markdown(f"# Recording {completed + 1}/{MAX_RECORDINGS}")
        guidance = gr.Markdown(USER_INSTRUCTIONS)
        prompt_view = gr.Markdown(f"### \"{RECORDING_PROMPT}\"")
        recorder = gr.Audio(value=None, label="Record your response", sources=["microphone"])
        submit_view = gr.Button("Submit Recording", interactive=False)
        next_view = gr.Button("Next Recording", visible=False)
        return (
            heading,
            guidance,
            prompt_view,
            recorder,
            submit_view,
            next_view,
            completed,
        )

    # Every prompt has been handled: show the completion code and hide the
    # recorder and both buttons.
    heading = gr.Markdown(f"# All recordings complete! Completion code: {COMPLETION_CODE}")
    thanks = gr.Markdown("## Thank you for your participation.")
    summary = gr.Markdown("### You have completed all recordings.")
    recorder = gr.Audio(visible=False)
    submit_view = gr.Button(visible=False)
    next_view = gr.Button(visible=False)
    return (
        heading,
        thanks,
        summary,
        recorder,
        submit_view,
        next_view,
        completed,
    )

def enable_submit_button(audio_input):
    """Return a submit button that is clickable only when audio is present.

    Args:
        audio_input: Current value of the audio component; None when no
            recording exists.

    Returns:
        A "Submit Recording" button whose ``interactive`` flag reflects
        whether ``audio_input`` is not None.
    """
    has_recording = audio_input is not None
    return gr.Button("Submit Recording", interactive=has_recording)

# Create a theme
# Soft theme with custom hues; passed to gr.Blocks(theme=...) below.
theme = gr.themes.Soft(
    primary_hue="blue",
    secondary_hue="indigo",
    neutral_hue="slate",
)

# Create Gradio interface
# The custom CSS hides the page footer.
with gr.Blocks(theme=theme, css="footer {visibility: hidden}") as demo:
    # Session state: number of recordings completed so far (starts at 0).
    recording_count = gr.State(0)

    # Initial screen; next_prompt() returns replacements for these three
    # Markdown components after every submission.
    title = gr.Markdown(f"# Recording 1/{MAX_RECORDINGS}")
    instructions = gr.Markdown(USER_INSTRUCTIONS)
    prompt_text = gr.Markdown(f"### \"{RECORDING_PROMPT}\"")

    audio_input = gr.Audio(
        label="Record your response",
        sources=["microphone"],
        streaming=False
    )

    with gr.Row():
        # Submit starts disabled; enable_submit_button turns it on once the
        # audio component holds a recording.
        submit_btn = gr.Button("Submit Recording", interactive=False)
        # NOTE(review): next_btn is created hidden and every value returned
        # by next_prompt() keeps it visible=False, so its click handler below
        # appears unreachable from the UI as written - confirm intent.
        next_btn = gr.Button("Next Recording", visible=False)

    # Enable submit button when audio is recorded
    audio_input.change(
        fn=enable_submit_button,
        inputs=[audio_input],
        outputs=[submit_btn]
    )

    # Handle submission
    # The outputs list must stay in the same order as the 7-tuple returned
    # by on_submit / next_prompt.
    submit_btn.click(
        fn=on_submit,
        inputs=[audio_input, recording_count],
        outputs=[title, instructions, prompt_text, audio_input, submit_btn, next_btn, recording_count]
    )

    # Handle next button
    # Advances the counter without saving or uploading any audio.
    next_btn.click(
        fn=next_prompt,
        inputs=[recording_count],
        outputs=[title, instructions, prompt_text, audio_input, submit_btn, next_btn, recording_count]
    )

# Launch the app
# Only runs when the file is executed as a script, not when it is imported.
if __name__ == "__main__":
    # NOTE(review): share=True asks Gradio for a public share link according
    # to its launch() documentation - confirm this is wanted for deployment.
    demo.launch(share=True)