Update app.py
Browse files
app.py
CHANGED
@@ -27,7 +27,7 @@ model.to(device)
|
|
27 |
print(f"✅ Model loaded on {device}")
|
28 |
|
29 |
# ================================
|
30 |
-
# 2️⃣ Load Dataset (
|
31 |
# ================================
|
32 |
DATASET_TAR_PATH = "dev-clean.tar.gz" # Dataset stored in Hugging Face Space
|
33 |
EXTRACT_PATH = "./librispeech_dev_clean" # Extracted dataset folder
|
@@ -41,9 +41,27 @@ if not os.path.exists(EXTRACT_PATH):
|
|
41 |
else:
|
42 |
print("✅ Dataset already extracted.")
|
43 |
|
44 |
-
#
|
45 |
-
AUDIO_FOLDER = os.path.join(EXTRACT_PATH, "LibriSpeech", "
|
46 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
47 |
|
48 |
# ================================
|
49 |
# 3️⃣ Preprocess Dataset (Manually)
|
|
|
27 |
print(f"✅ Model loaded on {device}")
|
28 |
|
29 |
# ================================
|
30 |
+
# 2️⃣ Load Dataset (Recursively from Extracted Path)
|
31 |
# ================================
|
32 |
DATASET_TAR_PATH = "dev-clean.tar.gz" # Dataset stored in Hugging Face Space
|
33 |
EXTRACT_PATH = "./librispeech_dev_clean" # Extracted dataset folder
|
|
|
41 |
else:
|
42 |
print("✅ Dataset already extracted.")
|
43 |
|
44 |
+
# Define the base directory where audio files are stored
|
45 |
+
AUDIO_FOLDER = os.path.join(EXTRACT_PATH, "LibriSpeech", "dev-clean")
|
46 |
+
|
47 |
+
# Recursively find all `.flac` files inside the dataset directory
|
48 |
+
def find_audio_files(base_folder):
    """Recursively collect the paths of all ``.flac`` files under *base_folder*.

    Args:
        base_folder: Directory to search; every subdirectory is visited.

    Returns:
        A sorted list of file paths, each built by joining the file name onto
        the directory it was found in. The list is empty when no file matches
        or when *base_folder* does not exist (``os.walk`` ignores listing
        errors by default).
    """
    # os.walk yields entries in an arbitrary, filesystem-dependent order;
    # sorting makes the result reproducible across runs and machines.
    return sorted(
        os.path.join(root, name)
        for root, _, files in os.walk(base_folder)
        for name in files
        if name.endswith(".flac")
    )
|
56 |
+
|
57 |
+
# Scan the dataset folder once and keep the resulting list of .flac paths
audio_files = find_audio_files(AUDIO_FOLDER)

# Report the count on success; an empty result is treated as a fatal setup error
if audio_files:
    print(f"✅ Found {len(audio_files)} audio files in dataset!")
else:
    raise FileNotFoundError(f"❌ No .flac files found in {AUDIO_FOLDER}. Check dataset structure!")
|
65 |
|
66 |
# ================================
|
67 |
# 3️⃣ Preprocess Dataset (Manually)
|