tahirsher committed on
Commit
1bb8243
·
verified ·
1 Parent(s): d2d38cf

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +22 -4
app.py CHANGED
@@ -27,7 +27,7 @@ model.to(device)
27
  print(f"✅ Model loaded on {device}")
28
 
29
  # ================================
30
- # 2️⃣ Load Dataset (Manually from Extracted Path)
31
  # ================================
32
  DATASET_TAR_PATH = "dev-clean.tar.gz" # Dataset stored in Hugging Face Space
33
  EXTRACT_PATH = "./librispeech_dev_clean" # Extracted dataset folder
@@ -41,9 +41,27 @@ if not os.path.exists(EXTRACT_PATH):
41
  else:
42
  print("✅ Dataset already extracted.")
43
 
44
- # Load audio files manually
45
- AUDIO_FOLDER = os.path.join(EXTRACT_PATH, "LibriSpeech", "train-clean-100") # Adjust as per structure
46
- audio_files = [os.path.join(AUDIO_FOLDER, f) for f in os.listdir(AUDIO_FOLDER) if f.endswith(".flac")]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
47
 
48
  # ================================
49
  # 3️⃣ Preprocess Dataset (Manually)
 
27
  print(f"✅ Model loaded on {device}")
28
 
29
  # ================================
30
+ # 2️⃣ Load Dataset (Recursively from Extracted Path)
31
  # ================================
32
  DATASET_TAR_PATH = "dev-clean.tar.gz" # Dataset stored in Hugging Face Space
33
  EXTRACT_PATH = "./librispeech_dev_clean" # Extracted dataset folder
 
41
  else:
42
  print("✅ Dataset already extracted.")
43
 
44
+ # Define the base directory where audio files are stored
45
+ AUDIO_FOLDER = os.path.join(EXTRACT_PATH, "LibriSpeech", "dev-clean")
46
+
47
+ # Recursively find all `.flac` files inside the dataset directory
48
def find_audio_files(base_folder):
    """Recursively collect the paths of all ``.flac`` files under *base_folder*.

    Args:
        base_folder: Directory to search, including all of its subdirectories.

    Returns:
        A sorted list of file paths, each joined onto the directory in which
        the file was found. The list is empty when no file name ends with
        ``.flac`` or when *base_folder* does not exist (``os.walk`` ignores
        listing errors by default).
    """
    # os.walk yields entries in arbitrary, filesystem-dependent order; sorting
    # makes the result reproducible across runs and machines.
    return sorted(
        os.path.join(root, name)
        for root, _, names in os.walk(base_folder)
        for name in names
        if name.endswith(".flac")
    )
56
+
57
+ # Get all audio files
58
+ audio_files = find_audio_files(AUDIO_FOLDER)
59
+
60
+ # Check if audio files were found
61
+ if not audio_files:
62
+ raise FileNotFoundError(f"❌ No .flac files found in {AUDIO_FOLDER}. Check dataset structure!")
63
+
64
+ print(f"✅ Found {len(audio_files)} audio files in dataset!")
65
 
66
  # ================================
67
  # 3️⃣ Preprocess Dataset (Manually)