tahirsher committed on
Commit
723513d
·
verified ·
1 Parent(s): 3a79217

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +20 -11
app.py CHANGED
@@ -4,6 +4,7 @@ import torch
4
  import torchaudio
5
  import numpy as np
6
  import streamlit as st
 
7
  from transformers import (
8
  AutoProcessor,
9
  AutoModelForSpeechSeq2Seq,
@@ -13,7 +14,13 @@ from transformers import (
13
  )
14
 
15
  # ================================
16
- # 1️⃣ Load Model & Processor
 
 
 
 
 
 
17
  # ================================
18
  MODEL_NAME = "AqeelShafy7/AudioSangraha-Audio_to_Text"
19
 
@@ -27,7 +34,7 @@ model.to(device)
27
  print(f"✅ Model loaded on {device}")
28
 
29
  # ================================
30
- # 2️⃣ Load Dataset (Recursively from Extracted Path)
31
  # ================================
32
  DATASET_TAR_PATH = "dev-clean.tar.gz"
33
  EXTRACT_PATH = "./librispeech_dev_clean"
@@ -63,7 +70,7 @@ if not audio_files:
63
  print(f"✅ Found {len(audio_files)} audio files in dataset!")
64
 
65
  # ================================
66
- # 3️⃣ Preprocess Dataset (Fixed input_features)
67
  # ================================
68
  def load_and_process_audio(audio_path):
69
  """Loads and processes a single audio file into model format."""
@@ -80,7 +87,7 @@ def load_and_process_audio(audio_path):
80
  # Manually create dataset structure
81
  dataset = [{"input_features": load_and_process_audio(f), "labels": []} for f in audio_files[:100]]
82
 
83
- # Split dataset into train and eval (Recommended Fix)
84
  train_size = int(0.9 * len(dataset))
85
  train_dataset = dataset[:train_size]
86
  eval_dataset = dataset[train_size:]
@@ -88,11 +95,11 @@ eval_dataset = dataset[train_size:]
88
  print(f"✅ Dataset Loaded! Training: {len(train_dataset)}, Evaluation: {len(eval_dataset)}")
89
 
90
  # ================================
91
- # 4️⃣ Training Arguments & Trainer
92
  # ================================
93
  training_args = TrainingArguments(
94
  output_dir="./asr_model_finetuned",
95
- eval_strategy="epoch", # Fix: Proper evaluation
96
  save_strategy="epoch",
97
  learning_rate=5e-5,
98
  per_device_train_batch_size=8,
@@ -102,7 +109,9 @@ training_args = TrainingArguments(
102
  logging_dir="./logs",
103
  logging_steps=500,
104
  save_total_limit=2,
105
- push_to_hub=True,
 
 
106
  )
107
 
108
  # Data collator (for dynamic padding)
@@ -113,13 +122,13 @@ trainer = Trainer(
113
  model=model,
114
  args=training_args,
115
  train_dataset=train_dataset,
116
- eval_dataset=eval_dataset, # Fix: Providing eval_dataset
117
  processing_class=processor, # Fix: Replacing deprecated `tokenizer`
118
  data_collator=data_collator,
119
  )
120
 
121
  # ================================
122
- # 5️⃣ Fine-Tuning Execution
123
  # ================================
124
  if st.button("Start Fine-Tuning"):
125
  with st.spinner("Fine-tuning in progress... Please wait!"):
@@ -127,7 +136,7 @@ if st.button("Start Fine-Tuning"):
127
  st.success("✅ Fine-Tuning Completed! Model updated.")
128
 
129
  # ================================
130
- # 6️⃣ Streamlit ASR Web App
131
  # ================================
132
  st.title("🎙️ Speech-to-Text ASR with Fine-Tuning 🎶")
133
 
@@ -159,7 +168,7 @@ if audio_file:
159
  st.write(transcription)
160
 
161
  # ================================
162
- # 7️⃣ Fine-Tune Model with User Correction
163
  # ================================
164
  user_correction = st.text_area("🔧 Correct the transcription (if needed):", transcription)
165
 
 
4
  import torchaudio
5
  import numpy as np
6
  import streamlit as st
7
+ from huggingface_hub import login
8
  from transformers import (
9
  AutoProcessor,
10
  AutoModelForSpeechSeq2Seq,
 
14
  )
15
 
16
  # ================================
17
+ # 1️⃣ Authenticate with Hugging Face Hub
18
+ # ================================
19
+ HF_TOKEN = "hf_xxxxxxxxxxxxxxxxxxxxxxx" # Placeholder: replace with your Hugging Face token (prefer loading it from an environment variable or secret; never commit a real token)
20
+ login(token=HF_TOKEN) # Ensure authentication
21
+
22
+ # ================================
23
+ # 2️⃣ Load Model & Processor
24
  # ================================
25
  MODEL_NAME = "AqeelShafy7/AudioSangraha-Audio_to_Text"
26
 
 
34
  print(f"✅ Model loaded on {device}")
35
 
36
  # ================================
37
+ # 3️⃣ Load Dataset (Recursively from Extracted Path)
38
  # ================================
39
  DATASET_TAR_PATH = "dev-clean.tar.gz"
40
  EXTRACT_PATH = "./librispeech_dev_clean"
 
70
  print(f"✅ Found {len(audio_files)} audio files in dataset!")
71
 
72
  # ================================
73
+ # 4️⃣ Preprocess Dataset (Fixed input_features)
74
  # ================================
75
  def load_and_process_audio(audio_path):
76
  """Loads and processes a single audio file into model format."""
 
87
  # Manually create dataset structure
88
  dataset = [{"input_features": load_and_process_audio(f), "labels": []} for f in audio_files[:100]]
89
 
90
+ # Split dataset into train and eval
91
  train_size = int(0.9 * len(dataset))
92
  train_dataset = dataset[:train_size]
93
  eval_dataset = dataset[train_size:]
 
95
  print(f"✅ Dataset Loaded! Training: {len(train_dataset)}, Evaluation: {len(eval_dataset)}")
96
 
97
  # ================================
98
+ # 5️⃣ Training Arguments & Trainer
99
  # ================================
100
  training_args = TrainingArguments(
101
  output_dir="./asr_model_finetuned",
102
+ eval_strategy="epoch", # `eval_strategy` replaces the deprecated `evaluation_strategy` argument
103
  save_strategy="epoch",
104
  learning_rate=5e-5,
105
  per_device_train_batch_size=8,
 
109
  logging_dir="./logs",
110
  logging_steps=500,
111
  save_total_limit=2,
112
+ push_to_hub=True, # Fix: Properly authenticate Hugging Face Hub
113
+ hub_model_id="tahirsher/ASR_Model", # Replace with your Hugging Face repo
114
+ hub_token=HF_TOKEN,
115
  )
116
 
117
  # Data collator (for dynamic padding)
 
122
  model=model,
123
  args=training_args,
124
  train_dataset=train_dataset,
125
+ eval_dataset=eval_dataset,
126
  processing_class=processor, # Fix: Replacing deprecated `tokenizer`
127
  data_collator=data_collator,
128
  )
129
 
130
  # ================================
131
+ # 6️⃣ Fine-Tuning Execution
132
  # ================================
133
  if st.button("Start Fine-Tuning"):
134
  with st.spinner("Fine-tuning in progress... Please wait!"):
 
136
  st.success("✅ Fine-Tuning Completed! Model updated.")
137
 
138
  # ================================
139
+ # 7️⃣ Streamlit ASR Web App
140
  # ================================
141
  st.title("🎙️ Speech-to-Text ASR with Fine-Tuning 🎶")
142
 
 
168
  st.write(transcription)
169
 
170
  # ================================
171
+ # 8️⃣ Fine-Tune Model with User Correction
172
  # ================================
173
  user_correction = st.text_area("🔧 Correct the transcription (if needed):", transcription)
174