Pontonkid commited on
Commit
f16a2a2
·
verified ·
1 Parent(s): ae8b5ff

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +46 -46
app.py CHANGED
@@ -3,71 +3,71 @@ import torch
3
  from faster_whisper import WhisperModel
4
  import pandas as pd
5
 
6
- # Define model size
7
  model_size = "large-v2"
8
 
9
- def get_device():
10
- return "cuda:0" if torch.cuda.is_available() else "cpu"
11
 
12
- # Initialize model based on available hardware
13
- device = get_device()
14
  if device == "cuda:0":
 
15
  model_whisper = WhisperModel(model_size, device="cuda", compute_type="float16")
16
  else:
 
17
  model_whisper = WhisperModel(model_size, device="cpu", compute_type="int8")
18
 
 
19
  def get_filename(file_obj):
20
  return file_obj.name.split("/")[-1]
21
 
 
22
  def audio_to_transcript(file_obj):
23
- """Converts uploaded audio to a transcript with timestamps."""
24
- if not file_obj:
25
- return "No file uploaded.", None, None
26
-
27
- filename = get_filename(file_obj)
28
  try:
 
29
  segments, _ = model_whisper.transcribe(file_obj.name, beam_size=5, vad_filter=True)
30
  except:
31
- return "Error processing file.", None, None
 
 
 
 
32
 
33
- start_segments, end_segments, text_segments = [], [], []
34
  for segment in segments:
35
- start_segments.append(segment.start)
36
- end_segments.append(segment.end)
37
- text_segments.append(segment.text)
38
-
39
- df = pd.DataFrame({"Start Time": start_segments, "End Time": end_segments, "Text": text_segments})
40
- csv_file = filename.replace(".wav", "").replace(".mp3", "") + "_transcript.csv"
 
 
 
 
 
 
 
41
  df.to_csv(csv_file, encoding="utf-8", index=False)
42
-
43
- return filename, csv_file, df
44
 
45
- # Gradio UI
46
- definition = """
47
- ## 🎙️ Audio Transcription App
48
- This app allows you to upload an audio file and get an accurate transcript with timestamps.
49
- It uses **Faster-Whisper**, a fast and efficient ASR model, to generate transcriptions.
50
- Simply upload your file, and the app will process and return a CSV transcript.
51
- """
52
 
53
- with gr.Blocks(theme="soft") as iface:
54
- gr.Markdown(definition)
55
-
56
- with gr.Row():
57
- audio_input = gr.File(label="Upload an Audio File", type="file")
58
- file_preview = gr.Textbox(label="Uploaded File Name", interactive=False)
59
-
60
- transcribe_btn = gr.Button("Transcribe 🎧")
61
- progress = gr.Markdown("_Processing... Please wait._", visible=False)
62
-
63
- with gr.Row():
64
- csv_output = gr.File(label="Download Transcript (CSV)", visible=False)
65
- transcript_df = gr.DataFrame(headers=["Start Time", "End Time", "Text"], label="Transcript Preview")
66
-
67
- def update_file_name(file):
68
- return file.name if file else "No file uploaded."
69
-
70
- audio_input.change(update_file_name, inputs=[audio_input], outputs=[file_preview])
71
- transcribe_btn.click(audio_to_transcript, inputs=[audio_input], outputs=[file_preview, csv_output, transcript_df], show_progress=True)
72
 
73
  iface.launch(debug=True)
 
3
from faster_whisper import WhisperModel
import pandas as pd

# Whisper checkpoint to load.
model_size = "large-v2"

# Pick the compute device: first CUDA GPU when available, otherwise CPU.
device = "cuda:0" if torch.cuda.is_available() else "cpu"

# Load the model with a precision suited to the hardware:
# FP16 on GPU for throughput, INT8 on CPU to keep memory and latency down.
if device.startswith("cuda"):
    model_whisper = WhisperModel(model_size, device="cuda", compute_type="float16")
else:
    model_whisper = WhisperModel(model_size, device="cpu", compute_type="int8")
19
 
20
def get_filename(file_obj):
    """Return the final path component of the uploaded file's name.

    Splits on "/" so "/tmp/audio/clip.mp3" yields "clip.mp3"; a name
    containing no separator is returned unchanged.
    """
    _, _, basename = file_obj.name.rpartition("/")
    return basename
23
 
24
# Function to transcribe audio to text
def audio_to_transcript(file_obj):
    """Transcribe an uploaded audio file and export the result as a CSV.

    Parameters
    ----------
    file_obj :
        Either a file-like object exposing a ``.name`` attribute or a
        plain filesystem path string (Gradio supplies one or the other
        depending on the input component's ``type``).

    Returns
    -------
    tuple
        ``(filename, path_to_csv, df)`` where ``filename`` is the audio
        file's basename, ``path_to_csv`` is a ``gr.File`` update that
        makes the CSV download visible, and ``df`` is a DataFrame with
        "start", "end" and "text" columns.
    """
    # Dispatch on the input's shape explicitly. The previous bare
    # ``except:`` retried transcription on *any* failure, silently
    # swallowing real transcription errors (and even KeyboardInterrupt).
    if hasattr(file_obj, "name"):
        filename = get_filename(file_obj)
        audio_path = file_obj.name
    else:
        filename = file_obj.split("/")[-1]
        audio_path = file_obj

    segments, _ = model_whisper.transcribe(audio_path, beam_size=5, vad_filter=True)

    # Collect per-segment timing and text.
    start_segments, end_segments, text_segments = [], [], []
    for segment in segments:
        start_segments.append(segment.start)
        end_segments.append(segment.end)
        text_segments.append(segment.text)

    # Assemble the transcript table (column order matches the UI headers).
    df = pd.DataFrame(
        {"start": start_segments, "end": end_segments, "text": text_segments}
    )

    # Strip only the final extension, so dotted names such as
    # "my.take.mp3" become "my.take.csv" rather than "my.csv".
    csv_file = filename.rsplit(".", 1)[0] + ".csv"
    df.to_csv(csv_file, encoding="utf-8", index=False)
    path_to_csv = gr.File.update(value=csv_file, visible=True)

    return filename, path_to_csv, df
 
 
 
 
 
 
55
 
56
# Gradio interface: one audio-file input mapped to three outputs
# (file name, downloadable CSV, and an inline transcript table).
headers = ["start", "end", "text"]

_audio_input = gr.File(label="Upload an Audio File", type="filepath")
_outputs = [
    gr.Textbox(label="Audio file name"),
    gr.File(label="Transcript CSV file"),
    gr.DataFrame(label="Transcript", headers=headers),
]

iface = gr.Interface(
    fn=audio_to_transcript,
    inputs=_audio_input,
    outputs=_outputs,
    title="Audio to Transcript",
    description="Upload an audio file, and this tool will return a transcript with time-stamped segments.",
    allow_flagging="never",
    theme="compact",  # compact layout theme
)

iface.launch(debug=True)