AbdullahAdeeb committed on
Commit
973bb27
·
1 Parent(s): d13db1b
Files changed (6) hide show
  1. .gitattributes +0 -35
  2. README.md +14 -1
  3. app.py +96 -0
  4. example.py +14 -0
  5. packages.txt +1 -0
  6. requirements.txt +4 -0
.gitattributes DELETED
@@ -1,35 +0,0 @@
1
- *.7z filter=lfs diff=lfs merge=lfs -text
2
- *.arrow filter=lfs diff=lfs merge=lfs -text
3
- *.bin filter=lfs diff=lfs merge=lfs -text
4
- *.bz2 filter=lfs diff=lfs merge=lfs -text
5
- *.ckpt filter=lfs diff=lfs merge=lfs -text
6
- *.ftz filter=lfs diff=lfs merge=lfs -text
7
- *.gz filter=lfs diff=lfs merge=lfs -text
8
- *.h5 filter=lfs diff=lfs merge=lfs -text
9
- *.joblib filter=lfs diff=lfs merge=lfs -text
10
- *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
- *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
- *.model filter=lfs diff=lfs merge=lfs -text
13
- *.msgpack filter=lfs diff=lfs merge=lfs -text
14
- *.npy filter=lfs diff=lfs merge=lfs -text
15
- *.npz filter=lfs diff=lfs merge=lfs -text
16
- *.onnx filter=lfs diff=lfs merge=lfs -text
17
- *.ot filter=lfs diff=lfs merge=lfs -text
18
- *.parquet filter=lfs diff=lfs merge=lfs -text
19
- *.pb filter=lfs diff=lfs merge=lfs -text
20
- *.pickle filter=lfs diff=lfs merge=lfs -text
21
- *.pkl filter=lfs diff=lfs merge=lfs -text
22
- *.pt filter=lfs diff=lfs merge=lfs -text
23
- *.pth filter=lfs diff=lfs merge=lfs -text
24
- *.rar filter=lfs diff=lfs merge=lfs -text
25
- *.safetensors filter=lfs diff=lfs merge=lfs -text
26
- saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
- *.tar.* filter=lfs diff=lfs merge=lfs -text
28
- *.tar filter=lfs diff=lfs merge=lfs -text
29
- *.tflite filter=lfs diff=lfs merge=lfs -text
30
- *.tgz filter=lfs diff=lfs merge=lfs -text
31
- *.wasm filter=lfs diff=lfs merge=lfs -text
32
- *.xz filter=lfs diff=lfs merge=lfs -text
33
- *.zip filter=lfs diff=lfs merge=lfs -text
34
- *.zst filter=lfs diff=lfs merge=lfs -text
35
- *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
README.md CHANGED
@@ -1,6 +1,6 @@
1
  ---
2
  title: Whisper Gradio Template
3
- emoji: 👀
4
  colorFrom: indigo
5
  colorTo: green
6
  sdk: gradio
@@ -10,4 +10,17 @@ pinned: false
10
  short_description: hf space gradio app to transcribe audio using whisper ai
11
  ---
12
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
  ---
2
  title: Whisper Gradio Template
3
+ emoji: 🎙️
4
  colorFrom: indigo
5
  colorTo: green
6
  sdk: gradio
 
10
  short_description: hf space gradio app to transcribe audio using whisper ai
11
  ---
12
 
13
+ # Whisper Gradio App
14
+
15
+ This Gradio app uses OpenAI's Whisper model to transcribe audio files into multiple formats:
16
+ - Plain text transcription
17
+ - SRT subtitle format
18
+ - Detailed JSON output with timestamps and metadata
19
+
20
+ ## Usage
21
+ 1. Upload an audio file (supports various formats)
22
+ 2. Wait for the model to process the audio
23
+ 3. Get the transcription in three different formats
24
+
25
+
26
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,96 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import whisper
3
+ import torch
4
+ import json
5
+ import spaces
6
+ from datetime import timedelta
7
+ import os
8
+ import zipfile
9
+ from pathlib import Path
10
+
11
def format_timestamp(seconds):
    """Convert an offset in seconds to an SRT timestamp (``HH:MM:SS,mmm``).

    Args:
        seconds: Offset from the start of the audio, in seconds (int or
            float). Negative values are clamped to zero.

    Returns:
        The formatted timestamp. The hours field is zero-padded to at least
        two digits and keeps counting past 24 instead of wrapping.
    """
    # Work in whole milliseconds. Dividing one timedelta by another keeps the
    # ``days`` component (``timedelta.seconds`` alone silently drops it, which
    # made anything at or beyond 24 hours wrap around to 00:xx:xx), and floor
    # division truncates sub-millisecond precision.
    total_ms = max(timedelta(seconds=seconds) // timedelta(milliseconds=1), 0)
    hours, remainder = divmod(total_ms, 3_600_000)
    minutes, remainder = divmod(remainder, 60_000)
    secs, milliseconds = divmod(remainder, 1_000)
    return f"{hours:02d}:{minutes:02d}:{secs:02d},{milliseconds:03d}"
19
+
20
def save_files(text, srt, json_data, base_name):
    """Write the transcription in each format and bundle the files in a ZIP.

    Files are written to a ``transcriptions`` directory relative to the
    current working directory; the directory is created if it is missing.
    Existing files with the same names are overwritten.

    Args:
        text: Plain-text transcription.
        srt: Transcription formatted as SRT subtitles.
        json_data: Transcription already serialized as a JSON string.
        base_name: File name (or path) of the source audio. Only its stem
            (final component without the last suffix) is used to name the
            output files.

    Returns:
        A 4-tuple of string paths: ``(txt, srt, json, zip)``.
    """
    output_dir = Path("transcriptions")
    output_dir.mkdir(parents=True, exist_ok=True)

    stem = Path(base_name).stem
    txt_path = output_dir / f"{stem}.txt"
    srt_path = output_dir / f"{stem}.srt"
    json_path = output_dir / f"{stem}.json"
    zip_path = output_dir / f"{stem}_all.zip"

    # Always write UTF-8: transcripts routinely contain non-ASCII text and the
    # platform default encoding is not guaranteed to be able to represent it.
    for path, content in (
        (txt_path, text),
        (srt_path, srt),
        (json_path, json_data),
    ):
        path.write_text(content, encoding="utf-8")

    # The default ZIP mode only stores members; ask for DEFLATE so the text
    # files are actually compressed.
    with zipfile.ZipFile(
        zip_path, "w", compression=zipfile.ZIP_DEFLATED
    ) as zipf:
        for path in (txt_path, srt_path, json_path):
            # Store members under their bare file name, not the directory path.
            zipf.write(path, path.name)

    return str(txt_path), str(srt_path), str(json_path), str(zip_path)
45
+
46
@spaces.GPU
def transcribe(audio_file):
    """Transcribe an audio file with Whisper and export the result.

    Args:
        audio_file: Path to the audio file, as supplied by the
            ``gr.Audio(type="filepath")`` input.

    Returns:
        A 7-tuple matching the interface outputs, in order: paths to the
        TXT, SRT, JSON and ZIP files, followed by the plain-text
        transcription, the SRT text and the JSON string.

    Raises:
        gr.Error: If no audio file was provided.
    """
    # The input is empty when the user submits without uploading anything;
    # fail with a readable message instead of an opaque error further down.
    if not audio_file:
        raise gr.Error("Please upload an audio file before submitting.")

    # NOTE(review): the model is loaded on every call; consider caching it if
    # the hosting environment keeps this process alive between requests.
    model = whisper.load_model("large-v3-turbo")
    result = model.transcribe(audio_file)

    text_output = result["text"]
    json_output = json.dumps(result, indent=2)

    # One SRT cue per segment: 1-based index, time range, text, blank line.
    srt_output = "".join(
        f"{index}\n"
        f"{format_timestamp(segment['start'])} --> "
        f"{format_timestamp(segment['end'])}\n"
        f"{segment['text'].strip()}\n\n"
        for index, segment in enumerate(result["segments"], 1)
    )

    # Output files are named after the uploaded file's base name.
    txt_file, srt_file, json_file, zip_file = save_files(
        text_output,
        srt_output,
        json_output,
        os.path.basename(audio_file),
    )

    return (
        txt_file,
        srt_file,
        json_file,
        zip_file,
        text_output,
        srt_output,
        json_output,
    )
77
+
78
# Build the Gradio UI: one audio upload in, four downloads and three previews
# out. The output widgets are listed in the order transcribe() returns values.
audio_input = gr.Audio(type="filepath", label="Upload Audio")

result_outputs = [
    gr.File(label="Download TXT"),
    gr.File(label="Download SRT"),
    gr.File(label="Download JSON"),
    gr.File(label="Download All (ZIP)"),
    gr.Textbox(label="Transcription", lines=5),
    gr.Textbox(label="SRT Format"),
    gr.JSON(label="JSON Output"),
]

demo = gr.Interface(
    fn=transcribe,
    inputs=audio_input,
    outputs=result_outputs,
    title="Audio Transcription with Whisper",
    description=(
        "Upload an audio file to transcribe it into text, SRT, and JSON formats "
        "using OpenAI's Whisper model. You can download the results in different "
        "formats or get everything in a ZIP file."
    ),
)

# Only start the server when executed as a script, not when imported.
if __name__ == "__main__":
    demo.launch()
example.py ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import spaces
3
+ import torch
4
+
5
# Create the tensor at import time, outside of any @spaces.GPU function.
# NOTE(review): the inline device comments below describe what the author
# observed; presumably this is on a Hugging Face ZeroGPU Space - confirm.
zero = torch.Tensor([0]).cuda()
print(zero.device) # <-- 'cpu' 🤔


@spaces.GPU
def greet(n):
    """Print the device of the module-level tensor and return a greeting.

    Args:
        n: Number taken from the ``gr.Number`` input; it is added to ``zero``.

    Returns:
        A string of the form ``"Hello <zero + n> Tensor"``.
    """
    print(zero.device) # <-- 'cuda:0' 🤗
    return f"Hello {zero + n} Tensor"


# Minimal UI: a number in, text out. The app is launched at import time with
# a public share link requested.
demo = gr.Interface(fn=greet, inputs=gr.Number(), outputs=gr.Text())
demo.launch(share=True)
packages.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ ffmpeg
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ gradio
2
+ openai-whisper
3
+ spaces
4
+ torch