File size: 1,390 Bytes
d7f3834
cef63ff
 
 
b8516d6
 
6477218
cef63ff
6477218
cef63ff
fb215b4
0796099
 
 
fb215b4
 
2a95557
fb215b4
505545b
2a95557
fb215b4
c4b8c6c
0796099
fb215b4
b940808
fb215b4
 
457a841
 
 
 
fb215b4
 
b940808
 
fb215b4
 
457a841
 
 
 
fb215b4
b940808
cef63ff
b8516d6
 
 
 
90c8dbd
 
b8516d6
6477218
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
import os
import gradio as gr
from transformers import pipeline

# Top-level Blocks container; the tabbed interfaces are mounted into it below.
demo = gr.Blocks()

# Speech-to-text pipeline (wav2vec2 fine-tuned for English ASR).
asr = pipeline("automatic-speech-recognition", model="jonatasgrosman/wav2vec2-large-xlsr-53-english")

# Text summarization pipeline (BART CNN/DailyMail); condenses the transcript.
asr2 = pipeline("summarization", model="facebook/bart-large-cnn")

def transcribe_and_summarize(filepath):
    """Transcribe an audio file and summarize the transcript.

    Args:
        filepath: Path to the recorded/uploaded audio file, or None when
            Gradio received no audio.

    Returns:
        A (transcript, summary) pair of strings. Both are empty strings
        when no audio was provided.
    """
    if filepath is None:
        gr.Warning("No audio found, please retry.")
        # BUG FIX: the interfaces declare two output components, so this
        # branch must return one value per output — a single "" would make
        # Gradio error when unpacking the outputs.
        return "", ""

    transcription = asr(filepath)
    transcribed_text = transcription.get("text", "")

    # do_sample=False keeps the summary deterministic; max_length caps it
    # at 20 generated tokens.
    summarized_output = asr2(transcribed_text, max_length=20, do_sample=False)
    summary = summarized_output[0].get("summary_text", "")

    return transcribed_text, summary

# Gradio Interfaces
# Microphone tab: record audio, then show its transcript and a short summary.
mic_audio_input = gr.Audio(sources="microphone", type="filepath")
mic_transcribe = gr.Interface(
    fn=transcribe_and_summarize,
    inputs=mic_audio_input,
    outputs=[
        gr.Textbox(label="Transcript", lines=3),
        gr.Textbox(label="Summary", lines=3),
    ],
    allow_flagging="never",
)

# Upload tab: same pipeline as the microphone tab, fed from an uploaded file.
uploaded_audio_input = gr.Audio(sources="upload", type="filepath")
file_transcribe = gr.Interface(
    fn=transcribe_and_summarize,
    inputs=uploaded_audio_input,
    outputs=[
        gr.Textbox(label="Transcript", lines=3),
        gr.Textbox(label="Summary", lines=3),
    ],
    allow_flagging="never",
)

# Mount both interfaces as tabs inside the Blocks app, then start the server.
with demo:
    gr.TabbedInterface(
        interface_list=[mic_transcribe, file_transcribe],
        tab_names=["Transcribe Microphone", "Transcribe Audio File"],
    )
demo.launch()