File size: 6,100 Bytes
abc89d1
 
 
 
 
351252d
 
 
 
 
 
 
 
 
 
 
abc89d1
 
e79814a
dbca570
 
abc89d1
c5571fa
f0c35fe
d4b107b
cf8326e
85002a1
ad6d7c2
 
abc89d1
 
85002a1
 
c5571fa
dbca570
2f03bd6
ad6d7c2
dbca570
abc89d1
04f2c63
8c6ad91
04f2c63
 
cf8326e
04f2c63
 
 
 
 
 
cf8326e
ad6d7c2
dbca570
3a0e2ab
85002a1
788df7d
73a1be0
85002a1
73a1be0
 
 
 
 
 
 
e79814a
 
638acc9
73a1be0
 
2a7f812
e79814a
 
 
 
2a7f812
e79814a
 
73a1be0
2a7f812
73a1be0
 
32f88c0
361f8d0
5c44de8
85002a1
 
 
 
5c44de8
361f8d0
4a5b260
85002a1
 
071df52
 
361f8d0
 
 
071df52
e79814a
9e722fb
abc89d1
 
 
 
49113b6
 
 
abc89d1
 
8ec53db
abc89d1
 
 
 
 
 
 
 
2fb8a5f
04f2c63
2fb8a5f
abc89d1
6a67784
abc89d1
ad6d7c2
85002a1
5ca37ae
abc89d1
49113b6
e79814a
788df7d
 
 
49113b6
 
 
73a1be0
abc89d1
788df7d
 
 
 
73a1be0
abc89d1
 
 
73a1be0
 
 
56823a6
 
73a1be0
 
 
 
 
56823a6
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
### -----------------------------------------------------------------------
### (FULL, Revised) version_1.07ALPHA_app.py
### -----------------------------------------------------------------------

# -------------------------------------------------------------------------
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# -------------------------------------------------------------------------

import spaces
import gradio as gr
from PIL import Image
#from pydub import AudioSegment
#from scipy.io import wavfile

import os
import re
import time
import warnings
#import datetime
#import pandas as pd
#import csv
import subprocess
from pathlib import Path
import tempfile
from fpdf import FPDF

import psutil
from gpuinfo import GPUInfo

#import numpy as np
import torch
#import torchaudio
#import torchaudio.transforms as transforms

from transformers import pipeline #AutoModel

#import spacy
#import networkx as nx
#from sklearn.feature_extraction.text import TfidfVectorizer
#from sklearn.metrics.pairwise import cosine_similarity

# Suppress everything reported through the ``warnings`` module for this process.
warnings.filterwarnings(action="ignore")

# ------------header section------------
# Markdown heading and subtitle for the page (Norwegian subtitle:
# "Transcription of audio files to Norwegian text").
HEADER_INFO = "\n".join(
    [
        "# SWITCHVOX ✨|🇳🇴 *Switch Work Web app*",
        "**Transkribering av lydfiler til norsk skrift**",
    ]
)

# Banner image embedded by SIDEBAR_INFO below.
# NOTE(review): the query string carries ``Expires=1725145079`` together with
# ``Policy``/``Signature`` parameters, so this looks like a time-limited link --
# confirm it still resolves, or swap in a permanent URL.
LOGO = "https://cdn-lfs-us-1.huggingface.co/repos/fe/3b/fe3bd7c8beece8b087fddcc2278295e7f56c794c8dcf728189f4af8bddc585e1/24ad06a03a5bc66f3eba361b94e45ad17e46f98b76632f2d17faf8a0b4f9ab6b?response-content-disposition=inline%3B+filename*%3DUTF-8%27%27banner_trans.png%3B+filename%3D%22banner_trans.png%22%3B&response-content-type=image%2Fpng&Expires=1725145079&Policy=eyJTdGF0ZW1lbnQiOlt7IkNvbmRpdGlvbiI6eyJEYXRlTGVzc1RoYW4iOnsiQVdTOkVwb2NoVGltZSI6MTcyNTE0NTA3OX19LCJSZXNvdXJjZSI6Imh0dHBzOi8vY2RuLWxmcy11cy0xLmh1Z2dpbmdmYWNlLmNvL3JlcG9zL2ZlLzNiL2ZlM2JkN2M4YmVlY2U4YjA4N2ZkZGNjMjI3ODI5NWU3ZjU2Yzc5NGM4ZGNmNzI4MTg5ZjRhZjhiZGRjNTg1ZTEvMjRhZDA2YTAzYTViYzY2ZjNlYmEzNjFiOTRlNDVhZDE3ZTQ2Zjk4Yjc2NjMyZjJkMTdmYWY4YTBiNGY5YWI2Yj9yZXNwb25zZS1jb250ZW50LWRpc3Bvc2l0aW9uPSomcmVzcG9uc2UtY29udGVudC10eXBlPSoifV19&Signature=PCB1BZeLzsZXnn4lRi1Fj50%7E0E4G39u6-GKLNLLgxhDyhXlqb3BJkR7IOgdyjuNyBh8Iz2d7QqhzNSsOlQmqR30cJLl6aDM5eJO01OlWXoD3Z0KcphoVBFyyrkoxe2oS8i2mdlbFRYn7oc%7EhyOcW46zR6HtqAB91iEydhEa5WTyz3C9nWasgMZevb0vRJtzwhplM9e-%7EbRrZTm2fMzkL14IGWpTpUOGBe93BDSAYbPhrZK1jvuY8p0Tmy1iEKVP3Zdzix5U5lrbxit5luitEhK8x6q2t63Gdv7F0CZvjQtTh7MYkB5GNiru8bTGKAgCdHGiZbG7VCGfhlX3UKvUTPg__&Key-Pair-Id=K24J24Z295AEI9"

# HTML snippet that centres the banner image and lets it span the full width.
SIDEBAR_INFO = (
    '\n<div align="center">\n'
    f'    <img src="{LOGO}" style="width: 100%; height: auto;"/>\n'
    "</div>\n"
)

@spaces.GPU()
def transcribe(microphone, file_upload):
    """Transcribe one audio file with NB-Whisper Large and report system metrics.

    The microphone recording takes precedence when both inputs are supplied.

    Args:
        microphone: File path of a microphone recording, or ``None``.
        file_upload: File path of an uploaded audio file, or ``None``.

    Returns:
        A ``(text, system_info)`` tuple. ``text`` is the transcription,
        prefixed with a warning when both inputs were supplied, or an error
        message when neither was. ``system_info`` is a multi-line string with
        memory, timing, word-count, GPU and CPU figures (empty on error).
    """
    # Previously ``warn_output`` was referenced in the return statement without
    # ever being assigned, so every successful call ended in a NameError.
    warn_output = ""
    if microphone is not None and file_upload is not None:
        warn_output = (
            "WARNING: You've uploaded an audio file and used the microphone. "
            "The recorded file from the microphone will be used and the "
            "uploaded audio will be discarded.\n"
        )
    elif microphone is None and file_upload is None:
        # Nothing to transcribe: answer immediately instead of handing None
        # to the pipeline.
        return "ERROR: You have to either use the microphone or upload an audio file", ""

    audio_path = microphone if microphone is not None else file_upload
    start_time = time.time()

    # NOTE(review): the pipeline is constructed on every call, so the measured
    # processing time includes building it -- consider constructing it once.
    device = "cuda" if torch.cuda.is_available() else "cpu"
    pipe = pipeline("automatic-speech-recognition", model="NbAiLab/nb-whisper-large", device=device)
    # Optional settings left disabled by the author:
    # chunk_length_s=30, generate_kwargs={'task': 'transcribe', 'language': 'no'}
    text = pipe(audio_path)["text"]

    output_time = time.time() - start_time
    word_count = len(text.split())

    # --GPU metrics (first reported device only; 0 when nothing is reported)
    memory = psutil.virtual_memory()
    gpu_utilization, gpu_memory = GPUInfo.gpu_usage()
    gpu_utilization = gpu_utilization[0] if len(gpu_utilization) > 0 else 0
    gpu_memory = gpu_memory[0] if len(gpu_memory) > 0 else 0
    # --CPU metric (sampled over a blocking one-second interval)
    cpu_usage = psutil.cpu_percent(interval=1)
    # --system info string
    system_info = f"""
    *Memory: {memory.total / (1024 * 1024 * 1024):.2f}GB, used: {memory.percent}%, available: {memory.available / (1024 * 1024 * 1024):.2f}GB.*
    *Processing time: {output_time:.2f} seconds.*
    *Number of words: {word_count}*
    *GPU Utilization: {gpu_utilization}%, GPU Memory: {gpu_memory}*
    *CPU Usage: {cpu_usage}%*
    """

    return warn_output + text, system_info

def save_to_pdf(text, summary):
    """Write the transcription and an optional summary to a PDF file.

    Args:
        text: Transcribed text; the section is skipped when empty or ``None``.
        summary: Summary text; the section is skipped when empty or ``None``.

    Returns:
        Path (``str``) of the generated file, named ``transcription_.pdf`` and
        placed in a freshly created temporary directory.
    """

    def _latin1_safe(value):
        # The built-in "Arial" core font is limited to Latin-1; a character
        # outside that range (en dash, curly quotes, emoji, ...) would make
        # PDF generation fail. Unsupported characters are replaced with "?",
        # while Norwegian letters (æ, ø, å) pass through unchanged.
        return value.encode("latin-1", errors="replace").decode("latin-1")

    pdf = FPDF()
    pdf.add_page()
    pdf.set_font("Arial", size=12)

    if text:
        pdf.multi_cell(0, 10, _latin1_safe("Text:\n" + text))

    pdf.ln(10)  # Vertical gap between the two sections

    if summary:
        pdf.multi_cell(0, 10, _latin1_safe("Summary:\n" + summary))

    # One directory per call: a fixed path in the working directory would let
    # concurrent requests overwrite each other's download. The file name
    # itself is kept so the downloaded file is named as before.
    output_dir = Path(tempfile.mkdtemp(prefix="transcription_"))
    pdf_output_path = str(output_dir / "transcription_.pdf")
    pdf.output(pdf_output_path)
    return pdf_output_path


# Usage instructions (Norwegian) shown next to the audio inputs.
# NOTE(review): step 2 refers to a "Transkriber" button while the button below
# is labelled "Transcribe Interview" -- confirm which wording is intended.
_USAGE_STEPS = '''
        ##### 1. Last opp lydfila 🔊 
        2. Trykk på "Transkriber" knappen og vent på svar ☕️
        3. Går rimelig bra kjapt med Norwegian NB-Whisper Large⚡️
        4. Planlegger tilleggs-funksjoner senere😅
        ##### '''


def _export_text_only_pdf(text):
    """Build a PDF that contains only the transcribed text (empty summary)."""
    return save_to_pdf(text, "")


with gr.Blocks() as iface:

    # Banner followed by the page title.
    gr.HTML(SIDEBAR_INFO)
    gr.Markdown(HEADER_INFO)

    # Row 1: instructions, the two audio sources and the trigger button.
    with gr.Row():
        gr.Markdown(_USAGE_STEPS)
        microphone = gr.Audio(sources="microphone", type="filepath")
        upload = gr.Audio(sources="upload", type="filepath")
        transcribe_btn = gr.Button("Transcribe Interview")

    # Row 2: transcription result beside the system metrics.
    with gr.Row():
        text_output = gr.Textbox(label="Transkribert Tekst")
        with gr.Column():
            system_info = gr.Textbox(label="System Info")

    # Export tab: turns the current transcription into a downloadable PDF.
    with gr.Tabs():
        with gr.TabItem("Download PDF"):
            pdf_text_only = gr.Button("Download PDF with Transcribed Text")
            pdf_output = gr.File(label="Download PDF")

            pdf_text_only.click(
                fn=_export_text_only_pdf,
                inputs=[text_output],
                outputs=[pdf_output],
            )

    # Both audio components are passed in; transcribe() picks which one to use.
    transcribe_btn.click(
        fn=transcribe,
        inputs=[microphone, upload],
        outputs=[text_output, system_info],
    )


iface.launch(debug=True)