zinoubm committed
Commit 0013d95 · 1 Parent(s): 867485b

initial commit

Files changed (3)
  1. .env +2 -0
  2. app.py +36 -0
  3. requirements.txt +10 -0
.env ADDED
@@ -0,0 +1,2 @@
+ OPENAI_API_KEY = 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
+ MODEL = ada:ft-personal-2023-02-17-17-56-33
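
At runtime these two values are read with python-dotenv rather than hard-coded; a minimal sketch of that lookup, matching the pattern app.py uses below:

import os
from dotenv import load_dotenv

load_dotenv()  # picks up the .env file in the working directory
api_key = os.getenv("OPENAI_API_KEY")
model_name = os.getenv("MODEL")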
app.py ADDED
@@ -0,0 +1,36 @@
+ import os
+ import gradio as gr
+ import numpy as np
+ import librosa
+ import torch
+ from transformers import Speech2TextProcessor, Speech2TextForConditionalGeneration
+ from dotenv import load_dotenv
+
+ load_dotenv()
+
+ # Make the bundled ffprobe discoverable; a raw string keeps the Windows backslashes literal.
+ os.environ["PATH"] += os.pathsep + r".\env\Lib\site-packages\ffprobe"
+
+ OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
+ MODEL = os.getenv("MODEL")
+
+ model = Speech2TextForConditionalGeneration.from_pretrained(
+     "facebook/s2t-small-librispeech-asr"
+ )
+ processor = Speech2TextProcessor.from_pretrained("facebook/s2t-small-librispeech-asr")
+
+
+ def transcribe(audio):
+     # Resample the recording to the 16 kHz rate the model expects.
+     waveform, rate = librosa.load(audio, sr=16000)
+     inputs = processor(waveform, sampling_rate=rate, return_tensors="pt")
+     generated_ids = model.generate(
+         inputs["input_features"], attention_mask=inputs["attention_mask"]
+     )
+     transcription = processor.batch_decode(generated_ids, skip_special_tokens=True)
+     return transcription[0]  # batch_decode returns a list; show the single transcription
+
+
+ gr.Interface(
+     fn=transcribe, inputs=gr.Audio(source="microphone", type="filepath"), outputs="text"
+ ).launch()
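
To sanity-check the transcription pipeline without starting the Gradio server (importing app would also call launch()), the same model calls can be run standalone; a minimal sketch, where "sample.wav" is only a placeholder for any recording librosa can read:

import librosa
from transformers import Speech2TextProcessor, Speech2TextForConditionalGeneration

model = Speech2TextForConditionalGeneration.from_pretrained("facebook/s2t-small-librispeech-asr")
processor = Speech2TextProcessor.from_pretrained("facebook/s2t-small-librispeech-asr")

waveform, rate = librosa.load("sample.wav", sr=16000)  # placeholder filename
inputs = processor(waveform, sampling_rate=rate, return_tensors="pt")
generated_ids = model.generate(inputs["input_features"], attention_mask=inputs["attention_mask"])
print(processor.batch_decode(generated_ids, skip_special_tokens=True)[0])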
requirements.txt ADDED
@@ -0,0 +1,10 @@
+ gradio
+ torchaudio
+ sentencepiece
+ transformers
+ openai
+ sounddevice
+ librosa
+ python-dotenv
+ ffmpeg
+ ffprobe
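
openai is pulled in here and OPENAI_API_KEY / MODEL are committed in .env, but app.py does not call the API yet. A speculative sketch of how the fine-tuned ada model might be invoked on a transcription, assuming the 0.x openai client that was current at the time of this commit (the prompt text is a placeholder):

import os
import openai
from dotenv import load_dotenv

load_dotenv()
openai.api_key = os.getenv("OPENAI_API_KEY")

# Placeholder prompt; in the app this would presumably be the transcribed text.
completion = openai.Completion.create(model=os.getenv("MODEL"), prompt="turn on the lights ->", max_tokens=5)
print(completion["choices"][0]["text"])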