Kartheesh committed on
Commit
5803861
·
verified ·
1 Parent(s): 0594aa4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +68 -0
app.py CHANGED
@@ -1,2 +1,70 @@
 
 
 
 
 
1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
 
 
1
+ import fitz # PyMuPDF
2
+ from transformers import VitsModel, MBartForConditionalGeneration, AutoTokenizer
3
+ import torch
4
+ import soundfile as sf
5
+ import gradio as gr
6
 
7
# Hub checkpoint ids, named once so each tokenizer/model pair stays in sync.
_TRANSLATION_CHECKPOINT = "facebook/mbart-large-50-one-to-many-mmt"
_TTS_CHECKPOINT = "facebook/mms-tts-hin"

# Translation stage: mBART-50 tokenizer (slow implementation) and model.
translation_tokenizer = AutoTokenizer.from_pretrained(
    _TRANSLATION_CHECKPOINT,
    use_fast=False,
)
translation_model = MBartForConditionalGeneration.from_pretrained(
    _TRANSLATION_CHECKPOINT
)

# Speech stage: VITS tokenizer and model for the Hindi MMS-TTS checkpoint.
tts_tokenizer = AutoTokenizer.from_pretrained(_TTS_CHECKPOINT)
tts_model = VitsModel.from_pretrained(_TTS_CHECKPOINT)
14
+
15
def extract_text_from_pdf(pdf_file):
    """Extract the plain text of every page in a PDF.

    Args:
        pdf_file: Path of the PDF to read; passed straight to ``fitz.open``.

    Returns:
        The text of all pages concatenated in page order, with no separator
        inserted between pages.
    """
    # Open the document as a context manager so its file handle is released
    # even if a page fails to parse (the handle was previously never closed).
    with fitz.open(pdf_file) as doc:
        # join() avoids rebuilding the string once per page.
        return "".join(page.get_text() for page in doc)
22
+
23
def process_pdf(pdf_file):
    """Translate a PDF's text to Hindi and synthesize it as a WAV file.

    The text is extracted with ``extract_text_from_pdf``, translated with the
    module-level mBART model (target language ``hi_IN``), and spoken with the
    module-level VITS model.

    Args:
        pdf_file: Path of the PDF to process.

    Returns:
        Path of the written audio file (``"output.wav"`` in the current
        working directory).

    Raises:
        gr.Error: If the PDF yields no text, or if the TTS model raises a
            ``RuntimeError`` during synthesis.
    """
    input_text = extract_text_from_pdf(pdf_file)
    if not input_text.strip():
        # Nothing to translate or speak (e.g. an image-only PDF); fail with a
        # readable message instead of an opaque model error further down.
        raise gr.Error("No extractable text was found in the PDF.")

    # NOTE: truncation=True clips the input to the tokenizer's maximum length,
    # so only the beginning of a long document is translated.
    model_inputs = translation_tokenizer(
        input_text,
        return_tensors="pt",
        padding=True,
        truncation=True,
    )

    # Forcing the first generated token to the Hindi language code selects the
    # target language of the one-to-many model.
    generated_tokens = translation_model.generate(
        **model_inputs,
        forced_bos_token_id=translation_tokenizer.lang_code_to_id["hi_IN"],
    )

    translation = translation_tokenizer.batch_decode(
        generated_tokens, skip_special_tokens=True
    )
    translated_text = " ".join(translation)  # Join all translated sentences

    tts_inputs = tts_tokenizer(translated_text, return_tensors="pt")

    try:
        with torch.no_grad():
            tts_output = tts_model(**tts_inputs)
            waveform = tts_output.waveform.squeeze().cpu().numpy()
    except RuntimeError as e:
        # Returning the message would make the caller treat it as an audio
        # file path; raise so the UI shows it as an error instead.
        raise gr.Error(f"Runtime Error: {e}") from e

    # NOTE(review): a fixed file name means concurrent requests overwrite each
    # other's output - confirm whether the app is served to multiple users.
    audio_path = "output.wav"
    # Write at the rate the model actually generates (from its config) rather
    # than a hard-coded 22050 Hz, which would distort playback speed and pitch
    # whenever the two differ.
    sf.write(audio_path, waveform, tts_model.config.sampling_rate)

    return audio_path
56
+
57
def gradio_interface(pdf_file):
    """Gradio callback: turn the uploaded PDF into a Hindi audio file.

    Args:
        pdf_file: The value delivered by the ``gr.File`` input. Either an
            object exposing the upload's path as ``.name`` or a plain path
            string is accepted; ``None`` means nothing was uploaded.

    Returns:
        Path of the generated audio file, as returned by ``process_pdf``.

    Raises:
        gr.Error: If no file was uploaded.
    """
    if pdf_file is None:
        # Submitting without a file would otherwise crash on ``None.name``.
        raise gr.Error("Please upload a PDF file.")

    # Prefer ``.name`` when present; otherwise treat the value itself as the
    # path (the delivered type depends on the Gradio version/configuration).
    pdf_path = getattr(pdf_file, "name", pdf_file)
    return process_pdf(pdf_path)
60
+
61
# Build the UI: a single-file upload feeding gradio_interface, whose result is
# rendered by an audio component.
iface = gr.Interface(
    gradio_interface,
    gr.File(file_count="single"),
    "audio",
)

# Start serving the app with Gradio's debug mode enabled.
iface.launch(debug=True)
70