Sid26Roy committed on
Commit
3937682
·
verified ·
1 Parent(s): a1ea0ee

Upload 2 files

Browse files
Files changed (2) hide show
  1. app (8).py +53 -0
  2. requirements (3).txt +114 -0
app (8).py ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import torch
3
+ from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
4
+ import os
5
+ import sys
6
+
7
+ # Add local IndicTransToolkit path
8
+ sys.path.append(os.path.abspath("libs/IndicTransToolkit"))
9
+ from IndicTransToolkit.processor import IndicProcessor
10
+
11
# Hub id of the checkpoint; the tokenizer and the model are loaded from the same id.
_CHECKPOINT = "ai4bharat/indictrans2-en-indic-dist-200M"

# Everything below is built once at import time and reused by every request.
ip = IndicProcessor(inference=True)
tokenizer = AutoTokenizer.from_pretrained(_CHECKPOINT, trust_remote_code=True)
model = AutoModelForSeq2SeqLM.from_pretrained(_CHECKPOINT, trust_remote_code=True)

# Target-language tags offered as choices in the UI dropdown.
LANG_OPTIONS = [
    "hin_Deva",
    "ben_Beng",
    "pan_Guru",
    "guj_Gujr",
    "tam_Taml",
    "tel_Telu",
    "mal_Mlym",
    "mar_Deva",
    "kan_Knda",
    "asm_Beng",
]
21
+
22
def translate(text, target_lang):
    """Translate English ``text`` into the language tagged by ``target_lang``.

    Args:
        text: English source text from the UI textbox. ``None`` and
            whitespace-only values are treated as "nothing entered".
        target_lang: Language tag forwarded to the processor as ``tgt_lang``
            (for example ``"hin_Deva"``). ``None`` or ``""`` means nothing was
            selected in the dropdown.

    Returns:
        The translated string on success; otherwise a short human-readable
        message describing the missing input or the error that was raised.
    """
    # Check for None before calling .strip() so a missing value yields the
    # friendly prompt instead of an AttributeError outside the try block.
    if not text or not text.strip():
        return "Please enter some text."

    # The dropdown has no default value, so the selection can be empty.
    if not target_lang:
        return "Please select a target language."

    try:
        batch = ip.preprocess_batch([text], src_lang="eng_Latn", tgt_lang=target_lang)
        batch = tokenizer(
            batch,
            padding="longest",
            truncation=True,
            max_length=256,
            return_tensors="pt",
        )

        # No gradients are needed for generation.
        with torch.inference_mode():
            outputs = model.generate(**batch, num_beams=5, max_length=256)

        # NOTE(review): as_target_tokenizer() is deprecated in recent
        # transformers releases; kept because the remote-code tokenizer may
        # rely on it to decode with the target vocabulary - confirm before
        # removing.
        with tokenizer.as_target_tokenizer():
            decoded = tokenizer.batch_decode(
                outputs,
                skip_special_tokens=True,
                clean_up_tokenization_spaces=True,
            )

        return ip.postprocess_batch(decoded, lang=target_lang)[0]
    except Exception as e:
        # UI boundary: report the failure as text rather than crash the app.
        return f"Error: {e}"
40
+
41
# Input widgets, in the same order as translate()'s parameters.
source_box = gr.Textbox(label="Enter text in English", lines=5)
language_picker = gr.Dropdown(choices=LANG_OPTIONS, label="Select Target Language")

# Single-function UI: both inputs are passed to translate(), and its return
# value is shown as plain text.
demo = gr.Interface(
    fn=translate,
    inputs=[source_box, language_picker],
    outputs="text",
    title="IndicTrans Translator",
    description="Translate English text into Indian languages using IndicTrans2.",
)

# Start the web server only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()
requirements (3).txt ADDED
@@ -0,0 +1,114 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ aiofiles==23.2.1
2
+ alabaster==1.0.0
3
+ annotated-types==0.7.0
4
+ anyio==4.9.0
5
+ asttokens==3.0.0
6
+ babel==2.17.0
7
+ certifi==2025.1.31
8
+ charset-normalizer==3.4.1
9
+ click==8.1.8
10
+ colorama==0.4.6
11
+ comm==0.2.2
12
+ Cython==3.0.12
13
+ debugpy==1.8.13
14
+ decorator==5.2.1
15
+ docutils==0.21.2
16
+ executing==2.2.0
17
+ fastapi==0.115.12
18
+ ffmpy==0.5.0
19
+ filelock==3.18.0
20
+ fsspec==2025.3.2
21
+ gradio==5.23.3
22
+ gradio_client==1.8.0
23
+ groovy==0.1.2
24
+ h11==0.14.0
25
+ httpcore==1.0.7
26
+ httpx==0.28.1
27
+ huggingface-hub==0.30.1
28
+ idna==3.10
29
+ imagesize==1.4.1
30
+ -e git+https://github.com/VarunGumma/IndicTransToolkit@9ca9208435d2d24514b592286e89eb115483d2fb#egg=IndicTransToolkit
31
+ ipykernel==6.29.5
32
+ ipython
33
+ ipython_pygments_lexers==1.1.1
34
+ jedi==0.19.2
35
+ Jinja2==3.1.6
36
+ joblib==1.4.2
37
+ jupyter_client==8.6.3
38
+ jupyter_core==5.7.2
39
+ lxml==5.3.1
40
+ markdown-it-py==3.0.0
41
+ MarkupSafe==3.0.2
42
+ matplotlib-inline==0.1.7
43
+ mdurl==0.1.2
44
+ Morfessor==2.0.6
45
+ mpmath==1.3.0
46
+ nest-asyncio==1.6.0
47
+ networkx==3.4.2
48
+ numpy==2.2.4
49
+ orjson==3.10.16
50
+ packaging==24.2
51
+ pandas==2.2.3
52
+ parso==0.8.4
53
+ pillow==11.1.0
54
+ platformdirs==4.3.7
55
+ portalocker==3.1.1
56
+ prompt_toolkit==3.0.50
57
+ psutil==7.0.0
58
+ pure_eval==0.2.3
59
+ pydantic==2.11.2
60
+ pydantic_core==2.33.1
61
+ pydub==0.25.1
62
+ Pygments==2.19.1
63
+ python-dateutil==2.9.0.post0
64
+ python-multipart==0.0.20
65
+ pytz
66
+ PyYAML==6.0.2
67
+ pyzmq==26.3.0
68
+ regex==2024.11.6
69
+ requests==2.32.3
70
+ rich==14.0.0
71
+ roman-numerals-py==3.1.0
72
+ ruff==0.11.4
73
+ sacrebleu==2.5.1
74
+ sacremoses==0.1.1
75
+ safehttpx==0.1.6
76
+ safetensors==0.5.3
77
+ semantic-version==2.10.0
78
+ sentencepiece
79
+ setuptools==75.8.0
80
+ shellingham==1.5.4
81
+ six==1.17.0
82
+ sniffio==1.3.1
83
+ snowballstemmer==2.2.0
84
+ Sphinx
85
+ sphinx-argparse==0.5.2
86
+ sphinx-rtd-theme==3.0.2
87
+ sphinxcontrib-applehelp==2.0.0
88
+ sphinxcontrib-devhelp==2.0.0
89
+ sphinxcontrib-htmlhelp==2.1.0
90
+ sphinxcontrib-jquery==4.1
91
+ sphinxcontrib-jsmath==1.0.1
92
+ sphinxcontrib-qthelp==2.0.0
93
+ sphinxcontrib-serializinghtml==2.0.0
94
+ stack-data==0.6.3
95
+ starlette==0.46.1
96
+ sympy==1.13.1
97
+ tabulate==0.9.0
98
+ tokenizers==0.21.1
99
+ tomlkit==0.13.2
100
+ torch==2.6.0
101
+ tornado==6.4.2
102
+ tqdm==4.67.1
103
+ traitlets==5.14.3
104
+ transformers==4.50.3
105
+ typer==0.15.2
106
+ typing-inspection==0.4.0
107
+ typing_extensions==4.13.1
108
+ tzdata==2025.2
109
+ urllib3==2.3.0
110
+ uvicorn==0.34.0
111
+ wcwidth==0.2.13
112
+ websockets==15.0.1
113
+ wheel==0.45.1
114
+ gradio>=4.0.0