sidd1311 committed on
Commit
7884e0d
·
verified ·
1 Parent(s): 5d617f8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +11 -1
app.py CHANGED
@@ -5,6 +5,8 @@ from transformers import AutoModelForCausalLM, AutoTokenizer
5
  import os
6
  import re
7
  from polyglot.detect import Detector
 
 
8
 
9
  HF_TOKEN = os.environ.get("HF_TOKEN", None)
10
  MODEL = "LLaMAX/LLaMAX3-8B-Alpaca"
@@ -19,6 +21,11 @@ model = AutoModelForCausalLM.from_pretrained(
19
  device_map="auto")
20
  tokenizer = AutoTokenizer.from_pretrained(MODEL)
21
 
 
 
 
 
 
22
 
23
  def lang_detector(text):
24
  min_chars = 5
@@ -91,7 +98,10 @@ CSS = """
91
  """
92
 
93
  LICENSE = """
94
- Model: <a href="https://huggingface.co/LLaMAX/LLaMAX3-8B-Alpaca">LLaMAX3-8B-Alpaca</a>
 
 
 
95
  """
96
 
97
  LANG_LIST = [
 
5
  import os
6
  import re
7
  from polyglot.detect import Detector
8
+ from bhasa_dataset import load_bhasa_dataset
9
+
10
 
11
  HF_TOKEN = os.environ.get("HF_TOKEN", None)
12
  MODEL = "LLaMAX/LLaMAX3-8B-Alpaca"
 
21
  device_map="auto")
22
  tokenizer = AutoTokenizer.from_pretrained(MODEL)
23
 
24
+ # Simulated integration of BhasaAnuvaad dataset from paper: https://huggingface.co/papers/2411.04699
25
+ dataset = load_bhasa_dataset()
26
+ sample_pair = dataset[0]
27
+ print(f"Example from dataset - Source: {sample_pair['source_text']}, Target: {sample_pair['target_text']}")
28
+
29
 
30
  def lang_detector(text):
31
  min_chars = 5
 
98
  """
99
 
100
  LICENSE = """
101
+ Model: <a href="https://huggingface.co/LLaMAX/LLaMAX3-8B-Alpaca">LLaMAX3-8B-Alpaca</a><br>
102
+ Dataset: <a href="https://huggingface.co/papers/2411.04699">BhasaAnuvaad: Multilingual Speech Translation</a>
103
+ """
104
+
105
  """
106
 
107
  LANG_LIST = [