thamnt committed on
Commit
1780683
·
verified ·
1 Parent(s): ec6e026

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +16 -6
app.py CHANGED
@@ -10,12 +10,20 @@ from datetime import datetime
10
  import gradio as gr
11
  import io
12
 
13
- nltk.data.path.append("./nltk_data")
 
14
 
15
- nltk.download('stopwords')
16
- nltk.download('punkt')
17
- nltk.download('wordnet')
18
- nltk.download('punkt_tab')
 
 
 
 
 
 
 
19
 
20
  stop_words = set(stopwords.words('english'))
21
  lemmatizer = WordNetLemmatizer()
@@ -36,7 +44,9 @@ def capitalize_sentences(text):
36
 
37
  def process_transcript(csv_file, txt_file):
38
  transcript = pd.read_csv(csv_file)
39
- loi_chuan = pd.read_csv(txt_file, sep='\t', header=None)
 
 
40
 
41
  #transcript = pd.read_csv(io.StringIO(csv_file.read().decode("utf-8")))
42
  #loi_chuan = pd.read_csv(io.StringIO(txt_file.read().decode("utf-8")), sep='\t', header=None)
 
10
  import gradio as gr
11
  import io
12
 
13
+ nltk_data_dir = "./nltk_data"
14
+ nltk.data.path.append(nltk_data_dir)
15
 
16
+ nltk_resources = ["stopwords", "punkt", "wordnet"]
17
+ for resource in nltk_resources:
18
+ try:
19
+ nltk.data.find(resource)
20
+ except LookupError:
21
+ nltk.download(resource, download_dir=nltk_data_dir)
22
+
23
+ #nltk.download('stopwords')
24
+ #nltk.download('punkt')
25
+ #nltk.download('wordnet')
26
+ #nltk.download('punkt_tab')
27
 
28
  stop_words = set(stopwords.words('english'))
29
  lemmatizer = WordNetLemmatizer()
 
44
 
45
  def process_transcript(csv_file, txt_file):
46
  transcript = pd.read_csv(csv_file)
47
+ #loi_chuan = pd.read_csv(txt_file, sep='\t', header=None)
48
+ loi_chuan = pd.read_csv(txt_file.name, sep='\t', header=None, encoding='utf-8', engine='python')
49
+
50
 
51
  #transcript = pd.read_csv(io.StringIO(csv_file.read().decode("utf-8")))
52
  #loi_chuan = pd.read_csv(io.StringIO(txt_file.read().decode("utf-8")), sep='\t', header=None)