Update app.py
Browse files
app.py
CHANGED
@@ -10,12 +10,20 @@ from datetime import datetime
|
|
10 |
import gradio as gr
|
11 |
import io
|
12 |
|
13 |
-
|
|
|
14 |
|
15 |
-
|
16 |
-
|
17 |
-
|
18 |
-
nltk.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
19 |
|
20 |
stop_words = set(stopwords.words('english'))
|
21 |
lemmatizer = WordNetLemmatizer()
|
@@ -36,7 +44,9 @@ def capitalize_sentences(text):
|
|
36 |
|
37 |
def process_transcript(csv_file, txt_file):
|
38 |
transcript = pd.read_csv(csv_file)
|
39 |
-
loi_chuan = pd.read_csv(txt_file, sep='\t', header=None)
|
|
|
|
|
40 |
|
41 |
#transcript = pd.read_csv(io.StringIO(csv_file.read().decode("utf-8")))
|
42 |
#loi_chuan = pd.read_csv(io.StringIO(txt_file.read().decode("utf-8")), sep='\t', header=None)
|
|
|
10 |
import gradio as gr
|
11 |
import io
|
12 |
|
13 |
+
nltk_data_dir = "./nltk_data"
|
14 |
+
nltk.data.path.append(nltk_data_dir)
|
15 |
|
16 |
+
nltk_resources = ["stopwords", "punkt", "wordnet"]
|
17 |
+
for resource in nltk_resources:
|
18 |
+
try:
|
19 |
+
nltk.data.find(resource)
|
20 |
+
except LookupError:
|
21 |
+
nltk.download(resource, download_dir=nltk_data_dir)
|
22 |
+
|
23 |
+
#nltk.download('stopwords')
|
24 |
+
#nltk.download('punkt')
|
25 |
+
#nltk.download('wordnet')
|
26 |
+
#nltk.download('punkt_tab')
|
27 |
|
28 |
stop_words = set(stopwords.words('english'))
|
29 |
lemmatizer = WordNetLemmatizer()
|
|
|
44 |
|
45 |
def process_transcript(csv_file, txt_file):
|
46 |
transcript = pd.read_csv(csv_file)
|
47 |
+
#loi_chuan = pd.read_csv(txt_file, sep='\t', header=None)
|
48 |
+
loi_chuan = pd.read_csv(txt_file.name, sep='\t', header=None, encoding='utf-8', engine='python')
|
49 |
+
|
50 |
|
51 |
#transcript = pd.read_csv(io.StringIO(csv_file.read().decode("utf-8")))
|
52 |
#loi_chuan = pd.read_csv(io.StringIO(txt_file.read().decode("utf-8")), sep='\t', header=None)
|