Ilde committed on
Commit
73e6565
·
1 Parent(s): 26166f1

agregar download de tokenizador

Browse files
Files changed (1) hide show
  1. app.py +3 -1
app.py CHANGED
@@ -4,6 +4,8 @@ from nltk.tokenize import word_tokenize
4
  import gradio as gr
5
 
6
 
 
 
7
  # Use gensim KeyedVectors to read the embeddings
8
  wordvectors_file_vec = 'smaller_model_spa.txt'
9
  smaller_model = KeyedVectors.load_word2vec_format(wordvectors_file_vec)
@@ -12,7 +14,7 @@ with open('stop_words.pkl', 'rb') as f:
12
 
13
 
14
  def filter_words(x):
15
- word_tokens = word_tokenize(x)
16
  filtered_sentence = [w for w in word_tokens if not w.lower() in stop_words]
17
  return filtered_sentence
18
 
 
4
  import gradio as gr
5
 
6
 
7
+ nltk.download('punkt')
8
+
9
  # Use gensim KeyedVectors to read the embeddings
10
  wordvectors_file_vec = 'smaller_model_spa.txt'
11
  smaller_model = KeyedVectors.load_word2vec_format(wordvectors_file_vec)
 
14
 
15
 
16
  def filter_words(x):
17
+ word_tokens = word_tokenize(x, language = "spanish")
18
  filtered_sentence = [w for w in word_tokens if not w.lower() in stop_words]
19
  return filtered_sentence
20