bearking58 committed on
Commit
3953432
·
1 Parent(s): 9c34cdf

fix: try to fix wordnet not unzipping

Browse files
Files changed (1) hide show
  1. core-model-prediction/hypothesis.py +14 -4
core-model-prediction/hypothesis.py CHANGED
@@ -8,14 +8,12 @@ from collections import defaultdict
8
  from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
9
  from gemma2b_dependencies import Gemma2BDependencies
10
  from string import punctuation
 
 
11
 
12
 
13
  class BaseModelHypothesis:
14
  def __init__(self):
15
- nltk.download('punkt')
16
- nltk.download('wordnet')
17
- nltk.download('averaged_perceptron_tagger')
18
-
19
  self.analyzer = SentimentIntensityAnalyzer()
20
  self.lexicon_df = pd.read_csv(
21
  "https://storage.googleapis.com/interview-ai-detector/higher-accuracy-final-model/NRC-Emotion-Lexicon.csv")
@@ -50,6 +48,18 @@ class BaseModelHypothesis:
50
  self.scaler_not_normalized = joblib.load(
51
  "scalers/scaler-not-normalized.joblib")
52
 
 
 
 
 
 
 
 
 
 
 
 
 
53
  def process_emotion_lexicon(self):
54
  emotion_lexicon = {}
55
  for _, row in self.lexicon_df.iterrows():
 
8
  from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
9
  from gemma2b_dependencies import Gemma2BDependencies
10
  from string import punctuation
11
+ import os
12
+ import zipfile
13
 
14
 
15
  class BaseModelHypothesis:
16
  def __init__(self):
 
 
 
 
17
  self.analyzer = SentimentIntensityAnalyzer()
18
  self.lexicon_df = pd.read_csv(
19
  "https://storage.googleapis.com/interview-ai-detector/higher-accuracy-final-model/NRC-Emotion-Lexicon.csv")
 
48
  self.scaler_not_normalized = joblib.load(
49
  "scalers/scaler-not-normalized.joblib")
50
 
51
def download_and_extract_nltk_data(self):
    """Download the required NLTK resources and make sure WordNet is unzipped.

    Downloads ``punkt``, ``wordnet`` and ``averaged_perceptron_tagger``, then
    extracts ``corpora/wordnet.zip`` in every NLTK data directory where the
    archive is present but the ``corpora/wordnet`` directory is not.

    Raises:
        LookupError: if WordNet cannot be located in any NLTK data directory
            after the download and extraction attempt.
        zipfile.BadZipFile: if a downloaded ``wordnet.zip`` is corrupt.
    """
    for package in ('punkt', 'wordnet', 'averaged_perceptron_tagger'):
        nltk.download(package)

    # Do not derive the location from nltk.data.find(...).path: when only the
    # archive exists, find() returns a zip pointer that has no ``path``
    # attribute, and when the directory already exists there is nothing to
    # extract. Inspect the NLTK search path directly instead.
    for data_root in nltk.data.path:
        corpora_dir = os.path.join(data_root, "corpora")
        wordnet_dir = os.path.join(corpora_dir, "wordnet")
        zip_path = os.path.join(corpora_dir, "wordnet.zip")
        if os.path.isdir(wordnet_dir) or not os.path.isfile(zip_path):
            continue
        with zipfile.ZipFile(zip_path, "r") as zip_ref:
            zip_ref.extractall(corpora_dir)

    # Fail loudly (LookupError) if WordNet is still unavailable, rather than
    # deferring the error to the first lemmatizer call.
    nltk.data.find("corpora/wordnet")
62
+
63
  def process_emotion_lexicon(self):
64
  emotion_lexicon = {}
65
  for _, row in self.lexicon_df.iterrows():