Commit 3953432 · Parent(s): 9c34cdf
fix: try to fix wordnet not unzipping
core-model-prediction/hypothesis.py CHANGED
@@ -8,14 +8,12 @@ from collections import defaultdict
 from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
 from gemma2b_dependencies import Gemma2BDependencies
 from string import punctuation
+import os
+import zipfile
 
 
 class BaseModelHypothesis:
     def __init__(self):
-        nltk.download('punkt')
-        nltk.download('wordnet')
-        nltk.download('averaged_perceptron_tagger')
-
         self.analyzer = SentimentIntensityAnalyzer()
         self.lexicon_df = pd.read_csv(
             "https://storage.googleapis.com/interview-ai-detector/higher-accuracy-final-model/NRC-Emotion-Lexicon.csv")
@@ -50,6 +48,18 @@ class BaseModelHypothesis:
         self.scaler_not_normalized = joblib.load(
             "scalers/scaler-not-normalized.joblib")
 
+    def download_and_extract_nltk_data(self):
+        nltk.download('punkt')
+        nltk.download('wordnet')
+        nltk.download('averaged_perceptron_tagger')
+
+        wordnet_dir = nltk.data.find("corpora/wordnet").path
+        if not os.path.exists(wordnet_dir):
+            zip_path = os.path.join(
+                os.path.dirname(wordnet_dir), "wordnet.zip")
+            with zipfile.ZipFile(zip_path, "r") as zip_ref:
+                zip_ref.extractall(os.path.dirname(wordnet_dir))
+
     def process_emotion_lexicon(self):
         emotion_lexicon = {}
         for _, row in self.lexicon_df.iterrows():
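For context, the manual extraction works around NLTK sometimes leaving wordnet.zip unextracted after nltk.download('wordnet'). Below is a minimal standalone sketch of the same idea; the ensure_wordnet helper, its default paths, and the use of nltk.data.path[0] are illustrative assumptions, not code from this repository.

import os
import zipfile

import nltk


def ensure_wordnet(download_dir=None):
    """Download wordnet and extract wordnet.zip if the corpus directory is missing."""
    nltk.download("wordnet", download_dir=download_dir)

    # NLTK places corpora under <data dir>/corpora; on some setups only the
    # zip archive is present, so unzip it next to where it was downloaded.
    # Falling back to nltk.data.path[0] is an assumption about the environment.
    base = download_dir or nltk.data.path[0]
    corpora_dir = os.path.join(base, "corpora")
    wordnet_dir = os.path.join(corpora_dir, "wordnet")
    zip_path = os.path.join(corpora_dir, "wordnet.zip")

    if not os.path.isdir(wordnet_dir) and os.path.isfile(zip_path):
        with zipfile.ZipFile(zip_path, "r") as zip_ref:
            zip_ref.extractall(corpora_dir)
    return wordnet_dir

With the committed change, callers would presumably run the new method once before using the tokenizer or WordNet-backed features, e.g. hypothesis = BaseModelHypothesis() followed by hypothesis.download_and_extract_nltk_data(); where that call is wired into the Space's startup is not shown in this diff.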