bearking58 committed on
Commit
8f0525e
·
1 Parent(s): 236bdc5

fix: prepare next build for nltk fix

Browse files
core-model-prediction/Dockerfile CHANGED
@@ -10,6 +10,8 @@ COPY . /app
10
  # Install any needed packages specified in requirements.txt
11
  RUN pip install --no-cache-dir -r requirements.txt
12
 
 
 
13
  # Make port 8080 available to the world outside this container
14
  EXPOSE 8080
15
 
 
10
  # Install any needed packages specified in requirements.txt
11
  RUN pip install --no-cache-dir -r requirements.txt
12
 
13
+ RUN python -m nltk.downloader punkt wordnet averaged_perceptron_tagger
14
+
15
  # Make port 8080 available to the world outside this container
16
  EXPOSE 8080
17
 
core-model-prediction/hypothesis.py CHANGED
@@ -14,7 +14,6 @@ import zipfile
14
 
15
  class BaseModelHypothesis:
16
  def __init__(self):
17
- self.download_and_extract_nltk_data()
18
  self.analyzer = SentimentIntensityAnalyzer()
19
  self.lexicon_df = pd.read_csv(
20
  "https://storage.googleapis.com/interview-ai-detector/higher-accuracy-final-model/NRC-Emotion-Lexicon.csv")
@@ -49,18 +48,6 @@ class BaseModelHypothesis:
49
  self.scaler_not_normalized = joblib.load(
50
  "scalers/scaler-not-normalized.joblib")
51
 
52
- def download_and_extract_nltk_data(self):
53
- nltk.download('punkt')
54
- nltk.download('wordnet')
55
- nltk.download('averaged_perceptron_tagger')
56
-
57
- wordnet_dir = nltk.data.find("corpora/wordnet").path
58
- if not os.path.exists(wordnet_dir):
59
- zip_path = os.path.join(
60
- os.path.dirname(wordnet_dir), "wordnet.zip")
61
- with zipfile.ZipFile(zip_path, "r") as zip_ref:
62
- zip_ref.extractall(os.path.dirname(wordnet_dir))
63
-
64
  def process_emotion_lexicon(self):
65
  emotion_lexicon = {}
66
  for _, row in self.lexicon_df.iterrows():
 
14
 
15
  class BaseModelHypothesis:
16
  def __init__(self):
 
17
  self.analyzer = SentimentIntensityAnalyzer()
18
  self.lexicon_df = pd.read_csv(
19
  "https://storage.googleapis.com/interview-ai-detector/higher-accuracy-final-model/NRC-Emotion-Lexicon.csv")
 
48
  self.scaler_not_normalized = joblib.load(
49
  "scalers/scaler-not-normalized.joblib")
50
 
 
 
 
 
 
 
 
 
 
 
 
 
51
  def process_emotion_lexicon(self):
52
  emotion_lexicon = {}
53
  for _, row in self.lexicon_df.iterrows():