Yakobus Iryanto Prasethio committed
Commit e5e10bc · unverified · 2 Parent(s): 585662b 1c1651d

Merge pull request #8 from YakobusIP/main

core-model-prediction/gemma2b_dependencies.py CHANGED
@@ -43,7 +43,7 @@ class Gemma2BDependencies:
 
     def calculate_burstiness(self, text: str):
         # Tokenize the text using GPT-2 tokenizer
-        tokens = self.tokenizer.tokenize(text)
+        tokens = self.tokenizer.encode(text, add_special_tokens=False)
 
         # Count token frequencies
         frequency_counts = list(Counter(tokens).values())
@@ -55,22 +55,3 @@ class Gemma2BDependencies:
         # Compute Variance-to-Mean Ratio (VMR) for burstiness
         vmr = variance / mean if mean > 0 else 0
         return vmr
-
-    def get_embedding(self, text: str):
-        inputs = self.tokenizer(text, return_tensors="pt",
-                                truncation=True, max_length=1024)
-        inputs = {k: v.to(self.device) for k, v in inputs.items()}
-
-        with torch.no_grad():
-            outputs = self.model(**inputs, output_hidden_states=True)
-
-        last_hidden_states = outputs.hidden_states[-1]
-        # Average the token embeddings to get a sentence-level embedding
-        embedding = torch.mean(last_hidden_states, dim=1)
-        return embedding
-
-    def calculate_cosine_similarity(self, question: str, answer: str):
-        embedding1 = self.get_embedding(question)
-        embedding2 = self.get_embedding(answer)
-        # Ensure the embeddings are in the correct shape for cosine_similarity
-        return cosine_similarity(embedding1, embedding2).item()
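
Context for the change above: switching from tokenizer.tokenize to tokenizer.encode(..., add_special_tokens=False) makes the burstiness statistic operate on integer token ids rather than sub-token strings; the Variance-to-Mean Ratio itself is unchanged. A minimal, self-contained sketch of that VMR computation (the standalone function name is illustrative, not part of the repository):

from collections import Counter

import numpy as np


def burstiness_vmr(token_ids: list[int]) -> float:
    # Count how often each token id occurs, then compare the spread of those
    # counts to their average: a VMR above 1 indicates "bursty", repetitive text.
    frequency_counts = list(Counter(token_ids).values())
    mean = np.mean(frequency_counts)
    variance = np.var(frequency_counts)
    return float(variance / mean) if mean > 0 else 0.0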
core-model-prediction/hypothesis.py CHANGED
@@ -3,24 +3,47 @@ import joblib
 import textstat
 import pandas as pd
 import numpy as np
-from collections import defaultdict, Counter
+from typing import List
+from collections import defaultdict
 from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
 from gemma2b_dependencies import Gemma2BDependencies
+from string import punctuation
 
 
 class BaseModelHypothesis:
     def __init__(self):
         nltk.download('punkt')
+        nltk.download('wordnet')
         nltk.download('averaged_perceptron_tagger')
 
         self.analyzer = SentimentIntensityAnalyzer()
         self.lexicon_df = pd.read_csv(
-            "https://storage.googleapis.com/ta-ai-detector/datasets/NRC-Emotion-Lexicon.csv")
+            "https://storage.googleapis.com/interview-ai-detector/higher-accuracy-final-model/NRC-Emotion-Lexicon.csv")
         self.emotion_lexicon = self.process_emotion_lexicon()
+        self.lemmatizer = nltk.stem.WordNetLemmatizer()
         self.gemma2bdependencies = Gemma2BDependencies()
 
-        self.features_normalized_text_length = []
-        self.features_not_normalized = []
+        self.additional_feature_columns = [
+            "nn_ratio", "nns_ratio", "jj_ratio", "in_ratio", "dt_ratio", "vb_ratio", "prp_ratio", "rb_ratio",
+            "compound_score", "gunning_fog", "smog_index", "dale_chall_score",
+            "negative_emotion_proportions", "positive_emotion_proportions", "fear_emotion_proportions",
+            "anger_emotion_proportions", "trust_emotion_proportions", "sadness_emotion_proportions",
+            "disgust_emotion_proportions", "anticipation_emotion_proportions", "joy_emotion_proportions",
+            "surprise_emotion_proportions", "unique_words_ratio", "perplexity", "burstiness"
+        ]
+
+        self.features_normalized_text_length = [
+            "nn_ratio", "nns_ratio", "jj_ratio", "in_ratio", "dt_ratio", "vb_ratio", "prp_ratio", "rb_ratio",
+            "negative_emotion_proportions", "positive_emotion_proportions", "fear_emotion_proportions",
+            "anger_emotion_proportions", "trust_emotion_proportions", "sadness_emotion_proportions",
+            "disgust_emotion_proportions", "anticipation_emotion_proportions", "joy_emotion_proportions",
+            "surprise_emotion_proportions", "unique_words_ratio"
+        ]
+
+        self.features_not_normalized = [
+            "compound_score", "gunning_fog", "smog_index", "dale_chall_score",
+            "perplexity", "burstiness"
+        ]
 
         self.scaler_normalized_text_length = joblib.load(
             "scalers/scaler-normalized-text-length.joblib")
@@ -35,32 +58,43 @@ class BaseModelHypothesis:
             emotion_lexicon[row["word"]].append(row["emotion"])
         return emotion_lexicon
 
-    def calculate_normalized_text_length_features(self, text: str) -> np.ndarray:
-        self.features_normalized_text_length = self.extract_pos_features(
+    def calculate_features_dataframe(self, text: str) -> np.ndarray:
+        normalized_text_length_features = self.calculate_normalized_text_length_features(
             text)
-        self.features_normalized_text_length = self.features_normalized_text_length + \
-            self.calculate_emotion_proportions(text)
-        self.features_normalized_text_length.append(
-            self.measure_unique_word_ratio(text))
-
-        return self.scaler_normalized_text_length.transform(np.array(self.features_normalized_text_length).astype(np.float32).reshape(1, -1))
-
-    def calculate_not_normalized_features(self, text: str) -> np.ndarray:
-        self.features_not_normalized.append(
-            self.measure_sentiment_intensity(text))
-        self.features_not_normalized = self.features_not_normalized + \
-            self.measure_readability(text)
-        self.features_not_normalized.append(
-            self.gemma2bdependencies.calculate_perplexity(text))
-        self.features_not_normalized.append(
-            self.gemma2bdependencies.calculate_burstiness(text))
-
-        return self.scaler_not_normalized.transform(np.array(self.features_not_normalized).astype(np.float32).reshape(1, -1))
+        not_normalized_features = self.calculate_not_normalized_features(text)
+        all_features = normalized_text_length_features + not_normalized_features
+        features_df = pd.DataFrame(
+            [all_features], columns=self.additional_feature_columns)
+
+        # Scaling features
+        features_df[self.features_normalized_text_length] = self.scaler_normalized_text_length.transform(
+            features_df[self.features_normalized_text_length])
+        features_df[self.features_not_normalized] = self.scaler_not_normalized.transform(
+            features_df[self.features_not_normalized])
+
+        ordered_df = features_df[self.additional_feature_columns]
+
+        return ordered_df.values.astype(np.float32).reshape(1, -1)
+
+    def calculate_normalized_text_length_features(self, text: str) -> List[float]:
+        pos_features = self.extract_pos_features(text)
+        emotion_features = self.calculate_emotion_proportions(text)
+        unique_word_ratio = [self.measure_unique_word_ratio(text)]
+        features = pos_features + emotion_features + unique_word_ratio
+        return features
+
+    def calculate_not_normalized_features(self, text: str) -> List[float]:
+        sentiment_intensity = self.measure_sentiment_intensity(text)
+        readability_scores = self.measure_readability(text)
+        perplexity = [self.gemma2bdependencies.calculate_perplexity(text)]
+        burstiness = [self.gemma2bdependencies.calculate_burstiness(text)]
+        features = sentiment_intensity + readability_scores + perplexity + burstiness
+        return features
 
     def extract_pos_features(self, text: str):
         words = nltk.word_tokenize(text)
         pos_tags = nltk.pos_tag(words)
-        desired_tags = ["JJ", "VB", "RB", "PRP", "DT", "IN", "NN", "NNS"]
+        desired_tags = ["NN", "NNS", "JJ", "IN", "DT", "VB", "PRP", "RB"]
        pos_counts = defaultdict(int, {tag: 0 for tag in desired_tags})
 
         for _, pos in pos_tags:
@@ -83,20 +117,37 @@ class BaseModelHypothesis:
 
         return [gunning_fog, smog_index, dale_chall_score]
 
+    def __penn2morphy(self, penntag):
+        morphy_tag = {
+            'NN': 'n', 'NNS': 'n', 'NNP': 'n', 'NNPS': 'n',  # Nouns
+            'JJ': 'a', 'JJR': 'a', 'JJS': 'a',  # Adjectives
+            'VB': 'v', 'VBD': 'v', 'VBG': 'v', 'VBN': 'v', 'VBP': 'v', 'VBZ': 'v',  # Verbs
+            'RB': 'r', 'RBR': 'r', 'RBS': 'r',  # Adverbs
+            # Pronouns, determiners, prepositions, modal verbs
+            'PRP': 'n', 'PRP$': 'n', 'DT': 'n', 'IN': 'n', 'MD': 'v',
+            # Others, treated as nouns unless a better fit is found
+            'CC': 'n', 'CD': 'n', 'EX': 'n', 'FW': 'n', 'POS': 'n', 'TO': 'n', 'WDT': 'n', 'WP': 'n', 'WP$': 'n', 'WRB': 'n', 'PDT': 'n'
+        }
+        return morphy_tag.get(penntag[:2], 'n')
+
     def calculate_emotion_proportions(self, text: str):
         tokens = nltk.word_tokenize(text)
+        tagged_tokens = nltk.pos_tag(tokens)
+
+        lemmas = [self.lemmatizer.lemmatize(
+            token.lower(), pos=self.__penn2morphy(tag)) for token, tag in tagged_tokens]
 
-        total_tokens = len(tokens)
+        total_lemmas = len(lemmas)
 
         emotion_counts = {emotion: 0 for emotion in [
             "negative", "positive", "fear", "anger", "trust", "sadness", "disgust", "anticipation", "joy", "surprise"]}
 
-        for token in tokens:
-            if token in self.emotion_lexicon:
-                for emotion in self.emotion_lexicon[token]:
+        for lemma in lemmas:
+            if lemma in self.emotion_lexicon:
+                for emotion in self.emotion_lexicon[lemma]:
                     emotion_counts[emotion] += 1
 
-        proportions = {emotion: count / total_tokens for emotion,
+        proportions = {emotion: count / total_lemmas for emotion,
                        count in emotion_counts.items()}
 
         return [
@@ -105,9 +156,12 @@ class BaseModelHypothesis:
         ]
 
     def measure_unique_word_ratio(self, text: str):
-        tokens = nltk.word_tokenize(text)
+        tokens = nltk.word_tokenize(text.lower())
+
+        tokens = [token for token in tokens if token not in punctuation]
+
        total_words = len(tokens)
 
-        unique_words = len(Counter(tokens).keys())
+        unique_words = len(set(tokens))
 
        return (unique_words / total_words)
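
The new calculate_features_dataframe path scales two disjoint groups of named columns with separate pre-fitted scalers and then flattens the row back into a float32 vector. A reduced sketch of that pattern follows; the column names, the StandardScaler stand-ins, and the sample values are placeholders (the repository loads its scalers from joblib files), assuming scalers fitted on the same column layout:

import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler

# Placeholder column groups standing in for the repository's feature lists.
normalized_cols = ["nn_ratio", "unique_words_ratio"]
not_normalized_cols = ["perplexity", "burstiness"]
all_cols = normalized_cols + not_normalized_cols

# Stand-ins for the joblib-loaded scalers, fitted here on dummy data.
scaler_a = StandardScaler().fit(
    pd.DataFrame(np.random.rand(10, 2), columns=normalized_cols))
scaler_b = StandardScaler().fit(
    pd.DataFrame(np.random.rand(10, 2), columns=not_normalized_cols))

row = [0.12, 0.55, 30.0, 1.4]  # one text's raw feature values (illustrative)
features_df = pd.DataFrame([row], columns=all_cols)

# Scale each column group with its own scaler, keeping the DataFrame layout.
features_df[normalized_cols] = scaler_a.transform(features_df[normalized_cols])
features_df[not_normalized_cols] = scaler_b.transform(features_df[not_normalized_cols])

vector = features_df[all_cols].values.astype(np.float32).reshape(1, -1)
print(vector.shape)  # (1, 4)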
core-model-prediction/main_model.py CHANGED
@@ -5,31 +5,50 @@ import torch
 import numpy as np
 
 
-class AlbertCustomClassificationHead(nn.Module):
-    def __init__(self, albert_model, num_additional_features=25, dropout_rate=0.1):
-        super(AlbertCustomClassificationHead, self).__init__()
-        self.albert_model = albert_model
-        self.dropout = nn.Dropout(dropout_rate)
-        self.classifier = nn.Linear(1024 + num_additional_features, 1)
-
-    def forward(self, input_ids, attention_mask, additional_features, labels=None):
-        albert_output = self.albert_model(
-            input_ids=input_ids, attention_mask=attention_mask).pooler_output
-
-        combined_features = torch.cat(
-            [albert_output, additional_features], dim=1)
-
-        dropout_output = self.dropout(combined_features)
-
-        logits = self.classifier(dropout_output)
-
-        if labels is not None:
-            loss_fn = nn.BCEWithLogitsLoss()
-            labels = labels.unsqueeze(1)
-            loss = loss_fn(logits, labels.float())
-            return logits, loss
-        else:
-            return logits
+class AlbertSeparateTransformation(nn.Module):
+    def __init__(self, albert_model, num_additional_features=25,
+                 hidden_size_albert=512, hidden_size_additional=128, classifier_hidden_size=256,
+                 dropout_rate_albert=0.3, dropout_rate_additional=0.1, dropout_rate_classifier=0.1):
+        super(AlbertSeparateTransformation, self).__init__()
+        self.albert = albert_model
+
+        # Transform ALBERT's features to an intermediate space
+        self.albert_feature_transform = nn.Sequential(
+            nn.Linear(1024, hidden_size_albert),
+            nn.ReLU(),
+            nn.Dropout(dropout_rate_albert),
+        )
+
+        # Transform additional features to an intermediate space
+        self.additional_feature_transform = nn.Sequential(
+            nn.Linear(num_additional_features, hidden_size_additional),
+            nn.ReLU(),
+            nn.Dropout(dropout_rate_additional),
+        )
+
+        # Combine both transformed features and process for final prediction
+        self.classifier = nn.Sequential(
+            nn.Linear(hidden_size_albert + hidden_size_additional,
+                      classifier_hidden_size),
+            nn.ReLU(),
+            nn.Dropout(dropout_rate_classifier),
+            nn.Linear(classifier_hidden_size, 1)
+        )
+
+    def forward(self, input_ids, attention_mask, additional_features):
+        albert_output = self.albert(
+            input_ids=input_ids, attention_mask=attention_mask).pooler_output
+
+        transformed_albert_features = self.albert_feature_transform(
+            albert_output)
+        transformed_additional_features = self.additional_feature_transform(
+            additional_features)
+
+        combined_features = torch.cat(
+            (transformed_albert_features, transformed_additional_features), dim=1)
+
+        logits = self.classifier(combined_features)
+        return logits
 
 
 class PredictMainModel:
@@ -47,10 +66,9 @@ class PredictMainModel:
         self.albert_model = AlbertModel.from_pretrained(self.model_name)
         self.device = DeviceManager()
 
-        self.model = AlbertCustomClassificationHead(
+        self.model = AlbertSeparateTransformation(
             self.albert_model).to(self.device)
-        # TODO : CHANGE MODEL STATE DICT PATH
-        self.model.load_state_dict(torch.load("models/albert_model.pth"))
+        self.model.load_state_dict(torch.load("models/albert_weights.pth"))
 
     def preprocess_input(self, text: str, additional_features: np.ndarray):
         encoding = self.tokenizer.encode_plus(
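
To make the new head's geometry concrete: with the default hyperparameters above, the 1024-d ALBERT pooler output and the 25 handcrafted features are projected to 512-d and 128-d respectively, concatenated into a 640-d vector, and mapped to a single logit. A quick shape check with random tensors (a sketch that mirrors the defaults rather than loading the real model or weights):

import torch
import torch.nn as nn

albert_branch = nn.Sequential(nn.Linear(1024, 512), nn.ReLU(), nn.Dropout(0.3))
extra_branch = nn.Sequential(nn.Linear(25, 128), nn.ReLU(), nn.Dropout(0.1))
classifier = nn.Sequential(
    nn.Linear(512 + 128, 256), nn.ReLU(), nn.Dropout(0.1), nn.Linear(256, 1))

pooler_output = torch.randn(4, 1024)      # stand-in for ALBERT's pooler_output
additional_features = torch.randn(4, 25)  # stand-in for the handcrafted features

combined = torch.cat(
    (albert_branch(pooler_output), extra_branch(additional_features)), dim=1)
logits = classifier(combined)
print(combined.shape, logits.shape)  # torch.Size([4, 640]) torch.Size([4, 1])

Since the loss computation was dropped from forward, a training loop would presumably apply BCEWithLogitsLoss to these logits outside the module.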
core-model-prediction/models/{albert_model.pth → albert_weights.pth} RENAMED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6b203b54caba10e290830cf720c8cd35093a358792e996ee04ee7d8e5e341651
-size 70752187
+oid sha256:59184c88c7921ac5f115aa0b10b3224536b5f7d7ebb6cf07fd45eecccfcff3ae
+size 73519347
core-model-prediction/models/random_forest.joblib CHANGED
Binary files a/core-model-prediction/models/random_forest.joblib and b/core-model-prediction/models/random_forest.joblib differ
 
core-model-prediction/prediction.py CHANGED
@@ -40,29 +40,27 @@ def process_instance(data: PredictRequest):
     typing_duration = data.typing_duration
     letter_click_counts = data.letter_click_counts
 
+    # Data preparation for 1st model
     hypothesis = BaseModelHypothesis()
-    features_normalized_text_length = hypothesis.calculate_normalized_text_length_features(
-        answer)
-    features_not_normalized = hypothesis.calculate_not_normalized_features(
-        answer)
-
-    combined_additional_features = np.concatenate(
-        (features_normalized_text_length, features_not_normalized), axis=1)
+    additional_features = hypothesis.calculate_features_dataframe(answer)
 
+    # 1st model prediction
     main_model = PredictMainModel()
     main_model_probability = main_model.predict(
-        answer, combined_additional_features)
+        answer, additional_features)
 
+    # Data preparation for 2nd model
     random_forest_features = RandomForestDependencies()
     secondary_model_features = random_forest_features.calculate_features(
-        question, answer, main_model_probability, backspace_count, typing_duration, letter_click_counts)
+        answer, main_model_probability, backspace_count, typing_duration, letter_click_counts)
 
+    # 2nd model prediction
     secondary_model = RandomForestModel()
     secondary_model_prediction = secondary_model.predict(
         secondary_model_features)
 
     return {
-        "prediction_class": "AI" if secondary_model_prediction == 1 else "HUMAN",
+        "predicted_class": "AI" if secondary_model_prediction == 1 else "HUMAN",
         "details": {
             "main_model_probability": str(main_model_probability),
             "final_prediction": secondary_model_prediction
core-model-prediction/random_forest_dependencies.py CHANGED
@@ -3,19 +3,14 @@ from collections import Counter
 
 
 class RandomForestDependencies:
-    def __init__(self):
-        self.gemma2bdependencies = Gemma2BDependencies()
-
-    def calculate_features(self, question: str, answer: str, probability: float, backspace_count: int, typing_duration: int, letter_click_counts: dict[str, int]):
-        cosine_similarity = self.gemma2bdependencies.calculate_cosine_similarity(
-            question, answer)
+    def calculate_features(self, answer: str, probability: float, backspace_count: int, typing_duration: int, letter_click_counts: dict[str, int]):
         backspace_count_normalized = backspace_count / len(answer)
         typing_duration_normalized = typing_duration / len(answer)
         letter_discrepancy = self.calculate_letter_discrepancy(
             answer, letter_click_counts)
 
         return [
-            cosine_similarity, probability, backspace_count_normalized,
+            probability, backspace_count_normalized,
             typing_duration_normalized, letter_discrepancy
         ]
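
With the question/answer cosine-similarity feature removed, the secondary model now consumes four features per answer. A compact sketch of how they are assembled (the standalone function name is illustrative; the letter-discrepancy value is assumed to come from calculate_letter_discrepancy, whose internals are not shown in this diff):

def keystroke_features(answer: str, probability: float,
                       backspace_count: int, typing_duration: int,
                       letter_discrepancy: float) -> list[float]:
    # Normalise raw keystroke counts by answer length so long and short
    # answers are comparable, then order features as the random forest expects.
    n_chars = len(answer)
    return [
        probability,                # main model's machine probability
        backspace_count / n_chars,  # backspace_count_normalized
        typing_duration / n_chars,  # typing_duration_normalized
        letter_discrepancy,         # letter_discrepancy_normalized
    ]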
 
core-model-prediction/random_forest_model.py CHANGED
@@ -1,5 +1,6 @@
 import joblib
 import numpy as np
+import pandas as pd
 from typing import List
 
 
@@ -7,9 +8,15 @@ class RandomForestModel:
     def __init__(self):
         self.scaler = joblib.load("scalers/rf_scaler.joblib")
         self.model = joblib.load("models/random_forest.joblib")
+        self.secondary_model_features = [
+            "machine_probability", "backspace_count_normalized", "typing_duration_normalized", "letter_discrepancy_normalized"
+        ]
 
     def preprocess_input(self, secondary_model_features: List[float]) -> np.ndarray:
-        return self.scaler.transform(np.array(secondary_model_features).astype(np.float32).reshape(1, -1))
+        features_df = pd.DataFrame([secondary_model_features], columns=[
+            self.secondary_model_features])
+        features_df = self.scaler.transform(features_df)
+        return features_df.values.astype(np.float32).reshape(1, -1)
 
     def predict(self, secondary_model_features: List[float]):
         return int(self.model.predict(self.preprocess_input(secondary_model_features))[0])
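
Passing the features to the scaler as a named DataFrame keeps the column order explicit. A minimal sketch of the same preprocessing step, assuming a scaler whose transform returns a plain NumPy array (scikit-learn's default output) and using an illustrative StandardScaler in place of the pickled rf_scaler.joblib:

import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler

feature_names = ["machine_probability", "backspace_count_normalized",
                 "typing_duration_normalized", "letter_discrepancy_normalized"]

# Illustrative scaler fitted on dummy data with the same column names.
scaler = StandardScaler().fit(
    pd.DataFrame(np.random.rand(20, 4), columns=feature_names))

def preprocess_input(secondary_model_features: list[float]) -> np.ndarray:
    # Build a one-row DataFrame so the scaler sees the expected feature names.
    features_df = pd.DataFrame([secondary_model_features], columns=feature_names)
    scaled = scaler.transform(features_df)  # ndarray of shape (1, 4)
    return scaled.astype(np.float32).reshape(1, -1)

print(preprocess_input([0.42, 0.03, 0.8, 0.01]).shape)  # (1, 4)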
core-model-prediction/requirements.txt CHANGED
@@ -2,8 +2,8 @@ nltk
 vaderSentiment
 pandas
 textstat
-scikit-learn==1.4.1.post1
-transformers
+scikit-learn==1.2.2
+transformers==4.38.2
 fastapi
 uvicorn
 google-cloud-secret-manager
core-model-prediction/scalers/rf_scaler.joblib CHANGED
Binary files a/core-model-prediction/scalers/rf_scaler.joblib and b/core-model-prediction/scalers/rf_scaler.joblib differ
 
core-model-prediction/scalers/{scaler-normalized-text-length.joblib → torch-scaler-normalized-text-length.joblib} RENAMED
Binary files a/core-model-prediction/scalers/scaler-normalized-text-length.joblib and b/core-model-prediction/scalers/torch-scaler-normalized-text-length.joblib differ
 
core-model-prediction/scalers/{scaler-not-normalized.joblib → torch-scaler-not-normalized.joblib} RENAMED
Binary files a/core-model-prediction/scalers/scaler-not-normalized.joblib and b/core-model-prediction/scalers/torch-scaler-not-normalized.joblib differ