bearking58 committed on
Commit
279839c
·
1 Parent(s): 469ec6b

feat: new gpt4o usage

Browse files
core-model-prediction/models/rf_weights.joblib DELETED
Binary file (228 kB)
 
core-model-prediction/models/secondary_weights.joblib ADDED
Binary file (49.8 kB). View file
 
core-model-prediction/prediction.py CHANGED
@@ -2,7 +2,7 @@ from fastapi import FastAPI, Response, status
2
  from pydantic import BaseModel
3
  from hypothesis import BaseModelHypothesis
4
  from secondary_model_dependencies import SecondaryModelDependencies
5
- from random_forest_model import RandomForestModel
6
  from main_model import PredictMainModel
7
  import numpy as np
8
  from typing import List
@@ -17,7 +17,7 @@ class PredictRequest(BaseModel):
17
  typing_duration: int
18
  letter_click_counts: dict[str, int]
19
  gpt35_answer: str
20
- gpt4_answer: str
21
 
22
 
23
  class RequestModel(BaseModel):
@@ -42,7 +42,7 @@ def process_instance(data: PredictRequest):
42
  typing_duration = data.typing_duration
43
  letter_click_counts = data.letter_click_counts
44
  gpt35_answer = data.gpt35_answer
45
- gpt4_answer = data.gpt4_answer
46
 
47
  # Data preparation for 1st model
48
  hypothesis = BaseModelHypothesis()
@@ -56,26 +56,27 @@ def process_instance(data: PredictRequest):
56
  # Data preparation for 2nd model
57
  secondary_model_dependencies = SecondaryModelDependencies()
58
  secondary_model_features = secondary_model_dependencies.calculate_features(
59
- question, answer, main_model_probability, backspace_count, typing_duration,
60
- letter_click_counts, gpt35_answer, gpt4_answer)
61
 
62
  # 2nd model prediction
63
- secondary_model = RandomForestModel()
64
- secondary_model_prediction = secondary_model.predict(
65
  secondary_model_features)
66
 
67
  return {
68
- "predicted_class": "AI" if secondary_model_prediction == 1 else "HUMAN",
69
  "main_model_probability": str(main_model_probability),
70
- "secondary_model_prediction": secondary_model_prediction,
71
- "confidence": get_confidence(main_model_probability, secondary_model_prediction)
72
  }
73
 
74
 
75
  def get_confidence(main_model_output: float, secondary_model_output: int):
76
- if (main_model_output >= 0.8 and secondary_model_output == 1) or (main_model_output <= 0.2 and secondary_model_output == 0):
 
77
  return 'High Confidence'
78
- elif (0.5 < main_model_output < 0.8 and secondary_model_output == 1) or (0.2 < main_model_output <= 0.5 and secondary_model_output == 0):
79
  return 'Partially Confident'
80
  else:
81
  return 'Low Confidence'
 
2
  from pydantic import BaseModel
3
  from hypothesis import BaseModelHypothesis
4
  from secondary_model_dependencies import SecondaryModelDependencies
5
+ from secondary_model import SecondaryModel
6
  from main_model import PredictMainModel
7
  import numpy as np
8
  from typing import List
 
17
  typing_duration: int
18
  letter_click_counts: dict[str, int]
19
  gpt35_answer: str
20
+ gpt4o_answer: str
21
 
22
 
23
  class RequestModel(BaseModel):
 
42
  typing_duration = data.typing_duration
43
  letter_click_counts = data.letter_click_counts
44
  gpt35_answer = data.gpt35_answer
45
+ gpt4o_answer = data.gpt4o_answer
46
 
47
  # Data preparation for 1st model
48
  hypothesis = BaseModelHypothesis()
 
56
  # Data preparation for 2nd model
57
  secondary_model_dependencies = SecondaryModelDependencies()
58
  secondary_model_features = secondary_model_dependencies.calculate_features(
59
+ answer, main_model_probability, backspace_count, typing_duration,
60
+ letter_click_counts, gpt35_answer, gpt4o_answer)
61
 
62
  # 2nd model prediction
63
+ secondary_model = SecondaryModel()
64
+ secondary_model_probability = secondary_model.predict(
65
  secondary_model_features)
66
 
67
  return {
68
+ "predicted_class": "AI" if secondary_model_probability > 0.57 else "HUMAN",
69
  "main_model_probability": str(main_model_probability),
70
+ "secondary_model_probability": secondary_model_probability,
71
+ "confidence": get_confidence(main_model_probability, secondary_model_probability)
72
  }
73
 
74
 
75
  def get_confidence(main_model_output: float, secondary_model_output: int):
76
+ threshold = 0.57
77
+ if (main_model_output >= 0.8 and secondary_model_output >= threshold) or (main_model_output <= 0.2 and secondary_model_output <= 1 - threshold):
78
  return 'High Confidence'
79
+ elif (0.5 < main_model_output < 0.8 and secondary_model_output >= threshold) or (0.2 < main_model_output <= 0.5 and secondary_model_output < threshold):
80
  return 'Partially Confident'
81
  else:
82
  return 'Low Confidence'
core-model-prediction/scalers/secondary_scaler.joblib CHANGED
Binary files a/core-model-prediction/scalers/secondary_scaler.joblib and b/core-model-prediction/scalers/secondary_scaler.joblib differ
 
core-model-prediction/{random_forest_model.py → secondary_model.py} RENAMED
@@ -4,13 +4,13 @@ import pandas as pd
4
  from typing import List
5
 
6
 
7
- class RandomForestModel:
8
  def __init__(self):
9
  self.scaler = joblib.load("scalers/secondary_scaler.joblib")
10
- self.model = joblib.load("models/rf_weights.joblib")
11
  self.secondary_model_features = [
12
  "machine_probability", "backspace_count_normalized", "typing_duration_normalized",
13
- "letter_discrepancy_normalized", "cosine_sim_gpt35", "cosine_sim_gpt4"
14
  ]
15
 
16
  def preprocess_input(self, secondary_model_features: List[float]) -> pd.DataFrame:
 
4
  from typing import List
5
 
6
 
7
+ class SecondaryModel:
8
  def __init__(self):
9
  self.scaler = joblib.load("scalers/secondary_scaler.joblib")
10
+ self.model = joblib.load("models/secondary_weights.joblib")
11
  self.secondary_model_features = [
12
  "machine_probability", "backspace_count_normalized", "typing_duration_normalized",
13
+ "letter_discrepancy_normalized", "cosine_sim_gpt35", "cosine_sim_gpt4o"
14
  ]
15
 
16
  def preprocess_input(self, secondary_model_features: List[float]) -> pd.DataFrame:
core-model-prediction/secondary_model_dependencies.py CHANGED
@@ -7,8 +7,8 @@ class SecondaryModelDependencies:
7
  self.text_similarity_model = SentenceTransformer(
8
  'sentence-transformers/all-mpnet-base-v2')
9
 
10
- def calculate_features(self, question: str, answer: str, probability: float, backspace_count: int, typing_duration: int,
11
- letter_click_counts: dict[str, int], gpt35_answer: str, gpt4_answer: str):
12
  backspace_count_normalized = backspace_count / len(answer)
13
  typing_duration_normalized = typing_duration / len(answer)
14
  letter_discrepancy = self.calculate_letter_discrepancy(
@@ -16,11 +16,12 @@ class SecondaryModelDependencies:
16
 
17
  cosine_sim_gpt35 = self.calculate_similarity_gpt35(
18
  answer, gpt35_answer)
19
- cosine_sim_gpt4 = self.calculate_similarity_gpt4(answer, gpt4_answer)
 
20
 
21
  return [
22
  probability, backspace_count_normalized, typing_duration_normalized,
23
- letter_discrepancy, cosine_sim_gpt35, cosine_sim_gpt4
24
  ]
25
 
26
  def calculate_letter_discrepancy(self, text: str, letter_click_counts: dict[str, int]):
@@ -46,10 +47,10 @@ class SecondaryModelDependencies:
46
  cosine_scores = util.cos_sim(embedding1, embedding2)
47
  return cosine_scores.item()
48
 
49
- def calculate_similarity_gpt4(self, answer: str, gpt4_answer: str) -> float:
50
  embedding1 = self.text_similarity_model.encode(
51
  [answer], convert_to_tensor=True)
52
  embedding2 = self.text_similarity_model.encode(
53
- [gpt4_answer], convert_to_tensor=True)
54
  cosine_scores = util.cos_sim(embedding1, embedding2)
55
  return cosine_scores.item()
 
7
  self.text_similarity_model = SentenceTransformer(
8
  'sentence-transformers/all-mpnet-base-v2')
9
 
10
+ def calculate_features(self, answer: str, probability: float, backspace_count: int, typing_duration: int,
11
+ letter_click_counts: dict[str, int], gpt35_answer: str, gpt4o_answer: str):
12
  backspace_count_normalized = backspace_count / len(answer)
13
  typing_duration_normalized = typing_duration / len(answer)
14
  letter_discrepancy = self.calculate_letter_discrepancy(
 
16
 
17
  cosine_sim_gpt35 = self.calculate_similarity_gpt35(
18
  answer, gpt35_answer)
19
+ cosine_sim_gpt4o = self.calculate_similarity_gpt4o(
20
+ answer, gpt4o_answer)
21
 
22
  return [
23
  probability, backspace_count_normalized, typing_duration_normalized,
24
+ letter_discrepancy, cosine_sim_gpt35, cosine_sim_gpt4o
25
  ]
26
 
27
  def calculate_letter_discrepancy(self, text: str, letter_click_counts: dict[str, int]):
 
47
  cosine_scores = util.cos_sim(embedding1, embedding2)
48
  return cosine_scores.item()
49
 
50
+ def calculate_similarity_gpt4o(self, answer: str, gpt4o_answer: str) -> float:
51
  embedding1 = self.text_similarity_model.encode(
52
  [answer], convert_to_tensor=True)
53
  embedding2 = self.text_similarity_model.encode(
54
+ [gpt4o_answer], convert_to_tensor=True)
55
  cosine_scores = util.cos_sim(embedding1, embedding2)
56
  return cosine_scores.item()