bearking58 committed on
Commit
782aa38
·
1 Parent(s): 84f0cff

feat: move gpt out, add confidence level

Browse files
core-model-prediction/prediction.py CHANGED
@@ -16,6 +16,8 @@ class PredictRequest(BaseModel):
16
  backspace_count: int
17
  typing_duration: int
18
  letter_click_counts: dict[str, int]
 
 
19
 
20
 
21
  class RequestModel(BaseModel):
@@ -39,6 +41,8 @@ def process_instance(data: PredictRequest):
39
  backspace_count = data.backspace_count
40
  typing_duration = data.typing_duration
41
  letter_click_counts = data.letter_click_counts
 
 
42
 
43
  # Data preparation for 1st model
44
  hypothesis = BaseModelHypothesis()
@@ -52,7 +56,8 @@ def process_instance(data: PredictRequest):
52
  # Data preparation for 2nd model
53
  secondary_model_dependencies = SecondaryModelDependencies()
54
  secondary_model_features = secondary_model_dependencies.calculate_features(
55
- question, answer, main_model_probability, backspace_count, typing_duration, letter_click_counts)
 
56
 
57
  # 2nd model prediction
58
  secondary_model = RandomForestModel()
@@ -61,5 +66,16 @@ def process_instance(data: PredictRequest):
61
 
62
  return {
63
  "predicted_class": "AI" if secondary_model_prediction == 1 else "HUMAN",
64
- "main_model_probability": str(main_model_probability)
 
 
65
  }
 
 
 
 
 
 
 
 
 
 
16
  backspace_count: int
17
  typing_duration: int
18
  letter_click_counts: dict[str, int]
19
+ gpt35_answer: str
20
+ gpt4_answer: str
21
 
22
 
23
  class RequestModel(BaseModel):
 
41
  backspace_count = data.backspace_count
42
  typing_duration = data.typing_duration
43
  letter_click_counts = data.letter_click_counts
44
+ gpt35_answer = data.gpt35_answer
45
+ gpt4_answer = data.gpt4_answer
46
 
47
  # Data preparation for 1st model
48
  hypothesis = BaseModelHypothesis()
 
56
  # Data preparation for 2nd model
57
  secondary_model_dependencies = SecondaryModelDependencies()
58
  secondary_model_features = secondary_model_dependencies.calculate_features(
59
+ question, answer, main_model_probability, backspace_count, typing_duration,
60
+ letter_click_counts, gpt35_answer, gpt4_answer)
61
 
62
  # 2nd model prediction
63
  secondary_model = RandomForestModel()
 
66
 
67
  return {
68
  "predicted_class": "AI" if secondary_model_prediction == 1 else "HUMAN",
69
+ "main_model_probability": str(main_model_probability),
70
+ "secondary_model_prediction": secondary_model_prediction,
71
+ "confidence": get_confidence(main_model_probability, secondary_model_prediction)
72
  }
73
+
74
+
75
+ def get_confidence(main_model_output: float, secondary_model_output: int):
76
+ if (main_model_output >= 0.8 and secondary_model_output == 1) or (main_model_output <= 0.2 and secondary_model_output == 0):
77
+ return 'High Confidence'
78
+ elif (0.5 < main_model_output < 0.8 and secondary_model_output == 1) or (0.2 < main_model_output <= 0.5 and secondary_model_output == 0):
79
+ return 'Partially Confident'
80
+ else:
81
+ return 'Low Confidence'
core-model-prediction/random_forest_model.py CHANGED
@@ -14,8 +14,8 @@ class RandomForestModel:
14
  ]
15
 
16
  def preprocess_input(self, secondary_model_features: List[float]) -> np.ndarray:
17
- features_df = pd.DataFrame([secondary_model_features], columns=[
18
- self.secondary_model_features])
19
  features_df[self.secondary_model_features] = self.scaler.transform(
20
  features_df[self.secondary_model_features])
21
  return features_df.values.astype(np.float32).reshape(1, -1)
 
14
  ]
15
 
16
  def preprocess_input(self, secondary_model_features: List[float]) -> np.ndarray:
17
+ features_df = pd.DataFrame(
18
+ [secondary_model_features], columns=self.secondary_model_features)
19
  features_df[self.secondary_model_features] = self.scaler.transform(
20
  features_df[self.secondary_model_features])
21
  return features_df.values.astype(np.float32).reshape(1, -1)
core-model-prediction/requirements.txt CHANGED
@@ -5,9 +5,6 @@ textstat==0.7.3
5
  scikit-learn==1.2.2
6
  transformers==4.38.2
7
  sentence-transformers==2.7.0
8
- langchain
9
- openai
10
- langchain-openai
11
  fastapi
12
  uvicorn
13
  google-cloud-secret-manager
 
5
  scikit-learn==1.2.2
6
  transformers==4.38.2
7
  sentence-transformers==2.7.0
 
 
 
8
  fastapi
9
  uvicorn
10
  google-cloud-secret-manager
core-model-prediction/secondary_model_dependencies.py CHANGED
@@ -1,35 +1,19 @@
1
  from sentence_transformers import SentenceTransformer, util
2
  from collections import Counter
3
- from langchain_openai import ChatOpenAI
4
- from langchain_core.messages import HumanMessage, SystemMessage
5
- from google.cloud import secretmanager
6
 
7
 
8
  class SecondaryModelDependencies:
9
  def __init__(self):
10
  self.text_similarity_model = SentenceTransformer(
11
  'sentence-transformers/all-mpnet-base-v2')
12
- api_key = self.access_openai_api_key()
13
- self.llm_gpt35 = ChatOpenAI(
14
- api_key=api_key, model="gpt-3.5-turbo")
15
- self.llm_gpt4 = ChatOpenAI(
16
- api_key=api_key, model="gpt-4-turbo")
17
 
18
- def access_openai_api_key(self):
19
- client = secretmanager.SecretManagerServiceClient()
20
- name = "projects/steady-climate-416810/secrets/OPENAI_API_KEY/versions/1"
21
- response = client.access_secret_version(request={"name": name})
22
- return response.payload.data.decode('UTF-8')
23
-
24
- def calculate_features(self, question: str, answer: str, probability: float, backspace_count: int, typing_duration: int, letter_click_counts: dict[str, int]):
25
  backspace_count_normalized = backspace_count / len(answer)
26
  typing_duration_normalized = typing_duration / len(answer)
27
  letter_discrepancy = self.calculate_letter_discrepancy(
28
  answer, letter_click_counts)
29
 
30
- gpt35_answer = self.generate_gpt35_answer(question)
31
- gpt4_answer = self.generate_gpt4_answer(question)
32
-
33
  cosine_sim_gpt35 = self.calculate_similarity_gpt35(
34
  answer, gpt35_answer)
35
  cosine_sim_gpt4 = self.calculate_similarity_gpt4(answer, gpt4_answer)
@@ -54,26 +38,6 @@ class SecondaryModelDependencies:
54
 
55
  return discrepancy_ratio_normalized
56
 
57
- def generate_gpt35_answer(self, question: str):
58
- messages = [
59
- SystemMessage(
60
- content="Please answer the following question based solely on your internal knowledge, without external references. Assume you are the human."),
61
- HumanMessage(question)
62
- ]
63
-
64
- gpt35_answer = self.llm_gpt35.invoke(messages)
65
- return gpt35_answer.content
66
-
67
- def generate_gpt4_answer(self, question: str):
68
- messages = [
69
- SystemMessage(
70
- content="Please answer the following question based solely on your internal knowledge, without external references. Assume you are the human."),
71
- HumanMessage(question)
72
- ]
73
-
74
- gpt4_answer = self.llm_gpt4.invoke(messages)
75
- return gpt4_answer.content
76
-
77
  def calculate_similarity_gpt35(self, answer: str, gpt35_answer: str) -> float:
78
  embedding1 = self.text_similarity_model.encode(
79
  [answer], convert_to_tensor=True)
 
1
  from sentence_transformers import SentenceTransformer, util
2
  from collections import Counter
 
 
 
3
 
4
 
5
  class SecondaryModelDependencies:
6
  def __init__(self):
7
  self.text_similarity_model = SentenceTransformer(
8
  'sentence-transformers/all-mpnet-base-v2')
 
 
 
 
 
9
 
10
+ def calculate_features(self, question: str, answer: str, probability: float, backspace_count: int, typing_duration: int,
11
+ letter_click_counts: dict[str, int], gpt35_answer: str, gpt4_answer: str):
 
 
 
 
 
12
  backspace_count_normalized = backspace_count / len(answer)
13
  typing_duration_normalized = typing_duration / len(answer)
14
  letter_discrepancy = self.calculate_letter_discrepancy(
15
  answer, letter_click_counts)
16
 
 
 
 
17
  cosine_sim_gpt35 = self.calculate_similarity_gpt35(
18
  answer, gpt35_answer)
19
  cosine_sim_gpt4 = self.calculate_similarity_gpt4(answer, gpt4_answer)
 
38
 
39
  return discrepancy_ratio_normalized
40
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
41
  def calculate_similarity_gpt35(self, answer: str, gpt35_answer: str) -> float:
42
  embedding1 = self.text_similarity_model.encode(
43
  [answer], convert_to_tensor=True)