Spaces:
Running
Running
Commit
·
782aa38
1
Parent(s):
84f0cff
feat: move gpt out, add confidence level
Browse files
core-model-prediction/prediction.py
CHANGED
@@ -16,6 +16,8 @@ class PredictRequest(BaseModel):
|
|
16 |
backspace_count: int
|
17 |
typing_duration: int
|
18 |
letter_click_counts: dict[str, int]
|
|
|
|
|
19 |
|
20 |
|
21 |
class RequestModel(BaseModel):
|
@@ -39,6 +41,8 @@ def process_instance(data: PredictRequest):
|
|
39 |
backspace_count = data.backspace_count
|
40 |
typing_duration = data.typing_duration
|
41 |
letter_click_counts = data.letter_click_counts
|
|
|
|
|
42 |
|
43 |
# Data preparation for 1st model
|
44 |
hypothesis = BaseModelHypothesis()
|
@@ -52,7 +56,8 @@ def process_instance(data: PredictRequest):
|
|
52 |
# Data preparation for 2nd model
|
53 |
secondary_model_dependencies = SecondaryModelDependencies()
|
54 |
secondary_model_features = secondary_model_dependencies.calculate_features(
|
55 |
-
question, answer, main_model_probability, backspace_count, typing_duration,
|
|
|
56 |
|
57 |
# 2nd model prediction
|
58 |
secondary_model = RandomForestModel()
|
@@ -61,5 +66,16 @@ def process_instance(data: PredictRequest):
|
|
61 |
|
62 |
return {
|
63 |
"predicted_class": "AI" if secondary_model_prediction == 1 else "HUMAN",
|
64 |
-
"main_model_probability": str(main_model_probability)
|
|
|
|
|
65 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
16 |
backspace_count: int
|
17 |
typing_duration: int
|
18 |
letter_click_counts: dict[str, int]
|
19 |
+
gpt35_answer: str
|
20 |
+
gpt4_answer: str
|
21 |
|
22 |
|
23 |
class RequestModel(BaseModel):
|
|
|
41 |
backspace_count = data.backspace_count
|
42 |
typing_duration = data.typing_duration
|
43 |
letter_click_counts = data.letter_click_counts
|
44 |
+
gpt35_answer = data.gpt35_answer
|
45 |
+
gpt4_answer = data.gpt4_answer
|
46 |
|
47 |
# Data preparation for 1st model
|
48 |
hypothesis = BaseModelHypothesis()
|
|
|
56 |
# Data preparation for 2nd model
|
57 |
secondary_model_dependencies = SecondaryModelDependencies()
|
58 |
secondary_model_features = secondary_model_dependencies.calculate_features(
|
59 |
+
question, answer, main_model_probability, backspace_count, typing_duration,
|
60 |
+
letter_click_counts, gpt35_answer, gpt4_answer)
|
61 |
|
62 |
# 2nd model prediction
|
63 |
secondary_model = RandomForestModel()
|
|
|
66 |
|
67 |
return {
|
68 |
"predicted_class": "AI" if secondary_model_prediction == 1 else "HUMAN",
|
69 |
+
"main_model_probability": str(main_model_probability),
|
70 |
+
"secondary_model_prediction": secondary_model_prediction,
|
71 |
+
"confidence": get_confidence(main_model_probability, secondary_model_prediction)
|
72 |
}
|
73 |
+
|
74 |
+
|
75 |
+
def get_confidence(main_model_output: float, secondary_model_output: int) -> str:
    """Grade how strongly the main model's probability backs the secondary label.

    The secondary label is compared with the band the main probability falls in:

    * label ``1``: ``>= 0.8`` is high, ``(0.5, 0.8)`` is partial.
    * label ``0``: ``<= 0.2`` is high, ``(0.2, 0.5]`` is partial.
    * every other combination (the probability sits on the opposite side of
      0.5 from the label, or the label is neither 0 nor 1) is low.

    Note the boundary: a probability of exactly 0.5 counts as agreeing with
    label ``0`` (partial) and as disagreeing with label ``1`` (low).

    Args:
        main_model_output: Probability produced by the main model.
            NOTE(review): presumably the probability of the "AI" class,
            since ``process_instance`` reports label 1 as "AI" -- confirm.
        secondary_model_output: Label produced by the secondary model
            (``1`` is the value ``process_instance`` maps to "AI").

    Returns:
        One of ``'High Confidence'``, ``'Partially Confident'`` or
        ``'Low Confidence'``.
    """
    if secondary_model_output == 1:
        if main_model_output >= 0.8:
            return 'High Confidence'
        if 0.5 < main_model_output < 0.8:
            return 'Partially Confident'
    elif secondary_model_output == 0:
        if main_model_output <= 0.2:
            return 'High Confidence'
        if 0.2 < main_model_output <= 0.5:
            return 'Partially Confident'

    # The two models disagree (or the inputs are outside the expected ranges).
    return 'Low Confidence'
|
core-model-prediction/random_forest_model.py
CHANGED
@@ -14,8 +14,8 @@ class RandomForestModel:
|
|
14 |
]
|
15 |
|
16 |
def preprocess_input(self, secondary_model_features: List[float]) -> np.ndarray:
|
17 |
-
features_df = pd.DataFrame(
|
18 |
-
|
19 |
features_df[self.secondary_model_features] = self.scaler.transform(
|
20 |
features_df[self.secondary_model_features])
|
21 |
return features_df.values.astype(np.float32).reshape(1, -1)
|
|
|
14 |
]
|
15 |
|
16 |
def preprocess_input(self, secondary_model_features: List[float]) -> np.ndarray:
    """Scale one feature vector and return it as a ``(1, n)`` float32 array.

    Args:
        secondary_model_features: Feature values for a single instance, in
            the same order as the column names held in
            ``self.secondary_model_features``. The parameter (values) and
            the attribute (column names) share a name; only the attribute
            is used to label and select columns.

    Returns:
        The scaled features as a float32 array reshaped to a single row.
    """
    column_names = self.secondary_model_features

    # Wrap the values in a list so they form one row, not one column.
    features_df = pd.DataFrame([secondary_model_features], columns=column_names)

    # NOTE(review): self.scaler is presumably a fitted scikit-learn scaler;
    # only its ``transform`` method is relied on here -- confirm.
    features_df[column_names] = self.scaler.transform(features_df[column_names])

    return features_df.to_numpy().astype(np.float32).reshape(1, -1)
|
core-model-prediction/requirements.txt
CHANGED
@@ -5,9 +5,6 @@ textstat==0.7.3
|
|
5 |
scikit-learn==1.2.2
|
6 |
transformers==4.38.2
|
7 |
sentence-transformers==2.7.0
|
8 |
-
langchain
|
9 |
-
openai
|
10 |
-
langchain-openai
|
11 |
fastapi
|
12 |
uvicorn
|
13 |
google-cloud-secret-manager
|
|
|
5 |
scikit-learn==1.2.2
|
6 |
transformers==4.38.2
|
7 |
sentence-transformers==2.7.0
|
|
|
|
|
|
|
8 |
fastapi
|
9 |
uvicorn
|
10 |
google-cloud-secret-manager
|
core-model-prediction/secondary_model_dependencies.py
CHANGED
@@ -1,35 +1,19 @@
|
|
1 |
from sentence_transformers import SentenceTransformer, util
|
2 |
from collections import Counter
|
3 |
-
from langchain_openai import ChatOpenAI
|
4 |
-
from langchain_core.messages import HumanMessage, SystemMessage
|
5 |
-
from google.cloud import secretmanager
|
6 |
|
7 |
|
8 |
class SecondaryModelDependencies:
|
9 |
def __init__(self):
|
10 |
self.text_similarity_model = SentenceTransformer(
|
11 |
'sentence-transformers/all-mpnet-base-v2')
|
12 |
-
api_key = self.access_openai_api_key()
|
13 |
-
self.llm_gpt35 = ChatOpenAI(
|
14 |
-
api_key=api_key, model="gpt-3.5-turbo")
|
15 |
-
self.llm_gpt4 = ChatOpenAI(
|
16 |
-
api_key=api_key, model="gpt-4-turbo")
|
17 |
|
18 |
-
def
|
19 |
-
|
20 |
-
name = "projects/steady-climate-416810/secrets/OPENAI_API_KEY/versions/1"
|
21 |
-
response = client.access_secret_version(request={"name": name})
|
22 |
-
return response.payload.data.decode('UTF-8')
|
23 |
-
|
24 |
-
def calculate_features(self, question: str, answer: str, probability: float, backspace_count: int, typing_duration: int, letter_click_counts: dict[str, int]):
|
25 |
backspace_count_normalized = backspace_count / len(answer)
|
26 |
typing_duration_normalized = typing_duration / len(answer)
|
27 |
letter_discrepancy = self.calculate_letter_discrepancy(
|
28 |
answer, letter_click_counts)
|
29 |
|
30 |
-
gpt35_answer = self.generate_gpt35_answer(question)
|
31 |
-
gpt4_answer = self.generate_gpt4_answer(question)
|
32 |
-
|
33 |
cosine_sim_gpt35 = self.calculate_similarity_gpt35(
|
34 |
answer, gpt35_answer)
|
35 |
cosine_sim_gpt4 = self.calculate_similarity_gpt4(answer, gpt4_answer)
|
@@ -54,26 +38,6 @@ class SecondaryModelDependencies:
|
|
54 |
|
55 |
return discrepancy_ratio_normalized
|
56 |
|
57 |
-
def generate_gpt35_answer(self, question: str):
|
58 |
-
messages = [
|
59 |
-
SystemMessage(
|
60 |
-
content="Please answer the following question based solely on your internal knowledge, without external references. Assume you are the human."),
|
61 |
-
HumanMessage(question)
|
62 |
-
]
|
63 |
-
|
64 |
-
gpt35_answer = self.llm_gpt35.invoke(messages)
|
65 |
-
return gpt35_answer.content
|
66 |
-
|
67 |
-
def generate_gpt4_answer(self, question: str):
|
68 |
-
messages = [
|
69 |
-
SystemMessage(
|
70 |
-
content="Please answer the following question based solely on your internal knowledge, without external references. Assume you are the human."),
|
71 |
-
HumanMessage(question)
|
72 |
-
]
|
73 |
-
|
74 |
-
gpt4_answer = self.llm_gpt4.invoke(messages)
|
75 |
-
return gpt4_answer.content
|
76 |
-
|
77 |
def calculate_similarity_gpt35(self, answer: str, gpt35_answer: str) -> float:
|
78 |
embedding1 = self.text_similarity_model.encode(
|
79 |
[answer], convert_to_tensor=True)
|
|
|
1 |
from sentence_transformers import SentenceTransformer, util
|
2 |
from collections import Counter
|
|
|
|
|
|
|
3 |
|
4 |
|
5 |
class SecondaryModelDependencies:
|
6 |
def __init__(self):
    """Load the sentence-embedding model stored as ``text_similarity_model``."""
    self.text_similarity_model = SentenceTransformer(
        'sentence-transformers/all-mpnet-base-v2')
|
|
|
|
|
|
|
|
|
|
|
9 |
|
10 |
+
def calculate_features(self, question: str, answer: str, probability: float, backspace_count: int, typing_duration: int,
|
11 |
+
letter_click_counts: dict[str, int], gpt35_answer: str, gpt4_answer: str):
|
|
|
|
|
|
|
|
|
|
|
12 |
backspace_count_normalized = backspace_count / len(answer)
|
13 |
typing_duration_normalized = typing_duration / len(answer)
|
14 |
letter_discrepancy = self.calculate_letter_discrepancy(
|
15 |
answer, letter_click_counts)
|
16 |
|
|
|
|
|
|
|
17 |
cosine_sim_gpt35 = self.calculate_similarity_gpt35(
|
18 |
answer, gpt35_answer)
|
19 |
cosine_sim_gpt4 = self.calculate_similarity_gpt4(answer, gpt4_answer)
|
|
|
38 |
|
39 |
return discrepancy_ratio_normalized
|
40 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
41 |
def calculate_similarity_gpt35(self, answer: str, gpt35_answer: str) -> float:
|
42 |
embedding1 = self.text_similarity_model.encode(
|
43 |
[answer], convert_to_tensor=True)
|