simondh committed on
Commit
442b8d8
·
1 Parent(s): d3bdf42

isolate prompts

Browse files
Files changed (2) hide show
  1. prompts.py +15 -0
  2. utils.py +4 -16
prompts.py CHANGED
@@ -61,3 +61,18 @@ Example texts:
61
 
62
  Return your answer as a comma-separated list of new category names only.
63
  """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
61
 
62
  Return your answer as a comma-separated list of new category names only.
63
  """
64
+
65
+ # Validation prompt
66
+ VALIDATION_PROMPT = """
67
+ As a validation expert, review the following text classifications and provide feedback.
68
+ For each text, assess whether the assigned category seems appropriate:
69
+
70
+ {}
71
+
72
+ Provide a brief validation report with:
73
+ 1. Overall accuracy assessment (0-100%)
74
+ 2. Any potential misclassifications identified
75
+ 3. Suggestions for improvement
76
+
77
+ Keep your response under 300 words.
78
+ """
utils.py CHANGED
@@ -5,6 +5,7 @@ import matplotlib.pyplot as plt
5
  from sklearn.decomposition import PCA
6
  from sklearn.feature_extraction.text import TfidfVectorizer
7
  import tempfile
 
8
 
9
 
10
  def load_data(file_path):
@@ -133,7 +134,7 @@ def validate_results(df, text_columns, client):
133
  sample_size = min(5, len(df))
134
  sample_df = df.sample(n=sample_size, random_state=42)
135
 
136
- # Build validation prompt
137
  validation_prompts = []
138
  for _, row in sample_df.iterrows():
139
  # Combine text from all selected columns
@@ -145,21 +146,8 @@ def validate_results(df, text_columns, client):
145
  f"Text: {text}\nAssigned Category: {assigned_category}\nConfidence: {confidence}\n"
146
  )
147
 
148
- prompt = """
149
- As a validation expert, review the following text classifications and provide feedback.
150
- For each text, assess whether the assigned category seems appropriate:
151
-
152
- {}
153
-
154
- Provide a brief validation report with:
155
- 1. Overall accuracy assessment (0-100%)
156
- 2. Any potential misclassifications identified
157
- 3. Suggestions for improvement
158
-
159
- Keep your response under 300 words.
160
- """.format(
161
- "\n---\n".join(validation_prompts)
162
- )
163
 
164
  # Call LLM API
165
  response = client.chat.completions.create(
 
5
  from sklearn.decomposition import PCA
6
  from sklearn.feature_extraction.text import TfidfVectorizer
7
  import tempfile
8
+ from prompts import VALIDATION_PROMPT
9
 
10
 
11
  def load_data(file_path):
 
134
  sample_size = min(5, len(df))
135
  sample_df = df.sample(n=sample_size, random_state=42)
136
 
137
+ # Build validation prompts
138
  validation_prompts = []
139
  for _, row in sample_df.iterrows():
140
  # Combine text from all selected columns
 
146
  f"Text: {text}\nAssigned Category: {assigned_category}\nConfidence: {confidence}\n"
147
  )
148
 
149
+ # Use the prompt from prompts.py
150
+ prompt = VALIDATION_PROMPT.format("\n---\n".join(validation_prompts))
 
 
 
 
 
 
 
 
 
 
 
 
 
151
 
152
  # Call LLM API
153
  response = client.chat.completions.create(