Update agent.py
Browse files
agent.py
CHANGED
@@ -124,38 +124,25 @@ All answers are graded by exact string match, so format carefully!"""
|
|
124 |
def __call__(self, prompt: str, **kwargs) -> str:
    """
    Call the model with appropriate handling of system prompts for Anthropic.

    The stored system prompt is sent as a dedicated ``system`` role message,
    followed by the user prompt, via LiteLLM's ``completion``.

    Args:
        prompt: The prompt to send to the model.
        **kwargs: Additional arguments to pass to LiteLLM.

    Returns:
        The model's response as a string.

    Raises:
        Exception: Re-raised for any non-rate-limit API error.
    """
    # Wait according to the global rate limiter before each call.
    RATE_LIMITER.wait()

    try:
        # Drop 'system_instruction' if present: LiteLLM rejects it with
        # "Extra inputs are not permitted". We use our stored system
        # prompt instead.
        if 'system_instruction' in kwargs:
            del kwargs['system_instruction']

        # Build the message list with the system prompt as its own message.
        messages = [
            {"role": "system", "content": self.system_prompt},
            {"role": "user", "content": prompt}
        ]

        # Call LiteLLM with the proper message format for Anthropic.
        from litellm import completion

        response = completion(
            model=self.model_id,
            messages=messages,  # fix: the argument value was missing here
            api_key=self.api_key,
            temperature=self.temperature,
            max_tokens=self.max_tokens,
        )

        return response.choices[0].message.content

    except Exception as e:
        if "rate_limit" in str(e).lower():
            # Specific handling for rate-limit errors: back off and retry.
            print(f"Rate limit error: {e}")
            print("Waiting 60 seconds before retrying...")
            time.sleep(60)
            # Recursive retry after waiting.
            return self.__call__(prompt, **kwargs)
        else:
            # Re-raise other errors after logging.
            print(f"Error calling Anthropic API: {e}")
            raise
|
|
124 |
def __call__(self, prompt: str, **kwargs) -> str:
    """
    Call the model with appropriate handling of system prompts for Anthropic.

    The stored system prompt is prepended to the user prompt and sent as a
    single user message, avoiding nested-message issues with the Anthropic
    provider through LiteLLM.

    Args:
        prompt: The prompt to send to the model.
        **kwargs: Additional arguments to pass to LiteLLM.

    Returns:
        The model's response as a string.

    Raises:
        Exception: Re-raised for any non-rate-limit API error, or when the
            rate-limit retry budget is exhausted.
    """
    # Wait according to the global rate limiter before the first attempt.
    RATE_LIMITER.wait()

    # Remove 'system_instruction' if present: LiteLLM rejects it with
    # "Extra inputs are not permitted".
    kwargs.pop('system_instruction', None)

    # Flatten system instructions + user prompt into one user message.
    complete_prompt = f"{self.system_prompt}\n\n{prompt}"

    # For Anthropic via LiteLLM, use the direct completion method.
    from litellm import completion

    # Bounded retry loop instead of unbounded recursion: a persistently
    # rate-limited key previously recursed until RecursionError.
    max_retries = 5
    for attempt in range(max_retries + 1):
        try:
            response = completion(
                model=self.model_id,
                messages=[{"role": "user", "content": complete_prompt}],
                api_key=self.api_key,
                temperature=self.temperature,
                max_tokens=self.max_tokens,
            )
            return response.choices[0].message.content
        except Exception as e:
            # Handle rate-limit errors with a fixed back-off, up to the
            # retry budget; everything else is logged and re-raised.
            if "rate_limit" in str(e).lower() and attempt < max_retries:
                print(f"Rate limit error: {e}")
                print("Waiting 60 seconds before retrying...")
                time.sleep(60)
                # Respect the limiter again before the next attempt.
                RATE_LIMITER.wait()
            else:
                print(f"Error calling Anthropic API: {e}")
                raise