Update app.py
Browse files
app.py
CHANGED
@@ -312,14 +312,11 @@ COHERE_MODELS = {
|
|
312 |
"c4ai-aya-expanse-32b": 131072,
|
313 |
}
|
314 |
|
315 |
-
# TOGETHER MODELS
|
316 |
TOGETHER_MODELS = {
|
317 |
-
"
|
318 |
-
"meta-llama/Llama-
|
319 |
-
"meta-llama/Llama-3.3-70B-Instruct":
|
320 |
-
"deepseek-ai/deepseek-r1-distill-llama-70b": 8192,
|
321 |
-
"meta-llama/Llama-3.2-11B-Vision-Instruct": 131072,
|
322 |
-
"meta-llama/Llama-3.2-90B-Vision-Instruct": 131072,
|
323 |
}
|
324 |
|
325 |
# OVH MODELS - OVH AI Endpoints (free beta)
|
@@ -339,8 +336,8 @@ OVH_MODELS = {
|
|
339 |
|
340 |
# CEREBRAS MODELS
|
341 |
CEREBRAS_MODELS = {
|
342 |
-
"
|
343 |
-
"
|
344 |
}
|
345 |
|
346 |
# GOOGLE AI MODELS
|
@@ -952,23 +949,47 @@ def call_together_api(payload, api_key_override=None):
|
|
952 |
)
|
953 |
|
954 |
# Extract parameters from payload
|
955 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
956 |
|
957 |
-
#
|
958 |
-
|
959 |
-
|
960 |
-
|
961 |
-
|
962 |
-
|
|
|
|
|
|
|
963 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
964 |
# Create completion
|
965 |
-
response = client.chat.completions.create(
|
966 |
-
model=model,
|
967 |
-
messages=payload.get("messages", []),
|
968 |
-
temperature=payload.get("temperature", 0.7),
|
969 |
-
max_tokens=payload.get("max_tokens", 1000),
|
970 |
-
stream=payload.get("stream", False)
|
971 |
-
)
|
972 |
|
973 |
return response
|
974 |
except Exception as e:
|
@@ -1020,42 +1041,65 @@ def call_cerebras_api(payload, api_key_override=None):
|
|
1020 |
"""Make a call to Cerebras API with error handling"""
|
1021 |
try:
|
1022 |
# Extract parameters from payload
|
1023 |
-
|
1024 |
-
|
1025 |
-
|
1026 |
-
|
1027 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1028 |
messages = payload.get("messages", [])
|
1029 |
temperature = payload.get("temperature", 0.7)
|
1030 |
max_tokens = payload.get("max_tokens", 1000)
|
1031 |
-
|
1032 |
-
data = {
|
1033 |
-
"model": model,
|
1034 |
-
"messages": messages,
|
1035 |
-
"temperature": temperature,
|
1036 |
-
"max_tokens": max_tokens
|
1037 |
-
}
|
1038 |
-
|
1039 |
-
api_key = api_key_override if api_key_override else os.environ.get("CEREBRAS_API_KEY", "")
|
1040 |
-
headers = {
|
1041 |
-
"Content-Type": "application/json",
|
1042 |
-
"Authorization": f"Bearer {api_key}"
|
1043 |
-
}
|
1044 |
|
|
|
1045 |
try:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1046 |
response = requests.post(
|
1047 |
"https://api.cloud.cerebras.ai/v1/chat/completions",
|
1048 |
headers=headers,
|
1049 |
json=data,
|
1050 |
-
timeout=
|
1051 |
)
|
1052 |
|
1053 |
if response.status_code != 200:
|
1054 |
raise ValueError(f"Cerebras API returned status code {response.status_code}: {response.text}")
|
1055 |
|
1056 |
return response.json()
|
1057 |
-
except requests.exceptions.
|
1058 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1059 |
except Exception as e:
|
1060 |
logger.error(f"Cerebras API error: {str(e)}")
|
1061 |
raise e
|
|
|
312 |
"c4ai-aya-expanse-32b": 131072,
|
313 |
}
|
314 |
|
315 |
+
# TOGETHER MODELS in the free tier
|
316 |
TOGETHER_MODELS = {
|
317 |
+
"deepseek-ai/DeepSeek-R1-Distill-Llama-70B-free": 8192,
|
318 |
+
"meta-llama/Llama-Vision-Free": 8192,
|
319 |
+
"meta-llama/Llama-3.3-70B-Instruct-Turbo-Free": 8192,
|
|
|
|
|
|
|
320 |
}
|
321 |
|
322 |
# OVH MODELS - OVH AI Endpoints (free beta)
|
|
|
336 |
|
337 |
# CEREBRAS MODELS
|
338 |
CEREBRAS_MODELS = {
|
339 |
+
"llama3.1-8b": 8192,
|
340 |
+
"llama-3.3-70b": 8192,
|
341 |
}
|
342 |
|
343 |
# GOOGLE AI MODELS
|
|
|
949 |
)
|
950 |
|
951 |
# Extract parameters from payload
|
952 |
+
requested_model = payload.get("model", "")
|
953 |
+
|
954 |
+
# Restrict requests to models known to work in the free tier;
|
955 |
+
# these models are available without dedicated endpoints
|
956 |
+
free_models = [
|
957 |
+
"deepseek-ai/DeepSeek-R1-Distill-Llama-70B-free",
|
958 |
+
"meta-llama/Llama-Vision-Free",
|
959 |
+
"meta-llama/Llama-3.3-70B-Instruct-Turbo-Free"
|
960 |
+
]
|
961 |
+
|
962 |
+
# Default to the first free model
|
963 |
+
model = free_models[0]
|
964 |
+
|
965 |
+
# If the requested name is a case-insensitive substring of a free model's name, use that model
|
966 |
+
if requested_model:
|
967 |
+
for free_model in free_models:
|
968 |
+
if requested_model.lower() in free_model.lower():
|
969 |
+
model = free_model
|
970 |
+
break
|
971 |
|
972 |
+
# Build a clean message list for the request
|
973 |
+
messages = []
|
974 |
+
for msg in payload.get("messages", []):
|
975 |
+
# Ensure we only include role and content
|
976 |
+
clean_msg = {
|
977 |
+
"role": msg["role"],
|
978 |
+
"content": msg["content"]
|
979 |
+
}
|
980 |
+
messages.append(clean_msg)
|
981 |
|
982 |
+
# Create payload
|
983 |
+
together_payload = {
|
984 |
+
"model": model,
|
985 |
+
"messages": messages,
|
986 |
+
"temperature": payload.get("temperature", 0.7),
|
987 |
+
"max_tokens": payload.get("max_tokens", 1000),
|
988 |
+
"stream": payload.get("stream", False)
|
989 |
+
}
|
990 |
+
|
991 |
# Create completion
|
992 |
+
response = client.chat.completions.create(**together_payload)
|
|
|
|
|
|
|
|
|
|
|
|
|
993 |
|
994 |
return response
|
995 |
except Exception as e:
|
|
|
1041 |
"""Make a call to Cerebras API with error handling"""
|
1042 |
try:
|
1043 |
# Extract parameters from payload
|
1044 |
+
requested_model = payload.get("model", "")
|
1045 |
+
|
1046 |
+
# Map the full model name to the correct Cerebras model ID
|
1047 |
+
model_mapping = {
|
1048 |
+
"cerebras/llama-3.1-8b": "llama3.1-8b",
|
1049 |
+
"cerebras/llama-3.3-70b": "llama-3.3-70b",
|
1050 |
+
"llama-3.1-8b": "llama3.1-8b",
|
1051 |
+
"llama-3.3-70b": "llama-3.3-70b",
|
1052 |
+
"llama3.1-8b": "llama3.1-8b"
|
1053 |
+
}
|
1054 |
+
|
1055 |
+
# Default to the 8B model
|
1056 |
+
model = "llama3.1-8b"
|
1057 |
+
|
1058 |
+
# Use the mapped ID when the requested name is known; otherwise fall back to the 70B model if the name mentions "3.3" or "70b"
|
1059 |
+
if requested_model in model_mapping:
|
1060 |
+
model = model_mapping[requested_model]
|
1061 |
+
elif "3.3" in requested_model or "70b" in requested_model.lower():
|
1062 |
+
model = "llama-3.3-70b"
|
1063 |
+
|
1064 |
messages = payload.get("messages", [])
|
1065 |
temperature = payload.get("temperature", 0.7)
|
1066 |
max_tokens = payload.get("max_tokens", 1000)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1067 |
|
1068 |
+
# Try-except block for network issues
|
1069 |
try:
|
1070 |
+
headers = {
|
1071 |
+
"Content-Type": "application/json",
|
1072 |
+
"Authorization": f"Bearer {api_key_override or os.environ.get('CEREBRAS_API_KEY', '')}"
|
1073 |
+
}
|
1074 |
+
|
1075 |
+
data = {
|
1076 |
+
"model": model,
|
1077 |
+
"messages": messages,
|
1078 |
+
"temperature": temperature,
|
1079 |
+
"max_tokens": max_tokens
|
1080 |
+
}
|
1081 |
+
|
1082 |
response = requests.post(
|
1083 |
"https://api.cloud.cerebras.ai/v1/chat/completions",
|
1084 |
headers=headers,
|
1085 |
json=data,
|
1086 |
+
timeout=30 # Increased timeout
|
1087 |
)
|
1088 |
|
1089 |
if response.status_code != 200:
|
1090 |
raise ValueError(f"Cerebras API returned status code {response.status_code}: {response.text}")
|
1091 |
|
1092 |
return response.json()
|
1093 |
+
except requests.exceptions.RequestException as e:
|
1094 |
+
# More specific error handling for network issues
|
1095 |
+
if "NameResolution" in str(e):
|
1096 |
+
raise ValueError(
|
1097 |
+
"Unable to connect to the Cerebras API. This might be due to network "
|
1098 |
+
"restrictions in your environment. The API requires direct internet access. "
|
1099 |
+
"Please try a different provider or check your network settings."
|
1100 |
+
)
|
1101 |
+
else:
|
1102 |
+
raise ValueError(f"Request to Cerebras API failed: {str(e)}")
|
1103 |
except Exception as e:
|
1104 |
logger.error(f"Cerebras API error: {str(e)}")
|
1105 |
raise e
|