cstr committed on
Commit
52f55d2
·
verified ·
1 Parent(s): a9b40ac

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +88 -44
app.py CHANGED
@@ -312,14 +312,11 @@ COHERE_MODELS = {
312
  "c4ai-aya-expanse-32b": 131072,
313
  }
314
 
315
- # TOGETHER MODELS
316
  TOGETHER_MODELS = {
317
- "meta-llama/Llama-3.1-70B-Instruct": 131072,
318
- "meta-llama/Llama-3.1-8B-Instruct": 131072,
319
- "meta-llama/Llama-3.3-70B-Instruct": 131072,
320
- "deepseek-ai/deepseek-r1-distill-llama-70b": 8192,
321
- "meta-llama/Llama-3.2-11B-Vision-Instruct": 131072,
322
- "meta-llama/Llama-3.2-90B-Vision-Instruct": 131072,
323
  }
324
 
325
  # OVH MODELS - OVH AI Endpoints (free beta)
@@ -339,8 +336,8 @@ OVH_MODELS = {
339
 
340
  # CEREBRAS MODELS
341
  CEREBRAS_MODELS = {
342
- "cerebras/llama-3.1-8b": 8192,
343
- "cerebras/llama-3.3-70b": 8192,
344
  }
345
 
346
  # GOOGLE AI MODELS
@@ -952,23 +949,47 @@ def call_together_api(payload, api_key_override=None):
952
  )
953
 
954
  # Extract parameters from payload
955
- model = payload.get("model", "meta-llama/Meta-Llama-3-8B-Instruct")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
956
 
957
- # Fix model name format - Together API uses a different format
958
- # Check documentation for correct model names: https://api.together.ai/models
959
- if "llama-3.1" in model.lower():
960
- model = "meta-llama/Meta-Llama-3-8B-Instruct"
961
- elif "llama-3.3" in model.lower():
962
- model = "meta-llama/Meta-Llama-3.3-70B-Instruct"
 
 
 
963
 
 
 
 
 
 
 
 
 
 
964
  # Create completion
965
- response = client.chat.completions.create(
966
- model=model,
967
- messages=payload.get("messages", []),
968
- temperature=payload.get("temperature", 0.7),
969
- max_tokens=payload.get("max_tokens", 1000),
970
- stream=payload.get("stream", False)
971
- )
972
 
973
  return response
974
  except Exception as e:
@@ -1020,42 +1041,65 @@ def call_cerebras_api(payload, api_key_override=None):
1020
  """Make a call to Cerebras API with error handling"""
1021
  try:
1022
  # Extract parameters from payload
1023
- model = payload.get("model", "cerebras/llama-3.1-8b")
1024
- # Strip 'cerebras/' prefix if present
1025
- if model.startswith("cerebras/"):
1026
- model = model[9:]
1027
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1028
  messages = payload.get("messages", [])
1029
  temperature = payload.get("temperature", 0.7)
1030
  max_tokens = payload.get("max_tokens", 1000)
1031
-
1032
- data = {
1033
- "model": model,
1034
- "messages": messages,
1035
- "temperature": temperature,
1036
- "max_tokens": max_tokens
1037
- }
1038
-
1039
- api_key = api_key_override if api_key_override else os.environ.get("CEREBRAS_API_KEY", "")
1040
- headers = {
1041
- "Content-Type": "application/json",
1042
- "Authorization": f"Bearer {api_key}"
1043
- }
1044
 
 
1045
  try:
 
 
 
 
 
 
 
 
 
 
 
 
1046
  response = requests.post(
1047
  "https://api.cloud.cerebras.ai/v1/chat/completions",
1048
  headers=headers,
1049
  json=data,
1050
- timeout=10 # Add timeout to avoid hanging
1051
  )
1052
 
1053
  if response.status_code != 200:
1054
  raise ValueError(f"Cerebras API returned status code {response.status_code}: {response.text}")
1055
 
1056
  return response.json()
1057
- except requests.exceptions.ConnectionError as e:
1058
- raise ValueError(f"Connection error to Cerebras API. This may be due to network restrictions in the environment: {str(e)}")
 
 
 
 
 
 
 
 
1059
  except Exception as e:
1060
  logger.error(f"Cerebras API error: {str(e)}")
1061
  raise e
 
312
  "c4ai-aya-expanse-32b": 131072,
313
  }
314
 
315
+ # TOGETHER MODELS in the free tier
316
  TOGETHER_MODELS = {
317
+ "deepseek-ai/DeepSeek-R1-Distill-Llama-70B-free": 8192,
318
+ "meta-llama/Llama-Vision-Free": 8192,
319
+ "meta-llama/Llama-3.3-70B-Instruct-Turbo-Free": 8192,
 
 
 
320
  }
321
 
322
  # OVH MODELS - OVH AI Endpoints (free beta)
 
336
 
337
  # CEREBRAS MODELS
338
  CEREBRAS_MODELS = {
339
+ "llama3.1-8b": 8192,
340
+ "llama-3.3-70b": 8192,
341
  }
342
 
343
  # GOOGLE AI MODELS
 
949
  )
950
 
951
  # Extract parameters from payload
952
+ requested_model = payload.get("model", "")
953
+
954
+ # Use a safe model that's known to work in the free tier
955
+ # these models are available without dedicated endpoints
956
+ free_models = [
957
+ "deepseek-ai/DeepSeek-R1-Distill-Llama-70B-free",
958
+ "meta-llama/Llama-Vision-Free",
959
+ "meta-llama/Llama-3.3-70B-Instruct-Turbo-Free"
960
+ ]
961
+
962
+ # Default to the first free model
963
+ model = free_models[0]
964
+
965
+ # Try to match a requested model with a free model if possible
966
+ if requested_model:
967
+ for free_model in free_models:
968
+ if requested_model.lower() in free_model.lower():
969
+ model = free_model
970
+ break
971
 
972
+ # Create payload with clean messages
973
+ messages = []
974
+ for msg in payload.get("messages", []):
975
+ # Ensure we only include role and content
976
+ clean_msg = {
977
+ "role": msg["role"],
978
+ "content": msg["content"]
979
+ }
980
+ messages.append(clean_msg)
981
 
982
+ # Create payload
983
+ together_payload = {
984
+ "model": model,
985
+ "messages": messages,
986
+ "temperature": payload.get("temperature", 0.7),
987
+ "max_tokens": payload.get("max_tokens", 1000),
988
+ "stream": payload.get("stream", False)
989
+ }
990
+
991
  # Create completion
992
+ response = client.chat.completions.create(**together_payload)
 
 
 
 
 
 
993
 
994
  return response
995
  except Exception as e:
 
1041
  """Make a call to Cerebras API with error handling"""
1042
  try:
1043
  # Extract parameters from payload
1044
+ requested_model = payload.get("model", "")
1045
+
1046
+ # Map the full model name to the correct Cerebras model ID
1047
+ model_mapping = {
1048
+ "cerebras/llama-3.1-8b": "llama3.1-8b",
1049
+ "cerebras/llama-3.3-70b": "llama-3.3-70b",
1050
+ "llama-3.1-8b": "llama3.1-8b",
1051
+ "llama-3.3-70b": "llama-3.3-70b",
1052
+ "llama3.1-8b": "llama3.1-8b"
1053
+ }
1054
+
1055
+ # Default to the 8B model
1056
+ model = "llama3.1-8b"
1057
+
1058
+ # If the requested model matches any of our mappings, use that instead
1059
+ if requested_model in model_mapping:
1060
+ model = model_mapping[requested_model]
1061
+ elif "3.3" in requested_model or "70b" in requested_model.lower():
1062
+ model = "llama-3.3-70b"
1063
+
1064
  messages = payload.get("messages", [])
1065
  temperature = payload.get("temperature", 0.7)
1066
  max_tokens = payload.get("max_tokens", 1000)
 
 
 
 
 
 
 
 
 
 
 
 
 
1067
 
1068
+ # Try-except block for network issues
1069
  try:
1070
+ headers = {
1071
+ "Content-Type": "application/json",
1072
+ "Authorization": f"Bearer {api_key_override or os.environ.get('CEREBRAS_API_KEY', '')}"
1073
+ }
1074
+
1075
+ data = {
1076
+ "model": model,
1077
+ "messages": messages,
1078
+ "temperature": temperature,
1079
+ "max_tokens": max_tokens
1080
+ }
1081
+
1082
  response = requests.post(
1083
  "https://api.cloud.cerebras.ai/v1/chat/completions",
1084
  headers=headers,
1085
  json=data,
1086
+ timeout=30 # Increased timeout
1087
  )
1088
 
1089
  if response.status_code != 200:
1090
  raise ValueError(f"Cerebras API returned status code {response.status_code}: {response.text}")
1091
 
1092
  return response.json()
1093
+ except requests.exceptions.RequestException as e:
1094
+ # More specific error handling for network issues
1095
+ if "NameResolution" in str(e):
1096
+ raise ValueError(
1097
+ "Unable to connect to the Cerebras API. This might be due to network "
1098
+ "restrictions in your environment. The API requires direct internet access. "
1099
+ "Please try a different provider or check your network settings."
1100
+ )
1101
+ else:
1102
+ raise ValueError(f"Request to Cerebras API failed: {str(e)}")
1103
  except Exception as e:
1104
  logger.error(f"Cerebras API error: {str(e)}")
1105
  raise e