cstr committed · Commit a84b4e4 · verified · 1 Parent(s): 7d23974

Update app.py

Files changed (1)
  1. app.py +115 -132
app.py CHANGED
@@ -856,28 +856,11 @@ def call_cohere_api(payload, api_key_override=None):
         temperature = payload.get("temperature", 0.7)
         max_tokens = payload.get("max_tokens", 1000)
 
-        # Transform messages to Cohere format - IMPORTANT
-        # Cohere uses specific role names: USER, ASSISTANT, SYSTEM, TOOL
-        cohere_messages = []
-        for msg in messages:
-            role = msg["role"].upper()  # Cohere requires uppercase roles
-            content = msg["content"]
-
-            # Handle multimodal content
-            if isinstance(content, list):
-                text_parts = []
-                for item in content:
-                    if item["type"] == "text":
-                        text_parts.append(item["text"])
-                content = "\n".join(text_parts)
-
-            cohere_messages.append({"role": role, "content": content})
-
-        # Create chat completion
+        # Create chat completion - note the correct format for Cohere ClientV2:
+        # the V2 chat method expects a 'messages' parameter, not 'message'
         response = client.chat(
-            message=cohere_messages[-1]["content"] if cohere_messages else "",
-            chat_history=cohere_messages[:-1] if len(cohere_messages) > 1 else [],
             model=model,
+            messages=messages,  # passed through as-is
             temperature=temperature,
             max_tokens=max_tokens
         )
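For reference, a minimal standalone sketch of the ClientV2 call shape this hunk switches to, assuming the `cohere` SDK v5+; the model id is a placeholder and the response access follows the V2 quickstart, so verify both against Cohere's docs:

```python
# Sketch only: Cohere ClientV2 takes a full 'messages' list, not message/chat_history.
import cohere

co = cohere.ClientV2(api_key="YOUR_COHERE_KEY")  # placeholder key

response = co.chat(
    model="command-r-plus-08-2024",  # assumed model id; check Cohere's model list
    messages=[{"role": "user", "content": "Say hello"}],
    temperature=0.7,
    max_tokens=100,
)

# In V2 the reply text lives under message.content, not response.text
print(response.message.content[0].text)
```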
@@ -904,35 +887,23 @@ def call_together_api(payload, api_key_override=None):
         )
 
         # Extract parameters from payload
-        model = payload.get("model", "meta-llama/Llama-3.1-8B-Instruct")
+        model = payload.get("model", "meta-llama/Meta-Llama-3-8B-Instruct")
 
-        # Fix model name format - Together API expects this format
-        if not model.startswith("meta-llama/") and "llama" in model.lower():
-            # Convert model ID format from "llama-3.1-8b-instruct" to "meta-llama/Llama-3.1-8B-Instruct"
-            parts = model.split("-")
-            formatted_name = "meta-llama/L" + "".join([p.capitalize() for p in parts])
-            model = formatted_name
+        # Fix model name format - Together API uses a different format
+        # Check documentation for correct model names: https://api.together.ai/models
+        if "llama-3.1" in model.lower():
+            model = "meta-llama/Meta-Llama-3-8B-Instruct"
+        elif "llama-3.3" in model.lower():
+            model = "meta-llama/Meta-Llama-3.3-70B-Instruct"
 
-        # Clean up messages - remove any unexpected properties
-        messages = []
-        for msg in payload.get("messages", []):
-            clean_msg = {
-                "role": msg["role"],
-                "content": msg["content"]
-            }
-            messages.append(clean_msg)
-
-        # Create payload
-        together_payload = {
-            "model": model,
-            "messages": messages,
-            "temperature": payload.get("temperature", 0.7),
-            "max_tokens": payload.get("max_tokens", 1000),
-            "stream": payload.get("stream", False)
-        }
-
         # Create completion
-        response = client.chat.completions.create(**together_payload)
+        response = client.chat.completions.create(
+            model=model,
+            messages=payload.get("messages", []),
+            temperature=payload.get("temperature", 0.7),
+            max_tokens=payload.get("max_tokens", 1000),
+            stream=payload.get("stream", False)
+        )
 
         return response
     except Exception as e:
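The new call uses the standard OpenAI-style `chat.completions.create` signature. A minimal sketch of the same request outside the app, assuming the `openai` package pointed at Together's OpenAI-compatible endpoint (the app builds its `client` elsewhere, so this construction is illustrative):

```python
# Sketch only: Together exposes an OpenAI-compatible /v1 endpoint.
from openai import OpenAI

client = OpenAI(
    api_key="YOUR_TOGETHER_KEY",             # placeholder key
    base_url="https://api.together.xyz/v1",  # Together's OpenAI-compatible base URL
)

resp = client.chat.completions.create(
    model="meta-llama/Meta-Llama-3-8B-Instruct",
    messages=[{"role": "user", "content": "Say hello"}],
    temperature=0.7,
    max_tokens=100,
)
print(resp.choices[0].message.content)
```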
@@ -952,33 +923,30 @@ def call_ovh_api(payload, api_key_override=None):
             "Content-Type": "application/json"
         }
 
-        # Clean up messages - remove any unexpected properties
-        clean_messages = []
-        for msg in messages:
-            clean_msg = {
-                "role": msg["role"],
-                "content": msg["content"]
-            }
-            clean_messages.append(clean_msg)
-
         data = {
             "model": model,
-            "messages": clean_messages,
+            "messages": messages,
             "temperature": temperature,
             "max_tokens": max_tokens
         }
 
-        # Updated endpoint with correct path
-        response = requests.post(
-            "https://api.ai.cloud.ovh.net/v1/chat/completions",
-            headers=headers,
-            json=data
-        )
-
-        if response.status_code != 200:
-            raise ValueError(f"OVH API returned status code {response.status_code}: {response.text}")
-
-        return response.json()
+        # Use a try-except to handle DNS resolution errors and provide a more helpful message
+        try:
+            # Correct endpoint URL based on documentation
+            response = requests.post(
+                "https://endpoints.ai.cloud.ovh.net/v1/chat/completions",  # Updated endpoint
+                headers=headers,
+                json=data,
+                timeout=10  # Add timeout to avoid hanging
+            )
+
+            if response.status_code != 200:
+                raise ValueError(f"OVH API returned status code {response.status_code}: {response.text}")
+
+            return response.json()
+        except requests.exceptions.ConnectionError as e:
+            raise ValueError(f"Connection error to OVH API. This may be due to network restrictions in the environment: {str(e)}")
 
     except Exception as e:
         logger.error(f"OVH API error: {str(e)}")
         raise e
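For clarity, the raw exchange the updated function performs, isolated from the app (the model id is a placeholder; the endpoint and timeout match the diff):

```python
# Sketch only: direct POST to OVH AI Endpoints' OpenAI-compatible chat route.
import requests

try:
    r = requests.post(
        "https://endpoints.ai.cloud.ovh.net/v1/chat/completions",
        headers={
            "Authorization": "Bearer YOUR_OVH_TOKEN",  # placeholder token
            "Content-Type": "application/json",
        },
        json={
            "model": "Meta-Llama-3_1-8B-Instruct",  # hypothetical model id
            "messages": [{"role": "user", "content": "Say hello"}],
            "temperature": 0.7,
            "max_tokens": 100,
        },
        timeout=10,
    )
    r.raise_for_status()
    print(r.json()["choices"][0]["message"]["content"])
except requests.exceptions.ConnectionError as e:
    # DNS or network failures surface here, as in the diff's error handling
    print(f"Network-level failure reaching OVH: {e}")
```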
@@ -988,38 +956,41 @@ def call_cerebras_api(payload, api_key_override=None):
     try:
         # Extract parameters from payload
         model = payload.get("model", "cerebras/llama-3.1-8b")
-
-        # Clean up messages - remove any unexpected properties
-        messages = []
-        for msg in payload.get("messages", []):
-            clean_msg = {
-                "role": msg["role"],
-                "content": msg["content"]
-            }
-            messages.append(clean_msg)
+        # Strip 'cerebras/' prefix if present
+        if model.startswith("cerebras/"):
+            model = model[9:]
+
+        messages = payload.get("messages", [])
+        temperature = payload.get("temperature", 0.7)
+        max_tokens = payload.get("max_tokens", 1000)
 
         data = {
             "model": model,
             "messages": messages,
-            "temperature": payload.get("temperature", 0.7),
-            "max_tokens": payload.get("max_tokens", 1000)
+            "temperature": temperature,
+            "max_tokens": max_tokens
         }
 
+        api_key = api_key_override if api_key_override else os.environ.get("CEREBRAS_API_KEY", "")
         headers = {
             "Content-Type": "application/json",
-            "Authorization": f"Bearer {api_key_override or os.environ.get('CEREBRAS_API_KEY', '')}"
+            "Authorization": f"Bearer {api_key}"
         }
 
-        response = requests.post(
-            "https://api.cloud.cerebras.ai/v1/chat/completions",
-            headers=headers,
-            json=data
-        )
-
-        if response.status_code != 200:
-            raise ValueError(f"Cerebras API returned status code {response.status_code}: {response.text}")
-
-        return response.json()
+        try:
+            response = requests.post(
+                "https://api.cloud.cerebras.ai/v1/chat/completions",
+                headers=headers,
+                json=data,
+                timeout=10  # Add timeout to avoid hanging
+            )
+
+            if response.status_code != 200:
+                raise ValueError(f"Cerebras API returned status code {response.status_code}: {response.text}")
+
+            return response.json()
+        except requests.exceptions.ConnectionError as e:
+            raise ValueError(f"Connection error to Cerebras API. This may be due to network restrictions in the environment: {str(e)}")
     except Exception as e:
         logger.error(f"Cerebras API error: {str(e)}")
         raise e
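One note on the prefix strip: `model[9:]` relies on `len("cerebras/") == 9`; on Python 3.9+, `str.removeprefix` says the same thing without the magic number. A small sketch of the whole call (same endpoint as the diff; the bare model id is an assumption to verify against Cerebras' docs):

```python
# Sketch only: strip the router-style prefix, then hit the Cerebras chat endpoint.
import os
import requests

model = "cerebras/llama3.1-8b".removeprefix("cerebras/")  # equivalent to model[9:]

resp = requests.post(
    "https://api.cloud.cerebras.ai/v1/chat/completions",
    headers={
        "Content-Type": "application/json",
        "Authorization": f"Bearer {os.environ.get('CEREBRAS_API_KEY', '')}",
    },
    json={
        "model": model,  # bare id, e.g. "llama3.1-8b" - assumed, check the docs
        "messages": [{"role": "user", "content": "Say hello"}],
        "temperature": 0.7,
        "max_tokens": 100,
    },
    timeout=10,
)
resp.raise_for_status()
print(resp.json()["choices"][0]["message"]["content"])
```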
@@ -1027,80 +998,92 @@ def call_cerebras_api(payload, api_key_override=None):
 def call_googleai_api(payload, api_key_override=None):
     """Make a call to Google AI (Gemini) API with error handling"""
     try:
-        from google.generativeai import configure, GenerativeModel
-
         api_key = api_key_override if api_key_override else GOOGLEAI_API_KEY
         if not api_key:
             raise ValueError("Google AI API key is required")
 
-        configure(api_key=api_key)
+        # Use plain requests instead of the google.generativeai SDK, since it may not be installed
+        gemini_api_url = "https://generativelanguage.googleapis.com/v1/models/gemini-1.5-pro:generateContent"
 
         # Extract parameters from payload
-        model_name = payload.get("model", "gemini-1.5-pro")
         messages = payload.get("messages", [])
         temperature = payload.get("temperature", 0.7)
+        max_tokens = payload.get("max_tokens", 1000)
 
-        # Convert messages to Google AI format
-        google_messages = []
+        # Convert to Google's format
+        content_parts = []
+
+        # Add all messages
         for msg in messages:
             role = msg["role"]
             content = msg["content"]
 
-            # Skip system messages for now (Gemini doesn't support them directly)
+            # Handle different roles
             if role == "system":
+                # System messages are skipped for now (not folded into the prompt here)
                 continue
-
-            # Map user/assistant roles to Google's roles
-            gemini_role = "user" if role == "user" else "model"
-
-            # Process content (text or multimodal)
-            if isinstance(content, list):
-                # Multimodal content handling for Gemini
-                parts = []
-                for item in content:
-                    if item["type"] == "text":
-                        parts.append({"text": item["text"]})
-                    elif item["type"] == "image_url":
-                        image_data = item["image_url"]["url"]
-                        if image_data.startswith("data:"):
-                            # Extract base64 data
-                            mime, base64_data = image_data.split(";base64,")
-                            mime_type = mime.split(":")[1]
-                            parts.append({
-                                "inline_data": {
-                                    "mime_type": mime_type,
-                                    "data": base64_data
-                                }
-                            })
-                google_messages.append({"role": gemini_role, "parts": parts})
-            else:
-                # Simple text content
-                google_messages.append({"role": gemini_role, "parts": [{"text": content}]})
-
-        # Create Gemini model
-        model = GenerativeModel(model_name)
-
-        # Generate content
-        response = model.generate_content(
-            google_messages,
-            generation_config={
-                "temperature": temperature,
-                "max_output_tokens": payload.get("max_tokens", 1000),
-                "top_p": payload.get("top_p", 0.95),
-            }
-        )
+            elif role == "user":
+                # For user messages, add as regular content
+                if isinstance(content, str):
+                    content_parts.append({"text": content})
+                else:
+                    # Handle multimodal content
+                    for item in content:
+                        if item["type"] == "text":
+                            content_parts.append({"text": item["text"]})
 
-        # Convert response to standard format
+        # Form the request data
+        data = {
+            "contents": [{"parts": content_parts}],
+            "generationConfig": {
+                "temperature": temperature,
+                "maxOutputTokens": max_tokens,
+                "topP": payload.get("top_p", 0.95),
+            }
+        }
+
+        headers = {
+            "Content-Type": "application/json",
+            "x-goog-api-key": api_key
+        }
+
+        # Make the request
+        response = requests.post(
+            gemini_api_url,
+            headers=headers,
+            json=data,
+            timeout=30
+        )
+
+        if response.status_code != 200:
+            error_msg = f"Google AI API error: {response.status_code} - {response.text}"
+            logger.error(error_msg)
+            raise ValueError(error_msg)
+
+        # Parse response and convert to standard format
+        result = response.json()
+        text_content = ""
+
+        # Extract text from response
+        if "candidates" in result and len(result["candidates"]) > 0:
+            candidate = result["candidates"][0]
+            if "content" in candidate and "parts" in candidate["content"]:
+                for part in candidate["content"]["parts"]:
+                    if "text" in part:
+                        text_content += part["text"]
+
+        # Create a standardized response format
         return {
             "choices": [
                 {
                     "message": {
                         "role": "assistant",
-                        "content": response.text
+                        "content": text_content
                     }
                 }
             ]
         }
+
     except Exception as e:
         logger.error(f"Google AI API error: {str(e)}")
         raise e
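Since the rewrite drops the SDK for raw REST, here is the request/response shape in isolation, assuming Google's v1 `generateContent` route as used in the diff (prompt and key are placeholders):

```python
# Sketch only: Gemini generateContent over plain HTTP, plus defensive parsing.
import requests

url = "https://generativelanguage.googleapis.com/v1/models/gemini-1.5-pro:generateContent"
r = requests.post(
    url,
    headers={"Content-Type": "application/json", "x-goog-api-key": "YOUR_GOOGLE_KEY"},
    json={
        "contents": [{"parts": [{"text": "Say hello"}]}],
        "generationConfig": {"temperature": 0.7, "maxOutputTokens": 100, "topP": 0.95},
    },
    timeout=30,
)
r.raise_for_status()
result = r.json()

# Reply text is nested: candidates -> content -> parts -> text
candidates = result.get("candidates") or [{}]
parts = candidates[0].get("content", {}).get("parts", [])
print("".join(p.get("text", "") for p in parts))
```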
 