Update app.py
app.py
CHANGED
@@ -856,28 +856,11 @@ def call_cohere_api(payload, api_key_override=None):
         temperature = payload.get("temperature", 0.7)
         max_tokens = payload.get("max_tokens", 1000)
 
-        #
-        #
-        cohere_messages = []
-        for msg in messages:
-            role = msg["role"].upper()  # Cohere requires uppercase roles
-            content = msg["content"]
-
-            # Handle multimodal content
-            if isinstance(content, list):
-                text_parts = []
-                for item in content:
-                    if item["type"] == "text":
-                        text_parts.append(item["text"])
-                content = "\n".join(text_parts)
-
-            cohere_messages.append({"role": role, "content": content})
-
-        # Create chat completion
+        # Create chat completion - note the correct format for Cohere ClientV2
+        # The ClientV2's chat method expects a 'messages' parameter, not 'message'
         response = client.chat(
-            message=cohere_messages[-1]["content"] if cohere_messages else "",
-            chat_history=cohere_messages[:-1] if len(cohere_messages) > 1 else [],
             model=model,
+            messages=messages,  # This is passed directly as is
             temperature=temperature,
             max_tokens=max_tokens
         )
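Note on this hunk: the rewritten call targets Cohere's v2 SDK, whose ClientV2.chat takes an OpenAI-style messages list instead of the v1 message/chat_history pair. A minimal sketch of that calling convention, with a placeholder key and model name, and the response accessor as the v2 SDK documents it:

import cohere

co = cohere.ClientV2(api_key="YOUR_COHERE_API_KEY")  # placeholder key
res = co.chat(
    model="command-r-plus",  # placeholder model name
    messages=[{"role": "user", "content": "Say hello"}],
    temperature=0.7,
    max_tokens=100,
)
# v2 nests the reply under message.content; each part carries a .text field
print(res.message.content[0].text)

Worth noting: the deleted block also flattened list-type (multimodal) message content to plain text, so after this change such messages reach the SDK unmodified.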
@@ -904,35 +887,23 @@ def call_together_api(payload, api_key_override=None):
         )
 
         # Extract parameters from payload
-        model = payload.get("model", "meta-llama/Llama-3
+        model = payload.get("model", "meta-llama/Meta-Llama-3-8B-Instruct")
 
-        # Fix model name format - Together API
-
-
-
-        model =
+        # Fix model name format - Together API uses a different format
+        # Check documentation for correct model names: https://api.together.ai/models
+        if "llama-3.1" in model.lower():
+            model = "meta-llama/Meta-Llama-3-8B-Instruct"
+        elif "llama-3.3" in model.lower():
+            model = "meta-llama/Meta-Llama-3.3-70B-Instruct"
 
-        # Clean up messages - remove any unexpected properties
-        messages = []
-        for msg in payload.get("messages", []):
-            clean_msg = {
-                "role": msg["role"],
-                "content": msg["content"]
-            }
-            messages.append(clean_msg)
-
-        # Create payload
-        together_payload = {
-            "model": model,
-            "messages": messages,
-            "temperature": payload.get("temperature", 0.7),
-            "max_tokens": payload.get("max_tokens", 1000),
-            "stream": payload.get("stream", False)
-        }
-
         # Create completion
-        response = client.chat.completions.create(
+        response = client.chat.completions.create(
+            model=model,
+            messages=payload.get("messages", []),
+            temperature=payload.get("temperature", 0.7),
+            max_tokens=payload.get("max_tokens", 1000),
+            stream=payload.get("stream", False)
+        )
 
         return response
     except Exception as e:
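Note on the model remapping above: extracted into a standalone helper (hypothetical name; the IDs are copied from the hunk, so verify them against https://api.together.ai/models), the logic reads:

def normalize_together_model(model: str) -> str:
    """Map loose Llama aliases onto Together's canonical model IDs."""
    lowered = model.lower()
    if "llama-3.1" in lowered:
        return "meta-llama/Meta-Llama-3-8B-Instruct"
    if "llama-3.3" in lowered:
        return "meta-llama/Meta-Llama-3.3-70B-Instruct"
    return model

# Every llama-3.1 alias collapses to the 8B ID, including 70B variants:
assert normalize_together_model("llama-3.1-70b") == "meta-llama/Meta-Llama-3-8B-Instruct"

That collapse is lossy, so the mapping deserves a second look before shipping.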
@@ -952,33 +923,30 @@ def call_ovh_api(payload, api_key_override=None):
             "Content-Type": "application/json"
         }
 
-        # Clean up messages - remove any unexpected properties
-        clean_messages = []
-        for msg in messages:
-            clean_msg = {
-                "role": msg["role"],
-                "content": msg["content"]
-            }
-            clean_messages.append(clean_msg)
-
         data = {
             "model": model,
-            "messages": clean_messages,
+            "messages": messages,
             "temperature": temperature,
             "max_tokens": max_tokens
         }
 
-        #
-
-
-
-
-
-
-
-
+        # Use a try-except to handle DNS resolution errors and provide a more helpful message
+        try:
+            # Correct endpoint URL based on documentation
+            response = requests.post(
+                "https://endpoints.ai.cloud.ovh.net/v1/chat/completions",  # Updated endpoint
+                headers=headers,
+                json=data,
+                timeout=10  # Add timeout to avoid hanging
+            )
+
+            if response.status_code != 200:
+                raise ValueError(f"OVH API returned status code {response.status_code}: {response.text}")
+
+            return response.json()
+        except requests.exceptions.ConnectionError as e:
+            raise ValueError(f"Connection error to OVH API. This may be due to network restrictions in the environment: {str(e)}")
 
-        return response.json()
     except Exception as e:
         logger.error(f"OVH API error: {str(e)}")
         raise e
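Note: the added try/except wraps a standard requests pattern, a timeout plus translating ConnectionError into a clearer message. The same shape recurs in the Cerebras hunk below; factored out (helper name hypothetical), it would be:

import requests

def post_json(url: str, headers: dict, payload: dict, timeout: int = 10) -> dict:
    """POST JSON and surface network or HTTP failures as ValueError."""
    try:
        response = requests.post(url, headers=headers, json=payload, timeout=timeout)
    except requests.exceptions.ConnectionError as e:
        raise ValueError(f"Connection error to {url}: {e}")
    if response.status_code != 200:
        raise ValueError(f"{url} returned status code {response.status_code}: {response.text}")
    return response.json()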
@@ -988,38 +956,41 @@ def call_cerebras_api(payload, api_key_override=None):
     try:
         # Extract parameters from payload
        model = payload.get("model", "cerebras/llama-3.1-8b")
-
-        # Clean up messages - remove any unexpected properties
-        messages = []
-        for msg in payload.get("messages", []):
-            clean_msg = {
-                "role": msg["role"],
-                "content": msg["content"]
-            }
-            messages.append(clean_msg)
+        # Strip 'cerebras/' prefix if present
+        if model.startswith("cerebras/"):
+            model = model[9:]
+
+        messages = payload.get("messages", [])
+        temperature = payload.get("temperature", 0.7)
+        max_tokens = payload.get("max_tokens", 1000)
 
         data = {
             "model": model,
             "messages": messages,
-            "temperature":
-            "max_tokens":
+            "temperature": temperature,
+            "max_tokens": max_tokens
         }
 
+        api_key = api_key_override if api_key_override else os.environ.get("CEREBRAS_API_KEY", "")
         headers = {
             "Content-Type": "application/json",
-            "Authorization": f"Bearer {
+            "Authorization": f"Bearer {api_key}"
         }
 
-
-
-
-
-
-
-
-            raise ValueError(f"Cerebras API returned status code {response.status_code}: {response.text}")
+        try:
+            response = requests.post(
+                "https://api.cloud.cerebras.ai/v1/chat/completions",
+                headers=headers,
+                json=data,
+                timeout=10  # Add timeout to avoid hanging
+            )
 
-        return response.json()
+            if response.status_code != 200:
+                raise ValueError(f"Cerebras API returned status code {response.status_code}: {response.text}")
+
+            return response.json()
+        except requests.exceptions.ConnectionError as e:
+            raise ValueError(f"Connection error to Cerebras API. This may be due to network restrictions in the environment: {str(e)}")
     except Exception as e:
         logger.error(f"Cerebras API error: {str(e)}")
         raise e
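Note on the prefix strip: model[9:] works because len("cerebras/") == 9, but on Python 3.9+ str.removeprefix states the same intent without the magic number:

model = "cerebras/llama-3.1-8b"
assert model.removeprefix("cerebras/") == model[9:] == "llama-3.1-8b"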
@@ -1027,80 +998,92 @@ def call_cerebras_api(payload, api_key_override=None):
 def call_googleai_api(payload, api_key_override=None):
     """Make a call to Google AI (Gemini) API with error handling"""
     try:
-        from google.generativeai import configure, GenerativeModel
-
         api_key = api_key_override if api_key_override else GOOGLEAI_API_KEY
         if not api_key:
             raise ValueError("Google AI API key is required")
 
-        configure(api_key=api_key)
+        # Use regular requests instead of the SDK since it might be missing
+        gemini_api_url = "https://generativelanguage.googleapis.com/v1/models/gemini-1.5-pro:generateContent"
 
         # Extract parameters from payload
-        model_name = payload.get("model", "gemini-1.5-pro")
         messages = payload.get("messages", [])
         temperature = payload.get("temperature", 0.7)
+        max_tokens = payload.get("max_tokens", 1000)
 
-        # Convert messages to Gemini format
-        google_messages = []
+        # Convert to Google's format
+        content_parts = []
+
+        # Add all messages
         for msg in messages:
             role = msg["role"]
             content = msg["content"]
 
-            #
             if role == "system":
+                # For system messages, we add it as part of the first user message
                 continue
-            gemini_role = "user" if role == "user" else "model"
-
-
+            elif role == "user":
+                # For user messages, add as regular content
+                if isinstance(content, str):
+                    content_parts.append({"text": content})
+                else:
+                    # Handle multimodal content
+                    for item in content:
+                        if item["type"] == "text":
+                            content_parts.append({"text": item["text"]})
 
-
-
-            if isinstance(content, list):
-                parts = []
-                for item in content:
-                    if item["type"] == "text":
-                        parts.append({"text": item["text"]})
-                    elif item["type"] == "image_url":
-                        image_data = item["image_url"]["url"]
-                        if image_data.startswith("data:"):
-                            # Extract base64 data
-                            mime, base64_data = image_data.split(";base64,")
-                            mime_type = mime.split(":")[1]
-                            parts.append({
-                                "inline_data": {
-                                    "mime_type": mime_type,
-                                    "data": base64_data
-                                }
-                            })
-                google_messages.append({"role": gemini_role, "parts": parts})
-            else:
-                # Simple text content
-                google_messages.append({"role": gemini_role, "parts": [{"text": content}]})
-
-        # Create Gemini model
-        model = GenerativeModel(model_name)
-
-        # Generate content
-        response = model.generate_content(
-            google_messages,
-            generation_config={
+        # Form the request data
+        data = {
+            "contents": [{"parts": content_parts}],
+            "generationConfig": {
                 "temperature": temperature,
-                "max_output_tokens": payload.get("max_tokens", 1000),
-                "top_p": payload.get("top_p", 0.95)
+                "maxOutputTokens": max_tokens,
+                "topP": payload.get("top_p", 0.95),
             }
+        }
+
+        headers = {
+            "Content-Type": "application/json",
+            "x-goog-api-key": api_key
+        }
+
+        # Make the request
+        response = requests.post(
+            gemini_api_url,
+            headers=headers,
+            json=data,
+            timeout=30
         )
 
-
+        if response.status_code != 200:
+            error_msg = f"Google AI API error: {response.status_code} - {response.text}"
+            logger.error(error_msg)
+            raise ValueError(error_msg)
+
+        # Parse response and convert to standard format
+        result = response.json()
+        text_content = ""
+
+        # Extract text from response
+        if "candidates" in result and len(result["candidates"]) > 0:
+            candidate = result["candidates"][0]
+            if "content" in candidate and "parts" in candidate["content"]:
+                for part in candidate["content"]["parts"]:
+                    if "text" in part:
+                        text_content += part["text"]
+
+        # Create a standardized response format
         return {
             "choices": [
                 {
                     "message": {
                         "role": "assistant",
-                        "content": response.text
+                        "content": text_content
                     }
                 }
             ]
         }
+
     except Exception as e:
         logger.error(f"Google AI API error: {str(e)}")
         raise e
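Note on the new parsing code: it walks the generateContent response shape, which, trimmed to the fields used here and with invented values for illustration, looks like this; the loop mirrors the hunk:

sample = {
    "candidates": [
        {"content": {"role": "model", "parts": [{"text": "Hello"}, {"text": " world"}]}}
    ]
}

text_content = ""
if sample.get("candidates"):
    candidate = sample["candidates"][0]
    for part in candidate.get("content", {}).get("parts", []):
        text_content += part.get("text", "")
assert text_content == "Hello world"

One behavioral caveat carried by the diff itself: the URL pins gemini-1.5-pro, so any model requested in the payload is now ignored, and the role handling skips system messages while silently dropping assistant turns.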