fixup! ai: Better handling of load balancing.
jarvis.py (changed)
```diff
@@ -19,6 +19,7 @@ import io
 import uuid
 import concurrent.futures
 import itertools
+import threading
 
 from openai import OpenAI
 
```
```diff
@@ -37,14 +38,19 @@ from pptx import Presentation
 os.system("apt-get update -q -y && apt-get install -q -y tesseract-ocr tesseract-ocr-eng tesseract-ocr-ind libleptonica-dev libtesseract-dev")
 
 LINUX_SERVER_HOSTS = [host for host in json.loads(os.getenv("LINUX_SERVER_HOST", "[]")) if host]
+LINUX_SERVER_HOSTS_MARKED = set()
+LINUX_SERVER_HOSTS_ATTEMPTS = {}
+
 LINUX_SERVER_PROVIDER_KEYS = [key for key in json.loads(os.getenv("LINUX_SERVER_PROVIDER_KEY", "[]")) if key]
+LINUX_SERVER_PROVIDER_KEYS_MARKED = set()
+LINUX_SERVER_PROVIDER_KEYS_ATTEMPTS = {}
 
 AI_TYPES = {f"AI_TYPE_{i}": os.getenv(f"AI_TYPE_{i}") for i in range(1, 7)}
 RESPONSES = {f"RESPONSE_{i}": os.getenv(f"RESPONSE_{i}") for i in range(1, 10)}
 
 MODEL_MAPPING = json.loads(os.getenv("MODEL_MAPPING", "{}"))
 MODEL_CONFIG = json.loads(os.getenv("MODEL_CONFIG", "{}"))
-MODEL_CHOICES = list(MODEL_MAPPING.values())
+MODEL_CHOICES = list(MODEL_MAPPING.values()) if MODEL_MAPPING else []
 DEFAULT_CONFIG = json.loads(os.getenv("DEFAULT_CONFIG", "{}"))
 
 META_TAGS = os.getenv("META_TAGS")
```
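Both pools are read from JSON-array environment variables, and each now gets a companion `set` of temporarily benched items plus a dict of failure counts. For orientation, a minimal sketch of the expected variable shape; the URLs and keys below are invented placeholders, not values from this repo:

```python
import json
import os

# Invented placeholder values; real deployments set these as secrets.
os.environ["LINUX_SERVER_HOST"] = json.dumps([
    "https://inference-1.example.com/v1",  # assumed OpenAI-compatible base URLs
    "https://inference-2.example.com/v1",
])
os.environ["LINUX_SERVER_PROVIDER_KEY"] = json.dumps(["sk-aaa", "sk-bbb"])

# Same filtering expression as the module above: empty entries are dropped.
hosts = [h for h in json.loads(os.getenv("LINUX_SERVER_HOST", "[]")) if h]
print(hosts)
```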
```diff
@@ -53,6 +59,21 @@ ALLOWED_EXTENSIONS = json.loads(os.getenv("ALLOWED_EXTENSIONS"))
 
 ACTIVE_CANDIDATE = None
 
+def get_available_items(items, marked):
+    available = [item for item in items if item not in marked]
+    random.shuffle(available)
+    return available
+
+def marked_item(item, marked, attempts):
+    marked.add(item)
+    attempts[item] = attempts.get(item, 0) + 1
+    if attempts[item] >= 3:
+        def remove_fail():
+            marked.discard(item)
+            if item in attempts:
+                del attempts[item]
+        threading.Timer(300, remove_fail).start()
+
 class SessionWithID(requests.Session):
     def __init__(self):
         super().__init__()
```
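These two helpers form a simple circuit breaker: `get_available_items` filters and shuffles whatever is not benched, and `marked_item` benches a failing item, returning it to the pool via `threading.Timer` five minutes after its third strike. Note that, as written, an item with fewer than three strikes stays benched until it accumulates more failures, since no timer is scheduled for it. A self-contained sketch of the same pattern with a shortened cooldown; `mark` and the demo values are illustrative, not from the diff:

```python
import threading
import time

marked = set()
attempts = {}

def mark(item, cooldown=0.2):  # the diff uses 300 seconds; shortened for the demo
    marked.add(item)
    attempts[item] = attempts.get(item, 0) + 1
    if attempts[item] >= 3:
        def remove_fail():
            marked.discard(item)
            attempts.pop(item, None)
        threading.Timer(cooldown, remove_fail).start()

for _ in range(3):
    mark("host-a")
print("host-a" in marked)  # True: the third strike just landed
time.sleep(0.4)
print("host-a" in marked)  # False: the timer returned it to the pool
```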
```diff
@@ -121,15 +142,20 @@ def process_ai_response(ai_text):
     return ai_text
 
 def fetch_response(host, provider_key, selected_model, messages, model_config, session_id):
-    [previous 5-line body; not captured in this view]
+    try:
+        client = OpenAI(base_url=host, api_key=provider_key)
+        data = {"model": selected_model, "messages": messages, **model_config}
+        response = client.chat.completions.create(extra_body={"optillm_approach": "rto|re2|cot_reflection|self_consistency|plansearch|leap|z3|bon|moa|mcts|mcp|router|privacy|executecode|json", "session_id": session_id}, **data)
+        ai_text = response.choices[0].message.content if response.choices and response.choices[0].message and response.choices[0].message.content else RESPONSES["RESPONSE_2"]
+        return process_ai_response(ai_text)
+    except Exception:
+        marked_item(provider_key, LINUX_SERVER_PROVIDER_KEYS_MARKED, LINUX_SERVER_PROVIDER_KEYS_ATTEMPTS)
+        marked_item(host, LINUX_SERVER_HOSTS_MARKED, LINUX_SERVER_HOSTS_ATTEMPTS)
+        raise
 
 def chat_with_model(history, user_input, selected_model_display, sess):
     global ACTIVE_CANDIDATE
-    if not LINUX_SERVER_PROVIDER_KEYS or not LINUX_SERVER_HOSTS:
+    if not get_available_items(LINUX_SERVER_PROVIDER_KEYS, LINUX_SERVER_PROVIDER_KEYS_MARKED) or not get_available_items(LINUX_SERVER_HOSTS, LINUX_SERVER_HOSTS_MARKED):
         return RESPONSES["RESPONSE_3"]
     if not hasattr(sess, "session_id"):
         sess.session_id = str(uuid.uuid4())
```
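On any failure, `fetch_response` now benches both the key and the host before re-raising, so the caller can fall through to other candidates. The `optillm_approach` string rides along in `extra_body`, the openai-python mechanism for forwarding fields the SDK does not model to an OpenAI-compatible server. A minimal standalone call of the same shape; the base URL, key, and model name here are placeholders:

```python
from openai import OpenAI

# Placeholder endpoint, key, and model; any OpenAI-compatible server works.
client = OpenAI(base_url="https://inference-1.example.com/v1", api_key="sk-aaa")
response = client.chat.completions.create(
    model="some-model",
    messages=[{"role": "user", "content": "ping"}],
    extra_body={"optillm_approach": "moa"},  # forwarded verbatim to the server
)
print(response.choices[0].message.content)
```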
```diff
@@ -143,7 +169,9 @@ def chat_with_model(history, user_input, selected_model_display, sess):
             return fetch_response(ACTIVE_CANDIDATE[0], ACTIVE_CANDIDATE[1], selected_model, messages, model_config, sess.session_id)
         except Exception:
             ACTIVE_CANDIDATE = None
-    [previous candidates assignment; not captured in this view]
+    available_keys = get_available_items(LINUX_SERVER_PROVIDER_KEYS, LINUX_SERVER_PROVIDER_KEYS_MARKED)
+    available_servers = get_available_items(LINUX_SERVER_HOSTS, LINUX_SERVER_HOSTS_MARKED)
+    candidates = [(host, key) for host in available_servers for key in available_keys]
     random.shuffle(candidates)
     with concurrent.futures.ThreadPoolExecutor(max_workers=len(candidates)) as executor:
         futures = {executor.submit(fetch_response, host, key, selected_model, messages, model_config, sess.session_id): (host, key) for (host, key) in candidates}
```
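The candidate grid is now the cross product of only the unbenched servers and keys, raced on one thread per pair. The hunk ends before the loop that consumes `futures`, but the shape is the usual first-success-wins race over `concurrent.futures.as_completed`; a self-contained sketch of that pattern, with an invented flaky `worker` standing in for `fetch_response`:

```python
import concurrent.futures
import random

def worker(candidate):
    # Stand-in for fetch_response: fails roughly half the time.
    if random.random() < 0.5:
        raise RuntimeError(f"{candidate} failed")
    return f"answer from {candidate}"

candidates = ["host-a", "host-b", "host-c", "host-d"]
result = None
with concurrent.futures.ThreadPoolExecutor(max_workers=len(candidates)) as executor:
    futures = {executor.submit(worker, c): c for c in candidates}
    for future in concurrent.futures.as_completed(futures):
        try:
            result = future.result()
            break  # first success wins; the rest are ignored
        except Exception:
            continue  # a benched candidate; wait for the others

print(result)  # None only if every candidate failed
```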
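One caveat worth flagging in review: the `*_MARKED` sets and `*_ATTEMPTS` dicts are mutated from executor threads and from `threading.Timer` threads at once. CPython's GIL keeps each individual operation here intact, but the check-then-act pairs are not atomic; if this bookkeeping grows, a lock would make it safe. A minimal sketch of that variant; the lock and wrapper are a suggestion, not part of the diff:

```python
import threading

_POOL_LOCK = threading.Lock()  # hypothetical; not in the diff

def marked_item_locked(item, marked, attempts):
    with _POOL_LOCK:
        marked.add(item)
        attempts[item] = attempts.get(item, 0) + 1
        third_strike = attempts[item] >= 3
    if third_strike:
        def remove_fail():
            with _POOL_LOCK:
                marked.discard(item)
                attempts.pop(item, None)
        threading.Timer(300, remove_fail).start()
```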