ai: Better handling of load balancing.
Browse files
jarvis.py
CHANGED
@@ -51,6 +51,8 @@ META_TAGS = os.getenv("META_TAGS")
|
|
51 |
|
52 |
ALLOWED_EXTENSIONS = json.loads(os.getenv("ALLOWED_EXTENSIONS"))
|
53 |
|
|
|
|
|
54 |
class SessionWithID(requests.Session):
|
55 |
def __init__(self):
|
56 |
super().__init__()
|
@@ -126,6 +128,7 @@ def fetch_response(host, provider_key, selected_model, messages, model_config, s
|
|
126 |
return process_ai_response(ai_text)
|
127 |
|
128 |
def chat_with_model(history, user_input, selected_model_display, sess):
|
|
|
129 |
if not LINUX_SERVER_PROVIDER_KEYS or not LINUX_SERVER_HOSTS:
|
130 |
return RESPONSES["RESPONSE_3"]
|
131 |
if not hasattr(sess, "session_id"):
|
@@ -135,6 +138,11 @@ def chat_with_model(history, user_input, selected_model_display, sess):
|
|
135 |
messages = [{"role": "user", "content": user} for user, _ in history]
|
136 |
messages += [{"role": "assistant", "content": assistant} for _, assistant in history if assistant]
|
137 |
messages.append({"role": "user", "content": user_input})
|
|
|
|
|
|
|
|
|
|
|
138 |
candidates = [(host, key) for host in LINUX_SERVER_HOSTS for key in LINUX_SERVER_PROVIDER_KEYS]
|
139 |
random.shuffle(candidates)
|
140 |
with concurrent.futures.ThreadPoolExecutor(max_workers=len(candidates)) as executor:
|
@@ -142,6 +150,7 @@ def chat_with_model(history, user_input, selected_model_display, sess):
|
|
142 |
for future in concurrent.futures.as_completed(futures):
|
143 |
try:
|
144 |
result = future.result()
|
|
|
145 |
for f in futures:
|
146 |
if f is not future:
|
147 |
f.cancel()
|
|
|
51 |
|
52 |
ALLOWED_EXTENSIONS = json.loads(os.getenv("ALLOWED_EXTENSIONS"))
|
53 |
|
54 |
+
ACTIVE_CANDIDATE = None
|
55 |
+
|
56 |
class SessionWithID(requests.Session):
|
57 |
def __init__(self):
|
58 |
super().__init__()
|
|
|
128 |
return process_ai_response(ai_text)
|
129 |
|
130 |
def chat_with_model(history, user_input, selected_model_display, sess):
|
131 |
+
global ACTIVE_CANDIDATE
|
132 |
if not LINUX_SERVER_PROVIDER_KEYS or not LINUX_SERVER_HOSTS:
|
133 |
return RESPONSES["RESPONSE_3"]
|
134 |
if not hasattr(sess, "session_id"):
|
|
|
138 |
messages = [{"role": "user", "content": user} for user, _ in history]
|
139 |
messages += [{"role": "assistant", "content": assistant} for _, assistant in history if assistant]
|
140 |
messages.append({"role": "user", "content": user_input})
|
141 |
+
if ACTIVE_CANDIDATE is not None:
|
142 |
+
try:
|
143 |
+
return fetch_response(ACTIVE_CANDIDATE[0], ACTIVE_CANDIDATE[1], selected_model, messages, model_config, sess.session_id)
|
144 |
+
except Exception:
|
145 |
+
ACTIVE_CANDIDATE = None
|
146 |
candidates = [(host, key) for host in LINUX_SERVER_HOSTS for key in LINUX_SERVER_PROVIDER_KEYS]
|
147 |
random.shuffle(candidates)
|
148 |
with concurrent.futures.ThreadPoolExecutor(max_workers=len(candidates)) as executor:
|
|
|
150 |
for future in concurrent.futures.as_completed(futures):
|
151 |
try:
|
152 |
result = future.result()
|
153 |
+
ACTIVE_CANDIDATE = futures[future]
|
154 |
for f in futures:
|
155 |
if f is not future:
|
156 |
f.cancel()
|