Spaces:

hadadrjt
/

ai

Running

hadadrjt committed on Apr 3

Commit

98abcc7

1 Parent(s): 6785ddc

ai: Better handling of load balancing.

Files changed (1) hide show

jarvis.py CHANGED Viewed

@@ -51,6 +51,8 @@ META_TAGS = os.getenv("META_TAGS")
 ALLOWED_EXTENSIONS = json.loads(os.getenv("ALLOWED_EXTENSIONS"))
 class SessionWithID(requests.Session):
     def __init__(self):
         super().__init__()
@@ -126,6 +128,7 @@ def fetch_response(host, provider_key, selected_model, messages, model_config, s
     return process_ai_response(ai_text)
 def chat_with_model(history, user_input, selected_model_display, sess):
     if not LINUX_SERVER_PROVIDER_KEYS or not LINUX_SERVER_HOSTS:
         return RESPONSES["RESPONSE_3"]
     if not hasattr(sess, "session_id"):
@@ -135,6 +138,11 @@ def chat_with_model(history, user_input, selected_model_display, sess):
     messages = [{"role": "user", "content": user} for user, _ in history]
     messages += [{"role": "assistant", "content": assistant} for _, assistant in history if assistant]
     messages.append({"role": "user", "content": user_input})
     candidates = [(host, key) for host in LINUX_SERVER_HOSTS for key in LINUX_SERVER_PROVIDER_KEYS]
     random.shuffle(candidates)
     with concurrent.futures.ThreadPoolExecutor(max_workers=len(candidates)) as executor:
@@ -142,6 +150,7 @@ def chat_with_model(history, user_input, selected_model_display, sess):
         for future in concurrent.futures.as_completed(futures):
             try:
                 result = future.result()
                 for f in futures:
                     if f is not future:
                         f.cancel()

 ALLOWED_EXTENSIONS = json.loads(os.getenv("ALLOWED_EXTENSIONS"))
+ACTIVE_CANDIDATE = None
 class SessionWithID(requests.Session):
     def __init__(self):
         super().__init__()
     return process_ai_response(ai_text)
 def chat_with_model(history, user_input, selected_model_display, sess):
+    global ACTIVE_CANDIDATE
     if not LINUX_SERVER_PROVIDER_KEYS or not LINUX_SERVER_HOSTS:
         return RESPONSES["RESPONSE_3"]
     if not hasattr(sess, "session_id"):
     messages = [{"role": "user", "content": user} for user, _ in history]
     messages += [{"role": "assistant", "content": assistant} for _, assistant in history if assistant]
     messages.append({"role": "user", "content": user_input})
+    if ACTIVE_CANDIDATE is not None:
+        try:
+            return fetch_response(ACTIVE_CANDIDATE[0], ACTIVE_CANDIDATE[1], selected_model, messages, model_config, sess.session_id)
+        except Exception:
+            ACTIVE_CANDIDATE = None
     candidates = [(host, key) for host in LINUX_SERVER_HOSTS for key in LINUX_SERVER_PROVIDER_KEYS]
     random.shuffle(candidates)
     with concurrent.futures.ThreadPoolExecutor(max_workers=len(candidates)) as executor:
         for future in concurrent.futures.as_completed(futures):
             try:
                 result = future.result()
+                ACTIVE_CANDIDATE = futures[future]
                 for f in futures:
                     if f is not future:
                         f.cancel()