hadadrjt committed
Commit 3ef53bc · 1 Parent(s): 23e5505

ai: Refactor the code for JARVIS multi-platform support.

Files changed (1):
  1. jarvis.py +108 -109

jarvis.py CHANGED
@@ -26,23 +26,27 @@ os.system("apt-get update -q -y && apt-get install -q -y tesseract-ocr tesseract
 INTERNAL_AI_GET_SERVER = os.getenv("INTERNAL_AI_GET_SERVER")
 INTERNAL_TRAINING_DATA = os.getenv("INTERNAL_TRAINING_DATA")
 
-LINUX_SERVER_HOSTS = [host for host in json.loads(os.getenv("LINUX_SERVER_HOST", "[]")) if host]
+SYSTEM_PROMPT_MAPPING = json.loads(os.getenv("SYSTEM_PROMPT_MAPPING", "{}"))
+SYSTEM_PROMPT_DEFAULT = os.getenv("DEFAULT_SYSTEM")
+
+LINUX_SERVER_HOSTS = [h for h in json.loads(os.getenv("LINUX_SERVER_HOST", "[]")) if h]
 LINUX_SERVER_HOSTS_MARKED = set()
 LINUX_SERVER_HOSTS_ATTEMPTS = {}
 
-LINUX_SERVER_PROVIDER_KEYS = [key for key in json.loads(os.getenv("LINUX_SERVER_PROVIDER_KEY", "[]")) if key]
+LINUX_SERVER_PROVIDER_KEYS = [k for k in json.loads(os.getenv("LINUX_SERVER_PROVIDER_KEY", "[]")) if k]
 LINUX_SERVER_PROVIDER_KEYS_MARKED = set()
 LINUX_SERVER_PROVIDER_KEYS_ATTEMPTS = {}
 
-LINUX_SERVER_ERRORS = set(map(int, os.getenv("LINUX_SERVER_ERROR").split(",")))
+LINUX_SERVER_ERRORS = set(map(int, os.getenv("LINUX_SERVER_ERROR", "").split(",")))
 
-AI_TYPES = {f"AI_TYPE_{i}": os.getenv(f"AI_TYPE_{i}") for i in range(1, 7)}
+AI_TYPES = {f"AI_TYPE_{i}": os.getenv(f"AI_TYPE_{i}") for i in range(1, 8)}
 RESPONSES = {f"RESPONSE_{i}": os.getenv(f"RESPONSE_{i}") for i in range(1, 10)}
 
 MODEL_MAPPING = json.loads(os.getenv("MODEL_MAPPING", "{}"))
 MODEL_CONFIG = json.loads(os.getenv("MODEL_CONFIG", "{}"))
-MODEL_CHOICES = list(MODEL_MAPPING.values()) if MODEL_MAPPING else []
+MODEL_CHOICES = list(MODEL_MAPPING.values())
 DEFAULT_CONFIG = json.loads(os.getenv("DEFAULT_CONFIG", "{}"))
+DEFAULT_MODEL_KEY = list(MODEL_MAPPING.keys())[0] if MODEL_MAPPING else None
 
 META_TAGS = os.getenv("META_TAGS")
 
@@ -50,157 +54,152 @@ ALLOWED_EXTENSIONS = json.loads(os.getenv("ALLOWED_EXTENSIONS", "[]"))
 
 ACTIVE_CANDIDATE = None
 
+class SessionWithID(requests.Session):
+    def __init__(self):
+        super().__init__()
+        self.session_id = str(uuid.uuid4())
+
+def create_session():
+    return SessionWithID()
+
 def get_available_items(items, marked):
-    available = [item for item in items if item not in marked]
-    random.shuffle(available)
-    return available
+    a = [i for i in items if i not in marked]
+    random.shuffle(a)
+    return a
 
 def marked_item(item, marked, attempts):
     marked.add(item)
     attempts[item] = attempts.get(item, 0) + 1
     if attempts[item] >= 3:
-        def remove_fail():
+        def remove():
             marked.discard(item)
             attempts.pop(item, None)
-        threading.Timer(300, remove_fail).start()
-
-class SessionWithID(requests.Session):
-    def __init__(self):
-        super().__init__()
-        self.session_id = str(uuid.uuid4())
-
-def create_session():
-    return SessionWithID()
+        threading.Timer(300, remove).start()
 
-def get_model_key(display_name):
-    return next((k for k, v in MODEL_MAPPING.items() if v == display_name), list(MODEL_MAPPING.keys())[0] if MODEL_MAPPING else MODEL_CHOICES[0])
+def get_model_key(display):
+    return next((k for k, v in MODEL_MAPPING.items() if v == display), DEFAULT_MODEL_KEY)
 
-def extract_file_content(file_path):
-    ext = Path(file_path).suffix.lower()
-    content = ""
+def extract_file_content(fp):
+    ext = Path(fp).suffix.lower()
+    c = ""
     try:
         if ext == ".pdf":
-            with pdfplumber.open(file_path) as pdf:
-                for page in pdf.pages:
-                    text = page.extract_text()
-                    if text:
-                        content += text + "\n"
-                    for table in page.extract_tables():
-                        table_str = "\n".join([", ".join(row) for row in table if row])
-                        content += "\n" + table_str + "\n"
+            with pdfplumber.open(fp) as pdf:
+                for p in pdf.pages:
+                    t = p.extract_text() or ""
+                    c += t + "\n"
         elif ext in [".doc", ".docx"]:
-            doc = docx.Document(file_path)
-            for para in doc.paragraphs:
-                content += para.text + "\n"
+            d = docx.Document(fp)
+            for para in d.paragraphs:
+                c += para.text + "\n"
         elif ext in [".xlsx", ".xls"]:
-            df = pd.read_excel(file_path)
-            content += df.to_csv(index=False)
+            df = pd.read_excel(fp)
+            c += df.to_csv(index=False)
         elif ext in [".ppt", ".pptx"]:
-            prs = Presentation(file_path)
-            for slide in prs.slides:
-                for shape in slide.shapes:
-                    if hasattr(shape, "text") and shape.text:
-                        content += shape.text + "\n"
-        elif ext in [".png", ".jpg", ".jpeg", ".tiff", ".bmp", ".gif", ".webp"]:
-            pytesseract.pytesseract.tesseract_cmd = "/usr/bin/tesseract"
-            image = Image.open(file_path)
-            content += pytesseract.image_to_string(image) + "\n"
+            prs = Presentation(fp)
+            for s in prs.slides:
+                for sh in s.shapes:
+                    if hasattr(sh, "text") and sh.text:
+                        c += sh.text + "\n"
         else:
-            content = Path(file_path).read_text(encoding="utf-8")
+            c = Path(fp).read_text(encoding="utf-8")
     except Exception as e:
-        content = f"{file_path}: {e}"
-    return content.strip()
+        c = f"{fp}: {e}"
+    return c.strip()
 
-async def fetch_response_async(host, provider_key, selected_model, messages, model_config, session_id):
-    timeouts = [60, 80, 120, 240]
-    for timeout in timeouts:
+async def fetch_response_async(host, key, model, msgs, cfg, sid):
+    for t in [60, 80, 120, 240]:
         try:
-            async with httpx.AsyncClient(timeout=timeout) as client:
-                resp = await client.post(host, json={"model": selected_model, "messages": messages, **model_config, "session_id": session_id}, headers={"Authorization": f"Bearer {provider_key}"})
-                if resp.status_code in LINUX_SERVER_ERRORS:
-                    marked_item(provider_key, LINUX_SERVER_PROVIDER_KEYS_MARKED, LINUX_SERVER_PROVIDER_KEYS_ATTEMPTS)
+            async with httpx.AsyncClient(timeout=t) as client:
+                r = await client.post(host, json={"model": model, "messages": msgs, **cfg, "session_id": sid}, headers={"Authorization": f"Bearer {key}"})
+                if r.status_code in LINUX_SERVER_ERRORS:
+                    marked_item(key, LINUX_SERVER_PROVIDER_KEYS_MARKED, LINUX_SERVER_PROVIDER_KEYS_ATTEMPTS)
                     return None
-                resp.raise_for_status()
-                resp_json = resp.json()
-                if isinstance(resp_json, dict) and resp_json.get("choices"):
-                    choice = resp_json["choices"][0]
-                    if choice.get("message") and isinstance(choice["message"].get("content"), str):
-                        return choice["message"]["content"]
+                r.raise_for_status()
+                j = r.json()
+                if isinstance(j, dict) and j.get("choices"):
+                    ch = j["choices"][0]
+                    if ch.get("message") and isinstance(ch["message"].get("content"), str):
+                        return ch["message"]["content"]
                 return None
-        except Exception:
+        except:
            continue
-    marked_item(provider_key, LINUX_SERVER_PROVIDER_KEYS_MARKED, LINUX_SERVER_PROVIDER_KEYS_ATTEMPTS)
+    marked_item(key, LINUX_SERVER_PROVIDER_KEYS_MARKED, LINUX_SERVER_PROVIDER_KEYS_ATTEMPTS)
     return None
 
-async def chat_with_model_async(history, user_input, selected_model_display, sess):
-    if not get_available_items(LINUX_SERVER_PROVIDER_KEYS, LINUX_SERVER_PROVIDER_KEYS_MARKED) or not get_available_items(LINUX_SERVER_HOSTS, LINUX_SERVER_HOSTS_MARKED):
+async def chat_with_model_async(history, user_input, model_display, sess, custom_prompt):
+    if not get_available_items(LINUX_SERVER_PROVIDER_KEYS, LINUX_SERVER_PROVIDER_KEYS_MARKED) or not get_available_items(LINUX_SERVER_HOSTS, LINUX_SERVER_HOSTS_ATTEMPTS):
         return RESPONSES["RESPONSE_3"]
     if not hasattr(sess, "session_id"):
         sess.session_id = str(uuid.uuid4())
-    selected_model = get_model_key(selected_model_display)
-    model_config = MODEL_CONFIG.get(selected_model, DEFAULT_CONFIG)
-    messages = [{"role": "user", "content": user} for user, _ in history] + [{"role": "assistant", "content": assistant} for _, assistant in history if assistant]
-    if INTERNAL_TRAINING_DATA and MODEL_CHOICES and selected_model_display == MODEL_CHOICES[0]:
-        messages.insert(0, {"role": "system", "content": INTERNAL_TRAINING_DATA})
-    messages.append({"role": "user", "content": user_input})
+    model_key = get_model_key(model_display)
+    cfg = MODEL_CONFIG.get(model_key, DEFAULT_CONFIG)
+    msgs = [{"role": "user", "content": u} for u, _ in history] + [{"role": "assistant", "content": a} for _, a in history if a]
+    if model_key == DEFAULT_MODEL_KEY and INTERNAL_TRAINING_DATA:
+        prompt = INTERNAL_TRAINING_DATA
+    else:
+        prompt = custom_prompt or SYSTEM_PROMPT_MAPPING.get(model_key, SYSTEM_PROMPT_DEFAULT)
+    msgs.insert(0, {"role": "system", "content": prompt})
+    msgs.append({"role": "user", "content": user_input})
     global ACTIVE_CANDIDATE
     if ACTIVE_CANDIDATE:
-        result = await fetch_response_async(ACTIVE_CANDIDATE[0], ACTIVE_CANDIDATE[1], selected_model, messages, model_config, sess.session_id)
-        if result:
-            return result
+        res = await fetch_response_async(ACTIVE_CANDIDATE[0], ACTIVE_CANDIDATE[1], model_key, msgs, cfg, sess.session_id)
+        if res:
+            return res
         ACTIVE_CANDIDATE = None
     keys = get_available_items(LINUX_SERVER_PROVIDER_KEYS, LINUX_SERVER_PROVIDER_KEYS_MARKED)
-    hosts = get_available_items(LINUX_SERVER_HOSTS, LINUX_SERVER_HOSTS_MARKED)
-    candidates = [(host, key) for host in hosts for key in keys]
-    random.shuffle(candidates)
-    for host, key in candidates:
-        result = await fetch_response_async(host, key, selected_model, messages, model_config, sess.session_id)
-        if result:
-            ACTIVE_CANDIDATE = (host, key)
-            return result
+    hosts = get_available_items(LINUX_SERVER_HOSTS, LINUX_SERVER_HOSTS_ATTEMPTS)
+    cands = [(h, k) for h in hosts for k in keys]
+    random.shuffle(cands)
+    for h, k in cands:
+        res = await fetch_response_async(h, k, model_key, msgs, cfg, sess.session_id)
+        if res:
+            ACTIVE_CANDIDATE = (h, k)
+            return res
     return RESPONSES["RESPONSE_2"]
 
-async def respond_async(multi_input, history, selected_model_display, sess):
-    message = {"text": multi_input.get("text", "").strip(), "files": multi_input.get("files", [])}
-    if not message["text"] and not message["files"]:
+async def respond_async(multi, history, model_display, sess, custom_prompt):
+    msg = {"text": multi.get("text", "").strip(), "files": multi.get("files", [])}
+    if not msg["text"] and not msg["files"]:
         yield history, gr.MultimodalTextbox(value=None, interactive=True), sess
         return
-    combined_input = ""
-    for file_item in message["files"]:
-        file_path = file_item["name"] if isinstance(file_item, dict) and "name" in file_item else file_item
-        combined_input += f"{Path(file_path).name}\n\n{extract_file_content(file_path)}\n\n"
-    if message["text"]:
-        combined_input += message["text"]
-    history.append([combined_input, ""])
-    ai_response = await chat_with_model_async(history, combined_input, selected_model_display, sess)
+    inp = ""
+    for f in msg["files"]:
+        p = f["name"] if isinstance(f, dict) and "name" in f else f
+        inp += f"{Path(p).name}\n\n{extract_file_content(p)}\n\n"
+    if msg["text"]:
+        inp += msg["text"]
+    history.append([inp, ""])
+    ai = await chat_with_model_async(history, inp, model_display, sess, custom_prompt)
     history[-1][1] = ""
-    def convert_to_string(data):
-        if isinstance(data, (str, int, float)):
-            return str(data)
-        if isinstance(data, bytes):
-            return data.decode("utf-8", errors="ignore")
-        if isinstance(data, (list, tuple)):
-            return "".join(map(convert_to_string, data))
-        if isinstance(data, dict):
-            return json.dumps(data, ensure_ascii=False)
-        return repr(data)
-    for character in ai_response:
-        history[-1][1] += convert_to_string(character)
+    def to_str(d):
+        if isinstance(d, (str, int, float)): return str(d)
+        if isinstance(d, bytes): return d.decode("utf-8", errors="ignore")
+        if isinstance(d, (list, tuple)): return "".join(map(to_str, d))
+        if isinstance(d, dict): return json.dumps(d, ensure_ascii=False)
+        return repr(d)
+    for c in ai:
+        history[-1][1] += to_str(c)
         await asyncio.sleep(0.0001)
         yield history, gr.MultimodalTextbox(value=None, interactive=True), sess
 
-def change_model(new_model_display):
-    return [], create_session(), new_model_display
+def change_model(new):
+    visible = new != MODEL_CHOICES[0]
+    default = SYSTEM_PROMPT_MAPPING.get(get_model_key(new), SYSTEM_PROMPT_DEFAULT)
+    return [], create_session(), new, default, gr.update(value=default, visible=visible)
 
 with gr.Blocks(fill_height=True, fill_width=True, title=AI_TYPES["AI_TYPE_4"], head=META_TAGS) as jarvis:
     user_history = gr.State([])
     user_session = gr.State(create_session())
     selected_model = gr.State(MODEL_CHOICES[0] if MODEL_CHOICES else "")
+    custom_prompt_state = gr.State("")
     chatbot = gr.Chatbot(label=AI_TYPES["AI_TYPE_1"], show_copy_button=True, scale=1, elem_id=AI_TYPES["AI_TYPE_2"])
     with gr.Row():
         msg = gr.MultimodalTextbox(show_label=False, placeholder=RESPONSES["RESPONSE_5"], interactive=True, file_count="single", file_types=ALLOWED_EXTENSIONS)
     with gr.Accordion(AI_TYPES["AI_TYPE_6"], open=False):
         model_dropdown = gr.Dropdown(show_label=False, choices=MODEL_CHOICES, value=MODEL_CHOICES[0])
-    model_dropdown.change(fn=change_model, inputs=[model_dropdown], outputs=[user_history, user_session, selected_model])
-    msg.submit(fn=respond_async, inputs=[msg, user_history, selected_model, user_session], outputs=[chatbot, msg, user_session], api_name=INTERNAL_AI_GET_SERVER)
+        system_prompt = gr.Textbox(label=AI_TYPES["AI_TYPE_7"], lines=2, interactive=True, visible=False)
+    model_dropdown.change(fn=change_model, inputs=[model_dropdown], outputs=[user_history, user_session, selected_model, custom_prompt_state, system_prompt])
+    system_prompt.change(fn=lambda x: x, inputs=[system_prompt], outputs=[custom_prompt_state])
+    msg.submit(fn=respond_async, inputs=[msg, user_history, selected_model, user_session, custom_prompt_state], outputs=[chatbot, msg, user_session], api_name=INTERNAL_AI_GET_SERVER)
 jarvis.launch(max_file_size="1mb")
 
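Review notes

The new default in the LINUX_SERVER_ERRORS line avoids the old crash on an unset variable (os.getenv(...) returning None), but an empty value still fails at import time: "".split(",") yields [""], and int("") raises ValueError. Skipping empty entries is one fix; a minimal sketch, assuming the same variable name:

    import os

    # Tolerates an unset or empty LINUX_SERVER_ERROR instead of raising
    # ValueError on int("") at import time ("".split(",") == [""]).
    LINUX_SERVER_ERRORS = {int(code) for code in os.getenv("LINUX_SERVER_ERROR", "").split(",") if code.strip()}
    print(LINUX_SERVER_ERRORS)  # set() when the variable is missing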
 
 
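SessionWithID subclasses requests.Session, but only its session_id attribute is ever read (the HTTP calls all go through httpx), so the object mainly serves as a per-user state holder inside gr.State. If the requests machinery is not otherwise needed, a plain holder does the same job; a sketch of that alternative (my variant, hypothetical name, not from the commit):

    import uuid
    from dataclasses import dataclass, field

    @dataclass
    class ChatSession:
        """Keeps the per-user id without carrying an unused requests.Session."""
        session_id: str = field(default_factory=lambda: str(uuid.uuid4()))

    def create_session():
        return ChatSession()

    print(create_session().session_id)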
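marked_item benches a failing item immediately but only arms the 300-second un-bench timer on the third strike; since benched items are filtered out of the candidate pool, an item that fails once or twice may never accumulate the third strike and stays benched indefinitely. The threading.Timer is also non-daemon, so a pending timer can delay interpreter shutdown. If earlier recovery is wanted, re-arming a daemon timer on every strike is one option; a sketch (my variant, not the commit's behavior):

    import threading

    def marked_item_with_cooldown(item, marked, attempts, cooldown=300):
        """Variant of marked_item: every failure re-arms a daemon cooldown
        timer, so even a single strike heals after `cooldown` seconds."""
        marked.add(item)
        attempts[item] = attempts.get(item, 0) + 1

        def remove():
            marked.discard(item)
            attempts.pop(item, None)

        timer = threading.Timer(cooldown, remove)
        timer.daemon = True  # never block process exit
        timer.start()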
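The rewritten extract_file_content drops two branches from the old version: PDF table extraction (page.extract_tables()) and image OCR via pytesseract, even though the apt-get line at the top of the file still installs tesseract-ocr. Image uploads now fall through to the read_text branch and come back as the error string from the except path. If OCR support is still intended, the removed branch can live on as a helper; a sketch reconstructed from the deleted code (hypothetical function name):

    from PIL import Image
    import pytesseract

    def ocr_image(file_path):
        """OCR helper rebuilt from the branch this commit removes. Assumes
        tesseract-ocr is installed, as the apt-get call above still does."""
        pytesseract.pytesseract.tesseract_cmd = "/usr/bin/tesseract"
        return pytesseract.image_to_string(Image.open(file_path)) + "\n"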
 
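fetch_response_async retries the same POST through an escalating timeout ladder (60, 80, 120, 240 s) and benches the provider key only after every rung fails; a status code in LINUX_SERVER_ERRORS benches the key immediately, skipping the remaining rungs. One regression worth flagging: the old `except Exception:` became a bare `except:`, which also swallows asyncio.CancelledError and can keep a cancelled request cycling through the ladder. A self-contained sketch of the ladder with the narrower handler (my variant):

    import httpx

    async def post_with_ladder(url, payload, headers, timeouts=(60, 80, 120, 240)):
        """Sketch of the retry ladder in fetch_response_async, but with
        `except Exception` so task cancellation still propagates."""
        for timeout in timeouts:
            try:
                async with httpx.AsyncClient(timeout=timeout) as client:
                    response = await client.post(url, json=payload, headers=headers)
                    response.raise_for_status()
                    return response.json()
            except Exception:
                continue  # try the next, longer timeout
        return None  # all rungs exhausted; caller decides whether to bench the key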
 
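The main behavioral addition is per-model system prompts. The resolution order in chat_with_model_async is: the internal training prompt for the default model, otherwise the user's custom prompt, then the per-model mapping, then the global default. A standalone sketch of that precedence with hypothetical values (the real ones come from environment variables):

    SYSTEM_PROMPT_MAPPING = {"model_b": "You are model B."}  # hypothetical values
    SYSTEM_PROMPT_DEFAULT = "You are a helpful assistant."
    INTERNAL_TRAINING_DATA = "JARVIS internal prompt."
    DEFAULT_MODEL_KEY = "model_a"

    def resolve_prompt(model_key, custom_prompt):
        # Mirrors the branch added in chat_with_model_async.
        if model_key == DEFAULT_MODEL_KEY and INTERNAL_TRAINING_DATA:
            return INTERNAL_TRAINING_DATA  # default model: fixed persona
        return custom_prompt or SYSTEM_PROMPT_MAPPING.get(model_key, SYSTEM_PROMPT_DEFAULT)

    assert resolve_prompt("model_a", "ignored") == "JARVIS internal prompt."
    assert resolve_prompt("model_b", "") == "You are model B."
    assert resolve_prompt("model_c", "Be terse.") == "Be terse."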
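In the new chat_with_model_async, host availability is checked with get_available_items(LINUX_SERVER_HOSTS, LINUX_SERVER_HOSTS_ATTEMPTS), passing the attempts dict, while every key check passes the *_MARKED set. Membership on a dict tests its keys, and marked_item adds to and clears both structures together, so the two forms currently filter identically; in fact hosts are never passed to marked_item in this version, so both host structures stay empty. The asymmetry is still fragile, since it works only while that invariant holds:

    # Standalone demonstration: dict membership tests keys, so the dict-based
    # filter matches the set-based one only while the two stay in lockstep.
    hosts = ["host-a", "host-b"]
    marked = {"host-b"}
    attempts = {"host-b": 1}  # marked_item() updates both together

    assert [h for h in hosts if h not in marked] == ["host-a"]
    assert [h for h in hosts if h not in attempts] == ["host-a"]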
 
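respond_async streams the reply by resetting history[-1][1] to "", then appending one character at a time with a short asyncio.sleep and yielding the whole history to Gradio on each step; to_str guards against non-string chunks. The pattern in isolation:

    import asyncio

    async def stream_text(text, delay=0.0001):
        """Sketch of the character-streaming loop in respond_async: yield a
        growing prefix so the UI can repaint after each character."""
        partial = ""
        for ch in text:
            partial += ch
            await asyncio.sleep(delay)  # give the event loop room to flush updates
            yield partial

    async def demo():
        async for snapshot in stream_text("Hello"):
            print(snapshot)

    asyncio.run(demo())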
 
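On the UI side, the new system_prompt textbox sits in the same accordion as the model dropdown and is hidden for the default model: change_model returns gr.update(value=default, visible=visible), so switching models reseeds the box with that model's mapped prompt and toggles its visibility, while also resetting the chat history and session. The identity handler system_prompt.change(fn=lambda x: x, ...) simply mirrors edits into custom_prompt_state. A minimal sketch of that mirroring pattern, assuming a recent Gradio API:

    import gradio as gr

    with gr.Blocks() as demo:
        prompt_state = gr.State("")
        prompt_box = gr.Textbox(label="System prompt")
        # Every edit to the box overwrites the state with the box's value.
        prompt_box.change(fn=lambda x: x, inputs=[prompt_box], outputs=[prompt_state])

    demo.launch()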