Spaces: sierrafr / (Runtime error)

hadadrjt committed · Commit cdd78b7 · Parent: 78933d2

ai: Refactor the code for 2.1.1-ft-QwQ-32B.

Files changed (2):
  1. jarvis.py +41 -90
  2. requirements.txt +1 -5
jarvis.py CHANGED
@@ -3,38 +3,22 @@
 # SPDX-License-Identifier: Apache-2.0
 #
 
+import asyncio
+import docx
 import gradio as gr
-import requests
+import httpx
 import json
 import os
-import random
-import time
-import pytesseract
-import pdfplumber
-import docx
 import pandas as pd
-import pptx
-import fitz
-import io
-import uuid
-import concurrent.futures
-import itertools
+import pdfplumber
+import pytesseract
+import random
+import requests
 import threading
-import httpx
-import asyncio
-
-from openai import OpenAI
-
-from optillm.cot_reflection import cot_reflection
-from optillm.leap import leap
-from optillm.plansearch import plansearch
-from optillm.reread import re2_approach
-from optillm.rto import round_trip_optimization
-from optillm.self_consistency import advanced_self_consistency_approach
-from optillm.z3_solver import Z3SymPySolverSystem
+import uuid
 
-from pathlib import Path
 from PIL import Image
+from pathlib import Path
 from pptx import Presentation
 
 os.system("apt-get update -q -y && apt-get install -q -y tesseract-ocr tesseract-ocr-eng tesseract-ocr-ind libleptonica-dev libtesseract-dev")
@@ -60,7 +44,7 @@ DEFAULT_CONFIG = json.loads(os.getenv("DEFAULT_CONFIG", "{}"))
 
 META_TAGS = os.getenv("META_TAGS")
 
-ALLOWED_EXTENSIONS = json.loads(os.getenv("ALLOWED_EXTENSIONS"))
+ALLOWED_EXTENSIONS = json.loads(os.getenv("ALLOWED_EXTENSIONS", "[]"))
 
 ACTIVE_CANDIDATE = None
 
@@ -100,11 +84,9 @@ def extract_file_content(file_path):
                     text = page.extract_text()
                     if text:
                         content += text + "\n"
-                    tables = page.extract_tables()
-                    if tables:
-                        for table in tables:
-                            table_str = "\n".join([", ".join(row) for row in table if row])
-                            content += "\n" + table_str + "\n"
+                    for table in page.extract_tables():
+                        table_str = "\n".join([", ".join(row) for row in table if row])
+                        content += "\n" + table_str + "\n"
         elif ext in [".doc", ".docx"]:
             doc = docx.Document(file_path)
             for para in doc.paragraphs:
@@ -119,51 +101,28 @@ def extract_file_content(file_path):
                     if hasattr(shape, "text") and shape.text:
                         content += shape.text + "\n"
         elif ext in [".png", ".jpg", ".jpeg", ".tiff", ".bmp", ".gif", ".webp"]:
-            try:
-                pytesseract.pytesseract.tesseract_cmd = "/usr/bin/tesseract"
-                image = Image.open(file_path)
-                text = pytesseract.image_to_string(image)
-                content += text + "\n"
-            except Exception as e:
-                content += f"{e}\n"
+            pytesseract.pytesseract.tesseract_cmd = "/usr/bin/tesseract"
+            image = Image.open(file_path)
+            content += pytesseract.image_to_string(image) + "\n"
         else:
             content = Path(file_path).read_text(encoding="utf-8")
     except Exception as e:
         content = f"{file_path}: {e}"
     return content.strip()
 
-def process_ai_response(ai_text):
-    try:
-        result = round_trip_optimization(ai_text)
-        result = re2_approach(result)
-        result = cot_reflection(result)
-        result = advanced_self_consistency_approach(result)
-        result = plansearch(result)
-        result = leap(result)
-        solver = Z3SymPySolverSystem()
-        result = solver.solve(result)
-        return result
-    except Exception:
-        return ai_text
-
 async def fetch_response_async(host, provider_key, selected_model, messages, model_config, session_id):
     timeouts = [60, 80, 120, 240]
     for timeout in timeouts:
         try:
             async with httpx.AsyncClient(timeout=timeout) as client:
                 data = {"model": selected_model, "messages": messages, **model_config}
-                extra = {"optillm_approach": "rto|re2|cot_reflection|self_consistency|plansearch|leap|z3|bon|moa|mcts|mcp|router|privacy|executecode|json", "session_id": session_id}
-                resp = await client.post(host, json={**data, "extra_body": extra, "session_id": session_id}, headers={"Authorization": f"Bearer {provider_key}"})
+                resp = await client.post(host, json={**data, "session_id": session_id}, headers={"Authorization": f"Bearer {provider_key}"})
                 resp.raise_for_status()
-                try:
-                    resp_json = resp.json()
-                except json.JSONDecodeError:
-                    return RESPONSES["RESPONSE_2"]
-                if isinstance(resp_json, dict) and "choices" in resp_json and isinstance(resp_json["choices"], list) and len(resp_json["choices"]) > 0 and isinstance(resp_json["choices"][0], dict):
+                resp_json = resp.json()
+                if isinstance(resp_json, dict) and resp_json.get("choices"):
                     choice = resp_json["choices"][0]
-                    if "message" in choice and isinstance(choice["message"], dict) and "content" in choice["message"] and isinstance(choice["message"]["content"], str):
-                        ai_text = choice["message"]["content"]
-                        return process_ai_response(ai_text)
+                    if choice.get("message") and isinstance(choice["message"].get("content"), str):
+                        return choice["message"]["content"]
                 return RESPONSES["RESPONSE_2"]
         except Exception:
             continue
@@ -177,29 +136,25 @@ async def chat_with_model_async(history, user_input, selected_model_display, ses
     sess.session_id = str(uuid.uuid4())
     selected_model = get_model_key(selected_model_display)
     model_config = MODEL_CONFIG.get(selected_model, DEFAULT_CONFIG)
-    messages = [{"role": "user", "content": user} for user, _ in history]
-    messages += [{"role": "assistant", "content": assistant} for _, assistant in history if assistant]
+    messages = [{"role": "user", "content": user} for user, _ in history] + [{"role": "assistant", "content": assistant} for _, assistant in history if assistant]
     if INTERNAL_TRAINING_DATA:
         messages.insert(0, {"role": "system", "content": INTERNAL_TRAINING_DATA})
     messages.append({"role": "user", "content": user_input})
     global ACTIVE_CANDIDATE
-    if ACTIVE_CANDIDATE is not None:
-        try:
-            return await fetch_response_async(ACTIVE_CANDIDATE[0], ACTIVE_CANDIDATE[1], selected_model, messages, model_config, sess.session_id)
-        except Exception:
-            ACTIVE_CANDIDATE = None
-    available_keys = get_available_items(LINUX_SERVER_PROVIDER_KEYS, LINUX_SERVER_PROVIDER_KEYS_MARKED)
-    available_servers = get_available_items(LINUX_SERVER_HOSTS, LINUX_SERVER_HOSTS_MARKED)
-    candidates = [(host, key) for host in available_servers for key in available_keys]
+    if ACTIVE_CANDIDATE:
+        result = await fetch_response_async(ACTIVE_CANDIDATE[0], ACTIVE_CANDIDATE[1], selected_model, messages, model_config, sess.session_id)
+        if result != RESPONSES["RESPONSE_2"]:
+            return result
+        ACTIVE_CANDIDATE = None
+    keys = get_available_items(LINUX_SERVER_PROVIDER_KEYS, LINUX_SERVER_PROVIDER_KEYS_MARKED)
+    hosts = get_available_items(LINUX_SERVER_HOSTS, LINUX_SERVER_HOSTS_MARKED)
+    candidates = [(host, key) for host in hosts for key in keys]
     random.shuffle(candidates)
-    tasks = [fetch_response_async(host, key, selected_model, messages, model_config, sess.session_id) for host, key in candidates]
-    for task in asyncio.as_completed(tasks):
-        try:
-            result = await task
-            ACTIVE_CANDIDATE = next(((host, key) for host, key in candidates if host and key), None)
+    for host, key in candidates:
+        result = await fetch_response_async(host, key, selected_model, messages, model_config, sess.session_id)
+        if result != RESPONSES["RESPONSE_2"]:
+            ACTIVE_CANDIDATE = (host, key)
             return result
-        except Exception:
-            continue
     return RESPONSES["RESPONSE_2"]
 
 async def respond_async(multi_input, history, selected_model_display, sess):
@@ -210,8 +165,7 @@ async def respond_async(multi_input, history, selected_model_display, sess):
     combined_input = ""
     for file_item in message["files"]:
         file_path = file_item["name"] if isinstance(file_item, dict) and "name" in file_item else file_item
-        file_content = extract_file_content(file_path)
-        combined_input += f"{Path(file_path).name}\n\n{file_content}\n\n"
+        combined_input += f"{Path(file_path).name}\n\n{extract_file_content(file_path)}\n\n"
     if message["text"]:
         combined_input += message["text"]
     history.append([combined_input, ""])
@@ -220,14 +174,13 @@ async def respond_async(multi_input, history, selected_model_display, sess):
     def convert_to_string(data):
         if isinstance(data, (str, int, float)):
             return str(data)
-        elif isinstance(data, bytes):
+        if isinstance(data, bytes):
             return data.decode("utf-8", errors="ignore")
-        elif isinstance(data, (list, tuple)):
+        if isinstance(data, (list, tuple)):
             return "".join(map(convert_to_string, data))
-        elif isinstance(data, dict):
+        if isinstance(data, dict):
             return json.dumps(data, ensure_ascii=False)
-        else:
-            return repr(data)
+        return repr(data)
     for character in ai_response:
         history[-1][1] += convert_to_string(character)
         await asyncio.sleep(0.0001)
@@ -239,11 +192,9 @@ def change_model(new_model_display):
 with gr.Blocks(fill_height=True, fill_width=True, title=AI_TYPES["AI_TYPE_4"], head=META_TAGS) as jarvis:
     user_history = gr.State([])
     user_session = gr.State(create_session())
-    selected_model = gr.State(MODEL_CHOICES[0])
+    selected_model = gr.State(MODEL_CHOICES[0] if MODEL_CHOICES else "")
     chatbot = gr.Chatbot(label=AI_TYPES["AI_TYPE_1"], show_copy_button=True, scale=1, elem_id=AI_TYPES["AI_TYPE_2"])
-    #model_dropdown = gr.Dropdown(show_label=False, choices=MODEL_CHOICES, value=MODEL_CHOICES[0])
     with gr.Row():
         msg = gr.MultimodalTextbox(show_label=False, placeholder=RESPONSES["RESPONSE_5"], interactive=True, file_count="single", file_types=ALLOWED_EXTENSIONS)
-    #model_dropdown.change(fn=change_model, inputs=[model_dropdown], outputs=[user_history, user_session, selected_model])
-    msg.submit(fn=respond_async, inputs=[msg, user_history, selected_model, user_session], outputs=[chatbot, msg, user_session], concurrency_limit=None, api_name=INTERNAL_AI_GET_SERVER)
+    msg.submit(fn=respond_async, inputs=[msg, user_history, selected_model, user_session], outputs=[chatbot, msg, user_session], api_name=INTERNAL_AI_GET_SERVER)
 jarvis.launch(max_file_size="1mb")
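
Note on the main behavioural change: the removed chat_with_model_async raced every (host, key) pair at once via asyncio.as_completed and then cached an essentially arbitrary pair (the next(...) expression scans all candidates, not the one whose task finished), while the removed fetch_response_async still routed replies through the optillm post-processing chain. The new code probes one candidate at a time, treats RESPONSES["RESPONSE_2"] as a failure sentinel, and caches exactly the pair that answered. A minimal standalone sketch of the new pattern, assuming an OpenAI-style /chat/completions endpoint; probe, ask, SENTINEL, and ACTIVE are illustrative names, not identifiers from the Space:

    import asyncio
    import random

    import httpx

    SENTINEL = "unavailable"  # stands in for RESPONSES["RESPONSE_2"]
    ACTIVE = None             # last (host, key) pair that answered

    async def probe(host, key, payload):
        # One attempt against one endpoint; any failure maps to the sentinel.
        try:
            async with httpx.AsyncClient(timeout=60) as client:
                resp = await client.post(host, json=payload, headers={"Authorization": f"Bearer {key}"})
                resp.raise_for_status()
                choices = resp.json().get("choices") or []
                if choices and isinstance(choices[0].get("message", {}).get("content"), str):
                    return choices[0]["message"]["content"]
        except Exception:
            pass
        return SENTINEL

    async def ask(hosts, keys, payload):
        global ACTIVE
        if ACTIVE:  # reuse the cached endpoint first
            result = await probe(ACTIVE[0], ACTIVE[1], payload)
            if result != SENTINEL:
                return result
            ACTIVE = None
        candidates = [(h, k) for h in hosts for k in keys]
        random.shuffle(candidates)
        for host, key in candidates:  # sequential failover: first answer wins
            result = await probe(host, key, payload)
            if result != SENTINEL:
                ACTIVE = (host, key)  # cache the pair that actually answered
                return result
        return SENTINEL

    # usage: asyncio.run(ask(["https://host.example/v1/chat/completions"], ["key"], {"model": "m", "messages": []}))

The trade-off is latency for determinism: at most one request is in flight, and ACTIVE always points at an endpoint that produced the returned result.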
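
Separately, the ALLOWED_EXTENSIONS change is a crash fix, and plausibly the source of this Space's "Runtime error" status: when the variable is unset, os.getenv returns None and json.loads(None) raises TypeError at import time. A small before/after sketch:

    import json
    import os

    os.environ.pop("ALLOWED_EXTENSIONS", None)  # simulate the unset variable

    try:
        json.loads(os.getenv("ALLOWED_EXTENSIONS"))  # old line: json.loads(None)
    except TypeError as err:
        print("old behaviour:", err)

    print("new behaviour:", json.loads(os.getenv("ALLOWED_EXTENSIONS", "[]")))  # -> []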
 
requirements.txt CHANGED
@@ -1,11 +1,7 @@
-huggingface_hub
 httpx
-openai
-optillm
 pandas
 pdfplumber
-pillow
-pymupdf
+Pillow
 python-docx
 python-pptx
 pytesseract
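
With the optillm chain and the OpenAI client gone, huggingface_hub, openai, optillm, and pymupdf drop out of the dependency list, and pillow is renamed to its canonical PyPI casing. A quick sanity check that the trimmed list still covers every module the refactored jarvis.py imports (a sketch; gradio and requests are not in requirements.txt and are assumed to be provided by the Space's base image):

    import importlib

    # Top-level modules imported by the new jarvis.py; distribution names differ
    # from module names for python-docx (docx), python-pptx (pptx), and Pillow (PIL).
    for module in ["asyncio", "docx", "gradio", "httpx", "json", "os", "pandas",
                   "pdfplumber", "pytesseract", "random", "requests", "threading",
                   "uuid", "PIL", "pathlib", "pptx"]:
        importlib.import_module(module)

    print("all imports resolve")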
 
 