phanerozoic committed (verified)
Commit e8212fa · Parent: 9961fac

Update app.py

Files changed (1)
  app.py: +78 -41
app.py CHANGED
@@ -1,42 +1,56 @@
-import os, re, time, datetime, traceback, torch, gradio as gr
+import os, re, time, datetime, traceback, torch
+import gradio as gr
 from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
 from transformers.utils import logging as hf_logging
 
-# ---------------- logging ----------------------------------------------------
+# ---------- Logging ---------------------------------------------------------
 os.environ["HF_HOME"] = "/data/.huggingface"
+LOG_FILE = "/data/requests.log"
+
+
 def log(msg: str):
     ts = datetime.datetime.utcnow().strftime("%H:%M:%S.%f")[:-3]
-    print(f"[{ts}] {msg}", flush=True)
-
-# ---------------- config -----------------------------------------------------
-MODEL_ID = "ibm-granite/granite-3.3-2b-instruct"
-MAX_TURNS, MAX_TOKS = 4, 64
-SYSTEM_PROMPT = (
-    "You are **SchoolSpirit AI**, digital mascot of SchoolSpirit AI LLC, "
-    "founded by Charles Norton in 2025. The company installs on‑prem AI chat "
-    "mascots, fine‑tunes LLMs, and ships turnkey GPU hardware to K‑12 schools.\n\n"
-    "RULES:\n"
-    "• Friendly, concise (≤ 4 sentences) unless asked for detail.\n"
-    "• If unsure or out‑of‑scope: say so & suggest human follow‑up.\n"
-    "• Do NOT collect personal data; no medical/legal/financial advice.\n"
-    "• No profanity, politics, or mature themes."
+    line = f"[{ts}] {msg}"
+    print(line, flush=True)
+    try:
+        with open(LOG_FILE, "a") as f:
+            f.write(line + "\n")
+    except FileNotFoundError:
+        pass
+
+
+# ---------- Config ----------------------------------------------------------
+MODEL_ID = "ibm-granite/granite-3.3-2b-instruct"
+MAX_TURNS, MAX_TOKENS, MAX_INPUT_CH = 6, 128, 400
+
+SYSTEM_MSG = (
+    "You are **SchoolSpirit AI**, the digital mascot for SchoolSpirit AI LLC, "
+    "founded by Charles Norton in 2025. The company installs on‑prem AI chat "
+    "mascots, offers custom fine‑tuning, and ships turnkey GPU hardware to schools.\n\n"
+    "Guidelines:\n"
+    "• Warm, concise answers (max 4 sentences).\n"
+    "• No personal‑data collection or sensitive advice.\n"
+    "• If unsure, say so and suggest a human follow‑up.\n"
+    "• Avoid profanity, politics, or mature themes."
 )
-WELCOME_BUBBLE = "Welcome to SchoolSpirit AI! Do you have any questions?"
+WELCOME_MSG = "Welcome to SchoolSpirit AI! Do you have any questions?"
 
 strip = lambda s: re.sub(r"\s+", " ", s.strip())
 
 
-# ---------------- load model (GPU fp16 → CPU) --------------------------------
+# ---------- Load model (GPU FP‑16 → CPU fallback) ---------------------------
 hf_logging.set_verbosity_error()
 try:
+    log("Loading tokenizer …")
     tok = AutoTokenizer.from_pretrained(MODEL_ID)
+
     if torch.cuda.is_available():
-        log("GPU fp16")
+        log("GPU detected → FP‑16")
         model = AutoModelForCausalLM.from_pretrained(
             MODEL_ID, device_map="auto", torch_dtype=torch.float16
         )
     else:
-        log("CPU fp32")
+        log("CPU fallback")
         model = AutoModelForCausalLM.from_pretrained(
             MODEL_ID, device_map="cpu", torch_dtype="auto", low_cpu_mem_usage=True
         )
@@ -45,44 +59,67 @@ try:
         "text-generation",
         model=model,
         tokenizer=tok,
-        max_new_tokens=MAX_TOKS,
+        max_new_tokens=MAX_TOKENS,
         do_sample=True,
         temperature=0.6,
+        pad_token_id=tok.eos_token_id,
     )
-except Exception as e:  # noqa: BLE001
-    gen = None
-    log(f"Model load error: {e}")
+    MODEL_ERR = None
+    log("Model loaded ✔")
+except Exception as exc:  # noqa: BLE001
+    MODEL_ERR, gen = f"Model load error: {exc}", None
+    log(MODEL_ERR)
+
 
-# ---------------- chat callback ---------------------------------------------
-def chat_fn(user_msg: str, history: list[tuple[str, str]]):
-    if gen is None:
-        return "Model failed to load. Please try later."
+# ---------- Chat callback ---------------------------------------------------
+def chat_fn(user_msg: str, history: list[dict]):
+    """
+    history comes in/out as list[{'role': 'user'|'assistant', 'content': str}, …]
+    """
+    if MODEL_ERR:
+        return history + [{"role": "assistant", "content": MODEL_ERR}]
 
     user_msg = strip(user_msg or "")
     if not user_msg:
-        return "Please type something."
+        return history + [{"role": "assistant", "content": "Please type something."}]
+    if len(user_msg) > MAX_INPUT_CH:
+        warn = f"Message too long (>{MAX_INPUT_CH} chars)."
+        return history + [{"role": "assistant", "content": warn}]
 
-    # build prompt: system + last MAX_TURNS pairs + new user msg
-    pairs = history[-MAX_TURNS:]
-    prompt = SYSTEM_PROMPT + "\n"
-    for u, a in pairs:
-        prompt += f"### User:\n{u}\n### Assistant:\n{a}\n"
-    prompt += f"### User:\n{user_msg}\n### Assistant:\n"
+    # Append user to history
+    history.append({"role": "user", "content": user_msg})
+
+    # Drop any system entries, then keep the last MAX_TURNS exchanges
+    convo = [m for m in history if m["role"] != "system"][-MAX_TURNS * 2 :]
+    prompt_parts = [SYSTEM_MSG] + [
+        f"{'User' if m['role'] == 'user' else 'AI'}: {m['content']}" for m in convo
+    ] + ["AI:"]
+    prompt = "\n".join(prompt_parts)
 
     try:
         raw = gen(prompt)[0]["generated_text"]
-        reply = strip(raw.split("### Assistant:", 1)[-1])
-        reply = re.split(r"\b###\s*(?:User|Assistant):", reply, 1)[0].strip()
+        reply = strip(raw.split("AI:", 1)[-1])
+        reply = re.split(r"\b(?:User:|AI:)", reply, 1)[0].strip()
     except Exception:
-        log("generation crash\n" + traceback.format_exc())
+        log("Inference error:\n" + traceback.format_exc())
         reply = "Sorry—backend crashed. Please try again later."
 
-    return reply
+    history.append({"role": "assistant", "content": reply})
+    return history
+
 
-# ---------------- UI ---------------------------------------------------------
+# ---------- Launch ----------------------------------------------------------
 gr.ChatInterface(
     fn=chat_fn,
-    chatbot=gr.Chatbot(value=[("", WELCOME_BUBBLE)], height=480),
+    chatbot=gr.Chatbot(
+        height=480,
+        type="messages",
+        value=[
+            {"role": "assistant", "content": WELCOME_MSG}
+        ],  # ONE welcome bubble
+    ),
+    additional_inputs=None,
     title="SchoolSpirit AI Chat",
     theme=gr.themes.Soft(primary_hue="blue"),
+    examples=None,
 ).launch()
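
Note: chat_fn now takes and returns Gradio "messages"-format history (a list of {"role": ..., "content": ...} dicts), matching gr.Chatbot(type="messages"). The prompt-assembly step can be sanity-checked without loading the model; a minimal sketch, where the SYSTEM_MSG stand-in and the sample history are illustrative rather than taken from the repo:

    # Stand-ins for values defined in app.py (illustrative only)
    SYSTEM_MSG = "You are SchoolSpirit AI."
    MAX_TURNS = 6

    history = [
        {"role": "assistant", "content": "Welcome to SchoolSpirit AI!"},
        {"role": "user", "content": "What does the company do?"},
    ]

    # Same truncation + flattening logic as chat_fn in this commit
    convo = [m for m in history if m["role"] != "system"][-MAX_TURNS * 2:]
    prompt = "\n".join(
        [SYSTEM_MSG]
        + [f"{'User' if m['role'] == 'user' else 'AI'}: {m['content']}" for m in convo]
        + ["AI:"]
    )
    print(prompt)
    # You are SchoolSpirit AI.
    # AI: Welcome to SchoolSpirit AI!
    # User: What does the company do?
    # AI: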