phanerozoic committed
Commit 9961fac · verified · 1 Parent(s): 61ca5d6

Update app.py

Files changed (1)
  1. app.py +44 -106
app.py CHANGED
@@ -2,54 +2,41 @@ import os, re, time, datetime, traceback, torch, gradio as gr
 from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
 from transformers.utils import logging as hf_logging

-# ───────────────── logging ─────────────────────────────────────────
+# ---------------- logging ----------------------------------------------------
 os.environ["HF_HOME"] = "/data/.huggingface"
-LOG = "/data/requests.log"
-
-
 def log(msg: str):
     ts = datetime.datetime.utcnow().strftime("%H:%M:%S.%f")[:-3]
-    line = f"[{ts}] {msg}"
-    print(line, flush=True)
-    try:
-        with open(LOG, "a") as f:
-            f.write(line + "\n")
-    except FileNotFoundError:
-        pass
-
-
-# ───────────────── config ──────────────────────────────────────────
-MODEL_ID = "ibm-granite/granite-3.3-2b-instruct"
-MAX_PAIRS = 4  # user/assistant pairs to keep
-MAX_TOKENS = 128
-MAX_INPUT_CH = 300
-
-SYSTEM_MSG = (
-    "You are **SchoolSpirit AI**, the digital mascot for SchoolSpirit AI LLC, "
-    "founded by Charles Norton in 2025. The company installs on‑prem AI chat "
-    "mascots, offers custom fine‑tuning, and supplies GPU servers to K‑12 schools.\n\n"
+    print(f"[{ts}] {msg}", flush=True)
+
+# ---------------- config -----------------------------------------------------
+MODEL_ID = "ibm-granite/granite-3.3-2b-instruct"
+MAX_TURNS, MAX_TOKS = 4, 64
+SYSTEM_PROMPT = (
+    "You are **SchoolSpirit AI**, digital mascot of SchoolSpirit AI LLC, "
+    "founded by Charles Norton in 2025. The company installs on‑prem AI chat "
+    "mascots, fine‑tunes LLMs, and ships turnkey GPU hardware to K‑12 schools.\n\n"
     "RULES:\n"
-    "• Friendly, concise (≤ 4 sentences) unless user wants detail.\n"
-    "• If unsure or out of scope, say so and suggest human follow‑up.\n"
-    "• No personal‑data collection, no medical/legal/financial advice.\n"
-    "• Avoid profanity, politics, and mature themes."
+    "• Friendly, concise (≤ 4 sentences) unless asked for detail.\n"
+    "• If unsure or out‑of‑scope: say so & suggest human follow‑up.\n"
+    "• Do NOT collect personal data; no medical/legal/financial advice.\n"
+    "• No profanity, politics, or mature themes."
 )
-WELCOME_MSG = "Welcome to SchoolSpirit AI! Do you have any questions?"
+WELCOME_BUBBLE = "Welcome to SchoolSpirit AI! Do you have any questions?"

 strip = lambda s: re.sub(r"\s+", " ", s.strip())


-# ───────────────── model load (GPU fp16 → CPU) ─────────────────────
+# ---------------- load model (GPU fp16 → CPU) --------------------------------
 hf_logging.set_verbosity_error()
 try:
     tok = AutoTokenizer.from_pretrained(MODEL_ID)
     if torch.cuda.is_available():
-        log("GPU detected → FP16")
+        log("GPU fp16")
         model = AutoModelForCausalLM.from_pretrained(
             MODEL_ID, device_map="auto", torch_dtype=torch.float16
         )
     else:
-        log("CPU fallback")
+        log("CPU fp32")
         model = AutoModelForCausalLM.from_pretrained(
             MODEL_ID, device_map="cpu", torch_dtype="auto", low_cpu_mem_usage=True
         )
@@ -58,93 +45,44 @@ try:
         "text-generation",
         model=model,
         tokenizer=tok,
-        max_new_tokens=MAX_TOKENS,
+        max_new_tokens=MAX_TOKS,
         do_sample=True,
-        temperature=0.65,
+        temperature=0.6,
     )
-    MODEL_ERR = None
-except Exception as exc:  # noqa: BLE001
-    MODEL_ERR, gen = f"Model load error: {exc}", None
-    log(MODEL_ERR)
-
-
-# ───────────────── helper ──────────────────────────────────────────
-def build_prompt(msgs):
-    """Granite likes ### markers"""
-    lines = [f"### System:\n{SYSTEM_MSG}"]
-    for m in msgs:
-        if m["role"] == "user":
-            lines.append(f"### User:\n{m['content']}")
-        elif m["role"] == "assistant":
-            lines.append(f"### Assistant:\n{m['content']}")
-    lines.append("### Assistant:")
-    return "\n".join(lines)
-
+except Exception as e:  # noqa: BLE001
+    gen = None
+    log(f"Model load error: {e}")

-def trim(msgs):
-    """Keep system + last MAX_PAIRS*2 messages"""
-    convo = [m for m in msgs if m["role"] != "system"]
-    return [{"role": "system", "content": SYSTEM_MSG}] + convo[-MAX_PAIRS * 2 :]
-
-
-# ───────────────── chat callback ───────────────────────────────────
-def chat_fn(user_msg, history, state):
-    """
-    user_msg : str
-    history  : list[dict] for UI (assistant & user only)
-    state    : {"msgs": full_message_history_with_system}
-    """
-    if MODEL_ERR:
-        history.append({"role": "assistant", "content": MODEL_ERR})
-        return history, state
+# ---------------- chat callback ---------------------------------------------
+def chat_fn(user_msg: str, history: list[tuple[str, str]]):
+    if gen is None:
+        return "Model failed to load. Please try later."

     user_msg = strip(user_msg or "")
     if not user_msg:
-        history.append({"role": "assistant", "content": "Please type something."})
-        return history, state
-    if len(user_msg) > MAX_INPUT_CH:
-        history.append(
-            {
-                "role": "assistant",
-                "content": f"Message too long (>{MAX_INPUT_CH} characters).",
-            }
-        )
-        return history, state
+        return "Please type something."

-    # Update raw history
-    state["msgs"].append({"role": "user", "content": user_msg})
-    state["msgs"] = trim(state["msgs"])
+    # build prompt: system + last MAX_TURNS pairs + new user msg
+    pairs = history[-MAX_TURNS:]
+    prompt = SYSTEM_PROMPT + "\n"
+    for u, a in pairs:
+        prompt += f"### User:\n{u}\n### Assistant:\n{a}\n"
+    prompt += f"### User:\n{user_msg}\n### Assistant:\n"

-    prompt = build_prompt(state["msgs"])
     try:
         raw = gen(prompt)[0]["generated_text"]
         reply = strip(raw.split("### Assistant:", 1)[-1])
+        reply = re.split(r"\b###\s*(?:User|Assistant):", reply, 1)[0].strip()
     except Exception:
-        log(" Inference error:\n" + traceback.format_exc())
+        log("generation crash\n" + traceback.format_exc())
         reply = "Sorry—backend crashed. Please try again later."

-    # Append to histories
-    state["msgs"].append({"role": "assistant", "content": reply})
-    history.append({"role": "assistant", "content": reply})
-    return history, state
-
-
-# ───────────────── UI ──────────────────────────────────────────────
-with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue")) as demo:
-    chatbot = gr.Chatbot(
-        value=[WELCOME_MSG],
-        label="SchoolSpirit AI",
-        height=480,
-        type="messages",
-    )
-    txt = gr.Textbox(
-        placeholder="Type your question here…",
-        show_label=False,
-        container=False,
-    )
-    state = gr.State({"msgs": [{"role": "system", "content": SYSTEM_MSG}]})
-
-    txt.submit(chat_fn, [txt, chatbot, state], [chatbot, state])
-    txt.submit(lambda _: "", None, txt)  # clear textbox
+    return reply

-demo.launch()
+# ---------------- UI ---------------------------------------------------------
+gr.ChatInterface(
+    fn=chat_fn,
+    chatbot=gr.Chatbot(value=[("", WELCOME_BUBBLE)], height=480),
+    title="SchoolSpirit AI Chat",
+    theme=gr.themes.Soft(primary_hue="blue"),
+).launch()
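
For context, the rewritten chat_fn drops the gr.State message history and instead flattens Gradio's tuple history into one Granite-style prompt string. A minimal runnable sketch of that layout; the history pair, user message, and abbreviated SYSTEM_PROMPT below are hypothetical stand-ins, not values from this commit:

    # Sketch of the prompt layout chat_fn assembles; all data here is illustrative.
    MAX_TURNS = 4
    SYSTEM_PROMPT = "You are **SchoolSpirit AI** ..."  # abbreviated stand-in

    history = [("What does SchoolSpirit AI sell?", "On-prem chat mascots for K-12 schools.")]
    user_msg = "Do you also fine-tune models?"

    prompt = SYSTEM_PROMPT + "\n"
    for u, a in history[-MAX_TURNS:]:  # keep only the last MAX_TURNS pairs
        prompt += f"### User:\n{u}\n### Assistant:\n{a}\n"
    prompt += f"### User:\n{user_msg}\n### Assistant:\n"
    print(prompt)

The trailing "### Assistant:\n" cues the model to complete the next turn, and the new re.split guard in chat_fn truncates the reply at the first "### User:" or "### Assistant:" marker if sampling runs past the turn boundary.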