Spaces:

phanerozoic
/

SchoolSpiritAI

Paused

App Files Files Community

phanerozoic commited on Apr 21

Commit

61ca5d6

verified ·

1 Parent(s): 502a6b6

Update app.py

Browse files

Files changed (1) hide show

app.py +75 -76

app.py CHANGED Viewed

@@ -1,13 +1,10 @@
-import os, re, time, datetime, traceback, torch
-import gradio as gr
 from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
 from transformers.utils import logging as hf_logging
-# -------------------------------------------------------------------
-# 1.  Logging helpers
-# -------------------------------------------------------------------
 os.environ["HF_HOME"] = "/data/.huggingface"
-LOG_FILE = "/data/requests.log"
 def log(msg: str):
@@ -15,47 +12,39 @@ def log(msg: str):
     line = f"[{ts}] {msg}"
     print(line, flush=True)
     try:
-        with open(LOG_FILE, "a") as f:
             f.write(line + "\n")
     except FileNotFoundError:
         pass
-# -------------------------------------------------------------------
-# 2.  Configuration
-# -------------------------------------------------------------------
 MODEL_ID = "ibm-granite/granite-3.3-2b-instruct"
-MAX_TURNS, MAX_TOKENS, MAX_INPUT_CH = 4, 64, 300
 SYSTEM_MSG = (
     "You are **SchoolSpirit AI**, the digital mascot for SchoolSpirit AI LLC, "
     "founded by Charles Norton in 2025. The company installs on‑prem AI chat "
-    "mascots, offers custom fine‑tuning, and ships turnkey GPU hardware to "
-    "K‑12 schools.\n\n"
-    "GUIDELINES:\n"
-    "• Warm, encouraging tone for students, parents, staff.\n"
-    "• Replies ≤ 4 sentences unless asked for detail.\n"
-    "• If unsure/out‑of‑scope: say so and suggest human follow‑up.\n"
-    "• No personal‑data collection or sensitive advice.\n"
-    "• No profanity, politics, or mature themes."
 )
 WELCOME_MSG = "Welcome to SchoolSpirit AI! Do you have any questions?"
-def strip(s: str) -> str:
-    return re.sub(r"\s+", " ", s.strip())
-# -------------------------------------------------------------------
-# 3.  Load model (GPU FP‑16 → CPU fallback)
-# -------------------------------------------------------------------
 hf_logging.set_verbosity_error()
 try:
-    log("Loading tokenizer …")
     tok = AutoTokenizer.from_pretrained(MODEL_ID)
     if torch.cuda.is_available():
-        log("GPU detected → FP‑16")
         model = AutoModelForCausalLM.from_pretrained(
             MODEL_ID, device_map="auto", torch_dtype=torch.float16
         )
@@ -71,81 +60,91 @@ try:
         tokenizer=tok,
         max_new_tokens=MAX_TOKENS,
         do_sample=True,
-        temperature=0.6,
     )
     MODEL_ERR = None
-    log("Model loaded ✔")
 except Exception as exc:  # noqa: BLE001
     MODEL_ERR, gen = f"Model load error: {exc}", None
     log(MODEL_ERR)
-# -------------------------------------------------------------------
-# 4.  Chat callback
-# -------------------------------------------------------------------
-def chat_fn(user_msg: str, history: list[tuple[str, str]], state: dict):
     """
-    history: list of (user, assistant) tuples (Gradio default)
-    state  : dict carrying system_prompt + raw_history for the model
-    Returns updated history (for UI) and state (for next round)
     """
     if MODEL_ERR:
-        return history + [(user_msg, MODEL_ERR)], state
     user_msg = strip(user_msg or "")
     if not user_msg:
-        return history + [(user_msg, "Please type something.")], state
     if len(user_msg) > MAX_INPUT_CH:
-        warn = f"Message too long (>{MAX_INPUT_CH} chars)."
-        return history + [(user_msg, warn)], state
-    # ------------------------------------------------ Prompt assembly
-    raw_hist = state.get("raw", [])
-    raw_hist.append({"role": "user", "content": user_msg})
-    # keep system + last N exchanges
-    convo = [m for m in raw_hist if m["role"] != "system"][-MAX_TURNS * 2 :]
-    raw_hist = [{"role": "system", "content": SYSTEM_MSG}] + convo
-    prompt = "\n".join(
-        [
-            m["content"]
-            if m["role"] == "system"
-            else f'{"User" if m["role"]=="user" else "AI"}: {m["content"]}'
-            for m in raw_hist
-        ]
-        + ["AI:"]
-    )
     try:
         raw = gen(prompt)[0]["generated_text"]
-        reply = strip(raw.split("AI:", 1)[-1])
-        reply = re.split(r"\b(?:User:|AI:)", reply, 1)[0].strip()
     except Exception:
         log("❌ Inference error:\n" + traceback.format_exc())
         reply = "Sorry—backend crashed. Please try again later."
-    # ------------------------------------------------ Update state + UI history
-    raw_hist.append({"role": "assistant", "content": reply})
-    state["raw"] = raw_hist
-    history.append((user_msg, reply))
     return history, state
-# -------------------------------------------------------------------
-# 5.  Launch
-# -------------------------------------------------------------------
 with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue")) as demo:
     chatbot = gr.Chatbot(
-        value=[("", WELCOME_MSG)], height=480, label="SchoolSpirit AI"
     )
-    state = gr.State({"raw": [{"role": "system", "content": SYSTEM_MSG}]})
-    with gr.Row():
-        txt = gr.Textbox(
-            scale=4, placeholder="Type your question here...", show_label=False
-        )
-        send = gr.Button("Send", variant="primary")
-    send.click(chat_fn, inputs=[txt, chatbot, state], outputs=[chatbot, state])
-    txt.submit(chat_fn, inputs=[txt, chatbot, state], outputs=[chatbot, state])
 demo.launch()

+import os, re, time, datetime, traceback, torch, gradio as gr
 from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
 from transformers.utils import logging as hf_logging
+# ───────────────── logging ─────────────────────────────────────────
 os.environ["HF_HOME"] = "/data/.huggingface"
+LOG = "/data/requests.log"
 def log(msg: str):
     line = f"[{ts}] {msg}"
     print(line, flush=True)
     try:
+        with open(LOG, "a") as f:
             f.write(line + "\n")
     except FileNotFoundError:
         pass
+# ───────────────── config ──────────────────────────────────────────
 MODEL_ID = "ibm-granite/granite-3.3-2b-instruct"
+MAX_PAIRS = 4            # user/assistant pairs to keep
+MAX_TOKENS = 128
+MAX_INPUT_CH = 300
 SYSTEM_MSG = (
     "You are **SchoolSpirit AI**, the digital mascot for SchoolSpirit AI LLC, "
     "founded by Charles Norton in 2025. The company installs on‑prem AI chat "
+    "mascots, offers custom fine‑tuning, and supplies GPU servers to K‑12 schools.\n\n"
+    "RULES:\n"
+    "• Friendly, concise (≤ 4 sentences) unless user wants detail.\n"
+    "• If unsure or out of scope, say so and suggest human follow‑up.\n"
+    "• No personal‑data collection, no medical/legal/financial advice.\n"
+    "• Avoid profanity, politics, and mature themes."
 )
 WELCOME_MSG = "Welcome to SchoolSpirit AI! Do you have any questions?"
+strip = lambda s: re.sub(r"\s+", " ", s.strip())
+# ───────────────── model load (GPU fp16 → CPU) ─────────────────────
 hf_logging.set_verbosity_error()
 try:
     tok = AutoTokenizer.from_pretrained(MODEL_ID)
     if torch.cuda.is_available():
+        log("GPU detected → FP16")
         model = AutoModelForCausalLM.from_pretrained(
             MODEL_ID, device_map="auto", torch_dtype=torch.float16
         )
         tokenizer=tok,
         max_new_tokens=MAX_TOKENS,
         do_sample=True,
+        temperature=0.65,
     )
     MODEL_ERR = None
 except Exception as exc:  # noqa: BLE001
     MODEL_ERR, gen = f"Model load error: {exc}", None
     log(MODEL_ERR)
+# ───────────────── helper ──────────────────────────────────────────
+def build_prompt(msgs):
+    """Granite likes ### markers"""
+    lines = [f"### System:\n{SYSTEM_MSG}"]
+    for m in msgs:
+        if m["role"] == "user":
+            lines.append(f"### User:\n{m['content']}")
+        elif m["role"] == "assistant":
+            lines.append(f"### Assistant:\n{m['content']}")
+    lines.append("### Assistant:")
+    return "\n".join(lines)
+def trim(msgs):
+    """Keep system + last MAX_PAIRS*2 messages"""
+    convo = [m for m in msgs if m["role"] != "system"]
+    return [{"role": "system", "content": SYSTEM_MSG}] + convo[-MAX_PAIRS * 2 :]
+# ───────────────── chat callback ───────────────────────────────────
+def chat_fn(user_msg, history, state):
     """
+    user_msg : str
+    history  : list[dict] for UI (assistant & user only)
+    state    : {"msgs": full_message_history_with_system}
     """
     if MODEL_ERR:
+        history.append({"role": "assistant", "content": MODEL_ERR})
+        return history, state
     user_msg = strip(user_msg or "")
     if not user_msg:
+        history.append({"role": "assistant", "content": "Please type something."})
+        return history, state
     if len(user_msg) > MAX_INPUT_CH:
+        history.append(
+            {
+                "role": "assistant",
+                "content": f"Message too long (>{MAX_INPUT_CH} characters).",
+            }
+        )
+        return history, state
+    # Update raw history
+    state["msgs"].append({"role": "user", "content": user_msg})
+    state["msgs"] = trim(state["msgs"])
+    prompt = build_prompt(state["msgs"])
     try:
         raw = gen(prompt)[0]["generated_text"]
+        reply = strip(raw.split("### Assistant:", 1)[-1])
     except Exception:
         log("❌ Inference error:\n" + traceback.format_exc())
         reply = "Sorry—backend crashed. Please try again later."
+    # Append to histories
+    state["msgs"].append({"role": "assistant", "content": reply})
+    history.append({"role": "assistant", "content": reply})
     return history, state
+# ───────────────── UI ──────────────────────────────────────────────
 with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue")) as demo:
     chatbot = gr.Chatbot(
+        value=[WELCOME_MSG],
+        label="SchoolSpirit AI",
+        height=480,
+        type="messages",
     )
+    txt = gr.Textbox(
+        placeholder="Type your question here…",
+        show_label=False,
+        container=False,
+    )
+    state = gr.State({"msgs": [{"role": "system", "content": SYSTEM_MSG}]})
+    txt.submit(chat_fn, [txt, chatbot, state], [chatbot, state])
+    txt.submit(lambda _: "", None, txt)  # clear textbox
 demo.launch()