import os

# HF_HOME must be set before transformers/huggingface_hub are imported,
# because the hub library resolves its cache directory at import time.
os.environ["HF_HOME"] = "/data/.huggingface"

import re
import time
import datetime
import threading
import traceback

import torch
import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM, TextIteratorStreamer
from transformers.utils import logging as hf_logging

LOG_FILE = "/data/requests.log"


def log(m):
    """Log a timestamped line to stdout and, when /data is writable, a file."""
    stamp = datetime.datetime.now(datetime.timezone.utc).strftime("%H:%M:%S.%f")[:-3]
    line = f"[{stamp}] {m}"
    print(line, flush=True)
    try:
        with open(LOG_FILE, "a") as f:
            f.write(line + "\n")
    except FileNotFoundError:
        pass  # no /data volume mounted; stdout logging still works


# ---------------------------------------------------------------- config ----
MODEL_ID = "ibm-granite/granite-3.3-2b-instruct"
CTX_TOK, MAX_NEW, TEMP = 1800, 64, 0.6  # prompt token budget, reply cap, sampling temperature
MAX_IN, RATE_N, RATE_T = 300, 5, 60  # max input chars; RATE_N requests per RATE_T seconds

SYSTEM_MSG = (
    "You are **SchoolSpirit AI**, the friendly digital mascot of "
    "SchoolSpirit AI LLC, founded by Charles Norton in 2025. "
    "The company installs on‑prem AI chat mascots, fine‑tunes language models, "
    "and ships turnkey GPU servers to K‑12 schools.\n\n"
    "RULES:\n"
    "• Reply in ≤ 4 sentences unless asked for detail.\n"
    "• No personal‑data collection; no medical/legal/financial advice.\n"
    "• If uncertain, say so and suggest contacting a human.\n"
    "• If you can’t answer, politely direct the user to admin@schoolspiritai.com.\n"
    "• Keep language age‑appropriate; avoid profanity, politics, mature themes."
)
WELCOME = "Hi there! I’m SchoolSpirit AI. How can I help?"


def strip(s):
    """Collapse runs of whitespace and trim the ends."""
    return re.sub(r"\s+", " ", s.strip())


# ----------------------------------------------------------- model load ----
hf_logging.set_verbosity_error()
try:
    tok = AutoTokenizer.from_pretrained(MODEL_ID)
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_ID,
        device_map="auto" if torch.cuda.is_available() else "cpu",
        torch_dtype=torch.float16 if torch.cuda.is_available() else "auto",
        low_cpu_mem_usage=True,
    )
    MODEL_ERR = None
    log("Model loaded")
except Exception as e:  # keep the app up and surface the failure to users
    tok = model = None
    MODEL_ERR = f"Model load error: {e}"
    log(MODEL_ERR + "\n" + traceback.format_exc())

# ---------------------------------------------------------- rate limiter ----
VISITS = {}  # ip -> request timestamps inside the current window


def allowed(ip):
    """Sliding-window limit: at most RATE_N requests per RATE_T seconds per IP."""
    now = time.time()
    VISITS[ip] = [t for t in VISITS.get(ip, []) if now - t < RATE_T]
    if len(VISITS[ip]) >= RATE_N:
        return False
    VISITS[ip].append(now)
    return True


def build_prompt(raw):
    """Render the chat as plain text, dropping the oldest user/assistant pair
    until the whole prompt fits inside CTX_TOK tokens."""

    def render(m):
        if m["role"] == "system":
            return m["content"]
        return f"{'User:' if m['role'] == 'user' else 'AI:'} {m['content']}"

    sys_msg, convo = raw[0], raw[1:]
    while True:
        prompt = "\n".join([sys_msg["content"]] + [render(m) for m in convo] + ["AI:"])
        if (
            len(tok.encode(prompt, add_special_tokens=False)) <= CTX_TOK
            or len(convo) <= 2
        ):
            return prompt
        convo = convo[2:]  # trim the oldest exchange and re-measure


def chat_fn(user_msg, hist, state, request: gr.Request):
    """Streaming handler. This is a generator, so every exit path must *yield*:
    a bare `return value` inside a generator never reaches Gradio's outputs."""
    ip = request.client.host if request else "anon"
    if not allowed(ip):
        hist.append((user_msg, "Rate limit exceeded. Please wait a minute."))
        yield hist, state, ""
        return
    user_msg = strip(user_msg or "")
    if not user_msg:
        yield hist, state, ""
        return
    if len(user_msg) > MAX_IN:
        hist.append((user_msg, f"Input >{MAX_IN} chars."))
        yield hist, state, ""
        return
    if MODEL_ERR:
        hist.append((user_msg, MODEL_ERR))
        yield hist, state, ""
        return

    hist.append((user_msg, ""))
    state["raw"].append({"role": "user", "content": user_msg})
    prompt = build_prompt(state["raw"])
    # Keep the full encoding so attention_mask travels with input_ids.
    inputs = tok(prompt, return_tensors="pt").to(model.device)
    streamer = TextIteratorStreamer(tok, skip_prompt=True, skip_special_tokens=True)

    # Generate on a worker thread so tokens can be streamed as they arrive.
    # do_sample=True is required for `temperature` to take effect; without it,
    # generate() decodes greedily and silently ignores the setting.
    threading.Thread(
        target=model.generate,
        kwargs=dict(
            **inputs,
            max_new_tokens=MAX_NEW,
            temperature=TEMP,
            do_sample=True,
            streamer=streamer,
        ),
        daemon=True,
    ).start()

    partial = ""
    for piece in streamer:
        partial += piece
        # The model may keep writing the transcript; cut at the first
        # hallucinated "User:"/"AI:" turn marker and stop streaming.
        if "User:" in partial or "\nAI:" in partial:
            partial = re.split(r"(?:\n?User:|\n?AI:)", partial)[0].strip()
            break
        hist[-1] = (user_msg, partial)
        yield hist, state, ""

    reply = strip(partial)
    hist[-1] = (user_msg, reply)
    state["raw"].append({"role": "assistant", "content": reply})
    yield hist, state, ""
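
# Deployment note (assumptions, not verified here): behind a reverse proxy such
# as the one in front of Hugging Face Spaces, request.client.host may report
# the proxy's address rather than the visitor's, lumping users together in the
# per-IP limiter; the real client IP, if needed, would come from the
# X-Forwarded-For header. VISITS is also in-memory only, so limits reset
# whenever the process restarts.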
"content": reply}) yield hist, state, "" with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue")) as demo: gr.Markdown("### SchoolSpirit AI Chat") bot = gr.Chatbot(value=[("", WELCOME)], height=480) st = gr.State({ "raw": [ {"role": "system", "content": SYSTEM_MSG}, {"role": "assistant", "content": WELCOME}, ] }) with gr.Row(): txt = gr.Textbox(placeholder="Type your question here…", show_label=False, lines=1, scale=4) send = gr.Button("Send", variant="primary") send.click(chat_fn, inputs=[txt, bot, st], outputs=[bot, st, txt]) txt.submit(chat_fn, inputs=[txt, bot, st], outputs=[bot, st, txt]) demo.launch()