File size: 2,768 Bytes
ce7d50f
8ab9404
ce7d50f
 
8ab9404
 
ce7d50f
8ab9404
ce7d50f
 
 
 
8ab9404
 
 
 
 
ce7d50f
8ab9404
 
ce7d50f
 
 
 
 
 
 
 
 
8ab9404
ce7d50f
8ab9404
 
ce7d50f
 
8ab9404
ce7d50f
8ab9404
ce7d50f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8ab9404
 
ce7d50f
 
8ab9404
 
ce7d50f
 
8ab9404
 
ce7d50f
 
8ab9404
 
 
 
 
ce7d50f
8ab9404
ce7d50f
8ab9404
ce7d50f
 
 
 
 
 
 
8ab9404
 
ce7d50f
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
# norag_router.py

from fastapi import APIRouter, HTTPException
from pydantic import BaseModel
from groq import Groq
from pymongo import MongoClient
from config import CONNECTION_STRING, CHATGROQ_API_KEY, CUSTOM_PROMPT

# Router mounted under /norag; endpoints below are tagged "noRag" in the OpenAPI docs.
router = APIRouter(prefix="/norag", tags=["noRag"])

# Module-level clients, shared by all requests.
# NOTE(review): Groq and MongoClient are created at import time — import of this
# module fails if CHATGROQ_API_KEY / CONNECTION_STRING are invalid.
client = Groq(api_key=CHATGROQ_API_KEY)
mongo  = MongoClient(CONNECTION_STRING)
db     = mongo["edulearnai"]
chats  = db["chats"]  # one document per chat session, keyed by "session_id"

# Base persona injected as the {context} slot of CUSTOM_PROMPT in chat_endpoint.
SYSTEM_PROMPT = "You are a helpful assistant which helps people in their tasks."

# Request model
class ChatRequest(BaseModel):
    """Request body for POST /norag/chat.

    Attributes:
        session_id: Opaque client-chosen key identifying the conversation;
            used to look up / create the session document in MongoDB.
        question: The user's question to forward to the model.
    """
    # BUG FIX: original used `type ChatRequest(BaseModel):`, which is a syntax
    # error (the `type` statement declares type aliases, not classes).
    session_id: str
    question: str

@router.post("/chat", summary="Ask a question to the noRag assistant")
async def chat_endpoint(req: ChatRequest):
    # Fetch or create session
doc = chats.find_one({"session_id": req.session_id})
    if not doc:
        doc = {"session_id": req.session_id, "history": [], "summary": ""}
        chats.insert_one(doc)

    history = doc["history"]
    summary = doc["summary"]

    # Summarize if history too long
    if len(history) >= 10:
        msgs = [f"{m['role']}: {m['content']}" for m in history]
        combined = summary + "\n" + "\n".join(msgs)
        sum_prompt = (
            "Summarize the following chat history in one or two short sentences:\n\n"
            + combined + "\n\nSummary:"
        )
        sum_resp = client.chat.completions.create(
            model="meta-llama/llama-4-scout-17b-16e-instruct",
            messages=[{"role": "user", "content": sum_prompt}],
            temperature=0.3,
            max_completion_tokens=150,
            top_p=1,
            stream=False,
        )
        summary = sum_resp.choices[0].message.content.strip()
        history = []

    # Build full prompt
    chat_hist_text = "\n".join([f"{m['role']}: {m['content']}" for m in history])
    full_prompt = CUSTOM_PROMPT.format(
        context=SYSTEM_PROMPT,
        chat_history=chat_hist_text or "(no prior messages)",
        question=req.question
    )

    # Call model
    resp = client.chat.completions.create(
        model="meta-llama/llama-4-scout-17b-16e-instruct",
        messages=[{"role": "user", "content": full_prompt}],
        temperature=1,
        max_completion_tokens=1024,
        top_p=1,
        stream=False,
    )
    answer = resp.choices[0].message.content.strip()

    # Update session doc
    history.append({"role": "user", "content": req.question})
    history.append({"role": "assistant", "content": answer})
    chats.replace_one(
        {"session_id": req.session_id},
        {"session_id": req.session_id, "history": history, "summary": summary},
        upsert=True
    )

    return {"session_id": req.session_id, "answer": answer, "summary": summary}