"""HTTP entry point that streams generated chat responses to the frontend."""

import time

from fastapi import FastAPI, HTTPException, Request
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import StreamingResponse

from inference import generate_response

app = FastAPI()

# Let the browser frontend call this API cross-origin during development.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["http://localhost:5173"],  # your React app's port
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)


@app.post("/chat")
async def chat(req: Request) -> StreamingResponse:
    """Stream the output of ``generate_response`` for the posted prompt.

    The request body must be a JSON object with a ``"prompt"`` entry, e.g.
    ``{"prompt": "Hello"}``.

    Args:
        req: Incoming request whose body carries the JSON payload.

    Returns:
        A ``text/plain`` streaming response fed by ``generate_response(prompt)``.
        NOTE(review): presumably ``generate_response`` returns an iterator or
        async iterator of text chunks - confirm against ``inference``.

    Raises:
        HTTPException: 400 when the body is not valid JSON, is not a JSON
            object, or has no (or a null) ``"prompt"`` value.
    """
    try:
        body = await req.json()
    except ValueError as exc:
        # json.JSONDecodeError and UnicodeDecodeError are both ValueError
        # subclasses; report a client error instead of an unhandled 500.
        raise HTTPException(
            status_code=400, detail="Request body must be valid JSON."
        ) from exc

    # Valid JSON may still be a list, string or number, none of which has .get().
    if not isinstance(body, dict):
        raise HTTPException(
            status_code=400, detail="Request body must be a JSON object."
        )

    prompt = body.get("prompt")
    if prompt is None:
        # Fail fast rather than handing None to the generator.
        raise HTTPException(
            status_code=400, detail='Missing required field "prompt".'
        )

    return StreamingResponse(generate_response(prompt), media_type="text/plain")