# Interview_AI / app.py
# Access site: https://binkhoale1812-interview-ai.hf.space/
import os
import logging
import tempfile
from pathlib import Path
import psutil
# Server
from fastapi import FastAPI, File, UploadFile, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse, FileResponse
from fastapi.staticfiles import StaticFiles
# AI + LLM
from transformers import WhisperProcessor, WhisperForConditionalGeneration
import torch
import numpy as np
import soundfile as sf
from google import genai
from google.genai import types
############################################
# ── Configuration ────────────────────────
############################################
# Basic logger (used by the resource checks below)
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("interview-ai")

GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
if not GEMINI_API_KEY:
    raise RuntimeError("GEMINI_API_KEY environment variable must be set!")
# Tiny Whisper model is light enough for CPU Spaces; change if GPU is available
ASR_MODEL_ID = "openai/whisper-tiny" # ~39 MB
ASR_LANGUAGE = "en" # Force to English for interview setting
############################################
# ── FastAPI App ───────────────────────────
############################################
app = FastAPI(title="Interview Q&A Assistant", docs_url="/docs")
app.add_middleware(
CORSMiddleware,
allow_origins=["*"],
allow_methods=["*"],
allow_headers=["*"],
)
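# Wide-open CORS is convenient for a public demo; restrict origins in production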
# Serve frontend assets
app.mount("/statics", StaticFiles(directory="statics"), name="statics")
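# "statics/" is expected to contain index.html (served at "/") plus its assets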
############################################
# ── Global objects (loaded at startup) ───
############################################
# Globals
processor = None
model = None
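# Load Whisper once at startup: weights are cached by the Hugging Face Hub,
# so later requests skip the download and per-request latency stays low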
@app.on_event("startup")
async def load_models():
global processor, model
processor = WhisperProcessor.from_pretrained(ASR_MODEL_ID)
model = WhisperForConditionalGeneration.from_pretrained(ASR_MODEL_ID)
model.to("cpu")
############################################
# ── Helpers ───────────────────────────────
############################################
def build_prompt(question: str) -> str:
"""Craft a prompt that elicits concise, structured answers."""
return (
"You are a helpful career‑coach AI. Answer the following interview "
"question clearly and concisely, offering practical insights when "
"appropriate.\n\n"
f"Interview question: \"{question}\""
)
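# e.g. build_prompt("Tell me about yourself") returns one string, sent verbatim
# as the Gemini `contents` payload in /voice-transcribe below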
def memory_usage_mb() -> float:
    """Resident set size of this process in MiB."""
    return psutil.Process().memory_info().rss / 1_048_576  # bytes → MiB

# Monitor resources before startup (psutil is imported at the top of the file)
def check_system_resources():
memory = psutil.virtual_memory()
cpu = psutil.cpu_percent(interval=1)
disk = psutil.disk_usage("/")
    # Log a one-line snapshot of current usage
logger.info(f"🔍 System Resources - RAM: {memory.percent}%, CPU: {cpu}%, Disk: {disk.percent}%")
if memory.percent > 85:
logger.warning("⚠️ High RAM usage detected!")
if cpu > 90:
logger.warning("⚠️ High CPU usage detected!")
if disk.percent > 90:
logger.warning("⚠️ High Disk usage detected!")
check_system_resources()  # Runs once at import time, before the server accepts requests
############################################
# ── Routes ────────────────────────────────
############################################
@app.get("/")
async def root() -> FileResponse:
"""Serve the single‑page app."""
return FileResponse(Path("statics/index.html"))
@app.post("/voice-transcribe")
async def voice_transcribe(file: UploadFile = File(...)): # noqa: B008
"""Receive audio, transcribe, push to Gemini, return answer."""
if file.content_type not in {"audio/wav", "audio/x-wav", "audio/mpeg"}:
raise HTTPException(status_code=415, detail="Unsupported audio type")
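    # NOTE: browser MediaRecorder typically emits audio/webm; add it to the set
    # above if the frontend records in that container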
# Save to a temp file (Whisper expects a filename/bytes)
with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
tmp.write(await file.read())
tmp_path = tmp.name
try:
        # ── 1. Transcribe
        speech, sample_rate = sf.read(tmp_path)
        if speech.ndim > 1:  # Downmix stereo → mono
            speech = speech.mean(axis=1)
        if sample_rate != 16_000:  # Whisper expects 16 kHz input; linear resample
            target_len = int(len(speech) * 16_000 / sample_rate)
            speech = np.interp(np.linspace(0, len(speech), target_len, endpoint=False),
                               np.arange(len(speech)), speech)
            sample_rate = 16_000
        inputs = processor(speech, sampling_rate=sample_rate, return_tensors="pt")
        input_features = inputs.input_features.to("cpu")  # adjust if using GPU
        # Force English output per ASR_LANGUAGE above
        generated_ids = model.generate(input_features, language=ASR_LANGUAGE, task="transcribe")
        question = processor.batch_decode(generated_ids, skip_special_tokens=True)[0].strip()
        if not question:
            raise ValueError("Empty transcription")
# ── 2. LLM answer
prompt = build_prompt(question)
        # Gemini 2.5 Flash preview – low latency, suits short Q&A turns
        client = genai.Client(api_key=GEMINI_API_KEY)
        response = client.models.generate_content(
            model="gemini-2.5-flash-preview-04-17",
            contents=prompt,
        )
        answer = (response.text or "").strip()  # response.text may be None if no parts returned
return JSONResponse(
{
"question": question,
"answer": answer,
"memory_mb": round(memory_usage_mb(), 1),
}
)
finally:
        os.remove(tmp_path)  # Always delete the temp audio file, even on failure
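
# Minimal sketch of a client call (hypothetical file name; any WAV upload works):
#   curl -X POST "https://binkhoale1812-interview-ai.hf.space/voice-transcribe" \
#        -F "file=@question.wav;type=audio/wav"
# Expected JSON: {"question": "...", "answer": "...", "memory_mb": ...}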