# Access site: https://binkhoale1812-interview-ai.hf.space/
import os
import tempfile
from pathlib import Path
from typing import Dict
# Server
from fastapi import FastAPI, File, UploadFile, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse, FileResponse
from fastapi.staticfiles import StaticFiles
# AI + LLM
import torch  # For transformer
from transformers import WhisperProcessor, WhisperForConditionalGeneration
from google import genai
from google.genai import types
# Audio Transcribe
from pydub import AudioSegment
import numpy as np
############################################
# ── Configuration ────────────────────────
############################################
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
if not GEMINI_API_KEY:
    raise RuntimeError("GEMINI_API_KEY environment variable must be set!")
# Tiny Whisper model is light enough for CPU Spaces; change if GPU is available
ASR_MODEL_ID = "openai/whisper-tiny"  # ~39 MB
ASR_LANGUAGE = "en"  # Force to English for interview setting
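# GEMINI_API_KEY is read from the environment; for example (shell, placeholder value):
#   export GEMINI_API_KEY="your-gemini-api-key"
# On Hugging Face Spaces this is normally configured as a repository secret instead.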
############################################
# ── FastAPI App ───────────────────────────
############################################
app = FastAPI(title="Interview Q&A Assistant", docs_url="/docs")
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
)
# Serve frontend assets
app.mount("/statics", StaticFiles(directory="statics"), name="statics")
############################################
# ── Global objects (lazy‑loaded) ──────────
############################################
# Globals
processor = None
model = None
# Load the Whisper processor/model once when the server starts
@app.on_event("startup")
async def load_models():
    global processor, model
    cache_path = Path("model_cache")  # local writable path inside Hugging Face Space
    processor = WhisperProcessor.from_pretrained(ASR_MODEL_ID, cache_dir=cache_path)
    model = WhisperForConditionalGeneration.from_pretrained(ASR_MODEL_ID, cache_dir=cache_path)
    model.to("cpu")
############################################
# ── Helpers ───────────────────────────────
############################################
def build_prompt(question: str) -> str:
    """Craft a prompt that elicits concise, structured answers."""
    return (
        "You are a helpful career‑coach AI. Answer the following interview "
        "question clearly and concisely, offering practical insights when "
        "appropriate.\n\n"
        f"Interview question: \"{question}\""
    )
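# For example, build_prompt("Tell me about yourself") returns:
#   You are a helpful career‑coach AI. Answer the following interview question
#   clearly and concisely, offering practical insights when appropriate.
#
#   Interview question: "Tell me about yourself"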
def memory_usage_mb() -> float:
    # psutil is imported later at module level, so the name resolves by the time this runs
    return psutil.Process().memory_info().rss / 1_048_576  # bytes → MiB
# Enable Logging for Debugging
import logging
# Set up app-specific logger
logger = logging.getLogger("triage-response")
logger.setLevel(logging.INFO)  # Set to DEBUG only when needed
# Set log format
formatter = logging.Formatter("[%(levelname)s] %(asctime)s - %(message)s")
handler = logging.StreamHandler()
handler.setFormatter(formatter)
logger.addHandler(handler)
# Suppress noisy libraries like pymongo, urllib3, etc.
for noisy in ["pymongo", "urllib3", "httpx", "uvicorn", "uvicorn.error", "uvicorn.access"]:
    logging.getLogger(noisy).setLevel(logging.WARNING)
# Monitor Resources Before Startup
import psutil
def check_system_resources():
    memory = psutil.virtual_memory()
    cpu = psutil.cpu_percent(interval=1)
    disk = psutil.disk_usage("/")
    # Log a snapshot of current usage
    logger.info(f"🔍 System Resources - RAM: {memory.percent}%, CPU: {cpu}%, Disk: {disk.percent}%")
    if memory.percent > 85:
        logger.warning("⚠️ High RAM usage detected!")
    if cpu > 90:
        logger.warning("⚠️ High CPU usage detected!")
    if disk.percent > 90:
        logger.warning("⚠️ High Disk usage detected!")
check_system_resources()
############################################
# ── Routes ────────────────────────────────
############################################
@app.get("/")
async def root() -> FileResponse:
    """Serve the single‑page app."""
    logger.info("[STATIC] Serving frontend")
    return FileResponse(Path("statics/index.html"))
# Route path assumed here; adjust it to whatever URL the frontend posts to
@app.post("/voice-transcribe")
async def voice_transcribe(file: UploadFile = File(...)):  # noqa: B008
    """Receive audio, transcribe, push to Gemini, return answer."""
    # audio/webm (browser MediaRecorder output) is also accepted since pydub/ffmpeg can decode it
    if file.content_type not in {"audio/wav", "audio/x-wav", "audio/mpeg", "audio/webm"}:
        raise HTTPException(status_code=415, detail="Unsupported audio type")
    # Save to a temp file (Whisper expects a filename/bytes)
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
        tmp.write(await file.read())
        tmp_path = tmp.name
    try:
        # ── 1. Transcribe
        # Load audio using pydub (which handles WebM/Opus/MP3/etc.)
        audio = AudioSegment.from_file(tmp_path)
        audio = audio.set_frame_rate(16000).set_channels(1)  # Whisper expects mono 16 kHz
        samples = np.array(audio.get_array_of_samples()).astype(np.float32) / (2**15)  # normalize int16 samples to [-1, 1]
        # Obtain speech and process to tensor
        speech = samples
        inputs = processor(speech, sampling_rate=16000, return_tensors="pt")
        input_features = inputs["input_features"].to("cpu")
        # Pin decoding to ASR_LANGUAGE so short clips are not auto-detected as another language
        forced_ids = processor.get_decoder_prompt_ids(language=ASR_LANGUAGE, task="transcribe")
        generated_ids = model.generate(input_features, forced_decoder_ids=forced_ids)
        question = processor.batch_decode(generated_ids, skip_special_tokens=True)[0].strip()
        if not question:
            raise ValueError("Empty transcription")
        logger.info(f"[VOICE] Transcribed question: {question}")
        # ── 2. LLM answer
        prompt = build_prompt(question)
        # Gemini Flash 2.5 – tuned for short latency
        client = genai.Client(api_key=GEMINI_API_KEY)
        response = client.models.generate_content(
            model="gemini-2.5-flash-preview-04-17",
            contents=prompt
        )
        answer = response.text.strip()
        logger.info(f"[LLM] Generated answer: {answer}")
        return JSONResponse(
            {
                "question": question,
                "answer": answer,
                "memory_mb": round(memory_usage_mb(), 1),
            }
        )
    finally:
        os.remove(tmp_path)  # Remove the temp audio file when done
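# Optional local entry point: a minimal sketch assuming uvicorn is installed and that
# port 7860 (the Hugging Face Spaces default) is free. On Spaces the platform normally
# launches the ASGI app itself, so this guard only matters for local runs.
if __name__ == "__main__":
    import uvicorn
    # Bind to all interfaces so a container or the Space runtime can reach the server
    uvicorn.run(app, host="0.0.0.0", port=7860)

# Example request against the voice endpoint (path assumed above):
#   curl -X POST -F "file=@question.wav;type=audio/wav" \
#        https://binkhoale1812-interview-ai.hf.space/voice-transcribe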