# Access site: https://binkhoale1812-interview-ai.hf.space/
import os, tempfile
from pathlib import Path
from typing import Dict

# Server
from fastapi import FastAPI, File, UploadFile, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse, FileResponse
from fastapi.staticfiles import StaticFiles

# AI + LLM
import torch  # For transformer
from google import genai
from google.genai import types

# Audio Transcribe
from pydub import AudioSegment
from transformers import WhisperProcessor, WhisperForConditionalGeneration
import numpy as np

############################################
# ── Configuration ────────────────────────
############################################

GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
if not GEMINI_API_KEY:
    raise RuntimeError("GEMINI_API_KEY environment variable must be set!")

# The small English-only Whisper model is light enough for CPU Spaces; change if GPU is available
ASR_MODEL_ID = "openai/whisper-small.en"
ASR_LANGUAGE = "en"  # Force to English for interview setting
SAMPLE_RATE = 16000

############################################
# ── FastAPI App ───────────────────────────
############################################

app = FastAPI(title="Interview Q&A Assistant", docs_url="/docs")
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
)
# Serve frontend assets
app.mount("/statics", StaticFiles(directory="statics"), name="statics")

############################################
# ── Global objects (lazy‑loaded) ──────────
############################################

# Globals
processor = None
model = None

# Enable Logging for Debugging
import psutil
import logging

# Set up app-specific logger
logger = logging.getLogger("triage-response")
logger.setLevel(logging.INFO)  # Set to DEBUG only when needed
# Set log format
formatter = logging.Formatter("[%(levelname)s] %(asctime)s - %(message)s")
handler = logging.StreamHandler()
handler.setFormatter(formatter)
logger.addHandler(handler)

# Suppress noisy libraries like pymongo, urllib3, etc.
for noisy in ["pymongo", "urllib3", "httpx", "uvicorn", "uvicorn.error", "uvicorn.access"]:
    logging.getLogger(noisy).setLevel(logging.WARNING)


# Monitor Resources Before Startup
def check_system_resources():
    memory = psutil.virtual_memory()
    cpu = psutil.cpu_percent(interval=1)
    disk = psutil.disk_usage("/")
    # Log current resource usage
    logger.info(f"🔍 System Resources - RAM: {memory.percent}%, CPU: {cpu}%, Disk: {disk.percent}%")
    if memory.percent > 85:
        logger.warning("⚠️ High RAM usage detected!")
    if cpu > 90:
        logger.warning("⚠️ High CPU usage detected!")
    if disk.percent > 90:
        logger.warning("⚠️ High Disk usage detected!")

check_system_resources()


# Startup
@app.on_event("startup")
async def load_models():
    global processor, model
    cache = Path("model_cache")
    cache.mkdir(exist_ok=True)
    # Load the Whisper processor and model once at startup
    processor = WhisperProcessor.from_pretrained(ASR_MODEL_ID, cache_dir=cache)
    model = WhisperForConditionalGeneration.from_pretrained(ASR_MODEL_ID, cache_dir=cache)
    # Force English transcription – never translate
    forced = processor.get_decoder_prompt_ids(language="english", task="transcribe")
    model.config.forced_decoder_ids = forced
    model.to("cpu")
    model.eval()
    logger.info("[STARTUP] Whisper loaded ✔")


############################################
# ── Helpers ───────────────────────────────
############################################

def build_prompt(question: str) -> str:
    """Craft a prompt that elicits concise, structured answers."""
    return (
        "You are a helpful career‑coach AI. Answer the following interview "
        "question clearly and concisely, offering practical insights when appropriate.\n"
        "Use markdown for **bold**, *italic*, and bullet‑lists when helpful.\n"
        "Ensure your answer is less than 200 words.\n\n"
        f"Interview question: \"{question}\""
    )


def memory_usage_mb() -> float:
    return psutil.Process().memory_info().rss / 1_048_576  # bytes→MiB


############################################
# ── Routes ────────────────────────────────
############################################

@app.get("/")
async def root() -> FileResponse:
    """Serve the single‑page app."""
    logger.info("[STATIC] Serving frontend")
    return FileResponse(Path("statics/index.html"))


@app.post("/voice-transcribe")
async def voice_transcribe(file: UploadFile = File(...)):  # noqa: B008
    """Receive audio, transcribe, push to Gemini, return answer."""
    if file.content_type not in {"audio/wav", "audio/x-wav", "audio/mpeg"}:
        raise HTTPException(status_code=415, detail="Unsupported audio type")
    # Save to a temp file (Whisper expects a filename/bytes)
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
        tmp.write(await file.read())
        tmp_path = tmp.name
    try:
        # ── 1. Transcribe
        seg = AudioSegment.from_file(tmp_path).set_frame_rate(SAMPLE_RATE).set_channels(1)
        audio = np.array(seg.get_array_of_samples()).astype(np.float32) / (2**15)
        inputs = processor(audio, sampling_rate=SAMPLE_RATE, return_tensors="pt")
        ids = model.generate(inputs.input_features.to(model.device))
        question = processor.decode(ids[0], skip_special_tokens=True).strip()
        if not question:
            raise ValueError("Could not detect speech")
        logger.info(f"[VOICE] Transcribed question: {question}")
        # ── 2. LLM answer
        prompt = build_prompt(question)
        # Gemini 2.5 Flash – tuned for short latency
        client = genai.Client(api_key=GEMINI_API_KEY)
        response = client.models.generate_content(
            model="gemini-2.5-flash-preview-04-17",
            contents=prompt,
        )
        answer = response.text.strip()
        logger.info(f"[LLM] Generated answer: {answer}")
        return JSONResponse(
            {
                "question": question,
                "answer": answer,
                "memory_mb": round(memory_usage_mb(), 1),
            }
        )
    finally:
        os.remove(tmp_path)  # Remove temp audio file when done
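

# A minimal local-run sketch, not part of the original Space setup: running this
# module directly (e.g. `python app.py`, assuming that file name) starts the API
# on port 7860. The commented request below shows one possible way to exercise
# the /voice-transcribe endpoint with a hypothetical "question.wav" file.
if __name__ == "__main__":
    import uvicorn
    # Bind to all interfaces so a container/Space can expose the port
    uvicorn.run(app, host="0.0.0.0", port=7860)

# Example client call (hypothetical file name; assumes the server is reachable
# at http://localhost:7860):
#
#   import requests
#   with open("question.wav", "rb") as f:
#       resp = requests.post(
#           "http://localhost:7860/voice-transcribe",
#           files={"file": ("question.wav", f, "audio/wav")},
#       )
#   print(resp.json()["answer"])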