# Access site: https://binkhoale1812-interview-ai.hf.space/
import os
import tempfile
from pathlib import Path
from typing import Dict
# Server
from fastapi import FastAPI, File, UploadFile, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse, FileResponse
from fastapi.staticfiles import StaticFiles
# AI + LLM
import torch  # For transformer
from transformers import WhisperProcessor, WhisperForConditionalGeneration
from google import genai
from google.genai import types
# Audio Transcribe
from pydub import AudioSegment
import numpy as np
############################################
# ── Configuration ────────────────────────
############################################
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
if not GEMINI_API_KEY:
    raise RuntimeError("GEMINI_API_KEY environment variable must be set!")
# Tiny Whisper model is light enough for CPU Spaces; change if GPU is available
ASR_MODEL_ID = "openai/whisper-tiny"  # ~39 MB
ASR_LANGUAGE = "en"  # Force to English for interview setting
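# GEMINI_API_KEY is read from the environment; for example (shell, placeholder value):
#   export GEMINI_API_KEY="your-gemini-api-key"
# On Hugging Face Spaces this is normally configured as a repository secret instead.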
############################################
# ── FastAPI App ───────────────────────────
############################################
app = FastAPI(title="Interview Q&A Assistant", docs_url="/docs")
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
)
# Serve frontend assets
app.mount("/statics", StaticFiles(directory="statics"), name="statics")
############################################
# ── Global objects (lazy‑loaded) ──────────
############################################
# Globals
processor = None
model = None
# Load the Whisper processor/model once when the server starts
@app.on_event("startup")
async def load_models():
    global processor, model
    cache_path = Path("model_cache")  # local writable path inside Hugging Face Space
    processor = WhisperProcessor.from_pretrained(ASR_MODEL_ID, cache_dir=cache_path)
    model = WhisperForConditionalGeneration.from_pretrained(ASR_MODEL_ID, cache_dir=cache_path)
    model.to("cpu")
############################################
# ── Helpers ───────────────────────────────
############################################
def build_prompt(question: str) -> str:
    """Craft a prompt that elicits concise, structured answers."""
    return (
        "You are a helpful career‑coach AI. Answer the following interview "
        "question clearly and concisely, offering practical insights when "
        "appropriate.\n\n"
        f"Interview question: \"{question}\""
    )
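# For example, build_prompt("Tell me about yourself") returns:
#   You are a helpful career‑coach AI. Answer the following interview question
#   clearly and concisely, offering practical insights when appropriate.
#
#   Interview question: "Tell me about yourself"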
def memory_usage_mb() -> float:
    # psutil is imported later at module level, so the name resolves by the time this runs
    return psutil.Process().memory_info().rss / 1_048_576  # bytes → MiB
# Enable Logging for Debugging
import logging
# Set up app-specific logger
logger = logging.getLogger("triage-response")
logger.setLevel(logging.INFO)  # Set to DEBUG only when needed
# Set log format
formatter = logging.Formatter("[%(levelname)s] %(asctime)s - %(message)s")
handler = logging.StreamHandler()
handler.setFormatter(formatter)
logger.addHandler(handler)
# Suppress noisy libraries like pymongo, urllib3, etc.
for noisy in ["pymongo", "urllib3", "httpx", "uvicorn", "uvicorn.error", "uvicorn.access"]:
    logging.getLogger(noisy).setLevel(logging.WARNING)
# Monitor Resources Before Startup
import psutil
def check_system_resources():
    memory = psutil.virtual_memory()
    cpu = psutil.cpu_percent(interval=1)
    disk = psutil.disk_usage("/")
    # Log a snapshot of current usage
    logger.info(f"🔍 System Resources - RAM: {memory.percent}%, CPU: {cpu}%, Disk: {disk.percent}%")
    if memory.percent > 85:
        logger.warning("⚠️ High RAM usage detected!")
    if cpu > 90:
        logger.warning("⚠️ High CPU usage detected!")
    if disk.percent > 90:
        logger.warning("⚠️ High Disk usage detected!")
check_system_resources()
############################################
# ── Routes ────────────────────────────────
############################################
@app.get("/")
async def root() -> FileResponse:
    """Serve the single‑page app."""
    logger.info("[STATIC] Serving frontend")
    return FileResponse(Path("statics/index.html"))
# Route path assumed here; adjust it to whatever URL the frontend posts to
@app.post("/voice-transcribe")
async def voice_transcribe(file: UploadFile = File(...)):  # noqa: B008
    """Receive audio, transcribe, push to Gemini, return answer."""
    # audio/webm (browser MediaRecorder output) is also accepted since pydub/ffmpeg can decode it
    if file.content_type not in {"audio/wav", "audio/x-wav", "audio/mpeg", "audio/webm"}:
        raise HTTPException(status_code=415, detail="Unsupported audio type")
    # Save to a temp file (Whisper expects a filename/bytes)
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
        tmp.write(await file.read())
        tmp_path = tmp.name
    try:
        # ── 1. Transcribe
        # Load audio using pydub (which handles WebM/Opus/MP3/etc.)
        audio = AudioSegment.from_file(tmp_path)
        audio = audio.set_frame_rate(16000).set_channels(1)  # Whisper expects mono 16 kHz
        samples = np.array(audio.get_array_of_samples()).astype(np.float32) / (2**15)  # normalize int16 samples to [-1, 1]
        # Obtain speech and process to tensor
        speech = samples
        inputs = processor(speech, sampling_rate=16000, return_tensors="pt")
        input_features = inputs["input_features"].to("cpu")
        # Pin decoding to ASR_LANGUAGE so short clips are not auto-detected as another language
        forced_ids = processor.get_decoder_prompt_ids(language=ASR_LANGUAGE, task="transcribe")
        generated_ids = model.generate(input_features, forced_decoder_ids=forced_ids)
        question = processor.batch_decode(generated_ids, skip_special_tokens=True)[0].strip()
        if not question:
            raise ValueError("Empty transcription")
        logger.info(f"[VOICE] Transcribed question: {question}")
        # ── 2. LLM answer
        prompt = build_prompt(question)
        # Gemini Flash 2.5 – tuned for short latency
        client = genai.Client(api_key=GEMINI_API_KEY)
        response = client.models.generate_content(
            model="gemini-2.5-flash-preview-04-17",
            contents=prompt
        )
        answer = response.text.strip()
        logger.info(f"[LLM] Generated answer: {answer}")
        return JSONResponse(
            {
                "question": question,
                "answer": answer,
                "memory_mb": round(memory_usage_mb(), 1),
            }
        )
    finally:
        os.remove(tmp_path)  # Remove the temp audio file when done
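# Optional local entry point: a minimal sketch assuming uvicorn is installed and that
# port 7860 (the Hugging Face Spaces default) is free. On Spaces the platform normally
# launches the ASGI app itself, so this guard only matters for local runs.
if __name__ == "__main__":
    import uvicorn
    # Bind to all interfaces so a container or the Space runtime can reach the server
    uvicorn.run(app, host="0.0.0.0", port=7860)

# Example request against the voice endpoint (path assumed above):
#   curl -X POST -F "file=@question.wav;type=audio/wav" \
#        https://binkhoale1812-interview-ai.hf.space/voice-transcribe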