# Access site: https://binkhoale1812-interview-ai.hf.space/
import os, tempfile
from pathlib import Path
from typing import Dict

# Server
from fastapi import FastAPI, File, UploadFile, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse, FileResponse
from fastapi.staticfiles import StaticFiles

# AI + LLM
import torch  # For transformer
from google import genai
from google.genai import types

# Audio Transcribe
from pydub import AudioSegment
from transformers import WhisperProcessor, WhisperForConditionalGeneration
import numpy as np

############################################
# ── Configuration ────────────────────────
############################################

GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
if not GEMINI_API_KEY:
    raise RuntimeError("GEMINI_API_KEY environment variable must be set!")

# The small English-only Whisper model is light enough for CPU Spaces; change if GPU is available
ASR_MODEL_ID = "openai/whisper-small.en"
ASR_LANGUAGE = "en"  # Force to English for interview setting
SAMPLE_RATE = 16000

############################################
# ── FastAPI App ───────────────────────────
############################################

app = FastAPI(title="Interview Q&A Assistant", docs_url="/docs")
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
)
# Serve frontend assets
app.mount("/statics", StaticFiles(directory="statics"), name="statics")

############################################
# ── Global objects (lazy‑loaded) ──────────
############################################

# Globals
processor = None
model = None

# Enable Logging for Debugging
import psutil
import logging

# Set up app-specific logger
logger = logging.getLogger("triage-response")
logger.setLevel(logging.INFO)  # Set to DEBUG only when needed
# Set log format
formatter = logging.Formatter("[%(levelname)s] %(asctime)s - %(message)s")
handler = logging.StreamHandler()
handler.setFormatter(formatter)
logger.addHandler(handler)

# Suppress noisy libraries like pymongo, urllib3, etc.
for noisy in ["pymongo", "urllib3", "httpx", "uvicorn", "uvicorn.error", "uvicorn.access"]:
    logging.getLogger(noisy).setLevel(logging.WARNING)


# Monitor Resources Before Startup
def check_system_resources():
    memory = psutil.virtual_memory()
    cpu = psutil.cpu_percent(interval=1)
    disk = psutil.disk_usage("/")
    # Log current resource usage
    logger.info(f"🔍 System Resources - RAM: {memory.percent}%, CPU: {cpu}%, Disk: {disk.percent}%")
    if memory.percent > 85:
        logger.warning("⚠️ High RAM usage detected!")
    if cpu > 90:
        logger.warning("⚠️ High CPU usage detected!")
    if disk.percent > 90:
        logger.warning("⚠️ High Disk usage detected!")

check_system_resources()


# Startup
@app.on_event("startup")
async def load_models():
    global processor, model
    cache = Path("model_cache")
    cache.mkdir(exist_ok=True)
    # Load the Whisper processor and model once at startup
    processor = WhisperProcessor.from_pretrained(ASR_MODEL_ID, cache_dir=cache)
    model = WhisperForConditionalGeneration.from_pretrained(ASR_MODEL_ID, cache_dir=cache)
    # Force English transcription – never translate
    forced = processor.get_decoder_prompt_ids(language="english", task="transcribe")
    model.config.forced_decoder_ids = forced
    model.to("cpu")
    model.eval()
    logger.info("[STARTUP] Whisper loaded ✔")


############################################
# ── Helpers ───────────────────────────────
############################################

def build_prompt(question: str) -> str:
    """Craft a prompt that elicits concise, structured answers."""
    return (
        "You are a helpful career‑coach AI. Answer the following interview "
        "question clearly and concisely, offering practical insights when appropriate.\n"
        "Use markdown for **bold**, *italic*, and bullet‑lists when helpful.\n"
        "Ensure your answer is less than 200 words.\n\n"
        f"Interview question: \"{question}\""
    )


def memory_usage_mb() -> float:
    return psutil.Process().memory_info().rss / 1_048_576  # bytes→MiB


############################################
# ── Routes ────────────────────────────────
############################################

@app.get("/")
async def root() -> FileResponse:
    """Serve the single‑page app."""
    logger.info("[STATIC] Serving frontend")
    return FileResponse(Path("statics/index.html"))


@app.post("/voice-transcribe")
async def voice_transcribe(file: UploadFile = File(...)):  # noqa: B008
    """Receive audio, transcribe, push to Gemini, return answer."""
    if file.content_type not in {"audio/wav", "audio/x-wav", "audio/mpeg"}:
        raise HTTPException(status_code=415, detail="Unsupported audio type")
    # Save to a temp file (Whisper expects a filename/bytes)
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
        tmp.write(await file.read())
        tmp_path = tmp.name
    try:
        # ── 1. Transcribe
        seg = AudioSegment.from_file(tmp_path).set_frame_rate(SAMPLE_RATE).set_channels(1)
        audio = np.array(seg.get_array_of_samples()).astype(np.float32) / (2**15)
        inputs = processor(audio, sampling_rate=SAMPLE_RATE, return_tensors="pt")
        ids = model.generate(inputs.input_features.to(model.device))
        question = processor.decode(ids[0], skip_special_tokens=True).strip()
        if not question:
            raise ValueError("Could not detect speech")
        logger.info(f"[VOICE] Transcribed question: {question}")
        # ── 2. LLM answer
        prompt = build_prompt(question)
        # Gemini 2.5 Flash – tuned for short latency
        client = genai.Client(api_key=GEMINI_API_KEY)
        response = client.models.generate_content(
            model="gemini-2.5-flash-preview-04-17",
            contents=prompt,
        )
        answer = response.text.strip()
        logger.info(f"[LLM] Generated answer: {answer}")
        return JSONResponse(
            {
                "question": question,
                "answer": answer,
                "memory_mb": round(memory_usage_mb(), 1),
            }
        )
    finally:
        os.remove(tmp_path)  # Remove temp audio file when done
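

# A minimal local-run sketch, not part of the original Space setup: running this
# module directly (e.g. `python app.py`, assuming that file name) starts the API
# on port 7860. The commented request below shows one possible way to exercise
# the /voice-transcribe endpoint with a hypothetical "question.wav" file.
if __name__ == "__main__":
    import uvicorn
    # Bind to all interfaces so a container/Space can expose the port
    uvicorn.run(app, host="0.0.0.0", port=7860)

# Example client call (hypothetical file name; assumes the server is reachable
# at http://localhost:7860):
#
#   import requests
#   with open("question.wav", "rb") as f:
#       resp = requests.post(
#           "http://localhost:7860/voice-transcribe",
#           files={"file": ("question.wav", f, "audio/wav")},
#       )
#   print(resp.json()["answer"])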