"""HTTP API exposing audio transcription and transcript summarization.

The Whisper, summarizer, and spaCy models are loaded once when this module is
imported. Each endpoint answers 503 when the model it depends on is missing.
"""

import logging
import sys

from fastapi import FastAPI, UploadFile, File, HTTPException
from fastapi.concurrency import run_in_threadpool
from pydantic import BaseModel

import config
from models import load_whisper, load_summarizer, load_spacy
from services import process_transcription, process_summary

logger = logging.getLogger(__name__)

app = FastAPI(
    title="Transcription and Summarization API",
    description="API using Faster-Whisper, spaCy, and Hugging Face Transformers",
    version="1.0.0",
)

# Models are loaded eagerly at import time so that every request reuses the
# same instances.
# NOTE(review): the checks below assume each loader returns None on failure
# rather than raising - confirm against the `models` module.
logger.info("Application starting up - loading models...")
whisper_model = load_whisper(config)
summarizer_pipeline = load_summarizer(config)
nlp_spacy = load_spacy(config)
logger.info("Model loading complete.")

# Use the same `is None` test as /health and the endpoints, so these startup
# diagnostics agree with what the service will actually report and do.
if whisper_model is None:
    logger.critical(
        "Whisper model failed to load. Transcription endpoint will be unavailable."
    )
if summarizer_pipeline is None:
    logger.critical(
        "Summarizer pipeline failed to load. Summarization endpoint will be unavailable."
    )
if nlp_spacy is None:
    logger.warning(
        "SpaCy model failed to load. Summarization will proceed without spaCy preprocessing."
    )


class TranscriptInput(BaseModel):
    """Request body for /summarize: the transcript text to summarize."""

    transcript: str


@app.get("/health")
def health():
    """Report service liveness and which models are currently loaded."""
    return {
        "status": "ok",
        "whisper_loaded": whisper_model is not None,
        "summarizer_loaded": summarizer_pipeline is not None,
        "spacy_loaded": nlp_spacy is not None,
    }


@app.post("/transcribe")
async def transcription(audio_file: UploadFile = File(...)):
    """Transcribe an uploaded audio file and return its transcript.

    Responds with 503 when the Whisper model is not loaded, 400 when the
    upload is empty or processing raises ``ValueError``, and 500 on any other
    failure.
    """
    if whisper_model is None:
        raise HTTPException(status_code=503, detail="Transcription service unavailable.")

    try:
        content = await audio_file.read()
        if not content:
            # Reject empty uploads up front; routed through the ValueError
            # handler below so the client receives a 400.
            raise ValueError("Uploaded audio file is empty.")

        # process_transcription is synchronous; calling it directly inside
        # this coroutine would block the event loop (and every other request,
        # including /health) for the whole transcription. Run it in the
        # worker threadpool instead.
        transcript, info = await run_in_threadpool(
            process_transcription, content, whisper_model
        )
        logger.info("Transcription successful. Language: %s", info.language)
        return {"transcript": transcript}
    except ValueError as ve:
        logger.error("Value error during transcription processing: %s", ve)
        raise HTTPException(status_code=400, detail=str(ve)) from ve
    except Exception as e:
        # Top-level boundary: log with traceback, hide internals from client.
        logger.exception("Unhandled error during transcription: %s", e)
        raise HTTPException(
            status_code=500, detail="Internal server error during transcription."
        ) from e


@app.post("/summarize")
def summarize(input: TranscriptInput):
    """Summarize the supplied transcript and return the summary.

    Responds with 503 when the summarizer is not loaded, 400 when the
    transcript is empty/blank or processing raises ``ValueError``, and 500 on
    any other failure.
    """
    # NOTE: the parameter name `input` shadows the builtin; it is kept so the
    # function signature stays unchanged for existing callers.
    if summarizer_pipeline is None:
        raise HTTPException(status_code=503, detail="Summarization service unavailable.")

    # A whitespace-only transcript carries no content, so treat it as empty.
    if not input.transcript.strip():
        raise HTTPException(status_code=400, detail="Transcript cannot be empty.")

    try:
        summary = process_summary(input.transcript, summarizer_pipeline, nlp_spacy, config)
        return {"summary": summary}
    except ValueError as ve:
        logger.error("Value error during summary processing: %s", ve)
        raise HTTPException(status_code=400, detail=str(ve)) from ve
    except Exception as e:
        # Top-level boundary: log with traceback, hide internals from client.
        logger.exception("Unhandled error during summarization: %s", e)
        raise HTTPException(
            status_code=500, detail="Internal server error during summarization."
        ) from e