File size: 2,982 Bytes
5f0a430
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
08654c0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
import logging
import sys
from fastapi import FastAPI, UploadFile, File, HTTPException
from pydantic import BaseModel
import config
from models import load_whisper, load_summarizer, load_spacy
from services import process_transcription, process_summary


# Logger for this module.
logger = logging.getLogger(__name__)

# Application object that the route decorators further down attach to.
app = FastAPI(
    title="Transcription and Summarization API",
    description="API using Faster-Whisper, spaCy, and Hugging Face Transformers",
    version="1.0.0",
)

# Models are loaded once, at import time, and shared by every request handler.
logger.info("Application starting up - loading models...")
whisper_model = load_whisper(config)
summarizer_pipeline = load_summarizer(config)
nlp_spacy = load_spacy(config)
logger.info("Model loading complete.")

# Report every model that came back falsy. Whisper and the summarizer each
# back an endpoint, so their absence is critical; spaCy is only a warning
# because summarization can proceed without it.
for _model, _level, _message in (
    (
        whisper_model,
        logging.CRITICAL,
        "Whisper model failed to load. Transcription endpoint will be unavailable.",
    ),
    (
        summarizer_pipeline,
        logging.CRITICAL,
        "Summarizer pipeline failed to load. Summarization endpoint will be unavailable.",
    ),
    (
        nlp_spacy,
        logging.WARNING,
        "SpaCy model failed to load. Summarization will proceed without spaCy preprocessing.",
    ),
):
    if not _model:
        logger.log(_level, _message)


# Request body schema for the POST /summarize endpoint.
# NOTE: documented with comments rather than a docstring, because pydantic
# publishes a model's docstring as the schema description in the API docs.
class TranscriptInput(BaseModel):
    # Transcript text that the endpoint passes on to process_summary().
    transcript: str


# Health probe: always reports status "ok" and adds one boolean per model that
# is True when the corresponding module-level model object is not None.
# (Kept as a comment, not a docstring, so the generated OpenAPI description
# for this route stays unchanged.)
@app.get("/health")
def health():
    model_flags = {
        "whisper_loaded": whisper_model is not None,
        "summarizer_loaded": summarizer_pipeline is not None,
        "spacy_loaded": nlp_spacy is not None,
    }
    return {"status": "ok", **model_flags}


@app.post("/transcribe")
async def transcription(audio_file: UploadFile = File(...)):
    """Transcribe an uploaded audio file and return the transcript text.

    Args:
        audio_file: The uploaded audio file.

    Returns:
        A dict of the form ``{"transcript": <text>}``.

    Raises:
        HTTPException: 503 when the Whisper model is not loaded; 400 when the
            upload is empty or processing raises ``ValueError``; 500 for any
            other failure.
    """
    # Local import keeps this handler self-contained; it ships with FastAPI.
    from fastapi.concurrency import run_in_threadpool

    if whisper_model is None:
        raise HTTPException(status_code=503, detail="Transcription service unavailable.")

    try:
        content = await audio_file.read()
        if not content:
            # Raised as ValueError so the handler below turns it into a 400,
            # mirroring how /summarize rejects an empty transcript.
            raise ValueError("Uploaded audio file is empty.")

        # process_transcription is a plain synchronous call. Awaiting it via
        # the worker thread pool keeps the event loop free to serve other
        # requests while the transcription runs.
        transcript, info = await run_in_threadpool(
            process_transcription, content, whisper_model
        )
        logger.info("Transcription successful. Language: %s", info.language)
        return {"transcript": transcript}
    except ValueError as ve:
        logger.error("Value error during transcription processing: %s", ve)
        raise HTTPException(status_code=400, detail=str(ve)) from ve
    except Exception as e:
        # Boundary handler: log the traceback, hide the details from the client.
        logger.error("Unhandled error during transcription: %s", e, exc_info=True)
        raise HTTPException(
            status_code=500,
            detail="Internal server error during transcription.",
        ) from e


@app.post("/summarize")
def summarize(input: TranscriptInput):
    """Summarize a transcript and return the summary.

    Args:
        input: Request body carrying the ``transcript`` text.

    Returns:
        A dict of the form ``{"summary": <summary>}``.

    Raises:
        HTTPException: 503 when the summarizer pipeline is not loaded; 400
            when the transcript is empty or whitespace-only, or processing
            raises ``ValueError``; 500 for any other failure.
    """
    # NOTE: the parameter name shadows the builtin input(); it is kept as-is
    # so the handler's signature does not change.
    if summarizer_pipeline is None:
        raise HTTPException(status_code=503, detail="Summarization service unavailable.")
    # Whitespace-only text has nothing to summarize, so treat it as empty too.
    if not input.transcript.strip():
        raise HTTPException(status_code=400, detail="Transcript cannot be empty.")

    try:
        # nlp_spacy may be unset; it is passed through for process_summary to
        # handle.
        summary = process_summary(input.transcript, summarizer_pipeline, nlp_spacy, config)
    except ValueError as ve:
        logger.error("Value error during summary processing: %s", ve)
        raise HTTPException(status_code=400, detail=str(ve)) from ve
    except Exception as e:
        # Boundary handler: log the traceback, hide the details from the client.
        logger.error("Unhandled error during summarization: %s", e, exc_info=True)
        raise HTTPException(
            status_code=500,
            detail="Internal server error during summarization.",
        ) from e

    return {"summary": summary}