Spaces:

ChandimaPrabath
/

stt

Sleeping

stt

File size: 3,329 Bytes

# app.py
import os
import shutil
import tempfile
import threading
import uuid

from fastapi import FastAPI, UploadFile, File, HTTPException, Query
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse

from stt import SpeechToText

# -----------------------------------------------------------------------------
# CONFIGURATION
# -----------------------------------------------------------------------------
# Whisper model name; the WHISPER_MODEL environment variable overrides "base".
MODEL_NAME = os.environ.get("WHISPER_MODEL", "base")
# Default recording length in seconds; RECORD_DURATION overrides it.
DEFAULT_DUR = float(os.environ.get("RECORD_DURATION", "5.0"))
# Directory for temporary audio files; defaults to the system temp directory.
TEMP_DIR = os.environ.get("TEMP_DIR", tempfile.gettempdir())
# Content types accepted for uploaded audio.
ALLOWED_TYPES = {
    "audio/mpeg",
    "audio/mp3",
    "audio/wav",
    "audio/x-wav",
}
# -----------------------------------------------------------------------------

# ASGI application object; the keyword arguments are the API's metadata.
app = FastAPI(
    version="1.0",
    description="Speech-to-Text API using pywhispercpp's Whisper",
    title="STT Service",
)

# CORS policy: any origin and any request header are accepted, limited to
# the POST, GET and OPTIONS methods (tighten allow_origins for production).
app.add_middleware(
    CORSMiddleware,
    allow_headers=["*"],
    allow_methods=["POST", "GET", "OPTIONS"],
    allow_origins=["*"],
)

# One shared STT engine, built at import time and reused by the handlers.
stt_engine = SpeechToText(
    verbose=False,  # mute console logs in API
    temp_dir=TEMP_DIR,
    record_duration=DEFAULT_DUR,
    sample_rate=16000,
    model_name=MODEL_NAME,
)


@app.get("/health", summary="Health check")
def health():
    # Liveness probe: always reports "ok" together with the configured model.
    payload = {"status": "ok"}
    payload["model"] = MODEL_NAME
    return payload


@app.post("/transcribe", summary="Transcribe uploaded audio file")
async def transcribe_audio(
    file: UploadFile = File(..., description="An audio file (WAV, MP3, etc.)"),
):
    """
    Transcribes an uploaded audio file and returns `{"text": <transcript>}`.

    The upload is copied to a uniquely named temporary file inside TEMP_DIR,
    passed to the STT engine, and the temporary file is removed afterwards
    whether or not transcription succeeded.

    Raises:
        HTTPException(415): the upload's media type is not in ALLOWED_TYPES.
        HTTPException(500): saving the upload or transcribing it failed; the
            underlying error message is returned as the detail.
    """
    # Media types are case-insensitive and may carry parameters
    # (e.g. "audio/wav; codecs=1"), so compare only the normalized base type.
    media_type = (file.content_type or "").split(";", 1)[0].strip().lower()
    if media_type not in ALLOWED_TYPES:
        raise HTTPException(415, detail=f"Unsupported Media Type: {file.content_type}")

    # 1) save the upload under a random name, keeping the client's extension.
    #    The filename can be missing/empty, in which case ".wav" is assumed.
    ext = os.path.splitext(file.filename or "")[1] or ".wav"
    tmp_path = os.path.join(TEMP_DIR, f"{uuid.uuid4()}{ext}")

    try:
        with open(tmp_path, "wb") as out_f:
            shutil.copyfileobj(file.file, out_f)
        # 2) run transcription
        # NOTE: this call is synchronous, so the handler does not yield to the
        # event loop while the engine is working.
        text = stt_engine.transcribe_file(tmp_path)
        return {"text": text}
    except HTTPException:
        raise
    except Exception as e:
        raise HTTPException(500, detail=str(e)) from e
    finally:
        # clean up; the file may not exist if opening it failed
        try:
            os.remove(tmp_path)
        except FileNotFoundError:
            pass


# Serializes /record requests: the handler temporarily overrides the shared
# engine's record_duration, so overlapping requests would otherwise clobber
# each other's override and could restore the wrong value afterwards.
_RECORD_LOCK = threading.Lock()


@app.post("/record", summary="Record from mic + transcribe")
def record_and_transcribe(
    duration: float = Query(
        DEFAULT_DUR, gt=0, le=30,
        description="Seconds to record from server mic"
    )
):
    """
    Records from the server's default microphone for `duration` seconds,
    then transcribes that chunk of audio.

    Requests are handled one at a time. Returns `{"text": <transcript>}`;
    any failure while recording or transcribing is reported as a 500 whose
    detail is the underlying error message.
    """
    with _RECORD_LOCK:
        try:
            # temporarily override record_duration
            original = stt_engine.record_duration
            stt_engine.record_duration = duration
            try:
                # record & transcribe
                text = stt_engine.transcribe(save_temp=False)
            finally:
                # Restore only after the override was actually applied, so a
                # failure before this point cannot hit an unbound `original`.
                stt_engine.record_duration = original
        except Exception as e:
            raise HTTPException(500, detail=str(e)) from e
    return {"text": text}


# Script entry point: running `python app.py` starts Uvicorn for this module.
if __name__ == "__main__":
    import uvicorn

    # PORT may be supplied through the environment; 7860 is the fallback.
    listen_port = int(os.environ.get("PORT", 7860))
    uvicorn.run("app:app", host="0.0.0.0", port=listen_port, reload=True)