# Atlas / app.py
from fastapi import FastAPI, File, UploadFile, HTTPException
import torch
from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
import requests
import json
import tempfile
import os

app = FastAPI()

# Set up Whisper model
device = "cuda:0" if torch.cuda.is_available() else "cpu"
torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
model_id = "openai/whisper-large-v3-turbo"
model = AutoModelForSpeechSeq2Seq.from_pretrained(
    model_id, torch_dtype=torch_dtype, low_cpu_mem_usage=True, use_safetensors=True
)
model.to(device)
processor = AutoProcessor.from_pretrained(model_id)
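
# The processor bundles the tokenizer and feature extractor that the ASR pipeline below needs.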
pipe = pipeline(
    "automatic-speech-recognition",
    model=model,
    tokenizer=processor.tokenizer,
    feature_extractor=processor.feature_extractor,
    torch_dtype=torch_dtype,
    device=device,
)
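# The pipeline accepts a path to an audio file directly and handles decoding internally
# (ffmpeg must be available on the host for formats like MP3).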

OPENROUTER_API_KEY = os.getenv("OPENROUTER_API_KEY", "")
OPENROUTER_URL = "https://openrouter.ai/api/v1/chat/completions"
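# Note: if OPENROUTER_API_KEY is unset, OpenRouter will reject the request below,
# which this endpoint surfaces to the client as an HTTP 500.
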
@app.post("/transcribe-analyze/")
async def transcribe_analyze(file: UploadFile = File(...)):
    temp_audio_path = None
    try:
        # Save the uploaded file temporarily
        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_audio:
            temp_audio.write(await file.read())
            temp_audio_path = temp_audio.name

        # Transcribe the audio
        transcription_result = pipe(temp_audio_path, return_timestamps=True)
        transcription = transcription_result["text"]

        # Send the transcription to the LLM for classification
        response = requests.post(
            url=OPENROUTER_URL,
            headers={
                "Authorization": f"Bearer {OPENROUTER_API_KEY}",
                "Content-Type": "application/json",
            },
            data=json.dumps({
                "model": "meta-llama/llama-3.1-70b-instruct:free",
                "messages": [
                    {
                        "role": "user",
                        "content": (
                            "You are an AI assistant that is given the transcript of a "
                            "call between an agent and a lead, and you must classify "
                            "whether the lead happily agreed to the booking. The response "
                            "should have 4 parts: 1. Appointment Booked: Yes/No, "
                            "2. Short reason for your answer, 3. Short summary of the "
                            "call, 4. Lead's overall emotion.\n"
                            f"Here is the transcription: {transcription}"
                        ),
                    }
                ],
            }),
        )
        response.raise_for_status()
        ai_response = (
            response.json()
            .get("choices", [{}])[0]
            .get("message", {})
            .get("content", "No response from AI.")
        )

        return {"transcription": transcription, "ai_response": ai_response}
    except Exception as e:
        # Raise, don't return: a returned HTTPException is not converted into an error response
        raise HTTPException(status_code=500, detail=str(e))
    finally:
        # Remove the temporary file even when transcription or the API call fails
        if temp_audio_path and os.path.exists(temp_audio_path):
            os.remove(temp_audio_path)
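

# Local entry point — a minimal sketch, not part of the original file. It assumes
# uvicorn is installed and that port 7860 (the usual Hugging Face Spaces port) is free.
if __name__ == "__main__":
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=7860)

# Example request against a running instance (host, port, and file name are illustrative):
#   curl -X POST "http://localhost:7860/transcribe-analyze/" -F "file=@call.mp3"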