# Atlas / app.py
from fastapi import FastAPI, File, UploadFile, HTTPException
import torch
from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
import requests
import json
import tempfile
import os

app = FastAPI()

# Set up Whisper model
device = "cuda:0" if torch.cuda.is_available() else "cpu"
torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
model_id = "openai/whisper-large-v3-turbo"
model = AutoModelForSpeechSeq2Seq.from_pretrained(
    model_id, torch_dtype=torch_dtype, low_cpu_mem_usage=True, use_safetensors=True
)
model.to(device)
processor = AutoProcessor.from_pretrained(model_id)
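
# The processor bundles the tokenizer and feature extractor that the ASR pipeline below needs.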
pipe = pipeline(
    "automatic-speech-recognition",
    model=model,
    tokenizer=processor.tokenizer,
    feature_extractor=processor.feature_extractor,
    torch_dtype=torch_dtype,
    device=device,
)
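# The pipeline accepts a path to an audio file directly and handles decoding internally
# (ffmpeg must be available on the host for formats like MP3).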

OPENROUTER_API_KEY = os.getenv("OPENROUTER_API_KEY", "")
OPENROUTER_URL = "https://openrouter.ai/api/v1/chat/completions"
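# Note: if OPENROUTER_API_KEY is unset, OpenRouter will reject the request below,
# which this endpoint surfaces to the client as an HTTP 500.
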
@app.post("/transcribe-analyze/")
async def transcribe_analyze(file: UploadFile = File(...)):
    temp_audio_path = None
    try:
        # Save the uploaded file temporarily
        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_audio:
            temp_audio.write(await file.read())
            temp_audio_path = temp_audio.name

        # Transcribe the audio
        transcription_result = pipe(temp_audio_path, return_timestamps=True)
        transcription = transcription_result["text"]

        # Send the transcription to the LLM for classification
        response = requests.post(
            url=OPENROUTER_URL,
            headers={
                "Authorization": f"Bearer {OPENROUTER_API_KEY}",
                "Content-Type": "application/json",
            },
            data=json.dumps({
                "model": "meta-llama/llama-3.1-70b-instruct:free",
                "messages": [
                    {
                        "role": "user",
                        "content": (
                            "You are an AI assistant that is given the transcript of a "
                            "call between an agent and a lead, and you must classify "
                            "whether the lead happily agreed to the booking. The response "
                            "should have 4 parts: 1. Appointment Booked: Yes/No, "
                            "2. Short reason for your answer, 3. Short summary of the "
                            "call, 4. Lead's overall emotion.\n"
                            f"Here is the transcription: {transcription}"
                        ),
                    }
                ],
            }),
        )
        response.raise_for_status()
        ai_response = (
            response.json()
            .get("choices", [{}])[0]
            .get("message", {})
            .get("content", "No response from AI.")
        )

        return {"transcription": transcription, "ai_response": ai_response}
    except Exception as e:
        # Raise, don't return: a returned HTTPException is not converted into an error response
        raise HTTPException(status_code=500, detail=str(e))
    finally:
        # Remove the temporary file even when transcription or the API call fails
        if temp_audio_path and os.path.exists(temp_audio_path):
            os.remove(temp_audio_path)
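

# Local entry point — a minimal sketch, not part of the original file. It assumes
# uvicorn is installed and that port 7860 (the usual Hugging Face Spaces port) is free.
if __name__ == "__main__":
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=7860)

# Example request against a running instance (host, port, and file name are illustrative):
#   curl -X POST "http://localhost:7860/transcribe-analyze/" -F "file=@call.mp3"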