"""FastAPI service that asks a Gemini model to compare two audio recitations.

Endpoints:
    GET  /               -- usage information.
    POST /compare-audio  -- multipart form with files ``audio1`` (the user's
                            recitation) and ``audio2`` (the professional
                            qarri recitation); returns the model's text reply.

The ``GENAI_API_KEY`` environment variable must be set before the module is
imported; otherwise an ``EnvironmentError`` is raised at import time.
"""

import mimetypes
import os

import uvicorn
from fastapi import FastAPI, File, HTTPException, UploadFile
from google import genai
from google.genai import types

app = FastAPI()

# Retrieve the GenAI API key from the environment variable.
api_key = os.getenv("GENAI_API_KEY")
if not api_key:
    raise EnvironmentError("GENAI_API_KEY environment variable not set")

# Initialize the GenAI client (shared by all requests).
client = genai.Client(api_key=api_key)

# Model used for the comparison.
_MODEL_NAME = "gemini-2.0-flash"

# Instruction sent to the model ahead of the two audio parts. Built from
# adjacent literals so the resulting text is exactly one line.
_COMPARISON_PROMPT = (
    "Please analyze and compare the two provided audio clips. "
    "The first audio is the user's recitation, and the second audio is the "
    "professional qarri recitation. "
    "Evaluate their similarity on a scale from 0 to 1, where: "
    "- 1 indicates the user's recitation contains no mistakes compared to "
    "the professional version, "
    "- 0 indicates there are significant mistakes. "
    "Provide your response with: "
    "1. A numerical similarity score on the first line. "
    "2. A single sentence that indicates whether the user's recitation is "
    "similar, moderately similar, or dissimilar to the professional qarri."
)


def _resolve_mime_type(upload: UploadFile, field_name: str) -> str:
    """Return the MIME type to send to the model for *upload*.

    Prefers the Content-Type declared by the client; when the multipart part
    carries none, falls back to guessing from the file name.

    Raises:
        HTTPException: 400 if no MIME type can be determined.
    """
    mime_type = upload.content_type
    if not mime_type:
        mime_type, _ = mimetypes.guess_type(upload.filename or "")
    if not mime_type:
        raise HTTPException(
            status_code=400,
            detail=f"Could not determine the content type of '{field_name}'.",
        )
    return mime_type


async def _read_audio_part(upload: UploadFile, field_name: str) -> types.Part:
    """Read *upload* fully and wrap its bytes as a GenAI content part.

    Raises:
        HTTPException: 400 if the upload is empty or has no usable MIME type.
    """
    payload = await upload.read()
    if not payload:
        raise HTTPException(
            status_code=400,
            detail=f"Uploaded file '{field_name}' is empty.",
        )
    return types.Part.from_bytes(
        data=payload,
        mime_type=_resolve_mime_type(upload, field_name),
    )


@app.get("/")
async def root():
    """Return a welcome message and brief usage instructions."""
    return {
        "message": "Welcome to the Audio Similarity API!",
        "usage": {
            "endpoint": "/compare-audio",
            "description": "POST two audio files (user recitation and professional qarri) for similarity analysis.",
            "instructions": "Send audio files as form-data with keys 'audio1' and 'audio2'.",
        },
    }


@app.post("/compare-audio")
async def compare_audio(
    audio1: UploadFile = File(...),
    audio2: UploadFile = File(...),
):
    """Compare two uploaded audio clips using the Gemini model.

    Args:
        audio1: The user's recitation.
        audio2: The professional qarri recitation.

    Returns:
        ``{"result": <model text>}`` with the model's reply text.

    Raises:
        HTTPException: 400 if either upload is empty or its content type
            cannot be determined.
    """
    user_part = await _read_audio_part(audio1, "audio1")
    reference_part = await _read_audio_part(audio2, "audio2")

    # Use the SDK's async client: the synchronous `client.models` call would
    # block the event loop for the whole duration of the model request.
    response = await client.aio.models.generate_content(
        model=_MODEL_NAME,
        contents=[_COMPARISON_PROMPT, user_part, reference_part],
    )

    # Return the model's response.
    return {"result": response.text}


if __name__ == "__main__":
    uvicorn.run(app, host="0.0.0.0", port=8000)