Hammad712 committed
Commit a9589dc (verified)
1 Parent(s): 899a99b

Update main.py

Files changed (1):
  1. main.py +92 -51
main.py CHANGED
@@ -142,62 +142,103 @@ async def startup_event():
 @app.get("/")
 async def root():
     return {
-        "message": "Welcome to the Audio Similarity API!",
-        "usage": {
-            "endpoints": {
-                "gemini": {
-                    "path": "/compare-audio",
-                    "description": "POST two audio files (user recitation and professional qarri) for similarity analysis using Gemini."
-                },
-                "dtw": {
-                    "path": "/compare-dtw",
-                    "description": "POST two audio files (user recitation and professional qarri) for similarity analysis using deep embeddings and DTW."
-                }
-            }
-        }
-    }
+        "message": "Welcome to the Audio Similarity API!"
+
+
+# Load GROQ API key from environment variable
+API_KEY = os.getenv("GROQ_API_KEY")
+if not API_KEY:
+    raise RuntimeError("GROQ_API_KEY environment variable not set")
+client = Groq(api_key=API_KEY)
+
+
+def transcribe_audio(file_tuple: tuple) -> str:
+    """
+    Transcribes speech from an audio file using the GROQ Whisper model.
+    Args:
+        file_tuple (tuple): (filename, file_bytes)
+    Returns:
+        str: The transcription text or error message.
+    """
+    try:
+        transcription = client.audio.transcriptions.create(
+            file=file_tuple,
+            model="whisper-large-v3",
+            response_format="text"
+        )
+        return transcription
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=f"Transcription error: {e}")
+
+
+def levenshtein_similarity(text1: str, text2: str) -> float:
+    """
+    Calculate normalized Levenshtein similarity between two texts.
+    Returns a score between 0 and 1.
+    """
+    distance = Levenshtein.distance(text1, text2)
+    max_len = max(len(text1), len(text2))
+    return 1 - distance / max_len if max_len > 0 else 1.0
+

-@app.post("/compare-audio")
+def find_differences(text_original: str, text_user: str) -> str:
+    """
+    Identify differences between original and user transcriptions using GROQ chat.
+    """
+    messages = [
+        {"role": "system", "content":
+            "You are a helpful assistant that finds mistakes between two texts. "
+            "Provide only the mistakes, no extra explanation."},
+        {"role": "user", "content": (
+            f"Original transcription: '{text_original}'\n"
+            f"User transcription: '{text_user}'\n"
+            "Explain the differences between these texts."
+        )}
+    ]
+    try:
+        completion = client.chat.completions.create(
+            model="mistral-saba-24b",
+            messages=messages,
+            temperature=1,
+            max_tokens=1024,
+            top_p=1,
+            stream=False
+        )
+        return completion.choices[0].message.content
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=f"Error generating explanation: {e}")
+
+
+@app.post("/compare")
 async def compare_audio(
-    audio1: UploadFile = File(...),
-    audio2: UploadFile = File(...)
+    original_audio: UploadFile = File(...),
+    user_audio: UploadFile = File(...)
 ):
     """
-    Compare two audio files using the Gemini approach.
-    The first audio is the user's recitation and the second is the professional qarri recitation.
+    Endpoint to upload two audio files, transcribe, compare, and return similarity and differences.
     """
-    # Read the uploaded audio files.
-    audio1_bytes = await audio1.read()
-    audio2_bytes = await audio2.read()
-
-    # Create a refined prompt that clearly identifies the audio sources.
-    prompt = (
-        """Please analyze and compare the two provided audio clips.
-        The first audio is the user's recitation, and the second audio is the professional qarri recitation.
-        Evaluate their similarity on a scale from 0 to 1, where:
-        - 1 indicates the user's recitation contains no mistakes compared to the professional version,
-        - 0 indicates there are significant mistakes.
-        Provide your response with:
-        1. A numerical similarity score on the first line.
-        2. A single sentence that indicates whether the user's recitation is similar, moderately similar, or dissimilar to the professional qarri."""
-    )
-
-    # Generate the content using the Gemini model with the two audio inputs.
-    response = client.models.generate_content(
-        model='gemini-2.0-flash',
-        contents=[
-            prompt,
-            types.Part.from_bytes(
-                data=audio1_bytes,
-                mime_type=audio1.content_type,
-            ),
-            types.Part.from_bytes(
-                data=audio2_bytes,
-                mime_type=audio2.content_type,
-            )
-        ]
-    )
-    return {"result": response.text}
+    # Read uploaded files
+    original_bytes = await original_audio.read()
+    user_bytes = await user_audio.read()
+
+    # Transcribe
+    transcription_original = transcribe_audio((original_audio.filename, original_bytes))
+    transcription_user = transcribe_audio((user_audio.filename, user_bytes))
+
+    # Compute similarity
+    similarity_score = levenshtein_similarity(transcription_original, transcription_user)
+
+    # Find differences
+    explanation = find_differences(transcription_original, transcription_user)
+
+    # Build response
+    result = {
+        "original_transcription": transcription_original,
+        "user_transcription": transcription_user,
+        "levenshtein_similarity": round(similarity_score, 2),
+        "explanation_of_differences": explanation
+    }
+    return JSONResponse(content=result)

 @app.post("/compare-dtw")
 async def compare_dtw(
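
The new levenshtein_similarity helper scores the two transcripts as 1 - distance / max_len, so identical transcripts score 1.0 and completely different ones approach 0.0. A minimal sketch of that calculation, using the Levenshtein package the diff calls into (the example strings are illustrative only, not from the commit):

import Levenshtein

# Illustrative strings; the endpoint would pass the two Whisper transcripts here.
text_original = "bismillah ir rahman ir rahim"
text_user = "bismilah ir rahman ir rahim"

distance = Levenshtein.distance(text_original, text_user)   # 1 edit (one missing 'l')
max_len = max(len(text_original), len(text_user))           # 28
similarity = 1 - distance / max_len if max_len > 0 else 1.0
print(round(similarity, 2))                                  # 0.96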
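
For reference, a hedged sketch of how a client might exercise the renamed /compare endpoint after this commit is deployed; the base URL, file names, and MIME type below are assumptions, not part of the diff:

import requests

# Hypothetical host/port and file names; adjust to the actual deployment.
url = "http://localhost:8000/compare"

with open("original.mp3", "rb") as f_orig, open("user.mp3", "rb") as f_user:
    response = requests.post(
        url,
        files={
            "original_audio": ("original.mp3", f_orig, "audio/mpeg"),
            "user_audio": ("user.mp3", f_user, "audio/mpeg"),
        },
    )

# Expected JSON keys (per the diff): original_transcription, user_transcription,
# levenshtein_similarity, explanation_of_differences
print(response.json())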