File size: 2,856 Bytes
b8a29bf
d8682b4
 
b8a29bf
 
444cc49
 
fe85304
b8a29bf
 
 
 
 
1ec0e70
b8a29bf
1ec0e70
b8a29bf
 
 
 
411d6c8
b8a29bf
 
 
d8682b4
b8a29bf
 
1ec0e70
38b5697
 
1ec0e70
61c94e1
 
 
 
 
 
 
 
fe85304
61c94e1
 
 
 
 
 
1ec0e70
 
82c3cf1
1ec0e70
 
 
 
 
 
 
 
fe85304
b8a29bf
 
 
3d3b3f0
b8a29bf
 
 
fe85304
b8a29bf
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
import os

import azure.cognitiveservices.speech as speechsdk
import gradio as gr

def assess_pronunciation(audio_file):
    """Score a recording of the phrase "你好" with Azure pronunciation assessment.

    Args:
        audio_file: Path of the audio file to assess. May be ``None`` or
            empty when the caller has no recording to submit.

    Returns:
        dict: On success, the "Accuracy", "Fluency", "Completeness" and
        "Prosody" scores reported by the service. Otherwise a dict with a
        single "Error" entry explaining why no assessment was produced.
    """
    # Fail early with a readable message instead of letting the SDK raise
    # when no recording was submitted.
    if not audio_file:
        return {"Error": "No audio was provided. Please record or upload audio."}

    # Configure Azure Speech Service.
    # NOTE(review): a subscription key is committed to source here. The
    # AZURE_SPEECH_KEY / AZURE_SPEECH_REGION environment variables take
    # precedence; the literals remain only as a fallback so existing
    # deployments keep working. Rotate this key and drop the fallback.
    speech_key = os.environ.get("AZURE_SPEECH_KEY", "12afe22c558a4f8d8bd28d6a67cdb9b0")
    service_region = os.environ.get("AZURE_SPEECH_REGION", "westus")
    speech_config = speechsdk.SpeechConfig(subscription=speech_key, region=service_region)
    # The reference text is Chinese, so recognize Mandarin explicitly rather
    # than relying on the SDK's default recognition language.
    speech_config.speech_recognition_language = "zh-CN"

    # Set up the audio configuration
    audio_config = speechsdk.audio.AudioConfig(filename=audio_file)

    # Create pronunciation assessment config
    reference_text = "你好"
    pronunciation_config = speechsdk.PronunciationAssessmentConfig(
        reference_text=reference_text,
        grading_system=speechsdk.PronunciationAssessmentGradingSystem.HundredMark,
        granularity=speechsdk.PronunciationAssessmentGranularity.Phoneme,
    )
    # NOTE(review): prosody support varies by locale; the "Prosody" score may
    # come back empty for zh-CN -- confirm against the service documentation.
    pronunciation_config.enable_prosody_assessment()

    # Create the recognizer
    recognizer = speechsdk.SpeechRecognizer(speech_config=speech_config, audio_config=audio_config)
    pronunciation_config.apply_to(recognizer)

    # Recognize speech and assess pronunciation
    result = recognizer.recognize_once()

    # Debug information
    print(f"Recognition result reason: {result.reason}")

    if result.reason == speechsdk.ResultReason.RecognizedSpeech:
        pronunciation_result = speechsdk.PronunciationAssessmentResult(result)
        return {
            "Accuracy": pronunciation_result.accuracy_score,
            "Fluency": pronunciation_result.fluency_score,
            "Completeness": pronunciation_result.completeness_score,
            "Prosody": pronunciation_result.prosody_score,
        }

    if result.reason == speechsdk.ResultReason.NoMatch:
        print("NOMATCH: Speech could not be recognized.")
        return {"Error": "There was an error. Speech could not be recognized."}

    if result.reason == speechsdk.ResultReason.Canceled:
        cancellation_details = speechsdk.CancellationDetails(result)
        print(f"CANCELED: Reason={cancellation_details.reason}")
        if cancellation_details.reason == speechsdk.CancellationReason.Error:
            print(f"CANCELED: ErrorCode={cancellation_details.error_code}")
            print(f"CANCELED: ErrorDetails={cancellation_details.error_details}")
            print("CANCELED: Did you update the subscription info?")
        # error_details is empty for non-error cancellations; fall back to the
        # reason so the message is never blank.
        details = cancellation_details.error_details or cancellation_details.reason
        return {"Error": f"Speech recognition canceled: {details}"}

    # Any other reason used to fall through and return None; report it
    # explicitly instead.
    return {"Error": f"Unexpected recognition result: {result.reason}"}

# Web UI: one audio input, handed to assess_pronunciation as a file path,
# with the returned dict rendered as JSON.
interface = gr.Interface(
    assess_pronunciation,
    gr.Audio(type="filepath"),
    "json",
    title="Chinese Pronunciation Checker",
)

# Start the app only when this file is run as a script, not on import.
if __name__ == "__main__":
    interface.launch()