Jack committed on
Commit 2c8e4b5 · 1 Parent(s): 81f3005

added various files

Files changed (1)
  1. app.py +82 -49
app.py CHANGED
@@ -17,69 +17,102 @@ system_prompt = """Convert the provided transcript into standard pilot-ATC synta
 Ensure that all runway and heading numbers are formatted correctly (e.g., '11L' for 'one one left'). Use standard
 aviation phraseology wherever applicable. Maintain the segmentation of the transcript as provided, but exclude the timestamps.
 Based on the context and segmentation of each transmission, label it as either 'ATC' or 'Pilot'. At the very beginning of your
-response place a horizonal div with "---" and then line-break, and then add a H2 which says "Transcription", and then
+response place a horizontal div with "---" and then line-break, and then add a H2 which says "Transcription", and then
 proceed with the transcription."""
 
 # Function to transcribe audio and return the concatenated transcript with segment info
 def transcribe_audio(file_path):
     segments, info = whisper_model.transcribe(file_path, beam_size=5)
     transcript = []
-
+
     # Combine all segments with timestamps
     for segment in segments:
         transcript.append(f"[{segment.start:.2f}s -> {segment.end:.2f}s] {segment.text}")
-
+
     return '\n'.join(transcript).strip()
 
 # Start chat session
 @cl.on_chat_start
-def on_chat_start():
-    # Initialize the session data
-    cl.user_session.set("transcription_counter", 0)
+async def on_chat_start():
+    try:
+        # Initialize the session data
+        if cl.user_session.get("transcription_counter") is None:
+            cl.user_session.set("transcription_counter", 0)
+
+        # Display welcome message
+        welcome_message = """
+## Welcome to the **ATC Transcription Assistant**
+
+---
+
+### What is this tool for?
+
+This tool transcribes **Air Traffic Control (ATC)** audio using OpenAI’s **Whisper medium.en** model, fine-tuned for ATC communications. Developed as part of a research project, the fine-tuned **Whisper medium.en** model offers significant improvements in transcription accuracy for ATC audio.
+
+---
+
+To get started, upload the audio below.
+"""
+        await cl.Message(content=welcome_message).send()
+
+    except Exception as e:
+        # Log any errors that occur during session initialization
+        print(f"Error during on_chat_start: {str(e)}")
+
+# Stop chat session cleanup
+@cl.on_stop
+async def on_chat_stop():
+    # Clean up any session data or resources here, if needed
+    print("Session ended, resources cleaned up.")
 
 # Handle incoming chat
 @cl.on_message
 async def handle_message(message: cl.Message):
-    # Retrieve transcription counter for the user session
-    counter = cl.user_session.get("transcription_counter")
-    counter += 1
-    cl.user_session.set("transcription_counter", counter)
-
-    # Get the uploaded audio file
-    files = await cl.AskFileMessage(
-        content="",
-        accept={
-            "audio/wav": [".wav"],
-            "audio/mpeg": [".mp3"]
-        },
-        max_size_mb=50,
-        timeout=3600
-    ).send()
-
-    if files:
-        audio_file = files[0]
-
-        # Get the full segmented transcription with timestamps
-        transcription = transcribe_audio(audio_file.path)
-
-        # Send the entire transcription to the LLM for ATC syntax processing
-        msg = cl.Message(content="Processing your transcription...")
-        await msg.send()
-
-        # Process the transcription via the LLM
-        stream = await client.chat.completions.create(
-            messages=[
-                {"role": "system", "content": system_prompt},
-                {"role": "user", "content": transcription},
-            ],
-            stream=True,
-            model="gpt-4o",  # Use the appropriate model
-            temperature=0,
-        )
-
-        # Stream the ATC-processed output
-        async for part in stream:
-            token = part.choices[0].delta.content or ""
-            await msg.stream_token(token)
-
-        await msg.update(content="Here is the ATC transcription:")
+    try:
+        # Retrieve transcription counter for the user session
+        counter = cl.user_session.get("transcription_counter", 0)
+        counter += 1
+        cl.user_session.set("transcription_counter", counter)
+
+        # Get the uploaded audio file
+        files = await cl.AskFileMessage(
+            content="",
+            accept={
+                "audio/wav": [".wav"],
+                "audio/mpeg": [".mp3"]
+            },
+            max_size_mb=50,
+            timeout=3600
+        ).send()
+
+        if files:
+            audio_file = files[0]
+
+            # Get the full segmented transcription with timestamps
+            transcription = transcribe_audio(audio_file.path)
+
+            # Send the entire transcription to the LLM for ATC syntax processing
+            msg = cl.Message(content="Processing your transcription...")
+            await msg.send()
+
+            # Process the transcription via the LLM
+            stream = await client.chat.completions.create(
+                messages=[
+                    {"role": "system", "content": system_prompt},
+                    {"role": "user", "content": transcription},
+                ],
+                stream=True,
+                model="gpt-4o",  # Use the appropriate model
+                temperature=0,
+            )
+
+            # Stream the ATC-processed output
+            async for part in stream:
+                token = part.choices[0].delta.content or ""
+                await msg.stream_token(token)
+
+            await msg.update(content="Here is the ATC transcription:")
+
+    except Exception as e:
+        # Log any errors that occur during transcription
+        print(f"Error during handle_message: {str(e)}")