Jack committed · Commit 2c8e4b5 · 1 Parent(s): 81f3005
added various files
app.py CHANGED
@@ -17,69 +17,102 @@ system_prompt = """Convert the provided transcript into standard pilot-ATC syntax
 Ensure that all runway and heading numbers are formatted correctly (e.g., '11L' for 'one one left'). Use standard
 aviation phraseology wherever applicable. Maintain the segmentation of the transcript as provided, but exclude the timestamps.
 Based on the context and segmentation of each transmission, label it as either 'ATC' or 'Pilot'. At the very beginning of your
-response place a
+response place a horizontal div with "---" and then line-break, and then add a H2 which says "Transcription", and then
 proceed with the transcription."""
 
 # Function to transcribe audio and return the concatenated transcript with segment info
 def transcribe_audio(file_path):
     segments, info = whisper_model.transcribe(file_path, beam_size=5)
     transcript = []
-
+
     # Combine all segments with timestamps
     for segment in segments:
         transcript.append(f"[{segment.start:.2f}s -> {segment.end:.2f}s] {segment.text}")
-
+
     return '\n'.join(transcript).strip()
 
 # Start chat session
 @cl.on_chat_start
-def on_chat_start():
-
-
+async def on_chat_start():
+    try:
+        # Initialize the session data
+        if cl.user_session.get("transcription_counter") is None:
+            cl.user_session.set("transcription_counter", 0)
+
+        # Display welcome message
+        welcome_message = """
+## Welcome to the **ATC Transcription Assistant**
+
+---
+
+### What is this tool for?
+
+This tool transcribes **Air Traffic Control (ATC)** audio using OpenAI’s **Whisper medium.en** model, fine-tuned for ATC communications. Developed as part of a research project, the fine-tuned **Whisper medium.en** model offers significant improvements in transcription accuracy for ATC audio.
+
+---
+
+To get started, upload the audio below.
+"""
+        await cl.Message(content=welcome_message).send()
+
+    except Exception as e:
+        # Log any errors that occur during session initialization
+        print(f"Error during on_chat_start: {str(e)}")
+
+# Stop chat session cleanup
+@cl.on_stop
+async def on_chat_stop():
+    # Clean up any session data or resources here, if needed
+    print("Session ended, resources cleaned up.")
 
 # Handle incoming chat
 @cl.on_message
 async def handle_message(message: cl.Message):
-    ...  # previous handle_message body (old lines 43-85) removed; its content is not rendered in the captured view
+    try:
+        # Retrieve transcription counter for the user session
+        counter = cl.user_session.get("transcription_counter", 0)
+        counter += 1
+        cl.user_session.set("transcription_counter", counter)
+
+        # Get the uploaded audio file
+        files = await cl.AskFileMessage(
+            content="",
+            accept={
+                "audio/wav": [".wav"],
+                "audio/mpeg": [".mp3"]
+            },
+            max_size_mb=50,
+            timeout=3600
+        ).send()
+
+        if files:
+            audio_file = files[0]
+
+            # Get the full segmented transcription with timestamps
+            transcription = transcribe_audio(audio_file.path)
+
+            # Send the entire transcription to the LLM for ATC syntax processing
+            msg = cl.Message(content="Processing your transcription...")
+            await msg.send()
+
+            # Process the transcription via the LLM
+            stream = await client.chat.completions.create(
+                messages=[
+                    {"role": "system", "content": system_prompt},
+                    {"role": "user", "content": transcription},
+                ],
+                stream=True,
+                model="gpt-4o",  # Use the appropriate model
+                temperature=0,
+            )
+
+            # Stream the ATC-processed output
+            async for part in stream:
+                token = part.choices[0].delta.content or ""
+                await msg.stream_token(token)
+
+            await msg.update(content="Here is the ATC transcription:")
+
+    except Exception as e:
+        # Log any errors that occur during transcription
+        print(f"Error during handle_message: {str(e)}")
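Since the hunk starts at line 17, the imports and client setup this code relies on (`cl`, `whisper_model`, `client`, and the opening of `system_prompt`) are not part of the diff. Below is a minimal sketch of what those first lines plausibly look like, assuming `faster-whisper` for the `(segments, info)` transcription API and the async OpenAI client for the awaited streaming call; the names, model identifier, and initialization shown are assumptions, not the file's actual content.

```python
# Hypothetical reconstruction of app.py lines 1-16 (not shown in this commit's hunk).
# Assumptions inferred from how the diff uses these names:
#   - whisper_model.transcribe(path, beam_size=5) returning (segments, info) matches faster-whisper
#   - the awaited client.chat.completions.create(..., stream=True) matches openai.AsyncOpenAI
import chainlit as cl
from faster_whisper import WhisperModel
from openai import AsyncOpenAI

# Placeholder model identifier; the welcome text refers to a fine-tuned Whisper medium.en
whisper_model = WhisperModel("medium.en")

# Reads OPENAI_API_KEY from the environment
client = AsyncOpenAI()

system_prompt = """Convert the provided transcript into standard pilot-ATC syntax.
...
"""
```

If this is a standard Chainlit app, it would typically be launched with `chainlit run app.py`. One version-dependent detail worth checking: in recent Chainlit releases `Message.update()` takes no `content` argument, so the final `await msg.update(content=...)` call would instead set `msg.content` and then call `await msg.update()`.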