Jack committed on
Commit 2c8e4b5 · 1 Parent(s): 81f3005

added various files

Files changed (1)
  1. app.py +82 -49
app.py CHANGED
@@ -17,69 +17,102 @@ system_prompt = """Convert the provided transcript into standard pilot-ATC synta
 Ensure that all runway and heading numbers are formatted correctly (e.g., '11L' for 'one one left'). Use standard
 aviation phraseology wherever applicable. Maintain the segmentation of the transcript as provided, but exclude the timestamps.
 Based on the context and segmentation of each transmission, label it as either 'ATC' or 'Pilot'. At the very beginning of your
-response place a horizonal div with "---" and then line-break, and then add a H2 which says "Transcription", and then
+response place a horizontal div with "---" and then line-break, and then add a H2 which says "Transcription", and then
 proceed with the transcription."""
 
 # Function to transcribe audio and return the concatenated transcript with segment info
 def transcribe_audio(file_path):
     segments, info = whisper_model.transcribe(file_path, beam_size=5)
     transcript = []
-
+
     # Combine all segments with timestamps
     for segment in segments:
         transcript.append(f"[{segment.start:.2f}s -> {segment.end:.2f}s] {segment.text}")
-
+
     return '\n'.join(transcript).strip()
 
 # Start chat session
 @cl.on_chat_start
-def on_chat_start():
-    # Initialize the session data
-    cl.user_session.set("transcription_counter", 0)
+async def on_chat_start():
+    try:
+        # Initialize the session data
+        if cl.user_session.get("transcription_counter") is None:
+            cl.user_session.set("transcription_counter", 0)
+
+        # Display welcome message
+        welcome_message = """
+## Welcome to the **ATC Transcription Assistant**
+
+---
+
+### What is this tool for?
+
+This tool transcribes **Air Traffic Control (ATC)** audio using OpenAI’s **Whisper medium.en** model, fine-tuned for ATC communications. Developed as part of a research project, the fine-tuned **Whisper medium.en** model offers significant improvements in transcription accuracy for ATC audio.
+
+---
+
+To get started, upload the audio below.
+"""
+        await cl.Message(content=welcome_message).send()
+
+    except Exception as e:
+        # Log any errors that occur during session initialization
+        print(f"Error during on_chat_start: {str(e)}")
+
+# Stop chat session cleanup
+@cl.on_stop
+async def on_chat_stop():
+    # Clean up any session data or resources here, if needed
+    print("Session ended, resources cleaned up.")
 
 # Handle incoming chat
 @cl.on_message
 async def handle_message(message: cl.Message):
-    # Retrieve transcription counter for the user session
-    counter = cl.user_session.get("transcription_counter")
-    counter += 1
-    cl.user_session.set("transcription_counter", counter)
-
-    # Get the uploaded audio file
-    files = await cl.AskFileMessage(
-        content="",
-        accept={
-            "audio/wav": [".wav"],
-            "audio/mpeg": [".mp3"]
-        },
-        max_size_mb=50,
-        timeout=3600
-    ).send()
-
-    if files:
-        audio_file = files[0]
-
-        # Get the full segmented transcription with timestamps
-        transcription = transcribe_audio(audio_file.path)
-
-        # Send the entire transcription to the LLM for ATC syntax processing
-        msg = cl.Message(content="Processing your transcription...")
-        await msg.send()
-
-        # Process the transcription via the LLM
-        stream = await client.chat.completions.create(
-            messages=[
-                {"role": "system", "content": system_prompt},
-                {"role": "user", "content": transcription},
-            ],
-            stream=True,
-            model="gpt-4o",  # Use the appropriate model
-            temperature=0,
-        )
-
-        # Stream the ATC-processed output
-        async for part in stream:
-            token = part.choices[0].delta.content or ""
-            await msg.stream_token(token)
-
-        await msg.update(content="Here is the ATC transcription:")
+    try:
+        # Retrieve transcription counter for the user session
+        counter = cl.user_session.get("transcription_counter", 0)
+        counter += 1
+        cl.user_session.set("transcription_counter", counter)
+
+        # Get the uploaded audio file
+        files = await cl.AskFileMessage(
+            content="",
+            accept={
+                "audio/wav": [".wav"],
+                "audio/mpeg": [".mp3"]
+            },
+            max_size_mb=50,
+            timeout=3600
+        ).send()
+
+        if files:
+            audio_file = files[0]
+
+            # Get the full segmented transcription with timestamps
+            transcription = transcribe_audio(audio_file.path)
+
+            # Send the entire transcription to the LLM for ATC syntax processing
+            msg = cl.Message(content="Processing your transcription...")
+            await msg.send()
+
+            # Process the transcription via the LLM
+            stream = await client.chat.completions.create(
+                messages=[
+                    {"role": "system", "content": system_prompt},
+                    {"role": "user", "content": transcription},
+                ],
+                stream=True,
+                model="gpt-4o",  # Use the appropriate model
+                temperature=0,
+            )
+
+            # Stream the ATC-processed output
+            async for part in stream:
+                token = part.choices[0].delta.content or ""
+                await msg.stream_token(token)
+
+            await msg.update(content="Here is the ATC transcription:")
+
+    except Exception as e:
+        # Log any errors that occur during transcription
+        print(f"Error during handle_message: {str(e)}")