Nayera-2025 committed on
Commit fd9ec6e · verified · 1 Parent(s): 8d36475

Update app.py

Files changed (1)
  1. app.py +238 -411
app.py CHANGED
@@ -1,443 +1,270 @@
-    severity_match = re.search(r'(\d+)', severity)
-    if severity_match:
-        severity = severity_match.group(1)
-    else:
-        severity = "unknown"
-
-    system_prompt = """
-    You are an experienced ER physician. Provide concise medical assessment with:
-    1. Priority level (Emergent/Urgent/Non-urgent)
-    2. Three most likely diagnoses
-    3. Immediate actions required
-    4. Patient instructions in simple language
-    5. Recommended follow-up timeframe
-
-    Be concise but thorough. Format with clear headers.
-    """
-
-    if lang == "ar":
-        system_prompt += " Provide your response in both Arabic and English, with Arabic first."
-
-    prompt = f"""
-    Patient: {patient['name']} ({patient['age']}yo, {patient['gender']})
-    Patient ID: {patient['id']}
-    Admitted: {patient['admitted']}
-
-    Vitals:
-    - Temperature: {vitals['temp']}C
-    - Heart Rate: {vitals['hr']}bpm
-    - Blood Pressure: {vitals['bp']}
-    - SpO2: {vitals['o2']}%
-
-    Symptoms: {symptom} (Duration: {duration} hours)
-    Pain Severity: {severity}/10
-
-    Provide:
-    1. Priority (Emergent/Urgent/Non-urgent)
-    2. Three most likely diagnoses
-    3. Immediate actions required
-    4. Patient instructions in simple language
-    5. Recommended follow-up timeframe
-    """
-
-    try:
-        response = client.chat.completions.create(
-            model="gpt-4",
-            messages=[
-                {"role": "system", "content": system_prompt},
-                {"role": "user", "content": prompt}
-            ],
-            temperature=0.2
-        )
-        result = response.choices[0].message.content
-
-        # Determine priority class based on content
-        priority = "stable"  # Default to stable
-        if "Emergent" in result or "emergency" in result.lower() or "طارئ" in result or "طوارئ" in result:
-            priority = "emergency"
-        elif "Urgent" in result or "urgent" in result.lower() or "عاجل" in result or "ملح" in result:
-            priority = "urgent"
-
-        # Update priority in state
-        state.priority_level = priority
-
-        # Format the result with proper HTML formatting
-        formatted_result = result.replace("# ", "<h3>").replace("\n# ", "</h3><h3>").replace("\n\n", "</p><p>")
-        if not formatted_result.endswith("</p>"):
-            formatted_result += "</p>"
-
-        # Reset loading state
-        state.loading = False
-
-        # Return the formatted result with the appropriate CSS class
-        return f"""
-        <div class="{priority} report-section">
-            <div>{formatted_result}</div>
-        </div>
-        </div> <!-- Closing tag for analysis-container -->
-        """
-
-    except Exception as e:
-        # Reset loading state
-        state.loading = False
-        return f"Error: {str(e)}"
-
-def show_loading_indicator():
-    if state.loading:
-        return """
-        <div style="text-align: center; padding: 20px;">
-            <div class="loading-spinner"></div>
-            <p>Analyzing patient data...</p>
-        </div>
-        """
-    return ""
-def reset_app():
-    stop_vital_monitoring()
-    state.active = False
-    state.original_vitals = {}
-    state.current_vitals = {}
-    state.symptom_answers = {"symptom": "", "duration": "", "severity": ""}
-    state.loading = False
-    state.priority_level = "stable"
-
-    return [
-        gr.update(value=None, visible=False),  # patient_info
-        gr.update(value="", visible=False),    # patient_card
-        gr.update(value=None, visible=False),  # vitals_display
-        gr.update(value="", visible=False),    # formatted_vitals
-        gr.update(value="", visible=False),    # recording_panel
-        gr.update(value=None, visible=False),  # all_symptoms_audio
-        gr.update(value="", visible=False),    # symptom_q
-        gr.update(value="", visible=False),    # duration_q
-        gr.update(value="", visible=False),    # severity_q
-        gr.update(value="", visible=False),    # analysis_title
-        gr.update(value="", visible=False),    # loading
-        gr.update(value="", visible=False),    # report
-        gr.update(interactive=True),           # generate_btn
-        gr.update(interactive=False),          # check_vitals_btn
-        gr.update(interactive=False)           # analyze_btn
-    ]
-
-def load_demo_case(case_name, common_cases):
-    if case_name in common_cases:
-        state.symptom_answers = common_cases[case_name].copy()
-        return [
-            gr.update(value=common_cases[case_name]["symptom"]),
-            gr.update(value=common_cases[case_name]["duration"]),
-            gr.update(value=common_cases[case_name]["severity"])
-        ]
-    return [gr.update(), gr.update(), gr.update()]
-with gr.Blocks(css=custom_css) as demo:
-    # Custom header with app branding
-    gr.HTML("""
-    <div class="app-header">
-        <div class="header-logo">🏥</div>
-        <div class="header-text">
-            <h1>AI Emergency Triage System</h1>
-            <p>World-class AI-powered medical triage for emergency departments</p>
-        </div>
-    </div>
-    """)
-
-    # Create tabs for the main interface and additional features
-    with gr.Tabs() as tabs:
-        # Main interface tab
-        with gr.Tab("Main Interface"):
-            # Control panel
-            with gr.Row(elem_classes="control-panel"):
-                with gr.Column(scale=1):
-                    lang = gr.Radio(
-                        ["en", "ar"],
-                        label="Interface Language",
-                        value="en",
-                        elem_classes="language-selector"
-                    )
-
-                with gr.Column(scale=3):
-                    with gr.Row():
-                        generate_btn = gr.Button(
-                            "🆕 New Patient",
-                            variant="primary",
-                            interactive=True,
-                            elem_classes="action-button"
-                        )
-                        check_vitals_btn = gr.Button(
-                            "🩺 Check Vital Signs",
-                            interactive=False,
-                            elem_classes="action-button"
-                        )
-                        analyze_btn = gr.Button(
-                            "🔍 Analyze Case",
-                            interactive=False,
-                            elem_classes="action-button"
-                        )
-                        reset_btn = gr.Button(
-                            "🔄 Reset",
-                            variant="secondary",
-                            elem_classes="action-button"
-                        )
-
-            # Main content area
-            with gr.Column():
-                # Hidden raw data (not displayed to users)
-                with gr.Group(visible=False):  # This group keeps the JSON data but hides it from view
-                    patient_info = gr.JSON(label="Patient Data")
-                    vitals_display = gr.JSON(label="Raw Vitals Data")
-
-                # NEW LAYOUT: 3 boxes in a horizontal row with equal dimensions using CSS Grid
-                with gr.Row(elem_classes="equal-height-container"):
-                    # Box 1: Patient Info
-                    with gr.Column(elem_classes="card-container"):
-                        patient_card = gr.HTML(visible=False)
-
-                    # Box 2: Vital Signs
-                    with gr.Column(elem_classes="card-container"):
-                        formatted_vitals = gr.HTML(visible=False)
-
-                    # Box 3: Symptom Recording
-                    with gr.Column(elem_classes="card-container"):
-                        recording_panel = gr.HTML(visible=False)
-
-                # Audio component for all symptoms - will be placed in the recording panel
-                all_symptoms_audio = gr.Audio(
-                    label="Record your symptoms",
-                    visible=False,
-                    type="filepath",
-                    format="wav",
-                    elem_id="audio-recorder"
-                )
-
-                # Hidden text inputs (we only need values, not visual display)
-                with gr.Group(visible=False):
-                    symptom_q = gr.Textbox(label="What is your primary symptom?")
-                    duration_q = gr.Textbox(label="How long have you had it? (in hours)")
-                    severity_q = gr.Textbox(label="On a scale of 1-10, how bad is the pain?")
-
-                # Title for analysis results
-                analysis_title = gr.HTML(visible=False)
-
-                # Loading indicator and report appear underneath all three boxes
-                loading = gr.HTML(visible=False)
-                report = gr.HTML(visible=False)
-        # Demo Cases tab
-        with gr.Tab("Demo Cases"):
-            gr.Markdown("### Quick Demo Cases")
-            gr.Markdown("Select a common case to quickly load for demonstration purposes:")
-
-            # Pre-filled case data for quick demos
-            common_cases = {
-                "Chest Pain": {
-                    "symptom": "Sharp chest pain radiating to left arm",
-                    "duration": "2",
-                    "severity": "8"
-                },
-                "Abdominal Pain": {
-                    "symptom": "Severe lower right abdominal pain with nausea",
-                    "duration": "12",
-                    "severity": "7"
-                },
-                "Shortness of Breath": {
-                    "symptom": "Difficulty breathing, wheezing sounds",
-                    "duration": "4",
-                    "severity": "6"
-                },
-                "Head Injury": {
-                    "symptom": "Hit head during fall, brief loss of consciousness",
-                    "duration": "1",
-                    "severity": "5"
-                }
-            }
-
-            case_selector = gr.Radio(
-                list(common_cases.keys()),
-                label="Common Emergency Cases"
             )
-
-            load_case_btn = gr.Button("Load Case Data", variant="secondary")
-
-            # Define the event handler inside the Blocks context
-            load_case_btn.click(
-                fn=lambda case_name: load_demo_case(case_name, common_cases),
-                inputs=[case_selector],
-                outputs=[symptom_q, duration_q, severity_q]
-            )
-
-        # About tab
         with gr.Tab("About"):
             gr.Markdown("""
-            # AI Emergency Triage System
-
-            ## World AI Expo Dubai Demo
-
-            This system demonstrates how advanced AI can assist medical professionals in emergency departments by:
-
-            - **Rapid assessment** of patient condition using vital signs and symptoms
-            - **Intelligent prioritization** of cases based on medical urgency
-            - **Multilingual support** for diverse patient populations
-            - **Voice recognition** for hands-free data entry during busy periods
-            - **Real-time monitoring** of changing patient conditions
-
-            ### Technical Features
-
-            - GPT-4 for medical analysis and triage recommendations
-            - OpenAI Whisper API for speech recognition
-            - Gradio for interactive UI components
-            - Real-time vital sign monitoring and visualization
-            - Responsive design for various device sizes
-
-            ### Important Notice
-
-            This is a technology demonstration only. All medical advice and diagnoses should be verified by qualified healthcare professionals. This system is not a replacement for medical expertise.
             """)
-    # Event handlers - all defined inside the Blocks context
-    # Generate new patient
-    generate_btn.click(
-        fn=generate_patient,
-        inputs=[lang],
-        outputs=[patient_info]
-    ).then(
-        lambda patient, lang: format_patient_card(patient, lang),
-        inputs=[patient_info, lang],
-        outputs=[patient_card]
-    ).then(
-        # Clear previous data when generating a new patient
-        lambda: [
-            gr.update(visible=True),
-            gr.update(interactive=True),
-            gr.update(value=""),          # Clear formatted_vitals
-            gr.update(value=""),          # Clear recording_panel
-            gr.update(value=None),        # Clear all_symptoms_audio
-            gr.update(value=""),          # Clear analysis_title
-            gr.update(value=""),          # Clear report
-            gr.update(visible=False),     # Hide report
-            gr.update(visible=False),     # Hide analysis_title
-            gr.update(visible=False),     # Hide all_symptoms_audio
-            gr.update(interactive=False)  # Disable analyze button
-        ],
-        outputs=[
-            patient_card,
-            check_vitals_btn,
-            formatted_vitals,
-            recording_panel,
-            all_symptoms_audio,
-            analysis_title,
-            report,
-            report,
-            analysis_title,
-            all_symptoms_audio,
-            analyze_btn
-        ]
-    )
-
-    # Language change handler
-    lang.change(
-        fn=lambda patient, vitals, lang, priority: [
-            format_patient_card(patient, lang),
-            format_vitals_display(vitals, lang),
-            format_recording_panel(lang),
-            format_analysis_title(priority, lang) if priority else ""
-        ],
-        inputs=[patient_info, vitals_display, lang, gr.State(lambda: state.priority_level)],
-        outputs=[patient_card, formatted_vitals, recording_panel, analysis_title]
-    )
-
-    # Check vital signs
-    check_vitals_btn.click(
-        fn=simulate_initial_vitals,
-        inputs=[],
-        outputs=[vitals_display]
-    ).then(
-        lambda vitals, lang: format_vitals_display(vitals, lang),
-        inputs=[vitals_display, lang],
-        outputs=[formatted_vitals]
-    ).then(
-        lambda lang: format_recording_panel(lang),
-        inputs=[lang],
-        outputs=[recording_panel]
-    ).then(
-        lambda: [gr.update(visible=True), gr.update(visible=True), gr.update(visible=True)],
-        outputs=[formatted_vitals, recording_panel, all_symptoms_audio]
-    ).then(
-        fn=lambda vd, fv, lg: start_vital_monitoring(vd, fv, lg),
-        inputs=[vitals_display, formatted_vitals, lang],
-        outputs=[]
-    ).then(
-        lambda: gr.update(interactive=True),
-        outputs=[analyze_btn]
-    )
-
-    # Handle single audio recording for all symptoms
-    all_symptoms_audio.change(
-        fn=lambda audio, lang: process_all_symptoms_audio(audio, lang),
-        inputs=[all_symptoms_audio, lang],
-        outputs=[symptom_q, duration_q, severity_q]
-    )
-
-    # Handle text input
-    symptom_q.change(
-        fn=lambda text: update_text_input(text, 0),
-        inputs=[symptom_q],
-        outputs=[]
-    )
-
-    duration_q.change(
-        fn=lambda text: update_text_input(text, 1),
-        inputs=[duration_q],
-        outputs=[]
-    )
-
-    severity_q.change(
-        fn=lambda text: update_text_input(text, 2),
-        inputs=[severity_q],
-        outputs=[]
-    )
-
-    # Analyze patient case - FIXING THE ORDER HERE
-    analyze_btn.click(
-        # First hide any existing analysis
-        lambda: [gr.update(visible=False), gr.update(visible=False)],
-        outputs=[analysis_title, report]
-    ).then(
-        # Show loading indicator
-        fn=show_loading_indicator,
-        inputs=[],
-        outputs=[loading]
-    ).then(
-        lambda: gr.update(visible=True),
-        outputs=[loading]
-    ).then(
-        # Process the analysis
-        fn=analyze_case,
-        inputs=[lang, patient_info, vitals_display],
-        outputs=[report]
-    ).then(
-        # Now create the analysis title with correct priority AFTER analyzing
-        fn=lambda lang: format_analysis_title(state.priority_level, lang),
-        inputs=[lang],
-        outputs=[analysis_title]
-    ).then(
-        # Show both the title and report, hide loading
-        lambda: [gr.update(visible=True), gr.update(visible=True), gr.update(visible=False)],
-        outputs=[analysis_title, report, loading]
-    )
-
-    # Reset application state
-    reset_btn.click(
-        fn=reset_app,
-        inputs=[],
-        outputs=[
-            patient_info, patient_card, vitals_display, formatted_vitals,
-            recording_panel, all_symptoms_audio,
-            symptom_q, duration_q, severity_q,
-            analysis_title, loading, report,
-            generate_btn, check_vitals_btn, analyze_btn
-        ]
-    )
-
-# For deployment on Hugging Face Spaces
 if __name__ == "__main__":
     demo.launch()
+# police_vision_translator.py
+import gradio as gr
+from transformers import pipeline, AutoModelForSeq2SeqLM, AutoTokenizer, AutoProcessor
+from transformers import BlipProcessor, BlipForConditionalGeneration
+from transformers import VisionEncoderDecoderModel
+from transformers import SpeechT5Processor, SpeechT5ForTextToSpeech
+import torch
+import numpy as np
+from PIL import Image, ImageDraw, ImageFont
+import os
+import tempfile
+import cv2
+
+# Initialize models
+print("Loading models...")
+
+# 1. Vision document analysis model (BLIP image captioning, with its matching processor)
+document_processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-large")
+document_model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-large")
+
+# 2. OCR for text extraction - TrOCR is a vision encoder-decoder checkpoint
+ocr_processor = AutoProcessor.from_pretrained("microsoft/trocr-base-printed")
+ocr_model = VisionEncoderDecoderModel.from_pretrained("microsoft/trocr-base-printed")
+
+# 3. Translation model
+translator_model = AutoModelForSeq2SeqLM.from_pretrained("facebook/nllb-200-distilled-600M")
+translator_tokenizer = AutoTokenizer.from_pretrained("facebook/nllb-200-distilled-600M")
+
+# 4. Speech recognition - the pipeline handles model and processor loading
+speech_recognizer = pipeline("automatic-speech-recognition", model="openai/whisper-medium")
+
+# 5. Text-to-speech (loaded for completeness; not yet wired into the UI)
+tts_processor = SpeechT5Processor.from_pretrained("microsoft/speecht5_tts")
+tts_model = SpeechT5ForTextToSpeech.from_pretrained("microsoft/speecht5_tts")
+
+print("Models loaded!")
+# Language codes mapping (NLLB-200 codes)
+LANGUAGE_CODES = {
+    "English": "eng_Latn",
+    "Arabic": "ara_Arab",
+    "Hindi": "hin_Deva",
+    "Urdu": "urd_Arab",
+    "Chinese": "zho_Hans",
+    "Russian": "rus_Cyrl",
+    "French": "fra_Latn",
+    "German": "deu_Latn",
+    "Spanish": "spa_Latn",
+    "Japanese": "jpn_Jpan"
+}
+
+def detect_document_type(image):
+    """Detect what type of document is in the image"""
+    # Use the captioning model to get a general description
+    inputs = document_processor(images=image, return_tensors="pt")
+    outputs = document_model.generate(**inputs, max_length=50)
+
+    # Convert output IDs to text
+    description = document_processor.decode(outputs[0], skip_special_tokens=True)
+
+    # Simple rule-based classification
+    if "passport" in description.lower():
+        return "Passport"
+    elif "license" in description.lower() or "driving" in description.lower():
+        return "Driver's License"
+    elif "id" in description.lower() or "identity" in description.lower() or "card" in description.lower():
+        return "ID Card"
+    else:
+        return "Unknown Document"
+def extract_text_from_regions(image, regions):
+    """Extract text from specific regions of the document"""
+    results = {}
+    img_array = np.array(image)
+
+    for field_name, (x1, y1, x2, y2) in regions.items():
+        # Extract region
+        region = img_array[y1:y2, x1:x2]
+        region_pil = Image.fromarray(region)
+
+        # Process with OCR
+        inputs = ocr_processor(images=region_pil, return_tensors="pt")
+        generated_ids = ocr_model.generate(inputs["pixel_values"])
+        text = ocr_processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
+
+        results[field_name] = text
+
+    return results
+def translate_text(text, source_lang, target_lang):
+    """Translate text between languages"""
+    if not text or text.strip() == "":
+        return ""
+
+    # Get NLLB language codes
+    src_code = LANGUAGE_CODES.get(source_lang, "eng_Latn")
+    tgt_code = LANGUAGE_CODES.get(target_lang, "ara_Arab")
+
+    # Tokenize with the source language set
+    translator_tokenizer.src_lang = src_code
+    inputs = translator_tokenizer(text, return_tensors="pt", padding=True)
+
+    # Translate, forcing the target language as the first generated token
+    translated_tokens = translator_model.generate(
+        **inputs,
+        forced_bos_token_id=translator_tokenizer.convert_tokens_to_ids(tgt_code),
+        max_length=128
+    )
+
+    # Decode
+    translation = translator_tokenizer.batch_decode(translated_tokens, skip_special_tokens=True)[0]
+    return translation
+def process_document(image, source_language="English", target_language="Arabic"):
+    """Main function to process document images"""
+    # Convert to PIL if it's not already
+    if not isinstance(image, Image.Image):
+        image = Image.fromarray(image)
+
+    # 1. Detect document type
+    doc_type = detect_document_type(image)
+
+    # 2. Define regions based on document type (simplified example)
+    # In a real implementation, you would use ML to detect these regions
+    width, height = image.size
+
+    if doc_type == "Passport":
+        regions = {
+            "Name": (int(width*0.3), int(height*0.2), int(width*0.9), int(height*0.3)),
+            "Date of Birth": (int(width*0.3), int(height*0.35), int(width*0.7), int(height*0.45)),
+            "Passport Number": (int(width*0.3), int(height*0.5), int(width*0.7), int(height*0.6))
+        }
+    elif doc_type == "ID Card":
+        regions = {
+            "Name": (int(width*0.3), int(height*0.15), int(width*0.9), int(height*0.25)),
+            "ID Number": (int(width*0.3), int(height*0.3), int(width*0.7), int(height*0.4)),
+            "Address": (int(width*0.1), int(height*0.5), int(width*0.9), int(height*0.7))
+        }
+    else:  # Driver's License or Unknown
+        regions = {
+            "Name": (int(width*0.3), int(height*0.2), int(width*0.9), int(height*0.3)),
+            "License Number": (int(width*0.3), int(height*0.4), int(width*0.7), int(height*0.5)),
+            "Expiration": (int(width*0.3), int(height*0.6), int(width*0.7), int(height*0.7))
+        }
+
+    # 3. Extract text from regions
+    extracted_info = extract_text_from_regions(image, regions)
+
+    # 4. Translate extracted text
+    translated_info = {}
+    for field, text in extracted_info.items():
+        translated_info[field] = translate_text(text, source_language, target_language)
+
+    # 5. Create annotated image
+    annotated_img = image.copy()
+    draw = ImageDraw.Draw(annotated_img)
+
+    # Attempt to load a font that supports Arabic
+    try:
+        font = ImageFont.truetype("arial.ttf", 20)
+    except IOError:
+        font = ImageFont.load_default()  # Fall back to the default bitmap font
+
+    # Draw boxes and translations
+    for field, (x1, y1, x2, y2) in regions.items():
+        # Draw rectangle around region
+        draw.rectangle([(x1, y1), (x2, y2)], outline="green", width=3)
+
+        # Draw field name and translated text
+        draw.text((x1, y1-25), field, fill="blue", font=font)
+        draw.text((x1, y2+5), f"{extracted_info[field]} → {translated_info[field]}",
+                  fill="red", font=font)
+
+    # Return results in the order the Gradio outputs expect:
+    # annotated image, document type, extracted text, translated text
+    return annotated_img, doc_type, extracted_info, translated_info
+def transcribe_speech(audio_file, source_language="English"):
+    """Transcribe speech from audio file"""
+    result = speech_recognizer(audio_file, generate_kwargs={"language": source_language.lower()})
+    return result["text"]
+
+def translate_speech(audio_file, source_language="English", target_language="Arabic"):
+    """Transcribe and translate speech"""
+    # 1. Transcribe speech to text
+    transcription = transcribe_speech(audio_file, source_language)
+
+    # 2. Translate text
+    translation = translate_text(transcription, source_language, target_language)
+
+    # Return in the order the Gradio outputs expect: original text, translated text
+    return transcription, translation
+
+# Gradio Interface
+def create_ui():
+    with gr.Blocks(title="Police Vision Translator") as app:
+        gr.Markdown("# Dubai Police Vision Translator System")
+        gr.Markdown("## Translate documents, environmental text, and speech in real-time")
+
+        with gr.Tab("Document Translation"):
+            with gr.Row():
+                with gr.Column():
+                    doc_input = gr.Image(type="pil", label="Upload Document")
+                    source_lang = gr.Dropdown(choices=list(LANGUAGE_CODES.keys()),
+                                              value="English", label="Source Language")
+                    target_lang = gr.Dropdown(choices=list(LANGUAGE_CODES.keys()),
+                                              value="Arabic", label="Target Language")
+                    process_btn = gr.Button("Process Document")
+
+                with gr.Column():
+                    doc_output = gr.Image(label="Annotated Document")
+                    doc_type = gr.Textbox(label="Document Type")
+                    extracted_info = gr.JSON(label="Extracted Information")
+                    translated_info = gr.JSON(label="Translated Information")
+
+            process_btn.click(
+                fn=lambda img, src, tgt: process_document(img, src, tgt),
+                inputs=[doc_input, source_lang, target_lang],
+                outputs=[doc_output, doc_type, extracted_info, translated_info]
+            )
+ with gr.Tab("Speech Translation"):
227
+ with gr.Row():
228
+ with gr.Column():
229
+ audio_input = gr.Audio(type="filepath", label="Record Speech")
230
+ speech_source_lang = gr.Dropdown(choices=list(LANGUAGE_CODES.keys()),
231
+ value="English", label="Source Language")
232
+ speech_target_lang = gr.Dropdown(choices=list(LANGUAGE_CODES.keys()),
233
+ value="Arabic", label="Target Language")
234
+ translate_btn = gr.Button("Translate Speech")
235
+
236
+ with gr.Column():
237
+ original_text = gr.Textbox(label="Original Speech")
238
+ translated_text = gr.Textbox(label="Translated Text")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
239
 
240
+ translate_btn.click(
241
+ fn=lambda audio, src, tgt: translate_speech(audio, src, tgt),
242
+ inputs=[audio_input, speech_source_lang, speech_target_lang],
243
+ outputs=[original_text, translated_text]
244
  )
245
 
 
 
 
 
 
 
 
 
 
 
246
  with gr.Tab("About"):
247
  gr.Markdown("""
248
+ # Police Vision Translator MVP
249
 
250
+ This system demonstrates AI-powered translation capabilities for law enforcement:
251
 
252
+ - **Document Translation**: Identify and translate key fields in passports, IDs, and licenses
253
+ - **Speech Translation**: Real-time translation of conversations with civilians
254
 
255
+ ## Technologies Used
 
 
 
 
256
 
257
+ - Vision Transformers for document analysis
258
+ - NLLB-200 for translation between 200+ languages
259
+ - Whisper for multilingual speech recognition
260
+ - SpeechT5 for text-to-speech synthesis
261
 
262
+ Developed for demonstration at the World AI Expo Dubai.
 
 
 
 
 
 
 
 
263
  """)
264
 
265
+ return app
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
266
 
267
+ # Launch app
268
  if __name__ == "__main__":
269
+ demo = create_ui()
270
  demo.launch()
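
As a quick sanity check of the new translation path, a minimal sketch along these lines should work once the file above is saved as app.py in the Space and the checkpoints have downloaded (the sample sentence and script name are illustrative, not part of the commit):

# sanity_check.py - importing app loads all models, which can take several minutes
from app import translate_text, LANGUAGE_CODES

print(sorted(LANGUAGE_CODES))  # UI language names mapped to NLLB-200 codes
print(translate_text("Where is your passport?", "English", "Arabic"))
print(translate_text("Where is your passport?", "English", "French"))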