Nayera-2025 committed
Commit ba2761c · verified · Parent: 80b4542

Update app.py

Files changed (1):
  app.py  +277 -65
app.py CHANGED
@@ -1,76 +1,288 @@
  import gradio as gr
  import easyocr
- from transformers import pipeline
  import numpy as np
- from PIL import Image, ImageDraw
-
- # Load OCR Readers
- arabic_reader = easyocr.Reader(['ar', 'en'], gpu=True)  # Arabic and English only
- hindi_reader = easyocr.Reader(['hi', 'en'], gpu=True)  # Hindi and English only
- other_reader = easyocr.Reader(['en', 'fr', 'ru'], gpu=True)  # French and Russian with English
-
- # Load Translation Model
- translator = pipeline("translation", model="Helsinki-NLP/opus-mt-mul-en", device=0)  # device=0 means use GPU
-
- # Load Object Detection Model (smaller version to avoid timeout)
- detector = pipeline("object-detection", model="facebook/detr-resnet-50-small", device=0)
-
- def process_image(image, language_choice):
-     if not isinstance(image, np.ndarray):
-         image = np.array(image)
-
-     # Choose correct OCR Reader
-     if language_choice == "Arabic":
-         reader = arabic_reader
-     elif language_choice == "Hindi":
-         reader = hindi_reader
-     else:
-         reader = other_reader
-
-     # Step 1: OCR - Text Extraction
-     text_results = reader.readtext(image)
-     extracted_texts = [res[1] for res in text_results]
-     extracted_text = " ".join(extracted_texts)

-     # Step 2: Translation
-     if extracted_text:
-         translation = translator(extracted_text)[0]['translation_text']
-     else:
-         translation = "No text detected."

-     # Step 3: Object Detection
-     detections = detector(Image.fromarray(image))
-     image_draw = Image.fromarray(image).convert("RGB")
-     draw = ImageDraw.Draw(image_draw)

-     for det in detections:
-         box = det['box']
-         label = det['label']
-         score = det['score']
-         if score > 0.5:
-             draw.rectangle(
-                 [box['xmin'], box['ymin'], box['xmax'], box['ymax']],
-                 outline="red", width=3
              )
-             draw.text((box['xmin'], box['ymin'] - 10), f"{label} ({score:.2f})", fill="red")

-     return image_draw, extracted_text, translation

- # Gradio Interface
- iface = gr.Interface(
-     fn=process_image,
-     inputs=[
-         gr.Image(type="pil", label="Upload an Image (e.g., Signs, IDs, License Plates)"),
-         gr.Dropdown(choices=["Arabic", "Hindi", "Other (French/Russian)"], label="Select Language Group")
-     ],
-     outputs=[
-         gr.Image(label="Detected Objects"),
-         gr.Textbox(label="Extracted Text"),
-         gr.Textbox(label="Translated Text (English)")
-     ],
-     title="🚨 Police Smart Glasses - AI Demo (Arabic + Hindi + Multilingual)",
-     description="Upload an image and select the language group for smart OCR, translation, and object detection."
- )

  if __name__ == "__main__":
-     iface.launch()
  import gradio as gr
  import easyocr
  import numpy as np
+ import torch
+ from PIL import Image, ImageDraw, ImageFont
+ from transformers import pipeline
+ import logging
+ import os
+ import time

+ # Configure logging
+ logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
+ logger = logging.getLogger(__name__)

+ # Check for GPU availability
+ device = "cuda" if torch.cuda.is_available() else "cpu"
+ using_gpu = device == "cuda"
+ logger.info(f"Using device: {device}")

+ class SmartGlassesSystem:
+     """Main class for the Police Smart Glasses AI system"""
+
+     def __init__(self):
+         self.supported_languages = {
+             "Arabic": ["ar", "en"],
+             "Hindi": ["hi", "en"],
+             "Chinese": ["ch_sim", "en"],
+             "Japanese": ["ja", "en"],
+             "Korean": ["ko", "en"],
+             "Russian": ["ru", "en"],
+             "French": ["fr", "en"]
+         }
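+         # Language codes above are EasyOCR identifiers (e.g. "ch_sim" = Simplified Chinese)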
+         # Cache of OCR readers to avoid reloading; initialize_models() fills in
+         # the most common ones. It must run after the cache exists, otherwise the
+         # eagerly loaded readers would be thrown away.
+         self.ocr_readers = {}
+         self.initialize_models()
+
+     def initialize_models(self):
+         """Initialize all AI models with proper error handling"""
+         try:
+             # Eagerly load OCR readers for the most common languages
+             logger.info("Loading initial OCR readers...")
+             self.ocr_readers = {
+                 "Arabic": easyocr.Reader(['ar', 'en'], gpu=using_gpu, verbose=False),
+                 "Hindi": easyocr.Reader(['hi', 'en'], gpu=using_gpu, verbose=False)
+             }
+
+             # Load translation model
+             logger.info("Loading translation model...")
+             self.translator = pipeline(
+                 "translation",
+                 model="Helsinki-NLP/opus-mt-mul-en",
+                 device=0 if using_gpu else -1
+             )
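+             # transformers pipelines take an integer device index: 0 = first GPU, -1 = CPU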
+
+             # Load object detection model
+             logger.info("Loading object detection model...")
+             self.detector = pipeline(
+                 "object-detection",
+                 model="facebook/detr-resnet-50",
+                 device=0 if using_gpu else -1
+             )
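+             # facebook/detr-resnet-50 is trained on COCO, so labels come from the COCO classes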
+
+             logger.info("All models loaded successfully!")
+
+         except Exception as e:
+             logger.error(f"Error initializing models: {str(e)}")
+             raise RuntimeError(f"Failed to initialize AI models: {str(e)}")
+
+     def get_ocr_reader(self, language_choice):
+         """Get or create the appropriate OCR reader for the chosen language"""
+         if language_choice in self.ocr_readers:
+             return self.ocr_readers[language_choice]
+
+         # Create a new reader if not already loaded
+         if language_choice in self.supported_languages:
+             logger.info(f"Loading new OCR reader for {language_choice}...")
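+             # NOTE: constructing a Reader for a new language downloads EasyOCR's
+             # detection/recognition weights, so the first request can be slow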
+             reader = easyocr.Reader(
+                 self.supported_languages[language_choice],
+                 gpu=using_gpu,
+                 verbose=False
+             )
+             # Cache for future use
+             self.ocr_readers[language_choice] = reader
+             return reader
+         else:
+             # Fall back to a general reader
+             logger.warning(f"Unsupported language: {language_choice}, using default")
+             if "Other" not in self.ocr_readers:
+                 self.ocr_readers["Other"] = easyocr.Reader(['en', 'fr', 'ru'], gpu=using_gpu, verbose=False)
+             return self.ocr_readers["Other"]
+
+     def extract_text(self, image, language_choice):
+         """Extract text from the image using OCR"""
+         start_time = time.time()
+         reader = self.get_ocr_reader(language_choice)
+
+         try:
+             text_results = reader.readtext(image)
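+             # readtext() yields (bbox, text, confidence) tuples; bbox is a list of
+             # four [x, y] corner points, used for the visualization below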
+             extracted_texts = [res[1] for res in text_results]
+             extracted_text = " ".join(extracted_texts)
+
+             # Keep the bounding boxes for visualization
+             text_boxes = [(res[0], res[1]) for res in text_results]
+
+             logger.info(f"OCR completed in {time.time() - start_time:.2f} seconds")
+             return extracted_text, text_boxes
+         except Exception as e:
+             logger.error(f"OCR error: {str(e)}")
+             return "Error during text extraction.", []
+
+     def translate_text(self, text):
+         """Translate extracted text to English"""
+         if not text or text == "No text detected." or text.strip() == "":
+             return "No text to translate."
+
+         try:
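+             # opus-mt-mul-en is a many-to-English model, so the text can be passed
+             # directly; no source-language tag should be needed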
+             translation = self.translator(text)[0]['translation_text']
+             return translation
+         except Exception as e:
+             logger.error(f"Translation error: {str(e)}")
+             return f"Translation error: {str(e)}"
+
+     def detect_objects(self, image_pil):
+         """Detect objects in the image"""
+         try:
+             detections = self.detector(image_pil)
+             return detections
+         except Exception as e:
+             logger.error(f"Object detection error: {str(e)}")
+             return []
+
+     def visualize_results(self, image, text_boxes, detections):
+         """Create a visualization of detected objects and text regions"""
+         image_draw = image.copy().convert("RGB")
+         draw = ImageDraw.Draw(image_draw)
+
+         # Try to load a better font, fall back to default if necessary
+         try:
+             font = ImageFont.truetype("Arial", 12)
+         except IOError:
+             font = ImageFont.load_default()
+
+         # Draw text bounding boxes
+         for box, text in text_boxes:
+             # Convert the four corner points to integer coordinates
+             points = np.array(box).astype(np.int32)
+             draw.polygon([tuple(p) for p in points], outline="blue", width=2)
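+             # NOTE: polygon()'s width argument requires a recent Pillow release;
+             # on older versions, drop width or draw the outline with line()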
+             # Add a small text label
+             draw.text((points[0][0], points[0][1] - 10), "Text", fill="blue", font=font)
+
+         # Draw object detection boxes
+         for det in detections:
+             box = det['box']
+             label = det['label']
+             score = det['score']
+
+             if score > 0.6:  # Higher confidence threshold
+                 draw.rectangle(
+                     [box['xmin'], box['ymin'], box['xmax'], box['ymax']],
+                     outline="red",
+                     width=3
+                 )
+                 label_text = f"{label} ({score:.2f})"
+                 draw.text((box['xmin'], box['ymin'] - 15), label_text, fill="red", font=font)
+
+         return image_draw
+
+     def process_image(self, image, language_choice):
+         """Main processing pipeline"""
+         if image is None:
+             return (
+                 None,
+                 "No image provided. Please upload an image.",
+                 "No image to process."
              )
+
+         # Convert to a numpy array if needed
+         if not isinstance(image, np.ndarray):
+             image = np.array(image)
+
+         # Create a PIL image for visualization
+         image_pil = Image.fromarray(image)
+
+         # Extract text
+         extracted_text, text_boxes = self.extract_text(image, language_choice)
+
+         # Translate text
+         translation = self.translate_text(extracted_text)
+
+         # Detect objects
+         detections = self.detect_objects(image_pil)
+
+         # Create visualization
+         result_image = self.visualize_results(image_pil, text_boxes, detections)
+
+         return result_image, extracted_text, translation

+ # Create system instance
+ smart_glasses = SmartGlassesSystem()
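+ # Instantiated at import time so every model loads once, before the UI is served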

+ def create_interface():
+     """Create and configure the Gradio interface"""
+
+     # Custom CSS for better appearance
+     custom_css = """
+     .gradio-container {
+         background-color: #f0f4f8;
+     }
+     .output-image {
+         border: 2px solid #2c3e50;
+         border-radius: 5px;
+     }
+     """
+
+     # Create interface
+     with gr.Blocks(css=custom_css, title="🚨 Police Smart Glasses - AI Demo") as iface:
+         gr.Markdown("""
+         # 🚨 Police Smart Glasses - Advanced AI Demo
+
+         This system demonstrates real-time text recognition, translation, and object detection
+         capabilities for law enforcement smart glasses technology.
+
+         ### Instructions:
+         1. Upload an image containing text in the selected language
+         2. Choose the primary language in the image
+         3. View the detection results, extracted text, and English translation
+         """)
+
+         with gr.Row():
+             with gr.Column(scale=1):
+                 # Input components
+                 input_image = gr.Image(
+                     type="pil",
+                     label="Upload an Image (e.g., Signs, Documents, License Plates)"
+                 )
+
+                 language_choice = gr.Dropdown(
+                     choices=list(smart_glasses.supported_languages.keys()) + ["Other"],
+                     value="Arabic",
+                     label="Select Primary Language in Image"
+                 )
+
+                 process_btn = gr.Button("Process Image", variant="primary")
+
+             with gr.Column(scale=1):
+                 # Output components
+                 output_image = gr.Image(label="Analysis Results")
+                 extracted_text = gr.Textbox(label="Extracted Text")
+                 translated_text = gr.Textbox(label="Translated Text (English)")
+
+         # Set up the processing function
+         process_btn.click(
+             fn=smart_glasses.process_image,
+             inputs=[input_image, language_choice],
+             outputs=[output_image, extracted_text, translated_text]
+         )
+
+         # Examples for testing
+         gr.Examples(
+             examples=[
+                 ["examples/arabic_sign.jpg", "Arabic"],
+                 ["examples/hindi_text.jpg", "Hindi"],
+                 ["examples/russian_document.jpg", "Russian"]
+             ],
+             inputs=[input_image, language_choice]
+         )
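+         # NOTE: these image files must exist in the repo's examples/ directory for
+         # the examples panel to load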
+
+         # System information
+         with gr.Accordion("System Information", open=False):
+             gr.Markdown(f"""
+             - **Device**: {'GPU' if using_gpu else 'CPU'}
+             - **Supported Languages**: {', '.join(smart_glasses.supported_languages.keys())}
+             - **AI Models**:
+                 - OCR: EasyOCR
+                 - Translation: Helsinki-NLP/opus-mt-mul-en
+                 - Object Detection: facebook/detr-resnet-50
+             """)
+
+     return iface

  if __name__ == "__main__":
+     # Create and launch interface
+     iface = create_interface()
+     # queue() replaces the deprecated launch(enable_queue=True) and lets the app
+     # handle multiple concurrent users
+     iface.queue()
+     iface.launch(
+         share=True,  # Enable sharing
+         debug=True   # Show debugging information
+     )