Nayera-2025 committed on
Commit
09cc49d
·
verified ·
1 Parent(s): 52e3b04

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +31 -19
app.py CHANGED
@@ -1,20 +1,20 @@
1
  import gradio as gr
2
  import easyocr
3
- import torch
4
  from transformers import pipeline
5
  import numpy as np
 
6
 
7
- # Load OCR model (English, Arabic, French, Chinese)
8
- reader = easyocr.Reader(['en', 'ar', 'fr', 'zh'])
9
 
10
- # Load YOLOv5 model (small model for faster performance)
11
- model = torch.hub.load('ultralytics/yolov5', 'yolov5s', pretrained=True)
12
 
13
- # Load translation model (multilingual to English)
14
- translator = pipeline("translation", model="Helsinki-NLP/opus-mt-mul-en")
15
 
16
  def process_image(image):
17
- # Convert PIL Image to numpy array if needed
18
  if not isinstance(image, np.ndarray):
19
  image = np.array(image)
20
 
@@ -23,29 +23,41 @@ def process_image(image):
23
  extracted_texts = [res[1] for res in text_results]
24
  extracted_text = " ".join(extracted_texts)
25
 
26
- # Step 2: Translate Text
27
  if extracted_text:
28
  translation = translator(extracted_text)[0]['translation_text']
29
  else:
30
  translation = "No text detected."
31
 
32
- # Step 3: Object Detection - Bounding Boxes
33
- results = model(image)
34
- detected_img = results.render()[0] # returns a list, take first image
 
35
 
36
- return detected_img, extracted_text, translation
 
 
 
 
 
 
 
 
 
37
 
38
- # Define Gradio Interface
 
 
39
  iface = gr.Interface(
40
  fn=process_image,
41
- inputs=gr.Image(type="pil", label="Upload Image (Signs, ID, License Plate)"),
42
  outputs=[
43
- gr.Image(label="Detected Objects in Image"),
44
  gr.Textbox(label="Extracted Text"),
45
- gr.Textbox(label="Translated Text to English")
46
  ],
47
- title="🚨 Police Smart Glasses AI Demo",
48
- description="Upload an image to simulate smart glasses detecting text, translating it, and recognizing objects."
49
  )
50
 
51
  if __name__ == "__main__":
 
import easyocr
import gradio as gr
import numpy as np
import torch
from PIL import Image, ImageDraw
from transformers import pipeline
6
 
7
# Detect hardware once: hard-coding gpu=True / device=0 raises on CPU-only
# hosts (transformers refuses device=0 without CUDA), so fall back gracefully.
_use_gpu = torch.cuda.is_available()
_device = 0 if _use_gpu else -1  # transformers convention: 0 = first GPU, -1 = CPU

# Load OCR reader (English, Arabic, French, Chinese).
reader = easyocr.Reader(['en', 'ar', 'fr', 'zh'], gpu=_use_gpu)

# Load translation model (many languages -> English).
translator = pipeline("translation", model="Helsinki-NLP/opus-mt-mul-en", device=_device)

# Load object-detection model (DETR).
detector = pipeline("object-detection", model="facebook/detr-resnet-50", device=_device)
 
16
def process_image(image):
    """Run OCR, translation, and object detection on an uploaded image.

    Args:
        image: PIL Image (from the Gradio input) or numpy array.

    Returns:
        Tuple of (annotated PIL image with detection boxes,
        extracted text string, English translation string).
    """
    # Normalize to a numpy array for easyocr.
    if not isinstance(image, np.ndarray):
        image = np.array(image)

    # Step 1: OCR — readtext yields (bbox, text, confidence) triples;
    # keep only the recognized text.
    text_results = reader.readtext(image)
    extracted_texts = [res[1] for res in text_results]
    extracted_text = " ".join(extracted_texts)

    # Step 2: Translation — skip the model call when nothing was recognized.
    if extracted_text:
        translation = translator(extracted_text)[0]['translation_text']
    else:
        translation = "No text detected."

    # Step 3: Object detection — convert once and reuse the same PIL image
    # for both inference and drawing (the original converted twice).
    image_draw = Image.fromarray(image).convert("RGB")
    detections = detector(image_draw)
    draw = ImageDraw.Draw(image_draw)

    for det in detections:
        box = det['box']
        label = det['label']
        score = det['score']
        if score > 0.5:  # only annotate confident detections
            draw.rectangle(
                [box['xmin'], box['ymin'], box['xmax'], box['ymax']],
                outline="red", width=3
            )
            # Clamp the caption's y so boxes touching the top edge still
            # get a visible (non-negative) label position.
            draw.text(
                (box['xmin'], max(0, box['ymin'] - 10)),
                f"{label} ({score:.2f})", fill="red"
            )

    return image_draw, extracted_text, translation
49
+
50
# Gradio Interface: wires process_image to one image input and three
# outputs (annotated image, raw OCR text, English translation).
iface = gr.Interface(
    fn=process_image,
    # type="pil" delivers a PIL Image; process_image converts it to numpy.
    inputs=gr.Image(type="pil", label="Upload an Image (e.g., Signs, IDs, License Plates)"),
    outputs=[
        gr.Image(label="Detected Objects"),
        gr.Textbox(label="Extracted Text"),
        gr.Textbox(label="Translated Text (English)")
    ],
    title="🚨 Police Smart Glasses - AI Demo (GPU Optimized)",
    description="Upload an image to simulate smart glasses detecting text, translating it, and recognizing objects in real-time."
)
62
 
63
  if __name__ == "__main__":