Spaces:

Nayera-2025
/

Police-Vision-Translator

Sleeping

App Files Files Community

Nayera-2025 committed 7 days ago

Commit

ba2761c

verified ·

1 Parent(s): 80b4542

Update app.py

Browse files

Files changed (1) hide show

app.py +277 -65

app.py CHANGED Viewed

@@ -1,76 +1,288 @@
 import gradio as gr
 import easyocr
-from transformers import pipeline
 import numpy as np
-from PIL import Image, ImageDraw
-# Load OCR Readers
-arabic_reader = easyocr.Reader(['ar', 'en'], gpu=True)  # Arabic and English only
-hindi_reader = easyocr.Reader(['hi', 'en'], gpu=True)   # Hindi and English only
-other_reader = easyocr.Reader(['en', 'fr', 'ru'], gpu=True)  # French and Russian with English
-# Load Translation Model
-translator = pipeline("translation", model="Helsinki-NLP/opus-mt-mul-en", device=0)  # device=0 means use GPU
-# Load Object Detection Model (smaller version to avoid timeout)
-detector = pipeline("object-detection", model="facebook/detr-resnet-50-small", device=0)
-def process_image(image, language_choice):
-    if not isinstance(image, np.ndarray):
-        image = np.array(image)
-    # Choose correct OCR Reader
-    if language_choice == "Arabic":
-        reader = arabic_reader
-    elif language_choice == "Hindi":
-        reader = hindi_reader
-    else:
-        reader = other_reader
-    # Step 1: OCR - Text Extraction
-    text_results = reader.readtext(image)
-    extracted_texts = [res[1] for res in text_results]
-    extracted_text = " ".join(extracted_texts)
-    # Step 2: Translation
-    if extracted_text:
-        translation = translator(extracted_text)[0]['translation_text']
-    else:
-        translation = "No text detected."
-    # Step 3: Object Detection
-    detections = detector(Image.fromarray(image))
-    image_draw = Image.fromarray(image).convert("RGB")
-    draw = ImageDraw.Draw(image_draw)
-    for det in detections:
-        box = det['box']
-        label = det['label']
-        score = det['score']
-        if score > 0.5:
-            draw.rectangle(
-                [box['xmin'], box['ymin'], box['xmax'], box['ymax']],
-                outline="red", width=3
             )
-            draw.text((box['xmin'], box['ymin'] - 10), f"{label} ({score:.2f})", fill="red")
-    return image_draw, extracted_text, translation
-# Gradio Interface
-iface = gr.Interface(
-    fn=process_image,
-    inputs=[
-        gr.Image(type="pil", label="Upload an Image (e.g., Signs, IDs, License Plates)"),
-        gr.Dropdown(choices=["Arabic", "Hindi", "Other (French/Russian)"] , label="Select Language Group")
-    ],
-    outputs=[
-        gr.Image(label="Detected Objects"),
-        gr.Textbox(label="Extracted Text"),
-        gr.Textbox(label="Translated Text (English)")
-    ],
-    title="\ud83d\udea8 Police Smart Glasses - AI Demo (Arabic + Hindi + Multilingual)",
-    description="Upload an image and select the language group for smart OCR, translation, and object detection."
-)
 if __name__ == "__main__":
-    iface.launch()

 import gradio as gr
 import easyocr
 import numpy as np
+import torch
+from PIL import Image, ImageDraw, ImageFont
+from transformers import pipeline
+import logging
+import os
+import time
+# Logging setup: timestamped, level-tagged messages at INFO and above
+logging.basicConfig(format='%(asctime)s - %(levelname)s - %(message)s', level=logging.INFO)
+logger = logging.getLogger(__name__)
+# Probe CUDA once; the device name and the GPU flag are both derived from it
+using_gpu = torch.cuda.is_available()
+device = "cuda" if using_gpu else "cpu"
+logger.info(f"Using device: {device}")
+class SmartGlassesSystem:
+    """OCR, translation and object-detection pipeline for the smart-glasses demo.
+
+    One instance owns every model: EasyOCR readers (cached per language
+    group), a Helsinki-NLP translation pipeline and a DETR object detector.
+    ``process_image`` is the entry point wired to the Gradio UI.
+    """
+
+    # Text returned by extract_text() when OCR fails. translate_text()
+    # recognises it so an error message is never sent to the translator.
+    OCR_ERROR_MESSAGE = "Error during text extraction."
+    # Languages of the fallback ("Other") reader. EasyOCR only lets Cyrillic
+    # ("ru") be combined with English, so the fallback stays Latin-script;
+    # Russian has its own entry in supported_languages.
+    # NOTE(review): confirm against the installed EasyOCR version.
+    FALLBACK_LANGUAGES = ["en", "fr"]
+    # Language groups whose readers are loaded at start-up.
+    EAGER_LANGUAGES = ("Arabic", "Hindi")
+    # Detections scoring at or below this value are not drawn.
+    MIN_DETECTION_SCORE = 0.6
+
+    def __init__(self):
+        """Set up the language table and load all models.
+
+        Raises:
+            RuntimeError: if any model fails to load.
+        """
+        self.supported_languages = {
+            "Arabic": ["ar", "en"],
+            "Hindi": ["hi", "en"],
+            "Chinese": ["ch_sim", "en"],
+            "Japanese": ["ja", "en"],
+            "Korean": ["ko", "en"],
+            "Russian": ["ru", "en"],
+            "French": ["fr", "en"],
+        }
+        # Cache for OCR readers to avoid reloading. It is created BEFORE
+        # initialize_models() so the eagerly loaded readers are kept rather
+        # than being wiped by a later reset to an empty dict.
+        self.ocr_readers = {}
+        self.initialize_models()
+
+    def initialize_models(self):
+        """Load the eager OCR readers, the translator and the object detector.
+
+        Raises:
+            RuntimeError: wrapping whatever exception a model loader raised.
+        """
+        try:
+            # Load OCR for most common languages eagerly
+            logger.info("Loading initial OCR readers...")
+            self.ocr_readers = {
+                name: easyocr.Reader(
+                    self.supported_languages[name],
+                    gpu=using_gpu,
+                    verbose=False,
+                )
+                for name in self.EAGER_LANGUAGES
+            }
+            # Load translation model
+            logger.info("Loading translation model...")
+            self.translator = pipeline(
+                "translation",
+                model="Helsinki-NLP/opus-mt-mul-en",
+                device=0 if using_gpu else -1,
+            )
+            # Load object detection model
+            logger.info("Loading object detection model...")
+            self.detector = pipeline(
+                "object-detection",
+                model="facebook/detr-resnet-50",
+                device=0 if using_gpu else -1,
+            )
+            logger.info("All models loaded successfully!")
+        except Exception as e:
+            logger.exception(f"Error initializing models: {str(e)}")
+            # Chain the cause so the original traceback is preserved.
+            raise RuntimeError(f"Failed to initialize AI models: {str(e)}") from e
+
+    def get_ocr_reader(self, language_choice):
+        """Return the cached OCR reader for ``language_choice``, loading it on first use.
+
+        Names missing from ``supported_languages`` (including the UI's
+        "Other" option) share one fallback reader cached under "Other".
+        """
+        if language_choice in self.ocr_readers:
+            return self.ocr_readers[language_choice]
+        if language_choice in self.supported_languages:
+            cache_key = language_choice
+            languages = self.supported_languages[language_choice]
+            logger.info(f"Loading new OCR reader for {language_choice}...")
+        else:
+            # "Other" is a deliberate UI option, so only warn for names that
+            # are genuinely unknown.
+            if language_choice != "Other":
+                logger.warning(f"Unsupported language: {language_choice}, using default")
+            cache_key = "Other"
+            languages = self.FALLBACK_LANGUAGES
+            if cache_key in self.ocr_readers:
+                return self.ocr_readers[cache_key]
+        reader = easyocr.Reader(languages, gpu=using_gpu, verbose=False)
+        # Cache for future use
+        self.ocr_readers[cache_key] = reader
+        return reader
+
+    def extract_text(self, image, language_choice):
+        """Run OCR on ``image``.
+
+        Returns:
+            tuple: ``(extracted_text, text_boxes)`` where ``text_boxes`` is a
+            list of ``(box_points, text)`` pairs used for visualization. On
+            any failure, including a reader that cannot be loaded, returns
+            ``(OCR_ERROR_MESSAGE, [])``.
+        """
+        start_time = time.time()
+        try:
+            # Reader loading sits inside the try so a failed model load is
+            # reported like any other OCR error instead of escaping.
+            reader = self.get_ocr_reader(language_choice)
+            text_results = reader.readtext(image)
+        except Exception as e:
+            logger.exception(f"OCR error: {str(e)}")
+            return self.OCR_ERROR_MESSAGE, []
+        # Each result starts with (box_points, text); keep both for drawing.
+        text_boxes = [(res[0], res[1]) for res in text_results]
+        extracted_text = " ".join(text for _, text in text_boxes)
+        logger.info(f"OCR completed in {time.time() - start_time:.2f} seconds")
+        return extracted_text, text_boxes
+
+    def translate_text(self, text):
+        """Translate extracted text to English.
+
+        Returns "No text to translate." for empty or whitespace-only input and
+        for the placeholder strings produced when OCR found nothing or failed.
+        Returns a "Translation error: ..." message if the translator raises.
+        """
+        placeholders = ("No text detected.", self.OCR_ERROR_MESSAGE)
+        if not text or not text.strip() or text in placeholders:
+            return "No text to translate."
+        try:
+            return self.translator(text)[0]['translation_text']
+        except Exception as e:
+            logger.exception(f"Translation error: {str(e)}")
+            return f"Translation error: {str(e)}"
+
+    def detect_objects(self, image_pil):
+        """Run the object detector on a PIL image; return [] if it raises."""
+        try:
+            return self.detector(image_pil)
+        except Exception as e:
+            logger.exception(f"Object detection error: {str(e)}")
+            return []
+
+    def visualize_results(self, image, text_boxes, detections):
+        """Return an RGB copy of ``image`` annotated with OCR and detection results.
+
+        OCR regions are outlined in blue and labelled "Text"; detections
+        scoring above ``MIN_DETECTION_SCORE`` get a red box with label and score.
+        """
+        image_draw = image.copy().convert("RGB")
+        draw = ImageDraw.Draw(image_draw)
+        # Try to load a better font, fall back to default if necessary
+        try:
+            font = ImageFont.truetype("Arial", 12)
+        except OSError:
+            font = ImageFont.load_default()
+        # Draw text bounding boxes
+        for box, _text in text_boxes:
+            # Hand Pillow plain Python ints rather than numpy scalars.
+            points = [tuple(p) for p in np.array(box).astype(np.int32).tolist()]
+            draw.polygon(points, outline="blue", width=2)
+            # Add small text label, clamped so it stays inside the image
+            first_x, first_y = points[0]
+            draw.text((first_x, max(0, first_y - 10)), "Text", fill="blue", font=font)
+        # Draw object detection boxes
+        for det in detections:
+            score = det['score']
+            if score <= self.MIN_DETECTION_SCORE:
+                continue
+            box = det['box']
+            draw.rectangle(
+                [box['xmin'], box['ymin'], box['xmax'], box['ymax']],
+                outline="red",
+                width=3,
+            )
+            label_text = f"{det['label']} ({score:.2f})"
+            # Clamp so labels of boxes touching the top edge remain visible
+            draw.text((box['xmin'], max(0, box['ymin'] - 15)), label_text, fill="red", font=font)
+        return image_draw
+
+    def process_image(self, image, language_choice):
+        """Main processing pipeline: OCR, translation, detection, visualization.
+
+        Args:
+            image: PIL image, numpy array, or None when nothing was uploaded.
+            language_choice: language-group name chosen in the UI.
+
+        Returns:
+            tuple: ``(annotated_image, extracted_text, translation)``;
+            ``annotated_image`` is None when no image was provided.
+        """
+        if image is None:
+            return (
+                None,
+                "No image provided. Please upload an image.",
+                "No image to process.",
+            )
+        if isinstance(image, Image.Image):
+            # Normalise palette/alpha/greyscale inputs so OCR, detection and
+            # drawing all see the same RGB pixels.
+            image_pil = image.convert("RGB")
+            image_array = np.array(image_pil)
+        else:
+            image_array = image if isinstance(image, np.ndarray) else np.array(image)
+            image_pil = Image.fromarray(image_array)
+        # Extract text
+        extracted_text, text_boxes = self.extract_text(image_array, language_choice)
+        # Translate text
+        translation = self.translate_text(extracted_text)
+        # Detect objects
+        detections = self.detect_objects(image_pil)
+        # Create visualization
+        result_image = self.visualize_results(image_pil, text_boxes, detections)
+        return result_image, extracted_text, translation
+# Create the single shared system instance at import time. The constructor
+# calls initialize_models(), so this line loads every model and raises
+# RuntimeError if any of them fails to load.
+smart_glasses = SmartGlassesSystem()
+def create_interface():
+    """Create and configure the Gradio interface.
+
+    Builds a two-column Blocks layout: image upload, language dropdown and a
+    "Process Image" button on the left; annotated image, extracted text and
+    English translation on the right. The button is wired to
+    ``smart_glasses.process_image``.
+
+    Returns:
+        The assembled ``gr.Blocks`` app (not yet launched).
+    """
+    # Custom CSS for better appearance
+    custom_css = """
+    .gradio-container {
+        background-color: #f0f4f8;
+    }
+    .output-image {
+        border: 2px solid #2c3e50;
+        border-radius: 5px;
+    }
+    """
+    # Candidate example rows: (image path, language group)
+    example_rows = [
+        ["examples/arabic_sign.jpg", "Arabic"],
+        ["examples/hindi_text.jpg", "Hindi"],
+        ["examples/russian_document.jpg", "Russian"],
+    ]
+    # Keep only rows whose image exists on disk so the demo does not depend on
+    # the examples/ folder being present.
+    available_examples = [row for row in example_rows if os.path.exists(row[0])]
+    # Create interface
+    with gr.Blocks(css=custom_css, title="🚨 Police Smart Glasses - AI Demo") as iface:
+        gr.Markdown("""
+        # 🚨 Police Smart Glasses - Advanced AI Demo
+        This system demonstrates real-time text recognition, translation, and object detection capabilities
+        for law enforcement smart glasses technology.
+        ### Instructions:
+        1. Upload an image containing text in the selected language
+        2. Choose the primary language in the image
+        3. View the detection results, extracted text, and English translation
+        """)
+        with gr.Row():
+            with gr.Column(scale=1):
+                # Input components
+                input_image = gr.Image(
+                    type="pil",
+                    label="Upload an Image (e.g., Signs, Documents, License Plates)"
+                )
+                language_choice = gr.Dropdown(
+                    choices=list(smart_glasses.supported_languages.keys()) + ["Other"],
+                    value="Arabic",
+                    label="Select Primary Language in Image"
+                )
+                process_btn = gr.Button("Process Image", variant="primary")
+            with gr.Column(scale=1):
+                # Output components
+                output_image = gr.Image(label="Analysis Results")
+                extracted_text = gr.Textbox(label="Extracted Text")
+                translated_text = gr.Textbox(label="Translated Text (English)")
+        # Set up processing function
+        process_btn.click(
+            fn=smart_glasses.process_image,
+            inputs=[input_image, language_choice],
+            outputs=[output_image, extracted_text, translated_text]
+        )
+        # Examples for testing (omitted entirely when no example file is present)
+        if available_examples:
+            gr.Examples(
+                examples=available_examples,
+                inputs=[input_image, language_choice]
+            )
+        # System information
+        with gr.Accordion("System Information", open=False):
+            gr.Markdown(f"""
+            - **Device**: {'GPU' if using_gpu else 'CPU'}
+            - **Supported Languages**: {', '.join(smart_glasses.supported_languages.keys())}
+            - **AI Models**:
+                - OCR: EasyOCR
+                - Translation: Helsinki-NLP/opus-mt-mul-en
+                - Object Detection: facebook/detr-resnet-50
+            """)
+    return iface
 if __name__ == "__main__":
+    # Create and launch interface
+    iface = create_interface()
+    # Enable request queuing through Blocks.queue(); the enable_queue argument
+    # of launch() was deprecated and later removed from Gradio, where passing
+    # it fails at start-up.
+    iface.queue()
+    iface.launch(
+        share=True,  # Enable sharing
+        debug=True   # Show debugging information
+    )