File size: 3,252 Bytes
dd2ba72
 
2405743
1f353b4
2405743
1f353b4
4735088
2405743
9efaee0
2405743
307c8f3
1dd705c
307c8f3
 
d85faf4
2405743
 
581d1f5
2405743
307c8f3
 
 
dd2ba72
e51c033
307c8f3
 
 
 
 
 
 
 
2405743
307c8f3
 
 
 
2405743
 
307c8f3
2405743
 
 
 
 
 
dd2ba72
2405743
e51c033
1f353b4
 
2405743
1f353b4
2405743
1f353b4
58a562d
 
 
 
2405743
58a562d
 
2405743
58a562d
 
 
 
2405743
58a562d
2405743
 
 
58a562d
 
 
 
2405743
1f353b4
2405743
 
58a562d
2405743
1f353b4
58a562d
1f353b4
 
58a562d
dd2ba72
2405743
58a562d
307c8f3
dd2ba72
1f353b4
 
28eb4e5
1f353b4
f84d408
 
1f353b4
 
 
2405743
 
1f353b4
dd2ba72
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
import gradio as gr
import cv2
import easyocr
import numpy as np
import os
from PIL import Image
from ultralytics import YOLO
from datetime import datetime

# Load the YOLO detection weights once, at import time, from a fixed
# absolute path; every call to annotate_frame reuses this instance.
model = YOLO("/home/user/app/best.pt")

# Maps the detector's integer class index to the label drawn on the frame.
label_map = {0: "Analog", 1: "Digital", 2: "Non-LP"}

# Shared EasyOCR reader configured with the 'bn' (Bengali) language code.
reader = easyocr.Reader(['bn'])

def annotate_frame(frame):
    """Detect plates in a frame, draw boxes/labels, and OCR each detection.

    Detection runs on a 640x640 resized copy of ``frame`` and all drawing
    happens on that copy. The OCR crop, however, is taken from the original
    ``frame`` after mapping the box back to the original resolution, so the
    recognizer sees the correct region at full detail.

    Args:
        frame: Image array with shape ``(height, width, ...)``. BGR channel
            order is assumed, since the result is converted with
            ``cv2.COLOR_BGR2RGB``.

    Returns:
        The annotated 640x640 image converted from BGR to RGB.
    """
    det_w, det_h = 640, 640
    src_h, src_w = frame.shape[:2]

    input_img = cv2.resize(frame, (det_w, det_h))
    results = model(input_img)[0]
    detections = results.boxes.data.cpu().numpy()

    # Factors that map detector-space (640x640) coordinates back onto the
    # original frame; both are 1.0 when the frame is already 640x640.
    scale_x = src_w / det_w
    scale_y = src_h / det_h

    for det in detections:
        if len(det) < 6:
            continue

        # Only the first six columns are used: box corners, score, class.
        x1, y1, x2, y2, conf, cls = det[:6]
        x1, y1, x2, y2 = (int(v) for v in (x1, y1, x2, y2))
        label = label_map.get(int(cls), "Unknown")
        percent = f"{conf * 100:.2f}%"

        # Draw box and label; keep the label baseline inside the image when
        # the box touches the top edge.
        cv2.rectangle(input_img, (x1, y1), (x2, y2), (255, 0, 0), 2)
        cv2.putText(input_img, f"{label}: {percent}", (x1, max(y1 - 10, 15)),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)

        # OCR: translate the box to source coordinates and clamp it to the
        # frame so negative values cannot wrap around in the numpy slice.
        sx1 = min(max(int(x1 * scale_x), 0), src_w)
        sx2 = min(max(int(x2 * scale_x), 0), src_w)
        sy1 = min(max(int(y1 * scale_y), 0), src_h)
        sy2 = min(max(int(y2 * scale_y), 0), src_h)
        cropped = frame[sy1:sy2, sx1:sx2]
        if cropped.size == 0:
            continue

        ocr_result = reader.readtext(cropped)
        for i, item in enumerate(ocr_result):
            text = item[1].strip()
            # NOTE(review): OpenCV's Hershey fonts do not contain Bengali
            # glyphs, so this text will likely render as '?' characters;
            # drawing it properly needs a font-aware renderer.
            cv2.putText(input_img, text, (x1, y2 + 20 + i * 25),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 255), 2)

    return cv2.cvtColor(input_img, cv2.COLOR_BGR2RGB)

def process_input(input_file):
    """Annotate an uploaded image or video and return the Gradio outputs.

    Files ending in ``.mp4``, ``.avi`` or ``.mov`` are processed frame by
    frame and written to a new MP4 file; anything else is read as an image.

    Args:
        input_file: The upload, either as a filesystem path string or as an
            object exposing the path through a ``name`` attribute. ``None``
            or an empty path is reported as an error instead of raising.

    Returns:
        A 4-tuple ``(video_path, image, text, confidence)`` in the same
        order as the four output components of the interface. Exactly one
        of ``video_path`` / ``image`` is set on success; both are ``None``
        when the input could not be processed, with the reason in ``text``.
    """
    # Accept both a plain path and a file-like wrapper with ``.name``.
    if isinstance(input_file, str):
        file_path = input_file
    else:
        file_path = getattr(input_file, "name", None)
    if not file_path:
        return None, None, "No file provided", ""

    ext = os.path.splitext(file_path)[-1].lower()

    if ext not in ('.mp4', '.avi', '.mov'):
        # Handle image
        frame = cv2.imread(file_path)
        if frame is None:
            return None, None, "Invalid image file", ""

        annotated = annotate_frame(frame)
        return None, Image.fromarray(annotated), "Bangla text in image", ""

    cap = cv2.VideoCapture(file_path)
    if not cap.isOpened():
        cap.release()
        return None, None, "Could not open video file", ""

    out = None
    try:
        fps = cap.get(cv2.CAP_PROP_FPS)
        if not fps or fps <= 0:
            # Some containers report no frame rate; fall back to a sane
            # default rather than creating an unusable writer.
            fps = 25.0
        width, height = 640, 640

        # Timestamped name so overlapping requests do not overwrite each
        # other's output file.
        output_path = f"annotated_output_{datetime.now():%Y%m%d_%H%M%S_%f}.mp4"
        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
        if not out.isOpened():
            return None, None, "Could not create output video", ""

        while True:
            ret, frame = cap.read()
            if not ret:
                break
            frame_resized = cv2.resize(frame, (width, height))
            annotated_frame = annotate_frame(frame_resized)
            # annotate_frame returns RGB; the writer expects BGR.
            out.write(cv2.cvtColor(annotated_frame, cv2.COLOR_RGB2BGR))
    finally:
        # Always free the capture/writer handles, even if a frame fails.
        cap.release()
        if out is not None:
            out.release()

    return (output_path, None, "Bangla text in video (see frames)",
            "Detection confidence displayed on frames")


# Gradio front end: a single file upload feeds process_input, and the values
# it returns are shown in the output components listed below, in order.
interface = gr.Interface(
    fn=process_input,
    # type="filepath" asks Gradio to hand the upload over as a path on disk.
    inputs=gr.File(type="filepath", label="Upload Image or Video"),
    # Four output slots: annotated video, annotated image, and two text
    # fields. The callback's return values are matched to these by position.
    outputs=[
        gr.Video(label="Output Video"),
        gr.Image(type="pil", label="Output Image"),
        gr.Textbox(label="Detected Text (Bangla)"),
        gr.Textbox(label="Confidence (%)")
    ],
    title="YOLOv5 License Plate Detector (Bangla OCR)",
    description="Upload an image or video. Detects license plates and extracts Bangla text using EasyOCR."
)

# Start the web server as soon as this module is executed.
interface.launch()