Spaces:

Nayera-2025
/

Police-Vision-Translator

Sleeping

App Files Files Community

Nayera-2025 committed 6 days ago

Commit

925a7bd

verified ·

1 Parent(s): 752fac0

Create app.py

Browse files

Files changed (1) hide show

app.py +128 -0

app.py ADDED Viewed

	@@ -0,0 +1,128 @@

+import gradio as gr
+from paddleocr import PaddleOCR
+import numpy as np
+import openai
+import os
+from langdetect import detect
+# Initialize PaddleOCR
+ocr_reader = PaddleOCR(use_angle_cls=True, lang='en')
+# Initialize Whisper Model via Hugging Face Transformers
+from transformers import pipeline
+whisper_model = pipeline(
+    task="automatic-speech-recognition",
+    model="openai/whisper-small",
+    device=0
+)
+# Set your OpenAI API Key (you should set this securely in your environment)
+openai.api_key = os.getenv("OPENAI_API_KEY")
+def detect_language(text):
+    try:
+        lang = detect(text)
+    except:
+        lang = "unknown"
+    return lang
+def gpt_clean_and_translate(text, target_language):
+    if not text.strip():
+        return "No text detected.", ""
+    prompt = f"""
+You are an expert document reader and translator. You will receive a noisy extracted text from a government ID. Your tasks:
+1. Identify and extract these fields: Name, Address, Date of Birth, Expiry Date, Class, Sex.
+2. Output the information in full English sentences.
+3. Translate the full text into {target_language}.
+If the target language is English, just output clean English sentences.
+"""
+    response = openai.ChatCompletion.create(
+        model="gpt-4o",
+        messages=[
+            {"role": "system", "content": prompt},
+            {"role": "user", "content": text}
+        ],
+        temperature=0.2
+    )
+    cleaned_translation = response["choices"][0]["message"]["content"].strip()
+    return cleaned_translation
+def process_document(image, target_language, language_group):
+    if not isinstance(image, np.ndarray):
+        image = np.array(image)
+    # OCR - Text Extraction using PaddleOCR
+    ocr_result = ocr_reader.ocr(image)
+    extracted_texts = []
+    for line in ocr_result[0]:
+        text = line[1][0]
+        extracted_texts.append(text)
+    extracted_text = " ".join(extracted_texts)
+    # Language Detection
+    source_language = detect_language(extracted_text)
+    # GPT Cleaning and Translation
+    translation = gpt_clean_and_translate(extracted_text, target_language)
+    return extracted_text, source_language, translation
+def process_audio(audio, target_language):
+    # Speech Recognition
+    result = whisper_model(audio)
+    extracted_text = result['text']
+    # Language Detection
+    source_language = detect_language(extracted_text)
+    # GPT Cleaning and Translation
+    translation = gpt_clean_and_translate(extracted_text, target_language)
+    return extracted_text, source_language, translation
+# Gradio Interface
+document_interface = gr.Interface(
+    fn=process_document,
+    inputs=[
+        gr.Image(type="pil", label="Upload a Document Image (e.g., Passport, ID, Government Form)"),
+        gr.Radio(choices=["English", "Arabic"], label="Translate To"),
+        gr.Dropdown(choices=["Arabic", "Russian", "Other (French, English)"], label="Document Language Group")
+    ],
+    outputs=[
+        gr.Textbox(label="Extracted Text"),
+        gr.Textbox(label="Detected Source Language"),
+        gr.Textbox(label="Translated and Structured Text")
+    ],
+    title="🚨 Police Vision & Translator - Document Scanner",
+    description="Upload an image document. The system will auto-detect the source language and generate clean translated output."
+)
+audio_interface = gr.Interface(
+    fn=process_audio,
+    inputs=[
+        gr.Audio(type="filepath", label="Record Audio (max 30 sec)"),
+        gr.Radio(choices=["English", "Arabic"], label="Translate To")
+    ],
+    outputs=[
+        gr.Textbox(label="Transcribed Text"),
+        gr.Textbox(label="Detected Source Language"),
+        gr.Textbox(label="Translated and Structured Text")
+    ],
+    title="🚨 Police Vision & Translator - Voice Translator",
+    description="Record audio. The system will auto-detect the source language and generate clean translated output."
+)
+# Combine the Interfaces
+app = gr.TabbedInterface(
+    [document_interface, audio_interface],
+    ["Document Translator", "Voice Translator"]
+)
+if __name__ == "__main__":
+    app.launch(share=True)