Nayera-2025 committed on
Commit
925a7bd
·
verified ·
1 Parent(s): 752fac0

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +128 -0
app.py ADDED
@@ -0,0 +1,128 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import gradio as gr
from paddleocr import PaddleOCR
import numpy as np
import openai
import os
from langdetect import detect
from transformers import pipeline
import torch

# Initialize PaddleOCR for English text with angle classification
# (handles rotated text on scanned IDs/forms).
ocr_reader = PaddleOCR(use_angle_cls=True, lang='en')

# Whisper ASR via Hugging Face Transformers.
# device: 0 = first CUDA GPU, -1 = CPU. The original hard-coded device=0,
# which raises at startup on CPU-only hosts; pick the device dynamically.
whisper_model = pipeline(
    task="automatic-speech-recognition",
    model="openai/whisper-small",
    device=0 if torch.cuda.is_available() else -1,
)

# Set your OpenAI API Key (you should set this securely in your environment).
# NOTE(review): getenv returns None when unset — API calls will then fail at
# request time, not here.
openai.api_key = os.getenv("OPENAI_API_KEY")
22
def detect_language(text):
    """Best-effort language detection for *text*.

    Returns the langdetect ISO-639-1 code (e.g. "en", "ar"), or the
    sentinel string "unknown" when detection fails.
    """
    try:
        return detect(text)
    except Exception:
        # langdetect raises LangDetectException on empty/undetectable input.
        # The original bare `except:` also swallowed SystemExit and
        # KeyboardInterrupt — narrowed to Exception.
        return "unknown"
28
+
29
def gpt_clean_and_translate(text, target_language):
    """Clean noisy OCR/ASR text and translate it via GPT-4o.

    Parameters:
        text: raw extracted text (may be empty/whitespace).
        target_language: human-readable language name interpolated
            into the system prompt (e.g. "English", "Arabic").

    Returns:
        A single string: either the sentinel "No text detected." or the
        model's cleaned/translated output.

    Raises:
        openai errors (auth, rate limit, network) propagate to the caller.
    """
    if not text.strip():
        # BUG FIX: the original returned a 2-tuple ("No text detected.", "")
        # here but a plain string on the normal path; callers bind a single
        # value, so the tuple leaked into the UI. Keep the type consistent.
        return "No text detected."

    prompt = f"""
    You are an expert document reader and translator. You will receive a noisy extracted text from a government ID. Your tasks:

    1. Identify and extract these fields: Name, Address, Date of Birth, Expiry Date, Class, Sex.
    2. Output the information in full English sentences.
    3. Translate the full text into {target_language}.
    If the target language is English, just output clean English sentences.
    """

    # Legacy openai<1.0 ChatCompletion API — kept for interface stability.
    # Low temperature for deterministic field extraction.
    response = openai.ChatCompletion.create(
        model="gpt-4o",
        messages=[
            {"role": "system", "content": prompt},
            {"role": "user", "content": text}
        ],
        temperature=0.2
    )

    return response["choices"][0]["message"]["content"].strip()
53
+
54
def process_document(image, target_language, language_group):
    """OCR a document image, detect its language, and translate it.

    Parameters:
        image: PIL image or ndarray from the Gradio Image input.
        target_language: language name forwarded to the GPT translator.
        language_group: UI-only selector; currently unused by the pipeline.

    Returns:
        (extracted_text, source_language, translation) strings for the
        three Gradio output textboxes.
    """
    # Gradio delivers a PIL image (type="pil"); PaddleOCR wants an ndarray.
    if not isinstance(image, np.ndarray):
        image = np.array(image)

    # OCR - text extraction using PaddleOCR.
    ocr_result = ocr_reader.ocr(image)

    # BUG FIX: PaddleOCR returns [] or [None] when no text is found;
    # iterating ocr_result[0] then raised TypeError. Guard before reading.
    if ocr_result and ocr_result[0]:
        # Each line is (box, (text, confidence)); keep the text only.
        extracted_texts = [line[1][0] for line in ocr_result[0]]
    else:
        extracted_texts = []

    extracted_text = " ".join(extracted_texts)

    # Language detection (best-effort; "unknown" on failure).
    source_language = detect_language(extracted_text)

    # GPT cleaning and translation.
    translation = gpt_clean_and_translate(extracted_text, target_language)

    return extracted_text, source_language, translation
75
+
76
def process_audio(audio, target_language):
    """Transcribe an audio clip, detect its language, and translate it.

    Parameters:
        audio: filepath string from the Gradio Audio input.
        target_language: language name forwarded to the GPT translator.

    Returns:
        (transcription, detected_language, translation) strings for the
        three Gradio output textboxes.
    """
    # Speech-to-text via the Whisper pipeline; pull the text field out
    # of the result dict directly.
    transcription = whisper_model(audio)['text']

    # Best-effort source-language detection on the transcript.
    detected_language = detect_language(transcription)

    # Clean up and translate the transcript with GPT.
    translation = gpt_clean_and_translate(transcription, target_language)

    return transcription, detected_language, translation
88
+
89
# Gradio Interface
# Tab 1: image-document pipeline (OCR -> language detect -> GPT translate).
document_interface = gr.Interface(
    fn=process_document,
    inputs=[
        gr.Image(type="pil", label="Upload a Document Image (e.g., Passport, ID, Government Form)"),
        gr.Radio(choices=["English", "Arabic"], label="Translate To"),
        # NOTE(review): this selector is passed to process_document but is
        # not used by the pipeline — UI-only for now.
        gr.Dropdown(choices=["Arabic", "Russian", "Other (French, English)"], label="Document Language Group")
    ],
    outputs=[
        gr.Textbox(label="Extracted Text"),
        gr.Textbox(label="Detected Source Language"),
        gr.Textbox(label="Translated and Structured Text")
    ],
    title="🚨 Police Vision & Translator - Document Scanner",
    description="Upload an image document. The system will auto-detect the source language and generate clean translated output."
)

# Tab 2: audio pipeline (Whisper ASR -> language detect -> GPT translate).
audio_interface = gr.Interface(
    fn=process_audio,
    inputs=[
        gr.Audio(type="filepath", label="Record Audio (max 30 sec)"),
        gr.Radio(choices=["English", "Arabic"], label="Translate To")
    ],
    outputs=[
        gr.Textbox(label="Transcribed Text"),
        gr.Textbox(label="Detected Source Language"),
        gr.Textbox(label="Translated and Structured Text")
    ],
    title="🚨 Police Vision & Translator - Voice Translator",
    description="Record audio. The system will auto-detect the source language and generate clean translated output."
)

# Combine the Interfaces into one tabbed app.
app = gr.TabbedInterface(
    [document_interface, audio_interface],
    ["Document Translator", "Voice Translator"]
)

if __name__ == "__main__":
    # share=True exposes a public Gradio tunnel URL in addition to the
    # local server.
    app.launch(share=True)