Spaces:

shukdevdatta123
/

MedicineOCR

Running

App Files Community

shukdevdatta123 committed 10 days ago

Commit

beecb06

verified ·

1 Parent(s): 7fb8860

Update app.py

Browse files

Files changed (1) hide show

app.py +102 -120

app.py CHANGED Viewed

@@ -1,146 +1,128 @@
 import gradio as gr
 import torch
-from transformers import Qwen2VLForConditionalGeneration, AutoProcessor
-from qwen_vl_utils import process_vision_info
-import re
-# Load the model on CPU
-def load_model():
-    model = Qwen2VLForConditionalGeneration.from_pretrained(
-        "prithivMLmods/Qwen2-VL-OCR-2B-Instruct",
-        torch_dtype=torch.float32,
-        device_map="cpu"
-    )
-    processor = AutoProcessor.from_pretrained("prithivMLmods/Qwen2-VL-OCR-2B-Instruct")
-    return model, processor
-# Function to extract medicine names
-def extract_medicine_names(image):
-    model, processor = load_model()
-    # Prepare the message with the specific prompt for medicine extraction
-    messages = [
-        {
-            "role": "user",
-            "content": [
-                {
-                    "type": "image",
-                    "image": image,
-                },
-                {"type": "text", "text": "Extract and list ONLY the names of medicines/drugs from this prescription image. Output the medicine names as a numbered list without any additional information or descriptions."},
-            ],
-        }
-    ]
-    # Prepare for inference
-    text = processor.apply_chat_template(
-        messages, tokenize=False, add_generation_prompt=True
-    )
-    image_inputs, video_inputs = process_vision_info(messages)
-    inputs = processor(
-        text=[text],
-        images=image_inputs,
-        videos=video_inputs,
-        padding=True,
-        return_tensors="pt",
-    )
-    # Generate output
-    generated_ids = model.generate(**inputs, max_new_tokens=256)
-    generated_ids_trimmed = [
-        out_ids[len(in_ids):] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
-    ]
-    output_text = processor.batch_decode(
-        generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False
-    )[0]
-    # Remove <|im_end|> and any other special tokens that might appear in the output
-    output_text = output_text.replace("<|im_end|>", "").strip()
-    return output_text
-# Create a singleton model and processor to avoid reloading for each request
-model_instance = None
-processor_instance = None
-def get_model_and_processor():
-    global model_instance, processor_instance
-    if model_instance is None or processor_instance is None:
-        model_instance, processor_instance = load_model()
-    return model_instance, processor_instance
-# Optimized extraction function that uses the singleton model
-def extract_medicine_names_optimized(image):
-    if image is None:
-        return "Please upload an image."
-    model, processor = get_model_and_processor()
-    # Prepare the message with the specific prompt for medicine extraction
-    messages = [
-        {
-            "role": "user",
-            "content": [
-                {
-                    "type": "image",
-                    "image": image,
-                },
-                {"type": "text", "text": "Extract and list ONLY the names of medicines/drugs from this prescription image. Output the medicine names as a numbered list without any additional information or descriptions."},
-            ],
-        }
-    ]
-    # Prepare for inference
-    text = processor.apply_chat_template(
-        messages, tokenize=False, add_generation_prompt=True
-    )
-    image_inputs, video_inputs = process_vision_info(messages)
     inputs = processor(
-        text=[text],
-        images=image_inputs,
-        videos=video_inputs,
-        padding=True,
         return_tensors="pt",
-    )
-    # Generate output
-    generated_ids = model.generate(**inputs, max_new_tokens=256)
-    generated_ids_trimmed = [
-        out_ids[len(in_ids):] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
-    ]
-    output_text = processor.batch_decode(
-        generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False
-    )[0]
-    # Remove <|im_end|> and any other special tokens that might appear in the output
-    output_text = output_text.replace("<|im_end|>", "").strip()
-    return output_text
-# Create Gradio interface
-with gr.Blocks(title="Medicine Name Extractor") as app:
     gr.Markdown("# Medicine Name Extractor")
-    gr.Markdown("Upload a medical prescription image to extract the names of medicines.")
     with gr.Row():
         with gr.Column():
-            input_image = gr.Image(type="pil", label="Upload Prescription Image")
             extract_btn = gr.Button("Extract Medicine Names", variant="primary")
         with gr.Column():
-            output_text = gr.Textbox(label="Extracted Medicine Names", lines=10)
     extract_btn.click(
-        fn=extract_medicine_names_optimized,
-        inputs=input_image,
-        outputs=output_text
     )
-    gr.Markdown("### Notes")
-    gr.Markdown("- This tool uses the Qwen2-VL-OCR model to extract text from prescription images")
-    gr.Markdown("- For best results, ensure the prescription image is clear and readable")
-    gr.Markdown("- Processing may take some time as the model runs on CPU")
-# Launch the app
-if __name__ == "__main__":
-    app.launch()

 import gradio as gr
+from transformers.image_utils import load_image
+from threading import Thread
+import time
 import torch
+from PIL import Image
+from transformers import (
+    Qwen2VLForConditionalGeneration,
+    AutoProcessor,
+    TextIteratorStreamer,
+)
+# ---------------------------
+# Helper Functions
+# ---------------------------
+def progress_bar_html(label: str, primary_color: str = "#4B0082", secondary_color: str = "#9370DB") -> str:
+    """
+    Returns an HTML snippet for a thin animated progress bar with a label.
+    """
+    return f'''
+<div style="display: flex; align-items: center;">
+    <span style="margin-right: 10px; font-size: 14px;">{label}</span>
+    <div style="width: 110px; height: 5px; background-color: {secondary_color}; border-radius: 2px; overflow: hidden;">
+        <div style="width: 100%; height: 100%; background-color: {primary_color}; animation: loading 1.5s linear infinite;"></div>
+    </div>
+</div>
+<style>
+@keyframes loading {{
+    0% {{ transform: translateX(-100%); }}
+    100% {{ transform: translateX(100%); }}
+}}
+</style>
+    '''
+# Model and Processor Setup - CPU version
+MODEL_ID = "prithivMLmods/Qwen2-VL-OCR-2B-Instruct"
+processor = AutoProcessor.from_pretrained(MODEL_ID, trust_remote_code=True)
+model = Qwen2VLForConditionalGeneration.from_pretrained(
+    MODEL_ID,
+    trust_remote_code=True,
+    torch_dtype=torch.float32  # Using float32 for CPU compatibility
+).to("cpu").eval()
+# Main Inference Function
+def extract_medicines(image_files):
+    """Extract medicine names from prescription images."""
+    if not image_files:
+        return "Please upload a prescription image."
+    images = [load_image(image) for image in image_files]
+    # Specific prompt to extract only medicine names
+    text = "Extract ONLY the names of medications/medicines from this prescription image. Format the output as a numbered list of medicine names only, without dosages or instructions."
+    messages = [{
+        "role": "user",
+        "content": [
+            *[{"type": "image", "image": image} for image in images],
+            {"type": "text", "text": text},
+        ],
+    }]
+    prompt_full = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
     inputs = processor(
+        text=[prompt_full],
+        images=images,
         return_tensors="pt",
+        padding=True,
+    ).to("cpu")
+    streamer = TextIteratorStreamer(processor, skip_prompt=True, skip_special_tokens=True)
+    generation_kwargs = dict(inputs, streamer=streamer, max_new_tokens=1024)
+    thread = Thread(target=model.generate, kwargs=generation_kwargs)
+    thread.start()
+    buffer = ""
+    yield progress_bar_html("Extracting Medicine Names")
+    for new_text in streamer:
+        buffer += new_text
+        buffer = buffer.replace("<|im_end|>", "")
+        time.sleep(0.01)
+        yield buffer
+# Gradio Interface
+with gr.Blocks() as demo:
     gr.Markdown("# Medicine Name Extractor")
+    gr.Markdown("Upload prescription images to extract medicine names")
     with gr.Row():
         with gr.Column():
+            image_input = gr.File(
+                label="Upload Prescription Image(s)",
+                file_count="multiple",
+                file_types=["image"]
+            )
             extract_btn = gr.Button("Extract Medicine Names", variant="primary")
         with gr.Column():
+            output = gr.Markdown(label="Extracted Medicine Names")
     extract_btn.click(
+        fn=extract_medicines,
+        inputs=image_input,
+        outputs=output
+    )
+    gr.Examples(
+        examples=[
+            ["examples/prescription1.jpg"],
+            ["examples/prescription2.jpg"],
+        ],
+        inputs=image_input,
+        outputs=output,
+        fn=extract_medicines,
+        cache_examples=True,
     )
+    gr.Markdown("""
+    ### Notes:
+    - This app is optimized to run on CPU
+    - Upload clear images of prescriptions for best results
+    - Only medicine names will be extracted
+    """)
+demo.queue()
+demo.launch(debug=True)