Spaces:

shukdevdatta123
/

MedicineOCR

Running

App Files Community

shukdevdatta123 committed 10 days ago

Commit

97b296f

verified ·

1 Parent(s): 4905934

Update app.py

Browse files

Files changed (1) hide show

app.py +64 -63

app.py CHANGED Viewed

@@ -1,83 +1,84 @@
 import gradio as gr
-from openai import OpenAI
-import os
-def extract_medicine_names(api_key, image):
-    """Extract medicine names from a prescription image using OpenRouter and Gemini."""
-    if not api_key.strip():
-        return "Error: Please provide a valid OpenRouter API key."
-    try:
-        client = OpenAI(
-            base_url="https://openrouter.ai/api/v1",
-            api_key=api_key,
-        )
-        completion = client.chat.completions.create(
-            extra_headers={
-                "HTTP-Referer": "gradio-medicine-extractor-app",
-                "X-Title": "Medicine Name Extractor",
-            },
-            model="google/gemini-2.0-flash-exp:free",
-            messages=[
                 {
-                    "role": "user",
-                    "content": [
-                        {
-                            "type": "text",
-                            "text": "This is a medical prescription image. Please analyze it and ONLY extract the medicine names. Return just a bulleted list of medicine names found, nothing else. If you can't identify any medicines or this isn't a prescription, please respond with 'No medicine names detected'."
-                        },
-                        {
-                            "type": "image_url",
-                            "image_url": {
-                                "url": image
-                            }
-                        }
-                    ]
-                }
-            ]
-        )
-        return completion.choices[0].message.content
-    except Exception as e:
-        return f"Error: {str(e)}"
-# Create the Gradio interface
 with gr.Blocks(title="Medicine Name Extractor") as app:
-    gr.Markdown("# Medicine Name Extractor from Prescriptions")
-    gr.Markdown("Upload a prescription image to extract medicine names.")
     with gr.Row():
         with gr.Column():
-            api_key = gr.Textbox(
-                label="OpenRouter API Key",
-                placeholder="Enter your OpenRouter API key",
-                type="password"
-            )
-            img_input = gr.Image(
-                label="Upload Prescription Image",
-                type="filepath"
-            )
-            extract_btn = gr.Button("Extract Medicine Names")
         with gr.Column():
-            output = gr.Textbox(label="Extracted Medicine Names", lines=10)
     extract_btn.click(
         fn=extract_medicine_names,
-        inputs=[api_key, img_input],
-        outputs=output
     )
-    gr.Markdown("""
-    ## How to use:
-    1. Enter your OpenRouter API key
-    2. Upload a prescription image
-    3. Click 'Extract Medicine Names'
-    The app will process the image and extract only the medicine names from the prescription.
-    """)
 # Launch the app
 if __name__ == "__main__":

 import gradio as gr
+import torch
+from transformers import Qwen2VLForConditionalGeneration, AutoProcessor
+from qwen_vl_utils import process_vision_info
+import re
+# Load the model on CPU
+def load_model():
+    model = Qwen2VLForConditionalGeneration.from_pretrained(
+        "prithivMLmods/Qwen2-VL-OCR-2B-Instruct",
+        torch_dtype=torch.float32,
+        device_map="cpu"
+    )
+    processor = AutoProcessor.from_pretrained("prithivMLmods/Qwen2-VL-OCR-2B-Instruct")
+    return model, processor
+# Function to extract medicine names
+def extract_medicine_names(image):
+    model, processor = load_model()
+    # Prepare the message with the specific prompt for medicine extraction
+    messages = [
+        {
+            "role": "user",
+            "content": [
                 {
+                    "type": "image",
+                    "image": image,
+                },
+                {"type": "text", "text": "Extract and list ONLY the names of medicines/drugs from this prescription image. Output the medicine names as a numbered list without any additional information or descriptions."},
+            ],
+        }
+    ]
+    # Prepare for inference
+    text = processor.apply_chat_template(
+        messages, tokenize=False, add_generation_prompt=True
+    )
+    image_inputs, video_inputs = process_vision_info(messages)
+    inputs = processor(
+        text=[text],
+        images=image_inputs,
+        videos=video_inputs,
+        padding=True,
+        return_tensors="pt",
+    )
+    # Generate output
+    generated_ids = model.generate(**inputs, max_new_tokens=256)
+    generated_ids_trimmed = [
+        out_ids[len(in_ids):] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
+    ]
+    output_text = processor.batch_decode(
+        generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False
+    )[0]
+    return output_text
+# Create Gradio interface
 with gr.Blocks(title="Medicine Name Extractor") as app:
+    gr.Markdown("# Medicine Name Extractor")
+    gr.Markdown("Upload a medical prescription image to extract the names of medicines.")
     with gr.Row():
         with gr.Column():
+            input_image = gr.Image(type="pil", label="Upload Prescription Image")
+            extract_btn = gr.Button("Extract Medicine Names", variant="primary")
         with gr.Column():
+            output_text = gr.Textbox(label="Extracted Medicine Names", lines=10)
     extract_btn.click(
         fn=extract_medicine_names,
+        inputs=input_image,
+        outputs=output_text
     )
+    gr.Markdown("### Notes")
+    gr.Markdown("- This tool uses the Qwen2-VL-OCR model to extract text from prescription images")
+    gr.Markdown("- For best results, ensure the prescription image is clear and readable")
+    gr.Markdown("- Processing may take some time as the model runs on CPU")
 # Launch the app
 if __name__ == "__main__":