shukdevdatta123 committed on
Commit
97b296f
·
verified ·
1 Parent(s): 4905934

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +64 -63
app.py CHANGED
@@ -1,83 +1,84 @@
1
  import gradio as gr
2
- from openai import OpenAI
3
- import os
 
 
4
 
5
def extract_medicine_names(api_key, image):
    """Extract medicine names from a prescription image using OpenRouter and Gemini.

    Args:
        api_key: OpenRouter API key entered by the user.
        image: Path to the uploaded image file (the Gradio image input in
            this app is configured with ``type="filepath"``). A string that
            is already an ``http(s)://`` or ``data:`` URL is forwarded as-is.

    Returns:
        The text of the model's first reply, or a string starting with
        ``"Error:"`` when an input is missing or the request fails.
    """
    if not api_key or not api_key.strip():
        return "Error: Please provide a valid OpenRouter API key."
    if not image:
        # Nothing was uploaded; fail early instead of sending an empty request.
        return "Error: Please upload a prescription image."

    try:
        # A local file path is meaningless to the remote API, so the image is
        # inlined as a base64 data URL before the request is built.
        image_url = _image_to_url(image)

        client = OpenAI(
            base_url="https://openrouter.ai/api/v1",
            # Stray whitespace from copy/paste would otherwise invalidate the key.
            api_key=api_key.strip(),
        )

        completion = client.chat.completions.create(
            extra_headers={
                "HTTP-Referer": "gradio-medicine-extractor-app",
                "X-Title": "Medicine Name Extractor",
            },
            model="google/gemini-2.0-flash-exp:free",
            messages=[
                {
                    "role": "user",
                    "content": [
                        {
                            "type": "text",
                            "text": "This is a medical prescription image. Please analyze it and ONLY extract the medicine names. Return just a bulleted list of medicine names found, nothing else. If you can't identify any medicines or this isn't a prescription, please respond with 'No medicine names detected'."
                        },
                        {
                            "type": "image_url",
                            "image_url": {
                                "url": image_url
                            }
                        }
                    ]
                }
            ]
        )

        return completion.choices[0].message.content

    except Exception as e:
        # UI boundary: surface any failure as text in the output box.
        return f"Error: {str(e)}"


def _image_to_url(image):
    """Return *image* as a URL string usable in an ``image_url`` message part."""
    import base64
    import mimetypes

    if image.startswith(("http://", "https://", "data:")):
        return image

    # Fall back to PNG when the extension gives no hint about the format.
    mime_type = mimetypes.guess_type(image)[0] or "image/png"
    with open(image, "rb") as fh:
        encoded = base64.b64encode(fh.read()).decode("ascii")
    return f"data:{mime_type};base64,{encoded}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
45
 
46
# Build the Gradio UI: inputs on the left, extracted names on the right.
app = gr.Blocks(title="Medicine Name Extractor")
with app:
    gr.Markdown("# Medicine Name Extractor from Prescriptions")
    gr.Markdown("Upload a prescription image to extract medicine names.")

    with gr.Row():
        with gr.Column():
            # The key is masked while typing.
            key_box = gr.Textbox(label="OpenRouter API Key", placeholder="Enter your OpenRouter API key", type="password")
            # The handler receives the upload as a file path string.
            image_box = gr.Image(label="Upload Prescription Image", type="filepath")
            run_button = gr.Button("Extract Medicine Names")

        with gr.Column():
            result_box = gr.Textbox(label="Extracted Medicine Names", lines=10)

    # Wire the button to the extraction handler.
    run_button.click(fn=extract_medicine_names, inputs=[key_box, image_box], outputs=result_box)

    gr.Markdown("""
    ## How to use:
    1. Enter your OpenRouter API key
    2. Upload a prescription image
    3. Click 'Extract Medicine Names'

    The app will process the image and extract only the medicine names from the prescription.
    """)
81
 
82
  # Launch the app
83
  if __name__ == "__main__":
 
1
  import gradio as gr
2
+ import torch
3
+ from transformers import Qwen2VLForConditionalGeneration, AutoProcessor
4
+ from qwen_vl_utils import process_vision_info
5
+ import re
6
 
7
+ # Load the model on CPU
8
_MODEL_ID = "prithivMLmods/Qwen2-VL-OCR-2B-Instruct"

# Holds the (model, processor) pair after the first successful load.
_LOADED_MODEL = None


def load_model():
    """Return the Qwen2-VL OCR model and its processor, loading them once.

    The first call instantiates both objects on CPU in float32; later calls
    return the same pair, so a request no longer pays the full model-loading
    cost each time this function is invoked.

    Returns:
        A ``(model, processor)`` tuple.
    """
    global _LOADED_MODEL
    if _LOADED_MODEL is None:
        model = Qwen2VLForConditionalGeneration.from_pretrained(
            _MODEL_ID,
            torch_dtype=torch.float32,
            device_map="cpu",
        )
        processor = AutoProcessor.from_pretrained(_MODEL_ID)
        # Assign only after both loads succeed so a failure is retried next call.
        _LOADED_MODEL = (model, processor)
    return _LOADED_MODEL
16
+
17
+ # Function to extract medicine names
18
def extract_medicine_names(image):
    """Ask the Qwen2-VL OCR model to list the medicines in a prescription image.

    Args:
        image: The uploaded image as delivered by the Gradio image input
            (configured with ``type="pil"`` in this app), or ``None`` when
            the button is clicked without an upload.

    Returns:
        The decoded model reply for the single prompt, or an ``"Error:"``
        string when no image was provided.
    """
    if image is None:
        # Checked before load_model() so an empty click never triggers the
        # model load or an opaque preprocessing failure.
        return "Error: Please upload a prescription image."

    model, processor = load_model()

    # Single-turn chat message: the image followed by the extraction prompt.
    messages = [
        {
            "role": "user",
            "content": [
                {
                    "type": "image",
                    "image": image,
                },
                {"type": "text", "text": "Extract and list ONLY the names of medicines/drugs from this prescription image. Output the medicine names as a numbered list without any additional information or descriptions."},
            ],
        }
    ]

    # Render the chat template to text and collect the vision inputs.
    text = processor.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    image_inputs, video_inputs = process_vision_info(messages)
    inputs = processor(
        text=[text],
        images=image_inputs,
        videos=video_inputs,
        padding=True,
        return_tensors="pt",
    )

    generated_ids = model.generate(**inputs, max_new_tokens=256)
    # generate() output starts with the prompt tokens; slice them off so only
    # the newly generated tokens are decoded.
    generated_ids_trimmed = [
        out_ids[len(in_ids):] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
    ]
    output_text = processor.batch_decode(
        generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False
    )[0]

    return output_text
58
 
59
# Build the Gradio UI: image upload on the left, extracted names on the right.
app = gr.Blocks(title="Medicine Name Extractor")
with app:
    gr.Markdown("# Medicine Name Extractor")
    gr.Markdown("Upload a medical prescription image to extract the names of medicines.")

    with gr.Row():
        with gr.Column():
            # The handler receives the upload as a PIL image.
            prescription_image = gr.Image(type="pil", label="Upload Prescription Image")
            run_button = gr.Button("Extract Medicine Names", variant="primary")

        with gr.Column():
            result_box = gr.Textbox(label="Extracted Medicine Names", lines=10)

    # Wire the button to the extraction handler.
    run_button.click(fn=extract_medicine_names, inputs=prescription_image, outputs=result_box)

    # Usage notes, each rendered as its own Markdown component under a heading.
    for note in (
        "### Notes",
        "- This tool uses the Qwen2-VL-OCR model to extract text from prescription images",
        "- For best results, ensure the prescription image is clear and readable",
        "- Processing may take some time as the model runs on CPU",
    ):
        gr.Markdown(note)
 
 
 
 
82
 
83
  # Launch the app
84
  if __name__ == "__main__":