shukdevdatta123 committed on
Commit
f787f97
·
verified ·
1 Parent(s): 4c76bbb

Create v1.txt

Browse files
Files changed (1) hide show
  1. v1.txt +85 -0
v1.txt ADDED
@@ -0,0 +1,85 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import torch
3
+ from transformers import Qwen2VLForConditionalGeneration, AutoProcessor
4
+ from qwen_vl_utils import process_vision_info
5
+ import re
6
+
7
+ # Load the model on CPU
8
def load_model():
    """Return the Qwen2-VL OCR model and its processor, loading them once.

    The first call loads the checkpoint in float32 on the CPU. The resulting
    ``(model, processor)`` pair is stored on the function object and handed
    back by every later call, so the weights are not re-read per request.

    Returns:
        tuple: ``(model, processor)`` for
        ``prithivMLmods/Qwen2-VL-OCR-2B-Instruct``.
    """
    cached = getattr(load_model, "_cached", None)
    if cached is None:
        model_id = "prithivMLmods/Qwen2-VL-OCR-2B-Instruct"
        model = Qwen2VLForConditionalGeneration.from_pretrained(
            model_id,
            torch_dtype=torch.float32,
            device_map="cpu",
        )
        processor = AutoProcessor.from_pretrained(model_id)
        cached = (model, processor)
        # Remember the pair so callers that invoke load_model() per request
        # reuse the already-loaded objects.
        load_model._cached = cached
    return cached
16
+
17
+ # Function to extract medicine names
18
def extract_medicine_names(image):
    """Ask the OCR model to list the medicine names in a prescription image.

    Args:
        image: The prescription image to read, or ``None`` when no image
            was supplied.

    Returns:
        str: The decoded model reply (the prompt requests a numbered list of
        medicine names), or a short hint asking for an upload when ``image``
        is ``None``.
    """
    # Bail out before touching the model: without an image there is nothing
    # to run inference on.
    if image is None:
        return "Please upload a prescription image first."

    model, processor = load_model()

    # Prepare the message with the specific prompt for medicine extraction
    messages = [
        {
            "role": "user",
            "content": [
                {
                    "type": "image",
                    "image": image,
                },
                {"type": "text", "text": "Extract and list ONLY the names of medicines/drugs from this prescription image. Output the medicine names as a numbered list without any additional information or descriptions."},
            ],
        }
    ]

    # Prepare for inference: render the chat prompt and collect vision inputs
    text = processor.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    image_inputs, video_inputs = process_vision_info(messages)
    inputs = processor(
        text=[text],
        images=image_inputs,
        videos=video_inputs,
        padding=True,
        return_tensors="pt",
    )

    # Generate output
    generated_ids = model.generate(**inputs, max_new_tokens=256)
    # Drop the prompt tokens so that only the newly generated reply is decoded.
    generated_ids_trimmed = [
        out_ids[len(in_ids):]
        for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
    ]
    output_text = processor.batch_decode(
        generated_ids_trimmed,
        skip_special_tokens=True,
        clean_up_tokenization_spaces=False,
    )[0]

    return output_text
58
+
59
+ # Create Gradio interface
60
# Assemble the UI: a heading, a row with two columns (image upload plus
# button, and the result textbox), the click wiring, and usage notes.
with gr.Blocks(title="Medicine Name Extractor") as app:
    gr.Markdown("# Medicine Name Extractor")
    gr.Markdown("Upload a medical prescription image to extract the names of medicines.")

    with gr.Row():
        with gr.Column():
            input_image = gr.Image(type="pil", label="Upload Prescription Image")
            extract_btn = gr.Button("Extract Medicine Names", variant="primary")

        with gr.Column():
            output_text = gr.Textbox(label="Extracted Medicine Names", lines=10)

    # Clicking the button sends the uploaded image through the extractor and
    # writes the reply into the textbox.
    extract_btn.click(extract_medicine_names, inputs=input_image, outputs=output_text)

    gr.Markdown("### Notes")
    for _note in (
        "- This tool uses the Qwen2-VL-OCR model to extract text from prescription images",
        "- For best results, ensure the prescription image is clear and readable",
        "- Processing may take some time as the model runs on CPU",
    ):
        gr.Markdown(_note)

# Start the web app only when this file is executed as a script.
if __name__ == "__main__":
    app.launch()