shukdevdatta123 committed on
Commit
7fb8860
·
verified ·
1 Parent(s): ade4954

Update v1.txt

Browse files
Files changed (1) hide show
  1. v1.txt +62 -1
v1.txt CHANGED
@@ -54,6 +54,67 @@ def extract_medicine_names(image):
54
  generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False
55
  )[0]
56
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
57
  return output_text
58
 
59
  # Create Gradio interface
@@ -70,7 +131,7 @@ with gr.Blocks(title="Medicine Name Extractor") as app:
70
  output_text = gr.Textbox(label="Extracted Medicine Names", lines=10)
71
 
72
  extract_btn.click(
73
- fn=extract_medicine_names,
74
  inputs=input_image,
75
  outputs=output_text
76
  )
 
54
  generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False
55
  )[0]
56
 
57
+ # Remove any literal <|im_end|> marker left in the decoded text and trim surrounding whitespace
58
+ output_text = output_text.replace("<|im_end|>", "").strip()
59
+
60
+ return output_text
61
+
62
+ # Create a singleton model and processor to avoid reloading for each request
63
+ model_instance = None
64
+ processor_instance = None
65
+
66
+ def get_model_and_processor():
67
+ global model_instance, processor_instance
68
+ if model_instance is None or processor_instance is None:
69
+ model_instance, processor_instance = load_model()
70
+ return model_instance, processor_instance
71
+
72
+ # Optimized extraction function that uses the singleton model
73
+ def extract_medicine_names_optimized(image):
74
+ if image is None:
75
+ return "Please upload an image."
76
+
77
+ model, processor = get_model_and_processor()
78
+
79
+ # Prepare the message with the specific prompt for medicine extraction
80
+ messages = [
81
+ {
82
+ "role": "user",
83
+ "content": [
84
+ {
85
+ "type": "image",
86
+ "image": image,
87
+ },
88
+ {"type": "text", "text": "Extract and list ONLY the names of medicines/drugs from this prescription image. Output the medicine names as a numbered list without any additional information or descriptions."},
89
+ ],
90
+ }
91
+ ]
92
+
93
+ # Prepare for inference
94
+ text = processor.apply_chat_template(
95
+ messages, tokenize=False, add_generation_prompt=True
96
+ )
97
+ image_inputs, video_inputs = process_vision_info(messages)
98
+ inputs = processor(
99
+ text=[text],
100
+ images=image_inputs,
101
+ videos=video_inputs,
102
+ padding=True,
103
+ return_tensors="pt",
104
+ )
105
+
106
+ # Generate output
107
+ generated_ids = model.generate(**inputs, max_new_tokens=256)
108
+ generated_ids_trimmed = [
109
+ out_ids[len(in_ids):] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
110
+ ]
111
+ output_text = processor.batch_decode(
112
+ generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False
113
+ )[0]
114
+
115
+ # Remove any literal <|im_end|> marker left in the decoded text and trim surrounding whitespace
116
+ output_text = output_text.replace("<|im_end|>", "").strip()
117
+
118
  return output_text
119
 
120
  # Create Gradio interface
 
131
  output_text = gr.Textbox(label="Extracted Medicine Names", lines=10)
132
 
133
  extract_btn.click(
134
+ fn=extract_medicine_names_optimized,
135
  inputs=input_image,
136
  outputs=output_text
137
  )