Spaces:

Noureddinesa
/

Layoutlmv3_v2_space

Running

App Files Files Community

ITSAIDI committed on Apr 15, 2024

Commit

8bb3f75

1 Parent(s): 59bcc7e

sdf

Browse files

Files changed (1) hide show

utilitis.py +48 -42

utilitis.py CHANGED Viewed

@@ -75,10 +75,38 @@ def unnormalize_box(bbox, width, height):
          width * (bbox[2] / 1000),
          height * (bbox[3] / 1000),
      ]
 #############################################################################
 #############################################################################
 def Run_model(image):
-    encoding,offset_mapping,words = Encode(image)
     device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
     # load the fine-tuned model from the hub
     model = LayoutLMv3ForTokenClassification.from_pretrained(model_Hugging_path)
@@ -86,28 +114,20 @@ def Run_model(image):
     # forward pass
     outputs = model(**encoding)
     predictions = outputs.logits.argmax(-1).squeeze().tolist()
     token_boxes = encoding.bbox.squeeze().tolist()
-    width, height = image.size
-    id2label, _  = Labels()
-    is_subword = np.array(offset_mapping.squeeze().tolist())[:,0] != 0
-    true_predictions = [id2label[pred] for idx, pred in enumerate(predictions) if not is_subword[idx]]
-    true_boxes = [unnormalize_box(box, width, height) for idx, box in enumerate(token_boxes) if not is_subword[idx]]
-    return true_predictions,true_boxes,words
 #############################################################################
 #############################################################################
def Get_Json(true_predictions,words):
    """Collect the word attached to each invoice field and relabel it in French.

    Args:
        true_predictions: Sequence of predicted label names.
        words: Sequence of words; the word for the prediction at position
            ``i`` is read from ``words[i - 1]``.
            NOTE(review): the one-position shift presumably accounts for a
            leading special token in the predictions -- confirm with the caller.

    Returns:
        dict mapping the French display name of each recognised field
        ('InvNum', 'Fourni', 'InvDate', 'TT', 'TTC', 'TVA') to its word.
        When a label occurs several times, the last occurrence wins.
    """
    key_mapping = {'InvNum':'Numéro de facture','Fourni':'Fournisseur', 'InvDate':'Date Facture','TT':'Total HT','TTC':'Total TTC','TVA':'TVA'}
    Results = {}
    for position, label in enumerate(true_predictions):
        if label not in key_mapping:
            continue
        word_index = position - 1
        # A negative index would silently wrap around to the end of `words`,
        # so the lower bound is checked together with the upper one.
        if not 0 <= word_index < len(words):
            continue
        print(position, label, words[word_index])
        Results[label] = words[word_index]
    return {key_mapping[label]: word for label, word in Results.items()}
@@ -117,10 +137,9 @@ def Get_Json(true_predictions,words):
 def Draw(image):
     start_time = time.time()
-    image = enhance_image(image,1.3,1.5)
-    true_predictions, true_boxes,words = Run_model(image)
     draw = ImageDraw.Draw(image)
     label2color = {
         'InvNum': 'blue',
         'InvDate': 'green',
@@ -135,7 +154,6 @@ def Draw(image):
     rectangle_thickness = 4
     label_x_offset = 20
     label_y_offset = -30
     # Custom font size
     custom_font_size = 25
@@ -143,34 +161,22 @@ def Draw(image):
     font_path = "arial.ttf"  # Specify the path to your font file
     custom_font = ImageFont.truetype(font_path, custom_font_size)
-    for prediction, box in zip(true_predictions, true_boxes):
-        predicted_label = prediction
-        # Check if the predicted label exists in the label2color dictionary
-        if predicted_label in label2color:
-            color = label2color[predicted_label]
-        else:
-            color = 'black'  # Default color if label is not found
-        if predicted_label != "Autre":
-            draw.rectangle(box, outline=color, width=rectangle_thickness)
-            # Draw text using the custom font and size
-            draw.rectangle((box[0], box[1]+ label_y_offset,box[2],box[3]+ label_y_offset), fill=color)
-            draw.text((box[0] + label_x_offset, box[1] + label_y_offset), text=predicted_label, fill='white', font=custom_font)
-    # Get the Results Json File
-    Results = Get_Json(true_predictions,words)
     end_time = time.time()
     execution_time = end_time - start_time
     return image,Results,execution_time
-#############################################################################
-#############################################################################
def Add_Results(data):
    """Expose every entry of ``data`` as an editable sidebar text field.

    Each key is passed to ``st.sidebar.text_input`` as the label and its
    current value as the initial text; the value returned by the widget is
    stored back under the same key, so ``data`` is updated in place.
    """
    for field_name, current_value in tuple(data.items()):
        data[field_name] = st.sidebar.text_input(field_name, current_value)
 #############################################################################
 #############################################################################

          width * (bbox[2] / 1000),
          height * (bbox[3] / 1000),
      ]
def get_word(bboxes, image, ocr=None):
    """Read the text found inside one bounding box of ``image``.

    Args:
        bboxes: Four coordinates ``(x_min, y_min, x_max, y_max)`` of the
            region to read.
        image: Object exposing ``crop(box)``; the cropped region is converted
            with ``np.array`` before being handed to the OCR engine.
        ocr: Optional OCR engine exposing ``ocr(array, cls=True)``. When it is
            omitted a new engine is built with ``Paddle()``; passing one lets
            a caller reuse a single engine across several boxes.

    Returns:
        The text of the first line reported by the OCR engine, or an empty
        string when the engine reports no text for the region.
    """
    if ocr is None:
        ocr = Paddle()
    x_min, y_min, x_max, y_max = bboxes
    roi = image.crop((x_min, y_min, x_max, y_max))  # Region of interest
    result = ocr.ocr(np.array(roi), cls=True)  # Apply OCR to the region
    # The engine may report nothing for a crop (no page entry, or a page
    # entry that is None/empty); indexing into it would raise, so return
    # an empty word instead.
    if not result or not result[0]:
        return ""
    return result[0][0][1][0]
 #############################################################################
 #############################################################################
def get_Finale_results(offset_mapping,id2label,image,prediction_scores,predictions,token_boxes):
    """Keep the best-scoring token of every label and read its text.

    Tokens whose offset does not start at 0 are skipped, as are tokens
    labelled "Autre". For each remaining label only the token with the
    strictly highest score is kept (the earliest one wins a tie), and the
    text inside its box is obtained with ``get_word``.

    Args:
        offset_mapping: Object supporting ``.squeeze().tolist()`` that yields
            one offset pair per token.
        id2label: Mapping from a predicted class id to its label name.
        image: Image exposing ``size`` as ``(width, height)``; also passed to
            ``get_word``.
        prediction_scores: Per-token sequences of class scores, indexed by
            class id.
        predictions: Per-token predicted class ids.
        token_boxes: Per-token boxes, converted with ``unnormalize_box``.

    Returns:
        dict mapping each kept label to ``(word, score, box)``.
    """
    width, height = image.size
    token_starts = np.array(offset_mapping.squeeze().tolist())[:, 0]
    is_subword = token_starts != 0

    # label -> (score, box) of the best candidate seen so far
    best_by_label = {}
    for idx, (pred, score, box) in enumerate(zip(predictions, prediction_scores, token_boxes)):
        if is_subword[idx]:
            continue
        label = id2label[pred]
        confidence = score[pred]
        pixel_box = unnormalize_box(box, width, height)
        if label == "Autre":
            continue
        # Eliminate duplicate predictions: only a strictly better score replaces an entry
        if label not in best_by_label or confidence > best_by_label[label][0]:
            best_by_label[label] = (confidence, pixel_box)

    return {
        label: (get_word(pixel_box, image), confidence, pixel_box)
        for label, (confidence, pixel_box) in best_by_label.items()
    }
 def Run_model(image):
+    encoding,offset_mapping,_ = Encode(image)
     device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
     # load the fine-tuned model from the hub
     model = LayoutLMv3ForTokenClassification.from_pretrained(model_Hugging_path)
     # forward pass
     outputs = model(**encoding)
+    prediction_scores = outputs.logits.softmax(-1).squeeze().tolist()
     predictions = outputs.logits.argmax(-1).squeeze().tolist()
     token_boxes = encoding.bbox.squeeze().tolist()
+    id2label, _ = Labels()
+    Finale_results=get_Finale_results(offset_mapping,id2label,image,prediction_scores,predictions,token_boxes)
+    return Finale_results
 #############################################################################
 #############################################################################
def Get_Json(Finale_results):
    """Build the French-labelled summary of the recognised invoice fields.

    Args:
        Finale_results: Mapping from a label name to a sequence whose first
            item is the value to report for that label.

    Returns:
        dict mapping the French display name of each recognised field
        ('InvNum', 'Fourni', 'InvDate', 'TT', 'TTC', 'TVA') to that first
        item; every other label is left out.
    """
    key_mapping = {'InvNum':'Numéro de facture','Fourni':'Fournisseur', 'InvDate':'Date Facture','TT':'Total HT','TTC':'Total TTC','TVA':'TVA'}
    return {
        key_mapping[label]: Finale_results[label][0]
        for label in Finale_results
        if label in key_mapping
    }
 def Draw(image):
     start_time = time.time()
+    image = enhance_image(image,1.3,1.7)
+    Finale_results = Run_model(image)
     draw = ImageDraw.Draw(image)
     label2color = {
         'InvNum': 'blue',
         'InvDate': 'green',
     rectangle_thickness = 4
     label_x_offset = 20
     label_y_offset = -30
     # Custom font size
     custom_font_size = 25
     font_path = "arial.ttf"  # Specify the path to your font file
     custom_font = ImageFont.truetype(font_path, custom_font_size)
+    for result in Finale_results:
+        predicted_label = result
+        box = Finale_results[result][2]
+        color = label2color[result]
+        draw.rectangle(box, outline=color, width=rectangle_thickness)
+        #print(box)
+        # Draw text using the custom font and size
+        draw.rectangle((box[0], box[1]+ label_y_offset,box[2],box[3]+ label_y_offset), fill=color)
+        draw.text((box[0] + label_x_offset, box[1] + label_y_offset), text=predicted_label, fill='white', font=custom_font)
+    Results = Get_Json(Finale_results)
     end_time = time.time()
     execution_time = end_time - start_time
     return image,Results,execution_time
 #############################################################################
 #############################################################################