hiandrewfisher committed on
Commit
f630d31
·
verified ·
1 Parent(s): 3df9c0b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +6 -5
app.py CHANGED
@@ -17,7 +17,7 @@ logger.info("EasyOCR initialized.")
17
 
18
  logger.info("Loading nutrition extraction model...")
19
  # Load the model using the Hugging Face Transformers pipeline.
20
- # We force CPU inference by using device=-1.
21
  tokenizer = AutoTokenizer.from_pretrained("openfoodfacts/nutrition-extractor")
22
  model = AutoModelForTokenClassification.from_pretrained("openfoodfacts/nutrition-extractor")
23
  logger.info("Model loaded successfully.")
@@ -26,6 +26,7 @@ def ocr_extract(image: Image.Image):
26
  """
27
  Uses EasyOCR to extract text tokens and their bounding boxes from an image.
28
  Returns a list of tokens and corresponding boxes in [left, top, width, height] format.
 
29
  """
30
  # Convert PIL image to numpy array.
31
  np_image = np.array(image)
@@ -39,10 +40,10 @@ def ocr_extract(image: Image.Image):
39
  # Convert the bounding box (list of 4 points) to [left, top, width, height].
40
  xs = [point[0] for point in bbox]
41
  ys = [point[1] for point in bbox]
42
- left = min(xs)
43
- top = min(ys)
44
- width = max(xs) - left
45
- height = max(ys) - top
46
  boxes.append([left, top, width, height])
47
  logger.info(f"OCR extracted {len(tokens)} tokens.")
48
  return tokens, boxes
 
17
 
18
  logger.info("Loading nutrition extraction model...")
19
  # Load the model using the Hugging Face Transformers pipeline.
20
+ # Force CPU inference with device=-1.
21
  tokenizer = AutoTokenizer.from_pretrained("openfoodfacts/nutrition-extractor")
22
  model = AutoModelForTokenClassification.from_pretrained("openfoodfacts/nutrition-extractor")
23
  logger.info("Model loaded successfully.")
 
26
  """
27
  Uses EasyOCR to extract text tokens and their bounding boxes from an image.
28
  Returns a list of tokens and corresponding boxes in [left, top, width, height] format.
29
+ Bounding box coordinates are cast to int.
30
  """
31
  # Convert PIL image to numpy array.
32
  np_image = np.array(image)
 
40
  # Convert the bounding box (list of 4 points) to [left, top, width, height].
41
  xs = [point[0] for point in bbox]
42
  ys = [point[1] for point in bbox]
43
+ left = int(min(xs))
44
+ top = int(min(ys))
45
+ width = int(max(xs) - left)
46
+ height = int(max(ys) - top)
47
  boxes.append([left, top, width, height])
48
  logger.info(f"OCR extracted {len(tokens)} tokens.")
49
  return tokens, boxes