Spaces:

hiandrewfisher
/

nutri-extractor-api

Sleeping

App Files Files Community

hiandrewfisher committed on Mar 18

Commit

3df9c0b

verified ·

1 Parent(s): 46c29ce

Update app.py

Browse files

Files changed (1) hide show

app.py +78 -26

app.py CHANGED Viewed

@@ -1,39 +1,91 @@
 import gradio as gr
-from transformers import pipeline
 from PIL import Image
 import torch
-# Load the model using Transformers' pipeline.
-print("Loading model...")
-model_pipeline = pipeline('token-classification', 'openfoodfacts/nutrition-extractor')
-print("Model loaded successfully.")
 def predict(image: Image.Image):
     """
-    Receives an image, passes it directly to the nutrition extraction model,
-    and processes the token-classification output to aggregate nutritional values.
-    Assumes the model performs OCR internally.
     """
-    # Directly pass the image to the model pipeline.
-    results = model_pipeline(image)
-    # Process the output: aggregate numeric values for each entity label.
     extracted_data = {}
-    for item in results:
-        # Expected structure: {'word': '100', 'entity': 'CALORIES', 'score': 0.98, ...}
-        label = item.get('entity', 'O').lower()
-        if label != 'o':  # Skip non-entity tokens.
-            token_text = item.get('word', '')
-            # Extract digits and decimal point.
-            num_str = "".join(filter(lambda c: c.isdigit() or c == '.', token_text))
-            try:
-                value = float(num_str)
-                extracted_data[label] = extracted_data.get(label, 0) + value
-            except ValueError:
-                continue
     if not extracted_data:
         return {"error": "No nutritional information extracted."}
     return extracted_data
 # Create a Gradio interface that exposes the API.
@@ -41,9 +93,9 @@ demo = gr.Interface(
     fn=predict,
     inputs=gr.Image(type="pil"),
     outputs="json",
-    title="Nutrition Extractor API",
-    description="Upload an image of a nutrition table to extract nutritional values. The model performs OCR internally."
 )
 if __name__ == "__main__":
-    demo.launch()

 import gradio as gr
+import easyocr
+import numpy as np
 from PIL import Image
+from transformers import AutoTokenizer, AutoModelForTokenClassification
 import torch
+import logging
+# Set up logging for debugging.
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+logger.info("Initializing EasyOCR...")
+# Initialize the EasyOCR reader for English.
+reader = easyocr.Reader(['en'], gpu=False)
+logger.info("EasyOCR initialized.")
+logger.info("Loading nutrition extraction model...")
+# Load the tokenizer and token-classification model directly via the Transformers Auto classes
+# (no pipeline is used). No device argument is passed, so the model is not explicitly moved to a GPU.
+tokenizer = AutoTokenizer.from_pretrained("openfoodfacts/nutrition-extractor")
+model = AutoModelForTokenClassification.from_pretrained("openfoodfacts/nutrition-extractor")
+logger.info("Model loaded successfully.")
+def ocr_extract(image: Image.Image):
+    """
+    Uses EasyOCR to extract text tokens and their bounding boxes from an image.
+    Returns a list of tokens and corresponding boxes in [left, top, width, height] format.
+    """
+    # Convert PIL image to numpy array.
+    np_image = np.array(image)
+    results = reader.readtext(np_image)
+    tokens = []
+    boxes = []
+    for bbox, text, confidence in results:
+        if text.strip():
+            tokens.append(text)
+            # Convert the bounding box (list of 4 points) to [left, top, width, height].
+            xs = [point[0] for point in bbox]
+            ys = [point[1] for point in bbox]
+            left = min(xs)
+            top = min(ys)
+            width = max(xs) - left
+            height = max(ys) - top
+            boxes.append([left, top, width, height])
+    logger.info(f"OCR extracted {len(tokens)} tokens.")
+    return tokens, boxes
 def predict(image: Image.Image):
     """
+    Runs OCR with EasyOCR to extract tokens and bounding boxes,
+    then uses the nutrition extraction model to classify tokens and aggregate nutritional values.
     """
+    tokens, boxes = ocr_extract(image)
+    if len(tokens) == 0:
+        logger.error("No text detected in the image.")
+        return {"error": "No text detected in the image."}
+    # Prepare inputs: pass the tokens and boxes to the tokenizer.
+    encoding = tokenizer(tokens, boxes=boxes, return_tensors="pt", truncation=True, padding=True)
+    try:
+        outputs = model(**encoding)
+    except Exception as e:
+        logger.error(f"Error during model inference: {e}")
+        return {"error": f"Model inference error: {e}"}
+    # Get predicted labels for each token.
+    predictions = torch.argmax(outputs.logits, dim=2)
     extracted_data = {}
+    for token, pred in zip(tokens, predictions[0].tolist()):
+        label = model.config.id2label.get(pred, "O").lower()
+        if label == "o":
+            continue
+        # Extract numeric value from token.
+        num_str = "".join(filter(lambda c: c.isdigit() or c == '.', token))
+        try:
+            value = float(num_str)
+            extracted_data[label] = extracted_data.get(label, 0) + value
+        except ValueError:
+            continue
     if not extracted_data:
+        logger.warning("No nutritional information extracted.")
         return {"error": "No nutritional information extracted."}
+    logger.info(f"Extracted data: {extracted_data}")
     return extracted_data
 # Create a Gradio interface that exposes the API.
     fn=predict,
     inputs=gr.Image(type="pil"),
     outputs="json",
+    title="Nutrition Extractor API with EasyOCR",
+    description="Upload an image of a nutrition table to extract nutritional values. The pipeline uses EasyOCR to extract tokens and bounding boxes, then processes them with the openfoodfacts/nutrition-extractor model."
 )
 if __name__ == "__main__":
+    demo.launch(share=True)