Spaces:

Kilos1
/

Nutrition_App

Runtime error

Kilos1 committed on Mar 9

Commit

fdb58e3

verified ·

1 Parent(s): 573e67a

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -1,22 +1,17 @@
 import torch
 import gradio as gr
 from PIL import Image
-from transformers import AutoProcessor, AutoModel
 # Load the model and processor
-model_id = "OpenGVLab/InternVL2_5-78B"
 device = "cuda" if torch.cuda.is_available() else "cpu"
 # Initialize the model and processor
-model = AutoModel.from_pretrained(
-    model_id,
-    torch_dtype=torch.bfloat16,
-    low_cpu_mem_usage=True,
-    use_flash_attn=True,
-    trust_remote_code=True
-).eval().to(device)
-processor = AutoProcessor.from_pretrained(model_id)
 def generate_model_response(image_file, user_query):
     """
@@ -34,18 +29,14 @@ def generate_model_response(image_file, user_query):
         raw_image = Image.open(image_file).convert("RGB")
         # Prepare inputs for the model using the processor
-        inputs = processor(
-            text=user_query,
-            images=raw_image,
-            return_tensors="pt"
-        ).to(device)
         # Generate response from the model
-        outputs = model.generate(**inputs, max_new_tokens=50)
         # Decode and return the response
-        response_text = processor.decode(outputs[0], skip_special_tokens=True)
-        return response_text
     except Exception as e:
         print(f"Error in generating response: {e}")

+import re
+import io
 import torch
 import gradio as gr
 from PIL import Image
+from transformers import OwlViTProcessor, OwlViTForImageClassification
 # Load the model and processor
+model_id = "google/owlvit-base-patch16"
 device = "cuda" if torch.cuda.is_available() else "cpu"
 # Initialize the model and processor
+model = OwlViTForImageClassification.from_pretrained(model_id).to(device)
+processor = OwlViTProcessor.from_pretrained(model_id)
 def generate_model_response(image_file, user_query):
     """
         raw_image = Image.open(image_file).convert("RGB")
         # Prepare inputs for the model using the processor
+        inputs = processor(images=raw_image, text=user_query, return_tensors="pt").to(device)
         # Generate response from the model
+        outputs = model(**inputs)
         # Decode and return the response
+        response_text = outputs.logits.argmax(dim=-1)  # Example of how to process output
+        return f"Detected class ID: {response_text.item()}"
     except Exception as e:
         print(f"Error in generating response: {e}")