Commit 6433553 (parent: 19077ad): "trying to fix bugs"

Files changed: app.py (+26, -24), requirements.txt (+2, -2)
app.py (CHANGED):

@@ -8,8 +8,9 @@ from transformers import (
     SiglipVisionModel,
     AutoTokenizer,
     AutoImageProcessor,
-    AutoModelForCausalLM
+    AutoModelForCausalLM
 )
+from peft import PeftModel
 from PIL import Image
 
 # Initialize device
@@ -19,23 +20,35 @@ print(f"Using device: {device}")
 # Load models and processors
 def load_models():
     # Load SigLIP
-
+    print("Loading SigLIP model...")
+    siglip_model = SiglipVisionModel.from_pretrained(
+        "google/siglip-so400m-patch14-384",
+        torch_dtype=torch.float32,
+        low_cpu_mem_usage=True
+    ).to(device)
     siglip_processor = AutoImageProcessor.from_pretrained("google/siglip-so400m-patch14-384")
 
-    # Load Phi model
-    print(
-
-        "
-
-
-
+    # Load base Phi-3 model
+    print("Loading Phi-3 model...")
+    base_model = AutoModelForCausalLM.from_pretrained(
+        "microsoft/phi-3-mini-4k-instruct",
+        torch_dtype=torch.float32,
+        low_cpu_mem_usage=True
+    ).to(device)
+
+    # Load the trained LoRA weights
+    print("Loading trained LoRA weights...")
+    phi_model = PeftModel.from_pretrained(
+        base_model,
+        "phi_model_trained"
     )
 
-    phi_tokenizer = AutoTokenizer.from_pretrained("microsoft/
+    phi_tokenizer = AutoTokenizer.from_pretrained("microsoft/phi-3-mini-4k-instruct")
     if phi_tokenizer.pad_token is None:
         phi_tokenizer.pad_token = phi_tokenizer.eos_token
 
     # Load trained projections
+    print("Loading projection layers...")
     linear_proj = torch.load('linear_projection_final.pth', map_location=device)
     image_text_proj = torch.load('image_text_proj.pth', map_location=device)
 
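The substance of the fix is in this hunk: the Space now loads a frozen Phi-3-mini base model and attaches the trained LoRA adapter with PEFT instead of the old single-model path. A minimal standalone sketch of that pattern, assuming the "phi_model_trained" directory holds adapter files written by save_pretrained() (the merge step at the end is illustrative, not part of this commit):

```python
import torch
from transformers import AutoModelForCausalLM
from peft import PeftModel

device = "cuda" if torch.cuda.is_available() else "cpu"

# Load the frozen base model first; the adapter only stores LoRA deltas.
base = AutoModelForCausalLM.from_pretrained(
    "microsoft/phi-3-mini-4k-instruct",
    torch_dtype=torch.float32,   # float32 keeps CPU inference simple
    low_cpu_mem_usage=True,
).to(device)

# Attach the trained LoRA weights; "phi_model_trained" must contain
# adapter_config.json plus the adapter weights from save_pretrained().
model = PeftModel.from_pretrained(base, "phi_model_trained")

# Optionally fold the adapter into the base weights for faster inference.
model = model.merge_and_unload()
```

One caveat worth noting: torch.load() on the two .pth files deserializes whole pickled modules, which ties the checkpoints to the exact class definitions in scope; saving state_dicts and loading them into freshly constructed layers is the more portable pattern.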
@@ -71,15 +84,11 @@ def get_image_embedding(image, siglip_model, siglip_processor, linear_proj, devi
     with torch.no_grad():
         # Process image through SigLIP
         inputs = siglip_processor(image, return_tensors="pt")
-        # Move inputs to device
         inputs = {k: v.to(device) if isinstance(v, torch.Tensor) else v for k, v in inputs.items()}
         outputs = siglip_model(**inputs)
         image_features = outputs.pooler_output
-
-        # Project through trained linear layer
         projected_features = linear_proj(image_features)
-
-        return projected_features
+        return projected_features
 
 def get_random_images():
     # Select 10 random images from first 100
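The pipeline this hunk tightens is: SigLIP's pooled image features pass through the trained linear projection to match the language model's embedding width. A standalone sketch with an untrained stand-in for linear_proj; the 1152 and 3072 dimensions are assumptions (SigLIP-so400m's pooled width and Phi-3-mini's hidden size), not something stated in the diff:

```python
import torch
from PIL import Image
from transformers import AutoImageProcessor, SiglipVisionModel

device = "cpu"
vision = SiglipVisionModel.from_pretrained("google/siglip-so400m-patch14-384").to(device)
processor = AutoImageProcessor.from_pretrained("google/siglip-so400m-patch14-384")

# Stand-in for the trained projection; 1152 -> 3072 assumed
# (SigLIP-so400m pooled width -> Phi-3-mini hidden size).
linear_proj = torch.nn.Linear(1152, 3072).to(device)

image = Image.new("RGB", (384, 384))  # placeholder input
with torch.no_grad():
    inputs = processor(image, return_tensors="pt")
    inputs = {k: v.to(device) for k, v in inputs.items()}
    pooled = vision(**inputs).pooler_output   # shape (1, 1152)
    projected = linear_proj(pooled)           # shape (1, 3072)
print(projected.shape)
```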
@@ -166,9 +175,7 @@ with gr.Blocks() as demo:
 
     with gr.Row():
         with gr.Column():
-            # Button to get random images
             random_btn = gr.Button("Get Random Images")
-            # Gallery to display images
             gallery = gr.Gallery(
                 label="Click an image to select it",
                 show_label=True,
@@ -180,17 +187,13 @@ with gr.Blocks() as demo:
             )
 
         with gr.Column():
-            # Display selected image
             selected_img = gr.Image(label="Selected Image", height=200)
-            # Question buttons
             q_buttons = []
             for i, q in enumerate(questions):
                 btn = gr.Button(f"Q{i+1}: {q}")
                 q_buttons.append(btn)
-            # Answer textbox
             answer_box = gr.Textbox(label="Answer", lines=3)
 
-    # Handle random image button click
     def on_random_click():
         images, indices = get_random_images()
         return {
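on_random_click returns a dict keyed by output components, which is Gradio's mechanism for updating several outputs from one handler without depending on positional order. A small self-contained sketch of that return style (the components here are illustrative placeholders, not the app's):

```python
import gradio as gr

with gr.Blocks() as demo:
    btn = gr.Button("Refresh")
    first = gr.Textbox(label="First")
    second = gr.Textbox(label="Second")

    def refresh():
        # A dict keyed by component updates exactly the outputs named;
        # declared outputs missing from the dict are left unchanged.
        return {first: "new value", second: "another value"}

    btn.click(refresh, inputs=None, outputs=[first, second])

demo.launch()
```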
@@ -206,7 +209,6 @@ with gr.Blocks() as demo:
         outputs=[gallery, image_indices, selected_image_tensor, selected_img, answer_box]
     )
 
-    # Handle image selection
     def on_image_select(evt: gr.SelectData, images, indices):
         if images is None or evt.index >= len(images):
             return None, None, ""
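on_image_select leans on Gradio's event-data injection: a parameter annotated with gr.SelectData is filled in by the framework with click metadata (notably evt.index) and does not appear in the inputs list. A minimal sketch of the same wiring, with placeholder components:

```python
import numpy as np
import gradio as gr

with gr.Blocks() as demo:
    # Two flat-color placeholder images so the demo is self-contained.
    images = [np.full((64, 64, 3), 60, dtype=np.uint8),
              np.full((64, 64, 3), 200, dtype=np.uint8)]
    gallery = gr.Gallery(value=images, label="Click an image")
    picked = gr.Textbox(label="Selection")

    def on_select(evt: gr.SelectData):
        # evt is injected because of the annotation; evt.index is the
        # position of the clicked gallery item.
        return f"clicked item {evt.index}"

    gallery.select(on_select, inputs=None, outputs=picked)

demo.launch()
```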
@@ -220,7 +222,6 @@ with gr.Blocks() as demo:
         outputs=[selected_image_tensor, selected_img, answer_box]
     )
 
-    # Handle question button clicks
     for i, btn in enumerate(q_buttons):
         btn.click(
             generate_answer,
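Registering handlers in a loop like this is safe as long as the per-button data travels through Gradio inputs rather than the loop variable; a bare lambda closing over q would see only the loop's final value. A hedged sketch of the default-argument idiom that sidesteps that pitfall, with placeholder names:

```python
import gradio as gr

questions = ["What is in the image?", "How many objects are there?"]

def generate_answer(question):
    return f"(model answer for: {question})"  # stand-in for real inference

with gr.Blocks() as demo:
    answer_box = gr.Textbox(label="Answer")
    for q in questions:
        btn = gr.Button(q)
        # q=q binds the current question at definition time; without it,
        # every button would dispatch the last question in the list.
        btn.click(lambda q=q: generate_answer(q), inputs=None, outputs=answer_box)

demo.launch()
```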
@@ -228,4 +229,5 @@ with gr.Blocks() as demo:
             outputs=answer_box
         )
 
-
+# Launch with minimal settings
+demo.queue(max_size=1).launch(show_error=True)
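The new launch line ties in with the commit message: queue(max_size=1) keeps at most one request waiting so a CPU-only Space doesn't stack up generation jobs, and show_error=True surfaces Python exceptions in the browser instead of a bare "Runtime error" banner. The same configuration on a trivial app, for illustration:

```python
import gradio as gr

def echo(text):
    return text

with gr.Blocks() as demo:
    inp = gr.Textbox(label="Input")
    out = gr.Textbox(label="Output")
    inp.submit(echo, inputs=inp, outputs=out)

# max_size=1: requests beyond the one already queued are rejected
# immediately rather than piling up on a single worker.
# show_error=True: exceptions are shown in the UI, which makes
# debugging a crashing Space much easier.
demo.queue(max_size=1).launch(show_error=True)
```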
requirements.txt (CHANGED):

@@ -6,5 +6,5 @@ tqdm>=4.65.0
 numpy>=1.24.0
 accelerate>=0.25.0
 gradio>=4.19.0
-
-
+peft>=0.7.0
+scipy>=1.11.0
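The dependency changes mirror the code: peft provides PeftModel for loading the LoRA adapter, while the reason for scipy isn't stated in the commit (it is a common transitive requirement of LoRA and quantization tooling). A quick sanity check that an environment meets the new pins:

```python
# Verify the two new dependencies are importable and recent enough.
import peft
import scipy

print("peft", peft.__version__)    # expect >= 0.7.0
print("scipy", scipy.__version__)  # expect >= 1.11.0
```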