import os
import sys
import subprocess

# Clone the repository if it is not already present
if not os.path.exists("edge_vlm"):
    subprocess.run(
        ["git", "clone", "https://huggingface.co/irotem98/edge_vlm"],
        check=True,
    )

# Change directory to the cloned repository
os.chdir("edge_vlm")

# Install the required dependencies into the current Python environment
subprocess.run(
    [sys.executable, "-m", "pip", "install", "-r", "requirements.txt"],
    check=True,
)

# Make the cloned repository importable from the new working directory,
# then import the model class it provides
sys.path.insert(0, os.getcwd())
from model import MoondreamModel

import torch
import gradio as gr

# Load the model and tokenizer
model = MoondreamModel.load_model()
tokenizer = MoondreamModel.load_tokenizer()

# Default question shown to the user; it appears in the interface description
# only (the generate_caption call below takes no question argument)
default_question = "Describe the image."


def generate_caption_with_default(image):
    """Preprocess the uploaded image and return the generated caption."""
    preprocessed_image = MoondreamModel.preprocess_image(image)
    caption = MoondreamModel.generate_caption(model, preprocessed_image, tokenizer)
    return caption


# Create the Gradio interface (gr.inputs.Image is deprecated; use gr.Image instead)
interface = gr.Interface(
    fn=generate_caption_with_default,
    inputs=gr.Image(type="pil", label="Upload an Image"),
    outputs="text",
    title="Image Caption Generator",
    description=f"The default question is: '{default_question}'. Upload an image to generate a description.",
)

# Launch the interface
interface.launch()
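
# Optional quick check outside the web UI: a minimal sketch that assumes a sample
# image (hypothetically named "img.jpg") sits next to this script. Uncomment to run
# it instead of, or before, launching the Gradio interface above:
#
# from PIL import Image
# sample = Image.open("img.jpg")
# print(generate_caption_with_default(sample))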