"""Gradio demo that captions a remote sensing image as a knowledge triplet.

The caption is generated by the ``nkasmanoff/git-planet`` causal LM using the
``microsoft/git-base`` processor.  The caption text is then passed to
``get_relations`` and ``get_entities`` from the local ``knowledge_extraction``
module and rendered as ``<entity0>-<relation>-><entity1>``.
"""
import re
import os
import sys

import gradio as gr
from PIL import Image
from transformers import AutoProcessor, AutoModelForCausalLM

# The local ``knowledge_extraction`` module is resolved relative to the
# working directory, so the path must be extended before importing it.
sys.path.append(os.getcwd())
from knowledge_extraction import get_entities, get_relations  # noqa: E402

device = 'cpu'
processor = AutoProcessor.from_pretrained("microsoft/git-base")
model = AutoModelForCausalLM.from_pretrained("nkasmanoff/git-planet").to(device)


def predict(image, max_length=64, device='cpu'):
    """Caption ``image`` and format the caption as a knowledge triplet.

    Args:
        image: Image to caption; the Gradio input below is configured with
            ``type='pil'``.  May be ``None`` when nothing was uploaded,
            because the input component is declared optional.
        max_length: Passed to ``model.generate`` as ``max_length``.
        device: Device the processed inputs are moved to.
            NOTE(review): the model itself lives on the module-level
            ``device``; presumably the two must match -- confirm before
            passing anything other than the default.

    Returns:
        A string of the form ``"<entity0>-<relation>-><entity1>"``, or a short
        message when no image was provided.
    """
    # The upload is optional, so the UI can call us without an image; answer
    # with a message instead of failing inside the processor.
    if image is None:
        return "No image provided."

    pixel_values = processor(images=image, return_tensors="pt").to(device).pixel_values
    generated_ids = model.generate(pixel_values=pixel_values, max_length=max_length)
    # Only the first decoded sequence is used.
    generated_caption = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]

    relation = get_relations(generated_caption)
    # Assumes get_entities returns at least two items -- TODO confirm against
    # knowledge_extraction.
    entity_pair = get_entities(generated_caption)
    return f"{entity_pair[0]}-{relation}->{entity_pair[1]}"


# NOTE(review): ``gr.inputs`` / ``gr.outputs``, ``optional=`` and
# ``theme="grass"`` look like legacy Gradio API; confirm against the pinned
# Gradio version before upgrading the dependency.
image_input = gr.inputs.Image(label="Please upload a remote sensing image", type = 'pil', optional=True)
caption_output = gr.outputs.Textbox(type="text",label="Captions")
title = "Satellite Image Captioning"

interface = gr.Interface(
    fn=predict,
    inputs=image_input,
    theme="grass",
    outputs=caption_output,
    title=title,
)
interface.launch(debug=True)