import os
import cv2
import requests
from PIL import Image
import logging
import torch
from llm.llm_service import get_llm
from langchain_core.tools import tool, Tool
from langchain_community.tools import DuckDuckGoSearchResults
from langchain_groq import ChatGroq
from utils import draw_panoptic_segmentation
from typing import List
from tool_utils.clip_segmentation import CLIPSEG
from tool_utils.yolo_world import YoloWorld
from tool_utils.image_qualitycheck import brightness_check, gaussian_noise_check, snr_check
from tool_utils.image_description import SMOLVLM2

try:
    # from transformers import BlipProcessor, BlipForConditionalGeneration
    from transformers import AutoImageProcessor, Mask2FormerForUniversalSegmentation
except ImportError as err:
    logging.error("Import error: {}".format(err))

device = 'cuda' if torch.cuda.is_available() else 'cpu'

logging.info("Loading Foundation Models")
try:
    clipseg_model = CLIPSEG()
except Exception as err:
    logging.error("Unable to load CLIPSeg model: {}".format(err))

try:
    maskformer_processor = AutoImageProcessor.from_pretrained("facebook/mask2former-swin-base-coco-panoptic")
    maskformer_model = Mask2FormerForUniversalSegmentation.from_pretrained("facebook/mask2former-swin-base-coco-panoptic")
except Exception as err:
    logging.error("Unable to load Mask2Former model: {}".format(err))

try:
    yolo_world_model = YoloWorld()
except Exception as err:
    logging.error("Unable to load YOLO-World model: {}".format(err))

def get_groq_model(model_name="gemma2-9b-it"):
    # ChatGroq reads the API key from the GROQ_API_KEY environment variable.
    if not os.environ.get("GROQ_API_KEY"):
        logging.warning("GROQ_API_KEY is not set; ChatGroq initialisation may fail")
    llm_groq = ChatGroq(model=model_name)
    return llm_groq

def panoptic_image_segemntation(image_path: str) -> str:
    """
    The tool is used to create a panoptic segmentation mask. It uses the Mask2Former network to create a panoptic
    segmentation of all the objects present in the image. Use the tool when the user asks to create a panoptic
    image segmentation or panoptic mask.
    """
    if image_path.startswith('https'):
        image = Image.open(requests.get(image_path, stream=True).raw).convert('RGB')
    else:
        image = Image.open(image_path).convert('RGB')

    maskformer_model.to(device)
    inputs = maskformer_processor(image, return_tensors="pt").to(device)
    with torch.no_grad():
        outputs = maskformer_model(**inputs)

    prediction = maskformer_processor.post_process_panoptic_segmentation(outputs, target_sizes=[image.size[::-1]])[0]
    save_mask_path = draw_panoptic_segmentation(maskformer_model, prediction['segmentation'], prediction['segments_info'])

    labels = []
    for segment in prediction['segments_info']:
        label_name = maskformer_model.config.id2label[segment['label_id']]
        labels.append(label_name)
    labels = " ".join(labels)
    return 'Panoptic segmentation image {}. Found labels {} in the image'.format(save_mask_path, labels)

def image_description(img_path: str) -> str:
    """
    Use this tool to describe the image.
    The tool also helps you to identify the weather in the image.
    """
    smol_vlm = SMOLVLM2(memory_efficient=True)
    query = "Describe the image. Highlight the details in 2-3 lines."
    response = smol_vlm.run_inference_on_image(image_path=img_path, query=query)
    return response

def clipsegmentation_mask(input_data: str) -> str:
    """
    The tool helps to extract object masks from the image.
    The input is a single comma-separated string: the image path followed by the objects to segment.
    """
    data = input_data.split(",")
    image_path = data[0]
    object_prompts = data[1:]
    masks = clipseg_model.get_segmentation_mask(image_path, object_prompts)
    return masks

def generate_bounding_box_tool(input_data: str) -> str:
    """
    Use this tool when it is required to detect objects and provide bounding boxes
    for the given image and list of objects.
    """
    logging.info("Bounding box tool input: {}".format(input_data))
    data = input_data.split(",")
    image_path = data[0]
    object_prompts = data[1:]
    object_data = yolo_world_model.run_yolo_infer(image_path, object_prompts)
    return object_data

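# Note on the input convention (illustrative example; the path below is hypothetical):
# both clipsegmentation_mask and generate_bounding_box_tool expect a single
# comma-separated string, e.g.
#     clipsegmentation_mask("image_store/street.jpg,car,person")
#     generate_bounding_box_tool("image_store/street.jpg,person,dog")
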
def object_extraction(image_path: str) -> str:
    """Use this tool to identify the objects within the image."""
    objects = []
    maskformer_model.to(device)
    image = cv2.imread(image_path)
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    inputs = maskformer_processor(image, return_tensors="pt")
    inputs = inputs.to(device)
    with torch.no_grad():
        outputs = maskformer_model(**inputs)
    prediction = maskformer_processor.post_process_panoptic_segmentation(outputs, target_sizes=[image.shape[:2]])[0]
    segments_info = prediction['segments_info']
    for segment in segments_info:
        segment_label_id = segment['label_id']
        segment_label = maskformer_model.config.id2label[segment_label_id]
        objects.append(segment_label)
    return "Detected objects are: " + ",".join(objects)

def get_image_quality(image_path: str) -> str:
    """
    This tool helps to find out the quality parameters of the image. The tool will determine whether the image is
    blurry or not and whether the image is bright or not.
    It also reports the signal-to-noise ratio (SNR) of the image.
    Example outputs of the tool:
    example 1: Image is blurry. Image is not bright. Signal to Noise is less than 1 - More Noise in image
    example 2: Image is not blurry. Image is bright. Signal to Noise is greater than 1 - More Signal in image
    """
    image = cv2.imread(image_path)
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    brightness_text = brightness_check(image)
    blurry_text = gaussian_noise_check(image)
    snr_text = snr_check(image)
    final_text = "Image properties are:\n{}\n{}\n{}".format(blurry_text, brightness_text, snr_text)
    return final_text

def get_all_tools():
    ## bind tools
    image_desc_tool = Tool(
        name='Image_Description_Tool',
        func=image_description,
        description="""
        The tool helps to describe the image or create a caption for the image.
        If the user asks to describe or generate a caption for the image, use this tool.
        This tool can also be used to identify the weather within the image.
        Example user questions:
        1. Describe the image?
        2. What does the weather look like in the image?
        """
    )
    clipseg_tool = Tool(
        name='ClipSegmentation-tool',
        func=clipsegmentation_mask,
        description="""Use this tool when the user asks to extract objects from the image.
        The input to the tool is the path of the image and the list of objects for which a segmentation mask is to be generated.
        For example:
        Query: Provide a segmentation mask of all road, car and dog in the image
        The tool will generate the segmentation mask of the objects in the image.
        For such a query from the user you need to first use the object extraction tool to identify the objects and then use this tool to
        generate the segmentation mask for the objects.
        """
    )
    bounding_box_generator = Tool(
        name='Bounding Box Generator',
        func=generate_bounding_box_tool,
        description="""The tool helps to provide bounding boxes for the given image and list of objects.
        Use this tool when the user asks to provide bounding boxes for the objects. If the user has not specified the names of the objects,
        then use the object extraction tool to identify the objects and then use this tool to generate the bounding boxes for the objects.
        The input to this tool is the path of the image and the list of objects for which bounding boxes are to be generated.
        For example:
        "action_input": "image_store/<image_path>,person,dog,sand"
        """
    )
    object_extractor = Tool(
        name="Object description Tool",
        func=object_extraction,
        description="The tool is used to describe the objects within the image. Use this tool if the user specifically asks to identify "
                    "what objects can be viewed in the image or to identify the objects within the image."
    )
    image_parameters_tool = Tool(
        name='Image Parameters_Tool',
        func=get_image_quality,
        description="""This tool will help you to determine
        - If the image is blurry or not
        - If the image is bright/sharp or not
        - The SNR (signal-to-noise) ratio of the image
        Based on the tool output take a proper decision regarding the image quality"""
    )
    panoptic_segmentation = Tool(
        name='panoptic_Segmentation_tool',
        func=panoptic_image_segemntation,
        description="The tool is used to create a panoptic segmentation mask. It uses the Mask2Former network to create a panoptic segmentation of all "
                    "the objects present in the image. Use the tool in case the user asks to create a panoptic segmentation or count objects in the image. "
                    "The tool also provides a list of objects along with the mask image of all segmented objects found in the image. "
                    "Use the tool if the user asks to create a panoptic image segmentation or panoptic mask."
    )
    tools = [
        DuckDuckGoSearchResults(),
        image_desc_tool,
        clipseg_tool,
        image_parameters_tool,
        object_extractor,
        bounding_box_generator,
        panoptic_segmentation
    ]
    return tools
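
# --- Usage sketch (illustrative addition, not part of the original module) ---
# A minimal example of how these tools might be bound to a ReAct-style agent.
# Assumptions: the legacy `initialize_agent` helper from `langchain.agents` is
# available in the installed LangChain version, GROQ_API_KEY is set, and
# "image_store/sample.jpg" is a hypothetical image path.
if __name__ == "__main__":
    from langchain.agents import initialize_agent, AgentType

    llm = get_groq_model()
    agent = initialize_agent(
        tools=get_all_tools(),
        llm=llm,
        agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
        verbose=True,
        handle_parsing_errors=True,
    )
    result = agent.invoke({"input": "Generate bounding boxes for person and dog in image_store/sample.jpg"})
    print(result)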