# cerebras.modelzoo.data.multimodal.datasets.OpenImages

# Copyright 2022 Cerebras Systems.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os

import pandas as pd
import torch
from torchvision.io import ImageReadMode, read_image
from torchvision.utils import draw_bounding_boxes, save_image

from cerebras.modelzoo.data.multimodal.datasets import BaseDataset
from cerebras.modelzoo.data.multimodal.datasets.features import (
    Bbox,
    ImageLabels,
    ObjectDetectionFeaturesDict,
)

_URLS = {
    "webpage": "https://storage.googleapis.com/openimages/web/download_v4.html",
    # Images: refer to the webpage above. Install awscli, then download using `aws s3 --no-sign-request cp <url> [target_dir]`
    "train_images": [
        f"s3://open-images-dataset/tar/train_{i}.tar.gz"
        for i in "0123456789abcdef"
    ],
    "test_images": [f"s3://open-images-dataset/tar/test.tar.gz"],
    "validation_images": [f"s3://open-images-dataset/tar/validation.tar.gz"],
    # Label info: Download using `wget -P <path_to_download_folder> <url>`
    "train_human_labels": "https://storage.googleapis.com/openimages/2018_04/train/train-annotations-human-imagelabels.csv",
    "train_machine_labels": "https://storage.googleapis.com/openimages/2018_04/train/train-annotations-machine-imagelabels.csv",
    "test_human_labels": "https://storage.googleapis.com/openimages/2018_04/test/test-annotations-human-imagelabels.csv",
    "test_machine_labels": "https://storage.googleapis.com/openimages/2018_04/test/test-annotations-machine-imagelabels.csv",
    "validation_human_labels": "https://storage.googleapis.com/openimages/2018_04/validation/validation-annotations-human-imagelabels.csv",
    "validation_machine_labels": "https://storage.googleapis.com/openimages/2018_04/validation/validation-annotations-machine-imagelabels.csv",
    "train-annotations-bbox": "https://storage.googleapis.com/openimages/2018_04/train/train-annotations-bbox.csv",
    "test-annotations-bbox": "https://storage.googleapis.com/openimages/2018_04/test/test-annotations-bbox.csv",
    "validation-annotations-bbox": "https://storage.googleapis.com/openimages/2018_04/validation/validation-annotations-bbox.csv",
    "class-image-labels": "https://storage.googleapis.com/openimages/2018_04/class-descriptions.csv",
    "trainable-image-labels": "https://storage.googleapis.com/openimages/2018_04/classes-trainable.txt",
    # Trainable classes are those with at least 100 positive human-verifications in the V4 training set
    "bbox-labels": "https://storage.googleapis.com/openimages/2018_04/class-descriptions-boxable.csv",
}
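
# A minimal sketch (not part of the original module) of fetching the label and
# annotation CSVs listed in `_URLS`. It assumes those URLs are still reachable;
# the S3 image tarballs are skipped, since they are meant to be downloaded with
# awscli as described in the comments above.
def _download_metadata(target_dir):
    import urllib.request

    os.makedirs(target_dir, exist_ok=True)
    for key, url in _URLS.items():
        # Skip the webpage link and the S3 image tarball lists
        if key == "webpage" or isinstance(url, list):
            continue
        # The file basenames match what `OpenImagesv4.__init__` expects
        dest = os.path.join(target_dir, os.path.basename(url))
        if not os.path.exists(dest):
            urllib.request.urlretrieve(url, dest)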


class OpenImagesv4(BaseDataset):
    """Builds an OpenImages v4 dataset along with all of its metadata."""
    def __init__(self, data_dir, split, *args):
        if split not in ["train", "validation", "test"]:
            raise ValueError(
                f"Split={split} invalid. "
                f"Accepted values are one of ('train', 'validation', 'test')"
            )
        self.split = split
        self.data_dir = data_dir
        self.images_dir = os.path.join(self.data_dir, self.split)

        # Paths to the relevant metadata files (see `_URLS`)
        self._csv_image_labels_desc = os.path.join(
            self.data_dir, "class-descriptions.csv"
        )
        self._csv_trainable_labels_desc = os.path.join(
            self.data_dir, "classes-trainable.txt"
        )
        self._csv_bbox_labels_desc = os.path.join(
            self.data_dir, "class-descriptions-boxable.csv"
        )
        self._csv_image_human_labels = os.path.join(
            self.data_dir, f"{self.split}-annotations-human-imagelabels.csv"
        )
        self._csv_image_machine_labels = os.path.join(
            self.data_dir, f"{self.split}-annotations-machine-imagelabels.csv"
        )
        self._csv_bbox_anns = os.path.join(
            self.data_dir, f"{self.split}-annotations-bbox.csv"
        )

        # Get label and bbox data
        self.label_data = self._get_label_data()
        # A dict is faster than a DataFrame for per-label lookups
        self.label_data_dict = self.label_data.to_dict(orient="index")
        self.labelid_str_dict = {
            v["ClassIntID"]: v["ClassLabel"]
            for v in self.label_data_dict.values()
        }
        self._label_id_range = [
            min(self.label_data["ClassIntID"]),
            max(self.label_data["ClassIntID"]) + 1,
        ]
        self._num_labels = len(self.label_data)

        self.bbox_data = self._get_bbox_data()
        # ImageIDs that have at least one bounding box annotation
        self.imgid_bbox = sorted(self.bbox_data.groups.keys())

        (
            self.img_human_anns,
            self.img_machine_anns,
        ) = self._get_imagelabels_data()
        self.imgid_human_anns = sorted(self.img_human_anns.groups.keys())
        self.imgid_machine_anns = sorted(self.img_machine_anns.groups.keys())
    @property
    def label_id_range(self):
        # [start, end)
        return self._label_id_range

    @property
    def num_labels(self):
        # Total number of labels in the dataset
        return self._num_labels

    def index_to_sample_path(self, index):
        # Return the image path corresponding to a dataset index
        image_id = self.imgid_bbox[index]
        return os.path.join(self.images_dir, f"{image_id}.jpg")

    def sample_path_to_index(self, sample_path):
        # Return the dataset index corresponding to an image path
        image_id = os.path.basename(sample_path).split(".")[0]
        return self.imgid_bbox.index(image_id)

    def __getitem__(self, index):
        image_path = self.index_to_sample_path(index)
        image = read_image(image_path, ImageReadMode.RGB)
        image_id = os.path.basename(image_path).split(".jpg")[0]

        # Fall back to empty DataFrames (not lists) so that the `.empty`
        # checks in the processing helpers below remain valid
        bbox_data = (
            self.bbox_data.get_group(image_id)
            if image_id in self.bbox_data.groups
            else pd.DataFrame()
        )
        img_human_anns = (
            self.img_human_anns.get_group(image_id)
            if image_id in self.img_human_anns.groups
            else pd.DataFrame()
        )
        img_machine_anns = (
            self.img_machine_anns.get_group(image_id)
            if image_id in self.img_machine_anns.groups
            else pd.DataFrame()
        )

        bboxes = self.process_bboxes(bbox_data)
        objects = self.process_image_labels(img_human_anns, img_machine_anns)

        data_dict = {
            "Image": image,
            "ImagePath": image_path,
            "ImageID": image_id,
            "Bboxes": bboxes,
            "Objects": objects,
        }
        return ObjectDetectionFeaturesDict(**data_dict)

    def __repr__(self):
        return f"OpenImagesv4(split={self.split}, data_dir={self.data_dir})"

    def __len__(self):
        return len(self.imgid_bbox)
    @staticmethod
    def display_sample(features_dict):
        """Draw the bounding boxes in `features_dict` on its image and save
        the result to `<ImageID>_bbox.jpg`."""
        image = features_dict.Image  # tensor of shape (C, H, W)
        h, w = image.shape[1], image.shape[2]
        bboxes = [
            b.bbox_to_tensor(format="xyxy") for b in features_dict.Bboxes
        ]
        bboxes = torch.stack(bboxes, dim=0)  # (num_boxes, 4)
        # Box coordinates are normalized; scale them to pixel space
        bboxes = bboxes * torch.tensor([w, h, w, h])
        label_str = [
            f"{b.ClassLabel}_{b.ClassIntID}" for b in features_dict.Bboxes
        ]
        result = draw_bounding_boxes(
            image, bboxes, label_str, colors=["blue"] * len(label_str), width=4
        )
        save_image(
            result.unsqueeze(0).to(torch.float32),
            f"{features_dict.ImageID}_bbox.jpg",
            nrow=1,
            normalize=True,
        )
    ## Helper methods below ##
    def _process_helper(self, df_rowtup):
        df_rowtup = df_rowtup._asdict()
        # Attach label info
        df_rowtup["ClassID"] = df_rowtup["LabelName"]
        cls_id = self.label_data_dict[df_rowtup["ClassID"]]
        for k in ("ClassLabel", "ClassIntID", "IsTrainable"):
            df_rowtup[k] = cls_id[k]
        # Pop unnecessary values for easy Bbox creation
        for kp in ("Index", "LabelName", "ImageID"):
            df_rowtup.pop(kp)
        return df_rowtup

    def process_bboxes(self, bbox_data):
        bboxes = []
        if not bbox_data.empty:
            for r in bbox_data.itertuples():
                r_dict = self._process_helper(r)
                bboxes.append(Bbox(**r_dict))
        return bboxes

    def process_image_labels(self, img_human_anns, img_machine_anns):
        objects = []

        def process(input_data):
            data = []
            for r in input_data.itertuples():
                r_dict = self._process_helper(r)
                data.append(ImageLabels(**r_dict))
            return data

        if not img_human_anns.empty:
            objects.extend(process(img_human_anns))
        if not img_machine_anns.empty:
            objects.extend(process(img_machine_anns))
        return objects

    def _get_label_data(self):
        bbox_labels = pd.read_csv(
            self._csv_bbox_labels_desc,
            header=None,
            index_col=0,
            names=["ClassLabel"],
        )
        bbox_labels["ClassID"] = bbox_labels.index

        image_labels_train = pd.read_csv(
            self._csv_trainable_labels_desc, header=None, names=["ClassID"]
        )
        image_labels = pd.read_csv(
            self._csv_image_labels_desc,
            header=None,
            index_col=0,
            names=["ClassLabel"],
        )
        image_labels["ClassID"] = image_labels.index
        image_labels["IsTrainable"] = image_labels.ClassID.map(
            lambda row: row in image_labels_train.ClassID.values
        )
        image_labels["IsBboxLabel"] = image_labels.ClassID.map(
            lambda row: row in bbox_labels.ClassID.values
        )
        image_labels["ClassIntID"] = range(0, len(image_labels))
        return image_labels

    def _get_bbox_data(self):
        bbox_data = pd.read_csv(self._csv_bbox_anns)
        return bbox_data.groupby("ImageID")

    def _get_imagelabels_data(self):
        img_labels_human = pd.read_csv(self._csv_image_human_labels)
        img_labels_machine = pd.read_csv(self._csv_image_machine_labels)
        return (
            img_labels_human.groupby("ImageID"),
            img_labels_machine.groupby("ImageID"),
        )

    # TODO: Re-evaluate: do we really need these?
    def _convert_helper(self, from_data, from_key, to_key):
        if not isinstance(from_data, list):
            from_data = [from_data]
        from_data = pd.DataFrame(from_data, columns=[from_key]).astype(
            self.label_data[from_key].dtype
        )
        to_data = from_data.merge(
            self.label_data, left_on=from_key, right_on=from_key, how="inner"
        )[to_key].to_list()
        return to_data

    def classid_to_label(self, class_id):
        # Complement of `self.label_to_classid`
        # class_id = [/m/011k07, /m/011k07] -> class_label = [Tortoise, Tortoise]
        return self._convert_helper(
            class_id, from_key="ClassID", to_key="ClassLabel"
        )

    def label_to_classid(self, label):
        # Complement of `self.classid_to_label` (case-sensitive)
        # class_label = [Tortoise, Tortoise] -> class_id = [/m/011k07, /m/011k07]
        return self._convert_helper(
            label, from_key="ClassLabel", to_key="ClassID"
        )

    def classintid_to_label(self, class_int_id):
        # class_int_id = [45, 45] -> class_label = [Tortoise, Tortoise]
        return self._convert_helper(
            class_int_id, from_key="ClassIntID", to_key="ClassLabel"
        )

    def label_to_classintid(self, label):
        # class_label = [Tortoise, Tortoise] -> class_int_id = [45, 45]
        return self._convert_helper(
            label, from_key="ClassLabel", to_key="ClassIntID"
        )

    def classid_to_classintid(self, class_id):
        # class_id = [/m/011k07, /m/011k07] -> class_int_id = [45, 45]
        return self._convert_helper(
            class_id, from_key="ClassID", to_key="ClassIntID"
        )

    def classintid_to_classid(self, class_int_id):
        # class_int_id = [45, 45] -> class_id = [/m/011k07, /m/011k07]
        return self._convert_helper(
            class_int_id, from_key="ClassIntID", to_key="ClassID"
        )
if __name__ == "__main__":
    import random

    obj = OpenImagesv4(
        "/cb/cold/multimodal_datasets/open_images/v4", "validation"
    )
    print(obj)
    # `randrange` keeps the index within [0, len(obj));
    # `randint(0, len(obj))` could return an out-of-range index
    idx = random.randrange(len(obj))
    features_dict = obj[idx]
    print(idx, features_dict, features_dict.ImageID)
    obj.display_sample(features_dict)
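
    # Sketch: iterating with a torch DataLoader, assuming the dataset behaves
    # as a map-style dataset (it defines `__len__`/`__getitem__`). An identity
    # collate_fn is assumed because ObjectDetectionFeaturesDict samples are
    # not batchable by PyTorch's default collation.
    #
    #   from torch.utils.data import DataLoader
    #
    #   loader = DataLoader(obj, batch_size=4, collate_fn=lambda samples: samples)
    #   for batch in loader:
    #       for sample in batch:
    #           print(sample.ImageID, len(sample.Bboxes))
    #       break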