Source code for multimodal.datasets.clevr

from genericpath import exists
import os
import json

from multimodal.utils import Task, download_and_unzip
from multimodal import DEFAULT_DATA_DIR

from torch.utils.data import Dataset
from PIL import Image
import torchvision
import torch


[docs]class CLEVR(Dataset): """ CLEVR: A Diagnostic Dataset for Compositional Language and Elementary Visual Reasoning. See https://cs.stanford.edu/people/jcjohns/clevr/ Warning: instanciating this class will download a 18Gb file to the multimodal data directory (by default in your applications data). You can specify the multimodal data directory by specifying the ``dir_data`` argument, or specifying it in your path. Args: dir_data (str): dir for the multimodal cache (data will be downloaded in a clevr/ folder inside this directory split (str): either train, val or test transform: torchvision transform applied to images. By default, only ToTensor. """ url = "https://dl.fbaipublicfiles.com/clevr/CLEVR_v1.0.zip" def __init__( self, dir_data=None, split="train", transform=torchvision.transforms.ToTensor() ): super().__init__() if dir_data is None: dir_data = DEFAULT_DATA_DIR self.split = split self.dir_dataset = os.path.join(dir_data, "datasets", "clevr") self.transform = transform os.makedirs(self.dir_dataset, exist_ok=True) self.download_and_process(self.dir_dataset) # open data with open(self._get_path_questions(self.dir_dataset, self.split)) as f: self.questions = json.load(f)["questions"] with open(os.path.join(self.dir_dataset, "answers.json")) as f: self.aid_to_ans = json.load(f) self.ans_to_aid = {ans: id for (id, ans) in enumerate(self.aid_to_ans)} @classmethod def _get_path_questions(cls, dir_dataset, split): return os.path.join( dir_dataset, f"CLEVR_v1.0/questions/CLEVR_{split}_questions.json" ) @classmethod def download_and_process(cls, dir_dataset): task = Task(dir_dataset, "download") if not task.is_done(): print("Downloading CLEVR") download_and_unzip(cls.url, directory=dir_dataset) task.mark_done() path_answers = os.path.join(dir_dataset, "answers.json") if not os.path.exists(path_answers): print("Processing answers") with open(cls._get_path_questions(dir_dataset, split="train")) as f: train_questions = json.load(f)["questions"] all_answers = list(set(q["answer"] for q in train_questions)) with open(path_answers, "w") as f: json.dump(all_answers, f)
[docs] def __getitem__(self, index: int): """ Returns a dictionnary with the following keys: .. code-block:: { "index", "question", "answer":, "question_family_index":, "image_filename":, "image_index":, "image" "label", } Note that you can recover the program for an example by using the index: .. code-block:: python index = item["index"][0] # first item of batch program = clevr.questions[index]["program"] """ q = self.questions[index] img_path = os.path.join( self.dir_dataset, "CLEVR_v1.0", "images", self.split, q["image_filename"] ) # add image data target = torch.zeros(len(self.aid_to_ans)) ans_id = self.ans_to_aid[q["answer"]] im = Image.open(img_path) if self.transform is not None: im = self.transform(im) item = { "index": index, "question": q["question"], "answer": q["answer"], "question_family_index": q["question_family_index"], "image_filename": q["image_filename"], "image_index": q["image_index"], "image": im, "label": torch.tensor(ans_id), } return item
def __len__(self) -> int: return len(self.questions)