# Copyright 2022 Cerebras Systems.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import logging
from enum import Enum
class BlockType(Enum):
    ADALN_ZERO = "adaln_zero"

    @classmethod
    def values(cls):
        return [b.value for b in cls]

    @classmethod
    def get(cls, blk):
        if isinstance(blk, str):
            return BlockType(blk)
        elif isinstance(blk, Enum):
            return blk
        else:
            raise ValueError(
                f"Unsupported type {type(blk)}, supported are `str` and `Enum`"
            )
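
# A minimal usage sketch for `BlockType` (illustrative only): configs usually
# carry the string form, which `get` normalizes to the enum member.
#
#   >>> BlockType.values()
#   ['adaln_zero']
#   >>> BlockType.get("adaln_zero")
#   <BlockType.ADALN_ZERO: 'adaln_zero'>
#   >>> BlockType.get(BlockType.ADALN_ZERO)
#   <BlockType.ADALN_ZERO: 'adaln_zero'>
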
def set_defaults(params):
    """
    Update any missing parameters in the params dictionary with default values.

    Args:
        params: The dictionary containing the params.
    """
    runconfig = params["runconfig"]

    # Input-, model- and shared-parameter defaults
    _set_input_defaults(params)
    _set_model_defaults(params)
    _copy_params_across(params)

    # Runconfig-related defaults
    if runconfig["checkpoint_steps"] == 0:
        logging.warning(
            "`runconfig.checkpoint_steps` is 0, which only saves the initial "
            "checkpoint. Setting it to `runconfig.max_steps` instead."
        )
        runconfig["checkpoint_steps"] = runconfig["max_steps"]
    return params
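
# For orientation, a rough sketch of the nested `params` layout this module
# expects; the keys mirror those accessed below, the values are illustrative:
#
#   train_input:
#     image_size: [256, 256]
#     image_channels: 3
#     num_classes: 1000
#     num_diffusion_steps: 1000
#   eval_input: { ... }
#   model:
#     vae: {latent_size: [32, 32], latent_channels: 4}
#     fp16_type: "bfloat16"
#     initializer_range: 0.02
#     use_conv_patchified_embedding: True
#   optimizer: { ... }
#   runconfig: {max_steps: 100000, checkpoint_steps: 0}
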
def _set_model_defaults(params):
    # model related parameters
    mparams = params["model"]
    tparams = params["train_input"]
    mparams["num_diffusion_steps"] = tparams["num_diffusion_steps"]
    mparams["num_classes"] = tparams["num_classes"]
    mparams["beta_start"] = mparams.get("beta_start", 0.0001)
    mparams["beta_end"] = mparams.get("beta_end", 0.02)
    tparams["vae_scaling_factor"] = params["model"]["vae"]["scaling_factor"]
    params["eval_input"]["vae_scaling_factor"] = params["model"]["vae"][
        "scaling_factor"
    ]
    mparams["vae"]["in_channels"] = tparams["image_channels"]
    mparams["vae"]["out_channels"] = tparams["image_channels"]
    mparams["vae"]["scaling_factor"] = mparams["vae"].get(
        "scaling_factor", 0.18215
    )
    mparams["latent_channels"] = mparams.get(
        "latent_channels", mparams["vae"]["latent_channels"]
    )
    mparams["latent_size"] = mparams.get(
        "latent_size", mparams["vae"]["latent_size"]
    )
    image_dims = [tparams["image_channels"]] + tparams["image_size"]
    latent_dims = [mparams["latent_channels"]] + mparams["latent_size"]
    logging.info(
        f"Using Image Dimensions (C, H, W): {image_dims} and VAE output Dimensions (C, H, W): {latent_dims}"
    )
    mparams["block_type"] = mparams.get(
        "block_type", BlockType.ADALN_ZERO.value
    )
    if mparams["block_type"] not in BlockType.values():
        raise ValueError(
            f"Unsupported DiT block type {mparams['block_type']}. "
            f"Supported values are {BlockType.values()}."
        )
    logging.info(f"Using DiT block type : {mparams['block_type']}")
    if mparams["fp16_type"] == "bfloat16":
        params["optimizer"]["loss_scaling_factor"] = 1.0
    # Regression Head
    # False -> linear + unpatchify for regression head
    mparams["use_conv_transpose_unpatchify"] = mparams.get(
        "use_conv_transpose_unpatchify", True
    )
    if not mparams["use_conv_transpose_unpatchify"]:
        raise ValueError(
            "Using a linear layer + unpatchify in the RegressionHead is "
            "unsupported at this time; please set "
            "`model.use_conv_transpose_unpatchify` to True."
        )
    _set_layer_initializer_defaults(params)
    _set_reverse_process_defaults(params)
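
# A hedged sketch of the optional `model.reverse_process` section consumed by
# `_set_reverse_process_defaults`; the values shown are illustrative only:
#
#   reverse_process:
#     batch_size: 32
#     sampler:
#       name: "ddpm"
#       num_diffusion_steps: 250
#     pipeline:
#       num_classes: 1000
#       custom_labels: null
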
def _set_reverse_process_defaults(params):
    mparams = params["model"]
    rparams = mparams.get("reverse_process", {})
    if rparams:
        rparams["sampler"]["num_diffusion_steps"] = rparams["sampler"].get(
            "num_diffusion_steps", mparams["num_diffusion_steps"]
        )
        rparams["batch_size"] = rparams.get("batch_size", 32)
        rparams["pipeline"]["num_classes"] = rparams["pipeline"].get(
            "num_classes", mparams["num_classes"]
        )
        rparams["pipeline"]["custom_labels"] = rparams["pipeline"].get(
            "custom_labels", None
        )
        # For DDPM Sampler only
        if rparams["sampler"]["name"] == "ddpm":
            rparams["sampler"]["variance_type"] = "fixed_small"
def _set_layer_initializer_defaults(params):
    # Modifies in-place
    mparams = params["model"]
    # Patch Embedding
    mparams["projection_initializer"] = {"name": "xavier_uniform", "gain": 1.0}
    mparams["init_conv_like_linear"] = mparams.get(
        "init_conv_like_linear", mparams["use_conv_patchified_embedding"]
    )
    # Timestep Embedding MLP
    mparams["timestep_embeddding_initializer"] = {
        "name": "normal",
        "mean": 0.0,
        "std": mparams["initializer_range"],
    }
    # Label Embedding table
    mparams["label_embedding_initializer"] = {
        "name": "normal",
        "mean": 0.0,
        "std": mparams["initializer_range"],
    }
    # Attention
    mparams["attention_initializer"] = {"name": "xavier_uniform", "gain": 1.0}
    # ffn
    mparams["ffn_initializer"] = {"name": "xavier_uniform", "gain": 1.0}
    # Regression Head FFN
    mparams["head_initializer"] = {"name": "zeros"}
def _set_input_defaults(params):
    # Modifies in place
    # train_input required parameters
    tparams = params["train_input"]
    tparams["shuffle"] = tparams.get("shuffle", True)
    tparams["shuffle_seed"] = tparams.get("shuffle_seed", 4321)
    tparams["num_classes"] = tparams.get("num_classes", 1000)
    tparams["noaugment"] = tparams.get("noaugment", False)
    tparams["drop_last"] = tparams.get("drop_last", True)
    tparams["num_workers"] = tparams.get("num_workers", 0)
    tparams["prefetch_factor"] = tparams.get("prefetch_factor", 10)
    tparams["persistent_workers"] = tparams.get("persistent_workers", True)
    if tparams["noaugment"]:
        tparams["transforms"] = None
        logging.info(
            f"Since `noaugment`={tparams['noaugment']}, the transforms are set to None"
        )
    tparams["use_worker_cache"] = tparams.get("use_worker_cache", False)
    # eval_input required parameters
    eparams = params["eval_input"]
    eparams["shuffle"] = eparams.get("shuffle", False)
    eparams["shuffle_seed"] = eparams.get("shuffle_seed", 4321)
    eparams["noaugment"] = eparams.get("noaugment", False)
    eparams["drop_last"] = eparams.get("drop_last", True)
    eparams["num_workers"] = eparams.get("num_workers", 0)
    eparams["prefetch_factor"] = eparams.get("prefetch_factor", 10)
    eparams["persistent_workers"] = eparams.get("persistent_workers", True)
    if eparams["noaugment"]:
        eparams["transforms"] = None
        logging.info(
            f"Since `noaugment`={eparams['noaugment']}, the transforms are set to None"
        )
    eparams["use_worker_cache"] = eparams.get("use_worker_cache", False)
def _copy_params_across(params):
    # Pass model settings into data loader.
    _model_to_input_map = [
        "label_dropout_rate",
        # latent shape
        "latent_size",
        "latent_channels",
        # Other params
        "mixed_precision",
        "fp16_type",
        # diffusion-related params
        "schedule_name",
    ]
    for _key_map in _model_to_input_map:
        if isinstance(_key_map, tuple):
            assert len(_key_map) == 2, f"Tuple {_key_map} does not have len=2"
            model_key, input_key = _key_map
        else:
            model_key = input_key = _key_map
        for section in ["train_input", "eval_input"]:
            params[section][input_key] = params["model"][model_key]