Source code for cerebras.modelzoo.trainer.callbacks.selective_grad

# Copyright 2022 Cerebras Systems.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Contains the SelectiveGrad callback class."""

from typing import List, Union

import cerebras.pytorch as cstorch
from cerebras.modelzoo.trainer.callbacks import Callback
from cerebras.pytorch.sparse.configure import default_sparse_param_filter


class SelectiveGrad(Callback):
    """Callback class that selectively applies gradient computation."""

    def __init__(self, selective_grads: Union[dict, List[dict]]):
        """Constructs a `SelectiveGrad` instance.

        Args:
            selective_grads: Configuration for selective gradient computation.
                It may be initialized with a configuration dict or a list of
                dicts.
        """
        if selective_grads:

            def get_selective_grad_from_dict(single_config):
                # Use the configured param filter, falling back to the
                # default sparsity parameter filter if none is given.
                param_filter = single_config.get("param_filter", None)
                if param_filter is None:
                    param_filter = default_sparse_param_filter

                # `init_method` is an optional field.
                if "init_method" in single_config:
                    return cstorch.nn.SelectiveGrad(
                        param_filter, single_config["init_method"]
                    )
                return cstorch.nn.SelectiveGrad(param_filter)

            if isinstance(selective_grads, dict):
                self.selective_grads = [
                    get_selective_grad_from_dict(selective_grads)
                ]
            elif isinstance(selective_grads, list):
                self.selective_grads = list(
                    map(get_selective_grad_from_dict, selective_grads)
                )
            else:
                raise ValueError(
                    "Expected `selective_grads` to be a dict or a list of dicts."
                )
        else:
            self.selective_grads = []

    def setup(self, trainer):
        """Applies each configured SelectiveGrad to the trainer's model."""
        for selective_grad in self.selective_grads:
            trainer.model.apply(selective_grad)
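
A minimal usage sketch follows. The `param_filter` patterns and variable names below are illustrative assumptions, not taken from this module; consult `cstorch.nn.SelectiveGrad` for the exact filter types it accepts.

from cerebras.modelzoo.trainer.callbacks import SelectiveGrad

# Single configuration dict; `init_method` is optional and omitted here.
# The filter pattern is an assumed example value.
selective_grad = SelectiveGrad({"param_filter": "*.attn.*"})

# A list of dicts is also accepted; each entry builds its own
# cstorch.nn.SelectiveGrad, all of which are applied to the model via
# `trainer.model.apply(...)` when the trainer calls `setup`.
selective_grad = SelectiveGrad(
    [
        {"param_filter": "*.attn.*"},
        {"param_filter": "*.ffn.*"},
    ]
)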