cerebras.modelzoo.data.common.GenericDataProcessor.GenericDataProcessorConfig#

class cerebras.modelzoo.data.common.GenericDataProcessor.GenericDataProcessorConfig(*args, **kwargs)[source]#

Bases: cerebras.modelzoo.config.data_config.DataConfig

Methods

`check_for_deprecated_fields`
`check_literal_discriminator_field`
`copy`
`get_orig_class`
`get_orig_class_args`
`model_copy`
`model_post_init`
`post_init`

Attributes

`batch_size`	The batch size.
`discriminator`
`discriminator_value`
`drop_last`	If True and the dataset size is not divisible by the batch size, the last incomplete batch will be dropped.
`model_config`
`num_workers`	How many subprocesses to use for data loading.
`persistent_workers`	If True, the data loader will not shut down the worker processes after a dataset has been consumed once.
`prefetch_factor`	Number of batches loaded in advance by each worker.
`shuffle`	Flag to enable data shuffling.
`shuffle_buffer`	Size of shuffle buffer in samples.
`shuffle_seed`	Shuffle seed.
`data_processor`

batch_size = Ellipsis#: The batch size.

shuffle = False#: Flag to enable data shuffling.

shuffle_seed = None#: Shuffle seed.

shuffle_buffer = None#: Size of shuffle buffer in samples.

num_workers = 0#: How many subprocesses to use for data loading.

drop_last = True#: If True and the dataset size is not divisible by the batch size, the last incomplete batch will be dropped.

prefetch_factor = 10#: Number of batches loaded in advance by each worker.

persistent_workers = True#: If True, the data loader will not shut down the worker processes after a dataset has been consumed once.

previous

cerebras.modelzoo.data.common.GenericDataProcessor.GenericDataProcessor

next

cerebras.modelzoo.data.common.HDF5DataProcessor