cerebras.modelzoo.data_preparation.raw_dataset_processor.config.RawDatasetProcessorConfig

class cerebras.modelzoo.data_preparation.raw_dataset_processor.config.RawDatasetProcessorConfig(batch_size: int = <object object>, shuffle: bool = True, shuffle_seed: int = 0, num_workers: int = 0, prefetch_factor: int = 10, persistent_workers: bool = True, preprocessing: Optional[dict] = None, drop_last: bool = True, seed: Optional[int] = None)
preprocessing: Optional[dict] = None
drop_last: bool = True
prefetch_factor: int = 10

The number of batches to prefetch in the dataloader

persistent_workers: bool = True

Whether or not to keep workers persistent between epochs

seed: Optional[int] = None
batch_size: int = <object object>

Batch size to be used

num_workers: int = 0

The number of PyTorch processes used in the dataloader

shuffle: bool = True

Whether or not to shuffle the dataset

shuffle_seed: int = 0

Seed used for deterministic shuffling