cerebras.modelzoo.data.nlp.transformer.config.TransformerDynamicDataProcessorConfig

class cerebras.modelzoo.data.nlp.transformer.config.TransformerDynamicDataProcessorConfig(batch_size: int = <object object>, shuffle: bool = True, shuffle_seed: int = 0, num_workers: int = 0, prefetch_factor: int = 10, persistent_workers: bool = True, src_data_dir: str = <object object>, src_vocab_file: str = <object object>, src_max_sequence_length: int = <object object>, tgt_max_sequence_length: int = <object object>, shuffle_buffer: Optional[int] = None, do_lower: bool = False, buckets: Optional[List[int]] = None, dynamic_loss_weight: Optional[bool] = None, pack_sequences: Optional[bool] = False, num_documents_to_concatenate: int = 128, drop_last: bool = True, oov_token: str = '<unk>', sos_token: str = '<s>', eos_token: str = '</s>', pad_token: str = '<pad>', extra_ids: Union[int, List[int]] = 0, labels_pad_id: int = 0, input_pad_id: int = 0)[source]
batch_size: int = <object object>

The batch size to use.

buckets: Optional[List[int]] = None
do_lower: bool = False
drop_last: bool = True
dynamic_loss_weight: Optional[bool] = None
eos_token: str = '</s>'
extra_ids: Union[int, List[int]] = 0
input_pad_id: int = 0
labels_pad_id: int = 0
num_documents_to_concatenate: int = 128
num_workers: int = 0

The number of PyTorch processes used in the dataloader.

oov_token: str = '<unk>'
pack_sequences: Optional[bool] = False
pad_token: str = '<pad>'
persistent_workers: bool = True

Whether to keep the dataloader workers persistent between epochs.

prefetch_factor: int = 10

The number of batches to prefetch in the dataloader.

shuffle: bool = True

Whether to shuffle the dataset.

shuffle_buffer: Optional[int] = None
shuffle_seed: int = 0

The seed used for deterministic shuffling.

sos_token: str = '<s>'
src_data_dir: str = <object object>
src_max_sequence_length: int = <object object>
src_vocab_file: str = <object object>
tgt_max_sequence_length: int = <object object>