cerebras.modelzoo.data.nlp.bert.BertCSVDataProcessor.BertCSVDataProcessorConfig#
- class cerebras.modelzoo.data.nlp.bert.BertCSVDataProcessor.BertCSVDataProcessorConfig(*args, **kwargs)[source]#
Bases:
cerebras.modelzoo.config.data_config.DataConfig
Methods
check_for_deprecated_fields
check_literal_discriminator_field
copy
get_disable_nsp
get_orig_class
get_orig_class_args
model_copy
model_post_init
post_init
Attributes
batch_size: The batch size.
buckets: A list of bucket boundaries.
data_dir: Path to the data files to use.
disable_nsp: Whether Next Sentence Prediction (NSP) objective is disabled.
discriminator
discriminator_value
do_lower
drop_last: Whether to drop last batch of epoch if it's an incomplete batch.
dynamic_mlm_scale: Whether to dynamically scale the loss.
masked_lm_prob
max_position_embeddings
max_predictions_per_seq
max_sequence_length
mixed_precision
model_config
num_workers: The number of PyTorch processes used in the dataloader.
persistent_workers: Whether or not to keep workers persistent between epochs.
prefetch_factor: The number of batches to prefetch in the dataloader.
shuffle: Whether or not to shuffle the dataset.
shuffle_buffer: Buffer size to shuffle samples across.
shuffle_seed: The seed used for deterministic shuffling.
vocab_file
vocab_size
whole_word_masking
data_processor
- data_dir = Ellipsis#
Path to the data files to use. An Ellipsis default denotes a required field with no default value.
- batch_size = Ellipsis#
The batch size. An Ellipsis default denotes a required field with no default value.
- disable_nsp = False#
Whether Next Sentence Prediction (NSP) objective is disabled.
- dynamic_mlm_scale = False#
Whether to dynamically scale the loss.
- buckets = None#
A list of bucket boundaries. If set to None, then no bucketing will happen, and data will be batched normally. If set to a list, then data will be grouped into len(buckets) + 1 buckets. A sample s will go into bucket i if buckets[i-1] <= element_length_fn(s) < buckets[i] where 0 and inf are the implied lowest and highest boundaries respectively. buckets must be sorted and all elements must be non-zero.
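The bucket-assignment rule described above can be sketched with the standard-library `bisect` module; the `sample_length` argument stands in for whatever `element_length_fn(s)` returns for a sample, which is not specified here:

```python
from bisect import bisect_right

def bucket_index(sample_length: int, buckets: list[int]) -> int:
    """Return i such that buckets[i-1] <= sample_length < buckets[i],
    with 0 and infinity as the implied lowest and highest boundaries."""
    return bisect_right(buckets, sample_length)

# With sorted boundaries [32, 64] there are len(buckets) + 1 = 3 buckets:
print(bucket_index(10, [32, 64]))  # 0: lengths below 32
print(bucket_index(32, [32, 64]))  # 1: boundary is inclusive on the low side
print(bucket_index(80, [32, 64]))  # 2: lengths of 64 and above
```

`bisect_right` implements the half-open interval convention directly: a length equal to a boundary lands in the bucket above it, matching `buckets[i-1] <= element_length_fn(s) < buckets[i]`.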
- shuffle = False#
Whether or not to shuffle the dataset.
- shuffle_seed = None#
The seed used for deterministic shuffling.
- shuffle_buffer = None#
Buffer size to shuffle samples across. If None and shuffle is enabled, 10*batch_size is used.
- num_workers = 0#
The number of PyTorch processes used in the dataloader.
- prefetch_factor = 2#
The number of batches to prefetch in the dataloader.
- persistent_workers = False#
Whether or not to keep workers persistent between epochs.
- drop_last = True#
Whether to drop last batch of epoch if it’s an incomplete batch.
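The fields above can be collected into a config mapping. A minimal sketch follows, using only field names documented on this page; the `data_dir` path and the chosen values are placeholders, and the exact way this mapping is passed to the processor (e.g. via a YAML params file) is an assumption, not shown here:

```python
# Hypothetical train-input config for BertCSVDataProcessor.
# Field names are from the docs above; values and path are placeholders.
train_input = {
    "data_processor": "BertCSVDataProcessor",
    "data_dir": "./bert_csv_data/train",  # required (Ellipsis default)
    "batch_size": 256,                    # required (Ellipsis default)
    "disable_nsp": False,
    "shuffle": True,
    "shuffle_seed": 1,        # deterministic shuffling
    "num_workers": 4,
    "prefetch_factor": 2,
    "persistent_workers": True,
    "drop_last": True,
}

# If shuffle is enabled and shuffle_buffer is unset, the docs state the
# buffer defaults to 10 * batch_size:
shuffle_buffer = train_input.get("shuffle_buffer", 10 * train_input["batch_size"])
print(shuffle_buffer)  # 2560
```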