cerebras.modelzoo.data_preparation.nlp.pubmed.TextSharding.Sharding

class cerebras.modelzoo.data_preparation.nlp.pubmed.TextSharding.Sharding

Bases: object

Methods

distribute_articles_over_shards

get_sentences_per_shard

init_output_files

load_articles

segment_articles_into_sentences

write_shards_to_disk

write_single_shard

__init__(input_files, output_name_prefix, n_training_shards, n_test_shards, fraction_test_set)