cerebras.modelzoo.config_manager.config_classes.base.run_config

Configuration classes for run settings.

Classes

PytorchProfilerConfig

PytorchProfilerConfig(start_step: int = <sentinel object>, end_step: int = <sentinel object>)

RunConfig

RunConfig(steps_per_epoch: Optional[int] = None, max_steps: Optional[int] = None, mgmt_address: Optional[str] = None, mount_dirs: Optional[List[str]] = None, num_epochs: Optional[int] = None, python_paths: Optional[List[str]] = None, compile_dir: Optional[str] = None, checkpoint_path: Optional[str] = None, credentials_path: Optional[str] = None, debug_args_path: Optional[str] = None, retrace_every_iteration: Optional[bool] = None, eval_steps: Optional[int] = None, init_method: str = 'env://', job_time_sec: Optional[int] = None, job_labels: Optional[List[str]] = None, job_priority: Optional[str] = None, seed: Optional[int] = None, mgmt_namespace: Optional[str] = None, load_checkpoint_states: Optional[str] = None, target_device: Literal['CPU', 'GPU', 'CSX'] = 'CSX', mode: Literal['train', 'eval', 'eval_all', 'sideband_eval_all', 'train_and_eval', 'sideband_train_and_eval', 'inference'] = 'train', wsc_log_level: Union[Literal['INFO', 'DEBUG', 'VERBOSE', '20', '10'], dict, NoneType] = 'INFO', autoload_last_checkpoint: Optional[bool] = True, check_loss_values: bool = True, disable_strict_checkpoint_loading: Optional[bool] = False, dist_addr: str = 'localhost:8888', dist_backend: str = 'nccl', checkpoint_steps: Optional[int] = None, disable_version_check: Optional[bool] = False, drop_data: Optional[bool] = False, enable_distributed: bool = False, model_dir: str = './model_dir', save_initial_checkpoint: bool = False, precision_opt_level: Optional[int] = None, num_workers_per_csx: int = 0, validate_only: Optional[bool] = False, logging: Optional[str] = 'INFO', sync_batchnorm: bool = False, compile_only: Optional[bool] = False, log_steps: int = 1, num_steps: Optional[int] = None, transfer_processes: Optional[int] = None, num_wgt_servers: Optional[int] = None, num_csx: int = 1, num_act_servers: Optional[int] = 1, eval_frequency: Optional[int] = None, execute_crd_memory_gi: Optional[int] = None, compile_crd_memory_gi: Optional[int] = None, op_profiler_config: Optional[cerebras.modelzoo.config_manager.config_classes.base.run_config.PytorchProfilerConfig] = None, dump_activations: bool = False, log_input_summaries: bool = False, main_process_id: int = 0, max_checkpoints: Optional[int] = None, summary_dir: Optional[str] = None, lazy_initialization: bool = True, use_cstorch_optimizer_step: bool = False, wrk_memory_gi: Optional[int] = None, act_memory_gi: Optional[int] = None, cmd_memory_gi: Optional[int] = None, wgt_memory_gi: Optional[int] = None, experimental: dict = <factory>, ini: Optional[Dict[str, Union[bool, int, float, str]]] = None, debug_args: Optional[Dict[str, Union[bool, int, float, str]]] = None)