# Job configuration for a supervised fine-tuning (SFT) workload.
# One key per line: YAML block mappings need each pair on its own line, and a
# `#` comment runs to end of line, so a comment must never share a line with
# a key that follows it.

workload: "sft"
# Left empty here. NOTE(review): presumably filled in at submission time — confirm.
submission_id: ""
code_path: "/private/common/code/verl/verl_repo"

# Model and data.
model_id: "Qwen/Qwen2.5-0.5B-Instruct"
train_file: "/private/datasets/gsm8k_sft/train.parquet"
# No validation file is configured.
val_file: null

# Cluster shape. NOTE(review): presumably 2 nodes x 4 GPUs per node — confirm.
nnodes: 2
n_gpus_per_node: 4

# Training schedule.
# NOTE(review): how total_epochs and total_training_steps interact is decided
# by the consumer of this file — confirm which one takes precedence.
total_epochs: 1
total_training_steps: 10
# Same value as total_training_steps.
# NOTE(review): presumably a checkpoint interval in steps — confirm.
save_freq: 10

# The SFT driver is not allocated a GPU by default (the ray job entrypoint
# does not specify entrypoint_num_gpus), so the driver side must not depend
# on CUDA.
trainer_device: "cpu"