---
# Job configuration for an "sft" workload.
# NOTE(review): the key schema is defined by whatever launcher consumes
# this file; that consumer is not visible here, so the per-key notes
# below are read from the key names and values only.

workload: "sft"

# Left empty in this file.
# NOTE(review): presumably filled in at submission time; confirm.
submission_id: ""

# Path to the code checkout used for the job.
code_path: "/private/common/code/verl/verl_repo"

# Model identifier.
# NOTE(review): looks like a Hugging Face Hub repo id; confirm.
model_id: "Qwen/Qwen2.5-0.5B-Instruct"

# Dataset files; no validation file is configured (explicit null).
train_file: "/private/datasets/gsm8k_sft/train.parquet"
val_file: null

# Cluster shape: node count and GPUs per node.
nnodes: 2
n_gpus_per_node: 4

# Training length.
# NOTE(review): both an epoch count and a step count are set; which one
# takes precedence depends on the consumer; confirm.
total_epochs: 1
total_training_steps: 10

save_freq: 10

# The SFT driver is not allocated a GPU by default (the Ray job
# entrypoint does not specify entrypoint_num_gpus), so the driver side
# must not depend on CUDA.
trainer_device: "cpu"