---
# Run configuration for a "grpo" workload.
# One key per line: YAML block mappings cannot hold several
# `key: value` pairs on a single line.

# Workload selection and submission identity.
workload: "grpo"
# Left as an empty string here.
# NOTE(review): presumably filled in at submission time - confirm.
submission_id: ""

# Location of the code checkout used for the run.
code_path: "/private/common/code/verl/verl_repo"

# Model identifier.
# NOTE(review): looks like a Hugging Face hub ID - confirm with the consumer.
model_id: "Qwen/Qwen2.5-0.5B-Instruct"

# Training and validation datasets (Parquet files).
train_file: "/private/common/datasets/gsm8k/train.parquet"
val_file: "/private/common/datasets/gsm8k/test.parquet"

# Cluster shape: 2 nodes with 4 GPUs each.
nnodes: 2
n_gpus_per_node: 4

# Training schedule.
# NOTE(review): how total_epochs and total_training_steps interact is decided
# by the consumer - confirm which one bounds the run.
total_epochs: 1
total_training_steps: 10
# Same value as total_training_steps.
save_freq: 10
# NOTE(review): -1 presumably disables periodic validation - confirm.
test_freq: -1