swift_test/swift_client.sh
2025-06-25 17:00:47 +08:00

17 lines
459 B
Bash

# Launch a GRPO RLHF run through the `swift` CLI, with generation delegated to
# a vLLM server addressed at localhost:8000 (`--vllm_mode server`).
# NOTE(review): presumably that server has to be up before this is started —
# confirm against the companion server-side launcher.
# Requires bash: the flags are collected in an array so they can be annotated.
grpo_args=(
  # Algorithm, policy checkpoint, dataset and reward functions.
  --rlhf_type grpo
  --model /data1/yuyr/qwen3-8b
  # NOTE(review): the "#5000" suffix looks like a sample-count cap — confirm.
  --dataset 'AI-MO/NuminaMath-TIR#5000'
  --reward_funcs accuracy cosine

  # Where generation requests are sent.
  --use_vllm true
  --vllm_mode server
  --vllm_server_host localhost
  --vllm_server_port 8000

  # Per-device batch sizes for training and evaluation.
  --per_device_train_batch_size 8
  --per_device_eval_batch_size 8

  # Rollout settings.
  --async_generate true
  --num_generations 4

  # DeepSpeed configuration preset.
  --deepspeed zero3
)

# Both variables are set for this one command only (prefix assignments).
# CUDA_VISIBLE_DEVICES limits the process to GPUs 2 and 3; NPROC_PER_NODE=2 is
# presumably the matching worker-process count — TODO confirm.
CUDA_VISIBLE_DEVICES=2,3 NPROC_PER_NODE=2 swift rlhf "${grpo_args[@]}"