From 8f168ecbef787cae6b4cc41431c96790a6e26c79 Mon Sep 17 00:00:00 2001
From: yuyr
Date: Wed, 25 Jun 2025 17:00:47 +0800
Subject: [PATCH] test swift for qwen3 math

---
Reviewer notes (free-form commentary placed after the "---" separator; it is
not part of the commit message and not part of any hunk):

  * This patch only creates new files; nothing existing is modified.
  * .gitignore      - ignores the output/ and result/ directories.
  * README.md       - states (in Chinese) that this is an experiment running
                      GRPO training on qwen3-8b with swift, that the dataset
                      is a math dataset from modelscope, and that the external
                      (server) mode is used because colocate mode ran out of
                      memory on A6000 cards: 2 GPUs run vLLM and 2 GPUs run
                      training. The server script must be started first and
                      the vLLM service must be up before the client is run.
  * swift_server.sh - runs `swift rollout` on GPUs 0,1 with
                      --tensor_parallel_size 2, listening on port 8000.
  * swift_client.sh - runs `swift rlhf --rlhf_type grpo` on GPUs 2,3
                      (NPROC_PER_NODE=2) with --vllm_mode server pointed at
                      localhost:8000, i.e. the port used by swift_server.sh.
                      Dataset argument is AI-MO/NuminaMath-TIR#5000; reward
                      functions are `accuracy` and `cosine`; DeepSpeed zero3.
  * NOTE(review): the model path /data1/yuyr/qwen3-8b is machine-specific and
    appears in both scripts - keep the two in sync when changing it.
  * NOTE(review): both scripts have no shebang and are created with mode
    100644; the README runs them via `sh <script>`, which is consistent.
  * NOTE(review): .gitignore and README.md are recorded without a trailing
    newline (see the "\ No newline at end of file" markers below).

 .gitignore      |  2 ++
 README.md       | 10 ++++++++++
 swift_client.sh | 16 ++++++++++++++++
 swift_server.sh |  5 +++++
 4 files changed, 33 insertions(+)
 create mode 100644 .gitignore
 create mode 100644 README.md
 create mode 100644 swift_client.sh
 create mode 100644 swift_server.sh

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..99f7cb1
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,2 @@
+output/
+result/
\ No newline at end of file
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..7177607
--- /dev/null
+++ b/README.md
@@ -0,0 +1,10 @@
+# 试验使用swift 对qwen3-8b进行grpo训练
+- 数据集:modelscope提供数学
+- 启动方法:external模式,因为在a6000上使用colocate方式启动会报内存不足,因此使用2张卡跑vllm,2张卡跑训练,基本都跑满
+```bash
+# start server
+sh swift_server.sh # 要等到看到vllm服务启动
+
+# start client
+sh swift_client.sh # 启动训练任务
+```
\ No newline at end of file
diff --git a/swift_client.sh b/swift_client.sh
new file mode 100644
index 0000000..ba7b8a2
--- /dev/null
+++ b/swift_client.sh
@@ -0,0 +1,16 @@
+CUDA_VISIBLE_DEVICES=2,3 \
+NPROC_PER_NODE=2 \
+swift rlhf \
+    --rlhf_type grpo \
+    --model /data1/yuyr/qwen3-8b \
+    --dataset AI-MO/NuminaMath-TIR#5000 \
+    --reward_funcs accuracy cosine \
+    --use_vllm true \
+    --vllm_mode server \
+    --vllm_server_host localhost \
+    --vllm_server_port 8000 \
+    --per_device_train_batch_size 8 \
+    --per_device_eval_batch_size 8 \
+    --async_generate true \
+    --num_generations 4 \
+    --deepspeed zero3
diff --git a/swift_server.sh b/swift_server.sh
new file mode 100644
index 0000000..c8bb4d0
--- /dev/null
+++ b/swift_server.sh
@@ -0,0 +1,5 @@
+CUDA_VISIBLE_DEVICES=0,1 \
+swift rollout \
+    --model /data1/yuyr/qwen3-8b \
+    --tensor_parallel_size 2 \
+    --port 8000