From 054ed3612d5432a22ab5a1534458d820638969a6 Mon Sep 17 00:00:00 2001 From: yuyr Date: Sun, 4 Jan 2026 16:48:12 +0800 Subject: [PATCH] =?UTF-8?q?V3.5=E5=A2=9E=E5=8A=A0=E8=A1=A8=E5=8D=95?= =?UTF-8?q?=E6=A8=A1=E5=BC=8F=E5=88=9B=E5=BB=BA=E4=BB=BB=E5=8A=A1?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/mvp/py/argus/service/advanced_command.py | 7 +- src/mvp/py/argus/service/ui.py | 391 ++++++++++++++++++- src/mvp/py/tests/test_advanced_command.py | 3 +- src/mvp/py/tests/test_ui.py | 2 + 4 files changed, 389 insertions(+), 14 deletions(-) diff --git a/src/mvp/py/argus/service/advanced_command.py b/src/mvp/py/argus/service/advanced_command.py index 4ee699a..07656ba 100644 --- a/src/mvp/py/argus/service/advanced_command.py +++ b/src/mvp/py/argus/service/advanced_command.py @@ -43,8 +43,10 @@ def validate_advanced_command(*, user_id: str, expanded_command: str) -> None: - If reward path is present, must be under /private/users//code/... """ - if "python3" not in expanded_command or "-m verl.trainer." not in expanded_command: - raise ValueError("command must include 'python3' and '-m verl.trainer.'") + # v3.5: advanced commands must still be scoped to VERL entrypoints (not arbitrary shell). + # Allow non-trainer utilities like `verl.model_merger`. + if "python3" not in expanded_command or "-m verl." not in expanded_command: + raise ValueError("command must include 'python3' and '-m verl.'") for other in _RE_PRIVATE_USER.findall(expanded_command): if other != user_id: @@ -72,4 +74,3 @@ def validate_advanced_command(*, user_id: str, expanded_command: str) -> None: allow = f"/private/users/{user_id}/code/" if not path.startswith(allow): raise ValueError(f"reward path must be under {allow}, got: {path}") - diff --git a/src/mvp/py/argus/service/ui.py b/src/mvp/py/argus/service/ui.py index 133a3a8..8a41ef4 100644 --- a/src/mvp/py/argus/service/ui.py +++ b/src/mvp/py/argus/service/ui.py @@ -24,11 +24,14 @@ a { color:var(--accent); text-decoration:none; } .muted { color: var(--muted); } .btn { border: 1px solid rgba(255,255,255,0.16); background: rgba(255,255,255,0.06); color: var(--fg); padding: 10px 12px; border-radius: 10px; cursor: pointer; } .btn:hover { background: rgba(255,255,255,0.10); } +.btn.active { background: rgba(122,162,255,0.14); border-color: rgba(122,162,255,0.22); } .btn.danger { border-color: rgba(255,107,107,0.35); background: rgba(255,107,107,0.10); } .pill { display:inline-block; padding: 2px 10px; border-radius: 999px; border: 1px solid rgba(255,255,255,0.16); font-size: 12px; } .pill.ok { border-color: rgba(61,220,151,0.35); background: rgba(61,220,151,0.12); } .pill.bad { border-color: rgba(255,107,107,0.35); background: rgba(255,107,107,0.12); } textarea, input { width: 100%; color: var(--fg); background: rgba(255,255,255,0.06); border: 1px solid rgba(255,255,255,0.12); border-radius: 12px; padding: 10px 12px; outline: none; } +select { color: var(--fg); background: rgba(255,255,255,0.06); border: 1px solid rgba(255,255,255,0.12); border-radius: 12px; padding: 10px 12px; outline: none; } +select option { color: var(--fg); background: #0e1630; } button:disabled { opacity: .45; cursor: not-allowed; } pre { white-space: pre-wrap; word-break: break-word; } table { width:100%; border-collapse: collapse; } @@ -174,13 +177,38 @@ def register_ui_routes(app: FastAPI) -> None:
+
+
+

User Info

+
Shown after login via /api/v2/me.
+
+
+ +
+
+
(not loaded)
+
""".strip() script = """ const tokEl = document.getElementById("tok"); const msg = document.getElementById("msg"); +const me = document.getElementById("me"); tokEl.value = mvpTokenGet(); -document.getElementById("save").onclick = () => { mvpTokenSet(tokEl.value); msg.textContent = "Saved."; }; -document.getElementById("clear").onclick = () => { mvpTokenSet(""); tokEl.value = ""; msg.textContent = "Cleared."; }; + +async function refreshMe() { + me.textContent = "Loading..."; + try { + const obj = await apiJson("/api/v2/me"); + me.textContent = fmtJson(obj); + } catch (e) { + me.textContent = "Error: " + (e.status || "") + "\\n" + (e.body || String(e)); + } +} + +document.getElementById("me-refresh").onclick = refreshMe; +document.getElementById("save").onclick = () => { mvpTokenSet(tokEl.value); msg.textContent = "Saved."; refreshMe(); }; +document.getElementById("clear").onclick = () => { mvpTokenSet(""); tokEl.value = ""; msg.textContent = "Cleared."; me.textContent = "(not loaded)"; }; +if (mvpTokenGet()) refreshMe(); """.strip() return HTMLResponse(content=_page("Login", "login", body, script)) @@ -380,6 +408,24 @@ command: | # command 里增加如下 overrides: # custom_reward_function.path=$HOME/code/reward.py # custom_reward_function.name=compute_score +""".strip() + merge = """# Model Merge (YAML) - v3.5 (Advanced command) +# 用途:将 VERL 训练产生的 FSDP 分片 checkpoint 合并为 HuggingFace 格式目录。 +# +# 你需要把 替换成真实值: +# - submission id:在 Tasks 详情页里看到的 `ray_submission_id`(如 mvp2-...--a01) +# - global_step:对应 checkpoints 下的 global_step_xxx 目录(如 global_step_10) +# +# 注意:这里不需要 GPU,建议把 n_gpus_per_node 设为 0,这样不受训练任务占用 GPU 的影响。 +kind: advanced +nnodes: 1 +n_gpus_per_node: 0 + +command: | + python3 -m verl.model_merger merge \ + --backend fsdp \ + --local_dir $HOME/jobs//checkpoints//actor \ + --target_dir $HOME/jobs//checkpoints//actor/huggingface """.strip() body = f"""

New Task

@@ -389,14 +435,161 @@ command: | Basic tasks require code_path; Advanced tasks use kind: advanced with a custom command.
+
+ + +
+
+ +
+
+
+ + +
@@ -410,6 +603,7 @@ command: | tpl_grpo = json.dumps(grpo) tpl_sft = json.dumps(sft) tpl_adv = json.dumps(adv) + tpl_merge = json.dumps(merge) script = ( """ const msg = document.getElementById("msg"); @@ -418,13 +612,189 @@ const TPL_PPO = __TPL_PPO__; const TPL_GRPO = __TPL_GRPO__; const TPL_SFT = __TPL_SFT__; const TPL_ADV = __TPL_ADV__; +const TPL_MERGE = __TPL_MERGE__; document.getElementById("tpl-ppo").onclick = () => { yamlEl.value = TPL_PPO; msg.textContent = ""; }; document.getElementById("tpl-grpo").onclick = () => { yamlEl.value = TPL_GRPO; msg.textContent = ""; }; document.getElementById("tpl-sft").onclick = () => { yamlEl.value = TPL_SFT; msg.textContent = ""; }; document.getElementById("tpl-adv").onclick = () => { yamlEl.value = TPL_ADV; msg.textContent = ""; }; +document.getElementById("tpl-merge").onclick = () => { yamlEl.value = TPL_MERGE; msg.textContent = ""; }; + +function yamlQuote(s) { + s = String(s ?? ""); + return '"' + s.replaceAll('\\\\', '\\\\\\\\').replaceAll('"', '\\\\"') + '"'; +} + +function buildYamlFromForm() { + const tpl = document.getElementById("f-tpl").value; + if (tpl === "advanced" || tpl === "merge") { + const nn = Number(document.getElementById("f-adv-nnodes").value || "1"); + const gpn = Number(document.getElementById("f-adv-gpn").value || "0"); + const cmd = String(document.getElementById("f-command").value || "").replace(/\\r/g, ""); + const lines = cmd.split("\\n"); + const indented = lines.map(l => " " + l).join("\\n"); + return `kind: advanced\\n` + `nnodes: ${nn}\\n` + `n_gpus_per_node: ${gpn}\\n\\n` + `command: |\\n${indented}\\n`; + } + + const workload = tpl; + const codePath = document.getElementById("f-code-path").value || ""; + const modelId = document.getElementById("f-model-id").value || ""; + const trainFile = document.getElementById("f-train-file").value || ""; + const valFile = document.getElementById("f-val-file").value || ""; + const nn = Number(document.getElementById("f-nnodes").value || "2"); + const gpn = Number(document.getElementById("f-gpn").value || "4"); + const epochs = Number(document.getElementById("f-epochs").value || "1"); + const stepsRaw = String(document.getElementById("f-steps").value || "").trim(); + const saveFreq = Number(document.getElementById("f-save-freq").value || "10"); + const testFreqRaw = String(document.getElementById("f-test-freq").value || "").trim(); + + let y = ""; + y += `workload: ${workload}\\n`; + y += `code_path: ${yamlQuote(codePath)}\\n`; + y += `model_id: ${yamlQuote(modelId)}\\n`; + y += `train_file: ${yamlQuote(trainFile)}\\n`; + y += `val_file: ${yamlQuote(valFile)}\\n`; + y += `nnodes: ${nn}\\n`; + y += `n_gpus_per_node: ${gpn}\\n`; + y += `total_epochs: ${epochs}\\n`; + if (stepsRaw) y += `total_training_steps: ${Number(stepsRaw)}\\n`; + y += `save_freq: ${saveFreq}\\n`; + if (testFreqRaw) y += `test_freq: ${Number(testFreqRaw)}\\n`; + if (tpl === "sft") { + const dev = document.getElementById("f-trainer-device").value || "cpu"; + y += `trainer_device: ${dev}\\n`; + } + return y; +} + +function updateFormVisibility() { + const tpl = document.getElementById("f-tpl").value; + const basic = document.getElementById("f-basic"); + const adv = document.getElementById("f-adv"); + const sftWrap = document.getElementById("f-sft-device-wrap"); + if (tpl === "advanced" || tpl === "merge") { + basic.style.display = "none"; + adv.style.display = "block"; + } else { + basic.style.display = "block"; + adv.style.display = "none"; + } + if (tpl === "sft") { + sftWrap.style.display = "block"; + } else { + sftWrap.style.display = "none"; + } +} + +function updatePreview() { + const text = buildYamlFromForm(); + document.getElementById("f-preview").textContent = text; + return text; +} + +function setBtnActive(id, on) { + const el = document.getElementById(id); + if (!el) return; + if (on) el.classList.add("active"); + else el.classList.remove("active"); +} + +function showMode(mode) { + const yamlPanel = document.getElementById("panel-yaml"); + const formPanel = document.getElementById("panel-form"); + if (mode === "form") { + yamlPanel.style.display = "none"; + formPanel.style.display = "block"; + setBtnActive("mode-yaml", false); + setBtnActive("mode-form", true); + updateFormVisibility(); + updatePreview(); + } else { + yamlPanel.style.display = "block"; + formPanel.style.display = "none"; + setBtnActive("mode-yaml", true); + setBtnActive("mode-form", false); + } +} + +function bindStepper(inputId, decId, incId, minV) { + const el = document.getElementById(inputId); + document.getElementById(decId).onclick = () => { + const v = Number(el.value || "0") - 1; + el.value = String(Math.max(minV, v)); + updatePreview(); + }; + document.getElementById(incId).onclick = () => { + const v = Number(el.value || "0") + 1; + el.value = String(Math.max(minV, v)); + updatePreview(); + }; +} + +// Init defaults (match examples) +document.getElementById("f-code-path").value = "/private/common/code/verl/verl_repo"; +document.getElementById("f-model-id").value = "Qwen/Qwen2.5-0.5B-Instruct"; +document.getElementById("f-train-file").value = "/private/common/datasets/gsm8k/train.parquet"; +document.getElementById("f-val-file").value = "/private/common/datasets/gsm8k/test.parquet"; +document.getElementById("f-nnodes").value = "2"; +document.getElementById("f-gpn").value = "4"; +document.getElementById("f-epochs").value = "1"; +document.getElementById("f-save-freq").value = "10"; +document.getElementById("f-trainer-device").value = "cpu"; + +document.getElementById("f-adv-nnodes").value = "2"; +document.getElementById("f-adv-gpn").value = "4"; +document.getElementById("f-command").value = `PYTHONUNBUFFERED=1 \\\npython3 -m verl.trainer.main_ppo \\\n data.train_files=$HOME/common/datasets/gsm8k/train.parquet \\\n data.val_files=$HOME/common/datasets/gsm8k/test.parquet \\\n +ray_kwargs.ray_init.address=auto`; + +bindStepper("f-nnodes", "f-nnodes-dec", "f-nnodes-inc", 1); +bindStepper("f-gpn", "f-gpn-dec", "f-gpn-inc", 0); +bindStepper("f-epochs", "f-epochs-dec", "f-epochs-inc", 1); +bindStepper("f-adv-nnodes", "f-adv-nnodes-dec", "f-adv-nnodes-inc", 1); +bindStepper("f-adv-gpn", "f-adv-gpn-dec", "f-adv-gpn-inc", 0); + +document.getElementById("f-tpl").onchange = () => { + const tpl = document.getElementById("f-tpl").value; + if (tpl === "ppo") { + document.getElementById("f-train-file").value = "/private/common/datasets/gsm8k/train.parquet"; + document.getElementById("f-val-file").value = "/private/common/datasets/gsm8k/test.parquet"; + } else if (tpl === "grpo") { + document.getElementById("f-train-file").value = "/private/common/datasets/gsm8k/train.parquet"; + document.getElementById("f-val-file").value = "/private/common/datasets/gsm8k/test.parquet"; + } else if (tpl === "sft") { + document.getElementById("f-train-file").value = "/private/common/datasets/gsm8k_sft/train.parquet"; + document.getElementById("f-val-file").value = "/private/common/datasets/gsm8k_sft/test.parquet"; + } else if (tpl === "advanced") { + document.getElementById("f-adv-nnodes").value = "2"; + document.getElementById("f-adv-gpn").value = "4"; + document.getElementById("f-command").value = TPL_ADV.split("command: |")[1]?.trimStart() || document.getElementById("f-command").value; + } else if (tpl === "merge") { + document.getElementById("f-adv-nnodes").value = "1"; + document.getElementById("f-adv-gpn").value = "0"; + document.getElementById("f-command").value = TPL_MERGE.split("command: |")[1]?.trimStart() || document.getElementById("f-command").value; + } + updateFormVisibility(); + updatePreview(); +}; + +for (const id of ["f-code-path","f-model-id","f-train-file","f-val-file","f-nnodes","f-gpn","f-epochs","f-steps","f-save-freq","f-test-freq","f-trainer-device","f-adv-nnodes","f-adv-gpn","f-command"]) { + const el = document.getElementById(id); + if (el) el.oninput = () => updatePreview(); +} + +document.getElementById("mode-yaml").onclick = () => { + // When switching to YAML, sync from form if form is visible. + const formVisible = document.getElementById("panel-form").style.display !== "none"; + if (formVisible) yamlEl.value = updatePreview(); + showMode("yaml"); +}; +document.getElementById("mode-form").onclick = () => { + showMode("form"); +}; + +showMode("yaml"); + document.getElementById("submit").onclick = async () => { msg.textContent = "Submitting..."; - const body = yamlEl.value; + const body = (document.getElementById("panel-form").style.display !== "none") ? updatePreview() : yamlEl.value; const resp = await apiFetch("/api/v2/tasks", { method: "POST", headers: {"Content-Type":"text/plain"}, body }); const text = await resp.text(); if (!resp.ok) { msg.textContent = "Error: " + resp.status + "\\n" + text; return; } @@ -437,6 +807,7 @@ document.getElementById("submit").onclick = async () => { .replace("__TPL_GRPO__", tpl_grpo) .replace("__TPL_SFT__", tpl_sft) .replace("__TPL_ADV__", tpl_adv) + .replace("__TPL_MERGE__", tpl_merge) ) return HTMLResponse(content=_page("New Task", "new", body, script)) @@ -568,11 +939,11 @@ refresh();
-
Loading...
+
""".strip() script = """ -const out = document.getElementById("out"); +const msg = document.getElementById("msg"); document.getElementById("nav-ray-dashboard").href = curOriginWithPort(8265); const u = document.getElementById("u"); const p = document.getElementById("p"); @@ -585,7 +956,7 @@ document.getElementById("copy-p").onclick = async () => { await copyText(p.value async function refresh() { const resp = await apiFetch("/api/v2/me"); const text = await resp.text(); - if (!resp.ok) { out.textContent = "Error: " + resp.status + "\\n" + text; return; } + if (!resp.ok) { msg.textContent = "Error: " + resp.status + "\\n" + text; return; } const obj = JSON.parse(text); u.value = (obj.user_id || ""); const cached = mvpSftpPasswordGet(); @@ -594,17 +965,19 @@ async function refresh() { sftpWeb.href = host + "/web/client"; sftpHost.textContent = (obj.sftp && obj.sftp.host) ? obj.sftp.host : window.location.hostname; sftpPort.textContent = (obj.sftp && obj.sftp.port) ? String(obj.sftp.port) : "2022"; - out.textContent = fmtJson(obj); + msg.textContent = ""; } document.getElementById("reset-p").onclick = async () => { p.value = ""; + mvpSftpPasswordSet(""); + msg.textContent = "Resetting..."; const resp = await apiFetch("/api/v2/me/sftp:reset_password", { method: "POST" }); const text = await resp.text(); - if (!resp.ok) { out.textContent = "Error: " + resp.status + "\\n" + text; return; } + if (!resp.ok) { msg.textContent = "Error: " + resp.status + "\\n" + text; return; } const obj = JSON.parse(text); p.value = obj.password || ""; mvpSftpPasswordSet(p.value); - out.textContent = "SFTPGo password rotated.\\n\\n" + fmtJson(obj); + msg.textContent = "SFTPGo password rotated."; }; refresh(); """.strip() diff --git a/src/mvp/py/tests/test_advanced_command.py b/src/mvp/py/tests/test_advanced_command.py index 118e848..ad0a7c0 100644 --- a/src/mvp/py/tests/test_advanced_command.py +++ b/src/mvp/py/tests/test_advanced_command.py @@ -25,7 +25,7 @@ def test_expand_maps_home_to_private_users(): def test_validate_requires_verl_trainer_entry(): - with pytest.raises(ValueError, match="verl\\.trainer"): + with pytest.raises(ValueError, match="-m verl\\."): validate_advanced_command(user_id="alice", expanded_command="echo hi") @@ -71,4 +71,3 @@ def test_validate_rejects_reward_path_outside_user_code(): ) with pytest.raises(ValueError, match="reward path must be under"): validate_advanced_command(user_id="alice", expanded_command=cmd) - diff --git a/src/mvp/py/tests/test_ui.py b/src/mvp/py/tests/test_ui.py index 24d306d..8255fbf 100644 --- a/src/mvp/py/tests/test_ui.py +++ b/src/mvp/py/tests/test_ui.py @@ -93,3 +93,5 @@ def test_ui_new_task_contains_advanced_example_snippet(tmp_path, monkeypatch): # workload is not needed for advanced in v3.5. assert "# workload:" not in r.text assert "actor_rollout_ref.actor.ppo_micro_batch_size_per_gpu" in r.text + assert "Model merge example" in r.text + assert "verl.model_merger" in r.text