V3.5增加表单模式创建任务

This commit is contained in:
yuyr 2026-01-04 16:48:12 +08:00
parent 5d0d849c28
commit 054ed3612d
4 changed files with 389 additions and 14 deletions

View File

@ -43,8 +43,10 @@ def validate_advanced_command(*, user_id: str, expanded_command: str) -> None:
- If reward path is present, must be under /private/users/<me>/code/...
"""
if "python3" not in expanded_command or "-m verl.trainer." not in expanded_command:
raise ValueError("command must include 'python3' and '-m verl.trainer.'")
# v3.5: advanced commands must still be scoped to VERL entrypoints (not arbitrary shell).
# Allow non-trainer utilities like `verl.model_merger`.
if "python3" not in expanded_command or "-m verl." not in expanded_command:
raise ValueError("command must include 'python3' and '-m verl.'")
for other in _RE_PRIVATE_USER.findall(expanded_command):
if other != user_id:
@ -72,4 +74,3 @@ def validate_advanced_command(*, user_id: str, expanded_command: str) -> None:
allow = f"/private/users/{user_id}/code/"
if not path.startswith(allow):
raise ValueError(f"reward path must be under {allow}, got: {path}")

View File

@ -24,11 +24,14 @@ a { color:var(--accent); text-decoration:none; }
.muted { color: var(--muted); }
.btn { border: 1px solid rgba(255,255,255,0.16); background: rgba(255,255,255,0.06); color: var(--fg); padding: 10px 12px; border-radius: 10px; cursor: pointer; }
.btn:hover { background: rgba(255,255,255,0.10); }
.btn.active { background: rgba(122,162,255,0.14); border-color: rgba(122,162,255,0.22); }
.btn.danger { border-color: rgba(255,107,107,0.35); background: rgba(255,107,107,0.10); }
.pill { display:inline-block; padding: 2px 10px; border-radius: 999px; border: 1px solid rgba(255,255,255,0.16); font-size: 12px; }
.pill.ok { border-color: rgba(61,220,151,0.35); background: rgba(61,220,151,0.12); }
.pill.bad { border-color: rgba(255,107,107,0.35); background: rgba(255,107,107,0.12); }
textarea, input { width: 100%; color: var(--fg); background: rgba(255,255,255,0.06); border: 1px solid rgba(255,255,255,0.12); border-radius: 12px; padding: 10px 12px; outline: none; }
select { color: var(--fg); background: rgba(255,255,255,0.06); border: 1px solid rgba(255,255,255,0.12); border-radius: 12px; padding: 10px 12px; outline: none; }
select option { color: var(--fg); background: #0e1630; }
button:disabled { opacity: .45; cursor: not-allowed; }
pre { white-space: pre-wrap; word-break: break-word; }
table { width:100%; border-collapse: collapse; }
@ -174,13 +177,38 @@ def register_ui_routes(app: FastAPI) -> None:
<div style="height:10px"></div>
<div id="msg" class="muted"></div>
</div>
<div style="height:12px"></div>
<div class="card">
<h3 style="margin-top:0">User Info</h3>
<div class="muted">Shown after login via <code>/api/v2/me</code>.</div>
<div style="height:10px"></div>
<div class="row">
<button class="btn" id="me-refresh">Refresh</button>
</div>
<div style="height:10px"></div>
<pre id="me" class="muted">(not loaded)</pre>
</div>
""".strip()
script = """
const tokEl = document.getElementById("tok");
const msg = document.getElementById("msg");
const me = document.getElementById("me");
tokEl.value = mvpTokenGet();
document.getElementById("save").onclick = () => { mvpTokenSet(tokEl.value); msg.textContent = "Saved."; };
document.getElementById("clear").onclick = () => { mvpTokenSet(""); tokEl.value = ""; msg.textContent = "Cleared."; };
// Fetch the current user's info from /api/v2/me and render it into the `me` element.
// Shows "Loading..." while the request is in flight, then either the fmtJson()
// rendering of the response or an "Error: ..." message.
async function refreshMe() {
me.textContent = "Loading...";
try {
const obj = await apiJson("/api/v2/me");
me.textContent = fmtJson(obj);
} catch (e) {
// Reads e.status / e.body and falls back to String(e) when body is missing.
// NOTE(review): presumably apiJson attaches status/body to the errors it throws — confirm.
me.textContent = "Error: " + (e.status || "") + "\\n" + (e.body || String(e));
}
}
document.getElementById("me-refresh").onclick = refreshMe;
document.getElementById("save").onclick = () => { mvpTokenSet(tokEl.value); msg.textContent = "Saved."; refreshMe(); };
document.getElementById("clear").onclick = () => { mvpTokenSet(""); tokEl.value = ""; msg.textContent = "Cleared."; me.textContent = "(not loaded)"; };
if (mvpTokenGet()) refreshMe();
""".strip()
return HTMLResponse(content=_page("Login", "login", body, script))
@ -380,6 +408,24 @@ command: |
# command 里增加如下 overrides
# custom_reward_function.path=$HOME/code/reward.py
# custom_reward_function.name=compute_score
""".strip()
merge = """# Model Merge (YAML) - v3.5 (Advanced command)
# 用途:将 VERL 训练产生的 FSDP 分片 checkpoint 合并为 HuggingFace 格式目录。
#
# 你需要把 <RAY_SUBMISSION_ID> 和 <GLOBAL_STEP> 替换成真实值:
# - submission id：在 Tasks 详情页里看到的 `ray_submission_id`(如 mvp2-...--a01)
# - global_step：对应 checkpoints 下的 global_step_xxx 目录(如 global_step_10)
#
# 注意：这里不需要 GPU，建议把 n_gpus_per_node 设为 0，这样不受训练任务占用 GPU 的影响。
kind: advanced
nnodes: 1
n_gpus_per_node: 0
command: |
python3 -m verl.model_merger merge \
--backend fsdp \
--local_dir $HOME/jobs/<RAY_SUBMISSION_ID>/checkpoints/<GLOBAL_STEP>/actor \
--target_dir $HOME/jobs/<RAY_SUBMISSION_ID>/checkpoints/<GLOBAL_STEP>/actor/huggingface
""".strip()
body = f"""
<h1>New Task</h1>
@ -389,14 +435,161 @@ command: |
Basic tasks require <code>code_path</code>; Advanced tasks use <code>kind: advanced</code> with a custom <code>command</code>.
</div>
<div style="height:10px"></div>
<div class="row">
<button class="btn" id="mode-yaml">YAML 模式</button>
<button class="btn" id="mode-form">表单模式</button>
</div>
<div style="height:10px"></div>
<div id="panel-yaml">
<div class="row">
<button class="btn" id="tpl-ppo">PPO example</button>
<button class="btn" id="tpl-grpo">GRPO example</button>
<button class="btn" id="tpl-sft">SFT example</button>
<button class="btn" id="tpl-adv">Advanced example</button>
<button class="btn" id="tpl-merge">Model merge example</button>
</div>
<div style="height:10px"></div>
<textarea id="yaml" rows="16">{html.escape(ppo)}</textarea>
</div>
<div id="panel-form" style="display:none">
<div class="row">
<div style="min-width:240px">
<div class="muted">模板</div>
<div style="height:6px"></div>
<select id="f-tpl" style="width:240px; padding:10px 12px; border-radius: 12px; background: rgba(255,255,255,0.06); color: var(--fg); border: 1px solid rgba(255,255,255,0.12);">
<option value="ppo">PPO</option>
<option value="grpo">GRPO</option>
<option value="sft">SFT</option>
<option value="advanced">Advanced</option>
<option value="merge">Model Merge</option>
</select>
</div>
<div class="muted" style="flex:1; min-width:260px">
表单模式会生成 TaskSpec YAML，并用同一个 API 提交；可随时切回 YAML 模式查看/编辑生成结果。
</div>
</div>
<div style="height:14px"></div>
<div id="f-basic">
<div class="row">
<div style="flex:1; min-width:260px">
<div class="muted">code_path</div>
<div style="height:6px"></div>
<input id="f-code-path" />
</div>
<div style="flex:1; min-width:260px">
<div class="muted">model_id</div>
<div style="height:6px"></div>
<input id="f-model-id" />
</div>
</div>
<div style="height:10px"></div>
<div class="row">
<div style="flex:1; min-width:260px">
<div class="muted">train_file</div>
<div style="height:6px"></div>
<input id="f-train-file" />
</div>
<div style="flex:1; min-width:260px">
<div class="muted">val_file</div>
<div style="height:6px"></div>
<input id="f-val-file" />
</div>
</div>
<div style="height:10px"></div>
<div class="row">
<div style="min-width:220px">
<div class="muted">nnodes</div>
<div style="height:6px"></div>
<div class="row" style="gap:8px">
<button class="btn" id="f-nnodes-dec">-</button>
<input id="f-nnodes" type="number" min="1" step="1" style="width:120px" />
<button class="btn" id="f-nnodes-inc">+</button>
</div>
</div>
<div style="min-width:240px">
<div class="muted">n_gpus_per_node</div>
<div style="height:6px"></div>
<div class="row" style="gap:8px">
<button class="btn" id="f-gpn-dec">-</button>
<input id="f-gpn" type="number" min="0" step="1" style="width:120px" />
<button class="btn" id="f-gpn-inc">+</button>
</div>
</div>
<div style="min-width:220px">
<div class="muted">total_epochs</div>
<div style="height:6px"></div>
<div class="row" style="gap:8px">
<button class="btn" id="f-epochs-dec">-</button>
<input id="f-epochs" type="number" min="1" step="1" style="width:120px" />
<button class="btn" id="f-epochs-inc">+</button>
</div>
</div>
<div style="min-width:260px">
<div class="muted">total_training_steps可选</div>
<div style="height:6px"></div>
<input id="f-steps" type="number" min="1" step="1" placeholder="留空表示不传(让 VERL 自动推导)" />
</div>
</div>
<div style="height:10px"></div>
<div class="row">
<div style="min-width:220px">
<div class="muted">save_freq</div>
<div style="height:6px"></div>
<input id="f-save-freq" type="number" min="1" step="1" style="width:160px" />
</div>
<div style="min-width:260px">
<div class="muted">test_freq可选</div>
<div style="height:6px"></div>
<input id="f-test-freq" type="number" min="1" step="1" placeholder="留空表示不传(训练端会当成 -1=不验证)" />
</div>
<div id="f-sft-device-wrap" style="min-width:260px; display:none">
<div class="muted">trainer_device SFT</div>
<div style="height:6px"></div>
<select id="f-trainer-device" style="width:200px; padding:10px 12px; border-radius: 12px; background: rgba(255,255,255,0.06); color: var(--fg); border: 1px solid rgba(255,255,255,0.12);">
<option value="cpu">cpu</option>
<option value="cuda">cuda</option>
</select>
</div>
</div>
</div>
<div id="f-adv" style="display:none">
<div class="row">
<div style="min-width:220px">
<div class="muted">nnodes</div>
<div style="height:6px"></div>
<div class="row" style="gap:8px">
<button class="btn" id="f-adv-nnodes-dec">-</button>
<input id="f-adv-nnodes" type="number" min="1" step="1" style="width:120px" />
<button class="btn" id="f-adv-nnodes-inc">+</button>
</div>
</div>
<div style="min-width:240px">
<div class="muted">n_gpus_per_node</div>
<div style="height:6px"></div>
<div class="row" style="gap:8px">
<button class="btn" id="f-adv-gpn-dec">-</button>
<input id="f-adv-gpn" type="number" min="0" step="1" style="width:120px" />
<button class="btn" id="f-adv-gpn-inc">+</button>
</div>
</div>
</div>
<div style="height:10px"></div>
<div class="muted">commandbash多行可用 <code>\\</code> 续行</div>
<div style="height:6px"></div>
<textarea id="f-command" rows="14"></textarea>
</div>
<div style="height:10px"></div>
<div class="muted">预览生成的 YAML</div>
<div style="height:6px"></div>
<pre id="f-preview" class="muted"></pre>
</div>
<div style="height:10px"></div>
<div class="row">
<button class="btn" id="submit">Submit</button>
@ -410,6 +603,7 @@ command: |
tpl_grpo = json.dumps(grpo)
tpl_sft = json.dumps(sft)
tpl_adv = json.dumps(adv)
tpl_merge = json.dumps(merge)
script = (
"""
const msg = document.getElementById("msg");
@ -418,13 +612,189 @@ const TPL_PPO = __TPL_PPO__;
const TPL_GRPO = __TPL_GRPO__;
const TPL_SFT = __TPL_SFT__;
const TPL_ADV = __TPL_ADV__;
const TPL_MERGE = __TPL_MERGE__;
document.getElementById("tpl-ppo").onclick = () => { yamlEl.value = TPL_PPO; msg.textContent = ""; };
document.getElementById("tpl-grpo").onclick = () => { yamlEl.value = TPL_GRPO; msg.textContent = ""; };
document.getElementById("tpl-sft").onclick = () => { yamlEl.value = TPL_SFT; msg.textContent = ""; };
document.getElementById("tpl-adv").onclick = () => { yamlEl.value = TPL_ADV; msg.textContent = ""; };
document.getElementById("tpl-merge").onclick = () => { yamlEl.value = TPL_MERGE; msg.textContent = ""; };
// Render a value as a double-quoted YAML scalar.
// null/undefined become "" (via `?? ""`); anything else is converted with String().
// Backslashes are escaped first, then double quotes, so the backslash inserted
// in front of a quote is not escaped a second time.
// Only backslash and double quote are escaped; no other characters are handled here.
// NOTE(review): the backslash runs below look like they are written for an enclosing
// Python string literal, so the browser would see half as many — confirm.
function yamlQuote(s) {
s = String(s ?? "");
return '"' + s.replaceAll('\\\\', '\\\\\\\\').replaceAll('"', '\\\\"') + '"';
}
// Build the YAML text for a task from the current form field values.
// Returns a string; it does not touch the DOM other than reading inputs.
// Two shapes are produced, selected by the "f-tpl" dropdown:
//   - "advanced" / "merge": kind: advanced + nnodes + n_gpus_per_node + command block scalar
//   - anything else: a basic spec keyed by `workload` (the template value itself)
// Numeric fields go through Number() with no further validation in this function.
function buildYamlFromForm() {
const tpl = document.getElementById("f-tpl").value;
if (tpl === "advanced" || tpl === "merge") {
// Empty inputs fall back to 1 node / 0 GPUs for the advanced form.
const nn = Number(document.getElementById("f-adv-nnodes").value || "1");
const gpn = Number(document.getElementById("f-adv-gpn").value || "0");
// Drop carriage returns so the command is split on plain line feeds only.
const cmd = String(document.getElementById("f-command").value || "").replace(/\\r/g, "");
const lines = cmd.split("\\n");
// Every command line gets the same prefix so it nests under `command: |`.
const indented = lines.map(l => " " + l).join("\\n");
return `kind: advanced\\n` + `nnodes: ${nn}\\n` + `n_gpus_per_node: ${gpn}\\n\\n` + `command: |\\n${indented}\\n`;
}
const workload = tpl;
const codePath = document.getElementById("f-code-path").value || "";
const modelId = document.getElementById("f-model-id").value || "";
const trainFile = document.getElementById("f-train-file").value || "";
const valFile = document.getElementById("f-val-file").value || "";
// Empty inputs fall back to 2 nodes / 4 GPUs / 1 epoch / save_freq 10.
const nn = Number(document.getElementById("f-nnodes").value || "2");
const gpn = Number(document.getElementById("f-gpn").value || "4");
const epochs = Number(document.getElementById("f-epochs").value || "1");
// Kept as trimmed strings so "left blank" can be told apart from a number.
const stepsRaw = String(document.getElementById("f-steps").value || "").trim();
const saveFreq = Number(document.getElementById("f-save-freq").value || "10");
const testFreqRaw = String(document.getElementById("f-test-freq").value || "").trim();
let y = "";
y += `workload: ${workload}\\n`;
// String fields are emitted through yamlQuote(); numeric fields are emitted bare.
y += `code_path: ${yamlQuote(codePath)}\\n`;
y += `model_id: ${yamlQuote(modelId)}\\n`;
y += `train_file: ${yamlQuote(trainFile)}\\n`;
y += `val_file: ${yamlQuote(valFile)}\\n`;
y += `nnodes: ${nn}\\n`;
y += `n_gpus_per_node: ${gpn}\\n`;
y += `total_epochs: ${epochs}\\n`;
// Optional keys are omitted entirely when their input is blank.
if (stepsRaw) y += `total_training_steps: ${Number(stepsRaw)}\\n`;
y += `save_freq: ${saveFreq}\\n`;
if (testFreqRaw) y += `test_freq: ${Number(testFreqRaw)}\\n`;
// trainer_device is only written for the SFT template; defaults to "cpu".
if (tpl === "sft") {
const dev = document.getElementById("f-trainer-device").value || "cpu";
y += `trainer_device: ${dev}\\n`;
}
return y;
}
// Show the form section that matches the selected template ("f-tpl"):
//   - "advanced" / "merge" show the "f-adv" section and hide "f-basic"
//   - every other template shows "f-basic" and hides "f-adv"
// The trainer-device wrapper ("f-sft-device-wrap") is visible only for "sft".
function updateFormVisibility() {
const tpl = document.getElementById("f-tpl").value;
const basic = document.getElementById("f-basic");
const adv = document.getElementById("f-adv");
const sftWrap = document.getElementById("f-sft-device-wrap");
if (tpl === "advanced" || tpl === "merge") {
basic.style.display = "none";
adv.style.display = "block";
} else {
basic.style.display = "block";
adv.style.display = "none";
}
if (tpl === "sft") {
sftWrap.style.display = "block";
} else {
sftWrap.style.display = "none";
}
}
// Regenerate the YAML from the form, write it into the "f-preview" element,
// and return the same text so callers can reuse it without rebuilding.
function updatePreview() {
const text = buildYamlFromForm();
document.getElementById("f-preview").textContent = text;
return text;
}
// Add (on truthy) or remove (on falsy) the "active" CSS class on the element
// with the given id. Does nothing when no such element exists.
function setBtnActive(id, on) {
const el = document.getElementById(id);
if (!el) return;
if (on) el.classList.add("active");
else el.classList.remove("active");
}
// Switch the page between the YAML panel and the form panel and update the
// highlighted mode button. Any `mode` other than "form" selects the YAML panel.
function showMode(mode) {
const yamlPanel = document.getElementById("panel-yaml");
const formPanel = document.getElementById("panel-form");
if (mode === "form") {
yamlPanel.style.display = "none";
formPanel.style.display = "block";
setBtnActive("mode-yaml", false);
setBtnActive("mode-form", true);
// Entering form mode refreshes section visibility and the YAML preview.
updateFormVisibility();
updatePreview();
} else {
yamlPanel.style.display = "block";
formPanel.style.display = "none";
setBtnActive("mode-yaml", true);
setBtnActive("mode-form", false);
}
}
// Wire a pair of "-" / "+" buttons to a numeric input.
//   inputId: id of the input whose value is stepped
//   decId / incId: ids of the decrement / increment buttons
//   minV: lower bound; the stored value never goes below it
// An empty input is treated as 0 before stepping. Both handlers clamp with
// Math.max(minV, v) and then refresh the YAML preview.
function bindStepper(inputId, decId, incId, minV) {
const el = document.getElementById(inputId);
document.getElementById(decId).onclick = () => {
const v = Number(el.value || "0") - 1;
el.value = String(Math.max(minV, v));
updatePreview();
};
document.getElementById(incId).onclick = () => {
const v = Number(el.value || "0") + 1;
// Clamped on increment too, so stepping up from a value below minV lands on minV.
el.value = String(Math.max(minV, v));
updatePreview();
};
}
// Init defaults (match examples)
document.getElementById("f-code-path").value = "/private/common/code/verl/verl_repo";
document.getElementById("f-model-id").value = "Qwen/Qwen2.5-0.5B-Instruct";
document.getElementById("f-train-file").value = "/private/common/datasets/gsm8k/train.parquet";
document.getElementById("f-val-file").value = "/private/common/datasets/gsm8k/test.parquet";
document.getElementById("f-nnodes").value = "2";
document.getElementById("f-gpn").value = "4";
document.getElementById("f-epochs").value = "1";
document.getElementById("f-save-freq").value = "10";
document.getElementById("f-trainer-device").value = "cpu";
document.getElementById("f-adv-nnodes").value = "2";
document.getElementById("f-adv-gpn").value = "4";
document.getElementById("f-command").value = `PYTHONUNBUFFERED=1 \\\npython3 -m verl.trainer.main_ppo \\\n data.train_files=$HOME/common/datasets/gsm8k/train.parquet \\\n data.val_files=$HOME/common/datasets/gsm8k/test.parquet \\\n +ray_kwargs.ray_init.address=auto`;
bindStepper("f-nnodes", "f-nnodes-dec", "f-nnodes-inc", 1);
bindStepper("f-gpn", "f-gpn-dec", "f-gpn-inc", 0);
bindStepper("f-epochs", "f-epochs-dec", "f-epochs-inc", 1);
bindStepper("f-adv-nnodes", "f-adv-nnodes-dec", "f-adv-nnodes-inc", 1);
bindStepper("f-adv-gpn", "f-adv-gpn-dec", "f-adv-gpn-inc", 0);
document.getElementById("f-tpl").onchange = () => {
const tpl = document.getElementById("f-tpl").value;
if (tpl === "ppo") {
document.getElementById("f-train-file").value = "/private/common/datasets/gsm8k/train.parquet";
document.getElementById("f-val-file").value = "/private/common/datasets/gsm8k/test.parquet";
} else if (tpl === "grpo") {
document.getElementById("f-train-file").value = "/private/common/datasets/gsm8k/train.parquet";
document.getElementById("f-val-file").value = "/private/common/datasets/gsm8k/test.parquet";
} else if (tpl === "sft") {
document.getElementById("f-train-file").value = "/private/common/datasets/gsm8k_sft/train.parquet";
document.getElementById("f-val-file").value = "/private/common/datasets/gsm8k_sft/test.parquet";
} else if (tpl === "advanced") {
document.getElementById("f-adv-nnodes").value = "2";
document.getElementById("f-adv-gpn").value = "4";
document.getElementById("f-command").value = TPL_ADV.split("command: |")[1]?.trimStart() || document.getElementById("f-command").value;
} else if (tpl === "merge") {
document.getElementById("f-adv-nnodes").value = "1";
document.getElementById("f-adv-gpn").value = "0";
document.getElementById("f-command").value = TPL_MERGE.split("command: |")[1]?.trimStart() || document.getElementById("f-command").value;
}
updateFormVisibility();
updatePreview();
};
for (const id of ["f-code-path","f-model-id","f-train-file","f-val-file","f-nnodes","f-gpn","f-epochs","f-steps","f-save-freq","f-test-freq","f-trainer-device","f-adv-nnodes","f-adv-gpn","f-command"]) {
const el = document.getElementById(id);
if (el) el.oninput = () => updatePreview();
}
document.getElementById("mode-yaml").onclick = () => {
// When switching to YAML, sync from form if form is visible.
const formVisible = document.getElementById("panel-form").style.display !== "none";
if (formVisible) yamlEl.value = updatePreview();
showMode("yaml");
};
document.getElementById("mode-form").onclick = () => {
showMode("form");
};
showMode("yaml");
document.getElementById("submit").onclick = async () => {
msg.textContent = "Submitting...";
const body = yamlEl.value;
const body = (document.getElementById("panel-form").style.display !== "none") ? updatePreview() : yamlEl.value;
const resp = await apiFetch("/api/v2/tasks", { method: "POST", headers: {"Content-Type":"text/plain"}, body });
const text = await resp.text();
if (!resp.ok) { msg.textContent = "Error: " + resp.status + "\\n" + text; return; }
@ -437,6 +807,7 @@ document.getElementById("submit").onclick = async () => {
.replace("__TPL_GRPO__", tpl_grpo)
.replace("__TPL_SFT__", tpl_sft)
.replace("__TPL_ADV__", tpl_adv)
.replace("__TPL_MERGE__", tpl_merge)
)
return HTMLResponse(content=_page("New Task", "new", body, script))
@ -568,11 +939,11 @@ refresh();
</div>
<div style="height:14px"></div>
<pre id="out" class="muted">Loading...</pre>
<div id="msg" class="muted"></div>
</div>
""".strip()
script = """
const out = document.getElementById("out");
const msg = document.getElementById("msg");
document.getElementById("nav-ray-dashboard").href = curOriginWithPort(8265);
const u = document.getElementById("u");
const p = document.getElementById("p");
@ -585,7 +956,7 @@ document.getElementById("copy-p").onclick = async () => { await copyText(p.value
async function refresh() {
const resp = await apiFetch("/api/v2/me");
const text = await resp.text();
if (!resp.ok) { out.textContent = "Error: " + resp.status + "\\n" + text; return; }
if (!resp.ok) { msg.textContent = "Error: " + resp.status + "\\n" + text; return; }
const obj = JSON.parse(text);
u.value = (obj.user_id || "");
const cached = mvpSftpPasswordGet();
@ -594,17 +965,19 @@ async function refresh() {
sftpWeb.href = host + "/web/client";
sftpHost.textContent = (obj.sftp && obj.sftp.host) ? obj.sftp.host : window.location.hostname;
sftpPort.textContent = (obj.sftp && obj.sftp.port) ? String(obj.sftp.port) : "2022";
out.textContent = fmtJson(obj);
msg.textContent = "";
}
document.getElementById("reset-p").onclick = async () => {
p.value = "";
mvpSftpPasswordSet("");
msg.textContent = "Resetting...";
const resp = await apiFetch("/api/v2/me/sftp:reset_password", { method: "POST" });
const text = await resp.text();
if (!resp.ok) { out.textContent = "Error: " + resp.status + "\\n" + text; return; }
if (!resp.ok) { msg.textContent = "Error: " + resp.status + "\\n" + text; return; }
const obj = JSON.parse(text);
p.value = obj.password || "";
mvpSftpPasswordSet(p.value);
out.textContent = "SFTPGo password rotated.\\n\\n" + fmtJson(obj);
msg.textContent = "SFTPGo password rotated.";
};
refresh();
""".strip()

View File

@ -25,7 +25,7 @@ def test_expand_maps_home_to_private_users():
def test_validate_requires_verl_trainer_entry():
with pytest.raises(ValueError, match="verl\\.trainer"):
with pytest.raises(ValueError, match="-m verl\\."):
validate_advanced_command(user_id="alice", expanded_command="echo hi")
@ -71,4 +71,3 @@ def test_validate_rejects_reward_path_outside_user_code():
)
with pytest.raises(ValueError, match="reward path must be under"):
validate_advanced_command(user_id="alice", expanded_command=cmd)

View File

@ -93,3 +93,5 @@ def test_ui_new_task_contains_advanced_example_snippet(tmp_path, monkeypatch):
# workload is not needed for advanced in v3.5.
assert "# workload:" not in r.text
assert "actor_rollout_ref.actor.ppo_micro_batch_size_per_gpu" in r.text
assert "Model merge example" in r.text
assert "verl.model_merger" in r.text