from __future__ import annotations import html import json from fastapi import FastAPI from fastapi.responses import HTMLResponse, RedirectResponse _BASE_CSS = """ :root { --bg:#0b1020; --panel:#111a33; --muted:#95a3c6; --fg:#e8eeff; --accent:#7aa2ff; --danger:#ff6b6b; --ok:#3ddc97; } * { box-sizing: border-box; } body { margin:0; font-family: ui-sans-serif, system-ui, -apple-system, Segoe UI, Roboto, Helvetica, Arial; background:var(--bg); color:var(--fg); } a { color:var(--accent); text-decoration:none; } .layout { display:flex; min-height:100vh; } .nav { width: 240px; padding:16px; background: linear-gradient(180deg, #0e1630, #0b1020); border-right: 1px solid rgba(255,255,255,0.06); } .brand { font-weight: 700; letter-spacing: .2px; margin-bottom: 12px; } .nav a { display:block; padding:10px 10px; border-radius:10px; color: var(--fg); opacity: .9; } .nav a.active { background: rgba(122,162,255,0.14); border: 1px solid rgba(122,162,255,0.22); } .nav a:hover { background: rgba(255,255,255,0.06); } .main { flex:1; padding: 20px 24px; } .card { background: rgba(255,255,255,0.04); border: 1px solid rgba(255,255,255,0.08); border-radius: 14px; padding: 16px; } .row { display:flex; gap: 12px; align-items:center; flex-wrap: wrap; } .muted { color: var(--muted); } .btn { border: 1px solid rgba(255,255,255,0.16); background: rgba(255,255,255,0.06); color: var(--fg); padding: 10px 12px; border-radius: 10px; cursor: pointer; } .btn:hover { background: rgba(255,255,255,0.10); } .btn.active { background: rgba(122,162,255,0.14); border-color: rgba(122,162,255,0.22); } .btn.danger { border-color: rgba(255,107,107,0.35); background: rgba(255,107,107,0.10); } .pill { display:inline-block; padding: 2px 10px; border-radius: 999px; border: 1px solid rgba(255,255,255,0.16); font-size: 12px; } .pill.ok { border-color: rgba(61,220,151,0.35); background: rgba(61,220,151,0.12); } .pill.bad { border-color: rgba(255,107,107,0.35); background: rgba(255,107,107,0.12); } textarea, input { width: 100%; color: var(--fg); background: rgba(255,255,255,0.06); border: 1px solid rgba(255,255,255,0.12); border-radius: 12px; padding: 10px 12px; outline: none; } select { color: var(--fg); background: rgba(255,255,255,0.06); border: 1px solid rgba(255,255,255,0.12); border-radius: 12px; padding: 10px 12px; outline: none; } select option { color: var(--fg); background: #0e1630; } button:disabled { opacity: .45; cursor: not-allowed; } pre { white-space: pre-wrap; word-break: break-word; } table { width:100%; border-collapse: collapse; } th, td { padding: 10px 8px; border-bottom: 1px solid rgba(255,255,255,0.08); text-align:left; } """.strip() _BASE_JS = """ function mvpTokenGet() { return (localStorage.getItem("mvp_token") || "").trim(); } function mvpTokenSet(v) { localStorage.setItem("mvp_token", (v || "").trim()); } function mvpSftpPasswordGet() { return (localStorage.getItem("mvp_sftp_password") || "").trim(); } function mvpSftpPasswordSet(v) { localStorage.setItem("mvp_sftp_password", (v || "").trim()); } async function apiFetch(path, opts) { opts = opts || {}; opts.headers = opts.headers || {}; const tok = mvpTokenGet(); if (tok) opts.headers["Authorization"] = "Bearer " + tok; return fetch(path, opts); } async function apiJson(path, opts) { const resp = await apiFetch(path, opts); const text = await resp.text(); if (!resp.ok) { const err = new Error("HTTP " + resp.status); err.status = resp.status; err.body = text; throw err; } return JSON.parse(text); } function fmtJson(obj) { try { return JSON.stringify(obj, null, 2); } catch (e) { return String(obj); } } function curOriginWithPort(port) { const proto = window.location.protocol; const host = window.location.hostname; return proto + "//" + host + ":" + port; } async function copyText(v) { if (!v) return false; try { await navigator.clipboard.writeText(v); return true; } catch (e) { // Fallback for non-secure contexts (http) or older browsers. try { const ta = document.createElement("textarea"); ta.value = v; ta.style.position = "fixed"; ta.style.opacity = "0"; document.body.appendChild(ta); ta.focus(); ta.select(); const ok = document.execCommand("copy"); document.body.removeChild(ta); return ok; } catch (e2) { return false; } } } document.addEventListener("DOMContentLoaded", () => { const el = document.getElementById("nav-ray-dashboard"); if (el) el.href = curOriginWithPort(8265); }); """.strip() def _nav(active: str) -> str: links = [ ("login", "/ui/login", "Login"), ("tasks", "/ui/tasks", "Tasks"), ("new", "/ui/tasks/new", "New Task"), ("data", "/ui/data", "Data"), ("admin", "/ui/admin", "Admin"), ("ray", "#", "Ray Dashboard"), ] items = [] for key, href, label in links: cls = "active" if key == active else "" extra = "" if key == "ray": extra = ' id="nav-ray-dashboard" target="_blank" rel="noopener"' items.append(f'{html.escape(label)}') return "\n".join(items) def _page(title: str, active: str, body: str, script: str = "") -> str: return f""" {html.escape(title)}
{body}
""" def register_ui_routes(app: FastAPI) -> None: @app.get("/ui") async def ui_root() -> RedirectResponse: return RedirectResponse(url="/ui/tasks") @app.get("/ui/login") async def ui_login() -> HTMLResponse: body = """

Login

Paste your API token (without the Bearer prefix).
Go to Tasks

User Info

Shown after login via /api/v2/me.
(not loaded)
""".strip() script = """ const tokEl = document.getElementById("tok"); const msg = document.getElementById("msg"); const me = document.getElementById("me"); tokEl.value = mvpTokenGet(); async function refreshMe() { me.textContent = "Loading..."; try { const obj = await apiJson("/api/v2/me"); me.textContent = fmtJson(obj); } catch (e) { me.textContent = "Error: " + (e.status || "") + "\\n" + (e.body || String(e)); } } document.getElementById("me-refresh").onclick = refreshMe; document.getElementById("save").onclick = () => { mvpTokenSet(tokEl.value); msg.textContent = "Saved."; refreshMe(); }; document.getElementById("clear").onclick = () => { mvpTokenSet(""); tokEl.value = ""; msg.textContent = "Cleared."; me.textContent = "(not loaded)"; }; if (mvpTokenGet()) refreshMe(); """.strip() return HTMLResponse(content=_page("Login", "login", body, script)) @app.get("/ui/tasks") async def ui_tasks() -> HTMLResponse: body = """

Tasks

New Task
Loading...
""".strip() script = """ const out = document.getElementById("out"); async function refresh() { out.textContent = "Loading..."; try { const q = await apiJson("/api/v2/queue"); const completedLimit = 25; const completedOffset = Number(localStorage.getItem("mvp_completed_offset") || "0") || 0; const done = await apiJson("/api/v2/tasks?limit=" + completedLimit + "&offset=" + completedOffset + "&states=SUCCEEDED,FAILED,CANCELED"); function pill(state) { const s = String(state || ""); if (s === "SUCCEEDED") return `${s}`; if (s === "FAILED") return `${s}`; if (s === "CANCELED") return `${s}`; if (s === "RUNNING") return `${s}`; if (s === "QUEUED" || s === "PENDING_RESOURCES" || s === "SUBMITTING" || s === "SUBMITTED") return `${s}`; return `${s}`; } function row(t) { const id = t.task_id; return ` ${id} ${t.workload} ${pill(t.state)} ${t.nnodes} x ${t.n_gpus_per_node} GPU ${t.updated_at || ""} `; } const running = (q.running || []).map(row).join(""); const pending = (q.pending || []).map(row).join(""); const doneRows = (done.tasks || []).map(row).join(""); const pageNo = Math.floor(completedOffset / completedLimit) + 1; const prevDisabled = completedOffset <= 0; const nextDisabled = !done.has_more; out.innerHTML = `
Tip: configure token in Login.

Running

${running || ""}
TaskWorkloadStateResourcesUpdated
(none)

Pending

${pending || ""}
TaskWorkloadStateResourcesUpdated
(none)

Completed

Page ${pageNo}
${doneRows || ""}
TaskWorkloadStateResourcesUpdated
(none)
`; const prevBtn = document.getElementById("done-prev"); const nextBtn = document.getElementById("done-next"); if (prevBtn) prevBtn.onclick = () => { const cur = Number(localStorage.getItem("mvp_completed_offset") || "0") || 0; const next = Math.max(0, cur - completedLimit); localStorage.setItem("mvp_completed_offset", String(next)); refresh(); }; if (nextBtn) nextBtn.onclick = () => { const cur = Number(localStorage.getItem("mvp_completed_offset") || "0") || 0; const next = cur + completedLimit; localStorage.setItem("mvp_completed_offset", String(next)); refresh(); }; } catch (e) { out.textContent = "Error: " + (e.status || "") + "\\n" + (e.body || String(e)); } } document.getElementById("refresh").onclick = refresh; refresh(); """.strip() return HTMLResponse(content=_page("Tasks", "tasks", body, script)) @app.get("/ui/tasks/new") async def ui_new_task() -> HTMLResponse: ppo = """# PPO TaskSpec (YAML) workload: ppo # 任务类型(必填):ppo|grpo|sft code_path: /private/common/code/verl/verl_repo # 代码路径(必填):v3.0 固定使用 common 下的 verl 快照(不支持用户自定义代码) model_id: Qwen/Qwen2.5-0.5B-Instruct # 基础模型(必填):HuggingFace 模型 ID 或 /private/... 本地模型路径 train_file: /private/common/datasets/gsm8k/train.parquet # 训练数据(必填):parquet 文件路径(支持 /private/common/datasets 或 /private/users//datasets) val_file: /private/common/datasets/gsm8k/test.parquet # 验证数据(必填):parquet 文件路径(VERL 侧会用来构建 val dataset,不能为 null) # nnodes: 2 # 训练节点数(可选,默认:2) # n_gpus_per_node: 4 # 每节点 GPU 数(可选,默认:4) # total_epochs: 1 # 总训练 epoch(可选,默认:1) # total_training_steps: null # 总训练 step(可选,默认:null;不传则让 VERL 按 epochs 和数据长度自动推导) # save_freq: 10 # checkpoint 保存频率(step)(可选,默认:10) # test_freq: null # 验证频率(step)(可选,默认:null;训练端会当成 -1=不验证) # submission_id: "" # Ray submission_id(可选,默认空;通常由服务自动生成,无需填写) """.strip() grpo = """# GRPO TaskSpec (YAML) workload: grpo # 任务类型(必填):ppo|grpo|sft(grpo 会自动启用对应的算法配置) code_path: /private/common/code/verl/verl_repo # 代码路径(必填):v3.0 固定使用 common 下的 verl 快照(不支持用户自定义代码) model_id: Qwen/Qwen2.5-0.5B-Instruct # 基础模型(必填):HuggingFace 模型 ID 或 /private/... 本地模型路径 train_file: /private/common/datasets/gsm8k/train.parquet # 训练数据(必填):parquet 文件路径(支持 /private/common/datasets 或 /private/users//datasets) val_file: /private/common/datasets/gsm8k/test.parquet # 验证数据(必填):parquet 文件路径(VERL 侧会用来构建 val dataset,不能为 null) # nnodes: 2 # 训练节点数(可选,默认:2) # n_gpus_per_node: 4 # 每节点 GPU 数(可选,默认:4) # total_epochs: 1 # 总训练 epoch(可选,默认:1) # total_training_steps: null # 总训练 step(可选,默认:null;不传则让 VERL 按 epochs 和数据长度自动推导) # save_freq: 10 # checkpoint 保存频率(step)(可选,默认:10) # test_freq: null # 验证频率(step)(可选,默认:null;训练端会当成 -1=不验证) # submission_id: "" # Ray submission_id(可选,默认空;通常由服务自动生成,无需填写) """.strip() sft = """# SFT TaskSpec (YAML) workload: sft # 任务类型(必填):ppo|grpo|sft code_path: /private/common/code/verl/verl_repo # 代码路径(必填):v3.0 固定使用 common 下的 verl 快照(不支持用户自定义代码) model_id: Qwen/Qwen2.5-0.5B-Instruct # 基础模型(必填):HuggingFace 模型 ID 或 /private/... 本地模型路径 train_file: /private/common/datasets/gsm8k_sft/train.parquet # 训练数据(必填):parquet 文件路径(支持 /private/common/datasets 或 /private/users//datasets) val_file: /private/common/datasets/gsm8k_sft/test.parquet # 验证数据(必填):parquet 文件路径(VERL 侧会用来构建 val dataset,不能为 null) # nnodes: 2 # 训练节点数(可选,默认:2;单机可设 1) # n_gpus_per_node: 4 # 每节点 GPU 数(可选,默认:4;单卡可设 1) # total_epochs: 1 # 总训练 epoch(可选,默认:1) # total_training_steps: null # 总训练 step(可选,默认:null;不传则让 VERL 按 epochs 和数据长度自动推导) # save_freq: 10 # checkpoint 保存频率(step)(可选,默认:10) # test_freq: null # 验证频率(step)(可选,默认:null;训练端会当成 -1=不验证) # trainer_device: cpu # 仅 SFT 生效:driver 侧 device(可选,默认:cpu) # submission_id: "" # Ray submission_id(可选,默认空;通常由服务自动生成,无需填写) """.strip() adv = """# Advanced TaskSpec (YAML) - v3.5 kind: advanced # 任务类型(必填):advanced(自定义 command) # 说明:v3.5 中 Advanced 任务不会按 ppo/grpo/sft 分类;平台统一按 "advanced" 做任务分类与 task_id 命名。 nnodes: 2 # 训练节点数(必填):用于平台队列调度与资源预检查 n_gpus_per_node: 4 # 每节点 GPU 数(必填):用于平台队列调度与资源预检查 # 自定义训练命令(必填):平台会做 $HOME 宏替换: # - $HOME -> /private/users/ # - $HOME/common/datasets -> /private/datasets(共享只读数据) # - $HOME/common/hf -> /private/hf(共享只读 HF cache) command: | # 注意:PPO 需要一些关键参数,否则 VERL 会在启动前 fail-fast(例如 actor 的 micro batch)。 PYTHONUNBUFFERED=1 \ python3 -m verl.trainer.main_ppo \ data.train_files=$HOME/common/datasets/gsm8k/train.parquet \ data.val_files=$HOME/common/datasets/gsm8k/test.parquet \ data.train_batch_size=256 \ data.max_prompt_length=512 \ data.max_response_length=512 \ actor_rollout_ref.model.path=Qwen/Qwen2.5-0.5B-Instruct \ actor_rollout_ref.actor.optim.lr=1e-6 \ actor_rollout_ref.actor.ppo_mini_batch_size=64 \ actor_rollout_ref.actor.ppo_micro_batch_size_per_gpu=4 \ actor_rollout_ref.rollout.name=sglang \ actor_rollout_ref.rollout.log_prob_micro_batch_size_per_gpu=8 \ actor_rollout_ref.rollout.tensor_model_parallel_size=1 \ actor_rollout_ref.rollout.gpu_memory_utilization=0.4 \ actor_rollout_ref.ref.log_prob_micro_batch_size_per_gpu=4 \ critic.optim.lr=1e-5 \ critic.model.path=Qwen/Qwen2.5-0.5B-Instruct \ critic.ppo_micro_batch_size_per_gpu=4 \ algorithm.kl_ctrl.kl_coef=0.001 \ trainer.logger=console \ trainer.val_before_train=False \ trainer.nnodes=2 \ trainer.n_gpus_per_node=4 \ trainer.total_epochs=1 \ trainer.total_training_steps=10 \ trainer.save_freq=10 \ trainer.test_freq=-1 \ trainer.resume_mode=disable \ trainer.default_local_dir=checkpoints \ +ray_kwargs.ray_init.address=auto \ hydra.run.dir=logs/hydra # 可选:自定义 reward(方式 A:直接写在 command 里) # command 里增加如下 overrides: # custom_reward_function.path=$HOME/code/reward.py # custom_reward_function.name=compute_score """.strip() merge = """# Model Merge (YAML) - v3.5 (Advanced command) # 用途:将 VERL 训练产生的 FSDP 分片 checkpoint 合并为 HuggingFace 格式目录。 # # 你需要把 替换成真实值: # - submission id:在 Tasks 详情页里看到的 `ray_submission_id`(如 mvp2-...--a01) # - global_step:对应 checkpoints 下的 global_step_xxx 目录(如 global_step_10) # # 注意:这里不需要 GPU,建议把 n_gpus_per_node 设为 0,这样不受训练任务占用 GPU 的影响。 kind: advanced nnodes: 1 n_gpus_per_node: 0 command: | python3 -m verl.model_merger merge \ --backend fsdp \ --local_dir $HOME/jobs//checkpoints//actor \ --target_dir $HOME/jobs//checkpoints//actor/huggingface """.strip() body = f"""

New Task

Paste TaskSpec YAML and submit to API server. Basic tasks require code_path; Advanced tasks use kind: advanced with a custom command.
Back

""".strip() tpl_ppo = json.dumps(ppo) tpl_grpo = json.dumps(grpo) tpl_sft = json.dumps(sft) tpl_adv = json.dumps(adv) tpl_merge = json.dumps(merge) script = ( """ const msg = document.getElementById("msg"); const yamlEl = document.getElementById("yaml"); const TPL_PPO = __TPL_PPO__; const TPL_GRPO = __TPL_GRPO__; const TPL_SFT = __TPL_SFT__; const TPL_ADV = __TPL_ADV__; const TPL_MERGE = __TPL_MERGE__; document.getElementById("tpl-ppo").onclick = () => { yamlEl.value = TPL_PPO; msg.textContent = ""; }; document.getElementById("tpl-grpo").onclick = () => { yamlEl.value = TPL_GRPO; msg.textContent = ""; }; document.getElementById("tpl-sft").onclick = () => { yamlEl.value = TPL_SFT; msg.textContent = ""; }; document.getElementById("tpl-adv").onclick = () => { yamlEl.value = TPL_ADV; msg.textContent = ""; }; document.getElementById("tpl-merge").onclick = () => { yamlEl.value = TPL_MERGE; msg.textContent = ""; }; function yamlQuote(s) { s = String(s ?? ""); return '"' + s.replaceAll('\\\\', '\\\\\\\\').replaceAll('"', '\\\\"') + '"'; } function buildYamlFromForm() { const tpl = document.getElementById("f-tpl").value; if (tpl === "advanced" || tpl === "merge") { const nn = Number(document.getElementById("f-adv-nnodes").value || "1"); const gpn = Number(document.getElementById("f-adv-gpn").value || "0"); const cmd = String(document.getElementById("f-command").value || "").replace(/\\r/g, ""); const lines = cmd.split("\\n"); const indented = lines.map(l => " " + l).join("\\n"); return `kind: advanced\\n` + `nnodes: ${nn}\\n` + `n_gpus_per_node: ${gpn}\\n\\n` + `command: |\\n${indented}\\n`; } const workload = tpl; const codePath = document.getElementById("f-code-path").value || ""; const modelId = document.getElementById("f-model-id").value || ""; const trainFile = document.getElementById("f-train-file").value || ""; const valFile = document.getElementById("f-val-file").value || ""; const nn = Number(document.getElementById("f-nnodes").value || "2"); const gpn = Number(document.getElementById("f-gpn").value || "4"); const epochs = Number(document.getElementById("f-epochs").value || "1"); const stepsRaw = String(document.getElementById("f-steps").value || "").trim(); const saveFreq = Number(document.getElementById("f-save-freq").value || "10"); const testFreqRaw = String(document.getElementById("f-test-freq").value || "").trim(); let y = ""; y += `workload: ${workload}\\n`; y += `code_path: ${yamlQuote(codePath)}\\n`; y += `model_id: ${yamlQuote(modelId)}\\n`; y += `train_file: ${yamlQuote(trainFile)}\\n`; y += `val_file: ${yamlQuote(valFile)}\\n`; y += `nnodes: ${nn}\\n`; y += `n_gpus_per_node: ${gpn}\\n`; y += `total_epochs: ${epochs}\\n`; if (stepsRaw) y += `total_training_steps: ${Number(stepsRaw)}\\n`; y += `save_freq: ${saveFreq}\\n`; if (testFreqRaw) y += `test_freq: ${Number(testFreqRaw)}\\n`; if (tpl === "sft") { const dev = document.getElementById("f-trainer-device").value || "cpu"; y += `trainer_device: ${dev}\\n`; } return y; } function updateFormVisibility() { const tpl = document.getElementById("f-tpl").value; const basic = document.getElementById("f-basic"); const adv = document.getElementById("f-adv"); const sftWrap = document.getElementById("f-sft-device-wrap"); if (tpl === "advanced" || tpl === "merge") { basic.style.display = "none"; adv.style.display = "block"; } else { basic.style.display = "block"; adv.style.display = "none"; } if (tpl === "sft") { sftWrap.style.display = "block"; } else { sftWrap.style.display = "none"; } } function updatePreview() { const text = buildYamlFromForm(); document.getElementById("f-preview").textContent = text; return text; } function setBtnActive(id, on) { const el = document.getElementById(id); if (!el) return; if (on) el.classList.add("active"); else el.classList.remove("active"); } function showMode(mode) { const yamlPanel = document.getElementById("panel-yaml"); const formPanel = document.getElementById("panel-form"); if (mode === "form") { yamlPanel.style.display = "none"; formPanel.style.display = "block"; setBtnActive("mode-yaml", false); setBtnActive("mode-form", true); updateFormVisibility(); updatePreview(); } else { yamlPanel.style.display = "block"; formPanel.style.display = "none"; setBtnActive("mode-yaml", true); setBtnActive("mode-form", false); } } function bindStepper(inputId, decId, incId, minV) { const el = document.getElementById(inputId); document.getElementById(decId).onclick = () => { const v = Number(el.value || "0") - 1; el.value = String(Math.max(minV, v)); updatePreview(); }; document.getElementById(incId).onclick = () => { const v = Number(el.value || "0") + 1; el.value = String(Math.max(minV, v)); updatePreview(); }; } // Init defaults (match examples) document.getElementById("f-code-path").value = "/private/common/code/verl/verl_repo"; document.getElementById("f-model-id").value = "Qwen/Qwen2.5-0.5B-Instruct"; document.getElementById("f-train-file").value = "/private/common/datasets/gsm8k/train.parquet"; document.getElementById("f-val-file").value = "/private/common/datasets/gsm8k/test.parquet"; document.getElementById("f-nnodes").value = "2"; document.getElementById("f-gpn").value = "4"; document.getElementById("f-epochs").value = "1"; document.getElementById("f-save-freq").value = "10"; document.getElementById("f-trainer-device").value = "cpu"; document.getElementById("f-adv-nnodes").value = "2"; document.getElementById("f-adv-gpn").value = "4"; document.getElementById("f-command").value = `PYTHONUNBUFFERED=1 \\\npython3 -m verl.trainer.main_ppo \\\n data.train_files=$HOME/common/datasets/gsm8k/train.parquet \\\n data.val_files=$HOME/common/datasets/gsm8k/test.parquet \\\n +ray_kwargs.ray_init.address=auto`; bindStepper("f-nnodes", "f-nnodes-dec", "f-nnodes-inc", 1); bindStepper("f-gpn", "f-gpn-dec", "f-gpn-inc", 0); bindStepper("f-epochs", "f-epochs-dec", "f-epochs-inc", 1); bindStepper("f-adv-nnodes", "f-adv-nnodes-dec", "f-adv-nnodes-inc", 1); bindStepper("f-adv-gpn", "f-adv-gpn-dec", "f-adv-gpn-inc", 0); document.getElementById("f-tpl").onchange = () => { const tpl = document.getElementById("f-tpl").value; if (tpl === "ppo") { document.getElementById("f-train-file").value = "/private/common/datasets/gsm8k/train.parquet"; document.getElementById("f-val-file").value = "/private/common/datasets/gsm8k/test.parquet"; } else if (tpl === "grpo") { document.getElementById("f-train-file").value = "/private/common/datasets/gsm8k/train.parquet"; document.getElementById("f-val-file").value = "/private/common/datasets/gsm8k/test.parquet"; } else if (tpl === "sft") { document.getElementById("f-train-file").value = "/private/common/datasets/gsm8k_sft/train.parquet"; document.getElementById("f-val-file").value = "/private/common/datasets/gsm8k_sft/test.parquet"; } else if (tpl === "advanced") { document.getElementById("f-adv-nnodes").value = "2"; document.getElementById("f-adv-gpn").value = "4"; document.getElementById("f-command").value = TPL_ADV.split("command: |")[1]?.trimStart() || document.getElementById("f-command").value; } else if (tpl === "merge") { document.getElementById("f-adv-nnodes").value = "1"; document.getElementById("f-adv-gpn").value = "0"; document.getElementById("f-command").value = TPL_MERGE.split("command: |")[1]?.trimStart() || document.getElementById("f-command").value; } updateFormVisibility(); updatePreview(); }; for (const id of ["f-code-path","f-model-id","f-train-file","f-val-file","f-nnodes","f-gpn","f-epochs","f-steps","f-save-freq","f-test-freq","f-trainer-device","f-adv-nnodes","f-adv-gpn","f-command"]) { const el = document.getElementById(id); if (el) el.oninput = () => updatePreview(); } document.getElementById("mode-yaml").onclick = () => { // When switching to YAML, sync from form if form is visible. const formVisible = document.getElementById("panel-form").style.display !== "none"; if (formVisible) yamlEl.value = updatePreview(); showMode("yaml"); }; document.getElementById("mode-form").onclick = () => { showMode("form"); }; showMode("yaml"); document.getElementById("submit").onclick = async () => { msg.textContent = "Submitting..."; const body = (document.getElementById("panel-form").style.display !== "none") ? updatePreview() : yamlEl.value; const resp = await apiFetch("/api/v2/tasks", { method: "POST", headers: {"Content-Type":"text/plain"}, body }); const text = await resp.text(); if (!resp.ok) { msg.textContent = "Error: " + resp.status + "\\n" + text; return; } const obj = JSON.parse(text); msg.textContent = "OK: " + fmtJson(obj); if (obj.task_id) window.location.href = "/ui/tasks/" + obj.task_id; }; """.strip() .replace("__TPL_PPO__", tpl_ppo) .replace("__TPL_GRPO__", tpl_grpo) .replace("__TPL_SFT__", tpl_sft) .replace("__TPL_ADV__", tpl_adv) .replace("__TPL_MERGE__", tpl_merge) ) return HTMLResponse(content=_page("New Task", "new", body, script)) @app.get("/ui/tasks/{task_id}") async def ui_task_detail(task_id: str) -> HTMLResponse: safe_id = html.escape(task_id) body = f"""

Task: {safe_id}

Logs Back
Loading...

TaskSpec (YAML)

Resolved TaskSpec (includes default values; submission_id reflects latest attempt when available).
Loading...
""".strip() script = f""" document.getElementById("nav-ray-dashboard").href = curOriginWithPort(8265); const out = document.getElementById("out"); const spec = document.getElementById("spec"); async function refresh() {{ out.textContent = "Loading..."; spec.textContent = "Loading..."; const resp = await apiFetch("/api/v2/tasks/{task_id}"); const text = await resp.text(); if (!resp.ok) {{ out.textContent = "Error: " + resp.status + "\\n" + text; return; }} out.textContent = fmtJson(JSON.parse(text)); const resp2 = await apiFetch("/api/v2/tasks/{task_id}/spec"); const text2 = await resp2.text(); spec.textContent = resp2.ok ? text2 : ("Error: " + resp2.status + "\\n" + text2); }} document.getElementById("refresh").onclick = refresh; document.getElementById("cancel").onclick = async () => {{ if (!confirm("Cancel this task?")) return; const resp = await apiFetch("/api/v2/tasks/{task_id}:cancel", {{ method: "POST" }}); const text = await resp.text(); out.textContent = (resp.ok ? "Canceled.\\n" : "Error: " + resp.status + "\\n") + text; setTimeout(refresh, 800); }}; refresh(); """.strip() return HTMLResponse(content=_page(f"Task {task_id}", "tasks", body, script)) @app.get("/ui/tasks/{task_id}/logs") async def ui_task_logs(task_id: str) -> HTMLResponse: safe_id = html.escape(task_id) body = f"""

Logs: {safe_id}

Back
Loading...
""".strip() script = f""" document.getElementById("nav-ray-dashboard").href = curOriginWithPort(8265); const out = document.getElementById("out"); let timer = null; async function refresh() {{ const resp = await apiFetch("/api/v2/tasks/{task_id}/logs?tail=4000"); const text = await resp.text(); out.textContent = resp.ok ? text : ("Error: " + resp.status + "\\n" + text); }} document.getElementById("refresh").onclick = refresh; document.getElementById("auto").onchange = (e) => {{ if (e.target.checked) {{ timer = setInterval(refresh, 2000); }} else {{ if (timer) clearInterval(timer); timer = null; }} }}; refresh(); """.strip() return HTMLResponse(content=_page(f"Logs {task_id}", "tasks", body, script)) @app.get("/ui/data") async def ui_data() -> HTMLResponse: body = """

Data

User files live under your home directory. Keep long-term artifacts in models/ or datasets/.
Username
SFTPGo password
You can also use an SFTP client (e.g. FileZilla) with the same username/password. Host: , Port: .
""".strip() script = """ const msg = document.getElementById("msg"); document.getElementById("nav-ray-dashboard").href = curOriginWithPort(8265); const u = document.getElementById("u"); const p = document.getElementById("p"); const sftpWeb = document.getElementById("sftp-web"); const sftpHost = document.getElementById("sftp-host"); const sftpPort = document.getElementById("sftp-port"); document.getElementById("copy-u").onclick = async () => { await copyText(u.value || ""); }; document.getElementById("copy-p").onclick = async () => { await copyText(p.value || ""); }; async function refresh() { const resp = await apiFetch("/api/v2/me"); const text = await resp.text(); if (!resp.ok) { msg.textContent = "Error: " + resp.status + "\\n" + text; return; } const obj = JSON.parse(text); u.value = (obj.user_id || ""); const cached = mvpSftpPasswordGet(); if (cached) p.value = cached; const host = curOriginWithPort(8081); sftpWeb.href = host + "/web/client"; sftpHost.textContent = (obj.sftp && obj.sftp.host) ? obj.sftp.host : window.location.hostname; sftpPort.textContent = (obj.sftp && obj.sftp.port) ? String(obj.sftp.port) : "2022"; msg.textContent = ""; } document.getElementById("reset-p").onclick = async () => { p.value = ""; mvpSftpPasswordSet(""); msg.textContent = "Resetting..."; const resp = await apiFetch("/api/v2/me/sftp:reset_password", { method: "POST" }); const text = await resp.text(); if (!resp.ok) { msg.textContent = "Error: " + resp.status + "\\n" + text; return; } const obj = JSON.parse(text); p.value = obj.password || ""; mvpSftpPasswordSet(p.value); msg.textContent = "SFTPGo password rotated."; }; refresh(); """.strip() return HTMLResponse(content=_page("Data", "data", body, script)) @app.get("/ui/admin") async def ui_admin() -> HTMLResponse: body = """

Admin

This page requires the admin token (set it in Login).

Create user



  
Loading...
""".strip() script = """ const out = document.getElementById("out"); const createMsg = document.getElementById("create-msg"); const userIdEl = document.getElementById("new-user-id"); const displayNameEl = document.getElementById("new-display-name"); function esc(s) { s = String(s || ""); return s.replaceAll("&","&").replaceAll("<","<").replaceAll(">",">"); } async function refresh() { out.textContent = "Loading..."; try { const obj = await apiJson("/api/v2/users?limit=200"); const users = (obj.users || []); function row(u) { const uid = u.user_id; const tok = u.token || ""; const tokShort = tok ? (tok.length > 18 ? (tok.slice(0, 18) + "…") : tok) : ""; const created = u.created_at || ""; const updated = u.updated_at || ""; const tCreated = u.token_created_at || ""; const tUsed = u.token_last_used_at || ""; return ` ${esc(uid)} ${esc(created)} ${esc(updated)}
${esc(tokShort)}
token_created_at: ${esc(tCreated)}; last_used_at: ${esc(tUsed)}
`; } out.innerHTML = ` ${users.map(row).join("") || ""}
UserCreatedUpdatedToken
(none)
`; for (const btn of out.querySelectorAll("button[data-copy]")) { btn.onclick = async () => { await copyText(btn.getAttribute("data-copy") || ""); }; } for (const btn of out.querySelectorAll("button[data-issue]")) { btn.onclick = async () => { const uid = btn.getAttribute("data-issue"); if (!uid) return; try { const r = await apiJson("/api/v2/users/" + encodeURIComponent(uid) + "/tokens", { method: "POST" }); createMsg.textContent = "Issued token:\\n" + fmtJson(r); await refresh(); } catch (e) { createMsg.textContent = "Error issuing token: " + (e.status || "") + "\\n" + (e.body || String(e)); } }; } } catch (e) { out.textContent = "Error: " + (e.status || "") + "\\n" + (e.body || String(e)); } } document.getElementById("refresh").onclick = refresh; document.getElementById("create-user").onclick = async () => { createMsg.textContent = "Creating..."; const user_id = (userIdEl.value || "").trim(); const display_name = (displayNameEl.value || "").trim(); if (!user_id) { createMsg.textContent = "user_id is required"; return; } const payload = { user_id: user_id }; if (display_name) payload.display_name = display_name; try { const r = await apiJson("/api/v2/users", { method: "POST", headers: {"Content-Type":"application/json"}, body: JSON.stringify(payload) }); createMsg.textContent = "Created:\\n" + fmtJson(r); userIdEl.value = ""; displayNameEl.value = ""; await refresh(); } catch (e) { createMsg.textContent = "Error: " + (e.status || "") + "\\n" + (e.body || String(e)); } }; refresh(); """.strip() return HTMLResponse(content=_page("Admin", "admin", body, script))