from __future__ import annotations
import html
import json
from fastapi import FastAPI
from fastapi.responses import HTMLResponse, RedirectResponse
# Stylesheet text for the UI pages (dark theme). It defines the colour
# variables on :root, the .layout/.nav/.main page skeleton, and the shared
# widget classes (.card, .row, .btn, .pill, form controls, tables).
# Leading/trailing whitespace is removed by .strip() so the constant starts
# at ":root" and ends at the last rule.
_BASE_CSS = """
:root { --bg:#0b1020; --panel:#111a33; --muted:#95a3c6; --fg:#e8eeff; --accent:#7aa2ff; --danger:#ff6b6b; --ok:#3ddc97; }
* { box-sizing: border-box; }
body { margin:0; font-family: ui-sans-serif, system-ui, -apple-system, Segoe UI, Roboto, Helvetica, Arial; background:var(--bg); color:var(--fg); }
a { color:var(--accent); text-decoration:none; }
.layout { display:flex; min-height:100vh; }
.nav { width: 240px; padding:16px; background: linear-gradient(180deg, #0e1630, #0b1020); border-right: 1px solid rgba(255,255,255,0.06); }
.brand { font-weight: 700; letter-spacing: .2px; margin-bottom: 12px; }
.nav a { display:block; padding:10px 10px; border-radius:10px; color: var(--fg); opacity: .9; }
.nav a.active { background: rgba(122,162,255,0.14); border: 1px solid rgba(122,162,255,0.22); }
.nav a:hover { background: rgba(255,255,255,0.06); }
.main { flex:1; padding: 20px 24px; }
.card { background: rgba(255,255,255,0.04); border: 1px solid rgba(255,255,255,0.08); border-radius: 14px; padding: 16px; }
.row { display:flex; gap: 12px; align-items:center; flex-wrap: wrap; }
.muted { color: var(--muted); }
.btn { border: 1px solid rgba(255,255,255,0.16); background: rgba(255,255,255,0.06); color: var(--fg); padding: 10px 12px; border-radius: 10px; cursor: pointer; }
.btn:hover { background: rgba(255,255,255,0.10); }
.btn.active { background: rgba(122,162,255,0.14); border-color: rgba(122,162,255,0.22); }
.btn.danger { border-color: rgba(255,107,107,0.35); background: rgba(255,107,107,0.10); }
.pill { display:inline-block; padding: 2px 10px; border-radius: 999px; border: 1px solid rgba(255,255,255,0.16); font-size: 12px; }
.pill.ok { border-color: rgba(61,220,151,0.35); background: rgba(61,220,151,0.12); }
.pill.bad { border-color: rgba(255,107,107,0.35); background: rgba(255,107,107,0.12); }
textarea, input { width: 100%; color: var(--fg); background: rgba(255,255,255,0.06); border: 1px solid rgba(255,255,255,0.12); border-radius: 12px; padding: 10px 12px; outline: none; }
select { color: var(--fg); background: rgba(255,255,255,0.06); border: 1px solid rgba(255,255,255,0.12); border-radius: 12px; padding: 10px 12px; outline: none; }
select option { color: var(--fg); background: #0e1630; }
button:disabled { opacity: .45; cursor: not-allowed; }
pre { white-space: pre-wrap; word-break: break-word; }
table { width:100%; border-collapse: collapse; }
th, td { padding: 10px 8px; border-bottom: 1px solid rgba(255,255,255,0.08); text-align:left; }
""".strip()
# Browser-side helper library used by the per-page scripts in this module:
#   - mvpTokenGet / mvpTokenSet: read/write the trimmed API token in
#     localStorage under "mvp_token".
#   - mvpSftpPasswordGet / mvpSftpPasswordSet: same for "mvp_sftp_password".
#   - apiFetch: fetch() wrapper that adds "Authorization: Bearer <token>"
#     when a token is stored (it writes into the caller's opts.headers).
#   - apiJson: apiFetch + JSON.parse; on a non-ok response it throws an Error
#     carrying .status and .body (the raw response text).
#   - fmtJson: pretty-print with a String() fallback.
#   - curOriginWithPort: current protocol + hostname with the given port.
#   - copyText: clipboard write with a hidden-textarea/execCommand fallback.
# On DOMContentLoaded the element with id "nav-ray-dashboard" (if present)
# gets its href pointed at port 8265 of the current host.
_BASE_JS = """
function mvpTokenGet() {
return (localStorage.getItem("mvp_token") || "").trim();
}
function mvpTokenSet(v) {
localStorage.setItem("mvp_token", (v || "").trim());
}
function mvpSftpPasswordGet() {
return (localStorage.getItem("mvp_sftp_password") || "").trim();
}
function mvpSftpPasswordSet(v) {
localStorage.setItem("mvp_sftp_password", (v || "").trim());
}
async function apiFetch(path, opts) {
opts = opts || {};
opts.headers = opts.headers || {};
const tok = mvpTokenGet();
if (tok) opts.headers["Authorization"] = "Bearer " + tok;
return fetch(path, opts);
}
async function apiJson(path, opts) {
const resp = await apiFetch(path, opts);
const text = await resp.text();
if (!resp.ok) {
const err = new Error("HTTP " + resp.status);
err.status = resp.status;
err.body = text;
throw err;
}
return JSON.parse(text);
}
function fmtJson(obj) {
try { return JSON.stringify(obj, null, 2); } catch (e) { return String(obj); }
}
function curOriginWithPort(port) {
const proto = window.location.protocol;
const host = window.location.hostname;
return proto + "//" + host + ":" + port;
}
async function copyText(v) {
if (!v) return false;
try {
await navigator.clipboard.writeText(v);
return true;
} catch (e) {
// Fallback for non-secure contexts (http) or older browsers.
try {
const ta = document.createElement("textarea");
ta.value = v;
ta.style.position = "fixed";
ta.style.opacity = "0";
document.body.appendChild(ta);
ta.focus();
ta.select();
const ok = document.execCommand("copy");
document.body.removeChild(ta);
return ok;
} catch (e2) {
return false;
}
}
}
document.addEventListener("DOMContentLoaded", () => {
const el = document.getElementById("nav-ray-dashboard");
if (el) el.href = curOriginWithPort(8265);
});
""".strip()
def _nav(active: str) -> str:
links = [
("login", "/ui/login", "Login"),
("tasks", "/ui/tasks", "Tasks"),
("new", "/ui/tasks/new", "New Task"),
("data", "/ui/data", "Data"),
("admin", "/ui/admin", "Admin"),
("ray", "#", "Ray Dashboard"),
]
items = []
for key, href, label in links:
cls = "active" if key == active else ""
extra = ""
if key == "ray":
extra = ' id="nav-ray-dashboard" target="_blank" rel="noopener"'
items.append(f'{html.escape(label)}')
return "\n".join(items)
def _page(title: str, active: str, body: str, script: str = "") -> str:
    """Assemble a complete HTML document for one UI page.

    The document inlines the shared stylesheet, renders the navigation
    sidebar next to the page body, and places the shared JavaScript helpers
    followed by the page-specific script at the end of ``<body>`` so that
    scripts can look up elements by id immediately.

    Args:
        title: Page title; HTML-escaped before insertion.
        active: Navigation key forwarded to ``_nav`` to highlight the
            current page.
        body: Markup for the main column. Inserted verbatim, so callers must
            escape any untrusted value themselves.
        script: Page-specific JavaScript. Inserted verbatim after the shared
            helpers; may be empty.

    Returns:
        The HTML document as a string.
    """
    return f"""<!doctype html>
<html>
<head>
<meta charset="utf-8"/>
<meta name="viewport" content="width=device-width, initial-scale=1"/>
<title>{html.escape(title)}</title>
<style>
{_BASE_CSS}
</style>
</head>
<body>
<div class="layout">
<nav class="nav">
{_nav(active)}
</nav>
<main class="main">
{body}
</main>
</div>
<script>
{_BASE_JS}
</script>
<script>
{script}
</script>
</body>
</html>
"""
# Registers the browser UI on ``app``: every handler defined below is attached
# with ``@app.get`` under the /ui prefix and returns server-rendered HTML whose
# scripts call the /api/v2 endpoints from the browser.
def register_ui_routes(app: FastAPI) -> None:
# GET /ui: entry point, redirects to the task list at /ui/tasks.
@app.get("/ui")
async def ui_root() -> RedirectResponse:
return RedirectResponse(url="/ui/tasks")
@app.get("/ui/login")
async def ui_login() -> HTMLResponse:
    """Serve the login page.

    The page script pre-fills the token field from browser storage, saves or
    clears the token on demand, and shows the result of ``/api/v2/me`` for
    the stored token.
    """
    page_js = """
const tokEl = document.getElementById("tok");
const msg = document.getElementById("msg");
const me = document.getElementById("me");
tokEl.value = mvpTokenGet();
async function refreshMe() {
me.textContent = "Loading...";
try {
const obj = await apiJson("/api/v2/me");
me.textContent = fmtJson(obj);
} catch (e) {
me.textContent = "Error: " + (e.status || "") + "\\n" + (e.body || String(e));
}
}
document.getElementById("me-refresh").onclick = refreshMe;
document.getElementById("save").onclick = () => { mvpTokenSet(tokEl.value); msg.textContent = "Saved."; refreshMe(); };
document.getElementById("clear").onclick = () => { mvpTokenSet(""); tokEl.value = ""; msg.textContent = "Cleared."; me.textContent = "(not loaded)"; };
if (mvpTokenGet()) refreshMe();
""".strip()
    markup = """
Login
Paste your API token (without the Bearer prefix).
User Info
Shown after login via /api/v2/me.
(not loaded)
""".strip()
    document = _page("Login", "login", markup, page_js)
    return HTMLResponse(content=document)
@app.get("/ui/tasks")
async def ui_tasks() -> HTMLResponse:
    """Serve the task list page (running, pending and completed tasks).

    The page script loads ``/api/v2/queue`` plus one page of finished tasks
    from ``/api/v2/tasks`` and renders them via ``innerHTML``. The paging
    offset for completed tasks is kept in browser storage under
    ``mvp_completed_offset``.

    Every API-provided value is passed through the script's ``esc`` helper
    before it is concatenated into markup, so task fields cannot inject HTML.
    """
    body = """
Tasks
""".strip()
    # NOTE: "\\n" below is a Python escape producing the two characters \n in
    # the JavaScript source; a raw line break inside a JS "..." literal would
    # be a syntax error and stop the whole page script from running.
    script = """
const out = document.getElementById("out");
function esc(v) {
return String(v ?? "").replaceAll("&", "&amp;").replaceAll("<", "&lt;").replaceAll(">", "&gt;").replaceAll('"', "&quot;").replaceAll("'", "&#39;");
}
async function refresh() {
out.textContent = "Loading...";
try {
const q = await apiJson("/api/v2/queue");
const completedLimit = 25;
const completedOffset = Number(localStorage.getItem("mvp_completed_offset") || "0") || 0;
const done = await apiJson("/api/v2/tasks?limit=" + completedLimit + "&offset=" + completedOffset + "&states=SUCCEEDED,FAILED,CANCELED");
function pill(state) {
const s = esc(state);
if (s === "SUCCEEDED") return `${s}`;
if (s === "FAILED") return `${s}`;
if (s === "CANCELED") return `${s}`;
if (s === "RUNNING") return `${s}`;
if (s === "QUEUED" || s === "PENDING_RESOURCES" || s === "SUBMITTING" || s === "SUBMITTED") return `${s}`;
return `${s}`;
}
function row(t) {
const id = esc(t.task_id);
return `
| ${id} |
${esc(t.workload)} |
${pill(t.state)} |
${esc(t.nnodes)} x ${esc(t.n_gpus_per_node)} GPU |
${esc(t.updated_at)} |
`;
}
const running = (q.running || []).map(row).join("");
const pending = (q.pending || []).map(row).join("");
const doneRows = (done.tasks || []).map(row).join("");
const pageNo = Math.floor(completedOffset / completedLimit) + 1;
const prevDisabled = completedOffset <= 0;
const nextDisabled = !done.has_more;
out.innerHTML = `
Tip: configure token in
Login.
Running
| Task | Workload | State | Resources | Updated |
${running || "| (none) |\\n"}
Pending
| Task | Workload | State | Resources | Updated |
${pending || "| (none) |\\n"}
Completed
| Task | Workload | State | Resources | Updated |
${doneRows || "| (none) |\\n"}
`;
const prevBtn = document.getElementById("done-prev");
const nextBtn = document.getElementById("done-next");
if (prevBtn) prevBtn.onclick = () => {
const cur = Number(localStorage.getItem("mvp_completed_offset") || "0") || 0;
const next = Math.max(0, cur - completedLimit);
localStorage.setItem("mvp_completed_offset", String(next));
refresh();
};
if (nextBtn) nextBtn.onclick = () => {
const cur = Number(localStorage.getItem("mvp_completed_offset") || "0") || 0;
const next = cur + completedLimit;
localStorage.setItem("mvp_completed_offset", String(next));
refresh();
};
} catch (e) {
out.textContent = "Error: " + (e.status || "") + "\\n" + (e.body || String(e));
}
}
document.getElementById("refresh").onclick = refresh;
refresh();
""".strip()
    return HTMLResponse(content=_page("Tasks", "tasks", body, script))
# GET /ui/tasks/new: page for submitting a task, either by pasting TaskSpec
# YAML or by filling in a form from which the script generates the YAML.
# The submit handler POSTs the YAML as text/plain to /api/v2/tasks and, when
# the response contains a task_id, navigates to /ui/tasks/<task_id>.
@app.get("/ui/tasks/new")
async def ui_new_task() -> HTMLResponse:
# Example TaskSpec documents offered as one-click templates. They are
# runtime text shown to the user, so their content (including the inline
# YAML comments) is part of the page and must not be edited casually.
ppo = """# PPO TaskSpec (YAML)
workload: ppo # 任务类型(必填):ppo|grpo|sft
code_path: /private/common/code/verl/verl_repo # 代码路径(必填):v3.0 固定使用 common 下的 verl 快照(不支持用户自定义代码)
model_id: Qwen/Qwen2.5-0.5B-Instruct # 基础模型(必填):HuggingFace 模型 ID 或 /private/... 本地模型路径
train_file: /private/common/datasets/gsm8k/train.parquet # 训练数据(必填):parquet 文件路径(支持 /private/common/datasets 或 /private/users//datasets)
val_file: /private/common/datasets/gsm8k/test.parquet # 验证数据(必填):parquet 文件路径(VERL 侧会用来构建 val dataset,不能为 null)
# nnodes: 2 # 训练节点数(可选,默认:2)
# n_gpus_per_node: 4 # 每节点 GPU 数(可选,默认:4)
# total_epochs: 1 # 总训练 epoch(可选,默认:1)
# total_training_steps: null # 总训练 step(可选,默认:null;不传则让 VERL 按 epochs 和数据长度自动推导)
# save_freq: 10 # checkpoint 保存频率(step)(可选,默认:10)
# test_freq: null # 验证频率(step)(可选,默认:null;训练端会当成 -1=不验证)
# submission_id: "" # Ray submission_id(可选,默认空;通常由服务自动生成,无需填写)
""".strip()
grpo = """# GRPO TaskSpec (YAML)
workload: grpo # 任务类型(必填):ppo|grpo|sft(grpo 会自动启用对应的算法配置)
code_path: /private/common/code/verl/verl_repo # 代码路径(必填):v3.0 固定使用 common 下的 verl 快照(不支持用户自定义代码)
model_id: Qwen/Qwen2.5-0.5B-Instruct # 基础模型(必填):HuggingFace 模型 ID 或 /private/... 本地模型路径
train_file: /private/common/datasets/gsm8k/train.parquet # 训练数据(必填):parquet 文件路径(支持 /private/common/datasets 或 /private/users//datasets)
val_file: /private/common/datasets/gsm8k/test.parquet # 验证数据(必填):parquet 文件路径(VERL 侧会用来构建 val dataset,不能为 null)
# nnodes: 2 # 训练节点数(可选,默认:2)
# n_gpus_per_node: 4 # 每节点 GPU 数(可选,默认:4)
# total_epochs: 1 # 总训练 epoch(可选,默认:1)
# total_training_steps: null # 总训练 step(可选,默认:null;不传则让 VERL 按 epochs 和数据长度自动推导)
# save_freq: 10 # checkpoint 保存频率(step)(可选,默认:10)
# test_freq: null # 验证频率(step)(可选,默认:null;训练端会当成 -1=不验证)
# submission_id: "" # Ray submission_id(可选,默认空;通常由服务自动生成,无需填写)
""".strip()
sft = """# SFT TaskSpec (YAML)
workload: sft # 任务类型(必填):ppo|grpo|sft
code_path: /private/common/code/verl/verl_repo # 代码路径(必填):v3.0 固定使用 common 下的 verl 快照(不支持用户自定义代码)
model_id: Qwen/Qwen2.5-0.5B-Instruct # 基础模型(必填):HuggingFace 模型 ID 或 /private/... 本地模型路径
train_file: /private/common/datasets/gsm8k_sft/train.parquet # 训练数据(必填):parquet 文件路径(支持 /private/common/datasets 或 /private/users//datasets)
val_file: /private/common/datasets/gsm8k_sft/test.parquet # 验证数据(必填):parquet 文件路径(VERL 侧会用来构建 val dataset,不能为 null)
# nnodes: 2 # 训练节点数(可选,默认:2;单机可设 1)
# n_gpus_per_node: 4 # 每节点 GPU 数(可选,默认:4;单卡可设 1)
# total_epochs: 1 # 总训练 epoch(可选,默认:1)
# total_training_steps: null # 总训练 step(可选,默认:null;不传则让 VERL 按 epochs 和数据长度自动推导)
# save_freq: 10 # checkpoint 保存频率(step)(可选,默认:10)
# test_freq: null # 验证频率(step)(可选,默认:null;训练端会当成 -1=不验证)
# trainer_device: cpu # 仅 SFT 生效:driver 侧 device(可选,默认:cpu)
# submission_id: "" # Ray submission_id(可选,默认空;通常由服务自动生成,无需填写)
""".strip()
# NOTE(review): this is a regular (non-raw) string literal, so every
# backslash that is immediately followed by a line break is consumed by
# Python as a line continuation. The multi-line command below therefore
# reaches the browser joined onto a single line, without the backslashes.
# Confirm that this is intended before "fixing" either way.
adv = """# Advanced TaskSpec (YAML) - v3.5
kind: advanced # 任务类型(必填):advanced(自定义 command)
# 说明:v3.5 中 Advanced 任务不会按 ppo/grpo/sft 分类;平台统一按 "advanced" 做任务分类与 task_id 命名。
nnodes: 2 # 训练节点数(必填):用于平台队列调度与资源预检查
n_gpus_per_node: 4 # 每节点 GPU 数(必填):用于平台队列调度与资源预检查
# 自定义训练命令(必填):平台会做 $HOME 宏替换:
# - $HOME -> /private/users/
# - $HOME/common/datasets -> /private/datasets(共享只读数据)
# - $HOME/common/hf -> /private/hf(共享只读 HF cache)
command: |
# 注意:PPO 需要一些关键参数,否则 VERL 会在启动前 fail-fast(例如 actor 的 micro batch)。
PYTHONUNBUFFERED=1 \
python3 -m verl.trainer.main_ppo \
data.train_files=$HOME/common/datasets/gsm8k/train.parquet \
data.val_files=$HOME/common/datasets/gsm8k/test.parquet \
data.train_batch_size=256 \
data.max_prompt_length=512 \
data.max_response_length=512 \
actor_rollout_ref.model.path=Qwen/Qwen2.5-0.5B-Instruct \
actor_rollout_ref.actor.optim.lr=1e-6 \
actor_rollout_ref.actor.ppo_mini_batch_size=64 \
actor_rollout_ref.actor.ppo_micro_batch_size_per_gpu=4 \
actor_rollout_ref.rollout.name=sglang \
actor_rollout_ref.rollout.log_prob_micro_batch_size_per_gpu=8 \
actor_rollout_ref.rollout.tensor_model_parallel_size=1 \
actor_rollout_ref.rollout.gpu_memory_utilization=0.4 \
actor_rollout_ref.ref.log_prob_micro_batch_size_per_gpu=4 \
critic.optim.lr=1e-5 \
critic.model.path=Qwen/Qwen2.5-0.5B-Instruct \
critic.ppo_micro_batch_size_per_gpu=4 \
algorithm.kl_ctrl.kl_coef=0.001 \
trainer.logger=console \
trainer.val_before_train=False \
trainer.nnodes=2 \
trainer.n_gpus_per_node=4 \
trainer.total_epochs=1 \
trainer.total_training_steps=10 \
trainer.save_freq=10 \
trainer.test_freq=-1 \
trainer.resume_mode=disable \
trainer.default_local_dir=checkpoints \
+ray_kwargs.ray_init.address=auto \
hydra.run.dir=logs/hydra
# 可选:自定义 reward(方式 A:直接写在 command 里)
# command 里增加如下 overrides:
# custom_reward_function.path=$HOME/code/reward.py
# custom_reward_function.name=compute_score
""".strip()
# Same backslash/line-break caveat as for ``adv`` above applies here.
merge = """# Model Merge (YAML) - v3.5 (Advanced command)
# 用途:将 VERL 训练产生的 FSDP 分片 checkpoint 合并为 HuggingFace 格式目录。
#
# 你需要把 和 替换成真实值:
# - submission id:在 Tasks 详情页里看到的 `ray_submission_id`(如 mvp2-...--a01)
# - global_step:对应 checkpoints 下的 global_step_xxx 目录(如 global_step_10)
#
# 注意:这里不需要 GPU,建议把 n_gpus_per_node 设为 0,这样不受训练任务占用 GPU 的影响。
kind: advanced
nnodes: 1
n_gpus_per_node: 0
command: |
python3 -m verl.model_merger merge \
--backend fsdp \
--local_dir $HOME/jobs//checkpoints//actor \
--target_dir $HOME/jobs//checkpoints//actor/huggingface
""".strip()
# Static page markup (an f-string with no placeholders).
body = f"""
New Task
Paste TaskSpec YAML and submit to API server.
Basic tasks require code_path; Advanced tasks use kind: advanced with a custom command.
""".strip()
# json.dumps turns each template into a double-quoted, fully escaped string
# literal (non-ASCII characters become \uXXXX with the default settings),
# which is substituted for the __TPL_*__ placeholders in the script below.
tpl_ppo = json.dumps(ppo)
tpl_grpo = json.dumps(grpo)
tpl_sft = json.dumps(sft)
tpl_adv = json.dumps(adv)
tpl_merge = json.dumps(merge)
# The script is a plain (non-f) string so JavaScript braces and ${...}
# template expressions need no doubling; doubled backslashes are Python
# escapes that yield a single backslash in the emitted JavaScript.
script = (
"""
const msg = document.getElementById("msg");
const yamlEl = document.getElementById("yaml");
const TPL_PPO = __TPL_PPO__;
const TPL_GRPO = __TPL_GRPO__;
const TPL_SFT = __TPL_SFT__;
const TPL_ADV = __TPL_ADV__;
const TPL_MERGE = __TPL_MERGE__;
document.getElementById("tpl-ppo").onclick = () => { yamlEl.value = TPL_PPO; msg.textContent = ""; };
document.getElementById("tpl-grpo").onclick = () => { yamlEl.value = TPL_GRPO; msg.textContent = ""; };
document.getElementById("tpl-sft").onclick = () => { yamlEl.value = TPL_SFT; msg.textContent = ""; };
document.getElementById("tpl-adv").onclick = () => { yamlEl.value = TPL_ADV; msg.textContent = ""; };
document.getElementById("tpl-merge").onclick = () => { yamlEl.value = TPL_MERGE; msg.textContent = ""; };
function yamlQuote(s) {
s = String(s ?? "");
return '"' + s.replaceAll('\\\\', '\\\\\\\\').replaceAll('"', '\\\\"') + '"';
}
function buildYamlFromForm() {
const tpl = document.getElementById("f-tpl").value;
if (tpl === "advanced" || tpl === "merge") {
const nn = Number(document.getElementById("f-adv-nnodes").value || "1");
const gpn = Number(document.getElementById("f-adv-gpn").value || "0");
const cmd = String(document.getElementById("f-command").value || "").replace(/\\r/g, "");
const lines = cmd.split("\\n");
const indented = lines.map(l => " " + l).join("\\n");
return `kind: advanced\\n` + `nnodes: ${nn}\\n` + `n_gpus_per_node: ${gpn}\\n\\n` + `command: |\\n${indented}\\n`;
}
const workload = tpl;
const codePath = document.getElementById("f-code-path").value || "";
const modelId = document.getElementById("f-model-id").value || "";
const trainFile = document.getElementById("f-train-file").value || "";
const valFile = document.getElementById("f-val-file").value || "";
const nn = Number(document.getElementById("f-nnodes").value || "2");
const gpn = Number(document.getElementById("f-gpn").value || "4");
const epochs = Number(document.getElementById("f-epochs").value || "1");
const stepsRaw = String(document.getElementById("f-steps").value || "").trim();
const saveFreq = Number(document.getElementById("f-save-freq").value || "10");
const testFreqRaw = String(document.getElementById("f-test-freq").value || "").trim();
let y = "";
y += `workload: ${workload}\\n`;
y += `code_path: ${yamlQuote(codePath)}\\n`;
y += `model_id: ${yamlQuote(modelId)}\\n`;
y += `train_file: ${yamlQuote(trainFile)}\\n`;
y += `val_file: ${yamlQuote(valFile)}\\n`;
y += `nnodes: ${nn}\\n`;
y += `n_gpus_per_node: ${gpn}\\n`;
y += `total_epochs: ${epochs}\\n`;
if (stepsRaw) y += `total_training_steps: ${Number(stepsRaw)}\\n`;
y += `save_freq: ${saveFreq}\\n`;
if (testFreqRaw) y += `test_freq: ${Number(testFreqRaw)}\\n`;
if (tpl === "sft") {
const dev = document.getElementById("f-trainer-device").value || "cpu";
y += `trainer_device: ${dev}\\n`;
}
return y;
}
function updateFormVisibility() {
const tpl = document.getElementById("f-tpl").value;
const basic = document.getElementById("f-basic");
const adv = document.getElementById("f-adv");
const sftWrap = document.getElementById("f-sft-device-wrap");
if (tpl === "advanced" || tpl === "merge") {
basic.style.display = "none";
adv.style.display = "block";
} else {
basic.style.display = "block";
adv.style.display = "none";
}
if (tpl === "sft") {
sftWrap.style.display = "block";
} else {
sftWrap.style.display = "none";
}
}
function updatePreview() {
const text = buildYamlFromForm();
document.getElementById("f-preview").textContent = text;
return text;
}
function setBtnActive(id, on) {
const el = document.getElementById(id);
if (!el) return;
if (on) el.classList.add("active");
else el.classList.remove("active");
}
function showMode(mode) {
const yamlPanel = document.getElementById("panel-yaml");
const formPanel = document.getElementById("panel-form");
if (mode === "form") {
yamlPanel.style.display = "none";
formPanel.style.display = "block";
setBtnActive("mode-yaml", false);
setBtnActive("mode-form", true);
updateFormVisibility();
updatePreview();
} else {
yamlPanel.style.display = "block";
formPanel.style.display = "none";
setBtnActive("mode-yaml", true);
setBtnActive("mode-form", false);
}
}
function bindStepper(inputId, decId, incId, minV) {
const el = document.getElementById(inputId);
document.getElementById(decId).onclick = () => {
const v = Number(el.value || "0") - 1;
el.value = String(Math.max(minV, v));
updatePreview();
};
document.getElementById(incId).onclick = () => {
const v = Number(el.value || "0") + 1;
el.value = String(Math.max(minV, v));
updatePreview();
};
}
// Init defaults (match examples)
document.getElementById("f-code-path").value = "/private/common/code/verl/verl_repo";
document.getElementById("f-model-id").value = "Qwen/Qwen2.5-0.5B-Instruct";
document.getElementById("f-train-file").value = "/private/common/datasets/gsm8k/train.parquet";
document.getElementById("f-val-file").value = "/private/common/datasets/gsm8k/test.parquet";
document.getElementById("f-nnodes").value = "2";
document.getElementById("f-gpn").value = "4";
document.getElementById("f-epochs").value = "1";
document.getElementById("f-save-freq").value = "10";
document.getElementById("f-trainer-device").value = "cpu";
document.getElementById("f-adv-nnodes").value = "2";
document.getElementById("f-adv-gpn").value = "4";
document.getElementById("f-command").value = `PYTHONUNBUFFERED=1 \\\npython3 -m verl.trainer.main_ppo \\\n data.train_files=$HOME/common/datasets/gsm8k/train.parquet \\\n data.val_files=$HOME/common/datasets/gsm8k/test.parquet \\\n +ray_kwargs.ray_init.address=auto`;
bindStepper("f-nnodes", "f-nnodes-dec", "f-nnodes-inc", 1);
bindStepper("f-gpn", "f-gpn-dec", "f-gpn-inc", 0);
bindStepper("f-epochs", "f-epochs-dec", "f-epochs-inc", 1);
bindStepper("f-adv-nnodes", "f-adv-nnodes-dec", "f-adv-nnodes-inc", 1);
bindStepper("f-adv-gpn", "f-adv-gpn-dec", "f-adv-gpn-inc", 0);
document.getElementById("f-tpl").onchange = () => {
const tpl = document.getElementById("f-tpl").value;
if (tpl === "ppo") {
document.getElementById("f-train-file").value = "/private/common/datasets/gsm8k/train.parquet";
document.getElementById("f-val-file").value = "/private/common/datasets/gsm8k/test.parquet";
} else if (tpl === "grpo") {
document.getElementById("f-train-file").value = "/private/common/datasets/gsm8k/train.parquet";
document.getElementById("f-val-file").value = "/private/common/datasets/gsm8k/test.parquet";
} else if (tpl === "sft") {
document.getElementById("f-train-file").value = "/private/common/datasets/gsm8k_sft/train.parquet";
document.getElementById("f-val-file").value = "/private/common/datasets/gsm8k_sft/test.parquet";
} else if (tpl === "advanced") {
document.getElementById("f-adv-nnodes").value = "2";
document.getElementById("f-adv-gpn").value = "4";
document.getElementById("f-command").value = TPL_ADV.split("command: |")[1]?.trimStart() || document.getElementById("f-command").value;
} else if (tpl === "merge") {
document.getElementById("f-adv-nnodes").value = "1";
document.getElementById("f-adv-gpn").value = "0";
document.getElementById("f-command").value = TPL_MERGE.split("command: |")[1]?.trimStart() || document.getElementById("f-command").value;
}
updateFormVisibility();
updatePreview();
};
for (const id of ["f-code-path","f-model-id","f-train-file","f-val-file","f-nnodes","f-gpn","f-epochs","f-steps","f-save-freq","f-test-freq","f-trainer-device","f-adv-nnodes","f-adv-gpn","f-command"]) {
const el = document.getElementById(id);
if (el) el.oninput = () => updatePreview();
}
document.getElementById("mode-yaml").onclick = () => {
// When switching to YAML, sync from form if form is visible.
const formVisible = document.getElementById("panel-form").style.display !== "none";
if (formVisible) yamlEl.value = updatePreview();
showMode("yaml");
};
document.getElementById("mode-form").onclick = () => {
showMode("form");
};
showMode("yaml");
document.getElementById("submit").onclick = async () => {
msg.textContent = "Submitting...";
const body = (document.getElementById("panel-form").style.display !== "none") ? updatePreview() : yamlEl.value;
const resp = await apiFetch("/api/v2/tasks", { method: "POST", headers: {"Content-Type":"text/plain"}, body });
const text = await resp.text();
if (!resp.ok) { msg.textContent = "Error: " + resp.status + "\\n" + text; return; }
const obj = JSON.parse(text);
msg.textContent = "OK: " + fmtJson(obj);
if (obj.task_id) window.location.href = "/ui/tasks/" + obj.task_id;
};
""".strip()
.replace("__TPL_PPO__", tpl_ppo)
.replace("__TPL_GRPO__", tpl_grpo)
.replace("__TPL_SFT__", tpl_sft)
.replace("__TPL_ADV__", tpl_adv)
.replace("__TPL_MERGE__", tpl_merge)
)
return HTMLResponse(content=_page("New Task", "new", body, script))
@app.get("/ui/tasks/{task_id}")
async def ui_task_detail(task_id: str) -> HTMLResponse:
    """Serve the detail page for a single task.

    The page script fetches the task record and its resolved TaskSpec from
    ``/api/v2/tasks/<id>`` and ``/api/v2/tasks/<id>/spec`` and offers a
    cancel action that POSTs to ``/api/v2/tasks/<id>:cancel``.

    Args:
        task_id: Task identifier taken from the URL path. It is untrusted
            input, so it is never pasted into the page as raw text: the
            markup receives an HTML-escaped copy and the script receives a
            JSON string literal.
    """
    safe_id = html.escape(task_id)
    # json.dumps produces a valid JavaScript string literal (quotes and
    # backslashes escaped, non-ASCII emitted as \uXXXX). The extra
    # replacements keep "<", ">" and "&" out of the emitted text so the value
    # cannot terminate an enclosing <script> element either.
    js_task_id = (
        json.dumps(task_id)
        .replace("<", "\\u003c")
        .replace(">", "\\u003e")
        .replace("&", "\\u0026")
    )
    body = f"""
Task: {safe_id}
TaskSpec (YAML)
Resolved TaskSpec (includes default values; submission_id reflects latest attempt when available).
Loading...
""".strip()
    # Plain string plus placeholder substitution (rather than an f-string) so
    # no request data is ever interpolated into the JavaScript source directly.
    script = """
document.getElementById("nav-ray-dashboard").href = curOriginWithPort(8265);
const TASK_URL = "/api/v2/tasks/" + encodeURIComponent(__TASK_ID__);
const out = document.getElementById("out");
const spec = document.getElementById("spec");
async function refresh() {
out.textContent = "Loading...";
spec.textContent = "Loading...";
const resp = await apiFetch(TASK_URL);
const text = await resp.text();
if (!resp.ok) { out.textContent = "Error: " + resp.status + "\\n" + text; return; }
out.textContent = fmtJson(JSON.parse(text));
const resp2 = await apiFetch(TASK_URL + "/spec");
const text2 = await resp2.text();
spec.textContent = resp2.ok ? text2 : ("Error: " + resp2.status + "\\n" + text2);
}
document.getElementById("refresh").onclick = refresh;
document.getElementById("cancel").onclick = async () => {
if (!confirm("Cancel this task?")) return;
const resp = await apiFetch(TASK_URL + ":cancel", { method: "POST" });
const text = await resp.text();
out.textContent = (resp.ok ? "Canceled.\\n" : "Error: " + resp.status + "\\n") + text;
setTimeout(refresh, 800);
};
refresh();
""".strip().replace("__TASK_ID__", js_task_id)
    return HTMLResponse(content=_page(f"Task {task_id}", "tasks", body, script))
@app.get("/ui/tasks/{task_id}/logs")
async def ui_task_logs(task_id: str) -> HTMLResponse:
    """Serve the log viewer for a single task.

    The page script loads the last 4000 log lines from
    ``/api/v2/tasks/<id>/logs?tail=4000`` and can re-poll every two seconds
    while the "auto" checkbox is ticked.

    Args:
        task_id: Task identifier taken from the URL path. It is untrusted
            input, so the markup receives an HTML-escaped copy and the script
            receives a JSON string literal instead of raw text.
    """
    safe_id = html.escape(task_id)
    # json.dumps produces a valid JavaScript string literal; the extra
    # replacements keep "<", ">" and "&" out of the emitted text so the value
    # cannot terminate an enclosing <script> element.
    js_task_id = (
        json.dumps(task_id)
        .replace("<", "\\u003c")
        .replace(">", "\\u003e")
        .replace("&", "\\u0026")
    )
    body = f"""
Logs: {safe_id}
""".strip()
    script = """
document.getElementById("nav-ray-dashboard").href = curOriginWithPort(8265);
const LOGS_URL = "/api/v2/tasks/" + encodeURIComponent(__TASK_ID__) + "/logs?tail=4000";
const out = document.getElementById("out");
let timer = null;
async function refresh() {
const resp = await apiFetch(LOGS_URL);
const text = await resp.text();
out.textContent = resp.ok ? text : ("Error: " + resp.status + "\\n" + text);
}
document.getElementById("refresh").onclick = refresh;
document.getElementById("auto").onchange = (e) => {
if (e.target.checked) {
timer = setInterval(refresh, 2000);
} else {
if (timer) clearInterval(timer);
timer = null;
}
};
refresh();
""".strip().replace("__TASK_ID__", js_task_id)
    return HTMLResponse(content=_page(f"Logs {task_id}", "tasks", body, script))
@app.get("/ui/data")
async def ui_data() -> HTMLResponse:
    """Serve the data page with the user's SFTP access details.

    The page script reads the user id and optional SFTP host/port from
    ``/api/v2/me``, points the web-client link at port 8081 of the current
    host, and can rotate the SFTP password through
    ``/api/v2/me/sftp:reset_password``. The most recently issued password is
    cached in browser storage.
    """
    page_js = """
const msg = document.getElementById("msg");
document.getElementById("nav-ray-dashboard").href = curOriginWithPort(8265);
const u = document.getElementById("u");
const p = document.getElementById("p");
const sftpWeb = document.getElementById("sftp-web");
const sftpHost = document.getElementById("sftp-host");
const sftpPort = document.getElementById("sftp-port");
document.getElementById("copy-u").onclick = async () => { await copyText(u.value || ""); };
document.getElementById("copy-p").onclick = async () => { await copyText(p.value || ""); };
async function refresh() {
const resp = await apiFetch("/api/v2/me");
const text = await resp.text();
if (!resp.ok) { msg.textContent = "Error: " + resp.status + "\\n" + text; return; }
const obj = JSON.parse(text);
u.value = (obj.user_id || "");
const cached = mvpSftpPasswordGet();
if (cached) p.value = cached;
const host = curOriginWithPort(8081);
sftpWeb.href = host + "/web/client";
sftpHost.textContent = (obj.sftp && obj.sftp.host) ? obj.sftp.host : window.location.hostname;
sftpPort.textContent = (obj.sftp && obj.sftp.port) ? String(obj.sftp.port) : "2022";
msg.textContent = "";
}
document.getElementById("reset-p").onclick = async () => {
p.value = "";
mvpSftpPasswordSet("");
msg.textContent = "Resetting...";
const resp = await apiFetch("/api/v2/me/sftp:reset_password", { method: "POST" });
const text = await resp.text();
if (!resp.ok) { msg.textContent = "Error: " + resp.status + "\\n" + text; return; }
const obj = JSON.parse(text);
p.value = obj.password || "";
mvpSftpPasswordSet(p.value);
msg.textContent = "SFTPGo password rotated.";
};
refresh();
""".strip()
    markup = """
Data
User files live under your home directory. Keep long-term artifacts in models/ or datasets/.
SFTPGo password
You can also use an SFTP client (e.g. FileZilla) with the same username/password.
Host: , Port: .
""".strip()
    document = _page("Data", "data", markup, page_js)
    return HTMLResponse(content=document)
@app.get("/ui/admin")
async def ui_admin() -> HTMLResponse:
    """Serve the admin page for listing users, creating users and issuing tokens.

    The page script lists up to 200 users from ``/api/v2/users``, creates a
    user by POSTing JSON to the same endpoint, and issues a token by POSTing
    to ``/api/v2/users/<id>/tokens``. User-supplied fields are passed through
    the script's ``esc`` helper before being written with ``innerHTML``.
    """
    body = """
Admin
This page requires the
admin token (set it in
Login).
Create user
Loading...
""".strip()
    # NOTE: ``esc`` must map the HTML metacharacters to entities; replacing a
    # character with itself would leave user ids and timestamps able to
    # inject markup. "\\n" is a Python escape yielding \n in the JavaScript
    # source, since a raw line break inside a JS "..." literal is a syntax
    # error.
    script = """
const out = document.getElementById("out");
const createMsg = document.getElementById("create-msg");
const userIdEl = document.getElementById("new-user-id");
const displayNameEl = document.getElementById("new-display-name");
function esc(s) {
s = String(s || "");
return s.replaceAll("&","&amp;").replaceAll("<","&lt;").replaceAll(">","&gt;").replaceAll('"',"&quot;").replaceAll("'","&#39;");
}
async function refresh() {
out.textContent = "Loading...";
try {
const obj = await apiJson("/api/v2/users?limit=200");
const users = (obj.users || []);
function row(u) {
const uid = u.user_id;
const tok = u.token || "";
const tokShort = tok ? (tok.length > 18 ? (tok.slice(0, 18) + "…") : tok) : "";
const created = u.created_at || "";
const updated = u.updated_at || "";
const tCreated = u.token_created_at || "";
const tUsed = u.token_last_used_at || "";
return `
${esc(uid)} |
${esc(created)} |
${esc(updated)} |
${esc(tokShort)}
token_created_at: ${esc(tCreated)}; last_used_at: ${esc(tUsed)}
|
`;
}
out.innerHTML = `
| User | Created | Updated | Token |
${users.map(row).join("") || "| (none) |\\n"}
`;
for (const btn of out.querySelectorAll("button[data-copy]")) {
btn.onclick = async () => { await copyText(btn.getAttribute("data-copy") || ""); };
}
for (const btn of out.querySelectorAll("button[data-issue]")) {
btn.onclick = async () => {
const uid = btn.getAttribute("data-issue");
if (!uid) return;
try {
const r = await apiJson("/api/v2/users/" + encodeURIComponent(uid) + "/tokens", { method: "POST" });
createMsg.textContent = "Issued token:\\n" + fmtJson(r);
await refresh();
} catch (e) {
createMsg.textContent = "Error issuing token: " + (e.status || "") + "\\n" + (e.body || String(e));
}
};
}
} catch (e) {
out.textContent = "Error: " + (e.status || "") + "\\n" + (e.body || String(e));
}
}
document.getElementById("refresh").onclick = refresh;
document.getElementById("create-user").onclick = async () => {
createMsg.textContent = "Creating...";
const user_id = (userIdEl.value || "").trim();
const display_name = (displayNameEl.value || "").trim();
if (!user_id) { createMsg.textContent = "user_id is required"; return; }
const payload = { user_id: user_id };
if (display_name) payload.display_name = display_name;
try {
const r = await apiJson("/api/v2/users", { method: "POST", headers: {"Content-Type":"application/json"}, body: JSON.stringify(payload) });
createMsg.textContent = "Created:\\n" + fmtJson(r);
userIdEl.value = "";
displayNameEl.value = "";
await refresh();
} catch (e) {
createMsg.textContent = "Error: " + (e.status || "") + "\\n" + (e.body || String(e));
}
};
refresh();
""".strip()
    return HTMLResponse(content=_page("Admin", "admin", body, script))