From 686739fea263ba1097c1d42f219bd02b6e19fcd7 Mon Sep 17 00:00:00 2001 From: yuyr Date: Tue, 6 Jan 2026 22:43:29 +0800 Subject: [PATCH] v3.8 model serving deployed successfully MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- specs/mvp/sw_arch.excalidraw | 4755 ++++++++++------- specs/mvp/v3.8/ray_serve.md | 314 ++ specs/mvp/v3.8/ray_serve_llm.md | 87 + specs/mvp/v3.8/requirements.md | 8 + specs/mvp/v3.8/v3.8_api.md | 224 + specs/mvp/v3.8/v3.8_design.md | 371 ++ specs/mvp/v3.8/v3.8_dev_plan.md | 266 + specs/mvp/v3.8/v3.8_progress.md | 48 + src/mvp/README.md | 6 + src/mvp/configs/dev.yaml | 8 + src/mvp/docker-compose.yaml | 16 + src/mvp/images/argus-ray-node/Dockerfile | 9 + .../images/argus-ray-node/argus-head-ray.sh | 3 +- src/mvp/py/argus/core/ids.py | 16 + src/mvp/py/argus/service/app.py | 216 +- src/mvp/py/argus/service/config.py | 38 + src/mvp/py/argus/service/db.py | 273 + src/mvp/py/argus/service/scheduler.py | 22 + src/mvp/py/argus/service/serve_client.py | 45 + src/mvp/py/argus/service/serve_llm_config.py | 63 + .../py/argus/service/serving_reconciler.py | 151 + src/mvp/py/argus/service/serving_spec.py | 144 + src/mvp/py/argus/service/ui.py | 248 + src/mvp/py/tests/test_app_serving_api.py | 282 + src/mvp/py/tests/test_db_serving.py | 79 + src/mvp/py/tests/test_ids.py | 29 + src/mvp/py/tests/test_llm_config_builder.py | 78 + src/mvp/py/tests/test_serve_client.py | 55 + src/mvp/py/tests/test_service_config.py | 22 + .../py/tests/test_serving_model_id_prefix.py | 23 + src/mvp/py/tests/test_serving_reconciler.py | 207 + src/mvp/py/tests/test_serving_spec_paths.py | 47 + .../py/tests/test_serving_spec_validation.py | 72 + src/mvp/py/tests/test_ui.py | 5 +- src/mvp/py/tests/test_ui_serving.py | 56 + src/mvp/scripts/01_up.sh | 3 +- src/mvp/scripts/debug_serve_llm_smoke.sh | 18 + src/mvp/scripts/run_all_v38_serving.sh | 193 + src/mvp/scripts/serve_llm_smoke.py | 102 + 39 files changed, 6772 insertions(+), 1830 deletions(-) create mode 100644 specs/mvp/v3.8/ray_serve.md create mode 100644 specs/mvp/v3.8/ray_serve_llm.md create mode 100644 specs/mvp/v3.8/requirements.md create mode 100644 specs/mvp/v3.8/v3.8_api.md create mode 100644 specs/mvp/v3.8/v3.8_design.md create mode 100644 specs/mvp/v3.8/v3.8_dev_plan.md create mode 100644 specs/mvp/v3.8/v3.8_progress.md create mode 100644 src/mvp/py/argus/service/serve_client.py create mode 100644 src/mvp/py/argus/service/serve_llm_config.py create mode 100644 src/mvp/py/argus/service/serving_reconciler.py create mode 100644 src/mvp/py/argus/service/serving_spec.py create mode 100644 src/mvp/py/tests/test_app_serving_api.py create mode 100644 src/mvp/py/tests/test_db_serving.py create mode 100644 src/mvp/py/tests/test_llm_config_builder.py create mode 100644 src/mvp/py/tests/test_serve_client.py create mode 100644 src/mvp/py/tests/test_serving_model_id_prefix.py create mode 100644 src/mvp/py/tests/test_serving_reconciler.py create mode 100644 src/mvp/py/tests/test_serving_spec_paths.py create mode 100644 src/mvp/py/tests/test_serving_spec_validation.py create mode 100644 src/mvp/py/tests/test_ui_serving.py create mode 100644 src/mvp/scripts/debug_serve_llm_smoke.sh create mode 100755 src/mvp/scripts/run_all_v38_serving.sh create mode 100644 src/mvp/scripts/serve_llm_smoke.py diff --git a/specs/mvp/sw_arch.excalidraw b/specs/mvp/sw_arch.excalidraw index c026691..3a9ba1a 100644 --- a/specs/mvp/sw_arch.excalidraw +++ b/specs/mvp/sw_arch.excalidraw @@ -3331,8 
+3331,8 @@ { "id": "hfyGYREhwfOiJ0NmS7nWJ", "type": "rectangle", - "x": 184.78887213940638, - "y": 4494.097308130087, + "x": 201.45553880607312, + "y": 5354.097308130087, "width": 647.3203086953981, "height": 641.2700333334831, "angle": 0, @@ -3350,19 +3350,19 @@ "type": 3 }, "seed": 1547009185, - "version": 1063, - "versionNonce": 2017335811, + "version": 1118, + "versionNonce": 2081816253, "isDeleted": false, "boundElements": [], - "updated": 1767580421422, + "updated": 1767684541917, "link": null, "locked": false }, { "id": "kcnXooRmgv12d40l7DX1I", "type": "rectangle", - "x": 390.4424022703514, - "y": 4674.763964624228, + "x": 407.10906893701815, + "y": 5534.763964624228, "width": 156.66668701171875, "height": 85, "angle": 0, @@ -3378,8 +3378,8 @@ "index": "b0T", "roundness": null, "seed": 142557313, - "version": 781, - "versionNonce": 1578230179, + "version": 836, + "versionNonce": 1304502045, "isDeleted": false, "boundElements": [ { @@ -3395,15 +3395,15 @@ "type": "arrow" } ], - "updated": 1767580421422, + "updated": 1767684541917, "link": null, "locked": false }, { "id": "-kTPtQM7ZjAJg_O72uvP0", "type": "text", - "x": 411.38576927474594, - "y": 4692.263964624228, + "x": 428.0524359414127, + "y": 5552.263964624228, "width": 114.77995300292969, "height": 50, "angle": 0, @@ -3419,11 +3419,11 @@ "index": "b0U", "roundness": null, "seed": 849709153, - "version": 771, - "versionNonce": 1363916099, + "version": 826, + "versionNonce": 300495741, "isDeleted": false, "boundElements": [], - "updated": 1767580421422, + "updated": 1767684541917, "link": null, "locked": false, "text": "ray job tool\n(ray client)", @@ -3439,8 +3439,8 @@ { "id": "syy4HE220r-q_RMUUSsel", "type": "rectangle", - "x": 635.4424022703514, - "y": 4674.263964624228, + "x": 652.1090689370182, + "y": 5534.263964624228, "width": 156.66668701171875, "height": 85, "angle": 0, @@ -3456,8 +3456,8 @@ "index": "b0V", "roundness": null, "seed": 1954010177, - "version": 918, - "versionNonce": 1838907523, + "version": 973, + "versionNonce": 512112701, "isDeleted": false, "boundElements": [ { @@ -3469,15 +3469,15 @@ "type": "arrow" } ], - "updated": 1767580421422, + "updated": 1767684541917, "link": null, "locked": false }, { "id": "CA5CuK3pSeEixp1YwKENQ", "type": "text", - "x": 646.3258022337303, - "y": 4691.763964624228, + "x": 662.9924689003971, + "y": 5551.763964624228, "width": 134.89988708496094, "height": 50, "angle": 0, @@ -3493,11 +3493,11 @@ "index": "b0W", "roundness": null, "seed": 30778401, - "version": 922, - "versionNonce": 1073641507, + "version": 977, + "versionNonce": 2028873885, "isDeleted": false, "boundElements": [], - "updated": 1767580421422, + "updated": 1767684541917, "link": null, "locked": false, "text": "VerlTaskSpec \nyaml", @@ -3513,8 +3513,8 @@ { "id": "BfGeVEKVLbnnxWatX-CuX", "type": "arrow", - "x": 636.7757457762108, - "y": 4712.763964624228, + "x": 653.4424124428775, + "y": 5572.763964624228, "width": 90.33331298828125, "height": 4, "angle": 0, @@ -3532,11 +3532,11 @@ "type": 2 }, "seed": 1722710017, - "version": 2068, - "versionNonce": 243331309, + "version": 2235, + "versionNonce": 245651379, "isDeleted": false, "boundElements": [], - "updated": 1767580422053, + "updated": 1767684541921, "link": null, "locked": false, "points": [ @@ -3552,12 +3552,12 @@ "lastCommittedPoint": null, "startBinding": { "elementId": "syy4HE220r-q_RMUUSsel", - "focus": 0.1611880517201622, + "focus": 0.1611880517201682, "gap": 1.333343505859375 }, "endBinding": { "elementId": "kcnXooRmgv12d40l7DX1I", - "focus": 
0.06393742185083832, + "focus": 0.06393742185081888, "gap": 1 }, "startArrowhead": null, @@ -3567,8 +3567,8 @@ { "id": "XwaSYITVD0m2YORWBiATj", "type": "text", - "x": 363.57537092345547, - "y": 4461.430651635947, + "x": 380.2420375901222, + "y": 5321.430651635947, "width": 480.5596923828125, "height": 25, "angle": 0, @@ -3584,11 +3584,11 @@ "index": "b0Y", "roundness": null, "seed": 84004833, - "version": 852, - "versionNonce": 399615747, + "version": 907, + "versionNonce": 1749922237, "isDeleted": false, "boundElements": [], - "updated": 1767580421422, + "updated": 1767684541917, "link": null, "locked": false, "text": "v4.0 observability (prom, grafana, alert, ELK, etc)", @@ -3604,8 +3604,8 @@ { "id": "hO23c7cUKniC0ShSkbYHY", "type": "rectangle", - "x": 576.2663503435255, - "y": 4516.930560083212, + "x": 592.9330170101922, + "y": 5376.930560083212, "width": 210.1763265850289, "height": 55.000000000000014, "angle": 0, @@ -3621,8 +3621,8 @@ "index": "b0Z", "roundness": null, "seed": 1390357441, - "version": 1070, - "versionNonce": 1034669731, + "version": 1125, + "versionNonce": 1586131485, "isDeleted": false, "boundElements": [ { @@ -3630,15 +3630,15 @@ "id": "WvC3BnzbmedjQgc_rcGOG" } ], - "updated": 1767580421422, + "updated": 1767684541917, "link": null, "locked": false }, { "id": "WvC3BnzbmedjQgc_rcGOG", "type": "text", - "x": 628.5045533088916, - "y": 4531.930560083212, + "x": 645.1712199755583, + "y": 5391.930560083212, "width": 105.69992065429688, "height": 25, "angle": 0, @@ -3654,11 +3654,11 @@ "index": "b0a", "roundness": null, "seed": 1483082657, - "version": 1109, - "versionNonce": 1629487683, + "version": 1164, + "versionNonce": 68167293, "isDeleted": false, "boundElements": [], - "updated": 1767580421422, + "updated": 1767684541917, "link": null, "locked": false, "text": "API server", @@ -3674,8 +3674,8 @@ { "id": "QTuaauYBrKurb9Uym-vJ0", "type": "rectangle", - "x": 390.2757457762108, - "y": 4587.930560083212, + "x": 406.9424124428775, + "y": 5447.930560083212, "width": 154.6436510018092, "height": 60, "angle": 0, @@ -3691,8 +3691,8 @@ "index": "b0b", "roundness": null, "seed": 1841688449, - "version": 1300, - "versionNonce": 1527562723, + "version": 1355, + "versionNonce": 1805668061, "isDeleted": false, "boundElements": [ { @@ -3700,15 +3700,15 @@ "id": "wds-TjwVp7NzSUYdwGMzH" } ], - "updated": 1767580421422, + "updated": 1767684541917, "link": null, "locked": false }, { "id": "wds-TjwVp7NzSUYdwGMzH", "type": "text", - "x": 406.4776143069005, - "y": 4592.930560083212, + "x": 423.1442809735672, + "y": 5452.930560083212, "width": 122.23991394042969, "height": 50, "angle": 0, @@ -3724,11 +3724,11 @@ "index": "b0c", "roundness": null, "seed": 960386913, - "version": 1359, - "versionNonce": 1686146435, + "version": 1414, + "versionNonce": 1405905725, "isDeleted": false, "boundElements": [], - "updated": 1767580421422, + "updated": 1767684541917, "link": null, "locked": false, "text": "task\nmanagement ", @@ -3744,8 +3744,8 @@ { "id": "h-InDEDqJ6h9usuQPtO1A", "type": "rectangle", - "x": 574.3832843618806, - "y": 4588.504868395925, + "x": 591.0499510285473, + "y": 5448.504868395925, "width": 207.59447102997498, "height": 60, "angle": 0, @@ -3761,8 +3761,8 @@ "index": "b0d", "roundness": null, "seed": 1830732609, - "version": 1391, - "versionNonce": 280618275, + "version": 1446, + "versionNonce": 754318237, "isDeleted": false, "boundElements": [ { @@ -3770,15 +3770,15 @@ "id": "B1uYGWk1HK8JtizGFaaGC" } ], - "updated": 1767580421422, + "updated": 1767684541917, "link": null, 
"locked": false }, { "id": "B1uYGWk1HK8JtizGFaaGC", "type": "text", - "x": 590.650589762122, - "y": 4593.504868395925, + "x": 607.3172564287887, + "y": 5453.504868395925, "width": 175.0598602294922, "height": 50, "angle": 0, @@ -3794,11 +3794,11 @@ "index": "b0e", "roundness": null, "seed": 1170649889, - "version": 1481, - "versionNonce": 120015043, + "version": 1536, + "versionNonce": 1598980093, "isDeleted": false, "boundElements": [], - "updated": 1767580421422, + "updated": 1767684541917, "link": null, "locked": false, "text": "node management\n(ssh, ray cluster) ", @@ -3814,8 +3814,8 @@ { "id": "XS4hxHD7hkjqqdIPoHNyM", "type": "rectangle", - "x": 911.7293968820569, - "y": 4726.479066772403, + "x": 928.3960635487236, + "y": 5586.479066772403, "width": 163.1357446724801, "height": 106.45708709635272, "angle": 0, @@ -3831,8 +3831,8 @@ "index": "b0f", "roundness": null, "seed": 1202038529, - "version": 716, - "versionNonce": 2103916643, + "version": 771, + "versionNonce": 1356298333, "isDeleted": false, "boundElements": [ { @@ -3840,15 +3840,15 @@ "id": "ilnIrrhwIwdl5yMP0J_-o" } ], - "updated": 1767580421422, + "updated": 1767684541917, "link": null, "locked": false }, { "id": "ilnIrrhwIwdl5yMP0J_-o", "type": "text", - "x": 942.6473134687852, - "y": 4754.707610320578, + "x": 959.313980135452, + "y": 5614.707610320578, "width": 101.29991149902344, "height": 50, "angle": 0, @@ -3864,11 +3864,11 @@ "index": "b0g", "roundness": null, "seed": 1027284705, - "version": 779, - "versionNonce": 1900557315, + "version": 834, + "versionNonce": 727868605, "isDeleted": false, "boundElements": [], - "updated": 1767580421422, + "updated": 1767684541917, "link": null, "locked": false, "text": "ray worker\nnode", @@ -3884,8 +3884,8 @@ { "id": "qYCyZgffVShRdxYkZZEQC", "type": "rectangle", - "x": 912.5312001672861, - "y": 4533.440830751391, + "x": 929.1978668339528, + "y": 5393.440830751391, "width": 163.1357446724801, "height": 85, "angle": 0, @@ -3901,8 +3901,8 @@ "index": "b0h", "roundness": null, "seed": 1964415681, - "version": 737, - "versionNonce": 583580579, + "version": 792, + "versionNonce": 1653654813, "isDeleted": false, "boundElements": [ { @@ -3910,15 +3910,15 @@ "id": "7cAjOCsL26YdZ9huXHSvG" } ], - "updated": 1767580421422, + "updated": 1767684541917, "link": null, "locked": false }, { "id": "7cAjOCsL26YdZ9huXHSvG", "type": "text", - "x": 943.4491167540144, - "y": 4550.940830751391, + "x": 960.1157834206812, + "y": 5410.940830751391, "width": 101.29991149902344, "height": 50, "angle": 0, @@ -3934,11 +3934,11 @@ "index": "b0i", "roundness": null, "seed": 1071094433, - "version": 803, - "versionNonce": 1310150467, + "version": 858, + "versionNonce": 2043943293, "isDeleted": false, "boundElements": [], - "updated": 1767580421422, + "updated": 1767684541917, "link": null, "locked": false, "text": "ray worker\nnode", @@ -3954,8 +3954,8 @@ { "id": "Ov7Ctp7PvJITFoQy7Yb4E", "type": "rectangle", - "x": 393.0812552422981, - "y": 4514.717667417468, + "x": 409.7479219089648, + "y": 5374.717667417468, "width": 147.4799234681481, "height": 55.000000000000014, "angle": 0, @@ -3971,8 +3971,8 @@ "index": "b0j", "roundness": null, "seed": 774741633, - "version": 1188, - "versionNonce": 630353635, + "version": 1243, + "versionNonce": 174973405, "isDeleted": false, "boundElements": [ { @@ -3980,15 +3980,15 @@ "id": "hkWZM79d15pmmlJnpM5GC" } ], - "updated": 1767580421422, + "updated": 1767684541917, "link": null, "locked": false }, { "id": "hkWZM79d15pmmlJnpM5GC", "type": "text", - "x": 435.59124413701664, - "y": 
4529.717667417468, + "x": 452.2579108036834, + "y": 5389.717667417468, "width": 62.45994567871094, "height": 25, "angle": 0, @@ -4004,11 +4004,11 @@ "index": "b0k", "roundness": null, "seed": 249046625, - "version": 1238, - "versionNonce": 197696131, + "version": 1293, + "versionNonce": 158309949, "isDeleted": false, "boundElements": [], - "updated": 1767580421422, + "updated": 1767684541917, "link": null, "locked": false, "text": "WebUI", @@ -4024,8 +4024,8 @@ { "id": "hJ1qbW1rf0n6-WrDss2Ro", "type": "rectangle", - "x": 636.7530370032708, - "y": 4789.813057433881, + "x": 653.4197036699376, + "y": 5649.813057433881, "width": 156.66668701171875, "height": 85, "angle": 0, @@ -4041,8 +4041,8 @@ "index": "b0l", "roundness": null, "seed": 749122113, - "version": 970, - "versionNonce": 225989155, + "version": 1025, + "versionNonce": 1304088221, "isDeleted": false, "boundElements": [ { @@ -4054,15 +4054,15 @@ "type": "arrow" } ], - "updated": 1767580421422, + "updated": 1767684541917, "link": null, "locked": false }, { "id": "w68ybhw27soUMyLsMu-vA", "type": "text", - "x": 641.7964329993646, - "y": 4794.813057433881, + "x": 658.4630996660313, + "y": 5654.813057433881, "width": 146.57989501953125, "height": 75, "angle": 0, @@ -4078,11 +4078,11 @@ "index": "b0m", "roundness": null, "seed": 1282982433, - "version": 984, - "versionNonce": 462427587, + "version": 1039, + "versionNonce": 1442902781, "isDeleted": false, "boundElements": [], - "updated": 1767580421422, + "updated": 1767684541917, "link": null, "locked": false, "text": "Advanced\nVerlTaskSpec \n(code, ckpt, ib)", @@ -4098,8 +4098,8 @@ { "id": "OJ2YvI_4pyTp3tvnog5HM", "type": "arrow", - "x": 636.2892302063543, - "y": 4834.178049948081, + "x": 652.9558968730211, + "y": 5694.178049948081, "width": 80.66214281697523, "height": 84.85848269127973, "angle": 0, @@ -4117,11 +4117,11 @@ "type": 2 }, "seed": 1799214593, - "version": 831, - "versionNonce": 74337101, + "version": 942, + "versionNonce": 574941523, "isDeleted": false, "boundElements": [], - "updated": 1767580422053, + "updated": 1767684541921, "link": null, "locked": false, "points": [ @@ -4137,7 +4137,7 @@ "lastCommittedPoint": null, "startBinding": { "elementId": "hJ1qbW1rf0n6-WrDss2Ro", - "focus": -0.6785882314731579, + "focus": -0.6785882314731589, "gap": 1 }, "endBinding": null, @@ -4148,8 +4148,8 @@ { "id": "ozgVxBsj7snNecNunrp11", "type": "rectangle", - "x": 207.87720669617303, - "y": 4587.835588719463, + "x": 224.54387336283978, + "y": 5447.835588719463, "width": 154.6436510018092, "height": 60, "angle": 0, @@ -4165,8 +4165,8 @@ "index": "b0o", "roundness": null, "seed": 1005940193, - "version": 1362, - "versionNonce": 443066531, + "version": 1417, + "versionNonce": 215226397, "isDeleted": false, "boundElements": [ { @@ -4174,15 +4174,15 @@ "id": "i3wz2t7mbs8b39oPir_Gf" } ], - "updated": 1767580421422, + "updated": 1767684541917, "link": null, "locked": false }, { "id": "i3wz2t7mbs8b39oPir_Gf", "type": "text", - "x": 224.07907522686276, - "y": 4592.835588719463, + "x": 240.7457418935295, + "y": 5452.835588719463, "width": 122.23991394042969, "height": 50, "angle": 0, @@ -4198,11 +4198,11 @@ "index": "b0p", "roundness": null, "seed": 1705970113, - "version": 1430, - "versionNonce": 87137347, + "version": 1485, + "versionNonce": 1177881725, "isDeleted": false, "boundElements": [], - "updated": 1767580421422, + "updated": 1767684541917, "link": null, "locked": false, "text": "user\nmanagement ", @@ -4218,8 +4218,8 @@ { "id": "-6-EuXJhEm2mWx5ISTwFk", "type": "rectangle", - "x": 
213.93860311509013, - "y": 4781.420420372215, + "x": 230.60526978175687, + "y": 5641.420420372215, "width": 154.6436510018092, "height": 85, "angle": 0, @@ -4235,8 +4235,8 @@ "index": "b0q", "roundness": null, "seed": 25831841, - "version": 1594, - "versionNonce": 2111511523, + "version": 1649, + "versionNonce": 607679709, "isDeleted": false, "boundElements": [ { @@ -4244,15 +4244,15 @@ "id": "GabFf-T0sdW_Oq8DrJ2I3" } ], - "updated": 1767580421422, + "updated": 1767684541917, "link": null, "locked": false }, { "id": "GabFf-T0sdW_Oq8DrJ2I3", "type": "text", - "x": 234.14047164577983, - "y": 4786.420420372215, + "x": 250.80713831244657, + "y": 5646.420420372215, "width": 114.23991394042969, "height": 75, "angle": 0, @@ -4268,11 +4268,11 @@ "index": "b0r", "roundness": null, "seed": 1488342401, - "version": 1676, - "versionNonce": 276014979, + "version": 1731, + "versionNonce": 1383059773, "isDeleted": false, "boundElements": [], - "updated": 1767580421422, + "updated": 1767684541917, "link": null, "locked": false, "text": "data\nmanagement\nSFTPGo ", @@ -4288,8 +4288,8 @@ { "id": "OkOW_h1bpRwbXvHRE98qO", "type": "rectangle", - "x": 229.40224214693444, - "y": 5023.3894549921515, + "x": 246.06890881360118, + "y": 5883.3894549921515, "width": 154.6436510018092, "height": 60, "angle": 0, @@ -4305,8 +4305,8 @@ "index": "b0s", "roundness": null, "seed": 905236833, - "version": 1466, - "versionNonce": 1127857955, + "version": 1521, + "versionNonce": 238854557, "isDeleted": false, "boundElements": [ { @@ -4314,15 +4314,15 @@ "id": "dr1KmTO-yRI3g5vju74ZK" } ], - "updated": 1767580421422, + "updated": 1767684541917, "link": null, "locked": false }, { "id": "dr1KmTO-yRI3g5vju74ZK", "type": "text", - "x": 251.75410457410862, - "y": 5040.8894549921515, + "x": 268.42077124077537, + "y": 5900.8894549921515, "width": 109.93992614746094, "height": 25, "angle": 0, @@ -4338,11 +4338,11 @@ "index": "b0t", "roundness": null, "seed": 1089862977, - "version": 1546, - "versionNonce": 1352393411, + "version": 1601, + "versionNonce": 1825596925, "isDeleted": false, "boundElements": [], - "updated": 1767580421422, + "updated": 1767684541917, "link": null, "locked": false, "text": "prometheus", @@ -4358,8 +4358,8 @@ { "id": "PwNRlpy3F_V5w-3sfAYAQ", "type": "rectangle", - "x": 427.0943513641362, - "y": 5021.524547851837, + "x": 443.7610180308029, + "y": 5881.524547851837, "width": 154.6436510018092, "height": 60, "angle": 0, @@ -4375,8 +4375,8 @@ "index": "b0u", "roundness": null, "seed": 1448322511, - "version": 1510, - "versionNonce": 557355619, + "version": 1565, + "versionNonce": 1630314077, "isDeleted": false, "boundElements": [ { @@ -4384,15 +4384,15 @@ "id": "kWS_uGtXbrBusArKpVdGc" } ], - "updated": 1767580421422, + "updated": 1767684541917, "link": null, "locked": false }, { "id": "kWS_uGtXbrBusArKpVdGc", "type": "text", - "x": 467.63621623271655, - "y": 5039.024547851837, + "x": 484.3028828993833, + "y": 5899.024547851837, "width": 73.55992126464844, "height": 25, "angle": 0, @@ -4408,11 +4408,11 @@ "index": "b0v", "roundness": null, "seed": 1134302191, - "version": 1599, - "versionNonce": 98455043, + "version": 1654, + "versionNonce": 35834557, "isDeleted": false, "boundElements": [], - "updated": 1767580421422, + "updated": 1767684541917, "link": null, "locked": false, "text": "grafana", @@ -4428,8 +4428,8 @@ { "id": "DHdIrKA2SmPRr2rTtJJ8E", "type": "rectangle", - "x": 940.088203276161, - "y": 4825.921889114552, + "x": 956.7548699428278, + "y": 5685.921889114552, "width": 106.83929336612698, "height": 60, 
"angle": 0, @@ -4445,8 +4445,8 @@ "index": "b1a", "roundness": null, "seed": 1809008399, - "version": 1585, - "versionNonce": 831231395, + "version": 1640, + "versionNonce": 273967901, "isDeleted": false, "boundElements": [ { @@ -4454,15 +4454,15 @@ "id": "HbJRSZyHsD6NLPzLVoa9C" } ], - "updated": 1767580421422, + "updated": 1767684541917, "link": null, "locked": false }, { "id": "HbJRSZyHsD6NLPzLVoa9C", "type": "text", - "x": 951.4178765095176, - "y": 4843.421889114552, + "x": 968.0845431761843, + "y": 5703.421889114552, "width": 84.17994689941406, "height": 25, "angle": 0, @@ -4478,11 +4478,11 @@ "index": "b1b", "roundness": null, "seed": 1662356783, - "version": 1676, - "versionNonce": 1936648515, + "version": 1731, + "versionNonce": 1009975165, "isDeleted": false, "boundElements": [], - "updated": 1767580421422, + "updated": 1767684541917, "link": null, "locked": false, "text": "exporter", @@ -4498,8 +4498,8 @@ { "id": "SYvgt1uhLv6KU1Snpyha0", "type": "rectangle", - "x": 941.8090593134823, - "y": 4614.817365416434, + "x": 958.475725980149, + "y": 5474.817365416434, "width": 106.83929336612698, "height": 60, "angle": 0, @@ -4515,8 +4515,8 @@ "index": "b1c", "roundness": null, "seed": 1250827695, - "version": 1599, - "versionNonce": 820431075, + "version": 1654, + "versionNonce": 738199517, "isDeleted": false, "boundElements": [ { @@ -4524,15 +4524,15 @@ "id": "zbUHQA7llONcizbRp8kkI" } ], - "updated": 1767580421422, + "updated": 1767684541917, "link": null, "locked": false }, { "id": "zbUHQA7llONcizbRp8kkI", "type": "text", - "x": 953.1387325468388, - "y": 4632.317365416434, + "x": 969.8053992135056, + "y": 5492.317365416434, "width": 84.17994689941406, "height": 25, "angle": 0, @@ -4548,11 +4548,11 @@ "index": "b1d", "roundness": null, "seed": 1568444367, - "version": 1690, - "versionNonce": 528594051, + "version": 1745, + "versionNonce": 1103955005, "isDeleted": false, "boundElements": [], - "updated": 1767580421422, + "updated": 1767684541917, "link": null, "locked": false, "text": "exporter", @@ -4568,8 +4568,8 @@ { "id": "ND0kaXREeIHTrNlSqY34p", "type": "rectangle", - "x": 629.4389274863165, - "y": 5021.045738109877, + "x": 646.1055941529833, + "y": 5881.045738109877, "width": 154.6436510018092, "height": 60, "angle": 0, @@ -4585,8 +4585,8 @@ "index": "b1i", "roundness": null, "seed": 814939215, - "version": 1566, - "versionNonce": 1056844835, + "version": 1621, + "versionNonce": 561738909, "isDeleted": false, "boundElements": [ { @@ -4594,15 +4594,15 @@ "id": "e8-1MnFz0QWrPJXZ-ZnQ3" } ], - "updated": 1767580421422, + "updated": 1767684541917, "link": null, "locked": false }, { "id": "e8-1MnFz0QWrPJXZ-ZnQ3", "type": "text", - "x": 688.1307633631977, - "y": 5038.545738109877, + "x": 704.7974300298645, + "y": 5898.545738109877, "width": 37.259979248046875, "height": 25, "angle": 0, @@ -4618,11 +4618,11 @@ "index": "b1j", "roundness": null, "seed": 2024577647, - "version": 1659, - "versionNonce": 323661763, + "version": 1714, + "versionNonce": 1992087805, "isDeleted": false, "boundElements": [], - "updated": 1767580421422, + "updated": 1767684541917, "link": null, "locked": false, "text": "ELK", @@ -4638,8 +4638,8 @@ { "id": "UOlgJuhEaQm81KRHgyY-0", "type": "rectangle", - "x": 179.07220592595837, - "y": 5244.209019024574, + "x": 195.7388725926251, + "y": 6104.209019024574, "width": 647.3203086953981, "height": 717.4711059065564, "angle": 0, @@ -4657,19 +4657,19 @@ "type": 3 }, "seed": 790237583, - "version": 1166, - "versionNonce": 1146533539, + "version": 1221, + "versionNonce": 
2064822621, "isDeleted": false, "boundElements": [], - "updated": 1767580076534, + "updated": 1767684541917, "link": null, "locked": false }, { "id": "ExBkrVpyvo_OXAog8ivWn", "type": "rectangle", - "x": 384.7257360569034, - "y": 5424.875675518715, + "x": 401.3924027235701, + "y": 6284.875675518715, "width": 156.66668701171875, "height": 85, "angle": 0, @@ -4685,8 +4685,8 @@ "index": "b1n", "roundness": null, "seed": 1478428591, - "version": 819, - "versionNonce": 573434435, + "version": 874, + "versionNonce": 1003388349, "isDeleted": false, "boundElements": [ { @@ -4702,15 +4702,15 @@ "type": "arrow" } ], - "updated": 1767580076534, + "updated": 1767684541917, "link": null, "locked": false }, { "id": "hqH-l-CJ15NjmjoZtiq-9", "type": "text", - "x": 405.6691030612979, - "y": 5442.375675518715, + "x": 422.33576972796465, + "y": 6302.375675518715, "width": 114.77995300292969, "height": 50, "angle": 0, @@ -4726,11 +4726,11 @@ "index": "b1o", "roundness": null, "seed": 492845519, - "version": 809, - "versionNonce": 303933923, + "version": 864, + "versionNonce": 538033693, "isDeleted": false, "boundElements": [], - "updated": 1767580076534, + "updated": 1767684541917, "link": null, "locked": false, "text": "ray job tool\n(ray client)", @@ -4746,8 +4746,8 @@ { "id": "kurhiArg2sM7XFjEdTyrI", "type": "rectangle", - "x": 629.7257360569034, - "y": 5424.375675518715, + "x": 646.3924027235702, + "y": 6284.375675518715, "width": 156.66668701171875, "height": 85, "angle": 0, @@ -4763,8 +4763,8 @@ "index": "b1p", "roundness": null, "seed": 603334639, - "version": 958, - "versionNonce": 380685603, + "version": 1013, + "versionNonce": 348429021, "isDeleted": false, "boundElements": [ { @@ -4776,15 +4776,15 @@ "type": "arrow" } ], - "updated": 1767580076534, + "updated": 1767684541917, "link": null, "locked": false }, { "id": "7k75o8I7t4NCGdyrtenGs", "type": "text", - "x": 640.6091360202823, - "y": 5441.875675518715, + "x": 657.2758026869491, + "y": 6301.875675518715, "width": 134.89988708496094, "height": 50, "angle": 0, @@ -4800,11 +4800,11 @@ "index": "b1q", "roundness": null, "seed": 1508463119, - "version": 960, - "versionNonce": 2114007235, + "version": 1015, + "versionNonce": 1787246397, "isDeleted": false, "boundElements": [], - "updated": 1767580076534, + "updated": 1767684541917, "link": null, "locked": false, "text": "VerlTaskSpec \nyaml", @@ -4820,8 +4820,8 @@ { "id": "qO3lO1bGwrfyKuWKhDJ1J", "type": "arrow", - "x": 631.0590795627628, - "y": 5462.875675518715, + "x": 647.7257462294295, + "y": 6322.875675518715, "width": 90.33331298828125, "height": 4, "angle": 0, @@ -4839,11 +4839,11 @@ "type": 2 }, "seed": 1479686191, - "version": 2170, - "versionNonce": 668673901, + "version": 2337, + "versionNonce": 1203957907, "isDeleted": false, "boundElements": [], - "updated": 1767580076859, + "updated": 1767684541922, "link": null, "locked": false, "points": [ @@ -4859,12 +4859,12 @@ "lastCommittedPoint": null, "startBinding": { "elementId": "kurhiArg2sM7XFjEdTyrI", - "focus": 0.16118805172015774, + "focus": 0.1611880517201637, "gap": 1.333343505859375 }, "endBinding": { "elementId": "ExBkrVpyvo_OXAog8ivWn", - "focus": 0.06393742185084282, + "focus": 0.0639374218508417, "gap": 1 }, "startArrowhead": null, @@ -4874,8 +4874,8 @@ { "id": "oZv8__WhJ3c5fkGqXVl2_", "type": "text", - "x": 357.3924352379864, - "y": 5210.609823586392, + "x": 374.0591019046531, + "y": 6070.609823586392, "width": 385.8797302246094, "height": 25, "angle": 0, @@ -4891,11 +4891,11 @@ "index": "b1s", "roundness": null, "seed": 79256143, 
- "version": 977, - "versionNonce": 571188131, + "version": 1032, + "versionNonce": 259376221, "isDeleted": false, "boundElements": [], - "updated": 1767580076534, + "updated": 1767684541917, "link": null, "locked": false, "text": "v5.0 operability (statistics, sop, agent)", @@ -4911,8 +4911,8 @@ { "id": "WNodxBjaE-CYp8dK1fMQN", "type": "rectangle", - "x": 570.5496841300775, - "y": 5267.042270977699, + "x": 587.2163507967442, + "y": 6127.042270977699, "width": 210.1763265850289, "height": 55.000000000000014, "angle": 0, @@ -4928,8 +4928,8 @@ "index": "b1t", "roundness": null, "seed": 1951873135, - "version": 1108, - "versionNonce": 884815683, + "version": 1163, + "versionNonce": 408367293, "isDeleted": false, "boundElements": [ { @@ -4937,15 +4937,15 @@ "id": "G2DrVh_u1lhQlxAkO611N" } ], - "updated": 1767580076534, + "updated": 1767684541917, "link": null, "locked": false }, { "id": "G2DrVh_u1lhQlxAkO611N", "type": "text", - "x": 622.7878870954435, - "y": 5282.042270977699, + "x": 639.4545537621102, + "y": 6142.042270977699, "width": 105.69992065429688, "height": 25, "angle": 0, @@ -4961,11 +4961,11 @@ "index": "b1u", "roundness": null, "seed": 215241359, - "version": 1149, - "versionNonce": 255547107, + "version": 1204, + "versionNonce": 1863061789, "isDeleted": false, "boundElements": [], - "updated": 1767580076534, + "updated": 1767684541917, "link": null, "locked": false, "text": "API server", @@ -4981,8 +4981,8 @@ { "id": "XxNyZL0flCwhAO9zT3UQy", "type": "rectangle", - "x": 384.55907956276275, - "y": 5338.042270977699, + "x": 401.2257462294295, + "y": 6198.042270977699, "width": 154.6436510018092, "height": 60, "angle": 0, @@ -4998,8 +4998,8 @@ "index": "b1v", "roundness": null, "seed": 1726556335, - "version": 1338, - "versionNonce": 538215043, + "version": 1393, + "versionNonce": 1896211837, "isDeleted": false, "boundElements": [ { @@ -5007,15 +5007,15 @@ "id": "O2dtUuIqb0fs_uv7D29cW" } ], - "updated": 1767580076534, + "updated": 1767684541917, "link": null, "locked": false }, { "id": "O2dtUuIqb0fs_uv7D29cW", "type": "text", - "x": 400.7609480934525, - "y": 5343.042270977699, + "x": 417.42761476011924, + "y": 6203.042270977699, "width": 122.23991394042969, "height": 50, "angle": 0, @@ -5031,11 +5031,11 @@ "index": "b1w", "roundness": null, "seed": 1883126479, - "version": 1399, - "versionNonce": 473635, + "version": 1454, + "versionNonce": 1367748061, "isDeleted": false, "boundElements": [], - "updated": 1767580076534, + "updated": 1767684541917, "link": null, "locked": false, "text": "task\nmanagement ", @@ -5051,8 +5051,8 @@ { "id": "UK3M2EByn3haJdkH9Z21T", "type": "rectangle", - "x": 568.6666181484326, - "y": 5338.616579290413, + "x": 585.3332848150993, + "y": 6198.616579290413, "width": 207.59447102997498, "height": 60, "angle": 0, @@ -5068,8 +5068,8 @@ "index": "b1x", "roundness": null, "seed": 1298925807, - "version": 1429, - "versionNonce": 738328003, + "version": 1484, + "versionNonce": 1604217405, "isDeleted": false, "boundElements": [ { @@ -5077,15 +5077,15 @@ "id": "_erL3NkeChZxbmMXnWyQ8" } ], - "updated": 1767580076534, + "updated": 1767684541917, "link": null, "locked": false }, { "id": "_erL3NkeChZxbmMXnWyQ8", "type": "text", - "x": 584.933923548674, - "y": 5343.616579290413, + "x": 601.6005902153407, + "y": 6203.616579290413, "width": 175.0598602294922, "height": 50, "angle": 0, @@ -5101,11 +5101,11 @@ "index": "b1y", "roundness": null, "seed": 2080051983, - "version": 1519, - "versionNonce": 2133020003, + "version": 1574, + "versionNonce": 204479133, "isDeleted": 
false, "boundElements": [], - "updated": 1767580076534, + "updated": 1767684541917, "link": null, "locked": false, "text": "node management\n(ssh, ray cluster) ", @@ -5121,8 +5121,8 @@ { "id": "lQT5-sydwGN_vbadu61qE", "type": "rectangle", - "x": 906.7776256001704, - "y": 5472.766443061252, + "x": 923.4442922668371, + "y": 6332.766443061252, "width": 163.1357446724801, "height": 106.45708709635272, "angle": 0, @@ -5138,8 +5138,8 @@ "index": "b1z", "roundness": null, "seed": 878958895, - "version": 760, - "versionNonce": 1824106755, + "version": 815, + "versionNonce": 804044541, "isDeleted": false, "boundElements": [ { @@ -5147,15 +5147,15 @@ "id": "yqwCJHk3GkcnyEzFHSU2z" } ], - "updated": 1767580076534, + "updated": 1767684541917, "link": null, "locked": false }, { "id": "yqwCJHk3GkcnyEzFHSU2z", "type": "text", - "x": 937.6955421868987, - "y": 5500.994986609428, + "x": 954.3622088535654, + "y": 6360.994986609428, "width": 101.29991149902344, "height": 50, "angle": 0, @@ -5171,11 +5171,11 @@ "index": "b20", "roundness": null, "seed": 202300239, - "version": 824, - "versionNonce": 2051248291, + "version": 879, + "versionNonce": 290838365, "isDeleted": false, "boundElements": [], - "updated": 1767580076534, + "updated": 1767684541917, "link": null, "locked": false, "text": "ray worker\nnode", @@ -5191,8 +5191,8 @@ { "id": "n74P3TAm78jOVPnYqFccD", "type": "rectangle", - "x": 906.8145339538381, - "y": 5283.5525416458795, + "x": 923.4812006205049, + "y": 6143.5525416458795, "width": 163.1357446724801, "height": 85, "angle": 0, @@ -5208,8 +5208,8 @@ "index": "b21", "roundness": null, "seed": 1465162095, - "version": 775, - "versionNonce": 2037351491, + "version": 830, + "versionNonce": 717022141, "isDeleted": false, "boundElements": [ { @@ -5217,15 +5217,15 @@ "id": "BTIKScHoPW_m2eS-Ro8GX" } ], - "updated": 1767580076534, + "updated": 1767684541917, "link": null, "locked": false }, { "id": "BTIKScHoPW_m2eS-Ro8GX", "type": "text", - "x": 937.7324505405664, - "y": 5301.0525416458795, + "x": 954.3991172072332, + "y": 6161.0525416458795, "width": 101.29991149902344, "height": 50, "angle": 0, @@ -5241,11 +5241,11 @@ "index": "b22", "roundness": null, "seed": 1266520975, - "version": 841, - "versionNonce": 35390435, + "version": 896, + "versionNonce": 809722909, "isDeleted": false, "boundElements": [], - "updated": 1767580076534, + "updated": 1767684541917, "link": null, "locked": false, "text": "ray worker\nnode", @@ -5261,8 +5261,8 @@ { "id": "L9owxY-Ly_UhJE3U0jLgC", "type": "rectangle", - "x": 387.36458902885005, - "y": 5264.829378311956, + "x": 404.0312556955168, + "y": 6124.829378311956, "width": 147.4799234681481, "height": 55.000000000000014, "angle": 0, @@ -5278,8 +5278,8 @@ "index": "b23", "roundness": null, "seed": 1623345583, - "version": 1226, - "versionNonce": 743935875, + "version": 1281, + "versionNonce": 1440405629, "isDeleted": false, "boundElements": [ { @@ -5287,15 +5287,15 @@ "id": "8S3xYcC5-56RoTln81Vps" } ], - "updated": 1767580076534, + "updated": 1767684541917, "link": null, "locked": false }, { "id": "8S3xYcC5-56RoTln81Vps", "type": "text", - "x": 429.8745779235686, - "y": 5279.829378311956, + "x": 446.54124459023535, + "y": 6139.829378311956, "width": 62.45994567871094, "height": 25, "angle": 0, @@ -5311,11 +5311,11 @@ "index": "b24", "roundness": null, "seed": 1951087567, - "version": 1278, - "versionNonce": 174106403, + "version": 1333, + "versionNonce": 1430569181, "isDeleted": false, "boundElements": [], - "updated": 1767580076534, + "updated": 1767684541917, "link": null, 
"locked": false, "text": "WebUI", @@ -5331,8 +5331,8 @@ { "id": "r5vmCFPIVwxZGcbV8EtDP", "type": "rectangle", - "x": 631.0363707898229, - "y": 5539.924768328369, + "x": 647.7030374564896, + "y": 6399.924768328369, "width": 156.66668701171875, "height": 85, "angle": 0, @@ -5348,8 +5348,8 @@ "index": "b25", "roundness": null, "seed": 707848687, - "version": 1010, - "versionNonce": 1655032515, + "version": 1065, + "versionNonce": 873635133, "isDeleted": false, "boundElements": [ { @@ -5361,15 +5361,15 @@ "type": "arrow" } ], - "updated": 1767580076534, + "updated": 1767684541917, "link": null, "locked": false }, { "id": "BPGRZQXUil0Bmwl2yz1n9", "type": "text", - "x": 641.9197707532018, - "y": 5557.424768328369, + "x": 658.5864374198685, + "y": 6417.424768328369, "width": 134.89988708496094, "height": 50, "angle": 0, @@ -5385,11 +5385,11 @@ "index": "b26", "roundness": null, "seed": 259773455, - "version": 1023, - "versionNonce": 700807779, + "version": 1078, + "versionNonce": 1235611037, "isDeleted": false, "boundElements": [], - "updated": 1767580076534, + "updated": 1767684541917, "link": null, "locked": false, "text": "Advanced\nVerlTaskSpec ", @@ -5405,8 +5405,8 @@ { "id": "DS3tViVPwVs_FojIMqBSU", "type": "arrow", - "x": 630.5725639929063, - "y": 5584.289760842569, + "x": 647.2392306595731, + "y": 6444.289760842569, "width": 80.66214281697523, "height": 84.85848269127973, "angle": 0, @@ -5424,11 +5424,11 @@ "type": 2 }, "seed": 2082765359, - "version": 1017, - "versionNonce": 2023747629, + "version": 1128, + "versionNonce": 1910335027, "isDeleted": false, "boundElements": [], - "updated": 1767580076859, + "updated": 1767684541922, "link": null, "locked": false, "points": [ @@ -5444,7 +5444,7 @@ "lastCommittedPoint": null, "startBinding": { "elementId": "r5vmCFPIVwxZGcbV8EtDP", - "focus": -0.6785882314731624, + "focus": -0.6785882314731602, "gap": 1 }, "endBinding": null, @@ -5455,8 +5455,8 @@ { "id": "AUK2tr7zEw-7pPSMh0Ric", "type": "rectangle", - "x": 202.16054048272503, - "y": 5337.947299613951, + "x": 218.82720714939177, + "y": 6197.947299613951, "width": 154.6436510018092, "height": 60, "angle": 0, @@ -5472,8 +5472,8 @@ "index": "b28", "roundness": null, "seed": 890461263, - "version": 1400, - "versionNonce": 1564791107, + "version": 1455, + "versionNonce": 100636349, "isDeleted": false, "boundElements": [ { @@ -5481,15 +5481,15 @@ "id": "B-9nH3-XOZNOEeCLd2Fu-" } ], - "updated": 1767580076534, + "updated": 1767684541917, "link": null, "locked": false }, { "id": "B-9nH3-XOZNOEeCLd2Fu-", "type": "text", - "x": 218.36240901341478, - "y": 5342.947299613951, + "x": 235.02907568008152, + "y": 6202.947299613951, "width": 122.23991394042969, "height": 50, "angle": 0, @@ -5505,11 +5505,11 @@ "index": "b29", "roundness": null, "seed": 1310784111, - "version": 1468, - "versionNonce": 935309539, + "version": 1523, + "versionNonce": 381138717, "isDeleted": false, "boundElements": [], - "updated": 1767580076534, + "updated": 1767684541917, "link": null, "locked": false, "text": "user\nmanagement ", @@ -5525,8 +5525,8 @@ { "id": "rfGCTpcFlf1T-8NxoJELc", "type": "rectangle", - "x": 208.2219369016421, - "y": 5531.532131266703, + "x": 224.88860356830884, + "y": 6391.532131266703, "width": 154.6436510018092, "height": 85, "angle": 0, @@ -5542,8 +5542,8 @@ "index": "b2A", "roundness": null, "seed": 1276499087, - "version": 1632, - "versionNonce": 721263747, + "version": 1687, + "versionNonce": 810339197, "isDeleted": false, "boundElements": [ { @@ -5551,15 +5551,15 @@ "id": "12ZSYQo1l8nLuktF_M13y" } ], - 
"updated": 1767580076534, + "updated": 1767684541917, "link": null, "locked": false }, { "id": "12ZSYQo1l8nLuktF_M13y", "type": "text", - "x": 228.42380543233185, - "y": 5536.532131266703, + "x": 245.0904720989986, + "y": 6396.532131266703, "width": 114.23991394042969, "height": 75, "angle": 0, @@ -5575,11 +5575,11 @@ "index": "b2B", "roundness": null, "seed": 111831727, - "version": 1714, - "versionNonce": 1119687715, + "version": 1769, + "versionNonce": 318788573, "isDeleted": false, "boundElements": [], - "updated": 1767580076534, + "updated": 1767684541917, "link": null, "locked": false, "text": "data\nmanagement\nSFTPGo ", @@ -5595,8 +5595,8 @@ { "id": "kvfUPQgQ5eG_Fxn-tGm59", "type": "rectangle", - "x": 217.25149426722095, - "y": 5786.938428579538, + "x": 233.9181609338877, + "y": 6646.938428579538, "width": 154.6436510018092, "height": 60, "angle": 0, @@ -5612,8 +5612,8 @@ "index": "b2C", "roundness": null, "seed": 1250376911, - "version": 1501, - "versionNonce": 965575715, + "version": 1556, + "versionNonce": 2061750333, "isDeleted": false, "boundElements": [ { @@ -5621,15 +5621,15 @@ "id": "a5mX2A_-o1LoWTFedzdqg" } ], - "updated": 1767580554066, + "updated": 1767684541917, "link": null, "locked": false }, { "id": "a5mX2A_-o1LoWTFedzdqg", "type": "text", - "x": 239.60335669439507, - "y": 5804.438428579538, + "x": 256.2700233610618, + "y": 6664.438428579538, "width": 109.93992614746094, "height": 25, "angle": 0, @@ -5645,11 +5645,11 @@ "index": "b2D", "roundness": null, "seed": 994364143, - "version": 1580, - "versionNonce": 841942979, + "version": 1635, + "versionNonce": 634962077, "isDeleted": false, "boundElements": [], - "updated": 1767580554066, + "updated": 1767684541917, "link": null, "locked": false, "text": "prometheus", @@ -5665,8 +5665,8 @@ { "id": "_M4VtkTCC_G6rAalQU_8m", "type": "rectangle", - "x": 414.9436034844226, - "y": 5785.0735214392225, + "x": 431.61027015108937, + "y": 6645.0735214392225, "width": 154.6436510018092, "height": 60, "angle": 0, @@ -5682,8 +5682,8 @@ "index": "b2E", "roundness": null, "seed": 1969984783, - "version": 1545, - "versionNonce": 253695843, + "version": 1600, + "versionNonce": 428909821, "isDeleted": false, "boundElements": [ { @@ -5691,15 +5691,15 @@ "id": "xQD6qQi-05xLjq0nbHiqd" } ], - "updated": 1767580554066, + "updated": 1767684541917, "link": null, "locked": false }, { "id": "xQD6qQi-05xLjq0nbHiqd", "type": "text", - "x": 455.485468353003, - "y": 5802.5735214392225, + "x": 472.15213501966974, + "y": 6662.5735214392225, "width": 73.55992126464844, "height": 25, "angle": 0, @@ -5715,11 +5715,11 @@ "index": "b2F", "roundness": null, "seed": 1886657327, - "version": 1634, - "versionNonce": 1588343555, + "version": 1689, + "versionNonce": 697105757, "isDeleted": false, "boundElements": [], - "updated": 1767580554066, + "updated": 1767684541917, "link": null, "locked": false, "text": "grafana", @@ -5735,8 +5735,8 @@ { "id": "aCuSAi4oJ_KJdRKAHJ5y_", "type": "rectangle", - "x": 617.2984501087772, - "y": 5783.208443551139, + "x": 633.9651167754439, + "y": 6643.208443551139, "width": 154.6436510018092, "height": 60, "angle": 0, @@ -5752,8 +5752,8 @@ "index": "b2G", "roundness": null, "seed": 549903695, - "version": 1593, - "versionNonce": 1632950947, + "version": 1648, + "versionNonce": 1025460669, "isDeleted": false, "boundElements": [ { @@ -5761,15 +5761,15 @@ "id": "25Rm9RG-UnIb_WWemRDJY" } ], - "updated": 1767580554066, + "updated": 1767684541917, "link": null, "locked": false }, { "id": "25Rm9RG-UnIb_WWemRDJY", "type": "text", - "x": 
675.9902859856581, - "y": 5800.708443551139, + "x": 692.6569526523249, + "y": 6660.708443551139, "width": 37.259979248046875, "height": 25, "angle": 0, @@ -5785,11 +5785,11 @@ "index": "b2H", "roundness": null, "seed": 1505516399, - "version": 1702, - "versionNonce": 1137151555, + "version": 1757, + "versionNonce": 1432947229, "isDeleted": false, "boundElements": [], - "updated": 1767580554066, + "updated": 1767684541917, "link": null, "locked": false, "text": "ELK", @@ -5805,8 +5805,8 @@ { "id": "aVk5d_J2lP8Z8cTxcLino", "type": "rectangle", - "x": 392.3807843726648, - "y": 5539.532434700749, + "x": 409.0474510393315, + "y": 6399.532434700749, "width": 156.66668701171875, "height": 85, "angle": 0, @@ -5822,8 +5822,8 @@ "index": "b2I", "roundness": null, "seed": 1872565647, - "version": 1034, - "versionNonce": 686514829, + "version": 1089, + "versionNonce": 1398521469, "isDeleted": false, "boundElements": [ { @@ -5831,15 +5831,15 @@ "id": "-7mLziiSO10QZS0oA7Zmp" } ], - "updated": 1767580076859, + "updated": 1767684541917, "link": null, "locked": false }, { "id": "-7mLziiSO10QZS0oA7Zmp", "type": "text", - "x": 436.5041593116296, - "y": 5557.032434700749, + "x": 453.17082597829636, + "y": 6417.032434700749, "width": 68.41993713378906, "height": 50, "angle": 0, @@ -5855,11 +5855,11 @@ "index": "b2J", "roundness": null, "seed": 430017455, - "version": 1060, - "versionNonce": 907837731, + "version": 1115, + "versionNonce": 2109269725, "isDeleted": false, "boundElements": [], - "updated": 1767580076534, + "updated": 1767684541917, "link": null, "locked": false, "text": "model\nServing", @@ -5875,8 +5875,8 @@ { "id": "Kwettsi3WP-Qkpn71oRV_", "type": "rectangle", - "x": 930.4278663822328, - "y": 5574.9785648802235, + "x": 947.0945330488995, + "y": 6434.9785648802235, "width": 106.83929336612698, "height": 60, "angle": 0, @@ -5892,8 +5892,8 @@ "index": "b2K", "roundness": null, "seed": 334642639, - "version": 1609, - "versionNonce": 668128355, + "version": 1664, + "versionNonce": 1192362813, "isDeleted": false, "boundElements": [ { @@ -5901,15 +5901,15 @@ "id": "Ws50jhW1M-jJ3lnoEAvY2" } ], - "updated": 1767580076534, + "updated": 1767684541917, "link": null, "locked": false }, { "id": "Ws50jhW1M-jJ3lnoEAvY2", "type": "text", - "x": 941.7575396155892, - "y": 5592.4785648802235, + "x": 958.4242062822559, + "y": 6452.4785648802235, "width": 84.17994689941406, "height": 25, "angle": 0, @@ -5925,11 +5925,11 @@ "index": "b2L", "roundness": null, "seed": 27765743, - "version": 1700, - "versionNonce": 65032195, + "version": 1755, + "versionNonce": 1405653917, "isDeleted": false, "boundElements": [], - "updated": 1767580076534, + "updated": 1767684541917, "link": null, "locked": false, "text": "exporter", @@ -5945,8 +5945,8 @@ { "id": "uYm1lMA4TITqp8EHogQDU", "type": "rectangle", - "x": 932.1487224195542, - "y": 5363.874041182105, + "x": 948.815389086221, + "y": 6223.874041182105, "width": 106.83929336612698, "height": 60, "angle": 0, @@ -5962,8 +5962,8 @@ "index": "b2M", "roundness": null, "seed": 248141327, - "version": 1623, - "versionNonce": 1677547427, + "version": 1678, + "versionNonce": 1074665469, "isDeleted": false, "boundElements": [ { @@ -5971,15 +5971,15 @@ "id": "tX9OTModeIVpu_FonBF3E" } ], - "updated": 1767580076534, + "updated": 1767684541917, "link": null, "locked": false }, { "id": "tX9OTModeIVpu_FonBF3E", "type": "text", - "x": 943.4783956529106, - "y": 5381.374041182105, + "x": 960.1450623195774, + "y": 6241.374041182105, "width": 84.17994689941406, "height": 25, "angle": 0, @@ -5995,11 
+5995,11 @@ "index": "b2N", "roundness": null, "seed": 1381153839, - "version": 1714, - "versionNonce": 911764291, + "version": 1769, + "versionNonce": 635703389, "isDeleted": false, "boundElements": [], - "updated": 1767580076534, + "updated": 1767684541917, "link": null, "locked": false, "text": "exporter", @@ -6015,8 +6015,8 @@ { "id": "yGh_yLim4aBl8oGqW_oU1", "type": "rectangle", - "x": 222.20371819421865, - "y": 5873.0398650179195, + "x": 238.8703848608854, + "y": 6733.0398650179195, "width": 154.6436510018092, "height": 60, "angle": 0, @@ -6032,8 +6032,8 @@ "index": "b2Q", "roundness": null, "seed": 170223553, - "version": 1511, - "versionNonce": 1904143907, + "version": 1566, + "versionNonce": 1662778557, "isDeleted": false, "boundElements": [ { @@ -6041,15 +6041,15 @@ "id": "lqdb6MLxScVe5ns9aqeeu" } ], - "updated": 1767580076534, + "updated": 1767684541917, "link": null, "locked": false }, { "id": "lqdb6MLxScVe5ns9aqeeu", "type": "text", - "x": 250.96559191289668, - "y": 5890.5398650179195, + "x": 267.6322585795634, + "y": 6750.5398650179195, "width": 97.11990356445312, "height": 25, "angle": 0, @@ -6065,11 +6065,11 @@ "index": "b2R", "roundness": null, "seed": 1132775329, - "version": 1599, - "versionNonce": 2086875587, + "version": 1654, + "versionNonce": 525819165, "isDeleted": false, "boundElements": [], - "updated": 1767580076534, + "updated": 1767684541917, "link": null, "locked": false, "text": "statistics", @@ -6085,8 +6085,8 @@ { "id": "TtGKkGSFi4MbYg3ZfyNiG", "type": "rectangle", - "x": 417.28500640069166, - "y": 5873.0398650179195, + "x": 433.9516730673584, + "y": 6733.0398650179195, "width": 154.6436510018092, "height": 60, "angle": 0, @@ -6102,8 +6102,8 @@ "index": "b2S", "roundness": null, "seed": 62346383, - "version": 1570, - "versionNonce": 356385123, + "version": 1625, + "versionNonce": 302494077, "isDeleted": false, "boundElements": [ { @@ -6111,15 +6111,15 @@ "id": "G7gy-YlXMFxKiO7gmm6tl" } ], - "updated": 1767580076534, + "updated": 1767684541917, "link": null, "locked": false }, { "id": "G7gy-YlXMFxKiO7gmm6tl", "type": "text", - "x": 448.5968526535494, - "y": 5890.5398650179195, + "x": 465.2635193202161, + "y": 6750.5398650179195, "width": 92.01995849609375, "height": 25, "angle": 0, @@ -6135,11 +6135,11 @@ "index": "b2T", "roundness": null, "seed": 1310452399, - "version": 1667, - "versionNonce": 1432214787, + "version": 1722, + "versionNonce": 141457885, "isDeleted": false, "boundElements": [], - "updated": 1767580076534, + "updated": 1767684541917, "link": null, "locked": false, "text": "sop tools", @@ -6155,8 +6155,8 @@ { "id": "JldxRrrtgsQXtVb-OKmEY", "type": "rectangle", - "x": 619.5583295451758, - "y": 5873.039865017919, + "x": 636.2249962118425, + "y": 6733.039865017919, "width": 154.6436510018092, "height": 60, "angle": 0, @@ -6172,8 +6172,8 @@ "index": "b2U", "roundness": null, "seed": 1283509455, - "version": 1606, - "versionNonce": 675366051, + "version": 1661, + "versionNonce": 731723325, "isDeleted": false, "boundElements": [ { @@ -6181,15 +6181,15 @@ "id": "YbDIjM6HTsYLldkdyo8ed" } ], - "updated": 1767580076534, + "updated": 1767684541917, "link": null, "locked": false }, { "id": "YbDIjM6HTsYLldkdyo8ed", "type": "text", - "x": 669.4101767135609, - "y": 5890.539865017919, + "x": 686.0768433802276, + "y": 6750.539865017919, "width": 54.93995666503906, "height": 25, "angle": 0, @@ -6205,11 +6205,11 @@ "index": "b2V", "roundness": null, "seed": 1737251567, - "version": 1709, - "versionNonce": 93763651, + "version": 1764, + "versionNonce": 108357277, 
"isDeleted": false, "boundElements": [], - "updated": 1767580076534, + "updated": 1767684541917, "link": null, "locked": false, "text": "agent", @@ -6435,8 +6435,8 @@ { "id": "Z88ttfOpQUtGXPUZOTNLh", "type": "rectangle", - "x": 1085.3820455873556, - "y": 4565.045103668226, + "x": 1102.0487122540223, + "y": 5425.045103668226, "width": 78.84661364258261, "height": 239.8912269868912, "angle": 0, @@ -6452,8 +6452,8 @@ "index": "b2l", "roundness": null, "seed": 995417473, - "version": 1395, - "versionNonce": 512081763, + "version": 1450, + "versionNonce": 1269336829, "isDeleted": false, "boundElements": [ { @@ -6461,15 +6461,15 @@ "id": "JUS2JvDrkx5GXQjxkI78l" } ], - "updated": 1767580421422, + "updated": 1767684541917, "link": null, "locked": false }, { "id": "JUS2JvDrkx5GXQjxkI78l", "type": "text", - "x": 1097.1953746864792, - "y": 4672.4907171616715, + "x": 1113.862041353146, + "y": 5532.4907171616715, "width": 55.21995544433594, "height": 25, "angle": 0, @@ -6485,11 +6485,11 @@ "index": "b2m", "roundness": null, "seed": 314541409, - "version": 1411, - "versionNonce": 452844291, + "version": 1466, + "versionNonce": 1538794333, "isDeleted": false, "boundElements": [], - "updated": 1767580421422, + "updated": 1767684541917, "link": null, "locked": false, "text": "GPFS", @@ -6505,8 +6505,8 @@ { "id": "py2gWjP2fsxBZN-9ttfBh", "type": "rectangle", - "x": 1080.7158973597411, - "y": 5309.476392482113, + "x": 1097.3825640264079, + "y": 6169.476392482113, "width": 78.84661364258261, "height": 239.8912269868912, "angle": 0, @@ -6522,8 +6522,8 @@ "index": "b2p", "roundness": null, "seed": 105076673, - "version": 1344, - "versionNonce": 854339171, + "version": 1399, + "versionNonce": 183873469, "isDeleted": false, "boundElements": [ { @@ -6531,15 +6531,15 @@ "id": "5tfhe4hwG92sJ3lHY77pG" } ], - "updated": 1767580076534, + "updated": 1767684541917, "link": null, "locked": false }, { "id": "5tfhe4hwG92sJ3lHY77pG", "type": "text", - "x": 1092.5292264588647, - "y": 5416.922005975558, + "x": 1109.1958931255315, + "y": 6276.922005975558, "width": 55.21995544433594, "height": 25, "angle": 0, @@ -6555,11 +6555,11 @@ "index": "b2q", "roundness": null, "seed": 1717876641, - "version": 1361, - "versionNonce": 1455319555, + "version": 1416, + "versionNonce": 662876189, "isDeleted": false, "boundElements": [], - "updated": 1767580076534, + "updated": 1767684541917, "link": null, "locked": false, "text": "GPFS", @@ -6640,7 +6640,7 @@ "version": 123, "versionNonce": 2087641069, "isDeleted": false, - "boundElements": null, + "boundElements": [], "updated": 1766652208471, "link": null, "locked": false, @@ -6677,7 +6677,7 @@ "version": 147, "versionNonce": 1921843565, "isDeleted": false, - "boundElements": null, + "boundElements": [], "updated": 1766652209287, "link": null, "locked": false, @@ -6764,7 +6764,7 @@ "version": 70, "versionNonce": 1883440109, "isDeleted": false, - "boundElements": null, + "boundElements": [], "updated": 1766653783354, "link": null, "locked": false, @@ -6839,7 +6839,7 @@ "version": 84, "versionNonce": 1858333453, "isDeleted": false, - "boundElements": null, + "boundElements": [], "updated": 1766652208471, "link": null, "locked": false, @@ -6906,7 +6906,7 @@ "version": 31, "versionNonce": 712375885, "isDeleted": false, - "boundElements": null, + "boundElements": [], "updated": 1766653783355, "link": null, "locked": false, @@ -6958,7 +6958,7 @@ "version": 44, "versionNonce": 306885709, "isDeleted": false, - "boundElements": null, + "boundElements": [], "updated": 1766652192424, "link": null, 
"locked": false, @@ -8088,1107 +8088,6 @@ "autoResize": true, "lineHeight": 1.25 }, - { - "id": "qg8jFPFtp50OAJT2ab_YO", - "type": "rectangle", - "x": 187.2240309153068, - "y": 3838.839430576088, - "width": 647.3203086953981, - "height": 541.9127073088301, - "angle": 0, - "strokeColor": "#1e1e1e", - "backgroundColor": "transparent", - "fillStyle": "solid", - "strokeWidth": 2, - "strokeStyle": "solid", - "roughness": 1, - "opacity": 100, - "groupIds": [], - "frameId": null, - "index": "b3U", - "roundness": { - "type": 3 - }, - "seed": 253245411, - "version": 1077, - "versionNonce": 1966178979, - "isDeleted": false, - "boundElements": [], - "updated": 1767580421422, - "link": null, - "locked": false - }, - { - "id": "ej_5uPkV3roLWgrNmRVQJ", - "type": "rectangle", - "x": 392.8775610462518, - "y": 4019.5060870702287, - "width": 156.66668701171875, - "height": 85, - "angle": 0, - "strokeColor": "#1e1e1e", - "backgroundColor": "transparent", - "fillStyle": "solid", - "strokeWidth": 2, - "strokeStyle": "solid", - "roughness": 1, - "opacity": 100, - "groupIds": [], - "frameId": null, - "index": "b3V", - "roundness": null, - "seed": 866993027, - "version": 826, - "versionNonce": 97465923, - "isDeleted": false, - "boundElements": [ - { - "type": "text", - "id": "5J21ZHZTHpGu78H0oBGne" - }, - { - "id": "dCW4muSHBOjk2yt4VP-8M", - "type": "arrow" - }, - { - "id": "mfel3yZDZK2O4BoiB9bfp", - "type": "arrow" - } - ], - "updated": 1767580421422, - "link": null, - "locked": false - }, - { - "id": "5J21ZHZTHpGu78H0oBGne", - "type": "text", - "x": 413.82092805064633, - "y": 4037.0060870702287, - "width": 114.77995300292969, - "height": 50, - "angle": 0, - "strokeColor": "#1e1e1e", - "backgroundColor": "transparent", - "fillStyle": "solid", - "strokeWidth": 2, - "strokeStyle": "solid", - "roughness": 1, - "opacity": 100, - "groupIds": [], - "frameId": null, - "index": "b3W", - "roundness": null, - "seed": 81779491, - "version": 817, - "versionNonce": 196441571, - "isDeleted": false, - "boundElements": [], - "updated": 1767580421422, - "link": null, - "locked": false, - "text": "ray job tool\n(ray client)", - "fontSize": 20, - "fontFamily": 5, - "textAlign": "center", - "verticalAlign": "middle", - "containerId": "ej_5uPkV3roLWgrNmRVQJ", - "originalText": "ray job tool\n(ray client)", - "autoResize": true, - "lineHeight": 1.25 - }, - { - "id": "UJvTvQ4khDdrdYsxDaKt0", - "type": "rectangle", - "x": 637.8775610462517, - "y": 4019.0060870702287, - "width": 156.66668701171875, - "height": 85, - "angle": 0, - "strokeColor": "#1e1e1e", - "backgroundColor": "transparent", - "fillStyle": "solid", - "strokeWidth": 2, - "strokeStyle": "solid", - "roughness": 1, - "opacity": 100, - "groupIds": [], - "frameId": null, - "index": "b3X", - "roundness": null, - "seed": 790527683, - "version": 963, - "versionNonce": 1352432835, - "isDeleted": false, - "boundElements": [ - { - "type": "text", - "id": "0hvq7uN4KgQ1e0xT7DKVG" - }, - { - "id": "dCW4muSHBOjk2yt4VP-8M", - "type": "arrow" - } - ], - "updated": 1767580421422, - "link": null, - "locked": false - }, - { - "id": "0hvq7uN4KgQ1e0xT7DKVG", - "type": "text", - "x": 648.7609610096306, - "y": 4036.5060870702287, - "width": 134.89988708496094, - "height": 50, - "angle": 0, - "strokeColor": "#1e1e1e", - "backgroundColor": "transparent", - "fillStyle": "solid", - "strokeWidth": 2, - "strokeStyle": "solid", - "roughness": 1, - "opacity": 100, - "groupIds": [], - "frameId": null, - "index": "b3Y", - "roundness": null, - "seed": 1292349027, - "version": 967, - "versionNonce": 
1242950755, - "isDeleted": false, - "boundElements": [], - "updated": 1767580421422, - "link": null, - "locked": false, - "text": "VerlTaskSpec \nyaml", - "fontSize": 20, - "fontFamily": 5, - "textAlign": "center", - "verticalAlign": "middle", - "containerId": "UJvTvQ4khDdrdYsxDaKt0", - "originalText": "VerlTaskSpec \nyaml", - "autoResize": true, - "lineHeight": 1.25 - }, - { - "id": "dCW4muSHBOjk2yt4VP-8M", - "type": "arrow", - "x": 639.2109045521111, - "y": 4057.5060870702287, - "width": 90.33331298828125, - "height": 4, - "angle": 0, - "strokeColor": "#1e1e1e", - "backgroundColor": "transparent", - "fillStyle": "solid", - "strokeWidth": 2, - "strokeStyle": "solid", - "roughness": 1, - "opacity": 100, - "groupIds": [], - "frameId": null, - "index": "b3Z", - "roundness": { - "type": 2 - }, - "seed": 560589315, - "version": 2197, - "versionNonce": 803445773, - "isDeleted": false, - "boundElements": [], - "updated": 1767580422054, - "link": null, - "locked": false, - "points": [ - [ - 0, - 0 - ], - [ - -90.33331298828125, - 4 - ] - ], - "lastCommittedPoint": null, - "startBinding": { - "elementId": "UJvTvQ4khDdrdYsxDaKt0", - "focus": 0.16118805172016018, - "gap": 1.333343505859375 - }, - "endBinding": { - "elementId": "ej_5uPkV3roLWgrNmRVQJ", - "focus": 0.06393742185082936, - "gap": 1 - }, - "startArrowhead": null, - "endArrowhead": "arrow", - "elbowed": false - }, - { - "id": "oSLr5BRtyqrLyySPfeO9j", - "type": "text", - "x": 225.14633005688438, - "y": 3790.8170038464273, - "width": 476.19970703125, - "height": 25, - "angle": 0, - "strokeColor": "#1e1e1e", - "backgroundColor": "transparent", - "fillStyle": "solid", - "strokeWidth": 2, - "strokeStyle": "solid", - "roughness": 1, - "opacity": 100, - "groupIds": [], - "frameId": null, - "index": "b3a", - "roundness": null, - "seed": 289649059, - "version": 1167, - "versionNonce": 626310957, - "isDeleted": false, - "boundElements": [], - "updated": 1767580447801, - "link": null, - "locked": false, - "text": "v3.8 IB & Roce support for multi node training ", - "fontSize": 20, - "fontFamily": 5, - "textAlign": "left", - "verticalAlign": "top", - "containerId": null, - "originalText": "v3.8 IB & Roce support for multi node training ", - "autoResize": true, - "lineHeight": 1.25 - }, - { - "id": "ia_9sG-dZPebY0HaRl5mG", - "type": "rectangle", - "x": 578.7015091194257, - "y": 3861.672682529213, - "width": 210.1763265850289, - "height": 55.000000000000014, - "angle": 0, - "strokeColor": "#1e1e1e", - "backgroundColor": "transparent", - "fillStyle": "solid", - "strokeWidth": 2, - "strokeStyle": "solid", - "roughness": 1, - "opacity": 100, - "groupIds": [], - "frameId": null, - "index": "b3b", - "roundness": null, - "seed": 1573094723, - "version": 1115, - "versionNonce": 1531388643, - "isDeleted": false, - "boundElements": [ - { - "type": "text", - "id": "LsfI3OipT-Y5Jx7jpcccf" - } - ], - "updated": 1767580421422, - "link": null, - "locked": false - }, - { - "id": "LsfI3OipT-Y5Jx7jpcccf", - "type": "text", - "x": 630.9397120847918, - "y": 3876.672682529213, - "width": 105.69992065429688, - "height": 25, - "angle": 0, - "strokeColor": "#1e1e1e", - "backgroundColor": "transparent", - "fillStyle": "solid", - "strokeWidth": 2, - "strokeStyle": "solid", - "roughness": 1, - "opacity": 100, - "groupIds": [], - "frameId": null, - "index": "b3c", - "roundness": null, - "seed": 508028131, - "version": 1155, - "versionNonce": 281464451, - "isDeleted": false, - "boundElements": [], - "updated": 1767580421422, - "link": null, - "locked": false, - "text": "API 
server", - "fontSize": 20, - "fontFamily": 5, - "textAlign": "center", - "verticalAlign": "middle", - "containerId": "ia_9sG-dZPebY0HaRl5mG", - "originalText": "API server", - "autoResize": true, - "lineHeight": 1.25 - }, - { - "id": "xrABjxBFNPVtYSJ6ZPiSh", - "type": "rectangle", - "x": 392.7109045521112, - "y": 3932.672682529213, - "width": 154.6436510018092, - "height": 60, - "angle": 0, - "strokeColor": "#1e1e1e", - "backgroundColor": "transparent", - "fillStyle": "solid", - "strokeWidth": 2, - "strokeStyle": "solid", - "roughness": 1, - "opacity": 100, - "groupIds": [], - "frameId": null, - "index": "b3d", - "roundness": null, - "seed": 2053066883, - "version": 1345, - "versionNonce": 1016071715, - "isDeleted": false, - "boundElements": [ - { - "type": "text", - "id": "-Kr5CotillYBlIuQsHPGW" - } - ], - "updated": 1767580421422, - "link": null, - "locked": false - }, - { - "id": "-Kr5CotillYBlIuQsHPGW", - "type": "text", - "x": 408.9127730828009, - "y": 3937.672682529213, - "width": 122.23991394042969, - "height": 50, - "angle": 0, - "strokeColor": "#1e1e1e", - "backgroundColor": "transparent", - "fillStyle": "solid", - "strokeWidth": 2, - "strokeStyle": "solid", - "roughness": 1, - "opacity": 100, - "groupIds": [], - "frameId": null, - "index": "b3e", - "roundness": null, - "seed": 600328227, - "version": 1404, - "versionNonce": 1972455875, - "isDeleted": false, - "boundElements": [], - "updated": 1767580421422, - "link": null, - "locked": false, - "text": "task\nmanagement ", - "fontSize": 20, - "fontFamily": 5, - "textAlign": "center", - "verticalAlign": "middle", - "containerId": "xrABjxBFNPVtYSJ6ZPiSh", - "originalText": "task management ", - "autoResize": true, - "lineHeight": 1.25 - }, - { - "id": "Shkqjthowq7IUHYuPuzXr", - "type": "rectangle", - "x": 576.8184431377808, - "y": 3933.2469908419275, - "width": 207.59447102997498, - "height": 60, - "angle": 0, - "strokeColor": "#1e1e1e", - "backgroundColor": "transparent", - "fillStyle": "solid", - "strokeWidth": 2, - "strokeStyle": "solid", - "roughness": 1, - "opacity": 100, - "groupIds": [], - "frameId": null, - "index": "b3f", - "roundness": null, - "seed": 275860419, - "version": 1436, - "versionNonce": 1894324579, - "isDeleted": false, - "boundElements": [ - { - "type": "text", - "id": "NUOv1Sok4Fz4O-Jom4drR" - } - ], - "updated": 1767580421422, - "link": null, - "locked": false - }, - { - "id": "NUOv1Sok4Fz4O-Jom4drR", - "type": "text", - "x": 593.0857485380222, - "y": 3938.2469908419275, - "width": 175.0598602294922, - "height": 50, - "angle": 0, - "strokeColor": "#1e1e1e", - "backgroundColor": "transparent", - "fillStyle": "solid", - "strokeWidth": 2, - "strokeStyle": "solid", - "roughness": 1, - "opacity": 100, - "groupIds": [], - "frameId": null, - "index": "b3g", - "roundness": null, - "seed": 1954077539, - "version": 1526, - "versionNonce": 1004926211, - "isDeleted": false, - "boundElements": [], - "updated": 1767580421422, - "link": null, - "locked": false, - "text": "node management\n(ssh, ray cluster) ", - "fontSize": 20, - "fontFamily": 5, - "textAlign": "center", - "verticalAlign": "middle", - "containerId": "Shkqjthowq7IUHYuPuzXr", - "originalText": "node management\n(ssh, ray cluster) ", - "autoResize": true, - "lineHeight": 1.25 - }, - { - "id": "rdU6p3LbZpLNIPDuhEU6z", - "type": "rectangle", - "x": 913.0172482736575, - "y": 4014.620714934976, - "width": 163.1357446724801, - "height": 106.45708709635272, - "angle": 0, - "strokeColor": "#1e1e1e", - "backgroundColor": "transparent", - "fillStyle": "solid", - 
"strokeWidth": 2, - "strokeStyle": "solid", - "roughness": 1, - "opacity": 100, - "groupIds": [], - "frameId": null, - "index": "b3h", - "roundness": null, - "seed": 2083354371, - "version": 733, - "versionNonce": 906386595, - "isDeleted": false, - "boundElements": [ - { - "type": "text", - "id": "fyUCxFxmJICAq48idEvPj" - } - ], - "updated": 1767580421422, - "link": null, - "locked": false - }, - { - "id": "fyUCxFxmJICAq48idEvPj", - "type": "text", - "x": 943.9351648603858, - "y": 4042.8492584831524, - "width": 101.29991149902344, - "height": 50, - "angle": 0, - "strokeColor": "#1e1e1e", - "backgroundColor": "transparent", - "fillStyle": "solid", - "strokeWidth": 2, - "strokeStyle": "solid", - "roughness": 1, - "opacity": 100, - "groupIds": [], - "frameId": null, - "index": "b3i", - "roundness": null, - "seed": 1175297699, - "version": 795, - "versionNonce": 1257820227, - "isDeleted": false, - "boundElements": [], - "updated": 1767580421422, - "link": null, - "locked": false, - "text": "ray worker\nnode", - "fontSize": 20, - "fontFamily": 5, - "textAlign": "center", - "verticalAlign": "middle", - "containerId": "rdU6p3LbZpLNIPDuhEU6z", - "originalText": "ray worker node", - "autoResize": true, - "lineHeight": 1.25 - }, - { - "id": "cl1dzmeCwwz4SXuZrad2a", - "type": "rectangle", - "x": 914.9663589431866, - "y": 3878.182953197394, - "width": 163.1357446724801, - "height": 85, - "angle": 0, - "strokeColor": "#1e1e1e", - "backgroundColor": "transparent", - "fillStyle": "solid", - "strokeWidth": 2, - "strokeStyle": "solid", - "roughness": 1, - "opacity": 100, - "groupIds": [], - "frameId": null, - "index": "b3j", - "roundness": null, - "seed": 482857539, - "version": 782, - "versionNonce": 1822150627, - "isDeleted": false, - "boundElements": [ - { - "type": "text", - "id": "fPAvr_57exrKFQWCnW2mb" - } - ], - "updated": 1767580421422, - "link": null, - "locked": false - }, - { - "id": "fPAvr_57exrKFQWCnW2mb", - "type": "text", - "x": 945.8842755299149, - "y": 3895.682953197394, - "width": 101.29991149902344, - "height": 50, - "angle": 0, - "strokeColor": "#1e1e1e", - "backgroundColor": "transparent", - "fillStyle": "solid", - "strokeWidth": 2, - "strokeStyle": "solid", - "roughness": 1, - "opacity": 100, - "groupIds": [], - "frameId": null, - "index": "b3k", - "roundness": null, - "seed": 1899663843, - "version": 848, - "versionNonce": 1291705219, - "isDeleted": false, - "boundElements": [], - "updated": 1767580421422, - "link": null, - "locked": false, - "text": "ray worker\nnode", - "fontSize": 20, - "fontFamily": 5, - "textAlign": "center", - "verticalAlign": "middle", - "containerId": "cl1dzmeCwwz4SXuZrad2a", - "originalText": "ray worker node", - "autoResize": true, - "lineHeight": 1.25 - }, - { - "id": "ReF6vnI-dB3lXIMd4AvGs", - "type": "rectangle", - "x": 395.5164140181985, - "y": 3859.4597898634706, - "width": 147.4799234681481, - "height": 55.000000000000014, - "angle": 0, - "strokeColor": "#1e1e1e", - "backgroundColor": "transparent", - "fillStyle": "solid", - "strokeWidth": 2, - "strokeStyle": "solid", - "roughness": 1, - "opacity": 100, - "groupIds": [], - "frameId": null, - "index": "b3l", - "roundness": null, - "seed": 343170435, - "version": 1233, - "versionNonce": 1482519331, - "isDeleted": false, - "boundElements": [ - { - "type": "text", - "id": "5tqRv0HXQ949u10Kw2eiU" - } - ], - "updated": 1767580421422, - "link": null, - "locked": false - }, - { - "id": "5tqRv0HXQ949u10Kw2eiU", - "type": "text", - "x": 438.02640291291704, - "y": 3874.4597898634706, - "width": 
62.45994567871094, - "height": 25, - "angle": 0, - "strokeColor": "#1e1e1e", - "backgroundColor": "transparent", - "fillStyle": "solid", - "strokeWidth": 2, - "strokeStyle": "solid", - "roughness": 1, - "opacity": 100, - "groupIds": [], - "frameId": null, - "index": "b3m", - "roundness": null, - "seed": 833860899, - "version": 1282, - "versionNonce": 1864963779, - "isDeleted": false, - "boundElements": [], - "updated": 1767580421422, - "link": null, - "locked": false, - "text": "WebUI", - "fontSize": 20, - "fontFamily": 5, - "textAlign": "center", - "verticalAlign": "middle", - "containerId": "ReF6vnI-dB3lXIMd4AvGs", - "originalText": "WebUI", - "autoResize": true, - "lineHeight": 1.25 - }, - { - "id": "QFS8OSVEFsg_hE5ngbZ3q", - "type": "rectangle", - "x": 639.1881957791711, - "y": 4134.555179879884, - "width": 156.66668701171875, - "height": 85, - "angle": 0, - "strokeColor": "#1e1e1e", - "backgroundColor": "transparent", - "fillStyle": "solid", - "strokeWidth": 2, - "strokeStyle": "solid", - "roughness": 1, - "opacity": 100, - "groupIds": [], - "frameId": null, - "index": "b3n", - "roundness": null, - "seed": 140974275, - "version": 1015, - "versionNonce": 1610565219, - "isDeleted": false, - "boundElements": [ - { - "type": "text", - "id": "vd21IfNtOA5knKWzVf_sk" - }, - { - "id": "mfel3yZDZK2O4BoiB9bfp", - "type": "arrow" - } - ], - "updated": 1767580421422, - "link": null, - "locked": false - }, - { - "id": "vd21IfNtOA5knKWzVf_sk", - "type": "text", - "x": 650.07159574255, - "y": 4152.055179879884, - "width": 134.89988708496094, - "height": 50, - "angle": 0, - "strokeColor": "#1e1e1e", - "backgroundColor": "transparent", - "fillStyle": "solid", - "strokeWidth": 2, - "strokeStyle": "solid", - "roughness": 1, - "opacity": 100, - "groupIds": [], - "frameId": null, - "index": "b3o", - "roundness": null, - "seed": 1714227299, - "version": 1067, - "versionNonce": 565084675, - "isDeleted": false, - "boundElements": [], - "updated": 1767580421422, - "link": null, - "locked": false, - "text": "Advanced\nVerlTaskSpec ", - "fontSize": 20, - "fontFamily": 5, - "textAlign": "center", - "verticalAlign": "middle", - "containerId": "QFS8OSVEFsg_hE5ngbZ3q", - "originalText": "Advanced VerlTaskSpec ", - "autoResize": true, - "lineHeight": 1.25 - }, - { - "id": "mfel3yZDZK2O4BoiB9bfp", - "type": "arrow", - "x": 638.7243889822546, - "y": 4178.920172394084, - "width": 80.66214281697523, - "height": 84.85848269127973, - "angle": 0, - "strokeColor": "#1e1e1e", - "backgroundColor": "#ffc9c9", - "fillStyle": "solid", - "strokeWidth": 2, - "strokeStyle": "solid", - "roughness": 1, - "opacity": 100, - "groupIds": [], - "frameId": null, - "index": "b3p", - "roundness": { - "type": 2 - }, - "seed": 1624507395, - "version": 1044, - "versionNonce": 1833680077, - "isDeleted": false, - "boundElements": [], - "updated": 1767580422054, - "link": null, - "locked": false, - "points": [ - [ - 0, - 0 - ], - [ - -80.66214281697523, - -84.85848269127973 - ] - ], - "lastCommittedPoint": null, - "startBinding": { - "elementId": "QFS8OSVEFsg_hE5ngbZ3q", - "focus": -0.6785882314731545, - "gap": 1 - }, - "endBinding": { - "elementId": "ej_5uPkV3roLWgrNmRVQJ", - "focus": -0.47485998984048416, - "gap": 8.517998107308813 - }, - "startArrowhead": null, - "endArrowhead": "arrow", - "elbowed": false - }, - { - "id": "aWTI7cnR7QatcqJUTZEkH", - "type": "rectangle", - "x": 210.31236547207345, - "y": 3932.5777111654647, - "width": 154.6436510018092, - "height": 60, - "angle": 0, - "strokeColor": "#1e1e1e", - "backgroundColor": "transparent", 
- "fillStyle": "solid", - "strokeWidth": 2, - "strokeStyle": "solid", - "roughness": 1, - "opacity": 100, - "groupIds": [], - "frameId": null, - "index": "b3q", - "roundness": null, - "seed": 1348111267, - "version": 1407, - "versionNonce": 213089507, - "isDeleted": false, - "boundElements": [ - { - "type": "text", - "id": "NDAe6kDHeZy_G-F24BbSA" - } - ], - "updated": 1767580421422, - "link": null, - "locked": false - }, - { - "id": "NDAe6kDHeZy_G-F24BbSA", - "type": "text", - "x": 226.5142340027632, - "y": 3937.5777111654647, - "width": 122.23991394042969, - "height": 50, - "angle": 0, - "strokeColor": "#1e1e1e", - "backgroundColor": "transparent", - "fillStyle": "solid", - "strokeWidth": 2, - "strokeStyle": "solid", - "roughness": 1, - "opacity": 100, - "groupIds": [], - "frameId": null, - "index": "b3r", - "roundness": null, - "seed": 149888835, - "version": 1475, - "versionNonce": 1201137795, - "isDeleted": false, - "boundElements": [], - "updated": 1767580421422, - "link": null, - "locked": false, - "text": "user\nmanagement ", - "fontSize": 20, - "fontFamily": 5, - "textAlign": "center", - "verticalAlign": "middle", - "containerId": "aWTI7cnR7QatcqJUTZEkH", - "originalText": "user management ", - "autoResize": true, - "lineHeight": 1.25 - }, - { - "id": "6DIoCPbBjmqXhdWQiF-Ds", - "type": "rectangle", - "x": 216.37376189099052, - "y": 4126.162542818218, - "width": 154.6436510018092, - "height": 85, - "angle": 0, - "strokeColor": "#1e1e1e", - "backgroundColor": "transparent", - "fillStyle": "solid", - "strokeWidth": 2, - "strokeStyle": "solid", - "roughness": 1, - "opacity": 100, - "groupIds": [], - "frameId": null, - "index": "b3s", - "roundness": null, - "seed": 1425030883, - "version": 1639, - "versionNonce": 1848500259, - "isDeleted": false, - "boundElements": [ - { - "type": "text", - "id": "ZMAp89RSyrH2uBqKYd6xX" - } - ], - "updated": 1767580421422, - "link": null, - "locked": false - }, - { - "id": "ZMAp89RSyrH2uBqKYd6xX", - "type": "text", - "x": 236.57563042168027, - "y": 4131.162542818218, - "width": 114.23991394042969, - "height": 75, - "angle": 0, - "strokeColor": "#1e1e1e", - "backgroundColor": "transparent", - "fillStyle": "solid", - "strokeWidth": 2, - "strokeStyle": "solid", - "roughness": 1, - "opacity": 100, - "groupIds": [], - "frameId": null, - "index": "b3t", - "roundness": null, - "seed": 787245699, - "version": 1721, - "versionNonce": 711189443, - "isDeleted": false, - "boundElements": [], - "updated": 1767580421422, - "link": null, - "locked": false, - "text": "data\nmanagement\nSFTPGo ", - "fontSize": 20, - "fontFamily": 5, - "textAlign": "center", - "verticalAlign": "middle", - "containerId": "6DIoCPbBjmqXhdWQiF-Ds", - "originalText": "data management\nSFTPGo ", - "autoResize": true, - "lineHeight": 1.25 - }, - { - "id": "i9kRvyS1BKdIOzkDPwoS0", - "type": "rectangle", - "x": 1088.0908401287593, - "y": 3900.9122913284605, - "width": 78.84661364258261, - "height": 188.34936741723558, - "angle": 0, - "strokeColor": "#1e1e1e", - "backgroundColor": "transparent", - "fillStyle": "solid", - "strokeWidth": 2, - "strokeStyle": "solid", - "roughness": 1, - "opacity": 100, - "groupIds": [], - "frameId": null, - "index": "b3u", - "roundness": null, - "seed": 1000481315, - "version": 1510, - "versionNonce": 1683518307, - "isDeleted": false, - "boundElements": [ - { - "type": "text", - "id": "j1I0PKrtvzLMy6fsxx_6a" - } - ], - "updated": 1767580421422, - "link": null, - "locked": false - }, - { - "id": "j1I0PKrtvzLMy6fsxx_6a", - "type": "text", - "x": 1099.9041692278827, - 
"y": 3982.5869750370784, - "width": 55.21995544433594, - "height": 25, - "angle": 0, - "strokeColor": "#1e1e1e", - "backgroundColor": "transparent", - "fillStyle": "solid", - "strokeWidth": 2, - "strokeStyle": "solid", - "roughness": 1, - "opacity": 100, - "groupIds": [], - "frameId": null, - "index": "b3v", - "roundness": null, - "seed": 67102147, - "version": 1528, - "versionNonce": 2086276867, - "isDeleted": false, - "boundElements": [], - "updated": 1767580421422, - "link": null, - "locked": false, - "text": "GPFS", - "fontSize": 20, - "fontFamily": 5, - "textAlign": "center", - "verticalAlign": "middle", - "containerId": "i9kRvyS1BKdIOzkDPwoS0", - "originalText": "GPFS", - "autoResize": true, - "lineHeight": 1.25 - }, - { - "id": "mSWRJ8RKPV7XfoaSxNjz8", - "type": "rectangle", - "x": 221.28360050567835, - "y": 4249.526425506172, - "width": 239.9955982022902, - "height": 60, - "angle": 0, - "strokeColor": "#1e1e1e", - "backgroundColor": "transparent", - "fillStyle": "solid", - "strokeWidth": 2, - "strokeStyle": "solid", - "roughness": 1, - "opacity": 100, - "groupIds": [], - "frameId": null, - "index": "b3w", - "roundness": null, - "seed": 1864643939, - "version": 2128, - "versionNonce": 688984067, - "isDeleted": false, - "boundElements": [ - { - "type": "text", - "id": "39BgAR4RVEZm9MjMi7RPJ" - } - ], - "updated": 1767580500208, - "link": null, - "locked": false - }, - { - "id": "39BgAR4RVEZm9MjMi7RPJ", - "type": "text", - "x": 275.4314545384641, - "y": 4267.026425506172, - "width": 131.69989013671875, - "height": 25, - "angle": 0, - "strokeColor": "#1e1e1e", - "backgroundColor": "transparent", - "fillStyle": "solid", - "strokeWidth": 2, - "strokeStyle": "solid", - "roughness": 1, - "opacity": 100, - "groupIds": [], - "frameId": null, - "index": "b3x", - "roundness": null, - "seed": 2086506755, - "version": 2253, - "versionNonce": 622204835, - "isDeleted": false, - "boundElements": [], - "updated": 1767580500209, - "link": null, - "locked": false, - "text": "weight & bias", - "fontSize": 20, - "fontFamily": 5, - "textAlign": "center", - "verticalAlign": "middle", - "containerId": "mSWRJ8RKPV7XfoaSxNjz8", - "originalText": "weight & bias", - "autoResize": true, - "lineHeight": 1.25 - }, { "id": "_dawvRGHc8iIWvtZgQ8my", "type": "rectangle", @@ -9430,7 +8329,7 @@ "type": "text", "x": 193.03751973340792, "y": 3146.4696998450304, - "width": 184.7998809814453, + "width": 707.179443359375, "height": 25, "angle": 0, "strokeColor": "#1e1e1e", @@ -9445,20 +8344,20 @@ "index": "b46", "roundness": null, "seed": 136585027, - "version": 1220, - "versionNonce": 22919299, + "version": 1331, + "versionNonce": 2053451517, "isDeleted": false, "boundElements": [], - "updated": 1767580456184, + "updated": 1767684665253, "link": null, "locked": false, - "text": "v3.7 Model serving ", + "text": "v3.7 Shift from SGLang to VLLM backend, prepare for ray serve function", "fontSize": 20, "fontFamily": 5, "textAlign": "left", "verticalAlign": "top", "containerId": null, - "originalText": "v3.7 Model serving ", + "originalText": "v3.7 Shift from SGLang to VLLM backend, prepare for ray serve function", "autoResize": true, "lineHeight": 1.25 }, @@ -9675,13 +8574,13 @@ { "id": "ztzSxxw4gFHtLebqPFwq_", "type": "rectangle", - "x": 912.9010236820349, - "y": 3386.821291398311, + "x": 911.0491492245695, + "y": 3386.0805280519567, "width": 163.1357446724801, "height": 106.45708709635272, "angle": 0, "strokeColor": "#1e1e1e", - "backgroundColor": "transparent", + "backgroundColor": "#ffc9c9", "fillStyle": "solid", 
"strokeWidth": 2, "strokeStyle": "solid", @@ -9692,8 +8591,8 @@ "index": "b4D", "roundness": null, "seed": 1509914787, - "version": 795, - "versionNonce": 1575047299, + "version": 797, + "versionNonce": 274293011, "isDeleted": false, "boundElements": [ { @@ -9701,17 +8600,17 @@ "id": "idEPabW5S1LZD69d85ve1" } ], - "updated": 1767580434170, + "updated": 1767684619945, "link": null, "locked": false }, { "id": "idEPabW5S1LZD69d85ve1", "type": "text", - "x": 943.8189402687632, - "y": 3415.049834946488, - "width": 101.29991149902344, - "height": 50, + "x": 939.5570392610049, + "y": 3401.809071600133, + "width": 106.11996459960938, + "height": 75, "angle": 0, "strokeColor": "#1e1e1e", "backgroundColor": "transparent", @@ -9725,20 +8624,20 @@ "index": "b4E", "roundness": null, "seed": 1231973443, - "version": 858, - "versionNonce": 1191856163, + "version": 863, + "versionNonce": 576524349, "isDeleted": false, "boundElements": [], - "updated": 1767580434170, + "updated": 1767684622696, "link": null, "locked": false, - "text": "ray worker\nnode", + "text": "ray worker\nnode (vllm\nbackend)", "fontSize": 20, "fontFamily": 5, "textAlign": "center", "verticalAlign": "middle", "containerId": "ztzSxxw4gFHtLebqPFwq_", - "originalText": "ray worker node", + "originalText": "ray worker node (vllm backend)", "autoResize": true, "lineHeight": 1.25 }, @@ -9751,7 +8650,7 @@ "height": 85, "angle": 0, "strokeColor": "#1e1e1e", - "backgroundColor": "transparent", + "backgroundColor": "#ffc9c9", "fillStyle": "solid", "strokeWidth": 2, "strokeStyle": "solid", @@ -9762,8 +8661,8 @@ "index": "b4F", "roundness": null, "seed": 1107160035, - "version": 844, - "versionNonce": 2071153603, + "version": 847, + "versionNonce": 1986361811, "isDeleted": false, "boundElements": [ { @@ -9771,7 +8670,7 @@ "id": "stn0JrExRn0tfK-9wqEh2" } ], - "updated": 1767580434170, + "updated": 1767684615249, "link": null, "locked": false }, @@ -9779,9 +8678,9 @@ "id": "stn0JrExRn0tfK-9wqEh2", "type": "text", "x": 945.7680509382923, - "y": 3267.883529660728, + "y": 3255.383529660728, "width": 101.29991149902344, - "height": 50, + "height": 75, "angle": 0, "strokeColor": "#1e1e1e", "backgroundColor": "transparent", @@ -9795,20 +8694,20 @@ "index": "b4G", "roundness": null, "seed": 1740404611, - "version": 910, - "versionNonce": 792386403, + "version": 943, + "versionNonce": 1538062387, "isDeleted": false, "boundElements": [], - "updated": 1767580434170, + "updated": 1767684615248, "link": null, "locked": false, - "text": "ray worker\nnode", + "text": "ray worker\nnode(vllm\nbackend)", "fontSize": 20, "fontFamily": 5, "textAlign": "center", "verticalAlign": "middle", "containerId": "UhDaYiDTmw9wnsmkDqwgD", - "originalText": "ray worker node", + "originalText": "ray worker node(vllm backend)", "autoResize": true, "lineHeight": 1.25 }, @@ -10293,8 +9192,8 @@ { "id": "GdoROXtlGBxCQBezTYH1E", "type": "rectangle", - "x": 393.437456483182, - "y": 4786.201212157448, + "x": 410.10412314984876, + "y": 5646.201212157448, "width": 156.66668701171875, "height": 85, "angle": 0, @@ -10310,8 +9209,8 @@ "index": "b4U", "roundness": null, "seed": 2087868493, - "version": 1083, - "versionNonce": 1630780301, + "version": 1138, + "versionNonce": 1062249597, "isDeleted": false, "boundElements": [ { @@ -10319,15 +9218,15 @@ "id": "SBSYmCOAYENhuZjDKG2bQ" } ], - "updated": 1767580466531, + "updated": 1767684541917, "link": null, "locked": false }, { "id": "SBSYmCOAYENhuZjDKG2bQ", "type": "text", - "x": 437.56083142214686, - "y": 4803.701212157448, + "x": 
454.2274980888136, + "y": 5663.701212157448, "width": 68.41993713378906, "height": 50, "angle": 0, @@ -10343,11 +9242,11 @@ "index": "b4V", "roundness": null, "seed": 846972589, - "version": 1109, - "versionNonce": 397328877, + "version": 1164, + "versionNonce": 639700189, "isDeleted": false, "boundElements": [], - "updated": 1767580466531, + "updated": 1767684541917, "link": null, "locked": false, "text": "model\nServing", @@ -10360,221 +9259,11 @@ "autoResize": true, "lineHeight": 1.25 }, - { - "id": "NUlw66Q9OIa8TIBSh8shW", - "type": "rectangle", - "x": 397.85025827385925, - "y": 4127.595569661166, - "width": 156.66668701171875, - "height": 85, - "angle": 0, - "strokeColor": "#1e1e1e", - "backgroundColor": "transparent", - "fillStyle": "solid", - "strokeWidth": 2, - "strokeStyle": "solid", - "roughness": 1, - "opacity": 100, - "groupIds": [], - "frameId": null, - "index": "b4W", - "roundness": null, - "seed": 1603019427, - "version": 1113, - "versionNonce": 319397283, - "isDeleted": false, - "boundElements": [ - { - "type": "text", - "id": "iI-RJL93xebiF9s9i5MaW" - } - ], - "updated": 1767580471663, - "link": null, - "locked": false - }, - { - "id": "iI-RJL93xebiF9s9i5MaW", - "type": "text", - "x": 441.9736332128241, - "y": 4145.095569661166, - "width": 68.41993713378906, - "height": 50, - "angle": 0, - "strokeColor": "#1e1e1e", - "backgroundColor": "transparent", - "fillStyle": "solid", - "strokeWidth": 2, - "strokeStyle": "solid", - "roughness": 1, - "opacity": 100, - "groupIds": [], - "frameId": null, - "index": "b4X", - "roundness": null, - "seed": 198563395, - "version": 1139, - "versionNonce": 132903235, - "isDeleted": false, - "boundElements": [], - "updated": 1767580471663, - "link": null, - "locked": false, - "text": "model\nServing", - "fontSize": 20, - "fontFamily": 5, - "textAlign": "center", - "verticalAlign": "middle", - "containerId": "NUlw66Q9OIa8TIBSh8shW", - "originalText": "model\nServing", - "autoResize": true, - "lineHeight": 1.25 - }, - { - "id": "yPC5J4VL6MFR8KYd3TIqn", - "type": "rectangle", - "x": 395.643907878644, - "y": 3497.672886303671, - "width": 156.66668701171875, - "height": 85, - "angle": 0, - "strokeColor": "#1e1e1e", - "backgroundColor": "#ffc9c9", - "fillStyle": "solid", - "strokeWidth": 2, - "strokeStyle": "solid", - "roughness": 1, - "opacity": 100, - "groupIds": [], - "frameId": null, - "index": "b4Y", - "roundness": null, - "seed": 1077772589, - "version": 1123, - "versionNonce": 1715199907, - "isDeleted": false, - "boundElements": [ - { - "type": "text", - "id": "aX2wRhDW7R4kZ49ZHsB7X" - } - ], - "updated": 1767580479760, - "link": null, - "locked": false - }, - { - "id": "aX2wRhDW7R4kZ49ZHsB7X", - "type": "text", - "x": 439.7672828176088, - "y": 3515.172886303671, - "width": 68.41993713378906, - "height": 50, - "angle": 0, - "strokeColor": "#1e1e1e", - "backgroundColor": "transparent", - "fillStyle": "solid", - "strokeWidth": 2, - "strokeStyle": "solid", - "roughness": 1, - "opacity": 100, - "groupIds": [], - "frameId": null, - "index": "b4Z", - "roundness": null, - "seed": 682546061, - "version": 1148, - "versionNonce": 539578189, - "isDeleted": false, - "boundElements": [], - "updated": 1767580476679, - "link": null, - "locked": false, - "text": "model\nServing", - "fontSize": 20, - "fontFamily": 5, - "textAlign": "center", - "verticalAlign": "middle", - "containerId": "yPC5J4VL6MFR8KYd3TIqn", - "originalText": "model\nServing", - "autoResize": true, - "lineHeight": 1.25 - }, - { - "id": "sfgomunt3ThMckJ6XKU_q", - "type": "rectangle", - "x": 
488.5689978967893, - "y": 4249.311580728391, - "width": 299.56796787532073, - "height": 60, - "angle": 0, - "strokeColor": "#1e1e1e", - "backgroundColor": "#ffc9c9", - "fillStyle": "solid", - "strokeWidth": 2, - "strokeStyle": "solid", - "roughness": 1, - "opacity": 100, - "groupIds": [], - "frameId": null, - "index": "b4a", - "roundness": null, - "seed": 512539075, - "version": 2215, - "versionNonce": 190733965, - "isDeleted": false, - "boundElements": [ - { - "type": "text", - "id": "WeNR6EN-asrq8s5DPDrqu" - } - ], - "updated": 1767580516436, - "link": null, - "locked": false - }, - { - "id": "WeNR6EN-asrq8s5DPDrqu", - "type": "text", - "x": 541.7930376816176, - "y": 4266.811580728391, - "width": 193.11988830566406, - "height": 25, - "angle": 0, - "strokeColor": "#1e1e1e", - "backgroundColor": "transparent", - "fillStyle": "solid", - "strokeWidth": 2, - "strokeStyle": "solid", - "roughness": 1, - "opacity": 100, - "groupIds": [], - "frameId": null, - "index": "b4b", - "roundness": null, - "seed": 1224116579, - "version": 2358, - "versionNonce": 2100077421, - "isDeleted": false, - "boundElements": [], - "updated": 1767580513277, - "link": null, - "locked": false, - "text": "IB & RoCE support", - "fontSize": 20, - "fontFamily": 5, - "textAlign": "center", - "verticalAlign": "middle", - "containerId": "sfgomunt3ThMckJ6XKU_q", - "originalText": "IB & RoCE support", - "autoResize": true, - "lineHeight": 1.25 - }, { "id": "MFBSa2ubaUD9ofO7oFDAX", "type": "rectangle", - "x": 219.23352789007174, - "y": 4915.747023497185, + "x": 235.90019455673848, + "y": 5775.747023497185, "width": 239.9955982022902, "height": 60, "angle": 0, @@ -10590,8 +9279,8 @@ "index": "b4c", "roundness": null, "seed": 1230068963, - "version": 2172, - "versionNonce": 1968459331, + "version": 2227, + "versionNonce": 998598973, "isDeleted": false, "boundElements": [ { @@ -10599,15 +9288,15 @@ "id": "L9ANH6aHqTr18kVeHpkD7" } ], - "updated": 1767580528071, + "updated": 1767684541917, "link": null, "locked": false }, { "id": "L9ANH6aHqTr18kVeHpkD7", "type": "text", - "x": 273.3813819228575, - "y": 4933.247023497185, + "x": 290.0480485895242, + "y": 5793.247023497185, "width": 131.69989013671875, "height": 25, "angle": 0, @@ -10623,11 +9312,11 @@ "index": "b4d", "roundness": null, "seed": 1970915459, - "version": 2297, - "versionNonce": 2060946915, + "version": 2352, + "versionNonce": 1893677469, "isDeleted": false, "boundElements": [], - "updated": 1767580528071, + "updated": 1767684541917, "link": null, "locked": false, "text": "weight & bias", @@ -10643,8 +9332,8 @@ { "id": "lfA_f7LdHHZOlVJcdyc3d", "type": "rectangle", - "x": 486.51892528118265, - "y": 4915.532178719405, + "x": 503.1855919478494, + "y": 5775.532178719405, "width": 299.56796787532073, "height": 60, "angle": 0, @@ -10660,8 +9349,8 @@ "index": "b4e", "roundness": null, "seed": 1154727971, - "version": 2260, - "versionNonce": 1418667853, + "version": 2315, + "versionNonce": 257949181, "isDeleted": false, "boundElements": [ { @@ -10669,15 +9358,15 @@ "id": "u1iQgDeEBo4G9WP1xSZyQ" } ], - "updated": 1767580531532, + "updated": 1767684541917, "link": null, "locked": false }, { "id": "u1iQgDeEBo4G9WP1xSZyQ", "type": "text", - "x": 539.742965066011, - "y": 4933.032178719405, + "x": 556.4096317326778, + "y": 5793.032178719405, "width": 193.11988830566406, "height": 25, "angle": 0, @@ -10693,11 +9382,11 @@ "index": "b4f", "roundness": null, "seed": 1900921795, - "version": 2402, - "versionNonce": 2101457187, + "version": 2457, + "versionNonce": 1040791133, "isDeleted": 
false, "boundElements": [], - "updated": 1767580528071, + "updated": 1767684541917, "link": null, "locked": false, "text": "IB & RoCE support", @@ -10713,8 +9402,8 @@ { "id": "d6HJM5niCdXm__tjACWnG", "type": "rectangle", - "x": 202.68559692521774, - "y": 5681.362225665228, + "x": 219.35226359188448, + "y": 6541.362225665228, "width": 239.9955982022902, "height": 60, "angle": 0, @@ -10730,8 +9419,8 @@ "index": "b4g", "roundness": null, "seed": 2084012493, - "version": 2298, - "versionNonce": 29748483, + "version": 2353, + "versionNonce": 1202479805, "isDeleted": false, "boundElements": [ { @@ -10739,15 +9428,15 @@ "id": "Qqo4oxKBaxNHFRuFrl-Mx" } ], - "updated": 1767580559782, + "updated": 1767684541917, "link": null, "locked": false }, { "id": "Qqo4oxKBaxNHFRuFrl-Mx", "type": "text", - "x": 256.8334509580035, - "y": 5698.862225665228, + "x": 273.5001176246702, + "y": 6558.862225665228, "width": 131.69989013671875, "height": 25, "angle": 0, @@ -10763,11 +9452,11 @@ "index": "b4h", "roundness": null, "seed": 585089069, - "version": 2423, - "versionNonce": 757552291, + "version": 2478, + "versionNonce": 552272669, "isDeleted": false, "boundElements": [], - "updated": 1767580559782, + "updated": 1767684541917, "link": null, "locked": false, "text": "weight & bias", @@ -10783,8 +9472,8 @@ { "id": "WAAd0XFKUqYZrZJzHBml5", "type": "rectangle", - "x": 469.97099431632864, - "y": 5681.147380887448, + "x": 486.6376609829954, + "y": 6541.147380887448, "width": 299.56796787532073, "height": 60, "angle": 0, @@ -10800,8 +9489,8 @@ "index": "b4i", "roundness": null, "seed": 447820429, - "version": 2386, - "versionNonce": 591054915, + "version": 2441, + "versionNonce": 775329661, "isDeleted": false, "boundElements": [ { @@ -10809,15 +9498,15 @@ "id": "JtiTV8-vNxkwt8Ymi12EQ" } ], - "updated": 1767580559782, + "updated": 1767684541917, "link": null, "locked": false }, { "id": "JtiTV8-vNxkwt8Ymi12EQ", "type": "text", - "x": 523.195034101157, - "y": 5698.647380887448, + "x": 539.8617007678238, + "y": 6558.647380887448, "width": 193.11988830566406, "height": 25, "angle": 0, @@ -10833,11 +9522,11 @@ "index": "b4j", "roundness": null, "seed": 1348819181, - "version": 2528, - "versionNonce": 1507772387, + "version": 2583, + "versionNonce": 1238409181, "isDeleted": false, "boundElements": [], - "updated": 1767580559782, + "updated": 1767684541917, "link": null, "locked": false, "text": "IB & RoCE support", @@ -10849,6 +9538,2422 @@ "originalText": "IB & RoCE support", "autoResize": true, "lineHeight": 1.25 + }, + { + "id": "heTk23zJWeUvAArJU1npN", + "type": "rectangle", + "x": 222.18039794698217, + "y": 4622.016351777231, + "width": 647.3203086953981, + "height": 541.9127073088301, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b4k", + "roundness": { + "type": 3 + }, + "seed": 1217405693, + "version": 1114, + "versionNonce": 651415293, + "isDeleted": false, + "boundElements": [], + "updated": 1767684554854, + "link": null, + "locked": false + }, + { + "id": "dNJs8EC_o9ivMRSpRAiyN", + "type": "rectangle", + "x": 427.8339280779272, + "y": 4802.683008271371, + "width": 156.66668701171875, + "height": 85, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": 
"b4l", + "roundness": null, + "seed": 132954973, + "version": 863, + "versionNonce": 953870173, + "isDeleted": false, + "boundElements": [ + { + "type": "text", + "id": "PAzLrJPYsg3e4DH5pekhu" + }, + { + "id": "CPCfaq7jaWOpUL6QoV_AW", + "type": "arrow" + }, + { + "id": "sZd_dP_CcDzIaAB3aKJFv", + "type": "arrow" + } + ], + "updated": 1767684554854, + "link": null, + "locked": false + }, + { + "id": "PAzLrJPYsg3e4DH5pekhu", + "type": "text", + "x": 448.77729508232176, + "y": 4820.183008271371, + "width": 114.77995300292969, + "height": 50, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b4m", + "roundness": null, + "seed": 1960265661, + "version": 854, + "versionNonce": 1059058621, + "isDeleted": false, + "boundElements": [], + "updated": 1767684554854, + "link": null, + "locked": false, + "text": "ray job tool\n(ray client)", + "fontSize": 20, + "fontFamily": 5, + "textAlign": "center", + "verticalAlign": "middle", + "containerId": "dNJs8EC_o9ivMRSpRAiyN", + "originalText": "ray job tool\n(ray client)", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "XtP3-kgEREwW-vJ3J9Bb2", + "type": "rectangle", + "x": 672.833928077927, + "y": 4802.183008271371, + "width": 156.66668701171875, + "height": 85, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b4n", + "roundness": null, + "seed": 292636701, + "version": 1000, + "versionNonce": 1237438685, + "isDeleted": false, + "boundElements": [ + { + "type": "text", + "id": "rVXMMkuJQM04yTMLDy42R" + }, + { + "id": "CPCfaq7jaWOpUL6QoV_AW", + "type": "arrow" + } + ], + "updated": 1767684554854, + "link": null, + "locked": false + }, + { + "id": "rVXMMkuJQM04yTMLDy42R", + "type": "text", + "x": 683.7173280413059, + "y": 4819.683008271371, + "width": 134.89988708496094, + "height": 50, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b4o", + "roundness": null, + "seed": 531381373, + "version": 1004, + "versionNonce": 913157437, + "isDeleted": false, + "boundElements": [], + "updated": 1767684554854, + "link": null, + "locked": false, + "text": "VerlTaskSpec \nyaml", + "fontSize": 20, + "fontFamily": 5, + "textAlign": "center", + "verticalAlign": "middle", + "containerId": "XtP3-kgEREwW-vJ3J9Bb2", + "originalText": "VerlTaskSpec \nyaml", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "CPCfaq7jaWOpUL6QoV_AW", + "type": "arrow", + "x": 674.1672715837864, + "y": 4840.683008271371, + "width": 90.33331298828125, + "height": 4, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b4p", + "roundness": { + "type": 2 + }, + "seed": 659137757, + "version": 2304, + "versionNonce": 379611827, + "isDeleted": false, + "boundElements": [], + "updated": 1767684554974, + "link": null, + "locked": false, + "points": [ + [ + 0, + 0 + ], + [ + -90.33331298828125, + 4 + ] + ], + "lastCommittedPoint": null, + "startBinding": { + 
"elementId": "XtP3-kgEREwW-vJ3J9Bb2", + "focus": 0.1611880517201581, + "gap": 1.333343505859375 + }, + "endBinding": { + "elementId": "dNJs8EC_o9ivMRSpRAiyN", + "focus": 0.06393742185083161, + "gap": 1 + }, + "startArrowhead": null, + "endArrowhead": "arrow", + "elbowed": false + }, + { + "id": "qCvbOZiLw9alEjLTQG-6n", + "type": "text", + "x": 260.10269708855975, + "y": 4573.993925047569, + "width": 476.19970703125, + "height": 25, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b4q", + "roundness": null, + "seed": 1004208445, + "version": 1204, + "versionNonce": 2032663133, + "isDeleted": false, + "boundElements": [], + "updated": 1767684554854, + "link": null, + "locked": false, + "text": "v3.8 IB & Roce support for multi node training ", + "fontSize": 20, + "fontFamily": 5, + "textAlign": "left", + "verticalAlign": "top", + "containerId": null, + "originalText": "v3.8 IB & Roce support for multi node training ", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "h6MaiZUa5w9nYowJZhKBM", + "type": "rectangle", + "x": 613.657876151101, + "y": 4644.849603730356, + "width": 210.1763265850289, + "height": 55.000000000000014, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b4r", + "roundness": null, + "seed": 46256541, + "version": 1152, + "versionNonce": 978495165, + "isDeleted": false, + "boundElements": [ + { + "type": "text", + "id": "q8TUwsZ-OkjHwr0CSSxxi" + } + ], + "updated": 1767684554854, + "link": null, + "locked": false + }, + { + "id": "q8TUwsZ-OkjHwr0CSSxxi", + "type": "text", + "x": 665.896079116467, + "y": 4659.849603730356, + "width": 105.69992065429688, + "height": 25, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b4s", + "roundness": null, + "seed": 2033161725, + "version": 1193, + "versionNonce": 1848166173, + "isDeleted": false, + "boundElements": [], + "updated": 1767684554854, + "link": null, + "locked": false, + "text": "API server", + "fontSize": 20, + "fontFamily": 5, + "textAlign": "center", + "verticalAlign": "middle", + "containerId": "h6MaiZUa5w9nYowJZhKBM", + "originalText": "API server", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "LqmUk3bhOWMYlt25NtSMl", + "type": "rectangle", + "x": 427.6672715837866, + "y": 4715.849603730356, + "width": 154.6436510018092, + "height": 60, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b4t", + "roundness": null, + "seed": 2049298013, + "version": 1382, + "versionNonce": 387701629, + "isDeleted": false, + "boundElements": [ + { + "type": "text", + "id": "XvT8JfmI8TNOVB-15YovG" + } + ], + "updated": 1767684554854, + "link": null, + "locked": false + }, + { + "id": "XvT8JfmI8TNOVB-15YovG", + "type": "text", + "x": 443.86914011447635, + "y": 4720.849603730356, + "width": 122.23991394042969, + "height": 50, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": 
"transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b4u", + "roundness": null, + "seed": 74840765, + "version": 1442, + "versionNonce": 1835885533, + "isDeleted": false, + "boundElements": [], + "updated": 1767684554854, + "link": null, + "locked": false, + "text": "task\nmanagement ", + "fontSize": 20, + "fontFamily": 5, + "textAlign": "center", + "verticalAlign": "middle", + "containerId": "LqmUk3bhOWMYlt25NtSMl", + "originalText": "task management ", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "7EhI0Iv9n5QBOT2mbAzRu", + "type": "rectangle", + "x": 611.7748101694561, + "y": 4716.42391204307, + "width": 207.59447102997498, + "height": 60, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b4v", + "roundness": null, + "seed": 1086429981, + "version": 1473, + "versionNonce": 623829053, + "isDeleted": false, + "boundElements": [ + { + "type": "text", + "id": "uMtMs3zVaipTL39V1z9qO" + } + ], + "updated": 1767684554854, + "link": null, + "locked": false + }, + { + "id": "uMtMs3zVaipTL39V1z9qO", + "type": "text", + "x": 628.0421155696976, + "y": 4721.42391204307, + "width": 175.0598602294922, + "height": 50, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b4w", + "roundness": null, + "seed": 1199189885, + "version": 1563, + "versionNonce": 1950470301, + "isDeleted": false, + "boundElements": [], + "updated": 1767684554854, + "link": null, + "locked": false, + "text": "node management\n(ssh, ray cluster) ", + "fontSize": 20, + "fontFamily": 5, + "textAlign": "center", + "verticalAlign": "middle", + "containerId": "7EhI0Iv9n5QBOT2mbAzRu", + "originalText": "node management\n(ssh, ray cluster) ", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "xbLezhTmbaiuNOowcP5-Q", + "type": "rectangle", + "x": 947.9736153053328, + "y": 4797.797636136118, + "width": 163.1357446724801, + "height": 106.45708709635272, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b4x", + "roundness": null, + "seed": 652294109, + "version": 770, + "versionNonce": 1482142973, + "isDeleted": false, + "boundElements": [ + { + "type": "text", + "id": "cnKFukZFqDoDP63iIUm9_" + } + ], + "updated": 1767684554854, + "link": null, + "locked": false + }, + { + "id": "cnKFukZFqDoDP63iIUm9_", + "type": "text", + "x": 978.8915318920612, + "y": 4826.026179684295, + "width": 101.29991149902344, + "height": 50, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b4y", + "roundness": null, + "seed": 688403517, + "version": 833, + "versionNonce": 485625181, + "isDeleted": false, + "boundElements": [], + "updated": 1767684554854, + "link": null, + "locked": false, + "text": "ray worker\nnode", + "fontSize": 20, + "fontFamily": 5, + "textAlign": "center", + "verticalAlign": "middle", + 
"containerId": "xbLezhTmbaiuNOowcP5-Q", + "originalText": "ray worker node", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "RZW6ujHgIQvMdNW8cevGx", + "type": "rectangle", + "x": 949.9227259748619, + "y": 4661.359874398537, + "width": 163.1357446724801, + "height": 85, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b4z", + "roundness": null, + "seed": 1506845853, + "version": 819, + "versionNonce": 560093629, + "isDeleted": false, + "boundElements": [ + { + "type": "text", + "id": "5AFbh75wn3qwB-QijQWzD" + } + ], + "updated": 1767684554854, + "link": null, + "locked": false + }, + { + "id": "5AFbh75wn3qwB-QijQWzD", + "type": "text", + "x": 980.8406425615902, + "y": 4678.859874398537, + "width": 101.29991149902344, + "height": 50, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b50", + "roundness": null, + "seed": 1290537213, + "version": 885, + "versionNonce": 1590303261, + "isDeleted": false, + "boundElements": [], + "updated": 1767684554854, + "link": null, + "locked": false, + "text": "ray worker\nnode", + "fontSize": 20, + "fontFamily": 5, + "textAlign": "center", + "verticalAlign": "middle", + "containerId": "RZW6ujHgIQvMdNW8cevGx", + "originalText": "ray worker node", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "FtfYzzfZJog6J-3gXZioA", + "type": "rectangle", + "x": 430.4727810498738, + "y": 4642.636711064613, + "width": 147.4799234681481, + "height": 55.000000000000014, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b51", + "roundness": null, + "seed": 601612637, + "version": 1270, + "versionNonce": 773508733, + "isDeleted": false, + "boundElements": [ + { + "type": "text", + "id": "cOCve3brssvurWdwd9Nth" + } + ], + "updated": 1767684554854, + "link": null, + "locked": false + }, + { + "id": "cOCve3brssvurWdwd9Nth", + "type": "text", + "x": 472.98276994459235, + "y": 4657.636711064613, + "width": 62.45994567871094, + "height": 25, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b52", + "roundness": null, + "seed": 1408347581, + "version": 1320, + "versionNonce": 1310062301, + "isDeleted": false, + "boundElements": [], + "updated": 1767684554854, + "link": null, + "locked": false, + "text": "WebUI", + "fontSize": 20, + "fontFamily": 5, + "textAlign": "center", + "verticalAlign": "middle", + "containerId": "FtfYzzfZJog6J-3gXZioA", + "originalText": "WebUI", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "ASyFxd3DW54qxF8tKwBg_", + "type": "rectangle", + "x": 674.1445628108464, + "y": 4917.7321010810265, + "width": 156.66668701171875, + "height": 85, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b53", + "roundness": null, + "seed": 
743370269, + "version": 1052, + "versionNonce": 692917053, + "isDeleted": false, + "boundElements": [ + { + "type": "text", + "id": "UCfmqG8sQLg78t_qD4wwa" + }, + { + "id": "sZd_dP_CcDzIaAB3aKJFv", + "type": "arrow" + } + ], + "updated": 1767684554854, + "link": null, + "locked": false + }, + { + "id": "UCfmqG8sQLg78t_qD4wwa", + "type": "text", + "x": 685.0279627742253, + "y": 4935.2321010810265, + "width": 134.89988708496094, + "height": 50, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b54", + "roundness": null, + "seed": 468001405, + "version": 1104, + "versionNonce": 1545708445, + "isDeleted": false, + "boundElements": [], + "updated": 1767684554854, + "link": null, + "locked": false, + "text": "Advanced\nVerlTaskSpec ", + "fontSize": 20, + "fontFamily": 5, + "textAlign": "center", + "verticalAlign": "middle", + "containerId": "ASyFxd3DW54qxF8tKwBg_", + "originalText": "Advanced VerlTaskSpec ", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "sZd_dP_CcDzIaAB3aKJFv", + "type": "arrow", + "x": 673.6807560139299, + "y": 4962.097093595226, + "width": 80.66214281697523, + "height": 84.85848269127973, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "#ffc9c9", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b55", + "roundness": { + "type": 2 + }, + "seed": 930466525, + "version": 1151, + "versionNonce": 863345139, + "isDeleted": false, + "boundElements": [], + "updated": 1767684554975, + "link": null, + "locked": false, + "points": [ + [ + 0, + 0 + ], + [ + -80.66214281697523, + -84.85848269127973 + ] + ], + "lastCommittedPoint": null, + "startBinding": { + "elementId": "ASyFxd3DW54qxF8tKwBg_", + "focus": -0.6785882314731602, + "gap": 1 + }, + "endBinding": { + "elementId": "Hs9wFmEEbdIpA9vJXphjV", + "focus": 0, + "gap": 14 + }, + "startArrowhead": null, + "endArrowhead": "arrow", + "elbowed": false + }, + { + "id": "dtRUpIgFf9hybiVPPWsVE", + "type": "rectangle", + "x": 245.26873250374882, + "y": 4715.7546323666065, + "width": 154.6436510018092, + "height": 60, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b56", + "roundness": null, + "seed": 2140301117, + "version": 1444, + "versionNonce": 122621117, + "isDeleted": false, + "boundElements": [ + { + "type": "text", + "id": "EQCb1M_dJna0DGVu3DOzf" + } + ], + "updated": 1767684554854, + "link": null, + "locked": false + }, + { + "id": "EQCb1M_dJna0DGVu3DOzf", + "type": "text", + "x": 261.4706010344386, + "y": 4720.7546323666065, + "width": 122.23991394042969, + "height": 50, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b57", + "roundness": null, + "seed": 1721530269, + "version": 1512, + "versionNonce": 1789422877, + "isDeleted": false, + "boundElements": [], + "updated": 1767684554854, + "link": null, + "locked": false, + "text": "user\nmanagement ", + "fontSize": 20, + "fontFamily": 5, + "textAlign": "center", + "verticalAlign": "middle", + "containerId": 
"dtRUpIgFf9hybiVPPWsVE", + "originalText": "user management ", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "8WUBjDJs7nLl0Nbm4mEYI", + "type": "rectangle", + "x": 251.3301289226659, + "y": 4909.33946401936, + "width": 154.6436510018092, + "height": 85, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b58", + "roundness": null, + "seed": 382041085, + "version": 1676, + "versionNonce": 1891025277, + "isDeleted": false, + "boundElements": [ + { + "type": "text", + "id": "wTlq9A4HXC9t4SBYr6VzG" + } + ], + "updated": 1767684554854, + "link": null, + "locked": false + }, + { + "id": "wTlq9A4HXC9t4SBYr6VzG", + "type": "text", + "x": 271.53199745335564, + "y": 4914.33946401936, + "width": 114.23991394042969, + "height": 75, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b59", + "roundness": null, + "seed": 2014245981, + "version": 1758, + "versionNonce": 1589348829, + "isDeleted": false, + "boundElements": [], + "updated": 1767684554854, + "link": null, + "locked": false, + "text": "data\nmanagement\nSFTPGo ", + "fontSize": 20, + "fontFamily": 5, + "textAlign": "center", + "verticalAlign": "middle", + "containerId": "8WUBjDJs7nLl0Nbm4mEYI", + "originalText": "data management\nSFTPGo ", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "eaMsiShOv4_vndnhERhg1", + "type": "rectangle", + "x": 1123.0472071604347, + "y": 4684.089212529603, + "width": 78.84661364258261, + "height": 188.34936741723558, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b5A", + "roundness": null, + "seed": 1984585917, + "version": 1547, + "versionNonce": 1439188541, + "isDeleted": false, + "boundElements": [ + { + "type": "text", + "id": "IV3hZPShTMVrFQj4jqOr8" + } + ], + "updated": 1767684554854, + "link": null, + "locked": false + }, + { + "id": "IV3hZPShTMVrFQj4jqOr8", + "type": "text", + "x": 1134.860536259558, + "y": 4765.763896238221, + "width": 55.21995544433594, + "height": 25, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b5B", + "roundness": null, + "seed": 119095581, + "version": 1566, + "versionNonce": 1488877213, + "isDeleted": false, + "boundElements": [], + "updated": 1767684554854, + "link": null, + "locked": false, + "text": "GPFS", + "fontSize": 20, + "fontFamily": 5, + "textAlign": "center", + "verticalAlign": "middle", + "containerId": "eaMsiShOv4_vndnhERhg1", + "originalText": "GPFS", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "xROBVjCQGEk7huVn90qgK", + "type": "rectangle", + "x": 256.2399675373537, + "y": 5032.703346707314, + "width": 239.9955982022902, + "height": 60, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b5C", + "roundness": null, + "seed": 
1877808509, + "version": 2165, + "versionNonce": 307206909, + "isDeleted": false, + "boundElements": [ + { + "type": "text", + "id": "qCqMkkJlJPTEM1aPwAgMA" + } + ], + "updated": 1767684554854, + "link": null, + "locked": false + }, + { + "id": "qCqMkkJlJPTEM1aPwAgMA", + "type": "text", + "x": 310.38782157013947, + "y": 5050.203346707314, + "width": 131.69989013671875, + "height": 25, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b5D", + "roundness": null, + "seed": 1275551197, + "version": 2290, + "versionNonce": 1526543197, + "isDeleted": false, + "boundElements": [], + "updated": 1767684554854, + "link": null, + "locked": false, + "text": "weight & bias", + "fontSize": 20, + "fontFamily": 5, + "textAlign": "center", + "verticalAlign": "middle", + "containerId": "xROBVjCQGEk7huVn90qgK", + "originalText": "weight & bias", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "Hs9wFmEEbdIpA9vJXphjV", + "type": "rectangle", + "x": 432.80662530553457, + "y": 4910.772490862308, + "width": 156.66668701171875, + "height": 85, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b5E", + "roundness": null, + "seed": 1383701053, + "version": 1151, + "versionNonce": 406053779, + "isDeleted": false, + "boundElements": [ + { + "type": "text", + "id": "nrpfg4A7-jJ2tNBa4ebUP" + }, + { + "id": "sZd_dP_CcDzIaAB3aKJFv", + "type": "arrow" + } + ], + "updated": 1767684554975, + "link": null, + "locked": false + }, + { + "id": "nrpfg4A7-jJ2tNBa4ebUP", + "type": "text", + "x": 476.9300002444994, + "y": 4928.272490862308, + "width": 68.41993713378906, + "height": 50, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b5F", + "roundness": null, + "seed": 1545947805, + "version": 1176, + "versionNonce": 1153679389, + "isDeleted": false, + "boundElements": [], + "updated": 1767684554854, + "link": null, + "locked": false, + "text": "model\nServing", + "fontSize": 20, + "fontFamily": 5, + "textAlign": "center", + "verticalAlign": "middle", + "containerId": "Hs9wFmEEbdIpA9vJXphjV", + "originalText": "model\nServing", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "BoeIvlBhUQlPBlOZpHR08", + "type": "rectangle", + "x": 523.5253649284647, + "y": 5032.488501929534, + "width": 299.56796787532073, + "height": 60, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "#ffc9c9", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b5G", + "roundness": null, + "seed": 700182269, + "version": 2252, + "versionNonce": 837792893, + "isDeleted": false, + "boundElements": [ + { + "type": "text", + "id": "b1wGV0cEUxZSUWjHVW5bB" + } + ], + "updated": 1767684554854, + "link": null, + "locked": false + }, + { + "id": "b1wGV0cEUxZSUWjHVW5bB", + "type": "text", + "x": 576.749404713293, + "y": 5049.988501929534, + "width": 193.11988830566406, + "height": 25, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + 
"strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b5H", + "roundness": null, + "seed": 1774126941, + "version": 2396, + "versionNonce": 308157661, + "isDeleted": false, + "boundElements": [], + "updated": 1767684554854, + "link": null, + "locked": false, + "text": "IB & RoCE support", + "fontSize": 20, + "fontFamily": 5, + "textAlign": "center", + "verticalAlign": "middle", + "containerId": "BoeIvlBhUQlPBlOZpHR08", + "originalText": "IB & RoCE support", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "MjLfupuIEyHsJXHTOkWGm", + "type": "rectangle", + "x": 211.51373636657854, + "y": 3915.623564307773, + "width": 647.3203086953981, + "height": 541.9127073088301, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b5I", + "roundness": { + "type": 3 + }, + "seed": 71288691, + "version": 1196, + "versionNonce": 959699283, + "isDeleted": false, + "boundElements": [], + "updated": 1767684575234, + "link": null, + "locked": false + }, + { + "id": "Gf0CtJAjOH9kfDFhCUE1Z", + "type": "rectangle", + "x": 417.16726649752354, + "y": 4096.290220801913, + "width": 156.66668701171875, + "height": 85, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b5J", + "roundness": null, + "seed": 2052874515, + "version": 945, + "versionNonce": 2125674227, + "isDeleted": false, + "boundElements": [ + { + "type": "text", + "id": "20DYWEmDFmwM_oJVjOqay" + }, + { + "id": "Gs0St6vy7AzWropCInu6q", + "type": "arrow" + }, + { + "id": "zN_q98Ryj-3lovwWckFiO", + "type": "arrow" + } + ], + "updated": 1767684575234, + "link": null, + "locked": false + }, + { + "id": "20DYWEmDFmwM_oJVjOqay", + "type": "text", + "x": 438.1106335019181, + "y": 4113.790220801913, + "width": 114.77995300292969, + "height": 50, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b5K", + "roundness": null, + "seed": 1626654387, + "version": 936, + "versionNonce": 1135992979, + "isDeleted": false, + "boundElements": [], + "updated": 1767684575234, + "link": null, + "locked": false, + "text": "ray job tool\n(ray client)", + "fontSize": 20, + "fontFamily": 5, + "textAlign": "center", + "verticalAlign": "middle", + "containerId": "Gf0CtJAjOH9kfDFhCUE1Z", + "originalText": "ray job tool\n(ray client)", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "PvoN0jKnOWq1xXn2lroWv", + "type": "rectangle", + "x": 662.1672664975235, + "y": 4095.7902208019136, + "width": 156.66668701171875, + "height": 85, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b5L", + "roundness": null, + "seed": 1320122451, + "version": 1082, + "versionNonce": 1692575091, + "isDeleted": false, + "boundElements": [ + { + "type": "text", + "id": "ntCCl1Q6fx9M0P9RABpHn" + }, + { + "id": "Gs0St6vy7AzWropCInu6q", + "type": "arrow" + } + ], + "updated": 1767684575234, + "link": 
null, + "locked": false + }, + { + "id": "ntCCl1Q6fx9M0P9RABpHn", + "type": "text", + "x": 673.0506664609024, + "y": 4113.290220801913, + "width": 134.89988708496094, + "height": 50, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b5M", + "roundness": null, + "seed": 1097004531, + "version": 1086, + "versionNonce": 1056145171, + "isDeleted": false, + "boundElements": [], + "updated": 1767684575234, + "link": null, + "locked": false, + "text": "VerlTaskSpec \nyaml", + "fontSize": 20, + "fontFamily": 5, + "textAlign": "center", + "verticalAlign": "middle", + "containerId": "PvoN0jKnOWq1xXn2lroWv", + "originalText": "VerlTaskSpec \nyaml", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "Gs0St6vy7AzWropCInu6q", + "type": "arrow", + "x": 663.5006100033829, + "y": 4134.290220801913, + "width": 90.33331298828125, + "height": 4, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b5N", + "roundness": { + "type": 2 + }, + "seed": 940949395, + "version": 2546, + "versionNonce": 1745201981, + "isDeleted": false, + "boundElements": [], + "updated": 1767684575978, + "link": null, + "locked": false, + "points": [ + [ + 0, + 0 + ], + [ + -90.33331298828125, + 4 + ] + ], + "lastCommittedPoint": null, + "startBinding": { + "elementId": "PvoN0jKnOWq1xXn2lroWv", + "focus": 0.16118805172015999, + "gap": 1.333343505859375 + }, + "endBinding": { + "elementId": "Gf0CtJAjOH9kfDFhCUE1Z", + "focus": 0.06393742185083104, + "gap": 1 + }, + "startArrowhead": null, + "endArrowhead": "arrow", + "elbowed": false + }, + { + "id": "K8wHlmlEligI_h0jTUnJO", + "type": "text", + "x": 217.44344977630226, + "y": 3851.0532571133813, + "width": 330.57977294921875, + "height": 25, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b5O", + "roundness": null, + "seed": 217994547, + "version": 1296, + "versionNonce": 102748531, + "isDeleted": false, + "boundElements": [], + "updated": 1767684673131, + "link": null, + "locked": false, + "text": "v3.7 Model serving with ray serve", + "fontSize": 20, + "fontFamily": 5, + "textAlign": "left", + "verticalAlign": "top", + "containerId": null, + "originalText": "v3.7 Model serving with ray serve", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "n-Ff4Ze34fZJUns_NTd20", + "type": "rectangle", + "x": 602.9912145706976, + "y": 3938.456816260898, + "width": 210.1763265850289, + "height": 55.000000000000014, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b5P", + "roundness": null, + "seed": 260257491, + "version": 1234, + "versionNonce": 1093524883, + "isDeleted": false, + "boundElements": [ + { + "type": "text", + "id": "vLsB0AI_CkbcIkZaJdNiJ" + } + ], + "updated": 1767684575234, + "link": null, + "locked": false + }, + { + "id": "vLsB0AI_CkbcIkZaJdNiJ", + "type": "text", + "x": 655.2294175360637, + "y": 3953.456816260898, + "width": 
105.69992065429688, + "height": 25, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b5Q", + "roundness": null, + "seed": 1580917875, + "version": 1276, + "versionNonce": 962769715, + "isDeleted": false, + "boundElements": [], + "updated": 1767684575234, + "link": null, + "locked": false, + "text": "API server", + "fontSize": 20, + "fontFamily": 5, + "textAlign": "center", + "verticalAlign": "middle", + "containerId": "n-Ff4Ze34fZJUns_NTd20", + "originalText": "API server", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "qxqtFfa77QmZ0wtXJbNdf", + "type": "rectangle", + "x": 417.0006100033829, + "y": 4009.456816260898, + "width": 154.6436510018092, + "height": 60, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b5R", + "roundness": null, + "seed": 15457811, + "version": 1464, + "versionNonce": 732681427, + "isDeleted": false, + "boundElements": [ + { + "type": "text", + "id": "21ZGMSVPphjNxPbD4fYsP" + } + ], + "updated": 1767684575234, + "link": null, + "locked": false + }, + { + "id": "21ZGMSVPphjNxPbD4fYsP", + "type": "text", + "x": 433.2024785340727, + "y": 4014.456816260898, + "width": 122.23991394042969, + "height": 50, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b5S", + "roundness": null, + "seed": 1116513203, + "version": 1524, + "versionNonce": 1738523251, + "isDeleted": false, + "boundElements": [], + "updated": 1767684575234, + "link": null, + "locked": false, + "text": "task\nmanagement ", + "fontSize": 20, + "fontFamily": 5, + "textAlign": "center", + "verticalAlign": "middle", + "containerId": "qxqtFfa77QmZ0wtXJbNdf", + "originalText": "task management ", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "KcxzNn7Z9-QxXtT7AkZ3C", + "type": "rectangle", + "x": 601.1081485890527, + "y": 4010.0311245736125, + "width": 207.59447102997498, + "height": 60, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b5T", + "roundness": null, + "seed": 1262728531, + "version": 1555, + "versionNonce": 100989971, + "isDeleted": false, + "boundElements": [ + { + "type": "text", + "id": "Xmeq8g2n9AW6KRTGM-M5I" + } + ], + "updated": 1767684575234, + "link": null, + "locked": false + }, + { + "id": "Xmeq8g2n9AW6KRTGM-M5I", + "type": "text", + "x": 617.3754539892941, + "y": 4015.0311245736125, + "width": 175.0598602294922, + "height": 50, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b5U", + "roundness": null, + "seed": 2053608179, + "version": 1646, + "versionNonce": 1121059251, + "isDeleted": false, + "boundElements": [], + "updated": 1767684575234, + "link": null, + "locked": false, + "text": "node management\n(ssh, ray cluster) ", + "fontSize": 20, + "fontFamily": 
5, + "textAlign": "center", + "verticalAlign": "middle", + "containerId": "KcxzNn7Z9-QxXtT7AkZ3C", + "originalText": "node management\n(ssh, ray cluster) ", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "X3LvP6ca37T41BZ-Q0noS", + "type": "rectangle", + "x": 937.3069537249294, + "y": 4091.404848666662, + "width": 163.1357446724801, + "height": 106.45708709635272, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b5V", + "roundness": null, + "seed": 8305811, + "version": 852, + "versionNonce": 388846419, + "isDeleted": false, + "boundElements": [ + { + "type": "text", + "id": "d1Zxyw9BfzoP8jVeALpVW" + } + ], + "updated": 1767684575234, + "link": null, + "locked": false + }, + { + "id": "d1Zxyw9BfzoP8jVeALpVW", + "type": "text", + "x": 968.2248703116577, + "y": 4119.633392214839, + "width": 101.29991149902344, + "height": 50, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b5W", + "roundness": null, + "seed": 960475699, + "version": 916, + "versionNonce": 325939443, + "isDeleted": false, + "boundElements": [], + "updated": 1767684575234, + "link": null, + "locked": false, + "text": "ray worker\nnode", + "fontSize": 20, + "fontFamily": 5, + "textAlign": "center", + "verticalAlign": "middle", + "containerId": "X3LvP6ca37T41BZ-Q0noS", + "originalText": "ray worker node", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "Om29ryg7xCDfxim1gwvSQ", + "type": "rectangle", + "x": 939.2560643944585, + "y": 3954.967086929079, + "width": 163.1357446724801, + "height": 85, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b5X", + "roundness": null, + "seed": 1167127507, + "version": 901, + "versionNonce": 2031671955, + "isDeleted": false, + "boundElements": [ + { + "type": "text", + "id": "rRfaJGVJvSMVoayQ_ZCBZ" + } + ], + "updated": 1767684575234, + "link": null, + "locked": false + }, + { + "id": "rRfaJGVJvSMVoayQ_ZCBZ", + "type": "text", + "x": 970.1739809811868, + "y": 3972.467086929079, + "width": 101.29991149902344, + "height": 50, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b5Y", + "roundness": null, + "seed": 1703477619, + "version": 968, + "versionNonce": 350621747, + "isDeleted": false, + "boundElements": [], + "updated": 1767684575234, + "link": null, + "locked": false, + "text": "ray worker\nnode", + "fontSize": 20, + "fontFamily": 5, + "textAlign": "center", + "verticalAlign": "middle", + "containerId": "Om29ryg7xCDfxim1gwvSQ", + "originalText": "ray worker node", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "XVf5M1qJ68ycZad4qnKcr", + "type": "rectangle", + "x": 419.8061194694701, + "y": 3936.2439235951556, + "width": 147.4799234681481, + "height": 55.000000000000014, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 
1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b5Z", + "roundness": null, + "seed": 309222163, + "version": 1352, + "versionNonce": 1488687571, + "isDeleted": false, + "boundElements": [ + { + "type": "text", + "id": "oKk-BRI0PDZXF4MkLu6xs" + } + ], + "updated": 1767684575234, + "link": null, + "locked": false + }, + { + "id": "oKk-BRI0PDZXF4MkLu6xs", + "type": "text", + "x": 462.3161083641887, + "y": 3951.2439235951556, + "width": 62.45994567871094, + "height": 25, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b5a", + "roundness": null, + "seed": 1488421043, + "version": 1402, + "versionNonce": 900718451, + "isDeleted": false, + "boundElements": [], + "updated": 1767684575234, + "link": null, + "locked": false, + "text": "WebUI", + "fontSize": 20, + "fontFamily": 5, + "textAlign": "center", + "verticalAlign": "middle", + "containerId": "XVf5M1qJ68ycZad4qnKcr", + "originalText": "WebUI", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "Hg8ObmOCWMqPbn09S7yjk", + "type": "rectangle", + "x": 663.477901230443, + "y": 4211.33931361157, + "width": 156.66668701171875, + "height": 85, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b5b", + "roundness": null, + "seed": 1913320019, + "version": 1134, + "versionNonce": 906590483, + "isDeleted": false, + "boundElements": [ + { + "type": "text", + "id": "pI6z32ZixK5gmBToyLilS" + }, + { + "id": "zN_q98Ryj-3lovwWckFiO", + "type": "arrow" + } + ], + "updated": 1767684575234, + "link": null, + "locked": false + }, + { + "id": "pI6z32ZixK5gmBToyLilS", + "type": "text", + "x": 674.3613011938219, + "y": 4228.83931361157, + "width": 134.89988708496094, + "height": 50, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b5c", + "roundness": null, + "seed": 524234739, + "version": 1186, + "versionNonce": 1463738035, + "isDeleted": false, + "boundElements": [], + "updated": 1767684575234, + "link": null, + "locked": false, + "text": "Advanced\nVerlTaskSpec ", + "fontSize": 20, + "fontFamily": 5, + "textAlign": "center", + "verticalAlign": "middle", + "containerId": "Hg8ObmOCWMqPbn09S7yjk", + "originalText": "Advanced VerlTaskSpec ", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "zN_q98Ryj-3lovwWckFiO", + "type": "arrow", + "x": 663.0140944335262, + "y": 4255.70430612577, + "width": 80.66214281697523, + "height": 84.85848269127973, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "#ffc9c9", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b5d", + "roundness": { + "type": 2 + }, + "seed": 1113242003, + "version": 1393, + "versionNonce": 641187837, + "isDeleted": false, + "boundElements": [], + "updated": 1767684575978, + "link": null, + "locked": false, + "points": [ + [ + 0, + 0 + ], + [ + -80.66214281697523, + -84.85848269127973 + ] + ], + "lastCommittedPoint": null, + "startBinding": { + "elementId": "Hg8ObmOCWMqPbn09S7yjk", + "focus": 
-0.6785882314731602, + "gap": 1 + }, + "endBinding": { + "elementId": "Gf0CtJAjOH9kfDFhCUE1Z", + "focus": -0.47485998984046796, + "gap": 8.5179981073087 + }, + "startArrowhead": null, + "endArrowhead": "arrow", + "elbowed": false + }, + { + "id": "BPhlnGltdlIkoIoRjKnat", + "type": "rectangle", + "x": 234.6020709233453, + "y": 4009.3618448971506, + "width": 154.6436510018092, + "height": 60, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b5e", + "roundness": null, + "seed": 949227315, + "version": 1526, + "versionNonce": 142006163, + "isDeleted": false, + "boundElements": [ + { + "type": "text", + "id": "AZwdH-22GzEBgAHGN1nsM" + } + ], + "updated": 1767684575234, + "link": null, + "locked": false + }, + { + "id": "AZwdH-22GzEBgAHGN1nsM", + "type": "text", + "x": 250.80393945403512, + "y": 4014.3618448971506, + "width": 122.23991394042969, + "height": 50, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b5f", + "roundness": null, + "seed": 1951510739, + "version": 1595, + "versionNonce": 1635544371, + "isDeleted": false, + "boundElements": [], + "updated": 1767684575234, + "link": null, + "locked": false, + "text": "user\nmanagement ", + "fontSize": 20, + "fontFamily": 5, + "textAlign": "center", + "verticalAlign": "middle", + "containerId": "BPhlnGltdlIkoIoRjKnat", + "originalText": "user management ", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "4gXYDCzJEyB-R3vjaRh-v", + "type": "rectangle", + "x": 240.66346734226227, + "y": 4202.946676549904, + "width": 154.6436510018092, + "height": 85, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b5g", + "roundness": null, + "seed": 1462378099, + "version": 1758, + "versionNonce": 1551781587, + "isDeleted": false, + "boundElements": [ + { + "type": "text", + "id": "mSgJ1Tvg6_a8nKBUdLcd9" + } + ], + "updated": 1767684575234, + "link": null, + "locked": false + }, + { + "id": "mSgJ1Tvg6_a8nKBUdLcd9", + "type": "text", + "x": 260.8653358729521, + "y": 4207.946676549904, + "width": 114.23991394042969, + "height": 75, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b5h", + "roundness": null, + "seed": 1273223187, + "version": 1840, + "versionNonce": 202570867, + "isDeleted": false, + "boundElements": [], + "updated": 1767684575234, + "link": null, + "locked": false, + "text": "data\nmanagement\nSFTPGo ", + "fontSize": 20, + "fontFamily": 5, + "textAlign": "center", + "verticalAlign": "middle", + "containerId": "4gXYDCzJEyB-R3vjaRh-v", + "originalText": "data management\nSFTPGo ", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "I7ATDrhNb6eDEYhRqzYfp", + "type": "rectangle", + "x": 1112.3805455800311, + "y": 3977.6964250601454, + "width": 78.84661364258261, + "height": 188.34936741723558, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + 
"strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b5i", + "roundness": null, + "seed": 544562611, + "version": 1629, + "versionNonce": 1812002323, + "isDeleted": false, + "boundElements": [ + { + "type": "text", + "id": "Y5vi7JlFMdxgh4SbYm5LG" + } + ], + "updated": 1767684575234, + "link": null, + "locked": false + }, + { + "id": "Y5vi7JlFMdxgh4SbYm5LG", + "type": "text", + "x": 1124.1938746791545, + "y": 4059.3711087687634, + "width": 55.21995544433594, + "height": 25, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b5j", + "roundness": null, + "seed": 537210707, + "version": 1649, + "versionNonce": 993967027, + "isDeleted": false, + "boundElements": [], + "updated": 1767684575234, + "link": null, + "locked": false, + "text": "GPFS", + "fontSize": 20, + "fontFamily": 5, + "textAlign": "center", + "verticalAlign": "middle", + "containerId": "I7ATDrhNb6eDEYhRqzYfp", + "originalText": "GPFS", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "xDPUWf7m9nXCB78-A-vMP", + "type": "rectangle", + "x": 245.5733059569502, + "y": 4326.310559237858, + "width": 566.5404057062265, + "height": 60, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b5k", + "roundness": null, + "seed": 1827261683, + "version": 2202, + "versionNonce": 368233811, + "isDeleted": false, + "boundElements": [ + { + "type": "text", + "id": "Q2W49xaHGQW3e7Xc2e0Hm" + } + ], + "updated": 1767684575234, + "link": null, + "locked": false + }, + { + "id": "Q2W49xaHGQW3e7Xc2e0Hm", + "type": "text", + "x": 462.99356374170407, + "y": 4343.810559237858, + "width": 131.69989013671875, + "height": 25, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b5l", + "roundness": null, + "seed": 447329939, + "version": 2329, + "versionNonce": 1332980467, + "isDeleted": false, + "boundElements": [], + "updated": 1767684575234, + "link": null, + "locked": false, + "text": "weight & bias", + "fontSize": 20, + "fontFamily": 5, + "textAlign": "center", + "verticalAlign": "middle", + "containerId": "xDPUWf7m9nXCB78-A-vMP", + "originalText": "weight & bias", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "9rlu1cDuS6ea839qCFeyC", + "type": "rectangle", + "x": 420.04983792153826, + "y": 4202.256443572021, + "width": 156.66668701171875, + "height": 85, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "#ffc9c9", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b5m", + "roundness": null, + "seed": 133917747, + "version": 1180, + "versionNonce": 206034067, + "isDeleted": false, + "boundElements": [ + { + "type": "text", + "id": "OMatW1nuQfmSyfQVTADJH" + } + ], + "updated": 1767684575234, + "link": null, + "locked": false + }, + { + "id": "OMatW1nuQfmSyfQVTADJH", + "type": "text", + "x": 464.1732128605031, + "y": 4219.756443572021, + "width": 68.41993713378906, + "height": 50, + "angle": 0, + "strokeColor": 
"#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b5n", + "roundness": null, + "seed": 2026204627, + "version": 1205, + "versionNonce": 472238643, + "isDeleted": false, + "boundElements": [], + "updated": 1767684575234, + "link": null, + "locked": false, + "text": "model\nServing", + "fontSize": 20, + "fontFamily": 5, + "textAlign": "center", + "verticalAlign": "middle", + "containerId": "9rlu1cDuS6ea839qCFeyC", + "originalText": "model\nServing", + "autoResize": true, + "lineHeight": 1.25 } ], "appState": { diff --git a/specs/mvp/v3.8/ray_serve.md b/specs/mvp/v3.8/ray_serve.md new file mode 100644 index 0000000..c2f8a96 --- /dev/null +++ b/specs/mvp/v3.8/ray_serve.md @@ -0,0 +1,314 @@ + +API参考资料 +https://docs.ray.io/en/latest/serve/api/doc/ray.serve.llm.LLMConfig.html + +ray.serve.llm.LLMConfig +pydantic model ray.serve.llm.LLMConfig[source] +The configuration for starting an LLM deployment. + +PublicAPI (alpha): This API is in alpha and may change before becoming stable. + +field accelerator_type: str | None = None +The type of accelerator runs the model on. Only the following values are supported: [‘V100’, ‘P100’, ‘T4’, ‘P4’, ‘K80’, ‘A10G’, ‘L4’, ‘L40S’, ‘A100’, ‘H100’, ‘H200’, ‘H20’, ‘B200’, ‘Intel-GPU-Max-1550’, ‘Intel-GPU-Max-1100’, ‘Intel-GAUDI’, ‘AMD-Instinct-MI100’, ‘AMD-Instinct-MI250X’, ‘AMD-Instinct-MI250X-MI250’, ‘AMD-Instinct-MI210’, ‘AMD-Instinct-MI300A’, ‘AMD-Instinct-MI300X-OAM’, ‘AMD-Instinct-MI300X-HF’, ‘AMD-Instinct-MI308X’, ‘AMD-Instinct-MI325X-OAM’, ‘AMD-Instinct-MI350X-OAM’, ‘AMD-Instinct-MI355X-OAM’, ‘AMD-Radeon-R9-200-HD-7900’, ‘AMD-Radeon-HD-7900’, ‘aws-neuron-core’, ‘TPU-V2’, ‘TPU-V3’, ‘TPU-V4’, ‘TPU-V5P’, ‘TPU-V5LITEPOD’, ‘TPU-V6E’, ‘Ascend910B’, ‘Ascend910B4’, ‘MXC500’, ‘MXC550’, ‘A100-40G’, ‘A100-80G’] + +field callback_config: CallbackConfig [Optional] +Callback configuration to use for model initialization. Can be a string path to a class or a Callback subclass. + +field deployment_config: Dict[str, Any] [Optional] +The Ray @server.deployment options. Supported fields are: name, num_replicas, ray_actor_options, max_ongoing_requests, autoscaling_config, max_queued_requests, user_config, health_check_period_s, health_check_timeout_s, graceful_shutdown_wait_loop_s, graceful_shutdown_timeout_s, logging_config, request_router_config. For more details, see the Ray Serve Documentation. + +field engine_kwargs: Dict[str, Any] = {} +Additional keyword arguments for the engine. In case of vLLM, this will include all the configuration knobs they provide out of the box, except for tensor-parallelism which is set automatically from Ray Serve configs. + +field experimental_configs: Dict[str, Any] [Optional] +Experimental configurations for Ray Serve LLM. This is a dictionary of key-value pairs. Current supported keys are: - stream_batching_interval_ms: Ray Serve LLM batches streaming requests together. This config decides how long to wait for the batch before processing the requests. Defaults to 50.0. - num_ingress_replicas: The number of replicas for the router. Ray Serve will take the max amount all the replicas. Default would be 2 router replicas per model replica. + +field llm_engine: str = 'vLLM' +The LLMEngine that should be used to run the model. Only the following values are supported: [‘vLLM’] + +field log_engine_metrics: bool | None = True +Enable additional engine metrics via Ray Prometheus port. 
+ +field lora_config: Dict[str, Any] | LoraConfig | None = None +Settings for LoRA adapter. Validated against LoraConfig. + +field model_loading_config: Dict[str, Any] | ModelLoadingConfig [Required] +The settings for how to download and expose the model. Validated against ModelLoadingConfig. + +field placement_group_config: Dict[str, Any] | None = None +Ray placement group configuration for scheduling vLLM engine workers. Defines resource bundles and placement strategy for multi-node deployments. Should contain ‘bundles’ (list of resource dicts) and optionally ‘strategy’ (defaults to ‘PACK’). Example: {‘bundles’: [{‘GPU’: 1, ‘CPU’: 2}], ‘strategy’: ‘PACK’} + +field runtime_env: Dict[str, Any] | None = None +The runtime_env to use for the model deployment replica and the engine workers. + +apply_checkpoint_info(model_id_or_path: str, trust_remote_code: bool = False) → None[source] +Apply the checkpoint info to the model config. + +classmethod from_file(path: str, **kwargs) → ModelT +Load a model from a YAML file path. + +get_engine_config() → None | VLLMEngineConfig[source] +Returns the engine config for the given LLM config. + +LLMConfig not only has engine config but also deployment config, etc. + +get_or_create_callback() → CallbackBase | None[source] +Get or create the callback instance for this process. + +This ensures one callback instance per process (singleton pattern). The instance is cached so the same object is used across all hooks. + +Returns +: +Instance of class that implements Callback + +multiplex_config() → ServeMultiplexConfig[source] +classmethod parse_yaml(file, **kwargs) → ModelT +setup_engine_backend()[source] +update_engine_kwargs(**kwargs: Any) → None[source] +Update the engine_kwargs and the engine_config engine_kwargs. + +This is typically called during engine starts, when certain engine_kwargs (e.g., data_parallel_rank) become available. + +validator validate_accelerator_type » accelerator_type[source] +validator validate_deployment_config » deployment_config[source] +Validates the deployment config dictionary. + +validator validate_experimental_configs » experimental_configs[source] +Validates the experimental configs dictionary. + +validator validate_llm_engine » llm_engine[source] +Validates the llm_engine string value. + +validator validate_lora_config » lora_config[source] +Validates the lora config dictionary. + +validator validate_model_loading_config » model_loading_config[source] +Validates the model loading config dictionary. + +property input_modality: str +Returns the input modality of the model. There could be more types in the future. Right now assumes if the model doesn’t support version, it’ll be text. + +property max_request_context_length: int | None +property model_architecture: str +property model_id: str +property supports_vision: bool + +# Python API +ray serve api +https://docs.ray.io/en/latest/serve/api/index.html#serve-api + + +Python API +Writing Applications +serve.Deployment + +Class (or function) decorated with the @serve.deployment decorator. + +serve.Application + +One or more deployments bound with arguments that can be deployed together. + +Deployment Decorators +serve.deployment + +Decorator that converts a Python class to a Deployment. + +serve.ingress + +Wrap a deployment class with an ASGI application for HTTP request parsing. + +serve.batch + +Converts a function to asynchronously handle batches. + +serve.multiplexed + +Wrap a callable or method used to load multiplexed models in a replica. 
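+
+A minimal, illustrative sketch (not part of the upstream reference) of how the decorator and application APIs listed above fit together; the deployment class `EchoDeployment`, the app name `echo_app`, and the route prefix are placeholder assumptions, not anything prescribed by the docs:
+
+```python
+# Hedged sketch: combine @serve.deployment, .bind(), and serve.run.
+# `EchoDeployment`, "echo_app", and "/echo" are illustrative placeholders.
+from ray import serve
+from starlette.requests import Request
+
+
+@serve.deployment(num_replicas=1)
+class EchoDeployment:
+    async def __call__(self, request: Request) -> str:
+        # Echo the HTTP request body back to the caller.
+        body = await request.body()
+        return body.decode("utf-8")
+
+
+# Bind arguments to produce an Application, then deploy it on the running cluster.
+app = EchoDeployment.bind()
+# serve.run deploys the app and returns a handle to its ingress deployment.
+handle = serve.run(app, name="echo_app", route_prefix="/echo")
+```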
+ +Deployment Handles +Note + +The deprecated RayServeHandle and RayServeSyncHandle APIs have been fully removed as of Ray 2.10. See the model composition guide for how to update code to use the DeploymentHandle API instead. + +serve.handle.DeploymentHandle + +A handle used to make requests to a deployment at runtime. + +serve.handle.DeploymentResponse + +A future-like object wrapping the result of a unary deployment handle call. + +serve.handle.DeploymentResponseGenerator + +A future-like object wrapping the result of a streaming deployment handle call. + +Running Applications +serve.start + +Start Serve on the cluster. + +serve.run + +Run an application and return a handle to its ingress deployment. + +serve.delete + +Delete an application by its name. + +serve.status + +Get the status of Serve on the cluster. + +serve.shutdown + +Completely shut down Serve on the cluster. + +serve.shutdown_async + +Completely shut down Serve on the cluster asynchronously. + +Configurations +serve.config.ProxyLocation + +Config for where to run proxies to receive ingress traffic to the cluster. + +serve.config.gRPCOptions + +gRPC options for the proxies. + +serve.config.HTTPOptions + +HTTP options for the proxies. + +serve.config.AutoscalingConfig + +Config for the Serve Autoscaler. + +serve.config.AutoscalingPolicy + +PublicAPI (alpha): This API is in alpha and may change before becoming stable. + +serve.config.AutoscalingContext + +Rich context provided to custom autoscaling policies. + +serve.config.AggregationFunction + +An enumeration. + +serve.config.RequestRouterConfig + +Config for the Serve request router. + +Schemas +serve.schema.ServeActorDetails + +Detailed info about a Ray Serve actor. + +serve.schema.ProxyDetails + +Detailed info about a Ray Serve ProxyActor. + +serve.schema.ApplicationStatusOverview + +Describes the status of an application and all its deployments. + +serve.schema.ServeStatus + +Describes the status of Serve. + +serve.schema.DeploymentStatusOverview + +Describes the status of a deployment. + +serve.schema.EncodingType + +Encoding type for the serve logs. + +serve.schema.AutoscalingMetricsHealth + +An enumeration. + +serve.schema.AutoscalingStatus + +An enumeration. + +serve.schema.ScalingDecision + +One autoscaling decision with minimal provenance. + +serve.schema.DeploymentAutoscalingDetail + +Deployment-level autoscaler observability. + +serve.schema.ReplicaRank + +Replica rank model. + +Request Router +serve.request_router.ReplicaID + +A unique identifier for a replica. + +serve.request_router.PendingRequest + +A request that is pending execution by a replica. + +serve.request_router.RunningReplica + +Contains info on a running replica. + +serve.request_router.FIFOMixin + +Mixin for FIFO routing. + +serve.request_router.LocalityMixin + +Mixin for locality routing. + +serve.request_router.MultiplexMixin + +Mixin for multiplex routing. + +serve.request_router.RequestRouter + +Abstract interface for a request router (how the router calls it). + +Advanced APIs +serve.get_replica_context + +Returns the deployment and replica tag from within a replica at runtime. + +serve.context.ReplicaContext + +Stores runtime context info for replicas. + +serve.get_multiplexed_model_id + +Get the multiplexed model ID for the current request. + +serve.get_app_handle + +Get a handle to the application's ingress deployment by name. + +serve.get_deployment_handle + +Get a handle to a deployment by name. + +serve.grpc_util.RayServegRPCContext + +Context manager to set and get gRPC context. 
+ +serve.exceptions.BackPressureError + +Raised when max_queued_requests is exceeded on a DeploymentHandle. + +serve.exceptions.RayServeException + +serve.exceptions.RequestCancelledError + +Raise when a Serve request is cancelled. + +serve.exceptions.DeploymentUnavailableError + +Raised when a Serve deployment is unavailable to receive requests. \ No newline at end of file diff --git a/specs/mvp/v3.8/ray_serve_llm.md b/specs/mvp/v3.8/ray_serve_llm.md new file mode 100644 index 0000000..89e61aa --- /dev/null +++ b/specs/mvp/v3.8/ray_serve_llm.md @@ -0,0 +1,87 @@ + +基于提供的来源,以下是使用 **Builder Pattern(构建器模式)** 结合 Ray Serve 和 vllm 动态部署**中型大语言模型(Medium-sized LLM)**的原理与操作方案。 + +### 一、 核心原理 + +1. **中型 LLM 定义**:中型模型(如 Llama-3.1-70B)通常具有约 70B 参数。它们通常运行在**单个节点**上,利用 **4 到 8 个 GPU**。 +2. **Builder Pattern 机制**:该模式通过 `build_openai_app` 函数提供高度抽象。开发者只需定义一个 `LLMConfig` 对象,即可自动构建并链接底层的 `LLMServer` 和 `OpenAiIngress` 组件。 +3. **高性能后端 (vLLM)**:Ray Serve LLM 使用 vLLM 作为推理引擎,支持高性能推理和显存管理。 +4. **动态扩缩容与资源调度**: + * **张量并行 (Tensor Parallelism)**:通过 `tensor_parallel_size` 将模型权重均匀分布在单节点的所有 GPU 上。 + * **副本缩放 (Autoscaling)**:通过 `autoscaling_config` 动态调整 `min_replicas` 和 `max_replicas`,使服务能根据实时流量增减推理副本。 + +--- + +### 二、 操作方案 + +#### 1. 环境准备 +确保已安装必要的依赖包并配置 Hugging Face 访问令牌(针对 Llama-3.1 等受限模型)。 +```bash +pip install "ray[serve,llm]" +export HF_TOKEN= +``` + +#### 2. 编写部署脚本 (`serve_medium_llm.py`) +使用 **Builder Pattern** 定义配置并构建应用。以下示例配置了一个典型的 70B 模型部署: + +```python +# serve_medium_llm.py +from ray.serve.llm import LLMConfig, build_openai_app +import os + +llm_config = LLMConfig( + model_loading_config=dict( + model_id="my-llama-3.1-70b", + model_source="meta-llama/Llama-3.1-70B-Instruct", + ), + accelerator_type="A100-40G", # 或 L40S + deployment_config=dict( + autoscaling_config=dict( + min_replicas=1, # 最小副本数 + max_replicas=4, # 最大副本数,实现动态扩展 + ) + ), + runtime_env=dict(env_vars={"HF_TOKEN": os.environ.get("HF_TOKEN")}), + engine_kwargs=dict( + max_model_len=32768, # 上下文长度 + tensor_parallel_size=8, # 在单节点的 8 个 GPU 间拆分权重 + ), +) + +# 使用 Builder Pattern 构建应用 +app = build_openai_app({"llm_configs": [llm_config]}) +``` + +#### 3. 启动部署 +在终端运行以下命令启动服务: +```bash +serve run serve_medium_llm:app +``` +部署过程通常需要几分钟,包括配置集群、启动 vLLM 服务器以及下载模型权重。 + +#### 4. 发送请求测试 +服务启动后,可以通过符合 OpenAI 标准的接口进行访问。 +```python +from openai import OpenAI + +client = OpenAI(base_url="http://localhost:8000/v1", api_key="FAKE_KEY") +response = client.chat.completions.create( + model="my-llama-3.1-70b", + messages=[{"role": "user", "content": "解释一下什么是量子纠缠?"}], + stream=True +) +for chunk in response: + if chunk.choices.delta.content: + print(chunk.choices.delta.content, end="", flush=True) +``` + +--- + +### 三、 性能与并发优化建议 + +* **提高并发量**:可以通过降低 `max_model_len` 来减少 KV 缓存所需的显存,从而显著提升每个副本支持的最大并发请求数。 +* **监控指标**:通过 Ray Serve LLM 仪表盘监控 **TTFT(首字延迟)**、**TPOT(单字延迟)** 和 **Token 吞吐量** 来评估服务性能。 +* **精度折衷**:对于资源受限的场景,可以使用**量化模型**(如 FP8)来减少模型内存占用,为 KV 缓存留出更多空间,进而提高并发能力。 + +**比喻理解**: +部署**中型 LLM** 就像是在一个大型车间里组装一台复杂的精密机器(模型权重)。**Builder Pattern** 是你的“全自动组装线”,你只需设定好机器的参数(Config),生产线就会自动帮你把零件固定好并接通电源。而 **vLLM 和张量并行** 就像是让 8 个熟练工人(GPU)共同抬起这台沉重的机器,每个人只负责自己那一部分的力气,从而让机器能够平稳地运转。 \ No newline at end of file diff --git a/specs/mvp/v3.8/requirements.md b/specs/mvp/v3.8/requirements.md new file mode 100644 index 0000000..aaaa642 --- /dev/null +++ b/specs/mvp/v3.8/requirements.md @@ -0,0 +1,8 @@ + +1. 通过ray serve(后端vllm)来动态拉起llm,支持多模型application部署, +2. 默认一个模型只有一个replica,用户配置可以多个 +3. 用户可以删除(下线)模型 +4. 可以指定模型用几张卡 +5. 通过WebUI来进行配置,查看当前部署的模型列表,以及可以查看详情 +6. 模型路径可以使用common,也可以用户自己指定user路径 +7. 
\ No newline at end of file diff --git a/specs/mvp/v3.8/v3.8_api.md b/specs/mvp/v3.8/v3.8_api.md new file mode 100644 index 0000000..813f2a9 --- /dev/null +++ b/specs/mvp/v3.8/v3.8_api.md @@ -0,0 +1,224 @@ +# MVP v3.8 API Reference(Serving) + +> 说明:本节为 v3.8 新增的 **Model Serving** API(Ray Serve LLM / vLLM)。 +> 认证:Serving 管理 API 复用现有 MVP API 的认证方式(`Authorization: Bearer `)。 +> 推理:对外 OpenAI endpoint **不做鉴权**(v3.8 约定)。 + +## 0. 基本信息 + +### 0.1 Base URLs + +- MVP API server:`http://:8080` +- Ray Serve OpenAI ingress(固定端口 8000):`http://:8000/v1` + +### 0.2 认证 + +所有 `/api/v2/serve/*` 接口要求: + +``` +Authorization: Bearer +``` + +其中 `user_token` 由管理员通过 `/api/v2/users//tokens` 颁发(沿用现有机制)。 + +### 0.3 命名规则:`model_id = user_id-YYYYMMDDHHMM-` + +- 用户提交时填写 `model_id`(语义为 suffix,例如 `qwen-0.5b`) +- 平台生成前缀: + - `prefix = "-"` +- 平台实际对外暴露的 OpenAI model 名称为: + - `model_id = "-"` + - 示例:`alice-202601061235-qwen-0.5b` + +## 1. 数据结构 + +### 1.1 ServingSpec(YAML) + +请求体建议使用 YAML(与 TaskSpec 一致),示例: + +```yaml +model_id: qwen-0.5b # 必填:suffix(平台自动加 user_id- 前缀) +model_source: $HOME/common/hf/.../ # 必填:本地路径或 repo id;平台做 $HOME 宏替换与路径校验 +num_replicas: 1 # 可选,默认 1 +gpus_per_replica: 1 # 可选,默认 1 +# engine_kwargs: # 可选:vLLM 参数透传(白名单/黑名单由实现决定) +# max_model_len: 8192 +# gpu_memory_utilization: 0.9 +``` + +说明: +- `accelerator_type` 不在 ServingSpec 中暴露;由平台配置(`dev.yaml` 的 `serving.llm.accelerator_type`)统一注入到 Ray Serve LLM 的 `LLMConfig.accelerator_type`(dev/h1: `H20`)。 + +#### 宏替换 + +- `$HOME` → `/private/users/` +- `$HOME/common/hf` → `/private/hf` +- `$HOME/common/datasets` → `/private/datasets`(serving 不强依赖,但保留一致语义) + +#### 路径校验(v3.8 约定) + +`model_source` 允许: + +- `/private/hf/...`(common) +- `/private/users//...`(user) + +拒绝: + +- 其它用户目录 +- 非 `/private` 下路径 +- 空路径或包含 `..` 的可疑路径 + +### 1.2 ServingModel(响应体,JSON) + +```json +{ + "model_key": "svc-alice-20260106-123000-abcd", + "user_id": "alice", + "model_id": "alice-202601061235-qwen-0.5b", + "model_id_suffix": "qwen-0.5b", + "model_id_prefix": "alice-202601061235", + "model_source": "/private/hf/hub/models--.../snapshots/", + "num_replicas": 1, + "gpus_per_replica": 1, + "total_gpus": 1, + "state": "RUNNING", + "endpoint": { + "openai_base_url": "http://:8000/v1", + "model": "alice-202601061235-qwen-0.5b" + }, + "error_summary": null, + "created_at": "2026-01-06T12:30:00Z", + "updated_at": "2026-01-06T12:31:02Z" +} +``` + +## 2. 管理 API(MVP API server) + +### 2.1 Create / Upsert model + +`POST /api/v2/serve/models` + +#### Request + +- Header: `Content-Type: application/yaml` +- Body: ServingSpec(YAML) + +#### Response (202) + +```json +{ + "model_key": "svc-alice-20260106-123000-abcd", + "state": "QUEUED" +} +``` + +语义: +- 创建新模型(若 suffix 不存在) +- 或更新已有模型(若同一用户同一 suffix 已存在):更新 replicas/gpu 等配置,进入 `QUEUED` 等待 reconciler apply + +### 2.2 List models (current user) + +`GET /api/v2/serve/models` + +#### Response (200) + +```json +{ + "items": [ ... ServingModel ... ], + "openai_base_url": "http://:8000/v1" +} +``` + +### 2.3 Get model detail + +`GET /api/v2/serve/models/{model_key}` + +#### Response (200) + +```json +{ + "model": { ... ServingModel ... 
}, + "resolved_spec_yaml": "model_id: ...\nmodel_source: ...\n", + "events": [ + { "event_type": "DEPLOY_REQUESTED", "created_at": "...", "payload": {...} } + ], + "serve_status": { + "app_name": "argus_llm_app", + "app_status": "RUNNING" + } +} +``` + +### 2.4 Scale replicas (PATCH) + +`PATCH /api/v2/serve/models/{model_key}` + +#### Request (JSON) + +```json +{ "num_replicas": 2 } +``` + +#### Response (200) + +```json +{ "model_key": "...", "state": "QUEUED" } +``` + +> v3.8 只支持修改 `num_replicas`(以及可选 engine_kwargs);`gpus_per_replica` 若修改,可能触发重新部署。 + +### 2.5 Delete / Undeploy model + +`DELETE /api/v2/serve/models/{model_key}` + +#### Response (200) + +```json +{ "model_key": "...", "state": "DELETING" } +``` + +语义:从“声明式配置”中删除该模型,reconciler 会在下一轮 tick 触发 `serve.run(...)` 更新 app 配置并最终使其不可见。 + +### 2.6 Admin: Serve cluster status(可选) + +`GET /api/v2/serve/status` + +#### Response (200) + +返回 `serve.status()` 摘要(集群级 + app 级)。 + +> 仅 admin token 可访问(沿用 v3.x admin gate)。 + +## 3. 推理 API(Ray Serve OpenAI ingress) + +> v3.8 不做鉴权:无需 `Authorization`。 + +### 3.1 List models + +`GET http://:8000/v1/models` + +返回可用 model 列表(包含 `alice-qwen-0.5b` 这类带前缀名称)。 + +### 3.2 Chat completions + +`POST http://:8000/v1/chat/completions` + +```json +{ + "model": "alice-202601061235-qwen-0.5b", + "messages": [{"role":"user","content":"Hello"}], + "stream": false +} +``` + +### 3.3 Completions / Embeddings + +按 Ray Serve LLM OpenAI ingress 支持范围提供(v3.8 验收至少覆盖 chat)。 + +## 4. 错误码约定(MVP API server) + +- `400 invalid yaml/spec`:YAML 解析失败、字段缺失、值不合法 +- `403 forbidden`:路径越权(model_source 访问其他用户目录) +- `409 conflict`:model_id_suffix 冲突(同一用户重复创建且不允许覆盖时;若选择 upsert 则不返回该错误) +- `422 unprocessable`:资源参数非法(replica/gpu <=0) +- `500 internal`:reconciler/serve 调用异常(详情记录到 `serve_events`,并写入 `error_summary`) diff --git a/specs/mvp/v3.8/v3.8_design.md b/specs/mvp/v3.8/v3.8_design.md new file mode 100644 index 0000000..a69a99c --- /dev/null +++ b/specs/mvp/v3.8/v3.8_design.md @@ -0,0 +1,371 @@ +# MVP v3.8 详细设计方案:Ray Serve(vLLM)模型动态部署与管理 + +> 基线:当前已具备 v3.7 能力(训练平台 + W&B + SFTPGo + WebUI/API + Ray stateless pool,训练侧默认 rollout=vllm)。 +> v3.8 目标:在同一套 Ray 集群上,引入 **Ray Serve LLM(后端 vLLM)** 的模型推理服务能力,并通过 WebUI/API 动态管理模型生命周期。 + +## 0. 需求范围(来自 requirements.md) + +1) 通过 Ray Serve(后端 vLLM)动态拉起 LLM,支持**多模型 application** 部署 +2) 默认一个模型 1 个 replica,用户可配置多个 +3) 用户可删除(下线)模型 +4) 用户可指定模型使用几张 GPU +5) WebUI 可配置、查看模型列表、查看详情 +6) 模型路径可用 common,也可用 user 路径(本地路径) + +## 1. 总体架构 + +### 1.1 组件关系 + +v3.8 在现有“训练平台”之上新增一个 **Serving 子系统**: + +- **API server(现有)** + - 新增 Serving API(模型部署/删除/扩缩容/状态) + - 新增 Serving 后台线程(reconciler):周期性对齐 DB 与 Ray Serve 实际状态 +- **SQLite(现有)** + - 新增 `serve_models`、`serve_events` 等表,保存声明式配置与状态 +- **Ray 集群(现有 stateless pool)** + - 复用现有 head/worker 容器 + - 在集群内启动 Ray Serve(controller + proxy + deployments) +- **Ray Serve LLM(新增)** + - 通过 `ray.serve.llm.build_openai_app` 构建一个 OpenAI-compatible app + - app 内包含多个 `LLMConfig`(每个对应一个模型) + +### 1.2 为什么选择“单个 multi-model application” + +Ray Serve 支持 multi-app,但在 dev/docker 场景下多个 app 的 route_prefix 管理更复杂;同时 requirements 要求“多模型 application 部署”,因此 v3.8 采用: + +- 一个固定的 app:`argus_llm_app`(名字可配置) +- route_prefix 固定为 `/`(对外暴露 `/v1/...` OpenAI 接口) +- 每个模型对应一个 `LLMConfig`,通过 `model_id` 区分(即 OpenAI API 里的 `model` 字段) + +这样对用户而言最直观: + +- base_url 固定:`http://:8000/v1` +- `model=` 选择不同模型(`/v1/models` 自动列出) + +## 2. 
Ray Serve 部署策略(dev/h1 约束) + +### 2.1 HTTP 入口端口与 docker compose + +Ray Serve 默认 HTTP 端口是 `8000`。v3.8 约定: + +- 在 **head 容器** 映射 `8000:8000` +- API server 仍在 `8080` +- Ray Dashboard 在 `8265` + +原因:在单机多容器 docker 环境里,如果让 proxy “每个节点都起”,会出现多个容器同时想绑定同一个 host 端口的问题(不可行)。因此 v3.8 推荐: + +- Serve proxy 位置设为 **HeadOnly**(只在 head 上提供 HTTP 入口) +- GPU replica 仍运行在 worker 上(proxy 只转发,不跑推理) + +> 需要注意: +> - Serve 的 HTTP 配置(host/port/proxy_location)是 **Ray 集群全局配置**,启动后无法动态修改,因此应当在平台启动时一次性设定并持久化。 +> - proxy Actor 需要 CPU 资源;head 节点的 `num-cpus=0` 策略可能需要在 v3.8 做小幅调整(例如给 head 保留少量 CPU),但仍通过 `entrypoint_resources` 确保训练 driver 不会被调度到 head。 + +#### 2.1.1 compose 预期改动(v3.8 实现时落地) + +- `src/mvp/docker-compose.yaml`(ray_head)新增: + - `ports: - "8000:8000"` + +> worker 容器不暴露 8000(避免 host 端口冲突),由 head proxy 统一对外提供入口。 + +### 2.2 启动/配置方式(Python SDK 优先) + +v3.8 采用 Ray Serve Python SDK: + +- `ray.init(address="auto")` +- `serve.start(proxy_location="HeadOnly", http_options={"host":"0.0.0.0","port":8000})`(一次性全局配置) +- `serve.run(app, name=, route_prefix="/")` +- `serve.delete(name=)`(必要时) +- `serve.status()` 查询集群/应用状态 + +理由: + +- 避免在平台内部引入额外 REST client 依赖(并减少跨版本 REST schema 不稳定风险) +- API server 本身运行在 head 容器内,可直接 `ray.init(address="auto")` 连接现有集群 + +> 另:Ray Dashboard 暴露 Serve REST API(`PUT /api/serve/applications/` 等)可作为备选方案,但 v3.8 先不以它为主通路。 + +### 2.3 依赖与镜像假设 + +v3.8 依赖: + +- `ray[serve]`(Serve Controller/Proxy) +- `ray[llm]`(Ray Serve LLM 的 `ray.serve.llm` 模块) +- vLLM(推理引擎) + +由于 v3.7 已切换到 `verlai/verl:vllm011.latest`,预期镜像内包含 vLLM;但 `ray.serve.llm` 是否开箱即用需要在实现阶段确认。 +若缺失,v3.8 将在 `argus-ray-node` 镜像构建阶段补充 `pip install "ray[serve,llm]"`(或按官方建议的最小依赖)并做版本锁定。 + +### 2.4 Serving 配置(dev.yaml) + +v3.8 新增一段 serving 配置,至少包含: + +```yaml +serving: + serve: + http_port: 8000 # 固定 8000 + proxy_location: HeadOnly # dev/docker 下推荐 + llm: + accelerator_type: H20 # dev 环境填写 H20(对应 ray.serve.llm.LLMConfig.accelerator_type) +``` + +说明: +- `accelerator_type` 是 Ray Serve LLM 的 `LLMConfig.accelerator_type` 字段,用于表达“该模型运行在哪类加速卡上”。在 dev/h1 环境我们固定为 `H20`。 +- v3.8 不把 `accelerator_type` 暴露给普通用户编辑(避免误配);由部署环境配置统一决定。 + +## 3. 
模型配置与资源映射 + +### 3.1 关键配置对象:`ray.serve.llm.LLMConfig` + +每个模型部署由一个 `LLMConfig` 描述,关键字段(v3.8 用到的子集): + +- `model_loading_config` + - `model_id`: 对外展示/请求时用的模型名(唯一 key) + - `model_source`: HF repo id / S3 / **local path** +- `accelerator_type` + - 从 `dev.yaml` 的 `serving.llm.accelerator_type` 读取(dev/h1: `H20`) +- `deployment_config` + - `num_replicas` 或 `autoscaling_config`(v3.8 先用固定 `num_replicas`) + - `ray_actor_options`(CPU/资源约束) +- `engine_kwargs` + - vLLM 相关参数(`max_model_len`、`gpu_memory_utilization` 等) +- `placement_group_config` + - 控制 vLLM engine workers 使用的资源 bundle(用于多 GPU / 跨节点) +- `runtime_env` + - 注入 HF cache、离线开关等环境变量 + +### 3.2 GPU 张数(gpus_per_replica)如何落到 LLMConfig + +v3.8 把用户输入的: + +- `gpus_per_replica = N` + +映射为: + +- `engine_kwargs.tensor_parallel_size = N`(单机/跨机张量并行,Ray Serve LLM 官方示例写法) +- `placement_group_config = {"bundles": [{"GPU": 1, "CPU": }] * N, "strategy": "PACK"}` + +并在 `engine_kwargs` 中保留 vLLM 其他参数(`max_model_len`、`gpu_memory_utilization` 等)。 + +> 兼容性说明:Ray Serve LLM/Serve LLM 仍处于快速演进阶段;v3.8 会以我们线上实际 Ray 版本为准做最小适配与回归测试。 + +### 3.2.1 跨节点场景(N > 单机 GPU) + +Ray Serve LLM 默认使用 `PACK` 策略,优先把 GPU worker 放在尽量少的节点上;如果单机放不下,会自动 spill 到其它节点,从而支持跨节点张量并行(TP)部署。 + +### 3.3 replica 数(num_replicas) + +v3.8 默认: + +- `num_replicas = 1` + +允许用户在 UI 中设置为 `>=1`。 +多 replica 会线性消耗 GPU(`num_replicas * gpus_per_replica`),需要做资源预检查。 + +### 3.4 模型路径与宏替换(common / user) + +v3.8 支持两类模型来源: + +1) **common** +- 典型为 `/private/hf/...`(共享 HF cache / snapshot) + +2) **user** +- `/private/users//models/...` +- 以及用户训练输出(例如 `jobs//checkpoints/.../huggingface`) + +为保证 UI 易用,沿用平台已有的宏语义: + +- `$HOME` → `/private/users/` +- `$HOME/common/hf` → `/private/hf` + +并进行路径校验: + +- 允许前缀:`/private/hf`、`/private/users//` +- 拒绝:越权访问其他用户目录、或访问系统敏感路径 + +### 3.5 离线模式(避免 HF mirror 429) + +v3.7 训练侧已验证 `HF_HUB_OFFLINE=1` 的必要性。v3.8 Serving 侧同样默认注入: + +- `HF_HOME=/private/hf` +- `HUGGINGFACE_HUB_CACHE=/private/hf/hub` +- `TRANSFORMERS_CACHE=/private/hf/transformers` +- `HF_HUB_OFFLINE=1` +- `HF_ENDPOINT=https://hf-mirror.com`(可保留,但离线模式下不应触发网络) + +并建议用户在 ServingSpec 中尽量填写 **local path** 作为 `model_source`,而不是直接 repo id。 + +## 4. 平台数据模型(SQLite) + +新增两张主表: + +### 4.1 `serve_models` + +每一行代表一个“声明式模型部署”: + +- `model_key`(平台内部唯一 ID,便于重命名/去重) +- `user_id` +- `model_id`(对外 OpenAI model 名称,要求 per-app 唯一) +- `model_source`(本地路径或 repo id,存 resolved 后的结果) +- `num_replicas` +- `gpus_per_replica` +- `engine_kwargs_json`(可选) +- `state`:`QUEUED | DEPLOYING | RUNNING | FAILED | DELETING | DELETED` +- `serve_app_name`(默认 `argus_llm_app`) +- `created_at / updated_at` +- `error_summary` + +### 4.2 `serve_events` + +记录关键事件与排障信息(类似 task_events): + +- `id` +- `model_key` +- `event_type`(DEPLOY_REQUESTED/DEPLOY_APPLIED/STATUS_SYNC/DELETE_REQUESTED/...) +- `payload_json` +- `created_at` + +## 5. 
API 设计(新增) + +在现有 `Authorization: Bearer ` 的认证体系下,新增 Serving API(路径仅示意,具体在实现时与现有 `api/v2` 对齐)。 + +### 5.1 用户接口 + +- `POST /api/v2/serve/models` + - body: YAML 或 JSON(v3.8 先用 YAML 与现有 TaskSpec 一致) + - 创建/更新(upsert)一个模型配置,进入 `QUEUED` +- `GET /api/v2/serve/models` + - 列出当前用户的模型列表(含 state、资源、endpoint) +- `GET /api/v2/serve/models/{model_key}` + - 详情:完整 spec + 最近事件 + Serve status 摘要 +- `PATCH /api/v2/serve/models/{model_key}` + - 修改 `num_replicas`、或 engine_kwargs(可选) +- `DELETE /api/v2/serve/models/{model_key}` + - 下线模型(进入 `DELETING`) + +### 5.2 系统接口(admin) + +- `GET /api/v2/serve/status`(admin) + - 返回 `serve.status()` 的摘要(集群级 / app 级) + +### 5.3 对外推理 endpoint + +固定输出到 UI/接口中: + +- `openai_base_url = http://:8000/v1` +- 支持: + - `/v1/chat/completions` + - `/v1/completions` + - `/v1/embeddings` + - `/v1/models` + +> v3.8 不做额外网关与鉴权(保持与现有 dev 环境一致);若后续需要,可在 v3.9+ 引入 token 校验/反向代理。 + +### 5.4 `model_id` 前缀策略(user_id-) + +为避免多用户冲突并保持可读性: + +v3.8 采用“**user_id + 日期小时分钟**”作为稳定前缀,以降低冲突并便于快速定位创建时间: + +- 用户在 UI/API 中仅填写 `model_id_suffix`(或仍用字段名 `model_id`,但语义为 suffix) +- 平台计算实际对外 `model_id`: + - `prefix = f"{user_id}-{YYYYMMDDHHMM}"` + - `model_id = f"{prefix}-{model_id_suffix}"` +- 在列表/详情中同时展示: + - `model_id_suffix`(用户输入) + - `model_id_prefix`(平台生成,例如 `alice-202601061235`) + - `model_id`(对外 OpenAI 名称) + +## 6. 后台执行模型(Serving Reconciler) + +v3.8 参考任务 scheduler 的模式,引入一个轻量的 reconciler: + +- tick 周期(例如 5s) +- 每次 tick: + 1) 拉取 DB 中 `QUEUED/DEPLOYING/RUNNING/DELETING` 的模型 + 2) 调用 `serve.status()` 读取当前 app 及 deployments 状态 + 3) 若存在 `QUEUED` 或需要变更的模型:构建新的 multi-model app(包含全部 `RUNNING/DEPLOYING/QUEUED` 的模型配置)并 `serve.run(...)` + 4) 若存在 `DELETING`:从 app 配置中移除对应模型,并 `serve.run(...)` 应用变更 + 5) 更新每个模型的 state(依据 Serve status) + +重要行为说明(multi-model app 的代价): +- 每次“新增/删除/改 replicas”都会触发对同一个 app 的一次 `serve.run(...)` 更新; +- Ray Serve 会尽量做增量更新,但在某些版本/配置下可能导致 ingress/router 短暂重启; +- v3.8 先接受该代价(满足需求闭环优先);若后续需要“删除某模型不影响其它模型”,可演进为“每模型一个 app + 单独 route_prefix”的方案。 + +资源预检查: +- 在 apply 前使用 `ray.available_resources()` 做粗粒度 GPU 预检查: + - 需要 GPU 总量 = `sum(num_replicas * gpus_per_replica)`(仅对“新增/扩容的差量”更精确) +- 若不足: + - 模型保持 `QUEUED`,记录事件 `PENDING_RESOURCES` + - 用户 UI 显示“资源不足,等待释放” + +> v3.8 不引入更复杂的抢占/优先级。Serving 与 Training 会竞争 GPU;用户需要自行规划资源(或后续版本引入统一调度)。 + +## 7. WebUI 设计(新增 Serving 页面) + +新增侧边栏入口:**Serving** + +### 7.1 Serving 列表页 + +- 展示字段: + - model_id + - user_id(仅 admin 可见) + - replicas / gpus_per_replica / total_gpus + - state(RUNNING/DEPLOYING/QUEUED/FAILED) + - 操作:Scale(修改 replicas)、Delete + +### 7.2 Serving 创建/编辑页 + +两种模式(与 New Task 类似,先做 YAML 模式即可): + +示例 YAML(v3.8): + +```yaml +model_id: qwen-0.5b +model_source: $HOME/common/hf/hub/models--Qwen--Qwen2.5-0.5B-Instruct/snapshots/ +num_replicas: 1 +gpus_per_replica: 1 +# engine_kwargs: +# max_model_len: 8192 +# gpu_memory_utilization: 0.9 +``` + +### 7.3 Serving 详情页 + +- 完整配置(resolved spec) +- Serve status 摘要(deployments 状态、replica 健康) +- OpenAI 调用示例(python openai client) + +## 8. 验收标准(v3.8) + +1) 部署: +- 一键部署一个模型(1 replica、1 GPU)成功,状态变为 RUNNING +- `/v1/models` 可列出该模型 + +2) 扩缩容: +- 修改 `num_replicas` 生效(Serve status 看到副本数变化) + +3) 多模型: +- 同一个 app 内能同时部署 2 个模型(不同 model_id) +- 通过 OpenAI 接口用不同 `model=` 请求可得到响应 + +4) 下线: +- 删除某模型后 `/v1/models` 不再出现 + +5) 模型路径: +- 支持 `/private/hf/...`(common)与 `/private/users//...`(user)两类本地路径 + +6) 资源不足可解释: +- 当 GPU 不足时,模型进入 `QUEUED` 并在 UI/详情中提示“资源不足” + +## 9. 
待确认点(请你评审时确认) + +已确认(来自评审): + +1) 推理端口固定使用 `8000`(Ray Serve 默认端口)。 +2) 对外暴露的 OpenAI 接口 **不与现有 token 体系绑定**(v3.8 不做推理侧鉴权)。 +3) `model_id` 命名规则:平台统一加 `user_id + 日期小时分钟` 前缀,用户在 UI 里只填写后缀部分。 + +> 说明:这样可以避免跨用户 model_id 冲突,同时在 OpenAI API 的 `model=` 字段上自然可读。 diff --git a/specs/mvp/v3.8/v3.8_dev_plan.md b/specs/mvp/v3.8/v3.8_dev_plan.md new file mode 100644 index 0000000..49950f9 --- /dev/null +++ b/specs/mvp/v3.8/v3.8_dev_plan.md @@ -0,0 +1,266 @@ +# MVP v3.8 开发计划(TDD,细化版) + +> 目标:在 v3.7 基础上引入 Ray Serve(vLLM)模型动态部署与管理(多模型单 app),并提供 WebUI + API 管理闭环。 +> 约束(已确认): +> - 推理端口固定 `8000`(Serve HTTP)。 +> - 推理侧不接入现有 token 鉴权(对外 OpenAI endpoint 无鉴权)。 +> - 对外 `model_id` 统一加前缀:`--`(用户只填 suffix)。 +> - `LLMConfig.accelerator_type` 从 `dev.yaml` 读取(dev/h1: `H20`)。 + +本计划按“测试先行 → 实现 → 回归”的节奏拆分到可验证粒度;每个 milestone 都能单独验收。 + +--- + +## M0 - 基线与依赖探测(不改行为) + +**目的**:确认 v3.7 baseline 稳定,并明确 Ray Serve LLM 依赖是否已具备(否则后续会卡在镜像/依赖)。 + +### M0.1 本地回归 +- [ ] `.venv/bin/python -m pytest` 通过(coverage ≥ 90%) + +### M0.2 远端回归(h1) +- [ ] `src/mvp/scripts/run_all_v30_api.sh` 可跑通(确认训练闭环未回退) + +### M0.3 head 容器内依赖探测(记录结论) +- [ ] `python3 -c "import ray; import ray.serve; print(ray.__version__)"` +- [ ] `python3 -c "from ray.serve.llm import LLMConfig, build_openai_app; print('serve_llm_ok')"` +- [ ] 若失败(例如缺 `gymnasium`):记录缺失项,并在 M6 通过补齐 `ray[llm]` 解决 + +### M0.4 配置探测 +- [ ] `configs/dev.yaml` 中存在: + - `serving.llm.accelerator_type: H20` + - `serving.serve.http_port: 8000` + - `serving.serve.proxy_location: HeadOnly` + +**验收**: +- baseline 无回退;依赖探测结论明确(可用/不可用) + +--- + +## M1 - ServingSpec(解析/校验/宏替换/路径校验)(单测驱动) + +**目的**:先把“输入”这层彻底固化(API/UI 复用),避免后期反复改 schema。 + +### M1.1 新增/扩展数据模型 +- [ ] `ServingSpec`(输入) + - `model_id`(suffix) + - `model_source`(支持 `$HOME` 宏) + - `num_replicas`(default=1) + - `gpus_per_replica`(default=1) + - `engine_kwargs`(可选 dict,先原样存 DB;实现阶段再做白名单/黑名单) +- [ ] `ResolvedServingSpec`(内部) + - `model_id_suffix` + - `model_id_prefix`(由平台生成:`user_id-YYYYMMDDHHMM`) + - `model_id`(对外:`-`) + - `model_source`(resolved path) + +### M1.2 规则(写成纯函数,便于测) +- [ ] `validate_model_id_suffix(suffix)`:长度/字符集限制(建议:`[a-zA-Z0-9][a-zA-Z0-9._-]{0,63}`) +- [ ] `$HOME` 宏替换:`$HOME`、`$HOME/common/hf`、`$HOME/common/datasets` +- [ ] 路径校验(强制本地路径): + - 允许:`/private/hf/...`、`/private/users//...` + - 拒绝:`..`、空、其它用户路径、非 `/private` 路径 +- [ ] `make_model_id_prefix(user_id, now_utc)`:`YYYYMMDDHHMM`(UTC)+ user_id + +### M1.3 单测(先写失败用例,再补实现) +- [ ] `test_serving_spec_validation.py` + - suffix 合法/非法 + - replicas/gpus 边界:0、负数、小数、超大值(按实现决定是否限制上限) +- [ ] `test_serving_spec_paths.py` + - `$HOME` 替换正确 + - 越权路径返回 403/ValueError(按接口层映射) + - `/private/hf` 与 `/private/users/` 均可 +- [ ] `test_serving_model_id_prefix.py` + - 固定时间输入 → prefix 输出一致(避免时区/格式问题) + +**验收**: +- 输入 spec 规则稳定;核心校验/替换均有单测覆盖 + +--- + +## M2 - SQLite 表结构与 Db 接口(单测驱动) + +**目的**:Serving 的声明式状态必须持久化,可审计、可恢复。 + +### M2.1 DB schema +- [ ] `serve_models` + - 主键:`model_key`(平台生成) + - unique:`(user_id, model_id_suffix)`(实现 upsert) + - 存储:resolved spec(包含 prefix/full model_id、resolved model_source) + - 状态:`QUEUED/DEPLOYING/RUNNING/FAILED/DELETING/DELETED` + - `error_summary` +- [ ] `serve_events`(append-only) + +### M2.2 Db 方法 +- [ ] `upsert_serve_model(user_id, spec_yaml, now)` → (model_key, state) +- [ ] `list_serve_models(user_id, include_deleted=False, limit/offset?)` +- [ ] `get_serve_model(model_key)` +- [ ] `set_serve_model_state(model_key, state, error_summary=None)` +- [ ] `append_serve_event(model_key, event_type, payload_json=None)` +- [ ] `pick_next_runnable_serve_change()`(给 reconciler 用) + 
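+A minimal sketch of the SQLite DDL implied by M2.1/M2.2 above; exact column types, timestamp formats, and the `serve_app_name` default are illustrative assumptions rather than the final schema:
+
+```python
+# Illustrative-only DDL for the tables outlined in M2.1 (not the final schema).
+import sqlite3
+
+SERVING_SCHEMA = """
+CREATE TABLE IF NOT EXISTS serve_models (
+    model_key          TEXT PRIMARY KEY,              -- platform-generated key
+    user_id            TEXT NOT NULL,
+    model_id_suffix    TEXT NOT NULL,                 -- user-provided suffix
+    model_id           TEXT NOT NULL,                 -- <user_id>-<YYYYMMDDHHMM>-<suffix>
+    model_source       TEXT NOT NULL,                 -- resolved local path / repo id
+    num_replicas       INTEGER NOT NULL DEFAULT 1,
+    gpus_per_replica   INTEGER NOT NULL DEFAULT 1,
+    engine_kwargs_json TEXT,
+    state              TEXT NOT NULL,                 -- QUEUED/DEPLOYING/RUNNING/FAILED/DELETING/DELETED
+    serve_app_name     TEXT NOT NULL DEFAULT 'argus_llm_app',
+    error_summary      TEXT,
+    created_at         TEXT NOT NULL,
+    updated_at         TEXT NOT NULL,
+    UNIQUE (user_id, model_id_suffix)                 -- upsert key
+);
+CREATE TABLE IF NOT EXISTS serve_events (
+    id           INTEGER PRIMARY KEY AUTOINCREMENT,
+    model_key    TEXT NOT NULL REFERENCES serve_models(model_key),
+    event_type   TEXT NOT NULL,                       -- DEPLOY_REQUESTED / STATUS_SYNC / ...
+    payload_json TEXT,
+    created_at   TEXT NOT NULL
+);
+"""
+
+
+def init_serving_tables(conn: sqlite3.Connection) -> None:
+    """Create the serving tables if they do not exist yet."""
+    conn.executescript(SERVING_SCHEMA)
+    conn.commit()
+```
+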
+### M2.3 单测 +- [ ] `test_db_serving.py` + - upsert 行为(同 suffix 更新不产生新 model_key 或产生新版本——此处需在实现前明确策略) + - state 流转 + 事件记录 + - list 的过滤与排序(按 updated_at) + +**验收**: +- DB 行为可预测;upsert/unique 语义确定并测试覆盖 + +--- + +## M3 - Serving 管理 API(FastAPI)(单测驱动) + +**目的**:先把管理 API 跑通,Ray Serve 先不接真实(reconciler 之后再接)。 + +### M3.1 API 路由(用户) +- [ ] `POST /api/v2/serve/models`(Content-Type: application/yaml) + - 入参:ServingSpec YAML + - 出参:`{model_key,state}`(202) +- [ ] `GET /api/v2/serve/models` + - 返回 items + `openai_base_url=http://:8000/v1` +- [ ] `GET /api/v2/serve/models/{model_key}` + - 返回 model + resolved_spec_yaml + events(分页可后置)+ serve_status(先空/占位) +- [ ] `PATCH /api/v2/serve/models/{model_key}`(JSON) + - 支持 `num_replicas`(最小闭环) +- [ ] `DELETE /api/v2/serve/models/{model_key}` + +### M3.2 API 路由(admin,可选) +- [ ] `GET /api/v2/serve/status`(仅 admin token) + +### M3.3 错误映射(必须测试) +- [ ] YAML 解析失败:400 +- [ ] spec 校验失败:422 +- [ ] 越权路径:403 +- [ ] 不存在 model_key:404 + +### M3.4 单测 +- [ ] `test_app_serving_api.py` + - happy path:create → list → get → patch → delete + - 多用户隔离:用户只能看到自己的 model + - 错误码覆盖:400/403/404/422 + +**验收**: +- API reference (`v3.8_api.md`) 中所有管理接口可返回预期结构(Serve 未接入也能工作) + +--- + +## M4 - ServeClient 抽象 + LLMConfig builder(单测驱动) + +**目的**:将“如何从 ResolvedServingSpec 构造 LLMConfig”固化,并把 Ray Serve 的依赖隔离到 client 里,便于 mock。 + +### M4.1 `ServeClient` 接口(可 mock) +- [ ] `ensure_started(http_port=8000, proxy_location="HeadOnly")` +- [ ] `apply_app(app_name, llm_configs)`(multi-model) +- [ ] `get_status()`(serve.status 摘要) + +### M4.2 `build_llm_config(resolved_spec, accelerator_type, runtime_env_defaults)` 纯函数 +- [ ] 写入 `LLMConfig.accelerator_type`(来自 dev.yaml:H20) +- [ ] `deployment_config.num_replicas` +- [ ] `engine_kwargs.tensor_parallel_size = gpus_per_replica` +- [ ] `placement_group_config` bundles 按 GPU 张数生成 +- [ ] `runtime_env.env_vars` 注入(至少包含 HF cache + `HF_HUB_OFFLINE=1`) + +### M4.3 单测 +- [ ] `test_llm_config_builder.py` + - gpus_per_replica=1/2/4 → tensor_parallel_size 与 bundles 数量正确 + - accelerator_type 注入正确 + - runtime_env 含 HF_HUB_OFFLINE 等关键 env + +**验收**: +- 从平台 spec 到 Ray Serve LLMConfig 的映射规则稳定,有单测锁定 + +--- + +## M5 - Serving Reconciler(状态机 + 资源预检查)(单测驱动) + +**目的**:实现声明式对齐:DB → Serve;同时提供可解释的 QUEUED/FAILED 状态。 + +### M5.1 状态机(最小闭环) +- [ ] `QUEUED`:等待 apply +- [ ] `DEPLOYING`:已触发 apply,等待 Serve running/healthy +- [ ] `RUNNING`:Serve status running +- [ ] `FAILED`:apply 或 status 失败(写 error_summary + event) +- [ ] `DELETING`:等待从 app 中移除 +- [ ] `DELETED`:完成删除(可选保留记录) + +### M5.2 资源预检查 +- [ ] `needed_total_gpus = sum(num_replicas*gpus_per_replica)`(最小可用预检查) +- [ ] `ray.available_resources()["GPU"]`(或更稳健的 per-node 统计)不足时: + - 保持 `QUEUED` + - 记录 `PENDING_RESOURCES` event + +### M5.3 reconcile 策略(multi-model app) +- [ ] tick 读取 active models,构建全量 `llm_configs` +- [ ] 处理 deleting:从 configs 中移除对应 model,再 apply + +### M5.4 单测(mock ServeClient + mock ray resources) +- [ ] `test_serving_reconciler.py` + - 新增模型:apply_app 被调用;state 进入 DEPLOYING + - 删除模型:apply_app configs 不包含该模型 + - GPU 不足:不 apply;state 仍 QUEUED;event 写入 + - apply 抛异常:state FAILED;error_summary 写入 + +**验收**: +- reconciler 行为在纯单测环境可验证;失败可解释 + +--- + +## M6 - 真实集成(h1):Ray Serve 启动 + 推理闭环(E2E) + +**目的**:在 dev/h1 环境真正跑通:部署模型 → `/v1/models` 可见 → `chat/completions` 成功 → 删除后消失。 + +### M6.1 compose/端口 +- [ ] `src/mvp/docker-compose.yaml`:`ray_head` 增加 `8000:8000` + +### M6.2 镜像依赖(若 M0 发现缺失) +- [ ] 在 `argus-ray-node` 镜像中补齐 `ray[serve,llm]`(版本与现有 Ray 对齐,避免升级 Ray 导致不兼容) + - 推荐优先补齐 `ray[llm]`(包含 `ray.serve.llm` 依赖闭包,如 `gymnasium`),再按需补 
+---
+
+## M6 - Real integration (h1): Ray Serve startup + inference loop (E2E)
+
+**Purpose**: make it actually work in the dev/h1 environment: deploy a model → it shows up in `/v1/models` → `chat/completions` succeeds → it disappears after deletion.
+
+### M6.1 compose / ports
+- [ ] `src/mvp/docker-compose.yaml`: add `8000:8000` to `ray_head`
+
+### M6.2 Image dependencies (if M0 found gaps)
+- [ ] add `ray[serve,llm]` to the `argus-ray-node` image (keep the version aligned with the existing Ray; avoid upgrading Ray and breaking compatibility)
+  - prefer adding `ray[llm]` first (it pulls in the `ray.serve.llm` dependency closure, e.g. `gymnasium`), then add `ray[serve]` as needed
+  - verification: `python3 -c "from ray.serve.llm import LLMConfig, build_openai_app; print('serve_llm_ok')"`
+
+### M6.3 E2E script (idempotent)
+- [ ] add `scripts/run_all_v38_serving.sh`:
+  - bring up compose (make sure the Serve port is reachable)
+  - start the API
+  - create a user + token
+  - `POST /api/v2/serve/models` to create a 1-GPU model
+  - poll the model state until RUNNING
+  - `curl http://127.0.0.1:8000/v1/models` and verify the full model_id (`<prefix>-<suffix>`) is listed
+  - `curl http://127.0.0.1:8000/v1/chat/completions` for a minimal inference
+  - `DELETE /api/v2/serve/models/{model_key}` to take it down
+  - poll `/v1/models` again and verify the model is gone
+
+**Acceptance**:
+- the E2E run is repeatable (at least two consecutive runs without manual cleanup)
+
+---
+
+## M7 - WebUI (Serving pages) (unit-test driven)
+
+**Purpose**: give users a visual model-management page (minimal necessary features).
+
+### M7.1 Pages
+- [ ] add Serving to the sidebar
+- [ ] `/ui/serving`: list + state + actions (delete/scale)
+- [ ] `/ui/serving/new`: YAML input + submit
+- [ ] `/ui/serving/{model_key}`: detail (resolved spec, events, OpenAI call example)
+
+### M7.2 Unit tests
+- [ ] `test_ui_serving.py`: routes return 200, key links exist, pages contain the port-8000 openai_base_url
+
+**Acceptance**:
+- the WebUI covers the main create/list/detail/scale/delete flow
+
+---
+
+## M8 - Docs and acceptance cases (delivery)
+
+**Purpose**: give users/ops a reusable way to run this plus a troubleshooting path.
+
+- [ ] update `specs/mvp/v3.8/v3.8_progress.md` (record per milestone)
+- [ ] extend the README (optional): port notes, a warning that the inference API is unauthenticated, model path conventions
+- [ ] acceptance checklist:
+  - unit tests pass
+  - h1 E2E passes
+  - the UI main flow is operable
diff --git a/specs/mvp/v3.8/v3.8_progress.md b/specs/mvp/v3.8/v3.8_progress.md
new file mode 100644
index 0000000..83eae88
--- /dev/null
+++ b/specs/mvp/v3.8/v3.8_progress.md
@@ -0,0 +1,48 @@
+# MVP v3.8 Progress Log
+
+## 2026-01-06
+
+- Completed the v3.8 design doc: `specs/mvp/v3.8/v3.8_design.md`
+- Completed the v3.8 Serving API reference: `specs/mvp/v3.8/v3.8_api.md`
+- Completed the v3.8 TDD development plan: `specs/mvp/v3.8/v3.8_dev_plan.md`
+- Completed M0: added the `serving` config to `configs/dev.yaml` (http_port=8000, proxy_location=HeadOnly, accelerator_type=H20)
+- Completed M1: ServingSpec parsing / macro expansion / path validation + unit tests (`src/mvp/py/argus/service/serving_spec.py`)
+- Completed M2: new SQLite tables `serve_models`/`serve_events` + Db API + unit tests (`src/mvp/py/argus/service/db.py`)
+- Completed M3: FastAPI serving management API + unit tests (`src/mvp/py/argus/service/app.py`)
+- Completed M4: ServeClient abstraction + LLMConfig builder (dict form) + unit tests (`src/mvp/py/argus/service/serve_client.py`, `src/mvp/py/argus/service/serve_llm_config.py`)
+- Completed M5: serving reconciler (state machine + resource precheck + mocked unit tests) (`src/mvp/py/argus/service/serving_reconciler.py`)
+
+### M6 (real integration on h1)
+
+- Added the missing deps to the `argus-ray-node` image: `ray[serve,llm]` + `gymnasium` + `dm-tree` (so that importing `ray.serve.llm` does not fail)
+- Fixed Ray 2.49.2 compatibility issues:
+  - `LLMConfig` does not support `placement_group_config`; switched to `resources_per_bundle` (`src/mvp/py/argus/service/serve_llm_config.py`)
+- Remote E2E:
+  - `scripts/run_all_v38_serving.sh` runs end to end: create → RUNNING → `/v1/models` → `chat/completions` → delete → DELETED
+  - Fixed a bash heredoc quoting error in the script's `/v1/models` parsing (`src/mvp/scripts/run_all_v38_serving.sh`)
+
+### M7 (WebUI - Serving)
+
+- Added Serving pages to the WebUI:
+  - list: `/ui/serving`
+  - create: `/ui/serving/new`
+  - detail/events/scale/delete: `/ui/serving/{model_key}`
+- Unit test coverage:
+  - `src/mvp/py/tests/test_ui_serving.py`
+
+### M8 (docs / acceptance)
+
+- `src/mvp/README.md` now documents the v3.8 serving port and the E2E script
+
+### Environment probe (h1 / head container)
+
+> Purpose: confirm whether the Ray Serve LLM dependencies work out of the box, so the gap does not surface only during the integration phase.
+
+- `ray`: available, version `2.49.2`
+- `ray.serve`: importable (Serve basics available)
+- `ray.serve.llm`: currently not importable
+  - error: `ModuleNotFoundError: No module named 'gymnasium'`
+  - cause: the `ray.serve.llm` import chain pulls in `ray.rllib`, and rllib depends on `gymnasium`
+
+Conclusion:
+- during implementation, v3.8 must add `ray[llm]` (recommended) to the `argus-ray-node` image, or at minimum `gymnasium` and the other required deps, so that `from ray.serve.llm import ...` works.
diff --git a/src/mvp/README.md b/src/mvp/README.md
index 4fd720e..26cd18d 100644
--- a/src/mvp/README.md
+++ b/src/mvp/README.md
@@ -24,3 +24,9 @@ v3.0 access entrypoints (dev/h1):
 - SFTPGo:
   - SFTP:`127.0.0.1:2022`
   - Admin API/UI: `http://127.0.0.1:8081` (8080 inside the container; mapped to host port 8081 to avoid clashing with the API server)
+
+v3.8 (Ray Serve LLM / vLLM model serving):
+- Inference port: `8000` (Ray Serve HTTP)
+- OpenAI-compatible endpoint: `http://127.0.0.1:8000/v1`
+  - Note: the v3.8 inference API has **no authentication**
+- E2E script: `scripts/run_all_v38_serving.sh` (a minimal smoke-check sketch follows after the image changes below)
diff --git a/src/mvp/configs/dev.yaml b/src/mvp/configs/dev.yaml index 41e5475..4c41416 100644 --- a/src/mvp/configs/dev.yaml +++ b/src/mvp/configs/dev.yaml @@ -69,3 +69,11 @@ data: jobs_trash_after_days: 3 jobs_purge_after_days: 7 janitor_interval_s: 3600 + +# v3.8: model serving via Ray Serve LLM (vLLM backend) +serving: + serve: + http_port: 8000 + proxy_location: HeadOnly + llm: + accelerator_type: H20 diff --git a/src/mvp/docker-compose.yaml b/src/mvp/docker-compose.yaml index 16e73d8..0bb57c7 100644 --- a/src/mvp/docker-compose.yaml +++ b/src/mvp/docker-compose.yaml @@ -1,10 +1,16 @@ services: ray_head: image: argus/argus-ray-node:vllm011.latest + build: + context: . + dockerfile: images/argus-ray-node/Dockerfile + args: + BASE_IMAGE: verlai/verl:vllm011.latest container_name: argus-ray-head ports: - "8265:8265" - "8080:8080" + - "8000:8000" volumes: # NOTE: this compose file is intended for the dev env layout like: # /home2/argus/infra/mvp/{shared,verl,src/mvp} @@ -92,6 +98,11 @@ services: ray_worker_0: image: argus/argus-ray-node:vllm011.latest + build: + context: . + dockerfile: images/argus-ray-node/Dockerfile + args: + BASE_IMAGE: verlai/verl:vllm011.latest container_name: argus-ray-worker-0 volumes: - ../../verl:/workspace/verl @@ -124,6 +135,11 @@ services: ray_worker_1: image: argus/argus-ray-node:vllm011.latest + build: + context: . + dockerfile: images/argus-ray-node/Dockerfile + args: + BASE_IMAGE: verlai/verl:vllm011.latest container_name: argus-ray-worker-1 volumes: - ../../verl:/workspace/verl diff --git a/src/mvp/images/argus-ray-node/Dockerfile b/src/mvp/images/argus-ray-node/Dockerfile index 06423f1..54d1be2 100644 --- a/src/mvp/images/argus-ray-node/Dockerfile +++ b/src/mvp/images/argus-ray-node/Dockerfile @@ -6,6 +6,15 @@ SHELL ["/bin/bash", "-lc"] # Install supervisord (prefer pip to avoid relying on distro package manager). RUN python3 -m pip install --no-cache-dir supervisor +# v3.8: Ray Serve LLM deps (keep Ray version pinned to what's already in the base image). +# NOTE: base image already includes Ray; we only add extras. +RUN RAY_VER="$(python3 -c 'import ray; print(ray.__version__)')" && \ + python3 -m pip install --no-cache-dir "ray[serve,llm]==${RAY_VER}" +# Ray Serve LLM's import chain currently pulls in ray.rllib which requires extra deps. +# Install them explicitly to make `from ray.serve.llm import ...` work reliably. +RUN python3 -m pip install --no-cache-dir gymnasium dm-tree && \ + python3 -c "from ray.serve.llm import LLMConfig, build_openai_app; print('ray_serve_llm_ok')" + RUN mkdir -p /opt/argus/py/argus/ray # Minimal embedded code for stateless pool (API code is intentionally excluded).
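With the port mapping and image dependencies above in place, a deployed model can be exercised directly through the unauthenticated OpenAI-compatible endpoint on port 8000. The snippet below is a minimal, hedged smoke check, not the bundled `scripts/serve_llm_smoke.py`; the model name is only an example of the platform's `<user_id>-<YYYYMMDDHHMM>-<suffix>` form, and the response parsing assumes the standard OpenAI list/chat response shapes.

```python
# Hedged smoke check against the v3.8 serving endpoint (illustrative, stdlib only).
import json
import urllib.request

BASE = "http://127.0.0.1:8000/v1"  # Ray Serve HTTP port from docker-compose / dev.yaml

# 1) List deployed models; the platform-generated model_id should appear here.
with urllib.request.urlopen(f"{BASE}/models", timeout=30) as resp:
    listing = json.load(resp)
print([m["id"] for m in listing.get("data", [])])

# 2) Minimal chat completion against one deployed model (no auth header needed in v3.8).
payload = {
    "model": "alice-202601061200-qwen-0.5b",  # illustrative model_id
    "messages": [{"role": "user", "content": "hello"}],
    "max_tokens": 16,
    "stream": False,
}
req = urllib.request.Request(
    f"{BASE}/chat/completions",
    data=json.dumps(payload).encode("utf-8"),
    headers={"Content-Type": "application/json"},
    method="POST",
)
with urllib.request.urlopen(req, timeout=120) as resp:
    answer = json.load(resp)
print(answer["choices"][0]["message"]["content"])
```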
diff --git a/src/mvp/images/argus-ray-node/argus-head-ray.sh b/src/mvp/images/argus-ray-node/argus-head-ray.sh index 1e873dc..31c5a54 100644 --- a/src/mvp/images/argus-ray-node/argus-head-ray.sh +++ b/src/mvp/images/argus-ray-node/argus-head-ray.sh @@ -16,9 +16,8 @@ exec ray start \ --port="${ray_port}" \ --dashboard-host=0.0.0.0 \ --dashboard-port="${dashboard_port}" \ - --num-cpus=0 \ + --num-cpus="${ARGUS_HEAD_NUM_CPUS:-1}" \ --num-gpus=0 \ --disable-usage-stats \ --block \ ${ARGUS_RAY_EXTRA_ARGS:-} - diff --git a/src/mvp/py/argus/core/ids.py b/src/mvp/py/argus/core/ids.py index bd46978..abc7ed8 100644 --- a/src/mvp/py/argus/core/ids.py +++ b/src/mvp/py/argus/core/ids.py @@ -26,3 +26,19 @@ def new_task_id(workload: str, *, user_id: str | None = None) -> str: def attempt_submission_id(task_id: str, attempt_no: int) -> str: return f"{task_id}--a{attempt_no:02d}" + + +def new_model_key(*, user_id: str) -> str: + """ + Internal identifier for a serving model record. + + Note: + - model_id is the OpenAI-facing name (user_id + timestamp prefix + suffix). + - model_key is used for stable DB identity and API resource path. + """ + ts = datetime.now().strftime("%Y%m%d-%H%M%S") + suffix = secrets.token_hex(2) + u = _normalize_user_id(user_id) + if not u: + raise ValueError("user_id is required") + return f"mvp2-{u}-serve-{ts}-{suffix}" diff --git a/src/mvp/py/argus/service/app.py b/src/mvp/py/argus/service/app.py index faa7942..f6ab121 100644 --- a/src/mvp/py/argus/service/app.py +++ b/src/mvp/py/argus/service/app.py @@ -4,11 +4,13 @@ import os import secrets import threading from typing import Any +import json +from dataclasses import asdict import yaml from fastapi import FastAPI, HTTPException, Request, Response -from argus.core.ids import new_task_id +from argus.core.ids import new_model_key, new_task_id from argus.ray.models import AdvancedTaskSpec, JobSpec, RayConfig, parse_taskspec from .advanced_command import expand_advanced_command, validate_advanced_command @@ -16,6 +18,7 @@ from .config import V2Config from .db import Db from .janitor import JobsJanitor from .scheduler import Scheduler +from .serving_spec import ServingSpec, parse_serving_spec, resolve_serving_spec from .sftpgo import SFTPGoAdminClient, SFTPGoError from .ui import register_ui_routes @@ -85,6 +88,61 @@ def create_app(config_path: str) -> FastAPI: common_root=f"{shared_root}/common", ) + def _serving_enabled() -> bool: + return bool(v2_cfg.serving.enabled) + + def _openai_base_url(req: Request) -> str: + # Prefer forwarded headers if present; otherwise fall back to Host. + host = req.headers.get("x-forwarded-host") or req.headers.get("host") or req.url.hostname or "127.0.0.1" + # Strip port if present (common for Host header). 
+ hostname = host + if hostname.startswith("[") and "]" in hostname: + # IPv6 like: [::1]:8080 + hostname = hostname.split("]")[0] + "]" + else: + hostname = hostname.split(":")[0] + scheme = req.headers.get("x-forwarded-proto") or req.url.scheme or "http" + port = int(v2_cfg.serving.serve.http_port) + return f"{scheme}://{hostname}:{port}/v1" + + def _dump_yaml(obj: Any) -> str: + return yaml.safe_dump(obj, sort_keys=False) + + def _serving_spec_to_dict(spec: ServingSpec) -> dict[str, Any]: + return { + "model_id": spec.model_id, + "model_source": spec.model_source, + "num_replicas": int(spec.num_replicas), + "gpus_per_replica": int(spec.gpus_per_replica), + "engine_kwargs": spec.engine_kwargs, + } + + def _serve_model_public(row: dict[str, Any], *, req: Request) -> dict[str, Any]: + num_replicas = int(row.get("num_replicas") or 0) + gpus_per_replica = int(row.get("gpus_per_replica") or 0) + total_gpus = num_replicas * gpus_per_replica + model_id = str(row.get("model_id") or "") + return { + "model_key": str(row.get("model_key") or ""), + "user_id": str(row.get("user_id") or ""), + "model_id": model_id, + "model_id_suffix": str(row.get("model_id_suffix") or ""), + "model_id_prefix": str(row.get("model_id_prefix") or ""), + "model_source": str(row.get("model_source") or ""), + "num_replicas": num_replicas, + "gpus_per_replica": gpus_per_replica, + "total_gpus": total_gpus, + "state": str(row.get("state") or ""), + "error_summary": row.get("error_summary"), + "created_at": str(row.get("created_at") or ""), + "updated_at": str(row.get("updated_at") or ""), + "deleted_at": row.get("deleted_at"), + "endpoint": { + "openai_base_url": _openai_base_url(req), + "model": model_id, + }, + } + def _auth(req: Request) -> dict[str, Any]: token_env = v2_cfg.auth.token_env admin_token = os.environ.get(token_env, "") @@ -565,6 +623,162 @@ def create_app(config_path: str) -> FastAPI: return db.list_queue() return db.list_queue(user_id=str(subject["user_id"])) + # v3.8: Model serving (Ray Serve LLM) management APIs. 
+ @app.post("/api/v2/serve/models") + async def create_serve_model(req: Request) -> dict[str, Any]: + subject = _auth(req) + if not _serving_enabled(): + raise HTTPException(status_code=400, detail="serving is not enabled") + + body = (await req.body()).decode("utf-8") + try: + obj = yaml.safe_load(body) or {} + except Exception as e: + raise HTTPException(status_code=400, detail=f"invalid YAML: {e!r}") + if not isinstance(obj, dict): + raise HTTPException(status_code=400, detail="serving spec must be a YAML mapping") + + user_id = str(subject["user_id"]).strip() + try: + spec = parse_serving_spec(obj) + resolved = resolve_serving_spec(spec=spec, user_id=user_id) + except PermissionError as e: + raise HTTPException(status_code=403, detail=str(e)) + except ValueError as e: + msg = str(e) + code = 422 if ("num_replicas" in msg or "gpus_per_replica" in msg) else 400 + raise HTTPException(status_code=code, detail=f"invalid serving spec: {e!r}") + + model_key = new_model_key(user_id=user_id) + try: + engine_kwargs_json = json.dumps(resolved.engine_kwargs, sort_keys=True) if resolved.engine_kwargs is not None else None + except TypeError as e: + raise HTTPException(status_code=400, detail=f"engine_kwargs must be JSON-serializable: {e!r}") + + spec_yaml = _dump_yaml(_serving_spec_to_dict(spec)) + resolved_spec_yaml = _dump_yaml(asdict(resolved)) + + db.create_serve_model( + model_key=model_key, + user_id=user_id, + model_id_suffix=resolved.model_id_suffix, + model_id_prefix=resolved.model_id_prefix, + model_id=resolved.model_id, + model_source=resolved.model_source, + num_replicas=resolved.num_replicas, + gpus_per_replica=resolved.gpus_per_replica, + engine_kwargs_json=engine_kwargs_json, + spec_yaml=spec_yaml, + resolved_spec_yaml=resolved_spec_yaml, + ) + return {"model_key": model_key, "state": "QUEUED"} + + @app.get("/api/v2/serve/models") + async def list_serve_models(req: Request, limit: int = 200, offset: int = 0, include_deleted: int = 0) -> dict[str, Any]: + subject = _auth(req) + if not _serving_enabled(): + raise HTTPException(status_code=400, detail="serving is not enabled") + + lim = max(1, min(int(limit), 1000)) + off = max(0, int(offset)) + inc = bool(int(include_deleted)) + user_id = str(subject["user_id"]) + + items = db.list_serve_models(user_id=user_id, include_deleted=inc, limit=lim, offset=off) + out = [_serve_model_public(i, req=req) for i in items] + return { + "items": out, + "openai_base_url": _openai_base_url(req), + "limit": lim, + "offset": off, + "has_more": bool(len(items) == lim), + } + + @app.get("/api/v2/serve/models/{model_key}") + async def get_serve_model(model_key: str, req: Request) -> dict[str, Any]: + subject = _auth(req) + if not _serving_enabled(): + raise HTTPException(status_code=400, detail="serving is not enabled") + + row = db.get_serve_model(model_key) + if not row: + raise HTTPException(status_code=404, detail="model not found") + if not subject.get("is_admin"): + if str(row.get("user_id") or "") != str(subject["user_id"]): + raise HTTPException(status_code=404, detail="model not found") + + events = db.list_serve_events(model_key, limit=200, offset=0) + ev_out = [ + { + "id": int(e.get("id") or 0), + "model_key": str(e.get("model_key") or ""), + "created_at": str(e.get("ts") or ""), + "event_type": str(e.get("event_type") or ""), + "payload_json": e.get("payload_json"), + } + for e in events + ] + return { + "model": _serve_model_public(row, req=req), + "resolved_spec_yaml": str(row.get("resolved_spec_yaml") or ""), + "events": ev_out, + 
"serve_status": None, + } + + @app.patch("/api/v2/serve/models/{model_key}") + async def patch_serve_model(model_key: str, req: Request) -> dict[str, Any]: + subject = _auth(req) + if not _serving_enabled(): + raise HTTPException(status_code=400, detail="serving is not enabled") + + row = db.get_serve_model(model_key) + if not row: + raise HTTPException(status_code=404, detail="model not found") + if not subject.get("is_admin"): + if str(row.get("user_id") or "") != str(subject["user_id"]): + raise HTTPException(status_code=404, detail="model not found") + + obj = await req.json() + if not isinstance(obj, dict): + raise HTTPException(status_code=400, detail="body must be a JSON object") + if "num_replicas" not in obj: + raise HTTPException(status_code=400, detail="missing num_replicas") + num_replicas = obj.get("num_replicas") + if not isinstance(num_replicas, int) or int(num_replicas) < 1: + raise HTTPException(status_code=422, detail="num_replicas must be an integer >= 1") + + db.update_serve_model_num_replicas(model_key=model_key, num_replicas=int(num_replicas)) + return {"model_key": model_key, "state": "QUEUED"} + + @app.delete("/api/v2/serve/models/{model_key}") + async def delete_serve_model(model_key: str, req: Request) -> dict[str, Any]: + subject = _auth(req) + if not _serving_enabled(): + raise HTTPException(status_code=400, detail="serving is not enabled") + + row = db.get_serve_model(model_key) + if not row: + raise HTTPException(status_code=404, detail="model not found") + if not subject.get("is_admin"): + if str(row.get("user_id") or "") != str(subject["user_id"]): + raise HTTPException(status_code=404, detail="model not found") + + db.set_serve_model_state(model_key=model_key, state="DELETING", event_type="SERVE_DELETE_REQUESTED") + return {"model_key": model_key, "state": "DELETING"} + + @app.get("/api/v2/serve/status") + async def serve_status(req: Request) -> dict[str, Any]: + _require_admin(req) + if not _serving_enabled(): + raise HTTPException(status_code=400, detail="serving is not enabled") + return { + "enabled": True, + "openai_base_url": _openai_base_url(req), + "http_port": int(v2_cfg.serving.serve.http_port), + "proxy_location": str(v2_cfg.serving.serve.proxy_location), + "accelerator_type": str(v2_cfg.serving.llm.accelerator_type), + } + # v3.0: minimal WebUI (no server-side session; token stored in browser localStorage). 
register_ui_routes(app) diff --git a/src/mvp/py/argus/service/config.py b/src/mvp/py/argus/service/config.py index 193aef2..babee4f 100644 --- a/src/mvp/py/argus/service/config.py +++ b/src/mvp/py/argus/service/config.py @@ -57,6 +57,24 @@ class V2SFTPGoConfig: admin_password_env: str = "SFTPGO_ADMIN_PASSWORD" +@dataclass(frozen=True) +class V2ServingServeConfig: + http_port: int = 8000 + proxy_location: str = "HeadOnly" + + +@dataclass(frozen=True) +class V2ServingLLMConfig: + accelerator_type: str = "" + + +@dataclass(frozen=True) +class V2ServingConfig: + enabled: bool = False + serve: V2ServingServeConfig = V2ServingServeConfig() + llm: V2ServingLLMConfig = V2ServingLLMConfig() + + @dataclass(frozen=True) class V2DataConfig: user_root: str @@ -72,6 +90,7 @@ class V2Config: scheduler: V2SchedulerConfig tracking: V2TrackingConfig data: V2DataConfig + serving: V2ServingConfig @staticmethod def from_root_dict(root: dict[str, Any]) -> "V2Config": @@ -112,6 +131,15 @@ class V2Config: if not isinstance(sftpgo, dict) or not isinstance(retention, dict): raise ValueError("config.data.{sftpgo,retention} must be mappings") + serving = root.get("serving") or {} + if not isinstance(serving, dict): + raise ValueError("config.serving must be a mapping") + serving_enabled = bool(serving.get("enabled")) if "enabled" in serving else bool(serving) + serving_serve = serving.get("serve") or {} + serving_llm = serving.get("llm") or {} + if not isinstance(serving_serve, dict) or not isinstance(serving_llm, dict): + raise ValueError("config.serving.{serve,llm} must be mappings") + default_db_path = f"{shared_root}/common/db/mvp.sqlite3" db_path = str(sqlite.get("db_path") or default_db_path) @@ -158,4 +186,14 @@ class V2Config: janitor_interval_s=int(retention.get("janitor_interval_s") or 3600), ), ), + serving=V2ServingConfig( + enabled=serving_enabled, + serve=V2ServingServeConfig( + http_port=int(serving_serve.get("http_port") or 8000), + proxy_location=str(serving_serve.get("proxy_location") or "HeadOnly"), + ), + llm=V2ServingLLMConfig( + accelerator_type=str(serving_llm.get("accelerator_type") or ""), + ), + ), ) diff --git a/src/mvp/py/argus/service/db.py b/src/mvp/py/argus/service/db.py index 8ca314b..6c18c56 100644 --- a/src/mvp/py/argus/service/db.py +++ b/src/mvp/py/argus/service/db.py @@ -117,6 +117,43 @@ class Db: ) """ ) + conn.execute( + """ + CREATE TABLE IF NOT EXISTS serve_models ( + model_key TEXT PRIMARY KEY, + user_id TEXT NOT NULL, + model_id_suffix TEXT NOT NULL, + model_id_prefix TEXT NOT NULL, + model_id TEXT NOT NULL, + model_source TEXT NOT NULL, + num_replicas INTEGER NOT NULL, + gpus_per_replica INTEGER NOT NULL, + engine_kwargs_json TEXT, + state TEXT NOT NULL, + spec_yaml TEXT NOT NULL, + resolved_spec_yaml TEXT NOT NULL, + error_summary TEXT, + created_at TEXT NOT NULL, + updated_at TEXT NOT NULL, + deleted_at TEXT + ) + """ + ) + conn.execute( + """ + CREATE TABLE IF NOT EXISTS serve_events ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + model_key TEXT NOT NULL, + ts TEXT NOT NULL, + event_type TEXT NOT NULL, + payload_json TEXT, + FOREIGN KEY (model_key) REFERENCES serve_models(model_key) ON DELETE CASCADE + ) + """ + ) + conn.execute("CREATE INDEX IF NOT EXISTS idx_serve_models_user ON serve_models(user_id)") + conn.execute("CREATE INDEX IF NOT EXISTS idx_serve_models_state ON serve_models(state)") + conn.execute("CREATE INDEX IF NOT EXISTS idx_serve_events_model ON serve_events(model_key)") @contextmanager def tx(self) -> Iterator[sqlite3.Connection]: @@ -493,3 +530,239 @@ class 
Db: (str(end_time_le), int(limit)), ).fetchall() return [dict(r) for r in rows] + + def create_serve_model( + self, + *, + model_key: str, + user_id: str, + model_id_suffix: str, + model_id_prefix: str, + model_id: str, + model_source: str, + num_replicas: int, + gpus_per_replica: int, + spec_yaml: str, + resolved_spec_yaml: str, + engine_kwargs_json: str | None = None, + ) -> dict[str, Any]: + now = _utc_now_iso() + with self.tx() as conn: + conn.execute( + """ + INSERT INTO serve_models ( + model_key, + user_id, + model_id_suffix, + model_id_prefix, + model_id, + model_source, + num_replicas, + gpus_per_replica, + engine_kwargs_json, + state, + spec_yaml, + resolved_spec_yaml, + created_at, + updated_at + ) + VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, 'QUEUED', ?, ?, ?, ?) + """, + ( + model_key, + user_id, + model_id_suffix, + model_id_prefix, + model_id, + model_source, + int(num_replicas), + int(gpus_per_replica), + engine_kwargs_json, + spec_yaml, + resolved_spec_yaml, + now, + now, + ), + ) + conn.execute( + "INSERT INTO serve_events (model_key, ts, event_type, payload_json) VALUES (?, ?, 'SERVE_MODEL_CREATED', ?)", + (model_key, now, None), + ) + row = conn.execute("SELECT * FROM serve_models WHERE model_key = ?", (model_key,)).fetchone() + return dict(row) if row else {} + + def list_serve_models( + self, + *, + user_id: str, + include_deleted: bool = False, + limit: int = 200, + offset: int = 0, + ) -> list[dict[str, Any]]: + with self._connect() as conn: + where_sql = "WHERE user_id = ?" + params: list[Any] = [user_id] + if not include_deleted: + where_sql += " AND deleted_at IS NULL" + params.append(int(limit)) + params.append(max(0, int(offset))) + rows = conn.execute( + f""" + SELECT + model_key, + user_id, + model_id_suffix, + model_id_prefix, + model_id, + model_source, + num_replicas, + gpus_per_replica, + engine_kwargs_json, + state, + error_summary, + created_at, + updated_at, + deleted_at + FROM serve_models + {where_sql} + ORDER BY created_at DESC, model_key DESC + LIMIT ? OFFSET ? + """, + tuple(params), + ).fetchall() + return [dict(r) for r in rows] + + def list_all_serve_models( + self, + *, + include_deleted: bool = False, + limit: int = 2000, + offset: int = 0, + ) -> list[dict[str, Any]]: + with self._connect() as conn: + where_sql = "" + if not include_deleted: + where_sql = "WHERE deleted_at IS NULL" + rows = conn.execute( + f""" + SELECT + model_key, + user_id, + model_id_suffix, + model_id_prefix, + model_id, + model_source, + num_replicas, + gpus_per_replica, + engine_kwargs_json, + state, + error_summary, + spec_yaml, + resolved_spec_yaml, + created_at, + updated_at, + deleted_at + FROM serve_models + {where_sql} + ORDER BY created_at ASC, model_key ASC + LIMIT ? OFFSET ? + """, + (int(limit), max(0, int(offset))), + ).fetchall() + return [dict(r) for r in rows] + + def get_serve_model(self, model_key: str) -> dict[str, Any] | None: + with self._connect() as conn: + row = conn.execute("SELECT * FROM serve_models WHERE model_key = ?", (model_key,)).fetchone() + return dict(row) if row else None + + def list_serve_events(self, model_key: str, limit: int = 200, offset: int = 0) -> list[dict[str, Any]]: + with self._connect() as conn: + rows = conn.execute( + """ + SELECT id, model_key, ts, event_type, payload_json + FROM serve_events + WHERE model_key = ? + ORDER BY id DESC + LIMIT ? OFFSET ? 
+ """, + (model_key, int(limit), max(0, int(offset))), + ).fetchall() + return [dict(r) for r in rows] + + def append_serve_event(self, *, model_key: str, event_type: str, payload_json: str | None = None) -> None: + now = _utc_now_iso() + with self.tx() as conn: + conn.execute( + "INSERT INTO serve_events (model_key, ts, event_type, payload_json) VALUES (?, ?, ?, ?)", + (model_key, now, event_type, payload_json), + ) + + def set_serve_model_state( + self, + *, + model_key: str, + state: str, + error_summary: str | None = None, + event_type: str = "SERVE_STATE_UPDATE", + payload_json: str | None = None, + ) -> None: + now = _utc_now_iso() + with self.tx() as conn: + sets = ["state = ?", "updated_at = ?"] + params: list[Any] = [state, now] + if error_summary is not None: + sets.append("error_summary = ?") + params.append(error_summary) + if state == "DELETED": + sets.append("deleted_at = ?") + params.append(now) + params.append(model_key) + conn.execute(f"UPDATE serve_models SET {', '.join(sets)} WHERE model_key = ?", tuple(params)) + conn.execute( + "INSERT INTO serve_events (model_key, ts, event_type, payload_json) VALUES (?, ?, ?, ?)", + (model_key, now, event_type, payload_json), + ) + + def update_serve_model_num_replicas(self, *, model_key: str, num_replicas: int) -> None: + if not isinstance(num_replicas, int) or num_replicas < 1: + raise ValueError("num_replicas must be an integer >= 1") + now = _utc_now_iso() + with self.tx() as conn: + conn.execute( + """ + UPDATE serve_models + SET num_replicas = ?, state = 'QUEUED', error_summary = NULL, updated_at = ? + WHERE model_key = ? + """, + (int(num_replicas), now, model_key), + ) + conn.execute( + "INSERT INTO serve_events (model_key, ts, event_type, payload_json) VALUES (?, ?, 'SERVE_PATCH_NUM_REPLICAS', ?)", + (model_key, now, str(num_replicas)), + ) + + def pick_next_runnable_serve_change(self) -> dict[str, Any] | None: + """ + Returns the next serve model that needs reconciliation. + + Minimal state machine for now: + - QUEUED: needs (re)apply + - DELETING: needs removal + """ + with self._connect() as conn: + row = conn.execute( + """ + SELECT * + FROM serve_models + WHERE deleted_at IS NULL + AND state IN ('QUEUED','DELETING') + ORDER BY updated_at ASC + LIMIT 1 + """ + ).fetchone() + return dict(row) if row else None + + # Backward compatible naming (v3.8 docs originally used "upsert"). 
+ def upsert_serve_model(self, **kwargs: Any) -> dict[str, Any]: + return self.create_serve_model(**kwargs) diff --git a/src/mvp/py/argus/service/scheduler.py b/src/mvp/py/argus/service/scheduler.py index 4fb3fc1..624aca9 100644 --- a/src/mvp/py/argus/service/scheduler.py +++ b/src/mvp/py/argus/service/scheduler.py @@ -16,6 +16,8 @@ from argus.ray.ray_job_tool import RayJobTool from .config import V2Config from .db import Db from .ray_resources import ensure_ray_connected, get_cluster_available +from .serve_client import RayServeClient +from .serving_reconciler import ServingReconciler _INSUFFICIENT_RE = re.compile(r"Total available GPUs\\s+\\d+\\s+is less than total desired GPUs\\s+\\d+") @@ -37,6 +39,18 @@ class Scheduler: def __post_init__(self) -> None: self.tool = RayJobTool(self.ray_cfg) + self._serving: ServingReconciler | None = None + if bool(self.v2_cfg.serving.enabled): + self._serving = ServingReconciler( + db=self.db, + v2_cfg=self.v2_cfg, + ray_runtime_env_env_vars=self.ray_cfg.runtime_env_env_vars, + serve_client=RayServeClient( + http_port=int(self.v2_cfg.serving.serve.http_port), + proxy_location=str(self.v2_cfg.serving.serve.proxy_location), + ray_init_address="auto", + ), + ) def _job_dir_for_task(self, *, user_id: str | None, ray_submission_id: str) -> str: root = self.ray_cfg.shared_root.rstrip("/") @@ -251,6 +265,14 @@ class Scheduler: def tick(self) -> None: ensure_ray_connected() + # v3.8: reconcile serve_models (best-effort). + if self._serving is not None: + try: + self._serving.tick() + except Exception: + # Keep scheduler alive even if serving tick fails. + pass + # Sync active tasks for row in self.db.list_active_tasks(limit=50): self._sync_one_running(row) diff --git a/src/mvp/py/argus/service/serve_client.py b/src/mvp/py/argus/service/serve_client.py new file mode 100644 index 0000000..a630468 --- /dev/null +++ b/src/mvp/py/argus/service/serve_client.py @@ -0,0 +1,45 @@ +from __future__ import annotations + +from dataclasses import dataclass +from typing import Any + + +@dataclass(frozen=True) +class RayServeClient: + """ + Minimal Ray Serve client wrapper. + + This is intentionally tiny and uses runtime imports so that: + - unit tests can stub `ray` modules without needing real Ray installed + - production can run with the real Ray Serve stack (v3.8+) + """ + + http_port: int = 8000 + proxy_location: str = "HeadOnly" + ray_init_address: str = "auto" + + def ensure_started(self) -> None: + import ray # runtime import + + # Scheduler already calls ray.init(); make this idempotent. + ray.init(address=self.ray_init_address, ignore_reinit_error=True, log_to_driver=False) # type: ignore[call-arg] + + # Import serve lazily to allow tests to stub it. + from ray import serve # type: ignore + + serve.start(proxy_location=self.proxy_location, http_options={"host": "0.0.0.0", "port": int(self.http_port)}) + + def apply_app(self, *, app: Any, app_name: str, route_prefix: str = "/") -> Any: + from ray import serve # type: ignore + + # If Ray Serve LLM isn't available, callers may pass a plain dict placeholder. + # Running that through serve.run() results in a confusing TypeError; fail fast. 
+ if isinstance(app, dict): + raise ValueError("invalid serve app object (Ray Serve LLM not available or build_openai_app failed)") + + return serve.run(app, name=app_name, route_prefix=route_prefix) + + def get_status(self) -> Any: + from ray import serve # type: ignore + + return serve.status() diff --git a/src/mvp/py/argus/service/serve_llm_config.py b/src/mvp/py/argus/service/serve_llm_config.py new file mode 100644 index 0000000..0dc661e --- /dev/null +++ b/src/mvp/py/argus/service/serve_llm_config.py @@ -0,0 +1,63 @@ +from __future__ import annotations + +from typing import Any + +from .serving_spec import ResolvedServingSpec + + +def _ensure_hf_env_defaults(env: dict[str, str]) -> dict[str, str]: + out = dict(env or {}) + # Prefer existing values if present, but always force offline mode in the platform. + out.setdefault("HF_HOME", "/private/hf") + out.setdefault("HUGGINGFACE_HUB_CACHE", "/private/hf/hub") + out.setdefault("TRANSFORMERS_CACHE", "/private/hf/transformers") + out["HF_HUB_OFFLINE"] = "1" + return out + + +def build_llm_config_dict( + resolved: ResolvedServingSpec, + *, + accelerator_type: str, + runtime_env_env_vars: dict[str, str] | None, + cpu_per_gpu: float = 1.0, +) -> dict[str, Any]: + """ + Pure builder: maps a platform ResolvedServingSpec to a Ray Serve LLM-like config. + + We return a plain dict here to keep this layer unit-testable without depending on + a specific Ray Serve LLM version. The reconciler (later milestone) can choose to + instantiate `ray.serve.llm.LLMConfig` using this dict. + """ + if not accelerator_type: + raise ValueError("accelerator_type is required") + if resolved.num_replicas < 1: + raise ValueError("num_replicas must be >= 1") + if resolved.gpus_per_replica < 1: + raise ValueError("gpus_per_replica must be >= 1") + if cpu_per_gpu <= 0: + raise ValueError("cpu_per_gpu must be > 0") + + engine_kwargs: dict[str, Any] = dict(resolved.engine_kwargs or {}) + # Enforce tensor parallel mapping; user-provided value must not contradict requested GPUs. + engine_kwargs["tensor_parallel_size"] = int(resolved.gpus_per_replica) + + # Ray Serve LLM (Ray 2.49.x) exposes `resources_per_bundle` instead of the older + # `placement_group_config`. Use a single bundle that reserves the full GPU set + # required by tensor-parallel execution. + resources_per_bundle = { + "GPU": float(resolved.gpus_per_replica), + "CPU": float(cpu_per_gpu) * float(resolved.gpus_per_replica), + } + + env_vars = _ensure_hf_env_defaults(dict(runtime_env_env_vars or {})) + + return { + # Ray Serve LLM expects `model_loading_config` with model_id/model_source. 
+ "model_loading_config": {"model_id": resolved.model_id, "model_source": resolved.model_source}, + "accelerator_type": accelerator_type, + "deployment_config": {"num_replicas": int(resolved.num_replicas)}, + "engine_kwargs": engine_kwargs, + "resources_per_bundle": resources_per_bundle, + "runtime_env": {"env_vars": env_vars}, + } diff --git a/src/mvp/py/argus/service/serving_reconciler.py b/src/mvp/py/argus/service/serving_reconciler.py new file mode 100644 index 0000000..2c830d5 --- /dev/null +++ b/src/mvp/py/argus/service/serving_reconciler.py @@ -0,0 +1,151 @@ +from __future__ import annotations + +import json +import traceback +from dataclasses import dataclass +from typing import Any, Protocol + +from argus.service.ray_resources import ClusterAvailable, get_cluster_available + +from .config import V2Config +from .db import Db +from .serve_llm_config import build_llm_config_dict +from .serving_spec import ResolvedServingSpec + + +class ServeClient(Protocol): + def ensure_started(self) -> None: ... + + def apply_app(self, *, app: Any, app_name: str, route_prefix: str = "/") -> Any: ... + + def get_status(self) -> Any: ... + + +def _parse_engine_kwargs(row: dict[str, Any]) -> dict[str, Any] | None: + raw = row.get("engine_kwargs_json") + if raw in (None, ""): + return None + try: + obj = json.loads(str(raw)) + return obj if isinstance(obj, dict) else None + except Exception: + return None + + +def _row_to_resolved_spec(row: dict[str, Any]) -> ResolvedServingSpec: + return ResolvedServingSpec( + user_id=str(row["user_id"]), + model_id_suffix=str(row["model_id_suffix"]), + model_id_prefix=str(row["model_id_prefix"]), + model_id=str(row["model_id"]), + model_source=str(row["model_source"]), + num_replicas=int(row["num_replicas"]), + gpus_per_replica=int(row["gpus_per_replica"]), + engine_kwargs=_parse_engine_kwargs(row), + ) + + +def _needed_total_gpus(rows: list[dict[str, Any]]) -> int: + total = 0 + for r in rows: + total += int(r.get("num_replicas") or 0) * int(r.get("gpus_per_replica") or 0) + return total + + +@dataclass +class ServingReconciler: + """ + v3.8: reconcile declared serve_models (SQLite) into a multi-model Ray Serve app. + + This reconciler is intentionally conservative: + - Only acts on models in states QUEUED/DELETING. + - Performs a minimal GPU precheck using ray available GPU totals. + - Writes events and state transitions for explainability. + """ + + db: Db + v2_cfg: V2Config + ray_runtime_env_env_vars: dict[str, str] + serve_client: ServeClient + app_name: str = "argus_llm_app" + route_prefix: str = "/" + cpu_per_gpu: float = 1.0 + get_available_fn: Any = get_cluster_available + + def tick(self) -> None: + # Pick the next desired change. + change = self.db.pick_next_runnable_serve_change() + if not change: + return + + model_key = str(change["model_key"]) + state = str(change.get("state") or "") + + # Ensure Ray (and Serve) can be started before doing anything else. + try: + self.serve_client.ensure_started() + except Exception as e: + self.db.append_serve_event(model_key=model_key, event_type="SERVE_START_ERROR", payload_json=repr(e)) + return + + # Desired set: all non-deleted models except those marked DELETING. + all_rows = self.db.list_all_serve_models(include_deleted=False, limit=5000, offset=0) + # FAILED models are not part of the desired running set. A user can PATCH to + # re-queue a failed model (e.g., after fixing env/deps) which will move it back to QUEUED. 
+ desired_rows = [r for r in all_rows if str(r.get("state") or "") not in ("DELETING", "DELETED", "FAILED")] + + # Precheck resources: multi-model app apply needs enough GPUs for the whole desired set. + needed = _needed_total_gpus(desired_rows) + avail: ClusterAvailable = self.get_available_fn() + if float(avail.total_available_gpus) < float(needed): + msg = f"Insufficient GPUs: need {needed}, available {avail.total_available_gpus}" + self.db.append_serve_event(model_key=model_key, event_type="SERVE_PENDING_RESOURCES", payload_json=msg) + return + + # Build per-model LLM configs (dict form in M4). + llm_cfg_dicts: list[dict[str, Any]] = [] + accelerator_type = str(self.v2_cfg.serving.llm.accelerator_type or "") + for r in desired_rows: + resolved = _row_to_resolved_spec(r) + llm_cfg_dicts.append( + build_llm_config_dict( + resolved, + accelerator_type=accelerator_type, + runtime_env_env_vars=self.ray_runtime_env_env_vars, + cpu_per_gpu=self.cpu_per_gpu, + ) + ) + + # Build a Ray Serve OpenAI-compatible app if Ray Serve LLM is available. + # Fall back to a plain dict so unit tests can run without real Ray Serve. + app_obj: Any + try: + from ray.serve.llm import LLMConfig, build_openai_app # type: ignore + + llm_cfgs = [LLMConfig(**d) for d in llm_cfg_dicts] + app_obj = build_openai_app({"llm_configs": llm_cfgs}) + except Exception as e: + self.db.append_serve_event(model_key=model_key, event_type="SERVE_LLM_IMPORT_ERROR", payload_json=repr(e)) + app_obj = {"llm_configs": llm_cfg_dicts} + + try: + self.db.append_serve_event(model_key=model_key, event_type="SERVE_APPLY_REQUESTED", payload_json=str(len(llm_cfg_dicts))) + self.serve_client.apply_app(app=app_obj, app_name=self.app_name, route_prefix=self.route_prefix) + except Exception as e: + err = f"{type(e).__name__}: {e}" + tb = traceback.format_exc(limit=10) + self.db.set_serve_model_state(model_key=model_key, state="FAILED", error_summary=err, event_type="SERVE_APPLY_FAILED", payload_json=tb) + return + + # Apply succeeded. Update the changing model's state. + if state == "DELETING": + self.db.set_serve_model_state(model_key=model_key, state="DELETED", event_type="SERVE_DELETE_APPLIED") + return + + # Mark as deploying; best-effort status probe can promote to RUNNING. 
+ self.db.set_serve_model_state(model_key=model_key, state="DEPLOYING", event_type="SERVE_DEPLOYING") + try: + _ = self.serve_client.get_status() + self.db.set_serve_model_state(model_key=model_key, state="RUNNING", event_type="SERVE_RUNNING") + except Exception as e: + self.db.append_serve_event(model_key=model_key, event_type="SERVE_STATUS_ERROR", payload_json=repr(e)) diff --git a/src/mvp/py/argus/service/serving_spec.py b/src/mvp/py/argus/service/serving_spec.py new file mode 100644 index 0000000..c04362f --- /dev/null +++ b/src/mvp/py/argus/service/serving_spec.py @@ -0,0 +1,144 @@ +from __future__ import annotations + +import re +from dataclasses import dataclass +from datetime import datetime, timezone +from typing import Any + + +_MODEL_ID_SUFFIX_RE = re.compile(r"^[A-Za-z0-9][A-Za-z0-9._-]{0,63}$") + + +@dataclass(frozen=True) +class ServingSpec: + model_id: str + model_source: str + num_replicas: int = 1 + gpus_per_replica: int = 1 + engine_kwargs: dict[str, Any] | None = None + + +@dataclass(frozen=True) +class ResolvedServingSpec: + user_id: str + model_id_suffix: str + model_id_prefix: str + model_id: str + model_source: str + num_replicas: int + gpus_per_replica: int + engine_kwargs: dict[str, Any] | None + + +def validate_model_id_suffix(suffix: str) -> None: + if not isinstance(suffix, str): + raise ValueError("model_id must be a string") + s = suffix.strip() + if s != suffix: + raise ValueError("model_id must not contain leading/trailing whitespace") + if not s: + raise ValueError("model_id is required") + if not _MODEL_ID_SUFFIX_RE.match(s): + raise ValueError("model_id must match regex: ^[A-Za-z0-9][A-Za-z0-9._-]{0,63}$") + if ".." in s: + raise ValueError("model_id must not contain '..'") + + +def make_model_id_prefix(*, user_id: str, now_utc: datetime | None = None) -> str: + if not user_id or not isinstance(user_id, str): + raise ValueError("user_id is required") + if "/" in user_id: + raise ValueError("user_id must not contain '/'") + + dt = now_utc or datetime.now(timezone.utc) + if dt.tzinfo is None: + dt = dt.replace(tzinfo=timezone.utc) + dt = dt.astimezone(timezone.utc) + stamp = dt.strftime("%Y%m%d%H%M") + return f"{user_id}-{stamp}" + + +def expand_home_macros(*, user_id: str, text: str) -> str: + if not isinstance(text, str): + raise ValueError("model_source must be a string") + if not text: + raise ValueError("model_source is required") + + out = text + out = out.replace("$HOME/common/hf", "/private/hf") + out = out.replace("$HOME/common/datasets", "/private/datasets") + out = out.replace("$HOME", f"/private/users/{user_id}") + return out + + +def validate_model_source_path(*, user_id: str, model_source: str) -> None: + if not isinstance(model_source, str): + raise ValueError("model_source must be a string") + if not model_source.startswith("/"): + raise ValueError("model_source must be an absolute path") + if not model_source.startswith("/private/"): + raise ValueError("model_source must be under /private") + if "\x00" in model_source: + raise ValueError("model_source contains null byte") + parts = [p for p in model_source.split("/") if p] + if any(p == ".." 
for p in parts): + raise ValueError("model_source must not contain '..'") + + allowed_user_prefix = f"/private/users/{user_id}/" + allowed = model_source.startswith("/private/hf/") or model_source.startswith(allowed_user_prefix) + if not allowed: + raise PermissionError("model_source is not allowed (must be under /private/hf or your /private/users/)") + + +def parse_serving_spec(obj: Any) -> ServingSpec: + if not isinstance(obj, dict): + raise ValueError("serving spec must be a mapping") + + model_id = obj.get("model_id") + model_source = obj.get("model_source") + num_replicas = obj.get("num_replicas", 1) + gpus_per_replica = obj.get("gpus_per_replica", 1) + engine_kwargs = obj.get("engine_kwargs", None) + + if not isinstance(model_id, str): + raise ValueError("missing required field: model_id") + validate_model_id_suffix(model_id) + + if not isinstance(model_source, str) or not model_source: + raise ValueError("missing required field: model_source") + + if not isinstance(num_replicas, int) or num_replicas < 1: + raise ValueError("num_replicas must be an integer >= 1") + if not isinstance(gpus_per_replica, int) or gpus_per_replica < 1: + raise ValueError("gpus_per_replica must be an integer >= 1") + + if engine_kwargs is not None and not isinstance(engine_kwargs, dict): + raise ValueError("engine_kwargs must be a mapping when provided") + + return ServingSpec( + model_id=model_id, + model_source=model_source, + num_replicas=num_replicas, + gpus_per_replica=gpus_per_replica, + engine_kwargs=engine_kwargs, + ) + + +def resolve_serving_spec(*, spec: ServingSpec, user_id: str, now_utc: datetime | None = None) -> ResolvedServingSpec: + validate_model_id_suffix(spec.model_id) + prefix = make_model_id_prefix(user_id=user_id, now_utc=now_utc) + full_model_id = f"{prefix}-{spec.model_id}" + + resolved_source = expand_home_macros(user_id=user_id, text=spec.model_source) + validate_model_source_path(user_id=user_id, model_source=resolved_source) + + return ResolvedServingSpec( + user_id=user_id, + model_id_suffix=spec.model_id, + model_id_prefix=prefix, + model_id=full_model_id, + model_source=resolved_source, + num_replicas=spec.num_replicas, + gpus_per_replica=spec.gpus_per_replica, + engine_kwargs=spec.engine_kwargs, + ) diff --git a/src/mvp/py/argus/service/ui.py b/src/mvp/py/argus/service/ui.py index b5a0822..c77b629 100644 --- a/src/mvp/py/argus/service/ui.py +++ b/src/mvp/py/argus/service/ui.py @@ -112,6 +112,7 @@ def _nav(active: str) -> str: links = [ ("login", "/ui/login", "Login"), ("tasks", "/ui/tasks", "Tasks"), + ("serving", "/ui/serving", "Serving"), ("new", "/ui/tasks/new", "New Task"), ("data", "/ui/data", "Data"), ("admin", "/ui/admin", "Admin"), @@ -992,6 +993,253 @@ refresh(); """.strip() return HTMLResponse(content=_page(f"Logs {task_id}", "tasks", body, script)) + @app.get("/ui/serving") + async def ui_serving() -> HTMLResponse: + body = """ +

Serving

+
+
+ + New Model + OpenAI /v1/models +
+
+
Loading...
+
+""".strip() + script = """ +document.getElementById("nav-ray-dashboard").href = curOriginWithPort(8265); +document.getElementById("openai-models").href = curOriginWithPort(8000) + "/v1/models"; +const out = document.getElementById("out"); + +function pill(state) { + const s = String(state || ""); + if (s === "RUNNING") return `${s}`; + if (s === "FAILED") return `${s}`; + return `${s}`; +} + +async function refresh() { + out.textContent = "Loading..."; + try { + const lim = 50; + const off = Number(localStorage.getItem("mvp_serving_offset") || "0") || 0; + const resp = await apiJson("/api/v2/serve/models?limit=" + lim + "&offset=" + off + "&include_deleted=0"); + const items = resp.items || []; + const hasMore = !!resp.has_more; + const pageNo = Math.floor(off / lim) + 1; + const prevDisabled = off <= 0; + const nextDisabled = !hasMore; + + function row(m) { + return ` + ${m.model_key} + ${m.model_id} + ${pill(m.state)} + ${m.num_replicas} × ${m.gpus_per_replica} GPU + ${m.updated_at || ""} + `; + } + const rows = items.map(row).join(""); + + out.innerHTML = ` +
+
OpenAI base: ${resp.openai_base_url || curOriginWithPort(8000) + "/v1"}
+
+ Page ${pageNo} + + +
+
+ + + ${rows || ""} +
Model KeyModel IDStateResourcesUpdated
(none)
+ `; + + const prevBtn = document.getElementById("prev"); + const nextBtn = document.getElementById("next"); + if (prevBtn) prevBtn.onclick = () => { localStorage.setItem("mvp_serving_offset", String(Math.max(0, off - lim))); refresh(); }; + if (nextBtn) nextBtn.onclick = () => { localStorage.setItem("mvp_serving_offset", String(off + lim)); refresh(); }; + } catch (e) { + let text = "Error: " + (e.status || "") + "\\n" + (e.body || String(e)); + if (e.body && String(e.body).includes("serving is not enabled")) { + text = "Serving is not enabled in server config.\\nAsk admin to enable `serving:` in dev.yaml."; + } + out.textContent = text; + } +} + +document.getElementById("refresh").onclick = refresh; +refresh(); +""".strip() + return HTMLResponse(content=_page("Serving", "serving", body, script)) + + @app.get("/ui/serving/new") + async def ui_serving_new() -> HTMLResponse: + example = """# ServingSpec (YAML) +# Notes: +# - model_id: this is the suffix (the platform automatically prepends <user_id>-<YYYYMMDDHHMM>-) +# - model_source: local model path ($HOME macros supported; $HOME/common/hf pointing at the shared HF cache is recommended) +# +# Common paths: +# - $HOME/common/hf -> /private/hf +# - $HOME -> /private/users/<user_id> +# +model_id: qwen-0.5b +model_source: $HOME/common/hf/hub/models--Qwen--Qwen2.5-0.5B-Instruct/snapshots/ +num_replicas: 1 +gpus_per_replica: 1 + +# engine_kwargs: # optional: passed through to vLLM as-is +# gpu_memory_utilization: 0.4 +""".strip() + body = f""" +

New Model

+
+
Paste ServingSpec YAML and submit to /api/v2/serve/models.
+
+ +
+
+ + Back +
+
+

+
+""".strip() + script = """ +document.getElementById("nav-ray-dashboard").href = curOriginWithPort(8265); +const out = document.getElementById("out"); +document.getElementById("submit").onclick = async () => { + out.textContent = "Submitting..."; + const yaml = document.getElementById("yaml").value || ""; + try { + const resp = await apiJson("/api/v2/serve/models", { method: "POST", headers: { "Content-Type": "application/yaml" }, body: yaml }); + out.textContent = "Created: " + resp.model_key + "\\nState: " + resp.state; + if (resp.model_key) window.location.href = "/ui/serving/" + encodeURIComponent(resp.model_key); + } catch (e) { + out.textContent = "Error: " + (e.status || "") + "\\n" + (e.body || String(e)); + } +}; +""".strip() + return HTMLResponse(content=_page("New Model", "serving", body, script)) + + @app.get("/ui/serving/{model_key}") + async def ui_serving_detail(model_key: str) -> HTMLResponse: + body = f""" +

Model

+
+
+
model_key: {html.escape(model_key)}
+ +
+
+
+ + + + +
+
+
Loading...
+
+

Resolved Spec (YAML)

+
(loading)
+
+

Events

+
(loading)
+
+

OpenAI Example

+
(loading)
+
+""".strip() + script = f""" +document.getElementById("nav-ray-dashboard").href = curOriginWithPort(8265); +document.getElementById("openai-models").href = curOriginWithPort(8000) + "/v1/models"; +const modelKey = {json.dumps(model_key)}; +const meta = document.getElementById("meta"); +const spec = document.getElementById("spec"); +const eventsEl = document.getElementById("events"); +const example = document.getElementById("example"); +const replicas = document.getElementById("replicas"); + +function pill(state) {{ + const s = String(state || ""); + if (s === "RUNNING") return `${{s}}`; + if (s === "FAILED") return `${{s}}`; + return `${{s}}`; +}} + +function renderEvents(events) {{ + if (!events || !events.length) return "
(none)
"; + const rows = events.map(e => {{ + const payload = (e.payload_json || ""); + const short = String(payload).length > 240 ? String(payload).slice(0, 240) + "..." : String(payload); + return `${{e.created_at || ""}}${{e.event_type}}
${{short}}
`; + }}).join(""); + return `${{rows}}
TimeTypePayload
`; +}} + +async function refresh() {{ + meta.textContent = "Loading..."; + spec.textContent = "(loading)"; + eventsEl.textContent = "(loading)"; + example.textContent = "(loading)"; + try {{ + const obj = await apiJson("/api/v2/serve/models/" + encodeURIComponent(modelKey)); + const m = obj.model || {{}}; + replicas.value = String(m.num_replicas || 1); + meta.innerHTML = ` +
+
state: ${{pill(m.state)}}
+
model_id: ${{m.model_id || ""}}
+
source: ${{m.model_source || ""}}
+
+
endpoint: ${{(m.endpoint && m.endpoint.openai_base_url) || (curOriginWithPort(8000) + "/v1")}}
+ `; + spec.textContent = obj.resolved_spec_yaml || ""; + eventsEl.innerHTML = renderEvents(obj.events || []); + const base = (m.endpoint && m.endpoint.openai_base_url) || (curOriginWithPort(8000) + "/v1"); + const mid = m.model_id || ""; + example.textContent = `curl -sS -H 'Content-Type: application/json' -H 'Authorization: Bearer FAKE_KEY' \\\\\\n -X POST ${{base}}/chat/completions \\\\\\n --data-binary '{{\\"model\\":\\"${{mid}}\\",\\"messages\\":[{{\\"role\\":\\"user\\",\\"content\\":\\"hello\\"}}],\\"max_tokens\\":16,\\"stream\\":false}}' | python3 -m json.tool`; + }} catch (e) {{ + meta.textContent = "Error: " + (e.status || "") + "\\n" + (e.body || String(e)); + spec.textContent = ""; + eventsEl.textContent = ""; + example.textContent = ""; + }} +}} + +document.getElementById("scale").onclick = async () => {{ + const n = Number(replicas.value || "1"); + if (!Number.isFinite(n) || n < 1) return; + try {{ + await apiJson("/api/v2/serve/models/" + encodeURIComponent(modelKey), {{ method: "PATCH", headers: {{ "Content-Type": "application/json" }}, body: JSON.stringify({{ num_replicas: n }}) }}); + await refresh(); + }} catch (e) {{ + meta.textContent = "Error: " + (e.status || "") + "\\n" + (e.body || String(e)); + }} +}}; + +document.getElementById("delete").onclick = async () => {{ + if (!confirm("Delete this model?")) return; + try {{ + await apiJson("/api/v2/serve/models/" + encodeURIComponent(modelKey), {{ method: "DELETE" }}); + await refresh(); + }} catch (e) {{ + meta.textContent = "Error: " + (e.status || "") + "\\n" + (e.body || String(e)); + }} +}}; + +refresh(); +""".strip() + return HTMLResponse(content=_page("Model", "serving", body, script)) + @app.get("/ui/data") async def ui_data() -> HTMLResponse: body = """ diff --git a/src/mvp/py/tests/test_app_serving_api.py b/src/mvp/py/tests/test_app_serving_api.py new file mode 100644 index 0000000..4f9167f --- /dev/null +++ b/src/mvp/py/tests/test_app_serving_api.py @@ -0,0 +1,282 @@ +from __future__ import annotations + +from pathlib import Path + +import yaml +from fastapi.testclient import TestClient + + +def _write_config(tmp_path: Path) -> Path: + cfg = { + "ray": { + "address": "http://127.0.0.1:8265", + "shared_root": "/private", + "entrypoint_resources": {"worker_node": 1}, + "runtime_env": {"env_vars": {}}, + }, + "data": { + "user_root": str(tmp_path / "users"), + }, + "service": { + "api": {"host": "127.0.0.1", "port": 0}, + "auth": {"token_env": "MVP_INTERNAL_TOKEN"}, + "sqlite": {"db_path": str(tmp_path / "mvp.sqlite3")}, + "scheduler": {"tick_s": 1, "retry_interval_s": 1, "max_running_tasks": 1}, + }, + "serving": { + "serve": {"http_port": 8000, "proxy_location": "HeadOnly"}, + "llm": {"accelerator_type": "H20"}, + }, + } + p = tmp_path / "cfg.yaml" + p.write_text(yaml.safe_dump(cfg), encoding="utf-8") + return p + + +def test_serving_api_crud_flow(tmp_path: Path, monkeypatch): + from argus.service import app as app_mod + + cfg_path = _write_config(tmp_path) + monkeypatch.setenv("MVP_INTERNAL_TOKEN", "admin-token") + + class _Scheduler: + def __init__(self, **kwargs): + self.tool = object() + + def run_forever(self, stop_flag): + return None + + monkeypatch.setattr(app_mod, "Scheduler", _Scheduler) + monkeypatch.setattr(app_mod, "new_model_key", lambda user_id: f"mk-{user_id}") + + app = app_mod.create_app(str(cfg_path)) + + admin_headers = {"authorization": "Bearer admin-token"} + with TestClient(app) as c: + r = c.post("/api/v2/users", headers=admin_headers, json={"user_id": "alice"}) + assert r.status_code == 
200 + r2 = c.post("/api/v2/users/alice/tokens", headers=admin_headers) + assert r2.status_code == 200 + user_token = r2.json()["token"] + + headers = {"authorization": f"Bearer {user_token}"} + + spec_yaml = ( + "model_id: qwen-0.5b\n" + "model_source: $HOME/common/hf/models--Qwen--Qwen2.5-0.5B-Instruct/snapshots/sha\n" + "num_replicas: 1\n" + "gpus_per_replica: 1\n" + ) + r3 = c.post("/api/v2/serve/models", headers=headers, data=spec_yaml) + assert r3.status_code == 200 + assert r3.json()["model_key"] == "mk-alice" + assert r3.json()["state"] == "QUEUED" + + r4 = c.get("/api/v2/serve/models?limit=10&offset=0", headers=headers) + assert r4.status_code == 200 + obj = r4.json() + assert obj["openai_base_url"] == "http://testserver:8000/v1" + assert len(obj["items"]) == 1 + assert obj["items"][0]["model_key"] == "mk-alice" + + r5 = c.get("/api/v2/serve/models/mk-alice", headers=headers) + assert r5.status_code == 200 + detail = r5.json() + assert detail["model"]["model_key"] == "mk-alice" + assert "model_id_prefix" in detail["model"] + assert "resolved_spec_yaml" in detail + assert isinstance(detail.get("events"), list) + + r6 = c.patch("/api/v2/serve/models/mk-alice", headers=headers, json={"num_replicas": 2}) + assert r6.status_code == 200 + assert r6.json()["state"] == "QUEUED" + + r7 = c.delete("/api/v2/serve/models/mk-alice", headers=headers) + assert r7.status_code == 200 + assert r7.json()["state"] == "DELETING" + + # Admin status endpoint + r8 = c.get("/api/v2/serve/status", headers=admin_headers) + assert r8.status_code == 200 + assert r8.json()["http_port"] == 8000 + + +def test_serving_api_rejects_path_outside_user_and_hf(tmp_path: Path, monkeypatch): + from argus.service import app as app_mod + + cfg_path = _write_config(tmp_path) + monkeypatch.setenv("MVP_INTERNAL_TOKEN", "admin-token") + + class _Scheduler: + def __init__(self, **kwargs): + self.tool = object() + + def run_forever(self, stop_flag): + return None + + monkeypatch.setattr(app_mod, "Scheduler", _Scheduler) + monkeypatch.setattr(app_mod, "new_model_key", lambda user_id: f"mk-{user_id}") + + app = app_mod.create_app(str(cfg_path)) + + admin_headers = {"authorization": "Bearer admin-token"} + with TestClient(app) as c: + c.post("/api/v2/users", headers=admin_headers, json={"user_id": "alice"}) + r2 = c.post("/api/v2/users/alice/tokens", headers=admin_headers) + user_token = r2.json()["token"] + headers = {"authorization": f"Bearer {user_token}"} + + spec_yaml = ( + "model_id: x\n" + "model_source: /private/users/bob/models/evil\n" + "num_replicas: 1\n" + "gpus_per_replica: 1\n" + ) + r3 = c.post("/api/v2/serve/models", headers=headers, data=spec_yaml) + assert r3.status_code == 403 + + +def test_serving_api_invalid_yaml_and_non_mapping(tmp_path: Path, monkeypatch): + from argus.service import app as app_mod + + cfg_path = _write_config(tmp_path) + monkeypatch.setenv("MVP_INTERNAL_TOKEN", "admin-token") + + class _Scheduler: + def __init__(self, **kwargs): + self.tool = object() + + def run_forever(self, stop_flag): + return None + + monkeypatch.setattr(app_mod, "Scheduler", _Scheduler) + monkeypatch.setattr(app_mod, "new_model_key", lambda user_id: f"mk-{user_id}") + app = app_mod.create_app(str(cfg_path)) + + with TestClient(app) as c: + # Create a user token + admin_headers = {"authorization": "Bearer admin-token"} + c.post("/api/v2/users", headers=admin_headers, json={"user_id": "alice"}) + token = c.post("/api/v2/users/alice/tokens", headers=admin_headers).json()["token"] + headers = {"authorization": f"Bearer 
{token}"} + + r = c.post("/api/v2/serve/models", headers=headers, data=": bad\n") + assert r.status_code == 400 + + r2 = c.post("/api/v2/serve/models", headers=headers, data="- 1\n- 2\n") + assert r2.status_code == 400 + + +def test_serving_api_engine_kwargs_binary_rejected(tmp_path: Path, monkeypatch): + """ + yaml !!binary is parsed as bytes, which is not JSON-serializable. + """ + from argus.service import app as app_mod + + cfg_path = _write_config(tmp_path) + monkeypatch.setenv("MVP_INTERNAL_TOKEN", "admin-token") + + class _Scheduler: + def __init__(self, **kwargs): + self.tool = object() + + def run_forever(self, stop_flag): + return None + + monkeypatch.setattr(app_mod, "Scheduler", _Scheduler) + monkeypatch.setattr(app_mod, "new_model_key", lambda user_id: f"mk-{user_id}") + app = app_mod.create_app(str(cfg_path)) + + admin_headers = {"authorization": "Bearer admin-token"} + with TestClient(app) as c: + c.post("/api/v2/users", headers=admin_headers, json={"user_id": "alice"}) + token = c.post("/api/v2/users/alice/tokens", headers=admin_headers).json()["token"] + headers = {"authorization": f"Bearer {token}"} + + spec_yaml = ( + "model_id: x\n" + "model_source: $HOME/common/hf/x\n" + "engine_kwargs:\n" + " blob: !!binary \"AQID\"\n" + ) + r = c.post("/api/v2/serve/models", headers=headers, data=spec_yaml) + assert r.status_code == 400 + + +def test_serving_api_list_include_deleted_and_forwarded_base_url(tmp_path: Path, monkeypatch): + from argus.service import app as app_mod + from argus.service.config import V2Config + from argus.service.db import Db + + cfg_path = _write_config(tmp_path) + monkeypatch.setenv("MVP_INTERNAL_TOKEN", "admin-token") + + class _Scheduler: + def __init__(self, **kwargs): + self.tool = object() + + def run_forever(self, stop_flag): + return None + + monkeypatch.setattr(app_mod, "Scheduler", _Scheduler) + keys = iter(["mk-alice-1", "mk-alice-2"]) + monkeypatch.setattr(app_mod, "new_model_key", lambda user_id: next(keys)) + + app = app_mod.create_app(str(cfg_path)) + + admin_headers = {"authorization": "Bearer admin-token"} + with TestClient(app) as c: + c.post("/api/v2/users", headers=admin_headers, json={"user_id": "alice"}) + token = c.post("/api/v2/users/alice/tokens", headers=admin_headers).json()["token"] + headers = {"authorization": f"Bearer {token}"} + + spec_yaml = "model_id: x\nmodel_source: $HOME/common/hf/x\n" + c.post("/api/v2/serve/models", headers=headers, data=spec_yaml) + c.post("/api/v2/serve/models", headers=headers, data=spec_yaml) + + # Mark one model as DELETED directly in DB (sets deleted_at). 
+ root = yaml.safe_load(cfg_path.read_text(encoding="utf-8")) + v2_cfg = V2Config.from_root_dict(root) + db = Db(v2_cfg.sqlite.db_path) + db.set_serve_model_state(model_key="mk-alice-2", state="DELETED") + + r1 = c.get( + "/api/v2/serve/models?limit=10&offset=0&include_deleted=0", + headers={**headers, "x-forwarded-host": "example.com:8080", "x-forwarded-proto": "https"}, + ) + assert r1.status_code == 200 + assert r1.json()["openai_base_url"] == "https://example.com:8000/v1" + assert {m["model_key"] for m in r1.json()["items"]} == {"mk-alice-1"} + + r2 = c.get("/api/v2/serve/models?include_deleted=1", headers=headers) + assert r2.status_code == 200 + assert {m["model_key"] for m in r2.json()["items"]} == {"mk-alice-1", "mk-alice-2"} + + +def test_serving_api_patch_invalid_num_replicas(tmp_path: Path, monkeypatch): + from argus.service import app as app_mod + + cfg_path = _write_config(tmp_path) + monkeypatch.setenv("MVP_INTERNAL_TOKEN", "admin-token") + + class _Scheduler: + def __init__(self, **kwargs): + self.tool = object() + + def run_forever(self, stop_flag): + return None + + monkeypatch.setattr(app_mod, "Scheduler", _Scheduler) + monkeypatch.setattr(app_mod, "new_model_key", lambda user_id: "mk-alice") + + app = app_mod.create_app(str(cfg_path)) + + admin_headers = {"authorization": "Bearer admin-token"} + with TestClient(app) as c: + c.post("/api/v2/users", headers=admin_headers, json={"user_id": "alice"}) + token = c.post("/api/v2/users/alice/tokens", headers=admin_headers).json()["token"] + headers = {"authorization": f"Bearer {token}"} + + c.post("/api/v2/serve/models", headers=headers, data="model_id: x\nmodel_source: $HOME/common/hf/x\n") + r = c.patch("/api/v2/serve/models/mk-alice", headers=headers, json={"num_replicas": 0}) + assert r.status_code == 422 diff --git a/src/mvp/py/tests/test_db_serving.py b/src/mvp/py/tests/test_db_serving.py new file mode 100644 index 0000000..1a1c829 --- /dev/null +++ b/src/mvp/py/tests/test_db_serving.py @@ -0,0 +1,79 @@ +from __future__ import annotations + +import json +from pathlib import Path + + +def test_db_serving_model_crud_and_events(tmp_path: Path) -> None: + from argus.service.db import Db + + db = Db(str(tmp_path / "mvp.sqlite3")) + db.init() + + m1 = db.create_serve_model( + model_key="svc-001", + user_id="alice", + model_id_suffix="qwen-0.5b", + model_id_prefix="alice-202601061235", + model_id="alice-202601061235-qwen-0.5b", + model_source="/private/hf/hub/models--Qwen--Qwen2.5-0.5B-Instruct/snapshots/sha", + num_replicas=1, + gpus_per_replica=1, + engine_kwargs_json=json.dumps({"max_model_len": 8192}), + spec_yaml="model_id: qwen-0.5b\nmodel_source: $HOME/common/hf/...\n", + resolved_spec_yaml="model_id: alice-202601061235-qwen-0.5b\nmodel_source: /private/hf/...\n", + ) + assert m1["model_key"] == "svc-001" + assert m1["state"] == "QUEUED" + + # Same suffix may be created again; model_key is the identity. 
+ m2 = db.create_serve_model( + model_key="svc-002", + user_id="alice", + model_id_suffix="qwen-0.5b", + model_id_prefix="alice-202601061236", + model_id="alice-202601061236-qwen-0.5b", + model_source="/private/hf/hub/models--Qwen--Qwen2.5-0.5B-Instruct/snapshots/sha", + num_replicas=1, + gpus_per_replica=2, + engine_kwargs_json=None, + spec_yaml="model_id: qwen-0.5b\nmodel_source: $HOME/common/hf/...\n", + resolved_spec_yaml="model_id: alice-202601061236-qwen-0.5b\nmodel_source: /private/hf/...\n", + ) + assert m2["model_key"] == "svc-002" + assert m2["model_id"] != m1["model_id"] + + got = db.get_serve_model("svc-001") + assert got is not None + assert got["gpus_per_replica"] == 1 + + items = db.list_serve_models(user_id="alice") + assert {i["model_key"] for i in items} == {"svc-001", "svc-002"} + + # State transition writes a serve event. + db.set_serve_model_state(model_key="svc-001", state="DEPLOYING") + got2 = db.get_serve_model("svc-001") + assert got2 is not None + assert got2["state"] == "DEPLOYING" + + events = db.list_serve_events("svc-001", limit=50) + assert len(events) >= 2 + assert {e["event_type"] for e in events}.issuperset({"SERVE_MODEL_CREATED", "SERVE_STATE_UPDATE"}) + + # Reconciler pick: QUEUED/DELETING only. + picked = db.pick_next_runnable_serve_change() + assert picked is not None + assert picked["state"] == "QUEUED" + + db.set_serve_model_state(model_key="svc-002", state="DELETING") + picked2 = db.pick_next_runnable_serve_change() + assert picked2 is not None + assert picked2["state"] in ("QUEUED", "DELETING") + + # Deleted models are hidden unless include_deleted. + db.set_serve_model_state(model_key="svc-002", state="DELETED") + items2 = db.list_serve_models(user_id="alice", include_deleted=False) + assert {i["model_key"] for i in items2} == {"svc-001"} + items3 = db.list_serve_models(user_id="alice", include_deleted=True) + assert {i["model_key"] for i in items3} == {"svc-001", "svc-002"} + diff --git a/src/mvp/py/tests/test_ids.py b/src/mvp/py/tests/test_ids.py index ad2a8fe..fd422e8 100644 --- a/src/mvp/py/tests/test_ids.py +++ b/src/mvp/py/tests/test_ids.py @@ -44,3 +44,32 @@ def test_attempt_submission_id_format(): assert attempt_submission_id("t", 1) == "t--a01" assert attempt_submission_id("t", 12) == "t--a12" + + +def test_new_model_key_includes_user(monkeypatch): + import argus.core.ids as ids + + class _FakeDatetime: + @staticmethod + def now(): + class _DT: + def strftime(self, fmt: str) -> str: + assert fmt == "%Y%m%d-%H%M%S" + return "20250101-010203" + + return _DT() + + monkeypatch.setattr(ids, "datetime", _FakeDatetime) + monkeypatch.setattr(ids.secrets, "token_hex", lambda n: "abcd") + + assert ids.new_model_key(user_id="Alice_01") == "mvp2-alice_01-serve-20250101-010203-abcd" + + +def test_new_model_key_requires_user_id(): + from argus.core.ids import new_model_key + + try: + new_model_key(user_id="") + assert False, "expected ValueError" + except ValueError as e: + assert "user_id is required" in str(e) diff --git a/src/mvp/py/tests/test_llm_config_builder.py b/src/mvp/py/tests/test_llm_config_builder.py new file mode 100644 index 0000000..78aeac7 --- /dev/null +++ b/src/mvp/py/tests/test_llm_config_builder.py @@ -0,0 +1,78 @@ +from __future__ import annotations + +import pytest + + +def test_build_llm_config_dict_maps_tp_and_bundles(): + from argus.service.serve_llm_config import build_llm_config_dict + from argus.service.serving_spec import ResolvedServingSpec + + resolved = ResolvedServingSpec( + user_id="alice", + 
model_id_suffix="qwen-0.5b", + model_id_prefix="alice-202601061235", + model_id="alice-202601061235-qwen-0.5b", + model_source="/private/hf/x", + num_replicas=2, + gpus_per_replica=4, + engine_kwargs={"gpu_memory_utilization": 0.9}, + ) + + cfg = build_llm_config_dict( + resolved, + accelerator_type="H20", + runtime_env_env_vars={"HF_ENDPOINT": "https://hf-mirror.com"}, + cpu_per_gpu=2.0, + ) + assert cfg["model_loading_config"]["model_id"] == "alice-202601061235-qwen-0.5b" + assert cfg["model_loading_config"]["model_source"] == "/private/hf/x" + assert cfg["accelerator_type"] == "H20" + assert cfg["deployment_config"]["num_replicas"] == 2 + + # gpus_per_replica -> tensor_parallel_size + assert cfg["engine_kwargs"]["tensor_parallel_size"] == 4 + assert cfg["engine_kwargs"]["gpu_memory_utilization"] == 0.9 + + # resources_per_bundle reserves the full TP GPU set for each replica. + bundle = cfg["resources_per_bundle"] + assert bundle["GPU"] == 4.0 + assert bundle["CPU"] == 8.0 + + +def test_build_llm_config_dict_injects_hf_offline_defaults(): + from argus.service.serve_llm_config import build_llm_config_dict + from argus.service.serving_spec import ResolvedServingSpec + + resolved = ResolvedServingSpec( + user_id="alice", + model_id_suffix="x", + model_id_prefix="alice-202601061235", + model_id="alice-202601061235-x", + model_source="/private/users/alice/models/x", + num_replicas=1, + gpus_per_replica=1, + engine_kwargs=None, + ) + cfg = build_llm_config_dict(resolved, accelerator_type="H20", runtime_env_env_vars={}) + env = cfg["runtime_env"]["env_vars"] + assert env["HF_HUB_OFFLINE"] == "1" + assert env["HF_HOME"] == "/private/hf" + assert env["HUGGINGFACE_HUB_CACHE"].startswith("/private/hf/") + + +def test_build_llm_config_dict_requires_accelerator_type(): + from argus.service.serve_llm_config import build_llm_config_dict + from argus.service.serving_spec import ResolvedServingSpec + + resolved = ResolvedServingSpec( + user_id="alice", + model_id_suffix="x", + model_id_prefix="alice-202601061235", + model_id="alice-202601061235-x", + model_source="/private/hf/x", + num_replicas=1, + gpus_per_replica=1, + engine_kwargs=None, + ) + with pytest.raises(ValueError, match="accelerator_type is required"): + build_llm_config_dict(resolved, accelerator_type="", runtime_env_env_vars={}) diff --git a/src/mvp/py/tests/test_serve_client.py b/src/mvp/py/tests/test_serve_client.py new file mode 100644 index 0000000..1f79d60 --- /dev/null +++ b/src/mvp/py/tests/test_serve_client.py @@ -0,0 +1,55 @@ +from __future__ import annotations + +import sys +import types + + +def test_ray_serve_client_calls_start_run_status(monkeypatch): + import ray # provided by conftest stub + + calls: list[tuple[str, object]] = [] + + def _init(*args, **kwargs): + calls.append(("ray.init", {"args": args, "kwargs": kwargs})) + + monkeypatch.setattr(ray, "init", _init, raising=False) + + serve = types.ModuleType("ray.serve") + + def _start(**kwargs): + calls.append(("serve.start", kwargs)) + return None + + def _run(app, name=None, route_prefix=None): + calls.append(("serve.run", {"app": app, "name": name, "route_prefix": route_prefix})) + return {"deployed": True} + + def _status(): + calls.append(("serve.status", None)) + return {"ok": True} + + serve.start = _start # type: ignore[attr-defined] + serve.run = _run # type: ignore[attr-defined] + serve.status = _status # type: ignore[attr-defined] + + sys.modules["ray.serve"] = serve + ray.serve = serve # type: ignore[attr-defined] + + from argus.service.serve_client import 
RayServeClient + + client = RayServeClient(http_port=8000, proxy_location="HeadOnly", ray_init_address="auto") + client.ensure_started() + out = client.apply_app(app="APP", app_name="argus_llm_app", route_prefix="/") + st = client.get_status() + + assert out == {"deployed": True} + assert st == {"ok": True} + + # Verify call order and key args. + assert calls[0][0] == "ray.init" + assert calls[0][1]["kwargs"].get("ignore_reinit_error") is True + assert calls[1][0] == "serve.start" + assert calls[1][1]["http_options"]["port"] == 8000 + assert calls[2][0] == "serve.run" + assert calls[2][1]["name"] == "argus_llm_app" + assert calls[3][0] == "serve.status" diff --git a/src/mvp/py/tests/test_service_config.py b/src/mvp/py/tests/test_service_config.py index d75b93f..219dfd2 100644 --- a/src/mvp/py/tests/test_service_config.py +++ b/src/mvp/py/tests/test_service_config.py @@ -23,6 +23,7 @@ def test_v2_config_from_root_dict_new_format_defaults(): assert cfg.sqlite.db_path.endswith(".sqlite3") assert cfg.scheduler.max_running_tasks == 3 assert cfg.tracking.wandb.enabled is False + assert cfg.serving.enabled is False def test_v2_config_backward_compat_v2_section_and_default_db_path(): @@ -57,6 +58,27 @@ def test_v2_config_requires_data_mappings(): V2Config.from_root_dict({**base, "data": {"sftpgo": ["x"], "retention": {}}}) +def test_v2_config_requires_tracking_and_serving_mappings(): + from argus.service.config import V2Config + + base = { + "ray": {"shared_root": "/private"}, + "service": {"api": {}, "auth": {}, "sqlite": {}, "scheduler": {}}, + "data": {"sftpgo": {}, "retention": {}}, + } + + with pytest.raises(ValueError, match="config\\.tracking must be a mapping"): + V2Config.from_root_dict({**base, "tracking": ["nope"]}) + + with pytest.raises(ValueError, match="config\\.tracking\\.wandb must be a mapping"): + V2Config.from_root_dict({**base, "tracking": {"wandb": ["nope"]}}) + + with pytest.raises(ValueError, match="config\\.serving must be a mapping"): + V2Config.from_root_dict({**base, "serving": ["nope"]}) + + with pytest.raises(ValueError, match="config\\.serving\\.\\{serve,llm\\} must be mappings"): + V2Config.from_root_dict({**base, "serving": {"serve": ["x"], "llm": {}}}) + def test_tracking_wandb_defaults_disabled(): from argus.service.config import V2Config diff --git a/src/mvp/py/tests/test_serving_model_id_prefix.py b/src/mvp/py/tests/test_serving_model_id_prefix.py new file mode 100644 index 0000000..a7790d5 --- /dev/null +++ b/src/mvp/py/tests/test_serving_model_id_prefix.py @@ -0,0 +1,23 @@ +from __future__ import annotations + +from datetime import datetime, timezone + +import pytest + +from argus.service.serving_spec import make_model_id_prefix + + +def test_make_model_id_prefix_uses_utc_minutes(): + dt = datetime(2026, 1, 6, 12, 35, 59, tzinfo=timezone.utc) + assert make_model_id_prefix(user_id="alice", now_utc=dt) == "alice-202601061235" + + +def test_make_model_id_prefix_rejects_empty_user_id(): + with pytest.raises(ValueError, match="user_id is required"): + make_model_id_prefix(user_id="", now_utc=datetime.now(timezone.utc)) + + +def test_make_model_id_prefix_rejects_slash(): + with pytest.raises(ValueError, match="must not contain"): + make_model_id_prefix(user_id="bad/user", now_utc=datetime.now(timezone.utc)) + diff --git a/src/mvp/py/tests/test_serving_reconciler.py b/src/mvp/py/tests/test_serving_reconciler.py new file mode 100644 index 0000000..0859f20 --- /dev/null +++ b/src/mvp/py/tests/test_serving_reconciler.py @@ -0,0 +1,207 @@ +from __future__ import 
annotations + +import json +from pathlib import Path + + +class _FakeServeClient: + def __init__(self): + self.started = 0 + self.applied = [] + self.status_calls = 0 + self.fail_apply = False + self.fail_status = False + + def ensure_started(self) -> None: + self.started += 1 + + def apply_app(self, *, app, app_name: str, route_prefix: str = "/"): + if self.fail_apply: + raise RuntimeError("boom") + self.applied.append({"app": app, "app_name": app_name, "route_prefix": route_prefix}) + return {"ok": True} + + def get_status(self): + self.status_calls += 1 + if self.fail_status: + raise RuntimeError("status boom") + return {"ok": True} + + +def _seed_model(db, *, model_key: str, user_id: str, state: str, num_replicas: int = 1, gpus_per_replica: int = 1): + spec_yaml = "model_id: x\nmodel_source: $HOME/common/hf/x\n" + resolved_yaml = f"user_id: {user_id}\nmodel_id: {user_id}-202601061235-x\n" + db.create_serve_model( + model_key=model_key, + user_id=user_id, + model_id_suffix="x", + model_id_prefix=f"{user_id}-202601061235", + model_id=f"{user_id}-202601061235-x", + model_source="/private/hf/x", + num_replicas=num_replicas, + gpus_per_replica=gpus_per_replica, + engine_kwargs_json=json.dumps({"gpu_memory_utilization": 0.9}), + spec_yaml=spec_yaml, + resolved_spec_yaml=resolved_yaml, + ) + db.set_serve_model_state(model_key=model_key, state=state, event_type="TEST_SEED") + + +def test_reconciler_skips_when_no_changes(tmp_path: Path): + from argus.service.config import V2Config + from argus.service.db import Db + from argus.service.serving_reconciler import ServingReconciler + + root = { + "ray": {"shared_root": "/private"}, + "service": {"api": {}, "auth": {}, "sqlite": {"db_path": str(tmp_path / "mvp.sqlite3")}, "scheduler": {}}, + "data": {"sftpgo": {}, "retention": {}}, + "serving": {"serve": {"http_port": 8000}, "llm": {"accelerator_type": "H20"}}, + } + cfg = V2Config.from_root_dict(root) + db = Db(cfg.sqlite.db_path) + db.init() + + client = _FakeServeClient() + rec = ServingReconciler(db=db, v2_cfg=cfg, ray_runtime_env_env_vars={}, serve_client=client, get_available_fn=lambda: type("A", (), {"total_available_gpus": 8, "total_available_npus": 0})()) + rec.tick() + assert client.started == 0 + assert client.applied == [] + + +def test_reconciler_pending_resources_no_apply(tmp_path: Path): + from argus.service.config import V2Config + from argus.service.db import Db + from argus.service.serving_reconciler import ServingReconciler + + cfg = V2Config.from_root_dict( + { + "ray": {"shared_root": "/private"}, + "service": {"api": {}, "auth": {}, "sqlite": {"db_path": str(tmp_path / "mvp.sqlite3")}, "scheduler": {}}, + "data": {"sftpgo": {}, "retention": {}}, + "serving": {"serve": {"http_port": 8000}, "llm": {"accelerator_type": "H20"}}, + } + ) + db = Db(cfg.sqlite.db_path) + db.init() + _seed_model(db, model_key="mk1", user_id="alice", state="QUEUED", num_replicas=2, gpus_per_replica=4) + + client = _FakeServeClient() + rec = ServingReconciler( + db=db, + v2_cfg=cfg, + ray_runtime_env_env_vars={}, + serve_client=client, + get_available_fn=lambda: type("A", (), {"total_available_gpus": 1, "total_available_npus": 0})(), + ) + rec.tick() + # Serve may be started even when resources are insufficient, but apply should not happen. + assert client.started == 1 + assert client.applied == [] + # State remains QUEUED. 
+ row = db.get_serve_model("mk1") + assert row and row["state"] == "QUEUED" + ev = db.list_serve_events("mk1", limit=50) + assert any(e["event_type"] == "SERVE_PENDING_RESOURCES" for e in ev) + + +def test_reconciler_apply_success_marks_running(tmp_path: Path): + from argus.service.config import V2Config + from argus.service.db import Db + from argus.service.serving_reconciler import ServingReconciler + + cfg = V2Config.from_root_dict( + { + "ray": {"shared_root": "/private"}, + "service": {"api": {}, "auth": {}, "sqlite": {"db_path": str(tmp_path / "mvp.sqlite3")}, "scheduler": {}}, + "data": {"sftpgo": {}, "retention": {}}, + "serving": {"serve": {"http_port": 8000}, "llm": {"accelerator_type": "H20"}}, + } + ) + db = Db(cfg.sqlite.db_path) + db.init() + _seed_model(db, model_key="mk1", user_id="alice", state="QUEUED", num_replicas=1, gpus_per_replica=1) + + client = _FakeServeClient() + rec = ServingReconciler( + db=db, + v2_cfg=cfg, + ray_runtime_env_env_vars={"HF_ENDPOINT": "https://hf-mirror.com"}, + serve_client=client, + get_available_fn=lambda: type("A", (), {"total_available_gpus": 8, "total_available_npus": 0})(), + ) + rec.tick() + assert client.started == 1 + assert len(client.applied) == 1 + applied = client.applied[0]["app"]["llm_configs"] + assert applied[0]["engine_kwargs"]["tensor_parallel_size"] == 1 + assert applied[0]["runtime_env"]["env_vars"]["HF_HUB_OFFLINE"] == "1" + row = db.get_serve_model("mk1") + assert row and row["state"] == "RUNNING" + + +def test_reconciler_delete_removes_and_marks_deleted(tmp_path: Path): + from argus.service.config import V2Config + from argus.service.db import Db + from argus.service.serving_reconciler import ServingReconciler + + cfg = V2Config.from_root_dict( + { + "ray": {"shared_root": "/private"}, + "service": {"api": {}, "auth": {}, "sqlite": {"db_path": str(tmp_path / "mvp.sqlite3")}, "scheduler": {}}, + "data": {"sftpgo": {}, "retention": {}}, + "serving": {"serve": {"http_port": 8000}, "llm": {"accelerator_type": "H20"}}, + } + ) + db = Db(cfg.sqlite.db_path) + db.init() + _seed_model(db, model_key="keep", user_id="alice", state="RUNNING", num_replicas=1, gpus_per_replica=1) + _seed_model(db, model_key="del", user_id="alice", state="DELETING", num_replicas=1, gpus_per_replica=1) + + client = _FakeServeClient() + rec = ServingReconciler( + db=db, + v2_cfg=cfg, + ray_runtime_env_env_vars={}, + serve_client=client, + get_available_fn=lambda: type("A", (), {"total_available_gpus": 8, "total_available_npus": 0})(), + ) + rec.tick() + assert len(client.applied) == 1 + cfgs = client.applied[0]["app"]["llm_configs"] + assert {c["model_loading_config"]["model_id"] for c in cfgs} == {"alice-202601061235-x"} # only keep remains + row = db.get_serve_model("del") + assert row and row["state"] == "DELETED" + assert row.get("deleted_at") + + +def test_reconciler_apply_failure_marks_failed(tmp_path: Path): + from argus.service.config import V2Config + from argus.service.db import Db + from argus.service.serving_reconciler import ServingReconciler + + cfg = V2Config.from_root_dict( + { + "ray": {"shared_root": "/private"}, + "service": {"api": {}, "auth": {}, "sqlite": {"db_path": str(tmp_path / "mvp.sqlite3")}, "scheduler": {}}, + "data": {"sftpgo": {}, "retention": {}}, + "serving": {"serve": {"http_port": 8000}, "llm": {"accelerator_type": "H20"}}, + } + ) + db = Db(cfg.sqlite.db_path) + db.init() + _seed_model(db, model_key="mk1", user_id="alice", state="QUEUED") + + client = _FakeServeClient() + client.fail_apply = True + rec = 
ServingReconciler( + db=db, + v2_cfg=cfg, + ray_runtime_env_env_vars={}, + serve_client=client, + get_available_fn=lambda: type("A", (), {"total_available_gpus": 8, "total_available_npus": 0})(), + ) + rec.tick() + row = db.get_serve_model("mk1") + assert row and row["state"] == "FAILED" + assert row.get("error_summary") diff --git a/src/mvp/py/tests/test_serving_spec_paths.py b/src/mvp/py/tests/test_serving_spec_paths.py new file mode 100644 index 0000000..2e6e903 --- /dev/null +++ b/src/mvp/py/tests/test_serving_spec_paths.py @@ -0,0 +1,47 @@ +from __future__ import annotations + +from datetime import datetime, timezone + +import pytest + +from argus.service.serving_spec import ServingSpec, resolve_serving_spec + + +def test_expand_home_macro_and_validate_user_path_ok(): + spec = ServingSpec( + model_id="qwen-0.5b", + model_source="$HOME/models/my_model", + num_replicas=1, + gpus_per_replica=1, + ) + r = resolve_serving_spec(spec=spec, user_id="alice", now_utc=datetime(2026, 1, 6, 12, 35, tzinfo=timezone.utc)) + assert r.model_source == "/private/users/alice/models/my_model" + assert r.model_id == "alice-202601061235-qwen-0.5b" + + +def test_expand_common_hf_macro_ok(): + spec = ServingSpec( + model_id="qwen-0.5b", + model_source="$HOME/common/hf/hub/models--Qwen--Qwen2.5/snapshots/abc", + num_replicas=1, + gpus_per_replica=1, + ) + r = resolve_serving_spec(spec=spec, user_id="alice", now_utc=datetime(2026, 1, 6, 12, 35, tzinfo=timezone.utc)) + assert r.model_source.startswith("/private/hf/") + + +@pytest.mark.parametrize( + "src", + [ + "/etc/passwd", + "relative/path", + "/private/users/bob/models/x", + "/private/users/alice/../bob/x", + "/private/common/hf/x", + ], +) +def test_model_source_path_rejected(src: str): + spec = ServingSpec(model_id="qwen-0.5b", model_source=src, num_replicas=1, gpus_per_replica=1) + with pytest.raises((ValueError, PermissionError)): + resolve_serving_spec(spec=spec, user_id="alice", now_utc=datetime(2026, 1, 6, 12, 35, tzinfo=timezone.utc)) + diff --git a/src/mvp/py/tests/test_serving_spec_validation.py b/src/mvp/py/tests/test_serving_spec_validation.py new file mode 100644 index 0000000..066cc45 --- /dev/null +++ b/src/mvp/py/tests/test_serving_spec_validation.py @@ -0,0 +1,72 @@ +from __future__ import annotations + +import pytest + +from argus.service.serving_spec import ServingSpec, parse_serving_spec, validate_model_id_suffix + + +@pytest.mark.parametrize( + "suffix", + [ + "a", + "qwen-0.5b", + "Qwen2.5-0.5B", + "a_b", + "a.b-c", + "a" * 64, + ], +) +def test_validate_model_id_suffix_accepts(suffix: str): + validate_model_id_suffix(suffix) + + +@pytest.mark.parametrize( + "suffix", + [ + "", + " a", + "a ", + "-bad", + ".bad", + "bad/", + "bad..", + "bad\n", + "bad\t", + "a" * 65, + ], +) +def test_validate_model_id_suffix_rejects(suffix: str): + with pytest.raises(ValueError): + validate_model_id_suffix(suffix) + + +def test_parse_serving_spec_smoke_defaults(): + spec = parse_serving_spec( + { + "model_id": "qwen-0.5b", + "model_source": "/private/hf/x", + } + ) + assert isinstance(spec, ServingSpec) + assert spec.num_replicas == 1 + assert spec.gpus_per_replica == 1 + assert spec.engine_kwargs is None + + +def test_parse_serving_spec_rejects_missing_fields(): + with pytest.raises(ValueError, match="missing required field: model_id"): + parse_serving_spec({"model_source": "/private/hf/x"}) + with pytest.raises(ValueError, match="missing required field: model_source"): + parse_serving_spec({"model_id": "x"}) + + +def 
test_parse_serving_spec_rejects_bad_types(): + with pytest.raises(ValueError, match="serving spec must be a mapping"): + parse_serving_spec(["nope"]) + with pytest.raises(ValueError, match="num_replicas"): + parse_serving_spec({"model_id": "x", "model_source": "/private/hf/x", "num_replicas": 0}) + with pytest.raises(ValueError, match="gpus_per_replica"): + parse_serving_spec({"model_id": "x", "model_source": "/private/hf/x", "gpus_per_replica": 0}) + with pytest.raises(ValueError, match="engine_kwargs"): + parse_serving_spec({"model_id": "x", "model_source": "/private/hf/x", "engine_kwargs": "nope"}) + diff --git a/src/mvp/py/tests/test_ui.py b/src/mvp/py/tests/test_ui.py index 7b86cf7..111f4a6 100644 --- a/src/mvp/py/tests/test_ui.py +++ b/src/mvp/py/tests/test_ui.py @@ -42,10 +42,13 @@ def test_ui_routes_render_200(tmp_path, monkeypatch): "/ui/login", "/ui/tasks", "/ui/tasks/new", + "/ui/serving", + "/ui/serving/new", "/ui/data", "/ui/admin", "/ui/tasks/any-task-id", "/ui/tasks/any-task-id/logs", + "/ui/serving/any-model-key", ): r = c.get(path, allow_redirects=True) assert r.status_code == 200 @@ -60,7 +63,7 @@ def test_ui_contains_sidebar_links(tmp_path, monkeypatch): r = c.get("/ui/tasks") assert r.status_code == 200 - for link in ("/ui/tasks", "/ui/tasks/new", "/ui/data", "/ui/login", "/ui/admin"): + for link in ("/ui/tasks", "/ui/tasks/new", "/ui/serving", "/ui/data", "/ui/login", "/ui/admin"): assert link in r.text assert "Ray Dashboard" in r.text diff --git a/src/mvp/py/tests/test_ui_serving.py b/src/mvp/py/tests/test_ui_serving.py new file mode 100644 index 0000000..8b022cd --- /dev/null +++ b/src/mvp/py/tests/test_ui_serving.py @@ -0,0 +1,56 @@ +from __future__ import annotations + +from pathlib import Path + +from fastapi.testclient import TestClient + +from argus.service.app import create_app + + +def _write_config(tmp_path: Path) -> Path: + p = tmp_path / "cfg.yaml" + p.write_text( + """ +ray: + address: "http://127.0.0.1:8265" + shared_root: "/private" + entrypoint_num_cpus: 1 + entrypoint_resources: { worker_node: 1 } + runtime_env: { env_vars: { PYTHONUNBUFFERED: "1" } } +service: + api: { host: "127.0.0.1", port: 8080 } + auth: { token_env: "MVP_INTERNAL_TOKEN" } + sqlite: { db_path: "%(db)s" } +data: + user_root: "%(users)s" + sftpgo: { enabled: false } + retention: { jobs_trash_after_days: 3, jobs_purge_after_days: 7, janitor_interval_s: 3600 } +serving: {} +""" + % {"db": str(tmp_path / "mvp.sqlite3"), "users": str(tmp_path / "users")} + ) + return p + + +def test_ui_serving_pages_render(tmp_path, monkeypatch): + cfg = _write_config(tmp_path) + monkeypatch.setenv("MVP_INTERNAL_TOKEN", "admin-token") + app = create_app(str(cfg)) + c = TestClient(app) + + for path in ("/ui/serving", "/ui/serving/new", "/ui/serving/any-model-key"): + r = c.get(path) + assert r.status_code == 200 + assert "/dev/null 2>&1; then + if ! 
docker image inspect "${RAY_NODE_IMAGE}" >/dev/null 2>&1; then BUILD="1" fi fi diff --git a/src/mvp/scripts/debug_serve_llm_smoke.sh b/src/mvp/scripts/debug_serve_llm_smoke.sh new file mode 100644 index 0000000..2269164 --- /dev/null +++ b/src/mvp/scripts/debug_serve_llm_smoke.sh @@ -0,0 +1,18 @@ +#!/usr/bin/env bash +set -euo pipefail + +container="${MVP_HEAD_CONTAINER:-argus-ray-head}" +model_source="${MODEL_SOURCE:-}" +if [[ -n "${1:-}" ]]; then + model_source="$1" +fi + +argv=(python3 /workspace/mvp/scripts/serve_llm_smoke.py) +if [[ -n "${model_source}" ]]; then + argv+=(--model-source "${model_source}") +fi +argv+=(--accelerator-type "${ARGUS_ACCELERATOR_TYPE:-H20}") + +echo "[host] run Ray Serve LLM smoke test in container: ${container}" >&2 +docker exec -it "${container}" bash -lc "$(printf '%q ' "${argv[@]}")" + diff --git a/src/mvp/scripts/run_all_v38_serving.sh b/src/mvp/scripts/run_all_v38_serving.sh new file mode 100755 index 0000000..2c6f172 --- /dev/null +++ b/src/mvp/scripts/run_all_v38_serving.sh @@ -0,0 +1,193 @@ +#!/usr/bin/env bash +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +# shellcheck source=lib.sh +source "${SCRIPT_DIR}/lib.sh" + +API_ADDR="${API_ADDR:-http://127.0.0.1:8080}" +OPENAI_BASE_URL="${OPENAI_BASE_URL:-http://127.0.0.1:8000/v1}" +ADMIN_TOKEN="${MVP_INTERNAL_TOKEN:-}" +USER_ID="${USER_ID:-alice}" +EXPECTED_RAY_NODES="${EXPECTED_RAY_NODES:-3}" # head + 2 workers + +CONFIG_IN_CONTAINER="${CONFIG_IN_CONTAINER:-/workspace/mvp/configs/dev.yaml}" +SFTPGO_ADMIN_PASSWORD="${SFTPGO_ADMIN_PASSWORD:-my-dev-sftpgo-admin}" +export SFTPGO_ADMIN_PASSWORD + +if [[ -z "${ADMIN_TOKEN}" ]]; then + echo "ERROR: MVP_INTERNAL_TOKEN must be set in host env (admin token)" >&2 + exit 1 +fi + +api_curl_admin() { + curl -sS -H "Authorization: Bearer ${ADMIN_TOKEN}" "$@" +} + +api_wait_ready() { + local tries="${1:-60}" + for i in $(seq 1 "${tries}"); do + if curl -sS -m 2 "${API_ADDR}/docs" >/dev/null 2>&1; then + echo "[host] api_ready: ${API_ADDR}" + return 0 + fi + echo "[host] waiting api... (${i}/${tries})" + sleep 2 + done + echo "ERROR: api not ready: ${API_ADDR}" >&2 + return 1 +} + +ray_wait_ready() { + local tries="${1:-60}" + for i in $(seq 1 "${tries}"); do + if curl -sS -m 2 "${RAY_DASHBOARD_ADDR}/api/version" >/dev/null 2>&1; then + echo "[host] ray_dashboard_ready: ${RAY_DASHBOARD_ADDR}" + return 0 + fi + echo "[host] waiting ray dashboard... (${i}/${tries})" + sleep 2 + done + echo "ERROR: ray dashboard not ready: ${RAY_DASHBOARD_ADDR}" >&2 + return 1 +} + +ray_wait_nodes() { + local want="${1:-3}" + local tries="${2:-60}" + for i in $(seq 1 "${tries}"); do + local out n + out="$(docker exec -i "${HEAD_CONTAINER}" python3 -c "import ray; ray.init(address='auto', ignore_reinit_error=True, log_to_driver=False, logging_level='ERROR'); print(sum(1 for n in ray.nodes() if n.get('Alive')))" 2>/dev/null || true)" + n="$(printf '%s\n' "${out}" | tail -n 1 | tr -cd '0-9' || true)" + if [[ "${n}" =~ ^[0-9]+$ ]]; then + echo "[host] ray_nodes_alive=${n} (want>=${want})" + if [[ "${n}" -ge "${want}" ]]; then + return 0 + fi + else + echo "[host] waiting ray nodes... 
(${i}/${tries})" + fi + sleep 2 + done + echo "ERROR: ray nodes not ready (want>=${want})" >&2 + docker exec -i "${HEAD_CONTAINER}" bash -lc "ray status || true" >&2 || true + return 1 +} + +openai_wait_ready() { + local tries="${1:-120}" + for i in $(seq 1 "${tries}"); do + if curl -sS -m 2 "${OPENAI_BASE_URL}/models" >/dev/null 2>&1; then + echo "[host] openai_ready: ${OPENAI_BASE_URL}" + return 0 + fi + echo "[host] waiting openai... (${i}/${tries})" + sleep 2 + done + echo "ERROR: openai not ready: ${OPENAI_BASE_URL}" >&2 + return 1 +} + +wait_model_state() { + local token="$1" + local model_key="$2" + local want="$3" + local tries="${4:-120}" + for i in $(seq 1 "${tries}"); do + local body state + body="$(curl -sS -H "Authorization: Bearer ${token}" "${API_ADDR}/api/v2/serve/models/${model_key}")" + state="$(printf '%s' "${body}" | python3 -c 'import sys,json; print(json.load(sys.stdin)["model"]["state"])' 2>/dev/null || true)" + echo "[host] model ${model_key}: ${state}" + if [[ "${state}" == "${want}" ]]; then + return 0 + fi + if [[ "${state}" == "FAILED" ]]; then + echo "[host] model failed; detail:" >&2 + printf '%s\n' "${body}" | python3 -m json.tool >&2 || true + return 1 + fi + sleep 2 + done + echo "ERROR: model not in state ${want} after timeout" >&2 + return 1 +} + +echo "[host] ===== run_all_v38_serving.sh begin =====" + +"${SCRIPT_DIR}/00_prereq_check.sh" +"${SCRIPT_DIR}/03_cleanup_v1_legacy.sh" +"${SCRIPT_DIR}/04_cleanup_v2_legacy.sh" + +echo "[host] bring down existing containers (best-effort)" +"${SCRIPT_DIR}/02_down.sh" || true + +echo "[host] (re)create containers (Ray + SFTPGo + W&B)" +# For v3.8, we need the latest ray-node image (ray[llm] deps). Force build once. +BUILD="${BUILD:-1}" "${SCRIPT_DIR}/01_up.sh" + +echo "[host] wait ray ready" +ray_wait_ready 60 +ray_wait_nodes "${EXPECTED_RAY_NODES}" 120 + +echo "[host] prepare data/model (best-effort; uses shared caches)" +"${SCRIPT_DIR}/30_prepare_data_and_model.sh" || true + +echo "[host] start api" +CONFIG_IN_CONTAINER="${CONFIG_IN_CONTAINER}" MVP_INTERNAL_TOKEN="${ADMIN_TOKEN}" "${SCRIPT_DIR}/60_start_api.sh" +api_wait_ready 60 + +echo "[host] create user (idempotent)" +api_curl_admin -X POST "${API_ADDR}/api/v2/users" -H "Content-Type: application/json" --data-binary "{\"user_id\":\"${USER_ID}\"}" >/dev/null || true + +echo "[host] issue user token" +USER_TOKEN="$(api_curl_admin -X POST "${API_ADDR}/api/v2/users/${USER_ID}/tokens" | python3 -c 'import sys,json; print(json.load(sys.stdin)["token"])')" + +echo "[host] resolve local model snapshot path (offline)" +LOCAL_MODEL_PATH="$(dexec "${HEAD_CONTAINER}" bash -lc "python3 -c \"import os; from huggingface_hub import snapshot_download; os.environ.setdefault('HF_HOME','/private/hf'); print(snapshot_download(repo_id='Qwen/Qwen2.5-0.5B-Instruct', local_files_only=True))\" " | tail -n 1)" +if [[ -z "${LOCAL_MODEL_PATH}" || "${LOCAL_MODEL_PATH}" != /* ]]; then + echo "ERROR: failed to resolve LOCAL_MODEL_PATH: ${LOCAL_MODEL_PATH}" >&2 + exit 1 +fi +echo "[host] local_model_path: ${LOCAL_MODEL_PATH}" + +echo "[host] submit serving model via API" +SERVE_SPEC=$'model_id: qwen-0.5b\nmodel_source: '"${LOCAL_MODEL_PATH}"$'\nnum_replicas: 1\ngpus_per_replica: 1\n' +CREATE_RESP="$(curl -sS -H "Authorization: Bearer ${USER_TOKEN}" -H "Content-Type: application/yaml" --data-binary "${SERVE_SPEC}" "${API_ADDR}/api/v2/serve/models")" +echo "[host] create_model_resp: ${CREATE_RESP}" +MODEL_KEY="$(printf '%s' "${CREATE_RESP}" | python3 -c 'import sys,json; 
print(json.load(sys.stdin)["model_key"])')" + +echo "[host] wait model RUNNING" +wait_model_state "${USER_TOKEN}" "${MODEL_KEY}" "RUNNING" 300 + +echo "[host] wait OpenAI ingress ready" +openai_wait_ready 120 + +echo "[host] verify /v1/models contains model" +MODEL_ID="$( + curl -sS "${OPENAI_BASE_URL}/models" \ + | python3 -c 'import sys,json; obj=json.load(sys.stdin); print("\n".join([m.get("id","") for m in obj.get("data",[]) if isinstance(m,dict)]))' \ + | grep -E "^${USER_ID}-[0-9]{12}-qwen-0\\.5b$" \ + | head -n1 \ + || true +)" +if [[ -z "${MODEL_ID}" ]]; then + echo "ERROR: model id not found in /v1/models" >&2 + curl -sS "${OPENAI_BASE_URL}/models" | python3 -m json.tool >&2 || true + exit 1 +fi +echo "[host] model_id: ${MODEL_ID}" + +echo "[host] chat completion (best-effort)" +CHAT_RESP="$(curl -sS -H "Content-Type: application/json" -H "Authorization: Bearer FAKE_KEY" -X POST "${OPENAI_BASE_URL}/chat/completions" --data-binary "{\"model\":\"${MODEL_ID}\",\"messages\":[{\"role\":\"user\",\"content\":\"hello\"}],\"max_tokens\":16,\"stream\":false}")" +printf '%s\n' "${CHAT_RESP}" | python3 -m json.tool >/dev/null 2>&1 || { + echo "ERROR: invalid chat response" >&2 + printf '%s\n' "${CHAT_RESP}" >&2 + exit 1 +} +echo "[host] chat_ok" + +echo "[host] delete model" +curl -sS -H "Authorization: Bearer ${USER_TOKEN}" -X DELETE "${API_ADDR}/api/v2/serve/models/${MODEL_KEY}" >/dev/null +wait_model_state "${USER_TOKEN}" "${MODEL_KEY}" "DELETED" 300 + +echo "[host] ===== run_all_v38_serving.sh done =====" diff --git a/src/mvp/scripts/serve_llm_smoke.py b/src/mvp/scripts/serve_llm_smoke.py new file mode 100644 index 0000000..17b0e61 --- /dev/null +++ b/src/mvp/scripts/serve_llm_smoke.py @@ -0,0 +1,102 @@ +from __future__ import annotations + +import argparse +import json +import os +import time +import urllib.request +from pathlib import Path +from typing import Any + + +def _pick_qwen_snapshot() -> str | None: + base = Path("/private/hf/hub/models--Qwen--Qwen2.5-0.5B-Instruct/snapshots") + if not base.exists(): + return None + snaps = sorted([p for p in base.iterdir() if p.is_dir()], reverse=True) + return str(snaps[0]) if snaps else None + + +def _http_get_json(url: str) -> Any: + with urllib.request.urlopen(url, timeout=10) as resp: + raw = resp.read().decode("utf-8") + return json.loads(raw) + + +def _wait_http_json(url: str, *, timeout_s: int) -> Any: + deadline = time.time() + float(timeout_s) + last_err: Exception | None = None + while time.time() < deadline: + try: + return _http_get_json(url) + except Exception as e: + last_err = e + time.sleep(2) + raise RuntimeError(f"timeout waiting for {url}: {last_err!r}") + + +def main(argv: list[str] | None = None) -> int: + ap = argparse.ArgumentParser(description="Ray Serve LLM smoke test (deploy + /v1/models probe).") + ap.add_argument("--ray-address", default="auto") + ap.add_argument("--http-port", type=int, default=8000) + ap.add_argument("--app-name", default="argus_llm_smoke") + ap.add_argument("--route-prefix", default="/") + ap.add_argument("--accelerator-type", default=os.environ.get("ARGUS_ACCELERATOR_TYPE") or "H20") + ap.add_argument("--model-id", default="smoke-qwen-0.5b") + ap.add_argument("--model-source", default=None, help="Local path or HF id. 
Default: cached Qwen snapshot under /private/hf.") + ap.add_argument("--tensor-parallel-size", type=int, default=1) + ap.add_argument("--num-replicas", type=int, default=1) + ap.add_argument("--wait-s", type=int, default=600) + args = ap.parse_args(argv) + + model_source = str(args.model_source or _pick_qwen_snapshot() or "") + if not model_source: + raise SystemExit("missing --model-source and no cached Qwen snapshot found under /private/hf") + + # Force offline HF behavior for the smoke test. + os.environ.setdefault("HF_HOME", "/private/hf") + os.environ.setdefault("HUGGINGFACE_HUB_CACHE", "/private/hf/hub") + os.environ.setdefault("TRANSFORMERS_CACHE", "/private/hf/transformers") + os.environ["HF_HUB_OFFLINE"] = "1" + + import ray + + ray.init(address=str(args.ray_address), ignore_reinit_error=True, log_to_driver=False) + + from ray import serve + + try: + serve.start(proxy_location="HeadOnly", http_options={"host": "0.0.0.0", "port": int(args.http_port)}) + except Exception: + # Best-effort: Serve may already be running in the container (e.g., started by the MVP API scheduler). + pass + + from ray.serve.llm import LLMConfig, build_openai_app + + # Build a config dict and filter by the current Ray's LLMConfig schema, since fields + # may differ between Ray versions. + cfg_dict: dict[str, Any] = { + "model_loading_config": {"model_id": str(args.model_id), "model_source": model_source}, + "accelerator_type": str(args.accelerator_type), + "deployment_config": {"num_replicas": int(args.num_replicas)}, + "engine_kwargs": {"tensor_parallel_size": int(args.tensor_parallel_size)}, + "runtime_env": {"env_vars": {"HF_HUB_OFFLINE": "1", "HF_HOME": "/private/hf"}}, + } + allowed = set(getattr(LLMConfig, "model_fields", {}).keys()) + if allowed: + cfg_dict = {k: v for k, v in cfg_dict.items() if k in allowed} + + llm_cfg = LLMConfig(**cfg_dict) + app = build_openai_app({"llm_configs": [llm_cfg]}) + + serve.run(app, name=str(args.app_name), route_prefix=str(args.route_prefix)) + + models_url = f"http://127.0.0.1:{int(args.http_port)}/v1/models" + payload = _wait_http_json(models_url, timeout_s=int(args.wait_s)) + print(json.dumps(payload, indent=2, sort_keys=True)) + + return 0 + + +if __name__ == "__main__": + raise SystemExit(main())
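
The curl-based flow in run_all_v38_serving.sh (issue a user token, POST a ServingSpec as YAML, poll the model state, DELETE) can also be driven from Python. Below is a minimal stdlib-only sketch of that client-side flow; it is not part of the patch. The API address, bearer token, and model_source value are illustrative placeholders, while the endpoints, the application/yaml request body, and the model_key / model.state response fields follow what the tests and the script above exercise.

# Illustrative sketch only (not part of this patch): drives the /api/v2/serve
# endpoints the same way run_all_v38_serving.sh does with curl.
# Assumptions: API_ADDR, USER_TOKEN, and MODEL_SOURCE are placeholders supplied by the caller.
import json
import time
import urllib.request

API_ADDR = "http://127.0.0.1:8080"          # assumed host-side API address
USER_TOKEN = "REPLACE_WITH_USER_TOKEN"      # issued via POST /api/v2/users/<user_id>/tokens
MODEL_SOURCE = "$HOME/common/hf/models--Qwen--Qwen2.5-0.5B-Instruct/snapshots/sha"

def _call(method: str, path: str, body: bytes | None = None, content_type: str | None = None):
    # Small helper around urllib for authenticated JSON calls to the MVP API.
    req = urllib.request.Request(f"{API_ADDR}{path}", data=body, method=method)
    req.add_header("Authorization", f"Bearer {USER_TOKEN}")
    if content_type:
        req.add_header("Content-Type", content_type)
    with urllib.request.urlopen(req, timeout=10) as resp:
        return json.loads(resp.read().decode("utf-8"))

# Submit a ServingSpec as YAML (the request body format used by the tests and the script).
spec_yaml = (
    "model_id: qwen-0.5b\n"
    f"model_source: {MODEL_SOURCE}\n"
    "num_replicas: 1\n"
    "gpus_per_replica: 1\n"
)
created = _call("POST", "/api/v2/serve/models", spec_yaml.encode("utf-8"), "application/yaml")
model_key = created["model_key"]            # state starts as QUEUED

# Poll until the reconciler reports RUNNING (or FAILED), mirroring wait_model_state.
while True:
    detail = _call("GET", f"/api/v2/serve/models/{model_key}")
    state = detail["model"]["state"]
    if state in ("RUNNING", "FAILED"):
        break
    time.sleep(2)

# Tear down: DELETE moves the model to DELETING; the reconciler later marks it DELETED.
_call("DELETE", f"/api/v2/serve/models/{model_key}")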
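
Once a model reaches RUNNING, the OpenAI-compatible ingress that openai_wait_ready probes can be queried directly. Another stdlib-only sketch follows; the base URL and model id are assumed for illustration (the real id follows the <user>-<UTC minutes>-<suffix> form asserted in run_all_v38_serving.sh), and the bearer value mirrors the FAKE_KEY placeholder used by the script's curl call.

# Illustrative sketch only: query the Ray Serve OpenAI-compatible ingress the
# same way the curl calls in run_all_v38_serving.sh do.
# Assumptions: OPENAI_BASE_URL and MODEL_ID are placeholders for this example.
import json
import urllib.request

OPENAI_BASE_URL = "http://127.0.0.1:8000/v1"     # Serve HTTP ingress (serving.serve.http_port: 8000)
MODEL_ID = "alice-202601061235-qwen-0.5b"        # <user>-<UTC minutes>-<suffix> form

# List served models; the deployed model id should appear in data[].id.
with urllib.request.urlopen(f"{OPENAI_BASE_URL}/models", timeout=10) as resp:
    print([m["id"] for m in json.loads(resp.read())["data"]])

# Minimal chat completion request against the standard OpenAI-style endpoint.
payload = json.dumps(
    {
        "model": MODEL_ID,
        "messages": [{"role": "user", "content": "hello"}],
        "max_tokens": 16,
        "stream": False,
    }
).encode("utf-8")
req = urllib.request.Request(f"{OPENAI_BASE_URL}/chat/completions", data=payload, method="POST")
req.add_header("Content-Type", "application/json")
req.add_header("Authorization", "Bearer FAKE_KEY")
with urllib.request.urlopen(req, timeout=60) as resp:
    print(json.loads(resp.read())["choices"][0]["message"]["content"])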