From 686739fea263ba1097c1d42f219bd02b6e19fcd7 Mon Sep 17 00:00:00 2001 From: yuyr Date: Tue, 6 Jan 2026 22:43:29 +0800 Subject: [PATCH] v3.8 model serving deployed successfully MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- specs/mvp/sw_arch.excalidraw | 4755 ++++++++++------- specs/mvp/v3.8/ray_serve.md | 314 ++ specs/mvp/v3.8/ray_serve_llm.md | 87 + specs/mvp/v3.8/requirements.md | 8 + specs/mvp/v3.8/v3.8_api.md | 224 + specs/mvp/v3.8/v3.8_design.md | 371 ++ specs/mvp/v3.8/v3.8_dev_plan.md | 266 + specs/mvp/v3.8/v3.8_progress.md | 48 + src/mvp/README.md | 6 + src/mvp/configs/dev.yaml | 8 + src/mvp/docker-compose.yaml | 16 + src/mvp/images/argus-ray-node/Dockerfile | 9 + .../images/argus-ray-node/argus-head-ray.sh | 3 +- src/mvp/py/argus/core/ids.py | 16 + src/mvp/py/argus/service/app.py | 216 +- src/mvp/py/argus/service/config.py | 38 + src/mvp/py/argus/service/db.py | 273 + src/mvp/py/argus/service/scheduler.py | 22 + src/mvp/py/argus/service/serve_client.py | 45 + src/mvp/py/argus/service/serve_llm_config.py | 63 + .../py/argus/service/serving_reconciler.py | 151 + src/mvp/py/argus/service/serving_spec.py | 144 + src/mvp/py/argus/service/ui.py | 248 + src/mvp/py/tests/test_app_serving_api.py | 282 + src/mvp/py/tests/test_db_serving.py | 79 + src/mvp/py/tests/test_ids.py | 29 + src/mvp/py/tests/test_llm_config_builder.py | 78 + src/mvp/py/tests/test_serve_client.py | 55 + src/mvp/py/tests/test_service_config.py | 22 + .../py/tests/test_serving_model_id_prefix.py | 23 + src/mvp/py/tests/test_serving_reconciler.py | 207 + src/mvp/py/tests/test_serving_spec_paths.py | 47 + .../py/tests/test_serving_spec_validation.py | 72 + src/mvp/py/tests/test_ui.py | 5 +- src/mvp/py/tests/test_ui_serving.py | 56 + src/mvp/scripts/01_up.sh | 3 +- src/mvp/scripts/debug_serve_llm_smoke.sh | 18 + src/mvp/scripts/run_all_v38_serving.sh | 193 + src/mvp/scripts/serve_llm_smoke.py | 102 + 39 files changed, 6772 insertions(+), 1830 deletions(-) create mode 100644 specs/mvp/v3.8/ray_serve.md create mode 100644 specs/mvp/v3.8/ray_serve_llm.md create mode 100644 specs/mvp/v3.8/requirements.md create mode 100644 specs/mvp/v3.8/v3.8_api.md create mode 100644 specs/mvp/v3.8/v3.8_design.md create mode 100644 specs/mvp/v3.8/v3.8_dev_plan.md create mode 100644 specs/mvp/v3.8/v3.8_progress.md create mode 100644 src/mvp/py/argus/service/serve_client.py create mode 100644 src/mvp/py/argus/service/serve_llm_config.py create mode 100644 src/mvp/py/argus/service/serving_reconciler.py create mode 100644 src/mvp/py/argus/service/serving_spec.py create mode 100644 src/mvp/py/tests/test_app_serving_api.py create mode 100644 src/mvp/py/tests/test_db_serving.py create mode 100644 src/mvp/py/tests/test_llm_config_builder.py create mode 100644 src/mvp/py/tests/test_serve_client.py create mode 100644 src/mvp/py/tests/test_serving_model_id_prefix.py create mode 100644 src/mvp/py/tests/test_serving_reconciler.py create mode 100644 src/mvp/py/tests/test_serving_spec_paths.py create mode 100644 src/mvp/py/tests/test_serving_spec_validation.py create mode 100644 src/mvp/py/tests/test_ui_serving.py create mode 100644 src/mvp/scripts/debug_serve_llm_smoke.sh create mode 100755 src/mvp/scripts/run_all_v38_serving.sh create mode 100644 src/mvp/scripts/serve_llm_smoke.py diff --git a/specs/mvp/sw_arch.excalidraw b/specs/mvp/sw_arch.excalidraw index c026691..3a9ba1a 100644 --- a/specs/mvp/sw_arch.excalidraw +++ b/specs/mvp/sw_arch.excalidraw @@ -3331,8 
+3331,8 @@ { "id": "hfyGYREhwfOiJ0NmS7nWJ", "type": "rectangle", - "x": 184.78887213940638, - "y": 4494.097308130087, + "x": 201.45553880607312, + "y": 5354.097308130087, "width": 647.3203086953981, "height": 641.2700333334831, "angle": 0, @@ -3350,19 +3350,19 @@ "type": 3 }, "seed": 1547009185, - "version": 1063, - "versionNonce": 2017335811, + "version": 1118, + "versionNonce": 2081816253, "isDeleted": false, "boundElements": [], - "updated": 1767580421422, + "updated": 1767684541917, "link": null, "locked": false }, { "id": "kcnXooRmgv12d40l7DX1I", "type": "rectangle", - "x": 390.4424022703514, - "y": 4674.763964624228, + "x": 407.10906893701815, + "y": 5534.763964624228, "width": 156.66668701171875, "height": 85, "angle": 0, @@ -3378,8 +3378,8 @@ "index": "b0T", "roundness": null, "seed": 142557313, - "version": 781, - "versionNonce": 1578230179, + "version": 836, + "versionNonce": 1304502045, "isDeleted": false, "boundElements": [ { @@ -3395,15 +3395,15 @@ "type": "arrow" } ], - "updated": 1767580421422, + "updated": 1767684541917, "link": null, "locked": false }, { "id": "-kTPtQM7ZjAJg_O72uvP0", "type": "text", - "x": 411.38576927474594, - "y": 4692.263964624228, + "x": 428.0524359414127, + "y": 5552.263964624228, "width": 114.77995300292969, "height": 50, "angle": 0, @@ -3419,11 +3419,11 @@ "index": "b0U", "roundness": null, "seed": 849709153, - "version": 771, - "versionNonce": 1363916099, + "version": 826, + "versionNonce": 300495741, "isDeleted": false, "boundElements": [], - "updated": 1767580421422, + "updated": 1767684541917, "link": null, "locked": false, "text": "ray job tool\n(ray client)", @@ -3439,8 +3439,8 @@ { "id": "syy4HE220r-q_RMUUSsel", "type": "rectangle", - "x": 635.4424022703514, - "y": 4674.263964624228, + "x": 652.1090689370182, + "y": 5534.263964624228, "width": 156.66668701171875, "height": 85, "angle": 0, @@ -3456,8 +3456,8 @@ "index": "b0V", "roundness": null, "seed": 1954010177, - "version": 918, - "versionNonce": 1838907523, + "version": 973, + "versionNonce": 512112701, "isDeleted": false, "boundElements": [ { @@ -3469,15 +3469,15 @@ "type": "arrow" } ], - "updated": 1767580421422, + "updated": 1767684541917, "link": null, "locked": false }, { "id": "CA5CuK3pSeEixp1YwKENQ", "type": "text", - "x": 646.3258022337303, - "y": 4691.763964624228, + "x": 662.9924689003971, + "y": 5551.763964624228, "width": 134.89988708496094, "height": 50, "angle": 0, @@ -3493,11 +3493,11 @@ "index": "b0W", "roundness": null, "seed": 30778401, - "version": 922, - "versionNonce": 1073641507, + "version": 977, + "versionNonce": 2028873885, "isDeleted": false, "boundElements": [], - "updated": 1767580421422, + "updated": 1767684541917, "link": null, "locked": false, "text": "VerlTaskSpec \nyaml", @@ -3513,8 +3513,8 @@ { "id": "BfGeVEKVLbnnxWatX-CuX", "type": "arrow", - "x": 636.7757457762108, - "y": 4712.763964624228, + "x": 653.4424124428775, + "y": 5572.763964624228, "width": 90.33331298828125, "height": 4, "angle": 0, @@ -3532,11 +3532,11 @@ "type": 2 }, "seed": 1722710017, - "version": 2068, - "versionNonce": 243331309, + "version": 2235, + "versionNonce": 245651379, "isDeleted": false, "boundElements": [], - "updated": 1767580422053, + "updated": 1767684541921, "link": null, "locked": false, "points": [ @@ -3552,12 +3552,12 @@ "lastCommittedPoint": null, "startBinding": { "elementId": "syy4HE220r-q_RMUUSsel", - "focus": 0.1611880517201622, + "focus": 0.1611880517201682, "gap": 1.333343505859375 }, "endBinding": { "elementId": "kcnXooRmgv12d40l7DX1I", - "focus": 
0.06393742185083832, + "focus": 0.06393742185081888, "gap": 1 }, "startArrowhead": null, @@ -3567,8 +3567,8 @@ { "id": "XwaSYITVD0m2YORWBiATj", "type": "text", - "x": 363.57537092345547, - "y": 4461.430651635947, + "x": 380.2420375901222, + "y": 5321.430651635947, "width": 480.5596923828125, "height": 25, "angle": 0, @@ -3584,11 +3584,11 @@ "index": "b0Y", "roundness": null, "seed": 84004833, - "version": 852, - "versionNonce": 399615747, + "version": 907, + "versionNonce": 1749922237, "isDeleted": false, "boundElements": [], - "updated": 1767580421422, + "updated": 1767684541917, "link": null, "locked": false, "text": "v4.0 observability (prom, grafana, alert, ELK, etc)", @@ -3604,8 +3604,8 @@ { "id": "hO23c7cUKniC0ShSkbYHY", "type": "rectangle", - "x": 576.2663503435255, - "y": 4516.930560083212, + "x": 592.9330170101922, + "y": 5376.930560083212, "width": 210.1763265850289, "height": 55.000000000000014, "angle": 0, @@ -3621,8 +3621,8 @@ "index": "b0Z", "roundness": null, "seed": 1390357441, - "version": 1070, - "versionNonce": 1034669731, + "version": 1125, + "versionNonce": 1586131485, "isDeleted": false, "boundElements": [ { @@ -3630,15 +3630,15 @@ "id": "WvC3BnzbmedjQgc_rcGOG" } ], - "updated": 1767580421422, + "updated": 1767684541917, "link": null, "locked": false }, { "id": "WvC3BnzbmedjQgc_rcGOG", "type": "text", - "x": 628.5045533088916, - "y": 4531.930560083212, + "x": 645.1712199755583, + "y": 5391.930560083212, "width": 105.69992065429688, "height": 25, "angle": 0, @@ -3654,11 +3654,11 @@ "index": "b0a", "roundness": null, "seed": 1483082657, - "version": 1109, - "versionNonce": 1629487683, + "version": 1164, + "versionNonce": 68167293, "isDeleted": false, "boundElements": [], - "updated": 1767580421422, + "updated": 1767684541917, "link": null, "locked": false, "text": "API server", @@ -3674,8 +3674,8 @@ { "id": "QTuaauYBrKurb9Uym-vJ0", "type": "rectangle", - "x": 390.2757457762108, - "y": 4587.930560083212, + "x": 406.9424124428775, + "y": 5447.930560083212, "width": 154.6436510018092, "height": 60, "angle": 0, @@ -3691,8 +3691,8 @@ "index": "b0b", "roundness": null, "seed": 1841688449, - "version": 1300, - "versionNonce": 1527562723, + "version": 1355, + "versionNonce": 1805668061, "isDeleted": false, "boundElements": [ { @@ -3700,15 +3700,15 @@ "id": "wds-TjwVp7NzSUYdwGMzH" } ], - "updated": 1767580421422, + "updated": 1767684541917, "link": null, "locked": false }, { "id": "wds-TjwVp7NzSUYdwGMzH", "type": "text", - "x": 406.4776143069005, - "y": 4592.930560083212, + "x": 423.1442809735672, + "y": 5452.930560083212, "width": 122.23991394042969, "height": 50, "angle": 0, @@ -3724,11 +3724,11 @@ "index": "b0c", "roundness": null, "seed": 960386913, - "version": 1359, - "versionNonce": 1686146435, + "version": 1414, + "versionNonce": 1405905725, "isDeleted": false, "boundElements": [], - "updated": 1767580421422, + "updated": 1767684541917, "link": null, "locked": false, "text": "task\nmanagement ", @@ -3744,8 +3744,8 @@ { "id": "h-InDEDqJ6h9usuQPtO1A", "type": "rectangle", - "x": 574.3832843618806, - "y": 4588.504868395925, + "x": 591.0499510285473, + "y": 5448.504868395925, "width": 207.59447102997498, "height": 60, "angle": 0, @@ -3761,8 +3761,8 @@ "index": "b0d", "roundness": null, "seed": 1830732609, - "version": 1391, - "versionNonce": 280618275, + "version": 1446, + "versionNonce": 754318237, "isDeleted": false, "boundElements": [ { @@ -3770,15 +3770,15 @@ "id": "B1uYGWk1HK8JtizGFaaGC" } ], - "updated": 1767580421422, + "updated": 1767684541917, "link": null, 
"locked": false }, { "id": "B1uYGWk1HK8JtizGFaaGC", "type": "text", - "x": 590.650589762122, - "y": 4593.504868395925, + "x": 607.3172564287887, + "y": 5453.504868395925, "width": 175.0598602294922, "height": 50, "angle": 0, @@ -3794,11 +3794,11 @@ "index": "b0e", "roundness": null, "seed": 1170649889, - "version": 1481, - "versionNonce": 120015043, + "version": 1536, + "versionNonce": 1598980093, "isDeleted": false, "boundElements": [], - "updated": 1767580421422, + "updated": 1767684541917, "link": null, "locked": false, "text": "node management\n(ssh, ray cluster) ", @@ -3814,8 +3814,8 @@ { "id": "XS4hxHD7hkjqqdIPoHNyM", "type": "rectangle", - "x": 911.7293968820569, - "y": 4726.479066772403, + "x": 928.3960635487236, + "y": 5586.479066772403, "width": 163.1357446724801, "height": 106.45708709635272, "angle": 0, @@ -3831,8 +3831,8 @@ "index": "b0f", "roundness": null, "seed": 1202038529, - "version": 716, - "versionNonce": 2103916643, + "version": 771, + "versionNonce": 1356298333, "isDeleted": false, "boundElements": [ { @@ -3840,15 +3840,15 @@ "id": "ilnIrrhwIwdl5yMP0J_-o" } ], - "updated": 1767580421422, + "updated": 1767684541917, "link": null, "locked": false }, { "id": "ilnIrrhwIwdl5yMP0J_-o", "type": "text", - "x": 942.6473134687852, - "y": 4754.707610320578, + "x": 959.313980135452, + "y": 5614.707610320578, "width": 101.29991149902344, "height": 50, "angle": 0, @@ -3864,11 +3864,11 @@ "index": "b0g", "roundness": null, "seed": 1027284705, - "version": 779, - "versionNonce": 1900557315, + "version": 834, + "versionNonce": 727868605, "isDeleted": false, "boundElements": [], - "updated": 1767580421422, + "updated": 1767684541917, "link": null, "locked": false, "text": "ray worker\nnode", @@ -3884,8 +3884,8 @@ { "id": "qYCyZgffVShRdxYkZZEQC", "type": "rectangle", - "x": 912.5312001672861, - "y": 4533.440830751391, + "x": 929.1978668339528, + "y": 5393.440830751391, "width": 163.1357446724801, "height": 85, "angle": 0, @@ -3901,8 +3901,8 @@ "index": "b0h", "roundness": null, "seed": 1964415681, - "version": 737, - "versionNonce": 583580579, + "version": 792, + "versionNonce": 1653654813, "isDeleted": false, "boundElements": [ { @@ -3910,15 +3910,15 @@ "id": "7cAjOCsL26YdZ9huXHSvG" } ], - "updated": 1767580421422, + "updated": 1767684541917, "link": null, "locked": false }, { "id": "7cAjOCsL26YdZ9huXHSvG", "type": "text", - "x": 943.4491167540144, - "y": 4550.940830751391, + "x": 960.1157834206812, + "y": 5410.940830751391, "width": 101.29991149902344, "height": 50, "angle": 0, @@ -3934,11 +3934,11 @@ "index": "b0i", "roundness": null, "seed": 1071094433, - "version": 803, - "versionNonce": 1310150467, + "version": 858, + "versionNonce": 2043943293, "isDeleted": false, "boundElements": [], - "updated": 1767580421422, + "updated": 1767684541917, "link": null, "locked": false, "text": "ray worker\nnode", @@ -3954,8 +3954,8 @@ { "id": "Ov7Ctp7PvJITFoQy7Yb4E", "type": "rectangle", - "x": 393.0812552422981, - "y": 4514.717667417468, + "x": 409.7479219089648, + "y": 5374.717667417468, "width": 147.4799234681481, "height": 55.000000000000014, "angle": 0, @@ -3971,8 +3971,8 @@ "index": "b0j", "roundness": null, "seed": 774741633, - "version": 1188, - "versionNonce": 630353635, + "version": 1243, + "versionNonce": 174973405, "isDeleted": false, "boundElements": [ { @@ -3980,15 +3980,15 @@ "id": "hkWZM79d15pmmlJnpM5GC" } ], - "updated": 1767580421422, + "updated": 1767684541917, "link": null, "locked": false }, { "id": "hkWZM79d15pmmlJnpM5GC", "type": "text", - "x": 435.59124413701664, - "y": 
4529.717667417468, + "x": 452.2579108036834, + "y": 5389.717667417468, "width": 62.45994567871094, "height": 25, "angle": 0, @@ -4004,11 +4004,11 @@ "index": "b0k", "roundness": null, "seed": 249046625, - "version": 1238, - "versionNonce": 197696131, + "version": 1293, + "versionNonce": 158309949, "isDeleted": false, "boundElements": [], - "updated": 1767580421422, + "updated": 1767684541917, "link": null, "locked": false, "text": "WebUI", @@ -4024,8 +4024,8 @@ { "id": "hJ1qbW1rf0n6-WrDss2Ro", "type": "rectangle", - "x": 636.7530370032708, - "y": 4789.813057433881, + "x": 653.4197036699376, + "y": 5649.813057433881, "width": 156.66668701171875, "height": 85, "angle": 0, @@ -4041,8 +4041,8 @@ "index": "b0l", "roundness": null, "seed": 749122113, - "version": 970, - "versionNonce": 225989155, + "version": 1025, + "versionNonce": 1304088221, "isDeleted": false, "boundElements": [ { @@ -4054,15 +4054,15 @@ "type": "arrow" } ], - "updated": 1767580421422, + "updated": 1767684541917, "link": null, "locked": false }, { "id": "w68ybhw27soUMyLsMu-vA", "type": "text", - "x": 641.7964329993646, - "y": 4794.813057433881, + "x": 658.4630996660313, + "y": 5654.813057433881, "width": 146.57989501953125, "height": 75, "angle": 0, @@ -4078,11 +4078,11 @@ "index": "b0m", "roundness": null, "seed": 1282982433, - "version": 984, - "versionNonce": 462427587, + "version": 1039, + "versionNonce": 1442902781, "isDeleted": false, "boundElements": [], - "updated": 1767580421422, + "updated": 1767684541917, "link": null, "locked": false, "text": "Advanced\nVerlTaskSpec \n(code, ckpt, ib)", @@ -4098,8 +4098,8 @@ { "id": "OJ2YvI_4pyTp3tvnog5HM", "type": "arrow", - "x": 636.2892302063543, - "y": 4834.178049948081, + "x": 652.9558968730211, + "y": 5694.178049948081, "width": 80.66214281697523, "height": 84.85848269127973, "angle": 0, @@ -4117,11 +4117,11 @@ "type": 2 }, "seed": 1799214593, - "version": 831, - "versionNonce": 74337101, + "version": 942, + "versionNonce": 574941523, "isDeleted": false, "boundElements": [], - "updated": 1767580422053, + "updated": 1767684541921, "link": null, "locked": false, "points": [ @@ -4137,7 +4137,7 @@ "lastCommittedPoint": null, "startBinding": { "elementId": "hJ1qbW1rf0n6-WrDss2Ro", - "focus": -0.6785882314731579, + "focus": -0.6785882314731589, "gap": 1 }, "endBinding": null, @@ -4148,8 +4148,8 @@ { "id": "ozgVxBsj7snNecNunrp11", "type": "rectangle", - "x": 207.87720669617303, - "y": 4587.835588719463, + "x": 224.54387336283978, + "y": 5447.835588719463, "width": 154.6436510018092, "height": 60, "angle": 0, @@ -4165,8 +4165,8 @@ "index": "b0o", "roundness": null, "seed": 1005940193, - "version": 1362, - "versionNonce": 443066531, + "version": 1417, + "versionNonce": 215226397, "isDeleted": false, "boundElements": [ { @@ -4174,15 +4174,15 @@ "id": "i3wz2t7mbs8b39oPir_Gf" } ], - "updated": 1767580421422, + "updated": 1767684541917, "link": null, "locked": false }, { "id": "i3wz2t7mbs8b39oPir_Gf", "type": "text", - "x": 224.07907522686276, - "y": 4592.835588719463, + "x": 240.7457418935295, + "y": 5452.835588719463, "width": 122.23991394042969, "height": 50, "angle": 0, @@ -4198,11 +4198,11 @@ "index": "b0p", "roundness": null, "seed": 1705970113, - "version": 1430, - "versionNonce": 87137347, + "version": 1485, + "versionNonce": 1177881725, "isDeleted": false, "boundElements": [], - "updated": 1767580421422, + "updated": 1767684541917, "link": null, "locked": false, "text": "user\nmanagement ", @@ -4218,8 +4218,8 @@ { "id": "-6-EuXJhEm2mWx5ISTwFk", "type": "rectangle", - "x": 
213.93860311509013, - "y": 4781.420420372215, + "x": 230.60526978175687, + "y": 5641.420420372215, "width": 154.6436510018092, "height": 85, "angle": 0, @@ -4235,8 +4235,8 @@ "index": "b0q", "roundness": null, "seed": 25831841, - "version": 1594, - "versionNonce": 2111511523, + "version": 1649, + "versionNonce": 607679709, "isDeleted": false, "boundElements": [ { @@ -4244,15 +4244,15 @@ "id": "GabFf-T0sdW_Oq8DrJ2I3" } ], - "updated": 1767580421422, + "updated": 1767684541917, "link": null, "locked": false }, { "id": "GabFf-T0sdW_Oq8DrJ2I3", "type": "text", - "x": 234.14047164577983, - "y": 4786.420420372215, + "x": 250.80713831244657, + "y": 5646.420420372215, "width": 114.23991394042969, "height": 75, "angle": 0, @@ -4268,11 +4268,11 @@ "index": "b0r", "roundness": null, "seed": 1488342401, - "version": 1676, - "versionNonce": 276014979, + "version": 1731, + "versionNonce": 1383059773, "isDeleted": false, "boundElements": [], - "updated": 1767580421422, + "updated": 1767684541917, "link": null, "locked": false, "text": "data\nmanagement\nSFTPGo ", @@ -4288,8 +4288,8 @@ { "id": "OkOW_h1bpRwbXvHRE98qO", "type": "rectangle", - "x": 229.40224214693444, - "y": 5023.3894549921515, + "x": 246.06890881360118, + "y": 5883.3894549921515, "width": 154.6436510018092, "height": 60, "angle": 0, @@ -4305,8 +4305,8 @@ "index": "b0s", "roundness": null, "seed": 905236833, - "version": 1466, - "versionNonce": 1127857955, + "version": 1521, + "versionNonce": 238854557, "isDeleted": false, "boundElements": [ { @@ -4314,15 +4314,15 @@ "id": "dr1KmTO-yRI3g5vju74ZK" } ], - "updated": 1767580421422, + "updated": 1767684541917, "link": null, "locked": false }, { "id": "dr1KmTO-yRI3g5vju74ZK", "type": "text", - "x": 251.75410457410862, - "y": 5040.8894549921515, + "x": 268.42077124077537, + "y": 5900.8894549921515, "width": 109.93992614746094, "height": 25, "angle": 0, @@ -4338,11 +4338,11 @@ "index": "b0t", "roundness": null, "seed": 1089862977, - "version": 1546, - "versionNonce": 1352393411, + "version": 1601, + "versionNonce": 1825596925, "isDeleted": false, "boundElements": [], - "updated": 1767580421422, + "updated": 1767684541917, "link": null, "locked": false, "text": "prometheus", @@ -4358,8 +4358,8 @@ { "id": "PwNRlpy3F_V5w-3sfAYAQ", "type": "rectangle", - "x": 427.0943513641362, - "y": 5021.524547851837, + "x": 443.7610180308029, + "y": 5881.524547851837, "width": 154.6436510018092, "height": 60, "angle": 0, @@ -4375,8 +4375,8 @@ "index": "b0u", "roundness": null, "seed": 1448322511, - "version": 1510, - "versionNonce": 557355619, + "version": 1565, + "versionNonce": 1630314077, "isDeleted": false, "boundElements": [ { @@ -4384,15 +4384,15 @@ "id": "kWS_uGtXbrBusArKpVdGc" } ], - "updated": 1767580421422, + "updated": 1767684541917, "link": null, "locked": false }, { "id": "kWS_uGtXbrBusArKpVdGc", "type": "text", - "x": 467.63621623271655, - "y": 5039.024547851837, + "x": 484.3028828993833, + "y": 5899.024547851837, "width": 73.55992126464844, "height": 25, "angle": 0, @@ -4408,11 +4408,11 @@ "index": "b0v", "roundness": null, "seed": 1134302191, - "version": 1599, - "versionNonce": 98455043, + "version": 1654, + "versionNonce": 35834557, "isDeleted": false, "boundElements": [], - "updated": 1767580421422, + "updated": 1767684541917, "link": null, "locked": false, "text": "grafana", @@ -4428,8 +4428,8 @@ { "id": "DHdIrKA2SmPRr2rTtJJ8E", "type": "rectangle", - "x": 940.088203276161, - "y": 4825.921889114552, + "x": 956.7548699428278, + "y": 5685.921889114552, "width": 106.83929336612698, "height": 60, 
"angle": 0, @@ -4445,8 +4445,8 @@ "index": "b1a", "roundness": null, "seed": 1809008399, - "version": 1585, - "versionNonce": 831231395, + "version": 1640, + "versionNonce": 273967901, "isDeleted": false, "boundElements": [ { @@ -4454,15 +4454,15 @@ "id": "HbJRSZyHsD6NLPzLVoa9C" } ], - "updated": 1767580421422, + "updated": 1767684541917, "link": null, "locked": false }, { "id": "HbJRSZyHsD6NLPzLVoa9C", "type": "text", - "x": 951.4178765095176, - "y": 4843.421889114552, + "x": 968.0845431761843, + "y": 5703.421889114552, "width": 84.17994689941406, "height": 25, "angle": 0, @@ -4478,11 +4478,11 @@ "index": "b1b", "roundness": null, "seed": 1662356783, - "version": 1676, - "versionNonce": 1936648515, + "version": 1731, + "versionNonce": 1009975165, "isDeleted": false, "boundElements": [], - "updated": 1767580421422, + "updated": 1767684541917, "link": null, "locked": false, "text": "exporter", @@ -4498,8 +4498,8 @@ { "id": "SYvgt1uhLv6KU1Snpyha0", "type": "rectangle", - "x": 941.8090593134823, - "y": 4614.817365416434, + "x": 958.475725980149, + "y": 5474.817365416434, "width": 106.83929336612698, "height": 60, "angle": 0, @@ -4515,8 +4515,8 @@ "index": "b1c", "roundness": null, "seed": 1250827695, - "version": 1599, - "versionNonce": 820431075, + "version": 1654, + "versionNonce": 738199517, "isDeleted": false, "boundElements": [ { @@ -4524,15 +4524,15 @@ "id": "zbUHQA7llONcizbRp8kkI" } ], - "updated": 1767580421422, + "updated": 1767684541917, "link": null, "locked": false }, { "id": "zbUHQA7llONcizbRp8kkI", "type": "text", - "x": 953.1387325468388, - "y": 4632.317365416434, + "x": 969.8053992135056, + "y": 5492.317365416434, "width": 84.17994689941406, "height": 25, "angle": 0, @@ -4548,11 +4548,11 @@ "index": "b1d", "roundness": null, "seed": 1568444367, - "version": 1690, - "versionNonce": 528594051, + "version": 1745, + "versionNonce": 1103955005, "isDeleted": false, "boundElements": [], - "updated": 1767580421422, + "updated": 1767684541917, "link": null, "locked": false, "text": "exporter", @@ -4568,8 +4568,8 @@ { "id": "ND0kaXREeIHTrNlSqY34p", "type": "rectangle", - "x": 629.4389274863165, - "y": 5021.045738109877, + "x": 646.1055941529833, + "y": 5881.045738109877, "width": 154.6436510018092, "height": 60, "angle": 0, @@ -4585,8 +4585,8 @@ "index": "b1i", "roundness": null, "seed": 814939215, - "version": 1566, - "versionNonce": 1056844835, + "version": 1621, + "versionNonce": 561738909, "isDeleted": false, "boundElements": [ { @@ -4594,15 +4594,15 @@ "id": "e8-1MnFz0QWrPJXZ-ZnQ3" } ], - "updated": 1767580421422, + "updated": 1767684541917, "link": null, "locked": false }, { "id": "e8-1MnFz0QWrPJXZ-ZnQ3", "type": "text", - "x": 688.1307633631977, - "y": 5038.545738109877, + "x": 704.7974300298645, + "y": 5898.545738109877, "width": 37.259979248046875, "height": 25, "angle": 0, @@ -4618,11 +4618,11 @@ "index": "b1j", "roundness": null, "seed": 2024577647, - "version": 1659, - "versionNonce": 323661763, + "version": 1714, + "versionNonce": 1992087805, "isDeleted": false, "boundElements": [], - "updated": 1767580421422, + "updated": 1767684541917, "link": null, "locked": false, "text": "ELK", @@ -4638,8 +4638,8 @@ { "id": "UOlgJuhEaQm81KRHgyY-0", "type": "rectangle", - "x": 179.07220592595837, - "y": 5244.209019024574, + "x": 195.7388725926251, + "y": 6104.209019024574, "width": 647.3203086953981, "height": 717.4711059065564, "angle": 0, @@ -4657,19 +4657,19 @@ "type": 3 }, "seed": 790237583, - "version": 1166, - "versionNonce": 1146533539, + "version": 1221, + "versionNonce": 
2064822621, "isDeleted": false, "boundElements": [], - "updated": 1767580076534, + "updated": 1767684541917, "link": null, "locked": false }, { "id": "ExBkrVpyvo_OXAog8ivWn", "type": "rectangle", - "x": 384.7257360569034, - "y": 5424.875675518715, + "x": 401.3924027235701, + "y": 6284.875675518715, "width": 156.66668701171875, "height": 85, "angle": 0, @@ -4685,8 +4685,8 @@ "index": "b1n", "roundness": null, "seed": 1478428591, - "version": 819, - "versionNonce": 573434435, + "version": 874, + "versionNonce": 1003388349, "isDeleted": false, "boundElements": [ { @@ -4702,15 +4702,15 @@ "type": "arrow" } ], - "updated": 1767580076534, + "updated": 1767684541917, "link": null, "locked": false }, { "id": "hqH-l-CJ15NjmjoZtiq-9", "type": "text", - "x": 405.6691030612979, - "y": 5442.375675518715, + "x": 422.33576972796465, + "y": 6302.375675518715, "width": 114.77995300292969, "height": 50, "angle": 0, @@ -4726,11 +4726,11 @@ "index": "b1o", "roundness": null, "seed": 492845519, - "version": 809, - "versionNonce": 303933923, + "version": 864, + "versionNonce": 538033693, "isDeleted": false, "boundElements": [], - "updated": 1767580076534, + "updated": 1767684541917, "link": null, "locked": false, "text": "ray job tool\n(ray client)", @@ -4746,8 +4746,8 @@ { "id": "kurhiArg2sM7XFjEdTyrI", "type": "rectangle", - "x": 629.7257360569034, - "y": 5424.375675518715, + "x": 646.3924027235702, + "y": 6284.375675518715, "width": 156.66668701171875, "height": 85, "angle": 0, @@ -4763,8 +4763,8 @@ "index": "b1p", "roundness": null, "seed": 603334639, - "version": 958, - "versionNonce": 380685603, + "version": 1013, + "versionNonce": 348429021, "isDeleted": false, "boundElements": [ { @@ -4776,15 +4776,15 @@ "type": "arrow" } ], - "updated": 1767580076534, + "updated": 1767684541917, "link": null, "locked": false }, { "id": "7k75o8I7t4NCGdyrtenGs", "type": "text", - "x": 640.6091360202823, - "y": 5441.875675518715, + "x": 657.2758026869491, + "y": 6301.875675518715, "width": 134.89988708496094, "height": 50, "angle": 0, @@ -4800,11 +4800,11 @@ "index": "b1q", "roundness": null, "seed": 1508463119, - "version": 960, - "versionNonce": 2114007235, + "version": 1015, + "versionNonce": 1787246397, "isDeleted": false, "boundElements": [], - "updated": 1767580076534, + "updated": 1767684541917, "link": null, "locked": false, "text": "VerlTaskSpec \nyaml", @@ -4820,8 +4820,8 @@ { "id": "qO3lO1bGwrfyKuWKhDJ1J", "type": "arrow", - "x": 631.0590795627628, - "y": 5462.875675518715, + "x": 647.7257462294295, + "y": 6322.875675518715, "width": 90.33331298828125, "height": 4, "angle": 0, @@ -4839,11 +4839,11 @@ "type": 2 }, "seed": 1479686191, - "version": 2170, - "versionNonce": 668673901, + "version": 2337, + "versionNonce": 1203957907, "isDeleted": false, "boundElements": [], - "updated": 1767580076859, + "updated": 1767684541922, "link": null, "locked": false, "points": [ @@ -4859,12 +4859,12 @@ "lastCommittedPoint": null, "startBinding": { "elementId": "kurhiArg2sM7XFjEdTyrI", - "focus": 0.16118805172015774, + "focus": 0.1611880517201637, "gap": 1.333343505859375 }, "endBinding": { "elementId": "ExBkrVpyvo_OXAog8ivWn", - "focus": 0.06393742185084282, + "focus": 0.0639374218508417, "gap": 1 }, "startArrowhead": null, @@ -4874,8 +4874,8 @@ { "id": "oZv8__WhJ3c5fkGqXVl2_", "type": "text", - "x": 357.3924352379864, - "y": 5210.609823586392, + "x": 374.0591019046531, + "y": 6070.609823586392, "width": 385.8797302246094, "height": 25, "angle": 0, @@ -4891,11 +4891,11 @@ "index": "b1s", "roundness": null, "seed": 79256143, 
- "version": 977, - "versionNonce": 571188131, + "version": 1032, + "versionNonce": 259376221, "isDeleted": false, "boundElements": [], - "updated": 1767580076534, + "updated": 1767684541917, "link": null, "locked": false, "text": "v5.0 operability (statistics, sop, agent)", @@ -4911,8 +4911,8 @@ { "id": "WNodxBjaE-CYp8dK1fMQN", "type": "rectangle", - "x": 570.5496841300775, - "y": 5267.042270977699, + "x": 587.2163507967442, + "y": 6127.042270977699, "width": 210.1763265850289, "height": 55.000000000000014, "angle": 0, @@ -4928,8 +4928,8 @@ "index": "b1t", "roundness": null, "seed": 1951873135, - "version": 1108, - "versionNonce": 884815683, + "version": 1163, + "versionNonce": 408367293, "isDeleted": false, "boundElements": [ { @@ -4937,15 +4937,15 @@ "id": "G2DrVh_u1lhQlxAkO611N" } ], - "updated": 1767580076534, + "updated": 1767684541917, "link": null, "locked": false }, { "id": "G2DrVh_u1lhQlxAkO611N", "type": "text", - "x": 622.7878870954435, - "y": 5282.042270977699, + "x": 639.4545537621102, + "y": 6142.042270977699, "width": 105.69992065429688, "height": 25, "angle": 0, @@ -4961,11 +4961,11 @@ "index": "b1u", "roundness": null, "seed": 215241359, - "version": 1149, - "versionNonce": 255547107, + "version": 1204, + "versionNonce": 1863061789, "isDeleted": false, "boundElements": [], - "updated": 1767580076534, + "updated": 1767684541917, "link": null, "locked": false, "text": "API server", @@ -4981,8 +4981,8 @@ { "id": "XxNyZL0flCwhAO9zT3UQy", "type": "rectangle", - "x": 384.55907956276275, - "y": 5338.042270977699, + "x": 401.2257462294295, + "y": 6198.042270977699, "width": 154.6436510018092, "height": 60, "angle": 0, @@ -4998,8 +4998,8 @@ "index": "b1v", "roundness": null, "seed": 1726556335, - "version": 1338, - "versionNonce": 538215043, + "version": 1393, + "versionNonce": 1896211837, "isDeleted": false, "boundElements": [ { @@ -5007,15 +5007,15 @@ "id": "O2dtUuIqb0fs_uv7D29cW" } ], - "updated": 1767580076534, + "updated": 1767684541917, "link": null, "locked": false }, { "id": "O2dtUuIqb0fs_uv7D29cW", "type": "text", - "x": 400.7609480934525, - "y": 5343.042270977699, + "x": 417.42761476011924, + "y": 6203.042270977699, "width": 122.23991394042969, "height": 50, "angle": 0, @@ -5031,11 +5031,11 @@ "index": "b1w", "roundness": null, "seed": 1883126479, - "version": 1399, - "versionNonce": 473635, + "version": 1454, + "versionNonce": 1367748061, "isDeleted": false, "boundElements": [], - "updated": 1767580076534, + "updated": 1767684541917, "link": null, "locked": false, "text": "task\nmanagement ", @@ -5051,8 +5051,8 @@ { "id": "UK3M2EByn3haJdkH9Z21T", "type": "rectangle", - "x": 568.6666181484326, - "y": 5338.616579290413, + "x": 585.3332848150993, + "y": 6198.616579290413, "width": 207.59447102997498, "height": 60, "angle": 0, @@ -5068,8 +5068,8 @@ "index": "b1x", "roundness": null, "seed": 1298925807, - "version": 1429, - "versionNonce": 738328003, + "version": 1484, + "versionNonce": 1604217405, "isDeleted": false, "boundElements": [ { @@ -5077,15 +5077,15 @@ "id": "_erL3NkeChZxbmMXnWyQ8" } ], - "updated": 1767580076534, + "updated": 1767684541917, "link": null, "locked": false }, { "id": "_erL3NkeChZxbmMXnWyQ8", "type": "text", - "x": 584.933923548674, - "y": 5343.616579290413, + "x": 601.6005902153407, + "y": 6203.616579290413, "width": 175.0598602294922, "height": 50, "angle": 0, @@ -5101,11 +5101,11 @@ "index": "b1y", "roundness": null, "seed": 2080051983, - "version": 1519, - "versionNonce": 2133020003, + "version": 1574, + "versionNonce": 204479133, "isDeleted": 
false, "boundElements": [], - "updated": 1767580076534, + "updated": 1767684541917, "link": null, "locked": false, "text": "node management\n(ssh, ray cluster) ", @@ -5121,8 +5121,8 @@ { "id": "lQT5-sydwGN_vbadu61qE", "type": "rectangle", - "x": 906.7776256001704, - "y": 5472.766443061252, + "x": 923.4442922668371, + "y": 6332.766443061252, "width": 163.1357446724801, "height": 106.45708709635272, "angle": 0, @@ -5138,8 +5138,8 @@ "index": "b1z", "roundness": null, "seed": 878958895, - "version": 760, - "versionNonce": 1824106755, + "version": 815, + "versionNonce": 804044541, "isDeleted": false, "boundElements": [ { @@ -5147,15 +5147,15 @@ "id": "yqwCJHk3GkcnyEzFHSU2z" } ], - "updated": 1767580076534, + "updated": 1767684541917, "link": null, "locked": false }, { "id": "yqwCJHk3GkcnyEzFHSU2z", "type": "text", - "x": 937.6955421868987, - "y": 5500.994986609428, + "x": 954.3622088535654, + "y": 6360.994986609428, "width": 101.29991149902344, "height": 50, "angle": 0, @@ -5171,11 +5171,11 @@ "index": "b20", "roundness": null, "seed": 202300239, - "version": 824, - "versionNonce": 2051248291, + "version": 879, + "versionNonce": 290838365, "isDeleted": false, "boundElements": [], - "updated": 1767580076534, + "updated": 1767684541917, "link": null, "locked": false, "text": "ray worker\nnode", @@ -5191,8 +5191,8 @@ { "id": "n74P3TAm78jOVPnYqFccD", "type": "rectangle", - "x": 906.8145339538381, - "y": 5283.5525416458795, + "x": 923.4812006205049, + "y": 6143.5525416458795, "width": 163.1357446724801, "height": 85, "angle": 0, @@ -5208,8 +5208,8 @@ "index": "b21", "roundness": null, "seed": 1465162095, - "version": 775, - "versionNonce": 2037351491, + "version": 830, + "versionNonce": 717022141, "isDeleted": false, "boundElements": [ { @@ -5217,15 +5217,15 @@ "id": "BTIKScHoPW_m2eS-Ro8GX" } ], - "updated": 1767580076534, + "updated": 1767684541917, "link": null, "locked": false }, { "id": "BTIKScHoPW_m2eS-Ro8GX", "type": "text", - "x": 937.7324505405664, - "y": 5301.0525416458795, + "x": 954.3991172072332, + "y": 6161.0525416458795, "width": 101.29991149902344, "height": 50, "angle": 0, @@ -5241,11 +5241,11 @@ "index": "b22", "roundness": null, "seed": 1266520975, - "version": 841, - "versionNonce": 35390435, + "version": 896, + "versionNonce": 809722909, "isDeleted": false, "boundElements": [], - "updated": 1767580076534, + "updated": 1767684541917, "link": null, "locked": false, "text": "ray worker\nnode", @@ -5261,8 +5261,8 @@ { "id": "L9owxY-Ly_UhJE3U0jLgC", "type": "rectangle", - "x": 387.36458902885005, - "y": 5264.829378311956, + "x": 404.0312556955168, + "y": 6124.829378311956, "width": 147.4799234681481, "height": 55.000000000000014, "angle": 0, @@ -5278,8 +5278,8 @@ "index": "b23", "roundness": null, "seed": 1623345583, - "version": 1226, - "versionNonce": 743935875, + "version": 1281, + "versionNonce": 1440405629, "isDeleted": false, "boundElements": [ { @@ -5287,15 +5287,15 @@ "id": "8S3xYcC5-56RoTln81Vps" } ], - "updated": 1767580076534, + "updated": 1767684541917, "link": null, "locked": false }, { "id": "8S3xYcC5-56RoTln81Vps", "type": "text", - "x": 429.8745779235686, - "y": 5279.829378311956, + "x": 446.54124459023535, + "y": 6139.829378311956, "width": 62.45994567871094, "height": 25, "angle": 0, @@ -5311,11 +5311,11 @@ "index": "b24", "roundness": null, "seed": 1951087567, - "version": 1278, - "versionNonce": 174106403, + "version": 1333, + "versionNonce": 1430569181, "isDeleted": false, "boundElements": [], - "updated": 1767580076534, + "updated": 1767684541917, "link": null, 
"locked": false, "text": "WebUI", @@ -5331,8 +5331,8 @@ { "id": "r5vmCFPIVwxZGcbV8EtDP", "type": "rectangle", - "x": 631.0363707898229, - "y": 5539.924768328369, + "x": 647.7030374564896, + "y": 6399.924768328369, "width": 156.66668701171875, "height": 85, "angle": 0, @@ -5348,8 +5348,8 @@ "index": "b25", "roundness": null, "seed": 707848687, - "version": 1010, - "versionNonce": 1655032515, + "version": 1065, + "versionNonce": 873635133, "isDeleted": false, "boundElements": [ { @@ -5361,15 +5361,15 @@ "type": "arrow" } ], - "updated": 1767580076534, + "updated": 1767684541917, "link": null, "locked": false }, { "id": "BPGRZQXUil0Bmwl2yz1n9", "type": "text", - "x": 641.9197707532018, - "y": 5557.424768328369, + "x": 658.5864374198685, + "y": 6417.424768328369, "width": 134.89988708496094, "height": 50, "angle": 0, @@ -5385,11 +5385,11 @@ "index": "b26", "roundness": null, "seed": 259773455, - "version": 1023, - "versionNonce": 700807779, + "version": 1078, + "versionNonce": 1235611037, "isDeleted": false, "boundElements": [], - "updated": 1767580076534, + "updated": 1767684541917, "link": null, "locked": false, "text": "Advanced\nVerlTaskSpec ", @@ -5405,8 +5405,8 @@ { "id": "DS3tViVPwVs_FojIMqBSU", "type": "arrow", - "x": 630.5725639929063, - "y": 5584.289760842569, + "x": 647.2392306595731, + "y": 6444.289760842569, "width": 80.66214281697523, "height": 84.85848269127973, "angle": 0, @@ -5424,11 +5424,11 @@ "type": 2 }, "seed": 2082765359, - "version": 1017, - "versionNonce": 2023747629, + "version": 1128, + "versionNonce": 1910335027, "isDeleted": false, "boundElements": [], - "updated": 1767580076859, + "updated": 1767684541922, "link": null, "locked": false, "points": [ @@ -5444,7 +5444,7 @@ "lastCommittedPoint": null, "startBinding": { "elementId": "r5vmCFPIVwxZGcbV8EtDP", - "focus": -0.6785882314731624, + "focus": -0.6785882314731602, "gap": 1 }, "endBinding": null, @@ -5455,8 +5455,8 @@ { "id": "AUK2tr7zEw-7pPSMh0Ric", "type": "rectangle", - "x": 202.16054048272503, - "y": 5337.947299613951, + "x": 218.82720714939177, + "y": 6197.947299613951, "width": 154.6436510018092, "height": 60, "angle": 0, @@ -5472,8 +5472,8 @@ "index": "b28", "roundness": null, "seed": 890461263, - "version": 1400, - "versionNonce": 1564791107, + "version": 1455, + "versionNonce": 100636349, "isDeleted": false, "boundElements": [ { @@ -5481,15 +5481,15 @@ "id": "B-9nH3-XOZNOEeCLd2Fu-" } ], - "updated": 1767580076534, + "updated": 1767684541917, "link": null, "locked": false }, { "id": "B-9nH3-XOZNOEeCLd2Fu-", "type": "text", - "x": 218.36240901341478, - "y": 5342.947299613951, + "x": 235.02907568008152, + "y": 6202.947299613951, "width": 122.23991394042969, "height": 50, "angle": 0, @@ -5505,11 +5505,11 @@ "index": "b29", "roundness": null, "seed": 1310784111, - "version": 1468, - "versionNonce": 935309539, + "version": 1523, + "versionNonce": 381138717, "isDeleted": false, "boundElements": [], - "updated": 1767580076534, + "updated": 1767684541917, "link": null, "locked": false, "text": "user\nmanagement ", @@ -5525,8 +5525,8 @@ { "id": "rfGCTpcFlf1T-8NxoJELc", "type": "rectangle", - "x": 208.2219369016421, - "y": 5531.532131266703, + "x": 224.88860356830884, + "y": 6391.532131266703, "width": 154.6436510018092, "height": 85, "angle": 0, @@ -5542,8 +5542,8 @@ "index": "b2A", "roundness": null, "seed": 1276499087, - "version": 1632, - "versionNonce": 721263747, + "version": 1687, + "versionNonce": 810339197, "isDeleted": false, "boundElements": [ { @@ -5551,15 +5551,15 @@ "id": "12ZSYQo1l8nLuktF_M13y" } ], - 
"updated": 1767580076534, + "updated": 1767684541917, "link": null, "locked": false }, { "id": "12ZSYQo1l8nLuktF_M13y", "type": "text", - "x": 228.42380543233185, - "y": 5536.532131266703, + "x": 245.0904720989986, + "y": 6396.532131266703, "width": 114.23991394042969, "height": 75, "angle": 0, @@ -5575,11 +5575,11 @@ "index": "b2B", "roundness": null, "seed": 111831727, - "version": 1714, - "versionNonce": 1119687715, + "version": 1769, + "versionNonce": 318788573, "isDeleted": false, "boundElements": [], - "updated": 1767580076534, + "updated": 1767684541917, "link": null, "locked": false, "text": "data\nmanagement\nSFTPGo ", @@ -5595,8 +5595,8 @@ { "id": "kvfUPQgQ5eG_Fxn-tGm59", "type": "rectangle", - "x": 217.25149426722095, - "y": 5786.938428579538, + "x": 233.9181609338877, + "y": 6646.938428579538, "width": 154.6436510018092, "height": 60, "angle": 0, @@ -5612,8 +5612,8 @@ "index": "b2C", "roundness": null, "seed": 1250376911, - "version": 1501, - "versionNonce": 965575715, + "version": 1556, + "versionNonce": 2061750333, "isDeleted": false, "boundElements": [ { @@ -5621,15 +5621,15 @@ "id": "a5mX2A_-o1LoWTFedzdqg" } ], - "updated": 1767580554066, + "updated": 1767684541917, "link": null, "locked": false }, { "id": "a5mX2A_-o1LoWTFedzdqg", "type": "text", - "x": 239.60335669439507, - "y": 5804.438428579538, + "x": 256.2700233610618, + "y": 6664.438428579538, "width": 109.93992614746094, "height": 25, "angle": 0, @@ -5645,11 +5645,11 @@ "index": "b2D", "roundness": null, "seed": 994364143, - "version": 1580, - "versionNonce": 841942979, + "version": 1635, + "versionNonce": 634962077, "isDeleted": false, "boundElements": [], - "updated": 1767580554066, + "updated": 1767684541917, "link": null, "locked": false, "text": "prometheus", @@ -5665,8 +5665,8 @@ { "id": "_M4VtkTCC_G6rAalQU_8m", "type": "rectangle", - "x": 414.9436034844226, - "y": 5785.0735214392225, + "x": 431.61027015108937, + "y": 6645.0735214392225, "width": 154.6436510018092, "height": 60, "angle": 0, @@ -5682,8 +5682,8 @@ "index": "b2E", "roundness": null, "seed": 1969984783, - "version": 1545, - "versionNonce": 253695843, + "version": 1600, + "versionNonce": 428909821, "isDeleted": false, "boundElements": [ { @@ -5691,15 +5691,15 @@ "id": "xQD6qQi-05xLjq0nbHiqd" } ], - "updated": 1767580554066, + "updated": 1767684541917, "link": null, "locked": false }, { "id": "xQD6qQi-05xLjq0nbHiqd", "type": "text", - "x": 455.485468353003, - "y": 5802.5735214392225, + "x": 472.15213501966974, + "y": 6662.5735214392225, "width": 73.55992126464844, "height": 25, "angle": 0, @@ -5715,11 +5715,11 @@ "index": "b2F", "roundness": null, "seed": 1886657327, - "version": 1634, - "versionNonce": 1588343555, + "version": 1689, + "versionNonce": 697105757, "isDeleted": false, "boundElements": [], - "updated": 1767580554066, + "updated": 1767684541917, "link": null, "locked": false, "text": "grafana", @@ -5735,8 +5735,8 @@ { "id": "aCuSAi4oJ_KJdRKAHJ5y_", "type": "rectangle", - "x": 617.2984501087772, - "y": 5783.208443551139, + "x": 633.9651167754439, + "y": 6643.208443551139, "width": 154.6436510018092, "height": 60, "angle": 0, @@ -5752,8 +5752,8 @@ "index": "b2G", "roundness": null, "seed": 549903695, - "version": 1593, - "versionNonce": 1632950947, + "version": 1648, + "versionNonce": 1025460669, "isDeleted": false, "boundElements": [ { @@ -5761,15 +5761,15 @@ "id": "25Rm9RG-UnIb_WWemRDJY" } ], - "updated": 1767580554066, + "updated": 1767684541917, "link": null, "locked": false }, { "id": "25Rm9RG-UnIb_WWemRDJY", "type": "text", - "x": 
675.9902859856581, - "y": 5800.708443551139, + "x": 692.6569526523249, + "y": 6660.708443551139, "width": 37.259979248046875, "height": 25, "angle": 0, @@ -5785,11 +5785,11 @@ "index": "b2H", "roundness": null, "seed": 1505516399, - "version": 1702, - "versionNonce": 1137151555, + "version": 1757, + "versionNonce": 1432947229, "isDeleted": false, "boundElements": [], - "updated": 1767580554066, + "updated": 1767684541917, "link": null, "locked": false, "text": "ELK", @@ -5805,8 +5805,8 @@ { "id": "aVk5d_J2lP8Z8cTxcLino", "type": "rectangle", - "x": 392.3807843726648, - "y": 5539.532434700749, + "x": 409.0474510393315, + "y": 6399.532434700749, "width": 156.66668701171875, "height": 85, "angle": 0, @@ -5822,8 +5822,8 @@ "index": "b2I", "roundness": null, "seed": 1872565647, - "version": 1034, - "versionNonce": 686514829, + "version": 1089, + "versionNonce": 1398521469, "isDeleted": false, "boundElements": [ { @@ -5831,15 +5831,15 @@ "id": "-7mLziiSO10QZS0oA7Zmp" } ], - "updated": 1767580076859, + "updated": 1767684541917, "link": null, "locked": false }, { "id": "-7mLziiSO10QZS0oA7Zmp", "type": "text", - "x": 436.5041593116296, - "y": 5557.032434700749, + "x": 453.17082597829636, + "y": 6417.032434700749, "width": 68.41993713378906, "height": 50, "angle": 0, @@ -5855,11 +5855,11 @@ "index": "b2J", "roundness": null, "seed": 430017455, - "version": 1060, - "versionNonce": 907837731, + "version": 1115, + "versionNonce": 2109269725, "isDeleted": false, "boundElements": [], - "updated": 1767580076534, + "updated": 1767684541917, "link": null, "locked": false, "text": "model\nServing", @@ -5875,8 +5875,8 @@ { "id": "Kwettsi3WP-Qkpn71oRV_", "type": "rectangle", - "x": 930.4278663822328, - "y": 5574.9785648802235, + "x": 947.0945330488995, + "y": 6434.9785648802235, "width": 106.83929336612698, "height": 60, "angle": 0, @@ -5892,8 +5892,8 @@ "index": "b2K", "roundness": null, "seed": 334642639, - "version": 1609, - "versionNonce": 668128355, + "version": 1664, + "versionNonce": 1192362813, "isDeleted": false, "boundElements": [ { @@ -5901,15 +5901,15 @@ "id": "Ws50jhW1M-jJ3lnoEAvY2" } ], - "updated": 1767580076534, + "updated": 1767684541917, "link": null, "locked": false }, { "id": "Ws50jhW1M-jJ3lnoEAvY2", "type": "text", - "x": 941.7575396155892, - "y": 5592.4785648802235, + "x": 958.4242062822559, + "y": 6452.4785648802235, "width": 84.17994689941406, "height": 25, "angle": 0, @@ -5925,11 +5925,11 @@ "index": "b2L", "roundness": null, "seed": 27765743, - "version": 1700, - "versionNonce": 65032195, + "version": 1755, + "versionNonce": 1405653917, "isDeleted": false, "boundElements": [], - "updated": 1767580076534, + "updated": 1767684541917, "link": null, "locked": false, "text": "exporter", @@ -5945,8 +5945,8 @@ { "id": "uYm1lMA4TITqp8EHogQDU", "type": "rectangle", - "x": 932.1487224195542, - "y": 5363.874041182105, + "x": 948.815389086221, + "y": 6223.874041182105, "width": 106.83929336612698, "height": 60, "angle": 0, @@ -5962,8 +5962,8 @@ "index": "b2M", "roundness": null, "seed": 248141327, - "version": 1623, - "versionNonce": 1677547427, + "version": 1678, + "versionNonce": 1074665469, "isDeleted": false, "boundElements": [ { @@ -5971,15 +5971,15 @@ "id": "tX9OTModeIVpu_FonBF3E" } ], - "updated": 1767580076534, + "updated": 1767684541917, "link": null, "locked": false }, { "id": "tX9OTModeIVpu_FonBF3E", "type": "text", - "x": 943.4783956529106, - "y": 5381.374041182105, + "x": 960.1450623195774, + "y": 6241.374041182105, "width": 84.17994689941406, "height": 25, "angle": 0, @@ -5995,11 
+5995,11 @@ "index": "b2N", "roundness": null, "seed": 1381153839, - "version": 1714, - "versionNonce": 911764291, + "version": 1769, + "versionNonce": 635703389, "isDeleted": false, "boundElements": [], - "updated": 1767580076534, + "updated": 1767684541917, "link": null, "locked": false, "text": "exporter", @@ -6015,8 +6015,8 @@ { "id": "yGh_yLim4aBl8oGqW_oU1", "type": "rectangle", - "x": 222.20371819421865, - "y": 5873.0398650179195, + "x": 238.8703848608854, + "y": 6733.0398650179195, "width": 154.6436510018092, "height": 60, "angle": 0, @@ -6032,8 +6032,8 @@ "index": "b2Q", "roundness": null, "seed": 170223553, - "version": 1511, - "versionNonce": 1904143907, + "version": 1566, + "versionNonce": 1662778557, "isDeleted": false, "boundElements": [ { @@ -6041,15 +6041,15 @@ "id": "lqdb6MLxScVe5ns9aqeeu" } ], - "updated": 1767580076534, + "updated": 1767684541917, "link": null, "locked": false }, { "id": "lqdb6MLxScVe5ns9aqeeu", "type": "text", - "x": 250.96559191289668, - "y": 5890.5398650179195, + "x": 267.6322585795634, + "y": 6750.5398650179195, "width": 97.11990356445312, "height": 25, "angle": 0, @@ -6065,11 +6065,11 @@ "index": "b2R", "roundness": null, "seed": 1132775329, - "version": 1599, - "versionNonce": 2086875587, + "version": 1654, + "versionNonce": 525819165, "isDeleted": false, "boundElements": [], - "updated": 1767580076534, + "updated": 1767684541917, "link": null, "locked": false, "text": "statistics", @@ -6085,8 +6085,8 @@ { "id": "TtGKkGSFi4MbYg3ZfyNiG", "type": "rectangle", - "x": 417.28500640069166, - "y": 5873.0398650179195, + "x": 433.9516730673584, + "y": 6733.0398650179195, "width": 154.6436510018092, "height": 60, "angle": 0, @@ -6102,8 +6102,8 @@ "index": "b2S", "roundness": null, "seed": 62346383, - "version": 1570, - "versionNonce": 356385123, + "version": 1625, + "versionNonce": 302494077, "isDeleted": false, "boundElements": [ { @@ -6111,15 +6111,15 @@ "id": "G7gy-YlXMFxKiO7gmm6tl" } ], - "updated": 1767580076534, + "updated": 1767684541917, "link": null, "locked": false }, { "id": "G7gy-YlXMFxKiO7gmm6tl", "type": "text", - "x": 448.5968526535494, - "y": 5890.5398650179195, + "x": 465.2635193202161, + "y": 6750.5398650179195, "width": 92.01995849609375, "height": 25, "angle": 0, @@ -6135,11 +6135,11 @@ "index": "b2T", "roundness": null, "seed": 1310452399, - "version": 1667, - "versionNonce": 1432214787, + "version": 1722, + "versionNonce": 141457885, "isDeleted": false, "boundElements": [], - "updated": 1767580076534, + "updated": 1767684541917, "link": null, "locked": false, "text": "sop tools", @@ -6155,8 +6155,8 @@ { "id": "JldxRrrtgsQXtVb-OKmEY", "type": "rectangle", - "x": 619.5583295451758, - "y": 5873.039865017919, + "x": 636.2249962118425, + "y": 6733.039865017919, "width": 154.6436510018092, "height": 60, "angle": 0, @@ -6172,8 +6172,8 @@ "index": "b2U", "roundness": null, "seed": 1283509455, - "version": 1606, - "versionNonce": 675366051, + "version": 1661, + "versionNonce": 731723325, "isDeleted": false, "boundElements": [ { @@ -6181,15 +6181,15 @@ "id": "YbDIjM6HTsYLldkdyo8ed" } ], - "updated": 1767580076534, + "updated": 1767684541917, "link": null, "locked": false }, { "id": "YbDIjM6HTsYLldkdyo8ed", "type": "text", - "x": 669.4101767135609, - "y": 5890.539865017919, + "x": 686.0768433802276, + "y": 6750.539865017919, "width": 54.93995666503906, "height": 25, "angle": 0, @@ -6205,11 +6205,11 @@ "index": "b2V", "roundness": null, "seed": 1737251567, - "version": 1709, - "versionNonce": 93763651, + "version": 1764, + "versionNonce": 108357277, 
"isDeleted": false, "boundElements": [], - "updated": 1767580076534, + "updated": 1767684541917, "link": null, "locked": false, "text": "agent", @@ -6435,8 +6435,8 @@ { "id": "Z88ttfOpQUtGXPUZOTNLh", "type": "rectangle", - "x": 1085.3820455873556, - "y": 4565.045103668226, + "x": 1102.0487122540223, + "y": 5425.045103668226, "width": 78.84661364258261, "height": 239.8912269868912, "angle": 0, @@ -6452,8 +6452,8 @@ "index": "b2l", "roundness": null, "seed": 995417473, - "version": 1395, - "versionNonce": 512081763, + "version": 1450, + "versionNonce": 1269336829, "isDeleted": false, "boundElements": [ { @@ -6461,15 +6461,15 @@ "id": "JUS2JvDrkx5GXQjxkI78l" } ], - "updated": 1767580421422, + "updated": 1767684541917, "link": null, "locked": false }, { "id": "JUS2JvDrkx5GXQjxkI78l", "type": "text", - "x": 1097.1953746864792, - "y": 4672.4907171616715, + "x": 1113.862041353146, + "y": 5532.4907171616715, "width": 55.21995544433594, "height": 25, "angle": 0, @@ -6485,11 +6485,11 @@ "index": "b2m", "roundness": null, "seed": 314541409, - "version": 1411, - "versionNonce": 452844291, + "version": 1466, + "versionNonce": 1538794333, "isDeleted": false, "boundElements": [], - "updated": 1767580421422, + "updated": 1767684541917, "link": null, "locked": false, "text": "GPFS", @@ -6505,8 +6505,8 @@ { "id": "py2gWjP2fsxBZN-9ttfBh", "type": "rectangle", - "x": 1080.7158973597411, - "y": 5309.476392482113, + "x": 1097.3825640264079, + "y": 6169.476392482113, "width": 78.84661364258261, "height": 239.8912269868912, "angle": 0, @@ -6522,8 +6522,8 @@ "index": "b2p", "roundness": null, "seed": 105076673, - "version": 1344, - "versionNonce": 854339171, + "version": 1399, + "versionNonce": 183873469, "isDeleted": false, "boundElements": [ { @@ -6531,15 +6531,15 @@ "id": "5tfhe4hwG92sJ3lHY77pG" } ], - "updated": 1767580076534, + "updated": 1767684541917, "link": null, "locked": false }, { "id": "5tfhe4hwG92sJ3lHY77pG", "type": "text", - "x": 1092.5292264588647, - "y": 5416.922005975558, + "x": 1109.1958931255315, + "y": 6276.922005975558, "width": 55.21995544433594, "height": 25, "angle": 0, @@ -6555,11 +6555,11 @@ "index": "b2q", "roundness": null, "seed": 1717876641, - "version": 1361, - "versionNonce": 1455319555, + "version": 1416, + "versionNonce": 662876189, "isDeleted": false, "boundElements": [], - "updated": 1767580076534, + "updated": 1767684541917, "link": null, "locked": false, "text": "GPFS", @@ -6640,7 +6640,7 @@ "version": 123, "versionNonce": 2087641069, "isDeleted": false, - "boundElements": null, + "boundElements": [], "updated": 1766652208471, "link": null, "locked": false, @@ -6677,7 +6677,7 @@ "version": 147, "versionNonce": 1921843565, "isDeleted": false, - "boundElements": null, + "boundElements": [], "updated": 1766652209287, "link": null, "locked": false, @@ -6764,7 +6764,7 @@ "version": 70, "versionNonce": 1883440109, "isDeleted": false, - "boundElements": null, + "boundElements": [], "updated": 1766653783354, "link": null, "locked": false, @@ -6839,7 +6839,7 @@ "version": 84, "versionNonce": 1858333453, "isDeleted": false, - "boundElements": null, + "boundElements": [], "updated": 1766652208471, "link": null, "locked": false, @@ -6906,7 +6906,7 @@ "version": 31, "versionNonce": 712375885, "isDeleted": false, - "boundElements": null, + "boundElements": [], "updated": 1766653783355, "link": null, "locked": false, @@ -6958,7 +6958,7 @@ "version": 44, "versionNonce": 306885709, "isDeleted": false, - "boundElements": null, + "boundElements": [], "updated": 1766652192424, "link": null, 
"locked": false, @@ -8088,1107 +8088,6 @@ "autoResize": true, "lineHeight": 1.25 }, - { - "id": "qg8jFPFtp50OAJT2ab_YO", - "type": "rectangle", - "x": 187.2240309153068, - "y": 3838.839430576088, - "width": 647.3203086953981, - "height": 541.9127073088301, - "angle": 0, - "strokeColor": "#1e1e1e", - "backgroundColor": "transparent", - "fillStyle": "solid", - "strokeWidth": 2, - "strokeStyle": "solid", - "roughness": 1, - "opacity": 100, - "groupIds": [], - "frameId": null, - "index": "b3U", - "roundness": { - "type": 3 - }, - "seed": 253245411, - "version": 1077, - "versionNonce": 1966178979, - "isDeleted": false, - "boundElements": [], - "updated": 1767580421422, - "link": null, - "locked": false - }, - { - "id": "ej_5uPkV3roLWgrNmRVQJ", - "type": "rectangle", - "x": 392.8775610462518, - "y": 4019.5060870702287, - "width": 156.66668701171875, - "height": 85, - "angle": 0, - "strokeColor": "#1e1e1e", - "backgroundColor": "transparent", - "fillStyle": "solid", - "strokeWidth": 2, - "strokeStyle": "solid", - "roughness": 1, - "opacity": 100, - "groupIds": [], - "frameId": null, - "index": "b3V", - "roundness": null, - "seed": 866993027, - "version": 826, - "versionNonce": 97465923, - "isDeleted": false, - "boundElements": [ - { - "type": "text", - "id": "5J21ZHZTHpGu78H0oBGne" - }, - { - "id": "dCW4muSHBOjk2yt4VP-8M", - "type": "arrow" - }, - { - "id": "mfel3yZDZK2O4BoiB9bfp", - "type": "arrow" - } - ], - "updated": 1767580421422, - "link": null, - "locked": false - }, - { - "id": "5J21ZHZTHpGu78H0oBGne", - "type": "text", - "x": 413.82092805064633, - "y": 4037.0060870702287, - "width": 114.77995300292969, - "height": 50, - "angle": 0, - "strokeColor": "#1e1e1e", - "backgroundColor": "transparent", - "fillStyle": "solid", - "strokeWidth": 2, - "strokeStyle": "solid", - "roughness": 1, - "opacity": 100, - "groupIds": [], - "frameId": null, - "index": "b3W", - "roundness": null, - "seed": 81779491, - "version": 817, - "versionNonce": 196441571, - "isDeleted": false, - "boundElements": [], - "updated": 1767580421422, - "link": null, - "locked": false, - "text": "ray job tool\n(ray client)", - "fontSize": 20, - "fontFamily": 5, - "textAlign": "center", - "verticalAlign": "middle", - "containerId": "ej_5uPkV3roLWgrNmRVQJ", - "originalText": "ray job tool\n(ray client)", - "autoResize": true, - "lineHeight": 1.25 - }, - { - "id": "UJvTvQ4khDdrdYsxDaKt0", - "type": "rectangle", - "x": 637.8775610462517, - "y": 4019.0060870702287, - "width": 156.66668701171875, - "height": 85, - "angle": 0, - "strokeColor": "#1e1e1e", - "backgroundColor": "transparent", - "fillStyle": "solid", - "strokeWidth": 2, - "strokeStyle": "solid", - "roughness": 1, - "opacity": 100, - "groupIds": [], - "frameId": null, - "index": "b3X", - "roundness": null, - "seed": 790527683, - "version": 963, - "versionNonce": 1352432835, - "isDeleted": false, - "boundElements": [ - { - "type": "text", - "id": "0hvq7uN4KgQ1e0xT7DKVG" - }, - { - "id": "dCW4muSHBOjk2yt4VP-8M", - "type": "arrow" - } - ], - "updated": 1767580421422, - "link": null, - "locked": false - }, - { - "id": "0hvq7uN4KgQ1e0xT7DKVG", - "type": "text", - "x": 648.7609610096306, - "y": 4036.5060870702287, - "width": 134.89988708496094, - "height": 50, - "angle": 0, - "strokeColor": "#1e1e1e", - "backgroundColor": "transparent", - "fillStyle": "solid", - "strokeWidth": 2, - "strokeStyle": "solid", - "roughness": 1, - "opacity": 100, - "groupIds": [], - "frameId": null, - "index": "b3Y", - "roundness": null, - "seed": 1292349027, - "version": 967, - "versionNonce": 
1242950755, - "isDeleted": false, - "boundElements": [], - "updated": 1767580421422, - "link": null, - "locked": false, - "text": "VerlTaskSpec \nyaml", - "fontSize": 20, - "fontFamily": 5, - "textAlign": "center", - "verticalAlign": "middle", - "containerId": "UJvTvQ4khDdrdYsxDaKt0", - "originalText": "VerlTaskSpec \nyaml", - "autoResize": true, - "lineHeight": 1.25 - }, - { - "id": "dCW4muSHBOjk2yt4VP-8M", - "type": "arrow", - "x": 639.2109045521111, - "y": 4057.5060870702287, - "width": 90.33331298828125, - "height": 4, - "angle": 0, - "strokeColor": "#1e1e1e", - "backgroundColor": "transparent", - "fillStyle": "solid", - "strokeWidth": 2, - "strokeStyle": "solid", - "roughness": 1, - "opacity": 100, - "groupIds": [], - "frameId": null, - "index": "b3Z", - "roundness": { - "type": 2 - }, - "seed": 560589315, - "version": 2197, - "versionNonce": 803445773, - "isDeleted": false, - "boundElements": [], - "updated": 1767580422054, - "link": null, - "locked": false, - "points": [ - [ - 0, - 0 - ], - [ - -90.33331298828125, - 4 - ] - ], - "lastCommittedPoint": null, - "startBinding": { - "elementId": "UJvTvQ4khDdrdYsxDaKt0", - "focus": 0.16118805172016018, - "gap": 1.333343505859375 - }, - "endBinding": { - "elementId": "ej_5uPkV3roLWgrNmRVQJ", - "focus": 0.06393742185082936, - "gap": 1 - }, - "startArrowhead": null, - "endArrowhead": "arrow", - "elbowed": false - }, - { - "id": "oSLr5BRtyqrLyySPfeO9j", - "type": "text", - "x": 225.14633005688438, - "y": 3790.8170038464273, - "width": 476.19970703125, - "height": 25, - "angle": 0, - "strokeColor": "#1e1e1e", - "backgroundColor": "transparent", - "fillStyle": "solid", - "strokeWidth": 2, - "strokeStyle": "solid", - "roughness": 1, - "opacity": 100, - "groupIds": [], - "frameId": null, - "index": "b3a", - "roundness": null, - "seed": 289649059, - "version": 1167, - "versionNonce": 626310957, - "isDeleted": false, - "boundElements": [], - "updated": 1767580447801, - "link": null, - "locked": false, - "text": "v3.8 IB & Roce support for multi node training ", - "fontSize": 20, - "fontFamily": 5, - "textAlign": "left", - "verticalAlign": "top", - "containerId": null, - "originalText": "v3.8 IB & Roce support for multi node training ", - "autoResize": true, - "lineHeight": 1.25 - }, - { - "id": "ia_9sG-dZPebY0HaRl5mG", - "type": "rectangle", - "x": 578.7015091194257, - "y": 3861.672682529213, - "width": 210.1763265850289, - "height": 55.000000000000014, - "angle": 0, - "strokeColor": "#1e1e1e", - "backgroundColor": "transparent", - "fillStyle": "solid", - "strokeWidth": 2, - "strokeStyle": "solid", - "roughness": 1, - "opacity": 100, - "groupIds": [], - "frameId": null, - "index": "b3b", - "roundness": null, - "seed": 1573094723, - "version": 1115, - "versionNonce": 1531388643, - "isDeleted": false, - "boundElements": [ - { - "type": "text", - "id": "LsfI3OipT-Y5Jx7jpcccf" - } - ], - "updated": 1767580421422, - "link": null, - "locked": false - }, - { - "id": "LsfI3OipT-Y5Jx7jpcccf", - "type": "text", - "x": 630.9397120847918, - "y": 3876.672682529213, - "width": 105.69992065429688, - "height": 25, - "angle": 0, - "strokeColor": "#1e1e1e", - "backgroundColor": "transparent", - "fillStyle": "solid", - "strokeWidth": 2, - "strokeStyle": "solid", - "roughness": 1, - "opacity": 100, - "groupIds": [], - "frameId": null, - "index": "b3c", - "roundness": null, - "seed": 508028131, - "version": 1155, - "versionNonce": 281464451, - "isDeleted": false, - "boundElements": [], - "updated": 1767580421422, - "link": null, - "locked": false, - "text": "API 
server", - "fontSize": 20, - "fontFamily": 5, - "textAlign": "center", - "verticalAlign": "middle", - "containerId": "ia_9sG-dZPebY0HaRl5mG", - "originalText": "API server", - "autoResize": true, - "lineHeight": 1.25 - }, - { - "id": "xrABjxBFNPVtYSJ6ZPiSh", - "type": "rectangle", - "x": 392.7109045521112, - "y": 3932.672682529213, - "width": 154.6436510018092, - "height": 60, - "angle": 0, - "strokeColor": "#1e1e1e", - "backgroundColor": "transparent", - "fillStyle": "solid", - "strokeWidth": 2, - "strokeStyle": "solid", - "roughness": 1, - "opacity": 100, - "groupIds": [], - "frameId": null, - "index": "b3d", - "roundness": null, - "seed": 2053066883, - "version": 1345, - "versionNonce": 1016071715, - "isDeleted": false, - "boundElements": [ - { - "type": "text", - "id": "-Kr5CotillYBlIuQsHPGW" - } - ], - "updated": 1767580421422, - "link": null, - "locked": false - }, - { - "id": "-Kr5CotillYBlIuQsHPGW", - "type": "text", - "x": 408.9127730828009, - "y": 3937.672682529213, - "width": 122.23991394042969, - "height": 50, - "angle": 0, - "strokeColor": "#1e1e1e", - "backgroundColor": "transparent", - "fillStyle": "solid", - "strokeWidth": 2, - "strokeStyle": "solid", - "roughness": 1, - "opacity": 100, - "groupIds": [], - "frameId": null, - "index": "b3e", - "roundness": null, - "seed": 600328227, - "version": 1404, - "versionNonce": 1972455875, - "isDeleted": false, - "boundElements": [], - "updated": 1767580421422, - "link": null, - "locked": false, - "text": "task\nmanagement ", - "fontSize": 20, - "fontFamily": 5, - "textAlign": "center", - "verticalAlign": "middle", - "containerId": "xrABjxBFNPVtYSJ6ZPiSh", - "originalText": "task management ", - "autoResize": true, - "lineHeight": 1.25 - }, - { - "id": "Shkqjthowq7IUHYuPuzXr", - "type": "rectangle", - "x": 576.8184431377808, - "y": 3933.2469908419275, - "width": 207.59447102997498, - "height": 60, - "angle": 0, - "strokeColor": "#1e1e1e", - "backgroundColor": "transparent", - "fillStyle": "solid", - "strokeWidth": 2, - "strokeStyle": "solid", - "roughness": 1, - "opacity": 100, - "groupIds": [], - "frameId": null, - "index": "b3f", - "roundness": null, - "seed": 275860419, - "version": 1436, - "versionNonce": 1894324579, - "isDeleted": false, - "boundElements": [ - { - "type": "text", - "id": "NUOv1Sok4Fz4O-Jom4drR" - } - ], - "updated": 1767580421422, - "link": null, - "locked": false - }, - { - "id": "NUOv1Sok4Fz4O-Jom4drR", - "type": "text", - "x": 593.0857485380222, - "y": 3938.2469908419275, - "width": 175.0598602294922, - "height": 50, - "angle": 0, - "strokeColor": "#1e1e1e", - "backgroundColor": "transparent", - "fillStyle": "solid", - "strokeWidth": 2, - "strokeStyle": "solid", - "roughness": 1, - "opacity": 100, - "groupIds": [], - "frameId": null, - "index": "b3g", - "roundness": null, - "seed": 1954077539, - "version": 1526, - "versionNonce": 1004926211, - "isDeleted": false, - "boundElements": [], - "updated": 1767580421422, - "link": null, - "locked": false, - "text": "node management\n(ssh, ray cluster) ", - "fontSize": 20, - "fontFamily": 5, - "textAlign": "center", - "verticalAlign": "middle", - "containerId": "Shkqjthowq7IUHYuPuzXr", - "originalText": "node management\n(ssh, ray cluster) ", - "autoResize": true, - "lineHeight": 1.25 - }, - { - "id": "rdU6p3LbZpLNIPDuhEU6z", - "type": "rectangle", - "x": 913.0172482736575, - "y": 4014.620714934976, - "width": 163.1357446724801, - "height": 106.45708709635272, - "angle": 0, - "strokeColor": "#1e1e1e", - "backgroundColor": "transparent", - "fillStyle": "solid", - 
"strokeWidth": 2, - "strokeStyle": "solid", - "roughness": 1, - "opacity": 100, - "groupIds": [], - "frameId": null, - "index": "b3h", - "roundness": null, - "seed": 2083354371, - "version": 733, - "versionNonce": 906386595, - "isDeleted": false, - "boundElements": [ - { - "type": "text", - "id": "fyUCxFxmJICAq48idEvPj" - } - ], - "updated": 1767580421422, - "link": null, - "locked": false - }, - { - "id": "fyUCxFxmJICAq48idEvPj", - "type": "text", - "x": 943.9351648603858, - "y": 4042.8492584831524, - "width": 101.29991149902344, - "height": 50, - "angle": 0, - "strokeColor": "#1e1e1e", - "backgroundColor": "transparent", - "fillStyle": "solid", - "strokeWidth": 2, - "strokeStyle": "solid", - "roughness": 1, - "opacity": 100, - "groupIds": [], - "frameId": null, - "index": "b3i", - "roundness": null, - "seed": 1175297699, - "version": 795, - "versionNonce": 1257820227, - "isDeleted": false, - "boundElements": [], - "updated": 1767580421422, - "link": null, - "locked": false, - "text": "ray worker\nnode", - "fontSize": 20, - "fontFamily": 5, - "textAlign": "center", - "verticalAlign": "middle", - "containerId": "rdU6p3LbZpLNIPDuhEU6z", - "originalText": "ray worker node", - "autoResize": true, - "lineHeight": 1.25 - }, - { - "id": "cl1dzmeCwwz4SXuZrad2a", - "type": "rectangle", - "x": 914.9663589431866, - "y": 3878.182953197394, - "width": 163.1357446724801, - "height": 85, - "angle": 0, - "strokeColor": "#1e1e1e", - "backgroundColor": "transparent", - "fillStyle": "solid", - "strokeWidth": 2, - "strokeStyle": "solid", - "roughness": 1, - "opacity": 100, - "groupIds": [], - "frameId": null, - "index": "b3j", - "roundness": null, - "seed": 482857539, - "version": 782, - "versionNonce": 1822150627, - "isDeleted": false, - "boundElements": [ - { - "type": "text", - "id": "fPAvr_57exrKFQWCnW2mb" - } - ], - "updated": 1767580421422, - "link": null, - "locked": false - }, - { - "id": "fPAvr_57exrKFQWCnW2mb", - "type": "text", - "x": 945.8842755299149, - "y": 3895.682953197394, - "width": 101.29991149902344, - "height": 50, - "angle": 0, - "strokeColor": "#1e1e1e", - "backgroundColor": "transparent", - "fillStyle": "solid", - "strokeWidth": 2, - "strokeStyle": "solid", - "roughness": 1, - "opacity": 100, - "groupIds": [], - "frameId": null, - "index": "b3k", - "roundness": null, - "seed": 1899663843, - "version": 848, - "versionNonce": 1291705219, - "isDeleted": false, - "boundElements": [], - "updated": 1767580421422, - "link": null, - "locked": false, - "text": "ray worker\nnode", - "fontSize": 20, - "fontFamily": 5, - "textAlign": "center", - "verticalAlign": "middle", - "containerId": "cl1dzmeCwwz4SXuZrad2a", - "originalText": "ray worker node", - "autoResize": true, - "lineHeight": 1.25 - }, - { - "id": "ReF6vnI-dB3lXIMd4AvGs", - "type": "rectangle", - "x": 395.5164140181985, - "y": 3859.4597898634706, - "width": 147.4799234681481, - "height": 55.000000000000014, - "angle": 0, - "strokeColor": "#1e1e1e", - "backgroundColor": "transparent", - "fillStyle": "solid", - "strokeWidth": 2, - "strokeStyle": "solid", - "roughness": 1, - "opacity": 100, - "groupIds": [], - "frameId": null, - "index": "b3l", - "roundness": null, - "seed": 343170435, - "version": 1233, - "versionNonce": 1482519331, - "isDeleted": false, - "boundElements": [ - { - "type": "text", - "id": "5tqRv0HXQ949u10Kw2eiU" - } - ], - "updated": 1767580421422, - "link": null, - "locked": false - }, - { - "id": "5tqRv0HXQ949u10Kw2eiU", - "type": "text", - "x": 438.02640291291704, - "y": 3874.4597898634706, - "width": 
62.45994567871094, - "height": 25, - "angle": 0, - "strokeColor": "#1e1e1e", - "backgroundColor": "transparent", - "fillStyle": "solid", - "strokeWidth": 2, - "strokeStyle": "solid", - "roughness": 1, - "opacity": 100, - "groupIds": [], - "frameId": null, - "index": "b3m", - "roundness": null, - "seed": 833860899, - "version": 1282, - "versionNonce": 1864963779, - "isDeleted": false, - "boundElements": [], - "updated": 1767580421422, - "link": null, - "locked": false, - "text": "WebUI", - "fontSize": 20, - "fontFamily": 5, - "textAlign": "center", - "verticalAlign": "middle", - "containerId": "ReF6vnI-dB3lXIMd4AvGs", - "originalText": "WebUI", - "autoResize": true, - "lineHeight": 1.25 - }, - { - "id": "QFS8OSVEFsg_hE5ngbZ3q", - "type": "rectangle", - "x": 639.1881957791711, - "y": 4134.555179879884, - "width": 156.66668701171875, - "height": 85, - "angle": 0, - "strokeColor": "#1e1e1e", - "backgroundColor": "transparent", - "fillStyle": "solid", - "strokeWidth": 2, - "strokeStyle": "solid", - "roughness": 1, - "opacity": 100, - "groupIds": [], - "frameId": null, - "index": "b3n", - "roundness": null, - "seed": 140974275, - "version": 1015, - "versionNonce": 1610565219, - "isDeleted": false, - "boundElements": [ - { - "type": "text", - "id": "vd21IfNtOA5knKWzVf_sk" - }, - { - "id": "mfel3yZDZK2O4BoiB9bfp", - "type": "arrow" - } - ], - "updated": 1767580421422, - "link": null, - "locked": false - }, - { - "id": "vd21IfNtOA5knKWzVf_sk", - "type": "text", - "x": 650.07159574255, - "y": 4152.055179879884, - "width": 134.89988708496094, - "height": 50, - "angle": 0, - "strokeColor": "#1e1e1e", - "backgroundColor": "transparent", - "fillStyle": "solid", - "strokeWidth": 2, - "strokeStyle": "solid", - "roughness": 1, - "opacity": 100, - "groupIds": [], - "frameId": null, - "index": "b3o", - "roundness": null, - "seed": 1714227299, - "version": 1067, - "versionNonce": 565084675, - "isDeleted": false, - "boundElements": [], - "updated": 1767580421422, - "link": null, - "locked": false, - "text": "Advanced\nVerlTaskSpec ", - "fontSize": 20, - "fontFamily": 5, - "textAlign": "center", - "verticalAlign": "middle", - "containerId": "QFS8OSVEFsg_hE5ngbZ3q", - "originalText": "Advanced VerlTaskSpec ", - "autoResize": true, - "lineHeight": 1.25 - }, - { - "id": "mfel3yZDZK2O4BoiB9bfp", - "type": "arrow", - "x": 638.7243889822546, - "y": 4178.920172394084, - "width": 80.66214281697523, - "height": 84.85848269127973, - "angle": 0, - "strokeColor": "#1e1e1e", - "backgroundColor": "#ffc9c9", - "fillStyle": "solid", - "strokeWidth": 2, - "strokeStyle": "solid", - "roughness": 1, - "opacity": 100, - "groupIds": [], - "frameId": null, - "index": "b3p", - "roundness": { - "type": 2 - }, - "seed": 1624507395, - "version": 1044, - "versionNonce": 1833680077, - "isDeleted": false, - "boundElements": [], - "updated": 1767580422054, - "link": null, - "locked": false, - "points": [ - [ - 0, - 0 - ], - [ - -80.66214281697523, - -84.85848269127973 - ] - ], - "lastCommittedPoint": null, - "startBinding": { - "elementId": "QFS8OSVEFsg_hE5ngbZ3q", - "focus": -0.6785882314731545, - "gap": 1 - }, - "endBinding": { - "elementId": "ej_5uPkV3roLWgrNmRVQJ", - "focus": -0.47485998984048416, - "gap": 8.517998107308813 - }, - "startArrowhead": null, - "endArrowhead": "arrow", - "elbowed": false - }, - { - "id": "aWTI7cnR7QatcqJUTZEkH", - "type": "rectangle", - "x": 210.31236547207345, - "y": 3932.5777111654647, - "width": 154.6436510018092, - "height": 60, - "angle": 0, - "strokeColor": "#1e1e1e", - "backgroundColor": "transparent", 
- "fillStyle": "solid", - "strokeWidth": 2, - "strokeStyle": "solid", - "roughness": 1, - "opacity": 100, - "groupIds": [], - "frameId": null, - "index": "b3q", - "roundness": null, - "seed": 1348111267, - "version": 1407, - "versionNonce": 213089507, - "isDeleted": false, - "boundElements": [ - { - "type": "text", - "id": "NDAe6kDHeZy_G-F24BbSA" - } - ], - "updated": 1767580421422, - "link": null, - "locked": false - }, - { - "id": "NDAe6kDHeZy_G-F24BbSA", - "type": "text", - "x": 226.5142340027632, - "y": 3937.5777111654647, - "width": 122.23991394042969, - "height": 50, - "angle": 0, - "strokeColor": "#1e1e1e", - "backgroundColor": "transparent", - "fillStyle": "solid", - "strokeWidth": 2, - "strokeStyle": "solid", - "roughness": 1, - "opacity": 100, - "groupIds": [], - "frameId": null, - "index": "b3r", - "roundness": null, - "seed": 149888835, - "version": 1475, - "versionNonce": 1201137795, - "isDeleted": false, - "boundElements": [], - "updated": 1767580421422, - "link": null, - "locked": false, - "text": "user\nmanagement ", - "fontSize": 20, - "fontFamily": 5, - "textAlign": "center", - "verticalAlign": "middle", - "containerId": "aWTI7cnR7QatcqJUTZEkH", - "originalText": "user management ", - "autoResize": true, - "lineHeight": 1.25 - }, - { - "id": "6DIoCPbBjmqXhdWQiF-Ds", - "type": "rectangle", - "x": 216.37376189099052, - "y": 4126.162542818218, - "width": 154.6436510018092, - "height": 85, - "angle": 0, - "strokeColor": "#1e1e1e", - "backgroundColor": "transparent", - "fillStyle": "solid", - "strokeWidth": 2, - "strokeStyle": "solid", - "roughness": 1, - "opacity": 100, - "groupIds": [], - "frameId": null, - "index": "b3s", - "roundness": null, - "seed": 1425030883, - "version": 1639, - "versionNonce": 1848500259, - "isDeleted": false, - "boundElements": [ - { - "type": "text", - "id": "ZMAp89RSyrH2uBqKYd6xX" - } - ], - "updated": 1767580421422, - "link": null, - "locked": false - }, - { - "id": "ZMAp89RSyrH2uBqKYd6xX", - "type": "text", - "x": 236.57563042168027, - "y": 4131.162542818218, - "width": 114.23991394042969, - "height": 75, - "angle": 0, - "strokeColor": "#1e1e1e", - "backgroundColor": "transparent", - "fillStyle": "solid", - "strokeWidth": 2, - "strokeStyle": "solid", - "roughness": 1, - "opacity": 100, - "groupIds": [], - "frameId": null, - "index": "b3t", - "roundness": null, - "seed": 787245699, - "version": 1721, - "versionNonce": 711189443, - "isDeleted": false, - "boundElements": [], - "updated": 1767580421422, - "link": null, - "locked": false, - "text": "data\nmanagement\nSFTPGo ", - "fontSize": 20, - "fontFamily": 5, - "textAlign": "center", - "verticalAlign": "middle", - "containerId": "6DIoCPbBjmqXhdWQiF-Ds", - "originalText": "data management\nSFTPGo ", - "autoResize": true, - "lineHeight": 1.25 - }, - { - "id": "i9kRvyS1BKdIOzkDPwoS0", - "type": "rectangle", - "x": 1088.0908401287593, - "y": 3900.9122913284605, - "width": 78.84661364258261, - "height": 188.34936741723558, - "angle": 0, - "strokeColor": "#1e1e1e", - "backgroundColor": "transparent", - "fillStyle": "solid", - "strokeWidth": 2, - "strokeStyle": "solid", - "roughness": 1, - "opacity": 100, - "groupIds": [], - "frameId": null, - "index": "b3u", - "roundness": null, - "seed": 1000481315, - "version": 1510, - "versionNonce": 1683518307, - "isDeleted": false, - "boundElements": [ - { - "type": "text", - "id": "j1I0PKrtvzLMy6fsxx_6a" - } - ], - "updated": 1767580421422, - "link": null, - "locked": false - }, - { - "id": "j1I0PKrtvzLMy6fsxx_6a", - "type": "text", - "x": 1099.9041692278827, - 
"y": 3982.5869750370784, - "width": 55.21995544433594, - "height": 25, - "angle": 0, - "strokeColor": "#1e1e1e", - "backgroundColor": "transparent", - "fillStyle": "solid", - "strokeWidth": 2, - "strokeStyle": "solid", - "roughness": 1, - "opacity": 100, - "groupIds": [], - "frameId": null, - "index": "b3v", - "roundness": null, - "seed": 67102147, - "version": 1528, - "versionNonce": 2086276867, - "isDeleted": false, - "boundElements": [], - "updated": 1767580421422, - "link": null, - "locked": false, - "text": "GPFS", - "fontSize": 20, - "fontFamily": 5, - "textAlign": "center", - "verticalAlign": "middle", - "containerId": "i9kRvyS1BKdIOzkDPwoS0", - "originalText": "GPFS", - "autoResize": true, - "lineHeight": 1.25 - }, - { - "id": "mSWRJ8RKPV7XfoaSxNjz8", - "type": "rectangle", - "x": 221.28360050567835, - "y": 4249.526425506172, - "width": 239.9955982022902, - "height": 60, - "angle": 0, - "strokeColor": "#1e1e1e", - "backgroundColor": "transparent", - "fillStyle": "solid", - "strokeWidth": 2, - "strokeStyle": "solid", - "roughness": 1, - "opacity": 100, - "groupIds": [], - "frameId": null, - "index": "b3w", - "roundness": null, - "seed": 1864643939, - "version": 2128, - "versionNonce": 688984067, - "isDeleted": false, - "boundElements": [ - { - "type": "text", - "id": "39BgAR4RVEZm9MjMi7RPJ" - } - ], - "updated": 1767580500208, - "link": null, - "locked": false - }, - { - "id": "39BgAR4RVEZm9MjMi7RPJ", - "type": "text", - "x": 275.4314545384641, - "y": 4267.026425506172, - "width": 131.69989013671875, - "height": 25, - "angle": 0, - "strokeColor": "#1e1e1e", - "backgroundColor": "transparent", - "fillStyle": "solid", - "strokeWidth": 2, - "strokeStyle": "solid", - "roughness": 1, - "opacity": 100, - "groupIds": [], - "frameId": null, - "index": "b3x", - "roundness": null, - "seed": 2086506755, - "version": 2253, - "versionNonce": 622204835, - "isDeleted": false, - "boundElements": [], - "updated": 1767580500209, - "link": null, - "locked": false, - "text": "weight & bias", - "fontSize": 20, - "fontFamily": 5, - "textAlign": "center", - "verticalAlign": "middle", - "containerId": "mSWRJ8RKPV7XfoaSxNjz8", - "originalText": "weight & bias", - "autoResize": true, - "lineHeight": 1.25 - }, { "id": "_dawvRGHc8iIWvtZgQ8my", "type": "rectangle", @@ -9430,7 +8329,7 @@ "type": "text", "x": 193.03751973340792, "y": 3146.4696998450304, - "width": 184.7998809814453, + "width": 707.179443359375, "height": 25, "angle": 0, "strokeColor": "#1e1e1e", @@ -9445,20 +8344,20 @@ "index": "b46", "roundness": null, "seed": 136585027, - "version": 1220, - "versionNonce": 22919299, + "version": 1331, + "versionNonce": 2053451517, "isDeleted": false, "boundElements": [], - "updated": 1767580456184, + "updated": 1767684665253, "link": null, "locked": false, - "text": "v3.7 Model serving ", + "text": "v3.7 Shift from SGLang to VLLM backend, prepare for ray serve function", "fontSize": 20, "fontFamily": 5, "textAlign": "left", "verticalAlign": "top", "containerId": null, - "originalText": "v3.7 Model serving ", + "originalText": "v3.7 Shift from SGLang to VLLM backend, prepare for ray serve function", "autoResize": true, "lineHeight": 1.25 }, @@ -9675,13 +8574,13 @@ { "id": "ztzSxxw4gFHtLebqPFwq_", "type": "rectangle", - "x": 912.9010236820349, - "y": 3386.821291398311, + "x": 911.0491492245695, + "y": 3386.0805280519567, "width": 163.1357446724801, "height": 106.45708709635272, "angle": 0, "strokeColor": "#1e1e1e", - "backgroundColor": "transparent", + "backgroundColor": "#ffc9c9", "fillStyle": "solid", 
"strokeWidth": 2, "strokeStyle": "solid", @@ -9692,8 +8591,8 @@ "index": "b4D", "roundness": null, "seed": 1509914787, - "version": 795, - "versionNonce": 1575047299, + "version": 797, + "versionNonce": 274293011, "isDeleted": false, "boundElements": [ { @@ -9701,17 +8600,17 @@ "id": "idEPabW5S1LZD69d85ve1" } ], - "updated": 1767580434170, + "updated": 1767684619945, "link": null, "locked": false }, { "id": "idEPabW5S1LZD69d85ve1", "type": "text", - "x": 943.8189402687632, - "y": 3415.049834946488, - "width": 101.29991149902344, - "height": 50, + "x": 939.5570392610049, + "y": 3401.809071600133, + "width": 106.11996459960938, + "height": 75, "angle": 0, "strokeColor": "#1e1e1e", "backgroundColor": "transparent", @@ -9725,20 +8624,20 @@ "index": "b4E", "roundness": null, "seed": 1231973443, - "version": 858, - "versionNonce": 1191856163, + "version": 863, + "versionNonce": 576524349, "isDeleted": false, "boundElements": [], - "updated": 1767580434170, + "updated": 1767684622696, "link": null, "locked": false, - "text": "ray worker\nnode", + "text": "ray worker\nnode (vllm\nbackend)", "fontSize": 20, "fontFamily": 5, "textAlign": "center", "verticalAlign": "middle", "containerId": "ztzSxxw4gFHtLebqPFwq_", - "originalText": "ray worker node", + "originalText": "ray worker node (vllm backend)", "autoResize": true, "lineHeight": 1.25 }, @@ -9751,7 +8650,7 @@ "height": 85, "angle": 0, "strokeColor": "#1e1e1e", - "backgroundColor": "transparent", + "backgroundColor": "#ffc9c9", "fillStyle": "solid", "strokeWidth": 2, "strokeStyle": "solid", @@ -9762,8 +8661,8 @@ "index": "b4F", "roundness": null, "seed": 1107160035, - "version": 844, - "versionNonce": 2071153603, + "version": 847, + "versionNonce": 1986361811, "isDeleted": false, "boundElements": [ { @@ -9771,7 +8670,7 @@ "id": "stn0JrExRn0tfK-9wqEh2" } ], - "updated": 1767580434170, + "updated": 1767684615249, "link": null, "locked": false }, @@ -9779,9 +8678,9 @@ "id": "stn0JrExRn0tfK-9wqEh2", "type": "text", "x": 945.7680509382923, - "y": 3267.883529660728, + "y": 3255.383529660728, "width": 101.29991149902344, - "height": 50, + "height": 75, "angle": 0, "strokeColor": "#1e1e1e", "backgroundColor": "transparent", @@ -9795,20 +8694,20 @@ "index": "b4G", "roundness": null, "seed": 1740404611, - "version": 910, - "versionNonce": 792386403, + "version": 943, + "versionNonce": 1538062387, "isDeleted": false, "boundElements": [], - "updated": 1767580434170, + "updated": 1767684615248, "link": null, "locked": false, - "text": "ray worker\nnode", + "text": "ray worker\nnode(vllm\nbackend)", "fontSize": 20, "fontFamily": 5, "textAlign": "center", "verticalAlign": "middle", "containerId": "UhDaYiDTmw9wnsmkDqwgD", - "originalText": "ray worker node", + "originalText": "ray worker node(vllm backend)", "autoResize": true, "lineHeight": 1.25 }, @@ -10293,8 +9192,8 @@ { "id": "GdoROXtlGBxCQBezTYH1E", "type": "rectangle", - "x": 393.437456483182, - "y": 4786.201212157448, + "x": 410.10412314984876, + "y": 5646.201212157448, "width": 156.66668701171875, "height": 85, "angle": 0, @@ -10310,8 +9209,8 @@ "index": "b4U", "roundness": null, "seed": 2087868493, - "version": 1083, - "versionNonce": 1630780301, + "version": 1138, + "versionNonce": 1062249597, "isDeleted": false, "boundElements": [ { @@ -10319,15 +9218,15 @@ "id": "SBSYmCOAYENhuZjDKG2bQ" } ], - "updated": 1767580466531, + "updated": 1767684541917, "link": null, "locked": false }, { "id": "SBSYmCOAYENhuZjDKG2bQ", "type": "text", - "x": 437.56083142214686, - "y": 4803.701212157448, + "x": 
454.2274980888136, + "y": 5663.701212157448, "width": 68.41993713378906, "height": 50, "angle": 0, @@ -10343,11 +9242,11 @@ "index": "b4V", "roundness": null, "seed": 846972589, - "version": 1109, - "versionNonce": 397328877, + "version": 1164, + "versionNonce": 639700189, "isDeleted": false, "boundElements": [], - "updated": 1767580466531, + "updated": 1767684541917, "link": null, "locked": false, "text": "model\nServing", @@ -10360,221 +9259,11 @@ "autoResize": true, "lineHeight": 1.25 }, - { - "id": "NUlw66Q9OIa8TIBSh8shW", - "type": "rectangle", - "x": 397.85025827385925, - "y": 4127.595569661166, - "width": 156.66668701171875, - "height": 85, - "angle": 0, - "strokeColor": "#1e1e1e", - "backgroundColor": "transparent", - "fillStyle": "solid", - "strokeWidth": 2, - "strokeStyle": "solid", - "roughness": 1, - "opacity": 100, - "groupIds": [], - "frameId": null, - "index": "b4W", - "roundness": null, - "seed": 1603019427, - "version": 1113, - "versionNonce": 319397283, - "isDeleted": false, - "boundElements": [ - { - "type": "text", - "id": "iI-RJL93xebiF9s9i5MaW" - } - ], - "updated": 1767580471663, - "link": null, - "locked": false - }, - { - "id": "iI-RJL93xebiF9s9i5MaW", - "type": "text", - "x": 441.9736332128241, - "y": 4145.095569661166, - "width": 68.41993713378906, - "height": 50, - "angle": 0, - "strokeColor": "#1e1e1e", - "backgroundColor": "transparent", - "fillStyle": "solid", - "strokeWidth": 2, - "strokeStyle": "solid", - "roughness": 1, - "opacity": 100, - "groupIds": [], - "frameId": null, - "index": "b4X", - "roundness": null, - "seed": 198563395, - "version": 1139, - "versionNonce": 132903235, - "isDeleted": false, - "boundElements": [], - "updated": 1767580471663, - "link": null, - "locked": false, - "text": "model\nServing", - "fontSize": 20, - "fontFamily": 5, - "textAlign": "center", - "verticalAlign": "middle", - "containerId": "NUlw66Q9OIa8TIBSh8shW", - "originalText": "model\nServing", - "autoResize": true, - "lineHeight": 1.25 - }, - { - "id": "yPC5J4VL6MFR8KYd3TIqn", - "type": "rectangle", - "x": 395.643907878644, - "y": 3497.672886303671, - "width": 156.66668701171875, - "height": 85, - "angle": 0, - "strokeColor": "#1e1e1e", - "backgroundColor": "#ffc9c9", - "fillStyle": "solid", - "strokeWidth": 2, - "strokeStyle": "solid", - "roughness": 1, - "opacity": 100, - "groupIds": [], - "frameId": null, - "index": "b4Y", - "roundness": null, - "seed": 1077772589, - "version": 1123, - "versionNonce": 1715199907, - "isDeleted": false, - "boundElements": [ - { - "type": "text", - "id": "aX2wRhDW7R4kZ49ZHsB7X" - } - ], - "updated": 1767580479760, - "link": null, - "locked": false - }, - { - "id": "aX2wRhDW7R4kZ49ZHsB7X", - "type": "text", - "x": 439.7672828176088, - "y": 3515.172886303671, - "width": 68.41993713378906, - "height": 50, - "angle": 0, - "strokeColor": "#1e1e1e", - "backgroundColor": "transparent", - "fillStyle": "solid", - "strokeWidth": 2, - "strokeStyle": "solid", - "roughness": 1, - "opacity": 100, - "groupIds": [], - "frameId": null, - "index": "b4Z", - "roundness": null, - "seed": 682546061, - "version": 1148, - "versionNonce": 539578189, - "isDeleted": false, - "boundElements": [], - "updated": 1767580476679, - "link": null, - "locked": false, - "text": "model\nServing", - "fontSize": 20, - "fontFamily": 5, - "textAlign": "center", - "verticalAlign": "middle", - "containerId": "yPC5J4VL6MFR8KYd3TIqn", - "originalText": "model\nServing", - "autoResize": true, - "lineHeight": 1.25 - }, - { - "id": "sfgomunt3ThMckJ6XKU_q", - "type": "rectangle", - "x": 
488.5689978967893, - "y": 4249.311580728391, - "width": 299.56796787532073, - "height": 60, - "angle": 0, - "strokeColor": "#1e1e1e", - "backgroundColor": "#ffc9c9", - "fillStyle": "solid", - "strokeWidth": 2, - "strokeStyle": "solid", - "roughness": 1, - "opacity": 100, - "groupIds": [], - "frameId": null, - "index": "b4a", - "roundness": null, - "seed": 512539075, - "version": 2215, - "versionNonce": 190733965, - "isDeleted": false, - "boundElements": [ - { - "type": "text", - "id": "WeNR6EN-asrq8s5DPDrqu" - } - ], - "updated": 1767580516436, - "link": null, - "locked": false - }, - { - "id": "WeNR6EN-asrq8s5DPDrqu", - "type": "text", - "x": 541.7930376816176, - "y": 4266.811580728391, - "width": 193.11988830566406, - "height": 25, - "angle": 0, - "strokeColor": "#1e1e1e", - "backgroundColor": "transparent", - "fillStyle": "solid", - "strokeWidth": 2, - "strokeStyle": "solid", - "roughness": 1, - "opacity": 100, - "groupIds": [], - "frameId": null, - "index": "b4b", - "roundness": null, - "seed": 1224116579, - "version": 2358, - "versionNonce": 2100077421, - "isDeleted": false, - "boundElements": [], - "updated": 1767580513277, - "link": null, - "locked": false, - "text": "IB & RoCE support", - "fontSize": 20, - "fontFamily": 5, - "textAlign": "center", - "verticalAlign": "middle", - "containerId": "sfgomunt3ThMckJ6XKU_q", - "originalText": "IB & RoCE support", - "autoResize": true, - "lineHeight": 1.25 - }, { "id": "MFBSa2ubaUD9ofO7oFDAX", "type": "rectangle", - "x": 219.23352789007174, - "y": 4915.747023497185, + "x": 235.90019455673848, + "y": 5775.747023497185, "width": 239.9955982022902, "height": 60, "angle": 0, @@ -10590,8 +9279,8 @@ "index": "b4c", "roundness": null, "seed": 1230068963, - "version": 2172, - "versionNonce": 1968459331, + "version": 2227, + "versionNonce": 998598973, "isDeleted": false, "boundElements": [ { @@ -10599,15 +9288,15 @@ "id": "L9ANH6aHqTr18kVeHpkD7" } ], - "updated": 1767580528071, + "updated": 1767684541917, "link": null, "locked": false }, { "id": "L9ANH6aHqTr18kVeHpkD7", "type": "text", - "x": 273.3813819228575, - "y": 4933.247023497185, + "x": 290.0480485895242, + "y": 5793.247023497185, "width": 131.69989013671875, "height": 25, "angle": 0, @@ -10623,11 +9312,11 @@ "index": "b4d", "roundness": null, "seed": 1970915459, - "version": 2297, - "versionNonce": 2060946915, + "version": 2352, + "versionNonce": 1893677469, "isDeleted": false, "boundElements": [], - "updated": 1767580528071, + "updated": 1767684541917, "link": null, "locked": false, "text": "weight & bias", @@ -10643,8 +9332,8 @@ { "id": "lfA_f7LdHHZOlVJcdyc3d", "type": "rectangle", - "x": 486.51892528118265, - "y": 4915.532178719405, + "x": 503.1855919478494, + "y": 5775.532178719405, "width": 299.56796787532073, "height": 60, "angle": 0, @@ -10660,8 +9349,8 @@ "index": "b4e", "roundness": null, "seed": 1154727971, - "version": 2260, - "versionNonce": 1418667853, + "version": 2315, + "versionNonce": 257949181, "isDeleted": false, "boundElements": [ { @@ -10669,15 +9358,15 @@ "id": "u1iQgDeEBo4G9WP1xSZyQ" } ], - "updated": 1767580531532, + "updated": 1767684541917, "link": null, "locked": false }, { "id": "u1iQgDeEBo4G9WP1xSZyQ", "type": "text", - "x": 539.742965066011, - "y": 4933.032178719405, + "x": 556.4096317326778, + "y": 5793.032178719405, "width": 193.11988830566406, "height": 25, "angle": 0, @@ -10693,11 +9382,11 @@ "index": "b4f", "roundness": null, "seed": 1900921795, - "version": 2402, - "versionNonce": 2101457187, + "version": 2457, + "versionNonce": 1040791133, "isDeleted": 
false, "boundElements": [], - "updated": 1767580528071, + "updated": 1767684541917, "link": null, "locked": false, "text": "IB & RoCE support", @@ -10713,8 +9402,8 @@ { "id": "d6HJM5niCdXm__tjACWnG", "type": "rectangle", - "x": 202.68559692521774, - "y": 5681.362225665228, + "x": 219.35226359188448, + "y": 6541.362225665228, "width": 239.9955982022902, "height": 60, "angle": 0, @@ -10730,8 +9419,8 @@ "index": "b4g", "roundness": null, "seed": 2084012493, - "version": 2298, - "versionNonce": 29748483, + "version": 2353, + "versionNonce": 1202479805, "isDeleted": false, "boundElements": [ { @@ -10739,15 +9428,15 @@ "id": "Qqo4oxKBaxNHFRuFrl-Mx" } ], - "updated": 1767580559782, + "updated": 1767684541917, "link": null, "locked": false }, { "id": "Qqo4oxKBaxNHFRuFrl-Mx", "type": "text", - "x": 256.8334509580035, - "y": 5698.862225665228, + "x": 273.5001176246702, + "y": 6558.862225665228, "width": 131.69989013671875, "height": 25, "angle": 0, @@ -10763,11 +9452,11 @@ "index": "b4h", "roundness": null, "seed": 585089069, - "version": 2423, - "versionNonce": 757552291, + "version": 2478, + "versionNonce": 552272669, "isDeleted": false, "boundElements": [], - "updated": 1767580559782, + "updated": 1767684541917, "link": null, "locked": false, "text": "weight & bias", @@ -10783,8 +9472,8 @@ { "id": "WAAd0XFKUqYZrZJzHBml5", "type": "rectangle", - "x": 469.97099431632864, - "y": 5681.147380887448, + "x": 486.6376609829954, + "y": 6541.147380887448, "width": 299.56796787532073, "height": 60, "angle": 0, @@ -10800,8 +9489,8 @@ "index": "b4i", "roundness": null, "seed": 447820429, - "version": 2386, - "versionNonce": 591054915, + "version": 2441, + "versionNonce": 775329661, "isDeleted": false, "boundElements": [ { @@ -10809,15 +9498,15 @@ "id": "JtiTV8-vNxkwt8Ymi12EQ" } ], - "updated": 1767580559782, + "updated": 1767684541917, "link": null, "locked": false }, { "id": "JtiTV8-vNxkwt8Ymi12EQ", "type": "text", - "x": 523.195034101157, - "y": 5698.647380887448, + "x": 539.8617007678238, + "y": 6558.647380887448, "width": 193.11988830566406, "height": 25, "angle": 0, @@ -10833,11 +9522,11 @@ "index": "b4j", "roundness": null, "seed": 1348819181, - "version": 2528, - "versionNonce": 1507772387, + "version": 2583, + "versionNonce": 1238409181, "isDeleted": false, "boundElements": [], - "updated": 1767580559782, + "updated": 1767684541917, "link": null, "locked": false, "text": "IB & RoCE support", @@ -10849,6 +9538,2422 @@ "originalText": "IB & RoCE support", "autoResize": true, "lineHeight": 1.25 + }, + { + "id": "heTk23zJWeUvAArJU1npN", + "type": "rectangle", + "x": 222.18039794698217, + "y": 4622.016351777231, + "width": 647.3203086953981, + "height": 541.9127073088301, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b4k", + "roundness": { + "type": 3 + }, + "seed": 1217405693, + "version": 1114, + "versionNonce": 651415293, + "isDeleted": false, + "boundElements": [], + "updated": 1767684554854, + "link": null, + "locked": false + }, + { + "id": "dNJs8EC_o9ivMRSpRAiyN", + "type": "rectangle", + "x": 427.8339280779272, + "y": 4802.683008271371, + "width": 156.66668701171875, + "height": 85, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": 
"b4l", + "roundness": null, + "seed": 132954973, + "version": 863, + "versionNonce": 953870173, + "isDeleted": false, + "boundElements": [ + { + "type": "text", + "id": "PAzLrJPYsg3e4DH5pekhu" + }, + { + "id": "CPCfaq7jaWOpUL6QoV_AW", + "type": "arrow" + }, + { + "id": "sZd_dP_CcDzIaAB3aKJFv", + "type": "arrow" + } + ], + "updated": 1767684554854, + "link": null, + "locked": false + }, + { + "id": "PAzLrJPYsg3e4DH5pekhu", + "type": "text", + "x": 448.77729508232176, + "y": 4820.183008271371, + "width": 114.77995300292969, + "height": 50, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b4m", + "roundness": null, + "seed": 1960265661, + "version": 854, + "versionNonce": 1059058621, + "isDeleted": false, + "boundElements": [], + "updated": 1767684554854, + "link": null, + "locked": false, + "text": "ray job tool\n(ray client)", + "fontSize": 20, + "fontFamily": 5, + "textAlign": "center", + "verticalAlign": "middle", + "containerId": "dNJs8EC_o9ivMRSpRAiyN", + "originalText": "ray job tool\n(ray client)", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "XtP3-kgEREwW-vJ3J9Bb2", + "type": "rectangle", + "x": 672.833928077927, + "y": 4802.183008271371, + "width": 156.66668701171875, + "height": 85, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b4n", + "roundness": null, + "seed": 292636701, + "version": 1000, + "versionNonce": 1237438685, + "isDeleted": false, + "boundElements": [ + { + "type": "text", + "id": "rVXMMkuJQM04yTMLDy42R" + }, + { + "id": "CPCfaq7jaWOpUL6QoV_AW", + "type": "arrow" + } + ], + "updated": 1767684554854, + "link": null, + "locked": false + }, + { + "id": "rVXMMkuJQM04yTMLDy42R", + "type": "text", + "x": 683.7173280413059, + "y": 4819.683008271371, + "width": 134.89988708496094, + "height": 50, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b4o", + "roundness": null, + "seed": 531381373, + "version": 1004, + "versionNonce": 913157437, + "isDeleted": false, + "boundElements": [], + "updated": 1767684554854, + "link": null, + "locked": false, + "text": "VerlTaskSpec \nyaml", + "fontSize": 20, + "fontFamily": 5, + "textAlign": "center", + "verticalAlign": "middle", + "containerId": "XtP3-kgEREwW-vJ3J9Bb2", + "originalText": "VerlTaskSpec \nyaml", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "CPCfaq7jaWOpUL6QoV_AW", + "type": "arrow", + "x": 674.1672715837864, + "y": 4840.683008271371, + "width": 90.33331298828125, + "height": 4, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b4p", + "roundness": { + "type": 2 + }, + "seed": 659137757, + "version": 2304, + "versionNonce": 379611827, + "isDeleted": false, + "boundElements": [], + "updated": 1767684554974, + "link": null, + "locked": false, + "points": [ + [ + 0, + 0 + ], + [ + -90.33331298828125, + 4 + ] + ], + "lastCommittedPoint": null, + "startBinding": { + 
"elementId": "XtP3-kgEREwW-vJ3J9Bb2", + "focus": 0.1611880517201581, + "gap": 1.333343505859375 + }, + "endBinding": { + "elementId": "dNJs8EC_o9ivMRSpRAiyN", + "focus": 0.06393742185083161, + "gap": 1 + }, + "startArrowhead": null, + "endArrowhead": "arrow", + "elbowed": false + }, + { + "id": "qCvbOZiLw9alEjLTQG-6n", + "type": "text", + "x": 260.10269708855975, + "y": 4573.993925047569, + "width": 476.19970703125, + "height": 25, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b4q", + "roundness": null, + "seed": 1004208445, + "version": 1204, + "versionNonce": 2032663133, + "isDeleted": false, + "boundElements": [], + "updated": 1767684554854, + "link": null, + "locked": false, + "text": "v3.8 IB & Roce support for multi node training ", + "fontSize": 20, + "fontFamily": 5, + "textAlign": "left", + "verticalAlign": "top", + "containerId": null, + "originalText": "v3.8 IB & Roce support for multi node training ", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "h6MaiZUa5w9nYowJZhKBM", + "type": "rectangle", + "x": 613.657876151101, + "y": 4644.849603730356, + "width": 210.1763265850289, + "height": 55.000000000000014, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b4r", + "roundness": null, + "seed": 46256541, + "version": 1152, + "versionNonce": 978495165, + "isDeleted": false, + "boundElements": [ + { + "type": "text", + "id": "q8TUwsZ-OkjHwr0CSSxxi" + } + ], + "updated": 1767684554854, + "link": null, + "locked": false + }, + { + "id": "q8TUwsZ-OkjHwr0CSSxxi", + "type": "text", + "x": 665.896079116467, + "y": 4659.849603730356, + "width": 105.69992065429688, + "height": 25, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b4s", + "roundness": null, + "seed": 2033161725, + "version": 1193, + "versionNonce": 1848166173, + "isDeleted": false, + "boundElements": [], + "updated": 1767684554854, + "link": null, + "locked": false, + "text": "API server", + "fontSize": 20, + "fontFamily": 5, + "textAlign": "center", + "verticalAlign": "middle", + "containerId": "h6MaiZUa5w9nYowJZhKBM", + "originalText": "API server", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "LqmUk3bhOWMYlt25NtSMl", + "type": "rectangle", + "x": 427.6672715837866, + "y": 4715.849603730356, + "width": 154.6436510018092, + "height": 60, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b4t", + "roundness": null, + "seed": 2049298013, + "version": 1382, + "versionNonce": 387701629, + "isDeleted": false, + "boundElements": [ + { + "type": "text", + "id": "XvT8JfmI8TNOVB-15YovG" + } + ], + "updated": 1767684554854, + "link": null, + "locked": false + }, + { + "id": "XvT8JfmI8TNOVB-15YovG", + "type": "text", + "x": 443.86914011447635, + "y": 4720.849603730356, + "width": 122.23991394042969, + "height": 50, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": 
"transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b4u", + "roundness": null, + "seed": 74840765, + "version": 1442, + "versionNonce": 1835885533, + "isDeleted": false, + "boundElements": [], + "updated": 1767684554854, + "link": null, + "locked": false, + "text": "task\nmanagement ", + "fontSize": 20, + "fontFamily": 5, + "textAlign": "center", + "verticalAlign": "middle", + "containerId": "LqmUk3bhOWMYlt25NtSMl", + "originalText": "task management ", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "7EhI0Iv9n5QBOT2mbAzRu", + "type": "rectangle", + "x": 611.7748101694561, + "y": 4716.42391204307, + "width": 207.59447102997498, + "height": 60, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b4v", + "roundness": null, + "seed": 1086429981, + "version": 1473, + "versionNonce": 623829053, + "isDeleted": false, + "boundElements": [ + { + "type": "text", + "id": "uMtMs3zVaipTL39V1z9qO" + } + ], + "updated": 1767684554854, + "link": null, + "locked": false + }, + { + "id": "uMtMs3zVaipTL39V1z9qO", + "type": "text", + "x": 628.0421155696976, + "y": 4721.42391204307, + "width": 175.0598602294922, + "height": 50, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b4w", + "roundness": null, + "seed": 1199189885, + "version": 1563, + "versionNonce": 1950470301, + "isDeleted": false, + "boundElements": [], + "updated": 1767684554854, + "link": null, + "locked": false, + "text": "node management\n(ssh, ray cluster) ", + "fontSize": 20, + "fontFamily": 5, + "textAlign": "center", + "verticalAlign": "middle", + "containerId": "7EhI0Iv9n5QBOT2mbAzRu", + "originalText": "node management\n(ssh, ray cluster) ", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "xbLezhTmbaiuNOowcP5-Q", + "type": "rectangle", + "x": 947.9736153053328, + "y": 4797.797636136118, + "width": 163.1357446724801, + "height": 106.45708709635272, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b4x", + "roundness": null, + "seed": 652294109, + "version": 770, + "versionNonce": 1482142973, + "isDeleted": false, + "boundElements": [ + { + "type": "text", + "id": "cnKFukZFqDoDP63iIUm9_" + } + ], + "updated": 1767684554854, + "link": null, + "locked": false + }, + { + "id": "cnKFukZFqDoDP63iIUm9_", + "type": "text", + "x": 978.8915318920612, + "y": 4826.026179684295, + "width": 101.29991149902344, + "height": 50, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b4y", + "roundness": null, + "seed": 688403517, + "version": 833, + "versionNonce": 485625181, + "isDeleted": false, + "boundElements": [], + "updated": 1767684554854, + "link": null, + "locked": false, + "text": "ray worker\nnode", + "fontSize": 20, + "fontFamily": 5, + "textAlign": "center", + "verticalAlign": "middle", + 
"containerId": "xbLezhTmbaiuNOowcP5-Q", + "originalText": "ray worker node", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "RZW6ujHgIQvMdNW8cevGx", + "type": "rectangle", + "x": 949.9227259748619, + "y": 4661.359874398537, + "width": 163.1357446724801, + "height": 85, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b4z", + "roundness": null, + "seed": 1506845853, + "version": 819, + "versionNonce": 560093629, + "isDeleted": false, + "boundElements": [ + { + "type": "text", + "id": "5AFbh75wn3qwB-QijQWzD" + } + ], + "updated": 1767684554854, + "link": null, + "locked": false + }, + { + "id": "5AFbh75wn3qwB-QijQWzD", + "type": "text", + "x": 980.8406425615902, + "y": 4678.859874398537, + "width": 101.29991149902344, + "height": 50, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b50", + "roundness": null, + "seed": 1290537213, + "version": 885, + "versionNonce": 1590303261, + "isDeleted": false, + "boundElements": [], + "updated": 1767684554854, + "link": null, + "locked": false, + "text": "ray worker\nnode", + "fontSize": 20, + "fontFamily": 5, + "textAlign": "center", + "verticalAlign": "middle", + "containerId": "RZW6ujHgIQvMdNW8cevGx", + "originalText": "ray worker node", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "FtfYzzfZJog6J-3gXZioA", + "type": "rectangle", + "x": 430.4727810498738, + "y": 4642.636711064613, + "width": 147.4799234681481, + "height": 55.000000000000014, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b51", + "roundness": null, + "seed": 601612637, + "version": 1270, + "versionNonce": 773508733, + "isDeleted": false, + "boundElements": [ + { + "type": "text", + "id": "cOCve3brssvurWdwd9Nth" + } + ], + "updated": 1767684554854, + "link": null, + "locked": false + }, + { + "id": "cOCve3brssvurWdwd9Nth", + "type": "text", + "x": 472.98276994459235, + "y": 4657.636711064613, + "width": 62.45994567871094, + "height": 25, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b52", + "roundness": null, + "seed": 1408347581, + "version": 1320, + "versionNonce": 1310062301, + "isDeleted": false, + "boundElements": [], + "updated": 1767684554854, + "link": null, + "locked": false, + "text": "WebUI", + "fontSize": 20, + "fontFamily": 5, + "textAlign": "center", + "verticalAlign": "middle", + "containerId": "FtfYzzfZJog6J-3gXZioA", + "originalText": "WebUI", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "ASyFxd3DW54qxF8tKwBg_", + "type": "rectangle", + "x": 674.1445628108464, + "y": 4917.7321010810265, + "width": 156.66668701171875, + "height": 85, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b53", + "roundness": null, + "seed": 
743370269, + "version": 1052, + "versionNonce": 692917053, + "isDeleted": false, + "boundElements": [ + { + "type": "text", + "id": "UCfmqG8sQLg78t_qD4wwa" + }, + { + "id": "sZd_dP_CcDzIaAB3aKJFv", + "type": "arrow" + } + ], + "updated": 1767684554854, + "link": null, + "locked": false + }, + { + "id": "UCfmqG8sQLg78t_qD4wwa", + "type": "text", + "x": 685.0279627742253, + "y": 4935.2321010810265, + "width": 134.89988708496094, + "height": 50, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b54", + "roundness": null, + "seed": 468001405, + "version": 1104, + "versionNonce": 1545708445, + "isDeleted": false, + "boundElements": [], + "updated": 1767684554854, + "link": null, + "locked": false, + "text": "Advanced\nVerlTaskSpec ", + "fontSize": 20, + "fontFamily": 5, + "textAlign": "center", + "verticalAlign": "middle", + "containerId": "ASyFxd3DW54qxF8tKwBg_", + "originalText": "Advanced VerlTaskSpec ", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "sZd_dP_CcDzIaAB3aKJFv", + "type": "arrow", + "x": 673.6807560139299, + "y": 4962.097093595226, + "width": 80.66214281697523, + "height": 84.85848269127973, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "#ffc9c9", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b55", + "roundness": { + "type": 2 + }, + "seed": 930466525, + "version": 1151, + "versionNonce": 863345139, + "isDeleted": false, + "boundElements": [], + "updated": 1767684554975, + "link": null, + "locked": false, + "points": [ + [ + 0, + 0 + ], + [ + -80.66214281697523, + -84.85848269127973 + ] + ], + "lastCommittedPoint": null, + "startBinding": { + "elementId": "ASyFxd3DW54qxF8tKwBg_", + "focus": -0.6785882314731602, + "gap": 1 + }, + "endBinding": { + "elementId": "Hs9wFmEEbdIpA9vJXphjV", + "focus": 0, + "gap": 14 + }, + "startArrowhead": null, + "endArrowhead": "arrow", + "elbowed": false + }, + { + "id": "dtRUpIgFf9hybiVPPWsVE", + "type": "rectangle", + "x": 245.26873250374882, + "y": 4715.7546323666065, + "width": 154.6436510018092, + "height": 60, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b56", + "roundness": null, + "seed": 2140301117, + "version": 1444, + "versionNonce": 122621117, + "isDeleted": false, + "boundElements": [ + { + "type": "text", + "id": "EQCb1M_dJna0DGVu3DOzf" + } + ], + "updated": 1767684554854, + "link": null, + "locked": false + }, + { + "id": "EQCb1M_dJna0DGVu3DOzf", + "type": "text", + "x": 261.4706010344386, + "y": 4720.7546323666065, + "width": 122.23991394042969, + "height": 50, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b57", + "roundness": null, + "seed": 1721530269, + "version": 1512, + "versionNonce": 1789422877, + "isDeleted": false, + "boundElements": [], + "updated": 1767684554854, + "link": null, + "locked": false, + "text": "user\nmanagement ", + "fontSize": 20, + "fontFamily": 5, + "textAlign": "center", + "verticalAlign": "middle", + "containerId": 
"dtRUpIgFf9hybiVPPWsVE", + "originalText": "user management ", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "8WUBjDJs7nLl0Nbm4mEYI", + "type": "rectangle", + "x": 251.3301289226659, + "y": 4909.33946401936, + "width": 154.6436510018092, + "height": 85, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b58", + "roundness": null, + "seed": 382041085, + "version": 1676, + "versionNonce": 1891025277, + "isDeleted": false, + "boundElements": [ + { + "type": "text", + "id": "wTlq9A4HXC9t4SBYr6VzG" + } + ], + "updated": 1767684554854, + "link": null, + "locked": false + }, + { + "id": "wTlq9A4HXC9t4SBYr6VzG", + "type": "text", + "x": 271.53199745335564, + "y": 4914.33946401936, + "width": 114.23991394042969, + "height": 75, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b59", + "roundness": null, + "seed": 2014245981, + "version": 1758, + "versionNonce": 1589348829, + "isDeleted": false, + "boundElements": [], + "updated": 1767684554854, + "link": null, + "locked": false, + "text": "data\nmanagement\nSFTPGo ", + "fontSize": 20, + "fontFamily": 5, + "textAlign": "center", + "verticalAlign": "middle", + "containerId": "8WUBjDJs7nLl0Nbm4mEYI", + "originalText": "data management\nSFTPGo ", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "eaMsiShOv4_vndnhERhg1", + "type": "rectangle", + "x": 1123.0472071604347, + "y": 4684.089212529603, + "width": 78.84661364258261, + "height": 188.34936741723558, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b5A", + "roundness": null, + "seed": 1984585917, + "version": 1547, + "versionNonce": 1439188541, + "isDeleted": false, + "boundElements": [ + { + "type": "text", + "id": "IV3hZPShTMVrFQj4jqOr8" + } + ], + "updated": 1767684554854, + "link": null, + "locked": false + }, + { + "id": "IV3hZPShTMVrFQj4jqOr8", + "type": "text", + "x": 1134.860536259558, + "y": 4765.763896238221, + "width": 55.21995544433594, + "height": 25, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b5B", + "roundness": null, + "seed": 119095581, + "version": 1566, + "versionNonce": 1488877213, + "isDeleted": false, + "boundElements": [], + "updated": 1767684554854, + "link": null, + "locked": false, + "text": "GPFS", + "fontSize": 20, + "fontFamily": 5, + "textAlign": "center", + "verticalAlign": "middle", + "containerId": "eaMsiShOv4_vndnhERhg1", + "originalText": "GPFS", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "xROBVjCQGEk7huVn90qgK", + "type": "rectangle", + "x": 256.2399675373537, + "y": 5032.703346707314, + "width": 239.9955982022902, + "height": 60, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b5C", + "roundness": null, + "seed": 
1877808509, + "version": 2165, + "versionNonce": 307206909, + "isDeleted": false, + "boundElements": [ + { + "type": "text", + "id": "qCqMkkJlJPTEM1aPwAgMA" + } + ], + "updated": 1767684554854, + "link": null, + "locked": false + }, + { + "id": "qCqMkkJlJPTEM1aPwAgMA", + "type": "text", + "x": 310.38782157013947, + "y": 5050.203346707314, + "width": 131.69989013671875, + "height": 25, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b5D", + "roundness": null, + "seed": 1275551197, + "version": 2290, + "versionNonce": 1526543197, + "isDeleted": false, + "boundElements": [], + "updated": 1767684554854, + "link": null, + "locked": false, + "text": "weight & bias", + "fontSize": 20, + "fontFamily": 5, + "textAlign": "center", + "verticalAlign": "middle", + "containerId": "xROBVjCQGEk7huVn90qgK", + "originalText": "weight & bias", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "Hs9wFmEEbdIpA9vJXphjV", + "type": "rectangle", + "x": 432.80662530553457, + "y": 4910.772490862308, + "width": 156.66668701171875, + "height": 85, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b5E", + "roundness": null, + "seed": 1383701053, + "version": 1151, + "versionNonce": 406053779, + "isDeleted": false, + "boundElements": [ + { + "type": "text", + "id": "nrpfg4A7-jJ2tNBa4ebUP" + }, + { + "id": "sZd_dP_CcDzIaAB3aKJFv", + "type": "arrow" + } + ], + "updated": 1767684554975, + "link": null, + "locked": false + }, + { + "id": "nrpfg4A7-jJ2tNBa4ebUP", + "type": "text", + "x": 476.9300002444994, + "y": 4928.272490862308, + "width": 68.41993713378906, + "height": 50, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b5F", + "roundness": null, + "seed": 1545947805, + "version": 1176, + "versionNonce": 1153679389, + "isDeleted": false, + "boundElements": [], + "updated": 1767684554854, + "link": null, + "locked": false, + "text": "model\nServing", + "fontSize": 20, + "fontFamily": 5, + "textAlign": "center", + "verticalAlign": "middle", + "containerId": "Hs9wFmEEbdIpA9vJXphjV", + "originalText": "model\nServing", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "BoeIvlBhUQlPBlOZpHR08", + "type": "rectangle", + "x": 523.5253649284647, + "y": 5032.488501929534, + "width": 299.56796787532073, + "height": 60, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "#ffc9c9", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b5G", + "roundness": null, + "seed": 700182269, + "version": 2252, + "versionNonce": 837792893, + "isDeleted": false, + "boundElements": [ + { + "type": "text", + "id": "b1wGV0cEUxZSUWjHVW5bB" + } + ], + "updated": 1767684554854, + "link": null, + "locked": false + }, + { + "id": "b1wGV0cEUxZSUWjHVW5bB", + "type": "text", + "x": 576.749404713293, + "y": 5049.988501929534, + "width": 193.11988830566406, + "height": 25, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + 
"strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b5H", + "roundness": null, + "seed": 1774126941, + "version": 2396, + "versionNonce": 308157661, + "isDeleted": false, + "boundElements": [], + "updated": 1767684554854, + "link": null, + "locked": false, + "text": "IB & RoCE support", + "fontSize": 20, + "fontFamily": 5, + "textAlign": "center", + "verticalAlign": "middle", + "containerId": "BoeIvlBhUQlPBlOZpHR08", + "originalText": "IB & RoCE support", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "MjLfupuIEyHsJXHTOkWGm", + "type": "rectangle", + "x": 211.51373636657854, + "y": 3915.623564307773, + "width": 647.3203086953981, + "height": 541.9127073088301, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b5I", + "roundness": { + "type": 3 + }, + "seed": 71288691, + "version": 1196, + "versionNonce": 959699283, + "isDeleted": false, + "boundElements": [], + "updated": 1767684575234, + "link": null, + "locked": false + }, + { + "id": "Gf0CtJAjOH9kfDFhCUE1Z", + "type": "rectangle", + "x": 417.16726649752354, + "y": 4096.290220801913, + "width": 156.66668701171875, + "height": 85, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b5J", + "roundness": null, + "seed": 2052874515, + "version": 945, + "versionNonce": 2125674227, + "isDeleted": false, + "boundElements": [ + { + "type": "text", + "id": "20DYWEmDFmwM_oJVjOqay" + }, + { + "id": "Gs0St6vy7AzWropCInu6q", + "type": "arrow" + }, + { + "id": "zN_q98Ryj-3lovwWckFiO", + "type": "arrow" + } + ], + "updated": 1767684575234, + "link": null, + "locked": false + }, + { + "id": "20DYWEmDFmwM_oJVjOqay", + "type": "text", + "x": 438.1106335019181, + "y": 4113.790220801913, + "width": 114.77995300292969, + "height": 50, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b5K", + "roundness": null, + "seed": 1626654387, + "version": 936, + "versionNonce": 1135992979, + "isDeleted": false, + "boundElements": [], + "updated": 1767684575234, + "link": null, + "locked": false, + "text": "ray job tool\n(ray client)", + "fontSize": 20, + "fontFamily": 5, + "textAlign": "center", + "verticalAlign": "middle", + "containerId": "Gf0CtJAjOH9kfDFhCUE1Z", + "originalText": "ray job tool\n(ray client)", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "PvoN0jKnOWq1xXn2lroWv", + "type": "rectangle", + "x": 662.1672664975235, + "y": 4095.7902208019136, + "width": 156.66668701171875, + "height": 85, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b5L", + "roundness": null, + "seed": 1320122451, + "version": 1082, + "versionNonce": 1692575091, + "isDeleted": false, + "boundElements": [ + { + "type": "text", + "id": "ntCCl1Q6fx9M0P9RABpHn" + }, + { + "id": "Gs0St6vy7AzWropCInu6q", + "type": "arrow" + } + ], + "updated": 1767684575234, + "link": 
null, + "locked": false + }, + { + "id": "ntCCl1Q6fx9M0P9RABpHn", + "type": "text", + "x": 673.0506664609024, + "y": 4113.290220801913, + "width": 134.89988708496094, + "height": 50, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b5M", + "roundness": null, + "seed": 1097004531, + "version": 1086, + "versionNonce": 1056145171, + "isDeleted": false, + "boundElements": [], + "updated": 1767684575234, + "link": null, + "locked": false, + "text": "VerlTaskSpec \nyaml", + "fontSize": 20, + "fontFamily": 5, + "textAlign": "center", + "verticalAlign": "middle", + "containerId": "PvoN0jKnOWq1xXn2lroWv", + "originalText": "VerlTaskSpec \nyaml", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "Gs0St6vy7AzWropCInu6q", + "type": "arrow", + "x": 663.5006100033829, + "y": 4134.290220801913, + "width": 90.33331298828125, + "height": 4, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b5N", + "roundness": { + "type": 2 + }, + "seed": 940949395, + "version": 2546, + "versionNonce": 1745201981, + "isDeleted": false, + "boundElements": [], + "updated": 1767684575978, + "link": null, + "locked": false, + "points": [ + [ + 0, + 0 + ], + [ + -90.33331298828125, + 4 + ] + ], + "lastCommittedPoint": null, + "startBinding": { + "elementId": "PvoN0jKnOWq1xXn2lroWv", + "focus": 0.16118805172015999, + "gap": 1.333343505859375 + }, + "endBinding": { + "elementId": "Gf0CtJAjOH9kfDFhCUE1Z", + "focus": 0.06393742185083104, + "gap": 1 + }, + "startArrowhead": null, + "endArrowhead": "arrow", + "elbowed": false + }, + { + "id": "K8wHlmlEligI_h0jTUnJO", + "type": "text", + "x": 217.44344977630226, + "y": 3851.0532571133813, + "width": 330.57977294921875, + "height": 25, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b5O", + "roundness": null, + "seed": 217994547, + "version": 1296, + "versionNonce": 102748531, + "isDeleted": false, + "boundElements": [], + "updated": 1767684673131, + "link": null, + "locked": false, + "text": "v3.7 Model serving with ray serve", + "fontSize": 20, + "fontFamily": 5, + "textAlign": "left", + "verticalAlign": "top", + "containerId": null, + "originalText": "v3.7 Model serving with ray serve", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "n-Ff4Ze34fZJUns_NTd20", + "type": "rectangle", + "x": 602.9912145706976, + "y": 3938.456816260898, + "width": 210.1763265850289, + "height": 55.000000000000014, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b5P", + "roundness": null, + "seed": 260257491, + "version": 1234, + "versionNonce": 1093524883, + "isDeleted": false, + "boundElements": [ + { + "type": "text", + "id": "vLsB0AI_CkbcIkZaJdNiJ" + } + ], + "updated": 1767684575234, + "link": null, + "locked": false + }, + { + "id": "vLsB0AI_CkbcIkZaJdNiJ", + "type": "text", + "x": 655.2294175360637, + "y": 3953.456816260898, + "width": 
105.69992065429688, + "height": 25, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b5Q", + "roundness": null, + "seed": 1580917875, + "version": 1276, + "versionNonce": 962769715, + "isDeleted": false, + "boundElements": [], + "updated": 1767684575234, + "link": null, + "locked": false, + "text": "API server", + "fontSize": 20, + "fontFamily": 5, + "textAlign": "center", + "verticalAlign": "middle", + "containerId": "n-Ff4Ze34fZJUns_NTd20", + "originalText": "API server", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "qxqtFfa77QmZ0wtXJbNdf", + "type": "rectangle", + "x": 417.0006100033829, + "y": 4009.456816260898, + "width": 154.6436510018092, + "height": 60, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b5R", + "roundness": null, + "seed": 15457811, + "version": 1464, + "versionNonce": 732681427, + "isDeleted": false, + "boundElements": [ + { + "type": "text", + "id": "21ZGMSVPphjNxPbD4fYsP" + } + ], + "updated": 1767684575234, + "link": null, + "locked": false + }, + { + "id": "21ZGMSVPphjNxPbD4fYsP", + "type": "text", + "x": 433.2024785340727, + "y": 4014.456816260898, + "width": 122.23991394042969, + "height": 50, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b5S", + "roundness": null, + "seed": 1116513203, + "version": 1524, + "versionNonce": 1738523251, + "isDeleted": false, + "boundElements": [], + "updated": 1767684575234, + "link": null, + "locked": false, + "text": "task\nmanagement ", + "fontSize": 20, + "fontFamily": 5, + "textAlign": "center", + "verticalAlign": "middle", + "containerId": "qxqtFfa77QmZ0wtXJbNdf", + "originalText": "task management ", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "KcxzNn7Z9-QxXtT7AkZ3C", + "type": "rectangle", + "x": 601.1081485890527, + "y": 4010.0311245736125, + "width": 207.59447102997498, + "height": 60, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b5T", + "roundness": null, + "seed": 1262728531, + "version": 1555, + "versionNonce": 100989971, + "isDeleted": false, + "boundElements": [ + { + "type": "text", + "id": "Xmeq8g2n9AW6KRTGM-M5I" + } + ], + "updated": 1767684575234, + "link": null, + "locked": false + }, + { + "id": "Xmeq8g2n9AW6KRTGM-M5I", + "type": "text", + "x": 617.3754539892941, + "y": 4015.0311245736125, + "width": 175.0598602294922, + "height": 50, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b5U", + "roundness": null, + "seed": 2053608179, + "version": 1646, + "versionNonce": 1121059251, + "isDeleted": false, + "boundElements": [], + "updated": 1767684575234, + "link": null, + "locked": false, + "text": "node management\n(ssh, ray cluster) ", + "fontSize": 20, + "fontFamily": 
5, + "textAlign": "center", + "verticalAlign": "middle", + "containerId": "KcxzNn7Z9-QxXtT7AkZ3C", + "originalText": "node management\n(ssh, ray cluster) ", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "X3LvP6ca37T41BZ-Q0noS", + "type": "rectangle", + "x": 937.3069537249294, + "y": 4091.404848666662, + "width": 163.1357446724801, + "height": 106.45708709635272, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b5V", + "roundness": null, + "seed": 8305811, + "version": 852, + "versionNonce": 388846419, + "isDeleted": false, + "boundElements": [ + { + "type": "text", + "id": "d1Zxyw9BfzoP8jVeALpVW" + } + ], + "updated": 1767684575234, + "link": null, + "locked": false + }, + { + "id": "d1Zxyw9BfzoP8jVeALpVW", + "type": "text", + "x": 968.2248703116577, + "y": 4119.633392214839, + "width": 101.29991149902344, + "height": 50, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b5W", + "roundness": null, + "seed": 960475699, + "version": 916, + "versionNonce": 325939443, + "isDeleted": false, + "boundElements": [], + "updated": 1767684575234, + "link": null, + "locked": false, + "text": "ray worker\nnode", + "fontSize": 20, + "fontFamily": 5, + "textAlign": "center", + "verticalAlign": "middle", + "containerId": "X3LvP6ca37T41BZ-Q0noS", + "originalText": "ray worker node", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "Om29ryg7xCDfxim1gwvSQ", + "type": "rectangle", + "x": 939.2560643944585, + "y": 3954.967086929079, + "width": 163.1357446724801, + "height": 85, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b5X", + "roundness": null, + "seed": 1167127507, + "version": 901, + "versionNonce": 2031671955, + "isDeleted": false, + "boundElements": [ + { + "type": "text", + "id": "rRfaJGVJvSMVoayQ_ZCBZ" + } + ], + "updated": 1767684575234, + "link": null, + "locked": false + }, + { + "id": "rRfaJGVJvSMVoayQ_ZCBZ", + "type": "text", + "x": 970.1739809811868, + "y": 3972.467086929079, + "width": 101.29991149902344, + "height": 50, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b5Y", + "roundness": null, + "seed": 1703477619, + "version": 968, + "versionNonce": 350621747, + "isDeleted": false, + "boundElements": [], + "updated": 1767684575234, + "link": null, + "locked": false, + "text": "ray worker\nnode", + "fontSize": 20, + "fontFamily": 5, + "textAlign": "center", + "verticalAlign": "middle", + "containerId": "Om29ryg7xCDfxim1gwvSQ", + "originalText": "ray worker node", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "XVf5M1qJ68ycZad4qnKcr", + "type": "rectangle", + "x": 419.8061194694701, + "y": 3936.2439235951556, + "width": 147.4799234681481, + "height": 55.000000000000014, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 
1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b5Z", + "roundness": null, + "seed": 309222163, + "version": 1352, + "versionNonce": 1488687571, + "isDeleted": false, + "boundElements": [ + { + "type": "text", + "id": "oKk-BRI0PDZXF4MkLu6xs" + } + ], + "updated": 1767684575234, + "link": null, + "locked": false + }, + { + "id": "oKk-BRI0PDZXF4MkLu6xs", + "type": "text", + "x": 462.3161083641887, + "y": 3951.2439235951556, + "width": 62.45994567871094, + "height": 25, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b5a", + "roundness": null, + "seed": 1488421043, + "version": 1402, + "versionNonce": 900718451, + "isDeleted": false, + "boundElements": [], + "updated": 1767684575234, + "link": null, + "locked": false, + "text": "WebUI", + "fontSize": 20, + "fontFamily": 5, + "textAlign": "center", + "verticalAlign": "middle", + "containerId": "XVf5M1qJ68ycZad4qnKcr", + "originalText": "WebUI", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "Hg8ObmOCWMqPbn09S7yjk", + "type": "rectangle", + "x": 663.477901230443, + "y": 4211.33931361157, + "width": 156.66668701171875, + "height": 85, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b5b", + "roundness": null, + "seed": 1913320019, + "version": 1134, + "versionNonce": 906590483, + "isDeleted": false, + "boundElements": [ + { + "type": "text", + "id": "pI6z32ZixK5gmBToyLilS" + }, + { + "id": "zN_q98Ryj-3lovwWckFiO", + "type": "arrow" + } + ], + "updated": 1767684575234, + "link": null, + "locked": false + }, + { + "id": "pI6z32ZixK5gmBToyLilS", + "type": "text", + "x": 674.3613011938219, + "y": 4228.83931361157, + "width": 134.89988708496094, + "height": 50, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b5c", + "roundness": null, + "seed": 524234739, + "version": 1186, + "versionNonce": 1463738035, + "isDeleted": false, + "boundElements": [], + "updated": 1767684575234, + "link": null, + "locked": false, + "text": "Advanced\nVerlTaskSpec ", + "fontSize": 20, + "fontFamily": 5, + "textAlign": "center", + "verticalAlign": "middle", + "containerId": "Hg8ObmOCWMqPbn09S7yjk", + "originalText": "Advanced VerlTaskSpec ", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "zN_q98Ryj-3lovwWckFiO", + "type": "arrow", + "x": 663.0140944335262, + "y": 4255.70430612577, + "width": 80.66214281697523, + "height": 84.85848269127973, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "#ffc9c9", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b5d", + "roundness": { + "type": 2 + }, + "seed": 1113242003, + "version": 1393, + "versionNonce": 641187837, + "isDeleted": false, + "boundElements": [], + "updated": 1767684575978, + "link": null, + "locked": false, + "points": [ + [ + 0, + 0 + ], + [ + -80.66214281697523, + -84.85848269127973 + ] + ], + "lastCommittedPoint": null, + "startBinding": { + "elementId": "Hg8ObmOCWMqPbn09S7yjk", + "focus": 
-0.6785882314731602, + "gap": 1 + }, + "endBinding": { + "elementId": "Gf0CtJAjOH9kfDFhCUE1Z", + "focus": -0.47485998984046796, + "gap": 8.5179981073087 + }, + "startArrowhead": null, + "endArrowhead": "arrow", + "elbowed": false + }, + { + "id": "BPhlnGltdlIkoIoRjKnat", + "type": "rectangle", + "x": 234.6020709233453, + "y": 4009.3618448971506, + "width": 154.6436510018092, + "height": 60, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b5e", + "roundness": null, + "seed": 949227315, + "version": 1526, + "versionNonce": 142006163, + "isDeleted": false, + "boundElements": [ + { + "type": "text", + "id": "AZwdH-22GzEBgAHGN1nsM" + } + ], + "updated": 1767684575234, + "link": null, + "locked": false + }, + { + "id": "AZwdH-22GzEBgAHGN1nsM", + "type": "text", + "x": 250.80393945403512, + "y": 4014.3618448971506, + "width": 122.23991394042969, + "height": 50, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b5f", + "roundness": null, + "seed": 1951510739, + "version": 1595, + "versionNonce": 1635544371, + "isDeleted": false, + "boundElements": [], + "updated": 1767684575234, + "link": null, + "locked": false, + "text": "user\nmanagement ", + "fontSize": 20, + "fontFamily": 5, + "textAlign": "center", + "verticalAlign": "middle", + "containerId": "BPhlnGltdlIkoIoRjKnat", + "originalText": "user management ", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "4gXYDCzJEyB-R3vjaRh-v", + "type": "rectangle", + "x": 240.66346734226227, + "y": 4202.946676549904, + "width": 154.6436510018092, + "height": 85, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b5g", + "roundness": null, + "seed": 1462378099, + "version": 1758, + "versionNonce": 1551781587, + "isDeleted": false, + "boundElements": [ + { + "type": "text", + "id": "mSgJ1Tvg6_a8nKBUdLcd9" + } + ], + "updated": 1767684575234, + "link": null, + "locked": false + }, + { + "id": "mSgJ1Tvg6_a8nKBUdLcd9", + "type": "text", + "x": 260.8653358729521, + "y": 4207.946676549904, + "width": 114.23991394042969, + "height": 75, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b5h", + "roundness": null, + "seed": 1273223187, + "version": 1840, + "versionNonce": 202570867, + "isDeleted": false, + "boundElements": [], + "updated": 1767684575234, + "link": null, + "locked": false, + "text": "data\nmanagement\nSFTPGo ", + "fontSize": 20, + "fontFamily": 5, + "textAlign": "center", + "verticalAlign": "middle", + "containerId": "4gXYDCzJEyB-R3vjaRh-v", + "originalText": "data management\nSFTPGo ", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "I7ATDrhNb6eDEYhRqzYfp", + "type": "rectangle", + "x": 1112.3805455800311, + "y": 3977.6964250601454, + "width": 78.84661364258261, + "height": 188.34936741723558, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + 
"strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b5i", + "roundness": null, + "seed": 544562611, + "version": 1629, + "versionNonce": 1812002323, + "isDeleted": false, + "boundElements": [ + { + "type": "text", + "id": "Y5vi7JlFMdxgh4SbYm5LG" + } + ], + "updated": 1767684575234, + "link": null, + "locked": false + }, + { + "id": "Y5vi7JlFMdxgh4SbYm5LG", + "type": "text", + "x": 1124.1938746791545, + "y": 4059.3711087687634, + "width": 55.21995544433594, + "height": 25, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b5j", + "roundness": null, + "seed": 537210707, + "version": 1649, + "versionNonce": 993967027, + "isDeleted": false, + "boundElements": [], + "updated": 1767684575234, + "link": null, + "locked": false, + "text": "GPFS", + "fontSize": 20, + "fontFamily": 5, + "textAlign": "center", + "verticalAlign": "middle", + "containerId": "I7ATDrhNb6eDEYhRqzYfp", + "originalText": "GPFS", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "xDPUWf7m9nXCB78-A-vMP", + "type": "rectangle", + "x": 245.5733059569502, + "y": 4326.310559237858, + "width": 566.5404057062265, + "height": 60, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b5k", + "roundness": null, + "seed": 1827261683, + "version": 2202, + "versionNonce": 368233811, + "isDeleted": false, + "boundElements": [ + { + "type": "text", + "id": "Q2W49xaHGQW3e7Xc2e0Hm" + } + ], + "updated": 1767684575234, + "link": null, + "locked": false + }, + { + "id": "Q2W49xaHGQW3e7Xc2e0Hm", + "type": "text", + "x": 462.99356374170407, + "y": 4343.810559237858, + "width": 131.69989013671875, + "height": 25, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b5l", + "roundness": null, + "seed": 447329939, + "version": 2329, + "versionNonce": 1332980467, + "isDeleted": false, + "boundElements": [], + "updated": 1767684575234, + "link": null, + "locked": false, + "text": "weight & bias", + "fontSize": 20, + "fontFamily": 5, + "textAlign": "center", + "verticalAlign": "middle", + "containerId": "xDPUWf7m9nXCB78-A-vMP", + "originalText": "weight & bias", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "9rlu1cDuS6ea839qCFeyC", + "type": "rectangle", + "x": 420.04983792153826, + "y": 4202.256443572021, + "width": 156.66668701171875, + "height": 85, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "#ffc9c9", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b5m", + "roundness": null, + "seed": 133917747, + "version": 1180, + "versionNonce": 206034067, + "isDeleted": false, + "boundElements": [ + { + "type": "text", + "id": "OMatW1nuQfmSyfQVTADJH" + } + ], + "updated": 1767684575234, + "link": null, + "locked": false + }, + { + "id": "OMatW1nuQfmSyfQVTADJH", + "type": "text", + "x": 464.1732128605031, + "y": 4219.756443572021, + "width": 68.41993713378906, + "height": 50, + "angle": 0, + "strokeColor": 
"#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b5n", + "roundness": null, + "seed": 2026204627, + "version": 1205, + "versionNonce": 472238643, + "isDeleted": false, + "boundElements": [], + "updated": 1767684575234, + "link": null, + "locked": false, + "text": "model\nServing", + "fontSize": 20, + "fontFamily": 5, + "textAlign": "center", + "verticalAlign": "middle", + "containerId": "9rlu1cDuS6ea839qCFeyC", + "originalText": "model\nServing", + "autoResize": true, + "lineHeight": 1.25 } ], "appState": { diff --git a/specs/mvp/v3.8/ray_serve.md b/specs/mvp/v3.8/ray_serve.md new file mode 100644 index 0000000..c2f8a96 --- /dev/null +++ b/specs/mvp/v3.8/ray_serve.md @@ -0,0 +1,314 @@ + +API参考资料 +https://docs.ray.io/en/latest/serve/api/doc/ray.serve.llm.LLMConfig.html + +ray.serve.llm.LLMConfig +pydantic model ray.serve.llm.LLMConfig[source] +The configuration for starting an LLM deployment. + +PublicAPI (alpha): This API is in alpha and may change before becoming stable. + +field accelerator_type: str | None = None +The type of accelerator runs the model on. Only the following values are supported: [‘V100’, ‘P100’, ‘T4’, ‘P4’, ‘K80’, ‘A10G’, ‘L4’, ‘L40S’, ‘A100’, ‘H100’, ‘H200’, ‘H20’, ‘B200’, ‘Intel-GPU-Max-1550’, ‘Intel-GPU-Max-1100’, ‘Intel-GAUDI’, ‘AMD-Instinct-MI100’, ‘AMD-Instinct-MI250X’, ‘AMD-Instinct-MI250X-MI250’, ‘AMD-Instinct-MI210’, ‘AMD-Instinct-MI300A’, ‘AMD-Instinct-MI300X-OAM’, ‘AMD-Instinct-MI300X-HF’, ‘AMD-Instinct-MI308X’, ‘AMD-Instinct-MI325X-OAM’, ‘AMD-Instinct-MI350X-OAM’, ‘AMD-Instinct-MI355X-OAM’, ‘AMD-Radeon-R9-200-HD-7900’, ‘AMD-Radeon-HD-7900’, ‘aws-neuron-core’, ‘TPU-V2’, ‘TPU-V3’, ‘TPU-V4’, ‘TPU-V5P’, ‘TPU-V5LITEPOD’, ‘TPU-V6E’, ‘Ascend910B’, ‘Ascend910B4’, ‘MXC500’, ‘MXC550’, ‘A100-40G’, ‘A100-80G’] + +field callback_config: CallbackConfig [Optional] +Callback configuration to use for model initialization. Can be a string path to a class or a Callback subclass. + +field deployment_config: Dict[str, Any] [Optional] +The Ray @server.deployment options. Supported fields are: name, num_replicas, ray_actor_options, max_ongoing_requests, autoscaling_config, max_queued_requests, user_config, health_check_period_s, health_check_timeout_s, graceful_shutdown_wait_loop_s, graceful_shutdown_timeout_s, logging_config, request_router_config. For more details, see the Ray Serve Documentation. + +field engine_kwargs: Dict[str, Any] = {} +Additional keyword arguments for the engine. In case of vLLM, this will include all the configuration knobs they provide out of the box, except for tensor-parallelism which is set automatically from Ray Serve configs. + +field experimental_configs: Dict[str, Any] [Optional] +Experimental configurations for Ray Serve LLM. This is a dictionary of key-value pairs. Current supported keys are: - stream_batching_interval_ms: Ray Serve LLM batches streaming requests together. This config decides how long to wait for the batch before processing the requests. Defaults to 50.0. - num_ingress_replicas: The number of replicas for the router. Ray Serve will take the max amount all the replicas. Default would be 2 router replicas per model replica. + +field llm_engine: str = 'vLLM' +The LLMEngine that should be used to run the model. Only the following values are supported: [‘vLLM’] + +field log_engine_metrics: bool | None = True +Enable additional engine metrics via Ray Prometheus port. 
+ +field lora_config: Dict[str, Any] | LoraConfig | None = None +Settings for LoRA adapter. Validated against LoraConfig. + +field model_loading_config: Dict[str, Any] | ModelLoadingConfig [Required] +The settings for how to download and expose the model. Validated against ModelLoadingConfig. + +field placement_group_config: Dict[str, Any] | None = None +Ray placement group configuration for scheduling vLLM engine workers. Defines resource bundles and placement strategy for multi-node deployments. Should contain ‘bundles’ (list of resource dicts) and optionally ‘strategy’ (defaults to ‘PACK’). Example: {‘bundles’: [{‘GPU’: 1, ‘CPU’: 2}], ‘strategy’: ‘PACK’} + +field runtime_env: Dict[str, Any] | None = None +The runtime_env to use for the model deployment replica and the engine workers. + +apply_checkpoint_info(model_id_or_path: str, trust_remote_code: bool = False) → None[source] +Apply the checkpoint info to the model config. + +classmethod from_file(path: str, **kwargs) → ModelT +Load a model from a YAML file path. + +get_engine_config() → None | VLLMEngineConfig[source] +Returns the engine config for the given LLM config. + +LLMConfig not only has engine config but also deployment config, etc. + +get_or_create_callback() → CallbackBase | None[source] +Get or create the callback instance for this process. + +This ensures one callback instance per process (singleton pattern). The instance is cached so the same object is used across all hooks. + +Returns +: +Instance of class that implements Callback + +multiplex_config() → ServeMultiplexConfig[source] +classmethod parse_yaml(file, **kwargs) → ModelT +setup_engine_backend()[source] +update_engine_kwargs(**kwargs: Any) → None[source] +Update the engine_kwargs and the engine_config engine_kwargs. + +This is typically called during engine starts, when certain engine_kwargs (e.g., data_parallel_rank) become available. + +validator validate_accelerator_type » accelerator_type[source] +validator validate_deployment_config » deployment_config[source] +Validates the deployment config dictionary. + +validator validate_experimental_configs » experimental_configs[source] +Validates the experimental configs dictionary. + +validator validate_llm_engine » llm_engine[source] +Validates the llm_engine string value. + +validator validate_lora_config » lora_config[source] +Validates the lora config dictionary. + +validator validate_model_loading_config » model_loading_config[source] +Validates the model loading config dictionary. + +property input_modality: str +Returns the input modality of the model. There could be more types in the future. Right now assumes if the model doesn’t support version, it’ll be text. + +property max_request_context_length: int | None +property model_architecture: str +property model_id: str +property supports_vision: bool + +# Python API +ray serve api +https://docs.ray.io/en/latest/serve/api/index.html#serve-api + + +Python API +Writing Applications +serve.Deployment + +Class (or function) decorated with the @serve.deployment decorator. + +serve.Application + +One or more deployments bound with arguments that can be deployed together. + +Deployment Decorators +serve.deployment + +Decorator that converts a Python class to a Deployment. + +serve.ingress + +Wrap a deployment class with an ASGI application for HTTP request parsing. + +serve.batch + +Converts a function to asynchronously handle batches. + +serve.multiplexed + +Wrap a callable or method used to load multiplexed models in a replica. 
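+
+A minimal, illustrative sketch (not part of the upstream reference) of how the decorator and application APIs listed above fit together; the deployment class `EchoDeployment`, the app name `echo_app`, and the route prefix are placeholder assumptions, not anything prescribed by the docs:
+
+```python
+# Hedged sketch: combine @serve.deployment, .bind(), and serve.run.
+# `EchoDeployment`, "echo_app", and "/echo" are illustrative placeholders.
+from ray import serve
+from starlette.requests import Request
+
+
+@serve.deployment(num_replicas=1)
+class EchoDeployment:
+    async def __call__(self, request: Request) -> str:
+        # Echo the HTTP request body back to the caller.
+        body = await request.body()
+        return body.decode("utf-8")
+
+
+# Bind arguments to produce an Application, then deploy it on the running cluster.
+app = EchoDeployment.bind()
+# serve.run deploys the app and returns a handle to its ingress deployment.
+handle = serve.run(app, name="echo_app", route_prefix="/echo")
+```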
+ +Deployment Handles +Note + +The deprecated RayServeHandle and RayServeSyncHandle APIs have been fully removed as of Ray 2.10. See the model composition guide for how to update code to use the DeploymentHandle API instead. + +serve.handle.DeploymentHandle + +A handle used to make requests to a deployment at runtime. + +serve.handle.DeploymentResponse + +A future-like object wrapping the result of a unary deployment handle call. + +serve.handle.DeploymentResponseGenerator + +A future-like object wrapping the result of a streaming deployment handle call. + +Running Applications +serve.start + +Start Serve on the cluster. + +serve.run + +Run an application and return a handle to its ingress deployment. + +serve.delete + +Delete an application by its name. + +serve.status + +Get the status of Serve on the cluster. + +serve.shutdown + +Completely shut down Serve on the cluster. + +serve.shutdown_async + +Completely shut down Serve on the cluster asynchronously. + +Configurations +serve.config.ProxyLocation + +Config for where to run proxies to receive ingress traffic to the cluster. + +serve.config.gRPCOptions + +gRPC options for the proxies. + +serve.config.HTTPOptions + +HTTP options for the proxies. + +serve.config.AutoscalingConfig + +Config for the Serve Autoscaler. + +serve.config.AutoscalingPolicy + +PublicAPI (alpha): This API is in alpha and may change before becoming stable. + +serve.config.AutoscalingContext + +Rich context provided to custom autoscaling policies. + +serve.config.AggregationFunction + +An enumeration. + +serve.config.RequestRouterConfig + +Config for the Serve request router. + +Schemas +serve.schema.ServeActorDetails + +Detailed info about a Ray Serve actor. + +serve.schema.ProxyDetails + +Detailed info about a Ray Serve ProxyActor. + +serve.schema.ApplicationStatusOverview + +Describes the status of an application and all its deployments. + +serve.schema.ServeStatus + +Describes the status of Serve. + +serve.schema.DeploymentStatusOverview + +Describes the status of a deployment. + +serve.schema.EncodingType + +Encoding type for the serve logs. + +serve.schema.AutoscalingMetricsHealth + +An enumeration. + +serve.schema.AutoscalingStatus + +An enumeration. + +serve.schema.ScalingDecision + +One autoscaling decision with minimal provenance. + +serve.schema.DeploymentAutoscalingDetail + +Deployment-level autoscaler observability. + +serve.schema.ReplicaRank + +Replica rank model. + +Request Router +serve.request_router.ReplicaID + +A unique identifier for a replica. + +serve.request_router.PendingRequest + +A request that is pending execution by a replica. + +serve.request_router.RunningReplica + +Contains info on a running replica. + +serve.request_router.FIFOMixin + +Mixin for FIFO routing. + +serve.request_router.LocalityMixin + +Mixin for locality routing. + +serve.request_router.MultiplexMixin + +Mixin for multiplex routing. + +serve.request_router.RequestRouter + +Abstract interface for a request router (how the router calls it). + +Advanced APIs +serve.get_replica_context + +Returns the deployment and replica tag from within a replica at runtime. + +serve.context.ReplicaContext + +Stores runtime context info for replicas. + +serve.get_multiplexed_model_id + +Get the multiplexed model ID for the current request. + +serve.get_app_handle + +Get a handle to the application's ingress deployment by name. + +serve.get_deployment_handle + +Get a handle to a deployment by name. + +serve.grpc_util.RayServegRPCContext + +Context manager to set and get gRPC context. 
+ +serve.exceptions.BackPressureError + +Raised when max_queued_requests is exceeded on a DeploymentHandle. + +serve.exceptions.RayServeException + +serve.exceptions.RequestCancelledError + +Raise when a Serve request is cancelled. + +serve.exceptions.DeploymentUnavailableError + +Raised when a Serve deployment is unavailable to receive requests. \ No newline at end of file diff --git a/specs/mvp/v3.8/ray_serve_llm.md b/specs/mvp/v3.8/ray_serve_llm.md new file mode 100644 index 0000000..89e61aa --- /dev/null +++ b/specs/mvp/v3.8/ray_serve_llm.md @@ -0,0 +1,87 @@ + +基于提供的来源,以下是使用 **Builder Pattern(构建器模式)** 结合 Ray Serve 和 vllm 动态部署**中型大语言模型(Medium-sized LLM)**的原理与操作方案。 + +### 一、 核心原理 + +1. **中型 LLM 定义**:中型模型(如 Llama-3.1-70B)通常具有约 70B 参数。它们通常运行在**单个节点**上,利用 **4 到 8 个 GPU**。 +2. **Builder Pattern 机制**:该模式通过 `build_openai_app` 函数提供高度抽象。开发者只需定义一个 `LLMConfig` 对象,即可自动构建并链接底层的 `LLMServer` 和 `OpenAiIngress` 组件。 +3. **高性能后端 (vLLM)**:Ray Serve LLM 使用 vLLM 作为推理引擎,支持高性能推理和显存管理。 +4. **动态扩缩容与资源调度**: + * **张量并行 (Tensor Parallelism)**:通过 `tensor_parallel_size` 将模型权重均匀分布在单节点的所有 GPU 上。 + * **副本缩放 (Autoscaling)**:通过 `autoscaling_config` 动态调整 `min_replicas` 和 `max_replicas`,使服务能根据实时流量增减推理副本。 + +--- + +### 二、 操作方案 + +#### 1. 环境准备 +确保已安装必要的依赖包并配置 Hugging Face 访问令牌(针对 Llama-3.1 等受限模型)。 +```bash +pip install "ray[serve,llm]" +export HF_TOKEN= +``` + +#### 2. 编写部署脚本 (`serve_medium_llm.py`) +使用 **Builder Pattern** 定义配置并构建应用。以下示例配置了一个典型的 70B 模型部署: + +```python +# serve_medium_llm.py +from ray.serve.llm import LLMConfig, build_openai_app +import os + +llm_config = LLMConfig( + model_loading_config=dict( + model_id="my-llama-3.1-70b", + model_source="meta-llama/Llama-3.1-70B-Instruct", + ), + accelerator_type="A100-40G", # 或 L40S + deployment_config=dict( + autoscaling_config=dict( + min_replicas=1, # 最小副本数 + max_replicas=4, # 最大副本数,实现动态扩展 + ) + ), + runtime_env=dict(env_vars={"HF_TOKEN": os.environ.get("HF_TOKEN")}), + engine_kwargs=dict( + max_model_len=32768, # 上下文长度 + tensor_parallel_size=8, # 在单节点的 8 个 GPU 间拆分权重 + ), +) + +# 使用 Builder Pattern 构建应用 +app = build_openai_app({"llm_configs": [llm_config]}) +``` + +#### 3. 启动部署 +在终端运行以下命令启动服务: +```bash +serve run serve_medium_llm:app +``` +部署过程通常需要几分钟,包括配置集群、启动 vLLM 服务器以及下载模型权重。 + +#### 4. 发送请求测试 +服务启动后,可以通过符合 OpenAI 标准的接口进行访问。 +```python +from openai import OpenAI + +client = OpenAI(base_url="http://localhost:8000/v1", api_key="FAKE_KEY") +response = client.chat.completions.create( + model="my-llama-3.1-70b", + messages=[{"role": "user", "content": "解释一下什么是量子纠缠?"}], + stream=True +) +for chunk in response: + if chunk.choices.delta.content: + print(chunk.choices.delta.content, end="", flush=True) +``` + +--- + +### 三、 性能与并发优化建议 + +* **提高并发量**:可以通过降低 `max_model_len` 来减少 KV 缓存所需的显存,从而显著提升每个副本支持的最大并发请求数。 +* **监控指标**:通过 Ray Serve LLM 仪表盘监控 **TTFT(首字延迟)**、**TPOT(单字延迟)** 和 **Token 吞吐量** 来评估服务性能。 +* **精度折衷**:对于资源受限的场景,可以使用**量化模型**(如 FP8)来减少模型内存占用,为 KV 缓存留出更多空间,进而提高并发能力。 + +**比喻理解**: +部署**中型 LLM** 就像是在一个大型车间里组装一台复杂的精密机器(模型权重)。**Builder Pattern** 是你的“全自动组装线”,你只需设定好机器的参数(Config),生产线就会自动帮你把零件固定好并接通电源。而 **vLLM 和张量并行** 就像是让 8 个熟练工人(GPU)共同抬起这台沉重的机器,每个人只负责自己那一部分的力气,从而让机器能够平稳地运转。 \ No newline at end of file diff --git a/specs/mvp/v3.8/requirements.md b/specs/mvp/v3.8/requirements.md new file mode 100644 index 0000000..aaaa642 --- /dev/null +++ b/specs/mvp/v3.8/requirements.md @@ -0,0 +1,8 @@ + +1. 通过ray serve(后端vllm)来动态拉起llm,支持多模型application部署, +2. 默认一个模型只有一个replica,用户配置可以多个 +3. 用户可以删除(下线)模型 +4. 可以指定模型用几张卡 +5. 通过WebUI来进行配置,查看当前部署的模型列表,以及可以查看详情 +6. 模型路径可以使用common,也可以用户自己指定user路径 +7. 
\ No newline at end of file diff --git a/specs/mvp/v3.8/v3.8_api.md b/specs/mvp/v3.8/v3.8_api.md new file mode 100644 index 0000000..813f2a9 --- /dev/null +++ b/specs/mvp/v3.8/v3.8_api.md @@ -0,0 +1,224 @@ +# MVP v3.8 API Reference(Serving) + +> 说明:本节为 v3.8 新增的 **Model Serving** API(Ray Serve LLM / vLLM)。 +> 认证:Serving 管理 API 复用现有 MVP API 的认证方式(`Authorization: Bearer `)。 +> 推理:对外 OpenAI endpoint **不做鉴权**(v3.8 约定)。 + +## 0. 基本信息 + +### 0.1 Base URLs + +- MVP API server:`http://:8080` +- Ray Serve OpenAI ingress(固定端口 8000):`http://:8000/v1` + +### 0.2 认证 + +所有 `/api/v2/serve/*` 接口要求: + +``` +Authorization: Bearer +``` + +其中 `user_token` 由管理员通过 `/api/v2/users//tokens` 颁发(沿用现有机制)。 + +### 0.3 命名规则:`model_id = user_id-YYYYMMDDHHMM-` + +- 用户提交时填写 `model_id`(语义为 suffix,例如 `qwen-0.5b`) +- 平台生成前缀: + - `prefix = "-"` +- 平台实际对外暴露的 OpenAI model 名称为: + - `model_id = "-"` + - 示例:`alice-202601061235-qwen-0.5b` + +## 1. 数据结构 + +### 1.1 ServingSpec(YAML) + +请求体建议使用 YAML(与 TaskSpec 一致),示例: + +```yaml +model_id: qwen-0.5b # 必填:suffix(平台自动加 user_id- 前缀) +model_source: $HOME/common/hf/.../ # 必填:本地路径或 repo id;平台做 $HOME 宏替换与路径校验 +num_replicas: 1 # 可选,默认 1 +gpus_per_replica: 1 # 可选,默认 1 +# engine_kwargs: # 可选:vLLM 参数透传(白名单/黑名单由实现决定) +# max_model_len: 8192 +# gpu_memory_utilization: 0.9 +``` + +说明: +- `accelerator_type` 不在 ServingSpec 中暴露;由平台配置(`dev.yaml` 的 `serving.llm.accelerator_type`)统一注入到 Ray Serve LLM 的 `LLMConfig.accelerator_type`(dev/h1: `H20`)。 + +#### 宏替换 + +- `$HOME` → `/private/users/` +- `$HOME/common/hf` → `/private/hf` +- `$HOME/common/datasets` → `/private/datasets`(serving 不强依赖,但保留一致语义) + +#### 路径校验(v3.8 约定) + +`model_source` 允许: + +- `/private/hf/...`(common) +- `/private/users//...`(user) + +拒绝: + +- 其它用户目录 +- 非 `/private` 下路径 +- 空路径或包含 `..` 的可疑路径 + +### 1.2 ServingModel(响应体,JSON) + +```json +{ + "model_key": "svc-alice-20260106-123000-abcd", + "user_id": "alice", + "model_id": "alice-202601061235-qwen-0.5b", + "model_id_suffix": "qwen-0.5b", + "model_id_prefix": "alice-202601061235", + "model_source": "/private/hf/hub/models--.../snapshots/", + "num_replicas": 1, + "gpus_per_replica": 1, + "total_gpus": 1, + "state": "RUNNING", + "endpoint": { + "openai_base_url": "http://:8000/v1", + "model": "alice-202601061235-qwen-0.5b" + }, + "error_summary": null, + "created_at": "2026-01-06T12:30:00Z", + "updated_at": "2026-01-06T12:31:02Z" +} +``` + +## 2. 管理 API(MVP API server) + +### 2.1 Create / Upsert model + +`POST /api/v2/serve/models` + +#### Request + +- Header: `Content-Type: application/yaml` +- Body: ServingSpec(YAML) + +#### Response (202) + +```json +{ + "model_key": "svc-alice-20260106-123000-abcd", + "state": "QUEUED" +} +``` + +语义: +- 创建新模型(若 suffix 不存在) +- 或更新已有模型(若同一用户同一 suffix 已存在):更新 replicas/gpu 等配置,进入 `QUEUED` 等待 reconciler apply + +### 2.2 List models (current user) + +`GET /api/v2/serve/models` + +#### Response (200) + +```json +{ + "items": [ ... ServingModel ... ], + "openai_base_url": "http://:8000/v1" +} +``` + +### 2.3 Get model detail + +`GET /api/v2/serve/models/{model_key}` + +#### Response (200) + +```json +{ + "model": { ... ServingModel ... 
}, + "resolved_spec_yaml": "model_id: ...\nmodel_source: ...\n", + "events": [ + { "event_type": "DEPLOY_REQUESTED", "created_at": "...", "payload": {...} } + ], + "serve_status": { + "app_name": "argus_llm_app", + "app_status": "RUNNING" + } +} +``` + +### 2.4 Scale replicas (PATCH) + +`PATCH /api/v2/serve/models/{model_key}` + +#### Request (JSON) + +```json +{ "num_replicas": 2 } +``` + +#### Response (200) + +```json +{ "model_key": "...", "state": "QUEUED" } +``` + +> v3.8 只支持修改 `num_replicas`(以及可选 engine_kwargs);`gpus_per_replica` 若修改,可能触发重新部署。 + +### 2.5 Delete / Undeploy model + +`DELETE /api/v2/serve/models/{model_key}` + +#### Response (200) + +```json +{ "model_key": "...", "state": "DELETING" } +``` + +语义:从“声明式配置”中删除该模型,reconciler 会在下一轮 tick 触发 `serve.run(...)` 更新 app 配置并最终使其不可见。 + +### 2.6 Admin: Serve cluster status(可选) + +`GET /api/v2/serve/status` + +#### Response (200) + +返回 `serve.status()` 摘要(集群级 + app 级)。 + +> 仅 admin token 可访问(沿用 v3.x admin gate)。 + +## 3. 推理 API(Ray Serve OpenAI ingress) + +> v3.8 不做鉴权:无需 `Authorization`。 + +### 3.1 List models + +`GET http://:8000/v1/models` + +返回可用 model 列表(包含 `alice-qwen-0.5b` 这类带前缀名称)。 + +### 3.2 Chat completions + +`POST http://:8000/v1/chat/completions` + +```json +{ + "model": "alice-202601061235-qwen-0.5b", + "messages": [{"role":"user","content":"Hello"}], + "stream": false +} +``` + +### 3.3 Completions / Embeddings + +按 Ray Serve LLM OpenAI ingress 支持范围提供(v3.8 验收至少覆盖 chat)。 + +## 4. 错误码约定(MVP API server) + +- `400 invalid yaml/spec`:YAML 解析失败、字段缺失、值不合法 +- `403 forbidden`:路径越权(model_source 访问其他用户目录) +- `409 conflict`:model_id_suffix 冲突(同一用户重复创建且不允许覆盖时;若选择 upsert 则不返回该错误) +- `422 unprocessable`:资源参数非法(replica/gpu <=0) +- `500 internal`:reconciler/serve 调用异常(详情记录到 `serve_events`,并写入 `error_summary`) diff --git a/specs/mvp/v3.8/v3.8_design.md b/specs/mvp/v3.8/v3.8_design.md new file mode 100644 index 0000000..a69a99c --- /dev/null +++ b/specs/mvp/v3.8/v3.8_design.md @@ -0,0 +1,371 @@ +# MVP v3.8 详细设计方案:Ray Serve(vLLM)模型动态部署与管理 + +> 基线:当前已具备 v3.7 能力(训练平台 + W&B + SFTPGo + WebUI/API + Ray stateless pool,训练侧默认 rollout=vllm)。 +> v3.8 目标:在同一套 Ray 集群上,引入 **Ray Serve LLM(后端 vLLM)** 的模型推理服务能力,并通过 WebUI/API 动态管理模型生命周期。 + +## 0. 需求范围(来自 requirements.md) + +1) 通过 Ray Serve(后端 vLLM)动态拉起 LLM,支持**多模型 application** 部署 +2) 默认一个模型 1 个 replica,用户可配置多个 +3) 用户可删除(下线)模型 +4) 用户可指定模型使用几张 GPU +5) WebUI 可配置、查看模型列表、查看详情 +6) 模型路径可用 common,也可用 user 路径(本地路径) + +## 1. 总体架构 + +### 1.1 组件关系 + +v3.8 在现有“训练平台”之上新增一个 **Serving 子系统**: + +- **API server(现有)** + - 新增 Serving API(模型部署/删除/扩缩容/状态) + - 新增 Serving 后台线程(reconciler):周期性对齐 DB 与 Ray Serve 实际状态 +- **SQLite(现有)** + - 新增 `serve_models`、`serve_events` 等表,保存声明式配置与状态 +- **Ray 集群(现有 stateless pool)** + - 复用现有 head/worker 容器 + - 在集群内启动 Ray Serve(controller + proxy + deployments) +- **Ray Serve LLM(新增)** + - 通过 `ray.serve.llm.build_openai_app` 构建一个 OpenAI-compatible app + - app 内包含多个 `LLMConfig`(每个对应一个模型) + +### 1.2 为什么选择“单个 multi-model application” + +Ray Serve 支持 multi-app,但在 dev/docker 场景下多个 app 的 route_prefix 管理更复杂;同时 requirements 要求“多模型 application 部署”,因此 v3.8 采用: + +- 一个固定的 app:`argus_llm_app`(名字可配置) +- route_prefix 固定为 `/`(对外暴露 `/v1/...` OpenAI 接口) +- 每个模型对应一个 `LLMConfig`,通过 `model_id` 区分(即 OpenAI API 里的 `model` 字段) + +这样对用户而言最直观: + +- base_url 固定:`http://:8000/v1` +- `model=` 选择不同模型(`/v1/models` 自动列出) + +## 2. 
Ray Serve 部署策略(dev/h1 约束) + +### 2.1 HTTP 入口端口与 docker compose + +Ray Serve 默认 HTTP 端口是 `8000`。v3.8 约定: + +- 在 **head 容器** 映射 `8000:8000` +- API server 仍在 `8080` +- Ray Dashboard 在 `8265` + +原因:在单机多容器 docker 环境里,如果让 proxy “每个节点都起”,会出现多个容器同时想绑定同一个 host 端口的问题(不可行)。因此 v3.8 推荐: + +- Serve proxy 位置设为 **HeadOnly**(只在 head 上提供 HTTP 入口) +- GPU replica 仍运行在 worker 上(proxy 只转发,不跑推理) + +> 需要注意: +> - Serve 的 HTTP 配置(host/port/proxy_location)是 **Ray 集群全局配置**,启动后无法动态修改,因此应当在平台启动时一次性设定并持久化。 +> - proxy Actor 需要 CPU 资源;head 节点的 `num-cpus=0` 策略可能需要在 v3.8 做小幅调整(例如给 head 保留少量 CPU),但仍通过 `entrypoint_resources` 确保训练 driver 不会被调度到 head。 + +#### 2.1.1 compose 预期改动(v3.8 实现时落地) + +- `src/mvp/docker-compose.yaml`(ray_head)新增: + - `ports: - "8000:8000"` + +> worker 容器不暴露 8000(避免 host 端口冲突),由 head proxy 统一对外提供入口。 + +### 2.2 启动/配置方式(Python SDK 优先) + +v3.8 采用 Ray Serve Python SDK: + +- `ray.init(address="auto")` +- `serve.start(proxy_location="HeadOnly", http_options={"host":"0.0.0.0","port":8000})`(一次性全局配置) +- `serve.run(app, name=, route_prefix="/")` +- `serve.delete(name=)`(必要时) +- `serve.status()` 查询集群/应用状态 + +理由: + +- 避免在平台内部引入额外 REST client 依赖(并减少跨版本 REST schema 不稳定风险) +- API server 本身运行在 head 容器内,可直接 `ray.init(address="auto")` 连接现有集群 + +> 另:Ray Dashboard 暴露 Serve REST API(`PUT /api/serve/applications/` 等)可作为备选方案,但 v3.8 先不以它为主通路。 + +### 2.3 依赖与镜像假设 + +v3.8 依赖: + +- `ray[serve]`(Serve Controller/Proxy) +- `ray[llm]`(Ray Serve LLM 的 `ray.serve.llm` 模块) +- vLLM(推理引擎) + +由于 v3.7 已切换到 `verlai/verl:vllm011.latest`,预期镜像内包含 vLLM;但 `ray.serve.llm` 是否开箱即用需要在实现阶段确认。 +若缺失,v3.8 将在 `argus-ray-node` 镜像构建阶段补充 `pip install "ray[serve,llm]"`(或按官方建议的最小依赖)并做版本锁定。 + +### 2.4 Serving 配置(dev.yaml) + +v3.8 新增一段 serving 配置,至少包含: + +```yaml +serving: + serve: + http_port: 8000 # 固定 8000 + proxy_location: HeadOnly # dev/docker 下推荐 + llm: + accelerator_type: H20 # dev 环境填写 H20(对应 ray.serve.llm.LLMConfig.accelerator_type) +``` + +说明: +- `accelerator_type` 是 Ray Serve LLM 的 `LLMConfig.accelerator_type` 字段,用于表达“该模型运行在哪类加速卡上”。在 dev/h1 环境我们固定为 `H20`。 +- v3.8 不把 `accelerator_type` 暴露给普通用户编辑(避免误配);由部署环境配置统一决定。 + +## 3. 
模型配置与资源映射 + +### 3.1 关键配置对象:`ray.serve.llm.LLMConfig` + +每个模型部署由一个 `LLMConfig` 描述,关键字段(v3.8 用到的子集): + +- `model_loading_config` + - `model_id`: 对外展示/请求时用的模型名(唯一 key) + - `model_source`: HF repo id / S3 / **local path** +- `accelerator_type` + - 从 `dev.yaml` 的 `serving.llm.accelerator_type` 读取(dev/h1: `H20`) +- `deployment_config` + - `num_replicas` 或 `autoscaling_config`(v3.8 先用固定 `num_replicas`) + - `ray_actor_options`(CPU/资源约束) +- `engine_kwargs` + - vLLM 相关参数(`max_model_len`、`gpu_memory_utilization` 等) +- `placement_group_config` + - 控制 vLLM engine workers 使用的资源 bundle(用于多 GPU / 跨节点) +- `runtime_env` + - 注入 HF cache、离线开关等环境变量 + +### 3.2 GPU 张数(gpus_per_replica)如何落到 LLMConfig + +v3.8 把用户输入的: + +- `gpus_per_replica = N` + +映射为: + +- `engine_kwargs.tensor_parallel_size = N`(单机/跨机张量并行,Ray Serve LLM 官方示例写法) +- `placement_group_config = {"bundles": [{"GPU": 1, "CPU": }] * N, "strategy": "PACK"}` + +并在 `engine_kwargs` 中保留 vLLM 其他参数(`max_model_len`、`gpu_memory_utilization` 等)。 + +> 兼容性说明:Ray Serve LLM/Serve LLM 仍处于快速演进阶段;v3.8 会以我们线上实际 Ray 版本为准做最小适配与回归测试。 + +### 3.2.1 跨节点场景(N > 单机 GPU) + +Ray Serve LLM 默认使用 `PACK` 策略,优先把 GPU worker 放在尽量少的节点上;如果单机放不下,会自动 spill 到其它节点,从而支持跨节点张量并行(TP)部署。 + +### 3.3 replica 数(num_replicas) + +v3.8 默认: + +- `num_replicas = 1` + +允许用户在 UI 中设置为 `>=1`。 +多 replica 会线性消耗 GPU(`num_replicas * gpus_per_replica`),需要做资源预检查。 + +### 3.4 模型路径与宏替换(common / user) + +v3.8 支持两类模型来源: + +1) **common** +- 典型为 `/private/hf/...`(共享 HF cache / snapshot) + +2) **user** +- `/private/users//models/...` +- 以及用户训练输出(例如 `jobs//checkpoints/.../huggingface`) + +为保证 UI 易用,沿用平台已有的宏语义: + +- `$HOME` → `/private/users/` +- `$HOME/common/hf` → `/private/hf` + +并进行路径校验: + +- 允许前缀:`/private/hf`、`/private/users//` +- 拒绝:越权访问其他用户目录、或访问系统敏感路径 + +### 3.5 离线模式(避免 HF mirror 429) + +v3.7 训练侧已验证 `HF_HUB_OFFLINE=1` 的必要性。v3.8 Serving 侧同样默认注入: + +- `HF_HOME=/private/hf` +- `HUGGINGFACE_HUB_CACHE=/private/hf/hub` +- `TRANSFORMERS_CACHE=/private/hf/transformers` +- `HF_HUB_OFFLINE=1` +- `HF_ENDPOINT=https://hf-mirror.com`(可保留,但离线模式下不应触发网络) + +并建议用户在 ServingSpec 中尽量填写 **local path** 作为 `model_source`,而不是直接 repo id。 + +## 4. 平台数据模型(SQLite) + +新增两张主表: + +### 4.1 `serve_models` + +每一行代表一个“声明式模型部署”: + +- `model_key`(平台内部唯一 ID,便于重命名/去重) +- `user_id` +- `model_id`(对外 OpenAI model 名称,要求 per-app 唯一) +- `model_source`(本地路径或 repo id,存 resolved 后的结果) +- `num_replicas` +- `gpus_per_replica` +- `engine_kwargs_json`(可选) +- `state`:`QUEUED | DEPLOYING | RUNNING | FAILED | DELETING | DELETED` +- `serve_app_name`(默认 `argus_llm_app`) +- `created_at / updated_at` +- `error_summary` + +### 4.2 `serve_events` + +记录关键事件与排障信息(类似 task_events): + +- `id` +- `model_key` +- `event_type`(DEPLOY_REQUESTED/DEPLOY_APPLIED/STATUS_SYNC/DELETE_REQUESTED/...) +- `payload_json` +- `created_at` + +## 5. 
API 设计(新增) + +在现有 `Authorization: Bearer ` 的认证体系下,新增 Serving API(路径仅示意,具体在实现时与现有 `api/v2` 对齐)。 + +### 5.1 用户接口 + +- `POST /api/v2/serve/models` + - body: YAML 或 JSON(v3.8 先用 YAML 与现有 TaskSpec 一致) + - 创建/更新(upsert)一个模型配置,进入 `QUEUED` +- `GET /api/v2/serve/models` + - 列出当前用户的模型列表(含 state、资源、endpoint) +- `GET /api/v2/serve/models/{model_key}` + - 详情:完整 spec + 最近事件 + Serve status 摘要 +- `PATCH /api/v2/serve/models/{model_key}` + - 修改 `num_replicas`、或 engine_kwargs(可选) +- `DELETE /api/v2/serve/models/{model_key}` + - 下线模型(进入 `DELETING`) + +### 5.2 系统接口(admin) + +- `GET /api/v2/serve/status`(admin) + - 返回 `serve.status()` 的摘要(集群级 / app 级) + +### 5.3 对外推理 endpoint + +固定输出到 UI/接口中: + +- `openai_base_url = http://:8000/v1` +- 支持: + - `/v1/chat/completions` + - `/v1/completions` + - `/v1/embeddings` + - `/v1/models` + +> v3.8 不做额外网关与鉴权(保持与现有 dev 环境一致);若后续需要,可在 v3.9+ 引入 token 校验/反向代理。 + +### 5.4 `model_id` 前缀策略(user_id-) + +为避免多用户冲突并保持可读性: + +v3.8 采用“**user_id + 日期小时分钟**”作为稳定前缀,以降低冲突并便于快速定位创建时间: + +- 用户在 UI/API 中仅填写 `model_id_suffix`(或仍用字段名 `model_id`,但语义为 suffix) +- 平台计算实际对外 `model_id`: + - `prefix = f"{user_id}-{YYYYMMDDHHMM}"` + - `model_id = f"{prefix}-{model_id_suffix}"` +- 在列表/详情中同时展示: + - `model_id_suffix`(用户输入) + - `model_id_prefix`(平台生成,例如 `alice-202601061235`) + - `model_id`(对外 OpenAI 名称) + +## 6. 后台执行模型(Serving Reconciler) + +v3.8 参考任务 scheduler 的模式,引入一个轻量的 reconciler: + +- tick 周期(例如 5s) +- 每次 tick: + 1) 拉取 DB 中 `QUEUED/DEPLOYING/RUNNING/DELETING` 的模型 + 2) 调用 `serve.status()` 读取当前 app 及 deployments 状态 + 3) 若存在 `QUEUED` 或需要变更的模型:构建新的 multi-model app(包含全部 `RUNNING/DEPLOYING/QUEUED` 的模型配置)并 `serve.run(...)` + 4) 若存在 `DELETING`:从 app 配置中移除对应模型,并 `serve.run(...)` 应用变更 + 5) 更新每个模型的 state(依据 Serve status) + +重要行为说明(multi-model app 的代价): +- 每次“新增/删除/改 replicas”都会触发对同一个 app 的一次 `serve.run(...)` 更新; +- Ray Serve 会尽量做增量更新,但在某些版本/配置下可能导致 ingress/router 短暂重启; +- v3.8 先接受该代价(满足需求闭环优先);若后续需要“删除某模型不影响其它模型”,可演进为“每模型一个 app + 单独 route_prefix”的方案。 + +资源预检查: +- 在 apply 前使用 `ray.available_resources()` 做粗粒度 GPU 预检查: + - 需要 GPU 总量 = `sum(num_replicas * gpus_per_replica)`(仅对“新增/扩容的差量”更精确) +- 若不足: + - 模型保持 `QUEUED`,记录事件 `PENDING_RESOURCES` + - 用户 UI 显示“资源不足,等待释放” + +> v3.8 不引入更复杂的抢占/优先级。Serving 与 Training 会竞争 GPU;用户需要自行规划资源(或后续版本引入统一调度)。 + +## 7. WebUI 设计(新增 Serving 页面) + +新增侧边栏入口:**Serving** + +### 7.1 Serving 列表页 + +- 展示字段: + - model_id + - user_id(仅 admin 可见) + - replicas / gpus_per_replica / total_gpus + - state(RUNNING/DEPLOYING/QUEUED/FAILED) + - 操作:Scale(修改 replicas)、Delete + +### 7.2 Serving 创建/编辑页 + +两种模式(与 New Task 类似,先做 YAML 模式即可): + +示例 YAML(v3.8): + +```yaml +model_id: qwen-0.5b +model_source: $HOME/common/hf/hub/models--Qwen--Qwen2.5-0.5B-Instruct/snapshots/ +num_replicas: 1 +gpus_per_replica: 1 +# engine_kwargs: +# max_model_len: 8192 +# gpu_memory_utilization: 0.9 +``` + +### 7.3 Serving 详情页 + +- 完整配置(resolved spec) +- Serve status 摘要(deployments 状态、replica 健康) +- OpenAI 调用示例(python openai client) + +## 8. 验收标准(v3.8) + +1) 部署: +- 一键部署一个模型(1 replica、1 GPU)成功,状态变为 RUNNING +- `/v1/models` 可列出该模型 + +2) 扩缩容: +- 修改 `num_replicas` 生效(Serve status 看到副本数变化) + +3) 多模型: +- 同一个 app 内能同时部署 2 个模型(不同 model_id) +- 通过 OpenAI 接口用不同 `model=` 请求可得到响应 + +4) 下线: +- 删除某模型后 `/v1/models` 不再出现 + +5) 模型路径: +- 支持 `/private/hf/...`(common)与 `/private/users//...`(user)两类本地路径 + +6) 资源不足可解释: +- 当 GPU 不足时,模型进入 `QUEUED` 并在 UI/详情中提示“资源不足” + +## 9. 
待确认点(请你评审时确认) + +已确认(来自评审): + +1) 推理端口固定使用 `8000`(Ray Serve 默认端口)。 +2) 对外暴露的 OpenAI 接口 **不与现有 token 体系绑定**(v3.8 不做推理侧鉴权)。 +3) `model_id` 命名规则:平台统一加 `user_id + 日期小时分钟` 前缀,用户在 UI 里只填写后缀部分。 + +> 说明:这样可以避免跨用户 model_id 冲突,同时在 OpenAI API 的 `model=` 字段上自然可读。 diff --git a/specs/mvp/v3.8/v3.8_dev_plan.md b/specs/mvp/v3.8/v3.8_dev_plan.md new file mode 100644 index 0000000..49950f9 --- /dev/null +++ b/specs/mvp/v3.8/v3.8_dev_plan.md @@ -0,0 +1,266 @@ +# MVP v3.8 开发计划(TDD,细化版) + +> 目标:在 v3.7 基础上引入 Ray Serve(vLLM)模型动态部署与管理(多模型单 app),并提供 WebUI + API 管理闭环。 +> 约束(已确认): +> - 推理端口固定 `8000`(Serve HTTP)。 +> - 推理侧不接入现有 token 鉴权(对外 OpenAI endpoint 无鉴权)。 +> - 对外 `model_id` 统一加前缀:`--`(用户只填 suffix)。 +> - `LLMConfig.accelerator_type` 从 `dev.yaml` 读取(dev/h1: `H20`)。 + +本计划按“测试先行 → 实现 → 回归”的节奏拆分到可验证粒度;每个 milestone 都能单独验收。 + +--- + +## M0 - 基线与依赖探测(不改行为) + +**目的**:确认 v3.7 baseline 稳定,并明确 Ray Serve LLM 依赖是否已具备(否则后续会卡在镜像/依赖)。 + +### M0.1 本地回归 +- [ ] `.venv/bin/python -m pytest` 通过(coverage ≥ 90%) + +### M0.2 远端回归(h1) +- [ ] `src/mvp/scripts/run_all_v30_api.sh` 可跑通(确认训练闭环未回退) + +### M0.3 head 容器内依赖探测(记录结论) +- [ ] `python3 -c "import ray; import ray.serve; print(ray.__version__)"` +- [ ] `python3 -c "from ray.serve.llm import LLMConfig, build_openai_app; print('serve_llm_ok')"` +- [ ] 若失败(例如缺 `gymnasium`):记录缺失项,并在 M6 通过补齐 `ray[llm]` 解决 + +### M0.4 配置探测 +- [ ] `configs/dev.yaml` 中存在: + - `serving.llm.accelerator_type: H20` + - `serving.serve.http_port: 8000` + - `serving.serve.proxy_location: HeadOnly` + +**验收**: +- baseline 无回退;依赖探测结论明确(可用/不可用) + +--- + +## M1 - ServingSpec(解析/校验/宏替换/路径校验)(单测驱动) + +**目的**:先把“输入”这层彻底固化(API/UI 复用),避免后期反复改 schema。 + +### M1.1 新增/扩展数据模型 +- [ ] `ServingSpec`(输入) + - `model_id`(suffix) + - `model_source`(支持 `$HOME` 宏) + - `num_replicas`(default=1) + - `gpus_per_replica`(default=1) + - `engine_kwargs`(可选 dict,先原样存 DB;实现阶段再做白名单/黑名单) +- [ ] `ResolvedServingSpec`(内部) + - `model_id_suffix` + - `model_id_prefix`(由平台生成:`user_id-YYYYMMDDHHMM`) + - `model_id`(对外:`-`) + - `model_source`(resolved path) + +### M1.2 规则(写成纯函数,便于测) +- [ ] `validate_model_id_suffix(suffix)`:长度/字符集限制(建议:`[a-zA-Z0-9][a-zA-Z0-9._-]{0,63}`) +- [ ] `$HOME` 宏替换:`$HOME`、`$HOME/common/hf`、`$HOME/common/datasets` +- [ ] 路径校验(强制本地路径): + - 允许:`/private/hf/...`、`/private/users//...` + - 拒绝:`..`、空、其它用户路径、非 `/private` 路径 +- [ ] `make_model_id_prefix(user_id, now_utc)`:`YYYYMMDDHHMM`(UTC)+ user_id + +### M1.3 单测(先写失败用例,再补实现) +- [ ] `test_serving_spec_validation.py` + - suffix 合法/非法 + - replicas/gpus 边界:0、负数、小数、超大值(按实现决定是否限制上限) +- [ ] `test_serving_spec_paths.py` + - `$HOME` 替换正确 + - 越权路径返回 403/ValueError(按接口层映射) + - `/private/hf` 与 `/private/users/` 均可 +- [ ] `test_serving_model_id_prefix.py` + - 固定时间输入 → prefix 输出一致(避免时区/格式问题) + +**验收**: +- 输入 spec 规则稳定;核心校验/替换均有单测覆盖 + +--- + +## M2 - SQLite 表结构与 Db 接口(单测驱动) + +**目的**:Serving 的声明式状态必须持久化,可审计、可恢复。 + +### M2.1 DB schema +- [ ] `serve_models` + - 主键:`model_key`(平台生成) + - unique:`(user_id, model_id_suffix)`(实现 upsert) + - 存储:resolved spec(包含 prefix/full model_id、resolved model_source) + - 状态:`QUEUED/DEPLOYING/RUNNING/FAILED/DELETING/DELETED` + - `error_summary` +- [ ] `serve_events`(append-only) + +### M2.2 Db 方法 +- [ ] `upsert_serve_model(user_id, spec_yaml, now)` → (model_key, state) +- [ ] `list_serve_models(user_id, include_deleted=False, limit/offset?)` +- [ ] `get_serve_model(model_key)` +- [ ] `set_serve_model_state(model_key, state, error_summary=None)` +- [ ] `append_serve_event(model_key, event_type, payload_json=None)` +- [ ] `pick_next_runnable_serve_change()`(给 reconciler 用) + 
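+A minimal sketch of the SQLite DDL implied by M2.1/M2.2 above; exact column types, timestamp formats, and the `serve_app_name` default are illustrative assumptions rather than the final schema:
+
+```python
+# Illustrative-only DDL for the tables outlined in M2.1 (not the final schema).
+import sqlite3
+
+SERVING_SCHEMA = """
+CREATE TABLE IF NOT EXISTS serve_models (
+    model_key          TEXT PRIMARY KEY,              -- platform-generated key
+    user_id            TEXT NOT NULL,
+    model_id_suffix    TEXT NOT NULL,                 -- user-provided suffix
+    model_id           TEXT NOT NULL,                 -- <user_id>-<YYYYMMDDHHMM>-<suffix>
+    model_source       TEXT NOT NULL,                 -- resolved local path / repo id
+    num_replicas       INTEGER NOT NULL DEFAULT 1,
+    gpus_per_replica   INTEGER NOT NULL DEFAULT 1,
+    engine_kwargs_json TEXT,
+    state              TEXT NOT NULL,                 -- QUEUED/DEPLOYING/RUNNING/FAILED/DELETING/DELETED
+    serve_app_name     TEXT NOT NULL DEFAULT 'argus_llm_app',
+    error_summary      TEXT,
+    created_at         TEXT NOT NULL,
+    updated_at         TEXT NOT NULL,
+    UNIQUE (user_id, model_id_suffix)                 -- upsert key
+);
+CREATE TABLE IF NOT EXISTS serve_events (
+    id           INTEGER PRIMARY KEY AUTOINCREMENT,
+    model_key    TEXT NOT NULL REFERENCES serve_models(model_key),
+    event_type   TEXT NOT NULL,                       -- DEPLOY_REQUESTED / STATUS_SYNC / ...
+    payload_json TEXT,
+    created_at   TEXT NOT NULL
+);
+"""
+
+
+def init_serving_tables(conn: sqlite3.Connection) -> None:
+    """Create the serving tables if they do not exist yet."""
+    conn.executescript(SERVING_SCHEMA)
+    conn.commit()
+```
+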
+### M2.3 单测 +- [ ] `test_db_serving.py` + - upsert 行为(同 suffix 更新不产生新 model_key 或产生新版本——此处需在实现前明确策略) + - state 流转 + 事件记录 + - list 的过滤与排序(按 updated_at) + +**验收**: +- DB 行为可预测;upsert/unique 语义确定并测试覆盖 + +--- + +## M3 - Serving 管理 API(FastAPI)(单测驱动) + +**目的**:先把管理 API 跑通,Ray Serve 先不接真实(reconciler 之后再接)。 + +### M3.1 API 路由(用户) +- [ ] `POST /api/v2/serve/models`(Content-Type: application/yaml) + - 入参:ServingSpec YAML + - 出参:`{model_key,state}`(202) +- [ ] `GET /api/v2/serve/models` + - 返回 items + `openai_base_url=http://:8000/v1` +- [ ] `GET /api/v2/serve/models/{model_key}` + - 返回 model + resolved_spec_yaml + events(分页可后置)+ serve_status(先空/占位) +- [ ] `PATCH /api/v2/serve/models/{model_key}`(JSON) + - 支持 `num_replicas`(最小闭环) +- [ ] `DELETE /api/v2/serve/models/{model_key}` + +### M3.2 API 路由(admin,可选) +- [ ] `GET /api/v2/serve/status`(仅 admin token) + +### M3.3 错误映射(必须测试) +- [ ] YAML 解析失败:400 +- [ ] spec 校验失败:422 +- [ ] 越权路径:403 +- [ ] 不存在 model_key:404 + +### M3.4 单测 +- [ ] `test_app_serving_api.py` + - happy path:create → list → get → patch → delete + - 多用户隔离:用户只能看到自己的 model + - 错误码覆盖:400/403/404/422 + +**验收**: +- API reference (`v3.8_api.md`) 中所有管理接口可返回预期结构(Serve 未接入也能工作) + +--- + +## M4 - ServeClient 抽象 + LLMConfig builder(单测驱动) + +**目的**:将“如何从 ResolvedServingSpec 构造 LLMConfig”固化,并把 Ray Serve 的依赖隔离到 client 里,便于 mock。 + +### M4.1 `ServeClient` 接口(可 mock) +- [ ] `ensure_started(http_port=8000, proxy_location="HeadOnly")` +- [ ] `apply_app(app_name, llm_configs)`(multi-model) +- [ ] `get_status()`(serve.status 摘要) + +### M4.2 `build_llm_config(resolved_spec, accelerator_type, runtime_env_defaults)` 纯函数 +- [ ] 写入 `LLMConfig.accelerator_type`(来自 dev.yaml:H20) +- [ ] `deployment_config.num_replicas` +- [ ] `engine_kwargs.tensor_parallel_size = gpus_per_replica` +- [ ] `placement_group_config` bundles 按 GPU 张数生成 +- [ ] `runtime_env.env_vars` 注入(至少包含 HF cache + `HF_HUB_OFFLINE=1`) + +### M4.3 单测 +- [ ] `test_llm_config_builder.py` + - gpus_per_replica=1/2/4 → tensor_parallel_size 与 bundles 数量正确 + - accelerator_type 注入正确 + - runtime_env 含 HF_HUB_OFFLINE 等关键 env + +**验收**: +- 从平台 spec 到 Ray Serve LLMConfig 的映射规则稳定,有单测锁定 + +--- + +## M5 - Serving Reconciler(状态机 + 资源预检查)(单测驱动) + +**目的**:实现声明式对齐:DB → Serve;同时提供可解释的 QUEUED/FAILED 状态。 + +### M5.1 状态机(最小闭环) +- [ ] `QUEUED`:等待 apply +- [ ] `DEPLOYING`:已触发 apply,等待 Serve running/healthy +- [ ] `RUNNING`:Serve status running +- [ ] `FAILED`:apply 或 status 失败(写 error_summary + event) +- [ ] `DELETING`:等待从 app 中移除 +- [ ] `DELETED`:完成删除(可选保留记录) + +### M5.2 资源预检查 +- [ ] `needed_total_gpus = sum(num_replicas*gpus_per_replica)`(最小可用预检查) +- [ ] `ray.available_resources()["GPU"]`(或更稳健的 per-node 统计)不足时: + - 保持 `QUEUED` + - 记录 `PENDING_RESOURCES` event + +### M5.3 reconcile 策略(multi-model app) +- [ ] tick 读取 active models,构建全量 `llm_configs` +- [ ] 处理 deleting:从 configs 中移除对应 model,再 apply + +### M5.4 单测(mock ServeClient + mock ray resources) +- [ ] `test_serving_reconciler.py` + - 新增模型:apply_app 被调用;state 进入 DEPLOYING + - 删除模型:apply_app configs 不包含该模型 + - GPU 不足:不 apply;state 仍 QUEUED;event 写入 + - apply 抛异常:state FAILED;error_summary 写入 + +**验收**: +- reconciler 行为在纯单测环境可验证;失败可解释 + +--- + +## M6 - 真实集成(h1):Ray Serve 启动 + 推理闭环(E2E) + +**目的**:在 dev/h1 环境真正跑通:部署模型 → `/v1/models` 可见 → `chat/completions` 成功 → 删除后消失。 + +### M6.1 compose/端口 +- [ ] `src/mvp/docker-compose.yaml`:`ray_head` 增加 `8000:8000` + +### M6.2 镜像依赖(若 M0 发现缺失) +- [ ] 在 `argus-ray-node` 镜像中补齐 `ray[serve,llm]`(版本与现有 Ray 对齐,避免升级 Ray 导致不兼容) + - 推荐优先补齐 `ray[llm]`(包含 `ray.serve.llm` 依赖闭包,如 `gymnasium`),再按需补 
+---
+
+## M6 - Real integration (h1): Ray Serve startup + inference loop (E2E)
+
+**Purpose**: make it actually work in the dev/h1 environment: deploy a model → it shows up in `/v1/models` → `chat/completions` succeeds → it disappears after deletion.
+
+### M6.1 compose / ports
+- [ ] `src/mvp/docker-compose.yaml`: add `8000:8000` to `ray_head`
+
+### M6.2 Image dependencies (if M0 found gaps)
+- [ ] add `ray[serve,llm]` to the `argus-ray-node` image (keep the version aligned with the existing Ray; avoid upgrading Ray and breaking compatibility)
+  - prefer adding `ray[llm]` first (it pulls in the `ray.serve.llm` dependency closure, e.g. `gymnasium`), then add `ray[serve]` as needed
+  - verification: `python3 -c "from ray.serve.llm import LLMConfig, build_openai_app; print('serve_llm_ok')"`
+
+### M6.3 E2E script (idempotent)
+- [ ] add `scripts/run_all_v38_serving.sh`:
+  - bring up compose (make sure the Serve port is reachable)
+  - start the API
+  - create a user + token
+  - `POST /api/v2/serve/models` to create a 1-GPU model
+  - poll the model state until RUNNING
+  - `curl http://127.0.0.1:8000/v1/models` and verify the full model_id (`<prefix>-<suffix>`) is listed
+  - `curl http://127.0.0.1:8000/v1/chat/completions` for a minimal inference
+  - `DELETE /api/v2/serve/models/{model_key}` to take it down
+  - poll `/v1/models` again and verify the model is gone
+
+**Acceptance**:
+- the E2E run is repeatable (at least two consecutive runs without manual cleanup)
+
+---
+
+## M7 - WebUI (Serving pages) (unit-test driven)
+
+**Purpose**: give users a visual model-management page (minimal necessary features).
+
+### M7.1 Pages
+- [ ] add Serving to the sidebar
+- [ ] `/ui/serving`: list + state + actions (delete/scale)
+- [ ] `/ui/serving/new`: YAML input + submit
+- [ ] `/ui/serving/{model_key}`: detail (resolved spec, events, OpenAI call example)
+
+### M7.2 Unit tests
+- [ ] `test_ui_serving.py`: routes return 200, key links exist, pages contain the port-8000 openai_base_url
+
+**Acceptance**:
+- the WebUI covers the main create/list/detail/scale/delete flow
+
+---
+
+## M8 - Docs and acceptance cases (delivery)
+
+**Purpose**: give users/ops a reusable way to run this plus a troubleshooting path.
+
+- [ ] update `specs/mvp/v3.8/v3.8_progress.md` (record per milestone)
+- [ ] extend the README (optional): port notes, a warning that the inference API is unauthenticated, model path conventions
+- [ ] acceptance checklist:
+  - unit tests pass
+  - h1 E2E passes
+  - the UI main flow is operable
diff --git a/specs/mvp/v3.8/v3.8_progress.md b/specs/mvp/v3.8/v3.8_progress.md
new file mode 100644
index 0000000..83eae88
--- /dev/null
+++ b/specs/mvp/v3.8/v3.8_progress.md
@@ -0,0 +1,48 @@
+# MVP v3.8 Progress Log
+
+## 2026-01-06
+
+- Completed the v3.8 design doc: `specs/mvp/v3.8/v3.8_design.md`
+- Completed the v3.8 Serving API reference: `specs/mvp/v3.8/v3.8_api.md`
+- Completed the v3.8 TDD development plan: `specs/mvp/v3.8/v3.8_dev_plan.md`
+- Completed M0: added the `serving` config to `configs/dev.yaml` (http_port=8000, proxy_location=HeadOnly, accelerator_type=H20)
+- Completed M1: ServingSpec parsing / macro expansion / path validation + unit tests (`src/mvp/py/argus/service/serving_spec.py`)
+- Completed M2: new SQLite tables `serve_models`/`serve_events` + Db API + unit tests (`src/mvp/py/argus/service/db.py`)
+- Completed M3: FastAPI serving management API + unit tests (`src/mvp/py/argus/service/app.py`)
+- Completed M4: ServeClient abstraction + LLMConfig builder (dict form) + unit tests (`src/mvp/py/argus/service/serve_client.py`, `src/mvp/py/argus/service/serve_llm_config.py`)
+- Completed M5: serving reconciler (state machine + resource precheck + mocked unit tests) (`src/mvp/py/argus/service/serving_reconciler.py`)
+
+### M6 (real integration on h1)
+
+- Added the missing deps to the `argus-ray-node` image: `ray[serve,llm]` + `gymnasium` + `dm-tree` (so that importing `ray.serve.llm` does not fail)
+- Fixed Ray 2.49.2 compatibility issues:
+  - `LLMConfig` does not support `placement_group_config`; switched to `resources_per_bundle` (`src/mvp/py/argus/service/serve_llm_config.py`)
+- Remote E2E:
+  - `scripts/run_all_v38_serving.sh` runs end to end: create → RUNNING → `/v1/models` → `chat/completions` → delete → DELETED
+  - Fixed a bash heredoc quoting error in the script's `/v1/models` parsing (`src/mvp/scripts/run_all_v38_serving.sh`)
+
+### M7 (WebUI - Serving)
+
+- Added Serving pages to the WebUI:
+  - list: `/ui/serving`
+  - create: `/ui/serving/new`
+  - detail/events/scale/delete: `/ui/serving/{model_key}`
+- Unit test coverage:
+  - `src/mvp/py/tests/test_ui_serving.py`
+
+### M8 (docs / acceptance)
+
+- `src/mvp/README.md` now documents the v3.8 serving port and the E2E script
+
+### Environment probe (h1 / head container)
+
+> Purpose: confirm whether the Ray Serve LLM dependencies work out of the box, so the gap does not surface only during the integration phase.
+
+- `ray`: available, version `2.49.2`
+- `ray.serve`: importable (Serve basics available)
+- `ray.serve.llm`: currently not importable
+  - error: `ModuleNotFoundError: No module named 'gymnasium'`
+  - cause: the `ray.serve.llm` import chain pulls in `ray.rllib`, and rllib depends on `gymnasium`
+
+Conclusion:
+- during implementation, v3.8 must add `ray[llm]` (recommended) to the `argus-ray-node` image, or at minimum `gymnasium` and the other required deps, so that `from ray.serve.llm import ...` works.
diff --git a/src/mvp/README.md b/src/mvp/README.md
index 4fd720e..26cd18d 100644
--- a/src/mvp/README.md
+++ b/src/mvp/README.md
@@ -24,3 +24,9 @@ v3.0 access entrypoints (dev/h1):
 - SFTPGo:
   - SFTP:`127.0.0.1:2022`
   - Admin API/UI: `http://127.0.0.1:8081` (8080 inside the container; mapped to host port 8081 to avoid clashing with the API server)
+
+v3.8 (Ray Serve LLM / vLLM model serving):
+- Inference port: `8000` (Ray Serve HTTP)
+- OpenAI-compatible endpoint: `http://127.0.0.1:8000/v1`
+  - Note: the v3.8 inference API has **no authentication**
+- E2E script: `scripts/run_all_v38_serving.sh` (a minimal smoke-check sketch follows after the image changes below)
diff --git a/src/mvp/configs/dev.yaml b/src/mvp/configs/dev.yaml index 41e5475..4c41416 100644 --- a/src/mvp/configs/dev.yaml +++ b/src/mvp/configs/dev.yaml @@ -69,3 +69,11 @@ data: jobs_trash_after_days: 3 jobs_purge_after_days: 7 janitor_interval_s: 3600 + +# v3.8: model serving via Ray Serve LLM (vLLM backend) +serving: + serve: + http_port: 8000 + proxy_location: HeadOnly + llm: + accelerator_type: H20 diff --git a/src/mvp/docker-compose.yaml b/src/mvp/docker-compose.yaml index 16e73d8..0bb57c7 100644 --- a/src/mvp/docker-compose.yaml +++ b/src/mvp/docker-compose.yaml @@ -1,10 +1,16 @@ services: ray_head: image: argus/argus-ray-node:vllm011.latest + build: + context: . + dockerfile: images/argus-ray-node/Dockerfile + args: + BASE_IMAGE: verlai/verl:vllm011.latest container_name: argus-ray-head ports: - "8265:8265" - "8080:8080" + - "8000:8000" volumes: # NOTE: this compose file is intended for the dev env layout like: # /home2/argus/infra/mvp/{shared,verl,src/mvp} @@ -92,6 +98,11 @@ services: ray_worker_0: image: argus/argus-ray-node:vllm011.latest + build: + context: . + dockerfile: images/argus-ray-node/Dockerfile + args: + BASE_IMAGE: verlai/verl:vllm011.latest container_name: argus-ray-worker-0 volumes: - ../../verl:/workspace/verl @@ -124,6 +135,11 @@ services: ray_worker_1: image: argus/argus-ray-node:vllm011.latest + build: + context: . + dockerfile: images/argus-ray-node/Dockerfile + args: + BASE_IMAGE: verlai/verl:vllm011.latest container_name: argus-ray-worker-1 volumes: - ../../verl:/workspace/verl diff --git a/src/mvp/images/argus-ray-node/Dockerfile b/src/mvp/images/argus-ray-node/Dockerfile index 06423f1..54d1be2 100644 --- a/src/mvp/images/argus-ray-node/Dockerfile +++ b/src/mvp/images/argus-ray-node/Dockerfile @@ -6,6 +6,15 @@ SHELL ["/bin/bash", "-lc"] # Install supervisord (prefer pip to avoid relying on distro package manager). RUN python3 -m pip install --no-cache-dir supervisor +# v3.8: Ray Serve LLM deps (keep Ray version pinned to what's already in the base image). +# NOTE: base image already includes Ray; we only add extras. +RUN RAY_VER="$(python3 -c 'import ray; print(ray.__version__)')" && \ + python3 -m pip install --no-cache-dir "ray[serve,llm]==${RAY_VER}" +# Ray Serve LLM's import chain currently pulls in ray.rllib which requires extra deps. +# Install them explicitly to make `from ray.serve.llm import ...` work reliably. +RUN python3 -m pip install --no-cache-dir gymnasium dm-tree && \ + python3 -c "from ray.serve.llm import LLMConfig, build_openai_app; print('ray_serve_llm_ok')" + RUN mkdir -p /opt/argus/py/argus/ray # Minimal embedded code for stateless pool (API code is intentionally excluded).
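With the port mapping and image dependencies above in place, a deployed model can be exercised directly through the unauthenticated OpenAI-compatible endpoint on port 8000. The snippet below is a minimal, hedged smoke check, not the bundled `scripts/serve_llm_smoke.py`; the model name is only an example of the platform's `<user_id>-<YYYYMMDDHHMM>-<suffix>` form, and the response parsing assumes the standard OpenAI list/chat response shapes.

```python
# Hedged smoke check against the v3.8 serving endpoint (illustrative, stdlib only).
import json
import urllib.request

BASE = "http://127.0.0.1:8000/v1"  # Ray Serve HTTP port from docker-compose / dev.yaml

# 1) List deployed models; the platform-generated model_id should appear here.
with urllib.request.urlopen(f"{BASE}/models", timeout=30) as resp:
    listing = json.load(resp)
print([m["id"] for m in listing.get("data", [])])

# 2) Minimal chat completion against one deployed model (no auth header needed in v3.8).
payload = {
    "model": "alice-202601061200-qwen-0.5b",  # illustrative model_id
    "messages": [{"role": "user", "content": "hello"}],
    "max_tokens": 16,
    "stream": False,
}
req = urllib.request.Request(
    f"{BASE}/chat/completions",
    data=json.dumps(payload).encode("utf-8"),
    headers={"Content-Type": "application/json"},
    method="POST",
)
with urllib.request.urlopen(req, timeout=120) as resp:
    answer = json.load(resp)
print(answer["choices"][0]["message"]["content"])
```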
diff --git a/src/mvp/images/argus-ray-node/argus-head-ray.sh b/src/mvp/images/argus-ray-node/argus-head-ray.sh index 1e873dc..31c5a54 100644 --- a/src/mvp/images/argus-ray-node/argus-head-ray.sh +++ b/src/mvp/images/argus-ray-node/argus-head-ray.sh @@ -16,9 +16,8 @@ exec ray start \ --port="${ray_port}" \ --dashboard-host=0.0.0.0 \ --dashboard-port="${dashboard_port}" \ - --num-cpus=0 \ + --num-cpus="${ARGUS_HEAD_NUM_CPUS:-1}" \ --num-gpus=0 \ --disable-usage-stats \ --block \ ${ARGUS_RAY_EXTRA_ARGS:-} - diff --git a/src/mvp/py/argus/core/ids.py b/src/mvp/py/argus/core/ids.py index bd46978..abc7ed8 100644 --- a/src/mvp/py/argus/core/ids.py +++ b/src/mvp/py/argus/core/ids.py @@ -26,3 +26,19 @@ def new_task_id(workload: str, *, user_id: str | None = None) -> str: def attempt_submission_id(task_id: str, attempt_no: int) -> str: return f"{task_id}--a{attempt_no:02d}" + + +def new_model_key(*, user_id: str) -> str: + """ + Internal identifier for a serving model record. + + Note: + - model_id is the OpenAI-facing name (user_id + timestamp prefix + suffix). + - model_key is used for stable DB identity and API resource path. + """ + ts = datetime.now().strftime("%Y%m%d-%H%M%S") + suffix = secrets.token_hex(2) + u = _normalize_user_id(user_id) + if not u: + raise ValueError("user_id is required") + return f"mvp2-{u}-serve-{ts}-{suffix}" diff --git a/src/mvp/py/argus/service/app.py b/src/mvp/py/argus/service/app.py index faa7942..f6ab121 100644 --- a/src/mvp/py/argus/service/app.py +++ b/src/mvp/py/argus/service/app.py @@ -4,11 +4,13 @@ import os import secrets import threading from typing import Any +import json +from dataclasses import asdict import yaml from fastapi import FastAPI, HTTPException, Request, Response -from argus.core.ids import new_task_id +from argus.core.ids import new_model_key, new_task_id from argus.ray.models import AdvancedTaskSpec, JobSpec, RayConfig, parse_taskspec from .advanced_command import expand_advanced_command, validate_advanced_command @@ -16,6 +18,7 @@ from .config import V2Config from .db import Db from .janitor import JobsJanitor from .scheduler import Scheduler +from .serving_spec import ServingSpec, parse_serving_spec, resolve_serving_spec from .sftpgo import SFTPGoAdminClient, SFTPGoError from .ui import register_ui_routes @@ -85,6 +88,61 @@ def create_app(config_path: str) -> FastAPI: common_root=f"{shared_root}/common", ) + def _serving_enabled() -> bool: + return bool(v2_cfg.serving.enabled) + + def _openai_base_url(req: Request) -> str: + # Prefer forwarded headers if present; otherwise fall back to Host. + host = req.headers.get("x-forwarded-host") or req.headers.get("host") or req.url.hostname or "127.0.0.1" + # Strip port if present (common for Host header). 
+ hostname = host + if hostname.startswith("[") and "]" in hostname: + # IPv6 like: [::1]:8080 + hostname = hostname.split("]")[0] + "]" + else: + hostname = hostname.split(":")[0] + scheme = req.headers.get("x-forwarded-proto") or req.url.scheme or "http" + port = int(v2_cfg.serving.serve.http_port) + return f"{scheme}://{hostname}:{port}/v1" + + def _dump_yaml(obj: Any) -> str: + return yaml.safe_dump(obj, sort_keys=False) + + def _serving_spec_to_dict(spec: ServingSpec) -> dict[str, Any]: + return { + "model_id": spec.model_id, + "model_source": spec.model_source, + "num_replicas": int(spec.num_replicas), + "gpus_per_replica": int(spec.gpus_per_replica), + "engine_kwargs": spec.engine_kwargs, + } + + def _serve_model_public(row: dict[str, Any], *, req: Request) -> dict[str, Any]: + num_replicas = int(row.get("num_replicas") or 0) + gpus_per_replica = int(row.get("gpus_per_replica") or 0) + total_gpus = num_replicas * gpus_per_replica + model_id = str(row.get("model_id") or "") + return { + "model_key": str(row.get("model_key") or ""), + "user_id": str(row.get("user_id") or ""), + "model_id": model_id, + "model_id_suffix": str(row.get("model_id_suffix") or ""), + "model_id_prefix": str(row.get("model_id_prefix") or ""), + "model_source": str(row.get("model_source") or ""), + "num_replicas": num_replicas, + "gpus_per_replica": gpus_per_replica, + "total_gpus": total_gpus, + "state": str(row.get("state") or ""), + "error_summary": row.get("error_summary"), + "created_at": str(row.get("created_at") or ""), + "updated_at": str(row.get("updated_at") or ""), + "deleted_at": row.get("deleted_at"), + "endpoint": { + "openai_base_url": _openai_base_url(req), + "model": model_id, + }, + } + def _auth(req: Request) -> dict[str, Any]: token_env = v2_cfg.auth.token_env admin_token = os.environ.get(token_env, "") @@ -565,6 +623,162 @@ def create_app(config_path: str) -> FastAPI: return db.list_queue() return db.list_queue(user_id=str(subject["user_id"])) + # v3.8: Model serving (Ray Serve LLM) management APIs. 
+ @app.post("/api/v2/serve/models") + async def create_serve_model(req: Request) -> dict[str, Any]: + subject = _auth(req) + if not _serving_enabled(): + raise HTTPException(status_code=400, detail="serving is not enabled") + + body = (await req.body()).decode("utf-8") + try: + obj = yaml.safe_load(body) or {} + except Exception as e: + raise HTTPException(status_code=400, detail=f"invalid YAML: {e!r}") + if not isinstance(obj, dict): + raise HTTPException(status_code=400, detail="serving spec must be a YAML mapping") + + user_id = str(subject["user_id"]).strip() + try: + spec = parse_serving_spec(obj) + resolved = resolve_serving_spec(spec=spec, user_id=user_id) + except PermissionError as e: + raise HTTPException(status_code=403, detail=str(e)) + except ValueError as e: + msg = str(e) + code = 422 if ("num_replicas" in msg or "gpus_per_replica" in msg) else 400 + raise HTTPException(status_code=code, detail=f"invalid serving spec: {e!r}") + + model_key = new_model_key(user_id=user_id) + try: + engine_kwargs_json = json.dumps(resolved.engine_kwargs, sort_keys=True) if resolved.engine_kwargs is not None else None + except TypeError as e: + raise HTTPException(status_code=400, detail=f"engine_kwargs must be JSON-serializable: {e!r}") + + spec_yaml = _dump_yaml(_serving_spec_to_dict(spec)) + resolved_spec_yaml = _dump_yaml(asdict(resolved)) + + db.create_serve_model( + model_key=model_key, + user_id=user_id, + model_id_suffix=resolved.model_id_suffix, + model_id_prefix=resolved.model_id_prefix, + model_id=resolved.model_id, + model_source=resolved.model_source, + num_replicas=resolved.num_replicas, + gpus_per_replica=resolved.gpus_per_replica, + engine_kwargs_json=engine_kwargs_json, + spec_yaml=spec_yaml, + resolved_spec_yaml=resolved_spec_yaml, + ) + return {"model_key": model_key, "state": "QUEUED"} + + @app.get("/api/v2/serve/models") + async def list_serve_models(req: Request, limit: int = 200, offset: int = 0, include_deleted: int = 0) -> dict[str, Any]: + subject = _auth(req) + if not _serving_enabled(): + raise HTTPException(status_code=400, detail="serving is not enabled") + + lim = max(1, min(int(limit), 1000)) + off = max(0, int(offset)) + inc = bool(int(include_deleted)) + user_id = str(subject["user_id"]) + + items = db.list_serve_models(user_id=user_id, include_deleted=inc, limit=lim, offset=off) + out = [_serve_model_public(i, req=req) for i in items] + return { + "items": out, + "openai_base_url": _openai_base_url(req), + "limit": lim, + "offset": off, + "has_more": bool(len(items) == lim), + } + + @app.get("/api/v2/serve/models/{model_key}") + async def get_serve_model(model_key: str, req: Request) -> dict[str, Any]: + subject = _auth(req) + if not _serving_enabled(): + raise HTTPException(status_code=400, detail="serving is not enabled") + + row = db.get_serve_model(model_key) + if not row: + raise HTTPException(status_code=404, detail="model not found") + if not subject.get("is_admin"): + if str(row.get("user_id") or "") != str(subject["user_id"]): + raise HTTPException(status_code=404, detail="model not found") + + events = db.list_serve_events(model_key, limit=200, offset=0) + ev_out = [ + { + "id": int(e.get("id") or 0), + "model_key": str(e.get("model_key") or ""), + "created_at": str(e.get("ts") or ""), + "event_type": str(e.get("event_type") or ""), + "payload_json": e.get("payload_json"), + } + for e in events + ] + return { + "model": _serve_model_public(row, req=req), + "resolved_spec_yaml": str(row.get("resolved_spec_yaml") or ""), + "events": ev_out, + 
"serve_status": None, + } + + @app.patch("/api/v2/serve/models/{model_key}") + async def patch_serve_model(model_key: str, req: Request) -> dict[str, Any]: + subject = _auth(req) + if not _serving_enabled(): + raise HTTPException(status_code=400, detail="serving is not enabled") + + row = db.get_serve_model(model_key) + if not row: + raise HTTPException(status_code=404, detail="model not found") + if not subject.get("is_admin"): + if str(row.get("user_id") or "") != str(subject["user_id"]): + raise HTTPException(status_code=404, detail="model not found") + + obj = await req.json() + if not isinstance(obj, dict): + raise HTTPException(status_code=400, detail="body must be a JSON object") + if "num_replicas" not in obj: + raise HTTPException(status_code=400, detail="missing num_replicas") + num_replicas = obj.get("num_replicas") + if not isinstance(num_replicas, int) or int(num_replicas) < 1: + raise HTTPException(status_code=422, detail="num_replicas must be an integer >= 1") + + db.update_serve_model_num_replicas(model_key=model_key, num_replicas=int(num_replicas)) + return {"model_key": model_key, "state": "QUEUED"} + + @app.delete("/api/v2/serve/models/{model_key}") + async def delete_serve_model(model_key: str, req: Request) -> dict[str, Any]: + subject = _auth(req) + if not _serving_enabled(): + raise HTTPException(status_code=400, detail="serving is not enabled") + + row = db.get_serve_model(model_key) + if not row: + raise HTTPException(status_code=404, detail="model not found") + if not subject.get("is_admin"): + if str(row.get("user_id") or "") != str(subject["user_id"]): + raise HTTPException(status_code=404, detail="model not found") + + db.set_serve_model_state(model_key=model_key, state="DELETING", event_type="SERVE_DELETE_REQUESTED") + return {"model_key": model_key, "state": "DELETING"} + + @app.get("/api/v2/serve/status") + async def serve_status(req: Request) -> dict[str, Any]: + _require_admin(req) + if not _serving_enabled(): + raise HTTPException(status_code=400, detail="serving is not enabled") + return { + "enabled": True, + "openai_base_url": _openai_base_url(req), + "http_port": int(v2_cfg.serving.serve.http_port), + "proxy_location": str(v2_cfg.serving.serve.proxy_location), + "accelerator_type": str(v2_cfg.serving.llm.accelerator_type), + } + # v3.0: minimal WebUI (no server-side session; token stored in browser localStorage). 
register_ui_routes(app) diff --git a/src/mvp/py/argus/service/config.py b/src/mvp/py/argus/service/config.py index 193aef2..babee4f 100644 --- a/src/mvp/py/argus/service/config.py +++ b/src/mvp/py/argus/service/config.py @@ -57,6 +57,24 @@ class V2SFTPGoConfig: admin_password_env: str = "SFTPGO_ADMIN_PASSWORD" +@dataclass(frozen=True) +class V2ServingServeConfig: + http_port: int = 8000 + proxy_location: str = "HeadOnly" + + +@dataclass(frozen=True) +class V2ServingLLMConfig: + accelerator_type: str = "" + + +@dataclass(frozen=True) +class V2ServingConfig: + enabled: bool = False + serve: V2ServingServeConfig = V2ServingServeConfig() + llm: V2ServingLLMConfig = V2ServingLLMConfig() + + @dataclass(frozen=True) class V2DataConfig: user_root: str @@ -72,6 +90,7 @@ class V2Config: scheduler: V2SchedulerConfig tracking: V2TrackingConfig data: V2DataConfig + serving: V2ServingConfig @staticmethod def from_root_dict(root: dict[str, Any]) -> "V2Config": @@ -112,6 +131,15 @@ class V2Config: if not isinstance(sftpgo, dict) or not isinstance(retention, dict): raise ValueError("config.data.{sftpgo,retention} must be mappings") + serving = root.get("serving") or {} + if not isinstance(serving, dict): + raise ValueError("config.serving must be a mapping") + serving_enabled = bool(serving.get("enabled")) if "enabled" in serving else bool(serving) + serving_serve = serving.get("serve") or {} + serving_llm = serving.get("llm") or {} + if not isinstance(serving_serve, dict) or not isinstance(serving_llm, dict): + raise ValueError("config.serving.{serve,llm} must be mappings") + default_db_path = f"{shared_root}/common/db/mvp.sqlite3" db_path = str(sqlite.get("db_path") or default_db_path) @@ -158,4 +186,14 @@ class V2Config: janitor_interval_s=int(retention.get("janitor_interval_s") or 3600), ), ), + serving=V2ServingConfig( + enabled=serving_enabled, + serve=V2ServingServeConfig( + http_port=int(serving_serve.get("http_port") or 8000), + proxy_location=str(serving_serve.get("proxy_location") or "HeadOnly"), + ), + llm=V2ServingLLMConfig( + accelerator_type=str(serving_llm.get("accelerator_type") or ""), + ), + ), ) diff --git a/src/mvp/py/argus/service/db.py b/src/mvp/py/argus/service/db.py index 8ca314b..6c18c56 100644 --- a/src/mvp/py/argus/service/db.py +++ b/src/mvp/py/argus/service/db.py @@ -117,6 +117,43 @@ class Db: ) """ ) + conn.execute( + """ + CREATE TABLE IF NOT EXISTS serve_models ( + model_key TEXT PRIMARY KEY, + user_id TEXT NOT NULL, + model_id_suffix TEXT NOT NULL, + model_id_prefix TEXT NOT NULL, + model_id TEXT NOT NULL, + model_source TEXT NOT NULL, + num_replicas INTEGER NOT NULL, + gpus_per_replica INTEGER NOT NULL, + engine_kwargs_json TEXT, + state TEXT NOT NULL, + spec_yaml TEXT NOT NULL, + resolved_spec_yaml TEXT NOT NULL, + error_summary TEXT, + created_at TEXT NOT NULL, + updated_at TEXT NOT NULL, + deleted_at TEXT + ) + """ + ) + conn.execute( + """ + CREATE TABLE IF NOT EXISTS serve_events ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + model_key TEXT NOT NULL, + ts TEXT NOT NULL, + event_type TEXT NOT NULL, + payload_json TEXT, + FOREIGN KEY (model_key) REFERENCES serve_models(model_key) ON DELETE CASCADE + ) + """ + ) + conn.execute("CREATE INDEX IF NOT EXISTS idx_serve_models_user ON serve_models(user_id)") + conn.execute("CREATE INDEX IF NOT EXISTS idx_serve_models_state ON serve_models(state)") + conn.execute("CREATE INDEX IF NOT EXISTS idx_serve_events_model ON serve_events(model_key)") @contextmanager def tx(self) -> Iterator[sqlite3.Connection]: @@ -493,3 +530,239 @@ class 
Db: (str(end_time_le), int(limit)), ).fetchall() return [dict(r) for r in rows] + + def create_serve_model( + self, + *, + model_key: str, + user_id: str, + model_id_suffix: str, + model_id_prefix: str, + model_id: str, + model_source: str, + num_replicas: int, + gpus_per_replica: int, + spec_yaml: str, + resolved_spec_yaml: str, + engine_kwargs_json: str | None = None, + ) -> dict[str, Any]: + now = _utc_now_iso() + with self.tx() as conn: + conn.execute( + """ + INSERT INTO serve_models ( + model_key, + user_id, + model_id_suffix, + model_id_prefix, + model_id, + model_source, + num_replicas, + gpus_per_replica, + engine_kwargs_json, + state, + spec_yaml, + resolved_spec_yaml, + created_at, + updated_at + ) + VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, 'QUEUED', ?, ?, ?, ?) + """, + ( + model_key, + user_id, + model_id_suffix, + model_id_prefix, + model_id, + model_source, + int(num_replicas), + int(gpus_per_replica), + engine_kwargs_json, + spec_yaml, + resolved_spec_yaml, + now, + now, + ), + ) + conn.execute( + "INSERT INTO serve_events (model_key, ts, event_type, payload_json) VALUES (?, ?, 'SERVE_MODEL_CREATED', ?)", + (model_key, now, None), + ) + row = conn.execute("SELECT * FROM serve_models WHERE model_key = ?", (model_key,)).fetchone() + return dict(row) if row else {} + + def list_serve_models( + self, + *, + user_id: str, + include_deleted: bool = False, + limit: int = 200, + offset: int = 0, + ) -> list[dict[str, Any]]: + with self._connect() as conn: + where_sql = "WHERE user_id = ?" + params: list[Any] = [user_id] + if not include_deleted: + where_sql += " AND deleted_at IS NULL" + params.append(int(limit)) + params.append(max(0, int(offset))) + rows = conn.execute( + f""" + SELECT + model_key, + user_id, + model_id_suffix, + model_id_prefix, + model_id, + model_source, + num_replicas, + gpus_per_replica, + engine_kwargs_json, + state, + error_summary, + created_at, + updated_at, + deleted_at + FROM serve_models + {where_sql} + ORDER BY created_at DESC, model_key DESC + LIMIT ? OFFSET ? + """, + tuple(params), + ).fetchall() + return [dict(r) for r in rows] + + def list_all_serve_models( + self, + *, + include_deleted: bool = False, + limit: int = 2000, + offset: int = 0, + ) -> list[dict[str, Any]]: + with self._connect() as conn: + where_sql = "" + if not include_deleted: + where_sql = "WHERE deleted_at IS NULL" + rows = conn.execute( + f""" + SELECT + model_key, + user_id, + model_id_suffix, + model_id_prefix, + model_id, + model_source, + num_replicas, + gpus_per_replica, + engine_kwargs_json, + state, + error_summary, + spec_yaml, + resolved_spec_yaml, + created_at, + updated_at, + deleted_at + FROM serve_models + {where_sql} + ORDER BY created_at ASC, model_key ASC + LIMIT ? OFFSET ? + """, + (int(limit), max(0, int(offset))), + ).fetchall() + return [dict(r) for r in rows] + + def get_serve_model(self, model_key: str) -> dict[str, Any] | None: + with self._connect() as conn: + row = conn.execute("SELECT * FROM serve_models WHERE model_key = ?", (model_key,)).fetchone() + return dict(row) if row else None + + def list_serve_events(self, model_key: str, limit: int = 200, offset: int = 0) -> list[dict[str, Any]]: + with self._connect() as conn: + rows = conn.execute( + """ + SELECT id, model_key, ts, event_type, payload_json + FROM serve_events + WHERE model_key = ? + ORDER BY id DESC + LIMIT ? OFFSET ? 
+ """, + (model_key, int(limit), max(0, int(offset))), + ).fetchall() + return [dict(r) for r in rows] + + def append_serve_event(self, *, model_key: str, event_type: str, payload_json: str | None = None) -> None: + now = _utc_now_iso() + with self.tx() as conn: + conn.execute( + "INSERT INTO serve_events (model_key, ts, event_type, payload_json) VALUES (?, ?, ?, ?)", + (model_key, now, event_type, payload_json), + ) + + def set_serve_model_state( + self, + *, + model_key: str, + state: str, + error_summary: str | None = None, + event_type: str = "SERVE_STATE_UPDATE", + payload_json: str | None = None, + ) -> None: + now = _utc_now_iso() + with self.tx() as conn: + sets = ["state = ?", "updated_at = ?"] + params: list[Any] = [state, now] + if error_summary is not None: + sets.append("error_summary = ?") + params.append(error_summary) + if state == "DELETED": + sets.append("deleted_at = ?") + params.append(now) + params.append(model_key) + conn.execute(f"UPDATE serve_models SET {', '.join(sets)} WHERE model_key = ?", tuple(params)) + conn.execute( + "INSERT INTO serve_events (model_key, ts, event_type, payload_json) VALUES (?, ?, ?, ?)", + (model_key, now, event_type, payload_json), + ) + + def update_serve_model_num_replicas(self, *, model_key: str, num_replicas: int) -> None: + if not isinstance(num_replicas, int) or num_replicas < 1: + raise ValueError("num_replicas must be an integer >= 1") + now = _utc_now_iso() + with self.tx() as conn: + conn.execute( + """ + UPDATE serve_models + SET num_replicas = ?, state = 'QUEUED', error_summary = NULL, updated_at = ? + WHERE model_key = ? + """, + (int(num_replicas), now, model_key), + ) + conn.execute( + "INSERT INTO serve_events (model_key, ts, event_type, payload_json) VALUES (?, ?, 'SERVE_PATCH_NUM_REPLICAS', ?)", + (model_key, now, str(num_replicas)), + ) + + def pick_next_runnable_serve_change(self) -> dict[str, Any] | None: + """ + Returns the next serve model that needs reconciliation. + + Minimal state machine for now: + - QUEUED: needs (re)apply + - DELETING: needs removal + """ + with self._connect() as conn: + row = conn.execute( + """ + SELECT * + FROM serve_models + WHERE deleted_at IS NULL + AND state IN ('QUEUED','DELETING') + ORDER BY updated_at ASC + LIMIT 1 + """ + ).fetchone() + return dict(row) if row else None + + # Backward compatible naming (v3.8 docs originally used "upsert"). 
+ def upsert_serve_model(self, **kwargs: Any) -> dict[str, Any]: + return self.create_serve_model(**kwargs) diff --git a/src/mvp/py/argus/service/scheduler.py b/src/mvp/py/argus/service/scheduler.py index 4fb3fc1..624aca9 100644 --- a/src/mvp/py/argus/service/scheduler.py +++ b/src/mvp/py/argus/service/scheduler.py @@ -16,6 +16,8 @@ from argus.ray.ray_job_tool import RayJobTool from .config import V2Config from .db import Db from .ray_resources import ensure_ray_connected, get_cluster_available +from .serve_client import RayServeClient +from .serving_reconciler import ServingReconciler _INSUFFICIENT_RE = re.compile(r"Total available GPUs\\s+\\d+\\s+is less than total desired GPUs\\s+\\d+") @@ -37,6 +39,18 @@ class Scheduler: def __post_init__(self) -> None: self.tool = RayJobTool(self.ray_cfg) + self._serving: ServingReconciler | None = None + if bool(self.v2_cfg.serving.enabled): + self._serving = ServingReconciler( + db=self.db, + v2_cfg=self.v2_cfg, + ray_runtime_env_env_vars=self.ray_cfg.runtime_env_env_vars, + serve_client=RayServeClient( + http_port=int(self.v2_cfg.serving.serve.http_port), + proxy_location=str(self.v2_cfg.serving.serve.proxy_location), + ray_init_address="auto", + ), + ) def _job_dir_for_task(self, *, user_id: str | None, ray_submission_id: str) -> str: root = self.ray_cfg.shared_root.rstrip("/") @@ -251,6 +265,14 @@ class Scheduler: def tick(self) -> None: ensure_ray_connected() + # v3.8: reconcile serve_models (best-effort). + if self._serving is not None: + try: + self._serving.tick() + except Exception: + # Keep scheduler alive even if serving tick fails. + pass + # Sync active tasks for row in self.db.list_active_tasks(limit=50): self._sync_one_running(row) diff --git a/src/mvp/py/argus/service/serve_client.py b/src/mvp/py/argus/service/serve_client.py new file mode 100644 index 0000000..a630468 --- /dev/null +++ b/src/mvp/py/argus/service/serve_client.py @@ -0,0 +1,45 @@ +from __future__ import annotations + +from dataclasses import dataclass +from typing import Any + + +@dataclass(frozen=True) +class RayServeClient: + """ + Minimal Ray Serve client wrapper. + + This is intentionally tiny and uses runtime imports so that: + - unit tests can stub `ray` modules without needing real Ray installed + - production can run with the real Ray Serve stack (v3.8+) + """ + + http_port: int = 8000 + proxy_location: str = "HeadOnly" + ray_init_address: str = "auto" + + def ensure_started(self) -> None: + import ray # runtime import + + # Scheduler already calls ray.init(); make this idempotent. + ray.init(address=self.ray_init_address, ignore_reinit_error=True, log_to_driver=False) # type: ignore[call-arg] + + # Import serve lazily to allow tests to stub it. + from ray import serve # type: ignore + + serve.start(proxy_location=self.proxy_location, http_options={"host": "0.0.0.0", "port": int(self.http_port)}) + + def apply_app(self, *, app: Any, app_name: str, route_prefix: str = "/") -> Any: + from ray import serve # type: ignore + + # If Ray Serve LLM isn't available, callers may pass a plain dict placeholder. + # Running that through serve.run() results in a confusing TypeError; fail fast. 
+ if isinstance(app, dict): + raise ValueError("invalid serve app object (Ray Serve LLM not available or build_openai_app failed)") + + return serve.run(app, name=app_name, route_prefix=route_prefix) + + def get_status(self) -> Any: + from ray import serve # type: ignore + + return serve.status() diff --git a/src/mvp/py/argus/service/serve_llm_config.py b/src/mvp/py/argus/service/serve_llm_config.py new file mode 100644 index 0000000..0dc661e --- /dev/null +++ b/src/mvp/py/argus/service/serve_llm_config.py @@ -0,0 +1,63 @@ +from __future__ import annotations + +from typing import Any + +from .serving_spec import ResolvedServingSpec + + +def _ensure_hf_env_defaults(env: dict[str, str]) -> dict[str, str]: + out = dict(env or {}) + # Prefer existing values if present, but always force offline mode in the platform. + out.setdefault("HF_HOME", "/private/hf") + out.setdefault("HUGGINGFACE_HUB_CACHE", "/private/hf/hub") + out.setdefault("TRANSFORMERS_CACHE", "/private/hf/transformers") + out["HF_HUB_OFFLINE"] = "1" + return out + + +def build_llm_config_dict( + resolved: ResolvedServingSpec, + *, + accelerator_type: str, + runtime_env_env_vars: dict[str, str] | None, + cpu_per_gpu: float = 1.0, +) -> dict[str, Any]: + """ + Pure builder: maps a platform ResolvedServingSpec to a Ray Serve LLM-like config. + + We return a plain dict here to keep this layer unit-testable without depending on + a specific Ray Serve LLM version. The reconciler (later milestone) can choose to + instantiate `ray.serve.llm.LLMConfig` using this dict. + """ + if not accelerator_type: + raise ValueError("accelerator_type is required") + if resolved.num_replicas < 1: + raise ValueError("num_replicas must be >= 1") + if resolved.gpus_per_replica < 1: + raise ValueError("gpus_per_replica must be >= 1") + if cpu_per_gpu <= 0: + raise ValueError("cpu_per_gpu must be > 0") + + engine_kwargs: dict[str, Any] = dict(resolved.engine_kwargs or {}) + # Enforce tensor parallel mapping; user-provided value must not contradict requested GPUs. + engine_kwargs["tensor_parallel_size"] = int(resolved.gpus_per_replica) + + # Ray Serve LLM (Ray 2.49.x) exposes `resources_per_bundle` instead of the older + # `placement_group_config`. Use a single bundle that reserves the full GPU set + # required by tensor-parallel execution. + resources_per_bundle = { + "GPU": float(resolved.gpus_per_replica), + "CPU": float(cpu_per_gpu) * float(resolved.gpus_per_replica), + } + + env_vars = _ensure_hf_env_defaults(dict(runtime_env_env_vars or {})) + + return { + # Ray Serve LLM expects `model_loading_config` with model_id/model_source. 
+ "model_loading_config": {"model_id": resolved.model_id, "model_source": resolved.model_source}, + "accelerator_type": accelerator_type, + "deployment_config": {"num_replicas": int(resolved.num_replicas)}, + "engine_kwargs": engine_kwargs, + "resources_per_bundle": resources_per_bundle, + "runtime_env": {"env_vars": env_vars}, + } diff --git a/src/mvp/py/argus/service/serving_reconciler.py b/src/mvp/py/argus/service/serving_reconciler.py new file mode 100644 index 0000000..2c830d5 --- /dev/null +++ b/src/mvp/py/argus/service/serving_reconciler.py @@ -0,0 +1,151 @@ +from __future__ import annotations + +import json +import traceback +from dataclasses import dataclass +from typing import Any, Protocol + +from argus.service.ray_resources import ClusterAvailable, get_cluster_available + +from .config import V2Config +from .db import Db +from .serve_llm_config import build_llm_config_dict +from .serving_spec import ResolvedServingSpec + + +class ServeClient(Protocol): + def ensure_started(self) -> None: ... + + def apply_app(self, *, app: Any, app_name: str, route_prefix: str = "/") -> Any: ... + + def get_status(self) -> Any: ... + + +def _parse_engine_kwargs(row: dict[str, Any]) -> dict[str, Any] | None: + raw = row.get("engine_kwargs_json") + if raw in (None, ""): + return None + try: + obj = json.loads(str(raw)) + return obj if isinstance(obj, dict) else None + except Exception: + return None + + +def _row_to_resolved_spec(row: dict[str, Any]) -> ResolvedServingSpec: + return ResolvedServingSpec( + user_id=str(row["user_id"]), + model_id_suffix=str(row["model_id_suffix"]), + model_id_prefix=str(row["model_id_prefix"]), + model_id=str(row["model_id"]), + model_source=str(row["model_source"]), + num_replicas=int(row["num_replicas"]), + gpus_per_replica=int(row["gpus_per_replica"]), + engine_kwargs=_parse_engine_kwargs(row), + ) + + +def _needed_total_gpus(rows: list[dict[str, Any]]) -> int: + total = 0 + for r in rows: + total += int(r.get("num_replicas") or 0) * int(r.get("gpus_per_replica") or 0) + return total + + +@dataclass +class ServingReconciler: + """ + v3.8: reconcile declared serve_models (SQLite) into a multi-model Ray Serve app. + + This reconciler is intentionally conservative: + - Only acts on models in states QUEUED/DELETING. + - Performs a minimal GPU precheck using ray available GPU totals. + - Writes events and state transitions for explainability. + """ + + db: Db + v2_cfg: V2Config + ray_runtime_env_env_vars: dict[str, str] + serve_client: ServeClient + app_name: str = "argus_llm_app" + route_prefix: str = "/" + cpu_per_gpu: float = 1.0 + get_available_fn: Any = get_cluster_available + + def tick(self) -> None: + # Pick the next desired change. + change = self.db.pick_next_runnable_serve_change() + if not change: + return + + model_key = str(change["model_key"]) + state = str(change.get("state") or "") + + # Ensure Ray (and Serve) can be started before doing anything else. + try: + self.serve_client.ensure_started() + except Exception as e: + self.db.append_serve_event(model_key=model_key, event_type="SERVE_START_ERROR", payload_json=repr(e)) + return + + # Desired set: all non-deleted models except those marked DELETING. + all_rows = self.db.list_all_serve_models(include_deleted=False, limit=5000, offset=0) + # FAILED models are not part of the desired running set. A user can PATCH to + # re-queue a failed model (e.g., after fixing env/deps) which will move it back to QUEUED. 
+ desired_rows = [r for r in all_rows if str(r.get("state") or "") not in ("DELETING", "DELETED", "FAILED")] + + # Precheck resources: multi-model app apply needs enough GPUs for the whole desired set. + needed = _needed_total_gpus(desired_rows) + avail: ClusterAvailable = self.get_available_fn() + if float(avail.total_available_gpus) < float(needed): + msg = f"Insufficient GPUs: need {needed}, available {avail.total_available_gpus}" + self.db.append_serve_event(model_key=model_key, event_type="SERVE_PENDING_RESOURCES", payload_json=msg) + return + + # Build per-model LLM configs (dict form in M4). + llm_cfg_dicts: list[dict[str, Any]] = [] + accelerator_type = str(self.v2_cfg.serving.llm.accelerator_type or "") + for r in desired_rows: + resolved = _row_to_resolved_spec(r) + llm_cfg_dicts.append( + build_llm_config_dict( + resolved, + accelerator_type=accelerator_type, + runtime_env_env_vars=self.ray_runtime_env_env_vars, + cpu_per_gpu=self.cpu_per_gpu, + ) + ) + + # Build a Ray Serve OpenAI-compatible app if Ray Serve LLM is available. + # Fall back to a plain dict so unit tests can run without real Ray Serve. + app_obj: Any + try: + from ray.serve.llm import LLMConfig, build_openai_app # type: ignore + + llm_cfgs = [LLMConfig(**d) for d in llm_cfg_dicts] + app_obj = build_openai_app({"llm_configs": llm_cfgs}) + except Exception as e: + self.db.append_serve_event(model_key=model_key, event_type="SERVE_LLM_IMPORT_ERROR", payload_json=repr(e)) + app_obj = {"llm_configs": llm_cfg_dicts} + + try: + self.db.append_serve_event(model_key=model_key, event_type="SERVE_APPLY_REQUESTED", payload_json=str(len(llm_cfg_dicts))) + self.serve_client.apply_app(app=app_obj, app_name=self.app_name, route_prefix=self.route_prefix) + except Exception as e: + err = f"{type(e).__name__}: {e}" + tb = traceback.format_exc(limit=10) + self.db.set_serve_model_state(model_key=model_key, state="FAILED", error_summary=err, event_type="SERVE_APPLY_FAILED", payload_json=tb) + return + + # Apply succeeded. Update the changing model's state. + if state == "DELETING": + self.db.set_serve_model_state(model_key=model_key, state="DELETED", event_type="SERVE_DELETE_APPLIED") + return + + # Mark as deploying; best-effort status probe can promote to RUNNING. 
+ self.db.set_serve_model_state(model_key=model_key, state="DEPLOYING", event_type="SERVE_DEPLOYING") + try: + _ = self.serve_client.get_status() + self.db.set_serve_model_state(model_key=model_key, state="RUNNING", event_type="SERVE_RUNNING") + except Exception as e: + self.db.append_serve_event(model_key=model_key, event_type="SERVE_STATUS_ERROR", payload_json=repr(e)) diff --git a/src/mvp/py/argus/service/serving_spec.py b/src/mvp/py/argus/service/serving_spec.py new file mode 100644 index 0000000..c04362f --- /dev/null +++ b/src/mvp/py/argus/service/serving_spec.py @@ -0,0 +1,144 @@ +from __future__ import annotations + +import re +from dataclasses import dataclass +from datetime import datetime, timezone +from typing import Any + + +_MODEL_ID_SUFFIX_RE = re.compile(r"^[A-Za-z0-9][A-Za-z0-9._-]{0,63}$") + + +@dataclass(frozen=True) +class ServingSpec: + model_id: str + model_source: str + num_replicas: int = 1 + gpus_per_replica: int = 1 + engine_kwargs: dict[str, Any] | None = None + + +@dataclass(frozen=True) +class ResolvedServingSpec: + user_id: str + model_id_suffix: str + model_id_prefix: str + model_id: str + model_source: str + num_replicas: int + gpus_per_replica: int + engine_kwargs: dict[str, Any] | None + + +def validate_model_id_suffix(suffix: str) -> None: + if not isinstance(suffix, str): + raise ValueError("model_id must be a string") + s = suffix.strip() + if s != suffix: + raise ValueError("model_id must not contain leading/trailing whitespace") + if not s: + raise ValueError("model_id is required") + if not _MODEL_ID_SUFFIX_RE.match(s): + raise ValueError("model_id must match regex: ^[A-Za-z0-9][A-Za-z0-9._-]{0,63}$") + if ".." in s: + raise ValueError("model_id must not contain '..'") + + +def make_model_id_prefix(*, user_id: str, now_utc: datetime | None = None) -> str: + if not user_id or not isinstance(user_id, str): + raise ValueError("user_id is required") + if "/" in user_id: + raise ValueError("user_id must not contain '/'") + + dt = now_utc or datetime.now(timezone.utc) + if dt.tzinfo is None: + dt = dt.replace(tzinfo=timezone.utc) + dt = dt.astimezone(timezone.utc) + stamp = dt.strftime("%Y%m%d%H%M") + return f"{user_id}-{stamp}" + + +def expand_home_macros(*, user_id: str, text: str) -> str: + if not isinstance(text, str): + raise ValueError("model_source must be a string") + if not text: + raise ValueError("model_source is required") + + out = text + out = out.replace("$HOME/common/hf", "/private/hf") + out = out.replace("$HOME/common/datasets", "/private/datasets") + out = out.replace("$HOME", f"/private/users/{user_id}") + return out + + +def validate_model_source_path(*, user_id: str, model_source: str) -> None: + if not isinstance(model_source, str): + raise ValueError("model_source must be a string") + if not model_source.startswith("/"): + raise ValueError("model_source must be an absolute path") + if not model_source.startswith("/private/"): + raise ValueError("model_source must be under /private") + if "\x00" in model_source: + raise ValueError("model_source contains null byte") + parts = [p for p in model_source.split("/") if p] + if any(p == ".." 
for p in parts): + raise ValueError("model_source must not contain '..'") + + allowed_user_prefix = f"/private/users/{user_id}/" + allowed = model_source.startswith("/private/hf/") or model_source.startswith(allowed_user_prefix) + if not allowed: + raise PermissionError("model_source is not allowed (must be under /private/hf or your /private/users/)") + + +def parse_serving_spec(obj: Any) -> ServingSpec: + if not isinstance(obj, dict): + raise ValueError("serving spec must be a mapping") + + model_id = obj.get("model_id") + model_source = obj.get("model_source") + num_replicas = obj.get("num_replicas", 1) + gpus_per_replica = obj.get("gpus_per_replica", 1) + engine_kwargs = obj.get("engine_kwargs", None) + + if not isinstance(model_id, str): + raise ValueError("missing required field: model_id") + validate_model_id_suffix(model_id) + + if not isinstance(model_source, str) or not model_source: + raise ValueError("missing required field: model_source") + + if not isinstance(num_replicas, int) or num_replicas < 1: + raise ValueError("num_replicas must be an integer >= 1") + if not isinstance(gpus_per_replica, int) or gpus_per_replica < 1: + raise ValueError("gpus_per_replica must be an integer >= 1") + + if engine_kwargs is not None and not isinstance(engine_kwargs, dict): + raise ValueError("engine_kwargs must be a mapping when provided") + + return ServingSpec( + model_id=model_id, + model_source=model_source, + num_replicas=num_replicas, + gpus_per_replica=gpus_per_replica, + engine_kwargs=engine_kwargs, + ) + + +def resolve_serving_spec(*, spec: ServingSpec, user_id: str, now_utc: datetime | None = None) -> ResolvedServingSpec: + validate_model_id_suffix(spec.model_id) + prefix = make_model_id_prefix(user_id=user_id, now_utc=now_utc) + full_model_id = f"{prefix}-{spec.model_id}" + + resolved_source = expand_home_macros(user_id=user_id, text=spec.model_source) + validate_model_source_path(user_id=user_id, model_source=resolved_source) + + return ResolvedServingSpec( + user_id=user_id, + model_id_suffix=spec.model_id, + model_id_prefix=prefix, + model_id=full_model_id, + model_source=resolved_source, + num_replicas=spec.num_replicas, + gpus_per_replica=spec.gpus_per_replica, + engine_kwargs=spec.engine_kwargs, + ) diff --git a/src/mvp/py/argus/service/ui.py b/src/mvp/py/argus/service/ui.py index b5a0822..c77b629 100644 --- a/src/mvp/py/argus/service/ui.py +++ b/src/mvp/py/argus/service/ui.py @@ -112,6 +112,7 @@ def _nav(active: str) -> str: links = [ ("login", "/ui/login", "Login"), ("tasks", "/ui/tasks", "Tasks"), + ("serving", "/ui/serving", "Serving"), ("new", "/ui/tasks/new", "New Task"), ("data", "/ui/data", "Data"), ("admin", "/ui/admin", "Admin"), @@ -992,6 +993,253 @@ refresh(); """.strip() return HTMLResponse(content=_page(f"Logs {task_id}", "tasks", body, script)) + @app.get("/ui/serving") + async def ui_serving() -> HTMLResponse: + body = """ +

Serving

+
+
+ + New Model + OpenAI /v1/models +
+
+
Loading...
+
+""".strip() + script = """ +document.getElementById("nav-ray-dashboard").href = curOriginWithPort(8265); +document.getElementById("openai-models").href = curOriginWithPort(8000) + "/v1/models"; +const out = document.getElementById("out"); + +function pill(state) { + const s = String(state || ""); + if (s === "RUNNING") return `${s}`; + if (s === "FAILED") return `${s}`; + return `${s}`; +} + +async function refresh() { + out.textContent = "Loading..."; + try { + const lim = 50; + const off = Number(localStorage.getItem("mvp_serving_offset") || "0") || 0; + const resp = await apiJson("/api/v2/serve/models?limit=" + lim + "&offset=" + off + "&include_deleted=0"); + const items = resp.items || []; + const hasMore = !!resp.has_more; + const pageNo = Math.floor(off / lim) + 1; + const prevDisabled = off <= 0; + const nextDisabled = !hasMore; + + function row(m) { + return ` + ${m.model_key} + ${m.model_id} + ${pill(m.state)} + ${m.num_replicas} × ${m.gpus_per_replica} GPU + ${m.updated_at || ""} + `; + } + const rows = items.map(row).join(""); + + out.innerHTML = ` +
+
OpenAI base: ${resp.openai_base_url || curOriginWithPort(8000) + "/v1"}
+
+ Page ${pageNo} + + +
+
+ + + ${rows || ""} +
Model KeyModel IDStateResourcesUpdated
(none)
+ `; + + const prevBtn = document.getElementById("prev"); + const nextBtn = document.getElementById("next"); + if (prevBtn) prevBtn.onclick = () => { localStorage.setItem("mvp_serving_offset", String(Math.max(0, off - lim))); refresh(); }; + if (nextBtn) nextBtn.onclick = () => { localStorage.setItem("mvp_serving_offset", String(off + lim)); refresh(); }; + } catch (e) { + let text = "Error: " + (e.status || "") + "\\n" + (e.body || String(e)); + if (e.body && String(e.body).includes("serving is not enabled")) { + text = "Serving is not enabled in server config.\\nAsk admin to enable `serving:` in dev.yaml."; + } + out.textContent = text; + } +} + +document.getElementById("refresh").onclick = refresh; +refresh(); +""".strip() + return HTMLResponse(content=_page("Serving", "serving", body, script)) + + @app.get("/ui/serving/new") + async def ui_serving_new() -> HTMLResponse: + example = """# ServingSpec (YAML) +# Notes: +# - model_id: this is the suffix (the platform automatically prepends <user_id>-<YYYYMMDDHHMM>-) +# - model_source: local model path ($HOME macros supported; $HOME/common/hf pointing at the shared HF cache is recommended) +# +# Common paths: +# - $HOME/common/hf -> /private/hf +# - $HOME -> /private/users/<user_id> +# +model_id: qwen-0.5b +model_source: $HOME/common/hf/hub/models--Qwen--Qwen2.5-0.5B-Instruct/snapshots/ +num_replicas: 1 +gpus_per_replica: 1 + +# engine_kwargs: # optional: passed through to vLLM as-is +# gpu_memory_utilization: 0.4 +""".strip() + body = f""" +

New Model

+
+
Paste ServingSpec YAML and submit to /api/v2/serve/models.
+
+ +
+
+ + Back +
+
+

+
+""".strip() + script = """ +document.getElementById("nav-ray-dashboard").href = curOriginWithPort(8265); +const out = document.getElementById("out"); +document.getElementById("submit").onclick = async () => { + out.textContent = "Submitting..."; + const yaml = document.getElementById("yaml").value || ""; + try { + const resp = await apiJson("/api/v2/serve/models", { method: "POST", headers: { "Content-Type": "application/yaml" }, body: yaml }); + out.textContent = "Created: " + resp.model_key + "\\nState: " + resp.state; + if (resp.model_key) window.location.href = "/ui/serving/" + encodeURIComponent(resp.model_key); + } catch (e) { + out.textContent = "Error: " + (e.status || "") + "\\n" + (e.body || String(e)); + } +}; +""".strip() + return HTMLResponse(content=_page("New Model", "serving", body, script)) + + @app.get("/ui/serving/{model_key}") + async def ui_serving_detail(model_key: str) -> HTMLResponse: + body = f""" +

Model

+
+
+
model_key: {html.escape(model_key)}
+ +
+
+
+ + + + +
+
+
Loading...
+
+

Resolved Spec (YAML)

+
(loading)
+
+

Events

+
(loading)
+
+

OpenAI Example

+
(loading)
+
+""".strip() + script = f""" +document.getElementById("nav-ray-dashboard").href = curOriginWithPort(8265); +document.getElementById("openai-models").href = curOriginWithPort(8000) + "/v1/models"; +const modelKey = {json.dumps(model_key)}; +const meta = document.getElementById("meta"); +const spec = document.getElementById("spec"); +const eventsEl = document.getElementById("events"); +const example = document.getElementById("example"); +const replicas = document.getElementById("replicas"); + +function pill(state) {{ + const s = String(state || ""); + if (s === "RUNNING") return `${{s}}`; + if (s === "FAILED") return `${{s}}`; + return `${{s}}`; +}} + +function renderEvents(events) {{ + if (!events || !events.length) return "
(none)
"; + const rows = events.map(e => {{ + const payload = (e.payload_json || ""); + const short = String(payload).length > 240 ? String(payload).slice(0, 240) + "..." : String(payload); + return `${{e.created_at || ""}}${{e.event_type}}
${{short}}
`; + }}).join(""); + return `${{rows}}
TimeTypePayload
`; +}} + +async function refresh() {{ + meta.textContent = "Loading..."; + spec.textContent = "(loading)"; + eventsEl.textContent = "(loading)"; + example.textContent = "(loading)"; + try {{ + const obj = await apiJson("/api/v2/serve/models/" + encodeURIComponent(modelKey)); + const m = obj.model || {{}}; + replicas.value = String(m.num_replicas || 1); + meta.innerHTML = ` +
+
state: ${{pill(m.state)}}
+
model_id: ${{m.model_id || ""}}
+
source: ${{m.model_source || ""}}
+
+
endpoint: ${{(m.endpoint && m.endpoint.openai_base_url) || (curOriginWithPort(8000) + "/v1")}}
+ `; + spec.textContent = obj.resolved_spec_yaml || ""; + eventsEl.innerHTML = renderEvents(obj.events || []); + const base = (m.endpoint && m.endpoint.openai_base_url) || (curOriginWithPort(8000) + "/v1"); + const mid = m.model_id || ""; + example.textContent = `curl -sS -H 'Content-Type: application/json' -H 'Authorization: Bearer FAKE_KEY' \\\\\\n -X POST ${{base}}/chat/completions \\\\\\n --data-binary '{{\\"model\\":\\"${{mid}}\\",\\"messages\\":[{{\\"role\\":\\"user\\",\\"content\\":\\"hello\\"}}],\\"max_tokens\\":16,\\"stream\\":false}}' | python3 -m json.tool`; + }} catch (e) {{ + meta.textContent = "Error: " + (e.status || "") + "\\n" + (e.body || String(e)); + spec.textContent = ""; + eventsEl.textContent = ""; + example.textContent = ""; + }} +}} + +document.getElementById("scale").onclick = async () => {{ + const n = Number(replicas.value || "1"); + if (!Number.isFinite(n) || n < 1) return; + try {{ + await apiJson("/api/v2/serve/models/" + encodeURIComponent(modelKey), {{ method: "PATCH", headers: {{ "Content-Type": "application/json" }}, body: JSON.stringify({{ num_replicas: n }}) }}); + await refresh(); + }} catch (e) {{ + meta.textContent = "Error: " + (e.status || "") + "\\n" + (e.body || String(e)); + }} +}}; + +document.getElementById("delete").onclick = async () => {{ + if (!confirm("Delete this model?")) return; + try {{ + await apiJson("/api/v2/serve/models/" + encodeURIComponent(modelKey), {{ method: "DELETE" }}); + await refresh(); + }} catch (e) {{ + meta.textContent = "Error: " + (e.status || "") + "\\n" + (e.body || String(e)); + }} +}}; + +refresh(); +""".strip() + return HTMLResponse(content=_page("Model", "serving", body, script)) + @app.get("/ui/data") async def ui_data() -> HTMLResponse: body = """ diff --git a/src/mvp/py/tests/test_app_serving_api.py b/src/mvp/py/tests/test_app_serving_api.py new file mode 100644 index 0000000..4f9167f --- /dev/null +++ b/src/mvp/py/tests/test_app_serving_api.py @@ -0,0 +1,282 @@ +from __future__ import annotations + +from pathlib import Path + +import yaml +from fastapi.testclient import TestClient + + +def _write_config(tmp_path: Path) -> Path: + cfg = { + "ray": { + "address": "http://127.0.0.1:8265", + "shared_root": "/private", + "entrypoint_resources": {"worker_node": 1}, + "runtime_env": {"env_vars": {}}, + }, + "data": { + "user_root": str(tmp_path / "users"), + }, + "service": { + "api": {"host": "127.0.0.1", "port": 0}, + "auth": {"token_env": "MVP_INTERNAL_TOKEN"}, + "sqlite": {"db_path": str(tmp_path / "mvp.sqlite3")}, + "scheduler": {"tick_s": 1, "retry_interval_s": 1, "max_running_tasks": 1}, + }, + "serving": { + "serve": {"http_port": 8000, "proxy_location": "HeadOnly"}, + "llm": {"accelerator_type": "H20"}, + }, + } + p = tmp_path / "cfg.yaml" + p.write_text(yaml.safe_dump(cfg), encoding="utf-8") + return p + + +def test_serving_api_crud_flow(tmp_path: Path, monkeypatch): + from argus.service import app as app_mod + + cfg_path = _write_config(tmp_path) + monkeypatch.setenv("MVP_INTERNAL_TOKEN", "admin-token") + + class _Scheduler: + def __init__(self, **kwargs): + self.tool = object() + + def run_forever(self, stop_flag): + return None + + monkeypatch.setattr(app_mod, "Scheduler", _Scheduler) + monkeypatch.setattr(app_mod, "new_model_key", lambda user_id: f"mk-{user_id}") + + app = app_mod.create_app(str(cfg_path)) + + admin_headers = {"authorization": "Bearer admin-token"} + with TestClient(app) as c: + r = c.post("/api/v2/users", headers=admin_headers, json={"user_id": "alice"}) + assert r.status_code == 
200 + r2 = c.post("/api/v2/users/alice/tokens", headers=admin_headers) + assert r2.status_code == 200 + user_token = r2.json()["token"] + + headers = {"authorization": f"Bearer {user_token}"} + + spec_yaml = ( + "model_id: qwen-0.5b\n" + "model_source: $HOME/common/hf/models--Qwen--Qwen2.5-0.5B-Instruct/snapshots/sha\n" + "num_replicas: 1\n" + "gpus_per_replica: 1\n" + ) + r3 = c.post("/api/v2/serve/models", headers=headers, data=spec_yaml) + assert r3.status_code == 200 + assert r3.json()["model_key"] == "mk-alice" + assert r3.json()["state"] == "QUEUED" + + r4 = c.get("/api/v2/serve/models?limit=10&offset=0", headers=headers) + assert r4.status_code == 200 + obj = r4.json() + assert obj["openai_base_url"] == "http://testserver:8000/v1" + assert len(obj["items"]) == 1 + assert obj["items"][0]["model_key"] == "mk-alice" + + r5 = c.get("/api/v2/serve/models/mk-alice", headers=headers) + assert r5.status_code == 200 + detail = r5.json() + assert detail["model"]["model_key"] == "mk-alice" + assert "model_id_prefix" in detail["model"] + assert "resolved_spec_yaml" in detail + assert isinstance(detail.get("events"), list) + + r6 = c.patch("/api/v2/serve/models/mk-alice", headers=headers, json={"num_replicas": 2}) + assert r6.status_code == 200 + assert r6.json()["state"] == "QUEUED" + + r7 = c.delete("/api/v2/serve/models/mk-alice", headers=headers) + assert r7.status_code == 200 + assert r7.json()["state"] == "DELETING" + + # Admin status endpoint + r8 = c.get("/api/v2/serve/status", headers=admin_headers) + assert r8.status_code == 200 + assert r8.json()["http_port"] == 8000 + + +def test_serving_api_rejects_path_outside_user_and_hf(tmp_path: Path, monkeypatch): + from argus.service import app as app_mod + + cfg_path = _write_config(tmp_path) + monkeypatch.setenv("MVP_INTERNAL_TOKEN", "admin-token") + + class _Scheduler: + def __init__(self, **kwargs): + self.tool = object() + + def run_forever(self, stop_flag): + return None + + monkeypatch.setattr(app_mod, "Scheduler", _Scheduler) + monkeypatch.setattr(app_mod, "new_model_key", lambda user_id: f"mk-{user_id}") + + app = app_mod.create_app(str(cfg_path)) + + admin_headers = {"authorization": "Bearer admin-token"} + with TestClient(app) as c: + c.post("/api/v2/users", headers=admin_headers, json={"user_id": "alice"}) + r2 = c.post("/api/v2/users/alice/tokens", headers=admin_headers) + user_token = r2.json()["token"] + headers = {"authorization": f"Bearer {user_token}"} + + spec_yaml = ( + "model_id: x\n" + "model_source: /private/users/bob/models/evil\n" + "num_replicas: 1\n" + "gpus_per_replica: 1\n" + ) + r3 = c.post("/api/v2/serve/models", headers=headers, data=spec_yaml) + assert r3.status_code == 403 + + +def test_serving_api_invalid_yaml_and_non_mapping(tmp_path: Path, monkeypatch): + from argus.service import app as app_mod + + cfg_path = _write_config(tmp_path) + monkeypatch.setenv("MVP_INTERNAL_TOKEN", "admin-token") + + class _Scheduler: + def __init__(self, **kwargs): + self.tool = object() + + def run_forever(self, stop_flag): + return None + + monkeypatch.setattr(app_mod, "Scheduler", _Scheduler) + monkeypatch.setattr(app_mod, "new_model_key", lambda user_id: f"mk-{user_id}") + app = app_mod.create_app(str(cfg_path)) + + with TestClient(app) as c: + # Create a user token + admin_headers = {"authorization": "Bearer admin-token"} + c.post("/api/v2/users", headers=admin_headers, json={"user_id": "alice"}) + token = c.post("/api/v2/users/alice/tokens", headers=admin_headers).json()["token"] + headers = {"authorization": f"Bearer 
{token}"} + + r = c.post("/api/v2/serve/models", headers=headers, data=": bad\n") + assert r.status_code == 400 + + r2 = c.post("/api/v2/serve/models", headers=headers, data="- 1\n- 2\n") + assert r2.status_code == 400 + + +def test_serving_api_engine_kwargs_binary_rejected(tmp_path: Path, monkeypatch): + """ + yaml !!binary is parsed as bytes, which is not JSON-serializable. + """ + from argus.service import app as app_mod + + cfg_path = _write_config(tmp_path) + monkeypatch.setenv("MVP_INTERNAL_TOKEN", "admin-token") + + class _Scheduler: + def __init__(self, **kwargs): + self.tool = object() + + def run_forever(self, stop_flag): + return None + + monkeypatch.setattr(app_mod, "Scheduler", _Scheduler) + monkeypatch.setattr(app_mod, "new_model_key", lambda user_id: f"mk-{user_id}") + app = app_mod.create_app(str(cfg_path)) + + admin_headers = {"authorization": "Bearer admin-token"} + with TestClient(app) as c: + c.post("/api/v2/users", headers=admin_headers, json={"user_id": "alice"}) + token = c.post("/api/v2/users/alice/tokens", headers=admin_headers).json()["token"] + headers = {"authorization": f"Bearer {token}"} + + spec_yaml = ( + "model_id: x\n" + "model_source: $HOME/common/hf/x\n" + "engine_kwargs:\n" + " blob: !!binary \"AQID\"\n" + ) + r = c.post("/api/v2/serve/models", headers=headers, data=spec_yaml) + assert r.status_code == 400 + + +def test_serving_api_list_include_deleted_and_forwarded_base_url(tmp_path: Path, monkeypatch): + from argus.service import app as app_mod + from argus.service.config import V2Config + from argus.service.db import Db + + cfg_path = _write_config(tmp_path) + monkeypatch.setenv("MVP_INTERNAL_TOKEN", "admin-token") + + class _Scheduler: + def __init__(self, **kwargs): + self.tool = object() + + def run_forever(self, stop_flag): + return None + + monkeypatch.setattr(app_mod, "Scheduler", _Scheduler) + keys = iter(["mk-alice-1", "mk-alice-2"]) + monkeypatch.setattr(app_mod, "new_model_key", lambda user_id: next(keys)) + + app = app_mod.create_app(str(cfg_path)) + + admin_headers = {"authorization": "Bearer admin-token"} + with TestClient(app) as c: + c.post("/api/v2/users", headers=admin_headers, json={"user_id": "alice"}) + token = c.post("/api/v2/users/alice/tokens", headers=admin_headers).json()["token"] + headers = {"authorization": f"Bearer {token}"} + + spec_yaml = "model_id: x\nmodel_source: $HOME/common/hf/x\n" + c.post("/api/v2/serve/models", headers=headers, data=spec_yaml) + c.post("/api/v2/serve/models", headers=headers, data=spec_yaml) + + # Mark one model as DELETED directly in DB (sets deleted_at). 
+ root = yaml.safe_load(cfg_path.read_text(encoding="utf-8")) + v2_cfg = V2Config.from_root_dict(root) + db = Db(v2_cfg.sqlite.db_path) + db.set_serve_model_state(model_key="mk-alice-2", state="DELETED") + + r1 = c.get( + "/api/v2/serve/models?limit=10&offset=0&include_deleted=0", + headers={**headers, "x-forwarded-host": "example.com:8080", "x-forwarded-proto": "https"}, + ) + assert r1.status_code == 200 + assert r1.json()["openai_base_url"] == "https://example.com:8000/v1" + assert {m["model_key"] for m in r1.json()["items"]} == {"mk-alice-1"} + + r2 = c.get("/api/v2/serve/models?include_deleted=1", headers=headers) + assert r2.status_code == 200 + assert {m["model_key"] for m in r2.json()["items"]} == {"mk-alice-1", "mk-alice-2"} + + +def test_serving_api_patch_invalid_num_replicas(tmp_path: Path, monkeypatch): + from argus.service import app as app_mod + + cfg_path = _write_config(tmp_path) + monkeypatch.setenv("MVP_INTERNAL_TOKEN", "admin-token") + + class _Scheduler: + def __init__(self, **kwargs): + self.tool = object() + + def run_forever(self, stop_flag): + return None + + monkeypatch.setattr(app_mod, "Scheduler", _Scheduler) + monkeypatch.setattr(app_mod, "new_model_key", lambda user_id: "mk-alice") + + app = app_mod.create_app(str(cfg_path)) + + admin_headers = {"authorization": "Bearer admin-token"} + with TestClient(app) as c: + c.post("/api/v2/users", headers=admin_headers, json={"user_id": "alice"}) + token = c.post("/api/v2/users/alice/tokens", headers=admin_headers).json()["token"] + headers = {"authorization": f"Bearer {token}"} + + c.post("/api/v2/serve/models", headers=headers, data="model_id: x\nmodel_source: $HOME/common/hf/x\n") + r = c.patch("/api/v2/serve/models/mk-alice", headers=headers, json={"num_replicas": 0}) + assert r.status_code == 422 diff --git a/src/mvp/py/tests/test_db_serving.py b/src/mvp/py/tests/test_db_serving.py new file mode 100644 index 0000000..1a1c829 --- /dev/null +++ b/src/mvp/py/tests/test_db_serving.py @@ -0,0 +1,79 @@ +from __future__ import annotations + +import json +from pathlib import Path + + +def test_db_serving_model_crud_and_events(tmp_path: Path) -> None: + from argus.service.db import Db + + db = Db(str(tmp_path / "mvp.sqlite3")) + db.init() + + m1 = db.create_serve_model( + model_key="svc-001", + user_id="alice", + model_id_suffix="qwen-0.5b", + model_id_prefix="alice-202601061235", + model_id="alice-202601061235-qwen-0.5b", + model_source="/private/hf/hub/models--Qwen--Qwen2.5-0.5B-Instruct/snapshots/sha", + num_replicas=1, + gpus_per_replica=1, + engine_kwargs_json=json.dumps({"max_model_len": 8192}), + spec_yaml="model_id: qwen-0.5b\nmodel_source: $HOME/common/hf/...\n", + resolved_spec_yaml="model_id: alice-202601061235-qwen-0.5b\nmodel_source: /private/hf/...\n", + ) + assert m1["model_key"] == "svc-001" + assert m1["state"] == "QUEUED" + + # Same suffix may be created again; model_key is the identity. 
+ m2 = db.create_serve_model( + model_key="svc-002", + user_id="alice", + model_id_suffix="qwen-0.5b", + model_id_prefix="alice-202601061236", + model_id="alice-202601061236-qwen-0.5b", + model_source="/private/hf/hub/models--Qwen--Qwen2.5-0.5B-Instruct/snapshots/sha", + num_replicas=1, + gpus_per_replica=2, + engine_kwargs_json=None, + spec_yaml="model_id: qwen-0.5b\nmodel_source: $HOME/common/hf/...\n", + resolved_spec_yaml="model_id: alice-202601061236-qwen-0.5b\nmodel_source: /private/hf/...\n", + ) + assert m2["model_key"] == "svc-002" + assert m2["model_id"] != m1["model_id"] + + got = db.get_serve_model("svc-001") + assert got is not None + assert got["gpus_per_replica"] == 1 + + items = db.list_serve_models(user_id="alice") + assert {i["model_key"] for i in items} == {"svc-001", "svc-002"} + + # State transition writes a serve event. + db.set_serve_model_state(model_key="svc-001", state="DEPLOYING") + got2 = db.get_serve_model("svc-001") + assert got2 is not None + assert got2["state"] == "DEPLOYING" + + events = db.list_serve_events("svc-001", limit=50) + assert len(events) >= 2 + assert {e["event_type"] for e in events}.issuperset({"SERVE_MODEL_CREATED", "SERVE_STATE_UPDATE"}) + + # Reconciler pick: QUEUED/DELETING only. + picked = db.pick_next_runnable_serve_change() + assert picked is not None + assert picked["state"] == "QUEUED" + + db.set_serve_model_state(model_key="svc-002", state="DELETING") + picked2 = db.pick_next_runnable_serve_change() + assert picked2 is not None + assert picked2["state"] in ("QUEUED", "DELETING") + + # Deleted models are hidden unless include_deleted. + db.set_serve_model_state(model_key="svc-002", state="DELETED") + items2 = db.list_serve_models(user_id="alice", include_deleted=False) + assert {i["model_key"] for i in items2} == {"svc-001"} + items3 = db.list_serve_models(user_id="alice", include_deleted=True) + assert {i["model_key"] for i in items3} == {"svc-001", "svc-002"} + diff --git a/src/mvp/py/tests/test_ids.py b/src/mvp/py/tests/test_ids.py index ad2a8fe..fd422e8 100644 --- a/src/mvp/py/tests/test_ids.py +++ b/src/mvp/py/tests/test_ids.py @@ -44,3 +44,32 @@ def test_attempt_submission_id_format(): assert attempt_submission_id("t", 1) == "t--a01" assert attempt_submission_id("t", 12) == "t--a12" + + +def test_new_model_key_includes_user(monkeypatch): + import argus.core.ids as ids + + class _FakeDatetime: + @staticmethod + def now(): + class _DT: + def strftime(self, fmt: str) -> str: + assert fmt == "%Y%m%d-%H%M%S" + return "20250101-010203" + + return _DT() + + monkeypatch.setattr(ids, "datetime", _FakeDatetime) + monkeypatch.setattr(ids.secrets, "token_hex", lambda n: "abcd") + + assert ids.new_model_key(user_id="Alice_01") == "mvp2-alice_01-serve-20250101-010203-abcd" + + +def test_new_model_key_requires_user_id(): + from argus.core.ids import new_model_key + + try: + new_model_key(user_id="") + assert False, "expected ValueError" + except ValueError as e: + assert "user_id is required" in str(e) diff --git a/src/mvp/py/tests/test_llm_config_builder.py b/src/mvp/py/tests/test_llm_config_builder.py new file mode 100644 index 0000000..78aeac7 --- /dev/null +++ b/src/mvp/py/tests/test_llm_config_builder.py @@ -0,0 +1,78 @@ +from __future__ import annotations + +import pytest + + +def test_build_llm_config_dict_maps_tp_and_bundles(): + from argus.service.serve_llm_config import build_llm_config_dict + from argus.service.serving_spec import ResolvedServingSpec + + resolved = ResolvedServingSpec( + user_id="alice", + 
model_id_suffix="qwen-0.5b", + model_id_prefix="alice-202601061235", + model_id="alice-202601061235-qwen-0.5b", + model_source="/private/hf/x", + num_replicas=2, + gpus_per_replica=4, + engine_kwargs={"gpu_memory_utilization": 0.9}, + ) + + cfg = build_llm_config_dict( + resolved, + accelerator_type="H20", + runtime_env_env_vars={"HF_ENDPOINT": "https://hf-mirror.com"}, + cpu_per_gpu=2.0, + ) + assert cfg["model_loading_config"]["model_id"] == "alice-202601061235-qwen-0.5b" + assert cfg["model_loading_config"]["model_source"] == "/private/hf/x" + assert cfg["accelerator_type"] == "H20" + assert cfg["deployment_config"]["num_replicas"] == 2 + + # gpus_per_replica -> tensor_parallel_size + assert cfg["engine_kwargs"]["tensor_parallel_size"] == 4 + assert cfg["engine_kwargs"]["gpu_memory_utilization"] == 0.9 + + # resources_per_bundle reserves the full TP GPU set for each replica. + bundle = cfg["resources_per_bundle"] + assert bundle["GPU"] == 4.0 + assert bundle["CPU"] == 8.0 + + +def test_build_llm_config_dict_injects_hf_offline_defaults(): + from argus.service.serve_llm_config import build_llm_config_dict + from argus.service.serving_spec import ResolvedServingSpec + + resolved = ResolvedServingSpec( + user_id="alice", + model_id_suffix="x", + model_id_prefix="alice-202601061235", + model_id="alice-202601061235-x", + model_source="/private/users/alice/models/x", + num_replicas=1, + gpus_per_replica=1, + engine_kwargs=None, + ) + cfg = build_llm_config_dict(resolved, accelerator_type="H20", runtime_env_env_vars={}) + env = cfg["runtime_env"]["env_vars"] + assert env["HF_HUB_OFFLINE"] == "1" + assert env["HF_HOME"] == "/private/hf" + assert env["HUGGINGFACE_HUB_CACHE"].startswith("/private/hf/") + + +def test_build_llm_config_dict_requires_accelerator_type(): + from argus.service.serve_llm_config import build_llm_config_dict + from argus.service.serving_spec import ResolvedServingSpec + + resolved = ResolvedServingSpec( + user_id="alice", + model_id_suffix="x", + model_id_prefix="alice-202601061235", + model_id="alice-202601061235-x", + model_source="/private/hf/x", + num_replicas=1, + gpus_per_replica=1, + engine_kwargs=None, + ) + with pytest.raises(ValueError, match="accelerator_type is required"): + build_llm_config_dict(resolved, accelerator_type="", runtime_env_env_vars={}) diff --git a/src/mvp/py/tests/test_serve_client.py b/src/mvp/py/tests/test_serve_client.py new file mode 100644 index 0000000..1f79d60 --- /dev/null +++ b/src/mvp/py/tests/test_serve_client.py @@ -0,0 +1,55 @@ +from __future__ import annotations + +import sys +import types + + +def test_ray_serve_client_calls_start_run_status(monkeypatch): + import ray # provided by conftest stub + + calls: list[tuple[str, object]] = [] + + def _init(*args, **kwargs): + calls.append(("ray.init", {"args": args, "kwargs": kwargs})) + + monkeypatch.setattr(ray, "init", _init, raising=False) + + serve = types.ModuleType("ray.serve") + + def _start(**kwargs): + calls.append(("serve.start", kwargs)) + return None + + def _run(app, name=None, route_prefix=None): + calls.append(("serve.run", {"app": app, "name": name, "route_prefix": route_prefix})) + return {"deployed": True} + + def _status(): + calls.append(("serve.status", None)) + return {"ok": True} + + serve.start = _start # type: ignore[attr-defined] + serve.run = _run # type: ignore[attr-defined] + serve.status = _status # type: ignore[attr-defined] + + sys.modules["ray.serve"] = serve + ray.serve = serve # type: ignore[attr-defined] + + from argus.service.serve_client import 
RayServeClient + + client = RayServeClient(http_port=8000, proxy_location="HeadOnly", ray_init_address="auto") + client.ensure_started() + out = client.apply_app(app="APP", app_name="argus_llm_app", route_prefix="/") + st = client.get_status() + + assert out == {"deployed": True} + assert st == {"ok": True} + + # Verify call order and key args. + assert calls[0][0] == "ray.init" + assert calls[0][1]["kwargs"].get("ignore_reinit_error") is True + assert calls[1][0] == "serve.start" + assert calls[1][1]["http_options"]["port"] == 8000 + assert calls[2][0] == "serve.run" + assert calls[2][1]["name"] == "argus_llm_app" + assert calls[3][0] == "serve.status" diff --git a/src/mvp/py/tests/test_service_config.py b/src/mvp/py/tests/test_service_config.py index d75b93f..219dfd2 100644 --- a/src/mvp/py/tests/test_service_config.py +++ b/src/mvp/py/tests/test_service_config.py @@ -23,6 +23,7 @@ def test_v2_config_from_root_dict_new_format_defaults(): assert cfg.sqlite.db_path.endswith(".sqlite3") assert cfg.scheduler.max_running_tasks == 3 assert cfg.tracking.wandb.enabled is False + assert cfg.serving.enabled is False def test_v2_config_backward_compat_v2_section_and_default_db_path(): @@ -57,6 +58,27 @@ def test_v2_config_requires_data_mappings(): V2Config.from_root_dict({**base, "data": {"sftpgo": ["x"], "retention": {}}}) +def test_v2_config_requires_tracking_and_serving_mappings(): + from argus.service.config import V2Config + + base = { + "ray": {"shared_root": "/private"}, + "service": {"api": {}, "auth": {}, "sqlite": {}, "scheduler": {}}, + "data": {"sftpgo": {}, "retention": {}}, + } + + with pytest.raises(ValueError, match="config\\.tracking must be a mapping"): + V2Config.from_root_dict({**base, "tracking": ["nope"]}) + + with pytest.raises(ValueError, match="config\\.tracking\\.wandb must be a mapping"): + V2Config.from_root_dict({**base, "tracking": {"wandb": ["nope"]}}) + + with pytest.raises(ValueError, match="config\\.serving must be a mapping"): + V2Config.from_root_dict({**base, "serving": ["nope"]}) + + with pytest.raises(ValueError, match="config\\.serving\\.\\{serve,llm\\} must be mappings"): + V2Config.from_root_dict({**base, "serving": {"serve": ["x"], "llm": {}}}) + def test_tracking_wandb_defaults_disabled(): from argus.service.config import V2Config diff --git a/src/mvp/py/tests/test_serving_model_id_prefix.py b/src/mvp/py/tests/test_serving_model_id_prefix.py new file mode 100644 index 0000000..a7790d5 --- /dev/null +++ b/src/mvp/py/tests/test_serving_model_id_prefix.py @@ -0,0 +1,23 @@ +from __future__ import annotations + +from datetime import datetime, timezone + +import pytest + +from argus.service.serving_spec import make_model_id_prefix + + +def test_make_model_id_prefix_uses_utc_minutes(): + dt = datetime(2026, 1, 6, 12, 35, 59, tzinfo=timezone.utc) + assert make_model_id_prefix(user_id="alice", now_utc=dt) == "alice-202601061235" + + +def test_make_model_id_prefix_rejects_empty_user_id(): + with pytest.raises(ValueError, match="user_id is required"): + make_model_id_prefix(user_id="", now_utc=datetime.now(timezone.utc)) + + +def test_make_model_id_prefix_rejects_slash(): + with pytest.raises(ValueError, match="must not contain"): + make_model_id_prefix(user_id="bad/user", now_utc=datetime.now(timezone.utc)) + diff --git a/src/mvp/py/tests/test_serving_reconciler.py b/src/mvp/py/tests/test_serving_reconciler.py new file mode 100644 index 0000000..0859f20 --- /dev/null +++ b/src/mvp/py/tests/test_serving_reconciler.py @@ -0,0 +1,207 @@ +from __future__ import 
annotations + +import json +from pathlib import Path + + +class _FakeServeClient: + def __init__(self): + self.started = 0 + self.applied = [] + self.status_calls = 0 + self.fail_apply = False + self.fail_status = False + + def ensure_started(self) -> None: + self.started += 1 + + def apply_app(self, *, app, app_name: str, route_prefix: str = "/"): + if self.fail_apply: + raise RuntimeError("boom") + self.applied.append({"app": app, "app_name": app_name, "route_prefix": route_prefix}) + return {"ok": True} + + def get_status(self): + self.status_calls += 1 + if self.fail_status: + raise RuntimeError("status boom") + return {"ok": True} + + +def _seed_model(db, *, model_key: str, user_id: str, state: str, num_replicas: int = 1, gpus_per_replica: int = 1): + spec_yaml = "model_id: x\nmodel_source: $HOME/common/hf/x\n" + resolved_yaml = f"user_id: {user_id}\nmodel_id: {user_id}-202601061235-x\n" + db.create_serve_model( + model_key=model_key, + user_id=user_id, + model_id_suffix="x", + model_id_prefix=f"{user_id}-202601061235", + model_id=f"{user_id}-202601061235-x", + model_source="/private/hf/x", + num_replicas=num_replicas, + gpus_per_replica=gpus_per_replica, + engine_kwargs_json=json.dumps({"gpu_memory_utilization": 0.9}), + spec_yaml=spec_yaml, + resolved_spec_yaml=resolved_yaml, + ) + db.set_serve_model_state(model_key=model_key, state=state, event_type="TEST_SEED") + + +def test_reconciler_skips_when_no_changes(tmp_path: Path): + from argus.service.config import V2Config + from argus.service.db import Db + from argus.service.serving_reconciler import ServingReconciler + + root = { + "ray": {"shared_root": "/private"}, + "service": {"api": {}, "auth": {}, "sqlite": {"db_path": str(tmp_path / "mvp.sqlite3")}, "scheduler": {}}, + "data": {"sftpgo": {}, "retention": {}}, + "serving": {"serve": {"http_port": 8000}, "llm": {"accelerator_type": "H20"}}, + } + cfg = V2Config.from_root_dict(root) + db = Db(cfg.sqlite.db_path) + db.init() + + client = _FakeServeClient() + rec = ServingReconciler(db=db, v2_cfg=cfg, ray_runtime_env_env_vars={}, serve_client=client, get_available_fn=lambda: type("A", (), {"total_available_gpus": 8, "total_available_npus": 0})()) + rec.tick() + assert client.started == 0 + assert client.applied == [] + + +def test_reconciler_pending_resources_no_apply(tmp_path: Path): + from argus.service.config import V2Config + from argus.service.db import Db + from argus.service.serving_reconciler import ServingReconciler + + cfg = V2Config.from_root_dict( + { + "ray": {"shared_root": "/private"}, + "service": {"api": {}, "auth": {}, "sqlite": {"db_path": str(tmp_path / "mvp.sqlite3")}, "scheduler": {}}, + "data": {"sftpgo": {}, "retention": {}}, + "serving": {"serve": {"http_port": 8000}, "llm": {"accelerator_type": "H20"}}, + } + ) + db = Db(cfg.sqlite.db_path) + db.init() + _seed_model(db, model_key="mk1", user_id="alice", state="QUEUED", num_replicas=2, gpus_per_replica=4) + + client = _FakeServeClient() + rec = ServingReconciler( + db=db, + v2_cfg=cfg, + ray_runtime_env_env_vars={}, + serve_client=client, + get_available_fn=lambda: type("A", (), {"total_available_gpus": 1, "total_available_npus": 0})(), + ) + rec.tick() + # Serve may be started even when resources are insufficient, but apply should not happen. + assert client.started == 1 + assert client.applied == [] + # State remains QUEUED. 
+ row = db.get_serve_model("mk1") + assert row and row["state"] == "QUEUED" + ev = db.list_serve_events("mk1", limit=50) + assert any(e["event_type"] == "SERVE_PENDING_RESOURCES" for e in ev) + + +def test_reconciler_apply_success_marks_running(tmp_path: Path): + from argus.service.config import V2Config + from argus.service.db import Db + from argus.service.serving_reconciler import ServingReconciler + + cfg = V2Config.from_root_dict( + { + "ray": {"shared_root": "/private"}, + "service": {"api": {}, "auth": {}, "sqlite": {"db_path": str(tmp_path / "mvp.sqlite3")}, "scheduler": {}}, + "data": {"sftpgo": {}, "retention": {}}, + "serving": {"serve": {"http_port": 8000}, "llm": {"accelerator_type": "H20"}}, + } + ) + db = Db(cfg.sqlite.db_path) + db.init() + _seed_model(db, model_key="mk1", user_id="alice", state="QUEUED", num_replicas=1, gpus_per_replica=1) + + client = _FakeServeClient() + rec = ServingReconciler( + db=db, + v2_cfg=cfg, + ray_runtime_env_env_vars={"HF_ENDPOINT": "https://hf-mirror.com"}, + serve_client=client, + get_available_fn=lambda: type("A", (), {"total_available_gpus": 8, "total_available_npus": 0})(), + ) + rec.tick() + assert client.started == 1 + assert len(client.applied) == 1 + applied = client.applied[0]["app"]["llm_configs"] + assert applied[0]["engine_kwargs"]["tensor_parallel_size"] == 1 + assert applied[0]["runtime_env"]["env_vars"]["HF_HUB_OFFLINE"] == "1" + row = db.get_serve_model("mk1") + assert row and row["state"] == "RUNNING" + + +def test_reconciler_delete_removes_and_marks_deleted(tmp_path: Path): + from argus.service.config import V2Config + from argus.service.db import Db + from argus.service.serving_reconciler import ServingReconciler + + cfg = V2Config.from_root_dict( + { + "ray": {"shared_root": "/private"}, + "service": {"api": {}, "auth": {}, "sqlite": {"db_path": str(tmp_path / "mvp.sqlite3")}, "scheduler": {}}, + "data": {"sftpgo": {}, "retention": {}}, + "serving": {"serve": {"http_port": 8000}, "llm": {"accelerator_type": "H20"}}, + } + ) + db = Db(cfg.sqlite.db_path) + db.init() + _seed_model(db, model_key="keep", user_id="alice", state="RUNNING", num_replicas=1, gpus_per_replica=1) + _seed_model(db, model_key="del", user_id="alice", state="DELETING", num_replicas=1, gpus_per_replica=1) + + client = _FakeServeClient() + rec = ServingReconciler( + db=db, + v2_cfg=cfg, + ray_runtime_env_env_vars={}, + serve_client=client, + get_available_fn=lambda: type("A", (), {"total_available_gpus": 8, "total_available_npus": 0})(), + ) + rec.tick() + assert len(client.applied) == 1 + cfgs = client.applied[0]["app"]["llm_configs"] + assert {c["model_loading_config"]["model_id"] for c in cfgs} == {"alice-202601061235-x"} # only keep remains + row = db.get_serve_model("del") + assert row and row["state"] == "DELETED" + assert row.get("deleted_at") + + +def test_reconciler_apply_failure_marks_failed(tmp_path: Path): + from argus.service.config import V2Config + from argus.service.db import Db + from argus.service.serving_reconciler import ServingReconciler + + cfg = V2Config.from_root_dict( + { + "ray": {"shared_root": "/private"}, + "service": {"api": {}, "auth": {}, "sqlite": {"db_path": str(tmp_path / "mvp.sqlite3")}, "scheduler": {}}, + "data": {"sftpgo": {}, "retention": {}}, + "serving": {"serve": {"http_port": 8000}, "llm": {"accelerator_type": "H20"}}, + } + ) + db = Db(cfg.sqlite.db_path) + db.init() + _seed_model(db, model_key="mk1", user_id="alice", state="QUEUED") + + client = _FakeServeClient() + client.fail_apply = True + rec = 
ServingReconciler( + db=db, + v2_cfg=cfg, + ray_runtime_env_env_vars={}, + serve_client=client, + get_available_fn=lambda: type("A", (), {"total_available_gpus": 8, "total_available_npus": 0})(), + ) + rec.tick() + row = db.get_serve_model("mk1") + assert row and row["state"] == "FAILED" + assert row.get("error_summary") diff --git a/src/mvp/py/tests/test_serving_spec_paths.py b/src/mvp/py/tests/test_serving_spec_paths.py new file mode 100644 index 0000000..2e6e903 --- /dev/null +++ b/src/mvp/py/tests/test_serving_spec_paths.py @@ -0,0 +1,47 @@ +from __future__ import annotations + +from datetime import datetime, timezone + +import pytest + +from argus.service.serving_spec import ServingSpec, resolve_serving_spec + + +def test_expand_home_macro_and_validate_user_path_ok(): + spec = ServingSpec( + model_id="qwen-0.5b", + model_source="$HOME/models/my_model", + num_replicas=1, + gpus_per_replica=1, + ) + r = resolve_serving_spec(spec=spec, user_id="alice", now_utc=datetime(2026, 1, 6, 12, 35, tzinfo=timezone.utc)) + assert r.model_source == "/private/users/alice/models/my_model" + assert r.model_id == "alice-202601061235-qwen-0.5b" + + +def test_expand_common_hf_macro_ok(): + spec = ServingSpec( + model_id="qwen-0.5b", + model_source="$HOME/common/hf/hub/models--Qwen--Qwen2.5/snapshots/abc", + num_replicas=1, + gpus_per_replica=1, + ) + r = resolve_serving_spec(spec=spec, user_id="alice", now_utc=datetime(2026, 1, 6, 12, 35, tzinfo=timezone.utc)) + assert r.model_source.startswith("/private/hf/") + + +@pytest.mark.parametrize( + "src", + [ + "/etc/passwd", + "relative/path", + "/private/users/bob/models/x", + "/private/users/alice/../bob/x", + "/private/common/hf/x", + ], +) +def test_model_source_path_rejected(src: str): + spec = ServingSpec(model_id="qwen-0.5b", model_source=src, num_replicas=1, gpus_per_replica=1) + with pytest.raises((ValueError, PermissionError)): + resolve_serving_spec(spec=spec, user_id="alice", now_utc=datetime(2026, 1, 6, 12, 35, tzinfo=timezone.utc)) + diff --git a/src/mvp/py/tests/test_serving_spec_validation.py b/src/mvp/py/tests/test_serving_spec_validation.py new file mode 100644 index 0000000..066cc45 --- /dev/null +++ b/src/mvp/py/tests/test_serving_spec_validation.py @@ -0,0 +1,72 @@ +from __future__ import annotations + +import pytest + +from argus.service.serving_spec import ServingSpec, parse_serving_spec, validate_model_id_suffix + + +@pytest.mark.parametrize( + "suffix", + [ + "a", + "qwen-0.5b", + "Qwen2.5-0.5B", + "a_b", + "a.b-c", + "a" * 64, + ], +) +def test_validate_model_id_suffix_accepts(suffix: str): + validate_model_id_suffix(suffix) + + +@pytest.mark.parametrize( + "suffix", + [ + "", + " a", + "a ", + "-bad", + ".bad", + "bad/", + "bad..", + "bad\n", + "bad\t", + "a" * 65, + ], +) +def test_validate_model_id_suffix_rejects(suffix: str): + with pytest.raises(ValueError): + validate_model_id_suffix(suffix) + + +def test_parse_serving_spec_smoke_defaults(): + spec = parse_serving_spec( + { + "model_id": "qwen-0.5b", + "model_source": "/private/hf/x", + } + ) + assert isinstance(spec, ServingSpec) + assert spec.num_replicas == 1 + assert spec.gpus_per_replica == 1 + assert spec.engine_kwargs is None + + +def test_parse_serving_spec_rejects_missing_fields(): + with pytest.raises(ValueError, match="missing required field: model_id"): + parse_serving_spec({"model_source": "/private/hf/x"}) + with pytest.raises(ValueError, match="missing required field: model_source"): + parse_serving_spec({"model_id": "x"}) + + +def 
test_parse_serving_spec_rejects_bad_types(): + with pytest.raises(ValueError, match="serving spec must be a mapping"): + parse_serving_spec(["nope"]) + with pytest.raises(ValueError, match="num_replicas"): + parse_serving_spec({"model_id": "x", "model_source": "/private/hf/x", "num_replicas": 0}) + with pytest.raises(ValueError, match="gpus_per_replica"): + parse_serving_spec({"model_id": "x", "model_source": "/private/hf/x", "gpus_per_replica": 0}) + with pytest.raises(ValueError, match="engine_kwargs"): + parse_serving_spec({"model_id": "x", "model_source": "/private/hf/x", "engine_kwargs": "nope"}) + diff --git a/src/mvp/py/tests/test_ui.py b/src/mvp/py/tests/test_ui.py index 7b86cf7..111f4a6 100644 --- a/src/mvp/py/tests/test_ui.py +++ b/src/mvp/py/tests/test_ui.py @@ -42,10 +42,13 @@ def test_ui_routes_render_200(tmp_path, monkeypatch): "/ui/login", "/ui/tasks", "/ui/tasks/new", + "/ui/serving", + "/ui/serving/new", "/ui/data", "/ui/admin", "/ui/tasks/any-task-id", "/ui/tasks/any-task-id/logs", + "/ui/serving/any-model-key", ): r = c.get(path, allow_redirects=True) assert r.status_code == 200 @@ -60,7 +63,7 @@ def test_ui_contains_sidebar_links(tmp_path, monkeypatch): r = c.get("/ui/tasks") assert r.status_code == 200 - for link in ("/ui/tasks", "/ui/tasks/new", "/ui/data", "/ui/login", "/ui/admin"): + for link in ("/ui/tasks", "/ui/tasks/new", "/ui/serving", "/ui/data", "/ui/login", "/ui/admin"): assert link in r.text assert "Ray Dashboard" in r.text diff --git a/src/mvp/py/tests/test_ui_serving.py b/src/mvp/py/tests/test_ui_serving.py new file mode 100644 index 0000000..8b022cd --- /dev/null +++ b/src/mvp/py/tests/test_ui_serving.py @@ -0,0 +1,56 @@ +from __future__ import annotations + +from pathlib import Path + +from fastapi.testclient import TestClient + +from argus.service.app import create_app + + +def _write_config(tmp_path: Path) -> Path: + p = tmp_path / "cfg.yaml" + p.write_text( + """ +ray: + address: "http://127.0.0.1:8265" + shared_root: "/private" + entrypoint_num_cpus: 1 + entrypoint_resources: { worker_node: 1 } + runtime_env: { env_vars: { PYTHONUNBUFFERED: "1" } } +service: + api: { host: "127.0.0.1", port: 8080 } + auth: { token_env: "MVP_INTERNAL_TOKEN" } + sqlite: { db_path: "%(db)s" } +data: + user_root: "%(users)s" + sftpgo: { enabled: false } + retention: { jobs_trash_after_days: 3, jobs_purge_after_days: 7, janitor_interval_s: 3600 } +serving: {} +""" + % {"db": str(tmp_path / "mvp.sqlite3"), "users": str(tmp_path / "users")} + ) + return p + + +def test_ui_serving_pages_render(tmp_path, monkeypatch): + cfg = _write_config(tmp_path) + monkeypatch.setenv("MVP_INTERNAL_TOKEN", "admin-token") + app = create_app(str(cfg)) + c = TestClient(app) + + for path in ("/ui/serving", "/ui/serving/new", "/ui/serving/any-model-key"): + r = c.get(path) + assert r.status_code == 200 + assert "/dev/null 2>&1; then + if ! 
docker image inspect "${RAY_NODE_IMAGE}" >/dev/null 2>&1; then BUILD="1" fi fi diff --git a/src/mvp/scripts/debug_serve_llm_smoke.sh b/src/mvp/scripts/debug_serve_llm_smoke.sh new file mode 100644 index 0000000..2269164 --- /dev/null +++ b/src/mvp/scripts/debug_serve_llm_smoke.sh @@ -0,0 +1,18 @@ +#!/usr/bin/env bash +set -euo pipefail + +container="${MVP_HEAD_CONTAINER:-argus-ray-head}" +model_source="${MODEL_SOURCE:-}" +if [[ -n "${1:-}" ]]; then + model_source="$1" +fi + +argv=(python3 /workspace/mvp/scripts/serve_llm_smoke.py) +if [[ -n "${model_source}" ]]; then + argv+=(--model-source "${model_source}") +fi +argv+=(--accelerator-type "${ARGUS_ACCELERATOR_TYPE:-H20}") + +echo "[host] run Ray Serve LLM smoke test in container: ${container}" >&2 +docker exec -it "${container}" bash -lc "$(printf '%q ' "${argv[@]}")" + diff --git a/src/mvp/scripts/run_all_v38_serving.sh b/src/mvp/scripts/run_all_v38_serving.sh new file mode 100755 index 0000000..2c6f172 --- /dev/null +++ b/src/mvp/scripts/run_all_v38_serving.sh @@ -0,0 +1,193 @@ +#!/usr/bin/env bash +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +# shellcheck source=lib.sh +source "${SCRIPT_DIR}/lib.sh" + +API_ADDR="${API_ADDR:-http://127.0.0.1:8080}" +OPENAI_BASE_URL="${OPENAI_BASE_URL:-http://127.0.0.1:8000/v1}" +ADMIN_TOKEN="${MVP_INTERNAL_TOKEN:-}" +USER_ID="${USER_ID:-alice}" +EXPECTED_RAY_NODES="${EXPECTED_RAY_NODES:-3}" # head + 2 workers + +CONFIG_IN_CONTAINER="${CONFIG_IN_CONTAINER:-/workspace/mvp/configs/dev.yaml}" +SFTPGO_ADMIN_PASSWORD="${SFTPGO_ADMIN_PASSWORD:-my-dev-sftpgo-admin}" +export SFTPGO_ADMIN_PASSWORD + +if [[ -z "${ADMIN_TOKEN}" ]]; then + echo "ERROR: MVP_INTERNAL_TOKEN must be set in host env (admin token)" >&2 + exit 1 +fi + +api_curl_admin() { + curl -sS -H "Authorization: Bearer ${ADMIN_TOKEN}" "$@" +} + +api_wait_ready() { + local tries="${1:-60}" + for i in $(seq 1 "${tries}"); do + if curl -sS -m 2 "${API_ADDR}/docs" >/dev/null 2>&1; then + echo "[host] api_ready: ${API_ADDR}" + return 0 + fi + echo "[host] waiting api... (${i}/${tries})" + sleep 2 + done + echo "ERROR: api not ready: ${API_ADDR}" >&2 + return 1 +} + +ray_wait_ready() { + local tries="${1:-60}" + for i in $(seq 1 "${tries}"); do + if curl -sS -m 2 "${RAY_DASHBOARD_ADDR}/api/version" >/dev/null 2>&1; then + echo "[host] ray_dashboard_ready: ${RAY_DASHBOARD_ADDR}" + return 0 + fi + echo "[host] waiting ray dashboard... (${i}/${tries})" + sleep 2 + done + echo "ERROR: ray dashboard not ready: ${RAY_DASHBOARD_ADDR}" >&2 + return 1 +} + +ray_wait_nodes() { + local want="${1:-3}" + local tries="${2:-60}" + for i in $(seq 1 "${tries}"); do + local out n + out="$(docker exec -i "${HEAD_CONTAINER}" python3 -c "import ray; ray.init(address='auto', ignore_reinit_error=True, log_to_driver=False, logging_level='ERROR'); print(sum(1 for n in ray.nodes() if n.get('Alive')))" 2>/dev/null || true)" + n="$(printf '%s\n' "${out}" | tail -n 1 | tr -cd '0-9' || true)" + if [[ "${n}" =~ ^[0-9]+$ ]]; then + echo "[host] ray_nodes_alive=${n} (want>=${want})" + if [[ "${n}" -ge "${want}" ]]; then + return 0 + fi + else + echo "[host] waiting ray nodes... 
(${i}/${tries})" + fi + sleep 2 + done + echo "ERROR: ray nodes not ready (want>=${want})" >&2 + docker exec -i "${HEAD_CONTAINER}" bash -lc "ray status || true" >&2 || true + return 1 +} + +openai_wait_ready() { + local tries="${1:-120}" + for i in $(seq 1 "${tries}"); do + if curl -sS -m 2 "${OPENAI_BASE_URL}/models" >/dev/null 2>&1; then + echo "[host] openai_ready: ${OPENAI_BASE_URL}" + return 0 + fi + echo "[host] waiting openai... (${i}/${tries})" + sleep 2 + done + echo "ERROR: openai not ready: ${OPENAI_BASE_URL}" >&2 + return 1 +} + +wait_model_state() { + local token="$1" + local model_key="$2" + local want="$3" + local tries="${4:-120}" + for i in $(seq 1 "${tries}"); do + local body state + body="$(curl -sS -H "Authorization: Bearer ${token}" "${API_ADDR}/api/v2/serve/models/${model_key}")" + state="$(printf '%s' "${body}" | python3 -c 'import sys,json; print(json.load(sys.stdin)["model"]["state"])' 2>/dev/null || true)" + echo "[host] model ${model_key}: ${state}" + if [[ "${state}" == "${want}" ]]; then + return 0 + fi + if [[ "${state}" == "FAILED" ]]; then + echo "[host] model failed; detail:" >&2 + printf '%s\n' "${body}" | python3 -m json.tool >&2 || true + return 1 + fi + sleep 2 + done + echo "ERROR: model not in state ${want} after timeout" >&2 + return 1 +} + +echo "[host] ===== run_all_v38_serving.sh begin =====" + +"${SCRIPT_DIR}/00_prereq_check.sh" +"${SCRIPT_DIR}/03_cleanup_v1_legacy.sh" +"${SCRIPT_DIR}/04_cleanup_v2_legacy.sh" + +echo "[host] bring down existing containers (best-effort)" +"${SCRIPT_DIR}/02_down.sh" || true + +echo "[host] (re)create containers (Ray + SFTPGo + W&B)" +# For v3.8, we need the latest ray-node image (ray[llm] deps). Force build once. +BUILD="${BUILD:-1}" "${SCRIPT_DIR}/01_up.sh" + +echo "[host] wait ray ready" +ray_wait_ready 60 +ray_wait_nodes "${EXPECTED_RAY_NODES}" 120 + +echo "[host] prepare data/model (best-effort; uses shared caches)" +"${SCRIPT_DIR}/30_prepare_data_and_model.sh" || true + +echo "[host] start api" +CONFIG_IN_CONTAINER="${CONFIG_IN_CONTAINER}" MVP_INTERNAL_TOKEN="${ADMIN_TOKEN}" "${SCRIPT_DIR}/60_start_api.sh" +api_wait_ready 60 + +echo "[host] create user (idempotent)" +api_curl_admin -X POST "${API_ADDR}/api/v2/users" -H "Content-Type: application/json" --data-binary "{\"user_id\":\"${USER_ID}\"}" >/dev/null || true + +echo "[host] issue user token" +USER_TOKEN="$(api_curl_admin -X POST "${API_ADDR}/api/v2/users/${USER_ID}/tokens" | python3 -c 'import sys,json; print(json.load(sys.stdin)["token"])')" + +echo "[host] resolve local model snapshot path (offline)" +LOCAL_MODEL_PATH="$(dexec "${HEAD_CONTAINER}" bash -lc "python3 -c \"import os; from huggingface_hub import snapshot_download; os.environ.setdefault('HF_HOME','/private/hf'); print(snapshot_download(repo_id='Qwen/Qwen2.5-0.5B-Instruct', local_files_only=True))\" " | tail -n 1)" +if [[ -z "${LOCAL_MODEL_PATH}" || "${LOCAL_MODEL_PATH}" != /* ]]; then + echo "ERROR: failed to resolve LOCAL_MODEL_PATH: ${LOCAL_MODEL_PATH}" >&2 + exit 1 +fi +echo "[host] local_model_path: ${LOCAL_MODEL_PATH}" + +echo "[host] submit serving model via API" +SERVE_SPEC=$'model_id: qwen-0.5b\nmodel_source: '"${LOCAL_MODEL_PATH}"$'\nnum_replicas: 1\ngpus_per_replica: 1\n' +CREATE_RESP="$(curl -sS -H "Authorization: Bearer ${USER_TOKEN}" -H "Content-Type: application/yaml" --data-binary "${SERVE_SPEC}" "${API_ADDR}/api/v2/serve/models")" +echo "[host] create_model_resp: ${CREATE_RESP}" +MODEL_KEY="$(printf '%s' "${CREATE_RESP}" | python3 -c 'import sys,json; 
print(json.load(sys.stdin)["model_key"])')" + +echo "[host] wait model RUNNING" +wait_model_state "${USER_TOKEN}" "${MODEL_KEY}" "RUNNING" 300 + +echo "[host] wait OpenAI ingress ready" +openai_wait_ready 120 + +echo "[host] verify /v1/models contains model" +MODEL_ID="$( + curl -sS "${OPENAI_BASE_URL}/models" \ + | python3 -c 'import sys,json; obj=json.load(sys.stdin); print("\n".join([m.get("id","") for m in obj.get("data",[]) if isinstance(m,dict)]))' \ + | grep -E "^${USER_ID}-[0-9]{12}-qwen-0\\.5b$" \ + | head -n1 \ + || true +)" +if [[ -z "${MODEL_ID}" ]]; then + echo "ERROR: model id not found in /v1/models" >&2 + curl -sS "${OPENAI_BASE_URL}/models" | python3 -m json.tool >&2 || true + exit 1 +fi +echo "[host] model_id: ${MODEL_ID}" + +echo "[host] chat completion (best-effort)" +CHAT_RESP="$(curl -sS -H "Content-Type: application/json" -H "Authorization: Bearer FAKE_KEY" -X POST "${OPENAI_BASE_URL}/chat/completions" --data-binary "{\"model\":\"${MODEL_ID}\",\"messages\":[{\"role\":\"user\",\"content\":\"hello\"}],\"max_tokens\":16,\"stream\":false}")" +printf '%s\n' "${CHAT_RESP}" | python3 -m json.tool >/dev/null 2>&1 || { + echo "ERROR: invalid chat response" >&2 + printf '%s\n' "${CHAT_RESP}" >&2 + exit 1 +} +echo "[host] chat_ok" + +echo "[host] delete model" +curl -sS -H "Authorization: Bearer ${USER_TOKEN}" -X DELETE "${API_ADDR}/api/v2/serve/models/${MODEL_KEY}" >/dev/null +wait_model_state "${USER_TOKEN}" "${MODEL_KEY}" "DELETED" 300 + +echo "[host] ===== run_all_v38_serving.sh done =====" diff --git a/src/mvp/scripts/serve_llm_smoke.py b/src/mvp/scripts/serve_llm_smoke.py new file mode 100644 index 0000000..17b0e61 --- /dev/null +++ b/src/mvp/scripts/serve_llm_smoke.py @@ -0,0 +1,102 @@ +from __future__ import annotations + +import argparse +import json +import os +import time +import urllib.request +from pathlib import Path +from typing import Any + + +def _pick_qwen_snapshot() -> str | None: + base = Path("/private/hf/hub/models--Qwen--Qwen2.5-0.5B-Instruct/snapshots") + if not base.exists(): + return None + snaps = sorted([p for p in base.iterdir() if p.is_dir()], reverse=True) + return str(snaps[0]) if snaps else None + + +def _http_get_json(url: str) -> Any: + with urllib.request.urlopen(url, timeout=10) as resp: + raw = resp.read().decode("utf-8") + return json.loads(raw) + + +def _wait_http_json(url: str, *, timeout_s: int) -> Any: + deadline = time.time() + float(timeout_s) + last_err: Exception | None = None + while time.time() < deadline: + try: + return _http_get_json(url) + except Exception as e: + last_err = e + time.sleep(2) + raise RuntimeError(f"timeout waiting for {url}: {last_err!r}") + + +def main(argv: list[str] | None = None) -> int: + ap = argparse.ArgumentParser(description="Ray Serve LLM smoke test (deploy + /v1/models probe).") + ap.add_argument("--ray-address", default="auto") + ap.add_argument("--http-port", type=int, default=8000) + ap.add_argument("--app-name", default="argus_llm_smoke") + ap.add_argument("--route-prefix", default="/") + ap.add_argument("--accelerator-type", default=os.environ.get("ARGUS_ACCELERATOR_TYPE") or "H20") + ap.add_argument("--model-id", default="smoke-qwen-0.5b") + ap.add_argument("--model-source", default=None, help="Local path or HF id. 
Default: cached Qwen snapshot under /private/hf.") + ap.add_argument("--tensor-parallel-size", type=int, default=1) + ap.add_argument("--num-replicas", type=int, default=1) + ap.add_argument("--wait-s", type=int, default=600) + args = ap.parse_args(argv) + + model_source = str(args.model_source or _pick_qwen_snapshot() or "") + if not model_source: + raise SystemExit("missing --model-source and no cached Qwen snapshot found under /private/hf") + + # Force offline HF behavior for the smoke test. + os.environ.setdefault("HF_HOME", "/private/hf") + os.environ.setdefault("HUGGINGFACE_HUB_CACHE", "/private/hf/hub") + os.environ.setdefault("TRANSFORMERS_CACHE", "/private/hf/transformers") + os.environ["HF_HUB_OFFLINE"] = "1" + + import ray + + ray.init(address=str(args.ray_address), ignore_reinit_error=True, log_to_driver=False) + + from ray import serve + + try: + serve.start(proxy_location="HeadOnly", http_options={"host": "0.0.0.0", "port": int(args.http_port)}) + except Exception: + # Best-effort: Serve may already be running in the container (e.g., started by the MVP API scheduler). + pass + + from ray.serve.llm import LLMConfig, build_openai_app + + # Build a config dict and filter by the current Ray's LLMConfig schema, since fields + # may differ between Ray versions. + cfg_dict: dict[str, Any] = { + "model_loading_config": {"model_id": str(args.model_id), "model_source": model_source}, + "accelerator_type": str(args.accelerator_type), + "deployment_config": {"num_replicas": int(args.num_replicas)}, + "engine_kwargs": {"tensor_parallel_size": int(args.tensor_parallel_size)}, + "runtime_env": {"env_vars": {"HF_HUB_OFFLINE": "1", "HF_HOME": "/private/hf"}}, + } + allowed = set(getattr(LLMConfig, "model_fields", {}).keys()) + if allowed: + cfg_dict = {k: v for k, v in cfg_dict.items() if k in allowed} + + llm_cfg = LLMConfig(**cfg_dict) + app = build_openai_app({"llm_configs": [llm_cfg]}) + + serve.run(app, name=str(args.app_name), route_prefix=str(args.route_prefix)) + + models_url = f"http://127.0.0.1:{int(args.http_port)}/v1/models" + payload = _wait_http_json(models_url, timeout_s=int(args.wait_s)) + print(json.dumps(payload, indent=2, sort_keys=True)) + + return 0 + + +if __name__ == "__main__": + raise SystemExit(main())
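
The curl-based flow in run_all_v38_serving.sh (issue a user token, POST a ServingSpec as YAML, poll the model state, DELETE) can also be driven from Python. Below is a minimal stdlib-only sketch of that client-side flow; it is not part of the patch. The API address, bearer token, and model_source value are illustrative placeholders, while the endpoints, the application/yaml request body, and the model_key / model.state response fields follow what the tests and the script above exercise.

# Illustrative sketch only (not part of this patch): drives the /api/v2/serve
# endpoints the same way run_all_v38_serving.sh does with curl.
# Assumptions: API_ADDR, USER_TOKEN, and MODEL_SOURCE are placeholders supplied by the caller.
import json
import time
import urllib.request

API_ADDR = "http://127.0.0.1:8080"          # assumed host-side API address
USER_TOKEN = "REPLACE_WITH_USER_TOKEN"      # issued via POST /api/v2/users/<user_id>/tokens
MODEL_SOURCE = "$HOME/common/hf/models--Qwen--Qwen2.5-0.5B-Instruct/snapshots/sha"

def _call(method: str, path: str, body: bytes | None = None, content_type: str | None = None):
    # Small helper around urllib for authenticated JSON calls to the MVP API.
    req = urllib.request.Request(f"{API_ADDR}{path}", data=body, method=method)
    req.add_header("Authorization", f"Bearer {USER_TOKEN}")
    if content_type:
        req.add_header("Content-Type", content_type)
    with urllib.request.urlopen(req, timeout=10) as resp:
        return json.loads(resp.read().decode("utf-8"))

# Submit a ServingSpec as YAML (the request body format used by the tests and the script).
spec_yaml = (
    "model_id: qwen-0.5b\n"
    f"model_source: {MODEL_SOURCE}\n"
    "num_replicas: 1\n"
    "gpus_per_replica: 1\n"
)
created = _call("POST", "/api/v2/serve/models", spec_yaml.encode("utf-8"), "application/yaml")
model_key = created["model_key"]            # state starts as QUEUED

# Poll until the reconciler reports RUNNING (or FAILED), mirroring wait_model_state.
while True:
    detail = _call("GET", f"/api/v2/serve/models/{model_key}")
    state = detail["model"]["state"]
    if state in ("RUNNING", "FAILED"):
        break
    time.sleep(2)

# Tear down: DELETE moves the model to DELETING; the reconciler later marks it DELETED.
_call("DELETE", f"/api/v2/serve/models/{model_key}")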
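
Once a model reaches RUNNING, the OpenAI-compatible ingress that openai_wait_ready probes can be queried directly. Another stdlib-only sketch follows; the base URL and model id are assumed for illustration (the real id follows the <user>-<UTC minutes>-<suffix> form asserted in run_all_v38_serving.sh), and the bearer value mirrors the FAKE_KEY placeholder used by the script's curl call.

# Illustrative sketch only: query the Ray Serve OpenAI-compatible ingress the
# same way the curl calls in run_all_v38_serving.sh do.
# Assumptions: OPENAI_BASE_URL and MODEL_ID are placeholders for this example.
import json
import urllib.request

OPENAI_BASE_URL = "http://127.0.0.1:8000/v1"     # Serve HTTP ingress (serving.serve.http_port: 8000)
MODEL_ID = "alice-202601061235-qwen-0.5b"        # <user>-<UTC minutes>-<suffix> form

# List served models; the deployed model id should appear in data[].id.
with urllib.request.urlopen(f"{OPENAI_BASE_URL}/models", timeout=10) as resp:
    print([m["id"] for m in json.loads(resp.read())["data"]])

# Minimal chat completion request against the standard OpenAI-style endpoint.
payload = json.dumps(
    {
        "model": MODEL_ID,
        "messages": [{"role": "user", "content": "hello"}],
        "max_tokens": 16,
        "stream": False,
    }
).encode("utf-8")
req = urllib.request.Request(f"{OPENAI_BASE_URL}/chat/completions", data=payload, method="POST")
req.add_header("Content-Type", "application/json")
req.add_header("Authorization", "Bearer FAKE_KEY")
with urllib.request.urlopen(req, timeout=60) as resp:
    print(json.loads(resp.read())["choices"][0]["message"]["content"])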