argus-cluster/src/mvp/py/argus/ray/head_publisher.py

66 lines
1.9 KiB
Python

from __future__ import annotations
import argparse
import time
from .discovery import build_head_record, write_head_record_atomic
def publish_once(
    *,
    cluster_name: str,
    head_ip_file: str,
    head_ip: str,
    gcs_port: int,
    dashboard_port: int,
    ttl_s: int,
) -> None:
    """Build a single head record and write it to ``head_ip_file``.

    All arguments are keyword-only.

    Args:
        cluster_name: Forwarded to ``build_head_record``.
        head_ip_file: Destination passed to ``write_head_record_atomic``.
        head_ip: Forwarded to ``build_head_record``.
        gcs_port: Forwarded to ``build_head_record``.
        dashboard_port: Forwarded to ``build_head_record``.
        ttl_s: Forwarded to ``build_head_record``.

    Any exception raised by the two helpers propagates to the caller.
    """
    # Every field except the destination path goes into the record itself.
    record_fields = {
        "cluster_name": cluster_name,
        "head_ip": head_ip,
        "gcs_port": gcs_port,
        "dashboard_port": dashboard_port,
        "ttl_s": ttl_s,
    }
    head_record = build_head_record(**record_fields)
    write_head_record_atomic(head_ip_file, head_record)
def main(argv: list[str] | None = None) -> int:
    """Command-line entry point: publish the head record once or periodically.

    Args:
        argv: Argument list to parse. ``None`` lets ``argparse`` read the
            process command line.

    Returns:
        ``0`` after a single write when ``--once`` is given. Without
        ``--once`` the function republishes forever and never returns
        normally; it only ends through an exception or process termination.
    """
    parser = argparse.ArgumentParser(
        description="Publish Ray head address to shared storage (head.json)."
    )

    # Mandatory string options.
    for flag in ("--cluster-name", "--head-ip-file", "--head-ip"):
        parser.add_argument(flag, required=True)

    # Integer options with their defaults.
    int_options = (
        ("--gcs-port", 6379),
        ("--dashboard-port", 8265),
        ("--ttl-s", 60),
        ("--refresh-s", 10),
    )
    for flag, default in int_options:
        parser.add_argument(flag, type=int, default=default)

    parser.add_argument(
        "--once",
        action="store_true",
        help="Write once then exit (for testing/debug).",
    )
    opts = parser.parse_args(argv)

    # The same keyword set is used for the one-shot write and for every
    # periodic refresh, so collect it a single time.
    publish_kwargs = {
        "cluster_name": opts.cluster_name,
        "head_ip_file": opts.head_ip_file,
        "head_ip": opts.head_ip,
        "gcs_port": opts.gcs_port,
        "dashboard_port": opts.dashboard_port,
        "ttl_s": opts.ttl_s,
    }

    if opts.once:
        publish_once(**publish_kwargs)
        return 0

    # Never sleep for less than one second between refreshes, even when a
    # zero or negative --refresh-s is supplied.
    interval_s = max(1, int(opts.refresh_s))
    while True:
        publish_once(**publish_kwargs)
        time.sleep(interval_s)
if __name__ == "__main__":
    # When run as a script, use main()'s return value as the process exit status.
    exit_status = main()
    raise SystemExit(exit_status)