# argus-cluster/src/mvp/images/argus-ray-node/Dockerfile

# Base image for the node; override at build time with `--build-arg BASE_IMAGE=<image>`.
# NOTE(review): the default tag looks like a moving "latest"-style tag, so rebuilds
# may pick up a different base — consider a fixed tag or digest; confirm with the image owner.
ARG BASE_IMAGE=verlai/verl:vllm011.latest
FROM ${BASE_IMAGE}

# Run all following shell-form instructions through bash as a login shell (-l).
# NOTE(review): presumably needed so profile scripts from the base image set up
# the Python environment for `python3`/`pip` — confirm before changing.
SHELL ["/bin/bash", "-lc"]

# supervisord is taken from PyPI instead of the distro package manager, so this
# step does not depend on which distribution the base image is built from.
RUN python3 -m pip install \
        --no-cache-dir \
        supervisor

# v3.8: add the Ray Serve LLM extras on top of the Ray that ships in the base image.
# The installed Ray version is read back and used as an exact pin, so pip only adds
# the extras and does not swap the base image's Ray for another release.
RUN ray_version="$(python3 -c 'import ray; print(ray.__version__)')" \
    && python3 -m pip install --no-cache-dir "ray[serve,llm]==${ray_version}"

# The ray.serve.llm import chain currently reaches ray.rllib, which needs extra
# packages. Install them explicitly, then import the public entry points as a
# build-time smoke test so a broken dependency set fails the build.
RUN python3 -m pip install --no-cache-dir \
        dm-tree \
        gymnasium \
    && python3 -c "from ray.serve.llm import LLMConfig, build_openai_app; print('ray_serve_llm_ok')"

# Minimal embedded code for the stateless pool (API code is intentionally excluded).
# COPY creates any missing directories in the destination path, so no separate
# `RUN mkdir -p` layer is needed. Files are grouped per destination directory to
# keep the layer count down; the trailing slash marks the destination as a directory.
COPY py/argus/__init__.py /opt/argus/py/argus/
COPY py/argus/ray/__init__.py \
     py/argus/ray/discovery.py \
     py/argus/ray/head_publisher.py \
     py/argus/ray/worker_watchdog.py \
     /opt/argus/py/argus/ray/

# Launcher scripts. The entrypoint is renamed on copy, so it needs its own COPY;
# the remaining scripts keep their filenames and share one multi-source COPY.
COPY images/argus-ray-node/entrypoint.sh /usr/local/bin/argus-entrypoint.sh
COPY images/argus-ray-node/argus-head-ray.sh \
     images/argus-ray-node/argus-head-publisher.sh \
     images/argus-ray-node/argus-worker-watchdog.sh \
     /usr/local/bin/

# Make sure every script is executable regardless of its mode in the build context.
RUN chmod +x \
        /usr/local/bin/argus-entrypoint.sh \
        /usr/local/bin/argus-head-ray.sh \
        /usr/local/bin/argus-head-publisher.sh \
        /usr/local/bin/argus-worker-watchdog.sh

# Disable Python's stdout/stderr buffering so process output reaches container logs immediately.
ENV PYTHONUNBUFFERED=1

# Exec-form entrypoint: the script is started directly, without a wrapping shell.
# NOTE(review): no USER instruction is visible in this file, so the container runs
# as whatever user the base image defines — confirm this is intended.
ENTRYPOINT ["/usr/local/bin/argus-entrypoint.sh"]