#!/usr/bin/env bash
#
# ARGUS Unified Build System - image build tool.
#
# Builds the ARGUS docker images (core log/bind images, master, metric, sys
# test nodes, web, alert) and, on request, the one-click GPU bundle image and
# the server / client packages. Run with --help for the option list.
#
# Requires: docker, plus "$root/scripts/common/build_user.sh", which is sourced
# and expected to provide load_build_user (setting ARGUS_BUILD_UID and
# ARGUS_BUILD_GID).
#
# Exit status: 0 when every requested build succeeded, 1 otherwise.
set -euo pipefail

#######################################
# Print usage information to stdout.
# The heredoc delimiter is unquoted on purpose so that $0 expands to the
# name the script was invoked with.
#######################################
show_help() {
  cat <<EOF
ARGUS Unified Build System - Image Build Tool

Usage: $0 [OPTIONS]

Options:
  --intranet              Use intranet mirror for log/bind builds
  --master-offline        Build master offline image (requires src/master/offline_wheels.tar.gz)
  --metric                Build metric module images (ftp, prometheus, grafana, test nodes)
  --no-cache              Build all images without using Docker layer cache
  --only LIST             Comma-separated targets to build: core,master,metric,web,alert,sys,gpu_bundle,server_pkg,client_pkg,all
  --version DATE          Date tag used by gpu_bundle/server_pkg/client_pkg (e.g. 20251112)
  --client-semver X.Y.Z   Override client semver used in all-in-one-full artifact (optional)
  --cuda VER              CUDA runtime version for NVIDIA base (default: 12.2.2)
  -h, --help              Show this help message

Examples:
  $0                          # Build with default sources
  $0 --intranet               # Build with intranet mirror
  $0 --master-offline         # Additionally build argus-master:offline
  $0 --metric                 # Additionally build metric module images
  $0 --intranet --master-offline --metric
EOF
}

# ---------------------------------------
# Defaults (overridden by the options below)
# ---------------------------------------
use_intranet=false
build_core=true
build_master=true
build_master_offline=false
build_metric=true
build_web=true
build_alert=true
build_sys=true
build_gpu_bundle=false
build_server_pkg=false
build_client_pkg=false
no_cache=false
bundle_date=""
client_semver=""
cuda_ver="12.2.2"

# Base image used whenever the requested CUDA version cannot be resolved/pulled.
default_cuda_base_image="nvidia/cuda:12.2.2-runtime-ubuntu22.04"

# ---------------------------------------
# Argument parsing
# ---------------------------------------
while [[ $# -gt 0 ]]; do
  case $1 in
    --intranet)
      use_intranet=true
      shift
      ;;
    --master)
      build_master=true
      shift
      ;;
    --master-offline)
      build_master=true
      build_master_offline=true
      shift
      ;;
    --metric)
      build_metric=true
      shift
      ;;
    --no-cache)
      no_cache=true
      shift
      ;;
    --only)
      if [[ -z ${2:-} ]]; then
        echo "--only requires a target list" >&2
        exit 1
      fi
      sel="$2"
      shift 2
      # Reset every target, then enable only the selected ones.
      build_core=false
      build_master=false
      build_metric=false
      build_web=false
      build_alert=false
      build_sys=false
      build_gpu_bundle=false
      build_server_pkg=false
      build_client_pkg=false
      IFS=',' read -ra parts <<< "$sel"
      for p in "${parts[@]}"; do
        case "$p" in
          core) build_core=true ;;
          master) build_master=true ;;
          metric) build_metric=true ;;
          web) build_web=true ;;
          alert) build_alert=true ;;
          sys) build_sys=true ;;
          gpu_bundle) build_gpu_bundle=true ;;
          server_pkg)
            # The server package is assembled from these images, so build them too.
            build_server_pkg=true
            build_core=true
            build_master=true
            build_metric=true
            build_web=true
            build_alert=true
            ;;
          client_pkg) build_client_pkg=true ;;
          all)
            build_core=true
            build_master=true
            build_metric=true
            build_web=true
            build_alert=true
            build_sys=true
            ;;
          *)
            echo "Unknown --only target: $p" >&2
            exit 1
            ;;
        esac
      done
      ;;
    --version)
      if [[ -z ${2:-} ]]; then
        echo "--version requires a value like 20251112" >&2
        exit 1
      fi
      bundle_date="$2"
      shift 2
      ;;
    --client-semver)
      if [[ -z ${2:-} ]]; then
        echo "--client-semver requires a value like 1.43.0" >&2
        exit 1
      fi
      client_semver="$2"
      shift 2
      ;;
    --cuda)
      if [[ -z ${2:-} ]]; then
        echo "--cuda requires a value like 12.2.2" >&2
        exit 1
      fi
      cuda_ver="$2"
      shift 2
      ;;
    -h|--help)
      show_help
      exit 0
      ;;
    *)
      echo "Unknown option: $1" >&2
      show_help
      exit 1
      ;;
  esac
done

# ---------------------------------------
# Environment / shared docker build arguments
# ---------------------------------------
# Repository root is the parent of the directory holding this script.
root="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
. "$root/scripts/common/build_user.sh"

declare -a build_args=()

if [[ "$use_intranet" == true ]]; then
  build_args+=("--build-arg" "USE_INTRANET=true")
fi

cd "$root"

load_build_user
build_args+=("--build-arg" "ARGUS_BUILD_UID=${ARGUS_BUILD_UID}" "--build-arg" "ARGUS_BUILD_GID=${ARGUS_BUILD_GID}")

if [[ "$no_cache" == true ]]; then
  build_args+=("--no-cache")
fi

master_root="$root/src/master"
master_offline_tar="$master_root/offline_wheels.tar.gz"
master_offline_dir="$master_root/offline_wheels"

# Unpack the offline wheels up front so a missing/empty archive fails fast.
if [[ "$build_master_offline" == true ]]; then
  if [[ ! -f "$master_offline_tar" ]]; then
    echo "❌ offline wheels tar not found: $master_offline_tar" >&2
    echo " 请提前准备好 offline_wheels.tar.gz 后再执行 --master-offline" >&2
    exit 1
  fi
  echo "📦 Preparing offline wheels for master (extracting $master_offline_tar)"
  rm -rf "$master_offline_dir"
  mkdir -p "$master_offline_dir"
  tar -xzf "$master_offline_tar" -C "$master_root"
  has_wheel=$(find "$master_offline_dir" -maxdepth 1 -type f -name '*.whl' -print -quit)
  if [[ -z "$has_wheel" ]]; then
    echo "❌ offline_wheels extraction failed或无 wheel: $master_offline_dir" >&2
    exit 1
  fi
fi

echo "======================================="
echo "ARGUS Unified Build System"
echo "======================================="

if [[ "$use_intranet" == true ]]; then
  echo "🌐 Mode: Intranet (Using internal mirror: 10.68.64.1)"
else
  echo "🌐 Mode: Public (Using default package sources)"
fi

echo "👤 Build user UID:GID -> ${ARGUS_BUILD_UID}:${ARGUS_BUILD_GID}"

echo "📁 Build context: $root"
echo ""

#######################################
# Run `docker build` for one image.
# Globals:   build_args (read)
# Arguments: $1 - human-readable image name (for log output)
#            $2 - Dockerfile path
#            $3 - image tag
#            $4 - build context (optional, default ".")
#            $5.. - extra arguments passed to docker build (optional)
# Returns:   0 when the build succeeded, 1 otherwise
#######################################
build_image() {
  local image_name=$1
  local dockerfile_path=$2
  local tag=$3
  local context="."
  shift 3

  if [[ $# -gt 0 ]]; then
    context=$1
    shift
  fi

  local -a extra_args=("$@")

  echo "🔄 Building $image_name image..."
  echo " Dockerfile: $dockerfile_path"
  echo " Tag: $tag"
  echo " Context: $context"

  # The ${arr[@]+...} form keeps an empty array from tripping `set -u`
  # on Bash releases older than 4.4.
  if docker build "${build_args[@]}" ${extra_args[@]+"${extra_args[@]}"} \
    -f "$dockerfile_path" -t "$tag" "$context"; then
    echo "✅ $image_name image built successfully"
    return 0
  else
    echo "❌ Failed to build $image_name image"
    return 1
  fi
}

#######################################
# Make sure a base image is available locally, pulling it with retries.
# Arguments: $1 - image reference
#            $2 - number of pull attempts (default 3)
#            $3 - delay between attempts in seconds (default 5)
# Returns:   0 when the image is present or was pulled, 1 otherwise
#######################################
pull_base_image() {
  local image_ref=$1
  local attempts=${2:-3}
  local delay=${3:-5}
  local i

  # If the image already exists locally, skip pulling.
  if docker image inspect "$image_ref" >/dev/null 2>&1; then
    echo " Local image present; skip pull: $image_ref"
    return 0
  fi

  for ((i = 1; i <= attempts; i++)); do
    echo " Pulling base image ($i/$attempts): $image_ref"
    if docker pull "$image_ref" >/dev/null; then
      echo " Base image ready: $image_ref"
      return 0
    fi
    echo " Pull failed: $image_ref"
    if (( i < attempts )); then
      echo " Retrying in ${delay}s..."
      sleep "$delay"
    fi
  done

  echo "❌ Unable to pull base image after ${attempts} attempts: $image_ref"
  return 1
}

images_built=()
build_failed=false

#######################################
# Pre-pull a list of base images; any failure marks the run as failed.
# Globals:   build_failed (written)
# Arguments: image references
#######################################
pull_base_images() {
  local ref
  for ref in "$@"; do
    if ! pull_base_image "$ref"; then
      build_failed=true
    fi
  done
}

#######################################
# Build every image described by a "label|dockerfile|tag|context" spec.
# Globals:   images_built (appended), build_failed (written)
# Arguments: build specs
#######################################
build_specs() {
  local spec label dockerfile tag context
  for spec in "$@"; do
    IFS='|' read -r label dockerfile tag context <<< "$spec"
    if build_image "$label" "$dockerfile" "$tag" "$context"; then
      images_built+=("$tag")
    else
      build_failed=true
    fi
    echo ""
  done
}

#######################################
# Map a requested CUDA version to an nvidia/cuda runtime base image tag.
# Accepts "12", "12.2" or "12.2.2". For partial versions the newest matching
# image already present locally is preferred; otherwise a ".2" patch (and
# ".2" minor) is assumed. Anything else falls back to the default base image.
# Globals:   default_cuda_base_image (read)
# Arguments: $1 - requested CUDA version
# Outputs:   the image reference on stdout
#######################################
resolve_cuda_base_tag() {
  local want="$1"
  local major minor patch best

  if [[ "$want" =~ ^([0-9]+)\.([0-9]+)\.([0-9]+)$ ]]; then
    major="${BASH_REMATCH[1]}"
    minor="${BASH_REMATCH[2]}"
    patch="${BASH_REMATCH[3]}"
    echo "nvidia/cuda:${major}.${minor}.${patch}-runtime-ubuntu22.04"
    return 0
  elif [[ "$want" =~ ^([0-9]+)\.([0-9]+)$ ]]; then
    major="${BASH_REMATCH[1]}"
    minor="${BASH_REMATCH[2]}"
    # Try to find the best local patch release for major.minor.
    best=$(docker images --format '{{.Repository}}:{{.Tag}}' nvidia/cuda 2>/dev/null \
      | grep -E "^nvidia/cuda:${major}\\.${minor}\\.[0-9]+-runtime-ubuntu22\\.04\$" \
      | sed -E 's#^nvidia/cuda:([0-9]+\.[0-9]+\.)([0-9]+)-runtime-ubuntu22\.04$#\1\2#g' \
      | sort -V | tail -n1 || true)
    if [[ -n "$best" ]]; then
      echo "nvidia/cuda:${best}-runtime-ubuntu22.04"
      return 0
    fi
    # Fallback patch if none is available locally.
    echo "nvidia/cuda:${major}.${minor}.2-runtime-ubuntu22.04"
    return 0
  elif [[ "$want" =~ ^([0-9]+)$ ]]; then
    major="${BASH_REMATCH[1]}"
    # Try to find the best local release for this major version.
    best=$(docker images --format '{{.Repository}}:{{.Tag}}' nvidia/cuda 2>/dev/null \
      | grep -E "^nvidia/cuda:${major}\\.[0-9]+\\.[0-9]+-runtime-ubuntu22\\.04\$" \
      | sed -E 's#^nvidia/cuda:([0-9]+\.[0-9]+\.[0-9]+)-runtime-ubuntu22\.04$#\1#g' \
      | sort -V | tail -n1 || true)
    if [[ -n "$best" ]]; then
      echo "nvidia/cuda:${best}-runtime-ubuntu22.04"
      return 0
    fi
    echo "nvidia/cuda:${major}.2.2-runtime-ubuntu22.04"
    return 0
  fi

  # Invalid format: fall back to the default.
  echo "$default_cuda_base_image"
  return 0
}

#######################################
# Print the first argus-metric_*.tar.gz found in a directory (glob order),
# or nothing when there is none.
# Arguments: $1 - directory to search
#######################################
first_artifact_tar() {
  local dir=$1
  local candidate
  for candidate in "$dir"/argus-metric_*.tar.gz; do
    if [[ -e "$candidate" ]]; then
      printf '%s\n' "$candidate"
      return 0
    fi
  done
  return 0
}

#######################################
# Put the previous value back into config/VERSION (relative to the current
# directory) when it was overridden for packaging.
# Arguments: $1 - override semver (empty means nothing was changed)
#            $2 - previous VERSION content
#######################################
restore_aio_version() {
  local override=$1
  local previous=$2
  if [[ -n "$override" ]]; then
    echo "$previous" > config/VERSION
  fi
}

#######################################
# Build the one-click GPU bundle image directly on an NVIDIA CUDA base.
# Steps: resolve/pull base image, build argus-agent, inject it into the
# all-in-one-full plugin and package the artifact, stage a docker build
# context, build and tag the image.
# NOTE: callers invoke this as `if ! ...`, where `set -e` is inactive, so
# every critical step is checked explicitly.
# Globals:   root, default_cuda_base_image (read); images_built (appended)
# Arguments: $1 - date tag (e.g. 20251112)
#            $2 - CUDA version (e.g. 12.2.2)
#            $3 - client semver override (may be empty)
# Returns:   0 on success, 1 on any failure
#######################################
build_gpu_bundle_image() {
  local date_tag="$1"       # e.g. 20251112
  local cuda_ver_local="$2" # e.g. 12.2.2
  local client_ver="$3"     # semver like 1.43.0

  if [[ -z "$date_tag" ]]; then
    echo "❌ gpu_bundle requires --version YYYYMMDD (e.g. 20251112)" >&2
    return 1
  fi

  # Sanitize the CUDA version (trim trailing dots like '12.2.').
  while [[ "$cuda_ver_local" == *"." ]]; do
    cuda_ver_local="${cuda_ver_local%.}"
  done

  local base_image
  base_image=$(resolve_cuda_base_tag "$cuda_ver_local")

  echo
  echo "🔧 Preparing one-click GPU bundle build"
  echo " CUDA runtime base: ${base_image}"
  echo " Bundle tag : ${date_tag}"

  # 1) Ensure NVIDIA base image (skip pull if local).
  if ! pull_base_image "$base_image"; then
    # Try once more with the default if the resolved tag is unavailable.
    if ! pull_base_image "$default_cuda_base_image"; then
      return 1
    fi
    base_image="$default_cuda_base_image"
  fi

  # 2) Build latest argus-agent from source.
  printf '\n%s\n' "🛠 Building argus-agent from src/agent"
  if ! pushd "$root/src/agent" >/dev/null; then
    echo "❌ cannot enter $root/src/agent" >&2
    return 1
  fi
  if ! bash scripts/build_binary.sh; then
    echo "❌ argus-agent build failed" >&2
    popd >/dev/null
    return 1
  fi
  if [[ ! -f "dist/argus-agent" ]]; then
    echo "❌ argus-agent binary missing after build" >&2
    popd >/dev/null
    return 1
  fi
  popd >/dev/null

  # 3) Inject agent into all-in-one-full plugin and package artifact.
  local aio_root="$root/src/metric/client-plugins/all-in-one-full"
  local agent_bin_src="$root/src/agent/dist/argus-agent"
  local agent_bin_dst="$aio_root/plugins/argus-agent/bin/argus-agent"
  printf '\n%s\n' "📦 Updating all-in-one-full agent binary → $agent_bin_dst"
  if ! cp -f "$agent_bin_src" "$agent_bin_dst"; then
    echo "❌ failed to copy agent binary to $agent_bin_dst" >&2
    return 1
  fi
  chmod +x "$agent_bin_dst" || true

  if ! pushd "$aio_root" >/dev/null; then
    echo "❌ cannot enter $aio_root" >&2
    return 1
  fi
  local prev_version
  prev_version="$(cat config/VERSION 2>/dev/null || echo "1.0.0")"
  local use_version="$prev_version"
  if [[ -n "$client_ver" ]]; then
    if ! echo "${client_ver}" > config/VERSION; then
      echo "❌ failed to write $aio_root/config/VERSION" >&2
      popd >/dev/null
      return 1
    fi
    use_version="$client_ver"
  fi
  echo " Packaging all-in-one-full artifact version: $use_version"
  if ! bash scripts/package_artifact.sh --force; then
    echo "❌ package_artifact.sh failed" >&2
    # Restore VERSION if changed.
    restore_aio_version "$client_ver" "$prev_version"
    popd >/dev/null
    return 1
  fi

  local artifact_dir="$aio_root/artifact/$use_version"
  local artifact_tar
  artifact_tar="$(first_artifact_tar "$artifact_dir")"
  if [[ -z "$artifact_tar" ]]; then
    echo " No argus-metric_*.tar.gz found; invoking publish_artifact.sh to assemble..."
    local owner
    owner="$(id -u):$(id -g)"
    if ! bash scripts/publish_artifact.sh "$use_version" --output-dir "$artifact_dir" --owner "$owner"; then
      echo "❌ publish_artifact.sh failed" >&2
      restore_aio_version "$client_ver" "$prev_version"
      popd >/dev/null
      return 1
    fi
    artifact_tar="$(first_artifact_tar "$artifact_dir")"
  fi
  if [[ -z "$artifact_tar" ]]; then
    echo "❌ artifact tar not found under $artifact_dir" >&2
    restore_aio_version "$client_ver" "$prev_version"
    popd >/dev/null
    return 1
  fi
  # Restore VERSION if changed (keep filesystem clean).
  restore_aio_version "$client_ver" "$prev_version"
  popd >/dev/null

  # 4) Stage docker build context.
  local bundle_src="$root/src/bundle/gpu-node-bundle"
  local bundle_ctx="$bundle_src/.build-$date_tag"
  printf '\n%s\n' "🧰 Staging docker build context: $bundle_ctx"
  if ! {
    rm -rf "$bundle_ctx" \
      && mkdir -p "$bundle_ctx/bundle" "$bundle_ctx/private" \
      && cp "$bundle_src/Dockerfile" "$bundle_ctx/" \
      && cp "$bundle_src/node-bootstrap.sh" "$bundle_ctx/" \
      && cp "$artifact_tar" "$bundle_ctx/bundle/"
  }; then
    echo "❌ failed to stage docker build context: $bundle_ctx" >&2
    return 1
  fi
  # Offline fluent-bit assets (optional but useful).
  local fluent_build="$root/src/log/fluent-bit/build"
  if [[ -d "$fluent_build/etc" ]]; then
    cp -r "$fluent_build/etc" "$bundle_ctx/private/"
  fi
  if [[ -d "$fluent_build/packages" ]]; then
    cp -r "$fluent_build/packages" "$bundle_ctx/private/"
  fi
  if [[ -f "$fluent_build/start-fluent-bit.sh" ]]; then
    cp "$fluent_build/start-fluent-bit.sh" "$bundle_ctx/private/"
  fi

  # 5) Build the final bundle image (directly from NVIDIA base).
  local image_tag="argus-sys-metric-test-node-bundle-gpu:${date_tag}"
  # Strip the repository prefix and the "-runtime-ubuntu22.04" suffix to get x.y.z.
  local cuda_full="${base_image#nvidia/cuda:}"
  cuda_full="${cuda_full%-runtime-ubuntu22.04}"
  printf '\n%s\n' "🔄 Building GPU Bundle image"
  if build_image "GPU Bundle" "$bundle_ctx/Dockerfile" "$image_tag" "$bundle_ctx" \
    --build-arg CUDA_VER="$cuda_full" \
    --build-arg CLIENT_VER="$use_version" \
    --build-arg BUNDLE_DATE="$date_tag"; then
    images_built+=("$image_tag")
    # Also tag latest for convenience (best effort).
    docker tag "$image_tag" argus-sys-metric-test-node-bundle-gpu:latest >/dev/null 2>&1 || true
    return 0
  fi
  return 1
}

#######################################
# Tag helper: ensure <repo>:<date_tag> exists for each repo, tagging it from
# <repo>:latest when needed.
# Arguments: $1 - date tag; $2.. - repository names
# Returns:   1 when a repo has neither :<date_tag> nor :latest
#######################################
ensure_version_tags() {
  local date_tag="$1"
  shift
  local repo
  for repo in "$@"; do
    if docker image inspect "$repo:$date_tag" >/dev/null 2>&1; then
      :
    elif docker image inspect "$repo:latest" >/dev/null 2>&1; then
      docker tag "$repo:latest" "$repo:$date_tag" || true
    else
      echo "❌ missing image for tagging: $repo (need :latest or :$date_tag)" >&2
      return 1
    fi
  done
  return 0
}

#######################################
# Build the server package after the server images are built.
# Globals:   root (read)
# Arguments: $1 - date tag
# Returns:   0 on success, 1 otherwise
#######################################
build_server_pkg_bundle() {
  local date_tag="$1"
  if [[ -z "$date_tag" ]]; then
    echo "❌ server_pkg requires --version YYYYMMDD" >&2
    return 1
  fi
  local -a repos=(
    argus-bind9 argus-master argus-elasticsearch argus-kibana
    argus-metric-ftp argus-metric-prometheus argus-metric-grafana
    argus-alertmanager argus-web-frontend argus-web-proxy
  )
  printf '\n%s\n' "🔖 Tagging server images with :$date_tag and collecting digests"
  if ! ensure_version_tags "$date_tag" "${repos[@]}"; then
    return 1
  fi
  # Optional: show digests.
  local repo digest
  for repo in "${repos[@]}"; do
    digest=$(docker images --digests --format '{{.Repository}}:{{.Tag}} {{.Digest}}' \
      | awk -v r="$repo:$date_tag" '$1==r{print $2}' | head -n1)
    printf ' • %s@%s\n' "$repo:$date_tag" "${digest:-}"
  done
  printf '\n%s\n' "📦 Building server package via deployment_new/build/make_server_package.sh --version $date_tag"
  if ! "$root/deployment_new/build/make_server_package.sh" --version "$date_tag"; then
    echo "❌ make_server_package.sh failed" >&2
    return 1
  fi
  return 0
}

#######################################
# Build the client GPU package, building the GPU bundle image first when it
# is not present locally.
# Globals:   root (read)
# Arguments: $1 - date tag; $2 - client semver (may be empty); $3 - CUDA version
# Returns:   0 on success, 1 otherwise
#######################################
build_client_pkg_bundle() {
  local date_tag="$1"
  local semver="$2"
  local cuda="$3"
  if [[ -z "$date_tag" ]]; then
    echo "❌ client_pkg requires --version YYYYMMDD" >&2
    return 1
  fi
  local bundle_tag="argus-sys-metric-test-node-bundle-gpu:${date_tag}"
  if ! docker image inspect "$bundle_tag" >/dev/null 2>&1; then
    printf '\n%s\n' "🧩 GPU bundle image $bundle_tag missing; building it first..."
    if ! build_gpu_bundle_image "$date_tag" "$cuda" "$semver"; then
      return 1
    fi
  else
    printf '\n%s\n' "✅ Using existing GPU bundle image: $bundle_tag"
  fi
  printf '\n%s\n' "📦 Building client GPU package via deployment_new/build/make_client_gpu_package.sh --version $date_tag --image $bundle_tag"
  if ! "$root/deployment_new/build/make_client_gpu_package.sh" --version "$date_tag" --image "$bundle_tag"; then
    echo "❌ make_client_gpu_package.sh failed" >&2
    return 1
  fi
  return 0
}

# =======================================
# Core images (Elasticsearch, Kibana, BIND9)
# =======================================
if [[ "$build_core" == true ]]; then
  if build_image "Elasticsearch" "src/log/elasticsearch/build/Dockerfile" "argus-elasticsearch:latest"; then
    images_built+=("argus-elasticsearch:latest")
  else
    build_failed=true
  fi

  echo ""

  if build_image "Kibana" "src/log/kibana/build/Dockerfile" "argus-kibana:latest"; then
    images_built+=("argus-kibana:latest")
  else
    build_failed=true
  fi

  echo ""

  if build_image "BIND9" "src/bind/build/Dockerfile" "argus-bind9:latest"; then
    images_built+=("argus-bind9:latest")
  else
    build_failed=true
  fi
fi

echo ""

# =======================================
# Master image (delegated to src/master/scripts/build_images.sh)
# =======================================
if [[ "$build_master" == true ]]; then
  echo ""
  echo "🔄 Building Master image..."
  pushd "$master_root" >/dev/null
  master_args=("--tag" "argus-master:latest")
  if [[ "$use_intranet" == true ]]; then
    master_args+=("--intranet")
  fi
  if [[ "$build_master_offline" == true ]]; then
    master_args+=("--offline")
  fi
  if [[ "$no_cache" == true ]]; then
    master_args+=("--no-cache")
  fi
  if ./scripts/build_images.sh "${master_args[@]}"; then
    if [[ "$build_master_offline" == true ]]; then
      images_built+=("argus-master:offline")
    else
      images_built+=("argus-master:latest")
    fi
  else
    build_failed=true
  fi
  popd >/dev/null
fi

# =======================================
# Metric module images
# =======================================
if [[ "$build_metric" == true ]]; then
  echo ""
  echo "Building Metric module images..."

  pull_base_images \
    "ubuntu:22.04" \
    "ubuntu/prometheus:3-24.04_stable" \
    "grafana/grafana:11.1.0"

  build_specs \
    "Metric FTP|src/metric/ftp/build/Dockerfile|argus-metric-ftp:latest|src/metric/ftp/build" \
    "Metric Prometheus|src/metric/prometheus/build/Dockerfile|argus-metric-prometheus:latest|src/metric/prometheus/build" \
    "Metric Grafana|src/metric/grafana/build/Dockerfile|argus-metric-grafana:latest|src/metric/grafana/build"
fi

# =======================================
# Sys (system tests) node images
# =======================================
if [[ "$build_sys" == true ]]; then
  echo ""
  echo "Building Sys node images..."

  pull_base_images \
    "ubuntu:22.04" \
    "$default_cuda_base_image"

  build_specs \
    "Sys Node|src/sys/build/node/Dockerfile|argus-sys-node:latest|." \
    "Sys Metric Test Node|src/sys/build/test-node/Dockerfile|argus-sys-metric-test-node:latest|." \
    "Sys Metric Test GPU Node|src/sys/build/test-gpu-node/Dockerfile|argus-sys-metric-test-gpu-node:latest|."
fi

# =======================================
# Web & Alert module images
# =======================================
if [[ "$build_web" == true || "$build_alert" == true ]]; then
  echo ""
  echo "Building Web and Alert module images..."

  # Pre-pull commonly used base images for stability.
  pull_base_images \
    "node:20" \
    "ubuntu:24.04"

  if [[ "$build_web" == true ]]; then
    build_specs \
      "Web Frontend|src/web/build_tools/frontend/Dockerfile|argus-web-frontend:latest|." \
      "Web Proxy|src/web/build_tools/proxy/Dockerfile|argus-web-proxy:latest|."
  fi

  if [[ "$build_alert" == true ]]; then
    build_specs \
      "Alertmanager|src/alert/alertmanager/build/Dockerfile|argus-alertmanager:latest|."
  fi
fi

# =======================================
# One-click GPU bundle (direct NVIDIA base)
# =======================================
if [[ "$build_gpu_bundle" == true ]]; then
  echo ""
  echo "Building one-click GPU bundle image..."
  if ! build_gpu_bundle_image "$bundle_date" "$cuda_ver" "$client_semver"; then
    build_failed=true
  fi
fi

# =======================================
# One-click Server/Client packaging
# =======================================
if [[ "$build_server_pkg" == true ]]; then
  echo ""
  echo "🧳 Building one-click Server package..."
  if ! build_server_pkg_bundle "${bundle_date}"; then
    build_failed=true
  fi
fi

if [[ "$build_client_pkg" == true ]]; then
  echo ""
  echo "🧳 Building one-click Client-GPU package..."
  if ! build_client_pkg_bundle "${bundle_date}" "${client_semver}" "${cuda_ver}"; then
    build_failed=true
  fi
fi

# =======================================
# Summary
# =======================================
echo "======================================="
echo "📦 Build Summary"
echo "======================================="

if [[ ${#images_built[@]} -gt 0 ]]; then
  echo "✅ Successfully built images:"
  for image in "${images_built[@]}"; do
    echo " • $image"
  done
fi

if [[ "$build_failed" == true ]]; then
  echo ""
  echo "❌ Some images failed to build. Please check the errors above."
  exit 1
fi

if [[ "$use_intranet" == true ]]; then
  echo ""
  echo "🌐 Built with intranet mirror configuration"
fi

if [[ "$build_master_offline" == true ]]; then
  echo ""
  echo "🧳 Master offline wheels 已解压到 $master_offline_dir"
fi

echo ""
echo "🚀 Next steps:"
echo " ./build/save_images.sh --compress # 导出镜像"
echo " cd src/master/tests && MASTER_IMAGE_TAG=argus-master:offline ./scripts/00_e2e_test.sh"
echo ""