From 79a77f6f6e2f7b7f6fe134ecb7e3288650914a56 Mon Sep 17 00:00:00 2001 From: yuyr Date: Sat, 27 Jun 2026 11:08:39 +0800 Subject: [PATCH] =?UTF-8?q?20260626=20ARM64=20Docker=20Compose=E9=83=A8?= =?UTF-8?q?=E7=BD=B2=E4=B8=8Elive=20TA=E9=A6=96=E8=BD=AE=E5=88=B7=E6=96=B0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .dockerignore | 17 ++ deploy/arm64-compose/.env.example | 87 ++++++++ deploy/arm64-compose/docker-compose.yml | 94 +++++++++ .../dashboards/ours-rp-arm64-overview.json | 80 ++++++++ .../provisioning/dashboards/dashboard.yml | 12 ++ .../provisioning/datasources/prometheus.yml | 10 + deploy/arm64-compose/prometheus.yml | 13 ++ docker/ours-rp-runtime.Dockerfile | 94 +++++++++ scripts/docker/build_arm64_runtime_image.sh | 185 ++++++++++++++++++ .../docker/deploy_remote233_arm64_compose.sh | 159 +++++++++++++++ scripts/soak/portable-soak.env.example | 10 +- scripts/soak/run_soak.sh | 76 ++++++- 12 files changed, 832 insertions(+), 5 deletions(-) create mode 100644 .dockerignore create mode 100644 deploy/arm64-compose/.env.example create mode 100644 deploy/arm64-compose/docker-compose.yml create mode 100644 deploy/arm64-compose/grafana/dashboards/ours-rp-arm64-overview.json create mode 100644 deploy/arm64-compose/grafana/provisioning/dashboards/dashboard.yml create mode 100644 deploy/arm64-compose/grafana/provisioning/datasources/prometheus.yml create mode 100644 deploy/arm64-compose/prometheus.yml create mode 100644 docker/ours-rp-runtime.Dockerfile create mode 100755 scripts/docker/build_arm64_runtime_image.sh create mode 100755 scripts/docker/deploy_remote233_arm64_compose.sh diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..193ed56 --- /dev/null +++ b/.dockerignore @@ -0,0 +1,17 @@ +target/ +.git/ +.gitignore +perf.* +**/* copy.excalidraw +ui/rpki-explorer/node_modules/ +ui/rpki-explorer/dist/ +ui/rpki-explorer/playwright-report/ +ui/rpki-explorer/test-results/ +ui/rpki-explorer/.vite/ +deploy/arm64-compose/.env +target/ +*.profraw +*.profdata +*.tar +*.tar.gz +*.zip diff --git a/deploy/arm64-compose/.env.example b/deploy/arm64-compose/.env.example new file mode 100644 index 0000000..ff4d61c --- /dev/null +++ b/deploy/arm64-compose/.env.example @@ -0,0 +1,87 @@ +# ours RP ARM64 Docker Compose 示例配置 +# 复制为 .env 后再启动: +# cp .env.example .env + +# 业务镜像名称;需要先通过 docker load 导入远端 Docker。 +RPKI_IMAGE=ours-rp-runtime-arm64:dev + +# 核心 RP 镜像强制以 ARM64 运行;远端233为 x86_64,需要 binfmt/qemu 支持。 +RPKI_PLATFORM=linux/arm64 + +# 验收默认只跑 APNIC;all5 可改成 afrinic,apnic,arin,lacnic,ripe。 +RIRS=apnic + +# 固定跑两轮:首轮 snapshot,第二轮 delta。 +# 负数表示持续运行;ARM64 QEMU 首版验收不建议默认持续运行。 +MAX_RUNS=2 + +# 两轮之间间隔秒数。 +INTERVAL_SECS=0 + +# 保留最近多少个 run 目录。 +RETAIN_RUNS=5 + +# TA/TAL 输入模式: +# - file-with-ta:完全使用镜像内 fixture; +# - file-live-ta:snapshot 轮先完成 live TA 获取,delta 轮后台刷新 TA; +# - url:从 TAL URL 拉取。 +TAL_INPUT_MODE=file-live-ta + +# rsync 默认按 module-root 批量拉取,和当前 ours RP soak 默认优化配置一致。 +RSYNC_SCOPE=module-root + +# 容器里不需要杀宿主机上的其他 RP 进程;远端宿主机如有竞争进程,请在宿主机侧处理。 +DISABLE_COMPETING_RPS=0 + +# 运行态数据目录。容器内固定路径,外部由 compose volume 保存。 +RUN_ROOT=/var/lib/ours-rp +DB_DIR=/var/lib/ours-rp/state/db +RSYNC_MIRROR_ROOT=/var/lib/ours-rp/state/rsync-mirror + +# 每轮结束后清理 daemon 临时目录。 +# Docker Compose 中 tmp 是独立 volume 挂载点,不能删除挂载点本身;默认关闭。 +CLEAN_TMP_AFTER_RUN=0 + +# 报告使用 compact JSON,降低写盘体积。 +OUTPUT_COMPACT_REPORT=1 + +# 复用 rsync mirror,避免 delta 每轮从零拉取。 +ALLOW_RSYNC_MIRROR_REUSE=1 + +# 前一轮失败时,新一轮从 snapshot 恢复。 +FAILURE_SNAPSHOT_RESET=1 + +# QEMU 验收优先降低额外统计开销;需要精确 DB 统计时改为 3 或其他正整数。 +DB_STATS_EXACT_EVERY=0 + +# 开启当前主线使用的验证缓存与请求预取能力。 +ENABLE_CHILD_CERTIFICATE_VALIDATION_CACHE=1 +RPKI_ANALYZE=1 +RPKI_EXTRA_ARGS="--enable-transport-request-prefetch --enable-publication-point-validation-cache --enable-roa-validation-cache --parallel-max-repo-sync-workers-global 4 --parallel-phase2-object-workers 4 --memory-trim-after-validation" + +# 进度日志阈值。QEMU 下执行较慢,阈值不宜过低。 +RPKI_PROGRESS_LOG=1 +RPKI_PROGRESS_SLOW_SECS=20 +RPKI_PROGRESS_STAGE_FRESH_SLOW_MS=2000 +RPKI_PROGRESS_PP_CONTROL_SLOW_MS=200 +RPKI_PROGRESS_PP_CACHE_SLOW_MS=100 +RPKI_PROGRESS_CONTROL_LOOP_SLOW_MS=2000 + +# live TA 刷新超时。 +LIVE_TA_REFRESH_CONNECT_TIMEOUT_SECS=15 +LIVE_TA_REFRESH_MAX_TIME_SECS=120 + +# file-live-ta 在 snapshot 轮先完成 live TA 获取再启动子进程,避免首轮使用旧 fixture TA。 +LIVE_TA_REFRESH_BEFORE_SNAPSHOT=1 + +# metrics sidecar 配置。 +METRICS_INSTANCE=remote233-arm64-qemu +METRICS_PORT=9556 +METRICS_POLL_SECS=10 + +# Prometheus / Grafana 配置。 +PROMETHEUS_PORT=9090 +PROMETHEUS_RETENTION=7d +GRAFANA_PORT=3000 +GRAFANA_ADMIN_USER=admin +GRAFANA_ADMIN_PASSWORD=admin diff --git a/deploy/arm64-compose/docker-compose.yml b/deploy/arm64-compose/docker-compose.yml new file mode 100644 index 0000000..5016931 --- /dev/null +++ b/deploy/arm64-compose/docker-compose.yml @@ -0,0 +1,94 @@ +services: + ours-rp-soak: + image: ${RPKI_IMAGE:-ours-rp-runtime-arm64:dev} + platform: ${RPKI_PLATFORM:-linux/arm64} + container_name: ours-rp-arm64-soak + env_file: + - ./.env + environment: + PACKAGE_ROOT: /opt/ours-rp + ENV_FILE: /opt/ours-rp/.env + RUN_ROOT: /var/lib/ours-rp + BIN_DIR: /opt/ours-rp/bin + FIXTURE_DIR: /opt/ours-rp/fixtures + volumes: + - ./.env:/opt/ours-rp/.env:ro + - rpki-state:/var/lib/ours-rp/state + - rpki-runs:/var/lib/ours-rp/runs + - rpki-logs:/var/lib/ours-rp/logs + - rpki-tmp:/var/lib/ours-rp/tmp + restart: "no" + profiles: + - core + + artifact-metrics: + image: ${RPKI_IMAGE:-ours-rp-runtime-arm64:dev} + platform: ${RPKI_PLATFORM:-linux/arm64} + container_name: ours-rp-arm64-artifact-metrics + env_file: + - ./.env + command: + - /opt/ours-rp/bin/rpki_artifact_metrics + - --run-root + - /var/lib/ours-rp + - --listen + - 0.0.0.0:9556 + - --poll-secs + - ${METRICS_POLL_SECS:-10} + - --instance + - ${METRICS_INSTANCE:-remote233-arm64-qemu} + ports: + - "${METRICS_PORT:-9556}:9556" + volumes: + - rpki-state:/var/lib/ours-rp/state:ro + - rpki-runs:/var/lib/ours-rp/runs:ro + - rpki-logs:/var/lib/ours-rp/logs:ro + restart: unless-stopped + profiles: + - sidecar + + prometheus: + image: ${PROMETHEUS_IMAGE:-prom/prometheus:v2.55.1} + container_name: ours-rp-arm64-prometheus + command: + - --config.file=/etc/prometheus/prometheus.yml + - --storage.tsdb.path=/prometheus + - --storage.tsdb.retention.time=${PROMETHEUS_RETENTION:-7d} + - --web.enable-lifecycle + depends_on: + - artifact-metrics + ports: + - "${PROMETHEUS_PORT:-9090}:9090" + volumes: + - ./prometheus.yml:/etc/prometheus/prometheus.yml:ro + - prometheus-data:/prometheus + restart: unless-stopped + profiles: + - monitor + + grafana: + image: ${GRAFANA_IMAGE:-grafana/grafana:11.3.1} + container_name: ours-rp-arm64-grafana + depends_on: + - prometheus + ports: + - "${GRAFANA_PORT:-3000}:3000" + environment: + GF_SECURITY_ADMIN_USER: ${GRAFANA_ADMIN_USER:-admin} + GF_SECURITY_ADMIN_PASSWORD: ${GRAFANA_ADMIN_PASSWORD:-admin} + GF_USERS_ALLOW_SIGN_UP: "false" + volumes: + - grafana-data:/var/lib/grafana + - ./grafana/provisioning:/etc/grafana/provisioning:ro + - ./grafana/dashboards:/var/lib/grafana/dashboards:ro + restart: unless-stopped + profiles: + - monitor + +volumes: + rpki-state: + rpki-runs: + rpki-logs: + rpki-tmp: + prometheus-data: + grafana-data: diff --git a/deploy/arm64-compose/grafana/dashboards/ours-rp-arm64-overview.json b/deploy/arm64-compose/grafana/dashboards/ours-rp-arm64-overview.json new file mode 100644 index 0000000..a095a1a --- /dev/null +++ b/deploy/arm64-compose/grafana/dashboards/ours-rp-arm64-overview.json @@ -0,0 +1,80 @@ +{ + "annotations": { + "list": [] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "id": null, + "links": [], + "panels": [ + { + "datasource": {"type": "prometheus", "uid": "Prometheus"}, + "fieldConfig": {"defaults": {"color": {"mode": "thresholds"}, "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]}}, "overrides": []}, + "gridPos": {"h": 5, "w": 6, "x": 0, "y": 0}, + "id": 1, + "options": {"colorMode": "value", "graphMode": "area", "justifyMode": "auto", "orientation": "auto", "reduceOptions": {"calcs": ["lastNotNull"], "fields": "", "values": false}, "textMode": "auto", "wideLayout": true}, + "pluginVersion": "11.3.1", + "targets": [{"expr": "ours_rp_latest_run_sequence", "legendFormat": "latest run", "refId": "A"}], + "title": "Latest Run Sequence", + "type": "stat" + }, + { + "datasource": {"type": "prometheus", "uid": "Prometheus"}, + "fieldConfig": {"defaults": {"color": {"mode": "thresholds"}, "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]}}, "overrides": []}, + "gridPos": {"h": 5, "w": 6, "x": 6, "y": 0}, + "id": 2, + "options": {"colorMode": "value", "graphMode": "area", "justifyMode": "auto", "orientation": "auto", "reduceOptions": {"calcs": ["lastNotNull"], "fields": "", "values": false}, "textMode": "auto", "wideLayout": true}, + "pluginVersion": "11.3.1", + "targets": [{"expr": "ours_rp_latest_run_wall_ms / 1000", "legendFormat": "wall seconds", "refId": "A"}], + "title": "Latest Wall Seconds", + "type": "stat" + }, + { + "datasource": {"type": "prometheus", "uid": "Prometheus"}, + "fieldConfig": {"defaults": {"color": {"mode": "palette-classic"}, "custom": {"drawStyle": "line", "lineInterpolation": "linear", "lineWidth": 2, "pointSize": 4, "showPoints": "never"}}, "overrides": []}, + "gridPos": {"h": 8, "w": 12, "x": 12, "y": 0}, + "id": 3, + "options": {"legend": {"displayMode": "list", "placement": "bottom", "showLegend": true}, "tooltip": {"mode": "multi", "sort": "none"}}, + "targets": [{"expr": "ours_rp_latest_run_wall_ms / 1000", "legendFormat": "wall seconds", "refId": "A"}], + "title": "Wall Time Trend", + "type": "timeseries" + }, + { + "datasource": {"type": "prometheus", "uid": "Prometheus"}, + "fieldConfig": {"defaults": {"color": {"mode": "palette-classic"}, "custom": {"drawStyle": "line", "lineInterpolation": "linear", "lineWidth": 2, "pointSize": 4, "showPoints": "never"}}, "overrides": []}, + "gridPos": {"h": 8, "w": 12, "x": 0, "y": 5}, + "id": 4, + "options": {"legend": {"displayMode": "list", "placement": "bottom", "showLegend": true}, "tooltip": {"mode": "multi", "sort": "none"}}, + "targets": [ + {"expr": "ours_rp_latest_run_vrps", "legendFormat": "VRPs", "refId": "A"}, + {"expr": "ours_rp_latest_run_aspas", "legendFormat": "VAPs", "refId": "B"}, + {"expr": "ours_rp_latest_run_publication_points", "legendFormat": "PP", "refId": "C"} + ], + "title": "Output Counts", + "type": "timeseries" + }, + { + "datasource": {"type": "prometheus", "uid": "Prometheus"}, + "fieldConfig": {"defaults": {"color": {"mode": "palette-classic"}, "custom": {"drawStyle": "line", "lineInterpolation": "linear", "lineWidth": 2, "pointSize": 4, "showPoints": "never"}}, "overrides": []}, + "gridPos": {"h": 8, "w": 12, "x": 12, "y": 8}, + "id": 5, + "options": {"legend": {"displayMode": "list", "placement": "bottom", "showLegend": true}, "tooltip": {"mode": "multi", "sort": "none"}}, + "targets": [{"expr": "ours_rp_latest_run_max_rss_kb / 1024", "legendFormat": "Max RSS MiB", "refId": "A"}], + "title": "Max RSS Trend", + "type": "timeseries" + } + ], + "preload": false, + "refresh": "10s", + "schemaVersion": 40, + "tags": ["ours-rp", "arm64", "qemu"], + "templating": {"list": []}, + "time": {"from": "now-1h", "to": "now"}, + "timepicker": {}, + "timezone": "browser", + "title": "Ours RP ARM64 Compose Overview", + "uid": "ours-rp-arm64-overview", + "version": 1, + "weekStart": "" +} diff --git a/deploy/arm64-compose/grafana/provisioning/dashboards/dashboard.yml b/deploy/arm64-compose/grafana/provisioning/dashboards/dashboard.yml new file mode 100644 index 0000000..8cba889 --- /dev/null +++ b/deploy/arm64-compose/grafana/provisioning/dashboards/dashboard.yml @@ -0,0 +1,12 @@ +apiVersion: 1 + +providers: + - name: ours-rp-arm64 + orgId: 1 + folder: Ours RP ARM64 + type: file + disableDeletion: false + updateIntervalSeconds: 10 + allowUiUpdates: true + options: + path: /var/lib/grafana/dashboards diff --git a/deploy/arm64-compose/grafana/provisioning/datasources/prometheus.yml b/deploy/arm64-compose/grafana/provisioning/datasources/prometheus.yml new file mode 100644 index 0000000..25e8d70 --- /dev/null +++ b/deploy/arm64-compose/grafana/provisioning/datasources/prometheus.yml @@ -0,0 +1,10 @@ +apiVersion: 1 + +datasources: + - name: Prometheus + uid: Prometheus + type: prometheus + access: proxy + url: http://prometheus:9090 + isDefault: true + editable: true diff --git a/deploy/arm64-compose/prometheus.yml b/deploy/arm64-compose/prometheus.yml new file mode 100644 index 0000000..658f425 --- /dev/null +++ b/deploy/arm64-compose/prometheus.yml @@ -0,0 +1,13 @@ +global: + scrape_interval: 5s + evaluation_interval: 5s + +scrape_configs: + - job_name: ours-rp-artifact-metrics + metrics_path: /metrics + static_configs: + - targets: + - artifact-metrics:9556 + labels: + rp: ours-rp + source: arm64-compose-artifact-sidecar diff --git a/docker/ours-rp-runtime.Dockerfile b/docker/ours-rp-runtime.Dockerfile new file mode 100644 index 0000000..1e15d42 --- /dev/null +++ b/docker/ours-rp-runtime.Dockerfile @@ -0,0 +1,94 @@ +ARG BUILDER_IMAGE=ours-rp-base-rust-amd64:1-bookworm +ARG RUNTIME_IMAGE=ours-rp-base-debian-arm64:bookworm-slim + +FROM --platform=$BUILDPLATFORM ${BUILDER_IMAGE} AS builder + +WORKDIR /src + +RUN apt-get update \ + && apt-get install -y --no-install-recommends \ + build-essential \ + ca-certificates \ + clang \ + cmake \ + g++-aarch64-linux-gnu \ + gcc-aarch64-linux-gnu \ + git \ + libclang-dev \ + libc6-dev-arm64-cross \ + make \ + perl \ + pkg-config \ + python3 \ + && rm -rf /var/lib/apt/lists/* \ + && rustup target add aarch64-unknown-linux-gnu + +ENV CARGO_TARGET_AARCH64_UNKNOWN_LINUX_GNU_LINKER=aarch64-linux-gnu-gcc \ + CC_aarch64_unknown_linux_gnu=aarch64-linux-gnu-gcc \ + CXX_aarch64_unknown_linux_gnu=aarch64-linux-gnu-g++ \ + AR_aarch64_unknown_linux_gnu=aarch64-linux-gnu-ar \ + CARGO_BUILD_TARGET=aarch64-unknown-linux-gnu \ + PKG_CONFIG_ALLOW_CROSS=1 + +COPY . . + +RUN --mount=type=cache,target=/usr/local/cargo/registry \ + --mount=type=cache,target=/usr/local/cargo/git \ + --mount=type=cache,target=/src/target \ + cargo build --release --target aarch64-unknown-linux-gnu \ + --bin rpki \ + --bin rpki_daemon \ + --bin db_stats \ + --bin rpki_artifact_metrics \ + && mkdir -p /build-out/bin \ + && cp \ + target/aarch64-unknown-linux-gnu/release/rpki \ + target/aarch64-unknown-linux-gnu/release/rpki_daemon \ + target/aarch64-unknown-linux-gnu/release/db_stats \ + target/aarch64-unknown-linux-gnu/release/rpki_artifact_metrics \ + /build-out/bin/ + +FROM --platform=$TARGETPLATFORM ${RUNTIME_IMAGE} AS runtime + +LABEL org.opencontainers.image.title="ours-rp-runtime" \ + org.opencontainers.image.description="Ours RP runtime image for ARM64 Docker Compose deployment" + +RUN apt-get update \ + && apt-get install -y --no-install-recommends \ + bash \ + ca-certificates \ + coreutils \ + curl \ + findutils \ + iputils-ping \ + jq \ + procps \ + python3 \ + rsync \ + time \ + tzdata \ + && rm -rf /var/lib/apt/lists/* + +WORKDIR /opt/ours-rp + +COPY --from=builder /build-out/bin/ /opt/ours-rp/bin/ +COPY scripts/soak/run_soak.sh /opt/ours-rp/run_soak.sh +COPY scripts/soak/portable-soak.env.example /opt/ours-rp/portable-soak.env.example +COPY tests/fixtures/tal/ /opt/ours-rp/fixtures/tal/ +COPY tests/fixtures/ta/ /opt/ours-rp/fixtures/ta/ +COPY fixtures/live_20260619/tal/ /opt/ours-rp/fixtures/live_20260619/tal/ +COPY fixtures/live_20260619/ta/ /opt/ours-rp/fixtures/live_20260619/ta/ + +RUN chmod +x /opt/ours-rp/run_soak.sh /opt/ours-rp/bin/* \ + && mkdir -p /var/lib/ours-rp/state /var/lib/ours-rp/runs /var/lib/ours-rp/logs /var/lib/ours-rp/tmp + +ENV PACKAGE_ROOT=/opt/ours-rp \ + ENV_FILE=/opt/ours-rp/.env \ + RUN_ROOT=/var/lib/ours-rp \ + BIN_DIR=/opt/ours-rp/bin \ + FIXTURE_DIR=/opt/ours-rp/fixtures \ + RUST_BACKTRACE=1 + +VOLUME ["/var/lib/ours-rp"] + +CMD ["/opt/ours-rp/run_soak.sh"] diff --git a/scripts/docker/build_arm64_runtime_image.sh b/scripts/docker/build_arm64_runtime_image.sh new file mode 100755 index 0000000..c955651 --- /dev/null +++ b/scripts/docker/build_arm64_runtime_image.sh @@ -0,0 +1,185 @@ +#!/usr/bin/env bash +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +REPO_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)" + +IMAGE_TAG="${IMAGE_TAG:-ours-rp-runtime-arm64:dev}" +BUILDER_IMAGE="${BUILDER_IMAGE:-ours-rp-base-rust-amd64:1-bookworm}" +RUNTIME_IMAGE="${RUNTIME_IMAGE:-ours-rp-base-debian-arm64:bookworm-slim}" +OUT_DIR="${OUT_DIR:-$REPO_ROOT/target/arm64-docker}" +DOCKERFILE="${DOCKERFILE:-$REPO_ROOT/docker/ours-rp-runtime.Dockerfile}" +BUILDER_NAME="${BUILDER_NAME:-default}" +INSTALL_BINFMT="${INSTALL_BINFMT:-1}" +SAVE_IMAGE="${SAVE_IMAGE:-1}" +LOAD_IMAGE="${LOAD_IMAGE:-1}" + +usage() { + cat <<'USAGE' +Usage: + scripts/docker/build_arm64_runtime_image.sh [options] + +Options: + --image Docker image tag (default: ours-rp-runtime-arm64:dev) + --out-dir Directory for docker save tar.gz (default: target/arm64-docker) + --dockerfile Dockerfile path + --builder buildx builder name + --no-binfmt Do not install binfmt/qemu + --no-save Build image but do not docker save it + --no-load Use buildx output tar instead of --load + -h, --help Show this help +USAGE +} + +while [[ $# -gt 0 ]]; do + case "$1" in + --image) + IMAGE_TAG="$2" + shift 2 + ;; + --out-dir) + OUT_DIR="$2" + shift 2 + ;; + --dockerfile) + DOCKERFILE="$2" + shift 2 + ;; + --builder) + BUILDER_NAME="$2" + shift 2 + ;; + --no-binfmt) + INSTALL_BINFMT=0 + shift + ;; + --no-save) + SAVE_IMAGE=0 + shift + ;; + --no-load) + LOAD_IMAGE=0 + shift + ;; + -h|--help) + usage + exit 0 + ;; + *) + echo "unknown option: $1" >&2 + usage >&2 + exit 2 + ;; + esac +done + +require_command() { + command -v "$1" >/dev/null 2>&1 || { + echo "missing required command: $1" >&2 + exit 2 + } +} + +safe_tag_name() { + printf '%s' "$1" | tr '/:' '--' +} + +require_command docker +mkdir -p "$OUT_DIR" + +require_docker_image() { + local image="$1" + if ! docker image inspect "$image" >/dev/null 2>&1; then + cat >&2 </dev/null 2>&1; then + docker buildx create --name "$BUILDER_NAME" --driver docker-container --use >/dev/null +else + docker buildx use "$BUILDER_NAME" >/dev/null +fi +docker buildx inspect --bootstrap >/dev/null + +metadata_path="$OUT_DIR/$(safe_tag_name "$IMAGE_TAG").build-metadata.json" +tar_path="$OUT_DIR/$(safe_tag_name "$IMAGE_TAG").tar.gz" +build_log="$OUT_DIR/$(safe_tag_name "$IMAGE_TAG").build.log" + +echo "building linux/arm64 image: $IMAGE_TAG" +echo "repo: $REPO_ROOT" +echo "dockerfile: $DOCKERFILE" +echo "builder_image: $BUILDER_IMAGE" +echo "runtime_image: $RUNTIME_IMAGE" +start_epoch="$(date +%s)" + +if [[ "$LOAD_IMAGE" == "1" ]]; then + docker buildx build \ + --platform linux/arm64 \ + --builder "$BUILDER_NAME" \ + --load \ + --build-arg BUILDKIT_INLINE_CACHE=1 \ + --build-arg "BUILDER_IMAGE=$BUILDER_IMAGE" \ + --build-arg "RUNTIME_IMAGE=$RUNTIME_IMAGE" \ + --metadata-file "$metadata_path" \ + -t "$IMAGE_TAG" \ + -f "$DOCKERFILE" \ + "$REPO_ROOT" 2>&1 | tee "$build_log" +else + raw_tar_path="$OUT_DIR/$(safe_tag_name "$IMAGE_TAG").tar" + docker buildx build \ + --platform linux/arm64 \ + --builder "$BUILDER_NAME" \ + --output "type=docker,dest=$raw_tar_path" \ + --build-arg BUILDKIT_INLINE_CACHE=1 \ + --build-arg "BUILDER_IMAGE=$BUILDER_IMAGE" \ + --build-arg "RUNTIME_IMAGE=$RUNTIME_IMAGE" \ + --metadata-file "$metadata_path" \ + -t "$IMAGE_TAG" \ + -f "$DOCKERFILE" \ + "$REPO_ROOT" 2>&1 | tee "$build_log" + gzip -f "$raw_tar_path" + tar_path="${raw_tar_path}.gz" +fi + +elapsed_secs=$(( $(date +%s) - start_epoch )) + +if [[ "$SAVE_IMAGE" == "1" && "$LOAD_IMAGE" == "1" ]]; then + echo "saving image to $tar_path" + docker save "$IMAGE_TAG" | gzip -c > "$tar_path" +fi + +{ + echo "image=$IMAGE_TAG" + echo "platform=linux/arm64" + echo "builder_image=$BUILDER_IMAGE" + echo "runtime_image=$RUNTIME_IMAGE" + echo "elapsed_secs=$elapsed_secs" + echo "metadata=$metadata_path" + echo "tar=$tar_path" + echo "tar_size_bytes=$(wc -c < "$tar_path" 2>/dev/null || echo 0)" + echo "git_commit=$(git -C "$REPO_ROOT" rev-parse --short HEAD 2>/dev/null || echo unknown)" + echo "git_status_count=$(git -C "$REPO_ROOT" status --short 2>/dev/null | wc -l | tr -d ' ')" + echo "built_at_utc=$(date -u +%Y-%m-%dT%H:%M:%SZ)" +} > "$OUT_DIR/$(safe_tag_name "$IMAGE_TAG").build-summary.env" + +echo "build complete: elapsed=${elapsed_secs}s tar=$tar_path" diff --git a/scripts/docker/deploy_remote233_arm64_compose.sh b/scripts/docker/deploy_remote233_arm64_compose.sh new file mode 100755 index 0000000..efc33e2 --- /dev/null +++ b/scripts/docker/deploy_remote233_arm64_compose.sh @@ -0,0 +1,159 @@ +#!/usr/bin/env bash +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +REPO_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)" + +REMOTE_HOST="${REMOTE_HOST:-root@47.77.204.233}" +REMOTE_ROOT="${REMOTE_ROOT:-/root/ours-rp-arm64-compose}" +IMAGE_TAG="${IMAGE_TAG:-ours-rp-runtime-arm64:dev}" +IMAGE_TAR="${IMAGE_TAR:-$REPO_ROOT/target/arm64-docker/ours-rp-runtime-arm64-dev.tar.gz}" +EXECUTE="${EXECUTE:-0}" +INSTALL_DOCKER="${INSTALL_DOCKER:-1}" +INSTALL_BINFMT="${INSTALL_BINFMT:-1}" +START_CORE="${START_CORE:-0}" +START_SIDECARS="${START_SIDECARS:-0}" + +usage() { + cat <<'USAGE' +Usage: + scripts/docker/deploy_remote233_arm64_compose.sh [options] + +Default is dry-run. Pass --execute to modify the remote host. + +Options: + --execute Actually install/copy/load/start on remote + --remote SSH host (default: root@47.77.204.233) + --remote-root Remote compose root + --image Image tag loaded by docker load + --image-tar Local docker save tar.gz + --no-install-docker Skip Docker installation + --no-binfmt Skip binfmt/qemu installation + --start-core Start ours-rp-soak after deploy + --start-sidecars Start artifact metrics, Prometheus and Grafana after deploy + -h, --help Show this help +USAGE +} + +while [[ $# -gt 0 ]]; do + case "$1" in + --execute) + EXECUTE=1 + shift + ;; + --remote) + REMOTE_HOST="$2" + shift 2 + ;; + --remote-root) + REMOTE_ROOT="$2" + shift 2 + ;; + --image) + IMAGE_TAG="$2" + shift 2 + ;; + --image-tar) + IMAGE_TAR="$2" + shift 2 + ;; + --no-install-docker) + INSTALL_DOCKER=0 + shift + ;; + --no-binfmt) + INSTALL_BINFMT=0 + shift + ;; + --start-core) + START_CORE=1 + shift + ;; + --start-sidecars) + START_SIDECARS=1 + shift + ;; + -h|--help) + usage + exit 0 + ;; + *) + echo "unknown option: $1" >&2 + usage >&2 + exit 2 + ;; + esac +done + +require_command() { + command -v "$1" >/dev/null 2>&1 || { + echo "missing required command: $1" >&2 + exit 2 + } +} + +run_or_echo() { + if [[ "$EXECUTE" == "1" ]]; then + "$@" + else + printf 'DRY-RUN:' + printf ' %q' "$@" + printf '\n' + fi +} + +remote_run() { + if [[ "$EXECUTE" == "1" ]]; then + ssh "$REMOTE_HOST" "$@" + else + printf 'DRY-RUN: ssh %q %q\n' "$REMOTE_HOST" "$*" + fi +} + +require_command ssh +require_command rsync + +[[ -f "$IMAGE_TAR" ]] || { + echo "missing image tar: $IMAGE_TAR" >&2 + exit 2 +} + +compose_src="$REPO_ROOT/deploy/arm64-compose/" +[[ -f "$compose_src/docker-compose.yml" ]] || { + echo "missing compose source: $compose_src" >&2 + exit 2 +} + +echo "remote=$REMOTE_HOST" +echo "remote_root=$REMOTE_ROOT" +echo "image=$IMAGE_TAG" +echo "image_tar=$IMAGE_TAR" + +if [[ "$INSTALL_DOCKER" == "1" ]]; then + remote_run "if ! command -v docker >/dev/null 2>&1; then apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y docker.io; fi; if ! docker compose version >/dev/null 2>&1; then if apt-cache show docker-compose-plugin >/dev/null 2>&1; then DEBIAN_FRONTEND=noninteractive apt-get install -y docker-compose-plugin; elif apt-cache show docker-compose-v2 >/dev/null 2>&1; then DEBIAN_FRONTEND=noninteractive apt-get install -y docker-compose-v2; elif apt-cache show docker-compose >/dev/null 2>&1; then DEBIAN_FRONTEND=noninteractive apt-get install -y docker-compose; else echo 'no docker compose package found' >&2; exit 2; fi; fi; systemctl enable --now docker || true" +fi + +remote_run "mkdir -p '$REMOTE_ROOT/images'" +run_or_echo rsync -a --delete "$compose_src" "$REMOTE_HOST:$REMOTE_ROOT/" +run_or_echo rsync -a "$IMAGE_TAR" "$REMOTE_HOST:$REMOTE_ROOT/images/" + +remote_tar="$REMOTE_ROOT/images/$(basename "$IMAGE_TAR")" +remote_run "cd '$REMOTE_ROOT' && test -f .env || cp .env.example .env" +remote_run "gunzip -c '$remote_tar' | docker load" + +if [[ "$INSTALL_BINFMT" == "1" ]]; then + remote_run "docker run --rm --privileged tonistiigi/binfmt --install arm64" +fi + +remote_run "docker run --rm --platform linux/arm64 '$IMAGE_TAG' uname -m" +remote_run "docker run --rm --platform linux/arm64 '$IMAGE_TAG' /opt/ours-rp/bin/rpki --help >/tmp/ours-rp-arm64-help.txt && head -5 /tmp/ours-rp-arm64-help.txt" + +if [[ "$START_CORE" == "1" ]]; then + remote_run "cd '$REMOTE_ROOT' && docker compose --profile core up -d ours-rp-soak" +fi + +if [[ "$START_SIDECARS" == "1" ]]; then + remote_run "cd '$REMOTE_ROOT' && docker compose --profile sidecar --profile monitor up -d artifact-metrics prometheus grafana" +fi + +remote_run "cd '$REMOTE_ROOT' && docker compose ps" diff --git a/scripts/soak/portable-soak.env.example b/scripts/soak/portable-soak.env.example index 44eb188..018e321 100644 --- a/scripts/soak/portable-soak.env.example +++ b/scripts/soak/portable-soak.env.example @@ -19,15 +19,19 @@ RIRS=afrinic,apnic,arin,lacnic,ripe # TAL/TA 输入模式。 # file-with-ta:使用 package 内置 fixtures/tal + fixtures/ta,完全离线固定输入。 -# file-live-ta:使用 package 内置 fixtures/tal;每轮后台 best-effort 刷新 TA 到 state/live-ta, -# 子进程不等待刷新,直接使用当前已有的 state/live-ta,首次缺失时从 fixtures/ta 初始化。 +# file-live-ta:使用 package 内置 fixtures/tal;snapshot 轮默认先阻塞完成 live TA 获取再启动子进程, +# delta 轮后台 best-effort 刷新 TA 到 state/live-ta,子进程使用当前已有的 state/live-ta。 # url:直接把 TAL URL 传给子进程,由子进程处理 TAL/TA 获取。 TAL_INPUT_MODE=file-with-ta -# file-live-ta 后台刷新 TA 的 curl 超时配置。刷新失败只写日志,不阻断本轮 run。 +# file-live-ta live TA 刷新 curl 超时配置。 LIVE_TA_REFRESH_CONNECT_TIMEOUT_SECS=15 LIVE_TA_REFRESH_MAX_TIME_SECS=120 +# file-live-ta 在 snapshot 轮是否先完成 live TA 获取再启动子进程。 +# 建议保持 1,避免 clean state 首轮 snapshot 使用旧 fixture TA。 +LIVE_TA_REFRESH_BEFORE_SNAPSHOT=1 + # 运行根目录。默认使用 package 根目录;如需把产物写到独立数据盘,可改成绝对路径。 RUN_ROOT="${PACKAGE_ROOT}" diff --git a/scripts/soak/run_soak.sh b/scripts/soak/run_soak.sh index bb70a70..94aaa5c 100755 --- a/scripts/soak/run_soak.sh +++ b/scripts/soak/run_soak.sh @@ -47,6 +47,7 @@ INVALID_ROOT="$STATE_ROOT/invalid" LIVE_TA_REFRESH_DIR="${LIVE_TA_REFRESH_DIR:-$META_DIR/live-ta-refresh}" LIVE_TA_REFRESH_CONNECT_TIMEOUT_SECS="${LIVE_TA_REFRESH_CONNECT_TIMEOUT_SECS:-15}" LIVE_TA_REFRESH_MAX_TIME_SECS="${LIVE_TA_REFRESH_MAX_TIME_SECS:-120}" +LIVE_TA_REFRESH_BEFORE_SNAPSHOT="${LIVE_TA_REFRESH_BEFORE_SNAPSHOT:-1}" RPKI_BIN="$BIN_DIR/rpki" RPKI_DAEMON_BIN="$BIN_DIR/rpki_daemon" @@ -309,12 +310,83 @@ start_live_ta_refresh_for_rir() { printf '%s\n' "$pid" > "$pid_file" } -prepare_live_ta_inputs_for_run() { +wait_for_previous_live_ta_refresh_for_rir() { + local rir_name="$1" + local pid_file + local pid + local deadline_epoch + pid_file="$(live_ta_refresh_pid_file_for_rir "$rir_name")" + [[ -f "$pid_file" ]] || return 0 + if reap_finished_live_ta_refresh_for_rir "$rir_name"; then + return 0 + fi + pid="$(cat "$pid_file" 2>/dev/null || true)" + deadline_epoch=$(( $(date +%s) + LIVE_TA_REFRESH_MAX_TIME_SECS + 60 )) + echo "live-ta-refresh wait rir=$rir_name reason=previous_refresh_running pid=$pid" + while ! reap_finished_live_ta_refresh_for_rir "$rir_name"; do + if (( $(date +%s) > deadline_epoch )); then + die "timed out waiting for previous live TA refresh for $rir_name pid=$pid" + fi + sleep 1 + done +} + +refresh_live_ta_blocking_for_run() { local run_id="$1" local rir_name + local pid + local failed=0 + local pids=() + local names=() + local log_path + local pid_file + + for rir_name in "${RIR_LIST[@]}"; do + wait_for_previous_live_ta_refresh_for_rir "$rir_name" + done + + for rir_name in "${RIR_LIST[@]}"; do + mkdir -p "$LIVE_TA_REFRESH_DIR" "$LOG_ROOT" + pid_file="$(live_ta_refresh_pid_file_for_rir "$rir_name")" + log_path="$LOG_ROOT/live-ta-refresh-$run_id-$rir_name.log" + refresh_live_ta_for_rir "$rir_name" "$run_id" "$log_path" & + pid=$! + printf '%s\n' "$pid" > "$pid_file" + pids+=("$pid") + names+=("$rir_name") + done + + local index + for index in "${!pids[@]}"; do + pid="${pids[$index]}" + rir_name="${names[$index]}" + pid_file="$(live_ta_refresh_pid_file_for_rir "$rir_name")" + if wait "$pid"; then + rm -f "$pid_file" + else + failed=1 + rm -f "$pid_file" + echo "live-ta-refresh failed before snapshot rir=$rir_name log=$LOG_ROOT/live-ta-refresh-$run_id-$rir_name.log" >&2 + fi + done + + if (( failed != 0 )); then + die "live TA refresh failed before snapshot run=$run_id; see $LOG_ROOT/live-ta-refresh-$run_id-*.log" + fi + echo "live-ta-refresh completed before snapshot run=$run_id rirs=${#RIR_LIST[@]}" +} + +prepare_live_ta_inputs_for_run() { + local run_id="$1" + local sync_mode="$2" + local rir_name if [[ "$TAL_INPUT_MODE" != "file-live-ta" ]]; then return 0 fi + if [[ "$sync_mode" == "snapshot" ]] && is_true "$LIVE_TA_REFRESH_BEFORE_SNAPSHOT"; then + refresh_live_ta_blocking_for_run "$run_id" + return 0 + fi for rir_name in "${RIR_LIST[@]}"; do ensure_live_ta_for_rir "$rir_name" done @@ -726,7 +798,7 @@ run_one_round() { "$snapshot_reason" "$previous_run_id" "$previous_success_value" "$started_at" "" \ "$INVALID_DB_PATH" "$INVALID_STATE_PATH" "$INVALID_TMP_PATH" "" "$PACKAGE_ROOT" "$ENV_FILE" - prepare_live_ta_inputs_for_run "$run_id" + prepare_live_ta_inputs_for_run "$run_id" "$sync_mode" build_child_args if is_true "$RPKI_ANALYZE"; then CHILD_ARGS+=(--analyze --analysis-out "$run_dir/analyze")