From a87a73559b2e16ceafc57496a96659f62c445645 Mon Sep 17 00:00:00 2001 From: yuyr Date: Sat, 27 Jun 2026 12:34:29 +0800 Subject: [PATCH] =?UTF-8?q?20260627=20=E5=AE=8C=E6=88=90ARM64=E5=AE=8C?= =?UTF-8?q?=E6=95=B4=E5=AE=89=E8=A3=85=E5=8C=85=E5=92=8C=E8=BF=90=E7=BB=B4?= =?UTF-8?q?=E8=84=9A=E6=9C=AC?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- deploy/arm64-installer/.env.example | 75 ++++++ deploy/arm64-installer/cleanup.sh | 56 ++++ .../compose/docker-compose.yml | 88 +++++++ .../dashboards/ours-rp-arm64-overview.json | 80 ++++++ .../provisioning/dashboards/dashboard.yml | 12 + .../provisioning/datasources/prometheus.yml | 10 + deploy/arm64-installer/compose/prometheus.yml | 13 + deploy/arm64-installer/docs/README.en.md | 94 +++++++ deploy/arm64-installer/docs/README.zh-CN.md | 96 +++++++ deploy/arm64-installer/docs/operations.en.md | 68 +++++ .../arm64-installer/docs/operations.zh-CN.md | 68 +++++ .../docs/troubleshooting.en.md | 68 +++++ .../docs/troubleshooting.zh-CN.md | 68 +++++ deploy/arm64-installer/install.sh | 44 ++++ deploy/arm64-installer/logs.sh | 7 + deploy/arm64-installer/restart.sh | 5 + deploy/arm64-installer/scripts/common.sh | 245 ++++++++++++++++++ deploy/arm64-installer/self-check.sh | 37 +++ deploy/arm64-installer/start.sh | 58 +++++ deploy/arm64-installer/status.sh | 55 ++++ deploy/arm64-installer/stop.sh | 7 + deploy/arm64-installer/uninstall.sh | 32 +++ deploy/arm64-installer/upgrade.sh | 14 + .../docker/build_arm64_installer_package.sh | 123 +++++++++ 24 files changed, 1423 insertions(+) create mode 100644 deploy/arm64-installer/.env.example create mode 100755 deploy/arm64-installer/cleanup.sh create mode 100644 deploy/arm64-installer/compose/docker-compose.yml create mode 100644 deploy/arm64-installer/compose/grafana/dashboards/ours-rp-arm64-overview.json create mode 100644 deploy/arm64-installer/compose/grafana/provisioning/dashboards/dashboard.yml create mode 100644 
deploy/arm64-installer/compose/grafana/provisioning/datasources/prometheus.yml create mode 100644 deploy/arm64-installer/compose/prometheus.yml create mode 100644 deploy/arm64-installer/docs/README.en.md create mode 100644 deploy/arm64-installer/docs/README.zh-CN.md create mode 100644 deploy/arm64-installer/docs/operations.en.md create mode 100644 deploy/arm64-installer/docs/operations.zh-CN.md create mode 100644 deploy/arm64-installer/docs/troubleshooting.en.md create mode 100644 deploy/arm64-installer/docs/troubleshooting.zh-CN.md create mode 100755 deploy/arm64-installer/install.sh create mode 100755 deploy/arm64-installer/logs.sh create mode 100755 deploy/arm64-installer/restart.sh create mode 100755 deploy/arm64-installer/scripts/common.sh create mode 100755 deploy/arm64-installer/self-check.sh create mode 100755 deploy/arm64-installer/start.sh create mode 100755 deploy/arm64-installer/status.sh create mode 100755 deploy/arm64-installer/stop.sh create mode 100755 deploy/arm64-installer/uninstall.sh create mode 100755 deploy/arm64-installer/upgrade.sh create mode 100755 scripts/docker/build_arm64_installer_package.sh diff --git a/deploy/arm64-installer/.env.example b/deploy/arm64-installer/.env.example new file mode 100644 index 0000000..63afec4 --- /dev/null +++ b/deploy/arm64-installer/.env.example @@ -0,0 +1,75 @@ +# ours RP ARM64 installer configuration +# 中文说明见 docs/README.zh-CN.md。English guide: docs/README.en.md + +# Compose project name. +COMPOSE_PROJECT_NAME=ours-rp-arm64 + +# Runtime image loaded from images/*.tar.gz by install.sh. +RPKI_IMAGE=ours-rp-runtime-arm64:dev +RPKI_PLATFORM=linux/arm64 + +# Restart policy for the soak container. Production default keeps the daemon alive. +# For finite acceptance tests such as MAX_RUNS=3, set SOAK_RESTART_POLICY=no to avoid an extra restarted run. +SOAK_RESTART_POLICY=unless-stopped + +# Host-side persistent data directory. All state/runs/logs/monitoring data are bind-mounted here. 
+HOST_DATA_DIR=/var/lib/ours-rp-arm64 + +# RIR list. Options: afrinic,apnic,arin,lacnic,ripe +RIRS=afrinic,apnic,arin,lacnic,ripe + +# Negative MAX_RUNS means keep running forever. Default production interval is 10 minutes. +MAX_RUNS=-1 +INTERVAL_SECS=600 +RETAIN_RUNS=100 + +# TAL/TA input mode: +# file-with-ta: use packaged fixture TAL + TA only. +# file-live-ta: use packaged fixture TAL; snapshot waits for live TA refresh, delta refreshes TA in background. +# url: pass TAL URL to child process. +TAL_INPUT_MODE=file-live-ta +LIVE_TA_REFRESH_BEFORE_SNAPSHOT=1 +LIVE_TA_REFRESH_CONNECT_TIMEOUT_SECS=15 +LIVE_TA_REFRESH_MAX_TIME_SECS=120 + +# Sync and runtime behavior. +RSYNC_SCOPE=module-root +DISABLE_COMPETING_RPS=0 +RUN_ROOT=/var/lib/ours-rp +DB_DIR=/var/lib/ours-rp/state/db +RSYNC_MIRROR_ROOT=/var/lib/ours-rp/state/rsync-mirror +CLEAN_TMP_AFTER_RUN=0 +OUTPUT_COMPACT_REPORT=1 +ALLOW_RSYNC_MIRROR_REUSE=1 +FAILURE_SNAPSHOT_RESET=1 +DB_STATS_EXACT_EVERY=0 + +# Validation and performance options aligned with current optimized soak defaults. +ENABLE_CHILD_CERTIFICATE_VALIDATION_CACHE=1 +RPKI_ANALYZE=1 +RPKI_EXTRA_ARGS="--enable-transport-request-prefetch --enable-publication-point-validation-cache --enable-roa-validation-cache --parallel-max-repo-sync-workers-global 4 --parallel-phase2-object-workers 4 --memory-trim-after-validation" + +# Progress logs. +RPKI_PROGRESS_LOG=1 +RPKI_PROGRESS_SLOW_SECS=20 +RPKI_PROGRESS_STAGE_FRESH_SLOW_MS=2000 +RPKI_PROGRESS_PP_CONTROL_SLOW_MS=200 +RPKI_PROGRESS_PP_CACHE_SLOW_MS=100 +RPKI_PROGRESS_CONTROL_LOOP_SLOW_MS=2000 + +# Metrics sidecar. +METRICS_INSTANCE=arm64-installer +METRICS_PORT=9556 +METRICS_POLL_SECS=10 + +# Prometheus / Grafana. +PROMETHEUS_IMAGE=prom/prometheus:v2.55.1 +GRAFANA_IMAGE=grafana/grafana:11.3.1 +PROMETHEUS_PORT=9090 +PROMETHEUS_RETENTION=7d +GRAFANA_PORT=3000 +GRAFANA_ADMIN_USER=admin +GRAFANA_ADMIN_PASSWORD=admin + +# First snapshot waiting timeout used by start.sh. 
+FIRST_RUN_WAIT_TIMEOUT_SECS=7200 diff --git a/deploy/arm64-installer/cleanup.sh b/deploy/arm64-installer/cleanup.sh new file mode 100755 index 0000000..9b7c672 --- /dev/null +++ b/deploy/arm64-installer/cleanup.sh @@ -0,0 +1,56 @@ +#!/usr/bin/env bash +set -euo pipefail +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +# shellcheck source=scripts/common.sh +source "$SCRIPT_DIR/scripts/common.sh" + +DRY_RUN=1 +KEEP_RUNS="" +usage() { + cat <<'USAGE' +Usage: ./cleanup.sh [--execute] [--keep-runs N] + +By default this is a dry-run. It removes old run_* directories beyond KEEP_RUNS +and clears tmp contents. +USAGE +} + +while [[ $# -gt 0 ]]; do + case "$1" in + --execute) + DRY_RUN=0 + shift + ;; + --keep-runs) + KEEP_RUNS="$2" + shift 2 + ;; + -h|--help) + usage + exit 0 + ;; + *) + die "unknown option: $1" + ;; + esac +done + +load_env +keep="${KEEP_RUNS:-${RETAIN_RUNS:-100}}" +mapfile -t runs < <(find "$HOST_DATA_DIR/runs" -maxdepth 1 -type d -name 'run_*' 2>/dev/null | sort) +delete_count=$(( ${#runs[@]} - keep )) +if (( delete_count > 0 )); then + for ((i=0; i/dev/null || true diff --git a/deploy/arm64-installer/compose/docker-compose.yml b/deploy/arm64-installer/compose/docker-compose.yml new file mode 100644 index 0000000..9a092e0 --- /dev/null +++ b/deploy/arm64-installer/compose/docker-compose.yml @@ -0,0 +1,88 @@ +services: + ours-rp-soak: + image: ${RPKI_IMAGE:-ours-rp-runtime-arm64:dev} + platform: ${RPKI_PLATFORM:-linux/arm64} + container_name: ${COMPOSE_PROJECT_NAME:-ours-rp-arm64}-soak + env_file: + - ../.env + environment: + PACKAGE_ROOT: /opt/ours-rp + ENV_FILE: /opt/ours-rp/.env + RUN_ROOT: /var/lib/ours-rp + BIN_DIR: /opt/ours-rp/bin + FIXTURE_DIR: /opt/ours-rp/fixtures + volumes: + - ../.env:/opt/ours-rp/.env:ro + - ${HOST_DATA_DIR:-/var/lib/ours-rp-arm64}/state:/var/lib/ours-rp/state + - ${HOST_DATA_DIR:-/var/lib/ours-rp-arm64}/runs:/var/lib/ours-rp/runs + - ${HOST_DATA_DIR:-/var/lib/ours-rp-arm64}/logs:/var/lib/ours-rp/logs + - 
${HOST_DATA_DIR:-/var/lib/ours-rp-arm64}/tmp:/var/lib/ours-rp/tmp + restart: ${SOAK_RESTART_POLICY:-unless-stopped} + profiles: + - core + + artifact-metrics: + image: ${RPKI_IMAGE:-ours-rp-runtime-arm64:dev} + platform: ${RPKI_PLATFORM:-linux/arm64} + container_name: ${COMPOSE_PROJECT_NAME:-ours-rp-arm64}-artifact-metrics + env_file: + - ../.env + command: + - /opt/ours-rp/bin/rpki_artifact_metrics + - --run-root + - /var/lib/ours-rp + - --listen + - 0.0.0.0:9556 + - --poll-secs + - ${METRICS_POLL_SECS:-10} + - --instance + - ${METRICS_INSTANCE:-arm64-installer} + ports: + - "${METRICS_PORT:-9556}:9556" + volumes: + - ${HOST_DATA_DIR:-/var/lib/ours-rp-arm64}/state:/var/lib/ours-rp/state:ro + - ${HOST_DATA_DIR:-/var/lib/ours-rp-arm64}/runs:/var/lib/ours-rp/runs:ro + - ${HOST_DATA_DIR:-/var/lib/ours-rp-arm64}/logs:/var/lib/ours-rp/logs:ro + restart: unless-stopped + profiles: + - sidecar + + prometheus: + image: ${PROMETHEUS_IMAGE:-prom/prometheus:v2.55.1} + container_name: ${COMPOSE_PROJECT_NAME:-ours-rp-arm64}-prometheus + command: + - --config.file=/etc/prometheus/prometheus.yml + - --storage.tsdb.path=/prometheus + - --storage.tsdb.retention.time=${PROMETHEUS_RETENTION:-7d} + - --web.enable-lifecycle + depends_on: + - artifact-metrics + user: "0:0" + ports: + - "${PROMETHEUS_PORT:-9090}:9090" + volumes: + - ./prometheus.yml:/etc/prometheus/prometheus.yml:ro + - ${HOST_DATA_DIR:-/var/lib/ours-rp-arm64}/prometheus:/prometheus + restart: unless-stopped + profiles: + - monitor + + grafana: + image: ${GRAFANA_IMAGE:-grafana/grafana:11.3.1} + container_name: ${COMPOSE_PROJECT_NAME:-ours-rp-arm64}-grafana + depends_on: + - prometheus + user: "0:0" + ports: + - "${GRAFANA_PORT:-3000}:3000" + environment: + GF_SECURITY_ADMIN_USER: ${GRAFANA_ADMIN_USER:-admin} + GF_SECURITY_ADMIN_PASSWORD: ${GRAFANA_ADMIN_PASSWORD:-admin} + GF_USERS_ALLOW_SIGN_UP: "false" + volumes: + - ${HOST_DATA_DIR:-/var/lib/ours-rp-arm64}/grafana:/var/lib/grafana + - 
./grafana/provisioning:/etc/grafana/provisioning:ro + - ./grafana/dashboards:/var/lib/grafana/dashboards:ro + restart: unless-stopped + profiles: + - monitor diff --git a/deploy/arm64-installer/compose/grafana/dashboards/ours-rp-arm64-overview.json b/deploy/arm64-installer/compose/grafana/dashboards/ours-rp-arm64-overview.json new file mode 100644 index 0000000..a095a1a --- /dev/null +++ b/deploy/arm64-installer/compose/grafana/dashboards/ours-rp-arm64-overview.json @@ -0,0 +1,80 @@ +{ + "annotations": { + "list": [] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "id": null, + "links": [], + "panels": [ + { + "datasource": {"type": "prometheus", "uid": "Prometheus"}, + "fieldConfig": {"defaults": {"color": {"mode": "thresholds"}, "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]}}, "overrides": []}, + "gridPos": {"h": 5, "w": 6, "x": 0, "y": 0}, + "id": 1, + "options": {"colorMode": "value", "graphMode": "area", "justifyMode": "auto", "orientation": "auto", "reduceOptions": {"calcs": ["lastNotNull"], "fields": "", "values": false}, "textMode": "auto", "wideLayout": true}, + "pluginVersion": "11.3.1", + "targets": [{"expr": "ours_rp_latest_run_sequence", "legendFormat": "latest run", "refId": "A"}], + "title": "Latest Run Sequence", + "type": "stat" + }, + { + "datasource": {"type": "prometheus", "uid": "Prometheus"}, + "fieldConfig": {"defaults": {"color": {"mode": "thresholds"}, "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]}}, "overrides": []}, + "gridPos": {"h": 5, "w": 6, "x": 6, "y": 0}, + "id": 2, + "options": {"colorMode": "value", "graphMode": "area", "justifyMode": "auto", "orientation": "auto", "reduceOptions": {"calcs": ["lastNotNull"], "fields": "", "values": false}, "textMode": "auto", "wideLayout": true}, + "pluginVersion": "11.3.1", + "targets": [{"expr": "ours_rp_latest_run_wall_ms / 1000", "legendFormat": "wall seconds", "refId": "A"}], + "title": "Latest 
Wall Seconds", + "type": "stat" + }, + { + "datasource": {"type": "prometheus", "uid": "Prometheus"}, + "fieldConfig": {"defaults": {"color": {"mode": "palette-classic"}, "custom": {"drawStyle": "line", "lineInterpolation": "linear", "lineWidth": 2, "pointSize": 4, "showPoints": "never"}}, "overrides": []}, + "gridPos": {"h": 8, "w": 12, "x": 12, "y": 0}, + "id": 3, + "options": {"legend": {"displayMode": "list", "placement": "bottom", "showLegend": true}, "tooltip": {"mode": "multi", "sort": "none"}}, + "targets": [{"expr": "ours_rp_latest_run_wall_ms / 1000", "legendFormat": "wall seconds", "refId": "A"}], + "title": "Wall Time Trend", + "type": "timeseries" + }, + { + "datasource": {"type": "prometheus", "uid": "Prometheus"}, + "fieldConfig": {"defaults": {"color": {"mode": "palette-classic"}, "custom": {"drawStyle": "line", "lineInterpolation": "linear", "lineWidth": 2, "pointSize": 4, "showPoints": "never"}}, "overrides": []}, + "gridPos": {"h": 8, "w": 12, "x": 0, "y": 5}, + "id": 4, + "options": {"legend": {"displayMode": "list", "placement": "bottom", "showLegend": true}, "tooltip": {"mode": "multi", "sort": "none"}}, + "targets": [ + {"expr": "ours_rp_latest_run_vrps", "legendFormat": "VRPs", "refId": "A"}, + {"expr": "ours_rp_latest_run_aspas", "legendFormat": "VAPs", "refId": "B"}, + {"expr": "ours_rp_latest_run_publication_points", "legendFormat": "PP", "refId": "C"} + ], + "title": "Output Counts", + "type": "timeseries" + }, + { + "datasource": {"type": "prometheus", "uid": "Prometheus"}, + "fieldConfig": {"defaults": {"color": {"mode": "palette-classic"}, "custom": {"drawStyle": "line", "lineInterpolation": "linear", "lineWidth": 2, "pointSize": 4, "showPoints": "never"}}, "overrides": []}, + "gridPos": {"h": 8, "w": 12, "x": 12, "y": 8}, + "id": 5, + "options": {"legend": {"displayMode": "list", "placement": "bottom", "showLegend": true}, "tooltip": {"mode": "multi", "sort": "none"}}, + "targets": [{"expr": "ours_rp_latest_run_max_rss_kb / 1024", 
"legendFormat": "Max RSS MiB", "refId": "A"}], + "title": "Max RSS Trend", + "type": "timeseries" + } + ], + "preload": false, + "refresh": "10s", + "schemaVersion": 40, + "tags": ["ours-rp", "arm64", "qemu"], + "templating": {"list": []}, + "time": {"from": "now-1h", "to": "now"}, + "timepicker": {}, + "timezone": "browser", + "title": "Ours RP ARM64 Compose Overview", + "uid": "ours-rp-arm64-overview", + "version": 1, + "weekStart": "" +} diff --git a/deploy/arm64-installer/compose/grafana/provisioning/dashboards/dashboard.yml b/deploy/arm64-installer/compose/grafana/provisioning/dashboards/dashboard.yml new file mode 100644 index 0000000..8cba889 --- /dev/null +++ b/deploy/arm64-installer/compose/grafana/provisioning/dashboards/dashboard.yml @@ -0,0 +1,12 @@ +apiVersion: 1 + +providers: + - name: ours-rp-arm64 + orgId: 1 + folder: Ours RP ARM64 + type: file + disableDeletion: false + updateIntervalSeconds: 10 + allowUiUpdates: true + options: + path: /var/lib/grafana/dashboards diff --git a/deploy/arm64-installer/compose/grafana/provisioning/datasources/prometheus.yml b/deploy/arm64-installer/compose/grafana/provisioning/datasources/prometheus.yml new file mode 100644 index 0000000..25e8d70 --- /dev/null +++ b/deploy/arm64-installer/compose/grafana/provisioning/datasources/prometheus.yml @@ -0,0 +1,10 @@ +apiVersion: 1 + +datasources: + - name: Prometheus + uid: Prometheus + type: prometheus + access: proxy + url: http://prometheus:9090 + isDefault: true + editable: true diff --git a/deploy/arm64-installer/compose/prometheus.yml b/deploy/arm64-installer/compose/prometheus.yml new file mode 100644 index 0000000..658f425 --- /dev/null +++ b/deploy/arm64-installer/compose/prometheus.yml @@ -0,0 +1,13 @@ +global: + scrape_interval: 5s + evaluation_interval: 5s + +scrape_configs: + - job_name: ours-rp-artifact-metrics + metrics_path: /metrics + static_configs: + - targets: + - artifact-metrics:9556 + labels: + rp: ours-rp + source: arm64-compose-artifact-sidecar diff 
--git a/deploy/arm64-installer/docs/README.en.md b/deploy/arm64-installer/docs/README.en.md new file mode 100644 index 0000000..75a95d2 --- /dev/null +++ b/deploy/arm64-installer/docs/README.en.md @@ -0,0 +1,94 @@ +# ours RP ARM64 Installer Guide + +## Goal + +This package deploys ours RP on a `linux/arm64` server with Docker Compose and continuously runs validation for all five RIRs. + +The package includes the ARM64 runtime image. Runtime state, run artifacts, logs, Prometheus data and Grafana data are persisted through host bind mounts. + +## Quick Start + +```bash +tar -xzf ours-rp-arm64-installer-*.tar.gz +cd ours-rp-arm64-installer-* + +./install.sh +cp .env.example .env # optional: install.sh already creates .env if it is missing +vim .env +./start.sh +./status.sh +``` + +Defaults: + +- `RIRS=afrinic,apnic,arin,lacnic,ripe` +- `MAX_RUNS=-1` +- `INTERVAL_SECS=600` +- `TAL_INPUT_MODE=file-live-ta` +- `LIVE_TA_REFRESH_BEFORE_SNAPSHOT=1` +- `HOST_DATA_DIR=/var/lib/ours-rp-arm64` +- `SOAK_RESTART_POLICY=unless-stopped` + +## First Start Semantics + +If there is no successful run under `HOST_DATA_DIR/runs`, `start.sh` starts the core `ours-rp-soak` service first and waits for the first snapshot to succeed before starting metrics, Prometheus and Grafana. + +The first snapshot refreshes live TA certificates before starting the RP process. + +## Ports + +Default ports: + +- metrics: `http://HOST:9556/metrics` +- Prometheus: `http://HOST:9090` +- Grafana: `http://HOST:3000` + +Grafana credentials come from `.env`: + +```bash +GRAFANA_ADMIN_USER=admin +GRAFANA_ADMIN_PASSWORD=admin +``` + +Change the password and restrict public access for production deployments. + +## Data Directory + +Default host directory: + +```text +/var/lib/ours-rp-arm64/ + state/ + runs/ + logs/ + tmp/ + prometheus/ + grafana/ +``` + +Each `runs/run_XXXX/` directory contains `report.json`, `result.ccr`, `input.cir`, `vrps.csv`, `vaps.csv`, `stage-timing.json`, logs and metadata.
+ +## Common Commands + +```bash +./status.sh +./logs.sh ours-rp-soak --tail 200 +./restart.sh +./stop.sh +./cleanup.sh --keep-runs 100 --execute +./uninstall.sh +``` + +`uninstall.sh` keeps data by default. Use the following only when you really want to delete `HOST_DATA_DIR`: + +```bash +./uninstall.sh --purge-data +``` + +For finite acceptance tests, for example `MAX_RUNS=3`, also set: + +```bash +SOAK_RESTART_POLICY=no +``` + +Otherwise the Compose `unless-stopped` restart policy will restart the container after it exits successfully. diff --git a/deploy/arm64-installer/docs/README.zh-CN.md b/deploy/arm64-installer/docs/README.zh-CN.md new file mode 100644 index 0000000..a11e2e9 --- /dev/null +++ b/deploy/arm64-installer/docs/README.zh-CN.md @@ -0,0 +1,96 @@ +# ours RP ARM64 安装包使用说明 + +## 目标 + +本安装包用于在 `linux/arm64` 服务器上通过 Docker Compose 部署 ours RP,并持续运行 all5 RIR 同步验证任务。 + +安装包内置 ours RP ARM64 runtime 镜像,运行产物、状态数据库、日志、Prometheus 和 Grafana 数据均通过宿主机目录挂载保存。 + +## 快速开始 + +```bash +tar -xzf ours-rp-arm64-installer-*.tar.gz +cd ours-rp-arm64-installer-* + +./install.sh +cp .env.example .env # 如 install.sh 已自动创建,可直接编辑现有 .env +vim .env +./start.sh +./status.sh +``` + +默认配置: + +- `RIRS=afrinic,apnic,arin,lacnic,ripe` +- `MAX_RUNS=-1` +- `INTERVAL_SECS=600` +- `TAL_INPUT_MODE=file-live-ta` +- `LIVE_TA_REFRESH_BEFORE_SNAPSHOT=1` +- `HOST_DATA_DIR=/var/lib/ours-rp-arm64` +- `SOAK_RESTART_POLICY=unless-stopped` + +## 首次启动语义 + +如果 `HOST_DATA_DIR/runs` 下没有成功 run,`start.sh` 会先启动核心 `ours-rp-soak`,等待第一轮 snapshot 成功后再启动 metrics、Prometheus 和 Grafana。 + +第一轮 snapshot 会先拉取 live TA,避免 clean state 使用旧 fixture TA。 + +## 访问端口 + +默认端口: + +- metrics: `http://HOST:9556/metrics` +- Prometheus: `http://HOST:9090` +- Grafana: `http://HOST:3000` + +Grafana 默认账号密码来自 `.env`: + +```bash +GRAFANA_ADMIN_USER=admin +GRAFANA_ADMIN_PASSWORD=admin +``` + +生产部署时应修改密码并限制外部访问。 + +## 数据目录 + +默认宿主机目录: + +```text +/var/lib/ours-rp-arm64/ + state/ + runs/ + logs/ + tmp/ + prometheus/ + grafana/ +``` + +`runs/run_XXXX/` 中包含每轮
`report.json`、`result.ccr`、`input.cir`、`vrps.csv`、`vaps.csv`、`stage-timing.json`、日志和元数据。 + +## 常用命令 + +```bash +./status.sh +./logs.sh ours-rp-soak --tail 200 +./restart.sh +./stop.sh +./cleanup.sh --keep-runs 100 --execute +./uninstall.sh +``` + +如果做有限轮次验收,例如 `MAX_RUNS=3`,建议同时设置: + +```bash +SOAK_RESTART_POLICY=no +``` + +否则 Compose 的 `unless-stopped` 策略会在容器正常退出后再次拉起下一轮。 + +`uninstall.sh` 默认不删除数据。只有显式执行: + +```bash +./uninstall.sh --purge-data +``` + +才会删除 `HOST_DATA_DIR`。 diff --git a/deploy/arm64-installer/docs/operations.en.md b/deploy/arm64-installer/docs/operations.en.md new file mode 100644 index 0000000..5dcda8a --- /dev/null +++ b/deploy/arm64-installer/docs/operations.en.md @@ -0,0 +1,68 @@ +# Operations Guide + +## Install + +```bash +./install.sh +``` + +The installer is idempotent: + +- existing `.env` is kept; +- existing Docker/Compose installation is reused; +- repeated image loading is safe; +- existing data directory is reused. + +## Start + +```bash +./start.sh +``` + +Start without waiting for the first snapshot: + +```bash +./start.sh --no-wait-first-run +``` + +## Stop and Restart + +```bash +./stop.sh +./restart.sh +``` + +## Status Checks + +```bash +./status.sh +./self-check.sh +``` + +Important checks: + +- Docker/Compose availability; +- runtime image exists; +- `HOST_DATA_DIR` is writable; +- Compose config is valid; +- latest run status; +- metrics, Prometheus and Grafana endpoints. + +## Upgrade + +Extract the new package, reuse the existing `.env` and `HOST_DATA_DIR`, then run: + +```bash +./upgrade.sh +``` + +Upgrade does not delete runtime data. + +## Cleanup + +```bash +./cleanup.sh --keep-runs 100 +./cleanup.sh --keep-runs 100 --execute +``` + +Cleanup is dry-run by default. Add `--execute` after reviewing the output. 
diff --git a/deploy/arm64-installer/docs/operations.zh-CN.md b/deploy/arm64-installer/docs/operations.zh-CN.md new file mode 100644 index 0000000..aa7b5af --- /dev/null +++ b/deploy/arm64-installer/docs/operations.zh-CN.md @@ -0,0 +1,68 @@ +# 运维手册 + +## 安装 + +```bash +./install.sh +``` + +安装脚本是幂等的: + +- 已有 `.env` 不覆盖; +- 已安装 Docker/Compose 则跳过; +- 镜像重复加载是安全的; +- 数据目录已存在则复用。 + +## 启动 + +```bash +./start.sh +``` + +如需后台启动后不等待首轮 snapshot: + +```bash +./start.sh --no-wait-first-run +``` + +## 停止和重启 + +```bash +./stop.sh +./restart.sh +``` + +## 状态检查 + +```bash +./status.sh +./self-check.sh +``` + +重点检查项: + +- Docker/Compose 可用; +- runtime 镜像存在; +- `HOST_DATA_DIR` 可写; +- Compose 配置合法; +- 最新 run 状态; +- metrics、Prometheus、Grafana endpoint。 + +## 升级 + +把新安装包解压到新目录后,可以复用旧 `.env` 和 `HOST_DATA_DIR`。执行: + +```bash +./upgrade.sh +``` + +升级不会删除运行数据。 + +## 清理 + +```bash +./cleanup.sh --keep-runs 100 +./cleanup.sh --keep-runs 100 --execute +``` + +默认 dry-run,确认后加 `--execute`。 diff --git a/deploy/arm64-installer/docs/troubleshooting.en.md b/deploy/arm64-installer/docs/troubleshooting.en.md new file mode 100644 index 0000000..8299d80 --- /dev/null +++ b/deploy/arm64-installer/docs/troubleshooting.en.md @@ -0,0 +1,68 @@ +# Troubleshooting + +## Docker or Compose Is Unavailable + +Run: + +```bash +docker version +docker compose version +``` + +If missing, run: + +```bash +./install.sh +``` + +## ARM64 Image Cannot Run + +Running ARM64 images on x86_64 requires binfmt/qemu: + +```bash +docker run --rm --privileged tonistiigi/binfmt --install arm64 +docker run --rm --platform linux/arm64 debian:bookworm-slim uname -m +``` + +Expected output: `aarch64`. + +## First Snapshot Times Out + +All-five snapshot can be slow, especially under QEMU. 
Increase timeout: + +```bash +./start.sh --timeout-secs 14400 +``` + +## Output Counts Are Too Low + +Check: + +```bash +grep LIVE_TA_REFRESH_BEFORE_SNAPSHOT .env +ls -l /var/lib/ours-rp-arm64/state/live-ta +tail -100 /var/lib/ours-rp-arm64/logs/live-ta-refresh-*.log +``` + +In `file-live-ta` mode, snapshot should wait until live TA refresh succeeds. + +## Grafana Login Fails + +Check `.env`: + +```bash +GRAFANA_ADMIN_USER=admin +GRAFANA_ADMIN_PASSWORD=admin +``` + +If Grafana has already started, changing `.env` may not reset the existing Grafana database. Stop services and back up/clean `${HOST_DATA_DIR}/grafana` if needed. + +## A Finite Acceptance Test Starts an Extra Run + +If `.env` sets a finite `MAX_RUNS=3` while `SOAK_RESTART_POLICY=unless-stopped`, Docker Compose restarts the soak container after it exits successfully. + +For finite tests, set: + +```bash +SOAK_RESTART_POLICY=no +``` diff --git a/deploy/arm64-installer/docs/troubleshooting.zh-CN.md b/deploy/arm64-installer/docs/troubleshooting.zh-CN.md new file mode 100644 index 0000000..df5edcd --- /dev/null +++ b/deploy/arm64-installer/docs/troubleshooting.zh-CN.md @@ -0,0 +1,68 @@ +# 故障排查 + +## Docker 或 Compose 不可用 + +执行: + +```bash +docker version +docker compose version +``` + +如果缺失,重新执行: + +```bash +./install.sh +``` + +## ARM64 镜像无法运行 + +在 x86_64 机器上运行 ARM64 镜像需要 binfmt/qemu: + +```bash +docker run --rm --privileged tonistiigi/binfmt --install arm64 +docker run --rm --platform linux/arm64 debian:bookworm-slim uname -m +``` + +预期输出 `aarch64`。 + +## 首轮 snapshot 超时 + +all5 snapshot 可能很慢,尤其在 QEMU 环境。可以提高超时: + +```bash +./start.sh --timeout-secs 14400 +``` + +## 产物数量异常偏低 + +检查: + +```bash +grep LIVE_TA_REFRESH_BEFORE_SNAPSHOT .env +ls -l /var/lib/ours-rp-arm64/state/live-ta +tail -100 /var/lib/ours-rp-arm64/logs/live-ta-refresh-*.log +``` + +`file-live-ta` 模式下,snapshot 应等待 live TA 成功刷新。 + +## Grafana 无法登录 + +确认 `.env` 中: + +```bash +GRAFANA_ADMIN_USER=admin +GRAFANA_ADMIN_PASSWORD=admin +``` + +如果曾经启动过 
Grafana,修改 `.env` 不一定重置已有 Grafana 数据库账号。可以停止服务后按需备份并清理 `${HOST_DATA_DIR}/grafana`。 + +## 有限轮次验收后又多跑了一轮 + +如果 `.env` 中设置了 `MAX_RUNS=3` 这类有限轮次,同时 `SOAK_RESTART_POLICY=unless-stopped`,Docker Compose 会在 soak 容器正常退出后重新启动容器。 + +有限验收建议设置: + +```bash +SOAK_RESTART_POLICY=no +``` diff --git a/deploy/arm64-installer/install.sh b/deploy/arm64-installer/install.sh new file mode 100755 index 0000000..82cd4e6 --- /dev/null +++ b/deploy/arm64-installer/install.sh @@ -0,0 +1,44 @@ +#!/usr/bin/env bash +set -euo pipefail +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +# shellcheck source=scripts/common.sh +source "$SCRIPT_DIR/scripts/common.sh" + +usage() { + cat <<'USAGE' +Usage: ./install.sh [--skip-dep-install] + +Install or update the ours RP ARM64 compose package idempotently. +USAGE +} + +while [[ $# -gt 0 ]]; do + case "$1" in + --skip-dep-install) + export SKIP_DEP_INSTALL=1 + shift + ;; + -h|--help) + usage + exit 0 + ;; + *) + die "unknown option: $1" + ;; + esac +done + +load_env +install_docker_if_missing +require_cmd curl +require_cmd jq +require_cmd rsync +require_cmd gzip +require_cmd tar +create_data_dirs +load_installer_images +ensure_binfmt_if_needed +verify_runtime_image +compose_cmd --profile core --profile sidecar --profile monitor config >/tmp/ours-rp-arm64-compose-config.yml +"$SCRIPT_DIR/self-check.sh" --quick +log "install complete" diff --git a/deploy/arm64-installer/logs.sh b/deploy/arm64-installer/logs.sh new file mode 100755 index 0000000..33975c3 --- /dev/null +++ b/deploy/arm64-installer/logs.sh @@ -0,0 +1,7 @@ +#!/usr/bin/env bash +set -euo pipefail +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +# shellcheck source=scripts/common.sh +source "$SCRIPT_DIR/scripts/common.sh" +load_env +compose_cmd --profile core --profile sidecar --profile monitor logs "$@" diff --git a/deploy/arm64-installer/restart.sh b/deploy/arm64-installer/restart.sh new file mode 100755 index 0000000..f9b3669 --- /dev/null +++ 
b/deploy/arm64-installer/restart.sh @@ -0,0 +1,5 @@ +#!/usr/bin/env bash +set -euo pipefail +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +"$SCRIPT_DIR/stop.sh" || true +"$SCRIPT_DIR/start.sh" "$@" diff --git a/deploy/arm64-installer/scripts/common.sh b/deploy/arm64-installer/scripts/common.sh new file mode 100755 index 0000000..ef37af5 --- /dev/null +++ b/deploy/arm64-installer/scripts/common.sh @@ -0,0 +1,245 @@ +#!/usr/bin/env bash +set -euo pipefail + +INSTALLER_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)" +ENV_FILE="${ENV_FILE:-$INSTALLER_ROOT/.env}" +ENV_EXAMPLE="$INSTALLER_ROOT/.env.example" +COMPOSE_FILE="$INSTALLER_ROOT/compose/docker-compose.yml" + +log() { + printf '[ours-rp-installer] %s\n' "$*" +} + +warn() { + printf '[ours-rp-installer][WARN] %s\n' "$*" >&2 +} + +die() { + printf '[ours-rp-installer][ERROR] %s\n' "$*" >&2 + exit 1 +} + +require_cmd() { + command -v "$1" >/dev/null 2>&1 || die "missing command: $1" +} + +load_env() { + if [[ ! -f "$ENV_FILE" ]]; then + [[ -f "$ENV_EXAMPLE" ]] || die "missing $ENV_EXAMPLE" + cp "$ENV_EXAMPLE" "$ENV_FILE" + log "created .env from .env.example" + fi + set -a + # shellcheck disable=SC1090 + source "$ENV_FILE" + set +a + HOST_DATA_DIR="${HOST_DATA_DIR:-/var/lib/ours-rp-arm64}" + COMPOSE_PROJECT_NAME="${COMPOSE_PROJECT_NAME:-ours-rp-arm64}" + RPKI_IMAGE="${RPKI_IMAGE:-ours-rp-runtime-arm64:dev}" + RPKI_PLATFORM="${RPKI_PLATFORM:-linux/arm64}" + FIRST_RUN_WAIT_TIMEOUT_SECS="${FIRST_RUN_WAIT_TIMEOUT_SECS:-7200}" +} + +compose_cmd() { + docker compose --env-file "$ENV_FILE" -f "$COMPOSE_FILE" -p "${COMPOSE_PROJECT_NAME:-ours-rp-arm64}" "$@" +} + +create_data_dirs() { + load_env + mkdir -p \ + "$HOST_DATA_DIR/state" \ + "$HOST_DATA_DIR/runs" \ + "$HOST_DATA_DIR/logs" \ + "$HOST_DATA_DIR/tmp" \ + "$HOST_DATA_DIR/prometheus" \ + "$HOST_DATA_DIR/grafana" + chmod 755 "$HOST_DATA_DIR" "$HOST_DATA_DIR/state" "$HOST_DATA_DIR/runs" "$HOST_DATA_DIR/logs" "$HOST_DATA_DIR/tmp" || true + chmod 777 
"$HOST_DATA_DIR/prometheus" "$HOST_DATA_DIR/grafana" || true +} + +latest_run_dir() { + load_env + find "$HOST_DATA_DIR/runs" -maxdepth 1 -mindepth 1 -type d -name 'run_*' 2>/dev/null | sort | tail -1 +} + +latest_success_run_dir() { + load_env + find "$HOST_DATA_DIR/runs" -maxdepth 2 -type f -path '*/run-summary.json' 2>/dev/null \ + | while read -r summary; do + if jq -e '.status == "success"' "$summary" >/dev/null 2>&1; then + dirname "$summary" + fi + done | sort | tail -1 +} + +has_success_run() { + [[ -n "$(latest_success_run_dir)" ]] +} + +print_run_summary() { + local run_dir="$1" + local summary="$run_dir/run-summary.json" + local meta="$run_dir/run-meta.json" + local timing="$run_dir/stage-timing.json" + local process_time="$run_dir/process-time.txt" + local vrps_file="$run_dir/vrps.csv" + local vaps_file="$run_dir/vaps.csv" + local status="unknown" + local sync_mode="unknown" + local wall_ms="null" + local validation_ms="null" + local repo_sync_ms="null" + local max_rss_kb="null" + local publication_points="null" + local vrps="null" + local vaps="null" + local warnings="null" + [[ -f "$summary" ]] || { + warn "missing run-summary.json in $run_dir" + return 1 + } + status="$(jq -r '.status // "unknown"' "$summary" 2>/dev/null || echo unknown)" + wall_ms="$(jq -r '.wallMs // .wall_ms // "null"' "$summary" 2>/dev/null || echo null)" + warnings="$(jq -r '.warningCount // .warnings // "null"' "$summary" 2>/dev/null || echo null)" + if [[ -f "$meta" ]]; then + sync_mode="$(jq -r '.sync_mode // .syncMode // "unknown"' "$meta" 2>/dev/null || echo unknown)" + status="$(jq -r --arg fallback "$status" '.status // $fallback' "$meta" 2>/dev/null || echo "$status")" + fi + if [[ -f "$timing" ]]; then + validation_ms="$(jq -r '.validation_ms // "null"' "$timing" 2>/dev/null || echo null)" + repo_sync_ms="$(jq -r '.repo_sync_ms_total // "null"' "$timing" 2>/dev/null || echo null)" + publication_points="$(jq -r '.publication_points // "null"' "$timing" 2>/dev/null || 
echo null)" + fi + if [[ -f "$process_time" ]]; then + max_rss_kb="$(awk -F': ' '/Maximum resident set size/ {print $2; found=1} END {if (!found) print "null"}' "$process_time")" + fi + if [[ -f "$vrps_file" ]]; then + vrps="$(( $(wc -l < "$vrps_file") > 0 ? $(wc -l < "$vrps_file") - 1 : 0 ))" + fi + if [[ -f "$vaps_file" ]]; then + vaps="$(( $(wc -l < "$vaps_file") > 0 ? $(wc -l < "$vaps_file") - 1 : 0 ))" + fi + jq -n \ + --arg run "$(basename "$run_dir")" \ + --arg status "$status" \ + --arg syncMode "$sync_mode" \ + --argjson wallMs "$wall_ms" \ + --argjson validationMs "$validation_ms" \ + --argjson repoSyncMs "$repo_sync_ms" \ + --argjson maxRssKb "$max_rss_kb" \ + --argjson vrps "$vrps" \ + --argjson vaps "$vaps" \ + --argjson publicationPoints "$publication_points" \ + --argjson warnings "$warnings" \ + '{run:$run,status:$status,syncMode:$syncMode,wallMs:$wallMs,validationMs:$validationMs,repoSyncMs:$repoSyncMs,maxRssKb:$maxRssKb,vrps:$vrps,vaps:$vaps,publicationPoints:$publicationPoints,warnings:$warnings}' +} + +wait_for_new_success_run() { + local before_latest="$1" + local timeout_secs="$2" + local start_epoch now run_dir summary meta status meta_status + start_epoch="$(date +%s)" + while true; do + run_dir="$(latest_run_dir || true)" + if [[ -n "$run_dir" && "$run_dir" != "$before_latest" ]]; then + summary="$run_dir/run-summary.json" + meta="$run_dir/run-meta.json" + if [[ -f "$summary" ]]; then + status="$(jq -r '.status // "unknown"' "$summary" 2>/dev/null || echo unknown)" + if [[ "$status" == "success" ]]; then + meta_status="unknown" + if [[ -f "$meta" ]]; then + meta_status="$(jq -r '.status // "unknown"' "$meta" 2>/dev/null || echo unknown)" + fi + if [[ "$meta_status" == "success" ]]; then + print_run_summary "$run_dir" || true + return 0 + fi + fi + if [[ "$status" == "failed" || "$status" == "error" ]]; then + print_run_summary "$run_dir" || true + die "run failed: $run_dir" + fi + fi + fi + now="$(date +%s)" + if (( now - start_epoch > 
#######################################
# Load every docker image archive shipped under $INSTALLER_ROOT/images.
#
# Plain *.tar archives are loaded with `docker load -i`; *.tar.gz archives
# are decompressed with gzip and streamed into `docker load`.
#
# The glob is expanded into an array with nullglob enabled, and the caller's
# previous nullglob setting is restored right away, so sourcing scripts that
# rely on nullglob are not affected by this function.
#
# Globals:   INSTALLER_ROOT (read)
# Outputs:   progress via log(); a warning via warn() when nothing matches
# Returns:   status of the last load, or of warn() when no archive exists
#######################################
load_installer_images() {
  require_cmd docker
  local image
  local nullglob_was_set=0
  local -a images=()

  if shopt -q nullglob; then
    nullglob_was_set=1
  fi
  shopt -s nullglob
  images=("$INSTALLER_ROOT"/images/*.tar "$INSTALLER_ROOT"/images/*.tar.gz)
  if (( nullglob_was_set == 0 )); then
    shopt -u nullglob
  fi

  if (( ${#images[@]} == 0 )); then
    warn "no image tar found under $INSTALLER_ROOT/images"
    return
  fi

  for image in "${images[@]}"; do
    log "loading docker image: $image"
    if [[ "$image" == *.gz ]]; then
      gzip -dc "$image" | docker load
    else
      docker load -i "$image"
    fi
  done
}
#######################################
# Check that the runtime image exists locally and can execute on the
# configured platform by running `/opt/ours-rp/bin/rpki --help` inside it.
#
# The help text is captured in a variable rather than written to a fixed,
# predictable file under /tmp, which could be pre-created or symlinked by
# another local user and was never cleaned up.
# NOTE(review): the previous version left /tmp/ours-rp-arm64-rpki-help.txt
# behind; confirm nothing else reads that file.
#
# Globals:   RPKI_IMAGE, RPKI_PLATFORM (read after load_env)
# Outputs:   a log line, then at most the first five lines of the help text
# Returns:   non-zero when the image is missing or the container run fails,
#            independent of whether the caller runs under `set -e`
#######################################
verify_runtime_image() {
  load_env
  require_cmd docker
  local help_text

  log "verifying runtime image $RPKI_IMAGE on $RPKI_PLATFORM"
  docker image inspect "$RPKI_IMAGE" >/dev/null || return

  # Assignment is kept separate from `local` so the run's status is not masked.
  help_text="$(docker run --rm --platform "$RPKI_PLATFORM" "$RPKI_IMAGE" /opt/ours-rp/bin/rpki --help)" || return

  if [[ -n "$help_text" ]]; then
    head -5 <<<"$help_text" || true
  fi
}
#!/usr/bin/env bash
# Start the ours RP stack.
#
# Brings up the core soak service first. When no successful run exists yet
# and waiting is enabled, blocks until a new run succeeds (or the timeout
# expires) before starting the metrics sidecar, Prometheus and Grafana.
#
# Options:
#   --no-wait-first-run   start the monitor services without waiting
#   --timeout-secs N      wait at most N seconds (non-negative integer);
#                         defaults to FIRST_RUN_WAIT_TIMEOUT_SECS
set -euo pipefail
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
# shellcheck source=scripts/common.sh
source "$SCRIPT_DIR/scripts/common.sh"

WAIT_FIRST_RUN=1
TIMEOUT_SECS=""

usage() {
  cat <<'USAGE'
Usage: ./start.sh [--no-wait-first-run] [--timeout-secs N]

Start ours RP. If no successful run exists, wait for the first snapshot to succeed
before starting metrics, Prometheus and Grafana.
USAGE
}

while [[ $# -gt 0 ]]; do
  case "$1" in
    --no-wait-first-run)
      WAIT_FIRST_RUN=0
      shift
      ;;
    --timeout-secs)
      # Under `set -u` a missing value would otherwise abort with a bare
      # "unbound variable" error; report it in terms of the option instead.
      [[ $# -ge 2 ]] || die "--timeout-secs requires a value"
      [[ "$2" =~ ^[0-9]+$ ]] || die "--timeout-secs expects a non-negative integer, got: $2"
      TIMEOUT_SECS="$2"
      shift 2
      ;;
    -h|--help)
      usage
      exit 0
      ;;
    *)
      die "unknown option: $1"
      ;;
  esac
done

load_env
create_data_dirs
timeout_secs="${TIMEOUT_SECS:-$FIRST_RUN_WAIT_TIMEOUT_SECS}"
# Snapshot the state before starting so a run that pre-dates this
# invocation is not mistaken for the new one.
before_latest="$(latest_run_dir || true)"
had_success=0
if has_success_run; then
  had_success=1
fi

log "starting core soak service"
compose_cmd --profile core up -d ours-rp-soak

if [[ "$had_success" == "0" && "$WAIT_FIRST_RUN" == "1" ]]; then
  # The value is used in an arithmetic comparison while waiting, so reject
  # anything that is not a plain integer (it may come from the environment).
  [[ "$timeout_secs" =~ ^[0-9]+$ ]] \
    || die "first-run wait timeout must be a non-negative integer, got: $timeout_secs"
  log "no previous successful run found; waiting for first run timeout=${timeout_secs}s"
  wait_for_new_success_run "$before_latest" "$timeout_secs"
fi

log "starting metrics and monitor services"
compose_cmd --profile sidecar --profile monitor up -d artifact-metrics prometheus grafana
# Status output is informational only; never fail the start because of it.
"$SCRIPT_DIR/status.sh" --brief || true
#!/usr/bin/env bash
# Remove the ours RP containers and, on request, the host data directory.
#
# Options:
#   --purge-data   also delete HOST_DATA_DIR after the containers are removed
#
# Without --purge-data the data directory is left untouched.
set -euo pipefail
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
# shellcheck source=scripts/common.sh
source "$SCRIPT_DIR/scripts/common.sh"

PURGE_DATA=0
while [[ $# -gt 0 ]]; do
  case "$1" in
    --purge-data)
      PURGE_DATA=1
      shift
      ;;
    -h|--help)
      echo "Usage: ./uninstall.sh [--purge-data]"
      exit 0
      ;;
    *)
      die "unknown option: $1"
      ;;
  esac
done

load_env
# Best effort: keep going so --purge-data still works, but do not hide the
# failure from the operator.
if ! compose_cmd --profile core --profile sidecar --profile monitor down --remove-orphans; then
  warn "docker compose down reported an error; some containers may still exist"
fi

if [[ "$PURGE_DATA" == "1" ]]; then
  if [[ -z "${HOST_DATA_DIR:-}" ]]; then
    die "refuse to purge unsafe HOST_DATA_DIR=${HOST_DATA_DIR:-}"
  fi
  # Check the resolved location, not just the literal string: values such as
  # "//", "/." or a relative path that lands on the filesystem root must be
  # rejected as well.
  resolved_data_dir="$HOST_DATA_DIR"
  if [[ -d "$HOST_DATA_DIR" ]]; then
    resolved_data_dir="$(cd -- "$HOST_DATA_DIR" && pwd -P)" \
      || die "cannot resolve HOST_DATA_DIR=$HOST_DATA_DIR"
  fi
  if [[ "$resolved_data_dir" =~ ^/+$ ]]; then
    die "refuse to purge unsafe HOST_DATA_DIR=$HOST_DATA_DIR"
  fi
  # `--` stops option parsing; `:?` aborts if the variable is somehow empty.
  rm -rf -- "${HOST_DATA_DIR:?}"
  log "purged data dir $HOST_DATA_DIR"
else
  log "containers removed; data kept at $HOST_DATA_DIR"
fi
&& pwd)" + +IMAGE_TAG="${IMAGE_TAG:-ours-rp-runtime-arm64:dev}" +IMAGE_TAR="${IMAGE_TAR:-}" +OUT_DIR="${OUT_DIR:-$REPO_ROOT/target/arm64-installer}" +PACKAGE_PREFIX="${PACKAGE_PREFIX:-ours-rp-arm64-installer}" +TEMPLATE_DIR="${TEMPLATE_DIR:-$REPO_ROOT/deploy/arm64-installer}" + +usage() { + cat <<'USAGE' +Usage: + scripts/docker/build_arm64_installer_package.sh [options] + +Options: + --image Runtime image tag recorded in package manifest. + --image-tar Existing docker save tar/tar.gz to include. + --out-dir Output directory. + --prefix Package directory/tar prefix. + -h, --help Show help. + +If --image-tar is omitted, the script uses the newest +target/arm64-docker/*.tar.gz file. +USAGE +} + +while [[ $# -gt 0 ]]; do + case "$1" in + --image) + IMAGE_TAG="$2" + shift 2 + ;; + --image-tar) + IMAGE_TAR="$2" + shift 2 + ;; + --out-dir) + OUT_DIR="$2" + shift 2 + ;; + --prefix) + PACKAGE_PREFIX="$2" + shift 2 + ;; + -h|--help) + usage + exit 0 + ;; + *) + echo "unknown option: $1" >&2 + usage >&2 + exit 2 + ;; + esac +done + +[[ -d "$TEMPLATE_DIR" ]] || { + echo "missing template dir: $TEMPLATE_DIR" >&2 + exit 2 +} + +if [[ -z "$IMAGE_TAR" ]]; then + IMAGE_TAR="$(find "$REPO_ROOT/target/arm64-docker" -maxdepth 1 -type f \( -name '*.tar.gz' -o -name '*.tar' \) -printf '%T@ %p\n' 2>/dev/null | sort -nr | awk 'NR==1 {print $2}')" +fi + +[[ -n "$IMAGE_TAR" && -f "$IMAGE_TAR" ]] || { + cat >&2 </dev/null || echo unknown)" +timestamp="$(date -u +%Y%m%dT%H%M%SZ)" +package_name="${PACKAGE_PREFIX}-${timestamp}-${commit}" +stage="$OUT_DIR/$package_name" +tar_path="$OUT_DIR/$package_name.tar.gz" + +rm -rf "$stage" +mkdir -p "$stage/images" +rsync -a --delete "$TEMPLATE_DIR"/ "$stage"/ +cp "$IMAGE_TAR" "$stage/images/" + +if [[ -f "$stage/.env.example" ]]; then + tmp_env="$stage/.env.example.tmp" + awk -v image="$IMAGE_TAG" ' + BEGIN { done=0 } + /^RPKI_IMAGE=/ { print "RPKI_IMAGE=" image; done=1; next } + { print } + END { if (!done) print "RPKI_IMAGE=" image } + ' 
"$stage/.env.example" > "$tmp_env" + mv "$tmp_env" "$stage/.env.example" +fi + +cat > "$stage/PACKAGE-MANIFEST.env" </dev/null | wc -l | tr -d ' ') +image_tag=$IMAGE_TAG +image_tar=$(basename "$IMAGE_TAR") +image_tar_size_bytes=$(wc -c < "$IMAGE_TAR") +target_platform=linux/arm64 +EOF + +chmod +x "$stage"/*.sh "$stage/scripts"/*.sh +tar -C "$OUT_DIR" -czf "$tar_path" "$package_name" + +{ + echo "package=$tar_path" + echo "package_dir=$stage" + echo "package_size_bytes=$(wc -c < "$tar_path")" + echo "manifest=$stage/PACKAGE-MANIFEST.env" +} > "$OUT_DIR/$package_name.summary.env" + +echo "package built: $tar_path"