#!/usr/bin/env bash
#
# publish_remote231.sh - publish a portable soak package to remote231 in place.
#
# Provenance: scripts/soak/publish_remote231.sh, introduced by commit
# b6344074ce05ee251b465306775fcc9da9a9b16e (author: yuyr, 2026-06-18,
# subject: "20260618 add in-place publish script for remote 231").
#
# Flow:
#   1. Parse CLI flags / environment overrides and validate the local archive.
#   2. Upload the archive to the remote staging directory. This happens in
#      dry-run mode too, because the remote script checks the archive exists.
#   3. Feed REMOTE_SCRIPT to `bash -s` on the remote host. In dry-run mode every
#      mutating command is printed instead of executed.
#
# Exit codes of the remote script (propagated through ssh):
#   2  remote root / archive missing, or no successful run exists
#   3  last run is not successful
#   4  soak processes could not be stopped
#   5  package could not be extracted or lacks bin/rpki
#   6  first published run did not start in snapshot mode
#   7  no successful run remains after stopping soak
#   8  old query service did not exit before the restart
set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
REPO_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)"

REMOTE_HOST="${REMOTE_HOST:-root@47.251.127.231}"
REMOTE_ROOT="${REMOTE_ROOT:-/root/rpki_20260608_2_feature062_24h_20260608T075547Z/portable-soak}"
PACKAGE_ARCHIVE="${PACKAGE_ARCHIVE:-}"
MODE="${MODE:-dry-run}"
RESTART_QUERY_SERVICE="${RESTART_QUERY_SERVICE:-0}"
QUERY_SERVICE_PID_PATTERN="${QUERY_SERVICE_PID_PATTERN:-rpki_query_service --query-db /root/rpki_20260616_query_service_deploy/query-db}"

# Print the command-line help to stdout.
usage() {
  cat <<'USAGE'
Usage:
  scripts/soak/publish_remote231.sh --package ARCHIVE [--execute|--dry-run]
      [--remote-host USER@HOST] [--remote-root PATH] [--restart-query-service]

Publishes a new portable soak package to remote231 in place:
  - stops only the current soak controller/daemon/rpki child under REMOTE_ROOT;
  - preserves runs/ so run numbering continues;
  - backs up state/db before replacing binaries/scripts;
  - moves state/db away and creates a new empty state/db so the next run is snapshot;
  - leaves metrics/query/prometheus/grafana configuration untouched.

Default mode is dry-run. Use --execute to apply changes.
The package archive is uploaded to the remote staging directory in both modes.

Environment overrides:
  REMOTE_HOST=root@47.251.127.231
  REMOTE_ROOT=/root/rpki_20260608_2_feature062_24h_20260608T075547Z/portable-soak
  PACKAGE_ARCHIVE=ARCHIVE
  MODE=dry-run|execute
  RESTART_QUERY_SERVICE=0|1
  QUERY_SERVICE_PID_PATTERN=REGEX
USAGE
}

# Print an error message to stderr and exit with status 2.
die() {
  echo "error: $*" >&2
  exit 2
}

# Wrap $1 in single quotes for a POSIX shell, escaping embedded single quotes.
# ssh hands the remote command to the remote login shell as one string, so
# every interpolated value has to be quoted for that shell.
sh_quote() {
  local value="$1"
  local sq="'"
  local escaped_sq="'\\''"
  printf "'%s'" "${value//$sq/$escaped_sq}"
}

while [[ $# -gt 0 ]]; do
  case "$1" in
    --package)
      shift
      PACKAGE_ARCHIVE="${1:?--package requires a value}"
      ;;
    --remote-host)
      shift
      REMOTE_HOST="${1:?--remote-host requires a value}"
      ;;
    --remote-root)
      shift
      REMOTE_ROOT="${1:?--remote-root requires a value}"
      ;;
    --execute)
      MODE="execute"
      ;;
    --dry-run)
      MODE="dry-run"
      ;;
    --restart-query-service)
      RESTART_QUERY_SERVICE=1
      ;;
    --help|-h)
      usage
      exit 0
      ;;
    *)
      die "unknown argument: $1"
      ;;
  esac
  shift
done

[[ -n "$PACKAGE_ARCHIVE" ]] || die "--package is required"
[[ -f "$PACKAGE_ARCHIVE" ]] || die "package not found: $PACKAGE_ARCHIVE"
case "$MODE" in
  dry-run|execute) ;;
  *) die "MODE must be dry-run or execute: $MODE" ;;
esac

command -v ssh >/dev/null 2>&1 || die "ssh is required"
command -v scp >/dev/null 2>&1 || die "scp is required"

REMOTE_STAGE_PARENT="/root/rpki_publish_packages"
PACKAGE_BASENAME="$(basename "$PACKAGE_ARCHIVE")"
REMOTE_ARCHIVE="$REMOTE_STAGE_PARENT/$PACKAGE_BASENAME"
LOCAL_GIT_SHA="$(git -C "$REPO_ROOT" rev-parse --short HEAD 2>/dev/null || printf 'unknown')"
LOCAL_GIT_STATUS="$(git -C "$REPO_ROOT" status --short 2>/dev/null || true)"

echo "publish mode=$MODE remote=$REMOTE_HOST root=$REMOTE_ROOT package=$PACKAGE_ARCHIVE git=$LOCAL_GIT_SHA"
if [[ -n "$LOCAL_GIT_STATUS" ]]; then
  echo "warning: local git worktree is dirty; package manifest should record provenance" >&2
fi

# Staging the archive is done in both modes: the remote script refuses to run
# (even as a dry-run) when the archive is not present on the remote host.
ssh "$REMOTE_HOST" "mkdir -p $(sh_quote "$REMOTE_STAGE_PARENT")"
scp "$PACKAGE_ARCHIVE" "$REMOTE_HOST:$REMOTE_ARCHIVE"

# The remote half of the publish. The heredoc delimiter is quoted, so nothing
# in it is expanded locally; all inputs arrive as positional parameters.
REMOTE_SCRIPT="$(cat <<'REMOTE'
set -euo pipefail

remote_root="$1"
remote_archive="$2"
mode="$3"
restart_query_service="$4"
query_pattern="$5"

# Extended regular expressions (pgrep -f) for the soak processes rooted at
# remote_root. The same anchored patterns are used for terminating and for
# waiting, so a process cannot be waited on without having been signalled.
soak_any_pattern="$remote_root/(run_soak.sh|bin/rpki_daemon|bin/rpki)( |\$)"
soak_rpki_pattern="$remote_root/bin/rpki( |\$)"
soak_daemon_pattern="$remote_root/bin/rpki_daemon( |\$)"
soak_controller_pattern="$remote_root/run_soak.sh( |\$)"

# Deployment directory of the query service restarted by --restart-query-service.
query_service_root="/root/rpki_20260616_query_service_deploy"

# Print a progress line prefixed with [publish].
log() {
  printf '[publish] %s\n' "$*"
}

# Run the given command in execute mode; in dry-run mode print it shell-quoted.
run_or_echo() {
  if [[ "$mode" == "execute" ]]; then
    "$@"
  else
    printf '[dry-run] '
    printf '%q ' "$@"
    printf '\n'
  fi
}

# Print the "status" field of the JSON file $1, or "missing" when the file is
# absent, unreadable, not valid JSON, or not a JSON object.
json_get_status() {
  local path="$1"
  python3 - "$path" <<'PY'
import json, sys
try:
    with open(sys.argv[1], encoding="utf-8") as handle:
        print(json.load(handle).get("status", "missing"))
except Exception:
    print("missing")
PY
}

# List run directory names (run_<digits> only) under runs/, ascending.
# Other directories matching run_* are ignored so the index arithmetic below
# never sees a non-numeric suffix.
list_run_names() {
  find "$remote_root/runs" -maxdepth 1 -type d -name 'run_*' -printf '%f\n' 2>/dev/null \
    | grep -E '^run_[0-9]+$' \
    | sort -V
}

# Print the highest-numbered run name, or nothing when there are no runs.
max_run_name() {
  list_run_names | tail -1
}

# Print the highest-numbered run whose run-meta.json and run-summary.json both
# report status "success", or nothing when no such run exists.
# Scans newest-first and stops at the first hit to avoid spawning python for
# every historical run.
max_successful_run_name() {
  local -a names=()
  local i candidate
  mapfile -t names < <(list_run_names)
  for ((i = ${#names[@]} - 1; i >= 0; i--)); do
    candidate="${names[i]}"
    [[ -n "$candidate" ]] || continue
    if [[ "$(json_get_status "$remote_root/runs/$candidate/run-meta.json")" == "success" \
      && "$(json_get_status "$remote_root/runs/$candidate/run-summary.json")" == "success" ]]; then
      printf '%s\n' "$candidate"
      return 0
    fi
  done
  return 0
}

# Print the PIDs (one per line, unique) whose full command line matches the
# extended regular expression $1. This shell, its subshell and its parent are
# excluded, as is the `bash -s --` ssh wrapper whose argv contains remote_root.
matching_pids() {
  local pattern="$1"
  pgrep -af "$pattern" 2>/dev/null | while IFS= read -r line; do
    local pid cmd
    pid="${line%% *}"
    cmd="${line#* }"
    [[ "$pid" =~ ^[0-9]+$ ]] || continue
    [[ "$pid" == "$$" || "$pid" == "$BASHPID" || "$pid" == "${PPID:-}" ]] && continue
    [[ "$cmd" == *"bash -s --"* && "$cmd" == *"$remote_root"* ]] && continue
    printf '%s\n' "$pid"
  done | sort -u || true   # pgrep exits 1 on "no match"; that is not an error here
}

# Send signal $1 (e.g. -TERM) to every process matching pattern $2.
# Failures of kill (process already gone) are ignored on purpose.
terminate_matching() {
  local signal="$1"
  local pattern="$2"
  local -a pids=()
  mapfile -t pids < <(matching_pids "$pattern")
  if (( ${#pids[@]} > 0 )); then
    kill "$signal" "${pids[@]}" >/dev/null 2>&1 || true
  fi
}

# Wait up to 120 seconds for all soak processes under remote_root to exit.
# Returns 0 once none are left, 1 on timeout.
wait_no_soak_children() {
  local deadline=$((SECONDS + 120))
  while (( SECONDS < deadline )); do
    if [[ -z "$(matching_pids "$soak_any_pattern" | head -1)" ]]; then
      return 0
    fi
    sleep 2
  done
  return 1
}

# Wait up to $2 seconds for every process matching pattern $1 to exit.
# Returns 0 once none are left, 1 on timeout.
wait_pattern_gone() {
  local pattern="$1"
  local timeout_secs="$2"
  local deadline=$((SECONDS + timeout_secs))
  while (( SECONDS < deadline )); do
    if [[ -z "$(matching_pids "$pattern" | head -1)" ]]; then
      return 0
    fi
    sleep 1
  done
  return 1
}

# Wait up to 30 seconds for the fixed string $3 to appear in log file $2.
# Gives up early when process $1 is no longer alive. Returns 0 when the line
# was found, non-zero otherwise.
wait_for_log_line() {
  local pid="$1"
  local log_file="$2"
  local needle="$3"
  local deadline=$((SECONDS + 30))
  while (( SECONDS < deadline )); do
    if grep -qF -- "$needle" "$log_file" 2>/dev/null; then
      return 0
    fi
    kill -0 "$pid" 2>/dev/null || break
    sleep 1
  done
  # Final check: the process may have logged the line right before exiting.
  grep -qF -- "$needle" "$log_file" 2>/dev/null
}

# Derive last_status, last_summary_status, next_run and backup_root from the
# globals last_run and timestamp.
derive_run_plan() {
  local next_index
  last_status="$(json_get_status "$remote_root/runs/$last_run/run-meta.json")"
  last_summary_status="$(json_get_status "$remote_root/runs/$last_run/run-summary.json")"
  # 10# forces base 10 so zero-padded indices are not parsed as octal.
  next_index=$((10#${last_run#run_} + 1))
  next_run="$(printf 'run_%04d' "$next_index")"
  backup_root="$remote_root/state/backups/pre_publish_${timestamp}_after_${last_run}"
}

[[ -d "$remote_root" ]] || { echo "remote root not found: $remote_root" >&2; exit 2; }
[[ -f "$remote_archive" ]] || { echo "archive not found: $remote_archive" >&2; exit 2; }

timestamp="$(date -u +%Y%m%dT%H%M%SZ)"
latest_run="$(max_run_name || true)"
last_successful_run="$(max_successful_run_name || true)"
if [[ -z "$last_successful_run" ]]; then
  echo "no successful runs found under $remote_root/runs" >&2
  exit 2
fi
last_run="$last_successful_run"
derive_run_plan

extract_root="$remote_root/state/publish-staging/$timestamp"
new_pkg="$extract_root/portable-soak"

log "latest_run=${latest_run:-none} last_successful_run=$last_run run_meta_status=$last_status run_summary_status=$last_summary_status next_run=$next_run"
log "backup_root=$backup_root"
log "extract_root=$extract_root"
log "mode=$mode"

if [[ "$last_status" != "success" || "$last_summary_status" != "success" ]]; then
  echo "last run is not successful; refusing publish: $last_run meta=$last_status summary=$last_summary_status" >&2
  exit 3
fi

# Extract and validate the package BEFORE stopping anything, so a corrupt or
# incomplete archive cannot leave the host without a running soak.
run_or_echo mkdir -p "$extract_root"
if [[ "$mode" == "execute" ]]; then
  tar -C "$extract_root" -xzf "$remote_archive" \
    || { echo "failed to extract archive: $remote_archive" >&2; exit 5; }
  [[ -x "$new_pkg/bin/rpki" ]] || { echo "extracted package missing bin/rpki" >&2; exit 5; }
else
  log "would extract archive to $extract_root"
fi

log "current monitored sidecars"
pgrep -af 'rpki_artifact_metrics|rpki_query_service|rpki_inter_rp_metrics|prometheus|grafana' || true

log "current soak processes under root"
pgrep -af "$soak_any_pattern" || true

if [[ "$mode" == "execute" ]]; then
  terminate_matching -TERM "$soak_rpki_pattern"
  terminate_matching -TERM "$soak_daemon_pattern"
  terminate_matching -TERM "$soak_controller_pattern"
  if ! wait_no_soak_children; then
    echo "soak processes did not stop cleanly; forcing kill" >&2
    terminate_matching -KILL "$soak_rpki_pattern"
    terminate_matching -KILL "$soak_daemon_pattern"
    terminate_matching -KILL "$soak_controller_pattern"
    wait_no_soak_children || { echo "failed to stop soak processes" >&2; exit 4; }
  fi
else
  log "would stop soak processes under $remote_root only"
fi

# A run may have finished (or started) while the processes were being stopped,
# so the plan is recomputed from the state on disk.
latest_run_after_stop="$(max_run_name || true)"
last_successful_run_after_stop="$(max_successful_run_name || true)"
if [[ -z "$last_successful_run_after_stop" ]]; then
  echo "no successful run remains after stopping soak" >&2
  exit 7
fi
if [[ "$last_successful_run_after_stop" != "$last_run" ]]; then
  last_run="$last_successful_run_after_stop"
  derive_run_plan
  log "recomputed last_successful_run=$last_run next_run=$next_run after stopping soak"
fi
if [[ -n "${latest_run_after_stop:-}" && "$latest_run_after_stop" != "$last_run" ]]; then
  # Runs newer than the last successful one are moved out of runs/ so the
  # restarted controller continues numbering at next_run.
  log "latest run after stop is incomplete: $latest_run_after_stop; preserving it outside runs/ before publishing"
  incomplete_dir="$backup_root/incomplete-runs"
  run_or_echo mkdir -p "$incomplete_dir"
  latest_index=$((10#${latest_run_after_stop#run_}))
  stable_index=$((10#${last_run#run_}))
  for ((idx = stable_index + 1; idx <= latest_index; idx++)); do
    candidate="$(printf 'run_%04d' "$idx")"
    if [[ -d "$remote_root/runs/$candidate" ]]; then
      run_or_echo mv "$remote_root/runs/$candidate" "$incomplete_dir/$candidate"
    fi
  done
fi

run_or_echo mkdir -p "$backup_root"

if [[ "$mode" == "execute" ]]; then
  {
    echo "timestamp_utc=$timestamp"
    echo "remote_root=$remote_root"
    echo "remote_archive=$remote_archive"
    echo "last_run=$last_run"
    echo "next_run=$next_run"
    echo "last_status=$last_status"
    echo "last_summary_status=$last_summary_status"
    echo "mode=$mode"
  } > "$backup_root/publish-meta.txt"
fi

# Back up the pieces that are about to be replaced. state/db is moved (not
# copied) so the next run starts from an empty database, i.e. as a snapshot.
if [[ -d "$remote_root/state/db" ]]; then
  run_or_echo mv "$remote_root/state/db" "$backup_root/db"
fi
if [[ -d "$remote_root/state/meta" ]]; then
  run_or_echo cp -a "$remote_root/state/meta" "$backup_root/meta-copy"
fi
if [[ -f "$remote_root/.env" ]]; then
  run_or_echo cp -a "$remote_root/.env" "$backup_root/env.before"
fi
if [[ -d "$remote_root/bin" ]]; then
  run_or_echo mv "$remote_root/bin" "$backup_root/bin.before"
fi

# Replace only the items the new package actually ships.
for path in run_soak.sh run_24h_soak_with_metrics.sh scripts monitor fixtures copied-binaries.txt missing-optional-binaries.txt fixtures.txt scripts.txt manifest.json portable-soak.env.example; do
  if [[ -e "$new_pkg/$path" ]]; then
    if [[ -e "$remote_root/$path" ]]; then
      run_or_echo rm -rf -- "${remote_root:?}/$path"
    fi
    run_or_echo cp -a "$new_pkg/$path" "$remote_root/$path"
  fi
done
run_or_echo cp -a "$new_pkg/bin" "$remote_root/bin"

# Keep the operator's .env: save whatever is in place now, then restore the
# pre-publish copy.
if [[ -f "$remote_root/.env" ]]; then
  run_or_echo cp -a "$remote_root/.env" "$backup_root/env.generated_from_package"
fi
if [[ -f "$backup_root/env.before" ]]; then
  run_or_echo cp -a "$backup_root/env.before" "$remote_root/.env"
fi
run_or_echo mkdir -p "$remote_root/state/db" "$remote_root/state/meta" "$remote_root/tmp" "$remote_root/logs" "$remote_root/state/invalid"
if [[ -f "$backup_root/meta-copy/last-run-id" ]]; then
  run_or_echo cp -a "$backup_root/meta-copy/last-run-id" "$remote_root/state/meta/last-run-id"
fi

soak_stdout="$remote_root/logs/run_soak.publish-${timestamp}.stdout"
soak_stderr="$remote_root/logs/run_soak.publish-${timestamp}.stderr"

if [[ "$mode" == "execute" ]]; then
  # Best effort: some of these files may not exist in every package.
  chmod +x "$remote_root/run_soak.sh" "$remote_root/run_24h_soak_with_metrics.sh" "$remote_root/bin/"* 2>/dev/null || true
  nohup bash "$remote_root/run_soak.sh" > "$soak_stdout" 2> "$soak_stderr" &
  soak_pid="$!"
  echo "$soak_pid" > "$remote_root/state/meta/run_soak-pid"
  log "started run_soak pid=$soak_pid"
  # Poll instead of a fixed sleep: a slow start must not be reported as a
  # failed publish while the controller is in fact running.
  if wait_for_log_line "$soak_pid" "$soak_stdout" "starting run ${next_run} sync_mode=snapshot"; then
    log "startup log"
    sed -n '1,20p' "$soak_stdout" || true
    log "verified first published run starts as snapshot: $next_run"
  else
    echo "failed to verify snapshot startup for $next_run" >&2
    sed -n '1,40p' "$soak_stdout" >&2 || true
    exit 6
  fi
else
  log "would start: nohup bash $remote_root/run_soak.sh > $soak_stdout 2> $soak_stderr &"
  log "expected first line: run_soak mode=continuous max_existing_run_index=${last_run#run_} next_run=$next_run"
  log "expected start: starting run $next_run sync_mode=snapshot"
fi

if [[ "$restart_query_service" == "1" ]]; then
  if [[ "$mode" == "execute" ]]; then
    if [[ -n "$(matching_pids "$query_pattern" | head -1)" ]]; then
      terminate_matching -TERM "$query_pattern"
      # The replacement binds the same listen address, so the old process
      # has to be gone before the new one is started.
      if ! wait_pattern_gone "$query_pattern" 30; then
        echo "query service did not stop within 30s; not starting a second instance" >&2
        exit 8
      fi
    fi
    nohup "$query_service_root/bin/rpki_query_service" \
      --query-db "$query_service_root/query-db" \
      --repo-bytes-db "$remote_root/state/db/repo-bytes.db" \
      --export-root "$query_service_root/query-exports" \
      --listen 0.0.0.0:9560 \
      --watch-run-root "$remote_root" \
      --watch-interval-secs 60 \
      --retain-indexed-runs 10 \
      --indexer-bin "$query_service_root/bin/rpki_query_indexer" \
      > "$query_service_root/query-service.publish-${timestamp}.log" 2>&1 &
    log "restarted query service"
  else
    log "would restart query service to reopen repo-bytes db"
  fi
else
  log "query service left unchanged"
fi

log "post-publish sidecars"
pgrep -af 'rpki_artifact_metrics|rpki_query_service|rpki_inter_rp_metrics|prometheus|grafana' || true
log "df"
df -h / /root 2>/dev/null | sort -u || true
REMOTE
)"

# Build the remote command line with every argument quoted for the remote
# shell; the script body itself travels over stdin.
remote_cmd='bash -s --'
for remote_arg in "$REMOTE_ROOT" "$REMOTE_ARCHIVE" "$MODE" "$RESTART_QUERY_SERVICE" "$QUERY_SERVICE_PID_PATTERN"; do
  remote_cmd+=" $(sh_quote "$remote_arg")"
done

ssh "$REMOTE_HOST" "$remote_cmd" <<< "$REMOTE_SCRIPT"