20260618 增加远端231原地发布脚本
This commit is contained in:
parent
ad2a25aede
commit
b6344074ce
368
scripts/soak/publish_remote231.sh
Executable file
368
scripts/soak/publish_remote231.sh
Executable file
@ -0,0 +1,368 @@
|
||||
#!/usr/bin/env bash
# Publish a portable soak package to a remote host in place.
# Configuration comes from the environment variables below; the command-line
# options parsed later in this file override them.
set -euo pipefail

# Absolute directory of this script and of the repository two levels above it.
# `cd` echoes the target directory when it is resolved through CDPATH, which
# would end up inside the captured value, so only the output of `pwd` is kept.
SCRIPT_DIR="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" >/dev/null && pwd)"
REPO_ROOT="$(cd -- "$SCRIPT_DIR/../.." >/dev/null && pwd)"

# SSH target and the soak root directory on that host.
REMOTE_HOST="${REMOTE_HOST:-root@47.251.127.231}"
REMOTE_ROOT="${REMOTE_ROOT:-/root/rpki_20260608_2_feature062_24h_20260608T075547Z/portable-soak}"
# Local package archive to publish; must be provided via env or --package.
PACKAGE_ARCHIVE="${PACKAGE_ARCHIVE:-}"
# "dry-run" (default) or "execute".
MODE="${MODE:-dry-run}"
# 1 to restart the query service after publishing, 0 to leave it alone.
RESTART_QUERY_SERVICE="${RESTART_QUERY_SERVICE:-0}"
# pgrep -f pattern used on the remote host to find the running query service.
QUERY_SERVICE_PID_PATTERN="${QUERY_SERVICE_PID_PATTERN:-rpki_query_service --query-db /root/rpki_20260616_query_service_deploy/query-db}"
|
||||
|
||||
# Print the command-line help text to stdout.
# The option and environment lists mirror what the argument parser and the
# variable defaults at the top of this script actually accept.
usage() {
  cat <<'USAGE'
Usage:
  scripts/soak/publish_remote231.sh --package <portable-soak.tar.gz> [--execute|--dry-run]
      [--remote-host <user@host>] [--remote-root <path>] [--restart-query-service]

Publishes a new portable soak package to remote231 in place:
  - stops only the current soak controller/daemon/rpki child under REMOTE_ROOT;
  - preserves runs/ so run numbering continues;
  - backs up state/db before replacing binaries/scripts;
  - moves state/db away and creates a new empty state/db so the next run is snapshot;
  - leaves metrics/query/prometheus/grafana configuration untouched.

Default mode is dry-run. Use --execute to apply changes.
The package archive is uploaded to the remote staging directory in both modes.

Options:
  --package <archive>        local package archive to publish (required)
  --remote-host <user@host>  SSH target (overrides REMOTE_HOST)
  --remote-root <path>       soak root on the remote host (overrides REMOTE_ROOT)
  --execute                  apply changes
  --dry-run                  only print the planned remote actions (default)
  --restart-query-service    also restart the query service (RESTART_QUERY_SERVICE=1)
  -h, --help                 show this help and exit

Environment overrides:
  REMOTE_HOST=root@47.251.127.231
  REMOTE_ROOT=/root/rpki_20260608_2_feature062_24h_20260608T075547Z/portable-soak
  RESTART_QUERY_SERVICE=0|1
  PACKAGE_ARCHIVE=<archive>
  MODE=dry-run|execute
  QUERY_SERVICE_PID_PATTERN=<pgrep -f pattern of the running query service>
USAGE
}
|
||||
|
||||
# Report a fatal error on stderr (prefixed with "error: ") and terminate the
# script with exit status 2.
# Arguments: the message words; they are joined with single spaces.
die() {
  printf '%s\n' "error: $*" >&2
  exit 2
}
|
||||
|
||||
# Parse command-line options into the global configuration variables
# (PACKAGE_ARCHIVE, REMOTE_HOST, REMOTE_ROOT, MODE, RESTART_QUERY_SERVICE).
# Arguments: the script's command-line arguments.
# A missing or empty option value and an unknown argument are both reported
# through die(), so every usage error has the same "error: ..." format and
# exit status. --help/-h prints the usage text and exits 0.
parse_args() {
  local flag
  while (( $# > 0 )); do
    flag="$1"
    case "$flag" in
      --package | --remote-host | --remote-root)
        if (( $# < 2 )) || [[ -z "$2" ]]; then
          die "$flag requires a value"
        fi
        case "$flag" in
          --package) PACKAGE_ARCHIVE="$2" ;;
          --remote-host) REMOTE_HOST="$2" ;;
          --remote-root) REMOTE_ROOT="$2" ;;
        esac
        # Consume the option and its value.
        shift 2
        ;;
      --execute)
        MODE="execute"
        shift
        ;;
      --dry-run)
        MODE="dry-run"
        shift
        ;;
      --restart-query-service)
        RESTART_QUERY_SERVICE=1
        shift
        ;;
      --help | -h)
        usage
        exit 0
        ;;
      *)
        die "unknown argument: $flag"
        ;;
    esac
  done
}

parse_args "$@"
|
||||
|
||||
# Validate the resolved configuration and the local tooling before anything
# is sent to the remote host. Every failure goes through die().
if [[ -z "$PACKAGE_ARCHIVE" ]]; then
  die "--package is required"
fi
if [[ ! -f "$PACKAGE_ARCHIVE" ]]; then
  die "package not found: $PACKAGE_ARCHIVE"
fi
# MODE can also arrive through the environment, so it is checked here and not
# only in the option parser.
if [[ "$MODE" != "dry-run" && "$MODE" != "execute" ]]; then
  die "MODE must be dry-run or execute: $MODE"
fi

# Both tools are needed: scp uploads the archive, ssh runs the remote script.
for required_tool in ssh scp; do
  command -v "$required_tool" >/dev/null 2>&1 || die "$required_tool is required"
done
|
||||
|
||||
# Remote staging location: the archive keeps its local file name inside a
# fixed staging directory on the remote host.
REMOTE_STAGE_PARENT="/root/rpki_publish_packages"
PACKAGE_BASENAME="$(basename "$PACKAGE_ARCHIVE")"
REMOTE_ARCHIVE="${REMOTE_STAGE_PARENT}/${PACKAGE_BASENAME}"

# Local provenance, used only for the banner and the dirty-worktree warning.
# When git fails, the SHA falls back to "unknown" and the status to empty.
LOCAL_GIT_SHA="$(git -C "$REPO_ROOT" rev-parse --short HEAD 2>/dev/null || printf 'unknown')"
LOCAL_GIT_STATUS="$(git -C "$REPO_ROOT" status --short 2>/dev/null || true)"

printf '%s\n' "publish mode=$MODE remote=$REMOTE_HOST root=$REMOTE_ROOT package=$PACKAGE_ARCHIVE git=$LOCAL_GIT_SHA"
[[ -z "$LOCAL_GIT_STATUS" ]] \
  || printf '%s\n' "warning: local git worktree is dirty; package manifest should record provenance" >&2

# The upload happens in dry-run mode as well: the remote script checks that
# the archive exists before it plans anything.
ssh "$REMOTE_HOST" "mkdir -p '${REMOTE_STAGE_PARENT}'"
scp "$PACKAGE_ARCHIVE" "${REMOTE_HOST}:${REMOTE_ARCHIVE}"
|
||||
|
||||
REMOTE_SCRIPT="$(cat <<'REMOTE'
|
||||
# Remote-side prologue. This text is executed by `bash -s` on the remote host;
# the five positional parameters are supplied by the ssh command at the end of
# the local script.
set -o errexit -o nounset -o pipefail

remote_root=$1            # soak root directory on this host
remote_archive=$2         # uploaded package archive
mode=$3                   # "execute" applies changes; anything else only prints them
restart_query_service=$4  # "1" to restart the query service
query_pattern=$5          # pgrep -f pattern identifying the query service
|
||||
|
||||
# Write one progress line to stdout, tagged with the "[publish]" prefix.
# Arguments: the message words; they are joined with single spaces.
log() {
  local message="$*"
  printf '%s\n' "[publish] ${message}"
}
|
||||
|
||||
# Run a command in execute mode, or print it shell-quoted in any other mode.
# Globals:   mode (read)
# Arguments: the command and its arguments
# Returns:   the command's exit status in execute mode, otherwise 0
run_or_echo() {
  if [[ "$mode" != "execute" ]]; then
    # %q quotes each word so the printed line could be pasted into a shell.
    printf '[dry-run] %s\n' "$(printf '%q ' "$@")"
    return 0
  fi
  "$@"
}
|
||||
|
||||
# Print the value of the top-level "status" key of a JSON file.
# Arguments: $1 - path to the JSON file
# Outputs:   the status value, or "missing" when the key is absent or the
#            file cannot be read or parsed (any Python exception)
json_get_status() {
  local json_file="$1"
  # The Python program is fed on stdin (`python3 -`); the path is its argv[1].
  python3 - "$json_file" <<'PY'
import json
import sys

try:
    with open(sys.argv[1], encoding="utf-8") as handle:
        document = json.load(handle)
    print(document.get("status", "missing"))
except Exception:
    print("missing")
PY
}
|
||||
|
||||
# Print the name of the highest-numbered run directory under
# "$remote_root/runs", or nothing when there is none.
# Globals: remote_root (read)
# Only names of the form run_<digits> are considered. Callers strip the
# "run_" prefix and do base-10 arithmetic on the remainder, so a stray
# directory such as "run_0010.bak" must never be returned.
max_run_name() {
  find "$remote_root/runs" -maxdepth 1 -type d -name 'run_*' -printf '%f\n' 2>/dev/null \
    | { grep -E '^run_[0-9]+$' || true; } \
    | sort -V \
    | tail -n 1
}
|
||||
|
||||
# Print the name of the highest-numbered run whose run-meta.json and
# run-summary.json both report status "success"; print nothing when no run
# qualifies.
# Globals: remote_root (read)
# Runs are examined newest-first and the scan stops at the first success, so
# the status files of older runs are not parsed at all. Directory names that
# are not run_<digits> are skipped because callers do arithmetic on the
# numeric suffix.
max_successful_run_name() {
  local -a candidates=()
  local candidate idx
  mapfile -t candidates < <(
    find "$remote_root/runs" -maxdepth 1 -type d -name 'run_*' -printf '%f\n' 2>/dev/null \
      | sort -V
  )
  for (( idx = ${#candidates[@]} - 1; idx >= 0; idx-- )); do
    candidate="${candidates[idx]}"
    [[ "$candidate" =~ ^run_[0-9]+$ ]] || continue
    if [[ "$(json_get_status "$remote_root/runs/$candidate/run-meta.json")" == "success" \
      && "$(json_get_status "$remote_root/runs/$candidate/run-summary.json")" == "success" ]]; then
      printf '%s\n' "$candidate"
      return 0
    fi
  done
  return 0
}
|
||||
|
||||
# Poll until no soak process under "$remote_root" is left.
# Globals: remote_root (read)
# Returns: 0 as soon as no matching PID is reported, 1 after 120 seconds
# The regex covers run_soak.sh, bin/rpki_daemon and bin/rpki, each followed by
# a space or the end of the command line.
wait_no_soak_children() {
  local soak_regex="$remote_root/(run_soak.sh|bin/rpki_daemon|bin/rpki)( |$)"
  local give_up_at=$((SECONDS + 120))
  until (( SECONDS >= give_up_at )); do
    # Kept inside the test so a non-zero pipeline status cannot trip errexit.
    [[ -n "$(matching_pids "$soak_regex" | head -1)" ]] || return 0
    sleep 2
  done
  return 1
}
|
||||
|
||||
# Print the sorted, de-duplicated PIDs of processes whose full command line
# matches a pgrep pattern, leaving out this script's own processes.
# Globals:   remote_root (read)
# Arguments: $1 - pattern handed to `pgrep -af`
# Skipped entries:
#   - lines whose first field is not a number;
#   - this shell ($$), the current subshell ($BASHPID) and the parent ($PPID);
#   - any command line containing both "bash -s --" and $remote_root, which
#     is how this script itself is started over ssh.
matching_pids() {
  local pattern="$1"
  pgrep -af "$pattern" 2>/dev/null \
    | while IFS= read -r entry; do
        local candidate_pid cmdline
        # pgrep -a prints "<pid> <command line>"; split on the first space.
        candidate_pid="${entry%% *}"
        cmdline="${entry#* }"
        if ! [[ "$candidate_pid" =~ ^[0-9]+$ ]]; then
          continue
        fi
        case "$candidate_pid" in
          "$$" | "$BASHPID" | "${PPID:-}") continue ;;
        esac
        if [[ "$cmdline" == *"bash -s --"* && "$cmdline" == *"$remote_root"* ]]; then
          continue
        fi
        printf '%s\n' "$candidate_pid"
      done \
    | sort -u
}
|
||||
|
||||
# Send a signal to every process reported by matching_pids for a pattern.
# Arguments: $1 - signal option for kill (for example -TERM)
#            $2 - pattern handed to matching_pids
# Returns:   always 0; kill failures are ignored on purpose because a target
#            may exit between the lookup and the signal.
terminate_matching() {
  local signal="$1" pattern="$2"
  local -a targets=()
  mapfile -t targets < <(matching_pids "$pattern")
  (( ${#targets[@]} > 0 )) || return 0
  kill "$signal" "${targets[@]}" >/dev/null 2>&1 || true
}
|
||||
|
||||
# Preflight and planning: check the inputs, find the last successful run and
# derive the next run name plus the backup/staging locations from it.
[[ -d "$remote_root" ]] || { echo "remote root not found: $remote_root" >&2; exit 2; }
[[ -f "$remote_archive" ]] || { echo "archive not found: $remote_archive" >&2; exit 2; }
# json_get_status runs python3; without it no status can be read and every
# run would be treated as unsuccessful.
command -v python3 >/dev/null 2>&1 || { echo "python3 is required on the remote host" >&2; exit 2; }

timestamp="$(date -u +%Y%m%dT%H%M%SZ)"
latest_run="$(max_run_name || true)"
last_successful_run="$(max_successful_run_name || true)"
if [[ -z "$last_successful_run" ]]; then
  # Runs may exist; what is missing is one with success in both status files.
  echo "no successful run found under $remote_root/runs (latest_run=${latest_run:-none})" >&2
  exit 2
fi
last_run="$last_successful_run"
# The numeric suffix feeds the arithmetic below, so reject any other name.
if [[ ! "$last_run" =~ ^run_[0-9]+$ ]]; then
  echo "unexpected run directory name: $last_run" >&2
  exit 2
fi
last_status="$(json_get_status "$remote_root/runs/$last_run/run-meta.json")"
last_summary_status="$(json_get_status "$remote_root/runs/$last_run/run-summary.json")"
# 10# forces base 10 so zero-padded indices such as 0008 are not read as octal.
next_index=$((10#${last_run#run_} + 1))
next_run="$(printf 'run_%04d' "$next_index")"

backup_root="$remote_root/state/backups/pre_publish_${timestamp}_after_${last_run}"
extract_root="$remote_root/state/publish-staging/$timestamp"
new_pkg="$extract_root/portable-soak"

log "latest_run=${latest_run:-none} last_successful_run=$last_run run_meta_status=$last_status run_summary_status=$last_summary_status next_run=$next_run"
log "backup_root=$backup_root"
log "extract_root=$extract_root"
log "mode=$mode"

# Re-checks the statuses just read for the selected run before going further.
if [[ "$last_status" != "success" || "$last_summary_status" != "success" ]]; then
  echo "last run is not successful; refusing publish: $last_run meta=$last_status summary=$last_summary_status" >&2
  exit 3
fi
|
||||
|
||||
# Show what is running, then (execute mode only) stop the soak processes that
# live under $remote_root: SIGTERM first, SIGKILL if they are still there
# after wait_no_soak_children times out.
log "current monitored sidecars"
pgrep -af 'rpki_artifact_metrics|rpki_query_service|rpki_inter_rp_metrics|prometheus|grafana' || true

log "current soak processes under root"
pgrep -af "$remote_root/(run_soak.sh|bin/rpki_daemon|bin/rpki)( |$)" || true

# The binary patterns end in "( |$)" so they match a process started with or
# without arguments, as the wait pattern does. A pattern requiring a trailing
# space would never signal an argument-less process that the wait loop then
# keeps seeing. "( |$)" also keeps bin/rpki from matching bin/rpki_daemon.
soak_rpki_pattern="$remote_root/bin/rpki( |$)"
soak_daemon_pattern="$remote_root/bin/rpki_daemon( |$)"
soak_controller_pattern="$remote_root/run_soak.sh"

if [[ "$mode" == "execute" ]]; then
  terminate_matching -TERM "$soak_rpki_pattern"
  terminate_matching -TERM "$soak_daemon_pattern"
  terminate_matching -TERM "$soak_controller_pattern"
  if ! wait_no_soak_children; then
    echo "soak processes did not stop cleanly; forcing kill" >&2
    terminate_matching -KILL "$soak_rpki_pattern"
    terminate_matching -KILL "$soak_daemon_pattern"
    terminate_matching -KILL "$soak_controller_pattern"
    wait_no_soak_children || { echo "failed to stop soak processes" >&2; exit 4; }
  fi
else
  log "would stop soak processes under $remote_root only"
fi
|
||||
|
||||
# Re-evaluate the run directories now that soak is stopped: a run may have
# finished, or been left incomplete, while the processes were shutting down.
latest_run_after_stop="$(max_run_name || true)"
last_successful_run_after_stop="$(max_successful_run_name || true)"
if [[ -z "$last_successful_run_after_stop" ]]; then
  echo "no successful run remains after stopping soak" >&2
  exit 7
fi
# The numeric suffix feeds the arithmetic below, so reject any other name.
if [[ ! "$last_successful_run_after_stop" =~ ^run_[0-9]+$ ]]; then
  echo "unexpected run directory name after stopping soak: $last_successful_run_after_stop" >&2
  exit 7
fi
if [[ "$last_successful_run_after_stop" != "$last_run" ]]; then
  last_run="$last_successful_run_after_stop"
  last_status="$(json_get_status "$remote_root/runs/$last_run/run-meta.json")"
  last_summary_status="$(json_get_status "$remote_root/runs/$last_run/run-summary.json")"
  next_index=$((10#${last_run#run_} + 1))
  next_run="$(printf 'run_%04d' "$next_index")"
  backup_root="$remote_root/state/backups/pre_publish_${timestamp}_after_${last_run}"
  log "recomputed last_successful_run=$last_run next_run=$next_run after stopping soak"
fi
if [[ -n "${latest_run_after_stop:-}" && "$latest_run_after_stop" != "$last_run" ]]; then
  log "latest run after stop is incomplete: $latest_run_after_stop; preserving it outside runs/ before publishing"
  incomplete_dir="$backup_root/incomplete-runs"
  run_or_echo mkdir -p "$incomplete_dir"
  stable_index=$((10#${last_run#run_}))
  # Walk the directories that actually exist instead of counting indices up
  # to the latest name. No arithmetic is done on an unvalidated name, and
  # runs are found whatever their zero padding is.
  for run_dir in "$remote_root"/runs/run_*; do
    [[ -d "$run_dir" ]] || continue
    candidate="${run_dir##*/}"
    [[ "$candidate" =~ ^run_([0-9]+)$ ]] || continue
    if (( 10#${BASH_REMATCH[1]} > stable_index )); then
      run_or_echo mv "$run_dir" "$incomplete_dir/$candidate"
    fi
  done
fi
|
||||
|
||||
# Create the backup and staging directories, then (execute mode only) unpack
# the archive, check that it contains an executable bin/rpki and record the
# publish parameters next to the backups.
# NOTE(review): extraction and validation run after the soak processes have
# been stopped; an unusable archive exits with status 5 at this point.
run_or_echo mkdir -p "$backup_root" "$extract_root"
case "$mode" in
  execute)
    tar -C "$extract_root" -xzf "$remote_archive"
    if [[ ! -x "$new_pkg/bin/rpki" ]]; then
      echo "extracted package missing bin/rpki" >&2
      exit 5
    fi
    printf '%s\n' \
      "timestamp_utc=$timestamp" \
      "remote_root=$remote_root" \
      "remote_archive=$remote_archive" \
      "last_run=$last_run" \
      "next_run=$next_run" \
      "last_status=$last_status" \
      "last_summary_status=$last_summary_status" \
      "mode=$mode" \
      > "$backup_root/publish-meta.txt"
    ;;
  *)
    log "would extract archive to $extract_root"
    ;;
esac
|
||||
|
||||
# Preserve the current state under $backup_root before anything is replaced.
# state/db and bin are moved away; state/meta and .env are copied, so the
# originals stay in place.
# Each line reads "nothing to do, or else run the command"; because the
# command is the last element of the || list, its failure still aborts the
# script under errexit.
[[ ! -d "$remote_root/state/db" ]] || run_or_echo mv "$remote_root/state/db" "$backup_root/db"
[[ ! -d "$remote_root/state/meta" ]] || run_or_echo cp -a "$remote_root/state/meta" "$backup_root/meta-copy"
[[ ! -f "$remote_root/.env" ]] || run_or_echo cp -a "$remote_root/.env" "$backup_root/env.before"
[[ ! -d "$remote_root/bin" ]] || run_or_echo mv "$remote_root/bin" "$backup_root/bin.before"
|
||||
|
||||
# Install the files shipped by the new package over the current ones. An
# entry the package does not contain is left untouched in $remote_root.
# In dry-run mode the package has not been extracted, so the existence test
# fails for every entry and the loop prints nothing.
package_entries=(
  run_soak.sh
  run_24h_soak_with_metrics.sh
  scripts
  monitor
  fixtures
  copied-binaries.txt
  missing-optional-binaries.txt
  fixtures.txt
  scripts.txt
  manifest.json
  portable-soak.env.example
)
for entry in "${package_entries[@]}"; do
  [[ -e "$new_pkg/$entry" ]] || continue
  # Remove the old copy first so a directory is replaced, not merged into.
  if [[ -e "$remote_root/$entry" ]]; then
    run_or_echo rm -rf "$remote_root/$entry"
  fi
  run_or_echo cp -a "$new_pkg/$entry" "$remote_root/$entry"
done
run_or_echo cp -a "$new_pkg/bin" "$remote_root/bin"
|
||||
|
||||
# Put the operator's .env back, recreate the working directories (including a
# new empty state/db) and carry the last-run-id marker over.
# The backup files tested below are only created in execute mode. In dry-run
# mode the decision is therefore based on the live source files, so the
# printed plan lists the same restore steps that execute mode would perform.
if [[ -f "$remote_root/.env" ]]; then
  # NOTE(review): no step visible in this script writes .env between the
  # env.before backup and here, so this copy looks identical to it - confirm.
  run_or_echo cp -a "$remote_root/.env" "$backup_root/env.generated_from_package"
fi
if [[ -f "$backup_root/env.before" || ( "$mode" != "execute" && -f "$remote_root/.env" ) ]]; then
  run_or_echo cp -a "$backup_root/env.before" "$remote_root/.env"
fi
run_or_echo mkdir -p "$remote_root/state/db" "$remote_root/state/meta" "$remote_root/tmp" "$remote_root/logs" "$remote_root/state/invalid"
if [[ -f "$backup_root/meta-copy/last-run-id" || ( "$mode" != "execute" && -f "$remote_root/state/meta/last-run-id" ) ]]; then
  run_or_echo cp -a "$backup_root/meta-copy/last-run-id" "$remote_root/state/meta/last-run-id"
fi
|
||||
|
||||
# Start the soak controller from the new package and verify that its first
# run is logged as a snapshot run with the expected run name.
if [[ "$mode" == "execute" ]]; then
  soak_stdout="$remote_root/logs/run_soak.publish-${timestamp}.stdout"
  soak_stderr="$remote_root/logs/run_soak.publish-${timestamp}.stderr"
  expected_start="starting run ${next_run} sync_mode=snapshot"

  # Best effort: one of the listed files may not exist in this package.
  chmod +x "$remote_root/run_soak.sh" "$remote_root/run_24h_soak_with_metrics.sh" "$remote_root/bin/"* 2>/dev/null || true
  nohup bash "$remote_root/run_soak.sh" > "$soak_stdout" 2> "$soak_stderr" &
  soak_pid=$!
  echo "$soak_pid" > "$remote_root/state/meta/run_soak-pid"

  # Poll for the start-up line for up to 30 seconds instead of sleeping once:
  # a slow start must not be reported as a failed publish. Polling stops
  # early when the line appears or the controller is no longer running.
  snapshot_verified=0
  verify_deadline=$((SECONDS + 30))
  while (( SECONDS < verify_deadline )); do
    sleep 1
    if grep -qF -- "$expected_start" "$soak_stdout"; then
      snapshot_verified=1
      break
    fi
    kill -0 "$soak_pid" 2>/dev/null || break
  done
  # Final look, in case the line was written just before the controller exited.
  if (( ! snapshot_verified )) && grep -qF -- "$expected_start" "$soak_stdout"; then
    snapshot_verified=1
  fi

  log "started run_soak pid=$soak_pid"
  log "startup log"
  sed -n '1,20p' "$soak_stdout" || true
  if (( snapshot_verified )); then
    log "verified first published run starts as snapshot: $next_run"
  else
    echo "failed to verify snapshot startup for $next_run" >&2
    sed -n '1,40p' "$soak_stdout" >&2 || true
    exit 6
  fi
else
  log "would start: nohup bash $remote_root/run_soak.sh > $remote_root/logs/run_soak.publish-${timestamp}.stdout 2> $remote_root/logs/run_soak.publish-${timestamp}.stderr &"
  log "expected first line: run_soak mode=continuous max_existing_run_index=${last_run#run_} next_run=$next_run"
  log "expected start: starting run $next_run sync_mode=snapshot"
fi
|
||||
|
||||
# Optionally restart the query service so it opens the repo-bytes db under
# the recreated state/db directory.
if [[ "$restart_query_service" == "1" ]]; then
  if [[ "$mode" == "execute" ]]; then
    if [[ -n "$(matching_pids "$query_pattern" | head -1)" ]]; then
      terminate_matching -TERM "$query_pattern"
      # Wait until the old instance is really gone before starting another
      # one on the same listen address. If it has not exited within 30
      # seconds, stop with status 8 instead of starting a second instance.
      query_stop_deadline=$((SECONDS + 30))
      while [[ -n "$(matching_pids "$query_pattern" | head -1)" ]]; do
        if (( SECONDS >= query_stop_deadline )); then
          echo "query service did not exit within 30s after SIGTERM; not starting a second instance" >&2
          exit 8
        fi
        sleep 1
      done
    fi
    nohup /root/rpki_20260616_query_service_deploy/bin/rpki_query_service \
      --query-db /root/rpki_20260616_query_service_deploy/query-db \
      --repo-bytes-db "$remote_root/state/db/repo-bytes.db" \
      --export-root /root/rpki_20260616_query_service_deploy/query-exports \
      --listen 0.0.0.0:9560 \
      --watch-run-root "$remote_root" \
      --watch-interval-secs 60 \
      --retain-indexed-runs 10 \
      --indexer-bin /root/rpki_20260616_query_service_deploy/bin/rpki_query_indexer \
      > "/root/rpki_20260616_query_service_deploy/query-service.publish-${timestamp}.log" 2>&1 &
    log "restarted query service"
  else
    log "would restart query service to reopen repo-bytes db"
  fi
else
  log "query service left unchanged"
fi
|
||||
|
||||
# Closing report: list the sidecar processes and show disk usage. Both
# commands are informational, so their failures are ignored.
sidecar_regex='rpki_artifact_metrics|rpki_query_service|rpki_inter_rp_metrics|prometheus|grafana'
log "post-publish sidecars"
pgrep -af "$sidecar_regex" || true
log "df"
# sort -u drops the duplicate line when / and /root are the same filesystem.
{ df -h / /root 2>/dev/null | sort -u; } || true
|
||||
REMOTE
|
||||
)"
|
||||
|
||||
# Run the generated script on the remote host: it is fed to `bash -s` on
# stdin and receives its five parameters as positional arguments.
# Each argument is quoted with printf %q rather than wrapped in literal
# single quotes, so a value containing a quote, a space or another shell
# metacharacter reaches the remote script as one unchanged word.
printf -v REMOTE_COMMAND 'bash -s -- %q %q %q %q %q' \
  "$REMOTE_ROOT" "$REMOTE_ARCHIVE" "$MODE" "$RESTART_QUERY_SERVICE" "$QUERY_SERVICE_PID_PATTERN"
ssh "$REMOTE_HOST" "$REMOTE_COMMAND" <<< "$REMOTE_SCRIPT"
|
||||
Loading…
x
Reference in New Issue
Block a user