From fe8d4fcb144980272af4e01a5f07825ceb8a02e3 Mon Sep 17 00:00:00 2001 From: yuyr Date: Mon, 29 Jun 2026 16:47:47 +0800 Subject: [PATCH] add periodic snapshot reset --- deploy/arm64-installer/.env.example | 6 + deploy/arm64-installer/docs/README.en.md | 27 +++ deploy/arm64-installer/docs/README.zh-CN.md | 27 +++ deploy/arm64-installer/docs/operations.en.md | 38 +++- .../arm64-installer/docs/operations.zh-CN.md | 38 +++- .../docs/troubleshooting.en.md | 15 ++ .../docs/troubleshooting.zh-CN.md | 15 ++ deploy/arm64-installer/status.sh | 2 + deploy/arm64-installer/upgrade.sh | 35 ++++ scripts/soak/run_soak.sh | 179 +++++++++++++++++- 10 files changed, 371 insertions(+), 11 deletions(-) diff --git a/deploy/arm64-installer/.env.example b/deploy/arm64-installer/.env.example index 726d580..20f903a 100644 --- a/deploy/arm64-installer/.env.example +++ b/deploy/arm64-installer/.env.example @@ -42,6 +42,12 @@ CLEAN_TMP_AFTER_RUN=0 OUTPUT_COMPACT_REPORT=1 ALLOW_RSYNC_MIRROR_REUSE=1 FAILURE_SNAPSHOT_RESET=1 +# Periodic snapshot reset of active state DB. +# 0: keep existing behavior. +# 1: after one successful snapshot, allow at most N successful delta runs; +# the next run is forced to snapshot and active state/db is rebuilt from empty. +PERIODIC_SNAPSHOT_RESET=0 +PERIODIC_SNAPSHOT_MAX_DELTAS=100 DB_STATS_EXACT_EVERY=0 # Validation and performance options aligned with current optimized soak defaults. 
diff --git a/deploy/arm64-installer/docs/README.en.md b/deploy/arm64-installer/docs/README.en.md index 1276872..f65d0e9 100644 --- a/deploy/arm64-installer/docs/README.en.md +++ b/deploy/arm64-installer/docs/README.en.md @@ -26,6 +26,8 @@ Defaults: - `INTERVAL_SECS=600` - `TAL_INPUT_MODE=file-live-ta` - `LIVE_TA_REFRESH_BEFORE_SNAPSHOT=1` +- `PERIODIC_SNAPSHOT_RESET=0` +- `PERIODIC_SNAPSHOT_MAX_DELTAS=100` - `HOST_DATA_DIR=/var/lib/ours-rp-arm64` - `SOAK_RESTART_POLICY=unless-stopped` - `MONITOR_PLATFORM=linux/arm64` @@ -69,6 +71,31 @@ Default host directory: Each `runs/run_XXXX/` directory contains `report.json`, `result.ccr`, `input.cir`, `vrps.csv`, `vaps.csv`, `stage-timing.json`, logs and metadata. +## Periodic Snapshot Reset + +New knobs: + +```bash +PERIODIC_SNAPSHOT_RESET=0 +PERIODIC_SNAPSHOT_MAX_DELTAS=100 +``` + +Semantics: + +- disabled by default, keeping previous behavior unchanged; +- when enabled, one successful snapshot is followed by at most `N` successful delta runs; +- after the threshold is reached, the next run is forced to snapshot; +- before that forced snapshot, only the active `state/db` is reset, while `runs/`, `logs/`, `state/rsync-mirror`, `.env`, and Prometheus/Grafana data are preserved; +- after a successful forced snapshot, the old DB staging is deleted so disk usage does not keep growing elsewhere. 
+ +Check the latest `run-meta.json` for: + +- `sync_mode` +- `snapshot_reason` +- `periodic_snapshot_delta_count` +- `periodic_snapshot_forced` +- `reset_db_cleanup_status` + ## Common Commands ```bash diff --git a/deploy/arm64-installer/docs/README.zh-CN.md b/deploy/arm64-installer/docs/README.zh-CN.md index 2ff75cc..28a4bb9 100644 --- a/deploy/arm64-installer/docs/README.zh-CN.md +++ b/deploy/arm64-installer/docs/README.zh-CN.md @@ -26,6 +26,8 @@ vim .env - `INTERVAL_SECS=600` - `TAL_INPUT_MODE=file-live-ta` - `LIVE_TA_REFRESH_BEFORE_SNAPSHOT=1` +- `PERIODIC_SNAPSHOT_RESET=0` +- `PERIODIC_SNAPSHOT_MAX_DELTAS=100` - `HOST_DATA_DIR=/var/lib/ours-rp-arm64` - `SOAK_RESTART_POLICY=unless-stopped` - `MONITOR_PLATFORM=linux/arm64` @@ -69,6 +71,31 @@ GRAFANA_ADMIN_PASSWORD=admin `runs/run_XXXX/` 中包含每轮 `report.json`、`result.ccr`、`input.cir`、`vrps.csv`、`vaps.csv`、`stage-timing.json`、日志和元数据。 +## 定期 snapshot reset + +新增配置: + +```bash +PERIODIC_SNAPSHOT_RESET=0 +PERIODIC_SNAPSHOT_MAX_DELTAS=100 +``` + +语义: + +- 默认关闭,行为与旧版本一致; +- 开启后,一次成功 snapshot 后最多连续执行 `N` 个成功 delta; +- 达到阈值后,下一轮强制跑 snapshot; +- 强制 snapshot 前只重置 active `state/db`,保留 `runs/`、`logs/`、`state/rsync-mirror`、`.env`、Prometheus/Grafana 数据; +- 强制 snapshot 成功后旧 DB staging 会被删除,避免磁盘只是换目录继续增长。 + +可通过最新 `run-meta.json` 中的以下字段确认: + +- `sync_mode` +- `snapshot_reason` +- `periodic_snapshot_delta_count` +- `periodic_snapshot_forced` +- `reset_db_cleanup_status` + ## 常用命令 ```bash diff --git a/deploy/arm64-installer/docs/operations.en.md b/deploy/arm64-installer/docs/operations.en.md index 4e4aa22..0035129 100644 --- a/deploy/arm64-installer/docs/operations.en.md +++ b/deploy/arm64-installer/docs/operations.en.md @@ -48,15 +48,47 @@ Important checks: - latest run status; - metrics, Prometheus and Grafana endpoints. 
+`status.sh` also prints: + +- `periodic_snapshot_reset` +- `periodic_snapshot_max_deltas` + ## Upgrade -Extract the new package, reuse the existing `.env` and `HOST_DATA_DIR`, then run: +Extract the new package into a new directory and explicitly reuse the existing `.env` through the upgrade script: ```bash -./upgrade.sh +./upgrade.sh --reuse-env-from /path/to/old-installer/.env ``` -Upgrade does not delete runtime data. +If the new package directory already has a `.env`, the upgrade script keeps it. + +Upgrade does not delete: + +- `runs/` +- `logs/` +- `state/rsync-mirror` +- runtime configuration referenced by `.env` +- Prometheus / Grafana data + +To validate periodic forced snapshot behavior, temporarily set: + +```bash +PERIODIC_SNAPSHOT_RESET=1 +PERIODIC_SNAPSHOT_MAX_DELTAS=2 +``` + +Then confirm the latest `run-meta.json` contains: + +```bash +snapshot_reason=periodic_snapshot_delta_limit +``` + +After validation, restore: + +```bash +PERIODIC_SNAPSHOT_MAX_DELTAS=100 +``` ## Cleanup diff --git a/deploy/arm64-installer/docs/operations.zh-CN.md b/deploy/arm64-installer/docs/operations.zh-CN.md index ad1b9f0..a561deb 100644 --- a/deploy/arm64-installer/docs/operations.zh-CN.md +++ b/deploy/arm64-installer/docs/operations.zh-CN.md @@ -48,15 +48,47 @@ - 最新 run 状态; - metrics、Prometheus、Grafana endpoint。 +`status.sh` 还会显示: + +- `periodic_snapshot_reset` +- `periodic_snapshot_max_deltas` + ## 升级 -把新安装包解压到新目录后,可以复用旧 `.env` 和 `HOST_DATA_DIR`。执行: +把新安装包解压到新目录后,推荐通过升级脚本显式复用旧 `.env`: ```bash -./upgrade.sh +./upgrade.sh --reuse-env-from /path/to/old-installer/.env ``` -升级不会删除运行数据。 +如果新目录已经存在 `.env`,升级脚本会保留它,不覆盖。 + +升级不会删除以下数据: + +- `runs/` +- `logs/` +- `state/rsync-mirror` +- `.env` 对应的运行配置 +- Prometheus / Grafana 数据 + +验证定期 forced snapshot 时,可临时设置: + +```bash +PERIODIC_SNAPSHOT_RESET=1 +PERIODIC_SNAPSHOT_MAX_DELTAS=2 +``` + +然后检查最新 `run-meta.json` 应出现: + +```bash +snapshot_reason=periodic_snapshot_delta_limit +``` + +验证完成后恢复: + +```bash 
+PERIODIC_SNAPSHOT_MAX_DELTAS=100 +``` ## 清理 diff --git a/deploy/arm64-installer/docs/troubleshooting.en.md b/deploy/arm64-installer/docs/troubleshooting.en.md index 8299d80..2a11748 100644 --- a/deploy/arm64-installer/docs/troubleshooting.en.md +++ b/deploy/arm64-installer/docs/troubleshooting.en.md @@ -66,3 +66,18 @@ For finite tests, set: ```bash SOAK_RESTART_POLICY=no ``` + +## How to Confirm a Periodic Forced Snapshot + +Check the latest run metadata: + +```bash +latest="$(find ${HOST_DATA_DIR}/runs -maxdepth 1 -type d -name 'run_*' | sort | tail -1)" +jq '{run_id,sync_mode,snapshot_reason,periodic_snapshot_delta_count,periodic_snapshot_forced,reset_db_cleanup_status}' "$latest/run-meta.json" +``` + +For a threshold-triggered reset you should see: + +- `sync_mode: "snapshot"` +- `snapshot_reason: "periodic_snapshot_delta_limit"` +- `periodic_snapshot_forced: true` diff --git a/deploy/arm64-installer/docs/troubleshooting.zh-CN.md b/deploy/arm64-installer/docs/troubleshooting.zh-CN.md index df5edcd..9b48a83 100644 --- a/deploy/arm64-installer/docs/troubleshooting.zh-CN.md +++ b/deploy/arm64-installer/docs/troubleshooting.zh-CN.md @@ -66,3 +66,18 @@ GRAFANA_ADMIN_PASSWORD=admin ```bash SOAK_RESTART_POLICY=no ``` + +## 如何确认触发了定期 forced snapshot + +检查最新 run metadata: + +```bash +latest="$(find ${HOST_DATA_DIR}/runs -maxdepth 1 -type d -name 'run_*' | sort | tail -1)" +jq '{run_id,sync_mode,snapshot_reason,periodic_snapshot_delta_count,periodic_snapshot_forced,reset_db_cleanup_status}' "$latest/run-meta.json" +``` + +阈值触发时应看到: + +- `sync_mode: "snapshot"` +- `snapshot_reason: "periodic_snapshot_delta_limit"` +- `periodic_snapshot_forced: true` diff --git a/deploy/arm64-installer/status.sh b/deploy/arm64-installer/status.sh index 01a1d10..890e822 100755 --- a/deploy/arm64-installer/status.sh +++ b/deploy/arm64-installer/status.sh @@ -29,6 +29,8 @@ echo "platform=$RPKI_PLATFORM" echo "rirs=${RIRS:-}" echo "max_runs=${MAX_RUNS:-}" echo 
"interval_secs=${INTERVAL_SECS:-}"
+echo "periodic_snapshot_reset=${PERIODIC_SNAPSHOT_RESET:-0}"
+echo "periodic_snapshot_max_deltas=${PERIODIC_SNAPSHOT_MAX_DELTAS:-100}"
 echo
 if command -v docker >/dev/null 2>&1; then
   docker version --format 'docker={{.Server.Version}}' 2>/dev/null || echo "docker=unavailable"
diff --git a/deploy/arm64-installer/upgrade.sh b/deploy/arm64-installer/upgrade.sh
index 1e4220a..dee59f9 100755
--- a/deploy/arm64-installer/upgrade.sh
+++ b/deploy/arm64-installer/upgrade.sh
@@ -4,7 +4,42 @@ SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
 # shellcheck source=scripts/common.sh
 source "$SCRIPT_DIR/scripts/common.sh"
 
+REUSE_ENV_FROM=""
+
+usage() {
+  cat <<'USAGE'
+Usage: ./upgrade.sh [--reuse-env-from /path/to/.env]
+USAGE
+}
+
+while [[ $# -gt 0 ]]; do
+  case "$1" in
+    --reuse-env-from)
+      REUSE_ENV_FROM="${2:?--reuse-env-from requires a path argument}"  # abort with a clear message if the value is missing or empty, before 'shift 2' can fail
+      shift 2
+      ;;
+    -h|--help)
+      usage
+      exit 0
+      ;;
+    *)
+      die "unknown option: $1"
+      ;;
+  esac
+done
+
+if [[ -n "$REUSE_ENV_FROM" ]]; then
+  [[ -f "$REUSE_ENV_FROM" ]] || die "missing reuse env file: $REUSE_ENV_FROM"
+  if [[ ! -f "$ENV_FILE" ]]; then
+    cp "$REUSE_ENV_FROM" "$ENV_FILE"
+    log "copied existing env into new package: $REUSE_ENV_FROM -> $ENV_FILE"
+  else
+    log "keeping existing env at $ENV_FILE; reuse source ignored: $REUSE_ENV_FROM"
+  fi
+fi
+
 load_env
+create_data_dirs
 install_docker_if_missing
 load_installer_images
 ensure_binfmt_if_needed
diff --git a/scripts/soak/run_soak.sh b/scripts/soak/run_soak.sh
index 94aaa5c..2ec5388 100755
--- a/scripts/soak/run_soak.sh
+++ b/scripts/soak/run_soak.sh
@@ -22,6 +22,8 @@ OUTPUT_COMPACT_REPORT="${OUTPUT_COMPACT_REPORT:-1}"
 ALLOW_RSYNC_MIRROR_REUSE="${ALLOW_RSYNC_MIRROR_REUSE:-1}"
 RSYNC_SCOPE="${RSYNC_SCOPE:-module-root}"
 FAILURE_SNAPSHOT_RESET="${FAILURE_SNAPSHOT_RESET:-1}"
+PERIODIC_SNAPSHOT_RESET="${PERIODIC_SNAPSHOT_RESET:-0}"
+PERIODIC_SNAPSHOT_MAX_DELTAS="${PERIODIC_SNAPSHOT_MAX_DELTAS:-100}"
 DB_STATS_EXACT_EVERY="${DB_STATS_EXACT_EVERY:-3}"
 RPKI_PROGRESS_LOG="${RPKI_PROGRESS_LOG:-1}"
 RPKI_PROGRESS_SLOW_SECS="${RPKI_PROGRESS_SLOW_SECS:-10}"
@@ -44,6 +46,7 @@ META_DIR="${META_DIR:-$STATE_ROOT/meta}"
 TMP_DIR="${TMP_DIR:-$RUN_ROOT/tmp}"
 RSYNC_MIRROR_ROOT="${RSYNC_MIRROR_ROOT:-$STATE_ROOT/rsync-mirror}"
 INVALID_ROOT="$STATE_ROOT/invalid"
+RESET_STAGING_ROOT="$STATE_ROOT/reset-staging"
 LIVE_TA_REFRESH_DIR="${LIVE_TA_REFRESH_DIR:-$META_DIR/live-ta-refresh}"
 LIVE_TA_REFRESH_CONNECT_TIMEOUT_SECS="${LIVE_TA_REFRESH_CONNECT_TIMEOUT_SECS:-15}"
 LIVE_TA_REFRESH_MAX_TIME_SECS="${LIVE_TA_REFRESH_MAX_TIME_SECS:-120}"
@@ -67,6 +70,10 @@ die() {
   exit 2
 }
 
+warn() {
+  echo "warning: $*" >&2
+}
+
 is_true() {
   case "${1:-}" in
     1|true|TRUE|yes|YES|on|ON) return 0 ;;
@@ -462,6 +469,10 @@ db_state_exists() {
   [[ -e "$DB_DIR/work-db" || -e "$DB_DIR/repo-bytes.db" ]]
 }
 
+delta_state_available() {
+  [[ -e "$DB_DIR/work-db" ]]
+}
+
 isolate_state_after_failure() {
   local previous_run_id="$1"
   local timestamp
@@ -477,6 +488,97 @@ isolate_state_after_failure() {
   INVALID_TMP_PATH="$invalid_dir/$(basename "$TMP_DIR")"
 }
 
+periodic_snapshot_delta_scan() {
+  python3 - 
"$RUNS_ROOT" <<'PY' +import json +import pathlib +import sys + +runs_root = pathlib.Path(sys.argv[1]) +delta_count = 0 +run_dirs = sorted( + [path for path in runs_root.glob("run_[0-9][0-9][0-9][0-9]") if path.is_dir()], + reverse=True, +) +for run_dir in run_dirs: + meta_path = run_dir / "run-meta.json" + summary_path = run_dir / "run-summary.json" + try: + with meta_path.open("r", encoding="utf-8") as handle: + meta = json.load(handle) + with summary_path.open("r", encoding="utf-8") as handle: + summary = json.load(handle) + except Exception as exc: + print(f"error\t{delta_count}\t{run_dir.name}\tjson_parse:{exc.__class__.__name__}") + sys.exit(0) + if meta.get("status") != "success" or summary.get("status") != "success": + continue + sync_mode = meta.get("sync_mode") or meta.get("syncMode") + if sync_mode == "delta": + delta_count += 1 + continue + if sync_mode == "snapshot": + print(f"ok\t{delta_count}\t{run_dir.name}\t") + sys.exit(0) + print(f"error\t{delta_count}\t{run_dir.name}\tmissing_sync_mode") + sys.exit(0) +print(f"error\t{delta_count}\t\tmissing_success_snapshot") +PY +} + +periodic_snapshot_force_needed() { + PERIODIC_SCAN_STATUS="" + PERIODIC_SCAN_DELTA_COUNT="" + PERIODIC_SCAN_SNAPSHOT_RUN_ID="" + PERIODIC_SCAN_DETAIL="" + local scan_output + scan_output="$(periodic_snapshot_delta_scan)" + IFS=$'\t' read -r PERIODIC_SCAN_STATUS PERIODIC_SCAN_DELTA_COUNT PERIODIC_SCAN_SNAPSHOT_RUN_ID PERIODIC_SCAN_DETAIL <<< "$scan_output" + if [[ "$PERIODIC_SCAN_STATUS" != "ok" ]]; then + warn "periodic snapshot reset scan skipped status=${PERIODIC_SCAN_STATUS:-missing} snapshot_run=${PERIODIC_SCAN_SNAPSHOT_RUN_ID:-none} detail=${PERIODIC_SCAN_DETAIL:-unknown}" + return 1 + fi + [[ -n "$PERIODIC_SCAN_DELTA_COUNT" ]] || PERIODIC_SCAN_DELTA_COUNT="0" + (( PERIODIC_SCAN_DELTA_COUNT >= PERIODIC_SNAPSHOT_MAX_DELTAS )) +} + +prepare_periodic_reset_state_db() { + local run_id="$1" + RESET_DB_STAGING_PATH="" + RESET_DB_CLEANUP_STATUS="" + db_state_exists || return 0 + 
local timestamp + local staging_root + timestamp="$(date -u +%Y%m%dT%H%M%SZ)" + staging_root="$RESET_STAGING_ROOT/${run_id}-${timestamp}" + mkdir -p "$staging_root" + mv "$DB_DIR" "$staging_root/" + mkdir -p "$DB_DIR" + RESET_DB_STAGING_PATH="$staging_root/$(basename "$DB_DIR")" + RESET_DB_CLEANUP_STATUS="pending" +} + +finalize_periodic_reset_state_db() { + local final_status="$1" + local reset_db_staging_path="$2" + [[ -n "$reset_db_staging_path" ]] || { + printf '%s\n' "" + return 0 + } + local staging_root + staging_root="$(dirname "$reset_db_staging_path")" + if [[ "$final_status" == "success" ]]; then + if rm -rf "$staging_root"; then + printf '%s\n' "deleted" + return 0 + fi + warn "failed to delete periodic reset staging: $staging_root" + printf '%s\n' "cleanup_failed" + return 1 + fi + printf '%s\n' "retained_failure" +} + write_run_meta() { local output_path="$1" local status="$2" @@ -494,10 +596,18 @@ write_run_meta() { local daemon_exit_code="${14}" local package_root="${15}" local env_file="${16}" + local periodic_snapshot_reset_enabled="${17}" + local periodic_snapshot_max_deltas="${18}" + local periodic_snapshot_delta_count="${19}" + local periodic_snapshot_forced="${20}" + local reset_db_staging_path="${21}" + local reset_db_cleanup_status="${22}" python3 - "$output_path" "$status" "$run_index" "$run_id" "$sync_mode" "$snapshot_reason" \ "$previous_run_id" "$previous_run_success_value" "$started_at" "$completed_at" \ "$invalid_db_path" "$invalid_state_path" "$invalid_tmp_path" "$daemon_exit_code" \ - "$package_root" "$env_file" <<'PY' + "$package_root" "$env_file" "$periodic_snapshot_reset_enabled" \ + "$periodic_snapshot_max_deltas" "$periodic_snapshot_delta_count" \ + "$periodic_snapshot_forced" "$reset_db_staging_path" "$reset_db_cleanup_status" <<'PY' import json import sys @@ -514,6 +624,9 @@ def nullable_int(value): return None return int(value) +def bool_value(value): + return value == "true" + ( output_path, status, @@ -531,6 +644,12 @@ def 
nullable_int(value): daemon_exit_code, package_root, env_file, + periodic_snapshot_reset_enabled, + periodic_snapshot_max_deltas, + periodic_snapshot_delta_count, + periodic_snapshot_forced, + reset_db_staging_path, + reset_db_cleanup_status, ) = sys.argv[1:] data = { @@ -549,6 +668,12 @@ data = { "daemon_exit_code": nullable_int(daemon_exit_code), "package_root": package_root, "env_file": env_file, + "periodic_snapshot_reset_enabled": bool_value(periodic_snapshot_reset_enabled), + "periodic_snapshot_max_deltas": int(periodic_snapshot_max_deltas), + "periodic_snapshot_delta_count": nullable_int(periodic_snapshot_delta_count), + "periodic_snapshot_forced": bool_value(periodic_snapshot_forced), + "reset_db_staging_path": nullable(reset_db_staging_path), + "reset_db_cleanup_status": nullable(reset_db_cleanup_status), } with open(output_path, "w", encoding="utf-8") as handle: json.dump(data, handle, indent=2, sort_keys=True) @@ -796,7 +921,9 @@ run_one_round() { started_at="$(date -u +%Y-%m-%dT%H:%M:%SZ)" write_run_meta "$run_dir/run-meta.json" "running" "$run_index" "$run_id" "$sync_mode" \ "$snapshot_reason" "$previous_run_id" "$previous_success_value" "$started_at" "" \ - "$INVALID_DB_PATH" "$INVALID_STATE_PATH" "$INVALID_TMP_PATH" "" "$PACKAGE_ROOT" "$ENV_FILE" + "$INVALID_DB_PATH" "$INVALID_STATE_PATH" "$INVALID_TMP_PATH" "" "$PACKAGE_ROOT" "$ENV_FILE" \ + "$RUN_META_PERIODIC_ENABLED" "$RUN_META_PERIODIC_MAX_DELTAS" "$RUN_META_PERIODIC_DELTA_COUNT" \ + "$RUN_META_PERIODIC_FORCED" "$RUN_META_RESET_DB_STAGING_PATH" "$RUN_META_RESET_DB_CLEANUP_STATUS" prepare_live_ta_inputs_for_run "$run_id" "$sync_mode" build_child_args @@ -839,9 +966,18 @@ run_one_round() { if [[ "$daemon_exit_code" -eq 0 && "$summary_state" == "success" ]]; then final_status="success" fi + if [[ -n "$RUN_META_RESET_DB_STAGING_PATH" ]]; then + if RUN_META_RESET_DB_CLEANUP_STATUS="$(finalize_periodic_reset_state_db "$final_status" "$RUN_META_RESET_DB_STAGING_PATH")"; then + : + else + 
final_status="failed" + fi + fi write_run_meta "$run_dir/run-meta.json" "$final_status" "$run_index" "$run_id" "$sync_mode" \ "$snapshot_reason" "$previous_run_id" "$previous_success_value" "$started_at" "$completed_at" \ - "$INVALID_DB_PATH" "$INVALID_STATE_PATH" "$INVALID_TMP_PATH" "$daemon_exit_code" "$PACKAGE_ROOT" "$ENV_FILE" + "$INVALID_DB_PATH" "$INVALID_STATE_PATH" "$INVALID_TMP_PATH" "$daemon_exit_code" "$PACKAGE_ROOT" "$ENV_FILE" \ + "$RUN_META_PERIODIC_ENABLED" "$RUN_META_PERIODIC_MAX_DELTAS" "$RUN_META_PERIODIC_DELTA_COUNT" \ + "$RUN_META_PERIODIC_FORCED" "$RUN_META_RESET_DB_STAGING_PATH" "$RUN_META_RESET_DB_CLEANUP_STATUS" printf '%s\n' "$run_id" > "$META_DIR/last-run-id" if is_true "$CLEAN_TMP_AFTER_RUN"; then rm -rf "$daemon_state_root" @@ -869,6 +1005,7 @@ main() { validate_positive_int "RETAIN_RUNS" "$RETAIN_RUNS" validate_rsync_scope validate_tal_input_mode + validate_non_negative_int "PERIODIC_SNAPSHOT_MAX_DELTAS" "$PERIODIC_SNAPSHOT_MAX_DELTAS" if [[ -n "${DB_STATS_EXACT_EVERY:-}" && "$DB_STATS_EXACT_EVERY" != "0" ]]; then validate_positive_int "DB_STATS_EXACT_EVERY" "$DB_STATS_EXACT_EVERY" fi @@ -889,7 +1026,7 @@ main() { fi done - mkdir -p "$RUNS_ROOT" "$LOG_ROOT" "$DB_DIR" "$META_DIR" "$TMP_DIR" "$INVALID_ROOT" "$LIVE_TA_REFRESH_DIR" + mkdir -p "$RUNS_ROOT" "$LOG_ROOT" "$DB_DIR" "$META_DIR" "$TMP_DIR" "$INVALID_ROOT" "$RESET_STAGING_ROOT" "$LIVE_TA_REFRESH_DIR" if is_true "$ALLOW_RSYNC_MIRROR_REUSE"; then mkdir -p "$RSYNC_MIRROR_ROOT" fi @@ -918,6 +1055,22 @@ main() { INVALID_DB_PATH="" INVALID_STATE_PATH="" INVALID_TMP_PATH="" + PERIODIC_SCAN_STATUS="" + PERIODIC_SCAN_DELTA_COUNT="" + PERIODIC_SCAN_SNAPSHOT_RUN_ID="" + PERIODIC_SCAN_DETAIL="" + RESET_DB_STAGING_PATH="" + RESET_DB_CLEANUP_STATUS="" + if is_true "$PERIODIC_SNAPSHOT_RESET"; then + RUN_META_PERIODIC_ENABLED="true" + else + RUN_META_PERIODIC_ENABLED="false" + fi + RUN_META_PERIODIC_MAX_DELTAS="$PERIODIC_SNAPSHOT_MAX_DELTAS" + RUN_META_PERIODIC_DELTA_COUNT="" + 
RUN_META_PERIODIC_FORCED="false" + RUN_META_RESET_DB_STAGING_PATH="" + RUN_META_RESET_DB_CLEANUP_STATUS="" local previous_run_id="" local previous_success_value="" local sync_mode="snapshot" @@ -926,8 +1079,24 @@ main() { previous_run_id="$(printf 'run_%04d' $((next_index - 1)))" if previous_run_success "$RUNS_ROOT/$previous_run_id"; then previous_success_value="true" - if [[ -e "$DB_DIR/work-db" ]]; then + if delta_state_available; then sync_mode="delta" + if is_true "$PERIODIC_SNAPSHOT_RESET"; then + if periodic_snapshot_force_needed; then + RUN_META_PERIODIC_DELTA_COUNT="$PERIODIC_SCAN_DELTA_COUNT" + RUN_META_PERIODIC_FORCED="true" + sync_mode="snapshot" + snapshot_reason="periodic_snapshot_delta_limit" + prepare_periodic_reset_state_db "$(printf 'run_%04d' "$next_index")" + RUN_META_RESET_DB_STAGING_PATH="$RESET_DB_STAGING_PATH" + RUN_META_RESET_DB_CLEANUP_STATUS="$RESET_DB_CLEANUP_STATUS" + echo "periodic snapshot reset forcing snapshot run=$(printf 'run_%04d' "$next_index") delta_count=$PERIODIC_SCAN_DELTA_COUNT max_deltas=$PERIODIC_SNAPSHOT_MAX_DELTAS" + else + if [[ "$PERIODIC_SCAN_STATUS" == "ok" ]]; then + RUN_META_PERIODIC_DELTA_COUNT="$PERIODIC_SCAN_DELTA_COUNT" + fi + fi + fi else sync_mode="snapshot" snapshot_reason="missing_db"