|
|
|
|
@ -51,6 +51,8 @@ LIVE_TA_REFRESH_DIR="${LIVE_TA_REFRESH_DIR:-$META_DIR/live-ta-refresh}"
|
|
|
|
|
# Live TA refresh settings. Each takes its value from the environment when set
# and non-empty, otherwise the default after ":-" (the *_SECS names suggest
# seconds -- confirm against the code that consumes them).
LIVE_TA_REFRESH_CONNECT_TIMEOUT_SECS="${LIVE_TA_REFRESH_CONNECT_TIMEOUT_SECS:-15}"
|
|
|
|
|
LIVE_TA_REFRESH_MAX_TIME_SECS="${LIVE_TA_REFRESH_MAX_TIME_SECS:-120}"
|
|
|
|
|
LIVE_TA_REFRESH_BEFORE_SNAPSHOT="${LIVE_TA_REFRESH_BEFORE_SNAPSHOT:-1}"
|
|
|
|
|
# JSON file under STATE_ROOT holding the run lifecycle state, and the limit on
# retained recent runs; both are passed to the embedded Python lifecycle helper.
RUN_LIFECYCLE_STATE_PATH="$STATE_ROOT/run-lifecycle-state.json"
|
|
|
|
|
RUN_LIFECYCLE_RECENT_RUNS_LIMIT=200
|
|
|
|
|
|
|
|
|
|
# Executables located under BIN_DIR.
RPKI_BIN="$BIN_DIR/rpki"
|
|
|
|
|
RPKI_DAEMON_BIN="$BIN_DIR/rpki_daemon"
|
|
|
|
|
@ -507,64 +509,348 @@ isolate_state_after_failure() {
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
# Runs the embedded Python lifecycle helper. The first argument is the helper
# command (the helper handles "load" and "update"); it is forwarded together
# with RUN_LIFECYCLE_STATE_PATH, RUNS_ROOT, RUN_LIFECYCLE_RECENT_RUNS_LIMIT and
# any remaining arguments.
# NOTE(review): two python3 invocations appear below (one passing only
# RUNS_ROOT); this looks like an old and a new revision shown together --
# confirm which one is current.
periodic_snapshot_delta_scan() {
|
|
|
|
|
python3 - "$RUNS_ROOT" <<'PY'
|
|
|
|
|
local command="$1"
|
|
|
|
|
shift
|
|
|
|
|
python3 - "$command" "$RUN_LIFECYCLE_STATE_PATH" "$RUNS_ROOT" "$RUN_LIFECYCLE_RECENT_RUNS_LIMIT" "$@" <<'PY'
|
|
|
|
|
import json
|
|
|
|
|
import os
|
|
|
|
|
import pathlib
|
|
|
|
|
import shlex
|
|
|
|
|
import sys
|
|
|
|
|
from datetime import datetime, timezone
|
|
|
|
|
|
|
|
|
|
# NOTE(review): the statements down to `for run_dir in run_dirs:` read
# sys.argv[1] as the runs root, while the assignments after them read it as
# the command name. They look like two revisions of the script's setup shown
# together -- confirm which set is current.
runs_root = pathlib.Path(sys.argv[1])
|
|
|
|
|
delta_count = 0
|
|
|
|
|
# Directories named run_<digits> under runs_root, highest numeric suffix first.
run_dirs = sorted(
|
|
|
|
|
[
|
|
|
|
|
path
|
|
|
|
|
for path in runs_root.glob("run_*")
|
|
|
|
|
if path.is_dir()
|
|
|
|
|
and path.name.startswith("run_")
|
|
|
|
|
and path.name[4:].isdigit()
|
|
|
|
|
],
|
|
|
|
|
key=lambda path: int(path.name[4:]),
|
|
|
|
|
reverse=True,
|
|
|
|
|
)
|
|
|
|
|
for run_dir in run_dirs:
|
|
|
|
|
# Positional arguments: command name, state file path, runs root directory,
# recent-runs limit (parsed as int), then any command-specific extras.
command = sys.argv[1]
|
|
|
|
|
state_path = pathlib.Path(sys.argv[2])
|
|
|
|
|
runs_root = pathlib.Path(sys.argv[3])
|
|
|
|
|
recent_limit = int(sys.argv[4])
|
|
|
|
|
extra_args = sys.argv[5:]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def now_rfc3339() -> str:
    """Return the current UTC time as a second-precision timestamp.

    The value is formatted as ``YYYY-MM-DDTHH:MM:SSZ`` (literal ``Z`` suffix).
    """
    current_utc = datetime.now(timezone.utc)
    return current_utc.strftime("%Y-%m-%dT%H:%M:%SZ")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def to_int(value):
    """Convert *value* to ``int``, treating "no value" inputs as ``None``.

    ``None``, the empty string and booleans all yield ``None``. Every other
    value is passed to ``int()``, so whatever ``int()`` raises for an
    unconvertible input propagates to the caller.
    """
    is_blank = value is None or value == ""
    if is_blank or isinstance(value, bool):
        return None
    return int(value)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def to_bool(value):
    """Coerce a loosely typed flag into ``True``, ``False`` or ``None``.

    Accepted inputs:
      * ``bool`` -- returned unchanged;
      * ``None`` or the empty string -- ``None`` (no value);
      * the integers ``0`` and ``1`` -- ``False`` / ``True``;
      * strings (case-insensitive, surrounding whitespace ignored):
        ``1/true/yes/on`` and ``0/false/no/off``.

    Raises:
        ValueError: for any other value.
    """
    if isinstance(value, bool):
        return value
    if value in (None, ""):
        return None
    # The string forms "0"/"1" are accepted below, so accept the same flags
    # when they arrive as JSON numbers instead of rejecting them.
    if isinstance(value, int) and value in (0, 1):
        return value == 1
    if isinstance(value, str):
        lowered = value.strip().lower()
        if lowered in {"1", "true", "yes", "on"}:
            return True
        if lowered in {"0", "false", "no", "off"}:
            return False
    raise ValueError(f"invalid bool value: {value!r}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def normalize_run_entry(entry):
    """Return a canonical run-entry dict built from *entry*.

    ``None`` passes through as ``None``. The identifier and index are read
    from ``run_id``/``run_index`` with ``runId``/``runSeq`` as alternates, and
    ``sync_mode`` falls back to ``syncMode``. A missing status becomes
    ``"unknown"``. Periodic-snapshot fields are coerced with ``to_bool`` /
    ``to_int``.

    Raises:
        ValueError: if *entry* is not a dict, or lacks a run id or run index.
    """
    if entry is None:
        return None
    if not isinstance(entry, dict):
        raise ValueError("run entry must be an object")

    lookup = entry.get
    run_id = lookup("run_id") or lookup("runId")
    run_index = to_int(lookup("run_index", lookup("runSeq")))
    if not (run_id and run_index is not None):
        raise ValueError("run entry missing run_id/run_index")

    normalized = {"run_id": run_id, "run_index": run_index}
    normalized["status"] = lookup("status") or "unknown"
    normalized["sync_mode"] = lookup("sync_mode", lookup("syncMode"))
    # Fields copied verbatim (absent keys become None).
    for key in (
        "snapshot_reason",
        "started_at_rfc3339_utc",
        "completed_at_rfc3339_utc",
    ):
        normalized[key] = lookup(key)
    normalized["periodic_snapshot_reset_enabled"] = to_bool(
        lookup("periodic_snapshot_reset_enabled")
    )
    normalized["periodic_snapshot_max_deltas"] = to_int(
        lookup("periodic_snapshot_max_deltas")
    )
    normalized["periodic_snapshot_delta_count"] = to_int(
        lookup("periodic_snapshot_delta_count")
    )
    normalized["periodic_snapshot_forced"] = to_bool(
        lookup("periodic_snapshot_forced")
    )
    return normalized
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def snapshot_ref(entry):
    """Build the compact snapshot reference for a run entry.

    Returns ``None`` for ``None``. Otherwise the result carries the required
    ``run_id`` and ``run_index`` (``KeyError`` if absent) plus the optional
    ``snapshot_reason`` and ``completed_at_rfc3339_utc`` (``None`` if absent).
    """
    if entry is None:
        return None
    reference = {key: entry[key] for key in ("run_id", "run_index")}
    for key in ("snapshot_reason", "completed_at_rfc3339_utc"):
        reference[key] = entry.get(key)
    return reference
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def normalize_snapshot_ref(entry):
    """Return a canonical snapshot-reference dict built from *entry*.

    ``None`` passes through as ``None``. ``run_id``/``run_index`` are read
    with ``runId``/``runSeq`` as alternates; the index is coerced with
    ``to_int``. ``snapshot_reason`` and ``completed_at_rfc3339_utc`` are
    copied as-is (``None`` when absent).

    Raises:
        ValueError: if *entry* is not a dict, or lacks a run id or run index.
    """
    if entry is None:
        return None
    if not isinstance(entry, dict):
        raise ValueError("snapshot ref must be an object")

    run_id = entry.get("run_id") or entry.get("runId")
    raw_index = entry.get("run_index", entry.get("runSeq"))
    run_index = to_int(raw_index)
    if not (run_id and run_index is not None):
        raise ValueError("snapshot ref missing run_id/run_index")

    reference = {"run_id": run_id, "run_index": run_index}
    for key in ("snapshot_reason", "completed_at_rfc3339_utc"):
        reference[key] = entry.get(key)
    return reference
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def state_health(last_run, last_success_snapshot):
    """Classify the lifecycle state as a ``(health, detail)`` pair.

    * a known successful snapshot -> ``("ready", "ok")``
    * no snapshot and no run at all -> ``("empty", "no_runs")``
    * runs exist but no successful snapshot ->
      ``("bootstrap_incomplete", "no_successful_snapshot_in_retained_runs")``
    """
    if last_success_snapshot is None:
        if last_run is None:
            return "empty", "no_runs"
        return "bootstrap_incomplete", "no_successful_snapshot_in_retained_runs"
    return "ready", "ok"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def default_state():
    """Return a fresh, empty lifecycle state document (version 1).

    Health and detail come from ``state_health(None, None)``; the timestamp
    is the current time and the recent-runs limit is the module-level
    ``recent_limit``.
    """
    health, detail = state_health(None, None)
    state = {"version": 1}
    state["updated_at_rfc3339_utc"] = now_rfc3339()
    state["state_health"] = health
    state["state_detail"] = detail
    state["recent_runs_limit"] = recent_limit
    state["last_run"] = None
    state["last_success_snapshot"] = None
    state["successful_deltas_since_snapshot"] = None
    state["recent_runs"] = []
    return state
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def finalize_state(data):
    """Return a normalised copy of the state document *data*.

    The copy is stamped with version 1, the current time and the module-level
    ``recent_limit``. Run entries and the snapshot reference are normalised,
    and ``recent_runs`` is trimmed to its last ``recent_limit`` items when
    longer than that. The delta counter is ``None`` without a successful
    snapshot and defaults to 0 with one. Health and detail are recomputed; a
    stored ``state_detail`` is kept only when the state is "ready".
    """
    state = dict(data)
    state["version"] = 1
    state["updated_at_rfc3339_utc"] = now_rfc3339()
    state["recent_runs_limit"] = recent_limit

    runs = [normalize_run_entry(item) for item in state.get("recent_runs", [])]
    if len(runs) > recent_limit:
        runs = runs[-recent_limit:]
    state["recent_runs"] = runs

    state["last_run"] = normalize_run_entry(state.get("last_run"))
    snapshot = normalize_snapshot_ref(state.get("last_success_snapshot"))
    state["last_success_snapshot"] = snapshot

    deltas = to_int(state.get("successful_deltas_since_snapshot"))
    if snapshot is None:
        # Without a snapshot baseline the counter is meaningless.
        deltas = None
    elif deltas is None:
        deltas = 0
    state["successful_deltas_since_snapshot"] = deltas

    health, detail = state_health(state["last_run"], snapshot)
    if health == "ready":
        detail = state.get("state_detail") or detail
    state["state_health"] = health
    state["state_detail"] = detail
    return state
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def sorted_run_dirs():
    """Return the ``run_<digits>`` directories under ``runs_root``.

    Only directories whose name after the first four characters is all
    digits are kept; the result is ordered by that number, ascending.
    """
    indexed = sorted(
        (int(candidate.name[4:]), candidate)
        for candidate in runs_root.glob("run_*")
        if candidate.is_dir() and candidate.name[4:].isdigit()
    )
    return [candidate for _, candidate in indexed]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Load one run directory's metadata and normalise it into a run entry.
#
# Reads run-meta.json and run-summary.json beneath run_dir. Returns None when
# either file is absent; otherwise returns the entry built by
# normalize_run_entry() from the meta document, with "status" taken from the
# meta document, then the summary document, then the normalised default.
# NOTE(review): in this view a try/except block (which prints a tab-separated
# "error" record using delta_count and exits 0) and a status != "success"
# check sit next to the return path. Indentation is not visible here, so
# confirm which of these lines belong to the current revision.
def parse_run_dir(run_dir):
|
|
|
|
|
meta_path = run_dir / "run-meta.json"
|
|
|
|
|
summary_path = run_dir / "run-summary.json"
|
|
|
|
|
try:
|
|
|
|
|
if not meta_path.exists() or not summary_path.exists():
|
|
|
|
|
return None
|
|
|
|
|
with meta_path.open("r", encoding="utf-8") as handle:
|
|
|
|
|
meta = json.load(handle)
|
|
|
|
|
with summary_path.open("r", encoding="utf-8") as handle:
|
|
|
|
|
summary = json.load(handle)
|
|
|
|
|
except Exception as exc:
|
|
|
|
|
print(f"error\t{delta_count}\t{run_dir.name}\tjson_parse:{exc.__class__.__name__}")
|
|
|
|
|
sys.exit(0)
|
|
|
|
|
if meta.get("status") != "success" or summary.get("status") != "success":
|
|
|
|
|
entry = normalize_run_entry(meta)
|
|
|
|
|
entry["status"] = meta.get("status") or summary.get("status") or entry["status"]
|
|
|
|
|
return entry
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Rebuild the lifecycle state by walking the directories returned by
# sorted_run_dirs(), skipping the one whose name equals exclude_run_id.
#
# Tracks the last parsed run, the most recent successful snapshot and the
# number of successful deltas recorded after it, stores them (with the last
# recent_limit run entries) in a default_state() document and returns
# finalize_state() of that document.
# NOTE(review): this view interleaves lines that print tab-separated
# ok/error records and call sys.exit(0) -- plus a reference to `meta`, which
# is not assigned in this function -- with the state-building lines. They
# look like remnants of an earlier scan loop; confirm before relying on them.
def bootstrap_state(exclude_run_id=None):
|
|
|
|
|
data = default_state()
|
|
|
|
|
last_success_snapshot = None
|
|
|
|
|
delta_count = None
|
|
|
|
|
recent_runs = []
|
|
|
|
|
last_run = None
|
|
|
|
|
for run_dir in sorted_run_dirs():
|
|
|
|
|
if exclude_run_id and run_dir.name == exclude_run_id:
|
|
|
|
|
continue
|
|
|
|
|
sync_mode = meta.get("sync_mode") or meta.get("syncMode")
|
|
|
|
|
if sync_mode == "delta":
|
|
|
|
|
try:
|
|
|
|
|
entry = parse_run_dir(run_dir)
|
|
|
|
|
except Exception:
|
|
|
|
|
continue
|
|
|
|
|
if entry is None:
|
|
|
|
|
continue
|
|
|
|
|
last_run = entry
|
|
|
|
|
recent_runs.append(entry)
|
|
|
|
|
if entry["status"] != "success":
|
|
|
|
|
continue
|
|
|
|
|
# A successful snapshot becomes the new baseline and resets the delta count.
if entry.get("sync_mode") == "snapshot":
|
|
|
|
|
last_success_snapshot = snapshot_ref(entry)
|
|
|
|
|
delta_count = 0
|
|
|
|
|
# Deltas are only counted once a successful snapshot baseline is known.
elif entry.get("sync_mode") == "delta":
|
|
|
|
|
if last_success_snapshot is not None and delta_count is not None:
|
|
|
|
|
delta_count += 1
|
|
|
|
|
continue
|
|
|
|
|
if sync_mode == "snapshot":
|
|
|
|
|
print(f"ok\t{delta_count}\t{run_dir.name}\t")
|
|
|
|
|
sys.exit(0)
|
|
|
|
|
print(f"error\t{delta_count}\t{run_dir.name}\tmissing_sync_mode")
|
|
|
|
|
sys.exit(0)
|
|
|
|
|
print(f"error\t{delta_count}\t\tmissing_success_snapshot")
|
|
|
|
|
data["last_run"] = last_run
|
|
|
|
|
data["last_success_snapshot"] = last_success_snapshot
|
|
|
|
|
data["successful_deltas_since_snapshot"] = delta_count
|
|
|
|
|
data["recent_runs"] = recent_runs[-recent_limit:]
|
|
|
|
|
return finalize_state(data)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def backup_corrupt_state():
    """Move the state file aside and return the backup path.

    The backup sits next to the state file and is named
    ``<state file name>.corrupt.<UTC timestamp>.<pid>``. The move uses
    ``os.replace``.
    """
    stamp = datetime.now(timezone.utc).strftime("%Y%m%dT%H%M%SZ")
    destination = state_path.with_name(
        f"{state_path.name}.corrupt.{stamp}.{os.getpid()}"
    )
    destination.parent.mkdir(parents=True, exist_ok=True)
    os.replace(state_path, destination)
    return destination
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def load_state_or_bootstrap(exclude_run_id=None):
    """Load the lifecycle state, rebuilding it from run directories if needed.

    Returns a 4-tuple ``(state, source, error_name, backup_path)``:

    * state file missing -> bootstrapped state, source
      ``"bootstrapped_from_runs_missing_file"``, empty strings;
    * state file valid (a version-1 JSON object that normalises cleanly) ->
      finalised state, source ``"state_file"``, empty strings;
    * any exception while reading or validating -> the file is moved aside
      with ``backup_corrupt_state()``, the state is bootstrapped, and the
      exception class name and backup path are returned.

    *exclude_run_id* is forwarded to ``bootstrap_state``.
    """
    if state_path.exists():
        try:
            with state_path.open("r", encoding="utf-8") as handle:
                raw = json.load(handle)
            if not isinstance(raw, dict):
                raise ValueError("state root must be an object")
            if to_int(raw.get("version")) != 1:
                raise ValueError("unsupported version")
            return finalize_state(raw), "state_file", "", ""
        except Exception as exc:
            # Preserve the unreadable file for inspection before rebuilding.
            moved_to = backup_corrupt_state()
            rebuilt = bootstrap_state(exclude_run_id)
            error_name = exc.__class__.__name__
            return (
                rebuilt,
                "bootstrapped_from_runs_after_corrupt_backup",
                error_name,
                str(moved_to),
            )
    return bootstrap_state(exclude_run_id), "bootstrapped_from_runs_missing_file", "", ""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def atomic_write_json(path, payload):
    """Write *payload* as JSON to *path* via a temp file and ``os.replace``.

    The parent directory is created if needed. The document is written to
    ``<name>.tmp.<pid>`` beside the target (indent 2, sorted keys, trailing
    newline), flushed and fsynced, then renamed over *path*. Afterwards the
    parent directory is fsynced on a best-effort basis.

    If writing, syncing or renaming fails, the temp file is removed and the
    original exception is re-raised, so a failed write leaves no stray
    ``*.tmp.<pid>`` file and the existing *path* is left as it was.

    Args:
        path: ``pathlib.Path`` of the destination file.
        payload: JSON-serialisable object.
    """
    path.parent.mkdir(parents=True, exist_ok=True)
    tmp_path = path.with_name(f"{path.name}.tmp.{os.getpid()}")
    try:
        with tmp_path.open("w", encoding="utf-8") as handle:
            json.dump(payload, handle, indent=2, sort_keys=True)
            handle.write("\n")
            handle.flush()
            os.fsync(handle.fileno())
        os.replace(tmp_path, path)
    except BaseException:
        # Do not leave a partial temp file behind; keep the original error.
        try:
            tmp_path.unlink()
        except OSError:
            pass
        raise
    # Best effort: persist the rename by syncing the containing directory.
    try:
        dir_fd = os.open(path.parent, os.O_DIRECTORY)
    except OSError:
        return
    try:
        os.fsync(dir_fd)
    finally:
        os.close(dir_fd)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def emit(name, value):
    """Print ``name=value`` as a shell-quoted assignment on stdout.

    ``None`` is rendered as an empty string, booleans as ``true``/``false``
    and everything else through ``str()``; the rendered text is quoted with
    ``shlex.quote``.
    """
    if isinstance(value, bool):
        rendered = "true" if value else "false"
    elif value is None:
        rendered = ""
    else:
        rendered = str(value)
    print(f"{name}={shlex.quote(rendered)}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _last_snapshot_run_id(state):
    """Return the run id of the state's last successful snapshot, if any."""
    return (state.get("last_success_snapshot") or {}).get("run_id")


def _handle_load():
    """Load (or rebuild) the state and report whether a snapshot is due.

    The first extra argument is the maximum number of deltas. A state that
    did not come from the state file is written back first. A snapshot is
    due when a successful snapshot is known and the delta count has reached
    the maximum. Results are printed as PERIODIC_LIFECYCLE_* assignments.
    """
    max_deltas = int(extra_args[0])
    state, source, detail, backup_path = load_state_or_bootstrap()
    if source != "state_file":
        atomic_write_json(state_path, state)

    delta_count = state.get("successful_deltas_since_snapshot")
    has_snapshot = state.get("last_success_snapshot") is not None
    force_needed = (
        has_snapshot and delta_count is not None and delta_count >= max_deltas
    )

    emit("PERIODIC_LIFECYCLE_SOURCE", source)
    emit("PERIODIC_LIFECYCLE_DETAIL", detail or state.get("state_detail"))
    emit("PERIODIC_LIFECYCLE_CORRUPT_BACKUP_PATH", backup_path)
    emit("PERIODIC_LIFECYCLE_STATE_HEALTH", state.get("state_health"))
    emit("PERIODIC_LIFECYCLE_LAST_SNAPSHOT_RUN_ID", _last_snapshot_run_id(state))
    emit("PERIODIC_LIFECYCLE_DELTA_COUNT", delta_count)
    emit("PERIODIC_LIFECYCLE_FORCE_NEEDED", force_needed)


def _handle_update():
    """Fold the run in the given directory into the state and persist it.

    The first extra argument is the run directory; that run is excluded from
    any bootstrap so it is not counted twice. A successful snapshot resets
    the delta counter; a successful delta increments it when a snapshot
    baseline is known and otherwise clears it; any other outcome leaves the
    snapshot and counter unchanged. Results are printed as
    RUN_LIFECYCLE_UPDATE_* assignments.
    """
    run_dir = pathlib.Path(extra_args[0])
    state, source, detail, backup_path = load_state_or_bootstrap(
        exclude_run_id=run_dir.name
    )
    entry = parse_run_dir(run_dir)
    if entry is None:
        raise SystemExit(f"missing run metadata for lifecycle update: {run_dir}")

    previous_snapshot = state.get("last_success_snapshot")
    previous_delta_count = state.get("successful_deltas_since_snapshot")

    state["last_run"] = entry
    # Replace any earlier record of the same run, then keep the newest entries.
    retained = [
        item
        for item in state.get("recent_runs", [])
        if item.get("run_id") != entry["run_id"]
    ]
    retained.append(entry)
    state["recent_runs"] = retained[-recent_limit:]

    succeeded = entry["status"] == "success"
    mode = entry.get("sync_mode")
    next_snapshot = previous_snapshot
    next_delta_count = previous_delta_count
    if succeeded and mode == "snapshot":
        next_snapshot = snapshot_ref(entry)
        next_delta_count = 0
        state["state_detail"] = "ok"
    elif succeeded and mode == "delta":
        if previous_snapshot is not None and previous_delta_count is not None:
            next_delta_count = previous_delta_count + 1
            state["state_detail"] = "ok"
        else:
            next_delta_count = None
            state["state_detail"] = "success_delta_without_known_snapshot"
    state["last_success_snapshot"] = next_snapshot
    state["successful_deltas_since_snapshot"] = next_delta_count

    state = finalize_state(state)
    atomic_write_json(state_path, state)

    emit("RUN_LIFECYCLE_UPDATE_SOURCE", source)
    emit("RUN_LIFECYCLE_UPDATE_DETAIL", detail or state.get("state_detail"))
    emit("RUN_LIFECYCLE_UPDATE_CORRUPT_BACKUP_PATH", backup_path)
    emit("RUN_LIFECYCLE_UPDATE_STATE_HEALTH", state.get("state_health"))
    emit(
        "RUN_LIFECYCLE_UPDATE_DELTA_COUNT",
        state.get("successful_deltas_since_snapshot"),
    )
    emit("RUN_LIFECYCLE_UPDATE_LAST_SNAPSHOT_RUN_ID", _last_snapshot_run_id(state))


# Dispatch on the command name given as the first positional argument.
_handler = {"load": _handle_load, "update": _handle_update}.get(command)
if _handler is None:
    raise SystemExit(f"unknown lifecycle helper command: {command}")
_handler()
|
|
|
|
|
PY
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
# Clears the PERIODIC_SCAN_* variables, runs periodic_snapshot_delta_scan with
# no arguments and splits its tab-separated output into status, delta count,
# snapshot run id and detail. A status other than "ok" is logged via warn and
# the function returns 1.
# NOTE(review): the closing `fi` and `}` of this function do not directly
# follow `return 1` in this view -- confirm the function's full extent.
periodic_snapshot_force_needed() {
|
|
|
|
|
PERIODIC_SCAN_STATUS=""
|
|
|
|
|
PERIODIC_SCAN_DELTA_COUNT=""
|
|
|
|
|
PERIODIC_SCAN_SNAPSHOT_RUN_ID=""
|
|
|
|
|
PERIODIC_SCAN_DETAIL=""
|
|
|
|
|
local scan_output
|
|
|
|
|
scan_output="$(periodic_snapshot_delta_scan)"
|
|
|
|
|
IFS=$'\t' read -r PERIODIC_SCAN_STATUS PERIODIC_SCAN_DELTA_COUNT PERIODIC_SCAN_SNAPSHOT_RUN_ID PERIODIC_SCAN_DETAIL <<< "$scan_output"
|
|
|
|
|
if [[ "$PERIODIC_SCAN_STATUS" != "ok" ]]; then
|
|
|
|
|
warn "periodic snapshot reset scan skipped status=${PERIODIC_SCAN_STATUS:-missing} snapshot_run=${PERIODIC_SCAN_SNAPSHOT_RUN_ID:-none} detail=${PERIODIC_SCAN_DETAIL:-unknown}"
|
|
|
|
|
return 1
|
|
|
|
|
# Loads the run lifecycle state through the Python helper's "load" command and
# evaluates the PERIODIC_LIFECYCLE_* assignments it prints. Warns when a
# corrupt state file was backed up, reports when the state did not come from
# the state file, and warns when the bootstrap found no successful snapshot.
load_periodic_snapshot_lifecycle_context() {
  eval "$(periodic_snapshot_delta_scan load "$PERIODIC_SNAPSHOT_MAX_DELTAS")"

  if [[ "$PERIODIC_LIFECYCLE_CORRUPT_BACKUP_PATH" != "" ]]; then
    warn "run lifecycle state corrupt; backed up to $PERIODIC_LIFECYCLE_CORRUPT_BACKUP_PATH detail=${PERIODIC_LIFECYCLE_DETAIL:-unknown}"
  fi

  case "$PERIODIC_LIFECYCLE_SOURCE" in
    state_file)
      ;;
    *)
      echo "run lifecycle state source=$PERIODIC_LIFECYCLE_SOURCE state_health=${PERIODIC_LIFECYCLE_STATE_HEALTH:-unknown} snapshot_run=${PERIODIC_LIFECYCLE_LAST_SNAPSHOT_RUN_ID:-none} delta_count=${PERIODIC_LIFECYCLE_DELTA_COUNT:-unknown}"
      ;;
  esac

  case "$PERIODIC_LIFECYCLE_STATE_HEALTH" in
    bootstrap_incomplete)
      warn "run lifecycle state bootstrap incomplete detail=${PERIODIC_LIFECYCLE_DETAIL:-unknown}; forced snapshot counting resumes after the next successful snapshot"
      ;;
  esac
}
|
|
|
|
|
|
|
|
|
|
# Records the run stored in the directory given as $1 by calling the Python
# helper's "update" command and evaluating the RUN_LIFECYCLE_UPDATE_*
# assignments it prints; warns when a corrupt state file was backed up.
# NOTE(review): the two lines after `fi` that reference
# PERIODIC_SCAN_DELTA_COUNT appear to belong to a different function in this
# view -- confirm whether they are part of the current revision. If they are,
# the function's exit status is that of the final arithmetic comparison.
update_run_lifecycle_state() {
|
|
|
|
|
local run_dir="$1"
|
|
|
|
|
eval "$(periodic_snapshot_delta_scan update "$run_dir")"
|
|
|
|
|
if [[ -n "$RUN_LIFECYCLE_UPDATE_CORRUPT_BACKUP_PATH" ]]; then
|
|
|
|
|
warn "run lifecycle state corrupt during update; backed up to $RUN_LIFECYCLE_UPDATE_CORRUPT_BACKUP_PATH detail=${RUN_LIFECYCLE_UPDATE_DETAIL:-unknown}"
|
|
|
|
|
fi
|
|
|
|
|
[[ -n "$PERIODIC_SCAN_DELTA_COUNT" ]] || PERIODIC_SCAN_DELTA_COUNT="0"
|
|
|
|
|
(( PERIODIC_SCAN_DELTA_COUNT >= PERIODIC_SNAPSHOT_MAX_DELTAS ))
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
prepare_periodic_reset_state_db() {
|
|
|
|
|
@ -1012,6 +1298,7 @@ run_one_round() {
|
|
|
|
|
"$RUN_META_PERIODIC_ENABLED" "$RUN_META_PERIODIC_MAX_DELTAS" "$RUN_META_PERIODIC_DELTA_COUNT" \
|
|
|
|
|
"$RUN_META_PERIODIC_FORCED" "$RUN_META_RESET_DB_STAGING_PATH" "$RUN_META_RESET_DB_CLEANUP_STATUS" \
|
|
|
|
|
"$RUN_META_TMP_CLEANUP_STATUS" "$RUN_META_TMP_CLEANUP_REASON"
|
|
|
|
|
update_run_lifecycle_state "$run_dir"
|
|
|
|
|
printf '%s\n' "$run_id" > "$META_DIR/last-run-id"
|
|
|
|
|
if is_true "$CLEAN_TMP_AFTER_RUN"; then
|
|
|
|
|
rm -rf "$daemon_state_root"
|
|
|
|
|
@ -1091,14 +1378,21 @@ main() {
|
|
|
|
|
INVALID_TMP_PATH=""
|
|
|
|
|
TMP_CLEANUP_STATUS=""
|
|
|
|
|
TMP_CLEANUP_REASON=""
|
|
|
|
|
PERIODIC_SCAN_STATUS=""
|
|
|
|
|
PERIODIC_SCAN_DELTA_COUNT=""
|
|
|
|
|
PERIODIC_SCAN_SNAPSHOT_RUN_ID=""
|
|
|
|
|
PERIODIC_SCAN_DETAIL=""
|
|
|
|
|
PERIODIC_LIFECYCLE_SOURCE=""
|
|
|
|
|
PERIODIC_LIFECYCLE_DETAIL=""
|
|
|
|
|
PERIODIC_LIFECYCLE_CORRUPT_BACKUP_PATH=""
|
|
|
|
|
PERIODIC_LIFECYCLE_STATE_HEALTH=""
|
|
|
|
|
PERIODIC_LIFECYCLE_LAST_SNAPSHOT_RUN_ID=""
|
|
|
|
|
PERIODIC_LIFECYCLE_DELTA_COUNT=""
|
|
|
|
|
PERIODIC_LIFECYCLE_FORCE_NEEDED="false"
|
|
|
|
|
RESET_DB_STAGING_PATH=""
|
|
|
|
|
RESET_DB_CLEANUP_STATUS=""
|
|
|
|
|
if is_true "$PERIODIC_SNAPSHOT_RESET"; then
|
|
|
|
|
RUN_META_PERIODIC_ENABLED="true"
|
|
|
|
|
load_periodic_snapshot_lifecycle_context
|
|
|
|
|
if [[ -n "$PERIODIC_LIFECYCLE_DELTA_COUNT" ]]; then
|
|
|
|
|
RUN_META_PERIODIC_DELTA_COUNT="$PERIODIC_LIFECYCLE_DELTA_COUNT"
|
|
|
|
|
fi
|
|
|
|
|
else
|
|
|
|
|
RUN_META_PERIODIC_ENABLED="false"
|
|
|
|
|
fi
|
|
|
|
|
@ -1120,19 +1414,14 @@ main() {
|
|
|
|
|
if delta_state_available; then
|
|
|
|
|
sync_mode="delta"
|
|
|
|
|
if is_true "$PERIODIC_SNAPSHOT_RESET"; then
|
|
|
|
|
if periodic_snapshot_force_needed; then
|
|
|
|
|
RUN_META_PERIODIC_DELTA_COUNT="$PERIODIC_SCAN_DELTA_COUNT"
|
|
|
|
|
if [[ "$PERIODIC_LIFECYCLE_FORCE_NEEDED" == "true" ]]; then
|
|
|
|
|
RUN_META_PERIODIC_FORCED="true"
|
|
|
|
|
sync_mode="snapshot"
|
|
|
|
|
snapshot_reason="periodic_snapshot_delta_limit"
|
|
|
|
|
prepare_periodic_reset_state_db "$(printf 'run_%04d' "$next_index")"
|
|
|
|
|
RUN_META_RESET_DB_STAGING_PATH="$RESET_DB_STAGING_PATH"
|
|
|
|
|
RUN_META_RESET_DB_CLEANUP_STATUS="$RESET_DB_CLEANUP_STATUS"
|
|
|
|
|
echo "periodic snapshot reset forcing snapshot run=$(printf 'run_%04d' "$next_index") delta_count=$PERIODIC_SCAN_DELTA_COUNT max_deltas=$PERIODIC_SNAPSHOT_MAX_DELTAS"
|
|
|
|
|
else
|
|
|
|
|
if [[ "$PERIODIC_SCAN_STATUS" == "ok" ]]; then
|
|
|
|
|
RUN_META_PERIODIC_DELTA_COUNT="$PERIODIC_SCAN_DELTA_COUNT"
|
|
|
|
|
fi
|
|
|
|
|
echo "periodic snapshot reset forcing snapshot run=$(printf 'run_%04d' "$next_index") delta_count=$PERIODIC_LIFECYCLE_DELTA_COUNT max_deltas=$PERIODIC_SNAPSHOT_MAX_DELTAS"
|
|
|
|
|
fi
|
|
|
|
|
fi
|
|
|
|
|
else
|
|
|
|
|
|