20260622 恢复Routinator inter-RP监控

This commit is contained in:
yuyr 2026-06-22 15:48:31 +08:00
parent 61d3e636ae
commit 4546d90c33
3 changed files with 494 additions and 159 deletions

View File

@ -7,12 +7,22 @@
"graphTooltip": 0,
"id": null,
"links": [],
"liveNow": false,
"panels": [
{
"id": 1,
"title": "Metrics Reload OK",
"type": "stat",
"datasource": {
"type": "prometheus",
"uid": "Prometheus"
},
"gridPos": {
"h": 4,
"w": 6,
"x": 0,
"y": 0
},
"fieldConfig": {
"defaults": {
"unit": "none",
@ -20,13 +30,6 @@
},
"overrides": []
},
"gridPos": {
"h": 4,
"w": 6,
"x": 0,
"y": 0
},
"id": 1,
"options": {
"colorMode": "value",
"graphMode": "area",
@ -42,22 +45,29 @@
"textMode": "auto",
"wideLayout": true
},
"pluginVersion": "11.3.1",
"targets": [
{
"expr": "inter_rp_service_last_reload_success",
"expr": "max(inter_rp_service_last_reload_success{exported_instance=\"remote200-inter-rp\"})",
"legendFormat": "reload",
"refId": "A"
"refId": "A",
"instant": true
}
],
"title": "Metrics Reload OK",
"type": "stat"
]
},
{
"id": 2,
"title": "231 Sync Age",
"type": "stat",
"datasource": {
"type": "prometheus",
"uid": "Prometheus"
},
"gridPos": {
"h": 4,
"w": 6,
"x": 6,
"y": 0
},
"fieldConfig": {
"defaults": {
"unit": "s",
@ -65,13 +75,6 @@
},
"overrides": []
},
"gridPos": {
"h": 4,
"w": 6,
"x": 6,
"y": 0
},
"id": 2,
"options": {
"colorMode": "value",
"graphMode": "area",
@ -87,22 +90,29 @@
"textMode": "auto",
"wideLayout": true
},
"pluginVersion": "11.3.1",
"targets": [
{
"expr": "inter_rp_sync_age_seconds",
"expr": "max(inter_rp_sync_age_seconds{exported_instance=\"remote200-inter-rp\"})",
"legendFormat": "sync age",
"refId": "A"
"refId": "A",
"instant": true
}
],
"title": "Remote200 Sync Age",
"type": "stat"
]
},
{
"id": 3,
"title": "Parse Errors",
"type": "stat",
"datasource": {
"type": "prometheus",
"uid": "Prometheus"
},
"gridPos": {
"h": 4,
"w": 6,
"x": 12,
"y": 0
},
"fieldConfig": {
"defaults": {
"unit": "short",
@ -110,13 +120,6 @@
},
"overrides": []
},
"gridPos": {
"h": 4,
"w": 6,
"x": 12,
"y": 0
},
"id": 3,
"options": {
"colorMode": "value",
"graphMode": "area",
@ -132,36 +135,36 @@
"textMode": "auto",
"wideLayout": true
},
"pluginVersion": "11.3.1",
"targets": [
{
"expr": "inter_rp_parse_errors",
"expr": "max(inter_rp_parse_errors{exported_instance=\"remote200-inter-rp\"})",
"legendFormat": "errors",
"refId": "A"
"refId": "A",
"instant": true
}
],
"title": "Parse Errors",
"type": "stat"
]
},
{
"id": 4,
"title": "Ours vs Routinator VRP Diff",
"type": "stat",
"datasource": {
"type": "prometheus",
"uid": "Prometheus"
},
"fieldConfig": {
"defaults": {
"unit": "none",
"decimals": 0
},
"overrides": []
},
"gridPos": {
"h": 4,
"w": 6,
"x": 18,
"y": 0
},
"id": 4,
"fieldConfig": {
"defaults": {
"unit": "short",
"decimals": 0
},
"overrides": []
},
"options": {
"colorMode": "value",
"graphMode": "area",
@ -177,22 +180,29 @@
"textMode": "auto",
"wideLayout": true
},
"pluginVersion": "11.3.1",
"targets": [
{
"expr": "inter_rp_ccr_digest_match{state=\"overall\"}",
"legendFormat": "overall",
"refId": "A"
"expr": "max(inter_rp_vrps_diff{exported_instance=\"remote200-inter-rp\",left=\"ours-rp\",right=\"routinator\"})",
"legendFormat": "vrp diff",
"refId": "A",
"instant": true
}
],
"title": "Ours vs rpki-client CCR Match",
"type": "stat"
]
},
{
"id": 5,
"title": "Wall Time by RP",
"type": "timeseries",
"datasource": {
"type": "prometheus",
"uid": "Prometheus"
},
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 4
},
"fieldConfig": {
"defaults": {
"unit": "s",
@ -200,13 +210,6 @@
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 4
},
"id": 5,
"options": {
"legend": {
"calcs": [
@ -223,19 +226,26 @@
},
"targets": [
{
"expr": "inter_rp_run_wall_seconds",
"legendFormat": "{{rp}}",
"expr": "inter_rp_run_wall_seconds{exported_instance=\"remote200-inter-rp\",exported_rp=~\"ours-rp|routinator\"}",
"legendFormat": "{{exported_rp}}",
"refId": "A"
}
],
"title": "Wall Time by RP",
"type": "timeseries"
]
},
{
"id": 6,
"title": "Max RSS Aggregate Peak by RP",
"type": "timeseries",
"datasource": {
"type": "prometheus",
"uid": "Prometheus"
},
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 4
},
"fieldConfig": {
"defaults": {
"unit": "bytes",
@ -243,13 +253,6 @@
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 4
},
"id": 6,
"options": {
"legend": {
"calcs": [
@ -266,34 +269,33 @@
},
"targets": [
{
"expr": "inter_rp_run_max_rss_bytes{kind=\"aggregate_peak\"}",
"legendFormat": "{{rp}}",
"expr": "inter_rp_run_max_rss_bytes{exported_instance=\"remote200-inter-rp\",kind=\"aggregate_peak\",exported_rp=~\"ours-rp|routinator\"}",
"legendFormat": "{{exported_rp}}",
"refId": "A"
}
],
"title": "Max RSS Aggregate Peak by RP",
"type": "timeseries"
]
},
{
"datasource": {
"type": "prometheus",
"uid": "Prometheus"
},
"fieldConfig": {
"defaults": {
"unit": "none",
"decimals": 0,
"min": 0
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 12
},
"id": 7,
"title": "VRPs by RP (unique ASN/Prefix/MaxLen)",
"type": "timeseries",
"datasource": {
"type": "prometheus",
"uid": "Prometheus"
},
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 12
},
"fieldConfig": {
"defaults": {
"unit": "none",
"min": 0
},
"overrides": []
},
"options": {
"legend": {
"calcs": [
@ -310,34 +312,33 @@
},
"targets": [
{
"expr": "inter_rp_vrps",
"legendFormat": "{{rp}}",
"expr": "inter_rp_vrps{exported_instance=\"remote200-inter-rp\",exported_rp=~\"ours-rp|routinator\"}",
"legendFormat": "{{exported_rp}}",
"refId": "A"
}
],
"title": "VRPs by RP (unique ASN/Prefix/MaxLen)",
"type": "timeseries"
]
},
{
"id": 8,
"title": "VAPs / ASPAs by RP (unique Customer/Providers)",
"type": "timeseries",
"datasource": {
"type": "prometheus",
"uid": "Prometheus"
},
"fieldConfig": {
"defaults": {
"unit": "none",
"decimals": 0,
"min": 0
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 12
},
"id": 8,
"fieldConfig": {
"defaults": {
"unit": "none",
"min": 0
},
"overrides": []
},
"options": {
"legend": {
"calcs": [
@ -354,54 +355,75 @@
},
"targets": [
{
"expr": "inter_rp_vaps",
"legendFormat": "{{rp}}",
"expr": "inter_rp_vaps{exported_instance=\"remote200-inter-rp\",exported_rp=~\"ours-rp|routinator\"}",
"legendFormat": "{{exported_rp}}",
"refId": "A"
}
],
"title": "VAPs / ASPAs by RP (unique Customer/Providers)",
"type": "timeseries"
]
},
{
"id": 9,
"title": "Latest RP Runs",
"type": "table",
"datasource": {
"type": "prometheus",
"uid": "Prometheus"
},
"fieldConfig": {
"defaults": {
"unit": "none",
"decimals": 0
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 20
},
"id": 9,
"fieldConfig": {
"defaults": {
"unit": "none",
"decimals": 0
},
"overrides": []
},
"options": {
"showHeader": true,
"sortBy": []
},
"targets": [
{
"expr": "inter_rp_ccr_digest_match{left=\"ours-rp\",right=\"rpki-client\"}",
"expr": "inter_rp_run_seq{exported_instance=\"remote200-inter-rp\",exported_rp=~\"ours-rp|routinator\"}",
"format": "table",
"instant": true,
"legendFormat": "{{state}}",
"legendFormat": "{{exported_rp}} seq",
"refId": "A"
}
],
"title": "CCR Digest Match States",
"type": "table"
},
{
"expr": "inter_rp_run_success{exported_instance=\"remote200-inter-rp\",exported_rp=~\"ours-rp|routinator\"}",
"format": "table",
"instant": true,
"legendFormat": "{{exported_rp}} success",
"refId": "B"
},
{
"expr": "inter_rp_run_wall_seconds{exported_instance=\"remote200-inter-rp\",exported_rp=~\"ours-rp|routinator\"}",
"format": "table",
"instant": true,
"legendFormat": "{{exported_rp}} wall",
"refId": "C"
}
]
},
{
"id": 10,
"title": "Output Count Diffs (unique)",
"type": "table",
"datasource": {
"type": "prometheus",
"uid": "Prometheus"
},
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 20
},
"fieldConfig": {
"defaults": {
"unit": "none",
@ -409,41 +431,41 @@
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 20
},
"id": 10,
"options": {
"showHeader": true,
"sortBy": []
},
"targets": [
{
"expr": "inter_rp_vrps_diff",
"expr": "inter_rp_vrps_diff{exported_instance=\"remote200-inter-rp\",left=\"ours-rp\",right=\"routinator\"}",
"format": "table",
"instant": true,
"legendFormat": "vrps {{left}}-{{right}}",
"legendFormat": "vrps ours-rp-routinator",
"refId": "A"
},
{
"expr": "inter_rp_vaps_diff",
"expr": "inter_rp_vaps_diff{exported_instance=\"remote200-inter-rp\",left=\"ours-rp\",right=\"routinator\"}",
"format": "table",
"instant": true,
"legendFormat": "vaps {{left}}-{{right}}",
"legendFormat": "vaps ours-rp-routinator",
"refId": "B"
}
],
"title": "Output Count Diffs (VRP/VAP unique)",
"type": "table"
]
},
{
"id": 11,
"title": "Artifact Age by RP",
"type": "timeseries",
"datasource": {
"type": "prometheus",
"uid": "Prometheus"
},
"gridPos": {
"h": 8,
"w": 24,
"x": 0,
"y": 28
},
"fieldConfig": {
"defaults": {
"unit": "s",
@ -451,13 +473,6 @@
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 24,
"x": 0,
"y": 28
},
"id": 11,
"options": {
"legend": {
"calcs": [
@ -474,21 +489,19 @@
},
"targets": [
{
"expr": "inter_rp_artifact_age_seconds",
"legendFormat": "{{rp}}",
"expr": "inter_rp_artifact_age_seconds{exported_instance=\"remote200-inter-rp\",exported_rp=~\"ours-rp|routinator\"}",
"legendFormat": "{{exported_rp}}",
"refId": "A"
}
],
"title": "Artifact Age by RP",
"type": "timeseries"
]
}
],
"preload": false,
"refresh": "10s",
"schemaVersion": 40,
"tags": [
"rpki",
"inter-rp"
"inter-rp",
"routinator"
],
"templating": {
"list": []
@ -497,9 +510,8 @@
"from": "now-6h",
"to": "now"
},
"timepicker": {},
"timezone": "browser",
"title": "Ours RP Inter-RP",
"title": "Ours RP vs Routinator",
"uid": "ours-rp-inter-rp",
"version": 1
"version": 2
}

View File

@ -0,0 +1,250 @@
#!/usr/bin/env python3
import csv
import datetime as dt
import json
import os
import socket
import sys
import threading
import time
from http.server import BaseHTTPRequestHandler, ThreadingHTTPServer
from pathlib import Path
# Root of our RP's run output; latest_ours_run() scans its "runs" subdirectory.
RUN_ROOT = Path(os.environ.get("OURS_RUN_ROOT", "/root/ours-rp-continuous/portable-soak"))
# Directory holding peer artifacts (routinator/latest) and sync-status.json.
PEER_ROOT = Path(os.environ.get("PEER_ROOT", "/root/ours-rp-continuous/portable-soak/inter-rp-peers"))
# Value of the "instance" label attached to every exported metric.
INSTANCE = os.environ.get("INTER_RP_INSTANCE", "remote200-inter-rp")
# "host:port" the HTTP server binds to; main() splits it on the last ":".
LISTEN = os.environ.get("INTER_RP_LISTEN", "0.0.0.0:9557")
# Seconds a rendered metrics payload is reused before get_metrics() rescans.
SCAN_TTL = float(os.environ.get("INTER_RP_SCAN_TTL_SECONDS", "10"))
# Held by get_metrics() and the /status handler while they touch _cache.
_cache_lock = threading.Lock()
# Last rendered metrics text, its status dict, and the unix time at which it expires.
_cache = {"deadline": 0.0, "metrics": "", "status": {}}
# Memo for count_unique_csv(), keyed by (path, mtime_ns, size, cols).
_count_cache = {}
def unix_now():
    """Return the current wall-clock time in seconds since the Unix epoch."""
    current = time.time()
    return current
def parse_rfc3339(value):
    """Convert an RFC 3339 / ISO 8601 timestamp string to Unix seconds.

    Args:
        value: Timestamp text such as ``2024-01-01T00:00:00Z``. Falsy values
            (``None``, ``""``) mean "no timestamp".

    Returns:
        The timestamp as a float of seconds since the epoch, or ``None`` when
        ``value`` is falsy or cannot be parsed.
    """
    if not value:
        return None
    text = str(value).strip()
    # Only a trailing "Z"/"z" is the UTC designator; older fromisoformat()
    # implementations reject it, so spell it as an explicit offset.
    if text[-1:] in ("Z", "z"):
        text = text[:-1] + "+00:00"
    try:
        parsed = dt.datetime.fromisoformat(text)
        if parsed.tzinfo is None:
            # Every caller passes a "...Utc" field, so an offset-less value is
            # UTC; without this it would be interpreted in the host's local zone.
            parsed = parsed.replace(tzinfo=dt.timezone.utc)
        return parsed.timestamp()
    except (ValueError, OverflowError, OSError):
        return None
def read_json(path):
    """Parse the UTF-8 encoded JSON file at ``path`` and return the decoded value."""
    with open(path, mode="r", encoding="utf-8") as source:
        document = json.load(source)
    return document
def latest_ours_run():
    """Return the newest run directory under ``RUN_ROOT / "runs"``.

    Only ``run_*`` directories that contain a ``run-summary.json`` are
    considered. Directories are ordered by a natural sort of their names, so
    digit runs compare numerically (``run_10`` is newer than ``run_9``); a
    plain string sort would pick the wrong directory as soon as the sequence
    number gains a digit.

    Returns:
        The ``Path`` of the newest candidate, or ``None`` when there is none.
    """
    import re  # local import: the module does not import ``re`` at file level

    def natural_key(run_dir):
        # re.split with a capturing group alternates text / digits, so odd
        # indexes are always digit runs and compare as integers.
        parts = re.split(r"([0-9]+)", run_dir.name)
        numeric_aware = [int(tok) if idx % 2 else tok for idx, tok in enumerate(parts)]
        # The raw name breaks ties between e.g. "run_01" and "run_1".
        return (numeric_aware, run_dir.name)

    runs = RUN_ROOT / "runs"
    candidates = [p for p in runs.glob("run_*") if (p / "run-summary.json").exists()]
    return max(candidates, key=natural_key, default=None)
def count_unique_csv(path, cols):
    """Count distinct data rows of a CSV file, comparing the first ``cols`` cells.

    The first row is treated as a header and skipped. Empty rows and rows with
    fewer than ``cols`` cells are ignored; cells are stripped before comparison.

    Results are memoised in ``_count_cache`` keyed by path, mtime, size and
    ``cols``. Each file keeps its own entry, so the several CSVs counted in one
    scan no longer evict each other.

    Args:
        path: ``pathlib.Path`` of the CSV file.
        cols: Number of leading columns that define row identity.

    Returns:
        The number of unique rows, or ``None`` when the file does not exist
        (including when it disappears while being read).
    """
    max_entries = 32  # bound on memo size; run directories change over time

    # stat() directly instead of exists()+stat(): the file may be swapped out
    # between the two calls.
    try:
        stat = path.stat()
    except (FileNotFoundError, NotADirectoryError):
        return None
    key = (str(path), stat.st_mtime_ns, stat.st_size, cols)
    if key in _count_cache:
        return _count_cache[key]

    seen = set()
    try:
        with open(path, "r", encoding="utf-8", newline="") as handle:
            rows = csv.reader(handle)
            next(rows, None)  # skip the header row
            for row in rows:
                if not row or len(row) < cols:
                    continue
                seen.add(tuple(cell.strip() for cell in row[:cols]))
    except FileNotFoundError:
        return None
    value = len(seen)

    # Drop superseded entries for this same file, then cap the overall size by
    # evicting the oldest insertions (dicts preserve insertion order).
    for stale in [k for k in _count_cache if k[0] == key[0] and k[3] == cols]:
        del _count_cache[stale]
    _count_cache[key] = value
    while len(_count_cache) > max_entries:
        del _count_cache[next(iter(_count_cache))]
    return value
def _escape_label_value(value):
    """Escape a label value for the Prometheus text format (backslash, quote, newline)."""
    # Backslash must be doubled first so the escapes added afterwards survive.
    return str(value).replace("\\", "\\\\").replace('"', '\\"').replace("\n", "\\n")


def metric_line(name, labels, value):
    """Render one sample line in the Prometheus text exposition format.

    Args:
        name: Metric name.
        labels: Mapping of label name to value; values are stringified and escaped.
        value: Sample value, formatted with ``str()`` semantics.

    Returns:
        ``name{k="v",...} value`` terminated by a newline.
    """
    label_text = ",".join(f'{key}="{_escape_label_value(val)}"' for key, val in labels.items())
    return f"{name}{{{label_text}}} {value}\n"
def bool_num(value):
    """Map any truthy value to the integer 1 and any falsy value to 0."""
    return int(bool(value))
def sample_ours(now, errors):
    """Build the sample dict for the latest run of our own RP.

    Reads ``run-summary.json`` from the directory returned by
    ``latest_ours_run()`` and counts unique rows in its ``vrps.csv`` and
    ``vaps.csv``. Malformed fields are reported as parse errors instead of
    raising, so one bad summary cannot break the whole scrape.

    Args:
        now: Current Unix time in seconds, used to derive the artifact age.
        errors: List that receives a human-readable message per problem found.

    Returns:
        A dict with at least ``rp``, ``present``, ``success``, ``max`` and
        ``errors``; optional keys (``run_id``, ``run_seq``, ``wall``,
        ``finished``, ``age``, ``vrps``, ``vaps``) are set when available.
    """
    sample = {"rp": "ours-rp", "present": False, "success": False, "max": {}, "errors": 0}

    def fail(message):
        # Record one parse error both on the sample and in the shared list.
        sample["errors"] += 1
        errors.append(message)
        return sample

    def coerce(field, raw, cast):
        # Convert a JSON value, turning bad input into a parse error + None.
        try:
            return cast(raw)
        except (TypeError, ValueError, OverflowError):
            fail(f"ours-rp: invalid {field}: {raw!r}")
            return None

    run_dir = latest_ours_run()
    if run_dir is None:
        return fail("ours-rp: no run-summary.json")
    sample["present"] = True

    summary_path = run_dir / "run-summary.json"
    try:
        summary = read_json(summary_path)
    except Exception as exc:
        return fail(f"ours-rp: read {summary_path}: {exc}")
    if not isinstance(summary, dict):
        return fail(f"ours-rp: read {summary_path}: top-level JSON value is not an object")

    sample["run_id"] = summary.get("runId") or run_dir.name

    run_seq = summary.get("runSeq")
    if run_seq is None:
        # Fall back to the numeric suffix of the directory name, if it has one.
        try:
            run_seq = int(run_dir.name.split("_")[-1])
        except ValueError:
            run_seq = None
    sample["run_seq"] = run_seq

    if summary.get("status") == "success":
        exit_code = summary.get("exitCode")
        # A missing or null exit code is treated as 0, matching the default.
        exit_code = 0 if exit_code is None else coerce("exitCode", exit_code, int)
        sample["success"] = exit_code == 0

    if summary.get("wallMs") is not None:
        wall_ms = coerce("wallMs", summary["wallMs"], float)
        if wall_ms is not None:
            sample["wall"] = wall_ms / 1000.0

    finished = parse_rfc3339(summary.get("finishedAtRfc3339Utc"))
    if finished is not None:
        sample["finished"] = finished
        sample["age"] = max(0.0, now - finished)

    process_metrics = summary.get("processMetrics")
    rss = process_metrics.get("maxRssKb") if isinstance(process_metrics, dict) else None
    if rss is not None:
        rss_kb = coerce("processMetrics.maxRssKb", rss, int)
        if rss_kb is not None:
            # The summary exposes a single RSS figure; it is reported under
            # both kinds.
            sample["max"]["parent"] = rss_kb * 1024
            sample["max"]["aggregate_peak"] = rss_kb * 1024

    sample["vrps"] = count_unique_csv(run_dir / "vrps.csv", 3)
    sample["vaps"] = count_unique_csv(run_dir / "vaps.csv", 2)
    return sample
def sample_routinator(now, errors):
    """Build the sample dict for the synced Routinator run.

    Reads ``run-meta.json`` from ``PEER_ROOT / "routinator" / "latest"`` and
    counts unique rows in that directory's ``vrps.csv`` and ``vaps.csv``.
    Malformed fields are reported as parse errors instead of raising, so one
    bad meta file cannot break the whole scrape.

    Args:
        now: Current Unix time in seconds, used to derive the artifact age.
        errors: List that receives a human-readable message per problem found.

    Returns:
        A dict with at least ``rp``, ``present``, ``success``, ``max`` and
        ``errors``; optional keys (``run_id``, ``run_seq``, ``wall``,
        ``finished``, ``age``, ``vrps``, ``vaps``) are set when available.
    """
    rp = "routinator"
    latest = PEER_ROOT / rp / "latest"
    sample = {"rp": rp, "present": False, "success": False, "max": {}, "errors": 0}

    def fail(message):
        # Record one parse error both on the sample and in the shared list.
        sample["errors"] += 1
        errors.append(message)
        return sample

    def coerce(field, raw, cast):
        # Convert a JSON value, turning bad input into a parse error + None.
        try:
            return cast(raw)
        except (TypeError, ValueError, OverflowError):
            fail(f"routinator: invalid {field}: {raw!r}")
            return None

    if not latest.exists():
        return fail(f"routinator: missing latest directory: {latest}")
    sample["present"] = True

    meta_path = latest / "run-meta.json"
    try:
        meta = read_json(meta_path)
    except Exception as exc:
        return fail(f"routinator: read {meta_path}: {exc}")
    if not isinstance(meta, dict):
        return fail(f"routinator: read {meta_path}: top-level JSON value is not an object")

    sample["run_id"] = meta.get("runId")
    sample["run_seq"] = meta.get("runSeq")
    sample["success"] = bool(meta.get("success"))

    if meta.get("wallMs") is not None:
        wall_ms = coerce("wallMs", meta["wallMs"], float)
        if wall_ms is not None:
            sample["wall"] = wall_ms / 1000.0

    finished = parse_rfc3339(meta.get("finishedAtRfc3339Utc"))
    if finished is not None:
        sample["finished"] = finished
        sample["age"] = max(0.0, now - finished)

    max_rss = meta.get("maxRssKb")
    if isinstance(max_rss, dict):
        for label, key in (("parent", "parent"), ("child_max", "childMax"), ("aggregate_peak", "aggregatePeak")):
            raw = max_rss.get(key)
            if raw is None:
                continue
            kilobytes = coerce(f"maxRssKb.{key}", raw, int)
            if kilobytes is not None:
                sample["max"][label] = kilobytes * 1024
    elif max_rss is not None:
        # Present but not an object: report it rather than crash on .get().
        fail(f"routinator: invalid maxRssKb: {max_rss!r}")

    sample["vrps"] = count_unique_csv(latest / "vrps.csv", 3)
    sample["vaps"] = count_unique_csv(latest / "vaps.csv", 2)
    return sample
def sync_metrics(now):
    """Summarise ``PEER_ROOT / "sync-status.json"`` as a dict.

    Args:
        now: Current Unix time in seconds, used to compute the sync age.

    Returns:
        A dict with ``present``, ``success`` and ``message``. When the file
        could be read it also carries ``timestamp``, ``age`` and ``remote``.
    """
    status_path = PEER_ROOT / "sync-status.json"
    if not status_path.exists():
        return {"present": False, "success": False, "message": f"missing {status_path}"}

    try:
        document = read_json(status_path)
    except Exception as exc:
        return {"present": True, "success": False, "message": str(exc)}

    synced_at = parse_rfc3339(document.get("lastSyncAtRfc3339Utc"))
    if synced_at is None:
        age = None
    else:
        # Clamp at zero so a timestamp in the future never yields a negative age.
        age = max(0.0, now - synced_at)

    result = {
        "present": True,
        "success": bool(document.get("success")),
        "timestamp": synced_at,
        "age": age,
        "remote": document.get("remoteHost", ""),
        "message": document.get("message", ""),
    }
    return result
def build_metrics():
    """Scan both RPs plus the sync status and render the metrics payload.

    Returns:
        A ``(text, status)`` tuple: ``text`` is the Prometheus exposition
        text, ``status`` is a dict with the ``errors`` list, the per-RP
        ``samples`` and the ``sync`` summary.
    """
    now = unix_now()
    errors = []
    start = time.time()
    samples = [sample_ours(now, errors), sample_routinator(now, errors)]
    sync = sync_metrics(now)
    if not sync.get("success"):
        errors.append("sync: " + str(sync.get("message", "failed")))

    lines = []

    def emit(name, labels, value):
        # Append one rendered sample line to the output.
        lines.append(metric_line(name, labels, value))

    # Service-level gauges share the same single-label set.
    service = {"instance": INSTANCE}
    emit("inter_rp_service_up", service, 1)
    emit("inter_rp_service_last_scan_timestamp_seconds", service, now)
    emit("inter_rp_service_last_scan_duration_seconds", service, time.time() - start)
    emit("inter_rp_service_last_reload_success", service, bool_num(len(errors) == 0))
    emit("inter_rp_parse_errors", service, len(errors))
    emit("inter_rp_sync_present", service, bool_num(sync.get("present")))
    emit("inter_rp_sync_last_success", service, bool_num(sync.get("success")))
    for field, metric in (
        ("age", "inter_rp_sync_age_seconds"),
        ("timestamp", "inter_rp_sync_last_timestamp_seconds"),
    ):
        if sync.get(field) is not None:
            emit(metric, service, sync[field])

    # Per-RP gauges that are only emitted when the sample carries the field.
    optional_gauges = (
        ("run_seq", "inter_rp_run_seq"),
        ("wall", "inter_rp_run_wall_seconds"),
        ("age", "inter_rp_artifact_age_seconds"),
        ("vrps", "inter_rp_vrps"),
        ("vaps", "inter_rp_vaps"),
    )
    for entry in samples:
        labels = {"instance": INSTANCE, "rp": entry["rp"]}
        emit("inter_rp_run_present", labels, bool_num(entry.get("present")))
        emit("inter_rp_run_success", labels, bool_num(entry.get("success")))
        emit("inter_rp_sample_parse_errors", labels, entry.get("errors", 0))
        for field, metric in optional_gauges:
            if entry.get(field) is not None:
                emit(metric, labels, entry[field])
        for kind, value in entry.get("max", {}).items():
            emit("inter_rp_run_max_rss_bytes", {**labels, "kind": kind}, value)

    # Absolute count differences between the two RPs.
    by_rp = {entry["rp"]: entry for entry in samples}
    ours = by_rp.get("ours-rp", {})
    rout = by_rp.get("routinator", {})
    pair = {"instance": INSTANCE, "left": "ours-rp", "right": "routinator"}
    for field, metric in (("vrps", "inter_rp_vrps_diff"), ("vaps", "inter_rp_vaps_diff")):
        if ours.get(field) is not None and rout.get(field) is not None:
            emit(metric, pair, abs(int(ours[field]) - int(rout[field])))

    return "".join(lines), {"errors": errors, "samples": samples, "sync": sync}
def get_metrics():
    """Return the metrics text, rebuilding it when the cached copy has expired.

    The whole check-and-rebuild runs under ``_cache_lock``. A rebuild stores
    the new text and status in ``_cache`` and sets the deadline to the time
    this call started plus ``SCAN_TTL``.
    """
    requested_at = time.time()
    with _cache_lock:
        still_fresh = bool(_cache["metrics"]) and _cache["deadline"] > requested_at
        if not still_fresh:
            text, status = build_metrics()
            _cache.update(metrics=text, status=status, deadline=requested_at + SCAN_TTL)
        return _cache["metrics"]
class Handler(BaseHTTPRequestHandler):
    """HTTP handler exposing ``/metrics`` (Prometheus text) and ``/status`` (JSON)."""

    def _send(self, code, content_type, body):
        """Write a complete response with the given status, type and body bytes."""
        self.send_response(code)
        self.send_header("Content-Type", content_type)
        self.send_header("Content-Length", str(len(body)))
        self.end_headers()
        self.wfile.write(body)

    def do_GET(self):
        """Serve ``/metrics`` and ``/status``; answer 404 for anything else."""
        # Ignore any query string so "/metrics?x=1" still matches.
        route = self.path.split("?", 1)[0]
        if route == "/metrics":
            body = get_metrics().encode("utf-8")
            self._send(200, "text/plain; version=0.0.4; charset=utf-8", body)
            return
        if route == "/status":
            # Refresh through get_metrics() so the scan result is actually
            # stored in _cache. It must be called before taking _cache_lock:
            # get_metrics() acquires the same non-reentrant lock.
            get_metrics()
            with _cache_lock:
                status = _cache["status"]
            body = json.dumps(status, indent=2).encode("utf-8")
            self._send(200, "application/json", body)
            return
        self.send_response(404)
        self.end_headers()

    def log_message(self, fmt, *args):
        """Suppress the default per-request logging."""
        return
def main():
    """Bind the HTTP server to ``LISTEN``, print a startup banner and serve forever."""
    address = LISTEN.rsplit(":", 1)
    host, port = address
    httpd = ThreadingHTTPServer((host, int(port)), Handler)
    banner = f"inter-rp ours+routinator exporter listen={LISTEN} instance={INSTANCE}"
    print(banner, flush=True)
    httpd.serve_forever()


if __name__ == "__main__":
    main()

View File

@ -0,0 +1,73 @@
#!/usr/bin/env bash
# Repeatedly pulls the remote Routinator "latest" run artifacts into
# $PEER_ROOT/routinator/latest and records each attempt in
# $PEER_ROOT/sync-status.json.
# Exit on command failure, on use of unset variables, and on pipeline failures.
set -euo pipefail
# rsync source as user@host. NOTE(review): the default embeds a concrete
# root@IP; confirm that is intended to live in the repository.
REMOTE231="${REMOTE231:-root@47.251.127.231}"
# Directory on the remote host that contains routinator/latest/.
REMOTE_ROOT="${REMOTE_ROOT:-/var/lib/inter-rp-runners}"
# Local directory that receives the peer artifacts and the status file.
PEER_ROOT="${PEER_ROOT:-/root/ours-rp-continuous/portable-soak/inter-rp-peers}"
# Pause between sync attempts; passed verbatim to sleep.
SYNC_INTERVAL_SECS="${SYNC_INTERVAL_SECS:-60}"
# Stop after this many attempts; the loop only honours non-negative integers,
# so the default of -1 means "run until killed".
MAX_SYNCS="${MAX_SYNCS:--1}"
# Prefix for every log line this script prints.
LOG_PREFIX="[inter-rp-sync]"
mkdir -p "$PEER_ROOT/routinator"
# write_status SUCCESS MESSAGE
# Record the outcome of a sync attempt in $PEER_ROOT/sync-status.json.
#   SUCCESS  the literal string "true" marks success; anything else is failure
#   MESSAGE  free-form text stored in the "message" field
# The JSON is written to a temporary file and renamed into place so a reader
# never observes a partially written document.
write_status() {
  local success="$1"
  local message="$2"
  # Values are passed through the environment so they need no shell quoting
  # inside the Python source; the heredoc is quoted to disable expansion.
  env SYNC_SUCCESS="$success" SYNC_MESSAGE="$message" SYNC_REMOTE="$REMOTE231" SYNC_REMOTE_ROOT="$REMOTE_ROOT" python3 - "$PEER_ROOT/sync-status.json" <<'PY'
import datetime, json, os, socket, sys
path = sys.argv[1]
# datetime.timezone.utc instead of datetime.UTC: the latter needs Python 3.11+.
now_utc = datetime.datetime.now(datetime.timezone.utc).replace(microsecond=0)
payload = {
    "schemaVersion": 1,
    "success": os.environ["SYNC_SUCCESS"] == "true",
    "lastSyncAtRfc3339Utc": now_utc.isoformat().replace("+00:00", "Z"),
    "remoteHost": os.environ["SYNC_REMOTE"],
    "remoteRoot": os.environ["SYNC_REMOTE_ROOT"],
    "localHost": socket.gethostname(),
    "message": os.environ["SYNC_MESSAGE"],
}
# Write next to the target, then rename: os.replace is atomic on one filesystem.
tmp_path = f"{path}.tmp.{os.getpid()}"
with open(tmp_path, "w", encoding="utf-8") as handle:
    json.dump(payload, handle, indent=2)
    handle.write("\n")
os.replace(tmp_path, path)
PY
}
# sync_once
# Fetch the remote routinator/latest artifacts into a scratch directory and
# swap them into $PEER_ROOT/routinator/latest.
# Returns: 0 on success
#          1 when rsync fails
#          2 when the transfer lacks run-meta.json
#          3 when a local filesystem step fails
# Every step is checked explicitly: this function is called from an `if`
# condition, where `set -e` is ignored, so an unchecked failed `mv` would
# otherwise fall through and delete the previous good copy.
sync_once() {
  local tmp="$PEER_ROOT/.sync-routinator-$$"
  local dest="$PEER_ROOT/routinator"
  rm -rf "$tmp"
  if ! mkdir -p "$tmp"; then
    return 3
  fi
  # Copy only the known artifact files; everything else is excluded.
  if ! rsync -aL --delete \
    --include='run-meta.json' \
    --include='result.ccr' \
    --include='vrps.csv' \
    --include='vaps.csv' \
    --include='stdout.log' \
    --include='stderr.log' \
    --exclude='*' \
    "$REMOTE231:$REMOTE_ROOT/routinator/latest/" "$tmp/latest/"; then
    rm -rf "$tmp"
    return 1
  fi
  if [[ ! -f "$tmp/latest/run-meta.json" ]]; then
    rm -rf "$tmp"
    return 2
  fi
  rm -rf "$dest/latest.next" "$dest/latest.prev"
  if ! mv "$tmp/latest" "$dest/latest.next"; then
    rm -rf "$tmp"
    return 3
  fi
  if [[ -e "$dest/latest" ]] && ! mv "$dest/latest" "$dest/latest.prev"; then
    rm -rf "$dest/latest.next" "$tmp"
    return 3
  fi
  if ! mv "$dest/latest.next" "$dest/latest"; then
    # Put the previous copy back so readers keep seeing a complete directory.
    if [[ -e "$dest/latest.prev" ]]; then
      mv "$dest/latest.prev" "$dest/latest" || true
    fi
    rm -rf "$dest/latest.next" "$tmp"
    return 3
  fi
  rm -rf "$dest/latest.prev" "$tmp"
}
# Number of sync attempts finished so far, successful or not.
completed=0
while :; do
  # Capture the exit status without tripping `set -e` on failure.
  code=0
  sync_once || code=$?
  stamp="$(date -u +%Y-%m-%dT%H:%M:%SZ)"
  if (( code == 0 )); then
    echo "$LOG_PREFIX $stamp ok"
    write_status true "ok"
  else
    echo "$LOG_PREFIX $stamp sync failed code=$code" >&2
    write_status false "routinator rsync failed code=$code"
  fi
  completed=$((completed + 1))
  # Only a non-negative integer MAX_SYNCS limits the number of attempts.
  if [[ "$MAX_SYNCS" =~ ^[0-9]+$ ]] && (( completed >= MAX_SYNCS )); then
    break
  fi
  sleep "$SYNC_INTERVAL_SECS"
done