diff --git a/monitor/grafana/dashboards/ours-rp-inter-rp.json b/monitor/grafana/dashboards/ours-rp-inter-rp.json index 58dc268..fd2e431 100644 --- a/monitor/grafana/dashboards/ours-rp-inter-rp.json +++ b/monitor/grafana/dashboards/ours-rp-inter-rp.json @@ -7,12 +7,22 @@ "graphTooltip": 0, "id": null, "links": [], + "liveNow": false, "panels": [ { + "id": 1, + "title": "Metrics Reload OK", + "type": "stat", "datasource": { "type": "prometheus", "uid": "Prometheus" }, + "gridPos": { + "h": 4, + "w": 6, + "x": 0, + "y": 0 + }, "fieldConfig": { "defaults": { "unit": "none", @@ -20,13 +30,6 @@ }, "overrides": [] }, - "gridPos": { - "h": 4, - "w": 6, - "x": 0, - "y": 0 - }, - "id": 1, "options": { "colorMode": "value", "graphMode": "area", @@ -42,22 +45,29 @@ "textMode": "auto", "wideLayout": true }, - "pluginVersion": "11.3.1", "targets": [ { - "expr": "inter_rp_service_last_reload_success", + "expr": "max(inter_rp_service_last_reload_success{exported_instance=\"remote200-inter-rp\"})", "legendFormat": "reload", - "refId": "A" + "refId": "A", + "instant": true } - ], - "title": "Metrics Reload OK", - "type": "stat" + ] }, { + "id": 2, + "title": "231 Sync Age", + "type": "stat", "datasource": { "type": "prometheus", "uid": "Prometheus" }, + "gridPos": { + "h": 4, + "w": 6, + "x": 6, + "y": 0 + }, "fieldConfig": { "defaults": { "unit": "s", @@ -65,13 +75,6 @@ }, "overrides": [] }, - "gridPos": { - "h": 4, - "w": 6, - "x": 6, - "y": 0 - }, - "id": 2, "options": { "colorMode": "value", "graphMode": "area", @@ -87,22 +90,29 @@ "textMode": "auto", "wideLayout": true }, - "pluginVersion": "11.3.1", "targets": [ { - "expr": "inter_rp_sync_age_seconds", + "expr": "max(inter_rp_sync_age_seconds{exported_instance=\"remote200-inter-rp\"})", "legendFormat": "sync age", - "refId": "A" + "refId": "A", + "instant": true } - ], - "title": "Remote200 Sync Age", - "type": "stat" + ] }, { + "id": 3, + "title": "Parse Errors", + "type": "stat", "datasource": { "type": "prometheus", "uid": "Prometheus" }, + "gridPos": { + "h": 4, + "w": 6, + "x": 12, + "y": 0 + }, "fieldConfig": { "defaults": { "unit": "short", @@ -110,13 +120,6 @@ }, "overrides": [] }, - "gridPos": { - "h": 4, - "w": 6, - "x": 12, - "y": 0 - }, - "id": 3, "options": { "colorMode": "value", "graphMode": "area", @@ -132,36 +135,36 @@ "textMode": "auto", "wideLayout": true }, - "pluginVersion": "11.3.1", "targets": [ { - "expr": "inter_rp_parse_errors", + "expr": "max(inter_rp_parse_errors{exported_instance=\"remote200-inter-rp\"})", "legendFormat": "errors", - "refId": "A" + "refId": "A", + "instant": true } - ], - "title": "Parse Errors", - "type": "stat" + ] }, { + "id": 4, + "title": "Ours vs Routinator VRP Diff", + "type": "stat", "datasource": { "type": "prometheus", "uid": "Prometheus" }, - "fieldConfig": { - "defaults": { - "unit": "none", - "decimals": 0 - }, - "overrides": [] - }, "gridPos": { "h": 4, "w": 6, "x": 18, "y": 0 }, - "id": 4, + "fieldConfig": { + "defaults": { + "unit": "short", + "decimals": 0 + }, + "overrides": [] + }, "options": { "colorMode": "value", "graphMode": "area", @@ -177,22 +180,29 @@ "textMode": "auto", "wideLayout": true }, - "pluginVersion": "11.3.1", "targets": [ { - "expr": "inter_rp_ccr_digest_match{state=\"overall\"}", - "legendFormat": "overall", - "refId": "A" + "expr": "max(inter_rp_vrps_diff{exported_instance=\"remote200-inter-rp\",left=\"ours-rp\",right=\"routinator\"})", + "legendFormat": "vrp diff", + "refId": "A", + "instant": true } - ], - "title": "Ours vs rpki-client CCR Match", - "type": "stat" + ] }, { + "id": 5, + "title": "Wall Time by RP", + "type": "timeseries", "datasource": { "type": "prometheus", "uid": "Prometheus" }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 4 + }, "fieldConfig": { "defaults": { "unit": "s", @@ -200,13 +210,6 @@ }, "overrides": [] }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 4 - }, - "id": 5, "options": { "legend": { "calcs": [ @@ -223,19 +226,26 @@ }, "targets": [ { - "expr": "inter_rp_run_wall_seconds", - "legendFormat": "{{rp}}", + "expr": "inter_rp_run_wall_seconds{exported_instance=\"remote200-inter-rp\",exported_rp=~\"ours-rp|routinator\"}", + "legendFormat": "{{exported_rp}}", "refId": "A" } - ], - "title": "Wall Time by RP", - "type": "timeseries" + ] }, { + "id": 6, + "title": "Max RSS Aggregate Peak by RP", + "type": "timeseries", "datasource": { "type": "prometheus", "uid": "Prometheus" }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 4 + }, "fieldConfig": { "defaults": { "unit": "bytes", @@ -243,13 +253,6 @@ }, "overrides": [] }, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 4 - }, - "id": 6, "options": { "legend": { "calcs": [ @@ -266,34 +269,33 @@ }, "targets": [ { - "expr": "inter_rp_run_max_rss_bytes{kind=\"aggregate_peak\"}", - "legendFormat": "{{rp}}", + "expr": "inter_rp_run_max_rss_bytes{exported_instance=\"remote200-inter-rp\",kind=\"aggregate_peak\",exported_rp=~\"ours-rp|routinator\"}", + "legendFormat": "{{exported_rp}}", "refId": "A" } - ], - "title": "Max RSS Aggregate Peak by RP", - "type": "timeseries" + ] }, { - "datasource": { - "type": "prometheus", - "uid": "Prometheus" - }, - "fieldConfig": { - "defaults": { - "unit": "none", - "decimals": 0, - "min": 0 - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 12 - }, "id": 7, + "title": "VRPs by RP (unique ASN/Prefix/MaxLen)", + "type": "timeseries", + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 12 + }, + "fieldConfig": { + "defaults": { + "unit": "none", + "min": 0 + }, + "overrides": [] + }, "options": { "legend": { "calcs": [ @@ -310,34 +312,33 @@ }, "targets": [ { - "expr": "inter_rp_vrps", - "legendFormat": "{{rp}}", + "expr": "inter_rp_vrps{exported_instance=\"remote200-inter-rp\",exported_rp=~\"ours-rp|routinator\"}", + "legendFormat": "{{exported_rp}}", "refId": "A" } - ], - "title": "VRPs by RP (unique ASN/Prefix/MaxLen)", - "type": "timeseries" + ] }, { + "id": 8, + "title": "VAPs / ASPAs by RP (unique Customer/Providers)", + "type": "timeseries", "datasource": { "type": "prometheus", "uid": "Prometheus" }, - "fieldConfig": { - "defaults": { - "unit": "none", - "decimals": 0, - "min": 0 - }, - "overrides": [] - }, "gridPos": { "h": 8, "w": 12, "x": 12, "y": 12 }, - "id": 8, + "fieldConfig": { + "defaults": { + "unit": "none", + "min": 0 + }, + "overrides": [] + }, "options": { "legend": { "calcs": [ @@ -354,54 +355,75 @@ }, "targets": [ { - "expr": "inter_rp_vaps", - "legendFormat": "{{rp}}", + "expr": "inter_rp_vaps{exported_instance=\"remote200-inter-rp\",exported_rp=~\"ours-rp|routinator\"}", + "legendFormat": "{{exported_rp}}", "refId": "A" } - ], - "title": "VAPs / ASPAs by RP (unique Customer/Providers)", - "type": "timeseries" + ] }, { + "id": 9, + "title": "Latest RP Runs", + "type": "table", "datasource": { "type": "prometheus", "uid": "Prometheus" }, - "fieldConfig": { - "defaults": { - "unit": "none", - "decimals": 0 - }, - "overrides": [] - }, "gridPos": { "h": 8, "w": 12, "x": 0, "y": 20 }, - "id": 9, + "fieldConfig": { + "defaults": { + "unit": "none", + "decimals": 0 + }, + "overrides": [] + }, "options": { "showHeader": true, "sortBy": [] }, "targets": [ { - "expr": "inter_rp_ccr_digest_match{left=\"ours-rp\",right=\"rpki-client\"}", + "expr": "inter_rp_run_seq{exported_instance=\"remote200-inter-rp\",exported_rp=~\"ours-rp|routinator\"}", "format": "table", "instant": true, - "legendFormat": "{{state}}", + "legendFormat": "{{exported_rp}} seq", "refId": "A" + }, + { + "expr": "inter_rp_run_success{exported_instance=\"remote200-inter-rp\",exported_rp=~\"ours-rp|routinator\"}", + "format": "table", + "instant": true, + "legendFormat": "{{exported_rp}} success", + "refId": "B" + }, + { + "expr": "inter_rp_run_wall_seconds{exported_instance=\"remote200-inter-rp\",exported_rp=~\"ours-rp|routinator\"}", + "format": "table", + "instant": true, + "legendFormat": "{{exported_rp}} wall", + "refId": "C" } - ], - "title": "CCR Digest Match States", - "type": "table" + ] }, { + "id": 10, + "title": "Output Count Diffs (unique)", + "type": "table", "datasource": { "type": "prometheus", "uid": "Prometheus" }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 20 + }, "fieldConfig": { "defaults": { "unit": "none", @@ -409,41 +431,41 @@ }, "overrides": [] }, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 20 - }, - "id": 10, "options": { "showHeader": true, "sortBy": [] }, "targets": [ { - "expr": "inter_rp_vrps_diff", + "expr": "inter_rp_vrps_diff{exported_instance=\"remote200-inter-rp\",left=\"ours-rp\",right=\"routinator\"}", "format": "table", "instant": true, - "legendFormat": "vrps {{left}}-{{right}}", + "legendFormat": "vrps ours-rp-routinator", "refId": "A" }, { - "expr": "inter_rp_vaps_diff", + "expr": "inter_rp_vaps_diff{exported_instance=\"remote200-inter-rp\",left=\"ours-rp\",right=\"routinator\"}", "format": "table", "instant": true, - "legendFormat": "vaps {{left}}-{{right}}", + "legendFormat": "vaps ours-rp-routinator", "refId": "B" } - ], - "title": "Output Count Diffs (VRP/VAP unique)", - "type": "table" + ] }, { + "id": 11, + "title": "Artifact Age by RP", + "type": "timeseries", "datasource": { "type": "prometheus", "uid": "Prometheus" }, + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 28 + }, "fieldConfig": { "defaults": { "unit": "s", @@ -451,13 +473,6 @@ }, "overrides": [] }, - "gridPos": { - "h": 8, - "w": 24, - "x": 0, - "y": 28 - }, - "id": 11, "options": { "legend": { "calcs": [ @@ -474,21 +489,19 @@ }, "targets": [ { - "expr": "inter_rp_artifact_age_seconds", - "legendFormat": "{{rp}}", + "expr": "inter_rp_artifact_age_seconds{exported_instance=\"remote200-inter-rp\",exported_rp=~\"ours-rp|routinator\"}", + "legendFormat": "{{exported_rp}}", "refId": "A" } - ], - "title": "Artifact Age by RP", - "type": "timeseries" + ] } ], - "preload": false, "refresh": "10s", "schemaVersion": 40, "tags": [ "rpki", - "inter-rp" + "inter-rp", + "routinator" ], "templating": { "list": [] @@ -497,9 +510,8 @@ "from": "now-6h", "to": "now" }, - "timepicker": {}, "timezone": "browser", - "title": "Ours RP Inter-RP", + "title": "Ours RP vs Routinator", "uid": "ours-rp-inter-rp", - "version": 1 + "version": 2 } diff --git a/scripts/inter_rp/inter_rp_ours_routinator_exporter.py b/scripts/inter_rp/inter_rp_ours_routinator_exporter.py new file mode 100755 index 0000000..66cc2d6 --- /dev/null +++ b/scripts/inter_rp/inter_rp_ours_routinator_exporter.py @@ -0,0 +1,250 @@ +#!/usr/bin/env python3 +import csv +import datetime as dt +import json +import os +import socket +import sys +import threading +import time +from http.server import BaseHTTPRequestHandler, ThreadingHTTPServer +from pathlib import Path + +RUN_ROOT = Path(os.environ.get("OURS_RUN_ROOT", "/root/ours-rp-continuous/portable-soak")) +PEER_ROOT = Path(os.environ.get("PEER_ROOT", "/root/ours-rp-continuous/portable-soak/inter-rp-peers")) +INSTANCE = os.environ.get("INTER_RP_INSTANCE", "remote200-inter-rp") +LISTEN = os.environ.get("INTER_RP_LISTEN", "0.0.0.0:9557") +SCAN_TTL = float(os.environ.get("INTER_RP_SCAN_TTL_SECONDS", "10")) + +_cache_lock = threading.Lock() +_cache = {"deadline": 0.0, "metrics": "", "status": {}} +_count_cache = {} + +def unix_now(): + return time.time() + +def parse_rfc3339(value): + if not value: + return None + text = str(value).replace("Z", "+00:00") + try: + return dt.datetime.fromisoformat(text).timestamp() + except Exception: + return None + +def read_json(path): + with open(path, "r", encoding="utf-8") as handle: + return json.load(handle) + +def latest_ours_run(): + runs = RUN_ROOT / "runs" + candidates = sorted(p for p in runs.glob("run_*") if (p / "run-summary.json").exists()) + return candidates[-1] if candidates else None + +def count_unique_csv(path, cols): + if not path.exists(): + return None + stat = path.stat() + key = (str(path), stat.st_mtime_ns, stat.st_size, cols) + if key in _count_cache: + return _count_cache[key] + seen = set() + with open(path, "r", encoding="utf-8", newline="") as handle: + reader = csv.reader(handle) + first = True + for row in reader: + if first: + first = False + continue + if not row: + continue + if len(row) < cols: + continue + seen.add(tuple(cell.strip() for cell in row[:cols])) + value = len(seen) + _count_cache.clear() + _count_cache[key] = value + return value + +def metric_line(name, labels, value): + label_text = ",".join(f'{k}="{str(v).replace(chr(92), chr(92)+chr(92)).replace(chr(34), chr(92)+chr(34))}"' for k, v in labels.items()) + return f"{name}{{{label_text}}} {value}\n" + +def bool_num(value): + return 1 if value else 0 + +def sample_ours(now, errors): + sample = {"rp": "ours-rp", "present": False, "success": False, "max": {}, "errors": 0} + run_dir = latest_ours_run() + if run_dir is None: + sample["errors"] += 1 + errors.append("ours-rp: no run-summary.json") + return sample + sample["present"] = True + summary_path = run_dir / "run-summary.json" + try: + summary = read_json(summary_path) + except Exception as exc: + sample["errors"] += 1 + errors.append(f"ours-rp: read {summary_path}: {exc}") + return sample + sample["run_id"] = summary.get("runId") or run_dir.name + sample["run_seq"] = summary.get("runSeq") or int(run_dir.name.split("_")[-1]) + sample["success"] = summary.get("status") == "success" and int(summary.get("exitCode", 0)) == 0 + if summary.get("wallMs") is not None: + sample["wall"] = float(summary["wallMs"]) / 1000.0 + finished = parse_rfc3339(summary.get("finishedAtRfc3339Utc")) + if finished is not None: + sample["finished"] = finished + sample["age"] = max(0.0, now - finished) + rss = summary.get("processMetrics", {}).get("maxRssKb") + if rss is not None: + sample["max"]["parent"] = int(rss) * 1024 + sample["max"]["aggregate_peak"] = int(rss) * 1024 + sample["vrps"] = count_unique_csv(run_dir / "vrps.csv", 3) + sample["vaps"] = count_unique_csv(run_dir / "vaps.csv", 2) + return sample + +def sample_routinator(now, errors): + rp = "routinator" + latest = PEER_ROOT / rp / "latest" + sample = {"rp": rp, "present": False, "success": False, "max": {}, "errors": 0} + if not latest.exists(): + sample["errors"] += 1 + errors.append(f"routinator: missing latest directory: {latest}") + return sample + sample["present"] = True + meta_path = latest / "run-meta.json" + try: + meta = read_json(meta_path) + except Exception as exc: + sample["errors"] += 1 + errors.append(f"routinator: read {meta_path}: {exc}") + return sample + sample["run_id"] = meta.get("runId") + sample["run_seq"] = meta.get("runSeq") + sample["success"] = bool(meta.get("success")) + if meta.get("wallMs") is not None: + sample["wall"] = float(meta["wallMs"]) / 1000.0 + finished = parse_rfc3339(meta.get("finishedAtRfc3339Utc")) + if finished is not None: + sample["finished"] = finished + sample["age"] = max(0.0, now - finished) + max_rss = meta.get("maxRssKb", {}) + for label, key in [("parent", "parent"), ("child_max", "childMax"), ("aggregate_peak", "aggregatePeak")]: + if max_rss.get(key) is not None: + sample["max"][label] = int(max_rss[key]) * 1024 + sample["vrps"] = count_unique_csv(latest / "vrps.csv", 3) + sample["vaps"] = count_unique_csv(latest / "vaps.csv", 2) + return sample + +def sync_metrics(now): + path = PEER_ROOT / "sync-status.json" + if not path.exists(): + return {"present": False, "success": False, "message": f"missing {path}"} + try: + value = read_json(path) + except Exception as exc: + return {"present": True, "success": False, "message": str(exc)} + ts = parse_rfc3339(value.get("lastSyncAtRfc3339Utc")) + return { + "present": True, + "success": bool(value.get("success")), + "timestamp": ts, + "age": max(0.0, now - ts) if ts is not None else None, + "remote": value.get("remoteHost", ""), + "message": value.get("message", ""), + } + +def build_metrics(): + now = unix_now() + errors = [] + start = time.time() + samples = [sample_ours(now, errors), sample_routinator(now, errors)] + sync = sync_metrics(now) + if not sync.get("success"): + errors.append("sync: " + str(sync.get("message", "failed"))) + out = [] + out.append(metric_line("inter_rp_service_up", {"instance": INSTANCE}, 1)) + out.append(metric_line("inter_rp_service_last_scan_timestamp_seconds", {"instance": INSTANCE}, now)) + out.append(metric_line("inter_rp_service_last_scan_duration_seconds", {"instance": INSTANCE}, time.time() - start)) + out.append(metric_line("inter_rp_service_last_reload_success", {"instance": INSTANCE}, bool_num(len(errors) == 0))) + out.append(metric_line("inter_rp_parse_errors", {"instance": INSTANCE}, len(errors))) + out.append(metric_line("inter_rp_sync_present", {"instance": INSTANCE}, bool_num(sync.get("present")))) + out.append(metric_line("inter_rp_sync_last_success", {"instance": INSTANCE}, bool_num(sync.get("success")))) + if sync.get("age") is not None: + out.append(metric_line("inter_rp_sync_age_seconds", {"instance": INSTANCE}, sync["age"])) + if sync.get("timestamp") is not None: + out.append(metric_line("inter_rp_sync_last_timestamp_seconds", {"instance": INSTANCE}, sync["timestamp"])) + by_rp = {s["rp"]: s for s in samples} + for s in samples: + labels = {"instance": INSTANCE, "rp": s["rp"]} + out.append(metric_line("inter_rp_run_present", labels, bool_num(s.get("present")))) + out.append(metric_line("inter_rp_run_success", labels, bool_num(s.get("success")))) + out.append(metric_line("inter_rp_sample_parse_errors", labels, s.get("errors", 0))) + if s.get("run_seq") is not None: + out.append(metric_line("inter_rp_run_seq", labels, s["run_seq"])) + if s.get("wall") is not None: + out.append(metric_line("inter_rp_run_wall_seconds", labels, s["wall"])) + if s.get("age") is not None: + out.append(metric_line("inter_rp_artifact_age_seconds", labels, s["age"])) + if s.get("vrps") is not None: + out.append(metric_line("inter_rp_vrps", labels, s["vrps"])) + if s.get("vaps") is not None: + out.append(metric_line("inter_rp_vaps", labels, s["vaps"])) + for kind, value in s.get("max", {}).items(): + labels2 = dict(labels) + labels2["kind"] = kind + out.append(metric_line("inter_rp_run_max_rss_bytes", labels2, value)) + ours, rout = by_rp.get("ours-rp", {}), by_rp.get("routinator", {}) + if ours.get("vrps") is not None and rout.get("vrps") is not None: + out.append(metric_line("inter_rp_vrps_diff", {"instance": INSTANCE, "left": "ours-rp", "right": "routinator"}, abs(int(ours["vrps"]) - int(rout["vrps"])))) + if ours.get("vaps") is not None and rout.get("vaps") is not None: + out.append(metric_line("inter_rp_vaps_diff", {"instance": INSTANCE, "left": "ours-rp", "right": "routinator"}, abs(int(ours["vaps"]) - int(rout["vaps"])))) + return "".join(out), {"errors": errors, "samples": samples, "sync": sync} + +def get_metrics(): + now = time.time() + with _cache_lock: + if _cache["metrics"] and _cache["deadline"] > now: + return _cache["metrics"] + metrics, status = build_metrics() + _cache["metrics"] = metrics + _cache["status"] = status + _cache["deadline"] = now + SCAN_TTL + return metrics + +class Handler(BaseHTTPRequestHandler): + def do_GET(self): + if self.path == "/metrics": + body = get_metrics().encode("utf-8") + self.send_response(200) + self.send_header("Content-Type", "text/plain; version=0.0.4; charset=utf-8") + self.send_header("Content-Length", str(len(body))) + self.end_headers() + self.wfile.write(body) + return + if self.path == "/status": + with _cache_lock: + if not _cache["status"] or _cache["deadline"] <= time.time(): + build_metrics() + body = json.dumps(_cache["status"], indent=2).encode("utf-8") + self.send_response(200) + self.send_header("Content-Type", "application/json") + self.send_header("Content-Length", str(len(body))) + self.end_headers() + self.wfile.write(body) + return + self.send_response(404) + self.end_headers() + def log_message(self, fmt, *args): + return + +def main(): + host, port = LISTEN.rsplit(":", 1) + server = ThreadingHTTPServer((host, int(port)), Handler) + print(f"inter-rp ours+routinator exporter listen={LISTEN} instance={INSTANCE}", flush=True) + server.serve_forever() + +if __name__ == "__main__": + main() diff --git a/scripts/inter_rp/sync_routinator_from_remote231.sh b/scripts/inter_rp/sync_routinator_from_remote231.sh new file mode 100755 index 0000000..d541515 --- /dev/null +++ b/scripts/inter_rp/sync_routinator_from_remote231.sh @@ -0,0 +1,73 @@ +#!/usr/bin/env bash +set -euo pipefail +REMOTE231="${REMOTE231:-root@47.251.127.231}" +REMOTE_ROOT="${REMOTE_ROOT:-/var/lib/inter-rp-runners}" +PEER_ROOT="${PEER_ROOT:-/root/ours-rp-continuous/portable-soak/inter-rp-peers}" +SYNC_INTERVAL_SECS="${SYNC_INTERVAL_SECS:-60}" +MAX_SYNCS="${MAX_SYNCS:--1}" +LOG_PREFIX="[inter-rp-sync]" +mkdir -p "$PEER_ROOT/routinator" +write_status() { + local success="$1" + local message="$2" + env SYNC_SUCCESS="$success" SYNC_MESSAGE="$message" SYNC_REMOTE="$REMOTE231" SYNC_REMOTE_ROOT="$REMOTE_ROOT" python3 - "$PEER_ROOT/sync-status.json" <<'PY' +import datetime, json, os, socket, sys +path = sys.argv[1] +payload = { + "schemaVersion": 1, + "success": os.environ["SYNC_SUCCESS"] == "true", + "lastSyncAtRfc3339Utc": datetime.datetime.now(datetime.UTC).replace(microsecond=0).isoformat().replace("+00:00", "Z"), + "remoteHost": os.environ["SYNC_REMOTE"], + "remoteRoot": os.environ["SYNC_REMOTE_ROOT"], + "localHost": socket.gethostname(), + "message": os.environ["SYNC_MESSAGE"], +} +with open(path, "w", encoding="utf-8") as handle: + json.dump(payload, handle, indent=2) + handle.write("\n") +PY +} +sync_once() { + local tmp="$PEER_ROOT/.sync-routinator-$$" + rm -rf "$tmp" + mkdir -p "$tmp" + if ! rsync -aL --delete \ + --include='run-meta.json' \ + --include='result.ccr' \ + --include='vrps.csv' \ + --include='vaps.csv' \ + --include='stdout.log' \ + --include='stderr.log' \ + --exclude='*' \ + "$REMOTE231:$REMOTE_ROOT/routinator/latest/" "$tmp/latest/"; then + rm -rf "$tmp" + return 1 + fi + if [[ ! -f "$tmp/latest/run-meta.json" ]]; then + rm -rf "$tmp" + return 2 + fi + rm -rf "$PEER_ROOT/routinator/latest.next" "$PEER_ROOT/routinator/latest.prev" + mv "$tmp/latest" "$PEER_ROOT/routinator/latest.next" + if [[ -e "$PEER_ROOT/routinator/latest" ]]; then + mv "$PEER_ROOT/routinator/latest" "$PEER_ROOT/routinator/latest.prev" + fi + mv "$PEER_ROOT/routinator/latest.next" "$PEER_ROOT/routinator/latest" + rm -rf "$PEER_ROOT/routinator/latest.prev" "$tmp" +} +completed=0 +while true; do + if sync_once; then + echo "$LOG_PREFIX $(date -u +%Y-%m-%dT%H:%M:%SZ) ok" + write_status true "ok" + else + code=$? + echo "$LOG_PREFIX $(date -u +%Y-%m-%dT%H:%M:%SZ) sync failed code=$code" >&2 + write_status false "routinator rsync failed code=$code" + fi + completed=$((completed + 1)) + if [[ "$MAX_SYNCS" =~ ^[0-9]+$ ]] && (( completed >= MAX_SYNCS )); then + break + fi + sleep "$SYNC_INTERVAL_SECS" +done