#!/usr/bin/env python3
# Compare two directories of normalized line-set files and write a JSON report.
#
# For each of the "vrps" and "vaps" kinds, the file "<kind>.normalized.txt" is
# read from the --left and --right directories as a set of non-blank lines,
# and overlap statistics for the two sets are written to --out as JSON.
from __future__ import annotations

import argparse
import json
from pathlib import Path


def read_set(path: Path) -> set[str]:
    """Return the distinct non-blank lines of *path*, stripped of whitespace.

    A path that is not an existing regular file yields an empty set rather
    than an error, so a missing input is reported as "no entries".
    Undecodable bytes are replaced instead of raising.
    """
    if not path.is_file():
        return set()
    text = path.read_text(encoding="utf-8", errors="replace")
    stripped = (line.strip() for line in text.splitlines())
    return {entry for entry in stripped if entry}


def compare(left: set[str], right: set[str]) -> dict[str, object]:
    """Return size, overlap and difference statistics for two string sets.

    The result holds the size of each set, of their intersection and of each
    one-sided difference, the Jaccard index rounded to 8 decimal places, and
    up to 20 sorted sample entries from each one-sided difference.
    """
    union = left | right
    inter = left & right
    # Compute each one-sided difference once; both the count and the samples
    # are derived from it.
    only_left = left - right
    only_right = right - left
    return {
        "left": len(left),
        "right": len(right),
        "intersection": len(inter),
        "onlyLeft": len(only_left),
        "onlyRight": len(only_right),
        # Two empty sets are treated as identical (and this avoids 0 / 0).
        "jaccard": round(len(inter) / len(union), 8) if union else 1.0,
        "onlyLeftSamples": sorted(only_left)[:20],
        "onlyRightSamples": sorted(only_right)[:20],
    }


def main() -> int:
    """Parse the command line, compare both input directories, write the report.

    Returns:
        0 on success, for use as the process exit status.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--left", type=Path, required=True)
    parser.add_argument("--right", type=Path, required=True)
    parser.add_argument("--left-name", default="left")
    parser.add_argument("--right-name", default="right")
    parser.add_argument("--out", type=Path, required=True)
    args = parser.parse_args()

    result: dict[str, object] = {
        "leftName": args.left_name,
        "rightName": args.right_name,
    }
    # Both kinds use the same file-name pattern inside each input directory.
    for kind in ("vrps", "vaps"):
        file_name = f"{kind}.normalized.txt"
        result[kind] = compare(
            read_set(args.left / file_name),
            read_set(args.right / file_name),
        )

    # Create the output directory on demand so --out may point anywhere.
    args.out.parent.mkdir(parents=True, exist_ok=True)
    args.out.write_text(
        json.dumps(result, indent=2, sort_keys=True) + "\n",
        encoding="utf-8",
    )
    # Echo the report path so callers can pick it up from stdout.
    print(args.out)
    return 0


if __name__ == "__main__":
    raise SystemExit(main())