diff --git a/src/sys/debug/.env.example b/src/sys/debug/.env.example new file mode 100644 index 0000000..4ee2fa5 --- /dev/null +++ b/src/sys/debug/.env.example @@ -0,0 +1,12 @@ +# Generated by 01_bootstrap.sh +SYS_DEBUG_PRIVATE_CORE=/absolute/path/to/private +SYS_DEBUG_PRIVATE_NODEA=/absolute/path/to/private-nodea +SYS_DEBUG_PRIVATE_NODEB=/absolute/path/to/private-nodeb +SYS_DEBUG_TMP_DIR=/absolute/path/to/tmp +SYS_DEBUG_NETWORK_NAME=argus-debug-net +SYS_DEBUG_NETWORK_SUBNET=172.30.0.0/16 +SYS_DEBUG_NETWORK_GATEWAY=172.30.0.1 +SYS_DEBUG_PROJECT_NAME=argus-debug +SYS_DEBUG_CONTAINER_PREFIX=argus-debug +ARGUS_BUILD_UID=2133 +ARGUS_BUILD_GID=2015 diff --git a/src/sys/debug/README.md b/src/sys/debug/README.md new file mode 100644 index 0000000..2466b63 --- /dev/null +++ b/src/sys/debug/README.md @@ -0,0 +1,67 @@ +# ARGUS 系统调试部署模式 + +该目录提供基于系统级 E2E 测试构建的调试部署流程,便于本地快速复现与排查问题。核心特性: + +- 独立 docker 网络 `argus-debug-net`(默认子网 `172.30.0.0/16`),避免与 `src/sys/tests` 冲突。 +- 私有数据目录可通过参数自定义,例如 `--private-root /tmp/argus-debug`。 +- 默认保留调试过程生成的文件,避免 `down`/`bootstrap` 自动删除。 + +## 快速开始 + +```bash +cd src/sys/debug + +# 仅首次需要,创建 external 网络 +./scripts/network-create.sh + +# 初始化目录/构建 agent/写入 .env +./scripts/01_bootstrap.sh --private-root /tmp/argus-debug + +# 启动调试栈 +./scripts/02_up.sh + +# 根据需要执行验证脚本(03~08) +./scripts/03_wait_ready.sh +... + +# 调试结束停止服务 +./scripts/09_down.sh + +# 若需移除网络或数据 +./scripts/network-destroy.sh +./scripts/clean-data.sh +``` + +> **提示**:调试与测试栈不能同时运行,应保持 `src/sys/tests` 中的 `argus-sys` 栈已停止。 + +## 参数与环境变量 + +- `--private-root `:同时指定核心服务与两个节点的私有目录根,脚本自动派生 `private`、`private-nodea`、`private-nodeb`。 +- `--private-core `、`--private-nodea `、`--private-nodeb `:分别覆盖单独目录。 +- 环境变量可覆盖 `.env` 中写入的值,例如 `export SYS_DEBUG_NETWORK_NAME=my-debug-net`。 +- `.env` 文件字段: + - `SYS_DEBUG_PRIVATE_CORE` + - `SYS_DEBUG_PRIVATE_NODEA` + - `SYS_DEBUG_PRIVATE_NODEB` + - `SYS_DEBUG_TMP_DIR` + - `SYS_DEBUG_NETWORK_NAME` + - `SYS_DEBUG_NETWORK_SUBNET` + - `SYS_DEBUG_NETWORK_GATEWAY` + - `SYS_DEBUG_PROJECT_NAME` + - `SYS_DEBUG_CONTAINER_PREFIX` + - `ARGUS_BUILD_UID` / `ARGUS_BUILD_GID` + +## 脚本说明 + +- `scripts/common.sh`:通用函数与环境加载。 +- `scripts/network-create.sh` / `network-destroy.sh`:管理 external 网络。 +- `scripts/00_debug_all.sh`:顺序执行 01~08(默认不执行 09)。 +- `scripts/clean-data.sh`:选择性清理宿主机私有数据。 +- `scripts/08_restart_agent_reregister.sh`:将 node-b 切换到 `SYS_DEBUG_NODEB_FIXED_IP`(默认 `172.30.0.200`),如果目标地址与当前 IP 相同脚本会报错提醒重新选择地址。 +- 其它 `01~09` 与测试目录对应,但针对参数化路径及网络做了调整。 + +## 注意事项 + +- 若宿主机未安装 Docker,脚本将提示错误并退出。 +- 当指定的私有目录已存在数据时,脚本不会清理,请确认内容安全后再复用。 +- 与测试环境共用镜像:请提前执行仓库根目录的 `./build/build_images.sh`。 diff --git a/src/sys/debug/docker-compose.yml b/src/sys/debug/docker-compose.yml new file mode 100644 index 0000000..c11f777 --- /dev/null +++ b/src/sys/debug/docker-compose.yml @@ -0,0 +1,147 @@ +version: "3.8" + +networks: + argus-debug-net: + external: true + name: ${SYS_DEBUG_NETWORK_NAME:-argus-debug-net} + +services: + bind: + image: ${BIND_IMAGE_TAG:-argus-bind9:latest} + container_name: ${SYS_DEBUG_CONTAINER_PREFIX:-argus-debug}-bind + networks: + argus-debug-net: + ipv4_address: ${SYS_DEBUG_BIND_IP:-172.30.0.2} + volumes: + - ${SYS_DEBUG_PRIVATE_CORE}:/private + restart: unless-stopped + + master: + image: ${MASTER_IMAGE_TAG:-argus-master:latest} + container_name: ${SYS_DEBUG_CONTAINER_PREFIX:-argus-debug}-master + depends_on: + - bind + environment: + - OFFLINE_THRESHOLD_SECONDS=6 + - ONLINE_THRESHOLD_SECONDS=2 + - SCHEDULER_INTERVAL_SECONDS=1 + - ARGUS_BUILD_UID=${ARGUS_BUILD_UID:-2133} + - ARGUS_BUILD_GID=${ARGUS_BUILD_GID:-2015} + ports: + - "32300:3000" + volumes: + - ${SYS_DEBUG_PRIVATE_CORE}/argus/master:/private/argus/master + - ${SYS_DEBUG_PRIVATE_CORE}/argus/metric/prometheus:/private/argus/metric/prometheus + - ${SYS_DEBUG_PRIVATE_CORE}/argus/etc:/private/argus/etc + networks: + argus-debug-net: + ipv4_address: ${SYS_DEBUG_MASTER_IP:-172.30.0.10} + restart: unless-stopped + + es: + image: ${ES_IMAGE_TAG:-argus-elasticsearch:latest} + container_name: ${SYS_DEBUG_CONTAINER_PREFIX:-argus-debug}-es + environment: + - discovery.type=single-node + - xpack.security.enabled=false + - ES_JAVA_OPTS=-Xms512m -Xmx512m + - ARGUS_BUILD_UID=${ARGUS_BUILD_UID:-2133} + - ARGUS_BUILD_GID=${ARGUS_BUILD_GID:-2015} + volumes: + - ${SYS_DEBUG_PRIVATE_CORE}/argus/log/elasticsearch:/private/argus/log/elasticsearch + - ${SYS_DEBUG_PRIVATE_CORE}/argus/etc:/private/argus/etc + ports: + - "9200:9200" + networks: + argus-debug-net: + ipv4_address: ${SYS_DEBUG_ES_IP:-172.30.0.20} + restart: unless-stopped + + kibana: + image: ${KIBANA_IMAGE_TAG:-argus-kibana:latest} + container_name: ${SYS_DEBUG_CONTAINER_PREFIX:-argus-debug}-kibana + environment: + - ELASTICSEARCH_HOSTS=http://es.log.argus.com:9200 + - ARGUS_BUILD_UID=${ARGUS_BUILD_UID:-2133} + - ARGUS_BUILD_GID=${ARGUS_BUILD_GID:-2015} + volumes: + - ${SYS_DEBUG_PRIVATE_CORE}/argus/log/kibana:/private/argus/log/kibana + - ${SYS_DEBUG_PRIVATE_CORE}/argus/etc:/private/argus/etc + depends_on: + - es + ports: + - "5601:5601" + networks: + argus-debug-net: + ipv4_address: ${SYS_DEBUG_KIBANA_IP:-172.30.0.30} + restart: unless-stopped + + node-a: + image: ubuntu:22.04 + container_name: ${SYS_DEBUG_CONTAINER_PREFIX:-argus-debug}-node-a + hostname: ${SYS_DEBUG_NODEA_HOST:-dev-yyrshare-nbnyx10-cp2f-pod-0} + depends_on: + - master + - bind + - es + environment: + - MASTER_ENDPOINT=http://master.argus.com:3000 + - REPORT_INTERVAL_SECONDS=2 + - ARGUS_BUILD_UID=${ARGUS_BUILD_UID:-2133} + - ARGUS_BUILD_GID=${ARGUS_BUILD_GID:-2015} + - ES_HOST=es + - ES_PORT=9200 + - CLUSTER=local + - RACK=dev + volumes: + - ${SYS_DEBUG_PRIVATE_NODEA}/argus/agent/${SYS_DEBUG_NODEA_HOST:-dev-yyrshare-nbnyx10-cp2f-pod-0}:/private/argus/agent/${SYS_DEBUG_NODEA_HOST:-dev-yyrshare-nbnyx10-cp2f-pod-0} + - ../../agent/dist/argus-agent:/usr/local/bin/argus-agent:ro + - ../tests/scripts/node_entrypoint.sh:/usr/local/bin/node-entrypoint.sh:ro + - ../../log/fluent-bit/build/start-fluent-bit.sh:/assets/start-fluent-bit.sh:ro + - ../../log/fluent-bit/build/etc:/assets/fluent-bit/etc:ro + - ../../log/fluent-bit/build/packages:/assets/fluent-bit/packages:ro + entrypoint: + - /usr/local/bin/node-entrypoint.sh + dns: + - ${SYS_DEBUG_BIND_IP:-172.30.0.2} + ports: + - "2020:2020" + networks: + argus-debug-net: + ipv4_address: ${SYS_DEBUG_NODEA_IP:-172.30.0.101} + restart: unless-stopped + + node-b: + image: ubuntu:22.04 + container_name: ${SYS_DEBUG_CONTAINER_PREFIX:-argus-debug}-node-b + hostname: ${SYS_DEBUG_NODEB_HOST:-dev-yyrshare-uuuu10-ep2f-pod-0} + depends_on: + - master + - bind + - es + environment: + - MASTER_ENDPOINT=http://master.argus.com:3000 + - REPORT_INTERVAL_SECONDS=2 + - ARGUS_BUILD_UID=${ARGUS_BUILD_UID:-2133} + - ARGUS_BUILD_GID=${ARGUS_BUILD_GID:-2015} + - ES_HOST=es + - ES_PORT=9200 + - CLUSTER=local + - RACK=dev + volumes: + - ${SYS_DEBUG_PRIVATE_NODEB}/argus/agent/${SYS_DEBUG_NODEB_HOST:-dev-yyrshare-uuuu10-ep2f-pod-0}:/private/argus/agent/${SYS_DEBUG_NODEB_HOST:-dev-yyrshare-uuuu10-ep2f-pod-0} + - ../../agent/dist/argus-agent:/usr/local/bin/argus-agent:ro + - ../tests/scripts/node_entrypoint.sh:/usr/local/bin/node-entrypoint.sh:ro + - ../../log/fluent-bit/build/start-fluent-bit.sh:/assets/start-fluent-bit.sh:ro + - ../../log/fluent-bit/build/etc:/assets/fluent-bit/etc:ro + - ../../log/fluent-bit/build/packages:/assets/fluent-bit/packages:ro + entrypoint: + - /usr/local/bin/node-entrypoint.sh + dns: + - ${SYS_DEBUG_BIND_IP:-172.30.0.2} + ports: + - "2021:2020" + networks: + argus-debug-net: + ipv4_address: ${SYS_DEBUG_NODEB_IP:-172.30.0.102} + restart: unless-stopped diff --git a/src/sys/debug/scripts/00_debug_all.sh b/src/sys/debug/scripts/00_debug_all.sh new file mode 100755 index 0000000..6e39309 --- /dev/null +++ b/src/sys/debug/scripts/00_debug_all.sh @@ -0,0 +1,24 @@ +#!/usr/bin/env bash +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" + +SCRIPTS=( + "01_bootstrap.sh" + "02_up.sh" + "03_wait_ready.sh" + "04_verify_dns_routing.sh" + "05_agent_register.sh" + "06_write_health_and_assert.sh" + "07_logs_send_and_assert.sh" + "08_restart_agent_reregister.sh" +) + +for script in "${SCRIPTS[@]}"; do + echo "[SYS-DEBUG] Running $script" + "$SCRIPT_DIR/$script" + echo "[SYS-DEBUG] $script completed" + echo +done + +echo "[SYS-DEBUG] Complete. Run scripts/09_down.sh when finished (data retained)." diff --git a/src/sys/debug/scripts/01_bootstrap.sh b/src/sys/debug/scripts/01_bootstrap.sh new file mode 100755 index 0000000..e044e5e --- /dev/null +++ b/src/sys/debug/scripts/01_bootstrap.sh @@ -0,0 +1,210 @@ +#!/usr/bin/env bash +set -euo pipefail + +# shellcheck source=common.sh +source "$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)/common.sh" + +PRIVATE_ROOT="" +PRIVATE_CORE="$SYS_DEBUG_PRIVATE_CORE" +PRIVATE_NODEA="$SYS_DEBUG_PRIVATE_NODEA" +PRIVATE_NODEB="$SYS_DEBUG_PRIVATE_NODEB" +TMP_DIR_VAL="$SYS_DEBUG_TMP_DIR" +NETWORK_NAME="$SYS_DEBUG_NETWORK_NAME" +NETWORK_SUBNET="$SYS_DEBUG_NETWORK_SUBNET" +NETWORK_GATEWAY="$SYS_DEBUG_NETWORK_GATEWAY" +PROJECT_NAME="$SYS_DEBUG_PROJECT_NAME" +CONTAINER_PREFIX="$SYS_DEBUG_CONTAINER_PREFIX" +NODEB_FIXED_IP=${SYS_DEBUG_NODEB_FIXED_IP:-172.30.0.200} + +usage() { + cat <&2; exit 1; } + PRIVATE_ROOT="$1" + ;; + --private-root=*) + PRIVATE_ROOT="${1#*=}" + ;; + --private-core) + shift; [[ $# -gt 0 ]] || { echo "--private-core requires value" >&2; exit 1; } + PRIVATE_CORE="$1" + ;; + --private-core=*) + PRIVATE_CORE="${1#*=}" + ;; + --private-nodea) + shift; [[ $# -gt 0 ]] || { echo "--private-nodea requires value" >&2; exit 1; } + PRIVATE_NODEA="$1" + ;; + --private-nodea=*) + PRIVATE_NODEA="${1#*=}" + ;; + --private-nodeb) + shift; [[ $# -gt 0 ]] || { echo "--private-nodeb requires value" >&2; exit 1; } + PRIVATE_NODEB="$1" + ;; + --private-nodeb=*) + PRIVATE_NODEB="${1#*=}" + ;; + --tmp-dir) + shift; [[ $# -gt 0 ]] || { echo "--tmp-dir requires value" >&2; exit 1; } + TMP_DIR_VAL="$1" + ;; + --tmp-dir=*) + TMP_DIR_VAL="${1#*=}" + ;; + --network-name) + shift; [[ $# -gt 0 ]] || { echo "--network-name requires value" >&2; exit 1; } + NETWORK_NAME="$1" + ;; + --network-name=*) + NETWORK_NAME="${1#*=}" + ;; + --network-subnet) + shift; [[ $# -gt 0 ]] || { echo "--network-subnet requires value" >&2; exit 1; } + NETWORK_SUBNET="$1" + ;; + --network-subnet=*) + NETWORK_SUBNET="${1#*=}" + ;; + --network-gateway) + shift; [[ $# -gt 0 ]] || { echo "--network-gateway requires value" >&2; exit 1; } + NETWORK_GATEWAY="$1" + ;; + --network-gateway=*) + NETWORK_GATEWAY="${1#*=}" + ;; + -h|--help) + usage + exit 0 + ;; + *) + echo "Unknown argument: $1" >&2 + usage >&2 + exit 1 + ;; + esac + shift +done + +if [[ -n "$PRIVATE_ROOT" ]]; then + PRIVATE_CORE="$PRIVATE_ROOT/private" + PRIVATE_NODEA="$PRIVATE_ROOT/private-nodea" + PRIVATE_NODEB="$PRIVATE_ROOT/private-nodeb" +fi + +PRIVATE_CORE=$(abs_path "$PRIVATE_CORE") +PRIVATE_NODEA=$(abs_path "$PRIVATE_NODEA") +PRIVATE_NODEB=$(abs_path "$PRIVATE_NODEB") +TMP_DIR_VAL=$(abs_path "$TMP_DIR_VAL") + +log "Preparing directories under $PRIVATE_CORE" +mkdir -p \ + "$PRIVATE_CORE/argus/etc" \ + "$PRIVATE_CORE/argus/bind" \ + "$PRIVATE_CORE/argus/master" \ + "$PRIVATE_CORE/argus/metric/prometheus" \ + "$PRIVATE_CORE/argus/log/elasticsearch" \ + "$PRIVATE_CORE/argus/log/kibana" \ + "$PRIVATE_NODEA/argus/agent/$HOST_A/health" \ + "$PRIVATE_NODEB/argus/agent/$HOST_B/health" \ + "$TMP_DIR_VAL" + +log "Aligning ownership for core directories" +chown -R "${ARGUS_BUILD_UID}:${ARGUS_BUILD_GID}" \ + "$PRIVATE_CORE/argus/log/elasticsearch" \ + "$PRIVATE_CORE/argus/log/kibana" \ + "$PRIVATE_CORE/argus/etc" 2>/dev/null || true + +log "Distributing update-dns.sh" +BIND_UPDATE_SRC="$REPO_ROOT/src/bind/build/update-dns.sh" +BIND_UPDATE_DEST="$PRIVATE_CORE/argus/etc/update-dns.sh" +if [[ -f "$BIND_UPDATE_SRC" ]]; then + cp "$BIND_UPDATE_SRC" "$BIND_UPDATE_DEST" + chmod +x "$BIND_UPDATE_DEST" +else + echo "[WARN] Missing $BIND_UPDATE_SRC" >&2 +fi + +require_docker + +ensure_image() { + local image="$1" + if ! docker image inspect "$image" >/dev/null 2>&1; then + echo "[ERR] Missing image: $image. Run ./build/build_images.sh" >&2 + exit 1 + fi +} + +log "Ensuring required images exist" +ensure_image "${ES_IMAGE_TAG:-argus-elasticsearch:latest}" +ensure_image "${KIBANA_IMAGE_TAG:-argus-kibana:latest}" +ensure_image "${BIND_IMAGE_TAG:-argus-bind9:latest}" +ensure_image "${MASTER_IMAGE_TAG:-argus-master:latest}" + +log "Building agent binary" +pushd "$REPO_ROOT/src/agent" >/dev/null +./scripts/build_binary.sh +popd >/dev/null + +AGENT_BIN="$REPO_ROOT/src/agent/dist/argus-agent" +if [[ ! -x "$AGENT_BIN" ]]; then + echo "[ERR] Agent binary not found at $AGENT_BIN" >&2 + exit 1 +fi +echo "$AGENT_BIN" > "$TMP_DIR_VAL/agent_binary_path" + +log "Preparing environment file contents" +tmp_env="$(mktemp)" +cat > "$tmp_env" </dev/null 2>&1; then + echo "[ERR] Network $SYS_DEBUG_NETWORK_NAME not found. Run scripts/network-create.sh first." >&2 + exit 1 +fi + +log "Starting debug stack on project $SYS_DEBUG_PROJECT_NAME" +compose up -d + +log "Services started: master:32300 es:9200 kibana:5601 node-a:2020 node-b:2021" diff --git a/src/sys/debug/scripts/03_wait_ready.sh b/src/sys/debug/scripts/03_wait_ready.sh new file mode 100755 index 0000000..a92411c --- /dev/null +++ b/src/sys/debug/scripts/03_wait_ready.sh @@ -0,0 +1,73 @@ +#!/usr/bin/env bash +set -euo pipefail + +# shellcheck source=common.sh +source "$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)/common.sh" + +ensure_env_file +ensure_paths_defined + +service_id() { + compose ps -q "$1" +} + +wait_http() { + local url="$1"; local attempts="${2:-120}"; local i=1 + while (( i <= attempts )); do + if curl -fsS "$url" >/dev/null 2>&1; then + return 0 + fi + echo "[..] waiting $url ($i/$attempts)" + sleep 5 + ((i++)) + done + echo "[ERR] Timeout waiting for $url" >&2 + return 1 +} + +log "Waiting for ES/Kibana/Master/Fluent Bit/Bind" + +attempt=1; max=120 +while (( attempt <= max )); do + if curl -fsS "http://localhost:9200/_cluster/health?wait_for_status=yellow&timeout=1s" >/dev/null 2>&1; then + break + fi + echo "[..] waiting ES ($attempt/$max)" + sleep 5 + ((attempt++)) +done +if (( attempt > max )); then + echo "[ERR] ES not ready" >&2 + exit 1 +fi + +log "Waiting for Kibana to be available (HTTP 200)" +kb_attempt=1; kb_max=180 +while (( kb_attempt <= kb_max )); do + body=$(curl -sS "http://localhost:5601/api/status" 2>/dev/null || true) + code=$(curl -s -o /dev/null -w "%{http_code}" "http://localhost:5601/api/status" || echo 000) + if [[ "$code" == "200" ]] && echo "$body" | grep -q '"level":"available"'; then + log "Kibana available" + break + fi + echo "[..] waiting kibana 200 ($kb_attempt/$kb_max), last_code=$code" + sleep 5 + ((kb_attempt++)) +done +if (( kb_attempt > kb_max )); then + echo "[ERR] Kibana did not reach HTTP 200" >&2 + exit 1 +fi + +wait_http "http://localhost:32300/readyz" 120 +wait_http "http://localhost:2020/api/v2/metrics" 120 +wait_http "http://localhost:2021/api/v2/metrics" 120 + +BIND_ID="$(service_id bind)" +if [[ -n "$BIND_ID" ]]; then + docker exec "$BIND_ID" named-checkconf >/dev/null +else + echo "[WARN] bind container id not found" >&2 +fi + +log "All services are ready" diff --git a/src/sys/debug/scripts/04_verify_dns_routing.sh b/src/sys/debug/scripts/04_verify_dns_routing.sh new file mode 100755 index 0000000..4244e8d --- /dev/null +++ b/src/sys/debug/scripts/04_verify_dns_routing.sh @@ -0,0 +1,51 @@ +#!/usr/bin/env bash +set -euo pipefail + +# shellcheck source=common.sh +source "$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)/common.sh" + +ensure_env_file +ensure_paths_defined + +service_id() { + compose ps -q "$1" +} + +log "Verifying DNS routing via bind" + +MASTER_FILE="$SYS_DEBUG_PRIVATE_CORE/argus/etc/master.argus.com" +if [[ ! -f "$MASTER_FILE" ]]; then + echo "[ERR] master.argus.com file missing at $MASTER_FILE" >&2 + exit 1 +fi +MASTER_IP_HOST="$(tr -d '\r\n' < "$MASTER_FILE" || true)" +log "master.argus.com file content: $MASTER_IP_HOST" + +BIN_ID="$(service_id bind)" +if [[ -n "$BIN_ID" ]]; then + DIG_IP="$(docker exec "$BIN_ID" dig +short master.argus.com A | tail -n1 || true)" + log "dig(master.argus.com) from bind container -> $DIG_IP" + if [[ -z "$DIG_IP" ]]; then + echo "[ERR] bind did not resolve master.argus.com" >&2 + exit 1 + fi +else + echo "[WARN] bind container not found; skip dig" >&2 +fi + +for node in node-a node-b; do + CID="$(service_id "$node")" + if [[ -z "$CID" ]]; then + echo "[ERR] Container for $node not found" >&2 + exit 1 + fi + log "Checking resolution inside $node" + if ! docker exec "$CID" getent hosts master.argus.com >/dev/null 2>&1; then + echo "[ERR] $node cannot resolve master.argus.com" >&2 + exit 1 + fi + RES="$(docker exec "$CID" getent hosts master.argus.com | awk '{print $1}' | head -n1)" + log "$node resolved master.argus.com -> $RES" +done + +log "DNS routing verified" diff --git a/src/sys/debug/scripts/05_agent_register.sh b/src/sys/debug/scripts/05_agent_register.sh new file mode 100755 index 0000000..ec41857 --- /dev/null +++ b/src/sys/debug/scripts/05_agent_register.sh @@ -0,0 +1,84 @@ +#!/usr/bin/env bash +set -euo pipefail + +# shellcheck source=common.sh +source "$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)/common.sh" + +ensure_env_file +ensure_paths_defined + +TMP_DIR_LOCAL="$TMP_DIR" +mkdir -p "$TMP_DIR_LOCAL" + +API_BASE="http://localhost:32300/api/v1/master" + +log "Waiting for agent nodes to register" + +extract_node() { + local name="$1"; local output="$2"; local json_file="$3" + python3 - "$name" "$output" "$json_file" <<'PY' +import json, sys, pathlib +name = sys.argv[1] +out = pathlib.Path(sys.argv[2]) +json_file = sys.argv[3] +with open(json_file, 'r') as fh: + data = json.load(fh) +node = next((n for n in data if n.get("name") == name), None) +if node: + out.write_text(node["id"]) + print(node["id"]) +PY +} + +ID_A=""; ID_B="" +for _ in {1..60}; do + sleep 2 + resp=$(curl -fsS "$API_BASE/nodes" 2>/dev/null || true) + [[ -z "$resp" ]] && continue + if ! echo "$resp" | head -c1 | grep -q '\['; then + continue + fi + echo "$resp" > "$TMP_DIR_LOCAL/nodes_list.json" + ID_A=$(extract_node "$HOST_A" "$TMP_DIR_LOCAL/node_id_a" "$TMP_DIR_LOCAL/nodes_list.json" 2>/dev/null || true) + ID_B=$(extract_node "$HOST_B" "$TMP_DIR_LOCAL/node_id_b" "$TMP_DIR_LOCAL/nodes_list.json" 2>/dev/null || true) + if [[ -s "$TMP_DIR_LOCAL/node_id_a" && -s "$TMP_DIR_LOCAL/node_id_b" ]]; then + break + fi +done + +if [[ ! -s "$TMP_DIR_LOCAL/node_id_a" || ! -s "$TMP_DIR_LOCAL/node_id_b" ]]; then + echo "[ERR] Agents did not register in time" >&2 + exit 1 +fi + +node_detail() { + local id="$1"; local out="$2" + curl -fsS "$API_BASE/nodes/$id" -o "$out" +} + +node_detail "$(cat "$TMP_DIR_LOCAL/node_id_a")" "$TMP_DIR_LOCAL/detail_a.json" +node_detail "$(cat "$TMP_DIR_LOCAL/node_id_b")" "$TMP_DIR_LOCAL/detail_b.json" + +python3 - "$TMP_DIR_LOCAL/detail_a.json" "$TMP_DIR_LOCAL/initial_ip_a" <<'PY' +import json, sys, pathlib +node=json.load(open(sys.argv[1])) +ip=node.get("meta_data",{}).get("ip") +assert ip, "missing ip" +pathlib.Path(sys.argv[2]).write_text(ip) +PY + +python3 - "$TMP_DIR_LOCAL/detail_b.json" "$TMP_DIR_LOCAL/initial_ip_b" <<'PY' +import json, sys, pathlib +node=json.load(open(sys.argv[1])) +ip=node.get("meta_data",{}).get("ip") +assert ip, "missing ip" +pathlib.Path(sys.argv[2]).write_text(ip) +PY + +NODE_JSON_A="$SYS_DEBUG_PRIVATE_NODEA/argus/agent/$HOST_A/node.json" +NODE_JSON_B="$SYS_DEBUG_PRIVATE_NODEB/argus/agent/$HOST_B/node.json" + +[[ -f "$NODE_JSON_A" ]] || { echo "[ERR] node.json missing for $HOST_A" >&2; exit 1; } +[[ -f "$NODE_JSON_B" ]] || { echo "[ERR] node.json missing for $HOST_B" >&2; exit 1; } + +log "Agents registered: $(cat "$TMP_DIR_LOCAL/node_id_a") , $(cat "$TMP_DIR_LOCAL/node_id_b")" diff --git a/src/sys/debug/scripts/06_write_health_and_assert.sh b/src/sys/debug/scripts/06_write_health_and_assert.sh new file mode 100755 index 0000000..1cf85ca --- /dev/null +++ b/src/sys/debug/scripts/06_write_health_and_assert.sh @@ -0,0 +1,78 @@ +#!/usr/bin/env bash +set -euo pipefail + +# shellcheck source=common.sh +source "$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)/common.sh" + +ensure_env_file +ensure_paths_defined + +API_BASE="http://localhost:32300/api/v1/master" + +HEALTH_A="$SYS_DEBUG_PRIVATE_NODEA/argus/agent/$HOST_A/health" +HEALTH_B="$SYS_DEBUG_PRIVATE_NODEB/argus/agent/$HOST_B/health" + +write_health() { + local dir="$1"; mkdir -p "$dir" + cat > "$dir/log-fluentbit.json" < "$dir/metric-node-exporter.json" <&2; exit 1; } + +ID_A_VAL="$(cat "$ID_A")" +ID_B_VAL="$(cat "$ID_B")" + +check_health() { + local id="$1"; local tries=40 + for _ in $(seq 1 $tries); do + sleep 2 + resp=$(curl -fsS "$API_BASE/nodes/$id" 2>/dev/null || true) + [[ -z "$resp" ]] && continue + echo "$resp" > "$TMP_DIR/node_${id}_detail.json" + if python3 - "$TMP_DIR/node_${id}_detail.json" <<'PY' +import json,sys +node=json.load(open(sys.argv[1])) +h=node.get("health",{}) +if "log-fluentbit" in h and "metric-node-exporter" in h: + sys.exit(0) +sys.exit(1) +PY + then + return 0 + fi + done + return 1 +} + +check_health "$ID_A_VAL" || { echo "[ERR] health keys not reported for node A" >&2; exit 1; } +check_health "$ID_B_VAL" || { echo "[ERR] health keys not reported for node B" >&2; exit 1; } + +NODES_JSON="$SYS_DEBUG_PRIVATE_CORE/argus/metric/prometheus/nodes.json" +if [[ ! -f "$NODES_JSON" ]]; then + echo "[ERR] nodes.json missing at $NODES_JSON" >&2 + exit 1 +fi + +python3 - "$NODES_JSON" <<'PY' +import json,sys +with open(sys.argv[1]) as h: + nodes=json.load(h) +if not isinstance(nodes, list): + raise SystemExit("nodes.json expected list") +if len(nodes) != 2: + raise SystemExit(f"expected 2 nodes online, got {len(nodes)}") +PY + +log "Health reported and nodes.json has 2 online nodes" diff --git a/src/sys/debug/scripts/07_logs_send_and_assert.sh b/src/sys/debug/scripts/07_logs_send_and_assert.sh new file mode 100755 index 0000000..775a886 --- /dev/null +++ b/src/sys/debug/scripts/07_logs_send_and_assert.sh @@ -0,0 +1,73 @@ +#!/usr/bin/env bash +set -euo pipefail + +# shellcheck source=common.sh +source "$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)/common.sh" + +ensure_env_file +ensure_paths_defined + +log "Sending logs and asserting ES counts" + +get_count() { + local idx="$1" + curl -s "http://localhost:9200/${idx}/_count?ignore_unavailable=true&allow_no_indices=true" | sed -E 's/.*"count":([0-9]+).*/\1/' | awk 'NF{print $0;exit} END{if(NR==0)print 0}' +} + +train0=$(get_count "train-*") +infer0=$(get_count "infer-*") +base=$((train0 + infer0)) +log "initial counts: train=${train0} infer=${infer0} total=${base}" + +service_id() { + compose ps -q "$1" +} + +send_logs() { + local sid="$1"; local hosttag="$2" + docker exec "$sid" sh -lc 'mkdir -p /logs/train /logs/infer' + docker exec "$sid" sh -lc "ts=\ +\$(date '+%F %T'); echo \"\$ts INFO [$hosttag] training step=1 loss=1.23 model=bert\" >> /logs/train/train-demo.log" + docker exec "$sid" sh -lc "ts=\ +\$(date '+%F %T'); echo \"\$ts INFO [$hosttag] training step=2 loss=1.10 model=bert\" >> /logs/train/train-demo.log" + docker exec "$sid" sh -lc "ts=\ +\$(date '+%F %T'); echo \"\$ts WARN [$hosttag] inference slow on batch=2 latency=1.9s\" >> /logs/infer/infer-demo.log" +} + +CID_A="$(service_id node-a)" +CID_B="$(service_id node-b)" + +[[ -n "$CID_A" && -n "$CID_B" ]] || { echo "[ERR] node containers not found" >&2; exit 1; } + +send_logs "$CID_A" "host01" +send_logs "$CID_B" "host02" + +log "Waiting for ES to ingest" +sleep 10 + +train1=$(get_count "train-*") +infer1=$(get_count "infer-*") +final=$((train1 + infer1)) +log "final counts: train=${train1} infer=${infer1} total=${final}" + +if (( final <= base )); then + echo "[ERR] ES total did not increase (${base} -> ${final})" >&2 + exit 1 +fi + +if (( final < 4 )); then + echo "[ERR] ES total below expected threshold: ${final} < 4" >&2 + exit 1 +fi + +es_health=$(curl -s "http://localhost:9200/_cluster/health" | grep -o '"status":"[^"]*"' | cut -d'"' -f4) +if [[ "$es_health" != "green" && "$es_health" != "yellow" ]]; then + echo "[ERR] ES health not green/yellow: $es_health" >&2 + exit 1 +fi + +if ! curl -fs "http://localhost:5601/api/status" >/dev/null 2>&1; then + echo "[WARN] Kibana status endpoint not available" +fi + +log "ES counts increased and services healthy" diff --git a/src/sys/debug/scripts/08_restart_agent_reregister.sh b/src/sys/debug/scripts/08_restart_agent_reregister.sh new file mode 100755 index 0000000..30b1298 --- /dev/null +++ b/src/sys/debug/scripts/08_restart_agent_reregister.sh @@ -0,0 +1,110 @@ +#!/usr/bin/env bash +set -euo pipefail + +# shellcheck source=common.sh +source "$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)/common.sh" + +ensure_env_file +ensure_paths_defined + +API_BASE="http://localhost:32300/api/v1/master" +NODE_ENTRYPOINT="$DEBUG_ROOT/../tests/scripts/node_entrypoint.sh" +[[ -f "$NODE_ENTRYPOINT" ]] || { echo "[ERR] node entrypoint script missing at $NODE_ENTRYPOINT" >&2; exit 1; } + +TARGET_FIXED_IP="${SYS_DEBUG_NODEB_FIXED_IP:-172.30.0.200}" + +ID_B_FILE="$TMP_DIR/node_id_b" +IP_INIT_FILE="$TMP_DIR/initial_ip_b" +[[ -f "$ID_B_FILE" && -f "$IP_INIT_FILE" ]] || { echo "[ERR] Required node id/ip files missing in $TMP_DIR" >&2; exit 1; } + +ID_B="$(cat "$ID_B_FILE")" +IP0_B="$(cat "$IP_INIT_FILE")" + +DETAIL_BEFORE="$TMP_DIR/node_b_before.json" +curl -fsS "$API_BASE/nodes/$ID_B" -o "$DETAIL_BEFORE" +LAST0=$(python3 - "$DETAIL_BEFORE" <<'PY' +import json,sys +node=json.load(open(sys.argv[1])) +print(node.get("last_updated","")) +PY +) +IP_BEFORE=$(python3 - "$DETAIL_BEFORE" <<'PY' +import json,sys +node=json.load(open(sys.argv[1])) +print(node.get("meta_data",{}).get("ip","")) +PY +) + +if [[ "$IP_BEFORE" != "$IP0_B" ]]; then + echo "[ERR] Expected initial IP $IP0_B for node-b, got $IP_BEFORE" >&2 + exit 1 +fi + +if [[ "$IP_BEFORE" == "$TARGET_FIXED_IP" ]]; then + echo "[ERR] node-b current IP $IP_BEFORE already matches target $TARGET_FIXED_IP. Configure SYS_DEBUG_NODEB_FIXED_IP to a different address before rerun." >&2 + exit 1 +fi + +service_id() { + compose ps -q "$1" +} + +log "Recreating node-b (old IP $IP_BEFORE) with static IP $TARGET_FIXED_IP" +compose rm -sf node-b >/dev/null 2>&1 || true + +CONTAINER_NAME="${SYS_DEBUG_CONTAINER_PREFIX:-argus-debug}-node-b" +docker rm -f "$CONTAINER_NAME" >/dev/null 2>&1 || true + +AGENT_BIN_PATH="$(cat "$TMP_DIR/agent_binary_path")" +[[ -f "$AGENT_BIN_PATH" ]] || { echo "[ERR] Agent binary path missing in $TMP_DIR" >&2; exit 1; } + +require_docker + +docker run -d \ + --name "$CONTAINER_NAME" \ + --hostname "$HOST_B" \ + --network "$SYS_DEBUG_NETWORK_NAME" \ + --ip "$TARGET_FIXED_IP" \ + --dns "${SYS_DEBUG_BIND_IP:-172.30.0.2}" \ + -e MASTER_ENDPOINT=http://master.argus.com:3000 \ + -e REPORT_INTERVAL_SECONDS=2 \ + -e ARGUS_BUILD_UID=$ARGUS_BUILD_UID \ + -e ARGUS_BUILD_GID=$ARGUS_BUILD_GID \ + -e ES_HOST=es \ + -e ES_PORT=9200 \ + -e CLUSTER=local \ + -e RACK=dev \ + -p 2021:2020 \ + -v "$SYS_DEBUG_PRIVATE_NODEB/argus/agent/$HOST_B:/private/argus/agent/$HOST_B" \ + -v "$AGENT_BIN_PATH:/usr/local/bin/argus-agent:ro" \ + -v "$NODE_ENTRYPOINT:/usr/local/bin/node-entrypoint.sh:ro" \ + -v "$REPO_ROOT/src/log/fluent-bit/build/start-fluent-bit.sh:/assets/start-fluent-bit.sh:ro" \ + -v "$REPO_ROOT/src/log/fluent-bit/build/etc:/assets/fluent-bit/etc:ro" \ + -v "$REPO_ROOT/src/log/fluent-bit/build/packages:/assets/fluent-bit/packages:ro" \ + --entrypoint /usr/local/bin/node-entrypoint.sh \ + ubuntu:22.04 >/dev/null + +log "Waiting for node-b to re-register with new IP" +for _ in {1..40}; do + sleep 3 + if curl -fsS "$API_BASE/nodes/$ID_B" -o "$TMP_DIR/node_b_after.json"; then + if python3 - "$TMP_DIR/node_b_after.json" "$LAST0" "$TARGET_FIXED_IP" <<'PY' +import json,sys +node=json.load(open(sys.argv[1])) +last0=sys.argv[2] +expected_ip=sys.argv[3] +ip=node.get("meta_data",{}).get("ip") +lu=node.get("last_updated") +if ip == expected_ip and lu and lu != last0: + sys.exit(0) +sys.exit(1) +PY + then + log "node-b IP updated: $IP_BEFORE -> $TARGET_FIXED_IP" + exit 0 + fi + fi +done + +echo "[ERR] node-b did not update to IP $TARGET_FIXED_IP in time" >&2 +exit 1 diff --git a/src/sys/debug/scripts/09_down.sh b/src/sys/debug/scripts/09_down.sh new file mode 100755 index 0000000..87ef0bf --- /dev/null +++ b/src/sys/debug/scripts/09_down.sh @@ -0,0 +1,13 @@ +#!/usr/bin/env bash +set -euo pipefail + +# shellcheck source=common.sh +source "$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)/common.sh" + +ensure_env_file +require_docker + +log "Stopping debug stack (project $SYS_DEBUG_PROJECT_NAME)" +compose down --remove-orphans >/dev/null 2>&1 || true + +log "Containers stopped. No host directories were removed." diff --git a/src/sys/debug/scripts/clean-data.sh b/src/sys/debug/scripts/clean-data.sh new file mode 100755 index 0000000..79267aa --- /dev/null +++ b/src/sys/debug/scripts/clean-data.sh @@ -0,0 +1,66 @@ +#!/usr/bin/env bash +set -euo pipefail + +# shellcheck source=common.sh +source "$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)/common.sh" + +ensure_env_file +ensure_paths_defined + +FORCE=false +while [[ $# -gt 0 ]]; do + case "$1" in + -y|--yes) + FORCE=true + ;; + -h|--help) + cat <&2 + exit 1 + ;; + esac + shift +done + +if [[ $FORCE == false ]]; then + read -r -p "This will delete debug private directories. Continue? [y/N] " reply + case "$reply" in + y|Y|yes|YES) + ;; + *) + echo "Aborted" + exit 0 + ;; + esac +fi + +paths=( + "$SYS_DEBUG_PRIVATE_CORE" + "$SYS_DEBUG_PRIVATE_NODEA" + "$SYS_DEBUG_PRIVATE_NODEB" + "$SYS_DEBUG_TMP_DIR" +) + +require_docker + +image="ubuntu:22.04" + +for dir in "${paths[@]}"; do + [[ -d "$dir" ]] || continue + log "Fixing ownership for $dir" + if ! docker run --rm -v "$dir:/target" "$image" chown -R "$(id -u):$(id -g)" /target >/dev/null 2>&1; then + echo "[WARN] Failed to adjust ownership via $image, attempting local chown" >&2 + chown -R "$(id -u):$(id -g)" "$dir" >/dev/null 2>&1 || true + fi + log "Removing $dir" + rm -rf "$dir" +done + +log "Clean data completed" diff --git a/src/sys/debug/scripts/common.sh b/src/sys/debug/scripts/common.sh new file mode 100755 index 0000000..1510e65 --- /dev/null +++ b/src/sys/debug/scripts/common.sh @@ -0,0 +1,96 @@ +#!/usr/bin/env bash +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +DEBUG_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)" +REPO_ROOT="$(cd "$DEBUG_ROOT/../../.." && pwd)" +ENV_FILE="$DEBUG_ROOT/.env" + +source "$REPO_ROOT/scripts/common/build_user.sh" +load_build_user + +if [[ -f "$ENV_FILE" ]]; then + set -a + # shellcheck disable=SC1090 + source "$ENV_FILE" + set +a +fi + +SYS_DEBUG_NETWORK_NAME=${SYS_DEBUG_NETWORK_NAME:-argus-debug-net} +SYS_DEBUG_NETWORK_SUBNET=${SYS_DEBUG_NETWORK_SUBNET:-172.30.0.0/16} +SYS_DEBUG_NETWORK_GATEWAY=${SYS_DEBUG_NETWORK_GATEWAY:-172.30.0.1} +SYS_DEBUG_PROJECT_NAME=${SYS_DEBUG_PROJECT_NAME:-argus-debug} +SYS_DEBUG_CONTAINER_PREFIX=${SYS_DEBUG_CONTAINER_PREFIX:-argus-debug} +SYS_DEBUG_PRIVATE_CORE=${SYS_DEBUG_PRIVATE_CORE:-$DEBUG_ROOT/private} +SYS_DEBUG_PRIVATE_NODEA=${SYS_DEBUG_PRIVATE_NODEA:-$DEBUG_ROOT/private-nodea} +SYS_DEBUG_PRIVATE_NODEB=${SYS_DEBUG_PRIVATE_NODEB:-$DEBUG_ROOT/private-nodeb} +SYS_DEBUG_TMP_DIR=${SYS_DEBUG_TMP_DIR:-$DEBUG_ROOT/tmp} +ARGUS_BUILD_UID=${ARGUS_BUILD_UID:-2133} +ARGUS_BUILD_GID=${ARGUS_BUILD_GID:-2015} + +SYS_DEBUG_NODEA_HOST=${SYS_DEBUG_NODEA_HOST:-dev-yyrshare-nbnyx10-cp2f-pod-0} +SYS_DEBUG_NODEB_HOST=${SYS_DEBUG_NODEB_HOST:-dev-yyrshare-uuuu10-ep2f-pod-0} + +HOST_A="$SYS_DEBUG_NODEA_HOST" +HOST_B="$SYS_DEBUG_NODEB_HOST" + +COMPOSE_FILE="$DEBUG_ROOT/docker-compose.yml" + +abs_path() { + python3 - "$1" <<'PY' +import os, sys +path = sys.argv[1] +print(os.path.abspath(path)) +PY +} + +ensure_command() { + local cmd="$1" + if ! command -v "$cmd" >/dev/null 2>&1; then + echo "[ERR] Required command '$cmd' not found" >&2 + exit 1 + fi +} + +require_docker() { + ensure_command docker +} + +compose() { + require_docker + local bin + if docker compose version >/dev/null 2>&1; then + bin=(docker compose) + else + bin=(docker-compose) + fi + "${bin[@]}" -p "$SYS_DEBUG_PROJECT_NAME" -f "$COMPOSE_FILE" "$@" +} + +ensure_paths_defined() { + local missing=() + for name in SYS_DEBUG_PRIVATE_CORE SYS_DEBUG_PRIVATE_NODEA SYS_DEBUG_PRIVATE_NODEB SYS_DEBUG_TMP_DIR; do + if [[ -z "${!name:-}" ]]; then + missing+=("$name") + fi + done + if (( ${#missing[@]} > 0 )); then + echo "[ERR] Missing required environment variables: ${missing[*]}" >&2 + echo " Run 01_bootstrap.sh first." >&2 + exit 1 + fi +} + +ensure_env_file() { + if [[ ! -f "$ENV_FILE" ]]; then + echo "[ERR] Missing .env at $ENV_FILE. Run 01_bootstrap.sh first." >&2 + exit 1 + fi +} + +log() { + echo "[INFO] $*" +} + +TMP_DIR="$SYS_DEBUG_TMP_DIR" +mkdir -p "$TMP_DIR" diff --git a/src/sys/debug/scripts/network-create.sh b/src/sys/debug/scripts/network-create.sh new file mode 100755 index 0000000..25eb3b4 --- /dev/null +++ b/src/sys/debug/scripts/network-create.sh @@ -0,0 +1,76 @@ +#!/usr/bin/env bash +set -euo pipefail + +# shellcheck source=common.sh +source "$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)/common.sh" + +NAME="$SYS_DEBUG_NETWORK_NAME" +SUBNET="$SYS_DEBUG_NETWORK_SUBNET" +GATEWAY="$SYS_DEBUG_NETWORK_GATEWAY" + +usage() { + cat <&2; exit 1; } + NAME="$1" + ;; + --name=*) + NAME="${1#*=}" + ;; + --subnet) + shift; [[ $# -gt 0 ]] || { echo "--subnet requires value" >&2; exit 1; } + SUBNET="$1" + ;; + --subnet=*) + SUBNET="${1#*=}" + ;; + --gateway) + shift; [[ $# -gt 0 ]] || { echo "--gateway requires value" >&2; exit 1; } + GATEWAY="$1" + ;; + --gateway=*) + GATEWAY="${1#*=}" + ;; + -h|--help) + usage + exit 0 + ;; + *) + echo "Unknown argument: $1" >&2 + usage >&2 + exit 1 + ;; + esac + shift +done + +require_docker + +if docker network inspect "$NAME" >/dev/null 2>&1; then + log "Network $NAME already exists" + exit 0 +fi + +log "Creating network $NAME (subnet=$SUBNET gateway=$GATEWAY)" +docker network create \ + --driver bridge \ + --subnet "$SUBNET" \ + --gateway "$GATEWAY" \ + "$NAME" + +mkdir -p "$TMP_DIR" +echo "$NAME" > "$TMP_DIR/network.created" +log "Network $NAME created" diff --git a/src/sys/debug/scripts/network-destroy.sh b/src/sys/debug/scripts/network-destroy.sh new file mode 100755 index 0000000..ade15f5 --- /dev/null +++ b/src/sys/debug/scripts/network-destroy.sh @@ -0,0 +1,55 @@ +#!/usr/bin/env bash +set -euo pipefail + +# shellcheck source=common.sh +source "$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)/common.sh" + +NAME="$SYS_DEBUG_NETWORK_NAME" + +usage() { + cat <&2; exit 1; } + NAME="$1" + ;; + --name=*) + NAME="${1#*=}" + ;; + -h|--help) + usage + exit 0 + ;; + *) + echo "Unknown argument: $1" >&2 + usage >&2 + exit 1 + ;; + esac + shift +done + +require_docker + +if ! docker network inspect "$NAME" >/dev/null 2>&1; then + log "Network $NAME not found; nothing to do" + exit 0 +fi + +attached=$(docker network inspect -f '{{range $id, $conf := .Containers}}{{printf "%s " $conf.Name}}{{end}}' "$NAME") +if [[ -n "${attached// }" ]]; then + echo "[ERR] Cannot remove network $NAME: still connected containers -> $attached" >&2 + exit 1 +fi + +log "Deleting network $NAME" +docker network rm "$NAME" >/dev/null +rm -f "$TMP_DIR/network.created" +log "Network $NAME removed"