Compare commits

..

2 Commits

19 changed files with 1275 additions and 1 deletions

View File

@ -10,6 +10,7 @@ Usage: $0 [OPTIONS]
Options:
--intranet Use intranet mirror for log/bind builds
--master-offline Build master offline image (requires src/master/offline_wheels.tar.gz)
--no-cache Build all images without using Docker layer cache
-h, --help Show this help message
Examples:
@ -23,6 +24,7 @@ EOF
use_intranet=false
build_master=true
build_master_offline=false
no_cache=false
while [[ $# -gt 0 ]]; do
case $1 in
@ -39,6 +41,10 @@ while [[ $# -gt 0 ]]; do
build_master_offline=true
shift
;;
--no-cache)
no_cache=true
shift
;;
-h|--help)
show_help
exit 0
@ -65,6 +71,10 @@ cd "$root"
load_build_user
build_args+=("--build-arg" "ARGUS_BUILD_UID=${ARGUS_BUILD_UID}" "--build-arg" "ARGUS_BUILD_GID=${ARGUS_BUILD_GID}")
if [[ "$no_cache" == true ]]; then
build_args+=("--no-cache")
fi
master_root="$root/src/master"
master_offline_tar="$master_root/offline_wheels.tar.gz"
master_offline_dir="$master_root/offline_wheels"
@ -159,6 +169,9 @@ if [[ "$build_master" == true ]]; then
if [[ "$build_master_offline" == true ]]; then
master_args+=("--offline")
fi
if [[ "$no_cache" == true ]]; then
master_args+=("--no-cache")
fi
if ./scripts/build_images.sh "${master_args[@]}"; then
if [[ "$build_master_offline" == true ]]; then
images_built+=("argus-master:offline")

View File

@ -3,12 +3,13 @@ set -euo pipefail
usage() {
cat >&2 <<'USAGE'
Usage: $0 [--intranet] [--offline] [--tag <image_tag>]
Usage: $0 [--intranet] [--offline] [--tag <image_tag>] [--no-cache]
Options:
--intranet 使用指定的 PyPI 镜像源(默认清华镜像)。
--offline 完全离线构建,依赖 offline_wheels/ 目录中的离线依赖包。
--tag <image_tag> 自定义镜像标签,默认 argus-master:latest。
--no-cache 不使用 Docker 构建缓存。
USAGE
}
@ -19,6 +20,7 @@ IMAGE_TAG="${IMAGE_TAG:-argus-master:latest}"
DOCKERFILE="src/master/Dockerfile"
BUILD_ARGS=()
OFFLINE_MODE=0
NO_CACHE=0
source "$PROJECT_ROOT/scripts/common/build_user.sh"
load_build_user
@ -45,6 +47,11 @@ while [[ "$#" -gt 0 ]]; do
IMAGE_TAG="$2"
shift 2
;;
--no-cache)
NO_CACHE=1
BUILD_ARGS+=("--no-cache")
shift
;;
-h|--help)
usage
exit 0

View File

@ -0,0 +1,12 @@
# Generated by 01_bootstrap.sh
SYS_DEBUG_PRIVATE_CORE=/absolute/path/to/private
SYS_DEBUG_PRIVATE_NODEA=/absolute/path/to/private-nodea
SYS_DEBUG_PRIVATE_NODEB=/absolute/path/to/private-nodeb
SYS_DEBUG_TMP_DIR=/absolute/path/to/tmp
SYS_DEBUG_NETWORK_NAME=argus-debug-net
SYS_DEBUG_NETWORK_SUBNET=172.30.0.0/16
SYS_DEBUG_NETWORK_GATEWAY=172.30.0.1
SYS_DEBUG_PROJECT_NAME=argus-debug
SYS_DEBUG_CONTAINER_PREFIX=argus-debug
ARGUS_BUILD_UID=2133
ARGUS_BUILD_GID=2015

67
src/sys/debug/README.md Normal file
View File

@ -0,0 +1,67 @@
# ARGUS 系统调试部署模式
该目录提供基于系统级 E2E 测试构建的调试部署流程,便于本地快速复现与排查问题。核心特性:
- 独立 docker 网络 `argus-debug-net`(默认子网 `172.30.0.0/16`),避免与 `src/sys/tests` 冲突。
- 私有数据目录可通过参数自定义,例如 `--private-root /tmp/argus-debug`
- 默认保留调试过程生成的文件,避免 `down`/`bootstrap` 自动删除。
## 快速开始
```bash
cd src/sys/debug
# 仅首次需要,创建 external 网络
./scripts/network-create.sh
# 初始化目录/构建 agent/写入 .env
./scripts/01_bootstrap.sh --private-root /tmp/argus-debug
# 启动调试栈
./scripts/02_up.sh
# 根据需要执行验证脚本(03–08)
./scripts/03_wait_ready.sh
...
# 调试结束停止服务
./scripts/09_down.sh
# 若需移除网络或数据
./scripts/network-destroy.sh
./scripts/clean-data.sh
```
> **提示**:调试与测试栈不能同时运行,应保持 `src/sys/tests` 中的 `argus-sys` 栈已停止。
## 参数与环境变量
- `--private-root <path>`:同时指定核心服务与两个节点的私有目录根,脚本自动派生 `private`、`private-nodea`、`private-nodeb`。
- `--private-core <path>`、`--private-nodea <path>`、`--private-nodeb <path>`:分别覆盖单独目录。
- 环境变量可覆盖 `.env` 中写入的值,例如 `export SYS_DEBUG_NETWORK_NAME=my-debug-net`
- `.env` 文件字段:
- `SYS_DEBUG_PRIVATE_CORE`
- `SYS_DEBUG_PRIVATE_NODEA`
- `SYS_DEBUG_PRIVATE_NODEB`
- `SYS_DEBUG_TMP_DIR`
- `SYS_DEBUG_NETWORK_NAME`
- `SYS_DEBUG_NETWORK_SUBNET`
- `SYS_DEBUG_NETWORK_GATEWAY`
- `SYS_DEBUG_PROJECT_NAME`
- `SYS_DEBUG_CONTAINER_PREFIX`
- `ARGUS_BUILD_UID` / `ARGUS_BUILD_GID`
## 脚本说明
- `scripts/common.sh`:通用函数与环境加载。
- `scripts/network-create.sh` / `network-destroy.sh`:管理 external 网络。
- `scripts/00_debug_all.sh`:顺序执行 01–08(默认不执行 09)。
- `scripts/clean-data.sh`:选择性清理宿主机私有数据。
- `scripts/08_restart_agent_reregister.sh`:将 node-b 切换到 `SYS_DEBUG_NODEB_FIXED_IP`(默认 `172.30.0.200`),如果目标地址与当前 IP 相同脚本会报错提醒重新选择地址。
- 其它 `01`–`09` 脚本与测试目录对应,但针对参数化路径及网络做了调整。
## 注意事项
- 若宿主机未安装 Docker,脚本将提示错误并退出。
- 当指定的私有目录已存在数据时,脚本不会清理,请确认内容安全后再复用。
- 与测试环境共用镜像:请提前执行仓库根目录的 `./build/build_images.sh`

View File

@ -0,0 +1,147 @@
# Compose definition of the ARGUS debug stack.
version: "3.8"
# All services attach to one network that is declared external, i.e. it must
# already exist when the stack is started. Its name is taken from
# SYS_DEBUG_NETWORK_NAME and falls back to argus-debug-net.
networks:
argus-debug-net:
external: true
name: ${SYS_DEBUG_NETWORK_NAME:-argus-debug-net}
services:
# bind: DNS container. Gets a fixed address (SYS_DEBUG_BIND_IP, default
# 172.30.0.2) and mounts the whole core private directory at /private.
# SYS_DEBUG_PRIVATE_CORE has no fallback here, so it must be set (e.g. via .env).
bind:
image: ${BIND_IMAGE_TAG:-argus-bind9:latest}
container_name: ${SYS_DEBUG_CONTAINER_PREFIX:-argus-debug}-bind
networks:
argus-debug-net:
ipv4_address: ${SYS_DEBUG_BIND_IP:-172.30.0.2}
volumes:
- ${SYS_DEBUG_PRIVATE_CORE}:/private
restart: unless-stopped
# master: started after bind; container port 3000 is published on host port
# 32300. Threshold/interval values are set to a few seconds via environment.
# Mounts the master, prometheus and etc subdirectories of the core private dir.
master:
image: ${MASTER_IMAGE_TAG:-argus-master:latest}
container_name: ${SYS_DEBUG_CONTAINER_PREFIX:-argus-debug}-master
depends_on:
- bind
environment:
- OFFLINE_THRESHOLD_SECONDS=6
- ONLINE_THRESHOLD_SECONDS=2
- SCHEDULER_INTERVAL_SECONDS=1
- ARGUS_BUILD_UID=${ARGUS_BUILD_UID:-2133}
- ARGUS_BUILD_GID=${ARGUS_BUILD_GID:-2015}
ports:
- "32300:3000"
volumes:
- ${SYS_DEBUG_PRIVATE_CORE}/argus/master:/private/argus/master
- ${SYS_DEBUG_PRIVATE_CORE}/argus/metric/prometheus:/private/argus/metric/prometheus
- ${SYS_DEBUG_PRIVATE_CORE}/argus/etc:/private/argus/etc
networks:
argus-debug-net:
ipv4_address: ${SYS_DEBUG_MASTER_IP:-172.30.0.10}
restart: unless-stopped
# es: single-node Elasticsearch with xpack security disabled and a 512 MB
# JVM heap; port 9200 is published on the host under the same number.
es:
image: ${ES_IMAGE_TAG:-argus-elasticsearch:latest}
container_name: ${SYS_DEBUG_CONTAINER_PREFIX:-argus-debug}-es
environment:
- discovery.type=single-node
- xpack.security.enabled=false
- ES_JAVA_OPTS=-Xms512m -Xmx512m
- ARGUS_BUILD_UID=${ARGUS_BUILD_UID:-2133}
- ARGUS_BUILD_GID=${ARGUS_BUILD_GID:-2015}
volumes:
- ${SYS_DEBUG_PRIVATE_CORE}/argus/log/elasticsearch:/private/argus/log/elasticsearch
- ${SYS_DEBUG_PRIVATE_CORE}/argus/etc:/private/argus/etc
ports:
- "9200:9200"
networks:
argus-debug-net:
ipv4_address: ${SYS_DEBUG_ES_IP:-172.30.0.20}
restart: unless-stopped
# kibana: started after es and pointed at it through the name
# es.log.argus.com; port 5601 is published on the host.
kibana:
image: ${KIBANA_IMAGE_TAG:-argus-kibana:latest}
container_name: ${SYS_DEBUG_CONTAINER_PREFIX:-argus-debug}-kibana
environment:
- ELASTICSEARCH_HOSTS=http://es.log.argus.com:9200
- ARGUS_BUILD_UID=${ARGUS_BUILD_UID:-2133}
- ARGUS_BUILD_GID=${ARGUS_BUILD_GID:-2015}
volumes:
- ${SYS_DEBUG_PRIVATE_CORE}/argus/log/kibana:/private/argus/log/kibana
- ${SYS_DEBUG_PRIVATE_CORE}/argus/etc:/private/argus/etc
depends_on:
- es
ports:
- "5601:5601"
networks:
argus-debug-net:
ipv4_address: ${SYS_DEBUG_KIBANA_IP:-172.30.0.30}
restart: unless-stopped
# node-a: first agent node. Runs a stock ubuntu:22.04 image whose entrypoint,
# agent binary and fluent-bit assets are all bind-mounted read-only from the
# repository. Resolves names through the bind container (dns: SYS_DEBUG_BIND_IP)
# and publishes container port 2020 on host port 2020.
node-a:
image: ubuntu:22.04
container_name: ${SYS_DEBUG_CONTAINER_PREFIX:-argus-debug}-node-a
hostname: ${SYS_DEBUG_NODEA_HOST:-dev-yyrshare-nbnyx10-cp2f-pod-0}
depends_on:
- master
- bind
- es
environment:
- MASTER_ENDPOINT=http://master.argus.com:3000
- REPORT_INTERVAL_SECONDS=2
- ARGUS_BUILD_UID=${ARGUS_BUILD_UID:-2133}
- ARGUS_BUILD_GID=${ARGUS_BUILD_GID:-2015}
- ES_HOST=es
- ES_PORT=9200
- CLUSTER=local
- RACK=dev
volumes:
# Per-host agent directory; the path component is the node's hostname.
- ${SYS_DEBUG_PRIVATE_NODEA}/argus/agent/${SYS_DEBUG_NODEA_HOST:-dev-yyrshare-nbnyx10-cp2f-pod-0}:/private/argus/agent/${SYS_DEBUG_NODEA_HOST:-dev-yyrshare-nbnyx10-cp2f-pod-0}
- ../../agent/dist/argus-agent:/usr/local/bin/argus-agent:ro
# Entrypoint script is shared with the test stack (../tests/scripts).
- ../tests/scripts/node_entrypoint.sh:/usr/local/bin/node-entrypoint.sh:ro
- ../../log/fluent-bit/build/start-fluent-bit.sh:/assets/start-fluent-bit.sh:ro
- ../../log/fluent-bit/build/etc:/assets/fluent-bit/etc:ro
- ../../log/fluent-bit/build/packages:/assets/fluent-bit/packages:ro
entrypoint:
- /usr/local/bin/node-entrypoint.sh
dns:
- ${SYS_DEBUG_BIND_IP:-172.30.0.2}
ports:
- "2020:2020"
networks:
argus-debug-net:
ipv4_address: ${SYS_DEBUG_NODEA_IP:-172.30.0.101}
restart: unless-stopped
# node-b: second agent node, configured like node-a but with its own hostname,
# private directory and address; container port 2020 is published on host
# port 2021 so both nodes can be reached from the host.
node-b:
image: ubuntu:22.04
container_name: ${SYS_DEBUG_CONTAINER_PREFIX:-argus-debug}-node-b
hostname: ${SYS_DEBUG_NODEB_HOST:-dev-yyrshare-uuuu10-ep2f-pod-0}
depends_on:
- master
- bind
- es
environment:
- MASTER_ENDPOINT=http://master.argus.com:3000
- REPORT_INTERVAL_SECONDS=2
- ARGUS_BUILD_UID=${ARGUS_BUILD_UID:-2133}
- ARGUS_BUILD_GID=${ARGUS_BUILD_GID:-2015}
- ES_HOST=es
- ES_PORT=9200
- CLUSTER=local
- RACK=dev
volumes:
# Per-host agent directory; the path component is the node's hostname.
- ${SYS_DEBUG_PRIVATE_NODEB}/argus/agent/${SYS_DEBUG_NODEB_HOST:-dev-yyrshare-uuuu10-ep2f-pod-0}:/private/argus/agent/${SYS_DEBUG_NODEB_HOST:-dev-yyrshare-uuuu10-ep2f-pod-0}
- ../../agent/dist/argus-agent:/usr/local/bin/argus-agent:ro
- ../tests/scripts/node_entrypoint.sh:/usr/local/bin/node-entrypoint.sh:ro
- ../../log/fluent-bit/build/start-fluent-bit.sh:/assets/start-fluent-bit.sh:ro
- ../../log/fluent-bit/build/etc:/assets/fluent-bit/etc:ro
- ../../log/fluent-bit/build/packages:/assets/fluent-bit/packages:ro
entrypoint:
- /usr/local/bin/node-entrypoint.sh
dns:
- ${SYS_DEBUG_BIND_IP:-172.30.0.2}
ports:
- "2021:2020"
networks:
argus-debug-net:
ipv4_address: ${SYS_DEBUG_NODEB_IP:-172.30.0.102}
restart: unless-stopped

View File

@ -0,0 +1,24 @@
#!/usr/bin/env bash
# Run debug stages 01 through 08 in order. Because of `set -e` the first
# failing stage aborts the run. Stage 09 (teardown) is left to the operator.
set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"

# Stage scripts, resolved relative to this file's directory.
SCRIPTS=(
  "01_bootstrap.sh"
  "02_up.sh"
  "03_wait_ready.sh"
  "04_verify_dns_routing.sh"
  "05_agent_register.sh"
  "06_write_health_and_assert.sh"
  "07_logs_send_and_assert.sh"
  "08_restart_agent_reregister.sh"
)

stage_index=0
while (( stage_index < ${#SCRIPTS[@]} )); do
  stage="${SCRIPTS[stage_index]}"
  printf '%s\n' "[SYS-DEBUG] Running $stage"
  "$SCRIPT_DIR/$stage"
  # Completion line followed by one blank separator line.
  printf '%s\n\n' "[SYS-DEBUG] $stage completed"
  stage_index=$(( stage_index + 1 ))
done

printf '%s\n' "[SYS-DEBUG] Complete. Run scripts/09_down.sh when finished (data retained)."

View File

@ -0,0 +1,210 @@
#!/usr/bin/env bash
# Bootstrap for the debug stack: prepares host directories, builds the agent
# binary and writes the .env file.
set -euo pipefail

# shellcheck source=common.sh
source "$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)/common.sh"

# Working copies of the settings provided by common.sh. Command-line flags
# may override them before they are used.
PRIVATE_ROOT=""
PRIVATE_CORE="${SYS_DEBUG_PRIVATE_CORE}"
PRIVATE_NODEA="${SYS_DEBUG_PRIVATE_NODEA}"
PRIVATE_NODEB="${SYS_DEBUG_PRIVATE_NODEB}"
TMP_DIR_VAL="${SYS_DEBUG_TMP_DIR}"

NETWORK_NAME="${SYS_DEBUG_NETWORK_NAME}"
NETWORK_SUBNET="${SYS_DEBUG_NETWORK_SUBNET}"
NETWORK_GATEWAY="${SYS_DEBUG_NETWORK_GATEWAY}"

PROJECT_NAME="${SYS_DEBUG_PROJECT_NAME}"
CONTAINER_PREFIX="${SYS_DEBUG_CONTAINER_PREFIX}"

# Static address for node-b; defaults to 172.30.0.200 when unset.
NODEB_FIXED_IP="${SYS_DEBUG_NODEB_FIXED_IP:-172.30.0.200}"
# Print the command synopsis (one line) and a one-line description to stdout.
usage() {
  local -a synopsis=(
    "[--private-root PATH]" "[--private-core PATH]"
    "[--private-nodea PATH]" "[--private-nodeb PATH]"
    "[--tmp-dir PATH]" "[--network-name NAME]"
    "[--network-subnet CIDR]" "[--network-gateway IP]"
  )
  local joined
  # printf repeats the format per argument; strip the one trailing space.
  joined="$(printf '%s ' "${synopsis[@]}")"
  printf 'Usage: %s %s\n' "${0##*/}" "${joined% }"
  printf '%s\n' "Prepare directories, build agent binary, and write .env for debug stack."
}
# Abort with "<flag> requires value" unless a value follows the flag.
#   $1 - flag name, $2 - number of arguments left, counting the flag itself
require_flag_value() {
  if (( $2 < 2 )); then
    echo "$1 requires value" >&2
    exit 1
  fi
}

# Every option is accepted as "--flag VALUE" or "--flag=VALUE". Anything
# unrecognised prints the usage text to stderr and exits 1.
while (( $# > 0 )); do
  case "$1" in
    --private-root)      require_flag_value "$1" "$#"; shift; PRIVATE_ROOT="$1" ;;
    --private-root=*)    PRIVATE_ROOT="${1#*=}" ;;
    --private-core)      require_flag_value "$1" "$#"; shift; PRIVATE_CORE="$1" ;;
    --private-core=*)    PRIVATE_CORE="${1#*=}" ;;
    --private-nodea)     require_flag_value "$1" "$#"; shift; PRIVATE_NODEA="$1" ;;
    --private-nodea=*)   PRIVATE_NODEA="${1#*=}" ;;
    --private-nodeb)     require_flag_value "$1" "$#"; shift; PRIVATE_NODEB="$1" ;;
    --private-nodeb=*)   PRIVATE_NODEB="${1#*=}" ;;
    --tmp-dir)           require_flag_value "$1" "$#"; shift; TMP_DIR_VAL="$1" ;;
    --tmp-dir=*)         TMP_DIR_VAL="${1#*=}" ;;
    --network-name)      require_flag_value "$1" "$#"; shift; NETWORK_NAME="$1" ;;
    --network-name=*)    NETWORK_NAME="${1#*=}" ;;
    --network-subnet)    require_flag_value "$1" "$#"; shift; NETWORK_SUBNET="$1" ;;
    --network-subnet=*)  NETWORK_SUBNET="${1#*=}" ;;
    --network-gateway)   require_flag_value "$1" "$#"; shift; NETWORK_GATEWAY="$1" ;;
    --network-gateway=*) NETWORK_GATEWAY="${1#*=}" ;;
    -h|--help)
      usage
      exit 0
      ;;
    *)
      echo "Unknown argument: $1" >&2
      usage >&2
      exit 1
      ;;
  esac
  shift
done
# --private-root, when given, replaces all three per-role directories.
if [[ -n "$PRIVATE_ROOT" ]]; then
  PRIVATE_CORE="$PRIVATE_ROOT/private"
  PRIVATE_NODEA="$PRIVATE_ROOT/private-nodea"
  PRIVATE_NODEB="$PRIVATE_ROOT/private-nodeb"
fi

# Convert every path to absolute form.
PRIVATE_CORE=$(abs_path "$PRIVATE_CORE")
PRIVATE_NODEA=$(abs_path "$PRIVATE_NODEA")
PRIVATE_NODEB=$(abs_path "$PRIVATE_NODEB")
TMP_DIR_VAL=$(abs_path "$TMP_DIR_VAL")

log "Preparing directories under $PRIVATE_CORE"
required_dirs=(
  "$PRIVATE_CORE/argus/etc"
  "$PRIVATE_CORE/argus/bind"
  "$PRIVATE_CORE/argus/master"
  "$PRIVATE_CORE/argus/metric/prometheus"
  "$PRIVATE_CORE/argus/log/elasticsearch"
  "$PRIVATE_CORE/argus/log/kibana"
  "$PRIVATE_NODEA/argus/agent/$HOST_A/health"
  "$PRIVATE_NODEB/argus/agent/$HOST_B/health"
  "$TMP_DIR_VAL"
)
mkdir -p "${required_dirs[@]}"

log "Aligning ownership for core directories"
# Best effort: a failing chown is ignored and its error output discarded.
chown -R "${ARGUS_BUILD_UID}:${ARGUS_BUILD_GID}" \
  "$PRIVATE_CORE/argus/log/elasticsearch" \
  "$PRIVATE_CORE/argus/log/kibana" \
  "$PRIVATE_CORE/argus/etc" 2>/dev/null || true

log "Distributing update-dns.sh"
BIND_UPDATE_SRC="$REPO_ROOT/src/bind/build/update-dns.sh"
BIND_UPDATE_DEST="$PRIVATE_CORE/argus/etc/update-dns.sh"
if [[ ! -f "$BIND_UPDATE_SRC" ]]; then
  # A missing helper is only a warning; bootstrap continues.
  echo "[WARN] Missing $BIND_UPDATE_SRC" >&2
else
  cp "$BIND_UPDATE_SRC" "$BIND_UPDATE_DEST"
  chmod +x "$BIND_UPDATE_DEST"
fi

require_docker
# Exit 1 with a build hint unless the docker image $1 exists locally.
ensure_image() {
  local tag="$1"
  docker image inspect "$tag" >/dev/null 2>&1 && return 0
  echo "[ERR] Missing image: $tag. Run ./build/build_images.sh" >&2
  exit 1
}
log "Ensuring required images exist"
for required_image in \
  "${ES_IMAGE_TAG:-argus-elasticsearch:latest}" \
  "${KIBANA_IMAGE_TAG:-argus-kibana:latest}" \
  "${BIND_IMAGE_TAG:-argus-bind9:latest}" \
  "${MASTER_IMAGE_TAG:-argus-master:latest}"; do
  ensure_image "$required_image"
done

log "Building agent binary"
# Build from inside src/agent; the subshell keeps this script's working
# directory unchanged.
(
  cd "$REPO_ROOT/src/agent"
  ./scripts/build_binary.sh
)

AGENT_BIN="$REPO_ROOT/src/agent/dist/argus-agent"
[[ -x "$AGENT_BIN" ]] || {
  echo "[ERR] Agent binary not found at $AGENT_BIN" >&2
  exit 1
}
# Record where the binary lives; 08_restart_agent_reregister.sh reads this file.
echo "$AGENT_BIN" > "$TMP_DIR_VAL/agent_binary_path"
log "Preparing environment file contents"
# Stage the new contents next to the final .env so the later `mv` is a
# same-directory rename, and remove the staging file on every exit path
# (it is already gone after a successful mv, so the rm is then a no-op).
tmp_env="$(mktemp "${ENV_FILE}.XXXXXX")"
trap 'rm -f -- "$tmp_env"' EXIT
cat > "$tmp_env" <<EOF
SYS_DEBUG_PRIVATE_CORE=$PRIVATE_CORE
SYS_DEBUG_PRIVATE_NODEA=$PRIVATE_NODEA
SYS_DEBUG_PRIVATE_NODEB=$PRIVATE_NODEB
SYS_DEBUG_TMP_DIR=$TMP_DIR_VAL
SYS_DEBUG_NETWORK_NAME=$NETWORK_NAME
SYS_DEBUG_NETWORK_SUBNET=$NETWORK_SUBNET
SYS_DEBUG_NETWORK_GATEWAY=$NETWORK_GATEWAY
SYS_DEBUG_PROJECT_NAME=$PROJECT_NAME
SYS_DEBUG_CONTAINER_PREFIX=$CONTAINER_PREFIX
SYS_DEBUG_NODEA_HOST=$HOST_A
SYS_DEBUG_NODEB_HOST=$HOST_B
SYS_DEBUG_BIND_IP=${SYS_DEBUG_BIND_IP:-172.30.0.2}
SYS_DEBUG_MASTER_IP=${SYS_DEBUG_MASTER_IP:-172.30.0.10}
SYS_DEBUG_ES_IP=${SYS_DEBUG_ES_IP:-172.30.0.20}
SYS_DEBUG_KIBANA_IP=${SYS_DEBUG_KIBANA_IP:-172.30.0.30}
SYS_DEBUG_NODEA_IP=${SYS_DEBUG_NODEA_IP:-172.30.0.101}
SYS_DEBUG_NODEB_IP=${SYS_DEBUG_NODEB_IP:-172.30.0.102}
SYS_DEBUG_NODEB_FIXED_IP=$NODEB_FIXED_IP
ARGUS_BUILD_UID=$ARGUS_BUILD_UID
ARGUS_BUILD_GID=$ARGUS_BUILD_GID
EOF

if [[ -f "$ENV_FILE" ]]; then
  if cmp -s "$tmp_env" "$ENV_FILE"; then
    # Nothing changed: keep the existing file, only make sure the lock copy exists.
    log ".env already up-to-date"
    rm -f "$tmp_env"
    if [[ ! -f "$DEBUG_ROOT/.env.lock" ]]; then
      cp "$ENV_FILE" "$DEBUG_ROOT/.env.lock"
    fi
  else
    # Contents differ: keep the previous file as .env.bak before replacing it.
    mv "$ENV_FILE" "$ENV_FILE.bak"
    mv "$tmp_env" "$ENV_FILE"
    cp "$ENV_FILE" "$DEBUG_ROOT/.env.lock"
    log "Bootstrap updated .env (previous saved at ${ENV_FILE}.bak)"
  fi
else
  mv "$tmp_env" "$ENV_FILE"
  cp "$ENV_FILE" "$DEBUG_ROOT/.env.lock"
  log "Bootstrap created .env at $ENV_FILE"
fi

19
src/sys/debug/scripts/02_up.sh Executable file
View File

@ -0,0 +1,19 @@
#!/usr/bin/env bash
# Start the debug compose stack in the background. Requires the .env written
# by 01_bootstrap.sh and the network created by network-create.sh.
set -euo pipefail

# shellcheck source=common.sh
source "$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)/common.sh"

ensure_env_file
ensure_paths_defined
require_docker

# The compose file declares the network as external, so check for it up front.
docker network inspect "$SYS_DEBUG_NETWORK_NAME" >/dev/null 2>&1 || {
  echo "[ERR] Network $SYS_DEBUG_NETWORK_NAME not found. Run scripts/network-create.sh first." >&2
  exit 1
}

log "Starting debug stack on project $SYS_DEBUG_PROJECT_NAME"
compose up -d
log "Services started: master:32300 es:9200 kibana:5601 node-a:2020 node-b:2021"

View File

@ -0,0 +1,73 @@
#!/usr/bin/env bash
# Wait until Elasticsearch, Kibana, the master and both node endpoints
# respond, then check the bind configuration.
set -euo pipefail

# shellcheck source=common.sh
source "$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)/common.sh"

ensure_env_file
ensure_paths_defined

# Print what `compose ps -q` reports for the given service name.
service_id() {
  local service="$1"
  compose ps -q "$service"
}
# Poll a URL until curl succeeds.
#   $1 - URL to fetch
#   $2 - maximum number of attempts (default 120), 5 seconds apart
# Prints a progress line per failed attempt. Returns 0 on the first
# successful fetch, 1 (with an error on stderr) when attempts run out.
wait_http() {
  local url="$1"
  local attempts="${2:-120}"
  local try
  for (( try = 1; try <= attempts; try++ )); do
    curl -fsS "$url" >/dev/null 2>&1 && return 0
    echo "[..] waiting $url ($try/$attempts)"
    sleep 5
  done
  echo "[ERR] Timeout waiting for $url" >&2
  return 1
}
log "Waiting for ES/Kibana/Master/Fluent Bit/Bind"

# Elasticsearch counts as ready once the cluster health endpoint answers a
# wait_for_status=yellow request successfully.
max=120
es_ready=false
for (( attempt = 1; attempt <= max; attempt++ )); do
  if curl -fsS "http://localhost:9200/_cluster/health?wait_for_status=yellow&timeout=1s" >/dev/null 2>&1; then
    es_ready=true
    break
  fi
  echo "[..] waiting ES ($attempt/$max)"
  sleep 5
done
if [[ "$es_ready" != true ]]; then
  echo "[ERR] ES not ready" >&2
  exit 1
fi
log "Waiting for Kibana to be available (HTTP 200)"

# Kibana is ready when /api/status answers 200 AND its body reports
# "level":"available". Body and status code are taken from ONE request
# (curl -w appends the code on a final line) so they always describe the
# same response. curl prints 000 by itself when the connection fails, so no
# extra fallback output is added that could duplicate it.
kb_max=180
kb_ready=false
nl=$'\n'
for (( kb_attempt = 1; kb_attempt <= kb_max; kb_attempt++ )); do
  reply=$(curl -sS -w '\n%{http_code}' "http://localhost:5601/api/status" 2>/dev/null || true)
  code=${reply##*"$nl"}
  body=${reply%"$nl"*}
  # Only empty when curl produced no output at all (e.g. curl not runnable).
  [[ -n "$code" ]] || code=000
  if [[ "$code" == "200" ]] && grep -q '"level":"available"' <<<"$body"; then
    log "Kibana available"
    kb_ready=true
    break
  fi
  echo "[..] waiting kibana 200 ($kb_attempt/$kb_max), last_code=$code"
  sleep 5
done
if [[ "$kb_ready" != true ]]; then
  echo "[ERR] Kibana did not reach HTTP 200" >&2
  exit 1
fi
# Master readiness endpoint, then the metrics endpoint published by each node.
for endpoint in \
  "http://localhost:32300/readyz" \
  "http://localhost:2020/api/v2/metrics" \
  "http://localhost:2021/api/v2/metrics"; do
  wait_http "$endpoint" 120
done

# Validate the bind configuration inside its container, if one is running.
BIND_ID="$(service_id bind)"
if [[ -z "$BIND_ID" ]]; then
  echo "[WARN] bind container id not found" >&2
else
  docker exec "$BIND_ID" named-checkconf >/dev/null
fi

log "All services are ready"

View File

@ -0,0 +1,51 @@
#!/usr/bin/env bash
# Check that master.argus.com resolves through bind and inside both nodes.
set -euo pipefail

# shellcheck source=common.sh
source "$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)/common.sh"

ensure_env_file
ensure_paths_defined

# Print what `compose ps -q` reports for the given service name.
service_id() {
  local service="$1"
  compose ps -q "$service"
}
log "Verifying DNS routing via bind"

# The master.argus.com file must exist under the shared etc directory.
MASTER_FILE="$SYS_DEBUG_PRIVATE_CORE/argus/etc/master.argus.com"
[[ -f "$MASTER_FILE" ]] || {
  echo "[ERR] master.argus.com file missing at $MASTER_FILE" >&2
  exit 1
}
MASTER_IP_HOST="$(tr -d '\r\n' < "$MASTER_FILE" || true)"
log "master.argus.com file content: $MASTER_IP_HOST"

# Ask bind itself; a missing container only skips this step.
BIND_CID="$(service_id bind)"
if [[ -z "$BIND_CID" ]]; then
  echo "[WARN] bind container not found; skip dig" >&2
else
  DIG_IP="$(docker exec "$BIND_CID" dig +short master.argus.com A | tail -n1 || true)"
  log "dig(master.argus.com) from bind container -> $DIG_IP"
  [[ -n "$DIG_IP" ]] || {
    echo "[ERR] bind did not resolve master.argus.com" >&2
    exit 1
  }
fi

# Each node must be able to resolve the master name with its own resolver.
for node in node-a node-b; do
  CID="$(service_id "$node")"
  [[ -n "$CID" ]] || {
    echo "[ERR] Container for $node not found" >&2
    exit 1
  }
  log "Checking resolution inside $node"
  docker exec "$CID" getent hosts master.argus.com >/dev/null 2>&1 || {
    echo "[ERR] $node cannot resolve master.argus.com" >&2
    exit 1
  }
  RES="$(docker exec "$CID" getent hosts master.argus.com | awk '{print $1}' | head -n1)"
  log "$node resolved master.argus.com -> $RES"
done

log "DNS routing verified"

View File

@ -0,0 +1,84 @@
#!/usr/bin/env bash
# Wait for both agent nodes to register with the master and record their
# node IDs and initial IPs under the temp directory.
set -euo pipefail

# shellcheck source=common.sh
source "$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)/common.sh"

ensure_env_file
ensure_paths_defined

# Scratch directory for the files produced by this script.
TMP_DIR_LOCAL="${TMP_DIR}"
mkdir -p "${TMP_DIR_LOCAL}"

API_BASE="http://localhost:32300/api/v1/master"

log "Waiting for agent nodes to register"
# Look up a node by name in a saved node-list JSON file.
#   $1 - node name to find
#   $2 - file that receives the node id when the node is found
#   $3 - JSON file holding a list of node objects
# Prints the id and writes it to $2 on a match; prints and writes nothing
# when no entry has that name.
# NOTE(review): the embedded Python had no indentation as received, which
# Python rejects; block indentation is restored here.
extract_node() {
  local name="$1"
  local output="$2"
  local json_file="$3"
  python3 - "$name" "$output" "$json_file" <<'PY'
import json, sys, pathlib

name = sys.argv[1]
out = pathlib.Path(sys.argv[2])
json_file = sys.argv[3]
with open(json_file, 'r') as fh:
    data = json.load(fh)
node = next((n for n in data if n.get("name") == name), None)
if node:
    out.write_text(node["id"])
    print(node["id"])
PY
}
# Poll the master's node list (60 tries, 2 s apart) until both hosts appear.
#
# ID files from an earlier run are removed first: extract_node only writes a
# file when it finds the node, so a leftover file would otherwise satisfy the
# "-s" checks below before the node has actually registered.
rm -f -- "$TMP_DIR_LOCAL/node_id_a" "$TMP_DIR_LOCAL/node_id_b"

ID_A=""
ID_B=""
for _ in {1..60}; do
  sleep 2
  resp=$(curl -fsS "$API_BASE/nodes" 2>/dev/null || true)
  # Only a body starting with '[' is treated as a node list. A pattern match
  # is used instead of `echo | head -c1 | grep`, which under pipefail can
  # fail with SIGPIPE on a large body and be misread as "not a list".
  [[ "$resp" == '['* ]] || continue
  printf '%s\n' "$resp" > "$TMP_DIR_LOCAL/nodes_list.json"
  ID_A=$(extract_node "$HOST_A" "$TMP_DIR_LOCAL/node_id_a" "$TMP_DIR_LOCAL/nodes_list.json" 2>/dev/null || true)
  ID_B=$(extract_node "$HOST_B" "$TMP_DIR_LOCAL/node_id_b" "$TMP_DIR_LOCAL/nodes_list.json" 2>/dev/null || true)
  if [[ -s "$TMP_DIR_LOCAL/node_id_a" && -s "$TMP_DIR_LOCAL/node_id_b" ]]; then
    break
  fi
done

if [[ ! -s "$TMP_DIR_LOCAL/node_id_a" || ! -s "$TMP_DIR_LOCAL/node_id_b" ]]; then
  echo "[ERR] Agents did not register in time" >&2
  exit 1
fi
# Download the detail document of node $1 from the master API into file $2.
node_detail() {
  local node_id="$1"
  local dest="$2"
  curl -fsS "$API_BASE/nodes/$node_id" -o "$dest"
}
# Fetch the detail document of each registered node.
for suffix in a b; do
  node_detail "$(cat "$TMP_DIR_LOCAL/node_id_${suffix}")" "$TMP_DIR_LOCAL/detail_${suffix}.json"
done

# Store meta_data.ip from each detail document as the node's initial IP.
# The Python assertion fails when the field is missing or empty.
for suffix in a b; do
  python3 - "$TMP_DIR_LOCAL/detail_${suffix}.json" "$TMP_DIR_LOCAL/initial_ip_${suffix}" <<'PY'
import json
import pathlib
import sys

node = json.load(open(sys.argv[1]))
address = node.get("meta_data", {}).get("ip")
assert address, "missing ip"
pathlib.Path(sys.argv[2]).write_text(address)
PY
done

# Both agent directories must contain a node.json by now.
NODE_JSON_A="$SYS_DEBUG_PRIVATE_NODEA/argus/agent/$HOST_A/node.json"
NODE_JSON_B="$SYS_DEBUG_PRIVATE_NODEB/argus/agent/$HOST_B/node.json"
if [[ ! -f "$NODE_JSON_A" ]]; then
  echo "[ERR] node.json missing for $HOST_A" >&2
  exit 1
fi
if [[ ! -f "$NODE_JSON_B" ]]; then
  echo "[ERR] node.json missing for $HOST_B" >&2
  exit 1
fi

log "Agents registered: $(cat "$TMP_DIR_LOCAL/node_id_a") , $(cat "$TMP_DIR_LOCAL/node_id_b")"

View File

@ -0,0 +1,78 @@
#!/usr/bin/env bash
# Write health files for both nodes and assert that the master reports them
# and that nodes.json lists two nodes.
set -euo pipefail

# shellcheck source=common.sh
source "$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)/common.sh"

ensure_env_file
ensure_paths_defined

API_BASE="http://localhost:32300/api/v1/master"

# Per-node health directories on the host.
HEALTH_A="${SYS_DEBUG_PRIVATE_NODEA}/argus/agent/${HOST_A}/health"
HEALTH_B="${SYS_DEBUG_PRIVATE_NODEB}/argus/agent/${HOST_B}/health"
# Create directory $1 if needed and drop two "healthy" status files into it:
# log-fluentbit.json and metric-node-exporter.json (fixed timestamp).
write_health() {
  local target="$1"
  local payload='{ "status": "healthy", "timestamp": "2025-10-13T12:05:00Z" }'
  local module
  mkdir -p "$target"
  for module in log-fluentbit metric-node-exporter; do
    printf '%s\n' "$payload" > "$target/${module}.json"
  done
}
log "Writing health files for both nodes"
for health_dir in "$HEALTH_A" "$HEALTH_B"; do
  write_health "$health_dir"
done

# Node IDs are read from the files written by 05_agent_register.sh.
ID_A="$TMP_DIR/node_id_a"
ID_B="$TMP_DIR/node_id_b"
if [[ ! -f "$ID_A" || ! -f "$ID_B" ]]; then
  echo "[ERR] node id files missing in $TMP_DIR" >&2
  exit 1
fi
ID_A_VAL="$(cat "$ID_A")"
ID_B_VAL="$(cat "$ID_B")"
# Poll the master until node $1 reports both health keys.
#   $1 - node id
# Tries 40 times, 2 s apart. Each non-empty response is saved to
# $TMP_DIR/node_<id>_detail.json. Returns 0 as soon as the "health" object
# contains both "log-fluentbit" and "metric-node-exporter", 1 otherwise.
# All working variables are local so nothing leaks into the caller.
check_health() {
  local id="$1"
  local tries=40
  local attempt resp
  local detail="$TMP_DIR/node_${id}_detail.json"
  for (( attempt = 1; attempt <= tries; attempt++ )); do
    sleep 2
    resp=$(curl -fsS "$API_BASE/nodes/$id" 2>/dev/null || true)
    [[ -n "$resp" ]] || continue
    printf '%s\n' "$resp" > "$detail"
    # A non-zero exit (keys missing or unparsable JSON) just means "retry".
    if python3 - "$detail" <<'PY'
import json,sys
node=json.load(open(sys.argv[1]))
h=node.get("health",{})
if "log-fluentbit" in h and "metric-node-exporter" in h:
    sys.exit(0)
sys.exit(1)
PY
    then
      return 0
    fi
  done
  return 1
}
if ! check_health "$ID_A_VAL"; then
  echo "[ERR] health keys not reported for node A" >&2
  exit 1
fi
if ! check_health "$ID_B_VAL"; then
  echo "[ERR] health keys not reported for node B" >&2
  exit 1
fi

# nodes.json must exist and be a JSON list with exactly two entries.
NODES_JSON="$SYS_DEBUG_PRIVATE_CORE/argus/metric/prometheus/nodes.json"
[[ -f "$NODES_JSON" ]] || {
  echo "[ERR] nodes.json missing at $NODES_JSON" >&2
  exit 1
}
python3 - "$NODES_JSON" <<'PY'
import json
import sys

with open(sys.argv[1]) as handle:
    listed = json.load(handle)
if not isinstance(listed, list):
    sys.exit("nodes.json expected list")
if len(listed) != 2:
    sys.exit(f"expected 2 nodes online, got {len(listed)}")
PY

log "Health reported and nodes.json has 2 online nodes"

View File

@ -0,0 +1,73 @@
#!/usr/bin/env bash
# Append sample log lines inside both node containers and assert that the
# Elasticsearch document counts grow.
set -euo pipefail

# shellcheck source=common.sh
source "$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)/common.sh"

ensure_env_file
ensure_paths_defined

log "Sending logs and asserting ES counts"
# Print the document count Elasticsearch reports for index pattern $1.
# Always prints a plain integer: 0 when the response carries no "count"
# field (error document, empty body). `sed -n ... p` emits only the extracted
# number, so an unexpected response can no longer reach the arithmetic that
# consumes this output.
get_count() {
  local idx="$1"
  curl -s "http://localhost:9200/${idx}/_count?ignore_unavailable=true&allow_no_indices=true" \
    | sed -nE 's/.*"count":([0-9]+).*/\1/p' \
    | awk 'NF{print $0;exit} END{if(NR==0)print 0}'
}
# Baseline document counts before any log line is sent.
train0="$(get_count "train-*")"
infer0="$(get_count "infer-*")"
base=$(( train0 + infer0 ))
log "initial counts: train=${train0} infer=${infer0} total=${base}"

# Print what `compose ps -q` reports for the given service name.
service_id() {
  local service="$1"
  compose ps -q "$service"
}
# Append three sample log lines inside a container.
#   $1 - container id
#   $2 - host tag embedded in every line
# Two INFO lines go to /logs/train/train-demo.log and one WARN line to
# /logs/infer/infer-demo.log; the timestamp is taken inside the container.
send_logs() {
  local container="$1"
  local tag="$2"
  local train_log="/logs/train/train-demo.log"
  local infer_log="/logs/infer/infer-demo.log"
  # Evaluated by the container's shell, not by this script.
  local stamp="ts=\$(date '+%F %T')"

  docker exec "$container" sh -lc 'mkdir -p /logs/train /logs/infer'
  docker exec "$container" sh -lc "$stamp; echo \"\$ts INFO [$tag] training step=1 loss=1.23 model=bert\" >> $train_log"
  docker exec "$container" sh -lc "$stamp; echo \"\$ts INFO [$tag] training step=2 loss=1.10 model=bert\" >> $train_log"
  docker exec "$container" sh -lc "$stamp; echo \"\$ts WARN [$tag] inference slow on batch=2 latency=1.9s\" >> $infer_log"
}
CID_A="$(service_id node-a)"
CID_B="$(service_id node-b)"
if [[ -z "$CID_A" || -z "$CID_B" ]]; then
  echo "[ERR] node containers not found" >&2
  exit 1
fi

send_logs "$CID_A" "host01"
send_logs "$CID_B" "host02"

log "Waiting for ES to ingest"
sleep 10

train1="$(get_count "train-*")"
infer1="$(get_count "infer-*")"
final=$(( train1 + infer1 ))
log "final counts: train=${train1} infer=${infer1} total=${final}"

# The total must have grown and must reach at least 4 documents.
if (( final <= base )); then
  echo "[ERR] ES total did not increase (${base} -> ${final})" >&2
  exit 1
fi
if (( final < 4 )); then
  echo "[ERR] ES total below expected threshold: ${final} < 4" >&2
  exit 1
fi

# Cluster status must be green or yellow.
es_health=$(curl -s "http://localhost:9200/_cluster/health" | grep -o '"status":"[^"]*"' | cut -d'"' -f4)
case "$es_health" in
  green|yellow)
    ;;
  *)
    echo "[ERR] ES health not green/yellow: $es_health" >&2
    exit 1
    ;;
esac

# Kibana being unreachable is only reported, not treated as a failure.
curl -fs "http://localhost:5601/api/status" >/dev/null 2>&1 \
  || echo "[WARN] Kibana status endpoint not available"

log "ES counts increased and services healthy"

View File

@ -0,0 +1,110 @@
#!/usr/bin/env bash
# Recreate node-b with a different static IP and verify that the master
# records the new address for the same node id.
set -euo pipefail

# shellcheck source=common.sh
source "$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)/common.sh"

ensure_env_file
ensure_paths_defined

API_BASE="http://localhost:32300/api/v1/master"

# Entrypoint script shared with the test stack.
NODE_ENTRYPOINT="$DEBUG_ROOT/../tests/scripts/node_entrypoint.sh"
if [[ ! -f "$NODE_ENTRYPOINT" ]]; then
  echo "[ERR] node entrypoint script missing at $NODE_ENTRYPOINT" >&2
  exit 1
fi

TARGET_FIXED_IP="${SYS_DEBUG_NODEB_FIXED_IP:-172.30.0.200}"

# Inputs produced by 05_agent_register.sh.
ID_B_FILE="$TMP_DIR/node_id_b"
IP_INIT_FILE="$TMP_DIR/initial_ip_b"
if [[ ! -f "$ID_B_FILE" || ! -f "$IP_INIT_FILE" ]]; then
  echo "[ERR] Required node id/ip files missing in $TMP_DIR" >&2
  exit 1
fi
ID_B="$(cat "$ID_B_FILE")"
IP0_B="$(cat "$IP_INIT_FILE")"

# Snapshot of node-b as the master sees it before the restart.
DETAIL_BEFORE="$TMP_DIR/node_b_before.json"
curl -fsS "$API_BASE/nodes/$ID_B" -o "$DETAIL_BEFORE"
# last_updated value before the restart ("" when the field is absent).
LAST0=$(python3 - "$DETAIL_BEFORE" <<'PY'
import json
import sys

with open(sys.argv[1]) as handle:
    print(json.load(handle).get("last_updated", ""))
PY
)

# meta_data.ip before the restart ("" when absent).
IP_BEFORE=$(python3 - "$DETAIL_BEFORE" <<'PY'
import json
import sys

with open(sys.argv[1]) as handle:
    print(json.load(handle).get("meta_data", {}).get("ip", ""))
PY
)

# The master must still show the IP recorded at registration time ...
[[ "$IP_BEFORE" == "$IP0_B" ]] || {
  echo "[ERR] Expected initial IP $IP0_B for node-b, got $IP_BEFORE" >&2
  exit 1
}
# ... and that IP must differ from the target, otherwise nothing would change.
[[ "$IP_BEFORE" != "$TARGET_FIXED_IP" ]] || {
  echo "[ERR] node-b current IP $IP_BEFORE already matches target $TARGET_FIXED_IP. Configure SYS_DEBUG_NODEB_FIXED_IP to a different address before rerun." >&2
  exit 1
}
# Print what `compose ps -q` reports for the given service name.
service_id() {
  compose ps -q "$1"
}

# Validate everything BEFORE touching the running container, so a missing
# docker binary or agent path cannot leave the stack without node-b.
require_docker
AGENT_BIN_RECORD="$TMP_DIR/agent_binary_path"
[[ -f "$AGENT_BIN_RECORD" ]] || { echo "[ERR] Agent binary path missing in $TMP_DIR" >&2; exit 1; }
AGENT_BIN_PATH="$(cat "$AGENT_BIN_RECORD")"
[[ -f "$AGENT_BIN_PATH" ]] || { echo "[ERR] Agent binary path missing in $TMP_DIR" >&2; exit 1; }

log "Recreating node-b (old IP $IP_BEFORE) with static IP $TARGET_FIXED_IP"
# Best effort: remove the compose-managed container, then any container left
# under the same name (e.g. from a previous run of this script).
compose rm -sf node-b >/dev/null 2>&1 || true
CONTAINER_NAME="${SYS_DEBUG_CONTAINER_PREFIX:-argus-debug}-node-b"
docker rm -f "$CONTAINER_NAME" >/dev/null 2>&1 || true

# Start the replacement with plain `docker run`, using the same settings as
# the compose service apart from the fixed IP. It is therefore NOT part of
# the compose project.
docker run -d \
  --name "$CONTAINER_NAME" \
  --hostname "$HOST_B" \
  --network "$SYS_DEBUG_NETWORK_NAME" \
  --ip "$TARGET_FIXED_IP" \
  --dns "${SYS_DEBUG_BIND_IP:-172.30.0.2}" \
  -e MASTER_ENDPOINT=http://master.argus.com:3000 \
  -e REPORT_INTERVAL_SECONDS=2 \
  -e "ARGUS_BUILD_UID=$ARGUS_BUILD_UID" \
  -e "ARGUS_BUILD_GID=$ARGUS_BUILD_GID" \
  -e ES_HOST=es \
  -e ES_PORT=9200 \
  -e CLUSTER=local \
  -e RACK=dev \
  -p 2021:2020 \
  -v "$SYS_DEBUG_PRIVATE_NODEB/argus/agent/$HOST_B:/private/argus/agent/$HOST_B" \
  -v "$AGENT_BIN_PATH:/usr/local/bin/argus-agent:ro" \
  -v "$NODE_ENTRYPOINT:/usr/local/bin/node-entrypoint.sh:ro" \
  -v "$REPO_ROOT/src/log/fluent-bit/build/start-fluent-bit.sh:/assets/start-fluent-bit.sh:ro" \
  -v "$REPO_ROOT/src/log/fluent-bit/build/etc:/assets/fluent-bit/etc:ro" \
  -v "$REPO_ROOT/src/log/fluent-bit/build/packages:/assets/fluent-bit/packages:ro" \
  --entrypoint /usr/local/bin/node-entrypoint.sh \
  ubuntu:22.04 >/dev/null
log "Waiting for node-b to re-register with new IP"

# Up to 40 polls, 3 s apart. Success means the master shows the target IP
# and a last_updated value that is non-empty and differs from the earlier one.
DETAIL_AFTER="$TMP_DIR/node_b_after.json"
for (( poll = 1; poll <= 40; poll++ )); do
  sleep 3
  curl -fsS "$API_BASE/nodes/$ID_B" -o "$DETAIL_AFTER" || continue
  if python3 - "$DETAIL_AFTER" "$LAST0" "$TARGET_FIXED_IP" <<'PY'
import json
import sys

detail_path, previous_stamp, wanted_ip = sys.argv[1:4]
with open(detail_path) as handle:
    node = json.load(handle)
current_ip = node.get("meta_data", {}).get("ip")
stamp = node.get("last_updated")
refreshed = bool(stamp) and stamp != previous_stamp
sys.exit(0 if current_ip == wanted_ip and refreshed else 1)
PY
  then
    log "node-b IP updated: $IP_BEFORE -> $TARGET_FIXED_IP"
    exit 0
  fi
done

echo "[ERR] node-b did not update to IP $TARGET_FIXED_IP in time" >&2
exit 1

View File

@ -0,0 +1,13 @@
#!/usr/bin/env bash
# Stop the debug stack. Host directories are left untouched.
set -euo pipefail

# shellcheck source=common.sh
source "$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)/common.sh"

ensure_env_file
require_docker

log "Stopping debug stack (project $SYS_DEBUG_PROJECT_NAME)"

# Still best effort, but a failure is now reported instead of being hidden.
if ! compose down --remove-orphans >/dev/null 2>&1; then
  echo "[WARN] compose down reported an error; some containers may still be running" >&2
fi

# 08_restart_agent_reregister.sh replaces node-b with a container started via
# plain `docker run`, which does not belong to the compose project and is
# therefore not removed by `compose down`. Remove it by name (ignored when
# no such container exists).
docker rm -f "${SYS_DEBUG_CONTAINER_PREFIX:-argus-debug}-node-b" >/dev/null 2>&1 || true

log "Containers stopped. No host directories were removed."

View File

@ -0,0 +1,66 @@
#!/usr/bin/env bash
# Remove the debug private/tmp directories from the host, after asking for
# confirmation unless --yes is given.
set -euo pipefail

# shellcheck source=common.sh
source "$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)/common.sh"

ensure_env_file
ensure_paths_defined

# -y/--yes skips the interactive confirmation.
FORCE=false
for arg in "$@"; do
  case "$arg" in
    -y|--yes)
      FORCE=true
      ;;
    -h|--help)
      printf '%s\n' \
        "Usage: ${0##*/} [--yes]" \
        "Safely remove debug private directories after adjusting ownership."
      exit 0
      ;;
    *)
      echo "Unknown argument: $arg" >&2
      exit 1
      ;;
  esac
done

if [[ "$FORCE" != true ]]; then
  read -r -p "This will delete debug private directories. Continue? [y/N] " reply
  # Only an explicit yes continues; anything else is a clean abort.
  if [[ ! "$reply" =~ ^(y|Y|yes|YES)$ ]]; then
    echo "Aborted"
    exit 0
  fi
fi
# Directories to remove; entries that do not exist are skipped.
paths=(
  "$SYS_DEBUG_PRIVATE_CORE"
  "$SYS_DEBUG_PRIVATE_NODEA"
  "$SYS_DEBUG_PRIVATE_NODEB"
  "$SYS_DEBUG_TMP_DIR"
)

require_docker
image="ubuntu:22.04"

for dir in "${paths[@]}"; do
  [[ -d "$dir" ]] || continue

  # Safety net: never run a recursive delete on the filesystem root, however
  # the configured path ended up pointing there.
  if [[ "$(cd "$dir" && pwd -P)" == "/" ]]; then
    echo "[ERR] Refusing to remove filesystem root (configured path: $dir)" >&2
    exit 1
  fi

  log "Fixing ownership for $dir"
  # Hand ownership to the calling user from inside a container first; fall
  # back to a local chown (best effort) when that fails.
  if ! docker run --rm -v "$dir:/target" "$image" chown -R "$(id -u):$(id -g)" /target >/dev/null 2>&1; then
    echo "[WARN] Failed to adjust ownership via $image, attempting local chown" >&2
    chown -R "$(id -u):$(id -g)" "$dir" >/dev/null 2>&1 || true
  fi

  log "Removing $dir"
  # ${dir:?} aborts on an empty value; -- stops option parsing for odd names.
  rm -rf -- "${dir:?}"
done

log "Clean data completed"

96
src/sys/debug/scripts/common.sh Executable file
View File

@ -0,0 +1,96 @@
#!/usr/bin/env bash
# Shared setup for the debug scripts: locates the repository, loads the build
# user and the optional .env file, and fills in defaults for every setting.
set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
DEBUG_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"
REPO_ROOT="$(cd "$DEBUG_ROOT/../../.." && pwd)"
ENV_FILE="$DEBUG_ROOT/.env"

source "$REPO_ROOT/scripts/common/build_user.sh"
load_build_user

# Values from .env are exported (set -a) while the file is being sourced.
if [[ -f "$ENV_FILE" ]]; then
  set -a
  # shellcheck disable=SC1090
  source "$ENV_FILE"
  set +a
fi

# Defaults: each value is assigned only when still unset or empty.
: "${SYS_DEBUG_NETWORK_NAME:=argus-debug-net}"
: "${SYS_DEBUG_NETWORK_SUBNET:=172.30.0.0/16}"
: "${SYS_DEBUG_NETWORK_GATEWAY:=172.30.0.1}"
: "${SYS_DEBUG_PROJECT_NAME:=argus-debug}"
: "${SYS_DEBUG_CONTAINER_PREFIX:=argus-debug}"
: "${SYS_DEBUG_PRIVATE_CORE:=$DEBUG_ROOT/private}"
: "${SYS_DEBUG_PRIVATE_NODEA:=$DEBUG_ROOT/private-nodea}"
: "${SYS_DEBUG_PRIVATE_NODEB:=$DEBUG_ROOT/private-nodeb}"
: "${SYS_DEBUG_TMP_DIR:=$DEBUG_ROOT/tmp}"
: "${ARGUS_BUILD_UID:=2133}"
: "${ARGUS_BUILD_GID:=2015}"
: "${SYS_DEBUG_NODEA_HOST:=dev-yyrshare-nbnyx10-cp2f-pod-0}"
: "${SYS_DEBUG_NODEB_HOST:=dev-yyrshare-uuuu10-ep2f-pod-0}"

# Short aliases for the two node hostnames.
HOST_A="$SYS_DEBUG_NODEA_HOST"
HOST_B="$SYS_DEBUG_NODEB_HOST"

COMPOSE_FILE="$DEBUG_ROOT/docker-compose.yml"
# Print the absolute form of path $1, as computed by Python's
# os.path.abspath.
abs_path() {
  python3 -c 'import os, sys; print(os.path.abspath(sys.argv[1]))' "$1"
}
# Exit 1 with an error on stderr unless command $1 can be resolved.
ensure_command() {
  local tool="$1"
  command -v "$tool" >/dev/null 2>&1 && return 0
  echo "[ERR] Required command '$tool' not found" >&2
  exit 1
}

# Exit 1 unless a `docker` command is available.
require_docker() {
  ensure_command docker
}
# Run a compose subcommand against the debug project and compose file.
# Uses `docker compose` when that subcommand works, otherwise the standalone
# `docker-compose` binary. All arguments are passed through.
compose() {
  require_docker
  local -a runner=(docker-compose)
  if docker compose version >/dev/null 2>&1; then
    runner=(docker compose)
  fi
  "${runner[@]}" -p "$SYS_DEBUG_PROJECT_NAME" -f "$COMPOSE_FILE" "$@"
}
# Exit 1 unless all four directory settings are non-empty, listing the ones
# that are missing. The loop variable is local so a caller's own `name`
# variable is not overwritten.
ensure_paths_defined() {
  local name
  local missing=()
  for name in SYS_DEBUG_PRIVATE_CORE SYS_DEBUG_PRIVATE_NODEA SYS_DEBUG_PRIVATE_NODEB SYS_DEBUG_TMP_DIR; do
    # ${!name:-} reads the variable whose name is stored in $name.
    if [[ -z "${!name:-}" ]]; then
      missing+=("$name")
    fi
  done
  if (( ${#missing[@]} > 0 )); then
    echo "[ERR] Missing required environment variables: ${missing[*]}" >&2
    echo " Run 01_bootstrap.sh first." >&2
    exit 1
  fi
}
# Exit 1 with a hint to run the bootstrap script unless $ENV_FILE exists.
ensure_env_file() {
  [[ -f "$ENV_FILE" ]] && return 0
  echo "[ERR] Missing .env at $ENV_FILE. Run 01_bootstrap.sh first." >&2
  exit 1
}
# Print all arguments, joined as "$*" joins them, behind an "[INFO] " prefix.
log() {
  local message="$*"
  echo "[INFO] $message"
}
# Scratch directory shared by the debug scripts; created when common.sh is
# sourced.
TMP_DIR="${SYS_DEBUG_TMP_DIR}"
mkdir -p -- "${TMP_DIR}"

View File

@ -0,0 +1,76 @@
#!/usr/bin/env bash
# Create the external docker network used by the debug stack, if missing.
set -euo pipefail

# shellcheck source=common.sh
source "$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)/common.sh"

# Start from the values provided by common.sh; flags may override them.
NAME="${SYS_DEBUG_NETWORK_NAME}"
SUBNET="${SYS_DEBUG_NETWORK_SUBNET}"
GATEWAY="${SYS_DEBUG_NETWORK_GATEWAY}"
# Print the synopsis plus the current values of NAME, SUBNET and GATEWAY.
usage() {
  printf '%s\n' \
    "Usage: ${0##*/} [--name NAME] [--subnet CIDR] [--gateway IP]" \
    "Create (if missing) the external debug docker network." \
    "Defaults derived from .env or:" \
    "name = $NAME" \
    "subnet = $SUBNET" \
    "gateway = $GATEWAY"
}
# Abort with "<flag> requires value" unless a value follows the flag.
#   $1 - flag name, $2 - number of arguments left, counting the flag itself
need_value() {
  if (( $2 < 2 )); then
    echo "$1 requires value" >&2
    exit 1
  fi
}

# Options are accepted as "--flag VALUE" or "--flag=VALUE".
while (( $# > 0 )); do
  case "$1" in
    --name)      need_value "$1" "$#"; shift; NAME="$1" ;;
    --name=*)    NAME="${1#*=}" ;;
    --subnet)    need_value "$1" "$#"; shift; SUBNET="$1" ;;
    --subnet=*)  SUBNET="${1#*=}" ;;
    --gateway)   need_value "$1" "$#"; shift; GATEWAY="$1" ;;
    --gateway=*) GATEWAY="${1#*=}" ;;
    -h|--help)
      usage
      exit 0
      ;;
    *)
      echo "Unknown argument: $1" >&2
      usage >&2
      exit 1
      ;;
  esac
  shift
done
require_docker

# Nothing to do when a network with this name already exists.
if docker network inspect "$NAME" >/dev/null 2>&1; then
  log "Network $NAME already exists"
  exit 0
fi

log "Creating network $NAME (subnet=$SUBNET gateway=$GATEWAY)"
create_args=(
  --driver bridge
  --subnet "$SUBNET"
  --gateway "$GATEWAY"
)
docker network create "${create_args[@]}" "$NAME"

# Leave a marker holding the network name; network-destroy.sh deletes it.
mkdir -p "$TMP_DIR"
echo "$NAME" > "$TMP_DIR/network.created"

log "Network $NAME created"

View File

@ -0,0 +1,55 @@
#!/usr/bin/env bash
# Remove the debug docker network when no container is attached to it.
set -euo pipefail

# shellcheck source=common.sh
source "$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)/common.sh"

# Start from the value provided by common.sh; --name may override it.
NAME="${SYS_DEBUG_NETWORK_NAME}"
# Print the synopsis and a one-line description to stdout.
usage() {
  printf '%s\n' \
    "Usage: ${0##*/} [--name NAME]" \
    "Destroy the debug docker network if no containers are attached."
}
# --name is accepted as "--name VALUE" or "--name=VALUE".
while (( $# > 0 )); do
  case "$1" in
    --name)
      if (( $# < 2 )); then
        echo "--name requires value" >&2
        exit 1
      fi
      shift
      NAME="$1"
      ;;
    --name=*)
      NAME="${1#*=}"
      ;;
    -h|--help)
      usage
      exit 0
      ;;
    *)
      echo "Unknown argument: $1" >&2
      usage >&2
      exit 1
      ;;
  esac
  shift
done
require_docker

# A missing network is not an error.
docker network inspect "$NAME" >/dev/null 2>&1 || {
  log "Network $NAME not found; nothing to do"
  exit 0
}

# Space-separated names of the containers still attached to the network.
attached=$(docker network inspect -f '{{range $id, $conf := .Containers}}{{printf "%s " $conf.Name}}{{end}}' "$NAME")
# ${attached// } strips the spaces, so whitespace alone counts as "none".
if [[ -n "${attached// }" ]]; then
  echo "[ERR] Cannot remove network $NAME: still connected containers -> $attached" >&2
  exit 1
fi

log "Deleting network $NAME"
docker network rm "$NAME" >/dev/null
# Drop the marker written by network-create.sh.
rm -f "$TMP_DIR/network.created"

log "Network $NAME removed"