dev_1.0.0_yuyr_2: Resubmit the PR, adding master/agent and the system integration tests #17
@@ -1,7 +1,42 @@
 #!/usr/bin/env bash
 set -euo pipefail
+
+# ES endpoint and wait strategy
 ES="${ES:-http://localhost:9200}"
+es_wait_attempts="${ES_WAIT_ATTEMPTS:-60}"   # total attempts to wait for ES
+es_wait_interval="${ES_WAIT_INTERVAL:-2}"    # seconds between attempts
+
 echo "[i] Querying ES endpoint: $ES"
+
+wait_for_es() {
+  local attempt=1
+  while (( attempt <= es_wait_attempts )); do
+    # Wait for the cluster to reach at least yellow status; retry if the request fails
+    if curl -fsS "$ES/_cluster/health?wait_for_status=yellow&timeout=1s" >/dev/null 2>&1; then
+      echo "[ok] Elasticsearch is ready (attempt=${attempt}/${es_wait_attempts})"
+      return 0
+    fi
+    echo "[..] Waiting for Elasticsearch (${attempt}/${es_wait_attempts})"
+    sleep "${es_wait_interval}"
+    (( attempt++ ))
+  done
+  echo "[err] Elasticsearch still unavailable after ${es_wait_attempts} attempts"
+  return 1
+}
+
+safe_count() {
+  # Return 0 for missing indices so a 404 does not fail the script
+  local pattern="$1"
+  local json
+  json=$(curl -fsS "$ES/${pattern}/_count?ignore_unavailable=true&allow_no_indices=true" 2>/dev/null || echo '{}')
+  echo "$json" | sed -E 's/.*"count":([0-9]+).*/\1/' | awk 'NF{print $0;exit} END{if(NR==0)print 0}'
+}
+
+wait_for_es
+
+# List the relevant indices (may be empty, which is allowed)
 curl -fsS "$ES/_cat/indices?v" | egrep 'train-|infer-|logstash' || true
-printf "train-* count: "; curl -fsS "$ES/train-*/_count" | sed -E 's/.*"count":([0-9]+).*/\1/'; echo
-printf "infer-* count: "; curl -fsS "$ES/infer-*/_count" | sed -E 's/.*"count":([0-9]+).*/\1/'; echo
+# Print counts, treating missing indices as 0
+printf "train-* count: "; safe_count "train-*"; echo
+printf "infer-* count: "; safe_count "infer-*"; echo
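Editor's note: the endpoint and the wait behaviour of this check are driven by the environment variables it reads, so it can be tuned per run. A minimal usage sketch follows; the file path is not shown in this hunk, so the script name es_counts.sh is hypothetical.

  # name es_counts.sh is a placeholder for the script in this hunk
  ES=http://localhost:9200 ES_WAIT_ATTEMPTS=30 ES_WAIT_INTERVAL=5 ./es_counts.sh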
src/sys/tests/docker-compose.yml (new file, 139 lines)
@@ -0,0 +1,139 @@
version: "3.8"

networks:
  default:
    name: argus-sys-net
    driver: bridge
    ipam:
      driver: default
      config:
        - subnet: 172.29.0.0/16

services:
  bind:
    image: ${BIND_IMAGE_TAG:-argus-bind9:latest}
    container_name: argus-bind-sys
    networks:
      default:
        ipv4_address: 172.29.0.2
    volumes:
      - ./private:/private
    restart: unless-stopped

  master:
    image: ${MASTER_IMAGE_TAG:-argus-master:latest}
    container_name: argus-master-sys
    depends_on:
      - bind
    environment:
      - OFFLINE_THRESHOLD_SECONDS=6
      - ONLINE_THRESHOLD_SECONDS=2
      - SCHEDULER_INTERVAL_SECONDS=1
      - ARGUS_BUILD_UID=${ARGUS_BUILD_UID:-2133}
      - ARGUS_BUILD_GID=${ARGUS_BUILD_GID:-2015}
    ports:
      - "32300:3000"
    volumes:
      - ./private/argus/master:/private/argus/master
      - ./private/argus/metric/prometheus:/private/argus/metric/prometheus
      - ./private/argus/etc:/private/argus/etc
    networks:
      default:
        ipv4_address: 172.29.0.10
    restart: unless-stopped

  es:
    image: argus-elasticsearch:latest
    container_name: argus-es-sys
    environment:
      - discovery.type=single-node
      - xpack.security.enabled=false
      - ES_JAVA_OPTS=-Xms512m -Xmx512m
      - ARGUS_BUILD_UID=${ARGUS_BUILD_UID:-2133}
      - ARGUS_BUILD_GID=${ARGUS_BUILD_GID:-2015}
    volumes:
      - ./private/argus/log/elasticsearch:/private/argus/log/elasticsearch
      - ./private/argus/etc:/private/argus/etc
    ports:
      - "9200:9200"
    restart: unless-stopped

  kibana:
    image: argus-kibana:latest
    container_name: argus-kibana-sys
    environment:
      - ELASTICSEARCH_HOSTS=http://es.log.argus.com:9200
      - ARGUS_BUILD_UID=${ARGUS_BUILD_UID:-2133}
      - ARGUS_BUILD_GID=${ARGUS_BUILD_GID:-2015}
    volumes:
      - ./private/argus/log/kibana:/private/argus/log/kibana
      - ./private/argus/etc:/private/argus/etc
    depends_on:
      - es
    ports:
      - "5601:5601"
    restart: unless-stopped

  node-a:
    image: ubuntu:22.04
    container_name: argus-node-a
    hostname: dev-yyrshare-nbnyx10-cp2f-pod-0
    depends_on:
      - master
      - bind
      - es
    environment:
      - MASTER_ENDPOINT=http://master.argus.com:3000
      - REPORT_INTERVAL_SECONDS=2
      - ARGUS_BUILD_UID=${ARGUS_BUILD_UID:-2133}
      - ARGUS_BUILD_GID=${ARGUS_BUILD_GID:-2015}
      - ES_HOST=es
      - ES_PORT=9200
      - CLUSTER=local
      - RACK=dev
    volumes:
      - ./private-nodea/argus/agent/dev-yyrshare-nbnyx10-cp2f-pod-0:/private/argus/agent/dev-yyrshare-nbnyx10-cp2f-pod-0
      - ../../agent/dist/argus-agent:/usr/local/bin/argus-agent:ro
      - ./scripts/node_entrypoint.sh:/usr/local/bin/node-entrypoint.sh:ro
      - ../../log/fluent-bit/build/start-fluent-bit.sh:/assets/start-fluent-bit.sh:ro
      - ../../log/fluent-bit/build/etc:/assets/fluent-bit/etc:ro
      - ../../log/fluent-bit/build/packages:/assets/fluent-bit/packages:ro
    entrypoint:
      - /usr/local/bin/node-entrypoint.sh
    dns:
      - 172.29.0.2
    ports:
      - "2020:2020"
    restart: unless-stopped

  node-b:
    image: ubuntu:22.04
    container_name: argus-node-b
    hostname: dev-yyrshare-uuuu10-ep2f-pod-0
    depends_on:
      - master
      - bind
      - es
    environment:
      - MASTER_ENDPOINT=http://master.argus.com:3000
      - REPORT_INTERVAL_SECONDS=2
      - ARGUS_BUILD_UID=${ARGUS_BUILD_UID:-2133}
      - ARGUS_BUILD_GID=${ARGUS_BUILD_GID:-2015}
      - ES_HOST=es
      - ES_PORT=9200
      - CLUSTER=local
      - RACK=dev
    volumes:
      - ./private-node2/argus/agent/dev-yyrshare-uuuu10-ep2f-pod-0:/private/argus/agent/dev-yyrshare-uuuu10-ep2f-pod-0
      - ../../agent/dist/argus-agent:/usr/local/bin/argus-agent:ro
      - ./scripts/node_entrypoint.sh:/usr/local/bin/node-entrypoint.sh:ro
      - ../../log/fluent-bit/build/start-fluent-bit.sh:/assets/start-fluent-bit.sh:ro
      - ../../log/fluent-bit/build/etc:/assets/fluent-bit/etc:ro
      - ../../log/fluent-bit/build/packages:/assets/fluent-bit/packages:ro
    entrypoint:
      - /usr/local/bin/node-entrypoint.sh
    dns:
      - 172.29.0.2
    ports:
      - "2021:2020"
    restart: unless-stopped
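Editor's note: the image tags and the build UID/GID in this compose file are parameterized via environment variables. A minimal sketch of bringing the stack up by hand with explicit overrides (values shown are the defaults above; 02_up.sh below does the same through its compose wrapper):

  cd src/sys/tests
  BIND_IMAGE_TAG=argus-bind9:latest MASTER_IMAGE_TAG=argus-master:latest \
  ARGUS_BUILD_UID=2133 ARGUS_BUILD_GID=2015 \
  docker compose -p argus-sys up -d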
src/sys/tests/scripts/00_e2e_test.sh (new executable file, 26 lines)
@@ -0,0 +1,26 @@
#!/usr/bin/env bash
set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"

SCRIPTS=(
  "01_bootstrap.sh"
  "02_up.sh"
  "03_wait_ready.sh"
  "04_verify_dns_routing.sh"
  "05_agent_register.sh"
  "06_write_health_and_assert.sh"
  "07_logs_send_and_assert.sh"
  "08_restart_agent_reregister.sh"
  "09_down.sh"
)

for script in "${SCRIPTS[@]}"; do
  echo "[SYS-E2E] Running $script"
  "$SCRIPT_DIR/$script"
  echo "[SYS-E2E] $script completed"
  echo
done

echo "[SYS-E2E] All tests completed"
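Editor's note: a usage sketch for the whole suite, assuming the component images have already been built and docker plus docker compose (or docker-compose) are available on the host:

  cd src/sys/tests
  ./scripts/00_e2e_test.sh   # runs 01_bootstrap.sh through 09_down.sh in order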
src/sys/tests/scripts/01_bootstrap.sh (new executable file, 77 lines)
@@ -0,0 +1,77 @@
#!/usr/bin/env bash
set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
TEST_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"
REPO_ROOT="$(cd "$TEST_ROOT/../../.." && pwd)"

PRIVATE_CORE="$TEST_ROOT/private"
PRIVATE_NODEA="$TEST_ROOT/private-nodea"
PRIVATE_NODEB="$TEST_ROOT/private-node2"
TMP_DIR="$TEST_ROOT/tmp"

source "$REPO_ROOT/scripts/common/build_user.sh"
load_build_user

ensure_image() {
  local image="$1"
  if ! docker image inspect "$image" >/dev/null 2>&1; then
    echo "[ERROR] Missing image: $image. Please run ./build/build_images.sh" >&2
    exit 1
  fi
}

echo "[INFO] Preparing directories..."
mkdir -p \
  "$PRIVATE_CORE/argus/etc" \
  "$PRIVATE_CORE/argus/bind" \
  "$PRIVATE_CORE/argus/master" \
  "$PRIVATE_CORE/argus/metric/prometheus" \
  "$PRIVATE_CORE/argus/log/elasticsearch" \
  "$PRIVATE_CORE/argus/log/kibana" \
  "$PRIVATE_NODEA/argus/agent/dev-yyrshare-nbnyx10-cp2f-pod-0/health" \
  "$PRIVATE_NODEB/argus/agent/dev-yyrshare-uuuu10-ep2f-pod-0/health" \
  "$TMP_DIR"

# Align ownership for supervisor-managed services (ES/Kibana expect this UID/GID inside the container)
echo "[INFO] Fixing ownership for core private directories..."
chown -R "${ARGUS_BUILD_UID}:${ARGUS_BUILD_GID}" \
  "$PRIVATE_CORE/argus/log/elasticsearch" \
  "$PRIVATE_CORE/argus/log/kibana" \
  "$PRIVATE_CORE/argus/etc" 2>/dev/null || true

echo "[INFO] Distributing update-dns.sh for core services (bind/master/es/kibana)"
BIND_UPDATE_SRC="$REPO_ROOT/src/bind/build/update-dns.sh"
BIND_UPDATE_DEST="$PRIVATE_CORE/argus/etc/update-dns.sh"
if [[ -f "$BIND_UPDATE_SRC" ]]; then
  cp "$BIND_UPDATE_SRC" "$BIND_UPDATE_DEST"
  chmod +x "$BIND_UPDATE_DEST"
else
  echo "[WARN] bind update-dns.sh not found at $BIND_UPDATE_SRC"
fi

echo "[INFO] Ensuring images present..."
ensure_image "argus-elasticsearch:latest"
ensure_image "argus-kibana:latest"
ensure_image "argus-bind9:latest"
ensure_image "argus-master:latest"

echo "[INFO] Building agent binary..."
pushd "$REPO_ROOT/src/agent" >/dev/null
./scripts/build_binary.sh
popd >/dev/null

AGENT_BIN="$REPO_ROOT/src/agent/dist/argus-agent"
if [[ ! -x "$AGENT_BIN" ]]; then
  echo "[ERROR] Agent binary not found at $AGENT_BIN" >&2
  exit 1
fi
echo "$AGENT_BIN" > "$TMP_DIR/agent_binary_path"

echo "[INFO] Writing .env with UID/GID"
cat > "$TEST_ROOT/.env" <<EOF
ARGUS_BUILD_UID=$ARGUS_BUILD_UID
ARGUS_BUILD_GID=$ARGUS_BUILD_GID
EOF

echo "[OK] Bootstrap completed"
src/sys/tests/scripts/02_up.sh (new executable file, 22 lines)
@@ -0,0 +1,22 @@
#!/usr/bin/env bash
set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
TEST_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"

compose() {
  if docker compose version >/dev/null 2>&1; then
    docker compose "$@"
  else
    docker-compose "$@"
  fi
}

echo "[INFO] Bringing up system stack..."
pushd "$TEST_ROOT" >/dev/null
compose -p argus-sys down --remove-orphans || true
compose -p argus-sys up -d
popd >/dev/null

echo "[OK] Services started: master:32300 es:9200 kibana:5601 node-a:2020 node-b:2021"
src/sys/tests/scripts/03_wait_ready.sh (new executable file, 75 lines)
@@ -0,0 +1,75 @@
#!/usr/bin/env bash
set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
TEST_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"

compose() {
  if docker compose version >/dev/null 2>&1; then
    docker compose "$@"
  else
    docker-compose "$@"
  fi
}

service_id() {
  compose -p argus-sys ps -q "$1"
}

wait_http() {
  local url="$1"; local attempts="${2:-120}"; local i=1
  while (( i <= attempts )); do
    if curl -fsS "$url" >/dev/null 2>&1; then return 0; fi
    echo "[..] waiting $url ($i/$attempts)"; sleep 5; ((i++))
  done
  echo "[ERR] Timeout waiting for $url" >&2; return 1
}

echo "[INFO] Waiting for ES/Kibana/Master/Fluent Bit/Bind..."

# ES (>= yellow)
attempt=1; max=120
while (( attempt <= max )); do
  if curl -fsS "http://localhost:9200/_cluster/health?wait_for_status=yellow&timeout=1s" >/dev/null 2>&1; then
    break
  fi
  echo "[..] waiting ES ($attempt/$max)"; sleep 5; ((attempt++))
done
[[ $attempt -le $max ]] || { echo "[ERR] ES not ready" >&2; exit 1; }

# Kibana: must return HTTP 200 and overall.level=available
echo "[INFO] Waiting for Kibana to be available (HTTP 200)..."
kb_attempt=1; kb_max=180
while (( kb_attempt <= kb_max )); do
  body=$(curl -sS "http://localhost:5601/api/status" 2>/dev/null || true)
  code=$(curl -s -o /dev/null -w "%{http_code}" "http://localhost:5601/api/status" || echo 000)
  if [[ "$code" == "200" ]]; then
    if echo "$body" | grep -q '"level":"available"'; then
      echo "[OK] Kibana available (HTTP 200)"
      break
    fi
  fi
  echo "[..] waiting kibana 200 ($kb_attempt/$kb_max), last_code=$code"
  sleep 5
  ((kb_attempt++))
done
if (( kb_attempt > kb_max )); then
  echo "[ERR] Kibana did not reach HTTP 200 available in time" >&2; exit 1
fi

# Master
wait_http "http://localhost:32300/readyz" 120

# Fluent Bit (metrics exposed on host ports)
wait_http "http://localhost:2020/api/v2/metrics" 120
wait_http "http://localhost:2021/api/v2/metrics" 120

# Bind config check
BIND_ID="$(service_id bind)"
if [[ -n "$BIND_ID" ]]; then
  docker exec "$BIND_ID" named-checkconf >/dev/null
else
  echo "[WARN] bind container id not found"
fi

echo "[OK] All services are ready"
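Editor's note: when a wait here times out, the same probes can be run by hand from the host; a small sketch of the manual equivalents (ports as mapped in docker-compose.yml):

  curl -fsS 'http://localhost:9200/_cluster/health?wait_for_status=yellow&timeout=1s'
  curl -s http://localhost:5601/api/status | grep -o '"level":"available"'
  curl -fsS http://localhost:32300/readyz
  curl -fsS http://localhost:2020/api/v2/metrics | head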
src/sys/tests/scripts/04_verify_dns_routing.sh (new executable file, 54 lines)
@@ -0,0 +1,54 @@
#!/usr/bin/env bash
set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
TEST_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"

compose() {
  if docker compose version >/dev/null 2>&1; then
    docker compose "$@"
  else
    docker-compose "$@"
  fi
}

service_id() {
  compose -p argus-sys ps -q "$1"
}

echo "[INFO] Verifying DNS routing via bind..."

# Check that the master IP file exists in the shared private directory
MASTER_FILE="$TEST_ROOT/private/argus/etc/master.argus.com"
if [[ ! -f "$MASTER_FILE" ]]; then
  echo "[ERR] master.argus.com file missing at $MASTER_FILE" >&2
  exit 1
fi
MASTER_IP_HOST="$(cat "$MASTER_FILE" | tr -d '\r\n' || true)"
echo "[INFO] master.argus.com file content: ${MASTER_IP_HOST}"

# dig inside the bind container
BIN_ID="$(service_id bind)"
if [[ -n "$BIN_ID" ]]; then
  DIG_IP="$(docker exec "$BIN_ID" dig +short master.argus.com A | tail -n1 || true)"
  echo "[INFO] dig(master.argus.com) from bind container -> $DIG_IP"
  if [[ -z "$DIG_IP" ]]; then
    echo "[ERR] bind did not resolve master.argus.com" >&2; exit 1
  fi
else
  echo "[WARN] bind container not found; skip dig"
fi

for node in node-a node-b; do
  CID="$(service_id "$node")"
  echo "[INFO] Checking resolution inside $node..."
  if ! docker exec "$CID" getent hosts master.argus.com >/dev/null 2>&1; then
    echo "[ERR] $node cannot resolve master.argus.com" >&2
    exit 1
  fi
  RES="$(docker exec "$CID" getent hosts master.argus.com | awk '{print $1}' | head -n1)"
  echo "[OK] $node resolved master.argus.com -> $RES"
done

echo "[OK] DNS routing verified"
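Editor's note: for manual troubleshooting, the same lookups can be issued directly against the running containers (container names as defined in docker-compose.yml):

  docker exec argus-bind-sys dig +short master.argus.com A
  docker exec argus-node-a getent hosts master.argus.com
  cat src/sys/tests/private/argus/etc/master.argus.com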
src/sys/tests/scripts/05_agent_register.sh (new executable file, 87 lines)
@@ -0,0 +1,87 @@
#!/usr/bin/env bash
set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
TEST_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"
TMP_DIR="$TEST_ROOT/tmp"

API_BASE="http://localhost:32300/api/v1/master"

HOST_A="dev-yyrshare-nbnyx10-cp2f-pod-0"
HOST_B="dev-yyrshare-uuuu10-ep2f-pod-0"

mkdir -p "$TMP_DIR"

echo "[INFO] Waiting for agent nodes to register..."

extract_node() {
  local name="$1"; local output="$2"; local json_file="$3"
  python3 - "$name" "$output" "$json_file" <<'PY'
import json, sys, pathlib
name = sys.argv[1]
out = pathlib.Path(sys.argv[2])
json_file = sys.argv[3]
with open(json_file, 'r') as fh:
    data = json.load(fh)
node = next((n for n in data if n.get("name") == name), None)
if node:
    out.write_text(node["id"])  # save id
    print(node["id"])           # also print for shell capture
PY
}

ID_A=""; ID_B=""
for _ in {1..60}; do
  sleep 2
  resp=$(curl -fsS "$API_BASE/nodes" 2>/dev/null || true)
  if [[ -z "$resp" ]]; then
    continue
  fi
  # only try to parse when the response is a JSON array
  if ! echo "$resp" | head -c1 | grep -q '\['; then
    continue
  fi
  echo "$resp" > "$TMP_DIR/nodes_list.json"
  ID_A=$(extract_node "$HOST_A" "$TMP_DIR/node_id_a" "$TMP_DIR/nodes_list.json" 2>/dev/null || true)
  ID_B=$(extract_node "$HOST_B" "$TMP_DIR/node_id_b" "$TMP_DIR/nodes_list.json" 2>/dev/null || true)
  if [[ -s "$TMP_DIR/node_id_a" && -s "$TMP_DIR/node_id_b" ]]; then
    break
  fi
done

if [[ ! -s "$TMP_DIR/node_id_a" || ! -s "$TMP_DIR/node_id_b" ]]; then
  echo "[ERR] Agents did not register in time" >&2
  exit 1
fi

node_detail() {
  local id="$1"; local out="$2"
  curl -fsS "$API_BASE/nodes/$id" -o "$out"
}

node_detail "$(cat "$TMP_DIR/node_id_a")" "$TMP_DIR/detail_a.json"
node_detail "$(cat "$TMP_DIR/node_id_b")" "$TMP_DIR/detail_b.json"

python3 - "$TMP_DIR/detail_a.json" "$TMP_DIR/initial_ip_a" <<'PY'
import json, sys, pathlib
node = json.load(open(sys.argv[1]))
ip = node.get("meta_data", {}).get("ip")
assert ip, "missing ip"
pathlib.Path(sys.argv[2]).write_text(ip)
PY

python3 - "$TMP_DIR/detail_b.json" "$TMP_DIR/initial_ip_b" <<'PY'
import json, sys, pathlib
node = json.load(open(sys.argv[1]))
ip = node.get("meta_data", {}).get("ip")
assert ip, "missing ip"
pathlib.Path(sys.argv[2]).write_text(ip)
PY

NODE_JSON_A="$TEST_ROOT/private-nodea/argus/agent/$HOST_A/node.json"
NODE_JSON_B="$TEST_ROOT/private-node2/argus/agent/$HOST_B/node.json"

[[ -f "$NODE_JSON_A" ]] || { echo "[ERR] node.json missing for $HOST_A" >&2; exit 1; }
[[ -f "$NODE_JSON_B" ]] || { echo "[ERR] node.json missing for $HOST_B" >&2; exit 1; }

echo "[OK] Agents registered: $(cat "$TMP_DIR/node_id_a") , $(cat "$TMP_DIR/node_id_b")"
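Editor's note: the master API polled here can also be inspected manually while the stack is up; a short sketch, where <node-id> is a placeholder taken from the node list:

  curl -s http://localhost:32300/api/v1/master/nodes | python3 -m json.tool
  curl -s http://localhost:32300/api/v1/master/nodes/<node-id> | python3 -m json.tool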
src/sys/tests/scripts/06_write_health_and_assert.sh (new executable file, 67 lines)
@@ -0,0 +1,67 @@
#!/usr/bin/env bash
set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
TEST_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"
TMP_DIR="$TEST_ROOT/tmp"

API_BASE="http://localhost:32300/api/v1/master"

HOST_A="dev-yyrshare-nbnyx10-cp2f-pod-0"
HOST_B="dev-yyrshare-uuuu10-ep2f-pod-0"

HEALTH_A="$TEST_ROOT/private-nodea/argus/agent/$HOST_A/health"
HEALTH_B="$TEST_ROOT/private-node2/argus/agent/$HOST_B/health"

write_health() {
  local dir="$1"; mkdir -p "$dir"
  cat > "$dir/log-fluentbit.json" <<JSON
{ "status": "healthy", "timestamp": "2024-10-05T12:05:00Z" }
JSON
  cat > "$dir/metric-node-exporter.json" <<JSON
{ "status": "healthy", "timestamp": "2024-10-05T12:05:00Z" }
JSON
}

echo "[INFO] Writing health files for both nodes..."
write_health "$HEALTH_A"
write_health "$HEALTH_B"

ID_A="$(cat "$TMP_DIR/node_id_a")"
ID_B="$(cat "$TMP_DIR/node_id_b")"

check_health() {
  local id="$1"; local tries=40
  for _ in $(seq 1 $tries); do
    sleep 2
    resp=$(curl -fsS "$API_BASE/nodes/$id" 2>/dev/null || true)
    [[ -z "$resp" ]] && continue
    echo "$resp" > "$TMP_DIR/node_${id}_detail.json"
    if python3 - "$TMP_DIR/node_${id}_detail.json" <<'PY'
import json, sys
node = json.load(open(sys.argv[1]))
h = node.get("health", {})
sys.exit(0 if ("log-fluentbit" in h and "metric-node-exporter" in h) else 1)
PY
    then return 0; fi
  done
  return 1
}

check_health "$ID_A" || { echo "[ERR] health keys not reported for node A" >&2; exit 1; }
check_health "$ID_B" || { echo "[ERR] health keys not reported for node B" >&2; exit 1; }

NODES_JSON="$TEST_ROOT/private/argus/metric/prometheus/nodes.json"
if [[ ! -f "$NODES_JSON" ]]; then
  echo "[ERR] nodes.json missing at $NODES_JSON" >&2; exit 1
fi

python3 - "$NODES_JSON" <<'PY'
import json, sys
with open(sys.argv[1]) as h:
    nodes = json.load(h)
assert isinstance(nodes, list)
assert len(nodes) == 2, f"expected 2 nodes online, got {len(nodes)}"
PY

echo "[OK] Health reported and nodes.json has 2 online nodes"
src/sys/tests/scripts/07_logs_send_and_assert.sh (new executable file, 63 lines)
@@ -0,0 +1,63 @@
#!/usr/bin/env bash
set -euo pipefail

echo "[INFO] Sending logs via node-a/node-b and asserting ES counts..."

get_count() {
  local idx="$1"
  curl -s "http://localhost:9200/${idx}/_count?ignore_unavailable=true&allow_no_indices=true" | sed -E 's/.*"count":([0-9]+).*/\1/' | awk 'NF{print $0;exit} END{if(NR==0)print 0}'
}

train0=$(get_count "train-*")
infer0=$(get_count "infer-*")
base=$((train0 + infer0))
echo "[INFO] initial counts: train=${train0} infer=${infer0} total=${base}"

send_logs() {
  local cname="$1"; local hosttag="$2"
  docker exec "$cname" sh -lc 'mkdir -p /logs/train /logs/infer'
  docker exec "$cname" sh -lc "ts=\$(date '+%F %T'); echo \"\$ts INFO [$hosttag] training step=1 loss=1.23 model=bert\" >> /logs/train/train-demo.log"
  docker exec "$cname" sh -lc "ts=\$(date '+%F %T'); echo \"\$ts INFO [$hosttag] training step=2 loss=1.10 model=bert\" >> /logs/train/train-demo.log"
  docker exec "$cname" sh -lc "ts=\$(date '+%F %T'); echo \"\$ts WARN [$hosttag] inference slow on batch=2 latency=1.9s\" >> /logs/infer/infer-demo.log"
}

# Determine container names
node_a=$(docker ps --format '{{.Names}}' | grep -E '^argus-node-a$|argus-sys-node-a-1' | head -n1)
node_b=$(docker ps --format '{{.Names}}' | grep -E '^argus-node-b$|argus-sys-node-b-1' | head -n1)

send_logs "$node_a" "host01"
send_logs "$node_b" "host02"

echo "[INFO] Waiting for ES to ingest..."
sleep 10

train1=$(get_count "train-*")
infer1=$(get_count "infer-*")
final=$((train1 + infer1))
echo "[INFO] final counts: train=${train1} infer=${infer1} total=${final}"

if (( final <= base )); then
  echo "[ERR] ES total did not increase (${base} -> ${final})" >&2
  exit 1
fi

if (( final < 4 )); then
  echo "[ERR] ES total below expected threshold: ${final} < 4" >&2
  exit 1
fi

# Health endpoints
es_health=$(curl -s "http://localhost:9200/_cluster/health" | grep -o '"status":"[^"]*"' | cut -d'"' -f4)
if [[ "$es_health" != "green" && "$es_health" != "yellow" ]]; then
  echo "[ERR] ES health not green/yellow: $es_health" >&2
  exit 1
fi

if ! curl -fs "http://localhost:5601/api/status" >/dev/null 2>&1; then
  echo "[WARN] Kibana status endpoint not available"
fi

echo "[OK] ES counts increased and services healthy"
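Editor's note: the count queries asserted above can be reproduced from the host for debugging against the exposed ES port; a minimal sketch mirroring get_count:

  curl -s 'http://localhost:9200/train-*/_count?ignore_unavailable=true&allow_no_indices=true'
  curl -s 'http://localhost:9200/infer-*/_count?ignore_unavailable=true&allow_no_indices=true'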
src/sys/tests/scripts/08_restart_agent_reregister.sh (new executable file, 95 lines)
@@ -0,0 +1,95 @@
#!/usr/bin/env bash
set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
TEST_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"
TMP_DIR="$TEST_ROOT/tmp"
REPO_ROOT="$(cd "$TEST_ROOT/../../.." && pwd)"

API_BASE="http://localhost:32300/api/v1/master"

ID_B="$(cat "$TMP_DIR/node_id_b")"
IP0_B="$(cat "$TMP_DIR/initial_ip_b")"

detail_before="$TMP_DIR/node_b_before.json"
curl -fsS "$API_BASE/nodes/$ID_B" -o "$detail_before"
LAST0=$(python3 - "$detail_before" <<'PY'
import json, sys
node = json.load(open(sys.argv[1]))
print(node.get("last_updated", ""))
PY
)
IP_BEFORE=$(python3 - "$detail_before" <<'PY'
import json, sys
node = json.load(open(sys.argv[1]))
print(node.get("meta_data", {}).get("ip", ""))
PY
)

if [[ "$IP_BEFORE" != "$IP0_B" ]]; then
  echo "[ERR] Expected initial IP $IP0_B for node-b, got $IP_BEFORE" >&2
  exit 1
fi

compose() {
  if docker compose version >/dev/null 2>&1; then
    docker compose "$@"
  else
    docker-compose "$@"
  fi
}

echo "[INFO] Recreating node-b with static IP 172.29.0.200..."
pushd "$TEST_ROOT" >/dev/null
compose -p argus-sys rm -sf node-b || true
popd >/dev/null

docker rm -f argus-node-b >/dev/null 2>&1 || true

AGENT_BIN_PATH="$(cat "$TMP_DIR/agent_binary_path")"

docker run -d \
  --name argus-node-b \
  --hostname dev-yyrshare-uuuu10-ep2f-pod-0 \
  --network argus-sys-net \
  --ip 172.29.0.200 \
  --dns 172.29.0.2 \
  -e MASTER_ENDPOINT=http://master.argus.com:3000 \
  -e REPORT_INTERVAL_SECONDS=2 \
  -e ARGUS_BUILD_UID=${ARGUS_BUILD_UID:-2133} \
  -e ARGUS_BUILD_GID=${ARGUS_BUILD_GID:-2015} \
  -e ES_HOST=es \
  -e ES_PORT=9200 \
  -p 2021:2020 \
  -v "$TEST_ROOT/private-node2/argus/agent/dev-yyrshare-uuuu10-ep2f-pod-0:/private/argus/agent/dev-yyrshare-uuuu10-ep2f-pod-0" \
  -v "$AGENT_BIN_PATH:/usr/local/bin/argus-agent:ro" \
  -v "$SCRIPT_DIR/node_entrypoint.sh:/usr/local/bin/node-entrypoint.sh:ro" \
  -v "$REPO_ROOT/src/log/fluent-bit/build/start-fluent-bit.sh:/assets/start-fluent-bit.sh:ro" \
  -v "$REPO_ROOT/src/log/fluent-bit/build/etc:/assets/fluent-bit/etc:ro" \
  -v "$REPO_ROOT/src/log/fluent-bit/build/packages:/assets/fluent-bit/packages:ro" \
  --entrypoint /usr/local/bin/node-entrypoint.sh \
  ubuntu:22.04 >/dev/null

echo "[INFO] Waiting for node-b to re-register with new IP..."
for _ in {1..40}; do
  sleep 3
  if curl -fsS "$API_BASE/nodes/$ID_B" -o "$TMP_DIR/node_b_after.json"; then
    if python3 - "$TMP_DIR/node_b_after.json" "$LAST0" <<'PY'
import json, sys
node = json.load(open(sys.argv[1]))
last0 = sys.argv[2]
ip = node.get("meta_data", {}).get("ip")
lu = node.get("last_updated")
assert ip == "172.29.0.200"
assert lu and lu != last0
PY
    then
      echo "[OK] node-b re-registered with new IP 172.29.0.200"
      exit 0
    fi
  fi
done

echo "[ERR] node-b did not update to IP 172.29.0.200 in time" >&2
exit 1
src/sys/tests/scripts/09_down.sh (new executable file, 38 lines)
@@ -0,0 +1,38 @@
#!/usr/bin/env bash
set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
TEST_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"

compose() {
  if docker compose version >/dev/null 2>&1; then
    docker compose "$@"
  else
    docker-compose "$@"
  fi
}

docker rm -f argus-node-b >/dev/null 2>&1 || true

pushd "$TEST_ROOT" >/dev/null
compose -p argus-sys down --remove-orphans || true
popd >/dev/null

echo "[INFO] Cleaning private directories..."
if [[ -d "$TEST_ROOT/private" ]]; then
  docker run --rm -v "$TEST_ROOT/private:/target" ubuntu:24.04 chown -R "$(id -u):$(id -g)" /target >/dev/null 2>&1 || true
  rm -rf "$TEST_ROOT/private"
fi
if [[ -d "$TEST_ROOT/private-nodea" ]]; then
  docker run --rm -v "$TEST_ROOT/private-nodea:/target" ubuntu:24.04 chown -R "$(id -u):$(id -g)" /target >/dev/null 2>&1 || true
  rm -rf "$TEST_ROOT/private-nodea"
fi
if [[ -d "$TEST_ROOT/private-node2" ]]; then
  docker run --rm -v "$TEST_ROOT/private-node2:/target" ubuntu:24.04 chown -R "$(id -u):$(id -g)" /target >/dev/null 2>&1 || true
  rm -rf "$TEST_ROOT/private-node2"
fi

rm -rf "$TEST_ROOT/tmp" "$TEST_ROOT/.env" || true

echo "[OK] Cleaned up system E2E"
src/sys/tests/scripts/node_entrypoint.sh (new executable file, 57 lines)
@@ -0,0 +1,57 @@
@ -0,0 +1,57 @@
|
|||||||
|
#!/usr/bin/env bash
|
||||||
|
set -euo pipefail
|
||||||
|
|
||||||
|
LOG_PREFIX="[NODE]"
|
||||||
|
RUNTIME_USER="argusagent"
|
||||||
|
RUNTIME_GROUP="argusagent"
|
||||||
|
AGENT_UID="${ARGUS_BUILD_UID:-2133}"
|
||||||
|
AGENT_GID="${ARGUS_BUILD_GID:-2015}"
|
||||||
|
HOSTNAME_VAL="${HOSTNAME:-unknown}"
|
||||||
|
|
||||||
|
log() { echo "${LOG_PREFIX} $*"; }
|
||||||
|
|
||||||
|
# Prepare runtime user
|
||||||
|
if ! getent group "$AGENT_GID" >/dev/null 2>&1; then
|
||||||
|
groupadd -g "$AGENT_GID" "$RUNTIME_GROUP" || true
|
||||||
|
else
|
||||||
|
RUNTIME_GROUP="$(getent group "$AGENT_GID" | cut -d: -f1)"
|
||||||
|
fi
|
||||||
|
if ! getent passwd "$AGENT_UID" >/dev/null 2>&1; then
|
||||||
|
useradd -u "$AGENT_UID" -g "$AGENT_GID" -M -s /bin/bash "$RUNTIME_USER" || true
|
||||||
|
else
|
||||||
|
RUNTIME_USER="$(getent passwd "$AGENT_UID" | cut -d: -f1)"
|
||||||
|
fi
|
||||||
|
log "runtime user: $RUNTIME_USER ($AGENT_UID:$AGENT_GID)"
|
||||||
|
|
||||||
|
# Ensure agent data dirs exist (host volumes mounted)
|
||||||
|
AGENT_DIR="/private/argus/agent/${HOSTNAME_VAL}"
|
||||||
|
HEALTH_DIR="${AGENT_DIR}/health"
|
||||||
|
mkdir -p "$HEALTH_DIR"
|
||||||
|
chown -R "$AGENT_UID:$AGENT_GID" "$AGENT_DIR" 2>/dev/null || true
|
||||||
|
|
||||||
|
# Stage Fluent Bit assets into /private to reuse existing startup script
|
||||||
|
mkdir -p /private
|
||||||
|
if [[ -f /assets/start-fluent-bit.sh ]]; then
|
||||||
|
cp /assets/start-fluent-bit.sh /private/start-fluent-bit.sh
|
||||||
|
chmod +x /private/start-fluent-bit.sh
|
||||||
|
fi
|
||||||
|
if [[ -d /assets/fluent-bit/etc ]]; then
|
||||||
|
rm -rf /private/etc && mkdir -p /private
|
||||||
|
cp -r /assets/fluent-bit/etc /private/
|
||||||
|
fi
|
||||||
|
if [[ -d /assets/fluent-bit/packages ]]; then
|
||||||
|
cp -r /assets/fluent-bit/packages /private/
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Start Fluent Bit in background (will block, so run via bash -lc &)
|
||||||
|
if [[ -x /private/start-fluent-bit.sh ]]; then
|
||||||
|
log "starting fluent-bit"
|
||||||
|
bash -lc '/private/start-fluent-bit.sh' &
|
||||||
|
else
|
||||||
|
log "missing /private/start-fluent-bit.sh; fluent-bit will not start"
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Start agent in foreground as runtime user
|
||||||
|
log "starting argus-agent"
|
||||||
|
exec su -s /bin/bash -c /usr/local/bin/argus-agent "$RUNTIME_USER"
|
||||||
|
|