Compare commits
No commits in common. "bb77c710dd8693c2af37004bb5be44800ffb8aa4" and "6d3992a03368cc253e6776ae8b1faec213c042dd" have entirely different histories.
bb77c710dd...6d3992a033
@@ -5,10 +5,3 @@
Project documentation: [Tencent Docs] GPU Cluster Operations & Maintenance System
https://docs.qq.com/doc/DQUxDdmhIZ1dpeERk

## Build account configuration

The UID/GID of the image build and runtime account can be configured via `configs/build_user.conf`; see `doc/build-user-config.md` for details.

## Note on local port conflicts

To run the BIND module end-to-end tests when port 53 on the host is already in use, set the environment variable `HOST_DNS_PORT` (default 1053) to choose the externally mapped port, e.g. `HOST_DNS_PORT=12053 ./scripts/00_e2e_test.sh`.
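As a sketch of what that account override might look like, assuming `configs/build_user.conf` uses the same `UID=`/`GID=` key format shown for `configs/build_user.local.conf` in the sys E2E README further down (values are placeholders):

```bash
# Hypothetical configs/build_user.conf; match these to your host user.
UID=1000
GID=1000
```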
@@ -21,7 +21,6 @@ EOF
}

use_intranet=false
build_master=true
build_master_offline=false

while [[ $# -gt 0 ]]; do
@@ -30,12 +29,7 @@ while [[ $# -gt 0 ]]; do
use_intranet=true
shift
;;
--master)
build_master=true
shift
;;
--master-offline)
build_master=true
build_master_offline=true
shift
;;
@@ -148,23 +142,11 @@ fi

echo ""

if [[ "$build_master" == true ]]; then
echo ""
echo "🔄 Building Master image..."
if [[ "$build_master_offline" == true ]]; then
echo "🏗️ Building master offline image"
pushd "$master_root" >/dev/null
master_args=("--tag" "argus-master:latest")
if [[ "$use_intranet" == true ]]; then
master_args+=("--intranet")
fi
if [[ "$build_master_offline" == true ]]; then
master_args+=("--offline")
fi
if ./scripts/build_images.sh "${master_args[@]}"; then
if [[ "$build_master_offline" == true ]]; then
if ./scripts/build_images.sh --offline --tag argus-master:offline; then
images_built+=("argus-master:offline")
else
images_built+=("argus-master:latest")
fi
else
build_failed=true
fi
src/agent/.gitignore (vendored, 2 changes)
@@ -1,5 +1,3 @@
build/
*.egg-info/
__pycache__/

.env
@@ -1,19 +1,19 @@
services:
bind:
image: ${BIND_IMAGE_TAG:-argus-bind9:latest}
image: ${BIND_IMAGE_TAG:-argus-bind9:e2e}
container_name: argus-bind-agent-e2e
volumes:
- ./private:/private
restart: unless-stopped
networks:
default:
ipv4_address: 172.28.0.2
environment:
- "ARGUS_BUILD_UID=${ARGUS_BUILD_UID:-2133}"
- "ARGUS_BUILD_GID=${ARGUS_BUILD_GID:-2015}"
restart: always

master:
image: argus-master:latest
image: argus-master:dev
container_name: argus-master-agent-e2e
depends_on:
- bind
@@ -32,10 +32,9 @@ services:
networks:
default:
ipv4_address: 172.28.0.10
restart: always

agent:
image: ubuntu:22.04
image: ubuntu:24.04
container_name: argus-agent-e2e
hostname: dev-e2euser-e2einst-pod-0
depends_on:
@@ -58,7 +57,6 @@ services:
networks:
default:
ipv4_address: 172.28.0.20
restart: always

networks:
default:
@@ -15,17 +15,9 @@ AGENT_HEALTH_DIR="$PRIVATE_ROOT/argus/agent/$AGENT_HOSTNAME/health"
MASTER_PRIVATE_DIR="$PRIVATE_ROOT/argus/master"
METRIC_PRIVATE_DIR="$PRIVATE_ROOT/argus/metric/prometheus"
DNS_DIR="$PRIVATE_ROOT/argus/etc"
BIND_IMAGE_TAG="${BIND_IMAGE_TAG:-argus-bind9:latest}"
BIND_IMAGE_TAG="${BIND_IMAGE_TAG:-argus-bind9:e2e}"
BIND_ROOT="$(cd "$MASTER_ROOT/../bind" && pwd)"

ensure_image() {
local image="$1"
if ! docker image inspect "$image" >/dev/null 2>&1; then
echo "[ERROR] Docker image '$image' not found; run the unified build script (e.g. ./build/build_images.sh) to produce the required images first" >&2
exit 1
fi
}

mkdir -p "$AGENT_CONFIG_DIR"
mkdir -p "$AGENT_HEALTH_DIR"
mkdir -p "$MASTER_PRIVATE_DIR"
@@ -43,8 +35,9 @@ else
echo "[WARN] bind update script missing at $BIND_ROOT/build/update-dns.sh"
fi

ensure_image "argus-master:latest"
ensure_image "$BIND_IMAGE_TAG"
pushd "$MASTER_ROOT" >/dev/null
./scripts/build_images.sh --tag argus-master:dev
popd >/dev/null

AGENT_BINARY="$AGENT_ROOT/dist/argus-agent"

@@ -57,6 +50,11 @@ if [[ ! -x "$AGENT_BINARY" ]]; then
exit 1
fi

# Note: build the test-specific bind9 image so the DNS resolution service is available
pushd "$REPO_ROOT" >/dev/null
docker build -f src/bind/build/Dockerfile -t "$BIND_IMAGE_TAG" .
popd >/dev/null

echo "$AGENT_BINARY" > "$TMP_ROOT/agent_binary_path"
echo "$BIND_IMAGE_TAG" > "$TMP_ROOT/bind_image_tag"
@@ -28,7 +28,7 @@ if [[ ! -x "$AGENT_BINARY" ]]; then
exit 1
fi

BIND_IMAGE_TAG_VALUE="argus-bind9:latest"
BIND_IMAGE_TAG_VALUE="argus-bind9:e2e"
if [[ -f "$TMP_ROOT/bind_image_tag" ]]; then
BIND_IMAGE_TAG_VALUE="$(cat "$TMP_ROOT/bind_image_tag")"
fi
@@ -10,7 +10,6 @@ AGENT_HOSTNAME="dev-e2euser-e2einst-pod-0"
NETWORK_NAME="tests_default"
NEW_AGENT_IP="172.28.0.200"
ENTRYPOINT_SCRIPT="$SCRIPT_DIR/agent_entrypoint.sh"
ENV_FILE="$TEST_ROOT/.env"

# Note: the restart scenario needs the same entrypoint script so the DNS registration logic stays consistent
if [[ ! -f "$ENTRYPOINT_SCRIPT" ]]; then
@@ -29,21 +28,6 @@ if [[ ! -x "$AGENT_BINARY" ]]; then
exit 1
fi

if [[ -f "$ENV_FILE" ]]; then
set -a
# shellcheck disable=SC1090
source "$ENV_FILE"
set +a
else
REPO_ROOT="$(cd "$TEST_ROOT/../../.." && pwd)"
# shellcheck disable=SC1090
source "$REPO_ROOT/scripts/common/build_user.sh"
load_build_user
fi

AGENT_UID="${ARGUS_BUILD_UID:-2133}"
AGENT_GID="${ARGUS_BUILD_GID:-2015}"

compose() {
if docker compose version >/dev/null 2>&1; then
docker compose "$@"
@@ -96,10 +80,8 @@ if ! docker run -d \
-v "$ENTRYPOINT_SCRIPT:/usr/local/bin/agent-entrypoint.sh:ro" \
-e MASTER_ENDPOINT=http://master.argus.com:3000 \
-e REPORT_INTERVAL_SECONDS=2 \
-e ARGUS_BUILD_UID="$AGENT_UID" \
-e ARGUS_BUILD_GID="$AGENT_GID" \
--entrypoint /usr/local/bin/agent-entrypoint.sh \
ubuntu:22.04 >/dev/null; then
ubuntu:24.04 >/dev/null; then
echo "[ERROR] Failed to start agent container with custom IP" >&2
exit 1
fi
@@ -3,8 +3,8 @@ services:
image: argus-bind9:latest
container_name: argus-bind9-test
ports:
- "${HOST_DNS_PORT:-1053}:53/tcp"
- "${HOST_DNS_PORT:-1053}:53/udp"
- "53:53/tcp"
- "53:53/udp"
volumes:
- ./private:/private
restart: unless-stopped
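The `${HOST_DNS_PORT:-1053}` mapping in this hunk is what the README hint at the top relies on. A hedged usage sketch (the port value and the queried hostname are examples only):

```bash
# Start the BIND test stack with a free host port mapped to container port 53,
# then query through that remapped port.
HOST_DNS_PORT=12053 docker compose up -d
dig @localhost -p 12053 web.argus.com A +short
```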
@@ -7,9 +7,6 @@
set -e

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
HOST_DNS_PORT="${HOST_DNS_PORT:-1053}"

export HOST_DNS_PORT

echo "=========================================="
echo "BIND9 DNS Server End-to-End Test Suite"
@@ -7,17 +7,13 @@ set -e

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
TEST_DIR="$(dirname "$SCRIPT_DIR")"
HOST_DNS_PORT="${HOST_DNS_PORT:-1053}"

export HOST_DNS_PORT

cd "$TEST_DIR"

echo "Starting BIND9 test container..."

# Ensure private directory exists with proper permissions
mkdir -p private/argus/bind
mkdir -p private/argus/etc
mkdir -p private
chmod 777 private

# Start the container
@@ -39,4 +35,4 @@ fi

echo ""
echo "BIND9 test environment is ready!"
echo "DNS server listening on localhost:${HOST_DNS_PORT}"
echo "DNS server listening on localhost:53"
@@ -5,10 +5,7 @@

set -e

HOST_DNS_PORT="${HOST_DNS_PORT:-1053}"

echo "Testing DNS resolution with dig..."
echo "Using DNS server localhost:${HOST_DNS_PORT}"

# Function to test DNS query
test_dns_query() {
@@ -22,7 +19,7 @@ test_dns_query() {
echo "Expected IP: $expected_ip"

# Perform dig query
result=$(dig @localhost -p "$HOST_DNS_PORT" "$hostname".argus.com A +short 2>/dev/null || echo "QUERY_FAILED")
result=$(dig @localhost $hostname.argus.com A +short 2>/dev/null || echo "QUERY_FAILED")

if [ "$result" = "QUERY_FAILED" ]; then
echo "✗ DNS query failed"
@@ -6,13 +6,10 @@

set -e

HOST_DNS_PORT="${HOST_DNS_PORT:-1053}"

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
TEST_DIR="$(dirname "$SCRIPT_DIR")"

echo "=== DNS Auto-Sync Functionality Test ==="
echo "Using DNS server localhost:${HOST_DNS_PORT}"

# Check if container is running
if ! docker compose ps | grep -q "Up"; then
@@ -39,7 +36,7 @@ test_dns_query() {
# Wait a moment for DNS cache
sleep 2

result=$(dig @localhost -p "$HOST_DNS_PORT" "$hostname".argus.com A +short 2>/dev/null || echo "QUERY_FAILED")
result=$(dig @localhost $hostname.argus.com A +short 2>/dev/null || echo "QUERY_FAILED")

if [ "$result" = "$expected_ip" ]; then
echo "✓ $result"
@@ -93,7 +90,7 @@ echo ""
echo "Step 2: Testing initial DNS configuration..."

# Get current IP for web.argus.com (may have been changed by previous tests)
current_web_ip=$(dig @localhost -p "$HOST_DNS_PORT" web.argus.com A +short 2>/dev/null || echo "UNKNOWN")
current_web_ip=$(dig @localhost web.argus.com A +short 2>/dev/null || echo "UNKNOWN")
echo "Current web.argus.com IP: $current_web_ip"

# Test that DNS is working (regardless of specific IP)
@@ -188,7 +185,7 @@ docker compose exec bind9 bash -c 'echo "this is not an IP address" > /private/a
wait_for_sync

# Verify invalid record was not added (should fail to resolve)
result=$(dig @localhost -p "$HOST_DNS_PORT" invalid.argus.com A +short 2>/dev/null || echo "NO_RESULT")
result=$(dig @localhost invalid.argus.com A +short 2>/dev/null || echo "NO_RESULT")
if [ "$result" = "NO_RESULT" ] || [ -z "$result" ]; then
echo "✓ Invalid IP correctly ignored"
else
@@ -5,13 +5,10 @@

set -e

HOST_DNS_PORT="${HOST_DNS_PORT:-1053}"

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
TEST_DIR="$(dirname "$SCRIPT_DIR")"

echo "=== DNS Configuration Reload Test ==="
echo "Using DNS server localhost:${HOST_DNS_PORT}"

# Check if container is running
if ! docker compose ps | grep -q "Up"; then
@@ -35,7 +32,7 @@ test_dns_query() {
echo "Testing: $description"
echo "Query: $hostname.argus.com -> Expected: $expected_ip"

result=$(dig @localhost -p "$HOST_DNS_PORT" "$hostname".argus.com A +short 2>/dev/null || echo "QUERY_FAILED")
result=$(dig @localhost $hostname.argus.com A +short 2>/dev/null || echo "QUERY_FAILED")

if [ "$result" = "$expected_ip" ]; then
echo "✓ $result"
@@ -5,13 +5,10 @@

set -e

HOST_DNS_PORT="${HOST_DNS_PORT:-1053}"

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
TEST_DIR="$(dirname "$SCRIPT_DIR")"

echo "=== Configuration Persistence Test ==="
echo "Using DNS server localhost:${HOST_DNS_PORT}"

# Check if dig is available
if ! command -v dig &> /dev/null; then
@@ -28,7 +25,7 @@ test_dns_query() {
echo "Testing: $description"
echo "Query: $hostname.argus.com -> Expected: $expected_ip"

result=$(dig @localhost -p "$HOST_DNS_PORT" "$hostname".argus.com A +short 2>/dev/null || echo "QUERY_FAILED")
result=$(dig @localhost $hostname.argus.com A +short 2>/dev/null || echo "QUERY_FAILED")

if [ "$result" = "$expected_ip" ]; then
echo "✓ $result"
@@ -7,9 +7,6 @@ set -e

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
TEST_DIR="$(dirname "$SCRIPT_DIR")"
HOST_DNS_PORT="${HOST_DNS_PORT:-1053}"

export HOST_DNS_PORT

# Parse command line arguments
FULL_CLEANUP=true
@@ -17,7 +17,6 @@ services:
interval: 10s
timeout: 5s
retries: 30
restart: always

kibana:
build:
@@ -74,11 +73,13 @@ services:
interval: 15s
timeout: 10s
retries: 30
restart: always

bind9:
image: argus-bind9:latest
ports:
- "53:53/tcp"
- "53:53/udp"
volumes:
- ./private/argus:/private/argus/
restart: always
restart: unless-stopped
@@ -15,9 +15,9 @@ mkdir -p "$root/private/argus/etc/"

# Set permissions on the data directories (the ES and Kibana containers both use UID 1000)
echo "[INFO] Setting permissions for data directories..."
chown -R "${ARGUS_BUILD_UID}:${ARGUS_BUILD_GID}" "$root/private/argus/log/elasticsearch" 2>/dev/null || true
chown -R "${ARGUS_BUILD_UID}:${ARGUS_BUILD_GID}" "$root/private/argus/log/kibana" 2>/dev/null || true
chown -R "${ARGUS_BUILD_UID}:${ARGUS_BUILD_GID}" "$root/private/argus/etc" 2>/dev/null || true
sudo chown -R "${ARGUS_BUILD_UID}:${ARGUS_BUILD_GID}" "$root/private/argus/log/elasticsearch" 2>/dev/null || true
sudo chown -R "${ARGUS_BUILD_UID}:${ARGUS_BUILD_GID}" "$root/private/argus/log/kibana" 2>/dev/null || true
sudo chown -R "${ARGUS_BUILD_UID}:${ARGUS_BUILD_GID}" "$root/private/argus/etc" 2>/dev/null || true

echo "[INFO] Supervisor-based containers will manage their own scripts and configurations"
@@ -1,42 +1,7 @@
#!/usr/bin/env bash
set -euo pipefail

# ES endpoint and wait strategy
ES="${ES:-http://localhost:9200}"
es_wait_attempts="${ES_WAIT_ATTEMPTS:-60}"  # total attempts to wait for ES
es_wait_interval="${ES_WAIT_INTERVAL:-2}"   # seconds between attempts

echo "[i] Querying ES endpoint: $ES"

wait_for_es() {
local attempt=1
while (( attempt <= es_wait_attempts )); do
# Wait until the cluster reaches at least yellow; retry if the request fails
if curl -fsS "$ES/_cluster/health?wait_for_status=yellow&timeout=1s" >/dev/null 2>&1; then
echo "[ok] Elasticsearch is ready (attempt=${attempt}/${es_wait_attempts})"
return 0
fi
echo "[..] Waiting for Elasticsearch (${attempt}/${es_wait_attempts})"
sleep "${es_wait_interval}"
(( attempt++ ))
done
echo "[err] Elasticsearch still unavailable after ${es_wait_attempts} attempts"
return 1
}

safe_count() {
# Return 0 for missing indices so a 404 does not cause a failure
local pattern="$1"
local json
json=$(curl -fsS "$ES/${pattern}/_count?ignore_unavailable=true&allow_no_indices=true" 2>/dev/null || echo '{}')
echo "$json" | sed -E 's/.*"count":([0-9]+).*/\1/' | awk 'NF{print $0;exit} END{if(NR==0)print 0}'
}

wait_for_es

# List the relevant indices (may be empty, which is allowed)
curl -fsS "$ES/_cat/indices?v" | egrep 'train-|infer-|logstash' || true

# Print the counts, treating missing indices as 0
printf "train-* count: "; safe_count "train-*"; echo
printf "infer-* count: "; safe_count "infer-*"; echo
printf "train-* count: "; curl -fsS "$ES/train-*/_count" | sed -E 's/.*"count":([0-9]+).*/\1/'; echo
printf "infer-* count: "; curl -fsS "$ES/infer-*/_count" | sed -E 's/.*"count":([0-9]+).*/\1/'; echo
@@ -11,7 +11,7 @@ Argus Master is a node management service built on Flask + SQLite, responsible for:

```bash
cd src/master
./scripts/build_images.sh            # builds the argus-master:latest image
./scripts/build_images.sh            # builds the argus-master:dev image
```

For an offline build, first run the preparation script in an environment with network access:
@@ -25,7 +25,7 @@ cd src/master

```bash
cd src/master
./scripts/build_images.sh --offline --tag argus-master:latest
./scripts/build_images.sh --offline --tag argus-master:dev
```

If the intranet is missing `python:3.11-slim`, run `docker save` beforehand on a machine with internet access and bring the image in via `docker load` from offline media.
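The `docker save`/`docker load` step mentioned above is plain Docker usage; a minimal sketch, assuming the image name from the README and an arbitrary tarball path:

```bash
# On a machine with internet access
docker pull python:3.11-slim
docker save -o python-3.11-slim.tar python:3.11-slim

# On the offline/intranet host, after copying the tarball over
docker load -i python-3.11-slim.tar
```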
@@ -8,14 +8,14 @@ Usage: $0 [--intranet] [--offline] [--tag <image_tag>]
Options:
--intranet         Use the configured PyPI mirror (Tsinghua mirror by default).
--offline          Fully offline build, relying on the pre-downloaded packages in offline_wheels/.
--tag <image_tag>  Custom image tag, default argus-master:latest.
--tag <image_tag>  Custom image tag, default argus-master:dev.
USAGE
}

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_ROOT="$(cd "$SCRIPT_DIR/../../.." && pwd)"
MODULE_ROOT="$PROJECT_ROOT/src/master"
IMAGE_TAG="${IMAGE_TAG:-argus-master:latest}"
IMAGE_TAG="${IMAGE_TAG:-argus-master:dev}"
DOCKERFILE="src/master/Dockerfile"
BUILD_ARGS=()
OFFLINE_MODE=0
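For reference, invocations implied by the usage text and the master README above (a sketch; the flags are as documented, the tag values are examples):

```bash
# Build against the intranet PyPI mirror with an explicit tag
./scripts/build_images.sh --intranet --tag argus-master:dev

# Fully offline build from the offline_wheels/ directory
./scripts/build_images.sh --offline --tag argus-master:dev
```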
@@ -8,7 +8,7 @@ usage() {
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"
DEFAULT_OUTPUT="$PROJECT_ROOT/images/argus-master-dev.tar"
IMAGE_TAG="${IMAGE_TAG:-argus-master:latest}"
IMAGE_TAG="${IMAGE_TAG:-argus-master:dev}"
OUTPUT_PATH="$DEFAULT_OUTPUT"

while [[ "$#" -gt 0 ]]; do
@@ -1,6 +1,6 @@
services:
master:
image: ${MASTER_IMAGE_TAG:-argus-master:latest}
image: ${MASTER_IMAGE_TAG:-argus-master:dev}
container_name: argus-master-e2e
environment:
- OFFLINE_THRESHOLD_SECONDS=6
@@ -17,7 +17,7 @@ SCRIPTS=(

for script in "${SCRIPTS[@]}"; do
echo "[TEST] Running $script"
MASTER_IMAGE_TAG="${MASTER_IMAGE_TAG:-argus-master:latest}" "$SCRIPT_DIR/$script"
MASTER_IMAGE_TAG="${MASTER_IMAGE_TAG:-argus-master:dev}" "$SCRIPT_DIR/$script"
echo "[TEST] $script completed"
echo
done
@@ -44,7 +44,7 @@ fi

pushd "$TEST_ROOT" >/dev/null
compose down --remove-orphans || true
MASTER_IMAGE_TAG="${MASTER_IMAGE_TAG:-argus-master:latest}" compose up -d
MASTER_IMAGE_TAG="${MASTER_IMAGE_TAG:-argus-master:dev}" compose up -d
popd >/dev/null

echo "[INFO] Master container is up on http://localhost:31300"
@@ -1,2 +0,0 @@
@@ -1,138 +0,0 @@
# ARGUS System-Level End-to-End Tests (Sys E2E)

This directory contains the system-level end-to-end tests that merge the log and agent verification tracks. They depend on bind/master/es/kibana plus two "log nodes" (each node container runs both Fluent Bit and argus-agent).

---

## 1. How to run

- Prerequisites
  - Images already built: `argus-elasticsearch:latest`, `argus-kibana:latest`, `argus-bind9:latest`, `argus-master:latest`
  - They can be built from the repository root: `./build/build_images.sh [--intranet]`
  - The host has Docker and Docker Compose.

- UID/GID configuration (for file ownership inside containers and write access to mounted volumes)
  - Defaults: `UID=2133`, `GID=2015`.
  - Option A (recommended): create `configs/build_user.local.conf` in the repository root:

        UID=<your host user UID>
        GID=<your host user GID>

    For example:

        UID=1000
        GID=1000

  - Option B: override via environment variables (highest priority):

        export ARGUS_BUILD_UID=1000
        export ARGUS_BUILD_GID=1000

  - Note: `scripts/common/build_user.sh` reads `configs/build_user.local.conf` → `configs/build_user.conf` → environment variables, in that order. The final values feed the image build arguments and the test scripts, and `01_bootstrap.sh` runs `chown` on `src/sys/tests/private/argus/*` so ownership matches the user running inside the containers.

- One-shot run
  - `cd src/sys/tests`
  - `./scripts/00_e2e_test.sh`

- Step-by-step run (recommended for troubleshooting)
  - `./scripts/01_bootstrap.sh` creates the directories, copies `update-dns.sh`, builds the agent binary, and writes `.env`
  - `./scripts/02_up.sh` starts the Compose stack (project name `argus-sys`)
  - `./scripts/03_wait_ready.sh` waits for ES/Kibana/Master/Fluent Bit/Bind to become ready (Kibana must return 200 with overall.level=available)
  - `./scripts/04_verify_dns_routing.sh` verifies bind resolution and in-node domain resolution
  - `./scripts/05_agent_register.sh` fetches both nodes' `node_id` and initial IPs and checks the local `node.json`
  - `./scripts/06_write_health_and_assert.sh` writes health files and asserts that `nodes.json` contains exactly 2 online nodes
  - `./scripts/07_logs_send_and_assert.sh` writes logs on both nodes and asserts that the ES `train-*`/`infer-*` counts grow
  - `./scripts/08_restart_agent_reregister.sh` switches `node-b` to the fixed IP `172.29.0.200` and verifies the node keeps the same node ID while its IP/timestamp update
  - `./scripts/09_down.sh` tears down containers and networks and cleans `private*/` and `tmp/`

- Resetting the environment
  - If any stage fails, run `./scripts/09_down.sh` and rerun from `01` onward.

---

## 2. Test deployment architecture (docker-compose)

- Network
  - Custom bridge: `argus-sys-net`, subnet `172.29.0.0/16`
  - Fixed addresses: bind=`172.29.0.2`, master=`172.29.0.10`

- Services and ports
  - `bind` (`argus-bind9:latest`): listens on 53/tcp+udp; keeps `*.argus.com` records in sync
  - `master` (`argus-master:latest`): exposed as `32300→3000`; API at `http://localhost:32300`
  - `es` (`argus-elasticsearch:latest`): `9200→9200`; single node, security disabled
  - `kibana` (`argus-kibana:latest`): `5601→5601`; reaches ES via `ELASTICSEARCH_HOSTS=http://es:9200`
  - `node-a` (`ubuntu:22.04`): runs Fluent Bit + argus-agent, `hostname=dev-yyrshare-nbnyx10-cp2f-pod-0`, `2020→2020`
  - `node-b` (`ubuntu:22.04`): runs Fluent Bit + argus-agent, `hostname=dev-yyrshare-uuuu10-ep2f-pod-0`, `2021→2020`

- Volumes and directories
  - The core services (bind/master/es/kibana) share the host `./private` directory mounted at `/private` in the containers
  - The two nodes use separate data volumes and never mix them with the core services:
    - node-a: `./private-nodea/argus/agent/<HOST> → /private/argus/agent/<HOST>`
    - node-b: `./private-nodeb/argus/agent/<HOST> → /private/argus/agent/<HOST>`
  - The node containers' Fluent Bit/agent assets are mounted read-only at `/assets` and `/usr/local/bin/argus-agent`

- DNS configuration
  - The node containers point at bind via the compose setting `dns: [172.29.0.2]`; they do not mount `/etc/resolv.conf` and do not depend on `update-dns.sh`
  - master/es/kibana still share `./private`; on startup master writes `/private/argus/etc/master.argus.com` so bind can sync the A record

- Node entrypoint
  - `scripts/node_entrypoint.sh`:
    - copies `/assets/fluent-bit/*` into the container's `/private` and starts Fluent Bit in the background (listening on 2020)
    - starts `argus-agent` in the foreground as the runtime user (mapped UID/GID)
  - Node environment variables: `MASTER_ENDPOINT=http://master.argus.com:3000`, `REPORT_INTERVAL_SECONDS=2`, `ES_HOST=es`, `ES_PORT=9200`, `CLUSTER=local`, `RACK=dev`

---

## 3. Scripts and what they verify

- `01_bootstrap.sh`
  - Purpose: prepare the directory layout, fix ownership of the ES/Kibana data directories, distribute `update-dns.sh` (used only by the core services), build the agent binary, write `.env`
  - Troubleshooting: if ES cannot write its data, rerun this step to ensure the directories are owned by the configured UID/GID

- `02_up.sh`
  - Purpose: start the full stack under the project name `argus-sys`; old stacks/networks are cleaned up automatically

- `03_wait_ready.sh`
  - Purpose: wait until the key ports/health endpoints are available
  - Criteria:
    - ES `/_cluster/health?wait_for_status=yellow` succeeds
    - Kibana `GET /api/status` returns 200 with `overall.level=available`
    - Master `/readyz` succeeds
    - The Fluent Bit metrics endpoints on `:2020`/`:2021` are reachable
    - bind passes `named-checkconf`

- `04_verify_dns_routing.sh`
  - Purpose: verify the resolution chain from bind to the node containers
  - Criteria:
    - `private/argus/etc/master.argus.com` exists and contains the master IP
    - `getent hosts master.argus.com` inside node-a/node-b resolves to the master IP

- `05_agent_register.sh`
  - Purpose: confirm that both nodes register with master and persist `node.json`
  - Output: `tmp/node_id_a|b`, `tmp/initial_ip_a|b`, `tmp/detail_*.json`

- `06_write_health_and_assert.sh`
  - Purpose: simulate node health reporting and confirm it is visible on the master side; `nodes.json` keeps only online nodes
  - Action: write `log-fluentbit.json` and `metric-node-exporter.json` into both nodes' health directories

- `07_logs_send_and_assert.sh`
  - Purpose: inject both log types into ES through Fluent Bit; counts must grow over the baseline and reach the threshold (≥4)
  - Also checks that ES health is `green|yellow`

- `08_restart_agent_reregister.sh`
  - Purpose: verify that on node restart and IP change the node keeps the same `id` while `meta_data.ip` and `last_updated` are updated
  - Action: recreate node-b with the fixed IP `172.29.0.200` and poll for verification

- `09_down.sh`
  - Purpose: tear down the stack and clean up the environment; if needed, fix ownership via a temporary container before deleting the `private*` directories

---

### Common issues and troubleshooting
- Kibana stuck at 503 for a long time: initialization can take a while on slow machines; the script waits up to ~15 minutes; confirm ES is ready first.
- Fluent Bit metrics not ready: check the node container logs and whether the `CLUSTER/RACK` environment variables are set; confirm the entrypoint script copied the assets into `/private`.
- ES fails to start: usually a host directory permission problem; rerun `01_bootstrap.sh`, or manually `chown -R <UID:GID> src/sys/tests/private/argus/log/*`.

---

For stricter assertions (for example checking that Kibana loads specific plugins, or validating ES document fields), additional queries and checks can be added to `07_*.sh`.
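Condensed from the run instructions above, a minimal sketch of a full run with the ownership overrides applied (the UID/GID values are examples):

```bash
# Override the build/runtime account (Option B above), then run the whole suite
export ARGUS_BUILD_UID=1000
export ARGUS_BUILD_GID=1000
cd src/sys/tests
./scripts/00_e2e_test.sh

# If any stage fails, reset and start again from 01
./scripts/09_down.sh
```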
@@ -1,139 +0,0 @@
version: "3.8"

networks:
default:
name: argus-sys-net
driver: bridge
ipam:
driver: default
config:
- subnet: 172.29.0.0/16

services:
bind:
image: ${BIND_IMAGE_TAG:-argus-bind9:latest}
container_name: argus-bind-sys
networks:
default:
ipv4_address: 172.29.0.2
volumes:
- ./private:/private
restart: unless-stopped

master:
image: ${MASTER_IMAGE_TAG:-argus-master:latest}
container_name: argus-master-sys
depends_on:
- bind
environment:
- OFFLINE_THRESHOLD_SECONDS=6
- ONLINE_THRESHOLD_SECONDS=2
- SCHEDULER_INTERVAL_SECONDS=1
- ARGUS_BUILD_UID=${ARGUS_BUILD_UID:-2133}
- ARGUS_BUILD_GID=${ARGUS_BUILD_GID:-2015}
ports:
- "32300:3000"
volumes:
- ./private/argus/master:/private/argus/master
- ./private/argus/metric/prometheus:/private/argus/metric/prometheus
- ./private/argus/etc:/private/argus/etc
networks:
default:
ipv4_address: 172.29.0.10
restart: unless-stopped

es:
image: argus-elasticsearch:latest
container_name: argus-es-sys
environment:
- discovery.type=single-node
- xpack.security.enabled=false
- ES_JAVA_OPTS=-Xms512m -Xmx512m
- ARGUS_BUILD_UID=${ARGUS_BUILD_UID:-2133}
- ARGUS_BUILD_GID=${ARGUS_BUILD_GID:-2015}
volumes:
- ./private/argus/log/elasticsearch:/private/argus/log/elasticsearch
- ./private/argus/etc:/private/argus/etc
ports:
- "9200:9200"
restart: unless-stopped

kibana:
image: argus-kibana:latest
container_name: argus-kibana-sys
environment:
- ELASTICSEARCH_HOSTS=http://es.log.argus.com:9200
- ARGUS_BUILD_UID=${ARGUS_BUILD_UID:-2133}
- ARGUS_BUILD_GID=${ARGUS_BUILD_GID:-2015}
volumes:
- ./private/argus/log/kibana:/private/argus/log/kibana
- ./private/argus/etc:/private/argus/etc
depends_on:
- es
ports:
- "5601:5601"
restart: unless-stopped

node-a:
image: ubuntu:22.04
container_name: argus-node-a
hostname: dev-yyrshare-nbnyx10-cp2f-pod-0
depends_on:
- master
- bind
- es
environment:
- MASTER_ENDPOINT=http://master.argus.com:3000
- REPORT_INTERVAL_SECONDS=2
- ARGUS_BUILD_UID=${ARGUS_BUILD_UID:-2133}
- ARGUS_BUILD_GID=${ARGUS_BUILD_GID:-2015}
- ES_HOST=es
- ES_PORT=9200
- CLUSTER=local
- RACK=dev
volumes:
- ./private-nodea/argus/agent/dev-yyrshare-nbnyx10-cp2f-pod-0:/private/argus/agent/dev-yyrshare-nbnyx10-cp2f-pod-0
- ../../agent/dist/argus-agent:/usr/local/bin/argus-agent:ro
- ./scripts/node_entrypoint.sh:/usr/local/bin/node-entrypoint.sh:ro
- ../../log/fluent-bit/build/start-fluent-bit.sh:/assets/start-fluent-bit.sh:ro
- ../../log/fluent-bit/build/etc:/assets/fluent-bit/etc:ro
- ../../log/fluent-bit/build/packages:/assets/fluent-bit/packages:ro
entrypoint:
- /usr/local/bin/node-entrypoint.sh
dns:
- 172.29.0.2
ports:
- "2020:2020"
restart: unless-stopped

node-b:
image: ubuntu:22.04
container_name: argus-node-b
hostname: dev-yyrshare-uuuu10-ep2f-pod-0
depends_on:
- master
- bind
- es
environment:
- MASTER_ENDPOINT=http://master.argus.com:3000
- REPORT_INTERVAL_SECONDS=2
- ARGUS_BUILD_UID=${ARGUS_BUILD_UID:-2133}
- ARGUS_BUILD_GID=${ARGUS_BUILD_GID:-2015}
- ES_HOST=es
- ES_PORT=9200
- CLUSTER=local
- RACK=dev
volumes:
- ./private-nodeb/argus/agent/dev-yyrshare-uuuu10-ep2f-pod-0:/private/argus/agent/dev-yyrshare-uuuu10-ep2f-pod-0
- ../../agent/dist/argus-agent:/usr/local/bin/argus-agent:ro
- ./scripts/node_entrypoint.sh:/usr/local/bin/node-entrypoint.sh:ro
- ../../log/fluent-bit/build/start-fluent-bit.sh:/assets/start-fluent-bit.sh:ro
- ../../log/fluent-bit/build/etc:/assets/fluent-bit/etc:ro
- ../../log/fluent-bit/build/packages:/assets/fluent-bit/packages:ro
entrypoint:
- /usr/local/bin/node-entrypoint.sh
dns:
- 172.29.0.2
ports:
- "2021:2020"
restart: unless-stopped
@@ -1,26 +0,0 @@
#!/usr/bin/env bash
set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"

SCRIPTS=(
"01_bootstrap.sh"
"02_up.sh"
"03_wait_ready.sh"
"04_verify_dns_routing.sh"
"05_agent_register.sh"
"06_write_health_and_assert.sh"
"07_logs_send_and_assert.sh"
"08_restart_agent_reregister.sh"
"09_down.sh"
)

for script in "${SCRIPTS[@]}"; do
echo "[SYS-E2E] Running $script"
"$SCRIPT_DIR/$script"
echo "[SYS-E2E] $script completed"
echo
done

echo "[SYS-E2E] All tests completed"
@@ -1,77 +0,0 @@
#!/usr/bin/env bash
set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
TEST_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"
REPO_ROOT="$(cd "$TEST_ROOT/../../.." && pwd)"

PRIVATE_CORE="$TEST_ROOT/private"
PRIVATE_NODEA="$TEST_ROOT/private-nodea"
PRIVATE_NODEB="$TEST_ROOT/private-nodeb"
TMP_DIR="$TEST_ROOT/tmp"

source "$REPO_ROOT/scripts/common/build_user.sh"
load_build_user

ensure_image() {
local image="$1"
if ! docker image inspect "$image" >/dev/null 2>&1; then
echo "[ERROR] Missing image: $image. Please run ./build/build_images.sh" >&2
exit 1
fi
}

echo "[INFO] Preparing directories..."
mkdir -p \
"$PRIVATE_CORE/argus/etc" \
"$PRIVATE_CORE/argus/bind" \
"$PRIVATE_CORE/argus/master" \
"$PRIVATE_CORE/argus/metric/prometheus" \
"$PRIVATE_CORE/argus/log/elasticsearch" \
"$PRIVATE_CORE/argus/log/kibana" \
"$PRIVATE_NODEA/argus/agent/dev-yyrshare-nbnyx10-cp2f-pod-0/health" \
"$PRIVATE_NODEB/argus/agent/dev-yyrshare-uuuu10-ep2f-pod-0/health" \
"$TMP_DIR"

# Align ownership for supervisor-managed services (ES/Kibana expect UID/GID inside container)
echo "[INFO] Fixing ownership for core private directories..."
chown -R "${ARGUS_BUILD_UID}:${ARGUS_BUILD_GID}" \
"$PRIVATE_CORE/argus/log/elasticsearch" \
"$PRIVATE_CORE/argus/log/kibana" \
"$PRIVATE_CORE/argus/etc" 2>/dev/null || true

echo "[INFO] Distributing update-dns.sh for core services (bind/master/es/kibana)"
BIND_UPDATE_SRC="$REPO_ROOT/src/bind/build/update-dns.sh"
BIND_UPDATE_DEST="$PRIVATE_CORE/argus/etc/update-dns.sh"
if [[ -f "$BIND_UPDATE_SRC" ]]; then
cp "$BIND_UPDATE_SRC" "$BIND_UPDATE_DEST"
chmod +x "$BIND_UPDATE_DEST"
else
echo "[WARN] bind update-dns.sh not found at $BIND_UPDATE_SRC"
fi

echo "[INFO] Ensuring images present..."
ensure_image "argus-elasticsearch:latest"
ensure_image "argus-kibana:latest"
ensure_image "argus-bind9:latest"
ensure_image "argus-master:latest"

echo "[INFO] Building agent binary..."
pushd "$REPO_ROOT/src/agent" >/dev/null
./scripts/build_binary.sh
popd >/dev/null

AGENT_BIN="$REPO_ROOT/src/agent/dist/argus-agent"
if [[ ! -x "$AGENT_BIN" ]]; then
echo "[ERROR] Agent binary not found at $AGENT_BIN" >&2
exit 1
fi
echo "$AGENT_BIN" > "$TMP_DIR/agent_binary_path"

echo "[INFO] Writing .env with UID/GID"
cat > "$TEST_ROOT/.env" <<EOF
ARGUS_BUILD_UID=$ARGUS_BUILD_UID
ARGUS_BUILD_GID=$ARGUS_BUILD_GID
EOF

echo "[OK] Bootstrap completed"
@@ -1,22 +0,0 @@
#!/usr/bin/env bash
set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
TEST_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"

compose() {
if docker compose version >/dev/null 2>&1; then
docker compose "$@"
else
docker-compose "$@"
fi
}

echo "[INFO] Bringing up system stack..."
pushd "$TEST_ROOT" >/dev/null
compose -p argus-sys down --remove-orphans || true
compose -p argus-sys up -d
popd >/dev/null

echo "[OK] Services started: master:32300 es:9200 kibana:5601 node-a:2020 node-b:2021"
@@ -1,75 +0,0 @@
#!/usr/bin/env bash
set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
TEST_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"

compose() {
if docker compose version >/dev/null 2>&1; then
docker compose "$@"
else
docker-compose "$@"
fi
}

service_id() {
compose -p argus-sys ps -q "$1"
}

wait_http() {
local url="$1"; local attempts="${2:-120}"; local i=1
while (( i <= attempts )); do
if curl -fsS "$url" >/dev/null 2>&1; then return 0; fi
echo "[..] waiting $url ($i/$attempts)"; sleep 5; ((i++))
done
echo "[ERR] Timeout waiting for $url" >&2; return 1
}

echo "[INFO] Waiting for ES/Kibana/Master/Fluent Bit/Bind..."

# ES (>= yellow)
attempt=1; max=120
while (( attempt <= max )); do
if curl -fsS "http://localhost:9200/_cluster/health?wait_for_status=yellow&timeout=1s" >/dev/null 2>&1; then
break
fi
echo "[..] waiting ES ($attempt/$max)"; sleep 5; ((attempt++))
done
[[ $attempt -le $max ]] || { echo "[ERR] ES not ready" >&2; exit 1; }

# Kibana: must be HTTP 200 and overall.level=available
echo "[INFO] Waiting for Kibana to be available (HTTP 200)..."
kb_attempt=1; kb_max=180
while (( kb_attempt <= kb_max )); do
body=$(curl -sS "http://localhost:5601/api/status" 2>/dev/null || true)
code=$(curl -s -o /dev/null -w "%{http_code}" "http://localhost:5601/api/status" || echo 000)
if [[ "$code" == "200" ]]; then
if echo "$body" | grep -q '"level":"available"'; then
echo "[OK] Kibana available (HTTP 200)"
break
fi
fi
echo "[..] waiting kibana 200 ($kb_attempt/$kb_max), last_code=$code"
sleep 5
((kb_attempt++))
done
if (( kb_attempt > kb_max )); then
echo "[ERR] Kibana did not reach HTTP 200 available in time" >&2; exit 1
fi

# Master
wait_http "http://localhost:32300/readyz" 120

# Fluent Bit (host metrics on host ports)
wait_http "http://localhost:2020/api/v2/metrics" 120
wait_http "http://localhost:2021/api/v2/metrics" 120

# Bind config check
BIND_ID="$(service_id bind)"
if [[ -n "$BIND_ID" ]]; then
docker exec "$BIND_ID" named-checkconf >/dev/null
else
echo "[WARN] bind container id not found"
fi

echo "[OK] All services are ready"
@@ -1,54 +0,0 @@
#!/usr/bin/env bash
set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
TEST_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"

compose() {
if docker compose version >/dev/null 2>&1; then
docker compose "$@"
else
docker-compose "$@"
fi
}

service_id() {
compose -p argus-sys ps -q "$1"
}

echo "[INFO] Verifying DNS routing via bind..."

# Check master IP file exists in shared private
MASTER_FILE="$TEST_ROOT/private/argus/etc/master.argus.com"
if [[ ! -f "$MASTER_FILE" ]]; then
echo "[ERR] master.argus.com file missing at $MASTER_FILE" >&2
exit 1
fi
MASTER_IP_HOST="$(cat "$MASTER_FILE" | tr -d '\r\n' || true)"
echo "[INFO] master.argus.com file content: ${MASTER_IP_HOST}"

# dig inside bind container
BIN_ID="$(service_id bind)"
if [[ -n "$BIN_ID" ]]; then
DIG_IP="$(docker exec "$BIN_ID" dig +short master.argus.com A | tail -n1 || true)"
echo "[INFO] dig(master.argus.com) from bind container -> $DIG_IP"
if [[ -z "$DIG_IP" ]]; then
echo "[ERR] bind did not resolve master.argus.com" >&2; exit 1
fi
else
echo "[WARN] bind container not found; skip dig"
fi

for node in node-a node-b; do
CID="$(service_id "$node")"
echo "[INFO] Checking resolution inside $node..."
if ! docker exec "$CID" getent hosts master.argus.com >/dev/null 2>&1; then
echo "[ERR] $node cannot resolve master.argus.com" >&2
exit 1
fi
RES="$(docker exec "$CID" getent hosts master.argus.com | awk '{print $1}' | head -n1)"
echo "[OK] $node resolved master.argus.com -> $RES"
done

echo "[OK] DNS routing verified"
@@ -1,87 +0,0 @@
#!/usr/bin/env bash
set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
TEST_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"
TMP_DIR="$TEST_ROOT/tmp"

API_BASE="http://localhost:32300/api/v1/master"

HOST_A="dev-yyrshare-nbnyx10-cp2f-pod-0"
HOST_B="dev-yyrshare-uuuu10-ep2f-pod-0"

mkdir -p "$TMP_DIR"

echo "[INFO] Waiting for agent nodes to register..."

extract_node() {
local name="$1"; local output="$2"; local json_file="$3"
python3 - "$name" "$output" "$json_file" <<'PY'
import json, sys, pathlib
name = sys.argv[1]
out = pathlib.Path(sys.argv[2])
json_file = sys.argv[3]
with open(json_file, 'r') as fh:
    data = json.load(fh)
node = next((n for n in data if n.get("name") == name), None)
if node:
    out.write_text(node["id"])  # save id
    print(node["id"])  # also print for shell capture
PY
}

ID_A=""; ID_B=""
for _ in {1..60}; do
sleep 2
resp=$(curl -fsS "$API_BASE/nodes" 2>/dev/null || true)
if [[ -z "$resp" ]]; then
continue
fi
# only try to parse when it's a JSON array
if ! echo "$resp" | head -c1 | grep -q '\['; then
continue
fi
echo "$resp" > "$TMP_DIR/nodes_list.json"
ID_A=$(extract_node "$HOST_A" "$TMP_DIR/node_id_a" "$TMP_DIR/nodes_list.json" 2>/dev/null || true)
ID_B=$(extract_node "$HOST_B" "$TMP_DIR/node_id_b" "$TMP_DIR/nodes_list.json" 2>/dev/null || true)
if [[ -s "$TMP_DIR/node_id_a" && -s "$TMP_DIR/node_id_b" ]]; then
break
fi
done

if [[ ! -s "$TMP_DIR/node_id_a" || ! -s "$TMP_DIR/node_id_b" ]]; then
echo "[ERR] Agents did not register in time" >&2
exit 1
fi

node_detail() {
local id="$1"; local out="$2"
curl -fsS "$API_BASE/nodes/$id" -o "$out"
}

node_detail "$(cat "$TMP_DIR/node_id_a")" "$TMP_DIR/detail_a.json"
node_detail "$(cat "$TMP_DIR/node_id_b")" "$TMP_DIR/detail_b.json"

python3 - "$TMP_DIR/detail_a.json" "$TMP_DIR/initial_ip_a" <<'PY'
import json, sys, pathlib
node=json.load(open(sys.argv[1]))
ip=node.get("meta_data",{}).get("ip")
assert ip, "missing ip"
pathlib.Path(sys.argv[2]).write_text(ip)
PY

python3 - "$TMP_DIR/detail_b.json" "$TMP_DIR/initial_ip_b" <<'PY'
import json, sys, pathlib
node=json.load(open(sys.argv[1]))
ip=node.get("meta_data",{}).get("ip")
assert ip, "missing ip"
pathlib.Path(sys.argv[2]).write_text(ip)
PY

NODE_JSON_A="$TEST_ROOT/private-nodea/argus/agent/$HOST_A/node.json"
NODE_JSON_B="$TEST_ROOT/private-nodeb/argus/agent/$HOST_B/node.json"

[[ -f "$NODE_JSON_A" ]] || { echo "[ERR] node.json missing for $HOST_A" >&2; exit 1; }
[[ -f "$NODE_JSON_B" ]] || { echo "[ERR] node.json missing for $HOST_B" >&2; exit 1; }

echo "[OK] Agents registered: $(cat "$TMP_DIR/node_id_a") , $(cat "$TMP_DIR/node_id_b")"
@@ -1,67 +0,0 @@
#!/usr/bin/env bash
set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
TEST_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"
TMP_DIR="$TEST_ROOT/tmp"

API_BASE="http://localhost:32300/api/v1/master"

HOST_A="dev-yyrshare-nbnyx10-cp2f-pod-0"
HOST_B="dev-yyrshare-uuuu10-ep2f-pod-0"

HEALTH_A="$TEST_ROOT/private-nodea/argus/agent/$HOST_A/health"
HEALTH_B="$TEST_ROOT/private-nodeb/argus/agent/$HOST_B/health"

write_health() {
local dir="$1"; mkdir -p "$dir"
cat > "$dir/log-fluentbit.json" <<JSON
{ "status": "healthy", "timestamp": "2024-10-05T12:05:00Z" }
JSON
cat > "$dir/metric-node-exporter.json" <<JSON
{ "status": "healthy", "timestamp": "2024-10-05T12:05:00Z" }
JSON
}

echo "[INFO] Writing health files for both nodes..."
write_health "$HEALTH_A"
write_health "$HEALTH_B"

ID_A="$(cat "$TMP_DIR/node_id_a")"
ID_B="$(cat "$TMP_DIR/node_id_b")"

check_health() {
local id="$1"; local tries=40
for _ in $(seq 1 $tries); do
sleep 2
resp=$(curl -fsS "$API_BASE/nodes/$id" 2>/dev/null || true)
[[ -z "$resp" ]] && continue
echo "$resp" > "$TMP_DIR/node_${id}_detail.json"
if python3 - "$TMP_DIR/node_${id}_detail.json" <<'PY'
import json,sys
node=json.load(open(sys.argv[1]))
h=node.get("health",{})
sys.exit(0 if ("log-fluentbit" in h and "metric-node-exporter" in h) else 1)
PY
then return 0; fi
done
return 1
}

check_health "$ID_A" || { echo "[ERR] health keys not reported for node A" >&2; exit 1; }
check_health "$ID_B" || { echo "[ERR] health keys not reported for node B" >&2; exit 1; }

NODES_JSON="$TEST_ROOT/private/argus/metric/prometheus/nodes.json"
if [[ ! -f "$NODES_JSON" ]]; then
echo "[ERR] nodes.json missing at $NODES_JSON" >&2; exit 1
fi

python3 - "$NODES_JSON" <<'PY'
import json,sys
with open(sys.argv[1]) as h:
    nodes=json.load(h)
assert isinstance(nodes,list)
assert len(nodes) == 2, f"expected 2 nodes online, got {len(nodes)}"
PY

echo "[OK] Health reported and nodes.json has 2 online nodes"
@@ -1,63 +0,0 @@
#!/usr/bin/env bash
set -euo pipefail

echo "[INFO] Sending logs via node-a/node-b and asserting ES counts..."

get_count() {
local idx="$1"
curl -s "http://localhost:9200/${idx}/_count?ignore_unavailable=true&allow_no_indices=true" | sed -E 's/.*"count":([0-9]+).*/\1/' | awk 'NF{print $0;exit} END{if(NR==0)print 0}'
}

train0=$(get_count "train-*")
infer0=$(get_count "infer-*")
base=$((train0 + infer0))
echo "[INFO] initial counts: train=${train0} infer=${infer0} total=${base}"

send_logs() {
local cname="$1"; local hosttag="$2"
docker exec "$cname" sh -lc 'mkdir -p /logs/train /logs/infer'
docker exec "$cname" sh -lc "ts=\$(date '+%F %T'); echo \"\$ts INFO [$hosttag] training step=1 loss=1.23 model=bert\" >> /logs/train/train-demo.log"
docker exec "$cname" sh -lc "ts=\$(date '+%F %T'); echo \"\$ts INFO [$hosttag] training step=2 loss=1.10 model=bert\" >> /logs/train/train-demo.log"
docker exec "$cname" sh -lc "ts=\$(date '+%F %T'); echo \"\$ts WARN [$hosttag] inference slow on batch=2 latency=1.9s\" >> /logs/infer/infer-demo.log"
}

# Determine container names
node_a=$(docker ps --format '{{.Names}}' | grep -E '^argus-node-a$|argus-sys-node-a-1' | head -n1)
node_b=$(docker ps --format '{{.Names}}' | grep -E '^argus-node-b$|argus-sys-node-b-1' | head -n1)

send_logs "$node_a" "host01"
send_logs "$node_b" "host02"

echo "[INFO] Waiting for ES to ingest..."
sleep 10

train1=$(get_count "train-*")
infer1=$(get_count "infer-*")
final=$((train1 + infer1))
echo "[INFO] final counts: train=${train1} infer=${infer1} total=${final}"

if (( final <= base )); then
echo "[ERR] ES total did not increase (${base} -> ${final})" >&2
exit 1
fi

if (( final < 4 )); then
echo "[ERR] ES total below expected threshold: ${final} < 4" >&2
exit 1
fi

# Health endpoints
es_health=$(curl -s "http://localhost:9200/_cluster/health" | grep -o '"status":"[^"]*"' | cut -d'"' -f4)
if [[ "$es_health" != "green" && "$es_health" != "yellow" ]]; then
echo "[ERR] ES health not green/yellow: $es_health" >&2
exit 1
fi

if ! curl -fs "http://localhost:5601/api/status" >/dev/null 2>&1; then
echo "[WARN] Kibana status endpoint not available"
fi

echo "[OK] ES counts increased and services healthy"
@@ -1,94 +0,0 @@
#!/usr/bin/env bash
set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
TEST_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"
TMP_DIR="$TEST_ROOT/tmp"
REPO_ROOT="$(cd "$TEST_ROOT/../../.." && pwd)"

API_BASE="http://localhost:32300/api/v1/master"

ID_B="$(cat "$TMP_DIR/node_id_b")"
IP0_B="$(cat "$TMP_DIR/initial_ip_b")"

detail_before="$TMP_DIR/node_b_before.json"
curl -fsS "$API_BASE/nodes/$ID_B" -o "$detail_before"
LAST0=$(python3 - "$detail_before" <<'PY'
import json,sys
node=json.load(open(sys.argv[1]))
print(node.get("last_updated",""))
PY
)
IP_BEFORE=$(python3 - "$detail_before" <<'PY'
import json,sys
node=json.load(open(sys.argv[1]))
print(node.get("meta_data",{}).get("ip",""))
PY
)

if [[ "$IP_BEFORE" != "$IP0_B" ]]; then
echo "[ERR] Expected initial IP $IP0_B for node-b, got $IP_BEFORE" >&2
exit 1
fi

compose() {
if docker compose version >/dev/null 2>&1; then
docker compose "$@"
else
docker-compose "$@"
fi
}

echo "[INFO] Recreating node-b with static IP 172.29.0.200..."
pushd "$TEST_ROOT" >/dev/null
compose -p argus-sys rm -sf node-b || true
popd >/dev/null

docker rm -f argus-node-b >/dev/null 2>&1 || true

AGENT_BIN_PATH="$(cat "$TMP_DIR/agent_binary_path")"

docker run -d \
--name argus-node-b \
--hostname dev-yyrshare-uuuu10-ep2f-pod-0 \
--network argus-sys-net \
--ip 172.29.0.200 \
--dns 172.29.0.2 \
-e MASTER_ENDPOINT=http://master.argus.com:3000 \
-e REPORT_INTERVAL_SECONDS=2 \
-e ARGUS_BUILD_UID=${ARGUS_BUILD_UID:-2133} \
-e ARGUS_BUILD_GID=${ARGUS_BUILD_GID:-2015} \
-e ES_HOST=es \
-e ES_PORT=9200 \
-p 2021:2020 \
-v "$TEST_ROOT/private-nodeb/argus/agent/dev-yyrshare-uuuu10-ep2f-pod-0:/private/argus/agent/dev-yyrshare-uuuu10-ep2f-pod-0" \
-v "$AGENT_BIN_PATH:/usr/local/bin/argus-agent:ro" \
-v "$SCRIPT_DIR/node_entrypoint.sh:/usr/local/bin/node-entrypoint.sh:ro" \
-v "$REPO_ROOT/src/log/fluent-bit/build/start-fluent-bit.sh:/assets/start-fluent-bit.sh:ro" \
-v "$REPO_ROOT/src/log/fluent-bit/build/etc:/assets/fluent-bit/etc:ro" \
-v "$REPO_ROOT/src/log/fluent-bit/build/packages:/assets/fluent-bit/packages:ro" \
--entrypoint /usr/local/bin/node-entrypoint.sh \
ubuntu:22.04 >/dev/null

echo "[INFO] Waiting for node-b to re-register with new IP..."
for _ in {1..40}; do
sleep 3
if curl -fsS "$API_BASE/nodes/$ID_B" -o "$TMP_DIR/node_b_after.json"; then
if python3 - "$TMP_DIR/node_b_after.json" "$LAST0" <<'PY'
import json,sys
node=json.load(open(sys.argv[1]))
last0=sys.argv[2]
ip=node.get("meta_data",{}).get("ip")
lu=node.get("last_updated")
assert ip=="172.29.0.200"
assert lu and lu!=last0
PY
then
echo "[OK] node-b re-registered with new IP 172.29.0.200"
exit 0
fi
fi
done

echo "[ERR] node-b did not update to IP 172.29.0.200 in time" >&2
exit 1
@@ -1,37 +0,0 @@
#!/usr/bin/env bash
set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
TEST_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"

compose() {
if docker compose version >/dev/null 2>&1; then
docker compose "$@"
else
docker-compose "$@"
fi
}

docker rm -f argus-node-b >/dev/null 2>&1 || true

pushd "$TEST_ROOT" >/dev/null
compose -p argus-sys down --remove-orphans || true
popd >/dev/null

echo "[INFO] Cleaning private directories..."
if [[ -d "$TEST_ROOT/private" ]]; then
docker run --rm -v "$TEST_ROOT/private:/target" ubuntu:24.04 chown -R "$(id -u):$(id -g)" /target >/dev/null 2>&1 || true
rm -rf "$TEST_ROOT/private"
fi
if [[ -d "$TEST_ROOT/private-nodea" ]]; then
docker run --rm -v "$TEST_ROOT/private-nodea:/target" ubuntu:24.04 chown -R "$(id -u):$(id -g)" /target >/dev/null 2>&1 || true
rm -rf "$TEST_ROOT/private-nodea"
fi
if [[ -d "$TEST_ROOT/private-nodeb" ]]; then
docker run --rm -v "$TEST_ROOT/private-nodeb:/target" ubuntu:24.04 chown -R "$(id -u):$(id -g)" /target >/dev/null 2>&1 || true
rm -rf "$TEST_ROOT/private-nodeb"
fi

rm -rf "$TEST_ROOT/tmp" "$TEST_ROOT/.env" || true

echo "[OK] Cleaned up system E2E"
@@ -1,57 +0,0 @@
#!/usr/bin/env bash
set -euo pipefail

LOG_PREFIX="[NODE]"
RUNTIME_USER="argusagent"
RUNTIME_GROUP="argusagent"
AGENT_UID="${ARGUS_BUILD_UID:-2133}"
AGENT_GID="${ARGUS_BUILD_GID:-2015}"
HOSTNAME_VAL="${HOSTNAME:-unknown}"

log() { echo "${LOG_PREFIX} $*"; }

# Prepare runtime user
if ! getent group "$AGENT_GID" >/dev/null 2>&1; then
groupadd -g "$AGENT_GID" "$RUNTIME_GROUP" || true
else
RUNTIME_GROUP="$(getent group "$AGENT_GID" | cut -d: -f1)"
fi
if ! getent passwd "$AGENT_UID" >/dev/null 2>&1; then
useradd -u "$AGENT_UID" -g "$AGENT_GID" -M -s /bin/bash "$RUNTIME_USER" || true
else
RUNTIME_USER="$(getent passwd "$AGENT_UID" | cut -d: -f1)"
fi
log "runtime user: $RUNTIME_USER ($AGENT_UID:$AGENT_GID)"

# Ensure agent data dirs exist (host volumes mounted)
AGENT_DIR="/private/argus/agent/${HOSTNAME_VAL}"
HEALTH_DIR="${AGENT_DIR}/health"
mkdir -p "$HEALTH_DIR"
chown -R "$AGENT_UID:$AGENT_GID" "$AGENT_DIR" 2>/dev/null || true

# Stage Fluent Bit assets into /private to reuse existing startup script
mkdir -p /private
if [[ -f /assets/start-fluent-bit.sh ]]; then
cp /assets/start-fluent-bit.sh /private/start-fluent-bit.sh
chmod +x /private/start-fluent-bit.sh
fi
if [[ -d /assets/fluent-bit/etc ]]; then
rm -rf /private/etc && mkdir -p /private
cp -r /assets/fluent-bit/etc /private/
fi
if [[ -d /assets/fluent-bit/packages ]]; then
cp -r /assets/fluent-bit/packages /private/
fi

# Start Fluent Bit in background (will block, so run via bash -lc &)
if [[ -x /private/start-fluent-bit.sh ]]; then
log "starting fluent-bit"
bash -lc '/private/start-fluent-bit.sh' &
else
log "missing /private/start-fluent-bit.sh; fluent-bit will not start"
fi

# Start agent in foreground as runtime user
log "starting argus-agent"
exec su -s /bin/bash -c /usr/local/bin/argus-agent "$RUNTIME_USER"