完成a6000测试系统构建、部署、测试整合 #35
@ -254,6 +254,54 @@ if [[ "$build_metric" == true ]]; then
|
||||
done
|
||||
fi
|
||||
|
||||
# =======================================
|
||||
# Web & Alert module images
|
||||
# =======================================
|
||||
|
||||
echo ""
|
||||
echo "Building Web and Alert module images..."
|
||||
|
||||
# Pre-pull commonly used base images for stability
|
||||
web_alert_base_images=(
|
||||
"node:20"
|
||||
"ubuntu:24.04"
|
||||
)
|
||||
|
||||
for base_image in "${web_alert_base_images[@]}"; do
|
||||
if ! pull_base_image "$base_image"; then
|
||||
build_failed=true
|
||||
fi
|
||||
done
|
||||
|
||||
web_builds=(
|
||||
"Web Frontend|src/web/build_tools/frontend/Dockerfile|argus-web-frontend:latest|."
|
||||
"Web Proxy|src/web/build_tools/proxy/Dockerfile|argus-web-proxy:latest|."
|
||||
)
|
||||
|
||||
for build_spec in "${web_builds[@]}"; do
|
||||
IFS='|' read -r image_label dockerfile_path image_tag build_context <<< "$build_spec"
|
||||
if build_image "$image_label" "$dockerfile_path" "$image_tag" "$build_context"; then
|
||||
images_built+=("$image_tag")
|
||||
else
|
||||
build_failed=true
|
||||
fi
|
||||
echo ""
|
||||
done
|
||||
|
||||
alert_builds=(
|
||||
"Alertmanager|src/alert/alertmanager/build/Dockerfile|argus-alertmanager:latest|."
|
||||
)
|
||||
|
||||
for build_spec in "${alert_builds[@]}"; do
|
||||
IFS='|' read -r image_label dockerfile_path image_tag build_context <<< "$build_spec"
|
||||
if build_image "$image_label" "$dockerfile_path" "$image_tag" "$build_context"; then
|
||||
images_built+=("$image_tag")
|
||||
else
|
||||
build_failed=true
|
||||
fi
|
||||
echo ""
|
||||
done
|
||||
|
||||
echo "======================================="
|
||||
echo "📦 Build Summary"
|
||||
echo "======================================="
|
||||
|
||||
@ -71,6 +71,9 @@ declare -A images=(
|
||||
["argus-metric-ftp:latest"]="argus-metric-ftp-latest.tar"
|
||||
["argus-metric-prometheus:latest"]="argus-metric-prometheus-latest.tar"
|
||||
["argus-metric-grafana:latest"]="argus-metric-grafana-latest.tar"
|
||||
["argus-web-frontend:latest"]="argus-web-frontend-latest.tar"
|
||||
["argus-web-proxy:latest"]="argus-web-proxy-latest.tar"
|
||||
["argus-alertmanager:latest"]="argus-alertmanager-latest.tar"
|
||||
)
|
||||
|
||||
# 函数:检查镜像是否存在
|
||||
|
||||
@ -20,10 +20,10 @@ RUN wget https://github.com/prometheus/alertmanager/releases/download/v${ALERTMA
|
||||
|
||||
ENV ALERTMANAGER_BASE_PATH=/private/argus/alert/alertmanager
|
||||
|
||||
ARG ARGUS_UID=2133
|
||||
ARG ARGUS_GID=2015
|
||||
ENV ARGUS_UID=${ARGUS_UID}
|
||||
ENV ARGUS_GID=${ARGUS_GID}
|
||||
ARG ARGUS_BUILD_UID=2133
|
||||
ARG ARGUS_BUILD_GID=2015
|
||||
ENV ARGUS_BUILD_UID=${ARGUS_BUILD_UID}
|
||||
ENV ARGUS_BUILD_GID=${ARGUS_BUILD_GID}
|
||||
|
||||
RUN mkdir -p /usr/share/alertmanager && \
|
||||
mkdir -p ${ALERTMANAGER_BASE_PATH} && \
|
||||
@ -33,16 +33,25 @@ RUN mkdir -p /usr/share/alertmanager && \
|
||||
|
||||
# 创建 alertmanager 用户(可自定义 UID/GID)
|
||||
# 创建 alertmanager 用户组
|
||||
RUN groupadd -g ${ARGUS_GID} alertmanager
|
||||
RUN set -eux; \
|
||||
if ! getent group "${ARGUS_BUILD_GID}" >/dev/null; then \
|
||||
groupadd -g "${ARGUS_BUILD_GID}" alertmanager || true; \
|
||||
fi; \
|
||||
if id alertmanager >/dev/null 2>&1; then \
|
||||
current_uid="$(id -u alertmanager)"; \
|
||||
if [ "$current_uid" != "${ARGUS_BUILD_UID}" ] && ! getent passwd "${ARGUS_BUILD_UID}" >/dev/null; then \
|
||||
usermod -u "${ARGUS_BUILD_UID}" alertmanager; \
|
||||
fi; \
|
||||
usermod -g "${ARGUS_BUILD_GID}" alertmanager || true; \
|
||||
else \
|
||||
if ! getent passwd "${ARGUS_BUILD_UID}" >/dev/null; then \
|
||||
useradd -M -s /usr/sbin/nologin -u "${ARGUS_BUILD_UID}" -g "${ARGUS_BUILD_GID}" alertmanager; \
|
||||
else \
|
||||
echo "UID ${ARGUS_BUILD_UID} already exists; skip creating user 'alertmanager'"; \
|
||||
fi; \
|
||||
fi
|
||||
|
||||
# 创建 alertmanager 用户并指定组
|
||||
RUN useradd -M -s /usr/sbin/nologin -u ${ARGUS_UID} -g ${ARGUS_GID} alertmanager
|
||||
|
||||
RUN chown -R alertmanager:alertmanager /usr/share/alertmanager && \
|
||||
chown -R alertmanager:alertmanager /alertmanager && \
|
||||
chown -R alertmanager:alertmanager ${ALERTMANAGER_BASE_PATH} && \
|
||||
chown -R alertmanager:alertmanager /private/argus/etc && \
|
||||
chown -R alertmanager:alertmanager /usr/local/bin
|
||||
RUN chown -R "${ARGUS_BUILD_UID}:${ARGUS_BUILD_GID}" /usr/share/alertmanager /alertmanager ${ALERTMANAGER_BASE_PATH} /private/argus/etc /usr/local/bin || true
|
||||
|
||||
# 配置内网 apt 源 (如果指定了内网选项)
|
||||
RUN if [ "$USE_INTRANET" = "true" ]; then \
|
||||
@ -86,4 +95,3 @@ EXPOSE 9093
|
||||
|
||||
# 使用 supervisor 作为入口点
|
||||
CMD ["/usr/bin/supervisord", "-c", "/etc/supervisor/conf.d/supervisord.conf"]
|
||||
|
||||
|
||||
@ -5,9 +5,9 @@ docker pull ubuntu:24.04
|
||||
source src/alert/tests/.env
|
||||
|
||||
docker build \
|
||||
--build-arg ARGUS_UID=${ARGUS_UID} \
|
||||
--build-arg ARGUS_GID=${ARGUS_GID} \
|
||||
--build-arg ARGUS_BUILD_UID=${ARGUS_BUILD_UID} \
|
||||
--build-arg ARGUS_BUILD_GID=${ARGUS_BUILD_GID} \
|
||||
-f src/alert/alertmanager/build/Dockerfile \
|
||||
-t argus-alertmanager:latest .
|
||||
|
||||
docker save -o argus-alertmanager-latest.tar argus-alertmanager:latest
|
||||
docker save -o argus-alertmanager-latest.tar argus-alertmanager:latest
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
DATA_ROOT=/home/argus/tmp/private/argus
|
||||
ARGUS_UID=1048
|
||||
ARGUS_GID=1048
|
||||
ARGUS_BUILD_UID=1048
|
||||
ARGUS_BUILD_GID=1048
|
||||
|
||||
USE_INTRANET=false
|
||||
|
||||
@ -4,15 +4,15 @@ services:
|
||||
context: ../../../
|
||||
dockerfile: src/alert/alertmanager/build/Dockerfile
|
||||
args:
|
||||
ARGUS_UID: ${ARGUS_UID:-2133}
|
||||
ARGUS_GID: ${ARGUS_GID:-2015}
|
||||
ARGUS_BUILD_UID: ${ARGUS_BUILD_UID:-2133}
|
||||
ARGUS_BUILD_GID: ${ARGUS_BUILD_GID:-2015}
|
||||
USE_INTRANET: ${USE_INTRANET:-false}
|
||||
image: argus-alertmanager:latest
|
||||
container_name: argus-alertmanager
|
||||
environment:
|
||||
- ALERTMANAGER_BASE_PATH=/private/argus/alert/alertmanager
|
||||
- ARGUS_UID=${ARGUS_UID:-2133}
|
||||
- ARGUS_GID=${ARGUS_GID:-2015}
|
||||
- ARGUS_BUILD_UID=${ARGUS_BUILD_UID:-2133}
|
||||
- ARGUS_BUILD_GID=${ARGUS_BUILD_GID:-2015}
|
||||
ports:
|
||||
- "${ARGUS_PORT:-9093}:9093"
|
||||
volumes:
|
||||
|
||||
@ -26,6 +26,7 @@ RUN apt-get update && \
|
||||
apt-get install -y \
|
||||
bind9 \
|
||||
bind9utils \
|
||||
dnsutils \
|
||||
bind9-doc \
|
||||
supervisor \
|
||||
net-tools \
|
||||
|
||||
@ -104,7 +104,26 @@ log_info "文件所有者: $OWNER"
|
||||
|
||||
# 确保发布目录存在
|
||||
log_info "确保发布目录存在: $PUBLISH_DIR"
|
||||
sudo mkdir -p "$PUBLISH_DIR"
|
||||
mkdir -p "$PUBLISH_DIR"
|
||||
|
||||
IFS=':' read -r OWNER_UID OWNER_GID <<< "$OWNER"
|
||||
if [[ -z "$OWNER_UID" || -z "$OWNER_GID" ]]; then
|
||||
log_error "--owner 格式不正确,应为 uid:gid"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
CURRENT_UID=$(id -u)
|
||||
CURRENT_GID=$(id -g)
|
||||
if [[ "$OWNER_UID" != "$CURRENT_UID" || "$OWNER_GID" != "$CURRENT_GID" ]]; then
|
||||
if [[ "$CURRENT_UID" -ne 0 ]]; then
|
||||
log_error "当前用户 (${CURRENT_UID}:${CURRENT_GID}) 无法设置所有者为 ${OWNER_UID}:${OWNER_GID}"
|
||||
log_error "请以目标用户运行脚本或预先调整目录权限"
|
||||
exit 1
|
||||
fi
|
||||
NEED_CHOWN=true
|
||||
else
|
||||
NEED_CHOWN=false
|
||||
fi
|
||||
|
||||
# 创建临时目录用于打包
|
||||
TEMP_PACKAGE_DIR="/tmp/argus-metric-package-$$"
|
||||
@ -208,26 +227,31 @@ fi
|
||||
TAR_NAME="argus-metric_$(echo $VERSION | tr '.' '_').tar.gz"
|
||||
log_info "创建发布包: $TAR_NAME"
|
||||
cd "$TEMP_PACKAGE_DIR"
|
||||
sudo tar -czf "$PUBLISH_DIR/$TAR_NAME" *
|
||||
tar -czf "$PUBLISH_DIR/$TAR_NAME" *
|
||||
cd - > /dev/null
|
||||
|
||||
# 设置文件所有者
|
||||
log_info "设置文件所有者为: $OWNER"
|
||||
sudo chown "$OWNER" "$PUBLISH_DIR/$TAR_NAME"
|
||||
if [[ "$NEED_CHOWN" == true ]]; then
|
||||
log_info "设置文件所有者为: $OWNER"
|
||||
chown "$OWNER" "$PUBLISH_DIR/$TAR_NAME"
|
||||
fi
|
||||
|
||||
# 清理临时目录
|
||||
rm -rf "$TEMP_PACKAGE_DIR"
|
||||
|
||||
# 更新 LATEST_VERSION 文件
|
||||
log_info "更新 LATEST_VERSION 文件..."
|
||||
echo "$VERSION" | sudo tee "$PUBLISH_DIR/LATEST_VERSION" > /dev/null
|
||||
sudo chown "$OWNER" "$PUBLISH_DIR/LATEST_VERSION"
|
||||
echo "$VERSION" > "$PUBLISH_DIR/LATEST_VERSION"
|
||||
if [[ "$NEED_CHOWN" == true ]]; then
|
||||
chown "$OWNER" "$PUBLISH_DIR/LATEST_VERSION"
|
||||
fi
|
||||
|
||||
# 复制 DNS 配置文件到发布目录根目录(直接从 config 目录复制)
|
||||
if [[ -f "config/dns.conf" ]]; then
|
||||
log_info "复制 DNS 配置文件到发布目录根目录..."
|
||||
sudo cp "config/dns.conf" "$PUBLISH_DIR/"
|
||||
sudo chown "$OWNER" "$PUBLISH_DIR/dns.conf"
|
||||
cp "config/dns.conf" "$PUBLISH_DIR/"
|
||||
if [[ "$NEED_CHOWN" == true ]]; then
|
||||
chown "$OWNER" "$PUBLISH_DIR/dns.conf"
|
||||
fi
|
||||
log_success "DNS 配置文件复制完成: $PUBLISH_DIR/dns.conf"
|
||||
else
|
||||
log_warning "未找到 config/dns.conf 文件,跳过 DNS 配置文件复制"
|
||||
@ -236,8 +260,10 @@ fi
|
||||
# 复制 setup.sh 到发布目录
|
||||
if [[ -f "scripts/setup.sh" ]]; then
|
||||
log_info "复制 setup.sh 到发布目录..."
|
||||
sudo cp "scripts/setup.sh" "$PUBLISH_DIR/"
|
||||
sudo chown "$OWNER" "$PUBLISH_DIR/setup.sh"
|
||||
cp "scripts/setup.sh" "$PUBLISH_DIR/"
|
||||
if [[ "$NEED_CHOWN" == true ]]; then
|
||||
chown "$OWNER" "$PUBLISH_DIR/setup.sh"
|
||||
fi
|
||||
fi
|
||||
|
||||
# 显示发布结果
|
||||
|
||||
@ -65,6 +65,8 @@ COPY grafana.ini /tmp/grafana.ini
|
||||
COPY datasources/datasources.yml /tmp/datasources.yml
|
||||
COPY dashboards/dashboards.yml /tmp/dashboards.yml
|
||||
COPY dashboards/default_dashboard_by_hostname.json /tmp/default_dashboard.json
|
||||
COPY dashboards/default_cluster_dashboard.json /tmp/default_cluster_dashboard.json
|
||||
COPY dashboards/default_dashboard_by_instance.json /tmp/default_dashboard_by_instance.json
|
||||
|
||||
# supervisor 配置
|
||||
COPY supervisord.conf /etc/supervisor/conf.d/supervisord.conf
|
||||
|
||||
@ -8,7 +8,7 @@ datasources:
|
||||
type: prometheus
|
||||
access: proxy
|
||||
uid: eezk1zvkie4g0a
|
||||
url: http://10.211.55.5:9090
|
||||
url: http://prom.metric.argus.com:9090
|
||||
isDefault: true
|
||||
editable: true
|
||||
jsonData:
|
||||
|
||||
@ -44,12 +44,18 @@ else
|
||||
fi
|
||||
|
||||
# 复制数据源配置文件到挂载目录
|
||||
if [ -f "/tmp/datasources.yml" ]; then
|
||||
echo "[INFO] Copying datasource configuration to /private/argus/metric/grafana/provisioning/datasources/"
|
||||
cp /tmp/datasources.yml /private/argus/metric/grafana/provisioning/datasources/datasources.yml
|
||||
echo "[INFO] Datasource configuration copied successfully"
|
||||
elif [ -d "/private/argus/metric/grafana/provisioning/datasources" ] && [ "$(ls -A /private/argus/metric/grafana/provisioning/datasources)" ]; then
|
||||
echo "[INFO] Found existing datasource provisioning files in /private/argus/metric/grafana/provisioning/datasources"
|
||||
DS_OUT="/private/argus/metric/grafana/provisioning/datasources/datasources.yml"
|
||||
PROM_DOMAIN="prom.metric.argus.com:9090"
|
||||
|
||||
if [ -f "/tmp/datasources.yml" ] && [ ! -f "$DS_OUT" ]; then
|
||||
echo "[INFO] Initializing datasource provisioning file from /tmp"
|
||||
cp /tmp/datasources.yml "$DS_OUT"
|
||||
fi
|
||||
|
||||
# 统一将数据源 URL 规范为 prom.metric.argus.com:9090
|
||||
if [ -f "$DS_OUT" ]; then
|
||||
sed -i -E "s#^\s*url:\s*http://[^[:space:]]+# url: http://$PROM_DOMAIN#g" "$DS_OUT" || true
|
||||
echo "[INFO] Datasource URL normalized to http://$PROM_DOMAIN"
|
||||
elif [ -d "/etc/grafana/provisioning/datasources" ] && [ "$(ls -A /etc/grafana/provisioning/datasources)" ]; then
|
||||
echo "[INFO] Found datasource provisioning files in /etc/grafana/provisioning/datasources"
|
||||
# 确保数据源配置目录权限正确
|
||||
@ -65,11 +71,33 @@ if [ -f "/tmp/dashboards.yml" ]; then
|
||||
echo "[INFO] Dashboard configuration copied successfully"
|
||||
fi
|
||||
|
||||
# 复制默认仪表板到挂载目录
|
||||
if [ -f "/tmp/default_dashboard.json" ]; then
|
||||
echo "[INFO] Copying default dashboard to /private/argus/metric/grafana/provisioning/dashboards/"
|
||||
cp /tmp/default_dashboard.json /private/argus/metric/grafana/provisioning/dashboards/default_dashboard.json
|
||||
echo "[INFO] Default dashboard copied successfully"
|
||||
# 复制默认仪表板到挂载目录(按需,不覆盖已存在文件)
|
||||
copy_dashboard_if_missing() {
|
||||
local src="$1"; local dst_name="$2"
|
||||
local dst_dir="/private/argus/metric/grafana/provisioning/dashboards"
|
||||
local dst="$dst_dir/$dst_name"
|
||||
if [ -f "$src" ]; then
|
||||
if [ ! -f "$dst" ]; then
|
||||
echo "[INFO] Installing dashboard: $dst_name"
|
||||
cp "$src" "$dst"
|
||||
else
|
||||
echo "[INFO] Dashboard exists, skip: $dst_name"
|
||||
fi
|
||||
fi
|
||||
}
|
||||
|
||||
copy_dashboard_if_missing "/tmp/default_dashboard.json" "default_dashboard.json"
|
||||
copy_dashboard_if_missing "/tmp/default_cluster_dashboard.json" "default_cluster_dashboard.json"
|
||||
copy_dashboard_if_missing "/tmp/default_dashboard_by_instance.json" "default_dashboard_by_instance.json"
|
||||
|
||||
# 规范面板中的数据源字段:将字符串 "prometheus" 替换为 null(使用默认数据源)
|
||||
DB_DIR="/private/argus/metric/grafana/provisioning/dashboards"
|
||||
if [ -d "$DB_DIR" ]; then
|
||||
for f in "$DB_DIR"/*.json; do
|
||||
[ -f "$f" ] || continue
|
||||
sed -i -E 's/"datasource"\s*:\s*"prometheus"/"datasource": null/g' "$f" || true
|
||||
done
|
||||
echo "[INFO] Normalized dashboard datasource to default (null)"
|
||||
fi
|
||||
|
||||
# 启动 Grafana
|
||||
|
||||
@ -1,9 +1,5 @@
|
||||
-version: "3.8"
|
||||
|
||||
networks:
|
||||
default:
|
||||
external: true
|
||||
name: argus-sys-net
|
||||
sysnet:
|
||||
driver: bridge
|
||||
ipam:
|
||||
driver: default
|
||||
@ -15,7 +11,7 @@ services:
|
||||
image: ${BIND_IMAGE_TAG:-argus-bind9:latest}
|
||||
container_name: argus-bind-sys
|
||||
networks:
|
||||
default:
|
||||
sysnet:
|
||||
ipv4_address: 172.29.0.2
|
||||
volumes:
|
||||
- ./private:/private
|
||||
@ -39,7 +35,7 @@ services:
|
||||
- ./private/argus/metric/prometheus:/private/argus/metric/prometheus
|
||||
- ./private/argus/etc:/private/argus/etc
|
||||
networks:
|
||||
default:
|
||||
sysnet:
|
||||
ipv4_address: 172.29.0.10
|
||||
restart: unless-stopped
|
||||
|
||||
@ -58,6 +54,9 @@ services:
|
||||
ports:
|
||||
- "9200:9200"
|
||||
restart: unless-stopped
|
||||
networks:
|
||||
sysnet:
|
||||
ipv4_address: 172.29.0.3
|
||||
|
||||
kibana:
|
||||
image: argus-kibana:latest
|
||||
@ -74,6 +73,9 @@ services:
|
||||
ports:
|
||||
- "5601:5601"
|
||||
restart: unless-stopped
|
||||
networks:
|
||||
sysnet:
|
||||
ipv4_address: 172.29.0.4
|
||||
|
||||
node-a:
|
||||
image: ubuntu:22.04
|
||||
@ -106,6 +108,8 @@ services:
|
||||
ports:
|
||||
- "2020:2020"
|
||||
restart: unless-stopped
|
||||
networks:
|
||||
- sysnet
|
||||
|
||||
node-b:
|
||||
image: ubuntu:22.04
|
||||
@ -138,6 +142,8 @@ services:
|
||||
ports:
|
||||
- "2021:2020"
|
||||
restart: unless-stopped
|
||||
networks:
|
||||
- sysnet
|
||||
|
||||
ftp:
|
||||
image: argus-metric-ftp:latest
|
||||
@ -160,7 +166,7 @@ services:
|
||||
- /etc/localtime:/etc/localtime:ro
|
||||
- /etc/timezone:/etc/timezone:ro
|
||||
networks:
|
||||
default:
|
||||
sysnet:
|
||||
ipv4_address: 172.29.0.40
|
||||
logging:
|
||||
driver: "json-file"
|
||||
@ -185,7 +191,7 @@ services:
|
||||
- /etc/localtime:/etc/localtime:ro
|
||||
- /etc/timezone:/etc/timezone:ro
|
||||
networks:
|
||||
default:
|
||||
sysnet:
|
||||
ipv4_address: 172.29.0.41
|
||||
logging:
|
||||
driver: "json-file"
|
||||
@ -205,6 +211,9 @@ services:
|
||||
- GF_SERVER_HTTP_PORT=3000
|
||||
- GF_LOG_LEVEL=warn
|
||||
- GF_LOG_MODE=console
|
||||
- GF_PATHS_PROVISIONING=/private/argus/metric/grafana/provisioning
|
||||
- GF_AUTH_ANONYMOUS_ENABLED=true
|
||||
- GF_AUTH_ANONYMOUS_ORG_ROLE=Viewer
|
||||
ports:
|
||||
- "${GRAFANA_PORT:-3000}:3000"
|
||||
volumes:
|
||||
@ -213,7 +222,7 @@ services:
|
||||
- /etc/localtime:/etc/localtime:ro
|
||||
- /etc/timezone:/etc/timezone:ro
|
||||
networks:
|
||||
default:
|
||||
sysnet:
|
||||
ipv4_address: 172.29.0.42
|
||||
depends_on:
|
||||
- prometheus
|
||||
@ -224,7 +233,7 @@ services:
|
||||
max-file: "3"
|
||||
|
||||
test-node:
|
||||
image: argus-metric-test-node:latest
|
||||
image: ubuntu:22.04
|
||||
container_name: argus-metric-test-node
|
||||
hostname: test-metric-node-001
|
||||
restart: unless-stopped
|
||||
@ -240,13 +249,21 @@ services:
|
||||
- FTP_USER=${FTP_USER:-ftpuser}
|
||||
- FTP_PASSWORD=${FTP_PASSWORD:-ZGClab1234!}
|
||||
- FTP_PORT=${FTP_PORT:-21}
|
||||
- ARGUS_BUILD_UID=${ARGUS_BUILD_UID:-2133}
|
||||
- ARGUS_BUILD_GID=${ARGUS_BUILD_GID:-2015}
|
||||
- METRIC_NODE_ROLE=cpu
|
||||
volumes:
|
||||
- ./private/argus/agent:/private/argus/agent
|
||||
- ./scripts/metric/test-node-entrypoint.sh:/usr/local/bin/metric-test-node-entrypoint.sh:ro
|
||||
- /etc/localtime:/etc/localtime:ro
|
||||
- /etc/timezone:/etc/timezone:ro
|
||||
command: sleep infinity
|
||||
entrypoint:
|
||||
- /usr/local/bin/metric-test-node-entrypoint.sh
|
||||
command:
|
||||
- sleep
|
||||
- infinity
|
||||
networks:
|
||||
default:
|
||||
sysnet:
|
||||
ipv4_address: 172.29.0.50
|
||||
logging:
|
||||
driver: "json-file"
|
||||
@ -255,7 +272,8 @@ services:
|
||||
max-file: "3"
|
||||
|
||||
test-gpu-node:
|
||||
image: argus-metric-test-gpu-node:latest
|
||||
profiles: ["gpu"]
|
||||
image: nvidia/cuda:12.2.2-runtime-ubuntu22.04
|
||||
container_name: argus-metric-test-gpu-node
|
||||
hostname: test-metric-gpu-node-001
|
||||
restart: unless-stopped
|
||||
@ -278,13 +296,21 @@ services:
|
||||
- NVIDIA_VISIBLE_DEVICES=all
|
||||
- NVIDIA_DRIVER_CAPABILITIES=compute,utility
|
||||
- GPU_MODE=gpu
|
||||
- ARGUS_BUILD_UID=${ARGUS_BUILD_UID:-2133}
|
||||
- ARGUS_BUILD_GID=${ARGUS_BUILD_GID:-2015}
|
||||
- METRIC_NODE_ROLE=gpu
|
||||
volumes:
|
||||
- ./private/argus/agent:/private/argus/agent
|
||||
- ./scripts/metric/test-node-entrypoint.sh:/usr/local/bin/metric-test-node-entrypoint.sh:ro
|
||||
- /etc/localtime:/etc/localtime:ro
|
||||
- /etc/timezone:/etc/timezone:ro
|
||||
command: sleep infinity
|
||||
entrypoint:
|
||||
- /usr/local/bin/metric-test-node-entrypoint.sh
|
||||
command:
|
||||
- sleep
|
||||
- infinity
|
||||
networks:
|
||||
default:
|
||||
sysnet:
|
||||
ipv4_address: 172.29.0.51
|
||||
logging:
|
||||
driver: "json-file"
|
||||
|
||||
@ -3,6 +3,38 @@ set -euo pipefail
|
||||
|
||||
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||
|
||||
ENABLE_GPU=false
|
||||
|
||||
usage() {
|
||||
cat <<'EOF'
|
||||
Usage: 00_e2e_test.sh [options]
|
||||
|
||||
Options:
|
||||
--enable-gpu 启用 GPU 相关拓扑与测试流程
|
||||
-h, --help 显示帮助信息
|
||||
EOF
|
||||
}
|
||||
|
||||
while [[ $# -gt 0 ]]; do
|
||||
case "$1" in
|
||||
--enable-gpu)
|
||||
ENABLE_GPU=true
|
||||
shift
|
||||
;;
|
||||
-h|--help)
|
||||
usage
|
||||
exit 0
|
||||
;;
|
||||
*)
|
||||
echo "Unknown argument: $1" >&2
|
||||
usage
|
||||
exit 1
|
||||
;;
|
||||
esac
|
||||
done
|
||||
|
||||
export ARGUS_SYS_ENABLE_GPU=$ENABLE_GPU
|
||||
|
||||
SCRIPTS=(
|
||||
"01_bootstrap.sh"
|
||||
"02_up.sh"
|
||||
@ -12,6 +44,11 @@ SCRIPTS=(
|
||||
"06_write_health_and_assert.sh"
|
||||
"07_logs_send_and_assert.sh"
|
||||
"08_restart_agent_reregister.sh"
|
||||
"10_metric_publish.sh"
|
||||
"11_metric_node_install.sh"
|
||||
"12_metric_gpu_install.sh"
|
||||
"13_metric_verify.sh"
|
||||
"14_metric_cleanup.sh"
|
||||
"09_down.sh"
|
||||
)
|
||||
|
||||
@ -23,4 +60,3 @@ for script in "${SCRIPTS[@]}"; do
|
||||
done
|
||||
|
||||
echo "[SYS-E2E] All tests completed"
|
||||
|
||||
|
||||
@ -22,6 +22,24 @@ ensure_image() {
|
||||
}
|
||||
|
||||
echo "[INFO] Preparing directories..."
|
||||
ensure_writable_dir() {
|
||||
local path="$1"
|
||||
local parent
|
||||
parent="$(dirname "$path")"
|
||||
mkdir -p "$parent" 2>/dev/null || true
|
||||
mkdir -p "$path" 2>/dev/null || true
|
||||
if [[ ! -w "$path" ]]; then
|
||||
docker run --rm -v "$parent:/target" ubuntu:24.04 bash -lc "chown -R $(id -u):$(id -g) /target" >/dev/null 2>&1 || true
|
||||
fi
|
||||
mkdir -p "$path"
|
||||
}
|
||||
|
||||
# preflight: make base dirs writable if inherited from root-owned mounts
|
||||
ensure_writable_dir "$PRIVATE_CORE/argus"
|
||||
ensure_writable_dir "$PRIVATE_CORE/argus/metric"
|
||||
ensure_writable_dir "$PRIVATE_CORE/argus/metric/grafana"
|
||||
ensure_writable_dir "$PRIVATE_CORE/argus/metric/prometheus"
|
||||
|
||||
mkdir -p \
|
||||
"$PRIVATE_CORE/argus/etc" \
|
||||
"$PRIVATE_CORE/argus/bind" \
|
||||
@ -57,6 +75,8 @@ chown -R "${ARGUS_BUILD_UID}:${ARGUS_BUILD_GID}" \
|
||||
"$PRIVATE_CORE/argus/agent" \
|
||||
"$PRIVATE_CORE/argus/etc" 2>/dev/null || true
|
||||
|
||||
echo "[INFO] Using compose-managed network (auto-created by docker compose)"
|
||||
|
||||
echo "[INFO] Distributing update-dns.sh for core services (bind/master/es/kibana)"
|
||||
BIND_UPDATE_SRC="$REPO_ROOT/src/bind/build/update-dns.sh"
|
||||
BIND_UPDATE_DEST="$PRIVATE_CORE/argus/etc/update-dns.sh"
|
||||
@ -75,8 +95,6 @@ ensure_image "argus-master:latest"
|
||||
ensure_image "argus-metric-ftp:latest"
|
||||
ensure_image "argus-metric-prometheus:latest"
|
||||
ensure_image "argus-metric-grafana:latest"
|
||||
ensure_image "argus-metric-test-node:latest"
|
||||
ensure_image "argus-metric-test-gpu-node:latest"
|
||||
|
||||
echo "[INFO] Building agent binary..."
|
||||
pushd "$REPO_ROOT/src/agent" >/dev/null
|
||||
@ -91,19 +109,25 @@ fi
|
||||
echo "$AGENT_BIN" > "$TMP_DIR/agent_binary_path"
|
||||
|
||||
# 检测GPU环境
|
||||
echo "[INFO] 检测GPU环境..."
|
||||
REQUEST_GPU=${ARGUS_SYS_ENABLE_GPU:-false}
|
||||
GPU_CHECK_SCRIPT="$REPO_ROOT/src/metric/tests/scripts/common/check-gpu.sh"
|
||||
if [ -f "$GPU_CHECK_SCRIPT" ]; then
|
||||
if [[ "$REQUEST_GPU" == "true" ]]; then
|
||||
echo "[INFO] --enable-gpu 已启用,开始检测GPU环境..."
|
||||
if [[ -f "$GPU_CHECK_SCRIPT" ]]; then
|
||||
if bash "$GPU_CHECK_SCRIPT" >/dev/null 2>&1; then
|
||||
echo "[INFO] GPU环境可用,将启动test-gpu-node容器"
|
||||
GPU_AVAILABLE=true
|
||||
echo "[INFO] GPU环境可用,将在 compose 中启用 test-gpu-node"
|
||||
GPU_AVAILABLE=true
|
||||
else
|
||||
echo "[INFO] GPU环境不可用,跳过test-gpu-node容器"
|
||||
GPU_AVAILABLE=false
|
||||
echo "[ERROR] 未检测到可用 GPU,但指定了 --enable-gpu" >&2
|
||||
exit 1
|
||||
fi
|
||||
else
|
||||
echo "[ERROR] 未找到 GPU 检测脚本: $GPU_CHECK_SCRIPT" >&2
|
||||
exit 1
|
||||
fi
|
||||
else
|
||||
echo "[WARN] 未找到GPU检测脚本: $GPU_CHECK_SCRIPT,跳过GPU检测"
|
||||
GPU_AVAILABLE=false
|
||||
GPU_AVAILABLE=false
|
||||
echo "[INFO] GPU 支持未启用,跳过 GPU 检测"
|
||||
fi
|
||||
|
||||
echo "[INFO] Writing .env with UID/GID and metric configuration"
|
||||
@ -112,7 +136,7 @@ ARGUS_BUILD_UID=$ARGUS_BUILD_UID
|
||||
ARGUS_BUILD_GID=$ARGUS_BUILD_GID
|
||||
|
||||
# GPU 配置
|
||||
GPU_AVAILABLE=$GPU_AVAILABLE
|
||||
ENABLE_GPU=$GPU_AVAILABLE
|
||||
|
||||
# FTP 配置
|
||||
FTP_PORT=21
|
||||
|
||||
@ -15,39 +15,51 @@ compose() {
|
||||
|
||||
echo "[INFO] Bringing up system stack..."
|
||||
|
||||
# 检测GPU环境
|
||||
echo "[INFO] 检测GPU环境..."
|
||||
REQUEST_GPU=${ARGUS_SYS_ENABLE_GPU:-false}
|
||||
GPU_AVAILABLE=false
|
||||
GPU_CHECK_SCRIPT="$REPO_ROOT/src/metric/tests/scripts/common/check-gpu.sh"
|
||||
if [ -f "$GPU_CHECK_SCRIPT" ]; then
|
||||
|
||||
if [[ "$REQUEST_GPU" == "true" ]]; then
|
||||
echo "[INFO] --enable-gpu 生效,验证主机 GPU..."
|
||||
if [[ -f "$GPU_CHECK_SCRIPT" ]]; then
|
||||
if bash "$GPU_CHECK_SCRIPT" >/dev/null 2>&1; then
|
||||
echo "[INFO] GPU环境可用,将启动GPU测试节点"
|
||||
GPU_AVAILABLE=true
|
||||
GPU_AVAILABLE=true
|
||||
echo "[INFO] GPU 检测通过,将启动 gpu profile"
|
||||
else
|
||||
echo "[INFO] GPU环境不可用,将跳过GPU测试节点"
|
||||
GPU_AVAILABLE=false
|
||||
echo "[ERROR] 主机缺少可用 GPU,无法继续 --enable-gpu 流程" >&2
|
||||
exit 1
|
||||
fi
|
||||
else
|
||||
echo "[ERROR] 未找到 GPU 检测脚本: $GPU_CHECK_SCRIPT" >&2
|
||||
exit 1
|
||||
fi
|
||||
else
|
||||
echo "[WARN] 未找到GPU检测脚本: $GPU_CHECK_SCRIPT,跳过GPU检测"
|
||||
GPU_AVAILABLE=false
|
||||
echo "[INFO] 未启用 GPU 流程"
|
||||
fi
|
||||
|
||||
pushd "$TEST_ROOT" >/dev/null
|
||||
compose -p argus-sys down --remove-orphans || true
|
||||
|
||||
# 清理可能由 08 脚本创建的同名容器,避免 compose up 冲突
|
||||
for name in argus-node-b; do
|
||||
if docker ps -aqf "name=^${name}$" >/dev/null 2>&1 && [[ -n "$(docker ps -aqf "name=^${name}$")" ]]; then
|
||||
docker rm -f "$name" >/dev/null 2>&1 || true
|
||||
fi
|
||||
done
|
||||
|
||||
# 根据GPU可用性决定启动的服务
|
||||
if [ "$GPU_AVAILABLE" = true ]; then
|
||||
echo "[INFO] 启动所有服务(包括test-gpu-node)..."
|
||||
compose -p argus-sys up -d
|
||||
if [[ "$GPU_AVAILABLE" == true ]]; then
|
||||
echo "[INFO] 启动所有服务(包含 gpu profile)..."
|
||||
compose -p argus-sys --profile gpu up -d
|
||||
else
|
||||
echo "[INFO] 启动基础服务(跳过test-gpu-node)..."
|
||||
compose -p argus-sys up -d --scale test-gpu-node=0
|
||||
echo "[INFO] 启动基础服务(不含 gpu profile)..."
|
||||
compose -p argus-sys up -d
|
||||
fi
|
||||
|
||||
popd >/dev/null
|
||||
|
||||
if [ "$GPU_AVAILABLE" = true ]; then
|
||||
echo "[OK] Services started: master:32300 es:9200 kibana:5601 node-a:2020 node-b:2021 test-gpu-node:172.29.0.51"
|
||||
if [[ "$GPU_AVAILABLE" == true ]]; then
|
||||
echo "[OK] Services started: master:32300 es:9200 kibana:5601 node-a:2020 node-b:2021 test-gpu-node:172.29.0.51"
|
||||
else
|
||||
echo "[OK] Services started: master:32300 es:9200 kibana:5601 node-a:2020 node-b:2021 (test-gpu-node skipped)"
|
||||
echo "[OK] Services started: master:32300 es:9200 kibana:5601 node-a:2020 node-b:2021 (gpu skipped)"
|
||||
fi
|
||||
|
||||
|
||||
@ -4,20 +4,15 @@ set -euo pipefail
|
||||
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||
TEST_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"
|
||||
|
||||
compose() {
|
||||
if docker compose version >/dev/null 2>&1; then
|
||||
docker compose "$@"
|
||||
else
|
||||
docker-compose "$@"
|
||||
fi
|
||||
}
|
||||
|
||||
service_id() {
|
||||
compose -p argus-sys ps -q "$1"
|
||||
# 直接根据 container_name 获取容器ID,避免 compose project 名称不一致导致查找失败
|
||||
cid_by_name() {
|
||||
docker ps -aqf "name=^$1$"
|
||||
}
|
||||
|
||||
echo "[INFO] Verifying DNS routing via bind..."
|
||||
|
||||
pushd "$TEST_ROOT" >/dev/null
|
||||
|
||||
# Check master IP file exists in shared private
|
||||
MASTER_FILE="$TEST_ROOT/private/argus/etc/master.argus.com"
|
||||
if [[ ! -f "$MASTER_FILE" ]]; then
|
||||
@ -28,7 +23,7 @@ MASTER_IP_HOST="$(cat "$MASTER_FILE" | tr -d '\r\n' || true)"
|
||||
echo "[INFO] master.argus.com file content: ${MASTER_IP_HOST}"
|
||||
|
||||
# dig inside bind container
|
||||
BIN_ID="$(service_id bind)"
|
||||
BIN_ID="$(cid_by_name argus-bind-sys)"
|
||||
if [[ -n "$BIN_ID" ]]; then
|
||||
DIG_IP="$(docker exec "$BIN_ID" dig +short master.argus.com A | tail -n1 || true)"
|
||||
echo "[INFO] dig(master.argus.com) from bind container -> $DIG_IP"
|
||||
@ -39,8 +34,8 @@ else
|
||||
echo "[WARN] bind container not found; skip dig"
|
||||
fi
|
||||
|
||||
for node in node-a node-b; do
|
||||
CID="$(service_id "$node")"
|
||||
for node in argus-node-a argus-node-b; do
|
||||
CID="$(cid_by_name "$node")"
|
||||
echo "[INFO] Checking resolution inside $node..."
|
||||
if ! docker exec "$CID" getent hosts master.argus.com >/dev/null 2>&1; then
|
||||
echo "[ERR] $node cannot resolve master.argus.com" >&2
|
||||
@ -50,5 +45,6 @@ for node in node-a node-b; do
|
||||
echo "[OK] $node resolved master.argus.com -> $RES"
|
||||
done
|
||||
|
||||
echo "[OK] DNS routing verified"
|
||||
popd >/dev/null
|
||||
|
||||
echo "[OK] DNS routing verified"
|
||||
|
||||
@ -49,8 +49,35 @@ for _ in {1..60}; do
|
||||
fi
|
||||
done
|
||||
|
||||
# 若仍未全部注册,尝试重启 node-b 并再等待一轮(兼容 DNS/启动时序抖动)
|
||||
if [[ ! -s "$TMP_DIR/node_id_a" || ! -s "$TMP_DIR/node_id_b" ]]; then
|
||||
echo "[ERR] Agents did not register in time" >&2
|
||||
echo "[WARN] node-a or node-b not registered in first window; restarting node-b and retrying..." >&2
|
||||
# 仅重启 node-b,避免影响 es/kibana/master
|
||||
if docker ps --format '{{.Names}}' | grep -q '^argus-node-b$'; then
|
||||
docker restart argus-node-b >/dev/null 2>&1 || true
|
||||
fi
|
||||
# 再等待一轮(最多 120 秒)
|
||||
> "$TMP_DIR/node_id_b"
|
||||
for _ in {1..60}; do
|
||||
sleep 2
|
||||
resp=$(curl -fsS "$API_BASE/nodes" 2>/dev/null || true)
|
||||
[[ -z "$resp" ]] && continue
|
||||
if ! echo "$resp" | head -c1 | grep -q '\['; then
|
||||
continue
|
||||
fi
|
||||
echo "$resp" > "$TMP_DIR/nodes_list.json"
|
||||
ID_A=$(extract_node "$HOST_A" "$TMP_DIR/node_id_a" "$TMP_DIR/nodes_list.json" 2>/dev/null || true)
|
||||
ID_B=$(extract_node "$HOST_B" "$TMP_DIR/node_id_b" "$TMP_DIR/nodes_list.json" 2>/dev/null || true)
|
||||
if [[ -s "$TMP_DIR/node_id_a" && -s "$TMP_DIR/node_id_b" ]]; then
|
||||
break
|
||||
fi
|
||||
done
|
||||
fi
|
||||
|
||||
if [[ ! -s "$TMP_DIR/node_id_a" || ! -s "$TMP_DIR/node_id_b" ]]; then
|
||||
echo "[ERR] Agents did not register in time (after retry)" >&2
|
||||
echo "[HINT] Current /nodes response:" >&2
|
||||
sed -n '1,200p' "$TMP_DIR/nodes_list.json" >&2 || true
|
||||
exit 1
|
||||
fi
|
||||
|
||||
|
||||
@ -3,9 +3,19 @@ set -euo pipefail
|
||||
|
||||
echo "[INFO] Sending logs via node-a/node-b and asserting ES counts..."
|
||||
|
||||
# Robust count helper: tolerates 404/503 and non-JSON responses, returns integer >=0
|
||||
get_count() {
|
||||
local idx="$1"
|
||||
curl -s "http://localhost:9200/${idx}/_count?ignore_unavailable=true&allow_no_indices=true" | sed -E 's/.*"count":([0-9]+).*/\1/' | awk 'NF{print $0;exit} END{if(NR==0)print 0}'
|
||||
local idx="$1"; local tmp; tmp=$(mktemp)
|
||||
local code
|
||||
code=$(curl -s -o "$tmp" -w "%{http_code}" "http://localhost:9200/${idx}/_count?ignore_unavailable=true&allow_no_indices=true" || true)
|
||||
if [[ "$code" == "200" ]]; then
|
||||
local val
|
||||
val=$(jq -r '(.count // 0) | tonumber? // 0' "$tmp" 2>/dev/null || echo 0)
|
||||
echo "$val"
|
||||
else
|
||||
echo 0
|
||||
fi
|
||||
rm -f "$tmp"
|
||||
}
|
||||
|
||||
train0=$(get_count "train-*")
|
||||
@ -32,11 +42,26 @@ send_logs "$node_a" "host01"
|
||||
send_logs "$node_b" "host02"
|
||||
|
||||
echo "[INFO] Waiting for ES to ingest..."
|
||||
sleep 10
|
||||
# Proactively refresh indices (ignore errors if not created yet)
|
||||
curl -s -X POST "http://localhost:9200/train-*/_refresh" >/dev/null 2>&1 || true
|
||||
curl -s -X POST "http://localhost:9200/infer-*/_refresh" >/dev/null 2>&1 || true
|
||||
|
||||
train1=$(get_count "train-*")
|
||||
infer1=$(get_count "infer-*")
|
||||
final=$((train1 + infer1))
|
||||
# Retry up to 120s for counts to increase and reach threshold (>=4)
|
||||
final=0
|
||||
threshold=4
|
||||
for attempt in {1..60}; do
|
||||
train1=$(get_count "train-*")
|
||||
infer1=$(get_count "infer-*")
|
||||
final=$((train1 + infer1))
|
||||
if (( final > base && final >= threshold )); then
|
||||
break
|
||||
fi
|
||||
echo "[..] waiting ES counts increase to >=${threshold} ($attempt/60) current=${final} base=${base}"
|
||||
# refresh indices again to speed up visibility
|
||||
curl -s -X POST "http://localhost:9200/train-*/_refresh" >/dev/null 2>&1 || true
|
||||
curl -s -X POST "http://localhost:9200/infer-*/_refresh" >/dev/null 2>&1 || true
|
||||
sleep 2
|
||||
done
|
||||
echo "[INFO] final counts: train=${train1} infer=${infer1} total=${final}"
|
||||
|
||||
if (( final <= base )); then
|
||||
@ -44,6 +69,7 @@ if (( final <= base )); then
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Minimal threshold to be tolerant: expect at least 4 documents (2 train + 1 infer per node)
|
||||
if (( final < 4 )); then
|
||||
echo "[ERR] ES total below expected threshold: ${final} < 4" >&2
|
||||
exit 1
|
||||
|
||||
@ -58,10 +58,25 @@ docker rm -f argus-node-b >/dev/null 2>&1 || true
|
||||
|
||||
AGENT_BIN_PATH="$(cat "$TMP_DIR/agent_binary_path")"
|
||||
|
||||
# Pick the compose-managed docker network name (default: argus-sys_sysnet).
detect_sysnet() {
  # Preferred: the explicitly named compose network, if it exists.
  if docker network inspect argus-sys_sysnet >/dev/null 2>&1; then
    echo argus-sys_sysnet; return
  fi
  # Fallback: infer the network the master container is attached to (take the first).
  local n
  n=$(docker inspect -f '{{range $k, $_ := .NetworkSettings.Networks}}{{println $k}}{{end}}' argus-master-sys 2>/dev/null | head -n1 || true)
  if [[ -n "$n" ]]; then echo "$n"; return; fi
  # Last resort: the compose project's default network (IPAM config not guaranteed).
  echo argus-sys_default
}
|
||||
SYSNET_NAME=$(detect_sysnet)
|
||||
echo "[INFO] Using docker network: $SYSNET_NAME"
|
||||
|
||||
docker run -d \
|
||||
--name argus-node-b \
|
||||
--hostname dev-yyrshare-uuuu10-ep2f-pod-0 \
|
||||
--network argus-sys-net \
|
||||
--network "$SYSNET_NAME" \
|
||||
--ip 172.29.0.200 \
|
||||
--dns 172.29.0.2 \
|
||||
-e MASTER_ENDPOINT=http://master.argus.com:3000 \
|
||||
|
||||
@ -12,12 +12,33 @@ compose() {
|
||||
fi
|
||||
}
|
||||
|
||||
docker rm -f argus-node-b >/dev/null 2>&1 || true
|
||||
|
||||
pushd "$TEST_ROOT" >/dev/null
|
||||
compose -p argus-sys down --remove-orphans || true
|
||||
compose down --remove-orphans || true
|
||||
popd >/dev/null
|
||||
|
||||
echo "[INFO] Force removing containers by name (if any)..."
|
||||
containers=(
|
||||
argus-node-a
|
||||
argus-node-b
|
||||
argus-metric-test-node
|
||||
argus-grafana
|
||||
argus-kibana-sys
|
||||
argus-master-sys
|
||||
argus-bind-sys
|
||||
argus-ftp
|
||||
argus-es-sys
|
||||
argus-prometheus
|
||||
)
|
||||
for c in "${containers[@]}"; do
|
||||
id=$(docker ps -aqf "name=^${c}$" || true)
|
||||
if [[ -n "$id" ]]; then
|
||||
docker rm -f "$id" >/dev/null 2>&1 || true
|
||||
fi
|
||||
done
|
||||
|
||||
echo "[INFO] Removing compose networks (handled by compose down)"
|
||||
|
||||
echo "[INFO] Cleaning private directories..."
|
||||
if [[ -d "$TEST_ROOT/private" ]]; then
|
||||
docker run --rm -v "$TEST_ROOT/private:/target" ubuntu:24.04 chown -R "$(id -u):$(id -g)" /target >/dev/null 2>&1 || true
|
||||
|
||||
66
src/sys/tests/scripts/10_metric_publish.sh
Executable file
66
src/sys/tests/scripts/10_metric_publish.sh
Executable file
@ -0,0 +1,66 @@
|
||||
#!/usr/bin/env bash
# Bumps, packages and publishes the metric client artifact to the FTP share
# used by the system test, so nodes can later install it via setup.sh.
set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
TEST_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"
REPO_ROOT="$(cd "$TEST_ROOT/../../.." && pwd)"

PLUGIN_DIR="$REPO_ROOT/src/metric/client-plugins/all-in-one-full"
FTP_CONTAINER="argus-ftp"

if [[ ! -d "$PLUGIN_DIR" ]]; then
  echo "[SYS-METRIC] Metric client plugin directory not found: $PLUGIN_DIR" >&2
  exit 1
fi

# Optional per-checkout overrides (ARGUS_BUILD_UID/GID, ...).
if [[ -f "$TEST_ROOT/.env" ]]; then
  # shellcheck source=/dev/null
  source "$TEST_ROOT/.env"
fi

# uid:gid the published files should be owned by (matches build-time user).
OWNER="${ARGUS_BUILD_UID:-2133}:${ARGUS_BUILD_GID:-2015}"

# Resolve where to publish: prefer the host path backing the running FTP
# container's /private/argus/ftp mount; otherwise fall back to the test tree.
resolve_output_dir() {
  local host_mount
  if docker ps --format '{{.Names}}' | grep -q "^${FTP_CONTAINER}$"; then
    host_mount=$(docker inspect "$FTP_CONTAINER" --format '{{range .Mounts}}{{if eq .Destination "/private/argus/ftp"}}{{.Source}}{{end}}{{end}}' 2>/dev/null || true)
    if [[ -n "$host_mount" ]]; then
      echo "$host_mount/share"
      return 0
    fi
  fi
  echo "$TEST_ROOT/private/argus/metric/ftp/share"
}

OUTPUT_DIR="$(resolve_output_dir)"
mkdir -p "$OUTPUT_DIR"

# Fail early with a hint if the share is not writable by the current user.
if [[ ! -w "$OUTPUT_DIR" ]]; then
  echo "[SYS-METRIC] 无法写入 FTP 输出目录: $OUTPUT_DIR" >&2
  echo "         请确认目录权限与 ARGUS_BUILD_UID/GID 一致" >&2
  exit 1
fi

pushd "$PLUGIN_DIR" >/dev/null

echo "[SYS-METRIC] Bumping metric artifact version..."
bash scripts/version-manager.sh bump minor

VERSION_FILE="config/VERSION"
if [[ ! -f "$VERSION_FILE" ]]; then
  echo "[SYS-METRIC] VERSION 文件缺失: $VERSION_FILE" >&2
  exit 1
fi

# Strip the trailing newline so the version interpolates cleanly into paths.
VERSION=$(tr -d '\n' < "$VERSION_FILE")
echo "[SYS-METRIC] 当前版本: $VERSION"

echo "[SYS-METRIC] Packaging metric artifact..."
bash scripts/package_artifact.sh --force

echo "[SYS-METRIC] Publishing artifact to FTP share..."
bash scripts/publish_artifact.sh "$VERSION" --output-dir "$OUTPUT_DIR" --owner "$OWNER"

popd >/dev/null

echo "[SYS-METRIC] Metric artifact published to $OUTPUT_DIR"
|
||||
50
src/sys/tests/scripts/11_metric_node_install.sh
Executable file
50
src/sys/tests/scripts/11_metric_node_install.sh
Executable file
@ -0,0 +1,50 @@
|
||||
#!/usr/bin/env bash
# Installs the metric client bundle inside the running test-node container by
# downloading setup.sh from the FTP service and executing it there.
set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
TEST_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"

# Optional per-checkout overrides (FTP_SERVER, FTP_USER, FTP_PASSWORD, ...).
if [[ -f "$TEST_ROOT/.env" ]]; then
  # shellcheck source=/dev/null
  source "$TEST_ROOT/.env"
fi

CONTAINER="argus-metric-test-node"

if ! docker ps --format '{{.Names}}' | grep -q "^${CONTAINER}$"; then
  echo "[SYS-METRIC] 容器 ${CONTAINER} 未运行,无法执行安装" >&2
  exit 1
fi

# FTP connection parameters; defaults match the compose test topology.
FTP_HOST="${FTP_SERVER:-172.29.0.40}"
FTP_USER="${FTP_USER:-ftpuser}"
FTP_PASSWORD="${FTP_PASSWORD:-ZGClab1234!}"
FTP_PORT="${FTP_PORT:-21}"

echo "[SYS-METRIC] 在 ${CONTAINER} 内执行安装 (FTP: ${FTP_HOST}:${FTP_PORT})"

# NOTE: the script body is single-quoted on purpose — the ${FTP_*} references
# expand inside the container from the `-e` environment, not on the host.
docker exec \
  -e FTP_HOST="$FTP_HOST" \
  -e FTP_USER="$FTP_USER" \
  -e FTP_PASSWORD="$FTP_PASSWORD" \
  -e FTP_PORT="$FTP_PORT" \
  "$CONTAINER" bash -c '
set -e

if ! command -v curl &>/dev/null; then
echo "[SYS-METRIC] curl 未安装,开始安装依赖..."
apt-get update >/dev/null && apt-get install -y curl >/dev/null
fi

cd /tmp
echo "[SYS-METRIC] 下载 setup.sh..."
curl -u "${FTP_USER}:${FTP_PASSWORD}" "ftp://${FTP_HOST}:${FTP_PORT}/setup.sh" -o setup.sh

echo "[SYS-METRIC] 执行安装..."
chmod +x setup.sh
bash setup.sh --server "${FTP_HOST}" --user "${FTP_USER}" --password "${FTP_PASSWORD}" --port "${FTP_PORT}"

echo "[SYS-METRIC] 安装完成"
'

echo "[SYS-METRIC] Metric test node 安装流程完成"
|
||||
64
src/sys/tests/scripts/12_metric_gpu_install.sh
Executable file
64
src/sys/tests/scripts/12_metric_gpu_install.sh
Executable file
@ -0,0 +1,64 @@
|
||||
#!/usr/bin/env bash
# GPU variant of the metric client install: skipped entirely unless
# ARGUS_SYS_ENABLE_GPU=true; additionally requires nvidia-smi in the container.
set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
TEST_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"

ENABLE_GPU=${ARGUS_SYS_ENABLE_GPU:-false}

# GPU flow is opt-in; exiting 0 keeps the surrounding test pipeline green.
if [[ "$ENABLE_GPU" != "true" ]]; then
  echo "[SYS-METRIC] 未启用 GPU 流程,跳过 GPU 节点安装"
  exit 0
fi

# Optional per-checkout overrides (FTP_SERVER, FTP_USER, FTP_PASSWORD, ...).
if [[ -f "$TEST_ROOT/.env" ]]; then
  # shellcheck source=/dev/null
  source "$TEST_ROOT/.env"
fi

CONTAINER="argus-metric-test-gpu-node"

if ! docker ps --format '{{.Names}}' | grep -q "^${CONTAINER}$"; then
  echo "[SYS-METRIC] 预期启动的 ${CONTAINER} 未运行" >&2
  exit 1
fi

# FTP connection parameters; defaults match the compose test topology.
FTP_HOST="${FTP_SERVER:-172.29.0.40}"
FTP_USER="${FTP_USER:-ftpuser}"
FTP_PASSWORD="${FTP_PASSWORD:-ZGClab1234!}"
FTP_PORT="${FTP_PORT:-21}"

echo "[SYS-METRIC] 在 GPU 节点执行安装 (FTP: ${FTP_HOST}:${FTP_PORT})"

# NOTE: single-quoted body — ${FTP_*} expand inside the container from `-e` env.
docker exec \
  -e FTP_HOST="$FTP_HOST" \
  -e FTP_USER="$FTP_USER" \
  -e FTP_PASSWORD="$FTP_PASSWORD" \
  -e FTP_PORT="$FTP_PORT" \
  "$CONTAINER" bash -c '
set -e

if ! command -v nvidia-smi &>/dev/null; then
echo "[SYS-METRIC] GPU 节点缺少 nvidia-smi" >&2
exit 1
fi

nvidia-smi >/dev/null || true

if ! command -v curl &>/dev/null; then
echo "[SYS-METRIC] curl 未安装,开始安装依赖..."
apt-get update >/dev/null && apt-get install -y curl >/dev/null
fi

cd /tmp
echo "[SYS-METRIC] 下载 setup.sh..."
curl -u "${FTP_USER}:${FTP_PASSWORD}" "ftp://${FTP_HOST}:${FTP_PORT}/setup.sh" -o setup.sh

echo "[SYS-METRIC] 执行安装..."
chmod +x setup.sh
bash setup.sh --server "${FTP_HOST}" --user "${FTP_USER}" --password "${FTP_PASSWORD}" --port "${FTP_PORT}"

echo "[SYS-METRIC] GPU 节点安装完成"
'

echo "[SYS-METRIC] Metric GPU 节点安装流程完成"
|
||||
40
src/sys/tests/scripts/13_metric_verify.sh
Executable file
40
src/sys/tests/scripts/13_metric_verify.sh
Executable file
@ -0,0 +1,40 @@
|
||||
#!/usr/bin/env bash
# Orchestrates the metric verification suite. Only the prometheus step is
# retried (its scrape targets may need time to converge); every other step
# runs once and, via set -e, aborts the suite on failure.
set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"

echo "[SYS-METRIC] Verify: master"
"$SCRIPT_DIR/13_metric_verify_master.sh"
echo

echo "[SYS-METRIC] Verify: prometheus"
PROM_RETRIES=${PROM_VERIFY_RETRIES:-2}
PROM_BACKOFF=${PROM_VERIFY_BACKOFF_SECONDS:-30}
try_no=0
# Keep re-running the prometheus check until it passes or retries run out.
until "$SCRIPT_DIR/13_metric_verify_prometheus.sh"; do
  try_no=$((try_no+1))
  if (( try_no > PROM_RETRIES )); then
    echo "[ERR] prometheus verify failed after $PROM_RETRIES retries" >&2
    exit 1
  fi
  echo "[WARN] prometheus verify failed; retry $try_no/$PROM_RETRIES after ${PROM_BACKOFF}s"
  sleep "$PROM_BACKOFF"
done
echo

echo "[SYS-METRIC] Verify: dataplane"
"$SCRIPT_DIR/13_metric_verify_dataplane.sh"
echo

echo "[SYS-METRIC] Verify: grafana"
"$SCRIPT_DIR/13_metric_verify_grafana.sh"
echo

echo "[SYS-METRIC] Verify: grafana panels"
"$SCRIPT_DIR/13_metric_verify_grafana_panels.sh"
echo

echo "[SYS-METRIC] Metric verification completed"
|
||||
47
src/sys/tests/scripts/13_metric_verify_dataplane.sh
Executable file
47
src/sys/tests/scripts/13_metric_verify_dataplane.sh
Executable file
@ -0,0 +1,47 @@
|
||||
#!/usr/bin/env bash
# Dataplane verification: confirms node_exporter serves metrics inside the
# test-node container and that Prometheus is actively scraping it (the sample
# timestamp is recent and advances between two queries taken 30s apart).
set -euo pipefail

TMP_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")"/.. && pwd)/tmp/metric-verify"
mkdir -p "$TMP_DIR"

PROM_BASE="http://localhost:9090/api/v1"
INSTANCE="${METRIC_TEST_INSTANCE:-172.29.0.50:9100}"
IP_ONLY="${INSTANCE%%:*}"  # strip :port — Prometheus labels carry the bare IP

echo "[VERIFY:DATA] node exporter metrics present in container"
docker exec argus-metric-test-node bash -lc "curl -fsS --max-time 5 http://localhost:9100/metrics | head -n 5" > "$TMP_DIR/node_metrics_head.txt" || { echo "[ERR] cannot fetch node exporter metrics" >&2; exit 1; }
# Expected lines may appear beyond the first 5, so this check is advisory only.
if ! grep -E "node_(exporter_build_info|time_seconds)" -q "$TMP_DIR/node_metrics_head.txt"; then
  echo "[WARN] head did not show expected lines; continuing (exporter may output later lines)"
fi
echo "[OK] node exporter endpoint reachable"

echo "[VERIFY:DATA] Prometheus has recent sample for build_info"
curl -fsS --max-time 5 --get "$PROM_BASE/query" --data-urlencode "query=node_exporter_build_info{job=\"node\",ip=\"$IP_ONLY\"}" > "$TMP_DIR/prom_ne_build_info_1.json"

# Assert the sample exists and is fresh (<180s). A failed assertion exits
# non-zero and aborts the script via set -e.
# (The original captured $? into an unused T1 afterwards — under set -e that
# assignment was dead code and always 0, so it has been removed.)
python3 - "$TMP_DIR/prom_ne_build_info_1.json" <<'PY'
import json,sys,time
j=json.load(open(sys.argv[1]))
res=j.get('data',{}).get('result',[])
assert res, 'no result for node_exporter_build_info'
ts=float(res[0]['value'][0])
now=time.time()
assert now-ts<180, f"sample too old: now={now} ts={ts}"
print(int(ts))
PY
# Wait one scrape interval, then re-query and require the timestamp to advance.
sleep 30
curl -fsS --max-time 5 --get "$PROM_BASE/query" --data-urlencode "query=node_exporter_build_info{job=\"node\",ip=\"$IP_ONLY\"}" > "$TMP_DIR/prom_ne_build_info_2.json"

TS1=$(python3 - "$TMP_DIR/prom_ne_build_info_1.json" <<'PY'
import json,sys
print(float(json.load(open(sys.argv[1]))['data']['result'][0]['value'][0]))
PY
)
TS2=$(python3 - "$TMP_DIR/prom_ne_build_info_2.json" <<'PY'
import json,sys
print(float(json.load(open(sys.argv[1]))['data']['result'][0]['value'][0]))
PY
)
# Float comparison in awk (shell arithmetic is integer-only).
awk -v a="$TS1" -v b="$TS2" 'BEGIN{ if (b>=a) exit 0; else exit 1 }' || { echo "[ERR] sample timestamp did not advance" >&2; exit 1; }
echo "[OK] sample timestamp advanced"
echo "[DONE] dataplane verify"
|
||||
39
src/sys/tests/scripts/13_metric_verify_grafana.sh
Executable file
39
src/sys/tests/scripts/13_metric_verify_grafana.sh
Executable file
@ -0,0 +1,39 @@
|
||||
#!/usr/bin/env bash
# Grafana verification: service health endpoint, datasource pointing at the
# bind-served Prometheus domain, and DNS resolution of that domain inside the
# grafana container.
set -euo pipefail

PROM_DOMAIN="prom.metric.argus.com:9090"
GRAF="http://localhost:3000"

echo "[VERIFY:GRAFANA] /api/health"
TMP_FILE="$(cd "$(dirname "$0")"/.. && pwd)/tmp/metric-verify/graf_health.json"
mkdir -p "$(dirname "$TMP_FILE")"
curl -fsS --max-time 10 "$GRAF/api/health" -o "$TMP_FILE" || { echo "[ERR] failed to GET /api/health" >&2; exit 1; }
# The health payload must report database == "ok".
python3 - "$TMP_FILE" <<'PY'
import sys,json
with open(sys.argv[1],'r',encoding='utf-8') as f:
    j=json.load(f)
assert j.get('database')=='ok', f"health not ok: {j}"
print('OK')
PY

echo "[VERIFY:GRAFANA] datasource URL uses domain: $PROM_DOMAIN"
# Prefer the provisioned file under /private; fall back to the image default path.
DS_FILE="/private/argus/metric/grafana/provisioning/datasources/datasources.yml"
if ! docker exec argus-grafana sh -lc "test -f $DS_FILE"; then
  DS_FILE="/etc/grafana/provisioning/datasources/datasources.yml"
fi
docker exec argus-grafana sh -lc "grep -E 'url:\s*http://$PROM_DOMAIN' '$DS_FILE'" >/dev/null 2>&1 || { echo "[ERR] datasource not pointing to $PROM_DOMAIN" >&2; exit 1; }
echo "[OK] datasource points to domain"

echo "[VERIFY:GRAFANA] bind resolution inside grafana"
# Wait up to 24 * 5s = 120s for DNS propagation from the bind container.
tries=0
until docker exec argus-grafana getent hosts prom.metric.argus.com >/dev/null 2>&1; do
  tries=$((tries+1))
  if (( tries > 24 )); then
    echo "[ERR] grafana cannot resolve prom.metric.argus.com" >&2
    exit 1
  fi
  echo "[..] waiting DNS propagation in grafana ($tries/24)"; sleep 5
done
echo "[OK] domain resolves"

echo "[DONE] grafana verify"
|
||||
70
src/sys/tests/scripts/13_metric_verify_grafana_panels.sh
Executable file
70
src/sys/tests/scripts/13_metric_verify_grafana_panels.sh
Executable file
@ -0,0 +1,70 @@
|
||||
#!/usr/bin/env bash
# Panel-level Grafana verification: queries Prometheus THROUGH the Grafana
# datasource proxy, proving the dashboards' data path end to end.
set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
TEST_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"
TMP_DIR="$TEST_ROOT/tmp/metric-verify"
mkdir -p "$TMP_DIR"

GRAF="http://localhost:3000"
HOSTNAME="${METRIC_TEST_HOSTNAME:-test-metric-node-001}"

echo "[VERIFY:GRAF-PANELS] resolve Prometheus datasource UID via Grafana"
DS_JSON="$TMP_DIR/graf_ds.json"
curl -fsS --max-time 10 "$GRAF/api/datasources" >"$DS_JSON"
# Take the UID of the first datasource of type "prometheus".
DS_UID=$(python3 - "$DS_JSON" <<'PY'
import json,sys
arr=json.load(open(sys.argv[1]))
for ds in arr:
    if (ds.get('type')=='prometheus'):
        print(ds.get('uid',''))
        break
PY
)
if [[ -z "$DS_UID" ]]; then echo "[ERR] no prometheus datasource found in grafana" >&2; exit 1; fi
echo "[OK] Prometheus DS UID=$DS_UID"

# Run a PromQL instant query through Grafana's datasource proxy.
# $1 - PromQL expression, $2 - output file for the JSON response.
proxy_query() {
  local q="$1"; local out="$2"
  curl -fsS --max-time 10 --get "$GRAF/api/datasources/proxy/uid/$DS_UID/api/v1/query" \
    --data-urlencode "query=$q" >"$out"
}

# Assert a query response is successful, non-empty, and its first sample is
# younger than $2 seconds (default 180). Prints the sample timestamp.
assert_vector_recent_nonempty() {
  local json="$1"; local max_age_sec="${2:-180}"
  python3 - <<'PY' "$json" "$max_age_sec"
import json,sys,time
doc=json.load(open(sys.argv[1]))
if doc.get('status')!='success':
    raise SystemExit('prom status != success')
res=doc.get('data',{}).get('result',[])
assert res, 'empty result'
ts=float(res[0]['value'][0])
assert time.time()-ts < float(sys.argv[2]), f'timestamp too old: {ts}'
print(int(ts))
PY
}

echo "[VERIFY:GRAF-PANELS] Dashboard: Node and GPU Metrics — System Load"
Q_NODE_LOAD="node_load1{hostname=\"$HOSTNAME\"}"
proxy_query "$Q_NODE_LOAD" "$TMP_DIR/graf_panel_node_load.json"
assert_vector_recent_nonempty "$TMP_DIR/graf_panel_node_load.json" 300 >/dev/null
echo "[OK] node_load1 has recent sample via Grafana proxy"

echo "[VERIFY:GRAF-PANELS] Dashboard: Cluster Dashboard — Node online count"
Q_NODE_ONLINE='count(count by(hostname) (up{job="node"} == 1))'
proxy_query "$Q_NODE_ONLINE" "$TMP_DIR/graf_panel_node_online.json"
# At least one node must be reported online.
python3 - "$TMP_DIR/graf_panel_node_online.json" <<'PY'
import json,sys
doc=json.load(open(sys.argv[1]))
assert doc.get('status')=='success', 'prom status not success'
res=doc.get('data',{}).get('result',[])
assert res, 'no series for node online count'
val=float(res[0]['value'][1])
assert val>=1, f'node online < 1: {val}'
print('OK',val)
PY
echo "[OK] cluster node online count >= 1 via Grafana proxy"

echo "[DONE] grafana panels verify"
|
||||
|
||||
105
src/sys/tests/scripts/13_metric_verify_master.sh
Executable file
105
src/sys/tests/scripts/13_metric_verify_master.sh
Executable file
@ -0,0 +1,105 @@
|
||||
#!/usr/bin/env bash
# Verifies the metric test node as seen by the master API: the node appears in
# /nodes, its detail record carries sane fields and healthy components, and
# its last_report keeps advancing until the node turns online.
set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
TEST_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"
TMP_DIR="$TEST_ROOT/tmp/metric-verify"
mkdir -p "$TMP_DIR"

MASTER_BASE="http://localhost:32300/api/v1/master"
HOSTNAME="${METRIC_TEST_HOSTNAME:-test-metric-node-001}"

curl_json() { curl -fsS --max-time 5 "$1"; }

echo "[VERIFY:MASTER] list nodes and locate target hostname=$HOSTNAME"
ALL_NODES_JSON="$TMP_DIR/master_nodes.json"

# Retry until the node shows up in /nodes (at most 24 * 5s = 120s).
NODE_ID=""
for attempt in {1..24}; do
  curl_json "$MASTER_BASE/nodes" > "$ALL_NODES_JSON" || true
  NODE_ID=$(python3 - "$ALL_NODES_JSON" "$HOSTNAME" <<'PY'
import json,sys
try:
    nodes=json.load(open(sys.argv[1]))
except Exception:
    nodes=[]
name=sys.argv[2]
for n in nodes:
    if n.get('name')==name:
        print(n.get('id',''))
        break
PY
)
  if [[ -n "$NODE_ID" ]]; then break; fi
  echo "[..] waiting node to appear in /nodes ($attempt/24)"; sleep 5
done

if [[ -z "$NODE_ID" ]]; then
  echo "[ERR] master /nodes 中未找到 $HOSTNAME(等待超时)" >&2
  echo "[HINT] 当前 /nodes 列表如下:" >&2
  sed -n '1,160p' "$ALL_NODES_JSON" >&2 || true
  exit 1
fi
echo "[OK] node id=$NODE_ID"

echo "[VERIFY:MASTER] get node detail and assert fields"
DETAIL1_JSON="$TMP_DIR/master_node_${NODE_ID}_detail_1.json"
curl_json "$MASTER_BASE/nodes/$NODE_ID" > "$DETAIL1_JSON"

# Basic field and health checks (the node is not required to be online yet).
python3 - "$DETAIL1_JSON" "$HOSTNAME" <<'PY'
import json,sys,datetime
j=json.load(open(sys.argv[1]))
host=sys.argv[2]
assert j.get('name')==host, f"name mismatch: {j.get('name')} != {host}"
status=j.get('status')
assert status in ('initialized','online','offline'), f"unexpected status: {status}"
md=j.get('meta_data',{})
assert md.get('hostname',j.get('name'))==host, 'meta_data.hostname mismatch'
assert 'last_report' in j and j['last_report'], 'last_report missing'
h=j.get('health',{})
for key in ('metric-node-exporter','metric-fluent-bit','metric-argus-agent'):
    if key in h:
        assert h[key].get('status')=='healthy', f"{key} not healthy: {h[key]}"
print('OK')
PY

# Poll until last_report advances and status becomes online (<=18 * 5s = 90s);
# transient 5xx/network errors are tolerated.
# Fixes vs. the original:
#  - attempt=$((attempt+1)) instead of ((attempt++)): the latter returns
#    status 1 when attempt is 0 and kills the whole script under set -e.
#  - the python helper prints "status timestamp" on ONE line so `read` fills
#    both fields (previously the timestamp was printed on a second line and
#    T_CUR stayed empty, so last_report progression was never detected).
#  - STATUS_CUR is pre-initialised so the final check cannot trip set -u when
#    every detail fetch failed.
attempt=0
T_PRE=0
STATUS_CUR=""
T_CUR=0
until [[ $attempt -ge 18 ]]; do
  sleep 5
  DETAIL_CUR="$TMP_DIR/master_node_${NODE_ID}_detail_cur.json"
  if ! curl_json "$MASTER_BASE/nodes/$NODE_ID" > "$DETAIL_CUR" 2>/dev/null; then
    echo "[..] retrying node detail fetch ($attempt/18)"; attempt=$((attempt+1)); continue
  fi
  read -r STATUS_CUR T_CUR < <(python3 - "$DETAIL_CUR" <<'PY'
import json,sys,datetime
j=json.load(open(sys.argv[1]))
st=j.get('status','')
ts=j.get('last_report','')
if ts.endswith('Z'): ts=ts.replace('Z','+00:00')
try:
    t=float(datetime.datetime.fromisoformat(ts).timestamp())
except Exception:
    t=0.0
print(st or 'unknown', t)
PY
) || true
  T_CUR=${T_CUR:-0}
  # Track the newest last_report timestamp seen so far (float compare via awk).
  if awk -v a="$T_PRE" -v b="$T_CUR" 'BEGIN{exit !(b>a)}'; then
    T_PRE="$T_CUR"
  fi
  if [[ "$STATUS_CUR" == "online" ]]; then
    echo "[OK] status online and last_report progressed"
    break
  fi
  attempt=$((attempt+1))
done
if (( attempt >= 18 )) && [[ "$STATUS_CUR" != "online" ]]; then
  echo "[WARN] status did not reach online within timeout; continuing"
fi

# Persist the node id for follow-up test steps.
echo "$NODE_ID" > "$TMP_DIR/node_id_metric"
echo "[DONE] master verify"
|
||||
142
src/sys/tests/scripts/13_metric_verify_prometheus.sh
Executable file
142
src/sys/tests/scripts/13_metric_verify_prometheus.sh
Executable file
@ -0,0 +1,142 @@
|
||||
#!/usr/bin/env bash
# Prometheus-side verification for the metric test node: the node is listed in
# nodes.json, a file_sd target exists for it, and up{job="node"} reaches 1.
set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
TEST_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"
TMP_DIR="$TEST_ROOT/tmp/metric-verify"
mkdir -p "$TMP_DIR"

PROM_BASE="http://localhost:9090/api/v1"
HOSTNAME="${METRIC_TEST_HOSTNAME:-test-metric-node-001}"

nodes_json="$TEST_ROOT/private/argus/metric/prometheus/nodes.json"
targets_json="$TEST_ROOT/private/argus/metric/prometheus/targets/node_exporter.json"

echo "[VERIFY:PROM] nodes.json present and contains hostname=$HOSTNAME"
[[ -f "$nodes_json" ]] || { echo "[ERR] $nodes_json missing" >&2; exit 1; }
python3 - "$nodes_json" "$HOSTNAME" <<'PY'
import json,sys
arr=json.load(open(sys.argv[1]))
host=sys.argv[2]
assert any((i.get('hostname')==host) for i in arr), f"{host} not found in nodes.json"
PY
echo "[OK] nodes.json contains target"

echo "[VERIFY:PROM] file_sd targets exist for nodes.json entries"
[[ -f "$targets_json" ]] || { echo "[ERR] $targets_json missing" >&2; exit 1; }
# Emit "ip" (line 1) and "ip:9100" (line 2) for the node under test.
python3 - "$nodes_json" "$targets_json" "$HOSTNAME" >"$TMP_DIR/prom_targets_ip_inst.txt" <<'PY'
import json,sys
nodes=json.load(open(sys.argv[1]))
file_sd=json.load(open(sys.argv[2]))
host=sys.argv[3]
# choose node matching hostname; fallback to first metric user node; otherwise first
sel = None
for n in nodes:
    if n.get('hostname') == host:
        sel = n
        break
if not sel:
    for n in nodes:
        if n.get('user_id') == 'metric':
            sel = n
            break
if not sel and nodes:
    sel = nodes[0]
if not sel:
    raise SystemExit('nodes.json empty or no suitable node found')
ip = sel['ip']
inst = f"{ip}:9100"
print(ip)
print(inst)
PY
IP_FIRST=$(sed -n '1p' "$TMP_DIR/prom_targets_ip_inst.txt")
INSTANCE=$(sed -n '2p' "$TMP_DIR/prom_targets_ip_inst.txt")
echo "[INFO] expecting instance in file_sd: $INSTANCE"

# Regenerate file_sd targets from nodes.json inside the Prometheus container
# to speed up convergence; failures are deliberately ignored (best effort).
refresh_targets() {
  docker exec argus-prometheus bash -lc \
    'python3 /usr/local/bin/update_targets.py --config /private/argus/metric/prometheus/nodes.json --targets-dir /private/argus/metric/prometheus/targets >/dev/null 2>&1 || true'
}

prometheus_running() {
  docker ps --format '{{.Names}}' | grep -q '^argus-prometheus$'
}

if prometheus_running; then
  echo "[..] triggering update_targets inside argus-prometheus"
  refresh_targets
fi

# Give Prometheus one initial scrape interval.
sleep 10

# Wait up to 36 * 5s = 180s for the target to land in file_sd, re-triggering
# the refresh every third attempt.
# NOTE: retry=$((retry+1)) — not ((retry++)), which returns status 1 when
# retry is 0 and aborts the whole script under set -e.
retry=0
until jq -r '.[].targets[]' "$targets_json" 2>/dev/null | grep -q "^${IP_FIRST}:9100$"; do
  if (( retry >= 36 )); then
    echo "[ERR] ${IP_FIRST}:9100 not present in file_sd after timeout" >&2
    echo "[HINT] current targets file content:" >&2
    sed -n '1,200p' "$targets_json" >&2 || true
    exit 1
  fi
  if (( retry % 3 == 0 )) && prometheus_running; then
    refresh_targets
  fi
  echo "[..] waiting file_sd refresh ($retry/36)"; sleep 5; retry=$((retry+1))
done

# Use the PromQL `up` metric as the health signal — the targets page status
# can flap while scrapes settle.
echo "[VERIFY:PROM] up{job=\"node\",ip=\"$IP_FIRST\"} > 0"
attempt=0
until (( attempt >= 60 )); do
  curl -fsS --max-time 5 --get "$PROM_BASE/query" --data-urlencode "query=up{job=\"node\",ip=\"$IP_FIRST\"}" > "$TMP_DIR/prom_up_inst_active.json" || true
  if python3 - "$TMP_DIR/prom_up_inst_active.json" <<'PY'
import json,sys
try:
    j=json.load(open(sys.argv[1]))
except Exception:
    raise SystemExit(1)
res=j.get('data',{}).get('result',[])
if res:
    try:
        val=float(res[0]['value'][1])
        if val>0: raise SystemExit(0)
    except Exception:
        pass
raise SystemExit(1)
PY
  then
    echo "[OK] up > 0 (control-plane scrape works)"; break
  fi
  if (( attempt % 6 == 0 )) && prometheus_running; then
    refresh_targets
  fi
  echo "[..] waiting up{job=\"node\",ip=\"$IP_FIRST\"} > 0 ($attempt/60)"; sleep 5; attempt=$((attempt+1))
done
if (( attempt >= 60 )); then
  echo "[ERR] up{job=\"node\",ip=\"$IP_FIRST\"} did not become > 0" >&2
  exit 1
fi

echo "[VERIFY:PROM] instant up query > 0"
curl -fsS --max-time 5 --get "$PROM_BASE/query" --data-urlencode "query=up{job=\"node\",ip=\"$IP_FIRST\"}" > "$TMP_DIR/prom_up_inst.json"
python3 - "$TMP_DIR/prom_up_inst.json" <<'PY'
import json,sys
j=json.load(open(sys.argv[1]))
res=j.get('data',{}).get('result',[])
assert res, 'empty result for up{job="node",instance=...}'
val=float(res[0]['value'][1])
assert val>0, f"up value not > 0: {val}"
PY
echo "[OK] up > 0"

echo "[VERIFY:PROM] count(up{job=\"node\"}==1) >= 1"
curl -fsS --max-time 5 --get "$PROM_BASE/query" --data-urlencode "query=count(up{job=\"node\"}==1)" > "$TMP_DIR/prom_up_count.json"
python3 - "$TMP_DIR/prom_up_count.json" <<'PY'
import json,sys
j=json.load(open(sys.argv[1]))
res=j.get('data',{}).get('result',[])
assert res, 'empty result for count(up{job="node"}==1)'
val=float(res[0]['value'][1])
assert val>=1, f"count < 1: {val}"
PY
echo "[OK] up count satisfied"
echo "[DONE] prometheus verify"
|
||||
18
src/sys/tests/scripts/14_metric_cleanup.sh
Executable file
18
src/sys/tests/scripts/14_metric_cleanup.sh
Executable file
@ -0,0 +1,18 @@
|
||||
#!/usr/bin/env bash
# Removes metric artifacts previously published to the local FTP share so the
# next test run starts from a clean state. Missing files are not an error.
set -euo pipefail

here="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
test_root="$(cd "$here/.." && pwd)"
share_dir="$test_root/private/argus/metric/ftp/share"

if [[ ! -d "$share_dir" ]]; then
  echo "[SYS-METRIC] FTP 目录不存在,跳过清理"
else
  echo "[SYS-METRIC] 清理 FTP 发布产物..."
  # rm -f already tolerates missing files; the guards keep unexpected
  # permission errors from aborting the cleanup.
  for victim in "$share_dir"/argus-metric_*.tar.gz "$share_dir"/LATEST_VERSION "$share_dir"/dns.conf "$share_dir"/setup.sh; do
    rm -f "$victim" 2>/dev/null || true
  done
fi

echo "[SYS-METRIC] Metric 清理完成"
|
||||
45
src/sys/tests/scripts/metric/test-node-entrypoint.sh
Executable file
45
src/sys/tests/scripts/metric/test-node-entrypoint.sh
Executable file
@ -0,0 +1,45 @@
|
||||
#!/usr/bin/env bash
# Entrypoint for the metric test-node container: installs base tooling once,
# applies the requested timezone, fixes agent directory ownership, optionally
# checks GPU availability, then execs the container command.
set -euo pipefail

ARGUS_BUILD_UID=${ARGUS_BUILD_UID:-2133}
ARGUS_BUILD_GID=${ARGUS_BUILD_GID:-2015}
AGENT_ROOT=${AGENT_ROOT:-/private/argus/agent}
# Marker file so the apt provisioning runs only on the first container start.
PREPARED_FLAG="/tmp/.metric_node_prepared"

export DEBIAN_FRONTEND=${DEBIAN_FRONTEND:-noninteractive}

if [[ ! -f "$PREPARED_FLAG" ]]; then
  apt-get update -qq
  apt-get install -y -qq \
    curl \
    net-tools \
    iproute2 \
    lsof \
    procps \
    ca-certificates \
    gnupg2 || {
    echo "[metric-node] Failed to install base packages" >&2
    exit 1
  }

  mkdir -p "$(dirname "$PREPARED_FLAG")"
  touch "$PREPARED_FLAG"
fi

# Apply the requested timezone; best effort (tzdata may be absent).
if [[ -n "${TZ:-}" ]]; then
  ln -snf "/usr/share/zoneinfo/${TZ}" /etc/localtime 2>/dev/null || true
  echo "$TZ" > /etc/timezone 2>/dev/null || true
fi

# Agent state directory must be owned by the build-time UID/GID.
mkdir -p "$AGENT_ROOT"
chown -R "${ARGUS_BUILD_UID}:${ARGUS_BUILD_GID}" "$AGENT_ROOT" 2>/dev/null || true

# GPU role requires a working nvidia-smi (injected by the container runtime).
if [[ "${METRIC_NODE_ROLE:-cpu}" == "gpu" ]]; then
  if ! command -v nvidia-smi >/dev/null 2>&1; then
    echo "[metric-node] nvidia-smi not available but GPU role requested" >&2
    exit 1
  fi
  nvidia-smi || true
fi

# Replace PID 1 with the container's CMD.
exec "$@"
|
||||
@ -46,7 +46,9 @@ fi
|
||||
# Start Fluent Bit in background (will block, so run via bash -lc &)
|
||||
if [[ -x /private/start-fluent-bit.sh ]]; then
|
||||
log "starting fluent-bit"
|
||||
bash -lc '/private/start-fluent-bit.sh' &
|
||||
sysctl -w fs.inotify.max_user_instances=512 >/dev/null 2>&1 || true
|
||||
sysctl -w fs.inotify.max_user_watches=524288 >/dev/null 2>&1 || true
|
||||
bash -lc 'ulimit -n 65536 || true; exec /private/start-fluent-bit.sh' &
|
||||
else
|
||||
log "missing /private/start-fluent-bit.sh; fluent-bit will not start"
|
||||
fi
|
||||
@ -54,4 +56,3 @@ fi
|
||||
# Start agent in foreground as runtime user
|
||||
log "starting argus-agent"
|
||||
exec su -s /bin/bash -c /usr/local/bin/argus-agent "$RUNTIME_USER"
|
||||
|
||||
|
||||
@ -24,24 +24,37 @@ RUN apt-get update && \
|
||||
apt-get clean && rm -rf /var/lib/apt/lists/*
|
||||
|
||||
ENV FRONTEND_BASE_PATH=/private/argus/web/frontend
|
||||
ARG ARGUS_UID=2133
|
||||
ARG ARGUS_GID=2015
|
||||
ENV ARGUS_UID=${ARGUS_UID}
|
||||
ENV ARGUS_GID=${ARGUS_GID}
|
||||
ARG ARGUS_BUILD_UID=2133
|
||||
ARG ARGUS_BUILD_GID=2015
|
||||
ENV ARGUS_BUILD_UID=${ARGUS_BUILD_UID}
|
||||
ENV ARGUS_BUILD_GID=${ARGUS_BUILD_GID}
|
||||
|
||||
RUN mkdir -p ${FRONTEND_BASE_PATH} && \
|
||||
mkdir -p /private/argus/etc
|
||||
|
||||
# 创建 web 用户(可自定义 UID/GID)
|
||||
# 创建 web 用户组
|
||||
RUN groupadd -g ${ARGUS_GID} web
|
||||
|
||||
# 创建 web 用户并指定组
|
||||
RUN useradd -M -s /usr/sbin/nologin -u ${ARGUS_UID} -g ${ARGUS_GID} web
|
||||
|
||||
RUN chown -R web:web ${FRONTEND_BASE_PATH} && \
|
||||
chown -R web:web /private/argus/etc && \
|
||||
chown -R web:web /usr/local/bin
|
||||
RUN set -eux; \
|
||||
# 确保目标 GID 存在(组名可不固定)\
|
||||
if ! getent group "${ARGUS_BUILD_GID}" >/dev/null; then \
|
||||
groupadd -g "${ARGUS_BUILD_GID}" web || true; \
|
||||
fi; \
|
||||
# 若存在 web 用户则尽量对齐 UID/GID;否则仅在 UID 未被占用时创建
|
||||
if id web >/dev/null 2>&1; then \
|
||||
current_uid="$(id -u web)"; \
|
||||
if [ "$current_uid" != "${ARGUS_BUILD_UID}" ] && ! getent passwd "${ARGUS_BUILD_UID}" >/dev/null; then \
|
||||
usermod -u "${ARGUS_BUILD_UID}" web; \
|
||||
fi; \
|
||||
usermod -g "${ARGUS_BUILD_GID}" web || true; \
|
||||
else \
|
||||
if ! getent passwd "${ARGUS_BUILD_UID}" >/dev/null; then \
|
||||
useradd -M -s /usr/sbin/nologin -u "${ARGUS_BUILD_UID}" -g "${ARGUS_BUILD_GID}" web; \
|
||||
else \
|
||||
echo "UID ${ARGUS_BUILD_UID} already exists; skip creating user 'web'"; \
|
||||
fi; \
|
||||
fi; \
|
||||
# 用数值 UID:GID 赋权,避免依赖用户名/组名
|
||||
chown -R "${ARGUS_BUILD_UID}:${ARGUS_BUILD_GID}" ${FRONTEND_BASE_PATH} /private/argus/etc /usr/local/bin || true
|
||||
|
||||
# 配置内网 apt 源 (如果指定了内网选项)
|
||||
RUN if [ "$USE_INTRANET" = "true" ]; then \
|
||||
|
||||
@ -4,7 +4,7 @@ docker pull ubuntu:24.04
|
||||
source src/web/tests/.env
|
||||
|
||||
docker build \
|
||||
--build-arg ARGUS_UID=${ARGUS_UID} \
|
||||
--build-arg ARGUS_GID=${ARGUS_GID} \
|
||||
--build-arg ARGUS_BUILD_UID=${ARGUS_BUILD_UID} \
|
||||
--build-arg ARGUS_BUILD_GID=${ARGUS_BUILD_GID} \
|
||||
-f src/web/build_tools/frontend/Dockerfile -t argus-web-frontend:latest .
|
||||
docker save -o argus-web-frontend-latest.tar argus-web-frontend:latest
|
||||
|
||||
@ -8,8 +8,8 @@ DNS_SCRIPT="${DNS_DIR}/update-dns.sh"
|
||||
DOMAIN=web.argus.com
|
||||
WEB_DOMAIN_FILE="${DNS_DIR}/${DOMAIN}"
|
||||
RUNTIME_USER="${ARGUS_RUNTIME_USER:-argus}"
|
||||
RUNTIME_UID="${ARGUS_UID:-2133}"
|
||||
RUNTIME_GID="${ARGUS_GID:-2015}"
|
||||
RUNTIME_UID="${ARGUS_BUILD_UID:-2133}"
|
||||
RUNTIME_GID="${ARGUS_BUILD_GID:-2015}"
|
||||
|
||||
mkdir -p "$DNS_DIR"
|
||||
chown -R "$RUNTIME_UID:$RUNTIME_GID" "$DNS_DIR" 2>/dev/null || true
|
||||
|
||||
@ -8,24 +8,34 @@ RUN apt-get update && \
|
||||
apt-get clean && rm -rf /var/lib/apt/lists/*
|
||||
|
||||
ENV FRONTEND_BASE_PATH=/private/argus/web/proxy
|
||||
ARG ARGUS_UID=2133
|
||||
ARG ARGUS_GID=2015
|
||||
ENV ARGUS_UID=${ARGUS_UID}
|
||||
ENV ARGUS_GID=${ARGUS_GID}
|
||||
ARG ARGUS_BUILD_UID=2133
|
||||
ARG ARGUS_BUILD_GID=2015
|
||||
ENV ARGUS_BUILD_UID=${ARGUS_BUILD_UID}
|
||||
ENV ARGUS_BUILD_GID=${ARGUS_BUILD_GID}
|
||||
|
||||
RUN mkdir -p ${FRONTEND_BASE_PATH} && \
|
||||
mkdir -p /private/argus/etc
|
||||
|
||||
# 创建 proxy 用户(可自定义 UID/GID)
|
||||
# 创建 proxy 用户组
|
||||
RUN groupadd -g ${ARGUS_GID} web_proxy
|
||||
|
||||
# 创建 proxy 用户并指定组
|
||||
RUN useradd -M -s /usr/sbin/nologin -u ${ARGUS_UID} -g ${ARGUS_GID} web_proxy
|
||||
|
||||
RUN chown -R web_proxy:web_proxy ${FRONTEND_BASE_PATH} && \
|
||||
chown -R web_proxy:web_proxy /private/argus/etc && \
|
||||
chown -R web_proxy:web_proxy /usr/local/bin
|
||||
RUN set -eux; \
|
||||
if ! getent group "${ARGUS_BUILD_GID}" >/dev/null; then \
|
||||
groupadd -g "${ARGUS_BUILD_GID}" web_proxy || true; \
|
||||
fi; \
|
||||
if id web_proxy >/dev/null 2>&1; then \
|
||||
current_uid="$(id -u web_proxy)"; \
|
||||
if [ "$current_uid" != "${ARGUS_BUILD_UID}" ] && ! getent passwd "${ARGUS_BUILD_UID}" >/dev/null; then \
|
||||
usermod -u "${ARGUS_BUILD_UID}" web_proxy; \
|
||||
fi; \
|
||||
usermod -g "${ARGUS_BUILD_GID}" web_proxy || true; \
|
||||
else \
|
||||
if ! getent passwd "${ARGUS_BUILD_UID}" >/dev/null; then \
|
||||
useradd -M -s /usr/sbin/nologin -u "${ARGUS_BUILD_UID}" -g "${ARGUS_BUILD_GID}" web_proxy; \
|
||||
else \
|
||||
echo "UID ${ARGUS_BUILD_UID} already exists; skip creating user 'web_proxy'"; \
|
||||
fi; \
|
||||
fi; \
|
||||
chown -R "${ARGUS_BUILD_UID}:${ARGUS_BUILD_GID}" ${FRONTEND_BASE_PATH} /private/argus/etc /usr/local/bin || true
|
||||
|
||||
# 配置内网 apt 源 (如果指定了内网选项)
|
||||
RUN if [ "$USE_INTRANET" = "true" ]; then \
|
||||
|
||||
@ -3,7 +3,7 @@ docker pull ubuntu:24.04
|
||||
source src/web/tests/.env
|
||||
|
||||
docker build \
|
||||
--build-arg ARGUS_UID=${ARGUS_UID} \
|
||||
--build-arg ARGUS_GID=${ARGUS_GID} \
|
||||
--build-arg ARGUS_BUILD_UID=${ARGUS_BUILD_UID} \
|
||||
--build-arg ARGUS_BUILD_GID=${ARGUS_BUILD_GID} \
|
||||
-f src/web/build_tools/proxy/Dockerfile -t argus-web-proxy:latest .
|
||||
docker save -o argus-web-proxy-latest.tar argus-web-proxy:latest
|
||||
|
||||
@ -9,8 +9,8 @@ DNS_CONF_PRIVATE="/private/argus/etc/dns.conf"
|
||||
DNS_CONF_SYSTEM="/etc/resolv.conf"
|
||||
DNS_DIR="/private/argus/etc"
|
||||
DNS_SCRIPT="${DNS_DIR}/update-dns.sh"
|
||||
RUNTIME_UID="${ARGUS_UID:-2133}"
|
||||
RUNTIME_GID="${ARGUS_GID:-2015}"
|
||||
RUNTIME_UID="${ARGUS_BUILD_UID:-2133}"
|
||||
RUNTIME_GID="${ARGUS_BUILD_GID:-2015}"
|
||||
|
||||
mkdir -p "$DNS_DIR"
|
||||
chown -R "$RUNTIME_UID:$RUNTIME_GID" "$DNS_DIR" 2>/dev/null || true
|
||||
|
||||
@ -4,15 +4,15 @@ services:
|
||||
context: ../../../
|
||||
dockerfile: src/web/build_tools/frontend/Dockerfile
|
||||
args:
|
||||
ARGUS_UID: ${ARGUS_UID:-2133}
|
||||
ARGUS_GID: ${ARGUS_GID:-2015}
|
||||
ARGUS_BUILD_UID: ${ARGUS_BUILD_UID:-2133}
|
||||
ARGUS_BUILD_GID: ${ARGUS_BUILD_GID:-2015}
|
||||
USE_INTRANET: ${USE_INTRANET:-false}
|
||||
image: argus-web-frontend:latest
|
||||
container_name: argus-web-frontend
|
||||
environment:
|
||||
- ALERTMANAGER_BASE_PATH=/private/argus/web/frontend
|
||||
- ARGUS_UID=${ARGUS_UID:-2133}
|
||||
- ARGUS_GID=${ARGUS_GID:-2015}
|
||||
- ARGUS_BUILD_UID=${ARGUS_BUILD_UID:-2133}
|
||||
- ARGUS_BUILD_GID=${ARGUS_BUILD_GID:-2015}
|
||||
ports:
|
||||
- "${ARGUS_WEB_PORT:-8080}:80"
|
||||
volumes:
|
||||
@ -31,14 +31,14 @@ services:
|
||||
context: ../../../
|
||||
dockerfile: src/web/build_tools/proxy/Dockerfile
|
||||
args:
|
||||
ARGUS_UID: ${ARGUS_UID:-2133}
|
||||
ARGUS_GID: ${ARGUS_GID:-2015}
|
||||
ARGUS_BUILD_UID: ${ARGUS_BUILD_UID:-2133}
|
||||
ARGUS_BUILD_GID: ${ARGUS_BUILD_GID:-2015}
|
||||
USE_INTRANET: ${USE_INTRANET:-false}
|
||||
image: argus-web-proxy:latest
|
||||
container_name: argus-web-proxy
|
||||
environment:
|
||||
- ARGUS_UID=${ARGUS_UID:-2133}
|
||||
- ARGUS_GID=${ARGUS_GID:-2015}
|
||||
- ARGUS_BUILD_UID=${ARGUS_BUILD_UID:-2133}
|
||||
- ARGUS_BUILD_GID=${ARGUS_BUILD_GID:-2015}
|
||||
ports:
|
||||
- "8088:80"
|
||||
volumes:
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user