[#29] Finished proxying to web/grafana/prom/alert; remaining issue: the web node-list page still cannot fetch its data
parent 1d4208ed3c
commit 2c799f2c1e
@ -12,6 +12,7 @@ Options:
|
||||
--master-offline Build master offline image (requires src/master/offline_wheels.tar.gz)
|
||||
--metric Build metric module images (ftp, prometheus, grafana, test nodes)
|
||||
--no-cache Build all images without using Docker layer cache
|
||||
--only LIST Comma-separated targets to build: core,master,metric,web,alert,all
|
||||
-h, --help Show this help message
|
||||
|
||||
Examples:
|
||||
@ -24,9 +25,12 @@ EOF
|
||||
}
|
||||
|
||||
use_intranet=false
|
||||
build_core=true
|
||||
build_master=true
|
||||
build_master_offline=false
|
||||
build_metric=true
|
||||
build_web=true
|
||||
build_alert=true
|
||||
no_cache=false
|
||||
|
||||
while [[ $# -gt 0 ]]; do
|
||||
@ -52,6 +56,26 @@ while [[ $# -gt 0 ]]; do
|
||||
no_cache=true
|
||||
shift
|
||||
;;
|
||||
--only)
|
||||
if [[ -z ${2:-} ]]; then
|
||||
echo "--only requires a target list" >&2; exit 1
|
||||
fi
|
||||
sel="$2"; shift 2
|
||||
# reset all, then enable selected
|
||||
build_core=false; build_master=false; build_metric=false; build_web=false; build_alert=false
|
||||
IFS=',' read -ra parts <<< "$sel"
|
||||
for p in "${parts[@]}"; do
|
||||
case "$p" in
|
||||
core) build_core=true ;;
|
||||
master) build_master=true ;;
|
||||
metric) build_metric=true ;;
|
||||
web) build_web=true ;;
|
||||
alert) build_alert=true ;;
|
||||
all) build_core=true; build_master=true; build_metric=true; build_web=true; build_alert=true ;;
|
||||
*) echo "Unknown --only target: $p" >&2; exit 1 ;;
|
||||
esac
|
||||
done
|
||||
;;
|
||||
-h|--help)
|
||||
show_help
|
||||
exit 0
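For reference, a usage sketch of the new selection flag; the build script path below is an assumption (it is not shown in this diff), the targets come from the option list above:

  ./build/build_images.sh --only web,alert            # build only the web and alert images
  ./build/build_images.sh --only metric --no-cache    # rebuild the metric images without layer cache
  ./build/build_images.sh --only all                  # same set as the default full build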
@ -177,26 +201,28 @@ pull_base_image() {
|
||||
images_built=()
|
||||
build_failed=false
|
||||
|
||||
if build_image "Elasticsearch" "src/log/elasticsearch/build/Dockerfile" "argus-elasticsearch:latest"; then
|
||||
if [[ "$build_core" == true ]]; then
|
||||
if build_image "Elasticsearch" "src/log/elasticsearch/build/Dockerfile" "argus-elasticsearch:latest"; then
|
||||
images_built+=("argus-elasticsearch:latest")
|
||||
else
|
||||
else
|
||||
build_failed=true
|
||||
fi
|
||||
fi
|
||||
|
||||
echo ""
|
||||
echo ""
|
||||
|
||||
if build_image "Kibana" "src/log/kibana/build/Dockerfile" "argus-kibana:latest"; then
|
||||
if build_image "Kibana" "src/log/kibana/build/Dockerfile" "argus-kibana:latest"; then
|
||||
images_built+=("argus-kibana:latest")
|
||||
else
|
||||
else
|
||||
build_failed=true
|
||||
fi
|
||||
fi
|
||||
|
||||
echo ""
|
||||
echo ""
|
||||
|
||||
if build_image "BIND9" "src/bind/build/Dockerfile" "argus-bind9:latest"; then
|
||||
if build_image "BIND9" "src/bind/build/Dockerfile" "argus-bind9:latest"; then
|
||||
images_built+=("argus-bind9:latest")
|
||||
else
|
||||
else
|
||||
build_failed=true
|
||||
fi
|
||||
fi
|
||||
|
||||
echo ""
|
||||
@ -264,27 +290,28 @@ fi
|
||||
# Web & Alert module images
|
||||
# =======================================
|
||||
|
||||
echo ""
|
||||
echo "Building Web and Alert module images..."
|
||||
if [[ "$build_web" == true || "$build_alert" == true ]]; then
|
||||
echo ""
|
||||
echo "Building Web and Alert module images..."
|
||||
|
||||
# Pre-pull commonly used base images for stability
|
||||
web_alert_base_images=(
|
||||
# Pre-pull commonly used base images for stability
|
||||
web_alert_base_images=(
|
||||
"node:20"
|
||||
"ubuntu:24.04"
|
||||
)
|
||||
)
|
||||
|
||||
for base_image in "${web_alert_base_images[@]}"; do
|
||||
for base_image in "${web_alert_base_images[@]}"; do
|
||||
if ! pull_base_image "$base_image"; then
|
||||
build_failed=true
|
||||
fi
|
||||
done
|
||||
done
|
||||
|
||||
web_builds=(
|
||||
if [[ "$build_web" == true ]]; then
|
||||
web_builds=(
|
||||
"Web Frontend|src/web/build_tools/frontend/Dockerfile|argus-web-frontend:latest|."
|
||||
"Web Proxy|src/web/build_tools/proxy/Dockerfile|argus-web-proxy:latest|."
|
||||
)
|
||||
|
||||
for build_spec in "${web_builds[@]}"; do
|
||||
)
|
||||
for build_spec in "${web_builds[@]}"; do
|
||||
IFS='|' read -r image_label dockerfile_path image_tag build_context <<< "$build_spec"
|
||||
if build_image "$image_label" "$dockerfile_path" "$image_tag" "$build_context"; then
|
||||
images_built+=("$image_tag")
|
||||
@ -292,13 +319,14 @@ for build_spec in "${web_builds[@]}"; do
|
||||
build_failed=true
|
||||
fi
|
||||
echo ""
|
||||
done
|
||||
done
|
||||
fi
|
||||
|
||||
alert_builds=(
|
||||
if [[ "$build_alert" == true ]]; then
|
||||
alert_builds=(
|
||||
"Alertmanager|src/alert/alertmanager/build/Dockerfile|argus-alertmanager:latest|."
|
||||
)
|
||||
|
||||
for build_spec in "${alert_builds[@]}"; do
|
||||
)
|
||||
for build_spec in "${alert_builds[@]}"; do
|
||||
IFS='|' read -r image_label dockerfile_path image_tag build_context <<< "$build_spec"
|
||||
if build_image "$image_label" "$dockerfile_path" "$image_tag" "$build_context"; then
|
||||
images_built+=("$image_tag")
|
||||
@ -306,7 +334,9 @@ for build_spec in "${alert_builds[@]}"; do
|
||||
build_failed=true
|
||||
fi
|
||||
echo ""
|
||||
done
|
||||
done
|
||||
fi
|
||||
fi
|
||||
|
||||
echo "======================================="
|
||||
echo "📦 Build Summary"
|
||||
|
||||
@ -12,6 +12,8 @@ VENV_DIR="$BUILD_ROOT/venv"
|
||||
|
||||
AGENT_BUILD_IMAGE="${AGENT_BUILD_IMAGE:-python:3.11-slim-bullseye}"
|
||||
AGENT_BUILD_USE_DOCKER="${AGENT_BUILD_USE_DOCKER:-1}"
|
||||
# 默认在容器内忽略代理以避免公司内网代理在 Docker 网络不可达导致 pip 失败(可用 0 关闭)
|
||||
AGENT_BUILD_IGNORE_PROXY="${AGENT_BUILD_IGNORE_PROXY:-1}"
|
||||
USED_DOCKER=0
|
||||
|
||||
run_host_build() {
|
||||
@ -71,6 +73,7 @@ run_docker_build() {
|
||||
pass_env_if_set http_proxy
|
||||
pass_env_if_set https_proxy
|
||||
pass_env_if_set no_proxy
|
||||
pass_env_if_set AGENT_BUILD_IGNORE_PROXY
|
||||
|
||||
build_script=$(cat <<'INNER'
|
||||
set -euo pipefail
|
||||
@ -82,6 +85,10 @@ rm -rf build dist
|
||||
mkdir -p build/pyinstaller dist
|
||||
python3 -m venv --copies build/venv
|
||||
source build/venv/bin/activate
|
||||
# 若指定忽略代理,则清空常见代理与 pip 镜像环境变量,避免容器内代理不可达
|
||||
if [ "${AGENT_BUILD_IGNORE_PROXY:-1}" = "1" ]; then
|
||||
unset http_proxy https_proxy HTTP_PROXY HTTPS_PROXY PIP_INDEX_URL PIP_EXTRA_INDEX_URL PIP_TRUSTED_HOST
|
||||
fi
|
||||
pip install --upgrade pip
|
||||
pip install .
|
||||
pip install pyinstaller==6.6.0
|
||||
|
||||
@ -9,14 +9,14 @@ RUN apt-get update && \
|
||||
apt-get install -y wget supervisor net-tools inetutils-ping vim ca-certificates passwd && \
|
||||
apt-get clean && rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# 设置 Alertmanager 版本
|
||||
# 设置 Alertmanager 版本(与本地离线包保持一致)
|
||||
ARG ALERTMANAGER_VERSION=0.28.1
|
||||
|
||||
# 下载并解压 Alertmanager 二进制
|
||||
RUN wget https://github.com/prometheus/alertmanager/releases/download/v${ALERTMANAGER_VERSION}/alertmanager-${ALERTMANAGER_VERSION}.linux-amd64.tar.gz && \
|
||||
tar xvf alertmanager-${ALERTMANAGER_VERSION}.linux-amd64.tar.gz && \
|
||||
mv alertmanager-${ALERTMANAGER_VERSION}.linux-amd64 /usr/local/alertmanager && \
|
||||
rm alertmanager-${ALERTMANAGER_VERSION}.linux-amd64.tar.gz
|
||||
# 使用仓库内预置的离线包构建(无需联网)
|
||||
COPY src/alert/alertmanager/build/alertmanager-${ALERTMANAGER_VERSION}.linux-amd64.tar.gz /tmp/
|
||||
RUN tar xvf /tmp/alertmanager-${ALERTMANAGER_VERSION}.linux-amd64.tar.gz -C /tmp && \
|
||||
mv /tmp/alertmanager-${ALERTMANAGER_VERSION}.linux-amd64 /usr/local/alertmanager && \
|
||||
rm -f /tmp/alertmanager-${ALERTMANAGER_VERSION}.linux-amd64.tar.gz
|
||||
|
||||
ENV ALERTMANAGER_BASE_PATH=/private/argus/alert/alertmanager
|
||||
|
||||
@ -34,21 +34,20 @@ RUN mkdir -p /usr/share/alertmanager && \
|
||||
# 创建 alertmanager 用户(可自定义 UID/GID)
|
||||
# 创建 alertmanager 用户组
|
||||
RUN set -eux; \
|
||||
# 确保目标 GID 存在;若已被占用,直接使用该 GID(组名不限)\
|
||||
if ! getent group "${ARGUS_BUILD_GID}" >/dev/null; then \
|
||||
groupadd -g "${ARGUS_BUILD_GID}" alertmanager || true; \
|
||||
fi; \
|
||||
if id alertmanager >/dev/null 2>&1; then \
|
||||
current_uid="$(id -u alertmanager)"; \
|
||||
if [ "$current_uid" != "${ARGUS_BUILD_UID}" ] && ! getent passwd "${ARGUS_BUILD_UID}" >/dev/null; then \
|
||||
usermod -u "${ARGUS_BUILD_UID}" alertmanager; \
|
||||
# 确保存在 alertmanager 用户;若 UID 已被占用,跳过并继续使用现有 UID 的用户
|
||||
if ! id alertmanager >/dev/null 2>&1; then \
|
||||
if getent passwd "${ARGUS_BUILD_UID}" >/dev/null; then \
|
||||
# UID 已占用,则创建同名用户但不指定 UID(避免冲突),仅保证 user 存在
|
||||
useradd -M -s /usr/sbin/nologin -g "${ARGUS_BUILD_GID}" alertmanager || true; \
|
||||
else \
|
||||
useradd -M -s /usr/sbin/nologin -u "${ARGUS_BUILD_UID}" -g "${ARGUS_BUILD_GID}" alertmanager || true; \
|
||||
fi; \
|
||||
else \
|
||||
usermod -g "${ARGUS_BUILD_GID}" alertmanager || true; \
|
||||
else \
|
||||
if ! getent passwd "${ARGUS_BUILD_UID}" >/dev/null; then \
|
||||
useradd -M -s /usr/sbin/nologin -u "${ARGUS_BUILD_UID}" -g "${ARGUS_BUILD_GID}" alertmanager; \
|
||||
else \
|
||||
echo "UID ${ARGUS_BUILD_UID} already exists; skip creating user 'alertmanager'"; \
|
||||
fi; \
|
||||
fi
|
||||
|
||||
RUN chown -R "${ARGUS_BUILD_UID}:${ARGUS_BUILD_GID}" /usr/share/alertmanager /alertmanager ${ALERTMANAGER_BASE_PATH} /private/argus/etc /usr/local/bin || true
|
||||
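A quick way to confirm the resulting UID/GID mapping in the built image is a one-off `id` run; this is a sketch (image tag as used elsewhere in this repo, --entrypoint bypasses the normal startup):

  docker run --rm --entrypoint id argus-alertmanager:latest alertmanager
  # expect the ARGUS_BUILD_UID/ARGUS_BUILD_GID values when they were free at build time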
Binary file not shown.
22  src/alert/alertmanager/build/fetch-dist.sh  Normal file
@ -0,0 +1,22 @@
#!/usr/bin/env bash
set -euo pipefail

# Download the Alertmanager offline package into this directory; it is COPY'd during the Docker build
# Usage:
#   ./fetch-dist.sh [version]
# Example:
#   ./fetch-dist.sh 0.28.1

VER="${1:-0.28.1}"
OUT="alertmanager-${VER}.linux-amd64.tar.gz"
URL="https://github.com/prometheus/alertmanager/releases/download/v${VER}/${OUT}"

if [[ -f "$OUT" ]]; then
  echo "[INFO] $OUT already exists, skip download"
  exit 0
fi

echo "[INFO] Downloading $URL"
curl -fL --retry 3 --connect-timeout 10 -o "$OUT" "$URL"
echo "[OK] Saved to $(pwd)/$OUT"
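A minimal sketch of the intended workflow for refreshing the vendored tarball and rebuilding the alert image (the top-level build script path is assumed):

  (cd src/alert/alertmanager/build && ./fetch-dist.sh 0.28.1)
  ./build/build_images.sh --only alert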
@ -7,10 +7,8 @@ ALERTMANAGER_BASE_PATH=${ALERTMANAGER_BASE_PATH:-/private/argus/alert/alertmanag
|
||||
|
||||
echo "[INFO] Alertmanager base path: ${ALERTMANAGER_BASE_PATH}"
|
||||
|
||||
# 生成配置文件
|
||||
echo "[INFO] Generating Alertmanager configuration file..."
|
||||
sed "s|\${ALERTMANAGER_BASE_PATH}|${ALERTMANAGER_BASE_PATH}|g" \
|
||||
/etc/alertmanager/alertmanager.yml > ${ALERTMANAGER_BASE_PATH}/alertmanager.yml
|
||||
# 使用容器内的 /etc/alertmanager/alertmanager.yml 作为配置文件,避免写入挂载卷导致的权限问题
|
||||
echo "[INFO] Using /etc/alertmanager/alertmanager.yml as configuration"
|
||||
|
||||
|
||||
# 记录容器 IP 地址
|
||||
|
||||
@ -42,7 +42,7 @@
|
||||
- `./scripts/05_agent_register.sh` 获取两个节点的 `node_id` 与初始 IP,检查本地 `node.json`
|
||||
- `./scripts/06_write_health_and_assert.sh` 写健康文件并断言 `nodes.json` 仅包含 2 个在线节点
|
||||
- `./scripts/07_logs_send_and_assert.sh` 向两个节点写日志,断言 ES `train-*`/`infer-*` 计数增长
|
||||
- `./scripts/08_restart_agent_reregister.sh` `node-b` 改为固定 IP `172.29.0.200`,验证保持同一节点 ID 且 IP/时间戳更新
|
||||
- `./scripts/08_restart_agent_reregister.sh` `node-b` 改为固定 IP `172.31.0.200`,验证保持同一节点 ID 且 IP/时间戳更新
|
||||
- `./scripts/09_down.sh` 回收容器、网络并清理 `private*/`、`tmp/`
|
||||
|
||||
- 重置环境
|
||||
@ -53,8 +53,8 @@
|
||||
## 二、测试部署架构(docker-compose)
|
||||
|
||||
- 网络
|
||||
- 自定义 bridge:`argus-sys-net`,子网 `172.29.0.0/16`
|
||||
- 固定地址:bind=`172.29.0.2`,master=`172.29.0.10`
|
||||
- 自定义 bridge:`argus-sys-net`,子网 `172.31.0.0/16`
|
||||
- 固定地址:bind=`172.31.0.2`,master=`172.31.0.10`
|
||||
|
||||
- 服务与端口
|
||||
- `bind`(`argus-bind9:latest`):监听 53/tcp+udp;负责同步 `*.argus.com` 记录
|
||||
@ -72,7 +72,7 @@
|
||||
- 节点容器的 Fluent Bit/agent 资产以只读方式挂载到 `/assets`/`/usr/local/bin/argus-agent`
|
||||
|
||||
- DNS 配置
|
||||
- 节点容器通过 compose 配置 `dns: [172.29.0.2]` 指向 bind,不挂载 `/etc/resolv.conf`,也不依赖 `update-dns.sh`
|
||||
- 节点容器通过 compose 配置 `dns: [172.31.0.2]` 指向 bind,不挂载 `/etc/resolv.conf`,也不依赖 `update-dns.sh`
|
||||
- master/es/kibana 仍共享 `./private`,master 启动会写 `/private/argus/etc/master.argus.com` 供 bind 同步 A 记录
|
||||
|
||||
- 节点入口
|
||||
|
||||
@ -4,7 +4,7 @@ networks:
|
||||
ipam:
|
||||
driver: default
|
||||
config:
|
||||
- subnet: 172.29.0.0/16
|
||||
- subnet: 172.31.0.0/16
|
||||
|
||||
services:
|
||||
bind:
|
||||
@ -12,7 +12,7 @@ services:
|
||||
container_name: argus-bind-sys
|
||||
networks:
|
||||
sysnet:
|
||||
ipv4_address: 172.29.0.2
|
||||
ipv4_address: 172.31.0.2
|
||||
volumes:
|
||||
- ./private:/private
|
||||
restart: unless-stopped
|
||||
@ -36,7 +36,7 @@ services:
|
||||
- ./private/argus/etc:/private/argus/etc
|
||||
networks:
|
||||
sysnet:
|
||||
ipv4_address: 172.29.0.10
|
||||
ipv4_address: 172.31.0.10
|
||||
restart: unless-stopped
|
||||
|
||||
es:
|
||||
@ -56,7 +56,7 @@ services:
|
||||
restart: unless-stopped
|
||||
networks:
|
||||
sysnet:
|
||||
ipv4_address: 172.29.0.3
|
||||
ipv4_address: 172.31.0.3
|
||||
|
||||
kibana:
|
||||
image: argus-kibana:latest
|
||||
@ -75,7 +75,7 @@ services:
|
||||
restart: unless-stopped
|
||||
networks:
|
||||
sysnet:
|
||||
ipv4_address: 172.29.0.4
|
||||
ipv4_address: 172.31.0.4
|
||||
|
||||
node-a:
|
||||
image: ubuntu:22.04
|
||||
@ -104,7 +104,7 @@ services:
|
||||
entrypoint:
|
||||
- /usr/local/bin/node-entrypoint.sh
|
||||
dns:
|
||||
- 172.29.0.2
|
||||
- 172.31.0.2
|
||||
ports:
|
||||
- "2020:2020"
|
||||
restart: unless-stopped
|
||||
@ -138,7 +138,7 @@ services:
|
||||
entrypoint:
|
||||
- /usr/local/bin/node-entrypoint.sh
|
||||
dns:
|
||||
- 172.29.0.2
|
||||
- 172.31.0.2
|
||||
ports:
|
||||
- "2021:2020"
|
||||
restart: unless-stopped
|
||||
@ -167,7 +167,7 @@ services:
|
||||
- /etc/timezone:/etc/timezone:ro
|
||||
networks:
|
||||
sysnet:
|
||||
ipv4_address: 172.29.0.40
|
||||
ipv4_address: 172.31.0.40
|
||||
logging:
|
||||
driver: "json-file"
|
||||
options:
|
||||
@ -192,7 +192,7 @@ services:
|
||||
- /etc/timezone:/etc/timezone:ro
|
||||
networks:
|
||||
sysnet:
|
||||
ipv4_address: 172.29.0.41
|
||||
ipv4_address: 172.31.0.41
|
||||
logging:
|
||||
driver: "json-file"
|
||||
options:
|
||||
@ -223,7 +223,7 @@ services:
|
||||
- /etc/timezone:/etc/timezone:ro
|
||||
networks:
|
||||
sysnet:
|
||||
ipv4_address: 172.29.0.42
|
||||
ipv4_address: 172.31.0.42
|
||||
depends_on:
|
||||
- prometheus
|
||||
logging:
|
||||
@ -232,6 +232,25 @@ services:
|
||||
max-size: "10m"
|
||||
max-file: "3"
|
||||
|
||||
# --- Added: Web Frontend (no host port; resolved by DNS as web.argus.com) ---
|
||||
web-frontend:
|
||||
image: argus-web-frontend:latest
|
||||
container_name: argus-web-frontend
|
||||
environment:
|
||||
- ARGUS_BUILD_UID=${ARGUS_BUILD_UID:-2133}
|
||||
- ARGUS_BUILD_GID=${ARGUS_BUILD_GID:-2015}
|
||||
volumes:
|
||||
- ./private/argus/etc:/private/argus/etc
|
||||
networks:
|
||||
sysnet:
|
||||
ipv4_address: 172.31.0.80
|
||||
restart: unless-stopped
|
||||
logging:
|
||||
driver: "json-file"
|
||||
options:
|
||||
max-size: "10m"
|
||||
max-file: "3"
|
||||
|
||||
test-node:
|
||||
image: ubuntu:22.04
|
||||
container_name: argus-metric-test-node
|
||||
@ -245,7 +264,7 @@ services:
|
||||
- TZ=Asia/Shanghai
|
||||
- DEBIAN_FRONTEND=noninteractive
|
||||
- FTP_DOMAIN=${FTP_DOMAIN:-ftp.metric.argus.com}
|
||||
- FTP_SERVER=${FTP_SERVER:-172.29.0.40}
|
||||
- FTP_SERVER=${FTP_SERVER:-172.31.0.40}
|
||||
- FTP_USER=${FTP_USER:-ftpuser}
|
||||
- FTP_PASSWORD=${FTP_PASSWORD:-ZGClab1234!}
|
||||
- FTP_PORT=${FTP_PORT:-21}
|
||||
@ -264,7 +283,7 @@ services:
|
||||
- infinity
|
||||
networks:
|
||||
sysnet:
|
||||
ipv4_address: 172.29.0.50
|
||||
ipv4_address: 172.31.0.50
|
||||
logging:
|
||||
driver: "json-file"
|
||||
options:
|
||||
@ -311,7 +330,62 @@ services:
|
||||
- infinity
|
||||
networks:
|
||||
sysnet:
|
||||
ipv4_address: 172.29.0.51
|
||||
ipv4_address: 172.31.0.51
|
||||
logging:
|
||||
driver: "json-file"
|
||||
options:
|
||||
max-size: "10m"
|
||||
max-file: "3"
|
||||
|
||||
# --- Added: Alertmanager ---
|
||||
alertmanager:
|
||||
image: argus-alertmanager:latest
|
||||
container_name: argus-alertmanager
|
||||
environment:
|
||||
- ARGUS_BUILD_UID=${ARGUS_BUILD_UID:-2133}
|
||||
- ARGUS_BUILD_GID=${ARGUS_BUILD_GID:-2015}
|
||||
volumes:
|
||||
- ./private/argus/etc:/private/argus/etc
|
||||
- ./private/argus/alert/alertmanager:/private/argus/alert/alertmanager
|
||||
networks:
|
||||
sysnet:
|
||||
ipv4_address: 172.31.0.82
|
||||
ports:
|
||||
- "9093:9093"
|
||||
restart: unless-stopped
|
||||
logging:
|
||||
driver: "json-file"
|
||||
options:
|
||||
max-size: "10m"
|
||||
max-file: "3"
|
||||
|
||||
# --- Added: Web Proxy (multi-port gateway) ---
|
||||
web-proxy:
|
||||
image: argus-web-proxy:latest
|
||||
container_name: argus-web-proxy
|
||||
depends_on:
|
||||
- bind
|
||||
- master
|
||||
- grafana
|
||||
- prometheus
|
||||
- kibana
|
||||
- alertmanager
|
||||
environment:
|
||||
- ARGUS_BUILD_UID=${ARGUS_BUILD_UID:-2133}
|
||||
- ARGUS_BUILD_GID=${ARGUS_BUILD_GID:-2015}
|
||||
volumes:
|
||||
- ./private/argus/etc:/private/argus/etc
|
||||
networks:
|
||||
sysnet:
|
||||
ipv4_address: 172.31.0.81
|
||||
ports:
|
||||
- "8080:8080"
|
||||
- "8081:8081"
|
||||
- "8082:8082"
|
||||
- "8083:8083"
|
||||
- "8084:8084"
|
||||
- "8085:8085"
|
||||
restart: unless-stopped
|
||||
logging:
|
||||
driver: "json-file"
|
||||
options:
|
||||
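To spot-check that the proxy container resolves and reaches one upstream after the stack is up, something like the following should work (assuming getent and curl are present in the argus-web-proxy image):

  docker exec argus-web-proxy getent hosts grafana.metric.argus.com
  docker exec argus-web-proxy curl -s -o /dev/null -w '%{http_code}\n' http://grafana.metric.argus.com:3000/api/health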
@ -45,6 +45,7 @@ mkdir -p \
|
||||
"$PRIVATE_CORE/argus/bind" \
|
||||
"$PRIVATE_CORE/argus/master" \
|
||||
"$PRIVATE_CORE/argus/metric/prometheus" \
|
||||
"$PRIVATE_CORE/argus/alert/alertmanager" \
|
||||
"$PRIVATE_CORE/argus/metric/ftp/share" \
|
||||
"$PRIVATE_CORE/argus/metric/grafana/data" \
|
||||
"$PRIVATE_CORE/argus/metric/grafana/logs" \
|
||||
@ -71,10 +72,14 @@ chown -R "${ARGUS_BUILD_UID}:${ARGUS_BUILD_GID}" \
|
||||
"$PRIVATE_CORE/argus/log/kibana" \
|
||||
"$PRIVATE_CORE/argus/metric/grafana" \
|
||||
"$PRIVATE_CORE/argus/metric/prometheus" \
|
||||
"$PRIVATE_CORE/argus/alert" \
|
||||
"$PRIVATE_CORE/argus/metric/ftp" \
|
||||
"$PRIVATE_CORE/argus/agent" \
|
||||
"$PRIVATE_CORE/argus/etc" 2>/dev/null || true
|
||||
|
||||
# 确保 alert 与 etc 目录组可写,便于非 root 且仅匹配 GID 的服务写入运行文件
|
||||
chmod -R g+w "$PRIVATE_CORE/argus/alert" "$PRIVATE_CORE/argus/etc" 2>/dev/null || true
|
||||
|
||||
echo "[INFO] Using compose-managed network (auto-created by docker compose)"
|
||||
|
||||
echo "[INFO] Distributing update-dns.sh for core services (bind/master/es/kibana)"
|
||||
@ -95,6 +100,9 @@ ensure_image "argus-master:latest"
|
||||
ensure_image "argus-metric-ftp:latest"
|
||||
ensure_image "argus-metric-prometheus:latest"
|
||||
ensure_image "argus-metric-grafana:latest"
|
||||
ensure_image "argus-web-frontend:latest"
|
||||
ensure_image "argus-web-proxy:latest"
|
||||
ensure_image "argus-alertmanager:latest"
|
||||
|
||||
echo "[INFO] Building agent binary..."
|
||||
pushd "$REPO_ROOT/src/agent" >/dev/null
|
||||
|
||||
@ -47,19 +47,40 @@ for name in argus-node-b; do
|
||||
fi
|
||||
done
|
||||
|
||||
# 预检:检查多端口网关所需宿主端口是否空闲
|
||||
check_port_free() {
|
||||
local p="$1"
|
||||
if ss -ltnp 2>/dev/null | grep -q ":${p} "; then
|
||||
echo "[ERR] Host port ${p} is already in use. Please free it before running 02_up.sh" >&2
|
||||
ss -ltnp | awk -v p=":${p} " '$0 ~ p {print " " $0}' || true
|
||||
return 1
|
||||
fi
|
||||
return 0
|
||||
}
|
||||
|
||||
for port in 8080 8081 8082 8083 8084 8085; do
|
||||
check_port_free "$port" || { echo "[ERR] Required port busy: $port"; exit 1; }
|
||||
done
|
||||
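Manual equivalent of the preflight for a single port, using the same ss invocation as the script:

  ss -ltnp | grep ':8080 ' || echo "port 8080 is free"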
# 根据GPU可用性决定启动的服务
|
||||
if [[ "$GPU_AVAILABLE" == true ]]; then
|
||||
echo "[INFO] 启动所有服务(包含 gpu profile)..."
|
||||
compose -p argus-sys --profile gpu up -d
|
||||
compose -p argus-sys --profile gpu up -d || true
|
||||
else
|
||||
echo "[INFO] 启动基础服务(不含 gpu profile)..."
|
||||
compose -p argus-sys up -d
|
||||
compose -p argus-sys up -d || true
|
||||
fi
|
||||
|
||||
# 若 web-proxy 处于 Created 状态,尝试单独启动一次(处理偶发 Address already in use 后端已释放的场景)
|
||||
if docker ps -a --format '{{.Names}}\t{{.Status}}' | grep -q '^argus-web-proxy\s\+Created'; then
|
||||
echo "[WARN] web-proxy in Created state; retry starting it..."
|
||||
docker start argus-web-proxy || true
|
||||
fi
|
||||
|
||||
popd >/dev/null
|
||||
|
||||
if [[ "$GPU_AVAILABLE" == true ]]; then
|
||||
echo "[OK] Services started: master:32300 es:9200 kibana:5601 node-a:2020 node-b:2021 test-gpu-node:172.29.0.51"
|
||||
echo "[OK] Services started: master:32300 es:9200 kibana:5601 node-a:2020 node-b:2021 test-gpu-node:172.31.0.51"
|
||||
else
|
||||
echo "[OK] Services started: master:32300 es:9200 kibana:5601 node-a:2020 node-b:2021 (gpu skipped)"
|
||||
fi
|
||||
|
||||
@ -29,6 +29,7 @@ echo "[INFO] Waiting for ES/Kibana/Master/Fluent Bit/Bind..."
|
||||
|
||||
# ES (>= yellow)
|
||||
attempt=1; max=120
|
||||
ES_T0=$(date +%s)
|
||||
while (( attempt <= max )); do
|
||||
if curl -fsS "http://localhost:9200/_cluster/health?wait_for_status=yellow&timeout=1s" >/dev/null 2>&1; then
|
||||
break
|
||||
@ -36,16 +37,19 @@ while (( attempt <= max )); do
|
||||
echo "[..] waiting ES ($attempt/$max)"; sleep 5; ((attempt++))
|
||||
done
|
||||
[[ $attempt -le $max ]] || { echo "[ERR] ES not ready" >&2; exit 1; }
|
||||
ES_T1=$(date +%s); echo "[TIME] ES ready in $((ES_T1-ES_T0))s"
|
||||
|
||||
# Kibana: must be HTTP 200 and overall.level=available
|
||||
echo "[INFO] Waiting for Kibana to be available (HTTP 200)..."
|
||||
kb_attempt=1; kb_max=180
|
||||
KB_T0=$(date +%s)
|
||||
while (( kb_attempt <= kb_max )); do
|
||||
body=$(curl -sS "http://localhost:5601/api/status" 2>/dev/null || true)
|
||||
code=$(curl -s -o /dev/null -w "%{http_code}" "http://localhost:5601/api/status" || echo 000)
|
||||
if [[ "$code" == "200" ]]; then
|
||||
if echo "$body" | grep -q '"level":"available"'; then
|
||||
echo "[OK] Kibana available (HTTP 200)"
|
||||
KB_T1=$(date +%s)
|
||||
echo "[OK] Kibana available (HTTP 200) in $((KB_T1-KB_T0))s"
|
||||
break
|
||||
fi
|
||||
fi
|
||||
@ -58,11 +62,13 @@ if (( kb_attempt > kb_max )); then
|
||||
fi
|
||||
|
||||
# Master
|
||||
MASTER_T0=$(date +%s)
|
||||
wait_http "http://localhost:32300/readyz" 120
|
||||
MASTER_T1=$(date +%s); echo "[TIME] Master readyz in $((MASTER_T1-MASTER_T0))s"
|
||||
|
||||
# Fluent Bit (host metrics on host ports)
|
||||
wait_http "http://localhost:2020/api/v2/metrics" 120
|
||||
wait_http "http://localhost:2021/api/v2/metrics" 120
|
||||
FB1_T0=$(date +%s); wait_http "http://localhost:2020/api/v2/metrics" 120; FB1_T1=$(date +%s); echo "[TIME] FluentBit:2020 in $((FB1_T1-FB1_T0))s"
|
||||
FB2_T0=$(date +%s); wait_http "http://localhost:2021/api/v2/metrics" 120; FB2_T1=$(date +%s); echo "[TIME] FluentBit:2021 in $((FB2_T1-FB2_T0))s"
|
||||
|
||||
# Bind config check
|
||||
BIND_ID="$(service_id bind)"
|
||||
@ -72,4 +78,63 @@ else
|
||||
echo "[WARN] bind container id not found"
|
||||
fi
|
||||
|
||||
# ========== Additional module readiness checks ==========
|
||||
|
||||
# Prometheus
|
||||
PROM_T0=$(date +%s); wait_http "http://localhost:9090/-/ready" 120; PROM_T1=$(date +%s); echo "[TIME] Prometheus ready in $((PROM_T1-PROM_T0))s"
|
||||
|
||||
# Grafana health (database: ok)
|
||||
echo "[INFO] Waiting for Grafana health..."
|
||||
gf_attempt=1; gf_max=120
|
||||
while (( gf_attempt <= gf_max )); do
|
||||
gf_body=$(curl -sS "http://localhost:3000/api/health" 2>/dev/null || true)
|
||||
gf_code=$(curl -s -o /dev/null -w "%{http_code}" "http://localhost:3000/api/health" || echo 000)
|
||||
if [[ "$gf_code" == "200" ]] && echo "$gf_body" | grep -q '"database"\s*:\s*"ok"'; then
|
||||
echo "[OK] Grafana health database=ok"
|
||||
break
|
||||
fi
|
||||
echo "[..] waiting grafana health ($gf_attempt/$gf_max), last_code=$gf_code"
|
||||
sleep 3; ((gf_attempt++))
|
||||
done
|
||||
if (( gf_attempt > gf_max )); then
|
||||
echo "[ERR] Grafana /api/health not ready" >&2; exit 1
|
||||
fi
|
||||
|
||||
# Alertmanager
|
||||
wait_http "http://localhost:9093/api/v2/status" 120
|
||||
|
||||
# Web proxy checks(按端口细化)
|
||||
code_for() { curl -s -o /dev/null -w "%{http_code}" "$1" || echo 000; }
|
||||
header_val() { curl -s -D - -o /dev/null "$@" | awk -F': ' 'BEGIN{IGNORECASE=1}$1=="Access-Control-Allow-Origin"{gsub("\r","",$2);print $2}'; }
|
||||
|
||||
echo "[INFO] Checking web-proxy ports..."
|
||||
|
||||
# 8080 首页必须 200
|
||||
tries=1; max=60; P8080_T0=$(date +%s)
|
||||
while (( tries <= max )); do
|
||||
c=$(code_for "http://localhost:8080/")
|
||||
if [[ "$c" == "200" ]]; then P8080_T1=$(date +%s); echo "[OK] 8080 / ($c) in $((P8080_T1-P8080_T0))s"; break; fi
|
||||
echo "[..] waiting 8080/ ($tries/$max), code=$c"; sleep 3; ((tries++))
|
||||
done
|
||||
(( tries <= max )) || { echo "[ERR] 8080/ not ready" >&2; exit 1; }
|
||||
|
||||
# 8083 Kibana 允许 200/302(上面已就绪,端口侧再快速确认)
|
||||
tries=1; max=40; P8083_T0=$(date +%s)
|
||||
while (( tries <= max )); do
|
||||
c=$(code_for "http://localhost:8083/")
|
||||
if [[ "$c" == "200" || "$c" == "302" ]]; then P8083_T1=$(date +%s); echo "[OK] 8083 / ($c) in $((P8083_T1-P8083_T0))s"; break; fi
|
||||
echo "[..] waiting 8083/ ($tries/$max), code=$c"; sleep 3; ((tries++))
|
||||
done
|
||||
(( tries <= max )) || { echo "[ERR] 8083/ not ready" >&2; exit 1; }
|
||||
|
||||
# 8084 Alertmanager + CORS
|
||||
P8084_T0=$(date +%s); wait_http "http://localhost:8084/api/v2/status" 60; P8084_T1=$(date +%s)
|
||||
cors=$(header_val -H "Origin: http://localhost:8080" "http://localhost:8084/api/v2/status" || true)
|
||||
if [[ -z "$cors" ]]; then echo "[ERR] 8084 CORS missing" >&2; exit 1; else echo "[OK] 8084 CORS: $cors in $((P8084_T1-P8084_T0))s"; fi
|
||||
|
||||
# 8085 Master /readyz + CORS(API 走 8085 才需跨域)
|
||||
P8085_T0=$(date +%s); wait_http "http://localhost:8085/readyz" 60; P8085_T1=$(date +%s)
|
||||
cors=$(header_val -H "Origin: http://localhost:8080" "http://localhost:8085/api/v1/master/nodes" || true)
|
||||
if [[ -z "$cors" ]]; then echo "[ERR] 8085 CORS missing" >&2; exit 1; else echo "[OK] 8085 CORS: $cors in $((P8085_T1-P8085_T0))s"; fi
|
||||
|
||||
echo "[OK] All services are ready"
|
||||
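The 8085 CORS assertion above can also be reproduced by hand on the compose host:

  curl -s -D - -o /dev/null -H 'Origin: http://localhost:8080' \
    http://localhost:8085/api/v1/master/nodes | grep -i '^access-control-allow-origin'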
@ -49,7 +49,7 @@ compose() {
|
||||
fi
|
||||
}
|
||||
|
||||
echo "[INFO] Recreating node-b with static IP 172.29.0.200..."
|
||||
echo "[INFO] Recreating node-b with static IP 172.31.0.200..."
|
||||
pushd "$TEST_ROOT" >/dev/null
|
||||
compose -p argus-sys rm -sf node-b || true
|
||||
popd >/dev/null
|
||||
@ -77,8 +77,8 @@ docker run -d \
|
||||
--name argus-node-b \
|
||||
--hostname dev-yyrshare-uuuu10-ep2f-pod-0 \
|
||||
--network "$SYSNET_NAME" \
|
||||
--ip 172.29.0.200 \
|
||||
--dns 172.29.0.2 \
|
||||
--ip 172.31.0.200 \
|
||||
--dns 172.31.0.2 \
|
||||
-e MASTER_ENDPOINT=http://master.argus.com:3000 \
|
||||
-e REPORT_INTERVAL_SECONDS=2 \
|
||||
-e ARGUS_BUILD_UID=${ARGUS_BUILD_UID:-2133} \
|
||||
@ -105,15 +105,15 @@ node=json.load(open(sys.argv[1]))
|
||||
last0=sys.argv[2]
|
||||
ip=node.get("meta_data",{}).get("ip")
|
||||
lu=node.get("last_updated")
|
||||
assert ip=="172.29.0.200"
|
||||
assert ip=="172.31.0.200"
|
||||
assert lu and lu!=last0
|
||||
PY
|
||||
then
|
||||
echo "[OK] node-b re-registered with new IP 172.29.0.200"
|
||||
echo "[OK] node-b re-registered with new IP 172.31.0.200"
|
||||
exit 0
|
||||
fi
|
||||
fi
|
||||
done
|
||||
|
||||
echo "[ERR] node-b did not update to IP 172.29.0.200 in time" >&2
|
||||
echo "[ERR] node-b did not update to IP 172.31.0.200 in time" >&2
|
||||
exit 1
|
||||
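A quick manual check that node-b actually came up on the new static address (container name as used in this script):

  docker inspect -f '{{range .NetworkSettings.Networks}}{{.IPAddress}}{{end}}' argus-node-b   # expect 172.31.0.200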
@ -16,7 +16,7 @@ if ! docker ps --format '{{.Names}}' | grep -q "^${CONTAINER}$"; then
|
||||
exit 1
|
||||
fi
|
||||
|
||||
FTP_HOST="${FTP_SERVER:-172.29.0.40}"
|
||||
FTP_HOST="${FTP_SERVER:-172.31.0.40}"
|
||||
FTP_USER="${FTP_USER:-ftpuser}"
|
||||
FTP_PASSWORD="${FTP_PASSWORD:-ZGClab1234!}"
|
||||
FTP_PORT="${FTP_PORT:-21}"
|
||||
|
||||
@ -23,7 +23,7 @@ if ! docker ps --format '{{.Names}}' | grep -q "^${CONTAINER}$"; then
|
||||
exit 1
|
||||
fi
|
||||
|
||||
FTP_HOST="${FTP_SERVER:-172.29.0.40}"
|
||||
FTP_HOST="${FTP_SERVER:-172.31.0.40}"
|
||||
FTP_USER="${FTP_USER:-ftpuser}"
|
||||
FTP_PASSWORD="${FTP_PASSWORD:-ZGClab1234!}"
|
||||
FTP_PORT="${FTP_PORT:-21}"
|
||||
|
||||
@ -5,7 +5,7 @@ TMP_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")"/.. && pwd)/tmp/metric-verify"
|
||||
mkdir -p "$TMP_DIR"
|
||||
|
||||
PROM_BASE="http://localhost:9090/api/v1"
|
||||
INSTANCE="${METRIC_TEST_INSTANCE:-172.29.0.50:9100}"
|
||||
INSTANCE="${METRIC_TEST_INSTANCE:-172.31.0.50:9100}"
|
||||
IP_ONLY="${INSTANCE%%:*}"
|
||||
|
||||
echo "[VERIFY:DATA] node exporter metrics present in container"
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
user web;
|
||||
user root;
|
||||
worker_processes auto;
|
||||
|
||||
events {
|
||||
|
||||
@ -18,7 +18,7 @@ stopasgroup=true
|
||||
|
||||
[program:web-health]
|
||||
command=/usr/local/bin/health-check.sh
|
||||
user=web
|
||||
user=root
|
||||
stdout_logfile=/var/log/supervisor/web-health.log
|
||||
stderr_logfile=/var/log/supervisor/web-health_error.log
|
||||
autorestart=true
|
||||
|
||||
@ -66,13 +66,16 @@ RUN mkdir -p /var/log/supervisor
|
||||
# 复制启动脚本
|
||||
COPY src/web/build_tools/proxy/start-proxy-supervised.sh /usr/local/bin/start-proxy-supervised.sh
|
||||
RUN chmod +x /usr/local/bin/start-proxy-supervised.sh
|
||||
COPY src/web/build_tools/proxy/start-proxy-retry.sh /usr/local/bin/start-proxy-retry.sh
|
||||
RUN chmod +x /usr/local/bin/start-proxy-retry.sh
|
||||
|
||||
# 复制 DNS 监控脚本
|
||||
COPY src/web/build_tools/proxy/dns-monitor.sh /usr/local/bin/dns-monitor.sh
|
||||
# 统一复用 bind 模块的 dns-monitor 脚本,保持行为一致
|
||||
COPY src/bind/build/dns-monitor.sh /usr/local/bin/dns-monitor.sh
|
||||
RUN chmod +x /usr/local/bin/dns-monitor.sh
|
||||
|
||||
# 暴露端口
|
||||
EXPOSE 80
|
||||
EXPOSE 80 8080 8081 8082 8083 8084 8085
|
||||
|
||||
# 保持 root 用户,由 supervisor 控制 user 切换
|
||||
USER root
|
||||
|
||||
@ -1,9 +0,0 @@
|
||||
server {
|
||||
listen 80;
|
||||
server_name alertmanager.alert.argus.com;
|
||||
|
||||
location / {
|
||||
set $alert_backend http://alertmanager.alert.argus.com:9093;
|
||||
proxy_pass $alert_backend;
|
||||
}
|
||||
}
|
||||
@ -1,21 +0,0 @@
|
||||
# Elasticsearch
|
||||
server {
|
||||
listen 80;
|
||||
server_name es.log.argus.com;
|
||||
|
||||
location / {
|
||||
set $es_backend http://es.log.argus.com:9200;
|
||||
proxy_pass $es_backend;
|
||||
}
|
||||
}
|
||||
|
||||
# Kibana
|
||||
server {
|
||||
listen 80;
|
||||
server_name kibana.log.argus.com;
|
||||
|
||||
location / {
|
||||
set $kibana_backend http://kibana.log.argus.com:5601;
|
||||
proxy_pass $kibana_backend;
|
||||
}
|
||||
}
|
||||
@ -1,27 +0,0 @@
|
||||
server {
|
||||
listen 80;
|
||||
server_name master.argus.com;
|
||||
|
||||
location / {
|
||||
set $master_backend http://master.argus.com:3000;
|
||||
proxy_pass $master_backend;
|
||||
proxy_set_header Host $host;
|
||||
proxy_set_header X-Real-IP $remote_addr;
|
||||
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
|
||||
proxy_set_header X-Forwarded-Proto $scheme;
|
||||
|
||||
# CORS 支持
|
||||
add_header 'Access-Control-Allow-Origin' 'http://web.argus.com' always;
|
||||
add_header 'Access-Control-Allow-Methods' 'GET, POST, PUT, DELETE, OPTIONS' always;
|
||||
add_header 'Access-Control-Allow-Headers' 'Origin, Content-Type, Accept, Authorization' always;
|
||||
|
||||
if ($request_method = OPTIONS) {
|
||||
add_header 'Access-Control-Allow-Origin' 'http://web.argus.com' always;
|
||||
add_header 'Access-Control-Allow-Methods' 'GET, POST, PUT, DELETE, OPTIONS' always;
|
||||
add_header 'Access-Control-Allow-Headers' 'Origin, Content-Type, Accept, Authorization' always;
|
||||
add_header 'Content-Length' 0;
|
||||
add_header 'Content-Type' 'text/plain';
|
||||
return 204;
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -1,21 +0,0 @@
|
||||
# Prometheus
|
||||
server {
|
||||
listen 80;
|
||||
server_name prometheus.metric.argus.com;
|
||||
|
||||
location / {
|
||||
set $prom_backend http://prom.metric.argus.com:9090;
|
||||
proxy_pass $prom_backend;
|
||||
}
|
||||
}
|
||||
|
||||
# Grafana
|
||||
server {
|
||||
listen 80;
|
||||
server_name grafana.metric.argus.com;
|
||||
|
||||
location / {
|
||||
set $grafana_backend http://grafana.metric.argus.com:3000;
|
||||
proxy_pass $grafana_backend;
|
||||
}
|
||||
}
|
||||
94  src/web/build_tools/proxy/conf.d/ports.conf  Normal file
@ -0,0 +1,94 @@
|
||||
map $http_upgrade $connection_upgrade { default upgrade; "" close; }
|
||||
|
||||
# 允许的跨域来源(仅用于 8084/8085)
|
||||
map $http_origin $cors_allow {
|
||||
default "";
|
||||
"http://localhost:8080" "http://localhost:8080";
|
||||
"http://127.0.0.1:8080" "http://127.0.0.1:8080";
|
||||
}
|
||||
|
||||
# 8080 - Portal
|
||||
server {
|
||||
listen 8080;
|
||||
server_name _;
|
||||
proxy_set_header Host $host;
|
||||
proxy_set_header X-Real-IP $remote_addr;
|
||||
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
|
||||
proxy_set_header X-Forwarded-Proto $scheme;
|
||||
proxy_set_header Upgrade $http_upgrade;
|
||||
proxy_set_header Connection $connection_upgrade;
|
||||
proxy_http_version 1.1;
|
||||
location / { proxy_pass http://web.argus.com:8080/; }
|
||||
}
|
||||
|
||||
# 8081 - Grafana
|
||||
server {
|
||||
listen 8081;
|
||||
server_name _;
|
||||
proxy_set_header Host $host;
|
||||
proxy_set_header X-Real-IP $remote_addr;
|
||||
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
|
||||
proxy_set_header X-Forwarded-Proto $scheme;
|
||||
proxy_set_header Upgrade $http_upgrade;
|
||||
proxy_set_header Connection $connection_upgrade;
|
||||
proxy_http_version 1.1;
|
||||
location / { proxy_pass http://grafana.metric.argus.com:3000/; }
|
||||
}
|
||||
|
||||
# 8082 - Prometheus
|
||||
server {
|
||||
listen 8082;
|
||||
server_name _;
|
||||
proxy_set_header Host $host;
|
||||
proxy_set_header X-Real-IP $remote_addr;
|
||||
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
|
||||
proxy_set_header X-Forwarded-Proto $scheme;
|
||||
proxy_http_version 1.1;
|
||||
location / { proxy_pass http://prom.metric.argus.com:9090/; }
|
||||
}
|
||||
|
||||
# 8083 - Kibana
|
||||
server {
|
||||
listen 8083;
|
||||
server_name _;
|
||||
proxy_set_header Host $host;
|
||||
proxy_set_header X-Real-IP $remote_addr;
|
||||
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
|
||||
proxy_set_header X-Forwarded-Proto $scheme;
|
||||
proxy_set_header Upgrade $http_upgrade;
|
||||
proxy_set_header Connection $connection_upgrade;
|
||||
proxy_http_version 1.1;
|
||||
location / { proxy_pass http://kibana.log.argus.com:5601/; }
|
||||
}
|
||||
|
||||
# 8084 - Alertmanager(含 CORS)
|
||||
server {
|
||||
listen 8084;
|
||||
server_name _;
|
||||
proxy_set_header Host $host;
|
||||
proxy_set_header X-Real-IP $remote_addr;
|
||||
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
|
||||
proxy_set_header X-Forwarded-Proto $scheme;
|
||||
add_header 'Access-Control-Allow-Origin' $cors_allow always;
|
||||
add_header 'Access-Control-Allow-Methods' 'GET, POST, PUT, DELETE, OPTIONS' always;
|
||||
add_header 'Access-Control-Allow-Headers' 'Origin, Content-Type, Accept, Authorization' always;
|
||||
if ($request_method = OPTIONS) { return 204; }
|
||||
proxy_http_version 1.1;
|
||||
location / { proxy_pass http://alertmanager.alert.argus.com:9093/; }
|
||||
}
|
||||
|
||||
# 8085 - Master(新增,含 CORS)
|
||||
server {
|
||||
listen 8085;
|
||||
server_name _;
|
||||
proxy_set_header Host $host;
|
||||
proxy_set_header X-Real-IP $remote_addr;
|
||||
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
|
||||
proxy_set_header X-Forwarded-Proto $scheme;
|
||||
add_header 'Access-Control-Allow-Origin' $cors_allow always;
|
||||
add_header 'Access-Control-Allow-Methods' 'GET, POST, PUT, DELETE, OPTIONS' always;
|
||||
add_header 'Access-Control-Allow-Headers' 'Origin, Content-Type, Accept, Authorization' always;
|
||||
if ($request_method = OPTIONS) { return 204; }
|
||||
proxy_http_version 1.1;
|
||||
location / { proxy_pass http://master.argus.com:3000/; }
|
||||
}
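Once the stack is up, all six gateway ports can be smoke-tested from the compose host with a small loop like this (localhost assumed):

  for p in 8080 8081 8082 8083 8084 8085; do
    printf '%s -> %s\n' "$p" "$(curl -s -o /dev/null -w '%{http_code}' "http://localhost:${p}/")"
  done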
@ -1,9 +0,0 @@
|
||||
server {
|
||||
listen 80;
|
||||
server_name web.argus.com;
|
||||
|
||||
location / {
|
||||
set $web_backend http://web.argus.com:8080;
|
||||
proxy_pass $web_backend;
|
||||
}
|
||||
}
|
||||
@ -1,68 +0,0 @@
|
||||
#!/bin/bash
|
||||
|
||||
# DNS监控脚本 - 每10秒检查dns.conf是否有变化
|
||||
# 如果有变化则执行update-dns.sh脚本
|
||||
|
||||
DNS_CONF="/private/argus/etc/dns.conf"
|
||||
DNS_BACKUP="/tmp/dns.conf.backup"
|
||||
UPDATE_SCRIPT="/private/argus/etc/update-dns.sh"
|
||||
LOG_FILE="/var/log/supervisor/dns-monitor.log"
|
||||
|
||||
# 确保日志文件存在
|
||||
touch "$LOG_FILE"
|
||||
|
||||
log_message() {
|
||||
echo "$(date '+%Y-%m-%d %H:%M:%S') [DNS-Monitor] $1" >> "$LOG_FILE"
|
||||
}
|
||||
|
||||
log_message "DNS监控脚本启动"
|
||||
|
||||
while true; do
|
||||
if [ -f "$DNS_CONF" ]; then
|
||||
if [ -f "$DNS_BACKUP" ]; then
|
||||
# 比较文件内容
|
||||
if ! cmp -s "$DNS_CONF" "$DNS_BACKUP"; then
|
||||
log_message "检测到DNS配置变化"
|
||||
|
||||
# 更新备份文件
|
||||
cp "$DNS_CONF" "$DNS_BACKUP"
|
||||
|
||||
# 执行更新脚本
|
||||
if [ -x "$UPDATE_SCRIPT" ]; then
|
||||
log_message "执行DNS更新脚本: $UPDATE_SCRIPT"
|
||||
"$UPDATE_SCRIPT" >> "$LOG_FILE" 2>&1
|
||||
if [ $? -eq 0 ]; then
|
||||
log_message "DNS更新脚本执行成功"
|
||||
else
|
||||
log_message "DNS更新脚本执行失败"
|
||||
fi
|
||||
else
|
||||
log_message "警告: 更新脚本不存在或不可执行: $UPDATE_SCRIPT"
|
||||
fi
|
||||
fi
|
||||
else
|
||||
|
||||
# 第一次检测到配置文件,执行更新脚本
|
||||
if [ -x "$UPDATE_SCRIPT" ]; then
|
||||
log_message "执行DNS更新脚本: $UPDATE_SCRIPT"
|
||||
"$UPDATE_SCRIPT" >> "$LOG_FILE" 2>&1
|
||||
if [ $? -eq 0 ]; then
|
||||
log_message "DNS更新脚本执行成功"
|
||||
|
||||
# 第一次运行,创建备份并执行更新
|
||||
cp "$DNS_CONF" "$DNS_BACKUP"
|
||||
log_message "创建DNS配置备份文件"
|
||||
|
||||
else
|
||||
log_message "DNS更新脚本执行失败"
|
||||
fi
|
||||
else
|
||||
log_message "警告: 更新脚本不存在或不可执行: $UPDATE_SCRIPT"
|
||||
fi
|
||||
fi
|
||||
else
|
||||
log_message "警告: DNS配置文件不存在: $DNS_CONF"
|
||||
fi
|
||||
|
||||
sleep 10
|
||||
done
|
||||
@ -1,4 +1,4 @@
|
||||
user web_proxy;
|
||||
user root;
|
||||
worker_processes auto;
|
||||
|
||||
events {
|
||||
@ -13,6 +13,7 @@ http {
|
||||
|
||||
# 使用系统 resolv.conf(由 update-dns.sh 动态更新)
|
||||
resolver __RESOLVERS__ valid=30s ipv6=off;
|
||||
resolver_timeout 5s;
|
||||
|
||||
# 启用访问日志
|
||||
access_log /var/log/nginx/access.log;
|
||||
|
||||
20  src/web/build_tools/proxy/start-proxy-retry.sh  Normal file
@ -0,0 +1,20 @@
#!/bin/sh
set -eu

MAX=${RETRY_MAX:-10}
DELAY=${RETRY_DELAY:-10}
ATTEMPT=1

echo "[INFO] proxy retry wrapper: max=${MAX}, delay=${DELAY}s"

while [ "$ATTEMPT" -le "$MAX" ]; do
  echo "[INFO] starting proxy attempt ${ATTEMPT}/${MAX}"
  /usr/local/bin/start-proxy-supervised.sh && exit 0 || true
  echo "[WARN] proxy exited (attempt ${ATTEMPT}/${MAX}); sleeping ${DELAY}s before retry"
  sleep "$DELAY"
  ATTEMPT=$((ATTEMPT+1))
done

echo "[ERROR] proxy failed after ${MAX} attempts"
exit 1
|
||||
@ -46,6 +46,10 @@ echo "检测到 DNS 服务器列表: $RESOLVERS"
|
||||
# ========== 生成 nginx.conf ==========
|
||||
if [ -f "$TEMPLATE" ]; then
|
||||
echo "从模板生成 nginx.conf ..."
|
||||
# 合并 Docker 内置 DNS 以保障解析 Compose 服务名
|
||||
if ! echo " $RESOLVERS " | grep -q " 127.0.0.11 "; then
|
||||
RESOLVERS="127.0.0.11 ${RESOLVERS}"
|
||||
fi
|
||||
sed "s|__RESOLVERS__|$RESOLVERS|" "$TEMPLATE" > "$TARGET"
|
||||
else
|
||||
echo "错误: 找不到 nginx.conf.template ($TEMPLATE)"
|
||||
@ -55,6 +59,33 @@ fi
|
||||
# 打印生成结果供排查
|
||||
grep resolver "$TARGET" || true
|
||||
|
||||
# ========== 等待上游域名准备(避免启动即解析失败) ==========
|
||||
UPSTREAM_DOMAINS=(
|
||||
web.argus.com
|
||||
grafana.metric.argus.com
|
||||
prom.metric.argus.com
|
||||
kibana.log.argus.com
|
||||
alertmanager.alert.argus.com
|
||||
master.argus.com
|
||||
)
|
||||
WAIT_MAX=15
|
||||
WAITED=0
|
||||
MISSING=()
|
||||
while :; do
|
||||
MISSING=()
|
||||
for d in "${UPSTREAM_DOMAINS[@]}"; do
|
||||
if [ ! -s "/private/argus/etc/${d}" ]; then
|
||||
MISSING+=("$d")
|
||||
fi
|
||||
done
|
||||
if [ ${#MISSING[@]} -eq 0 ] || [ "$WAITED" -ge "$WAIT_MAX" ]; then
|
||||
break
|
||||
fi
|
||||
echo "[INFO] 等待上游域名记录生成(${WAITED}/${WAIT_MAX}) 缺失: ${MISSING[*]}"
|
||||
sleep 1
|
||||
WAITED=$((WAITED+1))
|
||||
done
|
||||
|
||||
echo "[INFO] Launching nginx..."
|
||||
|
||||
# 启动 nginx 前台模式
|
||||
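The per-domain record files that the wait loop above polls can be inspected from the host via the shared etc volume (path as mounted in the sys test compose; adjust for other deployments):

  ls -l ./private/argus/etc/ | grep -E 'argus\.com$'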
@ -5,12 +5,12 @@ pidfile=/var/run/supervisord.pid
|
||||
user=root
|
||||
|
||||
[program:proxy]
|
||||
command=/usr/local/bin/start-proxy-supervised.sh
|
||||
command=/usr/local/bin/start-proxy-retry.sh
|
||||
user=root
|
||||
stdout_logfile=/var/log/supervisor/web-proxy.log
|
||||
stderr_logfile=/var/log/supervisor/web-proxy_error.log
|
||||
autorestart=true
|
||||
startretries=3
|
||||
startretries=10
|
||||
startsecs=5
|
||||
stopwaitsecs=10
|
||||
killasgroup=true
|
||||
|
||||
@ -1,30 +1,42 @@
|
||||
// config/api.js
|
||||
|
||||
// Master 节点相关 API
|
||||
// 运行时解析主机名,统一按端口访问多服务
|
||||
const HOST = (typeof window !== 'undefined' && (window.__ARGUS_PUBLIC_HOST__ || window.location.hostname)) || 'localhost';
|
||||
|
||||
const PORTS = {
|
||||
MASTER: 8085, // 经网关(含 CORS)
|
||||
ALERTMANAGER: 8084,
|
||||
GRAFANA: 8081,
|
||||
PROMETHEUS: 8082,
|
||||
KIBANA: 8083,
|
||||
};
|
||||
|
||||
const BASE = {
|
||||
MASTER: `http://${HOST}:${PORTS.MASTER}`,
|
||||
ALERT: `http://${HOST}:${PORTS.ALERTMANAGER}`,
|
||||
GRAFANA: `http://${HOST}:${PORTS.GRAFANA}`,
|
||||
PROM: `http://${HOST}:${PORTS.PROMETHEUS}`,
|
||||
KIBANA: `http://${HOST}:${PORTS.KIBANA}`,
|
||||
};
|
||||
|
||||
// Master 节点相关 API(统一走 8085)
|
||||
export const MASTER_API = {
|
||||
// 节点列表
|
||||
LIST: "http://master.argus.com/api/v1/master/nodes",
|
||||
|
||||
// 节点详情(需要 nodeId)
|
||||
DETAIL: (nodeId) => `http://master.argus.com/api/v1/master/nodes/${nodeId}`,
|
||||
|
||||
// 节点配置(需要 nodeId)
|
||||
CONFIG: (nodeId) => `http://master.argus.com/api/v1/master/nodes/${nodeId}/config`,
|
||||
|
||||
// 节点统计信息
|
||||
STATISTICS: "http://master.argus.com/api/v1/master/nodes/statistics",
|
||||
LIST: `${BASE.MASTER}/api/v1/master/nodes`,
|
||||
DETAIL: (nodeId) => `${BASE.MASTER}/api/v1/master/nodes/${nodeId}`,
|
||||
CONFIG: (nodeId) => `${BASE.MASTER}/api/v1/master/nodes/${nodeId}/config`,
|
||||
STATISTICS: `${BASE.MASTER}/api/v1/master/nodes/statistics`,
|
||||
};
|
||||
|
||||
// 其他外部 API
|
||||
// 其他外部 API(8084)
|
||||
export const EXTERNAL_API = {
|
||||
ALERTS_INFOS: "http://alertmanager.alert.argus.com/api/v2/alerts",
|
||||
ALERTS_INFOS: `${BASE.ALERT}/api/v2/alerts`,
|
||||
};
|
||||
|
||||
// 外部服务 Host
|
||||
// 外部服务 Host(端口化)
|
||||
export const EXTERNAL_HOST = {
|
||||
ALERTS: "http://alertmanager.alert.argus.com",
|
||||
GRAFANA: "http://grafana.metric.argus.com",
|
||||
GRAFANA_DASHBOARD: "http://grafana.metric.argus.com/d/cluster-dashboard/cluster-dashboard",
|
||||
PROMETHEUS: "http://prometheus.metric.argus.com",
|
||||
KIBANA: "http://kibana.log.argus.com/app/discover",
|
||||
ALERTS: `${BASE.ALERT}`,
|
||||
GRAFANA: `${BASE.GRAFANA}`,
|
||||
GRAFANA_DASHBOARD: `${BASE.GRAFANA}/d/cluster-dashboard/cluster-dashboard`,
|
||||
PROMETHEUS: `${BASE.PROM}`,
|
||||
KIBANA: `${BASE.KIBANA}/app/discover`,
|
||||
};
|
||||
|
||||