dev_1.0.0_yuyr: finish development, deployment, and testing of the log and bind modules #8

Merged
yuyr merged 26 commits from dev_1.0.0_yuyr into dev_1.0.0 2025-09-22 16:39:39 +08:00
25 changed files with 559 additions and 209 deletions
Showing only changes of commit 31a6ab9803

src/log/.gitignore (vendored, new file, +3)
View File

@ -0,0 +1,3 @@
private/

View File

@ -0,0 +1,33 @@
FROM docker.elastic.co/elasticsearch/elasticsearch:8.13.4

# Switch to root for system-level installation
USER root

# Install supervisor
RUN apt-get update && \
    apt-get install -y supervisor && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*

# Create the supervisor log directory
RUN mkdir -p /var/log/supervisor

# Copy the supervisor configuration
COPY supervisord.conf /etc/supervisor/conf.d/supervisord.conf

# Copy the startup script
COPY start-es-supervised.sh /usr/local/bin/start-es-supervised.sh
RUN chmod +x /usr/local/bin/start-es-supervised.sh

# Create the data directory and hand it to the elasticsearch user
RUN mkdir -p /private/argus/log/elasticsearch && \
    chown -R elasticsearch:elasticsearch /private/argus/log/elasticsearch

# Stay as root; supervisor performs the user switch
USER root

# Expose the HTTP and transport ports
EXPOSE 9200 9300

# Run supervisor as the entrypoint
CMD ["/usr/bin/supervisord", "-c", "/etc/supervisor/conf.d/supervisord.conf"]

View File

@ -0,0 +1,26 @@
#!/bin/bash
set -euo pipefail

echo "[INFO] Starting Elasticsearch under supervisor..."

# Create the data directory if it does not exist yet
mkdir -p /private/argus/log/elasticsearch

# Symlink Elasticsearch's expected data directory to the private data directory
if [ -L /usr/share/elasticsearch/data ]; then
    rm /usr/share/elasticsearch/data
elif [ -d /usr/share/elasticsearch/data ]; then
    rm -rf /usr/share/elasticsearch/data
fi
ln -sf /private/argus/log/elasticsearch /usr/share/elasticsearch/data
echo "[INFO] Data directory linked: /usr/share/elasticsearch/data -> /private/argus/log/elasticsearch"

# Set environment variables (further ES configuration is passed in via docker-compose)
export ES_JAVA_OPTS="${ES_JAVA_OPTS:-"-Xms512m -Xmx512m"}"

echo "[INFO] Starting Elasticsearch process..."
# Hand off to the stock Elasticsearch entrypoint
exec /usr/local/bin/docker-entrypoint.sh elasticsearch

View File

@ -0,0 +1,27 @@
[supervisord]
nodaemon=true
logfile=/var/log/supervisor/supervisord.log
pidfile=/var/run/supervisord.pid
user=root

[program:elasticsearch]
command=/usr/local/bin/start-es-supervised.sh
user=elasticsearch
stdout_logfile=/var/log/supervisor/elasticsearch.log
stderr_logfile=/var/log/supervisor/elasticsearch_error.log
autorestart=true
startretries=3
startsecs=30
stopwaitsecs=30
killasgroup=true
stopasgroup=true

[unix_http_server]
file=/var/run/supervisor.sock
chmod=0700

[supervisorctl]
serverurl=unix:///var/run/supervisor.sock

[rpcinterface:supervisor]
supervisor.rpcinterface_factory = supervisor.rpcinterface:make_main_rpcinterface
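
Because supervisord runs as PID 1 and exposes the unix socket configured above, the elasticsearch program can be inspected or restarted from inside the running container. A sketch; the container name logging-mvp-es-1 is a guess based on the compose project name, so substitute the name shown by docker ps.

docker exec logging-mvp-es-1 supervisorctl -c /etc/supervisor/conf.d/supervisord.conf status
docker exec logging-mvp-es-1 supervisorctl -c /etc/supervisor/conf.d/supervisord.conf restart elasticsearch
docker exec logging-mvp-es-1 tail -n 50 /var/log/supervisor/elasticsearch_error.log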

View File

@ -0,0 +1,34 @@
FROM docker.elastic.co/kibana/kibana:8.13.4

# Switch to root for system-level installation
USER root

# Install supervisor
RUN apt-get update && \
    apt-get install -y supervisor && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*

# Create the supervisor log directory
RUN mkdir -p /var/log/supervisor

# Copy the supervisor configuration
COPY supervisord.conf /etc/supervisor/conf.d/supervisord.conf

# Copy the startup and post-start scripts
COPY start-kibana-supervised.sh /usr/local/bin/start-kibana-supervised.sh
COPY kibana-post-start.sh /usr/local/bin/kibana-post-start.sh
RUN chmod +x /usr/local/bin/start-kibana-supervised.sh /usr/local/bin/kibana-post-start.sh

# Create the data directory and hand it to the kibana user
RUN mkdir -p /private/argus/log/kibana && \
    chown -R kibana:kibana /private/argus/log/kibana

# Stay as root; supervisor performs the user switch
USER root

# Expose the Kibana port
EXPOSE 5601

# Run supervisor as the entrypoint
CMD ["/usr/bin/supervisord", "-c", "/etc/supervisor/conf.d/supervisord.conf"]

View File

@ -0,0 +1,146 @@
#!/bin/bash
set -euo pipefail

ES_HOST="${ELASTICSEARCH_HOSTS:-http://es:9200}"
KB_HOST="http://localhost:5601"

echo "[INFO] Starting Kibana post-start configuration..."

# Wait until Elasticsearch is reachable
wait_for_elasticsearch() {
    echo "[INFO] Waiting for Elasticsearch..."
    local max_attempts=60
    local attempt=1

    while [ $attempt -le $max_attempts ]; do
        if curl -fs "$ES_HOST/_cluster/health" >/dev/null 2>&1; then
            echo "[OK] Elasticsearch is available"
            return 0
        fi
        echo "  Waiting for ES... ($attempt/$max_attempts)"
        sleep 5
        ((attempt++))
    done

    echo "[ERROR] Elasticsearch timeout"
    return 1
}
# Wait until Kibana is reachable and reports itself available
wait_for_kibana() {
    echo "[INFO] Waiting for Kibana..."
    local max_attempts=120
    local attempt=1

    while [ $attempt -le $max_attempts ]; do
        if curl -fs "$KB_HOST/api/status" >/dev/null 2>&1; then
            local status=$(curl -s "$KB_HOST/api/status" | grep -o '"level":"available"' || echo "")
            if [ -n "$status" ]; then
                echo "[OK] Kibana is available"
                return 0
            fi
            echo "  Waiting for Kibana... ($attempt/$max_attempts, status: $status)"
        else
            echo "  Waiting for Kibana... ($attempt/$max_attempts, connection failed)"
        fi
        sleep 5
        ((attempt++))
    done

    echo "[ERROR] Kibana timeout"
    return 1
}
# Idempotently set the replica count of the log indices to 0
fix_replicas_idempotent() {
    echo "[INFO] Checking and fixing index replicas..."

    # Collect all train-* and infer-* indices
    local indices=$(curl -s "$ES_HOST/_cat/indices/train-*,infer-*?h=index" 2>/dev/null || echo "")

    if [ -z "$indices" ]; then
        echo "[INFO] No train-*/infer-* indices found, skipping replica adjustment"
        return 0
    fi

    for idx in $indices; do
        # Read the current replica count
        local current_replicas=$(curl -s "$ES_HOST/$idx/_settings" | grep -o '"number_of_replicas":"[^"]*"' | cut -d'"' -f4 || echo "")

        if [ "$current_replicas" != "0" ]; then
            echo "[INFO] Setting replicas to 0 for index: $idx (current: $current_replicas)"
            curl -fsS -X PUT "$ES_HOST/$idx/_settings" \
                -H 'Content-Type: application/json' \
                -d '{"index":{"number_of_replicas":0}}' >/dev/null || {
                echo "[WARN] Failed to set replicas for $idx"
                continue
            }
            echo "[OK] Updated replicas for $idx"
        else
            echo "[INFO] Index $idx already has 0 replicas, skipping"
        fi
    done
}
# Idempotently create the Kibana data views
create_data_views_idempotent() {
    echo "[INFO] Checking and creating data views..."

    # Check whether matching indices exist
    local train_indices=$(curl -s "$ES_HOST/_cat/indices/train-*?h=index" 2>/dev/null | wc -l || echo "0")
    local infer_indices=$(curl -s "$ES_HOST/_cat/indices/infer-*?h=index" 2>/dev/null | wc -l || echo "0")

    # Create the train data view
    if [ "$train_indices" -gt 0 ]; then
        # Check whether the data view already exists (GET /api/data_views lists all data views)
        local train_exists=$(curl -s "$KB_HOST/api/data_views" -H 'kbn-xsrf: true' 2>/dev/null | grep '"title":"train-\*"' | wc -l || echo "0")

        if [ "$train_exists" -eq 0 ]; then
            echo "[INFO] Creating data view for train-* indices"
            curl -fsS -X POST "$KB_HOST/api/data_views/data_view" \
                -H 'kbn-xsrf: true' \
                -H 'Content-Type: application/json' \
                -d '{"data_view":{"name":"train","title":"train-*","timeFieldName":"@timestamp"}}' \
                >/dev/null && echo "[OK] Created train data view" || echo "[WARN] Failed to create train data view"
        else
            echo "[INFO] Train data view already exists, skipping"
        fi
    else
        echo "[INFO] No train-* indices found, skipping train data view creation"
    fi

    # Create the infer data view
    if [ "$infer_indices" -gt 0 ]; then
        # Check whether the data view already exists
        local infer_exists=$(curl -s "$KB_HOST/api/data_views" -H 'kbn-xsrf: true' 2>/dev/null | grep '"title":"infer-\*"' | wc -l || echo "0")

        if [ "$infer_exists" -eq 0 ]; then
            echo "[INFO] Creating data view for infer-* indices"
            curl -fsS -X POST "$KB_HOST/api/data_views/data_view" \
                -H 'kbn-xsrf: true' \
                -H 'Content-Type: application/json' \
                -d '{"data_view":{"name":"infer","title":"infer-*","timeFieldName":"@timestamp"}}' \
                >/dev/null && echo "[OK] Created infer data view" || echo "[WARN] Failed to create infer data view"
        else
            echo "[INFO] Infer data view already exists, skipping"
        fi
    else
        echo "[INFO] No infer-* indices found, skipping infer data view creation"
    fi
}
# Main flow
main() {
    # Wait until both services are available
    wait_for_elasticsearch || exit 1
    wait_for_kibana || exit 1

    # Run the idempotent configuration steps
    fix_replicas_idempotent
    create_data_views_idempotent

    echo "[INFO] Kibana post-start configuration completed"
}

# Run the main flow
main
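
The adjustments this script makes can be verified by hand against the ports published in docker-compose.yml (9200 for Elasticsearch, 5601 for Kibana); a quick check, assuming those defaults:

# Replica count of the log indices (rep should be 0 after the script has run)
curl -s "http://localhost:9200/_cat/indices/train-*,infer-*?v&h=index,rep,health"
# Titles of the data views Kibana knows about (train-* and infer-* should appear)
curl -s -H 'kbn-xsrf: true' "http://localhost:5601/api/data_views" | grep -o '"title":"[^"]*"'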

View File

@ -0,0 +1,32 @@
#!/bin/bash
set -euo pipefail

echo "[INFO] Starting Kibana under supervisor..."

# Create the data directory if it does not exist yet
mkdir -p /private/argus/log/kibana

# Symlink Kibana's expected data directory to the private data directory
if [ -L /usr/share/kibana/data ]; then
    rm /usr/share/kibana/data
elif [ -d /usr/share/kibana/data ]; then
    rm -rf /usr/share/kibana/data
fi
ln -sf /private/argus/log/kibana /usr/share/kibana/data
echo "[INFO] Data directory linked: /usr/share/kibana/data -> /private/argus/log/kibana"

# Set environment variables
export ELASTICSEARCH_HOSTS="${ELASTICSEARCH_HOSTS:-"http://es:9200"}"
echo "[INFO] Connecting to Elasticsearch at: $ELASTICSEARCH_HOSTS"

# Launch the post-start configuration in the background
echo "[INFO] Starting background post-start configuration..."
/usr/local/bin/kibana-post-start.sh &

echo "[INFO] Starting Kibana process..."
# Hand off to the stock Kibana entrypoint
exec /usr/local/bin/kibana-docker

View File

@ -0,0 +1,27 @@
[supervisord]
nodaemon=true
logfile=/var/log/supervisor/supervisord.log
pidfile=/var/run/supervisord.pid
user=root

[program:kibana]
command=/usr/local/bin/start-kibana-supervised.sh
user=kibana
stdout_logfile=/var/log/supervisor/kibana.log
stderr_logfile=/var/log/supervisor/kibana_error.log
autorestart=true
startretries=3
startsecs=30
stopwaitsecs=30
killasgroup=true
stopasgroup=true

[unix_http_server]
file=/var/run/supervisor.sock
chmod=0700

[supervisorctl]
serverurl=unix:///var/run/supervisor.sock

[rpcinterface:supervisor]
supervisor.rpcinterface_factory = supervisor.rpcinterface:make_main_rpcinterface

View File

@ -1,47 +0,0 @@
#!/usr/bin/env bash
set -euo pipefail
root="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"

# Create the private directory structure (only ES and Kibana are needed)
echo "[INFO] Creating private directory structure..."
mkdir -p "$root/private/es/data"
mkdir -p "$root/private/es/scripts"
mkdir -p "$root/private/kibana/data"
mkdir -p "$root/private/kibana/scripts"

# Copy the startup scripts
echo "[INFO] Copying startup scripts..."
cp "$root/scripts/start-es.sh" "$root/private/es/scripts/"
cp "$root/scripts/start-kibana.sh" "$root/private/kibana/scripts/"

# Make the scripts executable
chmod +x "$root/private/es/scripts/start-es.sh"
chmod +x "$root/private/kibana/scripts/start-kibana.sh"

# Set data directory ownership (the ES and Kibana containers both run as UID 1000)
sudo chown -R 1000:1000 "$root/private/es/data" "$root/private/kibana/data" 2>/dev/null || true

# Check whether the fluent-bit bundle exists
if [[ ! -f "$root/fluent-bit-bundle.tar.gz" ]]; then
  echo "[INFO] Creating fluent-bit bundle..."
  # Build the bundle if the source directories exist
  cd "$root"
  if [[ -d "private/fluent-bit" ]]; then
    cd private/fluent-bit && tar -czf ../../fluent-bit-bundle.tar.gz etc/ packages/ 2>/dev/null && cd ../..
  elif [[ -d "fluent-bit" && -d "packages" ]]; then
    # Assemble a temporary directory tree to package
    mkdir -p temp-bundle/etc temp-bundle/packages
    cp -r fluent-bit/* temp-bundle/etc/
    cp -r packages/* temp-bundle/packages/
    cd temp-bundle && tar -czf ../fluent-bit-bundle.tar.gz . && cd ..
    rm -rf temp-bundle
  else
    echo "[WARN] Unable to create the fluent-bit bundle; make sure the fluent-bit config and packages directories exist"
  fi
fi

if [[ ! -f "$root/start-fluent-bit.sh" ]]; then
  echo "[WARN] start-fluent-bit.sh does not exist; make sure the startup script has been created"
fi

echo "[OK] Initialization complete: private/{es,kibana}, fluent-bit-bundle.tar.gz, start-fluent-bit.sh"

View File

@ -1,17 +0,0 @@
#!/usr/bin/env bash
set -euo pipefail

echo "[INFO] Sending test logs to both fluent-bit containers..."

# Send logs to host01
echo "[INFO] Sending logs to host01..."
./scripts/03_send_test_host01.sh
echo ""

# Send logs to host02
echo "[INFO] Sending logs to host02..."
./scripts/03_send_test_host02.sh
echo ""

echo "[OK] Finished sending test logs to both hosts"

View File

@ -1,10 +0,0 @@
#!/usr/bin/env bash
set -euo pipefail
cd "$(dirname "$0")/.."

compose_cmd="docker compose"
if ! $compose_cmd version >/dev/null 2>&1; then
  if command -v docker-compose >/dev/null 2>&1; then compose_cmd="docker-compose"; else
    echo "Docker Compose is required; please install it and retry" >&2; exit 1; fi
fi

$compose_cmd -p logging-mvp restart fluent-bit
echo "[OK] fluent-bit restarted (this image does not support SIGHUP hot reload)"

View File

@ -1,44 +0,0 @@
#!/usr/bin/env bash
set -euo pipefail
KB="${KB:-http://localhost:5601}"

# Wait until Kibana is fully up
wait_for_kibana() {
  echo "[i] Waiting for Kibana to start..."
  local max_attempts=60
  local attempt=1

  while [ $attempt -le $max_attempts ]; do
    if curl -fs "$KB/api/status" >/dev/null 2>&1; then
      local status=$(curl -s "$KB/api/status" | jq -r '.status.overall.level // "unknown"')
      if [ "$status" = "available" ]; then
        echo "[OK] Kibana is up (status: $status)"
        return 0
      else
        echo "  Waiting... ($attempt/$max_attempts, status: $status)"
      fi
    else
      echo "  Waiting... ($attempt/$max_attempts, connection failed)"
    fi
    sleep 5
    ((attempt++))
  done

  echo "[ERROR] Kibana startup timed out"
  return 1
}

create_view() {
  local name="$1" pattern="$2" timefield="${3:-@timestamp}"
  echo "[i] Creating data view: $name ($pattern, time=$timefield)"
  curl -fsS -X POST "$KB/api/data_views/data_view" \
    -H 'kbn-xsrf: true' \
    -H 'Content-Type: application/json' \
    -d "{\"data_view\":{\"name\":\"$name\",\"title\":\"$pattern\",\"timeFieldName\":\"$timefield\"}}" \
    >/dev/null && echo "  -> OK" || { echo "  -> failed (possibly no matching indices)"; return 1; }
}

# Wait for Kibana, then create the views
wait_for_kibana || exit 1
create_view "train" "train-*" "@timestamp" || true
create_view "infer" "infer-*" "@timestamp" || true
echo "[DONE] If a creation failed, make sure the train-*/infer-* indices exist, then rerun this script."

View File

@ -1,8 +0,0 @@
#!/usr/bin/env bash
set -euo pipefail
ES="${ES:-http://localhost:9200}"
for idx in train-* infer-*; do
  echo "[i] Setting the replica count of $idx to 0"
  curl -fsS -X PUT "$ES/$idx/_settings" -H 'Content-Type: application/json' -d '{"index":{"number_of_replicas":0}}' || true
done
echo "[OK] Done"

View File

@ -1,33 +0,0 @@
#!/usr/bin/env bash
set -euo pipefail
cd "$(dirname "$0")/.."

echo "Cleanup options:"
echo "1. Clean indices and buffers (keep persisted data)"
echo "2. Full cleanup (including persisted data directories)"
read -rp "Choose (1/2): " choice

case $choice in
  1)
    read -rp "Dangerous operation: delete the ES indices train-*, infer-* (and logstash-*) and empty the buffers. Confirm? (yes/NO) " ans
    if [[ "${ans:-NO}" != "yes" ]]; then echo "Cancelled"; exit 0; fi
    ES="${ES:-http://localhost:9200}"
    for idx in train-* infer-* logstash-*; do
      echo "[i] Deleting index $idx"
      curl -fsS -X DELETE "$ES/$idx" || true
    done
    rm -rf ./private/fluent-bit/buffers/* || true
    echo "[OK] Index and buffer cleanup finished"
    ;;
  2)
    read -rp "Dangerous operation: delete all data, including persisted storage! Confirm? (yes/NO) " ans
    if [[ "${ans:-NO}" != "yes" ]]; then echo "Cancelled"; exit 0; fi
    rm -rf ./private/fluent-bit/buffers/* ./private/es/data/* ./private/kibana/data/* || true
    rm -rf ./private/fluent-bit/logs/train/* ./private/fluent-bit/logs/infer/* || true
    echo "[OK] Full cleanup finished (including persisted data)"
    ;;
  *)
    echo "Cancelled"
    exit 0
    ;;
esac

View File

@ -1,19 +0,0 @@
#!/bin/bash
set -euo pipefail

echo "[INFO] Starting Elasticsearch with private directory setup..."

# Create the data directory
mkdir -p /private/es/data

# Symlink Elasticsearch's expected data directory to it
rm -rf /usr/share/elasticsearch/data 2>/dev/null || true
ln -sf /private/es/data /usr/share/elasticsearch/data

# Set the correct ownership (Elasticsearch runs as UID 1000)
chown -R 1000:1000 /private/es/data
echo "[INFO] Data directory linked: /usr/share/elasticsearch/data -> /private/es/data"

# Hand off to the stock Elasticsearch entrypoint
exec /usr/local/bin/docker-entrypoint.sh elasticsearch

View File

@ -1,19 +0,0 @@
#!/bin/bash
set -euo pipefail

echo "[INFO] Starting Kibana with private directory setup..."

# Create the data directory
mkdir -p /private/kibana/data

# Symlink Kibana's expected data directory to it
rm -rf /usr/share/kibana/data 2>/dev/null || true
ln -sf /private/kibana/data /usr/share/kibana/data

# Set the correct ownership (Kibana runs as UID 1000)
chown -R 1000:1000 /private/kibana/data
echo "[INFO] Data directory linked: /usr/share/kibana/data -> /private/kibana/data"

# Hand off to the stock Kibana entrypoint
exec /usr/local/bin/kibana-docker

src/log/scripts/build_images.sh (executable, new file, +20)
View File

@ -0,0 +1,20 @@
#!/usr/bin/env bash
set -euo pipefail
root="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
cd "$root"

echo "[INFO] Building custom Docker images with supervisor support..."

# Build the Elasticsearch image
echo "[INFO] Building Elasticsearch image..."
docker build -t argus-elasticsearch:latest ./elasticsearch/build

# Build the Kibana image
echo "[INFO] Building Kibana image..."
docker build -t argus-kibana:latest ./kibana/build

echo "[OK] Custom images built successfully:"
echo " - argus-elasticsearch:latest"
echo " - argus-kibana:latest"

View File

@ -1,15 +1,17 @@
 version: "3.8"
 services:
   es:
-    image: docker.elastic.co/elasticsearch/elasticsearch:8.13.4
+    build:
+      context: ../elasticsearch/build
+      dockerfile: Dockerfile
+    image: argus-elasticsearch:latest
     environment:
       - discovery.type=single-node
       - xpack.security.enabled=false
       - ES_JAVA_OPTS=-Xms512m -Xmx512m
     volumes:
-      - ./private:/private
+      - ./private/argus/log/elasticsearch:/private/argus/log/elasticsearch
     ports: ["9200:9200"]
-    command: /private/es/scripts/start-es.sh
     healthcheck:
       test: ["CMD-SHELL", "curl -fs http://localhost:9200 >/dev/null || exit 1"]
       interval: 10s
@ -17,13 +19,15 @@ services:
       retries: 30
   kibana:
-    image: docker.elastic.co/kibana/kibana:8.13.4
+    build:
+      context: ../kibana/build
+      dockerfile: Dockerfile
+    image: argus-kibana:latest
     environment:
       - ELASTICSEARCH_HOSTS=http://es:9200
     volumes:
-      - ./private:/private
+      - ./private/argus/log/kibana:/private/argus/log/kibana
     ports: ["5601:5601"]
-    command: /private/kibana/scripts/start-kibana.sh
     depends_on:
       es:
         condition: service_healthy
@ -37,8 +41,8 @@ services:
       - ES_HOST=es
       - ES_PORT=9200
     volumes:
-      - ./start-fluent-bit.sh:/private/start-fluent-bit.sh:ro
-      - ./fluent-bit-bundle.tar.gz:/private/fluent-bit-bundle.tar.gz:ro
+      - ../fluent-bit/start-fluent-bit.sh:/private/start-fluent-bit.sh:ro
+      - ../fluent-bit/fluent-bit-bundle.tar.gz:/private/fluent-bit-bundle.tar.gz:ro
     ports: ["2020:2020"]
     depends_on:
       es:
@ -59,8 +63,8 @@ services:
       - ES_HOST=es
       - ES_PORT=9200
     volumes:
-      - ./start-fluent-bit.sh:/private/start-fluent-bit.sh:ro
-      - ./fluent-bit-bundle.tar.gz:/private/fluent-bit-bundle.tar.gz:ro
+      - ../fluent-bit/start-fluent-bit.sh:/private/start-fluent-bit.sh:ro
+      - ../fluent-bit/fluent-bit-bundle.tar.gz:/private/fluent-bit-bundle.tar.gz:ro
     ports: ["2021:2020"]
     depends_on:
       es:
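
With the build: sections in place, the whole stack can be brought up straight from the compose directory; a sketch, where the directory name is an assumption and the logging-mvp project name matches the container names used by the test scripts:

cd src/log/compose            # assumed location of this docker-compose.yml
docker compose -p logging-mvp up -d --build
docker compose -p logging-mvp ps
curl -fs http://localhost:9200 >/dev/null && echo "ES is up"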

View File

@ -0,0 +1,27 @@
#!/usr/bin/env bash
set -euo pipefail
root="$(cd "$(dirname "${BASH_SOURCE[0]}")/../" && pwd)"

# Create the new private directory structure (based on the argus directory layout)
echo "[INFO] Creating private directory structure for supervisor-based containers..."
mkdir -p "$root/private/argus/log/elasticsearch"
mkdir -p "$root/private/argus/log/kibana"

# Set data directory ownership (the ES and Kibana containers both run as UID 1000)
echo "[INFO] Setting permissions for data directories..."
sudo chown -R 1000:1000 "$root/private/argus/log/elasticsearch" 2>/dev/null || true
sudo chown -R 1000:1000 "$root/private/argus/log/kibana" 2>/dev/null || true

echo "[INFO] Supervisor-based containers will manage their own scripts and configurations"

# Check whether the fluent-bit files exist
if [[ ! -f "$root/../fluent-bit/fluent-bit-bundle.tar.gz" ]]; then
  echo "[WARN] fluent-bit/fluent-bit-bundle.tar.gz does not exist; make sure the file has been created"
fi
if [[ ! -f "$root/../fluent-bit/start-fluent-bit.sh" ]]; then
  echo "[WARN] fluent-bit/start-fluent-bit.sh does not exist; make sure the startup script has been created"
fi

echo "[OK] Initialization complete: private/argus/log/{elasticsearch,kibana}"
echo "[INFO] Fluent-bit files should be in the fluent-bit/ directory"

View File

@ -5,7 +5,7 @@ set -euo pipefail
 container_name="logging-mvp-fluent-bit-host01-1"

 # Check that the container exists and is running
-if ! docker ps --format "table {{.Names}}" | grep -q "$container_name"; then
+if ! docker ps | grep -q "$container_name"; then
   echo "[ERROR] Fluent Bit container $container_name is not running"
   exit 1
 fi

View File

@ -5,7 +5,7 @@ set -euo pipefail
 container_name="logging-mvp-fluent-bit-host02-1"

 # Check that the container exists and is running
-if ! docker ps --format "table {{.Names}}" | grep -q "$container_name"; then
+if ! docker ps | grep -q "$container_name"; then
   echo "[ERROR] Fluent Bit container $container_name is not running"
   exit 1
 fi

src/log/tests/scripts/e2e_test.sh (executable, new file, +168)
View File

@ -0,0 +1,168 @@
#!/usr/bin/env bash
set -euo pipefail

echo "======================================="
echo "ARGUS Log System End-to-End Test"
echo "======================================="
echo ""

# Record the test start time
test_start_time=$(date +%s)

# Helper: get the current log count from ES
get_log_count() {
    local train_count=$(curl -s "http://localhost:9200/train-*/_count" 2>/dev/null | grep -o '"count":[0-9]*' | cut -d':' -f2 || echo "0")
    local infer_count=$(curl -s "http://localhost:9200/infer-*/_count" 2>/dev/null | grep -o '"count":[0-9]*' | cut -d':' -f2 || echo "0")
    echo "$((train_count + infer_count))"
}

# Helper: wait until all services are ready
wait_for_services() {
    echo "[INFO] Waiting for all services to be ready..."
    local max_attempts=60
    local attempt=1

    while [ $attempt -le $max_attempts ]; do
        if curl -fs http://localhost:9200/_cluster/health >/dev/null 2>&1 && \
           curl -fs http://localhost:5601/api/status >/dev/null 2>&1 && \
           curl -fs http://localhost:2020/api/v2/metrics >/dev/null 2>&1 && \
           curl -fs http://localhost:2021/api/v2/metrics >/dev/null 2>&1; then
            echo "[OK] All services are ready!"
            return 0
        fi
        echo "  Waiting for services... ($attempt/$max_attempts)"
        sleep 5
        ((attempt++))
    done

    echo "[ERROR] Services not ready after $max_attempts attempts"
    return 1
}
# Helper: print a test step banner
show_step() {
    echo ""
    echo "🔄 Step $1: $2"
    echo "----------------------------------------"
}

# Helper: report the result of the previous step
verify_step() {
    if [ $? -eq 0 ]; then
        echo "$1 - SUCCESS"
    else
        echo "$1 - FAILED"
        exit 1
    fi
}
# Start the end-to-end test
show_step "1" "Bootstrap - Initialize environment"
./scripts/01_bootstrap.sh
verify_step "Bootstrap"

show_step "2" "Startup - Start all services"
./scripts/02_up.sh
verify_step "Service startup"

# Wait until the services are fully ready
wait_for_services || exit 1

# Record the log count before sending test data
initial_count=$(get_log_count)
echo "[INFO] Initial log count: $initial_count"

show_step "3a" "Send test data - Host01"
./scripts/03_send_test_host01.sh
verify_step "Test data sending (host01)"

show_step "3b" "Send test data - Host02"
./scripts/03_send_test_host02.sh
verify_step "Test data sending (host02)"

# Give the pipeline time to process the data
echo "[INFO] Waiting for data to be processed..."
sleep 10

show_step "4" "Verify data - Query Elasticsearch"
./scripts/04_query_es.sh
verify_step "Data verification"

# Record the log count after sending test data
final_count=$(get_log_count)
echo "[INFO] Final log count: $final_count"

# Verify that the log count increased
if [ "$final_count" -gt "$initial_count" ]; then
    added_logs=$((final_count - initial_count))
    echo "✅ Log count verification - SUCCESS: Added $added_logs logs (from $initial_count to $final_count)"
else
    echo "❌ Log count verification - FAILED: Expected count to increase, but got $initial_count -> $final_count"
    exit 1
fi

# Verify the expected minimum number of logs (each host should have sent a few)
expected_min_logs=4  # at least a handful of log lines are expected
if [ "$final_count" -ge "$expected_min_logs" ]; then
    echo "✅ Minimum log threshold - SUCCESS: $final_count logs (>= $expected_min_logs expected)"
else
    echo "❌ Minimum log threshold - FAILED: Only $final_count logs (>= $expected_min_logs expected)"
    exit 1
fi
# Check service health
show_step "Health" "Check service health"
echo "[INFO] Checking service health..."

# Elasticsearch cluster health
es_health=$(curl -s "http://localhost:9200/_cluster/health" | grep -o '"status":"[^"]*"' | cut -d'"' -f4)
if [ "$es_health" = "green" ] || [ "$es_health" = "yellow" ]; then
    echo "✅ Elasticsearch health: $es_health"
else
    echo "❌ Elasticsearch health: $es_health"
fi

# Kibana status
if curl -fs "http://localhost:5601/api/status" >/dev/null 2>&1; then
    kb_status="available"
    echo "✅ Kibana status: $kb_status"
else
    kb_status="unavailable"
    echo "⚠️ Kibana status: $kb_status"
fi

# Fluent-Bit uptime metrics
fb_host01_uptime=$(curl -s "http://localhost:2020/api/v2/metrics" | grep "fluentbit_uptime" | head -1 | grep -o "[0-9]\+$" || echo "0")
fb_host02_uptime=$(curl -s "http://localhost:2021/api/v2/metrics" | grep "fluentbit_uptime" | head -1 | grep -o "[0-9]\+$" || echo "0")
if [ "$fb_host01_uptime" -gt 0 ] && [ "$fb_host02_uptime" -gt 0 ]; then
    echo "✅ Fluent-Bit services: host01 uptime=${fb_host01_uptime}s, host02 uptime=${fb_host02_uptime}s"
else
    echo "⚠️ Fluent-Bit services: host01 uptime=${fb_host01_uptime}s, host02 uptime=${fb_host02_uptime}s"
fi
verify_step "Service health check"

show_step "5" "Cleanup - Stop all services"
./scripts/05_down.sh
verify_step "Service cleanup"
# Compute the total test duration
test_end_time=$(date +%s)
total_time=$((test_end_time - test_start_time))
echo ""
echo "======================================="
echo "🎉 END-TO-END TEST COMPLETED SUCCESSFULLY!"
echo "======================================="
echo "📊 Test Summary:"
echo " • Initial logs: $initial_count"
echo " • Final logs: $final_count"
echo " • Added logs: $added_logs"
echo " • Total time: ${total_time}s"
echo " • ES health: $es_health"
echo " • Kibana status: $kb_status"
echo " • All services started and stopped successfully"
echo ""
echo "✅ The ARGUS log system is working correctly!"
echo ""