dev_1.0.0_yuyr 完成 log和bind模块开发部署测试 #8
3
src/log/.gitignore
vendored
Normal file
3
src/log/.gitignore
vendored
Normal file
@ -0,0 +1,3 @@
|
||||
|
||||
private/
|
||||
|
33
src/log/elasticsearch/build/Dockerfile
Normal file
33
src/log/elasticsearch/build/Dockerfile
Normal file
@ -0,0 +1,33 @@
|
||||
FROM docker.elastic.co/elasticsearch/elasticsearch:8.13.4

# Switch to root for system-level package installation.
USER root

# Install supervisor. Recommended packages are skipped and the apt cache is
# removed in the same layer so neither ends up in the image.
RUN apt-get update && \
    apt-get install -y --no-install-recommends supervisor && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*

# Directory that supervisord.conf points its log files at.
RUN mkdir -p /var/log/supervisor

# supervisord configuration.
COPY supervisord.conf /etc/supervisor/conf.d/supervisord.conf

# Startup wrapper that supervisord runs for the elasticsearch program.
COPY start-es-supervised.sh /usr/local/bin/start-es-supervised.sh
RUN chmod +x /usr/local/bin/start-es-supervised.sh

# Create the data directory and hand ownership to the elasticsearch user.
RUN mkdir -p /private/argus/log/elasticsearch && \
    chown -R elasticsearch:elasticsearch /private/argus/log/elasticsearch

# Stay root: supervisord starts as root and switches user per program.
USER root

# HTTP (9200) and transport (9300) ports.
EXPOSE 9200 9300

# supervisord is the container's main process.
CMD ["/usr/bin/supervisord", "-c", "/etc/supervisor/conf.d/supervisord.conf"]
|
26
src/log/elasticsearch/build/start-es-supervised.sh
Normal file
26
src/log/elasticsearch/build/start-es-supervised.sh
Normal file
@ -0,0 +1,26 @@
|
||||
#!/bin/bash
# Launcher run by supervisord: points Elasticsearch's data path at the
# persistent directory via a symlink, then hands over to the image's
# original entrypoint.
set -euo pipefail

data_target=/private/argus/log/elasticsearch
data_link=/usr/share/elasticsearch/data

echo "[INFO] Starting Elasticsearch under supervisor..."

# Make sure the persistent data directory exists.
mkdir -p "$data_target"

# Remove whatever currently sits at the link path (an old symlink or the
# image's own data directory) so the symlink can be created in its place.
if [ -L "$data_link" ]; then
  rm "$data_link"
elif [ -d "$data_link" ]; then
  rm -rf "$data_link"
fi
ln -sf "$data_target" "$data_link"

echo "[INFO] Data directory linked: $data_link -> $data_target"

# Default JVM heap settings unless the environment (e.g. docker-compose)
# already provides ES_JAVA_OPTS.
: "${ES_JAVA_OPTS:=-Xms512m -Xmx512m}"
export ES_JAVA_OPTS

echo "[INFO] Starting Elasticsearch process..."

# Replace this shell with the original Elasticsearch entrypoint.
exec /usr/local/bin/docker-entrypoint.sh elasticsearch
|
27
src/log/elasticsearch/build/supervisord.conf
Normal file
27
src/log/elasticsearch/build/supervisord.conf
Normal file
@ -0,0 +1,27 @@
|
||||
[supervisord]
|
||||
nodaemon=true
|
||||
logfile=/var/log/supervisor/supervisord.log
|
||||
pidfile=/var/run/supervisord.pid
|
||||
user=root
|
||||
|
||||
[program:elasticsearch]
|
||||
command=/usr/local/bin/start-es-supervised.sh
|
||||
user=elasticsearch
|
||||
stdout_logfile=/var/log/supervisor/elasticsearch.log
|
||||
stderr_logfile=/var/log/supervisor/elasticsearch_error.log
|
||||
autorestart=true
|
||||
startretries=3
|
||||
startsecs=30
|
||||
stopwaitsecs=30
|
||||
killasgroup=true
|
||||
stopasgroup=true
|
||||
|
||||
[unix_http_server]
|
||||
file=/var/run/supervisor.sock
|
||||
chmod=0700
|
||||
|
||||
[supervisorctl]
|
||||
serverurl=unix:///var/run/supervisor.sock
|
||||
|
||||
[rpcinterface:supervisor]
|
||||
supervisor.rpcinterface_factory = supervisor.rpcinterface:make_main_rpcinterface
|
34
src/log/kibana/build/Dockerfile
Normal file
34
src/log/kibana/build/Dockerfile
Normal file
@ -0,0 +1,34 @@
|
||||
FROM docker.elastic.co/kibana/kibana:8.13.4

# Switch to root for system-level package installation.
USER root

# Install supervisor. Recommended packages are skipped and the apt cache is
# removed in the same layer so neither ends up in the image.
RUN apt-get update && \
    apt-get install -y --no-install-recommends supervisor && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*

# Directory that supervisord.conf points its log files at.
RUN mkdir -p /var/log/supervisor

# supervisord configuration.
COPY supervisord.conf /etc/supervisor/conf.d/supervisord.conf

# Startup wrapper and the post-start configuration script it launches.
COPY start-kibana-supervised.sh /usr/local/bin/start-kibana-supervised.sh
COPY kibana-post-start.sh /usr/local/bin/kibana-post-start.sh
RUN chmod +x /usr/local/bin/start-kibana-supervised.sh /usr/local/bin/kibana-post-start.sh

# Create the data directory and hand ownership to the kibana user.
RUN mkdir -p /private/argus/log/kibana && \
    chown -R kibana:kibana /private/argus/log/kibana

# Stay root: supervisord starts as root and switches user per program.
USER root

# Kibana HTTP port.
EXPOSE 5601

# supervisord is the container's main process.
CMD ["/usr/bin/supervisord", "-c", "/etc/supervisor/conf.d/supervisord.conf"]
|
146
src/log/kibana/build/kibana-post-start.sh
Normal file
146
src/log/kibana/build/kibana-post-start.sh
Normal file
@ -0,0 +1,146 @@
|
||||
#!/bin/bash
|
||||
set -euo pipefail
|
||||
|
||||
ES_HOST="${ELASTICSEARCH_HOSTS:-http://es:9200}"
|
||||
KB_HOST="http://localhost:5601"
|
||||
|
||||
echo "[INFO] Starting Kibana post-start configuration..."
|
||||
|
||||
#######################################
# Poll Elasticsearch's cluster-health endpoint until it answers successfully.
# Globals:   ES_HOST (read) - base URL of Elasticsearch
# Arguments: $1 - maximum number of attempts (optional, default 60)
#            $2 - seconds to sleep between attempts (optional, default 5)
# Outputs:   progress messages on stdout
# Returns:   0 once the endpoint responds, 1 if every attempt failed
#######################################
wait_for_elasticsearch() {
  local max_attempts="${1:-60}"
  local interval="${2:-5}"
  local attempt

  echo "[INFO] Waiting for Elasticsearch..."

  # A C-style loop keeps the counter arithmetic out of `set -e`'s reach.
  for (( attempt = 1; attempt <= max_attempts; attempt++ )); do
    if curl -fs "$ES_HOST/_cluster/health" >/dev/null 2>&1; then
      echo "[OK] Elasticsearch is available"
      return 0
    fi
    echo " Waiting for ES... ($attempt/$max_attempts)"
    # Sleeping after the final failed attempt would only delay the error.
    if (( attempt < max_attempts )); then
      sleep "$interval"
    fi
  done

  echo "[ERROR] Elasticsearch timeout"
  return 1
}
|
||||
|
||||
#######################################
# Poll Kibana's status API until its *overall* level is "available".
# Only the "overall" object is inspected: the response also carries a
# "level" for each core service and plugin, and any one of those being
# available must not be mistaken for Kibana as a whole being ready.
# Globals:   KB_HOST (read) - base URL of Kibana
# Arguments: $1 - maximum number of attempts (optional, default 120)
#            $2 - seconds to sleep between attempts (optional, default 5)
# Outputs:   progress messages on stdout
# Returns:   0 once Kibana is available, 1 if every attempt failed
#######################################
wait_for_kibana() {
  local max_attempts="${1:-120}"
  local interval="${2:-5}"
  # Captures the level string that sits inside the "overall" object.
  local overall_re='"overall":[[:space:]]*\{[^}]*"level":[[:space:]]*"([^"]*)"'
  local attempt body level

  echo "[INFO] Waiting for Kibana..."

  for (( attempt = 1; attempt <= max_attempts; attempt++ )); do
    # Fetch once and reuse the body, so the reachability check and the
    # status check cannot see two different responses.
    if body=$(curl -fs "$KB_HOST/api/status" 2>/dev/null); then
      level="unknown"
      if [[ "$body" =~ $overall_re ]]; then
        level="${BASH_REMATCH[1]}"
      fi
      if [[ "$level" == "available" ]]; then
        echo "[OK] Kibana is available"
        return 0
      fi
      echo " Waiting for Kibana... ($attempt/$max_attempts, status: $level)"
    else
      echo " Waiting for Kibana... ($attempt/$max_attempts, connection failed)"
    fi
    # Sleeping after the final failed attempt would only delay the error.
    if (( attempt < max_attempts )); then
      sleep "$interval"
    fi
  done

  echo "[ERROR] Kibana timeout"
  return 1
}
|
||||
|
||||
#######################################
# Set number_of_replicas to 0 on every train-*/infer-* index that does not
# already have 0 replicas. Safe to run repeatedly.
# Globals:   ES_HOST (read) - base URL of Elasticsearch
# Arguments: none
# Outputs:   progress messages on stdout
# Returns:   0; a failed update of one index is logged as [WARN] and the
#            remaining indices are still processed
#######################################
fix_replicas_idempotent() {
  local listing idx replicas

  echo "[INFO] Checking and fixing index replicas..."

  # A single _cat request yields "<index> <replica count>" per line, which
  # avoids one settings lookup per index. -f makes curl fail on an HTTP
  # error so an error body is never iterated as if it were index names.
  listing=$(curl -fs "$ES_HOST/_cat/indices/train-*,infer-*?h=index,rep" 2>/dev/null || true)

  if [[ -z "$listing" ]]; then
    echo "[INFO] No train-*/infer-* indices found, skipping replica adjustment"
    return 0
  fi

  # The listing is read on fd 3 so commands in the loop body keep stdin.
  while read -r -u 3 idx replicas; do
    [[ -n "$idx" ]] || continue

    if [[ "$replicas" == "0" ]]; then
      echo "[INFO] Index $idx already has 0 replicas, skipping"
      continue
    fi

    echo "[INFO] Setting replicas to 0 for index: $idx (current: $replicas)"
    if curl -fsS -X PUT "$ES_HOST/$idx/_settings" \
      -H 'Content-Type: application/json' \
      -d '{"index":{"number_of_replicas":0}}' >/dev/null; then
      echo "[OK] Updated replicas for $idx"
    else
      echo "[WARN] Failed to set replicas for $idx"
    fi
  done 3<<<"$listing"
}
|
||||
|
||||
# Return 0 when at least one Elasticsearch index matches pattern $1.
_es_has_indices() {
  local pattern=$1 listing
  listing=$(curl -fs "$ES_HOST/_cat/indices/${pattern}?h=index" 2>/dev/null || true)
  [[ -n "${listing//[[:space:]]/}" ]]
}

# Return 0 when Kibana's data view listing contains a view titled $1.
_kibana_data_view_exists() {
  local title=$1 listing
  # The collection endpoint lists all data views; .../data_view is the
  # endpoint used for creating a single view.
  listing=$(curl -fs "$KB_HOST/api/data_views" -H 'kbn-xsrf: true' 2>/dev/null || true)
  grep -qF -- "\"title\":\"${title}\"" <<<"$listing"
}

# Create data view $1 over index pattern $2 unless no index matches the
# pattern or a view with that title already exists.
_ensure_data_view() {
  local name=$1 pattern=$2
  local label="${name^}" # capitalised for the "already exists" message

  if ! _es_has_indices "$pattern"; then
    echo "[INFO] No ${pattern} indices found, skipping ${name} data view creation"
    return 0
  fi

  if _kibana_data_view_exists "$pattern"; then
    echo "[INFO] ${label} data view already exists, skipping"
    return 0
  fi

  echo "[INFO] Creating data view for ${pattern} indices"
  if curl -fsS -X POST "$KB_HOST/api/data_views/data_view" \
    -H 'kbn-xsrf: true' \
    -H 'Content-Type: application/json' \
    -d "{\"data_view\":{\"name\":\"${name}\",\"title\":\"${pattern}\",\"timeFieldName\":\"@timestamp\"}}" \
    >/dev/null; then
    echo "[OK] Created ${name} data view"
  else
    echo "[WARN] Failed to create ${name} data view"
  fi
}

#######################################
# Create the "train" (train-*) and "infer" (infer-*) Kibana data views when
# matching indices exist and the view is not there yet. Safe to re-run.
#
# The checks use command exit status rather than `grep | wc -l || echo 0`:
# under `set -o pipefail` a non-matching grep makes that pipeline print two
# lines ("0" twice), the numeric test then errors out, and the view is
# wrongly reported as already existing.
# Globals:   ES_HOST (read), KB_HOST (read)
# Arguments: none
# Outputs:   progress messages on stdout
# Returns:   0; a failed creation is logged as [WARN]
#######################################
create_data_views_idempotent() {
  echo "[INFO] Checking and creating data views..."

  _ensure_data_view "train" "train-*"
  _ensure_data_view "infer" "infer-*"
}
|
||||
|
||||
# Entry point: wait for Elasticsearch and Kibana (exiting with status 1 if
# either wait fails), then apply the idempotent configuration steps.
main() {
  if ! wait_for_elasticsearch; then
    exit 1
  fi
  if ! wait_for_kibana; then
    exit 1
  fi

  # Idempotent configuration steps.
  fix_replicas_idempotent
  create_data_views_idempotent

  echo "[INFO] Kibana post-start configuration completed"
}
|
||||
|
||||
# 运行主逻辑
|
||||
main
|
32
src/log/kibana/build/start-kibana-supervised.sh
Normal file
32
src/log/kibana/build/start-kibana-supervised.sh
Normal file
@ -0,0 +1,32 @@
|
||||
#!/bin/bash
# Launcher run by supervisord: points Kibana's data path at the persistent
# directory via a symlink, starts the post-start configuration script in
# the background, then hands over to the image's original entrypoint.
set -euo pipefail

data_target=/private/argus/log/kibana
data_link=/usr/share/kibana/data

echo "[INFO] Starting Kibana under supervisor..."

# Make sure the persistent data directory exists.
mkdir -p "$data_target"

# Remove whatever currently sits at the link path (an old symlink or the
# image's own data directory) so the symlink can be created in its place.
if [ -L "$data_link" ]; then
  rm "$data_link"
elif [ -d "$data_link" ]; then
  rm -rf "$data_link"
fi
ln -sf "$data_target" "$data_link"

echo "[INFO] Data directory linked: $data_link -> $data_target"

# Default Elasticsearch address unless the environment already provides one.
: "${ELASTICSEARCH_HOSTS:=http://es:9200}"
export ELASTICSEARCH_HOSTS

echo "[INFO] Connecting to Elasticsearch at: $ELASTICSEARCH_HOSTS"

# Start the post-start configuration as a background job.
echo "[INFO] Starting background post-start configuration..."
/usr/local/bin/kibana-post-start.sh &

echo "[INFO] Starting Kibana process..."

# Replace this shell with the original Kibana entrypoint.
exec /usr/local/bin/kibana-docker
|
27
src/log/kibana/build/supervisord.conf
Normal file
27
src/log/kibana/build/supervisord.conf
Normal file
@ -0,0 +1,27 @@
|
||||
[supervisord]
|
||||
nodaemon=true
|
||||
logfile=/var/log/supervisor/supervisord.log
|
||||
pidfile=/var/run/supervisord.pid
|
||||
user=root
|
||||
|
||||
[program:kibana]
|
||||
command=/usr/local/bin/start-kibana-supervised.sh
|
||||
user=kibana
|
||||
stdout_logfile=/var/log/supervisor/kibana.log
|
||||
stderr_logfile=/var/log/supervisor/kibana_error.log
|
||||
autorestart=true
|
||||
startretries=3
|
||||
startsecs=30
|
||||
stopwaitsecs=30
|
||||
killasgroup=true
|
||||
stopasgroup=true
|
||||
|
||||
[unix_http_server]
|
||||
file=/var/run/supervisor.sock
|
||||
chmod=0700
|
||||
|
||||
[supervisorctl]
|
||||
serverurl=unix:///var/run/supervisor.sock
|
||||
|
||||
[rpcinterface:supervisor]
|
||||
supervisor.rpcinterface_factory = supervisor.rpcinterface:make_main_rpcinterface
|
@ -1,47 +0,0 @@
|
||||
#!/usr/bin/env bash
|
||||
set -euo pipefail
|
||||
root="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
|
||||
|
||||
# 创建private目录结构 (只需要ES和Kibana)
|
||||
echo "[INFO] Creating private directory structure..."
|
||||
mkdir -p "$root/private/es/data"
|
||||
mkdir -p "$root/private/es/scripts"
|
||||
mkdir -p "$root/private/kibana/data"
|
||||
mkdir -p "$root/private/kibana/scripts"
|
||||
|
||||
# 复制启动脚本
|
||||
echo "[INFO] Copying startup scripts..."
|
||||
cp "$root/scripts/start-es.sh" "$root/private/es/scripts/"
|
||||
cp "$root/scripts/start-kibana.sh" "$root/private/kibana/scripts/"
|
||||
|
||||
# 设置执行权限
|
||||
chmod +x "$root/private/es/scripts/start-es.sh"
|
||||
chmod +x "$root/private/kibana/scripts/start-kibana.sh"
|
||||
|
||||
# 设置数据目录权限(ES 和 Kibana 容器都使用 UID 1000)
|
||||
sudo chown -R 1000:1000 "$root/private/es/data" "$root/private/kibana/data" 2>/dev/null || true
|
||||
|
||||
# 检查fluent-bit相关文件是否存在
|
||||
if [[ ! -f "$root/fluent-bit-bundle.tar.gz" ]]; then
|
||||
echo "[INFO] Creating fluent-bit bundle..."
|
||||
# 创建bundle(如果目录存在的话)
|
||||
cd "$root"
|
||||
if [[ -d "private/fluent-bit" ]]; then
|
||||
cd private/fluent-bit && tar -czf ../../fluent-bit-bundle.tar.gz etc/ packages/ 2>/dev/null && cd ../..
|
||||
elif [[ -d "fluent-bit" && -d "packages" ]]; then
|
||||
# 临时创建目录结构来打包
|
||||
mkdir -p temp-bundle/etc temp-bundle/packages
|
||||
cp -r fluent-bit/* temp-bundle/etc/
|
||||
cp -r packages/* temp-bundle/packages/
|
||||
cd temp-bundle && tar -czf ../fluent-bit-bundle.tar.gz . && cd ..
|
||||
rm -rf temp-bundle
|
||||
else
|
||||
echo "[WARN] 无法创建fluent-bit bundle,请确保fluent-bit配置和packages目录存在"
|
||||
fi
|
||||
fi
|
||||
|
||||
if [[ ! -f "$root/start-fluent-bit.sh" ]]; then
|
||||
echo "[WARN] start-fluent-bit.sh 不存在,请确保已创建该启动脚本"
|
||||
fi
|
||||
|
||||
echo "[OK] 初始化完成: private/{es,kibana}, fluent-bit-bundle.tar.gz, start-fluent-bit.sh"
|
@ -1,17 +0,0 @@
|
||||
#!/usr/bin/env bash
|
||||
set -euo pipefail
|
||||
|
||||
echo "[INFO] 向两个fluent-bit容器发送测试日志..."
|
||||
|
||||
# 发送日志到host01
|
||||
echo "[INFO] 发送日志到 host01..."
|
||||
./scripts/03_send_test_host01.sh
|
||||
|
||||
echo ""
|
||||
|
||||
# 发送日志到host02
|
||||
echo "[INFO] 发送日志到 host02..."
|
||||
./scripts/03_send_test_host02.sh
|
||||
|
||||
echo ""
|
||||
echo "[OK] 已完成向两个主机发送测试日志"
|
@ -1,10 +0,0 @@
|
||||
#!/usr/bin/env bash
|
||||
set -euo pipefail
|
||||
cd "$(dirname "$0")/.."
|
||||
compose_cmd="docker compose"
|
||||
if ! $compose_cmd version >/dev/null 2>&1; then
|
||||
if command -v docker-compose >/dev/null 2>&1; then compose_cmd="docker-compose"; else
|
||||
echo "需要 Docker Compose,请安装后重试" >&2; exit 1; fi
|
||||
fi
|
||||
$compose_cmd -p logging-mvp restart fluent-bit
|
||||
echo "[OK] 已重启 fluent-bit(该镜像不支持 SIGHUP 热重载)"
|
@ -1,44 +0,0 @@
|
||||
#!/usr/bin/env bash
|
||||
set -euo pipefail
|
||||
KB="${KB:-http://localhost:5601}"
|
||||
|
||||
#######################################
# Poll Kibana's status API until the overall level is "available".
# Globals:   KB (read) - base URL of Kibana
# Arguments: $1 - maximum number of attempts (optional, default 60)
#            $2 - seconds to sleep between attempts (optional, default 5)
# Outputs:   progress messages on stdout
# Returns:   0 once Kibana is available, 1 if every attempt failed
#######################################
wait_for_kibana() {
  local max_attempts="${1:-60}"
  local interval="${2:-5}"
  local attempt body status

  echo "[i] 等待 Kibana 启动..."

  for (( attempt = 1; attempt <= max_attempts; attempt++ )); do
    # Fetch once and parse that same body, so the reachability check and
    # the status check cannot disagree.
    if body=$(curl -fs "$KB/api/status" 2>/dev/null); then
      # A jq failure (e.g. non-JSON body) counts as "unknown" rather than
      # aborting the script under `set -e` / `pipefail`.
      status=$(printf '%s' "$body" | jq -r '.status.overall.level // "unknown"' 2>/dev/null) \
        || status="unknown"
      if [[ "$status" == "available" ]]; then
        echo "[OK] Kibana 已启动 (status: $status)"
        return 0
      fi
      echo " 等待中... ($attempt/$max_attempts, status: $status)"
    else
      echo " 等待中... ($attempt/$max_attempts, 连接失败)"
    fi
    # Sleeping after the final failed attempt would only delay the error.
    if (( attempt < max_attempts )); then
      sleep "$interval"
    fi
  done

  echo "[ERROR] Kibana 启动超时"
  return 1
}
|
||||
# Escape backslashes and double quotes so $1 can sit inside a JSON string.
_json_escape() {
  local s=$1
  s=${s//\\/\\\\}
  s=${s//\"/\\\"}
  printf '%s' "$s"
}

#######################################
# Create a Kibana data view.
# Globals:   KB (read) - base URL of Kibana
# Arguments: $1 - data view name
#            $2 - index pattern used as the view's title
#            $3 - time field name (optional, default "@timestamp")
# Outputs:   progress messages on stdout
# Returns:   0 on success, 1 if the request failed
#######################################
create_view() {
  local name="$1" pattern="$2" timefield="${3:-@timestamp}"
  local payload

  # Arguments are escaped before being embedded so a quote or backslash in
  # a name cannot produce malformed JSON.
  payload=$(printf '{"data_view":{"name":"%s","title":"%s","timeFieldName":"%s"}}' \
    "$(_json_escape "$name")" \
    "$(_json_escape "$pattern")" \
    "$(_json_escape "$timefield")")

  echo "[i] 创建 Data View: $name ($pattern, time=$timefield)"
  if curl -fsS -X POST "$KB/api/data_views/data_view" \
    -H 'kbn-xsrf: true' \
    -H 'Content-Type: application/json' \
    -d "$payload" \
    >/dev/null; then
    echo " -> OK"
  else
    echo " -> 失败(可能是没有匹配索引)"
    return 1
  fi
}
|
||||
# 等待 Kibana 启动
|
||||
wait_for_kibana || exit 1
|
||||
|
||||
create_view "train" "train-*" "@timestamp" || true
|
||||
create_view "infer" "infer-*" "@timestamp" || true
|
||||
echo "[DONE] 若提示失败,请先确保已产生 train-*/infer-* 索引,再重试本脚本。"
|
@ -1,8 +0,0 @@
|
||||
#!/usr/bin/env bash
|
||||
set -euo pipefail
|
||||
ES="${ES:-http://localhost:9200}"
|
||||
for idx in train-* infer-*; do
|
||||
echo "[i] 将 $idx 副本数设置为 0"
|
||||
curl -fsS -X PUT "$ES/$idx/_settings" -H 'Content-Type: application/json' -d '{"index":{"number_of_replicas":0}}' || true
|
||||
done
|
||||
echo "[OK] 完成"
|
@ -1,33 +0,0 @@
|
||||
#!/usr/bin/env bash
|
||||
set -euo pipefail
|
||||
cd "$(dirname "$0")/.."
|
||||
|
||||
echo "清理选项:"
|
||||
echo "1. 清理索引和缓冲区(保留持久化数据)"
|
||||
echo "2. 完全清理(包括持久化数据目录)"
|
||||
read -rp "请选择 (1/2): " choice
|
||||
|
||||
case $choice in
|
||||
1)
|
||||
read -rp "危险操作:删除 ES 索引 train-*, infer-*(以及 logstash-*),并清空 buffers。确认? (yes/NO) " ans
|
||||
if [[ "${ans:-NO}" != "yes" ]]; then echo "已取消"; exit 0; fi
|
||||
ES="${ES:-http://localhost:9200}"
|
||||
for idx in train-* infer-* logstash-*; do
|
||||
echo "[i] 删除索引 $idx"
|
||||
curl -fsS -X DELETE "$ES/$idx" || true
|
||||
done
|
||||
rm -rf ./private/fluent-bit/buffers/* || true
|
||||
echo "[OK] 索引和缓冲区清理完成"
|
||||
;;
|
||||
2)
|
||||
read -rp "危险操作:删除所有数据包括持久化存储!确认? (yes/NO) " ans
|
||||
if [[ "${ans:-NO}" != "yes" ]]; then echo "已取消"; exit 0; fi
|
||||
rm -rf ./private/fluent-bit/buffers/* ./private/es/data/* ./private/kibana/data/* || true
|
||||
rm -rf ./private/fluent-bit/logs/train/* ./private/fluent-bit/logs/infer/* || true
|
||||
echo "[OK] 完全清理完成(包括持久化数据)"
|
||||
;;
|
||||
*)
|
||||
echo "已取消"
|
||||
exit 0
|
||||
;;
|
||||
esac
|
@ -1,19 +0,0 @@
|
||||
#!/bin/bash
|
||||
set -euo pipefail
|
||||
|
||||
echo "[INFO] Starting Elasticsearch with private directory setup..."
|
||||
|
||||
# 创建数据目录
|
||||
mkdir -p /private/es/data
|
||||
|
||||
# 创建软链接到Elasticsearch预期的数据目录
|
||||
rm -rf /usr/share/elasticsearch/data 2>/dev/null || true
|
||||
ln -sf /private/es/data /usr/share/elasticsearch/data
|
||||
|
||||
# 设置正确的权限 (Elasticsearch使用UID 1000)
|
||||
chown -R 1000:1000 /private/es/data
|
||||
|
||||
echo "[INFO] Data directory linked: /usr/share/elasticsearch/data -> /private/es/data"
|
||||
|
||||
# 启动原始的Elasticsearch entrypoint
|
||||
exec /usr/local/bin/docker-entrypoint.sh elasticsearch
|
@ -1,19 +0,0 @@
|
||||
#!/bin/bash
|
||||
set -euo pipefail
|
||||
|
||||
echo "[INFO] Starting Kibana with private directory setup..."
|
||||
|
||||
# 创建数据目录
|
||||
mkdir -p /private/kibana/data
|
||||
|
||||
# 创建软链接到Kibana预期的数据目录
|
||||
rm -rf /usr/share/kibana/data 2>/dev/null || true
|
||||
ln -sf /private/kibana/data /usr/share/kibana/data
|
||||
|
||||
# 设置正确的权限 (Kibana使用UID 1000)
|
||||
chown -R 1000:1000 /private/kibana/data
|
||||
|
||||
echo "[INFO] Data directory linked: /usr/share/kibana/data -> /private/kibana/data"
|
||||
|
||||
# 启动原始的Kibana entrypoint
|
||||
exec /usr/local/bin/kibana-docker
|
20
src/log/scripts/build_images.sh
Executable file
20
src/log/scripts/build_images.sh
Executable file
@ -0,0 +1,20 @@
|
||||
#!/usr/bin/env bash
# Build the supervisor-enabled Elasticsearch and Kibana images from the
# build contexts located one directory above this script.
set -euo pipefail

project_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
cd "$project_dir"

# Announce and build one image: $1 = display name, $2 = tag, $3 = context.
build_image() {
  echo "[INFO] Building $1 image..."
  docker build -t "$2" "$3"
}

echo "[INFO] Building custom Docker images with supervisor support..."

build_image "Elasticsearch" argus-elasticsearch:latest ./elasticsearch/build
build_image "Kibana" argus-kibana:latest ./kibana/build

echo "[OK] Custom images built successfully:"
echo " - argus-elasticsearch:latest"
echo " - argus-kibana:latest"
|
||||
|
@ -1,15 +1,17 @@
|
||||
version: "3.8"
|
||||
services:
|
||||
es:
|
||||
image: docker.elastic.co/elasticsearch/elasticsearch:8.13.4
|
||||
build:
|
||||
context: ../elasticsearch/build
|
||||
dockerfile: Dockerfile
|
||||
image: argus-elasticsearch:latest
|
||||
environment:
|
||||
- discovery.type=single-node
|
||||
- xpack.security.enabled=false
|
||||
- ES_JAVA_OPTS=-Xms512m -Xmx512m
|
||||
volumes:
|
||||
- ./private:/private
|
||||
- ./private/argus/log/elasticsearch:/private/argus/log/elasticsearch
|
||||
ports: ["9200:9200"]
|
||||
command: /private/es/scripts/start-es.sh
|
||||
healthcheck:
|
||||
test: ["CMD-SHELL", "curl -fs http://localhost:9200 >/dev/null || exit 1"]
|
||||
interval: 10s
|
||||
@ -17,13 +19,15 @@ services:
|
||||
retries: 30
|
||||
|
||||
kibana:
|
||||
image: docker.elastic.co/kibana/kibana:8.13.4
|
||||
build:
|
||||
context: ../kibana/build
|
||||
dockerfile: Dockerfile
|
||||
image: argus-kibana:latest
|
||||
environment:
|
||||
- ELASTICSEARCH_HOSTS=http://es:9200
|
||||
volumes:
|
||||
- ./private:/private
|
||||
- ./private/argus/log/kibana:/private/argus/log/kibana
|
||||
ports: ["5601:5601"]
|
||||
command: /private/kibana/scripts/start-kibana.sh
|
||||
depends_on:
|
||||
es:
|
||||
condition: service_healthy
|
||||
@ -37,8 +41,8 @@ services:
|
||||
- ES_HOST=es
|
||||
- ES_PORT=9200
|
||||
volumes:
|
||||
- ./start-fluent-bit.sh:/private/start-fluent-bit.sh:ro
|
||||
- ./fluent-bit-bundle.tar.gz:/private/fluent-bit-bundle.tar.gz:ro
|
||||
- ../fluent-bit/start-fluent-bit.sh:/private/start-fluent-bit.sh:ro
|
||||
- ../fluent-bit/fluent-bit-bundle.tar.gz:/private/fluent-bit-bundle.tar.gz:ro
|
||||
ports: ["2020:2020"]
|
||||
depends_on:
|
||||
es:
|
||||
@ -59,8 +63,8 @@ services:
|
||||
- ES_HOST=es
|
||||
- ES_PORT=9200
|
||||
volumes:
|
||||
- ./start-fluent-bit.sh:/private/start-fluent-bit.sh:ro
|
||||
- ./fluent-bit-bundle.tar.gz:/private/fluent-bit-bundle.tar.gz:ro
|
||||
- ../fluent-bit/start-fluent-bit.sh:/private/start-fluent-bit.sh:ro
|
||||
- ../fluent-bit/fluent-bit-bundle.tar.gz:/private/fluent-bit-bundle.tar.gz:ro
|
||||
ports: ["2021:2020"]
|
||||
depends_on:
|
||||
es:
|
27
src/log/tests/scripts/01_bootstrap.sh
Executable file
27
src/log/tests/scripts/01_bootstrap.sh
Executable file
@ -0,0 +1,27 @@
|
||||
#!/usr/bin/env bash
# Prepare the host-side data directories for the Elasticsearch and Kibana
# containers and warn when the fluent-bit artefacts are missing.
set -euo pipefail

script_dir="$(dirname "${BASH_SOURCE[0]}")"
root="$(cd "$script_dir/../" && pwd)"

data_dirs=(
  "$root/private/argus/log/elasticsearch"
  "$root/private/argus/log/kibana"
)

# Create the private directory layout (argus directory structure).
echo "[INFO] Creating private directory structure for supervisor-based containers..."
for dir in "${data_dirs[@]}"; do
  mkdir -p "$dir"
done

# Give the data directories to UID/GID 1000 (used by the ES and Kibana
# containers). Best effort: a chown failure is silenced and ignored.
echo "[INFO] Setting permissions for data directories..."
for dir in "${data_dirs[@]}"; do
  sudo chown -R 1000:1000 "$dir" 2>/dev/null || true
done

echo "[INFO] Supervisor-based containers will manage their own scripts and configurations"

# Warn (without failing) when the fluent-bit files are absent.
[[ -f "$root/../fluent-bit/fluent-bit-bundle.tar.gz" ]] \
  || echo "[WARN] fluent-bit/fluent-bit-bundle.tar.gz 不存在,请确保已创建该文件"

[[ -f "$root/../fluent-bit/start-fluent-bit.sh" ]] \
  || echo "[WARN] fluent-bit/start-fluent-bit.sh 不存在,请确保已创建该启动脚本"

echo "[OK] 初始化完成: private/argus/log/{elasticsearch,kibana}"
echo "[INFO] Fluent-bit files should be in fluent-bit/ directory"
|
@ -5,7 +5,7 @@ set -euo pipefail
|
||||
container_name="logging-mvp-fluent-bit-host01-1"
|
||||
|
||||
# 检查容器是否存在并运行
|
||||
if ! docker ps --format "table {{.Names}}" | grep -q "$container_name"; then
|
||||
if ! docker ps | grep -q "$container_name"; then
|
||||
echo "[ERROR] Fluent Bit容器 $container_name 未运行"
|
||||
exit 1
|
||||
fi
|
@ -5,7 +5,7 @@ set -euo pipefail
|
||||
container_name="logging-mvp-fluent-bit-host02-1"
|
||||
|
||||
# 检查容器是否存在并运行
|
||||
if ! docker ps --format "table {{.Names}}" | grep -q "$container_name"; then
|
||||
if ! docker ps | grep -q "$container_name"; then
|
||||
echo "[ERROR] Fluent Bit容器 $container_name 未运行"
|
||||
exit 1
|
||||
fi
|
168
src/log/tests/scripts/e2e_test.sh
Executable file
168
src/log/tests/scripts/e2e_test.sh
Executable file
@ -0,0 +1,168 @@
|
||||
#!/usr/bin/env bash
|
||||
set -euo pipefail
|
||||
|
||||
echo "======================================="
|
||||
echo "ARGUS Log System End-to-End Test"
|
||||
echo "======================================="
|
||||
echo ""
|
||||
|
||||
# 记录测试开始时间
|
||||
test_start_time=$(date +%s)
|
||||
|
||||
#######################################
# Print the combined document count of the train-* and infer-* indices.
# A pattern whose count cannot be read (request failed, or the response has
# no numeric "count" field) contributes 0.
# Globals:   ES_URL (read, optional) - Elasticsearch base URL,
#            default http://localhost:9200
# Arguments: none
# Outputs:   the total as a decimal integer on stdout
#######################################
get_log_count() {
  local es_url="${ES_URL:-http://localhost:9200}"
  # Requires at least one digit, so an empty capture can never reach the
  # arithmetic below.
  local count_re='"count":[[:space:]]*([0-9]+)'
  local pattern body
  local total=0

  for pattern in 'train-*' 'infer-*'; do
    body=$(curl -s "$es_url/$pattern/_count" 2>/dev/null || true)
    if [[ "$body" =~ $count_re ]]; then
      # 10# forces base 10 regardless of leading zeros.
      total=$(( total + 10#${BASH_REMATCH[1]} ))
    fi
  done

  echo "$total"
}
|
||||
|
||||
#######################################
# Wait until Elasticsearch, Kibana and both Fluent Bit instances all answer
# their HTTP endpoints successfully within the same polling round.
# Arguments: $1 - maximum number of attempts (optional, default 60)
#            $2 - seconds to sleep between attempts (optional, default 5)
# Outputs:   progress messages on stdout
# Returns:   0 once every endpoint responds, 1 if the attempts run out
#######################################
wait_for_services() {
  local max_attempts="${1:-60}"
  local interval="${2:-5}"
  local -a endpoints=(
    "http://localhost:9200/_cluster/health"
    "http://localhost:5601/api/status"
    "http://localhost:2020/api/v2/metrics"
    "http://localhost:2021/api/v2/metrics"
  )
  local attempt url all_up

  echo "[INFO] Waiting for all services to be ready..."

  for (( attempt = 1; attempt <= max_attempts; attempt++ )); do
    all_up=1
    for url in "${endpoints[@]}"; do
      if ! curl -fs "$url" >/dev/null 2>&1; then
        all_up=0
        break # the first unreachable service ends this round
      fi
    done

    if (( all_up )); then
      echo "[OK] All services are ready!"
      return 0
    fi

    echo " Waiting for services... ($attempt/$max_attempts)"
    # Sleeping after the final failed attempt would only delay the error.
    if (( attempt < max_attempts )); then
      sleep "$interval"
    fi
  done

  echo "[ERROR] Services not ready after $max_attempts attempts"
  return 1
}
|
||||
|
||||
# Print a step banner: a blank line, "Step <$1>: <$2>", then a separator.
show_step() {
  printf '\n🔄 Step %s: %s\n%s\n' "$1" "$2" "----------------------------------------"
}
|
||||
|
||||
#######################################
# Report the outcome of a test step and abort the test run on failure.
# Arguments: $1 - step label
#            $2 - exit status to evaluate (optional). When omitted, the
#                 status of the command that ran immediately before this
#                 call is used, which is the original calling convention.
# Outputs:   a SUCCESS or FAILED line on stdout
# Returns:   0 on success; exits the shell with status 1 on failure
#######################################
verify_step() {
  # $? is expanded before `local` itself runs, so it still holds the
  # caller's last exit status here.
  local rc="${2:-$?}"

  if (( rc == 0 )); then
    echo "✅ $1 - SUCCESS"
  else
    echo "❌ $1 - FAILED"
    exit 1
  fi
}
|
||||
|
||||
# ---------------------------------------------------------------------------
# End-to-end test flow
# ---------------------------------------------------------------------------

# The step scripts are invoked as ./scripts/<name>, i.e. relative to the
# directory above this file. Enter it so the test works from any cwd.
cd "$(dirname "${BASH_SOURCE[0]}")/.."

# Run a command ($2...) and report it under label $1 through verify_step.
# With `set -e`, a failing step would abort the script before verify_step
# could print its FAILED line. Running the command as an `if` condition
# suspends errexit for it, and each branch calls verify_step first so that
# $? still holds the command's status when verify_step reads it.
run_step() {
  local label=$1
  shift
  if "$@"; then
    verify_step "$label"
  else
    verify_step "$label"
  fi
}

# Print the uptime reported by the Fluent Bit metrics endpoint on port $1,
# or 0 when it cannot be read. Only sample lines (starting with the metric
# name) are considered, so "# HELP"/"# TYPE" comment lines are skipped.
fluent_bit_uptime() {
  { curl -s "http://localhost:$1/api/v2/metrics" 2>/dev/null || true; } \
    | awk '/^fluentbit_uptime/ { print int($NF); found = 1; exit }
           END { if (!found) print 0 }'
}

# Succeeds when the Elasticsearch cluster status read below is green or yellow.
es_is_healthy() {
  [[ "$es_health" == "green" || "$es_health" == "yellow" ]]
}

show_step "1" "Bootstrap - Initialize environment"
run_step "Bootstrap" ./scripts/01_bootstrap.sh

show_step "2" "Startup - Start all services"
run_step "Service startup" ./scripts/02_up.sh

# Wait until every service answers.
wait_for_services || exit 1

# Log count before any test data is sent.
initial_count=$(get_log_count)
echo "[INFO] Initial log count: $initial_count"

show_step "3a" "Send test data - Host01"
run_step "Test data sending (host01)" ./scripts/03_send_test_host01.sh

show_step "3b" "Send test data - Host02"
run_step "Test data sending (host02)" ./scripts/03_send_test_host02.sh

# Give the pipeline time to process the data.
echo "[INFO] Waiting for data to be processed..."
sleep 10

show_step "4" "Verify data - Query Elasticsearch"
run_step "Data verification" ./scripts/04_query_es.sh

# Log count after the test data was sent.
final_count=$(get_log_count)
echo "[INFO] Final log count: $final_count"

# The count must have grown.
if [ "$final_count" -gt "$initial_count" ]; then
  added_logs=$((final_count - initial_count))
  echo "✅ Log count verification - SUCCESS: Added $added_logs logs (from $initial_count to $final_count)"
else
  echo "❌ Log count verification - FAILED: Expected count to increase, but got $initial_count -> $final_count"
  exit 1
fi

# The total must reach the expected minimum.
expected_min_logs=4
if [ "$final_count" -ge "$expected_min_logs" ]; then
  echo "✅ Minimum log threshold - SUCCESS: $final_count logs (>= $expected_min_logs expected)"
else
  echo "❌ Minimum log threshold - FAILED: Only $final_count logs (>= $expected_min_logs expected)"
  exit 1
fi

# Service health checks.
show_step "Health" "Check service health"
echo "[INFO] Checking service health..."

# Elasticsearch cluster status. `|| true` keeps an unreachable or
# unparsable response from aborting the script under errexit/pipefail; it
# is reported as unhealthy below instead.
es_health=$(curl -s "http://localhost:9200/_cluster/health" \
  | grep -o '"status":"[^"]*"' | cut -d'"' -f4 || true)
if es_is_healthy; then
  echo "✅ Elasticsearch health: $es_health"
else
  echo "❌ Elasticsearch health: $es_health"
fi

# Kibana status (reported only; never fails the test).
if curl -fs "http://localhost:5601/api/status" >/dev/null 2>&1; then
  kb_status="available"
  echo "✅ Kibana status: $kb_status"
else
  kb_status="unavailable"
  echo "⚠️ Kibana status: $kb_status"
fi

# Fluent Bit metrics (reported only; never fails the test).
fb_host01_uptime=$(fluent_bit_uptime 2020)
fb_host02_uptime=$(fluent_bit_uptime 2021)

if [ "$fb_host01_uptime" -gt 0 ] && [ "$fb_host02_uptime" -gt 0 ]; then
  echo "✅ Fluent-Bit services: host01 uptime=${fb_host01_uptime}s, host02 uptime=${fb_host02_uptime}s"
else
  echo "⚠️ Fluent-Bit services: host01 uptime=${fb_host01_uptime}s, host02 uptime=${fb_host02_uptime}s"
fi

# The health step passes only when Elasticsearch is green or yellow (the
# condition printed with ❌ above); previously this line always succeeded.
run_step "Service health check" es_is_healthy

show_step "5" "Cleanup - Stop all services"
run_step "Service cleanup" ./scripts/05_down.sh

# Total test duration.
test_end_time=$(date +%s)
total_time=$((test_end_time - test_start_time))

echo ""
echo "======================================="
echo "🎉 END-TO-END TEST COMPLETED SUCCESSFULLY!"
echo "======================================="
echo "📊 Test Summary:"
echo " • Initial logs: $initial_count"
echo " • Final logs: $final_count"
echo " • Added logs: $added_logs"
echo " • Total time: ${total_time}s"
echo " • ES health: $es_health"
echo " • Kibana status: $kb_status"
echo " • All services started and stopped successfully"
echo ""
echo "✅ The ARGUS log system is working correctly!"
echo ""
|
Loading…
x
Reference in New Issue
Block a user