diff --git a/src/log/.gitignore b/src/log/.gitignore new file mode 100644 index 0000000..7546a9c --- /dev/null +++ b/src/log/.gitignore @@ -0,0 +1,3 @@ + +private/ + diff --git a/src/log/elasticsearch/build/Dockerfile b/src/log/elasticsearch/build/Dockerfile new file mode 100644 index 0000000..54bf52e --- /dev/null +++ b/src/log/elasticsearch/build/Dockerfile @@ -0,0 +1,33 @@ +FROM docker.elastic.co/elasticsearch/elasticsearch:8.13.4 + +# 切换到 root 用户进行系统级安装 +USER root + +# 安装 supervisor +RUN apt-get update && \ + apt-get install -y supervisor && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* + +# 创建 supervisor 日志目录 +RUN mkdir -p /var/log/supervisor + +# 复制 supervisor 配置文件 +COPY supervisord.conf /etc/supervisor/conf.d/supervisord.conf + +# 复制启动脚本 +COPY start-es-supervised.sh /usr/local/bin/start-es-supervised.sh +RUN chmod +x /usr/local/bin/start-es-supervised.sh + +# 创建数据目录并设置权限 +RUN mkdir -p /private/argus/log/elasticsearch && \ + chown -R elasticsearch:elasticsearch /private/argus/log/elasticsearch + +# 保持 root 用户,由 supervisor 管理用户切换 +USER root + +# 暴露端口 +EXPOSE 9200 9300 + +# 使用 supervisor 作为入口点 +CMD ["/usr/bin/supervisord", "-c", "/etc/supervisor/conf.d/supervisord.conf"] \ No newline at end of file diff --git a/src/log/elasticsearch/build/start-es-supervised.sh b/src/log/elasticsearch/build/start-es-supervised.sh new file mode 100644 index 0000000..10973c7 --- /dev/null +++ b/src/log/elasticsearch/build/start-es-supervised.sh @@ -0,0 +1,26 @@ +#!/bin/bash +set -euo pipefail + +echo "[INFO] Starting Elasticsearch under supervisor..." + +# 创建数据目录(如果不存在) +mkdir -p /private/argus/log/elasticsearch + +# 创建软链接到Elasticsearch预期的数据目录 +if [ -L /usr/share/elasticsearch/data ]; then + rm /usr/share/elasticsearch/data +elif [ -d /usr/share/elasticsearch/data ]; then + rm -rf /usr/share/elasticsearch/data +fi + +ln -sf /private/argus/log/elasticsearch /usr/share/elasticsearch/data + +echo "[INFO] Data directory linked: /usr/share/elasticsearch/data -> /private/argus/log/elasticsearch" + +# 设置环境变量(ES配置通过docker-compose传递) +export ES_JAVA_OPTS="${ES_JAVA_OPTS:-"-Xms512m -Xmx512m"}" + +echo "[INFO] Starting Elasticsearch process..." + +# 启动原始的Elasticsearch entrypoint +exec /usr/local/bin/docker-entrypoint.sh elasticsearch \ No newline at end of file diff --git a/src/log/elasticsearch/build/supervisord.conf b/src/log/elasticsearch/build/supervisord.conf new file mode 100644 index 0000000..1575572 --- /dev/null +++ b/src/log/elasticsearch/build/supervisord.conf @@ -0,0 +1,27 @@ +[supervisord] +nodaemon=true +logfile=/var/log/supervisor/supervisord.log +pidfile=/var/run/supervisord.pid +user=root + +[program:elasticsearch] +command=/usr/local/bin/start-es-supervised.sh +user=elasticsearch +stdout_logfile=/var/log/supervisor/elasticsearch.log +stderr_logfile=/var/log/supervisor/elasticsearch_error.log +autorestart=true +startretries=3 +startsecs=30 +stopwaitsecs=30 +killasgroup=true +stopasgroup=true + +[unix_http_server] +file=/var/run/supervisor.sock +chmod=0700 + +[supervisorctl] +serverurl=unix:///var/run/supervisor.sock + +[rpcinterface:supervisor] +supervisor.rpcinterface_factory = supervisor.rpcinterface:make_main_rpcinterface \ No newline at end of file diff --git a/src/log/kibana/build/Dockerfile b/src/log/kibana/build/Dockerfile new file mode 100644 index 0000000..332433f --- /dev/null +++ b/src/log/kibana/build/Dockerfile @@ -0,0 +1,34 @@ +FROM docker.elastic.co/kibana/kibana:8.13.4 + +# 切换到 root 用户进行系统级安装 +USER root + +# 安装 supervisor +RUN apt-get update && \ + apt-get install -y supervisor && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* + +# 创建 supervisor 日志目录 +RUN mkdir -p /var/log/supervisor + +# 复制 supervisor 配置文件 +COPY supervisord.conf /etc/supervisor/conf.d/supervisord.conf + +# 复制启动脚本 +COPY start-kibana-supervised.sh /usr/local/bin/start-kibana-supervised.sh +COPY kibana-post-start.sh /usr/local/bin/kibana-post-start.sh +RUN chmod +x /usr/local/bin/start-kibana-supervised.sh /usr/local/bin/kibana-post-start.sh + +# 创建数据目录并设置权限 +RUN mkdir -p /private/argus/log/kibana && \ + chown -R kibana:kibana /private/argus/log/kibana + +# 保持 root 用户,由 supervisor 管理用户切换 +USER root + +# 暴露端口 +EXPOSE 5601 + +# 使用 supervisor 作为入口点 +CMD ["/usr/bin/supervisord", "-c", "/etc/supervisor/conf.d/supervisord.conf"] \ No newline at end of file diff --git a/src/log/kibana/build/kibana-post-start.sh b/src/log/kibana/build/kibana-post-start.sh new file mode 100644 index 0000000..d6df289 --- /dev/null +++ b/src/log/kibana/build/kibana-post-start.sh @@ -0,0 +1,146 @@ +#!/bin/bash +set -euo pipefail + +ES_HOST="${ELASTICSEARCH_HOSTS:-http://es:9200}" +KB_HOST="http://localhost:5601" + +echo "[INFO] Starting Kibana post-start configuration..." + +# 等待 Elasticsearch 可用 +wait_for_elasticsearch() { + echo "[INFO] Waiting for Elasticsearch..." + local max_attempts=60 + local attempt=1 + + while [ $attempt -le $max_attempts ]; do + if curl -fs "$ES_HOST/_cluster/health" >/dev/null 2>&1; then + echo "[OK] Elasticsearch is available" + return 0 + fi + echo " Waiting for ES... ($attempt/$max_attempts)" + sleep 5 + ((attempt++)) + done + + echo "[ERROR] Elasticsearch timeout" + return 1 +} + +# 等待 Kibana 可用 +wait_for_kibana() { + echo "[INFO] Waiting for Kibana..." + local max_attempts=120 + local attempt=1 + + while [ $attempt -le $max_attempts ]; do + if curl -fs "$KB_HOST/api/status" >/dev/null 2>&1; then + local status=$(curl -s "$KB_HOST/api/status" | grep -o '"level":"available"' || echo "") + if [ -n "$status" ]; then + echo "[OK] Kibana is available" + return 0 + fi + echo " Waiting for Kibana... ($attempt/$max_attempts, status: $status)" + else + echo " Waiting for Kibana... ($attempt/$max_attempts, connection failed)" + fi + sleep 5 + ((attempt++)) + done + + echo "[ERROR] Kibana timeout" + return 1 +} + +# 幂等设置索引副本数为0 +fix_replicas_idempotent() { + echo "[INFO] Checking and fixing index replicas..." + + # 获取所有 train-* 和 infer-* 索引 + local indices=$(curl -s "$ES_HOST/_cat/indices/train-*,infer-*?h=index" 2>/dev/null || echo "") + + if [ -z "$indices" ]; then + echo "[INFO] No train-*/infer-* indices found, skipping replica adjustment" + return 0 + fi + + for idx in $indices; do + # 检查当前副本数 + local current_replicas=$(curl -s "$ES_HOST/$idx/_settings" | grep -o '"number_of_replicas":"[^"]*"' | cut -d'"' -f4 || echo "") + + if [ "$current_replicas" != "0" ]; then + echo "[INFO] Setting replicas to 0 for index: $idx (current: $current_replicas)" + curl -fsS -X PUT "$ES_HOST/$idx/_settings" \ + -H 'Content-Type: application/json' \ + -d '{"index":{"number_of_replicas":0}}' >/dev/null || { + echo "[WARN] Failed to set replicas for $idx" + continue + } + echo "[OK] Updated replicas for $idx" + else + echo "[INFO] Index $idx already has 0 replicas, skipping" + fi + done +} + +# 幂等创建数据视图 +create_data_views_idempotent() { + echo "[INFO] Checking and creating data views..." + + # 检查是否存在匹配的索引 + local train_indices=$(curl -s "$ES_HOST/_cat/indices/train-*?h=index" 2>/dev/null | wc -l || echo "0") + local infer_indices=$(curl -s "$ES_HOST/_cat/indices/infer-*?h=index" 2>/dev/null | wc -l || echo "0") + + # 创建 train 数据视图 + if [ "$train_indices" -gt 0 ]; then + # 检查数据视图是否已存在 + local train_exists=$(curl -s "$KB_HOST/api/data_views/data_view" -H 'kbn-xsrf: true' 2>/dev/null | grep '"title":"train-\*"' | wc -l || echo "0") + + if [ "$train_exists" -eq 0 ]; then + echo "[INFO] Creating data view for train-* indices" + curl -fsS -X POST "$KB_HOST/api/data_views/data_view" \ + -H 'kbn-xsrf: true' \ + -H 'Content-Type: application/json' \ + -d '{"data_view":{"name":"train","title":"train-*","timeFieldName":"@timestamp"}}' \ + >/dev/null && echo "[OK] Created train data view" || echo "[WARN] Failed to create train data view" + else + echo "[INFO] Train data view already exists, skipping" + fi + else + echo "[INFO] No train-* indices found, skipping train data view creation" + fi + + # 创建 infer 数据视图 + if [ "$infer_indices" -gt 0 ]; then + # 检查数据视图是否已存在 + local infer_exists=$(curl -s "$KB_HOST/api/data_views/data_view" -H 'kbn-xsrf: true' 2>/dev/null | grep '"title":"infer-\*"' | wc -l || echo "0") + + if [ "$infer_exists" -eq 0 ]; then + echo "[INFO] Creating data view for infer-* indices" + curl -fsS -X POST "$KB_HOST/api/data_views/data_view" \ + -H 'kbn-xsrf: true' \ + -H 'Content-Type: application/json' \ + -d '{"data_view":{"name":"infer","title":"infer-*","timeFieldName":"@timestamp"}}' \ + >/dev/null && echo "[OK] Created infer data view" || echo "[WARN] Failed to create infer data view" + else + echo "[INFO] Infer data view already exists, skipping" + fi + else + echo "[INFO] No infer-* indices found, skipping infer data view creation" + fi +} + +# 主逻辑 +main() { + # 等待服务可用 + wait_for_elasticsearch || exit 1 + wait_for_kibana || exit 1 + + # 执行幂等配置 + fix_replicas_idempotent + create_data_views_idempotent + + echo "[INFO] Kibana post-start configuration completed" +} + +# 运行主逻辑 +main \ No newline at end of file diff --git a/src/log/kibana/build/start-kibana-supervised.sh b/src/log/kibana/build/start-kibana-supervised.sh new file mode 100644 index 0000000..b2b98fc --- /dev/null +++ b/src/log/kibana/build/start-kibana-supervised.sh @@ -0,0 +1,32 @@ +#!/bin/bash +set -euo pipefail + +echo "[INFO] Starting Kibana under supervisor..." + +# 创建数据目录(如果不存在) +mkdir -p /private/argus/log/kibana + +# 创建软链接到Kibana预期的数据目录 +if [ -L /usr/share/kibana/data ]; then + rm /usr/share/kibana/data +elif [ -d /usr/share/kibana/data ]; then + rm -rf /usr/share/kibana/data +fi + +ln -sf /private/argus/log/kibana /usr/share/kibana/data + +echo "[INFO] Data directory linked: /usr/share/kibana/data -> /private/argus/log/kibana" + +# 设置环境变量 +export ELASTICSEARCH_HOSTS="${ELASTICSEARCH_HOSTS:-"http://es:9200"}" + +echo "[INFO] Connecting to Elasticsearch at: $ELASTICSEARCH_HOSTS" + +# 启动后台配置任务 +echo "[INFO] Starting background post-start configuration..." +/usr/local/bin/kibana-post-start.sh & + +echo "[INFO] Starting Kibana process..." + +# 启动原始的Kibana entrypoint +exec /usr/local/bin/kibana-docker \ No newline at end of file diff --git a/src/log/kibana/build/supervisord.conf b/src/log/kibana/build/supervisord.conf new file mode 100644 index 0000000..c6244ad --- /dev/null +++ b/src/log/kibana/build/supervisord.conf @@ -0,0 +1,27 @@ +[supervisord] +nodaemon=true +logfile=/var/log/supervisor/supervisord.log +pidfile=/var/run/supervisord.pid +user=root + +[program:kibana] +command=/usr/local/bin/start-kibana-supervised.sh +user=kibana +stdout_logfile=/var/log/supervisor/kibana.log +stderr_logfile=/var/log/supervisor/kibana_error.log +autorestart=true +startretries=3 +startsecs=30 +stopwaitsecs=30 +killasgroup=true +stopasgroup=true + +[unix_http_server] +file=/var/run/supervisor.sock +chmod=0700 + +[supervisorctl] +serverurl=unix:///var/run/supervisor.sock + +[rpcinterface:supervisor] +supervisor.rpcinterface_factory = supervisor.rpcinterface:make_main_rpcinterface \ No newline at end of file diff --git a/src/log/misc/01_bootstrap.sh b/src/log/misc/01_bootstrap.sh deleted file mode 100755 index 8129f03..0000000 --- a/src/log/misc/01_bootstrap.sh +++ /dev/null @@ -1,47 +0,0 @@ -#!/usr/bin/env bash -set -euo pipefail -root="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)" - -# 创建private目录结构 (只需要ES和Kibana) -echo "[INFO] Creating private directory structure..." -mkdir -p "$root/private/es/data" -mkdir -p "$root/private/es/scripts" -mkdir -p "$root/private/kibana/data" -mkdir -p "$root/private/kibana/scripts" - -# 复制启动脚本 -echo "[INFO] Copying startup scripts..." -cp "$root/scripts/start-es.sh" "$root/private/es/scripts/" -cp "$root/scripts/start-kibana.sh" "$root/private/kibana/scripts/" - -# 设置执行权限 -chmod +x "$root/private/es/scripts/start-es.sh" -chmod +x "$root/private/kibana/scripts/start-kibana.sh" - -# 设置数据目录权限(ES 和 Kibana 容器都使用 UID 1000) -sudo chown -R 1000:1000 "$root/private/es/data" "$root/private/kibana/data" 2>/dev/null || true - -# 检查fluent-bit相关文件是否存在 -if [[ ! -f "$root/fluent-bit-bundle.tar.gz" ]]; then - echo "[INFO] Creating fluent-bit bundle..." - # 创建bundle(如果目录存在的话) - cd "$root" - if [[ -d "private/fluent-bit" ]]; then - cd private/fluent-bit && tar -czf ../../fluent-bit-bundle.tar.gz etc/ packages/ 2>/dev/null && cd ../.. - elif [[ -d "fluent-bit" && -d "packages" ]]; then - # 临时创建目录结构来打包 - mkdir -p temp-bundle/etc temp-bundle/packages - cp -r fluent-bit/* temp-bundle/etc/ - cp -r packages/* temp-bundle/packages/ - cd temp-bundle && tar -czf ../fluent-bit-bundle.tar.gz . && cd .. - rm -rf temp-bundle - else - echo "[WARN] 无法创建fluent-bit bundle,请确保fluent-bit配置和packages目录存在" - fi -fi - -if [[ ! -f "$root/start-fluent-bit.sh" ]]; then - echo "[WARN] start-fluent-bit.sh 不存在,请确保已创建该启动脚本" -fi - -echo "[OK] 初始化完成: private/{es,kibana}, fluent-bit-bundle.tar.gz, start-fluent-bit.sh" \ No newline at end of file diff --git a/src/log/misc/03_send_test.sh b/src/log/misc/03_send_test.sh deleted file mode 100755 index f971f32..0000000 --- a/src/log/misc/03_send_test.sh +++ /dev/null @@ -1,17 +0,0 @@ -#!/usr/bin/env bash -set -euo pipefail - -echo "[INFO] 向两个fluent-bit容器发送测试日志..." - -# 发送日志到host01 -echo "[INFO] 发送日志到 host01..." -./scripts/03_send_test_host01.sh - -echo "" - -# 发送日志到host02 -echo "[INFO] 发送日志到 host02..." -./scripts/03_send_test_host02.sh - -echo "" -echo "[OK] 已完成向两个主机发送测试日志" \ No newline at end of file diff --git a/src/log/misc/06_restart_fluentbit.sh b/src/log/misc/06_restart_fluentbit.sh deleted file mode 100755 index e0f1a58..0000000 --- a/src/log/misc/06_restart_fluentbit.sh +++ /dev/null @@ -1,10 +0,0 @@ -#!/usr/bin/env bash -set -euo pipefail -cd "$(dirname "$0")/.." -compose_cmd="docker compose" -if ! $compose_cmd version >/dev/null 2>&1; then - if command -v docker-compose >/dev/null 2>&1; then compose_cmd="docker-compose"; else - echo "需要 Docker Compose,请安装后重试" >&2; exit 1; fi -fi -$compose_cmd -p logging-mvp restart fluent-bit -echo "[OK] 已重启 fluent-bit(该镜像不支持 SIGHUP 热重载)" diff --git a/src/log/misc/07_create_data_views.sh b/src/log/misc/07_create_data_views.sh deleted file mode 100755 index 9accf13..0000000 --- a/src/log/misc/07_create_data_views.sh +++ /dev/null @@ -1,44 +0,0 @@ -#!/usr/bin/env bash -set -euo pipefail -KB="${KB:-http://localhost:5601}" - -# 等待 Kibana 完全启动 -wait_for_kibana() { - echo "[i] 等待 Kibana 启动..." - local max_attempts=60 - local attempt=1 - - while [ $attempt -le $max_attempts ]; do - if curl -fs "$KB/api/status" >/dev/null 2>&1; then - local status=$(curl -s "$KB/api/status" | jq -r '.status.overall.level // "unknown"') - if [ "$status" = "available" ]; then - echo "[OK] Kibana 已启动 (status: $status)" - return 0 - else - echo " 等待中... ($attempt/$max_attempts, status: $status)" - fi - else - echo " 等待中... ($attempt/$max_attempts, 连接失败)" - fi - sleep 5 - ((attempt++)) - done - - echo "[ERROR] Kibana 启动超时" - return 1 -} -create_view() { - local name="$1" pattern="$2" timefield="${3:-@timestamp}" - echo "[i] 创建 Data View: $name ($pattern, time=$timefield)" - curl -fsS -X POST "$KB/api/data_views/data_view" \ - -H 'kbn-xsrf: true' \ - -H 'Content-Type: application/json' \ - -d "{\"data_view\":{\"name\":\"$name\",\"title\":\"$pattern\",\"timeFieldName\":\"$timefield\"}}" \ - >/dev/null && echo " -> OK" || { echo " -> 失败(可能是没有匹配索引)"; return 1; } -} -# 等待 Kibana 启动 -wait_for_kibana || exit 1 - -create_view "train" "train-*" "@timestamp" || true -create_view "infer" "infer-*" "@timestamp" || true -echo "[DONE] 若提示失败,请先确保已产生 train-*/infer-* 索引,再重试本脚本。" diff --git a/src/log/misc/08_fix_replicas.sh b/src/log/misc/08_fix_replicas.sh deleted file mode 100755 index 54e175e..0000000 --- a/src/log/misc/08_fix_replicas.sh +++ /dev/null @@ -1,8 +0,0 @@ -#!/usr/bin/env bash -set -euo pipefail -ES="${ES:-http://localhost:9200}" -for idx in train-* infer-*; do - echo "[i] 将 $idx 副本数设置为 0" - curl -fsS -X PUT "$ES/$idx/_settings" -H 'Content-Type: application/json' -d '{"index":{"number_of_replicas":0}}' || true -done -echo "[OK] 完成" diff --git a/src/log/misc/99_clean.sh b/src/log/misc/99_clean.sh deleted file mode 100755 index 2cdd8be..0000000 --- a/src/log/misc/99_clean.sh +++ /dev/null @@ -1,33 +0,0 @@ -#!/usr/bin/env bash -set -euo pipefail -cd "$(dirname "$0")/.." - -echo "清理选项:" -echo "1. 清理索引和缓冲区(保留持久化数据)" -echo "2. 完全清理(包括持久化数据目录)" -read -rp "请选择 (1/2): " choice - -case $choice in - 1) - read -rp "危险操作:删除 ES 索引 train-*, infer-*(以及 logstash-*),并清空 buffers。确认? (yes/NO) " ans - if [[ "${ans:-NO}" != "yes" ]]; then echo "已取消"; exit 0; fi - ES="${ES:-http://localhost:9200}" - for idx in train-* infer-* logstash-*; do - echo "[i] 删除索引 $idx" - curl -fsS -X DELETE "$ES/$idx" || true - done - rm -rf ./private/fluent-bit/buffers/* || true - echo "[OK] 索引和缓冲区清理完成" - ;; - 2) - read -rp "危险操作:删除所有数据包括持久化存储!确认? (yes/NO) " ans - if [[ "${ans:-NO}" != "yes" ]]; then echo "已取消"; exit 0; fi - rm -rf ./private/fluent-bit/buffers/* ./private/es/data/* ./private/kibana/data/* || true - rm -rf ./private/fluent-bit/logs/train/* ./private/fluent-bit/logs/infer/* || true - echo "[OK] 完全清理完成(包括持久化数据)" - ;; - *) - echo "已取消" - exit 0 - ;; -esac diff --git a/src/log/misc/start-es.sh b/src/log/misc/start-es.sh deleted file mode 100644 index 18edb9f..0000000 --- a/src/log/misc/start-es.sh +++ /dev/null @@ -1,19 +0,0 @@ -#!/bin/bash -set -euo pipefail - -echo "[INFO] Starting Elasticsearch with private directory setup..." - -# 创建数据目录 -mkdir -p /private/es/data - -# 创建软链接到Elasticsearch预期的数据目录 -rm -rf /usr/share/elasticsearch/data 2>/dev/null || true -ln -sf /private/es/data /usr/share/elasticsearch/data - -# 设置正确的权限 (Elasticsearch使用UID 1000) -chown -R 1000:1000 /private/es/data - -echo "[INFO] Data directory linked: /usr/share/elasticsearch/data -> /private/es/data" - -# 启动原始的Elasticsearch entrypoint -exec /usr/local/bin/docker-entrypoint.sh elasticsearch \ No newline at end of file diff --git a/src/log/misc/start-kibana.sh b/src/log/misc/start-kibana.sh deleted file mode 100644 index b6c0d07..0000000 --- a/src/log/misc/start-kibana.sh +++ /dev/null @@ -1,19 +0,0 @@ -#!/bin/bash -set -euo pipefail - -echo "[INFO] Starting Kibana with private directory setup..." - -# 创建数据目录 -mkdir -p /private/kibana/data - -# 创建软链接到Kibana预期的数据目录 -rm -rf /usr/share/kibana/data 2>/dev/null || true -ln -sf /private/kibana/data /usr/share/kibana/data - -# 设置正确的权限 (Kibana使用UID 1000) -chown -R 1000:1000 /private/kibana/data - -echo "[INFO] Data directory linked: /usr/share/kibana/data -> /private/kibana/data" - -# 启动原始的Kibana entrypoint -exec /usr/local/bin/kibana-docker \ No newline at end of file diff --git a/src/log/scripts/build_images.sh b/src/log/scripts/build_images.sh new file mode 100755 index 0000000..6fe3332 --- /dev/null +++ b/src/log/scripts/build_images.sh @@ -0,0 +1,20 @@ +#!/usr/bin/env bash +set -euo pipefail + +root="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)" +cd "$root" + +echo "[INFO] Building custom Docker images with supervisor support..." + +# 构建 Elasticsearch 镜像 +echo "[INFO] Building Elasticsearch image..." +docker build -t argus-elasticsearch:latest ./elasticsearch/build + +# 构建 Kibana 镜像 +echo "[INFO] Building Kibana image..." +docker build -t argus-kibana:latest ./kibana/build + +echo "[OK] Custom images built successfully:" +echo " - argus-elasticsearch:latest" +echo " - argus-kibana:latest" + diff --git a/src/log/docker-compose.yml b/src/log/tests/docker-compose.yml similarity index 68% rename from src/log/docker-compose.yml rename to src/log/tests/docker-compose.yml index 2237568..b44a6b3 100644 --- a/src/log/docker-compose.yml +++ b/src/log/tests/docker-compose.yml @@ -1,15 +1,17 @@ version: "3.8" services: es: - image: docker.elastic.co/elasticsearch/elasticsearch:8.13.4 + build: + context: ../elasticsearch/build + dockerfile: Dockerfile + image: argus-elasticsearch:latest environment: - discovery.type=single-node - xpack.security.enabled=false - ES_JAVA_OPTS=-Xms512m -Xmx512m volumes: - - ./private:/private + - ./private/argus/log/elasticsearch:/private/argus/log/elasticsearch ports: ["9200:9200"] - command: /private/es/scripts/start-es.sh healthcheck: test: ["CMD-SHELL", "curl -fs http://localhost:9200 >/dev/null || exit 1"] interval: 10s @@ -17,13 +19,15 @@ services: retries: 30 kibana: - image: docker.elastic.co/kibana/kibana:8.13.4 + build: + context: ../kibana/build + dockerfile: Dockerfile + image: argus-kibana:latest environment: - ELASTICSEARCH_HOSTS=http://es:9200 volumes: - - ./private:/private + - ./private/argus/log/kibana:/private/argus/log/kibana ports: ["5601:5601"] - command: /private/kibana/scripts/start-kibana.sh depends_on: es: condition: service_healthy @@ -37,8 +41,8 @@ services: - ES_HOST=es - ES_PORT=9200 volumes: - - ./start-fluent-bit.sh:/private/start-fluent-bit.sh:ro - - ./fluent-bit-bundle.tar.gz:/private/fluent-bit-bundle.tar.gz:ro + - ../fluent-bit/start-fluent-bit.sh:/private/start-fluent-bit.sh:ro + - ../fluent-bit/fluent-bit-bundle.tar.gz:/private/fluent-bit-bundle.tar.gz:ro ports: ["2020:2020"] depends_on: es: @@ -59,8 +63,8 @@ services: - ES_HOST=es - ES_PORT=9200 volumes: - - ./start-fluent-bit.sh:/private/start-fluent-bit.sh:ro - - ./fluent-bit-bundle.tar.gz:/private/fluent-bit-bundle.tar.gz:ro + - ../fluent-bit/start-fluent-bit.sh:/private/start-fluent-bit.sh:ro + - ../fluent-bit/fluent-bit-bundle.tar.gz:/private/fluent-bit-bundle.tar.gz:ro ports: ["2021:2020"] depends_on: es: diff --git a/src/log/tests/scripts/01_bootstrap.sh b/src/log/tests/scripts/01_bootstrap.sh new file mode 100755 index 0000000..6cc4caa --- /dev/null +++ b/src/log/tests/scripts/01_bootstrap.sh @@ -0,0 +1,27 @@ +#!/usr/bin/env bash +set -euo pipefail +root="$(cd "$(dirname "${BASH_SOURCE[0]}")/../" && pwd)" + +# 创建新的private目录结构 (基于argus目录结构) +echo "[INFO] Creating private directory structure for supervisor-based containers..." +mkdir -p "$root/private/argus/log/elasticsearch" +mkdir -p "$root/private/argus/log/kibana" + +# 设置数据目录权限(ES 和 Kibana 容器都使用 UID 1000) +echo "[INFO] Setting permissions for data directories..." +sudo chown -R 1000:1000 "$root/private/argus/log/elasticsearch" 2>/dev/null || true +sudo chown -R 1000:1000 "$root/private/argus/log/kibana" 2>/dev/null || true + +echo "[INFO] Supervisor-based containers will manage their own scripts and configurations" + +# 检查fluent-bit相关文件是否存在 +if [[ ! -f "$root/../fluent-bit/fluent-bit-bundle.tar.gz" ]]; then + echo "[WARN] fluent-bit/fluent-bit-bundle.tar.gz 不存在,请确保已创建该文件" +fi + +if [[ ! -f "$root/../fluent-bit/start-fluent-bit.sh" ]]; then + echo "[WARN] fluent-bit/start-fluent-bit.sh 不存在,请确保已创建该启动脚本" +fi + +echo "[OK] 初始化完成: private/argus/log/{elasticsearch,kibana}" +echo "[INFO] Fluent-bit files should be in fluent-bit/ directory" diff --git a/src/log/misc/02_up.sh b/src/log/tests/scripts/02_up.sh similarity index 100% rename from src/log/misc/02_up.sh rename to src/log/tests/scripts/02_up.sh diff --git a/src/log/misc/03_send_test_host01.sh b/src/log/tests/scripts/03_send_test_host01.sh similarity index 94% rename from src/log/misc/03_send_test_host01.sh rename to src/log/tests/scripts/03_send_test_host01.sh index 41e3c28..8889b06 100755 --- a/src/log/misc/03_send_test_host01.sh +++ b/src/log/tests/scripts/03_send_test_host01.sh @@ -5,7 +5,7 @@ set -euo pipefail container_name="logging-mvp-fluent-bit-host01-1" # 检查容器是否存在并运行 -if ! docker ps --format "table {{.Names}}" | grep -q "$container_name"; then +if ! docker ps | grep -q "$container_name"; then echo "[ERROR] Fluent Bit容器 $container_name 未运行" exit 1 fi diff --git a/src/log/misc/03_send_test_host02.sh b/src/log/tests/scripts/03_send_test_host02.sh similarity index 94% rename from src/log/misc/03_send_test_host02.sh rename to src/log/tests/scripts/03_send_test_host02.sh index 5d0c4f6..039c0cc 100755 --- a/src/log/misc/03_send_test_host02.sh +++ b/src/log/tests/scripts/03_send_test_host02.sh @@ -5,7 +5,7 @@ set -euo pipefail container_name="logging-mvp-fluent-bit-host02-1" # 检查容器是否存在并运行 -if ! docker ps --format "table {{.Names}}" | grep -q "$container_name"; then +if ! docker ps | grep -q "$container_name"; then echo "[ERROR] Fluent Bit容器 $container_name 未运行" exit 1 fi diff --git a/src/log/misc/04_query_es.sh b/src/log/tests/scripts/04_query_es.sh similarity index 100% rename from src/log/misc/04_query_es.sh rename to src/log/tests/scripts/04_query_es.sh diff --git a/src/log/misc/05_down.sh b/src/log/tests/scripts/05_down.sh similarity index 100% rename from src/log/misc/05_down.sh rename to src/log/tests/scripts/05_down.sh diff --git a/src/log/tests/scripts/e2e_test.sh b/src/log/tests/scripts/e2e_test.sh new file mode 100755 index 0000000..ae45d1c --- /dev/null +++ b/src/log/tests/scripts/e2e_test.sh @@ -0,0 +1,168 @@ +#!/usr/bin/env bash +set -euo pipefail + +echo "=======================================" +echo "ARGUS Log System End-to-End Test" +echo "=======================================" +echo "" + +# 记录测试开始时间 +test_start_time=$(date +%s) + +# 函数:获取ES中的日志计数 +get_log_count() { + local train_count=$(curl -s "http://localhost:9200/train-*/_count" 2>/dev/null | grep -o '"count":[0-9]*' | cut -d':' -f2 || echo "0") + local infer_count=$(curl -s "http://localhost:9200/infer-*/_count" 2>/dev/null | grep -o '"count":[0-9]*' | cut -d':' -f2 || echo "0") + echo "$((train_count + infer_count))" +} + +# 函数:等待服务就绪 +wait_for_services() { + echo "[INFO] Waiting for all services to be ready..." + local max_attempts=60 + local attempt=1 + + while [ $attempt -le $max_attempts ]; do + if curl -fs http://localhost:9200/_cluster/health >/dev/null 2>&1 && \ + curl -fs http://localhost:5601/api/status >/dev/null 2>&1 && \ + curl -fs http://localhost:2020/api/v2/metrics >/dev/null 2>&1 && \ + curl -fs http://localhost:2021/api/v2/metrics >/dev/null 2>&1; then + echo "[OK] All services are ready!" + return 0 + fi + echo " Waiting for services... ($attempt/$max_attempts)" + sleep 5 + ((attempt++)) + done + + echo "[ERROR] Services not ready after $max_attempts attempts" + return 1 +} + +# 函数:显示测试步骤 +show_step() { + echo "" + echo "🔄 Step $1: $2" + echo "----------------------------------------" +} + +# 函数:验证步骤结果 +verify_step() { + if [ $? -eq 0 ]; then + echo "✅ $1 - SUCCESS" + else + echo "❌ $1 - FAILED" + exit 1 + fi +} + +# 开始端到端测试 +show_step "1" "Bootstrap - Initialize environment" +./scripts/01_bootstrap.sh +verify_step "Bootstrap" + +show_step "2" "Startup - Start all services" +./scripts/02_up.sh +verify_step "Service startup" + +# 等待服务完全就绪 +wait_for_services || exit 1 + +# 记录发送测试数据前的日志计数 +initial_count=$(get_log_count) +echo "[INFO] Initial log count: $initial_count" + +show_step "3a" "Send test data - Host01" +./scripts/03_send_test_host01.sh +verify_step "Test data sending (host01)" + +show_step "3b" "Send test data - Host02" +./scripts/03_send_test_host02.sh +verify_step "Test data sending (host02)" + +# 等待数据被处理 +echo "[INFO] Waiting for data to be processed..." +sleep 10 + +show_step "4" "Verify data - Query Elasticsearch" +./scripts/04_query_es.sh +verify_step "Data verification" + +# 记录发送测试数据后的日志计数 +final_count=$(get_log_count) +echo "[INFO] Final log count: $final_count" + +# 验证日志数量是否增加 +if [ "$final_count" -gt "$initial_count" ]; then + added_logs=$((final_count - initial_count)) + echo "✅ Log count verification - SUCCESS: Added $added_logs logs (from $initial_count to $final_count)" +else + echo "❌ Log count verification - FAILED: Expected count to increase, but got $initial_count -> $final_count" + exit 1 +fi + +# 验证预期的最小日志数量(每个主机应该发送一些日志) +expected_min_logs=4 # 至少应该有几条日志 +if [ "$final_count" -ge "$expected_min_logs" ]; then + echo "✅ Minimum log threshold - SUCCESS: $final_count logs (>= $expected_min_logs expected)" +else + echo "❌ Minimum log threshold - FAILED: Only $final_count logs (>= $expected_min_logs expected)" + exit 1 +fi + +# 检查服务健康状态 +show_step "Health" "Check service health" +echo "[INFO] Checking service health..." + +# 检查 Elasticsearch 健康状态 +es_health=$(curl -s "http://localhost:9200/_cluster/health" | grep -o '"status":"[^"]*"' | cut -d'"' -f4) +if [ "$es_health" = "green" ] || [ "$es_health" = "yellow" ]; then + echo "✅ Elasticsearch health: $es_health" +else + echo "❌ Elasticsearch health: $es_health" +fi + +# 检查 Kibana 状态 +if curl -fs "http://localhost:5601/api/status" >/dev/null 2>&1; then + kb_status="available" + echo "✅ Kibana status: $kb_status" +else + kb_status="unavailable" + echo "⚠️ Kibana status: $kb_status" +fi + +# 检查 Fluent-Bit 指标 +fb_host01_uptime=$(curl -s "http://localhost:2020/api/v2/metrics" | grep "fluentbit_uptime" | head -1 | grep -o "[0-9]\+$" || echo "0") +fb_host02_uptime=$(curl -s "http://localhost:2021/api/v2/metrics" | grep "fluentbit_uptime" | head -1 | grep -o "[0-9]\+$" || echo "0") + +if [ "$fb_host01_uptime" -gt 0 ] && [ "$fb_host02_uptime" -gt 0 ]; then + echo "✅ Fluent-Bit services: host01 uptime=${fb_host01_uptime}s, host02 uptime=${fb_host02_uptime}s" +else + echo "⚠️ Fluent-Bit services: host01 uptime=${fb_host01_uptime}s, host02 uptime=${fb_host02_uptime}s" +fi + +verify_step "Service health check" + +show_step "5" "Cleanup - Stop all services" +./scripts/05_down.sh +verify_step "Service cleanup" + +# 计算总测试时间 +test_end_time=$(date +%s) +total_time=$((test_end_time - test_start_time)) + +echo "" +echo "=======================================" +echo "🎉 END-TO-END TEST COMPLETED SUCCESSFULLY!" +echo "=======================================" +echo "📊 Test Summary:" +echo " • Initial logs: $initial_count" +echo " • Final logs: $final_count" +echo " • Added logs: $added_logs" +echo " • Total time: ${total_time}s" +echo " • ES health: $es_health" +echo " • Kibana status: $kb_status" +echo " • All services started and stopped successfully" +echo "" +echo "✅ The ARGUS log system is working correctly!" +echo "" \ No newline at end of file