Compare commits
5 Commits
af90f89fbf
...
68b265624c
Author | SHA1 | Date | |
---|---|---|---|
|
68b265624c | ||
|
d9d937f5d6 | ||
|
095e8ee32d | ||
|
a6c60b2edd | ||
8a38d3d0b2 |
138
build/build_images.sh
Executable file
138
build/build_images.sh
Executable file
@ -0,0 +1,138 @@
|
|||||||
|
#!/usr/bin/env bash
|
||||||
|
set -euo pipefail
|
||||||
|
|
||||||
|
# Print the usage summary for the image build tool to stdout.
show_help() {
  local prog=$0

  printf '%s\n' \
    "ARGUS Unified Build System - Image Build Tool" \
    "" \
    "Usage: ${prog} [OPTIONS]" \
    "" \
    "Options:" \
    "--intranet Use intranet mirror for Ubuntu 22.04 packages" \
    "-h, --help Show this help message" \
    "" \
    "Examples:" \
    "${prog} # Build with default sources" \
    "${prog} --intranet # Build with intranet mirror" \
    ""
}
|
||||||
|
|
||||||
|
# Parse command-line options. Unknown options print the usage text and exit 1.
use_intranet=false

while (( $# > 0 )); do
  case "$1" in
    -h | --help)
      show_help
      exit 0
      ;;
    --intranet)
      use_intranet=true
      shift
      ;;
    *)
      echo "Unknown option: $1"
      show_help
      exit 1
      ;;
  esac
done
|
||||||
|
|
||||||
|
# Resolve the project root (parent of this script's directory) and build from there.
root="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
cd "$root"

printf '%s\n' \
  "=======================================" \
  "ARGUS Unified Build System" \
  "======================================="

# build_args carries the extra flags handed to every `docker build` call.
build_args=""
if [[ "$use_intranet" == true ]]; then
  echo "🌐 Mode: Intranet (Using internal mirror: 10.68.64.1)"
  build_args="--build-arg USE_INTRANET=true"
else
  echo "🌐 Mode: Public (Using default package sources)"
fi

echo "📁 Build context: $root"
echo ""
|
||||||
|
|
||||||
|
#######################################
# Build one Docker image using the current directory as the build context.
# Globals:   build_args (read) - extra `docker build` flags, word-split on purpose
# Arguments: $1 - display name, $2 - Dockerfile path, $3 - image tag
# Outputs:   progress and result lines on stdout
# Returns:   0 if `docker build` succeeded, 1 otherwise
#######################################
build_image() {
  local image_name=$1 dockerfile_path=$2 tag=$3

  echo "🔄 Building $image_name image..."
  echo " Dockerfile: $dockerfile_path"
  echo " Tag: $tag"

  # shellcheck disable=SC2086 # build_args holds whitespace-separated flags
  if ! docker build $build_args -f "$dockerfile_path" -t "$tag" .; then
    echo "❌ Failed to build $image_name image"
    return 1
  fi

  echo "✅ $image_name image built successfully"
  return 0
}
|
||||||
|
|
||||||
|
# Build every image, recording the tags that succeeded and whether any build failed.
images_built=()
build_failed=false

# Each entry is "display name|Dockerfile path|image tag"; images are built in this order.
for spec in \
  "Elasticsearch|src/log/elasticsearch/build/Dockerfile|argus-elasticsearch:latest" \
  "Kibana|src/log/kibana/build/Dockerfile|argus-kibana:latest" \
  "BIND9|src/bind/build/Dockerfile|argus-bind9:latest"; do
  IFS='|' read -r label dockerfile image_tag <<<"$spec"

  if build_image "$label" "$dockerfile" "$image_tag"; then
    images_built+=("$image_tag")
  else
    build_failed=true
  fi

  echo ""
done
|
||||||
|
# Build summary: list what was built, fail if anything broke, then show next steps.
printf '%s\n' \
  "=======================================" \
  "📦 Build Summary" \
  "======================================="

if (( ${#images_built[@]} > 0 )); then
  echo "✅ Successfully built images:"
  printf ' • %s\n' "${images_built[@]}"
fi

if [[ "$build_failed" == true ]]; then
  echo ""
  echo "❌ Some images failed to build. Please check the errors above."
  exit 1
fi

if [[ "$use_intranet" == true ]]; then
  echo ""
  echo "🌐 Built with intranet mirror configuration"
fi

cat <<'EOF'

🚀 Next steps:
 cd src/log && ./scripts/save_images.sh # Export log images
 cd src/bind && ./scripts/save_images.sh # Export bind images
 cd src/log/tests && ./scripts/02_up.sh # Start log services

EOF
|
222
build/save_images.sh
Executable file
222
build/save_images.sh
Executable file
@ -0,0 +1,222 @@
|
|||||||
|
#!/usr/bin/env bash
|
||||||
|
set -euo pipefail
|
||||||
|
|
||||||
|
# Print the usage summary for the image export tool to stdout.
show_help() {
  local prog=$0

  printf '%s\n' \
    "ARGUS Unified Build System - Image Export Tool" \
    "" \
    "Usage: ${prog} [OPTIONS]" \
    "" \
    "Options:" \
    "--compress Compress exported images with gzip" \
    "-h, --help Show this help message" \
    "" \
    "Examples:" \
    "${prog} # Export all images without compression" \
    "${prog} --compress # Export all images with gzip compression" \
    ""
}
|
||||||
|
|
||||||
|
# Parse command-line options. Unknown options print the usage text and exit 1.
use_compression=false

while (( $# > 0 )); do
  case "$1" in
    -h | --help)
      show_help
      exit 0
      ;;
    --compress)
      use_compression=true
      shift
      ;;
    *)
      echo "Unknown option: $1"
      show_help
      exit 1
      ;;
  esac
done
|
||||||
|
|
||||||
|
# Resolve the project root (parent of this script's directory) and work from there.
root="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
cd "$root"

# Directory that receives the exported image archives.
images_dir="$root/images"
mkdir -p "$images_dir"

printf '%s\n' \
  "=======================================" \
  "ARGUS Unified Build System - Image Export" \
  "=======================================" \
  ""

case "$use_compression" in
  true) echo "🗜️ Mode: With gzip compression" ;;
  *) echo "📦 Mode: No compression" ;;
esac

echo "📁 Output directory: $images_dir"
echo ""
|
||||||
|
|
||||||
|
# Map each image tag to the tar file name it is exported as.
declare -A images=()
for image_tag in argus-elasticsearch:latest argus-kibana:latest argus-bind9:latest; do
  # "repo:tag" becomes "repo-tag.tar".
  images["$image_tag"]="${image_tag/:/-}.tar"
done
|
||||||
|
|
||||||
|
#######################################
# Check whether an image with exactly this "repository:tag" exists locally.
# Arguments: $1 - image reference, e.g. "argus-kibana:latest"
# Outputs:   a found / not-found line on stdout
# Returns:   0 if the image is listed by `docker images`, 1 otherwise
#            (including when `docker images` itself fails)
#######################################
check_image() {
  local image_name="$1"
  local listing

  # Capture the listing before matching. Piping straight into `grep -q` lets
  # grep exit at the first match, which can kill `docker images` with SIGPIPE
  # and, under `set -o pipefail`, turn a successful match into a failure.
  listing=$(docker images --format "{{.Repository}}:{{.Tag}}") || listing=""

  # -F -x: compare whole lines literally, so '.' in a name is not a wildcard.
  if grep -Fxq -- "$image_name" <<<"$listing"; then
    echo "✅ Image found: $image_name"
    return 0
  fi

  echo "❌ Image not found: $image_name"
  return 1
}
|
||||||
|
|
||||||
|
# Print size, age and ID of the given image as reported by `docker images`.
# Arguments: $1 - image reference
show_image_info() {
  local image_name="$1"
  local info_format=" Size: {{.Size}}, Created: {{.CreatedSince}}, ID: {{.ID}}"

  echo "📋 Image info for $image_name:"
  docker images "$image_name" --format "$info_format"
}
|
||||||
|
|
||||||
|
#######################################
# Export one image to a tar archive, optionally gzip-compressed.
# Globals:   images_dir (read), use_compression (read)
# Arguments: $1 - image reference, $2 - output file name (without .gz)
# Outputs:   progress lines on stdout, failure message on stderr
# Returns:   0 on success, 1 if `docker save` or `gzip` failed
#######################################
save_image() {
  local image_name="$1"
  local output_file="$2"
  local output_path="$images_dir/$output_file"
  local file_size

  echo "🔄 Saving $image_name to $output_file..."

  # Remove stale output from a previous run (if present).
  if [[ -f "$output_path" ]]; then
    echo " Removing existing file: $output_file"
    rm -- "$output_path"
  fi

  if [[ "$use_compression" == true && -f "$output_path.gz" ]]; then
    echo " Removing existing compressed file: $output_file.gz"
    rm -- "$output_path.gz"
  fi

  # Do not leave a truncated archive behind if the export fails part-way.
  if ! docker save "$image_name" -o "$output_path"; then
    rm -f -- "$output_path"
    echo "❌ Failed to save $image_name" >&2
    return 1
  fi

  if [[ "$use_compression" == true ]]; then
    echo " Compressing with gzip..."
    gzip -f -- "$output_path" || return 1
    output_path="$output_path.gz"
    output_file="$output_file.gz"
  fi

  # Declared separately above so a failing `du` is not masked by `local`.
  file_size=$(du -h -- "$output_path" | cut -f1) || file_size="unknown"
  echo "✅ Saved successfully: $output_file ($file_size)"
}
|
||||||
|
|
||||||
|
echo "🔍 Checking for ARGUS images..."
echo ""

# Split the configured images into those present locally and those missing.
available_images=()
missing_images=()

for image_name in "${!images[@]}"; do
  if ! check_image "$image_name"; then
    missing_images+=("$image_name")
  else
    show_image_info "$image_name"
    available_images+=("$image_name")
  fi
  echo ""
done

# Nothing to export: tell the user how to build the images and stop.
if (( ${#available_images[@]} == 0 )); then
  echo "❌ No ARGUS images found to export."
  echo ""
  echo "🔧 Please build the images first with:"
  echo " ./build/build_images.sh"
  exit 1
fi

# Report images that will be skipped.
if (( ${#missing_images[@]} > 0 )); then
  echo "⚠️ Missing images (will be skipped):"
  printf ' • %s\n' "${missing_images[@]}"
  echo ""
fi

echo "💾 Starting image export process..."
echo ""

# Export every available image and remember the resulting file names.
exported_files=()
for image_name in "${available_images[@]}"; do
  output_file="${images[$image_name]}"
  save_image "$image_name" "$output_file"

  case "$use_compression" in
    true) exported_files+=("$output_file.gz") ;;
    *) exported_files+=("$output_file") ;;
  esac
  echo ""
done
|
||||||
|
|
||||||
|
printf '%s\n' \
  "=======================================" \
  "📦 Export Summary" \
  "======================================="

# List the exported files and add up their sizes in bytes.
echo "📁 Exported files in $images_dir:"
total_size=0
for file in "${exported_files[@]}"; do
  full_path="$images_dir/$file"
  [[ -f "$full_path" ]] || continue

  size=$(du -h "$full_path" | cut -f1)
  size_bytes=$(du -b "$full_path" | cut -f1)
  total_size=$((total_size + size_bytes))
  echo " ✅ $file ($size)"
done

# Show the total in human-readable form.
if [[ $total_size -gt 0 ]]; then
  total_size_human=$(numfmt --to=iec --suffix=B $total_size)
  echo ""
  echo "📊 Total size: $total_size_human"
fi

echo ""
echo "🚀 Usage instructions:"
echo " To load these images on another system:"

# Print one load command per exported file that exists on disk.
for file in "${exported_files[@]}"; do
  [[ -f "$images_dir/$file" ]] || continue

  if [[ "$use_compression" == true ]]; then
    base_name="${file%.gz}"
    echo " gunzip $file && docker load -i $base_name"
  else
    echo " docker load -i $file"
  fi
done

echo ""
echo "✅ Image export completed successfully!"
echo ""
|
2
src/bind/.gitignore
vendored
Normal file
2
src/bind/.gitignore
vendored
Normal file
@ -0,0 +1,2 @@
|
|||||||
|
|
||||||
|
images/
|
66
src/bind/build/Dockerfile
Normal file
66
src/bind/build/Dockerfile
Normal file
@ -0,0 +1,66 @@
|
|||||||
|
# BIND9 DNS server image with supervisor managing named and the DNS sync daemon.
# COPY sources below are given relative to the build context (src/bind/build/...).
FROM ubuntu:22.04

# Set timezone and avoid interactive prompts
ENV DEBIAN_FRONTEND=noninteractive
ENV TZ=Asia/Shanghai

# Build argument: pass --build-arg USE_INTRANET=true to use the intranet apt mirror.
ARG USE_INTRANET=false

# Configure the intranet apt source (only when the intranet option is given).
# The original sources.list is kept as sources.list.bak. [trusted=yes] and the
# Verify-Peer/Verify-Host settings turn off apt signature and TLS checks.
RUN if [ "$USE_INTRANET" = "true" ]; then \
        echo "Configuring intranet apt sources..." && \
        cp /etc/apt/sources.list /etc/apt/sources.list.bak && \
        echo "deb [trusted=yes] http://10.68.64.1/ubuntu2204/ jammy main" > /etc/apt/sources.list && \
        echo 'Acquire::https::Verify-Peer "false";' > /etc/apt/apt.conf.d/99disable-ssl-check && \
        echo 'Acquire::https::Verify-Host "false";' >> /etc/apt/apt.conf.d/99disable-ssl-check; \
    fi

# Update package list and install required packages
RUN apt-get update && \
    apt-get install -y \
    bind9 \
    bind9utils \
    bind9-doc \
    supervisor \
    net-tools \
    inetutils-ping \
    vim \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

# Configure the apt source used at deployment time. This replaces the build-time
# mirror written above with a different intranet host.
RUN if [ "$USE_INTRANET" = "true" ]; then \
        echo "deb [trusted=yes] https://10.92.132.52/mirrors/ubuntu2204/ jammy main" > /etc/apt/sources.list; \
    fi

# Create supervisor configuration directory
RUN mkdir -p /etc/supervisor/conf.d

# Copy supervisor configuration
COPY src/bind/build/supervisord.conf /etc/supervisor/conf.d/supervisord.conf

# Copy BIND9 configuration files
COPY src/bind/build/named.conf.local /etc/bind/named.conf.local
COPY src/bind/build/db.argus.com /etc/bind/db.argus.com

# Copy startup and reload scripts
COPY src/bind/build/startup.sh /usr/local/bin/startup.sh
COPY src/bind/build/reload-bind9.sh /usr/local/bin/reload-bind9.sh
COPY src/bind/build/argus_dns_sync.sh /usr/local/bin/argus_dns_sync.sh
COPY src/bind/build/update-dns.sh /usr/local/bin/update-dns.sh

# Make scripts executable
RUN chmod +x /usr/local/bin/startup.sh /usr/local/bin/reload-bind9.sh /usr/local/bin/argus_dns_sync.sh /usr/local/bin/update-dns.sh

# Set proper ownership for BIND9 files
RUN chown bind:bind /etc/bind/named.conf.local /etc/bind/db.argus.com

# Expose DNS port
EXPOSE 53/tcp 53/udp

# Use root user as requested
USER root

# Start with startup script (it ends by exec'ing supervisord)
CMD ["/usr/local/bin/startup.sh"]
|
100
src/bind/build/argus_dns_sync.sh
Normal file
100
src/bind/build/argus_dns_sync.sh
Normal file
@ -0,0 +1,100 @@
|
|||||||
|
#!/usr/bin/env bash
|
||||||
|
set -euo pipefail
|
||||||
|
|
||||||
|
# Directory scanned for "<name>.argus.com" files; each is searched for an IPv4 address.
WATCH_DIR="/private/argus/etc"
# Zone file that receives the A records.
ZONE_DB="/private/argus/bind/db.argus.com"
# Lock file used with flock so only one sync pass runs at a time.
LOCKFILE="/var/lock/argus_dns_sync.lock"
# Where copies of the zone file are stored.
BACKUP_DIR="/private/argus/bind/.backup"
# Pause between sync passes, in seconds.
SLEEP_SECONDS=10
RELOAD_SCRIPT="/usr/local/bin/reload-bind9.sh" # path of the existing reload script

# Make sure the lock and backup directories exist before the loop starts.
mkdir -p "$(dirname "$LOCKFILE")" "$BACKUP_DIR"
|
||||||
|
|
||||||
|
#######################################
# Check whether a string is a dotted-quad IPv4 address.
# Arguments: $1 - candidate address
# Returns:   0 if it has four 1-3 digit octets each <= 255, 1 otherwise
#######################################
is_ipv4() {
  local ip="$1"
  local octet
  local -a octets

  [[ "$ip" =~ ^([0-9]{1,3}\.){3}[0-9]{1,3}$ ]] || return 1

  IFS='.' read -r -a octets <<<"$ip"
  for octet in "${octets[@]}"; do
    # Force base 10: a leading zero would otherwise make bash read the octet
    # as octal ("010" becomes 8) or fail the arithmetic on 8 and 9 ("08").
    (( 10#$octet <= 255 )) || return 1
  done
  return 0
}
|
||||||
|
|
||||||
|
#######################################
# Print the address of the first "<name> IN A <ip>" record in the zone file.
# Globals:   ZONE_DB (read)
# Arguments: $1 - record name (matched literally against the first field)
# Outputs:   the address on stdout, or nothing if there is no such record
#######################################
get_current_ip() {
  local name="$1"

  # Passing the name with -v and comparing fields keeps regex metacharacters
  # in the name (such as '.') from matching other records, and uses the same
  # field test as the awk rewrite in upsert_record.
  awk -v n="$name" '$1 == n && $2 == "IN" && $3 == "A" { print $4; exit }' "$ZONE_DB"
}
|
||||||
|
|
||||||
|
#######################################
# Insert or update the A record for a host in the zone file.
# Globals:   ZONE_DB (read/written), BACKUP_DIR (written)
# Arguments: $1 - record name, $2 - IPv4 address
# Outputs:   one [ADD] / [UPDATE] / [SKIP] line on stdout
# Returns:   0 if the zone file was modified (the caller's `if upsert_record`
#            then requests a reload), 1 if the record was already current or
#            the rewrite failed
#######################################
upsert_record() {
  local name="$1"
  local new_ip="$2"
  local cur_ip ts

  cur_ip="$(get_current_ip "$name" || true)"

  if [[ -n "$cur_ip" && "$cur_ip" == "$new_ip" ]]; then
    echo "[SKIP] ${name} unchanged (${new_ip})"
    return 1
  fi

  # Back up only when the file is about to change; doing it on every poll
  # would add a copy per record every few seconds.
  ts="$(date +%Y%m%d-%H%M%S)"
  cp -a "$ZONE_DB" "$BACKUP_DIR/db.argus.com.$ts.bak"

  if [[ -z "$cur_ip" ]]; then
    # Ensure the file ends with a newline before adding the new record.
    if [[ -s "$ZONE_DB" ]] && [[ $(tail -c1 "$ZONE_DB" | wc -l) -eq 0 ]]; then
      echo "" >> "$ZONE_DB"
    fi
    printf "%-20s IN A %s\n" "$name" "$new_ip" >> "$ZONE_DB"
    echo "[ADD] ${name} -> ${new_ip}"
    return 0
  fi

  # Rewrite every matching A record, then swap the new file into place.
  if ! awk -v n="$name" -v ip="$new_ip" '
    {
      if ($1==n && $2=="IN" && $3=="A") {
        printf "%-20s IN A %s\n", n, ip
      } else {
        print
      }
    }
  ' "$ZONE_DB" > "${ZONE_DB}.tmp"; then
    rm -f -- "${ZONE_DB}.tmp"
    echo "[ERROR] failed to rewrite ${ZONE_DB} for ${name}" >&2
    return 1
  fi
  mv "${ZONE_DB}.tmp" "$ZONE_DB" || return 1

  echo "[UPDATE] ${name}: ${cur_ip} -> ${new_ip}"
  return 0
}
|
||||||
|
|
||||||
|
# Main loop: on each pass take the lock, scan WATCH_DIR for "<name>.argus.com"
# files, upsert the last IPv4 address found in each file, and run the reload
# script when NEED_RELOAD was set. Sleeps SLEEP_SECONDS between passes.
while :; do
  exec 9>"$LOCKFILE"
  if ! flock -n 9; then
    echo "[INFO] 已有同步任务在运行,跳过本轮"
  else
    shopt -s nullglob
    NEED_RELOAD=0

    for f in "$WATCH_DIR"/*.argus.com; do
      base="${f##*/}"
      name="${base%.argus.com}"
      # Take the last IPv4-looking token in the file; empty when there is none.
      ip="$(grep -Eo '([0-9]{1,3}\.){3}[0-9]{1,3}' "$f" | tail -n1 || true)"

      if [[ -n "$ip" ]] && is_ipv4 "$ip"; then
        # A zero exit status from upsert_record requests a reload.
        if upsert_record "$name" "$ip"; then
          NEED_RELOAD=1
        fi
      else
        echo "[WARN] $f 未找到有效 IPv4,跳过"
      fi
    done

    if (( NEED_RELOAD == 1 )); then
      echo "[INFO] 检测到 db.argus.com 变更,执行 reload-bind9.sh"
      bash "$RELOAD_SCRIPT"
    fi

    flock -u 9
  fi

  sleep "$SLEEP_SECONDS"
done
|
||||||
|
|
16
src/bind/build/db.argus.com
Normal file
16
src/bind/build/db.argus.com
Normal file
@ -0,0 +1,16 @@
|
|||||||
|
$TTL 604800
|
||||||
|
@ IN SOA ns1.argus.com. admin.argus.com. (
|
||||||
|
2 ; Serial
|
||||||
|
604800 ; Refresh
|
||||||
|
86400 ; Retry
|
||||||
|
2419200 ; Expire
|
||||||
|
604800 ) ; Negative Cache TTL
|
||||||
|
|
||||||
|
; 定义 DNS 服务器
|
||||||
|
@ IN NS ns1.argus.com.
|
||||||
|
|
||||||
|
; 定义 ns1 主机
|
||||||
|
ns1 IN A 127.0.0.1
|
||||||
|
|
||||||
|
; 定义 web 指向 12.4.5.6
|
||||||
|
web IN A 12.4.5.6
|
68
src/bind/build/dns-monitor.sh
Normal file
68
src/bind/build/dns-monitor.sh
Normal file
@ -0,0 +1,68 @@
|
|||||||
|
#!/bin/bash
|
||||||
|
|
||||||
|
# DNS monitor: every 10 seconds check whether dns.conf has changed and, if it
# has, run the update-dns.sh script.

DNS_CONF="/private/argus/etc/dns.conf"
DNS_BACKUP="/tmp/dns.conf.backup"
UPDATE_SCRIPT="/private/argus/etc/update-dns.sh"
LOG_FILE="/var/log/supervisor/dns-monitor.log"

# Make sure the log file exists.
touch "$LOG_FILE"

# Append a timestamped message to the log file.
log_message() {
  echo "$(date '+%Y-%m-%d %H:%M:%S') [DNS-Monitor] $1" >> "$LOG_FILE"
}

# Run the update script, logging what happened. Returns 0 only on success.
run_update_script() {
  if [ ! -x "$UPDATE_SCRIPT" ]; then
    log_message "警告: 更新脚本不存在或不可执行: $UPDATE_SCRIPT"
    return 1
  fi

  log_message "执行DNS更新脚本: $UPDATE_SCRIPT"
  if "$UPDATE_SCRIPT" >> "$LOG_FILE" 2>&1; then
    log_message "DNS更新脚本执行成功"
    return 0
  fi

  log_message "DNS更新脚本执行失败"
  return 1
}

# Snapshot dns.conf, run the update script, and record the snapshot as the
# last-applied state only if the update succeeded. A failed update is therefore
# detected as a change again on the next pass and retried.
apply_dns_conf() {
  local pending="${DNS_BACKUP}.pending"

  cp "$DNS_CONF" "$pending" || return 1
  if run_update_script; then
    mv "$pending" "$DNS_BACKUP"
  else
    rm -f "$pending"
    return 1
  fi
}

log_message "DNS监控脚本启动"

while true; do
  if [ ! -f "$DNS_CONF" ]; then
    log_message "警告: DNS配置文件不存在: $DNS_CONF"
  elif [ ! -f "$DNS_BACKUP" ]; then
    # First time the config file is seen: apply it and create the backup.
    if apply_dns_conf; then
      log_message "创建DNS配置备份文件"
    fi
  elif ! cmp -s "$DNS_CONF" "$DNS_BACKUP"; then
    log_message "检测到DNS配置变化"
    apply_dns_conf
  fi

  sleep 10
done
|
4
src/bind/build/named.conf.local
Normal file
4
src/bind/build/named.conf.local
Normal file
@ -0,0 +1,4 @@
|
|||||||
|
zone "argus.com" {
|
||||||
|
type master;
|
||||||
|
file "/etc/bind/db.argus.com";
|
||||||
|
};
|
27
src/bind/build/reload-bind9.sh
Normal file
27
src/bind/build/reload-bind9.sh
Normal file
@ -0,0 +1,27 @@
|
|||||||
|
#!/bin/bash
|
||||||
|
|
||||||
|
echo "Reloading BIND9 configuration..."

# Validate the configuration and the zone file before touching the service.
echo "Checking named.conf.local syntax..."
named-checkconf /etc/bind/named.conf.local || {
  echo "ERROR: named.conf.local has syntax errors!"
  exit 1
}

echo "Checking zone file syntax..."
named-checkzone argus.com /etc/bind/db.argus.com || {
  echo "ERROR: db.argus.com has syntax errors!"
  exit 1
}

# Restart the bind9 program through supervisor.
echo "Reloading BIND9 service..."
if supervisorctl restart bind9; then
  echo "BIND9 reloaded successfully!"
else
  echo "ERROR: Failed to reload BIND9!"
  exit 1
fi
|
40
src/bind/build/startup.sh
Normal file
40
src/bind/build/startup.sh
Normal file
@ -0,0 +1,40 @@
|
|||||||
|
#!/bin/bash
|
||||||
|
|
||||||
|
# Set /private permissions to 777 as requested
chmod 777 /private 2>/dev/null || true

# Create persistent directories for BIND9 configs and DNS sync
mkdir -p /private/argus/bind
mkdir -p /private/argus/etc

# Copy configuration files to persistent storage if they don't exist
if [ ! -f /private/argus/bind/named.conf.local ]; then
  cp /etc/bind/named.conf.local /private/argus/bind/named.conf.local
fi

if [ ! -f /private/argus/bind/db.argus.com ]; then
  cp /etc/bind/db.argus.com /private/argus/bind/db.argus.com
fi

# Copy update-dns.sh to /private/argus/etc/
cp /usr/local/bin/update-dns.sh /private/argus/etc/update-dns.sh
chown bind:bind /private/argus/etc/update-dns.sh
chmod a+x /private/argus/etc/update-dns.sh

# Create symlinks to use persistent configs
ln -sf /private/argus/bind/named.conf.local /etc/bind/named.conf.local
ln -sf /private/argus/bind/db.argus.com /etc/bind/db.argus.com

# Set proper ownership
chown bind:bind /private/argus/bind/named.conf.local /private/argus/bind/db.argus.com

# Record the container's eth0 address in dns.conf.
IP=$(ifconfig | grep -A 1 eth0 | grep inet | awk '{print $2}')
echo "current IP: ${IP}"
if [ -n "$IP" ]; then
  printf '%s\n' "$IP" > /private/argus/etc/dns.conf
else
  # Do not overwrite dns.conf with an empty value when no address was found.
  echo "WARNING: could not determine eth0 IP; /private/argus/etc/dns.conf left unchanged" >&2
fi

# Create supervisor log directory
mkdir -p /var/log/supervisor

# Start supervisor (replaces this shell)
exec /usr/bin/supervisord -c /etc/supervisor/conf.d/supervisord.conf
|
37
src/bind/build/supervisord.conf
Normal file
37
src/bind/build/supervisord.conf
Normal file
@ -0,0 +1,37 @@
|
|||||||
|
[unix_http_server]
|
||||||
|
file=/var/run/supervisor.sock
|
||||||
|
chmod=0700
|
||||||
|
|
||||||
|
[supervisord]
|
||||||
|
nodaemon=true
|
||||||
|
user=root
|
||||||
|
logfile=/var/log/supervisor/supervisord.log
|
||||||
|
pidfile=/var/run/supervisord.pid
|
||||||
|
|
||||||
|
[rpcinterface:supervisor]
|
||||||
|
supervisor.rpcinterface_factory = supervisor.rpcinterface:make_main_rpcinterface
|
||||||
|
|
||||||
|
[supervisorctl]
|
||||||
|
serverurl=unix:///var/run/supervisor.sock
|
||||||
|
|
||||||
|
[program:bind9]
|
||||||
|
command=/usr/sbin/named -g -c /etc/bind/named.conf -u bind
|
||||||
|
user=bind
|
||||||
|
autostart=true
|
||||||
|
autorestart=true
|
||||||
|
stderr_logfile=/var/log/supervisor/bind9.err.log
|
||||||
|
stdout_logfile=/var/log/supervisor/bind9.out.log
|
||||||
|
priority=10
|
||||||
|
|
||||||
|
[program:argus-dns-sync]
|
||||||
|
command=/usr/local/bin/argus_dns_sync.sh
|
||||||
|
autostart=true
|
||||||
|
autorestart=true
|
||||||
|
startsecs=3
|
||||||
|
stopsignal=TERM
|
||||||
|
user=root
|
||||||
|
stdout_logfile=/var/log/argus_dns_sync.out.log
|
||||||
|
stderr_logfile=/var/log/argus_dns_sync.err.log
|
||||||
|
; 根据环境调整环境变量(可选)
|
||||||
|
; environment=RNDC_RELOAD="yes"
|
||||||
|
|
31
src/bind/build/update-dns.sh
Executable file
31
src/bind/build/update-dns.sh
Executable file
@ -0,0 +1,31 @@
|
|||||||
|
#!/bin/sh
|
||||||
|
# update-dns.sh
# Read IP addresses from /private/argus/etc/dns.conf (one per line; blank lines
# and lines starting with '#' are skipped) and write them to /etc/resolv.conf
# as "nameserver" entries.

DNS_CONF="/private/argus/etc/dns.conf"
RESOLV_CONF="/etc/resolv.conf"
TMP_FILE="$RESOLV_CONF.tmp"

# The configuration file must exist.
if [ ! -f "$DNS_CONF" ]; then
  echo "配置文件不存在: $DNS_CONF" >&2
  exit 1
fi

# Generate the resolv.conf content. `|| [ -n "$ip" ]` keeps a final line that
# has no trailing newline.
while IFS= read -r ip || [ -n "$ip" ]; do
  case "$ip" in
    \#* | "") continue ;;
  esac
  echo "nameserver $ip"
done < "$DNS_CONF" > "$TMP_FILE"

# Refuse to replace resolv.conf with nothing: that would remove every resolver.
if [ ! -s "$TMP_FILE" ]; then
  rm -f "$TMP_FILE"
  echo "no nameserver entries found in $DNS_CONF; $RESOLV_CONF left unchanged" >&2
  exit 1
fi

# Write the content into the existing /etc/resolv.conf rather than renaming over it.
# NOTE(review): presumably because resolv.conf is bind-mounted in containers; confirm.
cat "$TMP_FILE" > "$RESOLV_CONF"
rm -f "$TMP_FILE"

echo "已更新 $RESOLV_CONF"
|
||||||
|
|
16
src/bind/tests/docker-compose.yml
Normal file
16
src/bind/tests/docker-compose.yml
Normal file
@ -0,0 +1,16 @@
|
|||||||
|
services:
|
||||||
|
bind9:
|
||||||
|
image: argus-bind9:latest
|
||||||
|
container_name: argus-bind9-test
|
||||||
|
ports:
|
||||||
|
- "53:53/tcp"
|
||||||
|
- "53:53/udp"
|
||||||
|
volumes:
|
||||||
|
- ./private:/private
|
||||||
|
restart: unless-stopped
|
||||||
|
networks:
|
||||||
|
- bind-test-network
|
||||||
|
|
||||||
|
networks:
|
||||||
|
bind-test-network:
|
||||||
|
driver: bridge
|
115
src/bind/tests/scripts/00_e2e_test.sh
Executable file
115
src/bind/tests/scripts/00_e2e_test.sh
Executable file
@ -0,0 +1,115 @@
|
|||||||
|
#!/bin/bash
|
||||||
|
|
||||||
|
# End-to-end test for BIND9 DNS server
|
||||||
|
# This script runs all tests in sequence to validate the complete functionality
|
||||||
|
# Usage: ./00_e2e_test.sh
|
||||||
|
|
||||||
|
set -e
|
||||||
|
|
||||||
|
# Directory holding this script; the individual test scripts are looked up here.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"

printf '%s\n' \
  "==========================================" \
  "BIND9 DNS Server End-to-End Test Suite" \
  "=========================================="

# Counters updated by run_test_step.
total_tests=0
passed_tests=0
failed_tests=0
|
||||||
|
|
||||||
|
#######################################
# Run one test script and record the outcome.
# Globals:   SCRIPT_DIR (read); total_tests, passed_tests, failed_tests (written)
# Arguments: $1 - step label, $2 - script file name inside SCRIPT_DIR,
#            $3 - human-readable description
# Outputs:   progress and result lines on stdout
# Returns:   0 if the script exists and exits 0, 1 otherwise
#######################################
run_test_step() {
  local step_name="$1"
  local script_name="$2"
  local description="$3"
  local script_path="$SCRIPT_DIR/$script_name"

  echo ""
  echo "[$step_name] $description"
  printf '=%.0s' {1..50}
  echo

  # Plain assignments instead of ((var++)): the post-increment evaluates to the
  # old value, so it returns status 1 when the counter is 0 and would abort the
  # script under `set -e` whenever the caller forgets `|| true`.
  total_tests=$((total_tests + 1))

  if [ ! -f "$script_path" ]; then
    echo "✗ Test script not found: $script_name"
    failed_tests=$((failed_tests + 1))
    return 1
  fi

  # Make sure the script is executable.
  chmod +x "$script_path"

  echo "Executing: $script_path"
  if "$script_path"; then
    echo "✓ $step_name completed successfully"
    passed_tests=$((passed_tests + 1))
    return 0
  fi

  echo "✗ $step_name failed"
  failed_tests=$((failed_tests + 1))
  return 1
}
|
||||||
|
|
||||||
|
# Remove leftovers from any earlier run (the Docker image itself is kept).
echo ""
echo "[SETUP] Cleaning up any previous test environment..."
cleanup_script="$SCRIPT_DIR/05_cleanup.sh"
if [ -f "$cleanup_script" ]; then
  chmod +x "$cleanup_script"
  "$cleanup_script" || true
fi

echo ""
echo "Starting BIND9 DNS server end-to-end test sequence..."

# Test sequence. Each entry is "step|script|description"; a failing step does
# not stop the sequence.
for step_spec in \
  "TEST-01|01_start_container.sh|Start BIND9 container" \
  "TEST-02|02_dig_test.sh|Initial DNS resolution test" \
  "TEST-03|03_reload_test.sh|Configuration reload with IP modification" \
  "TEST-03.5|03.5_dns_sync_test.sh|DNS auto-sync functionality test" \
  "TEST-04|04_persistence_test.sh|Configuration persistence after restart"; do
  IFS='|' read -r step_id step_script step_desc <<<"$step_spec"
  run_test_step "$step_id" "$step_script" "$step_desc" || true
done

# Final cleanup (logs are preserved for review).
echo ""
echo "[CLEANUP] Cleaning up test environment..."
run_test_step "CLEANUP" "05_cleanup.sh" "Clean up containers and networks" || true
|
||||||
|
|
||||||
|
# Test summary: print the counters, then exit 0 only if nothing failed.
cat <<EOF

==========================================
TEST SUMMARY
==========================================
Total tests: $total_tests
Passed: $passed_tests
Failed: $failed_tests
EOF

if (( failed_tests != 0 )); then
  cat <<'EOF'

❌ SOME TESTS FAILED!

Please review the test output above to identify and fix issues.
You may need to:
 - Check Docker installation and permissions
 - Verify network connectivity
 - Review BIND9 configuration files
 - Check system resources and port availability
EOF
  exit 1
fi

cat <<'EOF'

✅ ALL TESTS PASSED!

BIND9 DNS server functionality validated:
 ✓ Container startup and basic functionality
 ✓ DNS resolution for configured domains
 ✓ Configuration modification and reload
 ✓ DNS auto-sync from IP files
 ✓ Configuration persistence across restarts
 ✓ Cleanup and resource management

The BIND9 DNS server is ready for production use.
EOF
exit 0
|
38
src/bind/tests/scripts/01_start_container.sh
Executable file
38
src/bind/tests/scripts/01_start_container.sh
Executable file
@ -0,0 +1,38 @@
|
|||||||
|
#!/bin/bash
|
||||||
|
|
||||||
|
# Start BIND9 test container
|
||||||
|
# Usage: ./01_start_container.sh
|
||||||
|
|
||||||
|
set -e
|
||||||
|
|
||||||
|
# Work from the tests directory (parent of this script's directory), where
# `docker compose` is run.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
TEST_DIR="$(dirname "$SCRIPT_DIR")"

cd "$TEST_DIR"

echo "Starting BIND9 test container..."

# The bind-mounted ./private directory must exist and be world-writable.
mkdir -p private
chmod 777 private

# Start the container in the background.
docker compose up -d

echo "Waiting for container to be ready..."
sleep 5

# Fail with the container logs if nothing is reported as "Up".
if ! docker compose ps | grep -q "Up"; then
  echo "✗ Failed to start container"
  docker compose logs
  exit 1
fi

echo "✓ Container started successfully"
echo "Container status:"
docker compose ps

echo ""
echo "BIND9 test environment is ready!"
echo "DNS server listening on localhost:53"
|
72
src/bind/tests/scripts/02_dig_test.sh
Executable file
72
src/bind/tests/scripts/02_dig_test.sh
Executable file
@ -0,0 +1,72 @@
|
|||||||
|
#!/bin/bash

# Test DNS resolution using dig
# Usage: ./02_dig_test.sh

set -e

echo "Testing DNS resolution with dig..."

#######################################
# Query the local DNS server for an A record and compare it to an expectation.
# Arguments:
#   $1 - host label; ".argus.com" is appended to form the query name
#   $2 - expected answer (exact string match against `dig +short` output)
#   $3 - human-readable description printed before the query
# Outputs:
#   Progress and result lines on stdout.
# Returns:
#   0 when the answer equals $2; 1 when dig fails or the answer differs.
#######################################
test_dns_query() {
  local hostname="$1"
  local expected_ip="$2"
  local description="$3"
  local result

  echo ""
  echo "Testing: $description"
  echo "Query: $hostname.argus.com"
  echo "Expected IP: $expected_ip"

  # Check dig's exit status directly. On failure dig can still print
  # diagnostics to stdout, so comparing captured output against a sentinel
  # string does not reliably detect a failed query.
  if ! result=$(dig @localhost "${hostname}.argus.com" A +short 2>/dev/null); then
    echo "✗ DNS query failed"
    return 1
  fi

  if [ "$result" = "$expected_ip" ]; then
    echo "✓ DNS query successful: $result"
    return 0
  fi

  echo "✗ DNS query returned unexpected result: $result"
  return 1
}
|
||||||
|
|
||||||
|
# Install dig on demand (requires root and an apt-based host).
if ! command -v dig &> /dev/null; then
  echo "Installing dig (dnsutils)..."
  apt-get update && apt-get install -y dnsutils
fi

# docker compose resolves its project from the working directory, so run from
# the tests directory (the parent of scripts/) regardless of the caller's cwd.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
TEST_DIR="$(dirname "$SCRIPT_DIR")"
cd "$TEST_DIR"

# Check if container is running
if ! docker compose ps | grep -q "Up"; then
  echo "Error: BIND9 container is not running"
  echo "Please start the container first with: ./01_start_container.sh"
  exit 1
fi

echo "=== DNS Resolution Tests ==="

# Test cases based on current configuration
failed_tests=0

# NOTE: the counter is bumped with $(( )) rather than ((failed_tests++)).
# The post-increment form evaluates to 0 on the first failure, which makes
# (( )) return status 1 and, under `set -e`, aborts the script before the
# summary is printed.

# Test ns1.argus.com -> 127.0.0.1
if ! test_dns_query "ns1" "127.0.0.1" "Name server resolution"; then
  failed_tests=$((failed_tests + 1))
fi

# Test web.argus.com -> 12.4.5.6
if ! test_dns_query "web" "12.4.5.6" "Web server resolution"; then
  failed_tests=$((failed_tests + 1))
fi

echo ""
echo "=== Test Summary ==="
if [ "$failed_tests" -eq 0 ]; then
  echo "✓ All DNS tests passed!"
  exit 0
else
  echo "✗ $failed_tests test(s) failed"
  exit 1
fi
|
256
src/bind/tests/scripts/03.5_dns_sync_test.sh
Executable file
256
src/bind/tests/scripts/03.5_dns_sync_test.sh
Executable file
@ -0,0 +1,256 @@
|
|||||||
|
#!/bin/bash

# Test DNS auto-sync functionality using argus_dns_sync.sh
# This test validates the automatic DNS record updates from IP files
# Usage: ./03.5_dns_sync_test.sh

set -e

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
TEST_DIR="$(dirname "$SCRIPT_DIR")"

# docker compose resolves its project from the working directory; the start
# and cleanup scripts run it from TEST_DIR, so do the same here instead of
# depending on the caller's cwd.
cd "$TEST_DIR"

echo "=== DNS Auto-Sync Functionality Test ==="

# Check if container is running
if ! docker compose ps | grep -q "Up"; then
  echo "Error: BIND9 container is not running"
  echo "Please start the container first with: ./01_start_container.sh"
  exit 1
fi

# Install dig on demand (requires root and an apt-based host).
if ! command -v dig &> /dev/null; then
  echo "Installing dig (dnsutils)..."
  apt-get update && apt-get install -y dnsutils
fi
|
||||||
|
|
||||||
|
#######################################
# Query the local DNS server for an A record and compare it to an expectation.
# Arguments:
#   $1 - host label; ".argus.com" is appended to form the query name
#   $2 - expected answer (exact string match against `dig +short` output)
#   $3 - human-readable description printed before the query
# Outputs:
#   Progress and result lines on stdout.
# Returns:
#   0 when the answer equals $2, 1 otherwise (including when dig fails).
#######################################
test_dns_query() {
  local hostname="$1"
  local expected_ip="$2"
  local description="$3"
  local result  # kept local so it cannot clobber the script-level `result`

  echo "Testing: $description"
  echo "Query: $hostname.argus.com -> Expected: $expected_ip"

  # Wait a moment for DNS cache
  sleep 2

  # Use the sentinel alone on failure; dig may also have written diagnostics
  # to stdout, which would otherwise be glued in front of it.
  result=$(dig @localhost "${hostname}.argus.com" A +short 2>/dev/null) \
    || result="QUERY_FAILED"

  if [ "$result" = "$expected_ip" ]; then
    echo "✓ $result"
    return 0
  else
    echo "✗ Got: $result, Expected: $expected_ip"
    return 1
  fi
}
|
||||||
|
|
||||||
|
#######################################
# Poll the bind9 container until the DNS sync lock file is gone.
# Arguments:
#   $1 - (optional) maximum wait in seconds, default 15
#   $2 - (optional) poll interval in seconds, default 2
# Outputs:
#   Progress messages on stdout.
# Returns:
#   Always 0. A timeout only produces a warning so the caller's assertions
#   decide whether the sync actually succeeded.
# NOTE(review): an absent lock file is treated as "sync finished"; if the sync
# cycle has not started yet this returns immediately - confirm against
# argus_dns_sync.sh.
#######################################
wait_for_sync() {
  local timeout="${1:-15}"
  local interval="${2:-2}"
  local elapsed=0

  echo "Waiting for DNS sync to complete (max ${timeout}s)..."

  while [ "$elapsed" -lt "$timeout" ]; do
    if docker compose exec bind9 test -f /var/lock/argus_dns_sync.lock; then
      echo "Sync process is running..."
    else
      echo "Sync completed"
      sleep 2  # Extra wait for DNS propagation
      return 0
    fi
    sleep "$interval"
    elapsed=$((elapsed + interval))
  done

  echo "Warning: Sync may still be running after ${timeout}s"
  return 0
}
|
||||||
|
|
||||||
|
# Main flow of the auto-sync test: prepare the container, drop IP files into
# the watched directory, verify the resulting DNS records, then clean up.
#
# NOTE: failure counters are bumped with $(( )) rather than ((failed_tests++)).
# The post-increment form evaluates to 0 on the first failure, which makes
# (( )) return status 1 and, under `set -e`, aborts the script before cleanup
# and the summary.

echo ""
echo "Step 1: Preparing test environment..."

# Ensure required directories exist
docker compose exec bind9 mkdir -p /private/argus/etc
docker compose exec bind9 mkdir -p /private/argus/bind/.backup

# Backup original configuration if it exists
docker compose exec bind9 test -f /private/argus/bind/db.argus.com && \
  docker compose exec bind9 cp /private/argus/bind/db.argus.com /private/argus/bind/db.argus.com.backup.test || true

# Ensure initial configuration is available (may already be symlinked)
docker compose exec bind9 test -f /private/argus/bind/db.argus.com || \
  docker compose exec bind9 cp /etc/bind/db.argus.com /private/argus/bind/db.argus.com

echo "✓ Test environment prepared"

echo ""
echo "Step 2: Testing initial DNS configuration..."

# Get current IP for web.argus.com (may have been changed by previous tests).
# The sentinel replaces the value entirely when dig fails, so any diagnostics
# dig printed to stdout cannot slip past the check below.
current_web_ip=$(dig @localhost web.argus.com A +short 2>/dev/null) \
  || current_web_ip="UNKNOWN"
echo "Current web.argus.com IP: $current_web_ip"

# Test that DNS is working (regardless of specific IP)
if [ "$current_web_ip" = "UNKNOWN" ] || [ -z "$current_web_ip" ]; then
  echo "DNS resolution not working for web.argus.com"
  exit 1
fi

echo "✓ DNS resolution is working"

echo ""
echo "Step 3: Creating IP files for auto-sync..."

# Create test IP files in the watch directory. The second and third files
# embed the address in free text to exercise IP extraction.
echo "Creating test1.argus.com with IP 10.0.0.100"
docker compose exec bind9 bash -c 'echo "10.0.0.100" > /private/argus/etc/test1.argus.com'

echo "Creating test2.argus.com with IP 10.0.0.200"
docker compose exec bind9 bash -c 'echo "test2 service running on 10.0.0.200" > /private/argus/etc/test2.argus.com'

echo "Creating api.argus.com with IP 192.168.1.50"
docker compose exec bind9 bash -c 'echo "API server: 192.168.1.50 port 8080" > /private/argus/etc/api.argus.com'

echo "✓ IP files created"

echo ""
echo "Step 4: Checking DNS sync process..."

# Check if DNS sync process is already running (via supervisord)
if docker compose exec bind9 pgrep -f argus_dns_sync.sh > /dev/null; then
  echo "✓ DNS sync process already running (via supervisord)"
else
  echo "Starting DNS sync process manually..."
  # Start the DNS sync process in background if not running
  docker compose exec -d bind9 /usr/local/bin/argus_dns_sync.sh
  echo "✓ DNS sync process started manually"
fi

# Wait for first sync cycle
wait_for_sync

echo ""
echo "Step 5: Testing auto-synced DNS records..."

failed_tests=0

# Test new DNS records created by auto-sync
if ! test_dns_query "test1" "10.0.0.100" "Auto-synced test1.argus.com"; then
  failed_tests=$((failed_tests + 1))
fi

if ! test_dns_query "test2" "10.0.0.200" "Auto-synced test2.argus.com"; then
  failed_tests=$((failed_tests + 1))
fi

if ! test_dns_query "api" "192.168.1.50" "Auto-synced api.argus.com"; then
  failed_tests=$((failed_tests + 1))
fi

# Verify original records still work (use current IP from earlier)
if ! test_dns_query "web" "$current_web_ip" "Original web.argus.com still working"; then
  failed_tests=$((failed_tests + 1))
fi

if ! test_dns_query "ns1" "127.0.0.1" "Original ns1.argus.com still working"; then
  failed_tests=$((failed_tests + 1))
fi

echo ""
echo "Step 6: Testing IP update functionality..."

# Update an existing IP file
echo "Updating test1.argus.com IP from 10.0.0.100 to 10.0.0.150"
docker compose exec bind9 bash -c 'echo "10.0.0.150" > /private/argus/etc/test1.argus.com'

# Wait for sync
wait_for_sync

# Test updated record
if ! test_dns_query "test1" "10.0.0.150" "Updated test1.argus.com IP"; then
  failed_tests=$((failed_tests + 1))
fi

echo ""
echo "Step 7: Testing invalid IP handling..."

# Create file with invalid IP
echo "Creating invalid.argus.com with invalid IP"
docker compose exec bind9 bash -c 'echo "this is not an IP address" > /private/argus/etc/invalid.argus.com'

# Wait for sync (should skip invalid IP)
wait_for_sync

# Verify invalid record was not added (should fail to resolve). A failed dig
# counts as "no result" instead of being mistaken for a returned record.
result=$(dig @localhost invalid.argus.com A +short 2>/dev/null) \
  || result="NO_RESULT"
if [ "$result" = "NO_RESULT" ] || [ -z "$result" ]; then
  echo "✓ Invalid IP correctly ignored"
else
  echo "✗ Invalid IP was processed: $result"
  failed_tests=$((failed_tests + 1))
fi

echo ""
echo "Step 8: Verifying backup functionality..."

# Check if backups were created
backup_count=$(docker compose exec bind9 ls -1 /private/argus/bind/.backup/ | wc -l || echo "0")
if [ "$backup_count" -gt 0 ]; then
  echo "✓ Configuration backups created ($backup_count files)"
  # Show latest backup
  docker compose exec bind9 ls -la /private/argus/bind/.backup/ | tail -1
else
  echo "✗ No backup files found"
  failed_tests=$((failed_tests + 1))
fi

echo ""
echo "Step 9: Cleanup..."

# Note: We don't stop the DNS sync process since it's managed by supervisord
echo "Note: DNS sync process will continue running (managed by supervisord)"

# Clean up test files
docker compose exec bind9 rm -f /private/argus/etc/test1.argus.com
docker compose exec bind9 rm -f /private/argus/etc/test2.argus.com
docker compose exec bind9 rm -f /private/argus/etc/api.argus.com
docker compose exec bind9 rm -f /private/argus/etc/invalid.argus.com

# Restore original configuration if backup exists
docker compose exec bind9 test -f /private/argus/bind/db.argus.com.backup.test && \
  docker compose exec bind9 cp /private/argus/bind/db.argus.com.backup.test /private/argus/bind/db.argus.com && \
  docker compose exec bind9 rm /private/argus/bind/db.argus.com.backup.test || true

# Reload original configuration
docker compose exec bind9 /usr/local/bin/reload-bind9.sh

echo "✓ Cleanup completed"

echo ""
echo "=== DNS Auto-Sync Test Summary ==="
if [ "$failed_tests" -eq 0 ]; then
  echo "✅ All DNS auto-sync tests passed!"
  echo ""
  echo "Validated functionality:"
  echo " ✓ Automatic DNS record creation from IP files"
  echo " ✓ IP address extraction from various file formats"
  echo " ✓ Dynamic DNS record updates"
  echo " ✓ Invalid IP address handling"
  echo " ✓ Configuration backup mechanism"
  echo " ✓ Preservation of existing DNS records"
  echo ""
  echo "The DNS auto-sync functionality is working correctly!"
  exit 0
else
  echo "❌ $failed_tests DNS auto-sync test(s) failed!"
  echo ""
  echo "Please check:"
  echo " - argus_dns_sync.sh script configuration"
  echo " - File permissions in /private/argus/etc/"
  echo " - BIND9 reload functionality"
  echo " - Network connectivity and DNS resolution"
  exit 1
fi
|
112
src/bind/tests/scripts/03_reload_test.sh
Executable file
112
src/bind/tests/scripts/03_reload_test.sh
Executable file
@ -0,0 +1,112 @@
|
|||||||
|
#!/bin/bash

# Test DNS configuration reload with IP modification
# Usage: ./03_reload_test.sh

set -e

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
TEST_DIR="$(dirname "$SCRIPT_DIR")"

# docker compose resolves its project from the working directory; the start
# and cleanup scripts run it from TEST_DIR, so do the same here instead of
# depending on the caller's cwd.
cd "$TEST_DIR"

echo "=== DNS Configuration Reload Test ==="

# Check if container is running
if ! docker compose ps | grep -q "Up"; then
  echo "Error: BIND9 container is not running"
  echo "Please start the container first with: ./01_start_container.sh"
  exit 1
fi

# Install dig on demand (requires root and an apt-based host).
if ! command -v dig &> /dev/null; then
  echo "Installing dig (dnsutils)..."
  apt-get update && apt-get install -y dnsutils
fi
|
||||||
|
|
||||||
|
#######################################
# Query the local DNS server for an A record and compare it to an expectation.
# Arguments:
#   $1 - host label; ".argus.com" is appended to form the query name
#   $2 - expected answer (exact string match against `dig +short` output)
#   $3 - human-readable description printed before the query
# Outputs:
#   Progress and result lines on stdout.
# Returns:
#   0 when the answer equals $2, 1 otherwise (including when dig fails).
#######################################
test_dns_query() {
  local hostname="$1"
  local expected_ip="$2"
  local description="$3"
  local result  # local: do not leak the answer into the script's globals

  echo "Testing: $description"
  echo "Query: $hostname.argus.com -> Expected: $expected_ip"

  # Use the sentinel alone on failure; dig may also have written diagnostics
  # to stdout, which would otherwise be glued in front of it.
  result=$(dig @localhost "${hostname}.argus.com" A +short 2>/dev/null) \
    || result="QUERY_FAILED"

  if [ "$result" = "$expected_ip" ]; then
    echo "✓ $result"
    return 0
  else
    echo "✗ Got: $result, Expected: $expected_ip"
    return 1
  fi
}
|
||||||
|
|
||||||
|
# Main flow of the reload test: check the initial record, edit the persisted
# zone file on the host, reload BIND9 in the container, and check the result.

echo ""
echo "Step 1: Testing initial DNS configuration..."

# Test initial configuration
if ! test_dns_query "web" "12.4.5.6" "Initial web.argus.com resolution"; then
  echo "Initial DNS test failed"
  exit 1
fi

echo ""
echo "Step 2: Modifying DNS configuration..."

DB_FILE="$TEST_DIR/private/argus/bind/db.argus.com"

# Backup original configuration (best effort: the file may not exist yet)
cp "$DB_FILE" "$DB_FILE.backup" 2>/dev/null || true

# Check if persistent config exists, if not use from container
if [ ! -f "$DB_FILE" ]; then
  echo "Persistent config not found, copying from container..."
  docker compose exec bind9 cp /etc/bind/db.argus.com /private/argus/bind/db.argus.com
  docker compose exec bind9 chown bind:bind /private/argus/bind/db.argus.com
fi

# Modify the IP address (12.4.5.6 -> 192.168.1.100)
sed -i 's/12\.4\.5\.6/192.168.1.100/g' "$DB_FILE"

# Increment the SOA serial so BIND treats the zone as changed.
# The serial is taken to be the first number directly followed by a ';'
# comment in the zone file. The previous implementation only recognised a
# serial of exactly "2" and aborted silently on any re-run.
# NOTE(review): assumes the "<serial> ; ..." layout the original pattern
# relied on - confirm against db.argus.com.
serial_pattern='[0-9]+[[:space:]]*;'
serial_line=$(grep -n -m1 -E "$serial_pattern" "$DB_FILE" | cut -d: -f1) || true
serial_match=$(grep -m1 -oE "$serial_pattern" "$DB_FILE" | head -n1) || true
current_serial=${serial_match%%[!0-9]*}
if [ -z "$serial_line" ] || [ -z "$current_serial" ]; then
  echo "✗ Could not locate the SOA serial number in $DB_FILE"
  exit 1
fi
# 10# forces base 10 so a serial with leading zeros is not parsed as octal.
new_serial=$((10#$current_serial + 1))
sed -i -E "${serial_line}s/[0-9]+([[:space:]]*;)/${new_serial}\1/" "$DB_FILE"

echo "Modified configuration:"
echo "- Changed web.argus.com IP: 12.4.5.6 -> 192.168.1.100"
echo "- Updated serial number: $current_serial -> $new_serial"

echo ""
echo "Step 3: Reloading BIND9 configuration..."

# Reload BIND9 configuration
docker compose exec bind9 /usr/local/bin/reload-bind9.sh

echo "Configuration reloaded"

# Wait a moment for changes to take effect
sleep 3

echo ""
echo "Step 4: Testing modified DNS configuration..."

# Test modified configuration
if ! test_dns_query "web" "192.168.1.100" "Modified web.argus.com resolution"; then
  echo "Modified DNS test failed"
  exit 1
fi

# Also verify ns1 still works
if ! test_dns_query "ns1" "127.0.0.1" "ns1.argus.com still working"; then
  echo "ns1 DNS test failed after reload"
  exit 1
fi

echo ""
echo "✓ DNS configuration reload test completed successfully!"
echo "✓ IP address changed from 12.4.5.6 to 192.168.1.100"
echo "✓ Configuration persisted and reloaded correctly"
|
115
src/bind/tests/scripts/04_persistence_test.sh
Executable file
115
src/bind/tests/scripts/04_persistence_test.sh
Executable file
@ -0,0 +1,115 @@
|
|||||||
|
#!/bin/bash

# Test configuration persistence after container restart
# Usage: ./04_persistence_test.sh

set -e

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
TEST_DIR="$(dirname "$SCRIPT_DIR")"

# docker compose resolves its project from the working directory; the start
# and cleanup scripts run it from TEST_DIR, so do the same here instead of
# depending on the caller's cwd.
cd "$TEST_DIR"

echo "=== Configuration Persistence Test ==="

# Install dig on demand (requires root and an apt-based host).
if ! command -v dig &> /dev/null; then
  echo "Installing dig (dnsutils)..."
  apt-get update && apt-get install -y dnsutils
fi
|
||||||
|
|
||||||
|
#######################################
# Query the local DNS server for an A record and compare it to an expectation.
# Arguments:
#   $1 - host label; ".argus.com" is appended to form the query name
#   $2 - expected answer (exact string match against `dig +short` output)
#   $3 - human-readable description printed before the query
# Outputs:
#   Progress and result lines on stdout.
# Returns:
#   0 when the answer equals $2, 1 otherwise (including when dig fails).
#######################################
test_dns_query() {
  local hostname="$1"
  local expected_ip="$2"
  local description="$3"
  local result  # local: do not leak the answer into the script's globals

  echo "Testing: $description"
  echo "Query: $hostname.argus.com -> Expected: $expected_ip"

  # Use the sentinel alone on failure; dig may also have written diagnostics
  # to stdout, which would otherwise be glued in front of it.
  result=$(dig @localhost "${hostname}.argus.com" A +short 2>/dev/null) \
    || result="QUERY_FAILED"

  if [ "$result" = "$expected_ip" ]; then
    echo "✓ $result"
    return 0
  else
    echo "✗ Got: $result, Expected: $expected_ip"
    return 1
  fi
}
|
||||||
|
|
||||||
|
# Main flow of the persistence test: stop the container, confirm the modified
# zone file is still on the host, restart, and verify DNS still serves it.

echo ""
echo "Step 1: Stopping current container..."

# Stop the container
docker compose down

echo "Container stopped"

echo ""
echo "Step 2: Verifying persistent configuration exists..."

# Check if modified configuration exists
DB_FILE="$TEST_DIR/private/argus/bind/db.argus.com"

if [ ! -f "$DB_FILE" ]; then
  echo "✗ Persistent configuration file not found: $DB_FILE"
  exit 1
fi

# Check if the modified IP is in the configuration.
# -F matches the address literally; as a regex each '.' would match any
# character and could accept an unrelated value.
if grep -qF "192.168.1.100" "$DB_FILE"; then
  echo "✓ Modified IP (192.168.1.100) found in persistent configuration"
else
  echo "✗ Modified IP not found in persistent configuration"
  echo "Configuration content:"
  cat "$DB_FILE"
  exit 1
fi

echo ""
echo "Step 3: Restarting container with persistent configuration..."

# Start the container again
docker compose up -d

echo "Waiting for container to be ready..."
sleep 5

# Check if container is running
if ! docker compose ps | grep -q "Up"; then
  echo "✗ Failed to restart container"
  docker compose logs
  exit 1
fi

echo "✓ Container restarted successfully"

echo ""
echo "Step 4: Testing DNS resolution after restart..."

# Wait a bit more for DNS to be fully ready
sleep 5

# Test that the modified configuration is still active
if ! test_dns_query "web" "192.168.1.100" "Persistent web.argus.com resolution"; then
  echo "✗ Persistent configuration test failed"
  exit 1
fi

# Also verify ns1 still works
if ! test_dns_query "ns1" "127.0.0.1" "ns1.argus.com still working"; then
  echo "✗ ns1 DNS test failed after restart"
  exit 1
fi

echo ""
echo "Step 5: Verifying configuration files are linked correctly..."

# List the config files inside the container so the links can be inspected.
echo "Checking file links in container:"
docker compose exec bind9 ls -la /etc/bind/named.conf.local /etc/bind/db.argus.com

echo ""
echo "✓ Configuration persistence test completed successfully!"
echo "✓ Modified IP (192.168.1.100) persisted after container restart"
echo "✓ Configuration files properly linked to persistent storage"
echo "✓ DNS resolution working correctly with persisted configuration"
|
87
src/bind/tests/scripts/05_cleanup.sh
Executable file
87
src/bind/tests/scripts/05_cleanup.sh
Executable file
@ -0,0 +1,87 @@
|
|||||||
|
#!/bin/bash

# Clean up test environment and containers
# Usage: ./05_cleanup.sh [--full] [--keep-data]
#
# Persistent data is removed by default (equivalent to --full). --keep-data
# opts out; previously the "preserve" branch below could never be reached
# because the default and --full both selected a full cleanup.

set -e

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
TEST_DIR="$(dirname "$SCRIPT_DIR")"

# Print command-line help.
usage() {
  echo "Usage: $0 [--full] [--keep-data]"
  echo " --full: Also remove persistent data (default)"
  echo " --keep-data: Keep the persistent data directory (private/)"
}

# Parse command line arguments
FULL_CLEANUP=true
while [[ $# -gt 0 ]]; do
  case "$1" in
    --full)
      FULL_CLEANUP=true
      shift
      ;;
    --keep-data)
      FULL_CLEANUP=false
      shift
      ;;
    -h|--help)
      usage
      exit 0
      ;;
    *)
      echo "Unknown option: $1"
      usage
      exit 1
      ;;
  esac
done

cd "$TEST_DIR"

echo "=== Cleaning up BIND9 test environment ==="

echo ""
echo "Step 1: Stopping and removing containers..."

# Stop and remove containers (and the project's volumes)
docker compose down -v

echo "✓ Containers stopped and removed"

echo ""
echo "Step 2: Removing Docker networks..."

# Best effort. NOTE(review): `docker network prune` removes every unused
# network on the host, not only this project's - confirm that is intended.
docker network prune -f > /dev/null 2>&1 || true

echo "✓ Docker networks cleaned"

if [ "$FULL_CLEANUP" = true ]; then
  echo ""
  echo "Step 3: Removing persistent data..."

  # Remove persistent data directory
  if [ -d "private" ]; then
    rm -rf -- private
    echo "✓ Persistent data directory removed"
  else
    echo "✓ No persistent data directory found"
  fi
else
  echo ""
  echo "Step 3: Preserving persistent data and Docker image..."
  echo "✓ Persistent data preserved in: private/"
  echo "✓ Docker image 'argus-bind9:latest' preserved"
  echo ""
  echo "To also remove persistent data, run:"
  echo " $0 --full"
fi

echo ""
echo "=== Cleanup Summary ==="
echo "✓ Containers stopped and removed"
echo "✓ Docker networks cleaned"

if [ "$FULL_CLEANUP" = true ]; then
  echo "✓ Persistent data removed"
  echo ""
  echo "Full cleanup completed! Test environment completely removed."
else
  echo "✓ Persistent data preserved"
  echo "✓ Docker image preserved"
  echo ""
  echo "Basic cleanup completed! Run './01_start_container.sh' to restart testing."
fi

echo ""
echo "Test environment cleanup finished."
|
5
src/log/.gitignore
vendored
Normal file
5
src/log/.gitignore
vendored
Normal file
@ -0,0 +1,5 @@
|
|||||||
|
|
||||||
|
private/
|
||||||
|
|
||||||
|
|
||||||
|
images/
|
@ -0,0 +1,8 @@
|
|||||||
|
|
||||||
|
测试log模块开发
|
||||||
|
|
||||||
|
elasticsearch: 部署镜像构建及启动脚本(解决账号问题、挂载目录、使用supervisor守护)
|
||||||
|
kibana: 镜像构建
|
||||||
|
fluent-bit: 安装包,脚本准备, 交付给大鹏统一组织客户端侧安装流程
|
||||||
|
init: EK初始化脚本:数据视图创建脚本等
|
||||||
|
|
56
src/log/elasticsearch/build/Dockerfile
Normal file
56
src/log/elasticsearch/build/Dockerfile
Normal file
@ -0,0 +1,56 @@
|
|||||||
|
FROM docker.elastic.co/elasticsearch/elasticsearch:8.13.4

# System-level changes below require root.
USER root

# Re-map the elasticsearch account to fixed UID/GID values and re-own its
# installation tree to match.
RUN usermod -u 2133 elasticsearch \
 && groupmod -g 2015 elasticsearch \
 && chown -R elasticsearch:elasticsearch /usr/share/elasticsearch

# Build-time switch: pass --build-arg USE_INTRANET=true to use internal mirrors.
ARG USE_INTRANET=false

# Build-time apt source: point apt at the intranet mirror and disable HTTPS
# peer/host verification (only when the intranet option is selected).
RUN if [ "$USE_INTRANET" = "true" ]; then \
      echo "Configuring intranet apt sources..." && \
      cp /etc/apt/sources.list /etc/apt/sources.list.bak && \
      echo "deb [trusted=yes] http://10.68.64.1/ubuntu2204/ jammy main" > /etc/apt/sources.list && \
      echo 'Acquire::https::Verify-Peer "false";' > /etc/apt/apt.conf.d/99disable-ssl-check && \
      echo 'Acquire::https::Verify-Host "false";' >> /etc/apt/apt.conf.d/99disable-ssl-check; \
    fi

# Install supervisor plus basic network/debug tooling, then drop apt caches.
RUN apt-get update \
 && apt-get install -y supervisor net-tools inetutils-ping vim \
 && apt-get clean \
 && rm -rf /var/lib/apt/lists/*

# Deploy-time apt source: switch the image to the mirror used where it runs.
RUN if [ "$USE_INTRANET" = "true" ]; then \
      echo "deb [trusted=yes] https://10.92.132.52/mirrors/ubuntu2204/ jammy main" > /etc/apt/sources.list; \
    fi

# Log directory referenced by supervisord.conf.
RUN mkdir -p /var/log/supervisor

# Supervisor configuration, the Elasticsearch launcher and the DNS monitor.
COPY src/log/elasticsearch/build/supervisord.conf /etc/supervisor/conf.d/supervisord.conf
COPY src/log/elasticsearch/build/start-es-supervised.sh /usr/local/bin/start-es-supervised.sh
COPY src/log/elasticsearch/build/dns-monitor.sh /usr/local/bin/dns-monitor.sh
RUN chmod +x /usr/local/bin/start-es-supervised.sh /usr/local/bin/dns-monitor.sh

# Stay root; supervisord switches to the per-program user itself.
USER root

EXPOSE 9200 9300

# supervisord is the container's main process.
CMD ["/usr/bin/supervisord", "-c", "/etc/supervisor/conf.d/supervisord.conf"]
|
1
src/log/elasticsearch/build/dns-monitor.sh
Symbolic link
1
src/log/elasticsearch/build/dns-monitor.sh
Symbolic link
@ -0,0 +1 @@
|
|||||||
|
../../../bind/build/dns-monitor.sh
|
32
src/log/elasticsearch/build/start-es-supervised.sh
Normal file
32
src/log/elasticsearch/build/start-es-supervised.sh
Normal file
@ -0,0 +1,32 @@
|
|||||||
|
#!/bin/bash
# Launcher run by supervisord: links the Elasticsearch data directory to
# persistent storage, records the container IP for DNS registration, then
# hands over to the stock Elasticsearch entrypoint.
set -euo pipefail

readonly DATA_DIR=/private/argus/log/elasticsearch
readonly ES_DATA_LINK=/usr/share/elasticsearch/data
readonly ETC_DIR=/private/argus/etc
readonly DOMAIN=es.log.argus.com

echo "[INFO] Starting Elasticsearch under supervisor..."

# Create the persistent data directory if it does not exist yet.
mkdir -p "$DATA_DIR"

# Replace whatever currently sits at the expected data path (stale symlink or
# the image's own directory) with a symlink to persistent storage.
if [ -L "$ES_DATA_LINK" ]; then
  rm "$ES_DATA_LINK"
elif [ -d "$ES_DATA_LINK" ]; then
  rm -rf "$ES_DATA_LINK"
fi

ln -sf "$DATA_DIR" "$ES_DATA_LINK"

# Record the container's eth0 IPv4 address in a file named after the domain.
# Under pipefail a missing interface or address used to abort the script
# without any message; fail with an explicit error instead.
IP=$(ifconfig eth0 | awk '/inet /{print $2; exit}') || IP=""
if [ -z "$IP" ]; then
  echo "[ERROR] Could not determine the IPv4 address of eth0" >&2
  exit 1
fi
echo "current IP: ${IP}"
mkdir -p "$ETC_DIR"
echo "$IP" > "${ETC_DIR}/${DOMAIN}"

echo "[INFO] Data directory linked: /usr/share/elasticsearch/data -> /private/argus/log/elasticsearch"

# Default JVM heap unless the caller (e.g. docker-compose) already set it.
export ES_JAVA_OPTS="${ES_JAVA_OPTS:-"-Xms512m -Xmx512m"}"

echo "[INFO] Starting Elasticsearch process..."

# exec so Elasticsearch replaces this shell and receives supervisor's signals.
exec /usr/local/bin/docker-entrypoint.sh elasticsearch
|
39
src/log/elasticsearch/build/supervisord.conf
Normal file
39
src/log/elasticsearch/build/supervisord.conf
Normal file
@ -0,0 +1,39 @@
|
|||||||
|
[supervisord]
|
||||||
|
nodaemon=true
|
||||||
|
logfile=/var/log/supervisor/supervisord.log
|
||||||
|
pidfile=/var/run/supervisord.pid
|
||||||
|
user=root
|
||||||
|
|
||||||
|
[program:elasticsearch]
|
||||||
|
command=/usr/local/bin/start-es-supervised.sh
|
||||||
|
user=elasticsearch
|
||||||
|
stdout_logfile=/var/log/supervisor/elasticsearch.log
|
||||||
|
stderr_logfile=/var/log/supervisor/elasticsearch_error.log
|
||||||
|
autorestart=true
|
||||||
|
startretries=3
|
||||||
|
startsecs=30
|
||||||
|
stopwaitsecs=30
|
||||||
|
killasgroup=true
|
||||||
|
stopasgroup=true
|
||||||
|
|
||||||
|
[program:dns-monitor]
|
||||||
|
command=/usr/local/bin/dns-monitor.sh
|
||||||
|
user=root
|
||||||
|
stdout_logfile=/var/log/supervisor/dns-monitor.log
|
||||||
|
stderr_logfile=/var/log/supervisor/dns-monitor_error.log
|
||||||
|
autorestart=true
|
||||||
|
startretries=3
|
||||||
|
startsecs=5
|
||||||
|
stopwaitsecs=10
|
||||||
|
killasgroup=true
|
||||||
|
stopasgroup=true
|
||||||
|
|
||||||
|
[unix_http_server]
|
||||||
|
file=/var/run/supervisor.sock
|
||||||
|
chmod=0700
|
||||||
|
|
||||||
|
[supervisorctl]
|
||||||
|
serverurl=unix:///var/run/supervisor.sock
|
||||||
|
|
||||||
|
[rpcinterface:supervisor]
|
||||||
|
supervisor.rpcinterface_factory = supervisor.rpcinterface:make_main_rpcinterface
|
37
src/log/fluent-bit/build/etc/fluent-bit.conf
Normal file
37
src/log/fluent-bit/build/etc/fluent-bit.conf
Normal file
@ -0,0 +1,37 @@
|
|||||||
|
[SERVICE]
|
||||||
|
Daemon Off
|
||||||
|
Parsers_File parsers.conf
|
||||||
|
HTTP_Server On
|
||||||
|
HTTP_Listen 0.0.0.0
|
||||||
|
HTTP_Port 2020
|
||||||
|
storage.path /buffers
|
||||||
|
storage.sync normal
|
||||||
|
storage.checksum on
|
||||||
|
storage.backlog.mem_limit 128M
|
||||||
|
# 备注:该镜像默认未开启 Hot Reload,修改配置后请重启容器。
|
||||||
|
|
||||||
|
@INCLUDE inputs.d/*.conf
|
||||||
|
|
||||||
|
[FILTER]
|
||||||
|
Name parser
|
||||||
|
Match app.*
|
||||||
|
Key_Name log
|
||||||
|
Parser timestamp_parser
|
||||||
|
Reserve_Data On
|
||||||
|
Preserve_Key On
|
||||||
|
Unescape_Key On
|
||||||
|
|
||||||
|
[FILTER]
|
||||||
|
Name record_modifier
|
||||||
|
Match *
|
||||||
|
Record cluster ${CLUSTER}
|
||||||
|
Record rack ${RACK}
|
||||||
|
Record host ${HOSTNAME}
|
||||||
|
|
||||||
|
[FILTER]
|
||||||
|
Name lua
|
||||||
|
Match app.*
|
||||||
|
script inject_labels.lua
|
||||||
|
call add_labels
|
||||||
|
|
||||||
|
@INCLUDE outputs.d/*.conf
|
15
src/log/fluent-bit/build/etc/inject_labels.lua
Normal file
15
src/log/fluent-bit/build/etc/inject_labels.lua
Normal file
@ -0,0 +1,15 @@
|
|||||||
|
-- Fluent Bit Lua filter callback that labels a record before it is shipped.
--
-- Arguments:
--   tag    - tag of the incoming record (not used)
--   ts     - record timestamp; handed back unchanged
--   record - table of record fields; updated in place
--
-- job_id / user / model / gpu_id are resolved in this order: the FB_*
-- environment variable, the value already on the record, then a fixed
-- fallback ("unknown", or "na" for gpu_id).
-- "role" is derived from record["log_path"]; an existing role is kept only
-- when the path matches neither log directory.
--
-- Returns 2, ts, record. Code 2 tells Fluent Bit that the record changed while
-- the original timestamp must be kept, so the timestamp is not round-tripped
-- through a Lua number.
function add_labels(tag, ts, record)
    record["job_id"] = os.getenv("FB_JOB_ID") or record["job_id"] or "unknown"
    record["user"]   = os.getenv("FB_USER")   or record["user"]   or "unknown"
    record["model"]  = os.getenv("FB_MODEL")  or record["model"]  or "unknown"
    record["gpu_id"] = os.getenv("FB_GPU_ID") or record["gpu_id"] or "na"

    local p = record["log_path"] or ""
    -- Fourth argument `true` = plain substring search, so the path fragment is
    -- never interpreted as a Lua pattern.
    if string.find(p, "/logs/infer/", 1, true) then
        record["role"] = "infer"
    elseif string.find(p, "/logs/train/", 1, true) then
        record["role"] = "train"
    else
        record["role"] = record["role"] or "app"
    end

    return 2, ts, record
end
|
10
src/log/fluent-bit/build/etc/inputs.d/10-train.conf
Normal file
10
src/log/fluent-bit/build/etc/inputs.d/10-train.conf
Normal file
@ -0,0 +1,10 @@
|
|||||||
|
[INPUT]
|
||||||
|
Name tail
|
||||||
|
Path /logs/train/*.log
|
||||||
|
Tag app.train
|
||||||
|
Path_Key log_path
|
||||||
|
Refresh_Interval 5
|
||||||
|
DB /buffers/train.db
|
||||||
|
Skip_Long_Lines On
|
||||||
|
storage.type filesystem
|
||||||
|
multiline.parser python,go,java
|
10
src/log/fluent-bit/build/etc/inputs.d/20-infer.conf
Normal file
10
src/log/fluent-bit/build/etc/inputs.d/20-infer.conf
Normal file
@ -0,0 +1,10 @@
|
|||||||
|
[INPUT]
|
||||||
|
Name tail
|
||||||
|
Path /logs/infer/*.log
|
||||||
|
Tag app.infer
|
||||||
|
Path_Key log_path
|
||||||
|
Refresh_Interval 5
|
||||||
|
DB /buffers/infer.db
|
||||||
|
Skip_Long_Lines On
|
||||||
|
storage.type filesystem
|
||||||
|
multiline.parser python,go,java
|
24
src/log/fluent-bit/build/etc/outputs.d/10-es.conf
Normal file
24
src/log/fluent-bit/build/etc/outputs.d/10-es.conf
Normal file
@ -0,0 +1,24 @@
|
|||||||
|
# 重要:使用 Logstash_Format + Logstash_Prefix,生成 train-*/infer-* 索引
|
||||||
|
[OUTPUT]
|
||||||
|
Name es
|
||||||
|
Match app.train
|
||||||
|
Host ${ES_HOST}
|
||||||
|
Port ${ES_PORT}
|
||||||
|
Logstash_Format On
|
||||||
|
Logstash_Prefix train
|
||||||
|
Replace_Dots On
|
||||||
|
Generate_ID On
|
||||||
|
Retry_Limit False
|
||||||
|
Suppress_Type_Name On
|
||||||
|
|
||||||
|
[OUTPUT]
|
||||||
|
Name es
|
||||||
|
Match app.infer
|
||||||
|
Host ${ES_HOST}
|
||||||
|
Port ${ES_PORT}
|
||||||
|
Logstash_Format On
|
||||||
|
Logstash_Prefix infer
|
||||||
|
Replace_Dots On
|
||||||
|
Generate_ID On
|
||||||
|
Retry_Limit False
|
||||||
|
Suppress_Type_Name On
|
27
src/log/fluent-bit/build/etc/parsers.conf
Normal file
27
src/log/fluent-bit/build/etc/parsers.conf
Normal file
@ -0,0 +1,27 @@
|
|||||||
|
[MULTILINE_PARSER]
|
||||||
|
Name python
|
||||||
|
Type regex
|
||||||
|
Flush 2
|
||||||
|
Rule "start_state" "/^\d{4}-\d{2}-\d{2}[\sT]/" "cont"
|
||||||
|
Rule "cont" "/^\s+|^Traceback|^\tat\s+/" "cont"
|
||||||
|
|
||||||
|
[MULTILINE_PARSER]
|
||||||
|
Name go
|
||||||
|
Type regex
|
||||||
|
Flush 2
|
||||||
|
Rule "start_state" "/^[0-9]{4}\/[0-9]{2}\/[0-9]{2}/" "cont"
|
||||||
|
Rule "cont" "/^\s+|^\t/" "cont"
|
||||||
|
|
||||||
|
[MULTILINE_PARSER]
|
||||||
|
Name java
|
||||||
|
Type regex
|
||||||
|
Flush 2
|
||||||
|
Rule "start_state" "/^\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}/" "cont"
|
||||||
|
Rule "cont" "/^\s+at\s+|^\t.../" "cont"
|
||||||
|
|
||||||
|
[PARSER]
|
||||||
|
Name timestamp_parser
|
||||||
|
Format regex
|
||||||
|
Regex ^(?<timestamp>\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2})\s+(?<level>\w+)\s+(?<message>.*)$
|
||||||
|
Time_Key timestamp
|
||||||
|
Time_Format %Y-%m-%d %H:%M:%S
|
BIN
src/log/fluent-bit/build/packages/fluent-bit_3.1.9_amd64.deb
Normal file
BIN
src/log/fluent-bit/build/packages/fluent-bit_3.1.9_amd64.deb
Normal file
Binary file not shown.
47
src/log/fluent-bit/build/start-fluent-bit.sh
Executable file
47
src/log/fluent-bit/build/start-fluent-bit.sh
Executable file
@ -0,0 +1,47 @@
|
|||||||
|
#!/bin/bash
# Container entrypoint for the Fluent Bit test hosts.
#
# Installs curl, installs Fluent Bit from the .deb under /private/packages,
# copies the configuration from /private/etc to /etc/fluent-bit, waits until
# Elasticsearch answers and then replaces itself with the fluent-bit process.
#
# Required environment: ES_HOST, ES_PORT.
set -euo pipefail

# Fail before touching the package manager when the target is not configured;
# otherwise the problem would only surface at the wait loop further down.
: "${ES_HOST:?ES_HOST must be set}"
: "${ES_PORT:?ES_PORT must be set}"
readonly es_health_url="http://${ES_HOST}:${ES_PORT}/_cluster/health"

echo "[INFO] Starting Fluent Bit setup in Ubuntu container..."

# curl is needed for the Elasticsearch readiness probe below.
echo "[INFO] Installing required packages..."
export DEBIAN_FRONTEND=noninteractive
apt-get update -qq
apt-get install -y -qq curl

# Stage the mounted configuration and packages under /tmp.
echo "[INFO] Extracting fluent-bit bundle..."
cp -r /private/etc /tmp
cp -r /private/packages /tmp
cd /tmp

# dpkg may stop on missing dependencies; that is tolerated on purpose because
# the following `apt-get install -f` pulls them in and finishes the install.
echo "[INFO] Installing Fluent Bit from deb package..."
dpkg -i /tmp/packages/fluent-bit_3.1.9_amd64.deb || true
apt-get install -f -y -qq

# Smoke test: aborts the script (set -e) if the binary cannot run.
echo "[INFO] Fluent Bit version:"
/opt/fluent-bit/bin/fluent-bit --version

# Install the configuration.
mkdir -p /etc/fluent-bit
cp -r /tmp/etc/* /etc/fluent-bit/

# Log and buffer directories.
mkdir -p /logs/train /logs/infer /buffers
chmod 755 /logs/train /logs/infer /buffers

# Block until Elasticsearch answers its health endpoint (no upper bound).
echo "[INFO] Waiting for Elasticsearch to be ready..."
while ! curl -fs "$es_health_url" >/dev/null 2>&1; do
  echo " Waiting for ES at ${ES_HOST}:${ES_PORT}..."
  sleep 5
done
echo "[INFO] Elasticsearch is ready"

# exec so fluent-bit replaces this shell and receives signals directly.
echo "[INFO] Starting Fluent Bit with configuration from /etc/fluent-bit/"
echo "[INFO] Command: /opt/fluent-bit/bin/fluent-bit --config=/etc/fluent-bit/fluent-bit.conf"
exec /opt/fluent-bit/bin/fluent-bit \
  --config=/etc/fluent-bit/fluent-bit.conf
|
60
src/log/kibana/build/Dockerfile
Normal file
60
src/log/kibana/build/Dockerfile
Normal file
@ -0,0 +1,60 @@
|
|||||||
|
FROM docker.elastic.co/kibana/kibana:8.13.4
|
||||||
|
|
||||||
|
# 切换到 root 用户进行系统级安装
|
||||||
|
USER root
|
||||||
|
|
||||||
|
# 修改kibana用户的UID和GID
|
||||||
|
RUN usermod -u 2133 kibana && \
|
||||||
|
groupmod -g 2015 kibana && \
|
||||||
|
chown -R kibana:kibana /usr/share/kibana
|
||||||
|
|
||||||
|
# 设置构建参数
|
||||||
|
ARG USE_INTRANET=false
|
||||||
|
|
||||||
|
# 配置内网 apt 源 (如果指定了内网选项)
|
||||||
|
RUN if [ "$USE_INTRANET" = "true" ]; then \
|
||||||
|
echo "Configuring intranet apt sources..." && \
|
||||||
|
cp /etc/apt/sources.list /etc/apt/sources.list.bak && \
|
||||||
|
echo "deb [trusted=yes] http://10.68.64.1/ubuntu2204/ jammy main" > /etc/apt/sources.list && \
|
||||||
|
echo 'Acquire::https::Verify-Peer "false";' > /etc/apt/apt.conf.d/99disable-ssl-check && \
|
||||||
|
echo 'Acquire::https::Verify-Host "false";' >> /etc/apt/apt.conf.d/99disable-ssl-check; \
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Install supervisor, net-tools, inetutils-ping and vim
|
||||||
|
RUN apt-get update && \
|
||||||
|
apt-get install -y supervisor net-tools inetutils-ping vim && \
|
||||||
|
apt-get clean && \
|
||||||
|
rm -rf /var/lib/apt/lists/*
|
||||||
|
|
||||||
|
# 配置部署时使用的apt源
|
||||||
|
RUN if [ "$USE_INTRANET" = "true" ]; then \
|
||||||
|
echo "deb [trusted=yes] https://10.92.132.52/mirrors/ubuntu2204/ jammy main" > /etc/apt/sources.list; \
|
||||||
|
fi
|
||||||
|
|
||||||
|
# 创建 supervisor 日志目录
|
||||||
|
RUN mkdir -p /var/log/supervisor
|
||||||
|
|
||||||
|
|
||||||
|
# 复制 supervisor 配置文件
|
||||||
|
COPY src/log/kibana/build/supervisord.conf /etc/supervisor/conf.d/supervisord.conf
|
||||||
|
|
||||||
|
# 复制启动脚本
|
||||||
|
COPY src/log/kibana/build/start-kibana-supervised.sh /usr/local/bin/start-kibana-supervised.sh
|
||||||
|
COPY src/log/kibana/build/kibana-post-start.sh /usr/local/bin/kibana-post-start.sh
|
||||||
|
RUN chmod +x /usr/local/bin/start-kibana-supervised.sh /usr/local/bin/kibana-post-start.sh
|
||||||
|
|
||||||
|
# 复制DNS监控脚本
|
||||||
|
COPY src/log/kibana/build/dns-monitor.sh /usr/local/bin/dns-monitor.sh
|
||||||
|
RUN chmod +x /usr/local/bin/dns-monitor.sh
|
||||||
|
|
||||||
|
# kibana需要用到 /root/.config/puppeteer 路径
|
||||||
|
RUN chmod 777 /root
|
||||||
|
|
||||||
|
# 保持 root 用户,由 supervisor 管理用户切换
|
||||||
|
USER root
|
||||||
|
|
||||||
|
# 暴露端口
|
||||||
|
EXPOSE 5601
|
||||||
|
|
||||||
|
# 使用 supervisor 作为入口点
|
||||||
|
CMD ["/usr/bin/supervisord", "-c", "/etc/supervisor/conf.d/supervisord.conf"]
|
1
src/log/kibana/build/dns-monitor.sh
Symbolic link
1
src/log/kibana/build/dns-monitor.sh
Symbolic link
@ -0,0 +1 @@
|
|||||||
|
../../../bind/build/dns-monitor.sh
|
146
src/log/kibana/build/kibana-post-start.sh
Normal file
146
src/log/kibana/build/kibana-post-start.sh
Normal file
@ -0,0 +1,146 @@
|
|||||||
|
#!/bin/bash
|
||||||
|
set -euo pipefail
|
||||||
|
|
||||||
|
ES_HOST="${ELASTICSEARCH_HOSTS:-http://es:9200}"
|
||||||
|
KB_HOST="http://localhost:5601"
|
||||||
|
|
||||||
|
echo "[INFO] Starting Kibana post-start configuration..."
|
||||||
|
|
||||||
|
# Waits until Elasticsearch answers its cluster-health endpoint.
# Globals:  ES_HOST (read) - base URL of Elasticsearch
# Outputs:  progress messages on stdout
# Returns:  0 once the endpoint responds, 1 after 60 failed attempts
wait_for_elasticsearch() {
  echo "[INFO] Waiting for Elasticsearch..."
  local -r max_attempts=60
  local attempt

  for ((attempt = 1; attempt <= max_attempts; attempt++)); do
    # --max-time keeps one stalled connection from blocking forever, so the
    # attempt cap is an actual upper bound on the wait.
    if curl -fs --max-time 5 "$ES_HOST/_cluster/health" >/dev/null 2>&1; then
      echo "[OK] Elasticsearch is available"
      return 0
    fi
    echo " Waiting for ES... ($attempt/$max_attempts)"
    sleep 5
  done

  echo "[ERROR] Elasticsearch timeout"
  return 1
}
|
||||||
|
|
||||||
|
# Waits until Kibana's status API reports an "available" level.
# Globals:  KB_HOST (read) - base URL of Kibana
# Outputs:  progress messages on stdout
# Returns:  0 once available, 1 after 120 failed attempts
wait_for_kibana() {
  echo "[INFO] Waiting for Kibana..."
  local -r max_attempts=120
  # Kept in a variable so the quotes are matched literally by =~.
  local -r level_re='"level":"([a-z]+)"'
  local attempt body level

  for ((attempt = 1; attempt <= max_attempts; attempt++)); do
    # One request per attempt: the same response is used for the reachability
    # check and for the status check.
    if body=$(curl -fs --max-time 10 "$KB_HOST/api/status" 2>/dev/null); then
      # Same acceptance rule as before: any "level":"available" in the body.
      if [[ "$body" == *'"level":"available"'* ]]; then
        echo "[OK] Kibana is available"
        return 0
      fi
      level="unknown"
      if [[ "$body" =~ $level_re ]]; then
        level=${BASH_REMATCH[1]}
      fi
      echo " Waiting for Kibana... ($attempt/$max_attempts, status: $level)"
    else
      echo " Waiting for Kibana... ($attempt/$max_attempts, connection failed)"
    fi
    sleep 5
  done

  echo "[ERROR] Kibana timeout"
  return 1
}
|
||||||
|
|
||||||
|
# Sets number_of_replicas to 0 on every train-*/infer-* index that does not
# already have 0 replicas. Safe to run repeatedly.
# Globals:  ES_HOST (read) - base URL of Elasticsearch
# Outputs:  one status line per index on stdout
# Returns:  0; a failed update is reported as a warning and skipped
fix_replicas_idempotent() {
  echo "[INFO] Checking and fixing index replicas..."
  local -r replicas_re='"number_of_replicas":"([^"]*)"'
  local indices idx settings current_replicas

  # -f: an HTTP error body must never be mistaken for a list of index names.
  indices=$(curl -fs "$ES_HOST/_cat/indices/train-*,infer-*?h=index" 2>/dev/null) || indices=""
  if [[ -z "$indices" ]]; then
    echo "[INFO] No train-*/infer-* indices found, skipping replica adjustment"
    return 0
  fi

  # Read line by line rather than word-splitting $indices, so nothing in the
  # response can be glob-expanded by the shell.
  while read -r idx; do
    [[ -n "$idx" ]] || continue

    current_replicas=""
    settings=$(curl -fs "$ES_HOST/$idx/_settings" 2>/dev/null) || settings=""
    if [[ "$settings" =~ $replicas_re ]]; then
      current_replicas=${BASH_REMATCH[1]}
    fi

    if [[ "$current_replicas" == "0" ]]; then
      echo "[INFO] Index $idx already has 0 replicas, skipping"
      continue
    fi

    echo "[INFO] Setting replicas to 0 for index: $idx (current: $current_replicas)"
    if ! curl -fsS -X PUT "$ES_HOST/$idx/_settings" \
      -H 'Content-Type: application/json' \
      -d '{"index":{"number_of_replicas":0}}' >/dev/null; then
      echo "[WARN] Failed to set replicas for $idx"
      continue
    fi
    echo "[OK] Updated replicas for $idx"
  done <<<"$indices"
}
|
||||||
|
|
||||||
|
# Creates the Kibana data view for one index family unless it already exists.
# Globals:   ES_HOST, KB_HOST (read)
# Arguments: $1 - family name ("train" or "infer"); the pattern is "<name>-*"
# Outputs:   status messages on stdout
# Returns:   0; a failed creation is reported as a warning
_ensure_data_view() {
  local -r name=$1
  local -r pattern="${name}-*"
  local indices views payload

  # Only create a view when at least one matching index exists. -f makes an
  # HTTP error count as "no indices" instead of as index output.
  indices=$(curl -fs "$ES_HOST/_cat/indices/${pattern}?h=index" 2>/dev/null) || indices=""
  if [[ -z "$indices" ]]; then
    echo "[INFO] No ${pattern} indices found, skipping ${name} data view creation"
    return 0
  fi

  # The quoted part of the pattern is literal, so the '*' in the title is not
  # treated as a wildcard.
  views=$(curl -s "$KB_HOST/api/data_views" -H 'kbn-xsrf: true' 2>/dev/null) || views=""
  if [[ "$views" == *"\"title\":\"${pattern}\""* ]]; then
    echo "[INFO] ${name^} data view already exists, skipping"
    return 0
  fi

  echo "[INFO] Creating data view for ${pattern} indices"
  printf -v payload '{"data_view":{"name":"%s","title":"%s","timeFieldName":"@timestamp"}}' \
    "$name" "$pattern"
  if curl -fsS -X POST "$KB_HOST/api/data_views/data_view" \
    -H 'kbn-xsrf: true' \
    -H 'Content-Type: application/json' \
    -d "$payload" >/dev/null; then
    echo "[OK] Created ${name} data view"
  else
    echo "[WARN] Failed to create ${name} data view"
  fi
}

# Ensures the "train" and "infer" data views exist in Kibana. Safe to run
# repeatedly: existing views and families without indices are skipped.
create_data_views_idempotent() {
  echo "[INFO] Checking and creating data views..."
  _ensure_data_view train
  _ensure_data_view infer
}
|
||||||
|
|
||||||
|
# Entry point: waits for Elasticsearch and Kibana (exiting with status 1 if
# either never becomes ready), then applies the replica and data-view setup.
main() {
  local probe

  # Readiness gates, in order; the first failure ends the script.
  for probe in wait_for_elasticsearch wait_for_kibana; do
    "$probe" || exit 1
  done

  # Idempotent configuration steps.
  fix_replicas_idempotent
  create_data_views_idempotent

  echo "[INFO] Kibana post-start configuration completed"
}
|
||||||
|
|
||||||
|
# 运行主逻辑
|
||||||
|
main
|
37
src/log/kibana/build/start-kibana-supervised.sh
Normal file
37
src/log/kibana/build/start-kibana-supervised.sh
Normal file
@ -0,0 +1,37 @@
|
|||||||
|
#!/bin/bash
# Supervisor entrypoint for Kibana.
#
# Links Kibana's data directory to /private/argus/log/kibana, records the
# container's eth0 address under /private/argus/etc, starts the post-start
# configuration in the background and finally execs the image's stock
# entrypoint.
set -euo pipefail

echo "[INFO] Starting Kibana under supervisor..."

mkdir -p /private/argus/log/kibana

# Replace whatever currently sits at Kibana's data path (an old link or a real
# directory) with a symlink to the persistent directory.
if [[ -L /usr/share/kibana/data ]]; then
  rm /usr/share/kibana/data
elif [[ -d /usr/share/kibana/data ]]; then
  rm -rf /usr/share/kibana/data
fi

ln -sf /private/argus/log/kibana /usr/share/kibana/data

echo "[INFO] Data directory linked: /usr/share/kibana/data -> /private/argus/log/kibana"

# Record the container IP in a file named after the service domain.
# NOTE(review): presumably read by the DNS service that shares this volume;
# confirm against the bind9 image.
# With pipefail, a missing eth0 interface makes this assignment fail and the
# script exit.
DOMAIN=kibana.log.argus.com
IP=$(ifconfig | grep -A 1 eth0 | grep inet | awk '{print $2}')
echo "current IP: ${IP}"
echo "${IP}" > "/private/argus/etc/${DOMAIN}"

# Default the Elasticsearch endpoint when the caller did not provide one.
export ELASTICSEARCH_HOSTS="${ELASTICSEARCH_HOSTS:-"http://es:9200"}"

echo "[INFO] Connecting to Elasticsearch at: $ELASTICSEARCH_HOSTS"

# The post-start script waits for Kibana by itself, so it is launched first and
# left running in the background.
echo "[INFO] Starting background post-start configuration..."
/usr/local/bin/kibana-post-start.sh &

echo "[INFO] Starting Kibana process..."

# Hand over to the original Kibana entrypoint.
exec /usr/local/bin/kibana-docker
|
39
src/log/kibana/build/supervisord.conf
Normal file
39
src/log/kibana/build/supervisord.conf
Normal file
@ -0,0 +1,39 @@
|
|||||||
|
[supervisord]
|
||||||
|
nodaemon=true
|
||||||
|
logfile=/var/log/supervisor/supervisord.log
|
||||||
|
pidfile=/var/run/supervisord.pid
|
||||||
|
user=root
|
||||||
|
|
||||||
|
[program:kibana]
|
||||||
|
command=/usr/local/bin/start-kibana-supervised.sh
|
||||||
|
user=kibana
|
||||||
|
stdout_logfile=/var/log/supervisor/kibana.log
|
||||||
|
stderr_logfile=/var/log/supervisor/kibana_error.log
|
||||||
|
autorestart=true
|
||||||
|
startretries=3
|
||||||
|
startsecs=30
|
||||||
|
stopwaitsecs=30
|
||||||
|
killasgroup=true
|
||||||
|
stopasgroup=true
|
||||||
|
|
||||||
|
[program:dns-monitor]
|
||||||
|
command=/usr/local/bin/dns-monitor.sh
|
||||||
|
user=root
|
||||||
|
stdout_logfile=/var/log/supervisor/dns-monitor.log
|
||||||
|
stderr_logfile=/var/log/supervisor/dns-monitor_error.log
|
||||||
|
autorestart=true
|
||||||
|
startretries=3
|
||||||
|
startsecs=5
|
||||||
|
stopwaitsecs=10
|
||||||
|
killasgroup=true
|
||||||
|
stopasgroup=true
|
||||||
|
|
||||||
|
[unix_http_server]
|
||||||
|
file=/var/run/supervisor.sock
|
||||||
|
chmod=0700
|
||||||
|
|
||||||
|
[supervisorctl]
|
||||||
|
serverurl=unix:///var/run/supervisor.sock
|
||||||
|
|
||||||
|
[rpcinterface:supervisor]
|
||||||
|
supervisor.rpcinterface_factory = supervisor.rpcinterface:make_main_rpcinterface
|
85
src/log/tests/docker-compose.yml
Normal file
85
src/log/tests/docker-compose.yml
Normal file
@ -0,0 +1,85 @@
|
|||||||
|
# Test stack for the log pipeline: Elasticsearch, Kibana, two Fluent Bit
# "hosts" and a bind9 DNS server.
version: "3.8"
services:
  es:
    build:
      # NOTE(review): the Elasticsearch Dockerfile is not visible here. If it
      # COPYs repo-root-relative paths like the Kibana one does, it needs the
      # same repo-root context as the kibana service below.
      context: ../elasticsearch/build
      dockerfile: Dockerfile
    image: argus-elasticsearch:latest
    environment:
      - discovery.type=single-node
      - xpack.security.enabled=false
      - ES_JAVA_OPTS=-Xms512m -Xmx512m
    volumes:
      - ./private/argus/:/private/argus/
    ports: ["9200:9200"]
    healthcheck:
      test: ["CMD-SHELL", "curl -fs http://localhost:9200 >/dev/null || exit 1"]
      interval: 10s
      timeout: 5s
      retries: 30

  kibana:
    build:
      # The Kibana Dockerfile COPYs src/log/kibana/build/..., i.e. paths
      # relative to the repository root, so the root must be the context.
      context: ../../..
      dockerfile: src/log/kibana/build/Dockerfile
    image: argus-kibana:latest
    environment:
      - ELASTICSEARCH_HOSTS=http://es.log.argus.com:9200
    volumes:
      - ./private/argus/:/private/argus/
    ports: ["5601:5601"]
    depends_on:
      es:
        condition: service_healthy

  fluent-bit-host01:
    image: ubuntu:22.04
    environment:
      - CLUSTER=local
      - RACK=dev
      - HOSTNAME=host01
      - ES_HOST=es
      - ES_PORT=9200
    volumes:
      - ../fluent-bit/build:/private/
    ports: ["2020:2020"]
    depends_on:
      es:
        condition: service_healthy
    command: /private/start-fluent-bit.sh
    healthcheck:
      test: ["CMD-SHELL", "curl -fs http://localhost:2020/api/v2/metrics >/dev/null || exit 1"]
      interval: 15s
      timeout: 10s
      retries: 30

  fluent-bit-host02:
    image: ubuntu:22.04
    environment:
      - CLUSTER=local
      - RACK=dev
      - HOSTNAME=host02
      - ES_HOST=es
      - ES_PORT=9200
    volumes:
      - ../fluent-bit/build:/private/
    ports: ["2021:2020"]
    depends_on:
      es:
        condition: service_healthy
    command: /private/start-fluent-bit.sh
    healthcheck:
      test: ["CMD-SHELL", "curl -fs http://localhost:2020/api/v2/metrics >/dev/null || exit 1"]
      interval: 15s
      timeout: 10s
      retries: 30

  bind9:
    image: argus-bind9:latest
    ports:
      - "53:53/tcp"
      - "53:53/udp"
    volumes:
      - ./private/argus:/private/argus/
    restart: unless-stopped
|
||||||
|
|
30
src/log/tests/scripts/01_bootstrap.sh
Executable file
30
src/log/tests/scripts/01_bootstrap.sh
Executable file
@ -0,0 +1,30 @@
|
|||||||
|
#!/usr/bin/env bash
# Prepares the host-side directories that the test stack bind-mounts and
# checks that the Fluent Bit build files the compose file relies on exist.
set -euo pipefail
root="$(cd "$(dirname "${BASH_SOURCE[0]}")/../" && pwd)"

# Directory that docker-compose mounts into the Fluent Bit containers.
readonly fluent_bit_build="$root/../fluent-bit/build"
readonly -a data_dirs=(
  "$root/private/argus/log/elasticsearch"
  "$root/private/argus/log/kibana"
  "$root/private/argus/etc"
)

# Create the private directory tree.
echo "[INFO] Creating private directory structure for supervisor-based containers..."
mkdir -p "${data_dirs[@]}"

# Hand the directories to UID 2133 / GID 2015, the IDs the Kibana image
# assigns to its service user. Best effort: a failure is reported, not fatal.
echo "[INFO] Setting permissions for data directories..."
for dir in "${data_dirs[@]}"; do
  if ! sudo chown -R 2133:2015 "$dir" 2>/dev/null; then
    echo "[WARN] chown 2133:2015 failed for $dir" >&2
  fi
done

echo "[INFO] Supervisor-based containers will manage their own scripts and configurations"

# Warn about missing Fluent Bit files: the start script, the .deb it installs
# and the main configuration it copies.
for required in start-fluent-bit.sh packages/fluent-bit_3.1.9_amd64.deb etc/fluent-bit.conf; do
  if [[ ! -f "$fluent_bit_build/$required" ]]; then
    echo "[WARN] fluent-bit/build/$required 不存在,请确保已创建该文件"
  fi
done

echo "[OK] 初始化完成: private/argus/log/{elasticsearch,kibana}"
echo "[INFO] Fluent-bit files should be in fluent-bit/build/ directory"
|
10
src/log/tests/scripts/02_up.sh
Executable file
10
src/log/tests/scripts/02_up.sh
Executable file
@ -0,0 +1,10 @@
|
|||||||
|
#!/usr/bin/env bash
# Starts the test stack as compose project "logging-mvp".
set -euo pipefail
cd "$(dirname "$0")/.."

# Prefer the `docker compose` plugin and fall back to the standalone
# docker-compose binary. An array keeps the two-word command intact without
# relying on word splitting.
compose_cmd=(docker compose)
if ! "${compose_cmd[@]}" version >/dev/null 2>&1; then
  if command -v docker-compose >/dev/null 2>&1; then
    compose_cmd=(docker-compose)
  else
    echo "需要 Docker Compose,请安装后重试" >&2
    exit 1
  fi
fi

"${compose_cmd[@]}" -p logging-mvp up -d --remove-orphans
echo "[OK] 服务已启动:ES http://localhost:9200 Kibana http://localhost:5601 Fluent-Bit host01 http://localhost:2020 Fluent-Bit host02 http://localhost:2021"
|
31
src/log/tests/scripts/03_send_test_host01.sh
Executable file
31
src/log/tests/scripts/03_send_test_host01.sh
Executable file
@ -0,0 +1,31 @@
|
|||||||
|
#!/usr/bin/env bash
set -euo pipefail

# Name docker compose gives the fluent-bit-host01 service in project
# "logging-mvp".
container_name="logging-mvp-fluent-bit-host01-1"

# Abort unless exactly this container is running. Asking docker about the
# container by name avoids substring matches on `docker ps` output and the
# SIGPIPE/pipefail false negative that `docker ps | grep -q` can produce.
if [[ "$(docker inspect -f '{{.State.Running}}' "$container_name" 2>/dev/null)" != "true" ]]; then
  echo "[ERROR] Fluent Bit容器 $container_name 未运行"
  exit 1
fi
|
||||||
|
|
||||||
|
# 创建日志目录
|
||||||
|
docker exec "$container_name" mkdir -p /logs/train /logs/infer
|
||||||
|
|
||||||
|
# 写入训练日志 (host01)
|
||||||
|
docker exec "$container_name" sh -c "printf '%s INFO [host01] training step=1 loss=1.23 model=bert\n' \"\$(date '+%F %T')\" >> /logs/train/train-demo.log"
|
||||||
|
docker exec "$container_name" sh -c "printf '%s INFO [host01] training step=2 loss=1.15 model=bert\n' \"\$(date '+%F %T')\" >> /logs/train/train-demo.log"
|
||||||
|
|
||||||
|
# 写入推理日志 (host01)
|
||||||
|
docker exec "$container_name" sh -c "printf '%s ERROR [host01] inference failed on batch=1\n' \"\$(date '+%F %T')\" >> /logs/infer/infer-demo.log"
|
||||||
|
docker exec "$container_name" sh -c "cat <<'STACK' >> /logs/infer/infer-demo.log
|
||||||
|
Traceback (most recent call last):
|
||||||
|
File \"inference.py\", line 15, in <module>
|
||||||
|
raise RuntimeError(\"CUDA out of memory on host01\")
|
||||||
|
RuntimeError: CUDA out of memory on host01
|
||||||
|
STACK"
|
||||||
|
|
||||||
|
echo "[OK] 已通过docker exec写入测试日志到 host01 容器内:"
|
||||||
|
echo " - /logs/train/train-demo.log"
|
||||||
|
echo " - /logs/infer/infer-demo.log"
|
27
src/log/tests/scripts/03_send_test_host02.sh
Executable file
27
src/log/tests/scripts/03_send_test_host02.sh
Executable file
@ -0,0 +1,27 @@
|
|||||||
|
#!/usr/bin/env bash
set -euo pipefail

# Name docker compose gives the fluent-bit-host02 service in project
# "logging-mvp".
container_name="logging-mvp-fluent-bit-host02-1"

# Abort unless exactly this container is running. Asking docker about the
# container by name avoids substring matches on `docker ps` output and the
# SIGPIPE/pipefail false negative that `docker ps | grep -q` can produce.
if [[ "$(docker inspect -f '{{.State.Running}}' "$container_name" 2>/dev/null)" != "true" ]]; then
  echo "[ERROR] Fluent Bit容器 $container_name 未运行"
  exit 1
fi
|
||||||
|
|
||||||
|
# 创建日志目录
|
||||||
|
docker exec "$container_name" mkdir -p /logs/train /logs/infer
|
||||||
|
|
||||||
|
# 写入训练日志 (host02)
|
||||||
|
docker exec "$container_name" sh -c "printf '%s INFO [host02] training step=1 loss=1.45 model=gpt\n' \"\$(date '+%F %T')\" >> /logs/train/train-demo.log"
|
||||||
|
docker exec "$container_name" sh -c "printf '%s INFO [host02] training step=2 loss=1.38 model=gpt\n' \"\$(date '+%F %T')\" >> /logs/train/train-demo.log"
|
||||||
|
docker exec "$container_name" sh -c "printf '%s INFO [host02] training step=3 loss=1.32 model=gpt\n' \"\$(date '+%F %T')\" >> /logs/train/train-demo.log"
|
||||||
|
|
||||||
|
# 写入推理日志 (host02)
|
||||||
|
docker exec "$container_name" sh -c "printf '%s WARN [host02] inference slow on batch=5 latency=2.3s\n' \"\$(date '+%F %T')\" >> /logs/infer/infer-demo.log"
|
||||||
|
docker exec "$container_name" sh -c "printf '%s INFO [host02] inference completed batch=6 latency=0.8s\n' \"\$(date '+%F %T')\" >> /logs/infer/infer-demo.log"
|
||||||
|
|
||||||
|
echo "[OK] 已通过docker exec写入测试日志到 host02 容器内:"
|
||||||
|
echo " - /logs/train/train-demo.log"
|
||||||
|
echo " - /logs/infer/infer-demo.log"
|
7
src/log/tests/scripts/04_query_es.sh
Executable file
7
src/log/tests/scripts/04_query_es.sh
Executable file
@ -0,0 +1,7 @@
|
|||||||
|
#!/usr/bin/env bash
# Lists the train-/infer-/logstash indices and prints the document count of the
# train-* and infer-* index families.
# Environment: ES - Elasticsearch base URL (default http://localhost:9200).
set -euo pipefail
ES="${ES:-http://localhost:9200}"
echo "[i] 查询 ES 端点:$ES"

# grep -E replaces the deprecated egrep alias. `|| true` keeps "no matching
# index" from aborting the script under set -e / pipefail.
curl -fsS "$ES/_cat/indices?v" | grep -E 'train-|infer-|logstash' || true

for family in train infer; do
  printf '%s-* 计数:' "$family"
  curl -fsS "$ES/${family}-*/_count" | sed -E 's/.*"count":([0-9]+).*/\1/'
  echo
done
|
21
src/log/tests/scripts/05_down.sh
Executable file
21
src/log/tests/scripts/05_down.sh
Executable file
@ -0,0 +1,21 @@
|
|||||||
|
#!/usr/bin/env bash
# Stops compose project "logging-mvp" and deletes the test data in private/.
set -euo pipefail

# Change to the tests directory exactly once. Repeating this cd after the
# working directory has changed resolves a relative $0 against the wrong base
# and can fail before the cleanup runs.
cd "$(dirname "$0")/.."

# Prefer the `docker compose` plugin and fall back to docker-compose. An array
# keeps the two-word command intact without relying on word splitting.
compose_cmd=(docker compose)
if ! "${compose_cmd[@]}" version >/dev/null 2>&1; then
  if command -v docker-compose >/dev/null 2>&1; then
    compose_cmd=(docker-compose)
  else
    echo "需要 Docker Compose,请安装后重试" >&2
    exit 1
  fi
fi

"${compose_cmd[@]}" -p logging-mvp down
echo "[OK] 已停止所有容器"

# Remove the private directory and everything in it.
echo "[INFO] 清理private目录内容..."
if [[ -d private ]]; then
  rm -rf private
  echo "[OK] 已清理private目录"
else
  echo "[INFO] private目录不存在,无需清理"
fi
|
208
src/log/tests/scripts/06_dns_test.sh
Executable file
208
src/log/tests/scripts/06_dns_test.sh
Executable file
@ -0,0 +1,208 @@
|
|||||||
|
#!/usr/bin/env bash
|
||||||
|
set -euo pipefail
|
||||||
|
|
||||||
|
echo "======================================="
|
||||||
|
echo "ARGUS DNS监控功能测试"
|
||||||
|
echo "======================================="
|
||||||
|
echo ""
|
||||||
|
|
||||||
|
# 记录测试开始时间
|
||||||
|
test_start_time=$(date +%s)
|
||||||
|
|
||||||
|
# Prints a banner for a test step: a blank line, the step title and a rule.
# Arguments: $1 - step number, $2 - step description
show_step() {
  printf '\n🔄 Step %s: %s\n%s\n' "$1" "$2" "----------------------------------------"
}
|
||||||
|
|
||||||
|
# Reports the outcome of the command that ran immediately before this call.
# Arguments: $1 - label of the step being verified
# Outputs:   a SUCCESS or FAILED line on stdout
# Exits the script with status 1 when that command failed.
verify_step() {
  # Must stay the first statement: $? still holds the caller's last status.
  local -r previous_status=$?

  if (( previous_status != 0 )); then
    echo "❌ $1 - FAILED"
    exit 1
  fi
  echo "✅ $1 - SUCCESS"
}
|
||||||
|
|
||||||
|
# Waits until Elasticsearch (localhost:9200) and Kibana (localhost:5601) both
# answer their status endpoints.
# Outputs:  progress messages on stdout
# Returns:  0 once both respond in the same attempt, 1 after 60 attempts
wait_for_services() {
  echo "[INFO] Waiting for services to be ready..."
  local -r max_attempts=60
  local -r -a probe_urls=(
    "http://localhost:9200/_cluster/health"
    "http://localhost:5601/api/status"
  )
  local attempt url ready

  for ((attempt = 1; attempt <= max_attempts; attempt++)); do
    ready=1
    for url in "${probe_urls[@]}"; do
      # --max-time keeps a stalled connection from blocking the loop, so the
      # attempt cap is an actual upper bound on the wait.
      if ! curl -fs --max-time 5 "$url" >/dev/null 2>&1; then
        ready=0
        break
      fi
    done
    if (( ready )); then
      echo "[OK] Services are ready!"
      return 0
    fi
    echo " Waiting for services... ($attempt/$max_attempts)"
    sleep 5
  done

  echo "[ERROR] Services not ready after $max_attempts attempts"
  return 1
}
|
||||||
|
|
||||||
|
# Checks that a container's /etc/resolv.conf has a nameserver entry for the
# expected address.
# Arguments: $1 - container name, $2 - expected DNS server address
# Outputs:   result on stdout; on a mismatch also the file content that was read
# Returns:   0 when the entry is present, 1 otherwise
check_resolv_conf() {
  local service_name=$1
  local expected_dns=$2
  local resolv_content

  echo "[INFO] 检查 $service_name 容器的 /etc/resolv.conf..."

  resolv_content=$(docker exec "$service_name" cat /etc/resolv.conf 2>/dev/null) || resolv_content=""

  # Compare whole fields. A regex grep would treat the dots as wildcards and
  # would also accept e.g. 8.8.8.80 when 8.8.8.8 is expected.
  if awk -v dns="$expected_dns" \
    '$1 == "nameserver" && $2 == dns { hit = 1 } END { exit !hit }' \
    <<<"$resolv_content"; then
    echo "✅ $service_name resolv.conf contains nameserver $expected_dns"
    return 0
  fi

  echo "❌ $service_name resolv.conf does not contain nameserver $expected_dns"
  echo "实际内容:"
  echo "$resolv_content"
  return 1
}
|
||||||
|
|
||||||
|
# Shows the last 20 lines of a container's dns-monitor supervisor log.
# Arguments: $1 - container name
# Outputs:   result on stdout, followed by the log lines when there are any
# Returns:   0 when the log has content, 1 when it is empty or unreadable
check_dns_monitor_logs() {
  local container=$1
  local recent

  echo "[INFO] 检查 $container 的DNS监控日志..."

  # A failing docker exec is folded into "no log content".
  recent=$(docker exec "$container" tail -n 20 /var/log/supervisor/dns-monitor.log 2>/dev/null || echo "")

  if [[ -z "$recent" ]]; then
    echo "❌ $container DNS监控日志为空或不存在"
    return 1
  fi

  echo "✅ $container DNS监控日志存在"
  echo "最近的日志:"
  echo "$recent"
  return 0
}
|
||||||
|
|
||||||
|
# Creates ./private/argus/etc/ (relative to the current directory), where the
# test later writes dns.conf.
ensure_directories() {
  local -r etc_dir=./private/argus/etc/

  echo "[INFO] 确保目录结构存在..."
  # -p: succeeds when the directory already exists.
  mkdir -p "$etc_dir"
  echo "✅ 目录结构准备完成(注:使用真实的update-dns.sh脚本)"
}
|
||||||
|
|
||||||
|
# 开始DNS监控测试
|
||||||
|
show_step "1" "Bootstrap - Initialize environment"
|
||||||
|
./scripts/01_bootstrap.sh
|
||||||
|
verify_step "Bootstrap"
|
||||||
|
|
||||||
|
# 确保目录结构
|
||||||
|
ensure_directories
|
||||||
|
|
||||||
|
show_step "2" "Startup - Start all services"
|
||||||
|
./scripts/02_up.sh
|
||||||
|
verify_step "Service startup"
|
||||||
|
|
||||||
|
# 等待服务完全就绪
|
||||||
|
wait_for_services || exit 1
|
||||||
|
|
||||||
|
show_step "3" "Create initial DNS configuration"
|
||||||
|
# 创建初始的DNS配置文件 - 只有一个IP
|
||||||
|
echo "[INFO] 创建初始的dns.conf文件 (8.8.8.8)..."
|
||||||
|
cat > ./private/argus/etc/dns.conf << 'EOF'
|
||||||
|
8.8.8.8
|
||||||
|
EOF
|
||||||
|
|
||||||
|
echo "✅ 初始dns.conf文件创建成功 (8.8.8.8)"
|
||||||
|
verify_step "Initial DNS configuration creation"
|
||||||
|
|
||||||
|
# 等待DNS监控检测到配置文件
|
||||||
|
echo "[INFO] 等待DNS监控检测并处理初始配置..."
|
||||||
|
sleep 15
|
||||||
|
|
||||||
|
show_step "4" "Verify initial DNS configuration processing"
|
||||||
|
# 检查两个容器的DNS监控日志
|
||||||
|
check_dns_monitor_logs "logging-mvp-es-1"
|
||||||
|
verify_step "Elasticsearch DNS monitor logs"
|
||||||
|
|
||||||
|
check_dns_monitor_logs "logging-mvp-kibana-1"
|
||||||
|
verify_step "Kibana DNS monitor logs"
|
||||||
|
|
||||||
|
# 检查resolv.conf是否包含新的DNS服务器
|
||||||
|
check_resolv_conf "logging-mvp-es-1" "8.8.8.8"
|
||||||
|
verify_step "Elasticsearch resolv.conf initial check"
|
||||||
|
|
||||||
|
check_resolv_conf "logging-mvp-kibana-1" "8.8.8.8"
|
||||||
|
verify_step "Kibana resolv.conf initial check"
|
||||||
|
|
||||||
|
show_step "5" "Modify DNS configuration and test auto-update"
|
||||||
|
# 修改DNS配置文件 - 改为另一个IP
|
||||||
|
echo "[INFO] 修改dns.conf文件,改为1.1.1.1..."
|
||||||
|
cat > ./private/argus/etc/dns.conf << 'EOF'
|
||||||
|
1.1.1.1
|
||||||
|
EOF
|
||||||
|
|
||||||
|
echo "✅ dns.conf文件更新成功,改为1.1.1.1"
|
||||||
|
|
||||||
|
# 等待DNS监控检测到配置变化
|
||||||
|
echo "[INFO] 等待DNS监控检测配置变化并执行更新..."
|
||||||
|
sleep 15
|
||||||
|
|
||||||
|
show_step "6" "Verify DNS configuration auto-update"
|
||||||
|
# 再次检查DNS监控日志,应该看到配置变化检测
|
||||||
|
echo "[INFO] 检查DNS监控是否检测到配置变化..."
|
||||||
|
|
||||||
|
# 检查elasticsearch容器
|
||||||
|
echo "[INFO] 检查elasticsearch容器的DNS监控日志(最近30行)..."
|
||||||
|
docker exec logging-mvp-es-1 tail -n 30 /var/log/supervisor/dns-monitor.log || true
|
||||||
|
|
||||||
|
# 检查kibana容器
|
||||||
|
echo "[INFO] 检查kibana容器的DNS监控日志(最近30行)..."
|
||||||
|
docker exec logging-mvp-kibana-1 tail -n 30 /var/log/supervisor/dns-monitor.log || true
|
||||||
|
|
||||||
|
# 验证新的DNS服务器是否被添加到resolv.conf
|
||||||
|
check_resolv_conf "logging-mvp-es-1" "1.1.1.1"
|
||||||
|
verify_step "Elasticsearch resolv.conf after update"
|
||||||
|
|
||||||
|
check_resolv_conf "logging-mvp-kibana-1" "1.1.1.1"
|
||||||
|
verify_step "Kibana resolv.conf after update"
|
||||||
|
|
||||||
|
show_step "7" "Final verification - Check DNS configuration"
|
||||||
|
# 最终验证DNS配置
|
||||||
|
echo "[INFO] 最终验证elasticsearch容器的resolv.conf..."
|
||||||
|
docker exec logging-mvp-es-1 cat /etc/resolv.conf
|
||||||
|
|
||||||
|
echo "[INFO] 最终验证kibana容器的resolv.conf..."
|
||||||
|
docker exec logging-mvp-kibana-1 cat /etc/resolv.conf
|
||||||
|
|
||||||
|
echo "[INFO] 最终dns.conf内容:"
|
||||||
|
cat ./private/argus/etc/dns.conf
|
||||||
|
|
||||||
|
verify_step "Final DNS configuration verification"
|
||||||
|
|
||||||
|
show_step "8" "Cleanup - Stop all services"
|
||||||
|
./scripts/05_down.sh
|
||||||
|
verify_step "Service cleanup"
|
||||||
|
|
||||||
|
# 清理测试文件
|
||||||
|
rm -f ./private/argus/etc/dns.conf
|
||||||
|
# 注:不删除update-dns.sh,因为这是真实的脚本
|
||||||
|
|
||||||
|
# 计算总测试时间
|
||||||
|
test_end_time=$(date +%s)
|
||||||
|
total_time=$((test_end_time - test_start_time))
|
||||||
|
|
||||||
|
echo ""
|
||||||
|
echo "======================================="
|
||||||
|
echo "🎉 DNS监控功能测试完成!"
|
||||||
|
echo "======================================="
|
||||||
|
echo "📊 测试总结:"
|
||||||
|
echo " • 总耗时: ${total_time}秒"
|
||||||
|
echo " • 初始DNS配置: 8.8.8.8"
|
||||||
|
echo " • 更新DNS配置: 1.1.1.1"
|
||||||
|
echo " • DNS监控脚本正常工作"
|
||||||
|
echo " • 容器resolv.conf自动覆盖更新成功"
|
||||||
|
echo ""
|
||||||
|
echo "✅ DNS自动更新功能测试通过!"
|
||||||
|
echo ""
|
169
src/log/tests/scripts/e2e_test.sh
Executable file
169
src/log/tests/scripts/e2e_test.sh
Executable file
@ -0,0 +1,169 @@
|
|||||||
|
#!/usr/bin/env bash
# End-to-end test driver for the ARGUS log system.
# Strict mode: abort on unhandled errors, unset variables and failed pipelines.
set -euo pipefail

echo "======================================="
echo "ARGUS Log System End-to-End Test"
echo "======================================="
echo ""

# Record when the test started so the total run time can be reported at the end
test_start_time=$(date +%s)
|
||||||
|
|
||||||
|
# Print the combined document count of the train-* and infer-* indices.
# Queries the Elasticsearch _count API on localhost:9200 once per index
# pattern and writes the sum to stdout.
# A pattern whose count cannot be read (request failure, response without a
# numeric "count" field) contributes 0, so the output is always an integer.
get_log_count() {
  local pattern count total=0
  for pattern in 'train-*' 'infer-*'; do
    # Best effort by design: keep whatever was parsed and never abort here.
    # head -n 1 guards against more than one "count" field in the body,
    # which would otherwise break the arithmetic below.
    count=$(curl -s "http://localhost:9200/${pattern}/_count" 2>/dev/null \
      | grep -o '"count":[0-9]\+' \
      | head -n 1 \
      | cut -d':' -f2) || true
    total=$((total + ${count:-0}))
  done
  echo "$total"
}
|
||||||
|
|
||||||
|
# Wait until every service endpoint answers successfully.
# Polls the Elasticsearch, Kibana and two Fluent Bit HTTP endpoints on
# localhost; an attempt succeeds only when all of them respond.
# Arguments:
#   $1 - maximum number of polling attempts (optional, default 60)
#   $2 - seconds to sleep between attempts (optional, default 5)
# Outputs: progress messages on stdout.
# Returns: 0 once all endpoints respond, 1 when the attempts are exhausted.
wait_for_services() {
  local max_attempts=${1:-60}
  local interval=${2:-5}
  local attempt url all_ready
  local -a endpoints=(
    "http://localhost:9200/_cluster/health"
    "http://localhost:5601/api/status"
    "http://localhost:2020/api/v2/metrics"
    "http://localhost:2021/api/v2/metrics"
  )

  echo "[INFO] Waiting for all services to be ready..."

  # An arithmetic for-loop keeps the counter increment out of errexit's reach.
  for ((attempt = 1; attempt <= max_attempts; attempt++)); do
    all_ready=true
    for url in "${endpoints[@]}"; do
      if ! curl -fs "$url" >/dev/null 2>&1; then
        all_ready=false
        break # no point probing the rest during this attempt
      fi
    done

    if [[ "$all_ready" == true ]]; then
      echo "[OK] All services are ready!"
      return 0
    fi

    echo " Waiting for services... ($attempt/$max_attempts)"
    sleep "$interval"
  done

  echo "[ERROR] Services not ready after $max_attempts attempts"
  return 1
}
|
||||||
|
|
||||||
|
# Print a banner announcing a test step.
# Arguments:
#   $1 - step identifier (e.g. "1", "3a", "Health")
#   $2 - step description
# Outputs: a blank line, the step title and a separator line on stdout.
show_step() {
  # Default to empty strings so a missing argument does not abort under set -u.
  local step_id=${1:-}
  local description=${2:-}
  printf '\n🔄 Step %s: %s\n----------------------------------------\n' \
    "$step_id" "$description"
}
|
||||||
|
|
||||||
|
# Report the outcome of a test step and stop the script on failure.
# Arguments:
#   $1 - label of the step being reported
#   $2 - exit status to judge (optional; defaults to the status of the
#        command that ran immediately before verify_step was called)
# Outputs: a SUCCESS or FAILED line on stdout.
# Exits the script with status 1 when the judged status is non-zero.
# Note: with `set -e` active, an unguarded failing command ends the script
# before verify_step runs. To get the FAILED report, capture the status and
# pass it explicitly: rc=0; some_cmd || rc=$?; verify_step "label" "$rc"
verify_step() {
  # $? has to be read first, before any other command overwrites it.
  local rc=${2:-$?}
  local label=${1:-}

  if [[ "$rc" -eq 0 ]]; then
    echo "✅ $label - SUCCESS"
  else
    echo "❌ $label - FAILED"
    exit 1
  fi
}
|
||||||
|
|
||||||
|
# ---- End-to-end test flow ----------------------------------------------------
# Each numbered script runs under `set -e`, so a failing script stops the test
# immediately; verify_step then prints the success line for the step.

show_step "1" "Bootstrap - Initialize environment"
./scripts/01_bootstrap.sh
verify_step "Bootstrap"

show_step "2" "Startup - Start all services"
./scripts/02_up.sh
verify_step "Service startup"

# Wait until every service is fully ready
wait_for_services || exit 1

# Record the log count before any test data is sent
initial_count=$(get_log_count)
echo "[INFO] Initial log count: $initial_count"

show_step "3a" "Send test data - Host01"
./scripts/03_send_test_host01.sh
verify_step "Test data sending (host01)"

show_step "3b" "Send test data - Host02"
./scripts/03_send_test_host02.sh
verify_step "Test data sending (host02)"

# Give the pipeline time to process the data
echo "[INFO] Waiting for data to be processed..."
sleep 10

show_step "4" "Verify data - Query Elasticsearch"
./scripts/04_query_es.sh
verify_step "Data verification"

# Record the log count after the test data was sent
final_count=$(get_log_count)
echo "[INFO] Final log count: $final_count"

# The log count must have increased
if [[ "$final_count" -gt "$initial_count" ]]; then
  added_logs=$((final_count - initial_count))
  echo "✅ Log count verification - SUCCESS: Added $added_logs logs (from $initial_count to $final_count)"
else
  echo "❌ Log count verification - FAILED: Expected count to increase, but got $initial_count -> $final_count"
  exit 1
fi

# Check the expected minimum number of logs (every host should have sent some)
expected_min_logs=4 # at least a few logs are expected
if [[ "$final_count" -ge "$expected_min_logs" ]]; then
  echo "✅ Minimum log threshold - SUCCESS: $final_count logs (>= $expected_min_logs expected)"
else
  echo "❌ Minimum log threshold - FAILED: Only $final_count logs (>= $expected_min_logs expected)"
  exit 1
fi

# Check service health
show_step "Health" "Check service health"
echo "[INFO] Checking service health..."

# Elasticsearch cluster status. `|| true` stops errexit/pipefail from aborting
# silently when the request fails or the status field is missing; the check
# below then reports the (empty) status as unhealthy.
es_health=$(curl -s "http://localhost:9200/_cluster/health" \
  | grep -o '"status":"[^"]*"' \
  | cut -d'"' -f4) || true
if [[ "$es_health" == "green" || "$es_health" == "yellow" ]]; then
  echo "✅ Elasticsearch health: $es_health"
  es_healthy=true
else
  echo "❌ Elasticsearch health: $es_health"
  es_healthy=false
fi

# Kibana status (reported as a warning only)
if curl -fs "http://localhost:5601/api/status" >/dev/null 2>&1; then
  kb_status="available"
  echo "✅ Kibana status: $kb_status"
else
  kb_status="unavailable"
  echo "⚠️ Kibana status: $kb_status"
fi

# Fluent Bit metrics (reported as a warning only). The value falls back to 0
# when the metric cannot be read, so the numeric comparison below stays valid.
fb_host01_uptime=$(curl -s "http://localhost:2020/api/v2/metrics" \
  | grep "fluentbit_uptime" | head -1 | grep -o "[0-9]\+$") || true
fb_host01_uptime=${fb_host01_uptime:-0}
fb_host02_uptime=$(curl -s "http://localhost:2021/api/v2/metrics" \
  | grep "fluentbit_uptime" | head -1 | grep -o "[0-9]\+$") || true
fb_host02_uptime=${fb_host02_uptime:-0}

if [[ "$fb_host01_uptime" -gt 0 && "$fb_host02_uptime" -gt 0 ]]; then
  echo "✅ Fluent-Bit services: host01 uptime=${fb_host01_uptime}s, host02 uptime=${fb_host02_uptime}s"
else
  echo "⚠️ Fluent-Bit services: host01 uptime=${fb_host01_uptime}s, host02 uptime=${fb_host02_uptime}s"
fi

# An unhealthy Elasticsearch cluster fails the test instead of being printed
# and then ignored. verify_step reads the status of the command before it,
# which here is the successful [[ ]] test guarding the branch.
if [[ "$es_healthy" == true ]]; then
  verify_step "Service health check"
else
  echo "❌ Service health check - FAILED"
  exit 1
fi

show_step "5" "Cleanup - Stop all services"
./scripts/05_down.sh
verify_step "Service cleanup"

# Compute the total test time
test_end_time=$(date +%s)
total_time=$((test_end_time - test_start_time))

echo ""
echo "======================================="
echo "🎉 END-TO-END TEST COMPLETED SUCCESSFULLY!"
echo "======================================="
echo "📊 Test Summary:"
echo " • Initial logs: $initial_count"
echo " • Final logs: $final_count"
echo " • Added logs: $added_logs"
echo " • Total time: ${total_time}s"
echo " • ES health: $es_health"
echo " • Kibana status: $kb_status"
# NOTE(review): nothing in this script checks DNS itself; presumably one of the
# numbered scripts does - confirm before relying on this summary line.
echo " • DNS resolv: ✅ Passed (ES domain verified)"
echo " • All services started and stopped successfully"
echo ""
echo "✅ The ARGUS log system is working correctly!"
echo ""
|
67
src/metric/prometheus/Dockerfile
Normal file
67
src/metric/prometheus/Dockerfile
Normal file
@ -0,0 +1,67 @@
|
|||||||
|
FROM ubuntu/prometheus:3-24.04_stable

# Package installation and ownership changes below require root. supervisord
# also runs as root and starts Prometheus as "nobody" (see supervisord.conf).
USER root

# Pass --build-arg USE_INTRANET=true to build against the intranet apt mirror.
ARG USE_INTRANET=false

# Intranet apt source used while building.
# NOTE(review): the base image tag says 24.04 while the mirror path and suite
# are ubuntu2204/jammy - confirm the mirror matches the base image release.
# NOTE(review): [trusted=yes] and the Verify-Peer/Verify-Host overrides switch
# off package signature and TLS verification; only acceptable on a trusted network.
RUN if [ "$USE_INTRANET" = "true" ]; then \
        echo "Configuring intranet apt sources..." && \
        cp /etc/apt/sources.list /etc/apt/sources.list.bak && \
        echo "deb [trusted=yes] http://10.68.64.1/ubuntu2204/ jammy main" > /etc/apt/sources.list && \
        echo 'Acquire::https::Verify-Peer "false";' > /etc/apt/apt.conf.d/99disable-ssl-check && \
        echo 'Acquire::https::Verify-Host "false";' >> /etc/apt/apt.conf.d/99disable-ssl-check; \
    fi

# Common tools (supervisor manages the Prometheus process; net-tools provides
# ifconfig, which the start script uses to read the container IP)
RUN apt-get update && \
    apt-get install -y supervisor net-tools inetutils-ping vim && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*

# For the deployment environment, switch the apt source to the deployment mirror
RUN if [ "$USE_INTRANET" = "true" ]; then \
        echo "deb [trusted=yes] https://10.92.132.52/mirrors/ubuntu2204/ jammy main" > /etc/apt/sources.list; \
    fi

# supervisor log directory
RUN mkdir -p /var/log/supervisor

# Base path for Prometheus configuration and data
ENV PROMETHEUS_BASE_PATH=/private/argus/metric/prometheus

# UID/GID assigned to the user that runs Prometheus (overridable at build time)
ARG PROMETHEUS_UID=2133
ARG PROMETHEUS_GID=2015
ENV PROMETHEUS_UID=${PROMETHEUS_UID}
ENV PROMETHEUS_GID=${PROMETHEUS_GID}

# Create the directory layout and make /prometheus (the TSDB path passed by the
# start script) a symlink to the base path
RUN mkdir -p ${PROMETHEUS_BASE_PATH}/rules \
    && mkdir -p ${PROMETHEUS_BASE_PATH}/targets \
    && mkdir -p /private/argus/etc \
    && rm -rf /prometheus \
    && ln -s ${PROMETHEUS_BASE_PATH} /prometheus

# Re-number nobody/nogroup and hand over the directories Prometheus uses.
# /private/argus/etc is included because the start script runs as nobody and
# writes the container IP record into that directory.
RUN usermod -u ${PROMETHEUS_UID} nobody && \
    groupmod -g ${PROMETHEUS_GID} nogroup && \
    chown -h nobody:nogroup /prometheus && \
    chown -R nobody:nogroup /private/argus/metric /private/argus/etc /etc/prometheus && \
    chown -R nobody:nogroup ${PROMETHEUS_BASE_PATH}

# supervisor configuration
COPY supervisord.conf /etc/supervisor/conf.d/supervisord.conf

# Start script
COPY start-prometheus-supervised.sh /usr/local/bin/start-prometheus-supervised.sh
RUN chmod +x /usr/local/bin/start-prometheus-supervised.sh

# Prometheus configuration template (rendered by the start script at startup)
COPY prometheus.yml /etc/prometheus/prometheus.yml

EXPOSE 9090

ENTRYPOINT ["/usr/bin/supervisord", "-c", "/etc/supervisor/conf.d/supervisord.conf", "-n"]
|
126
src/metric/prometheus/README.md
Normal file
126
src/metric/prometheus/README.md
Normal file
@ -0,0 +1,126 @@
|
|||||||
|
# Prometheus Docker 镜像配置
|
||||||
|
|
||||||
|
## 环境变量配置
|
||||||
|
|
||||||
|
### PROMETHEUS_BASE_PATH
|
||||||
|
|
||||||
|
设置 Prometheus 配置和数据的基础路径。
|
||||||
|
|
||||||
|
**默认值**: `/private/argus/metric/prometheus`
|
||||||
|
|
||||||
|
**用途**:
|
||||||
|
- 配置文件存储路径: `${PROMETHEUS_BASE_PATH}/prometheus.yml`
|
||||||
|
- 规则文件路径: `${PROMETHEUS_BASE_PATH}/rules/*.yml`
|
||||||
|
- 监控目标文件路径: `${PROMETHEUS_BASE_PATH}/targets/`
|
||||||
|
|
||||||
|
## 使用示例
|
||||||
|
|
||||||
|
### 1. 使用默认路径
|
||||||
|
```bash
|
||||||
|
docker run -d \
|
||||||
|
--name prometheus \
|
||||||
|
-p 9090:9090 \
|
||||||
|
-v /host/prometheus/data:/private/argus/metric/prometheus \
|
||||||
|
prometheus:latest
|
||||||
|
```
|
||||||
|
|
||||||
|
### 2. 自定义基础路径
|
||||||
|
```bash
|
||||||
|
docker run -d \
|
||||||
|
--name prometheus \
|
||||||
|
-p 9090:9090 \
|
||||||
|
-e PROMETHEUS_BASE_PATH=/custom/prometheus/path \
|
||||||
|
-v /host/prometheus/data:/custom/prometheus/path \
|
||||||
|
prometheus:latest
|
||||||
|
```
|
||||||
|
|
||||||
|
### 3. Kubernetes 部署示例
|
||||||
|
```yaml
|
||||||
|
apiVersion: apps/v1
|
||||||
|
kind: Deployment
|
||||||
|
metadata:
|
||||||
|
name: prometheus
|
||||||
|
spec:
|
||||||
|
replicas: 1
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app: prometheus
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
labels:
|
||||||
|
app: prometheus
|
||||||
|
spec:
|
||||||
|
containers:
|
||||||
|
- name: prometheus
|
||||||
|
image: prometheus:latest
|
||||||
|
env:
|
||||||
|
- name: PROMETHEUS_BASE_PATH
|
||||||
|
value: "/data/prometheus"
|
||||||
|
ports:
|
||||||
|
- containerPort: 9090
|
||||||
|
volumeMounts:
|
||||||
|
- name: prometheus-data
|
||||||
|
mountPath: /data/prometheus
|
||||||
|
volumes:
|
||||||
|
- name: prometheus-data
|
||||||
|
persistentVolumeClaim:
|
||||||
|
claimName: prometheus-pvc
|
||||||
|
```
|
||||||
|
|
||||||
|
## 目录结构
|
||||||
|
|
||||||
|
容器启动后会在 `${PROMETHEUS_BASE_PATH}` 下创建以下目录结构:
|
||||||
|
|
||||||
|
```
|
||||||
|
${PROMETHEUS_BASE_PATH}/
|
||||||
|
├── prometheus.yml # 主配置文件
|
||||||
|
├── rules/ # 告警规则目录
|
||||||
|
│ └── *.yml
|
||||||
|
└── targets/ # 监控目标目录
|
||||||
|
├── node_exporter.json
|
||||||
|
└── dcgm_exporter.json
|
||||||
|
```
|
||||||
|
|
||||||
|
## 动态配置
|
||||||
|
|
||||||
|
- **规则文件**: 在 `rules/` 目录下添加或修改 `.yml` 文件后,需通过 Prometheus 的 `/-/reload` 端点重新加载配置(或重启 Prometheus)才会生效
|
||||||
|
- **监控目标**: 修改 `targets/` 目录下的 JSON 文件即可动态更新监控目标
|
||||||
|
- **主配置**: 修改 `prometheus.yml` 后可通过 Prometheus 的 `/-/reload` 端点重新加载配置
|
||||||
|
|
||||||
|
## 权限管理
|
||||||
|
|
||||||
|
### 默认路径权限
|
||||||
|
- 默认路径 `/private/argus/metric/prometheus` 在 Dockerfile 中已设置正确的权限
|
||||||
|
- nobody 用户(UID: 2133, GID: 2015)拥有完全读写权限
|
||||||
|
|
||||||
|
### 自定义路径权限
|
||||||
|
- 当使用自定义 `PROMETHEUS_BASE_PATH` 时,启动脚本会自动创建目录并设置权限
|
||||||
|
- 确保 nobody 用户对自定义路径有读写权限
|
||||||
|
|
||||||
|
### 挂载卷注意事项
|
||||||
|
1. **主机目录权限**: 确保挂载的主机目录对 nobody 用户(UID: 2133)可写
|
||||||
|
2. **SELinux**: 如果使用 SELinux,可能需要设置适当的上下文
|
||||||
|
3. **Docker 用户映射**: 确保容器内的 nobody 用户与主机用户权限匹配
|
||||||
|
|
||||||
|
## 故障排除
|
||||||
|
|
||||||
|
### 权限问题
|
||||||
|
如果遇到权限错误,可以检查:
|
||||||
|
```bash
|
||||||
|
# 检查目录权限
|
||||||
|
ls -la /path/to/prometheus/data
|
||||||
|
|
||||||
|
# 检查用户映射
|
||||||
|
id nobody
|
||||||
|
|
||||||
|
# 手动修复权限
|
||||||
|
chown -R 2133:2015 /path/to/prometheus/data
|
||||||
|
chmod -R 755 /path/to/prometheus/data
|
||||||
|
```
|
||||||
|
|
||||||
|
## 注意事项
|
||||||
|
|
||||||
|
1. 确保挂载的目录有适当的读写权限
|
||||||
|
2. 配置文件会在容器启动时自动生成,无需手动创建
|
||||||
|
3. 可以通过修改环境变量 `PROMETHEUS_BASE_PATH` 来改变配置文件、规则文件和监控目标文件的路径,无需重新构建镜像;TSDB 数据目录固定为 `/prometheus`(镜像构建时已链接到默认路径),不随该变量变化
|
||||||
|
4. 自定义路径的目录会在启动时自动创建并设置权限
|
@ -1,15 +1,27 @@
|
|||||||
global:
|
global:
|
||||||
scrape_interval: 15s
|
scrape_interval: 15s
|
||||||
|
evaluation_interval: 15s
|
||||||
|
scrape_timeout: 10s
|
||||||
|
|
||||||
|
# 对接 AlertManager
|
||||||
|
alerting:
|
||||||
|
alertmanagers:
|
||||||
|
- static_configs:
|
||||||
|
- targets: []
|
||||||
|
|
||||||
|
# 规则目录
|
||||||
|
rule_files:
|
||||||
|
- "${PROMETHEUS_BASE_PATH}/rules/*.yml"
|
||||||
|
|
||||||
scrape_configs:
|
scrape_configs:
|
||||||
- job_name: "node"
|
- job_name: "node"
|
||||||
file_sd_configs:
|
file_sd_configs:
|
||||||
- files:
|
- files:
|
||||||
- "targets/node_exporter.json"
|
- "${PROMETHEUS_BASE_PATH}/targets/node_exporter.json"
|
||||||
refresh_interval: 30s
|
refresh_interval: 30s
|
||||||
|
|
||||||
- job_name: "dcgm"
|
- job_name: "dcgm"
|
||||||
file_sd_configs:
|
file_sd_configs:
|
||||||
- files:
|
- files:
|
||||||
- "targets/dcgm_exporter.json"
|
- "${PROMETHEUS_BASE_PATH}/targets/dcgm_exporter.json"
|
||||||
refresh_interval: 30s
|
refresh_interval: 30s
|
||||||
|
26
src/metric/prometheus/start-prometheus-supervised.sh
Normal file
26
src/metric/prometheus/start-prometheus-supervised.sh
Normal file
@ -0,0 +1,26 @@
|
|||||||
|
#!/bin/bash
# Start script run by supervisor: renders prometheus.yml for the configured
# base path, records the container IP, then replaces itself with the
# Prometheus server process.
set -euo pipefail

echo "[INFO] Starting Prometheus under supervisor..."

PROMETHEUS_BASE_PATH=${PROMETHEUS_BASE_PATH:-/private/argus/metric/prometheus}
DOMAIN=prom.metric.argus.com

echo "[INFO] Prometheus base path: ${PROMETHEUS_BASE_PATH}"

# Make sure the layout exists before writing into it. This matters for a
# custom base path; for an existing directory it is a no-op.
mkdir -p "${PROMETHEUS_BASE_PATH}/rules" "${PROMETHEUS_BASE_PATH}/targets"

# Generate the configuration file by substituting the literal
# ${PROMETHEUS_BASE_PATH} placeholder in the template.
echo "[INFO] Generating prometheus.yml with base path: ${PROMETHEUS_BASE_PATH}"
# Escape characters that are special in a sed replacement (&, \ and the |
# delimiter) so an unusual path cannot corrupt the substitution.
escaped_base_path=$(printf '%s' "${PROMETHEUS_BASE_PATH}" | sed 's/[&|\\]/\\&/g')
sed "s|\${PROMETHEUS_BASE_PATH}|${escaped_base_path}|g" \
  /etc/prometheus/prometheus.yml > "${PROMETHEUS_BASE_PATH}/prometheus.yml"

# Record the container IP in a file named after the service domain
IP=$(ifconfig eth0 | awk '/inet /{print $2}')
if [[ -z "${IP}" ]]; then
  echo "[WARN] No IPv4 address found on eth0; the record for ${DOMAIN} will be empty" >&2
fi
echo "current IP: ${IP}"
echo "${IP}" > "/private/argus/etc/${DOMAIN}"

# exec so Prometheus takes over this process and is the one supervisor manages
exec /bin/prometheus \
  --config.file="${PROMETHEUS_BASE_PATH}/prometheus.yml" \
  --storage.tsdb.path=/prometheus \
  --web.enable-lifecycle \
  --web.console.libraries=/usr/share/prometheus/console_libraries \
  --web.console.templates=/usr/share/prometheus/consoles
|
27
src/metric/prometheus/supervisord.conf
Normal file
27
src/metric/prometheus/supervisord.conf
Normal file
@ -0,0 +1,27 @@
|
|||||||
|
; supervisord configuration for the Prometheus container.

; Control socket used by supervisorctl
[unix_http_server]
chmod=0700
file=/var/run/supervisor.sock

[supervisorctl]
serverurl=unix:///var/run/supervisor.sock

; RPC interface exposed over the control socket
[rpcinterface:supervisor]
supervisor.rpcinterface_factory = supervisor.rpcinterface:make_main_rpcinterface

; The daemon itself: stays in the foreground and runs as root
[supervisord]
user=root
nodaemon=true
pidfile=/var/run/supervisord.pid
logfile=/var/log/supervisor/supervisord.log

; Prometheus, started through the wrapper script as the nobody user
[program:prometheus]
user=nobody
command=/usr/local/bin/start-prometheus-supervised.sh
; restart policy
autorestart=true
startsecs=30
startretries=3
; shutdown: signal the whole process group
stopwaitsecs=30
stopasgroup=true
killasgroup=true
; log files
stdout_logfile=/var/log/supervisor/prometheus.log
stderr_logfile=/var/log/supervisor/prometheus_error.log
|
Loading…
x
Reference in New Issue
Block a user