argus/src/log/tests/scripts/03_send_test_host01.sh
yuyr 1e5e91b193 dev_1.0.0_yuyr_2:重新提交 PR,增加 master/agent 以及系统集成测试 (#17)
Reviewed-on: #17
Reviewed-by: sundapeng <sundp@mail.zgclab.edu.cn>
Reviewed-by: xuxt <xuxt@zgclab.edu.cn>
2025-10-11 15:04:46 +08:00

46 lines
1.6 KiB
Bash
Executable File

#!/usr/bin/env bash
# Strict mode: stop on the first failing command, treat unset variables as
# errors, and let a failure in any pipeline stage fail the whole pipeline.
set -o errexit -o nounset -o pipefail

# Name of the fluent-bit-host01 container that receives the test log lines.
container_name='logging-mvp-fluent-bit-host01-1'
#######################################
# Polls `docker ps` until a running container with exactly the given name
# is listed.
# Arguments:
#   $1 - container name; compared as a fixed string against whole lines
#   $2 - optional maximum number of polls (default: 30)
#   $3 - optional pause between polls, passed to `sleep` (default: 5)
# Outputs:
#   One "[INFO]" progress line on stdout after every unsuccessful poll that
#   will be followed by another poll.
# Returns:
#   0 as soon as the name is listed, 1 when every poll came back without it.
#######################################
wait_for_container() {
  local name="$1"
  local attempts="${2:-30}"
  local delay="${3:-5}"
  local i

  for ((i = 1; i <= attempts; i++)); do
    # `--` keeps a name that starts with '-' from being parsed as a grep option.
    if docker ps --format '{{.Names}}' | grep -Fx -- "$name" >/dev/null; then
      return 0
    fi

    # Out of retries: fail right away instead of announcing and sleeping
    # through one more wait that no poll would follow.
    if ((i == attempts)); then
      break
    fi

    echo "[INFO] 等待容器 $name 启动中... ($i/$attempts)"
    sleep "$delay"
  done

  return 1
}
# Give up when the Fluent Bit container never shows up; the diagnostic goes
# to stderr so it is not mixed into the script's regular output.
if ! wait_for_container "$container_name"; then
  echo "[ERROR] Fluent Bit容器 $container_name 未运行" >&2
  exit 1
fi

#######################################
# Appends one timestamped line to a file inside the container.
# Globals:   container_name (read)
# Arguments: $1 - destination path inside the container
#            $2 - text that follows the timestamp
# The timestamp comes from `date` run inside the container. Path and text are
# handed to the inner shell as positional parameters, so they need no nested
# escaping and are never parsed as shell code.
#######################################
append_stamped_line() {
  docker exec "$container_name" sh -c \
    'printf "%s %s\n" "$(date "+%F %T")" "$2" >> "$1"' sh "$1" "$2"
}

#######################################
# Appends the given lines verbatim (one per argument) to a file inside the
# container.
# Globals:   container_name (read)
# Arguments: $1 - destination path inside the container
#            $2.. - lines to append
#######################################
append_raw_lines() {
  docker exec "$container_name" sh -c \
    'dest=$1; shift; printf "%s\n" "$@" >> "$dest"' sh "$@"
}

# Create the log directories.
docker exec "$container_name" mkdir -p /logs/train /logs/infer

# Write training logs (host01).
append_stamped_line /logs/train/train-demo.log \
  'INFO [host01] training step=1 loss=1.23 model=bert'
append_stamped_line /logs/train/train-demo.log \
  'INFO [host01] training step=2 loss=1.15 model=bert'

# Write inference logs (host01): an ERROR line followed by its stack trace.
append_stamped_line /logs/infer/infer-demo.log \
  'ERROR [host01] inference failed on batch=1'
append_raw_lines /logs/infer/infer-demo.log \
  'Traceback (most recent call last):' \
  'File "inference.py", line 15, in <module>' \
  'raise RuntimeError("CUDA out of memory on host01")' \
  'RuntimeError: CUDA out of memory on host01'

echo "[OK] 已通过docker exec写入测试日志到 host01 容器内:"
echo " - /logs/train/train-demo.log"
echo " - /logs/infer/infer-demo.log"