31 lines
1.3 KiB
Bash
Executable File
31 lines
1.3 KiB
Bash
Executable File
#!/usr/bin/env bash
#
# Writes demo training and inference log entries into the host01 Fluent Bit
# container via `docker exec`, appending to files under /logs inside the
# container.
#
# Usage: run with no arguments. Requires the docker CLI and a running
# container whose name equals CONTAINER_NAME below.
# Exit status: 0 on success, 1 if the container is not running, otherwise the
# status of the first failing docker command (set -e).

set -euo pipefail

# Name of the fluent-bit-host01 container.
readonly CONTAINER_NAME="logging-mvp-fluent-bit-host01-1"
# Log files (paths inside the container) that receive the demo entries.
readonly TRAIN_LOG="/logs/train/train-demo.log"
readonly INFER_LOG="/logs/infer/infer-demo.log"

#######################################
# Abort unless CONTAINER_NAME exists and is running.
# Globals:   CONTAINER_NAME (read)
# Outputs:   error message on stderr when the container is not running
# Returns:   exits the script with status 1 on failure
#######################################
require_running_container() {
  local running
  # Query this exact container instead of grepping `docker ps` output: grep
  # matches the name as a substring anywhere in the table, and with pipefail
  # an early `grep -q` exit can make the pipeline fail when `docker ps`
  # receives SIGPIPE.
  running="$(docker container inspect --format '{{.State.Running}}' \
    "$CONTAINER_NAME")" || running="false"
  if [[ "$running" != "true" ]]; then
    printf '[ERROR] Fluent Bit容器 %s 未运行\n' "$CONTAINER_NAME" >&2
    exit 1
  fi
}

#######################################
# Append one timestamped line to a log file inside the container.
# The timestamp comes from `date` run inside the container.
# Globals:   CONTAINER_NAME (read)
# Arguments: $1 - log file path inside the container
#            $2 - message text placed after the timestamp
#######################################
append_log_line() {
  local log_file=$1
  local message=$2
  # Message and path travel as positional parameters so they are never
  # re-parsed as shell code by the container's sh.
  docker exec "$CONTAINER_NAME" sh -c \
    'printf "%s %s\n" "$(date "+%F %T")" "$1" >> "$2"' \
    sh "$message" "$log_file"
}

#######################################
# Append a sample multi-line Python traceback to the inference log.
# Globals:   CONTAINER_NAME, INFER_LOG (read)
#######################################
append_traceback() {
  # -i keeps stdin attached so the here-document is streamed into the
  # container. Frame lines use the standard CPython traceback indentation
  # (2 and 4 spaces); NOTE(review): presumably the multiline parser under
  # test keys off that leading whitespace - confirm against the parser config.
  docker exec -i "$CONTAINER_NAME" sh -c 'cat >> "$1"' sh "$INFER_LOG" <<'STACK'
Traceback (most recent call last):
  File "inference.py", line 15, in <module>
    raise RuntimeError("CUDA out of memory on host01")
RuntimeError: CUDA out of memory on host01
STACK
}

main() {
  # Check that the container exists and is running.
  require_running_container

  # Create the log directories.
  docker exec "$CONTAINER_NAME" mkdir -p "${TRAIN_LOG%/*}" "${INFER_LOG%/*}"

  # Write training logs (host01).
  append_log_line "$TRAIN_LOG" \
    "INFO [host01] training step=1 loss=1.23 model=bert"
  append_log_line "$TRAIN_LOG" \
    "INFO [host01] training step=2 loss=1.15 model=bert"

  # Write inference logs (host01): one error line followed by its traceback.
  append_log_line "$INFER_LOG" "ERROR [host01] inference failed on batch=1"
  append_traceback

  echo "[OK] 已通过docker exec写入测试日志到 host01 容器内:"
  echo " - $TRAIN_LOG"
  echo " - $INFER_LOG"
}

main "$@"