Co-authored-by: xiuting.xu <xiutingxt.xu@gmail.com> Reviewed-on: #21 Reviewed-by: huhy <husteryezi@163.com> Reviewed-by: sundapeng <sundp@mail.zgclab.edu.cn> Reviewed-by: yuyr <yuyr@zgclab.edu.cn>
38 lines
1.5 KiB
YAML
38 lines
1.5 KiB
YAML
# Prometheus alerting rules: one rule group with four alerts
# (instance down, high CPU, high memory, low disk space).
# The node_* metrics are presumably exported by node_exporter - confirm
# against the scrape configuration.
groups:
  - name: example-rules
    interval: 30s  # evaluate this group every 30 seconds
    rules:
      # Fires when a scrape target has reported up == 0 for one minute.
      - alert: InstanceDown
        expr: up == 0
        for: 1m
        labels:
          severity: critical
        annotations:
          summary: "实例 {{ $labels.instance }} 已宕机"
          description: "{{ $labels.instance }} 在 {{ $labels.job }} 中无响应超过 1 分钟。"

      # Fires when the non-idle CPU share, averaged per instance, stays
      # above 80% for five minutes.
      # rate() is used instead of irate(): irate() only looks at the last two
      # samples in the window, so a brief dip can reset the `for` timer and
      # keep the alert from ever firing.
      - alert: HighCpuUsage
        expr: 100 - (avg by (instance) (rate(node_cpu_seconds_total{mode="idle"}[5m])) * 100) > 80
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: "CPU 使用率过高"
          description: "实例 {{ $labels.instance }} CPU 使用率超过 80% 持续 5 分钟。"

      # Fires when (MemTotal - MemAvailable) / MemTotal stays above 80%
      # for five minutes.
      - alert: HighMemoryUsage
        expr: (node_memory_MemTotal_bytes - node_memory_MemAvailable_bytes) / node_memory_MemTotal_bytes * 100 > 80
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: "内存使用率过高"
          description: "实例 {{ $labels.instance }} 内存使用率超过 80% 持续 5 分钟。"

      # Fires when a filesystem (tmpfs and overlay excluded) stays more than
      # 90% used for ten minutes. The folded scalar below yields the same
      # single-line expression; it is only wrapped for readability.
      # NOTE(review): node_filesystem_free_bytes counts root-reserved blocks
      # as free; node_filesystem_avail_bytes may be the intended metric -
      # confirm before changing.
      - alert: DiskSpaceLow
        expr: >-
          (node_filesystem_size_bytes{fstype!~"tmpfs|overlay"}
          - node_filesystem_free_bytes{fstype!~"tmpfs|overlay"})
          / node_filesystem_size_bytes{fstype!~"tmpfs|overlay"} * 100 > 90
        for: 10m
        labels:
          severity: warning
        annotations:
          summary: "磁盘空间不足"
          description: "实例 {{ $labels.instance }} 磁盘空间不足超过 90% 持续 10 分钟。"