991 lines
24 KiB
JSON
991 lines
24 KiB
JSON
{
|
|
"annotations": {
|
|
"list": [
|
|
{
|
|
"builtIn": 1,
|
|
"datasource": {
|
|
"type": "grafana",
|
|
"uid": "-- Grafana --"
|
|
},
|
|
"enable": true,
|
|
"hide": true,
|
|
"iconColor": "rgba(0, 211, 255, 1)",
|
|
"name": "Annotations & Alerts",
|
|
"type": "dashboard"
|
|
}
|
|
]
|
|
},
|
|
"editable": true,
|
|
"fiscalYearStartMonth": 0,
|
|
"graphTooltip": 0,
|
|
"id": 9,
|
|
"links": [],
|
|
"panels": [
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus"
|
|
},
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"color": {
|
|
"mode": "palette-classic"
|
|
},
|
|
"custom": {
|
|
"axisBorderShow": false,
|
|
"axisCenteredZero": false,
|
|
"axisColorMode": "text",
|
|
"axisLabel": "Load",
|
|
"axisPlacement": "auto",
|
|
"barAlignment": 0,
|
|
"drawStyle": "line",
|
|
"fillOpacity": 0,
|
|
"gradientMode": "none",
|
|
"hideFrom": {
|
|
"legend": false,
|
|
"tooltip": false,
|
|
"viz": false
|
|
},
|
|
"insertNulls": false,
|
|
"lineInterpolation": "linear",
|
|
"lineWidth": 2,
|
|
"pointSize": 5,
|
|
"scaleDistribution": {
|
|
"type": "linear"
|
|
},
|
|
"showPoints": "auto",
|
|
"spanNulls": false,
|
|
"stacking": {
|
|
"group": "A",
|
|
"mode": "none"
|
|
},
|
|
"thresholdsStyle": {
|
|
"mode": "off"
|
|
}
|
|
},
|
|
"mappings": [],
|
|
"thresholds": {
|
|
"mode": "absolute",
|
|
"steps": [
|
|
{
|
|
"color": "green",
|
|
"value": null
|
|
},
|
|
{
|
|
"color": "red",
|
|
"value": 80
|
|
}
|
|
]
|
|
},
|
|
"unit": "short"
|
|
},
|
|
"overrides": []
|
|
},
|
|
"gridPos": {
|
|
"h": 8,
|
|
"w": 12,
|
|
"x": 0,
|
|
"y": 0
|
|
},
|
|
"id": 101,
|
|
"options": {
|
|
"legend": {
|
|
"calcs": [],
|
|
"displayMode": "list",
|
|
"placement": "bottom",
|
|
"showLegend": true
|
|
},
|
|
"tooltip": {
|
|
"mode": "multi",
|
|
"sort": "none"
|
|
}
|
|
},
|
|
"targets": [
|
|
{
|
|
"expr": "node_load1{hostname=\"$hostname\"}",
|
|
"legendFormat": "{{hostname}} load1",
|
|
"refId": "A"
|
|
},
|
|
{
|
|
"expr": "node_load5{hostname=\"$hostname\"}",
|
|
"legendFormat": "{{hostname}} load5",
|
|
"refId": "B"
|
|
},
|
|
{
|
|
"expr": "node_load15{hostname=\"$hostname\"}",
|
|
"legendFormat": "{{hostname}} load15",
|
|
"refId": "C"
|
|
}
|
|
],
|
|
"title": "System Load",
|
|
"type": "timeseries"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus"
|
|
},
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"color": {
|
|
"mode": "palette-classic"
|
|
},
|
|
"custom": {
|
|
"axisBorderShow": false,
|
|
"axisCenteredZero": false,
|
|
"axisColorMode": "text",
|
|
"axisLabel": "",
|
|
"axisPlacement": "auto",
|
|
"barAlignment": 0,
|
|
"drawStyle": "line",
|
|
"fillOpacity": 0,
|
|
"gradientMode": "none",
|
|
"hideFrom": {
|
|
"legend": false,
|
|
"tooltip": false,
|
|
"viz": false
|
|
},
|
|
"insertNulls": false,
|
|
"lineInterpolation": "linear",
|
|
"lineWidth": 1,
|
|
"pointSize": 5,
|
|
"scaleDistribution": {
|
|
"type": "linear"
|
|
},
|
|
"showPoints": "auto",
|
|
"spanNulls": false,
|
|
"stacking": {
|
|
"group": "A",
|
|
"mode": "none"
|
|
},
|
|
"thresholdsStyle": {
|
|
"mode": "off"
|
|
}
|
|
},
|
|
"mappings": [],
|
|
"thresholds": {
|
|
"mode": "absolute",
|
|
"steps": [
|
|
{
|
|
"color": "green",
|
|
"value": null
|
|
},
|
|
{
|
|
"color": "red",
|
|
"value": 80
|
|
}
|
|
]
|
|
},
|
|
"unit": "percent"
|
|
},
|
|
"overrides": []
|
|
},
|
|
"gridPos": {
|
|
"h": 8,
|
|
"w": 12,
|
|
"x": 12,
|
|
"y": 0
|
|
},
|
|
"id": 1,
|
|
"options": {
|
|
"legend": {
|
|
"calcs": [],
|
|
"displayMode": "list",
|
|
"placement": "bottom",
|
|
"showLegend": true
|
|
},
|
|
"tooltip": {
|
|
"mode": "single",
|
|
"sort": "none"
|
|
}
|
|
},
|
|
"targets": [
|
|
{
|
|
"expr": "100 * (1 - avg by(hostname) (irate(node_cpu_seconds_total{mode=\"idle\",hostname=\"$hostname\"}[5m])))",
|
|
"legendFormat": "{{hostname}}",
|
|
"refId": "A"
|
|
}
|
|
],
|
|
"title": "CPU Usage",
|
|
"type": "timeseries"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus"
|
|
},
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"color": {
|
|
"mode": "palette-classic"
|
|
},
|
|
"custom": {
|
|
"axisBorderShow": false,
|
|
"axisCenteredZero": false,
|
|
"axisColorMode": "text",
|
|
"axisLabel": "%",
|
|
"axisPlacement": "auto",
|
|
"barAlignment": 0,
|
|
"drawStyle": "line",
|
|
"fillOpacity": 20,
|
|
"gradientMode": "none",
|
|
"hideFrom": {
|
|
"legend": false,
|
|
"tooltip": false,
|
|
"viz": false
|
|
},
|
|
"insertNulls": false,
|
|
"lineInterpolation": "linear",
|
|
"lineWidth": 2,
|
|
"pointSize": 4,
|
|
"scaleDistribution": {
|
|
"type": "linear"
|
|
},
|
|
"showPoints": "auto",
|
|
"spanNulls": false,
|
|
"stacking": {
|
|
"group": "A",
|
|
"mode": "none"
|
|
},
|
|
"thresholdsStyle": {
|
|
"mode": "off"
|
|
}
|
|
},
|
|
"mappings": [],
|
|
"thresholds": {
|
|
"mode": "absolute",
|
|
"steps": [
|
|
{
|
|
"color": "green",
|
|
"value": null
|
|
},
|
|
{
|
|
"color": "orange",
|
|
"value": 70
|
|
},
|
|
{
|
|
"color": "red",
|
|
"value": 90
|
|
}
|
|
]
|
|
},
|
|
"unit": "percent"
|
|
},
|
|
"overrides": []
|
|
},
|
|
"gridPos": {
|
|
"h": 8,
|
|
"w": 12,
|
|
"x": 0,
|
|
"y": 8
|
|
},
|
|
"id": 5,
|
|
"options": {
|
|
"legend": {
|
|
"calcs": [],
|
|
"displayMode": "list",
|
|
"placement": "bottom",
|
|
"showLegend": true
|
|
},
|
|
"tooltip": {
|
|
"mode": "single",
|
|
"sort": "none"
|
|
}
|
|
},
|
|
"targets": [
|
|
{
|
|
"expr": "100 * (1 - (node_memory_MemAvailable_bytes{hostname=\"$hostname\"} / node_memory_MemTotal_bytes{hostname=\"$hostname\"}))",
|
|
"legendFormat": "{{hostname}}",
|
|
"refId": "B"
|
|
}
|
|
],
|
|
"title": "Node Memory Usage",
|
|
"type": "timeseries"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus"
|
|
},
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"color": {
|
|
"mode": "palette-classic"
|
|
},
|
|
"custom": {
|
|
"axisBorderShow": false,
|
|
"axisCenteredZero": false,
|
|
"axisColorMode": "text",
|
|
"axisLabel": "Bytes/s",
|
|
"axisPlacement": "auto",
|
|
"barAlignment": 0,
|
|
"drawStyle": "line",
|
|
"fillOpacity": 20,
|
|
"gradientMode": "none",
|
|
"hideFrom": {
|
|
"legend": false,
|
|
"tooltip": false,
|
|
"viz": false
|
|
},
|
|
"insertNulls": false,
|
|
"lineInterpolation": "linear",
|
|
"lineWidth": 2,
|
|
"pointSize": 5,
|
|
"scaleDistribution": {
|
|
"type": "linear"
|
|
},
|
|
"showPoints": "auto",
|
|
"spanNulls": false,
|
|
"stacking": {
|
|
"group": "A",
|
|
"mode": "none"
|
|
},
|
|
"thresholdsStyle": {
|
|
"mode": "off"
|
|
}
|
|
},
|
|
"mappings": [],
|
|
"thresholds": {
|
|
"mode": "absolute",
|
|
"steps": [
|
|
{
|
|
"color": "green",
|
|
"value": null
|
|
},
|
|
{
|
|
"color": "red",
|
|
"value": 80
|
|
}
|
|
]
|
|
},
|
|
"unit": "Bps"
|
|
},
|
|
"overrides": []
|
|
},
|
|
"gridPos": {
|
|
"h": 8,
|
|
"w": 12,
|
|
"x": 12,
|
|
"y": 8
|
|
},
|
|
"id": 6,
|
|
"options": {
|
|
"legend": {
|
|
"calcs": [],
|
|
"displayMode": "list",
|
|
"placement": "bottom",
|
|
"showLegend": true
|
|
},
|
|
"tooltip": {
|
|
"mode": "multi",
|
|
"sort": "none"
|
|
}
|
|
},
|
|
"targets": [
|
|
{
|
|
"expr": "sum by(hostname) (rate(node_disk_read_bytes_total{device!~\"^(loop|ram|sr0).*\",hostname=\"$hostname\"}[5m]))",
|
|
"legendFormat": "{{hostname}} read",
|
|
"refId": "A"
|
|
},
|
|
{
|
|
"expr": "sum by(hostname) (rate(node_disk_written_bytes_total{device!~\"^(loop|ram|sr0).*\",hostname=\"$hostname\"}[5m]))",
|
|
"legendFormat": "{{hostname}} write",
|
|
"refId": "B"
|
|
}
|
|
],
|
|
"title": "Node Disk I/O (Bytes/s)",
|
|
"type": "timeseries"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus"
|
|
},
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"color": {
|
|
"mode": "palette-classic"
|
|
},
|
|
"custom": {
|
|
"axisBorderShow": false,
|
|
"axisCenteredZero": false,
|
|
"axisColorMode": "text",
|
|
"axisLabel": "Bytes/s",
|
|
"axisPlacement": "auto",
|
|
"barAlignment": 0,
|
|
"drawStyle": "line",
|
|
"fillOpacity": 0,
|
|
"gradientMode": "none",
|
|
"hideFrom": {
|
|
"legend": false,
|
|
"tooltip": false,
|
|
"viz": false
|
|
},
|
|
"insertNulls": false,
|
|
"lineInterpolation": "linear",
|
|
"lineWidth": 2,
|
|
"pointSize": 5,
|
|
"scaleDistribution": {
|
|
"type": "linear"
|
|
},
|
|
"showPoints": "auto",
|
|
"spanNulls": false,
|
|
"stacking": {
|
|
"group": "A",
|
|
"mode": "none"
|
|
},
|
|
"thresholdsStyle": {
|
|
"mode": "off"
|
|
}
|
|
},
|
|
"mappings": [],
|
|
"thresholds": {
|
|
"mode": "absolute",
|
|
"steps": [
|
|
{
|
|
"color": "green",
|
|
"value": null
|
|
},
|
|
{
|
|
"color": "red",
|
|
"value": 80
|
|
}
|
|
]
|
|
},
|
|
"unit": "Bps"
|
|
},
|
|
"overrides": []
|
|
},
|
|
"gridPos": {
|
|
"h": 8,
|
|
"w": 12,
|
|
"x": 0,
|
|
"y": 16
|
|
},
|
|
"id": 102,
|
|
"options": {
|
|
"legend": {
|
|
"calcs": [],
|
|
"displayMode": "list",
|
|
"placement": "bottom",
|
|
"showLegend": true
|
|
},
|
|
"tooltip": {
|
|
"mode": "multi",
|
|
"sort": "none"
|
|
}
|
|
},
|
|
"targets": [
|
|
{
|
|
"expr": "sum by(hostname)(rate(node_network_receive_bytes_total{device!~\"^(lo|docker.*)\",hostname=\"$hostname\"}[5m]))",
|
|
"legendFormat": "{{hostname}} RX",
|
|
"refId": "A"
|
|
},
|
|
{
|
|
"expr": "sum by(hostname)(rate(node_network_transmit_bytes_total{device!~\"^(lo|docker.*)\",hostname=\"$hostname\"}[5m]))",
|
|
"legendFormat": "{{hostname}} TX",
|
|
"refId": "B"
|
|
}
|
|
],
|
|
"title": "Network Traffic",
|
|
"type": "timeseries"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus"
|
|
},
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"color": {
|
|
"mode": "palette-classic"
|
|
},
|
|
"custom": {
|
|
"axisBorderShow": false,
|
|
"axisCenteredZero": false,
|
|
"axisColorMode": "text",
|
|
"axisLabel": "Processes",
|
|
"axisPlacement": "auto",
|
|
"barAlignment": 0,
|
|
"drawStyle": "line",
|
|
"fillOpacity": 0,
|
|
"gradientMode": "none",
|
|
"hideFrom": {
|
|
"legend": false,
|
|
"tooltip": false,
|
|
"viz": false
|
|
},
|
|
"insertNulls": false,
|
|
"lineInterpolation": "linear",
|
|
"lineWidth": 2,
|
|
"pointSize": 4,
|
|
"scaleDistribution": {
|
|
"type": "linear"
|
|
},
|
|
"showPoints": "auto",
|
|
"spanNulls": false,
|
|
"stacking": {
|
|
"group": "A",
|
|
"mode": "none"
|
|
},
|
|
"thresholdsStyle": {
|
|
"mode": "off"
|
|
}
|
|
},
|
|
"mappings": [],
|
|
"thresholds": {
|
|
"mode": "absolute",
|
|
"steps": [
|
|
{
|
|
"color": "green",
|
|
"value": null
|
|
},
|
|
{
|
|
"color": "orange",
|
|
"value": 200
|
|
},
|
|
{
|
|
"color": "red",
|
|
"value": 500
|
|
}
|
|
]
|
|
},
|
|
"unit": "short"
|
|
},
|
|
"overrides": []
|
|
},
|
|
"gridPos": {
|
|
"h": 8,
|
|
"w": 12,
|
|
"x": 12,
|
|
"y": 16
|
|
},
|
|
"id": 104,
|
|
"options": {
|
|
"legend": {
|
|
"calcs": [],
|
|
"displayMode": "list",
|
|
"placement": "bottom",
|
|
"showLegend": true
|
|
},
|
|
"tooltip": {
|
|
"mode": "multi",
|
|
"sort": "none"
|
|
}
|
|
},
|
|
"targets": [
|
|
{
|
|
"expr": "node_procs_running{hostname=\"$hostname\"}",
|
|
"legendFormat": "{{hostname}} Running",
|
|
"refId": "A"
|
|
},
|
|
{
|
|
"expr": "node_procs_blocked{hostname=\"$hostname\"}",
|
|
"legendFormat": "{{hostname}} Blocked",
|
|
"refId": "B"
|
|
}
|
|
],
|
|
"title": "Node Process Count",
|
|
"type": "timeseries"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus"
|
|
},
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"color": {
|
|
"mode": "palette-classic"
|
|
},
|
|
"custom": {
|
|
"axisBorderShow": false,
|
|
"axisCenteredZero": false,
|
|
"axisColorMode": "text",
|
|
"axisLabel": "GPU Utilization (%)",
|
|
"axisPlacement": "auto",
|
|
"barAlignment": 0,
|
|
"drawStyle": "line",
|
|
"fillOpacity": 10,
|
|
"gradientMode": "none",
|
|
"hideFrom": {
|
|
"legend": false,
|
|
"tooltip": false,
|
|
"viz": false
|
|
},
|
|
"insertNulls": false,
|
|
"lineInterpolation": "linear",
|
|
"lineWidth": 2,
|
|
"pointSize": 5,
|
|
"scaleDistribution": {
|
|
"type": "linear"
|
|
},
|
|
"showPoints": "auto",
|
|
"spanNulls": false,
|
|
"stacking": {
|
|
"group": "A",
|
|
"mode": "none"
|
|
},
|
|
"thresholdsStyle": {
|
|
"mode": "off"
|
|
}
|
|
},
|
|
"mappings": [],
|
|
"max": 100,
|
|
"min": 0,
|
|
"thresholds": {
|
|
"mode": "absolute",
|
|
"steps": [
|
|
{
|
|
"color": "green",
|
|
"value": null
|
|
},
|
|
{
|
|
"color": "orange",
|
|
"value": 80
|
|
},
|
|
{
|
|
"color": "red",
|
|
"value": 95
|
|
}
|
|
]
|
|
},
|
|
"unit": "percent"
|
|
},
|
|
"overrides": []
|
|
},
|
|
"gridPos": {
|
|
"h": 8,
|
|
"w": 12,
|
|
"x": 0,
|
|
"y": 24
|
|
},
|
|
"id": 301,
|
|
"options": {
|
|
"legend": {
|
|
"calcs": [],
|
|
"displayMode": "list",
|
|
"placement": "bottom",
|
|
"showLegend": true
|
|
},
|
|
"tooltip": {
|
|
"mode": "multi",
|
|
"sort": "none"
|
|
}
|
|
},
|
|
"targets": [
|
|
{
|
|
"expr": "DCGM_FI_DEV_GPU_UTIL{hostname=~\"$hostname\"}",
|
|
"legendFormat": "{{hostname}} GPU{{gpu}}",
|
|
"refId": "A"
|
|
}
|
|
],
|
|
"title": "GPU 利用率 (单卡)",
|
|
"type": "timeseries"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus"
|
|
},
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"color": {
|
|
"mode": "palette-classic"
|
|
},
|
|
"custom": {
|
|
"axisBorderShow": true,
|
|
"axisCenteredZero": false,
|
|
"axisColorMode": "text",
|
|
"axisLabel": "Memory Used (%)",
|
|
"axisPlacement": "left",
|
|
"barAlignment": 0,
|
|
"drawStyle": "line",
|
|
"fillOpacity": 10,
|
|
"gradientMode": "none",
|
|
"insertNulls": false,
|
|
"lineInterpolation": "linear",
|
|
"lineWidth": 2,
|
|
"pointSize": 5,
|
|
"scaleDistribution": {
|
|
"type": "linear"
|
|
},
|
|
"showPoints": "auto",
|
|
"spanNulls": false,
|
|
"stacking": {
|
|
"group": "A",
|
|
"mode": "none"
|
|
}
|
|
},
|
|
"mappings": [],
|
|
"max": 100,
|
|
"min": 0,
|
|
"thresholds": {
|
|
"mode": "absolute",
|
|
"steps": [
|
|
{
|
|
"color": "green"
|
|
},
|
|
{
|
|
"color": "orange",
|
|
"value": 80
|
|
},
|
|
{
|
|
"color": "red",
|
|
"value": 95
|
|
}
|
|
]
|
|
},
|
|
"unit": "percent"
|
|
},
|
|
"overrides": []
|
|
},
|
|
"gridPos": {
|
|
"h": 8,
|
|
"w": 12,
|
|
"x": 12,
|
|
"y": 24
|
|
},
|
|
"id": 403,
|
|
"options": {
|
|
"legend": {
|
|
"displayMode": "list",
|
|
"placement": "bottom",
|
|
"showLegend": true
|
|
},
|
|
"tooltip": {
|
|
"mode": "multi",
|
|
"sort": "none"
|
|
}
|
|
},
|
|
"targets": [
|
|
{
|
|
"expr": "round(DCGM_FI_DEV_FB_USED{hostname=~\"$hostname\"} / (DCGM_FI_DEV_FB_USED{hostname=~\"$hostname\"} + DCGM_FI_DEV_FB_FREE{hostname=~\"$hostname\"}) * 100)",
|
|
"legendFormat": "{{hostname}} GPU{{gpu}}",
|
|
"refId": "A"
|
|
}
|
|
],
|
|
"title": "GPU 显存使用率 (单卡)",
|
|
"type": "timeseries"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus"
|
|
},
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"color": {
|
|
"mode": "palette-classic"
|
|
},
|
|
"custom": {
|
|
"axisBorderShow": true,
|
|
"axisCenteredZero": false,
|
|
"axisColorMode": "text",
|
|
"axisLabel": "Temperature (℃)",
|
|
"axisPlacement": "left",
|
|
"barAlignment": 0,
|
|
"drawStyle": "line",
|
|
"fillOpacity": 10,
|
|
"gradientMode": "none",
|
|
"hideFrom": {
|
|
"legend": false,
|
|
"tooltip": false,
|
|
"viz": false
|
|
},
|
|
"insertNulls": false,
|
|
"lineInterpolation": "linear",
|
|
"lineWidth": 2,
|
|
"pointSize": 5,
|
|
"scaleDistribution": {
|
|
"type": "linear"
|
|
},
|
|
"showPoints": "auto",
|
|
"spanNulls": false,
|
|
"stacking": {
|
|
"group": "A",
|
|
"mode": "none"
|
|
},
|
|
"thresholdsStyle": {
|
|
"mode": "off"
|
|
}
|
|
},
|
|
"mappings": [],
|
|
"max": 100,
|
|
"min": 0,
|
|
"thresholds": {
|
|
"mode": "absolute",
|
|
"steps": [
|
|
{
|
|
"color": "green",
|
|
"value": null
|
|
},
|
|
{
|
|
"color": "orange",
|
|
"value": 75
|
|
},
|
|
{
|
|
"color": "red",
|
|
"value": 85
|
|
}
|
|
]
|
|
},
|
|
"unit": "celsius"
|
|
},
|
|
"overrides": []
|
|
},
|
|
"gridPos": {
|
|
"h": 8,
|
|
"w": 12,
|
|
"x": 0,
|
|
"y": 32
|
|
},
|
|
"id": 501,
|
|
"options": {
|
|
"legend": {
|
|
"calcs": [],
|
|
"displayMode": "list",
|
|
"placement": "bottom",
|
|
"showLegend": true
|
|
},
|
|
"tooltip": {
|
|
"mode": "multi",
|
|
"sort": "none"
|
|
}
|
|
},
|
|
"targets": [
|
|
{
|
|
"expr": "DCGM_FI_DEV_GPU_TEMP{hostname=~\"$hostname\"}",
|
|
"legendFormat": "{{hostname}} GPU{{gpu}}",
|
|
"refId": "A"
|
|
}
|
|
],
|
|
"title": "GPU 温度(单卡)",
|
|
"type": "timeseries"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus"
|
|
},
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"color": {
|
|
"mode": "palette-classic"
|
|
},
|
|
"custom": {
|
|
"axisBorderShow": true,
|
|
"axisCenteredZero": false,
|
|
"axisColorMode": "text",
|
|
"axisLabel": "Power (W)",
|
|
"axisPlacement": "left",
|
|
"barAlignment": 0,
|
|
"drawStyle": "line",
|
|
"fillOpacity": 10,
|
|
"gradientMode": "none",
|
|
"hideFrom": {
|
|
"legend": false,
|
|
"tooltip": false,
|
|
"viz": false
|
|
},
|
|
"insertNulls": false,
|
|
"lineInterpolation": "linear",
|
|
"lineWidth": 2,
|
|
"pointSize": 5,
|
|
"scaleDistribution": {
|
|
"type": "linear"
|
|
},
|
|
"showPoints": "auto",
|
|
"spanNulls": false,
|
|
"stacking": {
|
|
"group": "A",
|
|
"mode": "none"
|
|
},
|
|
"thresholdsStyle": {
|
|
"mode": "off"
|
|
}
|
|
},
|
|
"mappings": [],
|
|
"max": 300,
|
|
"min": 0,
|
|
"thresholds": {
|
|
"mode": "absolute",
|
|
"steps": [
|
|
{
|
|
"color": "green",
|
|
"value": null
|
|
},
|
|
{
|
|
"color": "orange",
|
|
"value": 200
|
|
},
|
|
{
|
|
"color": "red",
|
|
"value": 300
|
|
}
|
|
]
|
|
},
|
|
"unit": "watt"
|
|
},
|
|
"overrides": []
|
|
},
|
|
"gridPos": {
|
|
"h": 8,
|
|
"w": 12,
|
|
"x": 12,
|
|
"y": 32
|
|
},
|
|
"id": 502,
|
|
"options": {
|
|
"legend": {
|
|
"calcs": [],
|
|
"displayMode": "list",
|
|
"placement": "bottom",
|
|
"showLegend": true
|
|
},
|
|
"tooltip": {
|
|
"mode": "multi",
|
|
"sort": "none"
|
|
}
|
|
},
|
|
"targets": [
|
|
{
|
|
"expr": "DCGM_FI_DEV_POWER_USAGE{hostname=~\"$hostname\"}",
|
|
"legendFormat": "{{hostname}} GPU{{gpu}}",
|
|
"refId": "A"
|
|
}
|
|
],
|
|
"title": "GPU 功率 (单卡)",
|
|
"type": "timeseries"
|
|
}
|
|
],
|
|
"refresh": "15s",
|
|
"schemaVersion": 39,
|
|
"tags": [],
|
|
"templating": {
|
|
"list": [
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus"
|
|
},
|
|
"definition": "label_values(node_cpu_seconds_total,hostname)",
|
|
"hide": 0,
|
|
"includeAll": false,
|
|
"label": "hostname",
|
|
"multi": false,
|
|
"name": "hostname",
|
|
"options": [],
|
|
"query": {
|
|
"qryType": 1,
|
|
"query": "label_values(node_cpu_seconds_total,hostname)",
|
|
"refId": "PrometheusVariableQueryEditor-VariableQuery"
|
|
},
|
|
"refresh": 1,
|
|
"regex": "",
|
|
"skipUrlSync": false,
|
|
"sort": 0,
|
|
"type": "query"
|
|
}
|
|
]
|
|
},
|
|
"time": {
|
|
"from": "now-12h",
|
|
"to": "now"
|
|
},
|
|
"timepicker": {},
|
|
"timezone": "",
|
|
"title": "Node and GPU Metrics (by hostname)",
|
|
"uid": "node_gpu_metrics_by_hostname",
|
|
"weekStart": ""
|
|
}
|