[#31] grafana 面板uid冲突问题解决

This commit is contained in:
yuyr 2025-10-28 17:13:42 +08:00
parent d036da2d5e
commit 824eddde67
3 changed files with 373 additions and 12 deletions

View File

@ -1 +1 @@
1.33.0 1.35.0

View File

@ -581,6 +581,372 @@
], ],
"title": "Node Process Count", "title": "Node Process Count",
"type": "timeseries" "type": "timeseries"
},
{
"datasource": {
"type": "prometheus"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "GPU Utilization (%)",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 10,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 2,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"max": 100,
"min": 0,
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "orange",
"value": 80
},
{
"color": "red",
"value": 95
}
]
},
"unit": "percent"
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 24
},
"id": 301,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "multi",
"sort": "none"
}
},
"targets": [
{
"expr": "DCGM_FI_DEV_GPU_UTIL{hostname=~\"$hostname\"}",
"legendFormat": "{{hostname}} GPU{{gpu}}",
"refId": "A"
}
],
"title": "GPU 利用率 (单卡)",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": true,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "Memory Used (%)",
"axisPlacement": "left",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 10,
"gradientMode": "none",
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 2,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
}
},
"mappings": [],
"max": 100,
"min": 0,
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green"
},
{
"color": "orange",
"value": 80
},
{
"color": "red",
"value": 95
}
]
},
"unit": "percent"
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 24
},
"id": 403,
"options": {
"legend": {
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "multi",
"sort": "none"
}
},
"targets": [
{
"expr": "round(DCGM_FI_DEV_FB_USED{hostname=~\"$hostname\"} / (DCGM_FI_DEV_FB_USED{hostname=~\"$hostname\"} + DCGM_FI_DEV_FB_FREE{hostname=~\"$hostname\"}) * 100)",
"legendFormat": "{{hostname}} GPU{{gpu}}",
"refId": "A"
}
],
"title": "GPU 显存使用率 (单卡)",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": true,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "Temperature (℃)",
"axisPlacement": "left",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 10,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 2,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"max": 100,
"min": 0,
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "orange",
"value": 75
},
{
"color": "red",
"value": 85
}
]
},
"unit": "celsius"
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 32
},
"id": 501,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "multi",
"sort": "none"
}
},
"targets": [
{
"expr": "DCGM_FI_DEV_GPU_TEMP{hostname=~\"$hostname\"}",
"legendFormat": "{{hostname}} GPU{{gpu}}",
"refId": "A"
}
],
"title": "GPU 温度(单卡)",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": true,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "Power (W)",
"axisPlacement": "left",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 10,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 2,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"max": 300,
"min": 0,
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "orange",
"value": 200
},
{
"color": "red",
"value": 300
}
]
},
"unit": "watt"
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 32
},
"id": 502,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "multi",
"sort": "none"
}
},
"targets": [
{
"expr": "DCGM_FI_DEV_POWER_USAGE{hostname=~\"$hostname\"}",
"legendFormat": "{{hostname}} GPU{{gpu}}",
"refId": "A"
}
],
"title": "GPU 功率 (单卡)",
"type": "timeseries"
} }
], ],
"refresh": "15s", "refresh": "15s",
@ -589,11 +955,6 @@
"templating": { "templating": {
"list": [ "list": [
{ {
"current": {
"selected": true,
"text": "node-exporter-A1",
"value": "node-exporter-A1"
},
"datasource": { "datasource": {
"type": "prometheus" "type": "prometheus"
}, },
@ -623,7 +984,7 @@
}, },
"timepicker": {}, "timepicker": {},
"timezone": "", "timezone": "",
"title": "Node and GPU Metrics", "title": "Node and GPU Metrics (by hostname)",
"uid": "node_gpu_metrics", "uid": "node_gpu_metrics_by_hostname",
"weekStart": "" "weekStart": ""
} }

View File

@ -622,7 +622,7 @@
}, },
"timepicker": {}, "timepicker": {},
"timezone": "", "timezone": "",
"title": "Node and GPU Metrics", "title": "Node and GPU Metrics (by instance)",
"uid": "node_gpu_metrics", "uid": "node_gpu_metrics_by_instance",
"weekStart": "" "weekStart": ""
} }