[#37] 修复alert镜像用户
This commit is contained in:
parent
7548e46d1f
commit
1819fb9c46
@ -6,7 +6,7 @@ user=root
|
||||
|
||||
[program:alertmanager]
|
||||
command=/usr/local/bin/start-am-supervised.sh
|
||||
user=ubuntu
|
||||
user=alertmanager
|
||||
stdout_logfile=/var/log/supervisor/alertmanager.log
|
||||
stderr_logfile=/var/log/supervisor/alertmanager_error.log
|
||||
autorestart=true
|
||||
|
||||
@ -1,21 +0,0 @@
|
||||
SERVER_PROJECT=argus-swarm-server
|
||||
NODES_PROJECT=argus-swarm-nodes
|
||||
|
||||
# Host ports for server compose
|
||||
MASTER_PORT=32300
|
||||
ES_HTTP_PORT=9200
|
||||
KIBANA_PORT=5601
|
||||
PROMETHEUS_PORT=9090
|
||||
GRAFANA_PORT=3000
|
||||
ALERTMANAGER_PORT=9093
|
||||
WEB_PROXY_PORT_8080=8080
|
||||
WEB_PROXY_PORT_8081=8081
|
||||
WEB_PROXY_PORT_8082=8082
|
||||
WEB_PROXY_PORT_8083=8083
|
||||
WEB_PROXY_PORT_8084=8084
|
||||
WEB_PROXY_PORT_8085=8085
|
||||
|
||||
# UID/GID for volume ownership in containers
|
||||
ARGUS_BUILD_UID=1000
|
||||
ARGUS_BUILD_GID=1000
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
BINDIP=10.0.1.5
|
||||
FTPIP=10.0.1.4
|
||||
BINDIP=10.0.4.25
|
||||
FTPIP=10.0.4.29
|
||||
MASTER_ENDPOINT=http://master.argus.com:3000
|
||||
FTP_USER=ftpuser
|
||||
FTP_PASSWORD=ZGClab1234!
|
||||
5
src/sys/swarm_tests/.gitignore
vendored
5
src/sys/swarm_tests/.gitignore
vendored
@ -1,2 +1,7 @@
|
||||
|
||||
private-*/
|
||||
|
||||
tmp/
|
||||
|
||||
.env
|
||||
.env.nodes
|
||||
|
||||
83
src/sys/swarm_tests/scripts/es-relax.sh
Executable file
83
src/sys/swarm_tests/scripts/es-relax.sh
Executable file
@ -0,0 +1,83 @@
|
||||
#!/usr/bin/env bash
#
# es-relax.sh - temporarily relax Elasticsearch disk-allocation limits.
#
# Steps performed against the local Elasticsearch HTTP endpoint:
#   1. Verify that /_cluster/health answers with HTTP 200 (abort otherwise).
#   2. PUT transient cluster settings for the disk threshold / watermarks.
#   3. Optionally clear read_only / read_only_allow_delete index blocks.
#   4. Optionally force number_of_replicas=0 for .kibana* indices
#      (index template + existing indices).
#   5. Ask the cluster to retry failed shard allocations.
#   6. Print the cluster health and a one-line summary.
#
# Environment overrides (defaults in parentheses):
#   ES_HTTP_PORT (9200), RELAX_WM_LOW / RELAX_WM_HIGH / RELAX_WM_FLOOD (99%),
#   DISABLE_WATERMARK (1), SET_KIBANA_REPLICAS_ZERO (1),
#   CLEAR_READONLY_BLOCKS (1).
# Variables are also loaded from ../compose/.env (relative to this script)
# when that file exists.
set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"
ENV_FILE="$ROOT/compose/.env"

# Export everything defined by the env file, then switch auto-export off again.
if [[ -f "$ENV_FILE" ]]; then
  set -a
  # shellcheck disable=SC1090
  source "$ENV_FILE"
  set +a
fi

ES_URL="http://localhost:${ES_HTTP_PORT:-9200}"

# Tunables (env overrides)
RELAX_WM_LOW="${RELAX_WM_LOW:-99%}"
RELAX_WM_HIGH="${RELAX_WM_HIGH:-99%}"
RELAX_WM_FLOOD="${RELAX_WM_FLOOD:-99%}"
DISABLE_WATERMARK="${DISABLE_WATERMARK:-1}"
SET_KIBANA_REPLICAS_ZERO="${SET_KIBANA_REPLICAS_ZERO:-1}"
CLEAR_READONLY_BLOCKS="${CLEAR_READONLY_BLOCKS:-1}"

#######################################
# Best-effort JSON PUT: response body is discarded, failures are ignored.
# Globals:   ES_URL (read)
# Arguments: $1 - path below ES_URL, $2 - JSON request body
#######################################
es_put_quiet() {
  curl -sS -H 'Content-Type: application/json' -X PUT "$ES_URL/$1" -d "$2" \
    >/dev/null || true
}

#######################################
# Print the value of a top-level string field from a JSON document.
# Works on compact (single-line) and pretty-printed JSON alike; prints
# nothing when the key is not found.
# Arguments: $1 - JSON text, $2 - key name
#######################################
json_string_field() {
  local json=$1 key=$2
  local re="\"${key}\"[[:space:]]*:[[:space:]]*\"([^\"]*)\""
  if [[ "$json" =~ $re ]]; then
    printf '%s' "${BASH_REMATCH[1]}"
  fi
}

#######################################
# Print the value of an integer field from a JSON document.
# The key is matched including its opening quote, so "unassigned_shards"
# does not match "delayed_unassigned_shards". Prints nothing if not found.
# Arguments: $1 - JSON text, $2 - key name
#######################################
json_number_field() {
  local json=$1 key=$2
  local re="\"${key}\"[[:space:]]*:[[:space:]]*([0-9]+)"
  if [[ "$json" =~ $re ]]; then
    printf '%s' "${BASH_REMATCH[1]}"
  fi
}

#######################################
# Print the last value found for a key in a flat_settings=true response.
# Prints nothing (and still succeeds) when the key is absent, so the
# summary can fall back to "?" instead of aborting under pipefail.
# Arguments: $1 - settings JSON text, $2 - setting name
#######################################
flat_setting() {
  local json=$1 key=$2
  printf '%s' "$json" \
    | grep -o "\"${key}\"[^,}]*" \
    | awk -F: '{gsub(/["} ]/,"",$2);print $2}' \
    | tail -n1 || true
}

echo "[RELAX] Checking Elasticsearch at $ES_URL"
code=$(curl -s -o /dev/null -w '%{http_code}' "$ES_URL/_cluster/health" || true)
if [[ "$code" != "200" ]]; then
  echo "[RELAX][ERROR] ES not reachable (code=$code). Ensure argus-es-sys is running." >&2
  exit 1
fi

echo "[RELAX] Applying transient cluster settings (watermarks)"
# DISABLE_WATERMARK=1 turns the disk threshold decider off entirely.
if [[ "$DISABLE_WATERMARK" == "1" ]]; then
  th_enabled=false
else
  th_enabled=true
fi
curl -sS -H 'Content-Type: application/json' -X PUT "$ES_URL/_cluster/settings" -d "{
  \"transient\": {
    \"cluster.routing.allocation.disk.threshold_enabled\": $th_enabled,
    \"cluster.routing.allocation.disk.watermark.low\": \"$RELAX_WM_LOW\",
    \"cluster.routing.allocation.disk.watermark.high\": \"$RELAX_WM_HIGH\",
    \"cluster.routing.allocation.disk.watermark.flood_stage\": \"$RELAX_WM_FLOOD\"
  }
}" | sed -n '1,5p'

if [[ "$CLEAR_READONLY_BLOCKS" == "1" ]]; then
  echo "[RELAX] Clearing read_only/read_only_allow_delete blocks on all indices (best-effort)"
  es_put_quiet "_all/_settings" '{
    "index.blocks.read_only": false,
    "index.blocks.read_only_allow_delete": false
  }'
fi

# Any value other than "0" enables this step.
if [[ "$SET_KIBANA_REPLICAS_ZERO" != "0" ]]; then
  echo "[RELAX] Ensure .kibana* use replicas=0 via index template and per-index settings (best-effort)"
  # high priority template for .kibana* only, avoid impacting other indices
  es_put_quiet "_index_template/kibana-replicas-0" '{
    "index_patterns": [".kibana*"],
    "priority": 200,
    "template": { "settings": { "number_of_replicas": 0 } }
  }'
  # set existing .kibana* to replicas=0; listing them is best-effort as well
  idxs=$(curl -sS "$ES_URL/_cat/indices/.kibana*?h=index" | awk '{print $1}' || true)
  while IFS= read -r idx; do
    [[ -n "$idx" ]] || continue
    es_put_quiet "$idx/_settings" '{"index":{"number_of_replicas":0}}'
  done <<<"$idxs"
fi

# Retry failed shard allocations (best-effort)
curl -sS -H 'Content-Type: application/json' -X POST "$ES_URL/_cluster/reroute?retry_failed=true" -d '{}' >/dev/null || true

echo "[RELAX] Cluster health (post):"
# Informational only: a failed dump must not abort before the summary.
curl -sS "$ES_URL/_cluster/health?pretty" | sed -n '1,80p' || true

# Simple current status summary
ch=$(curl -sS "$ES_URL/_cluster/health" || true)
status=$(json_string_field "$ch" status)
unassigned=$(json_number_field "$ch" unassigned_shards)
duse=$(docker exec argus-es-sys sh -lc 'df -P /usr/share/elasticsearch/data | awk "NR==2{print \$5}"' 2>/dev/null || true)
settings=$(curl -sS "$ES_URL/_cluster/settings?flat_settings=true" || true)
th=$(flat_setting "$settings" 'cluster.routing.allocation.disk.threshold_enabled')
low=$(flat_setting "$settings" 'cluster.routing.allocation.disk.watermark.low')
high=$(flat_setting "$settings" 'cluster.routing.allocation.disk.watermark.high')
flood=$(flat_setting "$settings" 'cluster.routing.allocation.disk.watermark.flood_stage')
ks=$(curl -sS "$ES_URL/_cat/shards/.kibana*?h=state" || true)
total=$(printf '%s' "$ks" | awk 'NF{c++} END{print c+0}')
started=$(printf '%s' "$ks" | awk '/STARTED/{c++} END{print c+0}')
unass=$(printf '%s' "$ks" | awk '/UNASSIGNED/{c++} END{print c+0}')
echo "[RELAX][SUMMARY] status=${status:-?} unassigned=${unassigned:-?} es.data.use=${duse:-?} watermarks(threshold=${th:-?} low=${low:-?} high=${high:-?} flood=${flood:-?}) kibana_shards(total=${total},started=${started},unassigned=${unass})"

echo "[RELAX] Done. Remember to run scripts/es-watermark-restore.sh after freeing disk space and cluster becomes stable."
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user