#!/usr/bin/env bash
#
# Relax Elasticsearch disk watermarks so shards can be allocated on a nearly
# full disk, then print a short status summary.
#
# Steps:
#   1. Verify ES answers on http://localhost:${ES_HTTP_PORT:-9200}.
#   2. PUT transient cluster settings for the disk threshold / watermarks.
#   3. Optionally clear read_only / read_only_allow_delete blocks on all indices.
#   4. Optionally force .kibana* indices to number_of_replicas=0
#      (index template + existing indices).
#   5. Retry failed shard allocations and print cluster health + summary.
#
# Environment overrides (also read from <repo>/compose/.env when present):
#   ES_HTTP_PORT              ES HTTP port on localhost        (default 9200)
#   RELAX_WM_LOW              watermark.low value              (default 99%)
#   RELAX_WM_HIGH             watermark.high value             (default 99%)
#   RELAX_WM_FLOOD            watermark.flood_stage value      (default 99%)
#   DISABLE_WATERMARK         "1" => threshold_enabled=false   (default 1)
#   SET_KIBANA_REPLICAS_ZERO  anything but "0" => apply step 4 (default 1)
#   CLEAR_READONLY_BLOCKS     "1" => apply step 3              (default 1)
#
# Exit status: 1 when ES is not reachable; non-zero if the cluster-settings
# PUT or the health print fails at the transport level (set -e / pipefail).
# Everything marked "best-effort" never aborts the script.

set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"

ENV_FILE="$ROOT/compose/.env"
if [[ -f "$ENV_FILE" ]]; then
  set -a
  # A non-zero status from the env file is tolerated, and allexport is always
  # switched back off afterwards.
  # shellcheck disable=SC1090
  source "$ENV_FILE" || true
  set +a
fi

ES_URL="http://localhost:${ES_HTTP_PORT:-9200}"

# Tunables (env overrides)
RELAX_WM_LOW="${RELAX_WM_LOW:-99%}"
RELAX_WM_HIGH="${RELAX_WM_HIGH:-99%}"
RELAX_WM_FLOOD="${RELAX_WM_FLOOD:-99%}"
DISABLE_WATERMARK="${DISABLE_WATERMARK:-1}"
SET_KIBANA_REPLICAS_ZERO="${SET_KIBANA_REPLICAS_ZERO:-1}"
CLEAR_READONLY_BLOCKS="${CLEAR_READONLY_BLOCKS:-1}"

#######################################
# Extract a scalar value for a key from JSON text, compact or pretty-printed.
# Deliberately a plain-text scan (no jq dependency); good enough for the flat
# string/number/boolean values this script reads.
# Arguments:
#   $1 - JSON text
#   $2 - key name without quotes (used as a grep pattern, so "." matches any
#        character; harmless for the fixed keys used here)
# Outputs:
#   The value of the LAST occurrence of the key, with quotes and whitespace
#   stripped; nothing when the key is absent.
# Returns:
#   Always 0, so a missing key cannot trip `set -e` / `pipefail`.
#######################################
json_value() {
  local json=$1 key=$2
  printf '%s' "$json" \
    | { grep -o -e "\"${key}\"[[:space:]]*:[[:space:]]*[^,}]*" || true; } \
    | tail -n1 \
    | sed -e 's/^[^:]*://' -e 's/["[:space:]]//g'
}

echo "[RELAX] Checking Elasticsearch at $ES_URL"
code=$(curl -s -o /dev/null -w '%{http_code}' "$ES_URL/_cluster/health" || true)
if [[ "$code" != "200" ]]; then
  echo "[RELAX][ERROR] ES not reachable (code=$code). Ensure argus-es-sys is running." >&2
  exit 1
fi

echo "[RELAX] Applying transient cluster settings (watermarks)"
if [[ "$DISABLE_WATERMARK" == "1" ]]; then
  th_enabled=false
else
  th_enabled=true
fi
settings_payload=$(cat <<JSON
{
  "transient": {
    "cluster.routing.allocation.disk.threshold_enabled": $th_enabled,
    "cluster.routing.allocation.disk.watermark.low": "$RELAX_WM_LOW",
    "cluster.routing.allocation.disk.watermark.high": "$RELAX_WM_HIGH",
    "cluster.routing.allocation.disk.watermark.flood_stage": "$RELAX_WM_FLOOD"
  }
}
JSON
)
# Not best-effort: a transport failure here aborts the script (pipefail).
curl -sS -H 'Content-Type: application/json' -X PUT "$ES_URL/_cluster/settings" \
  -d "$settings_payload" | sed -n '1,5p'

if [[ "$CLEAR_READONLY_BLOCKS" == "1" ]]; then
  echo "[RELAX] Clearing read_only/read_only_allow_delete blocks on all indices (best-effort)"
  curl -sS -H 'Content-Type: application/json' -X PUT "$ES_URL/_all/_settings" -d '{
    "index.blocks.read_only": false,
    "index.blocks.read_only_allow_delete": false
  }' >/dev/null || true
fi

if [[ "${SET_KIBANA_REPLICAS_ZERO:-1}" != "0" ]]; then
  echo "[RELAX] Ensure .kibana* use replicas=0 via index template and per-index settings (best-effort)"
  # high priority template for .kibana* only, avoid impacting other indices
  curl -sS -H 'Content-Type: application/json' -X PUT "$ES_URL/_index_template/kibana-replicas-0" -d '{
    "index_patterns": [".kibana*"],
    "priority": 200,
    "template": { "settings": { "number_of_replicas": 0 } }
  }' >/dev/null || true

  # set existing .kibana* to replicas=0
  # -f: on an HTTP error produce no body, so an error document is never
  # mistaken for index names; `|| true` keeps this step best-effort.
  kibana_indices=$(curl -fsS "$ES_URL/_cat/indices/.kibana*?h=index" || true)
  while read -r idx _; do
    [[ -n "$idx" ]] || continue
    curl -sS -H 'Content-Type: application/json' -X PUT "$ES_URL/$idx/_settings" \
      -d '{"index":{"number_of_replicas":0}}' >/dev/null || true
  done <<<"$kibana_indices"
fi

# Retry failed shard allocations (best-effort)
curl -sS -H 'Content-Type: application/json' -X POST "$ES_URL/_cluster/reroute?retry_failed=true" -d '{}' >/dev/null || true

echo "[RELAX] Cluster health (post):"
curl -sS "$ES_URL/_cluster/health?pretty" | sed -n '1,80p'

# Simple current status summary. Every probe below is tolerant of failure and
# falls back to "?" (or 0 for the shard counters) in the summary line.
ch=$(curl -sS "$ES_URL/_cluster/health" || true)
status=$(json_value "$ch" 'status')
unassigned=$(json_value "$ch" 'unassigned_shards')

# Disk usage percentage of the ES data path as reported inside the container.
duse=$(docker exec argus-es-sys sh -lc 'df -P /usr/share/elasticsearch/data | awk "NR==2{print \$5}"' 2>/dev/null || true)

settings=$(curl -sS "$ES_URL/_cluster/settings?flat_settings=true" || true)
th=$(json_value "$settings" 'cluster.routing.allocation.disk.threshold_enabled')
low=$(json_value "$settings" 'cluster.routing.allocation.disk.watermark.low')
high=$(json_value "$settings" 'cluster.routing.allocation.disk.watermark.high')
flood=$(json_value "$settings" 'cluster.routing.allocation.disk.watermark.flood_stage')

# One line per shard of .kibana*; count all, STARTED and UNASSIGNED.
ks=$(curl -sS "$ES_URL/_cat/shards/.kibana*?h=state" || true)
total=$(printf '%s' "$ks" | awk 'NF{c++} END{print c+0}')
started=$(printf '%s' "$ks" | awk '/STARTED/{c++} END{print c+0}')
unass=$(printf '%s' "$ks" | awk '/UNASSIGNED/{c++} END{print c+0}')

echo "[RELAX][SUMMARY] status=${status:-?} unassigned=${unassigned:-?} es.data.use=${duse:-?} watermarks(threshold=${th:-?} low=${low:-?} high=${high:-?} flood=${flood:-?}) kibana_shards(total=${total},started=${started},unassigned=${unass})"
echo "[RELAX] Done. Remember to run scripts/es-watermark-restore.sh after freeing disk space and cluster becomes stable."