commit fd21f27db5: init

netapp_exporter.py (new file, 215 lines)
@@ -0,0 +1,215 @@

import os
import logging
import requests
from requests.auth import HTTPBasicAuth
from concurrent.futures import ThreadPoolExecutor
from flask import Flask, request, Response
from prometheus_client import Gauge, Counter, generate_latest
from datetime import datetime

# Disable SSL warnings (use with caution)
requests.packages.urllib3.disable_warnings()
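
# Note: combined with verify=False in fetch_json() below, this disables TLS
# certificate validation for every ONTAP REST call. That is acceptable against a
# lab cluster with self-signed certificates; in production you would more likely
# point "verify" at the cluster's CA bundle instead of silencing the warning.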

# Logging configuration
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")

app = Flask(__name__)

# --------------------------
# Volume metrics
# --------------------------
volume_used = Gauge("netapp_volume_used_bytes", "Used space on a volume", ["volume"])
volume_size = Gauge("netapp_volume_size_bytes", "Total size of the volume", ["volume"])
volume_available = Gauge("netapp_volume_available_bytes", "Available space on a volume", ["volume"])
volume_usage_percent = Gauge("netapp_volume_usage_percent", "Percentage of the volume used", ["volume"])
# Volume metadata (info metric: always set to 1)
volume_info = Gauge("netapp_volume_info", "Volume information (1 = present)",
                    ["volume", "style", "type", "snapshot_policy", "svm"])
# Volume creation time as a Unix timestamp
volume_create_time = Gauge("netapp_volume_create_time", "Volume creation time as a Unix timestamp", ["volume"])

# -------------------------------
# Aggregate (tier) metrics
# -------------------------------
tier_used = Gauge("netapp_tier_used_bytes", "Used space on an aggregate", ["tier"])
tier_size = Gauge("netapp_tier_size_bytes", "Total size of the aggregate", ["tier"])
tier_available = Gauge("netapp_tier_available_bytes", "Available space on an aggregate", ["tier"])
tier_usage_percent = Gauge("netapp_tier_usage_percent", "Percentage of the aggregate used", ["tier"])
tier_full_threshold_percent = Gauge("netapp_tier_full_threshold_percent", "Aggregate full threshold in percent", ["tier"])
tier_physical_used = Gauge("netapp_tier_physical_used_bytes", "Physically used space on an aggregate", ["tier"])

# --------------------------
# Node metrics
# --------------------------
node_uptime = Gauge("netapp_node_uptime_seconds", "Node uptime in seconds", ["node"])
node_state = Gauge("netapp_node_state", "Node state (1 = up, 0 = down)", ["node"])
node_cpu_count = Gauge("netapp_node_cpu_count", "Number of CPUs in the node", ["node"])
node_memory_size = Gauge("netapp_node_memory_size_bytes", "Node memory size in bytes", ["node"])

# --------------------------
# Counter for failed requests
# --------------------------
failed_requests = Counter("netapp_failed_requests", "Number of failed NetApp requests", ["endpoint"])
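
# For reference, generate_latest() renders all of the collectors above in the
# Prometheus text exposition format; a scrape should contain sample lines such as
# (illustrative names and values only):
#   netapp_volume_used_bytes{volume="vol1"} 1.073741824e+09
#   netapp_tier_usage_percent{tier="aggr1"} 42.5
#   netapp_node_state{node="node-01"} 1.0
#   netapp_failed_requests_total{endpoint="https://<cluster-ip>/api/storage/volumes"} 2.0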


def fetch_json(url, username, password):
    """Perform a GET request and return the parsed JSON on success."""
    try:
        response = requests.get(url, auth=HTTPBasicAuth(username, password), verify=False, timeout=10)
        if response.status_code == 200:
            return response.json()
        else:
            logging.error(f"Error {response.status_code} while fetching {url}")
            failed_requests.labels(endpoint=url).inc()
    except Exception as e:
        logging.error(f"Exception while fetching {url}: {e}")
        failed_requests.labels(endpoint=url).inc()
    return None
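
# The collectors below only depend on a small subset of the ONTAP REST payload.
# Assumed response shape, trimmed to the fields that are actually read
# (illustrative, not the full ONTAP schema):
#   {"records": [{"name": "vol1",
#                 "_links": {"self": {"href": "/api/storage/volumes/<uuid>"}}}]}
# Each record's "href" is then fetched again to get the detailed object.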


def collect_volume_metrics(ontap_ip, username, password):
    """Collect metrics for every volume by calling its detail endpoint."""
    logging.info("Fetching detailed volume information...")
    volumes_list_url = f"https://{ontap_ip}/api/storage/volumes"
    data = fetch_json(volumes_list_url, username, password)
    if data is None:
        logging.error("Unable to fetch the list of volumes")
        return

    volumes = data.get("records", data)
    for vol in volumes:
        vol_name = vol.get("name", "unknown")
        self_href = vol.get("_links", {}).get("self", {}).get("href")
        if not self_href:
            logging.error(f"No detail link for volume {vol_name}")
            continue
        vol_detail_url = f"https://{ontap_ip}{self_href}"
        vol_detail = fetch_json(vol_detail_url, username, password)
        if vol_detail is None:
            logging.error(f"Unable to fetch details for volume {vol_name}")
            continue

        # If the "space" section exists, collect the storage metrics
        space = vol_detail.get("space", {})
        if space:
            used = space.get("used", 0)
            size = space.get("size", 0)
            available = space.get("available", 0)
            percent_used = (used / size * 100) if size > 0 else 0
            volume_used.labels(volume=vol_name).set(used)
            volume_size.labels(volume=vol_name).set(size)
            volume_available.labels(volume=vol_name).set(available)
            volume_usage_percent.labels(volume=vol_name).set(percent_used)
            logging.info(f"Volume {vol_name} (space): used={used}, size={size}, available={available}, usage={percent_used:.2f}%")
        else:
            logging.info(f"Volume {vol_name} has no 'space' metrics")

        # Fetch the creation date and convert it to a Unix timestamp
        create_time_str = vol_detail.get("create_time")
        ts = 0
        if create_time_str:
            try:
                dt = datetime.fromisoformat(create_time_str.replace("Z", "+00:00"))
                ts = dt.timestamp()
            except Exception as e:
                logging.error(f"Error parsing creation date for volume {vol_name}: {create_time_str} ({e})")
        volume_create_time.labels(volume=vol_name).set(ts)

        # Collect static information as an "info" metric
        style = vol_detail.get("style", "unknown")
        vtype = vol_detail.get("type", "unknown")
        snapshot_policy = vol_detail.get("snapshot_policy", {}).get("name", "unknown")
        svm_name = vol_detail.get("svm", {}).get("name", "unknown")
        volume_info.labels(volume=vol_name, style=style, type=vtype, snapshot_policy=snapshot_policy, svm=svm_name).set(1)
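
# volume_info follows the usual Prometheus "info metric" pattern: the value is
# always 1 and the metadata lives in labels, so it can be joined onto the space
# metrics in PromQL, for example (illustrative query):
#   netapp_volume_usage_percent * on(volume) group_left(svm) netapp_volume_info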


def collect_tier_metrics(ontap_ip, username, password):
    """Collect metrics for every aggregate (tier) via its detail endpoint."""
    logging.info("Fetching detailed aggregate information...")
    aggregates_list_url = f"https://{ontap_ip}/api/storage/aggregates"
    data = fetch_json(aggregates_list_url, username, password)
    if data is None:
        logging.error("Unable to fetch the list of aggregates")
        return

    aggregates = data.get("records", data)
    for agg in aggregates:
        tier_name = agg.get("name", "unknown")
        self_href = agg.get("_links", {}).get("self", {}).get("href")
        if not self_href:
            logging.error(f"No detail link for aggregate {tier_name}")
            continue
        agg_detail_url = f"https://{ontap_ip}{self_href}"
        agg_detail = fetch_json(agg_detail_url, username, password)
        if agg_detail is None:
            logging.error(f"Unable to fetch details for aggregate {tier_name}")
            continue

        block_storage = agg_detail.get("space", {}).get("block_storage", {})
        used = block_storage.get("used", 0)
        size = block_storage.get("size", 0)
        available = block_storage.get("available", 0)
        percent_used = (used / size * 100) if size > 0 else 0

        tier_used.labels(tier=tier_name).set(used)
        tier_size.labels(tier=tier_name).set(size)
        tier_available.labels(tier=tier_name).set(available)
        tier_usage_percent.labels(tier=tier_name).set(percent_used)

        # Additional capacity fields
        full_threshold = block_storage.get("full_threshold_percent", 0)
        physical_used = block_storage.get("physical_used", 0)
        tier_full_threshold_percent.labels(tier=tier_name).set(full_threshold)
        tier_physical_used.labels(tier=tier_name).set(physical_used)

        logging.info(f"Aggregate {tier_name}: used={used}, size={size}, available={available}, usage={percent_used:.2f}%, threshold={full_threshold}%, physical_used={physical_used}")


def collect_node_metrics(ontap_ip, username, password):
    """Collect metrics for every node in the cluster."""
    logging.info("Fetching cluster nodes...")
    nodes_url = f"https://{ontap_ip}/api/cluster/nodes"
    data = fetch_json(nodes_url, username, password)
    if data is None:
        logging.error("Unable to fetch the list of nodes")
        return

    nodes = data.get("records", data)
    for node in nodes:
        node_name = node.get("name", "unknown")
        uptime = node.get("uptime", 0)
        state_str = node.get("state", "").lower()
        state_val = 1 if state_str == "up" else 0
        cpu_count = node.get("controller", {}).get("cpu", {}).get("count", 0)
        memory_size = node.get("controller", {}).get("memory_size", 0)

        node_uptime.labels(node=node_name).set(uptime)
        node_state.labels(node=node_name).set(state_val)
        node_cpu_count.labels(node=node_name).set(cpu_count)
        node_memory_size.labels(node=node_name).set(memory_size)

        logging.info(f"Node {node_name}: uptime={uptime}, state={state_str}, cpu_count={cpu_count}, memory_size={memory_size}")


def collect_metrics(ontap_ip, username, password):
    """Collect all volume, aggregate and node metrics in parallel."""
    with ThreadPoolExecutor(max_workers=3) as executor:
        executor.submit(collect_volume_metrics, ontap_ip, username, password)
        executor.submit(collect_tier_metrics, ontap_ip, username, password)
        executor.submit(collect_node_metrics, ontap_ip, username, password)
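
# Note: leaving the "with" block implicitly calls executor.shutdown(wait=True),
# so collect_metrics() only returns once all three collectors have finished and
# the /metrics handler below serves freshly updated values.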


@app.route("/metrics")
def metrics():
    """
    /metrics endpoint that:
    - Reads the connection parameters from the query string (falling back to environment variables)
    - Runs the NetApp metrics collection
    - Returns the metrics in the Prometheus exposition format
    """
    ontap_ip = request.args.get("ONTAP_IP") or os.getenv("ONTAP_IP", "default_ip")
    username = request.args.get("ONTAP_USER") or os.getenv("ONTAP_USER", "admin")
    password = request.args.get("ONTAP_PASS") or os.getenv("ONTAP_PASS", "password")

    logging.info(f"Collecting metrics for ONTAP_IP={ontap_ip}")
    collect_metrics(ontap_ip, username, password)

    output = generate_latest()
    return Response(output, mimetype="text/plain; version=0.0.4; charset=utf-8")
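
# Example scrape, with a hypothetical exporter host and ONTAP address:
#   curl "http://localhost:8000/metrics?ONTAP_IP=10.0.0.10&ONTAP_USER=admin&ONTAP_PASS=secret"
# or, as a sketch of a Prometheus scrape_config passing the same query parameters:
#   - job_name: netapp
#     metrics_path: /metrics
#     params:
#       ONTAP_IP: ["10.0.0.10"]
#     static_configs:
#       - targets: ["exporter-host:8000"]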


if __name__ == "__main__":
    app.run(host="0.0.0.0", port=8000)