Set up Prometheus Alertmanager

This commit is contained in:
Sam 2025-02-04 16:28:59 +00:00
parent f7876d08f6
commit 996e51f56e
3 changed files with 114 additions and 44 deletions

8
flake.lock generated
View file

@ -539,11 +539,11 @@
},
"nix-secrets": {
"locked": {
"lastModified": 1738358831,
"narHash": "sha256-BFkqC7xQwGpA7mYYGDBkzw9iehWao+BkR5Bp/dFicWY=",
"lastModified": 1738685297,
"narHash": "sha256-JOv3+toYlftzBm47QF5tzaBhTbQIm1IBq1tKeQrQLyM=",
"ref": "refs/heads/master",
"rev": "e7311c8f523ad3ffe187efe63f6438140fa0cf45",
"revCount": 268,
"rev": "3be1d509f9823292dd9ca6b396743fbf722bd8b9",
"revCount": 269,
"type": "git",
"url": "ssh://git@git.bitlab21.com/sam/nix-secrets.git"
},

View file

@ -8,6 +8,9 @@
# Name and IP address of the metrics-server entry in configVars.
containerName = "metrics-server";
containerIp = configVars.networking.addresses.metrics-server.ip;
# XMPP account the alert relay logs in as (used as `jid` for xmpp-alerts).
notifybotJid = configVars.xmpp.notifybotJid;
# XMPP account that receives the alert messages (used as `to_jid` for xmpp-alerts).
receiverJid = configVars.xmpp.personalAccount;
# IP addresses of other hosts, read from the shared networking address table.
dockerContainerIp = configVars.networking.addresses.docker.ip;
smWorkerIp = configVars.networking.addresses.sm-worker.ip;
merlinIp = configVars.networking.addresses.merlin.ip;
@ -83,6 +86,7 @@ in {
config.services.prometheus.port
config.services.grafana.port
config.services.prometheus.exporters.blackbox.port
9199 #xmpp listen port
];
};
useHostResolvConf = lib.mkForce false;
@ -98,6 +102,9 @@ in {
# Secrets consumed by the services below via config.sops.secrets.<name>.path.
secrets = {
# Passphrase and credentials read by the restic exporter.
"software/restic-passphrase" = {};
"software/restic-exporter-credentials" = {};
# Password of the XMPP notify bot; xmpp-alerts reads it with `cat` through
# its password_command setting.
# NOTE(review): mode 0644 leaves the decrypted password world-readable —
# presumably so the xmpp-alerts service user can read it; confirm, and
# consider restricting access to that service only.
"comms/xmpp/notifybot/password" = {
mode = "0644";
};
};
};
@ -112,9 +119,48 @@ in {
pkgs.git
];
# Grafana web UI: enabled, bound to every interface on port 2342.
services.grafana = {
  enable = true;
  settings.server.http_addr = "0.0.0.0";
  settings.server.http_port = 2342;
};
# main prometheus service
services.prometheus = {
enable = true;
port = 9001;
# Alertmanager endpoint(s) Prometheus pushes firing alerts to.
# Alertmanager is enabled in this same configuration, so it is addressed over
# loopback: "0.0.0.0" is a wildcard bind address, not a portable destination.
# The port is read from the Alertmanager option so the two cannot drift apart.
alertmanagers = [
  {
    scheme = "http";
    path_prefix = "/";
    static_configs = [
      {
        targets = [
          "127.0.0.1:${toString config.services.prometheus.alertmanager.port}"
        ];
      }
    ];
  }
];
# Alerting rules, written to the Nix store as alert_rule.yml.
# Defines one rule group, `blackbox_alert`, with a single alert `EndpointDown`
# (severity: critical) that fires once probe_success{job="blackbox"} has been
# 0 for one minute.
# NOTE(review): relies on a scrape job named "blackbox"; it is not visible in
# this part of the file — confirm the job_name matches.
ruleFiles = [
"${pkgs.writeText
"alert_rule.yml"
''
groups:
- name: blackbox_alert
rules:
- alert: EndpointDown
expr: probe_success{job="blackbox"} == 0
for: 1m
labels:
severity: critical
annotations:
summary: "Endpoint {{ $labels.instance }} down"
description: "An endpoint has been down for more than 1 minute."
''}"
];
scrapeConfigs = [
{
job_name = "node_exporter";
@ -171,51 +217,74 @@ in {
];
};
services.grafana = {
# setup alertmanager
services.prometheus.xmpp-alerts = {
enable = true;
settings.server = {
http_port = 2342;
http_addr = "0.0.0.0";
settings = {
jid = notifybotJid;
password_command = "cat ${config.sops.secrets."comms/xmpp/notifybot/password".path}";
to_jid = receiverJid;
listen_address = "0.0.0.0";
listen_port = 9199;
};
};
services.prometheus.alertmanager = {
# Alertmanager builds links back to itself from this value, so it must be a
# full http(s) URL (scheme, host and port) rather than a bare IP address.
webExternalUrl = "http://${containerIp}:${toString config.services.prometheus.alertmanager.port}";
enable = true;
openFirewall = true;
port = 9093;
configText = ''
global:
resolve_timeout: 1m
services.prometheus = {
exporters = {
blackbox = {
enable = true;
configFile = pkgs.writeText "blackbox-conf.yaml" ''
modules:
http_basic:
prober: http
timeout: 5s
http:
preferred_ip_protocol: ip4
valid_http_versions: ["HTTP/1.1", "HTTP/2"]
method: GET
# fail_if_ssl: false
# fail_if_not_ssl: true
# tls_config:
# insecure_skip_verify: true
tcp_connect:
prober: tcp
tcp:
preferred_ip_protocol: ip4
route:
group_by: ['...']
repeat_interval: 1h
receiver: 'xmpp-alerts'
'';
};
node = {
enable = true;
enabledCollectors = ["systemd"];
port = 9002;
};
restic = {
enable = true;
repository = "";
environmentFile = config.sops.secrets."software/restic-exporter-credentials".path;
passwordFile = config.sops.secrets."software/restic-passphrase".path;
refreshInterval = 10800; # refresh every 3 hours
port = 8001;
};
receivers:
- name: 'xmpp-alerts'
webhook_configs:
- url: 'http://0.0.0.0:9199/alert'
'';
};
# Prometheus exporters enabled in this configuration: blackbox, node and restic.
services.prometheus.exporters = {
# Blackbox exporter. Its config file defines two probe modules:
#   http_basic  - HTTP GET over IPv4, 5s timeout, accepts HTTP/1.1 and HTTP/2
#   tcp_connect - TCP connect over IPv4
blackbox = {
enable = true;
configFile = pkgs.writeText "blackbox-conf.yaml" ''
modules:
http_basic:
prober: http
timeout: 5s
http:
preferred_ip_protocol: ip4
valid_http_versions: ["HTTP/1.1", "HTTP/2"]
method: GET
# fail_if_ssl: false
# fail_if_not_ssl: true
# tls_config:
# insecure_skip_verify: true
tcp_connect:
prober: tcp
tcp:
preferred_ip_protocol: ip4
'';
};
# Node exporter on port 9002 with the systemd collector enabled.
node = {
enable = true;
enabledCollectors = ["systemd"];
port = 9002;
};
# Restic exporter on port 8001; credentials and passphrase come from sops secrets.
restic = {
enable = true;
# NOTE(review): repository is left empty — presumably it is supplied through
# environmentFile; confirm.
repository = "";
environmentFile = config.sops.secrets."software/restic-exporter-credentials".path;
passwordFile = config.sops.secrets."software/restic-passphrase".path;
refreshInterval = 10800; # refresh every 3 hours
port = 8001;
};
};

View file

@ -4,6 +4,7 @@
networking
email
metrics-server
xmpp
;
locations = {
mediaDataMountPoint = "/media/media";