setup prometheus alertmanager

This commit is contained in:
Sam 2025-02-04 16:28:59 +00:00
parent f7876d08f6
commit 996e51f56e
3 changed files with 114 additions and 44 deletions

8
flake.lock generated
View file

@ -539,11 +539,11 @@
}, },
"nix-secrets": { "nix-secrets": {
"locked": { "locked": {
"lastModified": 1738358831, "lastModified": 1738685297,
"narHash": "sha256-BFkqC7xQwGpA7mYYGDBkzw9iehWao+BkR5Bp/dFicWY=", "narHash": "sha256-JOv3+toYlftzBm47QF5tzaBhTbQIm1IBq1tKeQrQLyM=",
"ref": "refs/heads/master", "ref": "refs/heads/master",
"rev": "e7311c8f523ad3ffe187efe63f6438140fa0cf45", "rev": "3be1d509f9823292dd9ca6b396743fbf722bd8b9",
"revCount": 268, "revCount": 269,
"type": "git", "type": "git",
"url": "ssh://git@git.bitlab21.com/sam/nix-secrets.git" "url": "ssh://git@git.bitlab21.com/sam/nix-secrets.git"
}, },

View file

@ -8,6 +8,9 @@
containerName = "metrics-server"; containerName = "metrics-server";
containerIp = configVars.networking.addresses.metrics-server.ip; containerIp = configVars.networking.addresses.metrics-server.ip;
notifybotJid = configVars.xmpp.notifybotJid;
receiverJid = configVars.xmpp.personalAccount;
dockerContainerIp = configVars.networking.addresses.docker.ip; dockerContainerIp = configVars.networking.addresses.docker.ip;
smWorkerIp = configVars.networking.addresses.sm-worker.ip; smWorkerIp = configVars.networking.addresses.sm-worker.ip;
merlinIp = configVars.networking.addresses.merlin.ip; merlinIp = configVars.networking.addresses.merlin.ip;
@ -83,6 +86,7 @@ in {
config.services.prometheus.port config.services.prometheus.port
config.services.grafana.port config.services.grafana.port
config.services.prometheus.exporters.blackbox.port config.services.prometheus.exporters.blackbox.port
9199 #xmpp listen port
]; ];
}; };
useHostResolvConf = lib.mkForce false; useHostResolvConf = lib.mkForce false;
@ -98,6 +102,9 @@ in {
secrets = { secrets = {
"software/restic-passphrase" = {}; "software/restic-passphrase" = {};
"software/restic-exporter-credentials" = {}; "software/restic-exporter-credentials" = {};
"comms/xmpp/notifybot/password" = {
mode = "0644";
};
}; };
}; };
@ -112,9 +119,48 @@ in {
pkgs.git pkgs.git
]; ];
# Grafana web UI: enabled, with its HTTP server bound to 0.0.0.0 on port 2342.
services.grafana = {
  enable = true;
  settings.server.http_addr = "0.0.0.0";
  settings.server.http_port = 2342;
};
# main prometheus service
services.prometheus = { services.prometheus = {
enable = true; enable = true;
port = 9001; port = 9001;
# Alertmanager endpoint(s) that Prometheus sends firing alerts to.
# The target is addressed over loopback: 0.0.0.0 is the wildcard *bind*
# address, not a valid connection destination (it only happens to work on
# Linux, which routes it to the local host). Port 9093 must match the port
# configured for services.prometheus.alertmanager.
alertmanagers = [
  {
    scheme = "http";
    path_prefix = "/";
    static_configs = [
      {
        targets = [
          "127.0.0.1:9093"
        ];
      }
    ];
  }
];
# Alerting rules loaded by Prometheus. The rule text below is written to a
# file named "alert_rule.yml" with pkgs.writeText, and the resulting path is
# the single entry of this list.
#
# Rule group "blackbox_alert" defines one alert, EndpointDown, labelled
# severity=critical: it fires once probe_success for job "blackbox" has
# been 0 for 1 minute.
#
# Note: "{{ $labels.instance }}" is Prometheus template syntax. Because "$"
# is not followed by "{", Nix does not treat it as string interpolation and
# passes it through unchanged.
ruleFiles = [
"${pkgs.writeText
"alert_rule.yml"
''
groups:
- name: blackbox_alert
rules:
- alert: EndpointDown
expr: probe_success{job="blackbox"} == 0
for: 1m
labels:
severity: critical
annotations:
summary: "Endpoint {{ $labels.instance }} down"
description: "An endpoint has been down for more than 1 minute."
''}"
];
scrapeConfigs = [ scrapeConfigs = [
{ {
job_name = "node_exporter"; job_name = "node_exporter";
@ -171,51 +217,74 @@ in {
]; ];
}; };
services.grafana = { # setup alertmanager
services.prometheus.xmpp-alerts = {
enable = true; enable = true;
settings.server = { settings = {
http_port = 2342; jid = notifybotJid;
http_addr = "0.0.0.0"; password_command = "cat ${config.sops.secrets."comms/xmpp/notifybot/password".path}";
to_jid = receiverJid;
listen_address = "0.0.0.0";
listen_port = 9199;
}; };
}; };
services.prometheus.alertmanager = {
webExternalUrl = containerIp;
enable = true;
openFirewall = true;
port = 9093;
configText = ''
global:
resolve_timeout: 1m
services.prometheus = { route:
exporters = { group_by: ['...']
blackbox = { repeat_interval: 1h
enable = true; receiver: 'xmpp-alerts'
configFile = pkgs.writeText "blackbox-conf.yaml" ''
modules:
http_basic:
prober: http
timeout: 5s
http:
preferred_ip_protocol: ip4
valid_http_versions: ["HTTP/1.1", "HTTP/2"]
method: GET
# fail_if_ssl: false
# fail_if_not_ssl: true
# tls_config:
# insecure_skip_verify: true
tcp_connect:
prober: tcp
tcp:
preferred_ip_protocol: ip4
''; receivers:
}; - name: 'xmpp-alerts'
node = { webhook_configs:
enable = true; - url: 'http://0.0.0.0:9199/alert'
enabledCollectors = ["systemd"]; '';
port = 9002; };
};
restic = { # prometheus exporters
enable = true; services.prometheus.exporters = {
repository = ""; blackbox = {
environmentFile = config.sops.secrets."software/restic-exporter-credentials".path; enable = true;
passwordFile = config.sops.secrets."software/restic-passphrase".path; configFile = pkgs.writeText "blackbox-conf.yaml" ''
refreshInterval = 10800; # refresh every 3 hours modules:
port = 8001; http_basic:
}; prober: http
timeout: 5s
http:
preferred_ip_protocol: ip4
valid_http_versions: ["HTTP/1.1", "HTTP/2"]
method: GET
# fail_if_ssl: false
# fail_if_not_ssl: true
# tls_config:
# insecure_skip_verify: true
tcp_connect:
prober: tcp
tcp:
preferred_ip_protocol: ip4
'';
};
node = {
enable = true;
enabledCollectors = ["systemd"];
port = 9002;
};
restic = {
enable = true;
repository = "";
environmentFile = config.sops.secrets."software/restic-exporter-credentials".path;
passwordFile = config.sops.secrets."software/restic-passphrase".path;
refreshInterval = 10800; # refresh every 3 hours
port = 8001;
}; };
}; };

View file

@ -4,6 +4,7 @@
networking networking
email email
metrics-server metrics-server
xmpp
; ;
locations = { locations = {
mediaDataMountPoint = "/media/media"; mediaDataMountPoint = "/media/media";