setup prometheus alertmanager

This commit is contained in:
Sam 2025-02-04 16:28:59 +00:00
parent e0129dee3b
commit a563b41953
2 changed files with 30 additions and 170 deletions

16
flake.lock generated
View file

@ -539,19 +539,11 @@
}, },
"nix-secrets": { "nix-secrets": {
"locked": { "locked": {
<<<<<<< HEAD "lastModified": 1737899664,
"lastModified": 1738685297, "narHash": "sha256-iZpzTSERNQ5UvFfEzrBLuEmcRUGjBSal7ShtXurYq8Q=",
"narHash": "sha256-JOv3+toYlftzBm47QF5tzaBhTbQIm1IBq1tKeQrQLyM=",
"ref": "refs/heads/master", "ref": "refs/heads/master",
"rev": "3be1d509f9823292dd9ca6b396743fbf722bd8b9", "rev": "a9844a78dcbdc8a84679835112970d80822b113c",
"revCount": 269, "revCount": 257,
=======
"lastModified": 1738356588,
"narHash": "sha256-mb3P2bNaZuCz1is4NR05r2xm66n6ABQAkYLP5U5/eCY=",
"ref": "refs/heads/master",
"rev": "3ae59d3cfe419e10087da719129cca5c01b8cbcd",
"revCount": 267,
>>>>>>> 24e1bc1 (add restic-exporter to prometheus metrics)
"type": "git", "type": "git",
"url": "ssh://git@git.bitlab21.com/sam/nix-secrets.git" "url": "ssh://git@git.bitlab21.com/sam/nix-secrets.git"
}, },

View file

@ -2,15 +2,11 @@
pkgs, pkgs,
lib, lib,
configVars, configVars,
inputs,
... ...
}: let }: let
containerName = "metrics-server"; containerName = "metrics-server";
containerIp = configVars.networking.addresses.metrics-server.ip; containerIp = configVars.networking.addresses.metrics-server.ip;
notifybotJid = configVars.xmpp.notifybotJid;
receiverJid = configVars.xmpp.personalAccount;
dockerContainerIp = configVars.networking.addresses.docker.ip; dockerContainerIp = configVars.networking.addresses.docker.ip;
smWorkerIp = configVars.networking.addresses.sm-worker.ip; smWorkerIp = configVars.networking.addresses.sm-worker.ip;
merlinIp = configVars.networking.addresses.merlin.ip; merlinIp = configVars.networking.addresses.merlin.ip;
@ -19,7 +15,6 @@
bitcoinNode = configVars.networking.addresses.bitcoin-node.ip; bitcoinNode = configVars.networking.addresses.bitcoin-node.ip;
postres = configVars.networking.addresses.postgres.ip; postres = configVars.networking.addresses.postgres.ip;
backupServer = configVars.networking.addresses.backup-server.ip; backupServer = configVars.networking.addresses.backup-server.ip;
sops-nix = inputs.sops-nix;
http_endpoints = configVars.metrics-server.blackbox.http_endpoints; http_endpoints = configVars.metrics-server.blackbox.http_endpoints;
@ -57,10 +52,6 @@ in {
hostPath = metricsServerContainerData; hostPath = metricsServerContainerData;
isReadOnly = false; isReadOnly = false;
}; };
"/etc/ssh/ssh_host_ed25519_key" = {
hostPath = "/etc/ssh/ssh_host_ed25519_key";
isReadOnly = true;
};
}; };
config = { config = {
@ -68,10 +59,7 @@ in {
lib, lib,
config, config,
... ...
}: let }: {
secretsDirectory = builtins.toString inputs.nix-secrets;
secretsFile = "${secretsDirectory}/secrets.yaml";
in {
networking = { networking = {
defaultGateway = "${gatewayIp}"; defaultGateway = "${gatewayIp}";
interfaces.eth0.ipv4.addresses = [ interfaces.eth0.ipv4.addresses = [
@ -86,35 +74,14 @@ in {
config.services.prometheus.port config.services.prometheus.port
config.services.grafana.port config.services.grafana.port
config.services.prometheus.exporters.blackbox.port config.services.prometheus.exporters.blackbox.port
9199 #xmpp listen port
]; ];
}; };
useHostResolvConf = lib.mkForce false; useHostResolvConf = lib.mkForce false;
}; };
sops = {
defaultSopsFile = "${secretsFile}";
validateSopsFiles = false;
age = {
sshKeyPaths = ["/etc/ssh/ssh_host_ed25519_key"];
};
secrets = {
"software/restic-passphrase" = {};
"software/restic-exporter-credentials" = {};
<<<<<<< HEAD
"comms/xmpp/notifybot/password" = {
mode = "0644";
};
=======
>>>>>>> 24e1bc1 (add restic-exporter to prometheus metrics)
};
};
services.resolved.enable = true; services.resolved.enable = true;
imports = [ imports = [
sops-nix.nixosModules.sops
]; ];
environment.systemPackages = [ environment.systemPackages = [
@ -122,49 +89,9 @@ in {
pkgs.git pkgs.git
]; ];
services.grafana = {
enable = true;
settings.server = {
http_port = 2342;
http_addr = "0.0.0.0";
};
};
# main prometheus service
services.prometheus = { services.prometheus = {
enable = true; enable = true;
webExternalUrl = "http://${containerIp}:9001";
port = 9001; port = 9001;
alertmanagers = [
{
scheme = "http";
path_prefix = "/";
static_configs = [
{
targets = [
"0.0.0.0:9093"
];
}
];
}
];
ruleFiles = [
"${pkgs.writeText
"alert_rule.yml"
''
groups:
- name: blackbox_alert
rules:
- alert: EndpointDown
expr: probe_success{job="blackbox"} == 0
for: 1m
labels:
severity: critical
annotations:
summary: "Endpoint {{ $labels.instance }} down"
description: "An endpoint has been down for more than 1 minute."
''}"
];
scrapeConfigs = [ scrapeConfigs = [
{ {
job_name = "node_exporter"; job_name = "node_exporter";
@ -183,16 +110,6 @@ in {
} }
]; ];
} }
{
job_name = "restic-exporter";
static_configs = [
{
targets = [
"0.0.0.0:8001"
];
}
];
}
{ {
job_name = "blackbox"; job_name = "blackbox";
@ -221,41 +138,16 @@ in {
]; ];
}; };
# setup alertmanager services.grafana = {
services.prometheus.xmpp-alerts = {
enable = true; enable = true;
settings = { settings.server = {
jid = notifybotJid; http_port = 2342;
password_command = "cat ${config.sops.secrets."comms/xmpp/notifybot/password".path}"; http_addr = "0.0.0.0";
to_jid = receiverJid;
listen_address = "0.0.0.0";
listen_port = 9199;
}; };
}; };
services.prometheus.alertmanager = {
webExternalUrl = "http://${containerIp}:9093";
enable = true;
openFirewall = true;
port = 9093;
configText = ''
global:
resolve_timeout: 1m
route: services.prometheus = {
group_by: ['...'] exporters = {
repeat_interval: 1h
receiver: 'xmpp-alerts'
<<<<<<< HEAD
receivers:
- name: 'xmpp-alerts'
webhook_configs:
- url: 'http://0.0.0.0:9199/alert'
'';
};
# prometheus exporters
services.prometheus.exporters = {
blackbox = { blackbox = {
enable = true; enable = true;
configFile = pkgs.writeText "blackbox-conf.yaml" '' configFile = pkgs.writeText "blackbox-conf.yaml" ''
@ -283,30 +175,6 @@ in {
enabledCollectors = ["systemd"]; enabledCollectors = ["systemd"];
port = 9002; port = 9002;
}; };
restic = {
enable = true;
repository = "";
environmentFile = config.sops.secrets."software/restic-exporter-credentials".path;
passwordFile = config.sops.secrets."software/restic-passphrase".path;
refreshInterval = 10800; # refresh every 3 hours
port = 8001;
=======
'';
};
node = {
enable = true;
enabledCollectors = ["systemd"];
port = 9002;
};
restic = {
enable = true;
repository = "";
environmentFile = config.sops.secrets."software/restic-exporter-credentials".path;
passwordFile = config.sops.secrets."software/restic-passphrase".path;
refreshInterval = 10800; # refresh every 3 hours
port = 8001;
};
>>>>>>> 24e1bc1 (add restic-exporter to prometheus metrics)
}; };
}; };