setup prometheus alertmanager
This commit is contained in:
parent
e0129dee3b
commit
a563b41953
2 changed files with 30 additions and 170 deletions
16
flake.lock
generated
16
flake.lock
generated
|
@ -539,19 +539,11 @@
|
||||||
},
|
},
|
||||||
"nix-secrets": {
|
"nix-secrets": {
|
||||||
"locked": {
|
"locked": {
|
||||||
<<<<<<< HEAD
|
"lastModified": 1737899664,
|
||||||
"lastModified": 1738685297,
|
"narHash": "sha256-iZpzTSERNQ5UvFfEzrBLuEmcRUGjBSal7ShtXurYq8Q=",
|
||||||
"narHash": "sha256-JOv3+toYlftzBm47QF5tzaBhTbQIm1IBq1tKeQrQLyM=",
|
|
||||||
"ref": "refs/heads/master",
|
"ref": "refs/heads/master",
|
||||||
"rev": "3be1d509f9823292dd9ca6b396743fbf722bd8b9",
|
"rev": "a9844a78dcbdc8a84679835112970d80822b113c",
|
||||||
"revCount": 269,
|
"revCount": 257,
|
||||||
=======
|
|
||||||
"lastModified": 1738356588,
|
|
||||||
"narHash": "sha256-mb3P2bNaZuCz1is4NR05r2xm66n6ABQAkYLP5U5/eCY=",
|
|
||||||
"ref": "refs/heads/master",
|
|
||||||
"rev": "3ae59d3cfe419e10087da719129cca5c01b8cbcd",
|
|
||||||
"revCount": 267,
|
|
||||||
>>>>>>> 24e1bc1 (add restic-exporter to prometheus metrics)
|
|
||||||
"type": "git",
|
"type": "git",
|
||||||
"url": "ssh://git@git.bitlab21.com/sam/nix-secrets.git"
|
"url": "ssh://git@git.bitlab21.com/sam/nix-secrets.git"
|
||||||
},
|
},
|
||||||
|
|
|
@ -2,15 +2,11 @@
|
||||||
pkgs,
|
pkgs,
|
||||||
lib,
|
lib,
|
||||||
configVars,
|
configVars,
|
||||||
inputs,
|
|
||||||
...
|
...
|
||||||
}: let
|
}: let
|
||||||
containerName = "metrics-server";
|
containerName = "metrics-server";
|
||||||
containerIp = configVars.networking.addresses.metrics-server.ip;
|
containerIp = configVars.networking.addresses.metrics-server.ip;
|
||||||
|
|
||||||
notifybotJid = configVars.xmpp.notifybotJid;
|
|
||||||
receiverJid = configVars.xmpp.personalAccount;
|
|
||||||
|
|
||||||
dockerContainerIp = configVars.networking.addresses.docker.ip;
|
dockerContainerIp = configVars.networking.addresses.docker.ip;
|
||||||
smWorkerIp = configVars.networking.addresses.sm-worker.ip;
|
smWorkerIp = configVars.networking.addresses.sm-worker.ip;
|
||||||
merlinIp = configVars.networking.addresses.merlin.ip;
|
merlinIp = configVars.networking.addresses.merlin.ip;
|
||||||
|
@ -19,7 +15,6 @@
|
||||||
bitcoinNode = configVars.networking.addresses.bitcoin-node.ip;
|
bitcoinNode = configVars.networking.addresses.bitcoin-node.ip;
|
||||||
postres = configVars.networking.addresses.postgres.ip;
|
postres = configVars.networking.addresses.postgres.ip;
|
||||||
backupServer = configVars.networking.addresses.backup-server.ip;
|
backupServer = configVars.networking.addresses.backup-server.ip;
|
||||||
sops-nix = inputs.sops-nix;
|
|
||||||
|
|
||||||
http_endpoints = configVars.metrics-server.blackbox.http_endpoints;
|
http_endpoints = configVars.metrics-server.blackbox.http_endpoints;
|
||||||
|
|
||||||
|
@ -57,10 +52,6 @@ in {
|
||||||
hostPath = metricsServerContainerData;
|
hostPath = metricsServerContainerData;
|
||||||
isReadOnly = false;
|
isReadOnly = false;
|
||||||
};
|
};
|
||||||
"/etc/ssh/ssh_host_ed25519_key" = {
|
|
||||||
hostPath = "/etc/ssh/ssh_host_ed25519_key";
|
|
||||||
isReadOnly = true;
|
|
||||||
};
|
|
||||||
};
|
};
|
||||||
|
|
||||||
config = {
|
config = {
|
||||||
|
@ -68,10 +59,7 @@ in {
|
||||||
lib,
|
lib,
|
||||||
config,
|
config,
|
||||||
...
|
...
|
||||||
}: let
|
}: {
|
||||||
secretsDirectory = builtins.toString inputs.nix-secrets;
|
|
||||||
secretsFile = "${secretsDirectory}/secrets.yaml";
|
|
||||||
in {
|
|
||||||
networking = {
|
networking = {
|
||||||
defaultGateway = "${gatewayIp}";
|
defaultGateway = "${gatewayIp}";
|
||||||
interfaces.eth0.ipv4.addresses = [
|
interfaces.eth0.ipv4.addresses = [
|
||||||
|
@ -86,35 +74,14 @@ in {
|
||||||
config.services.prometheus.port
|
config.services.prometheus.port
|
||||||
config.services.grafana.port
|
config.services.grafana.port
|
||||||
config.services.prometheus.exporters.blackbox.port
|
config.services.prometheus.exporters.blackbox.port
|
||||||
9199 #xmpp listen port
|
|
||||||
];
|
];
|
||||||
};
|
};
|
||||||
useHostResolvConf = lib.mkForce false;
|
useHostResolvConf = lib.mkForce false;
|
||||||
};
|
};
|
||||||
|
|
||||||
sops = {
|
|
||||||
defaultSopsFile = "${secretsFile}";
|
|
||||||
validateSopsFiles = false;
|
|
||||||
|
|
||||||
age = {
|
|
||||||
sshKeyPaths = ["/etc/ssh/ssh_host_ed25519_key"];
|
|
||||||
};
|
|
||||||
secrets = {
|
|
||||||
"software/restic-passphrase" = {};
|
|
||||||
"software/restic-exporter-credentials" = {};
|
|
||||||
<<<<<<< HEAD
|
|
||||||
"comms/xmpp/notifybot/password" = {
|
|
||||||
mode = "0644";
|
|
||||||
};
|
|
||||||
=======
|
|
||||||
>>>>>>> 24e1bc1 (add restic-exporter to prometheus metrics)
|
|
||||||
};
|
|
||||||
};
|
|
||||||
|
|
||||||
services.resolved.enable = true;
|
services.resolved.enable = true;
|
||||||
|
|
||||||
imports = [
|
imports = [
|
||||||
sops-nix.nixosModules.sops
|
|
||||||
];
|
];
|
||||||
|
|
||||||
environment.systemPackages = [
|
environment.systemPackages = [
|
||||||
|
@ -122,49 +89,9 @@ in {
|
||||||
pkgs.git
|
pkgs.git
|
||||||
];
|
];
|
||||||
|
|
||||||
services.grafana = {
|
|
||||||
enable = true;
|
|
||||||
settings.server = {
|
|
||||||
http_port = 2342;
|
|
||||||
http_addr = "0.0.0.0";
|
|
||||||
};
|
|
||||||
};
|
|
||||||
|
|
||||||
# main prometheus service
|
|
||||||
services.prometheus = {
|
services.prometheus = {
|
||||||
enable = true;
|
enable = true;
|
||||||
webExternalUrl = "http://${containerIp}:9001";
|
|
||||||
port = 9001;
|
port = 9001;
|
||||||
alertmanagers = [
|
|
||||||
{
|
|
||||||
scheme = "http";
|
|
||||||
path_prefix = "/";
|
|
||||||
static_configs = [
|
|
||||||
{
|
|
||||||
targets = [
|
|
||||||
"0.0.0.0:9093"
|
|
||||||
];
|
|
||||||
}
|
|
||||||
];
|
|
||||||
}
|
|
||||||
];
|
|
||||||
ruleFiles = [
|
|
||||||
"${pkgs.writeText
|
|
||||||
"alert_rule.yml"
|
|
||||||
''
|
|
||||||
groups:
|
|
||||||
- name: blackbox_alert
|
|
||||||
rules:
|
|
||||||
- alert: EndpointDown
|
|
||||||
expr: probe_success{job="blackbox"} == 0
|
|
||||||
for: 1m
|
|
||||||
labels:
|
|
||||||
severity: critical
|
|
||||||
annotations:
|
|
||||||
summary: "Endpoint {{ $labels.instance }} down"
|
|
||||||
description: "An endpoint has been down for more than 1 minute."
|
|
||||||
''}"
|
|
||||||
];
|
|
||||||
scrapeConfigs = [
|
scrapeConfigs = [
|
||||||
{
|
{
|
||||||
job_name = "node_exporter";
|
job_name = "node_exporter";
|
||||||
|
@ -183,16 +110,6 @@ in {
|
||||||
}
|
}
|
||||||
];
|
];
|
||||||
}
|
}
|
||||||
{
|
|
||||||
job_name = "restic-exporter";
|
|
||||||
static_configs = [
|
|
||||||
{
|
|
||||||
targets = [
|
|
||||||
"0.0.0.0:8001"
|
|
||||||
];
|
|
||||||
}
|
|
||||||
];
|
|
||||||
}
|
|
||||||
|
|
||||||
{
|
{
|
||||||
job_name = "blackbox";
|
job_name = "blackbox";
|
||||||
|
@ -221,41 +138,16 @@ in {
|
||||||
];
|
];
|
||||||
};
|
};
|
||||||
|
|
||||||
# setup alertmanager
|
services.grafana = {
|
||||||
services.prometheus.xmpp-alerts = {
|
|
||||||
enable = true;
|
enable = true;
|
||||||
settings = {
|
settings.server = {
|
||||||
jid = notifybotJid;
|
http_port = 2342;
|
||||||
password_command = "cat ${config.sops.secrets."comms/xmpp/notifybot/password".path}";
|
http_addr = "0.0.0.0";
|
||||||
to_jid = receiverJid;
|
|
||||||
listen_address = "0.0.0.0";
|
|
||||||
listen_port = 9199;
|
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
services.prometheus.alertmanager = {
|
|
||||||
webExternalUrl = "http://${containerIp}:9093";
|
|
||||||
enable = true;
|
|
||||||
openFirewall = true;
|
|
||||||
port = 9093;
|
|
||||||
configText = ''
|
|
||||||
global:
|
|
||||||
resolve_timeout: 1m
|
|
||||||
|
|
||||||
route:
|
services.prometheus = {
|
||||||
group_by: ['...']
|
exporters = {
|
||||||
repeat_interval: 1h
|
|
||||||
receiver: 'xmpp-alerts'
|
|
||||||
|
|
||||||
<<<<<<< HEAD
|
|
||||||
receivers:
|
|
||||||
- name: 'xmpp-alerts'
|
|
||||||
webhook_configs:
|
|
||||||
- url: 'http://0.0.0.0:9199/alert'
|
|
||||||
'';
|
|
||||||
};
|
|
||||||
|
|
||||||
# prometheus exporters
|
|
||||||
services.prometheus.exporters = {
|
|
||||||
blackbox = {
|
blackbox = {
|
||||||
enable = true;
|
enable = true;
|
||||||
configFile = pkgs.writeText "blackbox-conf.yaml" ''
|
configFile = pkgs.writeText "blackbox-conf.yaml" ''
|
||||||
|
@ -283,30 +175,6 @@ in {
|
||||||
enabledCollectors = ["systemd"];
|
enabledCollectors = ["systemd"];
|
||||||
port = 9002;
|
port = 9002;
|
||||||
};
|
};
|
||||||
restic = {
|
|
||||||
enable = true;
|
|
||||||
repository = "";
|
|
||||||
environmentFile = config.sops.secrets."software/restic-exporter-credentials".path;
|
|
||||||
passwordFile = config.sops.secrets."software/restic-passphrase".path;
|
|
||||||
refreshInterval = 10800; # refresh every 3 hours
|
|
||||||
port = 8001;
|
|
||||||
=======
|
|
||||||
'';
|
|
||||||
};
|
|
||||||
node = {
|
|
||||||
enable = true;
|
|
||||||
enabledCollectors = ["systemd"];
|
|
||||||
port = 9002;
|
|
||||||
};
|
|
||||||
restic = {
|
|
||||||
enable = true;
|
|
||||||
repository = "";
|
|
||||||
environmentFile = config.sops.secrets."software/restic-exporter-credentials".path;
|
|
||||||
passwordFile = config.sops.secrets."software/restic-passphrase".path;
|
|
||||||
refreshInterval = 10800; # refresh every 3 hours
|
|
||||||
port = 8001;
|
|
||||||
};
|
|
||||||
>>>>>>> 24e1bc1 (add restic-exporter to prometheus metrics)
|
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
Loading…
Add table
Reference in a new issue