setup prometheus alertmanager
This commit is contained in:
parent
f7876d08f6
commit
996e51f56e
3 changed files with 114 additions and 44 deletions
8
flake.lock
generated
8
flake.lock
generated
|
@ -539,11 +539,11 @@
|
||||||
},
|
},
|
||||||
"nix-secrets": {
|
"nix-secrets": {
|
||||||
"locked": {
|
"locked": {
|
||||||
"lastModified": 1738358831,
|
"lastModified": 1738685297,
|
||||||
"narHash": "sha256-BFkqC7xQwGpA7mYYGDBkzw9iehWao+BkR5Bp/dFicWY=",
|
"narHash": "sha256-JOv3+toYlftzBm47QF5tzaBhTbQIm1IBq1tKeQrQLyM=",
|
||||||
"ref": "refs/heads/master",
|
"ref": "refs/heads/master",
|
||||||
"rev": "e7311c8f523ad3ffe187efe63f6438140fa0cf45",
|
"rev": "3be1d509f9823292dd9ca6b396743fbf722bd8b9",
|
||||||
"revCount": 268,
|
"revCount": 269,
|
||||||
"type": "git",
|
"type": "git",
|
||||||
"url": "ssh://git@git.bitlab21.com/sam/nix-secrets.git"
|
"url": "ssh://git@git.bitlab21.com/sam/nix-secrets.git"
|
||||||
},
|
},
|
||||||
|
|
|
@ -8,6 +8,9 @@
|
||||||
containerName = "metrics-server";
|
containerName = "metrics-server";
|
||||||
containerIp = configVars.networking.addresses.metrics-server.ip;
|
containerIp = configVars.networking.addresses.metrics-server.ip;
|
||||||
|
|
||||||
|
notifybotJid = configVars.xmpp.notifybotJid;
|
||||||
|
receiverJid = configVars.xmpp.personalAccount;
|
||||||
|
|
||||||
dockerContainerIp = configVars.networking.addresses.docker.ip;
|
dockerContainerIp = configVars.networking.addresses.docker.ip;
|
||||||
smWorkerIp = configVars.networking.addresses.sm-worker.ip;
|
smWorkerIp = configVars.networking.addresses.sm-worker.ip;
|
||||||
merlinIp = configVars.networking.addresses.merlin.ip;
|
merlinIp = configVars.networking.addresses.merlin.ip;
|
||||||
|
@ -83,6 +86,7 @@ in {
|
||||||
config.services.prometheus.port
|
config.services.prometheus.port
|
||||||
config.services.grafana.port
|
config.services.grafana.port
|
||||||
config.services.prometheus.exporters.blackbox.port
|
config.services.prometheus.exporters.blackbox.port
|
||||||
|
9199 #xmpp listen port
|
||||||
];
|
];
|
||||||
};
|
};
|
||||||
useHostResolvConf = lib.mkForce false;
|
useHostResolvConf = lib.mkForce false;
|
||||||
|
@ -98,6 +102,9 @@ in {
|
||||||
secrets = {
|
secrets = {
|
||||||
"software/restic-passphrase" = {};
|
"software/restic-passphrase" = {};
|
||||||
"software/restic-exporter-credentials" = {};
|
"software/restic-exporter-credentials" = {};
|
||||||
|
"comms/xmpp/notifybot/password" = {
|
||||||
|
mode = "0644";
|
||||||
|
};
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -112,9 +119,48 @@ in {
|
||||||
pkgs.git
|
pkgs.git
|
||||||
];
|
];
|
||||||
|
|
||||||
|
services.grafana = {
|
||||||
|
enable = true;
|
||||||
|
settings.server = {
|
||||||
|
http_port = 2342;
|
||||||
|
http_addr = "0.0.0.0";
|
||||||
|
};
|
||||||
|
};
|
||||||
|
|
||||||
|
# main prometheus service
|
||||||
services.prometheus = {
|
services.prometheus = {
|
||||||
enable = true;
|
enable = true;
|
||||||
port = 9001;
|
port = 9001;
|
||||||
|
alertmanagers = [
|
||||||
|
{
|
||||||
|
scheme = "http";
|
||||||
|
path_prefix = "/";
|
||||||
|
static_configs = [
|
||||||
|
{
|
||||||
|
targets = [
|
||||||
|
"0.0.0.0:9093"
|
||||||
|
];
|
||||||
|
}
|
||||||
|
];
|
||||||
|
}
|
||||||
|
];
|
||||||
|
ruleFiles = [
|
||||||
|
"${pkgs.writeText
|
||||||
|
"alert_rule.yml"
|
||||||
|
''
|
||||||
|
groups:
|
||||||
|
- name: blackbox_alert
|
||||||
|
rules:
|
||||||
|
- alert: EndpointDown
|
||||||
|
expr: probe_success{job="blackbox"} == 0
|
||||||
|
for: 1m
|
||||||
|
labels:
|
||||||
|
severity: critical
|
||||||
|
annotations:
|
||||||
|
summary: "Endpoint {{ $labels.instance }} down"
|
||||||
|
description: "An endpoint has been down for more than 1 minute."
|
||||||
|
''}"
|
||||||
|
];
|
||||||
scrapeConfigs = [
|
scrapeConfigs = [
|
||||||
{
|
{
|
||||||
job_name = "node_exporter";
|
job_name = "node_exporter";
|
||||||
|
@ -171,51 +217,74 @@ in {
|
||||||
];
|
];
|
||||||
};
|
};
|
||||||
|
|
||||||
services.grafana = {
|
# setup alertmanager
|
||||||
|
services.prometheus.xmpp-alerts = {
|
||||||
enable = true;
|
enable = true;
|
||||||
settings.server = {
|
settings = {
|
||||||
http_port = 2342;
|
jid = notifybotJid;
|
||||||
http_addr = "0.0.0.0";
|
password_command = "cat ${config.sops.secrets."comms/xmpp/notifybot/password".path}";
|
||||||
|
to_jid = receiverJid;
|
||||||
|
listen_address = "0.0.0.0";
|
||||||
|
listen_port = 9199;
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
services.prometheus.alertmanager = {
|
||||||
|
webExternalUrl = containerIp;
|
||||||
|
enable = true;
|
||||||
|
openFirewall = true;
|
||||||
|
port = 9093;
|
||||||
|
configText = ''
|
||||||
|
global:
|
||||||
|
resolve_timeout: 1m
|
||||||
|
|
||||||
services.prometheus = {
|
route:
|
||||||
exporters = {
|
group_by: ['...']
|
||||||
blackbox = {
|
repeat_interval: 1h
|
||||||
enable = true;
|
receiver: 'xmpp-alerts'
|
||||||
configFile = pkgs.writeText "blackbox-conf.yaml" ''
|
|
||||||
modules:
|
|
||||||
http_basic:
|
|
||||||
prober: http
|
|
||||||
timeout: 5s
|
|
||||||
http:
|
|
||||||
preferred_ip_protocol: ip4
|
|
||||||
valid_http_versions: ["HTTP/1.1", "HTTP/2"]
|
|
||||||
method: GET
|
|
||||||
# fail_if_ssl: false
|
|
||||||
# fail_if_not_ssl: true
|
|
||||||
# tls_config:
|
|
||||||
# insecure_skip_verify: true
|
|
||||||
tcp_connect:
|
|
||||||
prober: tcp
|
|
||||||
tcp:
|
|
||||||
preferred_ip_protocol: ip4
|
|
||||||
|
|
||||||
'';
|
receivers:
|
||||||
};
|
- name: 'xmpp-alerts'
|
||||||
node = {
|
webhook_configs:
|
||||||
enable = true;
|
- url: 'http://0.0.0.0:9199/alert'
|
||||||
enabledCollectors = ["systemd"];
|
'';
|
||||||
port = 9002;
|
};
|
||||||
};
|
|
||||||
restic = {
|
# prometheus exporters
|
||||||
enable = true;
|
services.prometheus.exporters = {
|
||||||
repository = "";
|
blackbox = {
|
||||||
environmentFile = config.sops.secrets."software/restic-exporter-credentials".path;
|
enable = true;
|
||||||
passwordFile = config.sops.secrets."software/restic-passphrase".path;
|
configFile = pkgs.writeText "blackbox-conf.yaml" ''
|
||||||
refreshInterval = 10800; # refresh every 3 hours
|
modules:
|
||||||
port = 8001;
|
http_basic:
|
||||||
};
|
prober: http
|
||||||
|
timeout: 5s
|
||||||
|
http:
|
||||||
|
preferred_ip_protocol: ip4
|
||||||
|
valid_http_versions: ["HTTP/1.1", "HTTP/2"]
|
||||||
|
method: GET
|
||||||
|
# fail_if_ssl: false
|
||||||
|
# fail_if_not_ssl: true
|
||||||
|
# tls_config:
|
||||||
|
# insecure_skip_verify: true
|
||||||
|
tcp_connect:
|
||||||
|
prober: tcp
|
||||||
|
tcp:
|
||||||
|
preferred_ip_protocol: ip4
|
||||||
|
|
||||||
|
'';
|
||||||
|
};
|
||||||
|
node = {
|
||||||
|
enable = true;
|
||||||
|
enabledCollectors = ["systemd"];
|
||||||
|
port = 9002;
|
||||||
|
};
|
||||||
|
restic = {
|
||||||
|
enable = true;
|
||||||
|
repository = "";
|
||||||
|
environmentFile = config.sops.secrets."software/restic-exporter-credentials".path;
|
||||||
|
passwordFile = config.sops.secrets."software/restic-passphrase".path;
|
||||||
|
refreshInterval = 10800; # refresh every 3 hours
|
||||||
|
port = 8001;
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
@ -4,6 +4,7 @@
|
||||||
networking
|
networking
|
||||||
email
|
email
|
||||||
metrics-server
|
metrics-server
|
||||||
|
xmpp
|
||||||
;
|
;
|
||||||
locations = {
|
locations = {
|
||||||
mediaDataMountPoint = "/media/media";
|
mediaDataMountPoint = "/media/media";
|
||||||
|
|
Loading…
Add table
Reference in a new issue