|
/etc/prometheus/alerts/alert_healthchecks.yml > Selfmonitoring
|
| Labels |
State |
Active Since |
Value |
|
alertname="SelfMonitoringAlwaysFiring"
application="leonard_healthchecks"
severity="info"
|
firing |
2026-03-04 14:20:46.797458603 +0000 UTC |
18 |
|
|
/etc/prometheus/alerts/alert_loadbalancing.yml > lowpref
|
| Labels |
State |
Active Since |
Value |
|
alertname="LowGatewayPreference"
instance="gw06n01"
job="json_gwpref"
segment="1"
severity="page"
|
pending |
2026-03-04 14:21:10.619007883 +0000 UTC |
9 |
| Annotations |
- summary
- has low gateway preference (9)
|
|
|
/etc/prometheus/alerts/blackbox-exporter.yml > BlackboxExporter
|
|
|
# Alerting rule: matches series where probe_success equals 0.
alert: BlackboxProbeFailed
expr: probe_success == 0
# The expression must keep matching for 15m before the alert fires.
for: 15m
labels:
  severity: critical
annotations:
  # Literal block scalar (|-): line breaks are kept, the trailing newline is stripped.
  description: |-
    Probe failed
    VALUE = {{ $value }}
    LABELS = {{ $labels }}
  summary: Blackbox probe failed (instance {{ $labels.instance }})
|
|
|
|
|
|
|
|
|
|
|
|
/etc/prometheus/alerts/general.yml > general
|
|
|
|
|
|
|
|
|
|
|
|
/etc/prometheus/alerts/node-exporter.yml > NodeExporter
|
| Labels |
State |
Active Since |
Value |
|
alertname="HostRequiresReboot"
instance="ffs13"
job="node"
nodename="ffs13"
severity="info"
|
firing |
2026-03-04 14:21:02.079934382 +0000 UTC |
1 |
| Annotations |
- description
- ffs13 requires a reboot.
VALUE = 1
LABELS = map[instance:ffs13 job:node nodename:ffs13]
- summary
- Host requires reboot (instance ffs13)
|
|
alertname="HostRequiresReboot"
instance="gw04n05"
job="node"
nodename="gw04n05"
severity="info"
|
firing |
2026-03-04 14:21:02.079934382 +0000 UTC |
1 |
| Annotations |
- description
- gw04n05 requires a reboot.
VALUE = 1
LABELS = map[instance:gw04n05 job:node nodename:gw04n05]
- summary
- Host requires reboot (instance gw04n05)
|
|
alertname="HostRequiresReboot"
instance="gitlab-runner02"
job="node"
nodename="gitlab-runner02"
severity="info"
|
firing |
2026-03-04 14:21:02.079934382 +0000 UTC |
1 |
| Annotations |
- description
- gitlab-runner02 requires a reboot.
VALUE = 1
LABELS = map[instance:gitlab-runner02 job:node nodename:gitlab-runner02]
- summary
- Host requires reboot (instance gitlab-runner02)
|
|
alertname="HostRequiresReboot"
instance="dhcp03"
job="node"
nodename="dhcp03"
severity="info"
|
firing |
2026-03-04 14:21:02.079934382 +0000 UTC |
1 |
| Annotations |
- description
- dhcp03 requires a reboot.
VALUE = 1
LABELS = map[instance:dhcp03 job:node nodename:dhcp03]
- summary
- Host requires reboot (instance dhcp03)
|
|
alertname="HostRequiresReboot"
instance="ffs14"
job="node"
nodename="ffs14"
severity="info"
|
firing |
2026-03-04 14:21:02.079934382 +0000 UTC |
1 |
| Annotations |
- description
- ffs14 requires a reboot.
VALUE = 1
LABELS = map[instance:ffs14 job:node nodename:ffs14]
- summary
- Host requires reboot (instance ffs14)
|
|
alertname="HostRequiresReboot"
instance="ffs10"
job="node"
nodename="ffs10"
severity="info"
|
firing |
2026-03-04 14:21:02.079934382 +0000 UTC |
1 |
| Annotations |
- description
- ffs10 requires a reboot.
VALUE = 1
LABELS = map[instance:ffs10 job:node nodename:ffs10]
- summary
- Host requires reboot (instance ffs10)
|
|
alertname="HostRequiresReboot"
instance="gw09n04"
job="node"
nodename="gw09n04"
severity="info"
|
firing |
2026-03-04 14:21:17.079934382 +0000 UTC |
1 |
| Annotations |
- description
- gw09n04 requires a reboot.
VALUE = 1
LABELS = map[instance:gw09n04 job:node nodename:gw09n04]
- summary
- Host requires reboot (instance gw09n04)
|
|
alertname="HostRequiresReboot"
instance="dns02"
job="node"
nodename="dns02"
severity="info"
|
pending |
2026-03-05 06:36:17.079934382 +0000 UTC |
1 |
| Annotations |
- description
- dns02 requires a reboot.
VALUE = 1
LABELS = map[instance:dns02 job:node nodename:dns02]
- summary
- Host requires reboot (instance dns02)
|
|
alertname="HostRequiresReboot"
instance="gw06n01"
job="node"
nodename="gw06n01"
severity="info"
|
firing |
2026-03-04 14:21:02.079934382 +0000 UTC |
1 |
| Annotations |
- description
- gw06n01 requires a reboot.
VALUE = 1
LABELS = map[instance:gw06n01 job:node nodename:gw06n01]
- summary
- Host requires reboot (instance gw06n01)
|
|
alertname="HostRequiresReboot"
instance="ffs11"
job="node"
nodename="ffs11"
severity="info"
|
firing |
2026-03-04 14:21:02.079934382 +0000 UTC |
1 |
| Annotations |
- description
- ffs11 requires a reboot.
VALUE = 1
LABELS = map[instance:ffs11 job:node nodename:ffs11]
- summary
- Host requires reboot (instance ffs11)
|
|
alertname="HostRequiresReboot"
instance="ffs05"
job="node"
nodename="ffs05"
severity="info"
|
firing |
2026-03-04 14:21:17.079934382 +0000 UTC |
1 |
| Annotations |
- description
- ffs05 requires a reboot.
VALUE = 1
LABELS = map[instance:ffs05 job:node nodename:ffs05]
- summary
- Host requires reboot (instance ffs05)
|
|
alertname="HostRequiresReboot"
instance="ffs08"
job="node"
nodename="ffs08"
severity="info"
|
firing |
2026-03-04 14:21:17.079934382 +0000 UTC |
1 |
| Annotations |
- description
- ffs08 requires a reboot.
VALUE = 1
LABELS = map[instance:ffs08 job:node nodename:ffs08]
- summary
- Host requires reboot (instance ffs08)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Alerting rule: matches series where node_filesystem_device_error equals 1.
alert: HostFilesystemDeviceError
expr: node_filesystem_device_error == 1
# The expression must keep matching for 2m before the alert fires.
for: 2m
labels:
  severity: critical
annotations:
  # Literal block scalar (|-): line breaks are kept, the trailing newline is stripped.
  # The first line uses the instance and mountpoint labels of the matching series.
  description: |-
    {{ $labels.instance }}: Device error with the {{ $labels.mountpoint }} filesystem
    VALUE = {{ $value }}
    LABELS = {{ $labels }}
  summary: Host filesystem device error (instance {{ $labels.instance }})
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/etc/prometheus/alerts/postfix.yml > postfix_smtp_status_deferred
|
|
|
|
/etc/prometheus/alerts/pve.yml > pve-guest-alerts
|
|
|
|
|
|
/etc/prometheus/alerts/smartctl-exporter.yml > SmartctlExporter
|
# Alerting rule: matches series where smartctl_device_critical_warning is above 0.
alert: SmartCriticalWarning
expr: smartctl_device_critical_warning > 0
# The expression must keep matching for 15m before the alert fires.
for: 15m
labels:
  severity: critical
annotations:
  # Literal block scalar (|-): line breaks are kept, the trailing newline is stripped.
  description: |-
    device has critical warning (instance {{ $labels.instance }})
    VALUE = {{ $value }}
    LABELS = {{ $labels }}
  summary: Smart critical warning (instance {{ $labels.instance }})
|
# Alerting rule: matches series where smartctl_device_temperature is above 80.
alert: SmartDeviceTemperatureCritical
expr: smartctl_device_temperature > 80
# The expression must keep matching for 2m before the alert fires.
for: 2m
labels:
  severity: critical
annotations:
  # Literal block scalar (|-): line breaks are kept, the trailing newline is stripped.
  description: |-
    Device temperature critical (instance {{ $labels.instance }})
    VALUE = {{ $value }}
    LABELS = {{ $labels }}
  summary: Smart device temperature critical (instance {{ $labels.instance }})
|
# Alerting rule: matches series where smartctl_device_temperature is above 60.
# NOTE(review): there is no upper bound, so a device above 80 matches this rule
# as well as SmartDeviceTemperatureCritical — confirm that double alerting is intended.
alert: SmartDeviceTemperatureWarning
expr: smartctl_device_temperature > 60
# The expression must keep matching for 2m before the alert fires.
for: 2m
labels:
  severity: warning
annotations:
  # Literal block scalar (|-): line breaks are kept, the trailing newline is stripped.
  description: |-
    Device temperature warning (instance {{ $labels.instance }})
    VALUE = {{ $value }}
    LABELS = {{ $labels }}
  summary: Smart device temperature warning (instance {{ $labels.instance }})
|
# Alerting rule: matches series where smartctl_device_media_errors is above 0.
alert: SmartMediaErrors
expr: smartctl_device_media_errors > 0
# The expression must keep matching for 15m before the alert fires.
for: 15m
labels:
  severity: critical
annotations:
  # Literal block scalar (|-): line breaks are kept, the trailing newline is stripped.
  description: |-
    device has media errors (instance {{ $labels.instance }})
    VALUE = {{ $value }}
    LABELS = {{ $labels }}
  summary: Smart media errors (instance {{ $labels.instance }})
|
|
|