remove zfs-health.sh, configure zfs-zed (#609)

Reviewed-on: #609
Co-authored-by: Michael Grote <michael.grote@posteo.de>
Co-committed-by: Michael Grote <michael.grote@posteo.de>
This commit is contained in:
Michael Grote 2023-11-24 12:54:24 +01:00 committed by mg
parent b61a027163
commit 92eb30ccb3
11 changed files with 27 additions and 145 deletions

View file

@ -6,7 +6,6 @@
- { role: mgrote_zfs_manage_datasets, tags: "datasets" }
- { role: mgrote_zfs_scrub, tags: "zfs_scrub" }
- { role: mgrote_zfs_zed, tags: "zfs_zed" }
- { role: mgrote_zfs_health, tags: "zfs_health" }
- { role: mgrote_zfs_sanoid, tags: "sanoid" }
- { role: mgrote_smart, tags: "smart" }
- { role: mgrote_pbs_users, tags: "pbs_users" }

View file

@ -6,7 +6,6 @@
- { role: mgrote_zfs_manage_datasets, tags: "datasets" }
- { role: mgrote_zfs_scrub, tags: "zfs_scrub" }
- { role: mgrote_zfs_zed, tags: "zfs_zed" }
- { role: mgrote_zfs_health, tags: "zfs_health" }
- { role: mgrote_zfs_sanoid, tags: "sanoid" }
- { role: mgrote_smart, tags: "smart" }
- { role: mgrote_cv4pve_autosnap, tags: "cv4pve" }

View file

@ -1,12 +0,0 @@
## mgrote.zfs_health
### Beschreibung
Richtet "zfs-health.sh" ein, ein ZFS-Checkscript, das bei Fehlern auch Mails versendet.
### getestet auf
- [x] ProxMox 6.1
### Variablen + Defaults
- see [defaults](./defaults/main.yml)
- Variablen für ``mgrote.zfs_health/trim/scrub/zed/arc_mem/`` sind zusammengefasst unter zfs_extra_*

View file

@ -1,7 +0,0 @@
---
# Cron schedule for the health-check script: these two values become the
# minute and hour fields of the cron entry. The defaults ("0" and "*") mean
# "at minute 0 of every hour".
zfs_extra_cron_minutes_zfs_health: "0"
zfs_extra_cron_hours_zfs_health: "*"
# Group and user that own the script; the cronjob is run as this user.
zfs_health_user_group: "root"
zfs_health_user: "zfs-health"

View file

@ -1,22 +0,0 @@
---
# Pull in the tasks from user.yml first.
- name: include user tasks
  ansible.builtin.include_tasks: user.yml

# Render the health-check script to /usr/local/bin.
# Mode 0744: the owner may read/write/execute, everyone else may only read.
- name: template script
  become: true
  ansible.builtin.template:
    dest: /usr/local/bin/zfs-health.sh
    src: zfs-health.sh
    owner: "{{ zfs_health_user }}"
    group: "{{ zfs_health_user_group }}"
    mode: "0744"

# Schedule the script in the crontab of the configured user.
- name: ensure cronjob exists
  become: true
  ansible.builtin.cron:
    name: zfs_health
    user: "{{ zfs_health_user }}"
    job: "/usr/local/bin/zfs-health.sh"
    hour: "{{ zfs_extra_cron_hours_zfs_health }}"
    minute: "{{ zfs_extra_cron_minutes_zfs_health }}"
    state: present

View file

@ -1,19 +0,0 @@
---
# Create the group, but only when a group name has been configured.
- name: ensure group exists
  when:
    - zfs_health_user_group is defined
  become: true
  ansible.builtin.group:
    state: present
    name: "{{ zfs_health_user_group }}"

# Create the account with a nologin shell and without a home directory.
# Skipped unless both the user name and the group name are configured.
- name: ensure user exists
  when:
    - zfs_health_user_group is defined
    - zfs_health_user is defined
  become: true
  ansible.builtin.user:
    name: "{{ zfs_health_user }}"
    group: "{{ zfs_health_user_group }}"
    create_home: false
    shell: /usr/sbin/nologin

View file

@ -1,48 +0,0 @@
#! /bin/bash
{{ file_header | default () }}
# ZFS pool health check.
#
# Inspects the output of "zpool status" for (1) keywords that indicate a
# degraded or broken pool and (2) non-zero READ/WRITE/CKSUM error counters.
# If either check fires, the subject line is written to the local log via
# logger and a mail with "zpool list" and "zpool status" output is sent.

problems=0
emailSubject="$(hostname) - ZFS pool - HEALTH check"

# Capture the status once so both checks and the mail body describe the
# same snapshot of the pools.
poolStatus=$(/sbin/zpool status)

# Health - Check if all zfs volumes are in good condition. We are looking for
# any keyword signifying a degraded or broken array.
condition=$(grep -E -i '(DEGRADED|FAULTED|OFFLINE|UNAVAIL|REMOVED|FAIL|DESTROYED|corrupt|cannot|unrecover)' <<< "${poolStatus}")
if [ -n "${condition}" ]; then
  emailSubject="$emailSubject - fault"
  problems=1
fi

# Errors - Check the columns for READ, WRITE and CKSUM (checksum) drive errors
# on all volumes and all drives using "zpool status". If any non-zero errors
# are reported an email will be sent out. You should then look to replace the
# faulty drive and run "zpool scrub" on the affected volume after resilvering.
#
# Each counter is compared on its own. Concatenating the three columns and
# filtering out lines containing "000" would also hide real errors such as
# "10 0 0", which concatenates to "1000".
if [ "${problems}" -eq 0 ]; then
  errors=$(grep ONLINE <<< "${poolStatus}" | grep -v state \
    | awk '$3 != 0 || $4 != 0 || $5 != 0 { print $3, $4, $5 }')
  if [ -n "${errors}" ]; then
    emailSubject="$emailSubject - Drive Errors"
    problems=1
  fi
fi

# Notifications - On any problems send email with drive status information and
# capacities including a helpful subject line. Also use logger to write
# the email subject to the local logs. This is the place you may want to put
# any other notifications like:
#
# + Playing a sound file or beep the internal speaker
# + Update Nagios, Cacti, Zabbix, Munin or even BigBrother
if [ "${problems}" -ne 0 ]; then
  logger "$emailSubject"
  # printf with %s keeps backslashes in the zpool output literal
  # (echo -e would interpret them as escape sequences).
  printf '%s \n\n\n %s \n\n\n %s\n' "$emailSubject" "$(/sbin/zpool list)" "${poolStatus}" \
    | mail -s "$emailSubject" "{{ my_mail }}"
fi

### EOF ###

View file

@ -0,0 +1,6 @@
---
# Minimum number of seconds between notifications for a similar event.
# NOTE(review): "bettween" is misspelled. Renaming the variable means updating
# every reference to it (e.g. the zed.rc template) in the same change.
zed_time_bettween_warning_s: 3600
# Address that zed notifications are mailed to.
zed_mail_to: "{{ my_mail }}"
# Notification verbosity.
# If set to 0, suppress notification if the pool is healthy.
# If set to 1, send notification regardless of pool health.
zed_notify_verbosity: "1"
# Send notifications for 'ereport.fs.zfs.data' events.
# Disabled by default, any non-empty value will enable the feature.
zed_notify_data: 1
# Run a scrub after every resilver.
# Disabled by default, 1 to enable and 0 to disable.
zed_scrub_after_resilver: 1

View file

@ -1,5 +1,8 @@
---
# Handler: sends a short test mail via the local "mail" command.
# bash is required because the command enables "pipefail".
- name: testmail  # noqa no-changed-when
  ansible.builtin.shell:
    executable: /bin/bash
    cmd: "set -o pipefail && echo 'zed ist eingerichtet' | mail -s '{{ ansible_hostname }} - zed' '{{ my_mail }}'"
# Handler: restarts zfs-zed.service and also ensures the unit is unmasked
# and enabled.
- name: Restart zfs-zed.service
  become: true
  ansible.builtin.systemd:
    name: "zfs-zed.service"
    state: restarted
    masked: false
    enabled: true

View file

@ -1,9 +1,10 @@
---
- name: kopiere zed.rc
- name: Template "zed.rc"
become: true
ansible.builtin.template:
owner: root
group: root
mode: "0600"
src: zed.rc
src: zed.rc.j2
dest: /etc/zfs/zed.d/zed.rc
notify: testmail
notify: Restart zfs-zed.service

View file

@ -16,14 +16,14 @@
# Email will only be sent if ZED_EMAIL_ADDR is defined.
# Disabled by default; uncomment to enable.
#
ZED_EMAIL_ADDR="{{ my_mail }}"
ZED_EMAIL_ADDR="{{ zed_mail_to }}"
##
# Name or path of executable responsible for sending notifications via email;
# the mail program must be capable of reading a message body from stdin.
# Email will only be sent if ZED_EMAIL_ADDR is defined.
#
#ZED_EMAIL_PROG="mail"
ZED_EMAIL_PROG="mail"
##
# Command-line options for ZED_EMAIL_PROG.
@ -32,53 +32,35 @@ ZED_EMAIL_ADDR="{{ my_mail }}"
# this should be protected with quotes to prevent word-splitting.
# Email will only be sent if ZED_EMAIL_ADDR is defined.
#
#ZED_EMAIL_OPTS="-s '@SUBJECT@' @ADDRESS@"
ZED_EMAIL_OPTS="-s '@SUBJECT@' @ADDRESS@"
##
# Default directory for zed lock files.
#
#ZED_LOCKDIR="/var/lock"
ZED_LOCKDIR="/var/lock"
##
# Minimum number of seconds between notifications for a similar event.
#
ZED_NOTIFY_INTERVAL_SECS=3600
ZED_NOTIFY_INTERVAL_SECS={{ zed_time_bettween_warning_s }}
##
# Notification verbosity.
# If set to 0, suppress notification if the pool is healthy.
# If set to 1, send notification regardless of pool health.
#
ZED_NOTIFY_VERBOSE=1
ZED_NOTIFY_VERBOSE={{ zed_notify_verbosity }}
##
# Send notifications for 'ereport.fs.zfs.data' events.
# Disabled by default, any non-empty value will enable the feature.
#
ZED_NOTIFY_DATA=1
##
# Pushbullet access token.
# This grants full access to your account -- protect it accordingly!
# <https://www.pushbullet.com/get-started>
# <https://www.pushbullet.com/account>
# Disabled by default; uncomment to enable.
#
#ZED_PUSHBULLET_ACCESS_TOKEN=""
##
# Pushbullet channel tag for push notification feeds that can be subscribed to.
# <https://www.pushbullet.com/my-channel>
# If not defined, push notifications will instead be sent to all devices
# associated with the account specified by the access token.
# Disabled by default; uncomment to enable.
#
#ZED_PUSHBULLET_CHANNEL_TAG=""
ZED_NOTIFY_DATA={{ zed_notify_data }}
##
# Default directory for zed state files.
#
#ZED_RUNDIR="/var/run"
ZED_RUNDIR="/var/run"
##
# Turn on/off enclosure LEDs when drives get DEGRADED/FAULTED. This works for
@ -90,7 +72,7 @@ ZED_USE_ENCLOSURE_LEDS=1
##
# Run a scrub after every resilver
# Disabled by default, 1 to enable and 0 to disable.
ZED_SCRUB_AFTER_RESILVER=1
ZED_SCRUB_AFTER_RESILVER={{ zed_scrub_after_resilver }}
##
# The syslog priority (e.g., specified as a "facility.level" pair).