From 92eb30ccb3d1179b3de30532c46a95036594f6fd Mon Sep 17 00:00:00 2001 From: Michael Grote Date: Fri, 24 Nov 2023 12:54:24 +0100 Subject: [PATCH] remove zfs-health.sh, configure zfs-zed (#609) Reviewed-on: https://git.mgrote.net/mg/homeserver/pulls/609 Co-authored-by: Michael Grote Co-committed-by: Michael Grote --- playbooks/3_service/pbs.yml | 1 - playbooks/3_service/pve.yml | 1 - roles/mgrote_zfs_health/README.md | 12 ----- roles/mgrote_zfs_health/defaults/main.yml | 7 --- roles/mgrote_zfs_health/tasks/main.yml | 22 --------- roles/mgrote_zfs_health/tasks/user.yml | 19 -------- .../mgrote_zfs_health/templates/zfs-health.sh | 48 ------------------- roles/mgrote_zfs_zed/defaults/main.yml | 6 +++ roles/mgrote_zfs_zed/handlers/main.yml | 11 +++-- roles/mgrote_zfs_zed/tasks/main.yml | 7 +-- .../templates/{zed.rc => zed.rc.j2} | 38 ++++----------- 11 files changed, 27 insertions(+), 145 deletions(-) delete mode 100644 roles/mgrote_zfs_health/README.md delete mode 100644 roles/mgrote_zfs_health/defaults/main.yml delete mode 100644 roles/mgrote_zfs_health/tasks/main.yml delete mode 100644 roles/mgrote_zfs_health/tasks/user.yml delete mode 100644 roles/mgrote_zfs_health/templates/zfs-health.sh create mode 100644 roles/mgrote_zfs_zed/defaults/main.yml rename roles/mgrote_zfs_zed/templates/{zed.rc => zed.rc.j2} (72%) diff --git a/playbooks/3_service/pbs.yml b/playbooks/3_service/pbs.yml index 7abd58fe..d4f25e4d 100644 --- a/playbooks/3_service/pbs.yml +++ b/playbooks/3_service/pbs.yml @@ -6,7 +6,6 @@ - { role: mgrote_zfs_manage_datasets, tags: "datasets" } - { role: mgrote_zfs_scrub, tags: "zfs_scrub" } - { role: mgrote_zfs_zed, tags: "zfs_zed" } - - { role: mgrote_zfs_health, tags: "zfs_health" } - { role: mgrote_zfs_sanoid, tags: "sanoid" } - { role: mgrote_smart, tags: "smart" } - { role: mgrote_pbs_users, tags: "pbs_users" } diff --git a/playbooks/3_service/pve.yml b/playbooks/3_service/pve.yml index 96b0b085..2942574e 100644 --- a/playbooks/3_service/pve.yml +++ b/playbooks/3_service/pve.yml @@ -6,7 +6,6 @@ - { role: mgrote_zfs_manage_datasets, tags: "datasets" } - { role: mgrote_zfs_scrub, tags: "zfs_scrub" } - { role: mgrote_zfs_zed, tags: "zfs_zed" } - - { role: mgrote_zfs_health, tags: "zfs_health" } - { role: mgrote_zfs_sanoid, tags: "sanoid" } - { role: mgrote_smart, tags: "smart" } - { role: mgrote_cv4pve_autosnap, tags: "cv4pve" } diff --git a/roles/mgrote_zfs_health/README.md b/roles/mgrote_zfs_health/README.md deleted file mode 100644 index 3de1f6d8..00000000 --- a/roles/mgrote_zfs_health/README.md +++ /dev/null @@ -1,12 +0,0 @@ -## mgrote.zfs_health - -### Beschreibung -Richtet "zfs_health.sh", ein ZFS-Checkscript das auch Mails versendet bei Fehlern. - - -### getestet auf -- [x] ProxMox 6.1 - -### Variablen + Defaults -- see [defaults](./defaults/main.yml) -- Variablen für ``mgrote.zfs_health/trim/scrub/zed/arc_mem/`` sind zusammengefasst unter zfs_extra_* diff --git a/roles/mgrote_zfs_health/defaults/main.yml b/roles/mgrote_zfs_health/defaults/main.yml deleted file mode 100644 index 5b4266fa..00000000 --- a/roles/mgrote_zfs_health/defaults/main.yml +++ /dev/null @@ -1,7 +0,0 @@ ---- -### when should the script be run -zfs_extra_cron_minutes_zfs_health: "0" -zfs_extra_cron_hours_zfs_health: "*" -### under which user the script is run -zfs_health_user_group: "root" -zfs_health_user: "zfs-health" diff --git a/roles/mgrote_zfs_health/tasks/main.yml b/roles/mgrote_zfs_health/tasks/main.yml deleted file mode 100644 index 55af11a5..00000000 --- a/roles/mgrote_zfs_health/tasks/main.yml +++ /dev/null @@ -1,22 +0,0 @@ ---- -- name: include user tasks - ansible.builtin.include_tasks: user.yml - -- name: template script - become: true - ansible.builtin.template: - src: zfs-health.sh - dest: /usr/local/bin/zfs-health.sh - mode: "0744" - owner: "{{ zfs_health_user }}" - group: "{{ zfs_health_user_group }}" - -- name: ensure cronjob exists - become: true - ansible.builtin.cron: - name: zfs_health - state: present - job: "/usr/local/bin/zfs-health.sh" - minute: "{{ zfs_extra_cron_minutes_zfs_health }}" - hour: "{{ zfs_extra_cron_hours_zfs_health }}" - user: "{{ zfs_health_user }}" diff --git a/roles/mgrote_zfs_health/tasks/user.yml b/roles/mgrote_zfs_health/tasks/user.yml deleted file mode 100644 index 5d1bf383..00000000 --- a/roles/mgrote_zfs_health/tasks/user.yml +++ /dev/null @@ -1,19 +0,0 @@ ---- -- name: ensure group exists - become: true - ansible.builtin.group: - name: "{{ zfs_health_user_group }}" - state: present - when: - - zfs_health_user_group is defined - -- name: ensure user exists - become: true - ansible.builtin.user: - name: "{{ zfs_health_user }}" - group: "{{ zfs_health_user_group }}" - shell: /usr/sbin/nologin - create_home: false - when: - - zfs_health_user_group is defined - - zfs_health_user is defined diff --git a/roles/mgrote_zfs_health/templates/zfs-health.sh b/roles/mgrote_zfs_health/templates/zfs-health.sh deleted file mode 100644 index 340c2f04..00000000 --- a/roles/mgrote_zfs_health/templates/zfs-health.sh +++ /dev/null @@ -1,48 +0,0 @@ -#! /bin/bash -{{ file_header | default () }} - -problems=0 -emailSubject="`hostname` - ZFS pool - HEALTH check" -emailMessage="" - -# Health - Check if all zfs volumes are in good condition. We are looking for -# any keyword signifying a degraded or broken array. - -condition=$(/sbin/zpool status | egrep -i '(DEGRADED|FAULTED|OFFLINE|UNAVAIL|REMOVED|FAIL|DESTROYED|corrupt|cannot|unrecover)') -if [ "${condition}" ]; then - emailSubject="$emailSubject - fault" - problems=1 -fi - - - -# Errors - Check the columns for READ, WRITE and CKSUM (checksum) drive errors -# on all volumes and all drives using "zpool status". If any non-zero errors -# are reported an email will be sent out. You should then look to replace the -# faulty drive and run "zpool scrub" on the affected volume after resilvering. - -if [ ${problems} -eq 0 ]; then - errors=$(/sbin/zpool status | grep ONLINE | grep -v state | awk '{print $3 $4 $5}' | grep -v 000) - if [ "${errors}" ]; then - emailSubject="$emailSubject - Drive Errors" - problems=1 - fi -fi - - - -# Notifications - On any problems send email with drive status information and -# capacities including a helpful subject line to root. Also use logger to write -# the email subject to the local logs. This is the place you may want to put -# any other notifications like: -# -# + Update an anonymous twitter account with your ZFS status (https://twitter.com/zfsmonitor) -# + Playing a sound file or beep the internal speaker -# + Update Nagios, Cacti, Zabbix, Munin or even BigBrother - -if [ "$problems" -ne 0 ]; then - logger $emailSubject - echo -e "$emailSubject \n\n\n `/sbin/zpool list` \n\n\n `/sbin/zpool status`" | mail -s "$emailSubject" {{ my_mail }} -fi - -### EOF ### diff --git a/roles/mgrote_zfs_zed/defaults/main.yml b/roles/mgrote_zfs_zed/defaults/main.yml new file mode 100644 index 00000000..3ce80847 --- /dev/null +++ b/roles/mgrote_zfs_zed/defaults/main.yml @@ -0,0 +1,6 @@ +--- +zed_time_bettween_warning_s: 3600 # in seconds +zed_mail_to: "{{ my_mail }}" +zed_notify_verbosity: "1" # If set to 0, suppress notification if the pool is healthy. If set to 1, send notification regardless of pool health. +zed_notify_data: 1 # Send notifications for 'ereport.fs.zfs.data' events. Disabled by default, any non-empty value will enable the feature. +zed_scrub_after_resilver: 1 # Run a scrub after every resilver. Disabled by default, 1 to enable and 0 to disable. diff --git a/roles/mgrote_zfs_zed/handlers/main.yml b/roles/mgrote_zfs_zed/handlers/main.yml index a7a3113c..6c872a05 100644 --- a/roles/mgrote_zfs_zed/handlers/main.yml +++ b/roles/mgrote_zfs_zed/handlers/main.yml @@ -1,5 +1,8 @@ --- -- name: testmail # noqa no-changed-when - ansible.builtin.shell: - cmd: "set -o pipefail && echo 'zed ist eingerichtet' | mail -s '{{ ansible_hostname }} - zed' '{{ my_mail }}'" - executable: /bin/bash +- name: Restart zfs-zed.service + become: true + ansible.builtin.systemd: + name: "zfs-zed.service" + enabled: true + masked: false + state: restarted diff --git a/roles/mgrote_zfs_zed/tasks/main.yml b/roles/mgrote_zfs_zed/tasks/main.yml index 69e817df..d286af4f 100644 --- a/roles/mgrote_zfs_zed/tasks/main.yml +++ b/roles/mgrote_zfs_zed/tasks/main.yml @@ -1,9 +1,10 @@ --- -- name: kopiere zed.rc +- name: Template "zed.rc" become: true ansible.builtin.template: owner: root + group: root mode: "0600" - src: zed.rc + src: zed.rc.j2 dest: /etc/zfs/zed.d/zed.rc - notify: testmail + notify: Restart zfs-zed.service diff --git a/roles/mgrote_zfs_zed/templates/zed.rc b/roles/mgrote_zfs_zed/templates/zed.rc.j2 similarity index 72% rename from roles/mgrote_zfs_zed/templates/zed.rc rename to roles/mgrote_zfs_zed/templates/zed.rc.j2 index 34687cd7..9f1d6d5e 100644 --- a/roles/mgrote_zfs_zed/templates/zed.rc +++ b/roles/mgrote_zfs_zed/templates/zed.rc.j2 @@ -16,14 +16,14 @@ # Email will only be sent if ZED_EMAIL_ADDR is defined. # Disabled by default; uncomment to enable. # -ZED_EMAIL_ADDR="{{ my_mail }}" +ZED_EMAIL_ADDR="{{ zed_mail_to }}" ## # Name or path of executable responsible for sending notifications via email; # the mail program must be capable of reading a message body from stdin. # Email will only be sent if ZED_EMAIL_ADDR is defined. # -#ZED_EMAIL_PROG="mail" +ZED_EMAIL_PROG="mail" ## # Command-line options for ZED_EMAIL_PROG. @@ -32,57 +32,39 @@ ZED_EMAIL_ADDR="{{ my_mail }}" # this should be protected with quotes to prevent word-splitting. # Email will only be sent if ZED_EMAIL_ADDR is defined. # -#ZED_EMAIL_OPTS="-s '@SUBJECT@' @ADDRESS@" +ZED_EMAIL_OPTS="-s '@SUBJECT@' @ADDRESS@" ## # Default directory for zed lock files. # -#ZED_LOCKDIR="/var/lock" +ZED_LOCKDIR="/var/lock" ## # Minimum number of seconds between notifications for a similar event. # -ZED_NOTIFY_INTERVAL_SECS=3600 +ZED_NOTIFY_INTERVAL_SECS={{ zed_time_bettween_warning_s }} ## # Notification verbosity. # If set to 0, suppress notification if the pool is healthy. # If set to 1, send notification regardless of pool health. # -ZED_NOTIFY_VERBOSE=1 +ZED_NOTIFY_VERBOSE={{ zed_notify_verbosity }} ## # Send notifications for 'ereport.fs.zfs.data' events. # Disabled by default, any non-empty value will enable the feature. # -ZED_NOTIFY_DATA=1 - -## -# Pushbullet access token. -# This grants full access to your account -- protect it accordingly! -# -# -# Disabled by default; uncomment to enable. -# -#ZED_PUSHBULLET_ACCESS_TOKEN="" - -## -# Pushbullet channel tag for push notification feeds that can be subscribed to. -# -# If not defined, push notifications will instead be sent to all devices -# associated with the account specified by the access token. -# Disabled by default; uncomment to enable. -# -#ZED_PUSHBULLET_CHANNEL_TAG="" +ZED_NOTIFY_DATA={{ zed_notify_data }} ## # Default directory for zed state files. # -#ZED_RUNDIR="/var/run" +ZED_RUNDIR="/var/run" ## # Turn on/off enclosure LEDs when drives get DEGRADED/FAULTED. This works for -# device mapper and multipath devices as well. Your enclosure must be +# device mapper and multipath devices as well. Your enclosure must be # supported by the Linux SES driver for this to work. # ZED_USE_ENCLOSURE_LEDS=1 @@ -90,7 +72,7 @@ ZED_USE_ENCLOSURE_LEDS=1 ## # Run a scrub after every resilver # Disabled by default, 1 to enable and 0 to disable. -ZED_SCRUB_AFTER_RESILVER=1 +ZED_SCRUB_AFTER_RESILVER={{ zed_scrub_after_resilver }} ## # The syslog priority (e.g., specified as a "facility.level" pair).