remove zfs-health.sh, configure zfs-zed (#609)
Reviewed-on: #609 Co-authored-by: Michael Grote <michael.grote@posteo.de> Co-committed-by: Michael Grote <michael.grote@posteo.de>
This commit is contained in:
parent
b61a027163
commit
92eb30ccb3
11 changed files with 27 additions and 145 deletions
|
@ -6,7 +6,6 @@
|
|||
- { role: mgrote_zfs_manage_datasets, tags: "datasets" }
|
||||
- { role: mgrote_zfs_scrub, tags: "zfs_scrub" }
|
||||
- { role: mgrote_zfs_zed, tags: "zfs_zed" }
|
||||
- { role: mgrote_zfs_health, tags: "zfs_health" }
|
||||
- { role: mgrote_zfs_sanoid, tags: "sanoid" }
|
||||
- { role: mgrote_smart, tags: "smart" }
|
||||
- { role: mgrote_pbs_users, tags: "pbs_users" }
|
||||
|
|
|
@ -6,7 +6,6 @@
|
|||
- { role: mgrote_zfs_manage_datasets, tags: "datasets" }
|
||||
- { role: mgrote_zfs_scrub, tags: "zfs_scrub" }
|
||||
- { role: mgrote_zfs_zed, tags: "zfs_zed" }
|
||||
- { role: mgrote_zfs_health, tags: "zfs_health" }
|
||||
- { role: mgrote_zfs_sanoid, tags: "sanoid" }
|
||||
- { role: mgrote_smart, tags: "smart" }
|
||||
- { role: mgrote_cv4pve_autosnap, tags: "cv4pve" }
|
||||
|
|
|
@ -1,12 +0,0 @@
|
|||
## mgrote.zfs_health
|
||||
|
||||
### Beschreibung
|
||||
Richtet "zfs-health.sh" ein, ein ZFS-Checkscript, das bei Fehlern auch Mails versendet.
|
||||
|
||||
|
||||
### getestet auf
|
||||
- [x] Proxmox 6.1
|
||||
|
||||
### Variablen + Defaults
|
||||
- see [defaults](./defaults/main.yml)
|
||||
- Variablen für ``mgrote.zfs_health/trim/scrub/zed/arc_mem/`` sind zusammengefasst unter zfs_extra_*
|
|
@ -1,7 +0,0 @@
|
|||
---
|
||||
### when should the script be run
|
||||
zfs_extra_cron_minutes_zfs_health: "0"
|
||||
zfs_extra_cron_hours_zfs_health: "*"
|
||||
### under which user the script is run
|
||||
zfs_health_user_group: "root"
|
||||
zfs_health_user: "zfs-health"
|
|
@ -1,22 +0,0 @@
|
|||
---
|
||||
- name: include user tasks
|
||||
ansible.builtin.include_tasks: user.yml
|
||||
|
||||
- name: template script
|
||||
become: true
|
||||
ansible.builtin.template:
|
||||
src: zfs-health.sh
|
||||
dest: /usr/local/bin/zfs-health.sh
|
||||
mode: "0744"
|
||||
owner: "{{ zfs_health_user }}"
|
||||
group: "{{ zfs_health_user_group }}"
|
||||
|
||||
- name: ensure cronjob exists
|
||||
become: true
|
||||
ansible.builtin.cron:
|
||||
name: zfs_health
|
||||
state: present
|
||||
job: "/usr/local/bin/zfs-health.sh"
|
||||
minute: "{{ zfs_extra_cron_minutes_zfs_health }}"
|
||||
hour: "{{ zfs_extra_cron_hours_zfs_health }}"
|
||||
user: "{{ zfs_health_user }}"
|
|
@ -1,19 +0,0 @@
|
|||
---
|
||||
- name: ensure group exists
|
||||
become: true
|
||||
ansible.builtin.group:
|
||||
name: "{{ zfs_health_user_group }}"
|
||||
state: present
|
||||
when:
|
||||
- zfs_health_user_group is defined
|
||||
|
||||
- name: ensure user exists
|
||||
become: true
|
||||
ansible.builtin.user:
|
||||
name: "{{ zfs_health_user }}"
|
||||
group: "{{ zfs_health_user_group }}"
|
||||
shell: /usr/sbin/nologin
|
||||
create_home: false
|
||||
when:
|
||||
- zfs_health_user_group is defined
|
||||
- zfs_health_user is defined
|
|
@ -1,48 +0,0 @@
|
|||
#! /bin/bash
|
||||
{{ file_header | default () }}
|
||||
|
||||
problems=0
|
||||
emailSubject="`hostname` - ZFS pool - HEALTH check"
|
||||
emailMessage=""
|
||||
|
||||
# Health - Check if all zfs volumes are in good condition. We are looking for
|
||||
# any keyword signifying a degraded or broken array.
|
||||
|
||||
condition=$(/sbin/zpool status | egrep -i '(DEGRADED|FAULTED|OFFLINE|UNAVAIL|REMOVED|FAIL|DESTROYED|corrupt|cannot|unrecover)')
|
||||
if [ "${condition}" ]; then
|
||||
emailSubject="$emailSubject - fault"
|
||||
problems=1
|
||||
fi
|
||||
|
||||
|
||||
|
||||
# Errors - Check the columns for READ, WRITE and CKSUM (checksum) drive errors
|
||||
# on all volumes and all drives using "zpool status". If any non-zero errors
|
||||
# are reported an email will be sent out. You should then look to replace the
|
||||
# faulty drive and run "zpool scrub" on the affected volume after resilvering.
|
||||
|
||||
if [ ${problems} -eq 0 ]; then
|
||||
errors=$(/sbin/zpool status | grep ONLINE | grep -v state | awk '{print $3 $4 $5}' | grep -v 000)
|
||||
if [ "${errors}" ]; then
|
||||
emailSubject="$emailSubject - Drive Errors"
|
||||
problems=1
|
||||
fi
|
||||
fi
|
||||
|
||||
|
||||
|
||||
# Notifications - On any problems send email with drive status information and
|
||||
# capacities including a helpful subject line to root. Also use logger to write
|
||||
# the email subject to the local logs. This is the place you may want to put
|
||||
# any other notifications like:
|
||||
#
|
||||
# + Update an anonymous twitter account with your ZFS status (https://twitter.com/zfsmonitor)
|
||||
# + Playing a sound file or beep the internal speaker
|
||||
# + Update Nagios, Cacti, Zabbix, Munin or even BigBrother
|
||||
|
||||
if [ "$problems" -ne 0 ]; then
|
||||
logger $emailSubject
|
||||
echo -e "$emailSubject \n\n\n `/sbin/zpool list` \n\n\n `/sbin/zpool status`" | mail -s "$emailSubject" {{ my_mail }}
|
||||
fi
|
||||
|
||||
### EOF ###
|
6
roles/mgrote_zfs_zed/defaults/main.yml
Normal file
6
roles/mgrote_zfs_zed/defaults/main.yml
Normal file
|
@ -0,0 +1,6 @@
|
|||
---
|
||||
zed_time_bettween_warning_s: 3600 # in seconds
|
||||
zed_mail_to: "{{ my_mail }}"
|
||||
zed_notify_verbosity: "1" # If set to 0, suppress notification if the pool is healthy. If set to 1, send notification regardless of pool health.
|
||||
zed_notify_data: 1 # Send notifications for 'ereport.fs.zfs.data' events. Disabled by default, any non-empty value will enable the feature.
|
||||
zed_scrub_after_resilver: 1 # Run a scrub after every resilver. Disabled by default, 1 to enable and 0 to disable.
|
|
@ -1,5 +1,8 @@
|
|||
---
|
||||
- name: testmail # noqa no-changed-when
|
||||
ansible.builtin.shell:
|
||||
cmd: "set -o pipefail && echo 'zed ist eingerichtet' | mail -s '{{ ansible_hostname }} - zed' '{{ my_mail }}'"
|
||||
executable: /bin/bash
|
||||
- name: Restart zfs-zed.service
|
||||
become: true
|
||||
ansible.builtin.systemd:
|
||||
name: "zfs-zed.service"
|
||||
enabled: true
|
||||
masked: false
|
||||
state: restarted
|
||||
|
|
|
@ -1,9 +1,10 @@
|
|||
---
|
||||
- name: kopiere zed.rc
|
||||
- name: Template "zed.rc"
|
||||
become: true
|
||||
ansible.builtin.template:
|
||||
owner: root
|
||||
group: root
|
||||
mode: "0600"
|
||||
src: zed.rc
|
||||
src: zed.rc.j2
|
||||
dest: /etc/zfs/zed.d/zed.rc
|
||||
notify: testmail
|
||||
notify: Restart zfs-zed.service
|
||||
|
|
|
@ -16,14 +16,14 @@
|
|||
# Email will only be sent if ZED_EMAIL_ADDR is defined.
|
||||
# Disabled by default; uncomment to enable.
|
||||
#
|
||||
ZED_EMAIL_ADDR="{{ my_mail }}"
|
||||
ZED_EMAIL_ADDR="{{ zed_mail_to }}"
|
||||
|
||||
##
|
||||
# Name or path of executable responsible for sending notifications via email;
|
||||
# the mail program must be capable of reading a message body from stdin.
|
||||
# Email will only be sent if ZED_EMAIL_ADDR is defined.
|
||||
#
|
||||
#ZED_EMAIL_PROG="mail"
|
||||
ZED_EMAIL_PROG="mail"
|
||||
|
||||
##
|
||||
# Command-line options for ZED_EMAIL_PROG.
|
||||
|
@ -32,53 +32,35 @@ ZED_EMAIL_ADDR="{{ my_mail }}"
|
|||
# this should be protected with quotes to prevent word-splitting.
|
||||
# Email will only be sent if ZED_EMAIL_ADDR is defined.
|
||||
#
|
||||
#ZED_EMAIL_OPTS="-s '@SUBJECT@' @ADDRESS@"
|
||||
ZED_EMAIL_OPTS="-s '@SUBJECT@' @ADDRESS@"
|
||||
|
||||
##
|
||||
# Default directory for zed lock files.
|
||||
#
|
||||
#ZED_LOCKDIR="/var/lock"
|
||||
ZED_LOCKDIR="/var/lock"
|
||||
|
||||
##
|
||||
# Minimum number of seconds between notifications for a similar event.
|
||||
#
|
||||
ZED_NOTIFY_INTERVAL_SECS=3600
|
||||
ZED_NOTIFY_INTERVAL_SECS={{ zed_time_bettween_warning_s }}
|
||||
|
||||
##
|
||||
# Notification verbosity.
|
||||
# If set to 0, suppress notification if the pool is healthy.
|
||||
# If set to 1, send notification regardless of pool health.
|
||||
#
|
||||
ZED_NOTIFY_VERBOSE=1
|
||||
ZED_NOTIFY_VERBOSE={{ zed_notify_verbosity }}
|
||||
|
||||
##
|
||||
# Send notifications for 'ereport.fs.zfs.data' events.
|
||||
# Disabled by default, any non-empty value will enable the feature.
|
||||
#
|
||||
ZED_NOTIFY_DATA=1
|
||||
|
||||
##
|
||||
# Pushbullet access token.
|
||||
# This grants full access to your account -- protect it accordingly!
|
||||
# <https://www.pushbullet.com/get-started>
|
||||
# <https://www.pushbullet.com/account>
|
||||
# Disabled by default; uncomment to enable.
|
||||
#
|
||||
#ZED_PUSHBULLET_ACCESS_TOKEN=""
|
||||
|
||||
##
|
||||
# Pushbullet channel tag for push notification feeds that can be subscribed to.
|
||||
# <https://www.pushbullet.com/my-channel>
|
||||
# If not defined, push notifications will instead be sent to all devices
|
||||
# associated with the account specified by the access token.
|
||||
# Disabled by default; uncomment to enable.
|
||||
#
|
||||
#ZED_PUSHBULLET_CHANNEL_TAG=""
|
||||
ZED_NOTIFY_DATA={{ zed_notify_data }}
|
||||
|
||||
##
|
||||
# Default directory for zed state files.
|
||||
#
|
||||
#ZED_RUNDIR="/var/run"
|
||||
ZED_RUNDIR="/var/run"
|
||||
|
||||
##
|
||||
# Turn on/off enclosure LEDs when drives get DEGRADED/FAULTED. This works for
|
||||
|
@ -90,7 +72,7 @@ ZED_USE_ENCLOSURE_LEDS=1
|
|||
##
|
||||
# Run a scrub after every resilver
|
||||
# Disabled by default, 1 to enable and 0 to disable.
|
||||
ZED_SCRUB_AFTER_RESILVER=1
|
||||
ZED_SCRUB_AFTER_RESILVER={{ zed_scrub_after_resilver }}
|
||||
|
||||
##
|
||||
# The syslog priority (e.g., specified as a "facility.level" pair).
|
Loading…
Reference in a new issue