diff --git a/INSTALL b/INSTALL index 33b510d..f0de17b 100644 --- a/INSTALL +++ b/INSTALL @@ -8,7 +8,7 @@ default for SSH transport since v1.4.6. Syncoid runs will fail if one of them is not available on either end of the transport. On Ubuntu: apt install pv lzop mbuffer -On CentOS: yum install lzo pv mbuffer lzop +On CentOS: yum install lzo pv mbuffer lzop perl-Data-Dumper On FreeBSD: pkg install pv mbuffer lzop FreeBSD notes: FreeBSD may place pv and lzop in somewhere other than diff --git a/README.md b/README.md index f51bd84..b833dec 100644 --- a/README.md +++ b/README.md @@ -118,6 +118,32 @@ If ZFS supports resumeable send/receive streams on both the source and target th As of 1.4.18, syncoid also automatically supports and enables resume of interrupted replication when both source and target support this feature. +##### Syncoid Dataset Properties + ++ syncoid:sync + + Available values: + + + `true` (default if unset) + + This dataset will be synchronised to all hosts. + + + `false` + + This dataset will not be synchronised to any hosts - it will be skipped. This can be useful for preventing certain datasets from being transferred when recursively handling a tree. + + + `host1,host2,...` + + A comma separated list of hosts. This dataset will only be synchronised by hosts listed in the property. + + _Note_: this check is performed by the host running `syncoid`, thus the local hostname must be present for inclusion during a push operation // the remote hostname must be present for a pull. + + _Note_: this will also prevent syncoid from handling the dataset if given explicitly on the command line. + + _Note_: syncing a child of a no-sync dataset will currently result in a critical error. + + _Note_: empty properties will be handled as if they were unset. + ##### Syncoid Command Line Options + [source] @@ -128,10 +154,18 @@ As of 1.4.18, syncoid also automatically supports and enables resume of interrup This is the destination dataset. It can be either local or remote. ++ --identifier= + + Adds the given identifier to the snapshot name after "syncoid_" prefix and before the hostname. This enables the use case of reliable replication to multiple targets from the same host. The following chars are allowed: a-z, A-Z, 0-9, _, -, : and . . + + -r --recursive This will also transfer child datasets. ++ --skip-parent + + This will skip the syncing of the parent dataset. Does nothing without '--recursive' option. + + --compress Currently accepted options: gzip, pigz-fast, pigz-slow, lzo (default) & none. If the selected compression method is unavailable on the source and destination, no compression will be used. diff --git a/sanoid b/sanoid index 9cd9d33..485ee08 100755 --- a/sanoid +++ b/sanoid @@ -976,6 +976,11 @@ sub check_zpool() { ## other cases my ($dev, $sta) = /^\s+(\S+)\s+(\S+)/; + if (!defined($sta)) { + # cache and logs are special and don't have a status + next; + } + ## pool online, not degraded thanks to dead/corrupted disk if ($state eq "OK" && $sta eq "UNAVAIL") { $state="WARNING"; @@ -1111,7 +1116,7 @@ sub checklock { # make sure lockfile contains something if ( -z $lockfile) { # zero size lockfile, something is wrong - die "ERROR: something is wrong! $lockfile is empty\n"; + die "ERROR: something is wrong! $lockfile is empty\n"; } # lockfile exists. read pid and mutex from it. see if it's our pid. if not, see if diff --git a/syncoid b/syncoid index 0e60680..be80f13 100755 --- a/syncoid +++ b/syncoid @@ -19,7 +19,7 @@ use Sys::Hostname; my %args = ('sshkey' => '', 'sshport' => '', 'sshcipher' => '', 'sshoption' => [], 'target-bwlimit' => '', 'source-bwlimit' => ''); GetOptions(\%args, "no-command-checks", "monitor-version", "compress=s", "dumpsnaps", "recursive|r", "source-bwlimit=s", "target-bwlimit=s", "sshkey=s", "sshport=i", "sshcipher|c=s", "sshoption|o=s@", - "debug", "quiet", "no-stream", "no-sync-snap", "no-resume", "exclude=s@") or pod2usage(2); + "debug", "quiet", "no-stream", "no-sync-snap", "no-resume", "exclude=s@", "skip-parent", "identifier=s") or pod2usage(2); my %compressargs = %{compressargset($args{'compress'} || 'default')}; # Can't be done with GetOptions arg, as default still needs to be set @@ -71,6 +71,17 @@ if (length $args{'sshkey'}) { } my $sshoptions = join " ", map { "-o " . $_ } @{$args{'sshoption'}}; # deref required +my $identifier = ""; +if (length $args{'identifier'}) { + if ($args{'identifier'} !~ /^[a-zA-Z0-9-_:.]+$/) { + # invalid extra identifier + print("CRITICAL: extra identifier contains invalid chars!\n"); + pod2usage(2); + exit 127; + } + $identifier = "$args{'identifier'}_"; +} + # figure out if source and/or target are remote. $sshcmd = "$sshcmd $args{'sshcipher'} $sshoptions $args{'sshport'} $args{'sshkey'}"; if ($debug) { print "DEBUG: SSHCMD: $sshcmd\n"; } @@ -86,6 +97,7 @@ my $targetsudocmd = $targetisroot ? '' : $sudocmd; my %avail = checkcommands(); my %snaps; +my $exitcode = 0; ## break here to call replication individually so that we ## ## can loop across children separately, for recursive ## @@ -116,7 +128,7 @@ if ($targethost ne '') { close FH; } -exit 0; +exit $exitcode; ############################################################################## ############################################################################## @@ -141,6 +153,11 @@ sub getchilddatasets { my @children = ; close FH; + if (defined $args{'skip-parent'}) { + # parent dataset is the first element + shift @children; + } + if (defined $args{'exclude'}) { my $excludes = $args{'exclude'}; foreach (@$excludes) { @@ -167,9 +184,27 @@ sub syncdataset { if ($debug) { print "DEBUG: syncing source $sourcefs to target $targetfs.\n"; } + my $sync = getzfsvalue($sourcehost,$sourcefs,$sourceisroot,'syncoid:sync'); + + if ($sync eq 'true' || $sync eq '-' || $sync eq '') { + # empty is handled the same as unset (aka: '-') + # definitely sync this dataset - if a host is called 'true' or '-', then you're special + } elsif ($sync eq 'false') { + if (!$quiet) { print "INFO: Skipping dataset (syncoid:sync=false): $sourcefs...\n"; } + return 0; + } else { + my $hostid = hostname(); + my @hosts = split(/,/,$sync); + if (!(grep $hostid eq $_, @hosts)) { + if (!$quiet) { print "INFO: Skipping dataset (syncoid:sync doesn't include $hostid): $sourcefs...\n"; } + return 0; + } + } + # make sure target is not currently in receive. if (iszfsbusy($targethost,$targetfs,$targetisroot)) { warn "Cannot sync now: $targetfs is already target of a zfs receive process.\n"; + if ($exitcode < 1) { $exitcode = 1; } return 0; } @@ -215,11 +250,16 @@ sub syncdataset { if (!defined $args{'no-sync-snap'}) { # create a new syncoid snapshot on the source filesystem. $newsyncsnap = newsyncsnap($sourcehost,$sourcefs,$sourceisroot); + if (!$newsyncsnap) { + # we already whined about the error + return 0; + } } else { # we don't want sync snapshots created, so use the newest snapshot we can find. $newsyncsnap = getnewestsnapshot($sourcehost,$sourcefs,$sourceisroot); if ($newsyncsnap eq 0) { warn "CRITICAL: no snapshots exist on source $sourcefs, and you asked for --no-sync-snap.\n"; + if ($exitcode < 1) { $exitcode = 1; } return 0; } } @@ -248,6 +288,11 @@ sub syncdataset { } my $oldestsnap = getoldestsnapshot(\%snaps); if (! $oldestsnap) { + if (defined ($args{'no-sync-snap'}) ) { + # we already whined about the missing snapshots + return 0; + } + # getoldestsnapshot() returned false, so use new sync snapshot if ($debug) { print "DEBUG: getoldestsnapshot() returned false, so using $newsyncsnap.\n"; } $oldestsnap = $newsyncsnap; @@ -276,10 +321,14 @@ sub syncdataset { # make sure target is (still) not currently in receive. if (iszfsbusy($targethost,$targetfs,$targetisroot)) { warn "Cannot sync now: $targetfs is already target of a zfs receive process.\n"; + if ($exitcode < 1) { $exitcode = 1; } return 0; } - system($synccmd) == 0 - or die "CRITICAL ERROR: $synccmd failed: $?"; + system($synccmd) == 0 or do { + warn "CRITICAL ERROR: $synccmd failed: $?"; + if ($exitcode < 2) { $exitcode = 2; } + return 0; + }; # now do an -I to the new sync snapshot, assuming there were any snapshots # other than the new sync snapshot to begin with, of course - and that we @@ -302,6 +351,7 @@ sub syncdataset { # make sure target is (still) not currently in receive. if (iszfsbusy($targethost,$targetfs,$targetisroot)) { warn "Cannot sync now: $targetfs is already target of a zfs receive process.\n"; + if ($exitcode < 1) { $exitcode = 1; } return 0; } @@ -309,9 +359,12 @@ sub syncdataset { if ($debug) { print "DEBUG: $synccmd\n"; } if ($oldestsnap ne $newsyncsnap) { - system($synccmd) == 0 - or warn "CRITICAL ERROR: $synccmd failed: $?"; + my $ret = system($synccmd); + if ($ret != 0) { + warn "CRITICAL ERROR: $synccmd failed: $?"; + if ($exitcode < 1) { $exitcode = 1; } return 0; + } } else { if (!$quiet) { print "INFO: no incremental sync needed; $oldestsnap is already the newest available snapshot.\n"; } } @@ -335,8 +388,11 @@ sub syncdataset { if (!$quiet) { print "Resuming interrupted zfs send/receive from $sourcefs to $targetfs (~ $disp_pvsize remaining):\n"; } if ($debug) { print "DEBUG: $synccmd\n"; } - system("$synccmd") == 0 - or die "CRITICAL ERROR: $synccmd failed: $?"; + system("$synccmd") == 0 or do { + warn "CRITICAL ERROR: $synccmd failed: $?"; + if ($exitcode < 2) { $exitcode = 2; } + return 0; + }; # a resumed transfer will only be done to the next snapshot, # so do an normal sync cycle @@ -364,6 +420,7 @@ sub syncdataset { # make sure target is (still) not currently in receive. if (iszfsbusy($targethost,$targetfs,$targetisroot)) { warn "Cannot sync now: $targetfs is already target of a zfs receive process.\n"; + if ($exitcode < 1) { $exitcode = 1; } return 0; } @@ -393,16 +450,20 @@ sub syncdataset { if ($debug) { print "DEBUG: $synccmd\n"; } my $output = `$synccmd 2>&1`; - my $exitcode = $?; + my $ret = $?; - if ($exitcode != 0) { + if ($ret != 0) { if (!$resume && $output =~ /\Qcontains partially-complete state\E/) { if (!$quiet) { print "Resetting partially receive state\n"; } resetreceivestate($targethost,$targetfs,$targetisroot); - system($synccmd) == 0 - or die "CRITICAL ERROR: $synccmd failed: $?"; + system("$synccmd") == 0 or do { + warn "CRITICAL ERROR: $synccmd failed: $?"; + if ($exitcode < 2) { $exitcode = 2; } + return 0; + }; } else { - die "CRITICAL ERROR: $synccmd failed: $exitcode"; + warn "CRITICAL ERROR: $synccmd failed: $ret"; + if ($exitcode < 2) { $exitcode = 2; } } } @@ -574,7 +635,7 @@ sub checkcommands { if ($debug) { print "DEBUG: checking availability of $mbuffercmd on source...\n"; } $avail{'sourcembuffer'} = `$sourcessh $lscmd $mbuffercmd 2>/dev/null`; if ($avail{'sourcembuffer'} eq '') { - print "WARN: $mbuffercmd not available on source $s - sync will continue without source buffering.\n"; + if (!$quiet) { print "WARN: $mbuffercmd not available on source $s - sync will continue without source buffering.\n"; } $avail{'sourcembuffer'} = 0; } else { $avail{'sourcembuffer'} = 1; @@ -583,7 +644,7 @@ sub checkcommands { if ($debug) { print "DEBUG: checking availability of $mbuffercmd on target...\n"; } $avail{'targetmbuffer'} = `$targetssh $lscmd $mbuffercmd 2>/dev/null`; if ($avail{'targetmbuffer'} eq '') { - print "WARN: $mbuffercmd not available on target $t - sync will continue without target buffering.\n"; + if (!$quiet) { print "WARN: $mbuffercmd not available on target $t - sync will continue without target buffering.\n"; } $avail{'targetmbuffer'} = 0; } else { $avail{'targetmbuffer'} = 1; @@ -595,14 +656,14 @@ sub checkcommands { $avail{'localmbuffer'} = `$lscmd $mbuffercmd 2>/dev/null`; if ($avail{'localmbuffer'} eq '') { $avail{'localmbuffer'} = 0; - print "WARN: $mbuffercmd not available on local machine - sync will continue without local buffering.\n"; + if (!$quiet) { print "WARN: $mbuffercmd not available on local machine - sync will continue without local buffering.\n"; } } } if ($debug) { print "DEBUG: checking availability of $pvcmd on local machine...\n"; } $avail{'localpv'} = `$lscmd $pvcmd 2>/dev/null`; if ($avail{'localpv'} eq '') { - print "WARN: $pvcmd not available on local machine - sync will continue without progress bar.\n"; + if (!$quiet) { print "WARN: $pvcmd not available on local machine - sync will continue without progress bar.\n"; } $avail{'localpv'} = 0; } else { $avail{'localpv'} = 1; @@ -610,7 +671,7 @@ sub checkcommands { # check for ZFS resume feature support if ($resume) { - my $resumechkcmd = "$zfscmd get receive_resume_token -d 0"; + my $resumechkcmd = "$zfscmd get -d 0 receive_resume_token"; if ($debug) { print "DEBUG: checking availability of zfs resume feature on source...\n"; } $avail{'sourceresume'} = system("$sourcessh $resumechkcmd >/dev/null 2>&1"); @@ -730,7 +791,7 @@ sub getoldestsnapshot { # must not have had any snapshots on source - luckily, we already made one, amirite? if (defined ($args{'no-sync-snap'}) ) { # well, actually we set --no-sync-snap, so no we *didn't* already make one. Whoops. - die "CRIT: --no-sync-snap is set, and getoldestsnapshot() could not find any snapshots on source!\n"; + warn "CRIT: --no-sync-snap is set, and getoldestsnapshot() could not find any snapshots on source!\n"; } return 0; } @@ -739,7 +800,7 @@ sub getnewestsnapshot { my $snaps = shift; foreach my $snap ( sort { $snaps{'source'}{$b}{'creation'}<=>$snaps{'source'}{$a}{'creation'} } keys %{ $snaps{'source'} }) { # return on first snap found - it's the newest - print "NEWEST SNAPSHOT: $snap\n"; + if (!$quiet) { print "NEWEST SNAPSHOT: $snap\n"; } return $snap; } # must not have had any snapshots on source - looks like we'd better create one! @@ -752,6 +813,7 @@ sub getnewestsnapshot { # we also probably need an argument to mute this WARN, for people who deliberately exclude # datasets from recursive replication this way. warn "WARN: --no-sync-snap is set, and getnewestsnapshot() could not find any snapshots on source for current dataset. Continuing.\n"; + if ($exitcode < 2) { $exitcode = 2; } } return 0; } @@ -769,7 +831,7 @@ sub buildsynccmd { $synccmd = "$sendcmd |"; # avoid confusion - accept either source-bwlimit or target-bwlimit as the bandwidth limiting option here my $bwlimit = ''; - if (length $args{'bwlimit'}) { + if (length $args{'source-bwlimit'}) { $bwlimit = $args{'source-bwlimit'}; } elsif (length $args{'target-bwlimit'}) { $bwlimit = $args{'target-bwlimit'}; @@ -850,7 +912,7 @@ sub pruneoldsyncsnaps { # only prune snaps beginning with syncoid and our own hostname foreach my $snap(@snaps) { - if ($snap =~ /^syncoid_\Q$hostid\E/) { + if ($snap =~ /^syncoid_\Q$identifier$hostid\E/) { # no matter what, we categorically refuse to # prune the new sync snap we created for this run if ($snap ne $newsyncsnap) { @@ -875,7 +937,7 @@ sub pruneoldsyncsnaps { $prunecmd = escapeshellparam($prunecmd); } system("$rhost $prunecmd") == 0 - or warn "CRITICAL ERROR: $rhost $prunecmd failed: $?"; + or warn "WARNING: $rhost $prunecmd failed: $?"; $prunecmd = ''; $counter = 0; } @@ -906,6 +968,7 @@ sub getmatchingsnapshot { } # if we got this far, we failed to find a matching snapshot. + if ($exitcode < 2) { $exitcode = 2; } print "\n"; print "CRITICAL ERROR: Target $targetfs exists but has no snapshots matching with $sourcefs!\n"; @@ -936,10 +999,14 @@ sub newsyncsnap { if ($isroot) { $mysudocmd = ''; } else { $mysudocmd = $sudocmd; } my $hostid = hostname(); my %date = getdate(); - my $snapname = "syncoid\_$hostid\_$date{'stamp'}"; + my $snapname = "syncoid\_$identifier$hostid\_$date{'stamp'}"; my $snapcmd = "$rhost $mysudocmd $zfscmd snapshot $fsescaped\@$snapname\n"; - system($snapcmd) == 0 - or die "CRITICAL ERROR: $snapcmd failed: $?"; + system($snapcmd) == 0 or do { + warn "CRITICAL ERROR: $snapcmd failed: $?"; + if ($exitcode < 2) { $exitcode = 2; } + return 0; + }; + return $snapname; } @@ -1194,7 +1261,9 @@ syncoid - ZFS snapshot replication tool Options: --compress=FORMAT Compresses data during transfer. Currently accepted options are gzip, pigz-fast, pigz-slow, lzo (default) & none + --identifier=EXTRA Extra identifier which is included in the snapshot name. Can be used for replicating to multiple targets. --recursive|r Also transfers child datasets + --skip-parent Skips syncing of the parent dataset. Does nothing without '--recursive' option. --source-bwlimit= Bandwidth limit on the source transfer --target-bwlimit= Bandwidth limit on the target transfer --no-stream Replicates using newest snapshot instead of intermediates