Merge branch 'master' into reset-resume-state

This commit is contained in:
Christoph Klaffl 2018-08-08 00:43:00 +02:00
commit a9e540ebcd
No known key found for this signature in database
GPG Key ID: FC1C525C2A47CC28
4 changed files with 136 additions and 28 deletions

View File

@ -8,7 +8,7 @@ default for SSH transport since v1.4.6. Syncoid runs will fail if one of them
is not available on either end of the transport.
On Ubuntu: apt install pv lzop mbuffer
On CentOS: yum install lzo pv mbuffer lzop
On CentOS: yum install lzo pv mbuffer lzop perl-Data-Dumper
On FreeBSD: pkg install pv mbuffer lzop
FreeBSD notes: FreeBSD may place pv and lzop in somewhere other than

View File

@ -118,6 +118,32 @@ If ZFS supports resumeable send/receive streams on both the source and target th
As of 1.4.18, syncoid also automatically supports and enables resume of interrupted replication when both source and target support this feature.
##### Syncoid Dataset Properties
+ syncoid:sync
Available values:
+ `true` (default if unset)
This dataset will be synchronised to all hosts.
+ `false`
This dataset will not be synchronised to any hosts - it will be skipped. This can be useful for preventing certain datasets from being transferred when recursively handling a tree.
+ `host1,host2,...`
A comma-separated list of hosts. This dataset will only be synchronised by hosts listed in the property.
_Note_: this check is performed by the host running `syncoid`, thus the local hostname must be present for inclusion during a push operation, and the remote hostname must be present for a pull.
_Note_: this will also prevent syncoid from handling the dataset if given explicitly on the command line.
_Note_: syncing a child of a no-sync dataset will currently result in a critical error.
_Note_: empty properties will be handled as if they were unset.
##### Syncoid Command Line Options
+ [source]
@ -128,10 +154,18 @@ As of 1.4.18, syncoid also automatically supports and enables resume of interrup
This is the destination dataset. It can be either local or remote.
+ --identifier=
Adds the given identifier to the snapshot name after the "syncoid_" prefix and before the hostname. This enables the use case of reliable replication to multiple targets from the same host. The following characters are allowed: a-z, A-Z, 0-9, `_`, `-`, `:` and `.`.
+ -r --recursive
This will also transfer child datasets.
+ --skip-parent
This will skip the syncing of the parent dataset. Does nothing without the '--recursive' option.
+ --compress <compression type>
Currently accepted options: gzip, pigz-fast, pigz-slow, lzo (default) & none. If the selected compression method is unavailable on the source and destination, no compression will be used.

7
sanoid
View File

@ -976,6 +976,11 @@ sub check_zpool() {
## other cases
my ($dev, $sta) = /^\s+(\S+)\s+(\S+)/;
if (!defined($sta)) {
# cache and logs are special and don't have a status
next;
}
## pool online, not degraded thanks to dead/corrupted disk
if ($state eq "OK" && $sta eq "UNAVAIL") {
$state="WARNING";
@ -1111,7 +1116,7 @@ sub checklock {
# make sure lockfile contains something
if ( -z $lockfile) {
# zero size lockfile, something is wrong
die "ERROR: something is wrong! $lockfile is empty\n";
die "ERROR: something is wrong! $lockfile is empty\n";
}
# lockfile exists. read pid and mutex from it. see if it's our pid. if not, see if

121
syncoid
View File

@ -19,7 +19,7 @@ use Sys::Hostname;
my %args = ('sshkey' => '', 'sshport' => '', 'sshcipher' => '', 'sshoption' => [], 'target-bwlimit' => '', 'source-bwlimit' => '');
GetOptions(\%args, "no-command-checks", "monitor-version", "compress=s", "dumpsnaps", "recursive|r",
"source-bwlimit=s", "target-bwlimit=s", "sshkey=s", "sshport=i", "sshcipher|c=s", "sshoption|o=s@",
"debug", "quiet", "no-stream", "no-sync-snap", "no-resume", "exclude=s@") or pod2usage(2);
"debug", "quiet", "no-stream", "no-sync-snap", "no-resume", "exclude=s@", "skip-parent", "identifier=s") or pod2usage(2);
my %compressargs = %{compressargset($args{'compress'} || 'default')}; # Can't be done with GetOptions arg, as default still needs to be set
@ -71,6 +71,17 @@ if (length $args{'sshkey'}) {
}
my $sshoptions = join " ", map { "-o " . $_ } @{$args{'sshoption'}}; # deref required
my $identifier = "";
if (length $args{'identifier'}) {
if ($args{'identifier'} !~ /^[a-zA-Z0-9-_:.]+$/) {
# invalid extra identifier
print("CRITICAL: extra identifier contains invalid chars!\n");
pod2usage(2);
exit 127;
}
$identifier = "$args{'identifier'}_";
}
# figure out if source and/or target are remote.
$sshcmd = "$sshcmd $args{'sshcipher'} $sshoptions $args{'sshport'} $args{'sshkey'}";
if ($debug) { print "DEBUG: SSHCMD: $sshcmd\n"; }
@ -86,6 +97,7 @@ my $targetsudocmd = $targetisroot ? '' : $sudocmd;
my %avail = checkcommands();
my %snaps;
my $exitcode = 0;
## break here to call replication individually so that we ##
## can loop across children separately, for recursive ##
@ -116,7 +128,7 @@ if ($targethost ne '') {
close FH;
}
exit 0;
exit $exitcode;
##############################################################################
##############################################################################
@ -141,6 +153,11 @@ sub getchilddatasets {
my @children = <FH>;
close FH;
if (defined $args{'skip-parent'}) {
# parent dataset is the first element
shift @children;
}
if (defined $args{'exclude'}) {
my $excludes = $args{'exclude'};
foreach (@$excludes) {
@ -167,9 +184,27 @@ sub syncdataset {
if ($debug) { print "DEBUG: syncing source $sourcefs to target $targetfs.\n"; }
my $sync = getzfsvalue($sourcehost,$sourcefs,$sourceisroot,'syncoid:sync');
if ($sync eq 'true' || $sync eq '-' || $sync eq '') {
# empty is handled the same as unset (aka: '-')
# definitely sync this dataset - if a host is called 'true' or '-', then you're special
} elsif ($sync eq 'false') {
if (!$quiet) { print "INFO: Skipping dataset (syncoid:sync=false): $sourcefs...\n"; }
return 0;
} else {
my $hostid = hostname();
my @hosts = split(/,/,$sync);
if (!(grep $hostid eq $_, @hosts)) {
if (!$quiet) { print "INFO: Skipping dataset (syncoid:sync doesn't include $hostid): $sourcefs...\n"; }
return 0;
}
}
# make sure target is not currently in receive.
if (iszfsbusy($targethost,$targetfs,$targetisroot)) {
warn "Cannot sync now: $targetfs is already target of a zfs receive process.\n";
if ($exitcode < 1) { $exitcode = 1; }
return 0;
}
@ -215,11 +250,16 @@ sub syncdataset {
if (!defined $args{'no-sync-snap'}) {
# create a new syncoid snapshot on the source filesystem.
$newsyncsnap = newsyncsnap($sourcehost,$sourcefs,$sourceisroot);
if (!$newsyncsnap) {
# we already whined about the error
return 0;
}
} else {
# we don't want sync snapshots created, so use the newest snapshot we can find.
$newsyncsnap = getnewestsnapshot($sourcehost,$sourcefs,$sourceisroot);
if ($newsyncsnap eq 0) {
warn "CRITICAL: no snapshots exist on source $sourcefs, and you asked for --no-sync-snap.\n";
if ($exitcode < 1) { $exitcode = 1; }
return 0;
}
}
@ -248,6 +288,11 @@ sub syncdataset {
}
my $oldestsnap = getoldestsnapshot(\%snaps);
if (! $oldestsnap) {
if (defined ($args{'no-sync-snap'}) ) {
# we already whined about the missing snapshots
return 0;
}
# getoldestsnapshot() returned false, so use new sync snapshot
if ($debug) { print "DEBUG: getoldestsnapshot() returned false, so using $newsyncsnap.\n"; }
$oldestsnap = $newsyncsnap;
@ -276,10 +321,14 @@ sub syncdataset {
# make sure target is (still) not currently in receive.
if (iszfsbusy($targethost,$targetfs,$targetisroot)) {
warn "Cannot sync now: $targetfs is already target of a zfs receive process.\n";
if ($exitcode < 1) { $exitcode = 1; }
return 0;
}
system($synccmd) == 0
or die "CRITICAL ERROR: $synccmd failed: $?";
system($synccmd) == 0 or do {
warn "CRITICAL ERROR: $synccmd failed: $?";
if ($exitcode < 2) { $exitcode = 2; }
return 0;
};
# now do an -I to the new sync snapshot, assuming there were any snapshots
# other than the new sync snapshot to begin with, of course - and that we
@ -302,6 +351,7 @@ sub syncdataset {
# make sure target is (still) not currently in receive.
if (iszfsbusy($targethost,$targetfs,$targetisroot)) {
warn "Cannot sync now: $targetfs is already target of a zfs receive process.\n";
if ($exitcode < 1) { $exitcode = 1; }
return 0;
}
@ -309,9 +359,12 @@ sub syncdataset {
if ($debug) { print "DEBUG: $synccmd\n"; }
if ($oldestsnap ne $newsyncsnap) {
system($synccmd) == 0
or warn "CRITICAL ERROR: $synccmd failed: $?";
my $ret = system($synccmd);
if ($ret != 0) {
warn "CRITICAL ERROR: $synccmd failed: $?";
if ($exitcode < 1) { $exitcode = 1; }
return 0;
}
} else {
if (!$quiet) { print "INFO: no incremental sync needed; $oldestsnap is already the newest available snapshot.\n"; }
}
@ -335,8 +388,11 @@ sub syncdataset {
if (!$quiet) { print "Resuming interrupted zfs send/receive from $sourcefs to $targetfs (~ $disp_pvsize remaining):\n"; }
if ($debug) { print "DEBUG: $synccmd\n"; }
system("$synccmd") == 0
or die "CRITICAL ERROR: $synccmd failed: $?";
system("$synccmd") == 0 or do {
warn "CRITICAL ERROR: $synccmd failed: $?";
if ($exitcode < 2) { $exitcode = 2; }
return 0;
};
# a resumed transfer will only be done to the next snapshot,
# so do a normal sync cycle
@ -364,6 +420,7 @@ sub syncdataset {
# make sure target is (still) not currently in receive.
if (iszfsbusy($targethost,$targetfs,$targetisroot)) {
warn "Cannot sync now: $targetfs is already target of a zfs receive process.\n";
if ($exitcode < 1) { $exitcode = 1; }
return 0;
}
@ -393,16 +450,20 @@ sub syncdataset {
if ($debug) { print "DEBUG: $synccmd\n"; }
my $output = `$synccmd 2>&1`;
my $exitcode = $?;
my $ret = $?;
if ($exitcode != 0) {
if ($ret != 0) {
if (!$resume && $output =~ /\Qcontains partially-complete state\E/) {
if (!$quiet) { print "Resetting partially receive state\n"; }
resetreceivestate($targethost,$targetfs,$targetisroot);
system($synccmd) == 0
or die "CRITICAL ERROR: $synccmd failed: $?";
system("$synccmd") == 0 or do {
warn "CRITICAL ERROR: $synccmd failed: $?";
if ($exitcode < 2) { $exitcode = 2; }
return 0;
};
} else {
die "CRITICAL ERROR: $synccmd failed: $exitcode";
warn "CRITICAL ERROR: $synccmd failed: $ret";
if ($exitcode < 2) { $exitcode = 2; }
}
}
@ -574,7 +635,7 @@ sub checkcommands {
if ($debug) { print "DEBUG: checking availability of $mbuffercmd on source...\n"; }
$avail{'sourcembuffer'} = `$sourcessh $lscmd $mbuffercmd 2>/dev/null`;
if ($avail{'sourcembuffer'} eq '') {
print "WARN: $mbuffercmd not available on source $s - sync will continue without source buffering.\n";
if (!$quiet) { print "WARN: $mbuffercmd not available on source $s - sync will continue without source buffering.\n"; }
$avail{'sourcembuffer'} = 0;
} else {
$avail{'sourcembuffer'} = 1;
@ -583,7 +644,7 @@ sub checkcommands {
if ($debug) { print "DEBUG: checking availability of $mbuffercmd on target...\n"; }
$avail{'targetmbuffer'} = `$targetssh $lscmd $mbuffercmd 2>/dev/null`;
if ($avail{'targetmbuffer'} eq '') {
print "WARN: $mbuffercmd not available on target $t - sync will continue without target buffering.\n";
if (!$quiet) { print "WARN: $mbuffercmd not available on target $t - sync will continue without target buffering.\n"; }
$avail{'targetmbuffer'} = 0;
} else {
$avail{'targetmbuffer'} = 1;
@ -595,14 +656,14 @@ sub checkcommands {
$avail{'localmbuffer'} = `$lscmd $mbuffercmd 2>/dev/null`;
if ($avail{'localmbuffer'} eq '') {
$avail{'localmbuffer'} = 0;
print "WARN: $mbuffercmd not available on local machine - sync will continue without local buffering.\n";
if (!$quiet) { print "WARN: $mbuffercmd not available on local machine - sync will continue without local buffering.\n"; }
}
}
if ($debug) { print "DEBUG: checking availability of $pvcmd on local machine...\n"; }
$avail{'localpv'} = `$lscmd $pvcmd 2>/dev/null`;
if ($avail{'localpv'} eq '') {
print "WARN: $pvcmd not available on local machine - sync will continue without progress bar.\n";
if (!$quiet) { print "WARN: $pvcmd not available on local machine - sync will continue without progress bar.\n"; }
$avail{'localpv'} = 0;
} else {
$avail{'localpv'} = 1;
@ -610,7 +671,7 @@ sub checkcommands {
# check for ZFS resume feature support
if ($resume) {
my $resumechkcmd = "$zfscmd get receive_resume_token -d 0";
my $resumechkcmd = "$zfscmd get -d 0 receive_resume_token";
if ($debug) { print "DEBUG: checking availability of zfs resume feature on source...\n"; }
$avail{'sourceresume'} = system("$sourcessh $resumechkcmd >/dev/null 2>&1");
@ -730,7 +791,7 @@ sub getoldestsnapshot {
# must not have had any snapshots on source - luckily, we already made one, amirite?
if (defined ($args{'no-sync-snap'}) ) {
# well, actually we set --no-sync-snap, so no we *didn't* already make one. Whoops.
die "CRIT: --no-sync-snap is set, and getoldestsnapshot() could not find any snapshots on source!\n";
warn "CRIT: --no-sync-snap is set, and getoldestsnapshot() could not find any snapshots on source!\n";
}
return 0;
}
@ -739,7 +800,7 @@ sub getnewestsnapshot {
my $snaps = shift;
foreach my $snap ( sort { $snaps{'source'}{$b}{'creation'}<=>$snaps{'source'}{$a}{'creation'} } keys %{ $snaps{'source'} }) {
# return on first snap found - it's the newest
print "NEWEST SNAPSHOT: $snap\n";
if (!$quiet) { print "NEWEST SNAPSHOT: $snap\n"; }
return $snap;
}
# must not have had any snapshots on source - looks like we'd better create one!
@ -752,6 +813,7 @@ sub getnewestsnapshot {
# we also probably need an argument to mute this WARN, for people who deliberately exclude
# datasets from recursive replication this way.
warn "WARN: --no-sync-snap is set, and getnewestsnapshot() could not find any snapshots on source for current dataset. Continuing.\n";
if ($exitcode < 2) { $exitcode = 2; }
}
return 0;
}
@ -769,7 +831,7 @@ sub buildsynccmd {
$synccmd = "$sendcmd |";
# avoid confusion - accept either source-bwlimit or target-bwlimit as the bandwidth limiting option here
my $bwlimit = '';
if (length $args{'bwlimit'}) {
if (length $args{'source-bwlimit'}) {
$bwlimit = $args{'source-bwlimit'};
} elsif (length $args{'target-bwlimit'}) {
$bwlimit = $args{'target-bwlimit'};
@ -850,7 +912,7 @@ sub pruneoldsyncsnaps {
# only prune snaps beginning with syncoid and our own hostname
foreach my $snap(@snaps) {
if ($snap =~ /^syncoid_\Q$hostid\E/) {
if ($snap =~ /^syncoid_\Q$identifier$hostid\E/) {
# no matter what, we categorically refuse to
# prune the new sync snap we created for this run
if ($snap ne $newsyncsnap) {
@ -875,7 +937,7 @@ sub pruneoldsyncsnaps {
$prunecmd = escapeshellparam($prunecmd);
}
system("$rhost $prunecmd") == 0
or warn "CRITICAL ERROR: $rhost $prunecmd failed: $?";
or warn "WARNING: $rhost $prunecmd failed: $?";
$prunecmd = '';
$counter = 0;
}
@ -906,6 +968,7 @@ sub getmatchingsnapshot {
}
# if we got this far, we failed to find a matching snapshot.
if ($exitcode < 2) { $exitcode = 2; }
print "\n";
print "CRITICAL ERROR: Target $targetfs exists but has no snapshots matching with $sourcefs!\n";
@ -936,10 +999,14 @@ sub newsyncsnap {
if ($isroot) { $mysudocmd = ''; } else { $mysudocmd = $sudocmd; }
my $hostid = hostname();
my %date = getdate();
my $snapname = "syncoid\_$hostid\_$date{'stamp'}";
my $snapname = "syncoid\_$identifier$hostid\_$date{'stamp'}";
my $snapcmd = "$rhost $mysudocmd $zfscmd snapshot $fsescaped\@$snapname\n";
system($snapcmd) == 0
or die "CRITICAL ERROR: $snapcmd failed: $?";
system($snapcmd) == 0 or do {
warn "CRITICAL ERROR: $snapcmd failed: $?";
if ($exitcode < 2) { $exitcode = 2; }
return 0;
};
return $snapname;
}
@ -1194,7 +1261,9 @@ syncoid - ZFS snapshot replication tool
Options:
--compress=FORMAT Compresses data during transfer. Currently accepted options are gzip, pigz-fast, pigz-slow, lzo (default) & none
--identifier=EXTRA Extra identifier which is included in the snapshot name. Can be used for replicating to multiple targets.
--recursive|r Also transfers child datasets
--skip-parent Skips syncing of the parent dataset. Does nothing without '--recursive' option.
--source-bwlimit=<limit k|m|g|t> Bandwidth limit on the source transfer
--target-bwlimit=<limit k|m|g|t> Bandwidth limit on the target transfer
--no-stream Replicates using newest snapshot instead of intermediates