From c8b880c5e2ebb4771fa845af8e4cd006751d8970 Mon Sep 17 00:00:00 2001 From: Christoph Klaffl Date: Wed, 21 Nov 2018 18:01:40 +0100 Subject: [PATCH 1/2] implemented clone handling (try to recreate on target instead of full replication) --- syncoid | 116 +++++++++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 93 insertions(+), 23 deletions(-) diff --git a/syncoid b/syncoid index 30aef49..e9e3f27 100755 --- a/syncoid +++ b/syncoid @@ -104,17 +104,59 @@ my $exitcode = 0; ## replication ## if (!defined $args{'recursive'}) { - syncdataset($sourcehost, $sourcefs, $targethost, $targetfs); + syncdataset($sourcehost, $sourcefs, $targethost, $targetfs, undef); } else { if ($debug) { print "DEBUG: recursive sync of $sourcefs.\n"; } my @datasets = getchilddatasets($sourcehost, $sourcefs, $sourceisroot); - foreach my $dataset(@datasets) { + + my @deferred; + + foreach my $datasetProperties(@datasets) { + my $dataset = $datasetProperties->{'name'}; + my $origin = $datasetProperties->{'origin'}; + if ($origin eq "-") { + $origin = undef; + } else { + # check if clone source is replicated too + my @values = split(/@/, $origin, 2); + my $srcdataset = $values[0]; + + my $found = 0; + foreach my $datasetProperties(@datasets) { + if ($datasetProperties->{'name'} eq $srcdataset) { + $found = 1; + last; + } + } + + if ($found == 0) { + # clone source is not replicated, do a full replication + $origin = undef; + } else { + # clone source is replicated, defer until all non clones are replicated + push @deferred, $datasetProperties; + next; + } + } + $dataset =~ s/\Q$sourcefs\E//; chomp $dataset; my $childsourcefs = $sourcefs . $dataset; my $childtargetfs = $targetfs . 
$dataset; # print "syncdataset($sourcehost, $childsourcefs, $targethost, $childtargetfs); \n"; - syncdataset($sourcehost, $childsourcefs, $targethost, $childtargetfs); + syncdataset($sourcehost, $childsourcefs, $targethost, $childtargetfs, $origin); + } + + # replicate cloned datasets and if this is the initial run, recreate them on the target + foreach my $datasetProperties(@deferred) { + my $dataset = $datasetProperties->{'name'}; + my $origin = $datasetProperties->{'origin'}; + + $dataset =~ s/\Q$sourcefs\E//; + chomp $dataset; + my $childsourcefs = $sourcefs . $dataset; + my $childtargetfs = $targetfs . $dataset; + syncdataset($sourcehost, $childsourcefs, $targethost, $childtargetfs, $origin); } } @@ -147,37 +189,51 @@ sub getchilddatasets { $fsescaped = escapeshellparam($fsescaped); } - my $getchildrencmd = "$rhost $mysudocmd $zfscmd list -o name -t filesystem,volume -Hr $fsescaped |"; + my $getchildrencmd = "$rhost $mysudocmd $zfscmd list -o name,origin -t filesystem,volume -Hr $fsescaped |"; if ($debug) { print "DEBUG: getting list of child datasets on $fs using $getchildrencmd...\n"; } - open FH, $getchildrencmd; - my @children = <FH>; - close FH; - - if (defined $args{'skip-parent'}) { - # parent dataset is the first element - shift @children; + if (! open FH, $getchildrencmd) { + die "ERROR: list command failed!\n"; } - if (defined $args{'exclude'}) { - my $excludes = $args{'exclude'}; - foreach (@$excludes) { - for my $i ( 0 .. 
$#children ) { - if ($children[$i] =~ /$_/) { - if ($debug) { print "DEBUG: excluded $children[$i] because of $_\n"; } - undef $children[$i] + my @children; + my $first = 1; + + DATASETS: while(<FH>) { + chomp; + + if (defined $args{'skip-parent'} && $first eq 1) { + # parent dataset is the first element + $first = 0; + next; + } + + my ($dataset, $origin) = /^([^\t]+)\t([^\t]+)/; + + if (defined $args{'exclude'}) { + my $excludes = $args{'exclude'}; + foreach (@$excludes) { + print("$dataset\n"); + if ($dataset =~ /$_/) { + if ($debug) { print "DEBUG: excluded $dataset because of $_\n"; } + next DATASETS; } } - - @children = grep{ defined }@children; } + + my %properties; + $properties{'name'} = $dataset; + $properties{'origin'} = $origin; + + push @children, \%properties; } + close FH; return @children; } sub syncdataset { - my ($sourcehost, $sourcefs, $targethost, $targetfs) = @_; + my ($sourcehost, $sourcefs, $targethost, $targetfs, $origin) = @_; my $sourcefsescaped = escapeshellparam($sourcefs); my $targetfsescaped = escapeshellparam($targetfs); @@ -305,11 +361,25 @@ sub syncdataset { my $sendcmd = "$sourcesudocmd $zfscmd send $sourcefsescaped\@$oldestsnapescaped"; my $recvcmd = "$targetsudocmd $zfscmd receive $receiveextraargs -F $targetfsescaped"; - my $pvsize = getsendsize($sourcehost,"$sourcefs\@$oldestsnap",0,$sourceisroot); + my $pvsize; + if (defined $origin) { + my $originescaped = escapeshellparam($origin); + $sendcmd = "$sourcesudocmd $zfscmd send -i $originescaped $sourcefsescaped\@$oldestsnapescaped"; + my $streamargBackup = $args{'streamarg'}; + $args{'streamarg'} = "-i"; + $pvsize = getsendsize($sourcehost,$origin,"$sourcefs\@$oldestsnap",$sourceisroot); + $args{'streamarg'} = $streamargBackup; + } else { + $pvsize = getsendsize($sourcehost,"$sourcefs\@$oldestsnap",0,$sourceisroot); + } + my $disp_pvsize = readablebytes($pvsize); if ($pvsize == 0) { $disp_pvsize = 'UNKNOWN'; } my $synccmd = 
buildsynccmd($sendcmd,$recvcmd,$pvsize,$sourceisroot,$targetisroot); if (!$quiet) { + if (defined $origin) { + print "INFO: Clone is recreated on target $targetfs based on $origin\n"; + } if (!defined ($args{'no-stream'}) ) { print "INFO: Sending oldest full snapshot $sourcefs\@$oldestsnap (~ $disp_pvsize) to new target filesystem:\n"; } else { @@ -396,7 +466,7 @@ sub syncdataset { # a resumed transfer will only be done to the next snapshot, # so do an normal sync cycle - return syncdataset($sourcehost, $sourcefs, $targethost, $targetfs); + return syncdataset($sourcehost, $sourcefs, $targethost, $targetfs, undef); } # find most recent matching snapshot and do an -I From 9d6cb42f4d2927c34d9d6344fbadb57f9f72caa1 Mon Sep 17 00:00:00 2001 From: Christoph Klaffl Date: Wed, 21 Nov 2018 18:08:38 +0100 Subject: [PATCH 2/2] added option to disable smart clone handling --- README.md | 5 +++++ syncoid | 6 ++++-- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index b833dec..ed7a107 100644 --- a/README.md +++ b/README.md @@ -198,6 +198,11 @@ As of 1.4.18, syncoid also automatically supports and enables resume of interrup This argument tells syncoid to not use resumeable zfs send/receive streams. ++ --no-clone-handling + + This argument tells syncoid to not recreate clones on the target on initial sync and to do a normal replication instead. + + + --dumpsnaps This prints a list of snapshots during the run. 
diff --git a/syncoid b/syncoid index e9e3f27..c54c915 100755 --- a/syncoid +++ b/syncoid @@ -19,7 +19,8 @@ use Sys::Hostname; my %args = ('sshkey' => '', 'sshport' => '', 'sshcipher' => '', 'sshoption' => [], 'target-bwlimit' => '', 'source-bwlimit' => ''); GetOptions(\%args, "no-command-checks", "monitor-version", "compress=s", "dumpsnaps", "recursive|r", "source-bwlimit=s", "target-bwlimit=s", "sshkey=s", "sshport=i", "sshcipher|c=s", "sshoption|o=s@", - "debug", "quiet", "no-stream", "no-sync-snap", "no-resume", "exclude=s@", "skip-parent", "identifier=s") or pod2usage(2); + "debug", "quiet", "no-stream", "no-sync-snap", "no-resume", "exclude=s@", "skip-parent", "identifier=s", + "no-clone-handling") or pod2usage(2); my %compressargs = %{compressargset($args{'compress'} || 'default')}; # Can't be done with GetOptions arg, as default still needs to be set @@ -114,7 +115,7 @@ if (!defined $args{'recursive'}) { foreach my $datasetProperties(@datasets) { my $dataset = $datasetProperties->{'name'}; my $origin = $datasetProperties->{'origin'}; - if ($origin eq "-") { + if ($origin eq "-" || defined $args{'no-clone-handling'}) { $origin = undef; } else { # check if clone source is replicated too @@ -1320,3 +1321,4 @@ Options: --dumpsnaps Dumps a list of snapshots during the run --no-command-checks Do not check command existence before attempting transfer. Not recommended --no-resume Don't use the ZFS resume feature if available + --no-clone-handling Don't try to recreate clones on target