#!/usr/local/bin/perl
# Clone a directory on another directory
# Create/delete/update files as required, preserving timestamps.
# Usage: clonedir [-v][-n][-a] sourcedir targetdir
# -v: print messages about files copied and deleted
# -n: don't actually do any file actions.
# -a: copy everything (ignore the skip list).
# -x: cross file systems (default is to stay in same filesystem)
#
# Notes:
# (*) Use size and timestamp to see if a file needs to be updated
# (*) Preserve symbolic links (ie don't follow them)
# (*) Preserve named pipes, but skip other special files.
#
# NB: For restoring windows backups you also need sfn-fix
# together with a saved copy of the output of mdir -/ C:
# to resotre short file names after restoring the files.
# (The windows Registry is stuffed full of references 
# to files and directories by their short names.  Sigh.)
#

use strict;
use warnings;
use File::stat;
use Fcntl ':mode';

sub dir($);
sub clonedir($$);
sub copy($$);
sub remove($);
sub fast_copy_rcp($$);
sub fast_copy_sys($$);
sub df_host($);

# Top level files or directories to skip:
my @skipped = ();

my $slop = 120;	# Allowed time difference (to cope with clock differences)
my $doit = 1;
my $verbose = 0;
my $all = 0;
my $one_file_system = 1;
my $blocksize = 81920;

(my $myname = $0) =~ s|(.*/)*||;	# strip path component from name
my $Usage = "Usage: $myname [-v][-n] sourcedir targetdir\n";

# Check options:
while (@ARGV && ($ARGV[0] =~ /^-/)) {
  my $opt = shift(@ARGV);
  if ($opt eq "-v") {
    $verbose = 1;
  } elsif ($opt eq "-n") {
    $doit = 0;
  } elsif ($opt eq "-a") {
    $all = 1;
  } elsif ($opt eq "-x") {
    $one_file_system = 0;
  } else {
    die $Usage;
  }
}

# Check for two arguments:
@ARGV = qw(/home/martin /home/user1/martin) unless (@ARGV);
$ARGV[1] = "/home2/backup$ARGV[0]" if (@ARGV == 1);
die $Usage if (@ARGV != 2);

my $src = $ARGV[0];
die "Source directory `$src' not found: $!\n" unless (-d $src);
my $dst = $ARGV[1];
die "Destination directory `$dst' not found: $!\n" unless (-d $dst);
die "Destination directory `$dst' not writable: $!\n" unless (-w $dst);

my $src_host = df_host($src);
my $dst_host = df_host($dst);


my %skip = ();
foreach (@skipped) { $skip{"$src/$_"}++ };
%skip = () if $all;

my $base_dev = stat($src)->dev;
clonedir($src, $dst);

exit(0);

sub clonedir($$) {
  my ($src, $dst) = @_;
  # Check directory permissions ???
  my @src = grep { !$skip{"$src/$_"} } dir($src);
  my @dst = dir($dst);

  while (@src && @dst) {
    if ($src[0] lt $dst[0]) {
      my $file = shift(@src);
      copy("$src/$file", "$dst/$file");
    } elsif ($src[0] gt $dst[0]) {
      my $file = shift(@dst);
      remove("$dst/$file");
    } else {
      my $srcfile = "$src/" . shift(@src);
      my $dstfile = "$dst/" . shift(@dst);
      copy($srcfile, $dstfile);
    }
  }
  foreach my $file (@src) {
    copy("$src/$file", "$dst/$file");
  }
  foreach my $file (@dst) {
    remove("$dst/$file");
  }
}


sub copy($$) {
  my ($src, $dst) = @_;
  if (-l $src) {
    # Source file is a symbolic link
    # We don't bother about access modes and dates of symlinks,
    # just make sure that $dst is a symlink to the same file.
    # (NB: we don't convert absolute links "within" the cloned structure:
    # use relative links instead, if that is what you want).
    my $file = readlink($src);
    if (-l $dst && ($file eq readlink($dst))) {
      # dst is OK
    } else {
      remove($dst);
      print "Create symlink $dst\n            -> $file" if ($verbose);
      if ($doit) {
	symlink($file, $dst) or warn "Symlink $file, $dst failed: $!\n";
      }
      print "\n" if ($verbose);
    }
    return();
  }
  # Cache a single stat of both files:
  my $s = stat($src) or die "stat of `$src' failed: $!\n";
  return() if ($one_file_system && $s->dev ne $base_dev);
  my $d = stat($dst);
  if (S_ISREG($s->mode)) {
    # Source is a regular file
    if (defined($d) && S_ISREG($d->mode)
	 && ($s->size == $d->size) && (abs($s->mtime - $d->mtime) <= $slop)) {
      # Destination file is OK
      return();
    }
    print "Copy file $src\n       -> $dst" if ($verbose);
    return() unless ($doit);
    # Normal file copy:
    if (defined($d)) {
      if (S_ISREG($d->mode)) {
	unlink($dst) or warn "Can't unlink `$dst': $!\n";
      } else {
	remove($dst);
      }
    }
    if (($src_host || $dst_host) && $s->size > 1000000) {
      # Use rcp to copy large files accross the network
      # (due to the limitations of NFS):
      fast_copy_rcp($src, $dst);
    } else {
      # Use sysread/syswrite to copy small files (to avoid the fork overhead):
      fast_copy_sys($src, $dst);
      chown $s->uid, $s->gid, $dst if ($> == 0); 
      chmod $s->mode, $dst or die "Failed to chmod $dst: $!\n";
      utime $s->atime, $s->mtime, $dst or die "Failed to set utime $dst: $!\n";
    }
    print "\n" if ($verbose);
  } elsif (S_ISDIR($s->mode) && defined($d) && S_ISDIR($d->mode)) {
    # Both directories exist: clone src to dst:
    clonedir($src, $dst);
  } elsif (S_ISDIR($s->mode)) {
    remove($dst);
    print "Copy directory $src\n            -> $dst\n" if ($verbose);
    return() unless ($doit);
    mkdir $dst, 0700 or warn "\tmkdir $dst failed: $!\n";
    clonedir($src, $dst);
    chmod $s->mode, $dst;
    utime $s->atime, $s->mtime, $dst;
  } elsif (S_ISFIFO($s->mode)) {
    # Source file is a named pipe
    if (!defined($d) || !S_ISFIFO($d->mode)) {
      print "Create named pipe $dst" if ($verbose);
      return() unless ($doit);
      remove($dst);
      system "mknod $dst p";     
      chmod $s->mode, $dst;
      utime $s->atime, $s->mtime, $dst;
      print "\n" if ($verbose);
    }
  } else {
    # Character special, block special or socket:
    #warn "Files of type $src are not currently supported!\n";
  }
  return();
}


sub remove($) {
  my ($file) = @_;
  if (-l $file) {
    print "Delete symlink $file\n" if ($verbose);
    if ($doit) {
      unlink($file) or warn "Unlink of symlink `$file' failed: $!\n";
    }
    return();
  }
  return unless (-e $file);
  if (-d _) {
    print "Delete directory $file" if ($verbose);
    if ($doit) {
      system "/bin/rm", "-rf", $file;
      if (($? >> 8) > 0) { warn "Removal of directory `$file' failed: $!\n"; }
    }
    print "\n" if ($verbose);
  } else {
    print "Delete file $file\n" if ($verbose);
    if ($doit) {
      unlink $file or warn "Unlink of `$file' failed: $!\n";
    }
  }
}



# Read a directory and return a sorted list of files
# excluding . and ..
sub dir($) {
  my ($dir) = @_;
  local *DIR;
  opendir(DIR, $dir) or warn "Can't opendir `$dir': $!\n";
  my @result = sort grep { $_ ne "." && $_ ne ".." } readdir(DIR);
  closedir(DIR);
  return(@result);
}


sub fast_copy_rcp($$) {
  my ($src, $dst) = @_;
  $src = $src_host . $src;
  $dst = $dst_host . $dst;
  # We need to double-quote a remote filename: 
  $src =~ s/[ \"\'\*\?]/\\$&/g if ($src_host);
  $dst =~ s/[ \"\'\*\?]/\\$&/g if ($dst_host);
  system "rcp -p '$src' '$dst'";
  if (($? >> 8) > 0) { warn "rcp $src_host$src $dst_host$dst failed: $!\n"; }
}


sub fast_copy_sys($$) {
  my ($src, $dst) = @_;
  my ($buf, $len1, $len2);
  local (*IN, *OUT);
  open (IN,"<$src") or warn "Can't open input `$src': $!\n";
  open (OUT,">$dst") or warn "Can't open output `$dst': $!\n";
  for (;;) {
    $len1 = sysread(IN, $buf, $blocksize);
    warn "File $src read error: $!\n" unless defined($len1);
    last unless($len1);
    $len2 = syswrite(OUT, $buf, $len1);
    # Most likely cause is out of space, so die on these errors:
    die "File $dst write error: $!\n"  unless defined($len2);
    die "Only wrote $len2 of $len1 bytes to $dst\n" unless ($len2 == $len1);
  }      
  close IN;
  close OUT;
}


# Check if directory is NFS mounted (using df):
sub df_host($) {
  my ($dir) = @_;
  local $_;
  foreach (`df $dir`) {
    next if /^Filesystem/;
    return("") if /^\//;
    return($1) if /^(\w+:)\//;
  }
  die "df $dir failed: $!\n";
}
