#!/bin/bash
# Author: Theodore Zacharia
# V1.1 - 07/03/2019 - add more levels of copying
# V1.5 - 11/03/2019 - update with copy direction for more flexibility
# V1.6 - 02/05/2019 - update to support checksum vs date skipping of files, plus
#                     switch garbage collection from -c to -g flag
# V1.7 - 24/05/2019 - add tidying up features
# V1.8 - 09/04/2020 - add -D flag for days back, for speed
# V1.9 - 11/09/2020 - use config file to define drives
# V2.0 - 10/09/2022 - --noatime is deprecated, move to --open-noatime (need to find switch solution)
# V2.1 - 22/05/2024 - Added some help text to track rsync when doing lots of work
#
# synchronise two drives based on some set of directories
# NOTE: If there is an error on Linux with remote being read-only do the following:
# sudo dosfsck -a /dev/sdb1
#
# -a for actual run
# -1,-2,-3,-4,-5 for level of directories to cover
# local to use local (e.g. c) as master and ONLY from c to e
# remote to use remote (e.g. e) as master and ONLY from e to c
# otherwise uses both drives
#
# The script will automatically create a link in the user's home called edrive; this will
# be targeted at the first USB storage device found, which will then be used to store data
#
# Originally needed to "ln -s /media/$USERNAME/STORE\ N\ GO/ ~/storengo" on Linux
#
# Add option to clean the GIT directories before copying
#
# Strategies for speedy copying:
# If remote device/drive handles timestamps differently to local it is possible the time
# based check will not be as accurate as the checksum based check, you could end up copying
# too many files.
# Use -c if your local device is fast and the remote is slow and you are using local as master
# the -c option is slower, but not as slow as copying too many files to a slower device
#

# ***** Globals
# Host facts, captured once at start-up
UNIXTYPE=$(uname)
THISHOST=$(hostname)
USERNAME=$(whoami)

# Option defaults; the command-line flags parsed later override these
VERBOSE="-v"
MODWINDOW="--modify-window=5"
DODELETE=""
DAYSBACK=0
DOTIDY=0
FEAT1=""
FEAT2=""

# Scratch file list, named after this shell's PID (relative to the current directory)
TF="$$.list"

# ***** Functions
# Remove the temporary file list and, when called from a trap, abort the script.
# Globals:   TF (read) - name of the temporary file list
# Arguments: any argument marks the call as coming from a trapped signal
# Returns:   0 on a normal call; exits the script with status 1 when trapped
_cleanexit ()
{
	if [ $# -gt 0 ]
	then
		echo "exit trapped, cleaning up first"
	fi

	# quoted (and skipped when empty) so the name is always treated as ONE file,
	# even if it contains blanks or starts with a dash
	if [ -n "$TF" ]
	then
		rm -f -- "$TF"
	fi

	if [ $# -gt 0 ]
	then
		exit 1
	fi
}

# Print the mount point of the FIRST filesystem whose df line mentions
# /media/$USERNAME; prints nothing when there is none.
_first_extdrive ()
{
	# df -P keeps every filesystem on one line. The mount point starts at field 6
	# and may contain blanks, so the remaining fields are joined back together.
	df -P | grep "/media/$USERNAME" | awk '{
		mp = $6
		for (i = 7; i <= NF; i++) mp = mp " " $i
		print mp
		exit
	}'
}

# Make /home/$USERNAME/edrive a symbolic link to the external drive.
# Globals:   USERNAME (read), EXTDRIVE and LDRIVE (written)
# Outputs:   progress messages on stdout, a warning on stderr when no drive is mounted
# An existing link that resolves elsewhere is removed and re-created; when no
# external drive is mounted no new link is made (ln would fail on an empty target).
_checklinks ()
{
	local link="/home/$USERNAME/edrive"

	EXTDRIVE=$(_first_extdrive)

	# check for existing link for edrive to external media
	if [ -L "$link" ]
	then
		LDRIVE=$(readlink -f "$link")
		if [ "$LDRIVE" = "$EXTDRIVE" ]
		then
			echo "Valid link to external drive $LDRIVE found at $link"
		else
			echo "NEED to modify the link from $LDRIVE to $EXTDRIVE"
			unlink "$link"
		fi
	fi

	# create link
	if [ ! -L "$link" ]
	then
		if [ -n "$EXTDRIVE" ]
		then
			ln -s "$EXTDRIVE" "$link"
		else
			echo "No external drive mounted under /media/$USERNAME, $link not created" >&2
		fi
	fi
}

# Make every *.sh file below a directory executable for user and group,
# skipping anything under a tdcs/workspace directory.
# Globals:   DRYRUN (read) - "n" means dry run: only list the files that would change
# Arguments: $1 - tidy level (only reported in the dry-run message)
#            $2 - directory to scan
_dotidy ()
{
	local level=$1
	local path=$2

	echo "resetting scripts to executable $path"
	if [ "$DRYRUN" = "n" ]
	then
		echo "DRY RUN level $level on $path"
		sleep 2
		# path is quoted so directories containing blanks are scanned as one argument
		find "$path" -name "*.sh" -not -path "*/tdcs/workspace/*"
	else
		# "{} +" hands the files to chmod in batches instead of one process per file
		find "$path" -name "*.sh" -not -path "*/tdcs/workspace/*" -exec chmod ug+x {} +
	fi
}


# ***** Mainline
# signal 2 (SIGINT): clean up the temp file list and stop
trap "_cleanexit 1" 2

# Print the rsync option used to leave source access times alone, chosen from
# the rsync version string given in $1 (e.g. "3.2.7", an optional leading "v"
# is accepted).
# Versions 3.2 and later get --open-noatime; anything older or unparseable
# gets the deprecated --noatime.  Comparing major/minor numerically also
# covers releases such as 3.2.10, 3.3.x and 3.4.x, which a "3.2.?" glob misses.
_noatime_flag_for ()
{
	local re='^v?([0-9]+)\.([0-9]+)'
	local major minor

	if [[ $1 =~ $re ]]
	then
		major=${BASH_REMATCH[1]}
		minor=${BASH_REMATCH[2]}
		if [ "$major" -gt 3 ] || { [ "$major" -eq 3 ] && [ "$minor" -ge 2 ] ; }
		then
			echo "--open-noatime"
			return 0
		fi
	fi
	echo "--noatime" # deprecated, switch off at some point
}

# define os level variables
if [ "$UNIXTYPE" = "Linux" ]
then
	echo "Setting features for $UNIXTYPE"
	# try -V first and fall back to --version when rsync rejects it
	VERSINF=$(rsync -V 2>/dev/null) || VERSINF=$(rsync --version)
	# third word of the first line of the version banner
	VERS=$(echo "$VERSINF" | head -1 | awk '{print $3}')
	FEAT1=$(_noatime_flag_for "$VERS")
fi

# define available drives based on machine

# Print the value of KEY from the [SECTION] block of an ini file.
# Arguments: $1 - section name (without the brackets)
#            $2 - key name
#            $3 - ini file, defaults to syncdrives.ini in the current directory
# Outputs:   the text after the first "=" (leading blanks removed); nothing when
#            the section or the key is missing
_iniget ()
{
	awk -v section="[$1]" -v key="$2" '
		# each section header re-evaluates the flag, so a key that is missing
		# from the wanted section is never picked up from a later section
		/^\[/ { wanted = (index($0, section) == 1) ; next }
		wanted && $0 ~ ("^" key "[ ]*=") {
			# strip only up to the FIRST "=" so values may themselves contain "="
			sub(/^[^=]*=[ ]*/, "")
			print
			exit
		}
	' "${3:-syncdrives.ini}"
}

# use the section named after this host, or [default] when it defines no DRIVE1
INIHOST=$THISHOST
DRIVE1=$(_iniget "$INIHOST" DRIVE1)
if [ -z "$DRIVE1" ] ; then INIHOST="default" ; fi

CHECKLINKS=$(_iniget "$INIHOST" CHECKLINKS)

if [ "$CHECKLINKS" = "true" ] ; then _checklinks ; fi

# NOTE(review): eval expands ~ and $VARIABLES written in the ini values, which
# also means the ini content is executed as shell - keep the file trusted.
DRIVE1=$(eval echo "$(_iniget "$INIHOST" DRIVE1)")
DRIVE2=$(eval echo "$(_iniget "$INIHOST" DRIVE2)")
DRIVE3=$(eval echo "$(_iniget "$INIHOST" DRIVE3)")
DRIVE4=$(eval echo "$(_iniget "$INIHOST" DRIVE4)")


# run-time defaults, adjusted by the option parsing that follows
T1=$(date)		# start time, reported when the run finishes
DRYRUN="n"		# appended to the rsync flags (-n = dry run); -a clears it
GITGC=0
CHECKSUMSKIP=""
DIRSET="ddrive"
EXCLUDEDIRSET=""

# Parse the command-line flags into the global settings.
# Globals:   DRYRUN VERBOSE GITGC DOTIDY FEAT1 FEAT2 DAYSBACK DODELETE
#            CHECKSUMSKIP DIRSETLOOKUP DRIVE1..DRIVE4 (written), TF DIRSET (read),
#            OPTIND (left pointing at the first non-option argument)
# Arguments: the script's own arguments ("$@")
# Exits:     1 after printing the help (-h), 2 on an invalid option, a missing
#            option argument or a non-numeric -D value
_parseopts ()
{
	local AOPT
	OPTIND=1

	# the leading ":" selects getopts' silent mode so the offending option letter
	# is available in OPTARG for our own messages
	while getopts :avhgGc12345TdD: AOPT
	do
	case $AOPT in
		a) DRYRUN="";;
		v) VERBOSE="-vv";;
		g) GITGC=1 ;;	# Do git garbage collection FIRST
		G) GITGC=2 ;;	# Do git garbage collection ONLY
		T) DOTIDY=1 ;;  # Do directory tidy
		D) # DAYSBACK is later used in numeric tests and as a find -ctime value
		   case $OPTARG in
			''|*[!0-9]*)
				echo "-D needs a whole number of days, got: $OPTARG"
				exit 2 ;;
		   esac
		   # NOTE(review): this overrides the FEAT1 chosen by the rsync version check - confirm intended
		   FEAT1="--noatime" # deprecated, switch off at somepoint
		   FEAT2="--files-from=$TF"
		   DAYSBACK=$OPTARG ;; # set number of days back to check only on files
		d) DODELETE="--delete" ;;
		c) CHECKSUMSKIP="-c" ;;	# Skip files based on checksum
		1) DIRSETLOOKUP="DIRSET_1" ;;
		2) DIRSETLOOKUP="DIRSET_2" ;;
		3) DIRSETLOOKUP="DIRSET_3" ;;
		4) DIRSETLOOKUP="DIRSET_4" ;;
		5) DIRSETLOOKUP="DIRSET_5"
		# so do we change the default from being work to something else
		   # drop the first "work" from each drive path
		   DRIVE1=${DRIVE1/work/}
		   DRIVE2=${DRIVE2/work/}
		   DRIVE3=${DRIVE3/work/}
		   DRIVE4=${DRIVE4/work/}
		   ;; # this dir is at a higher level so needed to adjust
		h) echo "usage: $0 [-a] [-c] [-g|-G] [-D daysback] [-d] [-v] [-T] [-1|-2|-3|-4|-5] source_drive [dest_drive]"
		   echo "where: source drive = local|remote|storengo|lmachine|rmachine"
		   echo "       local will use the local machine as the master and PUSH changes to the stick"
		   echo "       remote will use the stick as the master and PULL changes to the local machine"
		   echo "       lmachine will use the local machine, main drive as master and PUSH to the secondary"
		   echo "           drive on the local machine"
		   echo "       rmachine will use the local machine, secondary drive as master and PUSH to the main"
		   echo "           drive on the local machine"
		   echo "-a is actually do, otherwise dry run only"
		   echo "-v increase verbose levels"
		   echo "-D daysback will ignore the rsync checking features and use files newer than the days specified"
		   echo "-d delete files in destination which are not in source"
		   echo "-1 is to copy: only the ddrive directory, current is: $DIRSET"
		   echo "-2 is to copy: ddrive and home directories, current is: $DIRSET"
		   echo "-3 is to copy: ddrive, home and work directories (but NOT workspace), current is: $DIRSET"
		   echo "-4 is to copy: ddrive, home and work directories, current is: $DIRSET"
		   echo "-5 is to copy: only the var directory, current is: $DIRSET"
		   echo "-g is do git garbage collection FIRST"
		   echo "-G is do git garbage collection ONLY"
		   echo "-c is to skip based on checksum, not mod-time and size "
		   echo "-T is to do tidy level 1, which sets .sh to executable again"
		   echo "e.g. to only copy from c to e: $0 -a c "
		   echo " "
		   echo "To copy 2 sets of drives and reset the shell scripts do:"
		   echo "./syncdrives.sh -a -3 remote"
		   echo "chmod ug+x *.sh"
		   echo "./syncdrives.sh -a -5 remote"
		   echo "./syncdrives.sh -a -T -5 local"
		   echo "./syncdrives.sh -a -T -3 local"
		   echo " "
		   echo "NOTE: If there is an error on Linux with remote being read-only do the following:"
		   echo "  sudo dosfsck -a /dev/sdb1"
		   echo
		   echo "To make one of the drives a master, do something like:"
		   echo "  find /drives/c/ddrive/ -exec touch {} \;"
		   echo " "
		   echo "To track progress of the rsync, if it is taking a long time try the following:"
		   echo "  sudo strace -e open,openat \$(ps -o lwp= -LC rsync | sed 's/^/-p/')"
		   echo " "
		   exit 1 ;;
		:) echo "-$OPTARG requires an argument"
		   exit 2 ;;
		\?) echo "-$OPTARG is an invalid option"
		   exit 2 ;;
	esac
	done
}

_parseopts "$@"

# drop the option flags that getopts consumed; what remains is
# the copy direction and an optional destination drive
shift "$((OPTIND - 1))"

if (( $# < 1 ))
then
	echo "invalid number of parameters"
	exit 1
fi

COPYDIRECTION=$1

# only these four directions are understood
case $1 in
	local|remote|lmachine|rmachine)
		;;
	*)
		echo "Unknown drive configuration: $1"
		exit 1
		;;
esac

# an explicit second argument replaces the configured DRIVE2
if (( $# > 1 ))
then
	DRIVE2=$2
	echo "copying to destination of $COPYDIRECTION"
fi

# now process the DIRSET

# Print the value of a key from the [$DIRSETLOOKUP] section of the ini file.
# Globals:   DIRSETLOOKUP (read) - section name, e.g. DIRSET_3
# Arguments: $1 - key name
#            $2 - ini file, defaults to syncdrives.ini in the current directory
# Outputs:   the text after the first "=" (leading blanks removed); nothing when
#            the section or the key is missing
_dirset_value ()
{
	awk -v section="[$DIRSETLOOKUP]" -v key="$1" '
		# each section header re-evaluates the flag, so e.g. an EXCLUDEDIR that
		# is missing from the wanted section is never borrowed from a later one
		/^\[/ { wanted = (index($0, section) == 1) ; next }
		wanted && $0 ~ ("^" key "[ ]*=") {
			# strip only up to the FIRST "=" so values such as --exclude=x survive
			sub(/^[^=]*=[ ]*/, "")
			print
			exit
		}
	' "${2:-syncdrives.ini}"
}

# Without a -1..-5 flag there is no section to read, so the defaults assigned
# earlier (DIRSET=ddrive, no excludes) stay in force instead of being blanked.
if [ -n "$DIRSETLOOKUP" ]
then
	# NOTE(review): eval expands ~ and $VARIABLES in the ini values; the ini
	# content is therefore executed as shell and must be trusted
	DIRSET=$(eval echo "$(_dirset_value DIRSET)")
	EXCLUDEDIR=$(eval echo "$(_dirset_value EXCLUDEDIR)")
	EXCLUDEDIRSET=$(eval echo "$(_dirset_value EXCLUDEDIRSET)")
fi

# -v on the command line (VERBOSE=-vv) also dumps the resolved configuration
if [ "$VERBOSE" = "-vv" ]
then
	echo "DRIVE1=$DRIVE1"
	echo "DRIVE2=$DRIVE2"
	echo "DRIVE3=$DRIVE3"
	echo "DRIVE4=$DRIVE4"
	echo "DIRSET=$DIRSET"
	echo "EXCLUDEDIR=$EXCLUDEDIR"
	echo "EXCLUDEDIRSET=$EXCLUDEDIRSET"
fi


# SPECIAL FEATURE

# Run "git gc" in every directory directly below $1.  When it fails for a
# directory, retry once in each of that directory's immediate subdirectories.
# Arguments: $1 - directory holding the working copies
# Each git call runs in a subshell after a successful cd, so git is never run
# in the wrong directory and the caller's working directory is left untouched.
_gitgc_tree ()
{
	local based=$1
	local adir bdir

	for adir in "$based"/*/
	do
		[ -d "$adir" ] || continue	# the glob matched nothing
		adir=${adir%/}
		echo "gc on $adir"
		if ! ( cd "$adir" && git gc )
		then
			echo "failed on $adir, trying ONE level deeper"
			for bdir in "$adir"/*/
			do
				[ -d "$bdir" ] || continue
				bdir=${bdir%/}
				echo "gc on $bdir"
				( cd "$bdir" && git gc )
			done
		fi
	done
}

# GITGC: 1 = collect first and then carry on, 2 = collect only
if [ "${GITGC:-0}" -gt 0 ]
then
	echo "**** Git Garbage Collection"
	BASED=$DRIVE1/work/tdcs/workspace
	_gitgc_tree "$BASED"
	if [ "$GITGC" -gt 1 ]
	then
		exit 0
	fi
fi

# DRYRUN holds "n" while in dry-run mode; the letter is appended to the rsync
# flags further down, where rsync reads it as its dry-run switch
case $DRYRUN in
	n)
		echo "**** DRY RUN"
		sleep 2
		;;
esac

# show the directory set and pause briefly before starting
echo "Will be operating on: $DIRSET"
sleep 2

# Process one directory of the set in the chosen copy direction.
# Globals:   COPYDIRECTION DRIVE1 DRIVE2 DRIVE3 DAYSBACK DOTIDY EXCLUDEDIR TF
#            VERBOSE DRYRUN MODWINDOW DODELETE FEAT1 CHECKSUMSKIP EXCLUDEDIRSET (read)
# Arguments: $1 - directory name relative to the drive roots
# All drive paths are quoted so drives whose names contain blanks work in every
# direction.
_syncdir ()
{
	local adir=$1
	local src dst
	local -a opts

	echo "checking $adir"

	if [ "$DAYSBACK" -gt 0 ]
	then
		# need to handle multiple dirs
		# NOTE(review): in this mode the candidate files are only listed into $TF
		# (overwritten per directory); no rsync is run for the directory
		find "$DRIVE1/$adir/" -not -path "*/${EXCLUDEDIR}/*" -type f -ctime "-$DAYSBACK" | grep -v "~$" | grep -v "$TF" > "$TF"
		echo "expecting following files"
		cat "$TF"
		return
	fi

	case $COPYDIRECTION in
		local)    src=$DRIVE1 ; dst=$DRIVE2 ;;
		remote)   src=$DRIVE2 ; dst=$DRIVE1 ;;
		lmachine) src=$DRIVE1 ; dst=$DRIVE3 ;;
		rmachine) src=$DRIVE3 ; dst=$DRIVE1 ;;
		*)        return 0 ;;
	esac
	echo "from $src to $dst"

	# tidying replaces the copy, and only in the "local" direction
	if [ "$COPYDIRECTION" = "local" ] && [ "$DOTIDY" -gt 0 ]
	then
		_dotidy "$DOTIDY" "$src/$adir/"
		return
	fi

	# change from -au to -rlptDu as when setting user fails on NFS/NTFS/FAT32 it also does not set the time
	# The option variables are expanded unquoted on purpose: empty ones must
	# vanish and EXCLUDEDIRSET may hold several words.
	# shellcheck disable=SC2206
	opts=( $VERBOSE "-rlptDu${DRYRUN}" ${MODWINDOW} ${DODELETE} ${FEAT1} ${CHECKSUMSKIP} ${EXCLUDEDIRSET} )
	rsync "${opts[@]}" "$src/$adir/" "$dst/$adir"
}

# DIRSET is a blank-separated list of directory names
for adir in $DIRSET
do
	_syncdir "$adir"
done
# record the end time, report the run's start and end, then remove the temp file list
T2=$(date)

echo "Finished ... started $T1 completed $T2"
_cleanexit