#!/bin/bash
# Author: Theodore Zacharia
# V1.1 - 07/03/2019 - add more levels of copying
# V1.5 - 11/03/2019 - update with copy direction for more flexibility
# V1.6 - 02/05/2019 - update to support checksum vs date skipping of files, plus
#                     switch garbage collection from -c to -g flag
# V1.7 - 24/05/2019 - add tidying up features
# V1.8 - 09/04/2020 - add -D flag for days back, for speed
# V1.9 - 11/09/2020 - use config file to define drives
# V2.0 - 10/09/2022 - --noatime is deprecated, move to --open-noatime (need to find switch solution)
# V2.1 - 22/05/2024 - Added some help text to track rsync when doing lots of work
# V3.0 - 21/10/2025 - Change core concept to support local,stick,lan and anything as identifiers and specify
#                     source and destination so removing copydirection and replacing with
#                     copysource and copydest and extend number of dirsets
#
# synchronise two drives based on some set of directories
# NOTE: If there is an error on Linux with remote being read-only do the following:
# sudo dosfsck -a /dev/sdb1
#
# -a for actual run
# -1,-2,-3,-4,-5 for level of directories to cover
# l to use local (e.g. c) as master and ONLY from c to e
# r to use remote (e.g. e as master) and ONLY from e to c
# otherwise uses both drives
#
# The script will automatically create a link in the user's home called edrive, this will
# be targeted at the first USB storage device found, which will then be used to store data
#
# Originally needed to "ln -s /media/$USERNAME/STORE\ N\ GO/ ~/storengo" on Linux
#
# Add option to clean the GIT directories before copying
#
# Strategies for speedy copying:
# If remote device/drive handles timestamps differently to local it is possible the time
# based check will not be as accurate as the checksum based check, you could end up copying
# too many files.
# Use -c if your local device is fast and the remote is slow and you are using local as master
# the -c option is slower, but not as slow as copying too many files to a slower device
#

# ***** Globals
# Facts about the machine the script is running on
UNIXTYPE=$(uname)
THISHOST=$(hostname)
USERNAME=$(whoami)

# Configuration file, looked up relative to the current directory
INI_FILE='syncdrives.ini'

# Temporary file list, named after this shell's process id
TF="$$.list"

# Defaults for the switches that the command line options may change
DOTIDY=0
DAYSBACK=0
NEEDSUDO=0
DODELETE=''
VERBOSE='-v'
DEFAULTDEVICE=''

# Extra options handed to rsync
FEAT1=''
FEAT2=''
MODWINDOW='--modify-window=5'

# ***** Functions
# Remove the temporary file list and, when called with any argument (as the
# SIGINT trap does), announce the cleanup and terminate the script.
# Globals:   TF (read) - name of the temporary file list
# Arguments: optional; the presence of any argument means "called from a trap"
# Returns:   0 when called without arguments; exits with status 1 otherwise
_cleanexit ()
{
	if [ $# -gt 0 ]
	then
		echo "exit trapped, cleaning up first"
	fi

	# Quoted and guarded with "--" so that a name containing spaces, glob
	# characters or a leading dash removes exactly that one file.
	if [ -n "${TF:-}" ]
	then
		rm -f -- "$TF"
	fi

	if [ $# -gt 0 ]
	then
		exit 1
	fi
}

# Make sure /home/$USERNAME/edrive is a symlink to the external drive mounted
# under /media/$USERNAME. Only acts when the destination is "stick".
# Globals:   COPYDEST, USERNAME (read)
# Arguments: none
# Returns:   0 when nothing had to be done or the link is valid/created,
#            non-zero when no external drive is mounted or the link could
#            not be created
_checklinks ()
{
	local link="/home/$USERNAME/edrive"
	local extdrive ldrive

	# only need to do this for stick mode
	if [ "${COPYDEST}" != "stick" ]
	then
		return 0
	fi

	# Mount point of the FIRST filesystem mounted under /media/$USERNAME.
	# df -P keeps every filesystem on one line; the mount point starts at
	# field 6 and may contain spaces (e.g. "STORE N GO"), so the remaining
	# fields are joined back together. Stopping at the first match avoids
	# gluing several mount points into one bogus path.
	extdrive=$(df -P | awk -v pat="/media/$USERNAME" '
		index($0, pat) {
			mp = $6
			for (i = 7; i <= NF; i++) mp = mp " " $i
			print mp
			exit
		}')

	# Without a mounted drive there is nothing to point at: keep whatever
	# link exists rather than removing it and failing to recreate it.
	if [ -z "$extdrive" ]
	then
		echo "No external drive mounted under /media/$USERNAME, leaving $link untouched" >&2
		return 1
	fi

	# check for existing link for edrive to external media
	if [ -L "$link" ]
	then
		ldrive=$(readlink -f "$link")
		if [ "$ldrive" = "$extdrive" ]
		then
			echo "Valid link to external drive $ldrive found at $link"
			return 0
		fi
		echo "NEED to modify the link from $ldrive to $extdrive"
		unlink "$link"
	fi

	# create link
	ln -s "$extdrive" "$link"
}

# Tidy a directory tree: give every *.sh file user/group execute permission,
# leaving anything below a tdcs/workspace directory alone. In dry-run mode
# (DRYRUN = "n") the matching files are only listed.
# Globals:   DRYRUN (read)
# Arguments: $1 - tidy level (only reported in the dry-run message)
#            $2 - directory to tidy
# Returns:   exit status of the find command
_dotidy ()
{
	local level=$1
	local path=$2

	echo "resetting scripts to executable $path"
	if [ "$DRYRUN" = "n" ]
	then
		echo "DRY RUN level $level on $path"
		sleep 2
		# path is quoted so directories containing spaces stay one argument
		find "$path" -name "*.sh" -not -path "*/tdcs/workspace/*"
	else
		# "{} +" hands many files to one chmod instead of one process per file
		find "$path" -name "*.sh" -not -path "*/tdcs/workspace/*" -exec chmod ug+x {} +
	fi
	#echo "deleting hidden comment dirs $path"
}

# Run "git gc" in every directory below ${PATH_SRC}/work/tdcs/workspace.
# When gc fails in a directory, it is retried in each of that directory's
# immediate sub-directories (one level deeper).
# Globals:   PATH_SRC, GITGC (read)
# Arguments: none
# Returns:   0; exits the script with status 0 when GITGC is greater than 1
#            (garbage collection ONLY mode)
_cleangit()
{
	local based="${PATH_SRC}/work/tdcs/workspace"
	local adir bdir

	echo "**** Git Garbage Collection"
	if [ -d "$based" ]
	then
		# Glob instead of parsing ls, so names with spaces survive intact.
		for adir in "$based"/*
		do
			# skips plain files and the literal pattern of an empty workspace
			[ -d "$adir" ] || continue
			echo "gc on $adir"
			# The subshell confines the cd: git only runs when the cd worked
			# and the caller's working directory is never changed.
			if ! ( cd "$adir" && git gc )
			then
				echo "failed on $adir, trying ONE level deeper"
				for bdir in "$adir"/*
				do
					[ -d "$bdir" ] || continue
					echo "gc on $bdir"
					( cd "$bdir" && git gc )
				done
			fi
		done
	else
		echo "no git workspace found at $based" >&2
	fi

	if [ "$GITGC" -gt 1 ]
	then
		exit 0
	fi
}


# ***** Mainline
trap "_cleanexit 1" 2

# define os level variables
# On Linux, choose the rsync option that avoids updating access times.
# The installed rsync is asked what it supports instead of matching its
# version string: the previous "3.2.?" pattern only recognised 3.2.x, so any
# newer release fell back to the deprecated --noatime.
# Globals:   UNIXTYPE (read), FEAT1 (written on Linux only)
# Arguments: none
# Returns:   0
_set_os_features ()
{
	if [ "$UNIXTYPE" != "Linux" ]
	then
		return 0
	fi

	echo "Setting features for $UNIXTYPE"
	if rsync --help 2>&1 | grep -q -e '--open-noatime'
	then
		FEAT1="--open-noatime"
	else
		FEAT1="--noatime" # deprecated, switch off at some point
	fi
	return 0
}

_set_os_features

# define available drives based on machine
# The INI section consulted is the one named after this host ([hostname]).

INIHOST=$THISHOST
# How the sed one-liner works (the same shape is used for CHECKLINKS below):
# once the "[host]" header line is found it reads forward line by line until
# a line starting with the key and "=" appears, strips everything up to and
# including the LAST "=" (plus following spaces), prints the rest and quits.
# NOTE(review): the forward scan does not stop at the next "[section]"
# header, so a key missing from this host's section is taken from a later
# section if one defines it.
# NOTE(review): the host name is used as a regular expression, so a "." in
# it matches any character.
DEFAULTDEVICE=$(sed -nr "/^\[${INIHOST}\]/ { :l /^DEFAULTDEVICE[ ]*=/ { s/.*=[ ]*//; p; q;}; n; b l;}" $INI_FILE)

# get a drive list dynamically
#awk -v section="$INIHOST" ' $0 ~ "\\[" section "\\]" { in_section=1; next } /^\[/{ in_section=0 } in_section && /^DRIVE_/ { split($0, a, "="); print a[1] } ' $INI_FILE

# Extract DRIVE_ lines from the given section
# Each line starting with DRIVE_ inside the host's section is printed by awk
# and then executed by eval, which is what creates the DRIVE_<name> shell
# variables used later to resolve the source and destination paths.
# NOTE(review): no field separator is given, so awk splits on whitespace:
# $1 is the whole "DRIVE_x=value" text up to the first blank, and the two
# sub() calls on $2 never change what is printed. A value containing a space
# is therefore cut at that space - confirm against the real INI file.
# NOTE(review): eval runs INI content as shell code, so the file must be
# trusted; presumably this is relied on to expand variables inside values.
eval "$(
  awk -v section="$INIHOST" '
    $0 ~ "\\[" section "\\]" { in_section=1; next }
    /^\[/{ in_section=0 }
    in_section && /^DRIVE_/ {
        # remove optional quotes around values
        sub(/^"/, "", $2); sub(/"$/, "", $2)
        # print as shell assignment
        print $1
    }
  ' OFS="=" "$INI_FILE"
)"

# Previous approach, one fixed lookup per known drive name (kept for reference):
#DRIVE_local=`eval echo "$(sed -nr "/^\[${INIHOST}\]/ { :l /^DRIVE_local[ ]*=/ { s/.*=[ ]*//; p; q;}; n; b l;}" $INI_FILE)"`
#DRIVE_stick=`eval echo "$(sed -nr "/^\[${INIHOST}\]/ { :l /^DRIVE_stick[ ]*=/ { s/.*=[ ]*//; p; q;}; n; b l;}" $INI_FILE)"`
#DRIVE_lan=`eval echo "$(sed -nr "/^\[${INIHOST}\]/ { :l /^DRIVE_lan[ ]*=/ { s/.*=[ ]*//; p; q;}; n; b l;}" $INI_FILE)"`
#DRIVE_lwork=`eval echo "$(sed -nr "/^\[${INIHOST}\]/ { :l /^DRIVE_lwork[ ]*=/ { s/.*=[ ]*//; p; q;}; n; b l;}" $INI_FILE)"`

# CHECKLINKS=true in the host section makes the mainline call _checklinks
CHECKLINKS=$(sed -nr "/^\[${INIHOST}\]/ { :l /^CHECKLINKS[ ]*=/ { s/.*=[ ]*//; p; q;}; n; b l;}" $INI_FILE)

# Run-time defaults; the command line options below may override them
T1=$(date)
DRYRUN=n
CHECKSUMSKIP=""
GITGC=0
DIRSET="ddrive"
EXCLUDEDIRSET=""

# Print the help text.
# Globals:   DIRSET (read)
# Arguments: none
_usage ()
{
	echo "usage: $0 [-a] [-c] [-s] [-d] [-D daysback] [-v] [-g|-G] [-T] <-1|-2|-3|...|-9> source_drive [dest_drive]"
	echo "where: source drive where the files will come from"
	echo "       dest_driver where the files will go to (there is a default which can be defined in the ini file)"
	echo "                  both the above are defined in the ini file, e.g. DEST_local=mylocal, DRIVE_stick=/mnt/storengo"
	echo "-a is actually do, otherwise dry run only"
	echo "-c skip files based on checksum, not mod-time and size "
	echo "-s request sudo rights for copy (need for some devices/lan)"
	echo "-d delete files in destination which are not in source"
	echo "-D daysback will ignore the rsync checking features and use files newer than the days specified"
	echo "-v increase verbose levels"
	echo "-g is do git garbage collection FIRST"
	echo "-G is do git garbage collection ONLY"
	echo "-T is to do tidy level 1, which sets .sh to executable again"
	echo "-1...-9 is the dirset to copy, current is: $DIRSET (you MUST select one dirset to actual do anything)"
	echo "e.g. To only copy from c to e: $0 -a -1 local"
	echo "Copy different directories: $0 -a -3 local stick"
	echo "set execute rights of scripts: chmod ug+x *.sh"
	echo " "
	echo "./syncdrives.sh -a -5 lan local"
	echo "./syncdrives.sh -a -T -5 local stick"
	echo "./syncdrives.sh -a -T -3 local"
	echo "./syncdrives2.sh -a -1 -s local lan"
	echo " "
	echo "NOTE: If there is an error on Linux with remote being read-only do the following:"
	echo "  sudo dosfsck -a /dev/sdb1"
	echo
	echo "To make one of the drives a master, do something like:"
	echo "  find /drives/c/ddrive/ -exec touch {} \;"
	echo " "
	echo "To track progress of the rsync, if it is taking a long time try the following:"
	echo "  sudo strace -e open,openat \$(ps -o lwp= -LC rsync | sed 's/^/-p/')"
	echo " "
}

# Parse the command line options into the global switches.
# Globals:   DRYRUN, VERBOSE, GITGC, DOTIDY, FEAT2, DAYSBACK, DODELETE,
#            CHECKSUMSKIP, NEEDSUDO, DIRSETLOOKUP (written), TF (read);
#            OPTIND is left just past the last option so the caller can shift
# Arguments: the script's command line ("$@")
# Returns:   0; exits 1 after printing help (-h), exits 2 on a bad option
_parseopts ()
{
	local AOPT
	OPTIND=1
	# 6-9 are part of the option string because the usage text and the case
	# arm below both promise dirsets -1 to -9. "t" is accepted by getopts but
	# has no arm of its own, so it ends in the "invalid option" branch.
	while getopts atvhgGcs123456789TdD: AOPT
	do
	case $AOPT in
		a) DRYRUN="";;
		v) VERBOSE="-vv";;
		g) GITGC=1 ;;	# Do git garbage collection FIRST
		G) GITGC=2 ;;	# Do git garbage collection ONLY
		T) DOTIDY=1 ;;  # Do directory tidy
		D) # set number of days back to check only on files
		   # The value is used in integer tests and as "find -ctime -N",
		   # so anything but a whole number is rejected here.
		   case $OPTARG in
			''|*[!0-9]*) echo "-D needs a whole number of days, got: $OPTARG"
			   exit 2 ;;
		   esac
		   # FEAT1 is deliberately left alone: forcing --noatime here
		   # overrode the flag selected for the installed rsync.
		   FEAT2="--files-from=$TF"
		   DAYSBACK=$OPTARG ;;
		d) DODELETE="--delete" ;;
		c) CHECKSUMSKIP="-c" ;;	# Skip files based on checksum
		s) NEEDSUDO=1 ;;
		1|2|3|4|5|6|7|8|9) DIRSETLOOKUP="DIRSET_$AOPT" ;;
		h) _usage
		   exit 1 ;;
		*) echo "$AOPT is an invalid option"
		   exit 2 ;;
	esac
	done
}

_parseopts "$@"

shift $((OPTIND-1))

# The source drive identifier is mandatory once the options are shifted away
if (( $# < 1 ))
then
	echo "invalid number of parameters"
	exit 1
fi

COPYSRC=$1

# With a single argument the destination comes from the INI file's
# DEFAULTDEVICE; otherwise the second argument names it.
case $# in
	1)
		if [ -z "$DEFAULTDEVICE" ]
		then
			echo "There is no destination or DEFAULTDEVICE specified"
			exit 9
		fi
		COPYDEST=$DEFAULTDEVICE
		;;
	*)
		COPYDEST=$2
		;;
esac

# Link maintenance is only done when the INI file asks for it
case "$CHECKLINKS" in
	true) _checklinks ;;
esac

# now process the DIRSET

# Print the value of key $1 from the INI section selected by -1...-9.
# The raw value is passed through "eval echo" so shell expansions written in
# the INI file are expanded.
# NOTE(review): this executes INI content as shell code, so the INI file has
# to be trusted.
# Globals:   DIRSETLOOKUP, INI_FILE (read)
# Arguments: $1 - key name (DIRSET, EXCLUDEDIR or EXCLUDEDIRSET)
_dirset_value ()
{
	eval echo "$(sed -nr "/^\[${DIRSETLOOKUP}\]/ { :l /^$1[ ]*=/ { s/.*=[ ]*//; p; q;}; n; b l;}" "$INI_FILE")"
}

DIRSET=$(_dirset_value DIRSET)
EXCLUDEDIR=$(_dirset_value EXCLUDEDIR)
EXCLUDEDIRSET=$(_dirset_value EXCLUDEDIRSET)

# final calculation of src and dest
# The identifiers come from the command line and become part of a variable
# name, so only characters that are valid in a variable name are accepted.
case "${COPYSRC}${COPYDEST}" in
	*[!A-Za-z0-9_]*)
		echo "Invalid drive identifier in: source ($COPYSRC) , destination ($COPYDEST)"
		_cleanexit
		exit 2
		;;
esac

DRIVE_from="DRIVE_${COPYSRC}"
DRIVE_to="DRIVE_${COPYDEST}"

# Indirect expansion reads the DRIVE_<name> variables without eval
PATH_SRC=${!DRIVE_from}
PATH_DEST=${!DRIVE_to}

if [ "$VERBOSE" = "-vv" ]
then
	echo "copying from source of $COPYSRC"
	echo "copying to destination of $COPYDEST"
	echo "THISHOST=$INIHOST"
	echo "DEFAULTDEVICE=$DEFAULTDEVICE"
	echo "DIRSET=$DIRSET"
	echo "DRIVE_local=$DRIVE_local"
	echo "DRIVE_stick=$DRIVE_stick"
	echo "DRIVE_lan=$DRIVE_lan"
	echo "DRIVE_lwork=$DRIVE_lwork"
	echo "EXCLUDEDIR=$EXCLUDEDIR"
	echo "EXCLUDEDIRSET=$EXCLUDEDIRSET"
	echo "DRIVE_from=$DRIVE_from"
	echo "DRIVE_to=$DRIVE_to"
	echo "PATH_SRC=$PATH_SRC"
	echo "PATH_DEST=$PATH_DEST"
fi


# Both ends must resolve to a path defined for this host in the INI file
if [ -z "$PATH_SRC" ] || [ -z "$PATH_DEST" ]
then
	echo "Missing from set of: PATH_SRC ($PATH_SRC) , PATH_DEST ($PATH_DEST)"
	_cleanexit
	exit 2
fi

# SPECIAL FEATURE
# Optional git garbage collection; with -G (GITGC=2) _cleangit ends the
# script itself, so nothing below runs in that mode.
if [ "$GITGC" -gt 0 ]
then
	_cleangit
fi

# An empty DIRSET means no -1...-9 option was given or the chosen section has
# no DIRSET entry. Stop here instead of looping over nothing and reporting a
# successful run that copied no files.
if [ -z "$DIRSET" ]
then
	echo "No directories to process: select a dirset with -1...-9 that has a DIRSET entry in $INI_FILE" >&2
	_cleanexit
	exit 2
fi

# DRYRUN is set to n (for yes, dry run) to match parameter from rsync
if [ "$DRYRUN" = "n" ]
then
	echo "**** DRY RUN"
	sleep 2
fi

echo "Will be operating on: $DIRSET"
sleep 2

# Process one directory of the dirset.
# With DAYSBACK > 0 it only lists the files changed within that many days
# (written to $TF and printed). With DOTIDY > 0 it tidies the source
# directory. Otherwise it runs rsync from source to destination.
# Globals:   PATH_SRC, PATH_DEST, DAYSBACK, EXCLUDEDIR, TF, DOTIDY, NEEDSUDO,
#            VERBOSE, DRYRUN, MODWINDOW, DODELETE, FEAT1, CHECKSUMSKIP,
#            EXCLUDEDIRSET (all read)
# Arguments: $1 - directory name relative to PATH_SRC / PATH_DEST
# Returns:   rsync's exit status when rsync was run, 0 otherwise
_syncdir ()
{
	local adir=$1
	local opt
	local -a findargs rsyncargs excludes runner

	echo "checking $adir"

	if [ "$DAYSBACK" -gt 0 ]
	then
		# need to handle multiple dirs
		findargs=("${PATH_SRC}/$adir/")
		# An empty EXCLUDEDIR would turn the pattern into "*//*", so the
		# test is only added when there is something to exclude.
		if [ -n "$EXCLUDEDIR" ]
		then
			findargs+=(-not -path "*/${EXCLUDEDIR}/*")
		fi
		findargs+=(-type f -ctime "-$DAYSBACK")
		# drop editor backup files and the list file itself
		find "${findargs[@]}" | grep -v "~$" | grep -vF -- "$TF" > "$TF"
		echo "expecting following files, this is info only, NOT actually copying ---------"
		cat "$TF"
		return 0
	fi

	# change from -au to -rlptDu as when setting user fails on NFS/NTFS/FAT32 it also does not set the time
	echo "from $PATH_SRC to $PATH_DEST"
	if [ "$DOTIDY" -gt 0 ]
	then
		_dotidy "$DOTIDY" "${PATH_SRC}/$adir/"
		return 0
	fi

	# Build the command as an array: empty options vanish, and source and
	# destination each stay a single argument even when they contain spaces.
	rsyncargs=("$VERBOSE" "-rlptDu${DRYRUN}")
	for opt in "$MODWINDOW" "$DODELETE" "$FEAT1" "$CHECKSUMSKIP"
	do
		if [ -n "$opt" ]
		then
			rsyncargs+=("$opt")
		fi
	done
	# EXCLUDEDIRSET may hold several blank separated rsync options
	read -r -a excludes <<< "$EXCLUDEDIRSET"

	runner=(rsync)
	if [ "$NEEDSUDO" -eq 1 ]
	then
		runner=(sudo rsync)
	fi

	"${runner[@]}" "${rsyncargs[@]}" "${excludes[@]}" "${PATH_SRC}/$adir/" "${PATH_DEST}/$adir"
}

SYNC_RC=0
# DIRSET is a blank separated list of directories, so it is split on purpose
for adir in $DIRSET
do
	_syncdir "$adir" || SYNC_RC=$?
done
T2=$(date)

echo "Finished ... started $T1 completed $T2"
_cleanexit

# Report a failed rsync through the exit status instead of always exiting 0
if [ "$SYNC_RC" -ne 0 ]
then
	echo "WARNING: at least one rsync run failed (last status $SYNC_RC)" >&2
	exit "$SYNC_RC"
fi