# Author: Theodore Zacharia
# V0.1: 05/01/2024 - Initial Release
# V0.2: 15/01/2024 - Add -i flag to handle many urls as input (read from a file)
# V0.3: 25/02/2025 - Add extraction with ts based on src rather than html links
#
# This script provides a utility to download .ts type files in an m3u8 collection via the page url
#

# *** Globals
# Option state; the defaults below apply until the option parser overrides them.
TRACE=0                 # trace output is enabled when this is greater than 0
MYARG=''                # not referenced anywhere else in the visible script
ADD_NAME=''             # value of -n, prefixed to the output file name
ENDPOINTLIST_FILE=''    # value of -i, a file listing the page urls
# Scratch file names, made unique per run by embedding the shell's process id.
T1="${$}.tmp"           # downloaded page; the script also uses $T1.1, $T1.2, $T1.3
I1="${$}.list.tmp"      # list of page urls to work through

# *** Functions
# Print the command-line help text to standard output.
# Arguments: none ($0 is used as the program name)
# Outputs:   a usage summary followed by one line per option
# Note:      printf is used rather than a tab-stripping here-document so the
#            text does not depend on the file being indented with tabs.
_usage()
{
	printf '%s\n' \
		"usage: $0 [-h] [-t] [-n additional_name] [-i endpointlistfile] positional_params" \
		"where" \
		"  -h        Displays this help message" \
		"  -t        Set trace on" \
		"  -n name   Add a string to the beginning of the filename" \
		"  -i file   Input file with list of endpoints" \
		"  positional_params  page urls to process (used when -i is not given)"
}


# *** Mainline

# process input parameters
# The leading ':' selects getopts silent mode: on an error the offending option
# letter is placed in OPTARG, so it can be named in the messages below.
while getopts :thn:i: AOPT
do
case $AOPT in
	t) TRACE=1 ;; # set TRACE mode
	n) ADD_NAME=$OPTARG ;;
	i) ENDPOINTLIST_FILE=$OPTARG ;;
	h) _usage
	   exit 1 ;;
	:) echo "option -$OPTARG requires an argument" >&2
	   exit 2 ;;
	*) echo "-$OPTARG is an invalid option" >&2
	   exit 2 ;;
esac
done

shift $((OPTIND-1))

if [ "$TRACE" -gt 0 ]
then
	echo "Starting at $(date)"
	echo "TRACE=$TRACE" >&2
	echo "ADD_NAME=$ADD_NAME" >&2
	echo "ENDPOINTLIST_FILE=$ENDPOINTLIST_FILE" >&2
	echo "positional_args=$*" >&2
fi

# at least one source of page urls is required: -i file or positional urls
if [ -z "$ENDPOINTLIST_FILE" ] && [ $# -lt 1 ]
then
	_usage
	exit 1
fi

# remove the scratch files on every exit path, including interruption
trap 'rm -f -- "$I1" "$T1" "$T1.1" "$T1.2" "$T1.3"' EXIT
trap 'exit 129' HUP
trap 'exit 130' INT
trap 'exit 143' TERM

# build the work list with one page url per line
if [ -n "$ENDPOINTLIST_FILE" ]
then
	if [ ! -r "$ENDPOINTLIST_FILE" ]
	then
		echo "cannot read endpoint list file: $ENDPOINTLIST_FILE" >&2
		exit 1
	fi
	cat -- "$ENDPOINTLIST_FILE" > "$I1"
else
	# one positional url per line, so that each is processed on its own
	printf '%s\n' "$@" > "$I1"
fi

# the second test keeps a final line that has no trailing newline
while read -r PAGE_URL || [ -n "$PAGE_URL" ]
do
	# ignore blank lines in the list
	if [ -z "$PAGE_URL" ]
	then
		continue
	fi

	# drop a single trailing slash so the last path component is not empty
	PAGE_URL=${PAGE_URL%/}
	printf '%s\n' "PAGE_URL=$PAGE_URL"
	PAGE_NAME=${PAGE_URL##*/}
	printf '%s\n' "PAGE_NAME=$PAGE_NAME"

	if ! wget -T 5 -t 2 -O "$T1" -- "$PAGE_URL"
	then
		echo "failed to fetch $PAGE_URL - skipping" >&2
		continue
	fi

	# keep only the lines of the page that mention m3u8
	grep -i m3u8 "$T1" > "$T1.1"

	# there are different syntaxes to try

	# syntax 1: the text between the first '(' and the following ')', minus single quotes
	cut -d '(' -f2 "$T1.1" | cut -d ')' -f1 | tr -d "'" > "$T1.2"

	if [ "$TRACE" -gt 0 ]
	then
		echo "trying 1....."
		cat "$T1.2"
	fi

	# syntax 2: is there an href
	if grep -q "href=" "$T1.2"
	then
		# -E is the portable spelling of GNU sed's -r
		sed -E 's/.*href="([^"]+).*/\1/g' "$T1.2" > "$T1.3"
		mv "$T1.3" "$T1.2"
		if [ "$TRACE" -gt 0 ]
		then
			echo "trying 2....."
			cat "$T1.2"
		fi
	fi

	# syntax 3: is there a src
	if grep -q "src=" "$T1.2"
	then
		grep -o 'src="[^"]*"' "$T1.2" | sed 's/src="//; s/"//' > "$T1.3"
		mv "$T1.3" "$T1.2"
		if [ "$TRACE" -gt 0 ]
		then
			echo "trying 3....."
			cat "$T1.2"
		fi
	fi

	if [ -n "$ADD_NAME" ]
	then
		SAVE_NAME="${ADD_NAME}_$PAGE_NAME"
	else
		SAVE_NAME=$PAGE_NAME
	fi

	# Several candidates may have been found. ffmpeg is given exactly one input:
	# the first candidate that mentions m3u8, or failing that the first
	# non-empty candidate. Surrounding blanks and carriage returns are trimmed.
	M3U8_URL=$(awk '
		{ sub(/^[ \t\r]+/, ""); sub(/[ \t\r]+$/, "") }
		$0 == "" { next }
		first == "" { first = $0 }
		tolower($0) ~ /m3u8/ { print; found = 1; exit }
		END { if (!found && first != "") print first }
	' "$T1.2")

	if [ -z "$M3U8_URL" ]
	then
		echo "no m3u8 reference found in $PAGE_URL - skipping" >&2
		rm -f -- "$T1" "$T1.1" "$T1.2"
		continue
	fi

	if [ "$TRACE" -gt 0 ]
	then
		echo "URL is:"
		printf '%s' "$M3U8_URL" | od -c
	fi

	printf '%s\n' "downloading from $M3U8_URL"
	if [ "$TRACE" -gt 0 ]
	then
		printf '%s\n' "saving to $SAVE_NAME"
		printf '%s\n' "ffmpeg -nostdin -y -loglevel verbose -i \"$M3U8_URL\" -c copy -f mpegts \"${SAVE_NAME}.mp4\""
	fi

	# -nostdin and </dev/null stop ffmpeg from reading the loop's standard input,
	# which is the url list; otherwise the remaining urls could be consumed
	if ! ffmpeg -nostdin -y -loglevel verbose -i "$M3U8_URL" -c copy -f mpegts "${SAVE_NAME}.mp4" </dev/null
	then
		echo "ffmpeg failed for $M3U8_URL" >&2
	fi

	rm -f -- "$T1" "$T1.1" "$T1.2"
done < "$I1"


rm -f -- "$I1" "$T1" "$T1.1" "$T1.2" "$T1.3"