bash-common/chertex.sh

#!/usr/bin/env bash
## Process *.Rnw files
## Written May 14, 2010
## Taha Ahmed

####################################################
# For now, MAKE SURE that the argument is a complete
# filename (with extension), and that the script is
# run from the directory containing the Rnw file
####################################################

# Remember when the script started (epoch seconds); the total runtime is
# derived from this value once all processing is done
starttime=$(date +%s)

# Wipe the terminal and greet the user with the program banner
clear
printf '%s\n' \
  "-----------------------------------------------------------------------" \
  "cheRTeX -- a script for processing R--Sweave/knitr--LaTeX/TikZ projects" \
  "^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^" \
  "MMXVI -- taha@chepec.se -- CHEPEC doctoral degree project" \
  "-----------------------------------------------------------------------"

## Detect a project-local LaTeXMK configuration:
## ltxmkrc is "true" when the file .latexmkrc exists in the current
## directory and "false" otherwise
echo "--- Looking for .latexmkrc"
if [[ ! -e .latexmkrc ]]; then
   ltxmkrc=false
   echo "--- This job did not request LaTeXMK RC file"
else
   ltxmkrc=true
   echo "+++ LaTeXMK RC file invoked"
fi

# Working-directory constants
#   path_wd     - directory the script was started from
#   dir_wd      - last path component of path_wd
#   path_thesis - directory whose jobs get the thesis-specific treatment
path_wd=${PWD}
# The argument is quoted (and option parsing ended with --) so that a
# directory name containing whitespace or starting with a dash is handled
dir_wd=$(basename -- "$path_wd")
path_thesis="/media/bay/taha/chepec/thesis"

# Source file types: bare extensions for R and TeX, glob patterns for
# TikZ and Rnw files
Rfiletype="R"
TeXfiletype="tex"
tikzfiles="*.tikz"
Rnwfiles="*.Rnw"

# Glob patterns of auxiliary files, one variable per pattern.
# -- bibliography
aux_bbl="*.bbl"
aux_blg="*.blg"
# -- glossaries
aux_alg="*.alg"
aux_glo="*.glo"
aux_gls="*.gls*" # also covers bib2gls
aux_ist="*.ist"  # makeindex style file
aux_xdy="*.xdy"  # xindy
# -- everything else
aux_acn="*.acn"
aux_acr="*.acr"
aux_aux="*.aux"
aux_dep="*.dep"
aux_dpth="*.dpth"
aux_fdb="*.fdb*"
aux_fig="*.figlist"
aux_fls="*.fls"
aux_glg="*.glg"
aux_lob="*.lob"
aux_lof="*.lof"
aux_log="*.log"
aux_lor="*.lor"
aux_los="*.los"
aux_lot="*.lot"
aux_lox="*.lox"
aux_make="*.makefile"
aux_map="*.map"
aux_out="*.out"
aux_run="*.run*"
aux_slg="*.slg"
aux_slo="*.slo"
aux_sls="*.sls"
aux_tdo="*.tdo"
aux_tikz="*-tikzDictionary"
aux_toc="*.toc"

# All aux patterns joined by single spaces into one string. The patterns are
# kept quoted here so that they stay patterns until the string is used.
aux_patterns=(
  "$aux_acn" "$aux_acr" "$aux_alg" "$aux_aux" "$aux_bbl" "$aux_blg"
  "$aux_dep" "$aux_dpth" "$aux_fdb" "$aux_fig" "$aux_fls" "$aux_glg"
  "$aux_glo" "$aux_gls" "$aux_ist" "$aux_lob" "$aux_lof" "$aux_log"
  "$aux_lor" "$aux_los" "$aux_lot" "$aux_out" "$aux_lox" "$aux_make"
  "$aux_map" "$aux_run" "$aux_slg" "$aux_slo" "$aux_sls" "$aux_tikz"
  "$aux_tdo" "$aux_toc" "$aux_xdy"
)
auxfiles=""
for aux_pattern in "${aux_patterns[@]}"; do
  auxfiles="${auxfiles:+${auxfiles} }${aux_pattern}"
done
unset aux_patterns aux_pattern


# depending on number of args and their content, do different things...
if [ $# -eq 1 ]; then
	# Split the single argument into job name and file extension
	# (assumes a complete filename, with extension, was passed)
	jobfilename=$1
	# The extension is whatever follows the LAST dot, so that a name such as
	# "my.report.Rnw" is recognised. A name without any dot has no extension.
	if [[ $jobfilename == *.* ]]; then
		jobfiletype=${jobfilename##*.}
	else
		jobfiletype=""
	fi
	jobname=${jobfilename%.*} # Filename without extension part

	echo "<cheRTeX> Detected filename:  $jobname"
	echo "<cheRTeX> Detected extension: $jobfiletype"

	# Verify that the file extension is "[Rr][Nn][Ww]"
	if [[ $jobfiletype == [Rr][Nn][Ww] ]]; then
		# File extension is indeed "[Rr][Nn][Ww]";
		# from here on it is always spelled "Rnw"
		echo "<cheRTeX> Detected *.Rnw extension"
		jobfiletype="Rnw"
	else
		# File extension is NOT "[Rr][Nn][Ww]"
		echo "<cheRTeX> This script only handles *.Rnw files"
		echo "<cheRTeX> Terminating..."
		exit 1
	fi

	# Introducing a short delay to enable on-screen reading of previous echo
	simpledelay.sh 2
	# no need to echo it
	# echo "Delay completed"


	#### Special treatment for sample-matrix.Rnw
	# A job named "sample-matrix" is not compiled: Shiny is restarted
	# instead and the script ends with exit status 0
	case $jobname in
		sample-matrix)
			printf '%s\n' \
				"<sample-matrix> -------------------------------" \
				"<sample-matrix> Restarting Shiny" \
				"<sample-matrix> -------------------------------"

			# stop every process whose command line matches shiny::runApp
			pkill -f "shiny::runApp"
			# launch the Shiny start script in the background
			bash -c "/media/bay/taha/chepec/chetex/common/bash/shiny-matrix.sh" &
			# nothing else to do for this job
			echo "<cheRTeX> Terminating..."
			simpledelay.sh 2
			exit 0
			;;
	esac

	#### Special treatment for thesis
	# Runs only when the script was started in $path_thesis for the job named
	# after that directory. Two preparation steps are performed:
	#   1. copy the external assets listed in assets.external files
	#   2. maintain a tree of size-reduced photos in assets/photos/.lowres/

	# Decide how an oversized file in the low-res tree is treated, based only
	# on its extension (the text after the last dot of its basename).
	# Arguments: $1 - path of the file
	# Outputs:   "skip"    - no extension, SVG or PDF: leave the file alone
	#            "shrink"  - already JPEG: only reduce its size
	#            "convert" - anything else: convert to JPEG, then reduce
	_chertex_lowres_action() {
		local base=${1##*/}
		local ext=""
		if [[ $base == *.* ]]; then
			ext=${base##*.}
		fi
		case $ext in
			"" | [Ss][Vv][Gg] | [Pp][Dd][Ff]) echo "skip" ;;
			[Jj][Pp][Gg] | [Jj][Pp][Ee][Gg]) echo "shrink" ;;
			*) echo "convert" ;;
		esac
	}

	if [[ $path_wd == "$path_thesis" && $jobname == "$dir_wd" ]]; then
		# Fetch external assets by reading any assets.external files in assets/ tree
		echo "<thesis> -------------------------------"
		echo "<thesis> Getting external assets"
		echo "<thesis> -------------------------------"
		# find all files named "assets.external" in the assets/ tree; the list is
		# NUL-delimited so that paths containing whitespace stay in one piece
		while IFS= read -r -d '' externalfilepath; do
			assetpathtarget=$(dirname -- "$externalfilepath")
			assetdirnametarget=$(basename -- "$assetpathtarget")
			# read the list line-by-line; the || clause picks up a last line that
			# lacks a trailing newline
			# https://stackoverflow.com/questions/10929453/read-a-file-line-by-line-assigning-the-value-to-a-variable
			while IFS= read -r asset || [[ -n $asset ]]; do
				# an empty line would leave cp without a source operand
				[[ -n $asset ]] || continue
				echo "<thesis> Copying $asset to $assetpathtarget"
				# $asset is deliberately left unquoted in the cp calls below, as it
				# always has been, so that a list entry may be a glob pattern
				if [[ $assetdirnametarget == "references" ]]; then
					# the BibTeX library files (inside assets/references/) are the one
					# case where existing files are overwritten
					echo "<thesis> Overwriting BibTeX libraries in assets/references/"
					# shellcheck disable=SC2086
					cp --preserve=timestamps $asset "$assetpathtarget"
				else
					# cp but don't overwrite existing files
					# shellcheck disable=SC2086
					cp --preserve=timestamps --no-clobber $asset "$assetpathtarget"
				fi
			done < "$externalfilepath"
		done < <(find "$path_wd/assets/" -type f -name "assets.external" -print0)

		# Create low-res photos on-the-fly from existing photos/
		echo "<thesis> -------------------------------"
		echo "<thesis> Create low-res photos tree"
		echo "<thesis> -------------------------------"
		# Only photos changed since the previous run are copied. rsync cannot
		# compare timestamps while disregarding size, hence the marker file. See
		# https://superuser.com/questions/260092/rsync-switch-to-only-compare-timestamps
		photosdir="$path_wd/assets/photos"
		lowresdir="$photosdir/.lowres"
		photoslastrun="$lowresdir/lastrun"
		if [[ -d $photosdir ]]; then
			if [[ ! -f $photoslastrun ]]; then
				# if, for some reason, the lastrun file does not exist
				# copy over everything and then create the file.
				# The trailing slash on the source copies the directory contents;
				# the anchored exclude keeps .lowres/ from being copied into itself.
				rsync -av --exclude "/.lowres/" "$photosdir/" "$lowresdir/"
				touch "$photoslastrun"
			fi
			# cd (inside a subshell, so the working directory of the script is
			# untouched) and use --parents to preserve the directory structure
			(
				cd "$photosdir" || exit 1
				find . -type f -cnewer "$photoslastrun" ! -path './.lowres/*' \
					-exec cp --parents {} .lowres/ \;
			)
			# in the low-res tree, find any photo larger than specific size (500kB)
			while IFS= read -r -d '' largephotofilename; do
				largephotobase=${largephotofilename##*/}
				case "$(_chertex_lowres_action "$largephotofilename")" in
					skip)
						# some filetypes don't fare well when converted to jpeg
						continue
						;;
					shrink)
						# keep the file name exactly as it is (.jpg, .JPG, .jpeg, ...)
						jpegphoto=$largephotofilename
						;;
					convert)
						echo "<thesis> Converting $largephotobase to JPG format"
						jpegphoto="${largephotofilename%.*}.jpg"
						if ! mogrify -format jpg "$largephotofilename"; then
							echo "<thesis> Converting $largephotobase failed, file left untouched" >&2
							continue
						fi
						# remove the now unnecessary non-jpg file from .lowres/
						rm -- "$largephotofilename"
						;;
				esac
				# convert photo in-place (overwrite) with new one roughly 300kb in size
				# https://stackoverflow.com/questions/6917219/imagemagick-scale-jpeg-image-with-a-maximum-file-size
				echo "<thesis> Shrinking ${jpegphoto##*/}"
				convert "$jpegphoto" -define jpeg:extent=300kb "$jpegphoto"
			done < <(find "$lowresdir/" -type f -size +500k -print0)
			# update the modification and access time on the lastrun file
			touch "$photoslastrun"
		else
			echo "<thesis> No photos directory at $photosdir, skipping"
		fi
	fi
	# short delay to enable on-screen reading of previous echo
	simpledelay.sh 2


   ## Handle knitr or pgfSweave jobs (each requires separate treatment)
   ## There is no obvious way to tell them apart (short of reading the *.Rnw
   ## file), hence the convention:
   ## IN ALL KNITR DIRECTORIES, CREATE A FILE NAMED: .knitme
   # If the file .knitme exists in the current directory,
   # run knitr commands, otherwise run pgfsweave commands
   rnwfile="${jobname}.${jobfiletype}"
   echo "--- Looking for .knitme"
   if [[ -e .knitme ]]; then
      # Run knitr commands for this job
      echo "<cheRTeX> -----------------------"
      echo "<cheRTeX> This is a job for knitr"
      echo "<cheRTeX> -----------------------"

      # Knit. The file name is handed to R as a command-line argument instead
      # of being pasted into the R code, so that a quote or backslash in the
      # name cannot break the expression.
      echo "<cheRTeX> Knitting..."
      Rscript -e "library(knitr); library(methods); knit(commandArgs(trailingOnly = TRUE)[1])" "$rnwfile"

      # Introduce delay to give time to read Rscript exit status
      echo "<cheRTeX> -----------------------"
      echo "<cheRTeX> Rscript knitr completed"
      echo "<cheRTeX> -----------------------"
      simpledelay.sh 2
   else
      # Run pgfSweave commands
      echo "<cheRTeX> ---------------------------"
      echo "<cheRTeX> This is a job for pgfSweave"
      echo "<cheRTeX> ---------------------------"

      # Tangle (file name quoted so that whitespace in it is preserved)
      echo "<cheRTeX> Tangling..."
      R CMD Stangle "$rnwfile"
      # Weave
      echo "<cheRTeX> Weaving..."
      R CMD pgfsweave --graphics-only "$rnwfile"

      # Introduce delay to give time to read R CMD exit status
      echo "<cheRTeX> -------------------------"
      echo "<cheRTeX> R CMD pgfsweave completed"
      echo "<cheRTeX> -------------------------"
      simpledelay.sh 2
   fi

	# Run vc script if vc exists in working directory
	# (the message is only printed when there is a script to run)
	if [[ -f vc ]]; then
		echo "<cheRTeX> Running vc script"
		./vc
	fi

	# Run pdflatex, bibtex, and company through LaTeXMK.
	# The RC file is requested only when the flag is exactly "true". Running
	# the flag as a command would treat an empty or unset flag as success.
	latexmk_args=(-pdf -bibtex)
	if [[ ${ltxmkrc:-false} == true ]]; then
		echo "<cheRTeX> Calling LaTeXMK with RC file"
		latexmk_args=(-r .latexmkrc "${latexmk_args[@]}")
	else
		echo "<cheRTeX> Calling LaTeXMK"
	fi
	simpledelay.sh 2
	latexmk "${latexmk_args[@]}" "$jobname"

else
	# Either no arguments, or more than one argument
	if [ $# -eq 0 ]; then
		# No arguments were given: list the post-processing choices and read
		# the user's answer into usrchoice
		printf '%s\n' \
			"This is cheRTeX POST-PROCESSING" \
			"<1> 'pdf-all'  -- Process all .tikz files to pdf graphics" \
			"<2> 'clean-up' -- Remove all auxiliary files" \
			"<3> 'wipe-dir' -- Remove all non-essential files and subdirectories" \
			"Any other input exits the program"
		read usrchoice

		## Determine number of .Rnw files in current directory
		#Rnwfileno=$(ls -1 $Rnwfiles | wc -l)
		#echo "No of .Rnw files: $Rnwfileno"
		#
		## Check if number of .Rnw files larger than one
		#if [ $Rnwfileno -gt 1 ]; then
		#	# If larger than one, ask for user input
		#	# Indicates more than one Rnw file in current directory.
		#	# This introduces a naming ambiguity.
		#	# Resolve by asking user for current jobname
		#	echo "Found $Rnwfileno .Rnw files in current directory"
		#	echo "Please specify the jobname"
		#	read jobname
		#	if [ -z "$jobname" ]; then
		#		# string is null
		#		echo "Specified jobname cannot be parsed. Terminating..."
		#		exit 1
		#	fi
		#else
		## There is exactly one *.Rnw file is current directory
		## Fetch the jobname from the .Rnw filename by stripping off the file extension
		#	Rnwfilename=$(ls -1 $Rnwfiles)
		#	jobname=${Rnwfilename%.*}
		#fi
		#
		#echo "Jobname set to: $jobname"

		# Collect the existing files that match a glob pattern.
		# Arguments: $1 - one or more whitespace-separated glob patterns
		# Globals:   chertex_matched_files (written) - array of matching paths,
		#            empty when nothing matches
		_chertex_match_files() {
			local candidate
			chertex_matched_files=()
			# $1 is deliberately unquoted: it has to undergo pathname expansion
			# shellcheck disable=SC2086
			for candidate in $1; do
				# a pattern without any match is left in place by the shell
				if [[ -e $candidate ]]; then
					chertex_matched_files+=("$candidate")
				fi
			done
		}

		if [[ $usrchoice == "pdf-all" || $usrchoice == "1" ]]; then
			echo "<1> 'pdf-all' chosen"
			# Count the files themselves (not the characters of the pattern)
			_chertex_match_files "$tikzfiles"
			tikzfilenumber=${#chertex_matched_files[@]}
			echo "cheRTeX detected $tikzfilenumber TikZ files for processing"
			echo "Starting TikZ file processing..."
			simpledelay.sh 2

			for tikzfilename in "${chertex_matched_files[@]}"; do
				# Call tikz2pdf
				echo "<Executing> tikz2pdf $tikzfilename"
				tikz2pdf --once "$tikzfilename"
			done
			echo "Completed TikZ file processing"
		fi

      # Delete the files in the current directory that match the given patterns.
      # Arguments: $1 - whitespace-separated glob patterns
      # Returns:   exit status of rm
      _chertex_remove_matching() {
         # -f : a pattern without any match is passed on literally by the
         #      shell; rm then ignores it instead of reporting an error
         # -- : a file name starting with a dash is not taken for an option
         # $1 is deliberately unquoted: it has to undergo pathname expansion
         # shellcheck disable=SC2086
         rm -f -- $1
      }

      if [[ $usrchoice == "clean-up" || $usrchoice == "2" ]]; then
         echo "<2> 'clean-up' chosen"
         _chertex_remove_matching "$auxfiles"
         # Still, a rather crude way of cleaning up...
      fi

      # Move the visible contents of the current directory out of the way and
      # put back only the files a project needs to be rebuilt.
      # Hidden files and hidden subdirectories (e.g. .knitme, .latexmkrc) are
      # neither backed up nor removed, so they never need to be restored.
      # Arguments: $1 - directory in which the backup directory is created
      #                 (default: /media/bay/taha/chepec/tmp)
      # Returns:   0 on success or when there is nothing to wipe;
      #            1 when the backup could not be made, in which case
      #            nothing has been removed
      _chertex_wipe_dir() {
         local backuproot=${1:-/media/bay/taha/chepec/tmp}
         # name of the current directory
         local currdirname=${PWD##*/}
         local timestamp backupdir keeper
         local -a contents
         # a timestamp makes the backup directory name unique
         timestamp=$(date +%s)
         backupdir="${backuproot}/${timestamp}-${currdirname}"

         # everything visible in the current directory
         contents=( * )
         if [[ ! -e ${contents[0]} && ! -L ${contents[0]} ]]; then
            # the pattern matched nothing: the directory is already empty
            return 0
         fi

         if ! mkdir -- "$backupdir"; then
            echo "<cheRTeX> Could not create $backupdir, nothing was removed" >&2
            return 1
         fi
         # Copy the contents of the current directory to the backup directory
         if ! cp -R -- "${contents[@]}" "$backupdir"; then
            echo "<cheRTeX> Backup to $backupdir failed, nothing was removed" >&2
            return 1
         fi
         # Empty the current directory, now that the backup is known to exist
         rm -R -- "${contents[@]}"

         # Return the stuff we want to keep after wipe-dir. Each candidate is
         # checked first, which also avoids "file does not exist" messages:
         #   *.Rnw, vc, vc.tex, vc-git.awk - the job itself and its vc files
         #   *.Rproj - removal is unnecessary and makes RStudio less useful
         #   *.rda   - peak-data files, which "cost" a lot to create
         #   *.Rmd   - R markdown source files
         #   *.css   - css files [for sample-matrix]
         for keeper in \
            "$backupdir"/*.Rnw \
            "$backupdir"/vc \
            "$backupdir"/vc.tex \
            "$backupdir"/vc-git.awk \
            "$backupdir"/*.Rproj \
            "$backupdir"/*.rda \
            "$backupdir"/*.Rmd \
            "$backupdir"/*.css; do
            if [[ -e $keeper ]]; then
               cp -- "$keeper" .
            fi
         done
         return 0
      }

      if [[ $usrchoice == "wipe-dir" || $usrchoice == "3" ]]; then
         echo "<3> 'wipe-dir' chosen"
         ## Remove all non-essential files
         _chertex_wipe_dir
      fi

		echo "Terminating..."

		exit 0
	fi

	## Reaching this point means that more than one argument was given,
	## which is not supported: explain the two supported modes and fail

	echo "<cheRTeX> This script can be run with one argument in process mode,"
	echo "<cheRTeX> or with zero arguments in post-processing mode."
	echo "<cheRTeX> Terminating..."
	exit 1
fi


# Depending on whether the clock uses summer or wintertime, the date string length
# will differ by one (CEST vs CET).
# Just to be neat, we will take this into consideration when constructing the
# "job completed" block below.
cetcest=$(date +%Z)

# keep track of runtime of entire script
# (variables are named without "$" inside the arithmetic, so an unset
# operand counts as 0 instead of causing a syntax error)
endtime=$(date +%s)
runtime=$(( endtime - starttime ))

# send push message via Gotify CLI
# if runtime is longer than X minutes (suitable limit perhaps 3 min),
# and only if the gotify command is available at all
if (( runtime > 180 )) && command -v gotify >/dev/null 2>&1; then
	# "$*" joins the script arguments into the message as one string;
	# $'\n' is an actual line break (a "\n" inside double quotes is not)
	gotify push --quiet --title "$dir_wd" --priority 5 "chertex.sh $*"$'\n'"Completed in $runtime s"
fi

# Closing frame: total runtime and the current date between two rules
completionrule="-------------------------------------"
echo "$completionrule"
# the runtime is zero-padded to three digits so that the line is as wide as
# the rules; three digits cover runs of just above 15 minutes
printf "=== chertex.sh completed in %03d s ===\n" $runtime
# with the time zone name CET the date line gets one extra space of padding
case $cetcest in
	CET) echo "=== $(date)  ===" ;;
	*) echo "=== $(date) ===" ;;
esac
echo "$completionrule"
simpledelay.sh 3

exit 0