#!/usr/bin/env bash
## Written May 14, 2010
## Taha Ahmed
# This is the first bash script where I implemented the ideas outlined by
# https://betterdev.blog/minimal-safe-bash-script-template/
# I don't understand the point of traps, so I skipped "-E"
# https://vaneyckt.io/posts/safer_bash_scripts_with_set_euxo_pipefail/
# Strict mode:
#   -e           exit as soon as an unhandled command returns non-zero
#   -u           treat expansion of an unset variable as an error
#   -o pipefail  a pipeline fails if any of its stages fails
# ("-E" is deliberately left out, see the note above about traps.)
set -euo pipefail
#######################################
# Print the usage guide and terminate the script.
# Globals:   BASH_SOURCE (read) - used to show the script's own file name
# Arguments: none
# Outputs:   usage text on stdout
# Returns:   does not return; exits with status 0
#######################################
usage() {
  # Unquoted delimiter on purpose: the $(basename ...) below must be expanded.
  cat <<EOF
Usage: $(basename -- "${BASH_SOURCE[0]}") <flags> job.Rnw
Compiles R Sweave documents using knitr and LaTeXMK.
Filename extension must be .Rnw.
Available flags:
+ --help --> display this usage guide (-h also works)
+ --verbose --> trace every command as it is executed (-v also works)
+ --no-alert --> disable gotify alert (--silent also works)
+ --menu --> enter "menu" mode where auxiliary commands can be run (--aux)
This script treats certain filenames in a special manner:
+ "thesis" --> triggers a chain of special commands, see source code.
In addition, the internal execution of this script is modified by the existence of
certain files in the working directory:
+ ./.knitme --> use knitr::knit() instead of pgfSweave().
+ ./.latexmkrc --> apply RC file to latexmk command.
+ ./vc --> run vc script (integrates git with LaTeX, deprecated by gitinfo2, kept for backwards support).
EOF
  exit 0
}
# Keep track of the runtime of the entire script: remember the start time
# (seconds since the epoch); the elapsed time is computed at the very end.
starttime=$(date +%s)

# Load colours. The messages further down reference ${On_Cyan} and
# ${Color_Off}, which are expected to be defined by this file.
# TO-DO: consider making colour output optional via a CLI flag.
if [[ -f "/home/taha/.local/bin/echo-colours.sh" ]]; then
  # shellcheck source=/dev/null
  . "/home/taha/.local/bin/echo-colours.sh"
fi

# Make the colour dependency non-critical: with "set -u" active, referencing
# an undefined colour variable would abort the script. Fall back to empty
# strings (= uncoloured output) for the colours this script uses.
: "${On_Cyan:=}" "${Color_Off:=}"
#######################################
# Print one message line on stderr.
# Backslash escapes in the message (e.g. \t or colour codes stored as escape
# sequences) are interpreted, as "echo -e" would do.
# Arguments: $1 - message text (optional; an empty line is printed if absent)
# Outputs:   the message followed by a newline, on stderr
#######################################
msg() {
  # printf instead of "echo -e": echo would treat a message such as "-n"
  # as an option and print nothing.
  printf '%b\n' "${1-}" >&2
}
#######################################
# Print a final message, pause briefly, then terminate the script.
# Examples:
#   die "some message"
#   die "some message and wait 6 seconds before exiting" 6
#   die "some message and exit immediately" 0
#   die "some error, exit immediately with status 1" 0 1
# Arguments:
#   $1 - message to print (via msg)
#   $2 - delay in seconds before exiting; 3 if unset or empty
#   $3 - exit status; 0 if unset or empty (the historical behaviour, kept
#        as default so existing callers are unaffected)
# Returns:   does not return; exits with the requested status
#######################################
die() {
  local text=${1-}
  local delay=${2:-3}
  local status=${3:-0}

  msg "$text"

  # Short delay to aid reading the last message in case the terminal closes
  # on exit. simpledelay.sh is an external helper; if it is not on PATH fall
  # back to a plain sleep so that a missing helper does not turn the exit
  # into a "command not found" failure.
  if command -v simpledelay.sh >/dev/null 2>&1; then
    simpledelay.sh "$delay"
  else
    sleep "$delay"
  fi

  exit "$status"
}
#######################################
# Parse the command line.
# Leading flags are consumed; everything from the first non-flag word on is
# stored as positional arguments.
# Globals:
#   show_menu     (written) - set to true by --aux/--menu, or when no
#                             positional argument remains
#   disable_alert (written) - set to true by --no-alert/--silent
#   args          (written) - array of the remaining positional arguments
# Arguments: the script's command line ("$@")
# Returns:   0; -h/--help and unknown options terminate the script through
#            usage and die respectively
#######################################
parse_params() {
  while :; do
    case "${1-}" in
      -h | --help) usage ;;
      -v | --verbose) set -x ;;
      --aux | --menu) show_menu=true ;;
      --no-alert | --silent) disable_alert=true ;;
      -?*) die "Unknown option: $1" ;;
      *) break ;;
    esac
    shift
  done

  # Keep every remaining word as its own array element.
  args=("$@")

  # If no args were given, show aux menu mode.
  # Note that flags don't count as args, so as long as no jobname was given
  # (irrespective of provided flags), aux menu will be entered.
  if [[ ${#args[@]} -eq 0 ]]; then
    show_menu=true
  fi

  return 0
}
# Defaults for the settings that parse_params may override.
show_menu=false
# gotify alert is enabled by default (assuming the job takes longer than X seconds)
disable_alert=false

# Each command-line word is passed on as a separate argument.
parse_params "$@"

# setup_colors
# Clearing the screen is purely cosmetic: do not let a failing "clear"
# (for example when no usable terminal is attached) abort the script
# under "set -e".
clear || true

msg "-----------------------------------------------------------------------"
msg "cheRTeX -- a script for processing R--Sweave/knitr--LaTeX/TikZ projects"
msg "^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^"
msg "MMXX -- taha@chepec.se -- CHEPEC doctoral degree project"
msg "-----------------------------------------------------------------------"
## If the file .latexmkrc exists in the current directory, set ltxmkrc=true.
## The flag is only used further down to choose which message is printed
## before latexmk is called.
ltxmkrc=false
msg "--- Looking for .latexmkrc"
if [[ -e .latexmkrc ]]; then
  ltxmkrc=true
  msg "+++ LaTeXMK RC file invoked"
else
  msg "--- This job did not request LaTeXMK RC file"
fi
# define some constants
path_wd=${PWD}
# name of the working directory itself (last path component);
# quoted so that directory names containing spaces survive
dir_wd=$(basename -- "$path_wd")
path_thesis="/media/bay/taha/chepec/thesis"
temp_folder="/media/bay/taha/chepec/tmp"

# define some file types
Rfiletype="R"
TeXfiletype="tex"
tikzfiles="*.tikz"
Rnwfiles="*.Rnw"

# aux files (glob patterns, stored as literal strings)
aux_4ct="*.4ct"   # TOC entries for HTML output
aux_4tc="*.4tc"   # TOC entries for HTML output
aux_acn="*.acn"
aux_acr="*.acr"
aux_alg="*.alg"   # glossaries
aux_aux="*.aux"
aux_bbl="*.bbl"   # bibliography
aux_blg="*.blg"   # bibliography
aux_dep="*.dep"
aux_dpth="*.dpth"
aux_fdb="*.fdb*"
aux_fig="*.figlist"
aux_fls="*.fls"
aux_glg="*.glg"
aux_glo="*.glo"   # glossaries
aux_gls="*.gls*"  # glossaries and bib2gls
aux_ist="*.ist"   # glossaries (makeindex style file)
aux_lg="*.lg"     # tex4ht
aux_lob="*.lob"
aux_lof="*.lof"
aux_log="*.log"
aux_lol="*.lol"   # list of hyperlinks (custom, used in thesis)
aux_lor="*.lor"
aux_los="*.los"
aux_lot="*.lot"
aux_maf="*.maf"   # minitoc
aux_mtc="*.mtc*"  # minitoc
aux_out="*.out"   # hyperref
aux_lox="*.lox"
aux_make="*.makefile"
aux_map="*.map"
aux_run="*.run*"
aux_slg="*.slg"
aux_slo="*.slo"
aux_sls="*.sls"
aux_stc="*.stc*"  # minitoc
aux_tikz="*-tikzDictionary"
aux_tdo="*.tdo"
aux_tmp="*.tmp"   # tex4ht
aux_toc="*.toc"
aux_xdy="*.xdy"   # glossaries (xindy)
aux_xref="*.xref" # pandoc or tex4ht cross-refs

# All aux files in a single string: patterns separated by exactly one space,
# with no leading or trailing space. The clean-up action turns this string
# into a regex by rewriting "* " into ".* " and every space into "|", so
#   - stray spaces would produce empty regex alternatives, and
#   - the last entry must not end in "*" (there is no space after it).
auxfiles="${aux_4ct} ${aux_4tc} ${aux_acn} ${aux_acr} ${aux_alg} ${aux_aux} ${aux_bbl} ${aux_blg} ${aux_dep} ${aux_dpth} ${aux_fdb} ${aux_fig} ${aux_fls} ${aux_glg} ${aux_glo} ${aux_gls} ${aux_ist} ${aux_lg} ${aux_lob} ${aux_lof} ${aux_log} ${aux_lol} ${aux_lor} ${aux_los} ${aux_lot} ${aux_maf} ${aux_out} ${aux_lox} ${aux_make} ${aux_map} ${aux_mtc} ${aux_run} ${aux_slg} ${aux_slo} ${aux_sls} ${aux_stc} ${aux_tikz} ${aux_tdo} ${aux_toc} ${aux_xdy} ${aux_xref} ${aux_tmp}"
# Main dispatch.
#   - show_menu == false: compile the *.Rnw job named by the first positional
#     argument (knitr or pgfSweave, then latexmk). When run for the thesis
#     itself, external assets and low-res photos are prepared first.
#   - otherwise: offer the post-processing menu (pdf-all, clean-up, wipe-dir)
#     and exit afterwards.
# Reads the settings and constants defined above: show_menu, args, path_wd,
# dir_wd, path_thesis, temp_folder, tikzfiles, auxfiles, ltxmkrc and the
# colour variables On_Cyan / Color_Off.
if [[ $show_menu == "false" ]]; then
  # Check if the argument contains a filetype
  # (assumes a complete filename was passed)
  jobfilename=${args[0]}
  # File extension: everything after the LAST dot, so that names with several
  # dots (or a leading "./") are split the same way as for jobname below.
  jobfiletype=${jobfilename##*.}
  jobname=${jobfilename%.*} # Filename without extension part
  msg "<cheRTeX> Detected filename: $jobname"
  msg "<cheRTeX> Detected extension: $jobfiletype"
  # Verify that the file extension is "[Rr][Nn][Ww]" (any mix of cases)
  if [[ $jobfiletype == [Rr][Nn][Ww] ]]; then
    # File extension is indeed "[Rr][Nn][Ww]"
    msg "<cheRTeX> Detected *.Rnw extension"
    # from here on the job file is always addressed as <jobname>.Rnw
    jobfiletype="Rnw"
  else
    # File extension is NOT "[Rr][Nn][Ww]"
    die "This script only supports *.Rnw files"
  fi
  # Introducing a short delay to enable on-screen reading of previous echo
  simpledelay.sh 2

  #### Special treatment for thesis
  if [[ $path_wd == "$path_thesis" && $jobname == "$dir_wd" ]]; then
    # Restore thesis.bcf from latest commit if modified in workspace
    # Background: any LaTeX run that finishes uncleanly (due to errors, etc.) leaves
    # the bcf file in an incomplete state. Since we have at least one plot in the thesis
    # that reads the bcf file, this in turn causes the subsequent chertex compilation
    # to fail. This mess could be automatically avoided if we pre-emptively check
    # if thesis.bcf is listed in the output of `git status --short`, and if so `git restore` it.
    # https://stackoverflow.com/a/25149786/1198249
    # A failing "git status" is tolerated and simply means "nothing to restore".
    git_status=$(git status --porcelain) || git_status=""
    # -F: match the file name literally (the dot is not a regex wildcard)
    if grep -qF -- "${jobname}.bcf" <<<"$git_status"; then
      msg "<thesis> -------------------------------"
      msg "<thesis> Restoring thesis.bcf"
      msg "<thesis> -------------------------------"
      git -C "$path_wd" restore -- "${jobname}.bcf"
      simpledelay.sh 2
    fi

    # Fetch external assets by reading any assets.external files in assets/ tree
    # Each line of such a file is either
    #   <source path>                      copied next to the assets.external file
    #   <source path>><local subdirectory> copied into that subdirectory
    # Empty lines are skipped.
    msg "<thesis> -------------------------------"
    msg "<thesis> Getting external assets"
    msg "<thesis> -------------------------------"
    # Separator between source path and optional local target folder. A space
    # cannot be used because paths may contain spaces; ">" is unlikely to
    # clash with any path specification.
    asset_sep='>'
    # find all files named "assets.external" in the assets/ tree
    assetsexternalfiles=$(find "$path_wd/assets/" -type f -name "assets.external")
    # iterate line by line so that paths containing spaces stay in one piece
    while IFS= read -r externalfilepath; do
      [[ -n $externalfilepath ]] || continue
      # https://stackoverflow.com/questions/10929453/read-a-file-line-by-line-assigning-the-value-to-a-variable
      while IFS= read -r asset || [[ -n $asset ]]; do
        # $asset contains one line from the current assets.external
        # assetpathtarget is the directory of the current assets.external file
        assetpathtarget=$(dirname -- "$externalfilepath")
        # Split the line at the separator. IFS is changed for this single
        # "read" only, so the rest of the script keeps the default IFS.
        # https://stackoverflow.com/a/30212526
        IFS=$asset_sep read -ra asset_array <<<"$asset"
        # sanity check for array length
        if ((${#asset_array[@]} > 2)); then
          die "<thesis> Cannot handle more than one $asset_sep character per line"
        fi
        if ((${#asset_array[@]} > 1)); then
          # redefine assetpathtarget to include the local subdir
          assetpathtarget="$assetpathtarget/${asset_array[1]}"
          # also redefine $asset so we keep only the asset path
          asset="${asset_array[0]}"
          # create the local subdir inside assets/<current>/
          mkdir -p -- "$assetpathtarget" # -p suppresses error if dir already exists
        fi
        # nothing to copy for an empty line or an empty source path
        [[ -n $asset ]] || continue
        msg "<thesis> Copying $asset to $assetpathtarget"
        # Expand the asset spec into a list of source paths: wildcards in the
        # spec are expanded, but (IFS being empty) the spec is never split at
        # whitespace, so paths containing spaces work unchanged.
        saved_ifs=$IFS
        IFS=
        # shellcheck disable=SC2206 # glob expansion is intended here
        asset_sources=($asset)
        IFS=$saved_ifs
        # cp but don't overwrite existing files
        cp --preserve=timestamps --no-clobber --recursive -- "${asset_sources[@]}" "$assetpathtarget"
        # except we want to overwrite the biblatex library file (inside the assets/references/ directory)
        # as well as the zotero.sqlite file, we'll do that by checking the target dirname and only run
        # the copy operation (which will overwrite stuff) if it is "references"
        assetdirnametarget=$(basename -- "$assetpathtarget")
        if [[ $assetdirnametarget == "references" ]]; then
          msg "<thesis> Overwriting Zotero biblatex library and database in assets/references/"
          cp --preserve=timestamps -- "${asset_sources[@]}" "$assetpathtarget"
        fi
      done <"$externalfilepath"
    done <<<"$assetsexternalfiles"

    # Create low-res photos on-the-fly from existing photos/
    msg "<thesis> -------------------------------"
    msg "<thesis> Create low-res photos tree"
    msg "<thesis> -------------------------------"
    # copy existing photos to assets/photos/.lowres/ path
    # to save time, copy only if highres photo has more recent timestamp (otherwise, keep lowres photo without overwriting)
    # Note: rsync usually looks at file timestamp and size, and if either has changed, copies the file (simplified explanation)
    # in this case, I'd like to only compare timestamps and disregard size
    # rsync can't do that. We need to use a different tool. See e.g.
    # https://superuser.com/questions/260092/rsync-switch-to-only-compare-timestamps
    # copy only the "large" photos that have file modtimes more recent than the last time this operation was run
    photoslastrun="$path_thesis/assets/photos/.lowres/lastrun"
    if [[ ! -f $photoslastrun ]]; then
      # if, for some reason, the lastrun file does not exist
      # copy over everything and then create the file
      # (except for the .lowres tree itself, and any assets.external files)
      rsync -av "$path_thesis/assets/photos/" "$path_thesis/assets/photos/.lowres/" --exclude ".lowres/" --exclude "assets.external"
      touch "$photoslastrun"
    fi
    # Detect new photos and copy them into .lowres tree
    # https://stackoverflow.com/questions/9612090/how-to-loop-through-file-names-returned-by-find
    # https://stackoverflow.com/questions/5241625/find-and-copy-files
    # Run in a subshell so the change of directory does not affect the rest
    # of the script ("cp --parents" needs paths relative to photos/).
    (
      cd "$path_thesis/assets/photos" || exit 1
      find . -type f -cnewer "$photoslastrun" ! -path "./.lowres/*" ! -name "*.external" -print -exec cp --parents "{}" .lowres \;
    )
    # Convert all non-JPG images (except for PDF, SVG, and other non-images) to JPG
    msg "<thesis> Convert all non-JPG images to JPG"
    find "$path_thesis/assets/photos/.lowres/" -type f ! -name "*.pdf" ! -name "*.svg" ! -name "*.external" ! -name "lastrun" ! -name "*.jpg" -print -exec mogrify -format jpg "{}" \;
    # Remove the now redundant non-JPG files in .lowres/
    msg "<thesis> Delete the redundant non-JPG files in .lowres/ tree"
    find "$path_thesis/assets/photos/.lowres/" -type f ! -name "*.pdf" ! -name "*.svg" ! -name "*.external" ! -name "lastrun" ! -name "*.jpg" -print -exec rm "{}" \;
    # Shrink image filesize in-place to roughly 300kb in size
    # (only files larger than 500k are touched)
    msg "<thesis> Shrink images in .lowres/ tree to <=300K"
    # https://stackoverflow.com/questions/6917219/imagemagick-scale-jpeg-image-with-a-maximum-file-size
    find "$path_thesis/assets/photos/.lowres/" -size +500k -type f -name "*.jpg" -print -exec convert "{}" -define jpeg:extent=300kb "{}" \;
    # update the modification and access time on the photoslastrun file
    touch "$photoslastrun"
  fi

  ## Handle knitr or pgfSweave jobs (each requires separate treatment)
  ## But how should we tell the difference between them?
  ## There is no obvious way to tell the difference (apart from reading the *.Rnw file)
  ## IN ALL KNITR DIRECTORIES, CREATE A FILE NAMED: .knitme
  # If the file .knitme exists in the current directory,
  # run knitr commands, otherwise run pgfsweave commands
  msg "--- Looking for .knitme"
  if [[ -e .knitme ]]; then
    # Run knitr commands for this job
    msg "<cheRTeX> -----------------------"
    msg "<cheRTeX> This is a job for knitr"
    msg "<cheRTeX> -----------------------"
    # Knit
    msg "<cheRTeX> Knitting..."
    Rscript -e "library(knitr); library(methods); knit('$jobname.$jobfiletype')"
    # Introduce delay to give time to read Rscript exit status
    msg "<cheRTeX> -----------------------"
    msg "<cheRTeX> Rscript knitr completed"
    msg "<cheRTeX> -----------------------"
    simpledelay.sh 2
  else
    # Run pgfSweave commands
    msg "<cheRTeX> ---------------------------"
    msg "<cheRTeX> This is a job for pgfSweave"
    msg "<cheRTeX> ---------------------------"
    # Tangle
    msg "<cheRTeX> Tangling..."
    R CMD Stangle "$jobname.$jobfiletype"
    # Weave
    msg "<cheRTeX> Weaving..."
    R CMD pgfsweave --graphics-only "$jobname.$jobfiletype"
    # Introduce delay to give time to read R CMD exit status
    msg "<cheRTeX> -------------------------"
    msg "<cheRTeX> R CMD pgfsweave completed"
    msg "<cheRTeX> -------------------------"
    simpledelay.sh 2
  fi

  # Run vc script if vc exists in working directory
  if [[ -f vc ]]; then
    msg "<cheRTeX> Running vc script..."
    ./vc
  fi

  # Run pdflatex, bibtex, and company
  # The flag only selects the message: latexmk does not need us to invoke
  # "-r .latexmkrc", it finds and uses the RC file automatically (as
  # evidenced by latexmk's output), so the command is the same either way.
  # ${var-} keeps the message working when no colours are defined.
  if [[ $ltxmkrc == true ]]; then
    msg "${On_Cyan-}<cheRTeX> Calling LaTeXMK, detected .latexmkrc file${Color_Off-}"
  else
    msg "${On_Cyan-}<cheRTeX> Calling LaTeXMK${Color_Off-}"
  fi
  simpledelay.sh 2
  latexmk -pdf -bibtex "$jobname"
else
  # No job file was given, or menu mode was requested explicitly.
  # In this case, present a menu of choices
  msg "This is cheRTeX POST-PROCESSING"
  msg "<1> 'pdf-all' -- Process all .tikz files to pdf graphics"
  msg "<2> 'clean-up' -- Remove all auxiliary files"
  msg "<3> 'wipe-dir' -- Remove all non-essential files and subdirectories"
  msg "Any other input exits the program"
  # -r: keep backslashes literal; end-of-input counts as "no choice"
  read -r usrchoice || usrchoice=""

  case "$usrchoice" in
    pdf-all | 1)
      msg "<1> 'pdf-all' chosen"
      # Collect the *.tikz files of the current directory. nullglob makes the
      # list empty (instead of the literal pattern) when nothing matches.
      shopt -s nullglob
      # shellcheck disable=SC2206 # glob expansion is intended here
      tikz_list=($tikzfiles)
      shopt -u nullglob
      tikzfilenumber=${#tikz_list[@]}
      msg "cheRTeX detected $tikzfilenumber TikZ files for processing"
      msg "Starting TikZ file processing..."
      simpledelay.sh 2
      if ((tikzfilenumber > 0)); then
        for tikzfilename in "${tikz_list[@]}"; do
          # Call tikz2pdf
          msg "<Executing> tikz2pdf $tikzfilename"
          tikz2pdf --once "$tikzfilename"
        done
      fi
      msg "Completed TikZ file processing"
      ;;

    clean-up | 2)
      msg "<2> 'clean-up' chosen"
      # Turn the space-separated glob list in $auxfiles into the alternatives
      # of an extended regex for find:
      #   1. replace "*." or "*-" at the start of each word with nothing
      #      (important: "*-tikzDictionary" must not leave "*-" behind,
      #      otherwise it trips up the -iregex parameter)
      #   2. replace a trailing "* " with ".* " (regex syntax)
      #   3. replace each inter-word space with the pipe symbol
      # giving e.g.  4ct|4tc|acn|...|fdb.*|figlist|...|tikzDictionary|...|tmp
      # https://www.baeldung.com/linux/process-a-bash-variable-with-sed
      # https://unix.stackexchange.com/a/15337
      # NOTE(review): the find regex below requires a literal "." in front of
      # every alternative, so a file named "<job>-tikzDictionary" looks like
      # it would not match; confirm against real file names.
      auxregex=$(printf '%s\n' "$auxfiles" | sed -r -e "s/\*[\.-]//g" -e "s/\* /\.\* /g" -e "s/ /|/g")
      # "rm -v" provides nice output of which files were cleaned up.
      # Subshell: the cd is only needed so that find reports "./name" paths.
      (
        cd "$path_wd" || exit 1
        find . -maxdepth 1 -type f -regextype posix-extended -iregex ".*\.($auxregex)" -exec rm -v "{}" \;
      )
      ;;

    wipe-dir | 3)
      msg "<3> 'wipe-dir' chosen"
      ## Remove all but the essential files
      # get a timestamp
      timestamp=$(date +%s)
      # create a unique tmp-dir name
      tmpdirname="${timestamp}-${dir_wd}"
      backup_dir="$temp_folder/$tmpdirname"
      # make a directory in chepec/tmp with the name of the current dir
      mkdir -- "$backup_dir"
      # Copy the contents of the current directory to the backup directory.
      # Under "set -e" a failed copy stops the script before anything is removed.
      cp -R -- * "$backup_dir"
      # Empty the current directory of all contents
      rm -R -- * # note: hidden files and subdir unaffected
      # Return the stuff we want to keep after wipe-dir. Every pattern is
      # restored only if it matches an existing regular file, so a project
      # without e.g. the (deprecated) vc files does not abort half-way with
      # the directory already wiped.
      #   *.Rnw                   Sweave/knitr sources
      #   vc, vc.tex, vc-git.awk  vc script and its companions
      #   *.Rproj                 removal is unnecessary and makes RStudio less useful
      #   *.rda                   peak-data files, which "cost" a lot to create
      #   *.Rmd                   R markdown source files
      #   *.css                   css files [for sample-matrix]
      #   .knitme                 empty file used to indicate knitr jobs (as a
      #                           dot-file it is neither backed up nor removed
      #                           by the "*" globs above, so this is normally
      #                           a no-op)
      for keep_pattern in '*.Rnw' 'vc' 'vc.tex' 'vc-git.awk' '*.Rproj' '*.rda' '*.Rmd' '*.css' '.knitme'; do
        # only the pattern part is left unquoted so that it is glob-expanded
        for kept in "$backup_dir"/$keep_pattern; do
          [[ -f $kept ]] || continue
          cp -- "$kept" .
        done
      done
      ;;
  esac

  die "All done. Exiting..." 0
fi
# keep track of runtime of entire script
endtime=$(date +%s)
runtime=$((endtime - starttime))

# send push message to Gotify server
# if runtime is longer than X minutes (suitable limit perhaps 3 min)
if ((runtime > 180)) && [[ $disable_alert == "false" ]]; then
  # POST request to Gotify server works without needing Gotify CLI on this box
  # NOTE, multi-line bash command fails if interrupted by comment lines!
  # Hide CURL response (-o /dev/null) and progress bar (--silent)
  # https://gotify.net/docs/pushmsg
  # - "$*" joins the script arguments into ONE word; "$@" inside a longer
  #   string would split the form field into several curl arguments.
  # - --fail makes curl return non-zero on an HTTP error, and the "if" keeps
  #   a failed notification from aborting the script under "set -e".
  # NOTE(review): the application token is hard-coded in the URL; consider
  # reading it from the environment or from a file outside version control.
  if curl --fail -X POST "https://gotify.chepec.se/message?token=A8nO3zYJ-R1wG__" \
    -F "message=chertex.sh $*. Completed in $runtime s" \
    -F "title=$dir_wd" -F "priority=5" \
    -o /dev/null --silent; then
    msg "Push notification sent to Gotify"
  else
    msg "Push notification to Gotify failed"
  fi
  # https://stackoverflow.com/questions/3872427/how-to-send-line-break-with-curl
  # I could not make multi-line message (to Gotify) work here, however I tried. Giving up for now.
fi

# "Job completed" block, 37 columns wide.
# ${var-} keeps the block working when no colours are defined; the colour
# codes are passed through %b so that escape sequences stored in them are
# interpreted without using variables as the printf format string.
msg "${On_Cyan-}-------------------------------------${Color_Off-}"
# the padding for runtime makes the formatting work
# three digits for seconds is enough for just above 15 minutes
printf '%b=== chertex.sh completed in %03d s ===%b\n' "${On_Cyan-}" "$runtime" "${Color_Off-}" >&2
# Depending on whether the clock uses summer or wintertime, the date string
# length will differ by one (CEST vs CET). Just to be neat, the date is
# left-aligned in a fixed 29-character field (the length of the CEST form),
# so the closing "===" lines up in both cases.
printf '%b=== %-29s ===%b\n' "${On_Cyan-}" "$(date)" "${Color_Off-}" >&2
msg "${On_Cyan-}-------------------------------------${Color_Off-}"

simpledelay.sh 3
exit 0