You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

501 lines
22 KiB

#!/usr/bin/env bash
## Written May 14, 2010
## Taha Ahmed
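# This is the first bash script where I implemented the ideas outlined by an
# external guide (the reference is missing from this comment).
# I don't understand the point of traps, so I skipped the "-E" option of `set`
# (the option that lets functions inherit an ERR trap).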
set -euo pipefail
#######################################
# Print the usage guide and terminate the script.
# Globals:   BASH_SOURCE (read, for the script name)
# Arguments: none
# Outputs:   the usage text on stdout
# Returns:   does not return; exits with status 0
#######################################
usage() {
  cat <<EOF
Usage: $(basename "${BASH_SOURCE[0]}") <flags> job.Rnw
Compiles R Sweave documents using knitr and LaTeXMK.
Filename extension must be .Rnw.
Available flags:
+ --help --> display this usage guide
+ --no-alert --> disable gotify alert (--silent also works)
+ --menu --> enter "menu" mode where auxiliary commands can be run (--aux)
This script treats certain filenames in a special manner:
+ "thesis" --> triggers a chain of special commands, see source code.
In addition, the internal execution of this script is modified by the existence of
certain files in the working directory:
+ ./.knitme --> use knitr::knit() instead of pgfSweave().
+ ./.latexmkrc --> apply RC file to latexmk command.
+ ./vc --> run vc script (integrates git with LaTeX, deprecated by gitinfo2, kept for backwards support).
EOF
  # The here-document terminator and the closing brace were missing, which made
  # the function swallow the remainder of the script. Asking for help is not an
  # error, so leave with a success status once the guide has been shown.
  exit 0
}
# keep track of runtime of entire script (epoch seconds at start-up)
starttime=$(date +%s)
# load colours (On_Cyan and Color_Off are used further down in this script)
# NOTE(review): the path below ends in a directory separator, so the name of the
# colour definition file appears to be missing. `-f` is false for a directory,
# hence nothing is sourced until the file name is filled in again.
if [ -f "/home/taha/.local/bin/" ]; then
  . "/home/taha/.local/bin/"
fi
# Make the colour dependency non-critical: under `set -u` an undefined colour
# variable would abort the script, so fall back to empty strings (= no colour).
: "${On_Cyan:=}" "${Color_Off:=}"
#######################################
# Print one message line on stderr, expanding backslash escapes
# (this is what lets colour codes inside the message take effect).
# Arguments: $1 - message text (optional; an empty line is printed if omitted)
# Outputs:   the message on stderr, nothing on stdout
#######################################
msg() {
  # printf '%b' expands escapes like `echo -e` does, but cannot mistake a
  # message such as "-n" for an option.
  printf '%b\n' "${1-}" >&2
}
#######################################
# Print a final message, pause briefly, then terminate the script.
# Arguments: $1 - message to print (via msg, i.e. on stderr)
#            $2 - delay in seconds before exiting (optional, default 3)
#            $3 - exit status (optional, default 0 as before; pass a non-zero
#                 value to signal a failure to the caller of the script)
# Examples:
#   die "some message"
#   die "some message and wait 6 seconds before exiting" 6
#   die "some message and exit immediately" 0
#   die "some failure, default delay, exit status 1" "" 1
#######################################
die() {
  local message=${1-}
  msg "$message"
  # short delay to aid reading last message in case terminal closes on exit
  # if $2 was provided, set it as delay
  # if $2 is unset or null, use a 3 second delay
  sleep "${2:-3}"
  exit "${3:-0}"
}
#######################################
# Parse the command line: consume leading flags, collect the rest in `args`.
# Globals:   show_menu     (written) "true" when the auxiliary menu is requested
#            disable_alert (written) "true" when the gotify alert is switched off
#            args          (written) array of the remaining non-flag arguments
# Arguments: the script's positional parameters ("$@")
# Returns:   0; unknown flags terminate the script through die,
#            --help terminates it through usage
# NOTE(review): a later comment in this script says that the case of more than
# one argument is handled here, but no such check is visible; none is invented.
#######################################
parse_params() {
  # default values of variables set from params
  show_menu=false
  disable_alert=false

  while :; do
    case "${1-}" in
    -h | --help) usage ;;
    -v | --verbose) set -x ;;
    --aux | --menu) show_menu=true ;;
    --no-alert | --silent) disable_alert=true ;;
    -?*) die "Unknown option: $1" ;;
    *) break ;;
    esac
    # move on to the next word; without this the loop would never terminate
    shift
  done

  args=("$@")

  # If no args were given, show aux menu mode
  # note that flags don't count as args, so as long as no jobname was given
  # (irrespective of provided flags), aux menu will be entered
  if [[ ${#args[@]} -eq 0 ]]; then
    show_menu=true
  fi
  return 0
}
# Parse the command line (flags decide show_menu and disable_alert).
# gotify alert is enabled by default (assuming the job takes longer than X seconds)
parse_params "$@"
# setup_colors
msg "-----------------------------------------------------------------------"
msg "cheRTeX -- a script for processing R--Sweave/knitr--LaTeX/TikZ projects"
msg "^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^"
msg "MMXX -- -- CHEPEC doctoral degree project"
msg "-----------------------------------------------------------------------"
## If the file .latexmkrc exists in the current directory, set ltxmkrc=true,
## otherwise ltxmkrc=false (the flag is read again right before latexmk is called)
msg "--- Looking for .latexmkrc"
if [[ -e .latexmkrc ]]; then
  ltxmkrc=true
  msg "+++ LaTeXMK RC file invoked"
else
  ltxmkrc=false
  msg "--- This job did not request LaTeXMK RC file"
fi
# define some constants
# NOTE(review): no assignment of path_wd is visible before this point; an
# existing value is kept, otherwise the current working directory is assumed.
path_wd=${path_wd:-$PWD}
dir_wd=$(basename "$path_wd")
# define some file types
# aux files
aux_4ct="*.4ct"   # TOC entries for HTML output
aux_4tc="*.4tc"   # TOC entries for HTML output
aux_alg="*.alg"   # glossaries
aux_bbl="*.bbl"   # bibliography
aux_blg="*.blg"   # bibliography
aux_glo="*.glo"   # glossaries
aux_gls="*.gls*"  # glossaries and bib2gls
aux_ist="*.ist"   # glossaries (makeindex style file)
aux_lg="*.lg"     # tex4ht
aux_lol="*.lol"   # list of hyperlinks (custom, used in thesis)
aux_maf="*.maf"   # minitoc
aux_mtc="*.mtc*"  # minitoc
aux_out="*.out"   # hyperref
aux_stc="*.stc*"  # minitoc
aux_tmp="*.tmp"   # tex4ht
aux_xdy="*.xdy"   # glossaries (xindy)
aux_xref="*.xref" # pandoc or tex4ht cross-refs
# The patterns below are referenced by $auxfiles but had no definition here,
# which is fatal under `set -u`. They are restored from the extension list that
# the clean-up menu entry documents (acn acr ... fdb* figlist ... makefile ...
# run* ... *-tikzDictionary tdo toc).
# NOTE(review): confirm against the project's own list.
aux_acn="*.acn"
aux_acr="*.acr"
aux_aux="*.aux"
aux_dep="*.dep"
aux_dpth="*.dpth"
aux_fdb="*.fdb*"
aux_fig="*.figlist"
aux_fls="*.fls"
aux_glg="*.glg"
aux_lob="*.lob"
aux_lof="*.lof"
aux_log="*.log"
aux_lor="*.lor"
aux_los="*.los"
aux_lot="*.lot"
aux_lox="*.lox"
aux_make="*.makefile"
aux_map="*.map"
aux_run="*.run*"
aux_slg="*.slg"
aux_slo="*.slo"
aux_sls="*.sls"
aux_tikz="*-tikzDictionary"
aux_tdo="*.tdo"
aux_toc="*.toc"
# all aux files in a single string (space separated glob patterns)
auxfiles="${aux_4ct} ${aux_4tc} ${aux_acn} ${aux_acr} ${aux_alg} ${aux_aux} ${aux_bbl} ${aux_blg} ${aux_dep} ${aux_dpth} ${aux_fdb} ${aux_fig} ${aux_fls} ${aux_glg} ${aux_glo} ${aux_gls} ${aux_ist} ${aux_lg} ${aux_lob} ${aux_lof} ${aux_log} ${aux_lol} ${aux_lor} ${aux_los} ${aux_lot} ${aux_maf} ${aux_out} ${aux_lox} ${aux_make} ${aux_map} ${aux_mtc} ${aux_run} ${aux_slg} ${aux_slo} ${aux_sls} ${aux_stc} ${aux_tikz} ${aux_tdo} ${aux_toc} ${aux_xdy} ${aux_xref} ${aux_tmp}"
# Main dispatch.
#  * show_menu == "false": compile the given *.Rnw job (optional thesis
#    pre-processing, knitr or pgfSweave, vc, latexmk) and fall through to the
#    "job completed" block after this statement.
#  * otherwise: show the post-processing menu, run the chosen action and exit.
# Reads: show_menu, args, path_wd, dir_wd, path_thesis, ltxmkrc, auxfiles.
if [[ $show_menu == "false" ]]; then
  # NOTE(review): jobfilename is not assigned anywhere in the visible script;
  # the first non-flag argument collected by parse_params is used.
  jobfilename=${jobfilename:-${args[0]-}}
  # Check if the argument contains a filetype
  # (assumes a complete filename was passed)
  jobfiletype=${jobfilename##*.} # File extension (text after the LAST dot)
  jobname=${jobfilename%.*}      # Filename without extension part
  msg "<cheRTeX> Detected filename: $jobname"
  msg "<cheRTeX> Detected extension: $jobfiletype"
  # Verify that the file extension is "[Rr][Nn][Ww]"
  if [[ $jobfiletype == [Rr][Nn][Ww] ]]; then
    msg "<cheRTeX> Detected *.Rnw extension"
  else
    die "This script only supports *.Rnw files"
  fi
  # Introducing a short delay to enable on-screen reading of previous echo
  sleep 2

  #### Special treatment for thesis
  # NOTE(review): path_thesis is not assigned in the visible script; if it is
  # unset the comparison is simply false and this section is skipped.
  if [[ $path_wd == "${path_thesis-}" && $jobname == "$dir_wd" ]]; then
    # Restore thesis.bcf from latest commit if modified in workspace
    # Background: any LaTeX run that finishes uncleanly (due to errors, etc.) leaves
    # the bcf file in an incomplete state. Since we have at least one plot in the thesis
    # that reads the bcf file, this in turn causes the subsequent chertex compilation
    # to fail. So check whether the bcf file shows up in `git status --porcelain`,
    # and if so `git restore` it.
    git_status=$(git status --porcelain) || git_status=""
    if [[ $git_status == *"$jobname.bcf"* ]]; then
      msg "<thesis> -------------------------------"
      msg "<thesis> Restoring thesis.bcf"
      msg "<thesis> -------------------------------"
      git -C "$path_wd" restore "$jobname.bcf"
      sleep 2
    fi

    # Fetch external assets by reading any assets.external files in assets/ tree
    # NOTE: blank lines in assets.external files are skipped
    msg "<thesis> -------------------------------"
    msg "<thesis> Getting external assets"
    msg "<thesis> -------------------------------"
    saved_ifs=$IFS
    # find all files named "assets.external" in the assets/ tree
    # (NUL-delimited so that paths containing blanks survive)
    while IFS= read -r -d '' externalfilepath; do
      while IFS='' read -r asset || [[ -n "$asset" ]]; do
        # $asset contains one line from the current assets.external
        [[ -n $asset ]] || continue
        # assetpathtarget is the directory of the current assets.external file
        assetpathtarget=$(dirname "$externalfilepath")
        # A line may read "<asset path>><local target folder>". '>' is used as
        # separator because it is unlikely to clash with any path specification,
        # so blanks in paths need no special treatment.
        IFS='>'
        # asset is read into an array as tokens separated by IFS
        read -ra asset_array <<< "$asset"
        # sanity check for array length
        if ((${#asset_array[@]} > 2)); then
          die "<thesis> Cannot handle more than one $IFS character per line"
        fi
        if ((${#asset_array[@]} > 1)); then
          # NOTE(review): the two assignments below were missing and are
          # reconstructed from the surviving comments
          # redefine assetpathtarget to include the local subdir
          assetpathtarget="$assetpathtarget/${asset_array[1]}"
          # also redefine $asset so we keep only the asset path
          asset=${asset_array[0]}
          # create the local subdir inside assets/<current>/
          mkdir -p "$assetpathtarget" # -p suppresses error if dir already exists
        fi
        msg "<thesis> Copying $asset to $assetpathtarget"
        # cp but don't overwrite existing files
        # $asset stays unquoted as in the original: with IFS set to '>' blanks do
        # not split it, while shell wildcards in the asset path still expand
        cp --preserve=timestamps --no-clobber --recursive $asset "$assetpathtarget"
        # except we want to overwrite the biblatex library file (inside the assets/references/ directory)
        # as well as the zotero.sqlite file, we'll do that by checking the target dirname and only run
        # the copy operation (which will overwrite stuff) if it is "references"
        assetdirnametarget=$(basename "$assetpathtarget")
        if [[ $assetdirnametarget == "references" ]]; then
          msg "<thesis> Overwriting Zotero biblatex library and database in assets/references/"
          cp --preserve=timestamps $asset "$assetpathtarget"
        fi
      done < "$externalfilepath"
    done < <(find "$path_wd/assets/" -type f -name "assets.external" -print0)
    # do not let the '>' separator leak into the rest of the script
    IFS=$saved_ifs

    # Create low-res photos on-the-fly from existing photos/
    msg "<thesis> -------------------------------"
    msg "<thesis> Create low-res photos tree"
    msg "<thesis> -------------------------------"
    # Copy existing photos to the assets/photos/.lowres/ tree. rsync compares
    # timestamp AND size, but here only timestamps should count, so new photos
    # are detected with find, relative to the time this operation last ran.
    # NOTE(review): photoslastrun is not assigned in the visible script; the
    # default is inferred from the "lastrun" file that the find commands below
    # exclude inside .lowres/ -- confirm.
    photoslastrun=${photoslastrun:-$path_thesis/assets/photos/.lowres/lastrun}
    if [[ ! -f "$photoslastrun" ]]; then
      # if, for some reason, the lastrun file does not exist
      # copy over everything and then create the file
      # (except for the .lowres tree itself, and any assets.external files)
      rsync -av "$path_thesis/assets/photos/" "$path_thesis/assets/photos/.lowres/" --exclude ".lowres/" --exclude "assets.external"
      touch "$photoslastrun"
    fi
    # Detect new photos and copy them into .lowres tree
    cd "$path_thesis/assets/photos"
    find . -type f -cnewer "$photoslastrun" ! -path "./.lowres/*" ! -name "*.external" -print -exec cp --parents "{}" .lowres \;
    # revert the effects of cd above. Redirect to null suppresses the output.
    cd - >/dev/null
    # Convert all non-JPG images (except for PDF, SVG, and other non-images) to JPG
    msg "<thesis> Convert all non-JPG images to JPG"
    find "$path_thesis/assets/photos/.lowres/" -type f ! -name "*.pdf" ! -name "*.svg" ! -name "*.external" ! -name "lastrun" ! -name "*.jpg" -print -exec mogrify -format jpg "{}" \;
    # Remove the now redundant non-JPG files in .lowres/
    msg "<thesis> Delete the redundant non-JPG files in .lowres/ tree"
    find "$path_thesis/assets/photos/.lowres/" -type f ! -name "*.pdf" ! -name "*.svg" ! -name "*.external" ! -name "lastrun" ! -name "*.jpg" -print -exec rm "{}" \;
    # Shrink image filesize in-place to roughly 300kb in size
    msg "<thesis> Shrink images in .lowres/ tree to <=300K"
    find "$path_thesis/assets/photos/.lowres/" -size +500k -type f -name "*.jpg" -print -exec convert "{}" -define jpeg:extent=300kb "{}" \;
    # update the modification and access time on the photoslastrun file
    touch "$photoslastrun"
  fi

  ## Handle knitr or pgfSweave jobs (each requires separate treatment)
  # If the file .knitme exists in the current directory,
  # run knitr commands, otherwise run pgfsweave commands
  msg "--- Looking for .knitme"
  if [[ -e .knitme ]]; then
    # Run knitr commands for this job
    msg "<cheRTeX> -----------------------"
    msg "<cheRTeX> This is a job for knitr"
    msg "<cheRTeX> -----------------------"
    # Knit
    msg "<cheRTeX> Knitting..."
    Rscript -e "library(knitr); library(methods); knit('$jobname.$jobfiletype')"
    # Introduce delay to give time to read Rscript exit status
    msg "<cheRTeX> -----------------------"
    msg "<cheRTeX> Rscript knitr completed"
    msg "<cheRTeX> -----------------------"
    sleep 2
  else
    # Run pgfSweave commands
    msg "<cheRTeX> ---------------------------"
    msg "<cheRTeX> This is a job for pgfSweave"
    msg "<cheRTeX> ---------------------------"
    # Tangle
    msg "<cheRTeX> Tangling..."
    R CMD Stangle "$jobname.$jobfiletype"
    # Weave
    msg "<cheRTeX> Weaving..."
    R CMD pgfsweave --graphics-only "$jobname.$jobfiletype"
    # Introduce delay to give time to read R CMD exit status
    msg "<cheRTeX> -------------------------"
    msg "<cheRTeX> R CMD pgfsweave completed"
    msg "<cheRTeX> -------------------------"
    sleep 2
  fi

  # Run vc script if vc exists in working directory
  msg "<cheRTeX> Running vc script..."
  if [[ -f vc ]]; then
    ./vc
  fi

  # Run pdflatex, bibtex, and company
  # The if-else only selects the message: latexmk does not need us to invoke
  # "-r .latexmkrc", it finds and uses the RC file automatically
  # (as evidenced by latexmk's output)
  if [[ ${ltxmkrc:-false} == "true" ]]; then
    msg "${On_Cyan}<cheRTeX> Calling LaTeXMK, detected .latexmkrc file${Color_Off}"
  else
    msg "${On_Cyan}<cheRTeX> Calling LaTeXMK${Color_Off}"
  fi
  sleep 2
  latexmk -pdf -bibtex "$jobname"
else
  # No job file was given (or the menu was requested explicitly)
  # In this case, present a menu of choices
  msg "This is cheRTeX POST-PROCESSING"
  msg "<1> 'pdf-all' -- Process all .tikz files to pdf graphics"
  msg "<2> 'clean-up' -- Remove all auxiliary files"
  msg "<3> 'wipe-dir' -- Remove all non-essential files and subdirectories"
  msg "Any other input exits the program"
  # end-of-input counts as "any other input" instead of tripping `set -e`
  read -r usrchoice || usrchoice=""

  case $usrchoice in
  pdf-all | 1)
    msg "<1> 'pdf-all' chosen"
    # Collect the *.tikz files of the current directory in an array so they can
    # be counted and processed even when names contain blanks
    shopt -s nullglob
    tikz_list=(*.tikz)
    shopt -u nullglob
    tikzfilenumber=${#tikz_list[@]}
    msg "cheRTeX detected $tikzfilenumber TikZ files for processing"
    msg "Starting TikZ file processing..."
    sleep 2
    # the ${arr[@]+...} form keeps an empty array harmless under `set -u`
    for tikzfilename in ${tikz_list[@]+"${tikz_list[@]}"}; do
      # Call tikz2pdf
      msg "<Executing> tikz2pdf $tikzfilename"
      tikz2pdf --once "$tikzfilename"
    done
    msg "Completed TikZ file processing"
    ;;
  clean-up | 2)
    msg "<2> 'clean-up' chosen"
    # Turn the glob patterns in $auxfiles into one alternation for find -iregex:
    # strip the leading "*." or "*-" of each word, turn a trailing "*" into ".*",
    # and replace the blanks between words with the pipe symbol, e.g.
    #   "*.acn *.fdb* *-tikzDictionary" --> "acn|fdb.*|tikzDictionary"
    # (no "*-" may remain in the string, it trips up the -iregex parameter)
    # NOTE(review): the regex below requires a "." before the extension, so
    # "<name>-tikzDictionary" files do not look like they can match -- confirm.
    auxregex=$(printf '%s\n' "$auxfiles" | sed -r -e 's/\*[\.-]//g' -e 's/\* /\.\* /g' -e 's/ /|/g')
    # "rm -v" provides nice output of which files were cleaned up
    # (the subshell keeps the cd from affecting the rest of the script)
    (
      cd "$path_wd" &&
        find . -maxdepth 1 -type f -regextype posix-extended -iregex ".*\.($auxregex)" -exec rm -v "{}" \;
    )
    ;;
  wipe-dir | 3)
    msg "<3> 'wipe-dir' chosen"
    ## Remove all non-essential files
    # get a timestamp
    timestamp=$(date +%s)
    # NOTE(review): temp_folder (the tmp location) and tmpdirname are not
    # assigned in the visible script. Refuse to wipe without a backup location;
    # the unique name is rebuilt from current dir name + timestamp -- confirm.
    : "${temp_folder:?temp_folder (backup location for wipe-dir) is not set}"
    tmpdirname=${tmpdirname:-$dir_wd-$timestamp}
    backupdir=$temp_folder/$tmpdirname
    # make a directory in the tmp location with the name of the current dir
    mkdir "$backupdir"
    # Copy the contents of the current directory to the backup directory
    cp -R -- * "$backupdir"
    # Empty the current directory of all contents
    rm -R -- * # note: hidden files are unaffected
    # Return the stuff we want to keep after wipe-dir. Every candidate is
    # checked first, so file(type)s that do not exist are silently skipped:
    #   *.Rnw, vc, vc.tex, vc-git.awk -- sources and vc script files
    #   *.Rproj  -- removal is unnecessary and makes RStudio less useful
    #   *.rda    -- peak-data files, which "cost" a lot to create
    #   *.Rmd    -- R markdown source files
    #   *.css    -- css files [for sample-matrix]
    #   .knitme  -- empty file used to indicate knitr jobs
    for keeper in "$backupdir"/*.Rnw "$backupdir"/vc "$backupdir"/vc.tex \
      "$backupdir"/vc-git.awk "$backupdir"/*.Rproj "$backupdir"/*.rda \
      "$backupdir"/*.Rmd "$backupdir"/*.css "$backupdir"/.knitme; do
      [[ -e $keeper ]] || continue
      cp "$keeper" .
    done
    ;;
  esac
  die "All done. Exiting..." 0
fi
# "Job completed" block: report the total runtime, optionally push a
# notification to a Gotify server, print the closing banner and exit.
# Reads: starttime, disable_alert, dir_wd, On_Cyan, Color_Off and "$@".

# keep track of runtime of entire script
endtime=$(date +%s)
runtime=$((endtime - starttime))

# send push message to Gotify server
# if runtime is longer than X minutes (suitable limit perhaps 3 min)
# NOTE(review): the server URL (which carries the app token) is empty in this
# copy of the script, and an empty URL makes curl fail and `set -e` abort.
# The endpoint is therefore read from the GOTIFY_URL environment variable.
gotify_url=${GOTIFY_URL:-}
if ((runtime > 180)) && [[ $disable_alert == "false" ]]; then
  if [[ -z $gotify_url ]]; then
    msg "Gotify endpoint not configured (GOTIFY_URL), push notification skipped"
  # POST request to Gotify server works without needing Gotify CLI on this box
  # Hide CURL response (-o /dev/null) and progress bar (--silent)
  # "$*" joins the script arguments into ONE word; "$@" inside a longer string
  # would split the form field into several curl arguments.
  # NOTE(review): the form field name was lost; "message" is Gotify's field name
  elif curl -X POST "$gotify_url" \
    -F "message=$*. Completed in $runtime s" \
    -F "title=$dir_wd" -F "priority=5" \
    -o /dev/null --silent; then
    msg "Push notification sent to Gotify"
  else
    # a failed notification must not abort the job summary below
    msg "Push notification to Gotify failed"
  fi
fi

msg "${On_Cyan}-------------------------------------${Color_Off}"
# the padding for runtime makes the formatting work
# three digits for seconds is enough for just above 15 minutes
printf '%b=== completed in %03d s ===%b\n' "$On_Cyan" "$runtime" "$Color_Off" >&2
# Depending on whether the clock uses summer or wintertime, the date string
# length differs by one (CEST vs CET). Padding the date to the longer width
# (29 characters) keeps the block aligned without a separate branch per zone.
printf -v datestamp '%-29s' "$(date)"
msg "${On_Cyan}=== ${datestamp} ===${Color_Off}"
msg "${On_Cyan}-------------------------------------${Color_Off}"
# short delay to aid reading the summary in case the terminal closes on exit
sleep 3
exit 0