#!/usr/bin/env bash
## Written May 14, 2010
## Taha Ahmed

# This is the first bash script where I implemented the ideas outlined by
# https://betterdev.blog/minimal-safe-bash-script-template/
# I don't understand the point of traps, so I skipped "-E"
# https://vaneyckt.io/posts/safer_bash_scripts_with_set_euxo_pipefail/
set -euo pipefail

#######################################
# Print the usage guide on stdout and exit.
# NOTE(review): the "Usage:" synopsis line was reconstructed because the
# here-document opener was damaged; adjust the wording if it differs from
# the intended one.
# Arguments: none
# Outputs:   usage text on stdout
# Returns:   never returns; exits with the status of cat
#######################################
usage() {
  cat <<EOF
Usage: ${0##*/} [--help] [--verbose] [--no-alert] [--menu] job.Rnw

Compiles R Sweave documents using knitr and LaTeXMK.
Filename extension must be .Rnw.

Available flags:
+ --help      --> display this usage guide
+ --no-alert  --> disable gotify alert (--silent also works)
+ --menu      --> enter "menu" mode where auxiliary commands can be run (--aux)

This script treats certain filenames in a special manner:
+ "thesis"    --> triggers a chain of special commands, see source code.

In addition, the internal execution of this script is modified
by the existence of certain files in the working directory:
+ ./.knitme    --> use knitr::knit() instead of pgfSweave().
+ ./.latexmkrc --> apply RC file to latexmk command.
+ ./vc         --> run vc script (integrates git with LaTeX, deprecated
                   by gitinfo2, kept for backwards support).
EOF
  exit
}

# keep track of runtime of entire script
starttime=$(date +%s)

# load colours
if [[ -f "/home/taha/.local/bin/echo-colours.sh" ]]; then
  # shellcheck source=/dev/null
  . "/home/taha/.local/bin/echo-colours.sh"
fi
# The colour variables used by this script default to empty strings, so a
# missing colours file no longer aborts the script under "set -u".
: "${On_Cyan:=}" "${Color_Off:=}"

#######################################
# Write a message to stderr, interpreting backslash escapes (echo -e).
# Arguments: $1 - message (optional, defaults to an empty line)
#######################################
msg() {
  echo >&2 -e "${1-}"
}

#######################################
# Print a message, pause, and terminate the script.
# Examples:
#   die "some message"
#   die "some message and wait 6 seconds before exiting" 6
#   die "some message and exit immediately" 0
#   die "an error message, default delay, exit status 1" "" 1
# Arguments:
#   $1 - message to print
#   $2 - delay in seconds before exiting (unset or empty: 3)
#   $3 - exit status (unset or empty: 0, the historical behaviour)
#######################################
die() {
  local msg=${1-}
  local delay=${2:-3}
  local code=${3:-0}
  msg "$msg"
  # short delay to aid reading last message in case terminal closes on exit;
  # fall back to plain sleep when the simpledelay.sh helper is not on PATH
  if command -v simpledelay.sh >/dev/null 2>&1; then
    simpledelay.sh "$delay" || true
  else
    sleep "$delay"
  fi
  exit "$code"
}

#######################################
# Parse command-line flags and collect the remaining arguments.
# Globals written: show_menu, disable_alert, args
# Arguments: the script's command line ("$@")
# Returns:   0; exits (status 2) via die on an unknown option
#######################################
parse_params() {
  while :; do
    case "${1-}" in
      -h | --help) usage ;;
      -v | --verbose) set -x ;;
      --aux | --menu) show_menu=true ;;
      --no-alert | --silent) disable_alert=true ;;
      -?*) die "Unknown option: $1" "" 2 ;;
      *) break ;;
    esac
    shift
  done

  args=("$@")

  # If no args were given, show aux menu mode
  # note that flags don't count as args, so as long as no jobname was given
  # (irrespective of provided flags), aux menu will be entered
  if [[ ${#args[@]} -eq 0 ]]; then
    show_menu=true
  fi

  return 0
}

show_menu=false
# gotify alert is enabled by default (assuming the job takes longer than X seconds)
disable_alert=false
parse_params "$@"
# setup_colors

# Only clear when stdout is a terminal; a failing clear (e.g. TERM unset)
# must not abort the script under "set -e".
if [[ -t 1 ]]; then
  clear || true
fi
msg "-----------------------------------------------------------------------"
msg "cheRTeX -- a script for processing R--Sweave/knitr--LaTeX/TikZ projects"
msg "^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^"
msg "MMXX -- taha@chepec.se -- CHEPEC doctoral degree project"
msg "-----------------------------------------------------------------------"

## If the file .latexmkrc exists in the current directory, set ltxmkrc=true
ltxmkrc=false
msg "--- Looking for .latexmkrc"
if [[ -e .latexmkrc ]]; then
  ltxmkrc=true
  msg "+++ LaTeXMK RC file invoked"
else
  msg "--- This job did not request LaTeXMK RC file"
fi

# define some constants
path_wd=${PWD}
dir_wd=$(basename -- "$path_wd")
path_thesis="/media/bay/taha/chepec/thesis"
temp_folder="/media/bay/taha/chepec/tmp"

# define some file types
Rfiletype="R"
TeXfiletype="tex"
tikzfiles="*.tikz"
Rnwfiles="*.Rnw"

# aux files
aux_4ct="*.4ct" # TOC entries for HTML output
aux_4tc="*.4tc" # TOC entries for HTML output
aux_acn="*.acn"
aux_acr="*.acr"
aux_alg="*.alg" # glossaries
aux_aux="*.aux"
aux_bbl="*.bbl" # bibliography
aux_blg="*.blg" # bibliography
aux_dep="*.dep"
aux_dpth="*.dpth"
aux_fdb="*.fdb*"
aux_fig="*.figlist"
aux_fls="*.fls"
aux_glg="*.glg"
aux_glo="*.glo"  # glossaries
aux_gls="*.gls*" # glossaries and bib2gls
aux_ist="*.ist"  # glossaries (makeindex style file)
aux_lg="*.lg"    # tex4ht
aux_lob="*.lob"
aux_lof="*.lof"
aux_log="*.log"
aux_lol="*.lol" # list of hyperlinks (custom, used in thesis)
aux_lor="*.lor"
aux_los="*.los"
aux_lot="*.lot"
aux_maf="*.maf"  # minitoc
aux_mtc="*.mtc*" # minitoc
aux_out="*.out"  # hyperref
aux_lox="*.lox"
aux_make="*.makefile"
aux_map="*.map"
aux_run="*.run*"
aux_slg="*.slg"
aux_slo="*.slo"
aux_sls="*.sls"
aux_stc="*.stc*" # minitoc
aux_tikz="*-tikzDictionary"
aux_tdo="*.tdo"
aux_tmp="*.tmp" # tex4ht
aux_toc="*.toc"
aux_xdy="*.xdy"   # glossaries (xindy)
aux_xref="*.xref" # pandoc or tex4ht cross-refs
# all aux files in a single string (space-separated glob patterns)
auxfiles="${aux_4ct} ${aux_4tc} ${aux_acn} ${aux_acr} ${aux_alg} ${aux_aux} ${aux_bbl} ${aux_blg} ${aux_dep} ${aux_dpth} ${aux_fdb} ${aux_fig} ${aux_fls} ${aux_glg} ${aux_glo} ${aux_gls} ${aux_ist} ${aux_lg} ${aux_lob} ${aux_lof} ${aux_log} ${aux_lol} ${aux_lor} ${aux_los} ${aux_lot} ${aux_maf} ${aux_out} ${aux_lox} ${aux_make} ${aux_map} ${aux_mtc} ${aux_run} ${aux_slg} ${aux_slo} ${aux_sls} ${aux_stc} ${aux_tikz} ${aux_tdo} ${aux_toc} ${aux_xdy} ${aux_xref} ${aux_tmp}"

#######################################
# Copy each argument that is an existing regular file into the current
# directory. Arguments that do not exist (including glob patterns that
# matched nothing and were therefore passed literally) are skipped silently.
# Arguments: candidate file paths
#######################################
restore_if_present() {
  local candidate
  for candidate in "$@"; do
    [[ -f "$candidate" ]] || continue
    cp -- "$candidate" .
  done
}

if [[ $show_menu == "false" ]]; then
  # Check if the argument contains a filetype
  # (assumes a complete filename was passed)
  jobfilename=${args[0]}
  # File extension: the text after the LAST dot, so that names containing
  # several dots (e.g. "my.report.Rnw") are still recognised
  jobfiletype=${jobfilename##*.}
  # Filename without extension part
  jobname=${jobfilename%.*}
  msg " Detected filename: $jobname"
  msg " Detected extension: $jobfiletype"

  # Verify that the file extension is Rnw, rnw or RNW
  case "$jobfiletype" in
    Rnw | rnw | RNW)
      msg " Detected *.Rnw extension"
      jobfiletype="Rnw"
      ;;
    *)
      # third argument requests a non-zero exit status from die
      die "This script only supports *.Rnw files" "" 1
      ;;
  esac
  # Introducing a short delay to enable on-screen reading of previous echo
  simpledelay.sh 2

  #### Special treatment for thesis
  if [[ $path_wd == "$path_thesis" && $jobname == "$dir_wd" ]]; then
    # Restore thesis.bcf from latest commit if modified in workspace
    # Background: any LaTeX run that finishes uncleanly (due to errors, etc.) leaves
    # the bcf file in an incomplete state. Since we have at least one plot in the thesis
    # that reads the bcf file, this in turn causes the subsequent chertex compilation
    # to fail. This mess could be automatically avoided if we pre-emptively check
    # if thesis.bcf is listed in the output of `git status --short`, and if so `git restore` it.
    # https://stackoverflow.com/a/25149786/1198249
    git_status=$(git -C "$path_wd" status --porcelain) || git_status=""
    if [[ $git_status == *"$jobname.bcf"* ]]; then
      msg " -------------------------------"
      msg " Restoring thesis.bcf"
      msg " -------------------------------"
      git -C "$path_wd" restore -- "$jobname.bcf"
      simpledelay.sh 2
    fi

    # Fetch external assets by reading any assets.external files in assets/ tree
    msg " -------------------------------"
    msg " Getting external assets"
    msg " -------------------------------"
    # find all files named "assets.external" in the assets/ tree
    # (NUL-delimited so that paths with whitespace survive)
    while IFS= read -r -d '' externalfilepath; do
      # https://stackoverflow.com/questions/10929453/read-a-file-line-by-line-assigning-the-value-to-a-variable
      while IFS= read -r asset || [[ -n "$asset" ]]; do
        # $asset contains one line from the current assets.external file;
        # empty lines are skipped instead of breaking the cp call below
        [[ -n "$asset" ]] || continue
        # assetpathtarget is the directory of the current assets.external file
        assetpathtarget=$(dirname "$externalfilepath")
        # If $asset contains the separator '>', the second element is a local
        # target subdirectory for the copy operation. '>' is used rather than
        # a space because it is unlikely to clash with any path specification,
        # so spaces in paths need no special treatment.
        # IFS is changed for this read only, not for the rest of the script.
        IFS='>' read -ra asset_array <<< "$asset"
        # sanity check for array length
        if ((${#asset_array[@]} > 2)); then
          die " Cannot handle more than one > character per line" "" 1
        fi
        if ((${#asset_array[@]} > 1)); then
          # redefine assetpathtarget to include the local subdir
          assetpathtarget="$assetpathtarget/${asset_array[1]}"
          # also redefine $asset so we keep only the asset path
          asset="${asset_array[0]}"
          # create the local subdir inside assets/
          # NOTE(review): this mkdir, the closing "fi" and the start of the
          # following msg were reconstructed from damaged source text.
          mkdir -p -- "$assetpathtarget"
        fi
        msg " Copying $asset to $assetpathtarget"
        # Expand $asset without word splitting (IFS emptied just for this
        # expansion) so paths containing spaces stay intact, while any glob
        # characters in the line are still expanded by the shell.
        saved_ifs=$IFS
        IFS=
        # shellcheck disable=SC2206
        asset_sources=($asset)
        IFS=$saved_ifs
        # cp but don't overwrite existing files
        cp --preserve=timestamps --no-clobber --recursive -- "${asset_sources[@]}" "$assetpathtarget"
        # except we want to overwrite the biblatex library file (inside the assets/references/ directory)
        # as well as the zotero.sqlite file, we'll do that by checking the target dirname and only run
        # the copy operation (which will overwrite stuff) if it is "references"
        assetdirnametarget=$(basename -- "$assetpathtarget")
        if [[ $assetdirnametarget == "references" ]]; then
          msg " Overwriting Zotero biblatex library and database in assets/references/"
          cp --preserve=timestamps -- "${asset_sources[@]}" "$assetpathtarget"
        fi
      done < "$externalfilepath"
    done < <(find "$path_wd/assets/" -type f -name "assets.external" -print0)

    # Create low-res photos on-the-fly from existing photos/
    msg " -------------------------------"
    msg " Create low-res photos tree"
    msg " -------------------------------"
    # copy existing photos to assets/photos/.lowres/ path
    # to save time, copy only if highres photo has more recent timestamp
    # (otherwise, keep lowres photo without overwriting)
    # Note: rsync usually looks at file timestamp and size, and if either has changed,
    # copies the file (simplified explanation). Here only timestamps should be compared,
    # which rsync can't do, so find -cnewer is used instead. See e.g.
    # https://superuser.com/questions/260092/rsync-switch-to-only-compare-timestamps
    # copy only the "large" photos that have file modtimes more recent than the last
    # time this operation was run
    photoslastrun="$path_thesis/assets/photos/.lowres/lastrun"
    if [[ ! -f "$photoslastrun" ]]; then
      # if, for some reason, the lastrun file does not exist
      # copy over everything and then create the file
      # (except for the .lowres tree itself, and any assets.external files)
      rsync -av "$path_thesis/assets/photos/" "$path_thesis/assets/photos/.lowres/" \
        --exclude ".lowres/" --exclude "assets.external"
      touch "$photoslastrun"
    fi
    # Detect new photos and copy them into .lowres tree
    # https://stackoverflow.com/questions/9612090/how-to-loop-through-file-names-returned-by-find
    # https://stackoverflow.com/questions/5241625/find-and-copy-files
    # (the subshell keeps the cd from leaking into the rest of the script)
    (
      cd "$path_thesis/assets/photos"
      find . -type f -cnewer "$photoslastrun" ! -path "./.lowres/*" ! -name "*.external" \
        -print -exec cp --parents "{}" .lowres \;
    )
    # Convert all non-JPG images (except for PDF, SVG, and other non-images) to JPG
    msg " Convert all non-JPG images to JPG"
    find "$path_thesis/assets/photos/.lowres/" -type f \
      ! -name "*.pdf" ! -name "*.svg" ! -name "*.external" ! -name "lastrun" ! -name "*.jpg" \
      -print -exec mogrify -format jpg "{}" \;
    # Remove the now redundant non-JPG files in .lowres/
    msg " Delete the redundant non-JPG files in .lowres/ tree"
    find "$path_thesis/assets/photos/.lowres/" -type f \
      ! -name "*.pdf" ! -name "*.svg" ! -name "*.external" ! -name "lastrun" ! -name "*.jpg" \
      -print -exec rm "{}" \;
    # Shrink image filesize in-place to roughly 300kb in size
    msg " Shrink images in .lowres/ tree to <=300K"
    # https://stackoverflow.com/questions/6917219/imagemagick-scale-jpeg-image-with-a-maximum-file-size
    find "$path_thesis/assets/photos/.lowres/" -size +500k -type f -name "*.jpg" \
      -print -exec convert "{}" -define jpeg:extent=300kb "{}" \;
    # update the modification and access time on the photoslastrun file
    touch "$photoslastrun"
  fi

  ## Handle knitr or pgfSweave jobs (each requires separate treatment)
  ## There is no obvious way to tell the difference (apart from reading the *.Rnw file)
  ## IN ALL KNITR DIRECTORIES, CREATE A FILE NAMED: .knitme
  # If the file .knitme exists in the current directory,
  # run knitr commands, otherwise run pgfsweave commands
  msg "--- Looking for .knitme"
  if [[ -e .knitme ]]; then
    # Run knitr commands for this job
    msg " -----------------------"
    msg " This is a job for knitr"
    msg " -----------------------"
    # Knit
    msg " Knitting..."
    Rscript -e "library(knitr); library(methods); knit('$jobname.$jobfiletype')"
    # Introduce delay to give time to read Rscript exit status
    msg " -----------------------"
    msg " Rscript knitr completed"
    msg " -----------------------"
    simpledelay.sh 2
  else
    # Run pgfSweave commands
    msg " ---------------------------"
    msg " This is a job for pgfSweave"
    msg " ---------------------------"
    # Tangle
    msg " Tangling..."
    R CMD Stangle "$jobname.$jobfiletype"
    # Weave
    msg " Weaving..."
    R CMD pgfsweave --graphics-only "$jobname.$jobfiletype"
    # Introduce delay to give time to read R CMD exit status
    msg " -------------------------"
    msg " R CMD pgfsweave completed"
    msg " -------------------------"
    simpledelay.sh 2
  fi

  # Run vc script if vc exists in working directory
  msg " Running vc script..."
  if [[ -f vc ]]; then
    ./vc
  fi

  # Run pdflatex, bibtex, and company
  # Leaving this if-else for historical reasons, and also to print the message
  # (${On_Cyan-} / ${Color_Off-} expand to nothing when colours are undefined)
  if $ltxmkrc; then
    msg "${On_Cyan-} Calling LaTeXMK, detected .latemxkrc file${Color_Off-}"
    simpledelay.sh 2
    # note: latexmk does not need us to invoke "-r .latexmkrc", it finds and uses
    # the RC file automatically (as evidenced by latexmk's output)
    latexmk -pdf -bibtex "$jobname"
  else
    msg "${On_Cyan-} Calling LaTeXMK${Color_Off-}"
    simpledelay.sh 2
    latexmk -pdf -bibtex "$jobname"
  fi
else
  # Menu mode: no job name was given, or --menu/--aux was passed
  # In this case, present a menu of choices
  msg "This is cheRTeX POST-PROCESSING"
  msg "<1> 'pdf-all' -- Process all .tikz files to pdf graphics"
  msg "<2> 'clean-up' -- Remove all auxiliary files"
  msg "<3> 'wipe-dir' -- Remove all non-essential files and subdirectories"
  msg "Any other input exits the program"
  # EOF on stdin counts as "any other input" instead of aborting under set -e
  read -r usrchoice || true

  case "$usrchoice" in
    pdf-all | 1)
      msg "<1> 'pdf-all' chosen"
      # Expand the *.tikz pattern into an array to get the real number of
      # matching files; nullglob makes "no match" an empty array.
      shopt -s nullglob
      # shellcheck disable=SC2206
      tikz_list=($tikzfiles)
      shopt -u nullglob
      tikzfilenumber=${#tikz_list[@]}
      msg "cheRTeX detected $tikzfilenumber TikZ files for processing"
      msg "Starting TikZ file processing..."
      simpledelay.sh 2
      if ((tikzfilenumber > 0)); then
        for tikzfilename in "${tikz_list[@]}"; do
          # Call tikz2pdf
          msg " tikz2pdf $tikzfilename"
          tikz2pdf --once "$tikzfilename"
        done
      fi
      msg "Completed TikZ file processing"
      ;;
    clean-up | 2)
      msg "<2> 'clean-up' chosen"
      # Turn the space-separated glob patterns in $auxfiles into a find
      # expression of the form: -iname P1 -o -iname P2 -o ...
      # Matching by name pattern (instead of a regex derived from the
      # patterns) also covers "*-tikzDictionary", which has no dot before
      # the suffix.
      read -ra aux_patterns <<< "$auxfiles"
      find_expr=()
      for pattern in "${aux_patterns[@]}"; do
        ((${#find_expr[@]} == 0)) || find_expr+=(-o)
        find_expr+=(-iname "$pattern")
      done
      # "rm -v" provides nice output of which files were cleaned up
      (
        cd "$path_wd"
        find . -maxdepth 1 -type f \( "${find_expr[@]}" \) -exec rm -v "{}" \;
      )
      ;;
    wipe-dir | 3)
      msg "<3> 'wipe-dir' chosen"
      ## Remove all non-essential files
      # get a timestamp
      timestamp=$(date +%s)
      # create a unique tmp-dir name from timestamp and current dir name
      tmpdirname="${timestamp}-${dir_wd}"
      backup_dir="$temp_folder/$tmpdirname"
      mkdir -- "$backup_dir"
      # Copy the contents of the current directory to the backup directory
      cp -R -- ./* "$backup_dir"
      # Empty the current directory of all contents
      # note: hidden files and subdirs are unaffected by the glob
      rm -R -- ./*
      # Return the stuff we want to keep after wipe-dir. Every item is only
      # copied back if it exists in the backup, so a missing file neither
      # prints an error nor aborts the script half-way through the restore:
      #   *.Rnw, vc, vc.tex, vc-git.awk
      #   *.Rproj (removal is unnecessary and makes RStudio less useful)
      #   *.rda   (considering peak-data files, which "cost" a lot to create)
      #   *.Rmd   (R markdown source files)
      #   *.css   (css files) [for sample-matrix]
      #   .knitme (empty file used to indicate knitr jobs)
      restore_if_present \
        "$backup_dir"/*.Rnw \
        "$backup_dir/vc" \
        "$backup_dir/vc.tex" \
        "$backup_dir/vc-git.awk" \
        "$backup_dir"/*.Rproj \
        "$backup_dir"/*.rda \
        "$backup_dir"/*.Rmd \
        "$backup_dir"/*.css \
        "$backup_dir/.knitme"
      ;;
  esac
  die "All done. Exiting..." 0
fi

# keep track of runtime of entire script
endtime=$(date +%s)
runtime=$((endtime - starttime))

# send push message to Gotify server
# if runtime is longer than X minutes (suitable limit perhaps 3 min)
if ((runtime > 180)) && [[ $disable_alert == "false" ]]; then
  # NOTE(review): the application token is embedded in the script; set
  # GOTIFY_TOKEN in the environment to override it, and consider removing
  # the built-in fallback.
  gotify_token=${GOTIFY_TOKEN:-A8nO3zYJ-R1wG__}
  # POST request to Gotify server works without needing Gotify CLI on this box
  # NOTE, multi-line bash command fails if interrupted by comment lines!
  # Hide CURL response (-o /dev/null) and progress bar (--silent)
  # https://gotify.net/docs/pushmsg
  # "$*" keeps the whole message in a single -F argument; a failed request
  # is reported but does not abort the script.
  if curl -X POST "https://gotify.chepec.se/message?token=${gotify_token}" \
    -F "message=chertex.sh $*. Completed in $runtime s" \
    -F "title=$dir_wd" -F "priority=5" \
    -o /dev/null --silent; then
    msg "Push notification sent to Gotify"
  else
    msg "Push notification to Gotify failed"
  fi
fi

msg "${On_Cyan-}-------------------------------------${Color_Off-}"
# the padding for runtime makes the formatting work
# three digits for seconds is enough for just above 15 minutes
printf '%b=== chertex.sh completed in %03d s ===%b\n' "${On_Cyan-}" "$runtime" "${Color_Off-}" 1>&2
# Depending on whether the clock uses summer or wintertime, the date string
# length differs by one (CEST vs CET). Padding the date to a fixed width of
# 29 characters keeps the "job completed" block aligned in both cases.
printf -v datestamp '%-29s' "$(date)"
msg "${On_Cyan-}=== ${datestamp} ===${Color_Off-}"
msg "${On_Cyan-}-------------------------------------${Color_Off-}"

simpledelay.sh 3
exit 0