You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

430 lines
17 KiB
Bash

#!/usr/bin/env bash
## Process *.Rnw files
## Written May 14, 2010
## Taha Ahmed
####################################################
# For now, MAKE SURE that the argument consists of
# a complete filename, with extension, and
# in the directory of the Rnw file
####################################################
# Record the start time (seconds since the epoch) so that the total runtime
# of the script can be reported once it has finished
starttime="$(date +%s)"
# Start from an empty terminal, then print the whole banner in one call
clear
printf '%s\n' \
  "-----------------------------------------------------------------------" \
  "cheRTeX -- a script for processing R--Sweave/knitr--LaTeX/TikZ projects" \
  "^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^" \
  "MMXVI -- taha@chepec.se -- CHEPEC doctoral degree project" \
  "-----------------------------------------------------------------------"
## Record in the flag ltxmkrc whether the current directory
## contains a .latexmkrc file (true) or not (false)
echo "--- Looking for .latexmkrc"
if [[ ! -e .latexmkrc ]]; then
  ltxmkrc=false
  echo "--- This job did not request LaTeXMK RC file"
else
  ltxmkrc=true
  echo "+++ LaTeXMK RC file invoked"
fi
# define some constants
# path_wd: absolute path of the directory the script was started in
path_wd=${PWD}
# dir_wd: last component of that path. The expansion is quoted so that a
# working directory containing whitespace or glob characters still yields
# exactly one, unmodified, directory name.
dir_wd=$(basename -- "$path_wd")
# location of the thesis project, which receives special treatment
path_thesis="/media/bay/taha/chepec/thesis"
# define some file types
Rfiletype="R"
TeXfiletype="tex"
# glob patterns, deliberately stored unexpanded
tikzfiles="*.tikz"
Rnwfiles="*.Rnw"
# aux files: one (unexpanded) glob pattern per kind of auxiliary file
aux_acn='*.acn'
aux_acr='*.acr'
aux_alg='*.alg'   # glossaries
aux_aux='*.aux'
aux_bbl='*.bbl'   # bibliography
aux_blg='*.blg'   # bibliography
aux_dep='*.dep'
aux_dpth='*.dpth'
aux_fdb='*.fdb*'
aux_fig='*.figlist'
aux_fls='*.fls'
aux_glg='*.glg'
aux_glo='*.glo'   # glossaries
aux_gls='*.gls*'  # glossaries and bib2gls
aux_ist='*.ist'   # glossaries (makeindex style file)
aux_lob='*.lob'
aux_lof='*.lof'
aux_log='*.log'
aux_lor='*.lor'
aux_los='*.los'
aux_lot='*.lot'
aux_out='*.out'
aux_lox='*.lox'
aux_make='*.makefile'
aux_map='*.map'
aux_run='*.run*'
aux_slg='*.slg'
aux_slo='*.slo'
aux_sls='*.sls'
aux_tikz='*-tikzDictionary'
aux_tdo='*.tdo'
aux_toc='*.toc'
aux_xdy='*.xdy'   # glossaries (xindy)
# all aux files in a string: the patterns above, in the order listed here,
# joined by single spaces (each variable is looked up by name)
auxfiles=""
for auxvarname in \
  aux_acn aux_acr aux_alg aux_aux aux_bbl aux_blg aux_dep aux_dpth \
  aux_fdb aux_fig aux_fls aux_glg aux_glo aux_gls aux_ist aux_lob \
  aux_lof aux_log aux_lor aux_los aux_lot aux_out aux_lox aux_make \
  aux_map aux_run aux_slg aux_slo aux_sls aux_tikz aux_tdo aux_toc \
  aux_xdy; do
  auxfiles="${auxfiles:+$auxfiles }${!auxvarname}"
done
unset auxvarname
# depending on number of args and their content, do different things...
if [ $# -eq 1 ]; then
# Split the single argument into a job name and a file extension
# (assumes a complete filename, located in the current directory, was passed)
jobfilename=$1
# File extension: the text after the LAST dot, so that it is consistent with
# jobname below (which also splits at the last dot). A name such as
# "my.job.Rnw" therefore yields "my.job" + "Rnw". A name without any dot has
# no extension at all.
if [[ $jobfilename == *.* ]]; then
  jobfiletype=${jobfilename##*.}
else
  jobfiletype=""
fi
jobname=${jobfilename%.*} # Filename without extension part
echo "<cheRTeX> Detected filename:  $jobname"
echo "<cheRTeX> Detected extension: $jobfiletype"
# Verify that the file extension is "[Rr][Nn][Ww]" (any mix of upper/lower case).
# jobfiletype keeps the spelling that was actually passed in, because
# "$jobname.$jobfiletype" is used further down to name the file on disk and
# must match it exactly on a case-sensitive file system.
case "$jobfiletype" in
  [Rr][Nn][Ww])
    # File extension is indeed "[Rr][Nn][Ww]"
    echo "<cheRTeX> Detected *.Rnw extension"
    ;;
  *)
    # File extension is NOT "[Rr][Nn][Ww]"
    echo "<cheRTeX> This script only handles *.Rnw files"
    echo "<cheRTeX> Terminating..."
    exit 1
    ;;
esac
# Pause briefly so the messages printed so far can be read on screen
# (there is no need to announce that the delay has completed)
simpledelay.sh 2
#### sample-matrix.Rnw gets special treatment:
# when $jobname is sample-matrix, Shiny is restarted and this script ends here
case "$jobname" in
  sample-matrix)
    printf '%s\n' \
      "<sample-matrix> -------------------------------" \
      "<sample-matrix> Restarting Shiny" \
      "<sample-matrix> -------------------------------"
    # stop whatever matches the Shiny command line ...
    pkill -f "shiny::runApp"
    # ... and launch it again in the background
    bash -c "/media/bay/taha/chepec/chetex/common/bash/shiny-matrix.sh" &
    # nothing else to do for this job
    echo "<cheRTeX> Terminating..."
    simpledelay.sh 2
    exit 0
    ;;
esac
#### Special treatment for thesis
if [[ $path_wd == "$path_thesis" && $jobname == "$dir_wd" ]]; then
# Fetch external assets by reading any assets.external files in assets/ tree
#
# _chertex_fetch_external_assets ASSETS_ROOT
#   Finds every file named "assets.external" below ASSETS_ROOT and copies each
#   asset listed in it (one per line) into the directory that holds that
#   assets.external file. Existing files are kept, except in directories
#   named "references", where they are overwritten.
#   Blank lines in an assets.external file are skipped.
_chertex_fetch_external_assets() {
  local assets_root=$1
  local externalfilepath asset assetpathtarget
  # NUL-delimited find output keeps paths containing whitespace in one piece
  while IFS= read -r -d '' externalfilepath; do
    assetpathtarget=$(dirname -- "$externalfilepath")
    # read the assets.external file line-by-line; the "|| [[ -n ... ]]" part
    # also picks up a last line that lacks a trailing newline
    # https://stackoverflow.com/questions/10929453/read-a-file-line-by-line-assigning-the-value-to-a-variable
    while IFS='' read -r asset || [[ -n "$asset" ]]; do
      # an empty (or whitespace-only) line would leave cp without a source
      [[ -n ${asset//[[:space:]]/} ]] || continue
      echo "<thesis> Copying $asset to $assetpathtarget"
      # cp but don't overwrite existing files
      # NOTE(review): $asset is left unquoted, as before, so that a line may
      # hold a glob pattern; asset paths containing whitespace are therefore
      # not supported. Confirm against the existing assets.external files.
      # shellcheck disable=SC2086
      cp --preserve=timestamps --no-clobber -- $asset "$assetpathtarget"
      # except we want to overwrite the BibTeX library files (inside the
      # assets/references/ directory): the destructive cp is only run when
      # the target directory is named "references"
      if [[ ${assetpathtarget##*/} == "references" ]]; then
        echo "<thesis> Overwriting BibTeX libraries in assets/references/"
        # shellcheck disable=SC2086
        cp --preserve=timestamps -- $asset "$assetpathtarget"
      fi
    done < "$externalfilepath"
  done < <(find "$assets_root" -type f -name "assets.external" -print0)
}
echo "<thesis> -------------------------------"
echo "<thesis> Getting external assets"
echo "<thesis> -------------------------------"
_chertex_fetch_external_assets "$path_wd/assets/"
# Create low-res photos on-the-fly from existing photos/
#
# _chertex_sync_lowres_photos PHOTOS_DIR
#   Mirrors PHOTOS_DIR into PHOTOS_DIR/.lowres/, preserving the directory
#   structure. The file .lowres/lastrun marks when this was last done:
#   - if it does not exist, everything is copied and the file is created;
#   - afterwards only files whose status changed more recently than lastrun
#     was modified are copied (again).
#   rsync is not used for the incremental step because it compares size as
#   well as timestamp, and the low-res copies differ in size by design. See
#   https://superuser.com/questions/260092/rsync-switch-to-only-compare-timestamps
#   Returns non-zero if PHOTOS_DIR is missing or cannot be prepared/entered.
_chertex_sync_lowres_photos() {
  local photos_dir=$1
  local lowres_dir="$photos_dir/.lowres"
  local -a top_level
  if [[ ! -d $photos_dir ]]; then
    echo "<thesis> Photos directory $photos_dir not found" >&2
    return 1
  fi
  # the target (and the place where lastrun lives) must exist before use
  mkdir -p -- "$lowres_dir" || return 1
  if [[ ! -f "$lowres_dir/lastrun" ]]; then
    # if, for some reason, the lastrun file does not exist
    # copy over everything and then create the file.
    # The glob has to sit OUTSIDE the quotes to be expanded; since "*" never
    # matches dot-names, .lowres/ itself is not part of the copy.
    top_level=("$photos_dir"/*)
    if [[ -e ${top_level[0]} ]]; then
      rsync -av -- "${top_level[@]}" "$lowres_dir/"
    fi
    touch -- "$lowres_dir/lastrun"
  fi
  # cd (inside a subshell, so the caller's working directory is untouched)
  # and use the --parents arg to preserve directory structure in .lowres
  (
    cd -- "$photos_dir" || exit 1
    find . -type f -cnewer .lowres/lastrun ! -path './.lowres/*' \
      -exec cp --parents -- {} .lowres/ \;
  )
}
echo "<thesis> -------------------------------"
echo "<thesis> Create low-res photos tree"
echo "<thesis> -------------------------------"
# marker file; its timestamp is refreshed once the low-res tree is up to date
photoslastrun="$path_wd/assets/photos/.lowres/lastrun"
_chertex_sync_lowres_photos "$path_wd/assets/photos"
# in the low-res tree, find any photo larger than specific size (500kB)
# and shrink it

# _chertex_photo_extension PATH
#   Prints the file extension of PATH (the text after the last dot of its
#   basename), or an empty line if the basename contains no dot.
_chertex_photo_extension() {
  local base=${1##*/}
  case "$base" in
    *.*) printf '%s\n' "${base##*.}" ;;
    *) printf '\n' ;;
  esac
}

# _chertex_lowres_should_shrink EXTENSION
#   Succeeds unless EXTENSION is one that does not fare well when converted
#   to jpeg:
#     "" (empty string, ie no file extension)
#     SVG
#     PDF
#   All of the exclusions have to be checked together; testing them with
#   "not A or not B" would accept every file.
_chertex_lowres_should_shrink() {
  case "$1" in
    "" | [Ss][Vv][Gg] | [Pp][Dd][Ff]) return 1 ;;
    *) return 0 ;;
  esac
}

# _chertex_lowres_is_jpeg EXTENSION
#   Succeeds if EXTENSION denotes a file that is already jpeg.
_chertex_lowres_is_jpeg() {
  case "$1" in
    [Jj][Pp][Gg] | [Jj][Pp][Ee][Gg]) return 0 ;;
    *) return 1 ;;
  esac
}

# _chertex_shrink_lowres_photos LOWRES_DIR
#   For every regular file above 500k below LOWRES_DIR (except the excluded
#   types above): convert it to jpeg with mogrify if it is not jpeg yet
#   (removing the non-jpeg file afterwards), then rewrite the jpeg in place
#   with a target size of roughly 300kb.
_chertex_shrink_lowres_photos() {
  local lowres_dir=$1
  local largephotofilename largephotobase largephototype jpegphotofilename
  [[ -d $lowres_dir ]] || return 0
  while IFS= read -r -d '' largephotofilename; do
    largephotobase=${largephotofilename##*/} # just the filename (with extension, sans parents)
    largephototype=$(_chertex_photo_extension "$largephotofilename") # file extension only
    _chertex_lowres_should_shrink "$largephototype" || continue
    if _chertex_lowres_is_jpeg "$largephototype"; then
      # already jpeg: shrink the file under its own name (photo.JPG stays
      # photo.JPG rather than being looked up as photo.jpg)
      jpegphotofilename=$largephotofilename
    else
      # if the photo is not already jpeg, convert it to jpeg
      echo "<thesis> Converting $largephotobase to JPG format"
      if ! mogrify -format jpg "$largephotofilename"; then
        echo "<thesis> Could not convert $largephotobase, leaving it as it is" >&2
        continue
      fi
      # remove the now unnecessary non-jpg file from .lowres/
      rm -- "$largephotofilename"
      jpegphotofilename="${largephotofilename%.*}.jpg"
    fi
    # convert photo in-place (overwrite) with new one roughly 300kb in size
    # https://stackoverflow.com/questions/6917219/imagemagick-scale-jpeg-image-with-a-maximum-file-size
    echo "<thesis> Shrinking ${jpegphotofilename##*/}"
    convert "$jpegphotofilename" -define jpeg:extent=300kb "$jpegphotofilename"
  done < <(find "$lowres_dir" -type f -size +500k -print0)
}
_chertex_shrink_lowres_photos "$path_wd/assets/photos/.lowres/"
# update the modification and access time on the photoslastrun file
touch "$photoslastrun"
fi
# short delay to enable on-screen reading of previous echo
simpledelay.sh 2
## Handle knitr or pgfSweave jobs (each requires separate treatment)
## There is no obvious way to tell them apart (short of reading the *.Rnw
## file), hence the convention:
## IN ALL KNITR DIRECTORIES, CREATE A FILE NAMED: .knitme
# If the file .knitme exists in the current directory,
# run knitr commands, otherwise run pgfsweave commands
echo "--- Looking for .knitme"
if [[ -e .knitme ]]; then
  # Run knitr commands for this job
  echo "<cheRTeX> -----------------------"
  echo "<cheRTeX> This is a job for knitr"
  echo "<cheRTeX> -----------------------"
  # Knit
  echo "<cheRTeX> Knitting..."
  # The file name is handed to R as a command-line argument instead of being
  # pasted into the R code, so quotes or backslashes in the name cannot
  # break (or alter) the R expression.
  Rscript -e "library(knitr); library(methods); knit(commandArgs(trailingOnly = TRUE)[1])" "$jobname.$jobfiletype"
  # Introduce delay to give time to read Rscript exit status
  echo "<cheRTeX> -----------------------"
  echo "<cheRTeX> Rscript knitr completed"
  echo "<cheRTeX> -----------------------"
  simpledelay.sh 2
else
  # Run pgfSweave commands
  echo "<cheRTeX> ---------------------------"
  echo "<cheRTeX> This is a job for pgfSweave"
  echo "<cheRTeX> ---------------------------"
  # Tangle (file name quoted so that whitespace in it does not split it)
  echo "<cheRTeX> Tangling..."
  R CMD Stangle "$jobname.$jobfiletype"
  # Weave
  echo "<cheRTeX> Weaving..."
  R CMD pgfsweave --graphics-only "$jobname.$jobfiletype"
  # Introduce delay to give time to read R CMD exit status
  echo "<cheRTeX> -------------------------"
  echo "<cheRTeX> R CMD pgfsweave completed"
  echo "<cheRTeX> -------------------------"
  simpledelay.sh 2
fi
# Run vc script if vc exists in working directory
# (the message is only printed when the script is actually run)
if [[ -f vc ]]; then
  echo "<cheRTeX> Running vc script"
  ./vc
fi
# Run pdflatex, bibtex, and company
# The options common to both cases are collected in an array; the RC file is
# put in front of them only when .latexmkrc was found earlier. The flag is
# compared as a string rather than being executed as a command.
latexmkargs=(-pdf -bibtex)
if [[ $ltxmkrc == true ]]; then
  echo "<cheRTeX> Calling LaTeXMK with RC file"
  latexmkargs=(-r .latexmkrc "${latexmkargs[@]}")
else
  echo "<cheRTeX> Calling LaTeXMK"
fi
simpledelay.sh 2
latexmk "${latexmkargs[@]}" "$jobname"
else
# Either no arguments, or more than one argument
if [ $# -eq 0 ]; then
# Zero arguments. Present a menu of choices
# (each choice can be given either by its number or by its name)
printf '%s\n' \
  "This is cheRTeX POST-PROCESSING" \
  "<1> 'pdf-all' -- Process all .tikz files to pdf graphics" \
  "<2> 'clean-up' -- Remove all auxiliary files" \
  "<3> 'wipe-dir' -- Remove all non-essential files and subdirectories" \
  "Any other input exits the program"
# -r: take the typed line literally (no backslash processing)
read -r usrchoice
## Determine number of .Rnw files in current directory
#Rnwfileno=$(ls -1 $Rnwfiles | wc -l)
#echo "No of .Rnw files: $Rnwfileno"
#
## Check if number of .Rnw files larger than one
#if [ $Rnwfileno -gt 1 ]; then
# # If larger than one, ask for user input
# # Indicates more than one Rnw file in current directory.
# # This introduces a naming ambiguity.
# # Resolve by asking user for current jobname
# echo "Found $Rnwfileno .Rnw files in current directory"
# echo "Please specify the jobname"
# read jobname
# if [ -z "$jobname" ]; then
# # string is null
# echo "Specified jobname cannot be parsed. Terminating..."
# exit 1
# fi
#else
## There is exactly one *.Rnw file in the current directory
## Fetch the jobname from the .Rnw filename by stripping off the file extension
# Rnwfilename=$(ls -1 $Rnwfiles)
# jobname=${Rnwfilename%.*}
#fi
#
#echo "Jobname set to: $jobname"
# _chertex_collect_tikz_files PATTERN
#   Expands the glob PATTERN in the current directory and stores the matching
#   regular files in the array tikzfilelist. A pattern that matches nothing
#   results in an empty array (not in the literal pattern).
_chertex_collect_tikz_files() {
  local pattern=$1
  local candidate
  tikzfilelist=()
  # unquoted on purpose: this is where the pattern gets expanded
  # shellcheck disable=SC2086
  for candidate in $pattern; do
    if [[ -f $candidate ]]; then
      tikzfilelist+=("$candidate")
    fi
  done
  return 0
}
# <1> 'pdf-all': run tikz2pdf on every *.tikz file in the current directory
if [[ $usrchoice == "pdf-all" || $usrchoice == "1" ]]; then
  echo "<1> 'pdf-all' chosen"
  _chertex_collect_tikz_files "$tikzfiles"
  # number of files found (the length of the array, not of the pattern string)
  tikzfilenumber=${#tikzfilelist[@]}
  echo "cheRTeX detected $tikzfilenumber TikZ files for processing"
  echo "Starting TikZ file processing..."
  simpledelay.sh 2
  for tikzfilename in "${tikzfilelist[@]}"; do
    # Call tikz2pdf
    echo "<Executing> tikz2pdf $tikzfilename"
    tikz2pdf --once "$tikzfilename"
  done
  echo "Completed TikZ file processing"
fi
# _chertex_remove_aux_files PATTERNS
#   Removes, from the current directory, every file matching one of the
#   space-separated glob PATTERNS. Patterns without a match are ignored
#   silently (-f), and "--" keeps file names that start with a dash from
#   being read as options.
_chertex_remove_aux_files() {
  # unquoted on purpose: the patterns have to be split and expanded here
  # shellcheck disable=SC2086
  rm -f -- $1
}
# <2> 'clean-up': delete the auxiliary files listed in $auxfiles
if [[ $usrchoice == "clean-up" || $usrchoice == "2" ]]; then
  echo "<2> 'clean-up' chosen"
  _chertex_remove_aux_files "$auxfiles"
  # Still, a rather crude way of cleaning up...
fi
# _chertex_wipe_dir BACKUP_ROOT
#   Removes all non-essential files and subdirectories from the current
#   directory:
#   1. copies the (non-hidden) contents of the current directory into a new
#      directory BACKUP_ROOT/<timestamp>-<name of current directory>;
#   2. empties the current directory (hidden files and hidden
#      subdirectories are unaffected);
#   3. copies back the files worth keeping, as far as they exist in the
#      backup: *.Rnw, vc, vc.tex, vc-git.awk, *.Rproj (RStudio projects),
#      *.rda (e.g. peak-data files, which "cost" a lot to create),
#      *.Rmd (R markdown sources), *.css (for sample-matrix) and .knitme.
#   Nothing is removed unless step 1 succeeded completely; in that case the
#   function returns non-zero and leaves the current directory as it was.
_chertex_wipe_dir() {
  local backup_root=$1
  local currdirname timestamp tmpdirname backup_dir keeper
  local -a contents
  # get the name of the current directory
  currdirname=${PWD##*/}
  # get a timestamp
  timestamp=$(date +%s)
  # create a unique tmp-dir name
  tmpdirname="${timestamp}-${currdirname}"
  backup_dir="$backup_root/$tmpdirname"
  # "./" prefix: names starting with a dash cannot be mistaken for options
  contents=(./*)
  if [[ ! -e ${contents[0]} && ! -L ${contents[0]} ]]; then
    echo "Nothing to wipe in $PWD"
    return 0
  fi
  # make a directory in the backup root, named after the current dir
  if ! mkdir -- "$backup_dir"; then
    echo "Could not create $backup_dir; nothing was removed" >&2
    return 1
  fi
  # Copy the contents of the current directory to the backup directory
  if ! cp -R -- "${contents[@]}" "$backup_dir"; then
    echo "Backup to $backup_dir failed; nothing was removed" >&2
    return 1
  fi
  # Empty the current directory of all contents
  rm -R -- "${contents[@]}" # note: hidden files and subdir unaffected
  # Return the stuff we want to keep after wipe-dir. Each candidate is
  # checked first, mainly to avoid annoying "file does not exist" messages
  # (an unmatched pattern stays literal and fails the check as well).
  # NOTE: .knitme is hidden, so it is neither backed up nor removed above;
  # it is listed for completeness only.
  for keeper in \
    "$backup_dir"/*.Rnw \
    "$backup_dir"/vc \
    "$backup_dir"/vc.tex \
    "$backup_dir"/vc-git.awk \
    "$backup_dir"/*.Rproj \
    "$backup_dir"/*.rda \
    "$backup_dir"/*.Rmd \
    "$backup_dir"/*.css \
    "$backup_dir"/.knitme; do
    [[ -e $keeper ]] || continue
    cp -- "$keeper" .
  done
  return 0
}
# <3> 'wipe-dir': keep only the essential files in the current directory
if [[ $usrchoice == "wipe-dir" || $usrchoice == "3" ]]; then
  echo "<3> 'wipe-dir' chosen"
  _chertex_wipe_dir "/media/bay/taha/chepec/tmp"
fi
# Post-processing mode ends here, whichever choice was made
printf '%s\n' "Terminating..."
exit 0
fi
## More than one argument was given: neither of the two supported modes.
## Explain how the script is meant to be called and stop with an error status.
echo "<cheRTeX> This script can be run with one argument in process mode,"
echo "<cheRTeX> or with zero arguments in post-processing mode."
echo "<cheRTeX> Terminating..."
exit 1
fi
# _chertex_date_line DATE_STRING
#   Prints DATE_STRING framed by "=== ... ===". Depending on whether the
#   clock uses summer or wintertime, the date string length differs by one
#   (CEST vs CET). Just to be neat, the string is padded to 29 characters
#   (the length with a four-letter time zone), so that the frame lines up
#   with the rest of the "job completed" block in both cases.
_chertex_date_line() {
  printf '=== %-29s ===\n' "$1"
}
# keep track of runtime of entire script
endtime=$(date +%s)
# (if no start time was recorded, the runtime is reported as 0)
runtime=$(( endtime - ${starttime:-$endtime} ))
# send push message via Gotify CLI
# if runtime is longer than X minutes (suitable limit perhaps 3 min)
if (( runtime > 180 )); then
  # "$*" joins the script's arguments into the message text; $'\n' is a
  # real line break ("\n" inside double quotes is just a backslash and an n)
  gotify push --quiet --title "$dir_wd" --priority 5 "chertex.sh $*"$'\n'"Completed in $runtime s"
fi
echo "-------------------------------------"
# the padding for runtime makes the formatting work
# three digits for seconds is enough for just above 15 minutes
printf "=== chertex.sh completed in %03d s ===\n" "$runtime"
_chertex_date_line "$(date)"
echo "-------------------------------------"
# Final call to the external simpledelay.sh helper (elsewhere in this script
# it is used as a short delay for on-screen reading), then end the script
# with a success status
simpledelay.sh 3
exit 0