#!/usr/bin/env bash
## Process *.Rnw files
## Written May 14, 2010
## Taha Ahmed

####################################################
# For now, MAKE SURE that the argument consists of
# a complete filename, with extension, and
# in the directory of the Rnw file
####################################################

# Record the start time (seconds since the epoch) so the total runtime
# of the script can be reported when it finishes.
starttime=$(date +%s)

# Start from an empty terminal and print the program banner.
clear
echo "-----------------------------------------------------------------------"
echo "cheRTeX -- a script for processing R--Sweave/knitr--LaTeX/TikZ projects"
echo "^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^"
echo "MMXVI -- taha@chepec.se -- CHEPEC doctoral degree project"
echo "-----------------------------------------------------------------------"

## If the file .latexmkrc exists in the current directory,
## set the flag ltxmkrc=true (it is read later when latexmk is invoked)
ltxmkrc=false
echo "--- Looking for .latexmkrc"
if [[ -e .latexmkrc ]]; then
   ltxmkrc=true
   echo "+++ LaTeXMK RC file invoked"
else
   echo "--- This job did not request LaTeXMK RC file"
fi

# define some constants
path_wd=${PWD}
# name of the working directory (last path component); quoted so that
# directory names containing spaces or glob characters survive intact
dir_wd=$(basename -- "$path_wd")
path_thesis="/media/bay/taha/chepec/thesis"

# define some file types
Rfiletype="R"
TeXfiletype="tex"
tikzfiles="*.tikz"
Rnwfiles="*.Rnw"

# aux files
# Each variable holds a glob pattern (not a file list); the patterns are
# only expanded where they are used unquoted, e.g. by the clean-up action.
aux_acn="*.acn"
aux_acr="*.acr"
aux_alg="*.alg"   # glossaries
aux_aux="*.aux"
aux_bbl="*.bbl"   # bibliography
aux_blg="*.blg"   # bibliography
aux_dep="*.dep"
aux_dpth="*.dpth"
aux_fdb="*.fdb*"
aux_fig="*.figlist"
aux_fls="*.fls"
aux_glg="*.glg"
aux_glo="*.glo"   # glossaries
aux_gls="*.gls*"  # glossaries and bib2gls
aux_ist="*.ist"   # glossaries (makeindex style file)
aux_lob="*.lob"
aux_lof="*.lof"
aux_log="*.log"
aux_lor="*.lor"
aux_los="*.los"
aux_lot="*.lot"
aux_maf="*.maf"   # minitoc
aux_mtc="*.mtc*"  # minitoc
aux_out="*.out"
aux_lox="*.lox"
aux_make="*.makefile"
aux_map="*.map"
aux_run="*.run*"
aux_slg="*.slg"
aux_slo="*.slo"
aux_sls="*.sls"
aux_tikz="*-tikzDictionary"
aux_tdo="*.tdo"
aux_toc="*.toc"
aux_xdy="*.xdy"   # glossaries (xindy)

# all aux file patterns in one space-separated string
auxfiles="${aux_acn} ${aux_acr} ${aux_alg} ${aux_aux} ${aux_bbl} ${aux_blg} ${aux_dep} ${aux_dpth} ${aux_fdb} ${aux_fig} ${aux_fls} ${aux_glg} ${aux_glo} ${aux_gls} ${aux_ist} ${aux_lob} ${aux_lof} ${aux_log} ${aux_lor} ${aux_los} ${aux_lot} ${aux_maf} ${aux_out} ${aux_lox} ${aux_make} ${aux_map} ${aux_mtc} ${aux_run} ${aux_slg} ${aux_slo} ${aux_sls} ${aux_tikz} ${aux_tdo} ${aux_toc} ${aux_xdy}"

# depending on number of args and their content, do different things...
if [ $# -eq 1 ]; then
# Split the single argument into job name and file extension
# (assumes a complete filename was passed)
jobfilename=$1
# Extension = text after the LAST dot, so that it always pairs up with
# jobname below (e.g. "my.report.Rnw" -> "my.report" + "Rnw")
jobfiletype=${jobfilename##*.}
# Filename without the extension part
jobname=${jobfilename%.*}

echo "<cheRTeX> Detected filename: " "$jobname"
echo "<cheRTeX> Detected extension:" "$jobfiletype"

# Verify that the file extension is "[Rr][Nn][Ww]" (any letter case).
# The extension is kept exactly as given: rewriting it to "Rnw" would make
# the later "$jobname.$jobfiletype" point at a file that does not exist
# on case-sensitive filesystems.
if [[ $jobfiletype == [Rr][Nn][Ww] ]]; then
   echo "<cheRTeX> Detected *.Rnw extension"
else
   echo "<cheRTeX> This script only handles *.Rnw files"
   echo "<cheRTeX> Terminating..."
   exit 1
fi

# Introducing a short delay to enable on-screen reading of previous echo
# (no need to echo anything once the delay has completed)
simpledelay.sh 2

#### Special treatment for sample-matrix.Rnw
# If $jobname is sample-matrix, restart Shiny and terminate this script;
# none of the regular Rnw processing is performed for this job.
if [[ $jobname == "sample-matrix" ]]; then
   echo "<sample-matrix> -------------------------------"
   echo "<sample-matrix> Restarting Shiny"
   echo "<sample-matrix> -------------------------------"

   # kill any process whose command line mentions shiny::runApp
   pkill -f "shiny::runApp"
   # start Shiny in the background so this script can exit right away
   bash -c "/media/bay/taha/chepec/chetex/common/bash/shiny-matrix.sh" &

   # terminate this script
   echo "<cheRTeX> Terminating..."
   simpledelay.sh 2
   exit 0
fi

#### Special treatment for thesis

# fetch_external_assets ASSETS_ROOT
#
# Reads every file named "assets.external" below ASSETS_ROOT. Each line of
# such a file is either
#     <source path>
# or  <source path>><local subdirectory>
# The source is copied (recursively, without overwriting existing files)
# into the directory holding the assets.external file, or into the given
# subdirectory of it, which is created on demand. '>' is the separator so
# that paths may contain spaces. Wildcards in the source path are expanded.
# If the target directory is named "references", the copy is repeated
# WITH overwriting, so BibTeX libraries are always refreshed.
# Empty lines are skipped.
# Returns 1 (after printing a message) if a line holds more than one '>'.
fetch_external_assets() {
   local assets_root=$1
   local externalfilepath asset assetpathtarget
   local -a asset_array asset_sources
   # Empty IFS for this function only: unquoted expansions below are then
   # subject to filename expansion but never split on whitespace. Being
   # local, the caller's IFS is left untouched.
   local IFS=''

   # NUL-delimited find output keeps paths with spaces/newlines intact
   while IFS= read -r -d '' externalfilepath; do
      # "|| [[ -n ... ]]" also handles a last line without trailing newline
      while IFS= read -r asset || [[ -n "$asset" ]]; do
         # default target: the directory of the current assets.external
         assetpathtarget=$(dirname -- "$externalfilepath")

         # split the line on '>' (IFS is changed for this read only)
         IFS='>' read -r -a asset_array <<< "$asset"
         if (( ${#asset_array[@]} > 2 )); then
            echo "<thesis> Cannot handle more than one > character per line"
            return 1
         fi
         if (( ${#asset_array[@]} > 1 )); then
            # second field: local subdirectory to place the asset in
            assetpathtarget="$assetpathtarget/${asset_array[1]}"
            # keep only the asset path itself
            asset=${asset_array[0]}
            mkdir -p -- "$assetpathtarget" # -p: no error if it exists
         fi

         # nothing to copy for empty lines or lines lacking a source path
         [[ -n $asset ]] || continue

         # deliberately unquoted: expands wildcards in the asset path
         asset_sources=( $asset )

         echo "<thesis> Copying $asset to $assetpathtarget"
         # cp but don't overwrite existing files
         cp --preserve=timestamps --no-clobber --recursive -- "${asset_sources[@]}" "$assetpathtarget"
         # except for the BibTeX library files inside a "references"
         # directory, which are always overwritten
         if [[ $(basename -- "$assetpathtarget") == "references" ]]; then
            echo "<thesis> Overwriting BibTeX libraries in assets/references/"
            cp --preserve=timestamps -- "${asset_sources[@]}" "$assetpathtarget"
         fi
      done < "$externalfilepath"
   done < <(find "$assets_root" -type f -name "assets.external" -print0)
   return 0
}

# Only for the thesis project itself: working directory is the thesis
# directory and the job is named after it. The -n test keeps the block
# from triggering when the variables are unset/empty.
if [[ -n $path_thesis && $path_wd == "$path_thesis" && $jobname == "$dir_wd" ]]; then
   # Fetch external assets by reading any assets.external files in assets/ tree
   echo "<thesis> -------------------------------"
   echo "<thesis> Getting external assets"
   echo "<thesis> -------------------------------"
   if ! fetch_external_assets "$path_wd/assets/"; then
      echo "<cheRTeX> Terminating..."
      simpledelay.sh 2
      exit 1
   fi

   # Create low-res photos on-the-fly from existing photos/
   echo "<thesis> -------------------------------"
   echo "<thesis> Create low-res photos tree"
   echo "<thesis> -------------------------------"
   # Photos are mirrored into assets/photos/.lowres/. rsync cannot compare
   # timestamps only (see
   # https://superuser.com/questions/260092/rsync-switch-to-only-compare-timestamps),
   # so a marker file "lastrun" records when this step last ran and only
   # files that changed since then are copied again.
   photosdir="$path_thesis/assets/photos"
   lowresdir="$photosdir/.lowres"
   photoslastrun="$lowresdir/lastrun"
   if [[ ! -f "$photoslastrun" ]]; then
      # No marker yet: copy over everything (except the .lowres tree itself
      # and any assets.external files), then create the marker
      rsync -av "$photosdir/" "$lowresdir/" --exclude ".lowres/" --exclude "assets.external"
      touch "$photoslastrun"
   fi

   # Detect new photos and copy them into the .lowres tree.
   # Runs in a subshell so the directory change does not leak, and find is
   # skipped entirely if the photos directory cannot be entered.
   # -cnewer compares each file's status-change time with the marker.
   (
      cd "$photosdir" &&
         find . -type f -cnewer "$photoslastrun" ! -path "./.lowres/*" ! -name "*.external" -print -exec cp --parents "{}" .lowres \;
   )

   # find tests selecting everything in .lowres/ that is neither a JPG nor
   # one of the file kinds that must be left alone
   nonjpg_filter=(-type f ! -name "*.pdf" ! -name "*.svg" ! -name "*.external" ! -name "lastrun" ! -name "*.jpg")

   # Convert all non-JPG images (except for PDF, SVG, and other non-images) to JPG
   echo "<thesis> Convert all non-JPG images to JPG"
   find "$lowresdir/" "${nonjpg_filter[@]}" -print -exec mogrify -format jpg "{}" \;

   # Remove the now redundant non-JPG files in .lowres/
   echo "<thesis> Delete the redundant non-JPG files in .lowres/ tree"
   find "$lowresdir/" "${nonjpg_filter[@]}" -print -exec rm -- "{}" \;

   # Shrink image filesize in-place to roughly 300kb in size
   # (only files currently larger than 500k are touched)
   # https://stackoverflow.com/questions/6917219/imagemagick-scale-jpeg-image-with-a-maximum-file-size
   echo "<thesis> Shrink images in .lowres/ tree to <=300K"
   find "$lowresdir/" -size +500k -type f -name "*.jpg" -print -exec convert "{}" -define jpeg:extent=300kb "{}" \;

   # update the modification and access time on the photoslastrun file
   touch "$photoslastrun"
fi

# short delay to enable on-screen reading of previous echo
simpledelay.sh 2

## Handle knitr or pgfSweave jobs (each requires separate treatment)
## There is no obvious way to tell the difference between them (apart
## from reading the *.Rnw file), so the convention is:
## IN ALL KNITR DIRECTORIES, CREATE A FILE NAMED: .knitme
# If the file .knitme exists in the current directory,
# run knitr commands, otherwise run pgfsweave commands
echo "--- Looking for .knitme"
if [[ -e .knitme ]]; then
   # Run knitr commands for this job
   echo "<cheRTeX> -----------------------"
   echo "<cheRTeX> This is a job for knitr"
   echo "<cheRTeX> -----------------------"

   # Knit
   echo "<cheRTeX> Knitting..."
   # The filename is handed to R as a command-line argument rather than
   # pasted into the R source, so quotes in the name cannot break the code
   Rscript -e 'library(knitr); library(methods); knit(commandArgs(trailingOnly = TRUE)[1])' "$jobname.$jobfiletype"

   # Introduce delay to give time to read Rscript exit status
   echo "<cheRTeX> -----------------------"
   echo "<cheRTeX> Rscript knitr completed"
   echo "<cheRTeX> -----------------------"
   simpledelay.sh 2
else
   # Run pgfSweave commands
   echo "<cheRTeX> ---------------------------"
   echo "<cheRTeX> This is a job for pgfSweave"
   echo "<cheRTeX> ---------------------------"

   # Tangle
   echo "<cheRTeX> Tangling..."
   R CMD Stangle "$jobname.$jobfiletype"
   # Weave
   echo "<cheRTeX> Weaving..."
   R CMD pgfsweave --graphics-only "$jobname.$jobfiletype"

   # Introduce delay to give time to read R CMD exit status
   echo "<cheRTeX> -------------------------"
   echo "<cheRTeX> R CMD pgfsweave completed"
   echo "<cheRTeX> -------------------------"
   simpledelay.sh 2
fi

# Run vc script if vc exists in working directory
# (the message is printed only when the script is actually run)
if [[ -f vc ]]; then
   echo "<cheRTeX> Running vc script"
   ./vc
fi

# Run pdflatex, bibtex, and company via latexmk.
# The options are collected in an array; "-r .latexmkrc" is prepended
# when the RC file was detected earlier. The flag is compared as a string
# so that an unset/empty $ltxmkrc counts as "no RC file".
latexmk_args=(-pdf -bibtex)
if [[ $ltxmkrc == true ]]; then
   echo "<cheRTeX> Calling LaTeXMK with RC file"
   latexmk_args=(-r .latexmkrc "${latexmk_args[@]}")
else
   echo "<cheRTeX> Calling LaTeXMK"
fi
simpledelay.sh 2
latexmk "${latexmk_args[@]}" "$jobname"

else
# Either no arguments, or more than one argument
if [ $# -eq 0 ]; then
# Zero arguments. Present a menu of choices
echo "This is cheRTeX POST-PROCESSING" # only one choice for now
echo "<1> 'pdf-all' -- Process all .tikz files to pdf graphics"
echo "<2> 'clean-up' -- Remove all auxiliary files"
echo "<3> 'wipe-dir' -- Remove all non-essential files and subdirectories"
echo "Any other input exits the program"
# -r: take the answer literally (no backslash processing)
read -r usrchoice

## (Disabled) Determine the jobname from the .Rnw files in the current
## directory, asking the user when there is more than one of them.
#Rnwfileno=$(ls -1 $Rnwfiles | wc -l)
#echo "No of .Rnw files: $Rnwfileno"
#
## Check if number of .Rnw files larger than one
#if [ $Rnwfileno -gt 1 ]; then
#   # If larger than one, ask for user input
#   # Indicates more than one Rnw file in current directory.
#   # This introduces a naming ambiguity.
#   # Resolve by asking user for current jobname
#   echo "Found $Rnwfileno .Rnw files in current directory"
#   echo "Please specify the jobname"
#   read jobname
#   if [ -z "$jobname" ]; then
#      # string is null
#      echo "Specified jobname cannot be parsed. Terminating..."
#      exit 1
#   fi
#else
## There is exactly one *.Rnw file in current directory
## Fetch the jobname from the .Rnw filename by stripping off the file extension
#   Rnwfilename=$(ls -1 $Rnwfiles)
#   jobname=${Rnwfilename%.*}
#fi
#
#echo "Jobname set to: $jobname"

# process_tikz_files PATTERN
#
# Expands the glob PATTERN (e.g. "*.tikz") in the current directory,
# reports how many files matched and runs "tikz2pdf --once" on each one.
# A pattern without any match counts as zero files; nothing is processed.
process_tikz_files() {
   local pattern=$1
   local candidate tikzfilename
   local -a found=()

   # deliberately unquoted so the shell expands the pattern; the -e test
   # drops the literal pattern that is left over when nothing matches
   for candidate in $pattern; do
      [[ -e $candidate ]] && found+=("$candidate")
   done

   echo "cheRTeX detected ${#found[@]} TikZ files for processing"
   echo "Starting TikZ file processing..."
   simpledelay.sh 2

   for tikzfilename in "${found[@]}"; do
      # Call tikz2pdf
      echo "<Executing> tikz2pdf $tikzfilename"
      tikz2pdf --once "$tikzfilename"
   done
   echo "Completed TikZ file processing"
}

if [[ $usrchoice == "pdf-all" || $usrchoice == "1" ]]; then
   echo "<1> 'pdf-all' chosen"
   process_tikz_files "$tikzfiles"
fi

if [[ $usrchoice == "clean-up" || $usrchoice == "2" ]]; then
   echo "<2> 'clean-up' chosen"
   # $auxfiles is deliberately unquoted: it is a space-separated list of
   # glob patterns that the shell must split and expand here.
   # -f keeps rm quiet about patterns that match nothing; "--" protects
   # against filenames starting with a dash.
   rm -f -- $auxfiles
   # Still, a rather crude way of cleaning up...
fi

# wipe_dir [BACKUP_ROOT]
#
# Removes all non-hidden files and subdirectories from the current
# directory, keeping only the essential project files.
#   1. A uniquely named directory "<epoch seconds>-<current dir name>" is
#      created below BACKUP_ROOT (default: /media/bay/taha/chepec/tmp).
#   2. All non-hidden contents of the current directory are copied there.
#   3. Only if steps 1 and 2 succeeded, those contents are removed.
#   4. The files worth keeping are copied back from the backup, each one
#      only if it exists: *.Rnw, vc, vc.tex, vc-git.awk, *.Rproj, *.rda,
#      *.Rmd, *.css and .knitme.
# Returns 1 without removing anything if the backup could not be made.
wipe_dir() {
   local backup_root=${1:-/media/bay/taha/chepec/tmp}
   # name of the current directory
   local currdirname=${PWD##*/}
   local timestamp backup_dir keeper
   local -a contents keepers

   # timestamp + directory name gives a unique backup directory name
   timestamp=$(date +%s)
   backup_dir="$backup_root/${timestamp}-${currdirname}"

   if ! mkdir -- "$backup_dir"; then
      echo "<cheRTeX> Cannot create backup directory $backup_dir -- nothing removed" >&2
      return 1
   fi

   # non-hidden contents of the current directory (the "./" prefix guards
   # against names that start with a dash)
   contents=( ./* )
   if [[ ! -e ${contents[0]} && ! -L ${contents[0]} ]]; then
      # the glob matched nothing: directory is already empty
      return 0
   fi

   # Copy the contents of the current directory to the backup directory
   if ! cp -R -- "${contents[@]}" "$backup_dir"; then
      echo "<cheRTeX> Backup to $backup_dir failed -- nothing removed" >&2
      return 1
   fi

   # Empty the current directory (hidden files and subdirs are unaffected)
   rm -R -- "${contents[@]}"

   # Return the stuff we want to keep after wipe-dir.
   # NOTE: .knitme is a hidden file, so the "*" glob neither backs it up
   # nor removes it; its entry is normally a no-op, kept for completeness.
   keepers=(
      "$backup_dir"/*.Rnw
      "$backup_dir"/vc
      "$backup_dir"/vc.tex
      "$backup_dir"/vc-git.awk
      "$backup_dir"/*.Rproj  # removal would make RStudio less useful
      "$backup_dir"/*.rda    # peak-data files, which "cost" a lot to create
      "$backup_dir"/*.Rmd    # R markdown source files
      "$backup_dir"/*.css    # css files [for sample-matrix]
      "$backup_dir"/.knitme  # empty file used to indicate knitr jobs
   )
   for keeper in "${keepers[@]}"; do
      # skips unmatched patterns and files that were never there, which
      # avoids annoying "file does not exist" messages
      [[ -f $keeper ]] || continue
      cp -- "$keeper" .
   done
   return 0
}

if [[ $usrchoice == "wipe-dir" || $usrchoice == "3" ]]; then
   echo "<3> 'wipe-dir' chosen"
   ## Remove all non-essential files
   if ! wipe_dir; then
      echo "Terminating..."
      exit 1
   fi
fi

# End of post-processing mode: the menu handles a single choice per run
echo "Terminating..."

exit 0
fi
## Here is the wild land of more than one argument:
## explain the two supported ways of calling the script and give up
echo "<cheRTeX> This script can be run with one argument in process mode,"
echo "<cheRTeX> or with zero arguments in post-processing mode."
echo "<cheRTeX> Terminating..."
exit 1
fi
# print_completed_banner RUNTIME_SECONDS DATE_STRING
#
# Prints the closing "job completed" block. The frame is 37 characters
# wide. Depending on whether the clock uses summer or wintertime, the date
# string length differs by one (CEST vs CET), so the date is padded to 29
# characters to keep the right-hand "===" aligned in both cases.
# The runtime is zero-padded to three digits, which is enough for just
# above 15 minutes.
print_completed_banner() {
   local seconds=$1 datestring=$2
   echo "-------------------------------------"
   printf "=== chertex.sh completed in %03d s ===\n" "$seconds"
   printf "=== %-29s ===\n" "$datestring"
   echo "-------------------------------------"
}

# keep track of runtime of entire script
# (falls back to a runtime of 0 s should the start time be missing)
endtime=$(date +%s)
runtime=$(( endtime - ${starttime:-endtime} ))

# send push message via Gotify CLI
# if runtime is longer than X minutes (suitable limit perhaps 3 min)
if (( runtime > 180 )); then
   # "$*" joins the script arguments into the single message string.
   # NOTE(review): "\n" is passed on literally, as before -- confirm that
   # the Gotify CLI turns it into a line break.
   gotify push --quiet --title "$dir_wd" --priority 5 "chertex.sh $* \nCompleted in $runtime s"
fi

print_completed_banner "$runtime" "$(date)"
simpledelay.sh 3

exit 0