#!/bin/bash
# Umbrella script to simplify use of pdftk
# Created April 20, 2010
# Taha Ahmed, taha@chepec.se
# **************************
# Usage (legacy positional interface, see the old code at the end of the file):
# ARG 1: filename of pdf, e.g., 'oxides.pdf'
# ARG 2: specify operation,
#        - 'dump_data' --> writes metadata to $reportfile in current directory
# ARG 3: used when updating metadata, specifies the input text file

# Shell reminders:
# "$*" Expands to all the arguments that were entered on the command line, joined into a single word ("$1 $2 ...").
# "$@" Expands to all the arguments that were entered on the command line, individually quoted ("$1" "$2" ...).
# -o is logical OR - can be evaluated within single brackets
# -a is logical AND - can be evaluated within single brackets

#############################################################
## Original pdftk syntax
## *********************
## Dump metadata to file
#= pdftk <in.pdf> dump_data output <$reportfile>
## Update metadata from file
#= pdftk <in.pdf> update_info <$reportfile> output <out.pdf>
#############################################################

# It is probably a good idea to keep a copy of the original PDF.

# It could be nice to be able to enter metadata updates directly at the bash prompt, something like:
#   First, the script displays the current fields
#   echo Which field do you want to update/create?
#   echo <Display choice as a list>
#   read choice newfieldinfo
#   do the whole pdftk update stuff
#   voila!

#set -x # for debugging

# Name this script was invoked as, without any leading directories.
# "$0" is quoted so that an install path containing spaces does not get
# word-split before it reaches basename; "--" protects a leading dash.
NAME=$(basename -- "$0")

# Banner shown on every run
echo "***********************************************************"
echo "This is $NAME. Write $NAME -h for help."

# Set defaults
DUMPMETADATA=yes # default action
UPDATEMETADATA=no

# Set filetypes
pdfext=".pdf"
metadataext=".pdfmeta"

#######################################
# Print the help text for this script on stdout.
# Globals:
#   NAME (read) - script name interpolated into the help text
# Arguments:
#   None
# Outputs:
#   The man-page style help text.
# NOTE(review): the DESCRIPTION below says that running without arguments
#   dumps metadata for every PDF in the current directory, while the argument
#   check later in this script reports an error when no input file is given.
#   Confirm which behaviour is intended before rewording the text.
#######################################
usage () {
  # Unquoted delimiter: $NAME is expanded, everything else is literal text.
  cat <<EOF
NAME
       $NAME - Wraps pdftk in a personalised run environment.

SYNOPSIS
       $NAME [OPTION], or
       $NAME [FILENAME.PDF]

DESCRIPTION
       The following options can be used:

       -h, --help
              Displays this help text.

       If no option nor filename is specified, the script defaults to
       dumping metadata information for all PDF files present in the
       current directory into *.pdfmeta files, one for each PDF.
       If no PDF files are present in the current directory, the script
       exits without taking any further action.

DETAILS


EXAMPLE
       Extract metadata for all PDFs in directory and dump into separate files
              $NAME -a

       Default usage - dump PDF metadata in <filename>.pdfmeta
              $NAME <filename>.pdf

AUTHOR
       Taha Ahmed <taha.ahmed at mkem.uu.se>

EOF
}

# ---------------------------------------------------------------------------
# Command-line handling and execution.
#
# Every argument that is not a recognised option is treated as an input file.
# Input files are kept in a Bash array (not in a whitespace-joined string) so
# that several files, and file names containing spaces, are processed one at
# a time and reach pdftk intact.
#
# Globals read: DUMPMETADATA, metadataext (set near the top of the script).
# Exit status:  0 on success (or after -h), 1 on bad input or if any pdftk
#               call fails.
# ---------------------------------------------------------------------------

FILES=()     # input files, one array element per file
ALLPDFS=no   # switched to "yes" by -a / --all

for OPTION in "$@"; do
  case "$OPTION" in
    -h | --help )
      usage
      exit 0
      ;;
    -a | --all )
      # Perform default action on all PDFs in current directory
      ALLPDFS=yes
      ;;
    * )
      # Not an option: remember it as an input file name
      FILES+=("$OPTION")
      ;;
  esac
done

# -a / --all: add every PDF found in the current directory
if [ "$ALLPDFS" = "yes" ]; then
  for FILE in *.[Pp][Dd][Ff]; do
    # An unmatched glob is left as the literal pattern; -e filters that out
    if [ -e "$FILE" ]; then
      FILES+=("$FILE")
    fi
  done
  if [ "${#FILES[@]}" -eq 0 ]; then
    # Nothing to do: exit without taking any further action
    echo "No PDF files found in the current directory."
    exit 0
  fi
fi

# Check if no input files were given
if [ "${#FILES[@]}" -eq 0 ]; then
  # Perhaps do default action for all pdf files in current directory?
  echo -e "\nError: There is no input files! Usage is:\n"
  usage
  exit 1
fi

# Check file types (pre-assumes a file extension exists).
# The literal dot in the patterns means "xpdf" is no longer accepted as ".pdf".
# NOTE(review): .tex is accepted here although the files are later handed to
# pdftk - kept for backward compatibility, confirm whether it is still wanted.
for FILE in "${FILES[@]}"; do
  case "$FILE" in
    *.[Pp][Dd][Ff] | *.[Tt][Ee][Xx] )
      ;;
    * )
      echo "$FILE is not a supported file type!"
      echo "It should be one of: .pdf or .tex"
      exit 1
      ;;
  esac
done

# Execution
STATUS=0
for FILE in "${FILES[@]}"; do
  # Metadata filename is pdf basename plus .pdfmeta extension
  METADATAFILE=${FILE%.*}$metadataext

  if [ "$DUMPMETADATA" = "yes" ]; then
    # Both file names are quoted: unquoted expansions (not pdftk itself)
    # were what broke file names and paths containing spaces.
    # Keep going after a failure, but remember it for the exit status.
    pdftk "$FILE" dump_data output "$METADATAFILE" || STATUS=1
  fi
done

exit "$STATUS"

### STUFF BELOW IS OLD
# Legacy positional interface: <file.pdf> <operation> [<metadata file>].
# The script exits unconditionally before this point, so nothing below is
# ever executed; it is kept for reference only.
reportfile=./chepdftk-report.txt

# Dispatch on the number of args supplied
case $# in
  0 )
    # Zero arguments
    echo No arguments given.
    exit
    ;;
  1 )
    # One argument was passed
    echo One argument.
    ;;
  2 )
    # Two arguments were passed: <file.pdf> <operation>
    echo Two arguments.
    if [ "$2" = "dump_data" ]; then
      # Quoted so that file names containing spaces survive word splitting
      pdftk "$1" "$2" output "$reportfile"
    fi
    ;;
  3 )
    # Three arguments were passed
    echo Three arguments.
    ;;
  * )
    # More than three arguments
    echo More than three arguments. Too many.
    ;;
esac