#!/bin/bash
# Umbrella function to simplify use of pdftk
# Created April 20, 2010
# Taha Ahmed, taha@chepec.se
# **************************
# Usage:
# ARG 1: filename of pdf, e.g., 'oxides.pdf'
# ARG 2: specify operation,
#        - 'dump_data' --> writes metadata to $reportfile in current directory
# ARG 3: used when updating metadata, specifies the input text file

# $*   Stores all the arguments that were entered on the command line ($1 $2 ...).
# "$@" Stores all the arguments that were entered on the command line, individually quoted ("$1" "$2" ...).
# -o is logical OR - can be evaluated within single brackets
# -a is logical AND - can be evaluated within single brackets

#############################################################
## Original pdftk syntax
## *********************
## Dump metadata to file
#= pdftk <in.pdf> dump_data output <$reportfile>
## Update metadata from file
#= pdftk <in.pdf> update_info <$reportfile> output <out.pdf>
#############################################################

# Perhaps good to save original PDF

# Could be nice to be able to input metadata updates directly at the bash prompt, something like
# First, script displays current fields
# echo Which field do you want to update/create?
# echo <Display choice as a list>
# read choice newfieldinfo
# do the whole pdftk update stuff
# voila!

#set -x # for debugging

# Name this script was invoked as; used in the banner and in the help text.
# "$0" is quoted so that a path containing spaces is not word-split.
NAME=$(basename -- "$0")
echo "***********************************************************"
echo "This is $NAME. Write $NAME -h for help."

# Set defaults
DUMPMETADATA=yes   # default action
UPDATEMETADATA=no  # set here but not read anywhere in the visible script

# Set filetypes
pdfext=".pdf"
metadataext=".pdfmeta"  # appended to the input's basename to name the dump file

#######################################
# Print the help text to stdout.
# Globals:   NAME (read) - script name interpolated into the text
# Arguments: none
# Outputs:   man-page style help on stdout
#######################################
usage () {
  # The heredoc delimiter is left unquoted on purpose so that $NAME expands.
  # NOTE(review): the DESCRIPTION says that running without any option or
  # filename dumps metadata for every PDF in the current directory; confirm
  # that against the script's argument handling before relying on it.
  cat <<EOF
NAME
    $NAME - Wraps pdftk in a personalised run environment.

SYNOPSIS
    $NAME [OPTION], or
    $NAME [FILENAME.PDF]

DESCRIPTION
    The following options can be used:

    -h, --help
        Displays this help text.

    If no option nor filename is specified, the script defaults to
    dumping metadata information for all PDF files present in the
    current directory into *.pdfmeta files, one for each PDF.
    If no PDF files are present in the current directory, the script
    exits without taking any further action.

DETAILS


EXAMPLE
    Extract metadata for all PDFs in directory and dump into separate files
    $NAME -a

    Default usage - dump PDF metadata in <filename>.pdfmeta
    $NAME <filename>.pdf

AUTHOR
    Taha Ahmed <taha.ahmed at mkem.uu.se>

EOF
}

#############################################################
## Command-line handling and execution
##
## Reads:  "$@", NAME, DUMPMETADATA, metadataext, usage()
## Exits:  0 on success (or after -h),
##         1 on missing/unsupported input or if any pdftk call failed.
##
## Input files are kept in a bash array (one element per command-line
## argument) so that several files are processed one by one and names
## containing spaces survive intact.
#############################################################

PROCESS_ALL=no  # switched to yes by -a/--all
FILES=()        # input files collected from the command line

for OPTION in "$@"; do
  case "$OPTION" in
    -h | --help )
      usage
      exit 0
      ;;
    -a | --all )
      # Perform default action on all PDFs in current directory
      PROCESS_ALL=yes
      ;;
    * ) # anything else is taken to be an input file
      FILES+=("$OPTION")
      ;;
  esac
done

if [[ "$PROCESS_ALL" == "yes" ]]; then
  # nullglob makes an unmatched pattern expand to nothing instead of to
  # the literal pattern text.
  shopt -s nullglob
  FILES+=(*.[Pp][Dd][Ff])
  shopt -u nullglob

  if (( ${#FILES[@]} == 0 )); then
    echo "No PDF files found in the current directory."
    exit 0
  fi
fi

# Check if no input files were given
# NOTE(review): the help text describes a no-argument default (process every
# PDF in the current directory); the existing behaviour of reporting an
# error is kept here.
if (( ${#FILES[@]} == 0 )); then
  echo -e "\nError: There is no input files! Usage is:\n"
  usage
  exit 1
fi

# Check file types (pre-assumes a file extension exists).
# The match is on a literal ".pdf"/".tex" suffix, case-insensitively.
# NOTE(review): .tex files pass this check but are still handed to pdftk
# below, exactly as before - confirm whether that is intended.
for FILE in "${FILES[@]}"; do
  case "$FILE" in
    *.[Pp][Dd][Ff] | *.[Tt][Ee][Xx] )
      ;;
    * )
      echo "$FILE is not a supported file type!"
      echo "It should be one of: .pdf or .tex"
      exit 1
      ;;
  esac
done

# Execution
STATUS=0
for FILE in "${FILES[@]}"; do
  # Metadata filename is pdf basename plus .pdfmeta extension
  METADATAFILE="${FILE%.*}${metadataext}"

  if [[ "$DUMPMETADATA" == "yes" ]]; then
    # Both paths are quoted so that a name containing spaces reaches pdftk
    # as a single argument; unquoted, the shell would split it into words.
    if ! pdftk "$FILE" dump_data output "$METADATAFILE"; then
      echo "$NAME: pdftk failed for $FILE" >&2
      STATUS=1  # keep going with the remaining files, report failure at exit
    fi
  fi
done

exit "$STATUS"

### STUFF BELOW IS OLD
#
# Earlier argument-count based interface, kept for reference only.
# It sits after the script's final `exit`, so it never ran; it is wrapped in
# a function that nothing calls so that it stays inert.
#
# Arguments: $1 - PDF file, $2 - pdftk operation (only 'dump_data' acts)
# Outputs:   a line on stdout stating how many arguments were received;
#            with exactly two arguments and $2 = dump_data, pdftk writes
#            the metadata of $1 to ./chepdftk-report.txt
chepdftk_legacy() {
  local reportfile=./chepdftk-report.txt

  # Check number of args supplied
  case $# in
    0)
      echo No arguments given.
      return 0
      ;;
    1)
      echo One argument.
      ;;
    2)
      echo Two arguments.
      if [[ "$2" == "dump_data" ]]; then
        pdftk "$1" "$2" output "$reportfile"
      fi
      ;;
    3)
      echo Three arguments.
      ;;
    *)
      echo More than three arguments. Too many.
      ;;
  esac
}