#!/bin/sh
#
# clean-pdf.sh - strip metadata from a PDF and replace the file in place.
#
# usage: clean-pdf.sh FILENAME
#
# Steps:
#   1. pdftk    - dump the document info and write it back with every
#                 InfoValue blanked.
#   2. exiftool - delete every tag it can write, then list what is left
#                 (also with -extractEmbedded) for inspection.
#   3. qpdf     - rewrite the file linearized.
#   4. pdftk / exiftool / pdfinfo - print the metadata remaining in the
#                 final file so the result can be checked by eye.
#   5. Move the result over FILENAME.
#
# All intermediate files live in a private temporary directory created next
# to FILENAME; it is removed on every exit path.
#
# Requires: pdftk, exiftool, qpdf, pdfinfo, mktemp.

set -x   # trace each command as it runs
set -eu  # abort on the first failing command or unset variable

if [ -z "${1:-}" ]; then
  printf 'usage: %s FILENAME\n' "$0" >&2
  exit 1
fi

file="$1"
if [ ! -f "$file" ]; then
  printf 'error: file %s does not exist\n' "$file" >&2
  exit 1
fi

# A name starting with '-' would be parsed as an option by the tools below.
case "$file" in
  -*) file="./$file" ;;
esac

# Keep the work directory on the same filesystem as the input so the final
# mv is a rename rather than a copy.
dir=$(dirname "$file")
workdir=$(mktemp -d "$dir/.clean-pdf.XXXXXX")

cleanup() {
  rm -rf -- "${workdir:?}"
}
trap cleanup EXIT
# Route signals through a normal exit so the EXIT trap also runs for them.
trap 'exit 1' HUP INT TERM

info_orig="$workdir/info.orig"
info_blank="$workdir/info.blank"
stage1="$workdir/stage1.pdf"
stage2="$workdir/stage2.pdf"

# Run the former "dump | sed | update_info" pipeline as separate steps:
# plain sh has no pipefail, so a failing dump_data would go unnoticed.
pdftk "$file" dump_data > "$info_orig"

# Blank every InfoValue. Uses only POSIX sed syntax ([[:space:]] and a
# literal space) so no GNU sed is needed on BSD systems.
sed -e 's/\(InfoValue:\)[[:space:]].*/\1 /' "$info_orig" > "$info_blank"

pdftk "$file" update_info "$info_blank" output "$stage1"

# Remove all writable tags, then show what exiftool still sees. Any
# "_original" backup exiftool writes lands in $workdir and is cleaned up
# with it.
exiftool -all:all= "$stage1"
exiftool -all:all "$stage1"
exiftool -extractEmbedded -all:all "$stage1"

# Per the ExifTool documentation its PDF edits are reversible until the file
# is rewritten; "qpdf --linearize" is the rewrite it recommends.
qpdf --linearize "$stage1" "$stage2"

# Print the remaining metadata of the final file for manual review.
pdftk "$stage2" dump_data
exiftool "$stage2"
pdfinfo -meta "$stage2"

# Replace the input in one step; the original is never removed beforehand,
# so a failure here leaves it untouched.
mv "$stage2" "$file"