bookscan_utils/clean-pdf.sh
2024-01-29 01:09:48 +03:00

35 lines
817 B
Bash
Executable File

#!/bin/sh
# Invocation: <this script> FILENAME
#
# This first part only configures the shell and validates the argument;
# the actual processing follows below.

# Print every command as it runs, and stop at the first one that fails.
set -x
set -e

# A non-empty first argument is mandatory.
if [ -z "$1" ]; then
  echo "usage: $0 FILENAME" >&2
  exit 1
fi

file="$1"

# The argument must name an existing regular file (that is what `-f` tests).
if ! [ -f "$file" ]; then
  echo "error: file $file does not exist" >&2
  exit 1
fi
# Select the sed executable: `gsed` when the kernel name reported by
# `uname -s` is FreeBSD, plain `sed` on every other system.
# NOTE(review): presumably because the sed expression used later relies on
# GNU sed syntax -- confirm before extending this to other platforms.
case "$(uname -s)" in
  FreeBSD) SED=gsed ;;
  *) SED=sed ;;
esac
# Strip the metadata from "$file" and replace it in place with the cleaned,
# linearized copy.
#
# Reads:  $file (path of the input PDF), $SED (sed executable to use).
# Needs:  pdftk, exiftool, qpdf, pdfinfo, mktemp.
# All intermediate files live in a private directory created next to the
# input; it is removed on every exit path, and "$file" is only touched by the
# final rename, so a failure at any step leaves the original intact.
# Error handling relies on `set -e` being active (enabled at the top of this
# script): each step below is a separate simple command so that any failing
# tool stops the run.

# A leading "-" would make the tools below parse the path as an option.
case "$file" in
  -*) file="./$file" ;;
esac

# Strip the extension only when the basename really has one. A bare
# "${file%.*}" would otherwise cut at a dot in the directory part (turning
# "./scan" into "" and "../dir/scan" into "."), moving the work files away
# from the input.
case "${file##*/}" in
  ?*.*) file_noext="${file%.*}" ;;
  *) file_noext="$file" ;;
esac

# Private work directory beside the input: same filesystem as "$file", and no
# risk of clobbering unrelated files that happen to be called
# NAME.clean.pdf / NAME.clean2.pdf.
workdir=$(mktemp -d "$file_noext.clean.XXXXXX") || exit 1
cleanup() {
  rm -rf -- "${workdir:?}"
}
trap cleanup EXIT
# Turn the usual termination signals into a normal exit so the EXIT trap runs
# in every sh implementation.
trap 'exit 129' HUP
trap 'exit 130' INT
trap 'exit 143' TERM

info_dump="$workdir/info.dump"
info_blank="$workdir/info.blank"
stage1="$workdir/stage1.pdf"
stage2="$workdir/stage2.pdf"

# 1. Dump the document info, blank every "InfoValue:" line, and write a copy
#    of the PDF carrying the blanked info. Going through files instead of one
#    pipeline lets a failing dump or sed abort the run rather than feeding
#    empty input to update_info.
pdftk "$file" dump_data > "$info_dump"
"$SED" -e 's/\(InfoValue:\)\s.*/\1\ /g' "$info_dump" > "$info_blank"
pdftk "$file" update_info "$info_blank" output "$stage1"

# 2. Delete all tags exiftool can write, then print what is left (second call:
#    regular tags; third call: also information from embedded documents).
#    The write leaves a "<name>_original" backup inside the work directory.
exiftool -all:all= "$stage1"
exiftool -all:all "$stage1"
exiftool -extractEmbedded -all:all "$stage1"

# 3. Rewrite the file with qpdf.
#    NOTE(review): presumably this is what makes step 2 permanent, since
#    exiftool documents its PDF edits as reversible -- confirm.
qpdf --linearize "$stage1" "$stage2"

# 4. Show the remaining metadata of the final file with three different tools
#    so the result can be checked by eye.
pdftk "$stage2" dump_data
exiftool "$stage2"
pdfinfo -meta "$stage2"

# 5. Replace the input with the cleaned copy in a single rename (the work
#    directory sits next to the input), instead of deleting the original
#    first and moving afterwards.
mv -f -- "$stage2" "$file"