4in1_tools/iiif/dl_welcomecollection.sh
2024-07-25 02:58:08 +03:00

100 lines
3.1 KiB
Bash
Executable File

#!/bin/bash
# Print the command-line help to stdout and terminate the script.
# Always exits with status 1, whether reached via --help or a usage error.
usage() {
  printf '%s\n' \
    "Usage: $0 --method [size|full] --output-dir DIR --url URL" \
    "Options:" \
    " --method Method to use (allowed values: size, full)" \
    " --output-dir Output directory (will be created if it doesn't exist)" \
    " --url URL to process" \
    " --help Display this help message"
  exit 1
}
# Command-line parsing.
#
# Fills METHOD, OUTPUT_DIR and URL from "--method", "--output-dir" and "--url"
# (each takes the following argument as its value). Running with no arguments,
# "--help", an unknown option, or leaving any of the three values empty ends
# in usage(); an unsupported --method value exits with status 1 directly.
if [ $# -eq 0 ]; then
  usage
fi

METHOD=""
OUTPUT_DIR=""
URL=""

until (( $# == 0 )); do
  case "$1" in
    --method)
      METHOD="$2"
      case "$METHOD" in
        size | full) ;;
        *)
          echo "Error: invalid value for --method. Allowed values are 'size' or 'full'."
          exit 1
          ;;
      esac
      # Drop the option name here; the shared shift below drops its value.
      # (Kept as two single shifts: "shift 2" would not shift at all when the
      # value is missing, and the loop would never end.)
      shift
      ;;
    --output-dir)
      OUTPUT_DIR="$2"
      shift
      ;;
    --url)
      URL="$2"
      shift
      ;;
    --help)
      usage
      ;;
    *)
      echo "Error: unknown option $1"
      usage
      ;;
  esac
  shift
done

# All three values are mandatory.
if [[ -z "$METHOD" || -z "$OUTPUT_DIR" || -z "$URL" ]]; then
  echo "Error: missing required arguments."
  usage
fi
# Create the output directory (if needed) and make it the working directory.
#
# Everything after this point uses relative paths: data.json and the page
# images are written to ".", and the junk-file cleanup deletes files found
# under ".". A failed cd must therefore stop the script, otherwise those
# steps would run in whatever directory the script was started from.
#
# "mkdir -p" succeeds silently when the directory already exists, so no
# separate existence test is needed; "--" keeps a name starting with "-" from
# being parsed as an option.
if ! mkdir -p -- "$OUTPUT_DIR"; then
  echo "Error: failed to create directory '$OUTPUT_DIR'." >&2
  exit 1
fi
if ! cd -- "$OUTPUT_DIR"; then
  echo "Error: failed to change into directory '$OUTPUT_DIR'." >&2
  exit 1
fi
# Cache the page metadata in ./data.json.
#
# The HTML at $URL is expected to embed a JSON document in
# <script id="__NEXT_DATA__" type="application/json">. From its
# props.pageProps.compressedTransformedManifest.compressedCanvases object we
# keep: width, height, and imageServiceId.{prefix,suffix,remainders}.
#
# The extraction is written to a temporary file and only moved into place
# once it contains at least one entry under .remainders. Without that, a
# failed request (or a page without the expected data) would leave an empty or
# all-null data.json behind, and every later run would silently reuse it.
# For the same reason an existing data.json that fails the check is fetched
# again instead of being trusted.
pages_present='[.remainders[]?] | length > 0'
if ! jq -e "$pages_present" data.json > /dev/null 2>&1; then
  echo "> fetching json data..."
  tmp_json=data.json.tmp
  curl -s "$URL" \
    | sed -n 's|.*<script id="__NEXT_DATA__" type="application/json">\([^<]*\)</script>.*|\1|p' \
    | jq '.props.pageProps.compressedTransformedManifest.compressedCanvases
        | {
            width,
            height,
            prefix: .imageServiceId.prefix,
            suffix: .imageServiceId.suffix,
            remainders: .imageServiceId.remainders
          }' > "$tmp_json"
  if jq -e "$pages_present" "$tmp_json" > /dev/null 2>&1; then
    mv -- "$tmp_json" data.json
  else
    rm -f -- "$tmp_json"
    echo "Error: could not extract page data from '$URL'." >&2
    exit 1
  fi
fi
# Delete leftovers of failed downloads, then download every page listed in
# data.json (.remainders) that does not yet exist as <page>.jpg in the current
# directory.
#
# Reads:   METHOD ("full" or "size"), ./data.json, optionally ./sizes.json.
# Writes:  <page>.jpg per page; prints "> <page> OK" or "> <page> FAIL".
#
# Each image is downloaded to <page>.jpg.part and renamed only after it
# passed the checks, so an interrupted or failed transfer is never mistaken
# for a finished page on the next run.

# Size in bytes that this script treats as a failed download.
# NOTE(review): presumably the length of the server's error body - confirm.
junk_size=111

echo "> deleting junk files..."
find . -type f -size "${junk_size}c" -exec rm {} +

prefix=$(jq -r '.prefix' data.json)
suffix=$(jq -r '.suffix' data.json)

# Per-page dimensions for --method size. A sizes.json in the output directory
# is still honoured, but this script itself only ever writes data.json (which
# carries the same .width/.height keys), so fall back to that.
# NOTE(review): assumes .width/.height are arrays indexed by page number - 1.
sizes_file=sizes.json
[ -f "$sizes_file" ] || sizes_file=data.json

# Browser-like request headers sent with every image request.
# NOTE(review): Accept-Encoding is advertised without curl's --compressed, so
# a compressed response would be saved undecoded - confirm this is intended.
curl_headers=(
  -H 'User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:127.0) Gecko/20100101 Firefox/127.0'
  -H 'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8'
  -H 'Accept-Language: en-US,en;q=0.5'
  -H 'Accept-Encoding: gzip, deflate, br, zstd'
  -H 'DNT: 1'
  -H 'Connection: keep-alive'
  -H 'Upgrade-Insecure-Requests: 1'
  -H 'Sec-Fetch-Dest: document'
  -H 'Sec-Fetch-Mode: navigate'
  -H 'Sec-Fetch-Site: same-site'
  -H 'Priority: u=1'
  -H 'Pragma: no-cache'
  -H 'Cache-Control: no-cache'
  -H 'TE: trailers'
)

echo "> downloading missing pages..."
jq -r '.remainders[]' data.json | while read -r page; do
  output="${page}.jpg"
  [ -e "$output" ] && continue

  url="${prefix}${page}${suffix}"
  case "$METHOD" in
    full)
      url="${url}/full/full/0/default.jpg"
      ;;
    size)
      # Strip leading zeros so the arithmetic below does not parse the value
      # as octal, then turn the page number into a 0-based index.
      jpage=${page#"${page%%[!0]*}"}
      if ! [[ "$jpage" =~ ^[0-9]+$ ]]; then
        echo "> $page FAIL"
        continue
      fi
      jpage=$(( jpage - 1 ))
      w=$(jq ".width[$jpage]" "$sizes_file")
      h=$(jq ".height[$jpage]" "$sizes_file")
      # No dimensions for this page: do not request a ".../null,null/..." URL.
      if [[ -z "$w" || "$w" == null || -z "$h" || "$h" == null ]]; then
        echo "> $page FAIL"
        continue
      fi
      url="${url}/0,0,${w},${h}/${w},${h}/0/default.jpg"
      ;;
  esac

  part="${output}.part"
  # A page counts as downloaded only if curl succeeded, produced a non-empty
  # file that is not junk-sized, and the rename into place worked.
  if curl --silent "${curl_headers[@]}" -o "$part" "$url" \
    && [ -s "$part" ] \
    && (( $(wc -c < "$part") != junk_size )) \
    && mv -- "$part" "$output"; then
    echo "> $page OK"
  else
    rm -f -- "$part"
    echo "> $page FAIL"
  fi
done