diff --git a/iiif/dl_welcomecollection.sh b/iiif/dl_welcomecollection.sh
new file mode 100755
index 0000000..0b9e206
--- /dev/null
+++ b/iiif/dl_welcomecollection.sh
@@ -0,0 +1,148 @@
+#!/bin/bash
+#
+# Download the page images of a Wellcome Collection item via its IIIF image
+# service. The page at --url is fetched once and the image-service fields are
+# cached in data.json inside --output-dir. Pages already saved as PAGE.jpg are
+# skipped, so the script can be re-run to pick up missing pages.
+#
+# Requires: curl, jq, sed.
+
+# Let a failure in any stage of the fetch pipeline be detected.
+set -o pipefail
+
+# Print the help text and exit.
+# Arguments: $1 - exit status (optional, defaults to 1)
+usage() {
+  echo "Usage: $0 --method [size|full] --output-dir DIR --url URL"
+  echo "Options:"
+  echo "  --method      Method to use (allowed values: size, full)"
+  echo "  --output-dir  Output directory (will be created if it doesn't exist)"
+  echo "  --url         URL to process"
+  echo "  --help        Display this help message"
+  exit "${1:-1}"
+}
+
+[ $# -eq 0 ] && usage
+METHOD=
+OUTPUT_DIR=
+URL=
+
+while [[ $# -gt 0 ]]; do
+  key="$1"
+
+  case $key in
+    --method)
+      METHOD="$2"
+      if [[ "$METHOD" != "size" && "$METHOD" != "full" ]]; then
+        echo "Error: invalid value for --method. Allowed values are 'size' or 'full'."
+        exit 1
+      fi
+      shift
+      ;;
+    --output-dir)
+      OUTPUT_DIR="$2"
+      shift
+      ;;
+    --url)
+      URL="$2"
+      shift
+      ;;
+    --help)
+      usage 0
+      ;;
+    *)
+      echo "Error: unknown option $1"
+      usage
+      ;;
+  esac
+  shift
+done
+
+if [ -z "$METHOD" ] || [ -z "$OUTPUT_DIR" ] || [ -z "$URL" ]; then
+  echo "Error: missing required arguments."
+  usage
+fi
+
+if ! mkdir -p "$OUTPUT_DIR"; then
+  echo "Error: failed to create directory '$OUTPUT_DIR'."
+  exit 1
+fi
+
+cd "$OUTPUT_DIR" || exit 1
+
+# A non-empty data.json acts as the cache; the result is written to a
+# temporary file first so a failed fetch never leaves a cache file behind.
+if [ ! -s data.json ]; then
+  echo "> fetching json data..."
+  # NOTE(review): the sed expression used to reference \1 without any capture
+  # group, which sed rejects. The pattern below assumes the JSON sits in a
+  # Next.js "__NEXT_DATA__" script element (the jq paths start at
+  # .props.pageProps) - confirm against the live page markup.
+  if curl -s "$URL" \
+    | sed -n 's|.*<script id="__NEXT_DATA__"[^>]*>\([^<]*\)</script>.*|\1|p' \
+    | jq '.props.pageProps.compressedTransformedManifest.compressedCanvases
+          | {width, height,
+             prefix: .imageServiceId.prefix,
+             suffix: .imageServiceId.suffix,
+             remainders: .imageServiceId.remainders}' > data.json.tmp \
+    && [ -s data.json.tmp ]; then
+    mv data.json.tmp data.json
+  else
+    rm -f data.json.tmp
+    echo "Error: failed to fetch or parse page data from '$URL'."
+    exit 1
+  fi
+fi
+
+echo "> deleting junk files..."
+# A 111-byte file is what the download loop below reports as FAIL; remove
+# such images so they are fetched again.
+find . -type f -name '*.jpg' -size 111c -exec rm -- {} +
+
+prefix=$(jq -r '.prefix' data.json)
+suffix=$(jq -r '.suffix' data.json)
+
+echo "> downloading missing pages..."
+jq -r '.remainders[]' data.json | while read -r page; do
+  output="${page}.jpg"
+  [ -e "$output" ] && continue
+  url="${prefix}${page}${suffix}"
+  case $METHOD in
+    full)
+      url="${url}/full/full/0/default.jpg"
+      ;;
+    size)
+      # Strip the zero padding from the page identifier and turn it into a
+      # 0-based index into the width/height arrays.
+      jpage=$(printf '%s' "$page" | sed 's/^0*//')
+      jpage=$(( jpage - 1 ))
+      # The dimensions are stored in data.json (written above); no
+      # sizes.json is ever created by this script.
+      w=$(jq --argjson i "$jpage" '.width[$i]' data.json)
+      h=$(jq --argjson i "$jpage" '.height[$i]' data.json)
+      url="${url}/0,0,${w},${h}/${w},${h}/0/default.jpg"
+      ;;
+  esac
+  curl --silent "$url" \
+    -H 'User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:127.0) Gecko/20100101 Firefox/127.0' \
+    -H 'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8' \
+    -H 'Accept-Language: en-US,en;q=0.5' \
+    -H 'Accept-Encoding: gzip, deflate, br, zstd' \
+    -H 'DNT: 1' \
+    -H 'Connection: keep-alive' \
+    -H 'Upgrade-Insecure-Requests: 1' \
+    -H 'Sec-Fetch-Dest: document' \
+    -H 'Sec-Fetch-Mode: navigate' \
+    -H 'Sec-Fetch-Site: same-site' \
+    -H 'Priority: u=1' \
+    -H 'Pragma: no-cache' \
+    -H 'Cache-Control: no-cache' \
+    -H 'TE: trailers' \
+    -o "$output"
+  # wc -c works with both GNU and BSD userlands, unlike stat -c.
+  if [ ! -f "$output" ] || [ "$(wc -c < "$output")" -eq 111 ]; then
+    echo "> $page FAIL"
+  else
+    echo "> $page OK"
+  fi
+done
diff --git a/iiif/folger.sh b/iiif/folger.sh
new file mode 100755
index 0000000..4031fee
--- /dev/null
+++ b/iiif/folger.sh
@@ -0,0 +1,81 @@
+#!/bin/sh
+#
+# Download every image referenced by a IIIF manifest into a directory, naming
+# the files 0001.EXT, 0002.EXT, ... in manifest order.
+#
+# Requires: curl, jq.
+
+set -e
+#set -x
+
+# Print the usage line and exit with status 1.
+usage() {
+  echo "usage: $0 --output-dir|-o DIR [--format|-f jpg|tif] URL"
+  exit 1
+}
+
+output_dir=
+url=
+format=jpg
+
+while [ "$#" -gt 0 ]; do
+  case $1 in
+    --output-dir|-o)
+      [ "$#" -ge 2 ] || usage
+      output_dir="$2"
+      shift
+      ;;
+    --format|-f)
+      [ "$#" -ge 2 ] || usage
+      format="$2"
+      shift
+      ;;
+    *)
+      url="$1"
+      ;;
+  esac
+  shift
+done
+
+if [ -z "$output_dir" ] || [ -z "$url" ]; then
+  usage
+fi
+
+case $format in
+  jpg|tif) ;;
+  *) usage ;;
+esac
+
+if [ -d "$output_dir" ]; then
+  printf '%s' "The directory '$output_dir' already exists. Do you want to delete/recreate it (d) or skip and go on (s)? "
+  read -r response
+  case $response in
+    [dD])
+      rm -rf -- "$output_dir"
+      mkdir -p -- "$output_dir"
+      ;;
+    [sS])
+      ;;
+    *)
+      echo "error: invalid option" >&2
+      exit 1
+      ;;
+  esac
+else
+  mkdir -p -- "$output_dir"
+fi
+
+image_urls=$(curl -s "$url" | jq -r '.sequences[].canvases[].images[].resource."@id"')
+counter=1
+# Word-splitting the list relies on the image URLs containing no whitespace.
+for image_url in $image_urls; do
+  filename=$(printf '%s/%04d.%s' "$output_dir" "$counter" "$format")
+  if [ "$format" = "tif" ]; then
+    # NOTE(review): assumes the TIFF is served at the same path with the
+    # extension swapped - confirm against the image server.
+    image_url="${image_url%.jpg}.tif"
+  fi
+  curl -sSL "$image_url" -o "$filename"
+  counter=$((counter + 1))
+  echo "$(basename "$filename") saved"
+done
diff --git a/validate_tiff.sh b/validate_tiff.sh
new file mode 100755
index 0000000..003a5a0
--- /dev/null
+++ b/validate_tiff.sh
@@ -0,0 +1,51 @@
+#!/bin/sh
+# NOTE: tifftopnm is a part of netpbm package
+#
+# Check every *.tif file in the current directory by converting it with
+# tifftopnm; a file counts as invalid when the converter prints a message
+# containing "error" (case-insensitive) on stderr.
+
+# Print the help text.
+usage() {
+  echo "Usage: $0 [--delete]"
+  echo
+  echo "This script checks if TIFF files in the current directory are valid and prints invalid ones."
+  echo
+  echo "Options:"
+  echo "  --delete    Delete invalid TIFF files."
+  echo "  --help      Display this help message."
+}
+
+DELETE_FILES=false
+for arg in "$@"; do
+  case $arg in
+    --delete)
+      DELETE_FILES=true
+      ;;
+    --help)
+      usage
+      exit 0
+      ;;
+    *)
+      echo "Unknown argument: $arg" >&2
+      usage >&2
+      exit 1
+      ;;
+  esac
+done
+
+for f in *.tif; do
+  # With no matching files the pattern stays literal; skip that entry.
+  [ -e "$f" ] || continue
+  printf '%s... ' "$f"
+  # 2>&1 >/dev/null sends only stderr into the pipe and discards the image data.
+  if tifftopnm "$f" 2>&1 >/dev/null | grep -qi error; then
+    if [ "$DELETE_FILES" = true ]; then
+      rm -- "$f"
+      printf 'DELETED'
+    else
+      printf 'ERROR'
+    fi
+  fi
+  echo
+done
diff --git a/get-refs.sh b/web/get-refs.sh
similarity index 97%
rename from get-refs.sh
rename to web/get-refs.sh
index 3b59f8f..15b096a 100755
--- a/get-refs.sh
+++ b/web/get-refs.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/local/bin/bash
 
 source ~/.local/my/bash/bashrc
 
diff --git a/release-book.sh b/web/release-book.sh
similarity index 100%
rename from release-book.sh
rename to web/release-book.sh