add some new tools

This commit is contained in:
E. S. 2024-07-25 02:58:08 +03:00
parent 397ee7d420
commit 3b7808f179
5 changed files with 212 additions and 1 deletions

99
iiif/dl_welcomecollection.sh Executable file
View File

@ -0,0 +1,99 @@
#!/bin/bash
# Show the command-line synopsis and option list on stdout, then terminate
# the script with exit status 1.
usage() {
  printf '%s\n' \
    "Usage: $0 --method [size|full] --output-dir DIR --url URL" \
    "Options:" \
    " --method Method to use (allowed values: size, full)" \
    " --output-dir Output directory (will be created if it doesn't exist)" \
    " --url URL to process" \
    " --help Display this help message"
  exit 1
}
# Parse the command line into METHOD, OUTPUT_DIR and URL.
# Running without any argument, passing --help, or passing an unknown option
# prints the usage text (usage terminates the script).
if [[ $# -eq 0 ]]; then
  usage
fi

METHOD=""
OUTPUT_DIR=""
URL=""

until [[ $# -eq 0 ]]; do
  case "$1" in
    --help)
      usage
      ;;
    --url)
      URL=$2
      shift
      ;;
    --output-dir)
      OUTPUT_DIR=$2
      shift
      ;;
    --method)
      METHOD=$2
      case "$METHOD" in
        size | full) ;;
        *)
          echo "Error: invalid value for --method. Allowed values are 'size' or 'full'."
          exit 1
          ;;
      esac
      shift
      ;;
    *)
      echo "Error: unknown option $1"
      usage
      ;;
  esac
  # Drop the option name itself; value-taking options already dropped their
  # value above.
  shift
done

# All three options are mandatory.
if [[ -z "$METHOD" || -z "$OUTPUT_DIR" || -z "$URL" ]]; then
  echo "Error: missing required arguments."
  usage
fi
# Make sure the output directory exists and switch into it; everything that
# follows works on paths relative to that directory.
if [ ! -d "$OUTPUT_DIR" ]; then
  if ! mkdir -p -- "$OUTPUT_DIR"; then
    echo "Error: failed to create directory '$OUTPUT_DIR'." >&2
    exit 1
  fi
fi
# Abort if the directory cannot be entered: the script goes on to delete and
# create files relative to the current directory, which must not happen in
# whatever directory the script was started from.
if ! cd -- "$OUTPUT_DIR"; then
  echo "Error: failed to change into directory '$OUTPUT_DIR'." >&2
  exit 1
fi

# Extract the page metadata embedded in the HTML page (the JSON payload of the
# __NEXT_DATA__ script tag) and keep only the fields used later:
# width, height, prefix, suffix and remainders.
# The result is cached in data.json, so the page is fetched only once.
if [ ! -f data.json ]; then
  echo "> fetching json data..."
  # Write to a temporary file first: an empty or broken data.json would
  # otherwise be taken for a valid cache on the next run and never refreshed.
  curl -s "$URL" \
    | sed -n 's|.*<script id="__NEXT_DATA__" type="application/json">\([^<]*\)</script>.*|\1|p' \
    | jq '.props.pageProps.compressedTransformedManifest.compressedCanvases
          | {width: .width, height: .height, prefix: .imageServiceId.prefix, suffix: .imageServiceId.suffix, remainders: .imageServiceId.remainders}' \
      > data.json.part
  # $? is the status of jq (last pipeline stage); an empty file means the
  # page could not be fetched or did not contain the expected script tag.
  if [ $? -ne 0 ] || [ ! -s data.json.part ]; then
    rm -f -- data.json.part
    echo "Error: failed to extract JSON data from '$URL'." >&2
    exit 1
  fi
  mv -- data.json.part data.json
fi
# Remove leftovers of failed downloads so that they are fetched again below.
# This script treats a 111-byte file as a failed download (presumably the
# size of the server's error response -- TODO confirm). Only *.jpg files are
# considered so that data.json can never be removed by accident.
echo "> deleting junk files..."
find . -type f -name '*.jpg' -size 111c -exec rm -- {} +

prefix=$(jq -r '.prefix' data.json)
suffix=$(jq -r '.suffix' data.json)

# Source of the per-page dimensions used by --method size. The script never
# creates sizes.json itself, so it is only honored when the user supplied
# one; otherwise the width/height fields stored in data.json are used.
sizes_file=sizes.json
[ -f "$sizes_file" ] || sizes_file=data.json

# Browser-like request headers sent with every image request.
curl_headers=(
  -H 'User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:127.0) Gecko/20100101 Firefox/127.0'
  -H 'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8'
  -H 'Accept-Language: en-US,en;q=0.5'
  -H 'Accept-Encoding: gzip, deflate, br, zstd'
  -H 'DNT: 1'
  -H 'Connection: keep-alive'
  -H 'Upgrade-Insecure-Requests: 1'
  -H 'Sec-Fetch-Dest: document'
  -H 'Sec-Fetch-Mode: navigate'
  -H 'Sec-Fetch-Site: same-site'
  -H 'Priority: u=1'
  -H 'Pragma: no-cache'
  -H 'Cache-Control: no-cache'
  -H 'TE: trailers'
)

echo "> downloading missing pages..."
jq -r '.remainders[]' data.json | while read -r page; do
  # Pages that already exist are not downloaded again.
  [ -e "$page.jpg" ] && continue

  url="${prefix}${page}${suffix}"
  case "$METHOD" in
    full)
      url="${url}/full/full/0/default.jpg"
      ;;
    size)
      # Strip leading zeros (so the number is not read as octal) and turn
      # the 1-based page number into a 0-based index.
      jpage=${page#"${page%%[!0]*}"}
      jpage=$(( jpage - 1 ))
      w=$(jq --argjson i "$jpage" '.width[$i]' "$sizes_file")
      h=$(jq --argjson i "$jpage" '.height[$i]' "$sizes_file")
      # Request the whole page area at its exact pixel size.
      url="${url}/0,0,${w},${h}/${w},${h}/0/default.jpg"
      ;;
  esac

  output="${page}.jpg"
  curl --silent "$url" "${curl_headers[@]}" -o "$output"

  # A missing, empty or 111-byte result counts as a failed download. The
  # file is removed right away so that the next run retries the page.
  size=0
  [ -f "$output" ] && size=$(( $(wc -c < "$output") ))
  if [ "$size" -eq 0 ] || [ "$size" -eq 111 ]; then
    rm -f -- "$output"
    echo "> $page FAIL"
  else
    echo "> $page OK"
  fi
done

69
iiif/folger.sh Executable file
View File

@ -0,0 +1,69 @@
#!/bin/sh
set -e
#set -x
# Print the one-line synopsis on stdout and stop the script with status 1.
usage() {
  printf '%s\n' "usage: $0 --output-dir|-o <output-directory> [--format|-f jpg|tif] <IIIF-manifest-URL>"
  exit 1
}
# Command-line parsing.
#   output_dir  value of --output-dir / -o (required)
#   format      value of --format / -f (defaults to jpg)
#   url         any other argument; when several are given the last one wins
output_dir=""
url=""
format="jpg"

until [ "$#" -eq 0 ]; do
  arg=$1
  shift
  case "$arg" in
    -o | --output-dir)
      # The option's value is now the first remaining argument.
      output_dir=$1
      shift
      ;;
    -f | --format)
      format=$1
      shift
      ;;
    *)
      url=$arg
      ;;
  esac
done

# Both an output directory and a manifest URL are mandatory.
if ! { [ -n "$output_dir" ] && [ -n "$url" ]; }; then
  usage
fi
# Prepare the output directory. When it already exists the user decides
# whether it is wiped and recreated (d) or reused as it is (s); any other
# answer aborts the script.
if [ -d "$output_dir" ]; then
  # printf instead of "echo -n": the latter is not portable under /bin/sh.
  printf '%s' "The directory '$output_dir' already exists. Do you want to delete/recreate it (d) or skip and go on (s)? "
  # Treat end-of-input as an empty (invalid) answer instead of letting
  # "set -e" terminate the script without any message.
  read -r response || response=
  case "$response" in
    [dD])
      # ${output_dir:?} refuses to run rm -rf with an empty path.
      rm -rf -- "${output_dir:?}"
      mkdir -p -- "$output_dir"
      ;;
    [sS])
      ;;
    *)
      echo "error: invalid option" >&2
      exit 1
      ;;
  esac
else
  mkdir -p -- "$output_dir"
fi
# Collect the image resource URLs ("@id") of every canvas in the manifest,
# one per line.
image_urls=$(curl -s "$url" | jq -r '.sequences[].canvases[].images[].resource."@id"')
if [ -z "$image_urls" ]; then
  echo "error: no image URLs found in manifest '$url'" >&2
  exit 1
fi

# Download the images in manifest order as 0001.<format>, 0002.<format>, ...
counter=1
while IFS= read -r image_url; do
  [ -n "$image_url" ] || continue
  # Directory and format are passed as arguments rather than embedded in
  # the format string, so a '%' in either of them cannot corrupt the name.
  filename=$(printf '%s/%04d.%s' "$output_dir" "$counter" "$format")
  if [ "$format" = "tif" ]; then
    # Request the TIFF rendition by swapping a trailing .jpg for .tif.
    image_url="${image_url%.jpg}.tif"
  fi
  curl -sSL "$image_url" -o "$filename"
  counter=$((counter + 1))
  printf '%s saved\n' "${filename##*/}"
done <<EOF
$image_urls
EOF
# The original script ended by calling "cleanup", a function that is not
# defined anywhere; under "set -e" that made every run end with an error.

43
validate_tiff.sh Executable file
View File

@ -0,0 +1,43 @@
#!/bin/sh
# NOTE: tifftopnm is a part of netpbm package
# Write the help text to stdout. Does not exit; callers choose the exit code.
usage() {
  cat <<EOF
Usage: $0 [--delete]

This script checks if TIFF files in the current directory are valid and prints invalid ones.

Options:
 --delete Delete invalid TIFF files.
 --help Display this help message.
EOF
}
# Option handling: --delete turns on removal of invalid files, --help prints
# the help text and exits successfully, anything else is rejected with the
# help text on stderr and exit status 1.
DELETE_FILES=false
for arg; do
  if [ "$arg" = "--delete" ]; then
    DELETE_FILES=true
  elif [ "$arg" = "--help" ]; then
    usage
    exit 0
  else
    echo "Unknown argument: $arg" >&2
    usage >&2
    exit 1
  fi
done
# Without tifftopnm nothing can be validated; stop instead of silently
# reporting every file as valid.
if ! command -v tifftopnm >/dev/null 2>&1; then
  echo "error: tifftopnm not found (it is part of the netpbm package)" >&2
  exit 1
fi

# Check every *.tif file in the current directory. A file is considered
# invalid when tifftopnm writes a message containing "error" (any case) to
# stderr. Each file gets one output line: "<name>... " followed by nothing
# (valid), "ERROR" or, with --delete, "DELETED".
for f in *.tif; do
  # When no file matches, the pattern itself is left in $f; skip it.
  [ -e "$f" ] || continue
  printf '%s... ' "$f"
  # "2>&1 >/dev/null" routes only stderr into the pipe and discards the
  # converted image; "./" keeps names starting with '-' from being read as
  # options.
  if tifftopnm "./$f" 2>&1 >/dev/null | grep -qi error; then
    if [ "$DELETE_FILES" = true ]; then
      rm -- "$f"
      printf '%s' "DELETED"
    else
      printf '%s' "ERROR"
    fi
  fi
  echo
done

View File

@ -1,4 +1,4 @@
#!/bin/bash #!/usr/local/bin/bash
source ~/.local/my/bash/bashrc source ~/.local/my/bash/bashrc