bookscan_utils/pagenum-probe.py
2023-10-14 11:35:01 +03:00

57 lines
2.1 KiB
Python
Executable File

#!/usr/bin/env python3
import os
import logging
from argparse import ArgumentParser
from pagenum.system import ensure_dependencies, desktop_open_image
from pagenum.image import pdf2png, zonecrop, img2pagenum, Zone
if __name__ == '__main__':
    # Probe script: take one page of a PDF, crop the requested zone out of it,
    # pass the crop to img2pagenum and print the result together with the
    # zone that was used.
    ensure_dependencies()

    parser = ArgumentParser()
    # --input is mandatory: without it args.input would be None and the
    # existence check below would fail with a TypeError instead of a clean
    # usage message.
    parser.add_argument('--input', required=True,
                        help="input pdf file")
    parser.add_argument('--input-page', type=int, default=1,
                        help="page in pdf file")
    parser.add_argument('--zone', required=True,
                        choices=Zone.getzones(),
                        help="where to look for page number")
    parser.add_argument('--height', type=int, required=True)
    parser.add_argument('--width', type=int, required=True)
    parser.add_argument('--margin-top', type=int, default=0)
    parser.add_argument('--margin-right', type=int, default=0)
    parser.add_argument('--margin-bottom', type=int, default=0)
    parser.add_argument('--margin-left', type=int, default=0)
    parser.add_argument('--max-page-num-length', type=int, default=3)
    parser.add_argument('--preview', action='store_true',
                        help="open cropped image part in image viewer")
    parser.add_argument('--verbose', action='store_true')
    parser.add_argument('--tesseract-psm', type=int)
    args = parser.parse_args()

    logging.basicConfig(level=logging.DEBUG if args.verbose else logging.INFO)

    if not os.path.exists(args.input):
        raise OSError(f'{args.input}: no such file')

    z = Zone(args.zone, args.width, args.height,
             args.margin_top, args.margin_right,
             args.margin_bottom, args.margin_left)

    # pdf2png and zonecrop each hand back a file path that this script is
    # responsible for deleting. The try/finally guarantees the files are
    # removed even when cropping, preview or recognition raises.
    png_file = pdf2png(args.input, args.input_page)
    cropped_file = None
    try:
        cropped_file = zonecrop(png_file, z)

        if args.preview:
            desktop_open_image(cropped_file)

        # Forward the page segmentation mode only when the user supplied one.
        # The test is against None (not truthiness) so that an explicit
        # "--tesseract-psm 0" is honoured rather than silently dropped.
        i2pn_kw = {}
        if args.tesseract_psm is not None:
            i2pn_kw['psm'] = args.tesseract_psm

        num = img2pagenum(cropped_file, args.max_page_num_length, **i2pn_kw)
        print('num:', num)
    finally:
        os.unlink(png_file)
        # cropped_file stays None if zonecrop itself failed.
        if cropped_file is not None:
            os.unlink(cropped_file)

    print(f'zone: {z}')