#!/usr/bin/env python3
"""Command-line helper for reading a page number from one page of a PDF.

The script converts the requested page with ``pdf2png``, crops the
configured zone out of it with ``zonecrop``, hands the cropped image to
``img2pagenum`` and prints the result followed by the zone that was used.
"""
import logging
import os
from argparse import ArgumentParser

from pagenum.system import ensure_dependencies, desktop_open_image
from pagenum.image import pdf2png, zonecrop, img2pagenum, Zone


def _build_parser() -> ArgumentParser:
    """Return the argument parser describing the command-line interface."""
    parser = ArgumentParser()
    # --input is mandatory: without it there is nothing to process, and a
    # missing value would otherwise surface later as an opaque TypeError.
    parser.add_argument('--input', required=True, help="input pdf file")
    parser.add_argument('--input-page', type=int, default=1,
                        help="page in pdf file")
    parser.add_argument('--zone', required=True, choices=Zone.getzones(),
                        help="where to look for page number")
    parser.add_argument('--height', type=int, required=True)
    parser.add_argument('--width', type=int, required=True)
    parser.add_argument('--margin-top', type=int, default=0)
    parser.add_argument('--margin-right', type=int, default=0)
    parser.add_argument('--margin-bottom', type=int, default=0)
    parser.add_argument('--margin-left', type=int, default=0)
    parser.add_argument('--max-page-num-length', type=int, default=3)
    parser.add_argument('--preview', action='store_true',
                        help="open cropped image part in image viewer")
    parser.add_argument('--verbose', action='store_true')
    parser.add_argument('--tesseract-psm', type=int)
    return parser


def main() -> None:
    """Parse the command line, run the extraction and print the result.

    Raises:
        OSError: if the file given with ``--input`` does not exist.
    """
    ensure_dependencies()
    args = _build_parser().parse_args()

    logging.basicConfig(level=logging.DEBUG if args.verbose else logging.INFO)

    if not os.path.exists(args.input):
        raise OSError(f'{args.input}: no such file')

    z = Zone(args.zone, args.width, args.height,
             args.margin_top, args.margin_right,
             args.margin_bottom, args.margin_left)

    # Intermediate files are recorded as soon as they exist so that they are
    # removed even when a later step raises.
    temp_files = []
    try:
        png_file = pdf2png(args.input, args.input_page)
        temp_files.append(png_file)

        cropped_file = zonecrop(png_file, z)
        temp_files.append(cropped_file)

        if args.preview:
            desktop_open_image(cropped_file)

        i2pn_kw = {}
        # Compare against None rather than truthiness so that an explicit
        # ``--tesseract-psm 0`` is forwarded instead of silently ignored.
        if args.tesseract_psm is not None:
            i2pn_kw['psm'] = args.tesseract_psm

        num = img2pagenum(cropped_file, args.max_page_num_length, **i2pn_kw)
        print('num:', num)
    finally:
        for path in temp_files:
            os.unlink(path)

    print(f'zone: {z}')


if __name__ == '__main__':
    main()