# idb_utils/odt_to_md.py

#!/usr/bin/env python3
import re

from argparse import ArgumentParser
from os.path import basename

from idb import tzo
from idb.doc import DocumentReader
from idb.tzo import get_part_by_odt_name


def tzo_replace_images(md: str,
                       tzo_part: int,
                       dr: DocumentReader) -> str:
    """Rewrite Markdown image references to point at TZO part images.

    Every ``![alt](path "title")`` reference in *md* is looked up by the
    pixel size of the embedded image at ``path`` (as reported by
    ``dr.get_embedded_image_size``) and replaced with one Markdown image per
    entry returned by ``get_images_by_size`` on the image list of *tzo_part*.

    The generated references always have empty alt text; the original title,
    when present, is carried over unchanged.

    NOTE: if no image of a matching size is found, the original reference is
    replaced by an empty string (i.e. it is dropped from the output). If
    several images match, all of them are emitted back to back.

    :param md: Markdown text to process.
    :param tzo_part: part identifier passed to ``tzo.part_image_list``.
    :param dr: reader of the source document, used to obtain the size of
        each embedded image.
    :return: the Markdown text with image references substituted.
    """
    image_list = tzo.part_image_list(tzo_part)

    def _markdown_image(image, title) -> str:
        # The title part is only emitted when the source reference had one.
        if title:
            return f'![]({image.url} "{title}")'
        return f'![]({image.url})'

    def _repl(match: re.Match) -> str:
        # Group 1 (the original alt text) is intentionally not used.
        path, title = match.group(2, 3)
        width, height = dr.get_embedded_image_size(path)
        found_images = image_list.get_images_by_size(width, height)
        return ''.join(_markdown_image(image, title) for image in found_images)

    # Groups: 1 = alt text, 2 = path/URL (no whitespace), 3 = optional title.
    regex = re.compile(r'!\[(.*?)]\((\S+?)(?:\s+"(.*?)")?\)')
    return regex.sub(_repl, md)


def main():
    """Command-line entry point: print a document column as Markdown.

    Parses ``-i/--input`` (required), ``-c/--column`` (default 1) and the
    ``--tzo`` flag, reads the requested column of the input document as
    Markdown and writes it to standard output. With ``--tzo`` the image
    references are first rewritten by ``tzo_replace_images``, using the part
    resolved from the input file's base name.
    """
    arg_parser = ArgumentParser()
    arg_parser.add_argument('-i', '--input', required=True, type=str, help='Input file')
    arg_parser.add_argument('-c', '--column', default=1, type=int, help='Column number')
    arg_parser.add_argument('--tzo', action='store_true', help='TZO')
    options = arg_parser.parse_args()

    document = DocumentReader(options.input)
    markdown = document.get_markdown(options.column)

    if options.tzo:
        # The part is looked up by the file name only, not the full path.
        part = get_part_by_odt_name(basename(options.input))
        markdown = tzo_replace_images(markdown, part, document)

    print(markdown)


# Run the converter only when executed as a script, not when imported.
if __name__ == '__main__':
    main()