idb_utils/odt_to_md.py
2025-05-17 04:28:55 +03:00

52 lines
1.5 KiB
Python
Executable File

#!/usr/bin/env python3
import re
from argparse import ArgumentParser
from os.path import basename
from idb import tzo
from idb.doc import DocumentReader
from idb.tzo import get_part_by_odt_name
def tzo_replace_images(md: str,
                       tzo_part: int,
                       dr: DocumentReader) -> str:
    """Swap embedded-image references in *md* for images of a TZO part.

    Every Markdown image reference of the form ``![alt](path)`` or
    ``![alt](path "title")`` is matched by size: the size of the embedded
    image at ``path`` is read from *dr*, and every image of that size in the
    image list of *tzo_part* is emitted in its place.

    Args:
        md: Markdown text containing image references.
        tzo_part: Part identifier handed to ``tzo.part_image_list``.
        dr: Reader queried for the size of each embedded image.

    Returns:
        *md* with each image reference replaced by zero or more
        ``![](url)`` / ``![](url "title")`` references joined without a
        separator. The original alt text is not carried over, and a
        reference whose size matches no image is replaced by an empty
        string.
    """
    il = tzo.part_image_list(tzo_part)

    def _markdown_image(image, title) -> str:
        """Render *image* as a Markdown reference, keeping *title* if set."""
        if title:
            return f'![]({image.url} "{title}")'
        return f'![]({image.url})'

    def _repl(match: re.Match) -> str:
        # Group 1 (the alt text) is not used: the output alt is always empty.
        _, path, title = match.groups()
        w, h = dr.get_embedded_image_size(path)
        found_images = il.get_images_by_size(w, h)
        # Several images may share one size; all of them are emitted back to
        # back rather than treating the ambiguity as an error.
        return ''.join(_markdown_image(image, title) for image in found_images)

    # Groups: 1 = alt text, 2 = path (no whitespace), 3 = optional quoted title.
    regex = re.compile(r'!\[(.*?)]\((\S+?)(?:\s+"(.*?)")?\)')
    return regex.sub(_repl, md)
def main():
    """Command-line entry point: print one column of a document as Markdown.

    Opens the file given by ``-i/--input`` with ``DocumentReader`` and fetches
    the Markdown for column ``-c/--column`` (default 1). When ``--tzo`` is
    passed, image references are rewritten with ``tzo_replace_images`` using
    the part derived from the input file's base name. The result is printed
    to stdout.
    """
    cli = ArgumentParser()
    cli.add_argument('-i', '--input', required=True, type=str, help='Input file')
    cli.add_argument('-c', '--column', default=1, type=int, help='Column number')
    cli.add_argument('--tzo', action='store_true', help='TZO')
    opts = cli.parse_args()

    document = DocumentReader(opts.input)
    markdown = document.get_markdown(opts.column)

    if opts.tzo:
        # The part is looked up from the file name alone, not the full path.
        part = get_part_by_odt_name(basename(opts.input))
        markdown = tzo_replace_images(markdown, part, document)

    print(markdown)
# Run the converter only when executed as a script, not when imported.
if __name__ == "__main__":
    main()