import os
import re
from collections import namedtuple

from PIL import Image

from .util import image_url_to_filename

# URLs of the "beyond-clouds" posts on kniganews.org, in the order listed here.
tzo_urls = (
    'https://kniganews.org/2012/12/20/beyond-clouds-1/',
    'https://kniganews.org/2012/12/21/beyond-clouds-21/',
    'https://kniganews.org/2012/12/22/beyond-clouds-22/',
    'https://kniganews.org/2012/12/23/beyond-clouds-31/',
    'https://kniganews.org/2012/12/24/beyond-clouds-32/',
    'https://kniganews.org/2012/12/25/beyond-clouds-33/',
    'https://kniganews.org/2012/12/28/beyond-clouds-41/',
    'https://kniganews.org/2012/12/29/beyond-clouds-42/',
    'https://kniganews.org/2012/12/30/beyond-clouds-43/',
    'https://kniganews.org/2013/01/01/beyond-clouds-44/',
    'https://kniganews.org/2013/01/06/beyond-clouds-51/',
    'https://kniganews.org/2013/01/07/beyond-clouds-52/',
    'https://kniganews.org/2013/02/16/beyond-clouds-53/',
    'https://kniganews.org/2013/03/25/beyond-clouds-61/',
    'https://kniganews.org/2013/05/10/beyond-clouds-62/',
    'https://kniganews.org/2013/06/17/beyond-clouds-731/',
    'https://kniganews.org/2013/08/07/beyond-clouds-732/',
    'https://kniganews.org/2013/09/17/beyond-clouds-73/'
)

# Post URL(s) kept in a separate tuple from ``tzo_urls``.
after_tzo_urls = (
    'https://kniganews.org/2012/11/17/langlands-plus/',
)

# Record describing one image:
#   url        - the URL the image was referenced by
#   local_name - file name derived from the URL by image_url_to_filename()
#   local_path - resolved path of the local copy of the image
#   width      - image width as reported by PIL (image.size[0])
#   height     - image height as reported by PIL (image.size[1])
ImageInfo = namedtuple('ImageInfo', ('url', 'local_name', 'local_path', 'width', 'height'))


class ImageList:
    """Collection of ImageInfo records, in the order they were added."""

    images: list[ImageInfo]

    def __init__(self):
        self.images = []

    def add_image(self, url):
        """Record the image referenced by *url*, reading its size from disk.

        The local copy is looked up as ``../images/<local_name>`` relative to
        the directory of this module, where ``<local_name>`` is produced by
        ``image_url_to_filename(url)``. Any error raised by ``Image.open``
        (for example when the file is missing) propagates to the caller.
        """
        local_name = image_url_to_filename(url)
        local_path = os.path.realpath(os.path.join(
            os.path.dirname(__file__),
            '..',
            'images',
            local_name
        ))
        # Image.open() keeps the underlying file open; the context manager
        # closes it as soon as the dimensions have been read.
        with Image.open(local_path) as image:
            width, height = image.size
        self.images.append(ImageInfo(url, local_name, local_path, width, height))

    def get_images_by_size(self, w, h) -> list[ImageInfo]:
        """Return every recorded image whose size is exactly *w* x *h*."""
        return [
            image for image in self.images
            if image.width == w and image.height == h
        ]


def get_part_by_odt_name(name: str) -> int:
    """Extract the part number from an ``.odt`` file name.

    Accepts names of the form ``beyond-clouds-<digits>.odt`` with an optional
    ``v<digits>`` suffix before the extension, e.g. ``beyond-clouds-21.odt``
    or ``beyond-clouds-21v2.odt`` (both yield ``21``).

    Raises:
        ValueError: if *name* does not have that form.
    """
    m = re.match(r'^beyond-clouds-(\d+)(?:v\d+)?\.odt$', name)
    if not m:
        raise ValueError('could not parse file name')
    # Group 1 is matched by \d+, so it always consists of digits and int()
    # cannot fail here; no separate numeric check is needed.
    return int(m.group(1))


def part_image_list(part) -> ImageList:
    """Build an ImageList from the image links in the text of one part.

    Reads ``../tzo/beyond-clouds-<part>-ru.txt`` (relative to the directory of
    this module), finds every Markdown-style image reference ``![...](...)``
    and adds the text between the parentheses to the list as an image URL.
    """
    file = os.path.realpath(os.path.join(
        os.path.dirname(__file__),
        '..',
        'tzo',
        f'beyond-clouds-{part}-ru.txt',
    ))
    # NOTE(review): assumes the text files are stored as UTF-8 - confirm.
    # An explicit encoding avoids depending on the platform's default locale.
    with open(file, encoding='utf-8') as f:
        txt = f.read()

    urls = re.findall(r'!\[.*?]\((.*?)\)', txt)
    images = ImageList()
    for url in urls:
        images.add_image(url)
    return images