idb_utils/idb/tzo.py
2025-05-17 04:28:55 +03:00

78 lines
2.6 KiB
Python

import os
import re
from PIL import Image
from collections import namedtuple
from .util import image_url_to_filename
# kniganews.org article pages whose slugs follow the ``beyond-clouds-<n>``
# pattern, one URL per entry.
tzo_urls = (
    'https://kniganews.org/2012/12/20/beyond-clouds-1/',
    'https://kniganews.org/2012/12/21/beyond-clouds-21/',
    'https://kniganews.org/2012/12/22/beyond-clouds-22/',
    'https://kniganews.org/2012/12/23/beyond-clouds-31/',
    'https://kniganews.org/2012/12/24/beyond-clouds-32/',
    'https://kniganews.org/2012/12/25/beyond-clouds-33/',
    'https://kniganews.org/2012/12/28/beyond-clouds-41/',
    'https://kniganews.org/2012/12/29/beyond-clouds-42/',
    'https://kniganews.org/2012/12/30/beyond-clouds-43/',
    'https://kniganews.org/2013/01/01/beyond-clouds-44/',
    'https://kniganews.org/2013/01/06/beyond-clouds-51/',
    'https://kniganews.org/2013/01/07/beyond-clouds-52/',
    'https://kniganews.org/2013/02/16/beyond-clouds-53/',
    'https://kniganews.org/2013/03/25/beyond-clouds-61/',
    'https://kniganews.org/2013/05/10/beyond-clouds-62/',
    'https://kniganews.org/2013/06/17/beyond-clouds-731/',
    'https://kniganews.org/2013/08/07/beyond-clouds-732/',
    'https://kniganews.org/2013/09/17/beyond-clouds-73/',
)
# Single extra kniganews.org page, kept in its own one-element tuple.
after_tzo_urls = ('https://kniganews.org/2012/11/17/langlands-plus/',)
# Immutable record for one image: its source URL, the local file name and
# resolved path, and the pixel width and height.
ImageInfo = namedtuple(
    'ImageInfo',
    ['url', 'local_name', 'local_path', 'width', 'height'],
)
class ImageList:
    """Accumulates ``ImageInfo`` records for locally stored images."""

    # Records in the order they were added through ``add_image``.
    images: list[ImageInfo]

    def __init__(self):
        """Create an empty image list."""
        self.images = []

    def add_image(self, url):
        """Record the local copy of the image identified by *url*.

        The local file name comes from ``image_url_to_filename(url)`` and is
        resolved against ``../images`` relative to this module's directory.
        The file is opened with Pillow only to read its dimensions.

        Any exception raised by ``Image.open`` (for example when the file is
        missing) propagates to the caller and nothing is appended.
        """
        local_name = image_url_to_filename(url)
        local_path = os.path.realpath(os.path.join(
            os.path.dirname(__file__),
            '..',
            'images',
            local_name,
        ))
        # Only the size is needed, so release the underlying file handle as
        # soon as it has been read instead of leaving it open per image.
        with Image.open(local_path) as image:
            width, height = image.size
        self.images.append(ImageInfo(url, local_name, local_path, width, height))

    def get_images_by_size(self, w, h) -> list[ImageInfo]:
        """Return the recorded images whose width is *w* and height is *h*."""
        return [
            image for image in self.images
            if image.width == w and image.height == h
        ]
def get_part_by_odt_name(name: str) -> int:
    """Extract the part number from an ODT file name.

    Accepted names have the form ``beyond-clouds-<digits>.odt``, optionally
    with a ``v<digits>`` suffix right after the part number, e.g.
    ``beyond-clouds-731v2.odt``.

    Args:
        name: File name to parse (no directory component).

    Returns:
        The part number as an integer.

    Raises:
        ValueError: If *name* does not have the expected form.
    """
    # fullmatch anchors both ends strictly; a ``$`` anchor would also accept
    # a name that ends with a trailing newline.
    m = re.fullmatch(r'beyond-clouds-(\d+)(?:v\d+)?\.odt', name)
    if m is None:
        raise ValueError('could not parse file name')
    # ``\d+`` only matches decimal digits, so the captured group needs no
    # further validation before conversion.
    return int(m.group(1))
def part_image_list(part) -> ImageList:
    """Build an ``ImageList`` from the images referenced in one part's text.

    Reads ``../tzo/beyond-clouds-<part>-ru.txt`` (relative to this module's
    directory), finds every Markdown image reference of the form
    ``![alt](url)`` and adds each URL, in order of appearance, to a new
    ``ImageList``.

    Args:
        part: Value interpolated into the text file name.

    Returns:
        The populated ``ImageList``.

    Exceptions from opening the text file or from ``ImageList.add_image``
    propagate to the caller.
    """
    file = os.path.realpath(os.path.join(
        os.path.dirname(__file__),
        '..',
        'tzo',
        f'beyond-clouds-{part}-ru.txt',
    ))
    # Explicit encoding so decoding does not depend on the platform's locale
    # default. NOTE(review): assumes the text files are stored as UTF-8 —
    # confirm against the actual files.
    with open(file, encoding='utf-8') as f:
        txt = f.read()

    images = ImageList()
    # Group 1 of each match is the text between the parentheses of ![...](...).
    for url in re.findall(r'!\[.*?]\((.*?)\)', txt):
        images.add_image(url)
    return images