45 lines
1.1 KiB
Python
45 lines
1.1 KiB
Python
import re, os.path
|
|
import requests
|
|
from urllib.parse import urlparse
|
|
|
|
|
|
def save_file(file, content):
    """Write ``content`` to ``file``, replacing whatever the file held before.

    The file is opened in text mode without an explicit encoding, so the
    platform's default text encoding is used.

    Args:
        file: Path of the file to create or overwrite.
        content: Text to write.
    """
    with open(file, mode="w") as out:
        out.write(content)
|
|
|
|
|
|
def read_file(filename):
    """Return the entire contents of ``filename`` as a string.

    The file is opened in text mode without an explicit encoding, so the
    platform's default text encoding is used.

    Args:
        filename: Path of the file to read.

    Returns:
        The full text of the file.
    """
    with open(filename, mode="r") as source:
        text = source.read()
    return text
|
|
|
|
|
|
def name_from_url(url):
    """Return the last path segment of ``url``.

    Trailing slashes are ignored, so ``.../my-post/`` and ``.../my-post``
    both yield ``"my-post"``. Only trailing ``/`` characters are removed;
    a URL that does not end in a slash keeps its final character.

    Args:
        url: URL or slash-separated path.

    Returns:
        The final segment, or an empty string when ``url`` is empty.
    """
    # Strip only slashes: chopping the last character unconditionally would
    # truncate the name whenever the URL has no trailing slash.
    return os.path.basename(url.rstrip("/"))
|
|
|
|
|
|
def image_url_to_filename(url):
    """Build a flat ``<year><month>_<name><ext>`` file name from an image URL.

    Only the path component of ``url`` is inspected. The first run of four
    digits, a slash and two digits found in that path supplies the year and
    month prefix; the last path segment supplies the name and extension.

    Args:
        url: Image URL whose path contains a ``dddd/dd`` digit sequence.

    Returns:
        The combined file name, e.g. ``202103_photo.jpg``.

    Raises:
        ValueError: If the path contains no such digit sequence.
    """
    path = urlparse(url).path

    date_match = re.search(r'(\d{4})/(\d{2})', path)
    if date_match is None:
        raise ValueError("no valid date found in URL")

    year, month = date_match.groups()
    name, ext = os.path.splitext(os.path.basename(path))
    return f"{year}{month}_{name}{ext}"
|
|
|
|
|
|
def extract_images_from_markdown(markdown_text):
    """Collect the targets of all Markdown image tags in ``markdown_text``.

    An image tag is ``![alt](target)``; the text between the parentheses is
    returned verbatim for every tag, in order of appearance.

    Args:
        markdown_text: Markdown source to scan.

    Returns:
        A list of target strings, empty when no image tag is present.
    """
    return [
        tag.group(1)
        for tag in re.finditer(r"!\[.*?\]\((.*?)\)", markdown_text)
    ]
|
|
|
|
|
|
def download_file(url, filename, timeout=30):
    """Download ``url`` and stream the response body into ``filename``.

    Args:
        url: Address fetched with an HTTP GET request.
        filename: Path the body is written to, in binary mode.
        timeout: Value passed to ``requests.get`` as its ``timeout``
            argument, so a stalled server cannot block the call forever.

    Raises:
        requests.HTTPError: Raised by ``raise_for_status`` for error
            responses.
        requests.RequestException: Any other failure reported by
            ``requests``, including timeouts.
    """
    # The ``with`` block closes the streamed response, releasing the
    # connection even if writing the file fails part-way through.
    with requests.get(url, stream=True, timeout=timeout) as response:
        response.raise_for_status()

        # Only a plain 200 is saved; any other non-error status leaves the
        # target file untouched.
        if response.status_code != 200:
            return

        with open(filename, 'wb') as out:
            for chunk in response.iter_content(1024):
                out.write(chunk)
|