23 lines
666 B
Python
23 lines
666 B
Python
import requests
|
|
from bs4 import BeautifulSoup
|
|
from html import unescape
|
|
from markdownify import markdownify
|
|
from collections import namedtuple
|
|
|
|
# Immutable record returned by fetch(): the article title, the article body as
# an HTML string, and the same body converted to Markdown.
ArticleContent = namedtuple('ArticleContent', 'title html md')
|
|
|
|
|
|
def fetch(url, *, timeout=30) -> ArticleContent:
    """Download an article page and extract its title and body.

    The page is fetched over HTTP and parsed with BeautifulSoup. The element
    with id ``jp-post-flair`` (if present) is removed from the document before
    the body is extracted, so it does not end up in the returned HTML or
    Markdown.

    Args:
        url: Address of the article page to download.
        timeout: Seconds to wait for the server before giving up. Passed
            straight to ``requests.get``; without it a stalled server would
            block the caller indefinitely.

    Returns:
        An ``ArticleContent`` whose ``title`` is the text of the element with
        class ``entry-title``, whose ``html`` is the serialized
        ``div.entry-content`` element, and whose ``md`` is that HTML converted
        to Markdown.

    Raises:
        requests.RequestException: On connection failure, timeout, or an HTTP
            error status (4xx/5xx).
        ValueError: If the page has no ``div.entry-content`` or no
            ``entry-title`` element.
    """
    response = requests.get(url, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of trying to parse an error page.
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')

    flair_element = soup.find(id="jp-post-flair")
    if flair_element is not None:
        flair_element.decompose()

    content_element = soup.find("div", class_="entry-content")
    if content_element is None:
        # str(None) would otherwise yield the literal text "None" as the body.
        raise ValueError(f"no 'entry-content' div found at {url!r}")

    title_element = soup.find(class_="entry-title")
    if title_element is None:
        raise ValueError(f"no 'entry-title' element found at {url!r}")

    html = str(content_element).strip()
    # unescape() additionally decodes entities that survive parsing
    # (e.g. double-escaped "&amp;amp;" in the page source).
    title = unescape(title_element.get_text(strip=True))
    md = markdownify(html).strip()

    return ArticleContent(title, html, md)
|