Evgeny Zinoviev 2021-12-10 19:13:17 +03:00
parent 7d011538f6
commit 6c33b96ba6
2 changed files with 61 additions and 55 deletions

View File

@@ -1,47 +1,80 @@
 #!/usr/bin/env python3
 import traceback
+import json
+import sys
 from requests import get
-from bs4 import BeautifulSoup
 from ch1p import State, telegram_notify
 from html import escape
 from argparse import ArgumentParser
 
 
 def scrap_announcements():
-    url = "https://www.binance.com/en/support/announcement"
-    response = get(url)
-    soup = BeautifulSoup(response.text, 'html.parser')
-
-    data = []
-    total_news = 0
-
-    categories_list = soup.find_all(class_='css-wmvdm0')
-    for c in categories_list:
-        category_title = c.select('h2[data-bn-type="text"]')[0].text
-        category_data = {
-            'title': category_title,
-            'news': []
+    response = get('https://www.binance.com/bapi/composite/v1/public/cms/article/list/query?type=1&pageNo=1&pageSize=50')
+    data = json.loads(response.text)
+
+    categories = []
+    count = 0
+
+    for catalog in data['data']['catalogs']:
+        category = {
+            'name': catalog['catalogName'],
+            'articles': []
         }
-        for link in c.find_next('div').select('a[data-bn-type="link"]'):
-            if link.text.strip().lower() == 'view more':
-                continue
-
-            href = link.get('href')
-            if href.startswith('/'):
-                href = f'https://www.binance.com{href}'
-            category_data['news'].append({
-                'text': link.text,
-                'link': href
+        for article in catalog['articles']:
+            category['articles'].append({
+                'url': f'https://www.binance.com/en/support/announcement/{article["code"]}',
+                'rel_date': article['releaseDate'],
+                'title': article['title']
             })
-            total_news += 1
-        data.append(category_data)
+            count += 1
+        categories.append(category)
 
-    if not total_news:
+    if not count:
         raise RuntimeError('failed to find any articles')
 
-    return data
+    return categories
+
+
+def main(print_to_stdout: bool):
+    last_rel_date = 0
+    state = State(default={'urls': [], 'last_rel_date': last_rel_date})
+    if 'last_rel_date' in state:
+        last_rel_date = state['last_rel_date']
+
+    try:
+        blocks = []
+        data = scrap_announcements()
+        for category in data:
+            updates = []
+            for item in category['articles']:
+                if item['rel_date'] <= last_rel_date or item['url'] in state['urls']:
+                    continue
+                updates.append(item)
+                if item['rel_date'] > last_rel_date:
+                    last_rel_date = item['rel_date']
+
+            if updates:
+                buf = f"<b>Binance: {category['name']}</b>\n"
+                buf += '\n'.join(list(map(lambda a: f"<a href='{a['url']}'>{a['title']}</a>", updates)))
+                blocks.append(buf)
+
+        state['last_rel_date'] = last_rel_date
+
+        if blocks:
+            message = '\n\n'.join(blocks)
+            if print_to_stdout:
+                print(message)
+            else:
+                telegram_notify(text=message, parse_mode='HTML', disable_web_page_preview=True)
+    except:
+        if print_to_stdout:
+            traceback.print_exc()
+        else:
+            telegram_notify(text='error: ' + escape(traceback.format_exc()), parse_mode='HTML')
 
 
 if __name__ == '__main__':
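For context on the rewritten scrap_announcements(): the commit replaces HTML scraping via BeautifulSoup with a call to Binance's public CMS endpoint, which returns JSON. Below is a minimal, illustrative sketch of the payload shape the new parsing loop assumes; only the key names (data, catalogs, catalogName, articles, code, releaseDate, title) come from the code above, while the sample values are invented:

# Invented sample payload; key names mirror the parsing code in the diff.
sample = {
    'data': {
        'catalogs': [{
            'catalogName': 'Latest Binance News',
            'articles': [{
                'code': 'abc123',              # used to build the article URL
                'releaseDate': 1639000000000,  # presumably epoch milliseconds
                'title': 'Example announcement',
            }],
        }],
    },
}

# The same transformation scrap_announcements() applies, run on the sample:
for catalog in sample['data']['catalogs']:
    for article in catalog['articles']:
        url = f'https://www.binance.com/en/support/announcement/{article["code"]}'
        print(catalog['catalogName'], article['releaseDate'], url)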
@@ -49,32 +82,5 @@ if __name__ == '__main__':
     parser.add_argument('--stdout', action='store_true')
     args = parser.parse_args()
 
-    state = State(default=dict(urls=[]))
-    try:
-        blocks = []
-        data = scrap_announcements()
-        for category in data:
-            updates = []
-            for item in category['news']:
-                if item['link'] not in state['urls']:
-                    updates.append(item)
-                    state['urls'].append(item['link'])
-
-            if updates:
-                buf = f"<b>Binance: {category['title']}</b>\n"
-                buf += '\n'.join(list(map(lambda item: f"<a href='{item['link']}'>{item['text']}</a>", updates)))
-                blocks.append(buf)
-
-        if blocks:
-            message = '\n\n'.join(blocks)
-            if args.stdout:
-                print(message)
-            else:
-                telegram_notify(text=message, parse_mode='HTML', disable_web_page_preview=True)
-    except:
-        if args.stdout:
-            traceback.print_exc()
-        else:
-            telegram_notify(text='error: ' + escape(traceback.format_exc()), parse_mode='HTML')
+    main(args.stdout)
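The new main() filters on two criteria: an article is reported only if its releaseDate is newer than the persisted last_rel_date watermark and its URL has not been seen before. A minimal sketch of that filter, with a plain dict standing in for ch1p's State (whose only operations used above are construction with a default, a membership test, and item get/set):

# Plain-dict stand-in for the persistent State, to illustrate the filter.
state = {'urls': ['https://example.com/seen'], 'last_rel_date': 1000}
articles = [
    {'url': 'https://example.com/seen',  'rel_date': 1500, 'title': 'already notified'},
    {'url': 'https://example.com/old',   'rel_date': 900,  'title': 'older than watermark'},
    {'url': 'https://example.com/fresh', 'rel_date': 2000, 'title': 'new'},
]

last_rel_date = state['last_rel_date']
updates = []
for item in articles:
    # Skip anything at or before the watermark, or already notified by URL.
    if item['rel_date'] <= last_rel_date or item['url'] in state['urls']:
        continue
    updates.append(item)
    if item['rel_date'] > last_rel_date:
        last_rel_date = item['rel_date']

state['last_rel_date'] = last_rel_date
print([i['title'] for i in updates])  # -> ['new']

Note that the watermark advances inside the loop, so within a single run later items are compared against the running maximum rather than the value loaded at startup.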

View File

@@ -1,3 +1,3 @@
 requests~=2.26.0
 beautifulsoup4~=4.10.0
-ch1p~=0.0.6
+ch1p~=0.0.7