Evgeny Zinoviev 2021-12-10 19:13:17 +03:00
parent 7d011538f6
commit 6c33b96ba6
2 changed files with 61 additions and 55 deletions

View File

@@ -1,47 +1,80 @@
 #!/usr/bin/env python3
 import traceback
+import json
+import sys
 from requests import get
-from bs4 import BeautifulSoup
 from ch1p import State, telegram_notify
 from html import escape
 from argparse import ArgumentParser
 
 
 def scrap_announcements():
-    url = "https://www.binance.com/en/support/announcement"
-    response = get(url)
-    soup = BeautifulSoup(response.text, 'html.parser')
-
-    data = []
-    total_news = 0
-
-    categories_list = soup.find_all(class_='css-wmvdm0')
-    for c in categories_list:
-        category_title = c.select('h2[data-bn-type="text"]')[0].text
-        category_data = {
-            'title': category_title,
-            'news': []
+    response = get('https://www.binance.com/bapi/composite/v1/public/cms/article/list/query?type=1&pageNo=1&pageSize=50')
+    data = json.loads(response.text)
+
+    categories = []
+    count = 0
+
+    for catalog in data['data']['catalogs']:
+        category = {
+            'name': catalog['catalogName'],
+            'articles': []
         }
-        for link in c.find_next('div').select('a[data-bn-type="link"]'):
-            if link.text.strip().lower() == 'view more':
-                continue
-            href = link.get('href')
-            if href.startswith('/'):
-                href = f'https://www.binance.com{href}'
-            category_data['news'].append({
-                'text': link.text,
-                'link': href
+        for article in catalog['articles']:
+            category['articles'].append({
+                'url': f'https://www.binance.com/en/support/announcement/{article["code"]}',
+                'rel_date': article['releaseDate'],
+                'title': article['title']
             })
-            total_news += 1
-        data.append(category_data)
+            count += 1
+        categories.append(category)
 
-    if not total_news:
+    if not count:
         raise RuntimeError('failed to find any articles')
 
-    return data
+    return categories
+
+
+def main(print_to_stdout: bool):
+    last_rel_date = 0
+    state = State(default={'urls': [], 'last_rel_date': last_rel_date})
+    if 'last_rel_date' in state:
+        last_rel_date = state['last_rel_date']
+
+    try:
+        blocks = []
+        data = scrap_announcements()
+        for category in data:
+            updates = []
+            for item in category['articles']:
+                if item['rel_date'] <= last_rel_date or item['url'] in state['urls']:
+                    continue
+                updates.append(item)
+                if item['rel_date'] > last_rel_date:
+                    last_rel_date = item['rel_date']
+
+            if updates:
+                buf = f"<b>Binance: {category['name']}</b>\n"
+                buf += '\n'.join(list(map(lambda a: f"<a href='{a['url']}'>{a['title']}</a>", updates)))
+                blocks.append(buf)
+
+        state['last_rel_date'] = last_rel_date
+
+        if blocks:
+            message = '\n\n'.join(blocks)
+            if print_to_stdout:
+                print(message)
+            else:
+                telegram_notify(text=message, parse_mode='HTML', disable_web_page_preview=True)
+    except:
+        if print_to_stdout:
+            traceback.print_exc()
+        else:
+            telegram_notify(text='error: ' + escape(traceback.format_exc()), parse_mode='HTML')
 
 
 if __name__ == '__main__':
@@ -49,32 +82,5 @@ if __name__ == '__main__':
     parser.add_argument('--stdout', action='store_true')
     args = parser.parse_args()
 
-    state = State(default=dict(urls=[]))
-    try:
-        blocks = []
-        data = scrap_announcements()
-        for category in data:
-            updates = []
-            for item in category['news']:
-                if item['link'] not in state['urls']:
-                    updates.append(item)
-                    state['urls'].append(item['link'])
-
-            if updates:
-                buf = f"<b>Binance: {category['title']}</b>\n"
-                buf += '\n'.join(list(map(lambda item: f"<a href='{item['link']}'>{item['text']}</a>", updates)))
-                blocks.append(buf)
-
-        if blocks:
-            message = '\n\n'.join(blocks)
-            if args.stdout:
-                print(message)
-            else:
-                telegram_notify(text=message, parse_mode='HTML', disable_web_page_preview=True)
-    except:
-        if args.stdout:
-            traceback.print_exc()
-        else:
-            telegram_notify(text='error: ' + escape(traceback.format_exc()), parse_mode='HTML')
+    main(args.stdout)
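Note on the rewritten main(): deduplication now works on two axes, the urls list carried over from the old state and a new last_rel_date high-water mark compared against each article's numeric releaseDate. A minimal standalone sketch of that filter, with made-up sample values (the timestamps and URLs below are hypothetical, for illustration only):

# Sketch of the dedup rule in main(): skip anything at or below the stored
# high-water mark (or already in the URL list), then advance the mark.
last_rel_date = 1639100000000  # value loaded from State; 0 on first run
seen_urls = ['https://www.binance.com/en/support/announcement/abc123']

articles = [  # sample data, made up for illustration
    {'rel_date': 1639000000000,
     'url': 'https://www.binance.com/en/support/announcement/abc123',
     'title': 'old, already notified'},
    {'rel_date': 1639200000000,
     'url': 'https://www.binance.com/en/support/announcement/def456',
     'title': 'new announcement'},
]

updates = []
for a in articles:
    if a['rel_date'] <= last_rel_date or a['url'] in seen_urls:
        continue
    updates.append(a)
    if a['rel_date'] > last_rel_date:
        last_rel_date = a['rel_date']

print([a['title'] for a in updates])  # ['new announcement']
print(last_rel_date)                  # 1639200000000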

View File

@@ -1,3 +1,3 @@
 requests~=2.26.0
 beautifulsoup4~=4.10.0
-ch1p~=0.0.6
+ch1p~=0.0.7
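The beautifulsoup4 pin stays even though the BeautifulSoup import is gone; only ch1p is bumped. A quick smoke check after installing the pinned requirements, to confirm the bapi endpoint still serves the fields the script reads. The endpoint URL and field names are copied from the diff above; the payload shape is an assumption Binance may change at any time:

# smoke_check.py -- verify the endpoint still returns
# catalogs -> articles -> code/title/releaseDate. Treat as a sketch.
from requests import get

resp = get('https://www.binance.com/bapi/composite/v1/public/cms/article/list/query'
           '?type=1&pageNo=1&pageSize=5')
resp.raise_for_status()
payload = resp.json()

for catalog in payload['data']['catalogs']:
    print(catalog['catalogName'])
    for article in catalog['articles']:
        # a KeyError here means the payload drifted from what the script assumes
        print(' ', article['releaseDate'], article['title'], article['code'])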