fix parsing, add telegram notifications

parent c2dbff2e2a
commit 38f3737e72
.gitignore (vendored, new file)
@@ -0,0 +1 @@
+.idea
README.md
@@ -1,8 +1,11 @@
 # Binance-Announcements
-Web scraping for getting notifications on new announcements
-
-Like it or not, everything Binance touches turns to gold. New listings on their platform are a great opportunity for traders, so they'd better be well-informed.
-
-CHZ coin was listed on their platform yesterday, but unfortunately I missed the announcement on their webpage.
-
-To make sure that doesn't happen again, I've created a short script that gets notifications automatically from the Binance page. It will run indefinitely on my computer, so every time they release a new announcement, I'll receive a tweet and a Telegram message.
+
+Fork of https://github.com/darroyolpz/Binance-Announcements.
+
+- Fixed HTML layout parsing
+- Removed Discord
+- Added Telegram notifications
+
+## License
+
+MIT
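The "Fixed HTML layout parsing" bullet comes down to the selectors used in the new script below. A minimal standalone sketch of that approach, run against stand-in markup (the HTML snippet is a simplified assumption about the page structure, not a capture of Binance's real page):

```python
# Sketch of the parsing approach; the markup below is a simplified
# stand-in for Binance's announcements page, not the real thing.
from bs4 import BeautifulSoup

html = '''
<div class="css-wmvdm0"><h2 data-bn-type="text">New Listings</h2></div>
<div><a data-bn-type="link" id="supportList_1"
        href="/en/support/announcement/123">Binance Lists FOO</a></div>
'''

soup = BeautifulSoup(html, 'html.parser')
for c in soup.find_all(class_='css-wmvdm0'):
    # the category title sits in an <h2 data-bn-type="text"> inside the block
    title = c.select('h2[data-bn-type="text"]')[0].text
    # article anchors live in the following <div>, ids prefixed 'supportList'
    for link in c.find_next('div').select('a[data-bn-type="link"]'):
        link_id = link.get('id')
        if link_id and link_id.startswith('supportList'):
            print(f"{title}: {link.text} -> {link.get('href')}")
```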
binance-announcement-scraping-bot.py (executable file)
@@ -0,0 +1,76 @@
+#!/usr/bin/env python3
+import traceback
+
+from requests import get
+from bs4 import BeautifulSoup
+from ch1p import State, telegram_notify
+from html import escape
+
+
+def scrap_announcements():
+    # fetch and parse the announcements page
+    url = "https://www.binance.com/en/support/announcement"
+    response = get(url)
+    soup = BeautifulSoup(response.text, 'html.parser')
+
+    data = []
+    total_news = 0
+
+    # each announcement category is a block with the css-wmvdm0 class
+    categories_list = soup.find_all(class_='css-wmvdm0')
+    for c in categories_list:
+        category_title = c.select('h2[data-bn-type="text"]')[0].text
+        category_data = {
+            'title': category_title,
+            'news': []
+        }
+
+        # article links are anchors whose id starts with 'supportList'
+        for link in c.find_next('div').select('a[data-bn-type="link"]'):
+            link_id = link.get('id')
+            if link_id is None:
+                continue
+            if not link_id.startswith('supportList'):
+                continue
+
+            category_data['news'].append({
+                'text': link.text,
+                'link': link.get('href')
+            })
+            total_news += 1
+
+        data.append(category_data)
+
+    # fail loudly if the page layout changed again
+    if not total_news:
+        raise RuntimeError('failed to find any articles')
+
+    return data
+
+
+if __name__ == '__main__':
+    state = State(default=dict(urls=[]))
+    try:
+        blocks = []
+        data = scrap_announcements()
+        for category in data:
+            # collect only articles we haven't seen before
+            updates = []
+            for item in category['news']:
+                if item['link'] not in state['urls']:
+                    updates.append(item)
+                    state['urls'].append(item['link'])
+
+            if updates:
+                buf = f"<i>{category['title']}</i>\n"
+                buf += '\n'.join(f"<a href=\"{item['link']}\">{item['text']}</a>" for item in updates)
+                blocks.append(buf)
+
+        if blocks:
+            message = '<b>Binance Announcements</b>\n\n'
+            message += '\n\n'.join(blocks)
+
+            telegram_notify(text=message, parse_mode='HTML')
+
+    except Exception:
+        telegram_notify(text='error: ' + escape(traceback.format_exc()), parse_mode='HTML')
@@ -1,44 +0,0 @@
-import os, requests
-import pandas as pd
-from bs4 import BeautifulSoup
-from requests import get
-from discord_webhook import DiscordWebhook
-
-# Webhook settings
-url_wb = os.environ.get('DISCORD_WH')
-
-# Data for the scrap
-url = "https://www.binance.com/en/support/announcement"
-response = get(url)
-soup = BeautifulSoup(response.text, 'html.parser')
-news_list = soup.find_all(class_ = 'css-sbrje5')
-
-# Create a bag of key words for getting matches
-key_words = ['list', 'token sale', 'open trading', 'opens trading', 'perpetual', 'defi', 'uniswap', 'airdrop']
-
-# Open old database file
-path = "/home/pi/OpenAlpha/db.xlsx"
-df = pd.read_excel(path)
-
-# Empty list
-updated_list = []
-
-for news in news_list:
-    article_text = news.text
-
-    # Check for matchings
-    for item in key_words:
-        if (item in article_text.lower()) and (article_text not in df.values):
-            article_link = 'https://www.binance.com' + news.get('href')
-            msg = article_text + '\n' + article_link
-            updated_list.append([article_text, article_link])
-            print(article_text)
-
-            # Send message to Discord server
-            webhook = DiscordWebhook(url=url_wb, content=msg)
-            response = webhook.execute()
-
-# Export updated news to Excel
-cols = ['Text', 'Link']
-df = df.append(pd.DataFrame(updated_list, columns=cols), ignore_index = True)
-df.to_excel(path, index = False)
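The duplicate filtering in the new script leans on ch1p's State, which persists the seen-URL list across runs. As a rough mental model only (the file path, format, and explicit save step here are illustrative assumptions, not ch1p's actual API or behavior), something like a JSON-backed dict:

```python
# Illustrative stand-in for ch1p.State: a dict persisted to a JSON file.
# The file path, format, and explicit save step are assumptions.
import json
import os

STATE_FILE = os.path.expanduser('~/.binance-announcements-state.json')

def load_state(default):
    # return the saved state if present, otherwise the supplied default
    if os.path.exists(STATE_FILE):
        with open(STATE_FILE) as f:
            return json.load(f)
    return default

def save_state(state):
    with open(STATE_FILE, 'w') as f:
        json.dump(state, f)

state = load_state(default=dict(urls=[]))
url = 'https://www.binance.com/en/support/announcement/example'  # hypothetical
if url not in state['urls']:
    state['urls'].append(url)  # a new URL would trigger a notification
save_state(state)
```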
requirements.txt (new file)
@@ -0,0 +1,3 @@
+requests~=2.26.0
+beautifulsoup4~=4.10.0
+ch1p~=0.0.5
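Setup and a single run then presumably look like this (the invocation is inferred from the shebang and the executable bit, not documented in the commit):

```sh
pip install -r requirements.txt
./binance-announcement-scraping-bot.py   # one scrape-and-notify pass
```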