fix parsing, add telegram notifications

Evgeny Zinoviev 2021-09-16 17:01:10 +03:00
parent c2dbff2e2a
commit 38f3737e72
5 changed files with 81 additions and 48 deletions

.gitignore vendored Normal file

@@ -0,0 +1 @@
.idea

README.md

@@ -1,8 +1,11 @@
 # Binance-Announcements
 Web scraping for getting notifications on new announcements
-Like it or not, everything Binance touches turns to gold. New listings on their platform are a great opportunity for traders, so they'd better be well-informed.
+Fork of https://github.com/darroyolpz/Binance-Announcements.
-CHZ coin was listed on their platform yesterday, but unfortunately I missed the announcement on their webpage.
+- Fixed HTML layout parsing
+- Removed Discord
+- Added Telegram notifications
-So that this doesn't happen again, I've created a short script to get the notifications automatically from the Binance page. It will run indefinitely on my computer, so every time they release a new announcement, I'll receive a tweet and a Telegram message.
 ## License
 MIT


@@ -0,0 +1,70 @@
#!/usr/bin/env python3
import traceback

from requests import get
from bs4 import BeautifulSoup
from ch1p import State, telegram_notify
from html import escape

def scrap_announcements():
    """Scrape the Binance announcements page and group article links by category."""
    url = "https://www.binance.com/en/support/announcement"
    response = get(url)
    soup = BeautifulSoup(response.text, 'html.parser')

    data = []
    total_news = 0

    # each category block carries this CSS class in the current page layout
    categories_list = soup.find_all(class_='css-wmvdm0')
    for c in categories_list:
        category_title = c.select('h2[data-bn-type="text"]')[0].text
        category_data = {
            'title': category_title,
            'news': []
        }

        # article links live in the div that follows the category header
        for link in c.find_next('div').select('a[data-bn-type="link"]'):
            link_id = link.get('id')
            if link_id is None or not link_id.startswith('supportList'):
                continue
            category_data['news'].append({
                'text': link.text,
                'link': link.get('href')
            })
            total_news += 1

        data.append(category_data)

    if not total_news:
        raise RuntimeError('failed to find any articles')

    return data

if __name__ == '__main__':
    # ch1p's State persists the list of already-seen URLs between runs
    state = State(default=dict(urls=[]))

    try:
        blocks = []
        data = scrap_announcements()
        for category in data:
            updates = []
            for item in category['news']:
                if item['link'] not in state['urls']:
                    updates.append(item)
                    state['urls'].append(item['link'])

            if updates:
                # escape scraped text so it cannot break Telegram's HTML parse mode
                buf = f"<i>{escape(category['title'])}</i>\n"
                buf += '\n'.join(f"<a href=\"{item['link']}\">{escape(item['text'])}</a>" for item in updates)
                blocks.append(buf)

        if blocks:
            message = '<b>Binance Announcements</b>\n\n'
            message += '\n\n'.join(blocks)
            telegram_notify(text=message, parse_mode='HTML')
    except Exception:
        telegram_notify(text='error: ' + escape(traceback.format_exc()), parse_mode='HTML')
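
The selectors above are easiest to follow against concrete markup. A minimal sketch, assuming the page keeps the layout the script targets: the css-wmvdm0 class, the data-bn-type attributes, and the supportList link ids come from the code above, while the sample titles and URLs are invented for illustration.

from bs4 import BeautifulSoup

# Hypothetical fragment of the announcements page; only the selector-relevant
# attributes matter here, the content itself is made up.
SAMPLE = '''
<div class="css-wmvdm0"><h2 data-bn-type="text">New Cryptocurrency Listing</h2></div>
<div>
  <a data-bn-type="link" id="supportList_0" href="/en/support/announcement/abc">Binance Will List FOO</a>
  <a data-bn-type="link" id="banner" href="/promo">not an announcement</a>
</div>
'''

soup = BeautifulSoup(SAMPLE, 'html.parser')
for c in soup.find_all(class_='css-wmvdm0'):
    title = c.select('h2[data-bn-type="text"]')[0].text
    # find_next('div') walks forward in document order to the links container
    for link in c.find_next('div').select('a[data-bn-type="link"]'):
        if (link.get('id') or '').startswith('supportList'):
            print(title, '->', link.text, link.get('href'))
# prints: New Cryptocurrency Listing -> Binance Will List FOO /en/support/announcement/abc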


@@ -1,44 +0,0 @@
import os, requests
import pandas as pd
from bs4 import BeautifulSoup
from requests import get
from discord_webhook import DiscordWebhook

# Webhook settings
url_wb = os.environ.get('DISCORD_WH')

# Data for the scrap
url = "https://www.binance.com/en/support/announcement"
response = get(url)
soup = BeautifulSoup(response.text, 'html.parser')
news_list = soup.find_all(class_='css-sbrje5')

# Create a bag of key words for getting matches
key_words = ['list', 'token sale', 'open trading', 'opens trading', 'perpetual', 'defi', 'uniswap', 'airdrop']

# Open old database file
path = "/home/pi/OpenAlpha/db.xlsx"
df = pd.read_excel(path)

# Empty list
updated_list = []

for news in news_list:
    article_text = news.text

    # Check for matchings
    for item in key_words:
        if (item in article_text.lower()) and (article_text not in df.values):
            article_link = 'https://www.binance.com' + news.get('href')
            msg = article_text + '\n' + article_link
            updated_list.append([article_text, article_link])
            print(article_text)

            # Send message to Discord server
            webhook = DiscordWebhook(url=url_wb, content=msg)
            response = webhook.execute()

# Export updated news to Excel
cols = ['Text', 'Link']
df = df.append(pd.DataFrame(updated_list, columns=cols), ignore_index=True)
df.to_excel(path, index=False)

requirements.txt Normal file

@@ -0,0 +1,3 @@
requests~=2.26.0
beautifulsoup4~=4.10.0
ch1p~=0.0.5
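
ch1p is pinned above, but this diff shows nothing of its API beyond the two imported names. A rough stand-in for running the script offline, assuming State behaves like a dict persisted between runs and telegram_notify delivers a message; the real library's storage path and delivery details may differ:

# Hypothetical stand-ins for ch1p.State and ch1p.telegram_notify,
# for offline testing only -- not ch1p's real implementation.
import json
import os


class State(dict):
    # assumed: a dict loaded from and saved to a JSON file between runs
    # (the script above never calls save(), so the real ch1p.State
    # presumably persists mutations automatically)
    def __init__(self, default=None, path='state.json'):
        super().__init__(default or {})
        self._path = path
        if os.path.exists(path):
            with open(path) as f:
                self.update(json.load(f))

    def save(self):
        with open(self._path, 'w') as f:
            json.dump(dict(self), f)


def telegram_notify(text, parse_mode=None):
    # assumed: the real function posts via the Telegram Bot API; print instead
    print(f'[{parse_mode}] {text}')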