This commit is contained in:
Evgeny Zinoviev 2022-03-16 16:31:39 +03:00
parent 2dd41e65a3
commit 35d86a39e2
3 changed files with 22 additions and 12 deletions

9
mgs.py
View File

@ -1,4 +1,10 @@
import requests, textract, re, os, tempfile, random, string
import requests
import textract
import re
import os
import tempfile
import random
import string
from bs4 import BeautifulSoup
from typing import List, Dict
@ -13,6 +19,7 @@ regex = r"""(?i)\b((?:https?:(?:/{1,3}|[a-z0-9%])|[a-z0-9.\-]+[.](?:com|net|org|
def strgen(n: int) -> str:
    """Return a random string of ``n`` lowercase ASCII letters and digits.

    The characters are drawn with the ``random`` module, so the result is
    not suitable for security-sensitive tokens. A non-positive ``n``
    yields an empty string.

    :param n: number of characters to generate
    :return: the generated string
    """
    # Build the alphabet once instead of on every draw, and use the
    # lowercase letters directly rather than upper-casing then lowering.
    alphabet = string.ascii_lowercase + string.digits
    return ''.join(random.choice(alphabet) for _ in range(n))
def get_links(s: str) -> List[str]:
    """Return the distinct matches of the module-level ``regex`` in ``s``.

    Duplicates are dropped and the remaining matches are returned in order
    of first occurrence, so the result is the same on every run.

    :param s: text to scan
    :return: list of unique matches, empty if nothing matched
    """
    # dict.fromkeys de-duplicates while keeping insertion order (guaranteed
    # since Python 3.7); the order of list(set(...)) depended on string
    # hashing and could differ between interpreter runs.
    return list(dict.fromkeys(re.findall(regex, s)))

View File

@ -1,4 +1,4 @@
requests~=2.25.1
textract~=1.6.3
beautifulsoup4~=4.8.0
ch1p~=0.0.4
ch1p~=0.0.7

View File

@ -24,13 +24,13 @@ if __name__ == '__main__':
cases = mgs.get_cases()
# read state
jst = State(file=args.state_file, default=dict(cases=[]))
data = jst.read()
state = State(file=args.state_file,
default=dict(cases=[]))
# loop through cases
results = []
for case in cases:
if case['statement_number'] in data['cases']:
if case['statement_number'] in state['cases']:
continue
matched = False
@ -38,19 +38,22 @@ if __name__ == '__main__':
if mydomain in case['decision_text']:
matched = True
results.append('%s found in %s' % (mydomain, case['statement_number']))
data['cases'].append(case['statement_number'])
state['cases'].append(case['statement_number'])
if matched:
break
# remember found cases
jst.write(data)
# if found anything, send to telegram
if results:
text = '\n'.join(results)
text = 'new mos-gorsud findings:\n' + text
text = 'new MGS findings:\n' + text
telegram_notify(text=escape(text), parse_mode='HTML', token=args.token, chat_id=args.chat_id)
telegram_notify(text=escape(text),
parse_mode='HTML',
token=args.token,
chat_id=args.chat_id)
except:
telegram_notify(text='error: '+escape(traceback.format_exc()), parse_mode='HTML', token=args.token, chat_id=args.chat_id)
telegram_notify(text='error: '+escape(traceback.format_exc()),
parse_mode='HTML',
token=args.token,
chat_id=args.chat_id)