240 lines
6.6 KiB
Python
Executable File
240 lines
6.6 KiB
Python
Executable File
#!/usr/bin/env python3
|
||
|
||
import argparse, sys, json, os, re
|
||
# termcolor is optional: remember whether it could be imported so that
# output helpers can fall back to plain print() when it is missing.
try:
    from termcolor import cprint
except ImportError:
    colors_supported = False
else:
    colors_supported = True

# Directory that contains this script (symlinks resolved).
CWD = os.path.dirname(os.path.realpath(__file__))
|
||
|
||
def print_colored(s, color, fallback_prefix=''):
    """Print *s* in *color*, or with a plain-text prefix when colors are off.

    s: the string to print.
    color: color name handed to termcolor's ``cprint``.
    fallback_prefix: text prepended to *s* when ``colors_supported`` is
        false (the color itself is then ignored).
    """
    if not colors_supported:
        print(fallback_prefix + s)
        return

    cprint(s, color)
|
||
|
||
def load_data():
    """Load the entries stored in ``data.json`` next to this script.

    Returns a list with the parsed entries whose ``'text'`` value is not
    an empty string. Entries that have no ``'text'`` key at all are
    treated as placeholders and skipped as well.

    Raises OSError if the file cannot be opened and ValueError
    (``json.JSONDecodeError``) if it does not contain valid JSON.
    """
    # Read as UTF-8 explicitly: the locale's default encoding is not
    # guaranteed to be UTF-8, and the texts are not ASCII.
    with open(os.path.join(CWD, "data.json"), encoding="utf-8") as f:
        data = json.load(f)

    # ignore placeholders
    return [item for item in data if item.get('text', '') != '']
|
||
|
||
def clean_string(s, remove_junk=False):
    """Normalize *s* for decoding and optionally strip known junk from it.

    The text gets a space after every ``)`` and after every ``!`` or ``.``
    that is followed by anything except ``)``; it is then upper-cased and
    runs of whitespace are collapsed to single spaces.

    When *remove_junk* is true, words from the junk list are dropped
    (a word is compared after deleting its dots and a trailing ``!``),
    multi-word junk phrases are cut out of the text, and the spaced-out
    spelling ``Х О Р Т И Ц А`` is glued into a single token.

    Returns the cleaned string with whitespace collapsed and trimmed.
    """
    # NOTE(review): the nesting of the junk-removal steps below was
    # reconstructed from a copy of the file without indentation -- confirm
    # that all of them belong under ``if remove_junk``.
    spaced = s.replace(')', ') ')
    spaced = re.sub(r'(\!|\.)([^\)])', r'\1 \2', spaced)
    text = re.sub(r'\s+', ' ', spaced.upper()).strip()

    # Known filler words and phrases. Entries left as comments are
    # currently disabled.
    junk_entries = [
        'ВОЕННОЕ',
        'ВЫШЕСТОЯЩИХ',
        'ПРАВО',
        'ПРАВИЛАМ ВОЙНЫ',
        'ВЫПИСКА',
        'КОНТРОЛЬ',
        'ИХ',
        'ПО',
        'НАВЫКИ',
        'С ВЫШЕСТОЯЩИМИ',
        # 'ПРИСУТСТВИЕ',
        # 'ЛИНЕЙНО',
        'ЗАКОННО!',
        'ПОХЛЕБКА',
        'СВЯЗЕЙ',
        'ЖУЮЩЕГО ХРЯЩИ',
        'ИНДЕКСИРОВАН БЕЗУКОРИЗНЕННО',
        'ОТКЛАДЫВАЕТСЯ ЛИНЕЙНО',
        '- ЕГО ВЕЛИЧЕСТВО',
        'ГУБЕРНИЯ',
        'С ВЫШЕСТОЯЩИМИ КОНТРОЛЬ',
        'С ЛОКАЦИИ',
        # 'КАЗНЬ',
        'ГУБЕРНИЯ',
        'ПРОВЕРКИ',
        'УСТАНОВЛЕНО',
        'ПОБЕДИТЕЛЕМ',
        # 'СТАЛЬНЫЕ',
        'НЕРВЫ',
        'ДАРОВАНО',
        # 'ТРАНСПОРТИРОВКА',
        'ОДОБРЕНО',
        'ПРОЯВЛЕНИЯ',
        'УЗАКОНЕНО',
        'ИМЕЕТСЯ',
        'ЗНАЛ',
        'НЕ ПРИМЕЧЕНО',
        'НА СЕВЕР',
        'ПРИГОВОРИТЬ',
        'ШЕСТВУЕМ',
        'ДАГОН',
        'ДА МЕРЗНУЩИЙ',
        'КОФЕ',
        # 'РЕАГИРОВАНИЕ',
        'УКАЗАНО',
        '- ВЫСОКИЙ ТИТУЛ',
        'ЗАКАЗ',
        'ЧЕРТЫ ЛИЦА',

        # english
        'SCHOOL ON THE RIGHT',
        'WILL NOT ALLOW',
        'FLYWHEEL',
        'TRIUMPHANTLY',
        'BEING USED',
        'NICE',
        'UMBRELLA',
        # 'BIOROBOT',
        'CONSERVATISM',
        'WAS ESTABLISHED',
        'WITH A PASSWORD',
        'ANT',
        'YEAR',
        'RECOGNIZED',
        'SEARCHED',
        # 'LEGAL',
        # 'FIGHTING'
    ]

    # entries without spaces: matched against individual words
    single_word_junk = [entry for entry in junk_entries if ' ' not in entry]

    # entries with spaces: removed as substrings, in list order
    phrase_junk = [entry for entry in junk_entries if ' ' in entry]

    if remove_junk:
        kept_words = []
        for token in text.split(' '):
            # compare without dots and without a trailing '!'
            bare = re.sub(r'\.|\!$', '', token)
            if bare not in single_word_junk:
                kept_words.append(token)
        text = ' '.join(kept_words)

        for phrase in phrase_junk:
            text = text.replace(phrase, '')

        # "khortytsia" spelled letter by letter stands for the single
        # letter Х, so keep it together as one token
        text = text.replace('Х О Р Т И Ц А', 'Х_О_Р_Т_И_Ц_А')

    return re.sub(r'\s+', ' ', text).strip()
|
||
|
||
def decode(s, is_url=False):
    """Build a string from the first character of every word in *s*.

    Words are the pieces of *s* between single spaces; empty pieces are
    skipped. Two kinds of words are treated specially:

    * a word made only of digits followed by ``%`` is copied whole;
    * when *is_url* is true, a word ending in ``://`` contributes its
      first character followed by ``://``.

    Returns the concatenation of all contributed pieces.
    """
    percent_word = re.compile(r'^\d+\%$')
    pieces = []

    for chunk in s.split(' '):
        token = chunk.strip()
        if not token:
            continue

        if percent_word.match(token):
            pieces.append(token)
        elif is_url and token.endswith('://'):
            pieces.append(token[0] + '://')
        else:
            pieces.append(token[0])

    return ''.join(pieces)
|
||
|
||
def decode2(s):
    """Build a string from the first letter of each sentence's second word.

    *s* is split into sentences on runs of ``?``, ``.`` and ``!``. Each
    sentence is split on single spaces and the first character of its
    second word is appended to the result.

    Sentences with fewer than two words are skipped, and empty pieces
    produced by repeated spaces are ignored, so such input no longer
    raises IndexError.

    Returns the collected characters as one string.
    """
    letters = []

    for sentence in re.split(r'[\?\.\!]+', s):
        sentence = sentence.strip()
        if sentence == '':
            continue

        # Drop empty pieces so that doubled spaces do not shift word
        # positions or yield an empty "second word".
        words = [w for w in sentence.split(' ') if w]
        if len(words) < 2:
            # no second word to take a letter from
            continue

        letters.append(words[1][0])

    return ''.join(letters)
|
||
|
||
|
||
def decode_auto(s, t, reverse_decoded=False, remove_junk=True, is_url=False):
    """Decode *s* with the scheme selected by *t*.

    s: source text.
    t: decoding type. ``1`` cleans the text with ``clean_string`` and
        then applies ``decode``; ``2`` applies ``decode2`` to the text
        as given.
    reverse_decoded: when true, the decoded string is returned reversed.
    remove_junk: passed to ``clean_string`` (type 1 only).
    is_url: passed to ``decode`` (type 1 only).

    Returns the decoded string.
    Raises ValueError if *t* is neither 1 nor 2.
    """
    if t == 1:
        cleaned = clean_string(s, remove_junk=remove_junk)
        result = decode(cleaned, is_url=is_url)
    elif t == 2:
        result = decode2(s)
    else:
        # Without this branch ``result`` would be unbound below and the
        # caller would get a confusing UnboundLocalError.
        raise ValueError("unsupported decode type: %r (expected 1 or 2)" % (t,))

    if reverse_decoded:
        # reverse string
        result = result[::-1]

    return result
|
||
|
||
|
||
def main():
    """Command-line entry point.

    Exactly one action is performed, checked in this order:

    * ``--decode``: load the stored entries, keep those matching
      ``--type``, sort them by text length and print each one with its
      cleaned form, decoded form and any ``pic`` / ``link`` values;
    * ``--decode-string`` / ``--decode-file``: decode the given string
      or the contents of the given file and print the result;
    * ``--stats``: print the number of stored entries.

    ``--with-junk`` disables junk removal and ``--reverse-decoded``
    reverses the decoded output. Returns None.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--decode', action='store_true')
    parser.add_argument('--stats', action='store_true')
    parser.add_argument('--decode-string')
    parser.add_argument('--decode-file')
    parser.add_argument('--with-junk', action='store_true')
    # NOTE(review): --is-url is parsed but never read in this function,
    # so it currently has no effect on decoding.
    parser.add_argument('--is-url', action='store_true')
    parser.add_argument('--type', type=int, choices=[1, 2], default=1)
    parser.add_argument('--reverse-decoded', action='store_true')

    args = parser.parse_args()
    remove_junk = not args.with_junk

    if args.decode:
        # Load the data file only for the actions that need it, so that
        # --decode-string / --decode-file work without data.json.
        data = load_data()

        # filter by type
        if args.type == 2:
            data = [i for i in data if i.get('type') == 2]
        else:
            data = [i for i in data if 'type' not in i]

        # sort by text length
        data = sorted(data, key=lambda i: len(i['text']))

        for obj in data:
            text = obj['text']
            text_decoded = decode_auto(text,
                                       args.type,
                                       remove_junk=remove_junk,
                                       reverse_decoded=args.reverse_decoded)

            # Only type 1 runs the text through clean_string; show the
            # string that was actually decoded instead of repeating the
            # raw text under the "[CLEANED]" label.
            if args.type == 1:
                text_cleaned = clean_string(text, remove_junk=remove_junk)
            else:
                text_cleaned = text

            # print all information
            print(text)
            print_colored(text_cleaned, 'green', fallback_prefix='[CLEANED] ')
            print_colored(text_decoded, 'cyan', fallback_prefix='[DECODED] ')

            if 'pic' in obj:
                pic = obj['pic'] if isinstance(obj['pic'], list) else [obj['pic']]
                print_colored(', '.join(pic), 'red', fallback_prefix='[PICS] ')
            if 'link' in obj:
                print_colored(obj['link'], 'red', fallback_prefix='[LINK] ')

            print("\n")

    elif args.decode_string or args.decode_file:
        if args.decode_string:
            source = args.decode_string
        else:
            # Explicit UTF-8: the locale default may not be able to read
            # non-ASCII input.
            with open(args.decode_file, 'r', encoding='utf-8') as f:
                source = f.read()

        text_decoded = decode_auto(source,
                                   args.type,
                                   remove_junk=remove_junk,
                                   reverse_decoded=args.reverse_decoded)

        # print
        print_colored(text_decoded, 'cyan', fallback_prefix='[DECODED] ')

    elif args.stats:
        count = len(load_data())
        print("Total texts: %s" % count)
|
||
|
||
# Run the CLI only when executed as a script; main()'s return value
# becomes the process exit status.
if __name__ == '__main__':
    exit_status = main()
    sys.exit(exit_status)
|