triumfalno/main.py

#!/usr/bin/env python3

import argparse, sys, json, os, re
# termcolor is an optional dependency: remember whether it could be imported
# so that output can fall back to plain print() when it is missing.
try:
    from termcolor import cprint
    colors_supported = True
except ImportError:
    colors_supported = False

# Directory that contains this script (symlinks resolved).
CWD = os.path.dirname(os.path.realpath(__file__))

def print_colored(s, color, fallback_prefix=''):
    """Print *s* in *color*, or prefixed with *fallback_prefix* without termcolor.

    Args:
        s: text to print.
        color: color name handed to termcolor's cprint().
        fallback_prefix: plain-text tag prepended to *s* when colors are
            not available.
    """
    if not colors_supported:
        # No termcolor installed: mark the line with a textual prefix instead.
        print(fallback_prefix + s)
        return

    cprint(s, color)

def load_data():
    """Load the text entries from the data.json file next to this script.

    Returns:
        list: the parsed entries whose 'text' value is not an empty string.

    Raises:
        OSError: if data.json cannot be opened.
        json.JSONDecodeError: if data.json is not valid JSON.
    """
    # Read as UTF-8 explicitly instead of relying on the platform default
    # encoding, which is not UTF-8 everywhere (e.g. on Windows).
    with open(os.path.join(CWD, "data.json"), encoding='utf-8') as f:
        data = json.load(f)

    # ignore placeholders (entries with an empty text)
    # NOTE(review): assumes data.json is a list of objects that all have a
    # 'text' key - confirm against the data file.
    return [entry for entry in data if entry['text'] != '']

# Filler words and phrases that clean_string() drops when remove_junk is set.
# Entries are compared against the upper-cased text. Order matters for the
# multi-word entries because they are removed one after another.
# Commented-out entries are disabled candidates kept from the original list.
_JUNKS = (
    'ВОЕННОЕ',
    'ВЫШЕСТОЯЩИХ',
    'ПРАВО',
    'ПРАВИЛАМ ВОЙНЫ',
    'ВЫПИСКА',
    'КОНТРОЛЬ',
    'ИХ',
    'ПО',
    'НАВЫКИ',
    'С ВЫШЕСТОЯЩИМИ',
    #'ПРИСУТСТВИЕ',
    #'ЛИНЕЙНО',
    'ЗАКОННО!',
    'ПОХЛЕБКА',
    'СВЯЗЕЙ',
    'ЖУЮЩЕГО ХРЯЩИ',
    'ИНДЕКСИРОВАН БЕЗУКОРИЗНЕННО',
    'ОТКЛАДЫВАЕТСЯ ЛИНЕЙНО',
    '- ЕГО ВЕЛИЧЕСТВО',
    'ГУБЕРНИЯ',
    'С ВЫШЕСТОЯЩИМИ КОНТРОЛЬ',
    'С ЛОКАЦИИ',
    #'КАЗНЬ',
    'ПРОВЕРКИ',
    'УСТАНОВЛЕНО',
    'ПОБЕДИТЕЛЕМ',
    #'СТАЛЬНЫЕ',
    'НЕРВЫ',
    'ДАРОВАНО',
    #'ТРАНСПОРТИРОВКА',
    'ОДОБРЕНО',
    'ПРОЯВЛЕНИЯ',
    'УЗАКОНЕНО',
    'ИМЕЕТСЯ',
    'ЗНАЛ',
    'НЕ ПРИМЕЧЕНО',
    'НА СЕВЕР',
    'ПРИГОВОРИТЬ',
    'ШЕСТВУЕМ',
    'ДАГОН',
    'ДА МЕРЗНУЩИЙ',
    'КОФЕ',
    #'РЕАГИРОВАНИЕ',
    'УКАЗАНО',
    '- ВЫСОКИЙ ТИТУЛ',
    'ЗАКАЗ',
    'ЧЕРТЫ ЛИЦА',

    # english
    'SCHOOL ON THE RIGHT',
    'WILL NOT ALLOW',
    'FLYWHEEL',
    'TRIUMPHANTLY',
    'BEING USED',
    'NICE',
    'UMBRELLA',
    #'BIOROBOT',
    'CONSERVATISM',
    'WAS ESTABLISHED',
    'WITH A PASSWORD',
    'ANT',
    'YEAR',
    'RECOGNIZED',
    'SEARCHED',
    #'LEGAL',
    #'FIGHTING'
)

# Entries without spaces: matched against whole tokens.
_JUNK_WORDS = frozenset(j for j in _JUNKS if ' ' not in j)

# Entries with spaces: removed as substrings, in list order.
_JUNK_PHRASES = tuple(j for j in _JUNKS if ' ' in j)

# Punctuation removed from a token before it is looked up in _JUNK_WORDS.
# NOTE: alternation binds loosest, so this matches every '.' plus one
# trailing '!' (not "a trailing '.' or '!'"). Kept as in the original.
_TOKEN_PUNCT_RE = re.compile(r'\.|\!$')


def clean_string(s, remove_junk=False):
    """Normalise *s* into upper-case, single-space separated tokens.

    A space is inserted after every ')' and after every '!' or '.' that is
    not directly followed by ')', so punctuation ends a token. The text is
    then upper-cased and runs of whitespace are collapsed.

    Args:
        s: text to normalise.
        remove_junk: when true, additionally drop the filler words and
            phrases listed in _JUNKS and merge the spelled-out
            'Х О Р Т И Ц А' into one token.

    Returns:
        str: the normalised text.
    """
    s = s.replace(')', ') ')
    s = re.sub(r'(\!|\.)([^\)])', r'\1 \2', s)
    s = s.upper()

    s = re.sub(r'\s+', ' ', s).strip()

    if remove_junk:
        kept = []
        for token in s.split(' '):
            # Look the token up verbatim first: an entry that itself ends in
            # punctuation ('ЗАКОННО!') could otherwise never match, because
            # the punctuation is stripped before the second lookup.
            if token in _JUNK_WORDS:
                continue
            if _TOKEN_PUNCT_RE.sub('', token) in _JUNK_WORDS:
                continue
            kept.append(token)
        s = ' '.join(kept)

        # Multi-word entries are removed as plain substrings.
        for phrase in _JUNK_PHRASES:
            s = s.replace(phrase, '')

        # "Хортица" stands for the letter Х: join the spelled-out letters
        # into a single token.
        s = s.replace('Х О Р Т И Ц А', 'Х_О_Р_Т_И_Ц_А')

    return re.sub(r'\s+', ' ', s).strip()

def decode(s, is_url=False):
    """Build a string from the first character of every word in *s*.

    Words are separated by single spaces; empty pieces are ignored.
    A word made of digits followed by '%' is copied whole. With *is_url*
    set, a word ending in '://' contributes its first character followed
    by '://'.

    Args:
        s: space-separated text.
        is_url: keep the '://' of words that end with it.

    Returns:
        str: the concatenated pieces.
    """
    pieces = []
    for chunk in s.split(' '):
        token = chunk.strip()
        if not token:
            continue

        if re.match(r'^\d+\%$', token):
            # percentages are kept verbatim
            pieces.append(token)
        elif is_url and token.endswith('://'):
            pieces.append(token[0] + '://')
        else:
            pieces.append(token[0])

    return ''.join(pieces)

def decode2(s):
    """Build a string from the first character of each sentence's second word.

    Sentences are delimited by runs of '?', '.' and '!'. Words inside a
    sentence are separated by any whitespace, so repeated spaces and line
    breaks do not shift the word positions. Sentences with fewer than two
    words contribute nothing instead of raising IndexError.

    Args:
        s: text to decode.

    Returns:
        str: the collected characters, in sentence order.
    """
    letters = []
    for sentence in re.split(r'[\?\.\!]+', s):
        words = sentence.split()
        if len(words) < 2:
            # empty fragment or one-word sentence: no second word to read
            continue
        letters.append(words[1][0])

    return ''.join(letters)


# s: source
# t: type
def decode_auto(s, t, reverse_decoded=False, remove_junk=True, is_url=False):
    """Decode *s* with the scheme selected by *t*.

    Type 1 normalises the text with clean_string() and decodes it with
    decode(); type 2 passes the raw text to decode2().

    Args:
        s: source text.
        t: decoding type, 1 or 2.
        reverse_decoded: reverse the decoded string before returning it.
        remove_junk: forwarded to clean_string() (type 1 only).
        is_url: forwarded to decode() (type 1 only).

    Returns:
        str: the decoded string.

    Raises:
        ValueError: if *t* is neither 1 nor 2.
    """
    if t == 1:
        s = clean_string(s, remove_junk=remove_junk)
        result = decode(s, is_url=is_url)
    elif t == 2:
        result = decode2(s)
    else:
        # Previously fell through to an UnboundLocalError on `result`.
        raise ValueError('unsupported decode type: %r' % (t,))

    if reverse_decoded:
        # reverse string
        result = result[::-1]

    return result


def main():
    """Command-line entry point.

    --decode decodes every stored text of the selected --type,
    --decode-string / --decode-file decode ad-hoc input, and --stats prints
    the number of stored texts. The stored texts are only loaded for the
    actions that use them.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--decode', action='store_true')
    parser.add_argument('--stats', action='store_true')
    parser.add_argument('--decode-string')
    parser.add_argument('--decode-file')
    parser.add_argument('--with-junk', action='store_true')
    parser.add_argument('--is-url', action='store_true')
    parser.add_argument('--type', type=int, choices=[1, 2], default=1)
    parser.add_argument('--reverse-decoded', action='store_true')

    args = parser.parse_args()
    remove_junk = not args.with_junk

    if args.decode:
        data = load_data()

        # filter by type: type 1 entries are those without a 'type' key
        if args.type == 2:
            data = [i for i in data if i.get('type') == 2]
        else:
            data = [i for i in data if 'type' not in i]

        # sort by text length
        data.sort(key=lambda i: len(i['text']))

        for obj in data:
            text = obj['text']
            text_decoded = decode_auto(text,
                                       args.type,
                                       remove_junk=remove_junk,
                                       reverse_decoded=args.reverse_decoded,
                                       is_url=args.is_url)

            # Only type 1 runs the text through clean_string(); show exactly
            # what was fed to the decoder instead of repeating the raw text.
            if args.type == 1:
                text_cleaned = clean_string(text, remove_junk=remove_junk)
            else:
                text_cleaned = text

            # print all information
            print(text)
            print_colored(text_cleaned, 'green', fallback_prefix='[CLEANED] ')
            print_colored(text_decoded, 'cyan', fallback_prefix='[DECODED] ')

            if 'pic' in obj:
                pic = obj['pic'] if isinstance(obj['pic'], list) else [obj['pic']]
                print_colored(', '.join(pic), 'red', fallback_prefix='[PICS] ')
            if 'link' in obj:
                print_colored(obj['link'], 'red', fallback_prefix='[LINK] ')

            print("\n")

    elif args.decode_string or args.decode_file:
        if args.decode_string:
            source = args.decode_string
        else:
            with open(args.decode_file, 'r') as f:
                source = f.read()

        text_decoded = decode_auto(source,
                                   args.type,
                                   remove_junk=remove_junk,
                                   reverse_decoded=args.reverse_decoded,
                                   is_url=args.is_url)

        # print
        print_colored(text_decoded, 'cyan', fallback_prefix='[DECODED] ')

    elif args.stats:
        count = len(load_data())
        print("Total texts: %s" % count)

# Run the CLI only when executed as a script; main()'s return value is
# handed to sys.exit().
if __name__ == '__main__':
    sys.exit(main())