"""Markov-chain random text generator (contents of ``bots/modules/_mtg.py``)."""
import random
import json
import os  # listdir, path.join, path.isdir

# Punctuation stripped from both ends of every token during training.
RM_PUNCT = u"'\"”„+-—:;()"

# Separator placed between the two words that make up a chain key.
_KEY_SEP = '+'
# Token that marks a sentence boundary inside the chain.
_SENTENCE_END = '.'


class MTG(object):
    """Second-order Markov random Text Generator.

    The model lives in ``self.MC``: a dict mapping ``"w1+w2"`` to the list of
    words that were seen right after the pair ``(w1, w2)``.  Duplicates are
    kept in the lists, so ``random.choice`` picks successors in proportion to
    how often they occurred.
    """

    def __init__(self, data: dict = None):
        """Create a generator.

        Args:
            data: previously saved chain (see ``save``).  A non-empty dict is
                used as-is (not copied), so training mutates it in place.
                ``None`` or an empty dict starts a fresh model.
        """
        self.MC = data or {}

    def train_from_seq(self, word_seq):
        """Record every consecutive word triple of ``word_seq`` in the chain."""
        for w1, w2, w3 in self.triples(word_seq):
            self.MC.setdefault(w1 + _KEY_SEP + w2, []).append(w3)

    def train_from_text(self, text, remove=RM_PUNCT):
        """Tokenise ``text`` and train on the resulting word sequence.

        Full stops and commas become tokens of their own; every token is then
        stripped of the characters in ``remove`` and dropped if nothing is
        left.
        """
        text = text.replace('.', ' . ').replace(',', ' , ')
        stripped = (word.strip(remove) for word in text.split())
        self.train_from_seq([word for word in stripped if word])

    def __iadd__(self, other):
        """Merge the chain of another ``MTG`` into this one (``a += b``).

        Returns ``self``: an in-place operator that returns nothing would
        rebind the left-hand name to ``None``.
        """
        for key, value in other.MC.items():
            # setdefault creates a fresh list, so ``other``'s lists are never shared.
            self.MC.setdefault(key, []).extend(value)
        return self

    def save(self):
        """Return the chain dict (the same object that is held in ``self.MC``)."""
        return self.MC

    def triples(self, word_seq):
        """Yield every run of three consecutive words as a tuple.

        Yields nothing when ``word_seq`` has fewer than three items.
        """
        yield from zip(word_seq, word_seq[1:], word_seq[2:])

    def choose_start_words(self, first_word=None, second_word=None):
        """Pick the pair of words text generation starts from.

        * Both words given: they are returned unchanged.
        * Only ``first_word`` given: it is used if it (or its capitalised
          form) was seen at the start of a sentence; the returned first word
          is the form that actually matched, and the second word is a random
          successor.
        * Otherwise a random sentence start is chosen; if the chain contains
          no sentence starts at all, a random chain key is split into its two
          words (this assumes the first word holds no ``'+'``).

        Raises:
            ValueError: a random start is needed but the model is empty.
        """
        if second_word is not None:
            return first_word, second_word

        start_prefix = _SENTENCE_END + _KEY_SEP
        if first_word is not None:
            for candidate in (first_word, first_word.capitalize()):
                followers = self.MC.get(start_prefix + candidate)
                if followers:
                    return candidate, random.choice(followers)

        if not self.MC:
            raise ValueError('cannot generate text from an empty model')

        starts = [key for key, followers in self.MC.items()
                  if followers and key.startswith(start_prefix)]
        if starts:
            key = random.choice(starts)
            return key[len(start_prefix):], random.choice(self.MC[key])

        # No sentence boundary was ever seen: start from any known pair.
        # dict views are not sequences, hence the list() for random.choice.
        first_word, _, second_word = random.choice(list(self.MC)).partition(_KEY_SEP)
        return first_word, second_word

    def generate_text(self, first_word=None, second_word=None, size=30, max_size=None):
        """Generate random text by walking the chain.

        Generation stops at the first full stop reached after more than
        ``size`` words have been produced.  It also stops early, without a
        closing full stop, when the current pair has no recorded successor or
        when ``max_size`` words have been produced, so the walk always ends.

        Args:
            first_word: optional first word (see ``choose_start_words``).
            second_word: optional second word; only meaningful together with
                ``first_word``.
            size: minimum number of words before a full stop ends the text.
            max_size: hard upper bound on the number of words; defaults to
                four times ``size``.  At least two words are always produced.

        Returns:
            The generated text with the spaces before ``,`` and ``.`` removed.

        Raises:
            ValueError: the model is empty and no start pair was supplied.
        """
        limit = max(size, 1) * 4 if max_size is None else max_size
        w1, w2 = self.choose_start_words(first_word, second_word)
        gen_words = [w1.capitalize()]
        while True:
            if len(gen_words) > size and w2 == _SENTENCE_END:
                gen_words.append(w2)  # closing full stop
                break
            # The word that follows a full stop opens a new sentence.
            gen_words.append(w2.capitalize() if w1 == _SENTENCE_END else w2)
            followers = self.MC.get(w1 + _KEY_SEP + w2)
            if not followers or len(gen_words) >= limit:
                break
            w1, w2 = w2, random.choice(followers)
        text = ' '.join(gen_words)
        return text.replace(' ,', ',').replace(' .', '.')
update.effective_user: return @@ -89,7 +97,12 @@ class CyberLinaBotModuleConfig(TelegramBotModuleConfig): id=uuid4(), title='Не нажимай >_<', input_message_content=InputTextMessageContent(choice(self.inline_reactions)) - ) + ), + InlineQueryResultArticle( + id=uuid4(), + title='ФлаБеПроЛейка', + input_message_content=InputTextMessageContent(choice(self.mtg.generate_text())) + ), ] update.inline_query.answer(results, cache_time=0) @@ -99,6 +112,13 @@ class CyberLinaBotModuleConfig(TelegramBotModuleConfig): dispatcher.add_handler(InlineQueryHandler(self.inline_query_handler)) return dispatcher + def save(self, force_insert=False, force_update=False, using=None, update_fields=None): + if self.mtg_train: + self.mtg.train_from_text(self.mtg_train) + self._mtg_data = self.mtg.save() + self.mtg_train = None + super().save(force_insert, force_update, using, update_fields) + class CyberLinaChat(models.Model): config = models.ForeignKey(CyberLinaBotModuleConfig, on_delete=models.CASCADE, related_name='chats')