From 5f53513a64c0ce3bf702183963dafb50e87a60ca Mon Sep 17 00:00:00 2001 From: bakatrouble Date: Wed, 27 Nov 2019 23:10:15 +0300 Subject: [PATCH] use markovify --- bots/modules/_mtg.py | 74 --------------------------------------- bots/modules/cyberlina.py | 8 ++--- requirements.txt | 2 ++ 3 files changed, 5 insertions(+), 79 deletions(-) delete mode 100644 bots/modules/_mtg.py diff --git a/bots/modules/_mtg.py b/bots/modules/_mtg.py deleted file mode 100644 index a1d7cb3..0000000 --- a/bots/modules/_mtg.py +++ /dev/null @@ -1,74 +0,0 @@ -import random -# import re -import json -import os # listdir, path.join, path.isdir - -# from collections import Counter - -# punctuation to remove -RM_PUNCT = u"'\"”„+-—:;()" - - -# Markov random Text Generator -class MTG(object): - def __init__(self, data: dict): - self.MC = data or {} - - def train_from_seq(self, word_seq): - for w1, w2, w3 in self.triples(word_seq): - key = w1 + '+' + w2 - if key not in self.MC: - self.MC[key] = [] - self.MC[key].append(w3) - - def train_from_text(self, text, remove=RM_PUNCT): - text = text.replace('.', ' . ').replace(',', ' , ') - word_seq = text.strip().split() - word_seq = [word.strip(remove) for word in word_seq if len(word.strip(remove)) != 0] - self.train_from_seq(word_seq) - - # add data form another MTG - def __iadd__(self, other): - for key, value in other.MC.items(): - if key not in self.MC: - self.MC[key] = [] - self.MC[key].extend(value) - - def save(self): - return self.MC - - # generate all triples to build MC - - def triples(self, word_seq): - if len(word_seq) < 3: - return - for i in range(len(word_seq) - 2): - yield (word_seq[i], word_seq[i + 1], word_seq[i + 2]) - - def choose_start_words(self, first_word=None, second_word=None): - if second_word is not None: - return first_word, second_word - elif first_word is not None: - word = '.' + '+' + first_word - word_cap = '.' + '+' + first_word.capitalize() - if word in self.MC and len(self.MC[word]): - second_word = random.choice(self.MC[word]) - elif word_cap in self.MC and len(self.MC[word_cap]): - second_word = random.choice(self.MC[word_cap]) - else: - return self.choose_start_words() - return first_word, second_word - else: - first_word = random.choice(self.MC[random.choice(list(self.MC.keys()))]) - return self.choose_start_words(first_word, second_word) - - def generate_text(self, first_word=None, second_word=None, size=30): - w1, w2 = self.choose_start_words(first_word, second_word) - gen_words = [w1.capitalize()] - while not (len(gen_words) > size and w2 == '.'): - gen_words.append(w2.capitalize() if (w1 == '.') else w2) - w1, w2 = w2, random.choice(self.MC[w1 + '+' + w2]) - gen_words.append(w2) - text = ' '.join(gen_words) - text = text.replace(' ,', ',').replace(' .', '.') - return text diff --git a/bots/modules/cyberlina.py b/bots/modules/cyberlina.py index 8525a36..cccbc73 100644 --- a/bots/modules/cyberlina.py +++ b/bots/modules/cyberlina.py @@ -4,6 +4,7 @@ from random import choice, seed from uuid import uuid4 import humanize +import markovify from django.db import models from django.utils.timezone import localdate, now, make_aware from jsoneditor.forms import JSONEditor @@ -14,7 +15,6 @@ from telegram.ext import Dispatcher, CallbackContext, MessageHandler, Filters, C from telegram.utils.helpers import mention_html from bots.models import TelegramBotModuleConfig -from bots.modules._mtg import MTG class CyberLinaBotModuleConfig(TelegramBotModuleConfig): @@ -42,7 +42,7 @@ class CyberLinaBotModuleConfig(TelegramBotModuleConfig): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) - self.mtg = MTG(self._mtg_data) + self.mtg = markovify.Text.from_dict(self._mtg_data) def message_handler(self, update: Update, ctx: CallbackContext): if not update.effective_chat or not update.effective_user: @@ -114,9 +114,7 @@ class CyberLinaBotModuleConfig(TelegramBotModuleConfig): def save(self, force_insert=False, force_update=False, using=None, update_fields=None): if self.mtg_train: - self.mtg.train_from_text(self.mtg_train) - self._mtg_data = self.mtg.save() - self.mtg_train = None + self._mtg_data = markovify.Text(self.mtg_train).to_dict() super().save(force_insert, force_update, using, update_fields) diff --git a/requirements.txt b/requirements.txt index e73f11b..fdcf8a0 100644 --- a/requirements.txt +++ b/requirements.txt @@ -43,6 +43,7 @@ json-rpc==1.12.1 jsonfield2==3.0.3 jsonschema==3.2.0 kombu==4.2.2.post1 +markovify==0.7.2 more-itertools==7.2.0 oauthlib==3.0.1 packaging==19.2 @@ -79,6 +80,7 @@ TgCrypto==1.1.1 tornado==6.0.3 Twisted==19.10.0 txaio==18.8.1 +Unidecode==1.1.1 urllib3==1.24.1 vine==1.2.0 vk-api==11.6.1