use markovify

This commit is contained in:
bakatrouble 2019-11-27 23:10:15 +03:00
parent c90445aaf0
commit 5f53513a64
3 changed files with 5 additions and 79 deletions

View File

@ -1,74 +0,0 @@
import random
# import re
import json
import os # listdir, path.join, path.isdir
# from collections import Counter
# Punctuation stripped from both ends of every token before training.
RM_PUNCT = u"'\"”„+-—:;()"


class MTG(object):
    """Second-order Markov chain random text generator.

    The chain is stored in ``self.MC`` as a plain dict that maps the key
    ``"<word1>+<word2>"`` to the list of words observed right after that
    pair.  Followers are stored with repetition, so a uniform
    ``random.choice`` over the list reproduces the observed frequencies.
    Sentence ends are represented by the standalone token ``"."``.
    """

    def __init__(self, data: dict):
        """Wrap an existing chain dict; a falsy ``data`` starts an empty chain."""
        self.MC = data or {}

    def train_from_seq(self, word_seq):
        """Record every consecutive word triple of ``word_seq`` in the chain."""
        for w1, w2, w3 in self.triples(word_seq):
            self.MC.setdefault(w1 + '+' + w2, []).append(w3)

    def train_from_text(self, text, remove=RM_PUNCT):
        """Tokenize ``text`` and train the chain on the resulting words.

        Periods and commas become standalone tokens; the characters in
        ``remove`` are stripped from both ends of each token, and tokens
        that end up empty are dropped.
        """
        text = text.replace('.', ' . ').replace(',', ' , ')
        stripped = (word.strip(remove) for word in text.split())
        self.train_from_seq([word for word in stripped if word])

    def __iadd__(self, other):
        """Merge the chain of another MTG into this one (``self += other``).

        Returns ``self``: Python rebinds the left-hand name to whatever
        ``__iadd__`` returns, so returning nothing would replace the
        generator with ``None``.
        """
        for key, value in other.MC.items():
            self.MC.setdefault(key, []).extend(value)
        return self

    def save(self):
        """Return the chain dict (the same object that is held in ``self.MC``)."""
        return self.MC

    def triples(self, word_seq):
        """Yield every run of three consecutive words used to build the chain.

        Sequences shorter than three words yield nothing.
        """
        yield from zip(word_seq, word_seq[1:], word_seq[2:])

    def choose_start_words(self, first_word=None, second_word=None):
        """Pick the pair of words a generated text starts with.

        * Both words given: they are returned unchanged.
        * Only ``first_word`` given: it is looked up as a sentence start
          (a word seen right after ``"."``), first as spelled and then
          capitalized.  The spelling that actually matched is returned,
          because that is the spelling the follow-up chain keys use.
          If neither spelling is known, a random start is chosen instead.
        * Nothing given: a random sentence start is chosen; if the chain
          has no sentence starts at all, a random known word pair is used.

        Raises:
            IndexError: a random start is needed but the chain has no
                usable entries.
        """
        if second_word is not None:
            return first_word, second_word
        if first_word is not None:
            for candidate in (first_word, first_word.capitalize()):
                followers = self.MC.get('.' + '+' + candidate)
                if followers:
                    return candidate, random.choice(followers)
        return self._random_start()

    def _random_start(self):
        """Return a random start pair, preferring words that begin a sentence."""
        usable = [key for key, followers in self.MC.items() if followers]
        if not usable:
            raise IndexError('cannot choose start words from an empty chain')
        sentence_starts = [key for key in usable if key.startswith('.+')]
        if sentence_starts:
            key = random.choice(sentence_starts)
            # Drop the leading ".+" so the text does not begin with a period.
            return key[2:], random.choice(self.MC[key])
        # No sentence boundary was ever trained: start from any known pair.
        first_word, _, second_word = random.choice(usable).partition('+')
        return first_word, second_word

    def generate_text(self, first_word=None, second_word=None, size=30,
                      max_words=None):
        """Generate random text from the chain.

        Generation continues until more than ``size`` words were produced
        and the current sentence is finished.  It also stops early when the
        chain has no continuation for the current word pair, and it never
        produces more than ``max_words`` words, so a chain that cannot reach
        a sentence end cannot loop forever.

        Args:
            first_word: optional word to start with.
            second_word: optional second word; only meaningful together
                with ``first_word``.
            size: minimum number of words before generation may stop at a
                sentence end.
            max_words: hard upper bound on the number of words; defaults to
                ``size + 200``.

        Returns:
            The generated text, with the spaces before commas and periods
            removed.

        Raises:
            IndexError: the chain is empty and no start words were given.
        """
        if max_words is None:
            max_words = size + 200
        w1, w2 = self.choose_start_words(first_word, second_word)
        gen_words = [w1.capitalize()]
        while True:
            if len(gen_words) > size and w2 == '.':
                gen_words.append(w2)
                break
            if len(gen_words) >= max_words:
                break
            # The first word of every new sentence is capitalized.
            gen_words.append(w2.capitalize() if w1 == '.' else w2)
            followers = self.MC.get(w1 + '+' + w2)
            if not followers:
                # Dead end: this pair was only seen at the very end of the
                # training data, so there is nothing to continue with.
                break
            w1, w2 = w2, random.choice(followers)
        text = ' '.join(gen_words)
        return text.replace(' ,', ',').replace(' .', '.')

View File

@ -4,6 +4,7 @@ from random import choice, seed
from uuid import uuid4 from uuid import uuid4
import humanize import humanize
import markovify
from django.db import models from django.db import models
from django.utils.timezone import localdate, now, make_aware from django.utils.timezone import localdate, now, make_aware
from jsoneditor.forms import JSONEditor from jsoneditor.forms import JSONEditor
@ -14,7 +15,6 @@ from telegram.ext import Dispatcher, CallbackContext, MessageHandler, Filters, C
from telegram.utils.helpers import mention_html from telegram.utils.helpers import mention_html
from bots.models import TelegramBotModuleConfig from bots.models import TelegramBotModuleConfig
from bots.modules._mtg import MTG
class CyberLinaBotModuleConfig(TelegramBotModuleConfig): class CyberLinaBotModuleConfig(TelegramBotModuleConfig):
@ -42,7 +42,7 @@ class CyberLinaBotModuleConfig(TelegramBotModuleConfig):
def __init__(self, *args, **kwargs): def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs) super().__init__(*args, **kwargs)
self.mtg = MTG(self._mtg_data) self.mtg = markovify.Text.from_dict(self._mtg_data)
def message_handler(self, update: Update, ctx: CallbackContext): def message_handler(self, update: Update, ctx: CallbackContext):
if not update.effective_chat or not update.effective_user: if not update.effective_chat or not update.effective_user:
@ -114,9 +114,7 @@ class CyberLinaBotModuleConfig(TelegramBotModuleConfig):
def save(self, force_insert=False, force_update=False, using=None, update_fields=None): def save(self, force_insert=False, force_update=False, using=None, update_fields=None):
if self.mtg_train: if self.mtg_train:
self.mtg.train_from_text(self.mtg_train) self._mtg_data = markovify.Text(self.mtg_train).to_dict()
self._mtg_data = self.mtg.save()
self.mtg_train = None
super().save(force_insert, force_update, using, update_fields) super().save(force_insert, force_update, using, update_fields)

View File

@ -43,6 +43,7 @@ json-rpc==1.12.1
jsonfield2==3.0.3 jsonfield2==3.0.3
jsonschema==3.2.0 jsonschema==3.2.0
kombu==4.2.2.post1 kombu==4.2.2.post1
markovify==0.7.2
more-itertools==7.2.0 more-itertools==7.2.0
oauthlib==3.0.1 oauthlib==3.0.1
packaging==19.2 packaging==19.2
@ -79,6 +80,7 @@ TgCrypto==1.1.1
tornado==6.0.3 tornado==6.0.3
Twisted==19.10.0 Twisted==19.10.0
txaio==18.8.1 txaio==18.8.1
Unidecode==1.1.1
urllib3==1.24.1 urllib3==1.24.1
vine==1.2.0 vine==1.2.0
vk-api==11.6.1 vk-api==11.6.1