use markovify
This commit is contained in:
parent
c90445aaf0
commit
5f53513a64
@ -1,74 +0,0 @@
|
|||||||
import random
|
|
||||||
# import re
|
|
||||||
import json
|
|
||||||
import os # listdir, path.join, path.isdir
|
|
||||||
|
|
||||||
# from collections import Counter
|
|
||||||
|
|
||||||
# Punctuation stripped from both ends of every token before training.
RM_PUNCT = u"'\"”„+-—:;()"


class MTG(object):
    """Markov random Text Generator (second order, word level).

    The chain is stored in ``self.MC``: a dict that maps the key
    ``"<word1>+<word2>"`` to the list of words observed right after that
    pair.  Repeated observations are kept, so ``random.choice`` over the list
    is weighted by frequency.  ``'.'`` and ``','`` are ordinary tokens.
    """

    def __init__(self, data: dict = None):
        """Wrap an existing chain dict, or start empty when *data* is falsy.

        A non-empty *data* dict is used as-is (not copied), so training
        mutates the caller's object.
        """
        self.MC = data or {}

    def train_from_seq(self, word_seq):
        """Record every consecutive word triple of *word_seq* in the chain."""
        for w1, w2, w3 in self.triples(word_seq):
            self.MC.setdefault(w1 + '+' + w2, []).append(w3)

    def train_from_text(self, text, remove=RM_PUNCT):
        """Tokenise *text* and train on the resulting word sequence.

        Periods and commas become separate tokens; the characters in *remove*
        are stripped from both ends of each token and tokens that end up
        empty are dropped.
        """
        text = text.replace('.', ' . ').replace(',', ' , ')
        stripped = (word.strip(remove) for word in text.strip().split())
        self.train_from_seq([word for word in stripped if word])

    # add data from another MTG
    def __iadd__(self, other):
        """Merge the observations of *other* into this chain (``a += b``).

        Returns ``self``: an in-place operator must return its result,
        otherwise ``a += b`` rebinds ``a`` to ``None``.
        """
        for key, value in other.MC.items():
            self.MC.setdefault(key, []).extend(value)
        return self

    def save(self):
        """Return the underlying chain dict (the same object, not a copy)."""
        return self.MC

    # generate all triples to build MC
    def triples(self, word_seq):
        """Yield each run of three consecutive items of *word_seq* as a tuple.

        Yields nothing when the sequence has fewer than three items.
        """
        yield from zip(word_seq, word_seq[1:], word_seq[2:])

    def _random_start(self):
        """Pick a random ``(first_word, second_word)`` pair to start from.

        Prefers pairs seen right after a period (keys starting with ``'.+'``);
        if the chain has none, falls back to any pair that has a successor.
        Raises ``IndexError`` when the chain holds no usable data.
        """
        sentence_starts = [key for key, followers in self.MC.items()
                           if followers and key.startswith('.+')]
        if sentence_starts:
            key = random.choice(sentence_starts)
            # key is '.+<first_word>'; its followers are second-word candidates
            return key[2:], random.choice(self.MC[key])

        usable = [key for key, followers in self.MC.items() if followers]
        if not usable:
            raise IndexError('cannot choose start words: the chain is empty')
        first_word, _, second_word = random.choice(usable).partition('+')
        return first_word, second_word

    def choose_start_words(self, first_word=None, second_word=None):
        """Return the ``(first_word, second_word)`` pair generation starts with.

        * Both words given: returned unchanged.
        * Only *first_word* given: the second word is drawn from the words
          seen after ``'. <first_word>'``, trying the word as given and then
          its capitalised form.  The spelling that matched is returned so the
          following chain lookup uses a key that exists.
        * Otherwise, or when *first_word* never started a sentence, a random
          start pair is chosen.

        Raises ``IndexError`` when a random start is needed but the chain is
        empty.
        """
        if second_word is not None:
            return first_word, second_word

        if first_word is not None:
            for candidate in (first_word, first_word.capitalize()):
                followers = self.MC.get('.' + '+' + candidate)
                if followers:
                    return candidate, random.choice(followers)

        return self._random_start()

    def generate_text(self, first_word=None, second_word=None, size=30,
                      max_size=None):
        """Generate random text by walking the chain.

        Generation runs until more than *size* tokens were produced and the
        next token is a period, so the text normally ends on a full sentence.
        It also stops early when the current word pair has no recorded
        successor, or when *max_size* tokens were produced (default: ten
        times *size*), which guards against chains that never reach a period.

        The first word and every word following a period are capitalised;
        spaces before commas and periods are removed from the result.
        """
        w1, w2 = self.choose_start_words(first_word, second_word)
        limit = max_size if max_size is not None else 10 * max(size, 1)

        gen_words = [w1.capitalize()]
        while True:
            if len(gen_words) > size and w2 == '.':
                gen_words.append(w2)
                break
            gen_words.append(w2.capitalize() if w1 == '.' else w2)
            followers = self.MC.get(w1 + '+' + w2)
            if not followers or len(gen_words) >= limit:
                # dead end in the chain, or safety cap reached
                break
            w1, w2 = w2, random.choice(followers)

        text = ' '.join(gen_words)
        return text.replace(' ,', ',').replace(' .', '.')
|
|
@ -4,6 +4,7 @@ from random import choice, seed
|
|||||||
from uuid import uuid4
|
from uuid import uuid4
|
||||||
|
|
||||||
import humanize
|
import humanize
|
||||||
|
import markovify
|
||||||
from django.db import models
|
from django.db import models
|
||||||
from django.utils.timezone import localdate, now, make_aware
|
from django.utils.timezone import localdate, now, make_aware
|
||||||
from jsoneditor.forms import JSONEditor
|
from jsoneditor.forms import JSONEditor
|
||||||
@ -14,7 +15,6 @@ from telegram.ext import Dispatcher, CallbackContext, MessageHandler, Filters, C
|
|||||||
from telegram.utils.helpers import mention_html
|
from telegram.utils.helpers import mention_html
|
||||||
|
|
||||||
from bots.models import TelegramBotModuleConfig
|
from bots.models import TelegramBotModuleConfig
|
||||||
from bots.modules._mtg import MTG
|
|
||||||
|
|
||||||
|
|
||||||
class CyberLinaBotModuleConfig(TelegramBotModuleConfig):
|
class CyberLinaBotModuleConfig(TelegramBotModuleConfig):
|
||||||
@ -42,7 +42,7 @@ class CyberLinaBotModuleConfig(TelegramBotModuleConfig):
|
|||||||
|
|
||||||
def __init__(self, *args, **kwargs):
|
def __init__(self, *args, **kwargs):
|
||||||
super().__init__(*args, **kwargs)
|
super().__init__(*args, **kwargs)
|
||||||
self.mtg = MTG(self._mtg_data)
|
self.mtg = markovify.Text.from_dict(self._mtg_data)
|
||||||
|
|
||||||
def message_handler(self, update: Update, ctx: CallbackContext):
|
def message_handler(self, update: Update, ctx: CallbackContext):
|
||||||
if not update.effective_chat or not update.effective_user:
|
if not update.effective_chat or not update.effective_user:
|
||||||
@ -114,9 +114,7 @@ class CyberLinaBotModuleConfig(TelegramBotModuleConfig):
|
|||||||
|
|
||||||
def save(self, force_insert=False, force_update=False, using=None, update_fields=None):
|
def save(self, force_insert=False, force_update=False, using=None, update_fields=None):
|
||||||
if self.mtg_train:
|
if self.mtg_train:
|
||||||
self.mtg.train_from_text(self.mtg_train)
|
self._mtg_data = markovify.Text(self.mtg_train).to_dict()
|
||||||
self._mtg_data = self.mtg.save()
|
|
||||||
self.mtg_train = None
|
|
||||||
super().save(force_insert, force_update, using, update_fields)
|
super().save(force_insert, force_update, using, update_fields)
|
||||||
|
|
||||||
|
|
||||||
|
@ -43,6 +43,7 @@ json-rpc==1.12.1
|
|||||||
jsonfield2==3.0.3
|
jsonfield2==3.0.3
|
||||||
jsonschema==3.2.0
|
jsonschema==3.2.0
|
||||||
kombu==4.2.2.post1
|
kombu==4.2.2.post1
|
||||||
|
markovify==0.7.2
|
||||||
more-itertools==7.2.0
|
more-itertools==7.2.0
|
||||||
oauthlib==3.0.1
|
oauthlib==3.0.1
|
||||||
packaging==19.2
|
packaging==19.2
|
||||||
@ -79,6 +80,7 @@ TgCrypto==1.1.1
|
|||||||
tornado==6.0.3
|
tornado==6.0.3
|
||||||
Twisted==19.10.0
|
Twisted==19.10.0
|
||||||
txaio==18.8.1
|
txaio==18.8.1
|
||||||
|
Unidecode==1.1.1
|
||||||
urllib3==1.24.1
|
urllib3==1.24.1
|
||||||
vine==1.2.0
|
vine==1.2.0
|
||||||
vk-api==11.6.1
|
vk-api==11.6.1
|
||||||
|
Loading…
Reference in New Issue
Block a user