# Source: telegram_bots/feeds/modules/tapas.py (Python, 76 lines, 2.8 KiB)

2019-03-31 20:35:38 +00:00
import json
2019-03-31 19:58:20 +00:00
import os
2019-03-31 20:35:38 +00:00
import re
2019-03-31 19:58:20 +00:00
from tempfile import TemporaryDirectory
import sentry_sdk
from django.db import models
from djconfig import config
from telebot import TeleBot
from telebot.types import InputMediaPhoto, InputMediaVideo
from bs4 import BeautifulSoup
import requests
from PIL import Image
from feeds.models import FeedModuleConfig
class TapasFeedModuleConfig(FeedModuleConfig):
    """Feed module that posts new episodes of a Tapas.io series to a chat.

    The series page ``https://tapas.io/series/<display_name>`` is scraped for
    episode links; each episode whose id is greater than the last seen id is
    fetched and its images are sent through the Telegram bot.
    """

    # Series slug as it appears in the tapas.io series URL.
    display_name = models.CharField(max_length=256)
    MODULE_NAME = 'Tapas.io comic'

    # Browser-like headers sent with the HTML page requests.
    _PAGE_HEADERS = {
        'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:67.0) Gecko/20100101 Firefox/67.0',
    }
    # Seconds to wait on any single HTTP request; without a timeout,
    # ``requests`` would block forever on a stalled connection.
    _REQUEST_TIMEOUT = 30

    def execute(self, bot: TeleBot, chat_id, last_id):
        """Post every episode newer than *last_id* and yield its id.

        Args:
            bot: Telegram bot used to send the photo / document.
            chat_id: Destination chat, passed through to the bot unchanged.
            last_id: Highest episode id already handled, or ``None`` when
                nothing has been handled yet (treated as ``0``).

        Yields:
            The integer id of each episode once it has been processed, in the
            order the episodes appear on the series page. Episodes with no
            images are yielded without anything being sent.

        Raises:
            requests.RequestException: If a page or image cannot be fetched
                (timeout, connection error, or non-2xx status). The failing
                episode's id is not yielded in that case.
        """
        # NOTE(review): presumably refreshes djconfig's cached values; nothing
        # in this method reads ``config`` afterwards -- confirm it is needed.
        config._reload_maybe()
        if last_id is None:
            last_id = 0

        series_soup = BeautifulSoup(
            self._fetch_page(f'https://tapas.io/series/{self.display_name}'),
            'html.parser',
        )
        # NOTE(review): an earlier revision skipped non-free episodes; no such
        # filter is applied here.
        for episode in series_soup.select('a[data-ga-category=Episode]'):
            eid = int(episode['data-id'])
            if eid <= last_id:
                continue

            title = episode.select_one('.info__title').text.strip()
            caption = f'{title}\nhttps://tapas.io/episode/{eid}'

            image_urls = self._episode_image_urls(eid)
            if len(image_urls) == 1:
                # A single image can be sent by URL; Telegram fetches it.
                bot.send_photo(chat_id, image_urls[0], caption=caption)
            elif image_urls:
                self._send_stitched(bot, chat_id, image_urls, caption)

            yield eid

    def _fetch_page(self, url):
        """Return the body text of *url*, raising on HTTP errors or timeout."""
        response = requests.get(
            url,
            headers=self._PAGE_HEADERS,
            timeout=self._REQUEST_TIMEOUT,
        )
        # Fail loudly instead of parsing an error page as if it were content.
        response.raise_for_status()
        return response.text

    def _episode_image_urls(self, eid):
        """Return the ``data-src`` URLs of the images on episode *eid*'s page."""
        episode_soup = BeautifulSoup(
            self._fetch_page(f'https://tapas.io/episode/{eid}'),
            'html.parser',
        )
        return [
            img['data-src']
            for img in episode_soup.select('.js-episode-article .content__img')
        ]

    def _send_stitched(self, bot, chat_id, image_urls, caption):
        """Download *image_urls*, stack them vertically and send as a document."""
        with TemporaryDirectory() as tmp_dir:
            panel_paths = []
            panel_sizes = []
            for index, url in enumerate(image_urls):
                response = requests.get(url, timeout=self._REQUEST_TIMEOUT)
                response.raise_for_status()
                panel_path = os.path.join(tmp_dir, f'{index}.jpg')
                with open(panel_path, 'wb') as panel_file:
                    panel_file.write(response.content)
                # Only the size is needed at this point; close the handle
                # right away so nothing stays open when the directory is
                # removed.
                with Image.open(panel_path) as panel:
                    panel_sizes.append(panel.size)
                panel_paths.append(panel_path)

            canvas = Image.new(
                'RGB',
                (
                    max(width for width, _ in panel_sizes),
                    sum(height for _, height in panel_sizes),
                ),
            )
            offset = 0
            # Paste one panel at a time so only a single decoded panel is
            # held in memory alongside the canvas.
            for panel_path in panel_paths:
                with Image.open(panel_path) as panel:
                    canvas.paste(panel, (0, offset))
                    offset += panel.size[1]

            combined_path = os.path.join(tmp_dir, 'combined.jpg')
            canvas.save(combined_path)
            with open(combined_path, 'rb') as combined_file:
                bot.send_document(chat_id, combined_file, caption=caption)