From f00577060c69ca8e621f569a0188ebdc873a6cb4 Mon Sep 17 00:00:00 2001 From: bakatrouble Date: Sun, 31 Mar 2019 23:35:38 +0300 Subject: [PATCH] fix tapas episode list --- feeds/modules/tapas.py | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/feeds/modules/tapas.py b/feeds/modules/tapas.py index 2240744..8a6fa38 100644 --- a/feeds/modules/tapas.py +++ b/feeds/modules/tapas.py @@ -1,4 +1,6 @@ +import json import os +import re from tempfile import TemporaryDirectory import sentry_sdk @@ -24,22 +26,25 @@ class TapasFeedModuleConfig(FeedModuleConfig): if last_id is None: last_id = 0 - soup = BeautifulSoup(requests.get( + series_page = requests.get( f'https://tapas.io/series/{self.display_name}', headers={'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:67.0) Gecko/20100101 Firefox/67.0'}, - ).text, 'html.parser') - for episode in soup.select('#episode-nav .episode'): - eid = int(episode['data-eid']) + ).text + episode_list = json.loads(re.search(r'episodeList : (\[.*\]),', series_page).group(1)) + for episode in episode_list: + eid = episode['id'] + title = episode['title'] if eid <= last_id: continue + if not episode['free']: + continue + + caption = f'{title}\nhttps://tapas.io/episode/{eid}' esoup = BeautifulSoup(requests.get( f'https://tapas.io/episode/{eid}', headers={'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:67.0) Gecko/20100101 Firefox/67.0'}, ).text, 'html.parser') - # series_title = esoup.select_one('#series-info-wrap a.series-header-title').text - title = esoup.select_one(f'#episodes [data-eid={eid}] h1').text - caption = f'{title}\nhttps://tapas.io/episode/{eid}' imgs = [] for img in esoup.select(f'#episodes [data-eid={eid}] .art-image'): imgs.append(img['src'])