From bf14280a80c6c379ef85a200d037198a3795ec16 Mon Sep 17 00:00:00 2001
From: hillerliao
Date: Sat, 25 Apr 2020 22:28:04 +0800
Subject: [PATCH] fix weiyangx tag list

---
 rsshub/spiders/weiyangx/tag.py | 18 ++++++++++++------
 1 file changed, 12 insertions(+), 6 deletions(-)

diff --git a/rsshub/spiders/weiyangx/tag.py b/rsshub/spiders/weiyangx/tag.py
index 99753ea..616340b 100644
--- a/rsshub/spiders/weiyangx/tag.py
+++ b/rsshub/spiders/weiyangx/tag.py
@@ -1,21 +1,27 @@
 from rsshub.utils import DEFAULT_HEADERS
-from rsshub.utils import fetch
+import requests
+import json
+from parsel import Selector
 
 domain = 'https://www.weiyangx.com'
 
 
 def parse(post):
     item = {}
-    item['title'] = post.css('h2::text').extract_first()
-    item['description'] = post.css('p::text').extract_first()
-    item['link'] = post.css('a::attr(href)').extract_first()
+    item['title'] = post['post_title']
+    item['description'] = post['post_content']
+    post_id = post['post_id']
+    item['link'] = f'{domain}/{post_id}.html'
     return item
 
 
 def ctx(category=''):
     url = f'https://www.weiyangx.com/tag/{category}'
-    tree = fetch(url, headers=DEFAULT_HEADERS)
-    posts = tree.css('.category-post-node')
+    res = requests.get(url, headers=DEFAULT_HEADERS)
+    res = Selector(res.text)
+    posts = res.css('script::text')[-4].extract().split('=')[-1]
+    posts = json.loads(posts)
+    # posts = tree.css('script::text')[-5].extract().split('=')[-1]
     items = list(map(parse, posts))
     return {
         'title': f'{category} - 文章 - 未央网',