From 9220cfb29407ef8ef9dc13b7d2c5f0172ea987fe Mon Sep 17 00:00:00 2001 From: hillerliao Date: Fri, 24 Apr 2020 10:34:11 +0800 Subject: [PATCH 1/3] infoq topic pub time --- rsshub/spiders/infoq/topic.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/rsshub/spiders/infoq/topic.py b/rsshub/spiders/infoq/topic.py index 5aa9d12..7b87a00 100644 --- a/rsshub/spiders/infoq/topic.py +++ b/rsshub/spiders/infoq/topic.py @@ -9,6 +9,7 @@ def parse(post): item['title'] = post['article_title'] item['description'] = f"{post['article_summary']}
" item['link'] = f"{domain}/article/{post['uuid']}" + item['pubDate'] = post['publish_time'] return item @@ -21,7 +22,7 @@ def ctx(category=''): posts = json.loads(posts.text)['data'] return { - 'title': f'{category} - topic - infoq', + 'title': f'{category} - Topic - InfoQ', 'link': referer, 'description': 'InfoQ - 促进软件开发领域知识与创新的传播', 'author': 'hillerliao', From d6f2bc9c8652e9bfb1b0c3565a2390accca8749b Mon Sep 17 00:00:00 2001 From: hillerliao Date: Fri, 24 Apr 2020 10:43:59 +0800 Subject: [PATCH 2/3] add benzinga item link --- rsshub/spiders/benzinga/ratings.py | 1 + 1 file changed, 1 insertion(+) diff --git a/rsshub/spiders/benzinga/ratings.py b/rsshub/spiders/benzinga/ratings.py index 6163998..21e300a 100644 --- a/rsshub/spiders/benzinga/ratings.py +++ b/rsshub/spiders/benzinga/ratings.py @@ -10,6 +10,7 @@ def ctx(category=''): def parse(post): item = {} item['description'] = item['title'] = stock.upper() + '的评级:' + ', '.join(post.css('td::text').extract()) + item['link'] = url return item From bf14280a80c6c379ef85a200d037198a3795ec16 Mon Sep 17 00:00:00 2001 From: hillerliao Date: Sat, 25 Apr 2020 22:28:04 +0800 Subject: [PATCH 3/3] fix weiyangx tag list --- rsshub/spiders/weiyangx/tag.py | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/rsshub/spiders/weiyangx/tag.py b/rsshub/spiders/weiyangx/tag.py index 99753ea..616340b 100644 --- a/rsshub/spiders/weiyangx/tag.py +++ b/rsshub/spiders/weiyangx/tag.py @@ -1,21 +1,27 @@ from rsshub.utils import DEFAULT_HEADERS -from rsshub.utils import fetch +import requests +import json +from parsel import Selector domain = 'https://www.weiyangx.com' def parse(post): item = {} - item['title'] = post.css('h2::text').extract_first() - item['description'] = post.css('p::text').extract_first() - item['link'] = post.css('a::attr(href)').extract_first() + item['title'] = post['post_title'] + item['description'] = post['post_content'] + post_id = post['post_id'] + item['link'] = f'{domain}/{post_id}.html' return item def ctx(category=''): url = f'https://www.weiyangx.com/tag/{category}' - tree = fetch(url, headers=DEFAULT_HEADERS) - posts = tree.css('.category-post-node') + res = requests.get(url, headers=DEFAULT_HEADERS) + res = Selector(res.text) + posts = res.css('script::text')[-4].extract().split('=')[-1] + posts = json.loads(posts) + # posts = tree.css('script::text')[-5].extract().split('=')[-1] items = list(map(parse, posts)) return { 'title': f'{category} - 文章 - 未央网',