From 82e7563700b0f8dff52059bff439af4a204f0384 Mon Sep 17 00:00:00 2001
From: hillerliao
Date: Wed, 18 May 2022 21:41:21 +0800
Subject: [PATCH] change pgyer rss description

---
 rsshub/spiders/pgyer/app.py | 31 +++++++++++++++++++++----------
 1 file changed, 21 insertions(+), 10 deletions(-)

diff --git a/rsshub/spiders/pgyer/app.py b/rsshub/spiders/pgyer/app.py
index c3fd67d..c4dfdec 100644
--- a/rsshub/spiders/pgyer/app.py
+++ b/rsshub/spiders/pgyer/app.py
@@ -4,6 +4,27 @@ from rsshub.utils import fetch
 
 domain = 'https://www.pgyer.com'
 
+def parse(post):
+    """Build one RSS item dict (title, description, link) from a page.
+
+    Missing page elements or text markers yield empty or unfiltered text
+    instead of raising, so one odd page does not abort the whole feed.
+    """
+    item = {}
+    item['title'] = post.xpath('//meta[@property="og:description"]').attrib.get('content', '')
+    # extract_first() yields None when the block is absent; use '' instead.
+    raw = post.css('div.update-description').extract_first() or ''
+    text = re.sub(r'<[^>]*>', '', raw)
+    # Keep the text after the first marker only when the marker exists.
+    parts = text.split('备注信息:')
+    if len(parts) > 1:
+        text = parts[1]
+    item['description'] = text.split('执行人')[0].strip()
+    # Removing 'app/qrcode/' from the QR image src gives the item link.
+    src = post.css('img.qrcode').attrib.get('src', '')
+    item['link'] = ''.join(src.split('app/qrcode/')[:2])
+    return item
+
 def ctx(category=''):
     url = f"{domain}/{category}"
     tree = fetch(url,headers=DEFAULT_HEADERS)
@@ -19,13 +40,3 @@ def ctx(category=''):
         'items': list(map(parse, posts))
     }
 
-def parse(post):
-    item = {}
-    item['title'] = post.xpath('//meta[@property="og:description"]').attrib['content']
-    item['description'] = item['title'] + ';' \
-        + post.css('ul.breadcrumb > li::text').getall()[1] + ';' \
-        + post.css('ul.breadcrumb > li::text').getall()[2]
-    item['description'] = re.sub(r'\s|\n', '', item['description'])
-    link = post.css('img.qrcode').attrib['src'].split('app/qrcode/')
-    item['link'] = link[0] + link[1]
-    return item
\ No newline at end of file