diff --git a/rsshub/spiders/pgyer/app.py b/rsshub/spiders/pgyer/app.py index c3fd67d..c4dfdec 100644 --- a/rsshub/spiders/pgyer/app.py +++ b/rsshub/spiders/pgyer/app.py @@ -4,6 +4,16 @@ from rsshub.utils import fetch domain = 'https://www.pgyer.com' +def parse(post): + item = {} + item['title'] = post.xpath('//meta[@property="og:description"]').attrib['content'] + item['description'] = post.css('div.update-description').extract_first() + item['description'] = re.sub(r'<[^>]*>', '', item['description'] )\ + .split('备注信息:')[1].split('执行人')[0].strip() + link = post.css('img.qrcode').attrib['src'].split('app/qrcode/') + item['link'] = link[0] + link[1] + return item + def ctx(category=''): url = f"{domain}/{category}" tree = fetch(url,headers=DEFAULT_HEADERS) @@ -19,13 +29,3 @@ def ctx(category=''): 'items': list(map(parse, posts)) } -def parse(post): - item = {} - item['title'] = post.xpath('//meta[@property="og:description"]').attrib['content'] - item['description'] = item['title'] + ';' \ - + post.css('ul.breadcrumb > li::text').getall()[1] + ';' \ - + post.css('ul.breadcrumb > li::text').getall()[2] - item['description'] = re.sub(r'\s|\n', '', item['description']) - link = post.css('img.qrcode').attrib['src'].split('app/qrcode/') - item['link'] = link[0] + link[1] - return item \ No newline at end of file