From 940dc1fb8241eaf16ac2e61497027780cfbffb1b Mon Sep 17 00:00:00 2001
From: Hiller Liao
Date: Tue, 9 Mar 2021 17:20:00 +0800
Subject: [PATCH] fix prnewswire title parser

---
 rsshub/spiders/earningsdate/prnewswire.py | 4 ++--
 rsshub/templates/main/atom.xml            | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/rsshub/spiders/earningsdate/prnewswire.py b/rsshub/spiders/earningsdate/prnewswire.py
index 48af011..51dcb90 100644
--- a/rsshub/spiders/earningsdate/prnewswire.py
+++ b/rsshub/spiders/earningsdate/prnewswire.py
@@ -4,7 +4,7 @@ domain = 'https://www.prnewswire.com'

 def parse(post):
     item = {}
-    item['title'] = post.css('a::text').extract_first()
+    item['title'] = post.css('h3::text').getall()[1]
     item['description'] = post.css('p::text').extract_first()
     item['link'] = f"{domain}{post.css('a::attr(href)').extract_first()}"
     item['pubDate'] = post.css('small::text').extract_first()
@@ -14,7 +14,7 @@ def ctx(category=''):
     # DEFAULT_HEADERS.update({'upgrade-insecure-requests': 1})
     url = f"{domain}/news-releases/financial-services-latest-news/earnings-list/?page=1&pagesize=100"
     tree = fetch(url, headers=DEFAULT_HEADERS)
-    posts = tree.css('.card-list-hr .col-sm-8')
+    posts = tree.css('.card-list-hr .row')
     items = list(map(parse, posts))
     items = filter_content(items)
     return {
diff --git a/rsshub/templates/main/atom.xml b/rsshub/templates/main/atom.xml
index 0cb6491..57eb99f 100644
--- a/rsshub/templates/main/atom.xml
+++ b/rsshub/templates/main/atom.xml
@@ -17,7 +17,7 @@
 {{item.pubDate|default(now)}}
- ![CDATA[{{item.author|safe}}]]
+
 {% endfor %}
\ No newline at end of file