From c7b593270333e747440f4c7ee71bc3007ad523dd Mon Sep 17 00:00:00 2001
From: hillerliao
Date: Tue, 9 Mar 2021 17:48:33 +0800
Subject: [PATCH 1/2] fix pr newswire title parser

---
 rsshub/spiders/earningsdate/prnewswire.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/rsshub/spiders/earningsdate/prnewswire.py b/rsshub/spiders/earningsdate/prnewswire.py
index 48af011..c5fd469 100644
--- a/rsshub/spiders/earningsdate/prnewswire.py
+++ b/rsshub/spiders/earningsdate/prnewswire.py
@@ -4,7 +4,7 @@ domain = 'https://www.prnewswire.com'
 
 def parse(post):
     item = {}
-    item['title'] = post.css('a::text').extract_first()
+    item['title'] = post.css('a::text').getall()[1]
     item['description'] = post.css('p::text').extract_first()
     item['link'] = f"{domain}{post.css('a::attr(href)').extract_first()}"
     item['pubDate'] = post.css('small::text').extract_first()
@@ -23,4 +23,4 @@ def ctx(category=''):
         'description': 'Earnings Date - Prnewswire',
         'author': 'hillerliao',
         'items': items
-    }
\ No newline at end of file
+    }

From 3ecd7213fad10231a08fba532237026702bc4b02 Mon Sep 17 00:00:00 2001
From: hillerliao
Date: Tue, 9 Mar 2021 17:59:04 +0800
Subject: [PATCH 2/2] get title from h3 tag instead of a tag

---
 rsshub/spiders/earningsdate/prnewswire.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/rsshub/spiders/earningsdate/prnewswire.py b/rsshub/spiders/earningsdate/prnewswire.py
index c5fd469..2e6005e 100644
--- a/rsshub/spiders/earningsdate/prnewswire.py
+++ b/rsshub/spiders/earningsdate/prnewswire.py
@@ -4,7 +4,7 @@ domain = 'https://www.prnewswire.com'
 
 def parse(post):
     item = {}
-    item['title'] = post.css('a::text').getall()[1]
+    item['title'] = post.css('h3::text').getall()[1]
     item['description'] = post.css('p::text').extract_first()
     item['link'] = f"{domain}{post.css('a::attr(href)').extract_first()}"
     item['pubDate'] = post.css('small::text').extract_first()