Fix PR Newswire title parser

This commit is contained in:
hillerliao
2021-03-09 17:48:33 +08:00
committed by GitHub
parent 13b498732a
commit c7b5932703

View File

@@ -4,7 +4,7 @@ domain = 'https://www.prnewswire.com'
def parse(post):
item = {}
item['title'] = post.css('a::text').extract_first()
item['title'] = post.css('a::text').getall()[1]
item['description'] = post.css('p::text').extract_first()
item['link'] = f"{domain}{post.css('a::attr(href)').extract_first()}"
item['pubDate'] = post.css('small::text').extract_first()
@@ -23,4 +23,4 @@ def ctx(category=''):
'description': 'Earnings Date - Prnewswire',
'author': 'hillerliao',
'items': items
}
}