Fix None value for businesswire item link

This commit is contained in:
hillerliao
2020-03-14 14:58:38 +08:00
parent e4734cb1d2
commit 8f129e9c00
2 changed files with 3 additions and 3 deletions

View File

@@ -7,8 +7,8 @@ domain = 'businesswire.com'
def parse(post):
    """Convert one businesswire feed entry into a plain item dict.

    Args:
        post: Selector-like object for a single feed entry. It must provide
            ``css(query)`` returning an object with ``extract_first()``, and
            ``extract()`` returning the entry's raw markup as a string.

    Returns:
        dict with the keys ``'title'``, ``'description'``, ``'link'`` and
        ``'pubDate'``. ``'description'`` and ``'pubDate'`` hold whatever
        ``extract_first()`` returned, unmodified.

    Raises:
        AttributeError: if the title query yields ``None`` (``.strip()`` is
            called on the result unconditionally).
        IndexError: if the raw markup contains no space character, so there
            is no second-to-last token to read the link from.
    """
    item = {}
    item['title'] = post.css('title::text').extract_first().strip()
    # Stored as-is: stripping here would fail with AttributeError whenever
    # the entry has no description text.
    item['description'] = post.css('description::text').extract_first()
    # The link is cut out of the raw markup instead of using a 'link::text'
    # query, which reportedly came back as None for this feed: take the
    # second-to-last space-separated token and keep what follows its last '>'.
    # NOTE(review): this depends on the feed's exact markup layout - confirm
    # against a live entry before changing it.
    raw_markup = post.extract()
    item['link'] = raw_markup.split(' ')[-2].split('>')[-1].strip()
    item['pubDate'] = post.css('pubDate::text').extract_first()
    return item