fix prnewswire title parser

This commit is contained in:
Hiller Liao
2021-03-09 17:20:00 +08:00
parent 13b498732a
commit 940dc1fb82
2 changed files with 3 additions and 3 deletions

View File

@@ -4,7 +4,7 @@ domain = 'https://www.prnewswire.com'
def parse(post):
item = {}
item['title'] = post.css('a::text').extract_first()
item['title'] = post.css('h3::text').getall()[1]
item['description'] = post.css('p::text').extract_first()
item['link'] = f"{domain}{post.css('a::attr(href)').extract_first()}"
item['pubDate'] = post.css('small::text').extract_first()
@@ -14,7 +14,7 @@ def ctx(category=''):
# DEFAULT_HEADERS.update({'upgrade-insecure-requests': 1})
url = f"{domain}/news-releases/financial-services-latest-news/earnings-list/?page=1&pagesize=100"
tree = fetch(url, headers=DEFAULT_HEADERS)
posts = tree.css('.card-list-hr .col-sm-8')
posts = tree.css('.card-list-hr .row')
items = list(map(parse, posts))
items = filter_content(items)
return {

View File

@@ -17,7 +17,7 @@
<updated>{{item.pubDate|default(now)}}</updated>
<link href="{{item.link}}"/>
<content type="html" src="{{item.link}}"><![CDATA[{{item.description|safe}}]]></content>
<author><![CDATA[{{item.author|safe}}]]></author>
</entry>
{% endfor %}
</feed>