diff --git a/rsshub/spiders/earningsdate/businesswire.py b/rsshub/spiders/earningsdate/businesswire.py
index 9ab24d5..26657a6 100644
--- a/rsshub/spiders/earningsdate/businesswire.py
+++ b/rsshub/spiders/earningsdate/businesswire.py
@@ -7,8 +7,8 @@ domain = 'businesswire.com'
 def parse(post):
     item = {}
     item['title'] = post.css('title::text').extract_first().strip()
-    item['description'] = post.css('description::text').extract_first().strip(']]>')
-    item['link'] = post.css('link::text').extract_first()
+    item['description'] = post.css('description::text').extract_first()
+    item['link'] = post.extract().split(' ')[-2].split('>')[-1].strip()
     item['pubDate'] = post.css('pubDate::text').extract_first()
     return item
 
diff --git a/rsshub/utils.py b/rsshub/utils.py
index 614393e..3f6803b 100644
--- a/rsshub/utils.py
+++ b/rsshub/utils.py
@@ -28,7 +28,7 @@ def fetch(url: str, headers: dict=DEFAULT_HEADERS, proxies: dict=None):
 def filter_content(items):
     content = []
     p1 = re.compile(r'(.*)(to|will|date|schedule) (.*)results', re.IGNORECASE)
-    p2 = re.compile(r'(.*)(schedule|announce|to) (.*)call', re.IGNORECASE)
+    p2 = re.compile(r'(.*)(schedule|schedules|announce|to) (.*)call', re.IGNORECASE)
     p3 = re.compile(r'(.*)release (.*)date', re.IGNORECASE)
 
     for item in items: