From ddced774f3911095868e2a31ade0cd55e4e700d8 Mon Sep 17 00:00:00 2001 From: hillerliao Date: Wed, 5 Feb 2020 22:57:23 +0800 Subject: [PATCH] filter earnings date for us stock --- rsshub/blueprints/main.py | 19 ++++++++++++++++ rsshub/spiders/earningsdate/businesswire.py | 24 ++++++++++++++++++++ rsshub/spiders/earningsdate/globenewswire.py | 24 ++++++++++++++++++++ rsshub/spiders/earningsdate/prnewswire.py | 24 ++++++++++++++++++++ rsshub/templates/main/feeds.html | 13 +++++++++++ rsshub/utils.py | 13 +++++++++++ 6 files changed, 117 insertions(+) create mode 100644 rsshub/spiders/earningsdate/businesswire.py create mode 100644 rsshub/spiders/earningsdate/globenewswire.py create mode 100644 rsshub/spiders/earningsdate/prnewswire.py diff --git a/rsshub/blueprints/main.py b/rsshub/blueprints/main.py index cfef6a0..aea8ace 100644 --- a/rsshub/blueprints/main.py +++ b/rsshub/blueprints/main.py @@ -31,6 +31,7 @@ def filter_content(ctx): return ctx + #---------- feed路由从这里开始 -----------# @bp.route('/cninfo/announcement//') @bp.route('/cninfo/announcement') @@ -58,7 +59,25 @@ def infoq_recommend(): from rsshub.spiders.infoq.recommend import ctx return render_template('main/atom.xml', **filter_content(ctx())) + @bp.route('/dxzg/notice') def dxzg_notice(): from rsshub.spiders.dxzg.notice import ctx return render_template('main/atom.xml', **filter_content(ctx())) + + +@bp.route('/earningsdate/prnewswire') +def earningsdate_prnewswire(): + from rsshub.spiders.earningsdate.prnewswire import ctx + return render_template('main/atom.xml', **filter_content(ctx())) + +@bp.route('/earningsdate/globenewswire') +def earningsdate_globenewswire(): + from rsshub.spiders.earningsdate.globenewswire import ctx + return render_template('main/atom.xml', **filter_content(ctx())) + +@bp.route('/earningsdate/businesswire') +def earningsdate_businesswire(): + from rsshub.spiders.earningsdate.businesswire import ctx + return render_template('main/atom.xml', **filter_content(ctx())) + diff --git 
a/rsshub/spiders/earningsdate/businesswire.py b/rsshub/spiders/earningsdate/businesswire.py
new file mode 100644
index 0000000..2485e44
--- /dev/null
+++ b/rsshub/spiders/earningsdate/businesswire.py
@@ -0,0 +1,24 @@
+from rsshub.utils import fetch, filter_content
+
+domain = 'https://www.businesswire.com'
+
+def parse(post):  # turn one '.bwNewsList li' selector into a feed item dict
+    item = {}
+    item['title'] = post.css('span[itemprop=headline]::text').extract_first().strip()
+    item['description'] = item['title']  # the description simply repeats the headline
+    item['link'] = f"{domain}{post.css('a.bwTitleLink::attr(href)').extract_first()}"
+    item['pubDate'] = post.css('time::text').extract_first().strip()
+    return item
+
+def ctx(category=''):  # build the feed context dict; `category` is accepted but not used
+    tree = fetch(f"{domain}/portal/site/home/template.PAGE/news/")
+    posts = tree.css('.bwNewsList li')
+    items = list(map(parse, posts))
+    items = filter_content(items)  # keep only items whose title passes the shared title filter
+    return {
+        'title': 'Earnings Date - Businesswire',
+        'link': f'{domain}/portal/site/home/template.PAGE/news/',
+        'description': 'Earnings Date - Businesswire',
+        'author': 'hillerliao',
+        'items': items
+    }
\ No newline at end of file
diff --git a/rsshub/spiders/earningsdate/globenewswire.py b/rsshub/spiders/earningsdate/globenewswire.py
new file mode 100644
index 0000000..2659ba5
--- /dev/null
+++ b/rsshub/spiders/earningsdate/globenewswire.py
@@ -0,0 +1,24 @@
+from rsshub.utils import fetch, filter_content
+
+domain = 'https://www.globenewswire.com'
+
+def parse(post):  # turn one <item> selector of the fetched feed into a feed item dict
+    item = {}
+    item['title'] = post.css('title::text').extract_first().strip()
+    item['description'] = post.css('description::text').extract_first().strip(']]>')  # trims any leading/trailing ']' or '>' characters
+    item['link'] = post.css('guid::text').extract_first()
+    item['pubDate'] = post.css('pubDate::text').extract_first()
+    return item
+
+def ctx(category=''):  # build the feed context dict; `category` is accepted but not used
+    tree = fetch(f"{domain}/RssFeed/subjectcode/13-Earnings%20Releases%20And%20Operating%20Results/feedTitle/GlobeNewswire%20-%20Earnings%20Releases%20And%20Operating%20Results")
+    posts = tree.css('item')
+    items = list(map(parse, posts))
+    items = filter_content(items)  # keep only items whose title passes the shared title filter
+    return {
+        'title': 'Earnings Date - Globenewswire',
+        'link': f'{domain}/RssFeed/subjectcode/13-Earnings%20Releases%20And%20Operating%20Results/feedTitle/GlobeNewswire%20-%20Earnings%20Releases%20And%20Operating%20Results',
+        'description': 'Earnings Date - Globenewswire',
+        'author': 'hillerliao',
+        'items': items
+    }
\ No newline at end of file
diff --git a/rsshub/spiders/earningsdate/prnewswire.py b/rsshub/spiders/earningsdate/prnewswire.py
new file mode 100644
index 0000000..2c61940
--- /dev/null
+++ b/rsshub/spiders/earningsdate/prnewswire.py
@@ -0,0 +1,24 @@
+from rsshub.utils import fetch, filter_content
+
+domain = 'https://www.prnewswire.com'
+
+def parse(post):  # turn one '.card-list-hr .col-sm-8' selector into a feed item dict
+    item = {}
+    item['title'] = post.css('a::text').extract_first()
+    item['description'] = post.css('p::text').extract_first()
+    item['link'] = f"{domain}{post.css('a::attr(href)').extract_first()}"
+    item['pubDate'] = post.css('small::text').extract_first()
+    return item
+
+def ctx(category=''):  # build the feed context dict; `category` is accepted but not used
+    tree = fetch(f"{domain}/news-releases/financial-services-latest-news/earnings-list/?page=5&pagesize=100")  # NOTE(review): page=5 looks like a leftover; the feed 'link' below carries no page parameter - confirm
+    posts = tree.css('.card-list-hr .col-sm-8')
+    items = list(map(parse, posts))
+    items = filter_content(items)  # keep only items whose title passes the shared title filter
+    return {
+        'title': 'Earnings Date - Prnewswire',
+        'link': f'{domain}/news-releases/financial-services-latest-news/earnings-list/',
+        'description': 'Earnings Date - Prnewswire',
+        'author': 'hillerliao',
+        'items': items
+    }
\ No newline at end of file
diff --git a/rsshub/templates/main/feeds.html b/rsshub/templates/main/feeds.html
index 7010d15..cb2e622 100644
--- a/rsshub/templates/main/feeds.html
+++ b/rsshub/templates/main/feeds.html
@@ -85,4 +85,17 @@
+ + +
+
+

Earnings Date

+
Earnings Date by hillerliao
+

举例:https://rsshub-python.herokuapp.com/earningsdate/businesswire

+

路由:/earningsdate/:category

+

参数:category [必填,可以为“businesswire、globenewswire、prnewswire”]

+
+
+
+ {% endblock content %}
\ No newline at end of file
diff --git a/rsshub/utils.py b/rsshub/utils.py
index 5cdbbe7..614393e 100644
--- a/rsshub/utils.py
+++ b/rsshub/utils.py
@@ -1,3 +1,4 @@
+import re
 from flask import Response
 import requests
 from parsel import Selector
@@ -23,3 +24,15 @@ def fetch(url: str, headers: dict=DEFAULT_HEADERS, proxies: dict=None):
     html = res.text
     tree = Selector(text=html)
     return tree
+
+def filter_content(items):  # return the items whose 'title' matches at least one of the patterns below
+    content = []
+    p1 = re.compile(r'(.*)(to|will|date|schedule) (.*)results', re.IGNORECASE)  # leading (.*) lets match() hit the keyword anywhere on the first line
+    p2 = re.compile(r'(.*)(schedule|announce|to) (.*)call', re.IGNORECASE)
+    p3 = re.compile(r'(.*)release (.*)date', re.IGNORECASE)
+
+    for item in items:
+        title = item['title'] or ''  # a parser may store None when its selector finds no text; re.match(None) would raise TypeError
+        if p1.match(title) or p2.match(title) or p3.match(title):
+            content.append(item)
+    return content
\ No newline at end of file