Filter earnings date announcements for US stocks

This commit is contained in:
hillerliao
2020-02-05 22:57:23 +08:00
parent 74fac3dc8d
commit ddced774f3
6 changed files with 117 additions and 0 deletions

View File

@@ -31,6 +31,7 @@ def filter_content(ctx):
return ctx
#---------- feed路由从这里开始 -----------#
@bp.route('/cninfo/announcement/<string:stock_id>/<string:category>')
@bp.route('/cninfo/announcement')
@@ -58,7 +59,25 @@ def infoq_recommend():
from rsshub.spiders.infoq.recommend import ctx
return render_template('main/atom.xml', **filter_content(ctx()))
@bp.route('/dxzg/notice')
def dxzg_notice():
    """Render the dxzg notice spider output with the Atom template."""
    # Function-scope import, same as in the neighbouring feed routes.
    from rsshub.spiders.dxzg.notice import ctx as build_feed

    feed = filter_content(build_feed())
    return render_template('main/atom.xml', **feed)
@bp.route('/earningsdate/prnewswire')
def earningsdate_prnewswire():
    """Render the PR Newswire earnings-date spider output with the Atom template."""
    # Function-scope import, same as in the neighbouring feed routes.
    from rsshub.spiders.earningsdate.prnewswire import ctx as build_feed

    feed = filter_content(build_feed())
    return render_template('main/atom.xml', **feed)
@bp.route('/earningsdate/globenewswire')
def earningsdate_globenewswire():
    """Render the GlobeNewswire earnings-date spider output with the Atom template."""
    # Function-scope import, same as in the neighbouring feed routes.
    from rsshub.spiders.earningsdate.globenewswire import ctx as build_feed

    feed = filter_content(build_feed())
    return render_template('main/atom.xml', **feed)
@bp.route('/earningsdate/businesswire')
def earningsdate_businesswire():
    """Render the Business Wire earnings-date spider output with the Atom template."""
    # Function-scope import, same as in the neighbouring feed routes.
    from rsshub.spiders.earningsdate.businesswire import ctx as build_feed

    feed = filter_content(build_feed())
    return render_template('main/atom.xml', **feed)

View File

@@ -0,0 +1,24 @@
from rsshub.utils import fetch, filter_content
domain = 'https://www.businesswire.com'


def parse(post):
    """Convert one Business Wire news-list entry into a feed item dict.

    Args:
        post: Selector-like object for a single list entry. Only
            ``post.css(query).extract_first(default=...)`` is used.

    Returns:
        dict with ``title``, ``description`` (a copy of the title),
        ``link`` and ``pubDate``. Text values are whitespace-stripped.
        When a node is missing the corresponding value is ``''`` (and the
        link falls back to the bare site root) instead of raising
        ``AttributeError`` on ``None.strip()`` or producing a link that
        ends in the literal text ``None``.
    """
    # default='' keeps one malformed list entry from aborting the whole feed.
    title = post.css('span[itemprop=headline]::text').extract_first(default='').strip()
    href = post.css('a.bwTitleLink::attr(href)').extract_first(default='')
    published = post.css('time::text').extract_first(default='').strip()
    return {
        'title': title,
        'description': title,
        'link': f"{domain}{href}",
        'pubDate': published,
    }
def ctx(category=''):
    """Build the template context for the Business Wire earnings-date feed.

    Fetches the Business Wire news listing, runs ``parse`` over every
    ``.bwNewsList li`` entry and keeps the items accepted by
    ``filter_content``.

    Args:
        category: Not used by this function; kept so the signature is
            unchanged for callers.

    Returns:
        dict with ``title``, ``link``, ``description``, ``author`` and
        ``items`` keys.
    """
    listing_url = f"{domain}/portal/site/home/template.PAGE/news/"
    tree = fetch(listing_url)
    items = filter_content([parse(post) for post in tree.css('.bwNewsList li')])
    # The feed metadata used to read "Prnewswire", but this feed is built
    # from businesswire.com, so name it after Business Wire.
    return {
        'title': 'Earnings Date - Businesswire',
        'link': listing_url,
        'description': 'Earnings Date - Businesswire',
        'author': 'hillerliao',
        'items': items,
    }

View File

@@ -0,0 +1,24 @@
from rsshub.utils import fetch, filter_content
domain = 'https://www.globenewswire.com'


def parse(post):
    """Convert one GlobeNewswire feed ``item`` element into a feed item dict.

    Args:
        post: Selector-like object for a single ``item``. Only
            ``post.css(query).extract_first(default=...)`` is used.

    Returns:
        dict with ``title`` (whitespace-stripped), ``description``,
        ``link`` (taken from ``guid``) and ``pubDate``. A missing node
        yields ``''`` instead of raising ``AttributeError`` on
        ``None.strip()``.
    """
    title = post.css('title::text').extract_first(default='').strip()
    # strip(']]>') removes any leading/trailing ']' and '>' characters;
    # presumably this drops a leftover CDATA terminator - confirm against
    # the live feed before tightening it.
    description = post.css('description::text').extract_first(default='').strip(']]>')
    return {
        'title': title,
        'description': description,
        'link': post.css('guid::text').extract_first(default=''),
        'pubDate': post.css('pubDate::text').extract_first(default=''),
    }
def ctx(category=''):
    """Build the template context for the GlobeNewswire earnings-date feed.

    Fetches the GlobeNewswire "Earnings Releases And Operating Results"
    feed, parses each ``item`` element and keeps those accepted by
    ``filter_content``. ``category`` is not used by this function.
    """
    # The same URL is both fetched and advertised as the feed link.
    feed_url = f"{domain}/RssFeed/subjectcode/13-Earnings%20Releases%20And%20Operating%20Results/feedTitle/GlobeNewswire%20-%20Earnings%20Releases%20And%20Operating%20Results"
    document = fetch(feed_url)
    parsed = [parse(entry) for entry in document.css('item')]
    context = {
        'title': 'Earnings Date - Globenewswire',
        'link': feed_url,
        'description': 'Earnings Date - Globenewswire',
        'author': 'hillerliao',
    }
    context['items'] = filter_content(parsed)
    return context

View File

@@ -0,0 +1,24 @@
from rsshub.utils import fetch, filter_content
domain = 'https://www.prnewswire.com'


def parse(post):
    """Convert one PR Newswire listing card into a feed item dict.

    Args:
        post: Selector-like object for a single card. Only
            ``post.css(query).extract_first(default=...)`` is used.

    Returns:
        dict with ``title``, ``description``, ``link`` and ``pubDate``.
        A missing node yields ``''`` rather than ``None``, so the link
        never ends in the literal text ``None`` (it falls back to the
        bare site root) and the title is always a string.
    """
    href = post.css('a::attr(href)').extract_first(default='')
    return {
        'title': post.css('a::text').extract_first(default=''),
        'description': post.css('p::text').extract_first(default=''),
        'link': f"{domain}{href}",
        'pubDate': post.css('small::text').extract_first(default=''),
    }
def ctx(category=''):
    """Build the template context for the PR Newswire earnings-date feed.

    Fetches the PR Newswire earnings list, parses each
    ``.card-list-hr .col-sm-8`` card and keeps those accepted by
    ``filter_content``. ``category`` is not used by this function.
    """
    # NOTE(review): the request pins page=5 with pagesize=100, while the
    # advertised feed link has no query string - confirm page 5 is intended.
    listing = fetch(f"{domain}/news-releases/financial-services-latest-news/earnings-list/?page=5&pagesize=100")
    parsed = [parse(card) for card in listing.css('.card-list-hr .col-sm-8')]
    context = {
        'title': 'Earnings Date - Prnewswire',
        'link': f'{domain}/news-releases/financial-services-latest-news/earnings-list/',
        'description': 'Earnings Date - Prnewswire',
        'author': 'hillerliao',
    }
    context['items'] = filter_content(parsed)
    return context

View File

@@ -85,4 +85,17 @@
</div>
<br>
<!--item info end-->
<!--item info start-->
<div class="card text-left">
<div class="card-body">
<h4 class="card-title">Earnings Date</h4>
<h6 class="text-muted">Earnings Date <a href="https://github.com/hillerliao" target="_blank" class="badge badge-secondary">by hillerliao</a></h6>
<p class="card-text">举例:<a href="https://rsshub-python.herokuapp.com/earningsdate/businesswire" target="_blank">https://rsshub-python.herokuapp.com/earningsdate/businesswire</a></p>
<p class="card-text">路由:<code>/earningsdate/:category</code></p>
<p class="card-text">参数:category [必填,可以为“businesswire、globenewswire、prnewswire”]</p>
</div>
</div>
<br>
<!--item info end-->
{% endblock content %}

View File

@@ -1,3 +1,4 @@
import re
from flask import Response
import requests
from parsel import Selector
@@ -23,3 +24,15 @@ def fetch(url: str, headers: dict=DEFAULT_HEADERS, proxies: dict=None):
html = res.text
tree = Selector(text=html)
return tree
def filter_content(items):
    """Keep only the items whose title looks like an earnings-date notice.

    A title is kept when, after whitespace normalisation, it matches at
    least one of three case-insensitive patterns:

    * ``to`` / ``will`` / ``date`` / ``schedule``, a space, then ``results``
    * ``schedule`` / ``announce`` / ``to``, a space, then ``call``
    * ``release``, a space, then ``date``

    Args:
        items: Iterable of item dicts; only the ``'title'`` value is read.

    Returns:
        list of the matching items, unmodified and in input order.
    """
    patterns = (
        re.compile(r'(.*)(to|will|date|schedule) (.*)results', re.IGNORECASE),
        re.compile(r'(.*)(schedule|announce|to) (.*)call', re.IGNORECASE),
        re.compile(r'(.*)release (.*)date', re.IGNORECASE),
    )
    content = []
    for item in items:
        title = item.get('title')
        if not isinstance(title, str):
            # A missing or None title cannot match; skip it instead of
            # letting re raise TypeError and break the whole feed.
            continue
        # '.' does not match newlines, so a title with leading or embedded
        # line breaks could never match; collapse whitespace runs first.
        normalized = ' '.join(title.split())
        if any(pattern.match(normalized) for pattern in patterns):
            content.append(item)
    return content