mirror of
https://github.com/d0zingcat/RSSHub-python.git
synced 2026-05-14 07:26:44 +00:00
filter earnings date for us stock
This commit is contained in:
@@ -31,6 +31,7 @@ def filter_content(ctx):
|
||||
return ctx
|
||||
|
||||
|
||||
|
||||
#---------- feed路由从这里开始 -----------#
|
||||
@bp.route('/cninfo/announcement/<string:stock_id>/<string:category>')
|
||||
@bp.route('/cninfo/announcement')
|
||||
@@ -58,7 +59,25 @@ def infoq_recommend():
|
||||
from rsshub.spiders.infoq.recommend import ctx
|
||||
return render_template('main/atom.xml', **filter_content(ctx()))
|
||||
|
||||
|
||||
@bp.route('/dxzg/notice')
def dxzg_notice():
    """Render the dxzg notice feed as an Atom document."""
    from rsshub.spiders.dxzg.notice import ctx
    feed = filter_content(ctx())
    return render_template('main/atom.xml', **feed)
|
||||
|
||||
|
||||
@bp.route('/earningsdate/prnewswire')
def earningsdate_prnewswire():
    """Render the PR Newswire earnings-date feed as an Atom document."""
    from rsshub.spiders.earningsdate.prnewswire import ctx
    feed = filter_content(ctx())
    return render_template('main/atom.xml', **feed)
|
||||
|
||||
@bp.route('/earningsdate/globenewswire')
def earningsdate_globenewswire():
    """Render the GlobeNewswire earnings-date feed as an Atom document."""
    from rsshub.spiders.earningsdate.globenewswire import ctx
    feed = filter_content(ctx())
    return render_template('main/atom.xml', **feed)
|
||||
|
||||
@bp.route('/earningsdate/businesswire')
def earningsdate_businesswire():
    """Render the Businesswire earnings-date feed as an Atom document."""
    from rsshub.spiders.earningsdate.businesswire import ctx
    feed = filter_content(ctx())
    return render_template('main/atom.xml', **feed)
|
||||
|
||||
|
||||
24
rsshub/spiders/earningsdate/businesswire.py
Normal file
24
rsshub/spiders/earningsdate/businesswire.py
Normal file
@@ -0,0 +1,24 @@
|
||||
from rsshub.utils import fetch, filter_content
|
||||
|
||||
domain = 'https://www.businesswire.com'
|
||||
|
||||
def parse(post):
    """Map one Businesswire news-list entry onto a feed-item dict.

    ``description`` deliberately mirrors the headline because the list page
    exposes no summary text.
    """
    headline = post.css('span[itemprop=headline]::text').extract_first().strip()
    href = post.css('a.bwTitleLink::attr(href)').extract_first()
    published = post.css('time::text').extract_first().strip()
    return {
        'title': headline,
        'description': headline,
        'link': f"{domain}{href}",
        'pubDate': published,
    }
|
||||
|
||||
def ctx(category=''):
    """Build the feed context for Businesswire's latest-news listing.

    Args:
        category: unused; kept so the route can pass it without breaking.

    Returns:
        dict: feed metadata plus the earnings-related items that survived
        ``filter_content``.
    """
    # One source of truth for the page URL (it is both fetched and exposed
    # as the feed's <link>).
    news_url = f"{domain}/portal/site/home/template.PAGE/news/"
    tree = fetch(news_url)
    posts = tree.css('.bwNewsList li')
    items = filter_content(list(map(parse, posts)))
    return {
        # Bug fix: this spider previously advertised itself as
        # "Earnings Date - Prnewswire" (copy-paste from prnewswire.py).
        'title': 'Earnings Date - Businesswire',
        'link': news_url,
        'description': 'Earnings Date - Businesswire',
        'author': 'hillerliao',
        'items': items
    }
|
||||
24
rsshub/spiders/earningsdate/globenewswire.py
Normal file
24
rsshub/spiders/earningsdate/globenewswire.py
Normal file
@@ -0,0 +1,24 @@
|
||||
from rsshub.utils import fetch, filter_content
|
||||
|
||||
domain = 'https://www.globenewswire.com'
|
||||
|
||||
def parse(post):
    """Map one RSS ``<item>`` node onto a feed-item dict.

    NOTE(review): the tree is parsed from text upstream; if it is parsed in
    HTML mode, tag names are lowercased and the ``pubDate`` selector may not
    match — confirm against ``fetch``'s parser.
    """
    # strip(']]>') trims leading/trailing ']' and '>' characters (CDATA leftovers).
    summary = post.css('description::text').extract_first().strip(']]>')
    return {
        'title': post.css('title::text').extract_first().strip(),
        'description': summary,
        'link': post.css('guid::text').extract_first(),
        'pubDate': post.css('pubDate::text').extract_first(),
    }
|
||||
|
||||
def ctx(category=''):
    """Build the feed context for GlobeNewswire's earnings-releases RSS feed.

    Args:
        category: unused; kept so the route can pass it without breaking.

    Returns:
        dict: feed metadata plus the earnings-related items that survived
        ``filter_content``.
    """
    # The same URL is fetched and republished as the feed's <link>.
    feed_url = f'{domain}/RssFeed/subjectcode/13-Earnings%20Releases%20And%20Operating%20Results/feedTitle/GlobeNewswire%20-%20Earnings%20Releases%20And%20Operating%20Results'
    entries = fetch(feed_url).css('item')
    filtered = filter_content([parse(entry) for entry in entries])
    return {
        'title': 'Earnings Date - Globenewswire',
        'link': feed_url,
        'description': 'Earnings Date - Globenewswire',
        'author': 'hillerliao',
        'items': filtered,
    }
|
||||
24
rsshub/spiders/earningsdate/prnewswire.py
Normal file
24
rsshub/spiders/earningsdate/prnewswire.py
Normal file
@@ -0,0 +1,24 @@
|
||||
from rsshub.utils import fetch, filter_content
|
||||
|
||||
domain = 'https://www.prnewswire.com'
|
||||
|
||||
def parse(post):
    """Map one PR Newswire earnings-list card onto a feed-item dict."""
    anchor_text = post.css('a::text').extract_first()
    summary = post.css('p::text').extract_first()
    href = post.css('a::attr(href)').extract_first()
    published = post.css('small::text').extract_first()
    return {
        'title': anchor_text,
        'description': summary,
        'link': f"{domain}{href}",
        'pubDate': published,
    }
|
||||
|
||||
def ctx(category=''):
    """Build the feed context for PR Newswire's earnings-list page.

    Args:
        category: unused; kept so the route can pass it without breaking.

    Returns:
        dict: feed metadata plus the earnings-related items that survived
        ``filter_content``.
    """
    listing_url = f'{domain}/news-releases/financial-services-latest-news/earnings-list/'
    # The fetch adds paging parameters; the feed's <link> stays canonical.
    tree = fetch(f"{listing_url}?page=5&pagesize=100")
    cards = tree.css('.card-list-hr .col-sm-8')
    filtered = filter_content([parse(card) for card in cards])
    return {
        'title': 'Earnings Date - Prnewswire',
        'link': listing_url,
        'description': 'Earnings Date - Prnewswire',
        'author': 'hillerliao',
        'items': filtered,
    }
|
||||
@@ -85,4 +85,17 @@
|
||||
</div>
|
||||
<br>
|
||||
<!--item info end-->
|
||||
|
||||
<!--item info start-->
|
||||
<div class="card text-left">
|
||||
<div class="card-body">
|
||||
<h4 class="card-title">Earnings Date</h4>
|
||||
<h6 class="text-muted">Earnings Date <a href="https://github.com/hillerliao" target="_blank" class="badge badge-secondary">by hillerliao</a></h6>
|
||||
<p class="card-text">举例:<a href="https://rsshub-python.herokuapp.com/earningsdate/businesswire" target="_blank">https://rsshub-python.herokuapp.com/earningsdate/businesswire</a></p>
|
||||
<p class="card-text">路由:<code>/earningsdate/:category</code></p>
|
||||
<p class="card-text">参数:category [必填,可以为“businesswire、globenewswire、prnewswire”]</p>
|
||||
</div>
|
||||
</div>
|
||||
<br>
|
||||
<!--item info end-->
|
||||
{% endblock content %}
|
||||
@@ -1,3 +1,4 @@
|
||||
import re
|
||||
from flask import Response
|
||||
import requests
|
||||
from parsel import Selector
|
||||
@@ -23,3 +24,15 @@ def fetch(url: str, headers: dict=DEFAULT_HEADERS, proxies: dict=None):
|
||||
html = res.text
|
||||
tree = Selector(text=html)
|
||||
return tree
|
||||
|
||||
def filter_content(items):
    """Keep only the feed items whose titles look like earnings announcements.

    A title qualifies when it matches (from the start, case-insensitively)
    one of three heuristics: "... to/will/date/schedule ... results",
    "... schedule/announce/to ... call", or "... release ... date".

    Args:
        items: iterable of feed-item dicts, each expected to carry a
            ``title`` string.

    Returns:
        list: matching items, original order preserved.

    Robustness fix: upstream spiders build ``title`` from
    ``extract_first()``, which can yield ``None``; items with a missing or
    empty title are now skipped instead of raising TypeError/KeyError.
    """
    patterns = (
        re.compile(r'(.*)(to|will|date|schedule) (.*)results', re.IGNORECASE),
        re.compile(r'(.*)(schedule|announce|to) (.*)call', re.IGNORECASE),
        re.compile(r'(.*)release (.*)date', re.IGNORECASE),
    )
    return [
        item for item in items
        if item.get('title') and any(p.match(item['title']) for p in patterns)
    ]
|
||||
Reference in New Issue
Block a user