From ddced774f3911095868e2a31ade0cd55e4e700d8 Mon Sep 17 00:00:00 2001
From: hillerliao <liaomeigu@gmail.com>
Date: Wed, 5 Feb 2020 22:57:23 +0800
Subject: [PATCH] filter earnings date for us stock

---
 rsshub/blueprints/main.py                    | 19 ++++++++++++++++
 rsshub/spiders/earningsdate/businesswire.py  | 24 ++++++++++++++++++++
 rsshub/spiders/earningsdate/globenewswire.py | 24 ++++++++++++++++++++
 rsshub/spiders/earningsdate/prnewswire.py    | 24 ++++++++++++++++++++
 rsshub/templates/main/feeds.html             | 13 +++++++++++
 rsshub/utils.py                              | 13 +++++++++++
 6 files changed, 117 insertions(+)
 create mode 100644 rsshub/spiders/earningsdate/businesswire.py
 create mode 100644 rsshub/spiders/earningsdate/globenewswire.py
 create mode 100644 rsshub/spiders/earningsdate/prnewswire.py

diff --git a/rsshub/blueprints/main.py b/rsshub/blueprints/main.py
index cfef6a0..aea8ace 100644
--- a/rsshub/blueprints/main.py
+++ b/rsshub/blueprints/main.py
@@ -31,6 +31,7 @@ def filter_content(ctx):
     return ctx
 
 
+
 #---------- feed路由从这里开始 -----------#
 @bp.route('/cninfo/announcement/<string:stock_id>/<string:category>')
 @bp.route('/cninfo/announcement')
@@ -58,7 +59,25 @@ def infoq_recommend():
     from rsshub.spiders.infoq.recommend import ctx
     return render_template('main/atom.xml', **filter_content(ctx()))
 
+
 @bp.route('/dxzg/notice')
 def dxzg_notice():
     from rsshub.spiders.dxzg.notice import ctx
     return render_template('main/atom.xml', **filter_content(ctx()))
+
+
+@bp.route('/earningsdate/prnewswire')
+def earningsdate_prnewswire():  # Atom feed rendered from the prnewswire earnings-date spider
+    from rsshub.spiders.earningsdate.prnewswire import ctx  # imported inside the view, i.e. when the route is called
+    return render_template('main/atom.xml', **filter_content(ctx()))
+
+@bp.route('/earningsdate/globenewswire')
+def earningsdate_globenewswire():  # Atom feed rendered from the globenewswire earnings-date spider
+    from rsshub.spiders.earningsdate.globenewswire import ctx  # imported inside the view, i.e. when the route is called
+    return render_template('main/atom.xml', **filter_content(ctx()))
+
+@bp.route('/earningsdate/businesswire')
+def earningsdate_businesswire():  # Atom feed rendered from the businesswire earnings-date spider
+    from rsshub.spiders.earningsdate.businesswire import ctx  # imported inside the view, i.e. when the route is called
+    return render_template('main/atom.xml', **filter_content(ctx()))
+
diff --git a/rsshub/spiders/earningsdate/businesswire.py b/rsshub/spiders/earningsdate/businesswire.py
new file mode 100644
index 0000000..2485e44
--- /dev/null
+++ b/rsshub/spiders/earningsdate/businesswire.py
@@ -0,0 +1,24 @@
+from rsshub.utils import fetch, filter_content
+
+domain = 'https://www.businesswire.com'
+
+def parse(post):
+    """Build a feed item (title/description/link/pubDate) from one news-list selector.
+    Nodes absent from ``post`` fall back to '' rather than raising on None; description copies the title."""
+    title = post.css('span[itemprop=headline]::text').extract_first(default='').strip()
+    href = post.css('a.bwTitleLink::attr(href)').extract_first(default='')
+    pub_date = post.css('time::text').extract_first(default='').strip()
+    return {'title': title, 'description': title, 'link': f"{domain}{href}", 'pubDate': pub_date}
+
+def ctx(category=''):
+    """Build the Business Wire earnings-date feed context; ``category`` is accepted but unused."""
+    url = f"{domain}/portal/site/home/template.PAGE/news/"
+    posts = fetch(url).css('.bwNewsList li')
+    items = filter_content([parse(post) for post in posts])  # keep only titles the shared filter accepts
+    return {
+        'title': 'Earnings Date - Businesswire',
+        'link': url,
+        'description': 'Earnings Date - Businesswire',
+        'author': 'hillerliao',
+        'items': items
+    }
\ No newline at end of file
diff --git a/rsshub/spiders/earningsdate/globenewswire.py b/rsshub/spiders/earningsdate/globenewswire.py
new file mode 100644
index 0000000..2659ba5
--- /dev/null
+++ b/rsshub/spiders/earningsdate/globenewswire.py
@@ -0,0 +1,24 @@
+from rsshub.utils import fetch, filter_content
+
+domain = 'https://www.globenewswire.com'
+
+def parse(post):
+    """Build a feed item from one RSS ``item`` selector; a missing title or
+    description falls back to '' instead of raising on None (link/pubDate may still be None)."""
+    title = post.css('title::text').extract_first(default='').strip()
+    summary = post.css('description::text').extract_first(default='').strip(']]>')  # trims ']' and '>' characters at both ends
+    link, pub_date = post.css('guid::text').extract_first(), post.css('pubDate::text').extract_first()
+    return {'title': title, 'description': summary, 'link': link, 'pubDate': pub_date}
+
+def ctx(category=''):
+    """Assemble the GlobeNewswire earnings-date feed context; ``category`` is accepted but unused."""
+    feed_url = f"{domain}/RssFeed/subjectcode/13-Earnings%20Releases%20And%20Operating%20Results/feedTitle/GlobeNewswire%20-%20Earnings%20Releases%20And%20Operating%20Results"
+    entries = [parse(node) for node in fetch(feed_url).css('item')]
+    label = 'Earnings Date - Globenewswire'  # used for both the title and the description
+    return {
+        'title': label,
+        'link': feed_url,
+        'description': label,
+        'author': 'hillerliao',
+        'items': filter_content(entries),
+    }
\ No newline at end of file
diff --git a/rsshub/spiders/earningsdate/prnewswire.py b/rsshub/spiders/earningsdate/prnewswire.py
new file mode 100644
index 0000000..2c61940
--- /dev/null
+++ b/rsshub/spiders/earningsdate/prnewswire.py
@@ -0,0 +1,24 @@
+from rsshub.utils import fetch, filter_content
+
+domain = 'https://www.prnewswire.com'
+
+def parse(post):
+    """Build a feed item dict from one listing-card selector.
+    Values are whatever ``extract_first()`` yields, so they are None when a node is absent."""
+    title, summary = post.css('a::text').extract_first(), post.css('p::text').extract_first()
+    href = post.css('a::attr(href)').extract_first()  # a None href is formatted as the text 'None' below
+    published = post.css('small::text').extract_first()
+    return {'title': title, 'description': summary, 'link': f"{domain}{href}", 'pubDate': published}
+
+def ctx(category=''):
+    """Assemble the PR Newswire earnings-date feed context; ``category`` is accepted but unused."""
+    listing = f"{domain}/news-releases/financial-services-latest-news/earnings-list/"
+    # NOTE(review): the request asks for page=5 with pagesize=100 rather than the first page — confirm this is intended.
+    cards = fetch(f"{listing}?page=5&pagesize=100").css('.card-list-hr .col-sm-8')
+    return {
+        'title': 'Earnings Date - Prnewswire',
+        'link': listing,
+        'description': 'Earnings Date - Prnewswire',
+        'author': 'hillerliao',
+        'items': filter_content([parse(card) for card in cards]),
+    }
\ No newline at end of file
diff --git a/rsshub/templates/main/feeds.html b/rsshub/templates/main/feeds.html
index 7010d15..cb2e622 100644
--- a/rsshub/templates/main/feeds.html
+++ b/rsshub/templates/main/feeds.html
@@ -85,4 +85,17 @@
 </div>
 <br>
 <!--item info end-->
+
+<!--item info start-->
+<div class="card text-left">
+  <div class="card-body">
+    <h4 class="card-title">Earnings Date</h4>
+    <h6 class="text-muted">Earnings Date <a href="https://github.com/hillerliao" target="_blank" class="badge badge-secondary">by hillerliao</a></h6>
+    <p class="card-text">举例：<a href="https://rsshub-python.herokuapp.com/earningsdate/businesswire" target="_blank">https://rsshub-python.herokuapp.com/earningsdate/businesswire</a></p>
+    <p class="card-text">路由：<code>/earningsdate/:category</code></p>
+    <p class="card-text">参数：category [必填，可以为“businesswire、globenewswire、prnewswire”]</p>
+  </div>
+</div>
+<br>
+<!--item info end-->
 {% endblock content %}
\ No newline at end of file
diff --git a/rsshub/utils.py b/rsshub/utils.py
index 5cdbbe7..614393e 100644
--- a/rsshub/utils.py
+++ b/rsshub/utils.py
@@ -1,3 +1,4 @@
+import re
 from flask import Response
 import requests
 from parsel import Selector
@@ -23,3 +24,15 @@ def fetch(url: str, headers: dict=DEFAULT_HEADERS, proxies: dict=None):
         html = res.text
         tree = Selector(text=html)
         return tree
+
+def filter_content(items):
+    """Return the items whose 'title' matches one of the earnings-date patterns.
+
+    Items with a missing or ``None`` title are skipped instead of raising.
+    """
+    patterns = [re.compile(p, re.IGNORECASE) for p in (
+        r'(.*)(to|will|date|schedule) (.*)results',
+        r'(.*)(schedule|announce|to) (.*)call',
+        r'(.*)release (.*)date')]
+    # match() anchors at the start of the title; the leading (.*) lets the keywords appear later in it.
+    return [i for i in items if any(p.match(i.get('title') or '') for p in patterns)]
\ No newline at end of file