csrc audit status crawler

This commit is contained in:
hillerliao
2020-02-29 10:56:43 +08:00
parent 3a34d84693
commit 527bdb190c
3 changed files with 56 additions and 0 deletions

View File

@@ -84,4 +84,9 @@ def earningsdate_businesswire():
@bp.route('/jiemian/newsflash/<string:category>')
def jiemian_newsflash(category=''):
    """Render the Jiemian newsflash feed for ``category`` as Atom XML.

    The spider's ``ctx(category)`` result is passed through
    ``filter_content`` and expanded as keyword arguments into the
    ``main/atom.xml`` template.
    """
    # Imported at call time, as in the other routes, so each route binds
    # its own spider's ``ctx``.
    from rsshub.spiders.jiemian.newsflash import ctx

    feed = filter_content(ctx(category))
    return render_template('main/atom.xml', **feed)
@bp.route('/csrc/audit/<string:category>')
def csrc_audit(category=''):
    """Render the CSRC audit-progress feed for ``category`` as Atom XML.

    The spider's ``ctx(category)`` result is passed through
    ``filter_content`` and expanded as keyword arguments into the
    ``main/atom.xml`` template.
    """
    # Imported at call time, as in the other routes, so each route binds
    # its own spider's ``ctx``.
    from rsshub.spiders.csrc.audit import ctx

    feed = filter_content(ctx(category))
    return render_template('main/atom.xml', **feed)

View File

@@ -0,0 +1,38 @@
import requests
from parsel import Selector
domain = 'https://neris.csrc.gov.cn'
def parse(post):
    """Build one feed item from a single row of the audit-progress table.

    Args:
        post: Selector-like object for one result row. It must support
            chained ``.css(...)`` calls ending in ``.extract()`` or
            ``.extract_first()``.

    Returns:
        dict with three keys:
            ``title``: the application name immediately followed by the
                most recent audit status (no separator is inserted).
            ``description``: the application name followed by one
                ``<date status>`` line per audit step.
            ``pubDate``: the last audit date in the row, or ``''`` when
                the row contains no date cell.
    """
    # extract_first() yields None when the node is absent; fall back to ''
    # so the string concatenations below cannot raise TypeError.
    name = post.css('li.templateTip').css('li::text').extract_first() or ''
    statuses = post.css(
        'td[style="font-weight:100 ;color: black ;position: relative;left:20px"]'
    ).css('td::text').extract()
    dates = post.css(
        'td[style="font-weight:100 ;color:black;position: relative; "]'
    ).css('td::text').extract()

    # zip() stops at the shorter list, so a status cell without a matching
    # date cell no longer raises IndexError.
    steps = ''.join(
        f'<{date} {status}>\n' for date, status in zip(dates, statuses)
    )

    return {
        # Guard the [-1] lookups: a row without status/date cells used to
        # abort the whole feed with IndexError.
        'title': name + (statuses[-1] if statuses else ''),
        'description': name + steps,
        'pubDate': dates[-1] if dates else '',
    }
def ctx(category=''):
    """Fetch the first three result pages for ``category`` and build the feed context.

    Args:
        category: value posted as the ``appMatrCde`` form field and also
            interpolated into the feed title, link and description.

    Returns:
        dict with ``title``, ``link``, ``description``, ``author`` and
        ``items`` (one ``parse`` result per ``tr[height="50"]`` row found
        on pages 1-3, in page order).

    Raises:
        requests.RequestException: propagated from ``requests.post``,
            including when a request exceeds the timeout.
    """
    q_url = f"{domain}/alappl/home1/onlinealog.do"
    items = []
    for page_no in range(1, 4):
        q_data = {"appMatrCde": category, "pageNo": str(page_no), "pageSize": "10"}
        # NOTE(review): verify=False turns off TLS certificate verification.
        # It is kept because removing it may break the crawler if the
        # upstream certificate does not validate -- confirm and drop it
        # if possible.
        # The timeout bounds each request; without it requests waits
        # indefinitely on a stalled connection.
        res = requests.post(q_url, data=q_data, verify=False, timeout=30)
        tree = Selector(res.text)
        items.extend(parse(row) for row in tree.css('tr[height="50"]'))
    return {
        'title': f'申请事项进度查询 - {category} - 中国证监会',
        'link': f'{domain}/alappl/home1/onlinealog?appMatrCde={category}',
        'description': f'{category} 申请事项进度查询 - 中国证监会',
        'author': 'hillerliao',
        'items': items
    }

View File

@@ -111,4 +111,17 @@
</div>
<br>
<!--item info end-->
<!--item info start-->
<div class="card text-left">
<div class="card-body">
<h4 class="card-title">证监会审核进度</h4>
<h6 class="text-muted">证监会审核进度 <a href="https://github.com/hillerliao" target="_blank" class="badge badge-secondary">by hillerliao</a></h6>
<p class="card-text">举例:<a href="https://rsshub-python.herokuapp.com/csrc/audit/a1d50077cd7f4b15bd1c8d6163f32850" target="_blank">https://rsshub-python.herokuapp.com/csrc/audit/a1d50077cd7f4b15bd1c8d6163f32850</a></p>
<p class="card-text">路由:<code>/csrc/audit/:category</code></p>
<p class="card-text">参数category [必填,见证监会栏目 https://neris.csrc.gov.cn/alappl/home/gongshi]</p>
</div>
</div>
<br>
<!--item info end-->
{% endblock content %}