From 527bdb190c0681c4a759900bf5ad1c7d6425575e Mon Sep 17 00:00:00 2001
From: hillerliao
Date: Sat, 29 Feb 2020 10:56:43 +0800
Subject: [PATCH] csrc audit status crawler

---
Note: this copy appears to have lost angle-bracketed text (author e-mail,
HTML tags); context lines and index hashes are reproduced as found.

 rsshub/blueprints/main.py        |  5 ++++
 rsshub/spiders/csrc/audit.py     | 51 ++++++++++++++++++++++++++++++++
 rsshub/templates/main/feeds.html | 13 ++++++++
 3 files changed, 69 insertions(+)
 create mode 100644 rsshub/spiders/csrc/audit.py

diff --git a/rsshub/blueprints/main.py b/rsshub/blueprints/main.py
index 83d4908..46da12f 100644
--- a/rsshub/blueprints/main.py
+++ b/rsshub/blueprints/main.py
@@ -84,4 +84,9 @@ def earningsdate_businesswire():
 @bp.route('/jiemian/newsflash/')
 def jiemian_newsflash(category=''):
     from rsshub.spiders.jiemian.newsflash import ctx
+    return render_template('main/atom.xml', **filter_content(ctx(category)))
+
+@bp.route('/csrc/audit/<string:category>')
+def csrc_audit(category=''):
+    from rsshub.spiders.csrc.audit import ctx
     return render_template('main/atom.xml', **filter_content(ctx(category)))
\ No newline at end of file
diff --git a/rsshub/spiders/csrc/audit.py b/rsshub/spiders/csrc/audit.py
new file mode 100644
index 0000000..2e1f852
--- /dev/null
+++ b/rsshub/spiders/csrc/audit.py
@@ -0,0 +1,51 @@
+import requests
+from parsel import Selector
+
+domain = 'https://neris.csrc.gov.cn'
+
+
+def parse(post):
+    """Build one feed item from a result-table row.
+
+    Returns None when the row has no title or no audit status/date pair,
+    so the caller can skip it instead of failing on an empty list.
+    """
+    title = post.css('li.templateTip').css('li::text').extract_first()
+    audit_status = post.css('td[style="font-weight:100 ;color: black ;position: relative;left:20px"]').css('td::text').extract()
+    audit_date = post.css('td[style="font-weight:100 ;color:black;position: relative; "]').css('td::text').extract()
+    if not title or not audit_status or not audit_date:
+        return None
+
+    # zip() pairs each date with its status and stops at the shorter list.
+    steps = ''.join(
+        '<' + date + ' ' + status + '>\n'
+        for date, status in zip(audit_date, audit_status)
+    )
+    return {
+        'title': title + ',' + audit_status[-1],
+        'description': title + ';' + steps,
+        'pubDate': audit_date[-1],
+    }
+
+
+def ctx(category=''):
+    """Collect the first three result pages for one application category."""
+    q_url = f"{domain}/alappl/home1/onlinealog.do"
+    items = []
+    for page_no in range(1, 4):
+        q_data = {"appMatrCde": category, "pageNo": str(page_no), "pageSize": "10"}
+        # NOTE(review): verify=False disables TLS certificate checks; kept
+        # from the original patch, confirm it is still required.
+        res = requests.post(q_url, data=q_data, verify=False, timeout=30)
+        tree = Selector(res.text)
+        for post in tree.css('tr[height="50"]'):
+            item = parse(post)
+            if item is not None:
+                items.append(item)
+    return {
+        'title': f'申请事项进度查询 - {category} - 中国证监会',
+        'link': f'{domain}/alappl/home1/onlinealog?appMatrCde={category}',
+        'description': f'{category} 申请事项进度查询 - 中国证监会',
+        'author': 'hillerliao',
+        'items': items
+    }
diff --git a/rsshub/templates/main/feeds.html b/rsshub/templates/main/feeds.html
index 14cc221..c23112b 100644
--- a/rsshub/templates/main/feeds.html
+++ b/rsshub/templates/main/feeds.html
@@ -111,4 +111,17 @@
+ + +
+
+

证监会审核进度

+
证监会审核进度 by hillerliao
+

举例:https://rsshub-python.herokuapp.com/csrc/audit/a1d50077cd7f4b15bd1c8d6163f32850

+

路由:/csrc/audit/:category

+

参数:category [必填,见证监会栏目 https://neris.csrc.gov.cn/alappl/home/gongshi]

+
+
+
+ {% endblock content %} \ No newline at end of file