Merge pull request #10 from flyingicedragon/master

Add xinhuanet (news.cn)
2026-05-14 15:09:23 +00:00 · 2023-07-01 18:01:01 +08:00
parent 64096acd2e a0c40774cf
commit 341bfee02d
7 changed files with 146 additions and 1 deletions
--- a/rsshub/blueprints/main.py
+++ b/rsshub/blueprints/main.py
@@ -237,7 +237,32 @@ def mp_gh(gh=''):
@bp.route('/mp/youwuqiong/<string:author>')
 def mp_youwuqiong(author=''):
    from rsshub.spiders.mp.youwuqiong import ctx
-    return render_template('main/atom.xml', **filter_content(ctx(author)))        
+    return render_template('main/atom.xml', **filter_content(ctx(author)))
+
+
+@bp.route('/xinhuanet/zuixinbobao')
+def xinhuanet_zuixinbobao():  # Xinhuanet "latest reports" feed, rendered as Atom
+    from rsshub.spiders.xinhuanet.zuixinbobao import ctx
+    return render_template('main/atom.xml', **filter_content(ctx()))
+
+
+@bp.route('/xinhuanet/shizhenglianbo')
+def xinhuanet_shizhenglianbo():  # Xinhuanet "current politics" feed, rendered as Atom
+    from rsshub.spiders.xinhuanet.shizhenglianbo import ctx
+    return render_template('main/atom.xml', **filter_content(ctx()))
+
+
+@bp.route('/xinhuanet/yaodianjujiao')
+def xinhuanet_yaodianjujiao():  # Xinhuanet "key focus" feed, rendered as Atom
+    from rsshub.spiders.xinhuanet.yaodianjujiao import ctx
+    return render_template('main/atom.xml', **filter_content(ctx()))
+
+
+@bp.route('/xinhuanet/world')
+def xinhuanet_world():  # Xinhuanet "world news" feed, rendered as Atom
+    from rsshub.spiders.xinhuanet.world import ctx
+    return render_template('main/atom.xml', **filter_content(ctx()))
+

@bp.route('/yfchuhai/express/')
 def yfchuhai_express():
--- a/rsshub/spiders/xinhuanet/shizhenglianbo.py
+++ b/rsshub/spiders/xinhuanet/shizhenglianbo.py
@@ -0,0 +1,32 @@
+import json
+
+import requests
+
+from rsshub.utils import DEFAULT_HEADERS
+
+domain = 'http://www.news.cn'
+
+
+def parse(post):
+    """Convert one record from the API's article list into a feed item."""
+    title = post['Title']
+    summary = post['Abstract']
+    link = post['LinkUrl']
+    return {'title': title, 'description': summary, 'link': link}
+
+
+def ctx():
+    """Build the feed context from the nodeart API listing for nid 113351."""
+    url = 'http://da.wa.news.cn/nodeart/page'
+    query = {'nid': '113351', 'pgnum': '1', 'cnt': '20'}
+    # Bound the wait so a stalled upstream cannot hang the request handler.
+    response = requests.get(url, params=query, headers=DEFAULT_HEADERS, timeout=10)
+    response.raise_for_status()
+    posts = json.loads(response.text)['data']['list']
+    return {
+        'title': '新华网 - 时政联播',
+        'link': url,
+        'description': '新华网 - 时政联播',
+        'author': 'flyingicedragon',
+        'items': list(map(parse, posts)),
+    }
--- a/rsshub/spiders/xinhuanet/utils.py
+++ b/rsshub/spiders/xinhuanet/utils.py
@@ -0,0 +1,14 @@
+from rsshub.utils import DEFAULT_HEADERS, fetch
+
+
+def parse_html(post):
+    """Build a feed item from an <a> selector, using the linked page's #detail div."""
+    item = {}
+    item['title'] = post.xpath('text()').extract_first()
+    item['link'] = post.xpath('@href').extract_first()
+    item['description'] = (
+        fetch(item['link'], headers=DEFAULT_HEADERS)
+        .xpath('//div[@id=\'detail\']')
+        .get()
+    )
+    return item
--- a/rsshub/spiders/xinhuanet/world.py
+++ b/rsshub/spiders/xinhuanet/world.py
@@ -0,0 +1,19 @@
+from rsshub.spiders.xinhuanet.utils import parse_html as parse
+from rsshub.utils import DEFAULT_HEADERS, fetch
+
+domain = 'http://www.news.cn/world/index.html'
+
+
+def ctx():
+    """Return the world-news feed context, one item per #recommendDepth anchor."""
+    # `domain` already holds the full page URL, so it is used as-is.
+    url = domain
+    tree = fetch(url, headers=DEFAULT_HEADERS)
+    posts = tree.xpath('//div[@id=\'recommendDepth\']//a')
+    return {
+        'title': '新华网 - 国际要闻',
+        'link': url,
+        'description': '新华网 - 国际要闻',
+        'author': 'flyingicedragon',
+        'items': list(map(parse, posts)),
+    }
--- a/rsshub/spiders/xinhuanet/yaodianjujiao.py
+++ b/rsshub/spiders/xinhuanet/yaodianjujiao.py
@@ -0,0 +1,17 @@
+from rsshub.spiders.xinhuanet.utils import parse_html as parse
+from rsshub.utils import DEFAULT_HEADERS, fetch
+
+domain = 'http://www.news.cn'
+
+
+def ctx():
+    """Return the feed context built from the #depth list anchors at `domain`."""
+    page = fetch(domain, headers=DEFAULT_HEADERS)
+    anchors = page.xpath('//div[@id=\'depth\']//li/a')
+    return {
+        'title': '新华网 - 要点聚焦',
+        'link': domain,
+        'description': '新华网 - 要点聚焦',
+        'author': 'flyingicedragon',
+        'items': [parse(anchor) for anchor in anchors],
+    }
--- a/rsshub/spiders/xinhuanet/zuixinbobao.py
+++ b/rsshub/spiders/xinhuanet/zuixinbobao.py
@@ -0,0 +1,17 @@
+from rsshub.spiders.xinhuanet.utils import parse_html as parse
+from rsshub.utils import DEFAULT_HEADERS, fetch
+
+domain = 'http://www.news.cn'
+
+
+def ctx():
+    """Return the feed context built from the #latest list anchors at `domain`."""
+    page = fetch(domain, headers=DEFAULT_HEADERS)
+    anchors = page.xpath('//div[@id=\'latest\']//li/a')
+    return {
+        'title': '新华网 - 最新播报',
+        'link': domain,
+        'description': '新华网 - 最新播报',
+        'author': 'flyingicedragon',
+        'items': [parse(anchor) for anchor in anchors],
+    }
--- a/rsshub/templates/main/feeds.html
+++ b/rsshub/templates/main/feeds.html
@@ -538,6 +538,27 @@
 <br>
 <!--item info end-->

+<!--item info start-->
+<div class="card text-left">
+    <div class="card-body">
+        <h4 class="card-title">新华网</h4>
+        <h6 class="text-muted">新华网 - 最新播报<a href="https://github.com/flyingicedragon" target="_blank" class="badge badge-secondary">by flyingicedragon</a></h6>
+        <p class="card-text">举例：<a href="https://pyrsshub.vercel.app/xinhuanet/zuixinbobao" target="_blank">https://pyrsshub.vercel.app/xinhuanet/zuixinbobao</a></p>
+        <p class="card-text">路由：<code>/xinhuanet/zuixinbobao</code></p>
+        <h6 class="text-muted">新华网 - 时政联播<a href="https://github.com/flyingicedragon" target="_blank" class="badge badge-secondary">by flyingicedragon</a></h6>
+        <p class="card-text">举例：<a href="https://pyrsshub.vercel.app/xinhuanet/shizhenglianbo" target="_blank">https://pyrsshub.vercel.app/xinhuanet/shizhenglianbo</a></p>
+        <p class="card-text">路由：<code>/xinhuanet/shizhenglianbo</code></p>
+        <h6 class="text-muted">新华网 - 要点聚焦<a href="https://github.com/flyingicedragon" target="_blank" class="badge badge-secondary">by flyingicedragon</a></h6>
+        <p class="card-text">举例：<a href="https://pyrsshub.vercel.app/xinhuanet/yaodianjujiao" target="_blank">https://pyrsshub.vercel.app/xinhuanet/yaodianjujiao</a></p>
+        <p class="card-text">路由：<code>/xinhuanet/yaodianjujiao</code></p>
+        <h6 class="text-muted">新华网 - 国际要闻<a href="https://github.com/flyingicedragon" target="_blank" class="badge badge-secondary">by flyingicedragon</a></h6>
+        <p class="card-text">举例：<a href="https://pyrsshub.vercel.app/xinhuanet/world" target="_blank">https://pyrsshub.vercel.app/xinhuanet/world</a></p>
+        <p class="card-text">路由：<code>/xinhuanet/world</code></p>
+    </div>
+</div>
+<br>
+<!--item info end-->
+
 <!--item info start-->
 <div class="card text-left">
    <div class="card-body">