add xinhuanet (news.cn)

Signed-off-by: icedragon <flyingicedragon@qq.com>
2026-05-15 23:16:48 +00:00 · 2023-05-31 15:07:15 +08:00
parent 64096acd2e
commit a0c40774cf
7 changed files with 146 additions and 1 deletions
--- a/rsshub/spiders/xinhuanet/shizhenglianbo.py
+++ b/rsshub/spiders/xinhuanet/shizhenglianbo.py
@@ -0,0 +1,32 @@
+import json
+
+import requests
+
+from rsshub.utils import DEFAULT_HEADERS
+
+domain = 'http://www.news.cn'
+
+
+def parse(post):
+    """Convert one article record from the JSON API into a feed item.
+
+    Copies the record's ``Title``, ``Abstract`` and ``LinkUrl`` values into
+    the ``title``, ``description`` and ``link`` keys of a new dict.
+    """
+    return {
+        'title': post['Title'],
+        'description': post['Abstract'],
+        'link': post['LinkUrl'],
+    }
+
+
+def ctx():
+    """Build the feed context for the Xinhuanet "Shizheng Lianbo" column.
+
+    Requests page 1 (20 entries) of node 113351 from the
+    ``da.wa.news.cn`` JSON endpoint and converts every entry found under
+    ``data.list`` with :func:`parse`.
+
+    Returns:
+        dict with ``title``, ``link``, ``description``, ``author`` and
+        ``items`` keys.
+
+    Raises:
+        requests.RequestException: the request timed out, could not be
+            completed, or the server answered with an HTTP error status.
+    """
+    url = 'http://da.wa.news.cn/nodeart/page'
+    response = requests.get(
+        url,
+        params={'nid': '113351', 'pgnum': '1', 'cnt': '20'},
+        headers=DEFAULT_HEADERS,
+        # Without a timeout requests waits indefinitely on a stalled server.
+        timeout=10,
+    )
+    # Surface HTTP errors here instead of failing later on a non-JSON body.
+    response.raise_for_status()
+    posts = json.loads(response.text)['data']['list']
+    return {
+        'title': '新华网 - 时政联播',
+        'link': url,
+        'description': '新华网 - 时政联播',
+        'author': 'flyingicedragon',
+        'items': [parse(post) for post in posts],
+    }
--- a/rsshub/spiders/xinhuanet/utils.py
+++ b/rsshub/spiders/xinhuanet/utils.py
@@ -0,0 +1,14 @@
+from rsshub.utils import DEFAULT_HEADERS, fetch
+
+
+def parse_html(post):
+    """Turn an anchor selector into a feed item including the article body.
+
+    The anchor's text becomes ``title`` and its ``href`` becomes ``link``.
+    The linked page is then fetched and the serialized ``<div id="detail">``
+    element is stored as ``description``.
+
+    Args:
+        post: selector for one ``<a>`` element; must support ``xpath``.
+
+    Returns:
+        dict with ``title``, ``link`` and ``description`` keys.
+        ``description`` is ``None`` when the anchor has no ``href`` or the
+        fetched page has no ``detail`` container.
+    """
+    item = {}
+    item['title'] = post.xpath('text()').get()
+    item['link'] = post.xpath('@href').get()
+    if item['link'] is None:
+        # An anchor without an href has nothing to fetch.
+        item['description'] = None
+        return item
+    item['description'] = (
+        fetch(item['link'], headers=DEFAULT_HEADERS)
+        .xpath("//div[@id='detail']")
+        .get()
+    )
+    return item
--- a/rsshub/spiders/xinhuanet/world.py
+++ b/rsshub/spiders/xinhuanet/world.py
@@ -0,0 +1,19 @@
+from rsshub.spiders.xinhuanet.utils import parse_html as parse
+from rsshub.utils import DEFAULT_HEADERS, fetch
+
+domain = 'http://www.news.cn/world/index.html'
+
+
+def ctx():
+    """Build the feed context for the Xinhuanet world-news index page.
+
+    Fetches the page at ``domain``, selects every anchor inside the
+    ``recommendDepth`` container and converts each one with ``parse``
+    (which also fetches the linked article).
+
+    Returns:
+        dict with ``title``, ``link``, ``description``, ``author`` and
+        ``items`` keys.
+    """
+    url = domain
+    tree = fetch(url, headers=DEFAULT_HEADERS)
+    posts = tree.xpath("//div[@id='recommendDepth']//a")
+    return {
+        'title': '新华网 - 国际要闻',
+        'link': url,
+        'description': '新华网 - 国际要闻',
+        'author': 'flyingicedragon',
+        'items': [parse(post) for post in posts],
+    }
--- a/rsshub/spiders/xinhuanet/yaodianjujiao.py
+++ b/rsshub/spiders/xinhuanet/yaodianjujiao.py
@@ -0,0 +1,17 @@
+from rsshub.spiders.xinhuanet.utils import parse_html as parse
+from rsshub.utils import DEFAULT_HEADERS, fetch
+
+domain = 'http://www.news.cn'
+
+
+def ctx():
+    """Build the feed context for the Xinhuanet "Yaodian Jujiao" section.
+
+    Fetches the page at ``domain``, selects the list-item anchors inside
+    the ``depth`` container and converts each one with ``parse``.
+
+    Returns:
+        dict with ``title``, ``link``, ``description``, ``author`` and
+        ``items`` keys.
+    """
+    url = domain
+    page = fetch(url, headers=DEFAULT_HEADERS)
+    anchors = page.xpath("//div[@id='depth']//li/a")
+    items = [parse(anchor) for anchor in anchors]
+    heading = '新华网 - 要点聚焦'
+    return {
+        'title': heading,
+        'link': url,
+        'description': heading,
+        'author': 'flyingicedragon',
+        'items': items,
+    }
--- a/rsshub/spiders/xinhuanet/zuixinbobao.py
+++ b/rsshub/spiders/xinhuanet/zuixinbobao.py
@@ -0,0 +1,17 @@
+from rsshub.spiders.xinhuanet.utils import parse_html as parse
+from rsshub.utils import DEFAULT_HEADERS, fetch
+
+domain = 'http://www.news.cn'
+
+
+def ctx():
+    """Build the feed context for the Xinhuanet "Zuixin Bobao" section.
+
+    Fetches the page at ``domain``, selects the list-item anchors inside
+    the ``latest`` container and converts each one with ``parse``.
+
+    Returns:
+        dict with ``title``, ``link``, ``description``, ``author`` and
+        ``items`` keys.
+    """
+    url = domain
+    page = fetch(url, headers=DEFAULT_HEADERS)
+    anchors = page.xpath("//div[@id='latest']//li/a")
+    items = [parse(anchor) for anchor in anchors]
+    heading = '新华网 - 最新播报'
+    return {
+        'title': heading,
+        'link': url,
+        'description': heading,
+        'author': 'flyingicedragon',
+        'items': items,
+    }