add xinhuanet (news.cn)

Signed-off-by: icedragon <flyingicedragon@qq.com>
This commit is contained in:
icedragon
2023-05-31 15:07:15 +08:00
parent 64096acd2e
commit a0c40774cf
7 changed files with 146 additions and 1 deletions

View File

@@ -0,0 +1,32 @@
import json
import requests
from rsshub.utils import DEFAULT_HEADERS
# Base URL of the Xinhuanet site; not referenced by the code visible in this module.
domain = 'http://www.news.cn'
def parse(post):
    """Convert one article record from the JSON listing into a feed item.

    Args:
        post: Mapping that provides the ``Title``, ``Abstract`` and
            ``LinkUrl`` keys.

    Returns:
        A dict with ``title``, ``description`` and ``link`` entries copied
        from the corresponding source keys.

    Raises:
        KeyError: If any of the three source keys is missing.
    """
    return {
        'title': post['Title'],
        'description': post['Abstract'],
        'link': post['LinkUrl'],
    }
def ctx():
    """Build the feed context from the Xinhuanet ``nodeart/page`` JSON endpoint.

    Requests page 1 (20 entries) of node ``113351`` and converts every entry
    of ``data.list`` with ``parse``.

    Returns:
        A dict with ``title``, ``link``, ``description``, ``author`` and
        ``items`` keys; ``items`` is a list of parsed entries.

    Raises:
        requests.RequestException: On timeout, connection failure or a
            non-success HTTP status.
        json.JSONDecodeError: If the response body is not valid JSON.
        KeyError: If the payload has no ``data`` / ``list`` entry.
    """
    url = 'http://da.wa.news.cn/nodeart/page'
    response = requests.get(
        url,
        params={'nid': '113351', 'pgnum': '1', 'cnt': '20'},
        headers=DEFAULT_HEADERS,
        # requests waits forever by default; bound the wait so a stalled
        # upstream cannot block the caller indefinitely.
        timeout=10,
    )
    # Surface HTTP errors directly instead of parsing an error page as JSON.
    response.raise_for_status()
    posts = json.loads(response.text)['data']['list']
    return {
        'title': '新华网 - 时政联播',
        'link': url,
        'description': '新华网 - 时政联播',
        'author': 'flyingicedragon',
        'items': [parse(post) for post in posts],
    }

View File

@@ -0,0 +1,14 @@
from rsshub.utils import DEFAULT_HEADERS, fetch
def parse_html(post):
    """Turn an anchor selector into a feed item, including the article body.

    Args:
        post: Selector for a link element; it must expose ``xpath``.
            Presumably a parsel ``Selector`` for an ``<a>`` node -- confirm
            against ``fetch``.

    Returns:
        A dict with ``title`` (first direct text node of the anchor),
        ``link`` (its ``href`` attribute) and ``description`` (the first
        ``<div id="detail">`` match on the linked page).
    """
    item = {}
    item['title'] = post.xpath('text()').extract_first()
    item['link'] = post.xpath('@href').extract_first()
    # The listing only carries the headline, so the linked page is fetched
    # once per item to obtain the article body.
    article = fetch(item['link'], headers=DEFAULT_HEADERS)
    item['description'] = article.xpath('//div[@id=\'detail\']').get()
    return item

View File

@@ -0,0 +1,19 @@
from rsshub.spiders.xinhuanet.utils import parse_html as parse
from rsshub.utils import DEFAULT_HEADERS, fetch
# Page whose links feed this spider. Despite the name, this is a full page
# URL (the /world/ index), not just a host.
domain = 'http://www.news.cn/world/index.html'
def ctx():
    """Build the feed context from the ``recommendDepth`` block of the world page.

    Fetches the page at ``domain``, selects every anchor inside
    ``<div id="recommendDepth">`` and converts each one with ``parse``.

    Returns:
        A dict with ``title``, ``link``, ``description``, ``author`` and
        ``items`` keys; ``items`` is a list of parsed anchors.
    """
    url = domain
    tree = fetch(url, headers=DEFAULT_HEADERS)
    posts = tree.xpath('//div[@id=\'recommendDepth\']//a')
    return {
        'title': '新华网 - 国际要闻',
        'link': url,
        'description': '新华网 - 国际要闻',
        'author': 'flyingicedragon',
        'items': [parse(post) for post in posts],
    }

View File

@@ -0,0 +1,17 @@
from rsshub.spiders.xinhuanet.utils import parse_html as parse
from rsshub.utils import DEFAULT_HEADERS, fetch
# Home page URL; fetched by ctx() and reused as the feed link.
domain = 'http://www.news.cn'
def ctx():
    """Assemble the feed context from the ``depth`` block of the home page.

    Fetches ``domain``, selects the anchors that sit directly under list
    items inside ``<div id="depth">`` and converts each one with ``parse``.

    Returns:
        A dict with ``title``, ``link``, ``description``, ``author`` and
        ``items`` keys; ``items`` is a list of parsed anchors.
    """
    home = domain
    page = fetch(home, headers=DEFAULT_HEADERS)
    anchors = page.xpath('//div[@id=\'depth\']//li/a')
    feed_name = '新华网 - 要点聚焦'
    return {
        'title': feed_name,
        'link': home,
        'description': feed_name,
        'author': 'flyingicedragon',
        'items': [parse(anchor) for anchor in anchors],
    }

View File

@@ -0,0 +1,17 @@
from rsshub.spiders.xinhuanet.utils import parse_html as parse
from rsshub.utils import DEFAULT_HEADERS, fetch
# Home page URL; fetched by ctx() and reused as the feed link.
domain = 'http://www.news.cn'
def ctx():
    """Assemble the feed context from the ``latest`` block of the home page.

    Fetches ``domain``, selects the anchors that sit directly under list
    items inside ``<div id="latest">`` and converts each one with ``parse``.

    Returns:
        A dict with ``title``, ``link``, ``description``, ``author`` and
        ``items`` keys; ``items`` is a list of parsed anchors.
    """
    home = domain
    page = fetch(home, headers=DEFAULT_HEADERS)
    anchors = page.xpath('//div[@id=\'latest\']//li/a')
    feed_name = '新华网 - 最新播报'
    return {
        'title': feed_name,
        'link': home,
        'description': feed_name,
        'author': 'flyingicedragon',
        'items': [parse(anchor) for anchor in anchors],
    }