fix xpath error in xinhuanet world

This commit is contained in:
Zhihai Liao
2023-07-01 22:23:25 +00:00
parent fec81fea05
commit b9b2fe90df

View File

@@ -1,3 +1,4 @@
from time import sleep
from rsshub.utils import DEFAULT_HEADERS, fetch
@@ -5,10 +6,11 @@ def parse_html(post):
item = {}
item['title'] = post.xpath('text()').extract_first()
item['link'] = post.xpath('@href').extract_first()
print(item['link'])
html = fetch(item['link'], headers=DEFAULT_HEADERS)
item['description'] = (
fetch(item['link'], headers=DEFAULT_HEADERS)
.xpath('//div[@id=\'detail\']')
html
.xpath('//div[@id="detail"]')
.get()
)
return item
sleep(1)
return item