From b9b2fe90dff054c9d8c8391bd2920d1755615aa4 Mon Sep 17 00:00:00 2001 From: Zhihai Liao Date: Sat, 1 Jul 2023 22:23:25 +0000 Subject: [PATCH] fix xpath error in xinhuanet world --- rsshub/spiders/xinhuanet/utils.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/rsshub/spiders/xinhuanet/utils.py b/rsshub/spiders/xinhuanet/utils.py index e5f83a6..89c1bd0 100644 --- a/rsshub/spiders/xinhuanet/utils.py +++ b/rsshub/spiders/xinhuanet/utils.py @@ -1,3 +1,4 @@ +from time import sleep from rsshub.utils import DEFAULT_HEADERS, fetch @@ -5,10 +6,11 @@ def parse_html(post): item = {} item['title'] = post.xpath('text()').extract_first() item['link'] = post.xpath('@href').extract_first() - print(item['link']) + html = fetch(item['link'], headers=DEFAULT_HEADERS) item['description'] = ( - fetch(item['link'], headers=DEFAULT_HEADERS) - .xpath('//div[@id=\'detail\']') + html + .xpath('//div[@id="detail"]') .get() ) - return item + sleep(1) + return item \ No newline at end of file