from time import sleep

from rsshub.utils import DEFAULT_HEADERS, fetch


def parse_html(post, delay=1):
    """Build a feed item from one link node and the page it points to.

    Args:
        post: Selector-like node for a link; it must support
            ``xpath(...).extract_first()``.
        delay: Seconds to pause after requesting the linked page.
            Defaults to 1, the value that used to be hard-coded.

    Returns:
        A dict with three keys: ``title`` (the node's ``text()``),
        ``link`` (the node's ``@href``) and ``description`` (the result
        of ``.get()`` on the ``//div[@id="detail"]`` query against the
        fetched page, or ``None`` when the page could not be obtained).
    """
    item = {
        'title': post.xpath('text()').extract_first(),
        'link': post.xpath('@href').extract_first(),
        'description': None,
    }

    # Without an href there is nothing to download, so skip both the
    # request and the pause instead of handing an empty URL to fetch().
    if not item['link']:
        return item

    html = fetch(item['link'], headers=DEFAULT_HEADERS)
    # NOTE(review): fetch() is defined in rsshub.utils and is not visible
    # here; this check covers the case where it returns None for a failed
    # request -- confirm against its implementation.
    if html is not None:
        item['description'] = html.xpath('//div[@id="detail"]').get()

    # Pause after each page request (presumably to throttle scraping).
    sleep(delay)
    return item