Update worldbrief.py

2026-06-13 15:10:53 +00:00 · 2024-12-10 00:09:11 +08:00
parent a617c6991a
commit d63ccd114b
1 changed files with 25 additions and 11 deletions
--- a/rsshub/spiders/economist/worldbrief.py
+++ b/rsshub/spiders/economist/worldbrief.py
@@ -1,27 +1,41 @@
 import re
+import json
+from bs4 import BeautifulSoup
 from rsshub.utils import DEFAULT_HEADERS
 from rsshub.utils import fetch

 domain = 'https://www.economist.com'

-def parse(post):
+def parse_gobbet(gobbet):
+    """Build a feed item (title, description, link) from one gobbet HTML string."""
    item = {}
-    item['title'] = post.css('div').css('p').get()
-    item['description'] = item['title'] 
-    item['title'] = re.sub(r'<[^>]*>', '', item['title']).strip()
-    item['link'] =  f"{domain}/the-world-in-brief" + '?from=' + item['title'][:30] 
+    item['title'] = BeautifulSoup(gobbet, 'html.parser').get_text().strip()  # trimmed, as the removed parser did
+    item['description'] = gobbet  # Keep HTML formatting for description
+    item['link'] = f"{domain}/the-world-in-brief?from={item['title'][:30]}"
    return item

 def ctx(category=''):
+    """Return the feed dict for the Economist's "The world in brief" page.
+
+    ``category`` is unused; ``items`` is empty when no gobbets list is found.
+    """
    url = f"{domain}/the-world-in-brief"
-    tree = fetch(url,headers=DEFAULT_HEADERS)
-    posts = tree.css('._gobbet')
+    html = fetch(url, headers=DEFAULT_HEADERS).get()
+    # Gobbets come from the JSON inside the __NEXT_DATA__ script tag; DOTALL lets it span lines.
+    match = re.search(r'<script id="__NEXT_DATA__" type="application/json">(.*?)</script>', html, re.DOTALL)
+    gobbets = []
+    if match:
+        node = json.loads(match.group(1))
+        for key in ('props', 'pageProps', 'content', 'gobbets'):
+            # A level may be absent or JSON null, so stop descending instead of raising.
+            node = node.get(key) if isinstance(node, dict) else None
+        if isinstance(node, list):
+            gobbets = node
    
    return {
-        'title': f'World Brief - Economist',
+        'title': 'World Brief - Economist',
        'link': url,
-        'description': f'The world in brief: Catch up quickly on the global stories that matter',
+        'description': 'The world in brief: Catch up quickly on the global stories that matter',
        'author': 'hillerliao',
-        'items': list(map(parse, posts)) 
+        'items': list(map(parse_gobbet, gobbets))
    }
-