From d63ccd114b92325e76f6f3ca443ac931067b7c01 Mon Sep 17 00:00:00 2001
From: Zhihai Liao <hillerliao@163.com>
Date: Tue, 10 Dec 2024 00:09:11 +0800
Subject: [PATCH] economist/worldbrief: read gobbets from __NEXT_DATA__ JSON

---
 rsshub/spiders/economist/worldbrief.py | 36 ++++++++++++++++++--------
 1 file changed, 25 insertions(+), 11 deletions(-)
diff --git a/rsshub/spiders/economist/worldbrief.py b/rsshub/spiders/economist/worldbrief.py
index d1803a8..0e84885 100644
--- a/rsshub/spiders/economist/worldbrief.py
+++ b/rsshub/spiders/economist/worldbrief.py
@@ -1,27 +1,41 @@
 import re
+import json
+from bs4 import BeautifulSoup
 from rsshub.utils import DEFAULT_HEADERS
 from rsshub.utils import fetch
 
 domain = 'https://www.economist.com'
 
-def parse(post):
+def parse_gobbet(gobbet):  # gobbet: assumed to be one HTML string; returns a dict with title/description/link
     item = {}
-    item['title'] = post.css('div').css('p').get()
-    item['description'] = item['title'] 
-    item['title'] = re.sub(r'<[^>]*>', '', item['title']).strip()
-    item['link'] =  f"{domain}/the-world-in-brief" + '?from=' + item['title'][:30] 
+    from urllib.parse import quote  # local import: only needed to percent-encode the link below
+    item['title'] = BeautifulSoup(gobbet, 'html.parser').get_text().strip()  # tags removed, surrounding whitespace trimmed
+    item['description'] = gobbet  # Keep HTML formatting for description
+    item['link'] = f"{domain}/the-world-in-brief?from={quote(item['title'][:30])}"  # encoded so spaces, '&' and '#' cannot break the URL
     return item
 
 def ctx(category=''):
     url = f"{domain}/the-world-in-brief"
-    tree = fetch(url,headers=DEFAULT_HEADERS)
-    posts = tree.css('._gobbet')
+    # Feed context for "The world in brief"; `category` is accepted but unused.
+    # Gobbets come from the page's embedded __NEXT_DATA__ JSON. A missing script
+    # tag, invalid JSON, or missing/null keys all degrade to an empty item list,
+    # so the single return below always yields a well-formed feed.
+    html = fetch(url, headers=DEFAULT_HEADERS).get() or ''
+    # DOTALL lets the capture span line breaks inside the JSON payload.
+    pattern = r'<script id="__NEXT_DATA__" type="application/json">(.*?)</script>'
+    match = re.search(pattern, html, re.DOTALL)
+    try:
+        data = json.loads(match.group(1)) if match else {}
+    except json.JSONDecodeError:
+        data = {}
+    # `or {}` also covers keys that are present but null, which .get(k, {}) does not.
+    content = ((data.get('props') or {}).get('pageProps') or {}).get('content') or {}
+    gobbets = content.get('gobbets') or []
     
     return {
-        'title': f'World Brief - Economist',
+        'title': 'World Brief - Economist',
         'link': url,
-        'description': f'The world in brief: Catch up quickly on the global stories that matter',
+        'description': 'The world in brief: Catch up quickly on the global stories that matter',
         'author': 'hillerliao',
-        'items': list(map(parse, posts)) 
+        'items': list(map(parse_gobbet, gobbets))
     }
-