mirror of
https://github.com/d0zingcat/RSSHub-python.git
synced 2026-06-13 15:10:53 +00:00
Update worldbrief.py
This commit is contained in:
@@ -1,27 +1,41 @@
|
||||
import re
|
||||
import json
|
||||
from bs4 import BeautifulSoup
|
||||
from rsshub.utils import DEFAULT_HEADERS
|
||||
from rsshub.utils import fetch
|
||||
|
||||
domain = 'https://www.economist.com'
|
||||
|
||||
def _brief_link(title):
    """Return the item link carrying the first 30 characters of *title*.

    The excerpt is percent-encoded so that spaces, '&', '#' or '?' in the
    title cannot produce an invalid or truncated URL.
    """
    # Local import: keeps this block self-contained without touching the
    # file-level import list.
    from urllib.parse import quote

    return f"{domain}/the-world-in-brief?from={quote(title[:30], safe='')}"


def parse(post):
    """Build a feed item from a selector-style node.

    NOTE(review): this appears to be the earlier, selector-based parser;
    confirm whether any caller still needs it before removing it.

    Args:
        post: Object exposing ``.css(...)``; the first ``<p>`` found under
            its ``<div>`` descendants is used as the item content.

    Returns:
        dict with ``title`` (tag-free, stripped text), ``description``
        (the raw paragraph markup) and ``link``.
    """
    markup = post.css('div').css('p').get()
    # Drop anything that looks like an HTML tag to obtain a plain-text title.
    title = re.sub(r'<[^>]*>', '', markup).strip()
    return {
        'title': title,
        'description': markup,
        'link': _brief_link(title),
    }


def parse_gobbet(gobbet):
    """Build a feed item from one "gobbet" HTML fragment.

    Args:
        gobbet: HTML markup for a single brief entry (passed straight to
            BeautifulSoup, so presumably a string -- verify against caller).

    Returns:
        dict with ``title`` (plain text, stripped), ``description`` (the
        original HTML, kept for formatting) and ``link``.
    """
    # Remove HTML tags but keep the text; strip so the title and the link
    # excerpt do not start with stray whitespace.
    title = BeautifulSoup(gobbet, 'html.parser').get_text().strip()
    return {
        'title': title,
        'description': gobbet,  # Keep HTML formatting for description
        'link': _brief_link(title),
    }
|
||||
|
||||
def ctx(category=''):
    """Build the feed context for The Economist's "The world in brief" page.

    The page is fetched once and the JSON embedded in its
    ``__NEXT_DATA__`` script tag is read; each entry under
    ``props.pageProps.content.gobbets`` becomes one feed item via
    ``parse_gobbet``.

    Args:
        category: Unused; kept so the signature stays compatible with
            existing callers.

    Returns:
        dict with ``title``, ``link``, ``description``, ``author`` and
        ``items``. ``items`` is an empty list when the embedded JSON is
        missing, malformed, or does not have the expected structure.
    """
    url = f"{domain}/the-world-in-brief"
    feed = {
        'title': 'World Brief - Economist',
        'link': url,
        'description': 'The world in brief: Catch up quickly on the global stories that matter',
        'author': 'hillerliao',
        'items': [],
    }

    # NOTE(review): assumes fetch() returns a selector whose .get() yields
    # the page markup as a string -- confirm against rsshub.utils.fetch.
    html = fetch(url, headers=DEFAULT_HEADERS).get() or ''

    # Find the __NEXT_DATA__ script. DOTALL lets the payload span lines.
    match = re.search(
        r'<script id="__NEXT_DATA__" type="application/json">(.*?)</script>',
        html,
        re.DOTALL,
    )
    if not match:
        return feed

    try:
        data = json.loads(match.group(1))
    except ValueError:
        # Malformed payload: degrade to an empty feed, the same outcome as
        # when the script tag is absent.
        return feed

    # Walk the nested structure defensively: a chained .get(..., {}) would
    # raise AttributeError if an intermediate value were null or not a dict.
    node = data
    for key in ('props', 'pageProps', 'content', 'gobbets'):
        node = node.get(key) if isinstance(node, dict) else None
    gobbets = node if isinstance(node, list) else []

    feed['items'] = [parse_gobbet(gobbet) for gobbet in gobbets]
    return feed
|
||||
|
||||
|
||||
Reference in New Issue
Block a user