diff --git a/rsshub/blueprints/main.py b/rsshub/blueprints/main.py
index d9a5aa7..7664c67 100644
--- a/rsshub/blueprints/main.py
+++ b/rsshub/blueprints/main.py
@@ -248,6 +248,13 @@ def economist_wordlbrief(category=''):
     from rsshub.spiders.economist.worldbrief import ctx
     return render_template('main/atom.xml', **filter_content(ctx(category)))
 
+@bp.route('/futu/live/')
+@bp.route('/futu/live/<string:lang>')
+def futu_live(lang=''):
+    """Serve the Futunn live-news feed; `lang` is forwarded to the spider."""
+    from rsshub.spiders.futu.live import ctx
+    return render_template('main/atom.xml', **filter_content(ctx(lang)))
+
 @bp.route('/baidu/suggest/')
 def baidu_suggest(category=''):
     from rsshub.spiders.baidu.suggest import ctx
diff --git a/rsshub/spiders/economist/worldbrief.py b/rsshub/spiders/economist/worldbrief.py
index 85bb704..616590f 100644
--- a/rsshub/spiders/economist/worldbrief.py
+++ b/rsshub/spiders/economist/worldbrief.py
@@ -20,10 +20,10 @@ def parse_news(gobbet):
     """
     生成单条 news 的新闻内容,提取标题和正文。
     """
-    title = gobbet.strip()
+    title = re.sub(r'<[^>]+>', '', gobbet.strip())
     item = {
         'title': title,
-        'description': title,  # 简单设置正文为描述
+        'description': gobbet,  # use the raw gobbet as the description
         'link': f"{domain}/the-world-in-brief?from={title[:30]}"  # 生成链接
     }
     return item
@@ -32,7 +32,7 @@ def ctx(category=''):
     """
     解析 JSON 数据,提取所有brief news的内容。
     """
-    url = f"{domain}/"
+    url = f"{domain}/the-world-in-brief"
     html = fetch(url, headers=DEFAULT_HEADERS).get()
     soup = BeautifulSoup(html, 'html.parser')
     script_tag = soup.find('script', id="__NEXT_DATA__", type="application/json")
@@ -43,15 +43,10 @@ def ctx(category=''):
     # Load JSON content
     data = json.loads(script_tag.string)
 
-    news_list = data.get('props', {}).get('pageProps', {}).get('worldInBrief', {}).get('text', [])[:-2]
-
-    news_list_new = []
-    for item in news_list:
-        if item['type'] == 'tag' and item['name'] == 'p':  # 确保是段落
-            news_list_new.append(extract_text(item['children']))
+    news_list = data.get('props', {}).get('pageProps', {}).get('content', {}).get('gobbets', [])
 
     # 使用 parse_gobbet 解析每一条新闻
-    items = [parse_news(news) for news in news_list_new]
+    items = [parse_news(news) for news in news_list]
 
     return {
         'title': 'World Brief - Economist',
diff --git a/rsshub/spiders/futu/live.py b/rsshub/spiders/futu/live.py
new file mode 100644
index 0000000..ebb8cab
--- /dev/null
+++ b/rsshub/spiders/futu/live.py
@@ -0,0 +1,56 @@
+import json
+import re
+from datetime import datetime, timezone
+from urllib.parse import urlencode
+
+import requests
+
+from rsshub.utils import DEFAULT_HEADERS
+
+domain = 'https://news.futunn.com'
+
+# Seconds to wait for the flash-news API before giving up.
+REQUEST_TIMEOUT = 10
+
+
+def parse_news(news):
+    """Convert one flash-news record from the API into a feed item dict."""
+    content = news.get('content', '')
+    # Fall back to the body text when the record has no title.
+    title = news.get('title') or content
+    # 'time' is interpreted as epoch seconds and rendered in UTC.
+    published = datetime.fromtimestamp(int(news['time']), tz=timezone.utc)
+
+    return {
+        'title': title,
+        'description': content,
+        'link': news.get('detailUrl', ''),
+        'pubDate': published.strftime('%Y-%m-%dT%H:%M:%SZ'),
+    }
+
+
+def ctx(lang=''):
+    """
+    Fetch the Futunn flash-news list and build the feed context.
+
+    Returns a dict with feed metadata and an 'items' list; the list is
+    empty when the API reports a non-zero code or returns no news.
+    """
+    query = urlencode({'pageSize': 50, 'lang': lang})
+    url = f"{domain}/news-site-api/main/get-flash-list?{query}"
+    response = requests.get(url, headers=DEFAULT_HEADERS, timeout=REQUEST_TIMEOUT)
+    data = response.json()
+
+    # Walk the payload defensively so a missing key yields an empty feed
+    # instead of a KeyError.
+    news_list = []
+    if data.get('code') == 0:
+        news_list = ((data.get('data') or {}).get('data') or {}).get('news') or []
+
+    return {
+        'title': 'Futunn Live News',
+        'link': url,
+        'description': 'Futunn Live News',
+        'author': 'hillerliao',
+        'items': [parse_news(news) for news in news_list],
+    }