Remove HTML tags from InfoQ search results

This commit is contained in:
hillerliao
2022-10-22 11:26:59 +08:00
parent 27e7d2ee5f
commit 296287d11b

View File

@@ -1,5 +1,6 @@
import requests
import json
import re
from urllib.parse import unquote
from rsshub.utils import DEFAULT_HEADERS
from rsshub.utils import fetch
@@ -9,8 +10,8 @@ domain = 'https://s.geekbang.org'
def parse(post):
item = {}
item['title'] = post['title']
item['description'] = post['simple_content']
item['title'] = re.sub(r'<[^>]*>', '', post['title']).strip()
item['description'] = re.sub(r'<[^>]*>', '', post['simple_content']).strip()
item['link'] = post['content_url']
item['author'] = post['author']
item['pubDate'] = post['release_time']