mirror of
https://github.com/d0zingcat/RSSHub-python.git
synced 2026-05-16 07:26:46 +00:00
43 lines
1.1 KiB
Python
43 lines
1.1 KiB
Python
import re
|
|
import json
|
|
|
|
from bs4 import BeautifulSoup
|
|
import undetected_chromedriver as uc
|
|
|
|
from rsshub.utils import DEFAULT_HEADERS
|
|
|
|
# Base URL of the Product Hunt site; used below as the Referer header value
# and as the prefix of the search URL.
domain = 'https://www.producthunt.com'
|
|
|
|
|
|
def parse(post):
    """Convert one product record into a feed item dict.

    Args:
        post: Mapping that provides the ``name``, ``tagline`` and ``url``
            keys; a missing key raises ``KeyError``.

    Returns:
        A dict with ``title``, ``description`` and ``link`` entries copied
        from ``name``, ``tagline`` and ``url`` respectively.
    """
    return {
        'title': post['name'],
        'description': post['tagline'],
        'link': post['url'],
    }
|
|
|
|
|
|
def ctx2(keyword='', period=''):
    """Build a feed dict from a Product Hunt search results page.

    Opens the search URL in an ``undetected_chromedriver`` Chrome session,
    reads the JSON embedded in the ``<script id="__NEXT_DATA__">`` tag and
    turns every ``props.apolloState`` entry whose key starts with
    ``Product`` into a feed item via ``parse``.

    Args:
        keyword: Search term. It is percent-encoded before being placed in
            the ``q`` query parameter.
        period: Value inserted in front of ``:days`` in the ``postedAfter``
            query parameter.

    Returns:
        A dict with ``title``, ``link``, ``description``, ``author`` and
        ``items`` keys. ``items`` is an empty list when the page contains
        no ``__NEXT_DATA__`` script tag.
    """
    import time
    from urllib.parse import quote

    # Mutates the shared header dict; the browser session below does not
    # receive these headers from this function.
    DEFAULT_HEADERS.update({'Referer': domain})

    # Encode the keyword so spaces, '&', '#', etc. cannot break the query.
    query = quote(str(keyword), safe='')
    r_url = f'{domain}/search?q={query}&postedAfter={period}:days'

    browser = uc.Chrome()
    try:
        browser.get(r_url)
        # Fixed pause before reading the DOM — presumably to let the page
        # finish rendering; TODO confirm whether an explicit wait would do.
        time.sleep(3)
        html = browser.page_source
    finally:
        # Always shut the browser down, even when loading the page fails,
        # so no Chrome process is left behind.
        browser.quit()

    soup = BeautifulSoup(html, 'html.parser')
    script = soup.find('script', id='__NEXT_DATA__')
    if script is None:
        # No embedded state on the page: return a feed with no items
        # instead of failing with an AttributeError on None.
        data = {}
    else:
        data = json.loads(script.text)['props']['apolloState']

    posts = [value for key, value in data.items() if key.startswith('Product')]
    items = [parse(post) for post in posts]

    return {
        'title': f'{keyword} - Producthunt',
        'link': r_url,
        'description': f'{keyword} - Producthunt',
        'author': 'hillerliao',
        'items': items,
    }