Files
RSSHub-python/rsshub/spiders/producthunt/search.py
2022-12-15 22:41:42 +08:00

43 lines
1.1 KiB
Python

import re
import json
from bs4 import BeautifulSoup
import undetected_chromedriver as uc
from rsshub.utils import DEFAULT_HEADERS
domain = 'https://www.producthunt.com'
def parse(post):
item = {}
item['title'] = post['name']
item['description'] = post['tagline']
item['link'] = post['url']
return item
def ctx2(keyword='', period=''):
DEFAULT_HEADERS.update({'Referer': domain})
r_url = f'{domain}' + f'/search?q={keyword}&postedAfter={period}:days'
browser = uc.Chrome()
browser.get(r_url)
import time
time.sleep(3)
html = browser.page_source
soup = BeautifulSoup(html, 'html.parser')
script = soup.find('script', id='__NEXT_DATA__')
data = json.loads(script.text)['props']['apolloState']
browser.quit()
posts = [ v for k, v in data.items() if k.startswith('Product')]
items = list(map(parse, posts))
return {
'title': f'{keyword} - Producthunt',
'link': r_url,
'description': f'{keyword} - Producthunt',
'author': 'hillerliao',
'items': items
}