# RSSHub-python/rsshub/spiders/producthunt/search.py

import re
import json

from bs4 import BeautifulSoup
import undetected_chromedriver as uc

from rsshub.utils import DEFAULT_HEADERS

domain = 'https://www.producthunt.com'


def parse(post):
    """Convert one Product Hunt record into a feed item dict.

    Reads the ``name``, ``tagline`` and ``url`` keys of *post* and returns
    them as ``title``, ``description`` and ``link`` respectively. A missing
    key raises ``KeyError``.
    """
    return {
        'title': post['name'],
        'description': post['tagline'],
        'link': post['url'],
    }


def ctx2(keyword='', period=''):
    """Build the feed context for a Product Hunt search.

    Opens the search page in an ``undetected_chromedriver`` Chrome session,
    reads the JSON embedded in the ``__NEXT_DATA__`` script tag and turns
    every entry of ``props.apolloState`` whose key starts with ``Product``
    into a feed item via ``parse``.

    Args:
        keyword: Search term. It is percent-encoded before being placed in
            the query string so characters such as ``&``, ``#`` or spaces
            cannot corrupt the URL.
        period: Value sent as ``postedAfter=<period>:days``.

    Returns:
        A dict with ``title``, ``link``, ``description``, ``author`` and
        ``items``. ``items`` is empty when the page contains no
        ``__NEXT_DATA__`` script tag.

    Raises:
        KeyError: If the embedded JSON lacks ``props`` or ``apolloState``,
            or a product entry lacks a key that ``parse`` reads.
        json.JSONDecodeError: If the script tag content is not valid JSON.
    """
    import time
    from urllib.parse import quote

    # Not passed to the browser in this function; kept because it mutates
    # the shared headers dict that other code may read.
    DEFAULT_HEADERS.update({'Referer': domain})

    encoded_keyword = quote(str(keyword), safe='')
    r_url = f'{domain}/search?q={encoded_keyword}&postedAfter={period}:days'

    browser = uc.Chrome()
    try:
        browser.get(r_url)
        # Fixed pause before reading the page source -- presumably to let
        # client-side rendering / anti-bot checks finish; TODO confirm.
        time.sleep(3)
        html = browser.page_source
    finally:
        # Always release the Chrome process, even if loading the page fails.
        browser.quit()

    soup = BeautifulSoup(html, 'html.parser')
    script = soup.find('script', id='__NEXT_DATA__')
    if script is None:
        # No embedded state on the page: produce an empty feed rather than
        # failing with AttributeError on ``None.text``.
        data = {}
    else:
        data = json.loads(script.text)['props']['apolloState']

    items = [parse(v) for k, v in data.items() if k.startswith('Product')]

    return {
        'title': f'{keyword} - Producthunt',
        'link': r_url,
        'description': f'{keyword} - Producthunt',
        'author': 'hillerliao',
        'items': items
    }