Add Host header to requests for jintiankansha

This commit is contained in:
hillerliao
2020-04-17 20:09:19 +08:00
parent 419695fc7e
commit c56d37a6a7
2 changed files with 2 additions and 1 deletion

View File

@@ -22,7 +22,7 @@ def ctx(category=''):
# req_params = {'pageSize': '10','startDate':'-1', 'keyword': category, 'pageIndex': '1'}
# posts = requests.post(url, \
# data=req_params, headers=DEFAULT_HEADERS)
req_params = f'?keyword={category}&pageSize=100'
req_params = f'?keyword={category}&pageSize=150'
posts = requests.get(url+req_params)
import json
posts = json.loads(posts.text)['resultSet']

View File

@@ -13,6 +13,7 @@ def parse(post):
def ctx(category=''):
url = f'{domain}/column/{category}'
DEFAULT_HEADERS.update({'Host': 'www.jintiankansha.me'})
tree = fetch(url, headers=DEFAULT_HEADERS)
posts = tree.css('.cell.item')
items = list(map(parse, posts))