Files
RSSHub-python/rsshub/utils.py
2020-02-05 22:57:23 +08:00

38 lines
1.3 KiB
Python

import re
from flask import Response
import requests
from parsel import Selector
# Default HTTP request headers: a desktop Chrome User-Agent string.
# Used as the default value of the ``headers`` parameter of ``fetch``.
DEFAULT_HEADERS = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/69.0.3497.100 Safari/537.36'
    ),
}
class XMLResponse(Response):
    """Flask ``Response`` that labels XML bodies as ``application/xml``.

    When the caller does not specify a content type and the body starts
    with an XML declaration (``<?xml``), the ``mimetype`` keyword is set to
    ``application/xml`` before delegating to ``Response.__init__``.
    """

    def __init__(self, response, **kwargs):
        """Build the response, defaulting the mimetype for XML bodies.

        Args:
            response: The response body. Only ``str`` and ``bytes``-like
                bodies are inspected for the XML declaration; any other
                value (``None``, an iterable, ...) is passed through
                untouched.
            **kwargs: Forwarded unchanged to ``Response.__init__``, except
                that ``mimetype`` may be added as described above.
        """
        # 'content_type' is the keyword Response actually accepts; the
        # historical 'contenttype' spelling is still honoured so existing
        # callers see no change in behaviour.
        type_given = any(
            key in kwargs for key in ('mimetype', 'content_type', 'contenttype')
        )
        if not type_given and self._looks_like_xml(response):
            kwargs['mimetype'] = 'application/xml'
        super().__init__(response, **kwargs)

    @staticmethod
    def _looks_like_xml(body):
        """Return True when *body* is text or bytes starting with ``<?xml``."""
        if isinstance(body, str):
            return body.startswith('<?xml')
        if isinstance(body, (bytes, bytearray)):
            # bytes.startswith() requires a bytes prefix, not a str one.
            return body.startswith(b'<?xml')
        return False
def fetch(url: str, headers: dict = DEFAULT_HEADERS, proxies: dict = None,
          timeout: float = 30):
    """Download *url* with an HTTP GET and parse the body with parsel.

    Args:
        url: Address to request.
        headers: HTTP headers sent with the request; defaults to
            ``DEFAULT_HEADERS``.
        proxies: Optional proxy mapping passed straight to ``requests.get``.
        timeout: Value passed as ``timeout`` to ``requests.get`` (seconds,
            or a ``(connect, read)`` tuple). Pass ``None`` to wait without
            limit, which was the behaviour before this parameter existed.

    Returns:
        A ``parsel.Selector`` built from the response text, or ``None``
        when the request raises or the server answers with an error status
        (the error is printed in that case).
    """
    try:
        # Without a timeout a stalled server would block the caller forever.
        res = requests.get(url, headers=headers, proxies=proxies, timeout=timeout)
        res.raise_for_status()
    except Exception as e:
        # Deliberately best-effort: report the failure and signal it to the
        # caller with None instead of propagating the exception.
        print(f'[Err] {e}')
        return None
    return Selector(text=res.text)
# Title patterns accepted by filter_content(), compiled once at import time
# instead of on every call. All are case-insensitive:
#   1. "... to|will|date|schedule <anything>results"
#   2. "... schedule|announce|to <anything>call"
#   3. "... release <anything>date"
# NOTE(review): these look like earnings-announcement headlines - confirm.
_TITLE_PATTERNS = (
    re.compile(r'(.*)(to|will|date|schedule) (.*)results', re.IGNORECASE),
    re.compile(r'(.*)(schedule|announce|to) (.*)call', re.IGNORECASE),
    re.compile(r'(.*)release (.*)date', re.IGNORECASE),
)


def filter_content(items):
    """Return the items whose ``'title'`` matches one of the title patterns.

    Patterns are applied with ``match`` and ``.`` does not cross newlines,
    so the whole phrase has to appear on the first line of the title.

    Args:
        items: Iterable of mappings, each with a ``'title'`` key.

    Returns:
        A new list with the matching items, in their original order.

    Raises:
        KeyError: If an item has no ``'title'`` key.
    """
    content = []
    for item in items:
        title = item['title']
        # A None / non-string title cannot match any pattern; skip it
        # rather than letting Pattern.match() raise TypeError.
        if not isinstance(title, str):
            continue
        if any(pattern.match(title) for pattern in _TITLE_PATTERNS):
            content.append(item)
    return content