# RSSHub-python/rsshub/spiders/sysu/ifcen.py

import asyncio
from urllib.parse import urljoin

from rsshub.utils import fetch_by_puppeteer

# Base URL of the site: fetched by ctx(), reported as the feed link, and used
# as the base when building each item's link in parse().
domain = 'https://ifcen.sysu.edu.cn/'


# Placeholder body shared by every item; the text tells readers to open the
# site itself because of its anti-scraping measures.
_DESCRIPTION = "网站严格反爬，请进入网站查看具体内容"

# (anchor XPath, title prefix) for each plain link list, in feed order.
_SECTIONS = (
    ('//div[@id="news-2"]/ul//a', '公告通知 | '),      # announcements
    ('//*[@id="news-1"]/ul/li/a', ''),                 # college news, no prefix
    ('//*[@id="notice-1"]/div//a', '人才工作 | '),     # talent / recruitment
    ('//*[@id="notice-2"]/div//a', '本科生教育 | '),   # undergraduate education
    ('//*[@id="notice-3"]/div//a', '研究生教育 | '),   # graduate education
    ('//*[@id="notice-4"]/div//a', '科研信息 | '),     # research information
    ('//*[@id="notice-5"]/div//a', '学工信息 | '),     # student affairs
    ('//*[@id="notice-6"]/div//a', '党建通知 | '),     # party-building notices
    ('//*[@id="notice-7"]/div//a', '工会工作 | '),     # trade union work
)

# Academic reports: anchors plus the text of the sibling "content" span.
_EVENT_XPATH = '//*[@id="event-1"]/li//a'
_EVENT_AUTHOR_XPATH = '//*[@id="event-1"]/li//span[@class="content"]/text()'
_EVENT_PREFIX = '学术报告 | '


def _make_item(title, href):
    """Build one feed item dict from a title and a (possibly relative) href."""
    return {
        'title': title,
        'description': _DESCRIPTION,
        # urljoin avoids the "//" that plain concatenation produces for
        # root-relative hrefs and leaves absolute URLs untouched.
        'link': urljoin(domain, href),
    }


def _link_items(selector, xpath, prefix=''):
    """Return feed items for every anchor matched by ``xpath``."""
    titles = selector.xpath(xpath + '/text()').getall()
    hrefs = selector.xpath(xpath + '/@href').getall()
    # zip stops at the shorter list, so a missing text node or href drops the
    # trailing entries instead of raising IndexError.
    return [_make_item(prefix + title, href) for title, href in zip(titles, hrefs)]


def parse(selector):
    """Extract feed items from the site's home page.

    Args:
        selector: Object exposing ``xpath(query)`` whose result has
            ``getall()`` returning a list of strings.

    Returns:
        A list of dicts with ``title``, ``description`` and ``link`` keys.
        Items follow the order of ``_SECTIONS``, then the academic reports.
        The list is empty when nothing matches.
    """
    items = []
    for xpath, prefix in _SECTIONS:
        items.extend(_link_items(selector, xpath, prefix))

    # Academic reports: the title is the anchor text followed by the text of
    # the "content" span (called `author` in the original code; presumably
    # the speaker -- confirm against the page markup).
    reports = selector.xpath(_EVENT_XPATH + '/text()').getall()
    authors = selector.xpath(_EVENT_AUTHOR_XPATH).getall()
    hrefs = selector.xpath(_EVENT_XPATH + '/@href').getall()
    # Pad so a report without a matching span keeps its bare title rather
    # than raising IndexError; a negative count yields an empty list.
    authors = authors + [''] * (len(reports) - len(authors))
    items.extend(
        _make_item(_EVENT_PREFIX + report + author, href)
        for report, author, href in zip(reports, authors, hrefs)
    )

    return items

def ctx(category=''):
    """Fetch the site's home page and return the feed context dict.

    Args:
        category: Accepted for interface compatibility; not used here.

    Returns:
        A dict with ``title``, ``link``, ``description``, ``author`` and
        ``items`` keys, where ``items`` is the result of ``parse`` on the
        fetched page.
    """
    # fetch_by_puppeteer is a coroutine function, so drive it to completion
    # synchronously before parsing.
    page = asyncio.run(fetch_by_puppeteer(domain))
    feed = dict(
        title='中山大学中法核官网信息',
        link=domain,
        description='中山大学中法核官网通知公告',
        author='echo',
    )
    feed['items'] = parse(page)
    return feed