@staticmethod
def _fixup_next_page_url(next_page: str, userid: Optional[str]) -> Optional[str]:
    """
    Fix up the "next page" URL of the seeding list.

    The link is returned untouched when it already carries a non-empty
    ``userid`` query parameter. Otherwise ``userid`` is added (and ``type``
    defaults to ``seeding`` when the link does not specify one). When no
    user ID is known the URL cannot be completed, so ``None`` is returned
    and the caller stops paging.

    :param next_page: next-page URL parsed from the page
    :param userid: user ID of the current site account
    :return: the corrected next-page URL, or None when it cannot be built
    """
    # No link at all means there is nothing to follow.
    if not next_page:
        return None

    parsed_url = urlsplit(next_page)
    # Keep the parameters as an ordered list of pairs: converting them to a
    # dict would silently drop repeated keys such as "tag=a&tag=b".
    query_pairs = parse_qsl(parsed_url.query, keep_blank_values=True)

    # Match on the parameter *name*; a substring test would be fooled by
    # links like "page=12" when the user ID is "12".
    if any(key == "userid" and value for key, value in query_pairs):
        return next_page
    # Without a known user ID the URL cannot be completed; stop paging.
    if not userid:
        return None

    fixed_pairs = []
    userid_written = False
    for key, value in query_pairs:
        if key == "userid":
            # Only blank "userid" entries can reach this point: fill the
            # first one in place and drop any further blank duplicates.
            if not userid_written:
                fixed_pairs.append((key, userid))
                userid_written = True
            continue
        fixed_pairs.append((key, value))
    if not userid_written:
        fixed_pairs.append(("userid", userid))
    # Default to the seeding list only when the link does not pick a type.
    if all(key != "type" for key, _ in fixed_pairs):
        fixed_pairs.append(("type", "seeding"))

    return urlunsplit(parsed_url._replace(query=urlencode(fixed_pairs)))
return NexusPhpSiteUserInfo( + site_name="NexusPHP", + url="https://example.com/", + site_cookie="", + apikey=None, + token=None, + ) + + +def test_nexus_php_seeding_next_page_stops_when_userid_missing(): + """ + userid 未识别且下一页也缺少 userid 时应停止翻页而不是抛出异常。 + """ + parser = _build_parser() + html_text = """ + + + + +
标题大小在做种
+ 下一页 + + + """ + + next_page = parser._parse_user_torrent_seeding_info(html_text, multi_page=True) + + assert next_page is None + + +def test_nexus_php_seeding_next_page_checks_userid_parameter_name(): + """ + 下一页链接缺少 userid 参数时,即使链接中包含用户 ID 字符串也应补齐 userid。 + """ + parser = _build_parser() + parser.userid = "12" + html_text = """ + + + + +
标题大小在做种
+ 下一页 + + + """ + + next_page = parser._parse_user_torrent_seeding_info(html_text, multi_page=True) + query_params = parse_qs(urlsplit(next_page).query) + + assert query_params["page"] == ["12"] + assert query_params["type"] == ["seeding"] + assert query_params["userid"] == ["12"]