fix: parse Audiences unread messages

This commit is contained in:
jxxghp
2026-05-16 16:41:11 +08:00
parent 3c055e2482
commit 9069dccb2a
2 changed files with 156 additions and 0 deletions

View File

@@ -13,6 +13,35 @@ from app.utils.string import StringUtils
class NexusAudiencesSiteUserInfo(NexusPhpSiteUserInfo):
schema = SiteSchema.NexusAudiences
def _parse_message_unread(self, html_text):
    """
    Parse the unread message count from the new Audiences top user bar.

    The parent implementation runs first. Only when it leaves
    ``self.message_unread`` falsy does this method look for the inbox
    link of the compact user bar and read the count from it.

    :param html_text: HTML text of the page to inspect
    :return: None; the result is stored in ``self.message_unread``
    """
    super()._parse_message_unread(html_text)
    if self.message_unread:
        # The parent parser already produced a truthy count.
        return

    # Two alternatives joined by "|":
    #   1. a compact-tool link to messages.php that is flagged as unread,
    #      either on the link itself or through a nested unread badge;
    #   2. any messages.php link whose title/aria-label mentions the inbox.
    inbox_link_xpath = (
        '//a[contains(@class, "site-userbar__compact-tool") and contains(@href, "messages.php") '
        'and (contains(@class, "site-userbar__compact-tool--has-unread") '
        'or .//*[contains(@class, "site-userbar__compact-tool-badge--unread")])]'
        '|//a[contains(@href, "messages.php") '
        'and (contains(@title, "收件箱") or contains(@aria-label, "收件箱"))]'
    )

    html = etree.HTML(html_text)
    try:
        if StringUtils.is_valid_html_element(html):
            for anchor in html.xpath(inbox_link_xpath):
                count = self.__parse_inbox_unread(anchor)
                if count is not None:
                    # The first link that yields a count wins.
                    self.message_unread = count
                    break
    finally:
        # Drop the reference to the parsed tree on every exit path.
        if html is not None:
            del html
def _parse_user_traffic_info(self, html_text):
"""
解析用户流量信息
@@ -128,6 +157,47 @@ class NexusAudiencesSiteUserInfo(NexusPhpSiteUserInfo):
self.seeding = StringUtils.str_int(active_match.group(1))
self.leeching = StringUtils.str_int(active_match.group(2))
def __parse_inbox_unread(self, message_link):
    """
    Extract the unread count from an Audiences inbox link.

    Candidate texts are tried in this order: the link's ``title``
    attribute, its ``aria-label`` attribute, then the text nodes of any
    nested badge element.

    :param message_link: the inbox ``<a>`` element
    :return: the first count a candidate text yields, or None when no
             candidate contains one
    """
    candidates = [
        message_link.get(attr_name) for attr_name in ("title", "aria-label")
    ]
    candidates.extend(
        message_link.xpath(
            './/*[contains(@class, "site-userbar__compact-tool-badge--unread") '
            'or contains(@class, "site-userbar__compact-tool-badge")]/text()'
        )
    )

    # Lazily evaluate candidates and stop at the first one that parses.
    parsed_counts = (
        self.__extract_inbox_unread(candidate) for candidate in candidates
    )
    return next(
        (count for count in parsed_counts if count is not None),
        None,
    )
@staticmethod
def __extract_inbox_unread(text: str):
    """
    Pull the unread count out of a piece of inbox text.

    The Audiences inbox badge is formatted as ``total/unread``, for
    example ``1749/172``; in that case the part after the slash is
    returned. Otherwise a single number following the inbox label is
    used.

    :param text: title, aria-label or badge text; may be empty or None
    :return: the count converted by ``StringUtils.str_int``, or None when
             the text contains no recognizable count
    """
    if not text:
        return None

    # Treat non-breaking spaces as spaces and collapse whitespace runs.
    cleaned = re.sub(r"\s+", " ", text.replace("\xa0", " ")).strip()
    if not cleaned:
        return None

    # (pattern, capture group holding the count), tried in order: the
    # "total/unread" form first, then a lone number after the inbox label.
    attempts = (
        (r"(?:收件箱\s*)?(\d[\d,]*)\s*/\s*(\d[\d,]*)", 2),
        (r"收件箱\s*(\d[\d,]*)", 1),
    )
    for pattern, group_index in attempts:
        found = re.search(pattern, cleaned)
        if found:
            return StringUtils.str_int(found.group(group_index))
    return None
def _parse_seeding_pages(self):
if not self._torrent_seeding_page:
return

View File

@@ -44,3 +44,89 @@ def test_audiences_userbar_metrics_override_generic_nexus_regex():
assert parser.bonus == 1973896.2
assert parser.seeding == 355
assert parser.leeching == 7
def test_audiences_inbox_total_unread_badge_uses_unread_part():
    """A ``total/unread`` inbox badge must yield the unread part (172)."""
    init_kwargs = {
        "site_name": "Audiences",
        "url": "https://audiences.me/",
        "site_cookie": "",
        "apikey": None,
        "token": None,
    }
    site_info = NexusAudiencesSiteUserInfo(**init_kwargs)

    # Compact user bar with an inbox link showing 1749/172 and an outbox
    # link showing 0.
    userbar_page = """
<html>
<body>
<div class="site-userbar__compact-actions">
<a class="site-userbar__compact-tool site-userbar__compact-tool--has-unread"
href="messages.php"
title="收件箱 1749/172"
aria-label="收件箱 1749/172">
<i class="fas fa-inbox" aria-hidden="true"></i>
<strong>收件箱</strong>
<span class="site-userbar__compact-tool-badge site-userbar__compact-tool-badge--unread">1749/172</span>
</a>
<a class="site-userbar__compact-tool"
href="messages.php?action=viewmailbox&amp;box=-1"
title="发件箱 0"
aria-label="发件箱 0">
<strong>发件箱</strong>
<span class="site-userbar__compact-tool-badge">0</span>
</a>
</div>
</body>
</html>
"""

    site_info._parse_message_unread(userbar_page)

    assert site_info.message_unread == 172
def test_audiences_table_unread_links_ignore_content_rows():
    """Only the unread row's message link is collected; no next page."""
    init_kwargs = {
        "site_name": "Audiences",
        "url": "https://audiences.me/",
        "site_cookie": "",
        "apikey": None,
        "token": None,
    }
    site_info = NexusAudiencesSiteUserInfo(**init_kwargs)

    # Mailbox table with three rows: an unread message, a summary row
    # without a link, and a message that is already read.
    mailbox_page = """
<html>
<body>
<table>
<tr>
<td class="rowfollow" align="center">
<img class="unreadpm" src="pic/trans.gif" alt="Unread" title="未读">
</td>
<td class="rowfollow" align="left">
<a href="messages.php?action=viewmessage&amp;id=4318225">种子被删除</a>
</td>
<td class="rowfollow" align="left">系统</td>
<td class="rowfollow" nowrap=""><span title="2026-05-07 23:01:58">8天17时前</span></td>
<td class="rowfollow"><input class="checkbox" type="checkbox" name="messages[]" value="4318225"></td>
</tr>
<tr>
<td colspan="5" style="padding: 8px;">消息摘要内容</td>
</tr>
<tr>
<td class="rowfollow" align="center">
<img class="readpm" src="pic/trans.gif" alt="Read" title="已读">
</td>
<td class="rowfollow" align="left">
<a href="messages.php?action=viewmessage&amp;id=4318000">已读消息</a>
</td>
<td class="rowfollow" align="left">系统</td>
<td class="rowfollow" nowrap=""><span title="2026-05-07 23:01:58">8天17时前</span></td>
<td class="rowfollow"><input class="checkbox" type="checkbox" name="messages[]" value="4318000"></td>
</tr>
</table>
</body>
</html>
"""

    collected_links = []
    following_page = site_info._parse_message_unread_links(
        mailbox_page, collected_links
    )

    assert collected_links == ["messages.php?action=viewmessage&id=4318225"]
    assert following_page is None