From 44a6f86830ffd62d742964a13d3fc89edddf256e Mon Sep 17 00:00:00 2001 From: ccbikai Date: Thu, 8 Aug 2024 12:14:29 +0800 Subject: [PATCH] feat: improve regex for title extraction Enhance the regex pattern to more accurately capture titles in posts: the match is now anchored at the start of the text, and the lookahead assertion ends the title at the first `。` or `:`, or at `http` followed by a non-whitespace character, to better handle various punctuation marks and URLs. A newline is no longer a delimiter, so when the first line contains none of these delimiters the existing fallback to the full text applies. This improves the robustness of title extraction, ensuring more consistent and correct results across different post formats. --- src/lib/telegram/index.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lib/telegram/index.js b/src/lib/telegram/index.js index 5d1b45f..bd13182 100644 --- a/src/lib/telegram/index.js +++ b/src/lib/telegram/index.js @@ -115,7 +115,7 @@ function getPost($, item, { channel, staticProxy, index = 0 }) { const content = $(item).find('.js-message_reply_text')?.length > 0 ? modifyHTMLContent($, $(item).find('.tgme_widget_message_text.js-message_text'), { index }) : modifyHTMLContent($, $(item).find('.tgme_widget_message_text'), { index }) - const title = content?.text()?.match(/[^。\n]*(?=[。\n]|http)/g)?.[0] ?? content?.text() ?? '' + const title = content?.text()?.match(/^.*?(?=[。::]|http\S)/g)?.[0] ?? content?.text() ?? '' const id = $(item).attr('data-post')?.replace(`${channel}/`, '') const tags = $(content).find('a[href^="?q="]')?.each((_index, a) => {