feat: improve regex for title extraction

Enhanced the regex pattern to capture post titles more accurately by anchoring the match at the start of the text and adjusting the lookahead assertion: a title now ends at the first `。` or `:`, or just before a URL (`http` followed by a non-whitespace character). This improves the robustness of title extraction, ensuring more consistent and correct results across different post formats.
This commit is contained in:
ccbikai
2024-08-08 12:14:29 +08:00
parent 0b0d489b24
commit 44a6f86830

View File

@@ -115,7 +115,7 @@ function getPost($, item, { channel, staticProxy, index = 0 }) {
const content = $(item).find('.js-message_reply_text')?.length > 0
? modifyHTMLContent($, $(item).find('.tgme_widget_message_text.js-message_text'), { index })
: modifyHTMLContent($, $(item).find('.tgme_widget_message_text'), { index })
const title = content?.text()?.match(/[^。\n]*(?=[。\n]|http)/g)?.[0] ?? content?.text() ?? ''
const title = content?.text()?.match(/^.*?(?=[。:]|http\S)/g)?.[0] ?? content?.text() ?? ''
const id = $(item).attr('data-post')?.replace(`${channel}/`, '')
const tags = $(content).find('a[href^="?q="]')?.each((_index, a) => {