From d9f41423bf2b7ddbf947d9ebad3a008acc40f90a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9F=90M=E6=B0=8F?= Date: Wed, 7 Aug 2024 11:20:25 +0000 Subject: [PATCH 1/9] chore: modify getPost function to handle different message text classes --- src/lib/telegram/index.js | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/lib/telegram/index.js b/src/lib/telegram/index.js index dddc5d7..1b28fe0 100644 --- a/src/lib/telegram/index.js +++ b/src/lib/telegram/index.js @@ -92,7 +92,9 @@ function modifyHTMLContent($, content, { index } = {}) { function getPost($, item, { channel, staticProxy, index = 0 }) { item = item ? $(item).find('.tgme_widget_message') : $('.tgme_widget_message') - const content = modifyHTMLContent($, $(item).find('.tgme_widget_message_text'), { index }) + const content = $(item).find('.js-message_reply_text').length > 0 + ? modifyHTMLContent($, $(item).find('.tgme_widget_message_text.js-message_text'), { index }) + : modifyHTMLContent($, $(item).find('.tgme_widget_message_text'), { index }) const title = content?.text()?.match(/[^。\n]*(?=[。\n]|http)/g)?.[0] ?? content?.text() ?? '' const id = $(item).attr('data-post')?.replace(`${channel}/`, '') From 4fa62bf68d6530dc3c3e7ca2a6f330ba9eb894e0 Mon Sep 17 00:00:00 2001 From: ccbikai Date: Wed, 7 Aug 2024 21:00:55 +0800 Subject: [PATCH 2/9] feat: add sanitize-html for content filtering Enhance RSS feed content safety by integrating sanitize-html to allow specific media tags and attributes, ensuring a secure and controlled presentation of content. --- package.json | 3 ++- pnpm-lock.yaml | 36 ++++++++++++++++++++++++++++++++++-- src/pages/rss.xml.js | 11 +++++++++-- 3 files changed, 45 insertions(+), 5 deletions(-) diff --git a/package.json b/package.json index 72b70c8..7dd14a2 100644 --- a/package.json +++ b/package.json @@ -22,7 +22,8 @@ "cheerio": "1.0.0-rc.12", "dayjs": "^1.11.12", "lru-cache": "^11.0.0", - "ofetch": "^1.3.4" + "ofetch": "^1.3.4", + "sanitize-html": "^2.13.0" }, "devDependencies": { "@antfu/eslint-config": "^2.24.1", diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index e577ddd..fff0b33 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -32,6 +32,9 @@ importers: ofetch: specifier: ^1.3.4 version: 1.3.4 + sanitize-html: + specifier: ^2.13.0 + version: 2.13.0 devDependencies: '@antfu/eslint-config': specifier: ^2.24.1 @@ -2261,6 +2264,10 @@ packages: deep-is@0.1.4: resolution: {integrity: sha512-oIPzksmTg4/MriiaYGO+okXDT7ztn/w3Eptv/+gSIdMdKsJo0u4CfYNFJPy+4SKMuCqGw2wxnA+URMg3t8a/bQ==} + deepmerge@4.3.1: + resolution: {integrity: sha512-3sUqbMEc77XqpdNO7FRyRog+eW3ph+GYCbj+rK+uYyRMuwsVy0rMiVtPn+QJlKFvWP/1PYpapqYn0Me2knFn+A==} + engines: {node: '>=0.10.0'} + defu@6.1.4: resolution: {integrity: sha512-mEQCMmwJu317oSz8CwdIOdwf3xMif1ttiM8LTufzc3g6kR+9Pe236twL8j3IYT1F7GfRgGcW6MWxzZjLIkuHIg==} @@ -2407,11 +2414,11 @@ packages: engines: {node: '>=0.8.0'} escape-string-regexp@4.0.0: - resolution: {integrity: sha512-TtpcNJ3XAzx3Gq8sWRzJaVajRs0uVxA2YAkdb1jm2YkPz4G6egUFAyA3n5vtEIZefPk5Wa4UXbKuS5fKkJWdgA==} + resolution: {integrity: sha1-FLqDpdNz49MR5a/KKc9b+tllvzQ=} engines: {node: '>=10'} escape-string-regexp@5.0.0: - resolution: {integrity: sha512-/veY75JbMK4j1yjvuUxuVsiS/hr/4iHs9FTT6cgTexxdE0Ly/glccBAkloH/DofkjRbZU3bnoj38mOmhkZ0lHw==} + resolution: {integrity: sha1-RoMSa1ALYXYvLb66zhgG6L4xscg=} engines: {node: '>=12'} eslint-compat-utils@0.5.1: @@ -3020,6 +3027,10 @@ packages: resolution: {integrity: sha512-+Pgi+vMuUNkJyExiMBt5IlFoMyKnr5zhJ4Uspz58WOhBF5QoIZkFyNHIbBAtHwzVAgk5RtndVNsDRN61/mmDqg==} engines: {node: '>=12'} + is-plain-object@5.0.0: + resolution: {integrity: sha1-RCf1CrNCnpAl6n1S6QQ6nvQVk0Q=} + engines: {node: '>=0.10.0'} + is-stream@3.0.0: resolution: {integrity: sha512-LnQR4bZ9IADDRSkvpqMGvt/tEJWclzklNgSw48V5EAaAeDd6qGvN8ei6k5p0tvxSR171VmGyHuTiAOfxAbr8kA==} engines: {node: ^12.20.0 || ^14.13.1 || >=16.0.0} @@ -3592,6 +3603,9 @@ packages: parse-latin@7.0.0: resolution: {integrity: sha512-mhHgobPPua5kZ98EF4HWiH167JWBfl4pvAIXXdbaVohtK7a6YBOy56kvhCqduqyo/f3yrHFWmqmiMg/BkBkYYQ==} + parse-srcset@1.0.2: + resolution: {integrity: sha1-8r0iH2zJcKk42IVWq8WJyqqiveE=} + parse5-htmlparser2-tree-adapter@7.0.0: resolution: {integrity: sha512-B77tOZrqqfUfnVcOrUvfdLbz4pu4RopLD/4vmu3HUPswwTA8OH0EMW9BlWR2B0RCoiZRAHEUu7IxeP1Pd1UU+g==} @@ -4069,6 +4083,9 @@ packages: safe-buffer@5.2.1: resolution: {integrity: sha1-Hq+fqb2x/dTsdfWPnNtOa3gn7sY=} + sanitize-html@2.13.0: + resolution: {integrity: sha512-Xff91Z+4Mz5QiNSLdLWwjgBDm5b1RU6xBT0+12rapjiaR7SwfRdjw8f+6Rir2MXKLrDicRFHdb51hGOAxmsUIA==} + sass-formatter@0.7.9: resolution: {integrity: sha512-CWZ8XiSim+fJVG0cFLStwDvft1VI7uvXdCNJYXhDvowiv+DsbD1nXLiQ4zrE5UBvj5DWZJ93cwN0NX5PMsr1Pw==} @@ -7019,6 +7036,8 @@ snapshots: deep-is@0.1.4: {} + deepmerge@4.3.1: {} + defu@6.1.4: {} delegates@1.0.0: {} @@ -7978,6 +7997,8 @@ snapshots: is-plain-obj@4.1.0: {} + is-plain-object@5.0.0: {} + is-stream@3.0.0: {} is-unicode-supported@1.3.0: {} @@ -8740,6 +8761,8 @@ snapshots: unist-util-visit-children: 3.0.0 vfile: 6.0.2 + parse-srcset@1.0.2: {} + parse5-htmlparser2-tree-adapter@7.0.0: dependencies: domhandler: 5.0.3 @@ -9243,6 +9266,15 @@ snapshots: safe-buffer@5.2.1: {} + sanitize-html@2.13.0: + dependencies: + deepmerge: 4.3.1 + escape-string-regexp: 4.0.0 + htmlparser2: 8.0.2 + is-plain-object: 5.0.0 + parse-srcset: 1.0.2 + postcss: 8.4.40 + sass-formatter@0.7.9: dependencies: suf-log: 2.5.3 diff --git a/src/pages/rss.xml.js b/src/pages/rss.xml.js index 9adc848..99b3826 100644 --- a/src/pages/rss.xml.js +++ b/src/pages/rss.xml.js @@ -1,5 +1,5 @@ import rss from '@astrojs/rss' - +import sanitizeHtml from 'sanitize-html' import { getChannelInfo } from '../lib/telegram' export const prerender = false @@ -22,7 +22,14 @@ export async function GET(Astro) { title: item.title, description: item.description, pubDate: new Date(item.datetime), - content: item.content, + content: sanitizeHtml(item.content, { + allowedTags: sanitizeHtml.defaults.allowedTags.concat(['img', 'video', 'audio']), + allowedAttributes: { + video: ['src', 'width', 'height', 'poster'], + audio: ['src', 'controls'], + img: ['src', 'width', 'height', 'loading'], + }, + }), })), }) } From 8db461e881a50ad1836b8cf1cd6781b5f9435d32 Mon Sep 17 00:00:00 2001 From: ccbikai Date: Wed, 7 Aug 2024 21:46:08 +0800 Subject: [PATCH 3/9] feat: add audio handling in posts Enhances post rendering by integrating audio element processing, ensuring audio files are correctly proxied and controls are enabled for user interaction. This improvement expands multimedia support within posts, enhancing user experience and engagement. --- src/lib/telegram/index.js | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/lib/telegram/index.js b/src/lib/telegram/index.js index 53ca3e9..bfe3de8 100644 --- a/src/lib/telegram/index.js +++ b/src/lib/telegram/index.js @@ -62,6 +62,13 @@ function getVideo($, item, { staticProxy, index }) { return $.html(video) + $.html(roundVideo) } +function getAudio($, item, { staticProxy }) { + const audio = $(item).find('.tgme_widget_message_voice') + audio?.attr('src', staticProxy + audio?.attr('src')) + ?.attr('controls', true) + return $.html(audio) +} + function getLinkPreview($, item, { staticProxy, index }) { const link = $(item).find('.tgme_widget_message_link_preview') const title = $(item).find('.link_preview_title')?.text() || $(item).find('.link_preview_site_name')?.text() @@ -111,13 +118,13 @@ function getPost($, item, { channel, staticProxy, index = 0 }) { $.html($(item).find('.tgme_widget_message_reply')?.wrapInner('')?.wrapInner('
')), getImages($, item, { staticProxy, id, index, title }), getVideo($, item, { staticProxy, id, index, title }), + getAudio($, item, { staticProxy, id, index, title }), content?.html(), getImageStickers($, item, { staticProxy, index }), getVideoStickers($, item, { staticProxy, index }), // $(item).find('.tgme_widget_message_sticker_wrap')?.html(), $(item).find('.tgme_widget_message_poll')?.html(), $.html($(item).find('.tgme_widget_message_document_wrap')), - $.html($(item).find('.tgme_widget_message_voice')?.attr('controls', true)), $.html($(item).find('.tgme_widget_message_location_wrap')), getLinkPreview($, item, { staticProxy, index }), ].filter(Boolean).join('').replace(/(url\(["'])((https?:)?\/\/)/g, (match, p1, p2, _p3) => { From 2a4a3be3ccc926227b646a6583a6e8ecafd09fdc Mon Sep 17 00:00:00 2001 From: ccbikai Date: Wed, 7 Aug 2024 21:49:38 +0800 Subject: [PATCH 4/9] feat: enhance null safety in content retrieval Improved content retrieval logic to handle potential null values, enhancing robustness and reliability of the Telegram message parsing functionality. --- src/lib/telegram/index.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lib/telegram/index.js b/src/lib/telegram/index.js index b1d4969..5b4a04e 100644 --- a/src/lib/telegram/index.js +++ b/src/lib/telegram/index.js @@ -99,7 +99,7 @@ function modifyHTMLContent($, content, { index } = {}) { function getPost($, item, { channel, staticProxy, index = 0 }) { item = item ? $(item).find('.tgme_widget_message') : $('.tgme_widget_message') - const content = $(item).find('.js-message_reply_text').length > 0 + const content = $(item).find('.js-message_reply_text')?.length > 0 ? modifyHTMLContent($, $(item).find('.tgme_widget_message_text.js-message_text'), { index }) : modifyHTMLContent($, $(item).find('.tgme_widget_message_text'), { index }) const title = content?.text()?.match(/[^。\n]*(?=[。\n]|http)/g)?.[0] ?? content?.text() ?? '' From e7cd5d61ca70eee0ef6e5b3d114b9b6649c0a1e3 Mon Sep 17 00:00:00 2001 From: ccbikai Date: Wed, 7 Aug 2024 21:56:03 +0800 Subject: [PATCH 5/9] feat: enhance reply handling in posts Refactor to improve handling of reply links within posts by introducing a dedicated function, ensuring more accurate and consistent URL modifications. This enhancement simplifies future maintenance and scalability of the codebase. --- src/lib/telegram/index.js | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/src/lib/telegram/index.js b/src/lib/telegram/index.js index 5b4a04e..5d1b45f 100644 --- a/src/lib/telegram/index.js +++ b/src/lib/telegram/index.js @@ -83,6 +83,19 @@ function getLinkPreview($, item, { staticProxy, index }) { return $.html(link) } +function getReply($, item, { channel }) { + const reply = $(item).find('.tgme_widget_message_reply') + reply?.wrapInner('')?.wrapInner('
') + + const href = reply?.attr('href') + if (href) { + const url = new URL(href) + reply?.attr('href', `${url.pathname}`.replace(channel, 'posts')) + } + + return $.html(reply) +} + function modifyHTMLContent($, content, { index } = {}) { $(content).find('.emoji')?.attr('style', '') $(content).find('a')?.each((_index, a) => { @@ -117,7 +130,7 @@ function getPost($, item, { channel, staticProxy, index = 0 }) { tags, text: content?.text(), content: [ - $.html($(item).find('.tgme_widget_message_reply')?.wrapInner('')?.wrapInner('
')), + getReply($, item, { channel }), getImages($, item, { staticProxy, id, index, title }), getVideo($, item, { staticProxy, id, index, title }), getAudio($, item, { staticProxy, id, index, title }), From 44a6f86830ffd62d742964a13d3fc89edddf256e Mon Sep 17 00:00:00 2001 From: ccbikai Date: Thu, 8 Aug 2024 12:14:29 +0800 Subject: [PATCH 6/9] feat: improve regex for title extraction Enhanced regex pattern to more accurately capture titles in posts by adjusting the lookahead assertion to better handle various punctuation marks and URLs. This improves the robustness of title extraction, ensuring more consistent and correct results across different post formats. --- src/lib/telegram/index.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lib/telegram/index.js b/src/lib/telegram/index.js index 5d1b45f..bd13182 100644 --- a/src/lib/telegram/index.js +++ b/src/lib/telegram/index.js @@ -115,7 +115,7 @@ function getPost($, item, { channel, staticProxy, index = 0 }) { const content = $(item).find('.js-message_reply_text')?.length > 0 ? modifyHTMLContent($, $(item).find('.tgme_widget_message_text.js-message_text'), { index }) : modifyHTMLContent($, $(item).find('.tgme_widget_message_text'), { index }) - const title = content?.text()?.match(/[^。\n]*(?=[。\n]|http)/g)?.[0] ?? content?.text() ?? '' + const title = content?.text()?.match(/^.*?(?=[。::]|http\S)/g)?.[0] ?? content?.text() ?? '' const id = $(item).attr('data-post')?.replace(`${channel}/`, '') const tags = $(content).find('a[href^="?q="]')?.each((_index, a) => { From 39e2ccbd8d219b08e46a32351c111fb1878aee1f Mon Sep 17 00:00:00 2001 From: ccbikai Date: Thu, 8 Aug 2024 12:23:41 +0800 Subject: [PATCH 7/9] feat: clarify Telegram channel setup in docs Improved documentation clarity on configuring Telegram channel usernames, added troubleshooting section for common deployment issues, and emphasized the necessity of public channels and correct username format to ensure successful deployment. --- README.md | 11 ++++++++++- README.zh-cn.md | 11 ++++++++++- 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 6587ec6..e371160 100644 --- a/README.md +++ b/README.md @@ -61,7 +61,7 @@ For detailed tutorials, see [Deploy your Astro site](https://docs.astro.build/en ## ⚒️ Configuration ```env -## Telegram channel name, required +## Telegram Channel Username, must be configured. The string of characters following t.me/ CHANNEL=miantiao_me ## Language and timezone settings, language options see [dayjs](https://github.com/iamkun/dayjs/tree/dev/src/locale) @@ -97,6 +97,15 @@ HOST=telegram.dog STATIC_PROXY= ``` +## Frequently Asked Questions + +1. Why is the content empty after deployment? + - Check if the channel is public, it must be public + - The channel username is a string, not a number + - Turn off the "Restricting Saving Content" setting in the channel + - Redeploy after modifying environment variables + - Telegram blocks public display of some sensitive channels, you can verify by visiting `https://t.me/s/channelusername`. + ## ☕ Sponsor 1. [Follow me on Telegram](https://t.me/miantiao_me) diff --git a/README.zh-cn.md b/README.zh-cn.md index 510916e..ea06fab 100644 --- a/README.zh-cn.md +++ b/README.zh-cn.md @@ -61,7 +61,7 @@ ## ⚒️ 配置 ```env -## Telegram 频道名称,必须配置 +## Telegram 频道用户名,必须配置。 t.me/ 后面那串字符 CHANNEL=miantiao_me ## 语言和时区设置,语言选项见[dayjs](https://github.com/iamkun/dayjs/tree/dev/src/locale) @@ -95,6 +95,15 @@ HOST=telegram.dog STATIC_PROXY= ``` +## 常问问题 + +1. 为什么部署后内容为空? + - 检查频道是否是公开的,必须是公开的 + - 频道用户名是字符串,不是数字 + - 关闭频道 Restricting Saving Content 设置项 + - 修改完环境变量后需要重新部署 + - Telegram 会屏蔽一些敏感频道的公开展示, 可以通过访问 `https://t.me/s/频道用户名` 确认 + ## ☕ 赞助 1. [在 Telegram 关注我](https://t.me/miantiao_me) From cbfd74b516b9609c553ad56b47c5fb7f7179c5ed Mon Sep 17 00:00:00 2001 From: ccbikai Date: Thu, 8 Aug 2024 12:25:14 +0800 Subject: [PATCH 8/9] Update FAQ section headers for clarity and consistency Refactor FAQ headers to use emojis for improved readability and visual appeal across multiple language README files. --- README.md | 2 +- README.zh-cn.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index e371160..4b49a3a 100644 --- a/README.md +++ b/README.md @@ -97,7 +97,7 @@ HOST=telegram.dog STATIC_PROXY= ``` -## Frequently Asked Questions +## 🙋🏻 FAQs 1. Why is the content empty after deployment? - Check if the channel is public, it must be public diff --git a/README.zh-cn.md b/README.zh-cn.md index ea06fab..dc840ef 100644 --- a/README.zh-cn.md +++ b/README.zh-cn.md @@ -95,7 +95,7 @@ HOST=telegram.dog STATIC_PROXY= ``` -## 常问问题 +## 🙋🏻 常问问题 1. 为什么部署后内容为空? - 检查频道是否是公开的,必须是公开的 From fc1dd2b4f559df63617e5a60e4adf953c5b795b1 Mon Sep 17 00:00:00 2001 From: ccbikai Date: Thu, 8 Aug 2024 20:59:52 +0800 Subject: [PATCH 9/9] feat: specify SEO and RSS paths in docs Clarify SEO and RSS endpoints for better user understanding. --- README.md | 4 ++-- README.zh-cn.md | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 4b49a3a..ebc2bb9 100644 --- a/README.md +++ b/README.md @@ -9,9 +9,9 @@ English | [简体中文](./README.zh-cn.md) ## ✨ Features - **Turn your Telegram Channel into a MicroBlog** -- **SEO friendly** +- **SEO friendly** `/sitemap.xml` - **0 JS on the browser side** -- **RSS and RSS JSON** +- **RSS and RSS JSON** `/rss.xml` `/rss.json` ## 🪧 Demo diff --git a/README.zh-cn.md b/README.zh-cn.md index dc840ef..ec3cdc9 100644 --- a/README.zh-cn.md +++ b/README.zh-cn.md @@ -9,9 +9,9 @@ ## ✨ 特性 - **将 Telegram Channel 转为微博客** -- **SEO 友好** +- **SEO 友好** `/sitemap.xml` - **浏览器端 0 JS** -- **提供 RSS 和 RSS JSON** +- **提供 RSS 和 RSS JSON** `/rss.xml` `/rss.json` ## 🪧 演示