mirror of
https://github.com/d0zingcat/NotionNext.git
synced 2026-05-13 23:16:47 +00:00
feat(getPageContentText): implement getFullTextContent for enhanced text extraction
- Add new getFullTextContent function to handle various Notion text formats
- Support equation extraction from decorated text
- Handle special characters like '⁍' and '‣' with proper content resolution
- Process date mentions, link mentions, and other reference types
- Replace getTextContent with getFullTextContent in getTextArray function
- Maintain backward compatibility with existing text processing
This commit is contained in:
@@ -1,5 +1,3 @@
|
||||
import { getTextContent } from 'notion-utils'
|
||||
|
||||
/**
|
||||
* 获取属性值,优先从 overrides 中读取,否则按顺序从 properties 中读取,最后返回默认值
|
||||
* @param {Object} properties 原始属性对象
|
||||
@@ -15,6 +13,62 @@ function getPropertyValue(properties, keys, overrides = {}, defaultValue = '') {
|
||||
return defaultValue
|
||||
}
|
||||
|
||||
/**
 * Extract the plain-text content of Notion decorated text.
 *
 * The input is treated as a list of `[value, decorations]` segments:
 * - A segment whose value is '⁍' contributes the payload of its 'e'
 *   (equation) decoration, or nothing when no such decoration exists.
 * - A segment whose value is '‣' is a reference placeholder. A 'd' (date)
 *   decoration contributes its first available start/end date or time
 *   (falling back to '[Date]'); an 'lm' (link mention) decoration contributes
 *   its title, then href, then '[Link]'. Any other reference (for example a
 *   'u' user id) contributes nothing, because it cannot be resolved here.
 * - Every other segment contributes its value unchanged.
 *
 * Segments that are null/undefined or carry no value are skipped, so the
 * result never contains the literal text "undefined".
 *
 * @param {Array|*} text - Notion Decoration[] style array of text segments.
 *   A falsy value yields ''; any other non-array value is stringified.
 * @returns {string} The concatenated plain text.
 */
function getFullTextContent(text) {
  if (!text) return ''

  if (!Array.isArray(text)) return String(text)

  return text.reduce((result, item) => {
    const value = item?.[0]

    // A missing segment or missing value has nothing to contribute.
    if (value == null) return result

    // Anything that is not an array is treated as "no decorations".
    const decorations = Array.isArray(item[1]) ? item[1] : []

    if (value === '⁍') {
      // Equation placeholder: the LaTeX source lives in the 'e' decoration.
      const equation = decorations.find(d => d?.[0] === 'e')
      return result + (equation?.[1] ?? '')
    }

    if (value === '‣') {
      // Look through all decorations, not just the first one, so that a
      // mention combined with formatting decorations is still recognised.
      const mention = decorations.find(d => d?.[0] === 'd' || d?.[0] === 'lm')
      if (!mention) return result

      const data = mention[1]

      if (mention[0] === 'd') {
        const date =
          data?.start_date ||
          data?.start_time ||
          data?.end_date ||
          data?.end_time ||
          '[Date]'
        return result + date
      }

      // Remaining case is 'lm' (link mention).
      return result + (data?.title || data?.href || '[Link]')
    }

    // Ordinary text segment.
    return result + value
  }, '')
}
|
||||
|
||||
export function getPageContentText(post, pageBlockMap) {
|
||||
/**
|
||||
* 将对象的指定字段拼接到字符串
|
||||
@@ -39,7 +93,7 @@ export function getPageContentText(post, pageBlockMap) {
|
||||
}
|
||||
|
||||
function getTextArray(textArray) {
|
||||
const text = textArray ? getTextContent(textArray) : ''
|
||||
const text = textArray ? getFullTextContent(textArray) : ''
|
||||
if (text && text !== 'Untitled') {
|
||||
return text
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user