From 722aa2daface07931a47d897047ad543d447c31d Mon Sep 17 00:00:00 2001
From: anime <street-anime-olive@duck.com>
Date: Tue, 8 Jul 2025 00:21:29 +0800
Subject: [PATCH 01/14] feat(getPageContentText): refactor content text
 extraction logic

- add special case handling for various block types
- maintain same functionality while improving structure

Close #2151
---
 pages/search/[keyword]/index.js | 160 +++++++++++++++++++++++---------
 1 file changed, 114 insertions(+), 46 deletions(-)

diff --git a/pages/search/[keyword]/index.js b/pages/search/[keyword]/index.js
index 4116e689..f0e63668 100644
--- a/pages/search/[keyword]/index.js
+++ b/pages/search/[keyword]/index.js
@@ -3,6 +3,7 @@ import { getDataFromCache } from '@/lib/cache/cache_manager'
 import { siteConfig } from '@/lib/config'
 import { getGlobalData } from '@/lib/db/getSiteData'
 import { DynamicLayout } from '@/themes/theme'
+import { checkStrIsUuid } from '@/lib/utils'
 
 const Index = props => {
   const theme = siteConfig('THEME', BLOG.THEME, props.NOTION_CONFIG)
@@ -58,42 +59,6 @@ export function getStaticPaths() {
   }
 }
 
-/**
- * 将对象的指定字段拼接到字符串
- * @param sourceTextArray
- * @param targetObj
- * @param key
- * @returns {*}
- */
-function appendText(sourceTextArray, targetObj, key) {
-  if (!targetObj) {
-    return sourceTextArray
-  }
-  const textArray = targetObj[key]
-  const text = textArray ? getTextContent(textArray) : ''
-  if (text && text !== 'Untitled') {
-    return sourceTextArray.concat(text)
-  }
-  return sourceTextArray
-}
-
-/**
- * 递归获取层层嵌套的数组
- * @param {*} textArray
- * @returns
- */
-function getTextContent(textArray) {
-  if (typeof textArray === 'object' && isIterable(textArray)) {
-    let result = ''
-    for (const textObj of textArray) {
-      result = result + getTextContent(textObj)
-    }
-    return result
-  } else if (typeof textArray === 'string') {
-    return textArray
-  }
-}
-
 /**
  * 对象是否可以遍历
  * @param {*} obj
@@ -124,12 +89,12 @@ async function filterByMemCache(allPosts, keyword) {
         : ''
     const articleInfo = post.title + post.summary + tagContent + categoryContent
     let hit = articleInfo.toLowerCase().indexOf(keyword) > -1
-    const indexContent = getPageContentText(post, page)
+    const contentTextList = getPageContentText(post, page)
     // console.log('全文搜索缓存', cacheKey, page != null)
     post.results = []
     let hitCount = 0
-    for (const i of indexContent) {
-      const c = indexContent[i]
+    for (const i of contentTextList) {
+      const c = contentTextList[i]
       if (!c) {
         continue
       }
@@ -152,17 +117,120 @@ async function filterByMemCache(allPosts, keyword) {
 }
 
 export function getPageContentText(post, pageBlockMap) {
-  let indexContent = []
+  /**
+   * 将对象的指定字段拼接到字符串
+   * @param sourceTextArray
+   * @param targetObj
+   * @param key
+   * @returns string
+   */
+  function getText(targetObj) {
+    if (!targetObj) {
+      return ''
+    }
+    const textArray = targetObj['title'] || targetObj['caption']
+    return getTextArray(textArray)
+  }
+
+  function getTextArray(textArray) {
+    const text = textArray ? getTextContent(textArray) : ''
+    if (text && text !== 'Untitled') {
+      return text
+    }
+    return ''
+  }
+
+  const removeTypeFlag = ['a', 'p', '‣']
+
+  /**
+   * 递归获取层层嵌套的数组
+   * @param {*} textArray
+   * @returns string
+   */
+  function getTextContent(textArray) {
+    if (typeof textArray === 'object' && isIterable(textArray)) {
+      let result = ''
+      for (const textObj of textArray) {
+        if (textArray.length > 1 && removeTypeFlag.includes(textArray[0])) {
+          return result
+        }
+        result = result + getTextContent(textObj)
+      }
+      return result
+    } else if (typeof textArray === 'string') {
+      if (checkStrIsUuid(textArray) && pageBlockMap.block[textArray]) {
+        return getBlockContentText(textArray)
+      } else if (textArray === pageBlockMap.block[postId].value.space_id) {
+        return ''
+      }
+      return textArray
+    }
+  }
+
+  function getTransclusionReference(block) {
+    const result = []
+    const blockPointer = block.format.transclusion_reference_pointer
+    const blockPointerId = blockPointer.id
+    if (blockPointer) {
+      const blockContentList = pageBlockMap.block[blockPointerId].value.content
+      for (const blockContent of blockContentList) {
+        result.push(getBlockContentText(blockContent))
+      }
+    }
+    return result.join('')
+  }
+
+  function getBlockContentText(id) {
+    const block = pageBlockMap?.block[id].value
+    const blockType = block.type
+    switch (blockType) {
+      case 'transclusion_reference':
+        return getTransclusionReference(block)
+      case 'table':
+        return getTableText(block.content)
+      case 'page':
+        if (id !== postId) {
+          return getText(block.properties)
+        }
+        return ''
+      case 'breadcrumb':
+      case 'divider':
+        return ''
+      case 'quote':
+      default:
+        const properties = block?.properties
+        return getText(properties)
+    }
+  }
+
+  function getTableText(tableRowIds) {
+    const result = []
+    for (const blockRowId of tableRowIds) {
+      if (pageBlockMap.block[blockRowId]) {
+        const blockRow = pageBlockMap.block[blockRowId].value
+        const blockRowProperties = blockRow.properties
+        for (const blockRowPropertyValue of Object.values(blockRowProperties)) {
+          result.push(getTextArray(blockRowPropertyValue))
+        }
+      }
+    }
+    return result.join('')
+  }
+
+  const postId = post.id
+  let contentTextList = []
   // 防止搜到加密文章的内容
   if (pageBlockMap && pageBlockMap.block && !post.password) {
     const contentIds = Object.keys(pageBlockMap.block)
-    contentIds.forEach(id => {
-      const properties = pageBlockMap?.block[id]?.value?.properties
-      indexContent = appendText(indexContent, properties, 'title')
-      indexContent = appendText(indexContent, properties, 'caption')
-    })
+    for (const id of contentIds) {
+      const blockContentText = getBlockContentText(id)
+      if (blockContentText) {
+        contentTextList.push(blockContentText)
+      }
+    }
   }
-  return indexContent.join('')
+  console.log(contentTextList.join(''))
+  return contentTextList.join('')
 }
 
 export default Index

From b4ba7d8f23a41294421c307e4633755ac421c907 Mon Sep 17 00:00:00 2001
From: anime <street-anime-olive@duck.com>
Date: Tue, 8 Jul 2025 00:28:58 +0800
Subject: [PATCH 02/14] refactor(getPageContentText): Relocate
 getPageContentText to a dedicated file and eliminate the redundant isIterable
 function.

---
 lib/notion/getPageContentText.js | 118 ++++++++++++++++++++++++++++
 lib/plugins/algolia.js           |   2 +-
 lib/utils/post.js                |   2 +-
 pages/search/[keyword]/index.js  | 127 +------------------------------
 4 files changed, 121 insertions(+), 128 deletions(-)
 create mode 100644 lib/notion/getPageContentText.js

diff --git a/lib/notion/getPageContentText.js b/lib/notion/getPageContentText.js
new file mode 100644
index 00000000..e636fab4
--- /dev/null
+++ b/lib/notion/getPageContentText.js
@@ -0,0 +1,118 @@
+import { checkStrIsUuid, isIterable } from '@/lib/utils'
+
+export function getPageContentText(post, pageBlockMap) {
+  /**
+   * 将对象的指定字段拼接到字符串
+   * @param sourceTextArray
+   * @param targetObj
+   * @param key
+   * @returns string
+   */
+  function getText(targetObj) {
+    if (!targetObj) {
+      return ''
+    }
+    const textArray = targetObj['title'] || targetObj['caption']
+    return getTextArray(textArray)
+  }
+
+  function getTextArray(textArray) {
+    const text = textArray ? getTextContent(textArray) : ''
+    if (text && text !== 'Untitled') {
+      return text
+    }
+    return ''
+  }
+
+  const removeTypeFlag = ['a', 'p', '‣']
+
+  /**
+   * 递归获取层层嵌套的数组
+   * @param {*} textArray
+   * @returns string
+   */
+  function getTextContent(textArray) {
+    if (typeof textArray === 'object' && isIterable(textArray)) {
+      let result = ''
+      for (const textObj of textArray) {
+        if (textArray.length > 1 && removeTypeFlag.includes(textArray[0])) {
+          return result
+        }
+        result = result + getTextContent(textObj)
+      }
+      return result
+    } else if (typeof textArray === 'string') {
+      if (checkStrIsUuid(textArray) && pageBlockMap.block[textArray]) {
+        return getBlockContentText(textArray)
+      } else if (textArray === pageBlockMap.block[postId].value.space_id) {
+        return ''
+      }
+      return textArray
+    }
+  }
+
+  function getTransclusionReference(block) {
+    const result = []
+    const blockPointer = block.format.transclusion_reference_pointer
+    const blockPointerId = blockPointer.id
+    if (blockPointer) {
+      const blockContentList = pageBlockMap.block[blockPointerId].value.content
+      for (const blockContent of blockContentList) {
+        result.push(getBlockContentText(blockContent))
+      }
+    }
+    return result.join('')
+  }
+
+  function getBlockContentText(id) {
+    const block = pageBlockMap?.block[id].value
+    const blockType = block.type
+    switch (blockType) {
+      case 'transclusion_reference':
+        return getTransclusionReference(block)
+      case 'table':
+        return getTableText(block.content)
+      case 'page':
+        if (id !== postId) {
+          return getText(block.properties)
+        }
+        return ''
+      case 'breadcrumb':
+      case 'divider':
+        return ''
+      case 'quote':
+      default:
+        const properties = block?.properties
+        return getText(properties)
+    }
+  }
+
+  function getTableText(tableRowIds) {
+    const result = []
+    for (const blockRowId of tableRowIds) {
+      if (pageBlockMap.block[blockRowId]) {
+        const blockRow = pageBlockMap.block[blockRowId].value
+        const blockRowProperties = blockRow.properties
+        for (const blockRowPropertyValue of Object.values(blockRowProperties)) {
+          result.push(getTextArray(blockRowPropertyValue))
+        }
+      }
+    }
+    return result.join('')
+  }
+
+  const postId = post.id
+  let contentTextList = []
+  // 防止搜到加密文章的内容
+  if (pageBlockMap && pageBlockMap.block && !post.password) {
+    const contentIds = Object.keys(pageBlockMap.block)
+    for (const id of contentIds) {
+      const blockContentText = getBlockContentText(id)
+      if (blockContentText) {
+        contentTextList.push(blockContentText)
+      }
+    }
+  }
+  console.log(contentTextList.join(''))
+  return contentTextList.join('')
+}
diff --git a/lib/plugins/algolia.js b/lib/plugins/algolia.js
index e6c76422..fceb7acc 100644
--- a/lib/plugins/algolia.js
+++ b/lib/plugins/algolia.js
@@ -1,6 +1,6 @@
 import BLOG from '@/blog.config'
-import { getPageContentText } from '@/pages/search/[keyword]'
 import algoliasearch from 'algoliasearch'
+import { getPageContentText } from '@/lib/notion/getPageContentText'
 
 /**
  * 生成全文索引
diff --git a/lib/utils/post.js b/lib/utils/post.js
index a3ee0f91..e4a22d2e 100644
--- a/lib/utils/post.js
+++ b/lib/utils/post.js
@@ -6,11 +6,11 @@ import { getPostBlocks } from '@/lib/db/getSiteData'
 import { getPageTableOfContents } from '@/lib/notion/getPageTableOfContents'
 import { siteConfig } from '@/lib/config'
 import { getDataFromCache, setDataToCache } from '@/lib/cache/cache_manager'
-import { getPageContentText } from '@/pages/search/[keyword]'
 import { getAiSummary } from '@/lib/plugins/aiSummary'
 import BLOG from '@/blog.config'
 import { uploadDataToAlgolia } from '@/lib/plugins/algolia'
 import { countWords } from '@/lib/plugins/wordCount'
+import { getPageContentText } from '@/lib/notion/getPageContentText'
 
 /**
  * 获取文章的关联推荐文章列表，目前根据标签关联性筛选
diff --git a/pages/search/[keyword]/index.js b/pages/search/[keyword]/index.js
index f0e63668..cb486895 100644
--- a/pages/search/[keyword]/index.js
+++ b/pages/search/[keyword]/index.js
@@ -3,7 +3,7 @@ import { getDataFromCache } from '@/lib/cache/cache_manager'
 import { siteConfig } from '@/lib/config'
 import { getGlobalData } from '@/lib/db/getSiteData'
 import { DynamicLayout } from '@/themes/theme'
-import { checkStrIsUuid } from '@/lib/utils'
+import { getPageContentText } from '@/lib/notion/getPageContentText'
 
 const Index = props => {
   const theme = siteConfig('THEME', BLOG.THEME, props.NOTION_CONFIG)
@@ -59,14 +59,6 @@ export function getStaticPaths() {
   }
 }
 
-/**
- * 对象是否可以遍历
- * @param {*} obj
- * @returns
- */
-const isIterable = obj =>
-  obj != null && typeof obj[Symbol.iterator] === 'function'
-
 /**
  * 在内存缓存中进行全文索引
  * @param {*} allPosts
@@ -116,121 +108,4 @@ async function filterByMemCache(allPosts, keyword) {
   return filterPosts
 }
 
-export function getPageContentText(post, pageBlockMap) {
-  /**
-   * 将对象的指定字段拼接到字符串
-   * @param sourceTextArray
-   * @param targetObj
-   * @param key
-   * @returns string
-   */
-  function getText(targetObj) {
-    if (!targetObj) {
-      return ''
-    }
-    const textArray = targetObj['title'] || targetObj['caption']
-    return getTextArray(textArray)
-  }
-
-  function getTextArray(textArray) {
-    const text = textArray ? getTextContent(textArray) : ''
-    if (text && text !== 'Untitled') {
-      return text
-    }
-    return ''
-  }
-
-  const removeTypeFlag = ['a', 'p', '‣']
-
-  /**
-   * 递归获取层层嵌套的数组
-   * @param {*} textArray
-   * @returns string
-   */
-  function getTextContent(textArray) {
-    if (typeof textArray === 'object' && isIterable(textArray)) {
-      let result = ''
-      for (const textObj of textArray) {
-        if (textArray.length > 1 && removeTypeFlag.includes(textArray[0])) {
-          return result
-        }
-        result = result + getTextContent(textObj)
-      }
-      return result
-    } else if (typeof textArray === 'string') {
-      if (checkStrIsUuid(textArray) && pageBlockMap.block[textArray]) {
-        return getBlockContentText(textArray)
-      } else if (textArray === pageBlockMap.block[postId].value.space_id) {
-        return ''
-      }
-      return textArray
-    }
-  }
-
-  function getTransclusionReference(block) {
-    const result = []
-    const blockPointer = block.format.transclusion_reference_pointer
-    const blockPointerId = blockPointer.id
-    if (blockPointer) {
-      const blockContentList = pageBlockMap.block[blockPointerId].value.content
-      for (const blockContent of blockContentList) {
-        result.push(getBlockContentText(blockContent))
-      }
-    }
-    return result.join('')
-  }
-
-  function getBlockContentText(id) {
-    const block = pageBlockMap?.block[id].value
-    const blockType = block.type
-    switch (blockType) {
-      case 'transclusion_reference':
-        return getTransclusionReference(block)
-      case 'table':
-        return getTableText(block.content)
-      case 'page':
-        if (id !== postId) {
-          return getText(block.properties)
-        }
-        return ''
-      case 'breadcrumb':
-      case 'divider':
-        return ''
-      case 'quote':
-      default:
-        const properties = block?.properties
-        return getText(properties)
-    }
-  }
-
-  function getTableText(tableRowIds) {
-    const result = []
-    for (const blockRowId of tableRowIds) {
-      if (pageBlockMap.block[blockRowId]) {
-        const blockRow = pageBlockMap.block[blockRowId].value
-        const blockRowProperties = blockRow.properties
-        for (const blockRowPropertyValue of Object.values(blockRowProperties)) {
-          result.push(getTextArray(blockRowPropertyValue))
-        }
-      }
-    }
-    return result.join('')
-  }
-
-  const postId = post.id
-  let contentTextList = []
-  // 防止搜到加密文章的内容
-  if (pageBlockMap && pageBlockMap.block && !post.password) {
-    const contentIds = Object.keys(pageBlockMap.block)
-    for (const id of contentIds) {
-      const blockContentText = getBlockContentText(id)
-      if (blockContentText) {
-        contentTextList.push(blockContentText)
-      }
-    }
-  }
-  console.log(contentTextList.join(''))
-  return contentTextList.join('')
-}
-
 export default Index

From 2a89027bb6c26d4164d1015f4ec036b1438ae046 Mon Sep 17 00:00:00 2001
From: anime <street-anime-olive@duck.com>
Date: Tue, 8 Jul 2025 00:32:16 +0800
Subject: [PATCH 03/14] chore(getPageContentText): add todo comments for future
 improvements

- Add todo comment for cleaning up more useless tags
- Add todo comment for handling more block types
---
 lib/notion/getPageContentText.js | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/lib/notion/getPageContentText.js b/lib/notion/getPageContentText.js
index e636fab4..9c29cc44 100644
--- a/lib/notion/getPageContentText.js
+++ b/lib/notion/getPageContentText.js
@@ -24,6 +24,7 @@ export function getPageContentText(post, pageBlockMap) {
     return ''
   }
 
+  // todo: 清除更多无用标签
   const removeTypeFlag = ['a', 'p', '‣']
 
   /**
@@ -67,6 +68,7 @@ export function getPageContentText(post, pageBlockMap) {
   function getBlockContentText(id) {
     const block = pageBlockMap?.block[id].value
     const blockType = block.type
+    // todo: 处理更多类型
     switch (blockType) {
       case 'transclusion_reference':
         return getTransclusionReference(block)

From d22e8bb177f2a48b616f4b25b97ca4cdfa3fed3d Mon Sep 17 00:00:00 2001
From: anime <street-anime-olive@duck.com>
Date: Tue, 8 Jul 2025 15:50:54 +0800
Subject: [PATCH 04/14] feat(getPageContentText): add null checks for block
 references

- Add validation for transclusion reference pointer existence
- Return empty string when block is not found
- Prevent potential errors from undefined block references
---
 lib/notion/getPageContentText.js | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/lib/notion/getPageContentText.js b/lib/notion/getPageContentText.js
index 9c29cc44..20c5a451 100644
--- a/lib/notion/getPageContentText.js
+++ b/lib/notion/getPageContentText.js
@@ -56,7 +56,7 @@ export function getPageContentText(post, pageBlockMap) {
     const result = []
     const blockPointer = block.format.transclusion_reference_pointer
     const blockPointerId = blockPointer.id
-    if (blockPointer) {
+    if (blockPointer && pageBlockMap.block[blockPointerId].value) {
       const blockContentList = pageBlockMap.block[blockPointerId].value.content
       for (const blockContent of blockContentList) {
         result.push(getBlockContentText(blockContent))
@@ -67,6 +67,9 @@ export function getPageContentText(post, pageBlockMap) {
 
   function getBlockContentText(id) {
     const block = pageBlockMap?.block[id].value
+    if (!block) {
+      return ''
+    }
     const blockType = block.type
     // todo: 处理更多类型
     switch (blockType) {

From b52f81815461bd428ce8759978da79ebc12205e6 Mon Sep 17 00:00:00 2001
From: anime <street-anime-olive@duck.com>
Date: Wed, 9 Jul 2025 13:11:52 +0800
Subject: [PATCH 05/14] feat(getPageContentText): replace custom text
 extraction with notion-utils

The getTextContent function was removed and replaced with an import from
notion-utils, simplifying the text extraction logic for Notion page
content. This change removes the custom recursive implementation and
uses the standardized utility function instead.
---
 lib/notion/getPageContentText.js | 30 +-----------------------------
 1 file changed, 1 insertion(+), 29 deletions(-)

diff --git a/lib/notion/getPageContentText.js b/lib/notion/getPageContentText.js
index 20c5a451..fa93c9a8 100644
--- a/lib/notion/getPageContentText.js
+++ b/lib/notion/getPageContentText.js
@@ -1,4 +1,4 @@
-import { checkStrIsUuid, isIterable } from '@/lib/utils'
+import { getTextContent } from 'notion-utils'
 
 export function getPageContentText(post, pageBlockMap) {
   /**
@@ -24,34 +24,6 @@ export function getPageContentText(post, pageBlockMap) {
     return ''
   }
 
-  // todo: 清除更多无用标签
-  const removeTypeFlag = ['a', 'p', '‣']
-
-  /**
-   * 递归获取层层嵌套的数组
-   * @param {*} textArray
-   * @returns string
-   */
-  function getTextContent(textArray) {
-    if (typeof textArray === 'object' && isIterable(textArray)) {
-      let result = ''
-      for (const textObj of textArray) {
-        if (textArray.length > 1 && removeTypeFlag.includes(textArray[0])) {
-          return result
-        }
-        result = result + getTextContent(textObj)
-      }
-      return result
-    } else if (typeof textArray === 'string') {
-      if (checkStrIsUuid(textArray) && pageBlockMap.block[textArray]) {
-        return getBlockContentText(textArray)
-      } else if (textArray === pageBlockMap.block[postId].value.space_id) {
-        return ''
-      }
-      return textArray
-    }
-  }
-
   function getTransclusionReference(block) {
     const result = []
     const blockPointer = block.format.transclusion_reference_pointer

From 44ff4bcb230a6f1145df8b934727062e72f22028 Mon Sep 17 00:00:00 2001
From: anime <street-anime-olive@duck.com>
Date: Wed, 9 Jul 2025 13:14:41 +0800
Subject: [PATCH 06/14] feat(getPageContentText): remove debug console.log from
 getPageContentText

Commented out console.log statement in getPageContentText.js
to remove debug output while maintaining the functionality.
---
 lib/notion/getPageContentText.js | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/notion/getPageContentText.js b/lib/notion/getPageContentText.js
index fa93c9a8..19130991 100644
--- a/lib/notion/getPageContentText.js
+++ b/lib/notion/getPageContentText.js
@@ -90,6 +90,6 @@ export function getPageContentText(post, pageBlockMap) {
       }
     }
   }
-  console.log(contentTextList.join(''))
+  // console.log(contentTextList.join(''))
   return contentTextList.join('')
 }

From 72d64d7184db69bb9b01f82bf585d38ac541935f Mon Sep 17 00:00:00 2001
From: anime <street-anime-olive@duck.com>
Date: Wed, 9 Jul 2025 13:51:43 +0800
Subject: [PATCH 07/14] feat(getPageContentText): enhance getPageContentText to
 handle nested block content

- refactor getText function to process block properties and content
- add support for recursive processing of nested block content
- improve null checks and error handling in block processing
- update getBlockContentText to handle block value safely
---
 lib/notion/getPageContentText.js | 27 ++++++++++++++++-----------
 1 file changed, 16 insertions(+), 11 deletions(-)

diff --git a/lib/notion/getPageContentText.js b/lib/notion/getPageContentText.js
index 19130991..e4b79b37 100644
--- a/lib/notion/getPageContentText.js
+++ b/lib/notion/getPageContentText.js
@@ -3,17 +3,23 @@ import { getTextContent } from 'notion-utils'
 export function getPageContentText(post, pageBlockMap) {
   /**
    * 将对象的指定字段拼接到字符串
-   * @param sourceTextArray
-   * @param targetObj
-   * @param key
+   * @param block
    * @returns string
    */
-  function getText(targetObj) {
-    if (!targetObj) {
+  function getText(block) {
+    const result = []
+    const properties = block.properties
+    if (!properties) {
       return ''
     }
-    const textArray = targetObj['title'] || targetObj['caption']
-    return getTextArray(textArray)
+    const textArray = properties['title'] || properties['caption']
+    result.push(getTextArray(textArray))
+    if (block['content']?.length > 0) {
+      for (const blockContent of block.content) {
+        result.push(getBlockContentText(blockContent))
+      }
+    }
+    return result.join('')
   }
 
   function getTextArray(textArray) {
@@ -38,7 +44,7 @@ export function getPageContentText(post, pageBlockMap) {
   }
 
   function getBlockContentText(id) {
-    const block = pageBlockMap?.block[id].value
+    const block = pageBlockMap?.block[id]?.value
     if (!block) {
       return ''
     }
@@ -51,7 +57,7 @@ export function getPageContentText(post, pageBlockMap) {
         return getTableText(block.content)
       case 'page':
         if (id !== postId) {
-          return getText(block.properties)
+          return getText(block)
         }
         return ''
       case 'breadcrumb':
@@ -59,8 +65,7 @@ export function getPageContentText(post, pageBlockMap) {
         return ''
       case 'quote':
       default:
-        const properties = block?.properties
-        return getText(properties)
+        return getText(block)
     }
   }
 

From 2a0f4fd49c086d92f3f98284dd45ff019621e4df Mon Sep 17 00:00:00 2001
From: anime <street-anime-olive@duck.com>
Date: Wed, 9 Jul 2025 13:58:48 +0800
Subject: [PATCH 08/14] feat(getPageContentText): ensure proper spacing in
 concatenated text content

Modify getPageContentText.js to use a space delimiter instead of an
empty string when joining text blocks. This change affects three helper
functions: getText, getTransclusionReference, and getTableText, so that
text from adjacent content blocks no longer runs together. The final
join in getPageContentText itself is left unchanged.
---
 lib/notion/getPageContentText.js | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/lib/notion/getPageContentText.js b/lib/notion/getPageContentText.js
index e4b79b37..79fe5689 100644
--- a/lib/notion/getPageContentText.js
+++ b/lib/notion/getPageContentText.js
@@ -19,7 +19,7 @@ export function getPageContentText(post, pageBlockMap) {
         result.push(getBlockContentText(blockContent))
       }
     }
-    return result.join('')
+    return result.join(' ')
   }
 
   function getTextArray(textArray) {
@@ -40,7 +40,7 @@ export function getPageContentText(post, pageBlockMap) {
         result.push(getBlockContentText(blockContent))
       }
     }
-    return result.join('')
+    return result.join(' ')
   }
 
   function getBlockContentText(id) {
@@ -80,7 +80,7 @@ export function getPageContentText(post, pageBlockMap) {
         }
       }
     }
-    return result.join('')
+    return result.join(' ')
   }
 
   const postId = post.id

From 3ad7605abe0551d3a583fb52c34517728a76639d Mon Sep 17 00:00:00 2001
From: anime <street-anime-olive@duck.com>
Date: Wed, 9 Jul 2025 14:09:37 +0800
Subject: [PATCH 09/14] fix: prevent processing content for page type blocks

- Skip content processing when block type is 'page'
- Re-enable the previously commented-out debug console.log of the
  concatenated content text, now wrapped in start/end markers
---
 lib/notion/getPageContentText.js | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/lib/notion/getPageContentText.js b/lib/notion/getPageContentText.js
index 79fe5689..18672cc0 100644
--- a/lib/notion/getPageContentText.js
+++ b/lib/notion/getPageContentText.js
@@ -14,7 +14,7 @@ export function getPageContentText(post, pageBlockMap) {
     }
     const textArray = properties['title'] || properties['caption']
     result.push(getTextArray(textArray))
-    if (block['content']?.length > 0) {
+    if (block.type !== 'page' && block['content']?.length > 0) {
       for (const blockContent of block.content) {
         result.push(getBlockContentText(blockContent))
       }
@@ -95,6 +95,6 @@ export function getPageContentText(post, pageBlockMap) {
       }
     }
   }
-  // console.log(contentTextList.join(''))
+  console.log('开始', contentTextList.join(''), '结束')
   return contentTextList.join('')
 }

From 9321b2dfaed30d631784472c91d05ae2eaa3d273 Mon Sep 17 00:00:00 2001
From: anime <street-anime-olive@duck.com>
Date: Wed, 9 Jul 2025 14:39:47 +0800
Subject: [PATCH 10/14] feat(getPageContentText): update getPageContentText to
 use post.content array

- modify content extraction logic to iterate over the post.content array
- avoid extra, incorrect results by no longer walking every key of
  pageBlockMap.block
---
 lib/notion/getPageContentText.js | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/lib/notion/getPageContentText.js b/lib/notion/getPageContentText.js
index 18672cc0..94f791a5 100644
--- a/lib/notion/getPageContentText.js
+++ b/lib/notion/getPageContentText.js
@@ -84,12 +84,12 @@ export function getPageContentText(post, pageBlockMap) {
   }
 
   const postId = post.id
+  const postContent = post.content
   let contentTextList = []
   // 防止搜到加密文章的内容
-  if (pageBlockMap && pageBlockMap.block && !post.password) {
-    const contentIds = Object.keys(pageBlockMap.block)
-    for (const id of contentIds) {
-      const blockContentText = getBlockContentText(id)
+  if (postContent.length > 0 && !post.password) {
+    for (const postContentId of postContent) {
+      const blockContentText = getBlockContentText(postContentId)
       if (blockContentText) {
         contentTextList.push(blockContentText)
       }

From 23550a61d3580d593a98e0b6bf886311e133d6ea Mon Sep 17 00:00:00 2001
From: anime <street-anime-olive@duck.com>
Date: Wed, 9 Jul 2025 15:19:47 +0800
Subject: [PATCH 11/14] feat(getPageContentText): add flexible property value
 retrieval and support for more block types

- Add getPropertyValue helper function for flexible property retrieval
- Enhance getText function to accept custom keys for property lookup
- Add support for additional block types: image, bookmark, callout, header
- Improve documentation with JSDoc comments for better code understanding
---
 lib/notion/getPageContentText.js | 31 +++++++++++++++++++++++++++++--
 1 file changed, 29 insertions(+), 2 deletions(-)

diff --git a/lib/notion/getPageContentText.js b/lib/notion/getPageContentText.js
index 94f791a5..dc94c8fc 100644
--- a/lib/notion/getPageContentText.js
+++ b/lib/notion/getPageContentText.js
@@ -1,18 +1,34 @@
 import { getTextContent } from 'notion-utils'
 
+/**
+ * 获取属性值，优先从 overrides 中读取，否则按顺序从 properties 中读取，最后返回默认值
+ * @param {Object} properties 原始属性对象
+ * @param {Array} keys 优先级字段名列表
+ * @param {Object} overrides 自定义覆盖对象（可选）
+ * @param {string} defaultValue 默认值（可选）
+ */
+function getPropertyValue(properties, keys, overrides = {}, defaultValue = '') {
+  for (const key of keys) {
+    if (overrides[key]) return overrides[key]
+    if (properties[key]) return properties[key]
+  }
+  return defaultValue
+}
+
 export function getPageContentText(post, pageBlockMap) {
   /**
    * 将对象的指定字段拼接到字符串
    * @param block
+   * @param customKeys 优先级字段名列表
    * @returns string
    */
-  function getText(block) {
+  function getText(block, customKeys = ['title', 'caption']) {
     const result = []
     const properties = block.properties
     if (!properties) {
       return ''
     }
-    const textArray = properties['title'] || properties['caption']
+    const textArray = getPropertyValue(properties, customKeys)
     result.push(getTextArray(textArray))
     if (block.type !== 'page' && block['content']?.length > 0) {
       for (const blockContent of block.content) {
@@ -61,9 +77,20 @@ export function getPageContentText(post, pageBlockMap) {
         }
         return ''
       case 'breadcrumb':
+      case 'external_object_instance':
       case 'divider':
         return ''
+      case 'image':
+        return getText(block, ['alt_text', 'title'])
+      // 除title以外,还有额外的link和description可供索引，但认为不需要
+      case 'bookmark':
       case 'quote':
+      case 'callout':
+      case 'header':
+      case 'sub_header':
+      case 'code':
+      case 'equation':
+      case 'text':
       default:
         return getText(block)
     }

From f9ac624498832b0d1b661b2bd158e1d92c9483f6 Mon Sep 17 00:00:00 2001
From: anime <street-anime-olive@duck.com>
Date: Wed, 9 Jul 2025 16:00:15 +0800
Subject: [PATCH 12/14] feat(getPageContentText): implement getFullTextContent
 for enhanced text extraction
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Add new getFullTextContent function to handle various Notion text formats
- Support equation extraction from decorated text
- Handle special characters like '⁍' and '‣' with proper content resolution
- Process date mentions, link mentions, and other reference types
- Replace getTextContent with getFullTextContent in getTextArray function
- Maintain backward compatibility with existing text processing
---
 lib/notion/getPageContentText.js | 60 ++++++++++++++++++++++++++++++--
 1 file changed, 57 insertions(+), 3 deletions(-)

diff --git a/lib/notion/getPageContentText.js b/lib/notion/getPageContentText.js
index dc94c8fc..d8095588 100644
--- a/lib/notion/getPageContentText.js
+++ b/lib/notion/getPageContentText.js
@@ -1,5 +1,3 @@
-import { getTextContent } from 'notion-utils'
-
 /**
  * 获取属性值，优先从 overrides 中读取，否则按顺序从 properties 中读取，最后返回默认值
  * @param {Object} properties 原始属性对象
@@ -15,6 +13,62 @@ function getPropertyValue(properties, keys, overrides = {}, defaultValue = '') {
   return defaultValue
 }
 
+/**
+ * Flatten Notion decorated text into one plain string: '⁍' items yield their 'e' decoration payload,
+ * '‣' items yield a date ('d') or link-mention ('lm') label, and every other item yields item[0] as-is.
+ *
+ * @param {Array} text - Notion Decoration[]-style array of [value, decorations] items; a non-array is passed through String()
+ * @returns {string} The concatenated text, or '' when text is falsy
+ */
+function getFullTextContent(text) {
+  if (!text) return ''
+
+  if (!Array.isArray(text)) return String(text)
+
+  return text.reduce((result, item) => {
+    const value = item[0]
+    const decorations = item[1]
+
+    if (value === '⁍') {
+      // Placeholder character: look for a decoration tagged 'e' (equation)
+      const equation = decorations?.find(d => d[0] === 'e')
+      if (equation) {
+        return result + equation[1] // append the 'e' payload (presumably the LaTeX source)
+      }
+      return result // no 'e' decoration: contribute nothing
+    }
+
+    if (value === '‣') {
+      const ref = Array.isArray(decorations) ? decorations[0] : null
+      const type = ref?.[0]
+      const data = ref?.[1]
+
+      switch (type) {
+        case 'd':
+          // 日期字符串
+          const date =
+            data?.start_date ||
+            data?.start_time ||
+            data?.end_date ||
+            data?.end_time ||
+            '[Date]'
+          return result + date // date mention: first truthy date/time field, else the '[Date]' placeholder
+        case 'lm':
+          // Link mention: prefer its title, then its href, else the '[Link]' placeholder
+          const title = data?.title || data?.href || '[Link]'
+          return result + title
+        // 'u' (user id) is not expanded; it and any unrecognized type add nothing
+        case 'u':
+        default:
+          return result
+      }
+    }
+
+    // Any other item: append its raw text value
+    return result + value
+  }, '')
+}
+
 export function getPageContentText(post, pageBlockMap) {
   /**
    * 将对象的指定字段拼接到字符串
@@ -39,7 +93,7 @@ export function getPageContentText(post, pageBlockMap) {
   }
 
   function getTextArray(textArray) {
-    const text = textArray ? getTextContent(textArray) : ''
+    const text = textArray ? getFullTextContent(textArray) : ''
     if (text && text !== 'Untitled') {
       return text
     }

From 9886e4d146cf7270d7b833e7dc847bde797192c6 Mon Sep 17 00:00:00 2001
From: anime <street-anime-olive@duck.com>
Date: Wed, 9 Jul 2025 16:12:12 +0800
Subject: [PATCH 13/14] feat(getPageContentText): add references to NotionX
 types and comment out debug log

- Add reference links to NotionX type definitions for better documentation
- Comment out debug console.log statement to clean up output
- Maintain existing functionality while improving code clarity
---
 lib/notion/getPageContentText.js | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/lib/notion/getPageContentText.js b/lib/notion/getPageContentText.js
index d8095588..06a11f7f 100644
--- a/lib/notion/getPageContentText.js
+++ b/lib/notion/getPageContentText.js
@@ -42,7 +42,7 @@ function getFullTextContent(text) {
       const ref = Array.isArray(decorations) ? decorations[0] : null
       const type = ref?.[0]
       const data = ref?.[1]
-
+      // todo: 处理更多类型 https://github.com/NotionX/react-notion-x/blob/9ee2d9334e260ee3600f4f8d7212f66b641b19cc/packages/notion-types/src/core.ts#L108
       switch (type) {
         case 'd':
           // 日期字符串
@@ -119,7 +119,7 @@ export function getPageContentText(post, pageBlockMap) {
       return ''
     }
     const blockType = block.type
-    // todo: 处理更多类型
+    // todo: 处理更多类型 https://github.com/NotionX/react-notion-x/blob/9ee2d9334e260ee3600f4f8d7212f66b641b19cc/packages/notion-types/src/block.ts#L3
     switch (blockType) {
       case 'transclusion_reference':
         return getTransclusionReference(block)
@@ -176,6 +176,6 @@ export function getPageContentText(post, pageBlockMap) {
       }
     }
   }
-  console.log('开始', contentTextList.join(''), '结束')
+  // console.log('开始', contentTextList.join(''), '结束')
   return contentTextList.join('')
 }

From ea1b76f5b7c2f10ce80efea85e022a64d8d9ca06 Mon Sep 17 00:00:00 2001
From: anime <street-anime-olive@duck.com>
Date: Wed, 9 Jul 2025 16:22:38 +0800
Subject: [PATCH 14/14] fix(getPageContentText): add null check for postContent
 before accessing length property

Ensure postContent exists before checking its length property to prevent
potential runtime errors when postContent is null or undefined.
---
 lib/notion/getPageContentText.js | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/notion/getPageContentText.js b/lib/notion/getPageContentText.js
index 06a11f7f..0e52083b 100644
--- a/lib/notion/getPageContentText.js
+++ b/lib/notion/getPageContentText.js
@@ -168,7 +168,7 @@ export function getPageContentText(post, pageBlockMap) {
   const postContent = post.content
   let contentTextList = []
   // 防止搜到加密文章的内容
-  if (postContent.length > 0 && !post.password) {
+  if (postContent && postContent.length > 0 && !post.password) {
     for (const postContentId of postContent) {
       const blockContentText = getBlockContentText(postContentId)
       if (blockContentText) {