mirror of
https://github.com/d0zingcat/NotionNext.git
synced 2026-05-14 15:09:22 +00:00
抓取数据逻辑优化;减少次数,提高速率,节约流量
This commit is contained in:
@@ -7,7 +7,7 @@ import { getConfigMapFromConfigPage } from '@/lib/notion/getNotionConfig'
|
||||
import getPageProperties, {
|
||||
adjustPageProperties
|
||||
} from '@/lib/notion/getPageProperties'
|
||||
import { getPostBlocks, getSingleBlock } from '@/lib/notion/getPostBlocks'
|
||||
import { fetchInBatches, getPostBlocks } from '@/lib/notion/getPostBlocks'
|
||||
import { compressImage, mapImgUrl } from '@/lib/notion/mapImage'
|
||||
import { deepClone } from '@/lib/utils'
|
||||
import { idToUuid } from 'notion-utils'
|
||||
@@ -371,13 +371,14 @@ const EmptyData = pageId => {
|
||||
* @returns {Promise<JSX.Element|null|*>}
|
||||
*/
|
||||
async function getDataBaseInfoByNotionAPI({ pageId, from }) {
|
||||
console.log('[Fetching Data]', pageId, from)
|
||||
const pageRecordMap = await getPostBlocks(pageId, from)
|
||||
if (!pageRecordMap) {
|
||||
console.error('can`t get Notion Data ; Which id is: ', pageId)
|
||||
return {}
|
||||
}
|
||||
pageId = idToUuid(pageId)
|
||||
const block = pageRecordMap.block || {}
|
||||
let block = pageRecordMap.block || {}
|
||||
const rawMetadata = block[pageId]?.value
|
||||
// Check Type Page-Database和Inline-Database
|
||||
if (
|
||||
@@ -402,6 +403,7 @@ async function getDataBaseInfoByNotionAPI({ pageId, from }) {
|
||||
collectionView,
|
||||
viewIds
|
||||
)
|
||||
|
||||
if (pageIds?.length === 0) {
|
||||
console.error(
|
||||
'获取到的文章列表为空,请检查notion模板',
|
||||
@@ -415,29 +417,22 @@ async function getDataBaseInfoByNotionAPI({ pageId, from }) {
|
||||
// console.log('有效Page数量', pageIds?.length)
|
||||
}
|
||||
|
||||
// 获取每篇文章基础数据
|
||||
// 抓取主数据库最多抓取1000个blocks,溢出的数block这里统一抓取一遍
|
||||
const blockIdsNeedFetch = []
|
||||
for (let i = 0; i < pageIds.length; i++) {
|
||||
const id = pageIds[i]
|
||||
const value = block[id]?.value
|
||||
if (!value) {
|
||||
// 如果找不到文章对应的block,说明发生了溢出,使用pageID再去请求
|
||||
const pageBlock = await getSingleBlock(id, from)
|
||||
if (pageBlock.block[id].value) {
|
||||
const properties =
|
||||
(await getPageProperties(
|
||||
id,
|
||||
pageBlock.block[id].value,
|
||||
schema,
|
||||
null,
|
||||
getTagOptions(schema)
|
||||
)) || null
|
||||
if (properties) {
|
||||
collectionData.push(properties)
|
||||
}
|
||||
}
|
||||
continue
|
||||
blockIdsNeedFetch.push(id)
|
||||
}
|
||||
}
|
||||
const fetchedBlocks = await fetchInBatches(blockIdsNeedFetch)
|
||||
block = Object.assign({}, block, fetchedBlocks)
|
||||
|
||||
// 获取每篇文章基础数据
|
||||
for (let i = 0; i < pageIds.length; i++) {
|
||||
const id = pageIds[i]
|
||||
const value = block[id]?.value || fetchedBlocks[id]?.value
|
||||
const properties =
|
||||
(await getPageProperties(
|
||||
id,
|
||||
@@ -446,6 +441,7 @@ async function getDataBaseInfoByNotionAPI({ pageId, from }) {
|
||||
null,
|
||||
getTagOptions(schema)
|
||||
)) || null
|
||||
|
||||
if (properties) {
|
||||
collectionData.push(properties)
|
||||
}
|
||||
|
||||
@@ -41,7 +41,7 @@ export async function getSingleBlock(id, from) {
|
||||
return pageBlock
|
||||
}
|
||||
|
||||
pageBlock = await getPageWithRetry(id, from)
|
||||
pageBlock = await getPageWithRetry(id, 'single_' + from)
|
||||
|
||||
if (pageBlock) {
|
||||
await setDataToCache(cacheKey, pageBlock)
|
||||
@@ -153,3 +153,38 @@ function filterPostBlocks(id, blockMap, slice) {
|
||||
}
|
||||
return clonePageBlock
|
||||
}
|
||||
|
||||
/**
 * Fetch blocks for the given ids in batches.
 * When the database's article list is fetched, blocks beyond a certain count
 * are dropped, so the missing blocks are fetched again in batches by page id.
 * @param {Array} ids ids of the blocks to fetch (presumably page-id strings — confirm against caller);
 *   null, undefined or an empty list resolves to an empty object without any request
 * @param {number} [batchSize=100] maximum number of ids passed to one getBlocks call; must be greater than 0
 * @returns {Promise<Object>} the `recordMap.block` entries of every response, merged into a single object
 * @throws {RangeError} when there are ids to fetch and batchSize is not greater than 0
 */
export const fetchInBatches = async (ids, batchSize = 100) => {
  // Nothing to fetch: return early instead of building an API client for no request.
  if (!ids?.length) {
    return {}
  }
  // A zero or negative step would never advance the loop below (endless requests),
  // and NaN would send a single empty request.
  if (!(batchSize > 0)) {
    throw new RangeError(
      `batchSize must be greater than 0, received: ${batchSize}`
    )
  }

  const authToken = BLOG.NOTION_ACCESS_TOKEN || null
  const api = new NotionAPI({
    authToken,
    userTimeZone: Intl.DateTimeFormat().resolvedOptions().timeZone
  })

  // Single accumulator, filled in place; on duplicate keys the later batch wins.
  const fetchedBlocks = {}
  // Batches are requested one after another, never concurrently.
  for (let i = 0; i < ids.length; i += batchSize) {
    const batch = ids.slice(i, i + batchSize)
    // Log the size of this request, not the total number of ids.
    console.log('[API-->>请求] Fetching missing blocks', batch.length)
    const start = Date.now()
    const pageChunk = await api.getBlocks(batch)
    const end = Date.now()
    console.log(
      `[API<<--响应] 耗时:${end - start}ms Fetching missing blocks count:${batch.length} `
    )

    // Object.assign ignores an undefined source, so a response without
    // recordMap.block simply contributes nothing.
    Object.assign(fetchedBlocks, pageChunk?.recordMap?.block)
  }
  return fetchedBlocks
}
|
||||
|
||||
Reference in New Issue
Block a user