抓取数据逻辑优化;减少次数,提高速率,节约流量

This commit is contained in:
tangly1024.com
2024-05-15 17:46:25 +08:00
parent b6d656f05b
commit e1877b3d7a
2 changed files with 51 additions and 20 deletions

View File

@@ -41,7 +41,7 @@ export async function getSingleBlock(id, from) {
return pageBlock
}
pageBlock = await getPageWithRetry(id, from)
pageBlock = await getPageWithRetry(id, 'single_' + from)
if (pageBlock) {
await setDataToCache(cacheKey, pageBlock)
@@ -153,3 +153,38 @@ function filterPostBlocks(id, blockMap, slice) {
}
return clonePageBlock
}
/**
 * Fetch blocks for a list of ids in batches and merge the results.
 *
 * When the database post list is loaded, blocks beyond a certain count are
 * dropped, so the missing ones are fetched here by page id, at most
 * `batchSize` ids per `getBlocks` request. Batches are requested one after
 * another, not in parallel.
 *
 * @param {Array} ids - ids handed to `api.getBlocks`; null, undefined or an
 *   empty list returns `{}` without creating a client or issuing a request
 * @param {number} [batchSize=100] - maximum number of ids per request; must
 *   be greater than 0
 * @returns {Promise<Object>} a single object merged from every response's
 *   `recordMap.block`; later batches overwrite earlier keys
 * @throws {RangeError} if `batchSize` is not greater than 0
 */
export const fetchInBatches = async (ids, batchSize = 100) => {
  // Nothing to fetch: skip client construction and network traffic entirely.
  if (!ids?.length) {
    return {}
  }
  // A zero, negative or NaN step would never advance the loop below and
  // would keep hitting the API forever, so fail fast instead.
  if (!(batchSize > 0)) {
    throw new RangeError(
      `fetchInBatches: batchSize must be greater than 0, got ${batchSize}`
    )
  }

  const authToken = BLOG.NOTION_ACCESS_TOKEN || null
  const api = new NotionAPI({
    authToken,
    userTimeZone: Intl.DateTimeFormat().resolvedOptions().timeZone
  })

  const fetchedBlocks = {}
  for (let i = 0; i < ids.length; i += batchSize) {
    const batch = ids.slice(i, i + batchSize)
    // Note: the logs report the total number of ids, not the batch size.
    console.log('[API-->>请求] Fetching missing blocks', ids.length)
    const start = Date.now()
    const pageChunk = await api.getBlocks(batch)
    const end = Date.now()
    console.log(
      `[API<<--响应] 耗时:${end - start}ms Fetching missing blocks count:${ids.length} `
    )
    console.log('[API<<--响应]')
    // Merge in place; Object.assign ignores an undefined source, so a
    // response without recordMap.block simply contributes nothing.
    Object.assign(fetchedBlocks, pageChunk?.recordMap?.block)
  }
  return fetchedBlocks
}