Skip to content

Commit

Permalink
Fix fetchFile API losing saved files
Browse files Browse the repository at this point in the history
  • Loading branch information
coder-hxl committed Feb 5, 2023
1 parent 5bd0f08 commit 0edc053
Show file tree
Hide file tree
Showing 6 changed files with 108 additions and 76 deletions.
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
{
"private": true,
"name": "x-crawl",
"version": "0.1.4",
"version": "0.1.5",
"author": "CoderHxl",
"description": "XCrawl is a Nodejs multifunctional crawler library. Crawl HTML, JSON, file resources, etc. through simple configuration.",
"license": "MIT",
Expand Down
2 changes: 1 addition & 1 deletion publish/package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "x-crawl",
"version": "0.1.4",
"version": "0.1.5",
"author": "CoderHxl",
"description": "XCrawl is a Nodejs multifunctional crawler library. Crawl HTML, JSON, file resources, etc. through simple configuration.",
"license": "MIT",
Expand Down
75 changes: 42 additions & 33 deletions src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,15 @@ import path from 'node:path'
import { JSDOM } from 'jsdom'

import { batchRequest, syncBatchRequest, request } from './request'
import { isArray, isString, isUndefined, log, logError } from './utils'
import {
isArray,
isString,
isUndefined,
log,
logError,
logNumber,
logSuccess
} from './utils'

import {
IXCrawlBaseConifg,
Expand Down Expand Up @@ -135,39 +143,40 @@ export default class XCrawl {
intervalTime
)

return new Promise((resolve) => {
const container: IFetchCommon<IFileInfo> = []

requestRes.forEach((requestResItem, index) => {
const { id, statusCode, headers, data } = requestResItem

const mimeType = headers['content-type'] ?? ''
const suffix = mimeType.split('/').pop()
const fileName = new Date().getTime().toString()
const filePath = path.resolve(
fileConfig.storeDir,
`${fileName}.${suffix}`
)

fs.createWriteStream(filePath, 'binary').write(data, (err) => {
if (err) {
log(logError(`File save error at id ${id}: ${err.message}`))
} else {
const fileInfo: IFileInfo = {
fileName,
mimeType,
size: data.length,
filePath
}

container.push({ id, statusCode, headers, data: fileInfo })
}

if (index === requestRes.length - 1) {
resolve(container)
}
const container: IFetchCommon<IFileInfo> = []

requestRes.forEach((requestResItem) => {
const { id, headers, data } = requestResItem

const mimeType = headers['content-type'] ?? ''
const suffix = mimeType.split('/').pop()
const fileName = new Date().getTime().toString()
const filePath = path.resolve(
fileConfig.storeDir,
`${fileName}.${suffix}`
)

try {
fs.writeFileSync(filePath, data)

container.push({
...requestResItem,
data: { fileName, mimeType, size: data.length, filePath }
})
})
} catch (error: any) {
log(logError(`File save error at id ${id}: ${error.message}`))
}
})

const saveTotal = requestRes.length
const success = container.length
const error = requestRes.length - container.length
log(
`saveTotal: ${logNumber(saveTotal)}, success: ${logSuccess(
success
)}, error: ${logError(error)}`
)

return container
}
}
4 changes: 2 additions & 2 deletions src/request.ts
Original file line number Diff line number Diff line change
Expand Up @@ -208,7 +208,7 @@ export async function batchRequest(
error.forEach((message) => log(logError(message)))

log(
`total: ${logNumber(requestConifgs.length)}, success: ${logSuccess(
`requestsTotal: ${logNumber(requestConifgs.length)}, success: ${logSuccess(
success.length
)}, error: ${logError(error.length)}`
)
Expand Down Expand Up @@ -255,7 +255,7 @@ export async function syncBatchRequest(
log(logSuccess('All requests are over!'))

log(
`total: ${logNumber(requestConifgs.length)}, success: ${logSuccess(
`requestsTotal: ${logNumber(requestConifgs.length)}, success: ${logSuccess(
successTotal
)}, error: ${logError(errorTotal)}`
)
Expand Down
2 changes: 1 addition & 1 deletion test/start/index.js

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

99 changes: 61 additions & 38 deletions test/start/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,54 +3,77 @@ import XCrawl from '../../src'

const testXCrawl = new XCrawl({
timeout: 10000,
intervalTime: {
max: 3000,
min: 2000
},
intervalTime: { max: 2000, min: 1000 },
mode: 'async'
})

testXCrawl
.fetchData({
requestConifg: [
{ url: 'http://localhost:3001/home' },
{ url: 'http://localhost:9001/api/home/wonderfulplace' },
{ url: 'http://localhost:9001/api/home/goodprice' },
{ url: 'http://localhost:3001/home' },
{ url: 'http://localhost:9001/ai/home/goodprice' }
]
})
.then((res) => {
// console.log(res)
// testXCrawl
// .fetchData({
// requestConifg: [
// { url: 'http://localhost:3001/home' },
// { url: 'http://localhost:9001/api/home/wonderfulplace' },
// { url: 'http://localhost:9001/api/home/goodprice' },
// { url: 'http://localhost:3001/home' },
// { url: 'http://localhost:9001/ai/home/goodprice' }
// ]
// })
// .then((res) => {
// // console.log(res)
// })

testXCrawl.fetchHTML({ url: 'https://www.bilibili.com/' }).then((res) => {
const { jsdom } = res.data

const document = jsdom.window.document
const imgBoxEl = document.querySelectorAll('.bili-video-card__cover')

const imgUrls: string[] = []
imgBoxEl.forEach((item, index) => {
const img = item.lastChild as HTMLImageElement

if (index % 2) {
imgUrls.push('https:' + img.src)
} else {
imgUrls.push(img.src)
}
})

// testXCrawl.fetchHTML({ url: 'https://www.bilibili.com/' }).then((res) => {
// const { jsdom } = res.data
console.log(imgUrls)

// const document = jsdom.window.document
// const imgBoxEl = document.querySelectorAll('.bili-video-card__cover')
const requestConifg = imgUrls.map((url) => ({ url }))

// const imgUrls: string[] = []
// imgBoxEl.forEach((item, index) => {
// const img = item.lastChild as HTMLImageElement
testXCrawl
.fetchFile({
requestConifg,
fileConfig: { storeDir: path.resolve(__dirname, './upload') }
})
.then((res) => {
// console.log(res)
})
})

// if (index % 2) {
// imgUrls.push('https:' + img.src)
// } else {
// imgUrls.push(img.src)
// testXCrawl
// .fetchData({
// requestConifg: {
// url: 'http://localhost:9001/api/area/阳江市',
// method: 'POST',
// data: {
// type: 'plus',
// offset: 0,
// size: 20
// }
// }
// })
// .then((res) => {
// const room = res[0].data.data.list[0]
// const requestConifg = room.pictureUrls.map((item: any) => ({
// url: item
// }))

// console.log(imgUrls)

// const requestConifg = imgUrls.map((url) => ({ url }))

// testXCrawl
// .fetchFile({
// testXCrawl.fetchFile({
// requestConifg,
// fileConfig: { storeDir: path.resolve(__dirname, './upload') }
// })
// .then((res) => {
// console.log(res)
// fileConfig: {
// storeDir: path.resolve(__dirname, './upload')
// }
// })
// })
// })

0 comments on commit 0edc053

Please sign in to comment.