diff --git a/provider/docs/bin/create-archive.ts b/provider/docs/bin/create-archive.ts
index 0c5d123a..536e192a 100644
--- a/provider/docs/bin/create-archive.ts
+++ b/provider/docs/bin/create-archive.ts
@@ -69,12 +69,16 @@ if (!kind || !ARCHIVE_KINDS[kind]) {
 const archiveHandler = ARCHIVE_KINDS[kind]
 const options = archiveHandler.toOptions ? archiveHandler.toOptions(optionsRaw) : optionsRaw
 
+// Start the clock before building the archive; the elapsed time reported below also covers serialization.
+const t0 = performance.now()
 const archive = await archiveHandler.createFn(options)
-
-const data = JSON.stringify(archive, null, 2)
+const data = JSON.stringify(archive)
+// Status line goes to stderr; the serialized archive itself is written to stdout below.
 console.error(
-    `# Archive complete: ${archive.docs.length} docs (${(data.length / 1024 / 1024).toFixed(1)} MB), content ID: ${
-        archive.contentID
-    }`
+    `# Archive complete [${Math.round(performance.now() - t0)}ms]: ${archive.docs.length} docs (${(
+        data.length /
+        1024 /
+        1024
+    ).toFixed(1)} MB), content ID: ${archive.contentID}, description ${JSON.stringify(archive.description)}`
 )
 process.stdout.write(data)
diff --git a/provider/docs/bin/create-index.ts b/provider/docs/bin/create-index.ts
index 46bc9aa5..d91e4645 100644
--- a/provider/docs/bin/create-index.ts
+++ b/provider/docs/bin/create-index.ts
@@ -18,9 +18,23 @@ if (args.length !== 0) {
 }
 
 const archive: CorpusArchive = await readJSONFromStdin()
-console.error(`# Indexing archive: ${archive.docs.length} docs, content ID ${archive.contentID}`)
+console.error(
+    `# Using archive: ${archive.docs.length} docs, content ID ${archive.contentID}, description ${JSON.stringify(
+        archive.description
+    )}`
+)
 
+// Time index creation and serialization only; reading the archive from stdin above is excluded.
+const t0 = performance.now()
 const index = await createCorpusIndex(archive, { contentExtractor: extractContentUsingMozillaReadability })
+const data = JSON.stringify(index)
+// Round the size to one decimal place, matching the "Archive complete" message in create-archive.ts.
+console.error(
+    `# Index complete [${Math.round(performance.now() - t0)}ms]: ${index.docs.length} docs (${(
+        data.length / 1024 / 1024
+    ).toFixed(1)} MB)`
+)
+process.stdout.write(data)
 
 function readJSONFromStdin(): Promise {
     return new Promise((resolve, reject) => {