From e132f9a49056deffbe373f834063505931810521 Mon Sep 17 00:00:00 2001 From: TimH Date: Thu, 4 May 2017 23:54:54 +0200 Subject: [PATCH 1/2] fixed worldcat script as only the first 5000 entries are shown --- WorldCat/worldcat.js | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/WorldCat/worldcat.js b/WorldCat/worldcat.js index 0a891d3..8cf4e6a 100644 --- a/WorldCat/worldcat.js +++ b/WorldCat/worldcat.js @@ -19,6 +19,8 @@ module.exports = function (returnToMaster) { console.log("Worldcat pushing " + url); request(url, function (error, response, body) { + linkCounter++; + if (error) { console.log("Error while requesting worldcat: " + error) return @@ -31,7 +33,7 @@ module.exports = function (returnToMaster) { if (resultsinfoCounter == 1) { total_records = $(this).text(); } - resultsinfoCounter ++; + resultsinfoCounter++; }); console.log("total records: " + total_records); total_links = ceil((numeral(total_records)).value() / 10); @@ -55,9 +57,9 @@ module.exports = function (returnToMaster) { }); - linkCounter++; - - if (linkCounter > total_links) { + // worldcat only shows the first 5000 entries (500 pages) + console.log(linkCounter) + if (linkCounter == 500) { clearInterval(refreshIntervalId); var output = JSON.stringify(musicians); //convert it back to json fs.writeFileSync('./scrapedoutput/worldcat/worldcat.json', output, 'utf8'); // write it back @@ -68,6 +70,11 @@ module.exports = function (returnToMaster) { page = page + 10; url = "https://www.worldcat.org/search?q=dt%3Asco&fq=yr%3A1800&dblist=638&qt=page_number_link&start=" + page; + // worldcat only shows the first 5000 entries + if (page > 5000) { + clearInterval(refreshIntervalId); + } + } From cd759788075a61c780b907b339065707645e0669 Mon Sep 17 00:00:00 2001 From: TimH Date: Fri, 5 May 2017 09:27:54 +0200 Subject: [PATCH 2/2] removed unnecessary comment --- cli.js | 5 ----- 1 file changed, 5 deletions(-) diff --git a/cli.js b/cli.js index 3d23949..6bd79d5 100644 --- a/cli.js +++ b/cli.js @@ 
-1,8 +1,3 @@ -/* -When executing worldcat, use: -node --max_old_space_size=4096 cli.js worldcat - */ - const commander = require('commander'); const path = require('path'); const fs = require('fs');