Skip to content
This repository has been archived by the owner on Apr 12, 2019. It is now read-only.

Fixed worldcat #91

Open
wants to merge 2 commits into
base: develop
Choose a base branch
from
Open
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 11 additions & 4 deletions WorldCat/worldcat.js
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@ module.exports = function (returnToMaster) {

console.log("Worldcat pushing " + url);
request(url, function (error, response, body) {
linkCounter++;

if (error) {
console.log("Error while requesting worldcat: " + error)
return
Expand All @@ -31,7 +33,7 @@ module.exports = function (returnToMaster) {
if (resultsinfoCounter == 1) {
total_records = $(this).text();
}
resultsinfoCounter ++;
resultsinfoCounter++;
});
console.log("total records: " + total_records);
total_links = ceil((numeral(total_records)).value() / 10);
Expand All @@ -55,9 +57,9 @@ module.exports = function (returnToMaster) {

});

linkCounter++;

if (linkCounter > total_links) {
// worldcat only shows the first 5000 results (500 pages of 10 entries each)
console.log(linkCounter)
if (linkCounter == 500) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There's a 0 missing here, no?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nope, that's right: we get 10 entries per link, so that's 500 * 10 = 5000 in total.
But I just removed an unnecessary comment from cli.js that is no longer needed (it was there because the script used to run out of RAM).

clearInterval(refreshIntervalId);
var output = JSON.stringify(musicians); //convert it back to json
fs.writeFileSync('./scrapedoutput/worldcat/worldcat.json', output, 'utf8'); // write it back
Expand All @@ -68,6 +70,11 @@ module.exports = function (returnToMaster) {
page = page + 10;
url = "https://www.worldcat.org/search?q=dt%3Asco&fq=yr%3A1800&dblist=638&qt=page_number_link&start=" + page;

// worldcat only shows the first 5000 results; `page` is the result offset sent as `start`
if (page > 5000) {
clearInterval(refreshIntervalId);
}


}

Expand Down