Skip to content

Commit

Permalink
adding new ground-truth that fixes link filtering bug
Browse files Browse the repository at this point in the history
  • Loading branch information
gautamh committed Nov 1, 2017
1 parent cc79be5 commit cf6e212
Showing 1 changed file with 13 additions and 5 deletions.
18 changes: 13 additions & 5 deletions ground-truth-bundle.js
Original file line number Diff line number Diff line change
Expand Up @@ -118364,6 +118364,18 @@ function cossim(x, y) {
return dot_product / (Math.sqrt(mag_x) * Math.sqrt(mag_y));
}

/** Checks whether two hostnames share the same last two DNS labels
 * (e.g. "blog.example.com" and "example.com" both end in "example.com").
 *
 * The comparison is case-insensitive and ignores a single trailing root
 * dot ("example.com." is treated like "example.com").
 *
 * Limitation: only the last two labels are compared, so hosts under a
 * multi-label public suffix (e.g. "a.co.uk" and "b.co.uk") are reported
 * as the same domain.
 *
 * @param {string} hostnameA - the first hostname to be compared
 * @param {string} hostnameB - the second hostname to be compared
 * @returns {boolean} true when the last two labels of both hostnames match
 */
function checkSameDomain(hostnameA, hostnameB) {
    // Normalize, then split into labels ordered from the top-level label down.
    const toReversedLabels = function (hostname) {
        return hostname
            .toLowerCase()
            .replace(/\.$/, '')
            .split('.')
            .reverse();
    };

    const labelsA = toReversedLabels(hostnameA);
    const labelsB = toReversedLabels(hostnameB);

    // Single-label hosts (e.g. "localhost") have an undefined second label on
    // both sides, so they match each other only when the first label matches.
    return labelsA[0] === labelsB[0] && labelsA[1] === labelsB[1];
}

//pipeline functions
/** Request callback which gets links from page HTML and passes them to a callback
*
Expand Down Expand Up @@ -118395,11 +118407,7 @@ function filterDomain(links, originalUrl, callback) {
}
trimmedOriginalHostname = originalUrlObject.hostname.replace("www.", "");
trimmedNewHostname = newUrlObject.hostname.replace("www.", "");
if (trimmedOriginalHostname.includes(trimmedNewHostname) || trimmedNewHostname.includes(trimmedOriginalHostname)) {
return false;
} else {
return true;
}
return !checkSameDomain(trimmedOriginalHostname, trimmedNewHostname);
});
console.log("originalUrl: " + originalUrl);
callback(null, outgoingLinks, originalUrl);
Expand Down

0 comments on commit cf6e212

Please sign in to comment.