From cf6e212e1d4218782ec65a65c3b8c597c1a4269f Mon Sep 17 00:00:00 2001 From: Gautam Hathi Date: Tue, 31 Oct 2017 23:14:15 -0700 Subject: [PATCH] adding new ground-truth that fixes link filtering bug --- ground-truth-bundle.js | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/ground-truth-bundle.js b/ground-truth-bundle.js index aa77446..fc24cb1 100644 --- a/ground-truth-bundle.js +++ b/ground-truth-bundle.js @@ -118364,6 +118364,18 @@ function cossim(x, y) { return dot_product / (Math.sqrt(mag_x) * Math.sqrt(mag_y)); } +/** Checks if two hostnames share the same top-level and second-level domain labels (their last two dot-separated parts) + * + * @param {string} hostnameA - the first hostname to be compared + * @param {string} hostnameB - the second hostname to be compared + */ +function checkSameDomain(hostnameA, hostnameB) { + var domainsA = hostnameA.split('.').reverse(); + var domainsB = hostnameB.split('.').reverse(); + + return (domainsA[0] == domainsB[0] && domainsA[1] == domainsB[1]); +} + //pipeline functions /** Request callback which gets links from page HTML and passes them to a callback * @@ -118395,11 +118407,7 @@ function filterDomain(links, originalUrl, callback) { } trimmedOriginalHostname = originalUrlObject.hostname.replace("www.", ""); trimmedNewHostname = newUrlObject.hostname.replace("www.", ""); - if (trimmedOriginalHostname.includes(trimmedNewHostname) || trimmedNewHostname.includes(trimmedOriginalHostname)) { - return false; - } else { - return true; - } + return !checkSameDomain(trimmedOriginalHostname, trimmedNewHostname); }); console.log("originalUrl: " + originalUrl); callback(null, outgoingLinks, originalUrl);