Skip to content

Commit

Permalink
do not calculate numeric metrics on tables with low row count or unique columns
Browse files Browse the repository at this point in the history
  • Loading branch information
Maxim Moinat committed Jan 16, 2020
1 parent b387479 commit 9dac4af
Show file tree
Hide file tree
Showing 3 changed files with 8 additions and 3 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ public Table(Table table) {
db = table.db;
name = table.name;
rowCount = table.rowCount;
rowsCheckedCount = table.rowsCheckedCount;
rowCheckedCount = table.rowCheckedCount;
comment = table.comment;
fields = new ArrayList<Field>(table.fields);
isStem = table.isStem;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ public interface ScanFieldName {
String Q1 = "25%";
String Q2 = "Median";
String Q3 = "75%";
String MAX = "Max%";
String MAX = "Max";
}


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -606,8 +606,13 @@ public void calculateNumericMetrics() {
return;
}

// To prevent low count values showing, only calculate if enough rows scanned and column not unique
if (this.nProcessed < 100 || this.uniqueCount == this.nProcessed) {
return;
}

if (tooManyValues) {
System.out.println("Estimations! Increase 'maxValues' for a better estimate");
System.out.println("Numeric metrics are estimations! Increase 'maxValues' for a better estimate");
}

// Unpack the values to a list of pairs; calculate sum, total count and mean
Expand Down

0 comments on commit 9dac4af

Please sign in to comment.