From d44f27c7eeac936a836cd35c5ca760549dbd8c84 Mon Sep 17 00:00:00 2001
From: Chris Nuernberger
Date: Mon, 13 Jan 2025 14:34:50 -0700
Subject: [PATCH] Fixes #443 - this approach preserves metadata on the dataset

---
NOTE(review): this patch was reconstructed from a copy whose line breaks had
been collapsed. Leading whitespace on the context and removed ('-') lines is a
best guess - confirm it against the pre-image before applying. The From: header
carries no e-mail address and the post-image blob id on the index line predates
the docstring/filter edits below, so both need refreshing before `git am`.

 src/tech/v3/dataset_api.clj | 22 +++++++++++++++++++---
 1 file changed, 19 insertions(+), 3 deletions(-)

diff --git a/src/tech/v3/dataset_api.clj b/src/tech/v3/dataset_api.clj
index 1a3f8664..fb3706a0 100644
--- a/src/tech/v3/dataset_api.clj
+++ b/src/tech/v3/dataset_api.clj
@@ -216,10 +216,26 @@ user> (ds/rowvec-at stocks -1)
   ((rowvecs ds) idx))
 
 
+(defn empty-column-names
+  "Return a sequence of the names of columns whose missing set length equals the
+  row count of the dataset - i.e. columns in which every row is missing."
+  [ds]
+  (let [rc (row-count ds)]
+    (->> (columns ds)
+         (lznc/filter #(== rc (long (dtype/ecount (missing %)))))
+         (lznc/map #(:name (meta %))))))
+
+
+(defn remove-empty-columns
+  "Remove all columns that have no data - missing set length equals row count."
+  [ds]
+  (remove-columns ds (empty-column-names ds)))
+
+
 (export-symbols tech.v3.dataset.io
-                  ->dataset
-                  ->>dataset
-                  write!)
+                ->dataset
+                ->>dataset
+                write!)
 
 
 (defn dataset-parser