Skip to content

Commit

Permalink
Merge branch 'refs/heads/implodeFix' into groupByAggregateFix
Browse files Browse the repository at this point in the history
  • Loading branch information
Jolanrensen committed Aug 1, 2024
2 parents 23046d8 + e4a3af5 commit 1d2c7f9
Show file tree
Hide file tree
Showing 33 changed files with 276 additions and 104 deletions.
2 changes: 2 additions & 0 deletions core/build.gradle.kts
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,8 @@ dependencies {

api(libs.kotlin.datetimeJvm)
implementation(libs.kotlinpoet)
implementation(libs.sl4j)
implementation(libs.kotlinLogging)

testImplementation(libs.junit)
testImplementation(libs.kotestAssertions) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -197,15 +197,19 @@ internal fun commonParent(vararg classes: KClass<*>): KClass<*>? = commonParent(
/**
 * Returns the class in this collection that has the largest number of superclasses
 * (direct and indirect), or `null` when the collection is empty.
 *
 * When several classes share the largest count, the first one encountered is returned.
 */
internal fun Iterable<KClass<*>>.withMostSuperclasses(): KClass<*>? {
    return maxByOrNull { kClass -> kClass.allSuperclasses.size }
}

internal fun Iterable<KClass<*>>.createType(nullable: Boolean, upperBound: KType? = null): KType =
if (upperBound == null) {
(withMostSuperclasses() ?: Any::class).createStarProjectedType(nullable)
} else {
val upperClass = upperBound.classifier as KClass<*>
val baseClass = filter { it.isSubclassOf(upperClass) }.withMostSuperclasses() ?: withMostSuperclasses()
if (baseClass == null) {
upperBound.withNullability(nullable)
} else {
upperBound.projectTo(baseClass).withNullability(nullable)
when {
!iterator().hasNext() -> upperBound?.withNullability(nullable) ?: nothingType(nullable)

upperBound == null -> (withMostSuperclasses() ?: Any::class).createStarProjectedType(nullable)

else -> {
val upperClass = upperBound.classifier as KClass<*>
val baseClass = filter { it.isSubclassOf(upperClass) }.withMostSuperclasses() ?: withMostSuperclasses()
if (baseClass == null) {
upperBound.withNullability(nullable)
} else {
upperBound.projectTo(baseClass).withNullability(nullable)
}
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,7 @@ internal fun <T> Iterable<T>.anyNull(): Boolean = any { it == null }
internal fun emptyPath(): ColumnPath = ColumnPath(emptyList())

@PublishedApi
internal fun <T : Number> KClass<T>.zero(): T =
internal fun <T : Number> KClass<T>.zeroOrNull(): T? =
when (this) {
Int::class -> 0 as T
Byte::class -> 0.toByte() as T
Expand All @@ -131,10 +131,14 @@ internal fun <T : Number> KClass<T>.zero(): T =
Float::class -> 0.toFloat() as T
BigDecimal::class -> BigDecimal.ZERO as T
BigInteger::class -> BigInteger.ZERO as T
Number::class -> 0 as T
else -> TODO()
Number::class -> 0 as? T
else -> null
}

/**
 * Returns the zero value for the [Number] type represented by this class.
 *
 * Delegates to [zeroOrNull] and turns its `null` result into an exception.
 *
 * @throws NotImplementedError if [zeroOrNull] returns `null` for this class.
 */
@PublishedApi
internal fun <T : Number> KClass<T>.zero(): T {
    val zeroValue: T? = zeroOrNull()
    return zeroValue ?: throw NotImplementedError("Zero value for $this is not supported")
}

internal fun <T> catchSilent(body: () -> T): T? =
try {
body()
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
package org.jetbrains.kotlinx.dataframe.impl.api

import io.github.oshai.kotlinlogging.KotlinLogging
import org.jetbrains.kotlinx.dataframe.AnyFrame
import org.jetbrains.kotlinx.dataframe.AnyRow
import org.jetbrains.kotlinx.dataframe.ColumnsSelector
Expand All @@ -11,13 +12,13 @@ import org.jetbrains.kotlinx.dataframe.api.ConvertSchemaDsl
import org.jetbrains.kotlinx.dataframe.api.ConverterScope
import org.jetbrains.kotlinx.dataframe.api.ExcessiveColumns
import org.jetbrains.kotlinx.dataframe.api.Infer
import org.jetbrains.kotlinx.dataframe.api.add
import org.jetbrains.kotlinx.dataframe.api.all
import org.jetbrains.kotlinx.dataframe.api.allNulls
import org.jetbrains.kotlinx.dataframe.api.asColumnGroup
import org.jetbrains.kotlinx.dataframe.api.concat
import org.jetbrains.kotlinx.dataframe.api.convertTo
import org.jetbrains.kotlinx.dataframe.api.emptyDataFrame
import org.jetbrains.kotlinx.dataframe.api.getColumnPaths
import org.jetbrains.kotlinx.dataframe.api.isEmpty
import org.jetbrains.kotlinx.dataframe.api.map
import org.jetbrains.kotlinx.dataframe.api.name
Expand All @@ -29,12 +30,14 @@ import org.jetbrains.kotlinx.dataframe.columns.ColumnGroup
import org.jetbrains.kotlinx.dataframe.columns.ColumnKind
import org.jetbrains.kotlinx.dataframe.columns.ColumnPath
import org.jetbrains.kotlinx.dataframe.columns.FrameColumn
import org.jetbrains.kotlinx.dataframe.columns.UnresolvedColumnsPolicy
import org.jetbrains.kotlinx.dataframe.columns.toColumnSet
import org.jetbrains.kotlinx.dataframe.exceptions.ExcessiveColumnsException
import org.jetbrains.kotlinx.dataframe.exceptions.TypeConversionException
import org.jetbrains.kotlinx.dataframe.impl.emptyPath
import org.jetbrains.kotlinx.dataframe.impl.schema.createEmptyColumn
import org.jetbrains.kotlinx.dataframe.impl.getColumnPaths
import org.jetbrains.kotlinx.dataframe.impl.schema.createEmptyDataFrame
import org.jetbrains.kotlinx.dataframe.impl.schema.createNullFilledColumn
import org.jetbrains.kotlinx.dataframe.impl.schema.extractSchema
import org.jetbrains.kotlinx.dataframe.impl.schema.render
import org.jetbrains.kotlinx.dataframe.kind
Expand All @@ -45,6 +48,8 @@ import kotlin.reflect.KType
import kotlin.reflect.full.withNullability
import kotlin.reflect.jvm.jvmErasure

private val logger = KotlinLogging.logger {}

/**
 * Holder for a single conversion rule.
 *
 * @property transform function invoked with a [ConverterScope] receiver and the value to convert.
 * @property skipNulls NOTE(review): presumably tells the caller to bypass [transform] for `null`
 *   values — confirm against the usage site, which is not visible here.
 */
private open class Converter(
    val transform: ConverterScope.(Any?) -> Any?,
    val skipNulls: Boolean,
)

/**
 * Holder for one `fill { ... }.with { ... }` registration of the conversion DSL.
 *
 * @property columns selector of the columns to fill (the `fill { ... }` part).
 * @property expr row expression that produces the values (the `with { ... }` part).
 */
private class Filler(
    val columns: ColumnsSelector<*, *>,
    val expr: RowExpression<*, *>,
)
Expand Down Expand Up @@ -252,22 +257,16 @@ internal fun AnyFrame.convertToImpl(
}
}.toMutableList()

// when the target is nullable but the source does not contain a column, fill it in with nulls / empty dataframes
// when the target is nullable but the source does not contain a column,
// fill it in with nulls / empty dataframes
val size = this.size.nrow
schema.columns.forEach { (name, targetColumn) ->
val isNullable =
// like value column of type Int?
targetColumn.nullable ||
// like value column of type Int? (backup check)
targetColumn.type.isMarkedNullable ||
// like DataRow<Something?> for a group column (all columns in the group will be nullable)
targetColumn.contentType?.isMarkedNullable == true ||
// frame column can be filled with empty dataframes
targetColumn.kind == ColumnKind.Frame

if (name !in visited) {
newColumns += targetColumn.createEmptyColumn(name, size)
if (!isNullable) {
try {
newColumns += targetColumn.createNullFilledColumn(name, size)
} catch (e: IllegalStateException) {
logger.debug(e) { "" }
// if this could not be done automatically, they need to be filled manually
missingPaths.add(path + name)
}
}
Expand All @@ -279,14 +278,39 @@ internal fun AnyFrame.convertToImpl(
val marker = MarkersExtractor.get(clazz)
var result = convertToSchema(marker.schema, emptyPath())

/*
* Here we handle all fillers registered by the user.
* Fillers are registered in the DSL like:
* ```kt
* df.convertTo<Target> {
* fill { col1 and col2 }.with { something }
* fill { col3 }.with { somethingElse }
* }
* ```
* Users can use this to fill up any column that was missing during the conversion.
* They can also fill up and thus overwrite any existing column here.
*/
dsl.fillers.forEach { filler ->
val paths = result.getColumnPaths(filler.columns)
missingPaths.removeAll(paths.toSet())
result = result.update { paths.toColumnSet() }.with {
filler.expr(this, this)
// get all paths from the `fill { col1 and col2 }` part
val paths = result.getColumnPaths(UnresolvedColumnsPolicy.Create, filler.columns).toSet()

// split the paths into those that are already in the df and those that are missing
val (newPaths, existingPaths) = paths.partition { it in missingPaths }

// first fill cols that are already in the df using the `with {}` part of the dsl
result = result.update { existingPaths.toColumnSet() }.with { filler.expr(this, this) }

// then create any missing ones by filling using the `with {}` part of the dsl
result = newPaths.fold(result) { df, newPath ->
df.add(newPath, Infer.Type) { filler.expr(this, this) }
}

// remove the paths that are now filled
missingPaths -= paths
}

// Inform the user which target columns could not be created during the conversion.
// The user will need to supply extra information for these, e.g. by using `fill {}` on them.
if (missingPaths.isNotEmpty()) {
throw IllegalArgumentException(
"The following columns were not found in DataFrame: ${
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,10 +25,10 @@ import org.jetbrains.kotlinx.dataframe.columns.size
import org.jetbrains.kotlinx.dataframe.columns.values
import org.jetbrains.kotlinx.dataframe.impl.columns.addPath
import org.jetbrains.kotlinx.dataframe.impl.columns.asAnyFrameColumn
import org.jetbrains.kotlinx.dataframe.impl.renderType
import org.jetbrains.kotlinx.dataframe.index
import org.jetbrains.kotlinx.dataframe.kind
import org.jetbrains.kotlinx.dataframe.type
import kotlin.reflect.jvm.jvmErasure

internal fun describeImpl(cols: List<AnyCol>): DataFrame<ColumnDescription> {
fun List<AnyCol>.collectAll(atAnyDepth: Boolean): List<AnyCol> =
Expand Down Expand Up @@ -65,7 +65,7 @@ internal fun describeImpl(cols: List<AnyCol>): DataFrame<ColumnDescription> {
if (hasLongPaths) {
ColumnDescription::path from { it.path() }
}
ColumnDescription::type from { buildTypeName(it) }
ColumnDescription::type from { renderType(it.type) }
ColumnDescription::count from { it.size }
ColumnDescription::unique from { it.countDistinct() }
ColumnDescription::nulls from { it.values.count { it == null } }
Expand Down Expand Up @@ -94,12 +94,3 @@ internal fun describeImpl(cols: List<AnyCol>): DataFrame<ColumnDescription> {

return df.cast()
}

/**
 * Builds a short type name for the given column.
 *
 * The name is the simple name of the JVM erasure of the column's type,
 * followed by `?` when the type is marked nullable.
 * If the erased class has no simple name, the text `null` is used in its place.
 */
private fun buildTypeName(it: AnyCol): String {
    val erasedName = it.type.jvmErasure.simpleName.toString()
    val nullabilitySuffix = if (it.type.isMarkedNullable) "?" else ""
    return erasedName + nullabilitySuffix
}
Original file line number Diff line number Diff line change
@@ -1,8 +1,13 @@
package org.jetbrains.kotlinx.dataframe.impl.columns

import org.jetbrains.kotlinx.dataframe.BuildConfig
import org.jetbrains.kotlinx.dataframe.DataColumn
import org.jetbrains.kotlinx.dataframe.api.dataFrameOf
import org.jetbrains.kotlinx.dataframe.impl.isArray
import org.jetbrains.kotlinx.dataframe.impl.isPrimitiveArray
import kotlin.reflect.KClass
import kotlin.reflect.KType
import kotlin.reflect.full.isSubclassOf

internal abstract class DataColumnImpl<T>(
protected val values: List<T>,
Expand All @@ -12,6 +17,31 @@ internal abstract class DataColumnImpl<T>(
) : DataColumn<T>,
DataColumnInternal<T> {

/**
 * Checks whether this value is compatible with [type].
 *
 * - `null` is accepted only when [type] is marked nullable.
 * - A value for which `isPrimitiveArray` holds is accepted only when `type.isPrimitiveArray`
 *   holds as well and the qualified name of the value's class equals that of the classifier of [type].
 * - A value for which `isArray` holds is accepted whenever `type.isArray` holds;
 *   the precise type of the array cannot be checked.
 * - Any other value is accepted when its class is a subclass of the classifier of [type].
 *
 * NOTE(review): the last case casts `type.classifier` to [KClass] unconditionally —
 * confirm that a column type can never have a non-class classifier here.
 */
private infix fun <T> T?.matches(type: KType): Boolean =
    if (this == null) {
        type.isMarkedNullable
    } else if (this.isPrimitiveArray) {
        type.isPrimitiveArray &&
            this!!::class.qualifiedName ==
            type.classifier?.let { classifier -> (classifier as KClass<*>).qualifiedName }
    } else if (this.isArray) {
        // cannot check the precise type of array
        type.isArray
    } else {
        this!!::class.isSubclassOf(type.classifier as KClass<*>)
    }

init {
    // Sanity check for [Issue #713](https://github.com/Kotlin/dataframe/issues/713):
    // every value must be compatible with the declared column type.
    // This only runs with `kotlin.dataframe.debug=true` in gradle.properties.
    if (BuildConfig.DEBUG) {
        val allValuesMatchType = values.all { value -> value matches type }
        require(allValuesMatchType) {
            // Collect the distinct runtime types of the values for the error message.
            val types = values
                .map { value -> if (value == null) "Nothing?" else value!!::class.simpleName }
                .distinct()
            "Values of column '$name' have types '$types' which are not compatible given with column type '$type'"
        }
    }
}

protected val distinct = distinct ?: lazy { values.toSet() }

override fun name() = name
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,9 @@ internal fun AnyCol.extractSchema(): ColumnSchema =
@PublishedApi
internal fun getSchema(kClass: KClass<*>): DataFrameSchema = MarkersExtractor.get(kClass).schema

/**
* Create "empty" column based on the toplevel of [this] [ColumnSchema].
*/
internal fun ColumnSchema.createEmptyColumn(name: String): AnyCol =
when (this) {
is ColumnSchema.Value -> DataColumn.createValueColumn<Any?>(name, emptyList(), type)
Expand All @@ -110,14 +113,22 @@ internal fun ColumnSchema.createEmptyColumn(name: String): AnyCol =
else -> error("Unexpected ColumnSchema: $this")
}

/** Create "empty" column, filled with either null or empty dataframes. */
internal fun ColumnSchema.createEmptyColumn(name: String, numberOfRows: Int): AnyCol =
/**
* Creates a column based on [this] [ColumnSchema] filled with `null` or empty dataframes.
* @throws IllegalStateException if the column is not nullable and [numberOfRows]` > 0`.
*/
internal fun ColumnSchema.createNullFilledColumn(name: String, numberOfRows: Int): AnyCol =
when (this) {
is ColumnSchema.Value -> DataColumn.createValueColumn(
name = name,
values = List(numberOfRows) { null },
type = type,
)
is ColumnSchema.Value -> {
if (!type.isMarkedNullable && numberOfRows > 0) {
error("Cannot create a null-filled value column of type $type as it's not nullable.")
}
DataColumn.createValueColumn(
name = name,
values = List(numberOfRows) { null },
type = type,
)
}

is ColumnSchema.Group -> DataColumn.createColumnGroup(
name = name,
Expand All @@ -130,7 +141,7 @@ internal fun ColumnSchema.createEmptyColumn(name: String, numberOfRows: Int): An
schema = lazyOf(schema),
)

else -> error("Unexpected ColumnSchema: $this")
else -> error("Cannot create null-filled column of unexpected ColumnSchema: $this")
}

internal fun DataFrameSchema.createEmptyDataFrame(): AnyFrame =
Expand All @@ -143,7 +154,7 @@ internal fun DataFrameSchema.createEmptyDataFrame(numberOfRows: Int): AnyFrame =
DataFrame.empty(numberOfRows)
} else {
columns.map { (name, schema) ->
schema.createEmptyColumn(name, numberOfRows)
schema.createNullFilledColumn(name, numberOfRows)
}.toDataFrame()
}

Expand Down
Original file line number Diff line number Diff line change
@@ -1,10 +1,14 @@
package org.jetbrains.kotlinx.dataframe.jupyter

import org.jetbrains.kotlinx.dataframe.BuildConfig
import org.jetbrains.kotlinx.dataframe.io.DisplayConfiguration

/**
 * Configuration object of the Jupyter integration.
 *
 * It can be used as a DSL through [invoke].
 */
public class JupyterConfiguration {

    /** Display configuration, created with the defaults of [DisplayConfiguration]. */
    public val display: DisplayConfiguration = DisplayConfiguration()

    /** Version of the library. */
    public val version: String = BuildConfig.VERSION

    /**
     * DSL accessor: runs [block] with this configuration as receiver and returns this configuration.
     */
    public operator fun invoke(block: JupyterConfiguration.() -> Unit): JupyterConfiguration {
        this.block()
        return this
    }
}
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package org.jetbrains.kotlinx.dataframe.math

import org.jetbrains.kotlinx.dataframe.api.skipNA_default
import org.jetbrains.kotlinx.dataframe.impl.renderType
import java.math.BigDecimal
import kotlin.reflect.KType
import kotlin.reflect.full.withNullability
Expand Down Expand Up @@ -31,7 +32,10 @@ internal fun <T : Number> Sequence<T>.mean(type: KType, skipNA: Boolean = skipNA

Number::class -> (this as Sequence<Number>).map { it.toDouble() }.mean(skipNA)

else -> throw IllegalArgumentException("Unable to compute mean for type $type")
// this means the sequence is empty
Nothing::class -> Double.NaN

else -> throw IllegalArgumentException("Unable to compute the mean for type ${renderType(type)}")
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package org.jetbrains.kotlinx.dataframe.math

import org.jetbrains.kotlinx.dataframe.api.ddof_default
import org.jetbrains.kotlinx.dataframe.api.skipNA_default
import org.jetbrains.kotlinx.dataframe.impl.renderType
import java.math.BigDecimal
import kotlin.reflect.KType
import kotlin.reflect.full.withNullability
Expand All @@ -13,11 +14,10 @@ internal fun <T : Number> Iterable<T?>.std(
ddof: Int = ddof_default,
): Double {
if (type.isMarkedNullable) {
if (skipNA) {
return filterNotNull().std(type.withNullability(false), true, ddof)
} else {
if (contains(null)) return Double.NaN
return std(type.withNullability(false), skipNA, ddof)
return when {
skipNA -> filterNotNull().std(type = type.withNullability(false), skipNA = true, ddof = ddof)
contains(null) -> Double.NaN
else -> std(type = type.withNullability(false), skipNA = false, ddof = ddof)
}
}
return when (type.classifier) {
Expand All @@ -26,7 +26,8 @@ internal fun <T : Number> Iterable<T?>.std(
Int::class, Short::class, Byte::class -> (this as Iterable<Int>).std(ddof)
Long::class -> (this as Iterable<Long>).std(ddof)
BigDecimal::class -> (this as Iterable<BigDecimal>).std(ddof)
else -> throw IllegalArgumentException("Unsupported type ${type.classifier}")
Nothing::class -> Double.NaN
else -> throw IllegalArgumentException("Unable to compute the std for type ${renderType(type)}")
}
}

Expand Down
Loading

0 comments on commit 1d2c7f9

Please sign in to comment.