Skip to content

Commit

Permalink
introducing parsing of Char? columns. It works the same as String par…
Browse files Browse the repository at this point in the history
…sing, but can never result in Char and can never fail (since it can parse to String)
  • Loading branch information
Jolanrensen committed Dec 11, 2024
1 parent 5c54f58 commit eee84c2
Show file tree
Hide file tree
Showing 6 changed files with 76 additions and 9 deletions.
4 changes: 4 additions & 0 deletions core/api/core.api
Original file line number Diff line number Diff line change
Expand Up @@ -6501,8 +6501,12 @@ public final class org/jetbrains/kotlinx/dataframe/api/ParseKt {
public static synthetic fun parse$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame;[Lorg/jetbrains/kotlinx/dataframe/columns/ColumnReference;Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
public static final fun parseAnyFrameNullable (Lorg/jetbrains/kotlinx/dataframe/DataColumn;Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;)Lorg/jetbrains/kotlinx/dataframe/DataColumn;
public static synthetic fun parseAnyFrameNullable$default (Lorg/jetbrains/kotlinx/dataframe/DataColumn;Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataColumn;
public static final fun parseChar (Lorg/jetbrains/kotlinx/dataframe/DataColumn;Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;)Lorg/jetbrains/kotlinx/dataframe/DataColumn;
public static synthetic fun parseChar$default (Lorg/jetbrains/kotlinx/dataframe/DataColumn;Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataColumn;
public static final fun tryParse (Lorg/jetbrains/kotlinx/dataframe/DataColumn;Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;)Lorg/jetbrains/kotlinx/dataframe/DataColumn;
public static synthetic fun tryParse$default (Lorg/jetbrains/kotlinx/dataframe/DataColumn;Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataColumn;
public static final fun tryParseChar (Lorg/jetbrains/kotlinx/dataframe/DataColumn;Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;)Lorg/jetbrains/kotlinx/dataframe/DataColumn;
public static synthetic fun tryParseChar$default (Lorg/jetbrains/kotlinx/dataframe/DataColumn;Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataColumn;
}

public final class org/jetbrains/kotlinx/dataframe/api/ParserOptions {
Expand Down
40 changes: 40 additions & 0 deletions core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/parse.kt
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ import java.time.format.DateTimeFormatter
import java.util.Locale
import kotlin.reflect.KProperty
import kotlin.reflect.KType
import kotlin.reflect.typeOf

/**
* ### Global Parser Options
Expand Down Expand Up @@ -197,6 +198,28 @@ public class ParserOptions(
/** @include [tryParseImpl] */
public fun DataColumn<String?>.tryParse(options: ParserOptions? = null): DataColumn<*> {
    // Public entry point; simply forwards to the internal implementation.
    return tryParseImpl(options)
}

/**
 * Attempts to parse a column of chars into a column of another type.
 *
 * Every char is first converted to its string form (`null`s stay `null`), after which the
 * resulting column is handed to the `String?` overload of [tryParse].
 * The parsers in [Parsers] are then tried in order until one of them manages to parse every
 * value in the column. As soon as a parser fails on any value, the next one is tried. If all
 * the others fail, the final parser returns strings.
 *
 * `Char` is always added to the skipped types, because the goal is to parse away from `Char`.
 * Parsers that are [covered by][StringParser.coveredBy] other parsers are skipped as well.
 *
 * @param options options for parsing, like providing a locale or a custom date-time formatter;
 *   when `null`, the skip types of `DataFrame.parser` are used as the starting set
 * @throws IllegalStateException if no valid parser is found (unlikely, unless the `String` parser is disabled)
 * @return a new column with parsed values
 */
@JvmName("tryParseChar")
public fun DataColumn<Char?>.tryParse(options: ParserOptions? = null): DataColumn<*> {
    // Start from the caller's skip types (or the global ones) and additionally exclude Char,
    // since parsing back to Char would defeat the purpose of this overload.
    val skipTypesWithChar = (options?.skipTypes ?: DataFrame.parser.skipTypes) + typeOf<Char>()
    val effectiveOptions = (options ?: ParserOptions()).copy(skipTypes = skipTypesWithChar)

    val asStrings = map { it?.toString() }
    return asStrings.tryParse(effectiveOptions)
}

public fun <T> DataFrame<T>.parse(options: ParserOptions? = null): DataFrame<T> =
parse(options) {
colsAtAnyDepth { !it.isColumnGroup() }
Expand All @@ -220,6 +243,23 @@ public fun <T> DataFrame<T>.parse(options: ParserOptions? = null): DataFrame<T>
public fun DataColumn<String?>.parse(options: ParserOptions? = null): DataColumn<*> {
    val parsed = tryParse(options)
    // A result that is still a String column means no more specific type could be found.
    if (parsed.typeClass == String::class) error("Can't guess column type")
    return parsed
}

/**
 * Parses a column of chars, treated as strings, into a column of another type.
 *
 * This forwards to the `Char?` overload of [tryParse]: each parser in [Parsers] is run in order
 * until one is found that parses all values in the column successfully. Whenever a parser
 * fails on a value, the next parser is tried.
 *
 * If every parser fails, the column comes back as `String`. In contrast to the `String?`
 * overload of [parse], that outcome is not treated as an error here.
 *
 * Parsers that are [covered by][StringParser.coveredBy] other parsers are skipped.
 *
 * @param options options for parsing, like providing a locale or a custom date-time formatter
 * @return a new column with parsed values
 */
@JvmName("parseChar")
public fun DataColumn<Char?>.parse(options: ParserOptions? = null): DataColumn<*> {
    // No exception is needed for a String result: a Char can always be represented as a String.
    return tryParse(options)
}

@JvmName("parseAnyFrameNullable")
public fun DataColumn<AnyFrame?>.parse(options: ParserOptions? = null): DataColumn<AnyFrame?> {
    // Parse each nested frame on its own; null entries are passed through unchanged.
    return map { frame -> frame?.parse(options) }
}
Original file line number Diff line number Diff line change
Expand Up @@ -564,29 +564,28 @@ internal fun <T> DataFrame<T>.parseImpl(options: ParserOptions?, columns: Column
when {
// when a frame column is requested to be parsed,
// parse each value/frame column at any depth inside each DataFrame in the frame column
col.isFrameColumn() -> {
col.isFrameColumn() ->
col.map {
it.parseImpl(options) {
colsAtAnyDepth { !it.isColumnGroup() }
}
}
}

// when a column group is requested to be parsed,
// parse each column in the group
col.isColumnGroup() -> {
col.isColumnGroup() ->
col.parseImpl(options) { all() }
.asColumnGroup(col.name())
.asDataColumn()
}

// Base case, parse the column as String if it's a `Char?` column
col.isSubtypeOf<Char?>() ->
col.cast<Char?>().map { it?.toString() }.tryParseImpl(options)

// Base case, parse the column if it's a `String?` column
col.isSubtypeOf<String?>() -> {
col.isSubtypeOf<String?>() ->
col.cast<String?>().tryParseImpl(options)
}

else -> {
col
}
else -> col
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -221,6 +221,8 @@ class ConvertTests {

// but
columnOf('1', '2').convertToString().convertToInt() shouldBe columnOf(1, 2)
// or
columnOf('1', '2').parse() shouldBe columnOf(1, 2)
}

@Test
Expand Down
16 changes: 16 additions & 0 deletions core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/parse.kt
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,22 @@ import java.time.Duration as JavaDuration
import java.time.Instant as JavaInstant

class ParseTests {

@Test
fun `parse chars to string`() {
    val letters = columnOf('a', 'b', 'c')
    val expectedStrings = columnOf("a", "b", "c")

    // non-numeric chars end up as strings, for both entry points
    letters.parse() shouldBe expectedStrings
    letters.tryParse() shouldBe expectedStrings

    // round trip: parsing the resulting strings yields the chars again
    letters.parse().cast<String>().parse() shouldBe letters
}

@Test
fun `parse chars to int`() {
    val digits = columnOf('1', '2', '3')
    val expectedInts = columnOf(1, 2, 3)

    // digit chars are parsed to ints, for both entry points
    digits.parse() shouldBe expectedInts
    digits.tryParse() shouldBe expectedInts
}

@Test
fun parseDate() {
val currentLocale = Locale.getDefault()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,12 @@ class ParserTests {
DataFrame.parser.resetToDefault()
}

@Test
fun `parse to Char`() {
    // a String column holding only single-character values is expected to parse to Char
    val col by columnOf("a", "b")
    val parsedType = col.parse().type()
    parsedType shouldBe typeOf<Char>()
}

@Test(expected = IllegalStateException::class)
fun `parse should throw`() {
val col by columnOf("a", "bc")
Expand Down

0 comments on commit eee84c2

Please sign in to comment.