manually updating generated sources

Kotlin · Dec 9, 2024 · 5870fd0 · 5870fd0
1 parent c110f5c
commit 5870fd0
Show file tree

Hide file tree

Showing 25 changed files with 6,292 additions and 0 deletions.
diff --git a/...ated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/DelimParams.kt b/...ated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/DelimParams.kt
@@ -0,0 +1,247 @@
+package org.jetbrains.kotlinx.dataframe.documentation
+
+import io.deephaven.csv.CsvSpecs
+import org.apache.commons.csv.CSVFormat
+import org.jetbrains.kotlinx.dataframe.DataFrame
+import org.jetbrains.kotlinx.dataframe.api.ParserOptions
+import org.jetbrains.kotlinx.dataframe.api.parser
+import org.jetbrains.kotlinx.dataframe.impl.io.typesDeephavenAlreadyParses
+import org.jetbrains.kotlinx.dataframe.io.AdjustCSVFormat
+import org.jetbrains.kotlinx.dataframe.io.AdjustCsvSpecs
+import org.jetbrains.kotlinx.dataframe.io.ColType
+import org.jetbrains.kotlinx.dataframe.io.Compression
+import org.jetbrains.kotlinx.dataframe.io.QuoteMode
+
+/**
+ * Contains both the default values of csv/tsv parameters and the parameter KDocs.
+ *
+ * Members that are `val`/`const val` carry the default value of the parameter their KDoc describes;
+ * the marker interfaces only carry a parameter KDoc and have no default value attached.
+ */
+@Suppress("ktlint:standard:class-naming", "ClassName", "KDocUnresolvedReference")
+internal object DelimParams {
+
+    /** @param path The file path to read. Can also be compressed as `.gz` or `.zip`, see [Compression]. */
+    interface PATH_READ
+
+    /** @param file The file to read. Can also be compressed as `.gz` or `.zip`, see [Compression]. */
+    interface FILE_READ
+
+    /** @param url The URL from which to fetch the data. Can also be compressed as `.gz` or `.zip`, see [Compression]. */
+    interface URL_READ
+
+    /** @param fileOrUrl The file path or URL to read the data from. Can also be compressed as `.gz` or `.zip`, see [Compression]. */
+    interface FILE_OR_URL_READ
+
+    /** @param inputStream Represents the file to read. */
+    interface INPUT_STREAM_READ
+
+    /** @param text The raw data to read in the form of a [String]. */
+    interface TEXT_READ
+
+    /** @param file The file to write to. */
+    interface FILE_WRITE
+
+    /** @param path The path pointing to a file to write to. */
+    interface PATH_WRITE
+
+    /** @param writer The [Appendable] to write to. */
+    interface WRITER_WRITE
+
+    /**
+     * @param delimiter The field delimiter character. Default: ','.
+     *
+     *   Ignored if [hasFixedWidthColumns] is `true`.
+     */
+    const val CSV_DELIMITER: Char = ','
+
+    /**
+     * @param delimiter The field delimiter character. Default: '\t'.
+     *
+     *   Ignored if [hasFixedWidthColumns] is `true`.
+     */
+    const val TSV_DELIMITER: Char = '\t'
+
+    /**
+     * @param delimiter The field delimiter character. Default: ','.
+     *
+     *   Ignored if [hasFixedWidthColumns] is `true`.
+     */
+    const val DELIM_DELIMITER: Char = ','
+
+    /**
+     * @param header Optional column titles. Default: empty list.
+     *
+     *   If non-empty, the data will be read with [header] as the column titles
+     *   (use [skipLines] if there's a header in the data).
+     *   If empty (default), the header will be read from the data.
+     */
+    val HEADER: List<String> = emptyList()
+
+    /**
+     * @param hasFixedWidthColumns Whether the data has fixed-width columns instead of a single delimiter.
+     *   Default: `false`.
+     *
+     *   Fixed-width columns can occur, for instance, in multi-space delimited data, where the columns are separated
+     *   by multiple spaces instead of a single delimiter, so columns are visually aligned.
+     *   Column widths are determined by the header in the data (if present), or manually by setting
+     *   [fixedColumnWidths].
+     */
+    const val HAS_FIXED_WIDTH_COLUMNS: Boolean = false
+
+    /**
+     * @param fixedColumnWidths The fixed column widths. Default: empty list.
+     *
+     *   Requires [hasFixedWidthColumns]. If empty, the column widths will be determined by the header in the data
+     *   (if present), else, this manually sets the column widths.
+     *   The number of widths should match the number of columns.
+     */
+    val FIXED_COLUMN_WIDTHS: List<Int> = emptyList()
+
+    /**
+     * @param compression The compression of the data.
+     *   Default: [Compression.None], unless detected otherwise from the input file or url.
+     */
+    val COMPRESSION: Compression<*> = Compression.None
+
+    /**
+     * @param colTypes The expected [ColType] per column name. Default: empty map, a.k.a. infer every column type.
+     *
+     *   If supplied for a certain column name (inferred from data or given by [header]),
+     *   the parser will parse the column with the specified name as the specified type, else it will infer the type.
+     *
+     *   e.g. `colTypes = `[mapOf][mapOf]`("colName" `[to][to]` `[ColType][ColType]`.`[Int][ColType.Int]`)`.
+     *   You can also set [ColType][ColType]`.`[DEFAULT][ColType.DEFAULT]` `[to][to]` `[ColType][ColType]`.X`
+     *   to set a _default_ column type, like [ColType.String].
+     */
+    val COL_TYPES: Map<String, ColType> = emptyMap()
+
+    /**
+     * @param skipLines The number of lines to skip before reading the header and data. Default: `0`.
+     *
+     *   Useful for files with metadata, or comments at the beginning, or to give a custom [header].
+     */
+    const val SKIP_LINES: Long = 0L
+
+    /**
+     * @param readLines The maximum number of lines to read from the data. Default: `null`.
+     *
+     *   If `null`, all lines will be read.
+     */
+    val READ_LINES: Long? = null
+
+    /**
+     * @param parserOptions Optional [parsing options][ParserOptions] for columns initially read as [String].
+     *   Default: `null`.
+     *
+     *   Can configure locale, date format, double parsing, skipping types, etc.
+     *
+     *   If [parserOptions] or any of the arguments are `null`, the global parser configuration
+     *   ([DataFrame.parser][DataFrame.Companion.parser]) will be queried.
+     *
+     *   The only exceptions are:
+     *   - [useFastDoubleParser][ParserOptions.useFastDoubleParser], which will default to `true`,
+     *   regardless of the global setting.
+     *   - [nullStrings][ParserOptions.nullStrings], which, if `null`,
+     *   will take the global setting + [["", "NA", "N/A", "null", "NULL", "None", "none", "NIL", "nil"]][org.jetbrains.kotlinx.dataframe.io.DEFAULT_DELIM_NULL_STRINGS].
+     *   - [skipTypes][ParserOptions.skipTypes], which will always add [typesDeephavenAlreadyParses] to
+     *   the given types or the global setting.
+     */
+    val PARSER_OPTIONS: ParserOptions? = null
+
+    /**
+     * @param ignoreEmptyLines Whether to skip intermediate empty lines. Default: `false`.
+     *
+     *   If `false`, empty lines will be interpreted as having _empty_ values if [allowMissingColumns].
+     */
+    const val IGNORE_EMPTY_LINES: Boolean = false
+
+    /**
+     * @param allowMissingColumns Whether to allow rows with fewer columns than the header. Default: `true`.
+     *
+     *   If `true`, rows that are too short will be interpreted as _empty_ values.
+     */
+    const val ALLOW_MISSING_COLUMNS: Boolean = true
+
+    /**
+     * @param ignoreExcessColumns Whether to ignore rows with more columns than the header. Default: `true`.
+     *
+     *   If `true`, rows that are too long will have those columns dropped.
+     */
+    const val IGNORE_EXCESS_COLUMNS: Boolean = true
+
+    /**
+     * @param quote The quote character. Default: `"`.
+     *
+     *   Used when field- or line delimiters should be interpreted as literal text.
+     *
+     *   For example: `123,"hello, there",456,` would correspond to: `123`; `hello, there`; `456`.
+     */
+    const val QUOTE: Char = '"'
+
+    /**
+     * @param ignoreSurroundingSpaces Whether to ignore leading and trailing blanks around non-quoted fields.
+     *   Default: `true`.
+     */
+    const val IGNORE_SURROUNDING_SPACES: Boolean = true
+
+    /**
+     * @param trimInsideQuoted Whether to ignore leading and trailing blanks inside quoted fields.
+     *   Default: `false`.
+     */
+    const val TRIM_INSIDE_QUOTED: Boolean = false
+
+    /**
+     * @param parseParallel Whether to parse the data in parallel. Default: `true`.
+     *
+     *   If `true`, the data will be read and parsed in parallel by the Deephaven parser.
+     *   This is usually faster, but can be turned off for debugging.
+     */
+    const val PARSE_PARALLEL: Boolean = true
+
+    /**
+     * @param adjustCsvSpecs Optional extra [CsvSpecs] configuration. Default: `{ it }`.
+     *
+     *   Before instantiating the [CsvSpecs], the [CsvSpecs.Builder] will be passed to this lambda.
+     *   This will allow you to configure/overwrite any CSV / TSV parsing options.
+     */
+    val ADJUST_CSV_SPECS: AdjustCsvSpecs = { it }
+
+    /** @param includeHeader Whether to include the header in the output. Default: `true`. */
+    const val INCLUDE_HEADER: Boolean = true
+
+    /**
+     * @param quoteMode The [QuoteMode] to use when writing CSV / TSV files.
+     *   Default: [QuoteMode.MINIMAL].
+     */
+    val QUOTE_MODE: QuoteMode = QuoteMode.MINIMAL
+
+    /**
+     * @param escapeChar The escape character to use when writing CSV / TSV files with [QuoteMode.NONE].
+     *   Default: `null`. This will double-quote the value.
+     */
+    val ESCAPE_CHAR: Char? = null
+
+    /**
+     * @param commentChar The character that indicates a comment line in a CSV / TSV file.
+     *   Default: `'#'`.
+     */
+    const val COMMENT_CHAR: Char = '#'
+
+    /**
+     * @param recordSeparator The string that separates records in a CSV / TSV file.
+     *   Default: `"\n"`, a Unix-newline.
+     */
+    const val RECORD_SEPARATOR: String = "\n"
+
+    /**
+     * @param headerComments A list of comments to include at the beginning of the CSV / TSV file.
+     *   Default: empty list.
+     */
+    val HEADER_COMMENTS: List<String> = emptyList()
+
+    /**
+     * @param adjustCsvFormat Optional extra [CSVFormat] configuration. Default: `{ it }`.
+     *
+     *   Before instantiating the [CSVFormat], the [CSVFormat.Builder] will be passed to this lambda.
+     *   This will allow you to configure/overwrite any CSV / TSV writing options.
+     */
+    val ADJUST_CSV_FORMAT: AdjustCSVFormat = { it }
+}
diff --git a/.../generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/utils.kt b/.../generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/utils.kt
@@ -0,0 +1,69 @@
+package org.jetbrains.kotlinx.dataframe.documentation
+
+import kotlin.annotation.AnnotationTarget.ANNOTATION_CLASS
+import kotlin.annotation.AnnotationTarget.CLASS
+import kotlin.annotation.AnnotationTarget.CONSTRUCTOR
+import kotlin.annotation.AnnotationTarget.FIELD
+import kotlin.annotation.AnnotationTarget.FILE
+import kotlin.annotation.AnnotationTarget.FUNCTION
+import kotlin.annotation.AnnotationTarget.LOCAL_VARIABLE
+import kotlin.annotation.AnnotationTarget.PROPERTY
+import kotlin.annotation.AnnotationTarget.PROPERTY_GETTER
+import kotlin.annotation.AnnotationTarget.PROPERTY_SETTER
+import kotlin.annotation.AnnotationTarget.TYPE
+import kotlin.annotation.AnnotationTarget.TYPEALIAS
+import kotlin.annotation.AnnotationTarget.VALUE_PARAMETER
+
+/**
+ * Marker for the documentation processor: any `Documentable` carrying this annotation
+ * is left out of the generated sources.
+ *
+ * **NOTE: DO NOT RENAME!**
+ */
+@Target(
+    ANNOTATION_CLASS,
+    CLASS,
+    CONSTRUCTOR,
+    FIELD,
+    FILE,
+    FUNCTION,
+    LOCAL_VARIABLE,
+    PROPERTY,
+    PROPERTY_GETTER,
+    PROPERTY_SETTER,
+    TYPE,
+    TYPEALIAS,
+    VALUE_PARAMETER,
+)
+internal annotation class ExcludeFromSources
+
+/**
+ * Marker for the documentation processor: any `Documentable` carrying this annotation
+ * has its documentation exported to HTML.
+ *
+ * To export only part of the doc, delimit the range with @exportAsHtmlStart and @exportAsHtmlEnd.
+ *
+ * **NOTE: DO NOT RENAME!**
+ *
+ * @param theme Whether a simple theme is included in the HTML file. Default is `true`.
+ * @param stripReferences Whether `[references]` are stripped from the HTML file. Default is `true`.
+ *  Useful when the HTML file is embedded in a website, where the references are not
+ *  needed or would break.
+ */
+@Target(
+    ANNOTATION_CLASS,
+    CLASS,
+    CONSTRUCTOR,
+    FIELD,
+    FILE,
+    FUNCTION,
+    LOCAL_VARIABLE,
+    PROPERTY,
+    PROPERTY_GETTER,
+    PROPERTY_SETTER,
+    TYPE,
+    TYPEALIAS,
+    VALUE_PARAMETER,
+)
+internal annotation class ExportAsHtml(
+    val theme: Boolean = true,
+    val stripReferences: Boolean = true,
+)
diff --git a/...es/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/io/DataFrameCustomDoubleParser.kt b/...es/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/io/DataFrameCustomDoubleParser.kt
@@ -0,0 +1,24 @@
+package org.jetbrains.kotlinx.dataframe.impl.io
+
+import io.deephaven.csv.containers.ByteSlice
+import io.deephaven.csv.tokenization.Tokenizer.CustomDoubleParser
+import org.jetbrains.kotlinx.dataframe.api.ParserOptions
+
+/**
+ * Adapter that exposes [FastDoubleParser] through Deephaven's [CustomDoubleParser] interface.
+ *
+ * @param parserOptions Passed as-is to the wrapped [FastDoubleParser]; `null` by default.
+ */
+internal class DataFrameCustomDoubleParser(parserOptions: ParserOptions? = null) : CustomDoubleParser {
+
+    private val delegate = FastDoubleParser(parserOptions)
+
+    /**
+     * Parses the region of [bs] (its data, begin offset and size) as a [Double].
+     *
+     * @throws NumberFormatException if the wrapped parser returns `null` or throws any [Exception].
+     */
+    override fun parse(bs: ByteSlice): Double {
+        val parsed =
+            try {
+                delegate.parseOrNull(bs.data(), bs.begin(), bs.size())
+            } catch (_: Exception) {
+                // Any failure inside the wrapped parser is reported the same way as "not a number".
+                null
+            }
+        return parsed ?: throw NumberFormatException()
+    }
+
+    /**
+     * Parses the string form of [cs] as a [Double].
+     *
+     * @throws NumberFormatException if the wrapped parser returns `null`.
+     */
+    override fun parse(cs: CharSequence): Double {
+        val text = cs.toString()
+        return delegate.parseOrNull(text) ?: throw NumberFormatException()
+    }
+}