Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Expose ICU's u_hasBinaryProperty #1029

Merged
merged 2 commits into from
Feb 28, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 46 additions & 0 deletions skiko/src/commonMain/kotlin/org/jetbrains/skia/CodePoint.kt
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
package org.jetbrains.skia

// TODO Remove once it's available in common stdlib https://youtrack.jetbrains.com/issue/KT-23251
/**
 * A Unicode code point, represented as a plain [Int].
 */
internal typealias CodePoint = Int

/**
 * The minimum value of a supplementary code point, `U+10000`.
 *
 * Code points at or above this value are reported by `charCount()` as taking two `Char`s.
 */
private const val MIN_SUPPLEMENTARY_CODE_POINT: Int = 0x10000

/**
 * Combines a UTF-16 surrogate pair into the code point it encodes.
 *
 * No validation is performed here; callers are expected to pass a high surrogate as [high]
 * and a low surrogate as [low].
 *
 * @param high the leading (high) surrogate.
 * @param low the trailing (low) surrogate.
 * @return the combined code point.
 */
internal fun toCodePoint(high: Char, low: Char): CodePoint {
    val highBits = (high - Char.MIN_HIGH_SURROGATE) shl 10
    val lowBits = low - Char.MIN_LOW_SURROGATE
    return (highBits or lowBits) + MIN_SUPPLEMENTARY_CODE_POINT
}

/**
 * Returns how many `Char`s this code point is counted as:
 * 1 below [MIN_SUPPLEMENTARY_CODE_POINT], 2 at or above it.
 */
internal fun Int.charCount(): Int {
    if (this < MIN_SUPPLEMENTARY_CODE_POINT) return 1
    return 2
}

/**
 * The code points of this character sequence, starting from index 0.
 */
internal val CharSequence.codePoints: Sequence<CodePoint>
    get() = codePointsAt(index = 0)

/**
 * Returns a sequence of the code points of this character sequence, starting at `Char` index [index].
 *
 * Each step reads one code point with [codePointAt] and advances by its [charCount],
 * so a combined surrogate pair is yielded once.
 */
internal fun CharSequence.codePointsAt(index: Int): Sequence<CodePoint> = sequence {
    var position = index
    while (position < length) {
        val next = codePointAt(position)
        yield(next)
        position += next.charCount()
    }
}

/**
 * The code points of this character sequence as an [IntArray].
 *
 * Surrogate pairs are combined by [codePointAt], so the result can be shorter than [length].
 * The array is filled directly instead of going through an intermediate `List<Int>`,
 * which would box every code point.
 */
internal val CharSequence.codePointsAsIntArray: IntArray
    get() {
        val charLength = length
        // Upper bound on the result size: every code point takes at least one Char.
        val buffer = IntArray(charLength)
        var count = 0
        var index = 0
        while (index < charLength) {
            val codePoint = codePointAt(index)
            buffer[count++] = codePoint
            index += codePoint.charCount()
        }
        // Trimming is only needed when at least one surrogate pair was combined.
        return if (count == charLength) buffer else buffer.copyOf(count)
    }

/**
 * Returns the Unicode code point that starts at the specified index.
 *
 * If the `Char` at [index] is a high surrogate and the next `Char` exists and is a low surrogate,
 * the pair is combined with [toCodePoint]. In every other case, including an unpaired surrogate,
 * the code of the single `Char` at [index] is returned.
 */
internal fun CharSequence.codePointAt(index: Int): CodePoint {
    val first = this[index]
    val nextIndex = index + 1
    if (!first.isHighSurrogate() || nextIndex >= length) return first.code
    val second = this[nextIndex]
    return if (second.isLowSurrogate()) toCodePoint(first, second) else first.code
}
2 changes: 0 additions & 2 deletions skiko/src/commonMain/kotlin/org/jetbrains/skia/Expects.kt
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,6 @@ package org.jetbrains.skia

/**
 * Runs [block], with [lock] available to the platform for mutual exclusion.
 *
 * Of the actual implementations visible alongside this declaration, the JVM one calls
 * `synchronized(lock, block)` and the others simply invoke `block()`.
 *
 * NOTE(review): no return type is declared, so the value produced by [block] is not
 * returned to the caller.
 */
internal expect fun <R> commonSynchronized(lock: Any, block: () -> R)

internal expect fun String.intCodePoints(): IntArray

/**
 * Platform-specific default language tag.
 *
 * NOTE(review): presumably the tag of the platform's default locale — confirm against the
 * actual implementations, which are not visible here.
 */
internal expect fun defaultLanguageTag(): String

expect class Pattern {
Expand Down
2 changes: 1 addition & 1 deletion skiko/src/commonMain/kotlin/org/jetbrains/skia/Font.kt
Original file line number Diff line number Diff line change
Expand Up @@ -336,7 +336,7 @@ class Font : Managed {
* @return the corresponding glyph ids for each character.
*/
fun getStringGlyphs(s: String): ShortArray {
    // Pass real code points (surrogate pairs combined) rather than raw UTF-16 units.
    return getUTF32Glyphs(s.codePointsAsIntArray)
}

/**
Expand Down
2 changes: 1 addition & 1 deletion skiko/src/commonMain/kotlin/org/jetbrains/skia/Typeface.kt
Original file line number Diff line number Diff line change
Expand Up @@ -169,7 +169,7 @@ class Typeface internal constructor(ptr: NativePointer) : RefCnt(ptr) {
* @return the corresponding glyph ids for each character.
*/
fun getStringGlyphs(s: String): ShortArray {
    // Pass real code points (surrogate pairs combined) rather than raw UTF-16 units.
    return getUTF32Glyphs(s.codePointsAsIntArray)
}

/**
Expand Down
155 changes: 149 additions & 6 deletions skiko/src/commonMain/kotlin/org/jetbrains/skia/icu/Unicode.kt
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
package org.jetbrains.skia.icu

import org.jetbrains.skia.ExternalSymbolName
import org.jetbrains.skia.ModuleImport
import org.jetbrains.skia.*
import org.jetbrains.skia.impl.Library.Companion.staticLoad
import org.jetbrains.skia.impl.Stats

/**
* CharDirection represents Bidi_Class Unicode character property.
Expand All @@ -11,6 +11,7 @@ import org.jetbrains.skia.impl.Library.Companion.staticLoad
* See https://unicode-org.github.io/icu-docs/apidoc/dev/icu4c/uchar_8h.html
* See https://www.unicode.org/reports/tr9/
*/
@Suppress("MemberVisibilityCanBePrivate", "unused", "SpellCheckingInspection")
object CharDirection {
init {
staticLoad()
Expand Down Expand Up @@ -44,9 +45,151 @@ object CharDirection {
* Returns the bidirectional category value for the code point.
* Same as java.lang.Character.getDirectionality()
*/
fun of(codePoint: Int): Int {
    // Stats.onNativeCall() is invoked before every call into the native binding.
    Stats.onNativeCall()
    return _nCharDirection(codePoint)
}
}

@ExternalSymbolName("org_jetbrains_skia_icu_Unicode_charDirection")
@ModuleImport("./skiko.mjs", "org_jetbrains_skia_icu_Unicode_charDirection")
private external fun charDirection(codePoint: Int): Int
/**
* Bundles functions to inspect Unicode character properties.
*/
@Suppress("MemberVisibilityCanBePrivate", "unused", "SpellCheckingInspection")
object CharProperties {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Missing KDoc for public API

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added.

init {
// Calls Library.staticLoad() when this object is first initialized.
// NOTE(review): presumably this ensures the native library is loaded before any
// external function is used — confirm against Library.
staticLoad()
}

// Property selector constants.
// NOTE(review): names and values appear to mirror ICU's UProperty enum (the native side
// casts the value to UProperty) — confirm against uchar.h when updating ICU.

// --- Binary properties: values start at BINARY_START (0). ---
const val ALPHABETIC = 0
const val BINARY_START = ALPHABETIC
const val ASCII_HEX_DIGIT = 1
const val BIDI_CONTROL = 2
const val BIDI_MIRRORED = 3
const val DASH = 4
const val DEFAULT_IGNORABLE_CODE_POINT = 5
const val DEPRECATED = 6
const val DIACRITIC = 7
const val EXTENDER = 8
const val FULL_COMPOSITION_EXCLUSION = 9
const val GRAPHEME_BASE = 10
const val GRAPHEME_EXTEND = 11
const val GRAPHEME_LINK = 12
const val HEX_DIGIT = 13
const val HYPHEN = 14
const val ID_CONTINUE = 15
const val ID_START = 16
const val IDEOGRAPHIC = 17
const val IDS_BINARY_OPERATOR = 18
const val IDS_TRINARY_OPERATOR = 19
const val JOIN_CONTROL = 20
const val LOGICAL_ORDER_EXCEPTION = 21
const val LOWERCASE = 22
const val MATH = 23
const val NONCHARACTER_CODE_POINT = 24
const val QUOTATION_MARK = 25
const val RADICAL = 26
const val SOFT_DOTTED = 27
const val TERMINAL_PUNCTUATION = 28
const val UNIFIED_IDEOGRAPH = 29
const val UPPERCASE = 30
const val WHITE_SPACE = 31
const val XID_CONTINUE = 32
const val XID_START = 33
const val CASE_SENSITIVE = 34
const val S_TERM = 35
const val VARIATION_SELECTOR = 36
const val NFD_INERT = 37
const val NFKD_INERT = 38
const val NFC_INERT = 39
const val NFKC_INERT = 40
const val SEGMENT_STARTER = 41
const val PATTERN_SYNTAX = 42
const val PATTERN_WHITE_SPACE = 43
const val POSIX_ALNUM = 44
const val POSIX_BLANK = 45
const val POSIX_GRAPH = 46
const val POSIX_PRINT = 47
const val POSIX_XDIGIT = 48
const val CASED = 49
const val CASE_IGNORABLE = 50
const val CHANGES_WHEN_LOWERCASED = 51
const val CHANGES_WHEN_UPPERCASED = 52
const val CHANGES_WHEN_TITLECASED = 53
const val CHANGES_WHEN_CASEFOLDED = 54
const val CHANGES_WHEN_CASEMAPPED = 55
const val CHANGES_WHEN_NFKC_CASEFOLDED = 56
const val EMOJI = 57
const val EMOJI_PRESENTATION = 58
const val EMOJI_MODIFIER = 59
const val EMOJI_MODIFIER_BASE = 60
const val EMOJI_COMPONENT = 61
const val REGIONAL_INDICATOR = 62
const val PREPENDED_CONCATENATION_MARK = 63
const val EXTENDED_PICTOGRAPHIC = 64
// --- Int-valued properties: values start at INT_START (0x1000). ---
const val BIDI_CLASS = 0x1000
const val INT_START = BIDI_CLASS
const val BLOCK = 0x1001
const val CANONICAL_COMBINING_CLASS = 0x1002
const val DECOMPOSITION_TYPE = 0x1003
const val EAST_ASIAN_WIDTH = 0x1004
const val GENERAL_CATEGORY = 0x1005
const val JOINING_GROUP = 0x1006
const val JOINING_TYPE = 0x1007
const val LINE_BREAK = 0x1008
const val NUMERIC_TYPE = 0x1009
const val SCRIPT = 0x100A
const val HANGUL_SYLLABLE_TYPE = 0x100B
const val NFD_QUICK_CHECK = 0x100C
const val NFKD_QUICK_CHECK = 0x100D
const val NFC_QUICK_CHECK = 0x100E
const val NFKC_QUICK_CHECK = 0x100F
const val LEAD_CANONICAL_COMBINING_CLASS = 0x1010
const val TRAIL_CANONICAL_COMBINING_CLASS = 0x1011
const val GRAPHEME_CLUSTER_BREAK = 0x1012
const val SENTENCE_BREAK = 0x1013
const val WORD_BREAK = 0x1014
const val BIDI_PAIRED_BRACKET_TYPE = 0x1015
const val INDIC_POSITIONAL_CATEGORY = 0x1016
const val INDIC_SYLLABIC_CATEGORY = 0x1017
const val VERTICAL_ORIENTATION = 0x1018
// --- Mask-valued properties: values start at MASK_START (0x2000). ---
const val GENERAL_CATEGORY_MASK = 0x2000
const val MASK_START = GENERAL_CATEGORY_MASK
// --- Double-valued properties: values start at DOUBLE_START (0x3000). ---
const val NUMERIC_VALUE = 0x3000
const val DOUBLE_START = NUMERIC_VALUE
// --- String-valued properties: values start at STRING_START (0x4000). ---
// NOTE(review): 0x4003 and 0x400B have no constant here — presumably entries that are
// deprecated in ICU; confirm before filling the gaps.
const val AGE = 0x4000
const val STRING_START = AGE
const val BIDI_MIRRORING_GLYPH = 0x4001
const val CASE_FOLDING = 0x4002
const val LOWERCASE_MAPPING = 0x4004
const val NAME = 0x4005
const val SIMPLE_CASE_FOLDING = 0x4006
const val SIMPLE_LOWERCASE_MAPPING = 0x4007
const val SIMPLE_TITLECASE_MAPPING = 0x4008
const val SIMPLE_UPPERCASE_MAPPING = 0x4009
const val TITLECASE_MAPPING = 0x400A
const val UPPERCASE_MAPPING = 0x400C
const val BIDI_PAIRED_BRACKET = 0x400D
// --- Other properties: values start at OTHER_PROPERTY_START (0x7000). ---
const val SCRIPT_EXTENSIONS = 0x7000
const val OTHER_PROPERTY_START = SCRIPT_EXTENSIONS
// Marker value for an invalid property code.
const val INVALID_CODE = -1

/**
 * Checks whether a code point has a given binary property.
 *
 * @param codePoint The code point to inspect.
 * @param property The binary property to test; one of the constants defined in [CharProperties].
 * @return The result reported by the native lookup for [codePoint] and [property].
 */
fun codePointHasBinaryProperty(codePoint: Int, property: Int): Boolean {
    Stats.onNativeCall()
    val hasProperty = _nCodePointHasBinaryProperty(codePoint, property)
    return hasProperty
}
}

// Native binding behind CharDirection.of(). The native implementations shown with this
// declaration return ICU's u_charDirection(codePoint).
@ExternalSymbolName("org_jetbrains_skia_icu_Unicode__1nCharDirection")
@ModuleImport("./skiko.mjs", "org_jetbrains_skia_icu_Unicode__1nCharDirection")
private external fun _nCharDirection(codePoint: Int): Int

// Native binding behind CharProperties.codePointHasBinaryProperty(). The native
// implementations shown with this declaration return ICU's
// u_hasBinaryProperty(codePoint, (UProperty)property).
@ExternalSymbolName("org_jetbrains_skia_icu_Unicode__1nCodePointHasBinaryProperty")
@ModuleImport("./skiko.mjs", "org_jetbrains_skia_icu_Unicode__1nCodePointHasBinaryProperty")
private external fun _nCodePointHasBinaryProperty(codePoint: Int, property: Int): Boolean

Original file line number Diff line number Diff line change
@@ -1,15 +1,33 @@
package org.jetbrains.skia.icu

import org.jetbrains.skia.codePoints
import kotlin.test.Test
import kotlin.test.assertEquals
import kotlin.test.assertFalse
import kotlin.test.assertTrue

/**
 * Tests for the ICU-backed [CharDirection] and [CharProperties] lookups.
 */
class UnicodeTest {

    /** Checks [CharDirection.of] for a digit and for Latin, Hebrew and Arabic characters. */
    @Test
    fun directionality() {
        assertEquals(CharDirection.EUROPEAN_NUMBER, CharDirection.of('0'.code)) // Number
        assertEquals(CharDirection.LEFT_TO_RIGHT, CharDirection.of('A'.code)) // Latin
        assertEquals(CharDirection.RIGHT_TO_LEFT, CharDirection.of('א'.code)) // Hebrew
        assertEquals(CharDirection.RIGHT_TO_LEFT_ARABIC, CharDirection.of('؈'.code)) // Arabic
    }

    /** Checks [CharProperties.codePointHasBinaryProperty] on emoji and on a plain letter. */
    @Test
    fun binaryProperties() {
        // Only the first code point is inspected, so multi-code-point strings (such as the
        // flag below) are tested through their leading code point. No debug output is printed.
        fun String.firstCodePointHasProperty(property: Int): Boolean =
            CharProperties.codePointHasBinaryProperty(codePoints.first(), property)

        assertTrue("⌚".firstCodePointHasProperty(CharProperties.EMOJI))
        assertTrue("✅".firstCodePointHasProperty(CharProperties.EMOJI_PRESENTATION))
        assertTrue("♥️".firstCodePointHasProperty(CharProperties.EXTENDED_PICTOGRAPHIC))
        assertTrue("🇮🇱".firstCodePointHasProperty(CharProperties.EMOJI)) // flag

        assertFalse("x".firstCodePointHasProperty(CharProperties.EMOJI))
    }
}
2 changes: 0 additions & 2 deletions skiko/src/jsWasmMain/kotlin/org/jetbrains/skia/Actuals.js.kt
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,6 @@ internal actual fun <R> commonSynchronized(lock: Any, block: () -> R) {
block()
}

actual fun String.intCodePoints(): IntArray = IntArray(this.length) { this[it].code }

actual class Pattern constructor(regex: String) {
private val _regex = Regex(regex)

Expand Down
7 changes: 6 additions & 1 deletion skiko/src/jvmMain/cpp/common/icu/Unicode.cc
Original file line number Diff line number Diff line change
@@ -1,7 +1,12 @@
#include <jni.h>
#include "third_party/externals/icu/source/common/unicode/uchar.h"

// JNI entry point for the Kotlin external function `_nCharDirection`.
// Returns the result of ICU's u_charDirection for `codePoint`.
extern "C" JNIEXPORT jint JNICALL Java_org_jetbrains_skia_icu_UnicodeKt__1nCharDirection
  (JNIEnv* env, jclass jclass, jint codePoint) {
    return u_charDirection(codePoint);
}

// JNI entry point for the Kotlin external function `_nCodePointHasBinaryProperty`.
// Forwards to ICU's u_hasBinaryProperty, treating `property` as a UProperty value.
extern "C" JNIEXPORT jboolean JNICALL Java_org_jetbrains_skia_icu_UnicodeKt__1nCodePointHasBinaryProperty
  (JNIEnv* env, jclass clazz, jint codePoint, jint property) {
    const UProperty which = static_cast<UProperty>(property);
    const UBool hasProperty = u_hasBinaryProperty(codePoint, which);
    return hasProperty;
}
2 changes: 0 additions & 2 deletions skiko/src/jvmMain/kotlin/org/jetbrains/skia/Actuals.jvm.kt
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,6 @@ internal actual fun <R> commonSynchronized(lock: Any, block: () -> R) {
synchronized(lock, block)
}

internal actual fun String.intCodePoints(): IntArray = this.codePoints().toArray()

// JVM actual for the common `Pattern` declaration: a direct alias of java.util.regex.Pattern.
actual typealias Pattern = java.util.regex.Pattern

// JVM actual for `Matcher`: a direct alias of java.util.regex.Matcher.
actual typealias Matcher = java.util.regex.Matcher
Expand Down
6 changes: 5 additions & 1 deletion skiko/src/nativeJsMain/cpp/icu/Unicode.cc
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
#include "common.h"
#include "third_party/externals/icu/source/common/unicode/uchar.h"

// Exported under the symbol name used by the Kotlin external function `_nCharDirection`.
// Returns the result of ICU's u_charDirection for `codePoint`.
SKIKO_EXPORT KInt org_jetbrains_skia_icu_Unicode__1nCharDirection(KInt codePoint) {
    return u_charDirection(codePoint);
}

// Exported under the symbol name used by the Kotlin external function
// `_nCodePointHasBinaryProperty`. Forwards to ICU's u_hasBinaryProperty, treating
// `property` as a UProperty value.
SKIKO_EXPORT KBoolean org_jetbrains_skia_icu_Unicode__1nCodePointHasBinaryProperty(KInt codePoint, KInt property) {
    const UProperty which = static_cast<UProperty>(property);
    const UBool hasProperty = u_hasBinaryProperty(codePoint, which);
    return hasProperty;
}
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,6 @@ internal actual fun <R> commonSynchronized(lock: Any, block: () -> R) {
block()
}

internal actual fun String.intCodePoints(): IntArray = IntArray(this.length) { this[it].code }

actual class Pattern constructor(regex: String) {
private val _regex = Regex(regex)

Expand Down
Loading