From 3ccf1aebb6959fbc6bbbf74d2821522ddfd7d484 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Berkay=20=C5=9Eahin?= <124376117+berkaysynnada@users.noreply.github.com> Date: Sat, 18 Mar 2023 18:13:53 +0300 Subject: [PATCH] Timestamp subtraction and interval operations for `ScalarValue` (#5603) * first implementation and tests of timestamp subtraction * improvement after review * postgre interval format option * random tests extended * corrections after review * operator check * flag is removed * clippy fix * toml conflict * minor changes * deterministic matches * simplifications (clippy error) * test format changed * minor test fix * Update scalar.rs * Refactoring and simplifications * Make ScalarValue support interval comparison * naming tests * macro renaming * renaming macro * Utilize DateTime parsing timezone * Get rid of boilerplate by using convenience functions * Get rid of boilerplate by using convenience functions (part 2) --------- Co-authored-by: Mehmet Ozan Kabak --- datafusion-cli/Cargo.lock | 337 ++++++++---- datafusion/common/Cargo.toml | 4 + datafusion/common/src/scalar.rs | 941 +++++++++++++++++++++++++++++++- 3 files changed, 1168 insertions(+), 114 deletions(-) diff --git a/datafusion-cli/Cargo.lock b/datafusion-cli/Cargo.lock index d01617ac76d5..6fe28cabee06 100644 --- a/datafusion-cli/Cargo.lock +++ b/datafusion-cli/Cargo.lock @@ -74,16 +74,16 @@ checksum = "f410d3907b6b3647b9e7bca4551274b2e3d716aa940afb67b7287257401da921" dependencies = [ "ahash", "arrow-arith", - "arrow-array", - "arrow-buffer", + "arrow-array 34.0.0", + "arrow-buffer 34.0.0", "arrow-cast", "arrow-csv", - "arrow-data", + "arrow-data 34.0.0", "arrow-ipc", "arrow-json", "arrow-ord", "arrow-row", - "arrow-schema", + "arrow-schema 34.0.0", "arrow-select", "arrow-string", "comfy-table", @@ -95,10 +95,10 @@ version = "34.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f87391cf46473c9bc53dab68cb8872c3a81d4dfd1703f1c8aa397dba9880a043" dependencies = [ - 
"arrow-array", - "arrow-buffer", - "arrow-data", - "arrow-schema", + "arrow-array 34.0.0", + "arrow-buffer 34.0.0", + "arrow-data 34.0.0", + "arrow-schema 34.0.0", "chrono", "half", "num", @@ -111,15 +111,32 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d35d5475e65c57cffba06d0022e3006b677515f99b54af33a7cd54f6cdd4a5b5" dependencies = [ "ahash", - "arrow-buffer", - "arrow-data", - "arrow-schema", + "arrow-buffer 34.0.0", + "arrow-data 34.0.0", + "arrow-schema 34.0.0", "chrono", "half", "hashbrown 0.13.2", "num", ] +[[package]] +name = "arrow-array" +version = "35.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "43489bbff475545b78b0e20bde1d22abd6c99e54499839f9e815a2fa5134a51b" +dependencies = [ + "ahash", + "arrow-buffer 35.0.0", + "arrow-data 35.0.0", + "arrow-schema 35.0.0", + "chrono", + "chrono-tz", + "half", + "hashbrown 0.13.2", + "num", +] + [[package]] name = "arrow-buffer" version = "34.0.0" @@ -130,16 +147,26 @@ dependencies = [ "num", ] +[[package]] +name = "arrow-buffer" +version = "35.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a3759e4a52c593281184787af5435671dc8b1e78333e5a30242b2e2d6e3c9d1f" +dependencies = [ + "half", + "num", +] + [[package]] name = "arrow-cast" version = "34.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0a7285272c9897321dfdba59de29f5b05aeafd3cdedf104a941256d155f6d304" dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-data", - "arrow-schema", + "arrow-array 34.0.0", + "arrow-buffer 34.0.0", + "arrow-data 34.0.0", + "arrow-schema 34.0.0", "arrow-select", "chrono", "lexical-core", @@ -152,11 +179,11 @@ version = "34.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "981ee4e7f6a120da04e00d0b39182e1eeacccb59c8da74511de753c56b7fddf7" dependencies = [ - "arrow-array", - "arrow-buffer", + "arrow-array 34.0.0", + "arrow-buffer 34.0.0", "arrow-cast", - 
"arrow-data", - "arrow-schema", + "arrow-data 34.0.0", + "arrow-schema 34.0.0", "chrono", "csv", "csv-core", @@ -171,8 +198,20 @@ version = "34.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "27cc673ee6989ea6e4b4e8c7d461f7e06026a096c8f0b1a7288885ff71ae1e56" dependencies = [ - "arrow-buffer", - "arrow-schema", + "arrow-buffer 34.0.0", + "arrow-schema 34.0.0", + "half", + "num", +] + +[[package]] +name = "arrow-data" +version = "35.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "19c7787c6cdbf9539b1ffb860bfc18c5848926ec3d62cbd52dc3b1ea35c874fd" +dependencies = [ + "arrow-buffer 35.0.0", + "arrow-schema 35.0.0", "half", "num", ] @@ -183,11 +222,11 @@ version = "34.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e37b8b69d9e59116b6b538e8514e0ec63a30f08b617ce800d31cb44e3ef64c1a" dependencies = [ - "arrow-array", - "arrow-buffer", + "arrow-array 34.0.0", + "arrow-buffer 34.0.0", "arrow-cast", - "arrow-data", - "arrow-schema", + "arrow-data 34.0.0", + "arrow-schema 34.0.0", "flatbuffers", ] @@ -197,11 +236,11 @@ version = "34.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "80c3fa0bed7cfebf6d18e46b733f9cb8a1cb43ce8e6539055ca3e1e48a426266" dependencies = [ - "arrow-array", - "arrow-buffer", + "arrow-array 34.0.0", + "arrow-buffer 34.0.0", "arrow-cast", - "arrow-data", - "arrow-schema", + "arrow-data 34.0.0", + "arrow-schema 34.0.0", "chrono", "half", "indexmap", @@ -216,10 +255,10 @@ version = "34.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d247dce7bed6a8d6a3c6debfa707a3a2f694383f0c692a39d736a593eae5ef94" dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-data", - "arrow-schema", + "arrow-array 34.0.0", + "arrow-buffer 34.0.0", + "arrow-data 34.0.0", + "arrow-schema 34.0.0", "arrow-select", "num", ] @@ -231,10 +270,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum 
= "8d609c0181f963cea5c70fddf9a388595b5be441f3aa1d1cdbf728ca834bbd3a" dependencies = [ "ahash", - "arrow-array", - "arrow-buffer", - "arrow-data", - "arrow-schema", + "arrow-array 34.0.0", + "arrow-buffer 34.0.0", + "arrow-data 34.0.0", + "arrow-schema 34.0.0", "half", "hashbrown 0.13.2", ] @@ -245,16 +284,22 @@ version = "34.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "64951898473bfb8e22293e83a44f02874d2257514d49cd95f9aa4afcff183fbc" +[[package]] +name = "arrow-schema" +version = "35.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bf6b26f6a6f8410e3b9531cbd1886399b99842701da77d4b4cf2013f7708f20f" + [[package]] name = "arrow-select" version = "34.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2a513d89c2e1ac22b28380900036cf1f3992c6443efc5e079de631dcf83c6888" dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-data", - "arrow-schema", + "arrow-array 34.0.0", + "arrow-buffer 34.0.0", + "arrow-data 34.0.0", + "arrow-schema 34.0.0", "num", ] @@ -264,10 +309,10 @@ version = "34.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5288979b2705dae1114c864d73150629add9153b9b8f1d7ee3963db94c372ba5" dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-data", - "arrow-schema", + "arrow-array 34.0.0", + "arrow-buffer 34.0.0", + "arrow-data 34.0.0", + "arrow-schema 34.0.0", "arrow-select", "regex", "regex-syntax", @@ -440,9 +485,9 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" [[package]] name = "chrono" -version = "0.4.23" +version = "0.4.24" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "16b0a3d9ed01224b22057780a37bb8c5dbfe1be8ba48678e7bf57ec4b385411f" +checksum = "4e3c5919066adf22df73762e50cffcde3a758f2a848b113b586d1f86728b673b" dependencies = [ "iana-time-zone", "num-integer", @@ -451,6 +496,28 @@ dependencies = [ "winapi", ] +[[package]] +name = "chrono-tz" 
+version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fa48fa079165080f11d7753fd0bc175b7d391f276b965fe4b55bfad67856e463" +dependencies = [ + "chrono", + "chrono-tz-build", + "phf", +] + +[[package]] +name = "chrono-tz-build" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d9998fb9f7e9b2111641485bf8beb32f92945f97f92a3d061f744cfef335f751" +dependencies = [ + "parse-zoneinfo", + "phf", + "phf_codegen", +] + [[package]] name = "clap" version = "3.2.23" @@ -546,9 +613,9 @@ dependencies = [ [[package]] name = "constant_time_eq" -version = "0.2.4" +version = "0.2.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f3ad85c1f65dc7b37604eb0e89748faf0b9653065f2a8ef69f96a687ec1e9279" +checksum = "13418e745008f7349ec7e449155f419a61b92b58a99cc3616942b926825ec76b" [[package]] name = "core-foundation-sys" @@ -737,6 +804,7 @@ name = "datafusion-common" version = "20.0.0" dependencies = [ "arrow", + "arrow-array 35.0.0", "chrono", "num_cpus", "object_store", @@ -792,8 +860,8 @@ version = "20.0.0" dependencies = [ "ahash", "arrow", - "arrow-buffer", - "arrow-schema", + "arrow-buffer 34.0.0", + "arrow-schema 34.0.0", "blake2", "blake3", "chrono", @@ -829,7 +897,7 @@ dependencies = [ name = "datafusion-sql" version = "20.0.0" dependencies = [ - "arrow-schema", + "arrow-schema 34.0.0", "datafusion-common", "datafusion-expr", "log", @@ -1022,9 +1090,9 @@ dependencies = [ [[package]] name = "futures" -version = "0.3.26" +version = "0.3.27" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "13e2792b0ff0340399d58445b88fd9770e3489eff258a4cbc1523418f12abf84" +checksum = "531ac96c6ff5fd7c62263c5e3c67a603af4fcaee2e1a0ae5565ba3a11e69e549" dependencies = [ "futures-channel", "futures-core", @@ -1037,9 +1105,9 @@ dependencies = [ [[package]] name = "futures-channel" -version = "0.3.26" +version = "0.3.27" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "2e5317663a9089767a1ec00a487df42e0ca174b61b4483213ac24448e4664df5" +checksum = "164713a5a0dcc3e7b4b1ed7d3b433cabc18025386f9339346e8daf15963cf7ac" dependencies = [ "futures-core", "futures-sink", @@ -1047,15 +1115,15 @@ dependencies = [ [[package]] name = "futures-core" -version = "0.3.26" +version = "0.3.27" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ec90ff4d0fe1f57d600049061dc6bb68ed03c7d2fbd697274c41805dcb3f8608" +checksum = "86d7a0c1aa76363dac491de0ee99faf6941128376f1cf96f07db7603b7de69dd" [[package]] name = "futures-executor" -version = "0.3.26" +version = "0.3.27" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e8de0a35a6ab97ec8869e32a2473f4b1324459e14c29275d14b10cb1fd19b50e" +checksum = "1997dd9df74cdac935c76252744c1ed5794fac083242ea4fe77ef3ed60ba0f83" dependencies = [ "futures-core", "futures-task", @@ -1064,15 +1132,15 @@ dependencies = [ [[package]] name = "futures-io" -version = "0.3.26" +version = "0.3.27" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bfb8371b6fb2aeb2d280374607aeabfc99d95c72edfe51692e42d3d7f0d08531" +checksum = "89d422fa3cbe3b40dca574ab087abb5bc98258ea57eea3fd6f1fa7162c778b91" [[package]] name = "futures-macro" -version = "0.3.26" +version = "0.3.27" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "95a73af87da33b5acf53acfebdc339fe592ecf5357ac7c0a7734ab9d8c876a70" +checksum = "3eb14ed937631bd8b8b8977f2c198443447a8355b6e3ca599f38c975e5a963b6" dependencies = [ "proc-macro2", "quote", @@ -1081,21 +1149,21 @@ dependencies = [ [[package]] name = "futures-sink" -version = "0.3.26" +version = "0.3.27" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f310820bb3e8cfd46c80db4d7fb8353e15dfff853a127158425f31e0be6c8364" +checksum = "ec93083a4aecafb2a80a885c9de1f0ccae9dbd32c2bb54b0c3a65690e0b8d2f2" [[package]] name = "futures-task" 
-version = "0.3.26" +version = "0.3.27" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dcf79a1bf610b10f42aea489289c5a2c478a786509693b80cd39c44ccd936366" +checksum = "fd65540d33b37b16542a0438c12e6aeead10d4ac5d05bd3f805b8f35ab592879" [[package]] name = "futures-util" -version = "0.3.26" +version = "0.3.27" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9c1d6de3acfef38d2be4b1f543f553131788603495be83da675e180c8d6b7bd1" +checksum = "3ef6b17e481503ec85211fed8f39d1970f128935ca1f814cd32ac4a6842e84ab" dependencies = [ "futures-channel", "futures-core", @@ -1204,6 +1272,12 @@ dependencies = [ "libc", ] +[[package]] +name = "hermit-abi" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fed44880c466736ef9a5c5b5facefb5ed0785676d0c02d612db14e54f0d84286" + [[package]] name = "http" version = "0.2.9" @@ -1342,10 +1416,11 @@ checksum = "8bb03732005da905c88227371639bf1ad885cc712789c011c31c5fb3ab3ccf02" [[package]] name = "io-lifetimes" -version = "1.0.6" +version = "1.0.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cfa919a82ea574332e2de6e74b4c36e74d41982b335080fa59d4ef31be20fdf3" +checksum = "76e86b86ae312accbf05ade23ce76b625e0e47a255712b7414037385a1c05380" dependencies = [ + "hermit-abi 0.3.1", "libc", "windows-sys 0.45.0", ] @@ -1784,12 +1859,12 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7ac135ecf63ebb5f53dda0921b0b76d6048b3ef631a5f4760b9e8f863ff00cfa" dependencies = [ "ahash", - "arrow-array", - "arrow-buffer", + "arrow-array 34.0.0", + "arrow-buffer 34.0.0", "arrow-cast", - "arrow-data", + "arrow-data 34.0.0", "arrow-ipc", - "arrow-schema", + "arrow-schema 34.0.0", "arrow-select", "base64", "brotli", @@ -1810,6 +1885,15 @@ dependencies = [ "zstd 0.12.3+zstd.1.5.2", ] +[[package]] +name = "parse-zoneinfo" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"c705f256449c60da65e11ff6626e0c16a0a0b96aaa348de61376b249bc340f41" +dependencies = [ + "regex", +] + [[package]] name = "paste" version = "1.0.12" @@ -1832,6 +1916,44 @@ dependencies = [ "indexmap", ] +[[package]] +name = "phf" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "928c6535de93548188ef63bb7c4036bd415cd8f36ad25af44b9789b2ee72a48c" +dependencies = [ + "phf_shared", +] + +[[package]] +name = "phf_codegen" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a56ac890c5e3ca598bbdeaa99964edb5b0258a583a9eb6ef4e89fc85d9224770" +dependencies = [ + "phf_generator", + "phf_shared", +] + +[[package]] +name = "phf_generator" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b1181c94580fa345f50f19d738aaa39c0ed30a600d95cb2d3e23f94266f14fbf" +dependencies = [ + "phf_shared", + "rand", +] + +[[package]] +name = "phf_shared" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e1fb5f6f826b772a8d4c0394209441e7d37cbbb967ae9c7e0e8134365c9ee676" +dependencies = [ + "siphasher", +] + [[package]] name = "pin-project-lite" version = "0.2.9" @@ -1888,9 +2010,9 @@ checksum = "dc375e1527247fe1a97d8b7156678dfe7c1af2fc075c9a4db3690ecd2a148068" [[package]] name = "proc-macro2" -version = "1.0.51" +version = "1.0.52" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5d727cae5b39d21da60fa540906919ad737832fe0b1c165da3a34d6548c849d6" +checksum = "1d0e1ae9e836cc3beddd63db0df682593d7e2d3d891ae8c9083d2113e1744224" dependencies = [ "unicode-ident", ] @@ -1907,9 +2029,9 @@ dependencies = [ [[package]] name = "quote" -version = "1.0.23" +version = "1.0.26" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8856d8364d252a14d474036ea1358d63c9e6965c8e5c1885c18f73d70bff9c7b" +checksum = "4424af4bf778aae2051a77b60283332f386554255d722233d09fbfc7e30da2fc" 
dependencies = [ "proc-macro2", ] @@ -2159,9 +2281,9 @@ dependencies = [ [[package]] name = "semver" -version = "1.0.16" +version = "1.0.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "58bc9567378fc7690d6b2addae4e60ac2eeea07becb2c64b9f218b53865cba2a" +checksum = "bebd363326d05ec3e2f532ab7660680f3b02130d780c299bca73469d521bc0ed" [[package]] name = "seq-macro" @@ -2171,18 +2293,18 @@ checksum = "e6b44e8fc93a14e66336d230954dda83d18b4605ccace8fe09bc7514a71ad0bc" [[package]] name = "serde" -version = "1.0.154" +version = "1.0.156" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8cdd151213925e7f1ab45a9bbfb129316bd00799784b174b7cc7bcd16961c49e" +checksum = "314b5b092c0ade17c00142951e50ced110ec27cea304b1037c6969246c2469a4" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.154" +version = "1.0.156" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4fc80d722935453bcafdc2c9a73cd6fac4dc1938f0346035d84bf99fa9e33217" +checksum = "d7e29c4601e36bcec74a223228dce795f4cd3616341a4af93520ca1a837c087d" dependencies = [ "proc-macro2", "quote", @@ -2223,6 +2345,12 @@ dependencies = [ "digest", ] +[[package]] +name = "siphasher" +version = "0.3.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7bd3e3206899af3f8b12af284fafc038cc1dc2b41d1b89dd17297221c5d225de" + [[package]] name = "slab" version = "0.4.8" @@ -2639,12 +2767,11 @@ checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" [[package]] name = "walkdir" -version = "2.3.2" +version = "2.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "808cf2735cd4b6866113f648b791c6adc5714537bc222d9347bb203386ffda56" +checksum = "36df944cda56c7d8d8b7496af378e6b16de9284591917d307c9b4d313c44e698" dependencies = [ "same-file", - "winapi", "winapi-util", ] @@ -2829,9 +2956,9 @@ dependencies = [ [[package]] name = "windows-targets" -version = 
"0.42.1" +version = "0.42.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8e2522491fbfcd58cc84d47aeb2958948c4b8982e9a2d8a2a35bbaed431390e7" +checksum = "8e5180c00cd44c9b1c88adb3693291f1cd93605ded80c250a75d472756b4d071" dependencies = [ "windows_aarch64_gnullvm", "windows_aarch64_msvc", @@ -2844,45 +2971,45 @@ dependencies = [ [[package]] name = "windows_aarch64_gnullvm" -version = "0.42.1" +version = "0.42.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8c9864e83243fdec7fc9c5444389dcbbfd258f745e7853198f365e3c4968a608" +checksum = "597a5118570b68bc08d8d59125332c54f1ba9d9adeedeef5b99b02ba2b0698f8" [[package]] name = "windows_aarch64_msvc" -version = "0.42.1" +version = "0.42.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4c8b1b673ffc16c47a9ff48570a9d85e25d265735c503681332589af6253c6c7" +checksum = "e08e8864a60f06ef0d0ff4ba04124db8b0fb3be5776a5cd47641e942e58c4d43" [[package]] name = "windows_i686_gnu" -version = "0.42.1" +version = "0.42.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "de3887528ad530ba7bdbb1faa8275ec7a1155a45ffa57c37993960277145d640" +checksum = "c61d927d8da41da96a81f029489353e68739737d3beca43145c8afec9a31a84f" [[package]] name = "windows_i686_msvc" -version = "0.42.1" +version = "0.42.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bf4d1122317eddd6ff351aa852118a2418ad4214e6613a50e0191f7004372605" +checksum = "44d840b6ec649f480a41c8d80f9c65108b92d89345dd94027bfe06ac444d1060" [[package]] name = "windows_x86_64_gnu" -version = "0.42.1" +version = "0.42.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c1040f221285e17ebccbc2591ffdc2d44ee1f9186324dd3e84e99ac68d699c45" +checksum = "8de912b8b8feb55c064867cf047dda097f92d51efad5b491dfb98f6bbb70cb36" [[package]] name = "windows_x86_64_gnullvm" -version = "0.42.1" +version = "0.42.2" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "628bfdf232daa22b0d64fdb62b09fcc36bb01f05a3939e20ab73aaf9470d0463" +checksum = "26d41b46a36d453748aedef1486d5c7a85db22e56aff34643984ea85514e94a3" [[package]] name = "windows_x86_64_msvc" -version = "0.42.1" +version = "0.42.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "447660ad36a13288b1db4d4248e857b510e8c3a225c822ba4fb748c0aafecffd" +checksum = "9aec5da331524158c6d1a4ac0ab1541149c0b9505fde06423b02f5ef0106b9f0" [[package]] name = "winreg" diff --git a/datafusion/common/Cargo.toml b/datafusion/common/Cargo.toml index 8a0a7042fcba..7d78ed70eb35 100644 --- a/datafusion/common/Cargo.toml +++ b/datafusion/common/Cargo.toml @@ -41,6 +41,7 @@ pyarrow = ["pyo3", "arrow/pyarrow"] [dependencies] apache-avro = { version = "0.14", default-features = false, features = ["snappy"], optional = true } arrow = { workspace = true, default-features = false } +arrow-array = { version = "35.0.0", default-features = false, features = ["chrono-tz"] } chrono = { version = "0.4", default-features = false } cranelift-module = { version = "0.92.0", optional = true } num_cpus = "1.13.0" @@ -48,3 +49,6 @@ object_store = { version = "0.5.4", default-features = false, optional = true } parquet = { workspace = true, default-features = false, optional = true } pyo3 = { version = "0.18.0", optional = true } sqlparser = "0.32" + +[dev-dependencies] +rand = "0.8.4" diff --git a/datafusion/common/src/scalar.rs b/datafusion/common/src/scalar.rs index 73352941afa7..92cdab3ebba3 100644 --- a/datafusion/common/src/scalar.rs +++ b/datafusion/common/src/scalar.rs @@ -43,7 +43,14 @@ use arrow::{ DECIMAL128_MAX_PRECISION, }, }; -use chrono::{Datelike, Duration, NaiveDate, NaiveDateTime}; +use arrow_array::timezone::Tz; +use chrono::{DateTime, Datelike, Duration, NaiveDate, NaiveDateTime, TimeZone}; + +// Constants we use throughout this file: +const MILLISECS_IN_ONE_DAY: i64 = 86_400_000; +const 
NANOSECS_IN_ONE_DAY: i64 = 86_400_000_000_000; +const MILLISECS_IN_ONE_MONTH: i64 = 2_592_000_000; // assuming 30 days. +const NANOSECS_IN_ONE_MONTH: i128 = 2_592_000_000_000_000; // assuming 30 days. /// Represents a dynamically typed, nullable single value. /// This is the single-valued counter-part to arrow's [`Array`]. @@ -199,10 +206,28 @@ impl PartialEq for ScalarValue { (TimestampNanosecond(v1, _), TimestampNanosecond(v2, _)) => v1.eq(v2), (TimestampNanosecond(_, _), _) => false, (IntervalYearMonth(v1), IntervalYearMonth(v2)) => v1.eq(v2), + (IntervalYearMonth(v1), IntervalDayTime(v2)) => { + ym_to_milli(v1).eq(&dt_to_milli(v2)) + } + (IntervalYearMonth(v1), IntervalMonthDayNano(v2)) => { + ym_to_nano(v1).eq(&mdn_to_nano(v2)) + } (IntervalYearMonth(_), _) => false, (IntervalDayTime(v1), IntervalDayTime(v2)) => v1.eq(v2), + (IntervalDayTime(v1), IntervalYearMonth(v2)) => { + dt_to_milli(v1).eq(&ym_to_milli(v2)) + } + (IntervalDayTime(v1), IntervalMonthDayNano(v2)) => { + dt_to_nano(v1).eq(&mdn_to_nano(v2)) + } (IntervalDayTime(_), _) => false, (IntervalMonthDayNano(v1), IntervalMonthDayNano(v2)) => v1.eq(v2), + (IntervalMonthDayNano(v1), IntervalYearMonth(v2)) => { + mdn_to_nano(v1).eq(&ym_to_nano(v2)) + } + (IntervalMonthDayNano(v1), IntervalDayTime(v2)) => { + mdn_to_nano(v1).eq(&dt_to_nano(v2)) + } (IntervalMonthDayNano(_), _) => false, (Struct(v1, t1), Struct(v2, t2)) => v1.eq(v2) && t1.eq(t2), (Struct(_, _), _) => false, @@ -304,10 +329,28 @@ impl PartialOrd for ScalarValue { } (TimestampNanosecond(_, _), _) => None, (IntervalYearMonth(v1), IntervalYearMonth(v2)) => v1.partial_cmp(v2), + (IntervalYearMonth(v1), IntervalDayTime(v2)) => { + ym_to_milli(v1).partial_cmp(&dt_to_milli(v2)) + } + (IntervalYearMonth(v1), IntervalMonthDayNano(v2)) => { + ym_to_nano(v1).partial_cmp(&mdn_to_nano(v2)) + } (IntervalYearMonth(_), _) => None, (IntervalDayTime(v1), IntervalDayTime(v2)) => v1.partial_cmp(v2), + (IntervalDayTime(v1), IntervalYearMonth(v2)) => { + 
dt_to_milli(v1).partial_cmp(&ym_to_milli(v2)) + } + (IntervalDayTime(v1), IntervalMonthDayNano(v2)) => { + dt_to_nano(v1).partial_cmp(&mdn_to_nano(v2)) + } (IntervalDayTime(_), _) => None, (IntervalMonthDayNano(v1), IntervalMonthDayNano(v2)) => v1.partial_cmp(v2), + (IntervalMonthDayNano(v1), IntervalYearMonth(v2)) => { + mdn_to_nano(v1).partial_cmp(&ym_to_nano(v2)) + } + (IntervalMonthDayNano(v1), IntervalDayTime(v2)) => { + mdn_to_nano(v1).partial_cmp(&dt_to_nano(v2)) + } (IntervalMonthDayNano(_), _) => None, (Struct(v1, t1), Struct(v2, t2)) => { if t1.eq(t2) { @@ -332,6 +375,52 @@ impl PartialOrd for ScalarValue { } } +/// This function computes the duration (in milliseconds) of the given +/// year-month-interval. +#[inline] +fn ym_to_milli(val: &Option) -> Option { + val.map(|value| (value as i64) * MILLISECS_IN_ONE_MONTH) +} + +/// This function computes the duration (in nanoseconds) of the given +/// year-month-interval. +#[inline] +fn ym_to_nano(val: &Option) -> Option { + val.map(|value| (value as i128) * NANOSECS_IN_ONE_MONTH) +} + +/// This function computes the duration (in milliseconds) of the given +/// daytime-interval. +#[inline] +fn dt_to_milli(val: &Option) -> Option { + val.map(|val| { + let (days, millis) = IntervalDayTimeType::to_parts(val); + (days as i64) * MILLISECS_IN_ONE_DAY + (millis as i64) + }) +} + +/// This function computes the duration (in nanoseconds) of the given +/// daytime-interval. +#[inline] +fn dt_to_nano(val: &Option) -> Option { + val.map(|val| { + let (days, millis) = IntervalDayTimeType::to_parts(val); + (days as i128) * (NANOSECS_IN_ONE_DAY as i128) + (millis as i128) * 1_000_000 + }) +} + +/// This function computes the duration (in nanoseconds) of the given +/// month-day-nano-interval. Assumes a month is 30 days long. 
+#[inline] +fn mdn_to_nano(val: &Option) -> Option { + val.map(|val| { + let (months, days, nanos) = IntervalMonthDayNanoType::to_parts(val); + (months as i128) * NANOSECS_IN_ONE_MONTH + + (days as i128) * (NANOSECS_IN_ONE_DAY as i128) + + (nanos as i128) + }) +} + impl Eq for ScalarValue {} // TODO implement this in arrow-rs with simd @@ -464,6 +553,71 @@ macro_rules! unsigned_subtraction_error { } macro_rules! impl_op { + ($LHS:expr, $RHS:expr, +) => { + impl_op_arithmetic!($LHS, $RHS, +) + }; + ($LHS:expr, $RHS:expr, -) => { + match ($LHS, $RHS) { + ( + ScalarValue::TimestampSecond(Some(ts_lhs), tz_lhs), + ScalarValue::TimestampSecond(Some(ts_rhs), tz_rhs), + ) => { + let err = || { + DataFusionError::Execution( + "Overflow while converting seconds to milliseconds".to_string(), + ) + }; + ts_sub_to_interval( + ts_lhs.checked_mul(1_000).ok_or_else(err)?, + ts_rhs.checked_mul(1_000).ok_or_else(err)?, + &tz_lhs, + &tz_rhs, + IntervalMode::Milli, + ) + }, + ( + ScalarValue::TimestampMillisecond(Some(ts_lhs), tz_lhs), + ScalarValue::TimestampMillisecond(Some(ts_rhs), tz_rhs), + ) => ts_sub_to_interval( + *ts_lhs, + *ts_rhs, + tz_lhs, + tz_rhs, + IntervalMode::Milli, + ), + ( + ScalarValue::TimestampMicrosecond(Some(ts_lhs), tz_lhs), + ScalarValue::TimestampMicrosecond(Some(ts_rhs), tz_rhs), + ) => { + let err = || { + DataFusionError::Execution( + "Overflow while converting microseconds to nanoseconds".to_string(), + ) + }; + ts_sub_to_interval( + ts_lhs.checked_mul(1_000).ok_or_else(err)?, + ts_rhs.checked_mul(1_000).ok_or_else(err)?, + tz_lhs, + tz_rhs, + IntervalMode::Nano, + ) + }, + ( + ScalarValue::TimestampNanosecond(Some(ts_lhs), tz_lhs), + ScalarValue::TimestampNanosecond(Some(ts_rhs), tz_rhs), + ) => ts_sub_to_interval( + *ts_lhs, + *ts_rhs, + tz_lhs, + tz_rhs, + IntervalMode::Nano, + ), + _ => impl_op_arithmetic!($LHS, $RHS, -) + } + }; +} + +macro_rules! 
impl_op_arithmetic { ($LHS:expr, $RHS:expr, $OPERATION:tt) => { match ($LHS, $RHS) { // Binary operations on arguments with the same type: @@ -503,6 +657,40 @@ macro_rules! impl_op { (ScalarValue::Int8(lhs), ScalarValue::Int8(rhs)) => { primitive_op!(lhs, rhs, Int8, $OPERATION) } + ( + ScalarValue::IntervalYearMonth(Some(lhs)), + ScalarValue::IntervalYearMonth(Some(rhs)), + ) => Ok(ScalarValue::new_interval_ym( + 0, + lhs + rhs * get_sign!($OPERATION), + )), + ( + ScalarValue::IntervalDayTime(Some(lhs)), + ScalarValue::IntervalDayTime(Some(rhs)), + ) => { + let sign = get_sign!($OPERATION); + let (lhs_days, lhs_millis) = IntervalDayTimeType::to_parts(*lhs); + let (rhs_days, rhs_millis) = IntervalDayTimeType::to_parts(*rhs); + Ok(ScalarValue::new_interval_dt( + lhs_days + rhs_days * sign, + lhs_millis + rhs_millis * sign, + )) + } + ( + ScalarValue::IntervalMonthDayNano(Some(lhs)), + ScalarValue::IntervalMonthDayNano(Some(rhs)), + ) => { + let sign = get_sign!($OPERATION); + let (lhs_months, lhs_days, lhs_nanos) = + IntervalMonthDayNanoType::to_parts(*lhs); + let (rhs_months, rhs_days, rhs_nanos) = + IntervalMonthDayNanoType::to_parts(*rhs); + Ok(ScalarValue::new_interval_mdn( + lhs_months + rhs_months * sign, + lhs_days + rhs_days * sign, + lhs_nanos + rhs_nanos * (sign as i64), + )) + } // Binary operations on arguments with different types: (ScalarValue::Date32(Some(days)), _) => { let value = date32_add(*days, $RHS, get_sign!($OPERATION))?; @@ -544,6 +732,30 @@ macro_rules! 
impl_op { let value = nanoseconds_add(*ts_ns, $LHS, get_sign!($OPERATION))?; Ok(ScalarValue::TimestampNanosecond(Some(value), zone.clone())) } + ( + ScalarValue::IntervalYearMonth(Some(lhs)), + ScalarValue::IntervalDayTime(Some(rhs)), + ) => op_ym_dt(*lhs, *rhs, get_sign!($OPERATION), false), + ( + ScalarValue::IntervalYearMonth(Some(lhs)), + ScalarValue::IntervalMonthDayNano(Some(rhs)), + ) => op_ym_mdn(*lhs, *rhs, get_sign!($OPERATION), false), + ( + ScalarValue::IntervalDayTime(Some(lhs)), + ScalarValue::IntervalYearMonth(Some(rhs)), + ) => op_ym_dt(*rhs, *lhs, get_sign!($OPERATION), true), + ( + ScalarValue::IntervalDayTime(Some(lhs)), + ScalarValue::IntervalMonthDayNano(Some(rhs)), + ) => op_dt_mdn(*lhs, *rhs, get_sign!($OPERATION), false), + ( + ScalarValue::IntervalMonthDayNano(Some(lhs)), + ScalarValue::IntervalYearMonth(Some(rhs)), + ) => op_ym_mdn(*rhs, *lhs, get_sign!($OPERATION), true), + ( + ScalarValue::IntervalMonthDayNano(Some(lhs)), + ScalarValue::IntervalDayTime(Some(rhs)), + ) => op_dt_mdn(*rhs, *lhs, get_sign!($OPERATION), true), _ => Err(DataFusionError::Internal(format!( "Operator {} is not implemented for types {:?} and {:?}", stringify!($OPERATION), @@ -554,6 +766,68 @@ macro_rules! impl_op { }; } +/// This function adds/subtracts two "raw" intervals (`lhs` and `rhs`) of different +/// types ([`IntervalYearMonthType`] and [`IntervalDayTimeType`], respectively). +/// The argument `sign` chooses between addition and subtraction, the argument +/// `commute` swaps `lhs` and `rhs`. The return value is an interval [`ScalarValue`] +/// with data type [`IntervalMonthDayNanoType`]. 
+#[inline] +fn op_ym_dt(mut lhs: i32, rhs: i64, sign: i32, commute: bool) -> Result { + let (mut days, millis) = IntervalDayTimeType::to_parts(rhs); + let mut nanos = (millis as i64) * 1_000_000; + if commute { + lhs *= sign; + } else { + days *= sign; + nanos *= sign as i64; + }; + Ok(ScalarValue::new_interval_mdn(lhs, days, nanos)) +} + +/// This function adds/subtracts two "raw" intervals (`lhs` and `rhs`) of different +/// types ([`IntervalYearMonthType`] and [`IntervalMonthDayNanoType`], respectively). +/// The argument `sign` chooses between addition and subtraction, the argument +/// `commute` swaps `lhs` and `rhs`. The return value is an interval [`ScalarValue`] +/// with data type [`IntervalMonthDayNanoType`]. +#[inline] +fn op_ym_mdn(lhs: i32, rhs: i128, sign: i32, commute: bool) -> Result { + let (mut months, mut days, mut nanos) = IntervalMonthDayNanoType::to_parts(rhs); + if commute { + months += lhs * sign; + } else { + months = lhs + (months * sign); + days *= sign; + nanos *= sign as i64; + } + Ok(ScalarValue::new_interval_mdn(months, days, nanos)) +} + +/// This function adds/subtracts two "raw" intervals (`lhs` and `rhs`) of different +/// types ([`IntervalDayTimeType`] and [`IntervalMonthDayNanoType`], respectively). +/// The argument `sign` chooses between addition and subtraction, the argument +/// `commute` swaps `lhs` and `rhs`. The return value is an interval [`ScalarValue`] +/// with data type [`IntervalMonthDayNanoType`]. 
+#[inline] +fn op_dt_mdn(lhs: i64, rhs: i128, sign: i32, commute: bool) -> Result { + let (lhs_days, lhs_millis) = IntervalDayTimeType::to_parts(lhs); + let (rhs_months, rhs_days, rhs_nanos) = IntervalMonthDayNanoType::to_parts(rhs); + + let result = if commute { + IntervalMonthDayNanoType::make_value( + rhs_months, + lhs_days * sign + rhs_days, + (lhs_millis * sign) as i64 * 1_000_000 + rhs_nanos, + ) + } else { + IntervalMonthDayNanoType::make_value( + rhs_months * sign, + lhs_days + rhs_days * sign, + (lhs_millis as i64) * 1_000_000 + rhs_nanos * (sign as i64), + ) + }; + Ok(ScalarValue::IntervalMonthDayNano(Some(result))) +} + macro_rules! get_sign { (+) => { 1 @@ -563,46 +837,138 @@ macro_rules! get_sign { }; } +#[derive(Clone, Copy)] +enum IntervalMode { + Milli, + Nano, +} + +/// This function subtracts `rhs_ts` from `lhs_ts`, taking timezones +/// into account when given. Units of the resulting interval are specified by +/// the argument `mode`. +/// The default behavior of DataFusion is the following: +/// - When subtracting timestamps at seconds/milliseconds precision, the output +/// interval will have the type [`IntervalDayTimeType`]. +/// - When subtracting timestamps at microseconds/nanoseconds precision, the +/// output interval will have the type [`IntervalMonthDayNanoType`].
+fn ts_sub_to_interval( + lhs_ts: i64, + rhs_ts: i64, + lhs_tz: &Option, + rhs_tz: &Option, + mode: IntervalMode, +) -> Result { + let lhs_dt = with_timezone_to_naive_datetime(lhs_ts, lhs_tz, mode)?; + let rhs_dt = with_timezone_to_naive_datetime(rhs_ts, rhs_tz, mode)?; + let delta_secs = lhs_dt.signed_duration_since(rhs_dt); + + match mode { + IntervalMode::Milli => { + let as_millisecs = delta_secs.num_milliseconds(); + Ok(ScalarValue::new_interval_dt( + (as_millisecs / MILLISECS_IN_ONE_DAY) as i32, + (as_millisecs % MILLISECS_IN_ONE_DAY) as i32, + )) + } + IntervalMode::Nano => { + let as_nanosecs = delta_secs.num_nanoseconds().ok_or_else(|| { + DataFusionError::Execution(String::from( + "Can not compute timestamp differences with nanosecond precision", + )) + })?; + Ok(ScalarValue::new_interval_mdn( + 0, + (as_nanosecs / NANOSECS_IN_ONE_DAY) as i32, + as_nanosecs % NANOSECS_IN_ONE_DAY, + )) + } + } +} + +/// This function creates the [`NaiveDateTime`] object corresponding to the +/// given timestamp using the units (tick size) implied by argument `mode`. +#[inline] +fn with_timezone_to_naive_datetime( + ts: i64, + tz: &Option, + mode: IntervalMode, +) -> Result { + let datetime = if let IntervalMode::Milli = mode { + ticks_to_naive_datetime::<1_000_000>(ts) + } else { + ticks_to_naive_datetime::<1>(ts) + }?; + + if let Some(tz) = tz { + let parsed_tz: Tz = FromStr::from_str(tz).map_err(|_| { + DataFusionError::Execution("cannot parse given timezone".to_string()) + })?; + let offset = parsed_tz + .offset_from_local_datetime(&datetime) + .single() + .ok_or_else(|| { + DataFusionError::Execution( + "error conversion result of timezone offset".to_string(), + ) + })?; + return Ok(DateTime::::from_local(datetime, offset).naive_utc()); + } + Ok(datetime) +} + +/// This function creates the [`NaiveDateTime`] object corresponding to the +/// given timestamp, whose tick size is specified by `UNIT_NANOS`. 
+#[inline] +fn ticks_to_naive_datetime(ticks: i64) -> Result { + NaiveDateTime::from_timestamp_opt( + (ticks * UNIT_NANOS) / 1_000_000_000, + ((ticks * UNIT_NANOS) % 1_000_000_000) as u32, + ) + .ok_or_else(|| { + DataFusionError::Execution( + "Can not convert given timestamp to a NaiveDateTime".to_string(), + ) + }) +} + #[inline] pub fn date32_add(days: i32, scalar: &ScalarValue, sign: i32) -> Result { let epoch = NaiveDate::from_ymd_opt(1970, 1, 1).unwrap(); let prior = epoch.add(Duration::days(days as i64)); - let posterior = do_date_math(prior, scalar, sign)?; - Ok(posterior.sub(epoch).num_days() as i32) + do_date_math(prior, scalar, sign).map(|d| d.sub(epoch).num_days() as i32) } #[inline] pub fn date64_add(ms: i64, scalar: &ScalarValue, sign: i32) -> Result { let epoch = NaiveDate::from_ymd_opt(1970, 1, 1).unwrap(); let prior = epoch.add(Duration::milliseconds(ms)); - let posterior = do_date_math(prior, scalar, sign)?; - Ok(posterior.sub(epoch).num_milliseconds()) + do_date_math(prior, scalar, sign).map(|d| d.sub(epoch).num_milliseconds()) } #[inline] pub fn seconds_add(ts_s: i64, scalar: &ScalarValue, sign: i32) -> Result { - Ok(do_date_time_math(ts_s, 0, scalar, sign)?.timestamp()) + do_date_time_math(ts_s, 0, scalar, sign).map(|dt| dt.timestamp()) } #[inline] pub fn milliseconds_add(ts_ms: i64, scalar: &ScalarValue, sign: i32) -> Result { let secs = ts_ms / 1000; let nsecs = ((ts_ms % 1000) * 1_000_000) as u32; - Ok(do_date_time_math(secs, nsecs, scalar, sign)?.timestamp_millis()) + do_date_time_math(secs, nsecs, scalar, sign).map(|dt| dt.timestamp_millis()) } #[inline] pub fn microseconds_add(ts_us: i64, scalar: &ScalarValue, sign: i32) -> Result { let secs = ts_us / 1_000_000; let nsecs = ((ts_us % 1_000_000) * 1000) as u32; - Ok(do_date_time_math(secs, nsecs, scalar, sign)?.timestamp_nanos() / 1000) + do_date_time_math(secs, nsecs, scalar, sign).map(|dt| dt.timestamp_nanos() / 1000) } #[inline] pub fn nanoseconds_add(ts_ns: i64, scalar: &ScalarValue, 
sign: i32) -> Result { let secs = ts_ns / 1_000_000_000; let nsecs = (ts_ns % 1_000_000_000) as u32; - Ok(do_date_time_math(secs, nsecs, scalar, sign)?.timestamp_nanos()) + do_date_time_math(secs, nsecs, scalar, sign).map(|dt| dt.timestamp_nanos()) } #[inline] @@ -2921,6 +3287,7 @@ mod tests { use arrow::compute::kernels; use arrow::datatypes::ArrowPrimitiveType; + use rand::Rng; use crate::cast::{as_string_array, as_uint32_array, as_uint64_array}; use crate::from_slice::FromSlice; @@ -3707,6 +4074,53 @@ mod tests { ])), None ); + // Different type of intervals can be compared. + assert!( + IntervalYearMonth(Some(IntervalYearMonthType::make_value(1, 2))) + < IntervalMonthDayNano(Some(IntervalMonthDayNanoType::make_value( + 14, 0, 1 + ))), + ); + assert!( + IntervalYearMonth(Some(IntervalYearMonthType::make_value(0, 4))) + >= IntervalDayTime(Some(IntervalDayTimeType::make_value(119, 1))) + ); + assert!( + IntervalDayTime(Some(IntervalDayTimeType::make_value(12, 86_399_999))) + >= IntervalDayTime(Some(IntervalDayTimeType::make_value(12, 0))) + ); + assert!( + IntervalYearMonth(Some(IntervalYearMonthType::make_value(2, 12))) + == IntervalMonthDayNano(Some(IntervalMonthDayNanoType::make_value( + 36, 0, 0 + ))), + ); + assert!( + IntervalYearMonth(Some(IntervalYearMonthType::make_value(0, 0))) + != IntervalDayTime(Some(IntervalDayTimeType::make_value(0, 1))) + ); + assert!( + IntervalYearMonth(Some(IntervalYearMonthType::make_value(1, 4))) + == IntervalYearMonth(Some(IntervalYearMonthType::make_value(0, 16))), + ); + assert!( + IntervalYearMonth(Some(IntervalYearMonthType::make_value(0, 3))) + > IntervalMonthDayNano(Some(IntervalMonthDayNanoType::make_value( + 2, + 28, + 999_999_999 + ))), + ); + assert!( + IntervalYearMonth(Some(IntervalYearMonthType::make_value(0, 1))) + > IntervalDayTime(Some(IntervalDayTimeType::make_value(29, 9_999))), + ); + assert!( + IntervalMonthDayNano(Some(IntervalMonthDayNanoType::make_value(1, 12, 34))) + > 
IntervalMonthDayNano(Some(IntervalMonthDayNanoType::make_value( + 0, 142, 34 + ))) + ); } #[test] @@ -4486,4 +4900,513 @@ mod tests { assert!(distance.is_none()); } } + + #[test] + fn test_scalar_interval_add() { + let cases = [ + ( + ScalarValue::new_interval_ym(1, 12), + ScalarValue::new_interval_ym(1, 12), + ScalarValue::new_interval_ym(2, 24), + ), + ( + ScalarValue::new_interval_dt(1, 999), + ScalarValue::new_interval_dt(1, 999), + ScalarValue::new_interval_dt(2, 1998), + ), + ( + ScalarValue::new_interval_mdn(12, 15, 123_456), + ScalarValue::new_interval_mdn(12, 15, 123_456), + ScalarValue::new_interval_mdn(24, 30, 246_912), + ), + ( + ScalarValue::new_interval_ym(0, 1), + ScalarValue::new_interval_dt(29, 86_390), + ScalarValue::new_interval_mdn(1, 29, 86_390_000_000), + ), + ( + ScalarValue::new_interval_ym(0, 1), + ScalarValue::new_interval_mdn(2, 10, 999_999_999), + ScalarValue::new_interval_mdn(3, 10, 999_999_999), + ), + ( + ScalarValue::new_interval_dt(400, 123_456), + ScalarValue::new_interval_ym(1, 1), + ScalarValue::new_interval_mdn(13, 400, 123_456_000_000), + ), + ( + ScalarValue::new_interval_dt(65, 321), + ScalarValue::new_interval_mdn(2, 5, 1_000_000), + ScalarValue::new_interval_mdn(2, 70, 322_000_000), + ), + ( + ScalarValue::new_interval_mdn(12, 15, 123_456), + ScalarValue::new_interval_ym(2, 0), + ScalarValue::new_interval_mdn(36, 15, 123_456), + ), + ( + ScalarValue::new_interval_mdn(12, 15, 100_000), + ScalarValue::new_interval_dt(370, 1), + ScalarValue::new_interval_mdn(12, 385, 1_100_000), + ), + ]; + for (lhs, rhs, expected) in cases.iter() { + let result = lhs.add(rhs).unwrap(); + let result_commute = rhs.add(lhs).unwrap(); + assert_eq!(*expected, result, "lhs:{:?} + rhs:{:?}", lhs, rhs); + assert_eq!(*expected, result_commute, "lhs:{:?} + rhs:{:?}", rhs, lhs); + } + } + + #[test] + fn test_scalar_interval_sub() { + let cases = [ + ( + ScalarValue::new_interval_ym(1, 12), + ScalarValue::new_interval_ym(1, 12), + 
ScalarValue::new_interval_ym(0, 0), + ), + ( + ScalarValue::new_interval_dt(1, 999), + ScalarValue::new_interval_dt(1, 999), + ScalarValue::new_interval_dt(0, 0), + ), + ( + ScalarValue::new_interval_mdn(12, 15, 123_456), + ScalarValue::new_interval_mdn(12, 15, 123_456), + ScalarValue::new_interval_mdn(0, 0, 0), + ), + ( + ScalarValue::new_interval_ym(0, 1), + ScalarValue::new_interval_dt(29, 999_999), + ScalarValue::new_interval_mdn(1, -29, -999_999_000_000), + ), + ( + ScalarValue::new_interval_ym(0, 1), + ScalarValue::new_interval_mdn(2, 10, 999_999_999), + ScalarValue::new_interval_mdn(-1, -10, -999_999_999), + ), + ( + ScalarValue::new_interval_dt(400, 123_456), + ScalarValue::new_interval_ym(1, 1), + ScalarValue::new_interval_mdn(-13, 400, 123_456_000_000), + ), + ( + ScalarValue::new_interval_dt(65, 321), + ScalarValue::new_interval_mdn(2, 5, 1_000_000), + ScalarValue::new_interval_mdn(-2, 60, 320_000_000), + ), + ( + ScalarValue::new_interval_mdn(12, 15, 123_456), + ScalarValue::new_interval_ym(2, 0), + ScalarValue::new_interval_mdn(-12, 15, 123_456), + ), + ( + ScalarValue::new_interval_mdn(12, 15, 100_000), + ScalarValue::new_interval_dt(370, 1), + ScalarValue::new_interval_mdn(12, -355, -900_000), + ), + ]; + for (lhs, rhs, expected) in cases.iter() { + let result = lhs.sub(rhs).unwrap(); + assert_eq!(*expected, result, "lhs:{:?} - rhs:{:?}", lhs, rhs); + } + } + + #[test] + fn timestamp_op_tests() { + // positive interval, edge cases + let test_data = get_timestamp_test_data(1); + for (lhs, rhs, expected) in test_data.into_iter() { + assert_eq!(expected, lhs.sub(rhs).unwrap()) + } + + // negative interval, edge cases + let test_data = get_timestamp_test_data(-1); + for (rhs, lhs, expected) in test_data.into_iter() { + assert_eq!(expected, lhs.sub(rhs).unwrap()); + } + } + #[test] + fn timestamp_op_random_tests() { + // timestamp1 + (or -) interval = timestamp2 + // timestamp2 - timestamp1 (or timestamp1 - timestamp2) = interval ? 
+ let sample_size = 1000000; + let timestamps1 = get_random_timestamps(sample_size); + let intervals = get_random_intervals(sample_size); + // ts(sec) + interval(ns) = ts(sec); however, + // ts(sec) - ts(sec) cannot be = interval(ns). Therefore, + // timestamps are more precise than intervals in tests. + for (idx, ts1) in timestamps1.iter().enumerate() { + if idx % 2 == 0 { + let timestamp2 = ts1.add(intervals[idx].clone()).unwrap(); + assert_eq!( + intervals[idx], + timestamp2.sub(ts1).unwrap(), + "index:{}, operands: {:?} (-) {:?}", + idx, + timestamp2, + ts1 + ); + } else { + let timestamp2 = ts1.sub(intervals[idx].clone()).unwrap(); + assert_eq!( + intervals[idx], + ts1.sub(timestamp2.clone()).unwrap(), + "index:{}, operands: {:?} (-) {:?}", + idx, + ts1, + timestamp2 + ); + }; + } + } + + fn get_timestamp_test_data( + sign: i32, + ) -> Vec<(ScalarValue, ScalarValue, ScalarValue)> { + vec![ + ( + // 1st test case, the same instant expressed in different timezones. + // Since they are timestamps with nanosecond precision, the expected type is + // [`IntervalMonthDayNanoType`] + ScalarValue::TimestampNanosecond( + Some( + NaiveDate::from_ymd_opt(2023, 1, 1) + .unwrap() + .and_hms_nano_opt(12, 0, 0, 000_000_000) + .unwrap() + .timestamp_nanos(), + ), + Some("+12:00".to_string()), + ), + ScalarValue::TimestampNanosecond( + Some( + NaiveDate::from_ymd_opt(2023, 1, 1) + .unwrap() + .and_hms_nano_opt(0, 0, 0, 000_000_000) + .unwrap() + .timestamp_nanos(), + ), + Some("+00:00".to_string()), + ), + ScalarValue::new_interval_mdn(0, 0, 0), + ), + // 2nd test case, january with 31 days plus february with 28 days, with timezone + ( + ScalarValue::TimestampMicrosecond( + Some( + NaiveDate::from_ymd_opt(2023, 3, 1) + .unwrap() + .and_hms_micro_opt(2, 0, 0, 000_000) + .unwrap() + .timestamp_micros(), + ), + Some("+01:00".to_string()), + ), + ScalarValue::TimestampMicrosecond( + Some( + NaiveDate::from_ymd_opt(2023, 1, 1) + .unwrap() + .and_hms_micro_opt(0, 0, 0, 000_000) + 
.unwrap() + .timestamp_micros(), + ), + Some("-01:00".to_string()), + ), + ScalarValue::new_interval_mdn(0, sign * 59, 0), + ), + // 3rd test case, 29-day-long February minus the previous year, with timezone + ( + ScalarValue::TimestampMillisecond( + Some( + NaiveDate::from_ymd_opt(2024, 2, 29) + .unwrap() + .and_hms_milli_opt(10, 10, 0, 000) + .unwrap() + .timestamp_millis(), + ), + Some("+10:10".to_string()), + ), + ScalarValue::TimestampMillisecond( + Some( + NaiveDate::from_ymd_opt(2023, 12, 31) + .unwrap() + .and_hms_milli_opt(1, 0, 0, 000) + .unwrap() + .timestamp_millis(), + ), + Some("+01:00".to_string()), + ), + ScalarValue::new_interval_dt(sign * 60, 0), + ), + // 4th test case, leap years occur mostly every 4 years, but every 100 years + // we skip a leap year unless the year is divisible by 400, so 31 + 28 = 59 + ( + ScalarValue::TimestampSecond( + Some( + NaiveDate::from_ymd_opt(2100, 3, 1) + .unwrap() + .and_hms_opt(0, 0, 0) + .unwrap() + .timestamp(), + ), + Some("-11:59".to_string()), + ), + ScalarValue::TimestampSecond( + Some( + NaiveDate::from_ymd_opt(2100, 1, 1) + .unwrap() + .and_hms_opt(23, 58, 0) + .unwrap() + .timestamp(), + ), + Some("+11:59".to_string()), + ), + ScalarValue::new_interval_dt(sign * 59, 0), + ), + // 5th test case, the difference looks positive without timezones, but with + // timezones the resulting interval is negative + ( + ScalarValue::TimestampMillisecond( + Some( + NaiveDate::from_ymd_opt(2023, 1, 1) + .unwrap() + .and_hms_milli_opt(6, 00, 0, 000) + .unwrap() + .timestamp_millis(), + ), + Some("+06:00".to_string()), + ), + ScalarValue::TimestampMillisecond( + Some( + NaiveDate::from_ymd_opt(2023, 1, 1) + .unwrap() + .and_hms_milli_opt(0, 0, 0, 000) + .unwrap() + .timestamp_millis(), + ), + Some("-12:00".to_string()), + ), + ScalarValue::new_interval_dt(0, sign * -43_200_000), + ), + // 6th test case, no problem before unix epoch beginning + ( + ScalarValue::TimestampMicrosecond( + Some( + 
NaiveDate::from_ymd_opt(1970, 1, 1) + .unwrap() + 
.and_hms_micro_opt(1, 2, 3, 15) + .unwrap() + .timestamp_micros(), + ), + None, + ), + ScalarValue::TimestampMicrosecond( + Some( + NaiveDate::from_ymd_opt(1969, 1, 1) + .unwrap() + .and_hms_micro_opt(0, 0, 0, 000_000) + .unwrap() + .timestamp_micros(), + ), + None, + ), + ScalarValue::new_interval_mdn( + 0, + 365 * sign, + sign as i64 * 3_723_000_015_000, + ), + ), + // 7th test case, no problem with big intervals + ( + ScalarValue::TimestampNanosecond( + Some( + NaiveDate::from_ymd_opt(2100, 1, 1) + .unwrap() + .and_hms_nano_opt(0, 0, 0, 0) + .unwrap() + .timestamp_nanos(), + ), + None, + ), + ScalarValue::TimestampNanosecond( + Some( + NaiveDate::from_ymd_opt(2000, 1, 1) + .unwrap() + .and_hms_nano_opt(0, 0, 0, 000_000_000) + .unwrap() + .timestamp_nanos(), + ), + None, + ), + ScalarValue::new_interval_mdn(0, sign * 36525, 0), + ), + // 8th test case, no problem detecting 366-days long years + ( + ScalarValue::TimestampSecond( + Some( + NaiveDate::from_ymd_opt(2041, 1, 1) + .unwrap() + .and_hms_opt(0, 0, 0) + .unwrap() + .timestamp(), + ), + None, + ), + ScalarValue::TimestampSecond( + Some( + NaiveDate::from_ymd_opt(2040, 1, 1) + .unwrap() + .and_hms_opt(0, 0, 0) + .unwrap() + .timestamp(), + ), + None, + ), + ScalarValue::new_interval_dt(sign * 366, 0), + ), + // 9th test case, no problem with unrealistic timezones + ( + ScalarValue::TimestampSecond( + Some( + NaiveDate::from_ymd_opt(2023, 1, 3) + .unwrap() + .and_hms_opt(0, 0, 0) + .unwrap() + .timestamp(), + ), + Some("+23:59".to_string()), + ), + ScalarValue::TimestampSecond( + Some( + NaiveDate::from_ymd_opt(2023, 1, 1) + .unwrap() + .and_hms_opt(0, 2, 0) + .unwrap() + .timestamp(), + ), + Some("-23:59".to_string()), + ), + ScalarValue::new_interval_dt(0, 0), + ), + // 10th test case, parsing different types of timezone input + ( + ScalarValue::TimestampSecond( + Some( + NaiveDate::from_ymd_opt(2023, 3, 17) + .unwrap() + .and_hms_opt(14, 10, 0) + .unwrap() + .timestamp(), + ), + 
Some("Europe/Istanbul".to_string()), + ), + ScalarValue::TimestampSecond( + Some( + NaiveDate::from_ymd_opt(2023, 3, 17) + .unwrap() + .and_hms_opt(4, 10, 0) + .unwrap() + .timestamp(), + ), + Some("America/Los_Angeles".to_string()), + ), + ScalarValue::new_interval_dt(0, 0), + ), + ] + } + + fn get_random_timestamps(sample_size: u64) -> Vec { + let vector_size = sample_size; + let mut timestamp = vec![]; + let mut rng = rand::thread_rng(); + for i in 0..vector_size { + let year = rng.gen_range(1995..=2050); + let month = rng.gen_range(1..=12); + let day = rng.gen_range(1..=28); // to exclude invalid dates + let hour = rng.gen_range(0..=23); + let minute = rng.gen_range(0..=59); + let second = rng.gen_range(0..=59); + if i % 4 == 0 { + timestamp.push(ScalarValue::TimestampSecond( + Some( + NaiveDate::from_ymd_opt(year, month, day) + .unwrap() + .and_hms_opt(hour, minute, second) + .unwrap() + .timestamp(), + ), + None, + )) + } else if i % 4 == 1 { + let millisec = rng.gen_range(0..=999); + timestamp.push(ScalarValue::TimestampMillisecond( + Some( + NaiveDate::from_ymd_opt(year, month, day) + .unwrap() + .and_hms_milli_opt(hour, minute, second, millisec) + .unwrap() + .timestamp_millis(), + ), + None, + )) + } else if i % 4 == 2 { + let microsec = rng.gen_range(0..=999_999); + timestamp.push(ScalarValue::TimestampMicrosecond( + Some( + NaiveDate::from_ymd_opt(year, month, day) + .unwrap() + .and_hms_micro_opt(hour, minute, second, microsec) + .unwrap() + .timestamp_micros(), + ), + None, + )) + } else if i % 4 == 3 { + let nanosec = rng.gen_range(0..=999_999_999); + timestamp.push(ScalarValue::TimestampNanosecond( + Some( + NaiveDate::from_ymd_opt(year, month, day) + .unwrap() + .and_hms_nano_opt(hour, minute, second, nanosec) + .unwrap() + .timestamp_nanos(), + ), + None, + )) + } + } + timestamp + } + + fn get_random_intervals(sample_size: u64) -> Vec { + let vector_size = sample_size; + let mut intervals = vec![]; + let mut rng = rand::thread_rng(); + const 
SECS_IN_ONE_DAY: i32 = 86_400; + const MICROSECS_IN_ONE_DAY: i64 = 86_400_000_000; + for i in 0..vector_size { + if i % 4 == 0 { + let days = rng.gen_range(0..5000); + // to not break second precision + let millis = rng.gen_range(0..SECS_IN_ONE_DAY) * 1000; + intervals.push(ScalarValue::new_interval_dt(days, millis)); + } else if i % 4 == 1 { + let days = rng.gen_range(0..5000); + let millisec = rng.gen_range(0..(MILLISECS_IN_ONE_DAY as i32)); + intervals.push(ScalarValue::new_interval_dt(days, millisec)); + } else if i % 4 == 2 { + let days = rng.gen_range(0..5000); + // to not break microsec precision + let nanosec = rng.gen_range(0..MICROSECS_IN_ONE_DAY) * 1000; + intervals.push(ScalarValue::new_interval_mdn(0, days, nanosec)); + } else { + let days = rng.gen_range(0..5000); + let nanosec = rng.gen_range(0..NANOSECS_IN_ONE_DAY); + intervals.push(ScalarValue::new_interval_mdn(0, days, nanosec)); + } + } + intervals + } }