diff --git a/dozer-cli/Cargo.toml b/dozer-cli/Cargo.toml index ead43df48e..4f35b24589 100644 --- a/dozer-cli/Cargo.toml +++ b/dozer-cli/Cargo.toml @@ -68,3 +68,5 @@ snowflake = ["dozer-ingestion/snowflake"] mongodb = ["dozer-ingestion/mongodb"] onnx = ["dozer-sql/onnx"] tokio-console = ["dozer-tracing/tokio-console"] +javascript = ["dozer-ingestion/javascript", "dozer-sql/javascript"] +datafusion = ["dozer-ingestion/datafusion"] diff --git a/dozer-ingestion/tests/test_suite/mod.rs b/dozer-ingestion/tests/test_suite/mod.rs index 309fe6642a..58ee1af7e5 100644 --- a/dozer-ingestion/tests/test_suite/mod.rs +++ b/dozer-ingestion/tests/test_suite/mod.rs @@ -51,6 +51,6 @@ pub use basic::{ #[cfg(feature = "mongodb")] pub use connectors::MongodbConnectorTest; -pub use connectors::PostgresConnectorTest; #[cfg(feature = "datafusion")] -pub use LocalStorageObjectStoreConnectorTest; +pub use connectors::LocalStorageObjectStoreConnectorTest; +pub use connectors::PostgresConnectorTest; diff --git a/dozer-sql/Cargo.toml b/dozer-sql/Cargo.toml index 6a86af5523..00d9aa1183 100644 --- a/dozer-sql/Cargo.toml +++ b/dozer-sql/Cargo.toml @@ -27,3 +27,4 @@ proptest = "1.3.1" [features] python = ["dozer-sql-expression/python"] onnx = ["dozer-sql-expression/onnx"] +javascript = ["dozer-sql-expression/javascript"] diff --git a/dozer-sql/expression/src/builder.rs b/dozer-sql/expression/src/builder.rs index 1d78330931..19a6d25028 100644 --- a/dozer-sql/expression/src/builder.rs +++ b/dozer-sql/expression/src/builder.rs @@ -547,7 +547,7 @@ impl ExpressionBuilder { } UdfType::JavaScript(config) => { - #[cfg(feature = "javasscript")] + #[cfg(feature = "javascript")] { self.parse_javascript_udf( function_name.clone(), @@ -562,7 +562,7 @@ impl ExpressionBuilder { #[cfg(not(feature = "javascript"))] { let _ = config; - Err(Error::OnnxNotEnabled) + Err(Error::JavaScriptNotEnabled) } } }; diff --git a/dozer-sql/expression/src/error.rs b/dozer-sql/expression/src/error.rs index 1fd7a9bef1..2b923ff8bf 100644 --- a/dozer-sql/expression/src/error.rs +++ b/dozer-sql/expression/src/error.rs @@ -103,7 +103,7 @@ pub enum Error { #[error("Javascript is not enabled")] JavaScriptNotEnabled, - #[cfg(feature = "javasscript")] + #[cfg(feature = "javascript")] #[error("JavaScript UDF error: {0}")] JavaScript(#[from] crate::javascript::Error), diff --git a/dozer-sql/expression/src/scalar/string.rs b/dozer-sql/expression/src/scalar/string.rs index b8680105f6..28589eabca 100644 --- a/dozer-sql/expression/src/scalar/string.rs +++ b/dozer-sql/expression/src/scalar/string.rs @@ -407,42 +407,54 @@ pub(crate) fn evaluate_substr( let arg_value = arg_field.to_string(); let position_field = position.evaluate(record, schema)?; - let position_result = position_field.to_uint(); - if position_result.is_none() { - return Err(Error::InvalidFunctionArgument { + let position_value = position_field + .to_int() + .ok_or_else(|| Error::InvalidFunctionArgument { function_name: "SUBSTR".to_string(), argument_index: 1, argument: position_field, - }); - } - let position_value = position_result.unwrap(); + })?; + // 0 is treated as 1 + let position_value_normalized = if position_value == 0 { + 1 + } else { + position_value + }; let length_value = match length { Some(length_expr) => { let length_field = length_expr.evaluate(record, schema)?; - let length_result = length_field.to_i128(); - if length_result.is_none() { - return Err(Error::InvalidFunctionArgument { + let length = length_field + .to_int() + .ok_or_else(|| Error::InvalidFunctionArgument { function_name: "SUBSTR".to_string(), argument_index: 2, argument: length_field, - }); + })?; + if length < 1 { + return Ok(Field::Null); } - length_result.unwrap() + length as usize } - None => arg_value.len() as i128, + None => arg_value.len(), }; - let mut iter = arg_value.char_indices(); - let (start, _) = iter - .nth(position_value as usize) - .unwrap_or((arg_value.len(), ' ')); - let (end, _) = iter - .nth(length_value as usize) - .unwrap_or((arg_value.len(), ' ')); - let result = &arg_value[start..end]; + let start = if position_value_normalized >= 1 { + arg_value + .char_indices() + .nth(position_value_normalized as usize - 1) + .map_or(arg_value.len(), |(i, _)| i) + } else { + arg_value + .char_indices() + .nth_back((-position_value_normalized) as usize - 1) + .map_or(0, |(i, _)| i) + }; - Ok(Field::String(result.to_owned())) + let remainder = &arg_value[start..]; + Ok(Field::String( + remainder.chars().take(length_value).collect(), + )) } pub fn validate_replace(args: &[Expression], schema: &Schema) -> Result { diff --git a/json_schemas/connections.json b/json_schemas/connections.json index c929085d1a..c860d813bd 100644 --- a/json_schemas/connections.json +++ b/json_schemas/connections.json @@ -7,12 +7,12 @@ "description": "Configuration for a Postgres connection", "examples": [ { - "database": "postgres", - "host": "localhost", + "user": "postgres", "password": "postgres", + "host": "localhost", "port": 5432, - "schema": "public", - "user": "postgres" + "database": "postgres", + "schema": "public" } ], "type": "object", @@ -97,11 +97,11 @@ { "provider": { "Log": { + "wss_url": "", "filter": { "from_block": 0, "to_block": null - }, - "wss_url": "" + } } } } @@ -266,12 +266,12 @@ "title": "GrpcConfig", "examples": [ { - "adapter": "arrow", "host": "localhost", "port": 50051, "schemas": { "Path": "schema.json" - } + }, + "adapter": "arrow" } ], "type": "object", @@ -342,15 +342,15 @@ "title": "SnowflakeConfig", "examples": [ { - "database": "database", - "driver": "SnowflakeDSIIDriver", - "password": "password", - "port": "443", - "role": "role", - "schema": "schema", "server": "..snowflakecomputing.com", + "port": "443", "user": "bob", - "warehouse": "warehouse" + "password": "password", + "database": "database", + "schema": "schema", + "warehouse": "warehouse", + "driver": "SnowflakeDSIIDriver", + "role": "role" } ], "type": "object", @@ -439,16 +439,16 @@ { "details": { "access_key_id": "", - "bucket_name": "", + "secret_access_key": "", "region": "", - "secret_access_key": "" + "bucket_name": "" }, "tables": [ { "config": { "CSV": { - "extension": ".csv", - "path": "path/to/file" + "path": "path/to/file", + "extension": ".csv" } }, "name": "table_name" @@ -598,8 +598,8 @@ { "config": { "CSV": { - "extension": ".csv", - "path": "path/to/table" + "path": "path/to/table", + "extension": ".csv" } }, "name": "table_name" @@ -732,8 +732,8 @@ { "tables": [ { - "name": "", - "path": "" + "path": "", + "name": "" } ] } @@ -797,8 +797,8 @@ "title": "MySQLConfig", "examples": [ { - "server_id": 1, - "url": "mysql://root:1234@localhost:3306/db_name" + "url": "mysql://root:1234@localhost:3306/db_name", + "server_id": 1 } ], "type": "object", diff --git a/json_schemas/dozer.json b/json_schemas/dozer.json index 884daad8ac..82c69b1431 100644 --- a/json_schemas/dozer.json +++ b/json_schemas/dozer.json @@ -129,9 +129,9 @@ }, "replication": { "default": { - "datacenter": "esp", "server_address": "0.0.0.0", - "server_port": 5929 + "server_port": 5929, + "datacenter": "esp" }, "allOf": [ { @@ -829,8 +829,8 @@ { "tables": [ { - "name": "", - "path": "" + "path": "", + "name": "" } ] } @@ -968,11 +968,11 @@ { "provider": { "Log": { + "wss_url": "", "filter": { "from_block": 0, "to_block": null - }, - "wss_url": "" + } } } } @@ -1217,12 +1217,12 @@ "GrpcConfig": { "examples": [ { - "adapter": "arrow", "host": "localhost", "port": 50051, "schemas": { "Path": "schema.json" - } + }, + "adapter": "arrow" } ], "type": "object", @@ -1355,8 +1355,8 @@ { "config": { "CSV": { - "extension": ".csv", - "path": "path/to/table" + "path": "path/to/table", + "extension": ".csv" } }, "name": "table_name" @@ -1400,8 +1400,8 @@ "MySQLConfig": { "examples": [ { - "server_id": 1, - "url": "mysql://root:1234@localhost:3306/db_name" + "url": "mysql://root:1234@localhost:3306/db_name", + "server_id": 1 } ], "type": "object", @@ -1597,12 +1597,12 @@ "description": "Configuration for a Postgres connection", "examples": [ { - "database": "postgres", - "host": "localhost", + "user": "postgres", "password": "postgres", + "host": "localhost", "port": 5432, - "schema": "public", - "user": "postgres" + "database": "postgres", + "schema": "public" } ], "type": "object", @@ -1768,16 +1768,16 @@ { "details": { "access_key_id": "", - "bucket_name": "", + "secret_access_key": "", "region": "", - "secret_access_key": "" + "bucket_name": "" }, "tables": [ { "config": { "CSV": { - "extension": ".csv", - "path": "path/to/file" + "path": "path/to/file", + "extension": ".csv" } }, "name": "table_name" @@ -1873,15 +1873,15 @@ "SnowflakeConfig": { "examples": [ { - "database": "database", - "driver": "SnowflakeDSIIDriver", - "password": "password", - "port": "443", - "role": "role", - "schema": "schema", "server": "..snowflakecomputing.com", + "port": "443", "user": "bob", - "warehouse": "warehouse" + "password": "password", + "database": "database", + "schema": "schema", + "warehouse": "warehouse", + "driver": "SnowflakeDSIIDriver", + "role": "role" } ], "type": "object", @@ -2140,20 +2140,20 @@ "WebhookConfig": { "examples": [ { + "host": "localhost", + "port": 50059, "endpoints": [ { "path": "/ingest", - "schema": { - "Inline": "\n {\n \"users\": {\n \"schema\": {\n \"fields\": [\n {\n \"name\": \"id\",\n \"typ\": \"Int\",\n \"nullable\": false\n },\n {\n \"name\": \"name\",\n \"typ\": \"String\",\n \"nullable\": true\n },\n {\n \"name\": \"json\",\n \"typ\": \"Json\",\n \"nullable\": true\n }\n ]\n }\n }\n }\n " - }, "verbs": [ "POST", "DELETE" - ] + ], + "schema": { + "Inline": "\n {\n \"users\": {\n \"schema\": {\n \"fields\": [\n {\n \"name\": \"id\",\n \"typ\": \"Int\",\n \"nullable\": false\n },\n {\n \"name\": \"name\",\n \"typ\": \"String\",\n \"nullable\": true\n },\n {\n \"name\": \"json\",\n \"typ\": \"Json\",\n \"nullable\": true\n }\n ]\n }\n }\n }\n " + } } - ], - "host": "localhost", - "port": 50059 + ] } ], "type": "object", @@ -2215,13 +2215,13 @@ "examples": [ { "path": "/ingest", - "schema": { - "Inline": "\n {\n \"users\": {\n \"schema\": {\n \"fields\": [\n {\n \"name\": \"id\",\n \"typ\": \"Int\",\n \"nullable\": false\n },\n {\n \"name\": \"name\",\n \"typ\": \"String\",\n \"nullable\": true\n },\n {\n \"name\": \"json\",\n \"typ\": \"Json\",\n \"nullable\": true\n }\n ]\n }\n }\n }\n " - }, "verbs": [ "POST", "DELETE" - ] + ], + "schema": { + "Inline": "\n {\n \"users\": {\n \"schema\": {\n \"fields\": [\n {\n \"name\": \"id\",\n \"typ\": \"Int\",\n \"nullable\": false\n },\n {\n \"name\": \"name\",\n \"typ\": \"String\",\n \"nullable\": true\n },\n {\n \"name\": \"json\",\n \"typ\": \"Json\",\n \"nullable\": true\n }\n ]\n }\n }\n }\n " + } } ], "type": "object",