diff --git a/crates/catalog/hms/src/catalog.rs b/crates/catalog/hms/src/catalog.rs index 9512194d0..4ca66bd21 100644 --- a/crates/catalog/hms/src/catalog.rs +++ b/crates/catalog/hms/src/catalog.rs @@ -56,8 +56,7 @@ pub enum HmsThriftTransport { pub struct HmsCatalogConfig { address: String, thrift_transport: HmsThriftTransport, - #[builder(default, setter(strip_option))] - warehouse: Option, + warehouse: String, #[builder(default)] props: HashMap, } @@ -68,6 +67,7 @@ struct HmsClient(ThriftHiveMetastoreClient); pub struct HmsCatalog { config: HmsCatalogConfig, client: HmsClient, + file_io: FileIO, } impl Debug for HmsCatalog { @@ -105,11 +105,20 @@ impl HmsCatalog { .build(), }; + let file_io = FileIO::from_path(&config.warehouse)? + .with_props(&config.props) + .build()?; + Ok(Self { config, client: HmsClient(client), + file_io, }) } + /// Get the catalogs `FileIO` + pub fn file_io(&self) -> FileIO { + self.file_io.clone() + } } #[async_trait] @@ -333,17 +342,18 @@ impl Catalog for HmsCatalog { Some(location) => location.clone(), None => { let ns = self.get_namespace(namespace).await?; - get_default_table_location(&ns, &table_name, self.config.warehouse.clone())? + get_default_table_location(&ns, &table_name, &self.config.warehouse) } }; let metadata = TableMetadataBuilder::from_table_creation(creation)?.build()?; let metadata_location = create_metadata_location(&location, 0)?; - let file_io = FileIO::from_path(&metadata_location)? - .with_props(&self.config.props) - .build()?; - let mut file = file_io.new_output(&metadata_location)?.writer().await?; + let mut file = self + .file_io + .new_output(&metadata_location)? + .writer() + .await?; file.write_all(&serde_json::to_vec(&metadata)?).await?; file.shutdown().await?; @@ -363,7 +373,7 @@ impl Catalog for HmsCatalog { .map_err(from_thrift_error)?; let table = Table::builder() - .file_io(file_io) + .file_io(self.file_io()) .metadata_location(metadata_location) .metadata(metadata) .identifier(TableIdent::new(NamespaceIdent::new(db_name), table_name)) @@ -396,16 +406,13 @@ impl Catalog for HmsCatalog { let metadata_location = get_metadata_location(&hive_table.parameters)?; - let file_io = FileIO::from_path(&metadata_location)? - .with_props(&self.config.props) - .build()?; - let mut reader = file_io.new_input(&metadata_location)?.reader().await?; + let mut reader = self.file_io.new_input(&metadata_location)?.reader().await?; let mut metadata_str = String::new(); reader.read_to_string(&mut metadata_str).await?; let metadata = serde_json::from_str::(&metadata_str)?; let table = Table::builder() - .file_io(file_io) + .file_io(self.file_io()) .metadata_location(metadata_location) .metadata(metadata) .identifier(TableIdent::new( diff --git a/crates/catalog/hms/src/utils.rs b/crates/catalog/hms/src/utils.rs index 0653de79f..04ee5d4b3 100644 --- a/crates/catalog/hms/src/utils.rs +++ b/crates/catalog/hms/src/utils.rs @@ -231,24 +231,16 @@ pub(crate) fn validate_namespace(namespace: &NamespaceIdent) -> Result { pub(crate) fn get_default_table_location( namespace: &Namespace, table_name: impl AsRef, - warehouse: Option, -) -> Result { + warehouse: impl AsRef, +) -> String { let properties = namespace.properties(); let location = match properties.get(LOCATION) { - Some(location) => location.to_string(), - None => match warehouse { - Some(location) => location, - None => { - return Err(Error::new( - ErrorKind::DataInvalid, - "No default path is set, please specify a location when creating a table", - )) - } - }, + Some(location) => location, + None => warehouse.as_ref(), }; - Ok(format!("{}/{}", location, table_name.as_ref())) + format!("{}/{}", location, table_name.as_ref()) } /// Create metadata location from `location` and `version` @@ -443,11 +435,7 @@ mod tests { let table_name = "my_table"; let expected = "db_location/my_table"; - let result = get_default_table_location( - &namespace, - table_name, - Some("warehouse_location".to_string()), - )?; + let result = get_default_table_location(&namespace, table_name, "warehouse_location"); assert_eq!(expected, result); @@ -460,27 +448,13 @@ mod tests { let table_name = "my_table"; let expected = "warehouse_location/my_table"; - let result = get_default_table_location( - &namespace, - table_name, - Some("warehouse_location".to_string()), - )?; + let result = get_default_table_location(&namespace, table_name, "warehouse_location"); assert_eq!(expected, result); Ok(()) } - #[test] - fn test_get_default_table_location_missing() { - let namespace = Namespace::new(NamespaceIdent::new("default".into())); - let table_name = "my_table"; - - let result = get_default_table_location(&namespace, table_name, None); - - assert!(result.is_err()); - } - #[test] fn test_convert_to_namespace() -> Result<()> { let properties = HashMap::from([ diff --git a/crates/catalog/hms/tests/hms_catalog_test.rs b/crates/catalog/hms/tests/hms_catalog_test.rs index 3ebc80d62..a48d0568c 100644 --- a/crates/catalog/hms/tests/hms_catalog_test.rs +++ b/crates/catalog/hms/tests/hms_catalog_test.rs @@ -19,9 +19,7 @@ use std::collections::HashMap; -use iceberg::io::{ - FileIO, FileIOBuilder, S3_ACCESS_KEY_ID, S3_ENDPOINT, S3_REGION, S3_SECRET_ACCESS_KEY, -}; +use iceberg::io::{S3_ACCESS_KEY_ID, S3_ENDPOINT, S3_REGION, S3_SECRET_ACCESS_KEY}; use iceberg::spec::{NestedField, PrimitiveType, Schema, Type}; use iceberg::{Catalog, Namespace, NamespaceIdent, TableCreation, TableIdent}; use iceberg_catalog_hms::{HmsCatalog, HmsCatalogConfig, HmsThriftTransport}; @@ -37,7 +35,6 @@ type Result = std::result::Result; struct TestFixture { _docker_compose: DockerCompose, hms_catalog: HmsCatalog, - file_io: FileIO, } async fn set_test_fixture(func: &str) -> TestFixture { @@ -73,11 +70,6 @@ async fn set_test_fixture(func: &str) -> TestFixture { (S3_REGION.to_string(), "us-east-1".to_string()), ]); - let file_io = FileIOBuilder::new("s3a") - .with_props(&props) - .build() - .unwrap(); - let config = HmsCatalogConfig::builder() .address(format!("{}:{}", hms_catalog_ip, HMS_CATALOG_PORT)) .thrift_transport(HmsThriftTransport::Buffered) @@ -90,7 +82,6 @@ async fn set_test_fixture(func: &str) -> TestFixture { TestFixture { _docker_compose: docker_compose, hms_catalog, - file_io, } } @@ -219,7 +210,8 @@ async fn test_create_table() -> Result<()> { .is_some_and(|location| location.starts_with("s3a://warehouse/hive/metadata/00000-"))); assert!( fixture - .file_io + .hms_catalog + .file_io() .is_exist("s3a://warehouse/hive/metadata/") .await? );