Skip to content

Commit

Permalink
API, Core: Support default values in UpdateSchema.
Browse files Browse the repository at this point in the history
  • Loading branch information
rdblue committed Feb 9, 2025
1 parent 81c4aee commit 0823872
Show file tree
Hide file tree
Showing 7 changed files with 671 additions and 138 deletions.
202 changes: 183 additions & 19 deletions api/src/main/java/org/apache/iceberg/UpdateSchema.java
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@

import java.util.Collection;
import org.apache.iceberg.exceptions.CommitFailedException;
import org.apache.iceberg.relocated.com.google.common.base.Preconditions;
import org.apache.iceberg.relocated.com.google.common.collect.Sets;
import org.apache.iceberg.types.Type;

Expand Down Expand Up @@ -49,14 +50,16 @@ public interface UpdateSchema extends PendingUpdate<Schema> {
UpdateSchema allowIncompatibleChanges();

/**
* Add a new top-level column.
* Add a new optional top-level column.
*
* <p>Because "." may be interpreted as a column path separator or may be used in field names, it
* is not allowed in names passed to this method. To add to nested structures or to add fields
* with names that contain ".", use {@link #addColumn(String, String, Type)}.
*
* <p>If type is a nested type, its field IDs are reassigned when added to the existing schema.
*
* <p>The added column will be optional with a null default value.
*
* @param name name for the new column
* @param type type for the new column
* @return this for method chaining
Expand All @@ -67,24 +70,52 @@ default UpdateSchema addColumn(String name, Type type) {
}

/**
* Add a new top-level column.
* Add a new optional top-level column.
*
* <p>Because "." may be interpreted as a column path separator or may be used in field names, it
* is not allowed in names passed to this method. To add to nested structures or to add fields
* with names that contain ".", use {@link #addColumn(String, String, Type)}.
*
* <p>If type is a nested type, its field IDs are reassigned when added to the existing schema.
*
* <p>The added column will be optional with a null default value.
*
* @param name name for the new column
* @param type type for the new column
* @param doc documentation string for the new column
* @return this for method chaining
* @throws IllegalArgumentException If name contains "."
*/
UpdateSchema addColumn(String name, Type type, String doc);
default UpdateSchema addColumn(String name, Type type, String doc) {
  // No default was requested, so hand off to the default-aware overload with a null default.
  final Object noDefault = null;
  return addColumn(name, type, doc, noDefault);
}

/**
* Add a new column to a nested struct.
* Add a new optional top-level column.
*
* <p>Because "." may be interpreted as a column path separator or may be used in field names, it
* is not allowed in names passed to this method. To add to nested structures or to add fields
* with names that contain ".", use {@link #addColumn(String, String, Type)}.
*
* <p>If type is a nested type, its field IDs are reassigned when added to the existing schema.
*
* @param name name for the new column
* @param type type for the new column
* @param doc documentation string for the new column
* @param defaultValue a default value for the column in existing rows
* @return this for method chaining
* @throws IllegalArgumentException If name contains "."
*/
default UpdateSchema addColumn(String name, Type type, String doc, Object defaultValue) {
  // "." could be a path separator or part of the name, so top-level names may not contain it.
  final boolean hasDot = name.contains(".");
  Preconditions.checkArgument(
      !hasDot,
      "Cannot add column with ambiguous name: %s, use addColumn(parent, name, type)",
      name);

  // A null parent places the new column at the root of the schema.
  final String rootParent = null;
  return addColumn(rootParent, name, type, doc, defaultValue);
}

/**
* Add a new optional column to a nested struct.
*
* <p>The parent name is used to find the parent using {@link Schema#findField(String)}. If the
* parent name is null, the new column will be added to the root as a top-level column. If parent
Expand All @@ -97,6 +128,8 @@ default UpdateSchema addColumn(String name, Type type) {
*
* <p>If type is a nested type, its field IDs are reassigned when added to the existing schema.
*
* <p>The added column will be optional with a null default value.
*
* @param parent name of the parent struct the column will be added to
* @param name name for the new column
* @param type type for the new column
Expand All @@ -108,7 +141,7 @@ default UpdateSchema addColumn(String parent, String name, Type type) {
}

/**
* Add a new column to a nested struct.
* Add a new optional column to a nested struct.
*
* <p>The parent name is used to find the parent using {@link Schema#findField(String)}. If the
* parent name is null, the new column will be added to the root as a top-level column. If parent
Expand All @@ -121,20 +154,51 @@ default UpdateSchema addColumn(String parent, String name, Type type) {
*
* <p>If type is a nested type, its field IDs are reassigned when added to the existing schema.
*
* <p>The added column will be optional with a null default value.
*
* @param parent name of the parent struct the column will be added to
* @param name name for the new column
* @param type type for the new column
* @param doc documentation string for the new column
* @return this for method chaining
* @throws IllegalArgumentException If parent doesn't identify a struct
*/
UpdateSchema addColumn(String parent, String name, Type type, String doc);
default UpdateSchema addColumn(String parent, String name, Type type, String doc) {
  // No default was requested, so hand off to the default-aware overload with a null default.
  final Object noDefault = null;
  return addColumn(parent, name, type, doc, noDefault);
}

/**
* Add a new optional column to a nested struct.
*
* <p>The parent name is used to find the parent using {@link Schema#findField(String)}. If the
* parent name is null, the new column will be added to the root as a top-level column. If parent
* identifies a struct, a new column is added to that struct. If it identifies a list, the column
* is added to the list element struct, and if it identifies a map, the new column is added to the
* map's value struct.
*
* <p>The given name is used to name the new column and names containing "." are not handled
* differently.
*
* <p>If type is a nested type, its field IDs are reassigned when added to the existing schema.
*
* @param parent name of the parent struct the column will be added to
* @param name name for the new column
* @param type type for the new column
* @param doc documentation string for the new column
* @param defaultValue a default value for the column in existing rows
* @return this for method chaining
* @throws IllegalArgumentException If parent doesn't identify a struct
*/
UpdateSchema addColumn(String parent, String name, Type type, String doc, Object defaultValue);

/**
* Add a new required top-level column.
*
* <p>This is an incompatible change that can break reading older data. This method will result in
* an exception unless {@link #allowIncompatibleChanges()} has been called.
* <p>Adding a required column without a default is an incompatible change that can break reading
* older data. To make this a compatible change, add a default value by calling {@link
* #updateColumnDefault(String, Object)} or use {@link #addRequiredColumn(String, Type,
* String, Object)} instead. To suppress exceptions thrown when an incompatible change is
* detected, call {@link #allowIncompatibleChanges()}.
*
* <p>Because "." may be interpreted as a column path separator or may be used in field names, it
* is not allowed in names passed to this method. To add to nested structures or to add fields
Expand All @@ -154,8 +218,11 @@ default UpdateSchema addRequiredColumn(String name, Type type) {
/**
* Add a new required top-level column.
*
* <p>This is an incompatible change that can break reading older data. This method will result in
* an exception unless {@link #allowIncompatibleChanges()} has been called.
* <p>Adding a required column without a default is an incompatible change that can break reading
* older data. To make this a compatible change, add a default value by calling {@link
* #updateColumnDefault(String, Object)} or use {@link #addRequiredColumn(String, Type,
* String, Object)} instead. To suppress exceptions thrown when an incompatible change is
* detected, call {@link #allowIncompatibleChanges()}.
*
* <p>Because "." may be interpreted as a column path separator or may be used in field names, it
* is not allowed in names passed to this method. To add to nested structures or to add fields
Expand All @@ -169,13 +236,41 @@ default UpdateSchema addRequiredColumn(String name, Type type) {
* @return this for method chaining
* @throws IllegalArgumentException If name contains "."
*/
UpdateSchema addRequiredColumn(String name, Type type, String doc);
default UpdateSchema addRequiredColumn(String name, Type type, String doc) {
  // No default was requested, so hand off to the default-aware overload with a null default.
  final Object noDefault = null;
  return addRequiredColumn(name, type, doc, noDefault);
}

/**
* Add a new required top-level column.
*
* <p>This is an incompatible change that can break reading older data. This method will result in
* an exception unless {@link #allowIncompatibleChanges()} has been called.
* <p>Because "." may be interpreted as a column path separator or may be used in field names, it
* is not allowed in names passed to this method. To add to nested structures or to add fields
* with names that contain ".", use {@link #addRequiredColumn(String, String, Type)}.
*
* <p>If type is a nested type, its field IDs are reassigned when added to the existing schema.
*
* @param name name for the new column
* @param type type for the new column
* @param doc documentation string for the new column
* @param defaultValue a default value for the column in existing rows
* @return this for method chaining
* @throws IllegalArgumentException If name contains "."
*/
default UpdateSchema addRequiredColumn(String name, Type type, String doc, Object defaultValue) {
  // "." could be a path separator or part of the name, so top-level names may not contain it.
  // The hint names the required-column overload that takes an explicit parent; pointing at
  // addColumn(...) would steer the caller toward adding an optional column instead.
  Preconditions.checkArgument(
      !name.contains("."),
      "Cannot add column with ambiguous name: %s, use addRequiredColumn(parent, name, type)",
      name);
  // A null parent places the new column at the root of the schema.
  return addRequiredColumn(null, name, type, doc, defaultValue);
}

/**
* Add a new required column to a nested struct.
*
* <p>Adding a required column without a default is an incompatible change that can break reading
* older data. To make this a compatible change, add a default value by calling {@link
* #updateColumnDefault(String, Object)} or use {@link #addRequiredColumn(String, String, Type,
* String, Object)} instead. To suppress exceptions thrown when an incompatible change is
* detected, call {@link #allowIncompatibleChanges()}.
*
* <p>The parent name is used to find the parent using {@link Schema#findField(String)}. If the
* parent name is null, the new column will be added to the root as a top-level column. If parent
Expand All @@ -199,10 +294,38 @@ default UpdateSchema addRequiredColumn(String parent, String name, Type type) {
}

/**
* Add a new required top-level column.
* Add a new required column to a nested struct.
*
* <p>This is an incompatible change that can break reading older data. This method will result in
* an exception unless {@link #allowIncompatibleChanges()} has been called.
* <p>Adding a required column without a default is an incompatible change that can break reading
* older data. To make this a compatible change, add a default value by calling {@link
* #updateColumnDefault(String, Object)} or use {@link #addRequiredColumn(String, String, Type,
* String, Object)} instead. To suppress exceptions thrown when an incompatible change is
* detected, call {@link #allowIncompatibleChanges()}.
*
* <p>The parent name is used to find the parent using {@link Schema#findField(String)}. If the
* parent name is null, the new column will be added to the root as a top-level column. If parent
* identifies a struct, a new column is added to that struct. If it identifies a list, the column
* is added to the list element struct, and if it identifies a map, the new column is added to the
* map's value struct.
*
* <p>The given name is used to name the new column and names containing "." are not handled
* differently.
*
* <p>If type is a nested type, its field IDs are reassigned when added to the existing schema.
*
* @param parent name of the parent struct the column will be added to
* @param name name for the new column
* @param type type for the new column
* @param doc documentation string for the new column
* @return this for method chaining
* @throws IllegalArgumentException If parent doesn't identify a struct
*/
default UpdateSchema addRequiredColumn(String parent, String name, Type type, String doc) {
  // No default was requested, so hand off to the default-aware overload with a null default.
  final Object noDefault = null;
  return addRequiredColumn(parent, name, type, doc, noDefault);
}

/**
* Add a new required column to a nested struct.
*
* <p>The parent name is used to find the parent using {@link Schema#findField(String)}. If the
* parent name is null, the new column will be added to the root as a top-level column. If parent
Expand All @@ -219,10 +342,12 @@ default UpdateSchema addRequiredColumn(String parent, String name, Type type) {
* @param name name for the new column
* @param type type for the new column
* @param doc documentation string for the new column
* @param defaultValue a default value for the column in existing rows
* @return this for method chaining
* @throws IllegalArgumentException If parent doesn't identify a struct
*/
UpdateSchema addRequiredColumn(String parent, String name, Type type, String doc);
UpdateSchema addRequiredColumn(
String parent, String name, Type type, String doc, Object defaultValue);

/**
* Rename a column in the schema.
Expand Down Expand Up @@ -280,6 +405,30 @@ default UpdateSchema updateColumn(String name, Type.PrimitiveType newType, Strin
return updateColumn(name, newType).updateColumnDoc(name, newDoc);
}

/**
* Update a column in the schema to a new primitive type.
*
* <p>The name is used to find the column to update using {@link Schema#findField(String)}.
*
* <p>Only updates that widen types are allowed.
*
* <p>Columns may be updated and renamed in the same schema update.
*
* @param name name of the column to update
* @param newType replacement type for the column
* @param newDoc replacement documentation string for the column
* @param newDefaultValue replacement default value for the column
* @return this for method chaining
* @throws IllegalArgumentException If name doesn't identify a column in the schema or if this
* change introduces a type incompatibility or if it conflicts with other additions, renames,
* or updates.
*/
default UpdateSchema updateColumn(
    String name, Type.PrimitiveType newType, String newDoc, Object newDefaultValue) {
  // Apply the three changes in order: type first, then documentation, then default value.
  UpdateSchema afterTypeChange = updateColumn(name, newType);
  UpdateSchema afterDocChange = afterTypeChange.updateColumnDoc(name, newDoc);
  return afterDocChange.updateColumnDefault(name, newDefaultValue);
}

/**
* Update the documentation string for a column.
*
Expand All @@ -294,15 +443,30 @@ default UpdateSchema updateColumn(String name, Type.PrimitiveType newType, Strin
UpdateSchema updateColumnDoc(String name, String newDoc);

/**
* Update a column to optional.
* Update the default value for a column.
*
* <p>The name is used to find the column to update using {@link Schema#findField(String)}.
*
* <p>Note: Changing the default value for a column does not alter existing rows.
*
* @param name name of the column to update the default value for
* @param newDefault replacement default value for the column
* @return this for method chaining
* @throws IllegalArgumentException If name doesn't identify a column in the schema or if the
* column will be deleted
*/
UpdateSchema updateColumnDefault(String name, Object newDefault);

/**
* Update a column to be optional.
*
* @param name name of the column to mark optional
* @return this for method chaining
*/
UpdateSchema makeColumnOptional(String name);

/**
* Update a column to required.
* Update a column to be required.
*
* <p>This is an incompatible change that can break reading older data. This method will result in
* an exception unless {@link #allowIncompatibleChanges()} has been called.
Expand Down
31 changes: 29 additions & 2 deletions api/src/main/java/org/apache/iceberg/types/Types.java
Original file line number Diff line number Diff line change
Expand Up @@ -559,15 +559,22 @@ public static Builder optional(String name) {
return new Builder(true, name);
}

public static Builder builder() {
  // Start from an empty builder; callers fill in the settings through its chained methods.
  Builder emptyBuilder = new Builder();
  return emptyBuilder;
}

public static class Builder {
private final boolean isOptional;
private final String name;
private boolean isOptional = true;
private String name = null;
private Integer id = null;
private Type type = null;
private String doc = null;
private Object initialDefault = null;
private Object writeDefault = null;

// Creates a builder that sets nothing itself; every setting keeps its field-initializer value.
private Builder() {
}

private Builder(boolean isFieldOptional, String fieldName) {
isOptional = isFieldOptional;
name = fieldName;
Expand All @@ -583,6 +590,26 @@ private Builder(NestedField toCopy) {
this.writeDefault = toCopy.writeDefault;
}

public Builder asRequired() {
  // Required is simply "not optional"; reuse the boolean setter.
  return isOptional(false);
}

public Builder asOptional() {
  // Mark the field optional through the boolean setter.
  return isOptional(true);
}

public Builder isOptional(boolean fieldIsOptional) {
  // Record the requested optionality: true for optional, false for required.
  isOptional = fieldIsOptional;
  return this;
}

public Builder withName(String fieldName) {
  // Record the name for the field being built; returns this builder for chaining.
  name = fieldName;
  return this;
}

public Builder withId(int fieldId) {
id = fieldId;
return this;
Expand Down
Loading

0 comments on commit 0823872

Please sign in to comment.