Skip to content

Commit

Permalink
refactoring VariantBuilder
Browse files Browse the repository at this point in the history
  • Loading branch information
aihuaxu committed Feb 3, 2025
1 parent b2290a0 commit 38e6f2f
Show file tree
Hide file tree
Showing 8 changed files with 129 additions and 95 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -36,12 +36,12 @@ public VariantArrayBuilder(ByteBufferWrapper valueBuffer, Dictionary dict) {

public VariantObjectBuilder startObject() {
addOffset();
return new VariantObjectBuilder(valueBuffer, dict);
return new VariantObjectBuilder(valueBuffer(), dict());
}

public VariantArrayBuilder startArray() {
addOffset();
return new VariantArrayBuilder(valueBuffer, dict);
return new VariantArrayBuilder(valueBuffer(), dict());
}

public VariantArrayBuilder writeNull() {
Expand All @@ -56,9 +56,9 @@ public VariantArrayBuilder writeBoolean(boolean value) {
return this;
}

public VariantArrayBuilder writeNumeric(long value) {
public VariantArrayBuilder writeIntegral(long value) {
addOffset();
writeNumericInternal(value);
writeIntegralInternal(value);
return this;
}

Expand Down Expand Up @@ -111,10 +111,10 @@ public VariantArrayBuilder writeString(String str) {
}

private void addOffset() {
offsets.add(valueBuffer.pos() - startPos);
offsets.add(valueBuffer().pos() - startPos());
}

public void endArray() {
super.endArray(startPos, offsets);
super.endArray(startPos(), offsets);
}
}
118 changes: 56 additions & 62 deletions core/src/main/java/org/apache/iceberg/variants/VariantBuilder.java
Original file line number Diff line number Diff line change
Expand Up @@ -18,16 +18,17 @@
*/
package org.apache.iceberg.variants;

import com.fasterxml.jackson.core.JsonFactory;
import com.fasterxml.jackson.core.JsonParseException;
import com.fasterxml.jackson.core.JsonParser;
import com.fasterxml.jackson.core.JsonToken;
import com.fasterxml.jackson.core.exc.InputCoercionException;
import java.io.IOException;
import java.io.UncheckedIOException;
import java.math.BigDecimal;
import java.util.List;
import org.apache.iceberg.relocated.com.google.common.base.Preconditions;
import org.apache.iceberg.relocated.com.google.common.collect.Lists;
import org.apache.iceberg.util.JsonUtil;

/** A builder class to build a primitive/array/object variant. */
public class VariantBuilder extends VariantBuilderBase {
Expand All @@ -36,35 +37,34 @@ public VariantBuilder() {
}

public VariantPrimitiveBuilder createPrimitive() {
return new VariantPrimitiveBuilder(valueBuffer, dict);
return new VariantPrimitiveBuilder(valueBuffer(), dict());
}

public VariantObjectBuilder startObject() {
return new VariantObjectBuilder(valueBuffer, dict);
return new VariantObjectBuilder(valueBuffer(), dict());
}

public VariantArrayBuilder startArray() {
return new VariantArrayBuilder(valueBuffer, dict);
return new VariantArrayBuilder(valueBuffer(), dict());
}

/**
* Parses a JSON string and constructs a Variant object.
*
* @param json The JSON string to parse.
* @return The constructed Variant object.
* @throws IOException If an error occurs while reading or parsing the JSON.
*/
public static Variant parseJson(String json) throws IOException {
public static Variant parseJson(String json) {
Preconditions.checkArgument(
json != null && !json.isEmpty(), "Input JSON string cannot be null or empty.");

try (JsonParser parser = new JsonFactory().createParser(json)) {
try (JsonParser parser = JsonUtil.factory().createParser(json)) {
parser.nextToken();

VariantBuilder builder = new VariantBuilder();
builder.parseJson(parser);

return builder.build();
} catch (IOException e) {
throw new UncheckedIOException(e);
}
}

Expand All @@ -77,99 +77,93 @@ private void parseJson(JsonParser parser) throws IOException {

switch (token) {
case START_OBJECT:
writeObject(parser);
parseObject(parser);
break;
case START_ARRAY:
writeArray(parser);
break;
case VALUE_STRING:
writeStringInternal(parser.getText());
break;
case VALUE_NUMBER_INT:
writeInteger(parser);
break;
case VALUE_NUMBER_FLOAT:
writeFloat(parser);
break;
case VALUE_TRUE:
writeBooleanInternal(true);
break;
case VALUE_FALSE:
writeBooleanInternal(false);
break;
case VALUE_NULL:
writeNullInternal();
parseArray(parser);
break;
default:
throw new JsonParseException(parser, "Unexpected token " + token);
parsePrimitive(parser);
}
}

private void writeObject(JsonParser parser) throws IOException {
private void parseObject(JsonParser parser) throws IOException {
List<VariantBuilderBase.FieldEntry> fields = Lists.newArrayList();
int startPos = valueBuffer.pos();
int startPos = valueBuffer().pos();

// Store object keys to dictionary of metadata
while (parser.nextToken() != JsonToken.END_OBJECT) {
String key = parser.currentName();
parser.nextToken(); // Move to the value

int id = dict.add(key);
fields.add(new VariantBuilderBase.FieldEntry(key, id, valueBuffer.pos() - startPos));
int id = dict().add(key);
fields.add(new VariantBuilderBase.FieldEntry(key, id, valueBuffer().pos() - startPos));
parseJson(parser);
}

endObject(startPos, fields);
}

private void writeArray(JsonParser parser) throws IOException {
private void parseArray(JsonParser parser) throws IOException {
List<Integer> offsets = Lists.newArrayList();
int startPos = valueBuffer.pos();
int startPos = valueBuffer().pos();

while (parser.nextToken() != JsonToken.END_ARRAY) {
offsets.add(valueBuffer.pos() - startPos);
offsets.add(valueBuffer().pos() - startPos);
parseJson(parser);
}

endArray(startPos, offsets);
}

private void writeInteger(JsonParser parser) throws IOException {
try {
writeNumericInternal(parser.getLongValue());
} catch (InputCoercionException ignored) {
writeFloat(parser); // Fallback for large integers
}
}
private void parsePrimitive(JsonParser parser) throws IOException {
JsonToken token = parser.currentToken();

private void writeFloat(JsonParser parser) throws IOException {
if (!tryWriteDecimal(parser.getText())) {
writeDoubleInternal(parser.getDoubleValue());
switch (token) {
case VALUE_STRING:
writeStringInternal(parser.getText());
break;
case VALUE_NUMBER_INT:
try {
writeIntegralInternal(parser.getLongValue());
} catch (InputCoercionException ignored) {
writeFloatValue(parser);
}
break;
case VALUE_NUMBER_FLOAT:
writeFloatValue(parser);
break;
case VALUE_TRUE:
writeBooleanInternal(true);
break;
case VALUE_FALSE:
writeBooleanInternal(false);
break;
case VALUE_NULL:
writeNullInternal();
break;
default:
throw new JsonParseException(parser, "Unexpected token " + token);
}
}

/**
* This function attempts to parse a JSON number and write it as a decimal value.
* This function attempts to write floating number in decimal format to store the exact value if
* it fits in the decimal for Variant; otherwise, write as a double value.
*
* @param input the input string expecting to be in decimal format, not in scientific notation.
* @return true if the decimal is valid and written successfully; false otherwise.
* @param parser instance of JSONParser with the current token to be floating number
*/
private boolean tryWriteDecimal(String input) {
// Validate that the input matches a decimal format and is not in scientific notation.
if (!input.matches("-?\\d+(\\.\\d+)?")) {
return false;
}

// Parse the input string to BigDecimal.
private void writeFloatValue(JsonParser parser) throws IOException {
String input = parser.getText();
BigDecimal decimalValue = new BigDecimal(input);

// Ensure the decimal value meets precision and scale limits.
if (decimalValue.scale() <= MAX_DECIMAL16_PRECISION
// Decimal values only support a scale in [0, 38] and a precision <= 38
if (decimalValue.scale() >= 0
&& decimalValue.scale() <= MAX_DECIMAL16_PRECISION
&& decimalValue.precision() <= MAX_DECIMAL16_PRECISION) {
writeDecimalInternal(decimalValue);
return true;
} else {
writeDoubleInternal(parser.getDoubleValue());
}

return false;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -34,9 +34,21 @@ abstract class VariantBuilderBase {
private static final int MAX_DECIMAL8_PRECISION = 18;
protected static final int MAX_DECIMAL16_PRECISION = 38;

protected final ByteBufferWrapper valueBuffer;
protected final Dictionary dict;
protected int startPos;
private final ByteBufferWrapper valueBuffer;
private final Dictionary dict;
private int startPos;

ByteBufferWrapper valueBuffer() {
return valueBuffer;
}

Dictionary dict() {
return dict;
}

int startPos() {
return startPos;
}

VariantBuilderBase(ByteBufferWrapper valueBuffer, Dictionary dict) {
this.valueBuffer = valueBuffer;
Expand Down Expand Up @@ -101,12 +113,12 @@ protected void writeBooleanInternal(boolean value) {
}

/**
* Writes a numeric value to the variant builder, automatically choosing the smallest type (INT8,
* INT16, INT32, or INT64) to store the value efficiently.
* Writes an integral value to the variant builder, automatically choosing the smallest type
* (INT8, INT16, INT32, or INT64) to store the value efficiently.
*
* @param value The numeric value to append.
* @param value The integral value to append.
*/
protected void writeNumericInternal(long value) {
protected void writeIntegralInternal(long value) {
if (value == (byte) value) {
valueBuffer.writePrimitive(Variants.PhysicalType.INT8, (byte) value);
} else if (value == (short) value) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,17 +36,17 @@ public class VariantObjectBuilder extends VariantBuilderBase {

public VariantObjectBuilder startObject(String key) {
writeKey(key);
return new VariantObjectBuilder(valueBuffer, dict);
return new VariantObjectBuilder(valueBuffer(), dict());
}

public VariantArrayBuilder startArray(String key) {
writeKey(key);
return new VariantArrayBuilder(valueBuffer, dict);
return new VariantArrayBuilder(valueBuffer(), dict());
}

private void writeKey(String key) {
int id = dict.add(key);
fields.add(new FieldEntry(key, id, valueBuffer.pos() - startPos));
int id = dict().add(key);
fields.add(new FieldEntry(key, id, valueBuffer().pos() - startPos()));
}

public VariantObjectBuilder writeNull(String key) {
Expand All @@ -61,9 +61,9 @@ public VariantObjectBuilder writeBoolean(String key, boolean value) {
return this;
}

public VariantObjectBuilder writeNumeric(String key, long value) {
public VariantObjectBuilder writeIntegral(String key, long value) {
writeKey(key);
writeNumericInternal(value);
writeIntegralInternal(value);
return this;
}

Expand Down Expand Up @@ -116,6 +116,6 @@ public VariantObjectBuilder writeString(String key, String value) {
}

public void endObject() {
super.endObject(startPos, fields);
super.endObject(startPos(), fields);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,8 @@ public VariantPrimitiveBuilder writeBoolean(boolean value) {
return this;
}

public VariantPrimitiveBuilder writeNumeric(long value) {
writeNumericInternal(value);
public VariantPrimitiveBuilder writeIntegral(long value) {
writeIntegralInternal(value);
return this;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ public void testSimpleArrayJson() throws IOException {
}

@Test
public void testArrayJson() throws IOException {
public void testArrayJson() {
String input =
"[{\n"
+ " \"firstName\": \"John\","
Expand Down Expand Up @@ -91,10 +91,10 @@ public void testBuildArray() {
.writeNull()
.writeBoolean(true)
.writeBoolean(false)
.writeNumeric(34)
.writeNumeric(1234)
.writeNumeric(1234567890)
.writeNumeric(1234567890987654321L)
.writeIntegral(34)
.writeIntegral(1234)
.writeIntegral(1234567890)
.writeIntegral(1234567890987654321L)
.writeDouble(1234e-2)
.writeDecimal(new BigDecimal("123456.789"))
.writeDecimal(new BigDecimal("123456789.987654321"))
Expand All @@ -112,7 +112,7 @@ public void testBuildArray() {
.startObject()
.writeString("firstName", "John")
.writeString("lastName", "Smith")
.writeNumeric("age", 25)
.writeIntegral("age", 25)
.endObject();
builder.endArray();

Expand Down
Loading

0 comments on commit 38e6f2f

Please sign in to comment.