Skip to content

Commit

Permalink
Merge pull request apache#381 from Parquet/fix_concurency_problem
Browse files Browse the repository at this point in the history
fix metadata concurrency problem
  • Loading branch information
tsdeng committed Apr 29, 2014
2 parents 76d05fa + 9a38aec commit 6aed528
Show file tree
Hide file tree
Showing 4 changed files with 73 additions and 33 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
/**
* Copyright 2014 Twitter, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package parquet.hadoop.metadata;

import java.util.concurrent.ConcurrentHashMap;

/**
* returns canonical representation of objects (similar to String.intern()) to save memory
* if a.equals(b) then canonicalize(a) == canonicalize(b)
* this class is thread safe
* @author Julien Le Dem
*
* @param <T>
*/
public class Canonicalizer<T> {

private ConcurrentHashMap<T, T> canonicals = new ConcurrentHashMap<T, T>();

/**
* @param value the value to canonicalize
* @return the corresponding canonical value
*/
final public T canonicalize(T value) {
T canonical = canonicals.get(value);
if (canonical == null) {
value = toCanonical(value);
T existing = canonicals.putIfAbsent(value, value);
// putIfAbsent is atomic, making sure we always return the same canonical representation of the value
if (existing == null) {
canonical = value;
} else {
canonical = existing;
}
}
return canonical;
}

/**
* @param value the value to canonicalize if needed
* @return the canonicalized value
*/
protected T toCanonical(T value) {
return value;
}
}

Original file line number Diff line number Diff line change
Expand Up @@ -16,25 +16,17 @@
package parquet.hadoop.metadata;

import java.util.Arrays;
import java.util.HashMap;
import java.util.Map;
import java.util.Set;

import parquet.column.Encoding;
import parquet.schema.PrimitiveType.PrimitiveTypeName;

public class ColumnChunkProperties {

private static Map<ColumnChunkProperties, ColumnChunkProperties> cache = new HashMap<ColumnChunkProperties, ColumnChunkProperties>();
private static Canonicalizer<ColumnChunkProperties> properties = new Canonicalizer<ColumnChunkProperties>();

public static ColumnChunkProperties get(ColumnPath path, PrimitiveTypeName type, CompressionCodecName codec, Set<Encoding> encodings) {
ColumnChunkProperties key = new ColumnChunkProperties(codec, path, type, encodings);
ColumnChunkProperties cached = cache.get(key);
if (cached == null) {
cached = key;
cache.put(key, cached);
}
return cached;
return properties.canonicalize(new ColumnChunkProperties(codec, path, type, encodings));
}

private final CompressionCodecName codec;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,25 +16,22 @@
package parquet.hadoop.metadata;

import java.util.Arrays;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;

public final class ColumnPath implements Iterable<String> {

private static Map<ColumnPath, ColumnPath> paths = new HashMap<ColumnPath, ColumnPath>();

public static ColumnPath get(String... path){
ColumnPath key = new ColumnPath(path);
ColumnPath cached = paths.get(key);
if (cached == null) {
for (int i = 0; i < path.length; i++) {
path[i] = path[i].intern();
private static Canonicalizer<ColumnPath> paths = new Canonicalizer<ColumnPath>() {
protected ColumnPath toCanonical(ColumnPath value) {
String[] path = new String[value.p.length];
for (int i = 0; i < value.p.length; i++) {
path[i] = value.p[i].intern();
}
cached = key;
paths.put(key, cached);
return new ColumnPath(path);
}
return cached;
};

/**
 * Returns the canonical {@link ColumnPath} for the given path elements.
 * Equal paths always yield the same instance (see {@code Canonicalizer}).
 * @param path the elements of the column path
 * @return a canonical ColumnPath equal to {@code new ColumnPath(path)}
 */
public static ColumnPath get(String... path){
return paths.canonicalize(new ColumnPath(path));
}

private final String[] p;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,25 +16,17 @@
package parquet.hadoop.metadata;

import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;

import parquet.column.Encoding;

public class EncodingList implements Iterable<Encoding> {

private static Map<EncodingList, EncodingList> encodingLists = new HashMap<EncodingList, EncodingList>();
private static Canonicalizer<EncodingList> encodingLists = new Canonicalizer<EncodingList>();

public static EncodingList getEncodingList(List<Encoding> encodings) {
EncodingList key = new EncodingList(encodings);
EncodingList cached = encodingLists.get(key);
if (cached == null) {
cached = key;
encodingLists.put(key, cached);
}
return cached;
return encodingLists.canonicalize(new EncodingList(encodings));
}

private final List<Encoding> encodings;
Expand Down

0 comments on commit 6aed528

Please sign in to comment.