diff --git a/giraph-gora/conf/edge.avsc b/giraph-gora/conf/edge.avsc new file mode 100644 index 000000000..c5caeb1f4 --- /dev/null +++ b/giraph-gora/conf/edge.avsc @@ -0,0 +1,12 @@ +{ + "type": "record", + "name": "GEdge", + "namespace": "org.apache.giraph.gora.generated", + "fields" : [ + {"name": "edgeId", "type": "string"}, + {"name": "edgeWeight", "type": "float"}, + {"name": "vertexInId", "type": "string"}, + {"name": "vertexOutId", "type": "string"}, + {"name": "label", "type": "string"} + ] +} diff --git a/giraph-gora/conf/gora-cassandra-mapping.xml b/giraph-gora/conf/gora-cassandra-mapping.xml new file mode 100644 index 000000000..85079ddc5 --- /dev/null +++ b/giraph-gora/conf/gora-cassandra-mapping.xml @@ -0,0 +1,42 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/giraph-gora/conf/gora-hbase-mapping.xml b/giraph-gora/conf/gora-hbase-mapping.xml new file mode 100644 index 000000000..fd40e307e --- /dev/null +++ b/giraph-gora/conf/gora-hbase-mapping.xml @@ -0,0 +1,63 @@ + + + + + + + + +
+ + + + + + + + + +
+ + + + + + + + + +
+ + + + + + + + + + +
+ + + + + + + +
\ No newline at end of file diff --git a/giraph-gora/conf/gora.properties b/giraph-gora/conf/gora.properties new file mode 100644 index 000000000..d59faad01 --- /dev/null +++ b/giraph-gora/conf/gora.properties @@ -0,0 +1,29 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# FOR HBASE DATASTORE +gora.datastore.default=org.apache.gora.hbase.store.HBaseStore + +# FOR CASSANDRA DATASTORE +#gora.datastore.default=org.apache.gora.cassandra.CassandraStore + +# FOR DYNAMO DATASTORE +#gora.datastore.default=org.apache.gora.dynamodb.store.DynamoDBStore +#gora.datastore.autocreateschema=true + +#preferred.schema.name=person +#gora.dynamodb.client=sync +#gora.dynamodb.consistent.reads=true +#gora.dynamodb.endpoint=http://dynamodb.us-east-1.amazonaws.com/ \ No newline at end of file diff --git a/giraph-gora/conf/vertex.json b/giraph-gora/conf/vertex.json new file mode 100644 index 000000000..9f435fa15 --- /dev/null +++ b/giraph-gora/conf/vertex.json @@ -0,0 +1,18 @@ +{ + "type": "record", + "name": "Vertex", + "namespace": "org.apache.giraph.gora.generated", + "fields" : [ + {"name": "vertexId", "type": "long"}, + {"name": "value", "type": "float"}, + {"name": "edges", "type": {"type":"array", "items": { + "name": "Edge", + "type": "record", + "namespace": "org.apache.giraph.gora.generated", + "fields": [ + {"name": "vertexId", "type": "long"}, + {"name": "edgeValue", "type": "float"} + ] + }}} + ] +} \ No newline at end of file diff --git a/giraph-gora/conf/zoo.cfg b/giraph-gora/conf/zoo.cfg new file mode 100644 index 000000000..f4cec329a --- /dev/null +++ b/giraph-gora/conf/zoo.cfg @@ -0,0 +1,4 @@ +tickTime=20000 +dataDir=/var/zookeeper +clientPort=2181 +maxClientCnxns=300 \ No newline at end of file diff --git a/giraph-gora/executeCompile.sh b/giraph-gora/executeCompile.sh new file mode 100644 index 000000000..1976e1fd2 --- /dev/null +++ b/giraph-gora/executeCompile.sh @@ -0,0 +1,31 @@ +#! /bin/bash +mvn clean compile package -DskipTests +cp conf/gora-cassandra-mapping.xml target/. +cp conf/gora-hbase-mapping.xml target/. +cp conf/gora.properties target/. +cp conf/zoo.cfg target/. 
+ +#/Users/keshann/Documents/workspace/workspaceGiraph/giraph/giraph-gora/conf/zoo.cfg + +################################### +echo "[INFO] Initializing classpath" +################################### +export HADOOP_CLASSPATH=/Users/keshann/Documents/workspace/workspaceGiraph/giraph2/giraph-examples/target/giraph-examples-1.1.0-SNAPSHOT-for-hadoop-0.20.203.0-jar-with-dependencies.jar:/Users/keshann/Documents/workspace/workspaceGiraph/giraph2/giraph-gora/target/giraph-gora-1.1.0-SNAPSHOT-jar-with-dependencies.jar:/Users/keshann/Documents/workspace/workspaceGiraph/gora-trunk99/gora-hbase/lib/* + + +################################### +echo "[INFO] Cleaning up" +################################### +/Users/keshann/Documents/Apache/Hadoop/hadoop-0.20.203.0/bin/hadoop fs -rmr shortestPathsOutputGraph + +################################### +echo "[INFO] Executing Hadoop" +################################### +#/Users/keshann/Documents/Apache/Hadoop/hadoop-0.20.203.0/bin/hadoop jar /Users/keshann/Documents/workspace/workspaceGiraph/giraph/giraph-examples/target/giraph-examples-1.1.0-SNAPSHOT-for-hadoop-0.20.203.0-jar-with-dependencies.jar org.apache.giraph.GiraphRunner -files /Users/keshann/Documents/workspace/workspaceGiraph/giraph/giraph-gora/target/gora-cassandra-mapping.xml,/Users/keshann/Documents/workspace/workspaceGiraph/giraph/giraph-gora/target/gora.properties,/Users/keshann/Documents/Apache/HBase/hbase-0.90.4/conf/hbase-site.xml,/Users/keshann/Documents/workspace/workspaceGiraph/giraph/giraph-gora/target/gora-hbase-mapping.xml -Dgiraph.metrics.enable=true -Dio.serializations=org.apache.hadoop.io.serializer.WritableSerialization,org.apache.hadoop.io.serializer.JavaSerialization -Dgiraph.gora.datastore.class=org.apache.gora.hbase.store.HBaseStore -Dgiraph.gora.key.class=java.lang.String -Dgiraph.gora.persistent.class=org.apache.giraph.io.gora.generated.GVertex -Dgiraph.gora.start.key=1 -Dgiraph.gora.end.key=101 
-Dgiraph.gora.keys.factory.class=org.apache.giraph.io.gora.utils.DefaultKeyFactory -Dgiraph.gora.output.datastore.class=org.apache.gora.hbase.store.HBaseStore -Dgiraph.gora.output.key.class=java.lang.String -Dgiraph.gora.output.persistent.class=org.apache.giraph.io.gora.generated.GVertex -libjars /Users/keshann/Documents/workspace/workspaceGiraph/giraph/giraph-gora/target/giraph-gora-1.1.0-SNAPSHOT-jar-with-dependencies.jar,/Users/keshann/Documents/workspace/workspaceGiraph/gora-trunk99/gora-hbase/target/gora-hbase-0.3.jar,/Users/keshann/Documents/workspace/workspaceGiraph/gora-trunk99/gora-hbase/lib/hbase-0.90.4.jar org.apache.giraph.examples.SimpleShortestPathsComputation -vif org.apache.giraph.io.gora.GoraGVertexVertexInputFormat -vof org.apache.giraph.io.gora.GoraGVertexVertexOutputFormat -op shortestPathsOutputGraph -w 1 + +/Users/keshann/Documents/Apache/Hadoop/hadoop-0.20.203.0/bin/hadoop jar /Users/keshann/Documents/workspace/workspaceGiraph/giraph2/giraph-examples/target/giraph-examples-1.1.0-SNAPSHOT-for-hadoop-0.20.203.0-jar-with-dependencies.jar org.apache.giraph.GiraphRunner -files /Users/keshann/Documents/workspace/workspaceGiraph/gora-trunk99/gora-hbase/lib/jackson-core-asl-1.6.9.jar,/Users/keshann/Documents/workspace/workspaceGiraph/gora-trunk99/gora-hbase/lib/jackson-jaxrs-1.5.5.jar,/Users/keshann/Documents/workspace/workspaceGiraph/gora-trunk99/gora-hbase/lib/jackson-mapper-asl-1.6.9.jar,/Users/keshann/Documents/workspace/workspaceGiraph/gora-trunk99/gora-hbase/lib/jackson-xc-1.5.5.jar,/Users/keshann/Documents/workspace/workspaceGiraph/giraph2/giraph-gora/target/gora-cassandra-mapping.xml,/Users/keshann/Documents/workspace/workspaceGiraph/giraph2/giraph-gora/target/gora.properties,/Users/keshann/Documents/Apache/HBase/hbase-0.90.4/conf/hbase-site.xml,/Users/keshann/Documents/workspace/workspaceGiraph/giraph2/giraph-gora/target/gora-hbase-mapping.xml -Dgiraph.metrics.enable=true 
-Dio.serializations=org.apache.hadoop.io.serializer.WritableSerialization,org.apache.hadoop.io.serializer.JavaSerialization -Dgiraph.gora.datastore.class=org.apache.gora.hbase.store.HBaseStore -Dgiraph.gora.key.class=java.lang.String -Dgiraph.gora.persistent.class=org.apache.giraph.io.gora.generated.GVertex -Dgiraph.gora.keys.factory.class=org.apache.giraph.io.gora.utils.DefaultKeyFactory -Dgiraph.gora.output.datastore.class=org.apache.gora.hbase.store.HBaseStore -Dgiraph.gora.output.key.class=java.lang.String -Dgiraph.gora.output.persistent.class=org.apache.giraph.io.gora.generated.GVertexResult -libjars /Users/keshann/Documents/workspace/workspaceGiraph/giraph2/giraph-gora/target/giraph-gora-1.1.0-SNAPSHOT-jar-with-dependencies.jar,/Users/keshann/Documents/workspace/workspaceGiraph/gora-trunk99/gora-hbase/target/gora-hbase-0.3.jar,/Users/keshann/Documents/workspace/workspaceGiraph/gora-trunk99/gora-hbase/lib/hbase-0.90.4.jar org.apache.giraph.examples.SimpleShortestPathsComputation -vif org.apache.giraph.io.gora.GoraGVertexVertexInputFormat -vof org.apache.giraph.io.gora.GoraGVertexVertexOutputFormat -op shortestPathsOutputGraph -w 1 + +################################### +#echo "[INFO] Cleaning up" +################################### +#./Users/keshann/Documents/Apache/Hadoop/hadoop-0.20.203.0/bin/hadoop fs -rmr shortestPathsOutputGraph diff --git a/giraph-gora/pom.xml b/giraph-gora/pom.xml new file mode 100644 index 000000000..b5698e03b --- /dev/null +++ b/giraph-gora/pom.xml @@ -0,0 +1,130 @@ + + + + 4.0.0 + + + org.apache.giraph + giraph-parent + 1.1.0-SNAPSHOT + + giraph-gora + jar + + Apache Giraph Gora I/O + http://gora.apache.org/giraph-gora/ + Giraph Gora input/output classes + + + ${project.basedir}/.. 
+ + + + + + org.apache.maven.plugins + maven-assembly-plugin + + + org.apache.maven.plugins + maven-checkstyle-plugin + + + org.apache.maven.plugins + maven-jar-plugin + + + org.apache.maven.plugins + maven-javadoc-plugin + + + org.apache.maven.plugins + maven-site-plugin + + ${project.basedir}/src/site + + + + org.apache.maven.plugins + maven-surefire-plugin + 2.6 + + + + prop.jarLocation + ${top.dir}/giraph-core/target/giraph-${project.version}-${forHadoop}-jar-with-dependencies.jar + + + + + + org.codehaus.mojo + findbugs-maven-plugin + + + + + + + + commons-cli + commons-cli + + + commons-io + commons-io + + + org.apache.giraph + giraph-core + + + org.apache.giraph + giraph-core + test-jar + + + org.apache.gora + gora-core + + + org.apache.gora + gora-hbase + 0.3 + + + xerces + xercesImpl + 2.9.1 + + + xalan + xalan + 2.7.1 + + + + junit + junit + test + + + diff --git a/giraph-gora/src/main/assembly/compile.xml b/giraph-gora/src/main/assembly/compile.xml new file mode 100644 index 000000000..6acf679c0 --- /dev/null +++ b/giraph-gora/src/main/assembly/compile.xml @@ -0,0 +1,39 @@ + + + jar-with-dependencies + + jar + + false + + + + true + / + + + META-INF/LICENSE + + + true + runtime + + + \ No newline at end of file diff --git a/giraph-gora/src/main/java/org/apache/giraph/io/gora/GoraEdgeInputFormat.java b/giraph-gora/src/main/java/org/apache/giraph/io/gora/GoraEdgeInputFormat.java new file mode 100644 index 000000000..3647e3e21 --- /dev/null +++ b/giraph-gora/src/main/java/org/apache/giraph/io/gora/GoraEdgeInputFormat.java @@ -0,0 +1,409 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.giraph.io.gora; + +import static org.apache.giraph.io.gora.constants.GiraphGoraConstants.GIRAPH_GORA_DATASTORE_CLASS; +import static org.apache.giraph.io.gora.constants.GiraphGoraConstants.GIRAPH_GORA_END_KEY; +import static org.apache.giraph.io.gora.constants.GiraphGoraConstants.GIRAPH_GORA_KEYS_FACTORY_CLASS; +import static org.apache.giraph.io.gora.constants.GiraphGoraConstants.GIRAPH_GORA_KEY_CLASS; +import static org.apache.giraph.io.gora.constants.GiraphGoraConstants.GIRAPH_GORA_PERSISTENT_CLASS; +import static org.apache.giraph.io.gora.constants.GiraphGoraConstants.GIRAPH_GORA_START_KEY; + +import java.io.IOException; +import java.util.List; + +import org.apache.giraph.edge.Edge; +import org.apache.giraph.io.EdgeInputFormat; +import org.apache.giraph.io.EdgeReader; +import org.apache.giraph.io.gora.utils.ExtraGoraInputFormat; +import org.apache.giraph.io.gora.utils.GoraUtils; +import org.apache.giraph.io.gora.utils.KeyFactory; +import org.apache.gora.persistency.Persistent; +import org.apache.gora.query.Query; +import org.apache.gora.query.Result; +import org.apache.gora.store.DataStore; +import org.apache.gora.util.GoraException; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.io.Writable; +import org.apache.hadoop.io.WritableComparable; +import org.apache.hadoop.mapreduce.InputSplit; +import org.apache.hadoop.mapreduce.JobContext; +import org.apache.hadoop.mapreduce.TaskAttemptContext; +import org.apache.log4j.Logger; + +/** + * Class which wraps the GoraInputFormat. 
It's designed
+ * as an extension point to EdgeInputFormat subclasses who wish
+ * to read from Gora data sources.
+ *
+ * All configuration state of this class (key range, classes, data store)
+ * is held in static fields, so it is shared by every instance of the format.
+ *
+ * Works with
+ * {@link GoraVertexOutputFormat}
+ *
+ * @param vertex id type
+ * @param edge type
+ */
+public abstract class GoraEdgeInputFormat
+
+  extends EdgeInputFormat {
+
+  /** Start key for querying Gora data store. */
+  private static Object START_KEY;
+
+  /** End key for querying Gora data store. */
+  private static Object END_KEY;
+
+  /** Logger for Gora's edge input format. */
+  private static final Logger LOG =
+    Logger.getLogger(GoraEdgeInputFormat.class);
+
+  /** Key class used for getting data. */
+  private static Class KEY_CLASS;
+
+  /** Persistent class of the objects stored as values inside Gora. */
+  private static Class PERSISTENT_CLASS;
+
+  /** Data store class to be used as backend. */
+  private static Class DATASTORE_CLASS;
+
+  /** Class used to transform strings into keys. */
+  private static Class KEY_FACTORY_CLASS;
+
+  /** Data store used for querying data.
*/ + private static DataStore DATA_STORE; + + /** counter for iinput records */ + private static int RECORD_COUNTER = 0; + + /** Delegate Gora input format */ + private static ExtraGoraInputFormat GORA_INPUT_FORMAT = + new ExtraGoraInputFormat(); + + /** + * @param conf configuration parameters + */ + public void checkInputSpecs(Configuration conf) { + String sDataStoreType = + GIRAPH_GORA_DATASTORE_CLASS.get(getConf()); + String sKeyType = + GIRAPH_GORA_KEY_CLASS.get(getConf()); + String sPersistentType = + GIRAPH_GORA_PERSISTENT_CLASS.get(getConf()); + String sKeyFactoryClass = + GIRAPH_GORA_KEYS_FACTORY_CLASS.get(getConf()); + try { + Class keyClass = Class.forName(sKeyType); + Class persistentClass = Class.forName(sPersistentType); + Class dataStoreClass = Class.forName(sDataStoreType); + Class keyFactoryClass = Class.forName(sKeyFactoryClass); + setKeyClass(keyClass); + setPersistentClass((Class) persistentClass); + setDatastoreClass((Class) dataStoreClass); + setKeyFactoryClass(keyFactoryClass); + setDataStore(createDataStore()); + GORA_INPUT_FORMAT.setDataStore(getDataStore()); + } catch (ClassNotFoundException e) { + LOG.error("Error while reading Gora Input parameters"); + e.printStackTrace(); + } + } + + /** + * Gets the splits for a data store. + * @param context JobContext + * @param minSplitCountHint Hint for a minimum split count + * @return List A list of splits + */ + @Override + public List getSplits(JobContext context, int minSplitCountHint) + throws IOException, InterruptedException { + KeyFactory kFact = null; + try { + kFact = (KeyFactory) getKeyFactoryClass().newInstance(); + } catch (InstantiationException e) { + LOG.error("Key factory was not instantiated. Please verify."); + LOG.error(e.getMessage()); + e.printStackTrace(); + } catch (IllegalAccessException e) { + LOG.error("Key factory was not instantiated. 
Please verify."); + LOG.error(e.getMessage()); + e.printStackTrace(); + } + String sKey = GIRAPH_GORA_START_KEY.get(getConf()); + String eKey = GIRAPH_GORA_END_KEY.get(getConf()); + if (sKey == null || sKey.isEmpty()) { + LOG.warn("No start key has been defined."); + LOG.warn("Querying all the data store."); + sKey = null; + eKey = null; + } + kFact.setDataStore(getDataStore()); + setStartKey(kFact.buildKey(sKey)); + setEndKey(kFact.buildKey(eKey)); + Query tmpQuery = GoraUtils.getQuery( + getDataStore(), getStartKey(), getEndKey()); + GORA_INPUT_FORMAT.setQuery(tmpQuery); + List splits = GORA_INPUT_FORMAT.getSplits(context); + return splits; + } + + @Override + public abstract GoraEdgeReader createEdgeReader(InputSplit split, + TaskAttemptContext context) throws IOException; + + /** + * Abstract class to be implemented by the user based on their specific + * vertex input. Easiest to ignore the key value separator and only use + * key instead. + */ + protected abstract class GoraEdgeReader extends EdgeReader { + /** current edge obtained from Rexster */ + private Edge edge; + /** Results gotten from Gora data store. */ + private Result readResults; + + @Override + public void initialize(InputSplit inputSplit, TaskAttemptContext context) + throws IOException, InterruptedException { + getResults(); + RECORD_COUNTER = 0; + } + + /** + * Gets the next edge from Gora data store. + * @return true/false depending on the existence of vertices. + * @throws java.io.IOException exceptions passed along. + * @throws InterruptedException exceptions passed along. 
+ */ + @Override + // CHECKSTYLE: stop IllegalCatch + public boolean nextEdge() throws IOException, InterruptedException { + boolean flg = false; + try { + flg = this.getReadResults().next(); + this.edge = transformEdge(this.getReadResults().get()); + RECORD_COUNTER++; + } catch (Exception e) { + LOG.debug("Error transforming vertices."); + flg = false; + } + LOG.debug(RECORD_COUNTER + " were transformed."); + return flg; + } + // CHECKSTYLE: resume IllegalCatch + + /** + * Gets the progress of reading results from Gora. + * @return the progress of reading results from Gora. + */ + @Override + public float getProgress() throws IOException, InterruptedException { + float progress = 0.0f; + if (getReadResults() != null) { + progress = getReadResults().getProgress(); + } + return progress; + } + + /** + * Gets current edge. + * + * @return The edge object represented by a Gora object + */ + @Override + public Edge getCurrentEdge() + throws IOException, InterruptedException { + return this.edge; + } + + /** + * Parser for a single Gora object + * + * @param goraObject vertex represented as a GoraObject + * @return The edge object represented by a Gora object + */ + protected abstract Edge transformEdge(Object goraObject); + + /** + * Performs a range query to a Gora data store. + */ + protected void getResults() { + setReadResults(GoraUtils.getRequest(getDataStore(), + getStartKey(), getEndKey())); + } + + /** + * Finishes the reading process. + * @throws java.io.IOException. + */ + @Override + public void close() throws IOException { + } + + /** + * Gets the results read. + * @return results read. + */ + Result getReadResults() { + return readResults; + } + + /** + * Sets the results read. + * @param readResults results read. + */ + void setReadResults(Result readResults) { + this.readResults = readResults; + } + } + + /** + * Gets the data store object initialized. 
+   * A failure to create the store is logged and results in null.
+   * @return DataStore created, or null if creation failed
+   */
+  public DataStore createDataStore() {
+    DataStore dsCreated = null;
+    try {
+      dsCreated = GoraUtils.createSpecificDataStore(getDatastoreClass(),
+          getKeyClass(), getPersistentClass());
+    } catch (GoraException e) {
+      LOG.error("Error creating data store.");
+      e.printStackTrace();
+    }
+    return dsCreated;
+  }
+
+  /**
+   * Gets the persistent Class
+   * @return persistentClass used
+   */
+  static Class getPersistentClass() {
+    return PERSISTENT_CLASS;
+  }
+
+  /**
+   * Sets the persistent Class
+   * @param persistentClassUsed to be set
+   */
+  static void setPersistentClass
+  (Class persistentClassUsed) {
+    PERSISTENT_CLASS = persistentClassUsed;
+  }
+
+  /**
+   * Gets the key class used.
+   * @return the key class used.
+   */
+  static Class getKeyClass() {
+    return KEY_CLASS;
+  }
+
+  /**
+   * Sets the key class used.
+   * @param keyClassUsed key class used.
+   */
+  static void setKeyClass(Class keyClassUsed) {
+    KEY_CLASS = keyClassUsed;
+  }
+
+  /**
+   * Gets the data store class.
+   * @return Class the DATASTORE_CLASS
+   */
+  public static Class getDatastoreClass() {
+    return DATASTORE_CLASS;
+  }
+
+  /**
+   * Sets the data store class.
+   * @param dataStoreClass the dataStore class to set
+   */
+  public static void setDatastoreClass(
+      Class dataStoreClass) {
+    DATASTORE_CLASS = dataStoreClass;
+  }
+
+  /**
+   * Gets the start key for querying.
+   * @return the start key.
+   */
+  public Object getStartKey() {
+    return START_KEY;
+  }
+
+  /**
+   * Sets the start key for querying.
+   * @param startKey start key.
+   */
+  public static void setStartKey(Object startKey) {
+    START_KEY = startKey;
+  }
+
+  /**
+   * Gets the end key for querying.
+   * @return the end key.
+   */
+  static Object getEndKey() {
+    return END_KEY;
+  }
+
+  /**
+   * Sets the end key for querying.
+   * @param pEndKey end key.
+   */
+  static void setEndKey(Object pEndKey) {
+    END_KEY = pEndKey;
+  }
+
+  /**
+   * Gets the key factory class.
+   * @return the key factory class.
+   */
+  static Class getKeyFactoryClass() {
+    return KEY_FACTORY_CLASS;
+  }
+
+  /**
+   * Sets the key factory class.
+   * @param keyFactoryClass the keyFactoryClass to set.
+   */
+  static void setKeyFactoryClass(Class keyFactoryClass) {
+    KEY_FACTORY_CLASS = keyFactoryClass;
+  }
+
+  /**
+   * Gets the data store.
+   * @return DataStore
+   */
+  public static DataStore getDataStore() {
+    return DATA_STORE;
+  }
+
+  /**
+   * Sets the data store
+   * @param dStore the data store to set
+   */
+  public static void setDataStore(DataStore dStore) {
+    DATA_STORE = dStore;
+  }
+
+  /**
+   * Returns a logger.
+   * @return the log for the input format.
+   */
+  public static Logger getLogger() {
+    return LOG;
+  }
+}
diff --git a/giraph-gora/src/main/java/org/apache/giraph/io/gora/GoraEdgeOutputFormat.java b/giraph-gora/src/main/java/org/apache/giraph/io/gora/GoraEdgeOutputFormat.java
new file mode 100644
index 000000000..cc81a4f95
--- /dev/null
+++ b/giraph-gora/src/main/java/org/apache/giraph/io/gora/GoraEdgeOutputFormat.java
@@ -0,0 +1,281 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.giraph.io.gora;
+
+import static org.apache.giraph.io.gora.constants.GiraphGoraConstants.GIRAPH_GORA_OUTPUT_DATASTORE_CLASS;
+import static org.apache.giraph.io.gora.constants.GiraphGoraConstants.GIRAPH_GORA_OUTPUT_KEY_CLASS;
+import static org.apache.giraph.io.gora.constants.GiraphGoraConstants.GIRAPH_GORA_OUTPUT_PERSISTENT_CLASS;
+
+import java.io.IOException;
+
+import org.apache.giraph.edge.Edge;
+import org.apache.giraph.io.EdgeOutputFormat;
+import org.apache.giraph.io.EdgeWriter;
+import org.apache.giraph.io.gora.utils.GoraUtils;
+import org.apache.gora.persistency.Persistent;
+import org.apache.gora.store.DataStore;
+import org.apache.gora.util.GoraException;
+import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.io.WritableComparable;
+import org.apache.hadoop.mapreduce.JobContext;
+import org.apache.hadoop.mapreduce.TaskAttemptContext;
+import org.apache.hadoop.mapreduce.OutputCommitter;
+import org.apache.log4j.Logger;
+
+/**
+ * Base class for writing edges to a Gora data store. It's designed
+ * as an extension point to EdgeOutputFormat subclasses who wish
+ * to write to Gora data sources.
+ *
+ * The key class, persistent class, data store class and the data store
+ * itself are held in static fields shared by every instance.
+ *
+ * Works with
+ * {@link GoraEdgeInputFormat}
+ *
+ * @param edge id type
+ * @param vertex type
+ * @param edge type
+ */
+public abstract class GoraEdgeOutputFormat
+  extends EdgeOutputFormat {
+
+  /** Logger for Gora's edge output format. */
+  private static final Logger LOG =
+    Logger.getLogger(GoraEdgeOutputFormat.class);
+
+  /** Key class used when creating the data store. */
+  private static Class KEY_CLASS;
+
+  /** Persistent class of the objects stored as values inside Gora. */
+  private static Class PERSISTENT_CLASS;
+
+  /** Data store class to be used as backend. */
+  private static Class DATASTORE_CLASS;
+
+  /** Data store the edges are written to.
*/
+  private static DataStore DATA_STORE;
+
+  /**
+   * checkOutputSpecs. Intentionally performs no validation.
+   *
+   * @param context information about the job
+   * @throws java.io.IOException
+   * @throws InterruptedException
+   */
+  @Override
+  public void checkOutputSpecs(JobContext context)
+    throws IOException, InterruptedException {
+  }
+
+  /**
+   * Gets the data store object initialized.
+   * A failure to create the store is logged and results in null.
+   * @return DataStore created, or null if creation failed
+   */
+  public DataStore createDataStore() {
+    DataStore dsCreated = null;
+    try {
+      dsCreated = GoraUtils.createSpecificDataStore(getDatastoreClass(),
+          getKeyClass(), getPersistentClass());
+    } catch (GoraException e) {
+      getLogger().error("Error creating data store.");
+      e.printStackTrace();
+    }
+    return dsCreated;
+  }
+
+  @Override
+  public abstract GoraEdgeWriter
+  createEdgeWriter(TaskAttemptContext context)
+    throws IOException, InterruptedException;
+
+  /**
+   * getOutputCommitter
+   *
+   * @param context the task context
+   * @return OutputCommitter a committer whose operations are all no-ops
+   * @throws java.io.IOException
+   * @throws InterruptedException
+   */
+  @Override
+  public OutputCommitter getOutputCommitter(TaskAttemptContext context)
+    throws IOException, InterruptedException {
+    return new NullOutputCommitter();
+  }
+
+  /**
+   * Empty output committer for hadoop.
+   */
+  private static class NullOutputCommitter extends OutputCommitter {
+    @Override
+    public void abortTask(TaskAttemptContext arg0) throws IOException { }
+
+    @Override
+    public void commitTask(TaskAttemptContext arg0) throws IOException { }
+
+    @Override
+    public boolean needsTaskCommit(TaskAttemptContext arg0) throws IOException {
+      return false;
+    }
+
+    @Override
+    public void setupJob(JobContext arg0) throws IOException { }
+
+    @Override
+    public void setupTask(TaskAttemptContext arg0) throws IOException { }
+  }
+
+  /**
+   * Abstract class to be implemented by the user based on their specific
+   * vertex/edges output.
+ */ + protected abstract class GoraEdgeWriter extends EdgeWriter { + @Override + public void initialize(TaskAttemptContext context) throws IOException, + InterruptedException { + String sDataStoreType = + GIRAPH_GORA_OUTPUT_DATASTORE_CLASS.get(getConf()); + String sKeyType = + GIRAPH_GORA_OUTPUT_KEY_CLASS.get(getConf()); + String sPersistentType = + GIRAPH_GORA_OUTPUT_PERSISTENT_CLASS.get(getConf()); + try { + Class keyClass = Class.forName(sKeyType); + Class persistentClass = Class.forName(sPersistentType); + Class dataStoreClass = Class.forName(sDataStoreType); + setKeyClass(keyClass); + setPersistentClass((Class) persistentClass); + setDatastoreClass((Class) dataStoreClass); + setDataStore(createDataStore()); + if (getDataStore() != null) { + getLogger().debug("The data store has been created."); + } + } catch (ClassNotFoundException e) { + getLogger().error("Error while reading Gora Output parameters"); + e.printStackTrace(); + } + } + + @Override + public void close(TaskAttemptContext context) + throws IOException, InterruptedException { + getDataStore().flush(); + getDataStore().close(); + } + + @Override + public void writeEdge(I srcId, V srcValue, Edge edge) + throws IOException, InterruptedException { + Persistent goraEdge = null; + Object goraKey = getGoraKey(srcId, srcValue, edge); + goraEdge = getGoraEdge(srcId, srcValue, edge); + getDataStore().put(goraKey, goraEdge); + } + + /** + * Each edge needs to be transformed into a Gora object to be sent to + * a specific data store. + * + * @param edge edge to be transformed into a Gora object + * @param srcId source vertex id + * @param srcValue source vertex value + * @return Gora representation of the vertex + */ + protected abstract Persistent getGoraEdge + (I srcId, V srcValue, Edge edge); + + /** + * Gets the correct key from a computed vertex. + * @param edge edge to extract the key from. 
+ * @param srcId source vertex id + * @param srcValue source vertex value + * @return The key representing such edge. + */ + protected abstract Object getGoraKey(I srcId, V srcValue, Edge edge); + } + + /** + * Gets the data store. + * @return DataStore + */ + public static DataStore getDataStore() { + return DATA_STORE; + } + + /** + * Sets the data store + * @param dStore the dATA_STORE to set + */ + public static void setDataStore(DataStore dStore) { + DATA_STORE = dStore; + } + + /** + * Gets the persistent Class + * @return persistentClass used + */ + static Class getPersistentClass() { + return PERSISTENT_CLASS; + } + + /** + * Sets the persistent Class + * @param persistentClassUsed to be set + */ + static void setPersistentClass + (Class persistentClassUsed) { + PERSISTENT_CLASS = persistentClassUsed; + } + + /** + * Gets the key class used. + * @return the key class used. + */ + static Class getKeyClass() { + return KEY_CLASS; + } + + /** + * Sets the key class used. + * @param keyClassUsed key class used. + */ + static void setKeyClass(Class keyClassUsed) { + KEY_CLASS = keyClassUsed; + } + + /** + * @return Class the DATASTORE_CLASS + */ + public static Class getDatastoreClass() { + return DATASTORE_CLASS; + } + + /** + * @param dataStoreClass the dataStore class to set + */ + public static void setDatastoreClass( + Class dataStoreClass) { + DATASTORE_CLASS = dataStoreClass; + } + + /** + * Gets the logger for the class. + * @return the log of the class. 
+ */ + public static Logger getLogger() { + return LOG; + } +} diff --git a/giraph-gora/src/main/java/org/apache/giraph/io/gora/GoraGEdgeEdgeInputFormat.java b/giraph-gora/src/main/java/org/apache/giraph/io/gora/GoraGEdgeEdgeInputFormat.java new file mode 100644 index 000000000..e738f36df --- /dev/null +++ b/giraph-gora/src/main/java/org/apache/giraph/io/gora/GoraGEdgeEdgeInputFormat.java @@ -0,0 +1,93 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.giraph.io.gora; + +import java.io.IOException; + +import org.apache.giraph.edge.Edge; +import org.apache.giraph.edge.EdgeFactory; +import org.apache.giraph.io.gora.generated.GEdge; +import org.apache.hadoop.io.FloatWritable; +import org.apache.hadoop.io.LongWritable; +import org.apache.hadoop.mapreduce.InputSplit; +import org.apache.hadoop.mapreduce.TaskAttemptContext; + +/** + * Example implementation of a specific reader for a generated data bean. + */ +public class GoraGEdgeEdgeInputFormat + extends GoraEdgeInputFormat { + + /** + * Default constructor + */ + public GoraGEdgeEdgeInputFormat() { + } + + /** + * Creates specific vertex reader to be used inside Hadoop. + * @param split split to be read. + * @param context JobContext to be used. 
+ * @return GoraEdgeReader Edge reader to be used by Hadoop. + */ + @Override + public GoraEdgeReader createEdgeReader( + InputSplit split, TaskAttemptContext context) throws IOException { + return new GoraGEdgeEdgeReader(); + } + + /** + * Gora edge reader + */ + protected class GoraGEdgeEdgeReader extends GoraEdgeReader { + + /** source vertex of the edge */ + private LongWritable sourceId; + + /** + * Transforms a GoraObject into an Edge object. + * @param goraObject Object from Gora to be translated. + * @return Edge Result from transforming the gora object. + */ + @Override + protected Edge transformEdge + (Object goraObject) { + Edge edge = null; + GEdge goraEdge = (GEdge) goraObject; + Long dest; + Long value; + dest = Long.valueOf(goraEdge.getVertexOutId().toString()); + this.sourceId = new LongWritable(); + this.sourceId.set(Long.valueOf(goraEdge.getVertexInId().toString())); + value = (long) goraEdge.getEdgeWeight(); + edge = EdgeFactory.create(new LongWritable(dest), + new FloatWritable(value)); + return edge; + } + + /** + * Gets the currentSourceId for the edge. + * @return LongWritable currentSourceId for the edge. + */ + @Override + public LongWritable getCurrentSourceId() throws IOException, + InterruptedException { + return this.sourceId; + } + } +} diff --git a/giraph-gora/src/main/java/org/apache/giraph/io/gora/GoraGEdgeEdgeOutputFormat.java b/giraph-gora/src/main/java/org/apache/giraph/io/gora/GoraGEdgeEdgeOutputFormat.java new file mode 100644 index 000000000..d350d3717 --- /dev/null +++ b/giraph-gora/src/main/java/org/apache/giraph/io/gora/GoraGEdgeEdgeOutputFormat.java @@ -0,0 +1,76 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.giraph.io.gora;
+
+import java.io.IOException;
+
+import org.apache.avro.util.Utf8;
+import org.apache.giraph.edge.Edge;
+import org.apache.giraph.io.gora.generated.GEdgeResult;
+import org.apache.gora.persistency.Persistent;
+import org.apache.hadoop.io.DoubleWritable;
+import org.apache.hadoop.io.FloatWritable;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.mapreduce.TaskAttemptContext;
+
+/**
+ * Implementation of a specific edge writer for the generated
+ * {@link GEdgeResult} data bean.
+ */
+public class GoraGEdgeEdgeOutputFormat
+  extends GoraEdgeOutputFormat {
+
+  /**
+   * Default constructor.
+   */
+  public GoraGEdgeEdgeOutputFormat() {
+  }
+
+  @Override
+  public GoraEdgeWriter createEdgeWriter(
+      TaskAttemptContext context) throws IOException, InterruptedException {
+    return new GoraGEdgeEdgeWriter();
+  }
+
+  /**
+   * Gora edge writer. Each edge is turned into a {@link GEdgeResult} whose
+   * edgeId is the string form of the source vertex id, whose edgeWeight is
+   * the edge value and whose vertexOutId is the string form of the target
+   * vertex id. The source vertex value is not used.
+   *
+   * NOTE(review): getGoraKey adds the target vertex id and the edge value
+   * with a numeric "+" before converting the sum to a string. Assuming
+   * LongWritable ids and FloatWritable values, different edges can then map
+   * to the same key (target 1 with weight 2 and target 2 with weight 1 both
+   * give "3.0"), and the source id is not part of the key. Confirm whether a
+   * composite key was intended before relying on key uniqueness.
+   */
+  protected class GoraGEdgeEdgeWriter
+    extends GoraEdgeWriter {
+
+    @Override
+    protected Persistent getGoraEdge(LongWritable srcId,
+        DoubleWritable srcValue, Edge edge) {
+      GEdgeResult tmpGEdge = new GEdgeResult();
+      tmpGEdge.setEdgeId(new Utf8(srcId.toString()));
+      tmpGEdge.setEdgeWeight(edge.getValue().get());
+      tmpGEdge.setVertexOutId(new Utf8(edge.getTargetVertexId().toString()));
+      getLogger().debug("GoraObject created: " + tmpGEdge.toString());
+      return tmpGEdge;
+    }
+
+    @Override
+    protected Object getGoraKey(LongWritable srcId,
+        DoubleWritable srcValue, Edge edge) {
+      String goraKey = String.valueOf(
+          edge.getTargetVertexId().get() + edge.getValue().get());
+      return goraKey;
+    }
+
+  }
+}
diff --git a/giraph-gora/src/main/java/org/apache/giraph/io/gora/GoraGVertexVertexInputFormat.java b/giraph-gora/src/main/java/org/apache/giraph/io/gora/GoraGVertexVertexInputFormat.java
new file mode 100644
index 000000000..cb0f005c1
--- /dev/null
+++ b/giraph-gora/src/main/java/org/apache/giraph/io/gora/GoraGVertexVertexInputFormat.java
@@ -0,0 +1,98 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.giraph.io.gora;
+
+import java.io.IOException;
+import java.util.Set;
+
+import org.apache.avro.util.Utf8;
+import org.apache.giraph.edge.Edge;
+import org.apache.giraph.edge.EdgeFactory;
+import org.apache.giraph.graph.Vertex;
+import org.apache.giraph.io.gora.generated.GVertex;
+import org.apache.hadoop.io.DoubleWritable;
+import org.apache.hadoop.io.FloatWritable;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.mapreduce.InputSplit;
+import org.apache.hadoop.mapreduce.TaskAttemptContext;
+
+/**
+ * Example implementation of a specific vertex reader for the generated
+ * {@link GVertex} data bean.
+ */
+public class GoraGVertexVertexInputFormat
+  extends GoraVertexInputFormat {
+
+  /**
+   * Default constructor.
+   */
+  public GoraGVertexVertexInputFormat() {
+  }
+
+  /**
+   * Creates specific vertex reader to be used inside Hadoop.
+   * @param split split to be read.
+   * @param context task attempt context to be used.
+   * @return GoraVertexReader Vertex reader to be used by Hadoop.
+   */
+  @Override
+  public GoraVertexReader createVertexReader(
+      InputSplit split, TaskAttemptContext context) throws IOException {
+    return new GoraGVertexVertexReader();
+  }
+
+  /**
+   * Gora vertex reader
+   */
+  protected class GoraGVertexVertexReader extends GoraVertexReader {
+
+    /**
+     * Transforms a GoraObject into a Vertex object. The vertex id is parsed
+     * as a long from the bean's vertexId and the vertex value is taken from
+     * the bean's value. Every entry of the bean's edges map becomes an
+     * outgoing edge: the entry key is parsed as the target vertex id (long)
+     * and the entry value as the edge weight (float). Entries whose key
+     * contains "vertexId" or "value" are skipped.
+     * @param goraObject Object from Gora to be translated; it is cast to
+     * {@link GVertex}.
+     * @return Vertex Result from transforming the gora object.
+     */
+    @Override
+    protected Vertex
+    transformVertex(Object goraObject) {
+      Vertex vertex;
+      /* create the actual vertex */
+      vertex = getConf().createVertex();
+      GVertex tmpGVertex = (GVertex) goraObject;
+
+      LongWritable vrtxId = new LongWritable(
+          Long.parseLong(tmpGVertex.getVertexId().toString()));
+      DoubleWritable vrtxValue = new DoubleWritable(tmpGVertex.getValue());
+      vertex.initialize(vrtxId, vrtxValue);
+      if (tmpGVertex.getEdges() != null && !tmpGVertex.getEdges().isEmpty()) {
+        Set keyIt = tmpGVertex.getEdges().keySet();
+        for (Utf8 key : keyIt) {
+          String keyVal = key.toString();
+          String valVal = tmpGVertex.getEdges().get(key).toString();
+          Edge edge;
+          if (!keyVal.contains("vertexId") && !keyVal.contains("value")) {
+            edge = EdgeFactory.create(
+                new LongWritable(Long.parseLong(keyVal)),
+                new FloatWritable(Float.parseFloat(valVal)));
+            vertex.addEdge(edge);
+          }
+        }
+      }
+      return vertex;
+    }
+  }
+}
diff --git a/giraph-gora/src/main/java/org/apache/giraph/io/gora/GoraGVertexVertexOutputFormat.java b/giraph-gora/src/main/java/org/apache/giraph/io/gora/GoraGVertexVertexOutputFormat.java
new file mode 100644
index 000000000..893e083fe
--- /dev/null
+++ b/giraph-gora/src/main/java/org/apache/giraph/io/gora/GoraGVertexVertexOutputFormat.java
@@ -0,0 +1,83 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.giraph.io.gora;
+
+import java.io.IOException;
+import java.util.Iterator;
+
+import org.apache.avro.util.Utf8;
+import org.apache.giraph.edge.Edge;
+import org.apache.giraph.graph.Vertex;
+import org.apache.giraph.io.VertexWriter;
+import org.apache.giraph.io.gora.generated.GVertexResult;
+import org.apache.gora.persistency.Persistent;
+import org.apache.hadoop.io.DoubleWritable;
+import org.apache.hadoop.io.FloatWritable;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.mapreduce.TaskAttemptContext;
+
+/**
+ * Implementation of a specific vertex writer for the generated
+ * {@link GVertexResult} data bean.
+ */
+public class GoraGVertexVertexOutputFormat
+  extends GoraVertexOutputFormat {
+
+  /**
+   * Default constructor.
+   */
+  public GoraGVertexVertexOutputFormat() {
+  }
+
+  @Override
+  public VertexWriter
+  createVertexWriter(TaskAttemptContext context)
+    throws IOException, InterruptedException {
+    return new GoraGVertexVertexWriter();
+  }
+
+  /**
+   * Gora vertex writer. A vertex is turned into a {@link GVertexResult}
+   * whose vertexId is the string form of the vertex id and whose value is
+   * the vertex value re-parsed as a float from its string form. Every
+   * outgoing edge is stored in the bean's edges map, keyed by the string
+   * form of the target vertex id with the string form of the edge value as
+   * the map value. The Gora key of a vertex is the string form of its id.
+   */
+  protected class GoraGVertexVertexWriter extends GoraVertexWriter {
+
+    @Override
+    protected Persistent getGoraVertex(
+        Vertex vertex) {
+      GVertexResult tmpGVertex = new GVertexResult();
+      tmpGVertex.setVertexId(new Utf8(vertex.getId().toString()));
+      tmpGVertex.setValue(Float.parseFloat(vertex.getValue().toString()));
+      Iterator> it =
+          vertex.getEdges().iterator();
+      while (it.hasNext()) {
+        Edge edge = it.next();
+        tmpGVertex.putToEdges(
+            new Utf8(edge.getTargetVertexId().toString()),
+            new Utf8(edge.getValue().toString()));
+      }
+      return tmpGVertex;
+    }
+
+    @Override
+    protected Object getGoraKey(
+        Vertex vertex) {
+      String goraKey = String.valueOf(vertex.getId());
+      return goraKey;
+    }
+  }
+}
diff --git a/giraph-gora/src/main/java/org/apache/giraph/io/gora/GoraVertexInputFormat.java b/giraph-gora/src/main/java/org/apache/giraph/io/gora/GoraVertexInputFormat.java
new file mode 100644
index 000000000..0a34cd26f
--- /dev/null
+++ b/giraph-gora/src/main/java/org/apache/giraph/io/gora/GoraVertexInputFormat.java
@@ -0,0 +1,414 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.giraph.io.gora;
+
+import static org.apache.giraph.io.gora.constants.GiraphGoraConstants.GIRAPH_GORA_DATASTORE_CLASS;
+import static org.apache.giraph.io.gora.constants.GiraphGoraConstants.GIRAPH_GORA_END_KEY;
+import static org.apache.giraph.io.gora.constants.GiraphGoraConstants.GIRAPH_GORA_KEYS_FACTORY_CLASS;
+import static org.apache.giraph.io.gora.constants.GiraphGoraConstants.GIRAPH_GORA_KEY_CLASS;
+import static org.apache.giraph.io.gora.constants.GiraphGoraConstants.GIRAPH_GORA_PERSISTENT_CLASS;
+import static org.apache.giraph.io.gora.constants.GiraphGoraConstants.GIRAPH_GORA_START_KEY;
+
+import java.io.IOException;
+import java.util.List;
+
+import org.apache.giraph.graph.Vertex;
+import org.apache.giraph.io.VertexInputFormat;
+import org.apache.giraph.io.VertexReader;
+import org.apache.giraph.io.gora.utils.KeyFactory;
+import org.apache.giraph.io.gora.utils.ExtraGoraInputFormat;
+import org.apache.giraph.io.gora.utils.GoraUtils;
+import org.apache.gora.persistency.Persistent;
+import org.apache.gora.query.Query;
+import org.apache.gora.query.Result;
+import org.apache.gora.store.DataStore;
+import org.apache.gora.util.GoraException;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.io.WritableComparable;
+import org.apache.hadoop.mapreduce.InputSplit;
+import org.apache.hadoop.mapreduce.JobContext;
+import org.apache.hadoop.mapreduce.TaskAttemptContext;
+import org.apache.log4j.Logger;
+
+/**
+ * Class which wraps the GoraInputFormat. It's designed
+ * as an extension point to VertexInputFormat subclasses that wish
+ * to read from Gora data sources.
+ *
+ * Works with
+ * {@link GoraVertexOutputFormat}
+ *
+ * Note that the query keys, the configured classes and the data store are
+ * kept in static fields, so they are shared by every instance of this class
+ * loaded in the same JVM.
+ *
+ * @param <I> vertex id type
+ * @param <V> vertex value type
+ * @param <E> edge type
+ */
+public abstract class GoraVertexInputFormat<
+        I extends WritableComparable,
+        V extends Writable,
+        E extends Writable>
+        extends VertexInputFormat {
+
+  /** Start key for querying Gora data store. */
+  private static Object START_KEY;
+
+  /** End key for querying Gora data store. */
+  private static Object END_KEY;
+
+  /** Logger for Gora's vertex input format. */
+  private static final Logger LOG =
+          Logger.getLogger(GoraVertexInputFormat.class);
+
+  /** KeyClass used for getting data. */
+  private static Class KEY_CLASS;
+
+  /** The vertex itself will be used as a value inside Gora. */
+  private static Class PERSISTENT_CLASS;
+
+  /** Data store class to be used as backend. */
+  private static Class DATASTORE_CLASS;
+
+  /** Class used to transform strings into Keys */
+  private static Class KEY_FACTORY_CLASS;
+
+  /** Data store used for querying data. */
+  private static DataStore DATA_STORE;
+
+  /** Counter for input records; reset when a reader is initialized. */
+  private static int RECORD_COUNTER = 0;
+
+  /** Delegate Gora input format */
+  private static ExtraGoraInputFormat GORA_INPUT_FORMAT =
+         new ExtraGoraInputFormat();
+
+  /**
+   * Loads the key, persistent, data store and key factory classes named in
+   * the configuration, creates the data store and hands it to the delegate
+   * Gora input format. The option values are read from getConf(), not from
+   * the conf argument. A class that cannot be found is logged and the
+   * exception is not rethrown, in which case the remaining setters are not
+   * called.
+   * @param conf configuration parameters
+   */
+  public void checkInputSpecs(Configuration conf) {
+    String sDataStoreType =
+        GIRAPH_GORA_DATASTORE_CLASS.get(getConf());
+    String sKeyType =
+        GIRAPH_GORA_KEY_CLASS.get(getConf());
+    String sPersistentType =
+        GIRAPH_GORA_PERSISTENT_CLASS.get(getConf());
+    String sKeyFactoryClass =
+        GIRAPH_GORA_KEYS_FACTORY_CLASS.get(getConf());
+    try {
+      Class keyClass = Class.forName(sKeyType);
+      Class persistentClass = Class.forName(sPersistentType);
+      Class dataStoreClass = Class.forName(sDataStoreType);
+      Class keyFactoryClass = Class.forName(sKeyFactoryClass);
+      setKeyClass(keyClass);
+      setPersistentClass((Class) persistentClass);
+      setDatastoreClass((Class) dataStoreClass);
+      setKeyFactoryClass(keyFactoryClass);
+      setDataStore(createDataStore());
+      GORA_INPUT_FORMAT.setDataStore(getDataStore());
+    } catch (ClassNotFoundException e) {
+      LOG.error("Error while reading Gora Input parameters");
+      e.printStackTrace();
+    }
+  }
+
+  /**
+   * Create a vertex reader for a given split. Guaranteed to have been
+   * configured with setConf() prior to use. The framework will also call
+   * {@link org.apache.giraph.io.VertexReader#initialize(org.apache.hadoop.mapreduce.InputSplit, org.apache.hadoop.mapreduce.TaskAttemptContext)} before
+   * the split is used.
+   *
+   * @param split the split to be read
+   * @param context the information about the task
+   * @return a new record reader
+   * @throws java.io.IOException
+   */
+  public abstract GoraVertexReader createVertexReader(InputSplit split,
+    TaskAttemptContext context) throws IOException;
+
+  /**
+   * Gets the splits for a data store.
+ * @param context JobContext + * @param minSplitCountHint Hint for a minimum split count + * @return List A list of splits + */ + @Override + public List getSplits(JobContext context, int minSplitCountHint) + throws IOException, InterruptedException { + KeyFactory kFact = null; + try { + kFact = (KeyFactory) getKeyFactoryClass().newInstance(); + kFact.setDataStore(getDataStore()); + } catch (InstantiationException e) { + LOG.error("Key factory was not instantiated. Please verify."); + LOG.error(e.getMessage()); + e.printStackTrace(); + } catch (IllegalAccessException e) { + LOG.error("Key factory was not instantiated. Please verify."); + LOG.error(e.getMessage()); + e.printStackTrace(); + } + String sKey = GIRAPH_GORA_START_KEY.get(getConf()); + String eKey = GIRAPH_GORA_END_KEY.get(getConf()); + if (sKey == null || sKey.isEmpty()) { + LOG.warn("No start key has been defined."); + LOG.warn("Querying all the data store."); + sKey = null; + eKey = null; + } else { + setStartKey(kFact.buildKey(sKey)); + setEndKey(kFact.buildKey(eKey)); + } + Query tmpQuery = GoraUtils.getQuery( + getDataStore(), getStartKey(), getEndKey()); + GORA_INPUT_FORMAT.setQuery(tmpQuery); + List splits = GORA_INPUT_FORMAT.getSplits(context); + return splits; + } + + /** + * Gets the data store object initialized. + * @return DataStore created + */ + public DataStore createDataStore() { + DataStore dsCreated = null; + try { + dsCreated = GoraUtils.createSpecificDataStore(getDatastoreClass(), + getKeyClass(), getPersistentClass()); + } catch (GoraException e) { + LOG.error("Error creating data store."); + e.printStackTrace(); + } + return dsCreated; + } + + /** + * Abstract class to be implemented by the user based on their specific + * vertex input. Easiest to ignore the key value separator and only use + * key instead. + */ + protected abstract class GoraVertexReader extends VertexReader { + /** Current vertex */ + private Vertex vertex; + /** Results gotten from Gora data store. 
*/ + private Result readResults; + + @Override + public void initialize(InputSplit inputSplit, TaskAttemptContext context) + throws IOException, InterruptedException { + getResults(); + RECORD_COUNTER = 0; + } + + /** + * Gets the next vertex from Gora data store. + * @return true/false depending on the existence of vertices. + * @throws java.io.IOException exceptions passed along. + * @throws InterruptedException exceptions passed along. + */ + @Override + // CHECKSTYLE: stop IllegalCatch + public boolean nextVertex() throws IOException, InterruptedException { + boolean flg = false; + try { + flg = this.getReadResults().next(); + this.vertex = transformVertex(this.getReadResults().get()); + RECORD_COUNTER++; + } catch (Exception e) { + LOG.error("Error transforming vertices."); + LOG.error(e.getMessage()); + flg = false; + } + LOG.debug(RECORD_COUNTER + " were transformed."); + return flg; + } + // CHECKSTYLE: resume IllegalCatch + + /** + * Gets the progress of reading results from Gora. + * @return the progress of reading results from Gora. + */ + @Override + public float getProgress() throws IOException, InterruptedException { + float progress = 0.0f; + if (getReadResults() != null) { + progress = getReadResults().getProgress(); + } + return progress; + } + + /** + * Gets current vertex. + * + * @return The vertex object represented by a Gora object + */ + @Override + public Vertex getCurrentVertex() + throws IOException, InterruptedException { + return this.vertex; + } + + /** + * Parser for a single Gora object + * + * @param goraObject vertex represented as a GoraObject + * @return The vertex object represented by a Gora object + */ + protected abstract Vertex transformVertex(Object goraObject); + + /** + * Performs a range query to a Gora data store. + */ + protected void getResults() { + setReadResults(GoraUtils.getRequest(getDataStore(), + getStartKey(), getEndKey())); + } + + /** + * Finishes the reading process. + * @throws java.io.IOException. 
+     */
+    @Override
+    public void close() throws IOException {
+    }
+
+    /**
+     * Gets the results read.
+     * @return results read.
+     */
+    Result getReadResults() {
+      return readResults;
+    }
+
+    /**
+     * Sets the results read.
+     * @param readResults results read.
+     */
+    void setReadResults(Result readResults) {
+      this.readResults = readResults;
+    }
+  }
+
+  /**
+   * Gets the persistent Class
+   * @return persistentClass used
+   */
+  static Class getPersistentClass() {
+    return PERSISTENT_CLASS;
+  }
+
+  /**
+   * Sets the persistent Class
+   * @param persistentClassUsed to be set
+   */
+  static void setPersistentClass
+  (Class persistentClassUsed) {
+    PERSISTENT_CLASS = persistentClassUsed;
+  }
+
+  /**
+   * Gets the key class used.
+   * @return the key class used.
+   */
+  static Class getKeyClass() {
+    return KEY_CLASS;
+  }
+
+  /**
+   * Sets the key class used.
+   * @param keyClassUsed key class used.
+   */
+  static void setKeyClass(Class keyClassUsed) {
+    KEY_CLASS = keyClassUsed;
+  }
+
+  /**
+   * Gets the data store class.
+   * @return Class the data store class
+   */
+  public static Class getDatastoreClass() {
+    return DATASTORE_CLASS;
+  }
+
+  /**
+   * Sets the data store class.
+   * @param dataStoreClass the dataStore class to set
+   */
+  public static void setDatastoreClass(
+      Class dataStoreClass) {
+    DATASTORE_CLASS = dataStoreClass;
+  }
+
+  /**
+   * Gets the start key for querying.
+   * @return the start key.
+   */
+  public Object getStartKey() {
+    return START_KEY;
+  }
+
+  /**
+   * Sets the start key for querying.
+   * @param startKey start key.
+   */
+  public static void setStartKey(Object startKey) {
+    START_KEY = startKey;
+  }
+
+  /**
+   * Gets the end key for querying.
+   * @return the end key.
+   */
+  static Object getEndKey() {
+    return END_KEY;
+  }
+
+  /**
+   * Sets the end key for querying.
+   * @param pEndKey end key.
+   */
+  static void setEndKey(Object pEndKey) {
+    END_KEY = pEndKey;
+  }
+
+  /**
+   * Gets the key factory class.
+   * @return the key factory class
+   */
+  static Class getKeyFactoryClass() {
+    return KEY_FACTORY_CLASS;
+  }
+
+  /**
+   * Sets the key factory class.
+   * @param keyFactoryClass the keyFactoryClass to set.
+   */
+  static void setKeyFactoryClass(Class keyFactoryClass) {
+    KEY_FACTORY_CLASS = keyFactoryClass;
+  }
+
+  /**
+   * Gets the data store.
+   * @return DataStore
+   */
+  public static DataStore getDataStore() {
+    return DATA_STORE;
+  }
+
+  /**
+   * Sets the data store
+   * @param dStore the data store to set
+   */
+  public static void setDataStore(DataStore dStore) {
+    DATA_STORE = dStore;
+  }
+}
diff --git a/giraph-gora/src/main/java/org/apache/giraph/io/gora/GoraVertexOutputFormat.java b/giraph-gora/src/main/java/org/apache/giraph/io/gora/GoraVertexOutputFormat.java
new file mode 100644
index 000000000..105694700
--- /dev/null
+++ b/giraph-gora/src/main/java/org/apache/giraph/io/gora/GoraVertexOutputFormat.java
@@ -0,0 +1,297 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.giraph.io.gora;
+
+import static org.apache.giraph.io.gora.constants.GiraphGoraConstants.GIRAPH_GORA_OUTPUT_DATASTORE_CLASS;
+import static org.apache.giraph.io.gora.constants.GiraphGoraConstants.GIRAPH_GORA_OUTPUT_KEY_CLASS;
+import static org.apache.giraph.io.gora.constants.GiraphGoraConstants.GIRAPH_GORA_OUTPUT_PERSISTENT_CLASS;
+
+import java.io.IOException;
+
+import org.apache.giraph.graph.Vertex;
+import org.apache.giraph.io.VertexOutputFormat;
+import org.apache.giraph.io.VertexWriter;
+import org.apache.giraph.io.gora.utils.GoraUtils;
+import org.apache.gora.persistency.Persistent;
+import org.apache.gora.store.DataStore;
+import org.apache.gora.util.GoraException;
+import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.io.WritableComparable;
+import org.apache.hadoop.mapreduce.JobContext;
+import org.apache.hadoop.mapreduce.OutputCommitter;
+import org.apache.hadoop.mapreduce.TaskAttemptContext;
+import org.apache.log4j.Logger;
+import org.apache.zookeeper.WatchedEvent;
+import org.apache.zookeeper.Watcher;
+import org.apache.zookeeper.Watcher.Event.EventType;
+/**
+ *
+ *  Class which wraps the GoraOutputFormat. It's designed
+ *  as an extension point to VertexOutputFormat subclasses that wish
+ *  to write vertices back to a Gora data store.
+ *
+ *  Works with
+ *  {@link GoraVertexInputFormat}
+ *
+ *
+ * @param <I> vertex id type
+ * @param <V> vertex value type
+ * @param <E> edge type
+ */
+public abstract class GoraVertexOutputFormat<
+        I extends WritableComparable,
+        V extends Writable,
+        E extends Writable>
+        extends VertexOutputFormat {
+
+  /** Logger for Gora's vertex output format. */
+  private static final Logger LOG =
+          Logger.getLogger(GoraVertexOutputFormat.class);
+
+  /** KeyClass used for storing data. */
+  private static Class KEY_CLASS;
+
+  /** The vertex itself will be used as a value inside Gora. */
+  private static Class PERSISTENT_CLASS;
+
+  /** Data store class to be used as backend. */
+  private static Class DATASTORE_CLASS;
+
+  /** Data store the vertices are written to. */
+  private static DataStore DATA_STORE;
+
+  /**
+   * checkOutputSpecs; this implementation performs no checks.
+   *
+   * @param context information about the job
+   * @throws java.io.IOException
+   * @throws InterruptedException
+   */
+  @Override
+  public void checkOutputSpecs(JobContext context)
+    throws IOException, InterruptedException {
+  }
+
+  /**
+   * Gets the data store object initialized.
+   * @return DataStore created, or null if Gora failed to create it (the
+   * failure is logged).
+   */
+  public DataStore createDataStore() {
+    DataStore dsCreated = null;
+    try {
+      dsCreated = GoraUtils.createSpecificDataStore(getDatastoreClass(),
+          getKeyClass(), getPersistentClass());
+    } catch (GoraException e) {
+      getLogger().error("Error creating data store.");
+      e.printStackTrace();
+    }
+    return dsCreated;
+  }
+
+  /**
+   * getOutputCommitter; always returns a committer that does nothing.
+   *
+   * @param context the task context
+   * @return OutputCommitter
+   * @throws java.io.IOException
+   * @throws InterruptedException
+   */
+  @Override
+  public OutputCommitter getOutputCommitter(TaskAttemptContext context)
+    throws IOException, InterruptedException {
+    return new NullOutputCommitter();
+  }
+
+  /**
+   * Empty output committer for hadoop.
+   */
+  private static class NullOutputCommitter extends OutputCommitter {
+    @Override
+    public void abortTask(TaskAttemptContext arg0) throws IOException {    }
+
+    @Override
+    public void commitTask(TaskAttemptContext arg0) throws IOException {    }
+
+    @Override
+    public boolean needsTaskCommit(TaskAttemptContext arg0) throws IOException {
+      return false;
+    }
+
+    @Override
+    public void setupJob(JobContext arg0) throws IOException {    }
+
+    @Override
+    public void setupTask(TaskAttemptContext arg0) throws IOException {    }
+  }
+
+  /**
+   * Abstract class to be implemented by the user based on their specific
+   * vertex/edges output. Easiest to ignore the key value separator and only
+   * use key instead.
+ */ + protected abstract class GoraVertexWriter + extends VertexWriter + implements Watcher { + /** lock for management of the barrier */ + private final Object lock = new Object(); + + @Override + public void initialize(TaskAttemptContext context) + throws IOException, InterruptedException { + String sDataStoreType = + GIRAPH_GORA_OUTPUT_DATASTORE_CLASS.get(getConf()); + String sKeyType = + GIRAPH_GORA_OUTPUT_KEY_CLASS.get(getConf()); + String sPersistentType = + GIRAPH_GORA_OUTPUT_PERSISTENT_CLASS.get(getConf()); + try { + Class keyClass = Class.forName(sKeyType); + Class persistentClass = Class.forName(sPersistentType); + Class dataStoreClass = Class.forName(sDataStoreType); + setKeyClass(keyClass); + setPersistentClass((Class) persistentClass); + setDatastoreClass((Class) dataStoreClass); + setDataStore(createDataStore()); + if (getDataStore() != null) { + getLogger().info("The output data store has been created."); + } + } catch (ClassNotFoundException e) { + getLogger().error("Error while reading Gora Output parameters"); + e.printStackTrace(); + } + } + + @Override + public void close(TaskAttemptContext context) + throws IOException, InterruptedException { + getDataStore().flush(); + getDataStore().close(); + } + + @Override + public void writeVertex(Vertex vertex) + throws IOException, InterruptedException { + Persistent goraVertex = null; + Object goraKey = getGoraKey(vertex); + goraVertex = getGoraVertex(vertex); + getDataStore().put(goraKey, goraVertex); + } + + @Override + public void process(WatchedEvent event) { + EventType type = event.getType(); + if (type == EventType.NodeChildrenChanged) { + if (getLogger().isDebugEnabled()) { + getLogger().debug("signal: number of children changed."); + } + synchronized (lock) { + lock.notify(); + } + } + } + + /** + * Each vertex needs to be transformed into a Gora object to be sent to + * a specific data store. 
+ * + * @param vertex vertex to be transformed into a Gora object + * @return Gora representation of the vertex + */ + protected abstract Persistent getGoraVertex(Vertex vertex); + + /** + * Gets the correct key from a computed vertex. + * @param vertex vertex to extract the key from. + * @return The key representing such vertex + */ + protected abstract Object getGoraKey(Vertex vertex); + + } + + /** + * Gets the data store. + * @return DataStore + */ + public static DataStore getDataStore() { + return DATA_STORE; + } + + /** + * Sets the data store + * @param dStore the dATA_STORE to set + */ + public static void setDataStore(DataStore dStore) { + DATA_STORE = dStore; + } + + /** + * Gets the persistent Class + * @return persistentClass used + */ + static Class getPersistentClass() { + return PERSISTENT_CLASS; + } + + /** + * Sets the persistent Class + * @param persistentClassUsed to be set + */ + static void setPersistentClass + (Class persistentClassUsed) { + PERSISTENT_CLASS = persistentClassUsed; + } + + /** + * Gets the key class used. + * @return the key class used. + */ + static Class getKeyClass() { + return KEY_CLASS; + } + + /** + * Sets the key class used. + * @param keyClassUsed key class used. + */ + static void setKeyClass(Class keyClassUsed) { + KEY_CLASS = keyClassUsed; + } + + /** + * @return Class the DATASTORE_CLASS + */ + public static Class getDatastoreClass() { + return DATASTORE_CLASS; + } + + /** + * @param dataStoreClass the dataStore class to set + */ + public static void setDatastoreClass( + Class dataStoreClass) { + DATASTORE_CLASS = dataStoreClass; + } + + /** + * Returns a logger. + * @return the log for the output format. 
+ */ + public static Logger getLogger() { + return LOG; + } +} diff --git a/giraph-gora/src/main/java/org/apache/giraph/io/gora/constants/GiraphGoraConstants.java b/giraph-gora/src/main/java/org/apache/giraph/io/gora/constants/GiraphGoraConstants.java new file mode 100644 index 000000000..187bf59fc --- /dev/null +++ b/giraph-gora/src/main/java/org/apache/giraph/io/gora/constants/GiraphGoraConstants.java @@ -0,0 +1,80 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.giraph.io.gora.constants; + +import org.apache.giraph.conf.StrConfOption; + +/** + * Constants used all over Giraph for configuration specific for Gora + */ +// CHECKSTYLE: stop InterfaceIsTypeCheck +public interface GiraphGoraConstants { + /** Gora data store class which provides data access. */ + StrConfOption GIRAPH_GORA_DATASTORE_CLASS = + new StrConfOption("giraph.gora.datastore.class", null, + "Gora DataStore class to access to data from. " + + "- required"); + + /** Gora key class to query the data store. */ + StrConfOption GIRAPH_GORA_KEY_CLASS = + new StrConfOption("giraph.gora.key.class", null, + "Gora Key class to query the datastore. " + + "- required"); + + /** Gora persistent class to query the data store. 
*/ + StrConfOption GIRAPH_GORA_PERSISTENT_CLASS = + new StrConfOption("giraph.gora.persistent.class", null, + "Gora Persistent class to read objects from Gora. " + + "- required"); + + /** Gora start key to query the datastore. */ + StrConfOption GIRAPH_GORA_START_KEY = + new StrConfOption("giraph.gora.start.key", null, + "Gora start key to query the datastore. "); + + /** Gora end key to query the datastore. */ + StrConfOption GIRAPH_GORA_END_KEY = + new StrConfOption("giraph.gora.end.key", null, + "Gora end key to query the datastore. "); + + /** Gora data store class which provides data access. */ + StrConfOption GIRAPH_GORA_KEYS_FACTORY_CLASS = + new StrConfOption("giraph.gora.keys.factory.class", null, + "Keys factory to convert strings into desired keys" + + "- required"); + + // OUTPUT + /** Gora data store class which provides data access. */ + StrConfOption GIRAPH_GORA_OUTPUT_DATASTORE_CLASS = + new StrConfOption("giraph.gora.output.datastore.class", null, + "Gora DataStore class to write data to. " + + "- required"); + + /** Gora key class to query the data store. */ + StrConfOption GIRAPH_GORA_OUTPUT_KEY_CLASS = + new StrConfOption("giraph.gora.output.key.class", null, + "Gora Key class to write to datastore. " + + "- required"); + + /** Gora persistent class to query the data store. */ + StrConfOption GIRAPH_GORA_OUTPUT_PERSISTENT_CLASS = + new StrConfOption("giraph.gora.output.persistent.class", null, + "Gora Persistent class to write to Gora. " + + "- required"); +} +// CHECKSTYLE: resume InterfaceIsTypeCheck diff --git a/giraph-gora/src/main/java/org/apache/giraph/io/gora/constants/package-info.java b/giraph-gora/src/main/java/org/apache/giraph/io/gora/constants/package-info.java new file mode 100644 index 000000000..3a8b96fb3 --- /dev/null +++ b/giraph-gora/src/main/java/org/apache/giraph/io/gora/constants/package-info.java @@ -0,0 +1,21 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/** + * Constants for Gora Input/Output formats + */ +package org.apache.giraph.io.gora.constants; diff --git a/giraph-gora/src/main/java/org/apache/giraph/io/gora/generated/GEdge.java b/giraph-gora/src/main/java/org/apache/giraph/io/gora/generated/GEdge.java new file mode 100644 index 000000000..f6ac3f79b --- /dev/null +++ b/giraph-gora/src/main/java/org/apache/giraph/io/gora/generated/GEdge.java @@ -0,0 +1,314 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.giraph.io.gora.generated; + +import org.apache.avro.Schema; +import org.apache.avro.AvroRuntimeException; +import org.apache.avro.util.Utf8; +import org.apache.gora.persistency.StateManager; +import org.apache.gora.persistency.impl.PersistentBase; +import org.apache.gora.persistency.impl.StateManagerImpl; + +/** + * Example class for defining a Giraph-Edge. + */ +@SuppressWarnings("all") +public class GEdge extends PersistentBase { + /** + * Schema used for the class. + */ + public static final Schema OBJ_SCHEMA = Schema.parse("{\"type\":\"record\"," + + "\"name\":\"GEdge\",\"namespace\":\"org.apache.giraph.gora.generated\"," + + "\"fields\":[{\"name\":\"edgeId\",\"type\":\"string\"}," + + "{\"name\":\"edgeWeight\",\"type\":\"float\"}," + + "{\"name\":\"vertexInId\",\"type\":\"string\"}," + + "{\"name\":\"vertexOutId\",\"type\":\"string\"}," + + "{\"name\":\"label\",\"type\":\"string\"}]}"); + + /** + * Field enum + */ + public static enum Field { + /** + * Edge id. + */ + EDGE_ID(0, "edgeId"), + + /** + * Edge weight. + */ + EDGE_WEIGHT(1, "edgeWeight"), + + /** + * Edge vertex source id. + */ + VERTEX_IN_ID(2, "vertexInId"), + + /** + * Edge vertex end id. + */ + VERTEX_OUT_ID(3, "vertexOutId"), + + /** + * Edge label. + */ + LABEL(4, "label"); + + /** + * Field index + */ + private int index; + + /** + * Field name + */ + private String name; + + /** + * Field constructor + * @param index of attribute + * @param name of attribute + */ + Field(int index, String name) { + this.index = index; + this.name = name; + } + + /** + * Gets index + * @return int of attribute. + */ + public int getIndex() { + return index; + } + + /** + * Gets name + * @return String of name. + */ + public String getName() { + return name; + } + + /** + * Gets name + * @return String of name. 
+ */ + public String toString() { + return name; + } + }; + + /** + * Array containing all fields/ + */ + private static final String[] ALL_FIELDS = { + "edgeId", "edgeWeight", "vertexInId", "vertexOutId", "label" + }; + + static { + PersistentBase.registerFields(GEdge.class, ALL_FIELDS); + } + + /** + * edgeId + */ + private Utf8 edgeId; + + /** + * edgeWeight + */ + private float edgeWeight; + + /** + * vertexInId + */ + private Utf8 vertexInId; + + /** + * vertexOutId + */ + private Utf8 vertexOutId; + + /** + * label + */ + private Utf8 label; + + /** + * Default constructor. + */ + public GEdge() { + this(new StateManagerImpl()); + } + + /** + * Constructor + * @param stateManager from which the object will be created. + */ + public GEdge(StateManager stateManager) { + super(stateManager); + } + + /** + * Creates a new instance + * @param stateManager from which the object will be created. + * @return GEdge created + */ + public GEdge newInstance(StateManager stateManager) { + return new GEdge(stateManager); + } + + /** + * Gets the object schema + * @return Schema of the object. + */ + public Schema getSchema() { + return OBJ_SCHEMA; + } + + /** + * Gets field + * @param fieldIndex index field. + * @return Object from an index. + */ + public Object get(int fieldIndex) { + switch (fieldIndex) { + case 0: + return edgeId; + case 1: + return edgeWeight; + case 2: + return vertexInId; + case 3: + return vertexOutId; + case 4: + return label; + default: + throw new AvroRuntimeException("Bad index"); + } + } + + /** + * Puts a value into a field. + * @param fieldIndex index of field used. + * @param fieldValue value of field used. 
+ */ + @SuppressWarnings(value = "unchecked") + public void put(int fieldIndex, Object fieldValue) { + if (isFieldEqual(fieldIndex, fieldValue)) { + return; + } + getStateManager().setDirty(this, fieldIndex); + switch (fieldIndex) { + case 0: + edgeId = (Utf8) fieldValue; break; + case 1: + edgeWeight = (Float) fieldValue; break; + case 2: + vertexInId = (Utf8) fieldValue; break; + case 3: + vertexOutId = (Utf8) fieldValue; break; + case 4: + label = (Utf8) fieldValue; break; + default: + throw new AvroRuntimeException("Bad index"); + } + } + + /** + * Gets edgeId + * @return Utf8 edgeId + */ + public Utf8 getEdgeId() { + return (Utf8) get(0); + } + + /** + * Sets edgeId + * @param value edgeId + */ + public void setEdgeId(Utf8 value) { + put(0, value); + } + + /** + * Gets edgeWeight + * @return float edgeWeight + */ + public float getEdgeWeight() { + return (Float) get(1); + } + + /** + * Sets edgeWeight + * @param value edgeWeight + */ + public void setEdgeWeight(float value) { + put(1, value); + } + + /** + * Gets edgeVertexInId + * @return Utf8 edgeVertexInId + */ + public Utf8 getVertexInId() { + return (Utf8) get(2); + } + + /** + * Sets edgeVertexInId + * @param value edgeVertexInId + */ + public void setVertexInId(Utf8 value) { + put(2, value); + } + + /** + * Gets edgeVertexOutId + * @return Utf8 edgeVertexOutId + */ + public Utf8 getVertexOutId() { + return (Utf8) get(3); + } + + /** + * Sets edgeVertexOutId + * @param value edgeVertexOutId + */ + public void setVertexOutId(Utf8 value) { + put(3, value); + } + + /** + * Gets edgeLabel + * @return Utf8 edgeLabel + */ + public Utf8 getLabel() { + return (Utf8) get(4); + } + + /** + * Sets edgeLabel + * @param value edgeLabel + */ + public void setLabel(Utf8 value) { + put(4, value); + } +} diff --git a/giraph-gora/src/main/java/org/apache/giraph/io/gora/generated/GEdgeResult.java b/giraph-gora/src/main/java/org/apache/giraph/io/gora/generated/GEdgeResult.java new file mode 100644 index 000000000..0c3501cd9 
--- /dev/null +++ b/giraph-gora/src/main/java/org/apache/giraph/io/gora/generated/GEdgeResult.java @@ -0,0 +1,314 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.giraph.io.gora.generated; + +import org.apache.avro.Schema; +import org.apache.avro.AvroRuntimeException; +import org.apache.avro.util.Utf8; +import org.apache.gora.persistency.StateManager; +import org.apache.gora.persistency.impl.PersistentBase; +import org.apache.gora.persistency.impl.StateManagerImpl; + +/** + * Example Class for defining a Giraph-Edge. + */ +@SuppressWarnings("all") +public class GEdgeResult extends PersistentBase { + /** + * Schema used for the class. + */ + public static final Schema OBJ_SCHEMA = Schema.parse("{\"type\":\"record\"," + + "\"name\":\"GEdge\",\"namespace\":\"org.apache.giraph.gora.generated\"," + + "\"fields\":[{\"name\":\"edgeId\",\"type\":\"string\"}," + + "{\"name\":\"edgeWeight\",\"type\":\"float\"}," + + "{\"name\":\"vertexInId\",\"type\":\"string\"}," + + "{\"name\":\"vertexOutId\",\"type\":\"string\"}," + + "{\"name\":\"label\",\"type\":\"string\"}]}"); + + /** + * Field enum + */ + public static enum Field { + /** + * Edge id. + */ + EDGE_ID(0, "edgeId"), + + /** + * Edge weight. 
+ */ + EDGE_WEIGHT(1, "edgeWeight"), + + /** + * Edge vertex source id. + */ + VERTEX_IN_ID(2, "vertexInId"), + + /** + * Edge vertex end id. + */ + VERTEX_OUT_ID(3, "vertexOutId"), + + /** + * Edge label. + */ + LABEL(4, "label"); + + /** + * Field index + */ + private int index; + + /** + * Field name + */ + private String name; + + /** + * Field constructor + * @param index of attribute + * @param name of attribute + */ + Field(int index, String name) { + this.index = index; + this.name = name; + } + + /** + * Gets index + * @return int of attribute. + */ + public int getIndex() { + return index; + } + + /** + * Gets name + * @return String of name. + */ + public String getName() { + return name; + } + + /** + * Gets name + * @return String of name. + */ + public String toString() { + return name; + } + }; + + /** + * Array containing all fields/ + */ + private static final String[] ALL_FIELDS = { + "edgeId", "edgeWeight", "vertexInId", "vertexOutId", "label" + }; + + static { + PersistentBase.registerFields(GEdgeResult.class, ALL_FIELDS); + } + + /** + * edgeId + */ + private Utf8 edgeId; + + /** + * edgeWeight + */ + private float edgeWeight; + + /** + * vertexInId + */ + private Utf8 vertexInId; + + /** + * vertexOutId + */ + private Utf8 vertexOutId; + + /** + * label + */ + private Utf8 label; + + /** + * Default constructor. + */ + public GEdgeResult() { + this(new StateManagerImpl()); + } + + /** + * Constructor + * @param stateManager from which the object will be created. + */ + public GEdgeResult(StateManager stateManager) { + super(stateManager); + } + + /** + * Creates a new instance + * @param stateManager from which the object will be created. + * @return GEdge created + */ + public GEdgeResult newInstance(StateManager stateManager) { + return new GEdgeResult(stateManager); + } + + /** + * Gets the object schema + * @return Schema of the object. 
+ */ + public Schema getSchema() { + return OBJ_SCHEMA; + } + + /** + * Gets field + * @param fieldIndex index field. + * @return Object from an index. + */ + public Object get(int fieldIndex) { + switch (fieldIndex) { + case 0: + return edgeId; + case 1: + return edgeWeight; + case 2: + return vertexInId; + case 3: + return vertexOutId; + case 4: + return label; + default: + throw new AvroRuntimeException("Bad index"); + } + } + + /** + * Puts a value into a field. + * @param fieldIndex index of field used. + * @param fieldValue value of field used. + */ + @SuppressWarnings(value = "unchecked") + public void put(int fieldIndex, Object fieldValue) { + if (isFieldEqual(fieldIndex, fieldValue)) { + return; + } + getStateManager().setDirty(this, fieldIndex); + switch (fieldIndex) { + case 0: + edgeId = (Utf8) fieldValue; break; + case 1: + edgeWeight = (Float) fieldValue; break; + case 2: + vertexInId = (Utf8) fieldValue; break; + case 3: + vertexOutId = (Utf8) fieldValue; break; + case 4: + label = (Utf8) fieldValue; break; + default: + throw new AvroRuntimeException("Bad index"); + } + } + + /** + * Gets edgeId + * @return Utf8 edgeId + */ + public Utf8 getEdgeId() { + return (Utf8) get(0); + } + + /** + * Sets edgeId + * @param value edgeId + */ + public void setEdgeId(Utf8 value) { + put(0, value); + } + + /** + * Gets edgeWeight + * @return float edgeWeight + */ + public float getEdgeWeight() { + return (Float) get(1); + } + + /** + * Sets edgeWeight + * @param value edgeWeight + */ + public void setEdgeWeight(float value) { + put(1, value); + } + + /** + * Gets edgeVertexInId + * @return Utf8 edgeVertexInId + */ + public Utf8 getVertexInId() { + return (Utf8) get(2); + } + + /** + * Sets edgeVertexInId + * @param value edgeVertexInId + */ + public void setVertexInId(Utf8 value) { + put(2, value); + } + + /** + * Gets edgeVertexOutId + * @return Utf8 edgeVertexOutId + */ + public Utf8 getVertexOutId() { + return (Utf8) get(3); + } + + /** + * Sets edgeVertexOutId 
+ * @param value edgeVertexOutId + */ + public void setVertexOutId(Utf8 value) { + put(3, value); + } + + /** + * Gets edgeLabel + * @return Utf8 edgeLabel + */ + public Utf8 getLabel() { + return (Utf8) get(4); + } + + /** + * Sets edgeLabel + * @param value edgeLabel + */ + public void setLabel(Utf8 value) { + put(4, value); + } +} diff --git a/giraph-gora/src/main/java/org/apache/giraph/io/gora/generated/GVertex.java b/giraph-gora/src/main/java/org/apache/giraph/io/gora/generated/GVertex.java new file mode 100644 index 000000000..efd7b951e --- /dev/null +++ b/giraph-gora/src/main/java/org/apache/giraph/io/gora/generated/GVertex.java @@ -0,0 +1,280 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.giraph.io.gora.generated; + +import java.util.Map; + +import org.apache.avro.Schema; +import org.apache.avro.AvroRuntimeException; +import org.apache.avro.util.Utf8; +import org.apache.gora.persistency.StateManager; +import org.apache.gora.persistency.StatefulHashMap; +import org.apache.gora.persistency.impl.PersistentBase; +import org.apache.gora.persistency.impl.StateManagerImpl; + +/** + * Example class for defining a Giraph-Vertex. 
+ */ +@SuppressWarnings("all") +public class GVertex extends PersistentBase { + /** + * Schema used for the class. + */ + public static final Schema OBJ_SCHEMA = Schema.parse( + "{\"type\":\"record\",\"name\":\"Vertex\"," + + "\"namespace\":\"org.apache.giraph.gora.generated\"," + + "\"fields\":[{\"name\":\"vertexId\",\"type\":\"string\"}," + + "{\"name\":\"value\",\"type\":\"float\"},{\"name\":\"edges\"," + + "\"type\":{\"type\":\"map\",\"values\":\"string\"}}]}"); + + /** + * Field enum + */ + public static enum Field { + /** + * VertexId + */ + VERTEX_ID(0, "vertexId"), + + /** + * Field value + */ + VALUE(1, "value"), + + /** + * Edges + */ + EDGES(2, "edges"); + + /** + * Field index + */ + private int index; + + /** + * Field name + */ + private String name; + + /** + * Field constructor + * @param index of attribute + * @param name of attribute + */ + Field(int index, String name) { + this.index = index; + this.name = name; + } + + /** + * Gets index + * @return int of attribute. + */ + public int getIndex() { + return index; + } + + /** + * Gets name + * @return String of name. + */ + public String getName() { + return name; + } + + /** + * Gets name + * @return String of name. + */ + public String toString() { + return name; + } + }; + + /** + * Array containing all fields/ + */ + private static final String[] ALL_FIELDS = { + "vertexId", "value", "edges" + }; + + static { + PersistentBase.registerFields(GVertex.class, ALL_FIELDS); + } + + /** + * Vertex Id + */ + private Utf8 vertexId; + + /** + * Value + */ + private float value; + + /** + * Edges + */ + private Map edges; + + /** + * Default constructor + */ + public GVertex() { + this(new StateManagerImpl()); + } + + /** + * Constructor + * @param stateManager from which the object will be created. 
+ */ + public GVertex(StateManager stateManager) { + super(stateManager); + edges = new StatefulHashMap(); + } + + /** + * Creates a new instance + * @param stateManager from which the object will be created. + * @return GVertex created + */ + public GVertex newInstance(StateManager stateManager) { + return new GVertex(stateManager); + } + + /** + * Gets the object schema + * @return Schema of the object. + */ + public Schema getSchema() { + return OBJ_SCHEMA; + } + + /** + * Gets field + * @param fieldIndex index of field to be used. + * @return Object from an index. + */ + public Object get(int fieldIndex) { + switch (fieldIndex) { + case 0: + return vertexId; + case 1: + return value; + case 2: + return edges; + default: + throw new AvroRuntimeException("Bad index"); + } + } + + /** + * Puts a value into a field. + * @param fieldIndex index of field used. + * @param fieldValue value of field used. + */ + @SuppressWarnings(value = "unchecked") + public void put(int fieldIndex, Object fieldValue) { + if (isFieldEqual(fieldIndex, fieldValue)) { + return; + } + getStateManager().setDirty(this, fieldIndex); + switch (fieldIndex) { + case 0: + vertexId = (Utf8) fieldValue; break; + case 1: + value = (Float) fieldValue; break; + case 2: + edges = (Map) fieldValue; break; + default: + throw new AvroRuntimeException("Bad index"); + } + } + + /** + * Gets vertexId + * @return Utf8 vertexId + */ + public Utf8 getVertexId() { + return (Utf8) get(0); + } + + /** + * Sets vertexId + * @param value vertexId + */ + public void setVertexId(Utf8 value) { + put(0, value); + } + + /** + * Gets value + * @return String of value. + */ + public float getValue() { + return (Float) get(1); + } + + /** + * Sets value + * @param value . + */ + public void setValue(float value) { + put(1, value); + } + + /** + * Get edges. + * @return Map of edges. + */ + public Map getEdges() { + return (Map) get(2); + } + + /** + * Gets value from edge. + * @param key Edge key. 
+ * @return Utf8 containing the value of edge. + */ + public Utf8 getFromEdges(Utf8 key) { + if (edges == null) { return null; } + return edges.get(key); + } + + /** + * Puts a new edge. + * @param key of new edge. + * @param value of new edge. + */ + public void putToEdges(Utf8 key, Utf8 value) { + getStateManager().setDirty(this, 2); + edges.put(key, value); + } + + /** + * Remove from edges + * @param key of edge to be deleted. + * @return Utf8 containing value of deleted key. + */ + public Utf8 removeFromEdges(Utf8 key) { + if (edges == null) { return null; } + getStateManager().setDirty(this, 2); + return edges.remove(key); + } +} diff --git a/giraph-gora/src/main/java/org/apache/giraph/io/gora/generated/GVertexResult.java b/giraph-gora/src/main/java/org/apache/giraph/io/gora/generated/GVertexResult.java new file mode 100644 index 000000000..2c1952dd7 --- /dev/null +++ b/giraph-gora/src/main/java/org/apache/giraph/io/gora/generated/GVertexResult.java @@ -0,0 +1,280 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.giraph.io.gora.generated; + +import java.util.Map; + +import org.apache.avro.Schema; +import org.apache.avro.AvroRuntimeException; +import org.apache.avro.util.Utf8; +import org.apache.gora.persistency.StateManager; +import org.apache.gora.persistency.StatefulHashMap; +import org.apache.gora.persistency.impl.PersistentBase; +import org.apache.gora.persistency.impl.StateManagerImpl; + +/** + * Example class for defining a Giraph-Vertex. + */ +@SuppressWarnings("all") +public class GVertexResult extends PersistentBase { + /** + * Schema used for the class. + */ + public static final Schema OBJ_SCHEMA = Schema.parse( + "{\"type\":\"record\",\"name\":\"Vertex\"," + + "\"namespace\":\"org.apache.giraph.gora.generated\"," + + "\"fields\":[{\"name\":\"vertexId\",\"type\":\"string\"}," + + "{\"name\":\"value\",\"type\":\"float\"},{\"name\":\"edges\"," + + "\"type\":{\"type\":\"map\",\"values\":\"string\"}}]}"); + + /** + * Field enum + */ + public static enum Field { + /** + * VertexId + */ + VERTEX_ID(0, "vertexId"), + + /** + * Field value + */ + VALUE(1, "value"), + + /** + * Edges + */ + EDGES(2, "edges"); + + /** + * Field index + */ + private int index; + + /** + * Field name + */ + private String name; + + /** + * Field constructor + * @param index of attribute + * @param name of attribute + */ + Field(int index, String name) { + this.index = index; + this.name = name; + } + + /** + * Gets index + * @return int of attribute. + */ + public int getIndex() { + return index; + } + + /** + * Gets name + * @return String of name. + */ + public String getName() { + return name; + } + + /** + * Gets name + * @return String of name. 
+ */ + public String toString() { + return name; + } + }; + + /** + * Array containing all fields/ + */ + private static final String[] ALL_FIELDS = { + "vertexId", "value", "edges" + }; + + static { + PersistentBase.registerFields(GVertexResult.class, ALL_FIELDS); + } + + /** + * Vertex Id + */ + private Utf8 vertexId; + + /** + * Value + */ + private float value; + + /** + * Edges + */ + private Map edges; + + /** + * Default constructor + */ + public GVertexResult() { + this(new StateManagerImpl()); + } + + /** + * Constructor + * @param stateManager from which the object will be created. + */ + public GVertexResult(StateManager stateManager) { + super(stateManager); + edges = new StatefulHashMap(); + } + + /** + * Creates a new instance + * @param stateManager from which the object will be created. + * @return GVertex created + */ + public GVertexResult newInstance(StateManager stateManager) { + return new GVertexResult(stateManager); + } + + /** + * Gets the object schema + * @return Schema of the object. + */ + public Schema getSchema() { + return OBJ_SCHEMA; + } + + /** + * Gets field + * @param fieldIndex index of field to be used. + * @return Object from an index. + */ + public Object get(int fieldIndex) { + switch (fieldIndex) { + case 0: + return vertexId; + case 1: + return value; + case 2: + return edges; + default: + throw new AvroRuntimeException("Bad index"); + } + } + + /** + * Puts a value into a field. + * @param fieldIndex index of field used. + * @param fieldValue value of field used. 
+ */ + @SuppressWarnings(value = "unchecked") + public void put(int fieldIndex, Object fieldValue) { + if (isFieldEqual(fieldIndex, fieldValue)) { + return; + } + getStateManager().setDirty(this, fieldIndex); + switch (fieldIndex) { + case 0: + vertexId = (Utf8) fieldValue; break; + case 1: + value = (Float) fieldValue; break; + case 2: + edges = (Map) fieldValue; break; + default: + throw new AvroRuntimeException("Bad index"); + } + } + + /** + * Gets vertexId + * @return Utf8 vertexId + */ + public Utf8 getVertexId() { + return (Utf8) get(0); + } + + /** + * Sets vertexId + * @param value vertexId + */ + public void setVertexId(Utf8 value) { + put(0, value); + } + + /** + * Gets value + * @return String of value. + */ + public float getValue() { + return (Float) get(1); + } + + /** + * Sets value + * @param value . + */ + public void setValue(float value) { + put(1, value); + } + + /** + * Get edges. + * @return Map of edges. + */ + public Map getEdges() { + return (Map) get(2); + } + + /** + * Gets value from edge. + * @param key Edge key. + * @return Utf8 containing the value of edge. + */ + public Utf8 getFromEdges(Utf8 key) { + if (edges == null) { return null; } + return edges.get(key); + } + + /** + * Puts a new edge. + * @param key of new edge. + * @param value of new edge. + */ + public void putToEdges(Utf8 key, Utf8 value) { + getStateManager().setDirty(this, 2); + edges.put(key, value); + } + + /** + * Remove from edges + * @param key of edge to be deleted. + * @return Utf8 containing value of deleted key. 
+ */ + public Utf8 removeFromEdges(Utf8 key) { + if (edges == null) { return null; } + getStateManager().setDirty(this, 2); + return edges.remove(key); + } +} diff --git a/giraph-gora/src/main/java/org/apache/giraph/io/gora/generated/package-info.java b/giraph-gora/src/main/java/org/apache/giraph/io/gora/generated/package-info.java new file mode 100644 index 000000000..6c218d148 --- /dev/null +++ b/giraph-gora/src/main/java/org/apache/giraph/io/gora/generated/package-info.java @@ -0,0 +1,21 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/** + * Gora Input/Output for Giraph + */ +package org.apache.giraph.io.gora.generated; diff --git a/giraph-gora/src/main/java/org/apache/giraph/io/gora/package-info.java b/giraph-gora/src/main/java/org/apache/giraph/io/gora/package-info.java new file mode 100644 index 000000000..3a9b48892 --- /dev/null +++ b/giraph-gora/src/main/java/org/apache/giraph/io/gora/package-info.java @@ -0,0 +1,21 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/** + * Gora Input/Output for Giraph + */ +package org.apache.giraph.io.gora; diff --git a/giraph-gora/src/main/java/org/apache/giraph/io/gora/utils/DefaultKeyFactory.java b/giraph-gora/src/main/java/org/apache/giraph/io/gora/utils/DefaultKeyFactory.java new file mode 100644 index 000000000..8d814f5cd --- /dev/null +++ b/giraph-gora/src/main/java/org/apache/giraph/io/gora/utils/DefaultKeyFactory.java @@ -0,0 +1,45 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.giraph.io.gora.utils; + +/** + * Example class for defining a way to construct Gora keys. + * Uses strings as keys inside Gora. 
+ */ +public class DefaultKeyFactory extends KeyFactory { + + /** + * Builds a key from a string parameter. + * @param keyString the key object as a string. + * @return the key object. + */ + @Override + public Object buildKey(String keyString) { + Object key = null; + if (getDataStore() == null) { + throw new RuntimeException( + "DataStore must be defined before using a key Builder."); + } else { + key = getDataStore().newKey(); + // Do specific transformation + key = keyString; + } + return key; + } + +} diff --git a/giraph-gora/src/main/java/org/apache/giraph/io/gora/utils/ExtraGoraInputFormat.java b/giraph-gora/src/main/java/org/apache/giraph/io/gora/utils/ExtraGoraInputFormat.java new file mode 100644 index 000000000..6374a266d --- /dev/null +++ b/giraph-gora/src/main/java/org/apache/giraph/io/gora/utils/ExtraGoraInputFormat.java @@ -0,0 +1,172 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.giraph.io.gora.utils; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + +import org.apache.gora.mapreduce.GoraInputSplit; +import org.apache.gora.mapreduce.GoraMapReduceUtils; +import org.apache.gora.mapreduce.GoraRecordReader; +import org.apache.gora.persistency.Persistent; +import org.apache.gora.persistency.impl.PersistentBase; +import org.apache.gora.query.PartitionQuery; +import org.apache.gora.query.Query; +import org.apache.gora.store.DataStore; +import org.apache.gora.util.IOUtils; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.mapreduce.InputFormat; +import org.apache.hadoop.mapreduce.InputSplit; +import org.apache.hadoop.mapreduce.Job; +import org.apache.hadoop.mapreduce.JobContext; +import org.apache.hadoop.mapreduce.RecordReader; +import org.apache.hadoop.mapreduce.TaskAttemptContext; + +/** + * InputFormat to fetch the input from Gora data stores. The + * query to fetch the items from the datastore should be prepared and + * set via setQuery(Configuration, Query), before submitting the job. + * + * Hadoop jobs can be configured either through the static + * setInput() methods, or from GoraMapper. + * @param KeyClass. + * @param PersistentClass. + */ +public class ExtraGoraInputFormat + extends InputFormat { + + /** + * String used to map partitioned queries into configuration object. + */ + public static final String QUERY_KEY = "gora.inputformat.query"; + + /** + * Data store to be used. + */ + private DataStore dataStore; + + /** + * Query to be performed. + */ + private Query query; + + /** + * @param split InputSplit to be used; it is cast to GoraInputSplit. + * @param context TaskAttemptContext to be used. + * @return RecordReader record reader used inside Hadoop job. 
+ */ + @Override + @SuppressWarnings("unchecked") + public RecordReader createRecordReader(InputSplit split, + TaskAttemptContext context) throws IOException, InterruptedException { + + PartitionQuery partitionQuery = (PartitionQuery) + ((GoraInputSplit) split).getQuery(); + + //setInputPath(partitionQuery, context); + return new GoraRecordReader(partitionQuery, context); + } + + @Override + public List getSplits(JobContext context) throws IOException, + InterruptedException { + List> queries = + getDataStore().getPartitions(getQuery()); + List splits = new ArrayList(queries.size()); + + for (PartitionQuery partQuery : queries) { + splits.add(new GoraInputSplit(context.getConfiguration(), partQuery)); + } + + return splits; + } + + /** + * @return the dataStore + */ + public DataStore getDataStore() { + return dataStore; + } + + /** + * @param datStore the dataStore to set + */ + public void setDataStore(DataStore datStore) { + this.dataStore = datStore; + } + + /** + * @return the query + */ + public Query getQuery() { + return query; + } + + /** + * @param query the query to set + */ + public void setQuery(Query query) { + this.query = query; + } + + /** + * Stores the query in the configuration under QUERY_KEY. + * @param conf Configuration used. + * @param query Query to be executed. + * @param Key class + * @param Persistent class + * @throws java.io.IOException Exception that might be thrown. + */ + public static void setQuery(Configuration conf, + Query query) throws IOException { + IOUtils.storeToConf(query, conf, QUERY_KEY); + } + + /** + * Loads the query stored under QUERY_KEY in the conf object passed. + * @param conf Configuration object. + * @return Query passed inside the configuration object + * @throws java.io.IOException Exception that might be thrown. 
+ */ + public Query getQuery(Configuration conf) throws IOException { + return IOUtils.loadFromConf(conf, QUERY_KEY); + } + + /** + * Sets the input parameters for the job. + * @param job the job to set the properties for + * @param query the query to get the inputs from + * @param dataStore the datastore as the input (not used by this method) + * @param reuseObjects whether to reuse objects in serialization + * @param Key class + * @param Persistent class + * @throws java.io.IOException Exception that might be thrown. + */ + public static void setInput(Job job, + Query query, DataStore dataStore, boolean reuseObjects) + throws IOException { + + Configuration conf = job.getConfiguration(); + + GoraMapReduceUtils.setIOSerializations(conf, reuseObjects); + + job.setInputFormatClass(ExtraGoraInputFormat.class); + ExtraGoraInputFormat.setQuery(job.getConfiguration(), query); + } +} diff --git a/giraph-gora/src/main/java/org/apache/giraph/io/gora/utils/GoraUtils.java b/giraph-gora/src/main/java/org/apache/giraph/io/gora/utils/GoraUtils.java new file mode 100644 index 000000000..c3fc2684d --- /dev/null +++ b/giraph-gora/src/main/java/org/apache/giraph/io/gora/utils/GoraUtils.java @@ -0,0 +1,194 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.giraph.io.gora.utils; + +import org.apache.gora.persistency.Persistent; +import org.apache.gora.query.Query; +import org.apache.gora.query.Result; +import org.apache.gora.store.DataStore; +import org.apache.gora.store.DataStoreFactory; +import org.apache.gora.util.GoraException; +import org.apache.hadoop.conf.Configuration; + +/** + * Class used to handle the creation and querying of data stores through Gora. + */ +public class GoraUtils { + + /** + * Data store class to be instantiated; set by createSpecificDataStore. + */ + private static Class DATASTORECLASS; + + /** + * Attribute handling configuration for data stores. + */ + private static Configuration CONF = new Configuration(); + + /** + * The constructor is private so that the + * class is not instantiated. + */ + private GoraUtils() { /* private constructor */ } + + /** + * Creates a generic data store using the data store class + * held in the DATASTORECLASS field and the current configuration. + * @param key class + * @param value class + * @param keyClass key class used + * @param persistentClass persistent class used + * @return created data store + * @throws GoraException exception thrown + */ + @SuppressWarnings("unchecked") + public static DataStore + createDataStore(Class keyClass, Class persistentClass) + throws GoraException { + DataStoreFactory.createProps(); + DataStore dataStore = + DataStoreFactory.createDataStore((Class>) + DATASTORECLASS, + keyClass, persistentClass, + getConf()); + + return dataStore; + } + + /** + * Creates a data store of the specified data store class. 
+ * @param key class + * @param value class + * @param dataStoreClass Type of data store; kept in a static field. + * @param keyClass Handles the key class to be used. + * @param persistentClass Handles the persistent class to be used. + * @return DataStore created using parameters passed. + * @throws GoraException if an error occurs. + */ + public static DataStore + createSpecificDataStore(Class dataStoreClass, + Class keyClass, Class persistentClass) throws GoraException { + DATASTORECLASS = dataStoreClass; + return createDataStore(keyClass, persistentClass); + } + + /** + * Performs a range query to Gora datastores. + * @param key class + * @param value class + * @param pDataStore data store being used. + * @param pStartKey start key for the range query. + * @param pEndKey end key for the range query. + * @return Result containing all results for the query. + */ + public static Result + getRequest(DataStore pDataStore, K pStartKey, K pEndKey) { + Query query = getQuery(pDataStore, pStartKey, pEndKey); + return getRequest(pDataStore, query); + } + + /** + * Executes the given query on the given Gora data store. + * @param pDataStore data store being used. + * @param query query executed over data stores. + * @param key class + * @param value class + * @return Result containing all results for the query. + */ + public static Result + getRequest(DataStore pDataStore, Query query) { + return pDataStore.execute(query); + } + + /** + * Performs a range query with a null end key to Gora datastores. + * @param key class + * @param value class + * @param pDataStore data store being used. + * @param pStartKey start key for the range query. + * @return Result containing all results for the query. + */ + public static Result + getRequest(DataStore pDataStore, K pStartKey) { + return getRequest(pDataStore, pStartKey, null); + } + + /** + * Gets a query object to be used as a range query. + * @param pDataStore data store used. + * @param pStartKey range start key. + * @param pEndKey range end key. + * @param key class + * @param value class + * @return range query object. + */ + public static Query + getQuery(DataStore pDataStore, K pStartKey, K pEndKey) { + Query query = pDataStore.newQuery(); + query.setStartKey(pStartKey); + query.setEndKey(pEndKey); + return query; + } + + /** + * Gets a query object with a start key and a null end key. 
+ * @param pDataStore data store used. + * @param pStartKey range start key. + * @param key class + * @param value class + * @return query object. + */ + public static Query + getQuery(DataStore pDataStore, K pStartKey) { + Query query = pDataStore.newQuery(); + query.setStartKey(pStartKey); + query.setEndKey(null); + return query; + } + + /** + * Gets a query object with null start and end keys. + * @param pDataStore data store used. + * @param key class + * @param value class + * @return query object. + */ + public static Query + getQuery(DataStore pDataStore) { + Query query = pDataStore.newQuery(); + query.setStartKey(null); + query.setEndKey(null); + return query; + } + + /** + * Gets the configuration object. + * @return the configuration object. + */ + public static Configuration getConf() { + return CONF; + } + + /** + * Sets the configuration object. + * @param conf to be set as the configuration object. + */ + public static void setConf(Configuration conf) { + CONF = conf; + } +} diff --git a/giraph-gora/src/main/java/org/apache/giraph/io/gora/utils/KeyFactory.java b/giraph-gora/src/main/java/org/apache/giraph/io/gora/utils/KeyFactory.java new file mode 100644 index 000000000..c8dd4a75a --- /dev/null +++ b/giraph-gora/src/main/java/org/apache/giraph/io/gora/utils/KeyFactory.java @@ -0,0 +1,54 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.giraph.io.gora.utils; + +import org.apache.gora.store.DataStore; + +/** + * Abstract base class used to convert strings into more complex keys. + */ +public abstract class KeyFactory { + + /** + * Data store used for creating a new key. + */ + private DataStore dataStore; + + /** + * Builds a key from a string parameter; implemented by subclasses. + * @param keyString the key object as a string. + * @return the key object. + */ + public abstract Object buildKey(String keyString); + + /** + * Gets the data store used in this factory. + * @return the dataStore + */ + public DataStore getDataStore() { + return dataStore; + } + + /** + * Sets the data store used in this factory. + * @param dataStore the dataStore to set + */ + public void setDataStore(DataStore dataStore) { + this.dataStore = dataStore; + } +} diff --git a/giraph-gora/src/main/java/org/apache/giraph/io/gora/utils/package-info.java b/giraph-gora/src/main/java/org/apache/giraph/io/gora/utils/package-info.java new file mode 100644 index 000000000..02dd59f1c --- /dev/null +++ b/giraph-gora/src/main/java/org/apache/giraph/io/gora/utils/package-info.java @@ -0,0 +1,21 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/** + * Gora Utils for Giraph + */ +package org.apache.giraph.io.gora.utils; diff --git a/giraph-gora/src/test/java/org/apache/giraph/io/gora/GoraTestEdgeInputFormat.java b/giraph-gora/src/test/java/org/apache/giraph/io/gora/GoraTestEdgeInputFormat.java new file mode 100644 index 000000000..ba71ce405 --- /dev/null +++ b/giraph-gora/src/test/java/org/apache/giraph/io/gora/GoraTestEdgeInputFormat.java @@ -0,0 +1,132 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.giraph.io.gora; + +import java.io.IOException; + +import org.apache.avro.util.Utf8; +import org.apache.giraph.edge.Edge; +import org.apache.giraph.edge.EdgeFactory; +import org.apache.giraph.io.gora.GoraEdgeInputFormat; +import org.apache.giraph.io.gora.generated.GEdge; +import org.apache.hadoop.io.FloatWritable; +import org.apache.hadoop.io.LongWritable; +import org.apache.hadoop.mapreduce.InputSplit; +import org.apache.hadoop.mapreduce.TaskAttemptContext; + +/** + * Implementation of a specific reader for a generated data bean. + */ +public class GoraTestEdgeInputFormat + extends GoraEdgeInputFormat { + + /** + * Default constructor + */ + public GoraTestEdgeInputFormat() { + } + + /** + * Creates specific edge reader; sample data is written to the store first. + * @param split split to be read. + * @param context TaskAttemptContext to be used. + * @return GoraEdgeReader Edge reader to be used by Hadoop. + */ + @Override + public GoraEdgeReader createEdgeReader( + InputSplit split, TaskAttemptContext context) throws IOException { + putArtificialData(); + return new GoraGEdgeEdgeReader(); + } + + /** + * Writes four sample edges into the data store and flushes it. + */ + @SuppressWarnings("unchecked") + private static void putArtificialData() { + getDataStore().put("11-22", + createEdge("11-22", "11", "22", "11-22", (float)(11+22))); + getDataStore().put("22-11", + createEdge("22-11", "22", "11", "22-11", (float)(22+11))); + getDataStore().put("11-33", + createEdge("11-33", "11", "33", "11-33", (float)(11+33))); + getDataStore().put("33-11", + createEdge("33-11", "33", "11", "33-11", (float)(33+11))); + getDataStore().flush(); + } + + /** + * Creates an edge from its id, end points, label and weight. + * @param id Edge id. + * @param vertexInId Vertex source Id. + * @param vertexOutId Vertex destination Id. + * @param edgeLabel Edge label. + * @param edgeWeight Edge weight. + * @return GEdge created. 
+ */ + private static GEdge createEdge(String id, String vertexInId, + String vertexOutId, String edgeLabel, float edgeWeight) { + GEdge newEdge = new GEdge(); + newEdge.setEdgeId(new Utf8(id)); + newEdge.setVertexInId(new Utf8(vertexInId)); + newEdge.setVertexOutId(new Utf8(vertexOutId)); + newEdge.setLabel(new Utf8(edgeLabel)); + newEdge.setEdgeWeight(edgeWeight); + return newEdge; + } + + /** + * Gora edge reader + */ + protected class GoraGEdgeEdgeReader extends GoraEdgeReader { + + /** source vertex of the edge */ + private LongWritable sourceId; + + /** + * Transforms a GEdge into an Edge, keeping the float weight unrounded. + * @param goraObject GEdge from Gora; its source id is kept for later. + * @return Edge Result from transforming the gora object. + */ + @Override + protected Edge transformEdge + (Object goraObject) { + Edge edge = null; + GEdge goraEdge = (GEdge) goraObject; + Long dest; + float value; + dest = Long.valueOf(goraEdge.getVertexOutId().toString()); + this.sourceId = new LongWritable(); + this.sourceId.set(Long.valueOf(goraEdge.getVertexInId().toString())); + value = goraEdge.getEdgeWeight(); + edge = EdgeFactory.create(new LongWritable(dest), + new FloatWritable(value)); + return edge; + } + + /** + * Gets the currentSourceId for the edge. + * @return LongWritable currentSourceId for the edge. + */ + @Override + public LongWritable getCurrentSourceId() throws IOException, + InterruptedException { + return this.sourceId; + } + } +} diff --git a/giraph-gora/src/test/java/org/apache/giraph/io/gora/GoraTestEdgeOutputFormat.java b/giraph-gora/src/test/java/org/apache/giraph/io/gora/GoraTestEdgeOutputFormat.java new file mode 100644 index 000000000..0254498a2 --- /dev/null +++ b/giraph-gora/src/test/java/org/apache/giraph/io/gora/GoraTestEdgeOutputFormat.java @@ -0,0 +1,119 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.giraph.io.gora; + +import java.io.IOException; + +import junit.framework.Assert; + +import org.apache.avro.util.Utf8; +import org.apache.giraph.edge.Edge; +import org.apache.giraph.io.gora.GoraEdgeOutputFormat; +import org.apache.giraph.io.gora.generated.GEdge; +import org.apache.giraph.io.gora.generated.GEdgeResult; +import org.apache.gora.persistency.Persistent; +import org.apache.hadoop.io.DoubleWritable; +import org.apache.hadoop.io.FloatWritable; +import org.apache.hadoop.io.LongWritable; +import org.apache.hadoop.mapreduce.TaskAttemptContext; + +/** + * Test edge output format writing GEdgeResult beans through Gora. + */ +public class GoraTestEdgeOutputFormat + extends GoraEdgeOutputFormat { + + /** + * Default constructor + */ + public GoraTestEdgeOutputFormat() { + } + + @Override + public GoraEdgeWriter createEdgeWriter( + TaskAttemptContext context) throws IOException, InterruptedException { + return new GoraGEdgeEdgeWriter(); + } + + /** + * Gora edge writer that checks the stored object after each write. 
+ */ + protected class GoraGEdgeEdgeWriter + extends GoraEdgeWriter { + + @Override + protected Persistent getGoraEdge(LongWritable srcId, + DoubleWritable srcValue, Edge edge) { + GEdgeResult tmpGEdge = new GEdgeResult(); + Utf8 keyLabel = new Utf8(srcId.toString() + "-" + + edge.getTargetVertexId().toString()); + tmpGEdge.setEdgeId(keyLabel); + tmpGEdge.setEdgeWeight(edge.getValue().get()); + tmpGEdge.setVertexInId(new Utf8(srcId.toString())); + tmpGEdge.setVertexOutId(new Utf8(edge.getTargetVertexId().toString())); + tmpGEdge.setLabel(keyLabel); + getLogger().debug("GoraObject created: " + tmpGEdge.toString()); + return tmpGEdge; + } + + @Override + protected Object getGoraKey(LongWritable srcId, + DoubleWritable srcValue, Edge edge) { + String goraKey = String.valueOf( + edge.getTargetVertexId().get() + edge.getValue().get()); + return goraKey; + } + + @Override + public void writeEdge(LongWritable srcId, DoubleWritable srcValue, + Edge edge) + throws IOException, InterruptedException { + super.writeEdge(srcId, srcValue, edge); + Object goraKey = getGoraKey(srcId, srcValue, edge); + String keyLabel = String.valueOf(srcId) + "-" + + String.valueOf(edge.getTargetVertexId()); + float weight = Float.valueOf(srcId.toString()) + + Float.valueOf(edge.getTargetVertexId().toString()); + // Asserting + Assert.assertEquals(createEdge(keyLabel, String.valueOf(srcId), + String.valueOf(edge.getTargetVertexId()),keyLabel, weight), + getDataStore().get(goraKey)); + } + + /** + * Creates an edge from its id, end points, label and weight. + * @param id Edge id. + * @param vertexInId Vertex source Id. + * @param vertexOutId Vertex destination Id. + * @param edgeLabel Edge label. + * @param edgeWeight Edge weight. + * @return GEdge created. 
+ */ + private GEdge createEdge(String id, String vertexInId, + String vertexOutId, String edgeLabel, float edgeWeight) { + GEdge newEdge = new GEdge(); + newEdge.setEdgeId(new Utf8(id)); + newEdge.setVertexInId(new Utf8(vertexInId)); + newEdge.setVertexOutId(new Utf8(vertexOutId)); + newEdge.setLabel(new Utf8(edgeLabel)); + newEdge.setEdgeWeight(edgeWeight); + return newEdge; + } + } +} diff --git a/giraph-gora/src/test/java/org/apache/giraph/io/gora/GoraTestVertexInputFormat.java b/giraph-gora/src/test/java/org/apache/giraph/io/gora/GoraTestVertexInputFormat.java new file mode 100644 index 000000000..7de934694 --- /dev/null +++ b/giraph-gora/src/test/java/org/apache/giraph/io/gora/GoraTestVertexInputFormat.java @@ -0,0 +1,128 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.giraph.io.gora; + +import java.io.IOException; +import java.util.HashMap; +import java.util.Map; +import java.util.Set; + +import org.apache.avro.util.Utf8; +import org.apache.giraph.edge.Edge; +import org.apache.giraph.edge.EdgeFactory; +import org.apache.giraph.graph.Vertex; +import org.apache.giraph.io.gora.generated.GVertex; +import org.apache.hadoop.io.DoubleWritable; +import org.apache.hadoop.io.FloatWritable; +import org.apache.hadoop.io.LongWritable; +import org.apache.hadoop.mapreduce.InputSplit; +import org.apache.hadoop.mapreduce.TaskAttemptContext; + +/** + * Implementation of a specific reader for a generated data bean. + */ +public class GoraTestVertexInputFormat + extends GoraVertexInputFormat { + + /** + * Default constructor + */ + public GoraTestVertexInputFormat() { + } + + /** + * Creates specific vertex reader; sample data is written to the store first. + * @param split split to be read. + * @param context TaskAttemptContext to be used. + * @return GoraVertexReader Vertex reader to be used by Hadoop. + */ + @Override + public GoraVertexReader createVertexReader( + InputSplit split, TaskAttemptContext context) throws IOException { + putArtificialData(); + return new GoraGVertexVertexReader(); + } + + /** + * Writes three edgeless vertices (1, 10, 100) into the data store. + */ + @SuppressWarnings("unchecked") + private static void putArtificialData() { + getDataStore().put("1", createVertex("1", null)); + getDataStore().put("10", createVertex("10", null)); + getDataStore().put("100", createVertex("100", null)); + getDataStore().flush(); + } + + /** + * Creates a vertex using an id and a map of edges. + * @param id Vertex id. + * @param edges Map from edge id to edge value; may be null. + * @return GVertex created. 
+ */ + public static GVertex createVertex(String id, Map edges) { + GVertex newVrtx = new GVertex(); + newVrtx.setVertexId(new Utf8(id)); + if (edges != null) { + for (String edgeId : edges.keySet()) + newVrtx.putToEdges(new Utf8(edgeId), new Utf8(edges.get(edgeId))); + } + return newVrtx; + } + + /** + * Gora vertex reader + */ + protected class GoraGVertexVertexReader extends GoraVertexReader { + + /** + * Transforms a GVertex into a Vertex, skipping edge keys with vertexId. + * @param goraObject Object from Gora to be translated. + * @return Vertex Result from transforming the gora object. + */ + @Override + protected Vertex + transformVertex(Object goraObject) { + Vertex vertex; + /* create the actual vertex */ + vertex = getConf().createVertex(); + GVertex tmpGVertex = (GVertex) goraObject; + + LongWritable vrtxId = new LongWritable( + Long.parseLong(tmpGVertex.getVertexId().toString())); + DoubleWritable vrtxValue = new DoubleWritable(tmpGVertex.getValue()); + vertex.initialize(vrtxId, vrtxValue); + if (tmpGVertex.getEdges() != null && !tmpGVertex.getEdges().isEmpty()) { + Set keyIt = tmpGVertex.getEdges().keySet(); + for (Utf8 key : keyIt) { + String keyVal = key.toString(); + String valVal = tmpGVertex.getEdges().get(key).toString(); + Edge edge; + if (!keyVal.contains("vertexId")) { + edge = EdgeFactory.create( + new LongWritable(Long.parseLong(keyVal)), + new FloatWritable(Float.parseFloat(valVal))); + vertex.addEdge(edge); + } + } + } + return vertex; + } + } +} diff --git a/giraph-gora/src/test/java/org/apache/giraph/io/gora/GoraTestVertexOutputFormat.java b/giraph-gora/src/test/java/org/apache/giraph/io/gora/GoraTestVertexOutputFormat.java new file mode 100644 index 000000000..5170d031a --- /dev/null +++ b/giraph-gora/src/test/java/org/apache/giraph/io/gora/GoraTestVertexOutputFormat.java @@ -0,0 +1,114 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.giraph.io.gora; + +import java.io.IOException; +import java.util.Iterator; +import java.util.Map; + +import junit.framework.Assert; + +import org.apache.avro.util.Utf8; +import org.apache.giraph.edge.Edge; +import org.apache.giraph.graph.Vertex; +import org.apache.giraph.io.VertexWriter; +import org.apache.giraph.io.gora.generated.GVertex; +import org.apache.giraph.io.gora.generated.GVertexResult; +import org.apache.gora.persistency.Persistent; +import org.apache.hadoop.io.DoubleWritable; +import org.apache.hadoop.io.FloatWritable; +import org.apache.hadoop.io.LongWritable; +import org.apache.hadoop.mapreduce.TaskAttemptContext; + +/** + * Implementation of a specific writer for a generated data bean. + */ +public class GoraTestVertexOutputFormat + extends GoraVertexOutputFormat { + + /** + * Default constructor + */ + public GoraTestVertexOutputFormat() { + } + + @Override + public VertexWriter + createVertexWriter(TaskAttemptContext context) + throws IOException, InterruptedException { + return new GoraGVertexVertexWriter(); + } + + /** + * Gora vertex writer that checks the stored object after each write. 
+ */ + protected class GoraGVertexVertexWriter extends GoraVertexWriter { + + @Override + protected Persistent getGoraVertex( + Vertex vertex) { + GVertexResult tmpGVertex = new GVertexResult(); + tmpGVertex.setVertexId(new Utf8(vertex.getId().toString())); + tmpGVertex.setValue(Float.parseFloat(vertex.getValue().toString())); + Iterator> it = + vertex.getEdges().iterator(); + while (it.hasNext()) { + Edge edge = it.next(); + tmpGVertex.putToEdges( + new Utf8(edge.getTargetVertexId().toString()), + new Utf8(edge.getValue().toString())); + } + getLogger().debug("GoraObject created: " + tmpGVertex.toString()); + return tmpGVertex; + } + + @Override + public void writeVertex( + Vertex vertex) + throws IOException, InterruptedException { + super.writeVertex(vertex); + // Asserting + Assert.assertEquals(createVertex(vertex.getId().toString(), null), + getDataStore().get(vertex.getId().toString())); + } + + /** + * Creates a vertex using an id and a map of edges. + * @param id Vertex id. + * @param edges Map from edge id to edge value; may be null. + * @return GVertex created. + */ + public GVertex createVertex(String id, Map edges) { + GVertex newVrtx = new GVertex(); + newVrtx.setVertexId(new Utf8(id)); + if (edges != null) { + for (String edgeId : edges.keySet()) + newVrtx.putToEdges(new Utf8(edgeId), new Utf8(edges.get(edgeId))); + } + return newVrtx; + } + + @Override + protected Object getGoraKey( + Vertex vertex) { + String goraKey = String.valueOf(vertex.getId()); + return goraKey; + } + } +} diff --git a/giraph-gora/src/test/java/org/apache/giraph/io/gora/TestGoraEdgeInputFormat.java b/giraph-gora/src/test/java/org/apache/giraph/io/gora/TestGoraEdgeInputFormat.java new file mode 100644 index 000000000..a01fbd330 --- /dev/null +++ b/giraph-gora/src/test/java/org/apache/giraph/io/gora/TestGoraEdgeInputFormat.java @@ -0,0 +1,122 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.giraph.io.gora; + +import static org.apache.giraph.io.gora.constants.GiraphGoraConstants.GIRAPH_GORA_DATASTORE_CLASS; +import static org.apache.giraph.io.gora.constants.GiraphGoraConstants.GIRAPH_GORA_END_KEY; +import static org.apache.giraph.io.gora.constants.GiraphGoraConstants.GIRAPH_GORA_KEYS_FACTORY_CLASS; +import static org.apache.giraph.io.gora.constants.GiraphGoraConstants.GIRAPH_GORA_KEY_CLASS; +import static org.apache.giraph.io.gora.constants.GiraphGoraConstants.GIRAPH_GORA_PERSISTENT_CLASS; +import static org.apache.giraph.io.gora.constants.GiraphGoraConstants.GIRAPH_GORA_START_KEY; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Iterator; + +import org.apache.giraph.conf.GiraphConfiguration; +import org.apache.giraph.graph.BasicComputation; +import org.apache.giraph.graph.Vertex; +import org.apache.giraph.io.formats.IdWithValueTextOutputFormat; +import org.apache.giraph.utils.InternalVertexRunner; +import org.apache.hadoop.io.DoubleWritable; +import org.apache.hadoop.io.FloatWritable; +import org.apache.hadoop.io.LongWritable; +import org.junit.Test; +import org.junit.Assert; + +/** + * Test class for the Gora edge input format. 
+ */ +public class TestGoraEdgeInputFormat { + + @Test + public void getEmptyDb() throws Exception { + Iterable results; + Iterator result; + GiraphConfiguration conf = new GiraphConfiguration(); + GIRAPH_GORA_DATASTORE_CLASS. + set(conf, "org.apache.gora.memory.store.MemStore"); + GIRAPH_GORA_KEYS_FACTORY_CLASS. + set(conf,"org.apache.giraph.io.gora.utils.DefaultKeyFactory"); + GIRAPH_GORA_KEY_CLASS.set(conf,"java.lang.String"); + GIRAPH_GORA_PERSISTENT_CLASS. + set(conf,"org.apache.giraph.io.gora.generated.GEdge"); + GIRAPH_GORA_START_KEY.set(conf,"1"); + GIRAPH_GORA_END_KEY.set(conf,"3"); + conf.set("io.serializations", + "org.apache.hadoop.io.serializer.WritableSerialization," + + "org.apache.hadoop.io.serializer.JavaSerialization"); + conf.setComputationClass(EmptyComputation.class); + conf.setEdgeInputFormatClass(GoraGEdgeEdgeInputFormat.class); + results = InternalVertexRunner.run(conf, new String[0], new String[0]); + Assert.assertNotNull(results); + result = results.iterator(); + Assert.assertFalse(result.hasNext()); + } + + @Test + public void getTestDb() throws Exception { + Iterable results; + GiraphConfiguration conf = new GiraphConfiguration(); + GIRAPH_GORA_DATASTORE_CLASS. + set(conf, "org.apache.gora.memory.store.MemStore"); + GIRAPH_GORA_KEYS_FACTORY_CLASS. + set(conf,"org.apache.giraph.io.gora.utils.DefaultKeyFactory"); + GIRAPH_GORA_KEY_CLASS.set(conf,"java.lang.String"); + GIRAPH_GORA_PERSISTENT_CLASS. 
+ set(conf,"org.apache.giraph.io.gora.generated.GEdge"); + GIRAPH_GORA_START_KEY.set(conf,"1"); + GIRAPH_GORA_END_KEY.set(conf,"4"); + conf.set("io.serializations", + "org.apache.hadoop.io.serializer.WritableSerialization," + + "org.apache.hadoop.io.serializer.JavaSerialization"); + conf.setComputationClass(EmptyComputation.class); + conf.setEdgeInputFormatClass(GoraTestEdgeInputFormat.class); + conf.setVertexOutputFormatClass(IdWithValueTextOutputFormat.class); + results = InternalVertexRunner.run(conf, new String[0], new String[0]); + Assert.assertNotNull(results); + Assert.assertEquals(3, ((ArrayList)results).size()); + if (results instanceof Collection + & (((Collection)results).size() == 2)) { /* NOTE(review): size was asserted to be 3 just above, so this condition looks always false and the checks below never run - confirm */ + Assert.assertEquals("33\t0.0", + ((ArrayList)results).get(0).toString()); + Assert.assertEquals("22\t0.0", + ((ArrayList)results).get(1).toString()); + Assert.assertEquals("11\t0.0", + ((ArrayList)results).get(2).toString()); + } + } + + /* + Test compute method that sends no messages: it asserts the vertex is + non-null and then votes to halt. + */ + public static class EmptyComputation + extends BasicComputation { + + @Override + public void compute( + Vertex vertex, + Iterable messages) throws IOException { + Assert.assertNotNull(vertex); + vertex.voteToHalt(); + } + } +} diff --git a/giraph-gora/src/test/java/org/apache/giraph/io/gora/TestGoraEdgeOutputFormat.java b/giraph-gora/src/test/java/org/apache/giraph/io/gora/TestGoraEdgeOutputFormat.java new file mode 100644 index 000000000..c9ac38a50 --- /dev/null +++ b/giraph-gora/src/test/java/org/apache/giraph/io/gora/TestGoraEdgeOutputFormat.java @@ -0,0 +1,93 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.giraph.io.gora; + +import static org.apache.giraph.io.gora.constants.GiraphGoraConstants.GIRAPH_GORA_DATASTORE_CLASS; +import static org.apache.giraph.io.gora.constants.GiraphGoraConstants.GIRAPH_GORA_END_KEY; +import static org.apache.giraph.io.gora.constants.GiraphGoraConstants.GIRAPH_GORA_KEYS_FACTORY_CLASS; +import static org.apache.giraph.io.gora.constants.GiraphGoraConstants.GIRAPH_GORA_KEY_CLASS; +import static org.apache.giraph.io.gora.constants.GiraphGoraConstants.GIRAPH_GORA_PERSISTENT_CLASS; +import static org.apache.giraph.io.gora.constants.GiraphGoraConstants.GIRAPH_GORA_START_KEY; +import static org.apache.giraph.io.gora.constants.GiraphGoraConstants.GIRAPH_GORA_OUTPUT_DATASTORE_CLASS; +import static org.apache.giraph.io.gora.constants.GiraphGoraConstants.GIRAPH_GORA_OUTPUT_KEY_CLASS; +import static org.apache.giraph.io.gora.constants.GiraphGoraConstants.GIRAPH_GORA_OUTPUT_PERSISTENT_CLASS; + +import java.io.IOException; + +import org.apache.giraph.conf.GiraphConfiguration; +import org.apache.giraph.graph.BasicComputation; +import org.apache.giraph.graph.Vertex; +import org.apache.giraph.utils.InternalVertexRunner; +import org.apache.hadoop.io.DoubleWritable; +import org.apache.hadoop.io.FloatWritable; +import org.apache.hadoop.io.LongWritable; +import org.junit.Assert; +import org.junit.Test; + +/** + * Test class for Gora edge output formats. 
+ */ +public class TestGoraEdgeOutputFormat { + + @Test + public void getWritingDb() throws Exception { + Iterable results; + GiraphConfiguration conf = new GiraphConfiguration(); + // Parameters for input + GIRAPH_GORA_DATASTORE_CLASS. + set(conf, "org.apache.gora.memory.store.MemStore"); + GIRAPH_GORA_KEYS_FACTORY_CLASS. + set(conf,"org.apache.giraph.io.gora.utils.DefaultKeyFactory"); + GIRAPH_GORA_KEY_CLASS.set(conf,"java.lang.String"); + GIRAPH_GORA_PERSISTENT_CLASS. + set(conf,"org.apache.giraph.io.gora.generated.GEdge"); + GIRAPH_GORA_START_KEY.set(conf,"1"); + GIRAPH_GORA_END_KEY.set(conf,"4"); + conf.set("io.serializations", + "org.apache.hadoop.io.serializer.WritableSerialization," + + "org.apache.hadoop.io.serializer.JavaSerialization"); + conf.setComputationClass(EmptyComputation.class); + conf.setEdgeInputFormatClass(GoraTestEdgeInputFormat.class); + // Parameters for output + GIRAPH_GORA_OUTPUT_DATASTORE_CLASS. + set(conf, "org.apache.gora.memory.store.MemStore"); + GIRAPH_GORA_OUTPUT_KEY_CLASS.set(conf, "java.lang.String"); + GIRAPH_GORA_OUTPUT_PERSISTENT_CLASS. + set(conf,"org.apache.giraph.io.gora.generated.GEdge"); + conf.setEdgeOutputFormatClass(GoraTestEdgeOutputFormat.class); + results = InternalVertexRunner.run(conf, new String[0], new String[0]); + Assert.assertNotNull(results); + } + + /* + Test compute method that sends no messages: it asserts the vertex is + non-null and then votes to halt. 
+ */ + public static class EmptyComputation + extends BasicComputation { + + @Override + public void compute( + Vertex vertex, + Iterable messages) throws IOException { + Assert.assertNotNull(vertex); + vertex.voteToHalt(); + } + } +} diff --git a/giraph-gora/src/test/java/org/apache/giraph/io/gora/TestGoraVertexInputFormat.java b/giraph-gora/src/test/java/org/apache/giraph/io/gora/TestGoraVertexInputFormat.java new file mode 100644 index 000000000..b42ab497c --- /dev/null +++ b/giraph-gora/src/test/java/org/apache/giraph/io/gora/TestGoraVertexInputFormat.java @@ -0,0 +1,121 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.giraph.io.gora; + +import static org.apache.giraph.io.gora.constants.GiraphGoraConstants.GIRAPH_GORA_DATASTORE_CLASS; +import static org.apache.giraph.io.gora.constants.GiraphGoraConstants.GIRAPH_GORA_END_KEY; +import static org.apache.giraph.io.gora.constants.GiraphGoraConstants.GIRAPH_GORA_KEYS_FACTORY_CLASS; +import static org.apache.giraph.io.gora.constants.GiraphGoraConstants.GIRAPH_GORA_KEY_CLASS; +import static org.apache.giraph.io.gora.constants.GiraphGoraConstants.GIRAPH_GORA_PERSISTENT_CLASS; +import static org.apache.giraph.io.gora.constants.GiraphGoraConstants.GIRAPH_GORA_START_KEY; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Iterator; + +import org.apache.giraph.conf.GiraphConfiguration; +import org.apache.giraph.graph.BasicComputation; +import org.apache.giraph.graph.Vertex; +import org.apache.giraph.io.formats.IdWithValueTextOutputFormat; +import org.apache.giraph.utils.InternalVertexRunner; +import org.apache.hadoop.io.DoubleWritable; +import org.apache.hadoop.io.FloatWritable; +import org.apache.hadoop.io.LongWritable; +import org.junit.Test; +import org.junit.Assert; + +/** + * Test class for the Gora vertex input format. + */ +public class TestGoraVertexInputFormat { + + @Test + public void getEmptyDb() throws Exception { + Iterable results; + Iterator result; + GiraphConfiguration conf = new GiraphConfiguration(); + GIRAPH_GORA_DATASTORE_CLASS. + set(conf, "org.apache.gora.memory.store.MemStore"); + GIRAPH_GORA_KEYS_FACTORY_CLASS. + set(conf,"org.apache.giraph.io.gora.utils.DefaultKeyFactory"); + GIRAPH_GORA_KEY_CLASS.set(conf,"java.lang.String"); + GIRAPH_GORA_PERSISTENT_CLASS. 
+ set(conf,"org.apache.giraph.io.gora.generated.GVertex"); + GIRAPH_GORA_START_KEY.set(conf,"1"); + GIRAPH_GORA_END_KEY.set(conf,"10"); + conf.set("io.serializations", + "org.apache.hadoop.io.serializer.WritableSerialization," + + "org.apache.hadoop.io.serializer.JavaSerialization"); + conf.setComputationClass(EmptyComputation.class); + conf.setVertexInputFormatClass(GoraTestVertexInputFormat.class); + results = InternalVertexRunner.run(conf, new String[0], new String[0]); + Assert.assertNotNull(results); + result = results.iterator(); + Assert.assertFalse(result.hasNext()); + } + + @Test + public void getTestDb() throws Exception { + Iterable results; + GiraphConfiguration conf = new GiraphConfiguration(); + GIRAPH_GORA_DATASTORE_CLASS. + set(conf, "org.apache.gora.memory.store.MemStore"); + GIRAPH_GORA_KEYS_FACTORY_CLASS. + set(conf,"org.apache.giraph.io.gora.utils.DefaultKeyFactory"); + GIRAPH_GORA_KEY_CLASS.set(conf,"java.lang.String"); + GIRAPH_GORA_PERSISTENT_CLASS. + set(conf,"org.apache.giraph.io.gora.generated.GVertex"); + GIRAPH_GORA_START_KEY.set(conf,"1"); + GIRAPH_GORA_END_KEY.set(conf,"100"); + conf.set("io.serializations", + "org.apache.hadoop.io.serializer.WritableSerialization," + + "org.apache.hadoop.io.serializer.JavaSerialization"); + conf.setComputationClass(EmptyComputation.class); + conf.setVertexInputFormatClass(GoraTestVertexInputFormat.class); + conf.setVertexOutputFormatClass(IdWithValueTextOutputFormat.class); + results = InternalVertexRunner.run(conf, new String[0], new String[0]); + Assert.assertNotNull(results); + Assert.assertEquals(3, ((ArrayList)results).size()); + if (results instanceof Collection + & (((Collection)results).size() == 3)) { + Assert.assertEquals("10\t0.0", + ((ArrayList)results).get(0).toString()); + Assert.assertEquals("1\t0.0", + ((ArrayList)results).get(1).toString()); + Assert.assertEquals("100\t0.0", + ((ArrayList)results).get(2).toString()); + } + } + + /* + Test compute method that sends no messages; it 
only votes to halt. + TestGoraVertexOutputFormat reuses this class. + */ + public static class EmptyComputation + extends BasicComputation { + + @Override + public void compute( + Vertex vertex, + Iterable messages) throws IOException { + vertex.voteToHalt(); + } + } +} diff --git a/giraph-gora/src/test/java/org/apache/giraph/io/gora/TestGoraVertexOutputFormat.java b/giraph-gora/src/test/java/org/apache/giraph/io/gora/TestGoraVertexOutputFormat.java new file mode 100644 index 000000000..2c2cb5e9d --- /dev/null +++ b/giraph-gora/src/test/java/org/apache/giraph/io/gora/TestGoraVertexOutputFormat.java @@ -0,0 +1,74 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.giraph.io.gora; + +import static org.apache.giraph.io.gora.constants.GiraphGoraConstants.GIRAPH_GORA_DATASTORE_CLASS; +import static org.apache.giraph.io.gora.constants.GiraphGoraConstants.GIRAPH_GORA_END_KEY; +import static org.apache.giraph.io.gora.constants.GiraphGoraConstants.GIRAPH_GORA_KEYS_FACTORY_CLASS; +import static org.apache.giraph.io.gora.constants.GiraphGoraConstants.GIRAPH_GORA_KEY_CLASS; +import static org.apache.giraph.io.gora.constants.GiraphGoraConstants.GIRAPH_GORA_PERSISTENT_CLASS; +import static org.apache.giraph.io.gora.constants.GiraphGoraConstants.GIRAPH_GORA_START_KEY; +import static org.apache.giraph.io.gora.constants.GiraphGoraConstants.GIRAPH_GORA_OUTPUT_DATASTORE_CLASS; +import static org.apache.giraph.io.gora.constants.GiraphGoraConstants.GIRAPH_GORA_OUTPUT_KEY_CLASS; +import static org.apache.giraph.io.gora.constants.GiraphGoraConstants.GIRAPH_GORA_OUTPUT_PERSISTENT_CLASS; + +import org.apache.giraph.conf.GiraphConfiguration; +import org.apache.giraph.io.gora.TestGoraVertexInputFormat.EmptyComputation; +import org.apache.giraph.utils.InternalVertexRunner; +import org.junit.Assert; +import org.junit.Test; + +/** + * Test class for Gora vertex output formats. + */ +public class TestGoraVertexOutputFormat { + + @Test + public void getWritingDb() throws Exception { + Iterable results; + GiraphConfiguration conf = new GiraphConfiguration(); + GIRAPH_GORA_DATASTORE_CLASS. + set(conf, "org.apache.gora.memory.store.MemStore"); + GIRAPH_GORA_KEYS_FACTORY_CLASS. + set(conf,"org.apache.giraph.io.gora.utils.DefaultKeyFactory"); + GIRAPH_GORA_KEY_CLASS.set(conf,"java.lang.String"); + GIRAPH_GORA_PERSISTENT_CLASS. + set(conf,"org.apache.giraph.io.gora.generated.GVertex"); + GIRAPH_GORA_START_KEY.set(conf,"1"); + GIRAPH_GORA_END_KEY.set(conf,"10"); + GIRAPH_GORA_OUTPUT_DATASTORE_CLASS. 
+ set(conf, "org.apache.gora.memory.store.MemStore"); + GIRAPH_GORA_OUTPUT_KEY_CLASS.set(conf, "java.lang.String"); + GIRAPH_GORA_OUTPUT_PERSISTENT_CLASS. + set(conf, "org.apache.giraph.io.gora.generated.GVertex"); + conf.set("io.serializations", + "org.apache.hadoop.io.serializer.WritableSerialization," + + "org.apache.hadoop.io.serializer.JavaSerialization"); + conf.setComputationClass(EmptyComputation.class); + conf.setVertexInputFormatClass(GoraTestVertexInputFormat.class); + // Parameters for output (repeats the three output values already set above) + GIRAPH_GORA_OUTPUT_DATASTORE_CLASS. + set(conf, "org.apache.gora.memory.store.MemStore"); + GIRAPH_GORA_OUTPUT_KEY_CLASS.set(conf, "java.lang.String"); + GIRAPH_GORA_OUTPUT_PERSISTENT_CLASS. + set(conf,"org.apache.giraph.io.gora.generated.GVertex"); + conf.setVertexOutputFormatClass(GoraTestVertexOutputFormat.class); + results = InternalVertexRunner.run(conf, new String[0], new String[0]); + Assert.assertNotNull(results); + } +} diff --git a/giraph-rexster/pom.xml b/giraph-rexster/pom.xml new file mode 100644 index 000000000..d060af9f3 --- /dev/null +++ b/giraph-rexster/pom.xml @@ -0,0 +1,143 @@ + + + + 4.0.0 + + + org.apache.giraph + giraph-parent + 1.1.0-SNAPSHOT + + giraph-rexster + jar + + Apache Giraph Rexster I/O + http://giraph.apache.org/giraph-rexster/ + Giraph Rexster input/output classes + + + ${project.basedir}/.. 
+ + + + + + ${project.basedir}/src/test/resources + + + + + org.apache.maven.plugins + maven-assembly-plugin + + + org.apache.maven.plugins + maven-checkstyle-plugin + + + org.apache.maven.plugins + maven-jar-plugin + + + org.apache.maven.plugins + maven-javadoc-plugin + + + org.apache.maven.plugins + maven-site-plugin + + ${project.basedir}/src/site + + + + org.apache.maven.plugins + maven-surefire-plugin + 2.6 + + + + prop.jarLocation + ${top.dir}/giraph-core/target/giraph-${project.version}-${forHadoop}-jar-with-dependencies.jar + + + + + + org.codehaus.mojo + findbugs-maven-plugin + + + + + + + + org.apache.giraph + giraph-core + + + org.json + json + + + org.slf4j + slf4j-api + + + org.slf4j + slf4j-log4j12 + + + commons-codec + commons-codec + 1.8 + + + org.apache.giraph + giraph-core + test-jar + + + + + junit + junit + test + + + org.mockito + mockito-all + test + + + com.tinkerpop.rexster + rexster-core + 2.3.0 + test + + + com.tinkerpop.rexster + rexster-server + 2.3.0 + test + + + diff --git a/giraph-rexster/src/main/assembly/compile.xml b/giraph-rexster/src/main/assembly/compile.xml new file mode 100644 index 000000000..fcaffa632 --- /dev/null +++ b/giraph-rexster/src/main/assembly/compile.xml @@ -0,0 +1,39 @@ + + + jar-with-dependencies + + jar + + false + + + + true + / + + + META-INF/LICENSE + + + true + runtime + + + diff --git a/giraph-rexster/src/main/java/org/apache/giraph/rexster/conf/GiraphRexsterConstants.java b/giraph-rexster/src/main/java/org/apache/giraph/rexster/conf/GiraphRexsterConstants.java new file mode 100644 index 000000000..c4b465582 --- /dev/null +++ b/giraph-rexster/src/main/java/org/apache/giraph/rexster/conf/GiraphRexsterConstants.java @@ -0,0 +1,81 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.giraph.rexster.conf; + +import org.apache.giraph.conf.StrConfOption; +import org.apache.giraph.conf.IntConfOption; +import org.apache.giraph.conf.BooleanConfOption; + +/** + * Constants used all over Giraph for configuration specific for Rexster + * REST API. + */ +// CHECKSTYLE: stop InterfaceIsTypeCheck +public interface GiraphRexsterConstants { + /** Rexster hostname which provides the REST API. */ + StrConfOption GIRAPH_REXSTER_HOSTNAME = + new StrConfOption("giraph.input.rexster.hostname", null, + "Rexster hostname which provides the REST API. " + + "- required"); + /** Rexster port where to contact the REST API. */ + IntConfOption GIRAPH_REXSTER_PORT = + new IntConfOption("giraph.input.rexster.port", 8182, + "Rexster port where to contact the REST API."); + /** Rexster flag to set the connection over SSL instead of clear-text. */ + BooleanConfOption GIRAPH_REXSTER_USES_SSL = + new BooleanConfOption("giraph.input.rexster.ssl", false, + "Rexster flag to set the connection over SSL " + + "instead of clear-text."); + /** Rexster graph. */ + StrConfOption GIRAPH_REXSTER_GRAPH = + new StrConfOption("giraph.input.rexster.graph", "graphdb", + "Rexster graph."); + /** Rexster number of estimated vertices in the graph to be loaded. 
*/ + IntConfOption GIRAPH_REXSTER_V_ESTIMATE = + new IntConfOption("giraph.input.rexster.vertices", 1000, + "Rexster number of estimated vertices in the " + + "graph to be loaded."); + /** Rexster number of estimated edges in the graph to be loaded. */ + IntConfOption GIRAPH_REXSTER_E_ESTIMATE = + new IntConfOption("giraph.input.rexster.edges", 1000, + "Rexster number of estimated edges in the " + + "graph to be loaded."); + /** Rexster username to access the REST API. */ + StrConfOption GIRAPH_REXSTER_USERNAME = + new StrConfOption("giraph.input.rexster.username", "", + "Rexster username to access the REST API."); + /** Rexster password to access the REST API. */ + StrConfOption GIRAPH_REXSTER_PASSWORD = + new StrConfOption("giraph.input.rexster.password", "", + "Rexster password to access the REST API."); + /** If the database is Gremlin enabled, the script will be used to retrieve + the vertices from the Rexster exposed database. */ + StrConfOption GIRAPH_REXSTER_GREMLIN_V_SCRIPT = + new StrConfOption("giraph.input.rexster.vertices.gremlinScript", "", + "If the database is Gremlin enabled, the script will " + + "be used to retrieve the vertices from the Rexster " + + "exposed database."); + /** If the database is Gremlin enabled, the script will be used to retrieve + the edges from the Rexster exposed database. 
*/ + StrConfOption GIRAPH_REXSTER_GREMLIN_E_SCRIPT = + new StrConfOption("giraph.input.rexster.edges.gremlinScript", "", + "If the database is Gremlin enabled, the script will " + + "be used to retrieve the edges from the Rexster " + + "exposed database."); +} diff --git a/giraph-rexster/src/main/java/org/apache/giraph/rexster/conf/package-info.java b/giraph-rexster/src/main/java/org/apache/giraph/rexster/conf/package-info.java new file mode 100644 index 000000000..b4917bc0c --- /dev/null +++ b/giraph-rexster/src/main/java/org/apache/giraph/rexster/conf/package-info.java @@ -0,0 +1,21 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/** + * Package of configuration information for Rexster in Giraph. + */ +package org.apache.giraph.rexster.conf; diff --git a/giraph-rexster/src/main/java/org/apache/giraph/rexster/io/RexsterEdgeInputFormat.java b/giraph-rexster/src/main/java/org/apache/giraph/rexster/io/RexsterEdgeInputFormat.java new file mode 100644 index 000000000..4dee078b4 --- /dev/null +++ b/giraph-rexster/src/main/java/org/apache/giraph/rexster/io/RexsterEdgeInputFormat.java @@ -0,0 +1,180 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.giraph.rexster.io; + +import static org.apache.giraph.rexster.conf.GiraphRexsterConstants.GIRAPH_REXSTER_E_ESTIMATE; + +import java.io.BufferedReader; +import java.io.IOException; +import java.util.List; + +import org.apache.giraph.edge.Edge; +import org.apache.giraph.io.EdgeInputFormat; +import org.apache.giraph.io.EdgeReader; +import org.apache.giraph.rexster.utils.RexsterUtils; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.io.Writable; +import org.apache.hadoop.io.WritableComparable; +import org.apache.hadoop.mapreduce.InputSplit; +import org.apache.hadoop.mapreduce.JobContext; +import org.apache.hadoop.mapreduce.TaskAttemptContext; +import org.apache.log4j.Logger; +import org.json.JSONException; +import org.json.JSONObject; +import org.json.JSONTokener; + +/** + * Abstract class that users should subclass to use their own Rexster based + * edge input format. This class was inspired by the Rexster Input format + * available in Faunus authored by Stephen Mallette. + * @param Vertex id + * @param Edge data + */ +public abstract class RexsterEdgeInputFormat extends EdgeInputFormat { + + /** Class logger. 
*/ + private static final Logger LOG = + Logger.getLogger(RexsterEdgeInputFormat.class); + + /** + * @param conf configuration parameters (unused, the method body is empty) + */ + public void checkInputSpecs(Configuration conf) { } + + @Override + public List getSplits(JobContext context, int minSplitCountHint) + throws IOException, InterruptedException { + + return RexsterUtils.getSplits(context, + GIRAPH_REXSTER_E_ESTIMATE.get(getConf())); + } + + @Override + public abstract RexsterEdgeReader createEdgeReader(InputSplit split, + TaskAttemptContext context) throws IOException; + + /** + * Abstract class to be implemented by the user based on their specific + * edge input. Subclasses must implement parseEdge() and + * getCurrentSourceId(). + */ + protected abstract class RexsterEdgeReader extends EdgeReader { + + /** Input stream from the HTTP connection to the REST endpoint */ + private BufferedReader rexsterBufferedStream; + /** JSON parser/tokenizer object */ + private JSONTokener tokener; + /** start index of the Rexster paging */ + private long splitStart; + /** end index of the Rexster paging */ + private long splitEnd; + /** number of iterated items */ + private long itemsIterated = 0; + /** current edge obtained from Rexster */ + private Edge edge; + + @Override + public void initialize(InputSplit inputSplit, TaskAttemptContext context) + throws IOException, InterruptedException { + + final RexsterInputSplit rexsterInputSplit = + (RexsterInputSplit) inputSplit; + + this.splitEnd = rexsterInputSplit.getEnd(); + this.splitStart = rexsterInputSplit.getStart(); + + this.rexsterBufferedStream = + RexsterUtils.Edge.openRexsterStream(getConf(), + this.splitStart, this.splitEnd); + + this.tokener = RexsterUtils.parseJSONEnvelope(this.rexsterBufferedStream); + } + + @Override + public void close() throws IOException { + this.rexsterBufferedStream.close(); + } + + @Override + public float getProgress() throws IOException, InterruptedException { + final float estimated = 
GIRAPH_REXSTER_E_ESTIMATE.get(getConf()); + + if (this.splitStart == this.splitEnd) { + return 0.0f; + } else { + /* assuming you got the estimate right this progress should be + pretty close; */ + return Math.min(1.0f, this.itemsIterated / (float) estimated); + } + } + + @Override + public Edge getCurrentEdge() + throws IOException, InterruptedException { + + return this.edge; + } + + @Override + public boolean nextEdge() throws IOException, InterruptedException { + try { + JSONObject obj; + char c; + + /* if the tokener was not set, no objects are in fact available */ + if (this.tokener == null) { + return false; + } + + obj = new JSONObject(this.tokener); + this.edge = parseEdge(obj); + + c = tokener.nextClean(); + if (c == RexsterUtils.ARRAY_SEPARATOR) { + itemsIterated += 1; + return true; + } else if (c == RexsterUtils.END_ARRAY) { + return false; /* NOTE(review): the edge parsed just above is stored but false is returned, so a caller that stops on false would never read it - confirm this is intended */ + } else { + LOG.error(String.format("Expected a '%c' at the end of the array", + RexsterUtils.END_ARRAY)); + throw new InterruptedException(); + } + + } catch (JSONException e) { + LOG.error(e.toString()); + return false; + } + } + + /** + * Parser for a single edge JSON object + * + * @param jsonEdge edge represented as JSON object + * @return The edge object associated with the JSON object + */ + protected abstract Edge parseEdge(JSONObject jsonEdge) + throws JSONException; + + @Override + public abstract I getCurrentSourceId() + throws IOException, InterruptedException; + } +} diff --git a/giraph-rexster/src/main/java/org/apache/giraph/rexster/io/RexsterInputSplit.java b/giraph-rexster/src/main/java/org/apache/giraph/rexster/io/RexsterInputSplit.java new file mode 100644 index 000000000..ed6fee93c --- /dev/null +++ b/giraph-rexster/src/main/java/org/apache/giraph/rexster/io/RexsterInputSplit.java @@ -0,0 +1,112 @@ +/* + * Licensed to the Apache Software 
Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.giraph.rexster.io; + +import org.apache.hadoop.io.Writable; +import org.apache.hadoop.mapreduce.InputSplit; + +import java.io.DataInput; +import java.io.DataOutput; +import java.io.IOException; + +/** + * An InputSplit that spans a set of vertices. This code is taken from the + * Faunus project and was originally authored by Stephen Mallette. + */ +public class RexsterInputSplit extends InputSplit implements Writable { + /** End index for the Rexster paging */ + private long end = 0; + /** Start index for the Rexster paging */ + private long start = 0; + + /** + * Default constructor. + */ + public RexsterInputSplit() { + } + + /** + * Overloaded constructor + * @param start start of the paging provided by Rexster + * @param end end of the paging provided by Rexster + */ + public RexsterInputSplit(long start, long end) { + this.start = start; + this.end = end; + } + + /** + * Stub function returning empty list of locations + * @return String[] array of locations + * @throws java.io.IOException + */ + public String[] getLocations() { + return new String[]{}; + } + + /** + * Get the start of the paging. + * @return long start of the paging + */ + public long getStart() { + return start; + } + + /** + * Get the end of the paging. 
+ * @return long end of the paging + */ + public long getEnd() { + return end; + } + + /** + * Get the length of the paging + * @return long length of the page + */ + public long getLength() { + return end - start; + } + + /** + * Reads the start index and then the end index, each as a long. + * @param input data input from where to deserialize + * @throws java.io.IOException + */ + public void readFields(DataInput input) throws IOException { + start = input.readLong(); + end = input.readLong(); + } + + /** + * Writes the start index and then the end index, each as a long. + * @param output data output where to serialize + * @throws java.io.IOException + */ + public void write(DataOutput output) throws IOException { + output.writeLong(start); + output.writeLong(end); + } + + @Override + public String toString() { + return String.format("Split at [%s to %s]", this.start, + this.end == Long.MAX_VALUE ? "END" : this.end - 1); + } +} diff --git a/giraph-rexster/src/main/java/org/apache/giraph/rexster/io/RexsterVertexInputFormat.java b/giraph-rexster/src/main/java/org/apache/giraph/rexster/io/RexsterVertexInputFormat.java new file mode 100644 index 000000000..0cc87fff4 --- /dev/null +++ b/giraph-rexster/src/main/java/org/apache/giraph/rexster/io/RexsterVertexInputFormat.java @@ -0,0 +1,193 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.giraph.rexster.io; + +import java.io.BufferedReader; +import java.io.IOException; +import java.util.List; + +import org.apache.giraph.graph.Vertex; +import org.apache.giraph.io.VertexInputFormat; +import org.apache.giraph.io.VertexReader; +import org.apache.giraph.rexster.utils.RexsterUtils; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.io.Writable; +import org.apache.hadoop.io.WritableComparable; +import org.apache.hadoop.mapreduce.InputSplit; +import org.apache.hadoop.mapreduce.JobContext; +import org.apache.hadoop.mapreduce.TaskAttemptContext; +import org.apache.log4j.Logger; +import org.json.JSONException; +import org.json.JSONObject; +import org.json.JSONTokener; + +import static org.apache.giraph.rexster.conf.GiraphRexsterConstants.GIRAPH_REXSTER_V_ESTIMATE; + +/** + * Abstract class that users should subclass to use their own Rexster based + * vertex input format. This class was inspired by the Rexster Input format + * available in Faunus authored by Stephen Mallette. + * + * @param + * @param + * @param + */ +public abstract class RexsterVertexInputFormat + extends VertexInputFormat { + + /** Class logger. */ + private static final Logger LOG = + Logger.getLogger(RexsterVertexInputFormat.class); + + /** + * @param conf configuration parameters + */ + public void checkInputSpecs(Configuration conf) { } + + /** + * Create a vertex reader for a given split. Guaranteed to have been + * configured with setConf() prior to use. The framework will also call + * {@link org.apache.giraph.io.VertexReader#initialize(org.apache.hadoop.mapreduce.InputSplit, org.apache.hadoop.mapreduce.TaskAttemptContext)} before + * the split is used. 
+ * + * @param split the split to be read + * @param context the information about the task + * @return a new record reader + * @throws java.io.IOException + */ + public abstract RexsterVertexReader createVertexReader(InputSplit split, + TaskAttemptContext context) throws IOException; + + @Override + public List getSplits(JobContext context, int minSplitCountHint) + throws IOException, InterruptedException { + + return RexsterUtils.getSplits(context, + GIRAPH_REXSTER_V_ESTIMATE.get(getConf())); + } + + /** + * Abstract class to be implemented by the user based on their specific + * vertex input. Easiest to ignore the key value separator and only use + * key instead. + */ + protected abstract class RexsterVertexReader extends VertexReader { + + /** Input stream from the HTTP connection to the REST endpoint */ + private BufferedReader rexsterBufferedStream; + /** JSON parser/tokenizer object */ + private JSONTokener tokener; + /** start index of the Rexster paging */ + private long splitStart; + /** end index of the Rexster paging */ + private long splitEnd; + /** index to access the iterated vertices */ + private long itemsIterated = 0; + /** current vertex */ + private Vertex vertex; + + @Override + public void initialize(InputSplit inputSplit, TaskAttemptContext context) + throws IOException, InterruptedException { + + final RexsterInputSplit rexsterInputSplit = + (RexsterInputSplit) inputSplit; + + this.splitEnd = rexsterInputSplit.getEnd(); + this.splitStart = rexsterInputSplit.getStart(); + + this.rexsterBufferedStream = + RexsterUtils.Vertex.openRexsterStream(getConf(), + this.splitStart, this.splitEnd); + + this.tokener = RexsterUtils.parseJSONEnvelope(this.rexsterBufferedStream); + } + + @Override + public boolean nextVertex() + throws IOException, InterruptedException { + + try { + JSONObject obj; + char c; + + /* if the tokener was not set, no objects are in fact available */ + if (this.tokener == null) { + return false; + } + + obj = new 
JSONObject(this.tokener); + this.vertex = parseVertex(obj); + + c = this.tokener.nextClean(); + if (c == RexsterUtils.ARRAY_SEPARATOR) { + itemsIterated += 1; + return true; + } else if (c == RexsterUtils.END_ARRAY) { + return false; + } else { + LOG.error(String.format("Expected a '%c' at the end of the array", + RexsterUtils.END_ARRAY)); + throw new InterruptedException( + String.format("Expected a '%c' at the end of the array", + RexsterUtils.END_ARRAY)); + } + } catch (JSONException e) { + /* this in case of empty results */ + LOG.error(e.toString()); + return false; + } + } + + @Override + public void close() throws IOException { + this.rexsterBufferedStream.close(); + } + + @Override + public float getProgress() throws IOException, InterruptedException { + final float vestimated = GIRAPH_REXSTER_V_ESTIMATE.get(getConf()); + + if (this.splitStart == this.splitEnd) { + return 0.0f; + } else { + // assuming you got the estimate right this progress should be + // pretty close; + return Math.min(1.0f, this.itemsIterated / (float) vestimated); + } + } + + @Override + public Vertex getCurrentVertex() + throws IOException, InterruptedException { + + return this.vertex; + } + + /** + * Parser for a single vertex JSON object + * + * @param jsonVertex vertex represented as JSON object + * @return The vertex object represented by the JSON object + */ + protected abstract Vertex parseVertex(JSONObject jsonVertex) + throws JSONException; + } +} diff --git a/giraph-rexster/src/main/java/org/apache/giraph/rexster/io/formats/RexsterLongDoubleFloatVertexInputFormat.java b/giraph-rexster/src/main/java/org/apache/giraph/rexster/io/formats/RexsterLongDoubleFloatVertexInputFormat.java new file mode 100644 index 000000000..03681c1aa --- /dev/null +++ b/giraph-rexster/src/main/java/org/apache/giraph/rexster/io/formats/RexsterLongDoubleFloatVertexInputFormat.java @@ -0,0 +1,68 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license 
agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.giraph.rexster.io.formats;

import java.io.IOException;

import org.apache.giraph.graph.Vertex;
import org.apache.giraph.rexster.io.RexsterVertexInputFormat;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.FloatWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.json.JSONException;
import org.json.JSONObject;

/**
 * Rexster Vertex Input Format producing vertices with a Long id and a
 * Double value. Every vertex value is initialised to 0.
 */
public class RexsterLongDoubleFloatVertexInputFormat
  extends RexsterVertexInputFormat {

  /**
   * Creates the reader for this format; neither argument is used here.
   *
   * @param split   the split to be read
   * @param context the information about the task
   * @return a new {@link RexsterLongDoubleFloatVertexReader}
   */
  @Override
  public RexsterVertexReader createVertexReader(
    InputSplit split, TaskAttemptContext context) throws IOException {

    return new RexsterLongDoubleFloatVertexReader();
  }

  /**
   * Rexster vertex reader
   */
  protected class RexsterLongDoubleFloatVertexReader
    extends RexsterVertexReader {

    /**
     * Builds a vertex from the "_id" property of the JSON object.
     *
     * @param jsonVertex vertex represented as JSON object
     * @return vertex whose id is the "_id" property and whose value is 0
     * @throws JSONException if "_id" is missing or is not a number
     */
    @Override
    protected Vertex parseVertex(
      JSONObject jsonVertex) throws JSONException {

      /* create the actual vertex */
      final Vertex vertex = getConf().createVertex();
      /* primitive on purpose: getLong() returns a long, no boxing needed */
      final long id = jsonVertex.getLong("_id");

      vertex.initialize(new LongWritable(id),
                        new DoubleWritable(0));
      return vertex;
    }
  }
}
diff --git a/giraph-rexster/src/main/java/org/apache/giraph/rexster/io/formats/RexsterLongFloatEdgeInputFormat.java b/giraph-rexster/src/main/java/org/apache/giraph/rexster/io/formats/RexsterLongFloatEdgeInputFormat.java new file mode 100644 index 000000000..b2d43afd6 --- /dev/null +++ b/giraph-rexster/src/main/java/org/apache/giraph/rexster/io/formats/RexsterLongFloatEdgeInputFormat.java @@ -0,0 +1,78 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +package org.apache.giraph.rexster.io.formats; + +import java.io.IOException; + +import org.apache.giraph.edge.Edge; +import org.apache.giraph.edge.EdgeFactory; +import org.apache.giraph.rexster.io.RexsterEdgeInputFormat; +import org.apache.hadoop.io.FloatWritable; +import org.apache.hadoop.io.LongWritable; +import org.apache.hadoop.mapreduce.InputSplit; +import org.apache.hadoop.mapreduce.TaskAttemptContext; +import org.json.JSONException; +import org.json.JSONObject; + +/** + * Rexster Edge Input Format for Long vertex ID's and Float edge values + */ +public class RexsterLongFloatEdgeInputFormat + extends RexsterEdgeInputFormat { + + @Override + public RexsterEdgeReader createEdgeReader( + InputSplit split, TaskAttemptContext context) throws IOException { + + return new RexsterLongFloatEdgeReader(); + } + + /** + * Rexster edge reader + */ + protected class RexsterLongFloatEdgeReader extends RexsterEdgeReader { + + /** source vertex of the edge */ + private LongWritable sourceId; + + @Override + public LongWritable getCurrentSourceId() throws IOException, + InterruptedException { + + return this.sourceId; + } + + @Override + protected Edge parseEdge(JSONObject jsonEdge) + throws JSONException { + + Edge edge = null; + Long dest; + Long value; + + value = jsonEdge.getLong("weight"); + dest = jsonEdge.getLong("_outV"); + edge = EdgeFactory.create(new LongWritable(dest), + new FloatWritable(value)); + this.sourceId = new LongWritable(jsonEdge.getLong("_inV")); + + return edge; + } + } +} diff --git a/giraph-rexster/src/main/java/org/apache/giraph/rexster/io/formats/package-info.java b/giraph-rexster/src/main/java/org/apache/giraph/rexster/io/formats/package-info.java new file mode 100644 index 000000000..b5ae44f59 --- /dev/null +++ b/giraph-rexster/src/main/java/org/apache/giraph/rexster/io/formats/package-info.java @@ -0,0 +1,21 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/** + * Package of reusable Input/Output formats for Rexster in Giraph. + */ +package org.apache.giraph.rexster.io.formats; diff --git a/giraph-rexster/src/main/java/org/apache/giraph/rexster/io/package-info.java b/giraph-rexster/src/main/java/org/apache/giraph/rexster/io/package-info.java new file mode 100644 index 000000000..bbd5a7f0f --- /dev/null +++ b/giraph-rexster/src/main/java/org/apache/giraph/rexster/io/package-info.java @@ -0,0 +1,21 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +/** + * Package of reusable Input/Output formats for Rexster in Giraph. + */ +package org.apache.giraph.rexster.io; diff --git a/giraph-rexster/src/main/java/org/apache/giraph/rexster/utils/RexsterUtils.java b/giraph-rexster/src/main/java/org/apache/giraph/rexster/utils/RexsterUtils.java new file mode 100644 index 000000000..e669ca93f --- /dev/null +++ b/giraph-rexster/src/main/java/org/apache/giraph/rexster/utils/RexsterUtils.java @@ -0,0 +1,361 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.giraph.rexster.utils; + +import static org.apache.giraph.rexster.conf.GiraphRexsterConstants.GIRAPH_REXSTER_GRAPH; +import static org.apache.giraph.rexster.conf.GiraphRexsterConstants.GIRAPH_REXSTER_GREMLIN_E_SCRIPT; +import static org.apache.giraph.rexster.conf.GiraphRexsterConstants.GIRAPH_REXSTER_GREMLIN_V_SCRIPT; +import static org.apache.giraph.rexster.conf.GiraphRexsterConstants.GIRAPH_REXSTER_HOSTNAME; +import static org.apache.giraph.rexster.conf.GiraphRexsterConstants.GIRAPH_REXSTER_PASSWORD; +import static org.apache.giraph.rexster.conf.GiraphRexsterConstants.GIRAPH_REXSTER_PORT; +import static org.apache.giraph.rexster.conf.GiraphRexsterConstants.GIRAPH_REXSTER_USERNAME; +import static org.apache.giraph.rexster.conf.GiraphRexsterConstants.GIRAPH_REXSTER_USES_SSL; + +import java.io.BufferedReader; +import java.io.IOException; +import java.io.InputStream; +import java.io.InputStreamReader; +import java.net.HttpURLConnection; +import java.net.URL; +import java.nio.charset.Charset; +import java.util.ArrayList; +import java.util.List; + +import org.apache.commons.codec.binary.Base64; +import org.apache.giraph.conf.ImmutableClassesGiraphConfiguration; +import org.apache.giraph.rexster.io.RexsterInputSplit; +import org.apache.hadoop.mapreduce.InputSplit; +import org.apache.hadoop.mapreduce.JobContext; +import org.apache.log4j.Logger; +import org.json.JSONException; +import org.json.JSONTokener; + +/** + * Utility functions for the Rexster REST interface + */ +public abstract class RexsterUtils { + /** start object symbol for JSON */ + public static final char KEY_VALUE_SEPARATOR = ':'; + /** start object symbol for JSON */ + public static final char START_OBJECT = '{'; + /** end object symbol for JSON */ + public static final char END_OBJECT = '}'; + /** start array symbol for JSON */ + public static final char START_ARRAY = '['; + /** end array symbol for JSON */ + public static final char END_ARRAY = ']'; + /** array elements 
separator symbol for JSON */ + public static final char ARRAY_SEPARATOR = ','; + /** Class logger. */ + private static final Logger LOG = Logger.getLogger(RexsterUtils.class); + + /** + * The default constructor is set to be private by default so that the + * class is not instantiated. + */ + private RexsterUtils() { /* private constructor */ } + + /** + * Parse all the vertices from the JSON retreived from Rexster. Inspired + * by the implementation of the JSONObject class. + * + * @param br buffer over the HTTP response content + * @return JSONTokener tokener over the HTTP JSON. Null in case the results + * array is empty. + */ + public static JSONTokener parseJSONEnvelope(BufferedReader br) + throws InterruptedException { + + JSONTokener tokener = null; + + try { + char c; + String key = null; + + tokener = new JSONTokener(br); + /* check that the JSON is well-formed by starting with a '{' */ + if (tokener.nextClean() != START_OBJECT) { + LOG.error( + String.format("A JSONObject text must begin with '%c'", + START_OBJECT)); + } + + /* loop on the whole array */ + for (;;) { + c = tokener.nextClean(); + switch (c) { + case 0: + LOG.error(String.format("A JSONObject text must end with '%c'", + END_OBJECT)); + break; + case END_OBJECT: + return tokener; + default: + tokener.back(); + key = tokener.nextValue().toString(); + } + + c = tokener.nextClean(); + + if (c != KEY_VALUE_SEPARATOR) { + LOG.error(String.format("Expected a %c after a key", c)); + } + + if (key != null && !key.equals("results")) { + tokener.nextValue(); + } else { + /* starting array */ + c = tokener.nextClean(); + if (c != START_ARRAY) { + LOG.error("'results' is expected to be an array"); + } + + /* check if the array is emty. If so, return null to signal that + no objects are available in the array, otherwise return the + tokener. 
*/ + c = tokener.nextClean(); + if (c == END_ARRAY) { + return null; + } else { + tokener.back(); + return tokener; + } + } + + switch (tokener.nextClean()) { + case ';': + case ',': + if (tokener.nextClean() == '}') { + return tokener; + } + tokener.back(); + break; + case '}': + return tokener; + default: + LOG.error("Expected a ',' or '}'"); + } + } + + } catch (JSONException e) { + LOG.error("Unable to parse the JSON with the vertices.\n" + + e.getMessage()); + throw new InterruptedException(e.toString()); + } + } + + /** + * Splitter used by both Vertex and Edge Input Format. + * + * @param context The job context + * @param estimation Number of estimated objects + * @return splits to be generated to read the input + */ + public static List getSplits(JobContext context, + long estimation) throws IOException, InterruptedException { + + final int chunks = context.getConfiguration().getInt("mapred.map.tasks", 1); + final long chunkSize = estimation / chunks; + final List splits = new ArrayList(); + + if (LOG.isDebugEnabled()) { + LOG.debug(String.format("Estimated objects: %d", estimation)); + LOG.debug(String.format("Number of chunks: %d", chunks)); + } + + for (int i = 0; i < chunks; ++i) { + final RexsterInputSplit split; + final long start; + final long end; + + start = i * chunkSize; + end = ((i + 1) == chunks) ? Long.MAX_VALUE : + (i * chunkSize) + chunkSize; + split = new RexsterInputSplit(start, end); + splits.add(split); + + if (LOG.isDebugEnabled()) { + LOG.debug(String.format("Chunk: start %d; end %d;", start, end)); + LOG.debug(String.format("Chunk: size %d;", chunkSize)); + LOG.debug(split); + } + } + + return splits; + } + + /** + * Opens an HTTP connection to the specified Rexster server. 
+ * + * @param conf giraph configuration + * @param start start index of the Rexster page split + * @param end end index of the Rexster page split + * @param urlSuffix stream type (vertices or edges) needed for the + * REST Url + * @param gremlinScript gremlin script. If set to null, will be ignored. + * @return BufferedReader the object used to retrieve the HTTP response + * content + */ + // CHECKSTYLE: stop IllegalCatch + protected static BufferedReader openRexsterStream( + ImmutableClassesGiraphConfiguration conf, + long start, long end, String urlSuffix, String gremlinScript) + throws InterruptedException { + + final String uriScriptFormat = + "/graphs/%s/tp/gremlin?script=%s" + + "&rexster.offset.start=%s&rexster.offset.end=%s"; + final String uriFormat = + "/graphs/%s/%s/" + + "?rexster.offset.start=%s&rexster.offset.end=%s"; + + final String endpoint = GIRAPH_REXSTER_HOSTNAME.get(conf); + + if (endpoint == null) { + throw new InterruptedException(GIRAPH_REXSTER_HOSTNAME.getKey() + + " is a mandatory "); + } + + final boolean isSsl = GIRAPH_REXSTER_USES_SSL.get(conf); + final int port = GIRAPH_REXSTER_PORT.get(conf); + final String graph = GIRAPH_REXSTER_GRAPH.get(conf); + + + try { + URL url; + /*final String url;*/ + final String auth; + final String username; + final String password; + final HttpURLConnection connection; + final InputStream is; + final InputStreamReader isr; + + if (gremlinScript != null && !gremlinScript.isEmpty()) { + url = new URL(isSsl ? "https" : "http", + endpoint, port, + String.format(uriScriptFormat, graph, gremlinScript, + start, end)); + } else { + url = new URL(isSsl ? 
"https" : "http", + endpoint, port, + String.format(uriFormat, graph, urlSuffix, start, end)); + } + + LOG.info(url); + + username = GIRAPH_REXSTER_USERNAME.get(conf); + password = GIRAPH_REXSTER_PASSWORD.get(conf); + byte[] authBytes = (username + ":" + password).getBytes( + Charset.defaultCharset()); + auth = "Basic " + Base64.encodeBase64URLSafeString(authBytes); + + connection = createConnection(url, auth); + connection.setDoOutput(true); + is = connection.getInputStream(); + isr = new InputStreamReader(is, Charset.defaultCharset()); + + return new BufferedReader(isr); + + } catch (Exception e) { + throw new RuntimeException(e.getMessage(), e); + } + } + // CHECKSTYLE: resume IllegalCatch + + /** + * Creates a new HTTP connection to the specified server. + * + * @param url URI to connec to + * @param authValue authetication value if available + * @return a new HTTP connection + */ + private static HttpURLConnection createConnection(final URL url, + final String authValue) throws Exception { + + final HttpURLConnection connection = + (HttpURLConnection) url.openConnection(); + + connection.setConnectTimeout(0); + connection.setReadTimeout(0); + connection.setRequestMethod("GET"); + connection.setRequestProperty("Authorization", authValue); + connection.setDoOutput(true); + + return connection; + } + + /** + * Specific Rexster utility functions for vertices + */ + public static class Vertex { + /** + * Empty private constructor. This class should not be instantiated. + */ + private Vertex() { /* private constructor */ } + + /** + * Opens an HTTP connection to the specified Rexster server for vertices. 
+ * + * @param conf giraph configuration + * @param start start index of the Rexster page split + * @param end end index of the Rexster page split + * @return BufferedReader the object used to retrieve the HTTP response + */ + public static BufferedReader openRexsterStream( + ImmutableClassesGiraphConfiguration conf, long start, long end) + throws InterruptedException { + + String gremlinScript = null; + + gremlinScript = GIRAPH_REXSTER_GREMLIN_V_SCRIPT.get(conf); + return RexsterUtils.openRexsterStream(conf, start, end, "vertices", + gremlinScript); + } + } + + /** + * Specific Rexster utility functions for edges + */ + public static class Edge { + /** + * Empty private constructor. This class should not be instantiated. + */ + private Edge() { /* private constructor */ } + + /** + * Opens an HTTP connection to the specified Rexster server for edges. + * + * @param conf giraph configuration + * @param start start index of the Rexster page split + * @param end end index of the Rexster page split + * @return BufferedReader the object used to retrieve the HTTP response + */ + public static BufferedReader openRexsterStream( + ImmutableClassesGiraphConfiguration conf, long start, long end) + throws InterruptedException { + + String gremlinScript = null; + gremlinScript = GIRAPH_REXSTER_GREMLIN_E_SCRIPT.get(conf); + + return RexsterUtils.openRexsterStream(conf, start, end, "edges", + gremlinScript); + } + } +} diff --git a/giraph-rexster/src/main/java/org/apache/giraph/rexster/utils/package-info.java b/giraph-rexster/src/main/java/org/apache/giraph/rexster/utils/package-info.java new file mode 100644 index 000000000..3f6810fb1 --- /dev/null +++ b/giraph-rexster/src/main/java/org/apache/giraph/rexster/utils/package-info.java @@ -0,0 +1,21 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/** + * Package of reusable utils for Rexster in Giraph. + */ +package org.apache.giraph.rexster.utils; diff --git a/giraph-rexster/src/test/java/org/apache/giraph/rexster/io/formats/TestAbstractRexsterInputFormat.java b/giraph-rexster/src/test/java/org/apache/giraph/rexster/io/formats/TestAbstractRexsterInputFormat.java new file mode 100644 index 000000000..09b27d3bd --- /dev/null +++ b/giraph-rexster/src/test/java/org/apache/giraph/rexster/io/formats/TestAbstractRexsterInputFormat.java @@ -0,0 +1,129 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.giraph.rexster.io.formats; + +import java.io.File; +import java.io.IOException; +import java.io.InputStream; +import java.util.HashSet; +import java.util.List; + +import org.apache.commons.configuration.HierarchicalConfiguration; +import org.apache.commons.configuration.XMLConfiguration; +import org.apache.commons.io.FileUtils; +import org.junit.After; +import org.junit.Before; + +import com.tinkerpop.blueprints.Graph; +import com.tinkerpop.blueprints.impls.tg.TinkerGraph; +import com.tinkerpop.blueprints.util.io.graphson.GraphSONReader; +import com.tinkerpop.rexster.Tokens; +import com.tinkerpop.rexster.protocol.EngineController; +import com.tinkerpop.rexster.server.HttpRexsterServer; +import com.tinkerpop.rexster.server.RexsterApplication; +import com.tinkerpop.rexster.server.RexsterServer; +import com.tinkerpop.rexster.server.XmlRexsterApplication; + +public abstract class TestAbstractRexsterInputFormat { + /** temporary directory */ + private final String TMP_DIR = "/tmp/"; + /** input JSON extension */ + private final String INPUT_JSON_EXT = ".input.json"; + /** output JSON extension */ + protected final String OUTPUT_JSON_EXT = ".output.json"; + /** rexster XML configuration file */ + private final String REXSTER_CONF = "rexster.xml"; + /** string databases */ + protected final String DATABASES[] = + { + "empty-db", + "test-db" + }; + /** Rexster server instance */ + protected RexsterServer server; + + @Before + @SuppressWarnings("unchecked") + public void setUp() throws Exception { + final XMLConfiguration properties = new XMLConfiguration(); + final RexsterApplication application; + final List graphConfigs; + final InputStream rexsterConf; + final int scriptEngineThreshold; + final String scriptEngineInitFile; + final List scriptEngineNames; + + /* prepare all databases */ + for (int i = 0; i < DATABASES.length; ++i) { + prepareDb(DATABASES[i]); + } + + /* start the Rexster HTTP server using the prepared rexster configuration */ + 
rexsterConf = + this.getClass().getResourceAsStream(REXSTER_CONF); + properties.load(rexsterConf); + rexsterConf.close(); + + graphConfigs = properties.configurationsAt(Tokens.REXSTER_GRAPH_PATH); + application = new XmlRexsterApplication(graphConfigs); + this.server = new HttpRexsterServer(properties); + + scriptEngineThreshold = + properties.getInt("script-engine-reset-threshold", + EngineController.RESET_NEVER); + scriptEngineInitFile = properties.getString("script-engine-init", ""); + + /* allow scriptengines to be configured so that folks can drop in + different gremlin flavors. */ + scriptEngineNames = properties.getList("script-engines"); + + if (scriptEngineNames == null) { + // configure to default with gremlin-groovy + EngineController.configure(scriptEngineThreshold, scriptEngineInitFile); + } else { + EngineController.configure(scriptEngineThreshold, scriptEngineInitFile, + new HashSet(scriptEngineNames)); + } + + this.server.start(application); + } + + @After + public void tearDown() throws IOException { + for (int i = 0; i < DATABASES.length; ++i) { + FileUtils.deleteDirectory(new File(TMP_DIR + DATABASES[i])); + } + + try { + this.server.stop(); + } catch (Exception e) { + throw new RuntimeException(e); + } + } + + private void prepareDb(String dbName) throws IOException { + final InputStream db; + final Graph tinkergraph; + + db = this.getClass().getResourceAsStream(dbName + INPUT_JSON_EXT); + tinkergraph = new TinkerGraph(TMP_DIR + dbName); + GraphSONReader.inputGraph(tinkergraph, db); + tinkergraph.shutdown(); + } +} diff --git a/giraph-rexster/src/test/java/org/apache/giraph/rexster/io/formats/TestRexsterLongDoubleFloatVertexInputFormat.java b/giraph-rexster/src/test/java/org/apache/giraph/rexster/io/formats/TestRexsterLongDoubleFloatVertexInputFormat.java new file mode 100644 index 000000000..a68db677e --- /dev/null +++ b/giraph-rexster/src/test/java/org/apache/giraph/rexster/io/formats/TestRexsterLongDoubleFloatVertexInputFormat.java @@ -0,0 
+1,155 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.giraph.rexster.io.formats; + +import static org.apache.giraph.rexster.conf.GiraphRexsterConstants.GIRAPH_REXSTER_GREMLIN_E_SCRIPT; +import static org.apache.giraph.rexster.conf.GiraphRexsterConstants.GIRAPH_REXSTER_GREMLIN_V_SCRIPT; +import static org.apache.giraph.rexster.conf.GiraphRexsterConstants.GIRAPH_REXSTER_HOSTNAME; +import static org.apache.giraph.rexster.conf.GiraphRexsterConstants.GIRAPH_REXSTER_PORT; +import static org.apache.giraph.rexster.conf.GiraphRexsterConstants.GIRAPH_REXSTER_GRAPH; + +import java.io.File; +import java.io.IOException; +import java.net.URL; +import java.util.Iterator; + +import org.apache.giraph.conf.GiraphConfiguration; +import org.apache.giraph.graph.BasicComputation; +import org.apache.giraph.graph.Vertex; +import org.apache.giraph.io.formats.JsonLongDoubleFloatDoubleVertexOutputFormat; +import org.apache.giraph.utils.InternalVertexRunner; +import org.apache.hadoop.io.DoubleWritable; +import org.apache.hadoop.io.FloatWritable; +import org.apache.hadoop.io.LongWritable; +import org.junit.Assert; +import org.junit.Test; + +import com.google.common.base.Charsets; +import com.google.common.io.Files; 
+ +public class TestRexsterLongDoubleFloatVertexInputFormat + extends TestAbstractRexsterInputFormat { + + @Test + public void getEmptyDb() throws Exception { + Iterable results; + GiraphConfiguration conf = new GiraphConfiguration(); + final String dbName = super.DATABASES[0]; + Iterator result; + + GIRAPH_REXSTER_HOSTNAME.set(conf, "127.0.0.1"); + GIRAPH_REXSTER_PORT.set(conf, 18182); + GIRAPH_REXSTER_GRAPH.set(conf, dbName); + conf.setComputationClass(EmptyComputation.class); + conf.setVertexInputFormatClass(RexsterLongDoubleFloatVertexInputFormat.class); + conf.setEdgeInputFormatClass(RexsterLongFloatEdgeInputFormat.class); + conf.setVertexOutputFormatClass(JsonLongDoubleFloatDoubleVertexOutputFormat.class); + results = InternalVertexRunner.run(conf, new String[0], new String[0]); + Assert.assertNotNull(results); + + result = results.iterator(); + Assert.assertFalse(result.hasNext()); + } + + @Test + public void getTestDb() throws Exception { + Iterable results; + GiraphConfiguration conf = new GiraphConfiguration(); + final String dbName = super.DATABASES[1]; + Iterator result; + Iterator expected; + final File expectedFile; + final URL expectedFileUrl; + + GIRAPH_REXSTER_HOSTNAME.set(conf, "127.0.0.1"); + GIRAPH_REXSTER_PORT.set(conf, 18182); + GIRAPH_REXSTER_GRAPH.set(conf, dbName); + conf.setComputationClass(EmptyComputation.class); + conf.setVertexInputFormatClass(RexsterLongDoubleFloatVertexInputFormat.class); + conf.setEdgeInputFormatClass(RexsterLongFloatEdgeInputFormat.class); + conf.setVertexOutputFormatClass(JsonLongDoubleFloatDoubleVertexOutputFormat.class); + + results = InternalVertexRunner.run(conf, new String[0], new String[0]); + Assert.assertNotNull(results); + + expectedFileUrl = + this.getClass().getResource(dbName + super.OUTPUT_JSON_EXT); + expectedFile = new File(expectedFileUrl.toURI()); + expected = Files.readLines(expectedFile, Charsets.UTF_8).iterator(); + result = results.iterator(); + + while(expected.hasNext() && result.hasNext()) 
{ + String resultLine = (String) result.next(); + String expectedLine = (String) expected.next(); + + Assert.assertTrue(expectedLine.equals(resultLine)); + } + } + + @Test + public void getGremlinDb() throws Exception { + Iterable results; + GiraphConfiguration conf = new GiraphConfiguration(); + final String dbName = super.DATABASES[1]; + Iterator result; + Iterator expected; + final File expectedFile; + final URL expectedFileUrl; + + GIRAPH_REXSTER_HOSTNAME.set(conf, "127.0.0.1"); + GIRAPH_REXSTER_PORT.set(conf, 18182); + GIRAPH_REXSTER_GRAPH.set(conf, dbName); + GIRAPH_REXSTER_GREMLIN_V_SCRIPT.set(conf, "g.V"); + GIRAPH_REXSTER_GREMLIN_E_SCRIPT.set(conf, "g.E"); + conf.setComputationClass(EmptyComputation.class); + conf.setVertexInputFormatClass(RexsterLongDoubleFloatVertexInputFormat.class); + conf.setEdgeInputFormatClass(RexsterLongFloatEdgeInputFormat.class); + conf.setVertexOutputFormatClass(JsonLongDoubleFloatDoubleVertexOutputFormat.class); + + results = InternalVertexRunner.run(conf, new String[0], new String[0]); + Assert.assertNotNull(results); + + expectedFileUrl = + this.getClass().getResource(dbName + super.OUTPUT_JSON_EXT); + expectedFile = new File(expectedFileUrl.toURI()); + expected = Files.readLines(expectedFile, Charsets.UTF_8).iterator(); + result = results.iterator(); + + while(expected.hasNext() && result.hasNext()) { + String resultLine = (String) result.next(); + String expectedLine = (String) expected.next(); + + Assert.assertTrue(expectedLine.equals(resultLine)); + } + } + + /* + Test compute method that sends each edge a notification of its parents. + The test set only has a 1-1 parent-to-child ratio for this unit test. 
+ */ + public static class EmptyComputation + extends BasicComputation { + + @Override + public void compute(Vertex vertex, + Iterable messages) throws IOException { + vertex.voteToHalt(); + } + } +} diff --git a/giraph-rexster/src/test/resources/org/apache/giraph/rexster/io/formats/empty-db.input.json b/giraph-rexster/src/test/resources/org/apache/giraph/rexster/io/formats/empty-db.input.json new file mode 100644 index 000000000..66d6c5d46 --- /dev/null +++ b/giraph-rexster/src/test/resources/org/apache/giraph/rexster/io/formats/empty-db.input.json @@ -0,0 +1,8 @@ +{ + "graph": + { + "mode": "NORMAL", + "vertices": [], + "edges": [] + } +} diff --git a/giraph-rexster/src/test/resources/org/apache/giraph/rexster/io/formats/rexster.xml b/giraph-rexster/src/test/resources/org/apache/giraph/rexster/io/formats/rexster.xml new file mode 100644 index 000000000..e7de484e0 --- /dev/null +++ b/giraph-rexster/src/test/resources/org/apache/giraph/rexster/io/formats/rexster.xml @@ -0,0 +1,52 @@ + + + + 18182 + 127.0.0.1 + http://127.0.0.1 + public + UTF-8 + false + false + + + 8 + 8 + + + 4 + 4 + + + leader-follower + + -1 + data/init.groovy + gremlin-groovy + + + empty-db + tinkergraph + /tmp/empty-db + + + test-db + tinkergraph + /tmp/test-db + + + tp:gremlin + + + tp + gremlin + + script-directory + true + true + + + + + + diff --git a/giraph-rexster/src/test/resources/org/apache/giraph/rexster/io/formats/test-db.input.json b/giraph-rexster/src/test/resources/org/apache/giraph/rexster/io/formats/test-db.input.json new file mode 100644 index 000000000..955af9172 --- /dev/null +++ b/giraph-rexster/src/test/resources/org/apache/giraph/rexster/io/formats/test-db.input.json @@ -0,0 +1,126 @@ +{ + "graph": + { + "mode": "NORMAL", + "vertices": [ + { + "_id": 1, + "_type": "vertex" + }, + { + "_id": 2, + "_type":"vertex" + }, + { + "_id": 3, + "_type":"vertex" + }, + { + "_id": 4, + "_type":"vertex" + }, + { + "_id": 5, + "_type":"vertex" + } + ], + "edges": [ + { + "weight": 1, + 
"_id": 0, + "_type": "edge", + "_outV": 1, + "_inV": 2, + "_label": "_default" + }, + { + "weight": 3, + "_id": 1, + "_type": "edge", + "_outV": 1, + "_inV": 4, + "_label": "_default" + }, + { + "weight": 1, + "_id": 2, + "_type": "edge", + "_outV": 2, + "_inV": 1, + "_label": "_default" + }, + { + "weight": 2, + "_id": 3, + "_type": "edge", + "_outV": 2, + "_inV": 3, + "_label": "_default" + }, + { + "weight": 1, + "_id": 4, + "_type": "edge", + "_outV": 2, + "_inV": 4, + "_label": "_default" + }, + { + "weight": 2, + "_id": 5, + "_type": "edge", + "_outV": 3, + "_inV": 2, + "_label": "_default" + }, + { + "weight": 4, + "_id": 6, + "_type": "edge", + "_outV": 3, + "_inV": 5, + "_label": "_default" + }, + { + "weight": 3, + "_id": 7, + "_type": "edge", + "_outV": 4, + "_inV": 1, + "_label": "_default" + }, + { + "weight": 1, + "_id": 8, + "_type": "edge", + "_outV": 4, + "_inV": 2, + "_label": "_default" + }, + { + "weight": 4, + "_id": 9, + "_type": "edge", + "_outV": 4, + "_inV": 5, + "_label": "_default" + }, + { + "weight": 4, + "_id": 10, + "_type": "edge", + "_outV": 5, + "_inV": 4, + "_label": "_default" + }, + { + "weight": 4, + "_id": 11, + "_type": "edge", + "_outV": 5, + "_inV": 3, + "_label": "_default" + } + ] + } +} diff --git a/giraph-rexster/src/test/resources/org/apache/giraph/rexster/io/formats/test-db.output.json b/giraph-rexster/src/test/resources/org/apache/giraph/rexster/io/formats/test-db.output.json new file mode 100644 index 000000000..f1198dff3 --- /dev/null +++ b/giraph-rexster/src/test/resources/org/apache/giraph/rexster/io/formats/test-db.output.json @@ -0,0 +1,5 @@ +[5,0,[[3,4],[4,4]]] +[2,0,[[1,1],[3,2],[4,1]]] +[1,0,[[2,1],[4,3]]] +[3,0,[[2,2]]] +[4,0,[[1,3],[5,4],[2,1]]] diff --git a/pom.xml b/pom.xml index 75de7d688..ee99a99a0 100644 --- a/pom.xml +++ b/pom.xml @@ -576,6 +576,8 @@ under the License. giraph-accumulo giraph-hbase giraph-hcatalog + giraph-gora + giraph-rexster true