-
Notifications
You must be signed in to change notification settings - Fork 74
/
Copy pathJavaBean.java
97 lines (79 loc) · 2.62 KB
/
JavaBean.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
package dataset;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoder;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SparkSession;
import java.io.Serializable;
import java.util.Arrays;
import java.util.List;
import static org.apache.spark.sql.functions.col;
//
// Create a Spark Dataset from a list of JavaBean instances.
// The schema Spark infers takes its column names from the bean's
// properties, so the Dataset can be queried using those names.
//
public class JavaBean {

  //
  // This must be a JavaBean in order for Spark to infer a schema for it:
  // a public, serializable class with a public no-arg constructor and a
  // getter/setter pair for every property. Each property (i, english,
  // french) becomes a column of the same name.
  //
  public static class Number implements Serializable {
    private int i;
    private String english;
    private String french;

    //
    // No-arg constructor required by the JavaBean convention. The bean
    // encoder needs it to instantiate the class and then populate it via
    // the setters whenever rows are converted back into Number objects
    // (for example by collectAsList() or a typed map/filter).
    //
    public Number() {
    }

    //
    // Convenience constructor for building fully populated instances.
    //
    public Number(int i, String english, String french) {
      this.i = i;
      this.english = english;
      this.french = french;
    }

    public int getI() {
      return i;
    }

    public void setI(int i) {
      this.i = i;
    }

    public String getEnglish() {
      return english;
    }

    public void setEnglish(String english) {
      this.english = english;
    }

    public String getFrench() {
      return french;
    }

    public void setFrench(String french) {
      this.french = french;
    }
  }

  //
  // Build a small Dataset<Number> on a local four-thread Spark session,
  // print its inferred schema and contents, then run one query against
  // the bean-derived column names. The command-line args are not used.
  //
  public static void main(String[] args) {
    SparkSession spark = SparkSession
        .builder()
        .appName("Dataset-JavaBean")
        .master("local[4]")
        .getOrCreate();

    // Stop the session even if one of the Dataset operations throws, so a
    // failure does not leave the local Spark context running.
    try {
      //
      // The Java API requires you to explicitly instantiate an encoder for
      // any JavaBean you want to use for schema inference
      //
      Encoder<Number> numberEncoder = Encoders.bean(Number.class);

      //
      // Create a container of the JavaBean instances
      //
      List<Number> data = Arrays.asList(
          new Number(1, "one", "un"),
          new Number(2, "two", "deux"),
          new Number(3, "three", "trois"));

      //
      // Use the encoder and the container of JavaBean instances to create a
      // Dataset
      //
      Dataset<Number> ds = spark.createDataset(data, numberEncoder);

      System.out.println("*** here is the schema inferred from the bean");
      ds.printSchema();

      System.out.println("*** here is the data");
      ds.show();

      // Use the convenient bean-inferred column names to query
      System.out.println("*** filter by one column and fetch others");
      ds.where(col("i").gt(2)).select(col("english"), col("french")).show();
    } finally {
      spark.stop();
    }
  }
}