Skip to content

Commit

Permalink
Merge pull request #13 from kren1/sql_setup
Browse files Browse the repository at this point in the history
Sql to weka
  • Loading branch information
coopie committed Nov 3, 2015
2 parents 232c3ba + a6473c7 commit e16e913
Show file tree
Hide file tree
Showing 18 changed files with 739 additions and 2 deletions.
4 changes: 3 additions & 1 deletion build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,10 @@ repositories {

dependencies {
    // Libraries needed by the main sources: weka and the PostgreSQL JDBC driver.
    compile 'nz.ac.waikato.cms.weka:weka-stable:3.6.13'
    compile 'org.postgresql:postgresql:9.4-1204-jdbc42'

    // Libraries that are only on the test classpath.
    testCompile 'junit:junit:4.11'
    testCompile 'com.google.guava:guava:15.0'
    testCompile 'org.jmock:jmock-junit4:2.8.1'
}

checkstyle {
Expand Down
3 changes: 3 additions & 0 deletions sql/db_drop.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
-- Drops every table created by db_setup.sql.
-- data goes first because it references both batch and feature.
-- IF EXISTS lets the script run even when a table is already missing,
-- e.g. after a partially failed setup.
DROP TABLE IF EXISTS data;
DROP TABLE IF EXISTS batch;
DROP TABLE IF EXISTS feature;
15 changes: 15 additions & 0 deletions sql/db_sample_data.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
-- Sample rows for manual testing: one batch, three features and a few
-- measurements. The tables from db_setup.sql must already exist.

-- A single batch stamped with the current time.
INSERT INTO batch (added,description) VALUES (now(),'test');
-- Three additional features; their ids are generated by the serial column.
-- NOTE(review): db_setup.sql already inserts 'BPM', so on a fresh database these
-- presumably get ids 2-4 and featureID 1 below refers to BPM - confirm.
INSERT INTO feature(name) VALUES
('f1'),
('f2'),
('f3')
;
-- Measurements for subjects 1-3, all in batch 1 and timeslice 0.
-- Subject 3 only has a value for featureID 3.
INSERT INTO data (subjectID, featureID, batchID, featureValue,timeslice) VALUES
(1,1,1,60,0),
(1,2,1,2.22453,0),
(1,3,1,0.36453,0),
(2,3,1,5.32471,0),
(2,2,1,4.72453,0),
(2,1,1,70,0),
(3,3,1,1.22451,0)
;
26 changes: 26 additions & 0 deletions sql/db_setup.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
-- Catalogue of features; rows in data refer to a feature by featureID.
CREATE TABLE feature
(
-- auto-generated identifier
featureID serial PRIMARY KEY,
-- short name, at most 10 characters
-- NOTE(review): not declared UNIQUE, so two features may share a name - confirm this is intended
name varchar(10) not null
);

-- One row per batch; rows in data refer to a batch by batchID.
CREATE TABLE batch
(
-- auto-generated identifier
batchID serial PRIMARY KEY,
-- date associated with the batch (date only, no time of day is stored)
added date not null,
-- free-text description of the batch
description text not null
);

-- Feature values: at most one value per (subject, feature, batch, timeslice).
CREATE TABLE data
(
-- identifier of the subject; bigserial supplies a default when none is given
subjectID bigserial,
-- feature this value belongs to (primary key of feature)
featureID int REFERENCES feature,
-- batch this value belongs to (primary key of batch)
batchID int REFERENCES batch,
-- the stored value of the feature
featureValue numeric not null,
-- index of the time slice the value belongs to
timeslice int not null,
-- the combination of all four identifying columns must be unique
PRIMARY KEY (subjectID, featureID, batchID, timeslice)

);

-- Insert BPM first so that it is always the feature with id 1
INSERT INTO feature(name) VALUES ('BPM') ;
17 changes: 17 additions & 0 deletions src/main/java/database/DataInstancesBuilder.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
package database;

import weka.core.Instances;

import java.sql.Connection;
import java.sql.Statement;

/**
* Builder that collects a selection of features and then produces a
* {@link DatabaseDataset} for that selection.
*
* Created by tk1713 on 02/11/15.
* @param <T_feature> type of the feature selection input
* @param <T_subject> type of the subject selection input (not used by any method declared here)
* @param <T_batch> type of the batch selection input (not used by any method declared here)
*/
public interface DataInstancesBuilder<T_feature, T_subject, T_batch> {
/**
* Adds a feature to the selection.
*
* @param feature the feature to include
* @return a builder on which further selections can be made
*/
DataInstancesBuilder<T_feature, T_subject, T_batch> withFeature(T_feature feature);
/**
* Builds the dataset accessor for the current selection.
*
* @param connection database connection used to build the dataset
* @param <T> type in which the dataset returns its data; chosen by the caller
* @return the dataset accessor
*/
<T> DatabaseDataset<T> build(Connection connection);
}
48 changes: 48 additions & 0 deletions src/main/java/database/DatabaseConnection.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
package database;

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.SQLException;
import java.util.Map;

/**
 * Opens JDBC connections to the project's PostgreSQL database.
 *
 * Created by Timotej on 31-Oct-15.
 */
public class DatabaseConnection {
    /**
     * Gets the connection to our database.
     *
     * <p>The information about the database is stored in environment variables
     * for two reasons: firstly so that this data is not committed to git, and
     * secondly so that a database can potentially be set up easily for Travis
     * (it is easy to push encrypted environment variables to Travis).
     *
     * <p>Required variables: PGHOST, PGPORT, PGDATABASE, PGUSER, PGPASS.
     *
     * @return a newly opened connection
     * @throws RuntimeException if any required variable is missing, the JDBC
     *         driver class cannot be loaded, or the connection attempt fails;
     *         in the latter two cases the original exception is attached as the cause
     */
    public static Connection getDatabaseConnection() {
        Map<String, String> env = System.getenv();
        if (!env.containsKey("PGHOST") || !env.containsKey("PGPORT") ||
                !env.containsKey("PGDATABASE") || !env.containsKey("PGUSER") ||
                !env.containsKey("PGPASS")) {
            throw new RuntimeException("Cannot get db data from enviroment variables PGHOST, " +
                    "PGPORT, PGDATABASE, PGUSER, PGPASS");
        }

        final String host = env.get("PGHOST");
        final String port = env.get("PGPORT");
        final String database = env.get("PGDATABASE");
        final String username = env.get("PGUSER");
        final String password = env.get("PGPASS");
        try {
            // Explicitly load the driver so it is registered with DriverManager.
            Class.forName("org.postgresql.Driver");
            return DriverManager.getConnection(
                    "jdbc:postgresql://" + host + ":" + port + "/" + database +
                    // need this to fix SSL error
                    // NOTE(review): NonValidatingFactory does not validate the server
                    // certificate; acceptable for development, reconsider for production.
                    "?ssl=true&sslfactory=org.postgresql.ssl.NonValidatingFactory",
                    username, password);
        } catch (SQLException e) {
            // Keep the cause so the real reason (auth, network, SSL...) is not lost.
            throw new RuntimeException("Cannot connect to your database " + database +
                    " user: " + username, e);
        } catch (ClassNotFoundException e) {
            throw new RuntimeException("JDBC not setup, build with gradle", e);
        }
    }
}
10 changes: 10 additions & 0 deletions src/main/java/database/DatabaseDataset.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
package database;

/**
 * Read access to the stored data, one batch and timeslice at a time.
 *
 * Created by Timotej on 02-Nov-15.
 *
 * @param <T> the type in which the fetched data is handed back
 */
public interface DatabaseDataset<T> {

    /**
     * Fetches the data of a batch at a timeslice.
     *
     * @param batchID   id of the batch to read
     * @param timeslice timeslice to read
     * @return the fetched data
     */
    T getBatch(int batchID, int timeslice);

    /**
     * Fetches the data of a single subject in a batch at a timeslice.
     *
     * @param subjectID id of the subject to read
     * @param batchID   id of the batch to read
     * @param timeslice timeslice to read
     * @return the fetched data
     */
    T getBatchForSubject(int subjectID, int batchID, int timeslice);
}
106 changes: 106 additions & 0 deletions src/main/java/database/DatabaseInstancesFetcher.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
package database;

import weka.core.Attribute;
import weka.core.FastVector;
import weka.core.Instance;
import weka.core.Instances;

import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.Map;

/**
* Created by Timotej on 02-Nov-15.
*/
public class DatabaseInstancesFetcher implements DatabaseDataset<Instances>{
private final PreparedStatement stmt;
private final FastVector attributes;
private final Attribute subjectIdAttribute;
//Maps feautreID to attributes
private final Map<Integer, Attribute> attributeMap;
//Maps feautreID to the alias of the relation from where it's from
private final Map<Integer, String> features;

/*
Should not be constructed on it's own. Use DataInstancesBuilder
*/
DatabaseInstancesFetcher(PreparedStatement stmt,
FastVector attributes,
Attribute subjectIdAttribute,
Map<Integer, Attribute> attributeMap,
Map<Integer, String> features) {
this.stmt = stmt;
this.attributes = attributes;
this.subjectIdAttribute = subjectIdAttribute;
this.attributeMap = attributeMap;
this.features = features;
}


public Instances getBatch(int batchID, int timeslice){
try {
stmt.setInt(1, batchID);
//don't care about that as we want all of them
stmt.setInt(2, 2342);
//we want all the subjects
stmt.setBoolean(3, true);
stmt.setInt(4, timeslice);
ResultSet rs = stmt.executeQuery();
return resultSetToInstances(rs);

} catch (SQLException e) {
e.printStackTrace();
throw new RuntimeException("SQL failed to fetch data");
}
}

public Instances getBatchForSubject(int subjectID, int batchID, int timeslice) {
try {
stmt.setInt(1, batchID);
//don't care about that as we want all of them
stmt.setInt(2, subjectID);
//we want all the subjects
stmt.setBoolean(3, false);
stmt.setInt(4, timeslice);
ResultSet rs = stmt.executeQuery();
return resultSetToInstances(rs);

} catch (SQLException e) {
e.printStackTrace();
throw new RuntimeException("SQL failed to fetch data");
}
}

public void close() {
try {
stmt.close();
} catch (SQLException e) {
e.printStackTrace();
}
}

private Instances resultSetToInstances(ResultSet rs) throws SQLException {
Instances dataset;
dataset = new Instances("data", attributes, 10);
Instance inst;
int numFeatures = attributes.size();
while (rs.next()) {
inst = new Instance(numFeatures);
inst.setValue(subjectIdAttribute, rs.getInt("subjectID"));
for (Map.Entry<Integer, String> feature : features.entrySet()) {

inst.setValue(attributeMap.get(feature.getKey()),
rs.getDouble(feature.getValue() + "fv"));
}

dataset.add(inst);

}
//if BPM (featureID == 1) is one of the attributes, set is as the class
if (attributeMap.keySet().contains(1)) {
dataset.setClass(attributeMap.get(1));
}
return dataset;
}
}
44 changes: 44 additions & 0 deletions src/main/java/database/NameDataInstancesBuilder.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
package database;

import weka.core.Instances;

import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.HashSet;

/**
* Created by tk1713 on 02/11/15.
*/
public class NameDataInstancesBuilder implements DataInstancesBuilder<String, Integer, Integer> {

private final DataInstancesBuilder<Integer, Integer, Integer> builder;
private final HashSet<String> features = new HashSet<String>();

public NameDataInstancesBuilder(DataInstancesBuilder<Integer, Integer, Integer> builder) {
this.builder = builder;
}
public NameDataInstancesBuilder withFeature(String featureName) {
features.add(featureName);
return this;
}

public DatabaseDataset<Instances> build(Connection conn) {
try {
PreparedStatement stmt = conn.prepareStatement("SELECT featureID, name FROM feature");
ResultSet rs = stmt.executeQuery();
while (rs.next()) {
if (features.contains(rs.getString("name"))) {
builder.withFeature(rs.getInt("featureID"));
}
}
stmt.close();
} catch (SQLException e) {
e.printStackTrace();
throw new RuntimeException("SQL failed to fetch data");
}

return builder.build(conn);
}
}
Loading

0 comments on commit e16e913

Please sign in to comment.