Skip to content

Commit

Permalink
Merge pull request #14 from Shiti/develop
Browse files Browse the repository at this point in the history
fixed errors in timestamp support and added tests
  • Loading branch information
helena committed Mar 21, 2016
2 parents 0fb7974 + 05f6646 commit 6f8eb26
Show file tree
Hide file tree
Showing 5 changed files with 89 additions and 12 deletions.
2 changes: 1 addition & 1 deletion build.sbt
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
name := "kafka-connect-cassandra"

version := "0.0.4"
version := "0.0.5"

crossScalaVersions := Seq("2.11.7", "2.10.6")

Expand Down
8 changes: 8 additions & 0 deletions src/it/resources/setup.cql
Original file line number Diff line number Diff line change
Expand Up @@ -56,3 +56,11 @@ CREATE TABLE githubstats.monthly_commits (
year INT,
PRIMARY KEY ((user), year, month)
) WITH CLUSTERING ORDER BY (year DESC, month DESC);

CREATE TABLE IF NOT EXISTS test.event_store(
app_id text,
event_type text,
subscription_type text,
event_ts timestamp,
PRIMARY KEY((app_id, event_type), event_ts)
);
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,12 @@

package com.tuplejump.kafka.connect.cassandra

import scala.collection.JavaConverters._
import java.util.concurrent.TimeUnit

import org.apache.kafka.connect.source.SourceTaskContext

import scala.collection.JavaConverters._

class CassandraSourceTaskSpec extends AbstractFlatSpec {

val query = "SELECT * FROM test.playlists"
Expand All @@ -39,7 +42,7 @@ class CassandraSourceTaskSpec extends AbstractFlatSpec {
sourceTask.stop()
}

it should "fetch records from cassandra" in {
it should "fetch records from cassandra in bulk" in {
val sourceTask = new CassandraSourceTask()
val mockContext = mock[SourceTaskContext]

Expand All @@ -53,4 +56,64 @@ class CassandraSourceTaskSpec extends AbstractFlatSpec {
sourceTask.stop()
}

def insertStmt(time: Long): String = {
"INSERT INTO test.event_store(app_id,event_type,subscription_type,event_ts) " +
s"VALUES ('website','renewal','annual',$time)"
}

it should "fetch only new records from cassandra" in {
val timeBasedQuery =
"""SELECT * FROM test.event_store WHERE app_id='website' AND event_type='renewal'
| AND event_ts >= previousTime()""".stripMargin

val topic = "events"
val cassandraSourceConfig = sourceConfig(timeBasedQuery, topic)

val sourceTask = new CassandraSourceTask()
val mockContext = mock[SourceTaskContext]

sourceTask.initialize(mockContext)
sourceTask.start(cassandraSourceConfig.asJava)

val oneHrAgo = System.currentTimeMillis() - TimeUnit.HOURS.toMillis(1)
sourceTask.session.execute(insertStmt(oneHrAgo))

sourceTask.poll().size() should be(0)

val oneHrLater = System.currentTimeMillis() + TimeUnit.HOURS.toMillis(1)
sourceTask.session.execute(insertStmt(oneHrLater))

val result = sourceTask.poll()

result.size() should be(1)

sourceTask.stop()
}

it should "fetch records from cassandra in given pollInterval" in {
val timeBasedQuery =
"""SELECT * FROM test.event_store WHERE app_id='website' AND event_type='renewal'
| AND event_ts >= previousTime() AND event_ts <= currentTime()""".stripMargin

val topic = "events"
val cassandraSourceConfig = sourceConfig(timeBasedQuery, topic)

val sourceTask = new CassandraSourceTask()
val mockContext = mock[SourceTaskContext]

sourceTask.initialize(mockContext)
sourceTask.start(cassandraSourceConfig.asJava)

val oneHrLater = System.currentTimeMillis() + TimeUnit.HOURS.toMillis(1)
sourceTask.session.execute(insertStmt(oneHrLater))

val fewSecLater = System.currentTimeMillis() + TimeUnit.SECONDS.toMillis(2)
sourceTask.session.execute(insertStmt(fewSecLater))

val result = sourceTask.poll()

result.size() should be(1)

sourceTask.stop()
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -57,8 +57,8 @@ class CassandraSourceTask extends SourceTask with TaskLifecycle {
//TODO remove https://github.com/tuplejump/kafka-connector/issues/9
Thread.sleep(source.pollInterval)

source slide timestamp
read(source)
val updatedSource = source slide timestamp
read(updatedSource)
case source =>
read(source)
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ import Configuration._
*
* TODO CassandraConnectionConfig
*/
private[kafka] final class Configuration private(val config: immutable.Map[String,String],
private[kafka] final class Configuration private(val config: immutable.Map[String, String],
val source: Option[SourceConfig],
val sink: immutable.List[SinkConfig]) {

Expand Down Expand Up @@ -70,8 +70,12 @@ object Configuration {

final val PreviousTime = "previousTime()"

final val PreviousTimeRegex = "previousTime\\(\\)"

final val CurrentTime = "currentTime()"

final val CurrentTimeRegex = "currentTime\\(\\)"

val Empty = new Configuration(Map.empty, SourceConfig.Empty, Nil)

/** Returns a new [[com.tuplejump.kafka.connect.cassandra.Configuration]]. */
Expand Down Expand Up @@ -127,8 +131,9 @@ object Configuration {
def slide(now: Long): SourceConfig =
if (timeseries) {
val timestamp = now - pollInterval
copy(query = query.replaceAll(PreviousTime, s"$timestamp")
.replaceAll(CurrentTime, s"$now"))
val updatedQuery: Query = query.replaceAll(PreviousTimeRegex, s"$timestamp")
.replaceAll(CurrentTimeRegex, s"$now")
copy(query = updatedQuery)
} else this

def timeseries: Boolean =
Expand All @@ -140,20 +145,20 @@ object Configuration {

val Empty = SourceConfig(Map.empty)

def apply(config: Map[String,String]): Option[SourceConfig] =
def apply(config: Map[String, String]): Option[SourceConfig] =
for {
topic <- get(config, TopicKey)
query <- get(config, QueryKey)
} yield SourceConfig(topic, query, pollInterval(config), None, None)

private def pollInterval(config: Map[String,String]): Long =
private def pollInterval(config: Map[String, String]): Long =
get(config, PollInterval).map(_.toLong).getOrElse(DefaultPollInterval)
}

/** A Kafka [[CassandraSink]] and [[CassandraSinkTask]] configuration.
* INTERNAL API.
*
* @param topic the kafka `topic` name
* @param topic the kafka `topic` name
* @param namespace the cassandra `keyspace.table`
*/
private[kafka] final case class SinkConfig(val topic: TopicName,
Expand Down Expand Up @@ -218,7 +223,7 @@ object Configuration {
val ConsistencyLevelKey = "cassandra.output.consistency.level"
val DefaultConsistencyLevel = ConsistencyLevel.LOCAL_QUORUM

/** The maximum total size of the batch in bytes. Overridden by BatchSizeRowsParam. */
/** The maximum total size of the batch in bytes. Overridden by BatchSizeRowsParam. */
val BatchSizeBytesKey = "cassandra.output.batch.size.bytes"
val BatchBufferSize = 1024

Expand All @@ -227,4 +232,5 @@ object Configuration {
val DefaultParallelismLevel = "5"

}

}

0 comments on commit 6f8eb26

Please sign in to comment.