Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Processing #105

Open
wants to merge 5 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 5 additions & 2 deletions gradle/libs.versions.toml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
[versions]
kotlin = "1.8.10"
kotlin = "1.9.22"
intellij = "1.16.1"
detekt = "1.22.0"
buildconfig = "3.1.0"
Expand All @@ -20,6 +20,7 @@ postgres = "42.3.1"
logback = "1.4.11"
exposed = "0.44.0"
h2 = "2.2.224"
dataframe = "0.13.1"

[libraries]
detekt-formatting = { module = "io.gitlab.arturbosch.detekt:detekt-formatting", version.ref = "detekt" }
Expand All @@ -39,6 +40,7 @@ ktor-serialization-kotlinx-json = { module = "io.ktor:ktor-serialization-kotlinx
ktor-server-core = { module = "io.ktor:ktor-server-core-jvm", version.ref = "ktor" }
ktor-server-netty = { module = "io.ktor:ktor-server-netty-jvm", version.ref = "ktor" }
ktor-server-tests = { module = "io.ktor:ktor-server-tests-jvm", version.ref = "ktor" }
dataframe = { module = "org.jetbrains.kotlinx:dataframe", version.ref = "dataframe" }

slf4j = { module = "org.slf4j:slf4j-simple", version.ref = "slf4j" }
postgres = { module = "org.postgresql:postgresql", version.ref = "postgres" }
Expand All @@ -57,4 +59,5 @@ kotlin-jvm = { id = "org.jetbrains.kotlin.jvm", version.ref = "kotlin" }
buildconfig = { id = "com.github.gmazzo.buildconfig", version.ref = "buildconfig" }
serialization = { id = "org.jetbrains.kotlin.plugin.serialization", version.ref = "serialization" }
changelog = { id = "org.jetbrains.changelog", version.ref = "changelog" }
ktor = { id = "io.ktor.plugin", version.ref = "ktor" }
ktor = { id = "io.ktor.plugin", version.ref = "ktor" }
dataframe = { id = "org.jetbrains.kotlinx.dataframe", version.ref = "dataframe" }
4 changes: 2 additions & 2 deletions gradle/wrapper/gradle-wrapper.properties
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#Tue May 14 02:37:11 CEST 2024
distributionBase=GRADLE_USER_HOME
distributionPath=wrapper/dists
distributionUrl=https\://services.gradle.org/distributions/gradle-8.0.2-bin.zip
networkTimeout=10000
distributionUrl=https\://services.gradle.org/distributions/gradle-8.5-bin.zip
zipStoreBase=GRADLE_USER_HOME
zipStorePath=wrapper/dists
30 changes: 30 additions & 0 deletions processing/build.gradle.kts
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
import io.gitlab.arturbosch.detekt.Detekt

// Build script of the `processing` module.
// The module only needs the Kotlin DataFrame Gradle plugin and the DataFrame runtime library.

plugins {
    // Plugin id and version both come from the version catalog entry `plugins.dataframe`.
    alias(libs.plugins.dataframe)
}

// Coordinates are inherited from the root project.
group = rootProject.group
version = rootProject.version

// Version string taken from the `jdk17` entry of the version catalog.
val jdkVersion = libs.versions.jdk17.get()

dependencies {
    implementation(libs.dataframe)
}

tasks {
    // `configureEach` keeps task configuration lazy: only tasks that are actually realized get configured.
    withType<JavaCompile>().configureEach {
        sourceCompatibility = jdkVersion
        targetCompatibility = JavaVersion.VERSION_17.toString()
    }

    withType<org.jetbrains.kotlin.gradle.tasks.KotlinCompile>().configureEach {
        kotlinOptions.jvmTarget = JavaVersion.VERSION_17.toString()
    }

    withType<Detekt>().configureEach {
        jvmTarget = jdkVersion
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,196 @@
package org.jetbrains.research.tasktracker.progsnap2

import kotlinx.datetime.LocalDateTime
import org.jetbrains.kotlinx.dataframe.DataFrame
import org.jetbrains.kotlinx.dataframe.DataRow
import org.jetbrains.kotlinx.dataframe.api.*
import org.jetbrains.kotlinx.dataframe.io.readCSV
import org.jetbrains.kotlinx.dataframe.io.writeCSV
import org.jetbrains.kotlinx.dataframe.size
import java.io.File
import java.time.OffsetDateTime
import java.time.format.DateTimeFormatter
import kotlin.io.path.createDirectories
import kotlin.io.path.exists

/**
 * Converts the CSV tables referenced by [TaskTrackerData] into a ProgSnap2-style dataset on disk.
 *
 * Every source table is read eagerly while the parser is being constructed, so construction fails if one of
 * the files cannot be read as CSV. [covert] then writes into the destination directory:
 *  - `DatasetMetadata.csv`, built from the metadata held by [TaskTrackerData];
 *  - `CodeStates/<n>/...`, one numbered directory per recorded code state;
 *  - `MainTable.csv`, one row per activity, file-editor or tool-window event.
 *
 * The directory numbering is kept in instance state and is never reset: calling [covert] a second time on the
 * same instance continues the numbering instead of starting again from 1.
 */
class ProgsnapParser(private val taskTrackerData: TaskTrackerData) {

    // Source tables. The three event tables are tagged with a `data_type` column so that their origin is still
    // known after they are concatenated in getAllActivityData().
    private val activityData =
        taskTrackerData.activityData.toDataFrame()
            .add("data_type") { "activityData" }
    private val documentData =
        taskTrackerData.documentData.toDataFrame()
    private val fileEditorData =
        taskTrackerData.fileEditorData.toDataFrame()
            .add("data_type") { "fileEditorData" }
    private val researches =
        taskTrackerData.researches.toDataFrame()
            .convert("id").to<Int>()

    // NOTE(review): surveyData and users are loaded but not used anywhere in this class; they are kept so that
    // construction still reads (and therefore validates) the same set of files as before.
    private val surveyData =
        taskTrackerData.surveyData.toDataFrame()
    private val toolWindowData =
        taskTrackerData.toolWindowData.toDataFrame()
            .add("data_type") { "toolWindowData" }
    private val users =
        taskTrackerData.users.toDataFrame()

    /** Number of the `CodeStates` sub-directory the next snapshot is written to; starts at 1. */
    private var directoryCounter = 1

    /**
     * Runs the whole conversion and writes the result into [destinationDirectory].
     *
     * The metadata file is written first, then the code-state directories, then the main event table.
     *
     * NOTE(review): the name looks like a typo of `convert`; it is left unchanged because callers outside this
     * file may depend on it.
     *
     * @param destinationDirectory existing directory that receives the dataset.
     * @throws IllegalArgumentException if [destinationDirectory] does not exist.
     */
    fun covert(destinationDirectory: File) {
        require(destinationDirectory.exists()) { "destination directory $destinationDirectory doesn't exist" }
        createMetaFile(destinationDirectory)
        val researchGroups = getCodeDataFrame(destinationDirectory).groupBy("researchId")
        createMainTableFile(destinationDirectory, researchGroups)
    }

    /** Writes the dataset metadata table to `DatasetMetadata.csv` inside [destinationDirectory]. */
    private fun createMetaFile(destinationDirectory: File) {
        val metaDataFile = destinationDirectory.resolve(META_DATA_FILENAME)
        taskTrackerData.metaData.toDataFrame().writeCSV(metaDataFile)
    }

    /**
     * Builds the main event table from all event sources and writes it to `MainTable.csv`.
     *
     * Rows containing a missing value in any produced column are removed by `dropNA()` before writing; this
     * includes events for which no code state could be found. `EventID` is assigned before that filtering, so
     * the written ids may contain gaps.
     *
     * @param researchGroups code states grouped by `researchId`, as produced in [covert].
     */
    private fun createMainTableFile(destinationDirectory: File, researchGroups: GroupBy<*, *>) {
        val events = getAllActivityData()
        events.mapToFrame {
            // 1-based position of the event in the date-sorted table.
            "EventID" from { it.index() + 1 }
            "subjectID" from { it["user"] }
            "EventType" from { it.getEventType() }
            "ToolInstance" from { "Kotlin" }
            "CourseId" from { it["research_unique_id"] }
            "CodeStateID" from {
                researchGroups.getDirectoryByDateAndResearch(
                    it["date"] as LocalDateTime,
                    it["research_id"].toString()
                )
            }
        }.dropNA().writeCSV(destinationDirectory.resolve(MAIN_TABLE_FILENAME))
    }

    /** Reads this file as CSV into a data frame. */
    private fun File.toDataFrame() = DataFrame.readCSV(this)

    /**
     * Normalizes the `date` and `research_id` columns and attaches the research table.
     *
     * Steps, in order: drop rows without a date; re-parse `date` with [DATE_FORMAT] and store its
     * `OffsetDateTime.toString()` form; convert `research_id` to `Int`; full-join with [researches] on
     * `research_id == id`; drop rows that still have no date (researches without any matching row); convert
     * `date` to a local date-time.
     */
    private fun DataFrame<*>.convertDate() = this
        .dropNA("date")
        .update("date")
        .with { OffsetDateTime.parse(it.toString(), DATE_FORMAT).toString() }
        .convert("research_id").to<Int>()
        .fullJoin(researches) { "research_id" match "id" }
        .dropNA("date")
        .convert("date")
        .toLocalDateTime()

    /** Activity, file-editor and tool-window events merged into one table, normalized and sorted by date. */
    private fun getAllActivityData() =
        activityData
            .concat(fileEditorData)
            .concat(toolWindowData)
            .convertDate()
            .sortBy("date")

    /** Document snapshots with normalized dates (see [convertDate]). */
    private fun getAllDocumentData() =
        documentData.convertDate()

    /**
     * Writes every file of [codeState] below `CodeStates/<directoryCounter>` in [destinationDirectory].
     *
     * The `filename` column is split on `/` and `\`; the last element is the file name and the preceding
     * elements become sub-directories. A file that already exists is left untouched.
     */
    private fun writeToFiles(codeState: Collection<DataRow<*>>, destinationDirectory: File) {
        // The counter does not change while one snapshot is being written, so the root is computed once.
        val snapshotRoot = destinationDirectory.toPath()
            .resolve(CODE_STATES_DIRECTORY_NAME)
            .resolve(directoryCounter.toString())
        codeState.forEach { fileRow ->
            val relativePathElements = fileRow["filename"].toString().split(delimiters = DIRECTORIES_DELIMITERS)
            val fileName = relativePathElements.last()
            // Everything except the file name itself.
            val directories = relativePathElements.dropLast(1).joinToString(separator = "/")
            val directoryToWrite = snapshotRoot.resolve(directories)
            if (!directoryToWrite.exists()) {
                directoryToWrite.createDirectories()
            }
            val target = directoryToWrite.resolve(fileName).toFile()
            if (!target.exists()) {
                // A missing fragment becomes an empty file rather than a file containing the text "null".
                // writeText creates the file, so no separate createNewFile() call is needed.
                target.writeText(fileRow["fragment"]?.toString().orEmpty())
            }
        }
    }

    /**
     * Materializes all code states on disk and returns their index.
     *
     * For each research, the initial state consists of the earliest row of every file name. After that, every
     * further row (in date order) replaces the row with the same file name and produces a new state. Each state
     * is written to its own numbered directory.
     *
     * @return a data frame with one row per state and the columns `date`, `researchId` and `directory`.
     */
    private fun getCodeDataFrame(destinationDirectory: File): DataFrame<*> {
        val codeStates = mutableSetOf<FilesDirectory>()
        val allDocuments = getAllDocumentData()
        allDocuments.groupBy("research_id").forEach { entry ->
            val currentFileState = mutableSetOf<DataRow<*>>()
            val sortedGroup = entry.group.sortBy("date")
            // Initial state: the first (earliest) row seen for every file name.
            sortedGroup.rows().forEach { row ->
                if (!currentFileState.any { it["filename"] == row["filename"] }) {
                    currentFileState.add(row)
                }
            }
            val date = currentFileState.minOf { it["date"] as LocalDateTime }
            writeToFiles(currentFileState, destinationDirectory) // write first state of files in the research
            codeStates.addNewState(date, entry)
            // Every remaining row replaces the previous version of its file and yields a new state.
            sortedGroup.rows().forEach { row ->
                if (row !in currentFileState) {
                    currentFileState.removeIf { it["filename"] == row["filename"] }
                    currentFileState.add(row)
                    writeToFiles(currentFileState, destinationDirectory)
                    codeStates.addNewState(row["date"] as LocalDateTime, entry)
                }
            }
        }
        return codeStates.toDataFrame()
    }

    /**
     * Maps a merged event row to its event-type name, based on the `data_type` tag and the type/action columns.
     *
     * @throws IllegalStateException if the row carries a data type, type, action or execution info that is
     * not handled here.
     */
    @Suppress("CyclomaticComplexMethod")
    private fun DataRow<*>.getEventType(): String {
        return when (val dataType = this["data_type"]) {
            "activityData" -> {
                when (val type = this["type"]) {
                    "Execution" -> {
                        val info = this["info"]
                        val infoText = info.toString()
                        when {
                            infoText.startsWith("Run") -> "Run.Program"
                            infoText.startsWith("Debug") -> "Debug.Program"
                            else -> error("Undefined Execution type has been detected in the info `$info`")
                        }
                    }

                    "Shortcut" -> "X-Shortcut"
                    "KeyPressed" -> "X-KeyPressed"
                    "KeyReleased" -> "X-KeyReleased"
                    "Action" -> "X-Action"
                    else -> error("Undefined activity data type has been detected in the data `$type`")
                }
            }

            "fileEditorData" -> when (val action = this["action"]) {
                "FOCUS" -> "File.Focus"
                "OPEN" -> "File.Open"
                "CLOSE" -> "File.Close"
                else -> error("Undefined action of file editor type has been detected: `$action`")
            }

            "toolWindowData" -> when (val action = this["action"]) {
                "FOCUSED" -> "X-Toolwindow.Focus"
                "OPENED" -> "X-Toolwindow.Open"
                else -> error("Undefined action of tool window type has been detected: `$action`")
            }

            else -> error("Undefined datatype: `$dataType`")
        }
    }

    /**
     * Finds the code-state directory that was current at [date] for the research [researchId].
     *
     * The newest state recorded strictly before [date] is chosen. If the event is not later than any state,
     * the earliest state of the research is used.
     *
     * @return the directory number, or `null` if the research has no code states at all.
     */
    private fun GroupBy<*, *>.getDirectoryByDateAndResearch(date: LocalDateTime, researchId: String): Int? {
        val key = keys.firstOrNull { it["researchId"] == researchId } ?: return null
        // States are appended in chronological order per research, so the reversed list is newest-first.
        val rows = groups[key.index()].rows().reversed()
        val index = rows.indexOfFirst { date > (it["date"] as LocalDateTime) }
        // No state precedes the event: fall back to the earliest one, which is last in the reversed list.
        val state = if (index == -1) rows.last() else rows[index]
        return state["directory"] as Int
    }

    /** Registers a state dated [date] for the research of [entry] and advances the directory counter. */
    private fun MutableSet<FilesDirectory>.addNewState(date: LocalDateTime, entry: GroupBy.Entry<Any?, Any?>) = add(
        FilesDirectory(date, entry.key.values().first().toString(), directoryCounter++)
    )

    /** One recorded code state: when it was taken, for which research, and its directory number. */
    private data class FilesDirectory(val date: LocalDateTime, val researchId: String, val directory: Int)

    companion object {
        private val DATE_FORMAT: DateTimeFormatter = DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss.SSSSSS XXX")
        private val DIRECTORIES_DELIMITERS = arrayOf("/", "\\")
        private const val CODE_STATES_DIRECTORY_NAME = "CodeStates"
        private const val MAIN_TABLE_FILENAME = "MainTable.csv"
        private const val META_DATA_FILENAME = "DatasetMetadata.csv"
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
package org.jetbrains.research.tasktracker.progsnap2

import org.jetbrains.kotlinx.dataframe.api.dataFrameOf
import java.io.File
import java.util.*

/**
 * Bundle of input files and metadata needed for one conversion.
 *
 * Each [File] is expected to be a CSV table; `ProgsnapParser` reads all of them with `DataFrame.readCSV`.
 *
 * @property activityData table of activity events.
 * @property documentData table of document snapshots.
 * @property fileEditorData table of file-editor events.
 * @property researches table of researches.
 * @property surveyData table of survey data.
 * @property toolWindowData table of tool-window events.
 * @property users table of users.
 * @property metaData values written to the dataset metadata file.
 */
data class TaskTrackerData(
    val activityData: File,
    val documentData: File,
    val fileEditorData: File,
    val researches: File,
    val surveyData: File,
    val toolWindowData: File,
    val users: File,
    val metaData: ProgsnapMeta,
)

/**
 * Dataset-level metadata that is serialized as a two-column `Property` / `Value` table.
 *
 * @property version value of the `Version` row.
 * @property codeStateRepresentation value of the `CodeStateRepresentation` row.
 * @property eventOrderScope value of the `EventOrderScope` row; defaults to [EventOrderScope.NONE].
 * @property isEventOrderingConsistent value of the `IsEventOrderingConsistent` row; defaults to `false`.
 * @property eventOrderScopeColumns value of the `EventOrderScopeColumns` row; defaults to an empty string.
 */
data class ProgsnapMeta(
    val version: Int,
    val codeStateRepresentation: CodeStateRepresentation,
    val eventOrderScope: EventOrderScope = EventOrderScope.NONE,
    val isEventOrderingConsistent: Boolean = false,
    val eventOrderScopeColumns: String = ""
) {
    /**
     * Builds the metadata table with one row per property.
     *
     * Enum values are rendered through [toValueName] (for example `NONE` becomes `None`); the boolean is
     * rendered as `true` / `false`.
     */
    fun toDataFrame() = dataFrameOf("Property", "Value")(
        "Version", version,
        // Boolean.toString() already yields lowercase "true"/"false"; no locale-dependent lowercasing needed.
        "IsEventOrderingConsistent", isEventOrderingConsistent.toString(),
        "EventOrderScope", eventOrderScope.toString().toValueName(),
        "EventOrderScopeColumns", eventOrderScopeColumns,
        "CodeStateRepresentation", codeStateRepresentation.toString().toValueName(),
    )
}

/**
 * Possible values of the `EventOrderScope` metadata property.
 *
 * The constant name is what ends up in the metadata table, after being capitalized by `toValueName`
 * (for example `NONE` is written as `None`).
 */
enum class EventOrderScope {
    NONE,
    GLOBAL,
    RESTRICTED,
}

/**
 * Possible values of the `CodeStateRepresentation` metadata property.
 *
 * The constant name is what ends up in the metadata table, after being capitalized by `toValueName`
 * (for example `DIRECTORY` is written as `Directory`).
 */
enum class CodeStateRepresentation {
    TABLE,
    DIRECTORY,
    GIT,
}

/**
 * Turns a constant-style name into a capitalized value name: `"RESTRICTED"` becomes `"Restricted"`.
 *
 * The conversion uses [Locale.ROOT], so the result does not depend on the JVM default locale. With a
 * locale-sensitive conversion, a Turkish default locale would turn `I` into the dotless `ı` and produce
 * `"Dırectory"` for `"DIRECTORY"`.
 *
 * @return the receiver lowercased, with its first character in title case; an empty string stays empty.
 */
fun String.toValueName(): String =
    lowercase(Locale.ROOT)
        .replaceFirstChar { if (it.isLowerCase()) it.titlecase(Locale.ROOT) else it.toString() }
3 changes: 2 additions & 1 deletion settings.gradle.kts
Original file line number Diff line number Diff line change
Expand Up @@ -7,4 +7,5 @@ pluginManagement {

rootProject.name = "tasktracker-3"
include("ij-plugin")
include("ij-server")
include("ij-server")
include("processing")
Loading