From c93446889f03907bcf62309e769fcd467fa54f92 Mon Sep 17 00:00:00 2001 From: mikrise2 Date: Tue, 14 May 2024 02:50:29 +0200 Subject: [PATCH 1/5] [ML4SE-666] Initialized new module --- gradle/libs.versions.toml | 7 ++++-- gradle/wrapper/gradle-wrapper.properties | 4 ++-- processing/build.gradle.kts | 30 ++++++++++++++++++++++++ settings.gradle.kts | 3 ++- 4 files changed, 39 insertions(+), 5 deletions(-) create mode 100644 processing/build.gradle.kts diff --git a/gradle/libs.versions.toml b/gradle/libs.versions.toml index 752139b0..7328c54f 100644 --- a/gradle/libs.versions.toml +++ b/gradle/libs.versions.toml @@ -1,5 +1,5 @@ [versions] -kotlin = "1.8.10" +kotlin = "1.9.22" intellij = "1.16.1" detekt = "1.22.0" buildconfig = "3.1.0" @@ -20,6 +20,7 @@ postgres = "42.3.1" logback = "1.4.11" exposed = "0.44.0" h2 = "2.2.224" +dataframe = "0.13.1" [libraries] detekt-formatting = { module = "io.gitlab.arturbosch.detekt:detekt-formatting", version.ref = "detekt" } @@ -39,6 +40,7 @@ ktor-serialization-kotlinx-json = { module = "io.ktor:ktor-serialization-kotlinx ktor-server-core = { module = "io.ktor:ktor-server-core-jvm", version.ref = "ktor" } ktor-server-netty = { module = "io.ktor:ktor-server-netty-jvm", version.ref = "ktor" } ktor-server-tests = { module = "io.ktor:ktor-server-tests-jvm", version.ref = "ktor" } +dataframe = {module = "org.jetbrains.kotlinx:dataframe", version.ref = "dataframe"} slf4j = { module = "org.slf4j:slf4j-simple", version.ref = "slf4j" } postgres = { module = "org.postgresql:postgresql", version.ref = "postgres" } @@ -57,4 +59,5 @@ kotlin-jvm = { id = "org.jetbrains.kotlin.jvm", version.ref = "kotlin" } buildconfig = { id = "com.github.gmazzo.buildconfig", version.ref = "buildconfig" } serialization = { id = "org.jetbrains.kotlin.plugin.serialization", version.ref = "serialization" } changelog = { id = "org.jetbrains.changelog", version.ref = "changelog" } -ktor = { id = "io.ktor.plugin", version.ref = "ktor" } \ No newline at end of file +ktor = { id = "io.ktor.plugin", version.ref = "ktor" } +dataframe = {id = "org.jetbrains.kotlinx.dataframe", version.ref = "dataframe"} \ No newline at end of file diff --git a/gradle/wrapper/gradle-wrapper.properties b/gradle/wrapper/gradle-wrapper.properties index bdc9a83b..068c0193 100644 --- a/gradle/wrapper/gradle-wrapper.properties +++ b/gradle/wrapper/gradle-wrapper.properties @@ -1,6 +1,6 @@ +#Tue May 14 02:37:11 CEST 2024 distributionBase=GRADLE_USER_HOME distributionPath=wrapper/dists -distributionUrl=https\://services.gradle.org/distributions/gradle-8.0.2-bin.zip -networkTimeout=10000 +distributionUrl=https\://services.gradle.org/distributions/gradle-8.5-bin.zip zipStoreBase=GRADLE_USER_HOME zipStorePath=wrapper/dists diff --git a/processing/build.gradle.kts b/processing/build.gradle.kts new file mode 100644 index 00000000..d2d6c31f --- /dev/null +++ b/processing/build.gradle.kts @@ -0,0 +1,30 @@ +import io.gitlab.arturbosch.detekt.Detekt + +group = rootProject.group +version = rootProject.version + +@Suppress("DSL_SCOPE_VIOLATION") +plugins { + id(libs.plugins.dataframe.get().pluginId) version libs.versions.dataframe.get() +} + + +val jdkVersion = libs.versions.jdk17.get() + +dependencies { + implementation(rootProject.libs.dataframe) +} + +tasks{ + withType { + sourceCompatibility = jdkVersion + targetCompatibility = JavaVersion.VERSION_17.toString() + } + withType { + kotlinOptions.jvmTarget = JavaVersion.VERSION_17.toString() + } + + withType().configureEach { + jvmTarget = jdkVersion + } +} diff --git a/settings.gradle.kts b/settings.gradle.kts index 5c9be412..e48435e9 100644 --- a/settings.gradle.kts +++ b/settings.gradle.kts @@ -7,4 +7,5 @@ pluginManagement { rootProject.name = "tasktracker-3" include("ij-plugin") -include("ij-server") \ No newline at end of file +include("ij-server") +include("processing") From 197dde2ca48ee3fcea5b92998d4d64fe2baa0852 Mon Sep 17 00:00:00 2001 From: mikrise2 Date: Wed, 15 May 2024 02:25:55 +0200 Subject: [PATCH 2/5] [ML4SE-666] getAllActivityData function --- .../jetbrains/research/tasktracker/Main.kt | 49 +++++++++++++++++++ 1 file changed, 49 insertions(+) create mode 100644 processing/src/main/kotlin/org/jetbrains/research/tasktracker/Main.kt diff --git a/processing/src/main/kotlin/org/jetbrains/research/tasktracker/Main.kt b/processing/src/main/kotlin/org/jetbrains/research/tasktracker/Main.kt new file mode 100644 index 00000000..cfecfcfc --- /dev/null +++ b/processing/src/main/kotlin/org/jetbrains/research/tasktracker/Main.kt @@ -0,0 +1,49 @@ +@file:ImportDataSchema( + "Repository", + "https://raw.githubusercontent.com/Kotlin/dataframe/master/data/jetbrains_repositories.csv", +) + +package org.jetbrains.research.tasktracker + +import org.jetbrains.kotlinx.dataframe.DataFrame +import org.jetbrains.kotlinx.dataframe.annotations.ImportDataSchema +import org.jetbrains.kotlinx.dataframe.api.* +import org.jetbrains.kotlinx.dataframe.io.readCSV +import java.time.OffsetDateTime +import java.time.format.DateTimeFormatter + + +//TODO +private var activityData = + DataFrame.readCSV("processing/src/main/resources/tt_files/activitydata.csv") + .add("data_type") { "Activity" } +private var documentData = + DataFrame.readCSV("processing/src/main/resources/tt_files/documentdata.csv") +private var fileEditorData = + DataFrame.readCSV("processing/src/main/resources/tt_files/fileeditordata.csv") + .add("data_type") { "fileEditorData" } +private var researches = + DataFrame.readCSV("processing/src/main/resources/tt_files/researches.csv") +private var surveyData = + DataFrame.readCSV("processing/src/main/resources/tt_files/surveyData.csv") +private var toolWindowData = + DataFrame.readCSV("processing/src/main/resources/tt_files/toolwindowdata.csv") + .add("data_type") { "toolWindowData" } +private var users = + DataFrame.readCSV("processing/src/main/resources/tt_files/users.csv") + +val dateFormatter = DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss.SSSSSS XXX") + +fun getAllActivityData() = + activityData.concat(fileEditorData).concat(toolWindowData).dropNA("date") + .update("date") + .with { OffsetDateTime.parse(it.toString(), dateFormatter).toString() } + .join(researches){ "research_id" match "id" } + .convert("date") + .toLocalDateTime().sortBy("date") + + +fun main() { + val allActivity = getAllActivityData() + allActivity.print() +} From 9a9cc5a71bbc2e46271eb5d20b89f827d498e776 Mon Sep 17 00:00:00 2001 From: mikrise2 Date: Wed, 15 May 2024 19:27:25 +0200 Subject: [PATCH 3/5] [ML4SE-666] created code fragments directory and dataframe --- .../jetbrains/research/tasktracker/Main.kt | 70 +++++++++++++++++-- 1 file changed, 66 insertions(+), 4 deletions(-) diff --git a/processing/src/main/kotlin/org/jetbrains/research/tasktracker/Main.kt b/processing/src/main/kotlin/org/jetbrains/research/tasktracker/Main.kt index cfecfcfc..fe633ffe 100644 --- a/processing/src/main/kotlin/org/jetbrains/research/tasktracker/Main.kt +++ b/processing/src/main/kotlin/org/jetbrains/research/tasktracker/Main.kt @@ -5,14 +5,23 @@ package org.jetbrains.research.tasktracker +import kotlinx.datetime.LocalDateTime import org.jetbrains.kotlinx.dataframe.DataFrame +import org.jetbrains.kotlinx.dataframe.DataRow import org.jetbrains.kotlinx.dataframe.annotations.ImportDataSchema import org.jetbrains.kotlinx.dataframe.api.* import org.jetbrains.kotlinx.dataframe.io.readCSV +import org.jetbrains.kotlinx.dataframe.size +import java.io.File import java.time.OffsetDateTime import java.time.format.DateTimeFormatter +import kotlin.io.path.createDirectories +import kotlin.io.path.createDirectory +import kotlin.io.path.exists +var counter = 1 + //TODO private var activityData = DataFrame.readCSV("processing/src/main/resources/tt_files/activitydata.csv") @@ -38,12 +47,65 @@ fun getAllActivityData() = activityData.concat(fileEditorData).concat(toolWindowData).dropNA("date") .update("date") .with { OffsetDateTime.parse(it.toString(), dateFormatter).toString() } - .join(researches){ "research_id" match "id" } + .join(researches) { "research_id" match "id" } + .convert("date") + .toLocalDateTime() + .sortBy("date") + +fun getAllDocumentData() = + documentData.dropNA("date") + .update("date") + .with { OffsetDateTime.parse(it.toString(), dateFormatter).toString() } + .join(researches) { "research_id" match "id" } .convert("date") - .toLocalDateTime().sortBy("date") + .toLocalDateTime() + +data class FilesDirectory(val date: LocalDateTime, val researchId: String, val directory: Int) +val directory = File("test-processing") + +fun writeToFiles(acc: Collection>) = acc.forEach { fileRow -> + val fileName = fileRow["filename"].toString().split("/", "\\").last() + val dir = directory.toPath().resolve(counter.toString()) + if (!dir.exists()) { + dir.createDirectories() + } + dir.resolve(fileName).toFile().let { + if (!it.exists()) { + it.createNewFile() + it.writeText(fileRow["fragment"].toString()) + } + } +} + +fun getCodeDataFrame(): DataFrame<*> { + val files = mutableSetOf() + val allDocuments = getAllDocumentData() + allDocuments.groupBy("research_id").forEach { + val acc = mutableSetOf>() + val sortBy = it.group.sortBy("date") + sortBy.rows().forEach { row -> + if (acc.find { it["filename"] == row["filename"] } == null) + acc.add(row) + } + val date = acc.minOf { it.get("date") as LocalDateTime } + writeToFiles(acc) + files.add(FilesDirectory(date, it.key.toString(), counter++)) + sortBy.rows().forEach { row -> + if (row !in acc) { + acc.removeIf { it["filename"] == row["filename"] } + acc.add(row) + //TODO write to files + writeToFiles(acc) + files.add(FilesDirectory(row["date"] as LocalDateTime, it.key.toString(), counter++)) + } + } + } + return dataFrameOf( + listOf("date", "research_id", "directory"), + files.flatMap { listOf(it.date, it.researchId, it.directory) }) +} fun main() { - val allActivity = getAllActivityData() - allActivity.print() + } From 613c4cda7bbd7a03d6686b269b70727954aad4cf Mon Sep 17 00:00:00 2001 From: mikrise2 Date: Thu, 16 May 2024 05:01:14 +0200 Subject: [PATCH 4/5] [ML4SE-666] parse Main Event Table --- .../jetbrains/research/tasktracker/Main.kt | 82 +++++++++++++++---- 1 file changed, 64 insertions(+), 18 deletions(-) diff --git a/processing/src/main/kotlin/org/jetbrains/research/tasktracker/Main.kt b/processing/src/main/kotlin/org/jetbrains/research/tasktracker/Main.kt index fe633ffe..ad0493ab 100644 --- a/processing/src/main/kotlin/org/jetbrains/research/tasktracker/Main.kt +++ b/processing/src/main/kotlin/org/jetbrains/research/tasktracker/Main.kt @@ -1,14 +1,8 @@ -@file:ImportDataSchema( - "Repository", - "https://raw.githubusercontent.com/Kotlin/dataframe/master/data/jetbrains_repositories.csv", -) - package org.jetbrains.research.tasktracker import kotlinx.datetime.LocalDateTime import org.jetbrains.kotlinx.dataframe.DataFrame import org.jetbrains.kotlinx.dataframe.DataRow -import org.jetbrains.kotlinx.dataframe.annotations.ImportDataSchema import org.jetbrains.kotlinx.dataframe.api.* import org.jetbrains.kotlinx.dataframe.io.readCSV import org.jetbrains.kotlinx.dataframe.size @@ -16,7 +10,6 @@ import java.io.File import java.time.OffsetDateTime import java.time.format.DateTimeFormatter import kotlin.io.path.createDirectories -import kotlin.io.path.createDirectory import kotlin.io.path.exists @@ -25,29 +18,32 @@ var counter = 1 //TODO private var activityData = DataFrame.readCSV("processing/src/main/resources/tt_files/activitydata.csv") - .add("data_type") { "Activity" } + .add("data_type") { "activityData" }.also { println(it.size()) } private var documentData = DataFrame.readCSV("processing/src/main/resources/tt_files/documentdata.csv") private var fileEditorData = DataFrame.readCSV("processing/src/main/resources/tt_files/fileeditordata.csv") - .add("data_type") { "fileEditorData" } + .add("data_type") { "fileEditorData" }.also { println(it.size()) } private var researches = - DataFrame.readCSV("processing/src/main/resources/tt_files/researches.csv") + DataFrame.readCSV("processing/src/main/resources/tt_files/researches.csv").convert("id") + .to() private var surveyData = DataFrame.readCSV("processing/src/main/resources/tt_files/surveyData.csv") private var toolWindowData = DataFrame.readCSV("processing/src/main/resources/tt_files/toolwindowdata.csv") - .add("data_type") { "toolWindowData" } + .add("data_type") { "toolWindowData" }.also { println(it.size()) } private var users = DataFrame.readCSV("processing/src/main/resources/tt_files/users.csv") -val dateFormatter = DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss.SSSSSS XXX") +val dateFormatter: DateTimeFormatter = DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss.SSSSSS XXX") fun getAllActivityData() = activityData.concat(fileEditorData).concat(toolWindowData).dropNA("date") .update("date") .with { OffsetDateTime.parse(it.toString(), dateFormatter).toString() } - .join(researches) { "research_id" match "id" } + .convert("research_id").to() + .fullJoin(researches) { "research_id" match "id" } + .dropNA("date") .convert("date") .toLocalDateTime() .sortBy("date") @@ -56,7 +52,8 @@ fun getAllDocumentData() = documentData.dropNA("date") .update("date") .with { OffsetDateTime.parse(it.toString(), dateFormatter).toString() } - .join(researches) { "research_id" match "id" } + .convert("research_id").to() + .fullJoin(researches) { "research_id" match "id" } .convert("date") .toLocalDateTime() @@ -95,17 +92,66 @@ fun getCodeDataFrame(): DataFrame<*> { if (row !in acc) { acc.removeIf { it["filename"] == row["filename"] } acc.add(row) - //TODO write to files writeToFiles(acc) files.add(FilesDirectory(row["date"] as LocalDateTime, it.key.toString(), counter++)) } } } - return dataFrameOf( - listOf("date", "research_id", "directory"), - files.flatMap { listOf(it.date, it.researchId, it.directory) }) + return files.toDataFrame() +// return dataFrameOf( +// listOf("date", "research_id", "directory"), +// files.flatMap { listOf(it.date, it.researchId, it.directory) }) +} + +fun DataRow<*>.getEventType(): String { + return when (this["data_type"]) { + "activityData" -> when (this["type"]) { + "Execution" -> { + if (this["info"].toString().startsWith("Run")) { + "Run.Program" + } else if (this["info"].toString().startsWith("Debug")) { + "Debug.Program" + } else { + TODO() + } + } + + "Shortcut" -> "X-Shortcut" + "KeyPressed" -> "X-KeyPressed" + "KeyReleased" -> "X-KeyReleased" + "Action" -> "X-Action" + else -> TODO() + } + + "fileEditorData" -> when (this["action"]) { + "FOCUS" -> "File.Focus" + "OPEN" -> "File.Open" + "CLOSE" -> "File.Close" + else -> TODO() + } + + "toolWindowData" -> when (this["action"]) { + "FOCUSED" -> "X-Toolwindow.Focus" + "OPENED" -> "X-Toolwindow.Open" + else -> TODO() + } + + else -> TODO() + } } fun main() { + val message = getAllActivityData() + message.print() + var counter = 1 + // TODO add Session.Start and Session.End + message.mapToFrame { + "EventID" from { counter++ } + "subjectID" from { it["user"] } + "EventType" from { it.getEventType() } + "ToolInstance" from { "Kotlin" } + "CourseId" from { it["research_unique_id"] } + "CodeStateID" from TODO() + }.size().let { println(it) } } From bf8c6f858afdb6febeb08f499f27aa07ec7eabdf Mon Sep 17 00:00:00 2001 From: mikrise2 Date: Fri, 17 May 2024 03:13:35 +0200 Subject: [PATCH 5/5] [ML4SE-666] first version of converter is ready --- .../jetbrains/research/tasktracker/Main.kt | 157 -------------- .../tasktracker/progsnap2/ProgsnapParser.kt | 196 ++++++++++++++++++ .../tasktracker/progsnap2/TaskTrackerData.kt | 48 +++++ 3 files changed, 244 insertions(+), 157 deletions(-) delete mode 100644 processing/src/main/kotlin/org/jetbrains/research/tasktracker/Main.kt create mode 100644 processing/src/main/kotlin/org/jetbrains/research/tasktracker/progsnap2/ProgsnapParser.kt create mode 100644 processing/src/main/kotlin/org/jetbrains/research/tasktracker/progsnap2/TaskTrackerData.kt diff --git a/processing/src/main/kotlin/org/jetbrains/research/tasktracker/Main.kt b/processing/src/main/kotlin/org/jetbrains/research/tasktracker/Main.kt deleted file mode 100644 index ad0493ab..00000000 --- a/processing/src/main/kotlin/org/jetbrains/research/tasktracker/Main.kt +++ /dev/null @@ -1,157 +0,0 @@ -package org.jetbrains.research.tasktracker - -import kotlinx.datetime.LocalDateTime -import org.jetbrains.kotlinx.dataframe.DataFrame -import org.jetbrains.kotlinx.dataframe.DataRow -import org.jetbrains.kotlinx.dataframe.api.* -import org.jetbrains.kotlinx.dataframe.io.readCSV -import org.jetbrains.kotlinx.dataframe.size -import java.io.File -import java.time.OffsetDateTime -import java.time.format.DateTimeFormatter -import kotlin.io.path.createDirectories -import kotlin.io.path.exists - - -var counter = 1 - -//TODO -private var activityData = - DataFrame.readCSV("processing/src/main/resources/tt_files/activitydata.csv") - .add("data_type") { "activityData" }.also { println(it.size()) } -private var documentData = - DataFrame.readCSV("processing/src/main/resources/tt_files/documentdata.csv") -private var fileEditorData = - DataFrame.readCSV("processing/src/main/resources/tt_files/fileeditordata.csv") - .add("data_type") { "fileEditorData" }.also { println(it.size()) } -private var researches = - DataFrame.readCSV("processing/src/main/resources/tt_files/researches.csv").convert("id") - .to() -private var surveyData = - DataFrame.readCSV("processing/src/main/resources/tt_files/surveyData.csv") -private var toolWindowData = - DataFrame.readCSV("processing/src/main/resources/tt_files/toolwindowdata.csv") - .add("data_type") { "toolWindowData" }.also { println(it.size()) } -private var users = - DataFrame.readCSV("processing/src/main/resources/tt_files/users.csv") - -val dateFormatter: DateTimeFormatter = DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss.SSSSSS XXX") - -fun getAllActivityData() = - activityData.concat(fileEditorData).concat(toolWindowData).dropNA("date") - .update("date") - .with { OffsetDateTime.parse(it.toString(), dateFormatter).toString() } - .convert("research_id").to() - .fullJoin(researches) { "research_id" match "id" } - .dropNA("date") - .convert("date") - .toLocalDateTime() - .sortBy("date") - -fun getAllDocumentData() = - documentData.dropNA("date") - .update("date") - .with { OffsetDateTime.parse(it.toString(), dateFormatter).toString() } - .convert("research_id").to() - .fullJoin(researches) { "research_id" match "id" } - .convert("date") - .toLocalDateTime() - -data class FilesDirectory(val date: LocalDateTime, val researchId: String, val directory: Int) - -val directory = File("test-processing") - -fun writeToFiles(acc: Collection>) = acc.forEach { fileRow -> - val fileName = fileRow["filename"].toString().split("/", "\\").last() - val dir = directory.toPath().resolve(counter.toString()) - if (!dir.exists()) { - dir.createDirectories() - } - dir.resolve(fileName).toFile().let { - if (!it.exists()) { - it.createNewFile() - it.writeText(fileRow["fragment"].toString()) - } - } -} - -fun getCodeDataFrame(): DataFrame<*> { - val files = mutableSetOf() - val allDocuments = getAllDocumentData() - allDocuments.groupBy("research_id").forEach { - val acc = mutableSetOf>() - val sortBy = it.group.sortBy("date") - sortBy.rows().forEach { row -> - if (acc.find { it["filename"] == row["filename"] } == null) - acc.add(row) - } - val date = acc.minOf { it.get("date") as LocalDateTime } - writeToFiles(acc) - files.add(FilesDirectory(date, it.key.toString(), counter++)) - sortBy.rows().forEach { row -> - if (row !in acc) { - acc.removeIf { it["filename"] == row["filename"] } - acc.add(row) - writeToFiles(acc) - files.add(FilesDirectory(row["date"] as LocalDateTime, it.key.toString(), counter++)) - } - } - } - return files.toDataFrame() -// return dataFrameOf( -// listOf("date", "research_id", "directory"), -// files.flatMap { listOf(it.date, it.researchId, it.directory) }) -} - -fun DataRow<*>.getEventType(): String { - return when (this["data_type"]) { - "activityData" -> when (this["type"]) { - "Execution" -> { - if (this["info"].toString().startsWith("Run")) { - "Run.Program" - } else if (this["info"].toString().startsWith("Debug")) { - "Debug.Program" - } else { - TODO() - } - } - - "Shortcut" -> "X-Shortcut" - "KeyPressed" -> "X-KeyPressed" - "KeyReleased" -> "X-KeyReleased" - "Action" -> "X-Action" - else -> TODO() - } - - "fileEditorData" -> when (this["action"]) { - "FOCUS" -> "File.Focus" - "OPEN" -> "File.Open" - "CLOSE" -> "File.Close" - else -> TODO() - } - - "toolWindowData" -> when (this["action"]) { - "FOCUSED" -> "X-Toolwindow.Focus" - "OPENED" -> "X-Toolwindow.Open" - else -> TODO() - } - - else -> TODO() - } -} - -fun main() { - val message = getAllActivityData() - message.print() - var counter = 1 - // TODO add Session.Start and Session.End - message.mapToFrame { - "EventID" from { counter++ } - "subjectID" from { it["user"] } - "EventType" from { it.getEventType() } - "ToolInstance" from { "Kotlin" } - "CourseId" from { it["research_unique_id"] } - "CodeStateID" from TODO() - }.size().let { println(it) } - -} diff --git a/processing/src/main/kotlin/org/jetbrains/research/tasktracker/progsnap2/ProgsnapParser.kt b/processing/src/main/kotlin/org/jetbrains/research/tasktracker/progsnap2/ProgsnapParser.kt new file mode 100644 index 00000000..b36146d3 --- /dev/null +++ b/processing/src/main/kotlin/org/jetbrains/research/tasktracker/progsnap2/ProgsnapParser.kt @@ -0,0 +1,196 @@ +package org.jetbrains.research.tasktracker.progsnap2 + +import kotlinx.datetime.LocalDateTime +import org.jetbrains.kotlinx.dataframe.DataFrame +import org.jetbrains.kotlinx.dataframe.DataRow +import org.jetbrains.kotlinx.dataframe.api.* +import org.jetbrains.kotlinx.dataframe.io.readCSV +import org.jetbrains.kotlinx.dataframe.io.writeCSV +import org.jetbrains.kotlinx.dataframe.size +import java.io.File +import java.time.OffsetDateTime +import java.time.format.DateTimeFormatter +import kotlin.io.path.createDirectories +import kotlin.io.path.exists + +class ProgsnapParser(private val taskTrackerData: TaskTrackerData) { + + companion object { + private val DATE_FORMAT: DateTimeFormatter = DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss.SSSSSS XXX") + private val DIRECTORIES_DELIMITERS = arrayOf("/", "\\") + private const val CODE_STATES_DIRECTORY_NAME = "CodeStates" + private const val MAIN_TABLE_FILENAME = "MainTable.csv" + private const val META_DATA_FILENAME = "DatasetMetadata.csv" + } + + fun covert(destinationDirectory: File) { + require(destinationDirectory.exists()) { "destination directory $destinationDirectory doesn't exist" } + createMetaFile(destinationDirectory) + val researchGroups = getCodeDataFrame(destinationDirectory).groupBy("researchId") + createMainTableFile(destinationDirectory, researchGroups) + } + + private fun createMetaFile(destinationDirectory: File) { + val metaDataFile = destinationDirectory.resolve(META_DATA_FILENAME) + taskTrackerData.metaData.toDataFrame().writeCSV(metaDataFile) + } + + private fun createMainTableFile(destinationDirectory: File, researchGroups: GroupBy<*, *>) { + var id = 1 + val activityData = getAllActivityData() + activityData.mapToFrame { + "EventID" from { id++ } + "subjectID" from { it["user"] } + "EventType" from { it.getEventType() } + "ToolInstance" from { "Kotlin" } + "CourseId" from { it["research_unique_id"] } + "CodeStateID" from { + researchGroups.getDirectoryByDateAndResearch( + it["date"] as LocalDateTime, + it["research_id"].toString() + ) + } + }.dropNA().writeCSV(destinationDirectory.resolve(MAIN_TABLE_FILENAME)) + } + + private var activityData = + taskTrackerData.activityData.toDataFrame() + .add("data_type") { "activityData" } + private var documentData = + taskTrackerData.documentData.toDataFrame() + private var fileEditorData = + taskTrackerData.fileEditorData.toDataFrame() + .add("data_type") { "fileEditorData" } + private var researches = + taskTrackerData.researches.toDataFrame() + .convert("id").to() + private var surveyData = + taskTrackerData.surveyData.toDataFrame() + private var toolWindowData = + taskTrackerData.toolWindowData.toDataFrame() + .add("data_type") { "toolWindowData" } + private var users = + taskTrackerData.users.toDataFrame() + + private fun File.toDataFrame() = DataFrame.readCSV(this) + + private var directoryCounter = 1 + + private fun DataFrame<*>.convertDate() = this + .dropNA("date") + .update("date") + .with { OffsetDateTime.parse(it.toString(), DATE_FORMAT).toString() } + .convert("research_id").to() + .fullJoin(researches) { "research_id" match "id" } + .dropNA("date") + .convert("date") + .toLocalDateTime() + + private fun getAllActivityData() = + activityData + .concat(fileEditorData) + .concat(toolWindowData) + .convertDate() + .sortBy("date") + + private fun getAllDocumentData() = + documentData.convertDate() + + private fun writeToFiles(codeState: Collection>, destinationDirectory: File) = + codeState.forEach { fileRow -> + val relativePathElements = fileRow["filename"].toString().split(delimiters = DIRECTORIES_DELIMITERS) + val fileName = relativePathElements.last() + val directories = relativePathElements.take(relativePathElements.size - 1) + .joinToString(separator = "/") // do not take filename + val directoryToWrite = + destinationDirectory.toPath().resolve(CODE_STATES_DIRECTORY_NAME).resolve(directoryCounter.toString()) + .resolve(directories) + if (!directoryToWrite.exists()) { + directoryToWrite.createDirectories() + } + directoryToWrite.resolve(fileName).toFile().let { file -> + if (!file.exists()) { + file.createNewFile() + file.writeText(fileRow["fragment"].toString()) + } + } + } + + private fun getCodeDataFrame(destinationDirectory: File): DataFrame<*> { + val codeStates = mutableSetOf() + val allDocuments = getAllDocumentData() + allDocuments.groupBy("research_id").forEach { entry -> + val currentFileState = mutableSetOf>() + val sortedGroup = entry.group.sortBy("date") + sortedGroup.rows().forEach { row -> + if (!currentFileState.any { it["filename"] == row["filename"] }) { + currentFileState.add(row) + } + } + val date = currentFileState.minOf { it["date"] as LocalDateTime } + writeToFiles(currentFileState, destinationDirectory) // write first state of files in the research + codeStates.addNewState(date, entry) + sortedGroup.rows().forEach { row -> + if (row !in currentFileState) { + currentFileState.removeIf { it["filename"] == row["filename"] } + currentFileState.add(row) + writeToFiles(currentFileState, destinationDirectory) + codeStates.addNewState(row["date"] as LocalDateTime, entry) + } + } + } + return codeStates.toDataFrame() + } + + @Suppress("CyclomaticComplexMethod") + private fun DataRow<*>.getEventType(): String { + return when (val dataType = this["data_type"]) { + "activityData" -> { + when (val type = this["type"]) { + "Execution" -> { + val info = this["info"] + when { + info.toString().startsWith("Run") -> "Run.Program" + info.toString().startsWith("Debug") -> "Debug.Program" + else -> error("Undefined Execution type has been detected in the info `$info`") + } + } + + "Shortcut" -> "X-Shortcut" + "KeyPressed" -> "X-KeyPressed" + "KeyReleased" -> "X-KeyReleased" + "Action" -> "X-Action" + else -> error("Undefined activity data type has been detected in the data `$type`") + } + } + + "fileEditorData" -> when (val action = this["action"]) { + "FOCUS" -> "File.Focus" + "OPEN" -> "File.Open" + "CLOSE" -> "File.Close" + else -> error("Undefined action of file editor type has been detected: `$action`") + } + + "toolWindowData" -> when (val action = this["action"]) { + "FOCUSED" -> "X-Toolwindow.Focus" + "OPENED" -> "X-Toolwindow.Open" + else -> error("Undefined action of tool window type has been detected: `$action`") + } + + else -> error("Undefined datatype: `$dataType`") + } + } + + private fun GroupBy<*, *>.getDirectoryByDateAndResearch(date: LocalDateTime, researchId: String): Int? { + val key = keys.firstOrNull { it["researchId"] == researchId } ?: return null + val rows = groups[key.index()].rows().reversed() + val index = rows.indexOfFirst { date > it["date"] as LocalDateTime } + return if (index == -1) rows.first()["directory"] as Int else rows.elementAt(index)["directory"] as Int + } + + private fun MutableSet.addNewState(date: LocalDateTime, entry: GroupBy.Entry) = add( + FilesDirectory(date, entry.key.values().first().toString(), directoryCounter++) + ) + + private data class FilesDirectory(val date: LocalDateTime, val researchId: String, val directory: Int) +} diff --git a/processing/src/main/kotlin/org/jetbrains/research/tasktracker/progsnap2/TaskTrackerData.kt b/processing/src/main/kotlin/org/jetbrains/research/tasktracker/progsnap2/TaskTrackerData.kt new file mode 100644 index 00000000..73e08862 --- /dev/null +++ b/processing/src/main/kotlin/org/jetbrains/research/tasktracker/progsnap2/TaskTrackerData.kt @@ -0,0 +1,48 @@ +package org.jetbrains.research.tasktracker.progsnap2 + +import org.jetbrains.kotlinx.dataframe.api.dataFrameOf +import java.io.File +import java.util.* + +data class TaskTrackerData( + val activityData: File, + val documentData: File, + val fileEditorData: File, + val researches: File, + val surveyData: File, + val toolWindowData: File, + val users: File, + val metaData: ProgsnapMeta +) + +data class ProgsnapMeta( + val version: Int, + val codeStateRepresentation: CodeStateRepresentation, + val eventOrderScope: EventOrderScope = EventOrderScope.NONE, + val isEventOrderingConsistent: Boolean = false, + val eventOrderScopeColumns: String = "" +) { + fun toDataFrame() = dataFrameOf("Property", "Value")( + "Version", version, + "IsEventOrderingConsistent", isEventOrderingConsistent.toString().lowercase(Locale.getDefault()), + "EventOrderScope", eventOrderScope.toString().toValueName(), + "EventOrderScopeColumns", eventOrderScopeColumns, + "CodeStateRepresentation", codeStateRepresentation.toString().toValueName(), + ) +} + +enum class EventOrderScope { + NONE, + GLOBAL, + RESTRICTED +} + +enum class CodeStateRepresentation { + TABLE, + DIRECTORY, + GIT +} + +fun String.toValueName() = + lowercase(Locale.getDefault()) + .replaceFirstChar { if (it.isLowerCase()) it.titlecase(Locale.getDefault()) else it.toString() }