diff --git a/src/main/kotlin/ch/srgssr/pillarbox/monitoring/event/model/DataProcessor.kt b/src/main/kotlin/ch/srgssr/pillarbox/monitoring/event/model/DataProcessor.kt new file mode 100644 index 0000000..fd51352 --- /dev/null +++ b/src/main/kotlin/ch/srgssr/pillarbox/monitoring/event/model/DataProcessor.kt @@ -0,0 +1,29 @@ +package ch.srgssr.pillarbox.monitoring.event.model + +/** + * An interface defining a contract for processing and enriching a data node post deserialization. + */ +internal interface DataProcessor { + /** + * Processes and potentially enriches the given data node post deserialization. + * + * Implementations may modify the node to add metadata, validate data, or transform fields + * based on custom logic before the final object is constructed. + * + * @param data The data node to process. + * + * @return The processed JSON node, which may be modified or left unchanged. + */ + fun process(data: MutableMap): MutableMap + + /** + * Determines whether this processor should be executed based on the event type. + * + * Implementations can override this method to specify which event types they should handle. + * + * @param eventName The name of the event being processed. + * + * @return `true` if the processor should handle this event, `false` otherwise. + */ + fun shouldProcess(eventName: String): Boolean = true +} diff --git a/src/main/kotlin/ch/srgssr/pillarbox/monitoring/event/model/ErrorProcessor.kt b/src/main/kotlin/ch/srgssr/pillarbox/monitoring/event/model/ErrorProcessor.kt new file mode 100644 index 0000000..d50e27b --- /dev/null +++ b/src/main/kotlin/ch/srgssr/pillarbox/monitoring/event/model/ErrorProcessor.kt @@ -0,0 +1,131 @@ +package ch.srgssr.pillarbox.monitoring.event.model + +/** + * A processor that identifies and categorizes error messages found in an error data node. + * + * This processor determines whether an error message corresponds to a known content restriction type + * and annotates the data node with an appropriate classification. If the message is unknown or missing, + * it flags the error as a business error. + */ +internal class ErrorProcessor : DataProcessor { + /** + * Process only on ERROR events. + */ + override fun shouldProcess(eventName: String): Boolean = eventName == "ERROR" + + /** + * Processes the given data node to determine the type of error based on its message. + * + * If the `message` field is empty or null, the node is marked as a business error. + * If the message matches a predefined content restriction category, an `error_type` field is added. + * Otherwise, the node is flagged as a business error. + * + * @param data The data node to process. + * + * @return The enriched data node with additional error classification. + */ + override fun process(data: MutableMap): MutableMap { + val message = (data["message"] as? String) ?: return data + + val type = + ContentRestriction + .findByMessage(message) + ?.also { data["error_type"] = it.name } + + data["business_error"] = type != null + + return data + } +} + +/** + * Enum representing different content restriction types based on predefined error messages. + */ +internal enum class ContentRestriction( + val messages: List, +) { + AGERATING12( + listOf( + "To protect children this content is only available between 8PM and 6AM.", + "Pour protéger les enfants, ce contenu est accessible entre 20h et 6h.", + "Per proteggere i bambini, questo media è disponibile solo fra le 20 e le 6.", + "Per proteger uffants, è quest cuntegn disponibel mo tranter las 20.00 e las 06.00.", + ), + ), + AGERATING18( + listOf( + "To protect children this content is only available between 10PM and 5AM.", + "Pour protéger les enfants, ce contenu est accessible entre 23h et 5h.", + "Per proteggere i bambini, questo media è disponibile solo fra le 23 le 5.", + "Per proteger uffants, è quest cuntegn disponibel mo tranter las 23.00 e las 05.00.", + ), + ), + COMMERCIAL( + listOf( + "This commercial content is not available.", + "Ce contenu n'est actuellement pas disponible.", + "Questo contenuto commerciale non è disponibile.", + "Quest medium commerzial n'è betg disponibel.", + ), + ), + ENDDATE( + listOf( + "This content is not available anymore.", + "Ce contenu n'est plus disponible.", + "Questo media non è più disponibile.", + "Quest cuntegn n'è betg pli disponibel.", + ), + ), + GEOBLOCK( + listOf( + "This content is not available outside Switzerland.", + "La RTS ne dispose pas des droits de diffusion en dehors de la Suisse.", + "Questo media non è disponibile fuori dalla Svizzera.", + "Quest cuntegn n'è betg disponibel ordaifer la Svizra.", + ), + ), + JOURNALISTIC( + listOf( + "This content is temporarily unavailable for journalistic reasons.", + "Ce contenu est temporairement indisponible pour des raisons éditoriales.", + "Questo contenuto è temporaneamente non disponibile per motivi editoriali.", + "Quest cuntegn na stat ad interim betg a disposiziun per motivs publicistics.", + ), + ), + LEGAL( + listOf( + "This content is not available due to legal restrictions.", + "Pour des raisons juridiques, ce contenu n'est pas disponible.", + "Il contenuto non è fruibile a causa di restrizioni legali.", + "Quest cuntegn n'è betg disponibel perquai ch'el è scadì.", + ), + ), + STARTDATE( + listOf( + "This content is not available yet.", + "Ce contenu n'est pas encore disponible. Veuillez réessayer plus tard.", + "Il contenuto non è ancora disponibile. Per cortesia prova più tardi.", + "Quest cuntegn n'è betg anc disponibel. Empruvai pli tard.", + ), + ), + UNKNOWN( + listOf( + "This content is not available.", + "Ce contenu n'est actuellement pas disponible.", + "Questo media non è disponibile.", + "Quest cuntegn n'è betg disponibel.", + ), + ), + ; + + companion object { + private val messageToTypeMap: Map by lazy { + entries + .flatMap { type -> + type.messages.map { message -> message to type } + }.toMap() + } + + fun findByMessage(message: String): ContentRestriction? = messageToTypeMap[message] + } +} diff --git a/src/main/kotlin/ch/srgssr/pillarbox/monitoring/event/model/EventRequest.kt b/src/main/kotlin/ch/srgssr/pillarbox/monitoring/event/model/EventRequest.kt index 73b6d28..da206f8 100644 --- a/src/main/kotlin/ch/srgssr/pillarbox/monitoring/event/model/EventRequest.kt +++ b/src/main/kotlin/ch/srgssr/pillarbox/monitoring/event/model/EventRequest.kt @@ -2,14 +2,7 @@ package ch.srgssr.pillarbox.monitoring.event.model import com.fasterxml.jackson.annotation.JsonIgnore import com.fasterxml.jackson.annotation.JsonProperty -import com.fasterxml.jackson.core.JsonParser -import com.fasterxml.jackson.databind.DeserializationContext -import com.fasterxml.jackson.databind.JsonDeserializer -import com.fasterxml.jackson.databind.JsonNode import com.fasterxml.jackson.databind.annotation.JsonDeserialize -import com.fasterxml.jackson.databind.node.ObjectNode -import nl.basjes.parse.useragent.UserAgent -import nl.basjes.parse.useragent.UserAgentAnalyzer import org.springframework.data.annotation.Id import org.springframework.data.elasticsearch.annotations.DateFormat import org.springframework.data.elasticsearch.annotations.Document @@ -34,6 +27,7 @@ import org.springframework.data.elasticsearch.annotations.WriteTypeHint writeTypeHint = WriteTypeHint.FALSE, storeIdInSource = false, ) +@JsonDeserialize(converter = EventRequestDataConverter::class) data class EventRequest( @Id @JsonIgnore @@ -52,89 +46,7 @@ data class EventRequest( var ip: String?, @JsonProperty(required = true) var version: Long, - @JsonDeserialize(using = DataDeserializer::class) @JsonProperty(required = true) var data: Any, var session: Any?, ) - -/** - * Custom deserializer for the 'data' field in [EventRequest]. - * - * This deserializer processes the incoming JSON data to extract the user agent string from the - * `browser.agent` field and enriches the JSON node with detailed information about the browser, - * device, and operating system. - * - * If the `browser.agent` field is not present, the deserializer returns the node unmodified. - */ -private class DataDeserializer : JsonDeserializer() { - companion object { - private val userAgentAnalyzer = - UserAgentAnalyzer - .newBuilder() - .hideMatcherLoadStats() - .withCache(10000) - .build() - } - - fun isHackerOrRobot(userAgent: UserAgent): Boolean = - listOf(UserAgent.DEVICE_CLASS, UserAgent.LAYOUT_ENGINE_CLASS, UserAgent.AGENT_CLASS, UserAgent.AGENT_SECURITY) - .any { field -> - userAgent.getValue(field)?.let { value -> - value.startsWith("Hacker", ignoreCase = true) || - value.startsWith("Robot", ignoreCase = true) - } ?: false - } - - override fun deserialize( - parser: JsonParser, - ctxt: DeserializationContext, - ): Any? { - val node: JsonNode = parser.codec.readTree(parser) - val browserNode = (node as? ObjectNode)?.get("browser") - val userAgent = - (browserNode as? ObjectNode) - ?.get("user_agent") - ?.asText() - ?.let(userAgentAnalyzer::parse) ?: return parser.codec.treeToValue(node, Any::class.java) - - node.set( - "browser", - browserNode.apply { - put("name", userAgent.getValueOrNull(UserAgent.AGENT_NAME)) - put("version", userAgent.getValueOrNull(UserAgent.AGENT_VERSION)) - }, - ) - - node.set( - "device", - ObjectNode(ctxt.nodeFactory).apply { - put("model", userAgent.getValueOrNull(UserAgent.DEVICE_NAME)) - put("type", userAgent.getValueOrNull(UserAgent.DEVICE_CLASS)) - }, - ) - - node.set( - "os", - ObjectNode(ctxt.nodeFactory).apply { - put("name", userAgent.getValueOrNull(UserAgent.OPERATING_SYSTEM_NAME)) - put("version", userAgent.getValueOrNull(UserAgent.OPERATING_SYSTEM_VERSION)) - }, - ) - - node.put("robot", isHackerOrRobot(userAgent)) - - return parser.codec.treeToValue(node, Any::class.java) - } -} - -/** - * Private extension function for [UserAgent] to return `null` instead of "??" for unknown values. - * - * @param fieldName The name of the field to retrieve. - * @return The value of the field, or `null` if the value is "??". - */ -private fun UserAgent.getValueOrNull(fieldName: String): String? { - val value = this.getValue(fieldName) - return if (value == "??") null else value -} diff --git a/src/main/kotlin/ch/srgssr/pillarbox/monitoring/event/model/EventRequestDataConverter.kt b/src/main/kotlin/ch/srgssr/pillarbox/monitoring/event/model/EventRequestDataConverter.kt new file mode 100644 index 0000000..d037bce --- /dev/null +++ b/src/main/kotlin/ch/srgssr/pillarbox/monitoring/event/model/EventRequestDataConverter.kt @@ -0,0 +1,28 @@ +package ch.srgssr.pillarbox.monitoring.event.model + +import com.fasterxml.jackson.databind.util.StdConverter + +/** + * Custom converter for [EventRequest]. + * + * This converter enriches the incoming event request data node and applies transformations using + * registered processors before deserializing it into the appropriate format. + * + * If no transformation is needed, the converter returns the data node unmodified. + * + * @see [DataProcessor] + */ +internal class EventRequestDataConverter : StdConverter() { + private val processors = listOf(UserAgentProcessor(), ErrorProcessor()) + + @Suppress("UNCHECKED_CAST") + override fun convert(value: EventRequest): EventRequest { + (value.data as? MutableMap)?.let { data -> + processors + .filter { it.shouldProcess(value.eventName) } + .forEach { processor -> value.data = processor.process(data) } + } + + return value + } +} diff --git a/src/main/kotlin/ch/srgssr/pillarbox/monitoring/event/model/UserAgentProcessor.kt b/src/main/kotlin/ch/srgssr/pillarbox/monitoring/event/model/UserAgentProcessor.kt new file mode 100644 index 0000000..d7e6efe --- /dev/null +++ b/src/main/kotlin/ch/srgssr/pillarbox/monitoring/event/model/UserAgentProcessor.kt @@ -0,0 +1,83 @@ +package ch.srgssr.pillarbox.monitoring.event.model + +import nl.basjes.parse.useragent.UserAgent +import nl.basjes.parse.useragent.UserAgentAnalyzer + +/** + * A processor that analyzes and enriches user agent within data node. + * + * This processor extracts relevant details from the `user_agent` string using [UserAgentAnalyzer] + * and enriches the data node with structured information about the browser, device, and operating system. + * It also determines whether the user agent belongs to a robot. + */ +internal class UserAgentProcessor : DataProcessor { + companion object { + private val userAgentAnalyzer = + UserAgentAnalyzer + .newBuilder() + .hideMatcherLoadStats() + .withCache(10000) + .build() + } + + private fun isHackerOrRobot(userAgent: UserAgent): Boolean = + listOf(UserAgent.DEVICE_CLASS, UserAgent.LAYOUT_ENGINE_CLASS, UserAgent.AGENT_CLASS, UserAgent.AGENT_SECURITY) + .any { field -> + userAgent.getValue(field)?.let { value -> + value.startsWith("Hacker", ignoreCase = true) || + value.startsWith("Robot", ignoreCase = true) + } ?: false + } + + /** + * Process only on START events. + */ + override fun shouldProcess(eventName: String): Boolean = eventName == "START" + + /** + * Processes the given data node to extract and enrich user agent details. + * + * - If the `user_agent` field is found under the `browser` node, it is parsed using [UserAgentAnalyzer]. + * - Extracted data is used to populate the `browser`, `device`, and `os` fields with structured information. + * - The `robot` field is set to `true` if the user agent is identified as a robot. + * + * @param data The data node to process. + * + * @return The enriched data node with additional user agent classification. + */ + @Suppress("UNCHECKED_CAST") + override fun process(data: MutableMap): MutableMap { + val browserNode = data["browser"] as? MutableMap + val userAgent = (browserNode?.get("user_agent") as? String)?.let(userAgentAnalyzer::parse) ?: return data + + browserNode["name"] = userAgent.getValueOrNull(UserAgent.AGENT_NAME) + browserNode["version"] = userAgent.getValueOrNull(UserAgent.AGENT_VERSION) + + data["device"] = + (data["device"] as? MutableMap ?: mutableMapOf()).also { + it["model"] = userAgent.getValueOrNull(UserAgent.DEVICE_NAME) + it["type"] = userAgent.getValueOrNull(UserAgent.DEVICE_CLASS) + } + + data["os"] = + (data["os"] as? MutableMap ?: mutableMapOf()).also { + it["name"] = userAgent.getValueOrNull(UserAgent.OPERATING_SYSTEM_NAME) + it["version"] = userAgent.getValueOrNull(UserAgent.OPERATING_SYSTEM_VERSION) + } + + data["robot"] = isHackerOrRobot(userAgent) + + return data + } +} + +/** + * Private extension function for [UserAgent] to return `null` instead of "??" for unknown values. + * + * @param fieldName The name of the field to retrieve. + * @return The value of the field, or `null` if the value is "??". + */ +private fun UserAgent.getValueOrNull(fieldName: String): String? { + val value = this.getValue(fieldName) + return if (value == "??") null else value +} diff --git a/src/main/resources/opensearch/index.json b/src/main/resources/opensearch/index.json index a46387b..9c13b5d 100644 --- a/src/main/resources/opensearch/index.json +++ b/src/main/resources/opensearch/index.json @@ -5,40 +5,3 @@ } } } - -{ - "query": { - "bool": { - "filter": [ - { - "range": { - "@timestamp": { - "gte": 1737719669146, - "lte": 1737721469146, - "format": "epoch_millis" - } - } - }, - { - "query_string": { - "analyze_wildcard": true, - "query": "event_name:START" - } - } - ] - } - }, - "aggs": { - "2": { - "terms": { - "field": "data.media.id", - "size": 10, - "order": { - "_count": "desc" - }, - "min_doc_count": 1 - }, - "aggs": {} - } - } -} diff --git a/src/main/resources/opensearch/index_template.json b/src/main/resources/opensearch/index_template.json index ab26250..231c8cf 100644 --- a/src/main/resources/opensearch/index_template.json +++ b/src/main/resources/opensearch/index_template.json @@ -114,6 +114,13 @@ "type": "keyword", "ignore_above": 256 }, + "error_type": { + "type": "keyword", + "ignore_above": 256 + }, + "critical": { + "type": "boolean" + }, "name": { "type": "keyword", "ignore_above": 256 diff --git a/src/test/kotlin/ch/srgssr/pillarbox/monitoring/event/model/EventRequestTest.kt b/src/test/kotlin/ch/srgssr/pillarbox/monitoring/event/model/EventRequestTest.kt index 63a8e1a..5d9570f 100644 --- a/src/test/kotlin/ch/srgssr/pillarbox/monitoring/event/model/EventRequestTest.kt +++ b/src/test/kotlin/ch/srgssr/pillarbox/monitoring/event/model/EventRequestTest.kt @@ -184,4 +184,29 @@ class EventRequestTest( dataNode["device"] shouldBe null dataNode["os"] shouldBe null } + + should("classify error messages correctly based on predefined content restrictions") { + // Given: an input with an error message + val jsonInput = + """ + { + "session_id": "12345", + "event_name": "ERROR", + "timestamp": 1630000000000, + "user_ip": "127.0.0.1", + "version": 1, + "data": { + "message": "This content is not available outside Switzerland." + } + } + """.trimIndent() + + // When: the event is deserialized + val eventRequest = objectMapper.readValue(jsonInput) + + // Then: The error should be classified correctly + val dataNode = eventRequest.data as Map<*, *> + dataNode["error_type"] shouldBe "GEOBLOCK" + dataNode["business_error"] shouldBe true + } })