Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: error classification #25

Merged
merged 1 commit into from
Jan 31, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
package ch.srgssr.pillarbox.monitoring.event.model

/**
* An interface defining a contract for processing and enriching a data node post deserialization.
*/
internal interface DataProcessor {
/**
* Processes and potentially enriches the given data node post deserialization.
*
* Implementations may modify the node to add metadata, validate data, or transform fields
* based on custom logic before the final object is constructed.
*
* @param data The data node to process.
*
* @return The processed JSON node, which may be modified or left unchanged.
*/
fun process(data: MutableMap<String, Any?>): MutableMap<String, Any?>

/**
* Determines whether this processor should be executed based on the event type.
*
* Implementations can override this method to specify which event types they should handle.
*
* @param eventName The name of the event being processed.
*
* @return `true` if the processor should handle this event, `false` otherwise.
*/
fun shouldProcess(eventName: String): Boolean = true
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@
package ch.srgssr.pillarbox.monitoring.event.model

/**
* A processor that identifies and categorizes error messages found in an error data node.
*
* This processor determines whether an error message corresponds to a known content restriction type
* and annotates the data node with an appropriate classification.
*/
internal class ErrorProcessor : DataProcessor {
/**
* Process only on ERROR events.
*/
override fun shouldProcess(eventName: String): Boolean = eventName == "ERROR"

/**
* Processes the given data node to determine the type of error based on its message:
*
* If the message matches a predefined content restriction category, an `error_type` field is added
* and the error is flagged as a business error.
*
* @param data The data node to process.
*
* @return The enriched data node with additional error classification.
*/
override fun process(data: MutableMap<String, Any?>): MutableMap<String, Any?> {
val type =
(data["message"] as? String)?.let {
ContentRestriction.findByMessage(it)
}

data["error_type"] = type?.name
data["business_error"] = type != null

return data
}
}

/**
* Enum representing different content restriction types based on predefined error messages.
*/
internal enum class ContentRestriction(
val messages: List<String>,
) {
AGERATING12(
listOf(
"To protect children this content is only available between 8PM and 6AM.",
"Pour protéger les enfants, ce contenu est accessible entre 20h et 6h.",
"Per proteggere i bambini, questo media è disponibile solo fra le 20 e le 6.",
"Per proteger uffants, è quest cuntegn disponibel mo tranter las 20.00 e las 06.00.",
),
),
AGERATING18(
listOf(
"To protect children this content is only available between 10PM and 5AM.",
"Pour protéger les enfants, ce contenu est accessible entre 23h et 5h.",
"Per proteggere i bambini, questo media è disponibile solo fra le 23 le 5.",
"Per proteger uffants, è quest cuntegn disponibel mo tranter las 23.00 e las 05.00.",
),
),
COMMERCIAL(
listOf(
"This commercial content is not available.",
"Ce contenu n'est actuellement pas disponible.",
"Questo contenuto commerciale non è disponibile.",
"Quest medium commerzial n'è betg disponibel.",
),
),
ENDDATE(
listOf(
"This content is not available anymore.",
"Ce contenu n'est plus disponible.",
"Questo media non è più disponibile.",
"Quest cuntegn n'è betg pli disponibel.",
),
),
GEOBLOCK(
listOf(
"This content is not available outside Switzerland.",
"La RTS ne dispose pas des droits de diffusion en dehors de la Suisse.",
"Questo media non è disponibile fuori dalla Svizzera.",
"Quest cuntegn n'è betg disponibel ordaifer la Svizra.",
),
),
JOURNALISTIC(
listOf(
"This content is temporarily unavailable for journalistic reasons.",
"Ce contenu est temporairement indisponible pour des raisons éditoriales.",
"Questo contenuto è temporaneamente non disponibile per motivi editoriali.",
"Quest cuntegn na stat ad interim betg a disposiziun per motivs publicistics.",
),
),
LEGAL(
listOf(
"This content is not available due to legal restrictions.",
"Pour des raisons juridiques, ce contenu n'est pas disponible.",
"Il contenuto non è fruibile a causa di restrizioni legali.",
"Quest cuntegn n'è betg disponibel perquai ch'el è scadì.",
),
),
STARTDATE(
listOf(
"This content is not available yet.",
"Ce contenu n'est pas encore disponible. Veuillez réessayer plus tard.",
"Il contenuto non è ancora disponibile. Per cortesia prova più tardi.",
"Quest cuntegn n'è betg anc disponibel. Empruvai pli tard.",
),
),
UNKNOWN(
listOf(
"This content is not available.",
"Ce contenu n'est actuellement pas disponible.",
"Questo media non è disponibile.",
"Quest cuntegn n'è betg disponibel.",
),
),
;

companion object {
private val messageToTypeMap: Map<String, ContentRestriction> by lazy {
entries
.flatMap { type ->
type.messages.map { message -> message to type }
}.toMap()
}

fun findByMessage(message: String): ContentRestriction? = messageToTypeMap[message]
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,7 @@ package ch.srgssr.pillarbox.monitoring.event.model

import com.fasterxml.jackson.annotation.JsonIgnore
import com.fasterxml.jackson.annotation.JsonProperty
import com.fasterxml.jackson.core.JsonParser
import com.fasterxml.jackson.databind.DeserializationContext
import com.fasterxml.jackson.databind.JsonDeserializer
import com.fasterxml.jackson.databind.JsonNode
import com.fasterxml.jackson.databind.annotation.JsonDeserialize
import com.fasterxml.jackson.databind.node.ObjectNode
import nl.basjes.parse.useragent.UserAgent
import nl.basjes.parse.useragent.UserAgentAnalyzer
import org.springframework.data.annotation.Id
import org.springframework.data.elasticsearch.annotations.DateFormat
import org.springframework.data.elasticsearch.annotations.Document
Expand All @@ -34,6 +27,7 @@ import org.springframework.data.elasticsearch.annotations.WriteTypeHint
writeTypeHint = WriteTypeHint.FALSE,
storeIdInSource = false,
)
@JsonDeserialize(converter = EventRequestDataConverter::class)
data class EventRequest(
@Id
@JsonIgnore
Expand All @@ -52,89 +46,7 @@ data class EventRequest(
var ip: String?,
@JsonProperty(required = true)
var version: Long,
@JsonDeserialize(using = DataDeserializer::class)
@JsonProperty(required = true)
var data: Any,
var session: Any?,
)

/**
* Custom deserializer for the 'data' field in [EventRequest].
*
* This deserializer processes the incoming JSON data to extract the user agent string from the
* `browser.agent` field and enriches the JSON node with detailed information about the browser,
* device, and operating system.
*
* If the `browser.agent` field is not present, the deserializer returns the node unmodified.
*/
private class DataDeserializer : JsonDeserializer<Any?>() {
companion object {
private val userAgentAnalyzer =
UserAgentAnalyzer
.newBuilder()
.hideMatcherLoadStats()
.withCache(10000)
.build()
}

fun isHackerOrRobot(userAgent: UserAgent): Boolean =
listOf(UserAgent.DEVICE_CLASS, UserAgent.LAYOUT_ENGINE_CLASS, UserAgent.AGENT_CLASS, UserAgent.AGENT_SECURITY)
.any { field ->
userAgent.getValue(field)?.let { value ->
value.startsWith("Hacker", ignoreCase = true) ||
value.startsWith("Robot", ignoreCase = true)
} ?: false
}

override fun deserialize(
parser: JsonParser,
ctxt: DeserializationContext,
): Any? {
val node: JsonNode = parser.codec.readTree(parser)
val browserNode = (node as? ObjectNode)?.get("browser")
val userAgent =
(browserNode as? ObjectNode)
?.get("user_agent")
?.asText()
?.let(userAgentAnalyzer::parse) ?: return parser.codec.treeToValue(node, Any::class.java)

node.set<ObjectNode>(
"browser",
browserNode.apply {
put("name", userAgent.getValueOrNull(UserAgent.AGENT_NAME))
put("version", userAgent.getValueOrNull(UserAgent.AGENT_VERSION))
},
)

node.set<ObjectNode>(
"device",
ObjectNode(ctxt.nodeFactory).apply {
put("model", userAgent.getValueOrNull(UserAgent.DEVICE_NAME))
put("type", userAgent.getValueOrNull(UserAgent.DEVICE_CLASS))
},
)

node.set<ObjectNode>(
"os",
ObjectNode(ctxt.nodeFactory).apply {
put("name", userAgent.getValueOrNull(UserAgent.OPERATING_SYSTEM_NAME))
put("version", userAgent.getValueOrNull(UserAgent.OPERATING_SYSTEM_VERSION))
},
)

node.put("robot", isHackerOrRobot(userAgent))

return parser.codec.treeToValue(node, Any::class.java)
}
}

/**
* Private extension function for [UserAgent] to return `null` instead of "??" for unknown values.
*
* @param fieldName The name of the field to retrieve.
* @return The value of the field, or `null` if the value is "??".
*/
private fun UserAgent.getValueOrNull(fieldName: String): String? {
val value = this.getValue(fieldName)
return if (value == "??") null else value
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
package ch.srgssr.pillarbox.monitoring.event.model

import com.fasterxml.jackson.databind.util.StdConverter

/**
* Custom converter for [EventRequest].
*
* This converter enriches the incoming event request data node and applies transformations using
* registered processors before deserializing it into the appropriate format.
*
* If no transformation is needed, the converter returns the data node unmodified.
*
* @see [DataProcessor]
*/
internal class EventRequestDataConverter : StdConverter<EventRequest, EventRequest>() {
private val processors = listOf(UserAgentProcessor(), ErrorProcessor())

@Suppress("UNCHECKED_CAST")
override fun convert(value: EventRequest): EventRequest {
(value.data as? MutableMap<String, Any?>)?.let { data ->
processors
.filter { it.shouldProcess(value.eventName) }
.forEach { processor -> value.data = processor.process(data) }
}

return value
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
package ch.srgssr.pillarbox.monitoring.event.model

import nl.basjes.parse.useragent.UserAgent
import nl.basjes.parse.useragent.UserAgentAnalyzer

/**
* A processor that analyzes and enriches user agent within data node.
*
* This processor extracts relevant details from the `user_agent` string using [UserAgentAnalyzer]
* and enriches the data node with structured information about the browser, device, and operating system.
* It also determines whether the user agent belongs to a robot.
*/
internal class UserAgentProcessor : DataProcessor {
companion object {
private val userAgentAnalyzer =
UserAgentAnalyzer
.newBuilder()
.hideMatcherLoadStats()
.withCache(10000)
.build()
}

private fun isHackerOrRobot(userAgent: UserAgent): Boolean =
listOf(UserAgent.DEVICE_CLASS, UserAgent.LAYOUT_ENGINE_CLASS, UserAgent.AGENT_CLASS, UserAgent.AGENT_SECURITY)
.any { field ->
userAgent.getValue(field)?.let { value ->
value.startsWith("Hacker", ignoreCase = true) ||
value.startsWith("Robot", ignoreCase = true)
} ?: false
}

/**
* Process only on START events.
*/
override fun shouldProcess(eventName: String): Boolean = eventName == "START"

/**
* Processes the given data node to extract and enrich user agent details.
*
* - If the `user_agent` field is found under the `browser` node, it is parsed using [UserAgentAnalyzer].
* - Extracted data is used to populate the `browser`, `device`, and `os` fields with structured information.
* - The `robot` field is set to `true` if the user agent is identified as a robot.
*
* @param data The data node to process.
*
* @return The enriched data node with additional user agent classification.
*/
@Suppress("UNCHECKED_CAST")
override fun process(data: MutableMap<String, Any?>): MutableMap<String, Any?> {
val browserNode = data["browser"] as? MutableMap<String, Any?>
val userAgent = (browserNode?.get("user_agent") as? String)?.let(userAgentAnalyzer::parse) ?: return data

browserNode["name"] = userAgent.getValueOrNull(UserAgent.AGENT_NAME)
browserNode["version"] = userAgent.getValueOrNull(UserAgent.AGENT_VERSION)

data["device"] =
(data["device"] as? MutableMap<String, Any?> ?: mutableMapOf()).also {
it["model"] = userAgent.getValueOrNull(UserAgent.DEVICE_NAME)
it["type"] = userAgent.getValueOrNull(UserAgent.DEVICE_CLASS)
}

data["os"] =
(data["os"] as? MutableMap<String, Any?> ?: mutableMapOf()).also {
it["name"] = userAgent.getValueOrNull(UserAgent.OPERATING_SYSTEM_NAME)
it["version"] = userAgent.getValueOrNull(UserAgent.OPERATING_SYSTEM_VERSION)
}

data["robot"] = isHackerOrRobot(userAgent)

return data
}
}

/**
* Private extension function for [UserAgent] to return `null` instead of "??" for unknown values.
*
* @param fieldName The name of the field to retrieve.
* @return The value of the field, or `null` if the value is "??".
*/
private fun UserAgent.getValueOrNull(fieldName: String): String? {
val value = this.getValue(fieldName)
return if (value == "??") null else value
}
Loading