From 5a2080231939e0cc60acac6dcc1c6cb49377f8b7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicol=C3=A1s=20Tamargo?= Date: Wed, 8 Jan 2025 13:21:01 +0100 Subject: [PATCH 1/2] MBS-13893: Block invisible characters (Hangul, Braille) in usernames Invisible characters are problematic in usernames, since they make it easy to pose as someone else. While there can be cases where these characters could make sense elsewhere, in usernames we should play it a bit safer. As such, this creates a separate sanitize_username method that can call further cleanups on top of the basic sanitize, and runs the pre-existing remove_invisible_characters in it. --- lib/MusicBrainz/Server/Data/Editor.pm | 4 ++-- lib/MusicBrainz/Server/Data/Utils.pm | 12 ++++++++++++ lib/MusicBrainz/Server/Form/Utils.pm | 4 ++-- .../t/MusicBrainz/Server/Controller/User/Register.pm | 9 +++++++++ 4 files changed, 25 insertions(+), 4 deletions(-) diff --git a/lib/MusicBrainz/Server/Data/Editor.pm b/lib/MusicBrainz/Server/Data/Editor.pm index 2b0a4fc8b10..ce4cd2f8774 100644 --- a/lib/MusicBrainz/Server/Data/Editor.pm +++ b/lib/MusicBrainz/Server/Data/Editor.pm @@ -19,7 +19,7 @@ use MusicBrainz::Server::Data::Utils qw( hash_to_row load_subobjects placeholders - sanitize + sanitize_username ); use MusicBrainz::Server::Constants qw( :create_entity @@ -282,7 +282,7 @@ sub find_subscribers sub _die_if_username_invalid { my $name = shift; - my $sanitized_name = sanitize($name); + my $sanitized_name = sanitize_username($name); die 'Invalid user name' if ( $name ne $sanitized_name || diff --git a/lib/MusicBrainz/Server/Data/Utils.pm b/lib/MusicBrainz/Server/Data/Utils.pm index 5a3771afa4f..a6de6c6bbb0 100644 --- a/lib/MusicBrainz/Server/Data/Utils.pm +++ b/lib/MusicBrainz/Server/Data/Utils.pm @@ -73,6 +73,7 @@ our @EXPORT_OK = qw( remove_equal remove_invisible_characters sanitize + sanitize_username take_while trim trim_comment @@ -349,6 +350,17 @@ sub sanitize { return $t; } +sub sanitize_username { + my $t = shift; + + 
return '' unless non_empty($t); + + $t = sanitize($t); + $t = remove_invisible_characters($t); + + return $t; +} + sub trim { my $t = shift; diff --git a/lib/MusicBrainz/Server/Form/Utils.pm b/lib/MusicBrainz/Server/Form/Utils.pm index cc5b3b01bfa..35c0b9b842d 100644 --- a/lib/MusicBrainz/Server/Form/Utils.pm +++ b/lib/MusicBrainz/Server/Form/Utils.pm @@ -3,7 +3,7 @@ package MusicBrainz::Server::Form::Utils; use strict; use warnings; -use MusicBrainz::Server::Data::Utils qw( sanitize ); +use MusicBrainz::Server::Data::Utils qw( sanitize_username ); use MusicBrainz::Server::Translation qw( l lp ); use List::AllUtils qw( sort_by ); @@ -212,7 +212,7 @@ sub validate_username { if (defined $username) { unless (defined $previous_username && $editor_model->are_names_equivalent($previous_username, $username)) { - my $sanitized_name = sanitize($username); + my $sanitized_name = sanitize_username($username); if ( $username ne $sanitized_name || $sanitized_name =~ qr{://} diff --git a/t/lib/t/MusicBrainz/Server/Controller/User/Register.pm b/t/lib/t/MusicBrainz/Server/Controller/User/Register.pm index 79d637227b4..c3f105203fa 100644 --- a/t/lib/t/MusicBrainz/Server/Controller/User/Register.pm +++ b/t/lib/t/MusicBrainz/Server/Controller/User/Register.pm @@ -97,6 +97,15 @@ test 'Trying to register with an invalid name' => sub { like($mech->uri, qr{/register}, 'stays on registration page'); $mech->content_contains('username contains invalid characters', 'form has error message for consecutive spaces in username'); + $mech->submit_form( with_fields => { + 'register.username' => "test\N{HANGUL FILLER}\N{HALFWIDTH HANGUL FILLER}\N{BRAILLE PATTERN BLANK}\N{HANGUL CHOSEONG FILLER}\N{HANGUL JUNGSEONG FILLER}", + 'register.password' => 'foo', + 'register.confirm_password' => 'foo', + 'register.email' => 'foobar@example.org', + }); + like($mech->uri, qr{/register}, 'stays on registration page'); + $mech->content_contains('username contains invalid characters', 'form has error message for 
invisible characters in username'); + $mech->submit_form( with_fields => { 'register.username' => 'looks://like_a_url_to_me', 'register.password' => 'foo', From 67e9b80d00dcd5bb49b0149db81f4f0ec9b66c1b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicol=C3=A1s=20Tamargo?= Date: Wed, 8 Jan 2025 13:51:51 +0100 Subject: [PATCH 2/2] MBS-13857: Block unicode Tags block in usernames These can be used for flag emojis, which can have their place in entity names, annotations and the like. But they can also be (and have been) used as invisible characters to create seemingly-duplicate usernames, which is gaming the project. The downside seems higher than the dubious benefit of emoji flags in usernames, so this blocks their use there. In any case, it seems most emoji flags use regional indicator symbols instead. --- lib/MusicBrainz/Server/Data/Utils.pm | 11 +++++++++++ .../t/MusicBrainz/Server/Controller/User/Register.pm | 9 +++++++++ 2 files changed, 20 insertions(+) diff --git a/lib/MusicBrainz/Server/Data/Utils.pm b/lib/MusicBrainz/Server/Data/Utils.pm index a6de6c6bbb0..ae023eb6521 100644 --- a/lib/MusicBrainz/Server/Data/Utils.pm +++ b/lib/MusicBrainz/Server/Data/Utils.pm @@ -357,6 +357,7 @@ sub sanitize_username { $t = sanitize($t); $t = remove_invisible_characters($t); + $t = remove_tag_characters($t); return $t; } @@ -482,6 +483,16 @@ sub remove_lineformatting_characters { =~ s/[\N{ZERO WIDTH SPACE}\N{SOFT HYPHEN}\p{Cc}]//gr; } +sub remove_tag_characters { + my $string = shift; + + # https://en.wikipedia.org/wiki/Tags_(Unicode_block) + # Can be used for flag emojis but also as invisible chars + $string =~ s/[\x{E0000}-\x{E007F}]//g; + + return $string; +} + sub type_to_model { return $TYPE_TO_MODEL{$_[0]} || die "$_[0] is not a type that has a model"; diff --git a/t/lib/t/MusicBrainz/Server/Controller/User/Register.pm b/t/lib/t/MusicBrainz/Server/Controller/User/Register.pm index c3f105203fa..56180849ccb 100644 --- a/t/lib/t/MusicBrainz/Server/Controller/User/Register.pm 
+++ b/t/lib/t/MusicBrainz/Server/Controller/User/Register.pm @@ -106,6 +106,15 @@ test 'Trying to register with an invalid name' => sub { like($mech->uri, qr{/register}, 'stays on registration page'); $mech->content_contains('username contains invalid characters', 'form has error message for invisible characters in username'); + $mech->submit_form( with_fields => { + 'register.username' => "test\N{TAG LATIN CAPITAL LETTER T}\N{TAG LATIN CAPITAL LETTER E}\N{TAG LATIN CAPITAL LETTER S}\N{TAG LATIN CAPITAL LETTER T}", + 'register.password' => 'foo', + 'register.confirm_password' => 'foo', + 'register.email' => 'foobar@example.org', + }); + like($mech->uri, qr{/register}, 'stays on registration page'); + $mech->content_contains('username contains invalid characters', 'form has error message for tag characters in username'); + $mech->submit_form( with_fields => { 'register.username' => 'looks://like_a_url_to_me', 'register.password' => 'foo',