From 68af29891342cf627d61ca04ca1d403bc795afb4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tina=20M=C3=BCller?= Date: Sat, 18 Jan 2025 04:20:57 +0100 Subject: [PATCH] Forbid unknown tags by default * Add Catchall schema to allow them * Add --catchall option --- bin/yamlpp-load | 5 +++ bin/yamlpp-load-dump | 5 +++ lib/YAML/PP/Schema.pm | 9 ++++ lib/YAML/PP/Schema/Catchall.pm | 76 ++++++++++++++++++++++++++++++++++ lib/YAML/PP/Schema/Failsafe.pm | 22 ++++++++++ lib/YAML/PP/Schema/Perl.pm | 20 +++++---- t/37.schema-catchall.t | 58 ++++++++++++++++++++++++++ t/45.binary.t | 3 +- t/lib/YAML/PP/Test.pm | 4 +- 9 files changed, 191 insertions(+), 11 deletions(-) create mode 100644 lib/YAML/PP/Schema/Catchall.pm create mode 100644 t/37.schema-catchall.t diff --git a/bin/yamlpp-load b/bin/yamlpp-load index 0d76a21d..7b50e8bf 100755 --- a/bin/yamlpp-load +++ b/bin/yamlpp-load @@ -15,6 +15,7 @@ GetOptions( 'cyclic' => \my $cyclic, 'duplicate-keys' => \my $duplicate_keys, 'merge' => \my $merge, + 'catchall' => \my $catchall, 'perl' => \my $perl, 'module|M=s' => \my $module, 'yaml-version=s' => \my $yaml_version, @@ -33,6 +34,9 @@ if ($merge) { if ($perl) { push @schema, 'Perl'; } +if ($catchall) { + push @schema, 'Catchall'; +} my ($file) = @ARGV; my $yaml; @@ -137,6 +141,7 @@ Options: --cyclic Allow cyclic references --duplicate-keys Allow duplicate keys --merge Enable loading merge keys '<<' + --catchall Ignore any unknown tags --perl Enable loading perl types and objects (use only on trusted input!) --module -M YAML::PP (default), YAML, YAML::PP::LibYAML, diff --git a/bin/yamlpp-load-dump b/bin/yamlpp-load-dump index c501d8b9..c689fbae 100755 --- a/bin/yamlpp-load-dump +++ b/bin/yamlpp-load-dump @@ -21,6 +21,7 @@ GetOptions( 'header!' => \my $header, 'footer!' => \my $footer, 'merge' => \my $merge, + 'catchall' => \my $catchall, 'perl' => \my $perl, 'preserve|P=s' => \my $preserve, 'module|M=s' => \my $module, @@ -50,6 +51,9 @@ if ($merge) { if ($perl) { push @schema, 'Perl'; } +if ($catchall) { + push @schema, 'Catchall'; +} my $preserve_order = 1; if (defined $preserve) { @@ -287,6 +291,7 @@ Options: --[no-]header Print '---' (default) --[no-]footer Print '...' --merge Enable loading merge keys '<<' + --catchall Ignore any unknown tags --perl Enable loading perl types and objects (use only on trusted input!) --preserve, -P Comma separated: 'order', 'scalar', 'flow', 'alias'. diff --git a/lib/YAML/PP/Schema.pm b/lib/YAML/PP/Schema.pm index 7f2528a3..2c2f3ae4 100644 --- a/lib/YAML/PP/Schema.pm +++ b/lib/YAML/PP/Schema.pm @@ -8,6 +8,7 @@ our $VERSION = '0.000'; # VERSION use YAML::PP::Common qw/ YAML_PLAIN_SCALAR_STYLE /; +use Carp qw/ croak /; use Scalar::Util qw/ blessed /; sub new { @@ -306,6 +307,9 @@ sub load_scalar { my $resolvers = $self->resolvers; my $res; if ($tag) { + if ($tag eq '!') { + return $value; + } $res = $resolvers->{tag}->{ $tag }; if (not $res and my $matches = $resolvers->{tags}) { for my $match (@$matches) { @@ -316,6 +320,9 @@ sub load_scalar { } } } + unless ($res) { + croak "Unknown tag '$tag'. Use schema 'Catchall' to ignore unknown tags"; + } } else { $res = $resolvers->{value}; @@ -376,6 +383,7 @@ sub create_sequence { } } } + croak "Unknown tag '$tag'. Use schema 'Catchall' to ignore unknown tags"; } return ($data, $on_data); @@ -406,6 +414,7 @@ sub create_mapping { } } } + croak "Unknown tag '$tag'. Use schema 'Catchall' to ignore unknown tags"; } return ($data, $on_data); diff --git a/lib/YAML/PP/Schema/Catchall.pm b/lib/YAML/PP/Schema/Catchall.pm new file mode 100644 index 00000000..a7d8aef3 --- /dev/null +++ b/lib/YAML/PP/Schema/Catchall.pm @@ -0,0 +1,76 @@ +use strict; +use warnings; +package YAML::PP::Schema::Catchall; + +our $VERSION = '0.000'; # VERSION + +use Carp qw/ croak /; + +use YAML::PP::Common qw/ YAML_PLAIN_SCALAR_STYLE YAML_SINGLE_QUOTED_SCALAR_STYLE /; + +sub register { + my ($self, %args) = @_; + my $schema = $args{schema}; + my $options = $args{options}; + my $empty_null = 0; + for my $opt (@$options) { + if ($opt eq 'empty=str') { + } + elsif ($opt eq 'empty=null') { + $empty_null = 1; + } + else { + croak "Invalid option for JSON Schema: '$opt'"; + } + } + + $schema->add_resolver( + tag => qr{^(?:!|tag:)}, + match => [ all => sub { + my ($constructor, $event) = @_; + my $value = $event->{value}; + return $value; + }], + implicit => 0, + ); + $schema->add_sequence_resolver( + tag => qr{^(?:!|tag:)}, + on_data => sub { + my ($constructor, $ref, $list) = @_; + push @$$ref, @$list; + }, + ); + $schema->add_mapping_resolver( + tag => qr{^(?:!|tag:)}, + on_data => sub { + my ($constructor, $ref, $list) = @_; + for (my $i = 0; $i < @$list; $i += 2) { + $$ref->{ $list->[ $i ] } = $list->[ $i + 1 ]; + } + }, + ); + + return; +} + +1; + +__END__ + +=pod + +=encoding utf-8 + +=head1 NAME + +YAML::PP::Schema::JSON - YAML 1.2 JSON Schema + +=head1 SYNOPSIS + + my $yp = YAML::PP->new( schema => ['JSON'] ); + my $yp = YAML::PP->new( schema => [qw/ JSON empty=str /] ); + my $yp = YAML::PP->new( schema => [qw/ JSON empty=null /] ); + +=head1 DESCRIPTION + +=cut diff --git a/lib/YAML/PP/Schema/Failsafe.pm b/lib/YAML/PP/Schema/Failsafe.pm index 1dcce68f..1b9e5895 100644 --- a/lib/YAML/PP/Schema/Failsafe.pm +++ b/lib/YAML/PP/Schema/Failsafe.pm @@ -6,6 +6,28 @@ our $VERSION = '0.000'; # VERSION sub register { my ($self, %args) = @_; + my $schema = $args{schema}; + + $schema->add_resolver( + tag => 'tag:yaml.org,2002:str', + match => [ all => sub { $_[1]->{value} } ], + ); + $schema->add_sequence_resolver( + tag => 'tag:yaml.org,2002:seq', + on_data => sub { + my ($constructor, $ref, $list) = @_; + push @$$ref, @$list; + }, + ); + $schema->add_mapping_resolver( + tag => 'tag:yaml.org,2002:map', + on_data => sub { + my ($constructor, $ref, $list) = @_; + for (my $i = 0; $i < @$list; $i += 2) { + $$ref->{ $list->[ $i ] } = $list->[ $i + 1 ]; + } + }, + ); return; } diff --git a/lib/YAML/PP/Schema/Perl.pm b/lib/YAML/PP/Schema/Perl.pm index 5f0cc309..b8339d3d 100644 --- a/lib/YAML/PP/Schema/Perl.pm +++ b/lib/YAML/PP/Schema/Perl.pm @@ -157,7 +157,7 @@ sub register { tag => qr{^$perl_regex/code:.+}, match => [ all => $loadcode_dummy ], implicit => 0, - ) if $no_objects; + ); } # Glob @@ -207,6 +207,10 @@ sub register { my ($constructor, $event) = @_; return $self->construct_regex($event->{value}); }; + my $load_regex_dummy = sub { + my ($constructor, $event) = @_; + return $event->{value}; + }; my $load_regex_blessed = sub { my ($constructor, $event) = @_; my $class = $event->{tag}; @@ -225,10 +229,10 @@ sub register { implicit => 0, ); $schema->add_resolver( - tag => qr{^$perl_regex/regexp:$class_regex$}, - match => [ all => $load_regex ], + tag => qr{^$perl_regex/regexp:.*$}, + match => [ all => $load_regex_dummy ], implicit => 0, - ) if $no_objects; + ); my $load_sequence = sub { return [] }; my $load_sequence_blessed = sub { @@ -248,7 +252,7 @@ sub register { $schema->add_sequence_resolver( tag => qr{^$perl_regex/array:.+$}, on_create => $load_sequence, - ) if $no_objects; + );# if $no_objects; my $load_mapping = sub { return {} }; my $load_mapping_blessed = sub { @@ -268,7 +272,7 @@ sub register { $schema->add_mapping_resolver( tag => qr{^$perl_regex/hash:.+$}, on_create => $load_mapping, - ) if $no_objects; + ); # if $no_objects; # Ref my $load_ref = sub { @@ -305,7 +309,7 @@ sub register { my ($constructor, $ref, $list) = @_; $$$ref = $self->construct_ref($list); }, - ) if $no_objects; + ); # if $no_objects; # Scalar ref my $load_scalar_ref = sub { @@ -342,7 +346,7 @@ sub register { my ($constructor, $ref, $list) = @_; $$$ref = $self->construct_scalar($list); }, - ) if $no_objects; + ); # if $no_objects; $schema->add_representer( scalarref => 1, diff --git a/t/37.schema-catchall.t b/t/37.schema-catchall.t new file mode 100644 index 00000000..26e40b05 --- /dev/null +++ b/t/37.schema-catchall.t @@ -0,0 +1,58 @@ +#!/usr/bin/env perl +use strict; +use warnings; +use Test::More; +use FindBin '$Bin'; +use Data::Dumper; +use Scalar::Util (); +use YAML::PP; + +my $yp = YAML::PP->new( + schema => [qw/ JSON /], +); +my $catch = YAML::PP->new( + schema => [qw/ JSON Catchall /], +); + +my $yaml = <<'EOM'; +- !foo null +EOM + +my $data = eval { $yp->load_string($yaml) }; +my $err = $@; +like $err, qr{Unknown tag '!foo'. Use schema 'Catchall'}, "unknoen tags are fatal by default"; + +$data = $catch->load_string($yaml); +is $data->[0], 'null', "Catchall loads unknown tag as string"; + +$yaml = "! 023"; + +$data = $yp->load_string($yaml); +is $data, '023', "Tag '!' still works without catchall"; + +$data = $catch->load_string($yaml); +is $data, '023', "Tag '!' still works with catchall"; + +$yaml = <<'EOM'; +!foo +- a +EOM +$data = eval { $yp->load_string($yaml) }; +$err = $@; +like $err, qr{Unknown tag '!foo'. Use schema 'Catchall'}, "unknoen tags are fatal by default"; + +$data = $catch->load_string($yaml); +is $data->[0], 'a', "Catchall loads unknown tag on a sequence"; + +$yaml = <<'EOM'; +!foo +a: b +EOM +$data = eval { $yp->load_string($yaml) }; +$err = $@; +like $err, qr{Unknown tag '!foo'. Use schema 'Catchall'}, "unknoen tags are fatal by default"; + +$data = $catch->load_string($yaml); +is $data->{a}, 'b', "Catchall loads unknown tag on a mapping"; + +done_testing; diff --git a/t/45.binary.t b/t/45.binary.t index caed5696..3920fb59 100644 --- a/t/45.binary.t +++ b/t/45.binary.t @@ -8,7 +8,7 @@ use Encode; use Data::Dumper; my $yp_binary = YAML::PP->new( schema => [qw/ JSON Binary /] ); -my $yp = YAML::PP->new( schema => [qw/ JSON /] ); +my $yp = YAML::PP->new( schema => [qw/ JSON Catchall /] ); my $gif = "GIF89a\f\0\f\0\204\0\0\377\377\367\365\365\356\351\351\345fff" . "\0\0\0\347\347\347^^^\363\363\355\216\216\216\340\340\340\237\237\237" @@ -63,6 +63,7 @@ my @tests = ( [binary => [$gif, 'foo'],], ); + subtest roundtrip => sub { for my $item (@tests) { select undef, undef, undef, 0.1; diff --git a/t/lib/YAML/PP/Test.pm b/t/lib/YAML/PP/Test.pm index 1a9ad801..25ba3b69 100644 --- a/t/lib/YAML/PP/Test.pm +++ b/t/lib/YAML/PP/Test.pm @@ -487,7 +487,7 @@ sub compare_invalid_parse_events { sub load_json { my ($self, $testcase) = @_; - my $ypp = YAML::PP->new(boolean => 'JSON::PP', schema => [qw/ Core /]); + my $ypp = YAML::PP->new(boolean => 'JSON::PP', schema => [qw/ Core Catchall /]); my @docs = eval { $ypp->load_string($testcase->{in_yaml}) }; my $err = $@; @@ -554,7 +554,7 @@ sub dump_yaml { my ($self, $testcase) = @_; my $id = $testcase->{id}; - my $ypp = YAML::PP->new( boolean => 'JSON::PP', duplicate_keys => 1 ); + my $ypp = YAML::PP->new( boolean => 'JSON::PP', duplicate_keys => 1, schema => [qw/ + Catchall /] ); my @docs = eval { $ypp->load_string($testcase->{in_yaml}) }; my $err = $@; my $result = {};