diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index 1ca4a6e9..582b63b5 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -20,6 +20,7 @@ jobs: with: ruby-version: ${{ matrix.ruby }} bundler-cache: true + - run: flex --help - run: bundle install - run: bundle exec rspec test-windows: @@ -34,6 +35,8 @@ jobs: with: ruby-version: ${{ matrix.ruby }} bundler-cache: true + - run: choco install winflexbison + - run: win_flex --help - run: bundle install - run: bundle exec rspec check-misc: @@ -65,6 +68,8 @@ jobs: fail-fast: false matrix: ruby: ['head'] + env: + INSTALL_STEEP: 'true' steps: - uses: actions/checkout@v4 - uses: ruby/setup-ruby@v1 diff --git a/Gemfile b/Gemfile index 52479896..caa7e49f 100644 --- a/Gemfile +++ b/Gemfile @@ -5,8 +5,13 @@ gemspec gem "pry" gem "racc", "1.7.3" gem "rake" -gem "rbs", require: false gem "rspec" gem "simplecov", require: false gem "stackprof", platforms: [:ruby] # stackprof doesn't support Windows -gem "steep", require: false + +# Recent steep requires Ruby >= 3.0.0. +# Then skip install on some CI jobs. 
+if !ENV['GITHUB_ACTION'] || ENV['INSTALL_STEEP'] == 'true' + gem "rbs", "3.3.0", require: false + gem "steep", "1.6.0", require: false +end diff --git a/Steepfile b/Steepfile index d3c4b401..1847c602 100644 --- a/Steepfile +++ b/Steepfile @@ -6,8 +6,12 @@ target :lib do check "lib/lrama/bitmap.rb" check "lib/lrama/digraph.rb" + check "lib/lrama/grammar/code.rb" + check "lib/lrama/grammar/code/printer_code.rb" check "lib/lrama/grammar/counter.rb" check "lib/lrama/grammar/percent_code.rb" + check "lib/lrama/grammar/precedence.rb" + check "lib/lrama/grammar/printer.rb" check "lib/lrama/grammar/reference.rb" check "lib/lrama/grammar/rule_builder.rb" check "lib/lrama/lexer/token/char.rb" diff --git a/lib/lrama/grammar.rb b/lib/lrama/grammar.rb index b58b4901..9664caeb 100644 --- a/lib/lrama/grammar.rb +++ b/lib/lrama/grammar.rb @@ -373,10 +373,10 @@ def append_special_symbols # def normalize_rules # 1. Add $accept rule to the top of rules - accept = find_symbol_by_s_value!("$accept") - eof = find_symbol_by_number!(0) + accept = @accept_symbol + eof = @eof_symbol lineno = @rule_builders.first ? 
@rule_builders.first.line : 0 - @rules << Rule.new(id: @rule_counter.increment, lhs: accept, _rhs: [@rule_builders.first.lhs, eof], token_code: nil, lineno: lineno) + @rules << Rule.new(id: @rule_counter.increment, _lhs: accept.id, _rhs: [@rule_builders.first.lhs, eof.id], token_code: nil, lineno: lineno) setup_rules @@ -387,12 +387,12 @@ def normalize_rules end builder.rules.each do |rule| - add_nterm(id: rule.lhs) + add_nterm(id: rule._lhs) @rules << rule end builder.midrule_action_rules.each do |rule| - add_nterm(id: rule.lhs) + add_nterm(id: rule._lhs) end end end @@ -405,8 +405,6 @@ def collect_symbols add_term(id: s) when Lrama::Lexer::Token # skip - when Symbol - # skip else raise "Unknown class: #{s}" end @@ -488,7 +486,7 @@ def fill_symbol_number def replace_token_with_symbol @rules.each do |rule| - rule.lhs = token_to_symbol(rule.lhs) + rule.lhs = token_to_symbol(rule._lhs) if rule._lhs rule.rhs = rule._rhs.map do |t| token_to_symbol(t) @@ -500,8 +498,6 @@ def token_to_symbol(token) case token when Lrama::Lexer::Token find_symbol_by_id!(token) - when Symbol - token else raise "Unknown class: #{token}" end diff --git a/lib/lrama/grammar/parameterizing_rules/builder/list.rb b/lib/lrama/grammar/parameterizing_rules/builder/list.rb index 44ecb12f..f8141604 100644 --- a/lib/lrama/grammar/parameterizing_rules/builder/list.rb +++ b/lib/lrama/grammar/parameterizing_rules/builder/list.rb @@ -8,9 +8,9 @@ def build rules = [] list_token = Lrama::Lexer::Token::Ident.new(s_value: "list_#{@token.s_value}") - rules << Rule.new(id: @rule_counter.increment, lhs: @lhs, _rhs: [list_token], token_code: @user_code, precedence_sym: @precedence_sym, lineno: @line) - rules << Rule.new(id: @rule_counter.increment, lhs: list_token, _rhs: [], token_code: @user_code, precedence_sym: @precedence_sym, lineno: @line) - rules << Rule.new(id: @rule_counter.increment, lhs: list_token, _rhs: [list_token, @token], token_code: @user_code, precedence_sym: @precedence_sym, lineno: @line) + 
rules << Rule.new(id: @rule_counter.increment, _lhs: @lhs, _rhs: [list_token], token_code: @user_code, precedence_sym: @precedence_sym, lineno: @line) + rules << Rule.new(id: @rule_counter.increment, _lhs: list_token, _rhs: [], token_code: @user_code, precedence_sym: @precedence_sym, lineno: @line) + rules << Rule.new(id: @rule_counter.increment, _lhs: list_token, _rhs: [list_token, @token], token_code: @user_code, precedence_sym: @precedence_sym, lineno: @line) rules end end diff --git a/lib/lrama/grammar/parameterizing_rules/builder/nonempty_list.rb b/lib/lrama/grammar/parameterizing_rules/builder/nonempty_list.rb index cbb99af8..142d6c15 100644 --- a/lib/lrama/grammar/parameterizing_rules/builder/nonempty_list.rb +++ b/lib/lrama/grammar/parameterizing_rules/builder/nonempty_list.rb @@ -8,9 +8,9 @@ def build rules = [] nonempty_list_token = Lrama::Lexer::Token::Ident.new(s_value: "nonempty_list_#{@token.s_value}") - rules << Rule.new(id: @rule_counter.increment, lhs: @lhs, _rhs: [nonempty_list_token], token_code: @user_code, precedence_sym: @precedence_sym, lineno: @line) - rules << Rule.new(id: @rule_counter.increment, lhs: nonempty_list_token, _rhs: [@token], token_code: @user_code, precedence_sym: @precedence_sym, lineno: @line) - rules << Rule.new(id: @rule_counter.increment, lhs: nonempty_list_token, _rhs: [nonempty_list_token, @token], token_code: @user_code, precedence_sym: @precedence_sym, lineno: @line) + rules << Rule.new(id: @rule_counter.increment, _lhs: @lhs, _rhs: [nonempty_list_token], token_code: @user_code, precedence_sym: @precedence_sym, lineno: @line) + rules << Rule.new(id: @rule_counter.increment, _lhs: nonempty_list_token, _rhs: [@token], token_code: @user_code, precedence_sym: @precedence_sym, lineno: @line) + rules << Rule.new(id: @rule_counter.increment, _lhs: nonempty_list_token, _rhs: [nonempty_list_token, @token], token_code: @user_code, precedence_sym: @precedence_sym, lineno: @line) rules end end diff --git 
a/lib/lrama/grammar/parameterizing_rules/builder/option.rb b/lib/lrama/grammar/parameterizing_rules/builder/option.rb index 707e6e8c..f751609e 100644 --- a/lib/lrama/grammar/parameterizing_rules/builder/option.rb +++ b/lib/lrama/grammar/parameterizing_rules/builder/option.rb @@ -8,9 +8,9 @@ def build rules = [] option_token = Lrama::Lexer::Token::Ident.new(s_value: "option_#{@token.s_value}") - rules << Rule.new(id: @rule_counter.increment, lhs: @lhs, _rhs: [option_token], token_code: @user_code, precedence_sym: @precedence_sym, lineno: @line) - rules << Rule.new(id: @rule_counter.increment, lhs: option_token, _rhs: [], token_code: @user_code, precedence_sym: @precedence_sym, lineno: @line) - rules << Rule.new(id: @rule_counter.increment, lhs: option_token, _rhs: [@token], token_code: @ser_code, precedence_sym: @precedence_sym, lineno: @line) + rules << Rule.new(id: @rule_counter.increment, _lhs: @lhs, _rhs: [option_token], token_code: @user_code, precedence_sym: @precedence_sym, lineno: @line) + rules << Rule.new(id: @rule_counter.increment, _lhs: option_token, _rhs: [], token_code: @user_code, precedence_sym: @precedence_sym, lineno: @line) + rules << Rule.new(id: @rule_counter.increment, _lhs: option_token, _rhs: [@token], token_code: @user_code, precedence_sym: @precedence_sym, lineno: @line) rules end end diff --git a/lib/lrama/grammar/parameterizing_rules/builder/separated_list.rb b/lib/lrama/grammar/parameterizing_rules/builder/separated_list.rb index fe4e293f..95f81564 100644 --- a/lib/lrama/grammar/parameterizing_rules/builder/separated_list.rb +++ b/lib/lrama/grammar/parameterizing_rules/builder/separated_list.rb @@ -15,10 +15,10 @@ def build rules = [] separated_list_token = Lrama::Lexer::Token::Ident.new(s_value: "separated_list_#{@token.s_value}") - rules << Rule.new(id: @rule_counter.increment, lhs: @lhs, _rhs: [separated_list_token], token_code: @user_code, precedence_sym: @precedence_sym, lineno: @line) - rules << Rule.new(id: 
@rule_counter.increment, lhs: separated_list_token, _rhs: [], token_code: @user_code, precedence_sym: @precedence_sym, lineno: @line) - rules << Rule.new(id: @rule_counter.increment, lhs: separated_list_token, _rhs: [@token], token_code: @user_code, precedence_sym: @precedence_sym, lineno: @line) - rules << Rule.new(id: @rule_counter.increment, lhs: separated_list_token, _rhs: [separated_list_token, @separator, @token], token_code: @user_code, precedence_sym: @precedence_sym, lineno: @line) + rules << Rule.new(id: @rule_counter.increment, _lhs: @lhs, _rhs: [separated_list_token], token_code: @user_code, precedence_sym: @precedence_sym, lineno: @line) + rules << Rule.new(id: @rule_counter.increment, _lhs: separated_list_token, _rhs: [], token_code: @user_code, precedence_sym: @precedence_sym, lineno: @line) + rules << Rule.new(id: @rule_counter.increment, _lhs: separated_list_token, _rhs: [@token], token_code: @user_code, precedence_sym: @precedence_sym, lineno: @line) + rules << Rule.new(id: @rule_counter.increment, _lhs: separated_list_token, _rhs: [separated_list_token, @separator, @token], token_code: @user_code, precedence_sym: @precedence_sym, lineno: @line) rules end end diff --git a/lib/lrama/grammar/parameterizing_rules/builder/separated_nonempty_list.rb b/lib/lrama/grammar/parameterizing_rules/builder/separated_nonempty_list.rb index 785e75d2..64662180 100644 --- a/lib/lrama/grammar/parameterizing_rules/builder/separated_nonempty_list.rb +++ b/lib/lrama/grammar/parameterizing_rules/builder/separated_nonempty_list.rb @@ -15,9 +15,9 @@ def build rules = [] separated_list_token = Lrama::Lexer::Token::Ident.new(s_value: "separated_nonempty_list_#{@token.s_value}") - rules << Rule.new(id: @rule_counter.increment, lhs: @lhs, _rhs: [separated_list_token], token_code: @user_code, precedence_sym: @precedence_sym, lineno: @line) - rules << Rule.new(id: @rule_counter.increment, lhs: separated_list_token, _rhs: [@token], token_code: @user_code, precedence_sym: 
@precedence_sym, lineno: @line) - rules << Rule.new(id: @rule_counter.increment, lhs: separated_list_token, _rhs: [separated_list_token, @separator, @token], token_code: @user_code, precedence_sym: @precedence_sym, lineno: @line) + rules << Rule.new(id: @rule_counter.increment, _lhs: @lhs, _rhs: [separated_list_token], token_code: @user_code, precedence_sym: @precedence_sym, lineno: @line) + rules << Rule.new(id: @rule_counter.increment, _lhs: separated_list_token, _rhs: [@token], token_code: @user_code, precedence_sym: @precedence_sym, lineno: @line) + rules << Rule.new(id: @rule_counter.increment, _lhs: separated_list_token, _rhs: [separated_list_token, @separator, @token], token_code: @user_code, precedence_sym: @precedence_sym, lineno: @line) rules end end diff --git a/lib/lrama/grammar/rule.rb b/lib/lrama/grammar/rule.rb index a825d544..13b44c00 100644 --- a/lib/lrama/grammar/rule.rb +++ b/lib/lrama/grammar/rule.rb @@ -1,7 +1,7 @@ module Lrama class Grammar # _rhs holds original RHS element. Use rhs to refer to Symbol. 
- class Rule < Struct.new(:id, :lhs, :_rhs, :rhs, :token_code, :position_in_original_rule_rhs, :nullable, :precedence_sym, :lineno, keyword_init: true) + class Rule < Struct.new(:id, :_lhs, :lhs, :_rhs, :rhs, :token_code, :position_in_original_rule_rhs, :nullable, :precedence_sym, :lineno, keyword_init: true) attr_accessor :original_rule def ==(other) diff --git a/lib/lrama/grammar/rule_builder.rb b/lib/lrama/grammar/rule_builder.rb index 58078d72..80cd0c2c 100644 --- a/lib/lrama/grammar/rule_builder.rb +++ b/lib/lrama/grammar/rule_builder.rb @@ -87,7 +87,7 @@ def build_rules @midrule_action_rules = [] else rule = Rule.new( - id: @rule_counter.increment, lhs: lhs, _rhs: tokens, token_code: user_code, + id: @rule_counter.increment, _lhs: lhs, _rhs: tokens, token_code: user_code, position_in_original_rule_rhs: @position_in_original_rule_rhs, precedence_sym: precedence_sym, lineno: line ) @rules = [rule] diff --git a/lib/lrama/lexer.rb b/lib/lrama/lexer.rb index ccddd7e0..18d702a4 100644 --- a/lib/lrama/lexer.rb +++ b/lib/lrama/lexer.rb @@ -71,8 +71,8 @@ def lex_token # noop when @scanner.scan(/\/\*/) lex_comment - when @scanner.scan(/\/\/.*?\n/) - newline + when @scanner.scan(/\/\/.*(?<newline>\n)?/) + newline if @scanner[:newline] else break end @@ -141,7 +141,11 @@ def lex_c_code when @scanner.scan(/'.*?'/) code += %Q(#{@scanner.matched}) else - code += @scanner.getch + if @scanner.scan(/[^\"'\{\}\n#{@end_symbol}]+/) + code += @scanner.matched + else + code += @scanner.getch + end end end raise ParseError, "Unexpected code: #{code}." 
diff --git a/rbs_collection.lock.yaml b/rbs_collection.lock.yaml index f0b1c203..633c317a 100644 --- a/rbs_collection.lock.yaml +++ b/rbs_collection.lock.yaml @@ -2,7 +2,7 @@ sources: - type: git name: ruby/gem_rbs_collection - revision: 2de2d4535caba275f3b8533684aab110d921f553 + revision: 25286c51a19927f28623aee3cd36655f902399ba remote: https://github.com/ruby/gem_rbs_collection.git repo_dir: gems path: ".gem_rbs_collection" @@ -15,6 +15,10 @@ gems: version: '0' source: type: stdlib +- name: forwardable + version: '0' + source: + type: stdlib - name: rake version: '13.0' source: diff --git a/rbs_collection.yaml b/rbs_collection.yaml index 248712f2..62644a93 100644 --- a/rbs_collection.yaml +++ b/rbs_collection.yaml @@ -20,3 +20,4 @@ gems: # It's unnecessary if you don't use rbs as a library. - name: rbs ignore: true + - name: forwardable diff --git a/sig/lrama/grammar/code.rbs b/sig/lrama/grammar/code.rbs new file mode 100644 index 00000000..0a8ba403 --- /dev/null +++ b/sig/lrama/grammar/code.rbs @@ -0,0 +1,24 @@ +module Lrama + class Grammar + class Code + extend Forwardable + + attr_accessor type: untyped + attr_accessor token_code: untyped + + # delegated + def s_value: -> String + def line: -> Integer + def column: -> untyped + def references: -> untyped + + def initialize: (?type: untyped, ?token_code: untyped) -> void + + def translated_code: () -> String + + private + + def reference_to_c: (untyped ref) -> untyped + end + end +end \ No newline at end of file diff --git a/sig/lrama/grammar/code/printer_code.rbs b/sig/lrama/grammar/code/printer_code.rbs new file mode 100644 index 00000000..d067b3f0 --- /dev/null +++ b/sig/lrama/grammar/code/printer_code.rbs @@ -0,0 +1,15 @@ +module Lrama + class Grammar + class Code + class PrinterCode < Code + @tag: untyped + def initialize: (?type: untyped, ?token_code: untyped, ?tag: untyped) -> void + + private + + # ref: Lrama::Grammar::Code.token_code.references + def reference_to_c: (untyped ref) -> untyped + end + end 
+ end +end diff --git a/sig/lrama/grammar/precedence.rbs b/sig/lrama/grammar/precedence.rbs new file mode 100644 index 00000000..ccb5873c --- /dev/null +++ b/sig/lrama/grammar/precedence.rbs @@ -0,0 +1,11 @@ +module Lrama + class Grammar + class Precedence + include Comparable + attr_accessor type: Symbol + attr_accessor precedence: Integer + + def <=>: (Precedence other) -> Integer + end + end +end diff --git a/sig/lrama/grammar/printer.rbs b/sig/lrama/grammar/printer.rbs new file mode 100644 index 00000000..947273dc --- /dev/null +++ b/sig/lrama/grammar/printer.rbs @@ -0,0 +1,11 @@ +module Lrama + class Grammar + class Printer + attr_accessor ident_or_tags: Array[Lexer::Token::Ident|Lexer::Token::Tag] + attr_accessor token_code: Grammar::Code + attr_accessor lineno: Integer + + def translated_code: (Lexer::Token member) -> String + end + end +end diff --git a/sig/lrama/grammar/rule.rbs b/sig/lrama/grammar/rule.rbs index 02edc7a2..8de89c19 100644 --- a/sig/lrama/grammar/rule.rbs +++ b/sig/lrama/grammar/rule.rbs @@ -4,7 +4,7 @@ module Lrama attr_accessor original_rule: Rule def initialize: ( - ?id: untyped, ?lhs: untyped, ?_rhs: untyped, ?rhs: untyped, + ?id: untyped, ?_lhs: untyped, ?lhs: untyped, ?_rhs: untyped, ?rhs: untyped, ?token_code: untyped, ?position_in_original_rule_rhs: untyped, ?nullable: untyped, ?precedence_sym: untyped, ?lineno: untyped ) -> void diff --git a/sig/lrama/grammar/rule_builder.rbs b/sig/lrama/grammar/rule_builder.rbs index ac9bed92..b0a5b4e9 100644 --- a/sig/lrama/grammar/rule_builder.rbs +++ b/sig/lrama/grammar/rule_builder.rbs @@ -7,7 +7,16 @@ module Lrama attr_reader user_code: Lexer::Token::UserCode? attr_reader precedence_sym: Lexer::Token? + @rule_counter: Counter + @midrule_action_counter: Counter + @position_in_original_rule_rhs: Integer? + @skip_preprocess_references: bool @user_code: Lexer::Token::UserCode? 
+ @rule_builders_for_derived_rules: Array[RuleBuilder] + @rules: Array[Rule] + @replaced_rhs: Array[Lexer::Token] + @parameterizing_rules: Array[Rule] + @midrule_action_rules: Array[Rule] def initialize: (Counter rule_counter, Counter midrule_action_counter, ?Integer position_in_original_rule_rhs, ?skip_preprocess_references: bool) -> void def add_rhs: (Lexer::Token rhs) -> void @@ -15,18 +24,18 @@ module Lrama def precedence_sym=: (Lexer::Token user_code) -> void def complete_input: () -> void def setup_rules: () -> void - def preprocess_references: () -> void + def parameterizing_rules: () -> Array[Rule] def midrule_action_rules: () -> Array[Rule] - def rhs_with_new_tokens: () -> Array[Lexer::Token] + def rules: () -> Array[Rule] private def freeze_rhs: () -> void + def preprocess_references: () -> void + def build_rules: () -> void def process_rhs: () -> void def numberize_references: () -> void - def setup_references: () -> void def flush_user_code: () -> void - def build_rules: () -> void end end end diff --git a/spec/fixtures/integration/calculator.l b/spec/fixtures/integration/calculator.l new file mode 100644 index 00000000..4e0eb0dc --- /dev/null +++ b/spec/fixtures/integration/calculator.l @@ -0,0 +1,40 @@ +%option noinput nounput noyywrap never-interactive bison-bridge bison-locations + +%{ + +#include <stdio.h> +#include <stdlib.h> +#include "calculator.h" + +%} + +NUMBER [0-9]+ + +%% + +{NUMBER} { + ((void) yylloc); + yylval->val = atoi(yytext); + return NUM; +} + +[+\-\*\/\(\)] { + return yytext[0]; +} + +[\n|\r\n] { + return(YYEOF); +} + +[[:space:]] {} + +<<EOF>> { + return(YYEOF); +} + +. 
{ + fprintf(stderr, "Illegal character '%s'\n", yytext); + return(YYEOF); +} + +%% diff --git a/spec/fixtures/integration/calculator.y b/spec/fixtures/integration/calculator.y new file mode 100644 index 00000000..64e43747 --- /dev/null +++ b/spec/fixtures/integration/calculator.y @@ -0,0 +1,47 @@ +%{ + +#include <stdio.h> +#include "calculator.h" +#include "calculator-lexer.h" + +static int yyerror(YYLTYPE *loc, const char *str); + +%} + +%union { + int val; +} + +%token <val> NUM +%type <val> expr +%left '+' '-' +%left '*' '/' + +%% + +program : /* empty */ + | expr { printf("=> %d", $1); } + ; +expr : NUM + | expr '+' expr { $$ = $1 + $3; } + | expr '-' expr { $$ = $1 - $3; } + | expr '*' expr { $$ = $1 * $3; } + | expr '/' expr { $$ = $1 / $3; } + | '(' expr ')' { $$ = $2; } + ; + +%% + +static int yyerror(YYLTYPE *loc, const char *str) { + fprintf(stderr, "parse error: %s\n", str); + return 0; +} + +int main(int argc, char *argv[]) { + if (argc == 2) { + yy_scan_string(argv[1]); + } + + yyparse(); + return 0; +} diff --git a/spec/fixtures/integration/error_recovery.l b/spec/fixtures/integration/error_recovery.l new file mode 100644 index 00000000..35050f87 --- /dev/null +++ b/spec/fixtures/integration/error_recovery.l @@ -0,0 +1,40 @@ +%option noinput nounput noyywrap never-interactive bison-bridge bison-locations + +%{ + +#include <stdio.h> +#include <stdlib.h> +#include "error_recovery.h" + +%} + +NUMBER [0-9]+ + +%% + +{NUMBER} { + ((void) yylloc); + yylval->val = atoi(yytext); + return NUM; +} + +[+\-\*\/\(\)] { + return yytext[0]; +} + +[\n|\r\n] { + return(YYEOF); +} + +[[:space:]] {} + +<<EOF>> { + return(YYEOF); +} + +. 
{ + fprintf(stderr, "Illegal character '%s'\n", yytext); + return(YYEOF); +} + +%% diff --git a/spec/fixtures/integration/error_recovery.y b/spec/fixtures/integration/error_recovery.y new file mode 100644 index 00000000..e44eb751 --- /dev/null +++ b/spec/fixtures/integration/error_recovery.y @@ -0,0 +1,51 @@ +%{ + +#include <stdio.h> +#include "error_recovery.h" +#include "error_recovery-lexer.h" + +static int yyerror(YYLTYPE *loc, const char *str); + +%} + +%union { + int val; +} + +%token <val> NUM +%type <val> expr +%left '+' '-' +%left '*' '/' + +%error-token { + $$ = 100; +} NUM + +%% + +program : /* empty */ + | expr { printf("=> %d", $1); } + ; +expr : NUM + | expr '+' expr { $$ = $1 + $3; } + | expr '-' expr { $$ = $1 - $3; } + | expr '*' expr { $$ = $1 * $3; } + | expr '/' expr { $$ = $1 / $3; } + | '(' expr ')' { $$ = $2; } + ; + +%% + +static int yyerror(YYLTYPE *loc, const char *str) { + fprintf(stderr, "parse error: %s\n", str); + return 0; +} + +int main(int argc, char *argv[]) { + if (argc == 2) { + yy_scan_string(argv[1]); + } + + yyparse(); + return 0; +} diff --git a/spec/fixtures/integration/named_references.l b/spec/fixtures/integration/named_references.l new file mode 100644 index 00000000..8e10f31e --- /dev/null +++ b/spec/fixtures/integration/named_references.l @@ -0,0 +1,47 @@ +%option noinput nounput noyywrap never-interactive yylineno bison-bridge bison-locations + +%{ + +#include <stdio.h> +#include <stdlib.h> +#include "named_references.h" + +int yycolumn = 0; + +#define YY_USER_ACTION \ + yylloc->first_line = yylloc->last_line = yylineno; \ + yylloc->first_column = yycolumn; \ + yylloc->last_column = yycolumn + yyleng; \ + yycolumn += yyleng; \ + +%} + +NUMBER [0-9]+ + +%% + +{NUMBER} { + yylval->val = atoi(yytext); + return NUM; +} + +[+\-\*\/\(\)] { + return yytext[0]; +} + +[\n|\r\n] { + return(YYEOF); +} + +[[:space:]] {} + +<<EOF>> { + return(YYEOF); +} + +. 
{ + fprintf(stderr, "Illegal character '%s'\n", yytext); + return(YYEOF); +} + +%% diff --git a/spec/fixtures/integration/named_references.y b/spec/fixtures/integration/named_references.y new file mode 100644 index 00000000..e4accac7 --- /dev/null +++ b/spec/fixtures/integration/named_references.y @@ -0,0 +1,86 @@ +%{ +#include <stdio.h> + +typedef struct code_location { + int first_line; + int first_column; + int last_line; + int last_column; +} code_location_t; + +#define YYLTYPE code_location_t +#define YYLLOC_DEFAULT(Current, Rhs, N) \ + do \ + if (N) \ + { \ + (Current).first_line = YYRHSLOC(Rhs, 1).first_line; \ + (Current).first_column = YYRHSLOC(Rhs, 1).first_column; \ + (Current).last_line = YYRHSLOC(Rhs, N).last_line; \ + (Current).last_column = YYRHSLOC(Rhs, N).last_column; \ + } \ + else \ + { \ + (Current).first_line = YYRHSLOC(Rhs, 0).last_line; \ + (Current).first_column = YYRHSLOC(Rhs, 0).last_column; \ + (Current).last_line = YYRHSLOC(Rhs, 0).last_line; \ + (Current).last_column = YYRHSLOC(Rhs, 0).last_column; \ + } \ + while (0) + +#include "named_references.h" +#include "named_references-lexer.h" + +static void print_location(YYLTYPE *loc); +static int yyerror(YYLTYPE *loc, const char *str); + +%} + +%union { + int val; +} +%token <val> NUM +%type <val> expr + +%% + +line: expr + { + printf("line (%d): ", @expr.first_line); + print_location(&@expr); + + printf("=> %d", $expr); + } + ; + +expr[result]: NUM + | expr[ex-left] expr[ex.right] '+' + { + printf("expr[ex-left] (%d): ", @[ex-left].first_line); + print_location(&@[ex-left]); + + printf("expr[ex.right] (%d): ", @[ex.right].first_line); + print_location(&@[ex.right]); + + $result = $[ex-left] + $[ex.right]; + } + ; + +%% + +static void print_location(YYLTYPE *loc) { + printf("%d.%d-%d.%d. 
", loc->first_line, loc->first_column, loc->last_line, loc->last_column); +} + +static int yyerror(YYLTYPE *loc, const char *str) { + fprintf(stderr, "parse error: %s\n", str); + return 0; +} + +int main(int argc, char *argv[]) { + if (argc == 2) { + yy_scan_string(argv[1]); + } + + yyparse(); + return 0; +} diff --git a/spec/fixtures/integration/params.l b/spec/fixtures/integration/params.l new file mode 100644 index 00000000..5fc6c60f --- /dev/null +++ b/spec/fixtures/integration/params.l @@ -0,0 +1,49 @@ +%option noinput nounput noyywrap never-interactive yylineno + +%{ + +#include <stdio.h> +#include <stdlib.h> +#include "params.h" + +#define YY_DECL int yylex (YYSTYPE *yylval, YYLTYPE *yylloc, int parser_params) + +int yycolumn = 0; + +#define YY_USER_ACTION \ + yylloc->first_line = yylloc->last_line = yylineno; \ + yylloc->first_column = yycolumn; \ + yylloc->last_column = yycolumn + yyleng; \ + yycolumn += yyleng; \ + +%} + +NUMBER [0-9]+ + +%% + +{NUMBER} { + yylval->val = atoi(yytext); + return NUM; +} + +[+\-\*\/\(\)] { + return yytext[0]; +} + +[\n|\r\n] { + return(YYEOF); +} + +[[:space:]] {} + +<<EOF>> { + return(YYEOF); +} + +. 
{ + fprintf(stderr, "Illegal character '%s'\n", yytext); + return(YYEOF); +} + +%% diff --git a/spec/fixtures/integration/params.y b/spec/fixtures/integration/params.y new file mode 100644 index 00000000..fd4d4d5a --- /dev/null +++ b/spec/fixtures/integration/params.y @@ -0,0 +1,56 @@ +%{ +#define YYDEBUG 1 +#include <stdio.h> + +#define YY_LOCATION_PRINT(File, loc, p) ((void) 0) + +#define YY_DECL yylex (YYSTYPE *lval, YYLTYPE *yylloc, int parser_params) + +#include "params.h" +#include "params-lexer.h" + +extern int yylex(YYSTYPE *lval, YYLTYPE *yylloc, int parser_params); +static int yyerror(YYLTYPE *loc, int parse_param, const char *str); + +%} + +%lex-param {int parse_param} +%parse-param {int parse_param} + +%union { + int val; +} + +%token <val> NUM +%type <val> expr +%left '+' '-' +%left '*' '/' + +%% + +program : /* empty */ + | expr { printf("=> %d", $1); } + ; +expr : NUM + | expr '+' expr { $$ = $1 + $3; } + | expr '-' expr { $$ = $1 - $3; } + | expr '*' expr { $$ = $1 * $3; } + | expr '/' expr { $$ = $1 / $3; } + | '(' expr ')' { $$ = $2; } + ; + +%% + +static int yyerror(YYLTYPE *loc, int parse_param, const char *str) { + fprintf(stderr, "parse error: %s\n", str); + return 0; +} + +int main(int argc, char *argv[]) { + if (argc == 2) { + yy_scan_string(argv[1]); + } + + yyparse(0); + return 0; +} diff --git a/spec/fixtures/integration/printers.l b/spec/fixtures/integration/printers.l new file mode 100644 index 00000000..c9ad7dc2 --- /dev/null +++ b/spec/fixtures/integration/printers.l @@ -0,0 +1,47 @@ +%option noinput nounput noyywrap never-interactive yylineno bison-bridge bison-locations + +%{ + +#include <stdio.h> +#include <stdlib.h> +#include "printers.h" + +int yycolumn = 0; + +#define YY_USER_ACTION \ + yylloc->first_line = yylloc->last_line = yylineno; \ + yylloc->first_column = yycolumn; \ + yylloc->last_column = yycolumn + yyleng; \ + yycolumn += yyleng; \ + +%} + +NUMBER [0-9]+ + +%% + +{NUMBER} { + yylval->val1 = atoi(yytext); + return NUM; +} + +[+\-\*\/\(\)] { + return yytext[0]; 
+} + +[\n|\r\n] { + return(YYEOF); +} + +[[:space:]] {} + +<<EOF>> { + return(YYEOF); +} + +. { + fprintf(stderr, "Illegal character '%s'\n", yytext); + return(YYEOF); +} + +%% diff --git a/spec/fixtures/integration/printers.y b/spec/fixtures/integration/printers.y new file mode 100644 index 00000000..5b57bd3e --- /dev/null +++ b/spec/fixtures/integration/printers.y @@ -0,0 +1,69 @@ +%{ + +#define YYDEBUG 1 + +#include <stdio.h> +#include "printers.h" +#include "printers-lexer.h" + +static int yyerror(YYLTYPE *loc, const char *str); + +%} + +%union { + int val1; + int val2; + int val3; +} + +%token <val1> NUM +%type <val2> add +%type <val3> expr +%left '+' '-' +%left '*' '/' + +%printer { + printf("val1: %d\n", $$); +} <val1> // printer for TAG + +%printer { + printf("val2: %d\n", $$); +} <val2> + +%printer { + printf("expr: %d\n", $$); +} expr // printer for symbol + +%% + +program : /* empty */ + | expr { printf("=> %d", $1); } + ; + +add : expr '+' expr { $$ = $1 + $3; } + +expr : NUM + | add + | expr '-' expr { $$ = $1 - $3; } + | expr '*' expr { $$ = $1 * $3; } + | expr '/' expr { $$ = $1 / $3; } + | '(' expr ')' { $$ = $2; } + ; + +%% + +static int yyerror(YYLTYPE *loc, const char *str) { + fprintf(stderr, "parse error: %s\n", str); + return 0; +} + +int main(int argc, char *argv[]) { + yydebug = 1; + + if (argc == 2) { + yy_scan_string(argv[1]); + } + + yyparse(); + return 0; +} diff --git a/spec/fixtures/integration/prologue_epilogue_optional.l b/spec/fixtures/integration/prologue_epilogue_optional.l new file mode 100644 index 00000000..fc7291bb --- /dev/null +++ b/spec/fixtures/integration/prologue_epilogue_optional.l @@ -0,0 +1,40 @@ +%option noinput nounput noyywrap never-interactive yylineno bison-bridge bison-locations + +%{ + +#include <stdio.h> +#include <stdlib.h> +#include "prologue_epilogue_optional.h" + +%} + +NUMBER [0-9]+ + +%% + +{NUMBER} { + ((void) yylloc); + yylval->val = atoi(yytext); + return 1; +} + +[+\-\*\/\(\)] { + return yytext[0]; +} + +[\n|\r\n] { + return(YYEOF); +} + +[[:space:]] {} + +<<EOF>> { + 
return(YYEOF); +} + +. { + fprintf(stderr, "Illegal character '%s'\n", yytext); + return(YYEOF); +} + +%% diff --git a/spec/fixtures/integration/prologue_epilogue_optional.y b/spec/fixtures/integration/prologue_epilogue_optional.y new file mode 100644 index 00000000..651b4eee --- /dev/null +++ b/spec/fixtures/integration/prologue_epilogue_optional.y @@ -0,0 +1,34 @@ +%{ + +#include <stdio.h> +#include "prologue_epilogue_optional.h" +#include "prologue_epilogue_optional-lexer.h" + +static int yyerror(YYLTYPE *loc, const char *str); + +%} + +%union { + int val; +} + +%% + +program : /* empty */ + ; + +%% + +static int yyerror(YYLTYPE *loc, const char *str) { + fprintf(stderr, "parse error: %s\n", str); + return 0; +} + +int main(int argc, char *argv[]) { + if (argc == 2) { + yy_scan_string(argv[1]); + } + + yyparse(); + return 0; +} diff --git a/spec/lrama/grammar/rule_builder_spec.rb b/spec/lrama/grammar/rule_builder_spec.rb index 3634afa5..0f184eca 100644 --- a/spec/lrama/grammar/rule_builder_spec.rb +++ b/spec/lrama/grammar/rule_builder_spec.rb @@ -327,10 +327,10 @@ rules = rule_builder.midrule_action_rules expect(rules.count).to eq 2 - expect(rules[0].lhs.s_value).to eq '@1' + expect(rules[0]._lhs.s_value).to eq '@1' expect(rules[0].token_code.s_value).to eq '$1' expect(rules[0].original_rule).to eq rule - expect(rules[1].lhs.s_value).to eq '$@2' + expect(rules[1]._lhs.s_value).to eq '$@2' expect(rules[1].token_code.s_value).to eq '$2 + $3' expect(rules[1].original_rule).to eq rule end diff --git a/spec/lrama/integration_spec.rb b/spec/lrama/integration_spec.rb index aaf20320..5f0c7b45 100644 --- a/spec/lrama/integration_spec.rb +++ b/spec/lrama/integration_spec.rb @@ -4,89 +4,41 @@ RSpec.describe "integration" do module IntegrationHelper - def generate_object(grammar_file_path, c_path, obj_path, command_args: []) - Lrama::Command.new.run(%W[-d -o #{c_path}] + command_args + %W[#{grammar_file_path}]) - `gcc -Wall #{c_path} -o #{obj_path}` - expect($?.success?).to be true + 
def exec_command(command) + `#{command}` + raise "#{command} failed." unless $?.success? end - def test_grammar(grammar, expected, command_args: []) - Tempfile.create(%w[test .y]) do |f| - f << grammar - f.close - c_path = File.dirname(f.path) + "/test.c" - obj_path = File.dirname(f.path) + "/test" - - generate_object(f.path, c_path, obj_path, command_args: command_args) + def test_parser(parser_name, input, expected, lrama_command_args: [], debug: false) + tmpdir = Dir.tmpdir + grammar_file_path = fixture_path("integration/#{parser_name}.y") + lexer_file_path = fixture_path("integration/#{parser_name}.l") + parser_c_path = tmpdir + "/#{parser_name}.c" + parser_h_path = tmpdir + "/#{parser_name}.h" + lexer_c_path = tmpdir + "/#{parser_name}-lexer.c" + lexer_h_path = tmpdir + "/#{parser_name}-lexer.h" + obj_path = tmpdir + "/#{parser_name}" - result = Open3.popen3(obj_path) do |stdin, stdout, stderr, wait_thr| - stdout.read - end - - expect(result).to eq(expected) - end - end + flex = windows? ? 
"win_flex" : "flex" - def generate_lexer_body(input) - input.each_with_index.map do |(token, union, semantic_value), i| - str = "" - str << " case #{i}:\n" - str << " yylval->#{union} = #{semantic_value};\n" if union && semantic_value - str << " return #{token};\n" - end.join("\n") - end + Lrama::Command.new.run(%W[-H#{parser_h_path} -o#{parser_c_path}] + lrama_command_args + %W[#{grammar_file_path}]) + exec_command("#{flex} --header-file=#{lexer_h_path} -o #{lexer_c_path} #{lexer_file_path}") + exec_command("gcc -Wall -I#{tmpdir} #{parser_c_path} #{lexer_c_path} -o #{obj_path}") - def test_rules(rules, input, expected, command_args: [], debug: false) - cases = generate_lexer_body(input) - yydebug_macro = '' - yydebug = '' + out = err = nil - if debug - yydebug_macro = '#define YYDEBUG 1' - yydebug = 'yydebug = 1;' - command_args << "--report=all" + Open3.popen3(obj_path, input) do |stdin, stdout, stderr, wait_thr| + out = stdout.read + err = stderr.read end - grammar = <<~Grammar -%{ -#{yydebug_macro} -#include - -#include "test.h" - -static int yylex(YYSTYPE *val, YYLTYPE *loc); -static int yyerror(YYLTYPE *loc, const char *str); -%} - -#{rules} - -// #{input} - -int c = 0; - -static int yylex(YYSTYPE *yylval, YYLTYPE *loc) { - switch (c++) { -#{cases} - default: - // End of Input - return -1; - } -} - -static int yyerror(YYLTYPE *loc, const char *str) { - fprintf(stderr, "parse error: %s\\n", str); - return 0; -} - -int main() { - #{yydebug} - yyparse(); - return 0; -} - - Grammar + STDERR.puts err if debug && !err.empty? 
+ expect(out).to eq(expected) + end - test_grammar(grammar, expected, command_args: command_args) + def generate_object(grammar_file_path, c_path, obj_path, command_args: []) + Lrama::Command.new.run(%W[-d -o #{c_path}] + command_args + %W[#{grammar_file_path}]) + exec_command("gcc -Wall #{c_path} -o #{obj_path}") end end @@ -94,270 +46,28 @@ def test_rules(rules, input, expected, command_args: [], debug: false) describe "calculator" do it "returns 9 for '(1+2)*3'" do - # (1+2)*3 #=> 9 - input = [ - %w['('], - %w[NUM val 1], - %w['+'], - %w[NUM val 2], - %w[')'], - %w['*'], - %w[NUM val 3] - ] - - test_rules(<<~Rules, input, "=> 9") - %union { - int val; - } - %token NUM - %type expr - %left '+' '-' - %left '*' '/' - - %% - - program : /* empty */ - | expr { printf("=> %d", $1); } - ; - expr : NUM - | expr '+' expr { $$ = $1 + $3; } - | expr '-' expr { $$ = $1 - $3; } - | expr '*' expr { $$ = $1 * $3; } - | expr '/' expr { $$ = $1 / $3; } - | '(' expr ')' { $$ = $2; } - ; - - %% - Rules + test_parser("calculator", "( 1 + 2 ) * 3", "=> 9") end end it "prologue and epilogue are optional" do - test_grammar(<<~Grammar, "") - %code provides { - static int yylex(YYSTYPE *val, YYLTYPE *loc) { return 0; } - static int yyerror(YYLTYPE *loc, const char *str) { return 0; } - - int main() { - return 0; - } - } - - %union { - int val; - } - - %% - - program : /* empty */ - ; - - Grammar + test_parser("prologue_epilogue_optional", "", "") end - describe "YYDEBUG, %lex-param, %parse-param, error_recovery option are enabled" do + describe "YYDEBUG, %lex-param, %parse-param option are enabled" do it "returns 9 for '(1+2)*3'" do - # (1+2)*3 #=> 9 - input = [ - %w['('], - %w[NUM val 1], - %w['+'], - %w[NUM val 2], - %w[')'], - %w['*'], - %w[NUM val 3] - ] - cases = generate_lexer_body(input) - - test_grammar(<<~Grammar, "=> 9", command_args: %W[-e]) - %{ - #define YYDEBUG 1 - #include - - #define YY_LOCATION_PRINT(File, loc, p) ((void) 0) - - #include "test.h" - - static int 
yylex(YYSTYPE *yylval, YYLTYPE *loc, int lex_param); - static int yyerror(YYLTYPE *loc, int parse_param, const char *str); - %} - - %lex-param {int parse_param} - %parse-param {int parse_param} - - %union { - int val; - } - %token NUM - %type expr - %left '+' '-' - %left '*' '/' - - %% - - program : /* empty */ - | expr { printf("=> %d", $1); } - ; - expr : NUM - | expr '+' expr { $$ = $1 + $3; } - | expr '-' expr { $$ = $1 - $3; } - | expr '*' expr { $$ = $1 * $3; } - | expr '/' expr { $$ = $1 / $3; } - | '(' expr ')' { $$ = $2; } - ; - - %% - - int c = 0; - - static int yylex(YYSTYPE *yylval, YYLTYPE *loc, int lex_param) { - switch (c++) { - #{cases} - default: - // End of Input - return -1; - } - } - - static int yyerror(YYLTYPE *loc, int parse_param, const char *str) { - fprintf(stderr, "parse error: %s\\n", str); - return 0; - } - - int main() { - yyparse(0); - return 0; - } - Grammar + test_parser("params", "(1+2)*3", "=> 9") end end describe "named references" do it "returns 3 for '1 2 +" do - # 1 2 + #=> 3 - input = [ - %w[NUM val 1], - %w[NUM val 2], - %w['+'], - ] - cases = generate_lexer_body(input) - - test_grammar(<<~Grammar, "expr[ex-left] (0): 0.0-0.1. expr[ex.right] (1): 1.0-1.1. line (0): 0.0-2.1. 
=> 3") - %{ - #include - - #include "test.h" - - typedef struct code_location { - int first_line; - int first_column; - int last_line; - int last_column; - } code_location_t; - - #define YYLTYPE code_location_t - #define YYLLOC_DEFAULT(Current, Rhs, N) \ - do \ - if (N) \ - { \ - (Current).first_line = YYRHSLOC(Rhs, 1).first_line; \ - (Current).first_column = YYRHSLOC(Rhs, 1).first_column; \ - (Current).last_line = YYRHSLOC(Rhs, N).last_line; \ - (Current).last_column = YYRHSLOC(Rhs, N).last_column; \ - } \ - else \ - { \ - (Current).first_line = YYRHSLOC(Rhs, 0).last_line; \ - (Current).first_column = YYRHSLOC(Rhs, 0).last_column; \ - (Current).last_line = YYRHSLOC(Rhs, 0).last_line; \ - (Current).last_column = YYRHSLOC(Rhs, 0).last_column; \ - } \ - while (0) - - static int yylex(YYSTYPE *val, YYLTYPE *loc); - static void print_location(YYLTYPE *loc); - static int yyerror(YYLTYPE *loc, const char *str); - - %} - - %union { - int val; - } - %token NUM - %type expr - - %% - - line: expr - { - printf("line (%d): ", @expr.first_line); - print_location(&@expr); - - printf("=> %d", $expr); - } - ; - - expr[result]: NUM - | expr[ex-left] expr[ex.right] '+' - { - printf("expr[ex-left] (%d): ", @[ex-left].first_line); - print_location(&@[ex-left]); - - printf("expr[ex.right] (%d): ", @[ex.right].first_line); - print_location(&@[ex.right]); - - $result = $[ex-left] + $[ex.right]; - } - ; - - %% - - int c = 0; - - static int yylex(YYSTYPE *yylval, YYLTYPE *loc) { - loc->first_line = c; - loc->first_column = 0; - loc->last_line = c; - loc->last_column = 1; - - switch (c++) { - #{cases} - default: - // End of Input - return -1; - } - } - - static void print_location(YYLTYPE *loc) { - printf("%d.%d-%d.%d. 
", loc->first_line, loc->first_column, loc->last_line, loc->last_column); - } - - static int yyerror(YYLTYPE *loc, const char *str) { - fprintf(stderr, "parse error: %s\\n", str); - return 0; - } - - int main() { - yyparse(); - return 0; - } - - Grammar + test_parser("named_references", "1 2 +", "expr[ex-left] (1): 1.0-1.1. expr[ex.right] (1): 1.2-1.3. line (1): 1.0-1.5. => 3") end end describe "%printer" do it "prints messages" do - # (1+) #=> 101 - # '100' is complemented - input = [ - %w[NUM val1 1], - %w['+'], - %w[NUM val1 2], - %w['*'], - %w[NUM val1 3] - ] - expected = <<~STR.chomp val1: 1 val1: 1 @@ -383,91 +93,16 @@ def test_rules(rules, input, expected, command_args: [], debug: false) => 7 STR - test_rules(<<~Rules, input, expected, debug: true) - %union { - int val1; - int val2; - int val3; - } - %token NUM - %type add - %type expr - %left '+' '-' - %left '*' '/' - - %printer { - printf("val1: %d\\n", $$); - } // printer for TAG - - %printer { - printf("val2: %d\\n", $$); - } - - %printer { - printf("expr: %d\\n", $$); - } expr // printer for symbol - - %% - - program : /* empty */ - | expr { printf("=> %d", $1); } - ; - - add : expr '+' expr { $$ = $1 + $3; } - - expr : NUM - | add - | expr '-' expr { $$ = $1 - $3; } - | expr '*' expr { $$ = $1 * $3; } - | expr '/' expr { $$ = $1 / $3; } - | '(' expr ')' { $$ = $2; } - ; - - %% - Rules + test_parser("printers", "1 + 2 * 3", expected) end end # TODO: Add test case for "(1+2" describe "error_recovery" do - it "returns 6 for '(1+)'" do + it "returns 101 for '(1+)'" do # (1+) #=> 101 # '100' is complemented - input = [ - %w['('], - %w[NUM val 1], - %w['+'], - %w[')'], - ] - - test_rules(<<~Rules, input, "=> 101", command_args: %W[-e]) - %union { - int val; - } - %token NUM - %type expr - %left '+' '-' - %left '*' '/' - - %error-token { - $$ = 100; - } NUM - - %% - - program : /* empty */ - | expr { printf("=> %d", $1); } - ; - expr : NUM - | expr '+' expr { $$ = $1 + $3; } - | expr '-' expr { $$ = $1 - $3; 
} - | expr '*' expr { $$ = $1 * $3; } - | expr '/' expr { $$ = $1 / $3; } - | '(' expr ')' { $$ = $2; } - ; - - %% - Rules + test_parser("error_recovery", "(1+)", "=> 101", lrama_command_args: %W[-e]) end end diff --git a/spec/lrama/lexer_spec.rb b/spec/lrama/lexer_spec.rb index 0fe5d27a..18383d18 100644 --- a/spec/lrama/lexer_spec.rb +++ b/spec/lrama/lexer_spec.rb @@ -293,4 +293,9 @@ expect { lexer.next_token }.to raise_error(ParseError, "Unexpected code: @invalid.") end end + + it 'lex a line comment without newline' do + lexer = Lrama::Lexer.new("// foo") + expect(lexer.next_token).to be_nil + end end diff --git a/spec/spec_helper.rb b/spec/spec_helper.rb index 394fd947..25757bf8 100644 --- a/spec/spec_helper.rb +++ b/spec/spec_helper.rb @@ -42,3 +42,7 @@ def sample_path(file_name) def exe_path(file_name) File.expand_path("../../exe/#{file_name}", __FILE__) end + +def windows? + return /mswin|mingw|bccwin/ =~ RUBY_PLATFORM +end