From c30a6c2f678c8f410b85d31895f8735fa5de1000 Mon Sep 17 00:00:00 2001 From: Bart Broere Date: Sat, 7 Sep 2024 10:50:31 +0200 Subject: [PATCH 01/18] Skeleton code for the lexer --- lib/rouge/lexers/cobol.rb | 72 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 72 insertions(+) create mode 100644 lib/rouge/lexers/cobol.rb diff --git a/lib/rouge/lexers/cobol.rb b/lib/rouge/lexers/cobol.rb new file mode 100644 index 0000000000..1814a254cc --- /dev/null +++ b/lib/rouge/lexers/cobol.rb @@ -0,0 +1,72 @@ +# -*- coding: utf-8 -*- # +# frozen_string_literal: true + +module Rouge + module Lexers + class COBOL < RegexLexer + title 'COBOL' + desc 'COBOL (Common Business-Oriented Language) programming language' + tag 'cobol' + filenames '*.cob', '*.cbl', '*.cobol', '*.cpy' + mimetypes 'text/x-cobol' + + # List of COBOL keywords + KEYWORDS = %w[ + ACCEPT ADD ALTER APPLY CALL CANCEL CLOSE COMPUTE CONTINUE + DECLARE DELETE DISPLAY DIVIDE ELSE END-ADD END-CALL END-COMPUTE + END-DELETE END-DISPLAY END-DIVIDE END-EVALUATE END-IF END-MULTIPLY + END-OF-PAGE END-PERFORM END-READ END-RETURN END-REWRITE END-SEARCH + END-START END-STRING END-SUBTRACT END-UNSTRING END-WRITE EVALUATE + EXIT GOBACK IF INITIALIZE INSPECT MERGE MOVE MULTIPLY NEXT + OPEN PERFORM READ RECEIVE RETURN REWRITE SEARCH SEND SET SORT + START STOP STRING SUBTRACT UNSTRING WRITE + ] + + # COBOL divisions and sections + DIVISIONS = %w[ + IDENTIFICATION ENVIRONMENT DATA PROCEDURE + ] + + SECTIONS = %w[ + CONFIGURATION INPUT-OUTPUT FILE WORKING-STORAGE LOCAL-STORAGE LINKAGE + ] + + # Define tokens for the lexer + state :whitespace do + rule %r/\s+/m, Text::Whitespace + end + + state :root do + mixin :whitespace + + # Comments + rule %r{(\*)[^\n]*}, Comment::Single + + # Strings + rule %r/"/, Str::Double, :string + rule %r/'/, Str::Single, :string + + # Keywords and divisions + rule %r/\b(#{DIVISIONS.join('|')})\b/i, Keyword::Declaration + rule %r/\b(#{SECTIONS.join('|')})\b/i, Keyword::Namespace + rule %r/\b(#{KEYWORDS.join('|')})\b/i, Keyword + + # Numbers + rule %r/\b\d+(\.\d+)?\b/, Num + + # Identifiers + rule %r/[a-zA-Z0-9_-]+/, Name + + # Punctuation and operators + rule %r/[.,;:()]/, Punctuation + rule %r/[+-\/\*=&><]/, Operator + end + + state :string do + rule %r/[^'"\\]+/, Str + rule %r/\\./, Str::Escape + rule %r/["']/, Str, :pop! + end + end + end +end From c62f109089af36a69876bc46ba824f47dc60aaee Mon Sep 17 00:00:00 2001 From: Bart Broere Date: Sat, 7 Sep 2024 10:56:40 +0200 Subject: [PATCH 02/18] Add TODOs --- lib/rouge/lexers/cobol.rb | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/lib/rouge/lexers/cobol.rb b/lib/rouge/lexers/cobol.rb index 1814a254cc..0929c35f5a 100644 --- a/lib/rouge/lexers/cobol.rb +++ b/lib/rouge/lexers/cobol.rb @@ -11,6 +11,8 @@ class COBOL < RegexLexer mimetypes 'text/x-cobol' # List of COBOL keywords + # TODO expand with all keywords listed here: https://www.ibm.com/docs/en/cobol-zos/6.3?topic=appendixes-reserved-words + # TODO but move some that are more operator than keyword to the operators list lower in the file KEYWORDS = %w[ ACCEPT ADD ALTER APPLY CALL CANCEL CLOSE COMPUTE CONTINUE DECLARE DELETE DISPLAY DIVIDE ELSE END-ADD END-CALL END-COMPUTE @@ -24,11 +26,11 @@ class COBOL < RegexLexer # COBOL divisions and sections DIVISIONS = %w[ - IDENTIFICATION ENVIRONMENT DATA PROCEDURE + IDENTIFICATION ENVIRONMENT DATA PROCEDURE DIVISION ] SECTIONS = %w[ - CONFIGURATION INPUT-OUTPUT FILE WORKING-STORAGE LOCAL-STORAGE LINKAGE + CONFIGURATION INPUT-OUTPUT FILE WORKING-STORAGE LOCAL-STORAGE LINKAGE SECTION ] # Define tokens for the lexer @@ -62,6 +64,7 @@ class COBOL < RegexLexer rule %r/[+-\/\*=&><]/, Operator end + # TODO double check string escaping in COBOL state :string do rule %r/[^'"\\]+/, Str rule %r/\\./, Str::Escape From a684e891bd07c85fc7bc908ff210668f11ffde1e Mon Sep 17 00:00:00 2001 From: Bart Broere Date: Sat, 7 Sep 2024 16:51:24 +0200 Subject: [PATCH 03/18] Extend list of COBOL keywords --- lib/rouge/lexers/cobol.rb | 57 ++++++++++++++++++++++++++++++--------- 1 file changed, 45 insertions(+), 12 deletions(-) diff --git a/lib/rouge/lexers/cobol.rb b/lib/rouge/lexers/cobol.rb index 0929c35f5a..3a4931fdcc 100644 --- a/lib/rouge/lexers/cobol.rb +++ b/lib/rouge/lexers/cobol.rb @@ -11,17 +11,48 @@ class COBOL < RegexLexer mimetypes 'text/x-cobol' # List of COBOL keywords - # TODO expand with all keywords listed here: https://www.ibm.com/docs/en/cobol-zos/6.3?topic=appendixes-reserved-words - # TODO but move some that are more operator than keyword to the operators list lower in the file + # TODO Change the order of the keywords below, so cases like END-PERFORM are matched before END is matched KEYWORDS = %w[ - ACCEPT ADD ALTER APPLY CALL CANCEL CLOSE COMPUTE CONTINUE - DECLARE DELETE DISPLAY DIVIDE ELSE END-ADD END-CALL END-COMPUTE - END-DELETE END-DISPLAY END-DIVIDE END-EVALUATE END-IF END-MULTIPLY - END-OF-PAGE END-PERFORM END-READ END-RETURN END-REWRITE END-SEARCH - END-START END-STRING END-SUBTRACT END-UNSTRING END-WRITE EVALUATE - EXIT GOBACK IF INITIALIZE INSPECT MERGE MOVE MULTIPLY NEXT - OPEN PERFORM READ RECEIVE RETURN REWRITE SEARCH SEND SET SORT - START STOP STRING SUBTRACT UNSTRING WRITE + ACCEPT ACCESS ACTIVE-CLASS ADD ADDRESS ADVANCING AFTER ALIGNED ALL ALLOCATE ALPHABET ALPHABETIC ALPHABETIC-LOWER + ALPHABETIC-UPPER ALPHANUMERIC ALPHANUMERIC-EDITED ALSO ALTER ALTERNATE AND ANY ANYCASE APPLY ARE AREA AREAS + ASCENDING ASSIGN AT AUTHOR B-AND B-NOT B-OR B-XOR BASED BASIS BEFORE BEGINNING BINARY BINARY-CHAR BINARY-DOUBLE + BINARY-LONG BINARY-SHORT BIT BLANK BLOCK BOOLEAN BOTTOM BY BYTE-LENGTH CALL CANCEL CBL CD CF CH CHARACTER + CHARACTERS CLASS CLASS-ID CLOCK-UNITS CLOSE COBOL CODE CODE-SET COL COLLATING COLS COLUMN COLUMNS COM-REG COMMA + COMMON COMMUNICATION COMP COMP-1 COMP-2 COMP-3 COMP-4 COMP-5 COMPUTATIONAL COMPUTATIONAL-1 COMPUTATIONAL-2 + COMPUTATIONAL-3 COMPUTATIONAL-4 COMPUTATIONAL-5 COMPUTE CONDITION CONSTANT CONTAINS CONTENT + CONTINUE CONTROL CONTROLS CONVERTING COPY CORR CORRESPONDING COUNT CRT CURRENCY CURSOR DATA-POINTER DATE + DATE-COMPILED DATE-WRITTEN DAY DAY-OF-WEEK DBCS DE DEBUG-CONTENTS DEBUG-ITEM DEBUG-LINE DEBUG-NAME DEBUG-SUB-1 + DEBUG-SUB-2 DEBUG-SUB-3 DEBUGGING DECIMAL-POINT DECLARATIVES DEFAULT DELETE DELIMITED DELIMITER DEPENDING + DESCENDING DESTINATION DETAIL DISABLE DISPLAY DISPLAY-1 DIVIDE DOWN DUPLICATES DYNAMIC EC EGCS EGI + EJECT ELSE EMI ENABLE END END-ACCEPT END-ADD END-CALL END-COMPUTE END-DELETE END-DISPLAY END-DIVIDE END-EVALUATE + END-EXEC END-IF END-INVOKE END-JSON END-MULTIPLY END-OF-PAGE END-PERFORM END-READ END-RECEIVE END-RETURN + END-REWRITE END-SEARCH END-START END-STRING END-SUBTRACT END-UNSTRING END-WRITE END-XML ENDING ENTER ENTRY + EO EOP EQUAL ERROR ESI EVALUATE EVERY EXCEPTION EXCEPTION-OBJECT EXEC EXECUTE EXIT EXTEND EXTERNAL + FACTORY FALSE FD FILE-CONTROL FILLER FINAL FIRST FLOAT-EXTENDED FLOAT-LONG FLOAT-SHORT FOOTING FOR FORMAT + FREE FROM FUNCTION FUNCTION-ID FUNCTION-POINTER GENERATE GET GIVING GLOBAL GO GOBACK GREATER GROUP GROUP-USAGE + HEADING HIGH-VALUE HIGH-VALUES I-O I-O-CONTROL ID IF IN INDEX INDEXED INDICATE INHERITS INITIAL + INITIALIZE INITIATE INPUT INSERT INSPECT INSTALLATION INTERFACE INTERFACE-ID INTO INVALID INVOKE + IS JAVA JNIENVPTR JSON JSON-CODE JSON-STATUS JUST JUSTIFIED KANJI KEY LABEL LAST LEADING LEFT LENGTH LESS LIMIT + LIMITS LINAGE LINAGE-COUNTER LINE LINE-COUNTER LINES LOCALE LOCK LOW-VALUE LOW-VALUES + MEMORY MERGE MESSAGE METHOD METHOD-ID MINUS MODE MODULES MORE-LABELS MOVE MULTIPLE MULTIPLY NATIONAL + NATIONAL-EDITED NATIVE NEGATIVE NESTED NEXT NO NOT NULL NULLS NUMBER NUMERIC NUMERIC-EDITED OBJECT + OBJECT-COMPUTER OBJECT-REFERENCE OCCURS OF OFF OMITTED ON OPEN OPTIONAL OPTIONS OR ORDER ORGANIZATION + OTHER OUTPUT OVERFLOW OVERRIDE PACKED-DECIMAL PADDING PAGE PAGE-COUNTER PASSWORD PERFORM PF PH PIC PICTURE + PLUS POINTER POINTER- POINTER-31 POINTER-32 POINTER-64 POSITION POSITIVE PRESENT PRINTING + PROCEDURE-POINTER PROCEDURES PROCEED PROCESSING PROGRAM PROGRAM-ID PROGRAM-POINTER PROPERTY PROTOTYPE + PURGE QUEUE QUOTE QUOTES RAISE RAISING RANDOM RD READ READY RECEIVE RECORD RECORDING RECORDS RECURSIVE REDEFINES + REEL REFERENCE REFERENCES RELATIVE RELEASE RELOAD REMAINDER REMOVAL RENAMES REPLACE REPLACING REPORT REPORTING + REPORTS REPOSITORY RERUN RESERVE RESET RESUME RETRY RETURN RETURN-CODE RETURNING REVERSED REWIND REWRITE RF RH + RIGHT ROUNDED RUN SAME SCREEN SD SEARCH SECTION SECURITY SEGMENT SEGMENT-LIMIT SELECT SELF SEND SENTENCE + SEPARATE SEQUENCE SEQUENTIAL SERVICE SET SHARING SHIFT-IN SHIFT-OUT SIGN SIZE SKIP1 SKIP2 SKIP3 SORT + SORT-CONTROL SORT-CORE-SIZE SORT-FILE-SIZE SORT-MERGE SORT-MESSAGE SORT-MODE-SIZE SORT-RETURN SOURCE + SOURCE-COMPUTER SOURCES SPACE SPACES SPECIAL-NAMES SQL SQLIMS STANDARD STANDARD-1 STANDARD-2 START STATUS STOP + STRING SUB-QUEUE-1 SUB-QUEUE-2 SUB-QUEUE-3 SUBTRACT SUM SUPER SUPPRESS SYMBOLIC SYNC SYNCHRONIZED SYSTEM-DEFAULT + TABLE TALLY TALLYING TAPE TERMINAL TERMINATE TEST TEXT THAN THEN THROUGH THRU TIME TIMES TITLE TO TOP TRACE + TRAILING TRUE TYPE TYPEDEF UNIT UNIVERSAL UNLOCK UNSTRING UNTIL UP UPON USAGE USE USER-DEFAULT USING UTF-8 + VAL-STATUS VALID VALIDATE VALIDATE-STATUS VALUE VALUES VARYING VOLATILE WHEN WHEN-COMPILED WITH WORDS + WRITE WRITE-ONLY XML XML-CODE XML-EVENT XML-INFORMATION XML-NAMESPACE XML-NAMESPACE-PREFIX + XML-NNAMESPACE XML-NNAMESPACE-PREFIX XML-NTEXT XML-SCHEMA XML-TEXT ZERO ZEROES ZEROS ] # COBOL divisions and sections @@ -42,7 +73,8 @@ class COBOL < RegexLexer mixin :whitespace # Comments - rule %r{(\*)[^\n]*}, Comment::Single + rule %r/\*>.*/, Comment::Single + rule %r/(^.{6})\*/, Comment::Single # Strings rule %r/"/, Str::Double, :string @@ -61,7 +93,8 @@ class COBOL < RegexLexer # Punctuation and operators rule %r/[.,;:()]/, Punctuation - rule %r/[+-\/\*=&><]/, Operator + # TODO Find out what's going wrong in the "+ (2 **" line + rule %r/[+\-\/*=&><]/, Operator end # TODO double check string escaping in COBOL From 084c16dc065091852522c2f05561600b4c529f6c Mon Sep 17 00:00:00 2001 From: Bart Broere Date: Sat, 7 Sep 2024 17:35:18 +0200 Subject: [PATCH 04/18] Fixes --- lib/rouge/lexers/cobol.rb | 39 ++++++++++++++++++++------------------- 1 file changed, 20 insertions(+), 19 deletions(-) diff --git a/lib/rouge/lexers/cobol.rb b/lib/rouge/lexers/cobol.rb index 3a4931fdcc..65c5db8e93 100644 --- a/lib/rouge/lexers/cobol.rb +++ b/lib/rouge/lexers/cobol.rb @@ -18,19 +18,19 @@ class COBOL < RegexLexer ASCENDING ASSIGN AT AUTHOR B-AND B-NOT B-OR B-XOR BASED BASIS BEFORE BEGINNING BINARY BINARY-CHAR BINARY-DOUBLE BINARY-LONG BINARY-SHORT BIT BLANK BLOCK BOOLEAN BOTTOM BY BYTE-LENGTH CALL CANCEL CBL CD CF CH CHARACTER CHARACTERS CLASS CLASS-ID CLOCK-UNITS CLOSE COBOL CODE CODE-SET COL COLLATING COLS COLUMN COLUMNS COM-REG COMMA - COMMON COMMUNICATION COMP COMP-1 COMP-2 COMP-3 COMP-4 COMP-5 COMPUTATIONAL COMPUTATIONAL-1 COMPUTATIONAL-2 - COMPUTATIONAL-3 COMPUTATIONAL-4 COMPUTATIONAL-5 COMPUTE CONDITION CONSTANT CONTAINS CONTENT + COMMON COMMUNICATION COMP-1 COMP-2 COMP-3 COMP-4 COMP-5 COMP COMPUTATIONAL-1 COMPUTATIONAL-2 + COMPUTATIONAL-3 COMPUTATIONAL-4 COMPUTATIONAL-5 COMPUTATIONAL COMPUTE CONDITION CONSTANT CONTAINS CONTENT CONTINUE CONTROL CONTROLS CONVERTING COPY CORR CORRESPONDING COUNT CRT CURRENCY CURSOR DATA-POINTER DATE DATE-COMPILED DATE-WRITTEN DAY DAY-OF-WEEK DBCS DE DEBUG-CONTENTS DEBUG-ITEM DEBUG-LINE DEBUG-NAME DEBUG-SUB-1 DEBUG-SUB-2 DEBUG-SUB-3 DEBUGGING DECIMAL-POINT DECLARATIVES DEFAULT DELETE DELIMITED DELIMITER DEPENDING - DESCENDING DESTINATION DETAIL DISABLE DISPLAY DISPLAY-1 DIVIDE DOWN DUPLICATES DYNAMIC EC EGCS EGI - EJECT ELSE EMI ENABLE END END-ACCEPT END-ADD END-CALL END-COMPUTE END-DELETE END-DISPLAY END-DIVIDE END-EVALUATE + DESCENDING DESTINATION DETAIL DISABLE DISPLAY-1 DISPLAY DIVIDE DOWN DUPLICATES DYNAMIC EC EGCS EGI + EJECT ELSE EMI ENABLE END-ACCEPT END-ADD END-CALL END-COMPUTE END-DELETE END-DISPLAY END-DIVIDE END-EVALUATE END-EXEC END-IF END-INVOKE END-JSON END-MULTIPLY END-OF-PAGE END-PERFORM END-READ END-RECEIVE END-RETURN - END-REWRITE END-SEARCH END-START END-STRING END-SUBTRACT END-UNSTRING END-WRITE END-XML ENDING ENTER ENTRY + END-REWRITE END-SEARCH END-START END-STRING END-SUBTRACT END-UNSTRING END-WRITE END-XML ENDING END ENTER ENTRY EO EOP EQUAL ERROR ESI EVALUATE EVERY EXCEPTION EXCEPTION-OBJECT EXEC EXECUTE EXIT EXTEND EXTERNAL FACTORY FALSE FD FILE-CONTROL FILLER FINAL FIRST FLOAT-EXTENDED FLOAT-LONG FLOAT-SHORT FOOTING FOR FORMAT FREE FROM FUNCTION FUNCTION-ID FUNCTION-POINTER GENERATE GET GIVING GLOBAL GO GOBACK GREATER GROUP GROUP-USAGE - HEADING HIGH-VALUE HIGH-VALUES I-O I-O-CONTROL ID IF IN INDEX INDEXED INDICATE INHERITS INITIAL + HEADING HIGH-VALUE HIGH-VALUES I-O-CONTROL I-O ID IF IN INDEX INDEXED INDICATE INHERITS INITIAL INITIALIZE INITIATE INPUT INSERT INSPECT INSTALLATION INTERFACE INTERFACE-ID INTO INVALID INVOKE IS JAVA JNIENVPTR JSON JSON-CODE JSON-STATUS JUST JUSTIFIED KANJI KEY LABEL LAST LEADING LEFT LENGTH LESS LIMIT LIMITS LINAGE LINAGE-COUNTER LINE LINE-COUNTER LINES LOCALE LOCK LOW-VALUE LOW-VALUES @@ -39,20 +39,20 @@ class COBOL < RegexLexer OBJECT-COMPUTER OBJECT-REFERENCE OCCURS OF OFF OMITTED ON OPEN OPTIONAL OPTIONS OR ORDER ORGANIZATION OTHER OUTPUT OVERFLOW OVERRIDE PACKED-DECIMAL PADDING PAGE PAGE-COUNTER PASSWORD PERFORM PF PH PIC PICTURE PLUS POINTER POINTER- POINTER-31 POINTER-32 POINTER-64 POSITION POSITIVE PRESENT PRINTING - PROCEDURE-POINTER PROCEDURES PROCEED PROCESSING PROGRAM PROGRAM-ID PROGRAM-POINTER PROPERTY PROTOTYPE + PROCEDURE-POINTER PROCEDURES PROCEED PROCESSING PROGRAM-ID PROGRAM-POINTER PROGRAM PROPERTY PROTOTYPE PURGE QUEUE QUOTE QUOTES RAISE RAISING RANDOM RD READ READY RECEIVE RECORD RECORDING RECORDS RECURSIVE REDEFINES REEL REFERENCE REFERENCES RELATIVE RELEASE RELOAD REMAINDER REMOVAL RENAMES REPLACE REPLACING REPORT REPORTING REPORTS REPOSITORY RERUN RESERVE RESET RESUME RETRY RETURN RETURN-CODE RETURNING REVERSED REWIND REWRITE RF RH RIGHT ROUNDED RUN SAME SCREEN SD SEARCH SECTION SECURITY SEGMENT SEGMENT-LIMIT SELECT SELF SEND SENTENCE - SEPARATE SEQUENCE SEQUENTIAL SERVICE SET SHARING SHIFT-IN SHIFT-OUT SIGN SIZE SKIP1 SKIP2 SKIP3 SORT - SORT-CONTROL SORT-CORE-SIZE SORT-FILE-SIZE SORT-MERGE SORT-MESSAGE SORT-MODE-SIZE SORT-RETURN SOURCE - SOURCE-COMPUTER SOURCES SPACE SPACES SPECIAL-NAMES SQL SQLIMS STANDARD STANDARD-1 STANDARD-2 START STATUS STOP + SEPARATE SEQUENCE SEQUENTIAL SERVICE SET SHARING SHIFT-IN SHIFT-OUT SIGN SIZE SKIP1 SKIP2 SKIP3 + SORT-CONTROL SORT-CORE-SIZE SORT-FILE-SIZE SORT-MERGE SORT-MESSAGE SORT-MODE-SIZE SORT-RETURN SORT + SOURCE-COMPUTER SOURCES SOURCE SPACE SPACES SPECIAL-NAMES SQL SQLIMS STANDARD-1 STANDARD-2 STANDARD START STATUS STOP STRING SUB-QUEUE-1 SUB-QUEUE-2 SUB-QUEUE-3 SUBTRACT SUM SUPER SUPPRESS SYMBOLIC SYNC SYNCHRONIZED SYSTEM-DEFAULT TABLE TALLY TALLYING TAPE TERMINAL TERMINATE TEST TEXT THAN THEN THROUGH THRU TIME TIMES TITLE TO TOP TRACE TRAILING TRUE TYPE TYPEDEF UNIT UNIVERSAL UNLOCK UNSTRING UNTIL UP UPON USAGE USE USER-DEFAULT USING UTF-8 VAL-STATUS VALID VALIDATE VALIDATE-STATUS VALUE VALUES VARYING VOLATILE WHEN WHEN-COMPILED WITH WORDS - WRITE WRITE-ONLY XML XML-CODE XML-EVENT XML-INFORMATION XML-NAMESPACE XML-NAMESPACE-PREFIX - XML-NNAMESPACE XML-NNAMESPACE-PREFIX XML-NTEXT XML-SCHEMA XML-TEXT ZERO ZEROES ZEROS + WRITE WRITE-ONLY XML-CODE XML-EVENT XML-INFORMATION XML-NAMESPACE XML-NAMESPACE-PREFIX + XML-NNAMESPACE XML-NNAMESPACE-PREFIX XML-NTEXT XML-SCHEMA XML-TEXT XML ZERO ZEROES ZEROS ] # COBOL divisions and sections @@ -72,10 +72,6 @@ class COBOL < RegexLexer state :root do mixin :whitespace - # Comments - rule %r/\*>.*/, Comment::Single - rule %r/(^.{6})\*/, Comment::Single - # Strings rule %r/"/, Str::Double, :string rule %r/'/, Str::Single, :string @@ -86,15 +82,20 @@ class COBOL < RegexLexer rule %r/\b(#{KEYWORDS.join('|')})\b/i, Keyword # Numbers - rule %r/\b\d+(\.\d+)?\b/, Num + rule %r/[-+]?\b\d+(\.\d+)?\b/, Num # Identifiers rule %r/[a-zA-Z0-9_-]+/, Name - # Punctuation and operators + # Punctuation rule %r/[.,;:()]/, Punctuation # TODO Find out what's going wrong in the "+ (2 **" line - rule %r/[+\-\/*=&><]/, Operator + + # Comments + rule %r/\*>.*/, Comment::Single + + # Operators + rule %r/[+\-*\/><=]/, Operator end # TODO double check string escaping in COBOL From 63adce8d0f7fe0faefca7576f9b1378fea624739 Mon Sep 17 00:00:00 2001 From: Bart Broere Date: Sat, 7 Sep 2024 18:09:16 +0200 Subject: [PATCH 05/18] Work work --- lib/rouge/lexers/cobol.rb | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/lib/rouge/lexers/cobol.rb b/lib/rouge/lexers/cobol.rb index 65c5db8e93..183e512a3c 100644 --- a/lib/rouge/lexers/cobol.rb +++ b/lib/rouge/lexers/cobol.rb @@ -7,11 +7,10 @@ class COBOL < RegexLexer title 'COBOL' desc 'COBOL (Common Business-Oriented Language) programming language' tag 'cobol' - filenames '*.cob', '*.cbl', '*.cobol', '*.cpy' + filenames '*.cob', '*.cbl' mimetypes 'text/x-cobol' # List of COBOL keywords - # TODO Change the order of the keywords below, so cases like END-PERFORM are matched before END is matched KEYWORDS = %w[ ACCEPT ACCESS ACTIVE-CLASS ADD ADDRESS ADVANCING AFTER ALIGNED ALL ALLOCATE ALPHABET ALPHABETIC ALPHABETIC-LOWER ALPHABETIC-UPPER ALPHANUMERIC ALPHANUMERIC-EDITED ALSO ALTER ALTERNATE AND ANY ANYCASE APPLY ARE AREA AREAS @@ -99,11 +98,14 @@ class COBOL < RegexLexer end # TODO double check string escaping in COBOL + # TODO Fix that a string opened by " can't be closed by ' state :string do rule %r/[^'"\\]+/, Str rule %r/\\./, Str::Escape rule %r/["']/, Str, :pop! end + + # TODO match lines with the asterisk in position 7, preceded by spaces or the word Debug end end end From baa0fcfd2d6cb2e71e52d8c1c7da9b47871a206d Mon Sep 17 00:00:00 2001 From: Bart Broere Date: Sun, 8 Sep 2024 11:42:20 +0200 Subject: [PATCH 06/18] Create some fixes --- lib/rouge/lexers/cobol.rb | 39 +++++++++++++++++++++++++-------------- 1 file changed, 25 insertions(+), 14 deletions(-) diff --git a/lib/rouge/lexers/cobol.rb b/lib/rouge/lexers/cobol.rb index 183e512a3c..34a1fd693a 100644 --- a/lib/rouge/lexers/cobol.rb +++ b/lib/rouge/lexers/cobol.rb @@ -32,12 +32,12 @@ class COBOL < RegexLexer HEADING HIGH-VALUE HIGH-VALUES I-O-CONTROL I-O ID IF IN INDEX INDEXED INDICATE INHERITS INITIAL INITIALIZE INITIATE INPUT INSERT INSPECT INSTALLATION INTERFACE INTERFACE-ID INTO INVALID INVOKE IS JAVA JNIENVPTR JSON JSON-CODE JSON-STATUS JUST JUSTIFIED KANJI KEY LABEL LAST LEADING LEFT LENGTH LESS LIMIT - LIMITS LINAGE LINAGE-COUNTER LINE LINE-COUNTER LINES LOCALE LOCK LOW-VALUE LOW-VALUES + LIMITS LINAGE-COUNTER LINAGE LINE-COUNTER LINES LINE LOCALE LOCK LOW-VALUE LOW-VALUES MEMORY MERGE MESSAGE METHOD METHOD-ID MINUS MODE MODULES MORE-LABELS MOVE MULTIPLE MULTIPLY NATIONAL NATIONAL-EDITED NATIVE NEGATIVE NESTED NEXT NO NOT NULL NULLS NUMBER NUMERIC NUMERIC-EDITED OBJECT OBJECT-COMPUTER OBJECT-REFERENCE OCCURS OF OFF OMITTED ON OPEN OPTIONAL OPTIONS OR ORDER ORGANIZATION OTHER OUTPUT OVERFLOW OVERRIDE PACKED-DECIMAL PADDING PAGE PAGE-COUNTER PASSWORD PERFORM PF PH PIC PICTURE - PLUS POINTER POINTER- POINTER-31 POINTER-32 POINTER-64 POSITION POSITIVE PRESENT PRINTING + PLUS POINTER- POINTER-31 POINTER-32 POINTER-64 POINTER POSITION POSITIVE PRESENT PRINTING PROCEDURE-POINTER PROCEDURES PROCEED PROCESSING PROGRAM-ID PROGRAM-POINTER PROGRAM PROPERTY PROTOTYPE PURGE QUEUE QUOTE QUOTES RAISE RAISING RANDOM RD READ READY RECEIVE RECORD RECORDING RECORDS RECURSIVE REDEFINES REEL REFERENCE REFERENCES RELATIVE RELEASE RELOAD REMAINDER REMOVAL RENAMES REPLACE REPLACING REPORT REPORTING @@ -63,17 +63,13 @@ class COBOL < RegexLexer CONFIGURATION INPUT-OUTPUT FILE WORKING-STORAGE LOCAL-STORAGE LINKAGE SECTION ] - # Define tokens for the lexer - state :whitespace do - rule %r/\s+/m, Text::Whitespace - end - state :root do - mixin :whitespace + # First detect the comments + rule %r/^( \*).*|^(^Debug \*).*/, Comment::Special # Strings - rule %r/"/, Str::Double, :string - rule %r/'/, Str::Single, :string + rule %r/"/, Str::Double, :string_double + rule %r/'/, Str::Single, :string_single # Keywords and divisions rule %r/\b(#{DIVISIONS.join('|')})\b/i, Keyword::Declaration @@ -95,17 +91,32 @@ class COBOL < RegexLexer # Operators rule %r/[+\-*\/><=]/, Operator + + # Whitespace remaining + rule %r/\s/, Text::Whitespace end # TODO double check string escaping in COBOL # TODO Fix that a string opened by " can't be closed by ' - state :string do - rule %r/[^'"\\]+/, Str + # TODO Fix that strings can't be multi-line + + # Handle strings where " opens a string and must be closed by " + state :string_double do + # Ensure strings can't span multiple lines + rule %r/[^"\\\n]+/, Str rule %r/\\./, Str::Escape - rule %r/["']/, Str, :pop! + rule %r/"/, Str::Double, :pop! + rule %r/\n/, Error # Flag an error if a string goes to the next line end - # TODO match lines with the asterisk in position 7, preceded by spaces or the word Debug + # Handle strings where ' opens a string and must be closed by ' + state :string_single do + # Ensure strings can't span multiple lines + rule %r/[^'\\\n]+/, Str + rule %r/\\./, Str::Escape + rule %r/'/, Str::Single, :pop! + rule %r/\n/, Error # Flag an error if a string goes to the next line + end end end end From cebb2b2263b26cbee558006b1b1a2f09d98801f9 Mon Sep 17 00:00:00 2001 From: Bart Broere Date: Sun, 8 Sep 2024 12:17:32 +0200 Subject: [PATCH 07/18] Follow the lexer development guide --- lib/rouge/demos/cobol | 6 +++ spec/lexers/cobol_spec.rb | 79 +++++++++++++++++++++++++++++++++++++++ spec/visual/samples/cobol | 6 +++ 3 files changed, 91 insertions(+) create mode 100644 lib/rouge/demos/cobol create mode 100644 spec/lexers/cobol_spec.rb create mode 100644 spec/visual/samples/cobol diff --git a/lib/rouge/demos/cobol b/lib/rouge/demos/cobol new file mode 100644 index 0000000000..c41631ece8 --- /dev/null +++ b/lib/rouge/demos/cobol @@ -0,0 +1,6 @@ + IDENTIFICATION DIVISION. + PROGRAM-ID. hello-world. + *This program will be replaced by a more attractive demo + PROCEDURE DIVISION. + DISPLAY "Hello, world!" + . \ No newline at end of file diff --git a/spec/lexers/cobol_spec.rb b/spec/lexers/cobol_spec.rb new file mode 100644 index 0000000000..17f15395cd --- /dev/null +++ b/spec/lexers/cobol_spec.rb @@ -0,0 +1,79 @@ +# -*- coding: utf-8 -*- # +# frozen_string_literal: true + +describe Rouge::Lexers::COBOL do + let(:subject) { Rouge::Lexers::COBOL.new } + + include Support::Lexing + + it 'highlights COBOL keywords correctly' do + tokens = subject.lex('IDENTIFICATION DIVISION.').to_a + assert { tokens.size == 4 } + assert { tokens.first[0] == Token['Keyword.Declaration'] } + assert { tokens.last[0] == Token['Punctuation'] } + end + + it 'highlights COBOL sections correctly' do + tokens = subject.lex('WORKING-STORAGE SECTION.').to_a + assert { tokens.size == 4 } + assert { tokens.first[0] == Token['Keyword.Namespace'] } + assert { tokens.last[0] == Token['Punctuation'] } + end + + it 'handles comments correctly' do + tokens = subject.lex('*> This is a comment').to_a + assert { tokens.size == 1 } + assert { tokens.first[0] == Token['Comment.Single'] } + end + + it 'highlights special comments with asterisks in position 7 correctly' do + tokens = subject.lex(' * This is a special comment').to_a + assert { tokens.size == 1 } + assert { tokens.first[0] == Token['Comment.Special'] } + + tokens = subject.lex('Debug * This is a Debug comment').to_a + assert { tokens.size == 1 } + assert { tokens.first[0] == Token['Comment.Special'] } + end + + it 'ensures strings cannot be multi-line and must match opening and closing quotes' do + tokens = subject.lex('"This is a string"').to_a + assert { tokens.size == 3 } + assert { tokens.first[0] == Token['Literal.String.Double'] } + + tokens = subject.lex("'This is a string'").to_a + assert { tokens.size == 3 } + assert { tokens.first[0] == Token['Literal.String.Single'] } + + tokens = subject.lex('"This string doesn\'t close').to_a + assert { tokens.size == 2 } # Should detect an unclosed string and raise an error or issue a second token + end + + it 'recognizes operators like "+ (2 ** ...)" correctly' do + tokens = subject.lex('X = 2 + (2 ** 3)').to_a + assert { tokens.size == 15 } + assert { tokens[0][0] == Token['Name'] } + assert { tokens[2][0] == Token['Operator'] } + assert { tokens[4][0] == Token['Literal.Number'] } + assert { tokens[6][0] == Token['Operator'] } + assert { tokens[8][0] == Token['Punctuation'] } + assert { tokens[9][0] == Token['Literal.Number'] } + assert { tokens[11][0] == Token['Operator'] } + assert { tokens[13][0] == Token['Literal.Number'] } + assert { tokens[14][0] == Token['Punctuation'] } + + end + + describe 'guessing' do + include Support::Guessing + + it 'guesses by filename' do + assert_guess :filename => 'foo.cob' + assert_guess :filename => 'foo.cbl' + end + + it 'guesses by mimetype' do + assert_guess :mimetype => 'text/x-cobol' + end + end +end diff --git a/spec/visual/samples/cobol b/spec/visual/samples/cobol new file mode 100644 index 0000000000..c41631ece8 --- /dev/null +++ b/spec/visual/samples/cobol @@ -0,0 +1,6 @@ + IDENTIFICATION DIVISION. + PROGRAM-ID. hello-world. + *This program will be replaced by a more attractive demo + PROCEDURE DIVISION. + DISPLAY "Hello, world!" + . \ No newline at end of file From edf23673305009f8b1b567de3dcf950757e588b2 Mon Sep 17 00:00:00 2001 From: Bart Broere Date: Sun, 8 Sep 2024 12:21:06 +0200 Subject: [PATCH 08/18] Add some creative commons licensed COBOL code as example --- lib/rouge/demos/cobol | 188 +++++++++++++++++++++++++++++++++++++- spec/visual/samples/cobol | 188 +++++++++++++++++++++++++++++++++++++- 2 files changed, 370 insertions(+), 6 deletions(-) diff --git a/lib/rouge/demos/cobol b/lib/rouge/demos/cobol index c41631ece8..07ed0d9cdd 100644 --- a/lib/rouge/demos/cobol +++ b/lib/rouge/demos/cobol @@ -1,6 +1,188 @@ + *----------------------- + * Copyright Contributors to the COBOL Programming Course + * SPDX-License-Identifier: CC-BY-4.0 + *----------------------- IDENTIFICATION DIVISION. - PROGRAM-ID. hello-world. - *This program will be replaced by a more attractive demo + *----------------------- + PROGRAM-ID. CBLDB23 + AUTHOR. Otto B. Relational + + ENVIRONMENT DIVISION. + *-------------------- + CONFIGURATION SECTION. + INPUT-OUTPUT SECTION. + FILE-CONTROL. + SELECT REPOUT + ASSIGN TO UT-S-REPORT. + SELECT CARDIN + ASSIGN TO DA-S-CARDIN. + + DATA DIVISION. + *------------- + FILE SECTION. + FD REPOUT + RECORD CONTAINS 120 CHARACTERS + LABEL RECORDS ARE OMITTED + RECORDING MODE F + DATA RECORD IS REPREC. + 01 REPREC. + 05 ACCT-NO-O PIC X(8). + 05 ACCT-LASTN-O PIC X(20). + 05 ACCT-FIRSTN-O PIC X(15). + 05 ACCT-ADDR3-O PIC X(15). + FD CARDIN + RECORD CONTAINS 80 CHARACTERS + BLOCK CONTAINS 0 RECORDS + RECORDING MODE F + LABEL RECORDS ARE OMITTED. + 01 CARDREC PIC X(80). + + WORKING-STORAGE SECTION. + ***************************************************** + * STRUCTURE FOR INPUT * + ***************************************************** + 01 IOAREA. + 02 STATE PIC X(25). + 02 FILLER PIC X(55). + 77 INPUT-SWITCH PIC X VALUE 'Y'. + 88 NOMORE-INPUT VALUE 'N'. + ***************************************************** + * SQL INCLUDE FOR SQLCA * + ***************************************************** + EXEC SQL INCLUDE SQLCA END-EXEC. + ***************************************************** + * DECLARATIONS FOR SQL ERROR HANDLING * + ***************************************************** + 01 ERROR-MESSAGE. + 02 ERROR-LEN PIC S9(4) COMP VALUE +1320. + 02 ERROR-TEXT PIC X(132) OCCURS 10 TIMES + INDEXED BY ERROR-INDEX. + 77 ERROR-TEXT-LEN PIC S9(9) COMP VALUE +132. + 77 ERROR-TEXT-HBOUND PIC S9(9) COMP VALUE +10. + * USER DEFINED ERROR MESSAGE + 01 UD-ERROR-MESSAGE PIC X(80) VALUE SPACES. + ***************************************************** + * SQL DECLARATION FOR VIEW ACCOUNTS * + ***************************************************** + EXEC SQL DECLARE Z#####T TABLE + (ACCTNO CHAR(8) NOT NULL, + LIMIT DECIMAL(9,2) , + BALANCE DECIMAL(9,2) , + SURNAME CHAR(20) NOT NULL, + FIRSTN CHAR(15) NOT NULL, + ADDRESS1 CHAR(25) NOT NULL, + ADDRESS2 CHAR(20) NOT NULL, + ADDRESS3 CHAR(15) NOT NULL, + RESERVED CHAR(7) NOT NULL, + COMMENTS CHAR(50) NOT NULL) + END-EXEC. + ***************************************************** + * SQL CURSORS * + ***************************************************** + EXEC SQL DECLARE CUR1 CURSOR FOR + SELECT * FROM Z#####T + END-EXEC. + EXEC SQL DECLARE CUR2 CURSOR FOR + SELECT * + FROM Z#####T + WHERE ADDRESS3 = :STATE + END-EXEC. + ***************************************************** + * STRUCTURE FOR CUSTOMER RECORD * + ***************************************************** + 01 CUSTOMER-RECORD. + 02 ACCT-NO PIC X(8). + 02 ACCT-LIMIT PIC S9(7)V99 COMP-3. + 02 ACCT-BALANCE PIC S9(7)V99 COMP-3. + 02 ACCT-LASTN PIC X(20). + 02 ACCT-FIRSTN PIC X(15). + 02 ACCT-ADDR1 PIC X(25). + 02 ACCT-ADDR2 PIC X(20). + 02 ACCT-ADDR3 PIC X(15). + 02 ACCT-RSRVD PIC X(7). + 02 ACCT-COMMENT PIC X(50). + PROCEDURE DIVISION. - DISPLAY "Hello, world!" + *------------------ + PROG-START. + OPEN INPUT CARDIN. + OPEN OUTPUT REPOUT. + READ CARDIN RECORD INTO IOAREA + AT END SET NOMORE-INPUT TO TRUE. + PERFORM PROCESS-INPUT + UNTIL NOMORE-INPUT. + PROG-END. + CLOSE CARDIN + REPOUT. + GOBACK. + PROCESS-INPUT. + IF STATE = '*' + PERFORM GET-ALL + ELSE + PERFORM GET-SPECIFIC. + READ CARDIN RECORD INTO IOAREA + AT END SET NOMORE-INPUT TO TRUE. + GET-ALL. + EXEC SQL OPEN CUR1 END-EXEC. + IF SQLCODE NOT = 0 THEN + MOVE 'OPEN CUR1' TO UD-ERROR-MESSAGE + PERFORM SQL-ERROR-HANDLING + END-IF + EXEC SQL FETCH CUR1 INTO :CUSTOMER-RECORD END-EXEC. + PERFORM PRINT-ALL + UNTIL SQLCODE IS NOT EQUAL TO ZERO. + IF SQLCODE NOT = 100 THEN + MOVE 'FETCH CUR1' TO UD-ERROR-MESSAGE + PERFORM SQL-ERROR-HANDLING + END-IF + EXEC SQL CLOSE CUR1 END-EXEC. + IF SQLCODE NOT = 0 THEN + MOVE 'CLOSE CUR1' TO UD-ERROR-MESSAGE + PERFORM SQL-ERROR-HANDLING + END-IF + . + PRINT-ALL. + PERFORM PRINT-A-LINE. + EXEC SQL FETCH CUR1 INTO :CUSTOMER-RECORD END-EXEC. + GET-SPECIFIC. + EXEC SQL OPEN CUR2 END-EXEC. + IF SQLCODE NOT = 0 THEN + MOVE 'OPEN CUR2' TO UD-ERROR-MESSAGE + PERFORM SQL-ERROR-HANDLING + END-IF + EXEC SQL FETCH CUR2 INTO :CUSTOMER-RECORD END-EXEC. + PERFORM PRINT-SPECIFIC + UNTIL SQLCODE IS NOT EQUAL TO ZERO. + IF SQLCODE NOT = 100 THEN + MOVE 'FETCH CUR2' TO UD-ERROR-MESSAGE + PERFORM SQL-ERROR-HANDLING + END-IF + EXEC SQL CLOSE CUR2 END-EXEC. + IF SQLCODE NOT = 0 THEN + MOVE 'CLOSE CUR2' TO UD-ERROR-MESSAGE + PERFORM SQL-ERROR-HANDLING + END-IF + . + PRINT-SPECIFIC. + PERFORM PRINT-A-LINE. + EXEC SQL FETCH CUR2 INTO :CUSTOMER-RECORD END-EXEC. + PRINT-A-LINE. + MOVE ACCT-NO TO ACCT-NO-O. + MOVE ACCT-LASTN TO ACCT-LASTN-O. + MOVE ACCT-FIRSTN TO ACCT-FIRSTN-O. + MOVE ACCT-ADDR3 TO ACCT-ADDR3-O. + WRITE REPREC AFTER ADVANCING 2 LINES. + + SQL-ERROR-HANDLING. + DISPLAY 'ERROR AT ' FUNCTION TRIM(UD-ERROR-MESSAGE, TRAILING) + CALL 'DSNTIAR' USING SQLCA ERROR-MESSAGE ERROR-TEXT-LEN. + PERFORM VARYING ERROR-INDEX FROM 1 BY 1 + UNTIL ERROR-INDEX > ERROR-TEXT-HBOUND + OR ERROR-TEXT(ERROR-INDEX) = SPACES + DISPLAY FUNCTION TRIM(ERROR-TEXT(ERROR-INDEX), TRAILING) + END-PERFORM + IF SQLCODE NOT = 0 AND SQLCODE NOT = 100 + MOVE 1000 TO RETURN-CODE + STOP RUN + END-IF . \ No newline at end of file diff --git a/spec/visual/samples/cobol b/spec/visual/samples/cobol index c41631ece8..07ed0d9cdd 100644 --- a/spec/visual/samples/cobol +++ b/spec/visual/samples/cobol @@ -1,6 +1,188 @@ + *----------------------- + * Copyright Contributors to the COBOL Programming Course + * SPDX-License-Identifier: CC-BY-4.0 + *----------------------- IDENTIFICATION DIVISION. - PROGRAM-ID. hello-world. - *This program will be replaced by a more attractive demo + *----------------------- + PROGRAM-ID. CBLDB23 + AUTHOR. Otto B. Relational + + ENVIRONMENT DIVISION. + *-------------------- + CONFIGURATION SECTION. + INPUT-OUTPUT SECTION. + FILE-CONTROL. + SELECT REPOUT + ASSIGN TO UT-S-REPORT. + SELECT CARDIN + ASSIGN TO DA-S-CARDIN. + + DATA DIVISION. + *------------- + FILE SECTION. + FD REPOUT + RECORD CONTAINS 120 CHARACTERS + LABEL RECORDS ARE OMITTED + RECORDING MODE F + DATA RECORD IS REPREC. + 01 REPREC. + 05 ACCT-NO-O PIC X(8). + 05 ACCT-LASTN-O PIC X(20). + 05 ACCT-FIRSTN-O PIC X(15). + 05 ACCT-ADDR3-O PIC X(15). + FD CARDIN + RECORD CONTAINS 80 CHARACTERS + BLOCK CONTAINS 0 RECORDS + RECORDING MODE F + LABEL RECORDS ARE OMITTED. + 01 CARDREC PIC X(80). + + WORKING-STORAGE SECTION. + ***************************************************** + * STRUCTURE FOR INPUT * + ***************************************************** + 01 IOAREA. + 02 STATE PIC X(25). + 02 FILLER PIC X(55). + 77 INPUT-SWITCH PIC X VALUE 'Y'. + 88 NOMORE-INPUT VALUE 'N'. + ***************************************************** + * SQL INCLUDE FOR SQLCA * + ***************************************************** + EXEC SQL INCLUDE SQLCA END-EXEC. + ***************************************************** + * DECLARATIONS FOR SQL ERROR HANDLING * + ***************************************************** + 01 ERROR-MESSAGE. + 02 ERROR-LEN PIC S9(4) COMP VALUE +1320. + 02 ERROR-TEXT PIC X(132) OCCURS 10 TIMES + INDEXED BY ERROR-INDEX. + 77 ERROR-TEXT-LEN PIC S9(9) COMP VALUE +132. + 77 ERROR-TEXT-HBOUND PIC S9(9) COMP VALUE +10. + * USER DEFINED ERROR MESSAGE + 01 UD-ERROR-MESSAGE PIC X(80) VALUE SPACES. + ***************************************************** + * SQL DECLARATION FOR VIEW ACCOUNTS * + ***************************************************** + EXEC SQL DECLARE Z#####T TABLE + (ACCTNO CHAR(8) NOT NULL, + LIMIT DECIMAL(9,2) , + BALANCE DECIMAL(9,2) , + SURNAME CHAR(20) NOT NULL, + FIRSTN CHAR(15) NOT NULL, + ADDRESS1 CHAR(25) NOT NULL, + ADDRESS2 CHAR(20) NOT NULL, + ADDRESS3 CHAR(15) NOT NULL, + RESERVED CHAR(7) NOT NULL, + COMMENTS CHAR(50) NOT NULL) + END-EXEC. + ***************************************************** + * SQL CURSORS * + ***************************************************** + EXEC SQL DECLARE CUR1 CURSOR FOR + SELECT * FROM Z#####T + END-EXEC. + EXEC SQL DECLARE CUR2 CURSOR FOR + SELECT * + FROM Z#####T + WHERE ADDRESS3 = :STATE + END-EXEC. + ***************************************************** + * STRUCTURE FOR CUSTOMER RECORD * + ***************************************************** + 01 CUSTOMER-RECORD. + 02 ACCT-NO PIC X(8). + 02 ACCT-LIMIT PIC S9(7)V99 COMP-3. + 02 ACCT-BALANCE PIC S9(7)V99 COMP-3. + 02 ACCT-LASTN PIC X(20). + 02 ACCT-FIRSTN PIC X(15). + 02 ACCT-ADDR1 PIC X(25). + 02 ACCT-ADDR2 PIC X(20). + 02 ACCT-ADDR3 PIC X(15). + 02 ACCT-RSRVD PIC X(7). + 02 ACCT-COMMENT PIC X(50). + PROCEDURE DIVISION. - DISPLAY "Hello, world!" + *------------------ + PROG-START. + OPEN INPUT CARDIN. + OPEN OUTPUT REPOUT. + READ CARDIN RECORD INTO IOAREA + AT END SET NOMORE-INPUT TO TRUE. + PERFORM PROCESS-INPUT + UNTIL NOMORE-INPUT. + PROG-END. + CLOSE CARDIN + REPOUT. + GOBACK. + PROCESS-INPUT. + IF STATE = '*' + PERFORM GET-ALL + ELSE + PERFORM GET-SPECIFIC. + READ CARDIN RECORD INTO IOAREA + AT END SET NOMORE-INPUT TO TRUE. + GET-ALL. + EXEC SQL OPEN CUR1 END-EXEC. + IF SQLCODE NOT = 0 THEN + MOVE 'OPEN CUR1' TO UD-ERROR-MESSAGE + PERFORM SQL-ERROR-HANDLING + END-IF + EXEC SQL FETCH CUR1 INTO :CUSTOMER-RECORD END-EXEC. + PERFORM PRINT-ALL + UNTIL SQLCODE IS NOT EQUAL TO ZERO. + IF SQLCODE NOT = 100 THEN + MOVE 'FETCH CUR1' TO UD-ERROR-MESSAGE + PERFORM SQL-ERROR-HANDLING + END-IF + EXEC SQL CLOSE CUR1 END-EXEC. + IF SQLCODE NOT = 0 THEN + MOVE 'CLOSE CUR1' TO UD-ERROR-MESSAGE + PERFORM SQL-ERROR-HANDLING + END-IF + . + PRINT-ALL. + PERFORM PRINT-A-LINE. + EXEC SQL FETCH CUR1 INTO :CUSTOMER-RECORD END-EXEC. + GET-SPECIFIC. + EXEC SQL OPEN CUR2 END-EXEC. + IF SQLCODE NOT = 0 THEN + MOVE 'OPEN CUR2' TO UD-ERROR-MESSAGE + PERFORM SQL-ERROR-HANDLING + END-IF + EXEC SQL FETCH CUR2 INTO :CUSTOMER-RECORD END-EXEC. + PERFORM PRINT-SPECIFIC + UNTIL SQLCODE IS NOT EQUAL TO ZERO. + IF SQLCODE NOT = 100 THEN + MOVE 'FETCH CUR2' TO UD-ERROR-MESSAGE + PERFORM SQL-ERROR-HANDLING + END-IF + EXEC SQL CLOSE CUR2 END-EXEC. + IF SQLCODE NOT = 0 THEN + MOVE 'CLOSE CUR2' TO UD-ERROR-MESSAGE + PERFORM SQL-ERROR-HANDLING + END-IF + . + PRINT-SPECIFIC. + PERFORM PRINT-A-LINE. + EXEC SQL FETCH CUR2 INTO :CUSTOMER-RECORD END-EXEC. + PRINT-A-LINE. + MOVE ACCT-NO TO ACCT-NO-O. + MOVE ACCT-LASTN TO ACCT-LASTN-O. + MOVE ACCT-FIRSTN TO ACCT-FIRSTN-O. + MOVE ACCT-ADDR3 TO ACCT-ADDR3-O. + WRITE REPREC AFTER ADVANCING 2 LINES. + + SQL-ERROR-HANDLING. + DISPLAY 'ERROR AT ' FUNCTION TRIM(UD-ERROR-MESSAGE, TRAILING) + CALL 'DSNTIAR' USING SQLCA ERROR-MESSAGE ERROR-TEXT-LEN. + PERFORM VARYING ERROR-INDEX FROM 1 BY 1 + UNTIL ERROR-INDEX > ERROR-TEXT-HBOUND + OR ERROR-TEXT(ERROR-INDEX) = SPACES + DISPLAY FUNCTION TRIM(ERROR-TEXT(ERROR-INDEX), TRAILING) + END-PERFORM + IF SQLCODE NOT = 0 AND SQLCODE NOT = 100 + MOVE 1000 TO RETURN-CODE + STOP RUN + END-IF . \ No newline at end of file From cb40aa80f8f1a09764b6ca78e7ea0834d62bc135 Mon Sep 17 00:00:00 2001 From: Bart Broere Date: Sun, 8 Sep 2024 12:30:32 +0200 Subject: [PATCH 09/18] Different script --- lib/rouge/demos/cobol | 250 ++++++++++++-------------------------- lib/rouge/lexers/cobol.rb | 3 + spec/visual/samples/cobol | 250 ++++++++++++-------------------------- 3 files changed, 163 insertions(+), 340 deletions(-) diff --git a/lib/rouge/demos/cobol b/lib/rouge/demos/cobol index 07ed0d9cdd..9911fe20f2 100644 --- a/lib/rouge/demos/cobol +++ b/lib/rouge/demos/cobol @@ -4,185 +4,95 @@ *----------------------- IDENTIFICATION DIVISION. *----------------------- - PROGRAM-ID. CBLDB23 - AUTHOR. Otto B. Relational - + PROGRAM-ID. CBL0001 + AUTHOR. Otto B. Fun. + *-------------------- ENVIRONMENT DIVISION. *-------------------- - CONFIGURATION SECTION. INPUT-OUTPUT SECTION. FILE-CONTROL. - SELECT REPOUT - ASSIGN TO UT-S-REPORT. - SELECT CARDIN - ASSIGN TO DA-S-CARDIN. - + SELECT PRINT-LINE ASSIGN TO PRTLINE. + SELECT ACCT-REC ASSIGN TO ACCTREC. + *SELECT clause creates an internal file name + *ASSIGN clause creates a name for an external data source, + *which is associated with the JCL DDNAME used by the z/OS + *e.g. ACCTREC is linked in JCL file CBL0001J to &SYSUID..DATA + *where &SYSUID. stands for Your z/OS user id + *e.g. if Your user id is Z54321, + *the data set used for ACCTREC is Z54321.DATA + *------------- DATA DIVISION. *------------- FILE SECTION. - FD REPOUT - RECORD CONTAINS 120 CHARACTERS - LABEL RECORDS ARE OMITTED - RECORDING MODE F - DATA RECORD IS REPREC. - 01 REPREC. + FD PRINT-LINE RECORDING MODE F. + 01 PRINT-REC. 05 ACCT-NO-O PIC X(8). - 05 ACCT-LASTN-O PIC X(20). - 05 ACCT-FIRSTN-O PIC X(15). - 05 ACCT-ADDR3-O PIC X(15). - FD CARDIN - RECORD CONTAINS 80 CHARACTERS - BLOCK CONTAINS 0 RECORDS - RECORDING MODE F - LABEL RECORDS ARE OMITTED. - 01 CARDREC PIC X(80). - + 05 ACCT-LIMIT-O PIC $$,$$$,$$9.99. + 05 ACCT-BALANCE-O PIC $$,$$$,$$9.99. + * PIC $$,$$$,$$9.99 -- Alternative for PIC on chapter 7.2.3, + * using $ to allow values of different amounts of digits + * and .99 instead of v99 to allow period display on output + 05 LAST-NAME-O PIC X(20). + 05 FIRST-NAME-O PIC X(15). + 05 COMMENTS-O PIC X(50). + * since the level 05 is higher than level 01, + * all variables belong to PRINT-REC (see chapter 7.3.3) + * + FD ACCT-REC RECORDING MODE F. + 01 ACCT-FIELDS. + 05 ACCT-NO PIC X(8). + 05 ACCT-LIMIT PIC S9(7)V99 COMP-3. + 05 ACCT-BALANCE PIC S9(7)V99 COMP-3. + * PIC S9(7)v99 -- seven-digit plus a sign digit value + * COMP-3 -- packed BCD (binary coded decimal) representation + 05 LAST-NAME PIC X(20). + 05 FIRST-NAME PIC X(15). + 05 CLIENT-ADDR. + 10 STREET-ADDR PIC X(25). + 10 CITY-COUNTY PIC X(20). + 10 USA-STATE PIC X(15). + 05 RESERVED PIC X(7). + 05 COMMENTS PIC X(50). + * WORKING-STORAGE SECTION. - ***************************************************** - * STRUCTURE FOR INPUT * - ***************************************************** - 01 IOAREA. - 02 STATE PIC X(25). - 02 FILLER PIC X(55). - 77 INPUT-SWITCH PIC X VALUE 'Y'. - 88 NOMORE-INPUT VALUE 'N'. - ***************************************************** - * SQL INCLUDE FOR SQLCA * - ***************************************************** - EXEC SQL INCLUDE SQLCA END-EXEC. - ***************************************************** - * DECLARATIONS FOR SQL ERROR HANDLING * - ***************************************************** - 01 ERROR-MESSAGE. - 02 ERROR-LEN PIC S9(4) COMP VALUE +1320. - 02 ERROR-TEXT PIC X(132) OCCURS 10 TIMES - INDEXED BY ERROR-INDEX. - 77 ERROR-TEXT-LEN PIC S9(9) COMP VALUE +132. - 77 ERROR-TEXT-HBOUND PIC S9(9) COMP VALUE +10. - * USER DEFINED ERROR MESSAGE - 01 UD-ERROR-MESSAGE PIC X(80) VALUE SPACES. - ***************************************************** - * SQL DECLARATION FOR VIEW ACCOUNTS * - ***************************************************** - EXEC SQL DECLARE Z#####T TABLE - (ACCTNO CHAR(8) NOT NULL, - LIMIT DECIMAL(9,2) , - BALANCE DECIMAL(9,2) , - SURNAME CHAR(20) NOT NULL, - FIRSTN CHAR(15) NOT NULL, - ADDRESS1 CHAR(25) NOT NULL, - ADDRESS2 CHAR(20) NOT NULL, - ADDRESS3 CHAR(15) NOT NULL, - RESERVED CHAR(7) NOT NULL, - COMMENTS CHAR(50) NOT NULL) - END-EXEC. - ***************************************************** - * SQL CURSORS * - ***************************************************** - EXEC SQL DECLARE CUR1 CURSOR FOR - SELECT * FROM Z#####T - END-EXEC. - EXEC SQL DECLARE CUR2 CURSOR FOR - SELECT * - FROM Z#####T - WHERE ADDRESS3 = :STATE - END-EXEC. - ***************************************************** - * STRUCTURE FOR CUSTOMER RECORD * - ***************************************************** - 01 CUSTOMER-RECORD. - 02 ACCT-NO PIC X(8). - 02 ACCT-LIMIT PIC S9(7)V99 COMP-3. - 02 ACCT-BALANCE PIC S9(7)V99 COMP-3. - 02 ACCT-LASTN PIC X(20). - 02 ACCT-FIRSTN PIC X(15). - 02 ACCT-ADDR1 PIC X(25). - 02 ACCT-ADDR2 PIC X(20). - 02 ACCT-ADDR3 PIC X(15). - 02 ACCT-RSRVD PIC X(7). - 02 ACCT-COMMENT PIC X(50). - + 01 FLAGS. + 05 LASTREC PIC X VALUE SPACE. + *------------------ PROCEDURE DIVISION. *------------------ - PROG-START. - OPEN INPUT CARDIN. - OPEN OUTPUT REPOUT. - READ CARDIN RECORD INTO IOAREA - AT END SET NOMORE-INPUT TO TRUE. - PERFORM PROCESS-INPUT - UNTIL NOMORE-INPUT. - PROG-END. - CLOSE CARDIN - REPOUT. - GOBACK. - PROCESS-INPUT. - IF STATE = '*' - PERFORM GET-ALL - ELSE - PERFORM GET-SPECIFIC. - READ CARDIN RECORD INTO IOAREA - AT END SET NOMORE-INPUT TO TRUE. - GET-ALL. - EXEC SQL OPEN CUR1 END-EXEC. - IF SQLCODE NOT = 0 THEN - MOVE 'OPEN CUR1' TO UD-ERROR-MESSAGE - PERFORM SQL-ERROR-HANDLING - END-IF - EXEC SQL FETCH CUR1 INTO :CUSTOMER-RECORD END-EXEC. - PERFORM PRINT-ALL - UNTIL SQLCODE IS NOT EQUAL TO ZERO. - IF SQLCODE NOT = 100 THEN - MOVE 'FETCH CUR1' TO UD-ERROR-MESSAGE - PERFORM SQL-ERROR-HANDLING - END-IF - EXEC SQL CLOSE CUR1 END-EXEC. - IF SQLCODE NOT = 0 THEN - MOVE 'CLOSE CUR1' TO UD-ERROR-MESSAGE - PERFORM SQL-ERROR-HANDLING - END-IF - . - PRINT-ALL. - PERFORM PRINT-A-LINE. - EXEC SQL FETCH CUR1 INTO :CUSTOMER-RECORD END-EXEC. - GET-SPECIFIC. - EXEC SQL OPEN CUR2 END-EXEC. - IF SQLCODE NOT = 0 THEN - MOVE 'OPEN CUR2' TO UD-ERROR-MESSAGE - PERFORM SQL-ERROR-HANDLING - END-IF - EXEC SQL FETCH CUR2 INTO :CUSTOMER-RECORD END-EXEC. - PERFORM PRINT-SPECIFIC - UNTIL SQLCODE IS NOT EQUAL TO ZERO. - IF SQLCODE NOT = 100 THEN - MOVE 'FETCH CUR2' TO UD-ERROR-MESSAGE - PERFORM SQL-ERROR-HANDLING - END-IF - EXEC SQL CLOSE CUR2 END-EXEC. - IF SQLCODE NOT = 0 THEN - MOVE 'CLOSE CUR2' TO UD-ERROR-MESSAGE - PERFORM SQL-ERROR-HANDLING - END-IF - . - PRINT-SPECIFIC. - PERFORM PRINT-A-LINE. - EXEC SQL FETCH CUR2 INTO :CUSTOMER-RECORD END-EXEC. - PRINT-A-LINE. - MOVE ACCT-NO TO ACCT-NO-O. - MOVE ACCT-LASTN TO ACCT-LASTN-O. - MOVE ACCT-FIRSTN TO ACCT-FIRSTN-O. - MOVE ACCT-ADDR3 TO ACCT-ADDR3-O. - WRITE REPREC AFTER ADVANCING 2 LINES. - - SQL-ERROR-HANDLING. - DISPLAY 'ERROR AT ' FUNCTION TRIM(UD-ERROR-MESSAGE, TRAILING) - CALL 'DSNTIAR' USING SQLCA ERROR-MESSAGE ERROR-TEXT-LEN. - PERFORM VARYING ERROR-INDEX FROM 1 BY 1 - UNTIL ERROR-INDEX > ERROR-TEXT-HBOUND - OR ERROR-TEXT(ERROR-INDEX) = SPACES - DISPLAY FUNCTION TRIM(ERROR-TEXT(ERROR-INDEX), TRAILING) + OPEN-FILES. + OPEN INPUT ACCT-REC. + OPEN OUTPUT PRINT-LINE. + * + READ-NEXT-RECORD. + PERFORM READ-RECORD + * The previous statement is needed before entering the loop. + * Both the loop condition LASTREC = 'Y' + * and the call to WRITE-RECORD depend on READ-RECORD having + * been executed before. + * The loop starts at the next line with PERFORM UNTIL + PERFORM UNTIL LASTREC = 'Y' + PERFORM WRITE-RECORD + PERFORM READ-RECORD END-PERFORM - IF SQLCODE NOT = 0 AND SQLCODE NOT = 100 - MOVE 1000 TO RETURN-CODE - STOP RUN - END-IF - . \ No newline at end of file + . + * + CLOSE-STOP. + CLOSE ACCT-REC. + CLOSE PRINT-LINE. + GOBACK. + * + READ-RECORD. + READ ACCT-REC + AT END MOVE 'Y' TO LASTREC + END-READ. + * + WRITE-RECORD. + MOVE ACCT-NO TO ACCT-NO-O. + MOVE ACCT-LIMIT TO ACCT-LIMIT-O. + MOVE ACCT-BALANCE TO ACCT-BALANCE-O. + MOVE LAST-NAME TO LAST-NAME-O. + MOVE FIRST-NAME TO FIRST-NAME-O. + MOVE COMMENTS TO COMMENTS-O. + WRITE PRINT-REC. + * \ No newline at end of file diff --git a/lib/rouge/lexers/cobol.rb b/lib/rouge/lexers/cobol.rb index 34a1fd693a..9794487fe6 100644 --- a/lib/rouge/lexers/cobol.rb +++ b/lib/rouge/lexers/cobol.rb @@ -94,6 +94,9 @@ class COBOL < RegexLexer # Whitespace remaining rule %r/\s/, Text::Whitespace + + # Anything else remaining + rule %r/.+/, Text end # TODO double check string escaping in COBOL diff --git a/spec/visual/samples/cobol b/spec/visual/samples/cobol index 07ed0d9cdd..9911fe20f2 100644 --- a/spec/visual/samples/cobol +++ b/spec/visual/samples/cobol @@ -4,185 +4,95 @@ *----------------------- IDENTIFICATION DIVISION. *----------------------- - PROGRAM-ID. CBLDB23 - AUTHOR. Otto B. Relational - + PROGRAM-ID. CBL0001 + AUTHOR. Otto B. Fun. + *-------------------- ENVIRONMENT DIVISION. *-------------------- - CONFIGURATION SECTION. INPUT-OUTPUT SECTION. FILE-CONTROL. - SELECT REPOUT - ASSIGN TO UT-S-REPORT. - SELECT CARDIN - ASSIGN TO DA-S-CARDIN. - + SELECT PRINT-LINE ASSIGN TO PRTLINE. + SELECT ACCT-REC ASSIGN TO ACCTREC. + *SELECT clause creates an internal file name + *ASSIGN clause creates a name for an external data source, + *which is associated with the JCL DDNAME used by the z/OS + *e.g. ACCTREC is linked in JCL file CBL0001J to &SYSUID..DATA + *where &SYSUID. stands for Your z/OS user id + *e.g. if Your user id is Z54321, + *the data set used for ACCTREC is Z54321.DATA + *------------- DATA DIVISION. *------------- FILE SECTION. - FD REPOUT - RECORD CONTAINS 120 CHARACTERS - LABEL RECORDS ARE OMITTED - RECORDING MODE F - DATA RECORD IS REPREC. - 01 REPREC. + FD PRINT-LINE RECORDING MODE F. + 01 PRINT-REC. 05 ACCT-NO-O PIC X(8). - 05 ACCT-LASTN-O PIC X(20). - 05 ACCT-FIRSTN-O PIC X(15). - 05 ACCT-ADDR3-O PIC X(15). - FD CARDIN - RECORD CONTAINS 80 CHARACTERS - BLOCK CONTAINS 0 RECORDS - RECORDING MODE F - LABEL RECORDS ARE OMITTED. - 01 CARDREC PIC X(80). - + 05 ACCT-LIMIT-O PIC $$,$$$,$$9.99. + 05 ACCT-BALANCE-O PIC $$,$$$,$$9.99. + * PIC $$,$$$,$$9.99 -- Alternative for PIC on chapter 7.2.3, + * using $ to allow values of different amounts of digits + * and .99 instead of v99 to allow period display on output + 05 LAST-NAME-O PIC X(20). + 05 FIRST-NAME-O PIC X(15). + 05 COMMENTS-O PIC X(50). + * since the level 05 is higher than level 01, + * all variables belong to PRINT-REC (see chapter 7.3.3) + * + FD ACCT-REC RECORDING MODE F. + 01 ACCT-FIELDS. + 05 ACCT-NO PIC X(8). + 05 ACCT-LIMIT PIC S9(7)V99 COMP-3. + 05 ACCT-BALANCE PIC S9(7)V99 COMP-3. + * PIC S9(7)v99 -- seven-digit plus a sign digit value + * COMP-3 -- packed BCD (binary coded decimal) representation + 05 LAST-NAME PIC X(20). + 05 FIRST-NAME PIC X(15). + 05 CLIENT-ADDR. + 10 STREET-ADDR PIC X(25). + 10 CITY-COUNTY PIC X(20). + 10 USA-STATE PIC X(15). + 05 RESERVED PIC X(7). + 05 COMMENTS PIC X(50). + * WORKING-STORAGE SECTION. - ***************************************************** - * STRUCTURE FOR INPUT * - ***************************************************** - 01 IOAREA. - 02 STATE PIC X(25). - 02 FILLER PIC X(55). - 77 INPUT-SWITCH PIC X VALUE 'Y'. - 88 NOMORE-INPUT VALUE 'N'. - ***************************************************** - * SQL INCLUDE FOR SQLCA * - ***************************************************** - EXEC SQL INCLUDE SQLCA END-EXEC. - ***************************************************** - * DECLARATIONS FOR SQL ERROR HANDLING * - ***************************************************** - 01 ERROR-MESSAGE. - 02 ERROR-LEN PIC S9(4) COMP VALUE +1320. - 02 ERROR-TEXT PIC X(132) OCCURS 10 TIMES - INDEXED BY ERROR-INDEX. - 77 ERROR-TEXT-LEN PIC S9(9) COMP VALUE +132. - 77 ERROR-TEXT-HBOUND PIC S9(9) COMP VALUE +10. - * USER DEFINED ERROR MESSAGE - 01 UD-ERROR-MESSAGE PIC X(80) VALUE SPACES. - ***************************************************** - * SQL DECLARATION FOR VIEW ACCOUNTS * - ***************************************************** - EXEC SQL DECLARE Z#####T TABLE - (ACCTNO CHAR(8) NOT NULL, - LIMIT DECIMAL(9,2) , - BALANCE DECIMAL(9,2) , - SURNAME CHAR(20) NOT NULL, - FIRSTN CHAR(15) NOT NULL, - ADDRESS1 CHAR(25) NOT NULL, - ADDRESS2 CHAR(20) NOT NULL, - ADDRESS3 CHAR(15) NOT NULL, - RESERVED CHAR(7) NOT NULL, - COMMENTS CHAR(50) NOT NULL) - END-EXEC. - ***************************************************** - * SQL CURSORS * - ***************************************************** - EXEC SQL DECLARE CUR1 CURSOR FOR - SELECT * FROM Z#####T - END-EXEC. - EXEC SQL DECLARE CUR2 CURSOR FOR - SELECT * - FROM Z#####T - WHERE ADDRESS3 = :STATE - END-EXEC. - ***************************************************** - * STRUCTURE FOR CUSTOMER RECORD * - ***************************************************** - 01 CUSTOMER-RECORD. - 02 ACCT-NO PIC X(8). - 02 ACCT-LIMIT PIC S9(7)V99 COMP-3. - 02 ACCT-BALANCE PIC S9(7)V99 COMP-3. - 02 ACCT-LASTN PIC X(20). - 02 ACCT-FIRSTN PIC X(15). - 02 ACCT-ADDR1 PIC X(25). - 02 ACCT-ADDR2 PIC X(20). - 02 ACCT-ADDR3 PIC X(15). - 02 ACCT-RSRVD PIC X(7). - 02 ACCT-COMMENT PIC X(50). - + 01 FLAGS. + 05 LASTREC PIC X VALUE SPACE. + *------------------ PROCEDURE DIVISION. *------------------ - PROG-START. - OPEN INPUT CARDIN. - OPEN OUTPUT REPOUT. - READ CARDIN RECORD INTO IOAREA - AT END SET NOMORE-INPUT TO TRUE. - PERFORM PROCESS-INPUT - UNTIL NOMORE-INPUT. - PROG-END. - CLOSE CARDIN - REPOUT. - GOBACK. - PROCESS-INPUT. - IF STATE = '*' - PERFORM GET-ALL - ELSE - PERFORM GET-SPECIFIC. - READ CARDIN RECORD INTO IOAREA - AT END SET NOMORE-INPUT TO TRUE. - GET-ALL. - EXEC SQL OPEN CUR1 END-EXEC. - IF SQLCODE NOT = 0 THEN - MOVE 'OPEN CUR1' TO UD-ERROR-MESSAGE - PERFORM SQL-ERROR-HANDLING - END-IF - EXEC SQL FETCH CUR1 INTO :CUSTOMER-RECORD END-EXEC. - PERFORM PRINT-ALL - UNTIL SQLCODE IS NOT EQUAL TO ZERO. - IF SQLCODE NOT = 100 THEN - MOVE 'FETCH CUR1' TO UD-ERROR-MESSAGE - PERFORM SQL-ERROR-HANDLING - END-IF - EXEC SQL CLOSE CUR1 END-EXEC. - IF SQLCODE NOT = 0 THEN - MOVE 'CLOSE CUR1' TO UD-ERROR-MESSAGE - PERFORM SQL-ERROR-HANDLING - END-IF - . - PRINT-ALL. - PERFORM PRINT-A-LINE. - EXEC SQL FETCH CUR1 INTO :CUSTOMER-RECORD END-EXEC. - GET-SPECIFIC. - EXEC SQL OPEN CUR2 END-EXEC. - IF SQLCODE NOT = 0 THEN - MOVE 'OPEN CUR2' TO UD-ERROR-MESSAGE - PERFORM SQL-ERROR-HANDLING - END-IF - EXEC SQL FETCH CUR2 INTO :CUSTOMER-RECORD END-EXEC. - PERFORM PRINT-SPECIFIC - UNTIL SQLCODE IS NOT EQUAL TO ZERO. - IF SQLCODE NOT = 100 THEN - MOVE 'FETCH CUR2' TO UD-ERROR-MESSAGE - PERFORM SQL-ERROR-HANDLING - END-IF - EXEC SQL CLOSE CUR2 END-EXEC. - IF SQLCODE NOT = 0 THEN - MOVE 'CLOSE CUR2' TO UD-ERROR-MESSAGE - PERFORM SQL-ERROR-HANDLING - END-IF - . - PRINT-SPECIFIC. - PERFORM PRINT-A-LINE. - EXEC SQL FETCH CUR2 INTO :CUSTOMER-RECORD END-EXEC. - PRINT-A-LINE. - MOVE ACCT-NO TO ACCT-NO-O. - MOVE ACCT-LASTN TO ACCT-LASTN-O. - MOVE ACCT-FIRSTN TO ACCT-FIRSTN-O. - MOVE ACCT-ADDR3 TO ACCT-ADDR3-O. - WRITE REPREC AFTER ADVANCING 2 LINES. - - SQL-ERROR-HANDLING. - DISPLAY 'ERROR AT ' FUNCTION TRIM(UD-ERROR-MESSAGE, TRAILING) - CALL 'DSNTIAR' USING SQLCA ERROR-MESSAGE ERROR-TEXT-LEN. - PERFORM VARYING ERROR-INDEX FROM 1 BY 1 - UNTIL ERROR-INDEX > ERROR-TEXT-HBOUND - OR ERROR-TEXT(ERROR-INDEX) = SPACES - DISPLAY FUNCTION TRIM(ERROR-TEXT(ERROR-INDEX), TRAILING) + OPEN-FILES. + OPEN INPUT ACCT-REC. + OPEN OUTPUT PRINT-LINE. + * + READ-NEXT-RECORD. + PERFORM READ-RECORD + * The previous statement is needed before entering the loop. + * Both the loop condition LASTREC = 'Y' + * and the call to WRITE-RECORD depend on READ-RECORD having + * been executed before. + * The loop starts at the next line with PERFORM UNTIL + PERFORM UNTIL LASTREC = 'Y' + PERFORM WRITE-RECORD + PERFORM READ-RECORD END-PERFORM - IF SQLCODE NOT = 0 AND SQLCODE NOT = 100 - MOVE 1000 TO RETURN-CODE - STOP RUN - END-IF - . \ No newline at end of file + . + * + CLOSE-STOP. + CLOSE ACCT-REC. + CLOSE PRINT-LINE. + GOBACK. + * + READ-RECORD. + READ ACCT-REC + AT END MOVE 'Y' TO LASTREC + END-READ. + * + WRITE-RECORD. + MOVE ACCT-NO TO ACCT-NO-O. + MOVE ACCT-LIMIT TO ACCT-LIMIT-O. + MOVE ACCT-BALANCE TO ACCT-BALANCE-O. + MOVE LAST-NAME TO LAST-NAME-O. + MOVE FIRST-NAME TO FIRST-NAME-O. + MOVE COMMENTS TO COMMENTS-O. + WRITE PRINT-REC. + * \ No newline at end of file From 0114db01b8f48dd31e10cf4549a371a46c2a11dc Mon Sep 17 00:00:00 2001 From: Bart Broere Date: Sun, 8 Sep 2024 12:35:32 +0200 Subject: [PATCH 10/18] Comply with license for example script --- lib/rouge/demos/cobol | 5 +++++ lib/rouge/lexers/cobol.rb | 3 ++- spec/visual/samples/cobol | 5 +++++ 3 files changed, 12 insertions(+), 1 deletion(-) diff --git a/lib/rouge/demos/cobol b/lib/rouge/demos/cobol index 9911fe20f2..719d8e46c0 100644 --- a/lib/rouge/demos/cobol +++ b/lib/rouge/demos/cobol @@ -1,4 +1,9 @@ *----------------------- + * This file was sourced from https://github.com/openmainframeproject/cobol-programming-course + * Credits: + * The course materials were made available through a joint collaboration between IBM, its clients, and + * American River College and proposed as a new project by IBM. + *----------------------- * Copyright Contributors to the COBOL Programming Course * SPDX-License-Identifier: CC-BY-4.0 *----------------------- diff --git a/lib/rouge/lexers/cobol.rb b/lib/rouge/lexers/cobol.rb index 9794487fe6..660535c4eb 100644 --- a/lib/rouge/lexers/cobol.rb +++ b/lib/rouge/lexers/cobol.rb @@ -11,9 +11,10 @@ class COBOL < RegexLexer mimetypes 'text/x-cobol' # List of COBOL keywords + # sourced from https://www.ibm.com/docs/en/cobol-zos/6.4?topic=appendixes-reserved-words KEYWORDS = %w[ ACCEPT ACCESS ACTIVE-CLASS ADD ADDRESS ADVANCING AFTER ALIGNED ALL ALLOCATE ALPHABET ALPHABETIC ALPHABETIC-LOWER - ALPHABETIC-UPPER ALPHANUMERIC ALPHANUMERIC-EDITED ALSO ALTER ALTERNATE AND ANY ANYCASE APPLY ARE AREA AREAS + ALPHABETIC-UPPER ALPHANUMERIC ALPHANUMERIC-EDITED ALSO ALTER ALTERNATE AND ANYCASE ANY APPLY ARE AREA AREAS ASCENDING ASSIGN AT AUTHOR B-AND B-NOT B-OR B-XOR BASED BASIS BEFORE BEGINNING BINARY BINARY-CHAR BINARY-DOUBLE BINARY-LONG BINARY-SHORT BIT BLANK BLOCK BOOLEAN BOTTOM BY BYTE-LENGTH CALL CANCEL CBL CD CF CH CHARACTER CHARACTERS CLASS CLASS-ID CLOCK-UNITS CLOSE COBOL CODE CODE-SET COL COLLATING COLS COLUMN COLUMNS COM-REG COMMA diff --git a/spec/visual/samples/cobol b/spec/visual/samples/cobol index 9911fe20f2..719d8e46c0 100644 --- a/spec/visual/samples/cobol +++ b/spec/visual/samples/cobol @@ -1,4 +1,9 @@ *----------------------- + * This file was sourced from https://github.com/openmainframeproject/cobol-programming-course + * Credits: + * The course materials were made available through a joint collaboration between IBM, its clients, and + * American River College and proposed as a new project by IBM. + *----------------------- * Copyright Contributors to the COBOL Programming Course * SPDX-License-Identifier: CC-BY-4.0 *----------------------- From 7c6a9340247c2a979e4773d25b1a6c9a671622d6 Mon Sep 17 00:00:00 2001 From: Bart Broere Date: Sun, 8 Sep 2024 12:40:32 +0200 Subject: [PATCH 11/18] Fix EOF issue with linter --- lib/rouge/demos/cobol | 2 +- spec/visual/samples/cobol | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/rouge/demos/cobol b/lib/rouge/demos/cobol index 719d8e46c0..ad2691affe 100644 --- a/lib/rouge/demos/cobol +++ b/lib/rouge/demos/cobol @@ -100,4 +100,4 @@ MOVE FIRST-NAME TO FIRST-NAME-O. MOVE COMMENTS TO COMMENTS-O. WRITE PRINT-REC. - * \ No newline at end of file + * diff --git a/spec/visual/samples/cobol b/spec/visual/samples/cobol index 719d8e46c0..ad2691affe 100644 --- a/spec/visual/samples/cobol +++ b/spec/visual/samples/cobol @@ -100,4 +100,4 @@ MOVE FIRST-NAME TO FIRST-NAME-O. MOVE COMMENTS TO COMMENTS-O. WRITE PRINT-REC. - * \ No newline at end of file + * From e1a9d211fb4ee812252a206c4ba800c2d5bec41b Mon Sep 17 00:00:00 2001 From: Bart Broere Date: Sun, 8 Sep 2024 15:46:00 +0200 Subject: [PATCH 12/18] Use negative lookbehind --- lib/rouge/lexers/cobol.rb | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/lib/rouge/lexers/cobol.rb b/lib/rouge/lexers/cobol.rb index 660535c4eb..6839196d51 100644 --- a/lib/rouge/lexers/cobol.rb +++ b/lib/rouge/lexers/cobol.rb @@ -73,9 +73,9 @@ class COBOL < RegexLexer rule %r/'/, Str::Single, :string_single # Keywords and divisions - rule %r/\b(#{DIVISIONS.join('|')})\b/i, Keyword::Declaration - rule %r/\b(#{SECTIONS.join('|')})\b/i, Keyword::Namespace - rule %r/\b(#{KEYWORDS.join('|')})\b/i, Keyword + rule %r/(?.*/, Comment::Single From 8ca12ea91bbdf6e4bc1911b839cfe56f66e1f19a Mon Sep 17 00:00:00 2001 From: Bart Broere Date: Fri, 27 Sep 2024 18:51:17 +0200 Subject: [PATCH 13/18] Apply suggestions from code review Co-authored-by: Tan Le --- lib/rouge/lexers/cobol.rb | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/lib/rouge/lexers/cobol.rb b/lib/rouge/lexers/cobol.rb index 6839196d51..d1b1599402 100644 --- a/lib/rouge/lexers/cobol.rb +++ b/lib/rouge/lexers/cobol.rb @@ -56,13 +56,17 @@ class COBOL < RegexLexer ] # COBOL divisions and sections - DIVISIONS = %w[ - IDENTIFICATION ENVIRONMENT DATA PROCEDURE DIVISION - ] + def divisions + @divisions ||= %w( + IDENTIFICATION ENVIRONMENT DATA PROCEDURE DIVISION + ) + end - SECTIONS = %w[ - CONFIGURATION INPUT-OUTPUT FILE WORKING-STORAGE LOCAL-STORAGE LINKAGE SECTION - ] + def sections + @sections ||= %w( + CONFIGURATION INPUT-OUTPUT FILE WORKING-STORAGE LOCAL-STORAGE LINKAGE SECTION + ) + end state :root do # First detect the comments From aa769d98e82d0c11726a0dc0346bc78223101531 Mon Sep 17 00:00:00 2001 From: Bart Broere Date: Fri, 27 Sep 2024 20:04:19 +0200 Subject: [PATCH 14/18] Move constants into instance variables --- lib/rouge/lexers/cobol.rb | 123 +++++++++++++++++++------------------- 1 file changed, 63 insertions(+), 60 deletions(-) diff --git a/lib/rouge/lexers/cobol.rb b/lib/rouge/lexers/cobol.rb index d1b1599402..5d343a98d4 100644 --- a/lib/rouge/lexers/cobol.rb +++ b/lib/rouge/lexers/cobol.rb @@ -10,65 +10,68 @@ class COBOL < RegexLexer filenames '*.cob', '*.cbl' mimetypes 'text/x-cobol' - # List of COBOL keywords - # sourced from https://www.ibm.com/docs/en/cobol-zos/6.4?topic=appendixes-reserved-words - KEYWORDS = %w[ - ACCEPT ACCESS ACTIVE-CLASS ADD ADDRESS ADVANCING AFTER ALIGNED ALL ALLOCATE ALPHABET ALPHABETIC ALPHABETIC-LOWER - ALPHABETIC-UPPER ALPHANUMERIC ALPHANUMERIC-EDITED ALSO ALTER ALTERNATE AND ANYCASE ANY APPLY ARE AREA AREAS - ASCENDING ASSIGN AT AUTHOR B-AND B-NOT B-OR B-XOR BASED BASIS BEFORE BEGINNING BINARY BINARY-CHAR BINARY-DOUBLE - BINARY-LONG BINARY-SHORT BIT BLANK BLOCK BOOLEAN BOTTOM BY BYTE-LENGTH CALL CANCEL CBL CD CF CH CHARACTER - CHARACTERS CLASS CLASS-ID CLOCK-UNITS CLOSE COBOL CODE CODE-SET COL COLLATING COLS COLUMN COLUMNS COM-REG COMMA - COMMON COMMUNICATION COMP-1 COMP-2 COMP-3 COMP-4 COMP-5 COMP COMPUTATIONAL-1 COMPUTATIONAL-2 - COMPUTATIONAL-3 COMPUTATIONAL-4 COMPUTATIONAL-5 COMPUTATIONAL COMPUTE CONDITION CONSTANT CONTAINS CONTENT - CONTINUE CONTROL CONTROLS CONVERTING COPY CORR CORRESPONDING COUNT CRT CURRENCY CURSOR DATA-POINTER DATE - DATE-COMPILED DATE-WRITTEN DAY DAY-OF-WEEK DBCS DE DEBUG-CONTENTS DEBUG-ITEM DEBUG-LINE DEBUG-NAME DEBUG-SUB-1 - DEBUG-SUB-2 DEBUG-SUB-3 DEBUGGING DECIMAL-POINT DECLARATIVES DEFAULT DELETE DELIMITED DELIMITER DEPENDING - DESCENDING DESTINATION DETAIL DISABLE DISPLAY-1 DISPLAY DIVIDE DOWN DUPLICATES DYNAMIC EC EGCS EGI - EJECT ELSE EMI ENABLE END-ACCEPT END-ADD END-CALL END-COMPUTE END-DELETE END-DISPLAY END-DIVIDE END-EVALUATE - END-EXEC END-IF END-INVOKE END-JSON END-MULTIPLY END-OF-PAGE END-PERFORM END-READ END-RECEIVE END-RETURN - END-REWRITE END-SEARCH END-START END-STRING END-SUBTRACT END-UNSTRING END-WRITE END-XML ENDING END ENTER ENTRY - EO EOP EQUAL ERROR ESI EVALUATE EVERY EXCEPTION EXCEPTION-OBJECT EXEC EXECUTE EXIT EXTEND EXTERNAL - FACTORY FALSE FD FILE-CONTROL FILLER FINAL FIRST FLOAT-EXTENDED FLOAT-LONG FLOAT-SHORT FOOTING FOR FORMAT - FREE FROM FUNCTION FUNCTION-ID FUNCTION-POINTER GENERATE GET GIVING GLOBAL GO GOBACK GREATER GROUP GROUP-USAGE - HEADING HIGH-VALUE HIGH-VALUES I-O-CONTROL I-O ID IF IN INDEX INDEXED INDICATE INHERITS INITIAL - INITIALIZE INITIATE INPUT INSERT INSPECT INSTALLATION INTERFACE INTERFACE-ID INTO INVALID INVOKE - IS JAVA JNIENVPTR JSON JSON-CODE JSON-STATUS JUST JUSTIFIED KANJI KEY LABEL LAST LEADING LEFT LENGTH LESS LIMIT - LIMITS LINAGE-COUNTER LINAGE LINE-COUNTER LINES LINE LOCALE LOCK LOW-VALUE LOW-VALUES - MEMORY MERGE MESSAGE METHOD METHOD-ID MINUS MODE MODULES MORE-LABELS MOVE MULTIPLE MULTIPLY NATIONAL - NATIONAL-EDITED NATIVE NEGATIVE NESTED NEXT NO NOT NULL NULLS NUMBER NUMERIC NUMERIC-EDITED OBJECT - OBJECT-COMPUTER OBJECT-REFERENCE OCCURS OF OFF OMITTED ON OPEN OPTIONAL OPTIONS OR ORDER ORGANIZATION - OTHER OUTPUT OVERFLOW OVERRIDE PACKED-DECIMAL PADDING PAGE PAGE-COUNTER PASSWORD PERFORM PF PH PIC PICTURE - PLUS POINTER- POINTER-31 POINTER-32 POINTER-64 POINTER POSITION POSITIVE PRESENT PRINTING - PROCEDURE-POINTER PROCEDURES PROCEED PROCESSING PROGRAM-ID PROGRAM-POINTER PROGRAM PROPERTY PROTOTYPE - PURGE QUEUE QUOTE QUOTES RAISE RAISING RANDOM RD READ READY RECEIVE RECORD RECORDING RECORDS RECURSIVE REDEFINES - REEL REFERENCE REFERENCES RELATIVE RELEASE RELOAD REMAINDER REMOVAL RENAMES REPLACE REPLACING REPORT REPORTING - REPORTS REPOSITORY RERUN RESERVE RESET RESUME RETRY RETURN RETURN-CODE RETURNING REVERSED REWIND REWRITE RF RH - RIGHT ROUNDED RUN SAME SCREEN SD SEARCH SECTION SECURITY SEGMENT SEGMENT-LIMIT SELECT SELF SEND SENTENCE - SEPARATE SEQUENCE SEQUENTIAL SERVICE SET SHARING SHIFT-IN SHIFT-OUT SIGN SIZE SKIP1 SKIP2 SKIP3 - SORT-CONTROL SORT-CORE-SIZE SORT-FILE-SIZE SORT-MERGE SORT-MESSAGE SORT-MODE-SIZE SORT-RETURN SORT - SOURCE-COMPUTER SOURCES SOURCE SPACE SPACES SPECIAL-NAMES SQL SQLIMS STANDARD-1 STANDARD-2 STANDARD START STATUS STOP - STRING SUB-QUEUE-1 SUB-QUEUE-2 SUB-QUEUE-3 SUBTRACT SUM SUPER SUPPRESS SYMBOLIC SYNC SYNCHRONIZED SYSTEM-DEFAULT - TABLE TALLY TALLYING TAPE TERMINAL TERMINATE TEST TEXT THAN THEN THROUGH THRU TIME TIMES TITLE TO TOP TRACE - TRAILING TRUE TYPE TYPEDEF UNIT UNIVERSAL UNLOCK UNSTRING UNTIL UP UPON USAGE USE USER-DEFAULT USING UTF-8 - VAL-STATUS VALID VALIDATE VALIDATE-STATUS VALUE VALUES VARYING VOLATILE WHEN WHEN-COMPILED WITH WORDS - WRITE WRITE-ONLY XML-CODE XML-EVENT XML-INFORMATION XML-NAMESPACE XML-NAMESPACE-PREFIX - XML-NNAMESPACE XML-NNAMESPACE-PREFIX XML-NTEXT XML-SCHEMA XML-TEXT XML ZERO ZEROES ZEROS - ] - - # COBOL divisions and sections - def divisions - @divisions ||= %w( + state :root do + + # COBOL divisions and sections + def divisions + @divisions ||= %w[ IDENTIFICATION ENVIRONMENT DATA PROCEDURE DIVISION - ) - end + ] + end - def sections - @sections ||= %w( + def sections + @sections ||= %w[ CONFIGURATION INPUT-OUTPUT FILE WORKING-STORAGE LOCAL-STORAGE LINKAGE SECTION - ) - end + ] + end + + def keywords + # List of COBOL keywords + # sourced from https://www.ibm.com/docs/en/cobol-zos/6.4?topic=appendixes-reserved-words + @keywords ||= %w[ + ACCEPT ACCESS ACTIVE-CLASS ADD ADDRESS ADVANCING AFTER ALIGNED ALL ALLOCATE ALPHABET ALPHABETIC ALPHABETIC-LOWER + ALPHABETIC-UPPER ALPHANUMERIC ALPHANUMERIC-EDITED ALSO ALTER ALTERNATE AND ANYCASE ANY APPLY ARE AREA AREAS + ASCENDING ASSIGN AT AUTHOR B-AND B-NOT B-OR B-XOR BASED BASIS BEFORE BEGINNING BINARY BINARY-CHAR BINARY-DOUBLE + BINARY-LONG BINARY-SHORT BIT BLANK BLOCK BOOLEAN BOTTOM BY BYTE-LENGTH CALL CANCEL CBL CD CF CH CHARACTER + CHARACTERS CLASS CLASS-ID CLOCK-UNITS CLOSE COBOL CODE CODE-SET COL COLLATING COLS COLUMN COLUMNS COM-REG COMMA + COMMON COMMUNICATION COMP-1 COMP-2 COMP-3 COMP-4 COMP-5 COMP COMPUTATIONAL-1 COMPUTATIONAL-2 + COMPUTATIONAL-3 COMPUTATIONAL-4 COMPUTATIONAL-5 COMPUTATIONAL COMPUTE CONDITION CONSTANT CONTAINS CONTENT + CONTINUE CONTROL CONTROLS CONVERTING COPY CORR CORRESPONDING COUNT CRT CURRENCY CURSOR DATA-POINTER DATE + DATE-COMPILED DATE-WRITTEN DAY DAY-OF-WEEK DBCS DE DEBUG-CONTENTS DEBUG-ITEM DEBUG-LINE DEBUG-NAME DEBUG-SUB-1 + DEBUG-SUB-2 DEBUG-SUB-3 DEBUGGING DECIMAL-POINT DECLARATIVES DEFAULT DELETE DELIMITED DELIMITER DEPENDING + DESCENDING DESTINATION DETAIL DISABLE DISPLAY-1 DISPLAY DIVIDE DOWN DUPLICATES DYNAMIC EC EGCS EGI + EJECT ELSE EMI ENABLE END-ACCEPT END-ADD END-CALL END-COMPUTE END-DELETE END-DISPLAY END-DIVIDE END-EVALUATE + END-EXEC END-IF END-INVOKE END-JSON END-MULTIPLY END-OF-PAGE END-PERFORM END-READ END-RECEIVE END-RETURN + END-REWRITE END-SEARCH END-START END-STRING END-SUBTRACT END-UNSTRING END-WRITE END-XML ENDING END ENTER ENTRY + EO EOP EQUAL ERROR ESI EVALUATE EVERY EXCEPTION EXCEPTION-OBJECT EXEC EXECUTE EXIT EXTEND EXTERNAL + FACTORY FALSE FD FILE-CONTROL FILLER FINAL FIRST FLOAT-EXTENDED FLOAT-LONG FLOAT-SHORT FOOTING FOR FORMAT + FREE FROM FUNCTION FUNCTION-ID FUNCTION-POINTER GENERATE GET GIVING GLOBAL GO GOBACK GREATER GROUP GROUP-USAGE + HEADING HIGH-VALUE HIGH-VALUES I-O-CONTROL I-O ID IF IN INDEX INDEXED INDICATE INHERITS INITIAL + INITIALIZE INITIATE INPUT INSERT INSPECT INSTALLATION INTERFACE INTERFACE-ID INTO INVALID INVOKE + IS JAVA JNIENVPTR JSON JSON-CODE JSON-STATUS JUST JUSTIFIED KANJI KEY LABEL LAST LEADING LEFT LENGTH LESS LIMIT + LIMITS LINAGE-COUNTER LINAGE LINE-COUNTER LINES LINE LOCALE LOCK LOW-VALUE LOW-VALUES + MEMORY MERGE MESSAGE METHOD METHOD-ID MINUS MODE MODULES MORE-LABELS MOVE MULTIPLE MULTIPLY NATIONAL + NATIONAL-EDITED NATIVE NEGATIVE NESTED NEXT NO NOT NULL NULLS NUMBER NUMERIC NUMERIC-EDITED OBJECT + OBJECT-COMPUTER OBJECT-REFERENCE OCCURS OF OFF OMITTED ON OPEN OPTIONAL OPTIONS OR ORDER ORGANIZATION + OTHER OUTPUT OVERFLOW OVERRIDE PACKED-DECIMAL PADDING PAGE PAGE-COUNTER PASSWORD PERFORM PF PH PIC PICTURE + PLUS POINTER- POINTER-31 POINTER-32 POINTER-64 POINTER POSITION POSITIVE PRESENT PRINTING + PROCEDURE-POINTER PROCEDURES PROCEED PROCESSING PROGRAM-ID PROGRAM-POINTER PROGRAM PROPERTY PROTOTYPE + PURGE QUEUE QUOTE QUOTES RAISE RAISING RANDOM RD READ READY RECEIVE RECORD RECORDING RECORDS RECURSIVE REDEFINES + REEL REFERENCE REFERENCES RELATIVE RELEASE RELOAD REMAINDER REMOVAL RENAMES REPLACE REPLACING REPORT REPORTING + REPORTS REPOSITORY RERUN RESERVE RESET RESUME RETRY RETURN RETURN-CODE RETURNING REVERSED REWIND REWRITE RF RH + RIGHT ROUNDED RUN SAME SCREEN SD SEARCH SECTION SECURITY SEGMENT SEGMENT-LIMIT SELECT SELF SEND SENTENCE + SEPARATE SEQUENCE SEQUENTIAL SERVICE SET SHARING SHIFT-IN SHIFT-OUT SIGN SIZE SKIP1 SKIP2 SKIP3 + SORT-CONTROL SORT-CORE-SIZE SORT-FILE-SIZE SORT-MERGE SORT-MESSAGE SORT-MODE-SIZE SORT-RETURN SORT + SOURCE-COMPUTER SOURCES SOURCE SPACE SPACES SPECIAL-NAMES SQL SQLIMS STANDARD-1 STANDARD-2 STANDARD START STATUS STOP + STRING SUB-QUEUE-1 SUB-QUEUE-2 SUB-QUEUE-3 SUBTRACT SUM SUPER SUPPRESS SYMBOLIC SYNC SYNCHRONIZED SYSTEM-DEFAULT + TABLE TALLY TALLYING TAPE TERMINAL TERMINATE TEST TEXT THAN THEN THROUGH THRU TIME TIMES TITLE TO TOP TRACE + TRAILING TRUE TYPE TYPEDEF UNIT UNIVERSAL UNLOCK UNSTRING UNTIL UP UPON USAGE USE USER-DEFAULT USING UTF-8 + VAL-STATUS VALID VALIDATE VALIDATE-STATUS VALUE VALUES VARYING VOLATILE WHEN WHEN-COMPILED WITH WORDS + WRITE WRITE-ONLY XML-CODE XML-EVENT XML-INFORMATION XML-NAMESPACE XML-NAMESPACE-PREFIX + XML-NNAMESPACE XML-NNAMESPACE-PREFIX XML-NTEXT XML-SCHEMA XML-TEXT XML ZERO ZEROES ZEROS + ] + end - state :root do # First detect the comments rule %r/^( \*).*|^(^Debug \*).*/, Comment::Special @@ -77,9 +80,9 @@ def sections rule %r/'/, Str::Single, :string_single # Keywords and divisions - rule %r/(? Date: Fri, 27 Sep 2024 20:10:24 +0200 Subject: [PATCH 15/18] Last fixups --- lib/rouge/lexers/cobol.rb | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/lib/rouge/lexers/cobol.rb b/lib/rouge/lexers/cobol.rb index 5d343a98d4..39bd3b7cea 100644 --- a/lib/rouge/lexers/cobol.rb +++ b/lib/rouge/lexers/cobol.rb @@ -14,21 +14,21 @@ class COBOL < RegexLexer # COBOL divisions and sections def divisions - @divisions ||= %w[ + @divisions ||= %w( IDENTIFICATION ENVIRONMENT DATA PROCEDURE DIVISION - ] + ) end def sections - @sections ||= %w[ + @sections ||= %w( CONFIGURATION INPUT-OUTPUT FILE WORKING-STORAGE LOCAL-STORAGE LINKAGE SECTION - ] + ) end def keywords # List of COBOL keywords # sourced from https://www.ibm.com/docs/en/cobol-zos/6.4?topic=appendixes-reserved-words - @keywords ||= %w[ + @keywords ||= Set.new(%w( ACCEPT ACCESS ACTIVE-CLASS ADD ADDRESS ADVANCING AFTER ALIGNED ALL ALLOCATE ALPHABET ALPHABETIC ALPHABETIC-LOWER ALPHABETIC-UPPER ALPHANUMERIC ALPHANUMERIC-EDITED ALSO ALTER ALTERNATE AND ANYCASE ANY APPLY ARE AREA AREAS ASCENDING ASSIGN AT AUTHOR B-AND B-NOT B-OR B-XOR BASED BASIS BEFORE BEGINNING BINARY BINARY-CHAR BINARY-DOUBLE @@ -69,7 +69,7 @@ def keywords VAL-STATUS VALID VALIDATE VALIDATE-STATUS VALUE VALUES VARYING VOLATILE WHEN WHEN-COMPILED WITH WORDS WRITE WRITE-ONLY XML-CODE XML-EVENT XML-INFORMATION XML-NAMESPACE XML-NAMESPACE-PREFIX XML-NNAMESPACE XML-NNAMESPACE-PREFIX XML-NTEXT XML-SCHEMA XML-TEXT XML ZERO ZEROES ZEROS - ] + )) end # First detect the comments From 3ec7c3614ecf97f523034417f0feb185c68f9e81 Mon Sep 17 00:00:00 2001 From: Bart Broere Date: Fri, 27 Sep 2024 20:14:04 +0200 Subject: [PATCH 16/18] Set ordering causes issues --- lib/rouge/lexers/cobol.rb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/rouge/lexers/cobol.rb b/lib/rouge/lexers/cobol.rb index 39bd3b7cea..08846cb2e0 100644 --- a/lib/rouge/lexers/cobol.rb +++ b/lib/rouge/lexers/cobol.rb @@ -28,7 +28,7 @@ def sections def keywords # List of COBOL keywords # sourced from https://www.ibm.com/docs/en/cobol-zos/6.4?topic=appendixes-reserved-words - @keywords ||= Set.new(%w( + @keywords ||= %w( ACCEPT ACCESS ACTIVE-CLASS ADD ADDRESS ADVANCING AFTER ALIGNED ALL ALLOCATE ALPHABET ALPHABETIC ALPHABETIC-LOWER ALPHABETIC-UPPER ALPHANUMERIC ALPHANUMERIC-EDITED ALSO ALTER ALTERNATE AND ANYCASE ANY APPLY ARE AREA AREAS ASCENDING ASSIGN AT AUTHOR B-AND B-NOT B-OR B-XOR BASED BASIS BEFORE BEGINNING BINARY BINARY-CHAR BINARY-DOUBLE @@ -69,7 +69,7 @@ def keywords VAL-STATUS VALID VALIDATE VALIDATE-STATUS VALUE VALUES VARYING VOLATILE WHEN WHEN-COMPILED WITH WORDS WRITE WRITE-ONLY XML-CODE XML-EVENT XML-INFORMATION XML-NAMESPACE XML-NAMESPACE-PREFIX XML-NNAMESPACE XML-NNAMESPACE-PREFIX XML-NTEXT XML-SCHEMA XML-TEXT XML ZERO ZEROES ZEROS - )) + ) end # First detect the comments From 5b32755e8cb0e19b070f9a8b44718efe36333396 Mon Sep 17 00:00:00 2001 From: Tan Le Date: Fri, 4 Oct 2024 16:01:38 +1000 Subject: [PATCH 17/18] Extract keywords and sections to class methods --- lib/rouge/lexers/cobol.rb | 125 ++++++++++++++++++++------------------ 1 file changed, 65 insertions(+), 60 deletions(-) diff --git a/lib/rouge/lexers/cobol.rb b/lib/rouge/lexers/cobol.rb index 08846cb2e0..41035dff83 100644 --- a/lib/rouge/lexers/cobol.rb +++ b/lib/rouge/lexers/cobol.rb @@ -10,68 +10,68 @@ class COBOL < RegexLexer filenames '*.cob', '*.cbl' mimetypes 'text/x-cobol' - state :root do + identifier = /\p{Upper}[\p{Upper}\p{Digit}-]*/ - # COBOL divisions and sections - def divisions - @divisions ||= %w( + def self.divisions + @divisions ||= %w( IDENTIFICATION ENVIRONMENT DATA PROCEDURE DIVISION ) - end + end - def sections - @sections ||= %w( + def self.sections + @sections ||= %w( CONFIGURATION INPUT-OUTPUT FILE WORKING-STORAGE LOCAL-STORAGE LINKAGE SECTION ) - end + end - def keywords - # List of COBOL keywords - # sourced from https://www.ibm.com/docs/en/cobol-zos/6.4?topic=appendixes-reserved-words - @keywords ||= %w( - ACCEPT ACCESS ACTIVE-CLASS ADD ADDRESS ADVANCING AFTER ALIGNED ALL ALLOCATE ALPHABET ALPHABETIC ALPHABETIC-LOWER - ALPHABETIC-UPPER ALPHANUMERIC ALPHANUMERIC-EDITED ALSO ALTER ALTERNATE AND ANYCASE ANY APPLY ARE AREA AREAS - ASCENDING ASSIGN AT AUTHOR B-AND B-NOT B-OR B-XOR BASED BASIS BEFORE BEGINNING BINARY BINARY-CHAR BINARY-DOUBLE - BINARY-LONG BINARY-SHORT BIT BLANK BLOCK BOOLEAN BOTTOM BY BYTE-LENGTH CALL CANCEL CBL CD CF CH CHARACTER - CHARACTERS CLASS CLASS-ID CLOCK-UNITS CLOSE COBOL CODE CODE-SET COL COLLATING COLS COLUMN COLUMNS COM-REG COMMA - COMMON COMMUNICATION COMP-1 COMP-2 COMP-3 COMP-4 COMP-5 COMP COMPUTATIONAL-1 COMPUTATIONAL-2 - COMPUTATIONAL-3 COMPUTATIONAL-4 COMPUTATIONAL-5 COMPUTATIONAL COMPUTE CONDITION CONSTANT CONTAINS CONTENT - CONTINUE CONTROL CONTROLS CONVERTING COPY CORR CORRESPONDING COUNT CRT CURRENCY CURSOR DATA-POINTER DATE - DATE-COMPILED DATE-WRITTEN DAY DAY-OF-WEEK DBCS DE DEBUG-CONTENTS DEBUG-ITEM DEBUG-LINE DEBUG-NAME DEBUG-SUB-1 - DEBUG-SUB-2 DEBUG-SUB-3 DEBUGGING DECIMAL-POINT DECLARATIVES DEFAULT DELETE DELIMITED DELIMITER DEPENDING - DESCENDING DESTINATION DETAIL DISABLE DISPLAY-1 DISPLAY DIVIDE DOWN DUPLICATES DYNAMIC EC EGCS EGI - EJECT ELSE EMI ENABLE END-ACCEPT END-ADD END-CALL END-COMPUTE END-DELETE END-DISPLAY END-DIVIDE END-EVALUATE - END-EXEC END-IF END-INVOKE END-JSON END-MULTIPLY END-OF-PAGE END-PERFORM END-READ END-RECEIVE END-RETURN - END-REWRITE END-SEARCH END-START END-STRING END-SUBTRACT END-UNSTRING END-WRITE END-XML ENDING END ENTER ENTRY - EO EOP EQUAL ERROR ESI EVALUATE EVERY EXCEPTION EXCEPTION-OBJECT EXEC EXECUTE EXIT EXTEND EXTERNAL - FACTORY FALSE FD FILE-CONTROL FILLER FINAL FIRST FLOAT-EXTENDED FLOAT-LONG FLOAT-SHORT FOOTING FOR FORMAT - FREE FROM FUNCTION FUNCTION-ID FUNCTION-POINTER GENERATE GET GIVING GLOBAL GO GOBACK GREATER GROUP GROUP-USAGE - HEADING HIGH-VALUE HIGH-VALUES I-O-CONTROL I-O ID IF IN INDEX INDEXED INDICATE INHERITS INITIAL - INITIALIZE INITIATE INPUT INSERT INSPECT INSTALLATION INTERFACE INTERFACE-ID INTO INVALID INVOKE - IS JAVA JNIENVPTR JSON JSON-CODE JSON-STATUS JUST JUSTIFIED KANJI KEY LABEL LAST LEADING LEFT LENGTH LESS LIMIT - LIMITS LINAGE-COUNTER LINAGE LINE-COUNTER LINES LINE LOCALE LOCK LOW-VALUE LOW-VALUES - MEMORY MERGE MESSAGE METHOD METHOD-ID MINUS MODE MODULES MORE-LABELS MOVE MULTIPLE MULTIPLY NATIONAL - NATIONAL-EDITED NATIVE NEGATIVE NESTED NEXT NO NOT NULL NULLS NUMBER NUMERIC NUMERIC-EDITED OBJECT - OBJECT-COMPUTER OBJECT-REFERENCE OCCURS OF OFF OMITTED ON OPEN OPTIONAL OPTIONS OR ORDER ORGANIZATION - OTHER OUTPUT OVERFLOW OVERRIDE PACKED-DECIMAL PADDING PAGE PAGE-COUNTER PASSWORD PERFORM PF PH PIC PICTURE - PLUS POINTER- POINTER-31 POINTER-32 POINTER-64 POINTER POSITION POSITIVE PRESENT PRINTING - PROCEDURE-POINTER PROCEDURES PROCEED PROCESSING PROGRAM-ID PROGRAM-POINTER PROGRAM PROPERTY PROTOTYPE - PURGE QUEUE QUOTE QUOTES RAISE RAISING RANDOM RD READ READY RECEIVE RECORD RECORDING RECORDS RECURSIVE REDEFINES - REEL REFERENCE REFERENCES RELATIVE RELEASE RELOAD REMAINDER REMOVAL RENAMES REPLACE REPLACING REPORT REPORTING - REPORTS REPOSITORY RERUN RESERVE RESET RESUME RETRY RETURN RETURN-CODE RETURNING REVERSED REWIND REWRITE RF RH - RIGHT ROUNDED RUN SAME SCREEN SD SEARCH SECTION SECURITY SEGMENT SEGMENT-LIMIT SELECT SELF SEND SENTENCE - SEPARATE SEQUENCE SEQUENTIAL SERVICE SET SHARING SHIFT-IN SHIFT-OUT SIGN SIZE SKIP1 SKIP2 SKIP3 - SORT-CONTROL SORT-CORE-SIZE SORT-FILE-SIZE SORT-MERGE SORT-MESSAGE SORT-MODE-SIZE SORT-RETURN SORT - SOURCE-COMPUTER SOURCES SOURCE SPACE SPACES SPECIAL-NAMES SQL SQLIMS STANDARD-1 STANDARD-2 STANDARD START STATUS STOP - STRING SUB-QUEUE-1 SUB-QUEUE-2 SUB-QUEUE-3 SUBTRACT SUM SUPER SUPPRESS SYMBOLIC SYNC SYNCHRONIZED SYSTEM-DEFAULT - TABLE TALLY TALLYING TAPE TERMINAL TERMINATE TEST TEXT THAN THEN THROUGH THRU TIME TIMES TITLE TO TOP TRACE - TRAILING TRUE TYPE TYPEDEF UNIT UNIVERSAL UNLOCK UNSTRING UNTIL UP UPON USAGE USE USER-DEFAULT USING UTF-8 - VAL-STATUS VALID VALIDATE VALIDATE-STATUS VALUE VALUES VARYING VOLATILE WHEN WHEN-COMPILED WITH WORDS - WRITE WRITE-ONLY XML-CODE XML-EVENT XML-INFORMATION XML-NAMESPACE XML-NAMESPACE-PREFIX - XML-NNAMESPACE XML-NNAMESPACE-PREFIX XML-NTEXT XML-SCHEMA XML-TEXT XML ZERO ZEROES ZEROS - ) - end + # List of COBOL keywords + # sourced from https://www.ibm.com/docs/en/cobol-zos/6.4?topic=appendixes-reserved-words + def self.keywords + @keywords ||= Set.new(%w( + ACCEPT ACCESS ACTIVE-CLASS ADD ADDRESS ADVANCING AFTER ALIGNED ALL ALLOCATE ALPHABET ALPHABETIC ALPHABETIC-LOWER + ALPHABETIC-UPPER ALPHANUMERIC ALPHANUMERIC-EDITED ALSO ALTER ALTERNATE AND ANYCASE ANY APPLY ARE AREA AREAS + ASCENDING ASSIGN AT AUTHOR B-AND B-NOT B-OR B-XOR BASED BASIS BEFORE BEGINNING BINARY BINARY-CHAR BINARY-DOUBLE + BINARY-LONG BINARY-SHORT BIT BLANK BLOCK BOOLEAN BOTTOM BY BYTE-LENGTH CALL CANCEL CBL CD CF CH CHARACTER + CHARACTERS CLASS CLASS-ID CLOCK-UNITS CLOSE COBOL CODE CODE-SET COL COLLATING COLS COLUMN COLUMNS COM-REG COMMA + COMMON COMMUNICATION COMP-1 COMP-2 COMP-3 COMP-4 COMP-5 COMP COMPUTATIONAL-1 COMPUTATIONAL-2 + COMPUTATIONAL-3 COMPUTATIONAL-4 COMPUTATIONAL-5 COMPUTATIONAL COMPUTE CONDITION CONSTANT CONTAINS CONTENT + CONTINUE CONTROL CONTROLS CONVERTING COPY CORR CORRESPONDING COUNT CRT CURRENCY CURSOR DATA-POINTER DATE + DATE-COMPILED DATE-WRITTEN DAY DAY-OF-WEEK DBCS DE DEBUG-CONTENTS DEBUG-ITEM DEBUG-LINE DEBUG-NAME DEBUG-SUB-1 + DEBUG-SUB-2 DEBUG-SUB-3 DEBUGGING DECIMAL-POINT DECLARATIVES DEFAULT DELETE DELIMITED DELIMITER DEPENDING + DESCENDING DESTINATION DETAIL DISABLE DISPLAY-1 DISPLAY DIVIDE DOWN DUPLICATES DYNAMIC EC EGCS EGI + EJECT ELSE EMI ENABLE END-ACCEPT END-ADD END-CALL END-COMPUTE END-DELETE END-DISPLAY END-DIVIDE END-EVALUATE + END-EXEC END-IF END-INVOKE END-JSON END-MULTIPLY END-OF-PAGE END-PERFORM END-READ END-RECEIVE END-RETURN + END-REWRITE END-SEARCH END-START END-STRING END-SUBTRACT END-UNSTRING END-WRITE END-XML ENDING END ENTER ENTRY + EO EOP EQUAL ERROR ESI EVALUATE EVERY EXCEPTION EXCEPTION-OBJECT EXEC EXECUTE EXIT EXTEND EXTERNAL + FACTORY FALSE FD FILE-CONTROL FILLER FINAL FIRST FLOAT-EXTENDED FLOAT-LONG FLOAT-SHORT FOOTING FOR FORMAT + FREE FROM FUNCTION FUNCTION-ID FUNCTION-POINTER GENERATE GET GIVING GLOBAL GO GOBACK GREATER GROUP GROUP-USAGE + HEADING HIGH-VALUE HIGH-VALUES I-O-CONTROL I-O ID IF IN INDEX INDEXED INDICATE INHERITS INITIAL + INITIALIZE INITIATE INPUT INSERT INSPECT INSTALLATION INTERFACE INTERFACE-ID INTO INVALID INVOKE + IS JAVA JNIENVPTR JSON JSON-CODE JSON-STATUS JUST JUSTIFIED KANJI KEY LABEL LAST LEADING LEFT LENGTH LESS LIMIT + LIMITS LINAGE-COUNTER LINAGE LINE-COUNTER LINES LINE LOCALE LOCK LOW-VALUE LOW-VALUES + MEMORY MERGE MESSAGE METHOD METHOD-ID MINUS MODE MODULES MORE-LABELS MOVE MULTIPLE MULTIPLY NATIONAL + NATIONAL-EDITED NATIVE NEGATIVE NESTED NEXT NO NOT NULL NULLS NUMBER NUMERIC NUMERIC-EDITED OBJECT + OBJECT-COMPUTER OBJECT-REFERENCE OCCURS OF OFF OMITTED ON OPEN OPTIONAL OPTIONS OR ORDER ORGANIZATION + OTHER OUTPUT OVERFLOW OVERRIDE PACKED-DECIMAL PADDING PAGE PAGE-COUNTER PASSWORD PERFORM PF PH PIC PICTURE + PLUS POINTER- POINTER-31 POINTER-32 POINTER-64 POINTER POSITION POSITIVE PRESENT PRINTING + PROCEDURE-POINTER PROCEDURES PROCEED PROCESSING PROGRAM-ID PROGRAM-POINTER PROGRAM PROPERTY PROTOTYPE + PURGE QUEUE QUOTE QUOTES RAISE RAISING RANDOM RD READ READY RECEIVE RECORD RECORDING RECORDS RECURSIVE REDEFINES + REEL REFERENCE REFERENCES RELATIVE RELEASE RELOAD REMAINDER REMOVAL RENAMES REPLACE REPLACING REPORT REPORTING + REPORTS REPOSITORY RERUN RESERVE RESET RESUME RETRY RETURN RETURN-CODE RETURNING REVERSED REWIND REWRITE RF RH + RIGHT ROUNDED RUN SAME SCREEN SD SEARCH SECTION SECURITY SEGMENT SEGMENT-LIMIT SELECT SELF SEND SENTENCE + SEPARATE SEQUENCE SEQUENTIAL SERVICE SET SHARING SHIFT-IN SHIFT-OUT SIGN SIZE SKIP1 SKIP2 SKIP3 + SORT-CONTROL SORT-CORE-SIZE SORT-FILE-SIZE SORT-MERGE SORT-MESSAGE SORT-MODE-SIZE SORT-RETURN SORT + SOURCE-COMPUTER SOURCES SOURCE SPACE SPACES SPECIAL-NAMES SQL SQLIMS STANDARD-1 STANDARD-2 STANDARD START STATUS STOP + STRING SUB-QUEUE-1 SUB-QUEUE-2 SUB-QUEUE-3 SUBTRACT SUM SUPER SUPPRESS SYMBOLIC SYNC SYNCHRONIZED SYSTEM-DEFAULT + TABLE TALLY TALLYING TAPE TERMINAL TERMINATE TEST TEXT THAN THEN THROUGH THRU TIME TIMES TITLE TO TOP TRACE + TRAILING TRUE TYPE TYPEDEF UNIT UNIVERSAL UNLOCK UNSTRING UNTIL UP UPON USAGE USE USER-DEFAULT USING UTF-8 + VAL-STATUS VALID VALIDATE VALIDATE-STATUS VALUE VALUES VARYING VOLATILE WHEN WHEN-COMPILED WITH WORDS + WRITE WRITE-ONLY XML-CODE XML-EVENT XML-INFORMATION XML-NAMESPACE XML-NAMESPACE-PREFIX + XML-NNAMESPACE XML-NNAMESPACE-PREFIX XML-NTEXT XML-SCHEMA XML-TEXT XML ZERO ZEROES ZEROS + )) + end + state :root do # First detect the comments rule %r/^( \*).*|^(^Debug \*).*/, Comment::Special @@ -80,16 +80,21 @@ def keywords rule %r/'/, Str::Single, :string_single # Keywords and divisions - rule %r/(? Date: Mon, 7 Oct 2024 08:46:07 +1000 Subject: [PATCH 18/18] Support lower case keywords in GnuCobol --- lib/rouge/lexers/cobol.rb | 8 ++++---- spec/visual/samples/cobol | 7 +++++++ 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/lib/rouge/lexers/cobol.rb b/lib/rouge/lexers/cobol.rb index 41035dff83..3bf59db2c6 100644 --- a/lib/rouge/lexers/cobol.rb +++ b/lib/rouge/lexers/cobol.rb @@ -10,7 +10,7 @@ class COBOL < RegexLexer filenames '*.cob', '*.cbl' mimetypes 'text/x-cobol' - identifier = /\p{Upper}[\p{Upper}\p{Digit}-]*/ + identifier = /\p{Alpha}[\p{Alnum}-]*/ def self.divisions @divisions ||= %w( @@ -81,11 +81,11 @@ def self.keywords # Keywords and divisions rule %r/(? GnuCOBOL Hello World example + id division. + program-id. hello. + procedure division. + display "Hello, world!" end-display + goback.