diff --git a/lib/rouge/demos/cobol b/lib/rouge/demos/cobol new file mode 100644 index 0000000000..ad2691affe --- /dev/null +++ b/lib/rouge/demos/cobol @@ -0,0 +1,103 @@ + *----------------------- + * This file was sourced from https://github.com/openmainframeproject/cobol-programming-course + * Credits: + * The course materials were made available through a joint collaboration between IBM, its clients, and + * American River College and proposed as a new project by IBM. + *----------------------- + * Copyright Contributors to the COBOL Programming Course + * SPDX-License-Identifier: CC-BY-4.0 + *----------------------- + IDENTIFICATION DIVISION. + *----------------------- + PROGRAM-ID. CBL0001 + AUTHOR. Otto B. Fun. + *-------------------- + ENVIRONMENT DIVISION. + *-------------------- + INPUT-OUTPUT SECTION. + FILE-CONTROL. + SELECT PRINT-LINE ASSIGN TO PRTLINE. + SELECT ACCT-REC ASSIGN TO ACCTREC. + *SELECT clause creates an internal file name + *ASSIGN clause creates a name for an external data source, + *which is associated with the JCL DDNAME used by the z/OS + *e.g. ACCTREC is linked in JCL file CBL0001J to &SYSUID..DATA + *where &SYSUID. stands for Your z/OS user id + *e.g. if Your user id is Z54321, + *the data set used for ACCTREC is Z54321.DATA + *------------- + DATA DIVISION. + *------------- + FILE SECTION. + FD PRINT-LINE RECORDING MODE F. + 01 PRINT-REC. + 05 ACCT-NO-O PIC X(8). + 05 ACCT-LIMIT-O PIC $$,$$$,$$9.99. + 05 ACCT-BALANCE-O PIC $$,$$$,$$9.99. + * PIC $$,$$$,$$9.99 -- Alternative for PIC on chapter 7.2.3, + * using $ to allow values of different amounts of digits + * and .99 instead of v99 to allow period display on output + 05 LAST-NAME-O PIC X(20). + 05 FIRST-NAME-O PIC X(15). + 05 COMMENTS-O PIC X(50). + * since the level 05 is higher than level 01, + * all variables belong to PRINT-REC (see chapter 7.3.3) + * + FD ACCT-REC RECORDING MODE F. + 01 ACCT-FIELDS. + 05 ACCT-NO PIC X(8). + 05 ACCT-LIMIT PIC S9(7)V99 COMP-3. + 05 ACCT-BALANCE PIC S9(7)V99 COMP-3. + * PIC S9(7)v99 -- seven-digit plus a sign digit value + * COMP-3 -- packed BCD (binary coded decimal) representation + 05 LAST-NAME PIC X(20). + 05 FIRST-NAME PIC X(15). + 05 CLIENT-ADDR. + 10 STREET-ADDR PIC X(25). + 10 CITY-COUNTY PIC X(20). + 10 USA-STATE PIC X(15). + 05 RESERVED PIC X(7). + 05 COMMENTS PIC X(50). + * + WORKING-STORAGE SECTION. + 01 FLAGS. + 05 LASTREC PIC X VALUE SPACE. + *------------------ + PROCEDURE DIVISION. + *------------------ + OPEN-FILES. + OPEN INPUT ACCT-REC. + OPEN OUTPUT PRINT-LINE. + * + READ-NEXT-RECORD. + PERFORM READ-RECORD + * The previous statement is needed before entering the loop. + * Both the loop condition LASTREC = 'Y' + * and the call to WRITE-RECORD depend on READ-RECORD having + * been executed before. + * The loop starts at the next line with PERFORM UNTIL + PERFORM UNTIL LASTREC = 'Y' + PERFORM WRITE-RECORD + PERFORM READ-RECORD + END-PERFORM + . + * + CLOSE-STOP. + CLOSE ACCT-REC. + CLOSE PRINT-LINE. + GOBACK. + * + READ-RECORD. + READ ACCT-REC + AT END MOVE 'Y' TO LASTREC + END-READ. + * + WRITE-RECORD. + MOVE ACCT-NO TO ACCT-NO-O. + MOVE ACCT-LIMIT TO ACCT-LIMIT-O. + MOVE ACCT-BALANCE TO ACCT-BALANCE-O. + MOVE LAST-NAME TO LAST-NAME-O. + MOVE FIRST-NAME TO FIRST-NAME-O. + MOVE COMMENTS TO COMMENTS-O. + WRITE PRINT-REC. + * diff --git a/lib/rouge/lexers/cobol.rb b/lib/rouge/lexers/cobol.rb new file mode 100644 index 0000000000..3bf59db2c6 --- /dev/null +++ b/lib/rouge/lexers/cobol.rb @@ -0,0 +1,137 @@ +# -*- coding: utf-8 -*- # +# frozen_string_literal: true + +module Rouge + module Lexers + class COBOL < RegexLexer + title 'COBOL' + desc 'COBOL (Common Business-Oriented Language) programming language' + tag 'cobol' + filenames '*.cob', '*.cbl' + mimetypes 'text/x-cobol' + + identifier = /\p{Alpha}[\p{Alnum}-]*/ + + def self.divisions + @divisions ||= %w( + IDENTIFICATION ENVIRONMENT DATA PROCEDURE DIVISION + ) + end + + def self.sections + @sections ||= %w( + CONFIGURATION INPUT-OUTPUT FILE WORKING-STORAGE LOCAL-STORAGE LINKAGE SECTION + ) + end + + # List of COBOL keywords + # sourced from https://www.ibm.com/docs/en/cobol-zos/6.4?topic=appendixes-reserved-words + def self.keywords + @keywords ||= Set.new(%w( + ACCEPT ACCESS ACTIVE-CLASS ADD ADDRESS ADVANCING AFTER ALIGNED ALL ALLOCATE ALPHABET ALPHABETIC ALPHABETIC-LOWER + ALPHABETIC-UPPER ALPHANUMERIC ALPHANUMERIC-EDITED ALSO ALTER ALTERNATE AND ANYCASE ANY APPLY ARE AREA AREAS + ASCENDING ASSIGN AT AUTHOR B-AND B-NOT B-OR B-XOR BASED BASIS BEFORE BEGINNING BINARY BINARY-CHAR BINARY-DOUBLE + BINARY-LONG BINARY-SHORT BIT BLANK BLOCK BOOLEAN BOTTOM BY BYTE-LENGTH CALL CANCEL CBL CD CF CH CHARACTER + CHARACTERS CLASS CLASS-ID CLOCK-UNITS CLOSE COBOL CODE CODE-SET COL COLLATING COLS COLUMN COLUMNS COM-REG COMMA + COMMON COMMUNICATION COMP-1 COMP-2 COMP-3 COMP-4 COMP-5 COMP COMPUTATIONAL-1 COMPUTATIONAL-2 + COMPUTATIONAL-3 COMPUTATIONAL-4 COMPUTATIONAL-5 COMPUTATIONAL COMPUTE CONDITION CONSTANT CONTAINS CONTENT + CONTINUE CONTROL CONTROLS CONVERTING COPY CORR CORRESPONDING COUNT CRT CURRENCY CURSOR DATA-POINTER DATE + DATE-COMPILED DATE-WRITTEN DAY DAY-OF-WEEK DBCS DE DEBUG-CONTENTS DEBUG-ITEM DEBUG-LINE DEBUG-NAME DEBUG-SUB-1 + DEBUG-SUB-2 DEBUG-SUB-3 DEBUGGING DECIMAL-POINT DECLARATIVES DEFAULT DELETE DELIMITED DELIMITER DEPENDING + DESCENDING DESTINATION DETAIL DISABLE DISPLAY-1 DISPLAY DIVIDE DOWN DUPLICATES DYNAMIC EC EGCS EGI + EJECT ELSE EMI ENABLE END-ACCEPT END-ADD END-CALL END-COMPUTE END-DELETE END-DISPLAY END-DIVIDE END-EVALUATE + END-EXEC END-IF END-INVOKE END-JSON END-MULTIPLY END-OF-PAGE END-PERFORM END-READ END-RECEIVE END-RETURN + END-REWRITE END-SEARCH END-START END-STRING END-SUBTRACT END-UNSTRING END-WRITE END-XML ENDING END ENTER ENTRY + EO EOP EQUAL ERROR ESI EVALUATE EVERY EXCEPTION EXCEPTION-OBJECT EXEC EXECUTE EXIT EXTEND EXTERNAL + FACTORY FALSE FD FILE-CONTROL FILLER FINAL FIRST FLOAT-EXTENDED FLOAT-LONG FLOAT-SHORT FOOTING FOR FORMAT + FREE FROM FUNCTION FUNCTION-ID FUNCTION-POINTER GENERATE GET GIVING GLOBAL GO GOBACK GREATER GROUP GROUP-USAGE + HEADING HIGH-VALUE HIGH-VALUES I-O-CONTROL I-O ID IF IN INDEX INDEXED INDICATE INHERITS INITIAL + INITIALIZE INITIATE INPUT INSERT INSPECT INSTALLATION INTERFACE INTERFACE-ID INTO INVALID INVOKE + IS JAVA JNIENVPTR JSON JSON-CODE JSON-STATUS JUST JUSTIFIED KANJI KEY LABEL LAST LEADING LEFT LENGTH LESS LIMIT + LIMITS LINAGE-COUNTER LINAGE LINE-COUNTER LINES LINE LOCALE LOCK LOW-VALUE LOW-VALUES + MEMORY MERGE MESSAGE METHOD METHOD-ID MINUS MODE MODULES MORE-LABELS MOVE MULTIPLE MULTIPLY NATIONAL + NATIONAL-EDITED NATIVE NEGATIVE NESTED NEXT NO NOT NULL NULLS NUMBER NUMERIC NUMERIC-EDITED OBJECT + OBJECT-COMPUTER OBJECT-REFERENCE OCCURS OF OFF OMITTED ON OPEN OPTIONAL OPTIONS OR ORDER ORGANIZATION + OTHER OUTPUT OVERFLOW OVERRIDE PACKED-DECIMAL PADDING PAGE PAGE-COUNTER PASSWORD PERFORM PF PH PIC PICTURE + PLUS POINTER- POINTER-31 POINTER-32 POINTER-64 POINTER POSITION POSITIVE PRESENT PRINTING + PROCEDURE-POINTER PROCEDURES PROCEED PROCESSING PROGRAM-ID PROGRAM-POINTER PROGRAM PROPERTY PROTOTYPE + PURGE QUEUE QUOTE QUOTES RAISE RAISING RANDOM RD READ READY RECEIVE RECORD RECORDING RECORDS RECURSIVE REDEFINES + REEL REFERENCE REFERENCES RELATIVE RELEASE RELOAD REMAINDER REMOVAL RENAMES REPLACE REPLACING REPORT REPORTING + REPORTS REPOSITORY RERUN RESERVE RESET RESUME RETRY RETURN RETURN-CODE RETURNING REVERSED REWIND REWRITE RF RH + RIGHT ROUNDED RUN SAME SCREEN SD SEARCH SECTION SECURITY SEGMENT SEGMENT-LIMIT SELECT SELF SEND SENTENCE + SEPARATE SEQUENCE SEQUENTIAL SERVICE SET SHARING SHIFT-IN SHIFT-OUT SIGN SIZE SKIP1 SKIP2 SKIP3 + SORT-CONTROL SORT-CORE-SIZE SORT-FILE-SIZE SORT-MERGE SORT-MESSAGE SORT-MODE-SIZE SORT-RETURN SORT + SOURCE-COMPUTER SOURCES SOURCE SPACE SPACES SPECIAL-NAMES SQL SQLIMS STANDARD-1 STANDARD-2 STANDARD START STATUS STOP + STRING SUB-QUEUE-1 SUB-QUEUE-2 SUB-QUEUE-3 SUBTRACT SUM SUPER SUPPRESS SYMBOLIC SYNC SYNCHRONIZED SYSTEM-DEFAULT + TABLE TALLY TALLYING TAPE TERMINAL TERMINATE TEST TEXT THAN THEN THROUGH THRU TIME TIMES TITLE TO TOP TRACE + TRAILING TRUE TYPE TYPEDEF UNIT UNIVERSAL UNLOCK UNSTRING UNTIL UP UPON USAGE USE USER-DEFAULT USING UTF-8 + VAL-STATUS VALID VALIDATE VALIDATE-STATUS VALUE VALUES VARYING VOLATILE WHEN WHEN-COMPILED WITH WORDS + WRITE WRITE-ONLY XML-CODE XML-EVENT XML-INFORMATION XML-NAMESPACE XML-NAMESPACE-PREFIX + XML-NNAMESPACE XML-NNAMESPACE-PREFIX XML-NTEXT XML-SCHEMA XML-TEXT XML ZERO ZEROES ZEROS + )) + end + + state :root do + # First detect the comments + rule %r/^( \*).*|^(^Debug \*).*/, Comment::Special + + # Strings + rule %r/"/, Str::Double, :string_double + rule %r/'/, Str::Single, :string_single + + # Keywords and divisions + rule %r/(?.*/, Comment::Single + + # Operators + rule %r/[+\-*\/><=]/, Operator + + # Whitespace remaining + rule %r/\s/, Text::Whitespace + + # Anything else remaining + rule %r/.+/, Text + end + + # TODO double check string escaping in COBOL + # TODO Fix that a string opened by " can't be closed by ' + # TODO Fix that strings can't be multi-line + + # Handle strings where " opens a string and must be closed by " + state :string_double do + # Ensure strings can't span multiple lines + rule %r/[^"\\\n]+/, Str + rule %r/\\./, Str::Escape + rule %r/"/, Str::Double, :pop! + rule %r/\n/, Error # Flag an error if a string goes to the next line + end + + # Handle strings where ' opens a string and must be closed by ' + state :string_single do + # Ensure strings can't span multiple lines + rule %r/[^'\\\n]+/, Str + rule %r/\\./, Str::Escape + rule %r/'/, Str::Single, :pop! + rule %r/\n/, Error # Flag an error if a string goes to the next line + end + end + end +end diff --git a/spec/lexers/cobol_spec.rb b/spec/lexers/cobol_spec.rb new file mode 100644 index 0000000000..17f15395cd --- /dev/null +++ b/spec/lexers/cobol_spec.rb @@ -0,0 +1,79 @@ +# -*- coding: utf-8 -*- # +# frozen_string_literal: true + +describe Rouge::Lexers::COBOL do + let(:subject) { Rouge::Lexers::COBOL.new } + + include Support::Lexing + + it 'highlights COBOL keywords correctly' do + tokens = subject.lex('IDENTIFICATION DIVISION.').to_a + assert { tokens.size == 4 } + assert { tokens.first[0] == Token['Keyword.Declaration'] } + assert { tokens.last[0] == Token['Punctuation'] } + end + + it 'highlights COBOL sections correctly' do + tokens = subject.lex('WORKING-STORAGE SECTION.').to_a + assert { tokens.size == 4 } + assert { tokens.first[0] == Token['Keyword.Namespace'] } + assert { tokens.last[0] == Token['Punctuation'] } + end + + it 'handles comments correctly' do + tokens = subject.lex('*> This is a comment').to_a + assert { tokens.size == 1 } + assert { tokens.first[0] == Token['Comment.Single'] } + end + + it 'highlights special comments with asterisks in position 7 correctly' do + tokens = subject.lex(' * This is a special comment').to_a + assert { tokens.size == 1 } + assert { tokens.first[0] == Token['Comment.Special'] } + + tokens = subject.lex('Debug * This is a Debug comment').to_a + assert { tokens.size == 1 } + assert { tokens.first[0] == Token['Comment.Special'] } + end + + it 'ensures strings cannot be multi-line and must match opening and closing quotes' do + tokens = subject.lex('"This is a string"').to_a + assert { tokens.size == 3 } + assert { tokens.first[0] == Token['Literal.String.Double'] } + + tokens = subject.lex("'This is a string'").to_a + assert { tokens.size == 3 } + assert { tokens.first[0] == Token['Literal.String.Single'] } + + tokens = subject.lex('"This string doesn\'t close').to_a + assert { tokens.size == 2 } # Should detect an unclosed string and raise an error or issue a second token + end + + it 'recognizes operators like "+ (2 ** ...)" correctly' do + tokens = subject.lex('X = 2 + (2 ** 3)').to_a + assert { tokens.size == 15 } + assert { tokens[0][0] == Token['Name'] } + assert { tokens[2][0] == Token['Operator'] } + assert { tokens[4][0] == Token['Literal.Number'] } + assert { tokens[6][0] == Token['Operator'] } + assert { tokens[8][0] == Token['Punctuation'] } + assert { tokens[9][0] == Token['Literal.Number'] } + assert { tokens[11][0] == Token['Operator'] } + assert { tokens[13][0] == Token['Literal.Number'] } + assert { tokens[14][0] == Token['Punctuation'] } + + end + + describe 'guessing' do + include Support::Guessing + + it 'guesses by filename' do + assert_guess :filename => 'foo.cob' + assert_guess :filename => 'foo.cbl' + end + + it 'guesses by mimetype' do + assert_guess :mimetype => 'text/x-cobol' + end + end +end diff --git a/spec/visual/samples/cobol b/spec/visual/samples/cobol new file mode 100644 index 0000000000..3f674457a2 --- /dev/null +++ b/spec/visual/samples/cobol @@ -0,0 +1,110 @@ + *----------------------- + * This file was sourced from https://github.com/openmainframeproject/cobol-programming-course + * Credits: + * The course materials were made available through a joint collaboration between IBM, its clients, and + * American River College and proposed as a new project by IBM. + *----------------------- + * Copyright Contributors to the COBOL Programming Course + * SPDX-License-Identifier: CC-BY-4.0 + *----------------------- + IDENTIFICATION DIVISION. + *----------------------- + PROGRAM-ID. CBL0001 + AUTHOR. Otto B. Fun. + *-------------------- + ENVIRONMENT DIVISION. + *-------------------- + INPUT-OUTPUT SECTION. + FILE-CONTROL. + SELECT PRINT-LINE ASSIGN TO PRTLINE. + SELECT ACCT-REC ASSIGN TO ACCTREC. + *SELECT clause creates an internal file name + *ASSIGN clause creates a name for an external data source, + *which is associated with the JCL DDNAME used by the z/OS + *e.g. ACCTREC is linked in JCL file CBL0001J to &SYSUID..DATA + *where &SYSUID. stands for Your z/OS user id + *e.g. if Your user id is Z54321, + *the data set used for ACCTREC is Z54321.DATA + *------------- + DATA DIVISION. + *------------- + FILE SECTION. + FD PRINT-LINE RECORDING MODE F. + 01 PRINT-REC. + 05 ACCT-NO-O PIC X(8). + 05 ACCT-LIMIT-O PIC $$,$$$,$$9.99. + 05 ACCT-BALANCE-O PIC $$,$$$,$$9.99. + * PIC $$,$$$,$$9.99 -- Alternative for PIC on chapter 7.2.3, + * using $ to allow values of different amounts of digits + * and .99 instead of v99 to allow period display on output + 05 LAST-NAME-O PIC X(20). + 05 FIRST-NAME-O PIC X(15). + 05 COMMENTS-O PIC X(50). + * since the level 05 is higher than level 01, + * all variables belong to PRINT-REC (see chapter 7.3.3) + * + FD ACCT-REC RECORDING MODE F. + 01 ACCT-FIELDS. + 05 ACCT-NO PIC X(8). + 05 ACCT-LIMIT PIC S9(7)V99 COMP-3. + 05 ACCT-BALANCE PIC S9(7)V99 COMP-3. + * PIC S9(7)v99 -- seven-digit plus a sign digit value + * COMP-3 -- packed BCD (binary coded decimal) representation + 05 LAST-NAME PIC X(20). + 05 FIRST-NAME PIC X(15). + 05 CLIENT-ADDR. + 10 STREET-ADDR PIC X(25). + 10 CITY-COUNTY PIC X(20). + 10 USA-STATE PIC X(15). + 05 RESERVED PIC X(7). + 05 COMMENTS PIC X(50). + * + WORKING-STORAGE SECTION. + 01 FLAGS. + 05 LASTREC PIC X VALUE SPACE. + *------------------ + PROCEDURE DIVISION. + *------------------ + OPEN-FILES. + OPEN INPUT ACCT-REC. + OPEN OUTPUT PRINT-LINE. + * + READ-NEXT-RECORD. + PERFORM READ-RECORD + * The previous statement is needed before entering the loop. + * Both the loop condition LASTREC = 'Y' + * and the call to WRITE-RECORD depend on READ-RECORD having + * been executed before. + * The loop starts at the next line with PERFORM UNTIL + PERFORM UNTIL LASTREC = 'Y' + PERFORM WRITE-RECORD + PERFORM READ-RECORD + END-PERFORM + . + * + CLOSE-STOP. + CLOSE ACCT-REC. + CLOSE PRINT-LINE. + GOBACK. + * + READ-RECORD. + READ ACCT-REC + AT END MOVE 'Y' TO LASTREC + END-READ. + * + WRITE-RECORD. + MOVE ACCT-NO TO ACCT-NO-O. + MOVE ACCT-LIMIT TO ACCT-LIMIT-O. + MOVE ACCT-BALANCE TO ACCT-BALANCE-O. + MOVE LAST-NAME TO LAST-NAME-O. + MOVE FIRST-NAME TO FIRST-NAME-O. + MOVE COMMENTS TO COMMENTS-O. + WRITE PRINT-REC. + * + + *> GnuCOBOL Hello World example + id division. + program-id. hello. + procedure division. + display "Hello, world!" end-display + goback.