Skip to content

Commit

Permalink
autodetection of fixed/free source format
Browse files Browse the repository at this point in the history
In cobc:
* cobc.c (main): initialize cb_config_text_column to 72 to avoid a
  race condition in config files.
* pplex.l (ppopen): try to autodetect the format of a file after
  checking the BOM of a file
* pplex.l (cobc_set_source_format): add new format name "AUTO" to
  activate auto-detection of file format
  • Loading branch information
lefessan committed Jan 31, 2023
1 parent 647f0a2 commit fdbe408
Show file tree
Hide file tree
Showing 8 changed files with 175 additions and 20 deletions.
5 changes: 5 additions & 0 deletions NEWS
Original file line number Diff line number Diff line change
Expand Up @@ -208,6 +208,11 @@ NEWS - user visible changes -*- outline -*-
be reported and recovered from. This is configurable with option
`missing-period`.

If not specified, the compiler tries to automatically recognize the format,
using either fixed or free, depending on column 7 in the first non-space
line. This feature can be disabled by setting the format manually
with `-free`, `-fixed` or `-fformat`.

** the new -febcdic-table option enables one to specify the
translation table used when dealing with EBCDIC codeset;
these translation tables are stored as new configuration files with
Expand Down
9 changes: 9 additions & 0 deletions cobc/ChangeLog
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,15 @@
* codeoptim.c: adjusted inline functions (cob_cmp_packed_int,
cob_get_packed_int) to use register types and less intermediate values

2023-01-28 Fabrice Le Fessant <[email protected]>

* cobc.c (main): initialize cb_config_text_column to 72 to avoid a
race condition in config files.
* pplex.l (ppopen): try to autodetect the format of a file after
checking the BOM of a file
* pplex.l (cobc_set_source_format): add new format name "AUTO" to
activate auto-detection of file format

2023-01-26 Simon Sobisch <[email protected]>

* codegen.c: generate field offsets and indexes as long long to
Expand Down
18 changes: 13 additions & 5 deletions cobc/cobc.c
Original file line number Diff line number Diff line change
Expand Up @@ -5009,6 +5009,7 @@ preprocess (struct filename *fn)
} else {
sourcename = fn->source;
}
save_source_format = cobc_get_source_format ();
if (ppopen (sourcename, NULL) != 0) {
cobc_terminate (sourcename);
}
Expand Down Expand Up @@ -5037,7 +5038,6 @@ preprocess (struct filename *fn)

/* Save default exceptions and flags in case program directives change them */
memcpy (save_exception_table, cb_exception_table, exception_table_size);
save_source_format = cobc_get_source_format ();
save_fold_copy = cb_fold_copy;
save_fold_call = cb_fold_call;
save_ref_mod_zero_length = cb_ref_mod_zero_length;
Expand Down Expand Up @@ -5124,8 +5124,8 @@ preprocess (struct filename *fn)
/* Routines to generate program listing */


static void
set_listing_header_code (void)
void
cobc_set_listing_header_code (void)
{
strcpy (cb_listing_header, "LINE ");
if (! CB_SF_FREE (cb_listing_file_struct->source_format)) {
Expand Down Expand Up @@ -8874,7 +8874,6 @@ process_file (struct filename *fn, int status)
cb_listing_page = 0;
cobc_elided_strcpy (cb_listing_filename, fn->source,
sizeof (cb_listing_filename), 0);
set_listing_header_code ();
}

if (cb_compile_level >= CB_LEVEL_PREPROCESS
Expand All @@ -8887,7 +8886,10 @@ process_file (struct filename *fn, int status)
cb_flag_syntax_only = 1;
cb_flag_fast_compare = 0;
}
}
} else
if (cb_src_list_file) {
cobc_set_listing_header_code ();
}

if (cobc_list_file) {
putc ('\n', cb_listing_file);
Expand Down Expand Up @@ -8978,6 +8980,12 @@ main (int argc, char **argv)
cb_saveargc = argc;
cb_saveargv = argv;

/* This value is used when setting the format in FIXED mode in
pplex, but it might happen that the value has not yet been
initialized in the configuration file.
*/
cb_config_text_column = 72;

/* Process command line arguments */
iargs = process_command_line (argc, argv);

Expand Down
3 changes: 3 additions & 0 deletions cobc/cobc.h
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,7 @@ enum cb_format {
CB_FORMAT_ICOBOL_CRT, /* ICOBOL Free-form format (CRT) */
CB_FORMAT_ACUTERM, /* ACU Terminal format, named "TERMINAL" */
CB_FORMAT_COBOLX, /* GCOS's COBOLX */
CB_FORMAT_AUTO, /* Auto-detect format */
};
#define CB_SF_FREE(sf) (sf == CB_FORMAT_FREE)
#define CB_SF_FIXED(sf) (sf == CB_FORMAT_FIXED || sf == CB_FORMAT_COBOL85)
Expand Down Expand Up @@ -651,6 +652,8 @@ extern void cb_add_error_to_listing (const char *, int, const char *, char *);
DECLNORET extern void flex_fatal_error (const char *, const char *,
const int) COB_A_NORETURN;

extern void cobc_set_listing_header_code ();

/* reserved.c */
extern struct reserved_word_list *cobc_user_res_list;

Expand Down
72 changes: 61 additions & 11 deletions cobc/pplex.l
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,7 @@ static int quotation_mark = 0;
static int listing_line = 0;
static int requires_listing_line;
static int requires_new_line = 0;
static enum cb_format source_format;
static enum cb_format source_format = CB_FORMAT_AUTO;
static int indicator_column = 7;
static int text_column = 72; /* end of area B (in single-byte
characters) */
Expand Down Expand Up @@ -1148,6 +1148,23 @@ pp_set_replace_list (struct cb_replace_list *list, const cob_u32_t is_pushpop)
}
}

static int
is_fixed_indicator (char c){
switch (c){ /* same indicators as in ppinput() */
case ' ':
case '-':
case 'd':
case 'D':
case '*':
case '/':
case '\\':
case '$':
return 1;
default:
return 0;
}
}

/* open file (source or coypbook) for further processing */
int
ppopen (const char *name, struct cb_replace_list *replacing_list)
Expand Down Expand Up @@ -1208,19 +1225,47 @@ ppopen (const char *name, struct cb_replace_list *replacing_list)
/* Check for BOM - *not* for input from stdin as rewind() clears the input
buffer if used on stdin and output in console has normally no BOM at all */
if (ppin && strcmp (name, COB_DASH) != 0) {
unsigned char bom[4];
if (fread (bom, 3, 1, ppin) == 1) {
if (bom[0] != 0xEF || bom[1] != 0xBB || bom[2] != 0xBF) {
rewind (ppin);
int fseek_to = 0 ;
#define COBC_LOOKAHEAD 20
unsigned char buffer[COBC_LOOKAHEAD];
int nread = fread (buffer, 1, COBC_LOOKAHEAD, ppin);
int pos = 0;
if (nread >= 3 && buffer[0] == 0xEF && buffer[1] == 0xBB && buffer[2] == 0xBF) {
fseek_to = 3;
pos = 3;
}
if (source_format == CB_FORMAT_AUTO){
/* If indicator is wrong on first line, switch to free format */
/* skip empty lines */
int amount_of_0a_seen = 0;
while (nread-pos > 7 && (buffer[pos] == '\r' || buffer[pos] == '\n')){
if (buffer[pos] == '\n') amount_of_0a_seen++;
pos++;
}
/* check tab or indicator */
if ( nread-pos > 7 && buffer[pos] != '\t' && !is_fixed_indicator (buffer[pos+6]) ){
struct cb_tree_common loc;
loc.source_file = name;
loc.source_line = 1 + amount_of_0a_seen;
loc.source_column = 7;
cb_note_x (COB_WARNOPT_NONE, &loc, _("free format detected"));
(void) cobc_deciph_source_format ("FREE");
}
} else {
rewind (ppin);
}
fseek(ppin, fseek_to, SEEK_SET);
}

/* Save name for listing */
if (cb_current_file && !cb_current_file->name) {
cb_current_file->name = cobc_strdup (name);
if (source_format==CB_FORMAT_AUTO)
cobc_set_source_format (CB_FORMAT_FIXED);

if (cb_current_file){
if (cb_current_file->source_format==CB_FORMAT_AUTO)
cb_current_file->source_format=cobc_get_source_format ();
/* This is delayed until after format detection */
cobc_set_listing_header_code ();
/* Save name for listing */
if (!cb_current_file->name)
cb_current_file->name = cobc_strdup (name);
}

/* Add to dependency list */
Expand Down Expand Up @@ -1473,7 +1518,8 @@ ppparse_error (const char *err_msg)
int
cobc_has_areacheck_directive (const char * directive) {
if (source_format != CB_FORMAT_FIXED &&
source_format != CB_FORMAT_FREE) {
source_format != CB_FORMAT_FREE &&
source_format != CB_FORMAT_AUTO ) {
return 1;
} else {
cb_plex_warning (COBC_WARN_FILLER, newline_count,
Expand Down Expand Up @@ -1508,6 +1554,7 @@ cobc_set_source_format (const enum cb_format sf) {

switch (source_format) {
case CB_FORMAT_FIXED:
case CB_FORMAT_AUTO:
case CB_FORMAT_COBOL85:
text_column = cb_config_text_column; /* 72 by default */
break;
Expand Down Expand Up @@ -1539,6 +1586,7 @@ cobc_set_source_format (const enum cb_format sf) {
switch (source_format) {
case CB_FORMAT_FIXED:
case CB_FORMAT_FREE:
case CB_FORMAT_AUTO:
case CB_FORMAT_XOPEN_FFF:
emit_area_a_tokens = 0;
cobc_areacheck = 0;
Expand Down Expand Up @@ -1591,6 +1639,8 @@ cobc_parse_source_format (enum cb_format *const out, const char *const sfname) {
format = CB_FORMAT_ACUTERM;
} else if (!cb_strcasecmp (sfname, "COBOLX")) {
format = CB_FORMAT_COBOLX;
} else if (!cb_strcasecmp (sfname, "AUTO")) {
format = CB_FORMAT_AUTO;
} else {
return 1; /* invalid argument */
}
Expand Down
2 changes: 1 addition & 1 deletion config/default.conf
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ standard-define 0

# Default source reference-format; values: FIXED, FREE, COBOL85,
# VARIABLE, XOPEN, XCARD, CRT, TERMINAL, COBOLX
format: fixed
format: auto

# Value: int
tab-width: 8
Expand Down
11 changes: 9 additions & 2 deletions doc/gnucobol.texi
Original file line number Diff line number Diff line change
Expand Up @@ -420,9 +420,11 @@ Place the output into @var{file}.

GnuCOBOL supports fixed, free, Micro Focus' Variable, X/Open Free-form,
ICOBOL xCard and Free-form, ACUCOBOL-GT Terminal, and COBOLX source
formats. The default format is the fixed format. This can be
formats. By default, the compiler tries to autodetect the format using
the indicator on the first line, using the fixed format for correct
indicators and the free format for incorrect ones. This can be
overridden either by the @code{>>SOURCE [FORMAT] [IS]
@{FIXED|FREE|COBOL85|VARIABLE|XOPEN|XCARD|CRT|TERMINAL|COBOLX@}}
@{FIXED|FREE|COBOL85|VARIABLE|XOPEN|XCARD|CRT|TERMINAL|COBOLX|AUTO@}}
directive, or by one of the following options:

@table @code
Expand Down Expand Up @@ -472,6 +474,11 @@ that the indicator area is always present in column 1; the program-text
area starts in column 2 and extends up to the end of the record. Lines
may contain up to 255 characters.

@item -fformat=auto
Autodetection of format. The compiler will use the first line of the file
to detect whether the file is in fixed format (with a correct indicator
at position 7), or in free format.

@end table

Note that with source formats @code{XOPEN}, @code{CRT}, @code{TERMINAL},
Expand Down
75 changes: 74 additions & 1 deletion tests/testsuite.src/syn_misc.at
Original file line number Diff line number Diff line change
Expand Up @@ -2858,7 +2858,7 @@ prog3.cob:15: error: syntax error, unexpected Identifier
])
# Check that invalid indicator and doesn't abort preprocessing
# and that errors in preprocessing doesn't abort compilation
AT_CHECK([$COMPILE_ONLY prog3.cob], [1], [],
AT_CHECK([$COMPILE_ONLY -fixed prog3.cob], [1], [],
[prog3.cob:2: error: invalid indicator 'F' at column 7
prog3.cob:3: error: invalid indicator 'M' at column 7
prog3.cob:4: error: invalid indicator 'N' at column 7
Expand Down Expand Up @@ -9572,3 +9572,76 @@ pgm1.cob:18: error: start of statement in Area A

AT_CLEANUP

AT_SETUP([autodetect format])
AT_KEYWORDS([cobc free fixed format])

AT_DATA([free.cob], [
IDENTIFICATION DIVISION.
PROGRAM-ID. prog.
PROCEDURE DIVISION.
DISPLAY "OK" NO ADVANCING
END-DISPLAY.
STOP RUN.
])

AT_DATA([fixed1.cob], [
IDENTIFICATION DIVISION.
PROGRAM-ID. prog.
* this is a comment
PROCEDURE DIVISION.
DISPLAY "OK" NO ADVANCING
END-DISPLAY.
STOP RUN.
])

AT_DATA([fixed2.cob], [
* this is a comment
IDENTIFICATION DIVISION.
PROGRAM-ID. prog.
PROCEDURE DIVISION.
DISPLAY "OK" NO ADVANCING
END-DISPLAY.
STOP RUN.
])

# Generate source files with DOM
AT_CHECK([printf '\xEF\xBB\xBF' > header.dom])
AT_CHECK([cat header.dom free.cob >> domfree.cob])
AT_CHECK([cat header.dom fixed1.cob >> domfixed.cob])

AT_CHECK([$COMPILE_ONLY -fformat=auto free.cob fixed1.cob fixed2.cob], [0], [],
[free.cob:2: note: free format detected
])
AT_CHECK([$COMPILE_ONLY -fformat=auto domfree.cob domfixed.cob], [0], [],
[domfree.cob:2: note: free format detected
])

# In case we decide to have a disabled warning at some point
# AT_CHECK([$COMPILE_ONLY -fformat=auto -w free.cob fixed1.cob fixed2.cob], [0], [], [])
# AT_CHECK([$COMPILE_ONLY -fformat=auto -w domfree.cob domfixed.cob], [0], [], [])

AT_CHECK([$COMPILE_ONLY -fformat=fixed free.cob], [1], [],
[free.cob:2: error: invalid indicator 'T' at column 7
free.cob:3: error: invalid indicator 'R' at column 7
free.cob:4: error: invalid indicator 'E' at column 7
free.cob:5: error: invalid indicator 'I' at column 7
free.cob:6: error: invalid indicator 'N' at column 7
free.cob:7: error: invalid indicator 'T' at column 7
free.cob:8: error: PROGRAM-ID header missing
])

AT_CHECK([$COMPILE_ONLY -fformat=fixed domfree.cob], [1], [],
[domfree.cob:2: error: invalid indicator 'T' at column 7
domfree.cob:3: error: invalid indicator 'R' at column 7
domfree.cob:4: error: invalid indicator 'E' at column 7
domfree.cob:5: error: invalid indicator 'I' at column 7
domfree.cob:6: error: invalid indicator 'N' at column 7
domfree.cob:7: error: invalid indicator 'T' at column 7
domfree.cob:8: error: PROGRAM-ID header missing
])

AT_CHECK([$COMPILE_ONLY -fformat=fixed fixed1.cob], [0], [], [])
AT_CHECK([$COMPILE_ONLY -fformat=fixed fixed2.cob], [0], [], [])
AT_CHECK([$COMPILE_ONLY -fformat=fixed domfixed.cob], [0], [], [])

AT_CLEANUP

0 comments on commit fdbe408

Please sign in to comment.