From ef05357a52ebc7ad61187ecf53f46010f945fa70 Mon Sep 17 00:00:00 2001
From: Fabrice Le Fessant
Date: Tue, 23 Apr 2024 10:13:05 +0200
Subject: [PATCH] attempt at optimizing allocs in replace.c

---
 cobc/replace.c | 342 +++++++++++++++++++++++++++++++++----------------
 1 file changed, 237 insertions(+), 105 deletions(-)

diff --git a/cobc/replace.c b/cobc/replace.c
index 6ebe254f3..cab72b3d5 100644
--- a/cobc/replace.c
+++ b/cobc/replace.c
@@ -21,6 +21,13 @@
    along with GnuCOBOL.  If not, see <https://www.gnu.org/licenses/>.
 */
 
+/* TODO:
+ * Replace queues implemented by lists by queues in arrays
+ * Check which tokens should be strdup. At maximum, only needed for
+   tokens received directly from the lexer. Maybe not all tokens from
+   the parser also need strdup.
+*/
+
 #include "tarstamp.h"
 #include "config.h"
 
@@ -86,6 +93,175 @@
 #define DEBUG_REPLACE
 #endif
 
+/* BEGIN implementation of a queue of text and token pairs. The
+   implementation could easily be translated for any other data */
+
+/* Circular FIFO of (text, token) string pairs. Both arrays live in
+   one single allocation: `tokens` starts `maxsize` entries after
+   `texts`. */
+struct cb_token_queue {
+	int maxsize;		/* capacity, in pairs */
+	int pos;		/* index of the oldest queued pair */
+	int length;		/* number of pairs currently queued */
+	const char **texts;	/* texts; also the start of the allocation */
+	const char **tokens;	/* tokens; an entry may be NULL */
+};
+
+/* Create an empty queue with room for initial_size pairs.
+   initial_size must be positive and should be a power of two */
+static struct cb_token_queue *token_queue_new(int initial_size)
+{
+	struct cb_token_queue *q ;
+	q = cobc_malloc (sizeof (struct cb_token_queue));
+	q->maxsize = initial_size;
+	q->pos = 0;
+	q->length = 0;
+	q->texts = cobc_malloc (sizeof(char*) * initial_size * 2);
+	q->tokens = q->texts + initial_size;
+	return q;
+}
+
+/* Append the pair (text, token) at the tail of the queue, doubling
+   the capacity first when the queue is full. When do_strdup is not
+   zero, text and (unless NULL) token are copied with
+   cobc_plex_strdup; otherwise the pointers are stored as is. */
+static void token_queue_put(struct cb_token_queue *q, int do_strdup,
+	const char *text,
+	const char *token)
+{
+	int pos;
+	if (q->length == q->maxsize) {
+		int maxsize = q->maxsize * 2;
+		int n = q->maxsize - q->pos;
+		const char **p =
+			cobc_malloc (sizeof(char *) * maxsize * 2);
+		const char **old_text = q->texts;
+
+		/* unroll the ring so that the oldest pair lands at index 0 */
+		memcpy (p, q->texts + q->pos,
+			sizeof(char *) * n);
+		if (q->pos > 0) {
+			memcpy(p + n, q->texts,
+				sizeof(char *) * ( q->maxsize - n ) );
+		}
+		q->texts = p;
+
+		p += maxsize;
+
+		memcpy (p, q->tokens + q->pos,
+			sizeof(char *) * n);
+		if (q->pos > 0) {
+			memcpy(p + n, q->tokens,
+				sizeof(char *) * ( q->maxsize - n ) );
+		}
+		q->tokens = p;
+
+		/* one free is enough: tokens were part of the same block */
+		cobc_free (old_text);
+		q->pos = 0;
+		/* record the new capacity; with the old one the index
+		   below would wrap onto the pairs that were just copied */
+		q->maxsize = maxsize;
+	}
+	pos = (q->pos+q->length) % q->maxsize ;
+	q->texts[pos] = do_strdup ? cobc_plex_strdup(text) : text;
+	if (token != NULL && do_strdup){
+		token = cobc_plex_strdup(token);
+	}
+	q->tokens[pos] = token;
+	q->length++;
+}
+
+#if 0
+/* Push a pair back at the head of the queue (currently unused) */
+static void token_queue_put_back(struct cb_token_queue *q, char *text,
+	char *token)
+{
+	int pos;
+	/* put_back only when the element was just taken.
+	   assert (q->length < q->maxsize); */
+	pos = (q->pos + q->maxsize - 1) % q->maxsize ;
+	q->texts[pos] = text;
+	q->tokens[pos] = token;
+	q->pos = pos;
+	q->length++;
+}
+#endif
+
+/* Return non-zero when no pair is queued */
+static
+int token_queue_is_empty (struct cb_token_queue *q)
+{
+	return (q->length == 0);
+}
+
+/* Return the number of pairs currently queued */
+static
+int token_queue_length (struct cb_token_queue *q)
+{
+	return q->length;
+}
+
+/* Read the oldest pair without removing it; text and token may each
+   be NULL when the caller does not need that value */
+static
+void token_queue_peek (struct cb_token_queue *q, const char **text,
+	const char **token) {
+	/* assert (q->length > 0); */
+	if (text)
+		*text = q->texts[q->pos];
+	if (token)
+		*token = q->tokens[q->pos];
+}
+
+/* Read the pair located index positions after the oldest one,
+   without removing anything */
+static void token_queue_get(struct cb_token_queue *q, int index,
+	const char **text,
+	const char **token) {
+	/* assert (q->length - index > 0); */
+	int pos = ( q->pos + index ) % q->maxsize;
+	if (text)
+		*text = q->texts[pos];
+	if (token)
+		*token = q->tokens[pos];
+}
+
+/* Read the oldest pair and remove it from the queue */
+static
+void token_queue_take (struct cb_token_queue *q, const char **text,
+	const char **token) {
+
+	/* assert (q->length > 0); */
+
+	if (text)
+		*text = q->texts[q->pos];
+	if (token)
+		*token = q->tokens[q->pos];
+	q->length--;
+	q->pos = (q->pos+1) % q->maxsize;
+}
+
+/* Drop the n oldest pairs */
+static
+void token_queue_remove (struct cb_token_queue *q, int n)
+{
+	/* assert (q->length >= n); */
+	q->length -= n;
+	q->pos = ( q->pos + n ) % q->maxsize;
+}
+
+/* Drop every queued pair; the capacity is kept */
+static
+void token_queue_empty (struct cb_token_queue *q)
+{
+	q->length = 0;
+	q->pos = 0;
+}
+
+/* END implementation of queues */
+
+
struct cb_token_list { struct cb_token_list *next; /* next pointer */ struct cb_token_list *last; @@ -113,7 +266,7 @@ struct cb_replacement_state { /* The list of tokens that are currently being checked for * replacements. Empty, unless a partial match occurred. */ - struct cb_token_list *token_queue ; + struct cb_token_queue *token_queue ; /* We don't queue WORD tokens immediately, because * preprocessing could create larger words. Instead, we buffer @@ -216,64 +369,10 @@ static void do_replace (WITH_DEPTH struct cb_replacement_state* repls); static void check_replace_after_match (WITH_DEPTH struct cb_replacement_state *repls); static void check_replace_all (WITH_DEPTH struct cb_replacement_state *repls, const struct cb_text_list *new_text, - struct cb_token_list *texts, + int matched, const struct cb_text_list *src, const struct cb_replace_list *replace_list); -static struct cb_token_list * -token_list_add (WITH_DEPTH struct cb_token_list *list, - const char *text, - const char *token); - -/* This specific token_list_add function does a standard append on - list, without expecting `last` field to be correctly set. This is - important as `pp_token_list_add` only correctly works when always - adding on the same head, other `last` fields in the middle of the - list not being correctly updated... 
- */ -static struct cb_token_list * -token_list_add (WITH_DEPTH struct cb_token_list *list, - const char *text, const char *token) -{ -#ifdef DEBUG_REPLACE_TRACE - fprintf (stderr, "%stoken_list_add(%s,'%s')\n", - DEPTH, string_of_token_list(list), text); -#endif - struct cb_token_list *p; - - p = cobc_plex_malloc (sizeof (struct cb_token_list)); - p->text = cobc_plex_strdup (text); - if (token == NULL) { - p->token = NULL; - } else { - p->token = cobc_plex_strdup (token); - } - - p->next = NULL; - if (list==NULL) { - return p; - } else { - struct cb_token_list *cursor = list; - for (; cursor->next != NULL; cursor = cursor->next); - cursor->next = p; - return list; - } -} - -static void -pop_token (WITH_DEPTH struct cb_replacement_state *repls, - const char **text, const char **token) -{ - const struct cb_token_list *q = repls->token_queue; - repls->token_queue = q->next ; -#ifdef DEBUG_REPLACE_TRACE - fprintf (stderr, "%spop_token(%s) -> '%s'\n", - DEPTH, repls->name, q->text); -#endif - if (text) *text = q->text; - if (token) *token = q->token; -} - static void ppecho_switch (WITH_DEPTH struct cb_replacement_state *repls, const char* text, const char* token) @@ -309,16 +408,19 @@ ppecho_switch_text_list (WITH_DEPTH struct cb_replacement_state *repls, static void -ppecho_switch_token_list (WITH_DEPTH struct cb_replacement_state *repls, - const struct cb_token_list *p) +ppecho_switch_token_queue (WITH_DEPTH struct cb_replacement_state *repls, + struct cb_token_queue *q) { #ifdef DEBUG_REPLACE_TRACE - fprintf (stderr, "%sppecho_switch_token_list(%s, %s)\n", + fprintf (stderr, "%sppecho_switch_token_queue(%s, %s)\n", DEPTH, repls->name, string_of_token_list(p)); #endif - - for (;p;p=p->next){ - ppecho_switch (MORE_DEPTH repls, p->text, p->token); + int n; + const char *text; + const char *token; + for ( n = token_queue_length (q); n>0 ; --n){ + token_queue_take (q, &text, &token); + ppecho_switch (MORE_DEPTH repls, text, token); } } @@ -411,7 +513,7 @@ check_replace 
(WITH_DEPTH struct cb_replacement_state* repls, /* remove the text from the current stream */ const char* text; const char* token; - pop_token (MORE_DEPTH repls, &text, &token); + token_queue_take (MORE_DEPTH repls->token_queue, &text, &token); /* pass it to the next stream */ ppecho_switch (MORE_DEPTH repls, text, token); @@ -433,14 +535,16 @@ check_replace (WITH_DEPTH struct cb_replacement_state* repls, int leading = (src->lead_trail == CB_REPLACE_LEADING); unsigned int strict = src->strict; const char *src_text = src->text_list->text; - const char *text = repls->token_queue->text; + const char *text; + + token_queue_peek (repls->token_queue, &text, NULL); if (is_leading_or_trailing (MORE_DEPTH leading, src_text,text,strict)){ /* MATCH */ /* remove the text from the current stream */ - pop_token (MORE_DEPTH repls, NULL, NULL); + token_queue_remove (MORE_DEPTH repls->token_queue, 1); /* perform a partial replacement on the text, and pass it to the next stream */ @@ -458,10 +562,11 @@ check_replace (WITH_DEPTH struct cb_replacement_state* repls, /* we need to compare a list of texts from * this stream with a list of texts from the * replacement */ - check_replace_all (MORE_DEPTH repls,new_text, - repls->token_queue, - src->text_list, - replace_list); + check_replace_all(MORE_DEPTH repls, + new_text, + 0, + src->text_list, + replace_list); } } } @@ -472,12 +577,12 @@ is_space_or_nl (const char c) return c == ' ' || c == '\n'; } -/* `check_replace_all( repls, new_text, texts, src, replace_list )`: +/* `check_replace_all( repls, new_text, matched, src, replace_list )`: * checks whether a particular replacement is possible on the current * list of texts. 
* * `repls` is the current stream state * * `new_text` is the text by which the texts should be replace in case of match - * * `texts` is the list of texts found in the source that remains to be matched + * * `matched` is the number of already matched text tokens found in the source, and matched by previous values * * `src` is the list of texts from the replacement to be matched * * `replace_list` is the next replacements to try in case of failure */ @@ -485,7 +590,7 @@ static void check_replace_all (WITH_DEPTH struct cb_replacement_state *repls, const struct cb_text_list *new_text, - struct cb_token_list *texts, + int matched, const struct cb_text_list *src, const struct cb_replace_list *replace_list) { @@ -505,18 +610,20 @@ check_replace_all (WITH_DEPTH /* MATCH */ /* pass the new text to the next stream */ ppecho_switch_text_list (MORE_DEPTH repls, new_text) ; - /* keep only in this stream the remaining texts that have not been matched */ - repls->token_queue = texts ; + /* keep only in this stream the remaining texts that have not + * been matched */ + token_queue_remove (repls->token_queue, matched); /* restart replacements on the stream */ check_replace_after_match (MORE_DEPTH repls); } else { const char* src_text = src->text; if (is_space_or_nl(src_text[0])) { /* skip spaces in replacement */ - check_replace_all (MORE_DEPTH repls,new_text,texts, - src->next, replace_list); + check_replace_all(MORE_DEPTH repls, new_text, + matched, + src->next, replace_list); } else { - if (texts == NULL){ + if ( token_queue_length (repls->token_queue) == matched){ /* PARTIAL MATCH, we have emptied the * list of texts, but there are still * texts in the replacement, so wait @@ -526,16 +633,17 @@ check_replace_all (WITH_DEPTH fprintf (stderr, "%s check_replace_all --> PARTIAL MATCH\n", DEPTH); #endif } else { - const char* text = texts->text; - texts = texts->next; + const char *text; + token_queue_get(repls->token_queue, matched, &text, NULL); + matched++; if 
(is_space_or_nl(text[0])) { /* skip spaces in texts */ - check_replace_all (MORE_DEPTH repls, - new_text, - texts, src, - replace_list); + check_replace_all(MORE_DEPTH repls, + new_text, matched, + src, + replace_list); } else { - if (!strcasecmp(src_text,text)){ + if (!strcasecmp (src_text,text)){ /* We could match one * text from the * stream with a text @@ -543,10 +651,10 @@ check_replace_all (WITH_DEPTH * replacement, so * move on to the next * text */ - check_replace_all( + check_replace_all ( MORE_DEPTH repls, new_text, - texts,src->next, + matched, src->next, replace_list); } else { /* match failed, move @@ -570,18 +678,19 @@ check_replace_after_match (WITH_DEPTH struct cb_replacement_state *repls) fprintf (stderr, "%scheck_replace_after_match(%s)\n", DEPTH, repls->name); #endif - repls->current_list = NULL; - if (repls->token_queue != NULL){ - if (is_space_or_nl (repls->token_queue->text[0])) { - ppecho_switch (MORE_DEPTH repls, - repls->token_queue->text, - repls->token_queue->token); - repls->token_queue = repls->token_queue->next; - check_replace_after_match (MORE_DEPTH repls); - } else { - do_replace (MORE_DEPTH repls); - } - } + repls->current_list = NULL; + while (!token_queue_is_empty(repls->token_queue)) { + const char *text; + const char *token; + token_queue_peek (repls->token_queue, &text, &token); + if (is_space_or_nl (text[0])) { + ppecho_switch(MORE_DEPTH repls, text, token); + token_queue_remove (repls->token_queue, 1); + } else { + do_replace(MORE_DEPTH repls); + return; + } + } } static void @@ -597,9 +706,9 @@ do_replace (WITH_DEPTH struct cb_replacement_state* repls) * withing the queue, as it has already been * parsed before any COPY-REPLACING * substitution. 
*/ - ppecho_switch_token_list (MORE_DEPTH repls, - repls->token_queue); - repls->token_queue = NULL; + ppecho_switch_token_queue (MORE_DEPTH repls, + repls->token_queue); + token_queue_empty (repls->token_queue); } else { check_replace (MORE_DEPTH repls, repls->replace_list); } @@ -692,9 +801,9 @@ add_text_to_replace (WITH_DEPTH struct cb_replacement_state *repls, "%s add_text_to_replace () -> push_text()\n", DEPTH); #endif - repls->token_queue = - token_list_add(MORE_DEPTH repls->token_queue, - text, token); + token_queue_put(MORE_DEPTH repls->token_queue, + 1, /* TODO strdup check */ + text, token); do_replace (MORE_DEPTH repls); } @@ -735,7 +844,7 @@ create_replacements (enum cb_ppecho ppecho) = cobc_malloc (sizeof(struct cb_replacement_state)); s->text_prequeue = NULL; - s->token_queue = NULL; + s->token_queue = token_queue_new (8); s->replace_list = NULL ; s->current_list = NULL ; s->ppecho = ppecho;