Improve formatting of console output

- The console output needs to be formatted so it is better organized and more user friendly. The file read messages need to be in green. The file write messages need to be in blue. Each major operation should have a heading and a footer.
- Correct formatting error in DESCRIPTION file.
- Remove the references.bib file from .Rbuildignore.
- Add headings to all major functions.
- Correct error in CITATION file.
- Set version number to 0.0.1 in DESCRIPTION file.
- Set development mode to unreleased in pkgdown config file.
- When the wordpredictor package is loaded, it shows a red warning message from the pryr package. The package uses only the object_size method from pryr, and only in one place. Instead of using this method, a method may be added to the Model class for calculating the object size. The dependency on the pryr package should be removed.
- Build pkgdown website.
pakjiddat · Jun 14, 2021 · a7d7f23 · a7d7f23
1 parent 609cd05
commit a7d7f23
Show file tree

Hide file tree

Showing 79 changed files with 3,050 additions and 717 deletions.
diff --git a/.Rbuildignore b/.Rbuildignore
@@ -12,4 +12,3 @@
 ^vignettes/features_cache$
 ^vignettes/overview_cache$
 ^CRAN-RELEASE$
-^vignettes/references.bib$
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: wordpredictor
 Title: Develop Text Prediction Models Based on N-Grams
-Version: 1.0.0
+Version: 0.0.1
 URL: https://github.com/pakjiddat/word-predictor, https://pakjiddat.github.io/word-predictor/
 BugReports: https://github.com/pakjiddat/word-predictor/issues
 Authors@R: 
@@ -12,7 +12,6 @@ Authors@R:
 Description: The wordpredictor package allows developing n-gram models for text
     prediction. It provides methods for data cleaning, data sampling,
     tokenization, model generation, model evaluation and word prediction.
-
     For information on how n-gram models work we referred to: Jurafsky, Daniel &
     Martin, James. (2008). "Speech and Language Processing: An Introduction to
     Natural Language Processing, Computational Linguistics, and Speech

diff --git a/NAMESPACE b/NAMESPACE
@@ -30,7 +30,6 @@ importFrom(ggplot2,labs)
 importFrom(ggplot2,xlab)
 importFrom(ggplot2,ylab)
 importFrom(patchwork,plot_annotation)
-importFrom(pryr,object_size)
 importFrom(stringr,boundary)
 importFrom(stringr,str_count)
 importFrom(stringr,str_match)
diff --git a/R/base.R b/R/base.R
@@ -27,7 +27,10 @@ Base <- R6::R6Class(
         initialize = function(fn = NULL, lc = 100, ve = 2) {
             # If the given file name is not NULL and is not valid
             if (!is.null(fn) && !file.exists(fn)) {
-                private$dm("The given file name is not valid", md = -1, ty = "e")
+                private$dm(
+                    "The given file name is not valid",
+                    md = -1, ty = "e"
+                )
             }
 
             # The base class attributes are set
@@ -189,8 +192,10 @@ Base <- R6::R6Class(
         # @param is_csv If the data is a csv file
         # @return The file data
         read_file = function(fn, is_csv) {
+            # The information message
+            msg <- paste0("Reading \033[0;", 32, "m'", fn, "'\033[0m")
             # Information message is shown
-            private$dm("Reading file:", fn, md = 1)
+            private$dm(msg, md = 1)
             # If the file is not a csv file
             if (!is_csv) {
                 # File is opened for reading
@@ -215,8 +220,10 @@ Base <- R6::R6Class(
         # @param lc The number of lines to read.
         # @return The file data
         read_lines = function(fn, lc) {
+            # The information message
+            msg <- paste0("Reading \033[0;", 32, "m'", fn, "'\033[0m")
             # Information message is shown
-            private$dm("Reading file:", fn, md = 1)
+            private$dm(msg, md = 1)
             # File is opened for reading
             con <- file(fn)
             # The file contents are read
@@ -236,8 +243,10 @@ Base <- R6::R6Class(
         # @param fn The name of the file.
         # @param is_append Indicates if data should be saved.
         write_file = function(data, fn, is_append) {
+            # The information message
+            msg <- paste0("Writing \033[0;", 34, "m'", fn, "'\033[0m")
             # Information message is shown
-            private$dm("Saving file:", fn, md = 1)
+            private$dm(msg, md = 1)
             # If the given data is a data frame
             if ("data.frame" %in% class(data)) {
                 # The data frame is  written to a file
@@ -264,8 +273,10 @@ Base <- R6::R6Class(
         # @param obj The object to save.
         # @param fn The file name.
         save_obj = function(obj, fn) {
+            # The information message
+            msg <- paste0("Writing \033[0;", 34, "m'", fn, "'\033[0m")
             # Information message is shown
-            private$dm("Saving file:", fn, md = 1)
+            private$dm(msg, md = 1)
             # The object is saved to a file in version 2 format
             saveRDS(obj, fn, version = 2)
             # The information message is shown
@@ -278,8 +289,10 @@ Base <- R6::R6Class(
         # @param fn The file name.
         # @return The loaded R obj.
         read_obj = function(fn) {
+            # The information message
+            msg <- paste0("Reading \033[0;", 32, "m'", fn, "'\033[0m")
             # Information message is shown
-            private$dm("Reading file:", fn, md = 1)
+            private$dm(msg, md = 1)
             # If the file does not exist
             if (!file.exists(fn)) {
                 # The error message
@@ -321,6 +334,32 @@ Base <- R6::R6Class(
             }
         },
 
+        # @description
+        # Prints a one-line heading: two padding characters, a space, the
+        # text, a space, then padding up to the line length. Plain text only.
+        # @param text The heading text to display.
+        # @param char The padding character. Callers pass a single character.
+        # @param md The minimum debugging level.
+        # @param ll The total length of the line. Default is 80 chars.
+        # @return Nothing. The heading is written to the console using cat.
+        dh = function(text, char, md, ll = 80) {
+            # The heading is shown only if verbose is >= min_debug
+            if (private$ve >= md) {
+                # The heading prefix: two padding characters and a space
+                pre <- paste0(paste0(rep(char, 2), collapse = ""), " ")
+                # Prefix (3) and the space after the text (1) use 4 columns.
+                # The count is floored at 0 since rep() rejects negative
+                # counts, which happens when the text is longer than the line
+                n_pad <- max(0, ll - nchar(text) - 4)
+                # The heading suffix
+                su <- paste0(rep(char, n_pad), collapse = "")
+                # The heading line is built
+                msg <- paste0(pre, text, " ", su, "\n")
+                # The heading line is printed
+                cat(msg)
+            }
+        },
+
         # @description
         # Performs processing on the data. It should be
         # overridden by a derived class.

diff --git a/R/data-analyzer.R b/R/data-analyzer.R
@@ -1,4 +1,4 @@
-#' It allows analyzing input text files and n-gram token files
+#' Analyzes input text files and n-gram token files
 #'
 #' @description
 #' It provides a method that returns information about text files, such as
@@ -92,22 +92,18 @@ DataAnalyzer <- R6::R6Class(
         #' # files generated by the function can be viewed
         #' em$td_env()
         plot_n_gram_stats = function(opts) {
-            # The opts is merged with the da_opts attribute
-            private$da_opts <- modifyList(private$da_opts, opts)
-            # The da_opts is merged with the base class opts attribute
-            private$opts <- modifyList(private$opts, private$da_opts)
+            # The information message is shown
+            private$dh("Displaying Plot", "-", md = 1)
             # The n-gram data is read
             df <- private$read_obj(private$fn)
-            # The information message is shown
-            private$dm("Displaying Plot", md = 1)
             # If the coverage option was specified
-            if (private$opts[["type"]] == "coverage") {
+            if (opts[["type"]] == "coverage") {
                 # The y values
-                y <- as.character(1:private$opts[["n"]])
+                y <- as.character(1:opts[["n"]])
                 # The x values
                 x <- numeric()
                 # The percentage frequencies is calculated
-                for (i in 1:private$opts[["n"]]) {
+                for (i in 1:opts[["n"]]) {
                     # The percentage of tokens with frequency i
                     x[i] <- 100 * (nrow(df[df$freq == i, ]) / nrow(df))
                     # The percentage is rounded to 2 decimal places
@@ -123,12 +119,12 @@ DataAnalyzer <- R6::R6Class(
                 )
             }
             # If the top_features option was specified
-            else if (private$opts[["type"]] == "top_features") {
+            else if (opts[["type"]] == "top_features") {
                 # The plot labels
                 labels <- list(
                     y = "Frequency",
                     x = "Feature",
-                    title = paste("Top", private$opts[["n"]], "Features")
+                    title = paste("Top", opts[["n"]], "Features")
                 )
             }
             # The freq column is converted to numeric
@@ -138,7 +134,7 @@ DataAnalyzer <- R6::R6Class(
             # The data frame is sorted in descending order
             df <- (df[order(df$freq, decreasing = T), ])
             # The top n terms are extracted
-            df <- df[1:private$opts[["n"]], ]
+            df <- df[1:opts[["n"]], ]
             # The chart is plotted
             g <- private$display_plot(df, labels)
 
@@ -162,7 +158,7 @@ DataAnalyzer <- R6::R6Class(
                 print(g)
             }
             # The information message is shown
-            private$dm(" \u2714\n", md = 1)
+            private$dh("DONE", "=", md = 1)
 
             return(df)
         },
@@ -203,6 +199,8 @@ DataAnalyzer <- R6::R6Class(
         #' # files generated by the function can be viewed
         #' em$td_env()
         get_file_info = function(res) {
+            # The information message is shown
+            private$dh("Generating file stats", "-", md = 1)
             # The list of files to check
             fl <- NULL
             # If a directory name was passed
@@ -280,6 +278,9 @@ DataAnalyzer <- R6::R6Class(
 
             # The required stats
             stats <- list("file_stats" = fstats, "overall_stats" = ostats)
+            # The information message is shown
+            private$dh("DONE", "=", md = 1)
+
             # The required stats are returned
             return(stats)
         },
@@ -301,7 +302,8 @@ DataAnalyzer <- R6::R6Class(
         #' # The name of the folder that will contain all the files. It will be
         #' # created in the current directory. NULL implies tempdir will be used
         #' fn <- NULL
-        #' # The required files. They are default files that are part of the package
+        #' # The required files. They are default files that are part of the
+        #' # package
         #' rf <- c("n2.RDS")
         #' # An object of class EnvManager is created
         #' em <- EnvManager$new(ve = ve, rp = "./")

diff --git a/R/data-cleaner.R b/R/data-cleaner.R
@@ -130,19 +130,15 @@ DataCleaner <- R6::R6Class(
         #' # files generated by the function can be viewed
         #' em$td_env()
         clean_file = function() {
-            # The information message
-            msg <- paste0("Cleaning the file: ", private$fn, "\n")
             # The information message is shown
-            private$dm(msg, md = 1)
+            private$dh("Cleaning file", "-", md = 1)
             # The base class process_file function is called
             private$process_file(
                 private$pre_process, private$process,
                 private$post_process
             )
-            # The information message
-            msg <- paste0("The file: ", private$fn, " has been cleaned\n")
             # The information message is shown
-            private$dm(msg, md = 1)
+            private$dh("DONE", "=", md = 1)
             # If the data should not be saved
             if (!private$dc_opts[["save_data"]]) {
                 # The processed output is returned

diff --git a/R/data-sampler.R b/R/data-sampler.R
@@ -1,4 +1,4 @@
-#' It allows generating data samples from text files.
+#' Generates data samples from text files
 #'
 #' @description
 #' It provides a method for generating training, testing and validation data
@@ -108,10 +108,12 @@ DataSampler <- R6::R6Class(
         #' # Start of environment setup code
         #' # The level of detail in the information messages
         #' ve <- 0
-        #' # The name of the folder that will contain all the files. It will be created in
-        #' # the current directory. NULL implies tempdir will be used.
+        #' # The name of the folder that will contain all the files. It will be
+        #' # created in the current directory. NULL implies tempdir will be
+        #' # used
         #' fn <- NULL
-        #' # The required files. They are default files that are part of the package
+        #' # The required files. They are default files that are part of the
+        #' # package
         #' rf <- c("input.txt")
         #' # An object of class EnvManager is created
         #' em <- EnvManager$new(ve = ve)
@@ -133,8 +135,8 @@ DataSampler <- R6::R6Class(
         #'     )
         #' )
         #'
-        #' # The test environment is removed. Comment the below line, so the files
-        #' # generated by the function can be viewed
+        #' # The test environment is removed. Comment the below line, so the
+        #' # files generated by the function can be viewed
         #' em$td_env()
         generate_data = function(fn, percs) {
             # The directory containing the input and output files
@@ -152,10 +154,11 @@ DataSampler <- R6::R6Class(
             if (!file.exists(fn)) {
                 # The error message
                 private$dm("The input file: ",
-                           fn,
-                           " does not exist",
-                           md = -1,
-                           ty = "e")
+                    fn,
+                    " does not exist",
+                    md = -1,
+                    ty = "e"
+                )
             }
             # If the train, test and validation files already exist
             if (file.exists(paste0(dir, "/train.txt")) &&
@@ -196,11 +199,8 @@ DataSampler <- R6::R6Class(
                 # The validation data is written to file
                 private$write_file(validate_ds, paste0(dir, "/validate.txt"), F)
             }
-            # The information message
-            msg <- "Training, testing and validation data sets"
-            msg <- paste0(msg, " were successfully generated\n")
             # The information message is shown
-            private$dm(msg, md = 1)
+            private$dh("DONE", "=", md = 1)
         }
     ),
     private = list(
@@ -219,9 +219,8 @@ DataSampler <- R6::R6Class(
         # @param dc_opts The options for cleaning the data.
         # @return The sampled data is returned
         generate_sf_from_f = function(fn = NULL, ss, ic, ir, of, is, dc_opts) {
-            # The information message
-            private$dm(
-                "Generating sample file from the file:", fn, "\n", md = 1)
+            # The information message is shown
+            private$dh("Generating sample file", "-", md = 1)
             # The input file is read
             data <- private$read_file(fn, F)
             # The number of lines in the main file
@@ -258,11 +257,8 @@ DataSampler <- R6::R6Class(
                 # The sample file is cleaned
                 data <- dc$clean_file()
             }
-            # Information message is shown
-            private$dm(
-                "Sample file was sucessfully generated\n",
-                md = 1
-            )
+            # The information message is shown
+            private$dh("DONE", "=", md = 1)
 
             return(data)
         }