Merge pull request #167 from EmilHvitfeldt/julia-can-build-it-now
New build from my ARM mac 🙌
juliasilge authored Nov 17, 2021
2 parents 7e745e8 + ea4a11a commit d120e8b
Showing 61 changed files with 9,773 additions and 9,849 deletions.
6 changes: 0 additions & 6 deletions 04_stemming.Rmd
@@ -332,12 +332,6 @@ Section \@ref(mlregressionlemmatization) demonstrates how to use textrecipes wit
 
 Let's briefly walk through how to use spacyr.
 
-```{r include=FALSE, message=FALSE, eval=julias_computer}
-library(spacyr)
-spacy_initialize(condaenv = "tf_env", entity = FALSE)
-```
-
-
 ```{r lemmafirtree, dependson="tidyfirtree", fig.cap="Results for lemmatization, rather than stemming"}
 library(spacyr)
 spacy_initialize(entity = FALSE)
27 changes: 1 addition & 26 deletions 08_dl_dnn.Rmd
@@ -1,31 +1,6 @@
 # Dense neural networks {#dldnn}
 
-```{r include = FALSE, eval = julias_computer}
-library(keras)
-library(tensorflow)
-## for Julia's ARM chip
-use_python("~/miniforge3/bin/python")
-use_condaenv("tf_env")
-reticulate::py_discover_config("tensorflow")
-junk <- keras_model_sequential()
-tensorflow::tf$random$set_seed(1234)
-hook_output = knit_hooks$get('output')
-knit_hooks$set(output = function(x, options) {
-  # this hook is used only when the linewidth option is not NULL
-  if (!is.null(n <- options$linewidth)) {
-    x = knitr:::split_lines(x)
-    # any lines wider than n should be wrapped
-    if (any(nchar(x) > n)) x = strwrap(x, width = n)
-    x = paste(x, collapse = '\n')
-  }
-  hook_output(x, options)
-})
-```
-
-```{r include = FALSE, eval = !julias_computer}
+```{r, include=FALSE}
 library(keras)
 tensorflow::tf$random$set_seed(1234)
 hook_output = knit_hooks$get('output')
15 changes: 1 addition & 14 deletions 09_dl_lstm.Rmd
@@ -1,19 +1,6 @@
 # Long short-term memory (LSTM) networks {#dllstm}
 
-```{r include = FALSE, eval = julias_computer}
-library(keras)
-library(tensorflow)
-## for Julia's ARM chip
-use_python("~/miniforge3/bin/python")
-use_condaenv("tf_env")
-reticulate::py_discover_config("tensorflow")
-junk <- keras_model_sequential()
-tensorflow::tf$random$set_seed(1234)
-```
-
-```{r include = FALSE, eval = !julias_computer}
+```{r include = FALSE}
 library(keras)
 tensorflow::tf$random$set_seed(1234)
 ```
28 changes: 1 addition & 27 deletions 10_dl_cnn.Rmd
@@ -1,32 +1,6 @@
 # Convolutional neural networks {#dlcnn}
 
-```{r include = FALSE, eval = julias_computer}
-library(keras)
-library(tensorflow)
-## for Julia's ARM chip
-use_python("~/miniforge3/bin/python")
-use_condaenv("tf_env")
-reticulate::py_discover_config("tensorflow")
-junk <- keras_model_sequential()
-tensorflow::tf$random$set_seed(1234)
-hook_output = knit_hooks$get('output')
-knit_hooks$set(output = function(x, options) {
-  # this hook is used only when the linewidth option is not NULL
-  if (!is.null(n <- options$linewidth)) {
-    x = knitr:::split_lines(x)
-    # any lines wider than n should be wrapped
-    if (any(nchar(x) > n)) x = strwrap(x, width = n)
-    x = paste(x, collapse = '\n')
-  }
-  hook_output(x, options)
-})
-```
-
-```{r include = FALSE, eval = !julias_computer}
+```{r include = FALSE}
 library(keras)
 knitr::opts_chunk$set(message = FALSE,
                       warning = FALSE, error = TRUE)
2 changes: 0 additions & 2 deletions _common.R
@@ -87,8 +87,6 @@ columnize <- function(words, ncol = 5) {
 
 }
 
-julias_computer <- FALSE
-
 sparse_bp <- hardhat::default_recipe_blueprint(composition = "dgCMatrix")
 
 ## for Keras chapters
Binary file modified bookdown.rds
Binary file not shown.
10 changes: 5 additions & 5 deletions docs/02_tokenization.md
@@ -866,11 +866,11 @@ bench::mark(check = FALSE, iterations = 10,
 #> # A tibble: 5 × 6
 #>   expression      min   median `itr/sec` mem_alloc `gc/sec`
 #>   <bch:expr> <bch:tm> <bch:tm>     <dbl> <bch:byt>    <dbl>
-#> 1 corpus       82.2ms   88.7ms     10.5     4.58MB     1.16
-#> 2 tokenizers  100.8ms  107.7ms      8.90    1.01MB     2.23
-#> 3 text2vec     91.8ms   98.1ms     10.0       21MB     1.11
-#> 4 quanteda    165.5ms  177.4ms      5.64     8.7MB     1.41
-#> 5 base R      333.5ms  349.4ms      2.88   10.51MB     1.92
+#> 1 corpus       45.1ms   45.8ms     20.5    12.19MB     8.79
+#> 2 tokenizers   60.9ms   61.6ms     16.0     1.08MB     0
+#> 3 text2vec     51.4ms   52.2ms     18.9    21.43MB     2.10
+#> 4 quanteda     95.9ms   99.5ms     10.0     8.71MB     2.50
+#> 5 base R      187.6ms    192ms      5.16   10.51MB     3.44
 ```

The corpus package [@Perry2020] offers excellent performance for tokenization, and other options are not much worse. One exception is using a base R function as a tokenizer; you will see significant performance gains by instead using a package built specifically for text tokenization.
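The comparison above can be reproduced in miniature. This is a sketch rather than the book's own benchmark code, and it assumes the tokenizers and bench packages are installed; the input text here is an illustrative stand-in for the chapter's data.

```r
library(bench)
library(tokenizers)

# A small corpus: the same sentence repeated to give the timer something to do
text <- rep("The fir tree stood in the forest, green and tall.", 100)

bench::mark(
  check = FALSE, iterations = 10,
  # base R: lowercase by hand, then split on a rough "non-word" regex
  `base R` = strsplit(tolower(text), "[^a-z0-9']+"),
  # purpose-built tokenizer: handles casing and punctuation internally
  tokenizers = tokenize_words(text)
)
```

Absolute timings will vary by machine and input size, as the two sets of benchmark results in this diff illustrate.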
2 changes: 1 addition & 1 deletion docs/02_tokenization_files/figure-html/ngramtokens-1.svg
