diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..5b6a065 --- /dev/null +++ b/.gitignore @@ -0,0 +1,4 @@ +.Rproj.user +.Rhistory +.RData +.Ruserdata diff --git a/404.html b/404.html index f619acc..7202086 100644 --- a/404.html +++ b/404.html @@ -23,7 +23,7 @@
404 Page not found - 2021-03-07 + 2023-05-30 @@ -58,7 +58,6 @@

404 NOT FOUND

-

Dependence plot for each feature

    -
  • Here we choose to show top 6 features ranked by mean|SHAP|
  • +
  • Here we choose to show top 6 features ranked by mean|SHAP|
    +
data_long <-  shap.prep(mod1, X_train = X_train)
 shap_values <- shap.values(mod1, X_train)
@@ -223,7 +228,8 @@ 

Dependence plot for each feature

gridExtra::grid.arrange(grobs = fig_list, ncol = 2)

    -
  • If Use the built-in xgb.shap.plot function
  • +
  • Alternatively, use the built-in xgb.plot.shap function
    +
xgboost::xgb.plot.shap(data = X_train, model = mod1, top_n = 4, n_col = 2)

@@ -300,10 +306,9 @@

Stack plot by clustering groups

} -
- diff --git a/post/2018/11/15/xgboost-dart-example/index.html b/post/2018/11/15/xgboost-dart-example/index.html index 485410f..ec5a820 100644 --- a/post/2018/11/15/xgboost-dart-example/index.html +++ b/post/2018/11/15/xgboost-dart-example/index.html @@ -370,10 +370,9 @@

one_drop

} -
- diff --git a/post/2019-04-17-ggplot-heatmap-us-50-states-map-and-china-province-map_files/figure-html/unnamed-chunk-2-1.png b/post/2019-04-17-ggplot-heatmap-us-50-states-map-and-china-province-map_files/figure-html/unnamed-chunk-2-1.png index b3af423..0398b17 100644 Binary files a/post/2019-04-17-ggplot-heatmap-us-50-states-map-and-china-province-map_files/figure-html/unnamed-chunk-2-1.png and b/post/2019-04-17-ggplot-heatmap-us-50-states-map-and-china-province-map_files/figure-html/unnamed-chunk-2-1.png differ diff --git a/post/2019-04-17-ggplot-heatmap-us-50-states-map-and-china-province-map_files/figure-html/unnamed-chunk-3-1.png b/post/2019-04-17-ggplot-heatmap-us-50-states-map-and-china-province-map_files/figure-html/unnamed-chunk-3-1.png index 8a3fa50..10ad675 100644 Binary files a/post/2019-04-17-ggplot-heatmap-us-50-states-map-and-china-province-map_files/figure-html/unnamed-chunk-3-1.png and b/post/2019-04-17-ggplot-heatmap-us-50-states-map-and-china-province-map_files/figure-html/unnamed-chunk-3-1.png differ diff --git a/post/2019-04-17-ggplot-heatmap-us-50-states-map-and-china-province-map_files/figure-html/unnamed-chunk-4-1.png b/post/2019-04-17-ggplot-heatmap-us-50-states-map-and-china-province-map_files/figure-html/unnamed-chunk-4-1.png index cdd82cf..0bb29ab 100644 Binary files a/post/2019-04-17-ggplot-heatmap-us-50-states-map-and-china-province-map_files/figure-html/unnamed-chunk-4-1.png and b/post/2019-04-17-ggplot-heatmap-us-50-states-map-and-china-province-map_files/figure-html/unnamed-chunk-4-1.png differ diff --git a/post/2019-04-17-ggplot-heatmap-us-50-states-map-and-china-province-map_files/figure-html/unnamed-chunk-4-2.png b/post/2019-04-17-ggplot-heatmap-us-50-states-map-and-china-province-map_files/figure-html/unnamed-chunk-4-2.png index 5755c95..59539c6 100644 Binary files a/post/2019-04-17-ggplot-heatmap-us-50-states-map-and-china-province-map_files/figure-html/unnamed-chunk-4-2.png and 
b/post/2019-04-17-ggplot-heatmap-us-50-states-map-and-china-province-map_files/figure-html/unnamed-chunk-4-2.png differ diff --git a/post/2019/02/24/shiny-in-blogdown/index.html b/post/2019/02/24/shiny-in-blogdown/index.html index f6856a3..2306aa1 100644 --- a/post/2019/02/24/shiny-in-blogdown/index.html +++ b/post/2019/02/24/shiny-in-blogdown/index.html @@ -149,10 +149,9 @@

My Shiny app example

} -
- diff --git a/post/2019/07/18/visualization-of-shap-for-xgboost/index.html b/post/2019/07/18/visualization-of-shap-for-xgboost/index.html index 1097683..060042d 100644 --- a/post/2019/07/18/visualization-of-shap-for-xgboost/index.html +++ b/post/2019/07/18/visualization-of-shap-for-xgboost/index.html @@ -362,10 +362,9 @@

References

} -
- diff --git a/post/2019/07/28/some-experience-on-writing-r-package/index.html b/post/2019/07/28/some-experience-on-writing-r-package/index.html index a6466ec..5e1e1e0 100644 --- a/post/2019/07/28/some-experience-on-writing-r-package/index.html +++ b/post/2019/07/28/some-experience-on-writing-r-package/index.html @@ -197,10 +197,9 @@

Some nice suggestions from the CRAN team when submitting the package

} -
- diff --git a/post/2019/09/15/drake-powerful-tool-for-automatic-reproducible-workflow/index.html b/post/2019/09/15/drake-powerful-tool-for-automatic-reproducible-workflow/index.html index 8227203..f927d22 100644 --- a/post/2019/09/15/drake-powerful-tool-for-automatic-reproducible-workflow/index.html +++ b/post/2019/09/15/drake-powerful-tool-for-automatic-reproducible-workflow/index.html @@ -190,10 +190,9 @@

Yang Liu } -
- diff --git a/post/2020-06-15-us-and-china-admin1-covid-19-mortality-and-incidence-rate_files/figure-html/unnamed-chunk-1-1.png b/post/2020-06-15-us-and-china-admin1-covid-19-mortality-and-incidence-rate_files/figure-html/unnamed-chunk-1-1.png index ba471c7..e4bc720 100644 Binary files a/post/2020-06-15-us-and-china-admin1-covid-19-mortality-and-incidence-rate_files/figure-html/unnamed-chunk-1-1.png and b/post/2020-06-15-us-and-china-admin1-covid-19-mortality-and-incidence-rate_files/figure-html/unnamed-chunk-1-1.png differ diff --git a/post/2020-06-15-us-and-china-admin1-covid-19-mortality-and-incidence-rate_files/figure-html/unnamed-chunk-2-1.png b/post/2020-06-15-us-and-china-admin1-covid-19-mortality-and-incidence-rate_files/figure-html/unnamed-chunk-2-1.png index 5a45592..a5d750d 100644 Binary files a/post/2020-06-15-us-and-china-admin1-covid-19-mortality-and-incidence-rate_files/figure-html/unnamed-chunk-2-1.png and b/post/2020-06-15-us-and-china-admin1-covid-19-mortality-and-incidence-rate_files/figure-html/unnamed-chunk-2-1.png differ diff --git a/post/2020-06-15-us-and-china-admin1-covid-19-mortality-and-incidence-rate_files/figure-html/unnamed-chunk-3-1.png b/post/2020-06-15-us-and-china-admin1-covid-19-mortality-and-incidence-rate_files/figure-html/unnamed-chunk-3-1.png index b58086c..d292c46 100644 Binary files a/post/2020-06-15-us-and-china-admin1-covid-19-mortality-and-incidence-rate_files/figure-html/unnamed-chunk-3-1.png and b/post/2020-06-15-us-and-china-admin1-covid-19-mortality-and-incidence-rate_files/figure-html/unnamed-chunk-3-1.png differ diff --git a/post/2020-06-15-us-and-china-admin1-covid-19-mortality-and-incidence-rate_files/figure-html/unnamed-chunk-3-2.png b/post/2020-06-15-us-and-china-admin1-covid-19-mortality-and-incidence-rate_files/figure-html/unnamed-chunk-3-2.png index e4e571e..faf3374 100644 Binary files a/post/2020-06-15-us-and-china-admin1-covid-19-mortality-and-incidence-rate_files/figure-html/unnamed-chunk-3-2.png and 
b/post/2020-06-15-us-and-china-admin1-covid-19-mortality-and-incidence-rate_files/figure-html/unnamed-chunk-3-2.png differ diff --git a/post/2020/01/11/working-with-3d-array-as-long-format-data-in-r/index.html b/post/2020/01/11/working-with-3d-array-as-long-format-data-in-r/index.html index 01726cc..5862a42 100644 --- a/post/2020/01/11/working-with-3d-array-as-long-format-data-in-r/index.html +++ b/post/2020/01/11/working-with-3d-array-as-long-format-data-in-r/index.html @@ -266,10 +266,9 @@

Recover the array

} -
- diff --git a/post/2020/06/12/ggplot-us-state-and-china-province-heatmap/index.html b/post/2020/06/12/ggplot-us-state-and-china-province-heatmap/index.html index 613d5d7..da41485 100644 --- a/post/2020/06/12/ggplot-us-state-and-china-province-heatmap/index.html +++ b/post/2020/06/12/ggplot-us-state-and-china-province-heatmap/index.html @@ -74,20 +74,22 @@

Yang Liu - + @@ -101,7 +103,7 @@

Yang Liu

1. US Map by state

-

The dt1 below can be used by both methods, but to use usmap only need two variables: state names and value to plot.

+

The dt1 below can be used by both methods, but usmap only needs two variables: the state names and the value to plot.

suppressPackageStartupMessages({
 library(ggplot2)
 library(maps)
@@ -260,14 +262,10 @@ 

Method 1. China map by province using downloaded shap files

china_map_data <- china_map_data[!is.na(province)] china_map_data <- china_map_data[AREA > 0.1] head(china_map_data, 3)
-
##       AREA PERIMETER BOU2_4M_ BOU2_4M_ID ADCODE93 ADCODE99
-## 1:  54.447    68.489        2         23   230000   230000
-## 2: 129.113   129.933        3         15   150000   150000
-## 3: 175.591    84.905        4         65   650000   650000
-##                                                            province id
-## 1:                                 <U+9ED1><U+9F99><U+6C5F><U+7701>  0
-## 2:                 <U+5185><U+8499><U+53E4><U+81EA><U+6CBB><U+533A>  1
-## 3: <U+65B0><U+7586><U+7EF4><U+543E><U+5C14><U+81EA><U+6CBB><U+533A>  2
+
##       AREA PERIMETER BOU2_4M_ BOU2_4M_ID ADCODE93 ADCODE99         province id
+## 1:  54.447    68.489        2         23   230000   230000         黑龙江省  0
+## 2: 129.113   129.933        3         15   150000   150000     内蒙古自治区  1
+## 3: 175.591    84.905        4         65   650000   650000 新疆维吾尔自治区  2
# 
 dt_china = setDT(fortify(china_map))
 head(dt_china, 3)
@@ -396,10 +394,9 @@

Method 2. Using geojsonMap (leaflet)

} -
- diff --git a/post/2020/06/14/us-and-china-admin1-covid19-mortality-and-incidence-rate/index.html b/post/2020/06/14/us-and-china-admin1-covid19-mortality-and-incidence-rate/index.html index c4a652d..d6612ae 100644 --- a/post/2020/06/14/us-and-china-admin1-covid19-mortality-and-incidence-rate/index.html +++ b/post/2020/06/14/us-and-china-admin1-covid19-mortality-and-incidence-rate/index.html @@ -70,9 +70,9 @@

Yang Liu @@ -105,10 +105,10 @@

China

dt_JUH <- get.JHU.daily() dt_CN <- dt_JUH[Country_Region %in% c("China", "Taiwan*"),] dt_CN[Country_Region=="Taiwan*", Province_State:= "Taiwan"] -dt_china_map <- readRDS("D:/liuyanguu/Blogdown/Data/dt_china.rds") +dt_china_map <- readRDS(here::here("Data/dt_china.rds")) setnames(dt_china_map, "province_EN", "state") unique(dt_CN$Province_State[!dt_CN$Province_State%in%dt_china_map$state])
-
## [1] "Hong Kong" "Macau"
+
## [1] "Hong Kong" "Macau"     "Unknown"
make_heatmap(data = dt_CN, 
              geo_data = dt_china_map,
              state_var = "Province_State", fill_var = "Case-Fatality_Ratio")
@@ -175,10 +175,9 @@

Compared to lung cancer mortality

} -
- diff --git a/post/2021/01/06/rmarkdown-to-github-pages/index.html b/post/2021/01/06/rmarkdown-to-github-pages/index.html index 5529733..a5f758a 100644 --- a/post/2021/01/06/rmarkdown-to-github-pages/index.html +++ b/post/2021/01/06/rmarkdown-to-github-pages/index.html @@ -147,7 +147,7 @@

RMarkdown into Github Pages

@@ -182,10 +182,9 @@

RMarkdown into Github Pages

} -
- diff --git a/post/2023/05/29/plot-multiple-countries-on-the-world-map/gadm36_AGO_1_sp.rds b/post/2023/05/29/plot-multiple-countries-on-the-world-map/gadm36_AGO_1_sp.rds new file mode 100644 index 0000000..9117da9 Binary files /dev/null and b/post/2023/05/29/plot-multiple-countries-on-the-world-map/gadm36_AGO_1_sp.rds differ diff --git a/post/2023/05/29/plot-multiple-countries-on-the-world-map/gadm36_GHA_1_sp.rds b/post/2023/05/29/plot-multiple-countries-on-the-world-map/gadm36_GHA_1_sp.rds new file mode 100644 index 0000000..b383ab7 Binary files /dev/null and b/post/2023/05/29/plot-multiple-countries-on-the-world-map/gadm36_GHA_1_sp.rds differ diff --git a/post/2023/05/29/plot-multiple-countries-on-the-world-map/gadm36_HTI_1_sp.rds b/post/2023/05/29/plot-multiple-countries-on-the-world-map/gadm36_HTI_1_sp.rds new file mode 100644 index 0000000..e644a17 Binary files /dev/null and b/post/2023/05/29/plot-multiple-countries-on-the-world-map/gadm36_HTI_1_sp.rds differ diff --git a/post/2023/05/29/plot-multiple-countries-on-the-world-map/gadm36_TGO_1_sp.rds b/post/2023/05/29/plot-multiple-countries-on-the-world-map/gadm36_TGO_1_sp.rds new file mode 100644 index 0000000..82fb09f Binary files /dev/null and b/post/2023/05/29/plot-multiple-countries-on-the-world-map/gadm36_TGO_1_sp.rds differ diff --git a/post/2023/05/29/plot-multiple-countries-on-the-world-map/gadm36_UGA_1_sp.rds b/post/2023/05/29/plot-multiple-countries-on-the-world-map/gadm36_UGA_1_sp.rds new file mode 100644 index 0000000..c4ec10e Binary files /dev/null and b/post/2023/05/29/plot-multiple-countries-on-the-world-map/gadm36_UGA_1_sp.rds differ diff --git a/post/2023/05/29/plot-multiple-countries-on-the-world-map/gadm36_ZAF_1_sp.rds b/post/2023/05/29/plot-multiple-countries-on-the-world-map/gadm36_ZAF_1_sp.rds new file mode 100644 index 0000000..06b8bd8 Binary files /dev/null and b/post/2023/05/29/plot-multiple-countries-on-the-world-map/gadm36_ZAF_1_sp.rds differ diff --git 
a/post/2023/05/29/plot-multiple-countries-on-the-world-map/index.html b/post/2023/05/29/plot-multiple-countries-on-the-world-map/index.html new file mode 100644 index 0000000..1798370 --- /dev/null +++ b/post/2023/05/29/plot-multiple-countries-on-the-world-map/index.html @@ -0,0 +1,305 @@ + + + + + + + Plot multiple countries on the world map | Welcome to my blog + + + + + + + + + +
+
+ + + + +
+ + + +

This post shows that it is quite easy to download and plot the administrative areas of multiple countries on the world map. +I will also showcase a bug that puzzled me for a long time and that I recently figured out: strange connecting lines between countries!

+
+

The most straightforward way

+

(You may download the world map from Github)

+
suppressPackageStartupMessages({
+library("data.table")
+library("ggplot2")
+library("rgdal")
+library("raster")
+library("rgeos")
+library("here")
+library("ggthemes")
+})
+
+# download data from GADM directly and row-bind the spatial polygons data frame
+cnames <- c("Haiti", "Togo", "Uganda", "Ghana", "South Africa", "Angola")
+download.GADM <- function(cname0) raster::getData("GADM", country = cname0, level = 1)
+dt_geos <- raster::bind(lapply(cnames, download.GADM))
+dt_geos <- sp::spTransform(dt_geos, CRS("+proj=robin")) # robin transformation
+dt_geos_df <- broom::tidy(dt_geos, region = "GID_1")
+
+  x_min <- min(dt_geos_df$long)*1.2
+  x_max <- max(dt_geos_df$long)*1.2
+  y_min <- min(dt_geos_df$lat)* 1.2
+  y_max <- max(dt_geos_df$lat)* 1.2
+  
+  map_theme <- ggthemes::theme_map() +
+    theme(legend.position = "bottom", legend.direction = "horizontal", legend.justification = c("center"))
+  
+  shp_world_robin <- readRDS(here::here("../Data/World.shp/sp.world.robin.rds")) # this has to be sourced locally
+  ggplot() +
+    geom_polygon(data = shp_world_robin, aes(x = long, y = lat, group = group), fill="lightgray", 
+                 colour = "white", size=0.05) +
+    geom_polygon(data = dt_geos, aes(x = long, y = lat, group = group), 
+                 color = "red", size=0.05, fill="#ffc069")  +
+    # if want to crop map:
+    coord_fixed(xlim=c(x_min, x_max), ylim=c(y_min, y_max)) +
+    map_theme +
+    guides(fill = guide_legend(nrow = 1, title.position = "top"))
+

+
+
+

A more complicated example

+

Now we will also plot some values on the map. These values need to be merged either directly into the spatial polygons (sp) data frame, or into the corresponding plain data frame. Here I show the second approach: transforming the sp data frame into a plain data frame first. Either the sp data frame or the plain data frame can then be supplied to ggplot2::geom_polygon.

+

Note that in GADM admin 1 shape files, GID_1 is a unique identifier even after binding multiple countries. Here we intentionally use region names (the NAME_1 column from the GADM file) as the region identifier in order to show a bug. In my case, I indeed had to use the region names to merge with the estimates.

+
# `region` will become the id/group used to identify each area 
+# Either broom::tidy or ggplot2::fortify can work:
+# dt_geos_data <- broom::tidy(dt_geos, region = "NAME_1")
+# dt_geos_data <- ggplot2::fortify(dt_geos, region = "NAME_1")
+
+download.GADM.df <- function(cname0){
+  dfsp <- raster::getData("GADM", country = cname0, level = 1) # download data from GADM
+  dfsp <- sp::spTransform(dfsp, CRS("+proj=robin")) # robin transformation
+  dfsf <- broom::tidy(dfsp, region = "NAME_1") # get df
+  dfsf$country <- cname0 # add a country identifier 
+  return(setDT(dfsf))
+}
+
+dt_geos_data <- rbindlist(lapply(cnames, download.GADM.df))
+
+# imagine we want to plot some estimates, not only the map
+dt_admin1 <- unique(setDT(dt_geos_data)[,.(country, id)])
+set.seed(1234)
+dt_admin1$value <- rgamma(nrow(dt_admin1), shape = 4, scale = 15) # some random values
+
+setkey(dt_geos_data, country, id)
+setkey(dt_admin1, country, id)
+dt_geos_data_value <- dt_geos_data[dt_admin1]
+
+legend_break <- c(0, 25, 50, 75,  100, 150, 200, 300, 500)
+legend_label <- c("≤25", "25 to 50", "50 to 75", "75 to 100", "100 to 150","150 to 200","200 to 300",">300")
+legend_color <- c("#80BD41", "#CFF4FF","#feec9f","#ffc069","#fa8c16","#d46b08","#ad4e00","#612500")
+dt_geos_data_value$col <- cut(dt_geos_data_value$value, breaks = legend_break, labels = legend_label)
+
+
+

The bug: strange connecting lines

+
ggplot() +
+  geom_polygon(data = shp_world_robin, aes(x = long, y = lat, group = group), fill="lightgray", 
+               colour = "white", size=0.05) +
+  geom_polygon(data = dt_geos_data_value, aes(x = long, y = lat, group = group, fill= col), 
+               color = "red", size=0.05)  +
+  coord_fixed(xlim=c(x_min, x_max), ylim=c(y_min, y_max)) + # crop map
+  scale_fill_manual("Some random values for example", values = legend_color, drop = FALSE) +# Keep all legend item
+  map_theme + 
+  guides(fill = guide_legend(nrow = 1, title.position = "top"))
+

+

The reason is that some countries share admin names, which come from NAME_1 in the GADM file and become the id/group values during the broom::tidy or ggplot2::fortify step.
+Here, Haiti and Togo have these shared admin 1 group names: “Centre.1” and “Centre.2” (see the table below).
+The bug is solved by setting group to a unique identifier in ggplot2::geom_polygon:

+
dt_geos_data_value[, county_group := paste(country, group, sep = "_")]
+unique(dt_geos_data_value[group %in% c("Centre.1"),.(country, id, group, county_group, value)])
+
##    country     id    group   county_group    value
+## 1:   Haiti Centre Centre.1 Haiti_Centre.1 24.09058
+## 2:    Togo Centre Centre.1  Togo_Centre.1 54.32445
+
ggplot() +
+    geom_polygon(data = shp_world_robin, aes(x = long, y = lat, group = group), fill="lightgray", 
+                 colour = "white", size=0.05) +
+    geom_polygon(data = dt_geos_data_value, aes(x = long, y = lat, group = county_group, fill= col), 
+                 # it's important to set `group = county_group` instead of `group = group`
+                 color = "red", size=0.05)  +
+    coord_fixed(xlim=c(x_min, x_max), ylim=c(y_min, y_max))  +
+    scale_fill_manual("Some random values for example", values = legend_color, drop = FALSE) +# Keep all legend item
+    map_theme + 
+    guides(fill = guide_legend(nrow = 1, title.position = "top"))
+

+

I hope this post is helpful to people who run into similar issues!

+
+ +
+ + + + + + + + + + + + + + + + + +
+ + +
+
+ + + +
+ + + +
+ + +comments powered by Disqus + + + + +
+ + + + + + + + + + + + + + + + + + diff --git a/post/2023/05/29/plot-multiple-countries-on-the-world-map/index_files/figure-html/unnamed-chunk-1-1.png b/post/2023/05/29/plot-multiple-countries-on-the-world-map/index_files/figure-html/unnamed-chunk-1-1.png new file mode 100644 index 0000000..660cd05 Binary files /dev/null and b/post/2023/05/29/plot-multiple-countries-on-the-world-map/index_files/figure-html/unnamed-chunk-1-1.png differ diff --git a/post/2023/05/29/plot-multiple-countries-on-the-world-map/index_files/figure-html/unnamed-chunk-2-1.png b/post/2023/05/29/plot-multiple-countries-on-the-world-map/index_files/figure-html/unnamed-chunk-2-1.png new file mode 100644 index 0000000..7847d49 Binary files /dev/null and b/post/2023/05/29/plot-multiple-countries-on-the-world-map/index_files/figure-html/unnamed-chunk-2-1.png differ diff --git a/post/2023/05/29/plot-multiple-countries-on-the-world-map/index_files/figure-html/unnamed-chunk-3-1.png b/post/2023/05/29/plot-multiple-countries-on-the-world-map/index_files/figure-html/unnamed-chunk-3-1.png new file mode 100644 index 0000000..7847d49 Binary files /dev/null and b/post/2023/05/29/plot-multiple-countries-on-the-world-map/index_files/figure-html/unnamed-chunk-3-1.png differ diff --git a/post/2023/05/29/plot-multiple-countries-on-the-world-map/index_files/figure-html/unnamed-chunk-4-1.png b/post/2023/05/29/plot-multiple-countries-on-the-world-map/index_files/figure-html/unnamed-chunk-4-1.png new file mode 100644 index 0000000..8efd73b Binary files /dev/null and b/post/2023/05/29/plot-multiple-countries-on-the-world-map/index_files/figure-html/unnamed-chunk-4-1.png differ diff --git a/post/2023/05/29/plot-multiple-countries-on-the-world-map/temp/World map.png b/post/2023/05/29/plot-multiple-countries-on-the-world-map/temp/World map.png new file mode 100644 index 0000000..6b0aefb Binary files /dev/null and b/post/2023/05/29/plot-multiple-countries-on-the-world-map/temp/World map.png differ diff --git 
a/post/2023/05/29/plot-multiple-countries-on-the-world-map/temp/World map_exp1.png b/post/2023/05/29/plot-multiple-countries-on-the-world-map/temp/World map_exp1.png new file mode 100644 index 0000000..3e67f34 Binary files /dev/null and b/post/2023/05/29/plot-multiple-countries-on-the-world-map/temp/World map_exp1.png differ diff --git a/post/2023/05/29/plot-multiple-countries-on-the-world-map/temp/World map_exp2.png b/post/2023/05/29/plot-multiple-countries-on-the-world-map/temp/World map_exp2.png new file mode 100644 index 0000000..a582d6a Binary files /dev/null and b/post/2023/05/29/plot-multiple-countries-on-the-world-map/temp/World map_exp2.png differ diff --git a/post/index.html b/post/index.html index 3767d9b..caa47a0 100644 --- a/post/index.html +++ b/post/index.html @@ -1,268 +1,265 @@ - - - - - - - Posts | Welcome to my blog - - - - - - - - - - -
-
- - - - -
-
- - - Github - - -
-

RMarkdown to Github Pages

-
- Yang Liu - / - 2021-01-06 -
-
- - - - -
Thumbnail
- - - - - + + + + + + + Posts | Welcome to my blog + + + + + + + +
+ + +
+
+ + + + +
+
+ + + Data Visualization + + +
+

Plot multiple countries on the world map

+
+ Yang Liu + / + 2023-05-29 +
+ +
+ +
+
+ + + Github + + +
+

RMarkdown to Github Pages

+
+ Yang Liu + / + 2021-01-06 +
+ -
- -
-
- - - Data Visualization - - -
-

US and China Admin1 COVID19 mortality and incidence rate

-
- Yang Liu - / - 2020-06-14 -
- +
+ +
+
+ + + Data Visualization + + +
+

US and China Admin1 COVID19 mortality and incidence rate

+
+ Yang Liu + / + 2020-06-14 +
+ -
- -
-
- - - Data Visualization - - -
-

ggplot US state and China province heatmap

-
- Yang Liu - / - 2020-06-12 -
- +
+ +
+
+ + + Data Visualization + + +
+

ggplot US state and China province heatmap

+
+ Yang Liu + / + 2020-06-12 +
+ -
- -
-
- - - Data Engineering - - -
-

Working with 3D array as long-format data in R

-
- Yang Liu - / - 2020-01-11 -
- -
- -
-
- - - Reproducible Workflow - - -
-

Drake: powerful tool for automatic reproducible workflow

-
- Yang Liu - / - 2019-09-15 -
- +
+ +
+
+ + + Data Engineering + + +
+

Working with 3D array as long-format data in R

+
+ Yang Liu + / + 2020-01-11 +
+ +
+ +
+
+ + + Reproducible Workflow + + +
+

Drake: powerful tool for automatic reproducible workflow

+
+ Yang Liu + / + 2019-09-15 +
+ -
- -
-
- - - R package - - -
-

Notes on writing an R package

-
- Yang Liu - / - 2019-07-28 -
- -
- -
+RMarkdown file could be very slow to generate if lots of calculations are involved. Any small revise makes you rerun everything. When we use drake we can do all the… + +
+
+ +
-
- - - -
+ + + +
+ + + + + +
+ + - - - - - - - - - - - - - + + + + + diff --git a/post/index.xml b/post/index.xml index 140b723..121fe92 100644 --- a/post/index.xml +++ b/post/index.xml @@ -6,7 +6,18 @@ Recent content in Posts on Welcome to my blog Hugo -- gohugo.io en-us - Wed, 06 Jan 2021 00:00:00 +0000 + Mon, 29 May 2023 00:00:00 +0000 + + Plot multiple countries on the world map + https://liuyanguu.github.io/post/2023/05/29/plot-multiple-countries-on-the-world-map/ + Mon, 29 May 2023 00:00:00 +0000 + + https://liuyanguu.github.io/post/2023/05/29/plot-multiple-countries-on-the-world-map/ + This post will show it’s quite easy to download and plot the administrative areas of multiple countries on the world map. I will also showcase a bug that puzzled me for a long time and I recently figured out: strange connecting lines among countries! +The most straightforward way (You may download the world map from Github) +suppressPackageStartupMessages({ library(&quot;data.table&quot;) library(&quot;ggplot2&quot;) library(&quot;rgdal&quot;) library(&quot;raster&quot;) library(&quot;rgeos&quot;) library(&quot;here&quot;) library(&quot;ggthemes&quot;) }) # download data from GADM directly and row-bind the spatial polygons data frame cnames &lt;- c(&quot;Haiti&quot;, &quot;Togo&quot;, &quot;Uganda&quot;, &quot;Ghana&quot;, &quot;South Africa&quot;, &quot;Angola&quot;) download. + + RMarkdown to Github Pages https://liuyanguu.github.io/post/2021/01/06/rmarkdown-to-github-pages/ @@ -24,7 +35,7 @@ Create in the root directory a yaml file: " Sun, 14 Jun 2020 00:00:00 +0000 https://liuyanguu.github.io/post/2020/06/14/us-and-china-admin1-covid19-mortality-and-incidence-rate/ - US China Compared to lung cancer mortality Similar to the last heatmap post I just revised, here I wrapped up the function and showing COVID-19 data just downloaded from JHU CSSE Github page. The function can take a given dataset and plot designated variable. 
+ US China Compared to lung cancer mortality Similar to the last heatmap post I just revised, here I wrapped up the function and showing COVID-19 data just downloaded from JHU CSSE Github page. The function can take a given dataset and plot designated variable. Code hosted on my Github repo. US # major function, can download from Github repo Blogdown/hugo-xmag/Code source(here::here(&quot;Code/COVID_make_map.R&quot;)) # US dt_JUH_US &lt;- get.JHU.us.state() make_heatmap(data = dt_JUH_US, geo_data = get_state_name(), state_var = &quot;Province_State&quot;, fill_var = &quot;Mortality_Rate&quot;, label_var = &quot;abb&quot;) us_maps &lt;- lapply(c(&quot;Mortality_Rate&quot;, &quot;Incident_Rate&quot;, &quot;Testing_Rate&quot;, &quot;Hospitalization_Rate&quot;), make_heatmap, data = dt_JUH_US, geo_data = get_state_name(), state_var = &quot;Province_State&quot;, label_var = &quot;abb&quot;) plot_grid &lt;- gridExtra::grid. @@ -44,9 +55,9 @@ US # major function, can download from Github repo Blogdown/hugo-xmag/Code sourc Sat, 11 Jan 2020 00:00:00 +0000 https://liuyanguu.github.io/post/2020/01/11/working-with-3d-array-as-long-format-data-in-r/ - Example: a 3-D array with dimension 4x3x2 Melt into long Recover the array check.and.install.pkgs &lt;- function(pkgs){ new.packages &lt;- pkgs[!pkgs %in% installed.packages()[,&quot;Package&quot;]] if(length(new.packages)) install.packages(new.packages, dependencies = TRUE) suppressPackageStartupMessages(invisible(lapply(pkgs, library, character.only = TRUE))) } check.and.install.pkgs(c(&quot;data.table&quot;, &quot;reshape2&quot;, &quot;scatterplot3d&quot;)) Happy New Year! 
+ Example: a 3-D array with dimension 4x3x2 Melt into long Recover the array check.and.install.pkgs &lt;- function(pkgs){ new.packages &lt;- pkgs[!pkgs %in% installed.packages()[,&quot;Package&quot;]] if(length(new.packages)) install.packages(new.packages, dependencies = TRUE) suppressPackageStartupMessages(invisible(lapply(pkgs, library, character.only = TRUE))) } check.and.install.pkgs(c(&quot;data.table&quot;, &quot;reshape2&quot;, &quot;scatterplot3d&quot;)) Happy New Year! Recently I spent some time working with array in R. -I believe it is a bad idea to work with array using for loop, which is both slow and error-prone. +I believe it is a bad idea to work with array using for loop, which is both slow and error-prone. We can just melt it into a long data, do the work, and arrange back into array in the end if needed. @@ -65,8 +76,9 @@ RMarkdown file could be very slow to generate if lots of calculations are involv Sun, 28 Jul 2019 00:00:00 +0000 https://liuyanguu.github.io/post/2019/07/28/some-experience-on-writing-r-package/ - Some of my own experience On description Namespace load vs attach Documentation Potential problems when checking the package Some nice suggestions from the CRAN team when submitting the package Although ‘SHAPforxgboost’ is not a package too complicated, it took me some time to get the package pass all the cran check. Now (Aug.03,2019) it is available on cran. Install by either -install.packages(&quot;SHAPforxgboost&quot;) or + Some of my own experience On description Namespace load vs attach Documentation Potential problems when checking the package Some nice suggestions from the CRAN team when submitting the package Although ‘SHAPforxgboost’ is not a package too complicated, it took me some time to get the package pass all the cran check. Now (Aug.03,2019) it is available on cran. 
Install by either +install.packages(&quot;SHAPforxgboost&quot;) or +devtools::install_github(&quot;liuyanguu/SHAPforxgboost&quot;) Use the ‘usethis’ package https://usethis.r-lib.org/ to set up the structure of the package. @@ -75,9 +87,9 @@ install.packages(&quot;SHAPforxgboost&quot;) or Thu, 18 Jul 2019 00:00:00 +0000 https://liuyanguu.github.io/post/2019/07/18/visualization-of-shap-for-xgboost/ - The SHAPforxgboost package Why SHAP values Local explanation Consistency in global feature importance SHAP plots Summary plot Dependence plot Interaction effects SHAP force plot References The SHAPforxgboost package I wrote the R package SHAPforxgboost to cover all the plotting functions illustrated in this post. This post serves as the vignette for the package. + The SHAPforxgboost package Why SHAP values Local explanation Consistency in global feature importance SHAP plots Summary plot Dependence plot Interaction effects SHAP force plot References The SHAPforxgboost package I wrote the R package SHAPforxgboost to cover all the plotting functions illustrated in this post. This post serves as the vignette for the package. Please install from CRAN or Github. -install.packages(&quot;SHAPforxgboost&quot;) # or devtools::install_github(&quot;liuyanguu/SHAPforxgboost&quot;) Why SHAP values SHAP’s main advantages are local explanation and consistency in global model structure. +install.packages(&quot;SHAPforxgboost&quot;) # or devtools::install_github(&quot;liuyanguu/SHAPforxgboost&quot;) Why SHAP values SHAP’s main advantages are local explanation and consistency in global model structure. @@ -86,10 +98,10 @@ install.packages(&quot;SHAPforxgboost&quot;) # or devtools::install_gith Sun, 24 Feb 2019 00:00:00 +0000 https://liuyanguu.github.io/post/2019/02/24/shiny-in-blogdown/ - How to embed Shiny My Shiny app example How to embed Shiny Since Blogdown is for static websites, it cannot run Shiny in rmarkdown directly. According to discussion here and document here. 
+ How to embed Shiny My Shiny app example How to embed Shiny Since Blogdown is for static websites, it cannot run Shiny in rmarkdown directly. According to discussion here and document here. The only way to do it is using iframe and write outside the chunk: &lt;iframe src="https://liuyanguu.shinyapps.io/bcl_app/" width=1000 height=800"&gt;&lt;/iframe&gt; -There is also a built-in function in knitr to do the same thing and write in the chunk. +There is also a built-in function in knitr to do the same thing and write in the chunk. The only difference is that we can only set height, and the shiny app would appear slightly different with the siderbarPanel at the top and the mainPanel beneath it. @@ -98,8 +110,8 @@ There is also a built-in function in knitr to do the same thing and write in the Thu, 15 Nov 2018 00:00:00 +0000 https://liuyanguu.github.io/post/2018/11/15/xgboost-dart-example/ - Data Shrinkage DART: Dropout - MART skip_drop rate_drop one_drop It is always a good idea to study the packaged algorithm with a simple example. Inspired by my colleague Kodi’s excellent work showing how xgboost handles missing values, I tried a simple 5x2 dataset to show how shrinkage and DART influence the growth of trees in the model. -Data set.seed(123) n0 &lt;- 5 X &lt;- data. + Data Shrinkage DART: Dropout - MART skip_drop rate_drop one_drop It is always a good idea to study the packaged algorithm with a simple example. Inspired by my colleague Kodi’s excellent work showing how xgboost handles missing values, I tried a simple 5x2 dataset to show how shrinkage and DART influence the growth of trees in the model. +Data set.seed(123) n0 &lt;- 5 X &lt;- data.frame(x1 = runif(n0), x2 = runif(n0)) Y &lt;- c(1, 5, 20, 50, 100) cbind(X, Y) ## x1 x2 Y ## 1 0. @@ -108,7 +120,7 @@ Data set.seed(123) n0 &lt;- 5 X &lt;- data. 
Sun, 14 Oct 2018 00:00:00 +0000 https://liuyanguu.github.io/post/2018/10/14/shap-visualization-for-xgboost/ - Example 1 SHAP summary plot Alternative ways: SHAP dependence plot SHAP interaction effect plot SHAP force plot Example 2 Summary plot Dependence plot for each feature Force plot Stack plot by clustering groups Update 19/07/21: + Example 1 SHAP summary plot Alternative ways: SHAP dependence plot SHAP interaction effect plot SHAP force plot Example 2 Summary plot Dependence plot for each feature Force plot Stack plot by clustering groups Update 19/07/21: Since my R Package SHAPforxgboost has been released on CRAN, I updated this post using the new functions and illustrate how to use these functions using two datasets. For more information, please refer to: SHAP visualization for XGBoost in R @@ -118,7 +130,7 @@ Since my R Package SHAPforxgboost has been released on CRAN, I updated this post Fri, 20 Jul 2018 00:00:00 +0000 https://liuyanguu.github.io/post/2018/07/20/spatial-data-in-r-dividing-raster-layers-into-equal-area-rings/ - Saturation in ten cities with equal-area rings R Code for one city Results for the ring saturations Average saturation in each ring Methodology Original Code This data visualization example include: + Saturation in ten cities with equal-area rings R Code for one city Results for the ring saturations Average saturation in each ring Methodology Original Code This data visualization example include: * Import .img file as a raster * Turn raster into a data.frame of points (coordinates) and values * Dividing the points into 100 equal-area rings @@ -132,19 +144,10 @@ Since my R Package SHAPforxgboost has been released on CRAN, I updated this post Mon, 16 Jul 2018 00:00:00 +0000 https://liuyanguu.github.io/post/2018/07/16/how-to-draw-heatmap-with-colorful-dendrogram/ - Data Version 1: Color both the branches and labels Version 2: color only the labels. 
Version 3: If there is no color, and we do not reorder the branches This data visualization example include: + Data Version 1: Color both the branches and labels Version 2: color only the labels. Version 3: If there is no color, and we do not reorder the branches This data visualization example include: * Hierarchical clustering, dendrogram and heat map based on normalized odds ratios * The dendrogram was built separately to give color to dendrogram’s branches/labels based on cluster using dendextend -* Heatmap is made by heatmap. - - - - eXtreme Gradient Boosting (XGBoost): Better than random forest or gradient boosting - https://liuyanguu.github.io/post/2018/07/09/extreme-gradient-boosting-xgboost-better-than-random-forest-or-gradient-boosting/ - Mon, 09 Jul 2018 00:00:00 +0000 - - https://liuyanguu.github.io/post/2018/07/09/extreme-gradient-boosting-xgboost-better-than-random-forest-or-gradient-boosting/ - Overview About the Data 1. Extreme Gradient Boosting 2. Gradient boosting 3. Random Forest 4. Lasso 5. Best Subset Compare MSEs Original Code are stored on github Overview I recently had the great pleasure to meet with Professor Allan Just and he introduced me to eXtreme Gradient Boosting (XGBoost). I have extended the earlier work on my old blog by comparing the results across XGBoost, Gradient Boosting (GBM), Random Forest, Lasso, and Best Subset. +* Heatmap is made by heatmap.2 from gplots using the built dendrogram @@ -154,7 +157,7 @@ Since my R Package SHAPforxgboost has been released on CRAN, I updated this post https://liuyanguu.github.io/post/2018/07/01/catalogue-of-my-old-blog/ Introducing my new blog written solely in R Markdown When I realized it was so convenient to write blog directly using R Markdown, I searched if there is a specific tool for it. And I found Blogdown, an R package developed by Yihui Xie, who also developed R Markdown. 
- Catalog As a summary, I would like to create a catalog for the main topics I wrote on google blogger before July 2018. +Catalog As a summary, I would like to create a catalog for the main topics I wrote on google blogger before July 2018. diff --git a/post/page/1/index.html b/post/page/1/index.html index 5bc707b..c055f15 100644 --- a/post/page/1/index.html +++ b/post/page/1/index.html @@ -1 +1,10 @@ -https://liuyanguu.github.io/post/ \ No newline at end of file + + + + https://liuyanguu.github.io/post/ + + + + + + diff --git a/post/page/2/index.html b/post/page/2/index.html index a6166a7..a568b74 100644 --- a/post/page/2/index.html +++ b/post/page/2/index.html @@ -1,194 +1,227 @@ - - - - - - - Posts | Welcome to my blog - - - - - - - - - - -
-
- - - - -
-
- - - R package - - -
-

SHAP for XGBoost in R: SHAPforxgboost

-
- Yang Liu - / - 2019-07-18 -
-
- - - - -
Thumbnail
- - - - - + + + + + + + Posts | Welcome to my blog + + + + + + + +
+ + +
+
+ + + + +
+
+ + + R package + + +
+

Notes on writing an R package

+
+ Yang Liu + / + 2019-07-28 +
+ +
+ +
+
+ + + R package + + +
+

SHAP for XGBoost in R: SHAPforxgboost

+
+ Yang Liu + / + 2019-07-18 +
+ -
- -
-
- - - Data Visualization - - -
-

Shiny in Blogdown

-
- Yang Liu - / - 2019-02-24 -
- +
+ +
+
+ + + Data Visualization + + +
+

Shiny in Blogdown

+
+ Yang Liu + / + 2019-02-24 +
+ -
- -
-
- - - Machine Learning - - -
-

Study shrinkage and DART in xgboost modeling using a simple dataset

-
- Yang Liu - / - 2018-11-15 -
- +
+ +
+
+ + + Machine Learning + + +
+

Study shrinkage and DART in xgboost modeling using a simple dataset

+
+ Yang Liu + / + 2018-11-15 +
+ -
- -
-
- - - Machine Learning - - -
-

SHAP Visualization in R (first post)

-
- Yang Liu - / - 2018-10-14 -
- +
+ +
+
+ + + Machine Learning + + +
+

SHAP Visualization in R (first post)

+
+ Yang Liu + / + 2018-10-14 +
+ -
- -
-
- - - Data Visualization - - -
-

Spatial data in R: Dividing raster layers into equal-area rings

-
- Yang Liu - / - 2018-07-20 -
- +
+ +
+
+ + + Data Visualization + + +
+

Spatial data in R: Dividing raster layers into equal-area rings

+
+ Yang Liu + / + 2018-07-20 +
+
- -
+[…] […] list.of.packages <- c("raster",… + +
+
+ +
-
- - - -

+ + + +
+ + + + + +
+ + - - - - - - - - - - - - - + + + + + diff --git a/post/page/3/index.html b/post/page/3/index.html index 37511fc..5806834 100644 --- a/post/page/3/index.html +++ b/post/page/3/index.html @@ -23,7 +23,7 @@ @@ -146,9 +148,8 @@

Catalog of my old blog< ga('send', 'pageview'); } - - - + +