-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy path01_dateCleaning.R
66 lines (58 loc) · 4.43 KB
/
01_dateCleaning.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
### Date cleaning script
source("00_fileReading.R")
### Cleaning dates ---------------------------------------------------------------------------------------------
## Sampling Event sheet
# Inspect the structure of the sampling-event table before modifying it.
str(sE.df)

## Converting all dates into date format
# eventDate is turned into a factor so that every distinct raw value can be
# listed with levels() and then rewritten by hand with fct_recode().
sE.df <- sE.df %>%
  mutate(eventDate = as.factor(eventDate))
levels(sE.df$eventDate)

# fct_recode() takes "new level" = "old level" pairs. Each free-text raw label
# on the right is rewritten as a year-first "start/end" string on the left.
sE.df <- sE.df %>%
  mutate(eventDate = fct_recode(
    eventDate,
    "2016-02/2016-05" = "02-2016/05",
    "2016-06-10/2016-06-23" = "10-23 JUNI 2016",
    "2011-10-16/2011-10-30" = "16-30 OKTOBER 2011",
    "2006-05-19/2006-06-20" = "19 MEI-20 JUNI 2006",
    "2011-05-24/2011-06-06" = "24 MEI-6 JUNI 2011",
    "2004-10-24/2004-11-09" = "24 OKTOBER-9 NOVEMBER 2004",
    "2012-05-27/2012-06-06" = "27 MEI-06 JUNI 2012",
    "2006-05-27/2006-06-07" = "27 MEI-07 JUNI 2006",
    # Start day corrected from 23 to 27 to match the raw label.
    # NOTE(review): this interval still ends (June) before it starts (October).
    # The raw label gives a single year only, so one endpoint presumably
    # belongs to a different year -- confirm against the original records.
    "2017-10-27/2017-06-13" = "27 OKTOBER-13 JUNI 2017",
    "2013-04/2013-05" = "APRIL-MEI /2013",
    "2015-06/2015-08" = "JUNI-AGUSTUS 2015"
  ))
## Checking all sampling events whose dates are separated with "|"
# "|" is the regex alternation operator, so used as a plain pattern it matches
# every element. fixed = TRUE makes grep() search for the literal character and
# return the positions of the values that really contain it.
grep("|", sE.df$eventDate, fixed = TRUE)

# Spot checks: show the sampling events carrying one specific multi-period
# date, and the occurrence records carrying one specific eventID.
sE.df %>%
  filter(eventDate == "2013-02-25/2013-03-01 | 2013-06-15/2013-07-05")
occ.df %>%
  filter(eventID == "UI-2014WM-RL010-GP001")
# The remaining cases are all unique, so the incorrect formats are rewritten
# one by one. Each raw value lists several sampling periods separated by "|";
# it is replaced by a single "start/end" string running from the first listed
# date to the last one, so the gaps between periods are no longer represented.
sE.df <- sE.df %>%
  mutate(eventDate = fct_recode(
    eventDate,
    "1998-01/1998-12" = "1998-01/1998-02|1998-08/1998-12",
    "1998-12/2000-07" = "1998-12 | 2000-07",
    "1999-08/2000-01" = "1999-08 | 1999-10 | 2000-01",
    "1999-08/2000-02" = "1999-08 | 1999-10 | 2000-02",
    "1999-08/2000-03" = "1999-08 | 1999-10 | 2000-03",
    "1999-08/2000-04" = "1999-08 | 1999-10 | 2000-04",
    "1999-09-14/1999-09-29" = "1999-09-14/15 | 1999-09-21/22 | 1999-09-28/29",
    "2000-05-08/2000-06-26" = "2000-05-08 | 2000-06-06 | 2000-06-26",
    "2000-06/2000-11" = "2000-06 | 2000-09 | 2000-11",
    "2000-08-24/2000-11-13" = "2000-08-24 | 2000-09-18 | 2000-10-16 | 2000-11-13",
    "2002-08/2003-06" = "2002-08/11| 2003-04/06",
    "2002-09-01/2002-09-29" = "2002-09-1 | 2002-09-15 | 2002-09-29",
    "2005-07-05/2005-08-17" = "2005-07-05/06 | 2005-07-19/20 | 2005-08-03/04 | 2005-08-17",
    "2006-07-24/2006-09-01" = "2006-07-24/25 | 2006-08-09/10 | 2006-08-31/2006-09-01",
    "2008-03/2008-07" = "2008-03/05|2008-07",
    "2009-01/2009-04" = "2009-01|2009-04",
    "2009-08/2011-07" = "2009-08/12|2010-07/12|2011-07",
    "2010-02-15/2010-08-08" = "2010-02-15/2010-02-17 | 2010-08-04/2010-08-08",
    "2010-04-02/2010-08-08" = "2010-04-02/04| 2010-08-04/08",
    "2010-09/2011-05" = "2010-09|2011-05",
    # End year written out in full, like every other target in this list.
    "2012-04/2012-07" = "2012-04|07",
    "2012-06/2012-10" = "2012-06| 2012-10",
    # Days zero-padded to two digits, as done for "2002-09-1" above.
    "2012-10-03/2012-11-09" = "2012-10-3/5/2012-11-1/9",
    "2012-10/2013-03" = "2012-10/12 | 2013-03",
    "2013-02-25/2013-07-05" = "2013-02-25/2013-03-01 | 2013-06-15/2013-07-05",
    "2015-06/2015-12" = "2015-06/2015-08 | 2015-12"
  ))
# Checking whether any eventDate still contains "|"
# Counts the values that still hold a literal "|" (fixed = TRUE disables the
# regex meaning of the character). The expected result after the recoding
# above is 0; anything else means a multi-period value was missed.
sum(grepl("|", as.character(sE.df$eventDate), fixed = TRUE))
# List the final set of distinct eventDate values for a visual check.
levels(sE.df$eventDate)
# Occurrence
# Copy the cleaned eventDate onto every occurrence record, looked up by
# eventID. match() gives, for each occurrence, the row of the first sampling
# event with the same eventID (NA when there is none, which yields an NA date).
# The column is extracted with $ before indexing so that a plain vector is
# assigned whether sE.df is a data.frame or a tibble; `sE.df[rows, "eventDate"]`
# would return a one-column tibble in the latter case.
occ.df$eventDate <- sE.df$eventDate[match(occ.df$eventID, sE.df$eventID)]
str(occ.df)