-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathprepare_altered_temp.py
244 lines (213 loc) · 10.4 KB
/
prepare_altered_temp.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
# calculate temperature anomalies with respect to 2020
# prepare Figure S7
# import packages
import xarray as xr
import matplotlib.pyplot as plt
import numpy as np
import os
import datetime
import pandas as pd
from scipy.stats import pearsonr
# set working directory
path = os.path.dirname(os.path.abspath(__file__))
os.chdir(path)
#%%
# import MAR data
# define data directory
data_dir = '../data'
# define run of climate model MAR
MAR_run = 'CESM2_ssp585' #'CESM2_ssp126'#
var_name = 'ST2'
# Average over how many days?
MODIS_day_average=8
# Percentile:
MODIS_percentile=99
# Total time period (in days)
MODIS_total_days=6944 # From 1 January 2001 to 1 January 2020 = 6939 + 5 days to be divisble by 8: easier for later on (so approx. 19 years)
# open all .nc files in folder
data_raw = xr.open_mfdataset(f'{data_dir}/{MAR_run}/{var_name}_*.nc')
# select only surface temperature from files
surface_temperature=data_raw[f'{var_name}'][:]
# convert to datetimeindex (if not already datetimeindex)
if str(surface_temperature.TIME.values.dtype) == 'object':
datetimeindex = surface_temperature.indexes['TIME'].to_datetimeindex()
surface_temperature['TIME'] = datetimeindex
# average 8 days (synchronized with MODIS obs) (start date of used MODIS data is 2001/01/01, end date is 2019/12/27), every date represents the first day of the 8-day period
# MAR data runs from 1980/01/01 to 2100/12/31
# find first day of 8-day period of MAR data (so that the start date is aligned with the MODIS data)
first_modis_date = datetime.date(2001,1,1)
last_modis_date = datetime.date(2020,1,1)
first_MAR_date = datetime.date(1981,1,1) # (1980,1,1)
last_MAR_date = datetime.date(2100,12,31) # (2100,12,31)
n_8day_cycles_before = (first_modis_date - first_MAR_date)/8
n_8day_cycles_after = (last_MAR_date - first_modis_date)/8
# define start and end day of MAR data
MAR_start = np.datetime64(first_modis_date - datetime.timedelta(days=(n_8day_cycles_before.days*8)))
MAR_end = np.datetime64(first_modis_date + datetime.timedelta(days=(n_8day_cycles_after.days*8)))
# drop irrelevant days of MAR data
surface_temperature = surface_temperature.where(surface_temperature.TIME > MAR_start,drop=True)
surface_temperature = surface_temperature.where(surface_temperature.TIME < MAR_end, drop=True)
# if time is not divisible by 8 days drop last x entries
surface_temperature = surface_temperature[:(len(surface_temperature)//8)*8]
#%%
# check that the difference between dates in MAR is not too big
if (last_MAR_date - first_MAR_date).days - len(surface_temperature) > 50:
print('check MAR dates!')
# average surface temperature over 8-days
surface_temperature_8day = surface_temperature.coarsen(TIME=8).mean()
# timestamp now indicates the middle of the interval, while the modis timestamp indicates the beginning of the 8-day period
surface_temperature_8day["modis_time"] = surface_temperature_8day.TIME - pd.Timedelta(4, unit='D')
#%%
# function extract data at meteorite and toclassify locations
def extractdata(
data_raw,savename,variablename,file_locs_mets,file_locs_toclassify):
# import meteorite locations (gridded)
locs_mets = pd.read_csv('../data/'+file_locs_mets+'.csv')
# select values at meteorite locations (change coordinates to MAR values in km)
data_at_mets = data_raw.interp(
X=locs_mets.x.to_xarray()/1000.,
Y=locs_mets.y.to_xarray()/1000.)
# export data at meteorite locations
data_at_mets_df = data_at_mets.to_dataframe(name=variablename)[
['X','Y',variablename]]
data_at_mets_df.to_csv(
'../data/'+savename+'_at_mets.csv',
header=True,index=False)
# import locations to be classified
locs_toclass = pd.read_csv('../data/'+file_locs_toclassify+'.csv')
# select values at locations to be classified
data_at_toclass = data_raw.interp(
X=locs_toclass.x.to_xarray()/1000.,
Y=locs_toclass.y.to_xarray()/1000.)
# export data
data_at_toclass_df = data_at_toclass.to_dataframe(name=variablename)[
['X','Y',variablename]]
return data_at_mets_df, data_at_toclass_df
# function to calculate the 99th percentile over a 19yr period
def timemask(start_date):
# define timemask
timemask_left = np.datetime64(start_date - datetime.timedelta(days=1))
timemask_right = np.datetime64(start_date + datetime.timedelta(days=19*365.25))
timemask = ((surface_temperature_8day['modis_time'] >=
timemask_left) & (surface_temperature_8day['modis_time'] <=
timemask_right))
# select data over the period
surface_temperature_8day_modistime = surface_temperature_8day.where(timemask,drop=True)
# rechunk dataarray along time dimension
surface_temperature_rechunked = surface_temperature_8day_modistime.chunk({'TIME': -1})
# calculate 99th percentile
surf_temp_99thperc = surface_temperature_rechunked.quantile(0.99,dim='TIME')
# use function extractdata to extract data at meteorite and toclassify locations
data_at_mets, data_at_toclass = extractdata(surf_temp_99thperc,str(start_date),'99th_MAR','stempPERC99_at_mets','stempPERC99_at_toclass')
return(data_at_mets, data_at_toclass)
#%%
# select values on meteorite locations and on other observation locations
# read in meteorite location file
locs_mets = pd.read_csv('../data/stempPERC99_at_mets.csv')
locs_toclass = pd.read_csv('../data/stempPERC99_at_toclass.csv')
# use np.round to account for tiny differences (order 10^-9) in coordinates that are likely introduced through conversion from meters to kms
# locs_toclass.x = np.round(locs_toclass.x)
# locs_toclass.y = np.round(locs_toclass.y)
yrs = np.arange(2001,2082,1)
for yr in yrs:
start_date = datetime.date(yr,1,1)
print(start_date)
MAR_at_mets, MAR_at_toclass = timemask(start_date)
# prepare dataframe for merge
# reset coordinates to meters
MAR_at_mets['x'] = MAR_at_mets.X*1000.
MAR_at_mets['y'] = MAR_at_mets.Y*1000.
MAR_at_mets = MAR_at_mets.drop(['X','Y'],axis=1)
# rename column according to starting year of 19 year period
MAR_at_mets = MAR_at_mets.rename(columns={"99th_MAR":str(yr)})
# merge year with all observations
locs_mets = pd.merge(locs_mets,MAR_at_mets)
# prepare dataframe for merge
# reset coordinates to meters
# use np.round to account for tiny differences (order 10^-9) in coordinates that are likely introduced through conversion from meters to kms
MAR_at_toclass['x'] = np.round(MAR_at_toclass.X*1000.)
MAR_at_toclass['y'] = np.round(MAR_at_toclass.Y*1000.)
MAR_at_toclass = MAR_at_toclass.drop(['X','Y'],axis=1)
# rename column according to starting year of 19 year period
MAR_at_toclass = MAR_at_toclass.rename(columns={"99th_MAR":str(yr)})
# merge year with all observations
locs_toclass = pd.merge(MAR_at_toclass,locs_toclass, on=['x','y'])
#%%
# compare MODIS observations to MAR data
# merge all observations
merged_obs = pd.merge(locs_toclass,locs_mets,on=["x","y",'stemp','2001'],how="left")
# set fontsize for plot
font = {'family' : 'arial',
'weight' : 'normal',
'size' : 7}
plt.rc('font', **font)
# plot figure
fig,ax = plt.subplots(1,1,figsize=(8.8/2.54, 8/2.54))
# plot 2d histogram
plt.hist2d(merged_obs['2001'],merged_obs['stemp'],bins=60,cmap='Oranges',cmin=0)
# set aspect equal
ax = plt.gca()
ax.set_aspect('equal', adjustable='box')
# plot labels and title
plt.xlabel('MAR temperature (°C)')
plt.ylabel('MODIS temperature (°C)')
plt.title('99th percentile of 19-year distribution \nof 8-daily surface temperature estimates \nover all places used for classification')
# calculate Pearson correlation coefficient
corr, _ = pearsonr(locs_toclass['2001'],locs_toclass['stemp'])
ax.annotate(f'Pearson correlation coefficient: {np.round(corr,2)}', xy=(-24,5))
# calculate trend and offset
trend,offset = np.polyfit(locs_toclass['2001'],locs_toclass['stemp'],1)
#plt.plot(locs_toclass['2001'],np.polyval((trend,offset),locs_toclass['2001']),linewidth=0.3,color='k',label=f'{np.round(trend,2)}*$T_{{MAR}}$ + {np.round(offset,2)}')
# plot 1-1 slope
plt.plot(locs_toclass['2001'],np.polyval((1,0),locs_toclass['2001']),linewidth=0.3,color='k',label=f'1:1 slope')
# plot colorbar
plt.colorbar(label='Counts')
# plot legend
plt.legend(loc='lower right',fontsize=9)
# adjust spacing
plt.subplots_adjust(top = 1, bottom = 0, right = 1, left = 0,
hspace = 0, wspace = 0)
# save figure
fig.savefig(f'../figures/MAR_vs_MODIS_{MAR_run}_.pdf',bbox_inches = 'tight',
pad_inches = 0.01,dpi=300)
#%%
# export temperature anomalies
for yr in yrs:
# prepare dataframes for export
# calculate the increase in temperature with respect to 2001 and add this to the observed temperatures
locs_mets['altered_temp'] = (locs_mets[str(yr)] - locs_mets['2001']) + locs_mets['stemp']
yr_mets = locs_mets[['x','y','altered_temp']].rename(columns={'altered_temp':'stemp'})
locs_toclass['altered_temp'] = (locs_toclass[str(yr)] - locs_toclass['2001']) + locs_toclass['stemp']
yr_toclass = locs_toclass[['x','y','altered_temp']].rename(columns={'altered_temp':'stemp'})
# export dataframes as .csv
yr_mets.to_csv(f'../data/{MAR_run}_{str(yr)}_mets.csv', index=False) # the year is the start year of 19yr interval (!)
yr_toclass.to_csv(f'../data/{MAR_run}_{str(yr)}_toclass.csv', index=False)
# delete variables
del(yr_mets,yr_toclass)
#%%
# plot values over time
# scatter values at meteorite finding locations
plt.scatter(yrs+19,locs_mets.median()[3::],color='orange')
errorbarperc = 0.01
plt.errorbar(yrs+19,locs_mets.median()[3::],
[locs_mets.quantile(errorbarperc)[3::]-locs_mets.median()[3::],
locs_mets.median()[3:-1]-locs_mets.quantile(1-errorbarperc)[3::]],
fmt='none',
capsize=3,
color='orange')
# scatter values at locations to classify
locs_toclass_plt = locs_toclass.drop(columns=['x','y','stemp','altered_temp'])
plt.scatter(locs_toclass_plt.columns.astype(int)+19,locs_toclass_plt.median(),color='gray')
plt.errorbar(locs_toclass_plt.columns.astype(int)+19,locs_toclass_plt.median(),
[locs_toclass_plt.quantile(errorbarperc)-locs_toclass_plt.median(),
locs_toclass_plt.median()-locs_toclass_plt.quantile(1-errorbarperc)],
fmt='none',
capsize=3,
color='gray')
# plot labels
plt.xlabel('end of interval (yr)')
plt.ylabel('99th percentile of surface temperature')
# set limits
plt.xlim([1999,2100])
# save figure
plt.savefig('../figures/temp_evolution.png',dpi=200)