import numpy as np
import pandas as pd
import xarray as xr
import datetime
import statsmodels.api as sm
from statsmodels.sandbox.regression.predstd import wls_prediction_std
from statsmodels.iolib.table import (SimpleTable, default_txt_fmt)
import matplotlib.pyplot as plt
from matplotlib import rc
rc('font', size=12)
rc('text', usetex=False)
# matplotlib.pyplot.xkcd()
C_MAR = "#000000" # Belgian flag black
C_RACMO = "#af1523" # Netherland flag red
C_lightblue = np.array((166, 206, 227))/255
C_darkblue = np.array((31, 120, 180))/255
C_lightgreen = np.array((127, 223, 138))/255
C_darkgreen = np.array((51, 160, 44))/255
C_darkblue = "#1f7ab7"
from adjust_spines import adjust_spines
import matplotlib.patheffects as pe
import seaborn as sns
- I set
DATADIR
as abash
environment variable in my login scripts. - This is so that Python babel blocks can also easily get that property.
import os
DATADIR = os.environ['DATADIR']
Example:
<<get_DATADIR>>
print(DATADIR)
set -o nounset
set -o pipefail
# set -o errexit
### uncomment the above line when doing initial run. When rerunning and
### counting on GRASS failing w/ overwrite issues (speed increase), the
### line above must be commented
red='\033[0;31m'; orange='\033[0;33m'; green='\033[0;32m'; nc='\033[0m' # No Color
log_info() { echo -e "${green}[$(date --iso-8601=seconds)] [INFO] ${@}${nc}"; }
log_warn() { echo -e "${orange}[$(date --iso-8601=seconds)] [WARN] ${@}${nc}"; }
log_err() { echo -e "${red}[$(date --iso-8601=seconds)] [ERR] ${@}${nc}" >&2; }
trap ctrl_c INT # trap ctrl-c and call ctrl_c()
ctrl_c() { log_err "CTRL-C. Cleaning up"; }
debug() { if [[ debug:- == 1 ]]; then log_warn "debug:"; echo $@; fi; }
<<GRASS_config>>
https://grass.osgeo.org/grass74/manuals/variables.html
GRASS_VERBOSE | |
---|---|
-1 | complete silence (also errors and warnings are discarded) |
0 | only errors and warnings are printed |
1 | progress and important messages are printed (percent complete) |
2 | all module messages are printed |
3 | additional verbose messages are printed |
export GRASS_VERBOSE=3
# export GRASS_MESSAGE_FORMAT=silent
if [ -z ${DATADIR+x} ]; then
echo "DATADIR environment varible is unset."
echo "Fix with: \"export DATADIR=/path/to/data\""
exit 255
fi
set -x # print commands to STDOUT before running them
<<py_init>>
w = pd.read_csv("/home/kdm/data/van_As_2018/Watson_discharge_day_v03.txt", sep="\s+",
parse_dates=[[0,1,2]],
index_col=0)\
.drop(["DayOfYear", "DayOfCentury"], axis='columns')\
.rename({"WaterFluxDiversOnly(m3/s)" : "divers",
"Uncertainty(m3/s)" : "divers_err",
"WaterFluxDivers&Temperature(m3/s)" : "divers_t",
"Uncertainty(m3/s).1" : "divers_t_err",
"WaterFluxCumulative(km3)" : "cum",
"Uncertainty(km3)" : "cum_err"},
axis='columns')
obs = w[['divers_t','divers_t_err']].rename({'divers_t':'Observed',
'divers_t_err':'Observed error'}, axis='columns')
obs.index.name = 'time'
obs.to_csv("./dat/runoff/obs_W.csv")
<<py_init>>
obs = pd.read_csv("/home/kdm/data.me/qaanaaq/discharge2017.txt", index_col=0, parse_dates=True)
tmp = pd.read_csv("/home/kdm/data.me/qaanaaq/discharge2018.txt", index_col=0, parse_dates=True)
obs = pd.concat((obs,tmp))
tmp = pd.read_csv("/home/kdm/data.me/qaanaaq/discharge2019.txt", index_col=0, parse_dates=True)
obs = pd.concat((obs,tmp))
obs = obs.resample('1D')\
.mean()\
.rename({'Discharge':'Observed'}, axis='columns')
obs.index.name = "time"
obs.to_csv("./dat/runoff/obs_Q.csv")
<<py_init>>
root="/home/kdm/data/Tedstone_2017"
# for y in np.arange(2009,2012+1):
csv = []
for y in np.arange(2009,2012+1):
df = pd.read_csv(root + "/leverett_Q_" + str(y) + "_UTC.csv",
comment="#", index_col=0)\
.rename({"Discharge m3 s-1": "Observed"}, axis="columns")
df.index = datetime.datetime(y,1,1) + np.array([datetime.timedelta(_-1) for _ in df.index])
csv.append(df)
obs = pd.concat(csv, axis='index')\
.resample('1D').mean()
obs.index.name = "time"
obs.to_csv("./dat/runoff/obs_L.csv")
<<py_init>>
<<get_DATADIR>>
root=DATADIR+"/Hawkings_2016"
print(root)
obs = pd.read_excel(root+"/NarsarsuaqDischarge2013.xlsx")\
.rename({"Q (m3 sec-1)" : "Observed"}, axis="columns")
obs.index = datetime.datetime(2013,1,1) + np.array([datetime.timedelta(_-1) for _ in obs['DecDay']])
obs.index.name = "time"
obs.drop('DecDay', inplace=True, axis='columns')
obs = obs.resample('1D').mean().dropna()
obs.to_csv("./dat/runoff/obs_Ks.csv")
<<py_init>>
obs = pd.read_csv("/home/kdm/data/GEM/GEM.csv", parse_dates=True, index_col=0)
obs.index.name = 'time'
# name, abbreviation
nloc = [['Kobbefjord', "Kb"],
['Oriartorfik', "O"],
['Teqinngalip', "T"],
['Kingigtorssuaq', "K"],
['Røde_Elv', "R"],
['Zackenberg', "Z"]]
for nl in nloc:
obs[nl[0]].to_csv("./dat/runoff/obs_" + nl[1] + ".csv")
names = ['Kb Kobbefjord','K Kingigtorssuaq','L Leverett','Ks Kiattuut Sermiat','O Oriartorfik','Q Qaanaaq','R Røde Elv','T Teqinngalip','W Watson', 'Z Zackenberg']
name = [' '.join(_.split(" ")[1:]) for _ in names]
loc = [_.split(" ")[0] for _ in names]
obs = {} # store all in dict of dataarrays
for i,l in enumerate(loc):
df_obs = pd.read_csv("./dat/runoff/obs_" + l + ".csv", index_col=0, parse_dates=True)
df_obs.columns = ['obs'] if l != 'W' else ['obs','err']
df_RCM = pd.read_csv("./dat/runoff/" + l + ".csv", index_col=0, parse_dates=True)
df = df_obs.merge(df_RCM, left_index=True, right_index=True)
# add upstream ice to all basins where it exists (not O or K)
df['MAR'] = df['MAR_land'] + df['MAR_ice_upstream'] if 'MAR_ice_upstream' in df.columns else df['MAR_land']
# Leverett should be just upstream ice, no land runoff
if l == 'L': df['MAR'] = df['MAR_ice']
# Same for RACMO
df['RACMO'] = df['RACMO_land'] + df['RACMO_ice_upstream'] if 'RACMO_ice_upstream' in df.columns else df['RACMO_land']
if l == 'L': df['RACMO'] = df['RACMO_ice']
df['MAR'] = df['MAR'].rolling('7D', min_periods=5).mean()
df['RACMO'] = df['RACMO'].rolling('7D', min_periods=5).mean()
df.attrs['name'] = name[i]
obs[l] = df
# one entry with everything, no time index, just all observation and model points
o,MAR,RACMO = [],[],[]
for k in loc:
o = np.append(o, obs[k]['obs'])
MAR = np.append(MAR, obs[k]['MAR'])
RACMO = np.append(RACMO, obs[k]['RACMO'])
df = pd.DataFrame((o,MAR,RACMO), index=['obs','MAR','RACMO']).T
df.attrs['name'] = "all"
obs_all = df
# same as above but without GEM basins
o,MAR,RACMO = [],[],[]
for k in loc:
if k in ['Kb','K','O','T']: continue
o = np.append(o, obs[k]['obs'])
MAR = np.append(MAR, obs[k]['MAR'])
RACMO = np.append(RACMO, obs[k]['RACMO'])
df = pd.DataFrame((o,MAR,RACMO), index=['obs','MAR','RACMO']).T
df.attrs['name'] = "noGEM"
obs_noGEM = df
<<py_init>>
<<py_init_graphics>>
# plt.close(1)
fig = plt.figure(1, figsize=(8,3.5)) # w,h
# get_current_fig_manager().window.move(0,0)
fig.clf()
fig.set_tight_layout(True)
ax1 = fig.add_subplot(121)
ax2 = fig.add_subplot(122)
<<load_all_obs>>
# Plot all basins alone
for k in obs.keys():
df = obs[k]
df = df.replace(0, np.nan).dropna()
df = np.log10(df)
ax1.scatter(df['obs'], df['MAR'], marker='.', alpha=0.1,
label=df.attrs['name'], edgecolor='none', clip_on=False)
ax2.scatter(df['obs'], df['RACMO'], marker='.', alpha=0.1,
label=df.attrs['name'], edgecolor='none', clip_on=False)
# fit to all basins together
df = obs_all
df = np.log10(df)
df = df[~df.isin([np.nan, np.inf, -np.inf]).any(1)]
# # drop 5/95 outliers
# q = df['obs'].quantile([0.05, 0.95])
# df = df[(df['obs'] > q[0.05]) & (df['obs'] < q[0.95])]
df.sort_values(by='obs', inplace=True)
x = df['obs']
y_MAR = df['MAR']
y_RACMO = df['RACMO']
X = sm.add_constant(x)
# X = x
model = sm.OLS(y_MAR, X)
results = model.fit()
prstd, iv_l, iv_u = wls_prediction_std(results)
ax1.fill_between(x, iv_u, iv_l, color="grey", alpha=0.25)
ax1.text(0.6, 0.05, 'r$^{2}$:' + str(round(results.rsquared,2)), transform=ax1.transAxes, horizontalalignment='left')
model = sm.OLS(y_RACMO, X)
results = model.fit()
prstd, iv_l, iv_u = wls_prediction_std(results)
ax2.fill_between(x, iv_u, iv_l, color="grey", alpha=0.25)
ax2.text(0.6, 0.05, 'r$^{2}$:' + str(round(results.rsquared,2)), transform=ax2.transAxes, horizontalalignment='left')
# repeat but without GEM basins
df = obs_noGEM
df = np.log10(df)
df = df[~df.isin([np.nan, np.inf, -np.inf]).any(1)]
# # # drop 5/95 outliers
# df['diff'] = df['obs'] - df['MAR']
# q = df['obs'].quantile([0.05, 0.95])
# df = df[(df['obs'] > q[0.05])]
df.sort_values(by='obs', inplace=True)
x = df['obs']
y_MAR = df['MAR']
y_RACMO = df['RACMO']
X = sm.add_constant(x)
# X = x
model = sm.OLS(y_MAR, X)
results = model.fit()
prstd, iv_l, iv_u = wls_prediction_std(results)
ax1.fill_between(x, iv_u, iv_l, color="red", alpha=0.1)
ax1.text(0.6, 0.13, 'r$^{2}$:' + str(round(results.rsquared,2)), transform=ax1.transAxes, horizontalalignment='left', color='red')
model = sm.OLS(y_RACMO, X)
results = model.fit()
prstd, iv_l, iv_u = wls_prediction_std(results)
ax2.fill_between(x, iv_u, iv_l, color="red", alpha=0.1)
ax2.text(0.6, 0.13, 'r$^{2}$:' + str(round(results.rsquared,2)), transform=ax2.transAxes, horizontalalignment='left', color='red')
# coords = np.array((ax1.get_xlim(),ax1.get_ylim(),ax2.get_xlim(),ax2.get_ylim())).flatten()
coords = np.log10([1E-3, 1E4])
for ax in [ax1,ax2]:
# ax.set_yscale('log')
# ax.set_xscale('log')
# ax.set_xlim(2E-4,1E3)
# ax.set_ylim(ax.get_xlim())
ax.set_xlabel('Observed [m$^{3}$ s$^{-1}$]')
kw = {'alpha':0.5, 'linewidth':1, 'color':'k', 'linestyle':'-'}
ax.plot(np.log10([1E-3,1E4]), np.log10([1E-3,1E4]), **kw)
ax.plot(np.log10([1E-3,1E4]), np.log10([1E-3/5,1E4/5]), **kw)
ax.plot(np.log10([1E-3,1E4]), np.log10([1E-3*5,1E4*5]), **kw)
ax.set_ylim([-3,4])
ax.set_xlim(ax.get_ylim())
ax.set_yticks([-3,-2,-1, 0, 1,2,3,4])
ax.set_yticklabels(['10$^{-3}$','10$^{-2}$','10$^{-1}$','10$^{0}$','10$^{1}$','10$^{2}$','10$^{3}$','10$^{4}$'])
ax.set_xticks(ax.get_yticks())
ax.set_xticklabels(ax.get_yticklabels())
# locmaj = matplotlib.ticker.LogLocator(base=10,numticks=12)
# ax.xaxis.set_major_locator(locmaj)
# ax.yaxis.set_major_locator(locmaj)
# kwargs = {'rotation':40, 'horizontalalignment':'center', 'fontsize':8, 'verticalalignment':'center'}
# if ax == ax1:
# loc=4E-3
# ax.text(loc, (loc/2)*0.4, "RCM = 1/2 * Obs", **kwargs)
# # ax.text(loc, loc*1.3, "RCM = Obs", **kwargs)
# loc=1.5E-3
# ax.text(loc, (loc*2)*1.6, "RCM = 2 * Obs", **kwargs)
adjust_spines(ax1, ['left','bottom'])
adjust_spines(ax2, ['right','bottom'])
ax1.set_ylabel('MAR [m$^{^3}$ s$^{-1}$]')
ax2.set_ylabel('RACMO [m$^{^3}$ s$^{-1}$]')
leg = ax1.legend(fontsize=8, frameon=False, bbox_to_anchor=(0.9,0.18), loc='lower left', mode="expand")
ax2.set_zorder(-1)
for lh in leg.legendHandles:
lh.set_alpha(1)
plt.setp(ax1.xaxis.get_majorticklabels(), rotation=45)
plt.setp(ax2.xaxis.get_majorticklabels(), rotation=45)
mticks = np.array([np.log10(np.linspace(2*_, 9*_, num=8)) for _ in [0.001, 0.01, 0.1,1,10,100,1000]]).ravel()
for ax in [ax1,ax2]:
ax.set_xticks(mticks, minor=True)
ax.set_yticks(mticks, minor=True)
plt.savefig("./fig/scatter_daily.png", bbox_inches='tight', dpi=300)
plt.savefig("./fig/scatter_daily.pdf", bbox_inches='tight', dpi=300)
plt.savefig("./fig/scatter_daily.svg", bbox_inches='tight', dpi=300)
<<py_init>>
<<py_init_graphics>>
# plt.close(1)
fig = plt.figure(1, figsize=(8,3.5)) # w,h
# get_current_fig_manager().window.move(0,0)
fig.clf()
fig.set_tight_layout(True)
ax1 = fig.add_subplot(121)
ax2 = fig.add_subplot(122)
<<load_all_obs>>
df = obs_noGEM
df = np.log10(df)
df = df[~df.isin([np.nan, np.inf, -np.inf]).any(1)]
# q = df['obs'].quantile([0.05, 0.95])
# df = df[(df['obs'] > q[0.05]) & (df['obs'] < q[0.95])]
# kw = {'alpha': 0.2, 'marker':'.', 'edgecolor':'none', 'clip_on':False, 'color':'orange'}
# sm.graphics.mean_diff_plot(x, y_MAR, ax=ax1, scatter_kwds=kw)
# sm.graphics.mean_diff_plot(x, y_RACMO, ax=ax2, scatter_kwds=kw)
# Tukey parameters
tx_MAR = (df['obs']+df['MAR'])/2; ty_MAR = df['obs']-df['MAR']
tx_RACMO = (df['obs']+df['RACMO'])/2; ty_RACMO = df['obs']-df['RACMO']
kw = {'mincnt':1, 'bins':'log', 'clip_on':True, 'gridsize':20, 'extent':[-3,3,-3,3], 'cmap':cm.cividis}
# plot all to get max of both for colorbar range
h_MAR = ax1.hexbin(tx_MAR, ty_MAR, alpha=0, **kw)
h_RACMO = ax2.hexbin(tx_RACMO, ty_RACMO, alpha=0, **kw)
hmax = max([h_MAR.get_array().max(),h_RACMO.get_array().max()])
h_MAR = ax1.hexbin(tx_MAR, ty_MAR, vmax=hmax, **kw)
h_RACMO = ax2.hexbin(tx_RACMO, ty_RACMO, vmax=hmax, **kw)
kwtext = {'path_effects':[pe.withStroke(linewidth=4, foreground="white")], 'color':'k'}
kwtext['horizontalalignment'] = 'left'
kwline = {'color':'k'}
xpos = -3
for ty,ax in [[ty_MAR,ax1],[ty_RACMO,ax2]]:
y = ty.mean()
_ = ax.axhline(y=y, **kwline)
_ = ax.text(xpos, y, str(round(10**y,2)), verticalalignment='center', **kwtext)
y = ty.mean() + 1.96 * ty.std()
_ = ax.axhline(y=y, linestyle='--', **kwline)
_ = ax.text(xpos, y, str(round(10**y,2)), verticalalignment='bottom', **kwtext)
y = ty.mean() - 1.96 * ty.std()
_ = ax.axhline(y=y, linestyle='--', **kwline)
_ = ax.text(xpos, y-0.15, str(round(10**y,2)), verticalalignment='top', **kwtext)
ax1.set_xlabel(r'$\frac{\mathrm{Observed} + \mathrm{MAR}}{2}$ [m$^{3}$ s$^{-1}$]')
ax1.set_ylabel(r'$\mathrm{Observed} - \mathrm{MAR}$ [m$^{3}$ s$^{-1}$]')
ax2.set_xlabel(r'$\frac{\mathrm{Observed} + \mathrm{RACMO}}{2}$ [m$^{3}$ s$^{-1}$]')
ax2.set_ylabel(r'$\mathrm{Observed} - \mathrm{RACMO}$ [m$^{3}$ s$^{-1}$]')
lims = [np.min([ax1.get_xlim()[0], ax1.get_ylim()[0], ax2.get_xlim()[0], ax2.get_ylim()[0]]),
np.max([ax1.get_xlim()[1], ax1.get_ylim()[1], ax2.get_xlim()[1], ax2.get_ylim()[1]])]
ticks = np.arange(round(lims[0]), round(lims[1])+1)
# ax.set_ylim(lims[0], lims[1])
# ax.set_xlim(lims[0], lims[1])
for ax in [ax1,ax2]:
ax.set_xticks(ticks)
ax.set_yticks(ticks)
labels = ['10$^{' + str(int(_)) + '}$' for _ in ticks]
ax.set_yticklabels(labels)
ax.set_xticks(ax.get_yticks())
ax.set_xticklabels(ax.get_yticklabels())
cax = fig.add_axes([0.40, 0.39, 0.2, 0.04])
cb = fig.colorbar(h_MAR, cax=cax, orientation='horizontal')
# cb.set_label('N')
# _ = adjust_spines(ax1, ['left','bottom'])
# _ = adjust_spines(ax2, ['right','bottom'])
_ = adjust_spines(ax1, ['left','bottom'])
_ = adjust_spines(ax2, ['right','bottom'])
_ = plt.setp(ax1.xaxis.get_majorticklabels(), rotation=45)
_ = plt.setp(ax2.xaxis.get_majorticklabels(), rotation=45)
plt.savefig("./fig/tukey_daily.png", bbox_inches='tight', dpi=300)
# # plt.savefig("./fig/scatter_daily.pdf", bbox_inches='tight', dpi=300)
# # plt.savefig("./fig/scatter_daily.svg", bbox_inches='tight', dpi=300)
<<py_init>>
<<py_init_graphics>>
# plt.close(1)
fig = plt.figure(1, figsize=(8,3.5)) # w,h
# get_current_fig_manager().window.move(0,0)
fig.clf()
fig.set_tight_layout(True)
ax1 = fig.add_subplot(121)
ax2 = fig.add_subplot(122)
<<load_all_obs>>
df = obs_noGEM
df['x'] = df['obs']
df['y_MAR'] = df['MAR'] / df['obs']
df['y_RACMO'] = df['RACMO'] / df['obs']
df = df.replace(0,np.nan).dropna()
df = np.log10(df)
df = df[~df.isin([np.nan, np.inf, -np.inf]).any(1)]
THRESH=3
df['y_MAR'] = df['y_MAR'].apply(lambda x: x if abs(x) < THRESH else np.sign(x)*THRESH)
df['y_RACMO'] = df['y_RACMO'].apply(lambda x: x if abs(x) < THRESH else np.sign(x)*THRESH)
kw = {'mincnt':1,
'bins':'log',
'clip_on':True,
'gridsize':20,
'extent':[-THRESH,THRESH,-THRESH,THRESH],
'cmap':plt.cm.cividis}
# plot all to get max of both for colorbar range
h_MAR = ax1.hexbin(df['x'], df['y_MAR'], alpha=0, **kw)
h_RACMO = ax2.hexbin(df['x'], df['y_RACMO'], alpha=0, **kw)
hmax = max([h_MAR.get_array().max(),h_RACMO.get_array().max()])
h_MAR = ax1.hexbin(df['x'], df['y_MAR'], vmax=hmax, **kw)
h_RACMO = ax2.hexbin(df['x'], df['y_RACMO'], vmax=hmax, **kw)
df_top = df[df['obs'] > df['obs'].quantile(0.33)]
df_bot = df[df['obs'] < df['obs'].quantile(0.33)]
kwline = {'color':'k'}
kwtext = {'path_effects':[pe.withStroke(linewidth=2, foreground="white")],
'color':'k',
'fontsize':10,
'verticalalignment':'center'}
for d in [df_top, df_bot]:
for ax in [ax2,ax1]:
if d is df_top:
xpos = 3.2
kwtext['horizontalalignment'] = 'left'
elif d is df_bot:
xpos = -2
kwtext['horizontalalignment'] = 'right'
if ax is ax1: yy = d['y_MAR']
if ax is ax2: yy = d['y_RACMO']
y = yy.mean()
ax.plot([d['x'].min(),d['x'].max()], [y,y], **kwline)
ax.text(xpos, y, str(round(10**y,2)), **kwtext)
# y = yy.mean() + 1.96 * yy.std()
y = yy.quantile(0.95)
ax.plot([d['x'].min(),d['x'].max()], [y,y], linestyle='--', **kwline)
ax.text(xpos, y, str(round(10**y,2)), **kwtext)
# y = yy.mean() - 1.96 * yy.std()
y = yy.quantile(0.05)
ax.plot([d['x'].min(),d['x'].max()], [y,y], linestyle='--', **kwline)
ax.text(xpos, y, str(round(10**y,2)), **kwtext)
ax1.set_xlabel('Observed [m$^{3}$ s$^{-1}$]')
ax2.set_xlabel('Observed [m$^{3}$ s$^{-1}$]')
ax1.set_ylabel('MAR / Observed')
ax2.set_ylabel('RACMO / Observed')
lims = [-3.5,3.5]
ticks = np.arange(-3,3+1)
for ax in [ax1,ax2]:
ax.set_xlim(lims[0], lims[1])
ax.set_xticks(ticks)
labels = ['10$^{' + str(int(_)) + '}$' for _ in ticks]
ax.set_xticks(ticks)
ax.set_xticklabels(labels)
ax.set_ylim(lims[0], lims[1])
ax.set_yticks(ticks)
ax.set_yticklabels(labels)
cax = fig.add_axes([0.37, 0.85, 0.2, 0.04])
cb = fig.colorbar(h_MAR, cax=cax, orientation='horizontal')
adjust_spines(ax1, ['left','bottom'])
adjust_spines(ax2, ['right','bottom'])
_ = plt.setp(ax1.xaxis.get_majorticklabels(), rotation=45)
_ = plt.setp(ax2.xaxis.get_majorticklabels(), rotation=45)
mticks = np.array([np.log10(np.linspace(2*_, 9*_, num=8)) for _ in [0.001, 0.01, 0.1,1,10,100]]).ravel()
for ax in [ax1,ax2]:
ax.set_xticks(mticks, minor=True)
ax.set_yticks(mticks, minor=True)
plt.savefig("./fig/tukey_daily3.png", bbox_inches='tight', dpi=300)
# plt.savefig("./fig/scatter_daily.pdf", bbox_inches='tight', dpi=300)
# plt.savefig("./fig/scatter_daily.svg", bbox_inches='tight', dpi=300)
convert ./fig/tukey_daily.png ./fig/tukey_daily3.png -gravity center -append fig/tukey.png
o ./fig/tukey.png
=> instead of giving a range +500%/-80%, I suggest you to rather ompute the mean error in % for each measurement you have in Fig4 by removing 5% of highest model-obs differences (by keep only percentile 95) and by weighting the mean by the measurement values to not give the same weight to the very low runoff value which are not representative for me when the errors is given in %.
<<py_init>>
<<py_init_graphics>>
<<load_all_obs>>
# plt.close(1)
fig = plt.figure(1, figsize=(8,3.5)) # w,h
# get_current_fig_manager().window.move(0,0)
fig.clf()
fig.set_tight_layout(True)
ax1 = fig.add_subplot(121)
ax2 = fig.add_subplot(122)
for k in obs.keys():
df = obs[k]
name = df.attrs['name']
df = df.replace(0, np.nan).dropna()
df = np.log10(df)
ax1.scatter(df['obs'], df['MAR'], marker='.', alpha=0.1,
label=name, edgecolor='none', clip_on=False)
ax2.scatter(df['obs'], df['RACMO'], marker='.', alpha=0.1,
label=name, edgecolor='none', clip_on=False)
df = obs_all
df = np.log10(df)
df = df[~df.isin([np.nan, np.inf, -np.inf]).any(1)]
df.sort_values(by='obs', inplace=True)
df['diff'] = df['obs'] - df['MAR']
df['diff %'] = df['obs'] / 10**df['diff'] * 100
# drop 5/95 outliers
q = df['diff %'].quantile([0.05, 0.5, 0.95])
# df = df[(df['diff %'] > q[0.05]) & (df['diff %'] < q[0.95])]
df = df[(df['diff %'] > q[0.5])]
x = df['obs']
y_MAR = df['MAR']
y_RACMO = df['RACMO']
weights = 10**x.values; weights = weights - np.min(weights)+1
weights = x.values * 0 + 1
# weights = x.values; weights = weights - np.min(weights)+1
# weights = (weights - np.min(weights)) / (np.max(weights) - np.min(weights))+0.01
X = sm.add_constant(x)
model = sm.OLS(y_MAR, X)
results = model.fit()
print(results.summary())
prstd, iv_l, iv_u = wls_prediction_std(results, weights=weights)
ax1.fill_between(x, iv_u, iv_l, color="grey", alpha=0.25)
model = sm.OLS(y_RACMO, X, weights=weights)
results = model.fit()
print(results.summary())
prstd, iv_l, iv_u = wls_prediction_std(results, weights=weights)
ax2.fill_between(x, iv_u, iv_l, color="grey", alpha=0.25)
coords = np.log10([1E-3, 1E4])
for ax in [ax1,ax2]:
ax.set_xlabel('Observed [m$^{3}$ s$^{-1}$]')
# kw = {'alpha':0.5, 'linewidth':1, 'color':'k', 'linestyle':'-'}
# ax.plot(np.log10([1E-3,1E4]), np.log10([1E-3,1E4]), **kw)
# ax.plot(np.log10([1E-3,1E4]), np.log10([1E-3/5,1E4/5]), **kw)
# ax.plot(np.log10([1E-3,1E4]), np.log10([1E-3*5,1E4*5]), **kw)
ax.set_ylim([-3,4])
ax.set_xlim(ax.get_ylim())
ax.set_yticks([-3,-2,-1, 0, 1,2,3,4])
ax.set_yticklabels(['10$^{-3}$','10$^{-2}$','10$^{-1}$','10$^{0}$','10$^{1}$','10$^{2}$','10$^{3}$','10$^{4}$'])
ax.set_xticks(ax.get_yticks())
ax.set_xticklabels(ax.get_yticklabels())
adjust_spines(ax1, ['left','bottom'])
adjust_spines(ax2, ['right','bottom'])
ax1.set_ylabel('MAR [m$^{^3}$ s$^{-1}$]')
ax2.set_ylabel('RACMO [m$^{^3}$ s$^{-1}$]')
leg = ax1.legend(fontsize=8, frameon=False, bbox_to_anchor=(0.8,0), loc='lower left', mode="expand")
ax2.set_zorder(-1)
for lh in leg.legendHandles:
lh.set_alpha(1)
plt.setp(ax1.xaxis.get_majorticklabels(), rotation=45)
plt.setp(ax2.xaxis.get_majorticklabels(), rotation=45)
# plt.savefig("./fig/scatter_daily.png", bbox_inches='tight', dpi=300)
# plt.savefig("./fig/scatter_daily.pdf", bbox_inches='tight', dpi=300)
# plt.savefig("./fig/scatter_daily.svg", bbox_inches='tight', dpi=300)
<<py_init>>
<<py_init_graphics>>
# plt.close(1)
fig = plt.figure(1, figsize=(8,3.5)) # w,h
# get_current_fig_manager().window.move(0,0)
fig.clf()
fig.set_tight_layout(True)
ax1 = fig.add_subplot(121)
ax2 = fig.add_subplot(122)
<<load_all_obs>>
for k in obs.keys():
df = obs[k]
name = df.attrs['name']
df = df.replace(0, np.nan).dropna()
# m^3/s summed by year -> km^3/yr
df = df.resample('A').sum() * 86400
df = np.log10(df)
ax1.scatter(df['obs'], df['MAR'], marker='$\mathrm{'+k+'}$', alpha=0.9,
label=name, clip_on=False, zorder=99)
ax2.scatter(df['obs'], df['RACMO'], marker='$\mathrm{'+k+'}$', alpha=0.9,
clip_on=False, zorder=99)
# combine all into one for confidence intervals
# one entry with everything, no time index, just all observation and model points
o,MAR,RACMO = [],[],[]
for k in obs.keys():
o = np.append(o, obs[k]['obs'].resample('A').sum())
MAR = np.append(MAR, obs[k]['MAR'].resample('A').sum())
RACMO = np.append(RACMO, obs[k]['RACMO'].resample('A').sum())
# m^3/s -> m^3/yr
df = pd.DataFrame((o,MAR,RACMO), index=['obs','MAR','RACMO']).T * 86400
df = np.log10(df)
df = df[~df.isin([np.nan, np.inf, -np.inf]).any(1)]
df.sort_values(by='obs', inplace=True)
x = df['obs']
y_MAR = df['MAR']
y_RACMO = df['RACMO']
X = sm.add_constant(x)
# X = x
model = sm.OLS(y_MAR, X)
results = model.fit()
prstd, iv_l, iv_u = wls_prediction_std(results)
ax1.fill_between(x, iv_u, iv_l, color="grey", alpha=0.25)
ax1.text(0.6, 0.05, 'r$^{2}$:' + str(round(results.rsquared,2)), transform=ax1.transAxes, horizontalalignment='left')
model = sm.OLS(y_RACMO, X)
results = model.fit()
prstd, iv_l, iv_u = wls_prediction_std(results)
ax2.fill_between(x, iv_u, iv_l, color="grey", alpha=0.25)
ax2.text(0.6, 0.05, 'r$^{2}$:' + str(round(results.rsquared,2)), transform=ax2.transAxes, horizontalalignment='left')
for ax in [ax1,ax2]:
# ax.set_yscale('log')
# ax.set_xscale('log')
# ax.set_xlim(1E1,1E5)
# ax.set_ylim(ax.get_xlim())
ax.set_xlabel('Observed [m$^{3}$]')
kw = {'alpha':0.5, 'linewidth':1, 'color':'k', 'linestyle':'-'}
ax.plot(np.log10([1E6,1E10]), np.log10([1E6,1E10]), **kw)
ax.plot(np.log10([1E6,1E10]), np.log10([1E6/2,1E10/2]), **kw)
ax.plot(np.log10([1E6,1E10]), np.log10([1E6*2,1E10*2]), **kw)
ax.set_ylim([6,10])
ax.set_xlim(ax.get_ylim())
ax.set_yticks([6,7,8,9,10])
ax.set_yticklabels(['10$^{6}$','10$^{7}$','10$^{8}$','10$^{9}$','10$^{10}$'])
ax.set_xticks(ax.get_yticks())
ax.set_xticklabels(ax.get_yticklabels())
# coords = ax.get_xlim()
# kw = {'alpha':0.5, 'linewidth':1}
# ax.plot([0,np.max(coords)],[0,np.max(coords)], 'k-', **kw)
# ax.plot([0,np.max(coords)],[0,np.max(coords)*2], 'k--', **kw)
# ax.plot([0,np.max(coords)],[0,np.max(coords)*0.5], 'k--', **kw)
# locmaj = matplotlib.ticker.LogLocator(base=10,numticks=12)
# ax.xaxis.set_major_locator(locmaj)
# ax.yaxis.set_major_locator(locmaj)
# kwargs = {'rotation':40, 'horizontalalignment':'center', 'fontsize':8, 'verticalalignment':'center'}
# if ax == ax1:
# loc=1200
# ax.text(loc, (loc/2)*0.6, "RCM = 1/2 * Obs", **kwargs)
# # ax.text(loc, loc*1.3, "RCM = Obs", **kwargs)
# loc=100
# ax.text(loc, (loc*2)*1.4, "RCM = 2 * Obs", **kwargs)
adjust_spines(ax1, ['left','bottom'])
adjust_spines(ax2, ['right','bottom'])
ax1.set_ylabel('MAR [m$^{^3}$]')
ax2.set_ylabel('RACMO [m$^{^3}$]')
leg = ax1.legend(fontsize=8, frameon=False, bbox_to_anchor=(0.9,0.1), loc='lower left', mode="expand")
ax2.set_zorder(-2)
for lh in leg.legendHandles:
lh.set_alpha(1)
for i,l in enumerate(leg.texts):
l.set_y(-1.5)
# l.set_x(-i*18+20)
# for i,l in enumerate(leg.legendHandles):
# l.set_offsets([[-i*12.5+10+20,4],[-i*12.5+10+20,4]])
# plt.setp(ax1.xaxis.get_majorticklabels(), rotation=45)
# plt.setp(ax2.xaxis.get_majorticklabels(), rotation=45)
plt.savefig("./fig/scatter_yearsum.png", bbox_inches='tight', dpi=300)
# plt.savefig("./fig/scatter_yearsum.pdf", bbox_inches='tight', dpi=300)
# plt.savefig("./fig/scatter_yearsum.svg", bbox_inches='tight', dpi=300)
# <<load_all_obs>>
for k in obs.keys():
df = obs[k].dropna()
if 'MAR_ice_upstream' not in df.columns: continue
df['model'] = df['MAR_ice_upstream'] + df['MAR_land']
NSE_MAR = 1 - (np.sum((df['model'] - df['obs'])**2) / np.sum((df['obs'] - df['obs'].mean())**2))
df['model'] = df['RACMO_ice_upstream'] + df['RACMO_land']
NSE_RACMO = 1 - (np.sum((df['model'] - df['obs'])**2) / np.sum((df['obs'] - df['obs'].mean())**2))
print(k, NSE_MAR, NSE_RACMO)