From a4d9a210eb9e6973f591da959b8e0ab02b8e6aaf Mon Sep 17 00:00:00 2001 From: Kefei Mo Date: Thu, 2 May 2024 17:06:10 +0000 Subject: [PATCH 1/2] added VAP quicklook notebooks, added entry point to _toc.yml, setup _config.yml to skip VAP/quicklook/* into build. --- .gitignore | 1 + .../aaf2dsh.c1-checkpoint.ipynb | 1271 +++ .../aaf2dsv.c1-checkpoint.ipynb | 1798 ++++ VAPs/quicklook/2DS-AIR/2DS-AIR_tutorial.ipynb | 838 ++ VAPs/quicklook/2DS-AIR/aaf2dsh.c1.ipynb | 297 + VAPs/quicklook/2DS-AIR/aaf2dsv.c1.ipynb | 297 + .../ACSMCDCE/ACSMCDCE_tutorial.ipynb | 838 ++ VAPs/quicklook/ACSMCDCE/acsmcdce.c1.ipynb | 339 + VAPs/quicklook/ACSMCDCE/acsmcdce.c2.ipynb | 339 + VAPs/quicklook/ACSMCDCE/acsmtofcdce.c1.ipynb | 339 + VAPs/quicklook/AERINF/AERINF_tutorial.ipynb | 838 ++ VAPs/quicklook/AERINF/aerich1nf1turn.c1.ipynb | 297 + VAPs/quicklook/AERINF/aerich2nf1turn.c1.ipynb | 297 + VAPs/quicklook/AERIOE/AERIOE_tutorial.ipynb | 838 ++ VAPs/quicklook/AERIOE/aerioe1turn.c1.ipynb | 297 + .../AERIPROF/AERIPROF_tutorial.ipynb | 838 ++ .../AERIPROF/aeri01prof3feltz.c1.ipynb | 339 + .../AERIPROF/aeriprof3feltz.c1.ipynb | 339 + VAPs/quicklook/AERIPROF/qmeaeriprof.c1.ipynb | 297 + .../AEROSOLBE/AEROSOLBE_tutorial.ipynb | 838 ++ .../AEROSOLBE/aerosolbe1turn.c1.ipynb | 339 + VAPs/quicklook/AIP/AIP_tutorial.ipynb | 838 ++ VAPs/quicklook/AIP/aip1ogren.c1.ipynb | 339 + VAPs/quicklook/AIP/aipavg1ogren.c1.ipynb | 339 + VAPs/quicklook/AIP/aipfitrh1ogren.c1.ipynb | 339 + .../AOD-MFRSR/AOD-MFRSR_tutorial.ipynb | 838 ++ .../AOD-MFRSR/mfrsr7nchaod1mich.c1.ipynb | 339 + .../quicklook/AOD-MFRSR/mfrsr7nchcal.c1.ipynb | 339 + .../AOD-MFRSR/mfrsraod1mich.c1.ipynb | 339 + VAPs/quicklook/AOD-MFRSR/mfrsrcal.c1.ipynb | 339 + .../AOD-NIMFR/AOD-NIMFR_tutorial.ipynb | 838 ++ .../AOD-NIMFR/nimfraod1mich.c1.ipynb | 339 + VAPs/quicklook/AOD/AOD_tutorial.ipynb | 838 ++ VAPs/quicklook/AOD/sasheniraod.c1.ipynb | 339 + VAPs/quicklook/AOD/sashevisaod.c1.ipynb | 339 + .../aoppsap1flynn1m.c1-checkpoint.ipynb | 8265 
+++++++++++++++++ VAPs/quicklook/AOP/AOP_tutorial.ipynb | 838 ++ VAPs/quicklook/AOP/aopclap1flynn1m.c1.ipynb | 339 + VAPs/quicklook/AOP/aoppsap1flynn1h.c1.ipynb | 339 + VAPs/quicklook/AOP/aoppsap1flynn1m.c1.ipynb | 339 + .../AOSCCNAVG/AOSCCNAVG_tutorial.ipynb | 838 ++ VAPs/quicklook/AOSCCNAVG/aosccnavg.c1.ipynb | 339 + VAPs/quicklook/AOSCCNAVG/aosccnavg.c2.ipynb | 339 + .../AOSSP2BC/AOSSP2BC_tutorial.ipynb | 838 ++ VAPs/quicklook/AOSSP2BC/aossp2rbc1m.c1.ipynb | 339 + VAPs/quicklook/ARMBE/ARMBE_tutorial.ipynb | 838 ++ VAPs/quicklook/ARMBE/armbeatm.c1.ipynb | 297 + VAPs/quicklook/ARMBE/armbecldrad.c1.ipynb | 339 + .../arscl1cloth.c1-checkpoint.ipynb | 2631 ++++++ .../arsclbnd1cloth.c1-checkpoint.ipynb | 1937 ++++ VAPs/quicklook/ARSCL/ARSCL_tutorial.ipynb | 838 ++ VAPs/quicklook/ARSCL/arscl1cloth.c1.ipynb | 339 + VAPs/quicklook/ARSCL/arsclbnd1cloth.c1.ipynb | 339 + .../ASDBE-AIR/ASDBE-AIR_tutorial.ipynb | 838 ++ .../ASDBE-AIR/aafmergedaerosolsd.c1.ipynb | 297 + VAPs/quicklook/BAEBBR/30baebbr.c1.ipynb | 339 + VAPs/quicklook/BAEBBR/BAEBBR_tutorial.ipynb | 838 ++ .../bbhrpavg1mlawer.c1-checkpoint.ipynb | 3768 ++++++++ .../BBHRP/1bbhrpripbe1mcfarlane.c1.ipynb | 339 + .../BBHRP/30bbhrpripbe1mcfarlane.c1.ipynb | 339 + VAPs/quicklook/BBHRP/BBHRP_tutorial.ipynb | 838 ++ VAPs/quicklook/BBHRP/bbhrpavg1mlawer.c1.ipynb | 339 + VAPs/quicklook/BEFLUX/BEFLUX_tutorial.ipynb | 838 ++ VAPs/quicklook/BEFLUX/beflux1long.c1.ipynb | 297 + VAPs/quicklook/BEFLUX/qcflux1long.c1.ipynb | 297 + .../CCNKAPPA/CCNKAPPA_tutorial.ipynb | 838 ++ .../CCNKAPPA/aosccnsmpskappa.c1.ipynb | 339 + .../rlccnprof1ghan.c1-checkpoint.ipynb | 4109 ++++++++ VAPs/quicklook/CCNPROF/CCNPROF_tutorial.ipynb | 838 ++ .../quicklook/CCNPROF/rlccnprof1ghan.c1.ipynb | 339 + VAPs/quicklook/CLAP/CLAP_tutorial.ipynb | 838 ++ VAPs/quicklook/CLAP/aosclap3w.c1.ipynb | 339 + VAPs/quicklook/CLDTYPE/CLDTYPE_tutorial.ipynb | 838 ++ VAPs/quicklook/CLDTYPE/cldtype.c1.ipynb | 339 + .../cmac2.c1-checkpoint.ipynb | 3537 +++++++ 
VAPs/quicklook/CMAC2/CMAC2_tutorial.ipynb | 838 ++ VAPs/quicklook/CMAC2/cmac2.c1.ipynb | 297 + VAPs/quicklook/CO-AIR/CO-AIR_tutorial.ipynb | 838 ++ VAPs/quicklook/CO-AIR/aafco.c1.ipynb | 297 + VAPs/quicklook/COGS/COGS_tutorial.ipynb | 838 ++ VAPs/quicklook/COGS/cogs.c1.ipynb | 297 + VAPs/quicklook/DIFFCOR/DIFFCOR_tutorial.ipynb | 838 ++ VAPs/quicklook/DIFFCOR/brs1dutt.c1.ipynb | 297 + VAPs/quicklook/DIFFCOR/siros1dutt.c1.ipynb | 297 + VAPs/quicklook/DIFFCOR/sirs1dutt.c1.ipynb | 297 + .../DLPROF-WIND/DLPROF-WIND_tutorial.ipynb | 838 ++ .../DLPROF-WIND/dlprofwind4news.c1.ipynb | 297 + .../DLPROF-WSTATS_tutorial.ipynb | 838 ++ .../DLPROF-WSTATS/dlprofwstats4news.c1.ipynb | 297 + .../FCDP-AIR/FCDP-AIR_tutorial.ipynb | 838 ++ VAPs/quicklook/FCDP-AIR/aaffcdp.c1.ipynb | 297 + VAPs/quicklook/GVR/GVR_tutorial.ipynb | 838 ++ VAPs/quicklook/GVR/gvr.c1.ipynb | 339 + .../HVPS-AIR/HVPS-AIR_tutorial.ipynb | 838 ++ VAPs/quicklook/HVPS-AIR/aafhvps.c1.ipynb | 297 + .../INLETCVI-AIR/INLETCVI-AIR_tutorial.ipynb | 838 ++ .../INLETCVI-AIR/aafinletcvi.c1.ipynb | 339 + .../INTERPSONDE/INTERPSONDE_tutorial.ipynb | 838 ++ .../INTERPSONDE/interpolatedsonde.c1.ipynb | 339 + .../KAZRARSCL/KAZRARSCL_tutorial.ipynb | 838 ++ .../KAZRARSCL/arsclkazr1kollias.c1.ipynb | 339 + .../KAZRARSCL/arsclkazrbnd1kollias.c1.ipynb | 297 + .../KAZRARSCLCLOUDSAT_tutorial.ipynb | 838 ++ .../arsclkazrcloudsat.c1.ipynb | 339 + .../KAZRCFRCOR/KAZRCFRCOR_tutorial.ipynb | 838 ++ .../KAZRCFRCOR/kazrcfrcorge.c1.ipynb | 339 + .../KAZRCFRCOR/kazrcfrcormd.c1.ipynb | 339 + .../kazrcorge.c1-checkpoint.ipynb | 445 + .../kazrcorhi.c1-checkpoint.ipynb | 1856 ++++ .../kazrcormd.c1-checkpoint.ipynb | 2667 ++++++ VAPs/quicklook/KAZRCOR/KAZRCOR_tutorial.ipynb | 838 ++ VAPs/quicklook/KAZRCOR/kazrcorge.c1.ipynb | 339 + VAPs/quicklook/KAZRCOR/kazrcorhi.c1.ipynb | 339 + VAPs/quicklook/KAZRCOR/kazrcormd.c1.ipynb | 339 + .../LCLHEIGHT/LCLHEIGHT_tutorial.ipynb | 838 ++ VAPs/quicklook/LCLHEIGHT/lcl.c1.ipynb | 339 + 
.../LDQUANTS/LDQUANTS_tutorial.ipynb | 838 ++ VAPs/quicklook/LDQUANTS/ldquants.c1.ipynb | 297 + VAPs/quicklook/LSSONDE/LSSONDE_tutorial.ipynb | 838 ++ VAPs/quicklook/LSSONDE/lssonde.c1.ipynb | 297 + .../MASCPARTICLES_tutorial.ipynb | 838 ++ .../MASCPARTICLES/mascparticles.c1.ipynb | 339 + .../MASCPARTICLES/mascparticlesavg.c1.ipynb | 339 + .../MERGED-COMMON_tutorial.ipynb | 838 ++ .../MERGED-COMMON/aafmergedcldsd.c1.ipynb | 297 + .../MERGEDSMPSAPS_tutorial.ipynb | 838 ++ .../MERGEDSMPSAPS/mergedsmpsaps.c1.ipynb | 339 + .../MERGESONDE/MERGESONDE_tutorial.ipynb | 838 ++ .../MERGESONDE/mergesonde1mace.c1.ipynb | 339 + .../MERGESONDE/mergesonde2mace.c1.ipynb | 339 + .../mfrsrcldod1min.c1-checkpoint.ipynb | 799 ++ .../MFRSRCLDOD/MFRSRCLDOD_tutorial.ipynb | 838 ++ .../MFRSRCLDOD/mfrsrcldod1min.c1.ipynb | 339 + .../microbasepi2.c1-checkpoint.ipynb | 468 + .../microbasepiavg.c1-checkpoint.ipynb | 1757 ++++ .../MICROBASE/MICROBASE_tutorial.ipynb | 838 ++ VAPs/quicklook/MICROBASE/microbasepi.c1.ipynb | 297 + .../quicklook/MICROBASE/microbasepi2.c1.ipynb | 297 + .../MICROBASE/microbasepiavg.c1.ipynb | 297 + VAPs/quicklook/MPLAVG/MPLAVG_tutorial.ipynb | 838 ++ VAPs/quicklook/MPLAVG/mplpolavg.c1.ipynb | 339 + .../MPLCMASK/30smplcmask1zwang.c1.ipynb | 368 + .../MPLCMASK/MPLCMASK_tutorial.ipynb | 867 ++ .../MPLCMASKML/MPLCMASKML_tutorial.ipynb | 838 ++ VAPs/quicklook/MPLCMASKML/mplcmaskml.c1.ipynb | 339 + .../mplnor1camp.c1-checkpoint.ipynb | 1732 ++++ VAPs/quicklook/MPLNOR/MPLNOR_tutorial.ipynb | 838 ++ VAPs/quicklook/MPLNOR/mplnor1camp.c1.ipynb | 297 + VAPs/quicklook/MWRRET/MWRRET_tutorial.ipynb | 838 ++ .../quicklook/MWRRET/mwrret1liljclou.c1.ipynb | 339 + .../quicklook/MWRRET/mwrret1liljclou.c2.ipynb | 339 + .../MWRRETV2/MWRRETV2_tutorial.ipynb | 838 ++ VAPs/quicklook/MWRRETV2/mwrret2turn.c1.ipynb | 339 + .../NAVMET-AIR/NAVMET-AIR_tutorial.ipynb | 838 ++ VAPs/quicklook/NAVMET-AIR/aafnaviwg.c1.ipynb | 297 + VAPs/quicklook/NDROP/NDROP_tutorial.ipynb | 838 ++ 
VAPs/quicklook/NDROP/ndropmfrsr.c1.ipynb | 339 + .../NEPHELOMETER/NEPHELOMETER_tutorial.ipynb | 838 ++ .../NEPHELOMETER/aosnephdry.c1.ipynb | 339 + .../NEPHELOMETER/aosnephwet.c1.ipynb | 339 + .../okmsoil.c1-checkpoint.ipynb | 2048 ++++ VAPs/quicklook/OKMSOIL/OKMSOIL_tutorial.ipynb | 838 ++ VAPs/quicklook/OKMSOIL/okmsoil.c1.ipynb | 339 + .../OZONE-AIR/OZONE-AIR_tutorial.ipynb | 838 ++ VAPs/quicklook/OZONE-AIR/aafo3.c1.ipynb | 297 + .../pblhtsonde1mcfarl.c1-checkpoint.ipynb | 679 ++ VAPs/quicklook/PBLHT/PBLHT_tutorial.ipynb | 838 ++ .../PBLHT/pblhtsonde1mcfarl.c1.ipynb | 297 + .../PBLHT/pblhtsondeyr1mcfarl.c1.ipynb | 339 + VAPs/quicklook/PCCP/PCCP_tutorial.ipynb | 943 ++ VAPs/quicklook/PCCP/pccp.c1.ipynb | 310 + .../aospsap3w.c1-checkpoint.ipynb | 1841 ++++ VAPs/quicklook/PSAP/PSAP_tutorial.ipynb | 838 ++ VAPs/quicklook/PSAP/aospsap3w.c1.ipynb | 339 + VAPs/quicklook/QCRAD/QCRAD_tutorial.ipynb | 838 ++ VAPs/quicklook/QCRAD/qcrad1long.c1.ipynb | 339 + VAPs/quicklook/QCRAD/qcrad1long.c2.ipynb | 339 + .../quicklook/QCRAD/qcradbeflux1long.c1.ipynb | 339 + .../quicklook/QCRAD/qcradbeflux1long.c2.ipynb | 339 + VAPs/quicklook/QCRAD/qcradbrs1long.c1.ipynb | 339 + VAPs/quicklook/QCRAD/qcradbrs1long.c2.ipynb | 339 + .../radflux1long.c1-checkpoint.ipynb | 3763 ++++++++ .../RADFLUXANAL/RADFLUXANAL_tutorial.ipynb | 838 ++ .../RADFLUXANAL/radflux1long.c1.ipynb | 339 + .../RADFLUXANAL/radflux1long.c2.ipynb | 339 + .../RADFLUXANAL/radfluxbrs1long.c2.ipynb | 339 + .../RIPBE/30ripbe1mcfarlane.c1.ipynb | 339 + VAPs/quicklook/RIPBE/RIPBE_tutorial.ipynb | 838 ++ VAPs/quicklook/RIPBE/ripbe1mcfarlane.c1.ipynb | 339 + .../quicklook/RLPROF/10rlprofbe1news.c1.ipynb | 339 + VAPs/quicklook/RLPROF/RLPROF_tutorial.ipynb | 838 ++ .../kasacradv3d3c.c1-checkpoint.ipynb | 2574 +++++ .../SACRADV3D3C/SACRADV3D3C_tutorial.ipynb | 838 ++ .../SACRADV3D3C/kasacradv3d3c.c1.ipynb | 297 + .../SACRADVVAD/SACRADVVAD_tutorial.ipynb | 838 ++ .../SACRADVVAD/kasacradvvad.c1.ipynb | 297 + 
.../15swfcldgrid1long.c1-checkpoint.ipynb | 2384 +++++ ...fccldgrid2longcaracena.c1-checkpoint.ipynb | 5150 ++++++++++ .../SFCCLDGRID/15swfcldgrid1long.c1.ipynb | 339 + .../SFCCLDGRID/SFCCLDGRID_tutorial.ipynb | 838 ++ .../sfccldgrid2longcaracena.c1.ipynb | 339 + .../sfccldgrid2longstation.c1.ipynb | 339 + .../SHALLOWCUMULUS_tutorial.ipynb | 838 ++ .../SHALLOWCUMULUS/shallowcumulus.c1.ipynb | 297 + .../SHALLOWCUMULUS/shcusummary.c1.ipynb | 297 + VAPs/quicklook/SO2-AIR/SO2-AIR_tutorial.ipynb | 838 ++ VAPs/quicklook/SO2-AIR/aafso2.c1.ipynb | 297 + .../SONDEADJUST_tutorial-checkpoint.ipynb | 4058 ++++++++ .../SONDEADJUST/SONDEADJUST_tutorial.ipynb | 884 ++ .../SONDEADJUST/sondeadjust.c1.ipynb | 385 + .../SONDEPARAM/SONDEPARAM_tutorial.ipynb | 838 ++ VAPs/quicklook/SONDEPARAM/sondeparam.c1.ipynb | 297 + VAPs/quicklook/SP2-AIR/SP2-AIR_tutorial.ipynb | 838 ++ VAPs/quicklook/SP2-AIR/aafsp2rbc10s.c1.ipynb | 339 + .../SPHOTCOD/SPHOTCOD_tutorial.ipynb | 838 ++ .../quicklook/SPHOTCOD/sphotcod2chiu.c1.ipynb | 297 + .../SURFSPECALB/SURFSPECALB_tutorial.ipynb | 838 ++ .../SURFSPECALB/surfspecalb1mlawer.c1.ipynb | 339 + .../surfspecalb7nch1mlawer.c1.ipynb | 339 + .../1swfanalsirs1long.c1-checkpoint.ipynb | 2654 ++++++ .../TBSMERGED/TBSMERGED_tutorial.ipynb | 838 ++ VAPs/quicklook/TBSMERGED/tbsmerged.c1.ipynb | 339 + .../TBSMERGED/tbsmergedincloud.c1.ipynb | 339 + VAPs/quicklook/TDMA/TDMA_tutorial.ipynb | 838 ++ VAPs/quicklook/TDMA/tdmaapssize.c1.ipynb | 339 + VAPs/quicklook/TWRMR/1twrmr.c1.ipynb | 339 + VAPs/quicklook/TWRMR/30twrmr.c1.ipynb | 297 + VAPs/quicklook/TWRMR/TWRMR_tutorial.ipynb | 838 ++ .../quicklook/VARANAL/180varanaecmwf.c1.ipynb | 297 + .../VARANAL/180varanamerra001.c1.ipynb | 297 + VAPs/quicklook/VARANAL/VARANAL_tutorial.ipynb | 838 ++ .../VARANAL3D/180varanal3dera5.c1.ipynb | 297 + .../VARANAL3D/180varanal3dncep.c1.ipynb | 297 + .../VARANAL3D/VARANAL3D_tutorial.ipynb | 838 ++ .../VDISQUANTS/VDISQUANTS_tutorial.ipynb | 838 ++ 
VAPs/quicklook/VDISQUANTS/vdisquants.c1.ipynb | 297 + .../WACRARSCL/WACRARSCL_tutorial.ipynb | 838 ++ .../WACRARSCL/arsclwacr1kollias.c1.ipynb | 339 + .../WACRARSCL/arsclwacrbnd1kollias.c1.ipynb | 339 + VAPs/vap_notebook_list.md | 1010 ++ _config.yml | 1 + _toc.yml | 1 + 242 files changed, 175046 insertions(+) create mode 100644 VAPs/quicklook/2DS-AIR/.ipynb_checkpoints/aaf2dsh.c1-checkpoint.ipynb create mode 100644 VAPs/quicklook/2DS-AIR/.ipynb_checkpoints/aaf2dsv.c1-checkpoint.ipynb create mode 100644 VAPs/quicklook/2DS-AIR/2DS-AIR_tutorial.ipynb create mode 100644 VAPs/quicklook/2DS-AIR/aaf2dsh.c1.ipynb create mode 100644 VAPs/quicklook/2DS-AIR/aaf2dsv.c1.ipynb create mode 100644 VAPs/quicklook/ACSMCDCE/ACSMCDCE_tutorial.ipynb create mode 100644 VAPs/quicklook/ACSMCDCE/acsmcdce.c1.ipynb create mode 100644 VAPs/quicklook/ACSMCDCE/acsmcdce.c2.ipynb create mode 100644 VAPs/quicklook/ACSMCDCE/acsmtofcdce.c1.ipynb create mode 100644 VAPs/quicklook/AERINF/AERINF_tutorial.ipynb create mode 100644 VAPs/quicklook/AERINF/aerich1nf1turn.c1.ipynb create mode 100644 VAPs/quicklook/AERINF/aerich2nf1turn.c1.ipynb create mode 100644 VAPs/quicklook/AERIOE/AERIOE_tutorial.ipynb create mode 100644 VAPs/quicklook/AERIOE/aerioe1turn.c1.ipynb create mode 100644 VAPs/quicklook/AERIPROF/AERIPROF_tutorial.ipynb create mode 100644 VAPs/quicklook/AERIPROF/aeri01prof3feltz.c1.ipynb create mode 100644 VAPs/quicklook/AERIPROF/aeriprof3feltz.c1.ipynb create mode 100644 VAPs/quicklook/AERIPROF/qmeaeriprof.c1.ipynb create mode 100644 VAPs/quicklook/AEROSOLBE/AEROSOLBE_tutorial.ipynb create mode 100644 VAPs/quicklook/AEROSOLBE/aerosolbe1turn.c1.ipynb create mode 100644 VAPs/quicklook/AIP/AIP_tutorial.ipynb create mode 100644 VAPs/quicklook/AIP/aip1ogren.c1.ipynb create mode 100644 VAPs/quicklook/AIP/aipavg1ogren.c1.ipynb create mode 100644 VAPs/quicklook/AIP/aipfitrh1ogren.c1.ipynb create mode 100644 VAPs/quicklook/AOD-MFRSR/AOD-MFRSR_tutorial.ipynb create mode 100644 
VAPs/quicklook/AOD-MFRSR/mfrsr7nchaod1mich.c1.ipynb create mode 100644 VAPs/quicklook/AOD-MFRSR/mfrsr7nchcal.c1.ipynb create mode 100644 VAPs/quicklook/AOD-MFRSR/mfrsraod1mich.c1.ipynb create mode 100644 VAPs/quicklook/AOD-MFRSR/mfrsrcal.c1.ipynb create mode 100644 VAPs/quicklook/AOD-NIMFR/AOD-NIMFR_tutorial.ipynb create mode 100644 VAPs/quicklook/AOD-NIMFR/nimfraod1mich.c1.ipynb create mode 100644 VAPs/quicklook/AOD/AOD_tutorial.ipynb create mode 100644 VAPs/quicklook/AOD/sasheniraod.c1.ipynb create mode 100644 VAPs/quicklook/AOD/sashevisaod.c1.ipynb create mode 100644 VAPs/quicklook/AOP/.ipynb_checkpoints/aoppsap1flynn1m.c1-checkpoint.ipynb create mode 100644 VAPs/quicklook/AOP/AOP_tutorial.ipynb create mode 100644 VAPs/quicklook/AOP/aopclap1flynn1m.c1.ipynb create mode 100644 VAPs/quicklook/AOP/aoppsap1flynn1h.c1.ipynb create mode 100644 VAPs/quicklook/AOP/aoppsap1flynn1m.c1.ipynb create mode 100644 VAPs/quicklook/AOSCCNAVG/AOSCCNAVG_tutorial.ipynb create mode 100644 VAPs/quicklook/AOSCCNAVG/aosccnavg.c1.ipynb create mode 100644 VAPs/quicklook/AOSCCNAVG/aosccnavg.c2.ipynb create mode 100644 VAPs/quicklook/AOSSP2BC/AOSSP2BC_tutorial.ipynb create mode 100644 VAPs/quicklook/AOSSP2BC/aossp2rbc1m.c1.ipynb create mode 100644 VAPs/quicklook/ARMBE/ARMBE_tutorial.ipynb create mode 100644 VAPs/quicklook/ARMBE/armbeatm.c1.ipynb create mode 100644 VAPs/quicklook/ARMBE/armbecldrad.c1.ipynb create mode 100644 VAPs/quicklook/ARSCL/.ipynb_checkpoints/arscl1cloth.c1-checkpoint.ipynb create mode 100644 VAPs/quicklook/ARSCL/.ipynb_checkpoints/arsclbnd1cloth.c1-checkpoint.ipynb create mode 100644 VAPs/quicklook/ARSCL/ARSCL_tutorial.ipynb create mode 100644 VAPs/quicklook/ARSCL/arscl1cloth.c1.ipynb create mode 100644 VAPs/quicklook/ARSCL/arsclbnd1cloth.c1.ipynb create mode 100644 VAPs/quicklook/ASDBE-AIR/ASDBE-AIR_tutorial.ipynb create mode 100644 VAPs/quicklook/ASDBE-AIR/aafmergedaerosolsd.c1.ipynb create mode 100644 VAPs/quicklook/BAEBBR/30baebbr.c1.ipynb create mode 100644 
VAPs/quicklook/BAEBBR/BAEBBR_tutorial.ipynb create mode 100644 VAPs/quicklook/BBHRP/.ipynb_checkpoints/bbhrpavg1mlawer.c1-checkpoint.ipynb create mode 100644 VAPs/quicklook/BBHRP/1bbhrpripbe1mcfarlane.c1.ipynb create mode 100644 VAPs/quicklook/BBHRP/30bbhrpripbe1mcfarlane.c1.ipynb create mode 100644 VAPs/quicklook/BBHRP/BBHRP_tutorial.ipynb create mode 100644 VAPs/quicklook/BBHRP/bbhrpavg1mlawer.c1.ipynb create mode 100644 VAPs/quicklook/BEFLUX/BEFLUX_tutorial.ipynb create mode 100644 VAPs/quicklook/BEFLUX/beflux1long.c1.ipynb create mode 100644 VAPs/quicklook/BEFLUX/qcflux1long.c1.ipynb create mode 100644 VAPs/quicklook/CCNKAPPA/CCNKAPPA_tutorial.ipynb create mode 100644 VAPs/quicklook/CCNKAPPA/aosccnsmpskappa.c1.ipynb create mode 100644 VAPs/quicklook/CCNPROF/.ipynb_checkpoints/rlccnprof1ghan.c1-checkpoint.ipynb create mode 100644 VAPs/quicklook/CCNPROF/CCNPROF_tutorial.ipynb create mode 100644 VAPs/quicklook/CCNPROF/rlccnprof1ghan.c1.ipynb create mode 100644 VAPs/quicklook/CLAP/CLAP_tutorial.ipynb create mode 100644 VAPs/quicklook/CLAP/aosclap3w.c1.ipynb create mode 100644 VAPs/quicklook/CLDTYPE/CLDTYPE_tutorial.ipynb create mode 100644 VAPs/quicklook/CLDTYPE/cldtype.c1.ipynb create mode 100644 VAPs/quicklook/CMAC2/.ipynb_checkpoints/cmac2.c1-checkpoint.ipynb create mode 100644 VAPs/quicklook/CMAC2/CMAC2_tutorial.ipynb create mode 100644 VAPs/quicklook/CMAC2/cmac2.c1.ipynb create mode 100644 VAPs/quicklook/CO-AIR/CO-AIR_tutorial.ipynb create mode 100644 VAPs/quicklook/CO-AIR/aafco.c1.ipynb create mode 100644 VAPs/quicklook/COGS/COGS_tutorial.ipynb create mode 100644 VAPs/quicklook/COGS/cogs.c1.ipynb create mode 100644 VAPs/quicklook/DIFFCOR/DIFFCOR_tutorial.ipynb create mode 100644 VAPs/quicklook/DIFFCOR/brs1dutt.c1.ipynb create mode 100644 VAPs/quicklook/DIFFCOR/siros1dutt.c1.ipynb create mode 100644 VAPs/quicklook/DIFFCOR/sirs1dutt.c1.ipynb create mode 100644 VAPs/quicklook/DLPROF-WIND/DLPROF-WIND_tutorial.ipynb create mode 100644 
VAPs/quicklook/DLPROF-WIND/dlprofwind4news.c1.ipynb create mode 100644 VAPs/quicklook/DLPROF-WSTATS/DLPROF-WSTATS_tutorial.ipynb create mode 100644 VAPs/quicklook/DLPROF-WSTATS/dlprofwstats4news.c1.ipynb create mode 100644 VAPs/quicklook/FCDP-AIR/FCDP-AIR_tutorial.ipynb create mode 100644 VAPs/quicklook/FCDP-AIR/aaffcdp.c1.ipynb create mode 100644 VAPs/quicklook/GVR/GVR_tutorial.ipynb create mode 100644 VAPs/quicklook/GVR/gvr.c1.ipynb create mode 100644 VAPs/quicklook/HVPS-AIR/HVPS-AIR_tutorial.ipynb create mode 100644 VAPs/quicklook/HVPS-AIR/aafhvps.c1.ipynb create mode 100644 VAPs/quicklook/INLETCVI-AIR/INLETCVI-AIR_tutorial.ipynb create mode 100644 VAPs/quicklook/INLETCVI-AIR/aafinletcvi.c1.ipynb create mode 100644 VAPs/quicklook/INTERPSONDE/INTERPSONDE_tutorial.ipynb create mode 100644 VAPs/quicklook/INTERPSONDE/interpolatedsonde.c1.ipynb create mode 100644 VAPs/quicklook/KAZRARSCL/KAZRARSCL_tutorial.ipynb create mode 100644 VAPs/quicklook/KAZRARSCL/arsclkazr1kollias.c1.ipynb create mode 100644 VAPs/quicklook/KAZRARSCL/arsclkazrbnd1kollias.c1.ipynb create mode 100644 VAPs/quicklook/KAZRARSCLCLOUDSAT/KAZRARSCLCLOUDSAT_tutorial.ipynb create mode 100644 VAPs/quicklook/KAZRARSCLCLOUDSAT/arsclkazrcloudsat.c1.ipynb create mode 100644 VAPs/quicklook/KAZRCFRCOR/KAZRCFRCOR_tutorial.ipynb create mode 100644 VAPs/quicklook/KAZRCFRCOR/kazrcfrcorge.c1.ipynb create mode 100644 VAPs/quicklook/KAZRCFRCOR/kazrcfrcormd.c1.ipynb create mode 100644 VAPs/quicklook/KAZRCOR/.ipynb_checkpoints/kazrcorge.c1-checkpoint.ipynb create mode 100644 VAPs/quicklook/KAZRCOR/.ipynb_checkpoints/kazrcorhi.c1-checkpoint.ipynb create mode 100644 VAPs/quicklook/KAZRCOR/.ipynb_checkpoints/kazrcormd.c1-checkpoint.ipynb create mode 100644 VAPs/quicklook/KAZRCOR/KAZRCOR_tutorial.ipynb create mode 100644 VAPs/quicklook/KAZRCOR/kazrcorge.c1.ipynb create mode 100644 VAPs/quicklook/KAZRCOR/kazrcorhi.c1.ipynb create mode 100644 VAPs/quicklook/KAZRCOR/kazrcormd.c1.ipynb create mode 100644 
VAPs/quicklook/LCLHEIGHT/LCLHEIGHT_tutorial.ipynb create mode 100644 VAPs/quicklook/LCLHEIGHT/lcl.c1.ipynb create mode 100644 VAPs/quicklook/LDQUANTS/LDQUANTS_tutorial.ipynb create mode 100644 VAPs/quicklook/LDQUANTS/ldquants.c1.ipynb create mode 100644 VAPs/quicklook/LSSONDE/LSSONDE_tutorial.ipynb create mode 100644 VAPs/quicklook/LSSONDE/lssonde.c1.ipynb create mode 100644 VAPs/quicklook/MASCPARTICLES/MASCPARTICLES_tutorial.ipynb create mode 100644 VAPs/quicklook/MASCPARTICLES/mascparticles.c1.ipynb create mode 100644 VAPs/quicklook/MASCPARTICLES/mascparticlesavg.c1.ipynb create mode 100644 VAPs/quicklook/MERGED-COMMON/MERGED-COMMON_tutorial.ipynb create mode 100644 VAPs/quicklook/MERGED-COMMON/aafmergedcldsd.c1.ipynb create mode 100644 VAPs/quicklook/MERGEDSMPSAPS/MERGEDSMPSAPS_tutorial.ipynb create mode 100644 VAPs/quicklook/MERGEDSMPSAPS/mergedsmpsaps.c1.ipynb create mode 100644 VAPs/quicklook/MERGESONDE/MERGESONDE_tutorial.ipynb create mode 100644 VAPs/quicklook/MERGESONDE/mergesonde1mace.c1.ipynb create mode 100644 VAPs/quicklook/MERGESONDE/mergesonde2mace.c1.ipynb create mode 100644 VAPs/quicklook/MFRSRCLDOD/.ipynb_checkpoints/mfrsrcldod1min.c1-checkpoint.ipynb create mode 100644 VAPs/quicklook/MFRSRCLDOD/MFRSRCLDOD_tutorial.ipynb create mode 100644 VAPs/quicklook/MFRSRCLDOD/mfrsrcldod1min.c1.ipynb create mode 100644 VAPs/quicklook/MICROBASE/.ipynb_checkpoints/microbasepi2.c1-checkpoint.ipynb create mode 100644 VAPs/quicklook/MICROBASE/.ipynb_checkpoints/microbasepiavg.c1-checkpoint.ipynb create mode 100644 VAPs/quicklook/MICROBASE/MICROBASE_tutorial.ipynb create mode 100644 VAPs/quicklook/MICROBASE/microbasepi.c1.ipynb create mode 100644 VAPs/quicklook/MICROBASE/microbasepi2.c1.ipynb create mode 100644 VAPs/quicklook/MICROBASE/microbasepiavg.c1.ipynb create mode 100644 VAPs/quicklook/MPLAVG/MPLAVG_tutorial.ipynb create mode 100644 VAPs/quicklook/MPLAVG/mplpolavg.c1.ipynb create mode 100644 VAPs/quicklook/MPLCMASK/30smplcmask1zwang.c1.ipynb create mode 
100644 VAPs/quicklook/MPLCMASK/MPLCMASK_tutorial.ipynb create mode 100644 VAPs/quicklook/MPLCMASKML/MPLCMASKML_tutorial.ipynb create mode 100644 VAPs/quicklook/MPLCMASKML/mplcmaskml.c1.ipynb create mode 100644 VAPs/quicklook/MPLNOR/.ipynb_checkpoints/mplnor1camp.c1-checkpoint.ipynb create mode 100644 VAPs/quicklook/MPLNOR/MPLNOR_tutorial.ipynb create mode 100644 VAPs/quicklook/MPLNOR/mplnor1camp.c1.ipynb create mode 100644 VAPs/quicklook/MWRRET/MWRRET_tutorial.ipynb create mode 100644 VAPs/quicklook/MWRRET/mwrret1liljclou.c1.ipynb create mode 100644 VAPs/quicklook/MWRRET/mwrret1liljclou.c2.ipynb create mode 100644 VAPs/quicklook/MWRRETV2/MWRRETV2_tutorial.ipynb create mode 100644 VAPs/quicklook/MWRRETV2/mwrret2turn.c1.ipynb create mode 100644 VAPs/quicklook/NAVMET-AIR/NAVMET-AIR_tutorial.ipynb create mode 100644 VAPs/quicklook/NAVMET-AIR/aafnaviwg.c1.ipynb create mode 100644 VAPs/quicklook/NDROP/NDROP_tutorial.ipynb create mode 100644 VAPs/quicklook/NDROP/ndropmfrsr.c1.ipynb create mode 100644 VAPs/quicklook/NEPHELOMETER/NEPHELOMETER_tutorial.ipynb create mode 100644 VAPs/quicklook/NEPHELOMETER/aosnephdry.c1.ipynb create mode 100644 VAPs/quicklook/NEPHELOMETER/aosnephwet.c1.ipynb create mode 100644 VAPs/quicklook/OKMSOIL/.ipynb_checkpoints/okmsoil.c1-checkpoint.ipynb create mode 100644 VAPs/quicklook/OKMSOIL/OKMSOIL_tutorial.ipynb create mode 100644 VAPs/quicklook/OKMSOIL/okmsoil.c1.ipynb create mode 100644 VAPs/quicklook/OZONE-AIR/OZONE-AIR_tutorial.ipynb create mode 100644 VAPs/quicklook/OZONE-AIR/aafo3.c1.ipynb create mode 100644 VAPs/quicklook/PBLHT/.ipynb_checkpoints/pblhtsonde1mcfarl.c1-checkpoint.ipynb create mode 100644 VAPs/quicklook/PBLHT/PBLHT_tutorial.ipynb create mode 100644 VAPs/quicklook/PBLHT/pblhtsonde1mcfarl.c1.ipynb create mode 100644 VAPs/quicklook/PBLHT/pblhtsondeyr1mcfarl.c1.ipynb create mode 100644 VAPs/quicklook/PCCP/PCCP_tutorial.ipynb create mode 100644 VAPs/quicklook/PCCP/pccp.c1.ipynb create mode 100644 
VAPs/quicklook/PSAP/.ipynb_checkpoints/aospsap3w.c1-checkpoint.ipynb create mode 100644 VAPs/quicklook/PSAP/PSAP_tutorial.ipynb create mode 100644 VAPs/quicklook/PSAP/aospsap3w.c1.ipynb create mode 100644 VAPs/quicklook/QCRAD/QCRAD_tutorial.ipynb create mode 100644 VAPs/quicklook/QCRAD/qcrad1long.c1.ipynb create mode 100644 VAPs/quicklook/QCRAD/qcrad1long.c2.ipynb create mode 100644 VAPs/quicklook/QCRAD/qcradbeflux1long.c1.ipynb create mode 100644 VAPs/quicklook/QCRAD/qcradbeflux1long.c2.ipynb create mode 100644 VAPs/quicklook/QCRAD/qcradbrs1long.c1.ipynb create mode 100644 VAPs/quicklook/QCRAD/qcradbrs1long.c2.ipynb create mode 100644 VAPs/quicklook/RADFLUXANAL/.ipynb_checkpoints/radflux1long.c1-checkpoint.ipynb create mode 100644 VAPs/quicklook/RADFLUXANAL/RADFLUXANAL_tutorial.ipynb create mode 100644 VAPs/quicklook/RADFLUXANAL/radflux1long.c1.ipynb create mode 100644 VAPs/quicklook/RADFLUXANAL/radflux1long.c2.ipynb create mode 100644 VAPs/quicklook/RADFLUXANAL/radfluxbrs1long.c2.ipynb create mode 100644 VAPs/quicklook/RIPBE/30ripbe1mcfarlane.c1.ipynb create mode 100644 VAPs/quicklook/RIPBE/RIPBE_tutorial.ipynb create mode 100644 VAPs/quicklook/RIPBE/ripbe1mcfarlane.c1.ipynb create mode 100644 VAPs/quicklook/RLPROF/10rlprofbe1news.c1.ipynb create mode 100644 VAPs/quicklook/RLPROF/RLPROF_tutorial.ipynb create mode 100644 VAPs/quicklook/SACRADV3D3C/.ipynb_checkpoints/kasacradv3d3c.c1-checkpoint.ipynb create mode 100644 VAPs/quicklook/SACRADV3D3C/SACRADV3D3C_tutorial.ipynb create mode 100644 VAPs/quicklook/SACRADV3D3C/kasacradv3d3c.c1.ipynb create mode 100644 VAPs/quicklook/SACRADVVAD/SACRADVVAD_tutorial.ipynb create mode 100644 VAPs/quicklook/SACRADVVAD/kasacradvvad.c1.ipynb create mode 100644 VAPs/quicklook/SFCCLDGRID/.ipynb_checkpoints/15swfcldgrid1long.c1-checkpoint.ipynb create mode 100644 VAPs/quicklook/SFCCLDGRID/.ipynb_checkpoints/sfccldgrid2longcaracena.c1-checkpoint.ipynb create mode 100644 VAPs/quicklook/SFCCLDGRID/15swfcldgrid1long.c1.ipynb create mode 
100644 VAPs/quicklook/SFCCLDGRID/SFCCLDGRID_tutorial.ipynb create mode 100644 VAPs/quicklook/SFCCLDGRID/sfccldgrid2longcaracena.c1.ipynb create mode 100644 VAPs/quicklook/SFCCLDGRID/sfccldgrid2longstation.c1.ipynb create mode 100644 VAPs/quicklook/SHALLOWCUMULUS/SHALLOWCUMULUS_tutorial.ipynb create mode 100644 VAPs/quicklook/SHALLOWCUMULUS/shallowcumulus.c1.ipynb create mode 100644 VAPs/quicklook/SHALLOWCUMULUS/shcusummary.c1.ipynb create mode 100644 VAPs/quicklook/SO2-AIR/SO2-AIR_tutorial.ipynb create mode 100644 VAPs/quicklook/SO2-AIR/aafso2.c1.ipynb create mode 100644 VAPs/quicklook/SONDEADJUST/.ipynb_checkpoints/SONDEADJUST_tutorial-checkpoint.ipynb create mode 100644 VAPs/quicklook/SONDEADJUST/SONDEADJUST_tutorial.ipynb create mode 100644 VAPs/quicklook/SONDEADJUST/sondeadjust.c1.ipynb create mode 100644 VAPs/quicklook/SONDEPARAM/SONDEPARAM_tutorial.ipynb create mode 100644 VAPs/quicklook/SONDEPARAM/sondeparam.c1.ipynb create mode 100644 VAPs/quicklook/SP2-AIR/SP2-AIR_tutorial.ipynb create mode 100644 VAPs/quicklook/SP2-AIR/aafsp2rbc10s.c1.ipynb create mode 100644 VAPs/quicklook/SPHOTCOD/SPHOTCOD_tutorial.ipynb create mode 100644 VAPs/quicklook/SPHOTCOD/sphotcod2chiu.c1.ipynb create mode 100644 VAPs/quicklook/SURFSPECALB/SURFSPECALB_tutorial.ipynb create mode 100644 VAPs/quicklook/SURFSPECALB/surfspecalb1mlawer.c1.ipynb create mode 100644 VAPs/quicklook/SURFSPECALB/surfspecalb7nch1mlawer.c1.ipynb create mode 100644 VAPs/quicklook/SWFLUXANAL/.ipynb_checkpoints/1swfanalsirs1long.c1-checkpoint.ipynb create mode 100644 VAPs/quicklook/TBSMERGED/TBSMERGED_tutorial.ipynb create mode 100644 VAPs/quicklook/TBSMERGED/tbsmerged.c1.ipynb create mode 100644 VAPs/quicklook/TBSMERGED/tbsmergedincloud.c1.ipynb create mode 100644 VAPs/quicklook/TDMA/TDMA_tutorial.ipynb create mode 100644 VAPs/quicklook/TDMA/tdmaapssize.c1.ipynb create mode 100644 VAPs/quicklook/TWRMR/1twrmr.c1.ipynb create mode 100644 VAPs/quicklook/TWRMR/30twrmr.c1.ipynb create mode 100644 
VAPs/quicklook/TWRMR/TWRMR_tutorial.ipynb create mode 100644 VAPs/quicklook/VARANAL/180varanaecmwf.c1.ipynb create mode 100644 VAPs/quicklook/VARANAL/180varanamerra001.c1.ipynb create mode 100644 VAPs/quicklook/VARANAL/VARANAL_tutorial.ipynb create mode 100644 VAPs/quicklook/VARANAL3D/180varanal3dera5.c1.ipynb create mode 100644 VAPs/quicklook/VARANAL3D/180varanal3dncep.c1.ipynb create mode 100644 VAPs/quicklook/VARANAL3D/VARANAL3D_tutorial.ipynb create mode 100644 VAPs/quicklook/VDISQUANTS/VDISQUANTS_tutorial.ipynb create mode 100644 VAPs/quicklook/VDISQUANTS/vdisquants.c1.ipynb create mode 100644 VAPs/quicklook/WACRARSCL/WACRARSCL_tutorial.ipynb create mode 100644 VAPs/quicklook/WACRARSCL/arsclwacr1kollias.c1.ipynb create mode 100644 VAPs/quicklook/WACRARSCL/arsclwacrbnd1kollias.c1.ipynb create mode 100644 VAPs/vap_notebook_list.md diff --git a/.gitignore b/.gitignore index e43b0f98..c71e30af 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,2 @@ .DS_Store +_build/ \ No newline at end of file diff --git a/VAPs/quicklook/2DS-AIR/.ipynb_checkpoints/aaf2dsh.c1-checkpoint.ipynb b/VAPs/quicklook/2DS-AIR/.ipynb_checkpoints/aaf2dsh.c1-checkpoint.ipynb new file mode 100644 index 00000000..66d434d8 --- /dev/null +++ b/VAPs/quicklook/2DS-AIR/.ipynb_checkpoints/aaf2dsh.c1-checkpoint.ipynb @@ -0,0 +1,1271 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# AAF2DSH.C1 Plots\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/2ds-air) for more information about this vap." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib widget\n", + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import ipywidgets as widgets\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "# Data archive directory\n", + "DATA_DIR = r'/data/archive/'\n", + "\n", + "# Datastream info\n", + "DATASTREAM_NAME = 'aaf2dsh'\n", + "DATA_LEVEL = 'c1'\n", + "LOCATIONS = [{'end_date': '2018-12-08', 'facility': 'F1', 'site': 'cor', 'start_date': '2018-11-04'}, {'end_date': '2018-02-19', 'facility': 'F1', 'site': 'ena', 'start_date': '2017-06-21'}, {'end_date': '2016-09-22', 'facility': 'F1', 'site': 'sgp', 'start_date': '2016-04-25'}]" + ] + }, + { + "cell_type": "markdown", + "id": "9faaf875", + "metadata": {}, + "source": [ + "## Define site, facility, and date range" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "ac6764f5", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The following locations and date ranges are available for this VAP:\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sitefacilitystart_dateend_date
0corF12018-11-042018-12-08
1enaF12017-06-212018-02-19
2sgpF12016-04-252016-09-22
\n", + "
" + ], + "text/plain": [ + " site facility start_date end_date\n", + "0 cor F1 2018-11-04 2018-12-08\n", + "1 ena F1 2017-06-21 2018-02-19\n", + "2 sgp F1 2016-04-25 2016-09-22" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "print(\"The following locations and date ranges are available for this VAP:\")\n", + "display(pd.DataFrame(LOCATIONS, columns=['site', 'facility', 'start_date', 'end_date']))" + ] + }, + { + "cell_type": "markdown", + "id": "8d132223", + "metadata": {}, + "source": [ + "#### Define site, facility, and date range (date format: YYYY-MM-DD) using the variables below:" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "e563983a", + "metadata": {}, + "outputs": [], + "source": [ + "site_facility = ( 'sgp', 'F1' )\n", + "\n", + "date_start = '2016-09-21'\n", + "date_end = '2016-09-22'" + ] + }, + { + "cell_type": "markdown", + "id": "bccd3dfe-2f99-49a5-bace-ea37e8dd8fc3", + "metadata": {}, + "source": [ + "## Load data files\n", + "Load data files from /data/archive/" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "cb4b9a26-c574-49c0-a521-658fa553e39e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'/data/archive/sgp/sgpaaf2dshF1.c1'" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Compile list of files\n", + "site, facility = site_facility\n", + "d_date_start = datetime.strptime(date_start, '%Y-%m-%d')\n", + "d_date_end = datetime.strptime(date_end, '%Y-%m-%d')\n", + "dir_path = os.path.join(DATA_DIR + site, site + DATASTREAM_NAME + facility + r'.' 
+ DATA_LEVEL )\n", + "dir_path\n" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "6be8f3dc", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['20160921', '20160922']" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from datetime import date, timedelta\n", + "import pandas as pd\n", + "\n", + "def get_ARM_formated_dates(start_date, end_date):\n", + " \"\"\"\n", + " Get a list of ARM conventional formated date lists, based on start_date and end_date(inclusive)\n", + " EXAMPLE:\n", + " get_ARM_formated_dates(start_date=\"20180219\", end_date=\"20180221\")\n", + " >> [\"20180219\", \"20180220\", \"20180221\"] \n", + " \"\"\"\n", + " \n", + " _start_date = pd.to_datetime(start_date)\n", + " _end_date = pd.to_datetime(end_date)\n", + " \n", + " delta = _end_date - _start_date # returns timedelta \n", + " dates = []\n", + "\n", + " for i in range(delta.days + 1):\n", + " day = _start_date + timedelta(days=i)\n", + " day_formated = day.strftime(format=\"%Y%m%d\")\n", + " dates.append(day_formated)\n", + " return dates\n", + "\n", + "\n", + "get_ARM_formated_dates(start_date=date_start, end_date=date_end)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "51feea2e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['/data/archive/sgp/sgpaaf2dshF1.c1/sgpaaf2dshF1.c1.20160921.163940.nc',\n", + " '/data/archive/sgp/sgpaaf2dshF1.c1/sgpaaf2dshF1.c1.20160922.160625.nc']" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Filter a list of files based on date pattern\n", + "import glob\n", + "dates = get_ARM_formated_dates(start_date=date_start, end_date=date_end)\n", + "files_filter = []\n", + "for date in dates:\n", + " files_filter += glob.glob(f'{dir_path}/*.{date}*.*')\n", + " files_filter\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + 
"id": "b0e5d0d7", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "2 files loaded\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
<xarray.Dataset>\n",
+       "Dimensions:                     (time: 18380, optical_diameter: 61, bound: 2)\n",
+       "Coordinates:\n",
+       "  * time                        (time) datetime64[ns] 2016-09-21T16:39:40 ......\n",
+       "  * optical_diameter            (optical_diameter) float32 10.0 20.0 ... inf\n",
+       "Dimensions without coordinates: bound\n",
+       "Data variables:\n",
+       "    base_time                   (time) datetime64[ns] 2016-09-21 ... 2016-09-22\n",
+       "    time_offset                 (time) datetime64[ns] 2016-09-21T16:39:40 ......\n",
+       "    optical_diameter_bounds     (time, optical_diameter, bound) float32 dask.array<chunksize=(8283, 61, 2), meta=np.ndarray>\n",
+       "    total_number_concentration  (time) float32 dask.array<chunksize=(8283,), meta=np.ndarray>\n",
+       "    number_concentration        (time, optical_diameter) float32 dask.array<chunksize=(8283, 61), meta=np.ndarray>\n",
+       "    lat                         (time) float32 dask.array<chunksize=(8283,), meta=np.ndarray>\n",
+       "    lon                         (time) float32 dask.array<chunksize=(8283,), meta=np.ndarray>\n",
+       "    alt                         (time) float32 dask.array<chunksize=(8283,), meta=np.ndarray>\n",
+       "Attributes: (12/17)\n",
+       "    command_line:          aaf2dsme_ingest -s sgp -f F1 -D -R\n",
+       "    Conventions:           ARM-1.3\n",
+       "    process_version:       ingest-aaf2dsme-1.2-0.el7\n",
+       "    dod_version:           aaf2dsh-c1-1.1\n",
+       "    input_source:          /data/project/ENG0004504/collection/sgp/sgpaaf2dsF...\n",
+       "    site_id:               sgp\n",
+       "    ...                    ...\n",
+       "    doi:                   10.5439/1419322\n",
+       "    history:               created by user burk on machine prod-proc5.adc.arm...\n",
+       "    _file_dates:           ['20160921', '20160922']\n",
+       "    _file_times:           ['163940', '160625']\n",
+       "    _datastream:           sgpaaf2dshF1.c1\n",
+       "    _arm_standards_flag:   1
" + ], + "text/plain": [ + "\n", + "Dimensions: (time: 18380, optical_diameter: 61, bound: 2)\n", + "Coordinates:\n", + " * time (time) datetime64[ns] 2016-09-21T16:39:40 ......\n", + " * optical_diameter (optical_diameter) float32 10.0 20.0 ... inf\n", + "Dimensions without coordinates: bound\n", + "Data variables:\n", + " base_time (time) datetime64[ns] 2016-09-21 ... 2016-09-22\n", + " time_offset (time) datetime64[ns] 2016-09-21T16:39:40 ......\n", + " optical_diameter_bounds (time, optical_diameter, bound) float32 dask.array\n", + " total_number_concentration (time) float32 dask.array\n", + " number_concentration (time, optical_diameter) float32 dask.array\n", + " lat (time) float32 dask.array\n", + " lon (time) float32 dask.array\n", + " alt (time) float32 dask.array\n", + "Attributes: (12/17)\n", + " command_line: aaf2dsme_ingest -s sgp -f F1 -D -R\n", + " Conventions: ARM-1.3\n", + " process_version: ingest-aaf2dsme-1.2-0.el7\n", + " dod_version: aaf2dsh-c1-1.1\n", + " input_source: /data/project/ENG0004504/collection/sgp/sgpaaf2dsF...\n", + " site_id: sgp\n", + " ... 
...\n", + " doi: 10.5439/1419322\n", + " history: created by user burk on machine prod-proc5.adc.arm...\n", + " _file_dates: ['20160921', '20160922']\n", + " _file_times: ['163940', '160625']\n", + " _datastream: sgpaaf2dshF1.c1\n", + " _arm_standards_flag: 1" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Load files as a single dataset\n", + "files_list = files_filter \n", + "ds = act.io.armfiles.read_netcdf(files_list)\n", + "ds.clean.cleanup()\n", + "print(f'{len(files_list)} files loaded')\n", + "ds\n" + ] + }, + { + "cell_type": "markdown", + "id": "4a551094-9ec0-4b64-b80a-9940573c2f50", + "metadata": {}, + "source": [ + "## Plot time series data\n", + "#### Define the list of variables to be plotted:" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "d09b789e-84f1-4605-846b-a72c110c8048", + "metadata": {}, + "outputs": [], + "source": [ + "variables_to_plot = ['total_number_concentration', 'number_concentration']" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "3458fb08-035b-4898-9253-0a94e6f9c97b", + "metadata": {}, + "outputs": [ + { + "ename": "ValueError", + "evalue": "x and y arguments to pcolormesh cannot have non-finite values or be of type numpy.ma.core.MaskedArray with masked values", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[9], line 5\u001b[0m\n\u001b[1;32m 2\u001b[0m ts_display\u001b[38;5;241m.\u001b[39madd_subplots((\u001b[38;5;28mlen\u001b[39m(variables_to_plot),), figsize \u001b[38;5;241m=\u001b[39m (\u001b[38;5;241m9.5\u001b[39m,\u001b[38;5;241m4\u001b[39m\u001b[38;5;241m*\u001b[39m\u001b[38;5;28mlen\u001b[39m(variables_to_plot)))\n\u001b[1;32m 4\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m i,v \u001b[38;5;129;01min\u001b[39;00m 
\u001b[38;5;28menumerate\u001b[39m(variables_to_plot):\n\u001b[0;32m----> 5\u001b[0m ts_ax \u001b[38;5;241m=\u001b[39m \u001b[43mts_display\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mplot\u001b[49m\u001b[43m(\u001b[49m\u001b[43mv\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43msubplot_index\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mi\u001b[49m\u001b[43m,\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mset_title\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mds\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mvariables\u001b[49m\u001b[43m[\u001b[49m\u001b[43mv\u001b[49m\u001b[43m]\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mattrs\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mlong_name\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 6\u001b[0m ts_ax\u001b[38;5;241m.\u001b[39mgrid()\n\u001b[1;32m 8\u001b[0m plt\u001b[38;5;241m.\u001b[39mshow()\n", + "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/act/plotting/timeseriesdisplay.py:588\u001b[0m, in \u001b[0;36mTimeSeriesDisplay.plot\u001b[0;34m(self, field, dsname, subplot_index, cmap, set_title, add_nan, day_night_background, invert_y_axis, abs_limits, time_rng, y_rng, use_var_for_y, set_shading, assessment_overplot, overplot_marker, overplot_behind, overplot_markersize, assessment_overplot_category, assessment_overplot_category_color, force_line_plot, labels, cbar_label, cbar_h_adjust, secondary_y, y_axis_flag_meanings, colorbar_labels, cb_friendly, **kwargs)\u001b[0m\n\u001b[1;32m 586\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124medgecolors\u001b[39m\u001b[38;5;124m'\u001b[39m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m kwargs\u001b[38;5;241m.\u001b[39mkeys():\n\u001b[1;32m 587\u001b[0m 
kwargs[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124medgecolors\u001b[39m\u001b[38;5;124m'\u001b[39m] \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mface\u001b[39m\u001b[38;5;124m'\u001b[39m\n\u001b[0;32m--> 588\u001b[0m mesh \u001b[38;5;241m=\u001b[39m \u001b[43max\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mpcolormesh\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 589\u001b[0m \u001b[43m \u001b[49m\u001b[43mnp\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43masarray\u001b[49m\u001b[43m(\u001b[49m\u001b[43mxdata\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 590\u001b[0m \u001b[43m \u001b[49m\u001b[43mydata\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 591\u001b[0m \u001b[43m \u001b[49m\u001b[43mdata\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mtranspose\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 592\u001b[0m \u001b[43m \u001b[49m\u001b[43mshading\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mset_shading\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 593\u001b[0m \u001b[43m \u001b[49m\u001b[43mcmap\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcmap\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 594\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 595\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 597\u001b[0m \u001b[38;5;66;03m# Set Title\u001b[39;00m\n\u001b[1;32m 598\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m set_title \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n", + "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/matplotlib/__init__.py:1442\u001b[0m, in \u001b[0;36m_preprocess_data..inner\u001b[0;34m(ax, data, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1439\u001b[0m \u001b[38;5;129m@functools\u001b[39m\u001b[38;5;241m.\u001b[39mwraps(func)\n\u001b[1;32m 1440\u001b[0m 
\u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21minner\u001b[39m(ax, \u001b[38;5;241m*\u001b[39margs, data\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs):\n\u001b[1;32m 1441\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m data \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m-> 1442\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[43max\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;28;43mmap\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43msanitize_sequence\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43margs\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1444\u001b[0m bound \u001b[38;5;241m=\u001b[39m new_sig\u001b[38;5;241m.\u001b[39mbind(ax, \u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n\u001b[1;32m 1445\u001b[0m auto_label \u001b[38;5;241m=\u001b[39m (bound\u001b[38;5;241m.\u001b[39marguments\u001b[38;5;241m.\u001b[39mget(label_namer)\n\u001b[1;32m 1446\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m bound\u001b[38;5;241m.\u001b[39mkwargs\u001b[38;5;241m.\u001b[39mget(label_namer))\n", + "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/matplotlib/axes/_axes.py:6220\u001b[0m, in \u001b[0;36mAxes.pcolormesh\u001b[0;34m(self, alpha, norm, cmap, vmin, vmax, shading, antialiased, *args, **kwargs)\u001b[0m\n\u001b[1;32m 6217\u001b[0m shading \u001b[38;5;241m=\u001b[39m shading\u001b[38;5;241m.\u001b[39mlower()\n\u001b[1;32m 6218\u001b[0m kwargs\u001b[38;5;241m.\u001b[39msetdefault(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124medgecolors\u001b[39m\u001b[38;5;124m'\u001b[39m, 
\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mnone\u001b[39m\u001b[38;5;124m'\u001b[39m)\n\u001b[0;32m-> 6220\u001b[0m X, Y, C, shading \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_pcolorargs\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mpcolormesh\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 6221\u001b[0m \u001b[43m \u001b[49m\u001b[43mshading\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mshading\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mkwargs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 6222\u001b[0m coords \u001b[38;5;241m=\u001b[39m np\u001b[38;5;241m.\u001b[39mstack([X, Y], axis\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m-\u001b[39m\u001b[38;5;241m1\u001b[39m)\n\u001b[1;32m 6223\u001b[0m \u001b[38;5;66;03m# convert to one dimensional array, except for 3D RGB(A) arrays\u001b[39;00m\n", + "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/matplotlib/axes/_axes.py:5717\u001b[0m, in \u001b[0;36mAxes._pcolorargs\u001b[0;34m(self, funcname, shading, *args, **kwargs)\u001b[0m\n\u001b[1;32m 5715\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m funcname \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mpcolormesh\u001b[39m\u001b[38;5;124m'\u001b[39m:\n\u001b[1;32m 5716\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m np\u001b[38;5;241m.\u001b[39mma\u001b[38;5;241m.\u001b[39mis_masked(X) \u001b[38;5;129;01mor\u001b[39;00m np\u001b[38;5;241m.\u001b[39mma\u001b[38;5;241m.\u001b[39mis_masked(Y):\n\u001b[0;32m-> 5717\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[1;32m 5718\u001b[0m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mx and y arguments to pcolormesh cannot have 
\u001b[39m\u001b[38;5;124m'\u001b[39m\n\u001b[1;32m 5719\u001b[0m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mnon-finite values or be of type \u001b[39m\u001b[38;5;124m'\u001b[39m\n\u001b[1;32m 5720\u001b[0m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mnumpy.ma.core.MaskedArray with masked values\u001b[39m\u001b[38;5;124m'\u001b[39m)\n\u001b[1;32m 5721\u001b[0m \u001b[38;5;66;03m# safe_masked_invalid() returns an ndarray for dtypes other\u001b[39;00m\n\u001b[1;32m 5722\u001b[0m \u001b[38;5;66;03m# than floating point.\u001b[39;00m\n\u001b[1;32m 5723\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(X, np\u001b[38;5;241m.\u001b[39mma\u001b[38;5;241m.\u001b[39mcore\u001b[38;5;241m.\u001b[39mMaskedArray):\n", + "\u001b[0;31mValueError\u001b[0m: x and y arguments to pcolormesh cannot have non-finite values or be of type numpy.ma.core.MaskedArray with masked values" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "eae83e9e4798482083ecdb12aeb73cf7", + "version_major": 2, + "version_minor": 0 + }, + "image/png": "", + "text/html": [ + "\n", + "
\n", + "
\n", + " Figure\n", + "
\n", + " \n", + "
\n", + " " + ], + "text/plain": [ + "Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "ts_display = act.plotting.TimeSeriesDisplay(ds)\n", + "ts_display.add_subplots((len(variables_to_plot),), figsize = (9.5,4*len(variables_to_plot)))\n", + "\n", + "for i,v in enumerate(variables_to_plot):\n", + " ts_ax = ts_display.plot(v, subplot_index=(i,), set_title=ds.variables[v].attrs['long_name'],)\n", + " ts_ax.grid()\n", + "\n", + "plt.show()\n" + ] + }, + { + "cell_type": "markdown", + "id": "2308db16-f362-4033-a11a-c3e5e75ad9ba", + "metadata": {}, + "source": [ + "## Field selection dropdown menu\n", + "Select variable to be plotted from a dropdown menu" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d5792fa8-2253-441a-8ac6-1e07a0345031", + "metadata": {}, + "outputs": [], + "source": [ + "plt.ioff()\n", + "\n", + "# populate dropdown menu with available variables \n", + "available_variables = [v for v in ds.variables if not('time' in v or v.startswith('qc_') or v.startswith('source_')) and 'long_name' in ds.variables[v].attrs]\n", + "d_variable = 'total_number_concentration'\n", + "dropdown = widgets.Dropdown(\n", + " options = [(ds.variables[v].attrs['long_name'], v) for v in available_variables],\n", + " value= d_variable,\n", + " description='Field:',\n", + " disabled=False,\n", + ")\n", + "dropdown.layout.margin = '0px 30% 0px 20%'\n", + "dropdown.layout.width = '50%'\n", + "\n", + "# set up display\n", + "i_display = act.plotting.TimeSeriesDisplay(ds)\n", + "i_display.add_subplots((1,), figsize = (9.5,5))\n", + "i_ax = i_display.plot(d_variable, subplot_index=(0,), set_title=ds.variables[d_variable].attrs['long_name'],)\n", + "i_ax.grid()\n", + "i_fig = i_display.fig\n", + "\n", + "# update plot callback function\n", + "def update_plot(change):\n", + " i_ax.cla()\n", + " i_ax_new = 
i_display.plot(change.new, subplot_index=(0,), set_title=ds.variables[change.new].attrs['long_name'],)\n", + " i_ax_new.grid()\n", + " i_fig.canvas.draw()\n", + " i_fig.canvas.flush_events()\n", + "\n", + "dropdown.observe(update_plot, names='value')\n", + "\n", + "widgets.AppLayout(\n", + " header=dropdown,\n", + " center=i_fig.canvas,\n", + " pane_heights=[1, 6,1]\n", + ")\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.16" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/2DS-AIR/.ipynb_checkpoints/aaf2dsv.c1-checkpoint.ipynb b/VAPs/quicklook/2DS-AIR/.ipynb_checkpoints/aaf2dsv.c1-checkpoint.ipynb new file mode 100644 index 00000000..9a856041 --- /dev/null +++ b/VAPs/quicklook/2DS-AIR/.ipynb_checkpoints/aaf2dsv.c1-checkpoint.ipynb @@ -0,0 +1,1798 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# AAF2DSV.C1 Plots\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/2ds-air) for more information about this vap." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib widget\n", + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import ipywidgets as widgets\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "# Data archive directory\n", + "DATA_DIR = r'/data/archive/'\n", + "\n", + "# Datastream info\n", + "DATASTREAM_NAME = 'aaf2dsv'\n", + "DATA_LEVEL = 'c1'\n", + "LOCATIONS = [{'end_date': '2018-12-08', 'facility': 'F1', 'site': 'cor', 'start_date': '2018-11-04'}, {'end_date': '2018-02-19', 'facility': 'F1', 'site': 'ena', 'start_date': '2017-06-21'}, {'end_date': '2016-09-22', 'facility': 'F1', 'site': 'sgp', 'start_date': '2016-04-25'}]" + ] + }, + { + "cell_type": "markdown", + "id": "9faaf875", + "metadata": {}, + "source": [ + "## Define site, facility, and date range" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "ac6764f5", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The following locations and date ranges are available for this VAP:\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sitefacilitystart_dateend_date
0corF12018-11-042018-12-08
1enaF12017-06-212018-02-19
2sgpF12016-04-252016-09-22
\n", + "
" + ], + "text/plain": [ + " site facility start_date end_date\n", + "0 cor F1 2018-11-04 2018-12-08\n", + "1 ena F1 2017-06-21 2018-02-19\n", + "2 sgp F1 2016-04-25 2016-09-22" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "print(\"The following locations and date ranges are available for this VAP:\")\n", + "display(pd.DataFrame(LOCATIONS, columns=['site', 'facility', 'start_date', 'end_date']))" + ] + }, + { + "cell_type": "markdown", + "id": "8d132223", + "metadata": {}, + "source": [ + "#### Define site, facility, and date range (date format: YYYY-MM-DD) using the variables below:" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "e563983a", + "metadata": {}, + "outputs": [], + "source": [ + "site_facility = ( 'sgp', 'F1' )\n", + "\n", + "date_start = '2016-09-21'\n", + "date_end = '2016-09-22'" + ] + }, + { + "cell_type": "markdown", + "id": "bccd3dfe-2f99-49a5-bace-ea37e8dd8fc3", + "metadata": {}, + "source": [ + "## Load data files\n", + "Load data files from /data/archive/" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "cb4b9a26-c574-49c0-a521-658fa553e39e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'/data/archive/sgp/sgpaaf2dsvF1.c1'" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Compile list of files\n", + "site, facility = site_facility\n", + "d_date_start = datetime.strptime(date_start, '%Y-%m-%d')\n", + "d_date_end = datetime.strptime(date_end, '%Y-%m-%d')\n", + "dir_path = os.path.join(DATA_DIR + site, site + DATASTREAM_NAME + facility + r'.' 
+ DATA_LEVEL )\n", + "dir_path\n" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "6be8f3dc", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['20160921', '20160922']" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from datetime import date, timedelta\n", + "import pandas as pd\n", + "\n", + "def get_ARM_formated_dates(start_date, end_date):\n", + " \"\"\"\n", + " Get a list of ARM conventional formated date lists, based on start_date and end_date(inclusive)\n", + " EXAMPLE:\n", + " get_ARM_formated_dates(start_date=\"20180219\", end_date=\"20180221\")\n", + " >> [\"20180219\", \"20180220\", \"20180221\"] \n", + " \"\"\"\n", + " \n", + " _start_date = pd.to_datetime(start_date)\n", + " _end_date = pd.to_datetime(end_date)\n", + " \n", + " delta = _end_date - _start_date # returns timedelta \n", + " dates = []\n", + "\n", + " for i in range(delta.days + 1):\n", + " day = _start_date + timedelta(days=i)\n", + " day_formated = day.strftime(format=\"%Y%m%d\")\n", + " dates.append(day_formated)\n", + " return dates\n", + "\n", + "\n", + "get_ARM_formated_dates(start_date=date_start, end_date=date_end)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "51feea2e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['/data/archive/sgp/sgpaaf2dsvF1.c1/sgpaaf2dsvF1.c1.20160921.163940.nc',\n", + " '/data/archive/sgp/sgpaaf2dsvF1.c1/sgpaaf2dsvF1.c1.20160922.160625.nc']" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Filter a list of files based on date pattern\n", + "import glob\n", + "dates = get_ARM_formated_dates(start_date=date_start, end_date=date_end)\n", + "files_filter = []\n", + "for date in dates:\n", + " files_filter += glob.glob(f'{dir_path}/*.{date}*.*')\n", + " files_filter\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + 
"id": "226f29ae", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
<xarray.Dataset>\n",
+       "Dimensions:                     (time: 10097, optical_diameter: 61, bound: 2)\n",
+       "Coordinates:\n",
+       "  * time                        (time) datetime64[ns] 2016-09-22T16:06:25 ......\n",
+       "  * optical_diameter            (optical_diameter) float32 10.0 20.0 ... inf\n",
+       "Dimensions without coordinates: bound\n",
+       "Data variables:\n",
+       "    base_time                   datetime64[ns] 2016-09-22\n",
+       "    time_offset                 (time) datetime64[ns] 2016-09-22T16:06:25 ......\n",
+       "    optical_diameter_bounds     (optical_diameter, bound) float32 5.0 ... inf\n",
+       "    total_number_concentration  (time) float32 26.59 0.0 22.4 ... 0.0 0.0 nan\n",
+       "    number_concentration        (time, optical_diameter) float32 2.24 ... nan\n",
+       "    lat                         (time) float32 36.76 36.76 36.76 ... 36.74 36.74\n",
+       "    lon                         (time) float32 -96.01 -96.01 ... -96.02 -96.02\n",
+       "    alt                         (time) float32 220.0 224.0 228.0 ... 677.0 674.0\n",
+       "Attributes: (12/13)\n",
+       "    command_line:          aaf2dsme_ingest -s sgp -f F1 -D -R\n",
+       "    Conventions:           ARM-1.3\n",
+       "    process_version:       ingest-aaf2dsme-1.2-0.el7\n",
+       "    dod_version:           aaf2dsv-c1-1.1\n",
+       "    input_source:          /data/project/ENG0004504/collection/sgp/sgpaaf2dsF...\n",
+       "    site_id:               sgp\n",
+       "    ...                    ...\n",
+       "    facility_id:           F1\n",
+       "    data_level:            c1\n",
+       "    location_description:  Southern Great Plains (SGP), Gulfstream 159 ("G1")...\n",
+       "    datastream:            sgpaaf2dsvF1.c1\n",
+       "    doi:                   10.5439/1419323\n",
+       "    history:               created by user burk on machine prod-proc5.adc.arm...
" + ], + "text/plain": [ + "\n", + "Dimensions: (time: 10097, optical_diameter: 61, bound: 2)\n", + "Coordinates:\n", + " * time (time) datetime64[ns] 2016-09-22T16:06:25 ......\n", + " * optical_diameter (optical_diameter) float32 10.0 20.0 ... inf\n", + "Dimensions without coordinates: bound\n", + "Data variables:\n", + " base_time datetime64[ns] 2016-09-22\n", + " time_offset (time) datetime64[ns] 2016-09-22T16:06:25 ......\n", + " optical_diameter_bounds (optical_diameter, bound) float32 5.0 ... inf\n", + " total_number_concentration (time) float32 26.59 0.0 22.4 ... 0.0 0.0 nan\n", + " number_concentration (time, optical_diameter) float32 2.24 ... nan\n", + " lat (time) float32 36.76 36.76 36.76 ... 36.74 36.74\n", + " lon (time) float32 -96.01 -96.01 ... -96.02 -96.02\n", + " alt (time) float32 220.0 224.0 228.0 ... 677.0 674.0\n", + "Attributes: (12/13)\n", + " command_line: aaf2dsme_ingest -s sgp -f F1 -D -R\n", + " Conventions: ARM-1.3\n", + " process_version: ingest-aaf2dsme-1.2-0.el7\n", + " dod_version: aaf2dsv-c1-1.1\n", + " input_source: /data/project/ENG0004504/collection/sgp/sgpaaf2dsF...\n", + " site_id: sgp\n", + " ... ...\n", + " facility_id: F1\n", + " data_level: c1\n", + " location_description: Southern Great Plains (SGP), Gulfstream 159 (\"G1\")...\n", + " datastream: sgpaaf2dsvF1.c1\n", + " doi: 10.5439/1419323\n", + " history: created by user burk on machine prod-proc5.adc.arm..." 
+ ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ds_single_1 = xr.load_dataset(\"/data/archive/sgp/sgpaaf2dsvF1.c1/sgpaaf2dsvF1.c1.20160921.163940.nc\")\n", + "ds_single_1\n", + "\n", + "ds_single_2 = xr.load_dataset(\"/data/archive/sgp/sgpaaf2dsvF1.c1/sgpaaf2dsvF1.c1.20160922.160625.nc\")\n", + "ds_single_2" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "b0e5d0d7", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "2 files loaded\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
<xarray.Dataset>\n",
+       "Dimensions:                     (time: 18380, optical_diameter: 61, bound: 2)\n",
+       "Coordinates:\n",
+       "  * time                        (time) datetime64[ns] 2016-09-21T16:39:40 ......\n",
+       "  * optical_diameter            (optical_diameter) float32 10.0 20.0 ... inf\n",
+       "Dimensions without coordinates: bound\n",
+       "Data variables:\n",
+       "    base_time                   (time) datetime64[ns] 2016-09-21 ... 2016-09-22\n",
+       "    time_offset                 (time) datetime64[ns] 2016-09-21T16:39:40 ......\n",
+       "    optical_diameter_bounds     (time, optical_diameter, bound) float32 dask.array<chunksize=(8283, 61, 2), meta=np.ndarray>\n",
+       "    total_number_concentration  (time) float32 dask.array<chunksize=(8283,), meta=np.ndarray>\n",
+       "    number_concentration        (time, optical_diameter) float32 dask.array<chunksize=(8283, 61), meta=np.ndarray>\n",
+       "    lat                         (time) float32 dask.array<chunksize=(8283,), meta=np.ndarray>\n",
+       "    lon                         (time) float32 dask.array<chunksize=(8283,), meta=np.ndarray>\n",
+       "    alt                         (time) float32 dask.array<chunksize=(8283,), meta=np.ndarray>\n",
+       "Attributes: (12/17)\n",
+       "    command_line:          aaf2dsme_ingest -s sgp -f F1 -D -R\n",
+       "    Conventions:           ARM-1.3\n",
+       "    process_version:       ingest-aaf2dsme-1.2-0.el7\n",
+       "    dod_version:           aaf2dsv-c1-1.1\n",
+       "    input_source:          /data/project/ENG0004504/collection/sgp/sgpaaf2dsF...\n",
+       "    site_id:               sgp\n",
+       "    ...                    ...\n",
+       "    doi:                   10.5439/1419323\n",
+       "    history:               created by user burk on machine prod-proc5.adc.arm...\n",
+       "    _file_dates:           ['20160921', '20160922']\n",
+       "    _file_times:           ['163940', '160625']\n",
+       "    _datastream:           sgpaaf2dsvF1.c1\n",
+       "    _arm_standards_flag:   1
" + ], + "text/plain": [ + "\n", + "Dimensions: (time: 18380, optical_diameter: 61, bound: 2)\n", + "Coordinates:\n", + " * time (time) datetime64[ns] 2016-09-21T16:39:40 ......\n", + " * optical_diameter (optical_diameter) float32 10.0 20.0 ... inf\n", + "Dimensions without coordinates: bound\n", + "Data variables:\n", + " base_time (time) datetime64[ns] 2016-09-21 ... 2016-09-22\n", + " time_offset (time) datetime64[ns] 2016-09-21T16:39:40 ......\n", + " optical_diameter_bounds (time, optical_diameter, bound) float32 dask.array\n", + " total_number_concentration (time) float32 dask.array\n", + " number_concentration (time, optical_diameter) float32 dask.array\n", + " lat (time) float32 dask.array\n", + " lon (time) float32 dask.array\n", + " alt (time) float32 dask.array\n", + "Attributes: (12/17)\n", + " command_line: aaf2dsme_ingest -s sgp -f F1 -D -R\n", + " Conventions: ARM-1.3\n", + " process_version: ingest-aaf2dsme-1.2-0.el7\n", + " dod_version: aaf2dsv-c1-1.1\n", + " input_source: /data/project/ENG0004504/collection/sgp/sgpaaf2dsF...\n", + " site_id: sgp\n", + " ... 
...\n", + " doi: 10.5439/1419323\n", + " history: created by user burk on machine prod-proc5.adc.arm...\n", + " _file_dates: ['20160921', '20160922']\n", + " _file_times: ['163940', '160625']\n", + " _datastream: sgpaaf2dsvF1.c1\n", + " _arm_standards_flag: 1" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Load files as a single dataset\n", + "files_list = files_filter \n", + "ds = act.io.armfiles.read_netcdf(files_list)\n", + "ds.clean.cleanup()\n", + "print(f'{len(files_list)} files loaded')\n", + "ds\n" + ] + }, + { + "cell_type": "markdown", + "id": "4a551094-9ec0-4b64-b80a-9940573c2f50", + "metadata": {}, + "source": [ + "## Plot time series data\n", + "#### Define the list of variables to be plotted:" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "d09b789e-84f1-4605-846b-a72c110c8048", + "metadata": {}, + "outputs": [], + "source": [ + "variables_to_plot = ['total_number_concentration', 'number_concentration']" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "3458fb08-035b-4898-9253-0a94e6f9c97b", + "metadata": {}, + "outputs": [ + { + "ename": "ValueError", + "evalue": "x and y arguments to pcolormesh cannot have non-finite values or be of type numpy.ma.core.MaskedArray with masked values", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[9], line 5\u001b[0m\n\u001b[1;32m 2\u001b[0m ts_display\u001b[38;5;241m.\u001b[39madd_subplots((\u001b[38;5;28mlen\u001b[39m(variables_to_plot),), figsize \u001b[38;5;241m=\u001b[39m (\u001b[38;5;241m9.5\u001b[39m,\u001b[38;5;241m4\u001b[39m\u001b[38;5;241m*\u001b[39m\u001b[38;5;28mlen\u001b[39m(variables_to_plot)))\n\u001b[1;32m 4\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m i,v \u001b[38;5;129;01min\u001b[39;00m 
\u001b[38;5;28menumerate\u001b[39m(variables_to_plot):\n\u001b[0;32m----> 5\u001b[0m ts_ax \u001b[38;5;241m=\u001b[39m \u001b[43mts_display\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mplot\u001b[49m\u001b[43m(\u001b[49m\u001b[43mv\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43msubplot_index\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mi\u001b[49m\u001b[43m,\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mset_title\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mds\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mvariables\u001b[49m\u001b[43m[\u001b[49m\u001b[43mv\u001b[49m\u001b[43m]\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mattrs\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mlong_name\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 6\u001b[0m ts_ax\u001b[38;5;241m.\u001b[39mgrid()\n\u001b[1;32m 8\u001b[0m plt\u001b[38;5;241m.\u001b[39mshow()\n", + "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/act/plotting/timeseriesdisplay.py:588\u001b[0m, in \u001b[0;36mTimeSeriesDisplay.plot\u001b[0;34m(self, field, dsname, subplot_index, cmap, set_title, add_nan, day_night_background, invert_y_axis, abs_limits, time_rng, y_rng, use_var_for_y, set_shading, assessment_overplot, overplot_marker, overplot_behind, overplot_markersize, assessment_overplot_category, assessment_overplot_category_color, force_line_plot, labels, cbar_label, cbar_h_adjust, secondary_y, y_axis_flag_meanings, colorbar_labels, cb_friendly, **kwargs)\u001b[0m\n\u001b[1;32m 586\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124medgecolors\u001b[39m\u001b[38;5;124m'\u001b[39m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m kwargs\u001b[38;5;241m.\u001b[39mkeys():\n\u001b[1;32m 587\u001b[0m 
kwargs[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124medgecolors\u001b[39m\u001b[38;5;124m'\u001b[39m] \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mface\u001b[39m\u001b[38;5;124m'\u001b[39m\n\u001b[0;32m--> 588\u001b[0m mesh \u001b[38;5;241m=\u001b[39m \u001b[43max\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mpcolormesh\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 589\u001b[0m \u001b[43m \u001b[49m\u001b[43mnp\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43masarray\u001b[49m\u001b[43m(\u001b[49m\u001b[43mxdata\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 590\u001b[0m \u001b[43m \u001b[49m\u001b[43mydata\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 591\u001b[0m \u001b[43m \u001b[49m\u001b[43mdata\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mtranspose\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 592\u001b[0m \u001b[43m \u001b[49m\u001b[43mshading\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mset_shading\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 593\u001b[0m \u001b[43m \u001b[49m\u001b[43mcmap\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcmap\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 594\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 595\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 597\u001b[0m \u001b[38;5;66;03m# Set Title\u001b[39;00m\n\u001b[1;32m 598\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m set_title \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n", + "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/matplotlib/__init__.py:1442\u001b[0m, in \u001b[0;36m_preprocess_data..inner\u001b[0;34m(ax, data, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1439\u001b[0m \u001b[38;5;129m@functools\u001b[39m\u001b[38;5;241m.\u001b[39mwraps(func)\n\u001b[1;32m 1440\u001b[0m 
\u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21minner\u001b[39m(ax, \u001b[38;5;241m*\u001b[39margs, data\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs):\n\u001b[1;32m 1441\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m data \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m-> 1442\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[43max\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;28;43mmap\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43msanitize_sequence\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43margs\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1444\u001b[0m bound \u001b[38;5;241m=\u001b[39m new_sig\u001b[38;5;241m.\u001b[39mbind(ax, \u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n\u001b[1;32m 1445\u001b[0m auto_label \u001b[38;5;241m=\u001b[39m (bound\u001b[38;5;241m.\u001b[39marguments\u001b[38;5;241m.\u001b[39mget(label_namer)\n\u001b[1;32m 1446\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m bound\u001b[38;5;241m.\u001b[39mkwargs\u001b[38;5;241m.\u001b[39mget(label_namer))\n", + "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/matplotlib/axes/_axes.py:6220\u001b[0m, in \u001b[0;36mAxes.pcolormesh\u001b[0;34m(self, alpha, norm, cmap, vmin, vmax, shading, antialiased, *args, **kwargs)\u001b[0m\n\u001b[1;32m 6217\u001b[0m shading \u001b[38;5;241m=\u001b[39m shading\u001b[38;5;241m.\u001b[39mlower()\n\u001b[1;32m 6218\u001b[0m kwargs\u001b[38;5;241m.\u001b[39msetdefault(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124medgecolors\u001b[39m\u001b[38;5;124m'\u001b[39m, 
\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mnone\u001b[39m\u001b[38;5;124m'\u001b[39m)\n\u001b[0;32m-> 6220\u001b[0m X, Y, C, shading \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_pcolorargs\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mpcolormesh\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 6221\u001b[0m \u001b[43m \u001b[49m\u001b[43mshading\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mshading\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mkwargs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 6222\u001b[0m coords \u001b[38;5;241m=\u001b[39m np\u001b[38;5;241m.\u001b[39mstack([X, Y], axis\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m-\u001b[39m\u001b[38;5;241m1\u001b[39m)\n\u001b[1;32m 6223\u001b[0m \u001b[38;5;66;03m# convert to one dimensional array, except for 3D RGB(A) arrays\u001b[39;00m\n", + "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/matplotlib/axes/_axes.py:5717\u001b[0m, in \u001b[0;36mAxes._pcolorargs\u001b[0;34m(self, funcname, shading, *args, **kwargs)\u001b[0m\n\u001b[1;32m 5715\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m funcname \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mpcolormesh\u001b[39m\u001b[38;5;124m'\u001b[39m:\n\u001b[1;32m 5716\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m np\u001b[38;5;241m.\u001b[39mma\u001b[38;5;241m.\u001b[39mis_masked(X) \u001b[38;5;129;01mor\u001b[39;00m np\u001b[38;5;241m.\u001b[39mma\u001b[38;5;241m.\u001b[39mis_masked(Y):\n\u001b[0;32m-> 5717\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[1;32m 5718\u001b[0m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mx and y arguments to pcolormesh cannot have 
\u001b[39m\u001b[38;5;124m'\u001b[39m\n\u001b[1;32m 5719\u001b[0m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mnon-finite values or be of type \u001b[39m\u001b[38;5;124m'\u001b[39m\n\u001b[1;32m 5720\u001b[0m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mnumpy.ma.core.MaskedArray with masked values\u001b[39m\u001b[38;5;124m'\u001b[39m)\n\u001b[1;32m 5721\u001b[0m \u001b[38;5;66;03m# safe_masked_invalid() returns an ndarray for dtypes other\u001b[39;00m\n\u001b[1;32m 5722\u001b[0m \u001b[38;5;66;03m# than floating point.\u001b[39;00m\n\u001b[1;32m 5723\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(X, np\u001b[38;5;241m.\u001b[39mma\u001b[38;5;241m.\u001b[39mcore\u001b[38;5;241m.\u001b[39mMaskedArray):\n", + "\u001b[0;31mValueError\u001b[0m: x and y arguments to pcolormesh cannot have non-finite values or be of type numpy.ma.core.MaskedArray with masked values" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "b4ce530bc3b34d038b85608090b4b719", + "version_major": 2, + "version_minor": 0 + }, + "image/png": "", + "text/html": [ + "\n", + "
\n", + "
\n", + " Figure\n", + "
\n", + " \n", + "
\n", + " " + ], + "text/plain": [ + "Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "ts_display = act.plotting.TimeSeriesDisplay(ds)\n", + "ts_display.add_subplots((len(variables_to_plot),), figsize = (9.5,4*len(variables_to_plot)))\n", + "\n", + "for i,v in enumerate(variables_to_plot):\n", + " ts_ax = ts_display.plot(v, subplot_index=(i,), set_title=ds.variables[v].attrs['long_name'],)\n", + " ts_ax.grid()\n", + "\n", + "plt.show()\n" + ] + }, + { + "cell_type": "markdown", + "id": "2308db16-f362-4033-a11a-c3e5e75ad9ba", + "metadata": {}, + "source": [ + "## Field selection dropdown menu\n", + "Select variable to be plotted from a dropdown menu" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d5792fa8-2253-441a-8ac6-1e07a0345031", + "metadata": {}, + "outputs": [], + "source": [ + "plt.ioff()\n", + "\n", + "# populate dropdown menu with available variables \n", + "available_variables = [v for v in ds.variables if not('time' in v or v.startswith('qc_') or v.startswith('source_')) and 'long_name' in ds.variables[v].attrs]\n", + "d_variable = 'total_number_concentration'\n", + "dropdown = widgets.Dropdown(\n", + " options = [(ds.variables[v].attrs['long_name'], v) for v in available_variables],\n", + " value= d_variable,\n", + " description='Field:',\n", + " disabled=False,\n", + ")\n", + "dropdown.layout.margin = '0px 30% 0px 20%'\n", + "dropdown.layout.width = '50%'\n", + "\n", + "# set up display\n", + "i_display = act.plotting.TimeSeriesDisplay(ds)\n", + "i_display.add_subplots((1,), figsize = (9.5,5))\n", + "i_ax = i_display.plot(d_variable, subplot_index=(0,), set_title=ds.variables[d_variable].attrs['long_name'],)\n", + "i_ax.grid()\n", + "i_fig = i_display.fig\n", + "\n", + "# update plot callback function\n", + "def update_plot(change):\n", + " i_ax.cla()\n", + " i_ax_new = 
i_display.plot(change.new, subplot_index=(0,), set_title=ds.variables[change.new].attrs['long_name'],)\n", + " i_ax_new.grid()\n", + " i_fig.canvas.draw()\n", + " i_fig.canvas.flush_events()\n", + "\n", + "dropdown.observe(update_plot, names='value')\n", + "\n", + "widgets.AppLayout(\n", + " header=dropdown,\n", + " center=i_fig.canvas,\n", + " pane_heights=[1, 6,1]\n", + ")\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.16" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/2DS-AIR/2DS-AIR_tutorial.ipynb b/VAPs/quicklook/2DS-AIR/2DS-AIR_tutorial.ipynb new file mode 100644 index 00000000..bcefbb2f --- /dev/null +++ b/VAPs/quicklook/2DS-AIR/2DS-AIR_tutorial.ipynb @@ -0,0 +1,838 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# AAF2DSH.C1 Notebook\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/2ds-air) for more information about this vap." + ] + }, + { + "cell_type": "markdown", + "id": "97097763", + "metadata": {}, + "source": [ + "In this notebook, we demonstrate the workflow to explore ARM vap data (using aaf2dsh as an example.) Value-added products (VAPs) are higher-order data products that have been analyzed and processed to ease scientists’ use of ARM data in atmospheric research and global climate models. \n", + "Here is the main content we will cover." 
+ ] + }, + { + "cell_type": "markdown", + "id": "eddec40f", + "metadata": {}, + "source": [ + "# Table of Contents\n", + "## Access the data\n", + "* How to retrieve the data\n", + "* Data path and file name conventions\n", + "* Load data\n", + "## Explore the data\n", + "* NetCDF Data structure\n", + "* Xarray essentials\n", + "* Xarray Variable\n", + "## Plot the data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "import random\n", + "\n", + "import glob\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "id": "2372d5be", + "metadata": {}, + "source": [ + "## Access the data" + ] + }, + { + "cell_type": "markdown", + "id": "5b0d9684", + "metadata": {}, + "source": [ + "### How to retrieve the data\n", + "We assume the path \"/data/archive\" is available where you are running this notebook. You can use `os.path.exists(\"/data/archive\")` to verify if the path exists on your machine.\n", + "\n", + "\n", + "### Data path and file name conventions\n", + "There are several common terms regarding ARM data, for example, data-stream-name, data-level, site, facility, instrument, etc. (For more details, please see [ARM Data File Standards Version 1.2](https://www.google.com/search?q=arm+datastream+facility+cite+definition&rlz=1C1GCEJ_enUS1029US1029&ei=hb41ZICrBPukqtsPvMODIA&ved=0ahUKEwjAgeiS0aL-AhV7kmoFHbzhAAQQ4dUDCBA&uact=5&oq=arm+datastream+facility+cite+definition&gs_lcp=Cgxnd3Mtd2l6LXNlcnAQAzIFCAAQogQyBQgAEKIEMgUIABCiBDIFCAAQogQyBQgAEKIEOgoIABBHENYEELADOgoIIRCgARDDBBAKSgQIQRgAUJ4DWIoOYO0PaAFwAXgAgAGMAYgBuQmSAQMyLjmYAQCgAQHIAQjAAQE&sclient=gws-wiz-serp).) 
\n", + "\n", + "For example, this notebook is called `aaf2dsh.c1`, where `aaf2dsh` is the \"datastream name\", and `c1` is the \"data level\".\n", + "\n", + "This datastream also contains site `cor` and facility `F1`. (Note: an individual datastream might have multiple site-facility pairs.)\n", + "In such a case, the data of this data-stream is stored at `/data/archive/cor/coraaf2dshF1.c1`, which is in the format of `<DATA_DIR>/<site>/<site><datastream_name><facility>.<data_level>`. We can use the following method to assign the data-stream directory `datastream_dir = os.path.join(DATA_DIR, site, site + DATASTREAM_NAME + facility + '.' + DATA_LEVEL )`\n", + "\n", + "The data files under datastream_dir also follow naming conventions. Once you reach the datastream_dir level, the most important file naming convention to differentiate the files is \"yyyyMMdd.hhmmss\", which comes in handy for filtering files based on datetime. For example, we can use `glob.glob(f'{datastream_dir}/*.200709*.*')` to filter files from September 2007.\n", + "\n", + "(Note: refer to the https://adc.arm.gov/discovery/#/ and https://adc.arm.gov/solr8/metadata/select API to explore ARM datastreams and their associated available sites and facilities.)\n", + "\n", + "Please see the following examples in action" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d7e9eb85", + "metadata": {}, + "outputs": [], + "source": [ + "# Verify if DATA_DIR path exists\n", + "DATA_DIR = \"/data/archive\"\n", + "os.path.exists(DATA_DIR)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "586993fd", + "metadata": {}, + "outputs": [], + "source": [ + "# Speicify datastream_dir following the path conventions and check its existence\n", + "DATASTREAM_NAME = \"aaf2dsh\"\n", + "DATA_LEVEL = \"c1\"\n", + "site = \"cor\"\n", + "facility = \"F1\"\n", + "datastream_dir = os.path.join(DATA_DIR, site, site + DATASTREAM_NAME + facility + '.' 
+ DATA_LEVEL )\n", + "print(datastream_dir)\n", + "print(os.path.exists(datastream_dir))\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0742f7c1", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: list 5 (random) files under datastream_dir\n", + "files = os.listdir(datastream_dir)\n", + "files[:5]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "39b98a36", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: get most recent file\n", + "list_of_files = glob.glob(f\"{datastream_dir}/*\") # * means all if need specific format then *.csv\n", + "latest_file = max(list_of_files, key=os.path.getctime)\n", + "latest_file" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "902d514e", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus sort datastream files based on datetime\n", + "files = os.listdir(datastream_dir)\n", + "file_sorted = files.copy()\n", + "file_sorted.sort() \n", + "print(file_sorted[:5])\n", + "\n", + "# to reverse\n", + "file_sorted_reverse = files.copy() \n", + "file_sorted_reverse.sort(reverse=True)\n", + "print(file_sorted_reverse[:5])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ec5923b2", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: pattern matching\n", + "# filter the 200709** files under datastream_dir\n", + "files_filter = glob.glob(f'{datastream_dir}/*.200709*.*')\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c4a4aa18", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: use armnotebook_utils.file_filter (TODO) to filter files based on datastream info \n" + ] + }, + { + "cell_type": "markdown", + "id": "cfeb9efc", + "metadata": {}, + "source": [ + "### Load data\n", + "The arm data is stored in [NetCDF](https://en.wikipedia.org/wiki/NetCDF#:~:text=%22NetCDF%20(network%20Common%20Data%20Form,format%20for%20representing%20scientific%20data.) format. 
We can use xarray's `open_dataset` method to load a single file, or `open_mfdataset` to open multiple files. (Note: the latter will still return a single xarray dataset object by combining multiple files.)\n", + "\n", + "Note: open_dataset keeps the file handle open and lazily loads its contents. All parameters are passed directly to open_dataset. It is preferable to load_dataset for memory efficiency. (For more details, please see [xarray.open_dataset](https://docs.xarray.dev/en/stable/generated/xarray.open_dataset.html))\n", + "\n", + "See the following example in action" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a440a329", + "metadata": {}, + "outputs": [], + "source": [ + "# open single file\n", + "full_path = latest_file\n", + "print(full_path)\n", + "ds_single = xr.open_dataset(full_path)\n", + "ds_single" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0143a3d", + "metadata": {}, + "outputs": [], + "source": [ + "print(ds_single)\n", + "print(type(ds_single))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1c0f8939", + "metadata": {}, + "outputs": [], + "source": [ + "# open multiple files\n", + "n_files = 3\n", + "full_paths = [os.path.join(datastream_dir, f_path) for f_path in file_sorted_reverse[:n_files]]\n", + "print(full_paths)\n", + "try: # Note: sometimes multiple files cannot be merged, so we used try except here.\n", + " ds_mutiple = xr.open_mfdataset(full_paths)\n", + " # ds_mutiple\n", + " print(type(ds_mutiple))\n", + " print(ds_mutiple)\n", + "except Exception as e:\n", + " print(e)" + ] + }, + { + "cell_type": "markdown", + "id": "3fa59943", + "metadata": {}, + "source": [ + "## Explore the data" + ] + }, + { + "cell_type": "markdown", + "id": "571f69b9", + "metadata": {}, + "source": [ + "### NetCDF Data structure \n", + "\n", + "\n", + "(If you are confident with NetCDF basics and xarray essentials, feel free to skip this section.)\n", + "Before we dive into 
data exploration, there are some essential concepts we should be familiar with (shown below): \n", + "* Dataset\n", + "* Data array\n", + "* Variable\n", + "* Dimension\n", + "* Coordinate\n", + "* Data Type\n", + "* Meta Data (Attributes)\n", + "\n", + "We will not go into details about NetCDF basics, but here are some references you might find helpful:\n", + "* [Components of a NetCDF Dataset](https://iprc.soest.hawaii.edu/users/xfu/tool/guidef-7.html#)\n", + "* [Network Common Data Form (NetCDF)](https://www.unidata.ucar.edu/software/netcdf/) and [A Brief History of (netCDF) Time](https://www.unidata.ucar.edu/software/netcdf/time/recs.html)\n", + "\n", + "\n", + "\n", + "### Xarray essentials\n", + "Earlier we introduced the xarray package and used `open_dataset` and `open_mfdataset` to retrieve NetCDF data files as an xarray Dataset object. Recall that you can review the data in a notebook by using `print(ds)` or `ds`. \n", + "\n", + "Using xarray to retrieve the aforementioned NetCDF basics is straightforward. 
In general,\n", + "* Dataset: `ds`\n", + "* Data array: `ds.variables`\n", + "* Variable: `ds.variables`\n", + "* Dimension: `ds.dims`\n", + "* Coordinate: `ds.coords`\n", + "* Data Type: `type`\n", + "* Meta Data (Attributes): `ds.attrs`\n", + "\n", + "Also, here are some references if you are new to xarray:\n", + "* [Xarray in 45 minutes](https://tutorial.xarray.dev/overview/xarray-in-45-min.html)\n", + "* [Handling NetCDF Files using XArray for Absolute Beginners](https://towardsdatascience.com/handling-netcdf-files-using-xarray-for-absolute-beginners-111a8ab4463f)\n", + "\n", + "Try the following commands in action\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b3eac323", + "metadata": {}, + "outputs": [], + "source": [ + "ds = ds_single" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "77ecf85d", + "metadata": {}, + "outputs": [], + "source": [ + "# Dataset \n", + "ds" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "25e7de09", + "metadata": {}, + "outputs": [], + "source": [ + "# Data array, Variable\n", + "# Note: the info can be overwhelming. 
Do not attempt to grasp everything at the first glance.\n", + "ds.variables" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8c41b67e", + "metadata": {}, + "outputs": [], + "source": [ + "# dimenssions\n", + "ds.dims" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "156f1dfc", + "metadata": {}, + "outputs": [], + "source": [ + "# coordinates\n", + "ds.coords" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "277d6064", + "metadata": {}, + "outputs": [], + "source": [ + "# Meta Data (Attributes)\n", + "ds.attrs" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d334681f", + "metadata": {}, + "outputs": [], + "source": [ + "# type\n", + "print(type(ds))\n", + "print(type(ds.variables))\n", + "print(type(ds.dims))\n", + "print(type(ds.coords))\n", + "print(type(ds.attrs))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "643399d6", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: retrieve variable names only\n", + "list(ds.variables)" + ] + }, + { + "cell_type": "markdown", + "id": "1cf36878", + "metadata": {}, + "source": [ + "#### Discussion: variables vs. coordinates vs. dimensions\n", + "From [Components of a NetCDF Dataset](https://iprc.soest.hawaii.edu/users/xfu/tool/guidef-7.html#):\n", + "\n", + "* Dimensions: A dimension may be used to represent a real physical dimension, for example, time, latitude, longitude, or height. A dimension might also be used to index other quantities, for example station or model-run-number.\n", + "A netCDF dimension has both a name and a length. A dimension length is an arbitrary positive integer, except that one dimension in a netCDF dataset can have the length UNLIMITED.\n", + "\n", + "* Variables: Variables are used to store the bulk of the data in a netCDF dataset. A variable represents an array of values of the same type. A scalar value is treated as a 0-dimensional array. 
A variable has a name, a data type, and a shape described by its list of dimensions specified when the variable is created. A variable may also have associated attributes, which may be added, deleted or changed after the variable is created.\n", + "\n", + "* Coordinate (Variables): It is legal for a variable to have the same name as a dimension. Such variables have no special meaning to the netCDF library. However there is a convention that such variables should be treated in a special way by software using this library.\n", + "A variable with the same name as a dimension is called a coordinate variable. It typically defines a physical coordinate corresponding to that dimension.\n", + "\n", + "\n", + "Tips: By definition, coordinates and dimensions are also variables. For example, `ds.time` is a coordinate, but it is also a special variable. In practice, however, when we talk about a variable we mean a regular (non-coordinate) variable. We will use this convention for the remainder of the notebook. " + ] + }, + { + "cell_type": "markdown", + "id": "d47120f9", + "metadata": {}, + "source": [ + "### Xarray Variable\n", + "\n", + "\n", + "We can use `ds.variables` to access the variables of a dataset. However, printing out the whole dataset can often be overwhelming. Instead, we will work on an individual variable (also called an xarray data array) using the `ds[var_name]` syntax, where `var_name` is the name of the variable. See the following examples in action.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5a57e136", + "metadata": {}, + "outputs": [], + "source": [ + "print(type(ds[\"time\"]))\n", + "ds[\"time\"]" + ] + }, + { + "cell_type": "markdown", + "id": "51ec643e", + "metadata": {}, + "source": [ + "#### Variable properties\n", + "For an individual variable, the following properties are the most commonly used, listed with the associated xarray attribute to retrieve them. 
(assuming var = ds[var_name])\n", + "* name: `var.name`\n", + "* data content: `var.data`\n", + "* attributes: `var.attrs`\n", + "* dimensions: `var.dims`\n", + "* data type: `var.data.dtype`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a632a525", + "metadata": {}, + "outputs": [], + "source": [ + "var_name = \"time\"\n", + "var = ds[var_name]\n", + "\n", + "print(\"var.name: \\n\", var.name, \"\\n\")\n", + "print(\"var.data: \\n\", var.data, \"\\n\")\n", + "print(\"var.attrs: \\n\", var.attrs, \"\\n\")\n", + "print(\"var.dims: \\n\", var.dims, \"\\n\")\n", + "print(\"var.data.dtype: \\n\", var.data.dtype, \"\\n\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b256b07f", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: indivdual variable info table (collect and display variable info as a tabular format)\n", + "df_info = pd.DataFrame()\n", + "df_info[\"var_name\"] = list(ds.variables)\n", + "df_info[\"dims\"] = df_info.var_name.apply(lambda x: ds[x].dims)\n", + "df_info[\"is_dim\"] = df_info.var_name.apply(lambda x: x in ds.dims)\n", + "df_info[\"n_dim\"] = df_info.var_name.apply(lambda x: len(ds[x].dims))\n", + "df_info[\"attrs\"] = df_info.var_name.apply(lambda x: ds[x].attrs)\n", + "df_info[\"dtype\"] = df_info.var_name.apply(lambda x: ds[x].data.dtype)\n", + "df_info" + ] + }, + { + "cell_type": "markdown", + "id": "e6a36b96", + "metadata": {}, + "source": [ + "### Data cleaning/Preprocessing (skipped)\n", + "Data cleaning and preprocessing is an important stage in the data analysis pipeline. However, it is outside the scope of this notebook. For more information about data cleaning and preprocessing basics, here are some references. 
\n", + "* [Xarray Fundamentals](https://earth-env-data-science.github.io/lectures/xarray/xarray.html)\n", + "* [Xarray Tutorial — Pangeo Gallery documentation](http://gallery.pangeo.io/repos/pangeo-data/pangeo-tutorial-gallery/xarray.html)\n", + "* [Pythonic Data Cleaning With pandas and NumPy](https://realpython.com/python-data-cleaning-numpy-pandas/)\n", + "* [Pandas - Cleaning Data](https://www.w3schools.com/python/pandas/pandas_cleaning.asp)" + ] + }, + { + "cell_type": "markdown", + "id": "ae6e1a69", + "metadata": {}, + "source": [ + "### Plotting\n", + "Data visualization (or plotting) is another important data analysis topic and deserves its own discussion. Here in this notebook we will only demonstrate basic tools for simple data visualization tasks.\n", + "\n", + "Here are some references you might find useful:\n", + "* xarray plotting: https://docs.xarray.dev/en/stable/user-guide/plotting.html\n", + "* Atmospheric Community Toolkit (ACT): https://arm-doe.github.io/ACT/index.html\n", + "\n", + "Note: this notebook is auto-generated using a template. It uses a generic rule to select the variable(s) to plot and is not customized for each individual notebook. Feel free to change the variables of interest, especially if a figure fails to plot." + ] + }, + { + "cell_type": "markdown", + "id": "7e417fb2", + "metadata": {}, + "source": [ + "#### 1-dimensional basic time series plot\n", + "\n", + "For the following plot we would like to find variables such that\n", + "* it has one and only one dimension\n", + "* \"time\" is its coordinate variable\n", + "* it is not a dimension itself,\n", + "* its name does not contain any of the substrings [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "\n", + "Please see the following example in action. 
(For more details about pandas filtering, please see the following references.)\n", + "* [pandas: multiple conditions while indexing data frame](https://stackoverflow.com/questions/22591174/pandas-multiple-conditions-while-indexing-data-frame-unexpected-behavior)\n", + "* [pandas.DataFrame.apply](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.apply.html)\n", + "* [How to test if a string contains one of the substrings in a list...](https://stackoverflow.com/questions/26577516/how-to-test-if-a-string-contains-one-of-the-substrings-in-a-list-in-pandas)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "30edac2d", + "metadata": {}, + "outputs": [], + "source": [ + "# (pandas query) filter n_dim==1, non-dimenssional, and not contains the following substrings\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter = df_info[(df_info.n_dim == 1) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (df_info.is_dim==False) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4777a659", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " var_1d = df_filter.var_name.values[0]\n", + " var_1d\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fb5ae985", + "metadata": {}, + "outputs": [], + "source": [ + "# Note: if failed, change to another variable to plot.\n", + "try:\n", + " ds[var_1d].plot()\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "markdown", + "id": "9e41066e", + "metadata": {}, + "source": [ + "#### 2-dimensional basic plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "09bd4adc", + "metadata": {}, + "outputs": [], + "source": [ + "# (pandas query) filter n_dim==2, non-dimenssional, and not contains the 
following substrings\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_2 = df_info[(df_info.n_dim == 2) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (df_info.is_dim==False) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter_2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2cdea1a9", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " var_2d = df_filter_2.var_name.values[0]\n", + " var_2d\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f5f48879", + "metadata": {}, + "outputs": [], + "source": [ + "# Note: if failed, change to another variable to plot.\n", + "try:\n", + " print(ds[var_2d].dims)\n", + " # ds[var_2d].plot()\n", + "\n", + " # conventionally, use \"time\" as x-axis\n", + " ds[var_2d].plot(x=\"time\")\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "markdown", + "id": "7554b2a6", + "metadata": {}, + "source": [ + "#### qc-plotting (optional)\n", + "\n", + "Note: act qc plotting has more strict requirement, one of them is the associated qc variable need to have \"flag_masks\" attributes. 
Which is added by using ds = act.io.armfiles.read_netcdf(files_list), then ds.clean.cleanup()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "71c2096f", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " ds_act = act.io.armfiles.read_netcdf(full_path)\n", + " print(type(ds_act))\n", + " ds_act.clean.cleanup()\n", + "\n", + " # or \n", + " # ds.clean.cleanup()\n", + "except Exception as e:\n", + " print(\"ERROR\", e)\n", + " ds_act = ds" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "844f8505", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter valid variables for ACT qc plotting\n", + "condition = (\"qc_\" + df_info.var_name).apply(lambda x: ds_act[x].attrs.get(\"flag_masks\") is not None \n", + " if x in list(ds.variables) else False)\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_3 = df_info[(df_info.is_dim==False) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings))) &\n", + " condition\n", + " ]\n", + "df_filter_3" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ecd38cc6", + "metadata": {}, + "outputs": [], + "source": [ + "# QC Plot\n", + "try:\n", + " qc_variable = df_filter_3.var_name.values[0]\n", + " print(qc_variable)\n", + "except Exception as e:\n", + " print(e)\n", + " \n", + "try:\n", + "\n", + " # Plot\n", + " qc_display = act.plotting.TimeSeriesDisplay(ds_act)\n", + " qc_display.add_subplots((2,), figsize = (9.5,10))\n", + " qc_ax = qc_display.plot(qc_variable, subplot_index=(0,), set_title=\"QC results on field: \" + qc_variable,)\n", + " qc_ax.grid()\n", + " qc_display.qc_flag_block_plot(qc_variable, subplot_index=(1,))\n", + " qc_ax.set_xlabel(f\"UTC Time starts at {ds.time.data[0]}\")\n", + "\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e)" + ] + }, + { + "cell_type": "markdown", + "id": "d76e4d27", + "metadata": 
{}, + "source": [ + "#### bonus: choose variables to plot from a dropdown menu " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bb733804", + "metadata": {}, + "outputs": [], + "source": [ + "# Valid variables filtering\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_4 = df_info[(df_info.is_dim==False) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter_4" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "82d5c20d", + "metadata": {}, + "outputs": [], + "source": [ + "# example 1: using xarray plot\n", + "\n", + "# Uncomment the following cell to try the interactive plot (ctrl + /)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2fc9aaa4", + "metadata": {}, + "outputs": [], + "source": [ + "# %matplotlib widget\n", + "# plt.clf()\n", + "\n", + "# fig, ax = plt.subplots(figsize=(10, 4))\n", + "\n", + "# available_variables = df_filter_4.var_name.values\n", + "# @widgets.interact(var=available_variables)\n", + "# def update(var = available_variables[0]):\n", + "# fig.clear() # Remove old lines from plot and plot new one\n", + "# if len(ds[var].dims)==2:\n", + "# ds[var].plot(x=\"time\", add_colorbar=False)\n", + "# else:\n", + "# ds[var].plot()\n", + "# plt.grid()\n", + "# plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fabdd802", + "metadata": {}, + "outputs": [], + "source": [ + "# example 2: using act plot\n", + "\n", + "# Uncomment the following cell to try the interactive plot (ctrl + /)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "962b4186", + "metadata": {}, + "outputs": [], + "source": [ + "# %matplotlib widget\n", + "# plt.clf()\n", + "\n", + "# available_variables = df_filter_4.var_name.values\n", + "\n", + "\n", + "# @widgets.interact(var=available_variables)\n", + "# def 
update(var = available_variables[0]):\n", + "\n", + "# i_display = act.plotting.TimeSeriesDisplay(ds_act)\n", + "# i_display.add_subplots((1,), figsize=(10, 4))\n", + "# ax = i_display.plot(var, subplot_index=(0,), set_title=f\"{var} ({ds_act[var].attrs['long_name']})\",)\n", + "\n", + "# ax.set_xlabel(f\"UTC Time starts at {ds.time.data[0]}\")\n", + "# ax.grid()\n", + "# plt.show()\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.16" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/2DS-AIR/aaf2dsh.c1.ipynb b/VAPs/quicklook/2DS-AIR/aaf2dsh.c1.ipynb new file mode 100644 index 00000000..e033e77a --- /dev/null +++ b/VAPs/quicklook/2DS-AIR/aaf2dsh.c1.ipynb @@ -0,0 +1,297 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# AAF2DSH.C1 Plots\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/2ds-air) for more information about this vap." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib widget\n", + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import ipywidgets as widgets\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "# Data archive directory\n", + "DATA_DIR = r'/data/archive/'\n", + "\n", + "# Datastream info\n", + "DATASTREAM_NAME = 'aaf2dsh'\n", + "DATA_LEVEL = 'c1'\n", + "LOCATIONS = [{'end_date': '2018-12-08', 'facility': 'F1', 'site': 'cor', 'start_date': '2018-11-04'}, {'end_date': '2018-02-19', 'facility': 'F1', 'site': 'ena', 'start_date': '2017-06-21'}, {'end_date': '2016-09-22', 'facility': 'F1', 'site': 'sgp', 'start_date': '2016-04-25'}]" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "9faaf875", + "metadata": {}, + "source": [ + "## Define site, facility, and date range" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ac6764f5", + "metadata": {}, + "outputs": [], + "source": [ + "print(\"The following locations and date ranges are available for this VAP:\")\n", + "display(pd.DataFrame(LOCATIONS, columns=['site', 'facility', 'start_date', 'end_date']))" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "8d132223", + "metadata": {}, + "source": [ + "#### Define site, facility, and date range (date format: YYYY-MM-DD) using the variables below:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e563983a", + "metadata": {}, + "outputs": [], + "source": [ + "site_facility = ( 'sgp', 'F1' )\n", + "\n", + "date_start = '2016-09-21'\n", + "date_end = '2016-09-22'" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "bccd3dfe-2f99-49a5-bace-ea37e8dd8fc3", + "metadata": {}, + "source": [ + "## Load data 
files\n", + "Load data files from /data/archive/" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cb4b9a26-c574-49c0-a521-658fa553e39e", + "metadata": {}, + "outputs": [], + "source": [ + "# Compile list of files\n", + "site, facility = site_facility\n", + "d_date_start = datetime.strptime(date_start, '%Y-%m-%d')\n", + "d_date_end = datetime.strptime(date_end, '%Y-%m-%d')\n", + "dir_path = os.path.join(DATA_DIR + site, site + DATASTREAM_NAME + facility + r'.' + DATA_LEVEL )\n", + "dir_path\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6be8f3dc", + "metadata": {}, + "outputs": [], + "source": [ + "from datetime import date, timedelta\n", + "import pandas as pd\n", + "\n", + "def get_ARM_formated_dates(start_date, end_date):\n", + " \"\"\"\n", + " Get a list of ARM conventional formated date lists, based on start_date and end_date(inclusive)\n", + " EXAMPLE:\n", + " get_ARM_formated_dates(start_date=\"20180219\", end_date=\"20180221\")\n", + " >> [\"20180219\", \"20180220\", \"20180221\"] \n", + " \"\"\"\n", + " \n", + " _start_date = pd.to_datetime(start_date)\n", + " _end_date = pd.to_datetime(end_date)\n", + " \n", + " delta = _end_date - _start_date # returns timedelta \n", + " dates = []\n", + "\n", + " for i in range(delta.days + 1):\n", + " day = _start_date + timedelta(days=i)\n", + " day_formated = day.strftime(format=\"%Y%m%d\")\n", + " dates.append(day_formated)\n", + " return dates\n", + "\n", + "\n", + "get_ARM_formated_dates(start_date=date_start, end_date=date_end)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "51feea2e", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter a list of files based on date pattern\n", + "import glob\n", + "dates = get_ARM_formated_dates(start_date=date_start, end_date=date_end)\n", + "files_filter = []\n", + "for date in dates:\n", + " files_filter += glob.glob(f'{dir_path}/*.{date}*.*')\n", + " files_filter\n", + "files_filter" + ] + 
}, + { + "cell_type": "code", + "execution_count": null, + "id": "b0e5d0d7", + "metadata": {}, + "outputs": [], + "source": [ + "# Load files as a single dataset\n", + "files_list = files_filter \n", + "ds = act.io.armfiles.read_netcdf(files_list)\n", + "ds.clean.cleanup()\n", + "print(f'{len(files_list)} files loaded')\n", + "ds\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "4a551094-9ec0-4b64-b80a-9940573c2f50", + "metadata": {}, + "source": [ + "## Plot time series data\n", + "#### Define the list of variables to be plotted:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d09b789e-84f1-4605-846b-a72c110c8048", + "metadata": {}, + "outputs": [], + "source": [ + "variables_to_plot = ['total_number_concentration', 'number_concentration']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3458fb08-035b-4898-9253-0a94e6f9c97b", + "metadata": {}, + "outputs": [], + "source": [ + "ts_display = act.plotting.TimeSeriesDisplay(ds)\n", + "ts_display.add_subplots((len(variables_to_plot),), figsize = (9.5,4*len(variables_to_plot)))\n", + "\n", + "for i,v in enumerate(variables_to_plot):\n", + " ts_ax = ts_display.plot(v, subplot_index=(i,), set_title=ds.variables[v].attrs['long_name'],)\n", + " ts_ax.grid()\n", + "\n", + "plt.show()\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "2308db16-f362-4033-a11a-c3e5e75ad9ba", + "metadata": {}, + "source": [ + "## Field selection dropdown menu\n", + "Select variable to be plotted from a dropdown menu" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d5792fa8-2253-441a-8ac6-1e07a0345031", + "metadata": {}, + "outputs": [], + "source": [ + "plt.ioff()\n", + "\n", + "# populate dropdown menu with available variables \n", + "available_variables = [v for v in ds.variables if not('time' in v or v.startswith('qc_') or v.startswith('source_')) and 'long_name' in ds.variables[v].attrs]\n", + "d_variable = 
'total_number_concentration'\n", + "dropdown = widgets.Dropdown(\n", + " options = [(ds.variables[v].attrs['long_name'], v) for v in available_variables],\n", + " value= d_variable,\n", + " description='Field:',\n", + " disabled=False,\n", + ")\n", + "dropdown.layout.margin = '0px 30% 0px 20%'\n", + "dropdown.layout.width = '50%'\n", + "\n", + "# set up display\n", + "i_display = act.plotting.TimeSeriesDisplay(ds)\n", + "i_display.add_subplots((1,), figsize = (9.5,5))\n", + "i_ax = i_display.plot(d_variable, subplot_index=(0,), set_title=ds.variables[d_variable].attrs['long_name'],)\n", + "i_ax.grid()\n", + "i_fig = i_display.fig\n", + "\n", + "# update plot callback function\n", + "def update_plot(change):\n", + " i_ax.cla()\n", + " i_ax_new = i_display.plot(change.new, subplot_index=(0,), set_title=ds.variables[change.new].attrs['long_name'],)\n", + " i_ax_new.grid()\n", + " i_fig.canvas.draw()\n", + " i_fig.canvas.flush_events()\n", + "\n", + "dropdown.observe(update_plot, names='value')\n", + "\n", + "widgets.AppLayout(\n", + " header=dropdown,\n", + " center=i_fig.canvas,\n", + " pane_heights=[1, 6,1]\n", + ")\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.12" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/2DS-AIR/aaf2dsv.c1.ipynb b/VAPs/quicklook/2DS-AIR/aaf2dsv.c1.ipynb new file mode 100644 index 00000000..376bf7f4 --- /dev/null +++ b/VAPs/quicklook/2DS-AIR/aaf2dsv.c1.ipynb @@ -0,0 +1,297 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "id": 
"70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# AAF2DSV.C1 Plots\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/2ds-air) for more information about this vap." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib widget\n", + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import ipywidgets as widgets\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "# Data archive directory\n", + "DATA_DIR = r'/data/archive/'\n", + "\n", + "# Datastream info\n", + "DATASTREAM_NAME = 'aaf2dsv'\n", + "DATA_LEVEL = 'c1'\n", + "LOCATIONS = [{'end_date': '2018-12-08', 'facility': 'F1', 'site': 'cor', 'start_date': '2018-11-04'}, {'end_date': '2018-02-19', 'facility': 'F1', 'site': 'ena', 'start_date': '2017-06-21'}, {'end_date': '2016-09-22', 'facility': 'F1', 'site': 'sgp', 'start_date': '2016-04-25'}]" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "9faaf875", + "metadata": {}, + "source": [ + "## Define site, facility, and date range" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ac6764f5", + "metadata": {}, + "outputs": [], + "source": [ + "print(\"The following locations and date ranges are available for this VAP:\")\n", + "display(pd.DataFrame(LOCATIONS, columns=['site', 'facility', 'start_date', 'end_date']))" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "8d132223", + "metadata": {}, + "source": [ + "#### Define site, facility, and date range (date format: YYYY-MM-DD) using the variables below:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e563983a", + "metadata": {}, + "outputs": [], + "source": [ + "site_facility = ( 'sgp', 'F1' )\n", + "\n", + "date_start 
= '2016-09-21'\n", + "date_end = '2016-09-22'" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "bccd3dfe-2f99-49a5-bace-ea37e8dd8fc3", + "metadata": {}, + "source": [ + "## Load data files\n", + "Load data files from /data/archive/" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cb4b9a26-c574-49c0-a521-658fa553e39e", + "metadata": {}, + "outputs": [], + "source": [ + "# Compile list of files\n", + "site, facility = site_facility\n", + "d_date_start = datetime.strptime(date_start, '%Y-%m-%d')\n", + "d_date_end = datetime.strptime(date_end, '%Y-%m-%d')\n", + "dir_path = os.path.join(DATA_DIR + site, site + DATASTREAM_NAME + facility + r'.' + DATA_LEVEL )\n", + "dir_path\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6be8f3dc", + "metadata": {}, + "outputs": [], + "source": [ + "from datetime import date, timedelta\n", + "import pandas as pd\n", + "\n", + "def get_ARM_formated_dates(start_date, end_date):\n", + " \"\"\"\n", + " Get a list of ARM conventional formated date lists, based on start_date and end_date(inclusive)\n", + " EXAMPLE:\n", + " get_ARM_formated_dates(start_date=\"20180219\", end_date=\"20180221\")\n", + " >> [\"20180219\", \"20180220\", \"20180221\"] \n", + " \"\"\"\n", + " \n", + " _start_date = pd.to_datetime(start_date)\n", + " _end_date = pd.to_datetime(end_date)\n", + " \n", + " delta = _end_date - _start_date # returns timedelta \n", + " dates = []\n", + "\n", + " for i in range(delta.days + 1):\n", + " day = _start_date + timedelta(days=i)\n", + " day_formated = day.strftime(format=\"%Y%m%d\")\n", + " dates.append(day_formated)\n", + " return dates\n", + "\n", + "\n", + "get_ARM_formated_dates(start_date=date_start, end_date=date_end)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "51feea2e", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter a list of files based on date pattern\n", + "import glob\n", + "dates = 
get_ARM_formated_dates(start_date=date_start, end_date=date_end)\n", + "files_filter = []\n", + "for date in dates:\n", + " files_filter += glob.glob(f'{dir_path}/*.{date}*.*')\n", + " files_filter\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0e5d0d7", + "metadata": {}, + "outputs": [], + "source": [ + "# Load files as a single dataset\n", + "files_list = files_filter \n", + "ds = act.io.armfiles.read_netcdf(files_list)\n", + "ds.clean.cleanup()\n", + "print(f'{len(files_list)} files loaded')\n", + "ds\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "4a551094-9ec0-4b64-b80a-9940573c2f50", + "metadata": {}, + "source": [ + "## Plot time series data\n", + "#### Define the list of variables to be plotted:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d09b789e-84f1-4605-846b-a72c110c8048", + "metadata": {}, + "outputs": [], + "source": [ + "variables_to_plot = ['total_number_concentration', 'number_concentration']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3458fb08-035b-4898-9253-0a94e6f9c97b", + "metadata": {}, + "outputs": [], + "source": [ + "ts_display = act.plotting.TimeSeriesDisplay(ds)\n", + "ts_display.add_subplots((len(variables_to_plot),), figsize = (9.5,4*len(variables_to_plot)))\n", + "\n", + "for i,v in enumerate(variables_to_plot):\n", + " ts_ax = ts_display.plot(v, subplot_index=(i,), set_title=ds.variables[v].attrs['long_name'],)\n", + " ts_ax.grid()\n", + "\n", + "plt.show()\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "2308db16-f362-4033-a11a-c3e5e75ad9ba", + "metadata": {}, + "source": [ + "## Field selection dropdown menu\n", + "Select variable to be plotted from a dropdown menu" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d5792fa8-2253-441a-8ac6-1e07a0345031", + "metadata": {}, + "outputs": [], + "source": [ + "plt.ioff()\n", + "\n", + "# populate dropdown menu with 
available variables \n", + "available_variables = [v for v in ds.variables if not('time' in v or v.startswith('qc_') or v.startswith('source_')) and 'long_name' in ds.variables[v].attrs]\n", + "d_variable = 'total_number_concentration'\n", + "dropdown = widgets.Dropdown(\n", + " options = [(ds.variables[v].attrs['long_name'], v) for v in available_variables],\n", + " value= d_variable,\n", + " description='Field:',\n", + " disabled=False,\n", + ")\n", + "dropdown.layout.margin = '0px 30% 0px 20%'\n", + "dropdown.layout.width = '50%'\n", + "\n", + "# set up display\n", + "i_display = act.plotting.TimeSeriesDisplay(ds)\n", + "i_display.add_subplots((1,), figsize = (9.5,5))\n", + "i_ax = i_display.plot(d_variable, subplot_index=(0,), set_title=ds.variables[d_variable].attrs['long_name'],)\n", + "i_ax.grid()\n", + "i_fig = i_display.fig\n", + "\n", + "# update plot callback function\n", + "def update_plot(change):\n", + " i_ax.cla()\n", + " i_ax_new = i_display.plot(change.new, subplot_index=(0,), set_title=ds.variables[change.new].attrs['long_name'],)\n", + " i_ax_new.grid()\n", + " i_fig.canvas.draw()\n", + " i_fig.canvas.flush_events()\n", + "\n", + "dropdown.observe(update_plot, names='value')\n", + "\n", + "widgets.AppLayout(\n", + " header=dropdown,\n", + " center=i_fig.canvas,\n", + " pane_heights=[1, 6,1]\n", + ")\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.12" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/ACSMCDCE/ACSMCDCE_tutorial.ipynb 
b/VAPs/quicklook/ACSMCDCE/ACSMCDCE_tutorial.ipynb new file mode 100644 index 00000000..87937544 --- /dev/null +++ b/VAPs/quicklook/ACSMCDCE/ACSMCDCE_tutorial.ipynb @@ -0,0 +1,838 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# ACSMCDCE.C1 Notebook\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/acsmcdce) for more information about this vap." + ] + }, + { + "cell_type": "markdown", + "id": "97097763", + "metadata": {}, + "source": [ + "In this notebook, we demonstrate the workflow to explore ARM vap data (using acsmcdce as an example.) Value-added products (VAPs) are higher-order data products that have been analyzed and processed to ease scientists’ use of ARM data in atmospheric research and global climate models. \n", + "Here is the main content we will cover." + ] + }, + { + "cell_type": "markdown", + "id": "eddec40f", + "metadata": {}, + "source": [ + "# Table of Content\n", + "## Access the data\n", + "* How to retrieve the data\n", + "* Data path and file name conventions\n", + "* Load data\n", + "## Explore the data\n", + "* NetCDF Data structure\n", + "* Xarray essentials\n", + "* Xarray Variable\n", + "## Plot the data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "import random\n", + "\n", + "import glob\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "id": "2372d5be", + "metadata": {}, + "source": [ + "## Access the data" + ] + }, + { + "cell_type": "markdown", + "id": "5b0d9684", + "metadata": {}, + "source": [ + "### How to retrieve the data\n", + "We assume the path 
\"/data/archive\" is available where you are running this notebook. You can use `os.path.exists(\"/data/archive\")` to verify if the path exists on your machine.\n", + "\n", + "\n", + "### Data path and file name conventions\n", + "There are several common terminologies regarding ARM data, for example, data-stream-name, data-level, site, facility, instrument, etc. (For more details, please see [ARM Data File Standards Version 1.2](https://www.google.com/search?q=arm+datastream+facility+cite+definition&rlz=1C1GCEJ_enUS1029US1029&ei=hb41ZICrBPukqtsPvMODIA&ved=0ahUKEwjAgeiS0aL-AhV7kmoFHbzhAAQQ4dUDCBA&uact=5&oq=arm+datastream+facility+cite+definition&gs_lcp=Cgxnd3Mtd2l6LXNlcnAQAzIFCAAQogQyBQgAEKIEMgUIABCiBDIFCAAQogQyBQgAEKIEOgoIABBHENYEELADOgoIIRCgARDDBBAKSgQIQRgAUJ4DWIoOYO0PaAFwAXgAgAGMAYgBuQmSAQMyLjmYAQCgAQHIAQjAAQE&sclient=gws-wiz-serp).) \n", + "\n", + "For example, this notebook is called `acsmcdce.c1`, where `acsmcdce` is the \"datastream name\", and `c1` is the \"data level\".\n", + "\n", + "This datastream also contains site `epc` and facility `M1`. (Note: individual datastream might have multiple site-facility pairs.)\n", + "In such a case, the data of this data-stream is stored at `/data/archive/epc/epcacsmcdceM1.c1`, which is in the format of `{DATA_DIR}/{site}/{site}{DATASTREAM_NAME}{facility}.{DATA_LEVEL}`. We can use the following method to assign the data-stream directory `datastream_dir = os.path.join(DATA_DIR, site, site + DATASTREAM_NAME + facility + '.' + DATA_LEVEL )`\n", + "\n", + "The data files under datastream_dir also follow naming conventions. Once you reach the datastream_dir level, the most important file naming convention to differentiate the files is \"yyyyMMdd.hhmmss\", which comes in handy to filter out files based on datetime. 
For example, we can use `glob.glob(f'{datastream_dir}/*.200709*.*')` to filter files from September 2007.\n", + "\n", + "(Note: refer to https://adc.arm.gov/discovery/#/ and the https://adc.arm.gov/solr8/metadata/select API to explore ARM datastreams and their associated available sites and facilities.)\n", + "\n", + "Please see the following examples in action" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d7e9eb85", + "metadata": {}, + "outputs": [], + "source": [ + "# Verify if DATA_DIR path exists\n", + "DATA_DIR = \"/data/archive\"\n", + "os.path.exists(DATA_DIR)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "586993fd", + "metadata": {}, + "outputs": [], + "source": [ + "# Speicify datastream_dir following the path conventions and check its existence\n", + "DATASTREAM_NAME = \"acsmcdce\"\n", + "DATA_LEVEL = \"c1\"\n", + "site = \"epc\"\n", + "facility = \"M1\"\n", + "datastream_dir = os.path.join(DATA_DIR, site, site + DATASTREAM_NAME + facility + '.' 
+ DATA_LEVEL )\n", + "print(datastream_dir)\n", + "print(os.path.exists(datastream_dir))\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0742f7c1", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: list 5 (random) files under datastream_dir\n", + "files = os.listdir(datastream_dir)\n", + "files[:5]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "39b98a36", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: get most recent file\n", + "list_of_files = glob.glob(f\"{datastream_dir}/*\") # * means all if need specific format then *.csv\n", + "latest_file = max(list_of_files, key=os.path.getctime)\n", + "latest_file" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "902d514e", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus sort datastream files based on datetime\n", + "files = os.listdir(datastream_dir)\n", + "file_sorted = files.copy()\n", + "file_sorted.sort() \n", + "print(file_sorted[:5])\n", + "\n", + "# to reverse\n", + "file_sorted_reverse = files.copy() \n", + "file_sorted_reverse.sort(reverse=True)\n", + "print(file_sorted_reverse[:5])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ec5923b2", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: pattern matching\n", + "# filter the 200709** files under datastream_dir\n", + "files_filter = glob.glob(f'{datastream_dir}/*.200709*.*')\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c4a4aa18", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: use armnotebook_utils.file_filter (TODO) to filter files based on datastream info \n" + ] + }, + { + "cell_type": "markdown", + "id": "cfeb9efc", + "metadata": {}, + "source": [ + "### Load data\n", + "The arm data is stored in [NetCDF](https://en.wikipedia.org/wiki/NetCDF#:~:text=%22NetCDF%20(network%20Common%20Data%20Form,format%20for%20representing%20scientific%20data.) format. 
We can use xarray's `open_dataset` method to load single file, or `open_mfdataset` to open multiple files. (Note: the latter will still return a single xarray dataset object by combining multiple files.)\n", + "\n", + "Note: open_dataset keeps the file handle open and lazy loads its contents. All parameters are passed directly to open_dataset. It is a preferable method over load_dataset for memory-effiency. (more details, please see [xarray.open_dataset](https://docs.xarray.dev/en/stable/generated/xarray.open_dataset.html))\n", + "\n", + "See the following example in action" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a440a329", + "metadata": {}, + "outputs": [], + "source": [ + "# open single file\n", + "full_path = latest_file\n", + "print(full_path)\n", + "ds_single = xr.open_dataset(full_path)\n", + "ds_single" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0143a3d", + "metadata": {}, + "outputs": [], + "source": [ + "print(ds_single)\n", + "print(type(ds_single))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1c0f8939", + "metadata": {}, + "outputs": [], + "source": [ + "# open multiple files\n", + "n_files = 3\n", + "full_paths = [os.path.join(datastream_dir, f_path) for f_path in file_sorted_reverse[:n_files]]\n", + "print(full_paths)\n", + "try: # Note: sometimes multiple files cannot be merged, so we used try except here.\n", + " ds_mutiple = xr.open_mfdataset(full_paths)\n", + " # ds_mutiple\n", + " print(type(ds_mutiple))\n", + " print(ds_mutiple)\n", + "except Exception as e:\n", + " print(e)" + ] + }, + { + "cell_type": "markdown", + "id": "3fa59943", + "metadata": {}, + "source": [ + "## Explore the data" + ] + }, + { + "cell_type": "markdown", + "id": "571f69b9", + "metadata": {}, + "source": [ + "### NetCDF Data structure \n", + "\n", + "\n", + "(If you are confident with NetCDF basics and xarray essentials, feel free to skip this session.)\n", + "Before we dive into 
data exploration, there are some essential concepts we should be familiar with (shown below): \n", + "* Dataset\n", + "* Data array\n", + "* Variable\n", + "* Dimension\n", + "* Coordinate\n", + "* Data Type\n", + "* Meta Data (Attributes)\n", + "\n", + "We will not go into details about NetCDF basics, but here are some references you might find helpful\n", + "* [Components of a NetCDF Dataset](https://iprc.soest.hawaii.edu/users/xfu/tool/guidef-7.html#)\n", + "* [Network Common Data Form (NetCDF)](https://www.unidata.ucar.edu/software/netcdf/) and [A Brief History of (netCDF) Time](https://www.unidata.ucar.edu/software/netcdf/time/recs.html)\n", + "\n", + "\n", + "\n", + "### Xarray essentials\n", + "Earlier we introduced the xarray package and used `open_dataset` and `open_mfdataset` to retrieve a NetCDF data file as an xarray Dataset object. Recall that you can review the data in a notebook by using `print(ds)` or `ds`. \n", + "\n", + "Using xarray to retrieve the aforementioned NetCDF basics is straightforward. 
In general,\n", + "* Dataset: `ds`\n", + "* Data array: `ds.variables`\n", + "* Variable: `ds.variables`\n", + "* Dimension: `ds.dims`\n", + "* Coordinate: `ds.coords`\n", + "* Data Type: `type`\n", + "* Meta Data (Attributes): `ds.attrs`\n", + "\n", + "Also, here are some references if you are new to xarray\n", + "* [Xarray in 45 minutes](https://tutorial.xarray.dev/overview/xarray-in-45-min.html)\n", + "* [Handling NetCDF Files using XArray for Absolute Beginners](https://towardsdatascience.com/handling-netcdf-files-using-xarray-for-absolute-beginners-111a8ab4463f)\n", + "\n", + "Try the following commands in action\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b3eac323", + "metadata": {}, + "outputs": [], + "source": [ + "ds = ds_single" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "77ecf85d", + "metadata": {}, + "outputs": [], + "source": [ + "# Dataset \n", + "ds" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "25e7de09", + "metadata": {}, + "outputs": [], + "source": [ + "# Data array, Variable\n", + "# Note: the info can be overwhelming. 
Do not attempt to grasp everything at the first glance.\n", + "ds.variables" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8c41b67e", + "metadata": {}, + "outputs": [], + "source": [ + "# dimensions\n", + "ds.dims" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "156f1dfc", + "metadata": {}, + "outputs": [], + "source": [ + "# coordinates\n", + "ds.coords" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "277d6064", + "metadata": {}, + "outputs": [], + "source": [ + "# Meta Data (Attributes)\n", + "ds.attrs" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d334681f", + "metadata": {}, + "outputs": [], + "source": [ + "# type\n", + "print(type(ds))\n", + "print(type(ds.variables))\n", + "print(type(ds.dims))\n", + "print(type(ds.coords))\n", + "print(type(ds.attrs))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "643399d6", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: retrieve variable names only\n", + "list(ds.variables)" + ] + }, + { + "cell_type": "markdown", + "id": "1cf36878", + "metadata": {}, + "source": [ + "#### Discussion: variable vs. coordinates vs. dimensions. \n", + "From [Components of a NetCDF Dataset](https://iprc.soest.hawaii.edu/users/xfu/tool/guidef-7.html#):\n", + "\n", + "* Dimension: A dimension may be used to represent a real physical dimension, for example, time, latitude, longitude, or height. A dimension might also be used to index other quantities, for example station or model-run-number.\n", + "A netCDF dimension has both a name and a length. A dimension length is an arbitrary positive integer, except that one dimension in a netCDF dataset can have the length UNLIMITED.\n", + "\n", + "* Variables: Variables are used to store the bulk of the data in a netCDF dataset. A variable represents an array of values of the same type. A scalar value is treated as a 0-dimensional array. 
A variable has a name, a data type, and a shape described by its list of dimensions specified when the variable is created. A variable may also have associated attributes, which may be added, deleted or changed after the variable is created.\n", + "\n", + "* Coordinate (Variables): It is legal for a variable to have the same name as a dimension. Such variables have no special meaning to the netCDF library. However there is a convention that such variables should be treated in a special way by software using this library.\n", + "A variable with the same name as a dimension is called a coordinate variable. It typically defines a physical coordinate corresponding to that dimension.\n", + "\n", + "\n", + "Tips: By definition, coordinates and dimensions are also variables. For example, ds.time is a coordinate but it is also a special variable. But in practice, when we talk about a variable, we mean a regular/non-coordinate variable. We will use this convention for the remainder of the notebook. " + ] + }, + { + "cell_type": "markdown", + "id": "d47120f9", + "metadata": {}, + "source": [ + "### Xarray Variable\n", + "\n", + "\n", + "We can use `ds.variables` to access the variables of a dataset. But many times printing out the whole dataset can be overwhelming. Instead, we would work on the individual variable (also called Xarray data array.) using the `ds[var_name]` syntax, where `var_name` is the name of the variable. See the following examples in action.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5a57e136", + "metadata": {}, + "outputs": [], + "source": [ + "print(type(ds[\"time\"]))\n", + "ds[\"time\"]" + ] + }, + { + "cell_type": "markdown", + "id": "51ec643e", + "metadata": {}, + "source": [ + "#### Variable properties\n", + "For an individual variable, the following properties are the most commonly used, listed with the associated xarray attribute to retrieve them. 
(assuming var = ds[var_name])\n", + "* name: `var.name`\n", + "* data content: `var.data`\n", + "* attributes: `var.attrs`\n", + "* dimensions: `var.dims`\n", + "* data type: `var.data.dtype`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a632a525", + "metadata": {}, + "outputs": [], + "source": [ + "var_name = \"time\"\n", + "var = ds[var_name]\n", + "\n", + "print(\"var.name: \\n\", var.name, \"\\n\")\n", + "print(\"var.data: \\n\", var.data, \"\\n\")\n", + "print(\"var.attrs: \\n\", var.attrs, \"\\n\")\n", + "print(\"var.dims: \\n\", var.dims, \"\\n\")\n", + "print(\"var.data.dtype: \\n\", var.data.dtype, \"\\n\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b256b07f", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: individual variable info table (collect and display variable info in a tabular format)\n", + "df_info = pd.DataFrame()\n", + "df_info[\"var_name\"] = list(ds.variables)\n", + "df_info[\"dims\"] = df_info.var_name.apply(lambda x: ds[x].dims)\n", + "df_info[\"is_dim\"] = df_info.var_name.apply(lambda x: x in ds.dims)\n", + "df_info[\"n_dim\"] = df_info.var_name.apply(lambda x: len(ds[x].dims))\n", + "df_info[\"attrs\"] = df_info.var_name.apply(lambda x: ds[x].attrs)\n", + "df_info[\"dtype\"] = df_info.var_name.apply(lambda x: ds[x].data.dtype)\n", + "df_info" + ] + }, + { + "cell_type": "markdown", + "id": "e6a36b96", + "metadata": {}, + "source": [ + "### Data cleaning/Preprocessing (skipped)\n", + "Data cleaning and preprocessing is an important stage in the data analysis pipeline. However, it is out of the scope of this notebook. For more information about data cleaning and preprocessing basics, here are some references. 
\n", + "* [Xarray Fundamentals](https://earth-env-data-science.github.io/lectures/xarray/xarray.html)\n", + "* [Xarray Tutorial — Pangeo Gallery documentation](http://gallery.pangeo.io/repos/pangeo-data/pangeo-tutorial-gallery/xarray.html)\n", + "* [Pythonic Data Cleaning With pandas and NumPy](https://realpython.com/python-data-cleaning-numpy-pandas/)\n", + "* [Pandas - Cleaning Data](https://www.w3schools.com/python/pandas/pandas_cleaning.asp)" + ] + }, + { + "cell_type": "markdown", + "id": "ae6e1a69", + "metadata": {}, + "source": [ + "### Plotting\n", + "Data visualization (or plotting) is another important data analysis topic and deserves its own discussion. Here in this notebook we will only demonstrate basic tools for simple data visualization tasks.\n", + "\n", + "Here are some references you might find useful:\n", + "* xarray plotting: https://docs.xarray.dev/en/stable/user-guide/plotting.html\n", + "* Atmospheric Community Toolkit (ACT): https://arm-doe.github.io/ACT/index.html\n", + "\n", + "Note: this notebook is auto-generated using a template. It uses a general idea to select variable(s) to plot and is not customized to each individual notebook. Feel free to change the variables of interest, especially if a certain figure fails to plot." + ] + }, + { + "cell_type": "markdown", + "id": "7e417fb2", + "metadata": {}, + "source": [ + "#### 1-dimensional basic time series plot\n", + "\n", + "For the following plot we would like to find variables such that\n", + "* it has one and only one dimension\n", + "* \"time\" is its coordinate variable\n", + "* it is not a dimension itself,\n", + "* it is not a special variable with substrings within [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "\n", + "Please see the following example in action. 
(For more details about pandas filtering, please see the following references.)\n", + "* [pandas: multiple conditions while indexing data frame](https://stackoverflow.com/questions/22591174/pandas-multiple-conditions-while-indexing-data-frame-unexpected-behavior)\n", + "* [pandas.DataFrame.apply](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.apply.html)\n", + "* [How to test if a string contains one of the substrings in a list...](https://stackoverflow.com/questions/26577516/how-to-test-if-a-string-contains-one-of-the-substrings-in-a-list-in-pandas)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "30edac2d", + "metadata": {}, + "outputs": [], + "source": [ + "# (pandas query) filter n_dim==1, non-dimensional, and not contains the following substrings\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter = df_info[(df_info.n_dim == 1) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (df_info.is_dim==False) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4777a659", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " var_1d = df_filter.var_name.values[0]\n", + " var_1d\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fb5ae985", + "metadata": {}, + "outputs": [], + "source": [ + "# Note: if failed, change to another variable to plot.\n", + "try:\n", + " ds[var_1d].plot()\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "markdown", + "id": "9e41066e", + "metadata": {}, + "source": [ + "#### 2-dimensional basic plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "09bd4adc", + "metadata": {}, + "outputs": [], + "source": [ + "# (pandas query) filter n_dim==2, non-dimensional, and not contains the 
following substrings\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_2 = df_info[(df_info.n_dim == 2) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (df_info.is_dim==False) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter_2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2cdea1a9", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " var_2d = df_filter_2.var_name.values[0]\n", + " var_2d\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f5f48879", + "metadata": {}, + "outputs": [], + "source": [ + "# Note: if failed, change to another variable to plot.\n", + "try:\n", + " print(ds[var_2d].dims)\n", + " # ds[var_2d].plot()\n", + "\n", + " # conventionally, use \"time\" as x-axis\n", + " ds[var_2d].plot(x=\"time\")\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "markdown", + "id": "7554b2a6", + "metadata": {}, + "source": [ + "#### qc-plotting (optional)\n", + "\n", + "Note: act qc plotting has more strict requirement, one of them is the associated qc variable need to have \"flag_masks\" attributes. 
Which is added by using ds = act.io.armfiles.read_netcdf(files_list), then ds.clean.cleanup()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "71c2096f", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " ds_act = act.io.armfiles.read_netcdf(full_path)\n", + " print(type(ds_act))\n", + " ds_act.clean.cleanup()\n", + "\n", + " # or \n", + " # ds.clean.cleanup()\n", + "except Exception as e:\n", + " print(\"ERROR\", e)\n", + " ds_act = ds" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "844f8505", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter valid variables for ACT qc plotting\n", + "condition = (\"qc_\" + df_info.var_name).apply(lambda x: ds_act[x].attrs.get(\"flag_masks\") is not None \n", + " if x in list(ds.variables) else False)\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_3 = df_info[(df_info.is_dim==False) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings))) &\n", + " condition\n", + " ]\n", + "df_filter_3" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ecd38cc6", + "metadata": {}, + "outputs": [], + "source": [ + "# QC Plot\n", + "try:\n", + " qc_variable = df_filter_3.var_name.values[0]\n", + " print(qc_variable)\n", + "except Exception as e:\n", + " print(e)\n", + " \n", + "try:\n", + "\n", + " # Plot\n", + " qc_display = act.plotting.TimeSeriesDisplay(ds_act)\n", + " qc_display.add_subplots((2,), figsize = (9.5,10))\n", + " qc_ax = qc_display.plot(qc_variable, subplot_index=(0,), set_title=\"QC results on field: \" + qc_variable,)\n", + " qc_ax.grid()\n", + " qc_display.qc_flag_block_plot(qc_variable, subplot_index=(1,))\n", + " qc_ax.set_xlabel(f\"UTC Time starts at {ds.time.data[0]}\")\n", + "\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e)" + ] + }, + { + "cell_type": "markdown", + "id": "d76e4d27", + "metadata": 
{}, + "source": [ + "#### bonus: choose variables to plot from a dropdown menu " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bb733804", + "metadata": {}, + "outputs": [], + "source": [ + "# Valid variables filtering\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_4 = df_info[(df_info.is_dim==False) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter_4" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "82d5c20d", + "metadata": {}, + "outputs": [], + "source": [ + "# example 1: using xarray plot\n", + "\n", + "# Uncomment the following cell to try the interactive plot (ctrl + /)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2fc9aaa4", + "metadata": {}, + "outputs": [], + "source": [ + "# %matplotlib widget\n", + "# plt.clf()\n", + "\n", + "# fig, ax = plt.subplots(figsize=(10, 4))\n", + "\n", + "# available_variables = df_filter_4.var_name.values\n", + "# @widgets.interact(var=available_variables)\n", + "# def update(var = available_variables[0]):\n", + "# fig.clear() # Remove old lines from plot and plot new one\n", + "# if len(ds[var].dims)==2:\n", + "# ds[var].plot(x=\"time\", add_colorbar=False)\n", + "# else:\n", + "# ds[var].plot()\n", + "# plt.grid()\n", + "# plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fabdd802", + "metadata": {}, + "outputs": [], + "source": [ + "# example 2: using act plot\n", + "\n", + "# Uncomment the following cell to try the interactive plot (ctrl + /)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "962b4186", + "metadata": {}, + "outputs": [], + "source": [ + "# %matplotlib widget\n", + "# plt.clf()\n", + "\n", + "# available_variables = df_filter_4.var_name.values\n", + "\n", + "\n", + "# @widgets.interact(var=available_variables)\n", + "# def 
update(var = available_variables[0]):\n", + "\n", + "# i_display = act.plotting.TimeSeriesDisplay(ds_act)\n", + "# i_display.add_subplots((1,), figsize=(10, 4))\n", + "# ax = i_display.plot(var, subplot_index=(0,), set_title=f\"{var} ({ds_act[var].attrs['long_name']})\",)\n", + "\n", + "# ax.set_xlabel(f\"UTC Time starts at {ds.time.data[0]}\")\n", + "# ax.grid()\n", + "# plt.show()\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.16" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/ACSMCDCE/acsmcdce.c1.ipynb b/VAPs/quicklook/ACSMCDCE/acsmcdce.c1.ipynb new file mode 100644 index 00000000..5d56f5bf --- /dev/null +++ b/VAPs/quicklook/ACSMCDCE/acsmcdce.c1.ipynb @@ -0,0 +1,339 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# ACSMCDCE.C1 Plots\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/acsmcdce) for more information about this vap." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib widget\n", + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import ipywidgets as widgets\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "# Data archive directory\n", + "DATA_DIR = r'/data/archive/'\n", + "\n", + "# Datastream info\n", + "DATASTREAM_NAME = 'acsmcdce'\n", + "DATA_LEVEL = 'c1'\n", + "LOCATIONS = [{'end_date': '2023-12-17', 'facility': 'M1', 'site': 'epc', 'start_date': '2023-01-18'}, {'end_date': '2022-09-29', 'facility': 'M1', 'site': 'hou', 'start_date': '2021-09-10'}, {'end_date': '2023-12-10', 'facility': 'E13', 'site': 'sgp', 'start_date': '2019-10-01'}]" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "9faaf875", + "metadata": {}, + "source": [ + "## Define site, facility, and date range" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ac6764f5", + "metadata": {}, + "outputs": [], + "source": [ + "print(\"The following locations and date ranges are available for this VAP:\")\n", + "display(pd.DataFrame(LOCATIONS, columns=['site', 'facility', 'start_date', 'end_date']))" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "8d132223", + "metadata": {}, + "source": [ + "#### Define site, facility, and date range (date format: YYYY-MM-DD) using the variables below:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e563983a", + "metadata": {}, + "outputs": [], + "source": [ + "site_facility = ( 'sgp', 'E13' )\n", + "\n", + "date_start = '2023-12-08'\n", + "date_end = '2023-12-10'" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "bccd3dfe-2f99-49a5-bace-ea37e8dd8fc3", + "metadata": {}, + "source": [ + "## Load data 
files\n", + "Load data files from /data/archive/" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cb4b9a26-c574-49c0-a521-658fa553e39e", + "metadata": {}, + "outputs": [], + "source": [ + "# Compile list of files\n", + "site, facility = site_facility\n", + "d_date_start = datetime.strptime(date_start, '%Y-%m-%d')\n", + "d_date_end = datetime.strptime(date_end, '%Y-%m-%d')\n", + "dir_path = os.path.join(DATA_DIR + site, site + DATASTREAM_NAME + facility + r'.' + DATA_LEVEL )\n", + "dir_path\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6be8f3dc", + "metadata": {}, + "outputs": [], + "source": [ + "from datetime import date, timedelta\n", + "import pandas as pd\n", + "\n", + "def get_ARM_formated_dates(start_date, end_date):\n", + " \"\"\"\n", + " Get a list of ARM conventional formated date lists, based on start_date and end_date(inclusive)\n", + " EXAMPLE:\n", + " get_ARM_formated_dates(start_date=\"20180219\", end_date=\"20180221\")\n", + " >> [\"20180219\", \"20180220\", \"20180221\"] \n", + " \"\"\"\n", + " \n", + " _start_date = pd.to_datetime(start_date)\n", + " _end_date = pd.to_datetime(end_date)\n", + " \n", + " delta = _end_date - _start_date # returns timedelta \n", + " dates = []\n", + "\n", + " for i in range(delta.days + 1):\n", + " day = _start_date + timedelta(days=i)\n", + " day_formated = day.strftime(format=\"%Y%m%d\")\n", + " dates.append(day_formated)\n", + " return dates\n", + "\n", + "\n", + "get_ARM_formated_dates(start_date=date_start, end_date=date_end)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "51feea2e", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter a list of files based on date pattern\n", + "import glob\n", + "dates = get_ARM_formated_dates(start_date=date_start, end_date=date_end)\n", + "files_filter = []\n", + "for date in dates:\n", + " files_filter += glob.glob(f'{dir_path}/*.{date}*.*')\n", + " files_filter\n", + "files_filter" + ] + 
}, + { + "cell_type": "code", + "execution_count": null, + "id": "b0e5d0d7", + "metadata": {}, + "outputs": [], + "source": [ + "# Load files as a single dataset\n", + "files_list = files_filter \n", + "ds = act.io.armfiles.read_netcdf(files_list)\n", + "ds.clean.cleanup()\n", + "print(f'{len(files_list)} files loaded')\n", + "ds\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "4a551094-9ec0-4b64-b80a-9940573c2f50", + "metadata": {}, + "source": [ + "## Plot time series data\n", + "#### Define the list of variables to be plotted:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d09b789e-84f1-4605-846b-a72c110c8048", + "metadata": {}, + "outputs": [], + "source": [ + "variables_to_plot = ['total_organics', 'ammonium', 'sulfate']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3458fb08-035b-4898-9253-0a94e6f9c97b", + "metadata": {}, + "outputs": [], + "source": [ + "ts_display = act.plotting.TimeSeriesDisplay(ds)\n", + "ts_display.add_subplots((len(variables_to_plot),), figsize = (9.5,4*len(variables_to_plot)))\n", + "\n", + "for i,v in enumerate(variables_to_plot):\n", + " ts_ax = ts_display.plot(v, subplot_index=(i,), set_title=ds.variables[v].attrs['long_name'],)\n", + " ts_ax.grid()\n", + "\n", + "plt.show()\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "194399aa-1907-452b-8ba9-bc31d7f60291", + "metadata": {}, + "source": [ + "## Quality check plots\n", + "#### Define variable for QC plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9a0ef63d-3eeb-48fc-a24a-43258b6134b8", + "metadata": {}, + "outputs": [], + "source": [ + "qc_variable = 'total_organics'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "532663a3-4dc0-4497-bda8-018c5f91e1c4", + "metadata": {}, + "outputs": [], + "source": [ + "# QC Plot\n", + "if ('qc_'+qc_variable) in ds.variables:\n", + "\n", + " # Plot\n", + " qc_display = 
act.plotting.TimeSeriesDisplay(ds)\n", + " qc_display.add_subplots((2,), figsize = (9.5,10))\n", + " qc_ax = qc_display.plot(qc_variable, subplot_index=(0,), set_title=\"QC results on field: \" + qc_variable,)\n", + " qc_ax.grid()\n", + " qc_display.qc_flag_block_plot(qc_variable, subplot_index=(1,))\n", + "\n", + " plt.show()\n", + "else:\n", + " print(f'QC not available for the selected field: {qc_variable}')\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "2308db16-f362-4033-a11a-c3e5e75ad9ba", + "metadata": {}, + "source": [ + "## Field selection dropdown menu\n", + "Select variable to be plotted from a dropdown menu" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d5792fa8-2253-441a-8ac6-1e07a0345031", + "metadata": {}, + "outputs": [], + "source": [ + "plt.ioff()\n", + "\n", + "# populate dropdown menu with available variables \n", + "available_variables = [v for v in ds.variables if not('time' in v or v.startswith('qc_') or v.startswith('source_')) and 'long_name' in ds.variables[v].attrs]\n", + "d_variable = 'total_organics'\n", + "dropdown = widgets.Dropdown(\n", + " options = [(ds.variables[v].attrs['long_name'], v) for v in available_variables],\n", + " value= d_variable,\n", + " description='Field:',\n", + " disabled=False,\n", + ")\n", + "dropdown.layout.margin = '0px 30% 0px 20%'\n", + "dropdown.layout.width = '50%'\n", + "\n", + "# set up display\n", + "i_display = act.plotting.TimeSeriesDisplay(ds)\n", + "i_display.add_subplots((1,), figsize = (9.5,5))\n", + "i_ax = i_display.plot(d_variable, subplot_index=(0,), set_title=ds.variables[d_variable].attrs['long_name'],)\n", + "i_ax.grid()\n", + "i_fig = i_display.fig\n", + "\n", + "# update plot callback function\n", + "def update_plot(change):\n", + " i_ax.cla()\n", + " i_ax_new = i_display.plot(change.new, subplot_index=(0,), set_title=ds.variables[change.new].attrs['long_name'],)\n", + " i_ax_new.grid()\n", + " i_fig.canvas.draw()\n", + " 
i_fig.canvas.flush_events()\n", + "\n", + "dropdown.observe(update_plot, names='value')\n", + "\n", + "widgets.AppLayout(\n", + " header=dropdown,\n", + " center=i_fig.canvas,\n", + " pane_heights=[1, 6,1]\n", + ")\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.12" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/ACSMCDCE/acsmcdce.c2.ipynb b/VAPs/quicklook/ACSMCDCE/acsmcdce.c2.ipynb new file mode 100644 index 00000000..785494db --- /dev/null +++ b/VAPs/quicklook/ACSMCDCE/acsmcdce.c2.ipynb @@ -0,0 +1,339 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# ACSMCDCE.C2 Plots\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/acsmcdce) for more information about this vap." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib widget\n", + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import ipywidgets as widgets\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "# Data archive directory\n", + "DATA_DIR = r'/data/archive/'\n", + "\n", + "# Datastream info\n", + "DATASTREAM_NAME = 'acsmcdce'\n", + "DATA_LEVEL = 'c2'\n", + "LOCATIONS = [{'end_date': '2019-05-01', 'facility': 'M1', 'site': 'cor', 'start_date': '2018-09-28'}, {'end_date': '2017-11-01', 'facility': 'M1', 'site': 'asi', 'start_date': '2016-06-02'}, {'end_date': '2023-04-21', 'facility': 'M1', 'site': 'epc', 'start_date': '2023-02-09'}, {'end_date': '2022-10-01', 'facility': 'M1', 'site': 'hou', 'start_date': '2021-09-26'}, {'end_date': '2022-09-29', 'facility': 'S3', 'site': 'hou', 'start_date': '2022-05-28'}, {'end_date': '2023-04-21', 'facility': 'C1', 'site': 'ena', 'start_date': '2014-05-16'}, {'end_date': '2023-04-22', 'facility': 'S2', 'site': 'guc', 'start_date': '2022-04-02'}, {'end_date': '2016-10-03', 'facility': 'C1', 'site': 'sgp', 'start_date': '2010-11-18'}, {'end_date': '2023-04-21', 'facility': 'E13', 'site': 'sgp', 'start_date': '2016-11-29'}]" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "9faaf875", + "metadata": {}, + "source": [ + "## Define site, facility, and date range" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ac6764f5", + "metadata": {}, + "outputs": [], + "source": [ + "print(\"The following locations and date ranges are available for this VAP:\")\n", + "display(pd.DataFrame(LOCATIONS, columns=['site', 'facility', 'start_date', 'end_date']))" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": 
"8d132223", + "metadata": {}, + "source": [ + "#### Define site, facility, and date range (date format: YYYY-MM-DD) using the variables below:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e563983a", + "metadata": {}, + "outputs": [], + "source": [ + "site_facility = ( 'sgp', 'C1' )\n", + "\n", + "date_start = '2016-10-01'\n", + "date_end = '2016-10-03'" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "bccd3dfe-2f99-49a5-bace-ea37e8dd8fc3", + "metadata": {}, + "source": [ + "## Load data files\n", + "Load data files from /data/archive/" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cb4b9a26-c574-49c0-a521-658fa553e39e", + "metadata": {}, + "outputs": [], + "source": [ + "# Compile list of files\n", + "site, facility = site_facility\n", + "d_date_start = datetime.strptime(date_start, '%Y-%m-%d')\n", + "d_date_end = datetime.strptime(date_end, '%Y-%m-%d')\n", + "dir_path = os.path.join(DATA_DIR + site, site + DATASTREAM_NAME + facility + r'.' 
+ DATA_LEVEL )\n", + "dir_path\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6be8f3dc", + "metadata": {}, + "outputs": [], + "source": [ + "from datetime import date, timedelta\n", + "import pandas as pd\n", + "\n", + "def get_ARM_formated_dates(start_date, end_date):\n", + " \"\"\"\n", + " Get a list of ARM conventional formated date lists, based on start_date and end_date(inclusive)\n", + " EXAMPLE:\n", + " get_ARM_formated_dates(start_date=\"20180219\", end_date=\"20180221\")\n", + " >> [\"20180219\", \"20180220\", \"20180221\"] \n", + " \"\"\"\n", + " \n", + " _start_date = pd.to_datetime(start_date)\n", + " _end_date = pd.to_datetime(end_date)\n", + " \n", + " delta = _end_date - _start_date # returns timedelta \n", + " dates = []\n", + "\n", + " for i in range(delta.days + 1):\n", + " day = _start_date + timedelta(days=i)\n", + " day_formated = day.strftime(format=\"%Y%m%d\")\n", + " dates.append(day_formated)\n", + " return dates\n", + "\n", + "\n", + "get_ARM_formated_dates(start_date=date_start, end_date=date_end)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "51feea2e", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter a list of files based on date pattern\n", + "import glob\n", + "dates = get_ARM_formated_dates(start_date=date_start, end_date=date_end)\n", + "files_filter = []\n", + "for date in dates:\n", + " files_filter += glob.glob(f'{dir_path}/*.{date}*.*')\n", + " files_filter\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0e5d0d7", + "metadata": {}, + "outputs": [], + "source": [ + "# Load files as a single dataset\n", + "files_list = files_filter \n", + "ds = act.io.armfiles.read_netcdf(files_list)\n", + "ds.clean.cleanup()\n", + "print(f'{len(files_list)} files loaded')\n", + "ds\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "4a551094-9ec0-4b64-b80a-9940573c2f50", + "metadata": {}, + "source": [ + "## Plot 
time series data\n", + "#### Define the list of variables to be plotted:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d09b789e-84f1-4605-846b-a72c110c8048", + "metadata": {}, + "outputs": [], + "source": [ + "variables_to_plot = ['total_organics_CDCE', 'sulfate_CDCE', 'ammonium_CDCE']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3458fb08-035b-4898-9253-0a94e6f9c97b", + "metadata": {}, + "outputs": [], + "source": [ + "ts_display = act.plotting.TimeSeriesDisplay(ds)\n", + "ts_display.add_subplots((len(variables_to_plot),), figsize = (9.5,4*len(variables_to_plot)))\n", + "\n", + "for i,v in enumerate(variables_to_plot):\n", + " ts_ax = ts_display.plot(v, subplot_index=(i,), set_title=ds.variables[v].attrs['long_name'],)\n", + " ts_ax.grid()\n", + "\n", + "plt.show()\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "194399aa-1907-452b-8ba9-bc31d7f60291", + "metadata": {}, + "source": [ + "## Quality check plots\n", + "#### Define variable for QC plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9a0ef63d-3eeb-48fc-a24a-43258b6134b8", + "metadata": {}, + "outputs": [], + "source": [ + "qc_variable = 'total_organics_CDCE'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "532663a3-4dc0-4497-bda8-018c5f91e1c4", + "metadata": {}, + "outputs": [], + "source": [ + "# QC Plot\n", + "if ('qc_'+qc_variable) in ds.variables:\n", + "\n", + " # Plot\n", + " qc_display = act.plotting.TimeSeriesDisplay(ds)\n", + " qc_display.add_subplots((2,), figsize = (9.5,10))\n", + " qc_ax = qc_display.plot(qc_variable, subplot_index=(0,), set_title=\"QC results on field: \" + qc_variable,)\n", + " qc_ax.grid()\n", + " qc_display.qc_flag_block_plot(qc_variable, subplot_index=(1,))\n", + "\n", + " plt.show()\n", + "else:\n", + " print(f'QC not available for the selected field: {qc_variable}')\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": 
"2308db16-f362-4033-a11a-c3e5e75ad9ba", + "metadata": {}, + "source": [ + "## Field selection dropdown menu\n", + "Select variable to be plotted from a dropdown menu" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d5792fa8-2253-441a-8ac6-1e07a0345031", + "metadata": {}, + "outputs": [], + "source": [ + "plt.ioff()\n", + "\n", + "# populate dropdown menu with available variables \n", + "available_variables = [v for v in ds.variables if not('time' in v or v.startswith('qc_') or v.startswith('source_')) and 'long_name' in ds.variables[v].attrs]\n", + "d_variable = 'total_organics_CDCE'\n", + "dropdown = widgets.Dropdown(\n", + " options = [(ds.variables[v].attrs['long_name'], v) for v in available_variables],\n", + " value= d_variable,\n", + " description='Field:',\n", + " disabled=False,\n", + ")\n", + "dropdown.layout.margin = '0px 30% 0px 20%'\n", + "dropdown.layout.width = '50%'\n", + "\n", + "# set up display\n", + "i_display = act.plotting.TimeSeriesDisplay(ds)\n", + "i_display.add_subplots((1,), figsize = (9.5,5))\n", + "i_ax = i_display.plot(d_variable, subplot_index=(0,), set_title=ds.variables[d_variable].attrs['long_name'],)\n", + "i_ax.grid()\n", + "i_fig = i_display.fig\n", + "\n", + "# update plot callback function\n", + "def update_plot(change):\n", + " i_ax.cla()\n", + " i_ax_new = i_display.plot(change.new, subplot_index=(0,), set_title=ds.variables[change.new].attrs['long_name'],)\n", + " i_ax_new.grid()\n", + " i_fig.canvas.draw()\n", + " i_fig.canvas.flush_events()\n", + "\n", + "dropdown.observe(update_plot, names='value')\n", + "\n", + "widgets.AppLayout(\n", + " header=dropdown,\n", + " center=i_fig.canvas,\n", + " pane_heights=[1, 6,1]\n", + ")\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + 
"name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.12" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/ACSMCDCE/acsmtofcdce.c1.ipynb b/VAPs/quicklook/ACSMCDCE/acsmtofcdce.c1.ipynb new file mode 100644 index 00000000..17a132ec --- /dev/null +++ b/VAPs/quicklook/ACSMCDCE/acsmtofcdce.c1.ipynb @@ -0,0 +1,339 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# ACSMTOFCDCE.C1 Plots\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/acsmcdce) for more information about this vap." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib widget\n", + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import ipywidgets as widgets\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "# Data archive directory\n", + "DATA_DIR = r'/data/archive/'\n", + "\n", + "# Datastream info\n", + "DATASTREAM_NAME = 'acsmtofcdce'\n", + "DATA_LEVEL = 'c1'\n", + "LOCATIONS = [{'end_date': '2022-09-28', 'facility': 'S3', 'site': 'hou', 'start_date': '2022-06-01'}, {'end_date': '2023-06-13', 'facility': 'S2', 'site': 'guc', 'start_date': '2022-05-12'}]" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "9faaf875", + "metadata": {}, + "source": [ + "## Define site, facility, and date range" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ac6764f5", + "metadata": {}, + "outputs": [], + "source": [ + "print(\"The following locations and date ranges are available 
for this VAP:\")\n", + "display(pd.DataFrame(LOCATIONS, columns=['site', 'facility', 'start_date', 'end_date']))" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "8d132223", + "metadata": {}, + "source": [ + "#### Define site, facility, and date range (date format: YYYY-MM-DD) using the variables below:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e563983a", + "metadata": {}, + "outputs": [], + "source": [ + "site_facility = ( 'hou', 'S3' )\n", + "\n", + "date_start = '2022-09-26'\n", + "date_end = '2022-09-28'" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "bccd3dfe-2f99-49a5-bace-ea37e8dd8fc3", + "metadata": {}, + "source": [ + "## Load data files\n", + "Load data files from /data/archive/" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cb4b9a26-c574-49c0-a521-658fa553e39e", + "metadata": {}, + "outputs": [], + "source": [ + "# Compile list of files\n", + "site, facility = site_facility\n", + "d_date_start = datetime.strptime(date_start, '%Y-%m-%d')\n", + "d_date_end = datetime.strptime(date_end, '%Y-%m-%d')\n", + "dir_path = os.path.join(DATA_DIR + site, site + DATASTREAM_NAME + facility + r'.' 
+ DATA_LEVEL )\n", + "dir_path\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6be8f3dc", + "metadata": {}, + "outputs": [], + "source": [ + "from datetime import date, timedelta\n", + "import pandas as pd\n", + "\n", + "def get_ARM_formated_dates(start_date, end_date):\n", + " \"\"\"\n", + " Get a list of ARM conventional formated date lists, based on start_date and end_date(inclusive)\n", + " EXAMPLE:\n", + " get_ARM_formated_dates(start_date=\"20180219\", end_date=\"20180221\")\n", + " >> [\"20180219\", \"20180220\", \"20180221\"] \n", + " \"\"\"\n", + " \n", + " _start_date = pd.to_datetime(start_date)\n", + " _end_date = pd.to_datetime(end_date)\n", + " \n", + " delta = _end_date - _start_date # returns timedelta \n", + " dates = []\n", + "\n", + " for i in range(delta.days + 1):\n", + " day = _start_date + timedelta(days=i)\n", + " day_formated = day.strftime(format=\"%Y%m%d\")\n", + " dates.append(day_formated)\n", + " return dates\n", + "\n", + "\n", + "get_ARM_formated_dates(start_date=date_start, end_date=date_end)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "51feea2e", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter a list of files based on date pattern\n", + "import glob\n", + "dates = get_ARM_formated_dates(start_date=date_start, end_date=date_end)\n", + "files_filter = []\n", + "for date in dates:\n", + " files_filter += glob.glob(f'{dir_path}/*.{date}*.*')\n", + " files_filter\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0e5d0d7", + "metadata": {}, + "outputs": [], + "source": [ + "# Load files as a single dataset\n", + "files_list = files_filter \n", + "ds = act.io.armfiles.read_netcdf(files_list)\n", + "ds.clean.cleanup()\n", + "print(f'{len(files_list)} files loaded')\n", + "ds\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "4a551094-9ec0-4b64-b80a-9940573c2f50", + "metadata": {}, + "source": [ + "## Plot 
time series data\n", + "#### Define the list of variables to be plotted:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d09b789e-84f1-4605-846b-a72c110c8048", + "metadata": {}, + "outputs": [], + "source": [ + "variables_to_plot = ['chloride', 'ammonium', 'nitrate']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3458fb08-035b-4898-9253-0a94e6f9c97b", + "metadata": {}, + "outputs": [], + "source": [ + "ts_display = act.plotting.TimeSeriesDisplay(ds)\n", + "ts_display.add_subplots((len(variables_to_plot),), figsize = (9.5,4*len(variables_to_plot)))\n", + "\n", + "for i,v in enumerate(variables_to_plot):\n", + " ts_ax = ts_display.plot(v, subplot_index=(i,), set_title=ds.variables[v].attrs['long_name'],)\n", + " ts_ax.grid()\n", + "\n", + "plt.show()\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "194399aa-1907-452b-8ba9-bc31d7f60291", + "metadata": {}, + "source": [ + "## Quality check plots\n", + "#### Define variable for QC plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9a0ef63d-3eeb-48fc-a24a-43258b6134b8", + "metadata": {}, + "outputs": [], + "source": [ + "qc_variable = 'chloride'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "532663a3-4dc0-4497-bda8-018c5f91e1c4", + "metadata": {}, + "outputs": [], + "source": [ + "# QC Plot\n", + "if ('qc_'+qc_variable) in ds.variables:\n", + "\n", + " # Plot\n", + " qc_display = act.plotting.TimeSeriesDisplay(ds)\n", + " qc_display.add_subplots((2,), figsize = (9.5,10))\n", + " qc_ax = qc_display.plot(qc_variable, subplot_index=(0,), set_title=\"QC results on field: \" + qc_variable,)\n", + " qc_ax.grid()\n", + " qc_display.qc_flag_block_plot(qc_variable, subplot_index=(1,))\n", + "\n", + " plt.show()\n", + "else:\n", + " print(f'QC not available for the selected field: {qc_variable}')\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": 
"2308db16-f362-4033-a11a-c3e5e75ad9ba", + "metadata": {}, + "source": [ + "## Field selection dropdown menu\n", + "Select variable to be plotted from a dropdown menu" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d5792fa8-2253-441a-8ac6-1e07a0345031", + "metadata": {}, + "outputs": [], + "source": [ + "plt.ioff()\n", + "\n", + "# populate dropdown menu with available variables \n", + "available_variables = [v for v in ds.variables if not('time' in v or v.startswith('qc_') or v.startswith('source_')) and 'long_name' in ds.variables[v].attrs]\n", + "d_variable = 'chloride'\n", + "dropdown = widgets.Dropdown(\n", + " options = [(ds.variables[v].attrs['long_name'], v) for v in available_variables],\n", + " value= d_variable,\n", + " description='Field:',\n", + " disabled=False,\n", + ")\n", + "dropdown.layout.margin = '0px 30% 0px 20%'\n", + "dropdown.layout.width = '50%'\n", + "\n", + "# set up display\n", + "i_display = act.plotting.TimeSeriesDisplay(ds)\n", + "i_display.add_subplots((1,), figsize = (9.5,5))\n", + "i_ax = i_display.plot(d_variable, subplot_index=(0,), set_title=ds.variables[d_variable].attrs['long_name'],)\n", + "i_ax.grid()\n", + "i_fig = i_display.fig\n", + "\n", + "# update plot callback function\n", + "def update_plot(change):\n", + " i_ax.cla()\n", + " i_ax_new = i_display.plot(change.new, subplot_index=(0,), set_title=ds.variables[change.new].attrs['long_name'],)\n", + " i_ax_new.grid()\n", + " i_fig.canvas.draw()\n", + " i_fig.canvas.flush_events()\n", + "\n", + "dropdown.observe(update_plot, names='value')\n", + "\n", + "widgets.AppLayout(\n", + " header=dropdown,\n", + " center=i_fig.canvas,\n", + " pane_heights=[1, 6,1]\n", + ")\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", 
+ "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.12" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/AERINF/AERINF_tutorial.ipynb b/VAPs/quicklook/AERINF/AERINF_tutorial.ipynb new file mode 100644 index 00000000..bf583d23 --- /dev/null +++ b/VAPs/quicklook/AERINF/AERINF_tutorial.ipynb @@ -0,0 +1,838 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# AERICH1NF1TURN.C1 Notebook\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/aerinf) for more information about this vap." + ] + }, + { + "cell_type": "markdown", + "id": "97097763", + "metadata": {}, + "source": [ + "In this notebook, we demonstrate the workflow to explore ARM vap data (using aerich1nf1turn as an example.) Value-added products (VAPs) are higher-order data products that have been analyzed and processed to ease scientists’ use of ARM data in atmospheric research and global climate models. \n", + "Here is the main content we will cover." 
+ ] + }, + { + "cell_type": "markdown", + "id": "eddec40f", + "metadata": {}, + "source": [ + "# Table of Contents\n", + "## Access the data\n", + "* How to retrieve the data\n", + "* Data path and file name conventions\n", + "* Load data\n", + "## Explore the data\n", + "* NetCDF Data structure\n", + "* Xarray essentials\n", + "* Xarray Variable\n", + "## Plot the data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "import random\n", + "\n", + "import glob\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "id": "2372d5be", + "metadata": {}, + "source": [ + "## Access the data" + ] + }, + { + "cell_type": "markdown", + "id": "5b0d9684", + "metadata": {}, + "source": [ + "### How to retrieve the data\n", + "We assume the path \"/data/archive\" is available where you are running this notebook. You can use `os.path.exists(\"/data/archive\")` to verify that the path exists on your machine.\n", + "\n", + "\n", + "### Data path and file name conventions\n", + "There are several common terms regarding ARM data, for example, data-stream-name, data-level, site, facility, instrument, etc. (For more details, please see [ARM Data File Standards Version 1.2](https://www.google.com/search?q=arm+datastream+facility+cite+definition&rlz=1C1GCEJ_enUS1029US1029&ei=hb41ZICrBPukqtsPvMODIA&ved=0ahUKEwjAgeiS0aL-AhV7kmoFHbzhAAQQ4dUDCBA&uact=5&oq=arm+datastream+facility+cite+definition&gs_lcp=Cgxnd3Mtd2l6LXNlcnAQAzIFCAAQogQyBQgAEKIEMgUIABCiBDIFCAAQogQyBQgAEKIEOgoIABBHENYEELADOgoIIRCgARDDBBAKSgQIQRgAUJ4DWIoOYO0PaAFwAXgAgAGMAYgBuQmSAQMyLjmYAQCgAQHIAQjAAQE&sclient=gws-wiz-serp).) 
\n", + "\n", + "For example, this notebook is called `aerich1nf1turn.c1`, where `aerich1nf1turn` is the \"datastream name\", and `c1` is the \"data level\".\n", + "\n", + "This datastream also contains site `cor` and facility `M1`. (Note: an individual datastream might have multiple site-facility pairs.)\n", + "In such a case, the data of this data-stream is stored at `/data/archive/cor/coraerich1nf1turnM1.c1`, which is in the format of `{DATA_DIR}/{site}/{site}{datastream_name}{facility}.{data_level}`. We can use the following method to assign the data-stream directory `datastream_dir = os.path.join(DATA_DIR, site, site + DATASTREAM_NAME + facility + '.' + DATA_LEVEL )`\n", + "\n", + "The data files under datastream_dir also follow naming conventions. Once you reach the datastream_dir level, the most important file naming convention to differentiate the files is \"yyyyMMdd.hhmmss\", which comes in handy to filter files based on datetime. For example, we can use `glob.glob(f'{datastream_dir}/*.200709*.*')` to filter files from September 2007.\n", + "\n", + "(Note: refer to the https://adc.arm.gov/discovery/#/ and https://adc.arm.gov/solr8/metadata/select API to explore ARM datastreams and their associated available sites and facilities.)\n", + "\n", + "Please see the following examples in action" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d7e9eb85", + "metadata": {}, + "outputs": [], + "source": [ + "# Verify if DATA_DIR path exists\n", + "DATA_DIR = \"/data/archive\"\n", + "os.path.exists(DATA_DIR)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "586993fd", + "metadata": {}, + "outputs": [], + "source": [ + "# Specify datastream_dir following the path conventions and check its existence\n", + "DATASTREAM_NAME = \"aerich1nf1turn\"\n", + "DATA_LEVEL = \"c1\"\n", + "site = \"cor\"\n", + "facility = \"M1\"\n", + "datastream_dir = os.path.join(DATA_DIR, site, site + DATASTREAM_NAME + facility + '.' 
+ DATA_LEVEL )\n", + "print(datastream_dir)\n", + "print(os.path.exists(datastream_dir))\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0742f7c1", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: list 5 (random) files under datastream_dir\n", + "files = os.listdir(datastream_dir)\n", + "files[:5]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "39b98a36", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: get most recent file\n", + "list_of_files = glob.glob(f\"{datastream_dir}/*\") # * means all if need specific format then *.csv\n", + "latest_file = max(list_of_files, key=os.path.getctime)\n", + "latest_file" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "902d514e", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus sort datastream files based on datetime\n", + "files = os.listdir(datastream_dir)\n", + "file_sorted = files.copy()\n", + "file_sorted.sort() \n", + "print(file_sorted[:5])\n", + "\n", + "# to reverse\n", + "file_sorted_reverse = files.copy() \n", + "file_sorted_reverse.sort(reverse=True)\n", + "print(file_sorted_reverse[:5])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ec5923b2", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: pattern matching\n", + "# filter the 200709** files under datastream_dir\n", + "files_filter = glob.glob(f'{datastream_dir}/*.200709*.*')\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c4a4aa18", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: use armnotebook_utils.file_filter (TODO) to filter files based on datastream info \n" + ] + }, + { + "cell_type": "markdown", + "id": "cfeb9efc", + "metadata": {}, + "source": [ + "### Load data\n", + "The arm data is stored in [NetCDF](https://en.wikipedia.org/wiki/NetCDF#:~:text=%22NetCDF%20(network%20Common%20Data%20Form,format%20for%20representing%20scientific%20data.) format. 
We can use xarray's `open_dataset` method to load a single file, or `open_mfdataset` to open multiple files. (Note: the latter will still return a single xarray dataset object by combining multiple files.)\n", + "\n", + "Note: open_dataset keeps the file handle open and lazy loads its contents. All parameters are passed directly to open_dataset. It is a preferable method over load_dataset for memory efficiency. (For more details, please see [xarray.open_dataset](https://docs.xarray.dev/en/stable/generated/xarray.open_dataset.html))\n", + "\n", + "See the following example in action" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a440a329", + "metadata": {}, + "outputs": [], + "source": [ + "# open single file\n", + "full_path = latest_file\n", + "print(full_path)\n", + "ds_single = xr.open_dataset(full_path)\n", + "ds_single" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0143a3d", + "metadata": {}, + "outputs": [], + "source": [ + "print(ds_single)\n", + "print(type(ds_single))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1c0f8939", + "metadata": {}, + "outputs": [], + "source": [ + "# open multiple files\n", + "n_files = 3\n", + "full_paths = [os.path.join(datastream_dir, f_path) for f_path in file_sorted_reverse[:n_files]]\n", + "print(full_paths)\n", + "try: # Note: sometimes multiple files cannot be merged, so we used try except here.\n", + " ds_mutiple = xr.open_mfdataset(full_paths)\n", + " # ds_mutiple\n", + " print(type(ds_mutiple))\n", + " print(ds_mutiple)\n", + "except Exception as e:\n", + " print(e)" + ] + }, + { + "cell_type": "markdown", + "id": "3fa59943", + "metadata": {}, + "source": [ + "## Explore the data" + ] + }, + { + "cell_type": "markdown", + "id": "571f69b9", + "metadata": {}, + "source": [ + "### NetCDF Data structure \n", + "\n", + "\n", + "(If you are confident with NetCDF basics and xarray essentials, feel free to skip this section.)\n", + "Before we dive into
data exploration, there are some essential concepts we should be familiar with (shown below): \n", + "* Dataset\n", + "* Data array\n", + "* Variable\n", + "* Dimension\n", + "* Coordinate\n", + "* Data Type\n", + "* Meta Data (Attributes)\n", + "\n", + "We will not go into details about NetCDF basics, but here are some references you might find helpful:\n", + "* [Components of a NetCDF Dataset](https://iprc.soest.hawaii.edu/users/xfu/tool/guidef-7.html#)\n", + "* [Network Common Data Form (NetCDF)](https://www.unidata.ucar.edu/software/netcdf/) and [A Brief History of (netCDF) Time](https://www.unidata.ucar.edu/software/netcdf/time/recs.html)\n", + "\n", + "\n", + "\n", + "### Xarray essentials\n", + "Earlier we introduced the xarray package and used `open_dataset` and `open_mfdataset` to retrieve NetCDF data files as xarray Dataset objects. Recall that you can review the data in a notebook by using `print(ds)` or `ds`. \n", + "\n", + "Using xarray to retrieve the aforementioned NetCDF basics is straightforward. 
In general,\n", + "* Dataset: `ds`\n", + "* Data array: `ds.variables`\n", + "* Variable: `ds.variables`\n", + "* Dimension: `ds.dims`\n", + "* Coordinate: `ds.coords`\n", + "* Data Type: `type`\n", + "* Meta Data (Attributes): `ds.attrs`\n", + "\n", + "Also, here are some references if you are new to xarray\n", + "* [Xarray in 45 minutes](https://tutorial.xarray.dev/overview/xarray-in-45-min.html)\n", + "* [Handling NetCDF Files using XArray for Absolute Beginners](https://towardsdatascience.com/handling-netcdf-files-using-xarray-for-absolute-beginners-111a8ab4463f)\n", + "\n", + "Try the following commands in action\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b3eac323", + "metadata": {}, + "outputs": [], + "source": [ + "ds = ds_single" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "77ecf85d", + "metadata": {}, + "outputs": [], + "source": [ + "# Dataset \n", + "ds" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "25e7de09", + "metadata": {}, + "outputs": [], + "source": [ + "# Data array, Variable\n", + "# Note: the info can be overwhelming. 
Do not attempt to grasp everything at first glance.\n", + "ds.variables" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8c41b67e", + "metadata": {}, + "outputs": [], + "source": [ + "# dimensions\n", + "ds.dims" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "156f1dfc", + "metadata": {}, + "outputs": [], + "source": [ + "# coordinates\n", + "ds.coords" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "277d6064", + "metadata": {}, + "outputs": [], + "source": [ + "# Meta Data (Attributes)\n", + "ds.attrs" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d334681f", + "metadata": {}, + "outputs": [], + "source": [ + "# type\n", + "print(type(ds))\n", + "print(type(ds.variables))\n", + "print(type(ds.dims))\n", + "print(type(ds.coords))\n", + "print(type(ds.attrs))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "643399d6", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: retrieve variable names only\n", + "list(ds.variables)" + ] + }, + { + "cell_type": "markdown", + "id": "1cf36878", + "metadata": {}, + "source": [ + "#### Discussion: variable vs. coordinates vs. dimensions. \n", + "From [Components of a NetCDF Dataset](https://iprc.soest.hawaii.edu/users/xfu/tool/guidef-7.html#):\n", + "\n", + "* Dimension: A dimension may be used to represent a real physical dimension, for example, time, latitude, longitude, or height. A dimension might also be used to index other quantities, for example station or model-run-number.\n", + "A netCDF dimension has both a name and a length. A dimension length is an arbitrary positive integer, except that one dimension in a netCDF dataset can have the length UNLIMITED.\n", + "\n", + "* Variables: Variables are used to store the bulk of the data in a netCDF dataset. A variable represents an array of values of the same type. A scalar value is treated as a 0-dimensional array. 
A variable has a name, a data type, and a shape described by its list of dimensions specified when the variable is created. A variable may also have associated attributes, which may be added, deleted or changed after the variable is created.\n", + "\n", + "* Coordinate (Variables): It is legal for a variable to have the same name as a dimension. Such variables have no special meaning to the netCDF library. However there is a convention that such variables should be treated in a special way by software using this library.\n", + "A variable with the same name as a dimension is called a coordinate variable. It typically defines a physical coordinate corresponding to that dimension.\n", + "\n", + "\n", + "Tips: By definition, coordinates and dimensions are also variables. For example, ds.time is a coordinate, but it is also a special variable. In practice, however, when we talk about a variable, we mean a regular (non-coordinate) variable. We will use this convention for the remainder of the notebook. " + ] + }, + { + "cell_type": "markdown", + "id": "d47120f9", + "metadata": {}, + "source": [ + "### Xarray Variable\n", + "\n", + "\n", + "We can use `ds.variables` to access the variables of a dataset. But printing out the whole dataset can often be overwhelming. Instead, we work on an individual variable (also called an xarray data array) using the `ds[var_name]` syntax, where `var_name` is the name of the variable. See the following examples in action.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5a57e136", + "metadata": {}, + "outputs": [], + "source": [ + "print(type(ds[\"time\"]))\n", + "ds[\"time\"]" + ] + }, + { + "cell_type": "markdown", + "id": "51ec643e", + "metadata": {}, + "source": [ + "#### Variable properties\n", + "For an individual variable, the following properties are most commonly used, listed with the associated xarray attribute to retrieve them. 
(assuming var = ds[var_name])\n", + "* name: `var.name`\n", + "* data content: `var.data`\n", + "* attributes: `var.attrs`\n", + "* dimensions: `var.dims`\n", + "* data type: `var.data.dtype`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a632a525", + "metadata": {}, + "outputs": [], + "source": [ + "var_name = \"time\"\n", + "var = ds[var_name]\n", + "\n", + "print(\"var.name: \\n\", var.name, \"\\n\")\n", + "print(\"var.data: \\n\", var.data, \"\\n\")\n", + "print(\"var.attrs: \\n\", var.attrs, \"\\n\")\n", + "print(\"var.dims: \\n\", var.dims, \"\\n\")\n", + "print(\"var.data.dtype: \\n\", var.data.dtype, \"\\n\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b256b07f", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: individual variable info table (collect and display variable info in tabular format)\n", + "df_info = pd.DataFrame()\n", + "df_info[\"var_name\"] = list(ds.variables)\n", + "df_info[\"dims\"] = df_info.var_name.apply(lambda x: ds[x].dims)\n", + "df_info[\"is_dim\"] = df_info.var_name.apply(lambda x: x in ds.dims)\n", + "df_info[\"n_dim\"] = df_info.var_name.apply(lambda x: len(ds[x].dims))\n", + "df_info[\"attrs\"] = df_info.var_name.apply(lambda x: ds[x].attrs)\n", + "df_info[\"dtype\"] = df_info.var_name.apply(lambda x: ds[x].data.dtype)\n", + "df_info" + ] + }, + { + "cell_type": "markdown", + "id": "e6a36b96", + "metadata": {}, + "source": [ + "### Data cleaning/Preprocessing (skipped)\n", + "Data cleaning and preprocessing is an important stage in the data analysis pipeline. However, it is out of the scope of this notebook. For more information about data cleaning and preprocessing basics, here are some references. 
\n", + "* [Xarray Fundamentals](https://earth-env-data-science.github.io/lectures/xarray/xarray.html)\n", + "* [Xarray Tutorial — Pangeo Gallery documentation](http://gallery.pangeo.io/repos/pangeo-data/pangeo-tutorial-gallery/xarray.html)\n", + "* [Pythonic Data Cleaning With pandas and NumPy](https://realpython.com/python-data-cleaning-numpy-pandas/)\n", + "* [Pandas - Cleaning Data](https://www.w3schools.com/python/pandas/pandas_cleaning.asp)" + ] + }, + { + "cell_type": "markdown", + "id": "ae6e1a69", + "metadata": {}, + "source": [ + "### Plotting\n", + "Data visualization (or plotting) is another important data analysis topic and deserves its own discussion. Here in this notebook we will only demonstrate basic tools for simple data visualization tasks.\n", + "\n", + "Here are some references you might find useful:\n", + "* xarray plotting: https://docs.xarray.dev/en/stable/user-guide/plotting.html\n", + "* Atmospheric Community Toolkit (ACT): https://arm-doe.github.io/ACT/index.html\n", + "\n", + "Note: this notebook is auto-generated using a template. It uses a general idea to select variable(s) to plot and is not customized to each individual notebook. Feel free to change the variables of interest, especially if a certain figure fails to plot." + ] + }, + { + "cell_type": "markdown", + "id": "7e417fb2", + "metadata": {}, + "source": [ + "#### 1-dimensional basic time series plot\n", + "\n", + "For the following plot we would like to find variables such that\n", + "* it has one and only one dimension\n", + "* \"time\" is its coordinate variable\n", + "* it is not a dimension itself,\n", + "* it is not a special variable with substrings within [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "\n", + "Please see the following example in action. 
(For more details about pandas filtering, please see the following references.)\n", + "* [pandas: multiple conditions while indexing data frame](https://stackoverflow.com/questions/22591174/pandas-multiple-conditions-while-indexing-data-frame-unexpected-behavior)\n", + "* [pandas.DataFrame.apply](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.apply.html)\n", + "* [How to test if a string contains one of the substrings in a list...](https://stackoverflow.com/questions/26577516/how-to-test-if-a-string-contains-one-of-the-substrings-in-a-list-in-pandas)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "30edac2d", + "metadata": {}, + "outputs": [], + "source": [ + "# (pandas query) filter n_dim==1, non-dimensional, and does not contain the following substrings\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter = df_info[(df_info.n_dim == 1) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (df_info.is_dim==False) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4777a659", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " var_1d = df_filter.var_name.values[0]\n", + " var_1d\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fb5ae985", + "metadata": {}, + "outputs": [], + "source": [ + "# Note: if failed, change to another variable to plot.\n", + "try:\n", + " ds[var_1d].plot()\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "markdown", + "id": "9e41066e", + "metadata": {}, + "source": [ + "#### 2-dimensional basic plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "09bd4adc", + "metadata": {}, + "outputs": [], + "source": [ + "# (pandas query) filter n_dim==2, non-dimensional, and does not contain the 
following substrings\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_2 = df_info[(df_info.n_dim == 2) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (df_info.is_dim==False) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter_2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2cdea1a9", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " var_2d = df_filter_2.var_name.values[0]\n", + " var_2d\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f5f48879", + "metadata": {}, + "outputs": [], + "source": [ + "# Note: if failed, change to another variable to plot.\n", + "try:\n", + " print(ds[var_2d].dims)\n", + " # ds[var_2d].plot()\n", + "\n", + " # conventionally, use \"time\" as x-axis\n", + " ds[var_2d].plot(x=\"time\")\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "markdown", + "id": "7554b2a6", + "metadata": {}, + "source": [ + "#### qc-plotting (optional)\n", + "\n", + "Note: act qc plotting has more strict requirement, one of them is the associated qc variable need to have \"flag_masks\" attributes. 
Which is added by using ds = act.io.armfiles.read_netcdf(files_list), then ds.clean.cleanup()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "71c2096f", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " ds_act = act.io.armfiles.read_netcdf(full_path)\n", + " print(type(ds_act))\n", + " ds_act.clean.cleanup()\n", + "\n", + " # or \n", + " # ds.clean.cleanup()\n", + "except Exception as e:\n", + " print(\"ERROR\", e)\n", + " ds_act = ds" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "844f8505", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter valid variables for ACT qc plotting\n", + "condition = (\"qc_\" + df_info.var_name).apply(lambda x: ds_act[x].attrs.get(\"flag_masks\") is not None \n", + " if x in list(ds.variables) else False)\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_3 = df_info[(df_info.is_dim==False) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings))) &\n", + " condition\n", + " ]\n", + "df_filter_3" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ecd38cc6", + "metadata": {}, + "outputs": [], + "source": [ + "# QC Plot\n", + "try:\n", + " qc_variable = df_filter_3.var_name.values[0]\n", + " print(qc_variable)\n", + "except Exception as e:\n", + " print(e)\n", + " \n", + "try:\n", + "\n", + " # Plot\n", + " qc_display = act.plotting.TimeSeriesDisplay(ds_act)\n", + " qc_display.add_subplots((2,), figsize = (9.5,10))\n", + " qc_ax = qc_display.plot(qc_variable, subplot_index=(0,), set_title=\"QC results on field: \" + qc_variable,)\n", + " qc_ax.grid()\n", + " qc_display.qc_flag_block_plot(qc_variable, subplot_index=(1,))\n", + " qc_ax.set_xlabel(f\"UTC Time starts at {ds.time.data[0]}\")\n", + "\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e)" + ] + }, + { + "cell_type": "markdown", + "id": "d76e4d27", + "metadata": 
{}, + "source": [ + "#### bonus: choose variables to plot from a dropdown menu " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bb733804", + "metadata": {}, + "outputs": [], + "source": [ + "# Valid variables filtering\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_4 = df_info[(df_info.is_dim==False) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter_4" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "82d5c20d", + "metadata": {}, + "outputs": [], + "source": [ + "# example 1: using xarray plot\n", + "\n", + "# Uncomment the following cell to try the interactive plot (ctrl + /)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2fc9aaa4", + "metadata": {}, + "outputs": [], + "source": [ + "# %matplotlib widget\n", + "# plt.clf()\n", + "\n", + "# fig, ax = plt.subplots(figsize=(10, 4))\n", + "\n", + "# available_variables = df_filter_4.var_name.values\n", + "# @widgets.interact(var=available_variables)\n", + "# def update(var = available_variables[0]):\n", + "# fig.clear() # Remove old lines from plot and plot new one\n", + "# if len(ds[var].dims)==2:\n", + "# ds[var].plot(x=\"time\", add_colorbar=False)\n", + "# else:\n", + "# ds[var].plot()\n", + "# plt.grid()\n", + "# plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fabdd802", + "metadata": {}, + "outputs": [], + "source": [ + "# example 2: using act plot\n", + "\n", + "# Uncomment the following cell to try the interactive plot (ctrl + /)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "962b4186", + "metadata": {}, + "outputs": [], + "source": [ + "# %matplotlib widget\n", + "# plt.clf()\n", + "\n", + "# available_variables = df_filter_4.var_name.values\n", + "\n", + "\n", + "# @widgets.interact(var=available_variables)\n", + "# def 
update(var = available_variables[0]):\n", + "\n", + "# i_display = act.plotting.TimeSeriesDisplay(ds_act)\n", + "# i_display.add_subplots((1,), figsize=(10, 4))\n", + "# ax = i_display.plot(var, subplot_index=(0,), set_title=f\"{var} ({ds_act[var].attrs['long_name']})\",)\n", + "\n", + "# ax.set_xlabel(f\"UTC Time starts at {ds.time.data[0]}\")\n", + "# ax.grid()\n", + "# plt.show()\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.16" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/AERINF/aerich1nf1turn.c1.ipynb b/VAPs/quicklook/AERINF/aerich1nf1turn.c1.ipynb new file mode 100644 index 00000000..22cb378b --- /dev/null +++ b/VAPs/quicklook/AERINF/aerich1nf1turn.c1.ipynb @@ -0,0 +1,297 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# AERICH1NF1TURN.C1 Plots\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/aerinf) for more information about this vap." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib widget\n", + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import ipywidgets as widgets\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "# Data archive directory\n", + "DATA_DIR = r'/data/archive/'\n", + "\n", + "# Datastream info\n", + "DATASTREAM_NAME = 'aerich1nf1turn'\n", + "DATA_LEVEL = 'c1'\n", + "LOCATIONS = [{'end_date': '2019-04-30', 'facility': 'M1', 'site': 'cor', 'start_date': '2018-09-27'}, {'end_date': '2016-12-20', 'facility': 'M1', 'site': 'awr', 'start_date': '2015-12-01'}, {'end_date': '2020-05-20', 'facility': 'M1', 'site': 'anx', 'start_date': '2019-12-01'}, {'end_date': '2008-12-27', 'facility': 'M1', 'site': 'hfe', 'start_date': '2008-05-06'}, {'end_date': '2023-12-09', 'facility': 'M1', 'site': 'epc', 'start_date': '2023-01-15'}, {'end_date': '2007-12-31', 'facility': 'M1', 'site': 'fkb', 'start_date': '2007-03-28'}, {'end_date': '2022-10-01', 'facility': 'M1', 'site': 'hou', 'start_date': '2021-08-09'}, {'end_date': '2023-12-10', 'facility': 'C1', 'site': 'ena', 'start_date': '2016-07-21'}, {'end_date': '2015-10-10', 'facility': 'M1', 'site': 'mao', 'start_date': '2014-01-01'}, {'end_date': '2023-06-10', 'facility': 'M1', 'site': 'guc', 'start_date': '2021-09-01'}, {'end_date': '2011-01-05', 'facility': 'M1', 'site': 'grw', 'start_date': '2009-04-12'}, {'end_date': '2023-12-10', 'facility': 'C1', 'site': 'sgp', 'start_date': '2006-02-20'}, {'end_date': '2022-09-20', 'facility': 'E32', 'site': 'sgp', 'start_date': '2016-04-11'}, {'end_date': '2023-10-20', 'facility': 'E37', 'site': 'sgp', 'start_date': '2016-04-11'}, {'end_date': '2023-10-20', 'facility': 'E39', 'site': 'sgp', 'start_date': '2016-04-11'}, 
{'end_date': '2005-09-13', 'facility': 'M1', 'site': 'pye', 'start_date': '2005-03-23'}, {'end_date': '2020-09-10', 'facility': 'M1', 'site': 'mos', 'start_date': '2019-11-11'}, {'end_date': '2007-01-05', 'facility': 'M1', 'site': 'nim', 'start_date': '2006-01-07'}, {'end_date': '2012-03-24', 'facility': 'M1', 'site': 'pgh', 'start_date': '2011-07-17'}, {'end_date': '2013-04-22', 'facility': 'M1', 'site': 'pvc', 'start_date': '2012-06-22'}, {'end_date': '2021-01-20', 'facility': 'M1', 'site': 'oli', 'start_date': '2014-09-01'}, {'end_date': '2023-12-10', 'facility': 'C1', 'site': 'nsa', 'start_date': '2006-02-20'}, {'end_date': '2014-07-06', 'facility': 'C1', 'site': 'twp', 'start_date': '2011-06-04'}, {'end_date': '2009-02-10', 'facility': 'C2', 'site': 'twp', 'start_date': '2006-02-01'}, {'end_date': '2014-12-31', 'facility': 'C3', 'site': 'twp', 'start_date': '2006-02-01'}]" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "9faaf875", + "metadata": {}, + "source": [ + "## Define site, facility, and date range" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ac6764f5", + "metadata": {}, + "outputs": [], + "source": [ + "print(\"The following locations and date ranges are available for this VAP:\")\n", + "display(pd.DataFrame(LOCATIONS, columns=['site', 'facility', 'start_date', 'end_date']))" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "8d132223", + "metadata": {}, + "source": [ + "#### Define site, facility, and date range (date format: YYYY-MM-DD) using the variables below:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e563983a", + "metadata": {}, + "outputs": [], + "source": [ + "site_facility = ( 'sgp', 'C1' )\n", + "\n", + "date_start = '2023-12-08'\n", + "date_end = '2023-12-10'" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "bccd3dfe-2f99-49a5-bace-ea37e8dd8fc3", + "metadata": {}, + "source": [ + "## Load data files\n", + "Load data files 
from /data/archive/" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cb4b9a26-c574-49c0-a521-658fa553e39e", + "metadata": {}, + "outputs": [], + "source": [ + "# Compile list of files\n", + "site, facility = site_facility\n", + "d_date_start = datetime.strptime(date_start, '%Y-%m-%d')\n", + "d_date_end = datetime.strptime(date_end, '%Y-%m-%d')\n", + "dir_path = os.path.join(DATA_DIR + site, site + DATASTREAM_NAME + facility + r'.' + DATA_LEVEL )\n", + "dir_path\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6be8f3dc", + "metadata": {}, + "outputs": [], + "source": [ + "from datetime import date, timedelta\n", + "import pandas as pd\n", + "\n", + "def get_ARM_formated_dates(start_date, end_date):\n", + " \"\"\"\n", + " Get a list of ARM conventional formated date lists, based on start_date and end_date(inclusive)\n", + " EXAMPLE:\n", + " get_ARM_formated_dates(start_date=\"20180219\", end_date=\"20180221\")\n", + " >> [\"20180219\", \"20180220\", \"20180221\"] \n", + " \"\"\"\n", + " \n", + " _start_date = pd.to_datetime(start_date)\n", + " _end_date = pd.to_datetime(end_date)\n", + " \n", + " delta = _end_date - _start_date # returns timedelta \n", + " dates = []\n", + "\n", + " for i in range(delta.days + 1):\n", + " day = _start_date + timedelta(days=i)\n", + " day_formated = day.strftime(format=\"%Y%m%d\")\n", + " dates.append(day_formated)\n", + " return dates\n", + "\n", + "\n", + "get_ARM_formated_dates(start_date=date_start, end_date=date_end)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "51feea2e", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter a list of files based on date pattern\n", + "import glob\n", + "dates = get_ARM_formated_dates(start_date=date_start, end_date=date_end)\n", + "files_filter = []\n", + "for date in dates:\n", + " files_filter += glob.glob(f'{dir_path}/*.{date}*.*')\n", + " files_filter\n", + "files_filter" + ] + }, + { + "cell_type": "code", 
+ "execution_count": null, + "id": "b0e5d0d7", + "metadata": {}, + "outputs": [], + "source": [ + "# Load files as a single dataset\n", + "files_list = files_filter \n", + "ds = act.io.armfiles.read_netcdf(files_list)\n", + "ds.clean.cleanup()\n", + "print(f'{len(files_list)} files loaded')\n", + "ds\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "4a551094-9ec0-4b64-b80a-9940573c2f50", + "metadata": {}, + "source": [ + "## Plot time series data\n", + "#### Define the list of variables to be plotted:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d09b789e-84f1-4605-846b-a72c110c8048", + "metadata": {}, + "outputs": [], + "source": [ + "variables_to_plot = ['mean_rad']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3458fb08-035b-4898-9253-0a94e6f9c97b", + "metadata": {}, + "outputs": [], + "source": [ + "ts_display = act.plotting.TimeSeriesDisplay(ds)\n", + "ts_display.add_subplots((len(variables_to_plot),), figsize = (9.5,4*len(variables_to_plot)))\n", + "\n", + "for i,v in enumerate(variables_to_plot):\n", + " ts_ax = ts_display.plot(v, subplot_index=(i,), set_title=ds.variables[v].attrs['long_name'],)\n", + " ts_ax.grid()\n", + "\n", + "plt.show()\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "2308db16-f362-4033-a11a-c3e5e75ad9ba", + "metadata": {}, + "source": [ + "## Field selection dropdown menu\n", + "Select variable to be plotted from a dropdown menu" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d5792fa8-2253-441a-8ac6-1e07a0345031", + "metadata": {}, + "outputs": [], + "source": [ + "plt.ioff()\n", + "\n", + "# populate dropdown menu with available variables \n", + "available_variables = [v for v in ds.variables if not('time' in v or v.startswith('qc_') or v.startswith('source_')) and 'long_name' in ds.variables[v].attrs]\n", + "d_variable = 'mean_rad'\n", + "dropdown = widgets.Dropdown(\n", + " options = 
[(ds.variables[v].attrs['long_name'], v) for v in available_variables],\n", + " value= d_variable,\n", + " description='Field:',\n", + " disabled=False,\n", + ")\n", + "dropdown.layout.margin = '0px 30% 0px 20%'\n", + "dropdown.layout.width = '50%'\n", + "\n", + "# set up display\n", + "i_display = act.plotting.TimeSeriesDisplay(ds)\n", + "i_display.add_subplots((1,), figsize = (9.5,5))\n", + "i_ax = i_display.plot(d_variable, subplot_index=(0,), set_title=ds.variables[d_variable].attrs['long_name'],)\n", + "i_ax.grid()\n", + "i_fig = i_display.fig\n", + "\n", + "# update plot callback function\n", + "def update_plot(change):\n", + " i_ax.cla()\n", + " i_ax_new = i_display.plot(change.new, subplot_index=(0,), set_title=ds.variables[change.new].attrs['long_name'],)\n", + " i_ax_new.grid()\n", + " i_fig.canvas.draw()\n", + " i_fig.canvas.flush_events()\n", + "\n", + "dropdown.observe(update_plot, names='value')\n", + "\n", + "widgets.AppLayout(\n", + " header=dropdown,\n", + " center=i_fig.canvas,\n", + " pane_heights=[1, 6,1]\n", + ")\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.12" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/AERINF/aerich2nf1turn.c1.ipynb b/VAPs/quicklook/AERINF/aerich2nf1turn.c1.ipynb new file mode 100644 index 00000000..2cf15c4d --- /dev/null +++ b/VAPs/quicklook/AERINF/aerich2nf1turn.c1.ipynb @@ -0,0 +1,297 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + 
"# AERICH2NF1TURN.C1 Plots\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/aerinf) for more information about this vap." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib widget\n", + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import ipywidgets as widgets\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "# Data archive directory\n", + "DATA_DIR = r'/data/archive/'\n", + "\n", + "# Datastream info\n", + "DATASTREAM_NAME = 'aerich2nf1turn'\n", + "DATA_LEVEL = 'c1'\n", + "LOCATIONS = [{'end_date': '2019-04-30', 'facility': 'M1', 'site': 'cor', 'start_date': '2018-09-27'}, {'end_date': '2016-12-20', 'facility': 'M1', 'site': 'awr', 'start_date': '2015-12-01'}, {'end_date': '2020-05-20', 'facility': 'M1', 'site': 'anx', 'start_date': '2019-12-01'}, {'end_date': '2008-12-27', 'facility': 'M1', 'site': 'hfe', 'start_date': '2008-05-06'}, {'end_date': '2023-12-09', 'facility': 'M1', 'site': 'epc', 'start_date': '2023-01-15'}, {'end_date': '2007-12-31', 'facility': 'M1', 'site': 'fkb', 'start_date': '2007-03-28'}, {'end_date': '2022-10-01', 'facility': 'M1', 'site': 'hou', 'start_date': '2021-08-09'}, {'end_date': '2023-12-10', 'facility': 'C1', 'site': 'ena', 'start_date': '2016-07-21'}, {'end_date': '2015-10-10', 'facility': 'M1', 'site': 'mao', 'start_date': '2014-01-01'}, {'end_date': '2023-06-10', 'facility': 'M1', 'site': 'guc', 'start_date': '2021-09-01'}, {'end_date': '2011-01-05', 'facility': 'M1', 'site': 'grw', 'start_date': '2009-04-12'}, {'end_date': '2023-12-10', 'facility': 'C1', 'site': 'sgp', 'start_date': '2006-02-20'}, {'end_date': '2022-09-20', 'facility': 'E32', 'site': 'sgp', 'start_date': '2016-04-11'}, {'end_date': '2023-10-20', 'facility': 'E37', 
'site': 'sgp', 'start_date': '2016-04-11'}, {'end_date': '2023-10-20', 'facility': 'E39', 'site': 'sgp', 'start_date': '2016-04-11'}, {'end_date': '2005-09-13', 'facility': 'M1', 'site': 'pye', 'start_date': '2005-03-23'}, {'end_date': '2020-09-10', 'facility': 'M1', 'site': 'mos', 'start_date': '2019-11-11'}, {'end_date': '2007-01-05', 'facility': 'M1', 'site': 'nim', 'start_date': '2006-01-07'}, {'end_date': '2012-03-24', 'facility': 'M1', 'site': 'pgh', 'start_date': '2011-07-11'}, {'end_date': '2013-04-22', 'facility': 'M1', 'site': 'pvc', 'start_date': '2012-06-22'}, {'end_date': '2021-01-20', 'facility': 'M1', 'site': 'oli', 'start_date': '2014-09-01'}, {'end_date': '2023-12-10', 'facility': 'C1', 'site': 'nsa', 'start_date': '2006-02-20'}, {'end_date': '2014-07-06', 'facility': 'C1', 'site': 'twp', 'start_date': '2011-06-04'}, {'end_date': '2009-02-10', 'facility': 'C2', 'site': 'twp', 'start_date': '2006-02-01'}, {'end_date': '2014-12-31', 'facility': 'C3', 'site': 'twp', 'start_date': '2006-02-01'}]" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "9faaf875", + "metadata": {}, + "source": [ + "## Define site, facility, and date range" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ac6764f5", + "metadata": {}, + "outputs": [], + "source": [ + "print(\"The following locations and date ranges are available for this VAP:\")\n", + "display(pd.DataFrame(LOCATIONS, columns=['site', 'facility', 'start_date', 'end_date']))" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "8d132223", + "metadata": {}, + "source": [ + "#### Define site, facility, and date range (date format: YYYY-MM-DD) using the variables below:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e563983a", + "metadata": {}, + "outputs": [], + "source": [ + "site_facility = ( 'sgp', 'C1' )\n", + "\n", + "date_start = '2023-12-08'\n", + "date_end = '2023-12-10'" + ] + }, + { + "attachments": {}, + "cell_type": 
"markdown", + "id": "bccd3dfe-2f99-49a5-bace-ea37e8dd8fc3", + "metadata": {}, + "source": [ + "## Load data files\n", + "Load data files from /data/archive/" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cb4b9a26-c574-49c0-a521-658fa553e39e", + "metadata": {}, + "outputs": [], + "source": [ + "# Compile list of files\n", + "site, facility = site_facility\n", + "d_date_start = datetime.strptime(date_start, '%Y-%m-%d')\n", + "d_date_end = datetime.strptime(date_end, '%Y-%m-%d')\n", + "dir_path = os.path.join(DATA_DIR + site, site + DATASTREAM_NAME + facility + r'.' + DATA_LEVEL )\n", + "dir_path\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6be8f3dc", + "metadata": {}, + "outputs": [], + "source": [ + "from datetime import date, timedelta\n", + "import pandas as pd\n", + "\n", + "def get_ARM_formated_dates(start_date, end_date):\n", + " \"\"\"\n", + " Get a list of ARM conventional formated date lists, based on start_date and end_date(inclusive)\n", + " EXAMPLE:\n", + " get_ARM_formated_dates(start_date=\"20180219\", end_date=\"20180221\")\n", + " >> [\"20180219\", \"20180220\", \"20180221\"] \n", + " \"\"\"\n", + " \n", + " _start_date = pd.to_datetime(start_date)\n", + " _end_date = pd.to_datetime(end_date)\n", + " \n", + " delta = _end_date - _start_date # returns timedelta \n", + " dates = []\n", + "\n", + " for i in range(delta.days + 1):\n", + " day = _start_date + timedelta(days=i)\n", + " day_formated = day.strftime(format=\"%Y%m%d\")\n", + " dates.append(day_formated)\n", + " return dates\n", + "\n", + "\n", + "get_ARM_formated_dates(start_date=date_start, end_date=date_end)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "51feea2e", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter a list of files based on date pattern\n", + "import glob\n", + "dates = get_ARM_formated_dates(start_date=date_start, end_date=date_end)\n", + "files_filter = []\n", + "for date in 
dates:\n", + " files_filter += glob.glob(f'{dir_path}/*.{date}*.*')\n", + " files_filter\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0e5d0d7", + "metadata": {}, + "outputs": [], + "source": [ + "# Load files as a single dataset\n", + "files_list = files_filter \n", + "ds = act.io.armfiles.read_netcdf(files_list)\n", + "ds.clean.cleanup()\n", + "print(f'{len(files_list)} files loaded')\n", + "ds\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "4a551094-9ec0-4b64-b80a-9940573c2f50", + "metadata": {}, + "source": [ + "## Plot time series data\n", + "#### Define the list of variables to be plotted:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d09b789e-84f1-4605-846b-a72c110c8048", + "metadata": {}, + "outputs": [], + "source": [ + "variables_to_plot = ['mean_rad']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3458fb08-035b-4898-9253-0a94e6f9c97b", + "metadata": {}, + "outputs": [], + "source": [ + "ts_display = act.plotting.TimeSeriesDisplay(ds)\n", + "ts_display.add_subplots((len(variables_to_plot),), figsize = (9.5,4*len(variables_to_plot)))\n", + "\n", + "for i,v in enumerate(variables_to_plot):\n", + " ts_ax = ts_display.plot(v, subplot_index=(i,), set_title=ds.variables[v].attrs['long_name'],)\n", + " ts_ax.grid()\n", + "\n", + "plt.show()\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "2308db16-f362-4033-a11a-c3e5e75ad9ba", + "metadata": {}, + "source": [ + "## Field selection dropdown menu\n", + "Select variable to be plotted from a dropdown menu" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d5792fa8-2253-441a-8ac6-1e07a0345031", + "metadata": {}, + "outputs": [], + "source": [ + "plt.ioff()\n", + "\n", + "# populate dropdown menu with available variables \n", + "available_variables = [v for v in ds.variables if not('time' in v or v.startswith('qc_') or v.startswith('source_')) and 
'long_name' in ds.variables[v].attrs]\n", + "d_variable = 'mean_rad'\n", + "dropdown = widgets.Dropdown(\n", + " options = [(ds.variables[v].attrs['long_name'], v) for v in available_variables],\n", + " value= d_variable,\n", + " description='Field:',\n", + " disabled=False,\n", + ")\n", + "dropdown.layout.margin = '0px 30% 0px 20%'\n", + "dropdown.layout.width = '50%'\n", + "\n", + "# set up display\n", + "i_display = act.plotting.TimeSeriesDisplay(ds)\n", + "i_display.add_subplots((1,), figsize = (9.5,5))\n", + "i_ax = i_display.plot(d_variable, subplot_index=(0,), set_title=ds.variables[d_variable].attrs['long_name'],)\n", + "i_ax.grid()\n", + "i_fig = i_display.fig\n", + "\n", + "# update plot callback function\n", + "def update_plot(change):\n", + " i_ax.cla()\n", + " i_ax_new = i_display.plot(change.new, subplot_index=(0,), set_title=ds.variables[change.new].attrs['long_name'],)\n", + " i_ax_new.grid()\n", + " i_fig.canvas.draw()\n", + " i_fig.canvas.flush_events()\n", + "\n", + "dropdown.observe(update_plot, names='value')\n", + "\n", + "widgets.AppLayout(\n", + " header=dropdown,\n", + " center=i_fig.canvas,\n", + " pane_heights=[1, 6,1]\n", + ")\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.12" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/AERIOE/AERIOE_tutorial.ipynb b/VAPs/quicklook/AERIOE/AERIOE_tutorial.ipynb new file mode 100644 index 00000000..89687394 --- /dev/null +++ b/VAPs/quicklook/AERIOE/AERIOE_tutorial.ipynb @@ -0,0 +1,838 @@ +{ + "cells": [ + { + 
"cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# AERIOE1TURN.C1 Notebook\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/aerioe) for more information about this vap." + ] + }, + { + "cell_type": "markdown", + "id": "97097763", + "metadata": {}, + "source": [ + "In this notebook, we demonstrate the workflow to explore ARM vap data (using aerioe1turn as an example.) Value-added products (VAPs) are higher-order data products that have been analyzed and processed to ease scientists’ use of ARM data in atmospheric research and global climate models. \n", + "Here is the main content we will cover." + ] + }, + { + "cell_type": "markdown", + "id": "eddec40f", + "metadata": {}, + "source": [ + "# Table of Content\n", + "## Access the data\n", + "* How to retrieve the data\n", + "* Data path and file name conventions\n", + "* Load data\n", + "## Explore the data\n", + "* NetCDF Data structure\n", + "* Xarray essentials\n", + "* Xarray Variable\n", + "## Plot the data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "import random\n", + "\n", + "import glob\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "id": "2372d5be", + "metadata": {}, + "source": [ + "## Access the data" + ] + }, + { + "cell_type": "markdown", + "id": "5b0d9684", + "metadata": {}, + "source": [ + "### How to retrieve the data\n", + "We assume the path \"/data/archive\" is available where you are running this notebook. 
You can use `os.path.exists(\"/data/archive\")` to verify if the path exists on your machine.\n", + "\n", + "\n", + "### Data path and file name conventions\n", + "There are several common terms regarding ARM data, for example, data-stream-name, data-level, site, facility, instrument, etc. (For more details, please see [ARM Data File Standards Version 1.2](https://www.google.com/search?q=arm+datastream+facility+cite+definition&rlz=1C1GCEJ_enUS1029US1029&ei=hb41ZICrBPukqtsPvMODIA&ved=0ahUKEwjAgeiS0aL-AhV7kmoFHbzhAAQQ4dUDCBA&uact=5&oq=arm+datastream+facility+cite+definition&gs_lcp=Cgxnd3Mtd2l6LXNlcnAQAzIFCAAQogQyBQgAEKIEMgUIABCiBDIFCAAQogQyBQgAEKIEOgoIABBHENYEELADOgoIIRCgARDDBBAKSgQIQRgAUJ4DWIoOYO0PaAFwAXgAgAGMAYgBuQmSAQMyLjmYAQCgAQHIAQjAAQE&sclient=gws-wiz-serp).) \n", + "\n", + "For example, this notebook is called `aerioe1turn.c1`, where `aerioe1turn` is the \"datastream name\", and `c1` is the \"data level\".\n", + "\n", + "This datastream also contains site `sgp` and facility `C1`. (Note: an individual datastream might have multiple site-facility pairs.)\n", + "In such a case, the data of this data-stream is stored at `/data/archive/sgp/sgpaerioe1turnC1.c1`, which is in the format of `<DATA_DIR>/<site>/<site><datastream_name><facility>.<data_level>`. We can use the following method to assign the data-stream directory `datastream_dir = os.path.join(DATA_DIR, site, site + DATASTREAM_NAME + facility + '.' + DATA_LEVEL )`\n", + "\n", + "The data files under datastream_dir also follow naming conventions. Once you reach the datastream_dir level, the most important file naming convention to differentiate the files is \"yyyyMMdd.hhmmss\", which comes in handy to filter out files based on datetime.
For example, we can use `glob.glob(f'{datastream_dir}/*.200709*.*')` to filter files from September 2007.\n", + "\n", + "(Note: refer to the https://adc.arm.gov/discovery/#/ and https://adc.arm.gov/solr8/metadata/select API to explore ARM datastreams and their associated available sites and facilities.)\n", + "\n", + "Please see the following examples in action" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d7e9eb85", + "metadata": {}, + "outputs": [], + "source": [ + "# Verify if DATA_DIR path exists\n", + "DATA_DIR = \"/data/archive\"\n", + "os.path.exists(DATA_DIR)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "586993fd", + "metadata": {}, + "outputs": [], + "source": [ + "# Specify datastream_dir following the path conventions and check its existence\n", + "DATASTREAM_NAME = \"aerioe1turn\"\n", + "DATA_LEVEL = \"c1\"\n", + "site = \"sgp\"\n", + "facility = \"C1\"\n", + "datastream_dir = os.path.join(DATA_DIR, site, site + DATASTREAM_NAME + facility + '.'
+ DATA_LEVEL )\n", + "print(datastream_dir)\n", + "print(os.path.exists(datastream_dir))\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0742f7c1", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: list 5 (random) files under datastream_dir\n", + "files = os.listdir(datastream_dir)\n", + "files[:5]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "39b98a36", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: get most recent file\n", + "list_of_files = glob.glob(f\"{datastream_dir}/*\") # * means all if need specific format then *.csv\n", + "latest_file = max(list_of_files, key=os.path.getctime)\n", + "latest_file" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "902d514e", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus sort datastream files based on datetime\n", + "files = os.listdir(datastream_dir)\n", + "file_sorted = files.copy()\n", + "file_sorted.sort() \n", + "print(file_sorted[:5])\n", + "\n", + "# to reverse\n", + "file_sorted_reverse = files.copy() \n", + "file_sorted_reverse.sort(reverse=True)\n", + "print(file_sorted_reverse[:5])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ec5923b2", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: pattern matching\n", + "# filter the 200709** files under datastream_dir\n", + "files_filter = glob.glob(f'{datastream_dir}/*.200709*.*')\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c4a4aa18", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: use armnotebook_utils.file_filter (TODO) to filter files based on datastream info \n" + ] + }, + { + "cell_type": "markdown", + "id": "cfeb9efc", + "metadata": {}, + "source": [ + "### Load data\n", + "The arm data is stored in [NetCDF](https://en.wikipedia.org/wiki/NetCDF#:~:text=%22NetCDF%20(network%20Common%20Data%20Form,format%20for%20representing%20scientific%20data.) format. 
We can use xarray's `open_dataset` method to load single file, or `open_mfdataset` to open multiple files. (Note: the latter will still return a single xarray dataset object by combining multiple files.)\n", + "\n", + "Note: open_dataset keeps the file handle open and lazy loads its contents. All parameters are passed directly to open_dataset. It is a preferable method over load_dataset for memory-effiency. (more details, please see [xarray.open_dataset](https://docs.xarray.dev/en/stable/generated/xarray.open_dataset.html))\n", + "\n", + "See the following example in action" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a440a329", + "metadata": {}, + "outputs": [], + "source": [ + "# open single file\n", + "full_path = latest_file\n", + "print(full_path)\n", + "ds_single = xr.open_dataset(full_path)\n", + "ds_single" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0143a3d", + "metadata": {}, + "outputs": [], + "source": [ + "print(ds_single)\n", + "print(type(ds_single))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1c0f8939", + "metadata": {}, + "outputs": [], + "source": [ + "# open multiple files\n", + "n_files = 3\n", + "full_paths = [os.path.join(datastream_dir, f_path) for f_path in file_sorted_reverse[:n_files]]\n", + "print(full_paths)\n", + "try: # Note: sometimes multiple files cannot be merged, so we used try except here.\n", + " ds_mutiple = xr.open_mfdataset(full_paths)\n", + " # ds_mutiple\n", + " print(type(ds_mutiple))\n", + " print(ds_mutiple)\n", + "except Exception as e:\n", + " print(e)" + ] + }, + { + "cell_type": "markdown", + "id": "3fa59943", + "metadata": {}, + "source": [ + "## Explore the data" + ] + }, + { + "cell_type": "markdown", + "id": "571f69b9", + "metadata": {}, + "source": [ + "### NetCDF Data structure \n", + "\n", + "\n", + "(If you are confident with NetCDF basics and xarray essentials, feel free to skip this session.)\n", + "Before we dive into 
data exploration, there are some essential concepts we should be familiar with (shown below): \n", + "* Dataset\n", + "* Data array\n", + "* Variable\n", + "* Dimension\n", + "* Coordinate\n", + "* Data Type\n", + "* Meta Data (Attributes)\n", + "\n", + "We will not go into details about NetCDF basics, but here are some references you might find helpful\n", + "* [Components of a NetCDF Dataset](https://iprc.soest.hawaii.edu/users/xfu/tool/guidef-7.html#)\n", + "* [Network Common Data Form (NetCDF)](https://www.unidata.ucar.edu/software/netcdf/) and [A Brief History of (netCDF) Time](https://www.unidata.ucar.edu/software/netcdf/time/recs.html)\n", + "\n", + "\n", + "\n", + "### Xarray essentials\n", + "Earlier we introduced the xarray package and used `open_dataset` and `open_mfdataset` to retrieve a NetCDF data file as an xarray Dataset object. Recall that you can review the data in a notebook by using `print(ds)` or `ds`. \n", + "\n", + "Using xarray to retrieve the aforementioned NetCDF basics is straightforward. 
In general,\n", + "* Dataset: `ds`\n", + "* Data array: `ds[var_name]`\n", + "* Variable: `ds.variables`\n", + "* Dimension: `ds.dims`\n", + "* Coordinate: `ds.coords`\n", + "* Data Type: `type`\n", + "* Meta Data (Attributes): `ds.attrs`\n", + "\n", + "Also, here are some references if you are new to xarray\n", + "* [Xarray in 45 minutes](https://tutorial.xarray.dev/overview/xarray-in-45-min.html)\n", + "* [Handling NetCDF Files using XArray for Absolute Beginners](https://towardsdatascience.com/handling-netcdf-files-using-xarray-for-absolute-beginners-111a8ab4463f)\n", + "\n", + "Try the following commands in action\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b3eac323", + "metadata": {}, + "outputs": [], + "source": [ + "ds = ds_single" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "77ecf85d", + "metadata": {}, + "outputs": [], + "source": [ + "# Dataset \n", + "ds" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "25e7de09", + "metadata": {}, + "outputs": [], + "source": [ + "# Data array, Variable\n", + "# Note: the info can be overwhelming. 
Do not attempt to grasp everything at the first glance.\n", + "ds.variables" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8c41b67e", + "metadata": {}, + "outputs": [], + "source": [ + "# dimenssions\n", + "ds.dims" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "156f1dfc", + "metadata": {}, + "outputs": [], + "source": [ + "# coordinates\n", + "ds.coords" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "277d6064", + "metadata": {}, + "outputs": [], + "source": [ + "# Meta Data (Attributes)\n", + "ds.attrs" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d334681f", + "metadata": {}, + "outputs": [], + "source": [ + "# type\n", + "print(type(ds))\n", + "print(type(ds.variables))\n", + "print(type(ds.dims))\n", + "print(type(ds.coords))\n", + "print(type(ds.attrs))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "643399d6", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: retrieve variable names only\n", + "list(ds.variables)" + ] + }, + { + "cell_type": "markdown", + "id": "1cf36878", + "metadata": {}, + "source": [ + "#### Discussion: variable vs. coordinates vs. dimenssions. \n", + "From [Components of a NetCDF Dataset](https://iprc.soest.hawaii.edu/users/xfu/tool/guidef-7.html#):\n", + "\n", + "* Dimession: A dimension may be used to represent a real physical dimension, for example, time, latitude, longitude, or height. A dimension might also be used to index other quantities, for example station or model-run-number.\n", + "A netCDF dimension has both a name and a length. A dimension length is an arbitrary positive integer, except that one dimension in a netCDF dataset can have the length UNLIMITED.\n", + "\n", + "* Variables: Variables are used to store the bulk of the data in a netCDF dataset. A variable represents an array of values of the same type. A scalar value is treated as a 0-dimensional array. 
A variable has a name, a data type, and a shape described by its list of dimensions specified when the variable is created. A variable may also have associated attributes, which may be added, deleted or changed after the variable is created.\n", + "\n", + "* Coordinate (Variables): It is legal for a variable to have the same name as a dimension. Such variables have no special meaning to the netCDF library. However there is a convention that such variables should be treated in a special way by software using this library.\n", + "A variable with the same name as a dimension is called a coordinate variable. It typically defines a physical coordinate corresponding to that dimension.\n", + "\n", + "\n", + "Tips: By definition, coordinates and dimensions are also variables. For example, ds.time is a coordinate but it is also a special variable. But in practice, when we talk about a variable, we mean a regular (non-coordinate) variable. We will use this convention for the remainder of this notebook. " + ] + }, + { + "cell_type": "markdown", + "id": "d47120f9", + "metadata": {}, + "source": [ + "### Xarray Variable\n", + "\n", + "\n", + "We can use `ds.variables` to access the variables of a dataset. But printing out the whole dataset can often be overwhelming. Instead, we would work on an individual variable (also called an xarray data array) using the `ds[var_name]` syntax, where `var_name` is the name of the variable. See the following examples in action.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5a57e136", + "metadata": {}, + "outputs": [], + "source": [ + "print(type(ds[\"time\"]))\n", + "ds[\"time\"]" + ] + }, + { + "cell_type": "markdown", + "id": "51ec643e", + "metadata": {}, + "source": [ + "#### Variable properties\n", + "For an individual variable, the following properties are most commonly used, listed with the associated xarray attribute to retrieve them. 
(assuming var = ds[var_name])\n", + "* name: `var.name`\n", + "* data content: `var.data`\n", + "* attributes: `var.attrs`\n", + "* dimenstions: `var.dims`\n", + "* data type: `var.data.dtype`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a632a525", + "metadata": {}, + "outputs": [], + "source": [ + "var_name = \"time\"\n", + "var = ds[var_name]\n", + "\n", + "print(\"var.name: \\n\", var.name, \"\\n\")\n", + "print(\"var.data: \\n\", var.data, \"\\n\")\n", + "print(\"var.attrs: \\n\", var.attrs, \"\\n\")\n", + "print(\"var.dims: \\n\", var.dims, \"\\n\")\n", + "print(\"var.data.dtype: \\n\", var.data.dtype, \"\\n\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b256b07f", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: indivdual variable info table (collect and display variable info as a tabular format)\n", + "df_info = pd.DataFrame()\n", + "df_info[\"var_name\"] = list(ds.variables)\n", + "df_info[\"dims\"] = df_info.var_name.apply(lambda x: ds[x].dims)\n", + "df_info[\"is_dim\"] = df_info.var_name.apply(lambda x: x in ds.dims)\n", + "df_info[\"n_dim\"] = df_info.var_name.apply(lambda x: len(ds[x].dims))\n", + "df_info[\"attrs\"] = df_info.var_name.apply(lambda x: ds[x].attrs)\n", + "df_info[\"dtype\"] = df_info.var_name.apply(lambda x: ds[x].data.dtype)\n", + "df_info" + ] + }, + { + "cell_type": "markdown", + "id": "e6a36b96", + "metadata": {}, + "source": [ + "### Data cleaning/Preprocessing (skipped)\n", + "Data cleaning and preprocessing is an important stage in the the data analysis pipeline. However it will be out of the scope of this notebook. For more information about data cleaning and preprocessing basics, here are some references. 
\n", + "* [Xarray Fundamentals](https://earth-env-data-science.github.io/lectures/xarray/xarray.html)\n", + "* [Xarray Tutorial — Pangeo Gallery documentation](http://gallery.pangeo.io/repos/pangeo-data/pangeo-tutorial-gallery/xarray.html)\n", + "* [Pythonic Data Cleaning With pandas and NumPy](https://realpython.com/python-data-cleaning-numpy-pandas/)\n", + "* [Pandas - Cleaning Data](https://www.w3schools.com/python/pandas/pandas_cleaning.asp)" + ] + }, + { + "cell_type": "markdown", + "id": "ae6e1a69", + "metadata": {}, + "source": [ + "### Plotting\n", + "Data visualization (or plotting) is another important data analysis topic and deserves its own discussion. Here in this notebook we will only demonstrate basic tools for simple data visualization tasks.\n", + "\n", + "Here are some references you might find useful:\n", + "* xarray plotting: https://docs.xarray.dev/en/stable/user-guide/plotting.html\n", + "* Atmospheric Community Toolkit (ACT): https://arm-doe.github.io/ACT/index.html\n", + "\n", + "Note: this notebook is auto-generated using a template. It uses a general heuristic to select variable(s) to plot and is not customized to each individual notebook. Feel free to change the variables of interest, especially if a certain figure fails to plot." + ] + }, + { + "cell_type": "markdown", + "id": "7e417fb2", + "metadata": {}, + "source": [ + "#### 1-dimensional basic time series plot\n", + "\n", + "For the following plot we would like to find variables such that\n", + "* it has one and only one dimension\n", + "* \"time\" is its coordinate variable\n", + "* it is not a dimension itself,\n", + "* it is not a special variable with substrings within [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "\n", + "Please see the following example in action. 
(For more details about pandas filtering, please see the following references.)\n", + "* [pandas: multiple conditions while indexing data frame](https://stackoverflow.com/questions/22591174/pandas-multiple-conditions-while-indexing-data-frame-unexpected-behavior)\n", + "* [pandas.DataFrame.apply](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.apply.html)\n", + "* [How to test if a string contains one of the substrings in a list...](https://stackoverflow.com/questions/26577516/how-to-test-if-a-string-contains-one-of-the-substrings-in-a-list-in-pandas)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "30edac2d", + "metadata": {}, + "outputs": [], + "source": [ + "# (pandas query) filter n_dim==1, non-dimensional, and not contains the following substrings\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter = df_info[(df_info.n_dim == 1) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (df_info.is_dim==False) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4777a659", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " var_1d = df_filter.var_name.values[0]\n", + " var_1d\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fb5ae985", + "metadata": {}, + "outputs": [], + "source": [ + "# Note: if failed, change to another variable to plot.\n", + "try:\n", + " ds[var_1d].plot()\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "markdown", + "id": "9e41066e", + "metadata": {}, + "source": [ + "#### 2-dimensional basic plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "09bd4adc", + "metadata": {}, + "outputs": [], + "source": [ + "# (pandas query) filter n_dim==2, non-dimensional, and not contains the 
following substrings\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_2 = df_info[(df_info.n_dim == 2) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (df_info.is_dim==False) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter_2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2cdea1a9", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " var_2d = df_filter_2.var_name.values[0]\n", + " var_2d\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f5f48879", + "metadata": {}, + "outputs": [], + "source": [ + "# Note: if failed, change to another variable to plot.\n", + "try:\n", + " print(ds[var_2d].dims)\n", + " # ds[var_2d].plot()\n", + "\n", + " # conventionally, use \"time\" as x-axis\n", + " ds[var_2d].plot(x=\"time\")\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "markdown", + "id": "7554b2a6", + "metadata": {}, + "source": [ + "#### qc-plotting (optional)\n", + "\n", + "Note: act qc plotting has more strict requirement, one of them is the associated qc variable need to have \"flag_masks\" attributes. 
Which is added by using ds = act.io.armfiles.read_netcdf(files_list), then ds.clean.cleanup()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "71c2096f", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " ds_act = act.io.armfiles.read_netcdf(full_path)\n", + " print(type(ds_act))\n", + " ds_act.clean.cleanup()\n", + "\n", + " # or \n", + " # ds.clean.cleanup()\n", + "except Exception as e:\n", + " print(\"ERROR\", e)\n", + " ds_act = ds" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "844f8505", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter valid variables for ACT qc plotting\n", + "condition = (\"qc_\" + df_info.var_name).apply(lambda x: ds_act[x].attrs.get(\"flag_masks\") is not None \n", + " if x in list(ds.variables) else False)\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_3 = df_info[(df_info.is_dim==False) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings))) &\n", + " condition\n", + " ]\n", + "df_filter_3" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ecd38cc6", + "metadata": {}, + "outputs": [], + "source": [ + "# QC Plot\n", + "try:\n", + " qc_variable = df_filter_3.var_name.values[0]\n", + " print(qc_variable)\n", + "except Exception as e:\n", + " print(e)\n", + " \n", + "try:\n", + "\n", + " # Plot\n", + " qc_display = act.plotting.TimeSeriesDisplay(ds_act)\n", + " qc_display.add_subplots((2,), figsize = (9.5,10))\n", + " qc_ax = qc_display.plot(qc_variable, subplot_index=(0,), set_title=\"QC results on field: \" + qc_variable,)\n", + " qc_ax.grid()\n", + " qc_display.qc_flag_block_plot(qc_variable, subplot_index=(1,))\n", + " qc_ax.set_xlabel(f\"UTC Time starts at {ds.time.data[0]}\")\n", + "\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e)" + ] + }, + { + "cell_type": "markdown", + "id": "d76e4d27", + "metadata": 
{}, + "source": [ + "#### bonus: choose variables to plot from a dropdown menu " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bb733804", + "metadata": {}, + "outputs": [], + "source": [ + "# Valid variables filtering\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_4 = df_info[(df_info.is_dim==False) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter_4" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "82d5c20d", + "metadata": {}, + "outputs": [], + "source": [ + "# example 1: using xarray plot\n", + "\n", + "# Uncomment the following cell to try the interactive plot (ctrl + /)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2fc9aaa4", + "metadata": {}, + "outputs": [], + "source": [ + "# %matplotlib widget\n", + "# plt.clf()\n", + "\n", + "# fig, ax = plt.subplots(figsize=(10, 4))\n", + "\n", + "# available_variables = df_filter_4.var_name.values\n", + "# @widgets.interact(var=available_variables)\n", + "# def update(var = available_variables[0]):\n", + "# fig.clear() # Remove old lines from plot and plot new one\n", + "# if len(ds[var].dims)==2:\n", + "# ds[var].plot(x=\"time\", add_colorbar=False)\n", + "# else:\n", + "# ds[var].plot()\n", + "# plt.grid()\n", + "# plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fabdd802", + "metadata": {}, + "outputs": [], + "source": [ + "# example 2: using act plot\n", + "\n", + "# Uncomment the following cell to try the interactive plot (ctrl + /)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "962b4186", + "metadata": {}, + "outputs": [], + "source": [ + "# %matplotlib widget\n", + "# plt.clf()\n", + "\n", + "# available_variables = df_filter_4.var_name.values\n", + "\n", + "\n", + "# @widgets.interact(var=available_variables)\n", + "# def 
update(var = available_variables[0]):\n", + "\n", + "# i_display = act.plotting.TimeSeriesDisplay(ds_act)\n", + "# i_display.add_subplots((1,), figsize=(10, 4))\n", + "# ax = i_display.plot(var, subplot_index=(0,), set_title=f\"{var} ({ds_act[var].attrs['long_name']})\",)\n", + "\n", + "# ax.set_xlabel(f\"UTC Time starts at {ds.time.data[0]}\")\n", + "# ax.grid()\n", + "# plt.show()\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.16" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/AERIOE/aerioe1turn.c1.ipynb b/VAPs/quicklook/AERIOE/aerioe1turn.c1.ipynb new file mode 100644 index 00000000..45c47071 --- /dev/null +++ b/VAPs/quicklook/AERIOE/aerioe1turn.c1.ipynb @@ -0,0 +1,297 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# AERIOE1TURN.C1 Plots\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/aerioe) for more information about this vap." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib widget\n", + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import ipywidgets as widgets\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "# Data archive directory\n", + "DATA_DIR = r'/data/archive/'\n", + "\n", + "# Datastream info\n", + "DATASTREAM_NAME = 'aerioe1turn'\n", + "DATA_LEVEL = 'c1'\n", + "LOCATIONS = [{'end_date': '2023-06-13', 'facility': 'C1', 'site': 'sgp', 'start_date': '2016-01-01'}, {'end_date': '2021-06-01', 'facility': 'E32', 'site': 'sgp', 'start_date': '2016-08-10'}, {'end_date': '2021-05-18', 'facility': 'E37', 'site': 'sgp', 'start_date': '2016-08-10'}, {'end_date': '2022-07-09', 'facility': 'E39', 'site': 'sgp', 'start_date': '2016-08-12'}]" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "9faaf875", + "metadata": {}, + "source": [ + "## Define site, facility, and date range" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ac6764f5", + "metadata": {}, + "outputs": [], + "source": [ + "print(\"The following locations and date ranges are available for this VAP:\")\n", + "display(pd.DataFrame(LOCATIONS, columns=['site', 'facility', 'start_date', 'end_date']))" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "8d132223", + "metadata": {}, + "source": [ + "#### Define site, facility, and date range (date format: YYYY-MM-DD) using the variables below:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e563983a", + "metadata": {}, + "outputs": [], + "source": [ + "site_facility = ( 'sgp', 'C1' )\n", + "\n", + "date_start = '2023-06-11'\n", + "date_end = '2023-06-13'" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + 
"id": "bccd3dfe-2f99-49a5-bace-ea37e8dd8fc3", + "metadata": {}, + "source": [ + "## Load data files\n", + "Load data files from /data/archive/" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cb4b9a26-c574-49c0-a521-658fa553e39e", + "metadata": {}, + "outputs": [], + "source": [ + "# Compile list of files\n", + "site, facility = site_facility\n", + "d_date_start = datetime.strptime(date_start, '%Y-%m-%d')\n", + "d_date_end = datetime.strptime(date_end, '%Y-%m-%d')\n", + "dir_path = os.path.join(DATA_DIR + site, site + DATASTREAM_NAME + facility + r'.' + DATA_LEVEL )\n", + "dir_path\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6be8f3dc", + "metadata": {}, + "outputs": [], + "source": [ + "from datetime import date, timedelta\n", + "import pandas as pd\n", + "\n", + "def get_ARM_formated_dates(start_date, end_date):\n", + " \"\"\"\n", + " Get a list of ARM conventional formated date lists, based on start_date and end_date(inclusive)\n", + " EXAMPLE:\n", + " get_ARM_formated_dates(start_date=\"20180219\", end_date=\"20180221\")\n", + " >> [\"20180219\", \"20180220\", \"20180221\"] \n", + " \"\"\"\n", + " \n", + " _start_date = pd.to_datetime(start_date)\n", + " _end_date = pd.to_datetime(end_date)\n", + " \n", + " delta = _end_date - _start_date # returns timedelta \n", + " dates = []\n", + "\n", + " for i in range(delta.days + 1):\n", + " day = _start_date + timedelta(days=i)\n", + " day_formated = day.strftime(format=\"%Y%m%d\")\n", + " dates.append(day_formated)\n", + " return dates\n", + "\n", + "\n", + "get_ARM_formated_dates(start_date=date_start, end_date=date_end)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "51feea2e", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter a list of files based on date pattern\n", + "import glob\n", + "dates = get_ARM_formated_dates(start_date=date_start, end_date=date_end)\n", + "files_filter = []\n", + "for date in dates:\n", + " 
files_filter += glob.glob(f'{dir_path}/*.{date}*.*')\n", + " files_filter\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0e5d0d7", + "metadata": {}, + "outputs": [], + "source": [ + "# Load files as a single dataset\n", + "files_list = files_filter \n", + "ds = act.io.armfiles.read_netcdf(files_list)\n", + "ds.clean.cleanup()\n", + "print(f'{len(files_list)} files loaded')\n", + "ds\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "4a551094-9ec0-4b64-b80a-9940573c2f50", + "metadata": {}, + "source": [ + "## Plot time series data\n", + "#### Define the list of variables to be plotted:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d09b789e-84f1-4605-846b-a72c110c8048", + "metadata": {}, + "outputs": [], + "source": [ + "variables_to_plot = ['temperature', 'waterVapor', 'lwp']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3458fb08-035b-4898-9253-0a94e6f9c97b", + "metadata": {}, + "outputs": [], + "source": [ + "ts_display = act.plotting.TimeSeriesDisplay(ds)\n", + "ts_display.add_subplots((len(variables_to_plot),), figsize = (9.5,4*len(variables_to_plot)))\n", + "\n", + "for i,v in enumerate(variables_to_plot):\n", + " ts_ax = ts_display.plot(v, subplot_index=(i,), set_title=ds.variables[v].attrs['long_name'],)\n", + " ts_ax.grid()\n", + "\n", + "plt.show()\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "2308db16-f362-4033-a11a-c3e5e75ad9ba", + "metadata": {}, + "source": [ + "## Field selection dropdown menu\n", + "Select variable to be plotted from a dropdown menu" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d5792fa8-2253-441a-8ac6-1e07a0345031", + "metadata": {}, + "outputs": [], + "source": [ + "plt.ioff()\n", + "\n", + "# populate dropdown menu with available variables \n", + "available_variables = [v for v in ds.variables if not('time' in v or v.startswith('qc_') or 
v.startswith('source_')) and 'long_name' in ds.variables[v].attrs]\n", + "d_variable = 'temperature'\n", + "dropdown = widgets.Dropdown(\n", + " options = [(ds.variables[v].attrs['long_name'], v) for v in available_variables],\n", + " value= d_variable,\n", + " description='Field:',\n", + " disabled=False,\n", + ")\n", + "dropdown.layout.margin = '0px 30% 0px 20%'\n", + "dropdown.layout.width = '50%'\n", + "\n", + "# set up display\n", + "i_display = act.plotting.TimeSeriesDisplay(ds)\n", + "i_display.add_subplots((1,), figsize = (9.5,5))\n", + "i_ax = i_display.plot(d_variable, subplot_index=(0,), set_title=ds.variables[d_variable].attrs['long_name'],)\n", + "i_ax.grid()\n", + "i_fig = i_display.fig\n", + "\n", + "# update plot callback function\n", + "def update_plot(change):\n", + " i_ax.cla()\n", + " i_ax_new = i_display.plot(change.new, subplot_index=(0,), set_title=ds.variables[change.new].attrs['long_name'],)\n", + " i_ax_new.grid()\n", + " i_fig.canvas.draw()\n", + " i_fig.canvas.flush_events()\n", + "\n", + "dropdown.observe(update_plot, names='value')\n", + "\n", + "widgets.AppLayout(\n", + " header=dropdown,\n", + " center=i_fig.canvas,\n", + " pane_heights=[1, 6,1]\n", + ")\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.12" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/AERIPROF/AERIPROF_tutorial.ipynb b/VAPs/quicklook/AERIPROF/AERIPROF_tutorial.ipynb new file mode 100644 index 00000000..5ffe92ed --- /dev/null +++ b/VAPs/quicklook/AERIPROF/AERIPROF_tutorial.ipynb @@ -0,0 
+1,838 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# AERI01PROF3FELTZ.C1 Notebook\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/aeriprof) for more information about this vap." + ] + }, + { + "cell_type": "markdown", + "id": "97097763", + "metadata": {}, + "source": [ + "In this notebook, we demonstrate the workflow to explore ARM vap data (using aeri01prof3feltz as an example.) Value-added products (VAPs) are higher-order data products that have been analyzed and processed to ease scientists’ use of ARM data in atmospheric research and global climate models. \n", + "Here is the main content we will cover." + ] + }, + { + "cell_type": "markdown", + "id": "eddec40f", + "metadata": {}, + "source": [ + "# Table of Content\n", + "## Access the data\n", + "* How to retrieve the data\n", + "* Data path and file name conventions\n", + "* Load data\n", + "## Explore the data\n", + "* NetCDF Data structure\n", + "* Xarray essentials\n", + "* Xarray Variable\n", + "## Plot the data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "import random\n", + "\n", + "import glob\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "id": "2372d5be", + "metadata": {}, + "source": [ + "## Access the data" + ] + }, + { + "cell_type": "markdown", + "id": "5b0d9684", + "metadata": {}, + "source": [ + "### How to retrieve the data\n", + "We assume the path \"/data/archive\" is available where you are running this notebook. 
You can use `os.path.exists(\"/data/archive\")` to verify if the path exists on your machine.\n", + "\n", + "\n", + "### Data path and file name conventions\n", + "There are several common terminologies regarding ARM data, for example, data-stream-name, data-level, site, facility, instrument, etc. (For more details, please see [ARM Data File Standards Version 1.2](https://www.google.com/search?q=arm+datastream+facility+cite+definition&rlz=1C1GCEJ_enUS1029US1029&ei=hb41ZICrBPukqtsPvMODIA&ved=0ahUKEwjAgeiS0aL-AhV7kmoFHbzhAAQQ4dUDCBA&uact=5&oq=arm+datastream+facility+cite+definition&gs_lcp=Cgxnd3Mtd2l6LXNlcnAQAzIFCAAQogQyBQgAEKIEMgUIABCiBDIFCAAQogQyBQgAEKIEOgoIABBHENYEELADOgoIIRCgARDDBBAKSgQIQRgAUJ4DWIoOYO0PaAFwAXgAgAGMAYgBuQmSAQMyLjmYAQCgAQHIAQjAAQE&sclient=gws-wiz-serp).) \n", + "\n", + "For example, this notebook is called `aeri01prof3feltz.c1`, where `aeri01prof3feltz` is the \"datastream name\", and `c1` is the \"data level\".\n", + "\n", + "This datastream also contains site `sgp` and facility `C1`. (Note: individual datastream might have multiple site-facility pairs.)\n", + "In such a case, the data of this data-stream is stored at `/data/archive/sgp/sgpaeri01prof3feltzC1.c1`, which is in the format of `<DATA_DIR>/<site>/<site><datastream_name><facility>.<data_level>`. We can use the following method to assign the data-stream directory `datastream_dir = os.path.join(DATA_DIR, site, site + DATASTREAM_NAME + facility + '.' + DATA_LEVEL )`\n", + "\n", + "The data files under datastream_dir also follow naming conventions. But once you reach the datastream_dir level, the most important file naming convention to differentiate the files is \"yyyyMMdd.hhmmss\", which comes in handy to filter out files based on datetime. 
For example, we can use `glob.glob(f'{datastream_dir}/*.200709*.*')` to filter files from September 2007.\n", + "\n", + "(Note: refer to https://adc.arm.gov/discovery/#/ and the https://adc.arm.gov/solr8/metadata/select API to explore ARM datastreams and their associated available sites and facilities.)\n", + "\n", + "Please see the following examples in action" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d7e9eb85", + "metadata": {}, + "outputs": [], + "source": [ + "# Verify if DATA_DIR path exists\n", + "DATA_DIR = \"/data/archive\"\n", + "os.path.exists(DATA_DIR)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "586993fd", + "metadata": {}, + "outputs": [], + "source": [ + "# Specify datastream_dir following the path conventions and check its existence\n", + "DATASTREAM_NAME = \"aeri01prof3feltz\"\n", + "DATA_LEVEL = \"c1\"\n", + "site = \"sgp\"\n", + "facility = \"C1\"\n", + "datastream_dir = os.path.join(DATA_DIR, site, site + DATASTREAM_NAME + facility + '.' 
+ DATA_LEVEL )\n", + "print(datastream_dir)\n", + "print(os.path.exists(datastream_dir))\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0742f7c1", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: list 5 (random) files under datastream_dir\n", + "files = os.listdir(datastream_dir)\n", + "files[:5]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "39b98a36", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: get most recent file\n", + "list_of_files = glob.glob(f\"{datastream_dir}/*\") # * means all if need specific format then *.csv\n", + "latest_file = max(list_of_files, key=os.path.getctime)\n", + "latest_file" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "902d514e", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus sort datastream files based on datetime\n", + "files = os.listdir(datastream_dir)\n", + "file_sorted = files.copy()\n", + "file_sorted.sort() \n", + "print(file_sorted[:5])\n", + "\n", + "# to reverse\n", + "file_sorted_reverse = files.copy() \n", + "file_sorted_reverse.sort(reverse=True)\n", + "print(file_sorted_reverse[:5])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ec5923b2", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: pattern matching\n", + "# filter the 200709** files under datastream_dir\n", + "files_filter = glob.glob(f'{datastream_dir}/*.200709*.*')\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c4a4aa18", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: use armnotebook_utils.file_filter (TODO) to filter files based on datastream info \n" + ] + }, + { + "cell_type": "markdown", + "id": "cfeb9efc", + "metadata": {}, + "source": [ + "### Load data\n", + "The arm data is stored in [NetCDF](https://en.wikipedia.org/wiki/NetCDF#:~:text=%22NetCDF%20(network%20Common%20Data%20Form,format%20for%20representing%20scientific%20data.) format. 
We can use xarray's `open_dataset` method to load single file, or `open_mfdataset` to open multiple files. (Note: the latter will still return a single xarray dataset object by combining multiple files.)\n", + "\n", + "Note: open_dataset keeps the file handle open and lazy loads its contents. All parameters are passed directly to open_dataset. It is a preferable method over load_dataset for memory-effiency. (more details, please see [xarray.open_dataset](https://docs.xarray.dev/en/stable/generated/xarray.open_dataset.html))\n", + "\n", + "See the following example in action" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a440a329", + "metadata": {}, + "outputs": [], + "source": [ + "# open single file\n", + "full_path = latest_file\n", + "print(full_path)\n", + "ds_single = xr.open_dataset(full_path)\n", + "ds_single" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0143a3d", + "metadata": {}, + "outputs": [], + "source": [ + "print(ds_single)\n", + "print(type(ds_single))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1c0f8939", + "metadata": {}, + "outputs": [], + "source": [ + "# open multiple files\n", + "n_files = 3\n", + "full_paths = [os.path.join(datastream_dir, f_path) for f_path in file_sorted_reverse[:n_files]]\n", + "print(full_paths)\n", + "try: # Note: sometimes multiple files cannot be merged, so we used try except here.\n", + " ds_mutiple = xr.open_mfdataset(full_paths)\n", + " # ds_mutiple\n", + " print(type(ds_mutiple))\n", + " print(ds_mutiple)\n", + "except Exception as e:\n", + " print(e)" + ] + }, + { + "cell_type": "markdown", + "id": "3fa59943", + "metadata": {}, + "source": [ + "## Explore the data" + ] + }, + { + "cell_type": "markdown", + "id": "571f69b9", + "metadata": {}, + "source": [ + "### NetCDF Data structure \n", + "\n", + "\n", + "(If you are confident with NetCDF basics and xarray essentials, feel free to skip this session.)\n", + "Before we dive into 
data exploration, there are some essential concepts we should be familiar with (shown below): \n", + "* Dataset\n", + "* Data array\n", + "* Variable\n", + "* Dimension\n", + "* Coordinate\n", + "* Data Type\n", + "* Meta Data (Attributes)\n", + "\n", + "We will not go into details about NetCDF basics, but here are some references you might find helpful:\n", + "* [Components of a NetCDF Dataset](https://iprc.soest.hawaii.edu/users/xfu/tool/guidef-7.html#)\n", + "* [Network Common Data Form (NetCDF)](https://www.unidata.ucar.edu/software/netcdf/) and [A Brief History of (netCDF) Time](https://www.unidata.ucar.edu/software/netcdf/time/recs.html)\n", + "\n", + "\n", + "\n", + "### Xarray essentials\n", + "Earlier we introduced the xarray package and used `open_dataset` and `open_mfdataset` to load NetCDF data files as an xarray Dataset object. Recall that you can review the data in a notebook by using `print(ds)` or `ds`. \n", + "\n", + "Using xarray to retrieve the aforementioned NetCDF basics is straightforward. 
In general,\n", + "* Dataset: `ds`\n", + "* Data array: `ds[var_name]`\n", + "* Variable: `ds.variables`\n", + "* Dimension: `ds.dims`\n", + "* Coordinate: `ds.coords`\n", + "* Data Type: `type(ds)`\n", + "* Meta Data (Attributes): `ds.attrs`\n", + "\n", + "Also, here are some references if you are new to xarray:\n", + "* [Xarray in 45 minutes](https://tutorial.xarray.dev/overview/xarray-in-45-min.html)\n", + "* [Handling NetCDF Files using XArray for Absolute Beginners](https://towardsdatascience.com/handling-netcdf-files-using-xarray-for-absolute-beginners-111a8ab4463f)\n", + "\n", + "Try the following commands in action\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b3eac323", + "metadata": {}, + "outputs": [], + "source": [ + "ds = ds_single" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "77ecf85d", + "metadata": {}, + "outputs": [], + "source": [ + "# Dataset \n", + "ds" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "25e7de09", + "metadata": {}, + "outputs": [], + "source": [ + "# Data array, Variable\n", + "# Note: the info can be overwhelming. 
Do not attempt to grasp everything at the first glance.\n", + "ds.variables" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8c41b67e", + "metadata": {}, + "outputs": [], + "source": [ + "# dimenssions\n", + "ds.dims" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "156f1dfc", + "metadata": {}, + "outputs": [], + "source": [ + "# coordinates\n", + "ds.coords" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "277d6064", + "metadata": {}, + "outputs": [], + "source": [ + "# Meta Data (Attributes)\n", + "ds.attrs" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d334681f", + "metadata": {}, + "outputs": [], + "source": [ + "# type\n", + "print(type(ds))\n", + "print(type(ds.variables))\n", + "print(type(ds.dims))\n", + "print(type(ds.coords))\n", + "print(type(ds.attrs))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "643399d6", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: retrieve variable names only\n", + "list(ds.variables)" + ] + }, + { + "cell_type": "markdown", + "id": "1cf36878", + "metadata": {}, + "source": [ + "#### Discussion: variable vs. coordinates vs. dimenssions. \n", + "From [Components of a NetCDF Dataset](https://iprc.soest.hawaii.edu/users/xfu/tool/guidef-7.html#):\n", + "\n", + "* Dimession: A dimension may be used to represent a real physical dimension, for example, time, latitude, longitude, or height. A dimension might also be used to index other quantities, for example station or model-run-number.\n", + "A netCDF dimension has both a name and a length. A dimension length is an arbitrary positive integer, except that one dimension in a netCDF dataset can have the length UNLIMITED.\n", + "\n", + "* Variables: Variables are used to store the bulk of the data in a netCDF dataset. A variable represents an array of values of the same type. A scalar value is treated as a 0-dimensional array. 
A variable has a name, a data type, and a shape described by its list of dimensions specified when the variable is created. A variable may also have associated attributes, which may be added, deleted or changed after the variable is created.\n", + "\n", + "* Coordinate (Variables): It is legal for a variable to have the same name as a dimension. Such variables have no special meaning to the netCDF library. However there is a convention that such variables should be treated in a special way by software using this library.\n", + "A variable with the same name as a dimension is called a coordinate variable. It typically defines a physical coordinate corresponding to that dimension.\n", + "\n", + "\n", + "Tips: By definition, coordinates and dimensions are also variables. For example, ds.time is a coordinate, but it is also a special variable. In practice, however, when we talk about a variable, we mean a regular (non-coordinate) variable. We will use this convention for the remainder of the notebook. " + ] + }, + { + "cell_type": "markdown", + "id": "d47120f9", + "metadata": {}, + "source": [ + "### Xarray Variable\n", + "\n", + "\n", + "We can use `ds.variables` to access the variables of a dataset. But printing out the whole dataset can often be overwhelming. Instead, we can work on an individual variable (also called an xarray data array) using the `ds[var_name]` syntax, where `var_name` is the name of the variable. See the following examples in action.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5a57e136", + "metadata": {}, + "outputs": [], + "source": [ + "print(type(ds[\"time\"]))\n", + "ds[\"time\"]" + ] + }, + { + "cell_type": "markdown", + "id": "51ec643e", + "metadata": {}, + "source": [ + "#### Variable properties\n", + "For an individual variable, the following properties are most commonly used, each shown with the associated xarray attribute to retrieve it. 
(assuming var = ds[var_name])\n", + "* name: `var.name`\n", + "* data content: `var.data`\n", + "* attributes: `var.attrs`\n", + "* dimenstions: `var.dims`\n", + "* data type: `var.data.dtype`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a632a525", + "metadata": {}, + "outputs": [], + "source": [ + "var_name = \"time\"\n", + "var = ds[var_name]\n", + "\n", + "print(\"var.name: \\n\", var.name, \"\\n\")\n", + "print(\"var.data: \\n\", var.data, \"\\n\")\n", + "print(\"var.attrs: \\n\", var.attrs, \"\\n\")\n", + "print(\"var.dims: \\n\", var.dims, \"\\n\")\n", + "print(\"var.data.dtype: \\n\", var.data.dtype, \"\\n\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b256b07f", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: indivdual variable info table (collect and display variable info as a tabular format)\n", + "df_info = pd.DataFrame()\n", + "df_info[\"var_name\"] = list(ds.variables)\n", + "df_info[\"dims\"] = df_info.var_name.apply(lambda x: ds[x].dims)\n", + "df_info[\"is_dim\"] = df_info.var_name.apply(lambda x: x in ds.dims)\n", + "df_info[\"n_dim\"] = df_info.var_name.apply(lambda x: len(ds[x].dims))\n", + "df_info[\"attrs\"] = df_info.var_name.apply(lambda x: ds[x].attrs)\n", + "df_info[\"dtype\"] = df_info.var_name.apply(lambda x: ds[x].data.dtype)\n", + "df_info" + ] + }, + { + "cell_type": "markdown", + "id": "e6a36b96", + "metadata": {}, + "source": [ + "### Data cleaning/Preprocessing (skipped)\n", + "Data cleaning and preprocessing is an important stage in the the data analysis pipeline. However it will be out of the scope of this notebook. For more information about data cleaning and preprocessing basics, here are some references. 
\n", + "* [Xarray Fundamentals](https://earth-env-data-science.github.io/lectures/xarray/xarray.html)\n", + "* [Xarray Tutorial — Pangeo Gallery documentation](http://gallery.pangeo.io/repos/pangeo-data/pangeo-tutorial-gallery/xarray.html)\n", + "* [Pythonic Data Cleaning With pandas and NumPy](https://realpython.com/python-data-cleaning-numpy-pandas/)\n", + "* [Pandas - Cleaning Data](https://www.w3schools.com/python/pandas/pandas_cleaning.asp)" + ] + }, + { + "cell_type": "markdown", + "id": "ae6e1a69", + "metadata": {}, + "source": [ + "### Plotting\n", + "Data visualization (or plotting) is another important data analysis topic and deserves its own discussion. Here in this notebook we will only demonstrate basic tools for simple data visualization tasks.\n", + "\n", + "Here are some references you might find useful:\n", + "* xarray plotting: https://docs.xarray.dev/en/stable/user-guide/plotting.html\n", + "* Atmospheric Community Toolkit (ACT): https://arm-doe.github.io/ACT/index.html\n", + "\n", + "Note: this notebook is auto-generated using a template. It uses a general rule to select variable(s) to plot and is not customized for each individual notebook. Feel free to change the variables of interest, especially if a figure fails to plot." + ] + }, + { + "cell_type": "markdown", + "id": "7e417fb2", + "metadata": {}, + "source": [ + "#### 1-dimensional basic time series plot\n", + "\n", + "For the following plot we would like to find variables such that\n", + "* it has one and only one dimension\n", + "* \"time\" is its coordinate variable\n", + "* it is not a dimension itself,\n", + "* it is not a special variable with substrings within [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "\n", + "Please see the following example in action. 
(For more details about pandas filtering, please see the following references.)\n", + "* [pandas: multiple conditions while indexing data frame](https://stackoverflow.com/questions/22591174/pandas-multiple-conditions-while-indexing-data-frame-unexpected-behavior)\n", + "* [pandas.DataFrame.apply](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.apply.html)\n", + "* [How to test if a string contains one of the substrings in a list...](https://stackoverflow.com/questions/26577516/how-to-test-if-a-string-contains-one-of-the-substrings-in-a-list-in-pandas)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "30edac2d", + "metadata": {}, + "outputs": [], + "source": [ + "# (pandas query) filter n_dim==1, non-dimenssional, and not contains the following substrings\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter = df_info[(df_info.n_dim == 1) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (df_info.is_dim==False) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4777a659", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " var_1d = df_filter.var_name.values[0]\n", + " var_1d\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fb5ae985", + "metadata": {}, + "outputs": [], + "source": [ + "# Note: if failed, change to another variable to plot.\n", + "try:\n", + " ds[var_1d].plot()\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "markdown", + "id": "9e41066e", + "metadata": {}, + "source": [ + "#### 2-dimensional basic plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "09bd4adc", + "metadata": {}, + "outputs": [], + "source": [ + "# (pandas query) filter n_dim==2, non-dimenssional, and not contains the 
following substrings\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_2 = df_info[(df_info.n_dim == 2) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (df_info.is_dim==False) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter_2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2cdea1a9", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " var_2d = df_filter_2.var_name.values[0]\n", + " var_2d\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f5f48879", + "metadata": {}, + "outputs": [], + "source": [ + "# Note: if failed, change to another variable to plot.\n", + "try:\n", + " print(ds[var_2d].dims)\n", + " # ds[var_2d].plot()\n", + "\n", + " # conventionally, use \"time\" as x-axis\n", + " ds[var_2d].plot(x=\"time\")\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "markdown", + "id": "7554b2a6", + "metadata": {}, + "source": [ + "#### qc-plotting (optional)\n", + "\n", + "Note: act qc plotting has more strict requirement, one of them is the associated qc variable need to have \"flag_masks\" attributes. 
Which is added by using ds = act.io.armfiles.read_netcdf(files_list), then ds.clean.cleanup()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "71c2096f", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " ds_act = act.io.armfiles.read_netcdf(full_path)\n", + " print(type(ds_act))\n", + " ds_act.clean.cleanup()\n", + "\n", + " # or \n", + " # ds.clean.cleanup()\n", + "except Exception as e:\n", + " print(\"ERROR\", e)\n", + " ds_act = ds" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "844f8505", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter valid variables for ACT qc plotting\n", + "condition = (\"qc_\" + df_info.var_name).apply(lambda x: ds_act[x].attrs.get(\"flag_masks\") is not None \n", + " if x in list(ds.variables) else False)\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_3 = df_info[(df_info.is_dim==False) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings))) &\n", + " condition\n", + " ]\n", + "df_filter_3" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ecd38cc6", + "metadata": {}, + "outputs": [], + "source": [ + "# QC Plot\n", + "try:\n", + " qc_variable = df_filter_3.var_name.values[0]\n", + " print(qc_variable)\n", + "except Exception as e:\n", + " print(e)\n", + " \n", + "try:\n", + "\n", + " # Plot\n", + " qc_display = act.plotting.TimeSeriesDisplay(ds_act)\n", + " qc_display.add_subplots((2,), figsize = (9.5,10))\n", + " qc_ax = qc_display.plot(qc_variable, subplot_index=(0,), set_title=\"QC results on field: \" + qc_variable,)\n", + " qc_ax.grid()\n", + " qc_display.qc_flag_block_plot(qc_variable, subplot_index=(1,))\n", + " qc_ax.set_xlabel(f\"UTC Time starts at {ds.time.data[0]}\")\n", + "\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e)" + ] + }, + { + "cell_type": "markdown", + "id": "d76e4d27", + "metadata": 
{}, + "source": [ + "#### bonus: choose variables to plot from a dropdown menu " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bb733804", + "metadata": {}, + "outputs": [], + "source": [ + "# Valid variables filtering\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_4 = df_info[(df_info.is_dim==False) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter_4" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "82d5c20d", + "metadata": {}, + "outputs": [], + "source": [ + "# example 1: using xarray plot\n", + "\n", + "# Uncomment the following cell to try the interactive plot (ctrl + /)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2fc9aaa4", + "metadata": {}, + "outputs": [], + "source": [ + "# %matplotlib widget\n", + "# plt.clf()\n", + "\n", + "# fig, ax = plt.subplots(figsize=(10, 4))\n", + "\n", + "# available_variables = df_filter_4.var_name.values\n", + "# @widgets.interact(var=available_variables)\n", + "# def update(var = available_variables[0]):\n", + "# fig.clear() # Remove old lines from plot and plot new one\n", + "# if len(ds[var].dims)==2:\n", + "# ds[var].plot(x=\"time\", add_colorbar=False)\n", + "# else:\n", + "# ds[var].plot()\n", + "# plt.grid()\n", + "# plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fabdd802", + "metadata": {}, + "outputs": [], + "source": [ + "# example 2: using act plot\n", + "\n", + "# Uncomment the following cell to try the interactive plot (ctrl + /)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "962b4186", + "metadata": {}, + "outputs": [], + "source": [ + "# %matplotlib widget\n", + "# plt.clf()\n", + "\n", + "# available_variables = df_filter_4.var_name.values\n", + "\n", + "\n", + "# @widgets.interact(var=available_variables)\n", + "# def 
update(var = available_variables[0]):\n", + "\n", + "# i_display = act.plotting.TimeSeriesDisplay(ds_act)\n", + "# i_display.add_subplots((1,), figsize=(10, 4))\n", + "# ax = i_display.plot(var, subplot_index=(0,), set_title=f\"{var} ({ds_act[var].attrs['long_name']})\",)\n", + "\n", + "# ax.set_xlabel(f\"UTC Time starts at {ds.time.data[0]}\")\n", + "# ax.grid()\n", + "# plt.show()\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.16" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/AERIPROF/aeri01prof3feltz.c1.ipynb b/VAPs/quicklook/AERIPROF/aeri01prof3feltz.c1.ipynb new file mode 100644 index 00000000..4513e4a6 --- /dev/null +++ b/VAPs/quicklook/AERIPROF/aeri01prof3feltz.c1.ipynb @@ -0,0 +1,339 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# AERI01PROF3FELTZ.C1 Plots\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/aeriprof) for more information about this vap." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib widget\n", + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import ipywidgets as widgets\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "# Data archive directory\n", + "DATA_DIR = r'/data/archive/'\n", + "\n", + "# Datastream info\n", + "DATASTREAM_NAME = 'aeri01prof3feltz'\n", + "DATA_LEVEL = 'c1'\n", + "LOCATIONS = [{'end_date': '2014-03-11', 'facility': 'C1', 'site': 'sgp', 'start_date': '2002-04-18'}]" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "9faaf875", + "metadata": {}, + "source": [ + "## Define site, facility, and date range" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ac6764f5", + "metadata": {}, + "outputs": [], + "source": [ + "print(\"The following locations and date ranges are available for this VAP:\")\n", + "display(pd.DataFrame(LOCATIONS, columns=['site', 'facility', 'start_date', 'end_date']))" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "8d132223", + "metadata": {}, + "source": [ + "#### Define site, facility, and date range (date format: YYYY-MM-DD) using the variables below:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e563983a", + "metadata": {}, + "outputs": [], + "source": [ + "site_facility = ( 'sgp', 'C1' )\n", + "\n", + "date_start = '2014-03-09'\n", + "date_end = '2014-03-11'" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "bccd3dfe-2f99-49a5-bace-ea37e8dd8fc3", + "metadata": {}, + "source": [ + "## Load data files\n", + "Load data files from /data/archive/" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cb4b9a26-c574-49c0-a521-658fa553e39e", + 
"metadata": {}, + "outputs": [], + "source": [ + "# Compile list of files\n", + "site, facility = site_facility\n", + "d_date_start = datetime.strptime(date_start, '%Y-%m-%d')\n", + "d_date_end = datetime.strptime(date_end, '%Y-%m-%d')\n", + "dir_path = os.path.join(DATA_DIR + site, site + DATASTREAM_NAME + facility + r'.' + DATA_LEVEL )\n", + "dir_path\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6be8f3dc", + "metadata": {}, + "outputs": [], + "source": [ + "from datetime import date, timedelta\n", + "import pandas as pd\n", + "\n", + "def get_ARM_formated_dates(start_date, end_date):\n", + " \"\"\"\n", + " Get a list of ARM conventional formated date lists, based on start_date and end_date(inclusive)\n", + " EXAMPLE:\n", + " get_ARM_formated_dates(start_date=\"20180219\", end_date=\"20180221\")\n", + " >> [\"20180219\", \"20180220\", \"20180221\"] \n", + " \"\"\"\n", + " \n", + " _start_date = pd.to_datetime(start_date)\n", + " _end_date = pd.to_datetime(end_date)\n", + " \n", + " delta = _end_date - _start_date # returns timedelta \n", + " dates = []\n", + "\n", + " for i in range(delta.days + 1):\n", + " day = _start_date + timedelta(days=i)\n", + " day_formated = day.strftime(format=\"%Y%m%d\")\n", + " dates.append(day_formated)\n", + " return dates\n", + "\n", + "\n", + "get_ARM_formated_dates(start_date=date_start, end_date=date_end)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "51feea2e", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter a list of files based on date pattern\n", + "import glob\n", + "dates = get_ARM_formated_dates(start_date=date_start, end_date=date_end)\n", + "files_filter = []\n", + "for date in dates:\n", + " files_filter += glob.glob(f'{dir_path}/*.{date}*.*')\n", + " files_filter\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0e5d0d7", + "metadata": {}, + "outputs": [], + "source": [ + "# Load files as a single 
dataset\n", + "files_list = files_filter \n", + "ds = act.io.armfiles.read_netcdf(files_list)\n", + "ds.clean.cleanup()\n", + "print(f'{len(files_list)} files loaded')\n", + "ds\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "4a551094-9ec0-4b64-b80a-9940573c2f50", + "metadata": {}, + "source": [ + "## Plot time series data\n", + "#### Define the list of variables to be plotted:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d09b789e-84f1-4605-846b-a72c110c8048", + "metadata": {}, + "outputs": [], + "source": [ + "variables_to_plot = ['pressure', 'temperature', 'waterVaporMixingRatio']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3458fb08-035b-4898-9253-0a94e6f9c97b", + "metadata": {}, + "outputs": [], + "source": [ + "ts_display = act.plotting.TimeSeriesDisplay(ds)\n", + "ts_display.add_subplots((len(variables_to_plot),), figsize = (9.5,4*len(variables_to_plot)))\n", + "\n", + "for i,v in enumerate(variables_to_plot):\n", + " ts_ax = ts_display.plot(v, subplot_index=(i,), set_title=ds.variables[v].attrs['long_name'],)\n", + " ts_ax.grid()\n", + "\n", + "plt.show()\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "194399aa-1907-452b-8ba9-bc31d7f60291", + "metadata": {}, + "source": [ + "## Quality check plots\n", + "#### Define variable for QC plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9a0ef63d-3eeb-48fc-a24a-43258b6134b8", + "metadata": {}, + "outputs": [], + "source": [ + "qc_variable = 'pressure'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "532663a3-4dc0-4497-bda8-018c5f91e1c4", + "metadata": {}, + "outputs": [], + "source": [ + "# QC Plot\n", + "if ('qc_'+qc_variable) in ds.variables:\n", + "\n", + " # Plot\n", + " qc_display = act.plotting.TimeSeriesDisplay(ds)\n", + " qc_display.add_subplots((2,), figsize = (9.5,10))\n", + " qc_ax = qc_display.plot(qc_variable, subplot_index=(0,), set_title=\"QC 
results on field: \" + qc_variable,)\n", + " qc_ax.grid()\n", + " qc_display.qc_flag_block_plot(qc_variable, subplot_index=(1,))\n", + "\n", + " plt.show()\n", + "else:\n", + " print(f'QC not available for the selected field: {qc_variable}')\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "2308db16-f362-4033-a11a-c3e5e75ad9ba", + "metadata": {}, + "source": [ + "## Field selection dropdown menu\n", + "Select variable to be plotted from a dropdown menu" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d5792fa8-2253-441a-8ac6-1e07a0345031", + "metadata": {}, + "outputs": [], + "source": [ + "plt.ioff()\n", + "\n", + "# populate dropdown menu with available variables \n", + "available_variables = [v for v in ds.variables if not('time' in v or v.startswith('qc_') or v.startswith('source_')) and 'long_name' in ds.variables[v].attrs]\n", + "d_variable = 'pressure'\n", + "dropdown = widgets.Dropdown(\n", + " options = [(ds.variables[v].attrs['long_name'], v) for v in available_variables],\n", + " value= d_variable,\n", + " description='Field:',\n", + " disabled=False,\n", + ")\n", + "dropdown.layout.margin = '0px 30% 0px 20%'\n", + "dropdown.layout.width = '50%'\n", + "\n", + "# set up display\n", + "i_display = act.plotting.TimeSeriesDisplay(ds)\n", + "i_display.add_subplots((1,), figsize = (9.5,5))\n", + "i_ax = i_display.plot(d_variable, subplot_index=(0,), set_title=ds.variables[d_variable].attrs['long_name'],)\n", + "i_ax.grid()\n", + "i_fig = i_display.fig\n", + "\n", + "# update plot callback function\n", + "def update_plot(change):\n", + " i_ax.cla()\n", + " i_ax_new = i_display.plot(change.new, subplot_index=(0,), set_title=ds.variables[change.new].attrs['long_name'],)\n", + " i_ax_new.grid()\n", + " i_fig.canvas.draw()\n", + " i_fig.canvas.flush_events()\n", + "\n", + "dropdown.observe(update_plot, names='value')\n", + "\n", + "widgets.AppLayout(\n", + " header=dropdown,\n", + " center=i_fig.canvas,\n", + " 
pane_heights=[1, 6,1]\n", + ")\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.12" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/AERIPROF/aeriprof3feltz.c1.ipynb b/VAPs/quicklook/AERIPROF/aeriprof3feltz.c1.ipynb new file mode 100644 index 00000000..d7b4348d --- /dev/null +++ b/VAPs/quicklook/AERIPROF/aeriprof3feltz.c1.ipynb @@ -0,0 +1,339 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# AERIPROF3FELTZ.C1 Plots\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/aeriprof) for more information about this vap." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib widget\n", + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import ipywidgets as widgets\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "# Data archive directory\n", + "DATA_DIR = r'/data/archive/'\n", + "\n", + "# Datastream info\n", + "DATASTREAM_NAME = 'aeriprof3feltz'\n", + "DATA_LEVEL = 'c1'\n", + "LOCATIONS = [{'end_date': '2023-12-17', 'facility': 'C1', 'site': 'sgp', 'start_date': '2007-01-01'}]" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "9faaf875", + "metadata": {}, + "source": [ + "## Define site, facility, and date range" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ac6764f5", + "metadata": {}, + "outputs": [], + "source": [ + "print(\"The following locations and date ranges are available for this VAP:\")\n", + "display(pd.DataFrame(LOCATIONS, columns=['site', 'facility', 'start_date', 'end_date']))" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "8d132223", + "metadata": {}, + "source": [ + "#### Define site, facility, and date range (date format: YYYY-MM-DD) using the variables below:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e563983a", + "metadata": {}, + "outputs": [], + "source": [ + "site_facility = ( 'sgp', 'C1' )\n", + "\n", + "date_start = '2023-12-15'\n", + "date_end = '2023-12-17'" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "bccd3dfe-2f99-49a5-bace-ea37e8dd8fc3", + "metadata": {}, + "source": [ + "## Load data files\n", + "Load data files from /data/archive/" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cb4b9a26-c574-49c0-a521-658fa553e39e", + 
"metadata": {}, + "outputs": [], + "source": [ + "# Compile list of files\n", + "site, facility = site_facility\n", + "d_date_start = datetime.strptime(date_start, '%Y-%m-%d')\n", + "d_date_end = datetime.strptime(date_end, '%Y-%m-%d')\n", + "dir_path = os.path.join(DATA_DIR + site, site + DATASTREAM_NAME + facility + r'.' + DATA_LEVEL )\n", + "dir_path\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6be8f3dc", + "metadata": {}, + "outputs": [], + "source": [ + "from datetime import date, timedelta\n", + "import pandas as pd\n", + "\n", + "def get_ARM_formated_dates(start_date, end_date):\n", + " \"\"\"\n", + " Get a list of ARM conventional formated date lists, based on start_date and end_date(inclusive)\n", + " EXAMPLE:\n", + " get_ARM_formated_dates(start_date=\"20180219\", end_date=\"20180221\")\n", + " >> [\"20180219\", \"20180220\", \"20180221\"] \n", + " \"\"\"\n", + " \n", + " _start_date = pd.to_datetime(start_date)\n", + " _end_date = pd.to_datetime(end_date)\n", + " \n", + " delta = _end_date - _start_date # returns timedelta \n", + " dates = []\n", + "\n", + " for i in range(delta.days + 1):\n", + " day = _start_date + timedelta(days=i)\n", + " day_formated = day.strftime(format=\"%Y%m%d\")\n", + " dates.append(day_formated)\n", + " return dates\n", + "\n", + "\n", + "get_ARM_formated_dates(start_date=date_start, end_date=date_end)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "51feea2e", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter a list of files based on date pattern\n", + "import glob\n", + "dates = get_ARM_formated_dates(start_date=date_start, end_date=date_end)\n", + "files_filter = []\n", + "for date in dates:\n", + " files_filter += glob.glob(f'{dir_path}/*.{date}*.*')\n", + " files_filter\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0e5d0d7", + "metadata": {}, + "outputs": [], + "source": [ + "# Load files as a single 
dataset\n", + "files_list = files_filter \n", + "ds = act.io.armfiles.read_netcdf(files_list)\n", + "ds.clean.cleanup()\n", + "print(f'{len(files_list)} files loaded')\n", + "ds\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "4a551094-9ec0-4b64-b80a-9940573c2f50", + "metadata": {}, + "source": [ + "## Plot time series data\n", + "#### Define the list of variables to be plotted:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d09b789e-84f1-4605-846b-a72c110c8048", + "metadata": {}, + "outputs": [], + "source": [ + "variables_to_plot = ['pressure', 'temperature', 'waterVaporMixingRatio']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3458fb08-035b-4898-9253-0a94e6f9c97b", + "metadata": {}, + "outputs": [], + "source": [ + "ts_display = act.plotting.TimeSeriesDisplay(ds)\n", + "ts_display.add_subplots((len(variables_to_plot),), figsize = (9.5,4*len(variables_to_plot)))\n", + "\n", + "for i,v in enumerate(variables_to_plot):\n", + " ts_ax = ts_display.plot(v, subplot_index=(i,), set_title=ds.variables[v].attrs['long_name'],)\n", + " ts_ax.grid()\n", + "\n", + "plt.show()\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "194399aa-1907-452b-8ba9-bc31d7f60291", + "metadata": {}, + "source": [ + "## Quality check plots\n", + "#### Define variable for QC plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9a0ef63d-3eeb-48fc-a24a-43258b6134b8", + "metadata": {}, + "outputs": [], + "source": [ + "qc_variable = 'pressure'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "532663a3-4dc0-4497-bda8-018c5f91e1c4", + "metadata": {}, + "outputs": [], + "source": [ + "# QC Plot\n", + "if ('qc_'+qc_variable) in ds.variables:\n", + "\n", + " # Plot\n", + " qc_display = act.plotting.TimeSeriesDisplay(ds)\n", + " qc_display.add_subplots((2,), figsize = (9.5,10))\n", + " qc_ax = qc_display.plot(qc_variable, subplot_index=(0,), set_title=\"QC 
results on field: \" + qc_variable,)\n", + " qc_ax.grid()\n", + " qc_display.qc_flag_block_plot(qc_variable, subplot_index=(1,))\n", + "\n", + " plt.show()\n", + "else:\n", + " print(f'QC not available for the selected field: {qc_variable}')\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "2308db16-f362-4033-a11a-c3e5e75ad9ba", + "metadata": {}, + "source": [ + "## Field selection dropdown menu\n", + "Select variable to be plotted from a dropdown menu" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d5792fa8-2253-441a-8ac6-1e07a0345031", + "metadata": {}, + "outputs": [], + "source": [ + "plt.ioff()\n", + "\n", + "# populate dropdown menu with available variables \n", + "available_variables = [v for v in ds.variables if not('time' in v or v.startswith('qc_') or v.startswith('source_')) and 'long_name' in ds.variables[v].attrs]\n", + "d_variable = 'pressure'\n", + "dropdown = widgets.Dropdown(\n", + " options = [(ds.variables[v].attrs['long_name'], v) for v in available_variables],\n", + " value= d_variable,\n", + " description='Field:',\n", + " disabled=False,\n", + ")\n", + "dropdown.layout.margin = '0px 30% 0px 20%'\n", + "dropdown.layout.width = '50%'\n", + "\n", + "# set up display\n", + "i_display = act.plotting.TimeSeriesDisplay(ds)\n", + "i_display.add_subplots((1,), figsize = (9.5,5))\n", + "i_ax = i_display.plot(d_variable, subplot_index=(0,), set_title=ds.variables[d_variable].attrs['long_name'],)\n", + "i_ax.grid()\n", + "i_fig = i_display.fig\n", + "\n", + "# update plot callback function\n", + "def update_plot(change):\n", + " i_ax.cla()\n", + " i_ax_new = i_display.plot(change.new, subplot_index=(0,), set_title=ds.variables[change.new].attrs['long_name'],)\n", + " i_ax_new.grid()\n", + " i_fig.canvas.draw()\n", + " i_fig.canvas.flush_events()\n", + "\n", + "dropdown.observe(update_plot, names='value')\n", + "\n", + "widgets.AppLayout(\n", + " header=dropdown,\n", + " center=i_fig.canvas,\n", + " 
pane_heights=[1, 6,1]\n", + ")\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.12" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/AERIPROF/qmeaeriprof.c1.ipynb b/VAPs/quicklook/AERIPROF/qmeaeriprof.c1.ipynb new file mode 100644 index 00000000..4a3bc150 --- /dev/null +++ b/VAPs/quicklook/AERIPROF/qmeaeriprof.c1.ipynb @@ -0,0 +1,297 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# QMEAERIPROF.C1 Plots\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/aeriprof) for more information about this vap." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib widget\n", + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import ipywidgets as widgets\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "# Data archive directory\n", + "DATA_DIR = r'/data/archive/'\n", + "\n", + "# Datastream info\n", + "DATASTREAM_NAME = 'qmeaeriprof'\n", + "DATA_LEVEL = 'c1'\n", + "LOCATIONS = [{'end_date': '2004-01-26', 'facility': 'C1', 'site': 'sgp', 'start_date': '1996-06-14'}]" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "9faaf875", + "metadata": {}, + "source": [ + "## Define site, facility, and date range" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ac6764f5", + "metadata": {}, + "outputs": [], + "source": [ + "print(\"The following locations and date ranges are available for this VAP:\")\n", + "display(pd.DataFrame(LOCATIONS, columns=['site', 'facility', 'start_date', 'end_date']))" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "8d132223", + "metadata": {}, + "source": [ + "#### Define site, facility, and date range (date format: YYYY-MM-DD) using the variables below:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e563983a", + "metadata": {}, + "outputs": [], + "source": [ + "site_facility = ( 'sgp', 'C1' )\n", + "\n", + "date_start = '2004-01-26'\n", + "date_end = '2004-01-26'" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "bccd3dfe-2f99-49a5-bace-ea37e8dd8fc3", + "metadata": {}, + "source": [ + "## Load data files\n", + "Load data files from /data/archive/" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cb4b9a26-c574-49c0-a521-658fa553e39e", + "metadata": 
{}, + "outputs": [], + "source": [ + "# Compile list of files\n", + "site, facility = site_facility\n", + "d_date_start = datetime.strptime(date_start, '%Y-%m-%d')\n", + "d_date_end = datetime.strptime(date_end, '%Y-%m-%d')\n", + "dir_path = os.path.join(DATA_DIR + site, site + DATASTREAM_NAME + facility + r'.' + DATA_LEVEL )\n", + "dir_path\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6be8f3dc", + "metadata": {}, + "outputs": [], + "source": [ + "from datetime import date, timedelta\n", + "import pandas as pd\n", + "\n", + "def get_ARM_formated_dates(start_date, end_date):\n", + " \"\"\"\n", + " Get a list of ARM conventional formated date lists, based on start_date and end_date(inclusive)\n", + " EXAMPLE:\n", + " get_ARM_formated_dates(start_date=\"20180219\", end_date=\"20180221\")\n", + " >> [\"20180219\", \"20180220\", \"20180221\"] \n", + " \"\"\"\n", + " \n", + " _start_date = pd.to_datetime(start_date)\n", + " _end_date = pd.to_datetime(end_date)\n", + " \n", + " delta = _end_date - _start_date # returns timedelta \n", + " dates = []\n", + "\n", + " for i in range(delta.days + 1):\n", + " day = _start_date + timedelta(days=i)\n", + " day_formated = day.strftime(format=\"%Y%m%d\")\n", + " dates.append(day_formated)\n", + " return dates\n", + "\n", + "\n", + "get_ARM_formated_dates(start_date=date_start, end_date=date_end)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "51feea2e", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter a list of files based on date pattern\n", + "import glob\n", + "dates = get_ARM_formated_dates(start_date=date_start, end_date=date_end)\n", + "files_filter = []\n", + "for date in dates:\n", + " files_filter += glob.glob(f'{dir_path}/*.{date}*.*')\n", + " files_filter\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0e5d0d7", + "metadata": {}, + "outputs": [], + "source": [ + "# Load files as a single dataset\n", + 
"files_list = files_filter \n", + "ds = act.io.armfiles.read_netcdf(files_list)\n", + "ds.clean.cleanup()\n", + "print(f'{len(files_list)} files loaded')\n", + "ds\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "4a551094-9ec0-4b64-b80a-9940573c2f50", + "metadata": {}, + "source": [ + "## Plot time series data\n", + "#### Define the list of variables to be plotted:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d09b789e-84f1-4605-846b-a72c110c8048", + "metadata": {}, + "outputs": [], + "source": [ + "variables_to_plot = ['temperature_resid', 'dewpoint_resid', 'mixing_ratio_resid']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3458fb08-035b-4898-9253-0a94e6f9c97b", + "metadata": {}, + "outputs": [], + "source": [ + "ts_display = act.plotting.TimeSeriesDisplay(ds)\n", + "ts_display.add_subplots((len(variables_to_plot),), figsize = (9.5,4*len(variables_to_plot)))\n", + "\n", + "for i,v in enumerate(variables_to_plot):\n", + " ts_ax = ts_display.plot(v, subplot_index=(i,), set_title=ds.variables[v].attrs['long_name'],)\n", + " ts_ax.grid()\n", + "\n", + "plt.show()\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "2308db16-f362-4033-a11a-c3e5e75ad9ba", + "metadata": {}, + "source": [ + "## Field selection dropdown menu\n", + "Select variable to be plotted from a dropdown menu" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d5792fa8-2253-441a-8ac6-1e07a0345031", + "metadata": {}, + "outputs": [], + "source": [ + "plt.ioff()\n", + "\n", + "# populate dropdown menu with available variables \n", + "available_variables = [v for v in ds.variables if not('time' in v or v.startswith('qc_') or v.startswith('source_')) and 'long_name' in ds.variables[v].attrs]\n", + "d_variable = 'temperature_resid'\n", + "dropdown = widgets.Dropdown(\n", + " options = [(ds.variables[v].attrs['long_name'], v) for v in available_variables],\n", + " value= d_variable,\n", + 
" description='Field:',\n", + " disabled=False,\n", + ")\n", + "dropdown.layout.margin = '0px 30% 0px 20%'\n", + "dropdown.layout.width = '50%'\n", + "\n", + "# set up display\n", + "i_display = act.plotting.TimeSeriesDisplay(ds)\n", + "i_display.add_subplots((1,), figsize = (9.5,5))\n", + "i_ax = i_display.plot(d_variable, subplot_index=(0,), set_title=ds.variables[d_variable].attrs['long_name'],)\n", + "i_ax.grid()\n", + "i_fig = i_display.fig\n", + "\n", + "# update plot callback function\n", + "def update_plot(change):\n", + " i_ax.cla()\n", + " i_ax_new = i_display.plot(change.new, subplot_index=(0,), set_title=ds.variables[change.new].attrs['long_name'],)\n", + " i_ax_new.grid()\n", + " i_fig.canvas.draw()\n", + " i_fig.canvas.flush_events()\n", + "\n", + "dropdown.observe(update_plot, names='value')\n", + "\n", + "widgets.AppLayout(\n", + " header=dropdown,\n", + " center=i_fig.canvas,\n", + " pane_heights=[1, 6,1]\n", + ")\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.12" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/AEROSOLBE/AEROSOLBE_tutorial.ipynb b/VAPs/quicklook/AEROSOLBE/AEROSOLBE_tutorial.ipynb new file mode 100644 index 00000000..1dfbc5a5 --- /dev/null +++ b/VAPs/quicklook/AEROSOLBE/AEROSOLBE_tutorial.ipynb @@ -0,0 +1,838 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# AEROSOLBE1TURN.C1 Notebook\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/aerosolbe) for 
more information about this vap." + ] + }, + { + "cell_type": "markdown", + "id": "97097763", + "metadata": {}, + "source": [ + "In this notebook, we demonstrate the workflow to explore ARM vap data (using aerosolbe1turn as an example). Value-added products (VAPs) are higher-order data products that have been analyzed and processed to ease scientists’ use of ARM data in atmospheric research and global climate models. \n", + "Here is the main content we will cover." + ] + }, + { + "cell_type": "markdown", + "id": "eddec40f", + "metadata": {}, + "source": [ + "# Table of Contents\n", + "## Access the data\n", + "* How to retrieve the data\n", + "* Data path and file name conventions\n", + "* Load data\n", + "## Explore the data\n", + "* NetCDF Data structure\n", + "* Xarray essentials\n", + "* Xarray Variable\n", + "## Plot the data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "import random\n", + "\n", + "import glob\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "id": "2372d5be", + "metadata": {}, + "source": [ + "## Access the data" + ] + }, + { + "cell_type": "markdown", + "id": "5b0d9684", + "metadata": {}, + "source": [ + "### How to retrieve the data\n", + "We assume the path \"/data/archive\" is available where you are running this notebook. You can use `os.path.exists(\"/data/archive\")` to verify that the path exists on your machine.\n", + "\n", + "\n", + "### Data path and file name conventions\n", + "There are several common terms regarding ARM data, for example, data-stream-name, data-level, site, facility, instrument, etc.
(For more details, please see [ARM Data File Standards Version 1.2](https://www.google.com/search?q=arm+datastream+facility+cite+definition&rlz=1C1GCEJ_enUS1029US1029&ei=hb41ZICrBPukqtsPvMODIA&ved=0ahUKEwjAgeiS0aL-AhV7kmoFHbzhAAQQ4dUDCBA&uact=5&oq=arm+datastream+facility+cite+definition&gs_lcp=Cgxnd3Mtd2l6LXNlcnAQAzIFCAAQogQyBQgAEKIEMgUIABCiBDIFCAAQogQyBQgAEKIEOgoIABBHENYEELADOgoIIRCgARDDBBAKSgQIQRgAUJ4DWIoOYO0PaAFwAXgAgAGMAYgBuQmSAQMyLjmYAQCgAQHIAQjAAQE&sclient=gws-wiz-serp).) \n", + "\n", + "For example, this notebook is called `aerosolbe1turn.c1`, where `aerosolbe1turn` is the \"datastream name\", and `c1` is the \"data level\".\n", + "\n", + "This datastream also contains site `sgp` and facility `C1`. (Note: an individual datastream might have multiple site-facility pairs.)\n", + "In such a case, the data of this data-stream is stored at `/data/archive/sgp/sgpaerosolbe1turnC1.c1`, which is in the format of `<DATA_DIR>/<site>/<site><DATASTREAM_NAME><facility>.<DATA_LEVEL>`. We can use the following method to assign the data-stream directory: `datastream_dir = os.path.join(DATA_DIR, site, site + DATASTREAM_NAME + facility + '.' + DATA_LEVEL )`\n", + "\n", + "The data files under datastream_dir also follow naming conventions. Once you reach the datastream_dir level, the most important file naming convention to differentiate the files is \"yyyyMMdd.hhmmss\", which comes in handy to filter files based on datetime.
For example, we can use `glob.glob(f'{datastream_dir}/*.200709*.*')` to filter files from September 2007.\n", + "\n", + "(Note: refer to https://adc.arm.gov/discovery/#/ and the https://adc.arm.gov/solr8/metadata/select API to explore ARM datastreams and their associated available sites and facilities.)\n", + "\n", + "Please see the following examples in action." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d7e9eb85", + "metadata": {}, + "outputs": [], + "source": [ + "# Verify if DATA_DIR path exists\n", + "DATA_DIR = \"/data/archive\"\n", + "os.path.exists(DATA_DIR)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "586993fd", + "metadata": {}, + "outputs": [], + "source": [ + "# Specify datastream_dir following the path conventions and check its existence\n", + "DATASTREAM_NAME = \"aerosolbe1turn\"\n", + "DATA_LEVEL = \"c1\"\n", + "site = \"sgp\"\n", + "facility = \"C1\"\n", + "datastream_dir = os.path.join(DATA_DIR, site, site + DATASTREAM_NAME + facility + '.'
+ DATA_LEVEL )\n", + "print(datastream_dir)\n", + "print(os.path.exists(datastream_dir))\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0742f7c1", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: list 5 (random) files under datastream_dir\n", + "files = os.listdir(datastream_dir)\n", + "files[:5]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "39b98a36", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: get most recent file\n", + "list_of_files = glob.glob(f\"{datastream_dir}/*\") # * means all if need specific format then *.csv\n", + "latest_file = max(list_of_files, key=os.path.getctime)\n", + "latest_file" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "902d514e", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus sort datastream files based on datetime\n", + "files = os.listdir(datastream_dir)\n", + "file_sorted = files.copy()\n", + "file_sorted.sort() \n", + "print(file_sorted[:5])\n", + "\n", + "# to reverse\n", + "file_sorted_reverse = files.copy() \n", + "file_sorted_reverse.sort(reverse=True)\n", + "print(file_sorted_reverse[:5])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ec5923b2", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: pattern matching\n", + "# filter the 200709** files under datastream_dir\n", + "files_filter = glob.glob(f'{datastream_dir}/*.200709*.*')\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c4a4aa18", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: use armnotebook_utils.file_filter (TODO) to filter files based on datastream info \n" + ] + }, + { + "cell_type": "markdown", + "id": "cfeb9efc", + "metadata": {}, + "source": [ + "### Load data\n", + "The arm data is stored in [NetCDF](https://en.wikipedia.org/wiki/NetCDF#:~:text=%22NetCDF%20(network%20Common%20Data%20Form,format%20for%20representing%20scientific%20data.) format. 
We can use xarray's `open_dataset` method to load a single file, or `open_mfdataset` to open multiple files. (Note: the latter will still return a single xarray dataset object by combining multiple files.)\n", + "\n", + "Note: open_dataset keeps the file handle open and lazy loads its contents. All parameters are passed directly to open_dataset. It is preferable to load_dataset for memory efficiency. (For more details, please see [xarray.open_dataset](https://docs.xarray.dev/en/stable/generated/xarray.open_dataset.html))\n", + "\n", + "See the following example in action" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a440a329", + "metadata": {}, + "outputs": [], + "source": [ + "# open single file\n", + "full_path = latest_file\n", + "print(full_path)\n", + "ds_single = xr.open_dataset(full_path)\n", + "ds_single" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0143a3d", + "metadata": {}, + "outputs": [], + "source": [ + "print(ds_single)\n", + "print(type(ds_single))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1c0f8939", + "metadata": {}, + "outputs": [], + "source": [ + "# open multiple files\n", + "n_files = 3\n", + "full_paths = [os.path.join(datastream_dir, f_path) for f_path in file_sorted_reverse[:n_files]]\n", + "print(full_paths)\n", + "try: # Note: sometimes multiple files cannot be merged, so we used try except here.\n", + " ds_mutiple = xr.open_mfdataset(full_paths)\n", + " # ds_mutiple\n", + " print(type(ds_mutiple))\n", + " print(ds_mutiple)\n", + "except Exception as e:\n", + " print(e)" + ] + }, + { + "cell_type": "markdown", + "id": "3fa59943", + "metadata": {}, + "source": [ + "## Explore the data" + ] + }, + { + "cell_type": "markdown", + "id": "571f69b9", + "metadata": {}, + "source": [ + "### NetCDF Data structure \n", + "\n", + "\n", + "(If you are confident with NetCDF basics and xarray essentials, feel free to skip this section.)\n", + "Before we dive into
data exploration, there are some essential concepts we should be familiar with (shown below): \n", + "* Dataset\n", + "* Data array\n", + "* Variable\n", + "* Dimension\n", + "* Coordinate\n", + "* Data Type\n", + "* Meta Data (Attributes)\n", + "\n", + "We will not go into detail about NetCDF basics, but here are some references you might find helpful:\n", + "* [Components of a NetCDF Dataset](https://iprc.soest.hawaii.edu/users/xfu/tool/guidef-7.html#)\n", + "* [Network Common Data Form (NetCDF)](https://www.unidata.ucar.edu/software/netcdf/) and [A Brief History of (netCDF) Time](https://www.unidata.ucar.edu/software/netcdf/time/recs.html)\n", + "\n", + "\n", + "\n", + "### Xarray essentials\n", + "Earlier we introduced the xarray package and used `open_dataset` and `open_mfdataset` to load NetCDF data files as an xarray Dataset object. Recall that you can review the data in a notebook by using `print(ds)` or `ds`. \n", + "\n", + "Using xarray to retrieve the aforementioned NetCDF basics is straightforward.
In general,\n", + "* Dataset: `ds`\n", + "* Data array: `ds.variables`\n", + "* Variable: `ds.variables`\n", + "* Dimension: `ds.dims`\n", + "* Coordinate: `ds.coords`\n", + "* Data Type: `type`\n", + "* Meta Data (Attributes): `ds.attrs`\n", + "\n", + "Also, here are some references if you are new to xarray:\n", + "* [Xarray in 45 minutes](https://tutorial.xarray.dev/overview/xarray-in-45-min.html)\n", + "* [Handling NetCDF Files using XArray for Absolute Beginners](https://towardsdatascience.com/handling-netcdf-files-using-xarray-for-absolute-beginners-111a8ab4463f)\n", + "\n", + "Try the following commands in action\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b3eac323", + "metadata": {}, + "outputs": [], + "source": [ + "ds = ds_single" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "77ecf85d", + "metadata": {}, + "outputs": [], + "source": [ + "# Dataset \n", + "ds" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "25e7de09", + "metadata": {}, + "outputs": [], + "source": [ + "# Data array, Variable\n", + "# Note: the info can be overwhelming.
Do not attempt to grasp everything at first glance.\n", + "ds.variables" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8c41b67e", + "metadata": {}, + "outputs": [], + "source": [ + "# dimensions\n", + "ds.dims" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "156f1dfc", + "metadata": {}, + "outputs": [], + "source": [ + "# coordinates\n", + "ds.coords" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "277d6064", + "metadata": {}, + "outputs": [], + "source": [ + "# Meta Data (Attributes)\n", + "ds.attrs" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d334681f", + "metadata": {}, + "outputs": [], + "source": [ + "# type\n", + "print(type(ds))\n", + "print(type(ds.variables))\n", + "print(type(ds.dims))\n", + "print(type(ds.coords))\n", + "print(type(ds.attrs))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "643399d6", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: retrieve variable names only\n", + "list(ds.variables)" + ] + }, + { + "cell_type": "markdown", + "id": "1cf36878", + "metadata": {}, + "source": [ + "#### Discussion: variables vs. coordinates vs. dimensions. \n", + "From [Components of a NetCDF Dataset](https://iprc.soest.hawaii.edu/users/xfu/tool/guidef-7.html#):\n", + "\n", + "* Dimension: A dimension may be used to represent a real physical dimension, for example, time, latitude, longitude, or height. A dimension might also be used to index other quantities, for example station or model-run-number.\n", + "A netCDF dimension has both a name and a length. A dimension length is an arbitrary positive integer, except that one dimension in a netCDF dataset can have the length UNLIMITED.\n", + "\n", + "* Variables: Variables are used to store the bulk of the data in a netCDF dataset. A variable represents an array of values of the same type. A scalar value is treated as a 0-dimensional array.
A variable has a name, a data type, and a shape described by its list of dimensions specified when the variable is created. A variable may also have associated attributes, which may be added, deleted or changed after the variable is created.\n", + "\n", + "* Coordinate (Variables): It is legal for a variable to have the same name as a dimension. Such variables have no special meaning to the netCDF library. However there is a convention that such variables should be treated in a special way by software using this library.\n", + "A variable with the same name as a dimension is called a coordinate variable. It typically defines a physical coordinate corresponding to that dimension.\n", + "\n", + "\n", + "Tips: By definition, coordinates and dimensions are also variables. For example, ds.time is a coordinate, but it is also a special variable. In practice, however, when we talk about a variable, we mean a regular (non-coordinate) variable. We will use this convention for the rest of this notebook. " + ] + }, + { + "cell_type": "markdown", + "id": "d47120f9", + "metadata": {}, + "source": [ + "### Xarray Variable\n", + "\n", + "\n", + "We can use `ds.variables` to access the variables of a dataset. But printing out the whole dataset can often be overwhelming. Instead, we can work on an individual variable (also called an xarray data array) using the `ds[var_name]` syntax, where `var_name` is the name of the variable. See the following examples in action.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5a57e136", + "metadata": {}, + "outputs": [], + "source": [ + "print(type(ds[\"time\"]))\n", + "ds[\"time\"]" + ] + }, + { + "cell_type": "markdown", + "id": "51ec643e", + "metadata": {}, + "source": [ + "#### Variable properties\n", + "For an individual variable, the following properties are most commonly used, listed with the associated xarray attribute to retrieve them.
(assuming var = ds[var_name])\n", + "* name: `var.name`\n", + "* data content: `var.data`\n", + "* attributes: `var.attrs`\n", + "* dimensions: `var.dims`\n", + "* data type: `var.data.dtype`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a632a525", + "metadata": {}, + "outputs": [], + "source": [ + "var_name = \"time\"\n", + "var = ds[var_name]\n", + "\n", + "print(\"var.name: \\n\", var.name, \"\\n\")\n", + "print(\"var.data: \\n\", var.data, \"\\n\")\n", + "print(\"var.attrs: \\n\", var.attrs, \"\\n\")\n", + "print(\"var.dims: \\n\", var.dims, \"\\n\")\n", + "print(\"var.data.dtype: \\n\", var.data.dtype, \"\\n\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b256b07f", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: individual variable info table (collect and display variable info in tabular format)\n", + "df_info = pd.DataFrame()\n", + "df_info[\"var_name\"] = list(ds.variables)\n", + "df_info[\"dims\"] = df_info.var_name.apply(lambda x: ds[x].dims)\n", + "df_info[\"is_dim\"] = df_info.var_name.apply(lambda x: x in ds.dims)\n", + "df_info[\"n_dim\"] = df_info.var_name.apply(lambda x: len(ds[x].dims))\n", + "df_info[\"attrs\"] = df_info.var_name.apply(lambda x: ds[x].attrs)\n", + "df_info[\"dtype\"] = df_info.var_name.apply(lambda x: ds[x].data.dtype)\n", + "df_info" + ] + }, + { + "cell_type": "markdown", + "id": "e6a36b96", + "metadata": {}, + "source": [ + "### Data cleaning/Preprocessing (skipped)\n", + "Data cleaning and preprocessing is an important stage in the data analysis pipeline. However, it is outside the scope of this notebook. For more information about data cleaning and preprocessing basics, here are some references.
\n", + "* [Xarray Fundamentals](https://earth-env-data-science.github.io/lectures/xarray/xarray.html)\n", + "* [Xarray Tutorial — Pangeo Gallery documentation](http://gallery.pangeo.io/repos/pangeo-data/pangeo-tutorial-gallery/xarray.html)\n", + "* [Pythonic Data Cleaning With pandas and NumPy](https://realpython.com/python-data-cleaning-numpy-pandas/)\n", + "* [Pandas - Cleaning Data](https://www.w3schools.com/python/pandas/pandas_cleaning.asp)" + ] + }, + { + "cell_type": "markdown", + "id": "ae6e1a69", + "metadata": {}, + "source": [ + "### Plotting\n", + "Data visualization (or plotting) is another important data analysis topic and deserves its own discussion. Here in this notebook we will only demonstrate basic tools for simple data visualization tasks.\n", + "\n", + "Here are some references you might find useful:\n", + "* xarray plotting: https://docs.xarray.dev/en/stable/user-guide/plotting.html\n", + "* Atmospheric Community Toolkit (ACT): https://arm-doe.github.io/ACT/index.html\n", + "\n", + "Note: this notebook is auto-generated using a template. It uses a generic approach to select variable(s) to plot and is not customized for each individual notebook. Feel free to change the variables of interest, especially if a figure fails to plot." + ] + }, + { + "cell_type": "markdown", + "id": "7e417fb2", + "metadata": {}, + "source": [ + "#### 1-dimensional basic time series plot\n", + "\n", + "For the following plot we would like to find variables such that\n", + "* it has one and only one dimension\n", + "* \"time\" is its coordinate variable\n", + "* it is not a dimension itself,\n", + "* it is not a special variable with substrings within [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "\n", + "Please see the following example in action.
(For more details about pandas filtering, please see the following references.)\n", + "* [pandas: multiple conditions while indexing data frame](https://stackoverflow.com/questions/22591174/pandas-multiple-conditions-while-indexing-data-frame-unexpected-behavior)\n", + "* [pandas.DataFrame.apply](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.apply.html)\n", + "* [How to test if a string contains one of the substrings in a list...](https://stackoverflow.com/questions/26577516/how-to-test-if-a-string-contains-one-of-the-substrings-in-a-list-in-pandas)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "30edac2d", + "metadata": {}, + "outputs": [], + "source": [ + "# (pandas query) filter n_dim==1, non-dimensional, and not contains the following substrings\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter = df_info[(df_info.n_dim == 1) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (df_info.is_dim==False) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4777a659", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " var_1d = df_filter.var_name.values[0]\n", + " var_1d\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fb5ae985", + "metadata": {}, + "outputs": [], + "source": [ + "# Note: if failed, change to another variable to plot.\n", + "try:\n", + " ds[var_1d].plot()\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "markdown", + "id": "9e41066e", + "metadata": {}, + "source": [ + "#### 2-dimensional basic plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "09bd4adc", + "metadata": {}, + "outputs": [], + "source": [ + "# (pandas query) filter n_dim==2, non-dimensional, and not contains the 
following substrings\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_2 = df_info[(df_info.n_dim == 2) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (df_info.is_dim==False) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter_2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2cdea1a9", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " var_2d = df_filter_2.var_name.values[0]\n", + " var_2d\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f5f48879", + "metadata": {}, + "outputs": [], + "source": [ + "# Note: if failed, change to another variable to plot.\n", + "try:\n", + " print(ds[var_2d].dims)\n", + " # ds[var_2d].plot()\n", + "\n", + " # conventionally, use \"time\" as x-axis\n", + " ds[var_2d].plot(x=\"time\")\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "markdown", + "id": "7554b2a6", + "metadata": {}, + "source": [ + "#### qc-plotting (optional)\n", + "\n", + "Note: act qc plotting has more strict requirement, one of them is the associated qc variable need to have \"flag_masks\" attributes. 
Which is added by using ds = act.io.armfiles.read_netcdf(files_list), then ds.clean.cleanup()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "71c2096f", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " ds_act = act.io.armfiles.read_netcdf(full_path)\n", + " print(type(ds_act))\n", + " ds_act.clean.cleanup()\n", + "\n", + " # or \n", + " # ds.clean.cleanup()\n", + "except Exception as e:\n", + " print(\"ERROR\", e)\n", + " ds_act = ds" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "844f8505", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter valid variables for ACT qc plotting\n", + "condition = (\"qc_\" + df_info.var_name).apply(lambda x: ds_act[x].attrs.get(\"flag_masks\") is not None \n", + " if x in list(ds.variables) else False)\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_3 = df_info[(df_info.is_dim==False) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings))) &\n", + " condition\n", + " ]\n", + "df_filter_3" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ecd38cc6", + "metadata": {}, + "outputs": [], + "source": [ + "# QC Plot\n", + "try:\n", + " qc_variable = df_filter_3.var_name.values[0]\n", + " print(qc_variable)\n", + "except Exception as e:\n", + " print(e)\n", + " \n", + "try:\n", + "\n", + " # Plot\n", + " qc_display = act.plotting.TimeSeriesDisplay(ds_act)\n", + " qc_display.add_subplots((2,), figsize = (9.5,10))\n", + " qc_ax = qc_display.plot(qc_variable, subplot_index=(0,), set_title=\"QC results on field: \" + qc_variable,)\n", + " qc_ax.grid()\n", + " qc_display.qc_flag_block_plot(qc_variable, subplot_index=(1,))\n", + " qc_ax.set_xlabel(f\"UTC Time starts at {ds.time.data[0]}\")\n", + "\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e)" + ] + }, + { + "cell_type": "markdown", + "id": "d76e4d27", + "metadata": 
{}, + "source": [ + "#### bonus: choose variables to plot from a dropdown menu " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bb733804", + "metadata": {}, + "outputs": [], + "source": [ + "# Valid variables filtering\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_4 = df_info[(df_info.is_dim==False) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter_4" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "82d5c20d", + "metadata": {}, + "outputs": [], + "source": [ + "# example 1: using xarray plot\n", + "\n", + "# Uncomment the following cell to try the interactive plot (ctrl + /)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2fc9aaa4", + "metadata": {}, + "outputs": [], + "source": [ + "# %matplotlib widget\n", + "# plt.clf()\n", + "\n", + "# fig, ax = plt.subplots(figsize=(10, 4))\n", + "\n", + "# available_variables = df_filter_4.var_name.values\n", + "# @widgets.interact(var=available_variables)\n", + "# def update(var = available_variables[0]):\n", + "# fig.clear() # Remove old lines from plot and plot new one\n", + "# if len(ds[var].dims)==2:\n", + "# ds[var].plot(x=\"time\", add_colorbar=False)\n", + "# else:\n", + "# ds[var].plot()\n", + "# plt.grid()\n", + "# plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fabdd802", + "metadata": {}, + "outputs": [], + "source": [ + "# example 2: using act plot\n", + "\n", + "# Uncomment the following cell to try the interactive plot (ctrl + /)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "962b4186", + "metadata": {}, + "outputs": [], + "source": [ + "# %matplotlib widget\n", + "# plt.clf()\n", + "\n", + "# available_variables = df_filter_4.var_name.values\n", + "\n", + "\n", + "# @widgets.interact(var=available_variables)\n", + "# def 
update(var = available_variables[0]):\n", + "\n", + "# i_display = act.plotting.TimeSeriesDisplay(ds_act)\n", + "# i_display.add_subplots((1,), figsize=(10, 4))\n", + "# ax = i_display.plot(var, subplot_index=(0,), set_title=f\"{var} ({ds_act[var].attrs['long_name']})\",)\n", + "\n", + "# ax.set_xlabel(f\"UTC Time starts at {ds.time.data[0]}\")\n", + "# ax.grid()\n", + "# plt.show()\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.16" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/AEROSOLBE/aerosolbe1turn.c1.ipynb b/VAPs/quicklook/AEROSOLBE/aerosolbe1turn.c1.ipynb new file mode 100644 index 00000000..cc96202b --- /dev/null +++ b/VAPs/quicklook/AEROSOLBE/aerosolbe1turn.c1.ipynb @@ -0,0 +1,339 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# AEROSOLBE1TURN.C1 Plots\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/aerosolbe) for more information about this vap." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib widget\n", + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import ipywidgets as widgets\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "# Data archive directory\n", + "DATA_DIR = r'/data/archive/'\n", + "\n", + "# Datastream info\n", + "DATASTREAM_NAME = 'aerosolbe1turn'\n", + "DATA_LEVEL = 'c1'\n", + "LOCATIONS = [{'end_date': '2021-04-30', 'facility': 'C1', 'site': 'sgp', 'start_date': '2001-01-01'}]" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "9faaf875", + "metadata": {}, + "source": [ + "## Define site, facility, and date range" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ac6764f5", + "metadata": {}, + "outputs": [], + "source": [ + "print(\"The following locations and date ranges are available for this VAP:\")\n", + "display(pd.DataFrame(LOCATIONS, columns=['site', 'facility', 'start_date', 'end_date']))" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "8d132223", + "metadata": {}, + "source": [ + "#### Define site, facility, and date range (date format: YYYY-MM-DD) using the variables below:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e563983a", + "metadata": {}, + "outputs": [], + "source": [ + "site_facility = ( 'sgp', 'C1' )\n", + "\n", + "date_start = '2021-03-30'\n", + "date_end = '2021-04-01'" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "bccd3dfe-2f99-49a5-bace-ea37e8dd8fc3", + "metadata": {}, + "source": [ + "## Load data files\n", + "Load data files from /data/archive/" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cb4b9a26-c574-49c0-a521-658fa553e39e", + 
"metadata": {}, + "outputs": [], + "source": [ + "# Compile list of files\n", + "site, facility = site_facility\n", + "d_date_start = datetime.strptime(date_start, '%Y-%m-%d')\n", + "d_date_end = datetime.strptime(date_end, '%Y-%m-%d')\n", + "dir_path = os.path.join(DATA_DIR + site, site + DATASTREAM_NAME + facility + r'.' + DATA_LEVEL )\n", + "dir_path\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6be8f3dc", + "metadata": {}, + "outputs": [], + "source": [ + "from datetime import date, timedelta\n", + "import pandas as pd\n", + "\n", + "def get_ARM_formated_dates(start_date, end_date):\n", + " \"\"\"\n", + " Get a list of ARM conventional formated date lists, based on start_date and end_date(inclusive)\n", + " EXAMPLE:\n", + " get_ARM_formated_dates(start_date=\"20180219\", end_date=\"20180221\")\n", + " >> [\"20180219\", \"20180220\", \"20180221\"] \n", + " \"\"\"\n", + " \n", + " _start_date = pd.to_datetime(start_date)\n", + " _end_date = pd.to_datetime(end_date)\n", + " \n", + " delta = _end_date - _start_date # returns timedelta \n", + " dates = []\n", + "\n", + " for i in range(delta.days + 1):\n", + " day = _start_date + timedelta(days=i)\n", + " day_formated = day.strftime(format=\"%Y%m%d\")\n", + " dates.append(day_formated)\n", + " return dates\n", + "\n", + "\n", + "get_ARM_formated_dates(start_date=date_start, end_date=date_end)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "51feea2e", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter a list of files based on date pattern\n", + "import glob\n", + "dates = get_ARM_formated_dates(start_date=date_start, end_date=date_end)\n", + "files_filter = []\n", + "for date in dates:\n", + " files_filter += glob.glob(f'{dir_path}/*.{date}*.*')\n", + " files_filter\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0e5d0d7", + "metadata": {}, + "outputs": [], + "source": [ + "# Load files as a single 
dataset\n", + "files_list = files_filter \n", + "ds = act.io.armfiles.read_netcdf(files_list)\n", + "ds.clean.cleanup()\n", + "print(f'{len(files_list)} files loaded')\n", + "ds\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "4a551094-9ec0-4b64-b80a-9940573c2f50", + "metadata": {}, + "source": [ + "## Plot time series data\n", + "#### Define the list of variables to be plotted:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d09b789e-84f1-4605-846b-a72c110c8048", + "metadata": {}, + "outputs": [], + "source": [ + "variables_to_plot = ['be_aod_500', 'be_aod_355', 'be_angstrom_exponent']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3458fb08-035b-4898-9253-0a94e6f9c97b", + "metadata": {}, + "outputs": [], + "source": [ + "ts_display = act.plotting.TimeSeriesDisplay(ds)\n", + "ts_display.add_subplots((len(variables_to_plot),), figsize = (9.5,4*len(variables_to_plot)))\n", + "\n", + "for i,v in enumerate(variables_to_plot):\n", + " ts_ax = ts_display.plot(v, subplot_index=(i,), set_title=ds.variables[v].attrs['long_name'],)\n", + " ts_ax.grid()\n", + "\n", + "plt.show()\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "194399aa-1907-452b-8ba9-bc31d7f60291", + "metadata": {}, + "source": [ + "## Quality check plots\n", + "#### Define variable for QC plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9a0ef63d-3eeb-48fc-a24a-43258b6134b8", + "metadata": {}, + "outputs": [], + "source": [ + "qc_variable = 'be_aod_500'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "532663a3-4dc0-4497-bda8-018c5f91e1c4", + "metadata": {}, + "outputs": [], + "source": [ + "# QC Plot\n", + "if ('qc_'+qc_variable) in ds.variables:\n", + "\n", + " # Plot\n", + " qc_display = act.plotting.TimeSeriesDisplay(ds)\n", + " qc_display.add_subplots((2,), figsize = (9.5,10))\n", + " qc_ax = qc_display.plot(qc_variable, subplot_index=(0,), set_title=\"QC 
results on field: \" + qc_variable,)\n", + " qc_ax.grid()\n", + " qc_display.qc_flag_block_plot(qc_variable, subplot_index=(1,))\n", + "\n", + " plt.show()\n", + "else:\n", + " print(f'QC not available for the selected field: {qc_variable}')\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "2308db16-f362-4033-a11a-c3e5e75ad9ba", + "metadata": {}, + "source": [ + "## Field selection dropdown menu\n", + "Select variable to be plotted from a dropdown menu" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d5792fa8-2253-441a-8ac6-1e07a0345031", + "metadata": {}, + "outputs": [], + "source": [ + "plt.ioff()\n", + "\n", + "# populate dropdown menu with available variables \n", + "available_variables = [v for v in ds.variables if not('time' in v or v.startswith('qc_') or v.startswith('source_')) and 'long_name' in ds.variables[v].attrs]\n", + "d_variable = 'be_aod_500'\n", + "dropdown = widgets.Dropdown(\n", + " options = [(ds.variables[v].attrs['long_name'], v) for v in available_variables],\n", + " value= d_variable,\n", + " description='Field:',\n", + " disabled=False,\n", + ")\n", + "dropdown.layout.margin = '0px 30% 0px 20%'\n", + "dropdown.layout.width = '50%'\n", + "\n", + "# set up display\n", + "i_display = act.plotting.TimeSeriesDisplay(ds)\n", + "i_display.add_subplots((1,), figsize = (9.5,5))\n", + "i_ax = i_display.plot(d_variable, subplot_index=(0,), set_title=ds.variables[d_variable].attrs['long_name'],)\n", + "i_ax.grid()\n", + "i_fig = i_display.fig\n", + "\n", + "# update plot callback function\n", + "def update_plot(change):\n", + " i_ax.cla()\n", + " i_ax_new = i_display.plot(change.new, subplot_index=(0,), set_title=ds.variables[change.new].attrs['long_name'],)\n", + " i_ax_new.grid()\n", + " i_fig.canvas.draw()\n", + " i_fig.canvas.flush_events()\n", + "\n", + "dropdown.observe(update_plot, names='value')\n", + "\n", + "widgets.AppLayout(\n", + " header=dropdown,\n", + " center=i_fig.canvas,\n", + " 
pane_heights=[1, 6,1]\n", + ")\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.12" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/AIP/AIP_tutorial.ipynb b/VAPs/quicklook/AIP/AIP_tutorial.ipynb new file mode 100644 index 00000000..091e5bf8 --- /dev/null +++ b/VAPs/quicklook/AIP/AIP_tutorial.ipynb @@ -0,0 +1,838 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# AIP1OGREN.C1 Notebook\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/aip) for more information about this vap." + ] + }, + { + "cell_type": "markdown", + "id": "97097763", + "metadata": {}, + "source": [ + "In this notebook, we demonstrate the workflow to explore ARM vap data (using aip1ogren as an example.) Value-added products (VAPs) are higher-order data products that have been analyzed and processed to ease scientists’ use of ARM data in atmospheric research and global climate models. \n", + "Here is the main content we will cover." 
+ ] + }, + { + "cell_type": "markdown", + "id": "eddec40f", + "metadata": {}, + "source": [ + "# Table of Content\n", + "## Access the data\n", + "* How to retrieve the data\n", + "* Data path and file name conventions\n", + "* Load data\n", + "## Explore the data\n", + "* NetCDF Data structure\n", + "* Xarray essentials\n", + "* Xarray Variable\n", + "## Plot the data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "import random\n", + "\n", + "import glob\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "id": "2372d5be", + "metadata": {}, + "source": [ + "## Access the data" + ] + }, + { + "cell_type": "markdown", + "id": "5b0d9684", + "metadata": {}, + "source": [ + "### How to retrieve the data\n", + "We assume the path \"/data/archive\" is available where you are running this notebook. You can use `os.path.exists(\"/data/archive\")` to verify if the path exists at your machine.\n", + "\n", + "\n", + "### Data path and file name conventions\n", + "There several common terminologies regarding ARM data, for example, data-stream-name, data-level, site, facility, instrument, etc. (For more details, please see [ARM Data File Standards Version 1.2](https://www.google.com/search?q=arm+datastream+facility+cite+definition&rlz=1C1GCEJ_enUS1029US1029&ei=hb41ZICrBPukqtsPvMODIA&ved=0ahUKEwjAgeiS0aL-AhV7kmoFHbzhAAQQ4dUDCBA&uact=5&oq=arm+datastream+facility+cite+definition&gs_lcp=Cgxnd3Mtd2l6LXNlcnAQAzIFCAAQogQyBQgAEKIEMgUIABCiBDIFCAAQogQyBQgAEKIEOgoIABBHENYEELADOgoIIRCgARDDBBAKSgQIQRgAUJ4DWIoOYO0PaAFwAXgAgAGMAYgBuQmSAQMyLjmYAQCgAQHIAQjAAQE&sclient=gws-wiz-serp).) 
\n", + "\n", + "For example, this notebook is called `aip1ogren.c1`, where `aip1ogren` is the \"datastream name\", and `c1` is the \"data level\".\n", + "\n", + "This datastream also contains site `hfe` and facility `M1`. (Note: an individual datastream might have multiple site-facility pairs.)\n", + "In such a case, the data of this data-stream is stored at `/data/archive/hfe/hfeaip1ogrenM1.c1`, which is in the format of `{DATA_DIR}/{site}/{site}{datastream_name}{facility}.{data_level}`. We can use the following method to assign the data-stream directory `datastream_dir = os.path.join(DATA_DIR, site, site + DATASTREAM_NAME + facility + '.' + DATA_LEVEL )`\n", + "\n", + "The data files under datastream_dir also follow naming conventions. Once at the datastream_dir level, the most important file naming convention to differentiate the files is \"yyyyMMdd.hhmmss\", which comes in handy to filter files based on datetime. For example, we can use `glob.glob(f'{datastream_dir}/*.200709*.*')` to filter files from September 2007.\n", + "\n", + "(Note: refer to the https://adc.arm.gov/discovery/#/ and https://adc.arm.gov/solr8/metadata/select API to explore ARM datastreams and associated available sites and facilities.)\n", + "\n", + "Please see the following examples in action" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d7e9eb85", + "metadata": {}, + "outputs": [], + "source": [ + "# Verify if DATA_DIR path exists\n", + "DATA_DIR = \"/data/archive\"\n", + "os.path.exists(DATA_DIR)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "586993fd", + "metadata": {}, + "outputs": [], + "source": [ + "# Specify datastream_dir following the path conventions and check its existence\n", + "DATASTREAM_NAME = \"aip1ogren\"\n", + "DATA_LEVEL = \"c1\"\n", + "site = \"hfe\"\n", + "facility = \"M1\"\n", + "datastream_dir = os.path.join(DATA_DIR, site, site + DATASTREAM_NAME + facility + '.' 
+ DATA_LEVEL )\n", + "print(datastream_dir)\n", + "print(os.path.exists(datastream_dir))\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0742f7c1", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: list 5 (random) files under datastream_dir\n", + "files = os.listdir(datastream_dir)\n", + "files[:5]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "39b98a36", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: get most recent file\n", + "list_of_files = glob.glob(f\"{datastream_dir}/*\") # * means all if need specific format then *.csv\n", + "latest_file = max(list_of_files, key=os.path.getctime)\n", + "latest_file" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "902d514e", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus sort datastream files based on datetime\n", + "files = os.listdir(datastream_dir)\n", + "file_sorted = files.copy()\n", + "file_sorted.sort() \n", + "print(file_sorted[:5])\n", + "\n", + "# to reverse\n", + "file_sorted_reverse = files.copy() \n", + "file_sorted_reverse.sort(reverse=True)\n", + "print(file_sorted_reverse[:5])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ec5923b2", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: pattern matching\n", + "# filter the 200709** files under datastream_dir\n", + "files_filter = glob.glob(f'{datastream_dir}/*.200709*.*')\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c4a4aa18", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: use armnotebook_utils.file_filter (TODO) to filter files based on datastream info \n" + ] + }, + { + "cell_type": "markdown", + "id": "cfeb9efc", + "metadata": {}, + "source": [ + "### Load data\n", + "The arm data is stored in [NetCDF](https://en.wikipedia.org/wiki/NetCDF#:~:text=%22NetCDF%20(network%20Common%20Data%20Form,format%20for%20representing%20scientific%20data.) format. 
We can use xarray's `open_dataset` method to load single file, or `open_mfdataset` to open multiple files. (Note: the latter will still return a single xarray dataset object by combining multiple files.)\n", + "\n", + "Note: open_dataset keeps the file handle open and lazy loads its contents. All parameters are passed directly to open_dataset. It is a preferable method over load_dataset for memory-effiency. (more details, please see [xarray.open_dataset](https://docs.xarray.dev/en/stable/generated/xarray.open_dataset.html))\n", + "\n", + "See the following example in action" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a440a329", + "metadata": {}, + "outputs": [], + "source": [ + "# open single file\n", + "full_path = latest_file\n", + "print(full_path)\n", + "ds_single = xr.open_dataset(full_path)\n", + "ds_single" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0143a3d", + "metadata": {}, + "outputs": [], + "source": [ + "print(ds_single)\n", + "print(type(ds_single))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1c0f8939", + "metadata": {}, + "outputs": [], + "source": [ + "# open multiple files\n", + "n_files = 3\n", + "full_paths = [os.path.join(datastream_dir, f_path) for f_path in file_sorted_reverse[:n_files]]\n", + "print(full_paths)\n", + "try: # Note: sometimes multiple files cannot be merged, so we used try except here.\n", + " ds_mutiple = xr.open_mfdataset(full_paths)\n", + " # ds_mutiple\n", + " print(type(ds_mutiple))\n", + " print(ds_mutiple)\n", + "except Exception as e:\n", + " print(e)" + ] + }, + { + "cell_type": "markdown", + "id": "3fa59943", + "metadata": {}, + "source": [ + "## Explore the data" + ] + }, + { + "cell_type": "markdown", + "id": "571f69b9", + "metadata": {}, + "source": [ + "### NetCDF Data structure \n", + "\n", + "\n", + "(If you are confident with NetCDF basics and xarray essentials, feel free to skip this session.)\n", + "Before we dive into 
data exploration, there are some essential concepts we should be familiar with (shown below): \n", + "* Dataset\n", + "* Data array\n", + "* Variable\n", + "* Dimension\n", + "* Coordinate\n", + "* Data Type\n", + "* Meta Data (Attributes)\n", + "\n", + "We will not go into details about NetCDF basics, but here are some references you might find helpful\n", + "* [Components of a NetCDF Dataset](https://iprc.soest.hawaii.edu/users/xfu/tool/guidef-7.html#)\n", + "* [Network Common Data Form (NetCDF)](https://www.unidata.ucar.edu/software/netcdf/) and [A Brief History of (netCDF) Time](https://www.unidata.ucar.edu/software/netcdf/time/recs.html)\n", + "\n", + "\n", + "\n", + "### Xarray essentials\n", + "Earlier we introduced the xarray package and used `open_dataset` and `open_mfdataset` to retrieve NetCDF data files as an xarray Dataset object. Recall that you can review the data in a notebook by using `print(ds)` or `ds`. \n", + "\n", + "Using xarray to retrieve the aforementioned NetCDF basics is straightforward. 
In general,\n", + "* Dataset: `ds`\n", + "* Data array: `ds.variables`\n", + "* Variable: `ds.variables`\n", + "* Dimension: `ds.dims`\n", + "* Coordinate: `ds.coords`\n", + "* Data Type: `type`\n", + "* Meta Data (Attributes): `ds.attrs`\n", + "\n", + "Also, here are some references if you are new to xarray\n", + "* [Xarray in 45 minutes](https://tutorial.xarray.dev/overview/xarray-in-45-min.html)\n", + "* [Handling NetCDF Files using XArray for Absolute Beginners](https://towardsdatascience.com/handling-netcdf-files-using-xarray-for-absolute-beginners-111a8ab4463f)\n", + "\n", + "Try the following commands in action\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b3eac323", + "metadata": {}, + "outputs": [], + "source": [ + "ds = ds_single" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "77ecf85d", + "metadata": {}, + "outputs": [], + "source": [ + "# Dataset \n", + "ds" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "25e7de09", + "metadata": {}, + "outputs": [], + "source": [ + "# Data array, Variable\n", + "# Note: the info can be overwhelming. 
Do not attempt to grasp everything at the first glance.\n", + "ds.variables" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8c41b67e", + "metadata": {}, + "outputs": [], + "source": [ + "# dimenssions\n", + "ds.dims" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "156f1dfc", + "metadata": {}, + "outputs": [], + "source": [ + "# coordinates\n", + "ds.coords" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "277d6064", + "metadata": {}, + "outputs": [], + "source": [ + "# Meta Data (Attributes)\n", + "ds.attrs" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d334681f", + "metadata": {}, + "outputs": [], + "source": [ + "# type\n", + "print(type(ds))\n", + "print(type(ds.variables))\n", + "print(type(ds.dims))\n", + "print(type(ds.coords))\n", + "print(type(ds.attrs))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "643399d6", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: retrieve variable names only\n", + "list(ds.variables)" + ] + }, + { + "cell_type": "markdown", + "id": "1cf36878", + "metadata": {}, + "source": [ + "#### Discussion: variable vs. coordinates vs. dimenssions. \n", + "From [Components of a NetCDF Dataset](https://iprc.soest.hawaii.edu/users/xfu/tool/guidef-7.html#):\n", + "\n", + "* Dimession: A dimension may be used to represent a real physical dimension, for example, time, latitude, longitude, or height. A dimension might also be used to index other quantities, for example station or model-run-number.\n", + "A netCDF dimension has both a name and a length. A dimension length is an arbitrary positive integer, except that one dimension in a netCDF dataset can have the length UNLIMITED.\n", + "\n", + "* Variables: Variables are used to store the bulk of the data in a netCDF dataset. A variable represents an array of values of the same type. A scalar value is treated as a 0-dimensional array. 
A variable has a name, a data type, and a shape described by its list of dimensions specified when the variable is created. A variable may also have associated attributes, which may be added, deleted or changed after the variable is created.\n", + "\n", + "* Coordinate (Variables): It is legal for a variable to have the same name as a dimension. Such variables have no special meaning to the netCDF library. However there is a convention that such variables should be treated in a special way by software using this library.\n", + "A variable with the same name as a dimension is called a coordinate variable. It typically defines a physical coordinate corresponding to that dimension.\n", + "\n", + "\n", + "Tips: By definition, coordinates and dimensions are also variables. For example ds.time is a coordinate but it is also a special variable. But in practice, when we talk about a variable, we mean a regular (non-coordinate) variable. We will use this convention for the remainder of this notebook. " + ] + }, + { + "cell_type": "markdown", + "id": "d47120f9", + "metadata": {}, + "source": [ + "### Xarray Variable\n", + "\n", + "\n", + "We can use `ds.variables` to access the variables of a dataset. But printing out the whole dataset can often be overwhelming. Instead, we can work on an individual variable (also called an xarray data array) using the `ds[var_name]` syntax, where `var_name` is the name of the variable. See the following examples in action.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5a57e136", + "metadata": {}, + "outputs": [], + "source": [ + "print(type(ds[\"time\"]))\n", + "ds[\"time\"]" + ] + }, + { + "cell_type": "markdown", + "id": "51ec643e", + "metadata": {}, + "source": [ + "#### Variable properties\n", + "For an individual variable, the following properties are the most commonly used, listed with the associated xarray attribute to retrieve them. 
(assuming var = ds[var_name])\n", + "* name: `var.name`\n", + "* data content: `var.data`\n", + "* attributes: `var.attrs`\n", + "* dimensions: `var.dims`\n", + "* data type: `var.data.dtype`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a632a525", + "metadata": {}, + "outputs": [], + "source": [ + "var_name = \"time\"\n", + "var = ds[var_name]\n", + "\n", + "print(\"var.name: \\n\", var.name, \"\\n\")\n", + "print(\"var.data: \\n\", var.data, \"\\n\")\n", + "print(\"var.attrs: \\n\", var.attrs, \"\\n\")\n", + "print(\"var.dims: \\n\", var.dims, \"\\n\")\n", + "print(\"var.data.dtype: \\n\", var.data.dtype, \"\\n\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b256b07f", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: individual variable info table (collect and display variable info in a tabular format)\n", + "df_info = pd.DataFrame()\n", + "df_info[\"var_name\"] = list(ds.variables)\n", + "df_info[\"dims\"] = df_info.var_name.apply(lambda x: ds[x].dims)\n", + "df_info[\"is_dim\"] = df_info.var_name.apply(lambda x: x in ds.dims)\n", + "df_info[\"n_dim\"] = df_info.var_name.apply(lambda x: len(ds[x].dims))\n", + "df_info[\"attrs\"] = df_info.var_name.apply(lambda x: ds[x].attrs)\n", + "df_info[\"dtype\"] = df_info.var_name.apply(lambda x: ds[x].data.dtype)\n", + "df_info" + ] + }, + { + "cell_type": "markdown", + "id": "e6a36b96", + "metadata": {}, + "source": [ + "### Data cleaning/Preprocessing (skipped)\n", + "Data cleaning and preprocessing is an important stage in the data analysis pipeline. However, it is out of the scope of this notebook. For more information about data cleaning and preprocessing basics, here are some references. 
\n", + "* [Xarray Fundamentals](https://earth-env-data-science.github.io/lectures/xarray/xarray.html)\n", + "* [Xarray Tutorial — Pangeo Gallery documentation](http://gallery.pangeo.io/repos/pangeo-data/pangeo-tutorial-gallery/xarray.html)\n", + "* [Pythonic Data Cleaning With pandas and NumPy](https://realpython.com/python-data-cleaning-numpy-pandas/)\n", + "* [Pandas - Cleaning Data](https://www.w3schools.com/python/pandas/pandas_cleaning.asp)" + ] + }, + { + "cell_type": "markdown", + "id": "ae6e1a69", + "metadata": {}, + "source": [ + "### Plotting\n", + "Data visualization (or plotting) is another important data analysis topic and deserves its own discussion. Here in this notebook we will only demonstrate basic tools for simple data visualization tasks.\n", + "\n", + "Here are some references you might find useful:\n", + "* xarray plotting: https://docs.xarray.dev/en/stable/user-guide/plotting.html\n", + "* Atmospheric Community Toolkit (ACT): https://arm-doe.github.io/ACT/index.html\n", + "\n", + "Note: this notebook is auto-generated using a template. It uses a general approach to select variable(s) to plot and is not customized to each individual notebook. Feel free to change the variables of interest, especially if a certain figure fails to plot." + ] + }, + { + "cell_type": "markdown", + "id": "7e417fb2", + "metadata": {}, + "source": [ + "#### 1-dimensional basic time series plot\n", + "\n", + "For the following plot we would like to find variables such that\n", + "* it has one and only one dimension\n", + "* \"time\" is its coordinate variable\n", + "* it is not a dimension itself\n", + "* it is not a special variable with substrings within [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "\n", + "Please see the following example in action. 
(For more details about pandas filtering, please see the following references.)\n", + "* [pandas: multiple conditions while indexing data frame](https://stackoverflow.com/questions/22591174/pandas-multiple-conditions-while-indexing-data-frame-unexpected-behavior)\n", + "* [pandas.DataFrame.apply](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.apply.html)\n", + "* [How to test if a string contains one of the substrings in a list...](https://stackoverflow.com/questions/26577516/how-to-test-if-a-string-contains-one-of-the-substrings-in-a-list-in-pandas)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "30edac2d", + "metadata": {}, + "outputs": [], + "source": [ + "# (pandas query) filter n_dim==1, non-dimensional, and not contains the following substrings\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter = df_info[(df_info.n_dim == 1) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (df_info.is_dim==False) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4777a659", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " var_1d = df_filter.var_name.values[0]\n", + " var_1d\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fb5ae985", + "metadata": {}, + "outputs": [], + "source": [ + "# Note: if failed, change to another variable to plot.\n", + "try:\n", + " ds[var_1d].plot()\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "markdown", + "id": "9e41066e", + "metadata": {}, + "source": [ + "#### 2-dimensional basic plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "09bd4adc", + "metadata": {}, + "outputs": [], + "source": [ + "# (pandas query) filter n_dim==2, non-dimensional, and not contains the 
following substrings\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_2 = df_info[(df_info.n_dim == 2) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (df_info.is_dim==False) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter_2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2cdea1a9", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " var_2d = df_filter_2.var_name.values[0]\n", + " var_2d\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f5f48879", + "metadata": {}, + "outputs": [], + "source": [ + "# Note: if failed, change to another variable to plot.\n", + "try:\n", + " print(ds[var_2d].dims)\n", + " # ds[var_2d].plot()\n", + "\n", + " # conventionally, use \"time\" as x-axis\n", + " ds[var_2d].plot(x=\"time\")\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "markdown", + "id": "7554b2a6", + "metadata": {}, + "source": [ + "#### qc-plotting (optional)\n", + "\n", + "Note: act qc plotting has more strict requirement, one of them is the associated qc variable need to have \"flag_masks\" attributes. 
Which is added by using ds = act.io.armfiles.read_netcdf(files_list), then ds.clean.cleanup()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "71c2096f", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " ds_act = act.io.armfiles.read_netcdf(full_path)\n", + " print(type(ds_act))\n", + " ds_act.clean.cleanup()\n", + "\n", + " # or \n", + " # ds.clean.cleanup()\n", + "except Exception as e:\n", + " print(\"ERROR\", e)\n", + " ds_act = ds" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "844f8505", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter valid variables for ACT qc plotting\n", + "condition = (\"qc_\" + df_info.var_name).apply(lambda x: ds_act[x].attrs.get(\"flag_masks\") is not None \n", + " if x in list(ds.variables) else False)\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_3 = df_info[(df_info.is_dim==False) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings))) &\n", + " condition\n", + " ]\n", + "df_filter_3" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ecd38cc6", + "metadata": {}, + "outputs": [], + "source": [ + "# QC Plot\n", + "try:\n", + " qc_variable = df_filter_3.var_name.values[0]\n", + " print(qc_variable)\n", + "except Exception as e:\n", + " print(e)\n", + " \n", + "try:\n", + "\n", + " # Plot\n", + " qc_display = act.plotting.TimeSeriesDisplay(ds_act)\n", + " qc_display.add_subplots((2,), figsize = (9.5,10))\n", + " qc_ax = qc_display.plot(qc_variable, subplot_index=(0,), set_title=\"QC results on field: \" + qc_variable,)\n", + " qc_ax.grid()\n", + " qc_display.qc_flag_block_plot(qc_variable, subplot_index=(1,))\n", + " qc_ax.set_xlabel(f\"UTC Time starts at {ds.time.data[0]}\")\n", + "\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e)" + ] + }, + { + "cell_type": "markdown", + "id": "d76e4d27", + "metadata": 
{}, + "source": [ + "#### bonus: choose variables to plot from a dropdown menu " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bb733804", + "metadata": {}, + "outputs": [], + "source": [ + "# Valid variables filtering\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_4 = df_info[(df_info.is_dim==False) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter_4" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "82d5c20d", + "metadata": {}, + "outputs": [], + "source": [ + "# example 1: using xarray plot\n", + "\n", + "# Uncomment the following cell to try the interactive plot (ctrl + /)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2fc9aaa4", + "metadata": {}, + "outputs": [], + "source": [ + "# %matplotlib widget\n", + "# plt.clf()\n", + "\n", + "# fig, ax = plt.subplots(figsize=(10, 4))\n", + "\n", + "# available_variables = df_filter_4.var_name.values\n", + "# @widgets.interact(var=available_variables)\n", + "# def update(var = available_variables[0]):\n", + "# fig.clear() # Remove old lines from plot and plot new one\n", + "# if len(ds[var].dims)==2:\n", + "# ds[var].plot(x=\"time\", add_colorbar=False)\n", + "# else:\n", + "# ds[var].plot()\n", + "# plt.grid()\n", + "# plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fabdd802", + "metadata": {}, + "outputs": [], + "source": [ + "# example 2: using act plot\n", + "\n", + "# Uncomment the following cell to try the interactive plot (ctrl + /)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "962b4186", + "metadata": {}, + "outputs": [], + "source": [ + "# %matplotlib widget\n", + "# plt.clf()\n", + "\n", + "# available_variables = df_filter_4.var_name.values\n", + "\n", + "\n", + "# @widgets.interact(var=available_variables)\n", + "# def 
update(var = available_variables[0]):\n", + "\n", + "# i_display = act.plotting.TimeSeriesDisplay(ds_act)\n", + "# i_display.add_subplots((1,), figsize=(10, 4))\n", + "# ax = i_display.plot(var, subplot_index=(0,), set_title=f\"{var} ({ds_act[var].attrs['long_name']})\",)\n", + "\n", + "# ax.set_xlabel(f\"UTC Time starts at {ds.time.data[0]}\")\n", + "# ax.grid()\n", + "# plt.show()\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.16" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/AIP/aip1ogren.c1.ipynb b/VAPs/quicklook/AIP/aip1ogren.c1.ipynb new file mode 100644 index 00000000..7eebfb91 --- /dev/null +++ b/VAPs/quicklook/AIP/aip1ogren.c1.ipynb @@ -0,0 +1,339 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# AIP1OGREN.C1 Plots\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/aip) for more information about this vap." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib widget\n", + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import ipywidgets as widgets\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "# Data archive directory\n", + "DATA_DIR = r'/data/archive/'\n", + "\n", + "# Datastream info\n", + "DATASTREAM_NAME = 'aip1ogren'\n", + "DATA_LEVEL = 'c1'\n", + "LOCATIONS = [{'end_date': '2008-12-27', 'facility': 'M1', 'site': 'hfe', 'start_date': '2008-05-09'}, {'end_date': '2007-12-30', 'facility': 'M1', 'site': 'fkb', 'start_date': '2007-03-19'}, {'end_date': '2015-11-30', 'facility': 'M1', 'site': 'mao', 'start_date': '2014-01-01'}, {'end_date': '2010-12-30', 'facility': 'M1', 'site': 'grw', 'start_date': '2009-04-14'}, {'end_date': '2017-03-30', 'facility': 'C1', 'site': 'sgp', 'start_date': '1996-07-02'}, {'end_date': '2005-09-14', 'facility': 'M1', 'site': 'pye', 'start_date': '2005-03-09'}, {'end_date': '2006-12-30', 'facility': 'M1', 'site': 'nim', 'start_date': '2005-11-19'}, {'end_date': '2012-03-26', 'facility': 'M1', 'site': 'pgh', 'start_date': '2011-06-10'}, {'end_date': '2013-06-23', 'facility': 'M1', 'site': 'pvc', 'start_date': '2012-07-16'}]" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "9faaf875", + "metadata": {}, + "source": [ + "## Define site, facility, and date range" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ac6764f5", + "metadata": {}, + "outputs": [], + "source": [ + "print(\"The following locations and date ranges are available for this VAP:\")\n", + "display(pd.DataFrame(LOCATIONS, columns=['site', 'facility', 'start_date', 'end_date']))" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": 
"8d132223", + "metadata": {}, + "source": [ + "#### Define site, facility, and date range (date format: YYYY-MM-DD) using the variables below:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e563983a", + "metadata": {}, + "outputs": [], + "source": [ + "site_facility = ( 'sgp', 'C1' )\n", + "\n", + "date_start = '2017-03-28'\n", + "date_end = '2017-03-30'" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "bccd3dfe-2f99-49a5-bace-ea37e8dd8fc3", + "metadata": {}, + "source": [ + "## Load data files\n", + "Load data files from /data/archive/" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cb4b9a26-c574-49c0-a521-658fa553e39e", + "metadata": {}, + "outputs": [], + "source": [ + "# Compile list of files\n", + "site, facility = site_facility\n", + "d_date_start = datetime.strptime(date_start, '%Y-%m-%d')\n", + "d_date_end = datetime.strptime(date_end, '%Y-%m-%d')\n", + "dir_path = os.path.join(DATA_DIR + site, site + DATASTREAM_NAME + facility + r'.' 
+ DATA_LEVEL )\n", + "dir_path\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6be8f3dc", + "metadata": {}, + "outputs": [], + "source": [ + "from datetime import date, timedelta\n", + "import pandas as pd\n", + "\n", + "def get_ARM_formated_dates(start_date, end_date):\n", + " \"\"\"\n", + " Get a list of ARM conventional formated date lists, based on start_date and end_date(inclusive)\n", + " EXAMPLE:\n", + " get_ARM_formated_dates(start_date=\"20180219\", end_date=\"20180221\")\n", + " >> [\"20180219\", \"20180220\", \"20180221\"] \n", + " \"\"\"\n", + " \n", + " _start_date = pd.to_datetime(start_date)\n", + " _end_date = pd.to_datetime(end_date)\n", + " \n", + " delta = _end_date - _start_date # returns timedelta \n", + " dates = []\n", + "\n", + " for i in range(delta.days + 1):\n", + " day = _start_date + timedelta(days=i)\n", + " day_formated = day.strftime(format=\"%Y%m%d\")\n", + " dates.append(day_formated)\n", + " return dates\n", + "\n", + "\n", + "get_ARM_formated_dates(start_date=date_start, end_date=date_end)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "51feea2e", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter a list of files based on date pattern\n", + "import glob\n", + "dates = get_ARM_formated_dates(start_date=date_start, end_date=date_end)\n", + "files_filter = []\n", + "for date in dates:\n", + " files_filter += glob.glob(f'{dir_path}/*.{date}*.*')\n", + " files_filter\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0e5d0d7", + "metadata": {}, + "outputs": [], + "source": [ + "# Load files as a single dataset\n", + "files_list = files_filter \n", + "ds = act.io.armfiles.read_netcdf(files_list)\n", + "ds.clean.cleanup()\n", + "print(f'{len(files_list)} files loaded')\n", + "ds\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "4a551094-9ec0-4b64-b80a-9940573c2f50", + "metadata": {}, + "source": [ + "## Plot 
time series data\n", + "#### Define the list of variables to be plotted:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d09b789e-84f1-4605-846b-a72c110c8048", + "metadata": {}, + "outputs": [], + "source": [ + "variables_to_plot = ['Ba_G_Dry_10um_PSAP1W_1', 'Ba_G_Dry_1um_PSAP1W_1', 'Ba_R_Dry_10um_PSAP3W_1']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3458fb08-035b-4898-9253-0a94e6f9c97b", + "metadata": {}, + "outputs": [], + "source": [ + "ts_display = act.plotting.TimeSeriesDisplay(ds)\n", + "ts_display.add_subplots((len(variables_to_plot),), figsize = (9.5,4*len(variables_to_plot)))\n", + "\n", + "for i,v in enumerate(variables_to_plot):\n", + " ts_ax = ts_display.plot(v, subplot_index=(i,), set_title=ds.variables[v].attrs['long_name'],)\n", + " ts_ax.grid()\n", + "\n", + "plt.show()\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "194399aa-1907-452b-8ba9-bc31d7f60291", + "metadata": {}, + "source": [ + "## Quality check plots\n", + "#### Define variable for QC plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9a0ef63d-3eeb-48fc-a24a-43258b6134b8", + "metadata": {}, + "outputs": [], + "source": [ + "qc_variable = 'Ba_G_Dry_10um_PSAP1W_1'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "532663a3-4dc0-4497-bda8-018c5f91e1c4", + "metadata": {}, + "outputs": [], + "source": [ + "# QC Plot\n", + "if ('qc_'+qc_variable) in ds.variables:\n", + "\n", + " # Plot\n", + " qc_display = act.plotting.TimeSeriesDisplay(ds)\n", + " qc_display.add_subplots((2,), figsize = (9.5,10))\n", + " qc_ax = qc_display.plot(qc_variable, subplot_index=(0,), set_title=\"QC results on field: \" + qc_variable,)\n", + " qc_ax.grid()\n", + " qc_display.qc_flag_block_plot(qc_variable, subplot_index=(1,))\n", + "\n", + " plt.show()\n", + "else:\n", + " print(f'QC not available for the selected field: {qc_variable}')\n" + ] + }, + { + "attachments": {}, + "cell_type": 
"markdown", + "id": "2308db16-f362-4033-a11a-c3e5e75ad9ba", + "metadata": {}, + "source": [ + "## Field selection dropdown menu\n", + "Select variable to be plotted from a dropdown menu" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d5792fa8-2253-441a-8ac6-1e07a0345031", + "metadata": {}, + "outputs": [], + "source": [ + "plt.ioff()\n", + "\n", + "# populate dropdown menu with available variables \n", + "available_variables = [v for v in ds.variables if not('time' in v or v.startswith('qc_') or v.startswith('source_')) and 'long_name' in ds.variables[v].attrs]\n", + "d_variable = 'Ba_G_Dry_10um_PSAP1W_1'\n", + "dropdown = widgets.Dropdown(\n", + " options = [(ds.variables[v].attrs['long_name'], v) for v in available_variables],\n", + " value= d_variable,\n", + " description='Field:',\n", + " disabled=False,\n", + ")\n", + "dropdown.layout.margin = '0px 30% 0px 20%'\n", + "dropdown.layout.width = '50%'\n", + "\n", + "# set up display\n", + "i_display = act.plotting.TimeSeriesDisplay(ds)\n", + "i_display.add_subplots((1,), figsize = (9.5,5))\n", + "i_ax = i_display.plot(d_variable, subplot_index=(0,), set_title=ds.variables[d_variable].attrs['long_name'],)\n", + "i_ax.grid()\n", + "i_fig = i_display.fig\n", + "\n", + "# update plot callback function\n", + "def update_plot(change):\n", + " i_ax.cla()\n", + " i_ax_new = i_display.plot(change.new, subplot_index=(0,), set_title=ds.variables[change.new].attrs['long_name'],)\n", + " i_ax_new.grid()\n", + " i_fig.canvas.draw()\n", + " i_fig.canvas.flush_events()\n", + "\n", + "dropdown.observe(update_plot, names='value')\n", + "\n", + "widgets.AppLayout(\n", + " header=dropdown,\n", + " center=i_fig.canvas,\n", + " pane_heights=[1, 6,1]\n", + ")\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": 
"text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.12" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/AIP/aipavg1ogren.c1.ipynb b/VAPs/quicklook/AIP/aipavg1ogren.c1.ipynb new file mode 100644 index 00000000..8796fa97 --- /dev/null +++ b/VAPs/quicklook/AIP/aipavg1ogren.c1.ipynb @@ -0,0 +1,339 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# AIPAVG1OGREN.C1 Plots\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/aip) for more information about this vap." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib widget\n", + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import ipywidgets as widgets\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "# Data archive directory\n", + "DATA_DIR = r'/data/archive/'\n", + "\n", + "# Datastream info\n", + "DATASTREAM_NAME = 'aipavg1ogren'\n", + "DATA_LEVEL = 'c1'\n", + "LOCATIONS = [{'end_date': '2008-12-27', 'facility': 'M1', 'site': 'hfe', 'start_date': '2008-05-09'}, {'end_date': '2007-12-30', 'facility': 'M1', 'site': 'fkb', 'start_date': '2007-03-19'}, {'end_date': '2015-11-30', 'facility': 'M1', 'site': 'mao', 'start_date': '2014-01-01'}, {'end_date': '2010-12-30', 'facility': 'M1', 'site': 'grw', 'start_date': '2009-04-14'}, {'end_date': '2017-03-30', 'facility': 'C1', 'site': 'sgp', 'start_date': '1996-07-02'}, {'end_date': '2005-09-14', 'facility': 'M1', 'site': 'pye', 'start_date': 
'2005-03-09'}, {'end_date': '2006-12-30', 'facility': 'M1', 'site': 'nim', 'start_date': '2005-11-19'}, {'end_date': '2012-03-26', 'facility': 'M1', 'site': 'pgh', 'start_date': '2011-06-10'}, {'end_date': '2013-06-23', 'facility': 'M1', 'site': 'pvc', 'start_date': '2012-07-16'}]" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "9faaf875", + "metadata": {}, + "source": [ + "## Define site, facility, and date range" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ac6764f5", + "metadata": {}, + "outputs": [], + "source": [ + "print(\"The following locations and date ranges are available for this VAP:\")\n", + "display(pd.DataFrame(LOCATIONS, columns=['site', 'facility', 'start_date', 'end_date']))" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "8d132223", + "metadata": {}, + "source": [ + "#### Define site, facility, and date range (date format: YYYY-MM-DD) using the variables below:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e563983a", + "metadata": {}, + "outputs": [], + "source": [ + "site_facility = ( 'sgp', 'C1' )\n", + "\n", + "date_start = '2017-03-28'\n", + "date_end = '2017-03-30'" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "bccd3dfe-2f99-49a5-bace-ea37e8dd8fc3", + "metadata": {}, + "source": [ + "## Load data files\n", + "Load data files from /data/archive/" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cb4b9a26-c574-49c0-a521-658fa553e39e", + "metadata": {}, + "outputs": [], + "source": [ + "# Compile list of files\n", + "site, facility = site_facility\n", + "d_date_start = datetime.strptime(date_start, '%Y-%m-%d')\n", + "d_date_end = datetime.strptime(date_end, '%Y-%m-%d')\n", + "dir_path = os.path.join(DATA_DIR + site, site + DATASTREAM_NAME + facility + r'.' 
+ DATA_LEVEL )\n", + "dir_path\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6be8f3dc", + "metadata": {}, + "outputs": [], + "source": [ + "from datetime import date, timedelta\n", + "import pandas as pd\n", + "\n", + "def get_ARM_formated_dates(start_date, end_date):\n", + " \"\"\"\n", + " Get a list of ARM conventional formated date lists, based on start_date and end_date(inclusive)\n", + " EXAMPLE:\n", + " get_ARM_formated_dates(start_date=\"20180219\", end_date=\"20180221\")\n", + " >> [\"20180219\", \"20180220\", \"20180221\"] \n", + " \"\"\"\n", + " \n", + " _start_date = pd.to_datetime(start_date)\n", + " _end_date = pd.to_datetime(end_date)\n", + " \n", + " delta = _end_date - _start_date # returns timedelta \n", + " dates = []\n", + "\n", + " for i in range(delta.days + 1):\n", + " day = _start_date + timedelta(days=i)\n", + " day_formated = day.strftime(format=\"%Y%m%d\")\n", + " dates.append(day_formated)\n", + " return dates\n", + "\n", + "\n", + "get_ARM_formated_dates(start_date=date_start, end_date=date_end)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "51feea2e", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter a list of files based on date pattern\n", + "import glob\n", + "dates = get_ARM_formated_dates(start_date=date_start, end_date=date_end)\n", + "files_filter = []\n", + "for date in dates:\n", + " files_filter += glob.glob(f'{dir_path}/*.{date}*.*')\n", + " files_filter\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0e5d0d7", + "metadata": {}, + "outputs": [], + "source": [ + "# Load files as a single dataset\n", + "files_list = files_filter \n", + "ds = act.io.armfiles.read_netcdf(files_list)\n", + "ds.clean.cleanup()\n", + "print(f'{len(files_list)} files loaded')\n", + "ds\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "4a551094-9ec0-4b64-b80a-9940573c2f50", + "metadata": {}, + "source": [ + "## Plot 
time series data\n", + "#### Define the list of variables to be plotted:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d09b789e-84f1-4605-846b-a72c110c8048", + "metadata": {}, + "outputs": [], + "source": [ + "variables_to_plot = ['Ba_G_Dry_1um_PSAP1W_1', 'Ba_G_Dry_10um_PSAP1W_1', 'Ba_R_Dry_10um_PSAP3W_1']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3458fb08-035b-4898-9253-0a94e6f9c97b", + "metadata": {}, + "outputs": [], + "source": [ + "ts_display = act.plotting.TimeSeriesDisplay(ds)\n", + "ts_display.add_subplots((len(variables_to_plot),), figsize = (9.5,4*len(variables_to_plot)))\n", + "\n", + "for i,v in enumerate(variables_to_plot):\n", + " ts_ax = ts_display.plot(v, subplot_index=(i,), set_title=ds.variables[v].attrs['long_name'],)\n", + " ts_ax.grid()\n", + "\n", + "plt.show()\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "194399aa-1907-452b-8ba9-bc31d7f60291", + "metadata": {}, + "source": [ + "## Quality check plots\n", + "#### Define variable for QC plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9a0ef63d-3eeb-48fc-a24a-43258b6134b8", + "metadata": {}, + "outputs": [], + "source": [ + "qc_variable = 'Ba_G_Dry_1um_PSAP1W_1'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "532663a3-4dc0-4497-bda8-018c5f91e1c4", + "metadata": {}, + "outputs": [], + "source": [ + "# QC Plot\n", + "if ('qc_'+qc_variable) in ds.variables:\n", + "\n", + " # Plot\n", + " qc_display = act.plotting.TimeSeriesDisplay(ds)\n", + " qc_display.add_subplots((2,), figsize = (9.5,10))\n", + " qc_ax = qc_display.plot(qc_variable, subplot_index=(0,), set_title=\"QC results on field: \" + qc_variable,)\n", + " qc_ax.grid()\n", + " qc_display.qc_flag_block_plot(qc_variable, subplot_index=(1,))\n", + "\n", + " plt.show()\n", + "else:\n", + " print(f'QC not available for the selected field: {qc_variable}')\n" + ] + }, + { + "attachments": {}, + "cell_type": 
"markdown", + "id": "2308db16-f362-4033-a11a-c3e5e75ad9ba", + "metadata": {}, + "source": [ + "## Field selection dropdown menu\n", + "Select variable to be plotted from a dropdown menu" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d5792fa8-2253-441a-8ac6-1e07a0345031", + "metadata": {}, + "outputs": [], + "source": [ + "plt.ioff()\n", + "\n", + "# populate dropdown menu with available variables \n", + "available_variables = [v for v in ds.variables if not('time' in v or v.startswith('qc_') or v.startswith('source_')) and 'long_name' in ds.variables[v].attrs]\n", + "d_variable = 'Ba_G_Dry_1um_PSAP1W_1'\n", + "dropdown = widgets.Dropdown(\n", + " options = [(ds.variables[v].attrs['long_name'], v) for v in available_variables],\n", + " value= d_variable,\n", + " description='Field:',\n", + " disabled=False,\n", + ")\n", + "dropdown.layout.margin = '0px 30% 0px 20%'\n", + "dropdown.layout.width = '50%'\n", + "\n", + "# set up display\n", + "i_display = act.plotting.TimeSeriesDisplay(ds)\n", + "i_display.add_subplots((1,), figsize = (9.5,5))\n", + "i_ax = i_display.plot(d_variable, subplot_index=(0,), set_title=ds.variables[d_variable].attrs['long_name'],)\n", + "i_ax.grid()\n", + "i_fig = i_display.fig\n", + "\n", + "# update plot callback function\n", + "def update_plot(change):\n", + " i_ax.cla()\n", + " i_ax_new = i_display.plot(change.new, subplot_index=(0,), set_title=ds.variables[change.new].attrs['long_name'],)\n", + " i_ax_new.grid()\n", + " i_fig.canvas.draw()\n", + " i_fig.canvas.flush_events()\n", + "\n", + "dropdown.observe(update_plot, names='value')\n", + "\n", + "widgets.AppLayout(\n", + " header=dropdown,\n", + " center=i_fig.canvas,\n", + " pane_heights=[1, 6,1]\n", + ")\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": 
"text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.12" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/AIP/aipfitrh1ogren.c1.ipynb b/VAPs/quicklook/AIP/aipfitrh1ogren.c1.ipynb new file mode 100644 index 00000000..cb8ac9cf --- /dev/null +++ b/VAPs/quicklook/AIP/aipfitrh1ogren.c1.ipynb @@ -0,0 +1,339 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# AIPFITRH1OGREN.C1 Plots\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/aip) for more information about this vap." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib widget\n", + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import ipywidgets as widgets\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "# Data archive directory\n", + "DATA_DIR = r'/data/archive/'\n", + "\n", + "# Datastream info\n", + "DATASTREAM_NAME = 'aipfitrh1ogren'\n", + "DATA_LEVEL = 'c1'\n", + "LOCATIONS = [{'end_date': '2008-12-26', 'facility': 'M1', 'site': 'hfe', 'start_date': '2008-05-09'}, {'end_date': '2007-12-09', 'facility': 'M1', 'site': 'fkb', 'start_date': '2007-03-27'}, {'end_date': '2015-05-01', 'facility': 'M1', 'site': 'mao', 'start_date': '2014-08-20'}, {'end_date': '2010-10-24', 'facility': 'M1', 'site': 'grw', 'start_date': '2009-04-16'}, {'end_date': '2016-09-29', 'facility': 'C1', 'site': 'sgp', 'start_date': '1998-12-19'}, {'end_date': '2005-09-13', 'facility': 'M1', 'site': 'pye', 'start_date': 
'2005-07-01'}, {'end_date': '2006-12-29', 'facility': 'M1', 'site': 'nim', 'start_date': '2005-12-04'}, {'end_date': '2011-12-31', 'facility': 'M1', 'site': 'pgh', 'start_date': '2011-06-10'}, {'end_date': '2013-06-22', 'facility': 'M1', 'site': 'pvc', 'start_date': '2012-09-29'}]" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "9faaf875", + "metadata": {}, + "source": [ + "## Define site, facility, and date range" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ac6764f5", + "metadata": {}, + "outputs": [], + "source": [ + "print(\"The following locations and date ranges are available for this VAP:\")\n", + "display(pd.DataFrame(LOCATIONS, columns=['site', 'facility', 'start_date', 'end_date']))" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "8d132223", + "metadata": {}, + "source": [ + "#### Define site, facility, and date range (date format: YYYY-MM-DD) using the variables below:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e563983a", + "metadata": {}, + "outputs": [], + "source": [ + "site_facility = ( 'sgp', 'C1' )\n", + "\n", + "date_start = '2016-09-27'\n", + "date_end = '2016-09-29'" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "bccd3dfe-2f99-49a5-bace-ea37e8dd8fc3", + "metadata": {}, + "source": [ + "## Load data files\n", + "Load data files from /data/archive/" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cb4b9a26-c574-49c0-a521-658fa553e39e", + "metadata": {}, + "outputs": [], + "source": [ + "# Compile list of files\n", + "site, facility = site_facility\n", + "d_date_start = datetime.strptime(date_start, '%Y-%m-%d')\n", + "d_date_end = datetime.strptime(date_end, '%Y-%m-%d')\n", + "dir_path = os.path.join(DATA_DIR + site, site + DATASTREAM_NAME + facility + r'.' 
+ DATA_LEVEL )\n", + "dir_path\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6be8f3dc", + "metadata": {}, + "outputs": [], + "source": [ + "from datetime import date, timedelta\n", + "import pandas as pd\n", + "\n", + "def get_ARM_formated_dates(start_date, end_date):\n", + " \"\"\"\n", + " Get a list of ARM conventional formated date lists, based on start_date and end_date(inclusive)\n", + " EXAMPLE:\n", + " get_ARM_formated_dates(start_date=\"20180219\", end_date=\"20180221\")\n", + " >> [\"20180219\", \"20180220\", \"20180221\"] \n", + " \"\"\"\n", + " \n", + " _start_date = pd.to_datetime(start_date)\n", + " _end_date = pd.to_datetime(end_date)\n", + " \n", + " delta = _end_date - _start_date # returns timedelta \n", + " dates = []\n", + "\n", + " for i in range(delta.days + 1):\n", + " day = _start_date + timedelta(days=i)\n", + " day_formated = day.strftime(format=\"%Y%m%d\")\n", + " dates.append(day_formated)\n", + " return dates\n", + "\n", + "\n", + "get_ARM_formated_dates(start_date=date_start, end_date=date_end)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "51feea2e", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter a list of files based on date pattern\n", + "import glob\n", + "dates = get_ARM_formated_dates(start_date=date_start, end_date=date_end)\n", + "files_filter = []\n", + "for date in dates:\n", + " files_filter += glob.glob(f'{dir_path}/*.{date}*.*')\n", + " files_filter\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0e5d0d7", + "metadata": {}, + "outputs": [], + "source": [ + "# Load files as a single dataset\n", + "files_list = files_filter \n", + "ds = act.io.armfiles.read_netcdf(files_list)\n", + "ds.clean.cleanup()\n", + "print(f'{len(files_list)} files loaded')\n", + "ds\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "4a551094-9ec0-4b64-b80a-9940573c2f50", + "metadata": {}, + "source": [ + "## Plot 
time series data\n", + "#### Define the list of variables to be plotted:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d09b789e-84f1-4605-846b-a72c110c8048", + "metadata": {}, + "outputs": [], + "source": [ + "variables_to_plot = ['ratio_85by40_Bs_R_10um_3p', 'fRH_Bs_R_10um_3p', 'ratio_85by40_Bs_R_10um_2p']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3458fb08-035b-4898-9253-0a94e6f9c97b", + "metadata": {}, + "outputs": [], + "source": [ + "ts_display = act.plotting.TimeSeriesDisplay(ds)\n", + "ts_display.add_subplots((len(variables_to_plot),), figsize = (9.5,4*len(variables_to_plot)))\n", + "\n", + "for i,v in enumerate(variables_to_plot):\n", + " ts_ax = ts_display.plot(v, subplot_index=(i,), set_title=ds.variables[v].attrs['long_name'],)\n", + " ts_ax.grid()\n", + "\n", + "plt.show()\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "194399aa-1907-452b-8ba9-bc31d7f60291", + "metadata": {}, + "source": [ + "## Quality check plots\n", + "#### Define variable for QC plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9a0ef63d-3eeb-48fc-a24a-43258b6134b8", + "metadata": {}, + "outputs": [], + "source": [ + "qc_variable = 'ratio_85by40_Bs_R_10um_3p'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "532663a3-4dc0-4497-bda8-018c5f91e1c4", + "metadata": {}, + "outputs": [], + "source": [ + "# QC Plot\n", + "if ('qc_'+qc_variable) in ds.variables:\n", + "\n", + " # Plot\n", + " qc_display = act.plotting.TimeSeriesDisplay(ds)\n", + " qc_display.add_subplots((2,), figsize = (9.5,10))\n", + " qc_ax = qc_display.plot(qc_variable, subplot_index=(0,), set_title=\"QC results on field: \" + qc_variable,)\n", + " qc_ax.grid()\n", + " qc_display.qc_flag_block_plot(qc_variable, subplot_index=(1,))\n", + "\n", + " plt.show()\n", + "else:\n", + " print(f'QC not available for the selected field: {qc_variable}')\n" + ] + }, + { + "attachments": {}, + 
"cell_type": "markdown", + "id": "2308db16-f362-4033-a11a-c3e5e75ad9ba", + "metadata": {}, + "source": [ + "## Field selection dropdown menu\n", + "Select variable to be plotted from a dropdown menu" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d5792fa8-2253-441a-8ac6-1e07a0345031", + "metadata": {}, + "outputs": [], + "source": [ + "plt.ioff()\n", + "\n", + "# populate dropdown menu with available variables \n", + "available_variables = [v for v in ds.variables if not('time' in v or v.startswith('qc_') or v.startswith('source_')) and 'long_name' in ds.variables[v].attrs]\n", + "d_variable = 'ratio_85by40_Bs_R_10um_3p'\n", + "dropdown = widgets.Dropdown(\n", + " options = [(ds.variables[v].attrs['long_name'], v) for v in available_variables],\n", + " value= d_variable,\n", + " description='Field:',\n", + " disabled=False,\n", + ")\n", + "dropdown.layout.margin = '0px 30% 0px 20%'\n", + "dropdown.layout.width = '50%'\n", + "\n", + "# set up display\n", + "i_display = act.plotting.TimeSeriesDisplay(ds)\n", + "i_display.add_subplots((1,), figsize = (9.5,5))\n", + "i_ax = i_display.plot(d_variable, subplot_index=(0,), set_title=ds.variables[d_variable].attrs['long_name'],)\n", + "i_ax.grid()\n", + "i_fig = i_display.fig\n", + "\n", + "# update plot callback function\n", + "def update_plot(change):\n", + " i_ax.cla()\n", + " i_ax_new = i_display.plot(change.new, subplot_index=(0,), set_title=ds.variables[change.new].attrs['long_name'],)\n", + " i_ax_new.grid()\n", + " i_fig.canvas.draw()\n", + " i_fig.canvas.flush_events()\n", + "\n", + "dropdown.observe(update_plot, names='value')\n", + "\n", + "widgets.AppLayout(\n", + " header=dropdown,\n", + " center=i_fig.canvas,\n", + " pane_heights=[1, 6,1]\n", + ")\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", 
+ "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.12" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/AOD-MFRSR/AOD-MFRSR_tutorial.ipynb b/VAPs/quicklook/AOD-MFRSR/AOD-MFRSR_tutorial.ipynb new file mode 100644 index 00000000..6b61ddb7 --- /dev/null +++ b/VAPs/quicklook/AOD-MFRSR/AOD-MFRSR_tutorial.ipynb @@ -0,0 +1,838 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# MFRSR7NCHAOD1MICH.C1 Notebook\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/aod-mfrsr) for more information about this vap." + ] + }, + { + "cell_type": "markdown", + "id": "97097763", + "metadata": {}, + "source": [ + "In this notebook, we demonstrate the workflow to explore ARM vap data (using mfrsr7nchaod1mich as an example.) Value-added products (VAPs) are higher-order data products that have been analyzed and processed to ease scientists’ use of ARM data in atmospheric research and global climate models. \n", + "Here is the main content we will cover." 
+ ] + }, + { + "cell_type": "markdown", + "id": "eddec40f", + "metadata": {}, + "source": [ + "# Table of Content\n", + "## Access the data\n", + "* How to retrieve the data\n", + "* Data path and file name conventions\n", + "* Load data\n", + "## Explore the data\n", + "* NetCDF Data structure\n", + "* Xarray essentials\n", + "* Xarray Variable\n", + "## Plot the data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "import random\n", + "\n", + "import glob\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "id": "2372d5be", + "metadata": {}, + "source": [ + "## Access the data" + ] + }, + { + "cell_type": "markdown", + "id": "5b0d9684", + "metadata": {}, + "source": [ + "### How to retrieve the data\n", + "We assume the path \"/data/archive\" is available where you are running this notebook. You can use `os.path.exists(\"/data/archive\")` to verify if the path exists at your machine.\n", + "\n", + "\n", + "### Data path and file name conventions\n", + "There several common terminologies regarding ARM data, for example, data-stream-name, data-level, site, facility, instrument, etc. (For more details, please see [ARM Data File Standards Version 1.2](https://www.google.com/search?q=arm+datastream+facility+cite+definition&rlz=1C1GCEJ_enUS1029US1029&ei=hb41ZICrBPukqtsPvMODIA&ved=0ahUKEwjAgeiS0aL-AhV7kmoFHbzhAAQQ4dUDCBA&uact=5&oq=arm+datastream+facility+cite+definition&gs_lcp=Cgxnd3Mtd2l6LXNlcnAQAzIFCAAQogQyBQgAEKIEMgUIABCiBDIFCAAQogQyBQgAEKIEOgoIABBHENYEELADOgoIIRCgARDDBBAKSgQIQRgAUJ4DWIoOYO0PaAFwAXgAgAGMAYgBuQmSAQMyLjmYAQCgAQHIAQjAAQE&sclient=gws-wiz-serp).) 
\n", + "\n", + "For example, this notebook is called `mfrsr7nchaod1mich.c1`, where `mfrsr7nchaod1mich` is the \"datastream name\", and `c1` is the \"data level\".\n", + "\n", + "This datastream also contains site `oli` and facility `M1`. (Note: an individual datastream might have multiple site-facility pairs.)\n", + "In such a case, the data of this datastream is stored at `/data/archive/oli/olimfrsr7nchaod1michM1.c1`, which is in the format of `{DATA_DIR}/{site}/{site}{DATASTREAM_NAME}{facility}.{DATA_LEVEL}`. We can use the following method to assign the datastream directory `datastream_dir = os.path.join(DATA_DIR, site, site + DATASTREAM_NAME + facility + '.' + DATA_LEVEL )`\n", + "\n", + "The data files under datastream_dir also follow naming conventions. Once you reach the datastream_dir level, the most important file naming convention to differentiate the files is \"yyyyMMdd.hhmmss\", which comes in handy for filtering files based on datetime. For example, we can use `glob.glob(f'{datastream_dir}/*.200709*.*')` to select files from September 2007.\n", + "\n", + "(Note: refer to https://adc.arm.gov/discovery/#/ and the https://adc.arm.gov/solr8/metadata/select API to explore ARM datastreams and their associated available sites and facilities.)\n", + "\n", + "Please see the following examples in action" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d7e9eb85", + "metadata": {}, + "outputs": [], + "source": [ + "# Verify if DATA_DIR path exists\n", + "DATA_DIR = \"/data/archive\"\n", + "os.path.exists(DATA_DIR)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "586993fd", + "metadata": {}, + "outputs": [], + "source": [ + "# Speicify datastream_dir following the path conventions and check its existence\n", + "DATASTREAM_NAME = \"mfrsr7nchaod1mich\"\n", + "DATA_LEVEL = \"c1\"\n", + "site = \"oli\"\n", + "facility = \"M1\"\n", + "datastream_dir = os.path.join(DATA_DIR, site, site + DATASTREAM_NAME + facility + '.' 
+ DATA_LEVEL )\n", + "print(datastream_dir)\n", + "print(os.path.exists(datastream_dir))\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0742f7c1", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: list 5 (random) files under datastream_dir\n", + "files = os.listdir(datastream_dir)\n", + "files[:5]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "39b98a36", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: get most recent file\n", + "list_of_files = glob.glob(f\"{datastream_dir}/*\") # * means all if need specific format then *.csv\n", + "latest_file = max(list_of_files, key=os.path.getctime)\n", + "latest_file" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "902d514e", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus sort datastream files based on datetime\n", + "files = os.listdir(datastream_dir)\n", + "file_sorted = files.copy()\n", + "file_sorted.sort() \n", + "print(file_sorted[:5])\n", + "\n", + "# to reverse\n", + "file_sorted_reverse = files.copy() \n", + "file_sorted_reverse.sort(reverse=True)\n", + "print(file_sorted_reverse[:5])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ec5923b2", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: pattern matching\n", + "# filter the 200709** files under datastream_dir\n", + "files_filter = glob.glob(f'{datastream_dir}/*.200709*.*')\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c4a4aa18", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: use armnotebook_utils.file_filter (TODO) to filter files based on datastream info \n" + ] + }, + { + "cell_type": "markdown", + "id": "cfeb9efc", + "metadata": {}, + "source": [ + "### Load data\n", + "The arm data is stored in [NetCDF](https://en.wikipedia.org/wiki/NetCDF#:~:text=%22NetCDF%20(network%20Common%20Data%20Form,format%20for%20representing%20scientific%20data.) format. 
We can use xarray's `open_dataset` method to load a single file, or `open_mfdataset` to open multiple files. (Note: the latter will still return a single xarray dataset object by combining multiple files.)\n", + "\n", + "Note: `open_dataset` keeps the file handle open and lazy loads its contents, whereas `load_dataset` is a thin wrapper that passes all of its parameters directly to `open_dataset` and then loads the whole dataset into memory. `open_dataset` is therefore the preferable method over `load_dataset` for memory efficiency. (For more details, please see [xarray.open_dataset](https://docs.xarray.dev/en/stable/generated/xarray.open_dataset.html))\n", + "\n", + "See the following example in action" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a440a329", + "metadata": {}, + "outputs": [], + "source": [ + "# open single file\n", + "full_path = latest_file\n", + "print(full_path)\n", + "ds_single = xr.open_dataset(full_path)\n", + "ds_single" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0143a3d", + "metadata": {}, + "outputs": [], + "source": [ + "print(ds_single)\n", + "print(type(ds_single))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1c0f8939", + "metadata": {}, + "outputs": [], + "source": [ + "# open multiple files\n", + "n_files = 3\n", + "full_paths = [os.path.join(datastream_dir, f_path) for f_path in file_sorted_reverse[:n_files]]\n", + "print(full_paths)\n", + "try: # Note: sometimes multiple files cannot be merged, so we used try except here.\n", + " ds_mutiple = xr.open_mfdataset(full_paths)\n", + " # ds_mutiple\n", + " print(type(ds_mutiple))\n", + " print(ds_mutiple)\n", + "except Exception as e:\n", + " print(e)" + ] + }, + { + "cell_type": "markdown", + "id": "3fa59943", + "metadata": {}, + "source": [ + "## Explore the data" + ] + }, + { + "cell_type": "markdown", + "id": "571f69b9", + "metadata": {}, + "source": [ + "### NetCDF Data structure \n", + "\n", + "\n", + "(If you are confident with NetCDF basics and xarray essentials, feel free to skip this section.)\n", + "Before we dive into 
data exploration, there are some essential concepts we should be familiar with (shown below): \n", + "* Dataset\n", + "* Data array\n", + "* Variable\n", + "* Dimension\n", + "* Coordinate\n", + "* Data Type\n", + "* Meta Data (Attributes)\n", + "\n", + "We will not go into details about NetCDF basics, but here are some references you might find helpful\n", + "* [Components of a NetCDF Dataset](https://iprc.soest.hawaii.edu/users/xfu/tool/guidef-7.html#)\n", + "* [Network Common Data Form (NetCDF)](https://www.unidata.ucar.edu/software/netcdf/) and [A Brief History of (netCDF) Time](https://www.unidata.ucar.edu/software/netcdf/time/recs.html)\n", + "\n", + "\n", + "\n", + "### Xarray essentials\n", + "Earlier we introduced the xarray package and used `open_dataset` and `open_mfdataset` to retrieve NetCDF data files as an xarray Dataset object. Recall that you can review the data in a notebook by using `print(ds)` or `ds`. \n", + "\n", + "Using xarray to retrieve the aforementioned NetCDF basics is straightforward. 
In general,\n", + "* Dataset: `ds`\n", + "* Data array: `ds.variables`\n", + "* Variable: `ds.variables`\n", + "* Dimension: `ds.dims`\n", + "* Coordinate: `ds.coords`\n", + "* Data Type: `type`\n", + "* Meta Data (Attributes): `ds.attrs`\n", + "\n", + "Also, here are some references if you are new to xarray\n", + "* [Xarray in 45 minutes](https://tutorial.xarray.dev/overview/xarray-in-45-min.html)\n", + "* [Handling NetCDF Files using XArray for Absolute Beginners](https://towardsdatascience.com/handling-netcdf-files-using-xarray-for-absolute-beginners-111a8ab4463f)\n", + "\n", + "Try the following commands in action\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b3eac323", + "metadata": {}, + "outputs": [], + "source": [ + "ds = ds_single" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "77ecf85d", + "metadata": {}, + "outputs": [], + "source": [ + "# Dataset \n", + "ds" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "25e7de09", + "metadata": {}, + "outputs": [], + "source": [ + "# Data array, Variable\n", + "# Note: the info can be overwhelming. 
Do not attempt to grasp everything at the first glance.\n", + "ds.variables" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8c41b67e", + "metadata": {}, + "outputs": [], + "source": [ + "# dimenssions\n", + "ds.dims" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "156f1dfc", + "metadata": {}, + "outputs": [], + "source": [ + "# coordinates\n", + "ds.coords" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "277d6064", + "metadata": {}, + "outputs": [], + "source": [ + "# Meta Data (Attributes)\n", + "ds.attrs" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d334681f", + "metadata": {}, + "outputs": [], + "source": [ + "# type\n", + "print(type(ds))\n", + "print(type(ds.variables))\n", + "print(type(ds.dims))\n", + "print(type(ds.coords))\n", + "print(type(ds.attrs))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "643399d6", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: retrieve variable names only\n", + "list(ds.variables)" + ] + }, + { + "cell_type": "markdown", + "id": "1cf36878", + "metadata": {}, + "source": [ + "#### Discussion: variable vs. coordinates vs. dimenssions. \n", + "From [Components of a NetCDF Dataset](https://iprc.soest.hawaii.edu/users/xfu/tool/guidef-7.html#):\n", + "\n", + "* Dimession: A dimension may be used to represent a real physical dimension, for example, time, latitude, longitude, or height. A dimension might also be used to index other quantities, for example station or model-run-number.\n", + "A netCDF dimension has both a name and a length. A dimension length is an arbitrary positive integer, except that one dimension in a netCDF dataset can have the length UNLIMITED.\n", + "\n", + "* Variables: Variables are used to store the bulk of the data in a netCDF dataset. A variable represents an array of values of the same type. A scalar value is treated as a 0-dimensional array. 
A variable has a name, a data type, and a shape described by its list of dimensions specified when the variable is created. A variable may also have associated attributes, which may be added, deleted or changed after the variable is created.\n", + "\n", + "* Coordinate (Variables): It is legal for a variable to have the same name as a dimension. Such variables have no special meaning to the netCDF library. However there is a convention that such variables should be treated in a special way by software using this library.\n", + "A variable with the same name as a dimension is called a coordinate variable. It typically defines a physical coordinate corresponding to that dimension.\n", + "\n", + "\n", + "Tips: By definition, coordinates and dimensions are also variables. For example, ds.time is a coordinate but it is also a special variable. In practice, however, when we talk about a variable, we mean a regular/non-coordinate variable. We will use this convention for the rest of the notebook. " + ] + }, + { + "cell_type": "markdown", + "id": "d47120f9", + "metadata": {}, + "source": [ + "### Xarray Variable\n", + "\n", + "\n", + "We can use `ds.variables` to access the variables of a dataset. But printing out the whole dataset can often be overwhelming. Instead, we will work on an individual variable (also called an xarray data array) using the `ds[var_name]` syntax, where `var_name` is the name of the variable. See the following examples in action.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5a57e136", + "metadata": {}, + "outputs": [], + "source": [ + "print(type(ds[\"time\"]))\n", + "ds[\"time\"]" + ] + }, + { + "cell_type": "markdown", + "id": "51ec643e", + "metadata": {}, + "source": [ + "#### Variable properties\n", + "For an individual variable, the following properties are the most commonly used, listed with the associated xarray accessor to retrieve them. 
(assuming var = ds[var_name])\n", + "* name: `var.name`\n", + "* data content: `var.data`\n", + "* attributes: `var.attrs`\n", + "* dimenstions: `var.dims`\n", + "* data type: `var.data.dtype`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a632a525", + "metadata": {}, + "outputs": [], + "source": [ + "var_name = \"time\"\n", + "var = ds[var_name]\n", + "\n", + "print(\"var.name: \\n\", var.name, \"\\n\")\n", + "print(\"var.data: \\n\", var.data, \"\\n\")\n", + "print(\"var.attrs: \\n\", var.attrs, \"\\n\")\n", + "print(\"var.dims: \\n\", var.dims, \"\\n\")\n", + "print(\"var.data.dtype: \\n\", var.data.dtype, \"\\n\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b256b07f", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: indivdual variable info table (collect and display variable info as a tabular format)\n", + "df_info = pd.DataFrame()\n", + "df_info[\"var_name\"] = list(ds.variables)\n", + "df_info[\"dims\"] = df_info.var_name.apply(lambda x: ds[x].dims)\n", + "df_info[\"is_dim\"] = df_info.var_name.apply(lambda x: x in ds.dims)\n", + "df_info[\"n_dim\"] = df_info.var_name.apply(lambda x: len(ds[x].dims))\n", + "df_info[\"attrs\"] = df_info.var_name.apply(lambda x: ds[x].attrs)\n", + "df_info[\"dtype\"] = df_info.var_name.apply(lambda x: ds[x].data.dtype)\n", + "df_info" + ] + }, + { + "cell_type": "markdown", + "id": "e6a36b96", + "metadata": {}, + "source": [ + "### Data cleaning/Preprocessing (skipped)\n", + "Data cleaning and preprocessing is an important stage in the the data analysis pipeline. However it will be out of the scope of this notebook. For more information about data cleaning and preprocessing basics, here are some references. 
\n", + "* [Xarray Fundamentals](https://earth-env-data-science.github.io/lectures/xarray/xarray.html)\n", + "* [Xarray Tutorial — Pangeo Gallery documentation](http://gallery.pangeo.io/repos/pangeo-data/pangeo-tutorial-gallery/xarray.html)\n", + "* [Pythonic Data Cleaning With pandas and NumPy](https://realpython.com/python-data-cleaning-numpy-pandas/)\n", + "* [Pandas - Cleaning Data](https://www.w3schools.com/python/pandas/pandas_cleaning.asp)" + ] + }, + { + "cell_type": "markdown", + "id": "ae6e1a69", + "metadata": {}, + "source": [ + "### Plotting\n", + "Data visualization (or plotting) is another important data analysis topic and deserves its own discussion. Here in this notebook we will only demonstrate basic tools for simple data visualization tasks.\n", + "\n", + "Here are some references you might find useful:\n", + "* xarray plotting: https://docs.xarray.dev/en/stable/user-guide/plotting.html\n", + "* Atmospheric Community Toolkit (ACT): https://arm-doe.github.io/ACT/index.html\n", + "\n", + "Note: this notebook is auto-generated using a template. It uses a general heuristic to select the variable(s) to plot and is not customized to each individual notebook. Feel free to change the variables of interest, especially if a certain figure fails to plot." + ] + }, + { + "cell_type": "markdown", + "id": "7e417fb2", + "metadata": {}, + "source": [ + "#### 1-dimensional basic time series plot\n", + "\n", + "For the following plot we would like to find variables such that\n", + "* it has one and only one dimension\n", + "* \"time\" is its coordinate variable\n", + "* it is not a dimension itself,\n", + "* it is not a special variable with substrings within [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "\n", + "Please see the following example in action. 
(For more details about pandas filtering, please see the following references.)\n", + "* [pandas: multiple conditions while indexing data frame](https://stackoverflow.com/questions/22591174/pandas-multiple-conditions-while-indexing-data-frame-unexpected-behavior)\n", + "* [pandas.DataFrame.apply](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.apply.html)\n", + "* [How to test if a string contains one of the substrings in a list...](https://stackoverflow.com/questions/26577516/how-to-test-if-a-string-contains-one-of-the-substrings-in-a-list-in-pandas)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "30edac2d", + "metadata": {}, + "outputs": [], + "source": [ + "# (pandas query) filter n_dim==1, non-dimenssional, and not contains the following substrings\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter = df_info[(df_info.n_dim == 1) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (df_info.is_dim==False) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4777a659", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " var_1d = df_filter.var_name.values[0]\n", + " var_1d\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fb5ae985", + "metadata": {}, + "outputs": [], + "source": [ + "# Note: if failed, change to another variable to plot.\n", + "try:\n", + " ds[var_1d].plot()\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "markdown", + "id": "9e41066e", + "metadata": {}, + "source": [ + "#### 2-dimensional basic plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "09bd4adc", + "metadata": {}, + "outputs": [], + "source": [ + "# (pandas query) filter n_dim==2, non-dimenssional, and not contains the 
following substrings\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_2 = df_info[(df_info.n_dim == 2) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (df_info.is_dim==False) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter_2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2cdea1a9", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " var_2d = df_filter_2.var_name.values[0]\n", + " var_2d\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f5f48879", + "metadata": {}, + "outputs": [], + "source": [ + "# Note: if failed, change to another variable to plot.\n", + "try:\n", + " print(ds[var_2d].dims)\n", + " # ds[var_2d].plot()\n", + "\n", + " # conventionally, use \"time\" as x-axis\n", + " ds[var_2d].plot(x=\"time\")\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "markdown", + "id": "7554b2a6", + "metadata": {}, + "source": [ + "#### qc-plotting (optional)\n", + "\n", + "Note: act qc plotting has more strict requirement, one of them is the associated qc variable need to have \"flag_masks\" attributes. 
Which is added by using ds = act.io.armfiles.read_netcdf(files_list), then ds.clean.cleanup()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "71c2096f", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " ds_act = act.io.armfiles.read_netcdf(full_path)\n", + " print(type(ds_act))\n", + " ds_act.clean.cleanup()\n", + "\n", + " # or \n", + " # ds.clean.cleanup()\n", + "except Exception as e:\n", + " print(\"ERROR\", e)\n", + " ds_act = ds" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "844f8505", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter valid variables for ACT qc plotting\n", + "condition = (\"qc_\" + df_info.var_name).apply(lambda x: ds_act[x].attrs.get(\"flag_masks\") is not None \n", + " if x in list(ds.variables) else False)\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_3 = df_info[(df_info.is_dim==False) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings))) &\n", + " condition\n", + " ]\n", + "df_filter_3" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ecd38cc6", + "metadata": {}, + "outputs": [], + "source": [ + "# QC Plot\n", + "try:\n", + " qc_variable = df_filter_3.var_name.values[0]\n", + " print(qc_variable)\n", + "except Exception as e:\n", + " print(e)\n", + " \n", + "try:\n", + "\n", + " # Plot\n", + " qc_display = act.plotting.TimeSeriesDisplay(ds_act)\n", + " qc_display.add_subplots((2,), figsize = (9.5,10))\n", + " qc_ax = qc_display.plot(qc_variable, subplot_index=(0,), set_title=\"QC results on field: \" + qc_variable,)\n", + " qc_ax.grid()\n", + " qc_display.qc_flag_block_plot(qc_variable, subplot_index=(1,))\n", + " qc_ax.set_xlabel(f\"UTC Time starts at {ds.time.data[0]}\")\n", + "\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e)" + ] + }, + { + "cell_type": "markdown", + "id": "d76e4d27", + "metadata": 
{}, + "source": [ + "#### bonus: choose variables to plot from a dropdown menu " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bb733804", + "metadata": {}, + "outputs": [], + "source": [ + "# Valid variables filtering\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_4 = df_info[(df_info.is_dim==False) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter_4" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "82d5c20d", + "metadata": {}, + "outputs": [], + "source": [ + "# example 1: using xarray plot\n", + "\n", + "# Uncomment the following cell to try the interactive plot (ctrl + /)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2fc9aaa4", + "metadata": {}, + "outputs": [], + "source": [ + "# %matplotlib widget\n", + "# plt.clf()\n", + "\n", + "# fig, ax = plt.subplots(figsize=(10, 4))\n", + "\n", + "# available_variables = df_filter_4.var_name.values\n", + "# @widgets.interact(var=available_variables)\n", + "# def update(var = available_variables[0]):\n", + "# fig.clear() # Remove old lines from plot and plot new one\n", + "# if len(ds[var].dims)==2:\n", + "# ds[var].plot(x=\"time\", add_colorbar=False)\n", + "# else:\n", + "# ds[var].plot()\n", + "# plt.grid()\n", + "# plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fabdd802", + "metadata": {}, + "outputs": [], + "source": [ + "# example 2: using act plot\n", + "\n", + "# Uncomment the following cell to try the interactive plot (ctrl + /)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "962b4186", + "metadata": {}, + "outputs": [], + "source": [ + "# %matplotlib widget\n", + "# plt.clf()\n", + "\n", + "# available_variables = df_filter_4.var_name.values\n", + "\n", + "\n", + "# @widgets.interact(var=available_variables)\n", + "# def 
update(var = available_variables[0]):\n", + "\n", + "# i_display = act.plotting.TimeSeriesDisplay(ds_act)\n", + "# i_display.add_subplots((1,), figsize=(10, 4))\n", + "# ax = i_display.plot(var, subplot_index=(0,), set_title=f\"{var} ({ds_act[var].attrs['long_name']})\",)\n", + "\n", + "# ax.set_xlabel(f\"UTC Time starts at {ds.time.data[0]}\")\n", + "# ax.grid()\n", + "# plt.show()\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.16" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/AOD-MFRSR/mfrsr7nchaod1mich.c1.ipynb b/VAPs/quicklook/AOD-MFRSR/mfrsr7nchaod1mich.c1.ipynb new file mode 100644 index 00000000..a7546b36 --- /dev/null +++ b/VAPs/quicklook/AOD-MFRSR/mfrsr7nchaod1mich.c1.ipynb @@ -0,0 +1,339 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# MFRSR7NCHAOD1MICH.C1 Plots\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/aod-mfrsr) for more information about this VAP."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib widget\n", + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import ipywidgets as widgets\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "# Data archive directory\n", + "DATA_DIR = r'/data/archive/'\n", + "\n", + "# Datastream info\n", + "DATASTREAM_NAME = 'mfrsr7nchaod1mich'\n", + "DATA_LEVEL = 'c1'\n", + "LOCATIONS = [{'end_date': '2021-06-14', 'facility': 'M1', 'site': 'oli', 'start_date': '2021-04-03'}, {'end_date': '2022-10-02', 'facility': 'M1', 'site': 'hou', 'start_date': '2021-08-11'}, {'end_date': '2022-10-19', 'facility': 'C1', 'site': 'sgp', 'start_date': '2021-05-10'}, {'end_date': '2022-09-26', 'facility': 'E11', 'site': 'sgp', 'start_date': '2020-12-16'}, {'end_date': '2022-09-26', 'facility': 'E12', 'site': 'sgp', 'start_date': '2020-12-14'}, {'end_date': '2023-12-05', 'facility': 'E13', 'site': 'sgp', 'start_date': '2021-01-12'}, {'end_date': '2022-09-26', 'facility': 'E15', 'site': 'sgp', 'start_date': '2020-12-04'}, {'end_date': '2021-09-21', 'facility': 'E31', 'site': 'sgp', 'start_date': '2020-12-10'}, {'end_date': '2022-09-26', 'facility': 'E32', 'site': 'sgp', 'start_date': '2020-11-19'}, {'end_date': '2022-09-26', 'facility': 'E33', 'site': 'sgp', 'start_date': '2020-11-25'}, {'end_date': '2022-09-26', 'facility': 'E34', 'site': 'sgp', 'start_date': '2020-12-17'}, {'end_date': '2022-09-26', 'facility': 'E35', 'site': 'sgp', 'start_date': '2020-12-19'}, {'end_date': '2022-09-26', 'facility': 'E36', 'site': 'sgp', 'start_date': '2020-12-08'}, {'end_date': '2022-09-26', 'facility': 'E37', 'site': 'sgp', 'start_date': '2020-11-30'}, {'end_date': '2021-06-07', 'facility': 'E38', 'site': 'sgp', 'start_date': 
'2017-11-15'}, {'end_date': '2022-09-26', 'facility': 'E39', 'site': 'sgp', 'start_date': '2020-12-04'}, {'end_date': '2022-09-26', 'facility': 'E40', 'site': 'sgp', 'start_date': '2020-12-17'}, {'end_date': '2021-12-09', 'facility': 'E41', 'site': 'sgp', 'start_date': '2020-12-05'}, {'end_date': '2022-09-26', 'facility': 'E9', 'site': 'sgp', 'start_date': '2021-02-13'}]" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "9faaf875", + "metadata": {}, + "source": [ + "## Define site, facility, and date range" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ac6764f5", + "metadata": {}, + "outputs": [], + "source": [ + "print(\"The following locations and date ranges are available for this VAP:\")\n", + "display(pd.DataFrame(LOCATIONS, columns=['site', 'facility', 'start_date', 'end_date']))" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "8d132223", + "metadata": {}, + "source": [ + "#### Define site, facility, and date range (date format: YYYY-MM-DD) using the variables below:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e563983a", + "metadata": {}, + "outputs": [], + "source": [ + "site_facility = ( 'sgp', 'C1' )\n", + "\n", + "date_start = '2022-10-19'\n", + "date_end = '2022-10-19'" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "bccd3dfe-2f99-49a5-bace-ea37e8dd8fc3", + "metadata": {}, + "source": [ + "## Load data files\n", + "Load data files from /data/archive/" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cb4b9a26-c574-49c0-a521-658fa553e39e", + "metadata": {}, + "outputs": [], + "source": [ + "# Compile list of files\n", + "site, facility = site_facility\n", + "d_date_start = datetime.strptime(date_start, '%Y-%m-%d')\n", + "d_date_end = datetime.strptime(date_end, '%Y-%m-%d')\n", + "dir_path = os.path.join(DATA_DIR + site, site + DATASTREAM_NAME + facility + r'.' 
+ DATA_LEVEL )\n", + "dir_path\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6be8f3dc", + "metadata": {}, + "outputs": [], + "source": [ + "from datetime import date, timedelta\n", + "import pandas as pd\n", + "\n", + "def get_ARM_formated_dates(start_date, end_date):\n", + " \"\"\"\n", + " Get a list of ARM conventional formated date lists, based on start_date and end_date(inclusive)\n", + " EXAMPLE:\n", + " get_ARM_formated_dates(start_date=\"20180219\", end_date=\"20180221\")\n", + " >> [\"20180219\", \"20180220\", \"20180221\"] \n", + " \"\"\"\n", + " \n", + " _start_date = pd.to_datetime(start_date)\n", + " _end_date = pd.to_datetime(end_date)\n", + " \n", + " delta = _end_date - _start_date # returns timedelta \n", + " dates = []\n", + "\n", + " for i in range(delta.days + 1):\n", + " day = _start_date + timedelta(days=i)\n", + " day_formated = day.strftime(format=\"%Y%m%d\")\n", + " dates.append(day_formated)\n", + " return dates\n", + "\n", + "\n", + "get_ARM_formated_dates(start_date=date_start, end_date=date_end)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "51feea2e", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter a list of files based on date pattern\n", + "import glob\n", + "dates = get_ARM_formated_dates(start_date=date_start, end_date=date_end)\n", + "files_filter = []\n", + "for date in dates:\n", + " files_filter += glob.glob(f'{dir_path}/*.{date}*.*')\n", + " files_filter\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0e5d0d7", + "metadata": {}, + "outputs": [], + "source": [ + "# Load files as a single dataset\n", + "files_list = files_filter \n", + "ds = act.io.armfiles.read_netcdf(files_list)\n", + "ds.clean.cleanup()\n", + "print(f'{len(files_list)} files loaded')\n", + "ds\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "4a551094-9ec0-4b64-b80a-9940573c2f50", + "metadata": {}, + "source": [ + "## Plot 
time series data\n", + "#### Define the list of variables to be plotted:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d09b789e-84f1-4605-846b-a72c110c8048", + "metadata": {}, + "outputs": [], + "source": [ + "variables_to_plot = ['hemisp_narrowband_filter1', 'hemisp_narrowband_filter2', 'hemisp_narrowband_filter3']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3458fb08-035b-4898-9253-0a94e6f9c97b", + "metadata": {}, + "outputs": [], + "source": [ + "ts_display = act.plotting.TimeSeriesDisplay(ds)\n", + "ts_display.add_subplots((len(variables_to_plot),), figsize = (9.5,4*len(variables_to_plot)))\n", + "\n", + "for i,v in enumerate(variables_to_plot):\n", + " ts_ax = ts_display.plot(v, subplot_index=(i,), set_title=ds.variables[v].attrs['long_name'],)\n", + " ts_ax.grid()\n", + "\n", + "plt.show()\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "194399aa-1907-452b-8ba9-bc31d7f60291", + "metadata": {}, + "source": [ + "## Quality check plots\n", + "#### Define variable for QC plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9a0ef63d-3eeb-48fc-a24a-43258b6134b8", + "metadata": {}, + "outputs": [], + "source": [ + "qc_variable = 'hemisp_narrowband_filter1'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "532663a3-4dc0-4497-bda8-018c5f91e1c4", + "metadata": {}, + "outputs": [], + "source": [ + "# QC Plot\n", + "if ('qc_'+qc_variable) in ds.variables:\n", + "\n", + " # Plot\n", + " qc_display = act.plotting.TimeSeriesDisplay(ds)\n", + " qc_display.add_subplots((2,), figsize = (9.5,10))\n", + " qc_ax = qc_display.plot(qc_variable, subplot_index=(0,), set_title=\"QC results on field: \" + qc_variable,)\n", + " qc_ax.grid()\n", + " qc_display.qc_flag_block_plot(qc_variable, subplot_index=(1,))\n", + "\n", + " plt.show()\n", + "else:\n", + " print(f'QC not available for the selected field: {qc_variable}')\n" + ] + }, + { + "attachments": {}, + 
"cell_type": "markdown", + "id": "2308db16-f362-4033-a11a-c3e5e75ad9ba", + "metadata": {}, + "source": [ + "## Field selection dropdown menu\n", + "Select variable to be plotted from a dropdown menu" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d5792fa8-2253-441a-8ac6-1e07a0345031", + "metadata": {}, + "outputs": [], + "source": [ + "plt.ioff()\n", + "\n", + "# populate dropdown menu with available variables \n", + "available_variables = [v for v in ds.variables if not('time' in v or v.startswith('qc_') or v.startswith('source_')) and 'long_name' in ds.variables[v].attrs]\n", + "d_variable = 'hemisp_narrowband_filter1'\n", + "dropdown = widgets.Dropdown(\n", + " options = [(ds.variables[v].attrs['long_name'], v) for v in available_variables],\n", + " value= d_variable,\n", + " description='Field:',\n", + " disabled=False,\n", + ")\n", + "dropdown.layout.margin = '0px 30% 0px 20%'\n", + "dropdown.layout.width = '50%'\n", + "\n", + "# set up display\n", + "i_display = act.plotting.TimeSeriesDisplay(ds)\n", + "i_display.add_subplots((1,), figsize = (9.5,5))\n", + "i_ax = i_display.plot(d_variable, subplot_index=(0,), set_title=ds.variables[d_variable].attrs['long_name'],)\n", + "i_ax.grid()\n", + "i_fig = i_display.fig\n", + "\n", + "# update plot callback function\n", + "def update_plot(change):\n", + " i_ax.cla()\n", + " i_ax_new = i_display.plot(change.new, subplot_index=(0,), set_title=ds.variables[change.new].attrs['long_name'],)\n", + " i_ax_new.grid()\n", + " i_fig.canvas.draw()\n", + " i_fig.canvas.flush_events()\n", + "\n", + "dropdown.observe(update_plot, names='value')\n", + "\n", + "widgets.AppLayout(\n", + " header=dropdown,\n", + " center=i_fig.canvas,\n", + " pane_heights=[1, 6,1]\n", + ")\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", 
+ "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.12" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/AOD-MFRSR/mfrsr7nchcal.c1.ipynb b/VAPs/quicklook/AOD-MFRSR/mfrsr7nchcal.c1.ipynb new file mode 100644 index 00000000..66f7fd27 --- /dev/null +++ b/VAPs/quicklook/AOD-MFRSR/mfrsr7nchcal.c1.ipynb @@ -0,0 +1,339 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# MFRSR7NCHCAL.C1 Plots\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/aod-mfrsr) for more information about this VAP." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib widget\n", + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import ipywidgets as widgets\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "# Data archive directory\n", + "DATA_DIR = r'/data/archive/'\n", + "\n", + "# Datastream info\n", + "DATASTREAM_NAME = 'mfrsr7nchcal'\n", + "DATA_LEVEL = 'c1'\n", + "LOCATIONS = [{'end_date': '2021-06-14', 'facility': 'M1', 'site': 'oli', 'start_date': '2021-04-03'}, {'end_date': '2022-10-02', 'facility': 'M1', 'site': 'hou', 'start_date': '2021-08-11'}, {'end_date': '2022-10-19', 'facility': 'C1', 'site': 'sgp', 'start_date': '2021-05-10'}, {'end_date': '2022-09-26', 'facility': 'E11', 'site': 'sgp', 'start_date': '2020-12-16'}, {'end_date': '2022-09-26', 'facility': 'E12', 'site': 'sgp', 'start_date': '2020-12-14'}, {'end_date': '2023-12-05', 'facility': 'E13',
'site': 'sgp', 'start_date': '2021-01-12'}, {'end_date': '2022-09-26', 'facility': 'E15', 'site': 'sgp', 'start_date': '2020-12-04'}, {'end_date': '2021-09-21', 'facility': 'E31', 'site': 'sgp', 'start_date': '2020-12-10'}, {'end_date': '2022-09-26', 'facility': 'E32', 'site': 'sgp', 'start_date': '2020-11-19'}, {'end_date': '2022-09-26', 'facility': 'E33', 'site': 'sgp', 'start_date': '2020-11-25'}, {'end_date': '2022-09-26', 'facility': 'E34', 'site': 'sgp', 'start_date': '2020-12-17'}, {'end_date': '2022-09-26', 'facility': 'E35', 'site': 'sgp', 'start_date': '2020-12-19'}, {'end_date': '2022-09-26', 'facility': 'E36', 'site': 'sgp', 'start_date': '2020-12-08'}, {'end_date': '2022-09-26', 'facility': 'E37', 'site': 'sgp', 'start_date': '2020-11-30'}, {'end_date': '2021-06-07', 'facility': 'E38', 'site': 'sgp', 'start_date': '2017-11-15'}, {'end_date': '2022-09-26', 'facility': 'E39', 'site': 'sgp', 'start_date': '2020-12-04'}, {'end_date': '2022-09-26', 'facility': 'E40', 'site': 'sgp', 'start_date': '2020-12-17'}, {'end_date': '2021-12-09', 'facility': 'E41', 'site': 'sgp', 'start_date': '2020-12-05'}, {'end_date': '2022-09-26', 'facility': 'E9', 'site': 'sgp', 'start_date': '2021-02-13'}]" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "9faaf875", + "metadata": {}, + "source": [ + "## Define site, facility, and date range" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ac6764f5", + "metadata": {}, + "outputs": [], + "source": [ + "print(\"The following locations and date ranges are available for this VAP:\")\n", + "display(pd.DataFrame(LOCATIONS, columns=['site', 'facility', 'start_date', 'end_date']))" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "8d132223", + "metadata": {}, + "source": [ + "#### Define site, facility, and date range (date format: YYYY-MM-DD) using the variables below:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e563983a", + "metadata": {}, + 
"outputs": [], + "source": [ + "site_facility = ( 'sgp', 'C1' )\n", + "\n", + "date_start = '2022-10-19'\n", + "date_end = '2022-10-19'" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "bccd3dfe-2f99-49a5-bace-ea37e8dd8fc3", + "metadata": {}, + "source": [ + "## Load data files\n", + "Load data files from /data/archive/" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cb4b9a26-c574-49c0-a521-658fa553e39e", + "metadata": {}, + "outputs": [], + "source": [ + "# Compile list of files\n", + "site, facility = site_facility\n", + "d_date_start = datetime.strptime(date_start, '%Y-%m-%d')\n", + "d_date_end = datetime.strptime(date_end, '%Y-%m-%d')\n", + "dir_path = os.path.join(DATA_DIR + site, site + DATASTREAM_NAME + facility + r'.' + DATA_LEVEL )\n", + "dir_path\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6be8f3dc", + "metadata": {}, + "outputs": [], + "source": [ + "from datetime import date, timedelta\n", + "import pandas as pd\n", + "\n", + "def get_ARM_formated_dates(start_date, end_date):\n", + " \"\"\"\n", + " Get a list of ARM conventional formated date lists, based on start_date and end_date(inclusive)\n", + " EXAMPLE:\n", + " get_ARM_formated_dates(start_date=\"20180219\", end_date=\"20180221\")\n", + " >> [\"20180219\", \"20180220\", \"20180221\"] \n", + " \"\"\"\n", + " \n", + " _start_date = pd.to_datetime(start_date)\n", + " _end_date = pd.to_datetime(end_date)\n", + " \n", + " delta = _end_date - _start_date # returns timedelta \n", + " dates = []\n", + "\n", + " for i in range(delta.days + 1):\n", + " day = _start_date + timedelta(days=i)\n", + " day_formated = day.strftime(format=\"%Y%m%d\")\n", + " dates.append(day_formated)\n", + " return dates\n", + "\n", + "\n", + "get_ARM_formated_dates(start_date=date_start, end_date=date_end)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "51feea2e", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter a 
list of files based on date pattern\n", + "import glob\n", + "dates = get_ARM_formated_dates(start_date=date_start, end_date=date_end)\n", + "files_filter = []\n", + "for date in dates:\n", + " files_filter += glob.glob(f'{dir_path}/*.{date}*.*')\n", + " files_filter\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0e5d0d7", + "metadata": {}, + "outputs": [], + "source": [ + "# Load files as a single dataset\n", + "files_list = files_filter \n", + "ds = act.io.armfiles.read_netcdf(files_list)\n", + "ds.clean.cleanup()\n", + "print(f'{len(files_list)} files loaded')\n", + "ds\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "4a551094-9ec0-4b64-b80a-9940573c2f50", + "metadata": {}, + "source": [ + "## Plot time series data\n", + "#### Define the list of variables to be plotted:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d09b789e-84f1-4605-846b-a72c110c8048", + "metadata": {}, + "outputs": [], + "source": [ + "variables_to_plot = ['Ozone_column_amount']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3458fb08-035b-4898-9253-0a94e6f9c97b", + "metadata": {}, + "outputs": [], + "source": [ + "ts_display = act.plotting.TimeSeriesDisplay(ds)\n", + "ts_display.add_subplots((len(variables_to_plot),), figsize = (9.5,4*len(variables_to_plot)))\n", + "\n", + "for i,v in enumerate(variables_to_plot):\n", + " ts_ax = ts_display.plot(v, subplot_index=(i,), set_title=ds.variables[v].attrs['long_name'],)\n", + " ts_ax.grid()\n", + "\n", + "plt.show()\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "194399aa-1907-452b-8ba9-bc31d7f60291", + "metadata": {}, + "source": [ + "## Quality check plots\n", + "#### Define variable for QC plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9a0ef63d-3eeb-48fc-a24a-43258b6134b8", + "metadata": {}, + "outputs": [], + "source": [ + "qc_variable = 'Io_filter1'" + ] + }, + { + 
"cell_type": "code", + "execution_count": null, + "id": "532663a3-4dc0-4497-bda8-018c5f91e1c4", + "metadata": {}, + "outputs": [], + "source": [ + "# QC Plot\n", + "if ('qc_'+qc_variable) in ds.variables:\n", + "\n", + " # Plot\n", + " qc_display = act.plotting.TimeSeriesDisplay(ds)\n", + " qc_display.add_subplots((2,), figsize = (9.5,10))\n", + " qc_ax = qc_display.plot(qc_variable, subplot_index=(0,), set_title=\"QC results on field: \" + qc_variable,)\n", + " qc_ax.grid()\n", + " qc_display.qc_flag_block_plot(qc_variable, subplot_index=(1,))\n", + "\n", + " plt.show()\n", + "else:\n", + " print(f'QC not available for the selected field: {qc_variable}')\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "2308db16-f362-4033-a11a-c3e5e75ad9ba", + "metadata": {}, + "source": [ + "## Field selection dropdown menu\n", + "Select variable to be plotted from a dropdown menu" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d5792fa8-2253-441a-8ac6-1e07a0345031", + "metadata": {}, + "outputs": [], + "source": [ + "plt.ioff()\n", + "\n", + "# populate dropdown menu with available variables \n", + "available_variables = [v for v in ds.variables if not('time' in v or v.startswith('qc_') or v.startswith('source_')) and 'long_name' in ds.variables[v].attrs]\n", + "d_variable = 'Ozone_column_amount'\n", + "dropdown = widgets.Dropdown(\n", + " options = [(ds.variables[v].attrs['long_name'], v) for v in available_variables],\n", + " value= d_variable,\n", + " description='Field:',\n", + " disabled=False,\n", + ")\n", + "dropdown.layout.margin = '0px 30% 0px 20%'\n", + "dropdown.layout.width = '50%'\n", + "\n", + "# set up display\n", + "i_display = act.plotting.TimeSeriesDisplay(ds)\n", + "i_display.add_subplots((1,), figsize = (9.5,5))\n", + "i_ax = i_display.plot(d_variable, subplot_index=(0,), set_title=ds.variables[d_variable].attrs['long_name'],)\n", + "i_ax.grid()\n", + "i_fig = i_display.fig\n", + "\n", + "# update plot callback 
function\n", + "def update_plot(change):\n", + " i_ax.cla()\n", + " i_ax_new = i_display.plot(change.new, subplot_index=(0,), set_title=ds.variables[change.new].attrs['long_name'],)\n", + " i_ax_new.grid()\n", + " i_fig.canvas.draw()\n", + " i_fig.canvas.flush_events()\n", + "\n", + "dropdown.observe(update_plot, names='value')\n", + "\n", + "widgets.AppLayout(\n", + " header=dropdown,\n", + " center=i_fig.canvas,\n", + " pane_heights=[1, 6,1]\n", + ")\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.12" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/AOD-MFRSR/mfrsraod1mich.c1.ipynb b/VAPs/quicklook/AOD-MFRSR/mfrsraod1mich.c1.ipynb new file mode 100644 index 00000000..1ab57070 --- /dev/null +++ b/VAPs/quicklook/AOD-MFRSR/mfrsraod1mich.c1.ipynb @@ -0,0 +1,339 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# MFRSRAOD1MICH.C1 Plots\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/aod-mfrsr) for more information about this VAP."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib widget\n", + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import ipywidgets as widgets\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "# Data archive directory\n", + "DATA_DIR = r'/data/archive/'\n", + "\n", + "# Datastream info\n", + "DATASTREAM_NAME = 'mfrsraod1mich'\n", + "DATA_LEVEL = 'c1'\n", + "LOCATIONS = [{'end_date': '2017-08-09', 'facility': 'M1', 'site': 'asi', 'start_date': '2016-05-02'}, {'end_date': '2021-03-30', 'facility': 'M1', 'site': 'oli', 'start_date': '2014-04-20'}, {'end_date': '2019-04-30', 'facility': 'M1', 'site': 'cor', 'start_date': '2018-10-01'}, {'end_date': '2020-01-25', 'facility': 'C1', 'site': 'ena', 'start_date': '2016-01-01'}, {'end_date': '2007-12-29', 'facility': 'M1', 'site': 'fkb', 'start_date': '2007-04-19'}, {'end_date': '2015-10-24', 'facility': 'M1', 'site': 'mao', 'start_date': '2014-01-01'}, {'end_date': '2018-03-13', 'facility': 'S1', 'site': 'mcq', 'start_date': '2016-04-01'}, {'end_date': '2008-12-26', 'facility': 'M1', 'site': 'hfe', 'start_date': '2008-05-30'}, {'end_date': '2012-02-06', 'facility': 'M1', 'site': 'gan', 'start_date': '2011-09-24'}, {'end_date': '2011-01-03', 'facility': 'M1', 'site': 'grw', 'start_date': '2009-05-05'}, {'end_date': '2013-04-04', 'facility': 'M1', 'site': 'pvc', 'start_date': '2012-07-09'}, {'end_date': '2005-09-04', 'facility': 'M1', 'site': 'pye', 'start_date': '2005-02-20'}, {'end_date': '2011-04-26', 'facility': 'M1', 'site': 'sbs', 'start_date': '2011-01-26'}, {'end_date': '2012-03-27', 'facility': 'M1', 'site': 'pgh', 'start_date': '2011-06-25'}, {'end_date': '2020-08-03', 'facility': 'C1', 'site': 'nsa', 'start_date': '1998-04-02'}, 
{'end_date': '2010-11-13', 'facility': 'C2', 'site': 'nsa', 'start_date': '1999-09-05'}, {'end_date': '2021-04-30', 'facility': 'C1', 'site': 'sgp', 'start_date': '1997-01-16'}, {'end_date': '2011-10-19', 'facility': 'E10', 'site': 'sgp', 'start_date': '1997-10-31'}, {'end_date': '2020-12-08', 'facility': 'E11', 'site': 'sgp', 'start_date': '1997-09-01'}, {'end_date': '2020-12-08', 'facility': 'E12', 'site': 'sgp', 'start_date': '1997-11-10'}, {'end_date': '2021-01-08', 'facility': 'E13', 'site': 'sgp', 'start_date': '1997-09-10'}, {'end_date': '2020-11-30', 'facility': 'E15', 'site': 'sgp', 'start_date': '1997-09-10'}, {'end_date': '2011-11-15', 'facility': 'E16', 'site': 'sgp', 'start_date': '1997-08-21'}, {'end_date': '2009-11-17', 'facility': 'E18', 'site': 'sgp', 'start_date': '1997-10-17'}, {'end_date': '2011-04-27', 'facility': 'E19', 'site': 'sgp', 'start_date': '1998-07-27'}, {'end_date': '2009-10-14', 'facility': 'E1', 'site': 'sgp', 'start_date': '1997-12-22'}, {'end_date': '2011-11-17', 'facility': 'E20', 'site': 'sgp', 'start_date': '1997-12-26'}, {'end_date': '2009-12-01', 'facility': 'E22', 'site': 'sgp', 'start_date': '1997-11-28'}, {'end_date': '2009-11-14', 'facility': 'E24', 'site': 'sgp', 'start_date': '1997-11-28'}, {'end_date': '2002-04-08', 'facility': 'E25', 'site': 'sgp', 'start_date': '1997-10-22'}, {'end_date': '2009-12-04', 'facility': 'E27', 'site': 'sgp', 'start_date': '2004-01-22'}, {'end_date': '2009-10-20', 'facility': 'E2', 'site': 'sgp', 'start_date': '1997-11-25'}, {'end_date': '2020-12-07', 'facility': 'E31', 'site': 'sgp', 'start_date': '2011-10-13'}, {'end_date': '2020-11-17', 'facility': 'E32', 'site': 'sgp', 'start_date': '2011-12-07'}, {'end_date': '2020-11-23', 'facility': 'E33', 'site': 'sgp', 'start_date': '2011-09-27'}, {'end_date': '2020-12-08', 'facility': 'E34', 'site': 'sgp', 'start_date': '2011-09-28'}, {'end_date': '2020-12-14', 'facility': 'E35', 'site': 'sgp', 'start_date': '2011-09-28'}, {'end_date': 
'2020-12-03', 'facility': 'E36', 'site': 'sgp', 'start_date': '2011-10-18'}, {'end_date': '2020-11-24', 'facility': 'E37', 'site': 'sgp', 'start_date': '2011-11-03'}, {'end_date': '2017-09-29', 'facility': 'E38', 'site': 'sgp', 'start_date': '2011-11-01'}, {'end_date': '2009-10-28', 'facility': 'E3', 'site': 'sgp', 'start_date': '1997-11-11'}, {'end_date': '2020-12-14', 'facility': 'E40', 'site': 'sgp', 'start_date': '2015-11-13'}, {'end_date': '2011-09-26', 'facility': 'E4', 'site': 'sgp', 'start_date': '1997-12-18'}, {'end_date': '2009-10-14', 'facility': 'E5', 'site': 'sgp', 'start_date': '1997-11-10'}, {'end_date': '2011-09-30', 'facility': 'E6', 'site': 'sgp', 'start_date': '1997-11-10'}, {'end_date': '2011-11-14', 'facility': 'E7', 'site': 'sgp', 'start_date': '1997-11-01'}, {'end_date': '2009-11-10', 'facility': 'E8', 'site': 'sgp', 'start_date': '1997-09-04'}, {'end_date': '2020-11-16', 'facility': 'E9', 'site': 'sgp', 'start_date': '1998-03-01'}, {'end_date': '2014-09-15', 'facility': 'M1', 'site': 'tmp', 'start_date': '2014-02-01'}, {'end_date': '2014-06-25', 'facility': 'C1', 'site': 'twp', 'start_date': '1997-08-28'}, {'end_date': '2013-09-09', 'facility': 'C2', 'site': 'twp', 'start_date': '1998-11-10'}, {'end_date': '2015-01-06', 'facility': 'C3', 'site': 'twp', 'start_date': '2002-03-22'}]" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "9faaf875", + "metadata": {}, + "source": [ + "## Define site, facility, and date range" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ac6764f5", + "metadata": {}, + "outputs": [], + "source": [ + "print(\"The following locations and date ranges are available for this VAP:\")\n", + "display(pd.DataFrame(LOCATIONS, columns=['site', 'facility', 'start_date', 'end_date']))" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "8d132223", + "metadata": {}, + "source": [ + "#### Define site, facility, and date range (date format: YYYY-MM-DD) using the 
variables below:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e563983a", + "metadata": {}, + "outputs": [], + "source": [ + "site_facility = ( 'sgp', 'C1' )\n", + "\n", + "date_start = '2021-04-30'\n", + "date_end = '2021-04-30'" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "bccd3dfe-2f99-49a5-bace-ea37e8dd8fc3", + "metadata": {}, + "source": [ + "## Load data files\n", + "Load data files from /data/archive/" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cb4b9a26-c574-49c0-a521-658fa553e39e", + "metadata": {}, + "outputs": [], + "source": [ + "# Compile list of files\n", + "site, facility = site_facility\n", + "d_date_start = datetime.strptime(date_start, '%Y-%m-%d')\n", + "d_date_end = datetime.strptime(date_end, '%Y-%m-%d')\n", + "dir_path = os.path.join(DATA_DIR + site, site + DATASTREAM_NAME + facility + r'.' + DATA_LEVEL )\n", + "dir_path\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6be8f3dc", + "metadata": {}, + "outputs": [], + "source": [ + "from datetime import date, timedelta\n", + "import pandas as pd\n", + "\n", + "def get_ARM_formated_dates(start_date, end_date):\n", + " \"\"\"\n", + " Get a list of ARM conventional formated date lists, based on start_date and end_date(inclusive)\n", + " EXAMPLE:\n", + " get_ARM_formated_dates(start_date=\"20180219\", end_date=\"20180221\")\n", + " >> [\"20180219\", \"20180220\", \"20180221\"] \n", + " \"\"\"\n", + " \n", + " _start_date = pd.to_datetime(start_date)\n", + " _end_date = pd.to_datetime(end_date)\n", + " \n", + " delta = _end_date - _start_date # returns timedelta \n", + " dates = []\n", + "\n", + " for i in range(delta.days + 1):\n", + " day = _start_date + timedelta(days=i)\n", + " day_formated = day.strftime(format=\"%Y%m%d\")\n", + " dates.append(day_formated)\n", + " return dates\n", + "\n", + "\n", + "get_ARM_formated_dates(start_date=date_start, end_date=date_end)" + ] + }, + { + "cell_type": 
"code", + "execution_count": null, + "id": "51feea2e", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter a list of files based on date pattern\n", + "import glob\n", + "dates = get_ARM_formated_dates(start_date=date_start, end_date=date_end)\n", + "files_filter = []\n", + "for date in dates:\n", + " files_filter += glob.glob(f'{dir_path}/*.{date}*.*')\n", + " files_filter\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0e5d0d7", + "metadata": {}, + "outputs": [], + "source": [ + "# Load files as a single dataset\n", + "files_list = files_filter \n", + "ds = act.io.armfiles.read_netcdf(files_list)\n", + "ds.clean.cleanup()\n", + "print(f'{len(files_list)} files loaded')\n", + "ds\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "4a551094-9ec0-4b64-b80a-9940573c2f50", + "metadata": {}, + "source": [ + "## Plot time series data\n", + "#### Define the list of variables to be plotted:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d09b789e-84f1-4605-846b-a72c110c8048", + "metadata": {}, + "outputs": [], + "source": [ + "variables_to_plot = ['hemisp_narrowband_filter1', 'hemisp_narrowband_filter2', 'hemisp_narrowband_filter3']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3458fb08-035b-4898-9253-0a94e6f9c97b", + "metadata": {}, + "outputs": [], + "source": [ + "ts_display = act.plotting.TimeSeriesDisplay(ds)\n", + "ts_display.add_subplots((len(variables_to_plot),), figsize = (9.5,4*len(variables_to_plot)))\n", + "\n", + "for i,v in enumerate(variables_to_plot):\n", + " ts_ax = ts_display.plot(v, subplot_index=(i,), set_title=ds.variables[v].attrs['long_name'],)\n", + " ts_ax.grid()\n", + "\n", + "plt.show()\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "194399aa-1907-452b-8ba9-bc31d7f60291", + "metadata": {}, + "source": [ + "## Quality check plots\n", + "#### Define variable for QC plot" + ] + }, + { + 
"cell_type": "code", + "execution_count": null, + "id": "9a0ef63d-3eeb-48fc-a24a-43258b6134b8", + "metadata": {}, + "outputs": [], + "source": [ + "qc_variable = 'hemisp_broadband'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "532663a3-4dc0-4497-bda8-018c5f91e1c4", + "metadata": {}, + "outputs": [], + "source": [ + "# QC Plot\n", + "if ('qc_'+qc_variable) in ds.variables:\n", + "\n", + " # Plot\n", + " qc_display = act.plotting.TimeSeriesDisplay(ds)\n", + " qc_display.add_subplots((2,), figsize = (9.5,10))\n", + " qc_ax = qc_display.plot(qc_variable, subplot_index=(0,), set_title=\"QC results on field: \" + qc_variable,)\n", + " qc_ax.grid()\n", + " qc_display.qc_flag_block_plot(qc_variable, subplot_index=(1,))\n", + "\n", + " plt.show()\n", + "else:\n", + " print(f'QC not available for the selected field: {qc_variable}')\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "2308db16-f362-4033-a11a-c3e5e75ad9ba", + "metadata": {}, + "source": [ + "## Field selection dropdown menu\n", + "Select variable to be plotted from a dropdown menu" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d5792fa8-2253-441a-8ac6-1e07a0345031", + "metadata": {}, + "outputs": [], + "source": [ + "plt.ioff()\n", + "\n", + "# populate dropdown menu with available variables \n", + "available_variables = [v for v in ds.variables if not('time' in v or v.startswith('qc_') or v.startswith('source_')) and 'long_name' in ds.variables[v].attrs]\n", + "d_variable = 'hemisp_narrowband_filter1'\n", + "dropdown = widgets.Dropdown(\n", + " options = [(ds.variables[v].attrs['long_name'], v) for v in available_variables],\n", + " value= d_variable,\n", + " description='Field:',\n", + " disabled=False,\n", + ")\n", + "dropdown.layout.margin = '0px 30% 0px 20%'\n", + "dropdown.layout.width = '50%'\n", + "\n", + "# set up display\n", + "i_display = act.plotting.TimeSeriesDisplay(ds)\n", + "i_display.add_subplots((1,), figsize = 
(9.5,5))\n", + "i_ax = i_display.plot(d_variable, subplot_index=(0,), set_title=ds.variables[d_variable].attrs['long_name'],)\n", + "i_ax.grid()\n", + "i_fig = i_display.fig\n", + "\n", + "# update plot callback function\n", + "def update_plot(change):\n", + " i_ax.cla()\n", + " i_ax_new = i_display.plot(change.new, subplot_index=(0,), set_title=ds.variables[change.new].attrs['long_name'],)\n", + " i_ax_new.grid()\n", + " i_fig.canvas.draw()\n", + " i_fig.canvas.flush_events()\n", + "\n", + "dropdown.observe(update_plot, names='value')\n", + "\n", + "widgets.AppLayout(\n", + " header=dropdown,\n", + " center=i_fig.canvas,\n", + " pane_heights=[1, 6,1]\n", + ")\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.12" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/AOD-MFRSR/mfrsrcal.c1.ipynb b/VAPs/quicklook/AOD-MFRSR/mfrsrcal.c1.ipynb new file mode 100644 index 00000000..2e35fc3a --- /dev/null +++ b/VAPs/quicklook/AOD-MFRSR/mfrsrcal.c1.ipynb @@ -0,0 +1,339 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# MFRSRCAL.C1 Plots\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/aod-mfrsr) for more information about this vap." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib widget\n", + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import ipywidgets as widgets\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "# Data archive directory\n", + "DATA_DIR = r'/data/archive/'\n", + "\n", + "# Datastream info\n", + "DATASTREAM_NAME = 'mfrsrcal'\n", + "DATA_LEVEL = 'c1'\n", + "LOCATIONS = [{'end_date': '2017-08-09', 'facility': 'M1', 'site': 'asi', 'start_date': '2016-05-02'}, {'end_date': '2021-03-30', 'facility': 'M1', 'site': 'oli', 'start_date': '2016-09-12'}, {'end_date': '2020-01-25', 'facility': 'C1', 'site': 'ena', 'start_date': '2016-01-01'}, {'end_date': '2018-03-13', 'facility': 'S1', 'site': 'mcq', 'start_date': '2016-04-01'}, {'end_date': '2019-04-30', 'facility': 'M1', 'site': 'cor', 'start_date': '2018-10-01'}, {'end_date': '2020-08-03', 'facility': 'C1', 'site': 'nsa', 'start_date': '2017-06-18'}, {'end_date': '2021-04-30', 'facility': 'C1', 'site': 'sgp', 'start_date': '2014-09-24'}, {'end_date': '2011-10-19', 'facility': 'E10', 'site': 'sgp', 'start_date': '1997-10-31'}, {'end_date': '2020-12-08', 'facility': 'E11', 'site': 'sgp', 'start_date': '1997-09-01'}, {'end_date': '2020-12-08', 'facility': 'E12', 'site': 'sgp', 'start_date': '1997-11-10'}, {'end_date': '2021-01-08', 'facility': 'E13', 'site': 'sgp', 'start_date': '2017-12-14'}, {'end_date': '2020-11-30', 'facility': 'E15', 'site': 'sgp', 'start_date': '1997-09-10'}, {'end_date': '2011-11-15', 'facility': 'E16', 'site': 'sgp', 'start_date': '1997-08-21'}, {'end_date': '2009-11-17', 'facility': 'E18', 'site': 'sgp', 'start_date': '1997-10-17'}, {'end_date': '2009-10-14', 'facility': 'E1', 'site': 'sgp', 'start_date': '1997-12-22'}, 
{'end_date': '2011-11-17', 'facility': 'E20', 'site': 'sgp', 'start_date': '1997-12-26'}, {'end_date': '2009-12-01', 'facility': 'E22', 'site': 'sgp', 'start_date': '1997-11-28'}, {'end_date': '2009-11-14', 'facility': 'E24', 'site': 'sgp', 'start_date': '1997-11-28'}, {'end_date': '2002-04-08', 'facility': 'E25', 'site': 'sgp', 'start_date': '1997-10-22'}, {'end_date': '2009-12-04', 'facility': 'E27', 'site': 'sgp', 'start_date': '2004-01-22'}, {'end_date': '2009-10-20', 'facility': 'E2', 'site': 'sgp', 'start_date': '1997-11-25'}, {'end_date': '2020-12-07', 'facility': 'E31', 'site': 'sgp', 'start_date': '2011-10-13'}, {'end_date': '2020-11-17', 'facility': 'E32', 'site': 'sgp', 'start_date': '2011-12-07'}, {'end_date': '2020-11-23', 'facility': 'E33', 'site': 'sgp', 'start_date': '2011-09-27'}, {'end_date': '2020-12-08', 'facility': 'E34', 'site': 'sgp', 'start_date': '2011-09-28'}, {'end_date': '2020-12-14', 'facility': 'E35', 'site': 'sgp', 'start_date': '2011-09-28'}, {'end_date': '2020-04-21', 'facility': 'E36', 'site': 'sgp', 'start_date': '2020-04-21'}, {'end_date': '2020-11-24', 'facility': 'E37', 'site': 'sgp', 'start_date': '2011-11-03'}, {'end_date': '2017-10-30', 'facility': 'E38', 'site': 'sgp', 'start_date': '2011-11-01'}, {'end_date': '2009-10-28', 'facility': 'E3', 'site': 'sgp', 'start_date': '1997-11-11'}, {'end_date': '2020-12-14', 'facility': 'E40', 'site': 'sgp', 'start_date': '2015-11-13'}, {'end_date': '2011-09-26', 'facility': 'E4', 'site': 'sgp', 'start_date': '1997-12-18'}, {'end_date': '2009-10-14', 'facility': 'E5', 'site': 'sgp', 'start_date': '1997-11-10'}, {'end_date': '2011-09-30', 'facility': 'E6', 'site': 'sgp', 'start_date': '1997-11-10'}, {'end_date': '2011-11-14', 'facility': 'E7', 'site': 'sgp', 'start_date': '1997-11-01'}, {'end_date': '2009-11-10', 'facility': 'E8', 'site': 'sgp', 'start_date': '1997-09-04'}, {'end_date': '2020-11-16', 'facility': 'E9', 'site': 'sgp', 'start_date': '2017-12-15'}]" + ] + }, + { + 
"attachments": {}, + "cell_type": "markdown", + "id": "9faaf875", + "metadata": {}, + "source": [ + "## Define site, facility, and date range" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ac6764f5", + "metadata": {}, + "outputs": [], + "source": [ + "print(\"The following locations and date ranges are available for this VAP:\")\n", + "display(pd.DataFrame(LOCATIONS, columns=['site', 'facility', 'start_date', 'end_date']))" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "8d132223", + "metadata": {}, + "source": [ + "#### Define site, facility, and date range (date format: YYYY-MM-DD) using the variables below:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e563983a", + "metadata": {}, + "outputs": [], + "source": [ + "site_facility = ( 'sgp', 'C1' )\n", + "\n", + "date_start = '2021-04-30'\n", + "date_end = '2021-04-30'" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "bccd3dfe-2f99-49a5-bace-ea37e8dd8fc3", + "metadata": {}, + "source": [ + "## Load data files\n", + "Load data files from /data/archive/" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cb4b9a26-c574-49c0-a521-658fa553e39e", + "metadata": {}, + "outputs": [], + "source": [ + "# Compile list of files\n", + "site, facility = site_facility\n", + "d_date_start = datetime.strptime(date_start, '%Y-%m-%d')\n", + "d_date_end = datetime.strptime(date_end, '%Y-%m-%d')\n", + "dir_path = os.path.join(DATA_DIR + site, site + DATASTREAM_NAME + facility + r'.' 
+ DATA_LEVEL )\n", + "dir_path\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6be8f3dc", + "metadata": {}, + "outputs": [], + "source": [ + "from datetime import date, timedelta\n", + "import pandas as pd\n", + "\n", + "def get_ARM_formated_dates(start_date, end_date):\n", + " \"\"\"\n", + " Get a list of ARM conventional formated date lists, based on start_date and end_date(inclusive)\n", + " EXAMPLE:\n", + " get_ARM_formated_dates(start_date=\"20180219\", end_date=\"20180221\")\n", + " >> [\"20180219\", \"20180220\", \"20180221\"] \n", + " \"\"\"\n", + " \n", + " _start_date = pd.to_datetime(start_date)\n", + " _end_date = pd.to_datetime(end_date)\n", + " \n", + " delta = _end_date - _start_date # returns timedelta \n", + " dates = []\n", + "\n", + " for i in range(delta.days + 1):\n", + " day = _start_date + timedelta(days=i)\n", + " day_formated = day.strftime(format=\"%Y%m%d\")\n", + " dates.append(day_formated)\n", + " return dates\n", + "\n", + "\n", + "get_ARM_formated_dates(start_date=date_start, end_date=date_end)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "51feea2e", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter a list of files based on date pattern\n", + "import glob\n", + "dates = get_ARM_formated_dates(start_date=date_start, end_date=date_end)\n", + "files_filter = []\n", + "for date in dates:\n", + " files_filter += glob.glob(f'{dir_path}/*.{date}*.*')\n", + " files_filter\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0e5d0d7", + "metadata": {}, + "outputs": [], + "source": [ + "# Load files as a single dataset\n", + "files_list = files_filter \n", + "ds = act.io.armfiles.read_netcdf(files_list)\n", + "ds.clean.cleanup()\n", + "print(f'{len(files_list)} files loaded')\n", + "ds\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "4a551094-9ec0-4b64-b80a-9940573c2f50", + "metadata": {}, + "source": [ + "## Plot 
time series data\n", + "#### Define the list of variables to be plotted:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d09b789e-84f1-4605-846b-a72c110c8048", + "metadata": {}, + "outputs": [], + "source": [ + "variables_to_plot = ['Ozone_column_amount']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3458fb08-035b-4898-9253-0a94e6f9c97b", + "metadata": {}, + "outputs": [], + "source": [ + "ts_display = act.plotting.TimeSeriesDisplay(ds)\n", + "ts_display.add_subplots((len(variables_to_plot),), figsize = (9.5,4*len(variables_to_plot)))\n", + "\n", + "for i,v in enumerate(variables_to_plot):\n", + " ts_ax = ts_display.plot(v, subplot_index=(i,), set_title=ds.variables[v].attrs['long_name'],)\n", + " ts_ax.grid()\n", + "\n", + "plt.show()\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "194399aa-1907-452b-8ba9-bc31d7f60291", + "metadata": {}, + "source": [ + "## Quality check plots\n", + "#### Define variable for QC plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9a0ef63d-3eeb-48fc-a24a-43258b6134b8", + "metadata": {}, + "outputs": [], + "source": [ + "qc_variable = 'Io_filter1'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "532663a3-4dc0-4497-bda8-018c5f91e1c4", + "metadata": {}, + "outputs": [], + "source": [ + "# QC Plot\n", + "if ('qc_'+qc_variable) in ds.variables:\n", + "\n", + " # Plot\n", + " qc_display = act.plotting.TimeSeriesDisplay(ds)\n", + " qc_display.add_subplots((2,), figsize = (9.5,10))\n", + " qc_ax = qc_display.plot(qc_variable, subplot_index=(0,), set_title=\"QC results on field: \" + qc_variable,)\n", + " qc_ax.grid()\n", + " qc_display.qc_flag_block_plot(qc_variable, subplot_index=(1,))\n", + "\n", + " plt.show()\n", + "else:\n", + " print(f'QC not available for the selected field: {qc_variable}')\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "2308db16-f362-4033-a11a-c3e5e75ad9ba", + 
"metadata": {}, + "source": [ + "## Field selection dropdown menu\n", + "Select variable to be plotted from a dropdown menu" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d5792fa8-2253-441a-8ac6-1e07a0345031", + "metadata": {}, + "outputs": [], + "source": [ + "plt.ioff()\n", + "\n", + "# populate dropdown menu with available variables \n", + "available_variables = [v for v in ds.variables if not('time' in v or v.startswith('qc_') or v.startswith('source_')) and 'long_name' in ds.variables[v].attrs]\n", + "d_variable = 'Ozone_column_amount'\n", + "dropdown = widgets.Dropdown(\n", + " options = [(ds.variables[v].attrs['long_name'], v) for v in available_variables],\n", + " value= d_variable,\n", + " description='Field:',\n", + " disabled=False,\n", + ")\n", + "dropdown.layout.margin = '0px 30% 0px 20%'\n", + "dropdown.layout.width = '50%'\n", + "\n", + "# set up display\n", + "i_display = act.plotting.TimeSeriesDisplay(ds)\n", + "i_display.add_subplots((1,), figsize = (9.5,5))\n", + "i_ax = i_display.plot(d_variable, subplot_index=(0,), set_title=ds.variables[d_variable].attrs['long_name'],)\n", + "i_ax.grid()\n", + "i_fig = i_display.fig\n", + "\n", + "# update plot callback function\n", + "def update_plot(change):\n", + " i_ax.cla()\n", + " i_ax_new = i_display.plot(change.new, subplot_index=(0,), set_title=ds.variables[change.new].attrs['long_name'],)\n", + " i_ax_new.grid()\n", + " i_fig.canvas.draw()\n", + " i_fig.canvas.flush_events()\n", + "\n", + "dropdown.observe(update_plot, names='value')\n", + "\n", + "widgets.AppLayout(\n", + " header=dropdown,\n", + " center=i_fig.canvas,\n", + " pane_heights=[1, 6,1]\n", + ")\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": 
"python", + "pygments_lexer": "ipython3", + "version": "3.9.12" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/AOD-NIMFR/AOD-NIMFR_tutorial.ipynb b/VAPs/quicklook/AOD-NIMFR/AOD-NIMFR_tutorial.ipynb new file mode 100644 index 00000000..562b3cc5 --- /dev/null +++ b/VAPs/quicklook/AOD-NIMFR/AOD-NIMFR_tutorial.ipynb @@ -0,0 +1,838 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# NIMFRAOD1MICH.C1 Notebook\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/aod-nimfr) for more information about this vap." + ] + }, + { + "cell_type": "markdown", + "id": "97097763", + "metadata": {}, + "source": [ + "In this notebook, we demonstrate the workflow to explore ARM vap data (using nimfraod1mich as an example.) Value-added products (VAPs) are higher-order data products that have been analyzed and processed to ease scientists’ use of ARM data in atmospheric research and global climate models. \n", + "Here is the main content we will cover." 
+ ] + }, + { + "cell_type": "markdown", + "id": "eddec40f", + "metadata": {}, + "source": [ + "# Table of Contents\n", + "## Access the data\n", + "* How to retrieve the data\n", + "* Data path and file name conventions\n", + "* Load data\n", + "## Explore the data\n", + "* NetCDF Data structure\n", + "* Xarray essentials\n", + "* Xarray Variable\n", + "## Plot the data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "import random\n", + "\n", + "import glob\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "id": "2372d5be", + "metadata": {}, + "source": [ + "## Access the data" + ] + }, + { + "cell_type": "markdown", + "id": "5b0d9684", + "metadata": {}, + "source": [ + "### How to retrieve the data\n", + "We assume the path \"/data/archive\" is available where you are running this notebook. You can use `os.path.exists(\"/data/archive\")` to verify that the path exists on your machine.\n", + "\n", + "\n", + "### Data path and file name conventions\n", + "There are several common terms regarding ARM data, for example, data-stream-name, data-level, site, facility, instrument, etc. (For more details, please see [ARM Data File Standards Version 1.2](https://www.google.com/search?q=arm+datastream+facility+cite+definition&rlz=1C1GCEJ_enUS1029US1029&ei=hb41ZICrBPukqtsPvMODIA&ved=0ahUKEwjAgeiS0aL-AhV7kmoFHbzhAAQQ4dUDCBA&uact=5&oq=arm+datastream+facility+cite+definition&gs_lcp=Cgxnd3Mtd2l6LXNlcnAQAzIFCAAQogQyBQgAEKIEMgUIABCiBDIFCAAQogQyBQgAEKIEOgoIABBHENYEELADOgoIIRCgARDDBBAKSgQIQRgAUJ4DWIoOYO0PaAFwAXgAgAGMAYgBuQmSAQMyLjmYAQCgAQHIAQjAAQE&sclient=gws-wiz-serp).) 
\n", + "\n", + "For example, this notebook is called `nimfraod1mich.c1`, where `nimfraod1mich` is the \"datastream name\", and `c1` is the \"data level\".\n", + "\n", + "This datastream also contains site `nsa` and facility `C1`. (Note: an individual datastream might have multiple site-facility pairs.)\n", + "In such a case, the data of this data-stream is stored at `/data/archive/nsa/nsanimfraod1michC1.c1`, which is in the format of `{DATA_DIR}/{site}/{site}{datastream_name}{facility}.{data_level}`. We can use the following method to assign the data-stream directory `datastream_dir = os.path.join(DATA_DIR, site, site + DATASTREAM_NAME + facility + '.' + DATA_LEVEL )`\n", + "\n", + "The data files under datastream_dir also follow naming conventions. Once you reach the datastream_dir level, the most important file naming convention to differentiate the files is \"yyyyMMdd.hhmmss\", which comes in handy to filter files based on datetime. For example, we can use `glob.glob(f'{datastream_dir}/*.200709*.*')` to filter files in September 2007.\n", + "\n", + "(Note: refer to https://adc.arm.gov/discovery/#/ and the https://adc.arm.gov/solr8/metadata/select API to explore ARM datastreams and their associated available sites and facilities.)\n", + "\n", + "Please see the following examples in action" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d7e9eb85", + "metadata": {}, + "outputs": [], + "source": [ + "# Verify if DATA_DIR path exists\n", + "DATA_DIR = \"/data/archive\"\n", + "os.path.exists(DATA_DIR)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "586993fd", + "metadata": {}, + "outputs": [], + "source": [ + "# Specify datastream_dir following the path conventions and check its existence\n", + "DATASTREAM_NAME = \"nimfraod1mich\"\n", + "DATA_LEVEL = \"c1\"\n", + "site = \"nsa\"\n", + "facility = \"C1\"\n", + "datastream_dir = os.path.join(DATA_DIR, site, site + DATASTREAM_NAME + facility + '.' 
+ DATA_LEVEL )\n", + "print(datastream_dir)\n", + "print(os.path.exists(datastream_dir))\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0742f7c1", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: list 5 (random) files under datastream_dir\n", + "files = os.listdir(datastream_dir)\n", + "files[:5]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "39b98a36", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: get most recent file\n", + "list_of_files = glob.glob(f\"{datastream_dir}/*\") # * means all if need specific format then *.csv\n", + "latest_file = max(list_of_files, key=os.path.getctime)\n", + "latest_file" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "902d514e", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus sort datastream files based on datetime\n", + "files = os.listdir(datastream_dir)\n", + "file_sorted = files.copy()\n", + "file_sorted.sort() \n", + "print(file_sorted[:5])\n", + "\n", + "# to reverse\n", + "file_sorted_reverse = files.copy() \n", + "file_sorted_reverse.sort(reverse=True)\n", + "print(file_sorted_reverse[:5])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ec5923b2", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: pattern matching\n", + "# filter the 200709** files under datastream_dir\n", + "files_filter = glob.glob(f'{datastream_dir}/*.200709*.*')\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c4a4aa18", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: use armnotebook_utils.file_filter (TODO) to filter files based on datastream info \n" + ] + }, + { + "cell_type": "markdown", + "id": "cfeb9efc", + "metadata": {}, + "source": [ + "### Load data\n", + "The arm data is stored in [NetCDF](https://en.wikipedia.org/wiki/NetCDF#:~:text=%22NetCDF%20(network%20Common%20Data%20Form,format%20for%20representing%20scientific%20data.) format. 
We can use xarray's `open_dataset` method to load a single file, or `open_mfdataset` to open multiple files. (Note: the latter will still return a single xarray dataset object by combining multiple files.)\n", + "\n", + "Note: open_dataset keeps the file handle open and lazy loads its contents. All parameters are passed directly to open_dataset. It is a preferable method over load_dataset for memory efficiency. (For more details, please see [xarray.open_dataset](https://docs.xarray.dev/en/stable/generated/xarray.open_dataset.html))\n", + "\n", + "See the following example in action" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a440a329", + "metadata": {}, + "outputs": [], + "source": [ + "# open single file\n", + "full_path = latest_file\n", + "print(full_path)\n", + "ds_single = xr.open_dataset(full_path)\n", + "ds_single" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0143a3d", + "metadata": {}, + "outputs": [], + "source": [ + "print(ds_single)\n", + "print(type(ds_single))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1c0f8939", + "metadata": {}, + "outputs": [], + "source": [ + "# open multiple files\n", + "n_files = 3\n", + "full_paths = [os.path.join(datastream_dir, f_path) for f_path in file_sorted_reverse[:n_files]]\n", + "print(full_paths)\n", + "try: # Note: sometimes multiple files cannot be merged, so we used try except here.\n", + " ds_mutiple = xr.open_mfdataset(full_paths)\n", + " # ds_mutiple\n", + " print(type(ds_mutiple))\n", + " print(ds_mutiple)\n", + "except Exception as e:\n", + " print(e)" + ] + }, + { + "cell_type": "markdown", + "id": "3fa59943", + "metadata": {}, + "source": [ + "## Explore the data" + ] + }, + { + "cell_type": "markdown", + "id": "571f69b9", + "metadata": {}, + "source": [ + "### NetCDF Data structure \n", + "\n", + "\n", + "(If you are confident with NetCDF basics and xarray essentials, feel free to skip this section.)\n", + "Before we dive into 
data exploration, there are some essential concepts we should be familiar with (shown below): \n", + "* Dataset\n", + "* Data array\n", + "* Variable\n", + "* Dimension\n", + "* Coordinate\n", + "* Data Type\n", + "* Meta Data (Attributes)\n", + "\n", + "We will not go into details about NetCDF basics, but here are some references you might find helpful\n", + "* [Components of a NetCDF Dataset](https://iprc.soest.hawaii.edu/users/xfu/tool/guidef-7.html#)\n", + "* [Network Common Data Form (NetCDF)](https://www.unidata.ucar.edu/software/netcdf/) and [A Brief History of (netCDF) Time](https://www.unidata.ucar.edu/software/netcdf/time/recs.html)\n", + "\n", + "\n", + "\n", + "### Xarray essentials\n", + "Earlier we introduced the xarray package and used `open_dataset` and `open_mfdataset` to retrieve NetCDF data files as an xarray Dataset object. Recall that you can review the data in a notebook by using `print(ds)` or `ds`. \n", + "\n", + "Using xarray to retrieve the aforementioned NetCDF basics is straightforward. 
In general,\n", + "* Dataset: `ds`\n", + "* Data array: `ds.variables`\n", + "* Variable: `ds.variables`\n", + "* Dimension: `ds.dims`\n", + "* Coordinate: `ds.coords`\n", + "* Data Type: `type`\n", + "* Meta Data (Attributes): `ds.attrs`\n", + "\n", + "Also, here are some references if you are new to xarray\n", + "* [Xarray in 45 minutes](https://tutorial.xarray.dev/overview/xarray-in-45-min.html)\n", + "* [Handling NetCDF Files using XArray for Absolute Beginners](https://towardsdatascience.com/handling-netcdf-files-using-xarray-for-absolute-beginners-111a8ab4463f)\n", + "\n", + "Try the following commands in action\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b3eac323", + "metadata": {}, + "outputs": [], + "source": [ + "ds = ds_single" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "77ecf85d", + "metadata": {}, + "outputs": [], + "source": [ + "# Dataset \n", + "ds" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "25e7de09", + "metadata": {}, + "outputs": [], + "source": [ + "# Data array, Variable\n", + "# Note: the info can be overwhelming. 
Do not attempt to grasp everything at the first glance.\n", + "ds.variables" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8c41b67e", + "metadata": {}, + "outputs": [], + "source": [ + "# dimensions\n", + "ds.dims" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "156f1dfc", + "metadata": {}, + "outputs": [], + "source": [ + "# coordinates\n", + "ds.coords" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "277d6064", + "metadata": {}, + "outputs": [], + "source": [ + "# Meta Data (Attributes)\n", + "ds.attrs" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d334681f", + "metadata": {}, + "outputs": [], + "source": [ + "# type\n", + "print(type(ds))\n", + "print(type(ds.variables))\n", + "print(type(ds.dims))\n", + "print(type(ds.coords))\n", + "print(type(ds.attrs))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "643399d6", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: retrieve variable names only\n", + "list(ds.variables)" + ] + }, + { + "cell_type": "markdown", + "id": "1cf36878", + "metadata": {}, + "source": [ + "#### Discussion: variable vs. coordinates vs. dimensions. \n", + "From [Components of a NetCDF Dataset](https://iprc.soest.hawaii.edu/users/xfu/tool/guidef-7.html#):\n", + "\n", + "* Dimension: A dimension may be used to represent a real physical dimension, for example, time, latitude, longitude, or height. A dimension might also be used to index other quantities, for example station or model-run-number.\n", + "A netCDF dimension has both a name and a length. A dimension length is an arbitrary positive integer, except that one dimension in a netCDF dataset can have the length UNLIMITED.\n", + "\n", + "* Variables: Variables are used to store the bulk of the data in a netCDF dataset. A variable represents an array of values of the same type. A scalar value is treated as a 0-dimensional array. 
A variable has a name, a data type, and a shape described by its list of dimensions specified when the variable is created. A variable may also have associated attributes, which may be added, deleted or changed after the variable is created.\n", + "\n", + "* Coordinate (Variables): It is legal for a variable to have the same name as a dimension. Such variables have no special meaning to the netCDF library. However there is a convention that such variables should be treated in a special way by software using this library.\n", + "A variable with the same name as a dimension is called a coordinate variable. It typically defines a physical coordinate corresponding to that dimension.\n", + "\n", + "\n", + "Tips: By definition, coordinates and dimensions are also variables. For example, ds.time is a coordinate but it is also a special variable. In practice, however, when we talk about a variable, we mean a regular (non-coordinate) variable. We will use this convention for the remainder of this notebook. " + ] + }, + { + "cell_type": "markdown", + "id": "d47120f9", + "metadata": {}, + "source": [ + "### Xarray Variable\n", + "\n", + "\n", + "We can use `ds.variables` to access the variables of a dataset. But printing out the whole dataset can often be overwhelming. Instead, we would work on an individual variable (also called an xarray DataArray) using the `ds[var_name]` syntax, where `var_name` is the name of the variable. See the following examples in action.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5a57e136", + "metadata": {}, + "outputs": [], + "source": [ + "print(type(ds[\"time\"]))\n", + "ds[\"time\"]" + ] + }, + { + "cell_type": "markdown", + "id": "51ec643e", + "metadata": {}, + "source": [ + "#### Variable properties\n", + "For an individual variable, the following properties are the most commonly used, listed with the associated xarray attribute to retrieve them. 
(assuming var = ds[var_name])\n", + "* name: `var.name`\n", + "* data content: `var.data`\n", + "* attributes: `var.attrs`\n", + "* dimensions: `var.dims`\n", + "* data type: `var.data.dtype`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a632a525", + "metadata": {}, + "outputs": [], + "source": [ + "var_name = \"time\"\n", + "var = ds[var_name]\n", + "\n", + "print(\"var.name: \\n\", var.name, \"\\n\")\n", + "print(\"var.data: \\n\", var.data, \"\\n\")\n", + "print(\"var.attrs: \\n\", var.attrs, \"\\n\")\n", + "print(\"var.dims: \\n\", var.dims, \"\\n\")\n", + "print(\"var.data.dtype: \\n\", var.data.dtype, \"\\n\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b256b07f", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: individual variable info table (collect and display variable info in a tabular format)\n", + "df_info = pd.DataFrame()\n", + "df_info[\"var_name\"] = list(ds.variables)\n", + "df_info[\"dims\"] = df_info.var_name.apply(lambda x: ds[x].dims)\n", + "df_info[\"is_dim\"] = df_info.var_name.apply(lambda x: x in ds.dims)\n", + "df_info[\"n_dim\"] = df_info.var_name.apply(lambda x: len(ds[x].dims))\n", + "df_info[\"attrs\"] = df_info.var_name.apply(lambda x: ds[x].attrs)\n", + "df_info[\"dtype\"] = df_info.var_name.apply(lambda x: ds[x].data.dtype)\n", + "df_info" + ] + }, + { + "cell_type": "markdown", + "id": "e6a36b96", + "metadata": {}, + "source": [ + "### Data cleaning/Preprocessing (skipped)\n", + "Data cleaning and preprocessing is an important stage in the data analysis pipeline. However, it is outside the scope of this notebook. For more information about data cleaning and preprocessing basics, here are some references. 
\n", + "* [Xarray Fundamentals](https://earth-env-data-science.github.io/lectures/xarray/xarray.html)\n", + "* [Xarray Tutorial — Pangeo Gallery documentation](http://gallery.pangeo.io/repos/pangeo-data/pangeo-tutorial-gallery/xarray.html)\n", + "* [Pythonic Data Cleaning With pandas and NumPy](https://realpython.com/python-data-cleaning-numpy-pandas/)\n", + "* [Pandas - Cleaning Data](https://www.w3schools.com/python/pandas/pandas_cleaning.asp)" + ] + }, + { + "cell_type": "markdown", + "id": "ae6e1a69", + "metadata": {}, + "source": [ + "### Plotting\n", + "Data visualization (or plotting) is another important data analysis topic and deserves its own discussion. In this notebook we will only demonstrate basic tools for simple data visualization tasks.\n", + "\n", + "Here are some references you might find useful:\n", + "* xarray plotting: https://docs.xarray.dev/en/stable/user-guide/plotting.html\n", + "* Atmospheric Community Toolkit (ACT): https://arm-doe.github.io/ACT/index.html\n", + "\n", + "Note: this notebook is auto-generated from a template. It uses a general heuristic to select the variable(s) to plot and is not customized for each individual notebook. Feel free to change the variables of interest, especially if a certain figure fails to plot." + ] + }, + { + "cell_type": "markdown", + "id": "7e417fb2", + "metadata": {}, + "source": [ + "#### 1-dimensional basic time series plot\n", + "\n", + "For the following plot we would like to find variables such that\n", + "* it has one and only one dimension\n", + "* \"time\" is its coordinate variable\n", + "* it is not a dimension itself,\n", + "* it is not a special variable with substrings within [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "\n", + "Please see the following example in action. 
(For more details about pandas filtering, please see the following references.)\n", + "* [pandas: multiple conditions while indexing data frame](https://stackoverflow.com/questions/22591174/pandas-multiple-conditions-while-indexing-data-frame-unexpected-behavior)\n", + "* [pandas.DataFrame.apply](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.apply.html)\n", + "* [How to test if a string contains one of the substrings in a list...](https://stackoverflow.com/questions/26577516/how-to-test-if-a-string-contains-one-of-the-substrings-in-a-list-in-pandas)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "30edac2d", + "metadata": {}, + "outputs": [], + "source": [ + "# (pandas query) filter n_dim==1, non-dimenssional, and not contains the following substrings\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter = df_info[(df_info.n_dim == 1) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (df_info.is_dim==False) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4777a659", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " var_1d = df_filter.var_name.values[0]\n", + " var_1d\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fb5ae985", + "metadata": {}, + "outputs": [], + "source": [ + "# Note: if failed, change to another variable to plot.\n", + "try:\n", + " ds[var_1d].plot()\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "markdown", + "id": "9e41066e", + "metadata": {}, + "source": [ + "#### 2-dimensional basic plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "09bd4adc", + "metadata": {}, + "outputs": [], + "source": [ + "# (pandas query) filter n_dim==2, non-dimenssional, and not contains the 
following substrings\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_2 = df_info[(df_info.n_dim == 2) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (df_info.is_dim==False) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter_2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2cdea1a9", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " var_2d = df_filter_2.var_name.values[0]\n", + " var_2d\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f5f48879", + "metadata": {}, + "outputs": [], + "source": [ + "# Note: if failed, change to another variable to plot.\n", + "try:\n", + " print(ds[var_2d].dims)\n", + " # ds[var_2d].plot()\n", + "\n", + " # conventionally, use \"time\" as x-axis\n", + " ds[var_2d].plot(x=\"time\")\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "markdown", + "id": "7554b2a6", + "metadata": {}, + "source": [ + "#### qc-plotting (optional)\n", + "\n", + "Note: act qc plotting has more strict requirement, one of them is the associated qc variable need to have \"flag_masks\" attributes. 
Which is added by using ds = act.io.armfiles.read_netcdf(files_list), then ds.clean.cleanup()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "71c2096f", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " ds_act = act.io.armfiles.read_netcdf(full_path)\n", + " print(type(ds_act))\n", + " ds_act.clean.cleanup()\n", + "\n", + " # or \n", + " # ds.clean.cleanup()\n", + "except Exception as e:\n", + " print(\"ERROR\", e)\n", + " ds_act = ds" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "844f8505", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter valid variables for ACT qc plotting\n", + "condition = (\"qc_\" + df_info.var_name).apply(lambda x: ds_act[x].attrs.get(\"flag_masks\") is not None \n", + " if x in list(ds.variables) else False)\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_3 = df_info[(df_info.is_dim==False) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings))) &\n", + " condition\n", + " ]\n", + "df_filter_3" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ecd38cc6", + "metadata": {}, + "outputs": [], + "source": [ + "# QC Plot\n", + "try:\n", + " qc_variable = df_filter_3.var_name.values[0]\n", + " print(qc_variable)\n", + "except Exception as e:\n", + " print(e)\n", + " \n", + "try:\n", + "\n", + " # Plot\n", + " qc_display = act.plotting.TimeSeriesDisplay(ds_act)\n", + " qc_display.add_subplots((2,), figsize = (9.5,10))\n", + " qc_ax = qc_display.plot(qc_variable, subplot_index=(0,), set_title=\"QC results on field: \" + qc_variable,)\n", + " qc_ax.grid()\n", + " qc_display.qc_flag_block_plot(qc_variable, subplot_index=(1,))\n", + " qc_ax.set_xlabel(f\"UTC Time starts at {ds.time.data[0]}\")\n", + "\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e)" + ] + }, + { + "cell_type": "markdown", + "id": "d76e4d27", + "metadata": 
{}, + "source": [ + "#### bonus: choose variables to plot from a dropdown menu " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bb733804", + "metadata": {}, + "outputs": [], + "source": [ + "# Valid variables filtering\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_4 = df_info[(df_info.is_dim==False) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter_4" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "82d5c20d", + "metadata": {}, + "outputs": [], + "source": [ + "# example 1: using xarray plot\n", + "\n", + "# Uncomment the following cell to try the interactive plot (ctrl + /)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2fc9aaa4", + "metadata": {}, + "outputs": [], + "source": [ + "# %matplotlib widget\n", + "# plt.clf()\n", + "\n", + "# fig, ax = plt.subplots(figsize=(10, 4))\n", + "\n", + "# available_variables = df_filter_4.var_name.values\n", + "# @widgets.interact(var=available_variables)\n", + "# def update(var = available_variables[0]):\n", + "# fig.clear() # Remove old lines from plot and plot new one\n", + "# if len(ds[var].dims)==2:\n", + "# ds[var].plot(x=\"time\", add_colorbar=False)\n", + "# else:\n", + "# ds[var].plot()\n", + "# plt.grid()\n", + "# plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fabdd802", + "metadata": {}, + "outputs": [], + "source": [ + "# example 2: using act plot\n", + "\n", + "# Uncomment the following cell to try the interactive plot (ctrl + /)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "962b4186", + "metadata": {}, + "outputs": [], + "source": [ + "# %matplotlib widget\n", + "# plt.clf()\n", + "\n", + "# available_variables = df_filter_4.var_name.values\n", + "\n", + "\n", + "# @widgets.interact(var=available_variables)\n", + "# def 
update(var = available_variables[0]):\n", + "\n", + "# i_display = act.plotting.TimeSeriesDisplay(ds_act)\n", + "# i_display.add_subplots((1,), figsize=(10, 4))\n", + "# ax = i_display.plot(var, subplot_index=(0,), set_title=f\"{var} ({ds_act[var].attrs['long_name']})\",)\n", + "\n", + "# ax.set_xlabel(f\"UTC Time starts at {ds.time.data[0]}\")\n", + "# ax.grid()\n", + "# plt.show()\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.16" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/AOD-NIMFR/nimfraod1mich.c1.ipynb b/VAPs/quicklook/AOD-NIMFR/nimfraod1mich.c1.ipynb new file mode 100644 index 00000000..5753a21c --- /dev/null +++ b/VAPs/quicklook/AOD-NIMFR/nimfraod1mich.c1.ipynb @@ -0,0 +1,339 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# NIMFRAOD1MICH.C1 Plots\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/aod-nimfr) for more information about this vap." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib widget\n", + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import ipywidgets as widgets\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "# Data archive directory\n", + "DATA_DIR = r'/data/archive/'\n", + "\n", + "# Datastream info\n", + "DATASTREAM_NAME = 'nimfraod1mich'\n", + "DATA_LEVEL = 'c1'\n", + "LOCATIONS = [{'end_date': '2017-07-27', 'facility': 'C1', 'site': 'nsa', 'start_date': '1998-04-28'}, {'end_date': '2010-11-12', 'facility': 'C2', 'site': 'nsa', 'start_date': '2000-04-28'}, {'end_date': '2017-12-14', 'facility': 'C1', 'site': 'sgp', 'start_date': '2000-05-03'}]" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "9faaf875", + "metadata": {}, + "source": [ + "## Define site, facility, and date range" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ac6764f5", + "metadata": {}, + "outputs": [], + "source": [ + "print(\"The following locations and date ranges are available for this VAP:\")\n", + "display(pd.DataFrame(LOCATIONS, columns=['site', 'facility', 'start_date', 'end_date']))" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "8d132223", + "metadata": {}, + "source": [ + "#### Define site, facility, and date range (date format: YYYY-MM-DD) using the variables below:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e563983a", + "metadata": {}, + "outputs": [], + "source": [ + "site_facility = ( 'sgp', 'C1' )\n", + "\n", + "date_start = '2017-12-14'\n", + "date_end = '2017-12-14'" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "bccd3dfe-2f99-49a5-bace-ea37e8dd8fc3", + "metadata": {}, + "source": [ + "## Load 
data files\n", + "Load data files from /data/archive/" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cb4b9a26-c574-49c0-a521-658fa553e39e", + "metadata": {}, + "outputs": [], + "source": [ + "# Compile list of files\n", + "site, facility = site_facility\n", + "d_date_start = datetime.strptime(date_start, '%Y-%m-%d')\n", + "d_date_end = datetime.strptime(date_end, '%Y-%m-%d')\n", + "dir_path = os.path.join(DATA_DIR + site, site + DATASTREAM_NAME + facility + r'.' + DATA_LEVEL )\n", + "dir_path\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6be8f3dc", + "metadata": {}, + "outputs": [], + "source": [ + "from datetime import date, timedelta\n", + "import pandas as pd\n", + "\n", + "def get_ARM_formated_dates(start_date, end_date):\n", + " \"\"\"\n", + " Get a list of ARM conventional formated date lists, based on start_date and end_date(inclusive)\n", + " EXAMPLE:\n", + " get_ARM_formated_dates(start_date=\"20180219\", end_date=\"20180221\")\n", + " >> [\"20180219\", \"20180220\", \"20180221\"] \n", + " \"\"\"\n", + " \n", + " _start_date = pd.to_datetime(start_date)\n", + " _end_date = pd.to_datetime(end_date)\n", + " \n", + " delta = _end_date - _start_date # returns timedelta \n", + " dates = []\n", + "\n", + " for i in range(delta.days + 1):\n", + " day = _start_date + timedelta(days=i)\n", + " day_formated = day.strftime(format=\"%Y%m%d\")\n", + " dates.append(day_formated)\n", + " return dates\n", + "\n", + "\n", + "get_ARM_formated_dates(start_date=date_start, end_date=date_end)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "51feea2e", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter a list of files based on date pattern\n", + "import glob\n", + "dates = get_ARM_formated_dates(start_date=date_start, end_date=date_end)\n", + "files_filter = []\n", + "for date in dates:\n", + " files_filter += glob.glob(f'{dir_path}/*.{date}*.*')\n", + " files_filter\n", + "files_filter" + 
] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0e5d0d7", + "metadata": {}, + "outputs": [], + "source": [ + "# Load files as a single dataset\n", + "files_list = files_filter \n", + "ds = act.io.armfiles.read_netcdf(files_list)\n", + "ds.clean.cleanup()\n", + "print(f'{len(files_list)} files loaded')\n", + "ds\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "4a551094-9ec0-4b64-b80a-9940573c2f50", + "metadata": {}, + "source": [ + "## Plot time series data\n", + "#### Define the list of variables to be plotted:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d09b789e-84f1-4605-846b-a72c110c8048", + "metadata": {}, + "outputs": [], + "source": [ + "variables_to_plot = ['direct_normal_narrowband_filter1', 'direct_normal_narrowband_filter2', 'direct_normal_narrowband_filter3']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3458fb08-035b-4898-9253-0a94e6f9c97b", + "metadata": {}, + "outputs": [], + "source": [ + "ts_display = act.plotting.TimeSeriesDisplay(ds)\n", + "ts_display.add_subplots((len(variables_to_plot),), figsize = (9.5,4*len(variables_to_plot)))\n", + "\n", + "for i,v in enumerate(variables_to_plot):\n", + " ts_ax = ts_display.plot(v, subplot_index=(i,), set_title=ds.variables[v].attrs['long_name'],)\n", + " ts_ax.grid()\n", + "\n", + "plt.show()\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "194399aa-1907-452b-8ba9-bc31d7f60291", + "metadata": {}, + "source": [ + "## Quality check plots\n", + "#### Define variable for QC plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9a0ef63d-3eeb-48fc-a24a-43258b6134b8", + "metadata": {}, + "outputs": [], + "source": [ + "qc_variable = 'direct_normal_broadband'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "532663a3-4dc0-4497-bda8-018c5f91e1c4", + "metadata": {}, + "outputs": [], + "source": [ + "# QC Plot\n", + "if ('qc_'+qc_variable) in 
ds.variables:\n", + "\n", + " # Plot\n", + " qc_display = act.plotting.TimeSeriesDisplay(ds)\n", + " qc_display.add_subplots((2,), figsize = (9.5,10))\n", + " qc_ax = qc_display.plot(qc_variable, subplot_index=(0,), set_title=\"QC results on field: \" + qc_variable,)\n", + " qc_ax.grid()\n", + " qc_display.qc_flag_block_plot(qc_variable, subplot_index=(1,))\n", + "\n", + " plt.show()\n", + "else:\n", + " print(f'QC not available for the selected field: {qc_variable}')\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "2308db16-f362-4033-a11a-c3e5e75ad9ba", + "metadata": {}, + "source": [ + "## Field selection dropdown menu\n", + "Select variable to be plotted from a dropdown menu" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d5792fa8-2253-441a-8ac6-1e07a0345031", + "metadata": {}, + "outputs": [], + "source": [ + "plt.ioff()\n", + "\n", + "# populate dropdown menu with available variables \n", + "available_variables = [v for v in ds.variables if not('time' in v or v.startswith('qc_') or v.startswith('source_')) and 'long_name' in ds.variables[v].attrs]\n", + "d_variable = 'direct_normal_narrowband_filter1'\n", + "dropdown = widgets.Dropdown(\n", + " options = [(ds.variables[v].attrs['long_name'], v) for v in available_variables],\n", + " value= d_variable,\n", + " description='Field:',\n", + " disabled=False,\n", + ")\n", + "dropdown.layout.margin = '0px 30% 0px 20%'\n", + "dropdown.layout.width = '50%'\n", + "\n", + "# set up display\n", + "i_display = act.plotting.TimeSeriesDisplay(ds)\n", + "i_display.add_subplots((1,), figsize = (9.5,5))\n", + "i_ax = i_display.plot(d_variable, subplot_index=(0,), set_title=ds.variables[d_variable].attrs['long_name'],)\n", + "i_ax.grid()\n", + "i_fig = i_display.fig\n", + "\n", + "# update plot callback function\n", + "def update_plot(change):\n", + " i_ax.cla()\n", + " i_ax_new = i_display.plot(change.new, subplot_index=(0,), 
set_title=ds.variables[change.new].attrs['long_name'],)\n", + " i_ax_new.grid()\n", + " i_fig.canvas.draw()\n", + " i_fig.canvas.flush_events()\n", + "\n", + "dropdown.observe(update_plot, names='value')\n", + "\n", + "widgets.AppLayout(\n", + " header=dropdown,\n", + " center=i_fig.canvas,\n", + " pane_heights=[1, 6,1]\n", + ")\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.12" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/AOD/AOD_tutorial.ipynb b/VAPs/quicklook/AOD/AOD_tutorial.ipynb new file mode 100644 index 00000000..14938a88 --- /dev/null +++ b/VAPs/quicklook/AOD/AOD_tutorial.ipynb @@ -0,0 +1,838 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# SASHENIRAOD.C1 Notebook\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/aod) for more information about this vap." + ] + }, + { + "cell_type": "markdown", + "id": "97097763", + "metadata": {}, + "source": [ + "In this notebook, we demonstrate the workflow to explore ARM vap data (using sasheniraod as an example.) Value-added products (VAPs) are higher-order data products that have been analyzed and processed to ease scientists’ use of ARM data in atmospheric research and global climate models. \n", + "Here is the main content we will cover." 
+ ] + }, + { + "cell_type": "markdown", + "id": "eddec40f", + "metadata": {}, + "source": [ + "# Table of Contents\n", + "## Access the data\n", + "* How to retrieve the data\n", + "* Data path and file name conventions\n", + "* Load data\n", + "## Explore the data\n", + "* NetCDF Data structure\n", + "* Xarray essentials\n", + "* Xarray Variable\n", + "## Plot the data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "import random\n", + "\n", + "import glob\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "id": "2372d5be", + "metadata": {}, + "source": [ + "## Access the data" + ] + }, + { + "cell_type": "markdown", + "id": "5b0d9684", + "metadata": {}, + "source": [ + "### How to retrieve the data\n", + "We assume the path \"/data/archive\" is available where you are running this notebook. You can use `os.path.exists(\"/data/archive\")` to verify whether the path exists on your machine.\n", + "\n", + "\n", + "### Data path and file name conventions\n", + "There are several common terms regarding ARM data, for example, data-stream-name, data-level, site, facility, instrument, etc. (For more details, please see [ARM Data File Standards Version 1.2](https://www.google.com/search?q=arm+datastream+facility+cite+definition&rlz=1C1GCEJ_enUS1029US1029&ei=hb41ZICrBPukqtsPvMODIA&ved=0ahUKEwjAgeiS0aL-AhV7kmoFHbzhAAQQ4dUDCBA&uact=5&oq=arm+datastream+facility+cite+definition&gs_lcp=Cgxnd3Mtd2l6LXNlcnAQAzIFCAAQogQyBQgAEKIEMgUIABCiBDIFCAAQogQyBQgAEKIEOgoIABBHENYEELADOgoIIRCgARDDBBAKSgQIQRgAUJ4DWIoOYO0PaAFwAXgAgAGMAYgBuQmSAQMyLjmYAQCgAQHIAQjAAQE&sclient=gws-wiz-serp).) 
\n", + "\n", + "For example, this notebook is called `sasheniraod.c1`, where `sasheniraod` is the \"datastream name\", and `c1` is the \"data level\".\n", + "\n", + "This datastream also contains site `hou` and facility `M1`. (Note: an individual datastream might have multiple site-facility pairs.)\n", + "In such a case, the data of this data-stream is stored at `/data/archive/hou/housasheniraodM1.c1`, which is in the format of `{DATA_DIR}/{site}/{site}{DATASTREAM_NAME}{facility}.{DATA_LEVEL}`. We can use the following method to assign the data-stream directory: `datastream_dir = os.path.join(DATA_DIR, site, site + DATASTREAM_NAME + facility + '.' + DATA_LEVEL )`\n", + "\n", + "The data files under datastream_dir also follow naming conventions. Once we reach the datastream_dir level, the most important file naming convention for differentiating the files is \"yyyyMMdd.hhmmss\", which comes in handy for filtering files based on datetime. For example, we can use `glob.glob(f'{datastream_dir}/*.200709*.*')` to filter files from September 2007.\n", + "\n", + "(Note: refer to https://adc.arm.gov/discovery/#/ and the https://adc.arm.gov/solr8/metadata/select API to explore ARM datastreams and their associated available sites and facilities.)\n", + "\n", + "Please see the following examples in action" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d7e9eb85", + "metadata": {}, + "outputs": [], + "source": [ + "# Verify if DATA_DIR path exists\n", + "DATA_DIR = \"/data/archive\"\n", + "os.path.exists(DATA_DIR)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "586993fd", + "metadata": {}, + "outputs": [], + "source": [ + "# Speicify datastream_dir following the path conventions and check its existence\n", + "DATASTREAM_NAME = \"sasheniraod\"\n", + "DATA_LEVEL = \"c1\"\n", + "site = \"hou\"\n", + "facility = \"M1\"\n", + "datastream_dir = os.path.join(DATA_DIR, site, site + DATASTREAM_NAME + facility + '.' 
+ DATA_LEVEL )\n", + "print(datastream_dir)\n", + "print(os.path.exists(datastream_dir))\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0742f7c1", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: list 5 (random) files under datastream_dir\n", + "files = os.listdir(datastream_dir)\n", + "files[:5]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "39b98a36", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: get most recent file\n", + "list_of_files = glob.glob(f\"{datastream_dir}/*\") # * means all if need specific format then *.csv\n", + "latest_file = max(list_of_files, key=os.path.getctime)\n", + "latest_file" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "902d514e", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus sort datastream files based on datetime\n", + "files = os.listdir(datastream_dir)\n", + "file_sorted = files.copy()\n", + "file_sorted.sort() \n", + "print(file_sorted[:5])\n", + "\n", + "# to reverse\n", + "file_sorted_reverse = files.copy() \n", + "file_sorted_reverse.sort(reverse=True)\n", + "print(file_sorted_reverse[:5])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ec5923b2", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: pattern matching\n", + "# filter the 200709** files under datastream_dir\n", + "files_filter = glob.glob(f'{datastream_dir}/*.200709*.*')\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c4a4aa18", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: use armnotebook_utils.file_filter (TODO) to filter files based on datastream info \n" + ] + }, + { + "cell_type": "markdown", + "id": "cfeb9efc", + "metadata": {}, + "source": [ + "### Load data\n", + "The arm data is stored in [NetCDF](https://en.wikipedia.org/wiki/NetCDF#:~:text=%22NetCDF%20(network%20Common%20Data%20Form,format%20for%20representing%20scientific%20data.) format. 
We can use xarray's `open_dataset` method to load a single file, or `open_mfdataset` to open multiple files. (Note: the latter will still return a single xarray dataset object by combining multiple files.)\n", + "\n", + "Note: open_dataset keeps the file handle open and lazy loads its contents. All parameters are passed directly to open_dataset. It is a preferable method over load_dataset for memory efficiency. (For more details, please see [xarray.open_dataset](https://docs.xarray.dev/en/stable/generated/xarray.open_dataset.html))\n", + "\n", + "See the following example in action" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a440a329", + "metadata": {}, + "outputs": [], + "source": [ + "# open single file\n", + "full_path = latest_file\n", + "print(full_path)\n", + "ds_single = xr.open_dataset(full_path)\n", + "ds_single" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0143a3d", + "metadata": {}, + "outputs": [], + "source": [ + "print(ds_single)\n", + "print(type(ds_single))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1c0f8939", + "metadata": {}, + "outputs": [], + "source": [ + "# open multiple files\n", + "n_files = 3\n", + "full_paths = [os.path.join(datastream_dir, f_path) for f_path in file_sorted_reverse[:n_files]]\n", + "print(full_paths)\n", + "try: # Note: sometimes multiple files cannot be merged, so we used try except here.\n", + " ds_mutiple = xr.open_mfdataset(full_paths)\n", + " # ds_mutiple\n", + " print(type(ds_mutiple))\n", + " print(ds_mutiple)\n", + "except Exception as e:\n", + " print(e)" + ] + }, + { + "cell_type": "markdown", + "id": "3fa59943", + "metadata": {}, + "source": [ + "## Explore the data" + ] + }, + { + "cell_type": "markdown", + "id": "571f69b9", + "metadata": {}, + "source": [ + "### NetCDF Data structure \n", + "\n", + "\n", + "(If you are confident with NetCDF basics and xarray essentials, feel free to skip this section.)\n", + "Before we dive into 
data exploration, there are some essential concepts we should be familiar with (shown below): \n", + "* Dataset\n", + "* Data array\n", + "* Variable\n", + "* Dimension\n", + "* Coordinate\n", + "* Data Type\n", + "* Meta Data (Attributes)\n", + "\n", + "We will not go into details about NetCDF basics, but here are some references you might find helpful:\n", + "* [Components of a NetCDF Dataset](https://iprc.soest.hawaii.edu/users/xfu/tool/guidef-7.html#)\n", + "* [Network Common Data Form (NetCDF)](https://www.unidata.ucar.edu/software/netcdf/) and [A Brief History of (netCDF) Time](https://www.unidata.ucar.edu/software/netcdf/time/recs.html)\n", + "\n", + "\n", + "\n", + "### Xarray essentials\n", + "Earlier we introduced the xarray package and used `open_dataset` and `open_mfdataset` to retrieve a NetCDF data file as an xarray Dataset object. Recall that you can review the data in a notebook by using `print(ds)` or `ds`. \n", + "\n", + "Using xarray to retrieve the aforementioned NetCDF basics is straightforward. 
In general,\n", + "* Dataset: `ds`\n", + "* Data array: `ds.variables`\n", + "* Variable: `ds.variables`\n", + "* Dimension: `ds.dims`\n", + "* Coordinate: `ds.coords`\n", + "* Data Type: `type`\n", + "* Meta Data (Attributes): `ds.attrs`\n", + "\n", + "Also, here are some references if you are new to xarray:\n", + "* [Xarray in 45 minutes](https://tutorial.xarray.dev/overview/xarray-in-45-min.html)\n", + "* [Handling NetCDF Files using XArray for Absolute Beginners](https://towardsdatascience.com/handling-netcdf-files-using-xarray-for-absolute-beginners-111a8ab4463f)\n", + "\n", + "Try the following commands in action\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b3eac323", + "metadata": {}, + "outputs": [], + "source": [ + "ds = ds_single" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "77ecf85d", + "metadata": {}, + "outputs": [], + "source": [ + "# Dataset \n", + "ds" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "25e7de09", + "metadata": {}, + "outputs": [], + "source": [ + "# Data array, Variable\n", + "# Note: the info can be overwhelming. 
Do not attempt to grasp everything at the first glance.\n", + "ds.variables" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8c41b67e", + "metadata": {}, + "outputs": [], + "source": [ + "# dimenssions\n", + "ds.dims" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "156f1dfc", + "metadata": {}, + "outputs": [], + "source": [ + "# coordinates\n", + "ds.coords" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "277d6064", + "metadata": {}, + "outputs": [], + "source": [ + "# Meta Data (Attributes)\n", + "ds.attrs" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d334681f", + "metadata": {}, + "outputs": [], + "source": [ + "# type\n", + "print(type(ds))\n", + "print(type(ds.variables))\n", + "print(type(ds.dims))\n", + "print(type(ds.coords))\n", + "print(type(ds.attrs))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "643399d6", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: retrieve variable names only\n", + "list(ds.variables)" + ] + }, + { + "cell_type": "markdown", + "id": "1cf36878", + "metadata": {}, + "source": [ + "#### Discussion: variable vs. coordinates vs. dimensions. \n", + "From [Components of a NetCDF Dataset](https://iprc.soest.hawaii.edu/users/xfu/tool/guidef-7.html#):\n", + "\n", + "* Dimension: A dimension may be used to represent a real physical dimension, for example, time, latitude, longitude, or height. A dimension might also be used to index other quantities, for example station or model-run-number.\n", + "A netCDF dimension has both a name and a length. A dimension length is an arbitrary positive integer, except that one dimension in a netCDF dataset can have the length UNLIMITED.\n", + "\n", + "* Variables: Variables are used to store the bulk of the data in a netCDF dataset. A variable represents an array of values of the same type. A scalar value is treated as a 0-dimensional array. 
A variable has a name, a data type, and a shape described by its list of dimensions specified when the variable is created. A variable may also have associated attributes, which may be added, deleted or changed after the variable is created.\n", + "\n", + "* Coordinate (Variables): It is legal for a variable to have the same name as a dimension. Such variables have no special meaning to the netCDF library. However there is a convention that such variables should be treated in a special way by software using this library.\n", + "A variable with the same name as a dimension is called a coordinate variable. It typically defines a physical coordinate corresponding to that dimension.\n", + "\n", + "\n", + "Tips: By definition, coordinates and dimensions are also variables. For example, ds.time is a coordinate, but it is also a special variable. In practice, however, when we talk about a variable, we mean a regular (non-coordinate) variable. We will use this convention for the remainder of the notebook. " + ] + }, + { + "cell_type": "markdown", + "id": "d47120f9", + "metadata": {}, + "source": [ + "### Xarray Variable\n", + "\n", + "\n", + "We can use `ds.variables` to access the variables of a dataset. But printing out the whole dataset can often be overwhelming. Instead, we will work on an individual variable (also called an xarray data array) using the `ds[var_name]` syntax, where `var_name` is the name of the variable. See the following examples in action.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5a57e136", + "metadata": {}, + "outputs": [], + "source": [ + "print(type(ds[\"time\"]))\n", + "ds[\"time\"]" + ] + }, + { + "cell_type": "markdown", + "id": "51ec643e", + "metadata": {}, + "source": [ + "#### Variable properties\n", + "For an individual variable, the following properties are the most commonly used, listed with the associated xarray attribute to retrieve each of them. 
(assuming var = ds[var_name])\n", + "* name: `var.name`\n", + "* data content: `var.data`\n", + "* attributes: `var.attrs`\n", + "* dimensions: `var.dims`\n", + "* data type: `var.data.dtype`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a632a525", + "metadata": {}, + "outputs": [], + "source": [ + "var_name = \"time\"\n", + "var = ds[var_name]\n", + "\n", + "print(\"var.name: \\n\", var.name, \"\\n\")\n", + "print(\"var.data: \\n\", var.data, \"\\n\")\n", + "print(\"var.attrs: \\n\", var.attrs, \"\\n\")\n", + "print(\"var.dims: \\n\", var.dims, \"\\n\")\n", + "print(\"var.data.dtype: \\n\", var.data.dtype, \"\\n\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b256b07f", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: individual variable info table (collect and display variable info in a tabular format)\n", + "df_info = pd.DataFrame()\n", + "df_info[\"var_name\"] = list(ds.variables)\n", + "df_info[\"dims\"] = df_info.var_name.apply(lambda x: ds[x].dims)\n", + "df_info[\"is_dim\"] = df_info.var_name.apply(lambda x: x in ds.dims)\n", + "df_info[\"n_dim\"] = df_info.var_name.apply(lambda x: len(ds[x].dims))\n", + "df_info[\"attrs\"] = df_info.var_name.apply(lambda x: ds[x].attrs)\n", + "df_info[\"dtype\"] = df_info.var_name.apply(lambda x: ds[x].data.dtype)\n", + "df_info" + ] + }, + { + "cell_type": "markdown", + "id": "e6a36b96", + "metadata": {}, + "source": [ + "### Data cleaning/Preprocessing (skipped)\n", + "Data cleaning and preprocessing is an important stage in the data analysis pipeline. However, it is outside the scope of this notebook. For more information about data cleaning and preprocessing basics, here are some references. 
\n", + "* [Xarray Fundamentals](https://earth-env-data-science.github.io/lectures/xarray/xarray.html)\n", + "* [Xarray Tutorial — Pangeo Gallery documentation](http://gallery.pangeo.io/repos/pangeo-data/pangeo-tutorial-gallery/xarray.html)\n", + "* [Pythonic Data Cleaning With pandas and NumPy](https://realpython.com/python-data-cleaning-numpy-pandas/)\n", + "* [Pandas - Cleaning Data](https://www.w3schools.com/python/pandas/pandas_cleaning.asp)" + ] + }, + { + "cell_type": "markdown", + "id": "ae6e1a69", + "metadata": {}, + "source": [ + "### Plotting\n", + "Data visualization (or plotting) is another important data analysis topic and deserves its own discussion. Here in this notebook we will only demonstrate basic tools for simple data visualization tasks.\n", + "\n", + "Here are some references you might find useful:\n", + "* xarray plotting: https://docs.xarray.dev/en/stable/user-guide/plotting.html\n", + "* Atmospheric Community Toolkit (ACT): https://arm-doe.github.io/ACT/index.html\n", + "\n", + "Note: this notebook is auto-generated using a template. It uses a general idea to select variable(s) to plot and is not customized to each individual notebook. Feel free to change the variables of interest, especially if a certain figure fails to plot." + ] + }, + { + "cell_type": "markdown", + "id": "7e417fb2", + "metadata": {}, + "source": [ + "#### 1-dimensional basic time series plot\n", + "\n", + "For the following plot we would like to find a variable such that\n", + "* it has one and only one dimension\n", + "* \"time\" is its coordinate variable\n", + "* it is not a dimension itself,\n", + "* it is not a special variable with substrings within [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "\n", + "Please see the following example in action. 
(For more details about pandas filtering, please see the following references.)\n", + "* [pandas: multiple conditions while indexing data frame](https://stackoverflow.com/questions/22591174/pandas-multiple-conditions-while-indexing-data-frame-unexpected-behavior)\n", + "* [pandas.DataFrame.apply](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.apply.html)\n", + "* [How to test if a string contains one of the substrings in a list...](https://stackoverflow.com/questions/26577516/how-to-test-if-a-string-contains-one-of-the-substrings-in-a-list-in-pandas)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "30edac2d", + "metadata": {}, + "outputs": [], + "source": [ + "# (pandas query) filter n_dim==1, non-dimensional, and not contains the following substrings\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter = df_info[(df_info.n_dim == 1) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (df_info.is_dim==False) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4777a659", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " var_1d = df_filter.var_name.values[0]\n", + " var_1d\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fb5ae985", + "metadata": {}, + "outputs": [], + "source": [ + "# Note: if failed, change to another variable to plot.\n", + "try:\n", + " ds[var_1d].plot()\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "markdown", + "id": "9e41066e", + "metadata": {}, + "source": [ + "#### 2-dimensional basic plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "09bd4adc", + "metadata": {}, + "outputs": [], + "source": [ + "# (pandas query) filter n_dim==2, non-dimensional, and not contains the 
following substrings\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_2 = df_info[(df_info.n_dim == 2) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (df_info.is_dim==False) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter_2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2cdea1a9", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " var_2d = df_filter_2.var_name.values[0]\n", + " var_2d\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f5f48879", + "metadata": {}, + "outputs": [], + "source": [ + "# Note: if failed, change to another variable to plot.\n", + "try:\n", + " print(ds[var_2d].dims)\n", + " # ds[var_2d].plot()\n", + "\n", + " # conventionally, use \"time\" as x-axis\n", + " ds[var_2d].plot(x=\"time\")\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "markdown", + "id": "7554b2a6", + "metadata": {}, + "source": [ + "#### qc-plotting (optional)\n", + "\n", + "Note: act qc plotting has more strict requirement, one of them is the associated qc variable need to have \"flag_masks\" attributes. 
Which is added by using ds = act.io.armfiles.read_netcdf(files_list), then ds.clean.cleanup()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "71c2096f", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " ds_act = act.io.armfiles.read_netcdf(full_path)\n", + " print(type(ds_act))\n", + " ds_act.clean.cleanup()\n", + "\n", + " # or \n", + " # ds.clean.cleanup()\n", + "except Exception as e:\n", + " print(\"ERROR\", e)\n", + " ds_act = ds" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "844f8505", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter valid variables for ACT qc plotting\n", + "condition = (\"qc_\" + df_info.var_name).apply(lambda x: ds_act[x].attrs.get(\"flag_masks\") is not None \n", + " if x in list(ds.variables) else False)\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_3 = df_info[(df_info.is_dim==False) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings))) &\n", + " condition\n", + " ]\n", + "df_filter_3" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ecd38cc6", + "metadata": {}, + "outputs": [], + "source": [ + "# QC Plot\n", + "try:\n", + " qc_variable = df_filter_3.var_name.values[0]\n", + " print(qc_variable)\n", + "except Exception as e:\n", + " print(e)\n", + " \n", + "try:\n", + "\n", + " # Plot\n", + " qc_display = act.plotting.TimeSeriesDisplay(ds_act)\n", + " qc_display.add_subplots((2,), figsize = (9.5,10))\n", + " qc_ax = qc_display.plot(qc_variable, subplot_index=(0,), set_title=\"QC results on field: \" + qc_variable,)\n", + " qc_ax.grid()\n", + " qc_display.qc_flag_block_plot(qc_variable, subplot_index=(1,))\n", + " qc_ax.set_xlabel(f\"UTC Time starts at {ds.time.data[0]}\")\n", + "\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e)" + ] + }, + { + "cell_type": "markdown", + "id": "d76e4d27", + "metadata": 
{}, + "source": [ + "#### bonus: choose variables to plot from a dropdown menu " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bb733804", + "metadata": {}, + "outputs": [], + "source": [ + "# Valid variables filtering\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_4 = df_info[(df_info.is_dim==False) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter_4" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "82d5c20d", + "metadata": {}, + "outputs": [], + "source": [ + "# example 1: using xarray plot\n", + "\n", + "# Uncomment the following cell to try the interactive plot (ctrl + /)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2fc9aaa4", + "metadata": {}, + "outputs": [], + "source": [ + "# %matplotlib widget\n", + "# plt.clf()\n", + "\n", + "# fig, ax = plt.subplots(figsize=(10, 4))\n", + "\n", + "# available_variables = df_filter_4.var_name.values\n", + "# @widgets.interact(var=available_variables)\n", + "# def update(var = available_variables[0]):\n", + "# fig.clear() # Remove old lines from plot and plot new one\n", + "# if len(ds[var].dims)==2:\n", + "# ds[var].plot(x=\"time\", add_colorbar=False)\n", + "# else:\n", + "# ds[var].plot()\n", + "# plt.grid()\n", + "# plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fabdd802", + "metadata": {}, + "outputs": [], + "source": [ + "# example 2: using act plot\n", + "\n", + "# Uncomment the following cell to try the interactive plot (ctrl + /)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "962b4186", + "metadata": {}, + "outputs": [], + "source": [ + "# %matplotlib widget\n", + "# plt.clf()\n", + "\n", + "# available_variables = df_filter_4.var_name.values\n", + "\n", + "\n", + "# @widgets.interact(var=available_variables)\n", + "# def 
update(var = available_variables[0]):\n", + "\n", + "# i_display = act.plotting.TimeSeriesDisplay(ds_act)\n", + "# i_display.add_subplots((1,), figsize=(10, 4))\n", + "# ax = i_display.plot(var, subplot_index=(0,), set_title=f\"{var} ({ds_act[var].attrs['long_name']})\",)\n", + "\n", + "# ax.set_xlabel(f\"UTC Time starts at {ds.time.data[0]}\")\n", + "# ax.grid()\n", + "# plt.show()\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.16" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/AOD/sasheniraod.c1.ipynb b/VAPs/quicklook/AOD/sasheniraod.c1.ipynb new file mode 100644 index 00000000..7ab77627 --- /dev/null +++ b/VAPs/quicklook/AOD/sasheniraod.c1.ipynb @@ -0,0 +1,339 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# SASHENIRAOD.C1 Plots\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/aod) for more information about this vap." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib widget\n", + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import ipywidgets as widgets\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "# Data archive directory\n", + "DATA_DIR = r'/data/archive/'\n", + "\n", + "# Datastream info\n", + "DATASTREAM_NAME = 'sasheniraod'\n", + "DATA_LEVEL = 'c1'\n", + "LOCATIONS = [{'end_date': '2022-10-01', 'facility': 'M1', 'site': 'hou', 'start_date': '2021-09-21'}, {'end_date': '2015-12-01', 'facility': 'M1', 'site': 'mao', 'start_date': '2014-01-01'}, {'end_date': '2013-06-21', 'facility': 'M1', 'site': 'pvc', 'start_date': '2012-06-27'}, {'end_date': '2019-07-27', 'facility': 'C1', 'site': 'sgp', 'start_date': '2011-03-22'}]" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "9faaf875", + "metadata": {}, + "source": [ + "## Define site, facility, and date range" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ac6764f5", + "metadata": {}, + "outputs": [], + "source": [ + "print(\"The following locations and date ranges are available for this VAP:\")\n", + "display(pd.DataFrame(LOCATIONS, columns=['site', 'facility', 'start_date', 'end_date']))" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "8d132223", + "metadata": {}, + "source": [ + "#### Define site, facility, and date range (date format: YYYY-MM-DD) using the variables below:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e563983a", + "metadata": {}, + "outputs": [], + "source": [ + "site_facility = ( 'sgp', 'C1' )\n", + "\n", + "date_start = '2019-07-26'\n", + "date_end = '2019-07-26'" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + 
"id": "bccd3dfe-2f99-49a5-bace-ea37e8dd8fc3", + "metadata": {}, + "source": [ + "## Load data files\n", + "Load data files from /data/archive/" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cb4b9a26-c574-49c0-a521-658fa553e39e", + "metadata": {}, + "outputs": [], + "source": [ + "# Compile list of files\n", + "site, facility = site_facility\n", + "d_date_start = datetime.strptime(date_start, '%Y-%m-%d')\n", + "d_date_end = datetime.strptime(date_end, '%Y-%m-%d')\n", + "dir_path = os.path.join(DATA_DIR + site, site + DATASTREAM_NAME + facility + r'.' + DATA_LEVEL )\n", + "dir_path\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6be8f3dc", + "metadata": {}, + "outputs": [], + "source": [ + "from datetime import date, timedelta\n", + "import pandas as pd\n", + "\n", + "def get_ARM_formated_dates(start_date, end_date):\n", + " \"\"\"\n", + " Get a list of ARM conventional formated date lists, based on start_date and end_date(inclusive)\n", + " EXAMPLE:\n", + " get_ARM_formated_dates(start_date=\"20180219\", end_date=\"20180221\")\n", + " >> [\"20180219\", \"20180220\", \"20180221\"] \n", + " \"\"\"\n", + " \n", + " _start_date = pd.to_datetime(start_date)\n", + " _end_date = pd.to_datetime(end_date)\n", + " \n", + " delta = _end_date - _start_date # returns timedelta \n", + " dates = []\n", + "\n", + " for i in range(delta.days + 1):\n", + " day = _start_date + timedelta(days=i)\n", + " day_formated = day.strftime(format=\"%Y%m%d\")\n", + " dates.append(day_formated)\n", + " return dates\n", + "\n", + "\n", + "get_ARM_formated_dates(start_date=date_start, end_date=date_end)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "51feea2e", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter a list of files based on date pattern\n", + "import glob\n", + "dates = get_ARM_formated_dates(start_date=date_start, end_date=date_end)\n", + "files_filter = []\n", + "for date in dates:\n", + " 
files_filter += glob.glob(f'{dir_path}/*.{date}*.*')\n", + " files_filter\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0e5d0d7", + "metadata": {}, + "outputs": [], + "source": [ + "# Load files as a single dataset\n", + "files_list = files_filter \n", + "ds = act.io.armfiles.read_netcdf(files_list)\n", + "ds.clean.cleanup()\n", + "print(f'{len(files_list)} files loaded')\n", + "ds\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "4a551094-9ec0-4b64-b80a-9940573c2f50", + "metadata": {}, + "source": [ + "## Plot time series data\n", + "#### Define the list of variables to be plotted:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d09b789e-84f1-4605-846b-a72c110c8048", + "metadata": {}, + "outputs": [], + "source": [ + "variables_to_plot = ['diffuse_transmittance', 'direct_normal_transmittance', 'aerosol_optical_depth']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3458fb08-035b-4898-9253-0a94e6f9c97b", + "metadata": {}, + "outputs": [], + "source": [ + "ts_display = act.plotting.TimeSeriesDisplay(ds)\n", + "ts_display.add_subplots((len(variables_to_plot),), figsize = (9.5,4*len(variables_to_plot)))\n", + "\n", + "for i,v in enumerate(variables_to_plot):\n", + " ts_ax = ts_display.plot(v, subplot_index=(i,), set_title=ds.variables[v].attrs['long_name'],)\n", + " ts_ax.grid()\n", + "\n", + "plt.show()\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "194399aa-1907-452b-8ba9-bc31d7f60291", + "metadata": {}, + "source": [ + "## Quality check plots\n", + "#### Define variable for QC plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9a0ef63d-3eeb-48fc-a24a-43258b6134b8", + "metadata": {}, + "outputs": [], + "source": [ + "qc_variable = 'aerosol_optical_depth'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "532663a3-4dc0-4497-bda8-018c5f91e1c4", + "metadata": {}, + "outputs": 
[], + "source": [ + "# QC Plot\n", + "if ('qc_'+qc_variable) in ds.variables:\n", + "\n", + " # Plot\n", + " qc_display = act.plotting.TimeSeriesDisplay(ds)\n", + " qc_display.add_subplots((2,), figsize = (9.5,10))\n", + " qc_ax = qc_display.plot(qc_variable, subplot_index=(0,), set_title=\"QC results on field: \" + qc_variable,)\n", + " qc_ax.grid()\n", + " qc_display.qc_flag_block_plot(qc_variable, subplot_index=(1,))\n", + "\n", + " plt.show()\n", + "else:\n", + " print(f'QC not available for the selected field: {qc_variable}')\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "2308db16-f362-4033-a11a-c3e5e75ad9ba", + "metadata": {}, + "source": [ + "## Field selection dropdown menu\n", + "Select variable to be plotted from a dropdown menu" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d5792fa8-2253-441a-8ac6-1e07a0345031", + "metadata": {}, + "outputs": [], + "source": [ + "plt.ioff()\n", + "\n", + "# populate dropdown menu with available variables \n", + "available_variables = [v for v in ds.variables if not('time' in v or v.startswith('qc_') or v.startswith('source_')) and 'long_name' in ds.variables[v].attrs]\n", + "d_variable = 'diffuse_transmittance'\n", + "dropdown = widgets.Dropdown(\n", + " options = [(ds.variables[v].attrs['long_name'], v) for v in available_variables],\n", + " value= d_variable,\n", + " description='Field:',\n", + " disabled=False,\n", + ")\n", + "dropdown.layout.margin = '0px 30% 0px 20%'\n", + "dropdown.layout.width = '50%'\n", + "\n", + "# set up display\n", + "i_display = act.plotting.TimeSeriesDisplay(ds)\n", + "i_display.add_subplots((1,), figsize = (9.5,5))\n", + "i_ax = i_display.plot(d_variable, subplot_index=(0,), set_title=ds.variables[d_variable].attrs['long_name'],)\n", + "i_ax.grid()\n", + "i_fig = i_display.fig\n", + "\n", + "# update plot callback function\n", + "def update_plot(change):\n", + " i_ax.cla()\n", + " i_ax_new = i_display.plot(change.new, subplot_index=(0,), 
set_title=ds.variables[change.new].attrs['long_name'],)\n", + " i_ax_new.grid()\n", + " i_fig.canvas.draw()\n", + " i_fig.canvas.flush_events()\n", + "\n", + "dropdown.observe(update_plot, names='value')\n", + "\n", + "widgets.AppLayout(\n", + " header=dropdown,\n", + " center=i_fig.canvas,\n", + " pane_heights=[1, 6,1]\n", + ")\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.12" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/AOD/sashevisaod.c1.ipynb b/VAPs/quicklook/AOD/sashevisaod.c1.ipynb new file mode 100644 index 00000000..62f677dd --- /dev/null +++ b/VAPs/quicklook/AOD/sashevisaod.c1.ipynb @@ -0,0 +1,339 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# SASHEVISAOD.C1 Plots\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/aod) for more information about this vap." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib widget\n", + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import ipywidgets as widgets\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "# Data archive directory\n", + "DATA_DIR = r'/data/archive/'\n", + "\n", + "# Datastream info\n", + "DATASTREAM_NAME = 'sashevisaod'\n", + "DATA_LEVEL = 'c1'\n", + "LOCATIONS = [{'end_date': '2022-10-01', 'facility': 'M1', 'site': 'hou', 'start_date': '2021-09-21'}, {'end_date': '2015-12-01', 'facility': 'M1', 'site': 'mao', 'start_date': '2014-01-01'}, {'end_date': '2013-06-21', 'facility': 'M1', 'site': 'pvc', 'start_date': '2012-06-27'}, {'end_date': '2019-07-27', 'facility': 'C1', 'site': 'sgp', 'start_date': '2011-03-22'}]" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "9faaf875", + "metadata": {}, + "source": [ + "## Define site, facility, and date range" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ac6764f5", + "metadata": {}, + "outputs": [], + "source": [ + "print(\"The following locations and date ranges are available for this VAP:\")\n", + "display(pd.DataFrame(LOCATIONS, columns=['site', 'facility', 'start_date', 'end_date']))" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "8d132223", + "metadata": {}, + "source": [ + "#### Define site, facility, and date range (date format: YYYY-MM-DD) using the variables below:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e563983a", + "metadata": {}, + "outputs": [], + "source": [ + "site_facility = ( 'sgp', 'C1' )\n", + "\n", + "date_start = '2019-07-26'\n", + "date_end = '2019-07-26'" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + 
"id": "bccd3dfe-2f99-49a5-bace-ea37e8dd8fc3", + "metadata": {}, + "source": [ + "## Load data files\n", + "Load data files from /data/archive/" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cb4b9a26-c574-49c0-a521-658fa553e39e", + "metadata": {}, + "outputs": [], + "source": [ + "# Compile list of files\n", + "site, facility = site_facility\n", + "d_date_start = datetime.strptime(date_start, '%Y-%m-%d')\n", + "d_date_end = datetime.strptime(date_end, '%Y-%m-%d')\n", + "dir_path = os.path.join(DATA_DIR + site, site + DATASTREAM_NAME + facility + r'.' + DATA_LEVEL )\n", + "dir_path\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6be8f3dc", + "metadata": {}, + "outputs": [], + "source": [ + "from datetime import date, timedelta\n", + "import pandas as pd\n", + "\n", + "def get_ARM_formated_dates(start_date, end_date):\n", + " \"\"\"\n", + " Get a list of ARM conventional formated date lists, based on start_date and end_date(inclusive)\n", + " EXAMPLE:\n", + " get_ARM_formated_dates(start_date=\"20180219\", end_date=\"20180221\")\n", + " >> [\"20180219\", \"20180220\", \"20180221\"] \n", + " \"\"\"\n", + " \n", + " _start_date = pd.to_datetime(start_date)\n", + " _end_date = pd.to_datetime(end_date)\n", + " \n", + " delta = _end_date - _start_date # returns timedelta \n", + " dates = []\n", + "\n", + " for i in range(delta.days + 1):\n", + " day = _start_date + timedelta(days=i)\n", + " day_formated = day.strftime(format=\"%Y%m%d\")\n", + " dates.append(day_formated)\n", + " return dates\n", + "\n", + "\n", + "get_ARM_formated_dates(start_date=date_start, end_date=date_end)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "51feea2e", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter a list of files based on date pattern\n", + "import glob\n", + "dates = get_ARM_formated_dates(start_date=date_start, end_date=date_end)\n", + "files_filter = []\n", + "for date in dates:\n", + " 
files_filter += glob.glob(f'{dir_path}/*.{date}*.*')\n", + " files_filter\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0e5d0d7", + "metadata": {}, + "outputs": [], + "source": [ + "# Load files as a single dataset\n", + "files_list = files_filter \n", + "ds = act.io.armfiles.read_netcdf(files_list)\n", + "ds.clean.cleanup()\n", + "print(f'{len(files_list)} files loaded')\n", + "ds\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "4a551094-9ec0-4b64-b80a-9940573c2f50", + "metadata": {}, + "source": [ + "## Plot time series data\n", + "#### Define the list of variables to be plotted:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d09b789e-84f1-4605-846b-a72c110c8048", + "metadata": {}, + "outputs": [], + "source": [ + "variables_to_plot = ['diffuse_transmittance', 'direct_normal_transmittance', 'aerosol_optical_depth']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3458fb08-035b-4898-9253-0a94e6f9c97b", + "metadata": {}, + "outputs": [], + "source": [ + "ts_display = act.plotting.TimeSeriesDisplay(ds)\n", + "ts_display.add_subplots((len(variables_to_plot),), figsize = (9.5,4*len(variables_to_plot)))\n", + "\n", + "for i,v in enumerate(variables_to_plot):\n", + " ts_ax = ts_display.plot(v, subplot_index=(i,), set_title=ds.variables[v].attrs['long_name'],)\n", + " ts_ax.grid()\n", + "\n", + "plt.show()\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "194399aa-1907-452b-8ba9-bc31d7f60291", + "metadata": {}, + "source": [ + "## Quality check plots\n", + "#### Define variable for QC plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9a0ef63d-3eeb-48fc-a24a-43258b6134b8", + "metadata": {}, + "outputs": [], + "source": [ + "qc_variable = 'aerosol_optical_depth'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "532663a3-4dc0-4497-bda8-018c5f91e1c4", + "metadata": {}, + "outputs": 
[], + "source": [ + "# QC Plot\n", + "if ('qc_'+qc_variable) in ds.variables:\n", + "\n", + " # Plot\n", + " qc_display = act.plotting.TimeSeriesDisplay(ds)\n", + " qc_display.add_subplots((2,), figsize = (9.5,10))\n", + " qc_ax = qc_display.plot(qc_variable, subplot_index=(0,), set_title=\"QC results on field: \" + qc_variable,)\n", + " qc_ax.grid()\n", + " qc_display.qc_flag_block_plot(qc_variable, subplot_index=(1,))\n", + "\n", + " plt.show()\n", + "else:\n", + " print(f'QC not available for the selected field: {qc_variable}')\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "2308db16-f362-4033-a11a-c3e5e75ad9ba", + "metadata": {}, + "source": [ + "## Field selection dropdown menu\n", + "Select variable to be plotted from a dropdown menu" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d5792fa8-2253-441a-8ac6-1e07a0345031", + "metadata": {}, + "outputs": [], + "source": [ + "plt.ioff()\n", + "\n", + "# populate dropdown menu with available variables \n", + "available_variables = [v for v in ds.variables if not('time' in v or v.startswith('qc_') or v.startswith('source_')) and 'long_name' in ds.variables[v].attrs]\n", + "d_variable = 'diffuse_transmittance'\n", + "dropdown = widgets.Dropdown(\n", + " options = [(ds.variables[v].attrs['long_name'], v) for v in available_variables],\n", + " value= d_variable,\n", + " description='Field:',\n", + " disabled=False,\n", + ")\n", + "dropdown.layout.margin = '0px 30% 0px 20%'\n", + "dropdown.layout.width = '50%'\n", + "\n", + "# set up display\n", + "i_display = act.plotting.TimeSeriesDisplay(ds)\n", + "i_display.add_subplots((1,), figsize = (9.5,5))\n", + "i_ax = i_display.plot(d_variable, subplot_index=(0,), set_title=ds.variables[d_variable].attrs['long_name'],)\n", + "i_ax.grid()\n", + "i_fig = i_display.fig\n", + "\n", + "# update plot callback function\n", + "def update_plot(change):\n", + " i_ax.cla()\n", + " i_ax_new = i_display.plot(change.new, subplot_index=(0,), 
set_title=ds.variables[change.new].attrs['long_name'],)\n", + " i_ax_new.grid()\n", + " i_fig.canvas.draw()\n", + " i_fig.canvas.flush_events()\n", + "\n", + "dropdown.observe(update_plot, names='value')\n", + "\n", + "widgets.AppLayout(\n", + " header=dropdown,\n", + " center=i_fig.canvas,\n", + " pane_heights=[1, 6,1]\n", + ")\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.12" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/AOP/.ipynb_checkpoints/aoppsap1flynn1m.c1-checkpoint.ipynb b/VAPs/quicklook/AOP/.ipynb_checkpoints/aoppsap1flynn1m.c1-checkpoint.ipynb new file mode 100644 index 00000000..927b1391 --- /dev/null +++ b/VAPs/quicklook/AOP/.ipynb_checkpoints/aoppsap1flynn1m.c1-checkpoint.ipynb @@ -0,0 +1,8265 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# AOPPSAP1FLYNN1M.C1 Plots\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/aop) for more information about this vap." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib widget\n", + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import ipywidgets as widgets\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "# Data archive directory\n", + "DATA_DIR = r'/data/archive/'\n", + "\n", + "# Datastream info\n", + "DATASTREAM_NAME = 'aoppsap1flynn1m'\n", + "DATA_LEVEL = 'c1'\n", + "LOCATIONS = [{'end_date': '2017-11-01', 'facility': 'M1', 'site': 'asi', 'start_date': '2016-04-23'}, {'end_date': '2020-06-01', 'facility': 'M1', 'site': 'anx', 'start_date': '2019-12-01'}, {'end_date': '2021-10-14', 'facility': 'M1', 'site': 'guc', 'start_date': '2021-09-02'}, {'end_date': '2023-06-15', 'facility': 'S2', 'site': 'guc', 'start_date': '2021-10-27'}, {'end_date': '2021-06-14', 'facility': 'M1', 'site': 'oli', 'start_date': '2016-08-06'}, {'end_date': '2019-04-30', 'facility': 'M1', 'site': 'cor', 'start_date': '2018-09-23'}, {'end_date': '2015-12-01', 'facility': 'S1', 'site': 'mao', 'start_date': '2014-02-06'}, {'end_date': '2018-01-11', 'facility': 'M1', 'site': 'mar', 'start_date': '2017-10-29'}, {'end_date': '2020-10-01', 'facility': 'M1', 'site': 'mos', 'start_date': '2019-10-11'}, {'end_date': '2023-12-05', 'facility': 'M1', 'site': 'epc', 'start_date': '2023-01-15'}, {'end_date': '2022-09-30', 'facility': 'M1', 'site': 'hou', 'start_date': '2021-09-08'}, {'end_date': '2023-12-11', 'facility': 'C1', 'site': 'ena', 'start_date': '2013-10-09'}, {'end_date': '2017-09-29', 'facility': 'C1', 'site': 'sgp', 'start_date': '2015-10-01'}, {'end_date': '2023-12-12', 'facility': 'E13', 'site': 'sgp', 'start_date': '2016-11-15'}]" + ] + }, + { + "cell_type": "markdown", + "id": "9faaf875", + "metadata": {}, + "source": [ + 
"## Define site, facility, and date range" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "ac6764f5", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The following locations and date ranges are available for this VAP:\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sitefacilitystart_dateend_date
0asiM12016-04-232017-11-01
1anxM12019-12-012020-06-01
2gucM12021-09-022021-10-14
3gucS22021-10-272023-06-15
4oliM12016-08-062021-06-14
5corM12018-09-232019-04-30
6maoS12014-02-062015-12-01
7marM12017-10-292018-01-11
8mosM12019-10-112020-10-01
9epcM12023-01-152023-12-05
10houM12021-09-082022-09-30
11enaC12013-10-092023-12-11
12sgpC12015-10-012017-09-29
13sgpE132016-11-152023-12-12
\n", + "
" + ], + "text/plain": [ + " site facility start_date end_date\n", + "0 asi M1 2016-04-23 2017-11-01\n", + "1 anx M1 2019-12-01 2020-06-01\n", + "2 guc M1 2021-09-02 2021-10-14\n", + "3 guc S2 2021-10-27 2023-06-15\n", + "4 oli M1 2016-08-06 2021-06-14\n", + "5 cor M1 2018-09-23 2019-04-30\n", + "6 mao S1 2014-02-06 2015-12-01\n", + "7 mar M1 2017-10-29 2018-01-11\n", + "8 mos M1 2019-10-11 2020-10-01\n", + "9 epc M1 2023-01-15 2023-12-05\n", + "10 hou M1 2021-09-08 2022-09-30\n", + "11 ena C1 2013-10-09 2023-12-11\n", + "12 sgp C1 2015-10-01 2017-09-29\n", + "13 sgp E13 2016-11-15 2023-12-12" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "print(\"The following locations and date ranges are available for this VAP:\")\n", + "display(pd.DataFrame(LOCATIONS, columns=['site', 'facility', 'start_date', 'end_date']))" + ] + }, + { + "cell_type": "markdown", + "id": "8d132223", + "metadata": {}, + "source": [ + "#### Define site, facility, and date range (date format: YYYY-MM-DD) using the variables below:" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "e563983a", + "metadata": {}, + "outputs": [], + "source": [ + "site_facility = ( 'sgp', 'C1' )\n", + "\n", + "date_start = '2017-09-27'\n", + "date_end = '2017-09-29'" + ] + }, + { + "cell_type": "markdown", + "id": "bccd3dfe-2f99-49a5-bace-ea37e8dd8fc3", + "metadata": {}, + "source": [ + "## Load data files\n", + "Load data files from /data/archive/" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "cb4b9a26-c574-49c0-a521-658fa553e39e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'/data/archive/sgp/sgpaoppsap1flynn1mC1.c1'" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Compile list of files\n", + "site, facility = site_facility\n", + "d_date_start = datetime.strptime(date_start, '%Y-%m-%d')\n", + "d_date_end = datetime.strptime(date_end, 
'%Y-%m-%d')\n", + "dir_path = os.path.join(DATA_DIR + site, site + DATASTREAM_NAME + facility + r'.' + DATA_LEVEL )\n", + "dir_path\n" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "6be8f3dc", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['20170927', '20170928', '20170929']" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from datetime import date, timedelta\n", + "import pandas as pd\n", + "\n", + "def get_ARM_formated_dates(start_date, end_date):\n", + " \"\"\"\n", + " Get a list of ARM conventional formated date lists, based on start_date and end_date(inclusive)\n", + " EXAMPLE:\n", + " get_ARM_formated_dates(start_date=\"20180219\", end_date=\"20180221\")\n", + " >> [\"20180219\", \"20180220\", \"20180221\"] \n", + " \"\"\"\n", + " \n", + " _start_date = pd.to_datetime(start_date)\n", + " _end_date = pd.to_datetime(end_date)\n", + " \n", + " delta = _end_date - _start_date # returns timedelta \n", + " dates = []\n", + "\n", + " for i in range(delta.days + 1):\n", + " day = _start_date + timedelta(days=i)\n", + " day_formated = day.strftime(format=\"%Y%m%d\")\n", + " dates.append(day_formated)\n", + " return dates\n", + "\n", + "\n", + "get_ARM_formated_dates(start_date=date_start, end_date=date_end)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "51feea2e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['/data/archive/sgp/sgpaoppsap1flynn1mC1.c1/sgpaoppsap1flynn1mC1.c1.20170927.000030.nc',\n", + " '/data/archive/sgp/sgpaoppsap1flynn1mC1.c1/sgpaoppsap1flynn1mC1.c1.20170928.000030.nc',\n", + " '/data/archive/sgp/sgpaoppsap1flynn1mC1.c1/sgpaoppsap1flynn1mC1.c1.20170929.000030.nc']" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Filter a list of files based on date pattern\n", + "import glob\n", + "dates = 
get_ARM_formated_dates(start_date=date_start, end_date=date_end)\n", + "files_filter = []\n", + "for date in dates:\n", + " files_filter += glob.glob(f'{dir_path}/*.{date}*.*')\n", + " files_filter\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "b0e5d0d7", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "3 files loaded\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
<xarray.Dataset>\n",
+       "Dimensions:                         (time: 4320, bound: 2)\n",
+       "Coordinates:\n",
+       "  * time                            (time) datetime64[ns] 2017-09-27T00:00:30...\n",
+       "Dimensions without coordinates: bound\n",
+       "Data variables: (12/132)\n",
+       "    base_time                       (time) datetime64[ns] 2017-09-27 ... 2017...\n",
+       "    time_offset                     (time) datetime64[ns] 2017-09-27T00:00:30...\n",
+       "    time_bounds                     (time, bound) object dask.array<chunksize=(1440, 2), meta=np.ndarray>\n",
+       "    impactor_state                  (time) int32 10 10 10 ... -9999 -9999 -9999\n",
+       "    Bs_B                            (time) float32 dask.array<chunksize=(1440,), meta=np.ndarray>\n",
+       "    qc_Bs_B                         (time) int32 dask.array<chunksize=(1440,), meta=np.ndarray>\n",
+       "    ...                              ...\n",
+       "    K1_B                            (time) float32 dask.array<chunksize=(1440,), meta=np.ndarray>\n",
+       "    K1_G                            (time) float32 dask.array<chunksize=(1440,), meta=np.ndarray>\n",
+       "    K1_R                            (time) float32 dask.array<chunksize=(1440,), meta=np.ndarray>\n",
+       "    lat                             (time) float32 36.6 36.6 36.6 ... 36.6 36.6\n",
+       "    lon                             (time) float32 -97.49 -97.49 ... -97.49\n",
+       "    alt                             (time) float32 318.0 318.0 ... 318.0 318.0\n",
+       "Attributes: (12/20)\n",
+       "    command_line:                    aosaop -n aosaoppsap -s sgp -f C1 -D -b ...\n",
+       "    Conventions:                     ARM-1.2\n",
+       "    process_version:                 vap-aosaop-1.2-0.el6\n",
+       "    dod_version:                     aoppsap1flynn1m-c1-1.2\n",
+       "    input_datastreams:               sgpaosnephdry1mC1.b1 : 1.0 : 20170927.00...\n",
+       "    site_id:                         sgp\n",
+       "    ...                              ...\n",
+       "    doi:                             10.5439/1369240\n",
+       "    history:                         created by user dsmgr on machine ruby at...\n",
+       "    _file_dates:                     ['20170927', '20170928', '20170929']\n",
+       "    _file_times:                     ['000030', '000030', '000030']\n",
+       "    _datastream:                     sgpaoppsap1flynn1mC1.c1\n",
+       "    _arm_standards_flag:             1
" + ], + "text/plain": [ + "\n", + "Dimensions: (time: 4320, bound: 2)\n", + "Coordinates:\n", + " * time (time) datetime64[ns] 2017-09-27T00:00:30...\n", + "Dimensions without coordinates: bound\n", + "Data variables: (12/132)\n", + " base_time (time) datetime64[ns] 2017-09-27 ... 2017...\n", + " time_offset (time) datetime64[ns] 2017-09-27T00:00:30...\n", + " time_bounds (time, bound) object dask.array\n", + " impactor_state (time) int32 10 10 10 ... -9999 -9999 -9999\n", + " Bs_B (time) float32 dask.array\n", + " qc_Bs_B (time) int32 dask.array\n", + " ... ...\n", + " K1_B (time) float32 dask.array\n", + " K1_G (time) float32 dask.array\n", + " K1_R (time) float32 dask.array\n", + " lat (time) float32 36.6 36.6 36.6 ... 36.6 36.6\n", + " lon (time) float32 -97.49 -97.49 ... -97.49\n", + " alt (time) float32 318.0 318.0 ... 318.0 318.0\n", + "Attributes: (12/20)\n", + " command_line: aosaop -n aosaoppsap -s sgp -f C1 -D -b ...\n", + " Conventions: ARM-1.2\n", + " process_version: vap-aosaop-1.2-0.el6\n", + " dod_version: aoppsap1flynn1m-c1-1.2\n", + " input_datastreams: sgpaosnephdry1mC1.b1 : 1.0 : 20170927.00...\n", + " site_id: sgp\n", + " ... 
...\n", + " doi: 10.5439/1369240\n", + " history: created by user dsmgr on machine ruby at...\n", + " _file_dates: ['20170927', '20170928', '20170929']\n", + " _file_times: ['000030', '000030', '000030']\n", + " _datastream: sgpaoppsap1flynn1mC1.c1\n", + " _arm_standards_flag: 1" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Load files as a single dataset\n", + "files_list = files_filter \n", + "ds = act.io.armfiles.read_netcdf(files_list)\n", + "ds.clean.cleanup()\n", + "print(f'{len(files_list)} files loaded')\n", + "ds\n" + ] + }, + { + "cell_type": "markdown", + "id": "4a551094-9ec0-4b64-b80a-9940573c2f50", + "metadata": {}, + "source": [ + "## Plot time series data\n", + "#### Define the list of variables to be plotted:" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "d09b789e-84f1-4605-846b-a72c110c8048", + "metadata": {}, + "outputs": [], + "source": [ + "variables_to_plot = ['Bs_B', 'Bs_G', 'Bs_R']" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "3458fb08-035b-4898-9253-0a94e6f9c97b", + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "49f2c7463c164a46aa13bd6286315713", + "version_major": 2, + "version_minor": 0 + }, + "image/png": "", + "text/html": [ + "\n", + "
\n", + "
\n", + " Figure\n", + "
\n", + " \n", + "
\n", + " " + ], + "text/plain": [ + "Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "ts_display = act.plotting.TimeSeriesDisplay(ds)\n", + "ts_display.add_subplots((len(variables_to_plot),), figsize = (9.5,4*len(variables_to_plot)))\n", + "\n", + "for i,v in enumerate(variables_to_plot):\n", + " ts_ax = ts_display.plot(v, subplot_index=(i,), set_title=ds.variables[v].attrs['long_name'],)\n", + " ts_ax.grid()\n", + "\n", + "plt.show()\n" + ] + }, + { + "cell_type": "markdown", + "id": "194399aa-1907-452b-8ba9-bc31d7f60291", + "metadata": {}, + "source": [ + "## Quality check plots\n", + "#### Define variable for QC plot" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "9a0ef63d-3eeb-48fc-a24a-43258b6134b8", + "metadata": {}, + "outputs": [], + "source": [ + "qc_variable = 'Bs_B'" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "532663a3-4dc0-4497-bda8-018c5f91e1c4", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/kefeimo/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/act/utils/datetime_utils.py:136: FutureWarning: Unlike other reduction functions (e.g. `skew`, `kurtosis`), the default behavior of `mode` typically preserves the axis it acts along. In SciPy 1.11.0, this behavior will change: the default value of `keepdims` will become False, the `axis` over which the statistic is taken will be eliminated, and the value None will no longer be accepted. Set `keepdims` to True or False to avoid this warning.\n", + " mode = stats.mode(np.diff(time))\n" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "1b8f0eadc61146b1b9944c7cadc77104", + "version_major": 2, + "version_minor": 0 + }, + "image/png": "", + "text/html": [ + "\n", + "
\n", + "
\n", + " Figure\n", + "
\n", + " \n", + "
\n", + " " + ], + "text/plain": [ + "Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# QC Plot\n", + "if ('qc_'+qc_variable) in ds.variables:\n", + "\n", + " # Plot\n", + " qc_display = act.plotting.TimeSeriesDisplay(ds)\n", + " qc_display.add_subplots((2,), figsize = (9.5,10))\n", + " qc_ax = qc_display.plot(qc_variable, subplot_index=(0,), set_title=\"QC results on field: \" + qc_variable,)\n", + " qc_ax.grid()\n", + " qc_display.qc_flag_block_plot(qc_variable, subplot_index=(1,))\n", + "\n", + " plt.show()\n", + "else:\n", + " print(f'QC not available for the selected field: {qc_variable}')\n" + ] + }, + { + "cell_type": "markdown", + "id": "2308db16-f362-4033-a11a-c3e5e75ad9ba", + "metadata": {}, + "source": [ + "## Field selection dropdown menu\n", + "Select variable to be plotted from a dropdown menu" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "d5792fa8-2253-441a-8ac6-1e07a0345031", + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "3d3082e5808440dc89d3713bcdf41d09", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "AppLayout(children=(Dropdown(description='Field:', index=1, layout=Layout(grid_area='header', margin='0px 30% …" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "plt.ioff()\n", + "\n", + "# populate dropdown menu with available variables \n", + "available_variables = [v for v in ds.variables if not('time' in v or v.startswith('qc_') or v.startswith('source_')) and 'long_name' in ds.variables[v].attrs]\n", + "d_variable = 'Bs_B'\n", + "dropdown = widgets.Dropdown(\n", + " options = [(ds.variables[v].attrs['long_name'], v) for v in available_variables],\n", + " value= d_variable,\n", + " description='Field:',\n", + " disabled=False,\n", 
+ ")\n", + "dropdown.layout.margin = '0px 30% 0px 20%'\n", + "dropdown.layout.width = '50%'\n", + "\n", + "# set up display\n", + "i_display = act.plotting.TimeSeriesDisplay(ds)\n", + "i_display.add_subplots((1,), figsize = (9.5,5))\n", + "i_ax = i_display.plot(d_variable, subplot_index=(0,), set_title=ds.variables[d_variable].attrs['long_name'],)\n", + "i_ax.grid()\n", + "i_fig = i_display.fig\n", + "\n", + "# update plot callback function\n", + "def update_plot(change):\n", + " i_ax.cla()\n", + " i_ax_new = i_display.plot(change.new, subplot_index=(0,), set_title=ds.variables[change.new].attrs['long_name'],)\n", + " i_ax_new.grid()\n", + " i_fig.canvas.draw()\n", + " i_fig.canvas.flush_events()\n", + "\n", + "dropdown.observe(update_plot, names='value')\n", + "\n", + "widgets.AppLayout(\n", + " header=dropdown,\n", + " center=i_fig.canvas,\n", + " pane_heights=[1, 6,1]\n", + ")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3c0aba93", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.16" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/AOP/AOP_tutorial.ipynb b/VAPs/quicklook/AOP/AOP_tutorial.ipynb new file mode 100644 index 00000000..1a07cebb --- /dev/null +++ b/VAPs/quicklook/AOP/AOP_tutorial.ipynb @@ -0,0 +1,838 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# AOPCLAP1FLYNN1M.C1 Notebook\n", + "\n", + "[Click 
here](https://www.arm.gov/capabilities/vaps/aop) for more information about this vap." + ] + }, + { + "cell_type": "markdown", + "id": "97097763", + "metadata": {}, + "source": [ + "In this notebook, we demonstrate the workflow to explore ARM vap data (using aopclap1flynn1m as an example.) Value-added products (VAPs) are higher-order data products that have been analyzed and processed to ease scientists’ use of ARM data in atmospheric research and global climate models. \n", + "Here is the main content we will cover." + ] + }, + { + "cell_type": "markdown", + "id": "eddec40f", + "metadata": {}, + "source": [ + "# Table of Content\n", + "## Access the data\n", + "* How to retrieve the data\n", + "* Data path and file name conventions\n", + "* Load data\n", + "## Explore the data\n", + "* NetCDF Data structure\n", + "* Xarray essentials\n", + "* Xarray Variable\n", + "## Plot the data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "import random\n", + "\n", + "import glob\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "id": "2372d5be", + "metadata": {}, + "source": [ + "## Access the data" + ] + }, + { + "cell_type": "markdown", + "id": "5b0d9684", + "metadata": {}, + "source": [ + "### How to retrieve the data\n", + "We assume the path \"/data/archive\" is available where you are running this notebook. You can use `os.path.exists(\"/data/archive\")` to verify if the path exists at your machine.\n", + "\n", + "\n", + "### Data path and file name conventions\n", + "There several common terminologies regarding ARM data, for example, data-stream-name, data-level, site, facility, instrument, etc. 
(For more details, please see [ARM Data File Standards Version 1.2](https://www.google.com/search?q=arm+datastream+facility+cite+definition&rlz=1C1GCEJ_enUS1029US1029&ei=hb41ZICrBPukqtsPvMODIA&ved=0ahUKEwjAgeiS0aL-AhV7kmoFHbzhAAQQ4dUDCBA&uact=5&oq=arm+datastream+facility+cite+definition&gs_lcp=Cgxnd3Mtd2l6LXNlcnAQAzIFCAAQogQyBQgAEKIEMgUIABCiBDIFCAAQogQyBQgAEKIEOgoIABBHENYEELADOgoIIRCgARDDBBAKSgQIQRgAUJ4DWIoOYO0PaAFwAXgAgAGMAYgBuQmSAQMyLjmYAQCgAQHIAQjAAQE&sclient=gws-wiz-serp).) \n", + "\n", + "For example, this notebook is called `aopclap1flynn1m.c1`, where `aopclap1flynn1m` is the \"datastream name\", and `c1` is the \"data level\".\n", + "\n", + "This datastream also contains site `sgp` and facility `C1`. (Note: an individual datastream might have multiple site-facility pairs.)\n", + "In such a case, the data of this data-stream is stored at `/data/archive/sgp/sgpaopclap1flynn1mC1.c1`, which is in the format of `{DATA_DIR}/{site}/{site}{datastream_name}{facility}.{data_level}`. We can use the following method to assign the data-stream directory: `datastream_dir = os.path.join(DATA_DIR, site, site + DATASTREAM_NAME + facility + '.' + DATA_LEVEL )`\n", + "\n", + "The data files under datastream_dir also follow naming conventions. But once you reach the datastream_dir level, the most important file naming convention to differentiate the files is \"yyyyMMdd.hhmmss\", which comes in handy to filter out files based on datetime.
For example, we can use `glob.glob(f'{datastream_dir}/*.200709*.*')` to filter files from September 2007.\n", + "\n", + "(Note: refer to https://adc.arm.gov/discovery/#/ and the https://adc.arm.gov/solr8/metadata/select API to explore ARM datastreams and their associated available sites and facilities.)\n", + "\n", + "Please see the following examples in action" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d7e9eb85", + "metadata": {}, + "outputs": [], + "source": [ + "# Verify if DATA_DIR path exists\n", + "DATA_DIR = \"/data/archive\"\n", + "os.path.exists(DATA_DIR)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "586993fd", + "metadata": {}, + "outputs": [], + "source": [ + "# Specify datastream_dir following the path conventions and check its existence\n", + "DATASTREAM_NAME = \"aopclap1flynn1m\"\n", + "DATA_LEVEL = \"c1\"\n", + "site = \"sgp\"\n", + "facility = \"C1\"\n", + "datastream_dir = os.path.join(DATA_DIR, site, site + DATASTREAM_NAME + facility + '.'
+ DATA_LEVEL )\n", + "print(datastream_dir)\n", + "print(os.path.exists(datastream_dir))\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0742f7c1", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: list 5 (random) files under datastream_dir\n", + "files = os.listdir(datastream_dir)\n", + "files[:5]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "39b98a36", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: get most recent file\n", + "list_of_files = glob.glob(f\"{datastream_dir}/*\") # * means all if need specific format then *.csv\n", + "latest_file = max(list_of_files, key=os.path.getctime)\n", + "latest_file" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "902d514e", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus sort datastream files based on datetime\n", + "files = os.listdir(datastream_dir)\n", + "file_sorted = files.copy()\n", + "file_sorted.sort() \n", + "print(file_sorted[:5])\n", + "\n", + "# to reverse\n", + "file_sorted_reverse = files.copy() \n", + "file_sorted_reverse.sort(reverse=True)\n", + "print(file_sorted_reverse[:5])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ec5923b2", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: pattern matching\n", + "# filter the 200709** files under datastream_dir\n", + "files_filter = glob.glob(f'{datastream_dir}/*.200709*.*')\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c4a4aa18", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: use armnotebook_utils.file_filter (TODO) to filter files based on datastream info \n" + ] + }, + { + "cell_type": "markdown", + "id": "cfeb9efc", + "metadata": {}, + "source": [ + "### Load data\n", + "The arm data is stored in [NetCDF](https://en.wikipedia.org/wiki/NetCDF#:~:text=%22NetCDF%20(network%20Common%20Data%20Form,format%20for%20representing%20scientific%20data.) format. 
We can use xarray's `open_dataset` method to load single file, or `open_mfdataset` to open multiple files. (Note: the latter will still return a single xarray dataset object by combining multiple files.)\n", + "\n", + "Note: open_dataset keeps the file handle open and lazy loads its contents. All parameters are passed directly to open_dataset. It is a preferable method over load_dataset for memory-effiency. (more details, please see [xarray.open_dataset](https://docs.xarray.dev/en/stable/generated/xarray.open_dataset.html))\n", + "\n", + "See the following example in action" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a440a329", + "metadata": {}, + "outputs": [], + "source": [ + "# open single file\n", + "full_path = latest_file\n", + "print(full_path)\n", + "ds_single = xr.open_dataset(full_path)\n", + "ds_single" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0143a3d", + "metadata": {}, + "outputs": [], + "source": [ + "print(ds_single)\n", + "print(type(ds_single))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1c0f8939", + "metadata": {}, + "outputs": [], + "source": [ + "# open multiple files\n", + "n_files = 3\n", + "full_paths = [os.path.join(datastream_dir, f_path) for f_path in file_sorted_reverse[:n_files]]\n", + "print(full_paths)\n", + "try: # Note: sometimes multiple files cannot be merged, so we used try except here.\n", + " ds_mutiple = xr.open_mfdataset(full_paths)\n", + " # ds_mutiple\n", + " print(type(ds_mutiple))\n", + " print(ds_mutiple)\n", + "except Exception as e:\n", + " print(e)" + ] + }, + { + "cell_type": "markdown", + "id": "3fa59943", + "metadata": {}, + "source": [ + "## Explore the data" + ] + }, + { + "cell_type": "markdown", + "id": "571f69b9", + "metadata": {}, + "source": [ + "### NetCDF Data structure \n", + "\n", + "\n", + "(If you are confident with NetCDF basics and xarray essentials, feel free to skip this session.)\n", + "Before we dive into 
data exploration, there are some essential concepts we should be familiar with (shown below): \n", + "* Dataset\n", + "* Data array\n", + "* Variable\n", + "* Dimension\n", + "* Coordinate\n", + "* Data Type\n", + "* Meta Data (Attributes)\n", + "\n", + "We will not go into details about NetCDF basics and here are some references you might find helpful\n", + "* [Components of a NetCDF Dataset](https://iprc.soest.hawaii.edu/users/xfu/tool/guidef-7.html#)\n", + "* [Network Common Data Form (NetCDF)](https://www.unidata.ucar.edu/software/netcdf/) and [A Brief History of (netCDF) Time](https://www.unidata.ucar.edu/software/netcdf/time/recs.html)\n", + "\n", + "\n", + "\n", + "### Xarray essentials\n", + "Earlier we introduced the xarray package and used `open_dataset` and `open_mfdataset` to retrieve a NetCDF data file as an xarray Dataset object. Recall that you can review the data in a notebook by using `print(ds)` or `ds`. \n", + "\n", + "Using xarray to retrieve the aforementioned NetCDF basics is straightforward.
In general,\n", + "* Dataset: `ds`\n", + "* Data array: `ds.variables`\n", + "* Variable: `ds.variables`\n", + "* Dimension: `ds.dims`\n", + "* Coordinate: `ds.coords`\n", + "* Data Type: `type`\n", + "* Meta Data (Attributes): `ds.attrs`\n", + "\n", + "Also, here are some references if you are new to xarray\n", + "* [Xarray in 45 minutes](https://tutorial.xarray.dev/overview/xarray-in-45-min.html)\n", + "* [Handling NetCDF Files using XArray for Absolute Beginners](https://towardsdatascience.com/handling-netcdf-files-using-xarray-for-absolute-beginners-111a8ab4463f)\n", + "\n", + "Try the following commands in action\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b3eac323", + "metadata": {}, + "outputs": [], + "source": [ + "ds = ds_single" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "77ecf85d", + "metadata": {}, + "outputs": [], + "source": [ + "# Dataset \n", + "ds" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "25e7de09", + "metadata": {}, + "outputs": [], + "source": [ + "# Data array, Variable\n", + "# Note: the info can be overwhelming.
Do not attempt to grasp everything at the first glance.\n", + "ds.variables" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8c41b67e", + "metadata": {}, + "outputs": [], + "source": [ + "# dimenssions\n", + "ds.dims" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "156f1dfc", + "metadata": {}, + "outputs": [], + "source": [ + "# coordinates\n", + "ds.coords" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "277d6064", + "metadata": {}, + "outputs": [], + "source": [ + "# Meta Data (Attributes)\n", + "ds.attrs" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d334681f", + "metadata": {}, + "outputs": [], + "source": [ + "# type\n", + "print(type(ds))\n", + "print(type(ds.variables))\n", + "print(type(ds.dims))\n", + "print(type(ds.coords))\n", + "print(type(ds.attrs))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "643399d6", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: retrieve variable names only\n", + "list(ds.variables)" + ] + }, + { + "cell_type": "markdown", + "id": "1cf36878", + "metadata": {}, + "source": [ + "#### Discussion: variable vs. coordinates vs. dimenssions. \n", + "From [Components of a NetCDF Dataset](https://iprc.soest.hawaii.edu/users/xfu/tool/guidef-7.html#):\n", + "\n", + "* Dimession: A dimension may be used to represent a real physical dimension, for example, time, latitude, longitude, or height. A dimension might also be used to index other quantities, for example station or model-run-number.\n", + "A netCDF dimension has both a name and a length. A dimension length is an arbitrary positive integer, except that one dimension in a netCDF dataset can have the length UNLIMITED.\n", + "\n", + "* Variables: Variables are used to store the bulk of the data in a netCDF dataset. A variable represents an array of values of the same type. A scalar value is treated as a 0-dimensional array. 
A variable has a name, a data type, and a shape described by its list of dimensions specified when the variable is created. A variable may also have associated attributes, which may be added, deleted or changed after the variable is created.\n", + "\n", + "* Coordinate (Variables): It is legal for a variable to have the same name as a dimension. Such variables have no special meaning to the netCDF library. However there is a convention that such variables should be treated in a special way by software using this library.\n", + "A variable with the same name as a dimension is called a coordinate variable. It typically defines a physical coordinate corresponding to that dimension.\n", + "\n", + "\n", + "Tips: By definition, coordinates and dimensions are also variables. For example ds.time is a coordinate but it is also a special variable. But in practice, when we talk about a variable, we mean a regular/non-coordinate variable. We will use this convention for the remainder of the notebook. " + ] + }, + { + "cell_type": "markdown", + "id": "d47120f9", + "metadata": {}, + "source": [ + "### Xarray Variable\n", + "\n", + "\n", + "We can use `ds.variables` to access the variables of a dataset. But printing out the whole dataset can often be overwhelming. Instead, we would work on an individual variable (also called an Xarray data array) using the `ds[var_name]` syntax, where `var_name` is the name of the variable. See the following examples in action.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5a57e136", + "metadata": {}, + "outputs": [], + "source": [ + "print(type(ds[\"time\"]))\n", + "ds[\"time\"]" + ] + }, + { + "cell_type": "markdown", + "id": "51ec643e", + "metadata": {}, + "source": [ + "#### Variable properties\n", + "For an individual variable, the following properties are the most commonly used, listed with the associated xarray attribute to retrieve them.
(assuming var = ds[var_name])\n", + "* name: `var.name`\n", + "* data content: `var.data`\n", + "* attributes: `var.attrs`\n", + "* dimensions: `var.dims`\n", + "* data type: `var.data.dtype`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a632a525", + "metadata": {}, + "outputs": [], + "source": [ + "var_name = \"time\"\n", + "var = ds[var_name]\n", + "\n", + "print(\"var.name: \\n\", var.name, \"\\n\")\n", + "print(\"var.data: \\n\", var.data, \"\\n\")\n", + "print(\"var.attrs: \\n\", var.attrs, \"\\n\")\n", + "print(\"var.dims: \\n\", var.dims, \"\\n\")\n", + "print(\"var.data.dtype: \\n\", var.data.dtype, \"\\n\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b256b07f", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: individual variable info table (collect and display variable info in a tabular format)\n", + "df_info = pd.DataFrame()\n", + "df_info[\"var_name\"] = list(ds.variables)\n", + "df_info[\"dims\"] = df_info.var_name.apply(lambda x: ds[x].dims)\n", + "df_info[\"is_dim\"] = df_info.var_name.apply(lambda x: x in ds.dims)\n", + "df_info[\"n_dim\"] = df_info.var_name.apply(lambda x: len(ds[x].dims))\n", + "df_info[\"attrs\"] = df_info.var_name.apply(lambda x: ds[x].attrs)\n", + "df_info[\"dtype\"] = df_info.var_name.apply(lambda x: ds[x].data.dtype)\n", + "df_info" + ] + }, + { + "cell_type": "markdown", + "id": "e6a36b96", + "metadata": {}, + "source": [ + "### Data cleaning/Preprocessing (skipped)\n", + "Data cleaning and preprocessing is an important stage in the data analysis pipeline. However, it is outside the scope of this notebook. For more information about data cleaning and preprocessing basics, here are some references. 
\n", + "* [Xarray Fundamentals](https://earth-env-data-science.github.io/lectures/xarray/xarray.html)\n", + "* [Xarray Tutorial — Pangeo Gallery documentation](http://gallery.pangeo.io/repos/pangeo-data/pangeo-tutorial-gallery/xarray.html)\n", + "* [Pythonic Data Cleaning With pandas and NumPy](https://realpython.com/python-data-cleaning-numpy-pandas/)\n", + "* [Pandas - Cleaning Data](https://www.w3schools.com/python/pandas/pandas_cleaning.asp)" + ] + }, + { + "cell_type": "markdown", + "id": "ae6e1a69", + "metadata": {}, + "source": [ + "### Plotting\n", + "Data visualization (or plotting) is another important data analysis topic and deserves its own discussion. Here in this notebook we will only demonstrate basic tools for simple data visualization tasks.\n", + "\n", + "Here are some references you might find useful:\n", + "* xarray plotting: https://docs.xarray.dev/en/stable/user-guide/plotting.html\n", + "* Atmospheric Community Toolkit (ACT): https://arm-doe.github.io/ACT/index.html\n", + "\n", + "Note: this notebook is auto-generated using a template. It uses a general rule to select variable(s) to plot and is not customized for each individual notebook. Feel free to change the variables of interest, especially if a figure fails to plot." + ] + }, + { + "cell_type": "markdown", + "id": "7e417fb2", + "metadata": {}, + "source": [ + "#### 1-dimensional basic time series plot\n", + "\n", + "For the following plot we would like to find variables such that\n", + "* it has one and only one dimension\n", + "* \"time\" is its coordinate variable\n", + "* it is not a dimension itself,\n", + "* it is not a special variable with substrings within [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "\n", + "Please see the following example in action. 
(For more details about pandas filtering, please see the following references.)\n", + "* [pandas: multiple conditions while indexing data frame](https://stackoverflow.com/questions/22591174/pandas-multiple-conditions-while-indexing-data-frame-unexpected-behavior)\n", + "* [pandas.DataFrame.apply](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.apply.html)\n", + "* [How to test if a string contains one of the substrings in a list...](https://stackoverflow.com/questions/26577516/how-to-test-if-a-string-contains-one-of-the-substrings-in-a-list-in-pandas)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "30edac2d", + "metadata": {}, + "outputs": [], + "source": [ + "# (pandas query) filter n_dim==1, non-dimensional, and does not contain the following substrings\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter = df_info[(df_info.n_dim == 1) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (df_info.is_dim==False) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4777a659", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " var_1d = df_filter.var_name.values[0]\n", + " var_1d\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fb5ae985", + "metadata": {}, + "outputs": [], + "source": [ + "# Note: if failed, change to another variable to plot.\n", + "try:\n", + " ds[var_1d].plot()\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "markdown", + "id": "9e41066e", + "metadata": {}, + "source": [ + "#### 2-dimensional basic plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "09bd4adc", + "metadata": {}, + "outputs": [], + "source": [ + "# (pandas query) filter n_dim==2, non-dimensional, and does not contain the 
following substrings\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_2 = df_info[(df_info.n_dim == 2) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (df_info.is_dim==False) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter_2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2cdea1a9", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " var_2d = df_filter_2.var_name.values[0]\n", + " var_2d\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f5f48879", + "metadata": {}, + "outputs": [], + "source": [ + "# Note: if failed, change to another variable to plot.\n", + "try:\n", + " print(ds[var_2d].dims)\n", + " # ds[var_2d].plot()\n", + "\n", + " # conventionally, use \"time\" as x-axis\n", + " ds[var_2d].plot(x=\"time\")\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "markdown", + "id": "7554b2a6", + "metadata": {}, + "source": [ + "#### qc-plotting (optional)\n", + "\n", + "Note: act qc plotting has more strict requirement, one of them is the associated qc variable need to have \"flag_masks\" attributes. 
Which is added by using ds = act.io.armfiles.read_netcdf(files_list), then ds.clean.cleanup()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "71c2096f", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " ds_act = act.io.armfiles.read_netcdf(full_path)\n", + " print(type(ds_act))\n", + " ds_act.clean.cleanup()\n", + "\n", + " # or \n", + " # ds.clean.cleanup()\n", + "except Exception as e:\n", + " print(\"ERROR\", e)\n", + " ds_act = ds" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "844f8505", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter valid variables for ACT qc plotting\n", + "condition = (\"qc_\" + df_info.var_name).apply(lambda x: ds_act[x].attrs.get(\"flag_masks\") is not None \n", + " if x in list(ds.variables) else False)\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_3 = df_info[(df_info.is_dim==False) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings))) &\n", + " condition\n", + " ]\n", + "df_filter_3" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ecd38cc6", + "metadata": {}, + "outputs": [], + "source": [ + "# QC Plot\n", + "try:\n", + " qc_variable = df_filter_3.var_name.values[0]\n", + " print(qc_variable)\n", + "except Exception as e:\n", + " print(e)\n", + " \n", + "try:\n", + "\n", + " # Plot\n", + " qc_display = act.plotting.TimeSeriesDisplay(ds_act)\n", + " qc_display.add_subplots((2,), figsize = (9.5,10))\n", + " qc_ax = qc_display.plot(qc_variable, subplot_index=(0,), set_title=\"QC results on field: \" + qc_variable,)\n", + " qc_ax.grid()\n", + " qc_display.qc_flag_block_plot(qc_variable, subplot_index=(1,))\n", + " qc_ax.set_xlabel(f\"UTC Time starts at {ds.time.data[0]}\")\n", + "\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e)" + ] + }, + { + "cell_type": "markdown", + "id": "d76e4d27", + "metadata": 
{}, + "source": [ + "#### bonus: choose variables to plot from a dropdown menu " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bb733804", + "metadata": {}, + "outputs": [], + "source": [ + "# Valid variables filtering\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_4 = df_info[(df_info.is_dim==False) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter_4" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "82d5c20d", + "metadata": {}, + "outputs": [], + "source": [ + "# example 1: using xarray plot\n", + "\n", + "# Uncomment the following cell to try the interactive plot (ctrl + /)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2fc9aaa4", + "metadata": {}, + "outputs": [], + "source": [ + "# %matplotlib widget\n", + "# plt.clf()\n", + "\n", + "# fig, ax = plt.subplots(figsize=(10, 4))\n", + "\n", + "# available_variables = df_filter_4.var_name.values\n", + "# @widgets.interact(var=available_variables)\n", + "# def update(var = available_variables[0]):\n", + "# fig.clear() # Remove old lines from plot and plot new one\n", + "# if len(ds[var].dims)==2:\n", + "# ds[var].plot(x=\"time\", add_colorbar=False)\n", + "# else:\n", + "# ds[var].plot()\n", + "# plt.grid()\n", + "# plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fabdd802", + "metadata": {}, + "outputs": [], + "source": [ + "# example 2: using act plot\n", + "\n", + "# Uncomment the following cell to try the interactive plot (ctrl + /)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "962b4186", + "metadata": {}, + "outputs": [], + "source": [ + "# %matplotlib widget\n", + "# plt.clf()\n", + "\n", + "# available_variables = df_filter_4.var_name.values\n", + "\n", + "\n", + "# @widgets.interact(var=available_variables)\n", + "# def 
update(var = available_variables[0]):\n", + "\n", + "# i_display = act.plotting.TimeSeriesDisplay(ds_act)\n", + "# i_display.add_subplots((1,), figsize=(10, 4))\n", + "# ax = i_display.plot(var, subplot_index=(0,), set_title=f\"{var} ({ds_act[var].attrs['long_name']})\",)\n", + "\n", + "# ax.set_xlabel(f\"UTC Time starts at {ds.time.data[0]}\")\n", + "# ax.grid()\n", + "# plt.show()\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.16" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/AOP/aopclap1flynn1m.c1.ipynb b/VAPs/quicklook/AOP/aopclap1flynn1m.c1.ipynb new file mode 100644 index 00000000..9c99a80e --- /dev/null +++ b/VAPs/quicklook/AOP/aopclap1flynn1m.c1.ipynb @@ -0,0 +1,339 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# AOPCLAP1FLYNN1M.C1 Plots\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/aop) for more information about this vap." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib widget\n", + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import ipywidgets as widgets\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "# Data archive directory\n", + "DATA_DIR = r'/data/archive/'\n", + "\n", + "# Datastream info\n", + "DATASTREAM_NAME = 'aopclap1flynn1m'\n", + "DATA_LEVEL = 'c1'\n", + "LOCATIONS = [{'end_date': '2017-09-29', 'facility': 'C1', 'site': 'sgp', 'start_date': '2016-10-31'}]" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "9faaf875", + "metadata": {}, + "source": [ + "## Define site, facility, and date range" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ac6764f5", + "metadata": {}, + "outputs": [], + "source": [ + "print(\"The following locations and date ranges are available for this VAP:\")\n", + "display(pd.DataFrame(LOCATIONS, columns=['site', 'facility', 'start_date', 'end_date']))" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "8d132223", + "metadata": {}, + "source": [ + "#### Define site, facility, and date range (date format: YYYY-MM-DD) using the variables below:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e563983a", + "metadata": {}, + "outputs": [], + "source": [ + "site_facility = ( 'sgp', 'C1' )\n", + "\n", + "date_start = '2017-09-27'\n", + "date_end = '2017-09-29'" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "bccd3dfe-2f99-49a5-bace-ea37e8dd8fc3", + "metadata": {}, + "source": [ + "## Load data files\n", + "Load data files from /data/archive/" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cb4b9a26-c574-49c0-a521-658fa553e39e", + 
"metadata": {}, + "outputs": [], + "source": [ + "# Compile list of files\n", + "site, facility = site_facility\n", + "d_date_start = datetime.strptime(date_start, '%Y-%m-%d')\n", + "d_date_end = datetime.strptime(date_end, '%Y-%m-%d')\n", + "dir_path = os.path.join(DATA_DIR + site, site + DATASTREAM_NAME + facility + r'.' + DATA_LEVEL )\n", + "dir_path\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6be8f3dc", + "metadata": {}, + "outputs": [], + "source": [ + "from datetime import date, timedelta\n", + "import pandas as pd\n", + "\n", + "def get_ARM_formated_dates(start_date, end_date):\n", + " \"\"\"\n", + " Get a list of ARM conventional formated date lists, based on start_date and end_date(inclusive)\n", + " EXAMPLE:\n", + " get_ARM_formated_dates(start_date=\"20180219\", end_date=\"20180221\")\n", + " >> [\"20180219\", \"20180220\", \"20180221\"] \n", + " \"\"\"\n", + " \n", + " _start_date = pd.to_datetime(start_date)\n", + " _end_date = pd.to_datetime(end_date)\n", + " \n", + " delta = _end_date - _start_date # returns timedelta \n", + " dates = []\n", + "\n", + " for i in range(delta.days + 1):\n", + " day = _start_date + timedelta(days=i)\n", + " day_formated = day.strftime(format=\"%Y%m%d\")\n", + " dates.append(day_formated)\n", + " return dates\n", + "\n", + "\n", + "get_ARM_formated_dates(start_date=date_start, end_date=date_end)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "51feea2e", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter a list of files based on date pattern\n", + "import glob\n", + "dates = get_ARM_formated_dates(start_date=date_start, end_date=date_end)\n", + "files_filter = []\n", + "for date in dates:\n", + " files_filter += glob.glob(f'{dir_path}/*.{date}*.*')\n", + " files_filter\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0e5d0d7", + "metadata": {}, + "outputs": [], + "source": [ + "# Load files as a single 
dataset\n", + "files_list = files_filter \n", + "ds = act.io.armfiles.read_netcdf(files_list)\n", + "ds.clean.cleanup()\n", + "print(f'{len(files_list)} files loaded')\n", + "ds\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "4a551094-9ec0-4b64-b80a-9940573c2f50", + "metadata": {}, + "source": [ + "## Plot time series data\n", + "#### Define the list of variables to be plotted:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d09b789e-84f1-4605-846b-a72c110c8048", + "metadata": {}, + "outputs": [], + "source": [ + "variables_to_plot = ['Bs_B', 'Bs_G', 'Bs_R']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3458fb08-035b-4898-9253-0a94e6f9c97b", + "metadata": {}, + "outputs": [], + "source": [ + "ts_display = act.plotting.TimeSeriesDisplay(ds)\n", + "ts_display.add_subplots((len(variables_to_plot),), figsize = (9.5,4*len(variables_to_plot)))\n", + "\n", + "for i,v in enumerate(variables_to_plot):\n", + " ts_ax = ts_display.plot(v, subplot_index=(i,), set_title=ds.variables[v].attrs['long_name'],)\n", + " ts_ax.grid()\n", + "\n", + "plt.show()\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "194399aa-1907-452b-8ba9-bc31d7f60291", + "metadata": {}, + "source": [ + "## Quality check plots\n", + "#### Define variable for QC plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9a0ef63d-3eeb-48fc-a24a-43258b6134b8", + "metadata": {}, + "outputs": [], + "source": [ + "qc_variable = 'Bs_B'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "532663a3-4dc0-4497-bda8-018c5f91e1c4", + "metadata": {}, + "outputs": [], + "source": [ + "# QC Plot\n", + "if ('qc_'+qc_variable) in ds.variables:\n", + "\n", + " # Plot\n", + " qc_display = act.plotting.TimeSeriesDisplay(ds)\n", + " qc_display.add_subplots((2,), figsize = (9.5,10))\n", + " qc_ax = qc_display.plot(qc_variable, subplot_index=(0,), set_title=\"QC results on field: \" + 
qc_variable,)\n", + " qc_ax.grid()\n", + " qc_display.qc_flag_block_plot(qc_variable, subplot_index=(1,))\n", + "\n", + " plt.show()\n", + "else:\n", + " print(f'QC not available for the selected field: {qc_variable}')\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "2308db16-f362-4033-a11a-c3e5e75ad9ba", + "metadata": {}, + "source": [ + "## Field selection dropdown menu\n", + "Select variable to be plotted from a dropdown menu" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d5792fa8-2253-441a-8ac6-1e07a0345031", + "metadata": {}, + "outputs": [], + "source": [ + "plt.ioff()\n", + "\n", + "# populate dropdown menu with available variables \n", + "available_variables = [v for v in ds.variables if not('time' in v or v.startswith('qc_') or v.startswith('source_')) and 'long_name' in ds.variables[v].attrs]\n", + "d_variable = 'Bs_B'\n", + "dropdown = widgets.Dropdown(\n", + " options = [(ds.variables[v].attrs['long_name'], v) for v in available_variables],\n", + " value= d_variable,\n", + " description='Field:',\n", + " disabled=False,\n", + ")\n", + "dropdown.layout.margin = '0px 30% 0px 20%'\n", + "dropdown.layout.width = '50%'\n", + "\n", + "# set up display\n", + "i_display = act.plotting.TimeSeriesDisplay(ds)\n", + "i_display.add_subplots((1,), figsize = (9.5,5))\n", + "i_ax = i_display.plot(d_variable, subplot_index=(0,), set_title=ds.variables[d_variable].attrs['long_name'],)\n", + "i_ax.grid()\n", + "i_fig = i_display.fig\n", + "\n", + "# update plot callback function\n", + "def update_plot(change):\n", + " i_ax.cla()\n", + " i_ax_new = i_display.plot(change.new, subplot_index=(0,), set_title=ds.variables[change.new].attrs['long_name'],)\n", + " i_ax_new.grid()\n", + " i_fig.canvas.draw()\n", + " i_fig.canvas.flush_events()\n", + "\n", + "dropdown.observe(update_plot, names='value')\n", + "\n", + "widgets.AppLayout(\n", + " header=dropdown,\n", + " center=i_fig.canvas,\n", + " pane_heights=[1, 6,1]\n", + ")\n" + 
] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.12" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/AOP/aoppsap1flynn1h.c1.ipynb b/VAPs/quicklook/AOP/aoppsap1flynn1h.c1.ipynb new file mode 100644 index 00000000..8fa832c9 --- /dev/null +++ b/VAPs/quicklook/AOP/aoppsap1flynn1h.c1.ipynb @@ -0,0 +1,339 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# AOPPSAP1FLYNN1H.C1 Plots\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/aop) for more information about this vap." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib widget\n", + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import ipywidgets as widgets\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "# Data archive directory\n", + "DATA_DIR = r'/data/archive/'\n", + "\n", + "# Datastream info\n", + "DATASTREAM_NAME = 'aoppsap1flynn1h'\n", + "DATA_LEVEL = 'c1'\n", + "LOCATIONS = [{'end_date': '2019-04-30', 'facility': 'M1', 'site': 'cor', 'start_date': '2018-09-23'}, {'end_date': '2017-11-01', 'facility': 'M1', 'site': 'asi', 'start_date': '2016-04-23'}, {'end_date': '2020-06-01', 'facility': 'M1', 'site': 'anx', 'start_date': '2019-12-01'}, {'end_date': '2023-12-11', 'facility': 'M1', 'site': 'epc', 'start_date': '2023-01-15'}, {'end_date': '2022-09-29', 'facility': 'M1', 'site': 'hou', 'start_date': '2021-09-08'}, {'end_date': '2023-12-15', 'facility': 'C1', 'site': 'ena', 'start_date': '2013-10-09'}, {'end_date': '2015-12-01', 'facility': 'S1', 'site': 'mao', 'start_date': '2014-02-06'}, {'end_date': '2021-10-13', 'facility': 'M1', 'site': 'guc', 'start_date': '2021-09-02'}, {'end_date': '2023-06-14', 'facility': 'S2', 'site': 'guc', 'start_date': '2021-10-27'}, {'end_date': '2020-10-01', 'facility': 'M1', 'site': 'mos', 'start_date': '2019-10-11'}, {'end_date': '2021-06-13', 'facility': 'M1', 'site': 'oli', 'start_date': '2016-08-06'}, {'end_date': '2017-09-28', 'facility': 'C1', 'site': 'sgp', 'start_date': '2015-10-01'}, {'end_date': '2023-12-17', 'facility': 'E13', 'site': 'sgp', 'start_date': '2016-11-15'}]" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "9faaf875", + "metadata": {}, + "source": [ + "## Define site, facility, and date range" + ] + }, + { + 
"cell_type": "code", + "execution_count": null, + "id": "ac6764f5", + "metadata": {}, + "outputs": [], + "source": [ + "print(\"The following locations and date ranges are available for this VAP:\")\n", + "display(pd.DataFrame(LOCATIONS, columns=['site', 'facility', 'start_date', 'end_date']))" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "8d132223", + "metadata": {}, + "source": [ + "#### Define site, facility, and date range (date format: YYYY-MM-DD) using the variables below:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e563983a", + "metadata": {}, + "outputs": [], + "source": [ + "site_facility = ( 'sgp', 'C1' )\n", + "\n", + "date_start = '2017-09-26'\n", + "date_end = '2017-09-28'" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "bccd3dfe-2f99-49a5-bace-ea37e8dd8fc3", + "metadata": {}, + "source": [ + "## Load data files\n", + "Load data files from /data/archive/" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cb4b9a26-c574-49c0-a521-658fa553e39e", + "metadata": {}, + "outputs": [], + "source": [ + "# Compile list of files\n", + "site, facility = site_facility\n", + "d_date_start = datetime.strptime(date_start, '%Y-%m-%d')\n", + "d_date_end = datetime.strptime(date_end, '%Y-%m-%d')\n", + "dir_path = os.path.join(DATA_DIR + site, site + DATASTREAM_NAME + facility + r'.' 
+ DATA_LEVEL )\n", + "dir_path\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6be8f3dc", + "metadata": {}, + "outputs": [], + "source": [ + "from datetime import date, timedelta\n", + "import pandas as pd\n", + "\n", + "def get_ARM_formated_dates(start_date, end_date):\n", + " \"\"\"\n", + " Get a list of ARM conventional formated date lists, based on start_date and end_date(inclusive)\n", + " EXAMPLE:\n", + " get_ARM_formated_dates(start_date=\"20180219\", end_date=\"20180221\")\n", + " >> [\"20180219\", \"20180220\", \"20180221\"] \n", + " \"\"\"\n", + " \n", + " _start_date = pd.to_datetime(start_date)\n", + " _end_date = pd.to_datetime(end_date)\n", + " \n", + " delta = _end_date - _start_date # returns timedelta \n", + " dates = []\n", + "\n", + " for i in range(delta.days + 1):\n", + " day = _start_date + timedelta(days=i)\n", + " day_formated = day.strftime(format=\"%Y%m%d\")\n", + " dates.append(day_formated)\n", + " return dates\n", + "\n", + "\n", + "get_ARM_formated_dates(start_date=date_start, end_date=date_end)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "51feea2e", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter a list of files based on date pattern\n", + "import glob\n", + "dates = get_ARM_formated_dates(start_date=date_start, end_date=date_end)\n", + "files_filter = []\n", + "for date in dates:\n", + " files_filter += glob.glob(f'{dir_path}/*.{date}*.*')\n", + " files_filter\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0e5d0d7", + "metadata": {}, + "outputs": [], + "source": [ + "# Load files as a single dataset\n", + "files_list = files_filter \n", + "ds = act.io.armfiles.read_netcdf(files_list)\n", + "ds.clean.cleanup()\n", + "print(f'{len(files_list)} files loaded')\n", + "ds\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "4a551094-9ec0-4b64-b80a-9940573c2f50", + "metadata": {}, + "source": [ + "## Plot 
time series data\n", + "#### Define the list of variables to be plotted:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d09b789e-84f1-4605-846b-a72c110c8048", + "metadata": {}, + "outputs": [], + "source": [ + "variables_to_plot = ['Bs_G_1um', 'Bs_R_1um', 'Bbs_B_1um']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3458fb08-035b-4898-9253-0a94e6f9c97b", + "metadata": {}, + "outputs": [], + "source": [ + "ts_display = act.plotting.TimeSeriesDisplay(ds)\n", + "ts_display.add_subplots((len(variables_to_plot),), figsize = (9.5,4*len(variables_to_plot)))\n", + "\n", + "for i,v in enumerate(variables_to_plot):\n", + " ts_ax = ts_display.plot(v, subplot_index=(i,), set_title=ds.variables[v].attrs['long_name'],)\n", + " ts_ax.grid()\n", + "\n", + "plt.show()\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "194399aa-1907-452b-8ba9-bc31d7f60291", + "metadata": {}, + "source": [ + "## Quality check plots\n", + "#### Define variable for QC plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9a0ef63d-3eeb-48fc-a24a-43258b6134b8", + "metadata": {}, + "outputs": [], + "source": [ + "qc_variable = 'Bs_B_1um'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "532663a3-4dc0-4497-bda8-018c5f91e1c4", + "metadata": {}, + "outputs": [], + "source": [ + "# QC Plot\n", + "if ('qc_'+qc_variable) in ds.variables:\n", + "\n", + " # Plot\n", + " qc_display = act.plotting.TimeSeriesDisplay(ds)\n", + " qc_display.add_subplots((2,), figsize = (9.5,10))\n", + " qc_ax = qc_display.plot(qc_variable, subplot_index=(0,), set_title=\"QC results on field: \" + qc_variable,)\n", + " qc_ax.grid()\n", + " qc_display.qc_flag_block_plot(qc_variable, subplot_index=(1,))\n", + "\n", + " plt.show()\n", + "else:\n", + " print(f'QC not available for the selected field: {qc_variable}')\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": 
"2308db16-f362-4033-a11a-c3e5e75ad9ba", + "metadata": {}, + "source": [ + "## Field selection dropdown menu\n", + "Select variable to be plotted from a dropdown menu" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d5792fa8-2253-441a-8ac6-1e07a0345031", + "metadata": {}, + "outputs": [], + "source": [ + "plt.ioff()\n", + "\n", + "# populate dropdown menu with available variables \n", + "available_variables = [v for v in ds.variables if not('time' in v or v.startswith('qc_') or v.startswith('source_')) and 'long_name' in ds.variables[v].attrs]\n", + "d_variable = 'Bs_G_1um'\n", + "dropdown = widgets.Dropdown(\n", + " options = [(ds.variables[v].attrs['long_name'], v) for v in available_variables],\n", + " value= d_variable,\n", + " description='Field:',\n", + " disabled=False,\n", + ")\n", + "dropdown.layout.margin = '0px 30% 0px 20%'\n", + "dropdown.layout.width = '50%'\n", + "\n", + "# set up display\n", + "i_display = act.plotting.TimeSeriesDisplay(ds)\n", + "i_display.add_subplots((1,), figsize = (9.5,5))\n", + "i_ax = i_display.plot(d_variable, subplot_index=(0,), set_title=ds.variables[d_variable].attrs['long_name'],)\n", + "i_ax.grid()\n", + "i_fig = i_display.fig\n", + "\n", + "# update plot callback function\n", + "def update_plot(change):\n", + " i_ax.cla()\n", + " i_ax_new = i_display.plot(change.new, subplot_index=(0,), set_title=ds.variables[change.new].attrs['long_name'],)\n", + " i_ax_new.grid()\n", + " i_fig.canvas.draw()\n", + " i_fig.canvas.flush_events()\n", + "\n", + "dropdown.observe(update_plot, names='value')\n", + "\n", + "widgets.AppLayout(\n", + " header=dropdown,\n", + " center=i_fig.canvas,\n", + " pane_heights=[1, 6,1]\n", + ")\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", 
+ "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.12" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/AOP/aoppsap1flynn1m.c1.ipynb b/VAPs/quicklook/AOP/aoppsap1flynn1m.c1.ipynb new file mode 100644 index 00000000..2b230f98 --- /dev/null +++ b/VAPs/quicklook/AOP/aoppsap1flynn1m.c1.ipynb @@ -0,0 +1,339 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# AOPPSAP1FLYNN1M.C1 Plots\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/aop) for more information about this vap." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib widget\n", + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import ipywidgets as widgets\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "# Data archive directory\n", + "DATA_DIR = r'/data/archive/'\n", + "\n", + "# Datastream info\n", + "DATASTREAM_NAME = 'aoppsap1flynn1m'\n", + "DATA_LEVEL = 'c1'\n", + "LOCATIONS = [{'end_date': '2019-04-30', 'facility': 'M1', 'site': 'cor', 'start_date': '2018-09-23'}, {'end_date': '2017-11-01', 'facility': 'M1', 'site': 'asi', 'start_date': '2016-04-23'}, {'end_date': '2020-06-01', 'facility': 'M1', 'site': 'anx', 'start_date': '2019-12-01'}, {'end_date': '2023-12-13', 'facility': 'M1', 'site': 'epc', 'start_date': '2023-01-15'}, {'end_date': '2018-01-11', 'facility': 'M1', 'site': 'mar', 'start_date': '2017-10-29'}, {'end_date': '2022-09-30', 'facility': 'M1', 'site': 'hou', 'start_date': '2021-09-08'}, {'end_date': 
'2023-12-17', 'facility': 'C1', 'site': 'ena', 'start_date': '2013-10-09'}, {'end_date': '2015-12-01', 'facility': 'S1', 'site': 'mao', 'start_date': '2014-02-06'}, {'end_date': '2021-10-14', 'facility': 'M1', 'site': 'guc', 'start_date': '2021-09-02'}, {'end_date': '2023-06-15', 'facility': 'S2', 'site': 'guc', 'start_date': '2021-10-27'}, {'end_date': '2020-10-01', 'facility': 'M1', 'site': 'mos', 'start_date': '2019-10-11'}, {'end_date': '2021-06-14', 'facility': 'M1', 'site': 'oli', 'start_date': '2016-08-06'}, {'end_date': '2017-09-29', 'facility': 'C1', 'site': 'sgp', 'start_date': '2015-10-01'}, {'end_date': '2023-12-18', 'facility': 'E13', 'site': 'sgp', 'start_date': '2016-11-15'}]" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "9faaf875", + "metadata": {}, + "source": [ + "## Define site, facility, and date range" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ac6764f5", + "metadata": {}, + "outputs": [], + "source": [ + "print(\"The following locations and date ranges are available for this VAP:\")\n", + "display(pd.DataFrame(LOCATIONS, columns=['site', 'facility', 'start_date', 'end_date']))" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "8d132223", + "metadata": {}, + "source": [ + "#### Define site, facility, and date range (date format: YYYY-MM-DD) using the variables below:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e563983a", + "metadata": {}, + "outputs": [], + "source": [ + "site_facility = ( 'sgp', 'C1' )\n", + "\n", + "date_start = '2017-09-27'\n", + "date_end = '2017-09-29'" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "bccd3dfe-2f99-49a5-bace-ea37e8dd8fc3", + "metadata": {}, + "source": [ + "## Load data files\n", + "Load data files from /data/archive/" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cb4b9a26-c574-49c0-a521-658fa553e39e", + "metadata": {}, + "outputs": [], + "source": [ + "# 
Compile list of files\n", + "site, facility = site_facility\n", + "d_date_start = datetime.strptime(date_start, '%Y-%m-%d')\n", + "d_date_end = datetime.strptime(date_end, '%Y-%m-%d')\n", + "dir_path = os.path.join(DATA_DIR + site, site + DATASTREAM_NAME + facility + r'.' + DATA_LEVEL )\n", + "dir_path\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6be8f3dc", + "metadata": {}, + "outputs": [], + "source": [ + "from datetime import date, timedelta\n", + "import pandas as pd\n", + "\n", + "def get_ARM_formated_dates(start_date, end_date):\n", + " \"\"\"\n", + " Get a list of ARM conventional formated date lists, based on start_date and end_date(inclusive)\n", + " EXAMPLE:\n", + " get_ARM_formated_dates(start_date=\"20180219\", end_date=\"20180221\")\n", + " >> [\"20180219\", \"20180220\", \"20180221\"] \n", + " \"\"\"\n", + " \n", + " _start_date = pd.to_datetime(start_date)\n", + " _end_date = pd.to_datetime(end_date)\n", + " \n", + " delta = _end_date - _start_date # returns timedelta \n", + " dates = []\n", + "\n", + " for i in range(delta.days + 1):\n", + " day = _start_date + timedelta(days=i)\n", + " day_formated = day.strftime(format=\"%Y%m%d\")\n", + " dates.append(day_formated)\n", + " return dates\n", + "\n", + "\n", + "get_ARM_formated_dates(start_date=date_start, end_date=date_end)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "51feea2e", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter a list of files based on date pattern\n", + "import glob\n", + "dates = get_ARM_formated_dates(start_date=date_start, end_date=date_end)\n", + "files_filter = []\n", + "for date in dates:\n", + " files_filter += glob.glob(f'{dir_path}/*.{date}*.*')\n", + " files_filter\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0e5d0d7", + "metadata": {}, + "outputs": [], + "source": [ + "# Load files as a single dataset\n", + "files_list = files_filter \n", + "ds = 
act.io.armfiles.read_netcdf(files_list)\n", + "ds.clean.cleanup()\n", + "print(f'{len(files_list)} files loaded')\n", + "ds\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "4a551094-9ec0-4b64-b80a-9940573c2f50", + "metadata": {}, + "source": [ + "## Plot time series data\n", + "#### Define the list of variables to be plotted:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d09b789e-84f1-4605-846b-a72c110c8048", + "metadata": {}, + "outputs": [], + "source": [ + "variables_to_plot = ['Bs_B', 'Bs_G', 'Bs_R']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3458fb08-035b-4898-9253-0a94e6f9c97b", + "metadata": {}, + "outputs": [], + "source": [ + "ts_display = act.plotting.TimeSeriesDisplay(ds)\n", + "ts_display.add_subplots((len(variables_to_plot),), figsize = (9.5,4*len(variables_to_plot)))\n", + "\n", + "for i,v in enumerate(variables_to_plot):\n", + " ts_ax = ts_display.plot(v, subplot_index=(i,), set_title=ds.variables[v].attrs['long_name'],)\n", + " ts_ax.grid()\n", + "\n", + "plt.show()\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "194399aa-1907-452b-8ba9-bc31d7f60291", + "metadata": {}, + "source": [ + "## Quality check plots\n", + "#### Define variable for QC plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9a0ef63d-3eeb-48fc-a24a-43258b6134b8", + "metadata": {}, + "outputs": [], + "source": [ + "qc_variable = 'Bs_B'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "532663a3-4dc0-4497-bda8-018c5f91e1c4", + "metadata": {}, + "outputs": [], + "source": [ + "# QC Plot\n", + "if ('qc_'+qc_variable) in ds.variables:\n", + "\n", + " # Plot\n", + " qc_display = act.plotting.TimeSeriesDisplay(ds)\n", + " qc_display.add_subplots((2,), figsize = (9.5,10))\n", + " qc_ax = qc_display.plot(qc_variable, subplot_index=(0,), set_title=\"QC results on field: \" + qc_variable,)\n", + " qc_ax.grid()\n", + " 
qc_display.qc_flag_block_plot(qc_variable, subplot_index=(1,))\n", + "\n", + " plt.show()\n", + "else:\n", + " print(f'QC not available for the selected field: {qc_variable}')\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "2308db16-f362-4033-a11a-c3e5e75ad9ba", + "metadata": {}, + "source": [ + "## Field selection dropdown menu\n", + "Select variable to be plotted from a dropdown menu" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d5792fa8-2253-441a-8ac6-1e07a0345031", + "metadata": {}, + "outputs": [], + "source": [ + "plt.ioff()\n", + "\n", + "# populate dropdown menu with available variables \n", + "available_variables = [v for v in ds.variables if not('time' in v or v.startswith('qc_') or v.startswith('source_')) and 'long_name' in ds.variables[v].attrs]\n", + "d_variable = 'Bs_B'\n", + "dropdown = widgets.Dropdown(\n", + " options = [(ds.variables[v].attrs['long_name'], v) for v in available_variables],\n", + " value= d_variable,\n", + " description='Field:',\n", + " disabled=False,\n", + ")\n", + "dropdown.layout.margin = '0px 30% 0px 20%'\n", + "dropdown.layout.width = '50%'\n", + "\n", + "# set up display\n", + "i_display = act.plotting.TimeSeriesDisplay(ds)\n", + "i_display.add_subplots((1,), figsize = (9.5,5))\n", + "i_ax = i_display.plot(d_variable, subplot_index=(0,), set_title=ds.variables[d_variable].attrs['long_name'],)\n", + "i_ax.grid()\n", + "i_fig = i_display.fig\n", + "\n", + "# update plot callback function\n", + "def update_plot(change):\n", + " i_ax.cla()\n", + " i_ax_new = i_display.plot(change.new, subplot_index=(0,), set_title=ds.variables[change.new].attrs['long_name'],)\n", + " i_ax_new.grid()\n", + " i_fig.canvas.draw()\n", + " i_fig.canvas.flush_events()\n", + "\n", + "dropdown.observe(update_plot, names='value')\n", + "\n", + "widgets.AppLayout(\n", + " header=dropdown,\n", + " center=i_fig.canvas,\n", + " pane_heights=[1, 6,1]\n", + ")\n" + ] + } + ], + "metadata": { + "kernelspec": 
{ + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.12" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/AOSCCNAVG/AOSCCNAVG_tutorial.ipynb b/VAPs/quicklook/AOSCCNAVG/AOSCCNAVG_tutorial.ipynb new file mode 100644 index 00000000..8761225d --- /dev/null +++ b/VAPs/quicklook/AOSCCNAVG/AOSCCNAVG_tutorial.ipynb @@ -0,0 +1,838 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# AOSCCNAVG.C1 Notebook\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/aosccnavg) for more information about this vap." + ] + }, + { + "cell_type": "markdown", + "id": "97097763", + "metadata": {}, + "source": [ + "In this notebook, we demonstrate the workflow to explore ARM vap data (using aosccnavg as an example.) Value-added products (VAPs) are higher-order data products that have been analyzed and processed to ease scientists’ use of ARM data in atmospheric research and global climate models. \n", + "Here is the main content we will cover." 
+ ] + }, + { + "cell_type": "markdown", + "id": "eddec40f", + "metadata": {}, + "source": [ + "# Table of Contents\n", + "## Access the data\n", + "* How to retrieve the data\n", + "* Data path and file name conventions\n", + "* Load data\n", + "## Explore the data\n", + "* NetCDF Data structure\n", + "* Xarray essentials\n", + "* Xarray Variable\n", + "## Plot the data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "import random\n", + "\n", + "import glob\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "id": "2372d5be", + "metadata": {}, + "source": [ + "## Access the data" + ] + }, + { + "cell_type": "markdown", + "id": "5b0d9684", + "metadata": {}, + "source": [ + "### How to retrieve the data\n", + "We assume the path \"/data/archive\" is available where you are running this notebook. You can use `os.path.exists(\"/data/archive\")` to verify if the path exists on your machine.\n", + "\n", + "\n", + "### Data path and file name conventions\n", + "There are several common terms regarding ARM data, for example, data-stream-name, data-level, site, facility, instrument, etc. (For more details, please see [ARM Data File Standards Version 1.2](https://www.google.com/search?q=arm+datastream+facility+cite+definition&rlz=1C1GCEJ_enUS1029US1029&ei=hb41ZICrBPukqtsPvMODIA&ved=0ahUKEwjAgeiS0aL-AhV7kmoFHbzhAAQQ4dUDCBA&uact=5&oq=arm+datastream+facility+cite+definition&gs_lcp=Cgxnd3Mtd2l6LXNlcnAQAzIFCAAQogQyBQgAEKIEMgUIABCiBDIFCAAQogQyBQgAEKIEOgoIABBHENYEELADOgoIIRCgARDDBBAKSgQIQRgAUJ4DWIoOYO0PaAFwAXgAgAGMAYgBuQmSAQMyLjmYAQCgAQHIAQjAAQE&sclient=gws-wiz-serp).) 
\n", + "\n", + "For example, this notebook is called `aosccnavg.c1`, where `aosccnavg` is the \"datastream name\", and `c1` is the \"data level\".\n", + "\n", + "This datastream also contains site `mao` and facility `M1`. (Note: an individual datastream might have multiple site-facility pairs.)\n", + "In such a case, the data of this data-stream is stored at `/data/archive/mao/maoaosccnavgM1.c1`, which is in the format of `<DATA_DIR>/<site>/<site><datastream_name><facility>.<data_level>`. We can use the following method to assign the data-stream directory `datastream_dir = os.path.join(DATA_DIR, site, site + DATASTREAM_NAME + facility + '.' + DATA_LEVEL )`\n", + "\n", + "The data files under datastream_dir also follow naming conventions. But once you reach the datastream_dir level, the most important file naming convention to differentiate the files is \"yyyyMMdd.hhmmss\", which comes in handy to filter out files based on datetime. For example, we can use `glob.glob(f'{datastream_dir}/*.200709*.*')` to filter files in 2007 September.\n", + "\n", + "(Note: refer to the https://adc.arm.gov/discovery/#/ and https://adc.arm.gov/solr8/metadata/select API to explore ARM datastream and associated available sites and facilities.)\n", + "\n", + "Please see the following examples in action" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d7e9eb85", + "metadata": {}, + "outputs": [], + "source": [ + "# Verify if DATA_DIR path exists\n", + "DATA_DIR = \"/data/archive\"\n", + "os.path.exists(DATA_DIR)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "586993fd", + "metadata": {}, + "outputs": [], + "source": [ + "# Specify datastream_dir following the path conventions and check its existence\n", + "DATASTREAM_NAME = \"aosccnavg\"\n", + "DATA_LEVEL = \"c1\"\n", + "site = \"mao\"\n", + "facility = \"M1\"\n", + "datastream_dir = os.path.join(DATA_DIR, site, site + DATASTREAM_NAME + facility + '.' 
+ DATA_LEVEL )\n", + "print(datastream_dir)\n", + "print(os.path.exists(datastream_dir))\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0742f7c1", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: list 5 (random) files under datastream_dir\n", + "files = os.listdir(datastream_dir)\n", + "files[:5]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "39b98a36", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: get most recent file\n", + "list_of_files = glob.glob(f\"{datastream_dir}/*\") # * means all if need specific format then *.csv\n", + "latest_file = max(list_of_files, key=os.path.getctime)\n", + "latest_file" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "902d514e", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus sort datastream files based on datetime\n", + "files = os.listdir(datastream_dir)\n", + "file_sorted = files.copy()\n", + "file_sorted.sort() \n", + "print(file_sorted[:5])\n", + "\n", + "# to reverse\n", + "file_sorted_reverse = files.copy() \n", + "file_sorted_reverse.sort(reverse=True)\n", + "print(file_sorted_reverse[:5])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ec5923b2", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: pattern matching\n", + "# filter the 200709** files under datastream_dir\n", + "files_filter = glob.glob(f'{datastream_dir}/*.200709*.*')\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c4a4aa18", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: use armnotebook_utils.file_filter (TODO) to filter files based on datastream info \n" + ] + }, + { + "cell_type": "markdown", + "id": "cfeb9efc", + "metadata": {}, + "source": [ + "### Load data\n", + "The arm data is stored in [NetCDF](https://en.wikipedia.org/wiki/NetCDF#:~:text=%22NetCDF%20(network%20Common%20Data%20Form,format%20for%20representing%20scientific%20data.) format. 
We can use xarray's `open_dataset` method to load single file, or `open_mfdataset` to open multiple files. (Note: the latter will still return a single xarray dataset object by combining multiple files.)\n", + "\n", + "Note: open_dataset keeps the file handle open and lazy loads its contents. All parameters are passed directly to open_dataset. It is a preferable method over load_dataset for memory-effiency. (more details, please see [xarray.open_dataset](https://docs.xarray.dev/en/stable/generated/xarray.open_dataset.html))\n", + "\n", + "See the following example in action" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a440a329", + "metadata": {}, + "outputs": [], + "source": [ + "# open single file\n", + "full_path = latest_file\n", + "print(full_path)\n", + "ds_single = xr.open_dataset(full_path)\n", + "ds_single" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0143a3d", + "metadata": {}, + "outputs": [], + "source": [ + "print(ds_single)\n", + "print(type(ds_single))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1c0f8939", + "metadata": {}, + "outputs": [], + "source": [ + "# open multiple files\n", + "n_files = 3\n", + "full_paths = [os.path.join(datastream_dir, f_path) for f_path in file_sorted_reverse[:n_files]]\n", + "print(full_paths)\n", + "try: # Note: sometimes multiple files cannot be merged, so we used try except here.\n", + " ds_mutiple = xr.open_mfdataset(full_paths)\n", + " # ds_mutiple\n", + " print(type(ds_mutiple))\n", + " print(ds_mutiple)\n", + "except Exception as e:\n", + " print(e)" + ] + }, + { + "cell_type": "markdown", + "id": "3fa59943", + "metadata": {}, + "source": [ + "## Explore the data" + ] + }, + { + "cell_type": "markdown", + "id": "571f69b9", + "metadata": {}, + "source": [ + "### NetCDF Data structure \n", + "\n", + "\n", + "(If you are confident with NetCDF basics and xarray essentials, feel free to skip this session.)\n", + "Before we dive into 
data exploration, there are some essential concepts we should be familiar with (shown below): \n", + "* Dataset\n", + "* Data array\n", + "* Variable\n", + "* Dimension\n", + "* Coordinate\n", + "* Data Type\n", + "* Meta Data (Attributes)\n", + "\n", + "We will not go into details about NetCDF basics and here are some references you might find helpful\n", + "* [Components of a NetCDF Dataset](https://iprc.soest.hawaii.edu/users/xfu/tool/guidef-7.html#)\n", + "* [Network Common Data Form (NetCDF)](https://www.unidata.ucar.edu/software/netcdf/) and [A Brief History of (netCDF) Time](https://www.unidata.ucar.edu/software/netcdf/time/recs.html)\n", + "\n", + "\n", + "\n", + "### Xarray essentials\n", + "Earlier we introduced the xarray package and used `open_dataset` and `open_mfdataset` to retrieve NetCDF data file as an xarray Dataset object. Recall that you can review the data in a notebook by using `print(ds)` or `ds`. \n", + "\n", + "Using xarray to retrieve the aforementioned NetCDF basics is straightforward. 
In general,\n", + "* Dataset: `ds`\n", + "* Data array: `ds.variables`\n", + "* Variable: `ds.variables`\n", + "* Dimension: `ds.dims`\n", + "* Coordinate: `ds.coords`\n", + "* Data Type: `type`\n", + "* Meta Data (Attributes): `ds.attrs`\n", + "\n", + "Also, here are some references if you are new to xarray\n", + "* [Xarray in 45 minutes](https://tutorial.xarray.dev/overview/xarray-in-45-min.html)\n", + "* [Handling NetCDF Files using XArray for Absolute Beginners](https://towardsdatascience.com/handling-netcdf-files-using-xarray-for-absolute-beginners-111a8ab4463f)\n", + "\n", + "Try the following commands in action\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b3eac323", + "metadata": {}, + "outputs": [], + "source": [ + "ds = ds_single" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "77ecf85d", + "metadata": {}, + "outputs": [], + "source": [ + "# Dataset \n", + "ds" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "25e7de09", + "metadata": {}, + "outputs": [], + "source": [ + "# Data array, Variable\n", + "# Note: the info can be overwhelming. 
Do not attempt to grasp everything at the first glance.\n", + "ds.variables" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8c41b67e", + "metadata": {}, + "outputs": [], + "source": [ + "# dimensions\n", + "ds.dims" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "156f1dfc", + "metadata": {}, + "outputs": [], + "source": [ + "# coordinates\n", + "ds.coords" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "277d6064", + "metadata": {}, + "outputs": [], + "source": [ + "# Meta Data (Attributes)\n", + "ds.attrs" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d334681f", + "metadata": {}, + "outputs": [], + "source": [ + "# type\n", + "print(type(ds))\n", + "print(type(ds.variables))\n", + "print(type(ds.dims))\n", + "print(type(ds.coords))\n", + "print(type(ds.attrs))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "643399d6", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: retrieve variable names only\n", + "list(ds.variables)" + ] + }, + { + "cell_type": "markdown", + "id": "1cf36878", + "metadata": {}, + "source": [ + "#### Discussion: variable vs. coordinates vs. dimensions. \n", + "From [Components of a NetCDF Dataset](https://iprc.soest.hawaii.edu/users/xfu/tool/guidef-7.html#):\n", + "\n", + "* Dimension: A dimension may be used to represent a real physical dimension, for example, time, latitude, longitude, or height. A dimension might also be used to index other quantities, for example station or model-run-number.\n", + "A netCDF dimension has both a name and a length. A dimension length is an arbitrary positive integer, except that one dimension in a netCDF dataset can have the length UNLIMITED.\n", + "\n", + "* Variables: Variables are used to store the bulk of the data in a netCDF dataset. A variable represents an array of values of the same type. A scalar value is treated as a 0-dimensional array. 
A variable has a name, a data type, and a shape described by its list of dimensions specified when the variable is created. A variable may also have associated attributes, which may be added, deleted or changed after the variable is created.\n", + "\n", + "* Coordinate (Variables): It is legal for a variable to have the same name as a dimension. Such variables have no special meaning to the netCDF library. However there is a convention that such variables should be treated in a special way by software using this library.\n", + "A variable with the same name as a dimension is called a coordinate variable. It typically defines a physical coordinate corresponding to that dimension.\n", + "\n", + "\n", + "Tips: By definition, coordinates and dimensions are also variables. For example ds.time is a coordinate but it is also a special variable. But in practice, when we talk about a variable, we mean a regular (non-coordinate) variable. We will use this convention for the rest of the notebook. " + ] + }, + { + "cell_type": "markdown", + "id": "d47120f9", + "metadata": {}, + "source": [ + "### Xarray Variable\n", + "\n", + "\n", + "We can use `ds.variables` to access the variables of a dataset. But printing out the whole dataset can often be overwhelming. Instead, we would work on an individual variable (also called an xarray data array) using the `ds[var_name]` syntax, where `var_name` is the name of the variable. See the following examples in action.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5a57e136", + "metadata": {}, + "outputs": [], + "source": [ + "print(type(ds[\"time\"]))\n", + "ds[\"time\"]" + ] + }, + { + "cell_type": "markdown", + "id": "51ec643e", + "metadata": {}, + "source": [ + "#### Variable properties\n", + "For an individual variable, the following properties are mostly used, with the associated xarray method to retrieve them. 
(assuming var = ds[var_name])\n", + "* name: `var.name`\n", + "* data content: `var.data`\n", + "* attributes: `var.attrs`\n", + "* dimensions: `var.dims`\n", + "* data type: `var.data.dtype`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a632a525", + "metadata": {}, + "outputs": [], + "source": [ + "var_name = \"time\"\n", + "var = ds[var_name]\n", + "\n", + "print(\"var.name: \\n\", var.name, \"\\n\")\n", + "print(\"var.data: \\n\", var.data, \"\\n\")\n", + "print(\"var.attrs: \\n\", var.attrs, \"\\n\")\n", + "print(\"var.dims: \\n\", var.dims, \"\\n\")\n", + "print(\"var.data.dtype: \\n\", var.data.dtype, \"\\n\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b256b07f", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: individual variable info table (collect and display variable info in a tabular format)\n", + "df_info = pd.DataFrame()\n", + "df_info[\"var_name\"] = list(ds.variables)\n", + "df_info[\"dims\"] = df_info.var_name.apply(lambda x: ds[x].dims)\n", + "df_info[\"is_dim\"] = df_info.var_name.apply(lambda x: x in ds.dims)\n", + "df_info[\"n_dim\"] = df_info.var_name.apply(lambda x: len(ds[x].dims))\n", + "df_info[\"attrs\"] = df_info.var_name.apply(lambda x: ds[x].attrs)\n", + "df_info[\"dtype\"] = df_info.var_name.apply(lambda x: ds[x].data.dtype)\n", + "df_info" + ] + }, + { + "cell_type": "markdown", + "id": "e6a36b96", + "metadata": {}, + "source": [ + "### Data cleaning/Preprocessing (skipped)\n", + "Data cleaning and preprocessing is an important stage in the data analysis pipeline. However, it is outside the scope of this notebook. For more information about data cleaning and preprocessing basics, here are some references. 
\n", + "* [Xarray Fundamentals](https://earth-env-data-science.github.io/lectures/xarray/xarray.html)\n", + "* [Xarray Tutorial — Pangeo Gallery documentation](http://gallery.pangeo.io/repos/pangeo-data/pangeo-tutorial-gallery/xarray.html)\n", + "* [Pythonic Data Cleaning With pandas and NumPy](https://realpython.com/python-data-cleaning-numpy-pandas/)\n", + "* [Pandas - Cleaning Data](https://www.w3schools.com/python/pandas/pandas_cleaning.asp)" + ] + }, + { + "cell_type": "markdown", + "id": "ae6e1a69", + "metadata": {}, + "source": [ + "### Plotting\n", + "Data visualization (or plotting) is another important data analysis topic and deserves its own discussion. Here in this notebook we will only demonstrate basic tools for simple data visualization tasks.\n", + "\n", + "Here are some references you might find useful:\n", + "* xarray plotting: https://docs.xarray.dev/en/stable/user-guide/plotting.html\n", + "* Atmospheric Community Toolkit (ACT): https://arm-doe.github.io/ACT/index.html\n", + "\n", + "Note: this notebook is auto-generated using a template. It uses a general idea to select variable(s) to plot and is not customized to each individual notebook. Feel free to change the variables of interest, especially if a figure fails to plot." + ] + }, + { + "cell_type": "markdown", + "id": "7e417fb2", + "metadata": {}, + "source": [ + "#### 1-dimensional basic time series plot\n", + "\n", + "For the following plot we would like to find variables such that\n", + "* it has one and only one dimension\n", + "* \"time\" is its coordinate variable\n", + "* it is not a dimension itself,\n", + "* it is not a special variable with substrings within [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "\n", + "Please see the following example in action. 
(For more details about pandas filtering, please see the following references.)\n", + "* [pandas: multiple conditions while indexing data frame](https://stackoverflow.com/questions/22591174/pandas-multiple-conditions-while-indexing-data-frame-unexpected-behavior)\n", + "* [pandas.DataFrame.apply](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.apply.html)\n", + "* [How to test if a string contains one of the substrings in a list...](https://stackoverflow.com/questions/26577516/how-to-test-if-a-string-contains-one-of-the-substrings-in-a-list-in-pandas)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "30edac2d", + "metadata": {}, + "outputs": [], + "source": [ + "# (pandas query) filter n_dim==1, non-dimensional, and not contains the following substrings\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter = df_info[(df_info.n_dim == 1) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (df_info.is_dim==False) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4777a659", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " var_1d = df_filter.var_name.values[0]\n", + " var_1d\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fb5ae985", + "metadata": {}, + "outputs": [], + "source": [ + "# Note: if failed, change to another variable to plot.\n", + "try:\n", + " ds[var_1d].plot()\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "markdown", + "id": "9e41066e", + "metadata": {}, + "source": [ + "#### 2-dimensional basic plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "09bd4adc", + "metadata": {}, + "outputs": [], + "source": [ + "# (pandas query) filter n_dim==2, non-dimensional, and not contains the 
following substrings\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_2 = df_info[(df_info.n_dim == 2) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (df_info.is_dim==False) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter_2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2cdea1a9", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " var_2d = df_filter_2.var_name.values[0]\n", + " var_2d\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f5f48879", + "metadata": {}, + "outputs": [], + "source": [ + "# Note: if failed, change to another variable to plot.\n", + "try:\n", + " print(ds[var_2d].dims)\n", + " # ds[var_2d].plot()\n", + "\n", + " # conventionally, use \"time\" as x-axis\n", + " ds[var_2d].plot(x=\"time\")\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "markdown", + "id": "7554b2a6", + "metadata": {}, + "source": [ + "#### qc-plotting (optional)\n", + "\n", + "Note: act qc plotting has more strict requirement, one of them is the associated qc variable need to have \"flag_masks\" attributes. 
Which is added by using ds = act.io.armfiles.read_netcdf(files_list), then ds.clean.cleanup()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "71c2096f", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " ds_act = act.io.armfiles.read_netcdf(full_path)\n", + " print(type(ds_act))\n", + " ds_act.clean.cleanup()\n", + "\n", + " # or \n", + " # ds.clean.cleanup()\n", + "except Exception as e:\n", + " print(\"ERROR\", e)\n", + " ds_act = ds" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "844f8505", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter valid variables for ACT qc plotting\n", + "condition = (\"qc_\" + df_info.var_name).apply(lambda x: ds_act[x].attrs.get(\"flag_masks\") is not None \n", + " if x in list(ds.variables) else False)\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_3 = df_info[(df_info.is_dim==False) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings))) &\n", + " condition\n", + " ]\n", + "df_filter_3" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ecd38cc6", + "metadata": {}, + "outputs": [], + "source": [ + "# QC Plot\n", + "try:\n", + " qc_variable = df_filter_3.var_name.values[0]\n", + " print(qc_variable)\n", + "except Exception as e:\n", + " print(e)\n", + " \n", + "try:\n", + "\n", + " # Plot\n", + " qc_display = act.plotting.TimeSeriesDisplay(ds_act)\n", + " qc_display.add_subplots((2,), figsize = (9.5,10))\n", + " qc_ax = qc_display.plot(qc_variable, subplot_index=(0,), set_title=\"QC results on field: \" + qc_variable,)\n", + " qc_ax.grid()\n", + " qc_display.qc_flag_block_plot(qc_variable, subplot_index=(1,))\n", + " qc_ax.set_xlabel(f\"UTC Time starts at {ds.time.data[0]}\")\n", + "\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e)" + ] + }, + { + "cell_type": "markdown", + "id": "d76e4d27", + "metadata": 
{}, + "source": [ + "#### bonus: choose variables to plot from a dropdown menu " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bb733804", + "metadata": {}, + "outputs": [], + "source": [ + "# Valid variables filtering\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_4 = df_info[(df_info.is_dim==False) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter_4" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "82d5c20d", + "metadata": {}, + "outputs": [], + "source": [ + "# example 1: using xarray plot\n", + "\n", + "# Uncomment the following cell to try the interactive plot (ctrl + /)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2fc9aaa4", + "metadata": {}, + "outputs": [], + "source": [ + "# %matplotlib widget\n", + "# plt.clf()\n", + "\n", + "# fig, ax = plt.subplots(figsize=(10, 4))\n", + "\n", + "# available_variables = df_filter_4.var_name.values\n", + "# @widgets.interact(var=available_variables)\n", + "# def update(var = available_variables[0]):\n", + "# fig.clear() # Remove old lines from plot and plot new one\n", + "# if len(ds[var].dims)==2:\n", + "# ds[var].plot(x=\"time\", add_colorbar=False)\n", + "# else:\n", + "# ds[var].plot()\n", + "# plt.grid()\n", + "# plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fabdd802", + "metadata": {}, + "outputs": [], + "source": [ + "# example 2: using act plot\n", + "\n", + "# Uncomment the following cell to try the interactive plot (ctrl + /)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "962b4186", + "metadata": {}, + "outputs": [], + "source": [ + "# %matplotlib widget\n", + "# plt.clf()\n", + "\n", + "# available_variables = df_filter_4.var_name.values\n", + "\n", + "\n", + "# @widgets.interact(var=available_variables)\n", + "# def 
update(var = available_variables[0]):\n", + "\n", + "# i_display = act.plotting.TimeSeriesDisplay(ds_act)\n", + "# i_display.add_subplots((1,), figsize=(10, 4))\n", + "# ax = i_display.plot(var, subplot_index=(0,), set_title=f\"{var} ({ds_act[var].attrs['long_name']})\",)\n", + "\n", + "# ax.set_xlabel(f\"UTC Time starts at {ds.time.data[0]}\")\n", + "# ax.grid()\n", + "# plt.show()\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.16" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/AOSCCNAVG/aosccnavg.c1.ipynb b/VAPs/quicklook/AOSCCNAVG/aosccnavg.c1.ipynb new file mode 100644 index 00000000..97b1a61f --- /dev/null +++ b/VAPs/quicklook/AOSCCNAVG/aosccnavg.c1.ipynb @@ -0,0 +1,339 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# AOSCCNAVG.C1 Plots\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/aosccnavg) for more information about this vap." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib widget\n", + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import ipywidgets as widgets\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "# Data archive directory\n", + "DATA_DIR = r'/data/archive/'\n", + "\n", + "# Datastream info\n", + "DATASTREAM_NAME = 'aosccnavg'\n", + "DATA_LEVEL = 'c1'\n", + "LOCATIONS = [{'end_date': '2014-10-19', 'facility': 'M1', 'site': 'mao', 'start_date': '2014-02-19'}, {'end_date': '2013-06-13', 'facility': 'M1', 'site': 'pvc', 'start_date': '2013-03-31'}, {'end_date': '2014-11-05', 'facility': 'C1', 'site': 'sgp', 'start_date': '2007-05-19'}]" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "9faaf875", + "metadata": {}, + "source": [ + "## Define site, facility, and date range" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ac6764f5", + "metadata": {}, + "outputs": [], + "source": [ + "print(\"The following locations and date ranges are available for this VAP:\")\n", + "display(pd.DataFrame(LOCATIONS, columns=['site', 'facility', 'start_date', 'end_date']))" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "8d132223", + "metadata": {}, + "source": [ + "#### Define site, facility, and date range (date format: YYYY-MM-DD) using the variables below:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e563983a", + "metadata": {}, + "outputs": [], + "source": [ + "site_facility = ( 'sgp', 'C1' )\n", + "\n", + "date_start = '2014-11-03'\n", + "date_end = '2014-11-05'" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "bccd3dfe-2f99-49a5-bace-ea37e8dd8fc3", + "metadata": {}, + "source": [ + "## Load data 
files\n", + "Load data files from /data/archive/" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cb4b9a26-c574-49c0-a521-658fa553e39e", + "metadata": {}, + "outputs": [], + "source": [ + "# Compile list of files\n", + "site, facility = site_facility\n", + "d_date_start = datetime.strptime(date_start, '%Y-%m-%d')\n", + "d_date_end = datetime.strptime(date_end, '%Y-%m-%d')\n", + "dir_path = os.path.join(DATA_DIR + site, site + DATASTREAM_NAME + facility + r'.' + DATA_LEVEL )\n", + "dir_path\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6be8f3dc", + "metadata": {}, + "outputs": [], + "source": [ + "from datetime import date, timedelta\n", + "import pandas as pd\n", + "\n", + "def get_ARM_formated_dates(start_date, end_date):\n", + " \"\"\"\n", + " Get a list of ARM conventional formated date lists, based on start_date and end_date(inclusive)\n", + " EXAMPLE:\n", + " get_ARM_formated_dates(start_date=\"20180219\", end_date=\"20180221\")\n", + " >> [\"20180219\", \"20180220\", \"20180221\"] \n", + " \"\"\"\n", + " \n", + " _start_date = pd.to_datetime(start_date)\n", + " _end_date = pd.to_datetime(end_date)\n", + " \n", + " delta = _end_date - _start_date # returns timedelta \n", + " dates = []\n", + "\n", + " for i in range(delta.days + 1):\n", + " day = _start_date + timedelta(days=i)\n", + " day_formated = day.strftime(format=\"%Y%m%d\")\n", + " dates.append(day_formated)\n", + " return dates\n", + "\n", + "\n", + "get_ARM_formated_dates(start_date=date_start, end_date=date_end)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "51feea2e", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter a list of files based on date pattern\n", + "import glob\n", + "dates = get_ARM_formated_dates(start_date=date_start, end_date=date_end)\n", + "files_filter = []\n", + "for date in dates:\n", + " files_filter += glob.glob(f'{dir_path}/*.{date}*.*')\n", + " files_filter\n", + "files_filter" + ] + 
}, + { + "cell_type": "code", + "execution_count": null, + "id": "b0e5d0d7", + "metadata": {}, + "outputs": [], + "source": [ + "# Load files as a single dataset\n", + "files_list = files_filter \n", + "ds = act.io.armfiles.read_netcdf(files_list)\n", + "ds.clean.cleanup()\n", + "print(f'{len(files_list)} files loaded')\n", + "ds\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "4a551094-9ec0-4b64-b80a-9940573c2f50", + "metadata": {}, + "source": [ + "## Plot time series data\n", + "#### Define the list of variables to be plotted:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d09b789e-84f1-4605-846b-a72c110c8048", + "metadata": {}, + "outputs": [], + "source": [ + "variables_to_plot = ['N_CCN', 'N_CPC']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3458fb08-035b-4898-9253-0a94e6f9c97b", + "metadata": {}, + "outputs": [], + "source": [ + "ts_display = act.plotting.TimeSeriesDisplay(ds)\n", + "ts_display.add_subplots((len(variables_to_plot),), figsize = (9.5,4*len(variables_to_plot)))\n", + "\n", + "for i,v in enumerate(variables_to_plot):\n", + " ts_ax = ts_display.plot(v, subplot_index=(i,), set_title=ds.variables[v].attrs['long_name'],)\n", + " ts_ax.grid()\n", + "\n", + "plt.show()\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "194399aa-1907-452b-8ba9-bc31d7f60291", + "metadata": {}, + "source": [ + "## Quality check plots\n", + "#### Define variable for QC plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9a0ef63d-3eeb-48fc-a24a-43258b6134b8", + "metadata": {}, + "outputs": [], + "source": [ + "qc_variable = 'supersaturation_setpoint'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "532663a3-4dc0-4497-bda8-018c5f91e1c4", + "metadata": {}, + "outputs": [], + "source": [ + "# QC Plot\n", + "if ('qc_'+qc_variable) in ds.variables:\n", + "\n", + " # Plot\n", + " qc_display = act.plotting.TimeSeriesDisplay(ds)\n", 
+ " qc_display.add_subplots((2,), figsize = (9.5,10))\n", + " qc_ax = qc_display.plot(qc_variable, subplot_index=(0,), set_title=\"QC results on field: \" + qc_variable,)\n", + " qc_ax.grid()\n", + " qc_display.qc_flag_block_plot(qc_variable, subplot_index=(1,))\n", + "\n", + " plt.show()\n", + "else:\n", + " print(f'QC not available for the selected field: {qc_variable}')\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "2308db16-f362-4033-a11a-c3e5e75ad9ba", + "metadata": {}, + "source": [ + "## Field selection dropdown menu\n", + "Select variable to be plotted from a dropdown menu" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d5792fa8-2253-441a-8ac6-1e07a0345031", + "metadata": {}, + "outputs": [], + "source": [ + "plt.ioff()\n", + "\n", + "# populate dropdown menu with available variables \n", + "available_variables = [v for v in ds.variables if not('time' in v or v.startswith('qc_') or v.startswith('source_')) and 'long_name' in ds.variables[v].attrs]\n", + "d_variable = 'N_CCN'\n", + "dropdown = widgets.Dropdown(\n", + " options = [(ds.variables[v].attrs['long_name'], v) for v in available_variables],\n", + " value= d_variable,\n", + " description='Field:',\n", + " disabled=False,\n", + ")\n", + "dropdown.layout.margin = '0px 30% 0px 20%'\n", + "dropdown.layout.width = '50%'\n", + "\n", + "# set up display\n", + "i_display = act.plotting.TimeSeriesDisplay(ds)\n", + "i_display.add_subplots((1,), figsize = (9.5,5))\n", + "i_ax = i_display.plot(d_variable, subplot_index=(0,), set_title=ds.variables[d_variable].attrs['long_name'],)\n", + "i_ax.grid()\n", + "i_fig = i_display.fig\n", + "\n", + "# update plot callback function\n", + "def update_plot(change):\n", + " i_ax.cla()\n", + " i_ax_new = i_display.plot(change.new, subplot_index=(0,), set_title=ds.variables[change.new].attrs['long_name'],)\n", + " i_ax_new.grid()\n", + " i_fig.canvas.draw()\n", + " i_fig.canvas.flush_events()\n", + "\n", + 
"dropdown.observe(update_plot, names='value')\n", + "\n", + "widgets.AppLayout(\n", + " header=dropdown,\n", + " center=i_fig.canvas,\n", + " pane_heights=[1, 6,1]\n", + ")\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.12" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/AOSCCNAVG/aosccnavg.c2.ipynb b/VAPs/quicklook/AOSCCNAVG/aosccnavg.c2.ipynb new file mode 100644 index 00000000..4c0fc3b3 --- /dev/null +++ b/VAPs/quicklook/AOSCCNAVG/aosccnavg.c2.ipynb @@ -0,0 +1,339 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# AOSCCNAVG.C2 Plots\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/aosccnavg) for more information about this vap." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib widget\n", + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import ipywidgets as widgets\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "# Data archive directory\n", + "DATA_DIR = r'/data/archive/'\n", + "\n", + "# Datastream info\n", + "DATASTREAM_NAME = 'aosccnavg'\n", + "DATA_LEVEL = 'c2'\n", + "LOCATIONS = [{'end_date': '2014-09-29', 'facility': 'M1', 'site': 'mao', 'start_date': '2014-07-01'}, {'end_date': '2010-12-30', 'facility': 'M1', 'site': 'grw', 'start_date': '2009-04-16'}, {'end_date': '2012-03-26', 'facility': 'M1', 'site': 'pgh', 'start_date': '2011-06-09'}, {'end_date': '2013-03-30', 'facility': 'M1', 'site': 'pvc', 'start_date': '2012-07-16'}, {'end_date': '2014-09-29', 'facility': 'C1', 'site': 'sgp', 'start_date': '2011-01-01'}]" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "9faaf875", + "metadata": {}, + "source": [ + "## Define site, facility, and date range" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ac6764f5", + "metadata": {}, + "outputs": [], + "source": [ + "print(\"The following locations and date ranges are available for this VAP:\")\n", + "display(pd.DataFrame(LOCATIONS, columns=['site', 'facility', 'start_date', 'end_date']))" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "8d132223", + "metadata": {}, + "source": [ + "#### Define site, facility, and date range (date format: YYYY-MM-DD) using the variables below:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e563983a", + "metadata": {}, + "outputs": [], + "source": [ + "site_facility = ( 'sgp', 'C1' )\n", + "\n", + "date_start = '2014-09-27'\n", + 
"date_end = '2014-09-29'" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "bccd3dfe-2f99-49a5-bace-ea37e8dd8fc3", + "metadata": {}, + "source": [ + "## Load data files\n", + "Load data files from /data/archive/" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cb4b9a26-c574-49c0-a521-658fa553e39e", + "metadata": {}, + "outputs": [], + "source": [ + "# Compile list of files\n", + "site, facility = site_facility\n", + "d_date_start = datetime.strptime(date_start, '%Y-%m-%d')\n", + "d_date_end = datetime.strptime(date_end, '%Y-%m-%d')\n", + "dir_path = os.path.join(DATA_DIR + site, site + DATASTREAM_NAME + facility + r'.' + DATA_LEVEL )\n", + "dir_path\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6be8f3dc", + "metadata": {}, + "outputs": [], + "source": [ + "from datetime import date, timedelta\n", + "import pandas as pd\n", + "\n", + "def get_ARM_formated_dates(start_date, end_date):\n", + " \"\"\"\n", + " Get a list of ARM conventional formated date lists, based on start_date and end_date(inclusive)\n", + " EXAMPLE:\n", + " get_ARM_formated_dates(start_date=\"20180219\", end_date=\"20180221\")\n", + " >> [\"20180219\", \"20180220\", \"20180221\"] \n", + " \"\"\"\n", + " \n", + " _start_date = pd.to_datetime(start_date)\n", + " _end_date = pd.to_datetime(end_date)\n", + " \n", + " delta = _end_date - _start_date # returns timedelta \n", + " dates = []\n", + "\n", + " for i in range(delta.days + 1):\n", + " day = _start_date + timedelta(days=i)\n", + " day_formated = day.strftime(format=\"%Y%m%d\")\n", + " dates.append(day_formated)\n", + " return dates\n", + "\n", + "\n", + "get_ARM_formated_dates(start_date=date_start, end_date=date_end)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "51feea2e", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter a list of files based on date pattern\n", + "import glob\n", + "dates = 
get_ARM_formated_dates(start_date=date_start, end_date=date_end)\n", + "files_filter = []\n", + "for date in dates:\n", + " files_filter += glob.glob(f'{dir_path}/*.{date}*.*')\n", + " files_filter\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0e5d0d7", + "metadata": {}, + "outputs": [], + "source": [ + "# Load files as a single dataset\n", + "files_list = files_filter \n", + "ds = act.io.armfiles.read_netcdf(files_list)\n", + "ds.clean.cleanup()\n", + "print(f'{len(files_list)} files loaded')\n", + "ds\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "4a551094-9ec0-4b64-b80a-9940573c2f50", + "metadata": {}, + "source": [ + "## Plot time series data\n", + "#### Define the list of variables to be plotted:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d09b789e-84f1-4605-846b-a72c110c8048", + "metadata": {}, + "outputs": [], + "source": [ + "variables_to_plot = ['N_CCN', 'N_CPC']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3458fb08-035b-4898-9253-0a94e6f9c97b", + "metadata": {}, + "outputs": [], + "source": [ + "ts_display = act.plotting.TimeSeriesDisplay(ds)\n", + "ts_display.add_subplots((len(variables_to_plot),), figsize = (9.5,4*len(variables_to_plot)))\n", + "\n", + "for i,v in enumerate(variables_to_plot):\n", + " ts_ax = ts_display.plot(v, subplot_index=(i,), set_title=ds.variables[v].attrs['long_name'],)\n", + " ts_ax.grid()\n", + "\n", + "plt.show()\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "194399aa-1907-452b-8ba9-bc31d7f60291", + "metadata": {}, + "source": [ + "## Quality check plots\n", + "#### Define variable for QC plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9a0ef63d-3eeb-48fc-a24a-43258b6134b8", + "metadata": {}, + "outputs": [], + "source": [ + "qc_variable = 'supersaturation_setpoint'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": 
"532663a3-4dc0-4497-bda8-018c5f91e1c4", + "metadata": {}, + "outputs": [], + "source": [ + "# QC Plot\n", + "if ('qc_'+qc_variable) in ds.variables:\n", + "\n", + " # Plot\n", + " qc_display = act.plotting.TimeSeriesDisplay(ds)\n", + " qc_display.add_subplots((2,), figsize = (9.5,10))\n", + " qc_ax = qc_display.plot(qc_variable, subplot_index=(0,), set_title=\"QC results on field: \" + qc_variable,)\n", + " qc_ax.grid()\n", + " qc_display.qc_flag_block_plot(qc_variable, subplot_index=(1,))\n", + "\n", + " plt.show()\n", + "else:\n", + " print(f'QC not available for the selected field: {qc_variable}')\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "2308db16-f362-4033-a11a-c3e5e75ad9ba", + "metadata": {}, + "source": [ + "## Field selection dropdown menu\n", + "Select variable to be plotted from a dropdown menu" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d5792fa8-2253-441a-8ac6-1e07a0345031", + "metadata": {}, + "outputs": [], + "source": [ + "plt.ioff()\n", + "\n", + "# populate dropdown menu with available variables \n", + "available_variables = [v for v in ds.variables if not('time' in v or v.startswith('qc_') or v.startswith('source_')) and 'long_name' in ds.variables[v].attrs]\n", + "d_variable = 'N_CCN'\n", + "dropdown = widgets.Dropdown(\n", + " options = [(ds.variables[v].attrs['long_name'], v) for v in available_variables],\n", + " value= d_variable,\n", + " description='Field:',\n", + " disabled=False,\n", + ")\n", + "dropdown.layout.margin = '0px 30% 0px 20%'\n", + "dropdown.layout.width = '50%'\n", + "\n", + "# set up display\n", + "i_display = act.plotting.TimeSeriesDisplay(ds)\n", + "i_display.add_subplots((1,), figsize = (9.5,5))\n", + "i_ax = i_display.plot(d_variable, subplot_index=(0,), set_title=ds.variables[d_variable].attrs['long_name'],)\n", + "i_ax.grid()\n", + "i_fig = i_display.fig\n", + "\n", + "# update plot callback function\n", + "def update_plot(change):\n", + " i_ax.cla()\n", + " 
i_ax_new = i_display.plot(change.new, subplot_index=(0,), set_title=ds.variables[change.new].attrs['long_name'],)\n", + " i_ax_new.grid()\n", + " i_fig.canvas.draw()\n", + " i_fig.canvas.flush_events()\n", + "\n", + "dropdown.observe(update_plot, names='value')\n", + "\n", + "widgets.AppLayout(\n", + " header=dropdown,\n", + " center=i_fig.canvas,\n", + " pane_heights=[1, 6,1]\n", + ")\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.12" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/AOSSP2BC/AOSSP2BC_tutorial.ipynb b/VAPs/quicklook/AOSSP2BC/AOSSP2BC_tutorial.ipynb new file mode 100644 index 00000000..b50440af --- /dev/null +++ b/VAPs/quicklook/AOSSP2BC/AOSSP2BC_tutorial.ipynb @@ -0,0 +1,838 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# AOSSP2RBC1M.C1 Notebook\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/aossp2bc) for more information about this vap." + ] + }, + { + "cell_type": "markdown", + "id": "97097763", + "metadata": {}, + "source": [ + "In this notebook, we demonstrate the workflow to explore ARM vap data (using aossp2rbc1m as an example.) Value-added products (VAPs) are higher-order data products that have been analyzed and processed to ease scientists’ use of ARM data in atmospheric research and global climate models. \n", + "Here is the main content we will cover." 
+ ] + }, + { + "cell_type": "markdown", + "id": "eddec40f", + "metadata": {}, + "source": [ + "# Table of Content\n", + "## Access the data\n", + "* How to retrieve the data\n", + "* Data path and file name conventions\n", + "* Load data\n", + "## Explore the data\n", + "* NetCDF Data structure\n", + "* Xarray essentials\n", + "* Xarray Variable\n", + "## Plot the data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "import random\n", + "\n", + "import glob\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "id": "2372d5be", + "metadata": {}, + "source": [ + "## Access the data" + ] + }, + { + "cell_type": "markdown", + "id": "5b0d9684", + "metadata": {}, + "source": [ + "### How to retrieve the data\n", + "We assume the path \"/data/archive\" is available where you are running this notebook. You can use `os.path.exists(\"/data/archive\")` to verify if the path exists at your machine.\n", + "\n", + "\n", + "### Data path and file name conventions\n", + "There several common terminologies regarding ARM data, for example, data-stream-name, data-level, site, facility, instrument, etc. (For more details, please see [ARM Data File Standards Version 1.2](https://www.google.com/search?q=arm+datastream+facility+cite+definition&rlz=1C1GCEJ_enUS1029US1029&ei=hb41ZICrBPukqtsPvMODIA&ved=0ahUKEwjAgeiS0aL-AhV7kmoFHbzhAAQQ4dUDCBA&uact=5&oq=arm+datastream+facility+cite+definition&gs_lcp=Cgxnd3Mtd2l6LXNlcnAQAzIFCAAQogQyBQgAEKIEMgUIABCiBDIFCAAQogQyBQgAEKIEOgoIABBHENYEELADOgoIIRCgARDDBBAKSgQIQRgAUJ4DWIoOYO0PaAFwAXgAgAGMAYgBuQmSAQMyLjmYAQCgAQHIAQjAAQE&sclient=gws-wiz-serp).) 
\n", + "\n", + "For example, this notebook is called `aossp2rbc1m.c1`, where `aossp2rbc1m` is the \"datastream name\", and `c1` is the \"data level\".\n", + "\n", + "This datastream also contains site `mao` and facility `S1`. (Note: individual datastream might have multiple site-facility pairs.)\n", + "In such a case, the data of this data-stream is stored at `/data/archive/mao/maoaossp2rbc1mS1.c1`, which is in the format of `<DATA_DIR>/<site>/<site><datastream_name><facility>.<data_level>`. We can use the following method to assign the data-stream directory `datastream_dir = os.path.join(DATA_DIR, site, site + DATASTREAM_NAME + facility + '.' + DATA_LEVEL )`\n", + "\n", + "The data files under datastream_dir also follow naming conventions. But once you reach the datastream_dir level, the most important file naming convention to differentiate the files is \"yyyyMMdd.hhmmss\", which comes in handy to filter out files based on datetime. For example, we can use `glob.glob(f'{datastream_dir}/*.200709*.*')` to filter files in 2007 September.\n", + "\n", + "(Note: refer to the https://adc.arm.gov/discovery/#/ and https://adc.arm.gov/solr8/metadata/select API to explore ARM datastream and associated available sites and facilities.)\n", + "\n", + "Please see the following examples in action" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d7e9eb85", + "metadata": {}, + "outputs": [], + "source": [ + "# Verify if DATA_DIR path exists\n", + "DATA_DIR = \"/data/archive\"\n", + "os.path.exists(DATA_DIR)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "586993fd", + "metadata": {}, + "outputs": [], + "source": [ + "# Speicify datastream_dir following the path conventions and check its existence\n", + "DATASTREAM_NAME = \"aossp2rbc1m\"\n", + "DATA_LEVEL = \"c1\"\n", + "site = \"mao\"\n", + "facility = \"S1\"\n", + "datastream_dir = os.path.join(DATA_DIR, site, site + DATASTREAM_NAME + facility + '.' 
+ DATA_LEVEL )\n", + "print(datastream_dir)\n", + "print(os.path.exists(datastream_dir))\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0742f7c1", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: list 5 (random) files under datastream_dir\n", + "files = os.listdir(datastream_dir)\n", + "files[:5]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "39b98a36", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: get most recent file\n", + "list_of_files = glob.glob(f\"{datastream_dir}/*\") # * means all if need specific format then *.csv\n", + "latest_file = max(list_of_files, key=os.path.getctime)\n", + "latest_file" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "902d514e", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus sort datastream files based on datetime\n", + "files = os.listdir(datastream_dir)\n", + "file_sorted = files.copy()\n", + "file_sorted.sort() \n", + "print(file_sorted[:5])\n", + "\n", + "# to reverse\n", + "file_sorted_reverse = files.copy() \n", + "file_sorted_reverse.sort(reverse=True)\n", + "print(file_sorted_reverse[:5])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ec5923b2", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: pattern matching\n", + "# filter the 200709** files under datastream_dir\n", + "files_filter = glob.glob(f'{datastream_dir}/*.200709*.*')\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c4a4aa18", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: use armnotebook_utils.file_filter (TODO) to filter files based on datastream info \n" + ] + }, + { + "cell_type": "markdown", + "id": "cfeb9efc", + "metadata": {}, + "source": [ + "### Load data\n", + "The arm data is stored in [NetCDF](https://en.wikipedia.org/wiki/NetCDF#:~:text=%22NetCDF%20(network%20Common%20Data%20Form,format%20for%20representing%20scientific%20data.) format. 
We can use xarray's `open_dataset` method to load single file, or `open_mfdataset` to open multiple files. (Note: the latter will still return a single xarray dataset object by combining multiple files.)\n", + "\n", + "Note: open_dataset keeps the file handle open and lazy loads its contents. All parameters are passed directly to open_dataset. It is a preferable method over load_dataset for memory-effiency. (more details, please see [xarray.open_dataset](https://docs.xarray.dev/en/stable/generated/xarray.open_dataset.html))\n", + "\n", + "See the following example in action" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a440a329", + "metadata": {}, + "outputs": [], + "source": [ + "# open single file\n", + "full_path = latest_file\n", + "print(full_path)\n", + "ds_single = xr.open_dataset(full_path)\n", + "ds_single" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0143a3d", + "metadata": {}, + "outputs": [], + "source": [ + "print(ds_single)\n", + "print(type(ds_single))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1c0f8939", + "metadata": {}, + "outputs": [], + "source": [ + "# open multiple files\n", + "n_files = 3\n", + "full_paths = [os.path.join(datastream_dir, f_path) for f_path in file_sorted_reverse[:n_files]]\n", + "print(full_paths)\n", + "try: # Note: sometimes multiple files cannot be merged, so we used try except here.\n", + " ds_mutiple = xr.open_mfdataset(full_paths)\n", + " # ds_mutiple\n", + " print(type(ds_mutiple))\n", + " print(ds_mutiple)\n", + "except Exception as e:\n", + " print(e)" + ] + }, + { + "cell_type": "markdown", + "id": "3fa59943", + "metadata": {}, + "source": [ + "## Explore the data" + ] + }, + { + "cell_type": "markdown", + "id": "571f69b9", + "metadata": {}, + "source": [ + "### NetCDF Data structure \n", + "\n", + "\n", + "(If you are confident with NetCDF basics and xarray essentials, feel free to skip this session.)\n", + "Before we dive into 
data exploration, there are some essential concepts we should be familiar with (shown below): \n", + "* Dataset\n", + "* Data array\n", + "* Variable\n", + "* Dimension\n", + "* Coordinate\n", + "* Data Type\n", + "* Meta Data (Attributes)\n", + "\n", + "We will not go into details about NetCDF basics and here are some references you might find helpful\n", + "* [Components of a NetCDF Dataset](https://iprc.soest.hawaii.edu/users/xfu/tool/guidef-7.html#)\n", + "* [Network Common Data Form (NetCDF)](https://www.unidata.ucar.edu/software/netcdf/) and [A Brief History of (netCDF) Time](https://www.unidata.ucar.edu/software/netcdf/time/recs.html)\n", + "\n", + "\n", + "\n", + "### Xarray essentials\n", + "Earlier we introduced the xarray package and used `open_dataset` and `open_mfdataset` to retrieve a NetCDF data file as an xarray Dataset object. Recall that you can review the data in a notebook by using `print(ds)` or `ds`. \n", + "\n", + "Using xarray to retrieve the aforementioned NetCDF basics is straightforward. 
In general,\n", + "* Dataset: `ds`\n", + "* Data array: `ds.variables`\n", + "* Variable: `ds.variables`\n", + "* Dimension: `ds.dims`\n", + "* Coordinate: `ds.coords`\n", + "* Data Type: `type`\n", + "* Meta Data (Attributes): `ds.attrs`\n", + "\n", + "Also, here are some references if you are new to xarray\n", + "* [Xarray in 45 minutes](https://tutorial.xarray.dev/overview/xarray-in-45-min.html)\n", + "* [Handling NetCDF Files using XArray for Absolute Beginners](https://towardsdatascience.com/handling-netcdf-files-using-xarray-for-absolute-beginners-111a8ab4463f)\n", + "\n", + "Try the following commands in action\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b3eac323", + "metadata": {}, + "outputs": [], + "source": [ + "ds = ds_single" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "77ecf85d", + "metadata": {}, + "outputs": [], + "source": [ + "# Dataset \n", + "ds" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "25e7de09", + "metadata": {}, + "outputs": [], + "source": [ + "# Data array, Variable\n", + "# Note: the info can be overwhelming. 
Do not attempt to grasp everything at the first glance.\n", + "ds.variables" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8c41b67e", + "metadata": {}, + "outputs": [], + "source": [ + "# dimenssions\n", + "ds.dims" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "156f1dfc", + "metadata": {}, + "outputs": [], + "source": [ + "# coordinates\n", + "ds.coords" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "277d6064", + "metadata": {}, + "outputs": [], + "source": [ + "# Meta Data (Attributes)\n", + "ds.attrs" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d334681f", + "metadata": {}, + "outputs": [], + "source": [ + "# type\n", + "print(type(ds))\n", + "print(type(ds.variables))\n", + "print(type(ds.dims))\n", + "print(type(ds.coords))\n", + "print(type(ds.attrs))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "643399d6", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: retrieve variable names only\n", + "list(ds.variables)" + ] + }, + { + "cell_type": "markdown", + "id": "1cf36878", + "metadata": {}, + "source": [ + "#### Discussion: variable vs. coordinates vs. dimensions. \n", + "From [Components of a NetCDF Dataset](https://iprc.soest.hawaii.edu/users/xfu/tool/guidef-7.html#):\n", + "\n", + "* Dimension: A dimension may be used to represent a real physical dimension, for example, time, latitude, longitude, or height. A dimension might also be used to index other quantities, for example station or model-run-number.\n", + "A netCDF dimension has both a name and a length. A dimension length is an arbitrary positive integer, except that one dimension in a netCDF dataset can have the length UNLIMITED.\n", + "\n", + "* Variables: Variables are used to store the bulk of the data in a netCDF dataset. A variable represents an array of values of the same type. A scalar value is treated as a 0-dimensional array. 
A variable has a name, a data type, and a shape described by its list of dimensions specified when the variable is created. A variable may also have associated attributes, which may be added, deleted or changed after the variable is created.\n", + "\n", + "* Coordinate (Variables): It is legal for a variable to have the same name as a dimension. Such variables have no special meaning to the netCDF library. However there is a convention that such variables should be treated in a special way by software using this library.\n", + "A variable with the same name as a dimension is called a coordinate variable. It typically defines a physical coordinate corresponding to that dimension.\n", + "\n", + "\n", + "Tips: By definition, coordinates and dimensions are also variables. For example, `ds.time` is a coordinate but it is also a special variable. But in practice, when we talk about a variable, we mean a regular (non-coordinate) variable. We will use this convention for the remainder of the notebook. " + ] + }, + { + "cell_type": "markdown", + "id": "d47120f9", + "metadata": {}, + "source": [ + "### Xarray Variable\n", + "\n", + "\n", + "We can use `ds.variables` to access the variables of a dataset. But printing out the whole dataset can often be overwhelming. Instead, we work on an individual variable (also called an xarray data array) using the `ds[var_name]` syntax, where `var_name` is the name of the variable. See the following examples in action.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5a57e136", + "metadata": {}, + "outputs": [], + "source": [ + "print(type(ds[\"time\"]))\n", + "ds[\"time\"]" + ] + }, + { + "cell_type": "markdown", + "id": "51ec643e", + "metadata": {}, + "source": [ + "#### Variable properties\n", + "For an individual variable, the following properties are commonly used, along with the associated xarray attribute to retrieve them. 
(assuming var = ds[var_name])\n", + "* name: `var.name`\n", + "* data content: `var.data`\n", + "* attributes: `var.attrs`\n", + "* dimensions: `var.dims`\n", + "* data type: `var.data.dtype`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a632a525", + "metadata": {}, + "outputs": [], + "source": [ + "var_name = \"time\"\n", + "var = ds[var_name]\n", + "\n", + "print(\"var.name: \\n\", var.name, \"\\n\")\n", + "print(\"var.data: \\n\", var.data, \"\\n\")\n", + "print(\"var.attrs: \\n\", var.attrs, \"\\n\")\n", + "print(\"var.dims: \\n\", var.dims, \"\\n\")\n", + "print(\"var.data.dtype: \\n\", var.data.dtype, \"\\n\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b256b07f", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: indivdual variable info table (collect and display variable info as a tabular format)\n", + "df_info = pd.DataFrame()\n", + "df_info[\"var_name\"] = list(ds.variables)\n", + "df_info[\"dims\"] = df_info.var_name.apply(lambda x: ds[x].dims)\n", + "df_info[\"is_dim\"] = df_info.var_name.apply(lambda x: x in ds.dims)\n", + "df_info[\"n_dim\"] = df_info.var_name.apply(lambda x: len(ds[x].dims))\n", + "df_info[\"attrs\"] = df_info.var_name.apply(lambda x: ds[x].attrs)\n", + "df_info[\"dtype\"] = df_info.var_name.apply(lambda x: ds[x].data.dtype)\n", + "df_info" + ] + }, + { + "cell_type": "markdown", + "id": "e6a36b96", + "metadata": {}, + "source": [ + "### Data cleaning/Preprocessing (skipped)\n", + "Data cleaning and preprocessing is an important stage in the data analysis pipeline. However, it is outside the scope of this notebook. For more information about data cleaning and preprocessing basics, here are some references. 
\n", + "* [Xarray Fundamentals](https://earth-env-data-science.github.io/lectures/xarray/xarray.html)\n", + "* [Xarray Tutorial — Pangeo Gallery documentation](http://gallery.pangeo.io/repos/pangeo-data/pangeo-tutorial-gallery/xarray.html)\n", + "* [Pythonic Data Cleaning With pandas and NumPy](https://realpython.com/python-data-cleaning-numpy-pandas/)\n", + "* [Pandas - Cleaning Data](https://www.w3schools.com/python/pandas/pandas_cleaning.asp)" + ] + }, + { + "cell_type": "markdown", + "id": "ae6e1a69", + "metadata": {}, + "source": [ + "### Plotting\n", + "Data visualization (or plotting) is another important data analysis topic and deserves its own discussion. Here in this notebook we will only demonstrate basic tools for simple data visualization tasks.\n", + "\n", + "Here are some references you might find useful:\n", + "* xarray plotting: https://docs.xarray.dev/en/stable/user-guide/plotting.html\n", + "* Atmospheric Community Toolkit (ACT): https://arm-doe.github.io/ACT/index.html\n", + "\n", + "Note: this notebook is auto-generated using a template. It uses a general idea to select variable(s) to plot and is not customized to each individual notebook. Feel free to change the variables of interest, especially if a certain figure fails to plot." + ] + }, + { + "cell_type": "markdown", + "id": "7e417fb2", + "metadata": {}, + "source": [ + "#### 1-dimensional basic time series plot\n", + "\n", + "For the following plot we would like to find variables such that\n", + "* it has one and only one dimension\n", + "* \"time\" is its coordinate variable\n", + "* it is not a dimension itself,\n", + "* it is not a special variable with substrings within [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "\n", + "Please see the following example in action. 
(For more details about pandas filtering, please see the following references.)\n", + "* [pandas: multiple conditions while indexing data frame](https://stackoverflow.com/questions/22591174/pandas-multiple-conditions-while-indexing-data-frame-unexpected-behavior)\n", + "* [pandas.DataFrame.apply](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.apply.html)\n", + "* [How to test if a string contains one of the substrings in a list...](https://stackoverflow.com/questions/26577516/how-to-test-if-a-string-contains-one-of-the-substrings-in-a-list-in-pandas)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "30edac2d", + "metadata": {}, + "outputs": [], + "source": [ + "# (pandas query) filter n_dim==1, non-dimenssional, and not contains the following substrings\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter = df_info[(df_info.n_dim == 1) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (df_info.is_dim==False) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4777a659", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " var_1d = df_filter.var_name.values[0]\n", + " var_1d\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fb5ae985", + "metadata": {}, + "outputs": [], + "source": [ + "# Note: if failed, change to another variable to plot.\n", + "try:\n", + " ds[var_1d].plot()\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "markdown", + "id": "9e41066e", + "metadata": {}, + "source": [ + "#### 2-dimensional basic plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "09bd4adc", + "metadata": {}, + "outputs": [], + "source": [ + "# (pandas query) filter n_dim==2, non-dimenssional, and not contains the 
following substrings\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_2 = df_info[(df_info.n_dim == 2) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (df_info.is_dim==False) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter_2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2cdea1a9", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " var_2d = df_filter_2.var_name.values[0]\n", + " var_2d\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f5f48879", + "metadata": {}, + "outputs": [], + "source": [ + "# Note: if failed, change to another variable to plot.\n", + "try:\n", + " print(ds[var_2d].dims)\n", + " # ds[var_2d].plot()\n", + "\n", + " # conventionally, use \"time\" as x-axis\n", + " ds[var_2d].plot(x=\"time\")\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "markdown", + "id": "7554b2a6", + "metadata": {}, + "source": [ + "#### qc-plotting (optional)\n", + "\n", + "Note: act qc plotting has more strict requirement, one of them is the associated qc variable need to have \"flag_masks\" attributes. 
Which is added by using ds = act.io.armfiles.read_netcdf(files_list), then ds.clean.cleanup()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "71c2096f", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " ds_act = act.io.armfiles.read_netcdf(full_path)\n", + " print(type(ds_act))\n", + " ds_act.clean.cleanup()\n", + "\n", + " # or \n", + " # ds.clean.cleanup()\n", + "except Exception as e:\n", + " print(\"ERROR\", e)\n", + " ds_act = ds" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "844f8505", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter valid variables for ACT qc plotting\n", + "condition = (\"qc_\" + df_info.var_name).apply(lambda x: ds_act[x].attrs.get(\"flag_masks\") is not None \n", + " if x in list(ds.variables) else False)\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_3 = df_info[(df_info.is_dim==False) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings))) &\n", + " condition\n", + " ]\n", + "df_filter_3" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ecd38cc6", + "metadata": {}, + "outputs": [], + "source": [ + "# QC Plot\n", + "try:\n", + " qc_variable = df_filter_3.var_name.values[0]\n", + " print(qc_variable)\n", + "except Exception as e:\n", + " print(e)\n", + " \n", + "try:\n", + "\n", + " # Plot\n", + " qc_display = act.plotting.TimeSeriesDisplay(ds_act)\n", + " qc_display.add_subplots((2,), figsize = (9.5,10))\n", + " qc_ax = qc_display.plot(qc_variable, subplot_index=(0,), set_title=\"QC results on field: \" + qc_variable,)\n", + " qc_ax.grid()\n", + " qc_display.qc_flag_block_plot(qc_variable, subplot_index=(1,))\n", + " qc_ax.set_xlabel(f\"UTC Time starts at {ds.time.data[0]}\")\n", + "\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e)" + ] + }, + { + "cell_type": "markdown", + "id": "d76e4d27", + "metadata": 
{}, + "source": [ + "#### bonus: choose variables to plot from a dropdown menu " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bb733804", + "metadata": {}, + "outputs": [], + "source": [ + "# Valid variables filtering\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_4 = df_info[(df_info.is_dim==False) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter_4" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "82d5c20d", + "metadata": {}, + "outputs": [], + "source": [ + "# example 1: using xarray plot\n", + "\n", + "# Uncomment the following cell to try the interactive plot (ctrl + /)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2fc9aaa4", + "metadata": {}, + "outputs": [], + "source": [ + "# %matplotlib widget\n", + "# plt.clf()\n", + "\n", + "# fig, ax = plt.subplots(figsize=(10, 4))\n", + "\n", + "# available_variables = df_filter_4.var_name.values\n", + "# @widgets.interact(var=available_variables)\n", + "# def update(var = available_variables[0]):\n", + "# fig.clear() # Remove old lines from plot and plot new one\n", + "# if len(ds[var].dims)==2:\n", + "# ds[var].plot(x=\"time\", add_colorbar=False)\n", + "# else:\n", + "# ds[var].plot()\n", + "# plt.grid()\n", + "# plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fabdd802", + "metadata": {}, + "outputs": [], + "source": [ + "# example 2: using act plot\n", + "\n", + "# Uncomment the following cell to try the interactive plot (ctrl + /)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "962b4186", + "metadata": {}, + "outputs": [], + "source": [ + "# %matplotlib widget\n", + "# plt.clf()\n", + "\n", + "# available_variables = df_filter_4.var_name.values\n", + "\n", + "\n", + "# @widgets.interact(var=available_variables)\n", + "# def 
update(var = available_variables[0]):\n", + "\n", + "# i_display = act.plotting.TimeSeriesDisplay(ds_act)\n", + "# i_display.add_subplots((1,), figsize=(10, 4))\n", + "# ax = i_display.plot(var, subplot_index=(0,), set_title=f\"{var} ({ds_act[var].attrs['long_name']})\",)\n", + "\n", + "# ax.set_xlabel(f\"UTC Time starts at {ds.time.data[0]}\")\n", + "# ax.grid()\n", + "# plt.show()\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.16" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/AOSSP2BC/aossp2rbc1m.c1.ipynb b/VAPs/quicklook/AOSSP2BC/aossp2rbc1m.c1.ipynb new file mode 100644 index 00000000..ab355710 --- /dev/null +++ b/VAPs/quicklook/AOSSP2BC/aossp2rbc1m.c1.ipynb @@ -0,0 +1,339 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# AOSSP2RBC1M.C1 Plots\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/aossp2bc) for more information about this vap." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib widget\n", + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import ipywidgets as widgets\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "# Data archive directory\n", + "DATA_DIR = r'/data/archive/'\n", + "\n", + "# Datastream info\n", + "DATASTREAM_NAME = 'aossp2rbc1m'\n", + "DATA_LEVEL = 'c1'\n", + "LOCATIONS = [{'end_date': '2014-10-19', 'facility': 'S1', 'site': 'mao', 'start_date': '2014-02-17'}]" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "9faaf875", + "metadata": {}, + "source": [ + "## Define site, facility, and date range" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ac6764f5", + "metadata": {}, + "outputs": [], + "source": [ + "print(\"The following locations and date ranges are available for this VAP:\")\n", + "display(pd.DataFrame(LOCATIONS, columns=['site', 'facility', 'start_date', 'end_date']))" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "8d132223", + "metadata": {}, + "source": [ + "#### Define site, facility, and date range (date format: YYYY-MM-DD) using the variables below:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e563983a", + "metadata": {}, + "outputs": [], + "source": [ + "site_facility = ( 'mao', 'S1' )\n", + "\n", + "date_start = '2014-10-17'\n", + "date_end = '2014-10-19'" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "bccd3dfe-2f99-49a5-bace-ea37e8dd8fc3", + "metadata": {}, + "source": [ + "## Load data files\n", + "Load data files from /data/archive/" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cb4b9a26-c574-49c0-a521-658fa553e39e", + "metadata": 
{}, + "outputs": [], + "source": [ + "# Compile list of files\n", + "site, facility = site_facility\n", + "d_date_start = datetime.strptime(date_start, '%Y-%m-%d')\n", + "d_date_end = datetime.strptime(date_end, '%Y-%m-%d')\n", + "dir_path = os.path.join(DATA_DIR + site, site + DATASTREAM_NAME + facility + r'.' + DATA_LEVEL )\n", + "dir_path\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6be8f3dc", + "metadata": {}, + "outputs": [], + "source": [ + "from datetime import date, timedelta\n", + "import pandas as pd\n", + "\n", + "def get_ARM_formated_dates(start_date, end_date):\n", + " \"\"\"\n", + " Get a list of ARM conventional formated date lists, based on start_date and end_date(inclusive)\n", + " EXAMPLE:\n", + " get_ARM_formated_dates(start_date=\"20180219\", end_date=\"20180221\")\n", + " >> [\"20180219\", \"20180220\", \"20180221\"] \n", + " \"\"\"\n", + " \n", + " _start_date = pd.to_datetime(start_date)\n", + " _end_date = pd.to_datetime(end_date)\n", + " \n", + " delta = _end_date - _start_date # returns timedelta \n", + " dates = []\n", + "\n", + " for i in range(delta.days + 1):\n", + " day = _start_date + timedelta(days=i)\n", + " day_formated = day.strftime(format=\"%Y%m%d\")\n", + " dates.append(day_formated)\n", + " return dates\n", + "\n", + "\n", + "get_ARM_formated_dates(start_date=date_start, end_date=date_end)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "51feea2e", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter a list of files based on date pattern\n", + "import glob\n", + "dates = get_ARM_formated_dates(start_date=date_start, end_date=date_end)\n", + "files_filter = []\n", + "for date in dates:\n", + " files_filter += glob.glob(f'{dir_path}/*.{date}*.*')\n", + " files_filter\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0e5d0d7", + "metadata": {}, + "outputs": [], + "source": [ + "# Load files as a single dataset\n", + 
"files_list = files_filter \n", + "ds = act.io.armfiles.read_netcdf(files_list)\n", + "ds.clean.cleanup()\n", + "print(f'{len(files_list)} files loaded')\n", + "ds\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "4a551094-9ec0-4b64-b80a-9940573c2f50", + "metadata": {}, + "source": [ + "## Plot time series data\n", + "#### Define the list of variables to be plotted:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d09b789e-84f1-4605-846b-a72c110c8048", + "metadata": {}, + "outputs": [], + "source": [ + "variables_to_plot = ['rBC', 'N_dN_rBC', 'N_dN_pure_scattering']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3458fb08-035b-4898-9253-0a94e6f9c97b", + "metadata": {}, + "outputs": [], + "source": [ + "ts_display = act.plotting.TimeSeriesDisplay(ds)\n", + "ts_display.add_subplots((len(variables_to_plot),), figsize = (9.5,4*len(variables_to_plot)))\n", + "\n", + "for i,v in enumerate(variables_to_plot):\n", + " ts_ax = ts_display.plot(v, subplot_index=(i,), set_title=ds.variables[v].attrs['long_name'],)\n", + " ts_ax.grid()\n", + "\n", + "plt.show()\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "194399aa-1907-452b-8ba9-bc31d7f60291", + "metadata": {}, + "source": [ + "## Quality check plots\n", + "#### Define variable for QC plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9a0ef63d-3eeb-48fc-a24a-43258b6134b8", + "metadata": {}, + "outputs": [], + "source": [ + "qc_variable = 'rBC'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "532663a3-4dc0-4497-bda8-018c5f91e1c4", + "metadata": {}, + "outputs": [], + "source": [ + "# QC Plot\n", + "if ('qc_'+qc_variable) in ds.variables:\n", + "\n", + " # Plot\n", + " qc_display = act.plotting.TimeSeriesDisplay(ds)\n", + " qc_display.add_subplots((2,), figsize = (9.5,10))\n", + " qc_ax = qc_display.plot(qc_variable, subplot_index=(0,), set_title=\"QC results on field: \" + 
qc_variable,)\n", + " qc_ax.grid()\n", + " qc_display.qc_flag_block_plot(qc_variable, subplot_index=(1,))\n", + "\n", + " plt.show()\n", + "else:\n", + " print(f'QC not available for the selected field: {qc_variable}')\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "2308db16-f362-4033-a11a-c3e5e75ad9ba", + "metadata": {}, + "source": [ + "## Field selection dropdown menu\n", + "Select variable to be plotted from a dropdown menu" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d5792fa8-2253-441a-8ac6-1e07a0345031", + "metadata": {}, + "outputs": [], + "source": [ + "plt.ioff()\n", + "\n", + "# populate dropdown menu with available variables \n", + "available_variables = [v for v in ds.variables if not('time' in v or v.startswith('qc_') or v.startswith('source_')) and 'long_name' in ds.variables[v].attrs]\n", + "d_variable = 'rBC'\n", + "dropdown = widgets.Dropdown(\n", + " options = [(ds.variables[v].attrs['long_name'], v) for v in available_variables],\n", + " value= d_variable,\n", + " description='Field:',\n", + " disabled=False,\n", + ")\n", + "dropdown.layout.margin = '0px 30% 0px 20%'\n", + "dropdown.layout.width = '50%'\n", + "\n", + "# set up display\n", + "i_display = act.plotting.TimeSeriesDisplay(ds)\n", + "i_display.add_subplots((1,), figsize = (9.5,5))\n", + "i_ax = i_display.plot(d_variable, subplot_index=(0,), set_title=ds.variables[d_variable].attrs['long_name'],)\n", + "i_ax.grid()\n", + "i_fig = i_display.fig\n", + "\n", + "# update plot callback function\n", + "def update_plot(change):\n", + " i_ax.cla()\n", + " i_ax_new = i_display.plot(change.new, subplot_index=(0,), set_title=ds.variables[change.new].attrs['long_name'],)\n", + " i_ax_new.grid()\n", + " i_fig.canvas.draw()\n", + " i_fig.canvas.flush_events()\n", + "\n", + "dropdown.observe(update_plot, names='value')\n", + "\n", + "widgets.AppLayout(\n", + " header=dropdown,\n", + " center=i_fig.canvas,\n", + " pane_heights=[1, 6,1]\n", + ")\n" + 
] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.12" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/ARMBE/ARMBE_tutorial.ipynb b/VAPs/quicklook/ARMBE/ARMBE_tutorial.ipynb new file mode 100644 index 00000000..a74da809 --- /dev/null +++ b/VAPs/quicklook/ARMBE/ARMBE_tutorial.ipynb @@ -0,0 +1,838 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# ARMBEATM.C1 Notebook\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/armbe) for more information about this vap." + ] + }, + { + "cell_type": "markdown", + "id": "97097763", + "metadata": {}, + "source": [ + "In this notebook, we demonstrate the workflow to explore ARM vap data (using armbeatm as an example.) Value-added products (VAPs) are higher-order data products that have been analyzed and processed to ease scientists’ use of ARM data in atmospheric research and global climate models. \n", + "Here is the main content we will cover." 
+ ] + }, + { + "cell_type": "markdown", + "id": "eddec40f", + "metadata": {}, + "source": [ + "# Table of Content\n", + "## Access the data\n", + "* How to retrieve the data\n", + "* Data path and file name conventions\n", + "* Load data\n", + "## Explore the data\n", + "* NetCDF Data structure\n", + "* Xarray essentials\n", + "* Xarray Variable\n", + "## Plot the data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "import random\n", + "\n", + "import glob\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "id": "2372d5be", + "metadata": {}, + "source": [ + "## Access the data" + ] + }, + { + "cell_type": "markdown", + "id": "5b0d9684", + "metadata": {}, + "source": [ + "### How to retrieve the data\n", + "We assume the path \"/data/archive\" is available where you are running this notebook. You can use `os.path.exists(\"/data/archive\")` to verify if the path exists at your machine.\n", + "\n", + "\n", + "### Data path and file name conventions\n", + "There several common terminologies regarding ARM data, for example, data-stream-name, data-level, site, facility, instrument, etc. (For more details, please see [ARM Data File Standards Version 1.2](https://www.google.com/search?q=arm+datastream+facility+cite+definition&rlz=1C1GCEJ_enUS1029US1029&ei=hb41ZICrBPukqtsPvMODIA&ved=0ahUKEwjAgeiS0aL-AhV7kmoFHbzhAAQQ4dUDCBA&uact=5&oq=arm+datastream+facility+cite+definition&gs_lcp=Cgxnd3Mtd2l6LXNlcnAQAzIFCAAQogQyBQgAEKIEMgUIABCiBDIFCAAQogQyBQgAEKIEOgoIABBHENYEELADOgoIIRCgARDDBBAKSgQIQRgAUJ4DWIoOYO0PaAFwAXgAgAGMAYgBuQmSAQMyLjmYAQCgAQHIAQjAAQE&sclient=gws-wiz-serp).) 
\n", + "\n", + "For example, this notebook is called `armbeatm.c1`, where `armbeatm` is the \"datastream name\", and `c1` is the \"data level\".\n", + "\n", + "This datastream also contains site `cor` and facility `M1`. (Note: an individual datastream might have multiple site-facility pairs.)\n", + "In such a case, the data of this data-stream is stored at `/data/archive/cor/corarmbeatmM1.c1`, which is in the format of `{DATA_DIR}/{site}/{site}{datastream_name}{facility}.{data_level}`. We can use the following method to assign the data-stream directory `datastream_dir = os.path.join(DATA_DIR, site, site + DATASTREAM_NAME + facility + '.' + DATA_LEVEL )`\n", + "\n", + "The data files under datastream_dir also follow naming conventions. But once we reach the datastream_dir level, the most important file naming convention to differentiate the files is \"yyyyMMdd.hhmmss\", which comes in handy to filter out files based on datetime. For example, we can use `glob.glob(f'{datastream_dir}/*.200709*.*')` to filter files in 2007 September.\n", + "\n", + "(Note: refer to the https://adc.arm.gov/discovery/#/ and https://adc.arm.gov/solr8/metadata/select API to explore ARM datastreams and associated available sites and facilities.)\n", + "\n", + "Please see the following examples in action" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d7e9eb85", + "metadata": {}, + "outputs": [], + "source": [ + "# Verify if DATA_DIR path exists\n", + "DATA_DIR = \"/data/archive\"\n", + "os.path.exists(DATA_DIR)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "586993fd", + "metadata": {}, + "outputs": [], + "source": [ + "# Speicify datastream_dir following the path conventions and check its existence\n", + "DATASTREAM_NAME = \"armbeatm\"\n", + "DATA_LEVEL = \"c1\"\n", + "site = \"cor\"\n", + "facility = \"M1\"\n", + "datastream_dir = os.path.join(DATA_DIR, site, site + DATASTREAM_NAME + facility + '.' 
+ DATA_LEVEL )\n", + "print(datastream_dir)\n", + "print(os.path.exists(datastream_dir))\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0742f7c1", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: list 5 (random) files under datastream_dir\n", + "files = os.listdir(datastream_dir)\n", + "files[:5]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "39b98a36", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: get most recent file\n", + "list_of_files = glob.glob(f\"{datastream_dir}/*\") # * means all if need specific format then *.csv\n", + "latest_file = max(list_of_files, key=os.path.getctime)\n", + "latest_file" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "902d514e", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus sort datastream files based on datetime\n", + "files = os.listdir(datastream_dir)\n", + "file_sorted = files.copy()\n", + "file_sorted.sort() \n", + "print(file_sorted[:5])\n", + "\n", + "# to reverse\n", + "file_sorted_reverse = files.copy() \n", + "file_sorted_reverse.sort(reverse=True)\n", + "print(file_sorted_reverse[:5])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ec5923b2", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: pattern matching\n", + "# filter the 200709** files under datastream_dir\n", + "files_filter = glob.glob(f'{datastream_dir}/*.200709*.*')\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c4a4aa18", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: use armnotebook_utils.file_filter (TODO) to filter files based on datastream info \n" + ] + }, + { + "cell_type": "markdown", + "id": "cfeb9efc", + "metadata": {}, + "source": [ + "### Load data\n", + "The arm data is stored in [NetCDF](https://en.wikipedia.org/wiki/NetCDF#:~:text=%22NetCDF%20(network%20Common%20Data%20Form,format%20for%20representing%20scientific%20data.) format. 
We can use xarray's `open_dataset` method to load single file, or `open_mfdataset` to open multiple files. (Note: the latter will still return a single xarray dataset object by combining multiple files.)\n", + "\n", + "Note: open_dataset keeps the file handle open and lazy loads its contents. All parameters are passed directly to open_dataset. It is a preferable method over load_dataset for memory-effiency. (more details, please see [xarray.open_dataset](https://docs.xarray.dev/en/stable/generated/xarray.open_dataset.html))\n", + "\n", + "See the following example in action" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a440a329", + "metadata": {}, + "outputs": [], + "source": [ + "# open single file\n", + "full_path = latest_file\n", + "print(full_path)\n", + "ds_single = xr.open_dataset(full_path)\n", + "ds_single" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0143a3d", + "metadata": {}, + "outputs": [], + "source": [ + "print(ds_single)\n", + "print(type(ds_single))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1c0f8939", + "metadata": {}, + "outputs": [], + "source": [ + "# open multiple files\n", + "n_files = 3\n", + "full_paths = [os.path.join(datastream_dir, f_path) for f_path in file_sorted_reverse[:n_files]]\n", + "print(full_paths)\n", + "try: # Note: sometimes multiple files cannot be merged, so we used try except here.\n", + " ds_mutiple = xr.open_mfdataset(full_paths)\n", + " # ds_mutiple\n", + " print(type(ds_mutiple))\n", + " print(ds_mutiple)\n", + "except Exception as e:\n", + " print(e)" + ] + }, + { + "cell_type": "markdown", + "id": "3fa59943", + "metadata": {}, + "source": [ + "## Explore the data" + ] + }, + { + "cell_type": "markdown", + "id": "571f69b9", + "metadata": {}, + "source": [ + "### NetCDF Data structure \n", + "\n", + "\n", + "(If you are confident with NetCDF basics and xarray essentials, feel free to skip this session.)\n", + "Before we dive into 
data exploration, there are some essential concepts we should be familiar with (shown below): \n", + "* Dataset\n", + "* Data array\n", + "* Variable\n", + "* Dimension\n", + "* Coordinate\n", + "* Data Type\n", + "* Meta Data (Attributes)\n", + "\n", + "We will not go into details about NetCDF basics and here are some references you might find helpful\n", + "* [Components of a NetCDF Dataset](https://iprc.soest.hawaii.edu/users/xfu/tool/guidef-7.html#)\n", + "* [Network Common Data Form (NetCDF)](https://www.unidata.ucar.edu/software/netcdf/) and [A Brief History of (netCDF) Time](https://www.unidata.ucar.edu/software/netcdf/time/recs.html)\n", + "\n", + "\n", + "\n", + "### Xarray essentials\n", + "Earlier we introduced the xarray package and used `open_dataset` and `open_mfdataset` to retrieve NetCDF data file as an xarray Dataset object. Recall that you can review the data in a notebook by using `print(ds)` or `ds`. \n", + "\n", + "Using xarray to retrieve the aforementioned NetCDF basics is straightforward. 
In general,\n", + "* Dataset: `ds`\n", + "* Data array: `ds.variables`\n", + "* Variable: `ds.variables`\n", + "* Dimension: `ds.dims`\n", + "* Coordinate: `ds.coords`\n", + "* Data Type: `type`\n", + "* Meta Data (Attributes): `ds.attrs`\n", + "\n", + "Also, here are some references if you are new to xarray\n", + "* [Xarray in 45 minutes](https://tutorial.xarray.dev/overview/xarray-in-45-min.html)\n", + "* [Handling NetCDF Files using XArray for Absolute Beginners](https://towardsdatascience.com/handling-netcdf-files-using-xarray-for-absolute-beginners-111a8ab4463f)\n", + "\n", + "Try the following commands in action\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b3eac323", + "metadata": {}, + "outputs": [], + "source": [ + "ds = ds_single" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "77ecf85d", + "metadata": {}, + "outputs": [], + "source": [ + "# Dataset \n", + "ds" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "25e7de09", + "metadata": {}, + "outputs": [], + "source": [ + "# Data array, Variable\n", + "# Note: the info can be overwhelming. 
Do not attempt to grasp everything at the first glance.\n", + "ds.variables" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8c41b67e", + "metadata": {}, + "outputs": [], + "source": [ + "# dimenssions\n", + "ds.dims" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "156f1dfc", + "metadata": {}, + "outputs": [], + "source": [ + "# coordinates\n", + "ds.coords" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "277d6064", + "metadata": {}, + "outputs": [], + "source": [ + "# Meta Data (Attributes)\n", + "ds.attrs" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d334681f", + "metadata": {}, + "outputs": [], + "source": [ + "# type\n", + "print(type(ds))\n", + "print(type(ds.variables))\n", + "print(type(ds.dims))\n", + "print(type(ds.coords))\n", + "print(type(ds.attrs))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "643399d6", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: retrieve variable names only\n", + "list(ds.variables)" + ] + }, + { + "cell_type": "markdown", + "id": "1cf36878", + "metadata": {}, + "source": [ + "#### Discussion: variable vs. coordinates vs. dimenssions. \n", + "From [Components of a NetCDF Dataset](https://iprc.soest.hawaii.edu/users/xfu/tool/guidef-7.html#):\n", + "\n", + "* Dimession: A dimension may be used to represent a real physical dimension, for example, time, latitude, longitude, or height. A dimension might also be used to index other quantities, for example station or model-run-number.\n", + "A netCDF dimension has both a name and a length. A dimension length is an arbitrary positive integer, except that one dimension in a netCDF dataset can have the length UNLIMITED.\n", + "\n", + "* Variables: Variables are used to store the bulk of the data in a netCDF dataset. A variable represents an array of values of the same type. A scalar value is treated as a 0-dimensional array. 
A variable has a name, a data type, and a shape described by its list of dimensions specified when the variable is created. A variable may also have associated attributes, which may be added, deleted or changed after the variable is created.\n", + "\n", + "* Coordinate (Variables): It is legal for a variable to have the same name as a dimension. Such variables have no special meaning to the netCDF library. However there is a convention that such variables should be treated in a special way by software using this library.\n", + "A variable with the same name as a dimension is called a coordinate variable. It typically defines a physical coordinate corresponding to that dimension.\n", + "\n", + "\n", + "Tips: By definition, coordinates and dimensions are also variables. For example ds.time is a coordinate but it is also a special variable. But in practice, when we talk about a variable, we mean a regular (non-coordinate) variable. We will use this convention for the remainder of this notebook. " + ] + }, + { + "cell_type": "markdown", + "id": "d47120f9", + "metadata": {}, + "source": [ + "### Xarray Variable\n", + "\n", + "\n", + "We can use `ds.variables` to access the variables of a dataset. But printing out the whole dataset is often overwhelming. Instead, we would work on the individual variable (also called an xarray data array) using the `ds[var_name]` syntax, where `var_name` is the name of the variable. See the following examples in action.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5a57e136", + "metadata": {}, + "outputs": [], + "source": [ + "print(type(ds[\"time\"]))\n", + "ds[\"time\"]" + ] + }, + { + "cell_type": "markdown", + "id": "51ec643e", + "metadata": {}, + "source": [ + "#### Variable properties\n", + "For an individual variable, the following properties are the most commonly used, listed with the associated xarray attribute used to retrieve them. 
(assuming var = ds[var_name])\n", + "* name: `var.name`\n", + "* data content: `var.data`\n", + "* attributes: `var.attrs`\n", + "* dimensions: `var.dims`\n", + "* data type: `var.data.dtype`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a632a525", + "metadata": {}, + "outputs": [], + "source": [ + "var_name = \"time\"\n", + "var = ds[var_name]\n", + "\n", + "print(\"var.name: \\n\", var.name, \"\\n\")\n", + "print(\"var.data: \\n\", var.data, \"\\n\")\n", + "print(\"var.attrs: \\n\", var.attrs, \"\\n\")\n", + "print(\"var.dims: \\n\", var.dims, \"\\n\")\n", + "print(\"var.data.dtype: \\n\", var.data.dtype, \"\\n\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b256b07f", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: indivdual variable info table (collect and display variable info as a tabular format)\n", + "df_info = pd.DataFrame()\n", + "df_info[\"var_name\"] = list(ds.variables)\n", + "df_info[\"dims\"] = df_info.var_name.apply(lambda x: ds[x].dims)\n", + "df_info[\"is_dim\"] = df_info.var_name.apply(lambda x: x in ds.dims)\n", + "df_info[\"n_dim\"] = df_info.var_name.apply(lambda x: len(ds[x].dims))\n", + "df_info[\"attrs\"] = df_info.var_name.apply(lambda x: ds[x].attrs)\n", + "df_info[\"dtype\"] = df_info.var_name.apply(lambda x: ds[x].data.dtype)\n", + "df_info" + ] + }, + { + "cell_type": "markdown", + "id": "e6a36b96", + "metadata": {}, + "source": [ + "### Data cleaning/Preprocessing (skipped)\n", + "Data cleaning and preprocessing is an important stage in the data analysis pipeline. However, it is outside the scope of this notebook. For more information about data cleaning and preprocessing basics, here are some references. 
\n", + "* [Xarray Fundamentals](https://earth-env-data-science.github.io/lectures/xarray/xarray.html)\n", + "* [Xarray Tutorial — Pangeo Gallery documentation](http://gallery.pangeo.io/repos/pangeo-data/pangeo-tutorial-gallery/xarray.html)\n", + "* [Pythonic Data Cleaning With pandas and NumPy](https://realpython.com/python-data-cleaning-numpy-pandas/)\n", + "* [Pandas - Cleaning Data](https://www.w3schools.com/python/pandas/pandas_cleaning.asp)" + ] + }, + { + "cell_type": "markdown", + "id": "ae6e1a69", + "metadata": {}, + "source": [ + "### Plotting\n", + "Data visualization (or plotting) is another important data analysis topic and deserves its own discussion. Here in this notebook we will only demonstrate basic tools for simple data visualization tasks.\n", + "\n", + "Here are some references you might find useful:\n", + "* xarray plotting: https://docs.xarray.dev/en/stable/user-guide/plotting.html\n", + "* Atmospheric Community Toolkit (ACT): https://arm-doe.github.io/ACT/index.html\n", + "\n", + "Note: this notebook is auto-generated using a template. It uses a generic rule to select the variable(s) to plot and is not customized to each individual notebook. Feel free to change the variables of interest, especially if a figure fails to plot." + ] + }, + { + "cell_type": "markdown", + "id": "7e417fb2", + "metadata": {}, + "source": [ + "#### 1-dimensional basic time series plot\n", + "\n", + "For the following plot we would like to find variables such that\n", + "* it has one and only one dimension\n", + "* \"time\" is its coordinate variable\n", + "* it is not a dimension itself,\n", + "* it is not a special variable with substrings within [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "\n", + "Please see the following example in action. 
(For more details about pandas filtering, please see the following references.)\n", + "* [pandas: multiple conditions while indexing data frame](https://stackoverflow.com/questions/22591174/pandas-multiple-conditions-while-indexing-data-frame-unexpected-behavior)\n", + "* [pandas.DataFrame.apply](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.apply.html)\n", + "* [How to test if a string contains one of the substrings in a list...](https://stackoverflow.com/questions/26577516/how-to-test-if-a-string-contains-one-of-the-substrings-in-a-list-in-pandas)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "30edac2d", + "metadata": {}, + "outputs": [], + "source": [ + "# (pandas query) filter n_dim==1, non-dimenssional, and not contains the following substrings\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter = df_info[(df_info.n_dim == 1) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (df_info.is_dim==False) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4777a659", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " var_1d = df_filter.var_name.values[0]\n", + " var_1d\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fb5ae985", + "metadata": {}, + "outputs": [], + "source": [ + "# Note: if failed, change to another variable to plot.\n", + "try:\n", + " ds[var_1d].plot()\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "markdown", + "id": "9e41066e", + "metadata": {}, + "source": [ + "#### 2-dimensional basic plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "09bd4adc", + "metadata": {}, + "outputs": [], + "source": [ + "# (pandas query) filter n_dim==2, non-dimenssional, and not contains the 
following substrings\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_2 = df_info[(df_info.n_dim == 2) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (df_info.is_dim==False) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter_2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2cdea1a9", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " var_2d = df_filter_2.var_name.values[0]\n", + " var_2d\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f5f48879", + "metadata": {}, + "outputs": [], + "source": [ + "# Note: if failed, change to another variable to plot.\n", + "try:\n", + " print(ds[var_2d].dims)\n", + " # ds[var_2d].plot()\n", + "\n", + " # conventionally, use \"time\" as x-axis\n", + " ds[var_2d].plot(x=\"time\")\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "markdown", + "id": "7554b2a6", + "metadata": {}, + "source": [ + "#### qc-plotting (optional)\n", + "\n", + "Note: act qc plotting has more strict requirement, one of them is the associated qc variable need to have \"flag_masks\" attributes. 
Which is added by using ds = act.io.armfiles.read_netcdf(files_list), then ds.clean.cleanup()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "71c2096f", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " ds_act = act.io.armfiles.read_netcdf(full_path)\n", + " print(type(ds_act))\n", + " ds_act.clean.cleanup()\n", + "\n", + " # or \n", + " # ds.clean.cleanup()\n", + "except Exception as e:\n", + " print(\"ERROR\", e)\n", + " ds_act = ds" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "844f8505", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter valid variables for ACT qc plotting\n", + "condition = (\"qc_\" + df_info.var_name).apply(lambda x: ds_act[x].attrs.get(\"flag_masks\") is not None \n", + " if x in list(ds.variables) else False)\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_3 = df_info[(df_info.is_dim==False) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings))) &\n", + " condition\n", + " ]\n", + "df_filter_3" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ecd38cc6", + "metadata": {}, + "outputs": [], + "source": [ + "# QC Plot\n", + "try:\n", + " qc_variable = df_filter_3.var_name.values[0]\n", + " print(qc_variable)\n", + "except Exception as e:\n", + " print(e)\n", + " \n", + "try:\n", + "\n", + " # Plot\n", + " qc_display = act.plotting.TimeSeriesDisplay(ds_act)\n", + " qc_display.add_subplots((2,), figsize = (9.5,10))\n", + " qc_ax = qc_display.plot(qc_variable, subplot_index=(0,), set_title=\"QC results on field: \" + qc_variable,)\n", + " qc_ax.grid()\n", + " qc_display.qc_flag_block_plot(qc_variable, subplot_index=(1,))\n", + " qc_ax.set_xlabel(f\"UTC Time starts at {ds.time.data[0]}\")\n", + "\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e)" + ] + }, + { + "cell_type": "markdown", + "id": "d76e4d27", + "metadata": 
{}, + "source": [ + "#### bonus: choose variables to plot from a dropdown menu " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bb733804", + "metadata": {}, + "outputs": [], + "source": [ + "# Valid variables filtering\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_4 = df_info[(df_info.is_dim==False) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter_4" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "82d5c20d", + "metadata": {}, + "outputs": [], + "source": [ + "# example 1: using xarray plot\n", + "\n", + "# Uncomment the following cell to try the interactive plot (ctrl + /)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2fc9aaa4", + "metadata": {}, + "outputs": [], + "source": [ + "# %matplotlib widget\n", + "# plt.clf()\n", + "\n", + "# fig, ax = plt.subplots(figsize=(10, 4))\n", + "\n", + "# available_variables = df_filter_4.var_name.values\n", + "# @widgets.interact(var=available_variables)\n", + "# def update(var = available_variables[0]):\n", + "# fig.clear() # Remove old lines from plot and plot new one\n", + "# if len(ds[var].dims)==2:\n", + "# ds[var].plot(x=\"time\", add_colorbar=False)\n", + "# else:\n", + "# ds[var].plot()\n", + "# plt.grid()\n", + "# plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fabdd802", + "metadata": {}, + "outputs": [], + "source": [ + "# example 2: using act plot\n", + "\n", + "# Uncomment the following cell to try the interactive plot (ctrl + /)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "962b4186", + "metadata": {}, + "outputs": [], + "source": [ + "# %matplotlib widget\n", + "# plt.clf()\n", + "\n", + "# available_variables = df_filter_4.var_name.values\n", + "\n", + "\n", + "# @widgets.interact(var=available_variables)\n", + "# def 
update(var = available_variables[0]):\n", + "\n", + "# i_display = act.plotting.TimeSeriesDisplay(ds_act)\n", + "# i_display.add_subplots((1,), figsize=(10, 4))\n", + "# ax = i_display.plot(var, subplot_index=(0,), set_title=f\"{var} ({ds_act[var].attrs['long_name']})\",)\n", + "\n", + "# ax.set_xlabel(f\"UTC Time starts at {ds.time.data[0]}\")\n", + "# ax.grid()\n", + "# plt.show()\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.16" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/ARMBE/armbeatm.c1.ipynb b/VAPs/quicklook/ARMBE/armbeatm.c1.ipynb new file mode 100644 index 00000000..4716a81d --- /dev/null +++ b/VAPs/quicklook/ARMBE/armbeatm.c1.ipynb @@ -0,0 +1,297 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# ARMBEATM.C1 Plots\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/armbe) for more information about this vap." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib widget\n", + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import ipywidgets as widgets\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "# Data archive directory\n", + "DATA_DIR = r'/data/archive/'\n", + "\n", + "# Datastream info\n", + "DATASTREAM_NAME = 'armbeatm'\n", + "DATA_LEVEL = 'c1'\n", + "LOCATIONS = [{'end_date': '2019-04-30', 'facility': 'M1', 'site': 'cor', 'start_date': '2018-09-23'}, {'end_date': '2016-12-31', 'facility': 'M1', 'site': 'awr', 'start_date': '2016-01-01'}, {'end_date': '2020-05-31', 'facility': 'M1', 'site': 'anx', 'start_date': '2019-12-01'}, {'end_date': '2006-12-30', 'facility': 'M1', 'site': 'nim', 'start_date': '2006-01-01'}, {'end_date': '2021-06-14', 'facility': 'M1', 'site': 'oli', 'start_date': '2013-10-01'}, {'end_date': '2022-09-30', 'facility': 'M1', 'site': 'hou', 'start_date': '2021-10-01'}, {'end_date': '2020-12-31', 'facility': 'C1', 'site': 'ena', 'start_date': '2014-01-01'}, {'end_date': '2015-12-31', 'facility': 'M1', 'site': 'mao', 'start_date': '2014-01-01'}, {'end_date': '2023-06-15', 'facility': 'M1', 'site': 'guc', 'start_date': '2021-09-01'}, {'end_date': '2020-12-31', 'facility': 'C1', 'site': 'nsa', 'start_date': '2001-01-01'}, {'end_date': '2020-12-27', 'facility': 'C1', 'site': 'sgp', 'start_date': '1994-01-01'}, {'end_date': '2010-12-31', 'facility': 'C1', 'site': 'twp', 'start_date': '1996-01-01'}, {'end_date': '2010-12-31', 'facility': 'C2', 'site': 'twp', 'start_date': '1998-01-01'}, {'end_date': '2010-12-31', 'facility': 'C3', 'site': 'twp', 'start_date': '2002-01-01'}]" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "9faaf875", + "metadata": {}, + 
"source": [ + "## Define site, facility, and date range" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ac6764f5", + "metadata": {}, + "outputs": [], + "source": [ + "print(\"The following locations and date ranges are available for this VAP:\")\n", + "display(pd.DataFrame(LOCATIONS, columns=['site', 'facility', 'start_date', 'end_date']))" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "8d132223", + "metadata": {}, + "source": [ + "#### Define site, facility, and date range (date format: YYYY-MM-DD) using the variables below:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e563983a", + "metadata": {}, + "outputs": [], + "source": [ + "site_facility = ( 'sgp', 'C1' )\n", + "\n", + "date_start = '2019-12-30'\n", + "date_end = '2020-01-01'" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "bccd3dfe-2f99-49a5-bace-ea37e8dd8fc3", + "metadata": {}, + "source": [ + "## Load data files\n", + "Load data files from /data/archive/" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cb4b9a26-c574-49c0-a521-658fa553e39e", + "metadata": {}, + "outputs": [], + "source": [ + "# Compile list of files\n", + "site, facility = site_facility\n", + "d_date_start = datetime.strptime(date_start, '%Y-%m-%d')\n", + "d_date_end = datetime.strptime(date_end, '%Y-%m-%d')\n", + "dir_path = os.path.join(DATA_DIR + site, site + DATASTREAM_NAME + facility + r'.' 
+ DATA_LEVEL )\n", + "dir_path\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6be8f3dc", + "metadata": {}, + "outputs": [], + "source": [ + "from datetime import date, timedelta\n", + "import pandas as pd\n", + "\n", + "def get_ARM_formated_dates(start_date, end_date):\n", + " \"\"\"\n", + " Get a list of ARM conventional formated date lists, based on start_date and end_date(inclusive)\n", + " EXAMPLE:\n", + " get_ARM_formated_dates(start_date=\"20180219\", end_date=\"20180221\")\n", + " >> [\"20180219\", \"20180220\", \"20180221\"] \n", + " \"\"\"\n", + " \n", + " _start_date = pd.to_datetime(start_date)\n", + " _end_date = pd.to_datetime(end_date)\n", + " \n", + " delta = _end_date - _start_date # returns timedelta \n", + " dates = []\n", + "\n", + " for i in range(delta.days + 1):\n", + " day = _start_date + timedelta(days=i)\n", + " day_formated = day.strftime(format=\"%Y%m%d\")\n", + " dates.append(day_formated)\n", + " return dates\n", + "\n", + "\n", + "get_ARM_formated_dates(start_date=date_start, end_date=date_end)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "51feea2e", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter a list of files based on date pattern\n", + "import glob\n", + "dates = get_ARM_formated_dates(start_date=date_start, end_date=date_end)\n", + "files_filter = []\n", + "for date in dates:\n", + " files_filter += glob.glob(f'{dir_path}/*.{date}*.*')\n", + " files_filter\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0e5d0d7", + "metadata": {}, + "outputs": [], + "source": [ + "# Load files as a single dataset\n", + "files_list = files_filter \n", + "ds = act.io.armfiles.read_netcdf(files_list)\n", + "ds.clean.cleanup()\n", + "print(f'{len(files_list)} files loaded')\n", + "ds\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "4a551094-9ec0-4b64-b80a-9940573c2f50", + "metadata": {}, + "source": [ + "## Plot 
time series data\n", + "#### Define the list of variables to be plotted:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d09b789e-84f1-4605-846b-a72c110c8048", + "metadata": {}, + "outputs": [], + "source": [ + "variables_to_plot = ['u_wind_sfc', 'v_wind_sfc', 'relative_humidity_sfc']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3458fb08-035b-4898-9253-0a94e6f9c97b", + "metadata": {}, + "outputs": [], + "source": [ + "ts_display = act.plotting.TimeSeriesDisplay(ds)\n", + "ts_display.add_subplots((len(variables_to_plot),), figsize = (9.5,4*len(variables_to_plot)))\n", + "\n", + "for i,v in enumerate(variables_to_plot):\n", + " ts_ax = ts_display.plot(v, subplot_index=(i,), set_title=ds.variables[v].attrs['long_name'],)\n", + " ts_ax.grid()\n", + "\n", + "plt.show()\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "2308db16-f362-4033-a11a-c3e5e75ad9ba", + "metadata": {}, + "source": [ + "## Field selection dropdown menu\n", + "Select variable to be plotted from a dropdown menu" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d5792fa8-2253-441a-8ac6-1e07a0345031", + "metadata": {}, + "outputs": [], + "source": [ + "plt.ioff()\n", + "\n", + "# populate dropdown menu with available variables \n", + "available_variables = [v for v in ds.variables if not('time' in v or v.startswith('qc_') or v.startswith('source_')) and 'long_name' in ds.variables[v].attrs]\n", + "d_variable = 'u_wind_sfc'\n", + "dropdown = widgets.Dropdown(\n", + " options = [(ds.variables[v].attrs['long_name'], v) for v in available_variables],\n", + " value= d_variable,\n", + " description='Field:',\n", + " disabled=False,\n", + ")\n", + "dropdown.layout.margin = '0px 30% 0px 20%'\n", + "dropdown.layout.width = '50%'\n", + "\n", + "# set up display\n", + "i_display = act.plotting.TimeSeriesDisplay(ds)\n", + "i_display.add_subplots((1,), figsize = (9.5,5))\n", + "i_ax = i_display.plot(d_variable, 
subplot_index=(0,), set_title=ds.variables[d_variable].attrs['long_name'],)\n", + "i_ax.grid()\n", + "i_fig = i_display.fig\n", + "\n", + "# update plot callback function\n", + "def update_plot(change):\n", + " i_ax.cla()\n", + " i_ax_new = i_display.plot(change.new, subplot_index=(0,), set_title=ds.variables[change.new].attrs['long_name'],)\n", + " i_ax_new.grid()\n", + " i_fig.canvas.draw()\n", + " i_fig.canvas.flush_events()\n", + "\n", + "dropdown.observe(update_plot, names='value')\n", + "\n", + "widgets.AppLayout(\n", + " header=dropdown,\n", + " center=i_fig.canvas,\n", + " pane_heights=[1, 6,1]\n", + ")\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.12" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/ARMBE/armbecldrad.c1.ipynb b/VAPs/quicklook/ARMBE/armbecldrad.c1.ipynb new file mode 100644 index 00000000..0c3d67bb --- /dev/null +++ b/VAPs/quicklook/ARMBE/armbecldrad.c1.ipynb @@ -0,0 +1,339 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# ARMBECLDRAD.C1 Plots\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/armbe) for more information about this vap." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib widget\n", + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import ipywidgets as widgets\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "# Data archive directory\n", + "DATA_DIR = r'/data/archive/'\n", + "\n", + "# Datastream info\n", + "DATASTREAM_NAME = 'armbecldrad'\n", + "DATA_LEVEL = 'c1'\n", + "LOCATIONS = [{'end_date': '2016-12-31', 'facility': 'M1', 'site': 'awr', 'start_date': '2016-01-01'}, {'end_date': '2020-05-31', 'facility': 'M1', 'site': 'anx', 'start_date': '2019-12-01'}, {'end_date': '2006-12-30', 'facility': 'M1', 'site': 'nim', 'start_date': '2006-01-01'}, {'end_date': '2020-12-31', 'facility': 'C1', 'site': 'nsa', 'start_date': '1998-01-01'}, {'end_date': '2020-12-31', 'facility': 'C1', 'site': 'ena', 'start_date': '2014-01-01'}, {'end_date': '2022-09-30', 'facility': 'M1', 'site': 'hou', 'start_date': '2021-10-01'}, {'end_date': '2015-12-01', 'facility': 'M1', 'site': 'mao', 'start_date': '2014-01-01'}, {'end_date': '2023-06-15', 'facility': 'M1', 'site': 'guc', 'start_date': '2021-09-01'}, {'end_date': '2019-04-30', 'facility': 'M1', 'site': 'cor', 'start_date': '2018-09-23'}, {'end_date': '2020-10-31', 'facility': 'M1', 'site': 'mos', 'start_date': '2019-11-01'}, {'end_date': '2021-06-14', 'facility': 'M1', 'site': 'oli', 'start_date': '2013-10-01'}, {'end_date': '2020-12-31', 'facility': 'C1', 'site': 'sgp', 'start_date': '1996-01-01'}, {'end_date': '2011-12-31', 'facility': 'C1', 'site': 'twp', 'start_date': '1996-01-01'}, {'end_date': '2010-12-31', 'facility': 'C2', 'site': 'twp', 'start_date': '1998-01-01'}, {'end_date': '2011-12-31', 'facility': 'C3', 'site': 'twp', 'start_date': '2002-01-01'}]" + ] + }, 
+ { + "attachments": {}, + "cell_type": "markdown", + "id": "9faaf875", + "metadata": {}, + "source": [ + "## Define site, facility, and date range" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ac6764f5", + "metadata": {}, + "outputs": [], + "source": [ + "print(\"The following locations and date ranges are available for this VAP:\")\n", + "display(pd.DataFrame(LOCATIONS, columns=['site', 'facility', 'start_date', 'end_date']))" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "8d132223", + "metadata": {}, + "source": [ + "#### Define site, facility, and date range (date format: YYYY-MM-DD) using the variables below:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e563983a", + "metadata": {}, + "outputs": [], + "source": [ + "site_facility = ( 'sgp', 'C1' )\n", + "\n", + "date_start = '2019-12-30'\n", + "date_end = '2020-01-01'" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "bccd3dfe-2f99-49a5-bace-ea37e8dd8fc3", + "metadata": {}, + "source": [ + "## Load data files\n", + "Load data files from /data/archive/" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cb4b9a26-c574-49c0-a521-658fa553e39e", + "metadata": {}, + "outputs": [], + "source": [ + "# Compile list of files\n", + "site, facility = site_facility\n", + "d_date_start = datetime.strptime(date_start, '%Y-%m-%d')\n", + "d_date_end = datetime.strptime(date_end, '%Y-%m-%d')\n", + "dir_path = os.path.join(DATA_DIR + site, site + DATASTREAM_NAME + facility + r'.' 
+ DATA_LEVEL )\n", + "dir_path\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6be8f3dc", + "metadata": {}, + "outputs": [], + "source": [ + "from datetime import date, timedelta\n", + "import pandas as pd\n", + "\n", + "def get_ARM_formated_dates(start_date, end_date):\n", + " \"\"\"\n", + " Get a list of ARM conventional formated date lists, based on start_date and end_date(inclusive)\n", + " EXAMPLE:\n", + " get_ARM_formated_dates(start_date=\"20180219\", end_date=\"20180221\")\n", + " >> [\"20180219\", \"20180220\", \"20180221\"] \n", + " \"\"\"\n", + " \n", + " _start_date = pd.to_datetime(start_date)\n", + " _end_date = pd.to_datetime(end_date)\n", + " \n", + " delta = _end_date - _start_date # returns timedelta \n", + " dates = []\n", + "\n", + " for i in range(delta.days + 1):\n", + " day = _start_date + timedelta(days=i)\n", + " day_formated = day.strftime(format=\"%Y%m%d\")\n", + " dates.append(day_formated)\n", + " return dates\n", + "\n", + "\n", + "get_ARM_formated_dates(start_date=date_start, end_date=date_end)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "51feea2e", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter a list of files based on date pattern\n", + "import glob\n", + "dates = get_ARM_formated_dates(start_date=date_start, end_date=date_end)\n", + "files_filter = []\n", + "for date in dates:\n", + " files_filter += glob.glob(f'{dir_path}/*.{date}*.*')\n", + " files_filter\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0e5d0d7", + "metadata": {}, + "outputs": [], + "source": [ + "# Load files as a single dataset\n", + "files_list = files_filter \n", + "ds = act.io.armfiles.read_netcdf(files_list)\n", + "ds.clean.cleanup()\n", + "print(f'{len(files_list)} files loaded')\n", + "ds\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "4a551094-9ec0-4b64-b80a-9940573c2f50", + "metadata": {}, + "source": [ + "## Plot 
time series data\n", + "#### Define the list of variables to be plotted:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d09b789e-84f1-4605-846b-a72c110c8048", + "metadata": {}, + "outputs": [], + "source": [ + "variables_to_plot = ['cld_frac', 'tot_cld', 'swdn']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3458fb08-035b-4898-9253-0a94e6f9c97b", + "metadata": {}, + "outputs": [], + "source": [ + "ts_display = act.plotting.TimeSeriesDisplay(ds)\n", + "ts_display.add_subplots((len(variables_to_plot),), figsize = (9.5,4*len(variables_to_plot)))\n", + "\n", + "for i,v in enumerate(variables_to_plot):\n", + " ts_ax = ts_display.plot(v, subplot_index=(i,), set_title=ds.variables[v].attrs['long_name'],)\n", + " ts_ax.grid()\n", + "\n", + "plt.show()\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "194399aa-1907-452b-8ba9-bc31d7f60291", + "metadata": {}, + "source": [ + "## Quality check plots\n", + "#### Define variable for QC plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9a0ef63d-3eeb-48fc-a24a-43258b6134b8", + "metadata": {}, + "outputs": [], + "source": [ + "qc_variable = 'cld_frac'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "532663a3-4dc0-4497-bda8-018c5f91e1c4", + "metadata": {}, + "outputs": [], + "source": [ + "# QC Plot\n", + "if ('qc_'+qc_variable) in ds.variables:\n", + "\n", + " # Plot\n", + " qc_display = act.plotting.TimeSeriesDisplay(ds)\n", + " qc_display.add_subplots((2,), figsize = (9.5,10))\n", + " qc_ax = qc_display.plot(qc_variable, subplot_index=(0,), set_title=\"QC results on field: \" + qc_variable,)\n", + " qc_ax.grid()\n", + " qc_display.qc_flag_block_plot(qc_variable, subplot_index=(1,))\n", + "\n", + " plt.show()\n", + "else:\n", + " print(f'QC not available for the selected field: {qc_variable}')\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "2308db16-f362-4033-a11a-c3e5e75ad9ba", + 
"metadata": {}, + "source": [ + "## Field selection dropdown menu\n", + "Select variable to be plotted from a dropdown menu" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d5792fa8-2253-441a-8ac6-1e07a0345031", + "metadata": {}, + "outputs": [], + "source": [ + "plt.ioff()\n", + "\n", + "# populate dropdown menu with available variables \n", + "available_variables = [v for v in ds.variables if not('time' in v or v.startswith('qc_') or v.startswith('source_')) and 'long_name' in ds.variables[v].attrs]\n", + "d_variable = 'cld_frac'\n", + "dropdown = widgets.Dropdown(\n", + " options = [(ds.variables[v].attrs['long_name'], v) for v in available_variables],\n", + " value= d_variable,\n", + " description='Field:',\n", + " disabled=False,\n", + ")\n", + "dropdown.layout.margin = '0px 30% 0px 20%'\n", + "dropdown.layout.width = '50%'\n", + "\n", + "# set up display\n", + "i_display = act.plotting.TimeSeriesDisplay(ds)\n", + "i_display.add_subplots((1,), figsize = (9.5,5))\n", + "i_ax = i_display.plot(d_variable, subplot_index=(0,), set_title=ds.variables[d_variable].attrs['long_name'],)\n", + "i_ax.grid()\n", + "i_fig = i_display.fig\n", + "\n", + "# update plot callback function\n", + "def update_plot(change):\n", + " i_ax.cla()\n", + " i_ax_new = i_display.plot(change.new, subplot_index=(0,), set_title=ds.variables[change.new].attrs['long_name'],)\n", + " i_ax_new.grid()\n", + " i_fig.canvas.draw()\n", + " i_fig.canvas.flush_events()\n", + "\n", + "dropdown.observe(update_plot, names='value')\n", + "\n", + "widgets.AppLayout(\n", + " header=dropdown,\n", + " center=i_fig.canvas,\n", + " pane_heights=[1, 6,1]\n", + ")\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + 
"pygments_lexer": "ipython3", + "version": "3.9.12" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/ARSCL/.ipynb_checkpoints/arscl1cloth.c1-checkpoint.ipynb b/VAPs/quicklook/ARSCL/.ipynb_checkpoints/arscl1cloth.c1-checkpoint.ipynb new file mode 100644 index 00000000..4502aea5 --- /dev/null +++ b/VAPs/quicklook/ARSCL/.ipynb_checkpoints/arscl1cloth.c1-checkpoint.ipynb @@ -0,0 +1,2631 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# ARSCL1CLOTH.C1 Plots\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/arscl) for more information about this vap." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib widget\n", + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import ipywidgets as widgets\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "# Data archive directory\n", + "DATA_DIR = r'/data/archive/'\n", + "\n", + "# Datastream info\n", + "DATASTREAM_NAME = 'arscl1cloth'\n", + "DATA_LEVEL = 'c1'\n", + "LOCATIONS = [{'end_date': '2011-03-23', 'facility': 'C1', 'site': 'nsa', 'start_date': '1998-03-25'}, {'end_date': '2011-01-04', 'facility': 'C1', 'site': 'sgp', 'start_date': '1996-11-08'}, {'end_date': '2011-03-07', 'facility': 'C1', 'site': 'twp', 'start_date': '1999-07-01'}, {'end_date': '2009-02-14', 'facility': 'C2', 'site': 'twp', 'start_date': '1998-11-01'}, {'end_date': '2011-02-28', 'facility': 'C3', 'site': 'twp', 'start_date': '2003-01-01'}]" + ] + }, + { + "cell_type": "markdown", + "id": "9faaf875", + "metadata": {}, + 
"source": [ + "## Define site, facility, and date range" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "ac6764f5", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The following locations and date ranges are available for this VAP:\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sitefacilitystart_dateend_date
0nsaC11998-03-252011-03-23
1sgpC11996-11-082011-01-04
2twpC11999-07-012011-03-07
3twpC21998-11-012009-02-14
4twpC32003-01-012011-02-28
\n", + "
" + ], + "text/plain": [ + " site facility start_date end_date\n", + "0 nsa C1 1998-03-25 2011-03-23\n", + "1 sgp C1 1996-11-08 2011-01-04\n", + "2 twp C1 1999-07-01 2011-03-07\n", + "3 twp C2 1998-11-01 2009-02-14\n", + "4 twp C3 2003-01-01 2011-02-28" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "print(\"The following locations and date ranges are available for this VAP:\")\n", + "display(pd.DataFrame(LOCATIONS, columns=['site', 'facility', 'start_date', 'end_date']))" + ] + }, + { + "cell_type": "markdown", + "id": "8d132223", + "metadata": {}, + "source": [ + "#### Define site, facility, and date range (date format: YYYY-MM-DD) using the variables below:" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "e563983a", + "metadata": {}, + "outputs": [], + "source": [ + "site_facility = ( 'sgp', 'C1' )\n", + "\n", + "date_start = '2011-01-03'\n", + "date_end = '2011-01-04'" + ] + }, + { + "cell_type": "markdown", + "id": "bccd3dfe-2f99-49a5-bace-ea37e8dd8fc3", + "metadata": {}, + "source": [ + "## Load data files\n", + "Load data files from /data/archive/" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "cb4b9a26-c574-49c0-a521-658fa553e39e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'/data/archive/sgp/sgparscl1clothC1.c1'" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Compile list of files\n", + "site, facility = site_facility\n", + "d_date_start = datetime.strptime(date_start, '%Y-%m-%d')\n", + "d_date_end = datetime.strptime(date_end, '%Y-%m-%d')\n", + "dir_path = os.path.join(DATA_DIR + site, site + DATASTREAM_NAME + facility + r'.' 
+ DATA_LEVEL )\n", + "dir_path\n" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "6be8f3dc", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['20110103', '20110104']" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from datetime import date, timedelta\n", + "import pandas as pd\n", + "\n", + "def get_ARM_formated_dates(start_date, end_date):\n", + " \"\"\"\n", + " Get a list of ARM conventional formated date lists, based on start_date and end_date(inclusive)\n", + " EXAMPLE:\n", + " get_ARM_formated_dates(start_date=\"20180219\", end_date=\"20180221\")\n", + " >> [\"20180219\", \"20180220\", \"20180221\"] \n", + " \"\"\"\n", + " \n", + " _start_date = pd.to_datetime(start_date)\n", + " _end_date = pd.to_datetime(end_date)\n", + " \n", + " delta = _end_date - _start_date # returns timedelta \n", + " dates = []\n", + "\n", + " for i in range(delta.days + 1):\n", + " day = _start_date + timedelta(days=i)\n", + " day_formated = day.strftime(format=\"%Y%m%d\")\n", + " dates.append(day_formated)\n", + " return dates\n", + "\n", + "\n", + "get_ARM_formated_dates(start_date=date_start, end_date=date_end)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "51feea2e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['/data/archive/sgp/sgparscl1clothC1.c1/sgparscl1clothC1.c1.20110103.000000.cdf',\n", + " '/data/archive/sgp/sgparscl1clothC1.c1/sgparscl1clothC1.c1.20110104.000000.cdf']" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Filter a list of files based on date pattern\n", + "import glob\n", + "dates = get_ARM_formated_dates(start_date=date_start, end_date=date_end)\n", + "files_filter = []\n", + "for date in dates:\n", + " files_filter += glob.glob(f'{dir_path}/*.{date}*.*')\n", + " files_filter\n", + "files_filter" + ] + }, + { + "cell_type": "code", + 
"execution_count": 9, + "id": "b0e5d0d7", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "77 files loaded\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
<xarray.Dataset>\n",
+       "Dimensions:                         (time: 8640, nheights: 512, numlayers: 10)\n",
+       "Coordinates:\n",
+       "  * time                            (time) timedelta64[ns] 00:00:00 ... 23:59:50\n",
+       "Dimensions without coordinates: nheights, numlayers\n",
+       "Data variables: (12/23)\n",
+       "    base_time                       object ...\n",
+       "    time_offset                     (time) timedelta64[ns] dask.array<chunksize=(8640,), meta=np.ndarray>\n",
+       "    Heights                         (nheights) float32 dask.array<chunksize=(512,), meta=np.ndarray>\n",
+       "    Reflectivity                    (time, nheights) int16 dask.array<chunksize=(8640, 512), meta=np.ndarray>\n",
+       "    ReflectivityNoClutter           (time, nheights) int16 dask.array<chunksize=(8640, 512), meta=np.ndarray>\n",
+       "    ReflectivityBestEstimate        (time, nheights) int16 dask.array<chunksize=(8640, 512), meta=np.ndarray>\n",
+       "    ...                              ...\n",
+       "    CloudLayerBottomHeightMplZwang  (time, numlayers) float32 dask.array<chunksize=(8640, 10), meta=np.ndarray>\n",
+       "    CloudLayerTopHeightMplZwang     (time, numlayers) float32 dask.array<chunksize=(8640, 10), meta=np.ndarray>\n",
+       "    qc_RadarArtifacts               (time, nheights) |S1 dask.array<chunksize=(8640, 512), meta=np.ndarray>\n",
+       "    qc_ReflectivityClutterFlag      (time, nheights) |S1 dask.array<chunksize=(8640, 512), meta=np.ndarray>\n",
+       "    qc_CloudLayerTopHeightMplZwang  (time, numlayers) float32 dask.array<chunksize=(8640, 10), meta=np.ndarray>\n",
+       "    qc_BeamAttenuationMplZwang      (time) float32 dask.array<chunksize=(8640,), meta=np.ndarray>\n",
+       "Attributes: (12/18)\n",
+       "    Date:                      Wed Jul 13 16:28:55 GMT 2011\n",
+       "    Version:                   $State: Exp $\n",
+       "    Number_Input_Platforms:    3\n",
+       "    Input_Platforms:           sgp30smplcmask1zwangC1.c1,sgpvceil25kC1.b1,sgp...\n",
+       "    Input_Platforms_Versions:  ?????,10.2,1.16\n",
+       "    Command_Line:              arsc1/arscl2 -s YYYYMMDD -e YYYYMMDD SITE FACI...\n",
+       "    ...                        ...\n",
+       "    commentf:                  Note that -32768 is also used for the geophysi...\n",
+       "    _file_dates:               ['20110103']\n",
+       "    _file_times:               ['000000']\n",
+       "    datastream:                sgparscl1clothC1.c1\n",
+       "    _datastream:               sgparscl1clothC1.c1\n",
+       "    _arm_standards_flag:       1
" + ], + "text/plain": [ + "\n", + "Dimensions: (time: 8640, nheights: 512, numlayers: 10)\n", + "Coordinates:\n", + " * time (time) timedelta64[ns] 00:00:00 ... 23:59:50\n", + "Dimensions without coordinates: nheights, numlayers\n", + "Data variables: (12/23)\n", + " base_time object ...\n", + " time_offset (time) timedelta64[ns] dask.array\n", + " Heights (nheights) float32 dask.array\n", + " Reflectivity (time, nheights) int16 dask.array\n", + " ReflectivityNoClutter (time, nheights) int16 dask.array\n", + " ReflectivityBestEstimate (time, nheights) int16 dask.array\n", + " ... ...\n", + " CloudLayerBottomHeightMplZwang (time, numlayers) float32 dask.array\n", + " CloudLayerTopHeightMplZwang (time, numlayers) float32 dask.array\n", + " qc_RadarArtifacts (time, nheights) |S1 dask.array\n", + " qc_ReflectivityClutterFlag (time, nheights) |S1 dask.array\n", + " qc_CloudLayerTopHeightMplZwang (time, numlayers) float32 dask.array\n", + " qc_BeamAttenuationMplZwang (time) float32 dask.array\n", + "Attributes: (12/18)\n", + " Date: Wed Jul 13 16:28:55 GMT 2011\n", + " Version: $State: Exp $\n", + " Number_Input_Platforms: 3\n", + " Input_Platforms: sgp30smplcmask1zwangC1.c1,sgpvceil25kC1.b1,sgp...\n", + " Input_Platforms_Versions: ?????,10.2,1.16\n", + " Command_Line: arsc1/arscl2 -s YYYYMMDD -e YYYYMMDD SITE FACI...\n", + " ... 
...\n", + " commentf: Note that -32768 is also used for the geophysi...\n", + " _file_dates: ['20110103']\n", + " _file_times: ['000000']\n", + " datastream: sgparscl1clothC1.c1\n", + " _datastream: sgparscl1clothC1.c1\n", + " _arm_standards_flag: 1" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Load files as a single dataset\n", + "files_list = files_filter[:1]  # keep a list (first file only) so len() counts files, not path characters\n", + "ds = act.io.armfiles.read_netcdf(files_list)\n", + "ds.clean.cleanup()\n", + "print(f'{len(files_list)} files loaded')\n", + "ds\n" + ] + }, + { + "cell_type": "markdown", + "id": "4a551094-9ec0-4b64-b80a-9940573c2f50", + "metadata": {}, + "source": [ + "## Plot time series data\n", + "#### Define the list of variables to be plotted:" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "d09b789e-84f1-4605-846b-a72c110c8048", + "metadata": {}, + "outputs": [], + "source": [ + "variables_to_plot = ['Reflectivity', 'ReflectivityNoClutter', 'ReflectivityBestEstimate']" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "de5b8b3d", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
<xarray.DataArray 'ReflectivityNoClutter' (time: 8640, nheights: 512)>\n",
+       "dask.array<open_dataset-86c2f04dd636517ce84998ed679d9bf5ReflectivityNoClutter, shape=(8640, 512), dtype=int16, chunksize=(8640, 512), chunktype=numpy.ndarray>\n",
+       "Coordinates:\n",
+       "  * time     (time) timedelta64[ns] 00:00:00 00:00:10 ... 23:59:40 23:59:50\n",
+       "Dimensions without coordinates: nheights\n",
+       "Attributes:\n",
+       "    long_name:  MMCR Reflectivity with Clutter Removed\n",
+       "    units:      dBZ (X100)\n",
+       "    comment:    Divide ReflectivityNoClutter by 100 to get dBZ
" + ], + "text/plain": [ + "\n", + "dask.array\n", + "Coordinates:\n", + " * time (time) timedelta64[ns] 00:00:00 00:00:10 ... 23:59:40 23:59:50\n", + "Dimensions without coordinates: nheights\n", + "Attributes:\n", + " long_name: MMCR Reflectivity with Clutter Removed\n", + " units: dBZ (X100)\n", + " comment: Divide ReflectivityNoClutter by 100 to get dBZ" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ds.ReflectivityNoClutter.data" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "3458fb08-035b-4898-9253-0a94e6f9c97b", + "metadata": {}, + "outputs": [ + { + "ename": "UFuncTypeError", + "evalue": "Cannot cast ufunc 'greater_equal' input 0 from dtype(' 5\u001b[0m ts_ax \u001b[38;5;241m=\u001b[39m \u001b[43mts_display\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mplot\u001b[49m\u001b[43m(\u001b[49m\u001b[43mv\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43msubplot_index\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mi\u001b[49m\u001b[43m,\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mset_title\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mds\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mvariables\u001b[49m\u001b[43m[\u001b[49m\u001b[43mv\u001b[49m\u001b[43m]\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mattrs\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mlong_name\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 6\u001b[0m ts_ax\u001b[38;5;241m.\u001b[39mgrid()\n\u001b[1;32m 8\u001b[0m plt\u001b[38;5;241m.\u001b[39mshow()\n", + "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/act/plotting/timeseriesdisplay.py:588\u001b[0m, in \u001b[0;36mTimeSeriesDisplay.plot\u001b[0;34m(self, field, dsname, subplot_index, cmap, set_title, add_nan, 
day_night_background, invert_y_axis, abs_limits, time_rng, y_rng, use_var_for_y, set_shading, assessment_overplot, overplot_marker, overplot_behind, overplot_markersize, assessment_overplot_category, assessment_overplot_category_color, force_line_plot, labels, cbar_label, cbar_h_adjust, secondary_y, y_axis_flag_meanings, colorbar_labels, cb_friendly, **kwargs)\u001b[0m\n\u001b[1;32m 586\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124medgecolors\u001b[39m\u001b[38;5;124m'\u001b[39m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m kwargs\u001b[38;5;241m.\u001b[39mkeys():\n\u001b[1;32m 587\u001b[0m kwargs[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124medgecolors\u001b[39m\u001b[38;5;124m'\u001b[39m] \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mface\u001b[39m\u001b[38;5;124m'\u001b[39m\n\u001b[0;32m--> 588\u001b[0m mesh \u001b[38;5;241m=\u001b[39m \u001b[43max\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mpcolormesh\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 589\u001b[0m \u001b[43m \u001b[49m\u001b[43mnp\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43masarray\u001b[49m\u001b[43m(\u001b[49m\u001b[43mxdata\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 590\u001b[0m \u001b[43m \u001b[49m\u001b[43mydata\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 591\u001b[0m \u001b[43m \u001b[49m\u001b[43mdata\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mtranspose\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 592\u001b[0m \u001b[43m \u001b[49m\u001b[43mshading\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mset_shading\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 593\u001b[0m \u001b[43m \u001b[49m\u001b[43mcmap\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcmap\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 594\u001b[0m \u001b[43m 
\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 595\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 597\u001b[0m \u001b[38;5;66;03m# Set Title\u001b[39;00m\n\u001b[1;32m 598\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m set_title \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n", + "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/matplotlib/__init__.py:1442\u001b[0m, in \u001b[0;36m_preprocess_data..inner\u001b[0;34m(ax, data, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1439\u001b[0m \u001b[38;5;129m@functools\u001b[39m\u001b[38;5;241m.\u001b[39mwraps(func)\n\u001b[1;32m 1440\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21minner\u001b[39m(ax, \u001b[38;5;241m*\u001b[39margs, data\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs):\n\u001b[1;32m 1441\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m data \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m-> 1442\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[43max\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;28;43mmap\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43msanitize_sequence\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43margs\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1444\u001b[0m bound \u001b[38;5;241m=\u001b[39m new_sig\u001b[38;5;241m.\u001b[39mbind(ax, \u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n\u001b[1;32m 1445\u001b[0m auto_label \u001b[38;5;241m=\u001b[39m 
(bound\u001b[38;5;241m.\u001b[39marguments\u001b[38;5;241m.\u001b[39mget(label_namer)\n\u001b[1;32m 1446\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m bound\u001b[38;5;241m.\u001b[39mkwargs\u001b[38;5;241m.\u001b[39mget(label_namer))\n", + "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/matplotlib/axes/_axes.py:6220\u001b[0m, in \u001b[0;36mAxes.pcolormesh\u001b[0;34m(self, alpha, norm, cmap, vmin, vmax, shading, antialiased, *args, **kwargs)\u001b[0m\n\u001b[1;32m 6217\u001b[0m shading \u001b[38;5;241m=\u001b[39m shading\u001b[38;5;241m.\u001b[39mlower()\n\u001b[1;32m 6218\u001b[0m kwargs\u001b[38;5;241m.\u001b[39msetdefault(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124medgecolors\u001b[39m\u001b[38;5;124m'\u001b[39m, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mnone\u001b[39m\u001b[38;5;124m'\u001b[39m)\n\u001b[0;32m-> 6220\u001b[0m X, Y, C, shading \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_pcolorargs\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mpcolormesh\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 6221\u001b[0m \u001b[43m \u001b[49m\u001b[43mshading\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mshading\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mkwargs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 6222\u001b[0m coords \u001b[38;5;241m=\u001b[39m np\u001b[38;5;241m.\u001b[39mstack([X, Y], axis\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m-\u001b[39m\u001b[38;5;241m1\u001b[39m)\n\u001b[1;32m 6223\u001b[0m \u001b[38;5;66;03m# convert to one dimensional array, except for 3D RGB(A) arrays\u001b[39;00m\n", + "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/matplotlib/axes/_axes.py:5786\u001b[0m, in 
\u001b[0;36mAxes._pcolorargs\u001b[0;34m(self, funcname, shading, *args, **kwargs)\u001b[0m\n\u001b[1;32m 5783\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m X\n\u001b[1;32m 5785\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m ncols \u001b[38;5;241m==\u001b[39m Nx:\n\u001b[0;32m-> 5786\u001b[0m X \u001b[38;5;241m=\u001b[39m \u001b[43m_interp_grid\u001b[49m\u001b[43m(\u001b[49m\u001b[43mX\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 5787\u001b[0m Y \u001b[38;5;241m=\u001b[39m _interp_grid(Y)\n\u001b[1;32m 5788\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m nrows \u001b[38;5;241m==\u001b[39m Ny:\n", + "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/matplotlib/axes/_axes.py:5768\u001b[0m, in \u001b[0;36mAxes._pcolorargs.._interp_grid\u001b[0;34m(X)\u001b[0m\n\u001b[1;32m 5766\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m np\u001b[38;5;241m.\u001b[39mshape(X)[\u001b[38;5;241m1\u001b[39m] \u001b[38;5;241m>\u001b[39m \u001b[38;5;241m1\u001b[39m:\n\u001b[1;32m 5767\u001b[0m dX \u001b[38;5;241m=\u001b[39m np\u001b[38;5;241m.\u001b[39mdiff(X, axis\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m1\u001b[39m)\u001b[38;5;241m/\u001b[39m\u001b[38;5;241m2.\u001b[39m\n\u001b[0;32m-> 5768\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m (np\u001b[38;5;241m.\u001b[39mall(\u001b[43mdX\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m>\u001b[39;49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m \u001b[49m\u001b[38;5;241;43m0\u001b[39;49m) \u001b[38;5;129;01mor\u001b[39;00m np\u001b[38;5;241m.\u001b[39mall(dX \u001b[38;5;241m<\u001b[39m\u001b[38;5;241m=\u001b[39m \u001b[38;5;241m0\u001b[39m)):\n\u001b[1;32m 5769\u001b[0m _api\u001b[38;5;241m.\u001b[39mwarn_external(\n\u001b[1;32m 5770\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mThe input coordinates to \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mfuncname\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m are \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 
5771\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124minterpreted as cell centers, but are not \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 5774\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124medges, in which case, please supply \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 5775\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mexplicit cell edges to \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mfuncname\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m.\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 5776\u001b[0m X \u001b[38;5;241m=\u001b[39m np\u001b[38;5;241m.\u001b[39mhstack((X[:, [\u001b[38;5;241m0\u001b[39m]] \u001b[38;5;241m-\u001b[39m dX[:, [\u001b[38;5;241m0\u001b[39m]],\n\u001b[1;32m 5777\u001b[0m X[:, :\u001b[38;5;241m-\u001b[39m\u001b[38;5;241m1\u001b[39m] \u001b[38;5;241m+\u001b[39m dX,\n\u001b[1;32m 5778\u001b[0m X[:, [\u001b[38;5;241m-\u001b[39m\u001b[38;5;241m1\u001b[39m]] \u001b[38;5;241m+\u001b[39m dX[:, [\u001b[38;5;241m-\u001b[39m\u001b[38;5;241m1\u001b[39m]]))\n", + "\u001b[0;31mUFuncTypeError\u001b[0m: Cannot cast ufunc 'greater_equal' input 0 from dtype('\n", + "
\n", + " Figure\n", + "
\n", + " \n", + " \n", + " " + ], + "text/plain": [ + "Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "ts_display = act.plotting.TimeSeriesDisplay(ds)\n", + "ts_display.add_subplots((len(variables_to_plot),), figsize = (9.5,4*len(variables_to_plot)))\n", + "\n", + "for i,v in enumerate(variables_to_plot):\n", + " ts_ax = ts_display.plot(v, subplot_index=(i,), set_title=ds.variables[v].attrs['long_name'],)\n", + " ts_ax.grid()\n", + "\n", + "plt.show()\n" + ] + }, + { + "cell_type": "markdown", + "id": "194399aa-1907-452b-8ba9-bc31d7f60291", + "metadata": {}, + "source": [ + "## Quality check plots\n", + "#### Define variable for QC plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9a0ef63d-3eeb-48fc-a24a-43258b6134b8", + "metadata": {}, + "outputs": [], + "source": [ + "qc_variable = 'RadarArtifacts'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "532663a3-4dc0-4497-bda8-018c5f91e1c4", + "metadata": {}, + "outputs": [], + "source": [ + "# QC Plot\n", + "if ('qc_'+qc_variable) in ds.variables:\n", + "\n", + " # Plot\n", + " qc_display = act.plotting.TimeSeriesDisplay(ds)\n", + " qc_display.add_subplots((2,), figsize = (9.5,10))\n", + " qc_ax = qc_display.plot(qc_variable, subplot_index=(0,), set_title=\"QC results on field: \" + qc_variable,)\n", + " qc_ax.grid()\n", + " qc_display.qc_flag_block_plot(qc_variable, subplot_index=(1,))\n", + "\n", + " plt.show()\n", + "else:\n", + " print(f'QC not available for the selected field: {qc_variable}')\n" + ] + }, + { + "cell_type": "markdown", + "id": "2308db16-f362-4033-a11a-c3e5e75ad9ba", + "metadata": {}, + "source": [ + "## Field selection dropdown menu\n", + "Select variable to be plotted from a dropdown menu" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d5792fa8-2253-441a-8ac6-1e07a0345031", 
+ "metadata": {}, + "outputs": [], + "source": [ + "plt.ioff()\n", + "\n", + "# populate dropdown menu with available variables \n", + "available_variables = [v for v in ds.variables if not('time' in v or v.startswith('qc_') or v.startswith('source_')) and 'long_name' in ds.variables[v].attrs]\n", + "d_variable = 'Reflectivity'\n", + "dropdown = widgets.Dropdown(\n", + " options = [(ds.variables[v].attrs['long_name'], v) for v in available_variables],\n", + " value= d_variable,\n", + " description='Field:',\n", + " disabled=False,\n", + ")\n", + "dropdown.layout.margin = '0px 30% 0px 20%'\n", + "dropdown.layout.width = '50%'\n", + "\n", + "# set up display\n", + "i_display = act.plotting.TimeSeriesDisplay(ds)\n", + "i_display.add_subplots((1,), figsize = (9.5,5))\n", + "i_ax = i_display.plot(d_variable, subplot_index=(0,), set_title=ds.variables[d_variable].attrs['long_name'],)\n", + "i_ax.grid()\n", + "i_fig = i_display.fig\n", + "\n", + "# update plot callback function\n", + "def update_plot(change):\n", + " i_ax.cla()\n", + " i_ax_new = i_display.plot(change.new, subplot_index=(0,), set_title=ds.variables[change.new].attrs['long_name'],)\n", + " i_ax_new.grid()\n", + " i_fig.canvas.draw()\n", + " i_fig.canvas.flush_events()\n", + "\n", + "dropdown.observe(update_plot, names='value')\n", + "\n", + "widgets.AppLayout(\n", + " header=dropdown,\n", + " center=i_fig.canvas,\n", + " pane_heights=[1, 6,1]\n", + ")\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.16" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + 
"nbformat_minor": 5 +} diff --git a/VAPs/quicklook/ARSCL/.ipynb_checkpoints/arsclbnd1cloth.c1-checkpoint.ipynb b/VAPs/quicklook/ARSCL/.ipynb_checkpoints/arsclbnd1cloth.c1-checkpoint.ipynb new file mode 100644 index 00000000..b365a780 --- /dev/null +++ b/VAPs/quicklook/ARSCL/.ipynb_checkpoints/arsclbnd1cloth.c1-checkpoint.ipynb @@ -0,0 +1,1937 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# ARSCLBND1CLOTH.C1 Plots\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/arscl) for more information about this vap." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib widget\n", + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import ipywidgets as widgets\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "# Data archive directory\n", + "DATA_DIR = r'/data/archive/'\n", + "\n", + "# Datastream info\n", + "DATASTREAM_NAME = 'arsclbnd1cloth'\n", + "DATA_LEVEL = 'c1'\n", + "LOCATIONS = [{'end_date': '2011-03-23', 'facility': 'C1', 'site': 'nsa', 'start_date': '1998-03-25'}, {'end_date': '2011-01-04', 'facility': 'C1', 'site': 'sgp', 'start_date': '1996-11-08'}, {'end_date': '2011-03-07', 'facility': 'C1', 'site': 'twp', 'start_date': '1999-07-01'}, {'end_date': '2009-02-14', 'facility': 'C2', 'site': 'twp', 'start_date': '1998-11-01'}, {'end_date': '2011-02-28', 'facility': 'C3', 'site': 'twp', 'start_date': '2003-01-01'}]" + ] + }, + { + "cell_type": "markdown", + "id": "9faaf875", + "metadata": {}, + "source": [ + "## Define site, facility, and date range" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "ac6764f5", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + 
"output_type": "stream", + "text": [ + "The following locations and date ranges are available for this VAP:\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sitefacilitystart_dateend_date
0nsaC11998-03-252011-03-23
1sgpC11996-11-082011-01-04
2twpC11999-07-012011-03-07
3twpC21998-11-012009-02-14
4twpC32003-01-012011-02-28
\n", + "
" + ], + "text/plain": [ + " site facility start_date end_date\n", + "0 nsa C1 1998-03-25 2011-03-23\n", + "1 sgp C1 1996-11-08 2011-01-04\n", + "2 twp C1 1999-07-01 2011-03-07\n", + "3 twp C2 1998-11-01 2009-02-14\n", + "4 twp C3 2003-01-01 2011-02-28" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "print(\"The following locations and date ranges are available for this VAP:\")\n", + "display(pd.DataFrame(LOCATIONS, columns=['site', 'facility', 'start_date', 'end_date']))" + ] + }, + { + "cell_type": "markdown", + "id": "8d132223", + "metadata": {}, + "source": [ + "#### Define site, facility, and date range (date format: YYYY-MM-DD) using the variables below:" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "e563983a", + "metadata": {}, + "outputs": [], + "source": [ + "site_facility = ( 'sgp', 'C1' )\n", + "\n", + "date_start = '2011-01-03'\n", + "date_end = '2011-01-04'" + ] + }, + { + "cell_type": "markdown", + "id": "bccd3dfe-2f99-49a5-bace-ea37e8dd8fc3", + "metadata": {}, + "source": [ + "## Load data files\n", + "Load data files from /data/archive/" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "cb4b9a26-c574-49c0-a521-658fa553e39e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'/data/archive/sgp/sgparsclbnd1clothC1.c1'" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Compile list of files\n", + "site, facility = site_facility\n", + "d_date_start = datetime.strptime(date_start, '%Y-%m-%d')\n", + "d_date_end = datetime.strptime(date_end, '%Y-%m-%d')\n", + "dir_path = os.path.join(DATA_DIR + site, site + DATASTREAM_NAME + facility + r'.' 
+ DATA_LEVEL )\n", + "dir_path\n" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "6be8f3dc", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['20110103', '20110104']" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from datetime import date, timedelta\n", + "import pandas as pd\n", + "\n", + "def get_ARM_formated_dates(start_date, end_date):\n", + " \"\"\"\n", + " Get a list of dates in the ARM conventional format (YYYYMMDD), from start_date through end_date (inclusive)\n", + " EXAMPLE:\n", + " get_ARM_formated_dates(start_date=\"20180219\", end_date=\"20180221\")\n", + " >> [\"20180219\", \"20180220\", \"20180221\"] \n", + " \"\"\"\n", + " \n", + " _start_date = pd.to_datetime(start_date)\n", + " _end_date = pd.to_datetime(end_date)\n", + " \n", + " delta = _end_date - _start_date # returns timedelta \n", + " dates = []\n", + "\n", + " for i in range(delta.days + 1):\n", + " day = _start_date + timedelta(days=i)\n", + " day_formated = day.strftime(format=\"%Y%m%d\")\n", + " dates.append(day_formated)\n", + " return dates\n", + "\n", + "\n", + "get_ARM_formated_dates(start_date=date_start, end_date=date_end)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "51feea2e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['/data/archive/sgp/sgparsclbnd1clothC1.c1/sgparsclbnd1clothC1.c1.20110103.000000.cdf',\n", + " '/data/archive/sgp/sgparsclbnd1clothC1.c1/sgparsclbnd1clothC1.c1.20110104.000000.cdf']" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Filter a list of files based on date pattern\n", + "import glob\n", + "dates = get_ARM_formated_dates(start_date=date_start, end_date=date_end)\n", + "files_filter = []\n", + "for date in dates:\n", + " files_filter += glob.glob(f'{dir_path}/*.{date}*.*')\n", + " files_filter\n", + "files_filter" + ] + }, + { + "cell_type": 
"code", + "execution_count": null, + "id": "604409ea", + "metadata": {}, + "outputs": [], + "source": [ + "# This datastream is a bit different. It has trouble merging the individual datasets." + ] + }, + { + "cell_type": "code", + "execution_count": 122, + "id": "ccbe501b", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
<xarray.Dataset>\n",
+       "Dimensions:                         (time: 17280, numlayers: 10)\n",
+       "Coordinates:\n",
+       "  * time                            (time) datetime64[ns] 2011-01-01 ... 2011...\n",
+       "Dimensions without coordinates: numlayers\n",
+       "Data variables:\n",
+       "    base_time                       datetime64[ns] 2011-01-01\n",
+       "    time_offset                     (time) timedelta64[ns] 00:00:00 ... NaT\n",
+       "    CloudBaseBestEstimate           (time) float32 -1.0 -1.0 -1.0 ... nan nan\n",
+       "    CloudLayerBottomHeightMplZwang  (time, numlayers) float32 0.0 0.0 ... nan\n",
+       "    CloudLayerTopHeightMplZwang     (time, numlayers) float32 0.0 0.0 ... nan\n",
+       "    qc_CloudLayerTopHeightMplZwang  (time, numlayers) float32 0.0 0.0 ... nan\n",
+       "Attributes:\n",
+       "    Date:                      Wed Jul 13 16:27:17 GMT 2011\n",
+       "    Version:                   $State: Exp $\n",
+       "    Number_Input_Platforms:    3\n",
+       "    Input_Platforms:           sgp30smplcmask1zwangC1.c1,sgpvceil25kC1.b1,sgp...\n",
+       "    Input_Platforms_Versions:  ?????,10.2,1.16\n",
+       "    zeb_platform:              sgparsclbnd1clothC1.c1\n",
+       "    Command_Line:              arsc1/arscl2 -s YYYYMMDD -e YYYYMMDD SITE FACI...\n",
+       "    contact:                    \n",
+       "    comment:                   If all layer top heights are 0, then the first...
" + ], + "text/plain": [ + "\n", + "Dimensions: (time: 17280, numlayers: 10)\n", + "Coordinates:\n", + " * time (time) datetime64[ns] 2011-01-01 ... 2011...\n", + "Dimensions without coordinates: numlayers\n", + "Data variables:\n", + " base_time datetime64[ns] 2011-01-01\n", + " time_offset (time) timedelta64[ns] 00:00:00 ... NaT\n", + " CloudBaseBestEstimate (time) float32 -1.0 -1.0 -1.0 ... nan nan\n", + " CloudLayerBottomHeightMplZwang (time, numlayers) float32 0.0 0.0 ... nan\n", + " CloudLayerTopHeightMplZwang (time, numlayers) float32 0.0 0.0 ... nan\n", + " qc_CloudLayerTopHeightMplZwang (time, numlayers) float32 0.0 0.0 ... nan\n", + "Attributes:\n", + " Date: Wed Jul 13 16:27:17 GMT 2011\n", + " Version: $State: Exp $\n", + " Number_Input_Platforms: 3\n", + " Input_Platforms: sgp30smplcmask1zwangC1.c1,sgpvceil25kC1.b1,sgp...\n", + " Input_Platforms_Versions: ?????,10.2,1.16\n", + " zeb_platform: sgparsclbnd1clothC1.c1\n", + " Command_Line: arsc1/arscl2 -s YYYYMMDD -e YYYYMMDD SITE FACI...\n", + " contact: \n", + " comment: If all layer top heights are 0, then the first..." 
+ ] + }, + "execution_count": 122, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# This datastream requires some special treatment to merge the files\n", + "ds_single_1 = xr.load_dataset(\"/data/archive/sgp/sgparsclbnd1clothC1.c1/sgparsclbnd1clothC1.c1.20110101.000000.cdf\")\n", + "ds_single_1\n", + "\n", + "ds_single_2 = xr.load_dataset(\"/data/archive/sgp/sgparsclbnd1clothC1.c1/sgparsclbnd1clothC1.c1.20110102.000000.cdf\")\n", + "ds_single_2\n", + "\n", + "# ds_single_1.time.data = ds_single_1.base_time.data + ds_single_1.time.data\n", + "# ds_single_2.time.data = ds_single_2.base_time.data + ds_single_2.time.data\n", + "\n", + "ds_single_1['time'] = ds_single_1.base_time.data + ds_single_1.time.data * 10000000000\n", + "ds_single_2['time'] = ds_single_2.base_time.data + ds_single_2.time.data * 10000000000\n", + "\n", + "ds_single_1['base_time'] = pd.to_datetime(ds_single_1.base_time.data)\n", + "ds_single_2['base_time'] = pd.to_datetime(ds_single_2.base_time.data)\n", + "\n", + "ds = xr.merge([ds_single_1, ds_single_2], compat='override') \n", + "ds" + ] + }, + { + "cell_type": "code", + "execution_count": 123, + "id": "dde711c1", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
<xarray.DataArray 'CloudBaseBestEstimate' (time: 8640)>\n",
+       "array([-1., -1., -1., ..., -1., -1., -1.], dtype=float32)\n",
+       "Coordinates:\n",
+       "  * time     (time) datetime64[ns] 2011-01-02 ... 2011-01-02T23:59:50\n",
+       "Attributes:\n",
+       "    long_name:  LASER Cloud Base Height Best Estimate\n",
+       "    units:      m AGL\n",
+       "    comment:    -3. Data do not exist, -2. Data exist but no retrieval, -1. C...
" + ], + "text/plain": [ + "\n", + "array([-1., -1., -1., ..., -1., -1., -1.], dtype=float32)\n", + "Coordinates:\n", + " * time (time) datetime64[ns] 2011-01-02 ... 2011-01-02T23:59:50\n", + "Attributes:\n", + " long_name: LASER Cloud Base Height Best Estimate\n", + " units: m AGL\n", + " comment: -3. Data do not exist, -2. Data exist but no retrieval, -1. C..." + ] + }, + "execution_count": 123, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ds_single_2.CloudBaseBestEstimate" + ] + }, + { + "cell_type": "code", + "execution_count": 72, + "id": "7c1839ad", + "metadata": {}, + "outputs": [ + { + "ename": "MergeError", + "evalue": "conflicting values for variable 'base_time' on objects to be combined. You can skip this check by specifying compat='override'.", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", + "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/act/io/armfiles.py:143\u001b[0m, in \u001b[0;36mread_netcdf\u001b[0;34m(filenames, concat_dim, return_None, combine, decode_times, use_cftime, use_base_time, combine_attrs, cleanup_qc, keep_variables, **kwargs)\u001b[0m\n\u001b[1;32m 141\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m 142\u001b[0m \u001b[38;5;66;03m# Read data file with Xarray function\u001b[39;00m\n\u001b[0;32m--> 143\u001b[0m ds \u001b[38;5;241m=\u001b[39m \u001b[43mxr\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mopen_mfdataset\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfilenames\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 145\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m except_tuple \u001b[38;5;28;01mas\u001b[39;00m exception:\n\u001b[1;32m 146\u001b[0m \u001b[38;5;66;03m# If 
requested return None for File not found error\u001b[39;00m\n", + "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/xarray/backends/api.py:1026\u001b[0m, in \u001b[0;36mopen_mfdataset\u001b[0;34m(paths, chunks, concat_dim, compat, preprocess, engine, data_vars, coords, combine, parallel, join, attrs_file, combine_attrs, **kwargs)\u001b[0m\n\u001b[1;32m 1023\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m combine \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mby_coords\u001b[39m\u001b[38;5;124m\"\u001b[39m:\n\u001b[1;32m 1024\u001b[0m \u001b[38;5;66;03m# Redo ordering from coordinates, ignoring how they were ordered\u001b[39;00m\n\u001b[1;32m 1025\u001b[0m \u001b[38;5;66;03m# previously\u001b[39;00m\n\u001b[0;32m-> 1026\u001b[0m combined \u001b[38;5;241m=\u001b[39m \u001b[43mcombine_by_coords\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1027\u001b[0m \u001b[43m \u001b[49m\u001b[43mdatasets\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1028\u001b[0m \u001b[43m \u001b[49m\u001b[43mcompat\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcompat\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1029\u001b[0m \u001b[43m \u001b[49m\u001b[43mdata_vars\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdata_vars\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1030\u001b[0m \u001b[43m \u001b[49m\u001b[43mcoords\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcoords\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1031\u001b[0m \u001b[43m \u001b[49m\u001b[43mjoin\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mjoin\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1032\u001b[0m \u001b[43m \u001b[49m\u001b[43mcombine_attrs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcombine_attrs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1033\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1034\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n", + "File 
\u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/xarray/core/combine.py:982\u001b[0m, in \u001b[0;36mcombine_by_coords\u001b[0;34m(data_objects, compat, data_vars, coords, fill_value, join, combine_attrs, datasets)\u001b[0m\n\u001b[1;32m 981\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m \u001b[38;5;28mvars\u001b[39m, datasets_with_same_vars \u001b[38;5;129;01min\u001b[39;00m grouped_by_vars:\n\u001b[0;32m--> 982\u001b[0m concatenated \u001b[38;5;241m=\u001b[39m \u001b[43m_combine_single_variable_hypercube\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 983\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;28;43mlist\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mdatasets_with_same_vars\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 984\u001b[0m \u001b[43m \u001b[49m\u001b[43mfill_value\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mfill_value\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 985\u001b[0m \u001b[43m \u001b[49m\u001b[43mdata_vars\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdata_vars\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 986\u001b[0m \u001b[43m \u001b[49m\u001b[43mcoords\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcoords\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 987\u001b[0m \u001b[43m \u001b[49m\u001b[43mcompat\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcompat\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 988\u001b[0m \u001b[43m \u001b[49m\u001b[43mjoin\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mjoin\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 989\u001b[0m \u001b[43m \u001b[49m\u001b[43mcombine_attrs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcombine_attrs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 990\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 991\u001b[0m concatenated_grouped_by_data_vars\u001b[38;5;241m.\u001b[39mappend(concatenated)\n", + "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/xarray/core/combine.py:629\u001b[0m, in 
\u001b[0;36m_combine_single_variable_hypercube\u001b[0;34m(datasets, fill_value, data_vars, coords, compat, join, combine_attrs)\u001b[0m\n\u001b[1;32m 624\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[1;32m 625\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mAt least one Dataset is required to resolve variable names \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 626\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mfor combined hypercube.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 627\u001b[0m )\n\u001b[0;32m--> 629\u001b[0m combined_ids, concat_dims \u001b[38;5;241m=\u001b[39m \u001b[43m_infer_concat_order_from_coords\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mlist\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mdatasets\u001b[49m\u001b[43m)\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 631\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m fill_value \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 632\u001b[0m \u001b[38;5;66;03m# check that datasets form complete hypercube\u001b[39;00m\n", + "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/xarray/core/combine.py:149\u001b[0m, in \u001b[0;36m_infer_concat_order_from_coords\u001b[0;34m(datasets)\u001b[0m\n\u001b[1;32m 148\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(datasets) \u001b[38;5;241m>\u001b[39m \u001b[38;5;241m1\u001b[39m \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m concat_dims:\n\u001b[0;32m--> 149\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[1;32m 150\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mCould not find any dimension coordinates to use to \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 151\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124morder the datasets for concatenation\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 152\u001b[0m 
)\n\u001b[1;32m 154\u001b[0m combined_ids \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mdict\u001b[39m(\u001b[38;5;28mzip\u001b[39m(tile_ids, datasets))\n", + "\u001b[0;31mValueError\u001b[0m: Could not find any dimension coordinates to use to order the datasets for concatenation", + "\nDuring handling of the above exception, another exception occurred:\n", + "\u001b[0;31mMergeError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[72], line 5\u001b[0m\n\u001b[1;32m 1\u001b[0m files_filter \u001b[38;5;241m=\u001b[39m \\\n\u001b[1;32m 2\u001b[0m [\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m/data/archive/sgp/sgparsclbnd1clothC1.c1/sgparsclbnd1clothC1.c1.20110104.000000.cdf\u001b[39m\u001b[38;5;124m'\u001b[39m,\n\u001b[1;32m 3\u001b[0m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124m/data/archive/sgp/sgparsclbnd1clothC1.c1/sgparsclbnd1clothC1.c1.20110106.000000.cdf\u001b[39m\u001b[38;5;124m'\u001b[39m]\n\u001b[0;32m----> 5\u001b[0m ds_multi \u001b[38;5;241m=\u001b[39m \u001b[43mact\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mio\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43marmfiles\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mread_netcdf\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfiles_list\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 6\u001b[0m ds_multi\n", + "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/act/io/armfiles.py:164\u001b[0m, in \u001b[0;36mread_netcdf\u001b[0;34m(filenames, concat_dim, return_None, combine, decode_times, use_cftime, use_base_time, combine_attrs, cleanup_qc, keep_variables, **kwargs)\u001b[0m\n\u001b[1;32m 157\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m (\n\u001b[1;32m 158\u001b[0m kwargs[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mcombine\u001b[39m\u001b[38;5;124m'\u001b[39m] \u001b[38;5;241m!=\u001b[39m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mnested\u001b[39m\u001b[38;5;124m'\u001b[39m\n\u001b[1;32m 159\u001b[0m \u001b[38;5;129;01mand\u001b[39;00m 
\u001b[38;5;28mtype\u001b[39m(exception)\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__name__\u001b[39m \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mValueError\u001b[39m\u001b[38;5;124m'\u001b[39m\n\u001b[1;32m 160\u001b[0m \u001b[38;5;129;01mand\u001b[39;00m exception\u001b[38;5;241m.\u001b[39margs[\u001b[38;5;241m0\u001b[39m] \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mCould not find any dimension coordinates \u001b[39m\u001b[38;5;124m'\u001b[39m\n\u001b[1;32m 161\u001b[0m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mto use to order the datasets for concatenation\u001b[39m\u001b[38;5;124m'\u001b[39m\n\u001b[1;32m 162\u001b[0m ):\n\u001b[1;32m 163\u001b[0m kwargs[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mcombine\u001b[39m\u001b[38;5;124m'\u001b[39m] \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mnested\u001b[39m\u001b[38;5;124m'\u001b[39m\n\u001b[0;32m--> 164\u001b[0m ds \u001b[38;5;241m=\u001b[39m \u001b[43mxr\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mopen_mfdataset\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfilenames\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 166\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 167\u001b[0m \u001b[38;5;66;03m# When all else fails raise the orginal exception\u001b[39;00m\n\u001b[1;32m 168\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m exception\n", + "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/xarray/backends/api.py:1013\u001b[0m, in \u001b[0;36mopen_mfdataset\u001b[0;34m(paths, chunks, concat_dim, compat, preprocess, engine, data_vars, coords, combine, parallel, join, attrs_file, combine_attrs, **kwargs)\u001b[0m\n\u001b[1;32m 1009\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m 1010\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m combine \u001b[38;5;241m==\u001b[39m 
\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mnested\u001b[39m\u001b[38;5;124m\"\u001b[39m:\n\u001b[1;32m 1011\u001b[0m \u001b[38;5;66;03m# Combined nested list by successive concat and merge operations\u001b[39;00m\n\u001b[1;32m 1012\u001b[0m \u001b[38;5;66;03m# along each dimension, using structure given by \"ids\"\u001b[39;00m\n\u001b[0;32m-> 1013\u001b[0m combined \u001b[38;5;241m=\u001b[39m \u001b[43m_nested_combine\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1014\u001b[0m \u001b[43m \u001b[49m\u001b[43mdatasets\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1015\u001b[0m \u001b[43m \u001b[49m\u001b[43mconcat_dims\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mconcat_dim\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1016\u001b[0m \u001b[43m \u001b[49m\u001b[43mcompat\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcompat\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1017\u001b[0m \u001b[43m \u001b[49m\u001b[43mdata_vars\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdata_vars\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1018\u001b[0m \u001b[43m \u001b[49m\u001b[43mcoords\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcoords\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1019\u001b[0m \u001b[43m \u001b[49m\u001b[43mids\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mids\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1020\u001b[0m \u001b[43m \u001b[49m\u001b[43mjoin\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mjoin\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1021\u001b[0m \u001b[43m \u001b[49m\u001b[43mcombine_attrs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcombine_attrs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1022\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1023\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m combine \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mby_coords\u001b[39m\u001b[38;5;124m\"\u001b[39m:\n\u001b[1;32m 1024\u001b[0m \u001b[38;5;66;03m# Redo ordering from 
coordinates, ignoring how they were ordered\u001b[39;00m\n\u001b[1;32m 1025\u001b[0m \u001b[38;5;66;03m# previously\u001b[39;00m\n\u001b[1;32m 1026\u001b[0m combined \u001b[38;5;241m=\u001b[39m combine_by_coords(\n\u001b[1;32m 1027\u001b[0m datasets,\n\u001b[1;32m 1028\u001b[0m compat\u001b[38;5;241m=\u001b[39mcompat,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 1032\u001b[0m combine_attrs\u001b[38;5;241m=\u001b[39mcombine_attrs,\n\u001b[1;32m 1033\u001b[0m )\n", + "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/xarray/core/combine.py:365\u001b[0m, in \u001b[0;36m_nested_combine\u001b[0;34m(datasets, concat_dims, compat, data_vars, coords, ids, fill_value, join, combine_attrs)\u001b[0m\n\u001b[1;32m 362\u001b[0m _check_shape_tile_ids(combined_ids)\n\u001b[1;32m 364\u001b[0m \u001b[38;5;66;03m# Apply series of concatenate or merge operations along each dimension\u001b[39;00m\n\u001b[0;32m--> 365\u001b[0m combined \u001b[38;5;241m=\u001b[39m \u001b[43m_combine_nd\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 366\u001b[0m \u001b[43m \u001b[49m\u001b[43mcombined_ids\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 367\u001b[0m \u001b[43m \u001b[49m\u001b[43mconcat_dims\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 368\u001b[0m \u001b[43m \u001b[49m\u001b[43mcompat\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcompat\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 369\u001b[0m \u001b[43m \u001b[49m\u001b[43mdata_vars\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdata_vars\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 370\u001b[0m \u001b[43m \u001b[49m\u001b[43mcoords\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcoords\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 371\u001b[0m \u001b[43m \u001b[49m\u001b[43mfill_value\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mfill_value\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 372\u001b[0m \u001b[43m 
\u001b[49m\u001b[43mjoin\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mjoin\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 373\u001b[0m \u001b[43m \u001b[49m\u001b[43mcombine_attrs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcombine_attrs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 374\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 375\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m combined\n", + "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/xarray/core/combine.py:239\u001b[0m, in \u001b[0;36m_combine_nd\u001b[0;34m(combined_ids, concat_dims, data_vars, coords, compat, fill_value, join, combine_attrs)\u001b[0m\n\u001b[1;32m 235\u001b[0m \u001b[38;5;66;03m# Each iteration of this loop reduces the length of the tile_ids tuples\u001b[39;00m\n\u001b[1;32m 236\u001b[0m \u001b[38;5;66;03m# by one. It always combines along the first dimension, removing the first\u001b[39;00m\n\u001b[1;32m 237\u001b[0m \u001b[38;5;66;03m# element of the tuple\u001b[39;00m\n\u001b[1;32m 238\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m concat_dim \u001b[38;5;129;01min\u001b[39;00m concat_dims:\n\u001b[0;32m--> 239\u001b[0m combined_ids \u001b[38;5;241m=\u001b[39m \u001b[43m_combine_all_along_first_dim\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 240\u001b[0m \u001b[43m \u001b[49m\u001b[43mcombined_ids\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 241\u001b[0m \u001b[43m \u001b[49m\u001b[43mdim\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mconcat_dim\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 242\u001b[0m \u001b[43m \u001b[49m\u001b[43mdata_vars\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdata_vars\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 243\u001b[0m \u001b[43m \u001b[49m\u001b[43mcoords\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcoords\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 244\u001b[0m \u001b[43m 
\u001b[49m\u001b[43mcompat\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcompat\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 245\u001b[0m \u001b[43m \u001b[49m\u001b[43mfill_value\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mfill_value\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 246\u001b[0m \u001b[43m \u001b[49m\u001b[43mjoin\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mjoin\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 247\u001b[0m \u001b[43m \u001b[49m\u001b[43mcombine_attrs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcombine_attrs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 248\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 249\u001b[0m (combined_ds,) \u001b[38;5;241m=\u001b[39m combined_ids\u001b[38;5;241m.\u001b[39mvalues()\n\u001b[1;32m 250\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m combined_ds\n", + "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/xarray/core/combine.py:275\u001b[0m, in \u001b[0;36m_combine_all_along_first_dim\u001b[0;34m(combined_ids, dim, data_vars, coords, compat, fill_value, join, combine_attrs)\u001b[0m\n\u001b[1;32m 273\u001b[0m combined_ids \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mdict\u001b[39m(\u001b[38;5;28msorted\u001b[39m(group))\n\u001b[1;32m 274\u001b[0m datasets \u001b[38;5;241m=\u001b[39m combined_ids\u001b[38;5;241m.\u001b[39mvalues()\n\u001b[0;32m--> 275\u001b[0m new_combined_ids[new_id] \u001b[38;5;241m=\u001b[39m \u001b[43m_combine_1d\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 276\u001b[0m \u001b[43m \u001b[49m\u001b[43mdatasets\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdim\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcompat\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdata_vars\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcoords\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mfill_value\u001b[49m\u001b[43m,\u001b[49m\u001b[43m 
\u001b[49m\u001b[43mjoin\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcombine_attrs\u001b[49m\n\u001b[1;32m 277\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 278\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m new_combined_ids\n", + "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/xarray/core/combine.py:320\u001b[0m, in \u001b[0;36m_combine_1d\u001b[0;34m(datasets, concat_dim, compat, data_vars, coords, fill_value, join, combine_attrs)\u001b[0m\n\u001b[1;32m 318\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m\n\u001b[1;32m 319\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m--> 320\u001b[0m combined \u001b[38;5;241m=\u001b[39m \u001b[43mmerge\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 321\u001b[0m \u001b[43m \u001b[49m\u001b[43mdatasets\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 322\u001b[0m \u001b[43m \u001b[49m\u001b[43mcompat\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcompat\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 323\u001b[0m \u001b[43m \u001b[49m\u001b[43mfill_value\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mfill_value\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 324\u001b[0m \u001b[43m \u001b[49m\u001b[43mjoin\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mjoin\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 325\u001b[0m \u001b[43m \u001b[49m\u001b[43mcombine_attrs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcombine_attrs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 326\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 328\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m combined\n", + "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/xarray/core/merge.py:1025\u001b[0m, in \u001b[0;36mmerge\u001b[0;34m(objects, compat, join, fill_value, combine_attrs)\u001b[0m\n\u001b[1;32m 1022\u001b[0m obj \u001b[38;5;241m=\u001b[39m 
obj\u001b[38;5;241m.\u001b[39mto_dataset(promote_attrs\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m) \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(obj, DataArray) \u001b[38;5;28;01melse\u001b[39;00m obj\n\u001b[1;32m 1023\u001b[0m dict_like_objects\u001b[38;5;241m.\u001b[39mappend(obj)\n\u001b[0;32m-> 1025\u001b[0m merge_result \u001b[38;5;241m=\u001b[39m \u001b[43mmerge_core\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1026\u001b[0m \u001b[43m \u001b[49m\u001b[43mdict_like_objects\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1027\u001b[0m \u001b[43m \u001b[49m\u001b[43mcompat\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1028\u001b[0m \u001b[43m \u001b[49m\u001b[43mjoin\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1029\u001b[0m \u001b[43m \u001b[49m\u001b[43mcombine_attrs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcombine_attrs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1030\u001b[0m \u001b[43m \u001b[49m\u001b[43mfill_value\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mfill_value\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1031\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1032\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m Dataset\u001b[38;5;241m.\u001b[39m_construct_direct(\u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mmerge_result\u001b[38;5;241m.\u001b[39m_asdict())\n", + "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/xarray/core/merge.py:757\u001b[0m, in \u001b[0;36mmerge_core\u001b[0;34m(objects, compat, join, combine_attrs, priority_arg, explicit_coords, indexes, fill_value)\u001b[0m\n\u001b[1;32m 755\u001b[0m collected \u001b[38;5;241m=\u001b[39m collect_variables_and_indexes(aligned, indexes\u001b[38;5;241m=\u001b[39mindexes)\n\u001b[1;32m 756\u001b[0m prioritized \u001b[38;5;241m=\u001b[39m _get_priority_vars_and_indexes(aligned, priority_arg, compat\u001b[38;5;241m=\u001b[39mcompat)\n\u001b[0;32m--> 757\u001b[0m variables, out_indexes 
\u001b[38;5;241m=\u001b[39m \u001b[43mmerge_collected\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 758\u001b[0m \u001b[43m \u001b[49m\u001b[43mcollected\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mprioritized\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcompat\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcompat\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcombine_attrs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcombine_attrs\u001b[49m\n\u001b[1;32m 759\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 761\u001b[0m dims \u001b[38;5;241m=\u001b[39m calculate_dimensions(variables)\n\u001b[1;32m 763\u001b[0m coord_names, noncoord_names \u001b[38;5;241m=\u001b[39m determine_coords(coerced)\n", + "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/xarray/core/merge.py:302\u001b[0m, in \u001b[0;36mmerge_collected\u001b[0;34m(grouped, prioritized, compat, combine_attrs, equals)\u001b[0m\n\u001b[1;32m 300\u001b[0m variables \u001b[38;5;241m=\u001b[39m [variable \u001b[38;5;28;01mfor\u001b[39;00m variable, _ \u001b[38;5;129;01min\u001b[39;00m elements_list]\n\u001b[1;32m 301\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 302\u001b[0m merged_vars[name] \u001b[38;5;241m=\u001b[39m \u001b[43munique_variable\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 303\u001b[0m \u001b[43m \u001b[49m\u001b[43mname\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mvariables\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcompat\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mequals\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget\u001b[49m\u001b[43m(\u001b[49m\u001b[43mname\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m)\u001b[49m\n\u001b[1;32m 304\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 305\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m 
MergeError:\n\u001b[1;32m 306\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m compat \u001b[38;5;241m!=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mminimal\u001b[39m\u001b[38;5;124m\"\u001b[39m:\n\u001b[1;32m 307\u001b[0m \u001b[38;5;66;03m# we need more than \"minimal\" compatibility (for which\u001b[39;00m\n\u001b[1;32m 308\u001b[0m \u001b[38;5;66;03m# we drop conflicting coordinates)\u001b[39;00m\n", + "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/xarray/core/merge.py:156\u001b[0m, in \u001b[0;36munique_variable\u001b[0;34m(name, variables, compat, equals)\u001b[0m\n\u001b[1;32m 153\u001b[0m \u001b[38;5;28;01mbreak\u001b[39;00m\n\u001b[1;32m 155\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m equals:\n\u001b[0;32m--> 156\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m MergeError(\n\u001b[1;32m 157\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mconflicting values for variable \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mname\u001b[38;5;132;01m!r}\u001b[39;00m\u001b[38;5;124m on objects to be combined. \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 158\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mYou can skip this check by specifying compat=\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124moverride\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 159\u001b[0m )\n\u001b[1;32m 161\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m combine_method:\n\u001b[1;32m 162\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m var \u001b[38;5;129;01min\u001b[39;00m variables[\u001b[38;5;241m1\u001b[39m:]:\n", + "\u001b[0;31mMergeError\u001b[0m: conflicting values for variable 'base_time' on objects to be combined. You can skip this check by specifying compat='override'." 
+ ] + } + ], + "source": [ + "files_filter = \\\n", + "['/data/archive/sgp/sgparsclbnd1clothC1.c1/sgparsclbnd1clothC1.c1.20110104.000000.cdf',\n", + " '/data/archive/sgp/sgparsclbnd1clothC1.c1/sgparsclbnd1clothC1.c1.20110106.000000.cdf']\n", + "\n", + "ds_multi = act.io.armfiles.read_netcdf(files_list)\n", + "ds_multi" + ] + }, + { + "cell_type": "code", + "execution_count": 101, + "id": "b0e5d0d7", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "2 files loaded\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
<xarray.Dataset>\n",
+       "Dimensions:                         (time: 17280, numlayers: 10)\n",
+       "Coordinates:\n",
+       "  * time                            (time) datetime64[ns] 2011-01-03 ... 2011...\n",
+       "Dimensions without coordinates: numlayers\n",
+       "Data variables:\n",
+       "    base_time                       datetime64[ns] 2011-01-03\n",
+       "    time_offset                     (time) timedelta64[ns] 00:00:00 ... NaT\n",
+       "    CloudBaseBestEstimate           (time) float32 -1.0 -1.0 -1.0 ... nan nan\n",
+       "    CloudLayerBottomHeightMplZwang  (time, numlayers) float32 0.0 0.0 ... nan\n",
+       "    CloudLayerTopHeightMplZwang     (time, numlayers) float32 0.0 0.0 ... nan\n",
+       "    qc_CloudLayerTopHeightMplZwang  (time, numlayers) float32 0.0 0.0 ... nan\n",
+       "Attributes:\n",
+       "    Date:                      Wed Jul 13 16:28:57 GMT 2011\n",
+       "    Version:                   $State: Exp $\n",
+       "    Number_Input_Platforms:    3\n",
+       "    Input_Platforms:           sgp30smplcmask1zwangC1.c1,sgpvceil25kC1.b1,sgp...\n",
+       "    Input_Platforms_Versions:  ?????,10.2,1.16\n",
+       "    zeb_platform:              sgparsclbnd1clothC1.c1\n",
+       "    Command_Line:              arsc1/arscl2 -s YYYYMMDD -e YYYYMMDD SITE FACI...\n",
+       "    contact:                    \n",
+       "    comment:                   If all layer top heights are 0, then the first...
" + ], + "text/plain": [ + "\n", + "Dimensions: (time: 17280, numlayers: 10)\n", + "Coordinates:\n", + " * time (time) datetime64[ns] 2011-01-03 ... 2011...\n", + "Dimensions without coordinates: numlayers\n", + "Data variables:\n", + " base_time datetime64[ns] 2011-01-03\n", + " time_offset (time) timedelta64[ns] 00:00:00 ... NaT\n", + " CloudBaseBestEstimate (time) float32 -1.0 -1.0 -1.0 ... nan nan\n", + " CloudLayerBottomHeightMplZwang (time, numlayers) float32 0.0 0.0 ... nan\n", + " CloudLayerTopHeightMplZwang (time, numlayers) float32 0.0 0.0 ... nan\n", + " qc_CloudLayerTopHeightMplZwang (time, numlayers) float32 0.0 0.0 ... nan\n", + "Attributes:\n", + " Date: Wed Jul 13 16:28:57 GMT 2011\n", + " Version: $State: Exp $\n", + " Number_Input_Platforms: 3\n", + " Input_Platforms: sgp30smplcmask1zwangC1.c1,sgpvceil25kC1.b1,sgp...\n", + " Input_Platforms_Versions: ?????,10.2,1.16\n", + " zeb_platform: sgparsclbnd1clothC1.c1\n", + " Command_Line: arsc1/arscl2 -s YYYYMMDD -e YYYYMMDD SITE FACI...\n", + " contact: \n", + " comment: If all layer top heights are 0, then the first..." 
+ ] + }, + "execution_count": 101, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Load files as a single dataset\n", + "files_list = files_filter \n", + "ds = ds_single_2\n", + "ds = data\n", + "# ds = act.io.armfiles.read_netcdf(files_list, compat='override')\n", + "ds.clean.cleanup()\n", + "print(f'{len(files_list)} files loaded')\n", + "ds\n" + ] + }, + { + "cell_type": "markdown", + "id": "4a551094-9ec0-4b64-b80a-9940573c2f50", + "metadata": {}, + "source": [ + "## Plot time series data\n", + "#### Define the list of variables to be plotted:" + ] + }, + { + "cell_type": "code", + "execution_count": 102, + "id": "d09b789e-84f1-4605-846b-a72c110c8048", + "metadata": {}, + "outputs": [], + "source": [ + "variables_to_plot = ['CloudBaseBestEstimate']" + ] + }, + { + "cell_type": "code", + "execution_count": 115, + "id": "3458fb08-035b-4898-9253-0a94e6f9c97b", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/kefeimo/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/act/plotting/plot.py:81: UserWarning: Could not discern datastreamname and dict or tuple were not provided. Using defaultname of act_datastream!\n", + " warnings.warn(\n" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "14e3ab08f694414e84a6909356fbe15a", + "version_major": 2, + "version_minor": 0 + }, + "image/png": "", + "text/html": [ + "\n", + "
\n", + "
\n", + " Figure\n", + "
\n", + " \n", + "
\n", + " " + ], + "text/plain": [ + "Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "ts_display = act.plotting.TimeSeriesDisplay(ds)\n", + "ts_display.add_subplots((len(variables_to_plot),), figsize = (9.5,4*len(variables_to_plot)))\n", + "\n", + "for i,v in enumerate(variables_to_plot):\n", + " ts_ax = ts_display.plot(v, subplot_index=(i,), set_title=ds.variables[v].attrs['long_name'],)\n", + " ts_ax.grid()\n", + "\n", + "plt.show()\n" + ] + }, + { + "cell_type": "markdown", + "id": "194399aa-1907-452b-8ba9-bc31d7f60291", + "metadata": {}, + "source": [ + "## Quality check plots\n", + "#### Define variable for QC plot" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "9a0ef63d-3eeb-48fc-a24a-43258b6134b8", + "metadata": {}, + "outputs": [], + "source": [ + "qc_variable = 'CloudLayerTopHeightMplCamp'" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "532663a3-4dc0-4497-bda8-018c5f91e1c4", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "QC not available for the selected field: CloudLayerTopHeightMplCamp\n" + ] + } + ], + "source": [ + "# QC Plot\n", + "if ('qc_'+qc_variable) in ds.variables:\n", + "\n", + " # Plot\n", + " qc_display = act.plotting.TimeSeriesDisplay(ds)\n", + " qc_display.add_subplots((2,), figsize = (9.5,10))\n", + " qc_ax = qc_display.plot(qc_variable, subplot_index=(0,), set_title=\"QC results on field: \" + qc_variable,)\n", + " qc_ax.grid()\n", + " qc_display.qc_flag_block_plot(qc_variable, subplot_index=(1,))\n", + "\n", + " plt.show()\n", + "else:\n", + " print(f'QC not available for the selected field: {qc_variable}')\n" + ] + }, + { + "cell_type": "markdown", + "id": "2308db16-f362-4033-a11a-c3e5e75ad9ba", + "metadata": {}, + "source": [ + "## Field selection dropdown menu\n", + "Select variable to be 
plotted from a dropdown menu" + ] + }, + { + "cell_type": "code", + "execution_count": 116, + "id": "d5792fa8-2253-441a-8ac6-1e07a0345031", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/kefeimo/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/act/plotting/plot.py:81: UserWarning: Could not discern datastreamname and dict or tuple were not provided. Using defaultname of act_datastream!\n", + " warnings.warn(\n" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "29438b2a84d24cb3b3d1d4be9f4dae0c", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "AppLayout(children=(Dropdown(description='Field:', layout=Layout(grid_area='header', margin='0px 30% 0px 20%',…" + ] + }, + "execution_count": 116, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "plt.ioff()\n", + "\n", + "# populate dropdown menu with available variables \n", + "available_variables = [v for v in ds.variables if not('time' in v or v.startswith('qc_') or v.startswith('source_')) and 'long_name' in ds.variables[v].attrs]\n", + "d_variable = 'CloudBaseBestEstimate'\n", + "dropdown = widgets.Dropdown(\n", + " options = [(ds.variables[v].attrs['long_name'], v) for v in available_variables],\n", + " value= d_variable,\n", + " description='Field:',\n", + " disabled=False,\n", + ")\n", + "dropdown.layout.margin = '0px 30% 0px 20%'\n", + "dropdown.layout.width = '50%'\n", + "\n", + "# set up display\n", + "i_display = act.plotting.TimeSeriesDisplay(ds)\n", + "i_display.add_subplots((1,), figsize = (9.5,5))\n", + "i_ax = i_display.plot(d_variable, subplot_index=(0,), set_title=ds.variables[d_variable].attrs['long_name'],)\n", + "i_ax.grid()\n", + "i_fig = i_display.fig\n", + "\n", + "# update plot callback function\n", + "def update_plot(change):\n", + " i_ax.cla()\n", + " i_ax_new = i_display.plot(change.new, subplot_index=(0,), 
set_title=ds.variables[change.new].attrs['long_name'],)\n", + " i_ax_new.grid()\n", + " i_fig.canvas.draw()\n", + " i_fig.canvas.flush_events()\n", + "\n", + "dropdown.observe(update_plot, names='value')\n", + "\n", + "widgets.AppLayout(\n", + " header=dropdown,\n", + " center=i_fig.canvas,\n", + " pane_heights=[1, 6,1]\n", + ")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1d7932f0", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.16" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/ARSCL/ARSCL_tutorial.ipynb b/VAPs/quicklook/ARSCL/ARSCL_tutorial.ipynb new file mode 100644 index 00000000..0e130cc5 --- /dev/null +++ b/VAPs/quicklook/ARSCL/ARSCL_tutorial.ipynb @@ -0,0 +1,838 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# ARSCL1CLOTH.C1 Notebook\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/arscl) for more information about this vap." + ] + }, + { + "cell_type": "markdown", + "id": "97097763", + "metadata": {}, + "source": [ + "In this notebook, we demonstrate the workflow to explore ARM vap data (using arscl1cloth as an example.) Value-added products (VAPs) are higher-order data products that have been analyzed and processed to ease scientists’ use of ARM data in atmospheric research and global climate models. \n", + "Here is the main content we will cover." 
+ ] + }, + { + "cell_type": "markdown", + "id": "eddec40f", + "metadata": {}, + "source": [ + "# Table of Contents\n", + "## Access the data\n", + "* How to retrieve the data\n", + "* Data path and file name conventions\n", + "* Load data\n", + "## Explore the data\n", + "* NetCDF Data structure\n", + "* Xarray essentials\n", + "* Xarray Variable\n", + "## Plot the data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "import random\n", + "\n", + "import glob\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "id": "2372d5be", + "metadata": {}, + "source": [ + "## Access the data" + ] + }, + { + "cell_type": "markdown", + "id": "5b0d9684", + "metadata": {}, + "source": [ + "### How to retrieve the data\n", + "We assume the path \"/data/archive\" is available where you are running this notebook. You can use `os.path.exists(\"/data/archive\")` to verify that the path exists on your machine.\n", + "\n", + "\n", + "### Data path and file name conventions\n", + "There are several common terms used with ARM data, for example, data-stream-name, data-level, site, facility, instrument, etc. (For more details, please see [ARM Data File Standards Version 1.2](https://www.google.com/search?q=arm+datastream+facility+cite+definition&rlz=1C1GCEJ_enUS1029US1029&ei=hb41ZICrBPukqtsPvMODIA&ved=0ahUKEwjAgeiS0aL-AhV7kmoFHbzhAAQQ4dUDCBA&uact=5&oq=arm+datastream+facility+cite+definition&gs_lcp=Cgxnd3Mtd2l6LXNlcnAQAzIFCAAQogQyBQgAEKIEMgUIABCiBDIFCAAQogQyBQgAEKIEOgoIABBHENYEELADOgoIIRCgARDDBBAKSgQIQRgAUJ4DWIoOYO0PaAFwAXgAgAGMAYgBuQmSAQMyLjmYAQCgAQHIAQjAAQE&sclient=gws-wiz-serp).)
\n", + "\n", + "For example, this notebook is called `arscl1cloth.c1`, where `arscl1cloth` is the \"datastream name\", and `c1` is the \"data level\".\n", + "\n", + "This datastream also contains site `nsa` and facility `C1`. (Note: an individual datastream might have multiple site-facility pairs.)\n", + "In such a case, the data of this data-stream is stored at `/data/archive/nsa/nsaarscl1clothC1.c1`, which is in the format of `<DATA_DIR>/<site>/<site><datastream_name><facility>.<data_level>`. We can use the following method to assign the data-stream directory `datastream_dir = os.path.join(DATA_DIR, site, site + DATASTREAM_NAME + facility + '.' + DATA_LEVEL )`\n", + "\n", + "The data files under datastream_dir also follow naming conventions. Once we reach the datastream_dir level, the most important file naming convention to differentiate the files is \"yyyyMMdd.hhmmss\", which comes in handy to filter files based on datetime. For example, we can use `glob.glob(f'{datastream_dir}/*.200709*.*')` to filter files in September 2007.\n", + "\n", + "(Note: refer to https://adc.arm.gov/discovery/#/ and the https://adc.arm.gov/solr8/metadata/select API to explore ARM datastreams and their associated available sites and facilities.)\n", + "\n", + "Please see the following examples in action" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d7e9eb85", + "metadata": {}, + "outputs": [], + "source": [ + "# Verify if DATA_DIR path exists\n", + "DATA_DIR = \"/data/archive\"\n", + "os.path.exists(DATA_DIR)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "586993fd", + "metadata": {}, + "outputs": [], + "source": [ + "# Specify datastream_dir following the path conventions and check its existence\n", + "DATASTREAM_NAME = \"arscl1cloth\"\n", + "DATA_LEVEL = \"c1\"\n", + "site = \"nsa\"\n", + "facility = \"C1\"\n", + "datastream_dir = os.path.join(DATA_DIR, site, site + DATASTREAM_NAME + facility + '.'
+ DATA_LEVEL )\n", + "print(datastream_dir)\n", + "print(os.path.exists(datastream_dir))\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0742f7c1", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: list 5 (random) files under datastream_dir\n", + "files = os.listdir(datastream_dir)\n", + "files[:5]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "39b98a36", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: get most recent file\n", + "list_of_files = glob.glob(f\"{datastream_dir}/*\") # * means all if need specific format then *.csv\n", + "latest_file = max(list_of_files, key=os.path.getctime)\n", + "latest_file" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "902d514e", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus sort datastream files based on datetime\n", + "files = os.listdir(datastream_dir)\n", + "file_sorted = files.copy()\n", + "file_sorted.sort() \n", + "print(file_sorted[:5])\n", + "\n", + "# to reverse\n", + "file_sorted_reverse = files.copy() \n", + "file_sorted_reverse.sort(reverse=True)\n", + "print(file_sorted_reverse[:5])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ec5923b2", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: pattern matching\n", + "# filter the 200709** files under datastream_dir\n", + "files_filter = glob.glob(f'{datastream_dir}/*.200709*.*')\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c4a4aa18", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: use armnotebook_utils.file_filter (TODO) to filter files based on datastream info \n" + ] + }, + { + "cell_type": "markdown", + "id": "cfeb9efc", + "metadata": {}, + "source": [ + "### Load data\n", + "The arm data is stored in [NetCDF](https://en.wikipedia.org/wiki/NetCDF#:~:text=%22NetCDF%20(network%20Common%20Data%20Form,format%20for%20representing%20scientific%20data.) format. 
We can use xarray's `open_dataset` method to load a single file, or `open_mfdataset` to open multiple files. (Note: the latter will still return a single xarray dataset object by combining multiple files.)\n", + "\n", + "Note: open_dataset keeps the file handle open and lazy loads its contents. All parameters are passed directly to open_dataset. It is preferable to load_dataset for memory efficiency. (For more details, please see [xarray.open_dataset](https://docs.xarray.dev/en/stable/generated/xarray.open_dataset.html))\n", + "\n", + "See the following example in action" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a440a329", + "metadata": {}, + "outputs": [], + "source": [ + "# open single file\n", + "full_path = latest_file\n", + "print(full_path)\n", + "ds_single = xr.open_dataset(full_path)\n", + "ds_single" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0143a3d", + "metadata": {}, + "outputs": [], + "source": [ + "print(ds_single)\n", + "print(type(ds_single))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1c0f8939", + "metadata": {}, + "outputs": [], + "source": [ + "# open multiple files\n", + "n_files = 3\n", + "full_paths = [os.path.join(datastream_dir, f_path) for f_path in file_sorted_reverse[:n_files]]\n", + "print(full_paths)\n", + "try: # Note: sometimes multiple files cannot be merged, so we used try except here.\n", + " ds_mutiple = xr.open_mfdataset(full_paths)\n", + " # ds_mutiple\n", + " print(type(ds_mutiple))\n", + " print(ds_mutiple)\n", + "except Exception as e:\n", + " print(e)" + ] + }, + { + "cell_type": "markdown", + "id": "3fa59943", + "metadata": {}, + "source": [ + "## Explore the data" + ] + }, + { + "cell_type": "markdown", + "id": "571f69b9", + "metadata": {}, + "source": [ + "### NetCDF Data structure \n", + "\n", + "\n", + "(If you are confident with NetCDF basics and xarray essentials, feel free to skip this section.)\n", + "Before we dive into
data exploration, there are some essential concepts we should be familiar with (shown below): \n", + "* Dataset\n", + "* Data array\n", + "* Variable\n", + "* Dimension\n", + "* Coordinate\n", + "* Data Type\n", + "* Meta Data (Attributes)\n", + "\n", + "We will not go into details about NetCDF basics, but here are some references you might find helpful:\n", + "* [Components of a NetCDF Dataset](https://iprc.soest.hawaii.edu/users/xfu/tool/guidef-7.html#)\n", + "* [Network Common Data Form (NetCDF)](https://www.unidata.ucar.edu/software/netcdf/) and [A Brief History of (netCDF) Time](https://www.unidata.ucar.edu/software/netcdf/time/recs.html)\n", + "\n", + "\n", + "\n", + "### Xarray essentials\n", + "Earlier we introduced the xarray package and used `open_dataset` and `open_mfdataset` to retrieve NetCDF data files as an xarray Dataset object. Recall that you can review the data in a notebook by using `print(ds)` or `ds`. \n", + "\n", + "Using xarray to retrieve the aforementioned NetCDF basics is straightforward.
In general,\n", + "* Dataset: `ds`\n", + "* Data array: `ds[var_name]`\n", + "* Variable: `ds.variables`\n", + "* Dimension: `ds.dims`\n", + "* Coordinate: `ds.coords`\n", + "* Data Type: `type(ds)`\n", + "* Meta Data (Attributes): `ds.attrs`\n", + "\n", + "Also, here are some references if you are new to xarray:\n", + "* [Xarray in 45 minutes](https://tutorial.xarray.dev/overview/xarray-in-45-min.html)\n", + "* [Handling NetCDF Files using XArray for Absolute Beginners](https://towardsdatascience.com/handling-netcdf-files-using-xarray-for-absolute-beginners-111a8ab4463f)\n", + "\n", + "Try the following commands in action\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b3eac323", + "metadata": {}, + "outputs": [], + "source": [ + "ds = ds_single" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "77ecf85d", + "metadata": {}, + "outputs": [], + "source": [ + "# Dataset \n", + "ds" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "25e7de09", + "metadata": {}, + "outputs": [], + "source": [ + "# Data array, Variable\n", + "# Note: the info can be overwhelming.
Do not attempt to grasp everything at first glance.\n", + "ds.variables" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8c41b67e", + "metadata": {}, + "outputs": [], + "source": [ + "# dimensions\n", + "ds.dims" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "156f1dfc", + "metadata": {}, + "outputs": [], + "source": [ + "# coordinates\n", + "ds.coords" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "277d6064", + "metadata": {}, + "outputs": [], + "source": [ + "# Meta Data (Attributes)\n", + "ds.attrs" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d334681f", + "metadata": {}, + "outputs": [], + "source": [ + "# type\n", + "print(type(ds))\n", + "print(type(ds.variables))\n", + "print(type(ds.dims))\n", + "print(type(ds.coords))\n", + "print(type(ds.attrs))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "643399d6", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: retrieve variable names only\n", + "list(ds.variables)" + ] + }, + { + "cell_type": "markdown", + "id": "1cf36878", + "metadata": {}, + "source": [ + "#### Discussion: variable vs. coordinates vs. dimensions. \n", + "From [Components of a NetCDF Dataset](https://iprc.soest.hawaii.edu/users/xfu/tool/guidef-7.html#):\n", + "\n", + "* Dimension: A dimension may be used to represent a real physical dimension, for example, time, latitude, longitude, or height. A dimension might also be used to index other quantities, for example station or model-run-number.\n", + "A netCDF dimension has both a name and a length. A dimension length is an arbitrary positive integer, except that one dimension in a netCDF dataset can have the length UNLIMITED.\n", + "\n", + "* Variables: Variables are used to store the bulk of the data in a netCDF dataset. A variable represents an array of values of the same type. A scalar value is treated as a 0-dimensional array.
A variable has a name, a data type, and a shape described by its list of dimensions specified when the variable is created. A variable may also have associated attributes, which may be added, deleted or changed after the variable is created.\n", + "\n", + "* Coordinate (Variables): It is legal for a variable to have the same name as a dimension. Such variables have no special meaning to the netCDF library. However there is a convention that such variables should be treated in a special way by software using this library.\n", + "A variable with the same name as a dimension is called a coordinate variable. It typically defines a physical coordinate corresponding to that dimension.\n", + "\n", + "\n", + "Tips: By definition, coordinates and dimensions are also variables. For example, ds.time is a coordinate but it is also a special variable. In practice, however, when we talk about a variable, we mean a regular/non-coordinate variable. We will use this convention for the remainder of the notebook. " + ] + }, + { + "cell_type": "markdown", + "id": "d47120f9", + "metadata": {}, + "source": [ + "### Xarray Variable\n", + "\n", + "\n", + "We can use `ds.variables` to access the variables of a dataset. But printing out the whole dataset can often be overwhelming. Instead, we would work on an individual variable (also called an Xarray data array) using the `ds[var_name]` syntax, where `var_name` is the name of the variable. See the following examples in action.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5a57e136", + "metadata": {}, + "outputs": [], + "source": [ + "print(type(ds[\"time\"]))\n", + "ds[\"time\"]" + ] + }, + { + "cell_type": "markdown", + "id": "51ec643e", + "metadata": {}, + "source": [ + "#### Variable properties\n", + "For an individual variable, the following properties are the most commonly used, shown with the associated xarray attribute to retrieve them.
(assuming var = ds[var_name])\n", + "* name: `var.name`\n", + "* data content: `var.data`\n", + "* attributes: `var.attrs`\n", + "* dimensions: `var.dims`\n", + "* data type: `var.data.dtype`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a632a525", + "metadata": {}, + "outputs": [], + "source": [ + "var_name = \"time\"\n", + "var = ds[var_name]\n", + "\n", + "print(\"var.name: \\n\", var.name, \"\\n\")\n", + "print(\"var.data: \\n\", var.data, \"\\n\")\n", + "print(\"var.attrs: \\n\", var.attrs, \"\\n\")\n", + "print(\"var.dims: \\n\", var.dims, \"\\n\")\n", + "print(\"var.data.dtype: \\n\", var.data.dtype, \"\\n\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b256b07f", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: individual variable info table (collect and display variable info in a tabular format)\n", + "df_info = pd.DataFrame()\n", + "df_info[\"var_name\"] = list(ds.variables)\n", + "df_info[\"dims\"] = df_info.var_name.apply(lambda x: ds[x].dims)\n", + "df_info[\"is_dim\"] = df_info.var_name.apply(lambda x: x in ds.dims)\n", + "df_info[\"n_dim\"] = df_info.var_name.apply(lambda x: len(ds[x].dims))\n", + "df_info[\"attrs\"] = df_info.var_name.apply(lambda x: ds[x].attrs)\n", + "df_info[\"dtype\"] = df_info.var_name.apply(lambda x: ds[x].data.dtype)\n", + "df_info" + ] + }, + { + "cell_type": "markdown", + "id": "e6a36b96", + "metadata": {}, + "source": [ + "### Data cleaning/Preprocessing (skipped)\n", + "Data cleaning and preprocessing is an important stage in the data analysis pipeline. However, it is outside the scope of this notebook. For more information about data cleaning and preprocessing basics, here are some references.
\n", + "* [Xarray Fundamentals](https://earth-env-data-science.github.io/lectures/xarray/xarray.html)\n", + "* [Xarray Tutorial — Pangeo Gallery documentation](http://gallery.pangeo.io/repos/pangeo-data/pangeo-tutorial-gallery/xarray.html)\n", + "* [Pythonic Data Cleaning With pandas and NumPy](https://realpython.com/python-data-cleaning-numpy-pandas/)\n", + "* [Pandas - Cleaning Data](https://www.w3schools.com/python/pandas/pandas_cleaning.asp)" + ] + }, + { + "cell_type": "markdown", + "id": "ae6e1a69", + "metadata": {}, + "source": [ + "### Plotting\n", + "Data visualization (or plotting) is another important data analysis topic and deserves its own discussion. Here in this notebook we will only demonstrate basic tools for simple data visualization tasks.\n", + "\n", + "Here are some references you might find useful:\n", + "* xarray plotting: https://docs.xarray.dev/en/stable/user-guide/plotting.html\n", + "* Atmospheric Community Toolkit (ACT): https://arm-doe.github.io/ACT/index.html\n", + "\n", + "Note: this notebook is auto-generated using a template. It uses a general heuristic to select the variable(s) to plot and is not customized to each individual notebook. Feel free to change the variables of interest, especially if a certain figure fails to plot." + ] + }, + { + "cell_type": "markdown", + "id": "7e417fb2", + "metadata": {}, + "source": [ + "#### 1-dimensional basic time series plot\n", + "\n", + "For the following plot we would like to find variables such that\n", + "* it has one and only one dimension\n", + "* \"time\" is its coordinate variable\n", + "* it is not a dimension itself,\n", + "* it is not a special variable with substrings within [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "\n", + "Please see the following example in action.
(For more details about pandas filtering, please see the following references.)\n", + "* [pandas: multiple conditions while indexing data frame](https://stackoverflow.com/questions/22591174/pandas-multiple-conditions-while-indexing-data-frame-unexpected-behavior)\n", + "* [pandas.DataFrame.apply](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.apply.html)\n", + "* [How to test if a string contains one of the substrings in a list...](https://stackoverflow.com/questions/26577516/how-to-test-if-a-string-contains-one-of-the-substrings-in-a-list-in-pandas)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "30edac2d", + "metadata": {}, + "outputs": [], + "source": [ + "# (pandas query) filter n_dim==1, non-dimension, and does not contain the following substrings\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter = df_info[(df_info.n_dim == 1) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (df_info.is_dim==False) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4777a659", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " var_1d = df_filter.var_name.values[0]\n", + " var_1d\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fb5ae985", + "metadata": {}, + "outputs": [], + "source": [ + "# Note: if failed, change to another variable to plot.\n", + "try:\n", + " ds[var_1d].plot()\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "markdown", + "id": "9e41066e", + "metadata": {}, + "source": [ + "#### 2-dimensional basic plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "09bd4adc", + "metadata": {}, + "outputs": [], + "source": [ + "# (pandas query) filter n_dim==2, non-dimension, and does not contain the
following substrings\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_2 = df_info[(df_info.n_dim == 2) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (df_info.is_dim==False) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter_2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2cdea1a9", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " var_2d = df_filter_2.var_name.values[0]\n", + " var_2d\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f5f48879", + "metadata": {}, + "outputs": [], + "source": [ + "# Note: if failed, change to another variable to plot.\n", + "try:\n", + " print(ds[var_2d].dims)\n", + " # ds[var_2d].plot()\n", + "\n", + " # conventionally, use \"time\" as x-axis\n", + " ds[var_2d].plot(x=\"time\")\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "markdown", + "id": "7554b2a6", + "metadata": {}, + "source": [ + "#### qc-plotting (optional)\n", + "\n", + "Note: act qc plotting has more strict requirement, one of them is the associated qc variable need to have \"flag_masks\" attributes. 
Which is added by using ds = act.io.armfiles.read_netcdf(files_list), then ds.clean.cleanup()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "71c2096f", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " ds_act = act.io.armfiles.read_netcdf(full_path)\n", + " print(type(ds_act))\n", + " ds_act.clean.cleanup()\n", + "\n", + " # or \n", + " # ds.clean.cleanup()\n", + "except Exception as e:\n", + " print(\"ERROR\", e)\n", + " ds_act = ds" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "844f8505", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter valid variables for ACT qc plotting\n", + "condition = (\"qc_\" + df_info.var_name).apply(lambda x: ds_act[x].attrs.get(\"flag_masks\") is not None \n", + " if x in list(ds.variables) else False)\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_3 = df_info[(df_info.is_dim==False) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings))) &\n", + " condition\n", + " ]\n", + "df_filter_3" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ecd38cc6", + "metadata": {}, + "outputs": [], + "source": [ + "# QC Plot\n", + "try:\n", + " qc_variable = df_filter_3.var_name.values[0]\n", + " print(qc_variable)\n", + "except Exception as e:\n", + " print(e)\n", + " \n", + "try:\n", + "\n", + " # Plot\n", + " qc_display = act.plotting.TimeSeriesDisplay(ds_act)\n", + " qc_display.add_subplots((2,), figsize = (9.5,10))\n", + " qc_ax = qc_display.plot(qc_variable, subplot_index=(0,), set_title=\"QC results on field: \" + qc_variable,)\n", + " qc_ax.grid()\n", + " qc_display.qc_flag_block_plot(qc_variable, subplot_index=(1,))\n", + " qc_ax.set_xlabel(f\"UTC Time starts at {ds.time.data[0]}\")\n", + "\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e)" + ] + }, + { + "cell_type": "markdown", + "id": "d76e4d27", + "metadata": 
{}, + "source": [ + "#### bonus: choose variables to plot from a dropdown menu " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bb733804", + "metadata": {}, + "outputs": [], + "source": [ + "# Valid variables filtering\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_4 = df_info[(df_info.is_dim==False) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter_4" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "82d5c20d", + "metadata": {}, + "outputs": [], + "source": [ + "# example 1: using xarray plot\n", + "\n", + "# Uncomment the following cell to try the interactive plot (ctrl + /)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2fc9aaa4", + "metadata": {}, + "outputs": [], + "source": [ + "# %matplotlib widget\n", + "# plt.clf()\n", + "\n", + "# fig, ax = plt.subplots(figsize=(10, 4))\n", + "\n", + "# available_variables = df_filter_4.var_name.values\n", + "# @widgets.interact(var=available_variables)\n", + "# def update(var = available_variables[0]):\n", + "# fig.clear() # Remove old lines from plot and plot new one\n", + "# if len(ds[var].dims)==2:\n", + "# ds[var].plot(x=\"time\", add_colorbar=False)\n", + "# else:\n", + "# ds[var].plot()\n", + "# plt.grid()\n", + "# plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fabdd802", + "metadata": {}, + "outputs": [], + "source": [ + "# example 2: using act plot\n", + "\n", + "# Uncomment the following cell to try the interactive plot (ctrl + /)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "962b4186", + "metadata": {}, + "outputs": [], + "source": [ + "# %matplotlib widget\n", + "# plt.clf()\n", + "\n", + "# available_variables = df_filter_4.var_name.values\n", + "\n", + "\n", + "# @widgets.interact(var=available_variables)\n", + "# def 
update(var = available_variables[0]):\n", + "\n", + "# i_display = act.plotting.TimeSeriesDisplay(ds_act)\n", + "# i_display.add_subplots((1,), figsize=(10, 4))\n", + "# ax = i_display.plot(var, subplot_index=(0,), set_title=f\"{var} ({ds_act[var].attrs['long_name']})\",)\n", + "\n", + "# ax.set_xlabel(f\"UTC Time starts at {ds.time.data[0]}\")\n", + "# ax.grid()\n", + "# plt.show()\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.16" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/ARSCL/arscl1cloth.c1.ipynb b/VAPs/quicklook/ARSCL/arscl1cloth.c1.ipynb new file mode 100644 index 00000000..21e2bc45 --- /dev/null +++ b/VAPs/quicklook/ARSCL/arscl1cloth.c1.ipynb @@ -0,0 +1,339 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# ARSCL1CLOTH.C1 Plots\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/arscl) for more information about this vap." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib widget\n", + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import ipywidgets as widgets\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "# Data archive directory\n", + "DATA_DIR = r'/data/archive/'\n", + "\n", + "# Datastream info\n", + "DATASTREAM_NAME = 'arscl1cloth'\n", + "DATA_LEVEL = 'c1'\n", + "LOCATIONS = [{'end_date': '2011-03-23', 'facility': 'C1', 'site': 'nsa', 'start_date': '1998-03-25'}, {'end_date': '2011-01-04', 'facility': 'C1', 'site': 'sgp', 'start_date': '1996-11-08'}, {'end_date': '2011-03-07', 'facility': 'C1', 'site': 'twp', 'start_date': '1999-07-01'}, {'end_date': '2009-02-14', 'facility': 'C2', 'site': 'twp', 'start_date': '1998-11-01'}, {'end_date': '2011-02-28', 'facility': 'C3', 'site': 'twp', 'start_date': '2003-01-01'}]" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "9faaf875", + "metadata": {}, + "source": [ + "## Define site, facility, and date range" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ac6764f5", + "metadata": {}, + "outputs": [], + "source": [ + "print(\"The following locations and date ranges are available for this VAP:\")\n", + "display(pd.DataFrame(LOCATIONS, columns=['site', 'facility', 'start_date', 'end_date']))" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "8d132223", + "metadata": {}, + "source": [ + "#### Define site, facility, and date range (date format: YYYY-MM-DD) using the variables below:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e563983a", + "metadata": {}, + "outputs": [], + "source": [ + "site_facility = ( 'sgp', 'C1' )\n", + "\n", + "date_start = '2011-01-03'\n", + 
"date_end = '2011-01-04'" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "bccd3dfe-2f99-49a5-bace-ea37e8dd8fc3", + "metadata": {}, + "source": [ + "## Load data files\n", + "Load data files from /data/archive/" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cb4b9a26-c574-49c0-a521-658fa553e39e", + "metadata": {}, + "outputs": [], + "source": [ + "# Compile list of files\n", + "site, facility = site_facility\n", + "d_date_start = datetime.strptime(date_start, '%Y-%m-%d')\n", + "d_date_end = datetime.strptime(date_end, '%Y-%m-%d')\n", + "dir_path = os.path.join(DATA_DIR + site, site + DATASTREAM_NAME + facility + r'.' + DATA_LEVEL )\n", + "dir_path\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6be8f3dc", + "metadata": {}, + "outputs": [], + "source": [ + "from datetime import date, timedelta\n", + "import pandas as pd\n", + "\n", + "def get_ARM_formated_dates(start_date, end_date):\n", + " \"\"\"\n", + " Get a list of ARM conventional formated date lists, based on start_date and end_date(inclusive)\n", + " EXAMPLE:\n", + " get_ARM_formated_dates(start_date=\"20180219\", end_date=\"20180221\")\n", + " >> [\"20180219\", \"20180220\", \"20180221\"] \n", + " \"\"\"\n", + " \n", + " _start_date = pd.to_datetime(start_date)\n", + " _end_date = pd.to_datetime(end_date)\n", + " \n", + " delta = _end_date - _start_date # returns timedelta \n", + " dates = []\n", + "\n", + " for i in range(delta.days + 1):\n", + " day = _start_date + timedelta(days=i)\n", + " day_formated = day.strftime(format=\"%Y%m%d\")\n", + " dates.append(day_formated)\n", + " return dates\n", + "\n", + "\n", + "get_ARM_formated_dates(start_date=date_start, end_date=date_end)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "51feea2e", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter a list of files based on date pattern\n", + "import glob\n", + "dates = 
get_ARM_formated_dates(start_date=date_start, end_date=date_end)\n", + "files_filter = []\n", + "for date in dates:\n", + " files_filter += glob.glob(f'{dir_path}/*.{date}*.*')\n", + " files_filter\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0e5d0d7", + "metadata": {}, + "outputs": [], + "source": [ + "# Load files as a single dataset\n", + "files_list = files_filter \n", + "ds = act.io.armfiles.read_netcdf(files_list)\n", + "ds.clean.cleanup()\n", + "print(f'{len(files_list)} files loaded')\n", + "ds\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "4a551094-9ec0-4b64-b80a-9940573c2f50", + "metadata": {}, + "source": [ + "## Plot time series data\n", + "#### Define the list of variables to be plotted:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d09b789e-84f1-4605-846b-a72c110c8048", + "metadata": {}, + "outputs": [], + "source": [ + "variables_to_plot = ['Reflectivity', 'ReflectivityNoClutter', 'ReflectivityBestEstimate']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3458fb08-035b-4898-9253-0a94e6f9c97b", + "metadata": {}, + "outputs": [], + "source": [ + "ts_display = act.plotting.TimeSeriesDisplay(ds)\n", + "ts_display.add_subplots((len(variables_to_plot),), figsize = (9.5,4*len(variables_to_plot)))\n", + "\n", + "for i,v in enumerate(variables_to_plot):\n", + " ts_ax = ts_display.plot(v, subplot_index=(i,), set_title=ds.variables[v].attrs['long_name'],)\n", + " ts_ax.grid()\n", + "\n", + "plt.show()\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "194399aa-1907-452b-8ba9-bc31d7f60291", + "metadata": {}, + "source": [ + "## Quality check plots\n", + "#### Define variable for QC plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9a0ef63d-3eeb-48fc-a24a-43258b6134b8", + "metadata": {}, + "outputs": [], + "source": [ + "qc_variable = 'RadarArtifacts'" + ] + }, + { + "cell_type": "code", + 
"execution_count": null, + "id": "532663a3-4dc0-4497-bda8-018c5f91e1c4", + "metadata": {}, + "outputs": [], + "source": [ + "# QC Plot\n", + "if ('qc_'+qc_variable) in ds.variables:\n", + "\n", + " # Plot\n", + " qc_display = act.plotting.TimeSeriesDisplay(ds)\n", + " qc_display.add_subplots((2,), figsize = (9.5,10))\n", + " qc_ax = qc_display.plot(qc_variable, subplot_index=(0,), set_title=\"QC results on field: \" + qc_variable,)\n", + " qc_ax.grid()\n", + " qc_display.qc_flag_block_plot(qc_variable, subplot_index=(1,))\n", + "\n", + " plt.show()\n", + "else:\n", + " print(f'QC not available for the selected field: {qc_variable}')\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "2308db16-f362-4033-a11a-c3e5e75ad9ba", + "metadata": {}, + "source": [ + "## Field selection dropdown menu\n", + "Select variable to be plotted from a dropdown menu" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d5792fa8-2253-441a-8ac6-1e07a0345031", + "metadata": {}, + "outputs": [], + "source": [ + "plt.ioff()\n", + "\n", + "# populate dropdown menu with available variables \n", + "available_variables = [v for v in ds.variables if not('time' in v or v.startswith('qc_') or v.startswith('source_')) and 'long_name' in ds.variables[v].attrs]\n", + "d_variable = 'Reflectivity'\n", + "dropdown = widgets.Dropdown(\n", + " options = [(ds.variables[v].attrs['long_name'], v) for v in available_variables],\n", + " value= d_variable,\n", + " description='Field:',\n", + " disabled=False,\n", + ")\n", + "dropdown.layout.margin = '0px 30% 0px 20%'\n", + "dropdown.layout.width = '50%'\n", + "\n", + "# set up display\n", + "i_display = act.plotting.TimeSeriesDisplay(ds)\n", + "i_display.add_subplots((1,), figsize = (9.5,5))\n", + "i_ax = i_display.plot(d_variable, subplot_index=(0,), set_title=ds.variables[d_variable].attrs['long_name'],)\n", + "i_ax.grid()\n", + "i_fig = i_display.fig\n", + "\n", + "# update plot callback function\n", + "def 
update_plot(change):\n", + " i_ax.cla()\n", + " i_ax_new = i_display.plot(change.new, subplot_index=(0,), set_title=ds.variables[change.new].attrs['long_name'],)\n", + " i_ax_new.grid()\n", + " i_fig.canvas.draw()\n", + " i_fig.canvas.flush_events()\n", + "\n", + "dropdown.observe(update_plot, names='value')\n", + "\n", + "widgets.AppLayout(\n", + " header=dropdown,\n", + " center=i_fig.canvas,\n", + " pane_heights=[1, 6,1]\n", + ")\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.12" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/ARSCL/arsclbnd1cloth.c1.ipynb b/VAPs/quicklook/ARSCL/arsclbnd1cloth.c1.ipynb new file mode 100644 index 00000000..c275fc7d --- /dev/null +++ b/VAPs/quicklook/ARSCL/arsclbnd1cloth.c1.ipynb @@ -0,0 +1,339 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# ARSCLBND1CLOTH.C1 Plots\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/arscl) for more information about this vap." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib widget\n", + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import ipywidgets as widgets\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "# Data archive directory\n", + "DATA_DIR = r'/data/archive/'\n", + "\n", + "# Datastream info\n", + "DATASTREAM_NAME = 'arsclbnd1cloth'\n", + "DATA_LEVEL = 'c1'\n", + "LOCATIONS = [{'end_date': '2011-03-23', 'facility': 'C1', 'site': 'nsa', 'start_date': '1998-03-25'}, {'end_date': '2011-01-04', 'facility': 'C1', 'site': 'sgp', 'start_date': '1996-11-08'}, {'end_date': '2011-03-07', 'facility': 'C1', 'site': 'twp', 'start_date': '1999-07-01'}, {'end_date': '2009-02-14', 'facility': 'C2', 'site': 'twp', 'start_date': '1998-11-01'}, {'end_date': '2011-02-28', 'facility': 'C3', 'site': 'twp', 'start_date': '2003-01-01'}]" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "9faaf875", + "metadata": {}, + "source": [ + "## Define site, facility, and date range" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ac6764f5", + "metadata": {}, + "outputs": [], + "source": [ + "print(\"The following locations and date ranges are available for this VAP:\")\n", + "display(pd.DataFrame(LOCATIONS, columns=['site', 'facility', 'start_date', 'end_date']))" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "8d132223", + "metadata": {}, + "source": [ + "#### Define site, facility, and date range (date format: YYYY-MM-DD) using the variables below:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e563983a", + "metadata": {}, + "outputs": [], + "source": [ + "site_facility = ( 'sgp', 'C1' )\n", + "\n", + "date_start = '2011-01-03'\n", + 
"date_end = '2011-01-04'" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "bccd3dfe-2f99-49a5-bace-ea37e8dd8fc3", + "metadata": {}, + "source": [ + "## Load data files\n", + "Load data files from /data/archive/" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cb4b9a26-c574-49c0-a521-658fa553e39e", + "metadata": {}, + "outputs": [], + "source": [ + "# Compile list of files\n", + "site, facility = site_facility\n", + "d_date_start = datetime.strptime(date_start, '%Y-%m-%d')\n", + "d_date_end = datetime.strptime(date_end, '%Y-%m-%d')\n", + "dir_path = os.path.join(DATA_DIR + site, site + DATASTREAM_NAME + facility + r'.' + DATA_LEVEL )\n", + "dir_path\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6be8f3dc", + "metadata": {}, + "outputs": [], + "source": [ + "from datetime import date, timedelta\n", + "import pandas as pd\n", + "\n", + "def get_ARM_formated_dates(start_date, end_date):\n", + " \"\"\"\n", + " Get a list of ARM conventional formated date lists, based on start_date and end_date(inclusive)\n", + " EXAMPLE:\n", + " get_ARM_formated_dates(start_date=\"20180219\", end_date=\"20180221\")\n", + " >> [\"20180219\", \"20180220\", \"20180221\"] \n", + " \"\"\"\n", + " \n", + " _start_date = pd.to_datetime(start_date)\n", + " _end_date = pd.to_datetime(end_date)\n", + " \n", + " delta = _end_date - _start_date # returns timedelta \n", + " dates = []\n", + "\n", + " for i in range(delta.days + 1):\n", + " day = _start_date + timedelta(days=i)\n", + " day_formated = day.strftime(format=\"%Y%m%d\")\n", + " dates.append(day_formated)\n", + " return dates\n", + "\n", + "\n", + "get_ARM_formated_dates(start_date=date_start, end_date=date_end)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "51feea2e", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter a list of files based on date pattern\n", + "import glob\n", + "dates = 
get_ARM_formated_dates(start_date=date_start, end_date=date_end)\n", + "files_filter = []\n", + "for date in dates:\n", + " files_filter += glob.glob(f'{dir_path}/*.{date}*.*')\n", + " files_filter\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0e5d0d7", + "metadata": {}, + "outputs": [], + "source": [ + "# Load files as a single dataset\n", + "files_list = files_filter \n", + "ds = act.io.armfiles.read_netcdf(files_list)\n", + "ds.clean.cleanup()\n", + "print(f'{len(files_list)} files loaded')\n", + "ds\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "4a551094-9ec0-4b64-b80a-9940573c2f50", + "metadata": {}, + "source": [ + "## Plot time series data\n", + "#### Define the list of variables to be plotted:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d09b789e-84f1-4605-846b-a72c110c8048", + "metadata": {}, + "outputs": [], + "source": [ + "variables_to_plot = ['CloudBaseBestEstimate']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3458fb08-035b-4898-9253-0a94e6f9c97b", + "metadata": {}, + "outputs": [], + "source": [ + "ts_display = act.plotting.TimeSeriesDisplay(ds)\n", + "ts_display.add_subplots((len(variables_to_plot),), figsize = (9.5,4*len(variables_to_plot)))\n", + "\n", + "for i,v in enumerate(variables_to_plot):\n", + " ts_ax = ts_display.plot(v, subplot_index=(i,), set_title=ds.variables[v].attrs['long_name'],)\n", + " ts_ax.grid()\n", + "\n", + "plt.show()\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "194399aa-1907-452b-8ba9-bc31d7f60291", + "metadata": {}, + "source": [ + "## Quality check plots\n", + "#### Define variable for QC plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9a0ef63d-3eeb-48fc-a24a-43258b6134b8", + "metadata": {}, + "outputs": [], + "source": [ + "qc_variable = 'CloudLayerTopHeightMplCamp'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + 
"id": "532663a3-4dc0-4497-bda8-018c5f91e1c4", + "metadata": {}, + "outputs": [], + "source": [ + "# QC Plot\n", + "if ('qc_'+qc_variable) in ds.variables:\n", + "\n", + " # Plot\n", + " qc_display = act.plotting.TimeSeriesDisplay(ds)\n", + " qc_display.add_subplots((2,), figsize = (9.5,10))\n", + " qc_ax = qc_display.plot(qc_variable, subplot_index=(0,), set_title=\"QC results on field: \" + qc_variable,)\n", + " qc_ax.grid()\n", + " qc_display.qc_flag_block_plot(qc_variable, subplot_index=(1,))\n", + "\n", + " plt.show()\n", + "else:\n", + " print(f'QC not available for the selected field: {qc_variable}')\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "2308db16-f362-4033-a11a-c3e5e75ad9ba", + "metadata": {}, + "source": [ + "## Field selection dropdown menu\n", + "Select variable to be plotted from a dropdown menu" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d5792fa8-2253-441a-8ac6-1e07a0345031", + "metadata": {}, + "outputs": [], + "source": [ + "plt.ioff()\n", + "\n", + "# populate dropdown menu with available variables \n", + "available_variables = [v for v in ds.variables if not('time' in v or v.startswith('qc_') or v.startswith('source_')) and 'long_name' in ds.variables[v].attrs]\n", + "d_variable = 'CloudBaseBestEstimate'\n", + "dropdown = widgets.Dropdown(\n", + " options = [(ds.variables[v].attrs['long_name'], v) for v in available_variables],\n", + " value= d_variable,\n", + " description='Field:',\n", + " disabled=False,\n", + ")\n", + "dropdown.layout.margin = '0px 30% 0px 20%'\n", + "dropdown.layout.width = '50%'\n", + "\n", + "# set up display\n", + "i_display = act.plotting.TimeSeriesDisplay(ds)\n", + "i_display.add_subplots((1,), figsize = (9.5,5))\n", + "i_ax = i_display.plot(d_variable, subplot_index=(0,), set_title=ds.variables[d_variable].attrs['long_name'],)\n", + "i_ax.grid()\n", + "i_fig = i_display.fig\n", + "\n", + "# update plot callback function\n", + "def update_plot(change):\n", + " 
i_ax.cla()\n", + " i_ax_new = i_display.plot(change.new, subplot_index=(0,), set_title=ds.variables[change.new].attrs['long_name'],)\n", + " i_ax_new.grid()\n", + " i_fig.canvas.draw()\n", + " i_fig.canvas.flush_events()\n", + "\n", + "dropdown.observe(update_plot, names='value')\n", + "\n", + "widgets.AppLayout(\n", + " header=dropdown,\n", + " center=i_fig.canvas,\n", + " pane_heights=[1, 6,1]\n", + ")\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.12" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/ASDBE-AIR/ASDBE-AIR_tutorial.ipynb b/VAPs/quicklook/ASDBE-AIR/ASDBE-AIR_tutorial.ipynb new file mode 100644 index 00000000..4b3c7f0d --- /dev/null +++ b/VAPs/quicklook/ASDBE-AIR/ASDBE-AIR_tutorial.ipynb @@ -0,0 +1,838 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# AAFMERGEDAEROSOLSD.C1 Notebook\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/asdbe-air) for more information about this vap." + ] + }, + { + "cell_type": "markdown", + "id": "97097763", + "metadata": {}, + "source": [ + "In this notebook, we demonstrate the workflow to explore ARM vap data (using aafmergedaerosolsd as an example.) Value-added products (VAPs) are higher-order data products that have been analyzed and processed to ease scientists’ use of ARM data in atmospheric research and global climate models. \n", + "Here is the main content we will cover." 
+ ] + }, + { + "cell_type": "markdown", + "id": "eddec40f", + "metadata": {}, + "source": [ + "# Table of Content\n", + "## Access the data\n", + "* How to retrieve the data\n", + "* Data path and file name conventions\n", + "* Load data\n", + "## Explore the data\n", + "* NetCDF Data structure\n", + "* Xarray essentials\n", + "* Xarray Variable\n", + "## Plot the data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "import random\n", + "\n", + "import glob\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "id": "2372d5be", + "metadata": {}, + "source": [ + "## Access the data" + ] + }, + { + "cell_type": "markdown", + "id": "5b0d9684", + "metadata": {}, + "source": [ + "### How to retrieve the data\n", + "We assume the path \"/data/archive\" is available where you are running this notebook. You can use `os.path.exists(\"/data/archive\")` to verify if the path exists at your machine.\n", + "\n", + "\n", + "### Data path and file name conventions\n", + "There several common terminologies regarding ARM data, for example, data-stream-name, data-level, site, facility, instrument, etc. (For more details, please see [ARM Data File Standards Version 1.2](https://www.google.com/search?q=arm+datastream+facility+cite+definition&rlz=1C1GCEJ_enUS1029US1029&ei=hb41ZICrBPukqtsPvMODIA&ved=0ahUKEwjAgeiS0aL-AhV7kmoFHbzhAAQQ4dUDCBA&uact=5&oq=arm+datastream+facility+cite+definition&gs_lcp=Cgxnd3Mtd2l6LXNlcnAQAzIFCAAQogQyBQgAEKIEMgUIABCiBDIFCAAQogQyBQgAEKIEOgoIABBHENYEELADOgoIIRCgARDDBBAKSgQIQRgAUJ4DWIoOYO0PaAFwAXgAgAGMAYgBuQmSAQMyLjmYAQCgAQHIAQjAAQE&sclient=gws-wiz-serp).) 
\n", + "\n", + "For example, this notebook is called `aafmergedaerosolsd.c1`, where `aafmergedaerosolsd` is the \"datastream name\", and `c1` is the \"data level\".\n", + "\n", + "This datastream also contains site `ena` and facility `F1`. (Note: an individual datastream might have multiple site-facility pairs.)\n", + "In such a case, the data of this data-stream is stored at `/data/archive/ena/enaaafmergedaerosolsdF1.c1`, which is in the format of `{DATA_DIR}/{site}/{site}{DATASTREAM_NAME}{facility}.{DATA_LEVEL}`. We can use the following method to assign the data-stream directory: `datastream_dir = os.path.join(DATA_DIR, site, site + DATASTREAM_NAME + facility + '.' + DATA_LEVEL )`\n", + "\n", + "The data files under datastream_dir also follow naming conventions. Once you reach the datastream_dir level, the most important file naming convention for differentiating the files is \"yyyyMMdd.hhmmss\", which comes in handy for filtering files based on datetime. For example, we can use `glob.glob(f'{datastream_dir}/*.200709*.*')` to select files from September 2007.\n", + "\n", + "(Note: refer to https://adc.arm.gov/discovery/#/ and the https://adc.arm.gov/solr8/metadata/select API to explore ARM datastreams and their associated available sites and facilities.)\n", + "\n", + "Please see the following examples in action" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d7e9eb85", + "metadata": {}, + "outputs": [], + "source": [ + "# Verify if DATA_DIR path exists\n", + "DATA_DIR = \"/data/archive\"\n", + "os.path.exists(DATA_DIR)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "586993fd", + "metadata": {}, + "outputs": [], + "source": [ + "# Speicify datastream_dir following the path conventions and check its existence\n", + "DATASTREAM_NAME = \"aafmergedaerosolsd\"\n", + "DATA_LEVEL = \"c1\"\n", + "site = \"ena\"\n", + "facility = \"F1\"\n", + "datastream_dir = os.path.join(DATA_DIR, site, site + DATASTREAM_NAME + facility + '.' 
+ DATA_LEVEL )\n", + "print(datastream_dir)\n", + "print(os.path.exists(datastream_dir))\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0742f7c1", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: list 5 (random) files under datastream_dir\n", + "files = os.listdir(datastream_dir)\n", + "files[:5]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "39b98a36", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: get most recent file\n", + "list_of_files = glob.glob(f\"{datastream_dir}/*\") # * means all if need specific format then *.csv\n", + "latest_file = max(list_of_files, key=os.path.getctime)\n", + "latest_file" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "902d514e", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus sort datastream files based on datetime\n", + "files = os.listdir(datastream_dir)\n", + "file_sorted = files.copy()\n", + "file_sorted.sort() \n", + "print(file_sorted[:5])\n", + "\n", + "# to reverse\n", + "file_sorted_reverse = files.copy() \n", + "file_sorted_reverse.sort(reverse=True)\n", + "print(file_sorted_reverse[:5])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ec5923b2", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: pattern matching\n", + "# filter the 200709** files under datastream_dir\n", + "files_filter = glob.glob(f'{datastream_dir}/*.200709*.*')\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c4a4aa18", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: use armnotebook_utils.file_filter (TODO) to filter files based on datastream info \n" + ] + }, + { + "cell_type": "markdown", + "id": "cfeb9efc", + "metadata": {}, + "source": [ + "### Load data\n", + "The arm data is stored in [NetCDF](https://en.wikipedia.org/wiki/NetCDF#:~:text=%22NetCDF%20(network%20Common%20Data%20Form,format%20for%20representing%20scientific%20data.) format. 
We can use xarray's `open_dataset` method to load a single file, or `open_mfdataset` to open multiple files. (Note: the latter will still return a single xarray dataset object by combining multiple files.)\n", + "\n", + "Note: open_dataset keeps the file handle open and lazy loads its contents. All parameters are passed directly to open_dataset. It is a preferable method over load_dataset for memory efficiency. (For more details, please see [xarray.open_dataset](https://docs.xarray.dev/en/stable/generated/xarray.open_dataset.html))\n", + "\n", + "See the following example in action" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a440a329", + "metadata": {}, + "outputs": [], + "source": [ + "# open single file\n", + "full_path = latest_file\n", + "print(full_path)\n", + "ds_single = xr.open_dataset(full_path)\n", + "ds_single" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0143a3d", + "metadata": {}, + "outputs": [], + "source": [ + "print(ds_single)\n", + "print(type(ds_single))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1c0f8939", + "metadata": {}, + "outputs": [], + "source": [ + "# open multiple files\n", + "n_files = 3\n", + "full_paths = [os.path.join(datastream_dir, f_path) for f_path in file_sorted_reverse[:n_files]]\n", + "print(full_paths)\n", + "try: # Note: sometimes multiple files cannot be merged, so we used try except here.\n", + " ds_mutiple = xr.open_mfdataset(full_paths)\n", + " # ds_mutiple\n", + " print(type(ds_mutiple))\n", + " print(ds_mutiple)\n", + "except Exception as e:\n", + " print(e)" + ] + }, + { + "cell_type": "markdown", + "id": "3fa59943", + "metadata": {}, + "source": [ + "## Explore the data" + ] + }, + { + "cell_type": "markdown", + "id": "571f69b9", + "metadata": {}, + "source": [ + "### NetCDF Data structure \n", + "\n", + "\n", + "(If you are confident with NetCDF basics and xarray essentials, feel free to skip this section.)\n", + "Before we dive into 
data exploration, there are some essential concepts we should be familiar with (shown below): \n", + "* Dataset\n", + "* Data array\n", + "* Variable\n", + "* Dimension\n", + "* Coordinate\n", + "* Data Type\n", + "* Meta Data (Attributes)\n", + "\n", + "We will not go into details about NetCDF basics and here are some references you might find helpful\n", + "* [Components of a NetCDF Dataset](https://iprc.soest.hawaii.edu/users/xfu/tool/guidef-7.html#)\n", + "* [Network Common Data Form (NetCDF)](https://www.unidata.ucar.edu/software/netcdf/) and [A Brief History of (netCDF) Time](https://www.unidata.ucar.edu/software/netcdf/time/recs.html)\n", + "\n", + "\n", + "\n", + "### Xarray essentials\n", + "Earlier we introduced the xarray package and used `open_dataset` and `open_mfdataset` to retrieve a NetCDF data file as an xarray Dataset object. Recall that you can review the data in a notebook by using `print(ds)` or `ds`. \n", + "\n", + "Using xarray to retrieve the aforementioned NetCDF basics is straightforward. 
In general,\n", + "* Dataset: `ds`\n", + "* Data array: `ds.variables`\n", + "* Variable: `ds.variables`\n", + "* Dimension: `ds.dims`\n", + "* Coordinate: `ds.coords`\n", + "* Data Type: `type`\n", + "* Meta Data (Attributes): `ds.attrs`\n", + "\n", + "Also, here are some references if you are new to xarray\n", + "* [Xarray in 45 minutes](https://tutorial.xarray.dev/overview/xarray-in-45-min.html)\n", + "* [Handling NetCDF Files using XArray for Absolute Beginners](https://towardsdatascience.com/handling-netcdf-files-using-xarray-for-absolute-beginners-111a8ab4463f)\n", + "\n", + "Try the following commands in action\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b3eac323", + "metadata": {}, + "outputs": [], + "source": [ + "ds = ds_single" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "77ecf85d", + "metadata": {}, + "outputs": [], + "source": [ + "# Dataset \n", + "ds" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "25e7de09", + "metadata": {}, + "outputs": [], + "source": [ + "# Data array, Variable\n", + "# Note: the info can be overwhelming. 
Do not attempt to grasp everything at the first glance.\n", + "ds.variables" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8c41b67e", + "metadata": {}, + "outputs": [], + "source": [ + "# dimensions\n", + "ds.dims" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "156f1dfc", + "metadata": {}, + "outputs": [], + "source": [ + "# coordinates\n", + "ds.coords" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "277d6064", + "metadata": {}, + "outputs": [], + "source": [ + "# Meta Data (Attributes)\n", + "ds.attrs" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d334681f", + "metadata": {}, + "outputs": [], + "source": [ + "# type\n", + "print(type(ds))\n", + "print(type(ds.variables))\n", + "print(type(ds.dims))\n", + "print(type(ds.coords))\n", + "print(type(ds.attrs))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "643399d6", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: retrieve variable names only\n", + "list(ds.variables)" + ] + }, + { + "cell_type": "markdown", + "id": "1cf36878", + "metadata": {}, + "source": [ + "#### Discussion: variables vs. coordinates vs. dimensions. \n", + "From [Components of a NetCDF Dataset](https://iprc.soest.hawaii.edu/users/xfu/tool/guidef-7.html#):\n", + "\n", + "* Dimension: A dimension may be used to represent a real physical dimension, for example, time, latitude, longitude, or height. A dimension might also be used to index other quantities, for example station or model-run-number.\n", + "A netCDF dimension has both a name and a length. A dimension length is an arbitrary positive integer, except that one dimension in a netCDF dataset can have the length UNLIMITED.\n", + "\n", + "* Variables: Variables are used to store the bulk of the data in a netCDF dataset. A variable represents an array of values of the same type. A scalar value is treated as a 0-dimensional array. 
A variable has a name, a data type, and a shape described by its list of dimensions specified when the variable is created. A variable may also have associated attributes, which may be added, deleted or changed after the variable is created.\n", + "\n", + "* Coordinate (Variables): It is legal for a variable to have the same name as a dimension. Such variables have no special meaning to the netCDF library. However there is a convention that such variables should be treated in a special way by software using this library.\n", + "A variable with the same name as a dimension is called a coordinate variable. It typically defines a physical coordinate corresponding to that dimension.\n", + "\n", + "\n", + "Tips: By definition, coordinates and dimensions are also variables. For example ds.time is a coordinate but it is also a special variable. But in practice, when we talk about a variable, we mean a regular/non-coordinate variable. We will use this convention for the remainder of this notebook. " + ] + }, + { + "cell_type": "markdown", + "id": "d47120f9", + "metadata": {}, + "source": [ + "### Xarray Variable\n", + "\n", + "\n", + "We can use `ds.variables` to access the variables of a dataset. But printing out the whole dataset can often be overwhelming. Instead, we would work on the individual variable (also called Xarray data array.) using the `ds[var_name]` syntax, where `var_name` is the name of the variable. See the following examples in action.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5a57e136", + "metadata": {}, + "outputs": [], + "source": [ + "print(type(ds[\"time\"]))\n", + "ds[\"time\"]" + ] + }, + { + "cell_type": "markdown", + "id": "51ec643e", + "metadata": {}, + "source": [ + "#### Variable properties\n", + "For an individual variable, the following properties are mostly used with the associated xarray method to retrieve them. 
(assuming var = ds[var_name])\n", + "* name: `var.name`\n", + "* data content: `var.data`\n", + "* attributes: `var.attrs`\n", + "* dimensions: `var.dims`\n", + "* data type: `var.data.dtype`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a632a525", + "metadata": {}, + "outputs": [], + "source": [ + "var_name = \"time\"\n", + "var = ds[var_name]\n", + "\n", + "print(\"var.name: \\n\", var.name, \"\\n\")\n", + "print(\"var.data: \\n\", var.data, \"\\n\")\n", + "print(\"var.attrs: \\n\", var.attrs, \"\\n\")\n", + "print(\"var.dims: \\n\", var.dims, \"\\n\")\n", + "print(\"var.data.dtype: \\n\", var.data.dtype, \"\\n\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b256b07f", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: individual variable info table (collect and display variable info as a tabular format)\n", + "df_info = pd.DataFrame()\n", + "df_info[\"var_name\"] = list(ds.variables)\n", + "df_info[\"dims\"] = df_info.var_name.apply(lambda x: ds[x].dims)\n", + "df_info[\"is_dim\"] = df_info.var_name.apply(lambda x: x in ds.dims)\n", + "df_info[\"n_dim\"] = df_info.var_name.apply(lambda x: len(ds[x].dims))\n", + "df_info[\"attrs\"] = df_info.var_name.apply(lambda x: ds[x].attrs)\n", + "df_info[\"dtype\"] = df_info.var_name.apply(lambda x: ds[x].data.dtype)\n", + "df_info" + ] + }, + { + "cell_type": "markdown", + "id": "e6a36b96", + "metadata": {}, + "source": [ + "### Data cleaning/Preprocessing (skipped)\n", + "Data cleaning and preprocessing is an important stage in the data analysis pipeline. However it will be out of the scope of this notebook. For more information about data cleaning and preprocessing basics, here are some references. 
\n", + "* [Xarray Fundamentals](https://earth-env-data-science.github.io/lectures/xarray/xarray.html)\n", + "* [Xarray Tutorial — Pangeo Gallery documentation](http://gallery.pangeo.io/repos/pangeo-data/pangeo-tutorial-gallery/xarray.html)\n", + "* [Pythonic Data Cleaning With pandas and NumPy](https://realpython.com/python-data-cleaning-numpy-pandas/)\n", + "* [Pandas - Cleaning Data](https://www.w3schools.com/python/pandas/pandas_cleaning.asp)" + ] + }, + { + "cell_type": "markdown", + "id": "ae6e1a69", + "metadata": {}, + "source": [ + "### Plotting\n", + "Data visualization (or plotting) is another important data analysis topic and deserves its own discussion. Here in this notebook we will only demonstrate basic tools for simple data visualization tasks.\n", + "\n", + "Here are some references you might find useful:\n", + "* xarray plotting: https://docs.xarray.dev/en/stable/user-guide/plotting.html\n", + "* Atmospheric Community Toolkit (ACT): https://arm-doe.github.io/ACT/index.html\n", + "\n", + "Note: this notebook is auto-generated using a template. It uses a general idea to select variable(s) to plot and is not customized to each individual notebook. Feel free to change the variables of interest, especially if a certain figure fails to plot." + ] + }, + { + "cell_type": "markdown", + "id": "7e417fb2", + "metadata": {}, + "source": [ + "#### 1-dimensional basic time series plot\n", + "\n", + "For the following plot we would like to find variables such that\n", + "* it has one and only one dimension\n", + "* \"time\" is its coordinate variable\n", + "* it is not a dimension itself,\n", + "* it is not a special variable with substrings within [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "\n", + "Please see the following example in action. 
(For more details about pandas filtering, please see the following references.)\n", + "* [pandas: multiple conditions while indexing data frame](https://stackoverflow.com/questions/22591174/pandas-multiple-conditions-while-indexing-data-frame-unexpected-behavior)\n", + "* [pandas.DataFrame.apply](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.apply.html)\n", + "* [How to test if a string contains one of the substrings in a list...](https://stackoverflow.com/questions/26577516/how-to-test-if-a-string-contains-one-of-the-substrings-in-a-list-in-pandas)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "30edac2d", + "metadata": {}, + "outputs": [], + "source": [ + "# (pandas query) filter n_dim==1, non-dimensional, and not contains the following substrings\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter = df_info[(df_info.n_dim == 1) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (df_info.is_dim==False) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4777a659", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " var_1d = df_filter.var_name.values[0]\n", + " var_1d\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fb5ae985", + "metadata": {}, + "outputs": [], + "source": [ + "# Note: if failed, change to another variable to plot.\n", + "try:\n", + " ds[var_1d].plot()\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "markdown", + "id": "9e41066e", + "metadata": {}, + "source": [ + "#### 2-dimensional basic plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "09bd4adc", + "metadata": {}, + "outputs": [], + "source": [ + "# (pandas query) filter n_dim==2, non-dimensional, and not contains the 
following substrings\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_2 = df_info[(df_info.n_dim == 2) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (df_info.is_dim==False) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter_2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2cdea1a9", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " var_2d = df_filter_2.var_name.values[0]\n", + " var_2d\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f5f48879", + "metadata": {}, + "outputs": [], + "source": [ + "# Note: if failed, change to another variable to plot.\n", + "try:\n", + " print(ds[var_2d].dims)\n", + " # ds[var_2d].plot()\n", + "\n", + " # conventionally, use \"time\" as x-axis\n", + " ds[var_2d].plot(x=\"time\")\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "markdown", + "id": "7554b2a6", + "metadata": {}, + "source": [ + "#### qc-plotting (optional)\n", + "\n", + "Note: act qc plotting has more strict requirement, one of them is the associated qc variable need to have \"flag_masks\" attributes. 
Which is added by using ds = act.io.armfiles.read_netcdf(files_list), then ds.clean.cleanup()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "71c2096f", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " ds_act = act.io.armfiles.read_netcdf(full_path)\n", + " print(type(ds_act))\n", + " ds_act.clean.cleanup()\n", + "\n", + " # or \n", + " # ds.clean.cleanup()\n", + "except Exception as e:\n", + " print(\"ERROR\", e)\n", + " ds_act = ds" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "844f8505", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter valid variables for ACT qc plotting\n", + "condition = (\"qc_\" + df_info.var_name).apply(lambda x: ds_act[x].attrs.get(\"flag_masks\") is not None \n", + " if x in list(ds.variables) else False)\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_3 = df_info[(df_info.is_dim==False) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings))) &\n", + " condition\n", + " ]\n", + "df_filter_3" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ecd38cc6", + "metadata": {}, + "outputs": [], + "source": [ + "# QC Plot\n", + "try:\n", + " qc_variable = df_filter_3.var_name.values[0]\n", + " print(qc_variable)\n", + "except Exception as e:\n", + " print(e)\n", + " \n", + "try:\n", + "\n", + " # Plot\n", + " qc_display = act.plotting.TimeSeriesDisplay(ds_act)\n", + " qc_display.add_subplots((2,), figsize = (9.5,10))\n", + " qc_ax = qc_display.plot(qc_variable, subplot_index=(0,), set_title=\"QC results on field: \" + qc_variable,)\n", + " qc_ax.grid()\n", + " qc_display.qc_flag_block_plot(qc_variable, subplot_index=(1,))\n", + " qc_ax.set_xlabel(f\"UTC Time starts at {ds.time.data[0]}\")\n", + "\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e)" + ] + }, + { + "cell_type": "markdown", + "id": "d76e4d27", + "metadata": 
{}, + "source": [ + "#### bonus: choose variables to plot from a dropdown menu " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bb733804", + "metadata": {}, + "outputs": [], + "source": [ + "# Valid variables filtering\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_4 = df_info[(df_info.is_dim==False) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter_4" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "82d5c20d", + "metadata": {}, + "outputs": [], + "source": [ + "# example 1: using xarray plot\n", + "\n", + "# Uncomment the following cell to try the interactive plot (ctrl + /)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2fc9aaa4", + "metadata": {}, + "outputs": [], + "source": [ + "# %matplotlib widget\n", + "# plt.clf()\n", + "\n", + "# fig, ax = plt.subplots(figsize=(10, 4))\n", + "\n", + "# available_variables = df_filter_4.var_name.values\n", + "# @widgets.interact(var=available_variables)\n", + "# def update(var = available_variables[0]):\n", + "# fig.clear() # Remove old lines from plot and plot new one\n", + "# if len(ds[var].dims)==2:\n", + "# ds[var].plot(x=\"time\", add_colorbar=False)\n", + "# else:\n", + "# ds[var].plot()\n", + "# plt.grid()\n", + "# plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fabdd802", + "metadata": {}, + "outputs": [], + "source": [ + "# example 2: using act plot\n", + "\n", + "# Uncomment the following cell to try the interactive plot (ctrl + /)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "962b4186", + "metadata": {}, + "outputs": [], + "source": [ + "# %matplotlib widget\n", + "# plt.clf()\n", + "\n", + "# available_variables = df_filter_4.var_name.values\n", + "\n", + "\n", + "# @widgets.interact(var=available_variables)\n", + "# def 
update(var = available_variables[0]):\n", + "\n", + "# i_display = act.plotting.TimeSeriesDisplay(ds_act)\n", + "# i_display.add_subplots((1,), figsize=(10, 4))\n", + "# ax = i_display.plot(var, subplot_index=(0,), set_title=f\"{var} ({ds_act[var].attrs['long_name']})\",)\n", + "\n", + "# ax.set_xlabel(f\"UTC Time starts at {ds.time.data[0]}\")\n", + "# ax.grid()\n", + "# plt.show()\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.16" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/ASDBE-AIR/aafmergedaerosolsd.c1.ipynb b/VAPs/quicklook/ASDBE-AIR/aafmergedaerosolsd.c1.ipynb new file mode 100644 index 00000000..e1e9ec56 --- /dev/null +++ b/VAPs/quicklook/ASDBE-AIR/aafmergedaerosolsd.c1.ipynb @@ -0,0 +1,297 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# AAFMERGEDAEROSOLSD.C1 Plots\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/asdbe-air) for more information about this vap." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib widget\n", + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import ipywidgets as widgets\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "# Data archive directory\n", + "DATA_DIR = r'/data/archive/'\n", + "\n", + "# Datastream info\n", + "DATASTREAM_NAME = 'aafmergedaerosolsd'\n", + "DATA_LEVEL = 'c1'\n", + "LOCATIONS = [{'end_date': '2018-02-19', 'facility': 'F1', 'site': 'ena', 'start_date': '2017-06-21'}]" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "9faaf875", + "metadata": {}, + "source": [ + "## Define site, facility, and date range" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ac6764f5", + "metadata": {}, + "outputs": [], + "source": [ + "print(\"The following locations and date ranges are available for this VAP:\")\n", + "display(pd.DataFrame(LOCATIONS, columns=['site', 'facility', 'start_date', 'end_date']))" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "8d132223", + "metadata": {}, + "source": [ + "#### Define site, facility, and date range (date format: YYYY-MM-DD) using the variables below:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e563983a", + "metadata": {}, + "outputs": [], + "source": [ + "site_facility = ( 'ena', 'F1' )\n", + "\n", + "date_start = '2018-02-17'\n", + "date_end = '2018-02-19'" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "bccd3dfe-2f99-49a5-bace-ea37e8dd8fc3", + "metadata": {}, + "source": [ + "## Load data files\n", + "Load data files from /data/archive/" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cb4b9a26-c574-49c0-a521-658fa553e39e", + 
"metadata": {}, + "outputs": [], + "source": [ + "# Compile list of files\n", + "site, facility = site_facility\n", + "d_date_start = datetime.strptime(date_start, '%Y-%m-%d')\n", + "d_date_end = datetime.strptime(date_end, '%Y-%m-%d')\n", + "dir_path = os.path.join(DATA_DIR + site, site + DATASTREAM_NAME + facility + r'.' + DATA_LEVEL )\n", + "dir_path\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6be8f3dc", + "metadata": {}, + "outputs": [], + "source": [ + "from datetime import date, timedelta\n", + "import pandas as pd\n", + "\n", + "def get_ARM_formated_dates(start_date, end_date):\n", + " \"\"\"\n", + " Get a list of ARM conventional formated date lists, based on start_date and end_date(inclusive)\n", + " EXAMPLE:\n", + " get_ARM_formated_dates(start_date=\"20180219\", end_date=\"20180221\")\n", + " >> [\"20180219\", \"20180220\", \"20180221\"] \n", + " \"\"\"\n", + " \n", + " _start_date = pd.to_datetime(start_date)\n", + " _end_date = pd.to_datetime(end_date)\n", + " \n", + " delta = _end_date - _start_date # returns timedelta \n", + " dates = []\n", + "\n", + " for i in range(delta.days + 1):\n", + " day = _start_date + timedelta(days=i)\n", + " day_formated = day.strftime(format=\"%Y%m%d\")\n", + " dates.append(day_formated)\n", + " return dates\n", + "\n", + "\n", + "get_ARM_formated_dates(start_date=date_start, end_date=date_end)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "51feea2e", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter a list of files based on date pattern\n", + "import glob\n", + "dates = get_ARM_formated_dates(start_date=date_start, end_date=date_end)\n", + "files_filter = []\n", + "for date in dates:\n", + " files_filter += glob.glob(f'{dir_path}/*.{date}*.*')\n", + " files_filter\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0e5d0d7", + "metadata": {}, + "outputs": [], + "source": [ + "# Load files as a single 
dataset\n", + "files_list = files_filter \n", + "ds = act.io.armfiles.read_netcdf(files_list)\n", + "ds.clean.cleanup()\n", + "print(f'{len(files_list)} files loaded')\n", + "ds\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "4a551094-9ec0-4b64-b80a-9940573c2f50", + "metadata": {}, + "source": [ + "## Plot time series data\n", + "#### Define the list of variables to be plotted:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d09b789e-84f1-4605-846b-a72c110c8048", + "metadata": {}, + "outputs": [], + "source": [ + "variables_to_plot = ['number_concentration']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3458fb08-035b-4898-9253-0a94e6f9c97b", + "metadata": {}, + "outputs": [], + "source": [ + "ts_display = act.plotting.TimeSeriesDisplay(ds)\n", + "ts_display.add_subplots((len(variables_to_plot),), figsize = (9.5,4*len(variables_to_plot)))\n", + "\n", + "for i,v in enumerate(variables_to_plot):\n", + " ts_ax = ts_display.plot(v, subplot_index=(i,), set_title=ds.variables[v].attrs['long_name'],)\n", + " ts_ax.grid()\n", + "\n", + "plt.show()\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "2308db16-f362-4033-a11a-c3e5e75ad9ba", + "metadata": {}, + "source": [ + "## Field selection dropdown menu\n", + "Select variable to be plotted from a dropdown menu" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d5792fa8-2253-441a-8ac6-1e07a0345031", + "metadata": {}, + "outputs": [], + "source": [ + "plt.ioff()\n", + "\n", + "# populate dropdown menu with available variables \n", + "available_variables = [v for v in ds.variables if not('time' in v or v.startswith('qc_') or v.startswith('source_')) and 'long_name' in ds.variables[v].attrs]\n", + "d_variable = 'number_concentration'\n", + "dropdown = widgets.Dropdown(\n", + " options = [(ds.variables[v].attrs['long_name'], v) for v in available_variables],\n", + " value= d_variable,\n", + " 
description='Field:',\n", + " disabled=False,\n", + ")\n", + "dropdown.layout.margin = '0px 30% 0px 20%'\n", + "dropdown.layout.width = '50%'\n", + "\n", + "# set up display\n", + "i_display = act.plotting.TimeSeriesDisplay(ds)\n", + "i_display.add_subplots((1,), figsize = (9.5,5))\n", + "i_ax = i_display.plot(d_variable, subplot_index=(0,), set_title=ds.variables[d_variable].attrs['long_name'],)\n", + "i_ax.grid()\n", + "i_fig = i_display.fig\n", + "\n", + "# update plot callback function\n", + "def update_plot(change):\n", + " i_ax.cla()\n", + " i_ax_new = i_display.plot(change.new, subplot_index=(0,), set_title=ds.variables[change.new].attrs['long_name'],)\n", + " i_ax_new.grid()\n", + " i_fig.canvas.draw()\n", + " i_fig.canvas.flush_events()\n", + "\n", + "dropdown.observe(update_plot, names='value')\n", + "\n", + "widgets.AppLayout(\n", + " header=dropdown,\n", + " center=i_fig.canvas,\n", + " pane_heights=[1, 6,1]\n", + ")\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.12" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/BAEBBR/30baebbr.c1.ipynb b/VAPs/quicklook/BAEBBR/30baebbr.c1.ipynb new file mode 100644 index 00000000..f6fdf996 --- /dev/null +++ b/VAPs/quicklook/BAEBBR/30baebbr.c1.ipynb @@ -0,0 +1,339 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# 30BAEBBR.C1 Plots\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/baebbr) for more information about 
this vap." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib widget\n", + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import ipywidgets as widgets\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "# Data archive directory\n", + "DATA_DIR = r'/data/archive/'\n", + "\n", + "# Datastream info\n", + "DATASTREAM_NAME = '30baebbr'\n", + "DATA_LEVEL = 'c1'\n", + "LOCATIONS = [{'end_date': '2023-09-28', 'facility': 'E11', 'site': 'sgp', 'start_date': '2016-08-04'}, {'end_date': '2023-11-30', 'facility': 'E12', 'site': 'sgp', 'start_date': '1993-09-29'}, {'end_date': '2023-11-30', 'facility': 'E13', 'site': 'sgp', 'start_date': '1993-07-20'}, {'end_date': '2023-09-28', 'facility': 'E15', 'site': 'sgp', 'start_date': '1993-07-11'}, {'end_date': '2009-11-17', 'facility': 'E18', 'site': 'sgp', 'start_date': '1997-09-10'}, {'end_date': '2011-09-18', 'facility': 'E19', 'site': 'sgp', 'start_date': '1997-05-30'}, {'end_date': '2011-11-16', 'facility': 'E20', 'site': 'sgp', 'start_date': '1993-07-06'}, {'end_date': '2009-12-01', 'facility': 'E22', 'site': 'sgp', 'start_date': '1993-07-04'}, {'end_date': '2002-04-08', 'facility': 'E25', 'site': 'sgp', 'start_date': '1997-08-10'}, {'end_date': '2009-12-17', 'facility': 'E26', 'site': 'sgp', 'start_date': '1993-07-05'}, {'end_date': '2009-12-04', 'facility': 'E27', 'site': 'sgp', 'start_date': '2003-05-07'}, {'end_date': '2009-10-20', 'facility': 'E2', 'site': 'sgp', 'start_date': '1997-05-22'}, {'end_date': '2023-11-30', 'facility': 'E32', 'site': 'sgp', 'start_date': '2011-09-28'}, {'end_date': '2023-09-28', 'facility': 'E34', 'site': 'sgp', 'start_date': '2011-09-02'}, {'end_date': '2023-09-23', 'facility': 'E35', 'site': 'sgp', 'start_date': 
'2011-10-05'}, {'end_date': '2023-09-28', 'facility': 'E36', 'site': 'sgp', 'start_date': '2011-09-28'}, {'end_date': '2023-11-30', 'facility': 'E39', 'site': 'sgp', 'start_date': '2015-09-30'}, {'end_date': '2023-09-28', 'facility': 'E40', 'site': 'sgp', 'start_date': '2015-10-15'}, {'end_date': '2011-09-25', 'facility': 'E4', 'site': 'sgp', 'start_date': '1993-07-13'}, {'end_date': '2011-11-13', 'facility': 'E7', 'site': 'sgp', 'start_date': '1993-10-04'}, {'end_date': '2009-11-10', 'facility': 'E8', 'site': 'sgp', 'start_date': '1993-07-12'}, {'end_date': '2023-09-28', 'facility': 'E9', 'site': 'sgp', 'start_date': '1993-07-11'}]" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "9faaf875", + "metadata": {}, + "source": [ + "## Define site, facility, and date range" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ac6764f5", + "metadata": {}, + "outputs": [], + "source": [ + "print(\"The following locations and date ranges are available for this VAP:\")\n", + "display(pd.DataFrame(LOCATIONS, columns=['site', 'facility', 'start_date', 'end_date']))" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "8d132223", + "metadata": {}, + "source": [ + "#### Define site, facility, and date range (date format: YYYY-MM-DD) using the variables below:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e563983a", + "metadata": {}, + "outputs": [], + "source": [ + "site_facility = ( 'sgp', 'E11' )\n", + "\n", + "date_start = '2023-09-26'\n", + "date_end = '2023-09-28'" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "bccd3dfe-2f99-49a5-bace-ea37e8dd8fc3", + "metadata": {}, + "source": [ + "## Load data files\n", + "Load data files from /data/archive/" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cb4b9a26-c574-49c0-a521-658fa553e39e", + "metadata": {}, + "outputs": [], + "source": [ + "# Compile list of files\n", + "site, facility = 
site_facility\n", + "d_date_start = datetime.strptime(date_start, '%Y-%m-%d')\n", + "d_date_end = datetime.strptime(date_end, '%Y-%m-%d')\n", + "dir_path = os.path.join(DATA_DIR + site, site + DATASTREAM_NAME + facility + r'.' + DATA_LEVEL )\n", + "dir_path\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6be8f3dc", + "metadata": {}, + "outputs": [], + "source": [ + "from datetime import date, timedelta\n", + "import pandas as pd\n", + "\n", + "def get_ARM_formated_dates(start_date, end_date):\n", + " \"\"\"\n", + " Get a list of ARM conventional formated date lists, based on start_date and end_date(inclusive)\n", + " EXAMPLE:\n", + " get_ARM_formated_dates(start_date=\"20180219\", end_date=\"20180221\")\n", + " >> [\"20180219\", \"20180220\", \"20180221\"] \n", + " \"\"\"\n", + " \n", + " _start_date = pd.to_datetime(start_date)\n", + " _end_date = pd.to_datetime(end_date)\n", + " \n", + " delta = _end_date - _start_date # returns timedelta \n", + " dates = []\n", + "\n", + " for i in range(delta.days + 1):\n", + " day = _start_date + timedelta(days=i)\n", + " day_formated = day.strftime(format=\"%Y%m%d\")\n", + " dates.append(day_formated)\n", + " return dates\n", + "\n", + "\n", + "get_ARM_formated_dates(start_date=date_start, end_date=date_end)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "51feea2e", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter a list of files based on date pattern\n", + "import glob\n", + "dates = get_ARM_formated_dates(start_date=date_start, end_date=date_end)\n", + "files_filter = []\n", + "for date in dates:\n", + " files_filter += glob.glob(f'{dir_path}/*.{date}*.*')\n", + " files_filter\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0e5d0d7", + "metadata": {}, + "outputs": [], + "source": [ + "# Load files as a single dataset\n", + "files_list = files_filter \n", + "ds = act.io.armfiles.read_netcdf(files_list)\n", + 
"ds.clean.cleanup()\n", + "print(f'{len(files_list)} files loaded')\n", + "ds\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "4a551094-9ec0-4b64-b80a-9940573c2f50", + "metadata": {}, + "source": [ + "## Plot time series data\n", + "#### Define the list of variables to be plotted:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d09b789e-84f1-4605-846b-a72c110c8048", + "metadata": {}, + "outputs": [], + "source": [ + "variables_to_plot = ['be_latent_heat_flux', 'be_sensible_heat_flux', 'net_radiation']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3458fb08-035b-4898-9253-0a94e6f9c97b", + "metadata": {}, + "outputs": [], + "source": [ + "ts_display = act.plotting.TimeSeriesDisplay(ds)\n", + "ts_display.add_subplots((len(variables_to_plot),), figsize = (9.5,4*len(variables_to_plot)))\n", + "\n", + "for i,v in enumerate(variables_to_plot):\n", + " ts_ax = ts_display.plot(v, subplot_index=(i,), set_title=ds.variables[v].attrs['long_name'],)\n", + " ts_ax.grid()\n", + "\n", + "plt.show()\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "194399aa-1907-452b-8ba9-bc31d7f60291", + "metadata": {}, + "source": [ + "## Quality check plots\n", + "#### Define variable for QC plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9a0ef63d-3eeb-48fc-a24a-43258b6134b8", + "metadata": {}, + "outputs": [], + "source": [ + "qc_variable = 'aerodynamic_latent_heat_flux'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "532663a3-4dc0-4497-bda8-018c5f91e1c4", + "metadata": {}, + "outputs": [], + "source": [ + "# QC Plot\n", + "if ('qc_'+qc_variable) in ds.variables:\n", + "\n", + " # Plot\n", + " qc_display = act.plotting.TimeSeriesDisplay(ds)\n", + " qc_display.add_subplots((2,), figsize = (9.5,10))\n", + " qc_ax = qc_display.plot(qc_variable, subplot_index=(0,), set_title=\"QC results on field: \" + qc_variable,)\n", + " qc_ax.grid()\n", + " 
qc_display.qc_flag_block_plot(qc_variable, subplot_index=(1,))\n", + "\n", + " plt.show()\n", + "else:\n", + " print(f'QC not available for the selected field: {qc_variable}')\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "2308db16-f362-4033-a11a-c3e5e75ad9ba", + "metadata": {}, + "source": [ + "## Field selection dropdown menu\n", + "Select variable to be plotted from a dropdown menu" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d5792fa8-2253-441a-8ac6-1e07a0345031", + "metadata": {}, + "outputs": [], + "source": [ + "plt.ioff()\n", + "\n", + "# populate dropdown menu with available variables \n", + "available_variables = [v for v in ds.variables if not('time' in v or v.startswith('qc_') or v.startswith('source_')) and 'long_name' in ds.variables[v].attrs]\n", + "d_variable = 'be_latent_heat_flux'\n", + "dropdown = widgets.Dropdown(\n", + " options = [(ds.variables[v].attrs['long_name'], v) for v in available_variables],\n", + " value= d_variable,\n", + " description='Field:',\n", + " disabled=False,\n", + ")\n", + "dropdown.layout.margin = '0px 30% 0px 20%'\n", + "dropdown.layout.width = '50%'\n", + "\n", + "# set up display\n", + "i_display = act.plotting.TimeSeriesDisplay(ds)\n", + "i_display.add_subplots((1,), figsize = (9.5,5))\n", + "i_ax = i_display.plot(d_variable, subplot_index=(0,), set_title=ds.variables[d_variable].attrs['long_name'],)\n", + "i_ax.grid()\n", + "i_fig = i_display.fig\n", + "\n", + "# update plot callback function\n", + "def update_plot(change):\n", + " i_ax.cla()\n", + " i_ax_new = i_display.plot(change.new, subplot_index=(0,), set_title=ds.variables[change.new].attrs['long_name'],)\n", + " i_ax_new.grid()\n", + " i_fig.canvas.draw()\n", + " i_fig.canvas.flush_events()\n", + "\n", + "dropdown.observe(update_plot, names='value')\n", + "\n", + "widgets.AppLayout(\n", + " header=dropdown,\n", + " center=i_fig.canvas,\n", + " pane_heights=[1, 6,1]\n", + ")\n" + ] + } + ], + "metadata": { 
+ "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.12" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/BAEBBR/BAEBBR_tutorial.ipynb b/VAPs/quicklook/BAEBBR/BAEBBR_tutorial.ipynb new file mode 100644 index 00000000..983b27d3 --- /dev/null +++ b/VAPs/quicklook/BAEBBR/BAEBBR_tutorial.ipynb @@ -0,0 +1,838 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# 30BAEBBR.C1 Notebook\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/baebbr) for more information about this vap." + ] + }, + { + "cell_type": "markdown", + "id": "97097763", + "metadata": {}, + "source": [ + "In this notebook, we demonstrate the workflow to explore ARM vap data (using 30baebbr as an example.) Value-added products (VAPs) are higher-order data products that have been analyzed and processed to ease scientists’ use of ARM data in atmospheric research and global climate models. \n", + "Here is the main content we will cover." 
+ ] + }, + { + "cell_type": "markdown", + "id": "eddec40f", + "metadata": {}, + "source": [ + "# Table of Content\n", + "## Access the data\n", + "* How to retrieve the data\n", + "* Data path and file name conventions\n", + "* Load data\n", + "## Explore the data\n", + "* NetCDF Data structure\n", + "* Xarray essentials\n", + "* Xarray Variable\n", + "## Plot the data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "import random\n", + "\n", + "import glob\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "id": "2372d5be", + "metadata": {}, + "source": [ + "## Access the data" + ] + }, + { + "cell_type": "markdown", + "id": "5b0d9684", + "metadata": {}, + "source": [ + "### How to retrieve the data\n", + "We assume the path \"/data/archive\" is available where you are running this notebook. You can use `os.path.exists(\"/data/archive\")` to verify if the path exists at your machine.\n", + "\n", + "\n", + "### Data path and file name conventions\n", + "There several common terminologies regarding ARM data, for example, data-stream-name, data-level, site, facility, instrument, etc. (For more details, please see [ARM Data File Standards Version 1.2](https://www.google.com/search?q=arm+datastream+facility+cite+definition&rlz=1C1GCEJ_enUS1029US1029&ei=hb41ZICrBPukqtsPvMODIA&ved=0ahUKEwjAgeiS0aL-AhV7kmoFHbzhAAQQ4dUDCBA&uact=5&oq=arm+datastream+facility+cite+definition&gs_lcp=Cgxnd3Mtd2l6LXNlcnAQAzIFCAAQogQyBQgAEKIEMgUIABCiBDIFCAAQogQyBQgAEKIEOgoIABBHENYEELADOgoIIRCgARDDBBAKSgQIQRgAUJ4DWIoOYO0PaAFwAXgAgAGMAYgBuQmSAQMyLjmYAQCgAQHIAQjAAQE&sclient=gws-wiz-serp).) 
\n", + "\n", + "For example, this notebook is called `30baebbr.c1`, where `30baebbr` is the \"datastream name\", and `c1` is the \"data level\".\n", + "\n", + "This datastream also contains site `sgp` and facility `E11`. (Note: an individual datastream might have multiple site-facility pairs.)\n", + "In such a case, the data of this data-stream is stored at `/data/archive/sgp/sgp30baebbrE11.c1`, which is in the format of `{DATA_DIR}/{site}/{site}{DATASTREAM_NAME}{facility}.{DATA_LEVEL}`. We can use the following method to assign the data-stream directory `datastream_dir = os.path.join(DATA_DIR, site, site + DATASTREAM_NAME + facility + '.' + DATA_LEVEL )`\n", + "\n", + "The data files under datastream_dir also follow naming conventions. Once you reach the datastream_dir level, the most important file naming convention to differentiate the files is \"yyyyMMdd.hhmmss\", which comes in handy to filter files based on datetime. For example, we can use `glob.glob(f'{datastream_dir}/*.200709*.*')` to filter files from September 2007.\n", + "\n", + "(Note: refer to the https://adc.arm.gov/discovery/#/ and https://adc.arm.gov/solr8/metadata/select API to explore ARM datastreams and their associated available sites and facilities.)\n", + "\n", + "Please see the following examples in action" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d7e9eb85", + "metadata": {}, + "outputs": [], + "source": [ + "# Verify if DATA_DIR path exists\n", + "DATA_DIR = \"/data/archive\"\n", + "os.path.exists(DATA_DIR)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "586993fd", + "metadata": {}, + "outputs": [], + "source": [ + "# Specify datastream_dir following the path conventions and check its existence\n", + "DATASTREAM_NAME = \"30baebbr\"\n", + "DATA_LEVEL = \"c1\"\n", + "site = \"sgp\"\n", + "facility = \"E11\"\n", + "datastream_dir = os.path.join(DATA_DIR, site, site + DATASTREAM_NAME + facility + '.'
+ DATA_LEVEL )\n", + "print(datastream_dir)\n", + "print(os.path.exists(datastream_dir))\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0742f7c1", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: list 5 (random) files under datastream_dir\n", + "files = os.listdir(datastream_dir)\n", + "files[:5]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "39b98a36", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: get most recent file\n", + "list_of_files = glob.glob(f\"{datastream_dir}/*\") # * means all if need specific format then *.csv\n", + "latest_file = max(list_of_files, key=os.path.getctime)\n", + "latest_file" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "902d514e", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus sort datastream files based on datetime\n", + "files = os.listdir(datastream_dir)\n", + "file_sorted = files.copy()\n", + "file_sorted.sort() \n", + "print(file_sorted[:5])\n", + "\n", + "# to reverse\n", + "file_sorted_reverse = files.copy() \n", + "file_sorted_reverse.sort(reverse=True)\n", + "print(file_sorted_reverse[:5])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ec5923b2", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: pattern matching\n", + "# filter the 200709** files under datastream_dir\n", + "files_filter = glob.glob(f'{datastream_dir}/*.200709*.*')\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c4a4aa18", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: use armnotebook_utils.file_filter (TODO) to filter files based on datastream info \n" + ] + }, + { + "cell_type": "markdown", + "id": "cfeb9efc", + "metadata": {}, + "source": [ + "### Load data\n", + "The arm data is stored in [NetCDF](https://en.wikipedia.org/wiki/NetCDF#:~:text=%22NetCDF%20(network%20Common%20Data%20Form,format%20for%20representing%20scientific%20data.) format. 
We can use xarray's `open_dataset` method to load single file, or `open_mfdataset` to open multiple files. (Note: the latter will still return a single xarray dataset object by combining multiple files.)\n", + "\n", + "Note: open_dataset keeps the file handle open and lazy loads its contents. All parameters are passed directly to open_dataset. It is a preferable method over load_dataset for memory-effiency. (more details, please see [xarray.open_dataset](https://docs.xarray.dev/en/stable/generated/xarray.open_dataset.html))\n", + "\n", + "See the following example in action" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a440a329", + "metadata": {}, + "outputs": [], + "source": [ + "# open single file\n", + "full_path = latest_file\n", + "print(full_path)\n", + "ds_single = xr.open_dataset(full_path)\n", + "ds_single" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0143a3d", + "metadata": {}, + "outputs": [], + "source": [ + "print(ds_single)\n", + "print(type(ds_single))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1c0f8939", + "metadata": {}, + "outputs": [], + "source": [ + "# open multiple files\n", + "n_files = 3\n", + "full_paths = [os.path.join(datastream_dir, f_path) for f_path in file_sorted_reverse[:n_files]]\n", + "print(full_paths)\n", + "try: # Note: sometimes multiple files cannot be merged, so we used try except here.\n", + " ds_mutiple = xr.open_mfdataset(full_paths)\n", + " # ds_mutiple\n", + " print(type(ds_mutiple))\n", + " print(ds_mutiple)\n", + "except Exception as e:\n", + " print(e)" + ] + }, + { + "cell_type": "markdown", + "id": "3fa59943", + "metadata": {}, + "source": [ + "## Explore the data" + ] + }, + { + "cell_type": "markdown", + "id": "571f69b9", + "metadata": {}, + "source": [ + "### NetCDF Data structure \n", + "\n", + "\n", + "(If you are confident with NetCDF basics and xarray essentials, feel free to skip this session.)\n", + "Before we dive into 
data exploration, there are some essential concepts we should be familiar with (shown below): \n", + "* Dataset\n", + "* Data array\n", + "* Variable\n", + "* Dimension\n", + "* Coordinate\n", + "* Data Type\n", + "* Meta Data (Attributes)\n", + "\n", + "We will not go into details about NetCDF basics; here are some references you might find helpful\n", + "* [Components of a NetCDF Dataset](https://iprc.soest.hawaii.edu/users/xfu/tool/guidef-7.html#)\n", + "* [Network Common Data Form (NetCDF)](https://www.unidata.ucar.edu/software/netcdf/) and [A Brief History of (netCDF) Time](https://www.unidata.ucar.edu/software/netcdf/time/recs.html)\n", + "\n", + "\n", + "\n", + "### Xarray essentials\n", + "Earlier we introduced the xarray package and used `open_dataset` and `open_mfdataset` to retrieve NetCDF data files as an xarray Dataset object. Recall that you can review the data in a notebook by using `print(ds)` or `ds`. \n", + "\n", + "Using xarray to retrieve the aforementioned NetCDF basics is straightforward.
In general,\n", + "* Dataset: `ds`\n", + "* Data array: `ds.variables`\n", + "* Variable: `ds.variables`\n", + "* Dimension: `ds.dims`\n", + "* Coordinate: `ds.coords`\n", + "* Data Type: `type`\n", + "* Meta Data (Attributes): `ds.attrs`\n", + "\n", + "Also, here are some references if you are new to xarray\n", + "* [Xarray in 45 minutes](https://tutorial.xarray.dev/overview/xarray-in-45-min.html)\n", + "* [Handling NetCDF Files using XArray for Absolute Beginners](https://towardsdatascience.com/handling-netcdf-files-using-xarray-for-absolute-beginners-111a8ab4463f)\n", + "\n", + "Try the following commands in action\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b3eac323", + "metadata": {}, + "outputs": [], + "source": [ + "ds = ds_single" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "77ecf85d", + "metadata": {}, + "outputs": [], + "source": [ + "# Dataset \n", + "ds" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "25e7de09", + "metadata": {}, + "outputs": [], + "source": [ + "# Data array, Variable\n", + "# Note: the info can be overwhelming.
Do not attempt to grasp everything at the first glance.\n", + "ds.variables" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8c41b67e", + "metadata": {}, + "outputs": [], + "source": [ + "# dimenssions\n", + "ds.dims" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "156f1dfc", + "metadata": {}, + "outputs": [], + "source": [ + "# coordinates\n", + "ds.coords" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "277d6064", + "metadata": {}, + "outputs": [], + "source": [ + "# Meta Data (Attributes)\n", + "ds.attrs" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d334681f", + "metadata": {}, + "outputs": [], + "source": [ + "# type\n", + "print(type(ds))\n", + "print(type(ds.variables))\n", + "print(type(ds.dims))\n", + "print(type(ds.coords))\n", + "print(type(ds.attrs))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "643399d6", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: retrieve variable names only\n", + "list(ds.variables)" + ] + }, + { + "cell_type": "markdown", + "id": "1cf36878", + "metadata": {}, + "source": [ + "#### Discussion: variable vs. coordinates vs. dimenssions. \n", + "From [Components of a NetCDF Dataset](https://iprc.soest.hawaii.edu/users/xfu/tool/guidef-7.html#):\n", + "\n", + "* Dimession: A dimension may be used to represent a real physical dimension, for example, time, latitude, longitude, or height. A dimension might also be used to index other quantities, for example station or model-run-number.\n", + "A netCDF dimension has both a name and a length. A dimension length is an arbitrary positive integer, except that one dimension in a netCDF dataset can have the length UNLIMITED.\n", + "\n", + "* Variables: Variables are used to store the bulk of the data in a netCDF dataset. A variable represents an array of values of the same type. A scalar value is treated as a 0-dimensional array. 
A variable has a name, a data type, and a shape described by its list of dimensions specified when the variable is created. A variable may also have associated attributes, which may be added, deleted or changed after the variable is created.\n", + "\n", + "* Coordinate (Variables): It is legal for a variable to have the same name as a dimension. Such variables have no special meaning to the netCDF library. However there is a convention that such variables should be treated in a special way by software using this library.\n", + "A variable with the same name as a dimension is called a coordinate variable. It typically defines a physical coordinate corresponding to that dimension.\n", + "\n", + "\n", + "Tips: By definition, coordinates and dimensions are also variables. For example, ds.time is a coordinate but it is also a special variable. In practice, however, when we talk about a variable, we mean a regular/non-coordinate variable. We will use this convention for the remainder of the notebook. " + ] + }, + { + "cell_type": "markdown", + "id": "d47120f9", + "metadata": {}, + "source": [ + "### Xarray Variable\n", + "\n", + "\n", + "We can use `ds.variables` to access the variables of a dataset. But printing out the whole dataset can often be overwhelming. Instead, we can work on an individual variable (also called an Xarray data array) using the `ds[var_name]` syntax, where `var_name` is the name of the variable. See the following examples in action.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5a57e136", + "metadata": {}, + "outputs": [], + "source": [ + "print(type(ds[\"time\"]))\n", + "ds[\"time\"]" + ] + }, + { + "cell_type": "markdown", + "id": "51ec643e", + "metadata": {}, + "source": [ + "#### Variable properties\n", + "For an individual variable, the following properties are most commonly used, shown with the associated xarray attribute to retrieve them.
(assuming var = ds[var_name])\n", + "* name: `var.name`\n", + "* data content: `var.data`\n", + "* attributes: `var.attrs`\n", + "* dimenstions: `var.dims`\n", + "* data type: `var.data.dtype`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a632a525", + "metadata": {}, + "outputs": [], + "source": [ + "var_name = \"time\"\n", + "var = ds[var_name]\n", + "\n", + "print(\"var.name: \\n\", var.name, \"\\n\")\n", + "print(\"var.data: \\n\", var.data, \"\\n\")\n", + "print(\"var.attrs: \\n\", var.attrs, \"\\n\")\n", + "print(\"var.dims: \\n\", var.dims, \"\\n\")\n", + "print(\"var.data.dtype: \\n\", var.data.dtype, \"\\n\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b256b07f", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: indivdual variable info table (collect and display variable info as a tabular format)\n", + "df_info = pd.DataFrame()\n", + "df_info[\"var_name\"] = list(ds.variables)\n", + "df_info[\"dims\"] = df_info.var_name.apply(lambda x: ds[x].dims)\n", + "df_info[\"is_dim\"] = df_info.var_name.apply(lambda x: x in ds.dims)\n", + "df_info[\"n_dim\"] = df_info.var_name.apply(lambda x: len(ds[x].dims))\n", + "df_info[\"attrs\"] = df_info.var_name.apply(lambda x: ds[x].attrs)\n", + "df_info[\"dtype\"] = df_info.var_name.apply(lambda x: ds[x].data.dtype)\n", + "df_info" + ] + }, + { + "cell_type": "markdown", + "id": "e6a36b96", + "metadata": {}, + "source": [ + "### Data cleaning/Preprocessing (skipped)\n", + "Data cleaning and preprocessing is an important stage in the the data analysis pipeline. However it will be out of the scope of this notebook. For more information about data cleaning and preprocessing basics, here are some references. 
\n", + "* [Xarray Fundamentals](https://earth-env-data-science.github.io/lectures/xarray/xarray.html)\n", + "* [Xarray Tutorial — Pangeo Gallery documentation](http://gallery.pangeo.io/repos/pangeo-data/pangeo-tutorial-gallery/xarray.html)\n", + "* [Pythonic Data Cleaning With pandas and NumPy](https://realpython.com/python-data-cleaning-numpy-pandas/)\n", + "* [Pandas - Cleaning Data](https://www.w3schools.com/python/pandas/pandas_cleaning.asp)" + ] + }, + { + "cell_type": "markdown", + "id": "ae6e1a69", + "metadata": {}, + "source": [ + "### Plotting\n", + "Data visualization (or plotting) is another important data analysis topic and deserves its own discussion. Here in this notebook we will only demonstrate basic tools for simple data visualization tasks.\n", + "\n", + "Here are some references you might find useful:\n", + "* xarray plotting: https://docs.xarray.dev/en/stable/user-guide/plotting.html\n", + "* Atmospheric Community Toolkit (ACT): https://arm-doe.github.io/ACT/index.html\n", + "\n", + "Note: this notebook is auto-generated using a template. It uses a general heuristic to select the variable(s) to plot and is not customized to each individual notebook. Feel free to change the variables of interest, especially if a figure fails to plot." + ] + }, + { + "cell_type": "markdown", + "id": "7e417fb2", + "metadata": {}, + "source": [ + "#### 1-dimensional basic time series plot\n", + "\n", + "For the following plot we would like to find variables such that\n", + "* it has one and only one dimension\n", + "* \"time\" is its coordinate variable\n", + "* it is not a dimension itself,\n", + "* it is not a special variable with substrings within [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "\n", + "Please see the following example in action.
(For more details about pandas filtering, please see the following references.)\n", + "* [pandas: multiple conditions while indexing data frame](https://stackoverflow.com/questions/22591174/pandas-multiple-conditions-while-indexing-data-frame-unexpected-behavior)\n", + "* [pandas.DataFrame.apply](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.apply.html)\n", + "* [How to test if a string contains one of the substrings in a list...](https://stackoverflow.com/questions/26577516/how-to-test-if-a-string-contains-one-of-the-substrings-in-a-list-in-pandas)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "30edac2d", + "metadata": {}, + "outputs": [], + "source": [ + "# (pandas query) filter n_dim==1, non-dimensional, and not contains the following substrings\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter = df_info[(df_info.n_dim == 1) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (df_info.is_dim==False) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4777a659", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " var_1d = df_filter.var_name.values[0]\n", + " var_1d\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fb5ae985", + "metadata": {}, + "outputs": [], + "source": [ + "# Note: if failed, change to another variable to plot.\n", + "try:\n", + " ds[var_1d].plot()\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "markdown", + "id": "9e41066e", + "metadata": {}, + "source": [ + "#### 2-dimensional basic plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "09bd4adc", + "metadata": {}, + "outputs": [], + "source": [ + "# (pandas query) filter n_dim==2, non-dimensional, and not contains the
following substrings\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_2 = df_info[(df_info.n_dim == 2) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (df_info.is_dim==False) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter_2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2cdea1a9", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " var_2d = df_filter_2.var_name.values[0]\n", + " var_2d\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f5f48879", + "metadata": {}, + "outputs": [], + "source": [ + "# Note: if failed, change to another variable to plot.\n", + "try:\n", + " print(ds[var_2d].dims)\n", + " # ds[var_2d].plot()\n", + "\n", + " # conventionally, use \"time\" as x-axis\n", + " ds[var_2d].plot(x=\"time\")\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "markdown", + "id": "7554b2a6", + "metadata": {}, + "source": [ + "#### qc-plotting (optional)\n", + "\n", + "Note: act qc plotting has more strict requirement, one of them is the associated qc variable need to have \"flag_masks\" attributes. 
Which is added by using ds = act.io.armfiles.read_netcdf(files_list), then ds.clean.cleanup()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "71c2096f", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " ds_act = act.io.armfiles.read_netcdf(full_path)\n", + " print(type(ds_act))\n", + " ds_act.clean.cleanup()\n", + "\n", + " # or \n", + " # ds.clean.cleanup()\n", + "except Exception as e:\n", + " print(\"ERROR\", e)\n", + " ds_act = ds" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "844f8505", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter valid variables for ACT qc plotting\n", + "condition = (\"qc_\" + df_info.var_name).apply(lambda x: ds_act[x].attrs.get(\"flag_masks\") is not None \n", + " if x in list(ds.variables) else False)\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_3 = df_info[(df_info.is_dim==False) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings))) &\n", + " condition\n", + " ]\n", + "df_filter_3" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ecd38cc6", + "metadata": {}, + "outputs": [], + "source": [ + "# QC Plot\n", + "try:\n", + " qc_variable = df_filter_3.var_name.values[0]\n", + " print(qc_variable)\n", + "except Exception as e:\n", + " print(e)\n", + " \n", + "try:\n", + "\n", + " # Plot\n", + " qc_display = act.plotting.TimeSeriesDisplay(ds_act)\n", + " qc_display.add_subplots((2,), figsize = (9.5,10))\n", + " qc_ax = qc_display.plot(qc_variable, subplot_index=(0,), set_title=\"QC results on field: \" + qc_variable,)\n", + " qc_ax.grid()\n", + " qc_display.qc_flag_block_plot(qc_variable, subplot_index=(1,))\n", + " qc_ax.set_xlabel(f\"UTC Time starts at {ds.time.data[0]}\")\n", + "\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e)" + ] + }, + { + "cell_type": "markdown", + "id": "d76e4d27", + "metadata": 
{}, + "source": [ + "#### bonus: choose variables to plot from a dropdown menu " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bb733804", + "metadata": {}, + "outputs": [], + "source": [ + "# Valid variables filtering\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_4 = df_info[(df_info.is_dim==False) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter_4" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "82d5c20d", + "metadata": {}, + "outputs": [], + "source": [ + "# example 1: using xarray plot\n", + "\n", + "# Uncomment the following cell to try the interactive plot (ctrl + /)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2fc9aaa4", + "metadata": {}, + "outputs": [], + "source": [ + "# %matplotlib widget\n", + "# plt.clf()\n", + "\n", + "# fig, ax = plt.subplots(figsize=(10, 4))\n", + "\n", + "# available_variables = df_filter_4.var_name.values\n", + "# @widgets.interact(var=available_variables)\n", + "# def update(var = available_variables[0]):\n", + "# fig.clear() # Remove old lines from plot and plot new one\n", + "# if len(ds[var].dims)==2:\n", + "# ds[var].plot(x=\"time\", add_colorbar=False)\n", + "# else:\n", + "# ds[var].plot()\n", + "# plt.grid()\n", + "# plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fabdd802", + "metadata": {}, + "outputs": [], + "source": [ + "# example 2: using act plot\n", + "\n", + "# Uncomment the following cell to try the interactive plot (ctrl + /)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "962b4186", + "metadata": {}, + "outputs": [], + "source": [ + "# %matplotlib widget\n", + "# plt.clf()\n", + "\n", + "# available_variables = df_filter_4.var_name.values\n", + "\n", + "\n", + "# @widgets.interact(var=available_variables)\n", + "# def 
update(var = available_variables[0]):\n", + "\n", + "# i_display = act.plotting.TimeSeriesDisplay(ds_act)\n", + "# i_display.add_subplots((1,), figsize=(10, 4))\n", + "# ax = i_display.plot(var, subplot_index=(0,), set_title=f\"{var} ({ds_act[var].attrs['long_name']})\",)\n", + "\n", + "# ax.set_xlabel(f\"UTC Time starts at {ds.time.data[0]}\")\n", + "# ax.grid()\n", + "# plt.show()\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.16" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/BBHRP/.ipynb_checkpoints/bbhrpavg1mlawer.c1-checkpoint.ipynb b/VAPs/quicklook/BBHRP/.ipynb_checkpoints/bbhrpavg1mlawer.c1-checkpoint.ipynb new file mode 100644 index 00000000..3ad7672a --- /dev/null +++ b/VAPs/quicklook/BBHRP/.ipynb_checkpoints/bbhrpavg1mlawer.c1-checkpoint.ipynb @@ -0,0 +1,3768 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# BBHRPAVG1MLAWER.C1 Plots\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/bbhrp) for more information about this vap." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib widget\n", + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import ipywidgets as widgets\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "# Data archive directory\n", + "DATA_DIR = r'/data/archive/'\n", + "\n", + "# Datastream info\n", + "DATASTREAM_NAME = 'bbhrpavg1mlawer'\n", + "DATA_LEVEL = 'c1'\n", + "LOCATIONS = [{'end_date': '2006-02-27', 'facility': 'C1', 'site': 'sgp', 'start_date': '2000-03-01'}]" + ] + }, + { + "cell_type": "markdown", + "id": "9faaf875", + "metadata": {}, + "source": [ + "## Define site, facility, and date range" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "ac6764f5", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The following locations and date ranges are available for this VAP:\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sitefacilitystart_dateend_date
0sgpC12000-03-012006-02-27
\n", + "
" + ], + "text/plain": [ + " site facility start_date end_date\n", + "0 sgp C1 2000-03-01 2006-02-27" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "print(\"The following locations and date ranges are available for this VAP:\")\n", + "display(pd.DataFrame(LOCATIONS, columns=['site', 'facility', 'start_date', 'end_date']))" + ] + }, + { + "cell_type": "markdown", + "id": "8d132223", + "metadata": {}, + "source": [ + "#### Define site, facility, and date range (date format: YYYY-MM-DD) using the variables below:" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "e563983a", + "metadata": {}, + "outputs": [], + "source": [ + "site_facility = ( 'sgp', 'C1' )\n", + "\n", + "date_start = '2006-02-25'\n", + "date_end = '2006-02-27'" + ] + }, + { + "cell_type": "markdown", + "id": "bccd3dfe-2f99-49a5-bace-ea37e8dd8fc3", + "metadata": {}, + "source": [ + "## Load data files\n", + "Load data files from /data/archive/" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "cb4b9a26-c574-49c0-a521-658fa553e39e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'/data/archive/sgp/sgpbbhrpavg1mlawerC1.c1'" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Compile list of files\n", + "site, facility = site_facility\n", + "d_date_start = datetime.strptime(date_start, '%Y-%m-%d')\n", + "d_date_end = datetime.strptime(date_end, '%Y-%m-%d')\n", + "dir_path = os.path.join(DATA_DIR + site, site + DATASTREAM_NAME + facility + r'.' 
+ DATA_LEVEL )\n", + "dir_path\n" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "6be8f3dc", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['20060225', '20060226', '20060227']" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from datetime import date, timedelta\n", + "import pandas as pd\n", + "\n", + "def get_ARM_formated_dates(start_date, end_date):\n", + " \"\"\"\n", + " Get a list of ARM conventional formated date lists, based on start_date and end_date(inclusive)\n", + " EXAMPLE:\n", + " get_ARM_formated_dates(start_date=\"20180219\", end_date=\"20180221\")\n", + " >> [\"20180219\", \"20180220\", \"20180221\"] \n", + " \"\"\"\n", + " \n", + " _start_date = pd.to_datetime(start_date)\n", + " _end_date = pd.to_datetime(end_date)\n", + " \n", + " delta = _end_date - _start_date # returns timedelta \n", + " dates = []\n", + "\n", + " for i in range(delta.days + 1):\n", + " day = _start_date + timedelta(days=i)\n", + " day_formated = day.strftime(format=\"%Y%m%d\")\n", + " dates.append(day_formated)\n", + " return dates\n", + "\n", + "\n", + "get_ARM_formated_dates(start_date=date_start, end_date=date_end)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "51feea2e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['/data/archive/sgp/sgpbbhrpavg1mlawerC1.c1/sgpbbhrpavg1mlawerC1.c1.20060225.002000.cdf',\n", + " '/data/archive/sgp/sgpbbhrpavg1mlawerC1.c1/sgpbbhrpavg1mlawerC1.c1.20060227.002000.cdf']" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Filter a list of files based on date pattern\n", + "import glob\n", + "dates = get_ARM_formated_dates(start_date=date_start, end_date=date_end)\n", + "files_filter = []\n", + "for date in dates:\n", + " files_filter += glob.glob(f'{dir_path}/*.{date}*.*')\n", + " files_filter\n", + "files_filter" + ] + }, + { + 
"cell_type": "code", + "execution_count": 7, + "id": "b0e5d0d7", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "2 files loaded\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
<xarray.Dataset>\n",
+       "Dimensions:                               (time: 96, levels: 55, layers: 54)\n",
+       "Coordinates:\n",
+       "  * time                                  (time) datetime64[ns] 2006-02-25T00...\n",
+       "Dimensions without coordinates: levels, layers\n",
+       "Data variables: (12/52)\n",
+       "    base_time                             (time) datetime64[ns] 2006-02-25T00...\n",
+       "    time_offset                           (time) datetime64[ns] 2006-02-25T00...\n",
+       "    height                                (time, levels) float32 dask.array<chunksize=(48, 55), meta=np.ndarray>\n",
+       "    pressure                              (time, levels) float32 dask.array<chunksize=(48, 55), meta=np.ndarray>\n",
+       "    temperature                           (time, levels) float32 dask.array<chunksize=(48, 55), meta=np.ndarray>\n",
+       "    column_ozone                          (time) float32 dask.array<chunksize=(48,), meta=np.ndarray>\n",
+       "    ...                                    ...\n",
+       "    cloud_tot_lwp                         (time) float32 dask.array<chunksize=(48,), meta=np.ndarray>\n",
+       "    cloud_tot_iwp                         (time) float32 dask.array<chunksize=(48,), meta=np.ndarray>\n",
+       "    cloud_fraction                        (time) float32 dask.array<chunksize=(48,), meta=np.ndarray>\n",
+       "    lat                                   (time) float32 36.61 36.61 ... 36.61\n",
+       "    lon                                   (time) float32 -97.49 ... -97.49\n",
+       "    alt                                   (time) float32 315.0 315.0 ... 315.0\n",
+       "Attributes:\n",
+       "    Date:                 Thu Jun  4 22:13:53 2009\n",
+       "    Version:              Version: ver1.5\n",
+       "    missing_value:        -9999.0\n",
+       "    _file_dates:          ['20060225', '20060227']\n",
+       "    _file_times:          ['002000', '002000']\n",
+       "    datastream:           sgpbbhrpavg1mlawerC1.c1\n",
+       "    _datastream:          sgpbbhrpavg1mlawerC1.c1\n",
+       "    _arm_standards_flag:  1
" + ], + "text/plain": [ + "\n", + "Dimensions: (time: 96, levels: 55, layers: 54)\n", + "Coordinates:\n", + " * time (time) datetime64[ns] 2006-02-25T00...\n", + "Dimensions without coordinates: levels, layers\n", + "Data variables: (12/52)\n", + " base_time (time) datetime64[ns] 2006-02-25T00...\n", + " time_offset (time) datetime64[ns] 2006-02-25T00...\n", + " height (time, levels) float32 dask.array\n", + " pressure (time, levels) float32 dask.array\n", + " temperature (time, levels) float32 dask.array\n", + " column_ozone (time) float32 dask.array\n", + " ... ...\n", + " cloud_tot_lwp (time) float32 dask.array\n", + " cloud_tot_iwp (time) float32 dask.array\n", + " cloud_fraction (time) float32 dask.array\n", + " lat (time) float32 36.61 36.61 ... 36.61\n", + " lon (time) float32 -97.49 ... -97.49\n", + " alt (time) float32 315.0 315.0 ... 315.0\n", + "Attributes:\n", + " Date: Thu Jun 4 22:13:53 2009\n", + " Version: Version: ver1.5\n", + " missing_value: -9999.0\n", + " _file_dates: ['20060225', '20060227']\n", + " _file_times: ['002000', '002000']\n", + " datastream: sgpbbhrpavg1mlawerC1.c1\n", + " _datastream: sgpbbhrpavg1mlawerC1.c1\n", + " _arm_standards_flag: 1" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Load files as a single dataset\n", + "files_list = files_filter \n", + "ds = act.io.armfiles.read_netcdf(files_list)\n", + "ds.clean.cleanup()\n", + "print(f'{len(files_list)} files loaded')\n", + "ds\n" + ] + }, + { + "cell_type": "markdown", + "id": "4a551094-9ec0-4b64-b80a-9940573c2f50", + "metadata": {}, + "source": [ + "## Plot time series data\n", + "#### Define the list of variables to be plotted:" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "d09b789e-84f1-4605-846b-a72c110c8048", + "metadata": {}, + "outputs": [], + "source": [ + "variables_to_plot = ['pressure', 'temperature', 'column_ozone']" + ] + }, + { + "cell_type": "code", + "execution_count": 9, 
+ "id": "3458fb08-035b-4898-9253-0a94e6f9c97b", + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "901cca0c06a4463aa7bea7658c637e66", + "version_major": 2, + "version_minor": 0 + }, + "image/png": "", + "text/html": [ + "\n", + "
\n", + "
\n", + " Figure\n", + "
\n", + " \n", + "
\n", + " " + ], + "text/plain": [ + "Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "ts_display = act.plotting.TimeSeriesDisplay(ds)\n", + "ts_display.add_subplots((len(variables_to_plot),), figsize = (9.5,4*len(variables_to_plot)))\n", + "\n", + "for i,v in enumerate(variables_to_plot):\n", + " ts_ax = ts_display.plot(v, subplot_index=(i,), set_title=ds.variables[v].attrs['long_name'],)\n", + " ts_ax.grid()\n", + "\n", + "plt.show()\n" + ] + }, + { + "cell_type": "markdown", + "id": "194399aa-1907-452b-8ba9-bc31d7f60291", + "metadata": {}, + "source": [ + "## Quality check plots\n", + "#### Define variable for QC plot" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "9a0ef63d-3eeb-48fc-a24a-43258b6134b8", + "metadata": {}, + "outputs": [], + "source": [ + "qc_variable = 'flux'" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "532663a3-4dc0-4497-bda8-018c5f91e1c4", + "metadata": {}, + "outputs": [ + { + "ename": "KeyError", + "evalue": "'flux'", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)", + "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/xarray/core/dataset.py:1348\u001b[0m, in \u001b[0;36mDataset._construct_dataarray\u001b[0;34m(self, name)\u001b[0m\n\u001b[1;32m 1347\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m-> 1348\u001b[0m variable \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_variables\u001b[49m\u001b[43m[\u001b[49m\u001b[43mname\u001b[49m\u001b[43m]\u001b[49m\n\u001b[1;32m 1349\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m:\n", + "\u001b[0;31mKeyError\u001b[0m: 
'flux'", + "\nDuring handling of the above exception, another exception occurred:\n", + "\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[11], line 7\u001b[0m\n\u001b[1;32m 5\u001b[0m qc_display \u001b[38;5;241m=\u001b[39m act\u001b[38;5;241m.\u001b[39mplotting\u001b[38;5;241m.\u001b[39mTimeSeriesDisplay(ds)\n\u001b[1;32m 6\u001b[0m qc_display\u001b[38;5;241m.\u001b[39madd_subplots((\u001b[38;5;241m2\u001b[39m,), figsize \u001b[38;5;241m=\u001b[39m (\u001b[38;5;241m9.5\u001b[39m,\u001b[38;5;241m10\u001b[39m))\n\u001b[0;32m----> 7\u001b[0m qc_ax \u001b[38;5;241m=\u001b[39m \u001b[43mqc_display\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mplot\u001b[49m\u001b[43m(\u001b[49m\u001b[43mqc_variable\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43msubplot_index\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m0\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mset_title\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mQC results on field: \u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m \u001b[49m\u001b[38;5;241;43m+\u001b[39;49m\u001b[43m \u001b[49m\u001b[43mqc_variable\u001b[49m\u001b[43m,\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 8\u001b[0m qc_ax\u001b[38;5;241m.\u001b[39mgrid()\n\u001b[1;32m 9\u001b[0m qc_display\u001b[38;5;241m.\u001b[39mqc_flag_block_plot(qc_variable, subplot_index\u001b[38;5;241m=\u001b[39m(\u001b[38;5;241m1\u001b[39m,))\n", + "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/act/plotting/timeseriesdisplay.py:418\u001b[0m, in \u001b[0;36mTimeSeriesDisplay.plot\u001b[0;34m(self, field, dsname, subplot_index, cmap, set_title, add_nan, day_night_background, invert_y_axis, abs_limits, time_rng, y_rng, use_var_for_y, set_shading, assessment_overplot, overplot_marker, overplot_behind, overplot_markersize, 
assessment_overplot_category, assessment_overplot_category_color, force_line_plot, labels, cbar_label, cbar_h_adjust, secondary_y, y_axis_flag_meanings, colorbar_labels, cb_friendly, **kwargs)\u001b[0m\n\u001b[1;32m 415\u001b[0m assessment_overplot_category_color[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mAcceptable\u001b[39m\u001b[38;5;124m'\u001b[39m] \u001b[38;5;241m=\u001b[39m (\u001b[38;5;241m0.0\u001b[39m, \u001b[38;5;241m0.4240129715562796\u001b[39m, \u001b[38;5;241m0.4240129715562796\u001b[39m),\n\u001b[1;32m 417\u001b[0m \u001b[38;5;66;03m# Get data and dimensions\u001b[39;00m\n\u001b[0;32m--> 418\u001b[0m data \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_obj\u001b[49m\u001b[43m[\u001b[49m\u001b[43mdsname\u001b[49m\u001b[43m]\u001b[49m\u001b[43m[\u001b[49m\u001b[43mfield\u001b[49m\u001b[43m]\u001b[49m\n\u001b[1;32m 419\u001b[0m dim \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mlist\u001b[39m(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_obj[dsname][field]\u001b[38;5;241m.\u001b[39mdims)\n\u001b[1;32m 420\u001b[0m xdata \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_obj[dsname][dim[\u001b[38;5;241m0\u001b[39m]]\n", + "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/xarray/core/dataset.py:1439\u001b[0m, in \u001b[0;36mDataset.__getitem__\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m 1437\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39misel(\u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkey)\n\u001b[1;32m 1438\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m utils\u001b[38;5;241m.\u001b[39mhashable(key):\n\u001b[0;32m-> 1439\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_construct_dataarray\u001b[49m\u001b[43m(\u001b[49m\u001b[43mkey\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1440\u001b[0m 
\u001b[38;5;28;01mif\u001b[39;00m utils\u001b[38;5;241m.\u001b[39miterable_of_hashable(key):\n\u001b[1;32m 1441\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_copy_listed(key)\n", + "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/xarray/core/dataset.py:1350\u001b[0m, in \u001b[0;36mDataset._construct_dataarray\u001b[0;34m(self, name)\u001b[0m\n\u001b[1;32m 1348\u001b[0m variable \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_variables[name]\n\u001b[1;32m 1349\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m:\n\u001b[0;32m-> 1350\u001b[0m _, name, variable \u001b[38;5;241m=\u001b[39m \u001b[43m_get_virtual_variable\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_variables\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mname\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdims\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1352\u001b[0m needed_dims \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mset\u001b[39m(variable\u001b[38;5;241m.\u001b[39mdims)\n\u001b[1;32m 1354\u001b[0m coords: \u001b[38;5;28mdict\u001b[39m[Hashable, Variable] \u001b[38;5;241m=\u001b[39m {}\n", + "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/xarray/core/dataset.py:186\u001b[0m, in \u001b[0;36m_get_virtual_variable\u001b[0;34m(variables, key, dim_sizes)\u001b[0m\n\u001b[1;32m 184\u001b[0m split_key \u001b[38;5;241m=\u001b[39m key\u001b[38;5;241m.\u001b[39msplit(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m.\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;241m1\u001b[39m)\n\u001b[1;32m 185\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(split_key) \u001b[38;5;241m!=\u001b[39m \u001b[38;5;241m2\u001b[39m:\n\u001b[0;32m--> 186\u001b[0m 
\u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m(key)\n\u001b[1;32m 188\u001b[0m ref_name, var_name \u001b[38;5;241m=\u001b[39m split_key\n\u001b[1;32m 189\u001b[0m ref_var \u001b[38;5;241m=\u001b[39m variables[ref_name]\n", + "\u001b[0;31mKeyError\u001b[0m: 'flux'" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "ae3815a803c64f629cc4a6feb7130f6b", + "version_major": 2, + "version_minor": 0 + }, + "image/png": "", + "text/html": [ + "\n", + "
\n", + "
\n", + " Figure\n", + "
\n", + " \n", + "
\n", + " " + ], + "text/plain": [ + "Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# QC Plot\n", + "if ('qc_'+qc_variable) in ds.variables:\n", + "\n", + " # Plot\n", + " qc_display = act.plotting.TimeSeriesDisplay(ds)\n", + " qc_display.add_subplots((2,), figsize = (9.5,10))\n", + " qc_ax = qc_display.plot(qc_variable, subplot_index=(0,), set_title=\"QC results on field: \" + qc_variable,)\n", + " qc_ax.grid()\n", + " qc_display.qc_flag_block_plot(qc_variable, subplot_index=(1,))\n", + "\n", + " plt.show()\n", + "else:\n", + " print(f'QC not available for the selected field: {qc_variable}')\n" + ] + }, + { + "cell_type": "markdown", + "id": "2308db16-f362-4033-a11a-c3e5e75ad9ba", + "metadata": {}, + "source": [ + "## Field selection dropdown menu\n", + "Select variable to be plotted from a dropdown menu" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d5792fa8-2253-441a-8ac6-1e07a0345031", + "metadata": {}, + "outputs": [], + "source": [ + "plt.ioff()\n", + "\n", + "# populate dropdown menu with available variables \n", + "available_variables = [v for v in ds.variables if not('time' in v or v.startswith('qc_') or v.startswith('source_')) and 'long_name' in ds.variables[v].attrs]\n", + "d_variable = 'pressure'\n", + "dropdown = widgets.Dropdown(\n", + " options = [(ds.variables[v].attrs['long_name'], v) for v in available_variables],\n", + " value= d_variable,\n", + " description='Field:',\n", + " disabled=False,\n", + ")\n", + "dropdown.layout.margin = '0px 30% 0px 20%'\n", + "dropdown.layout.width = '50%'\n", + "\n", + "# set up display\n", + "i_display = act.plotting.TimeSeriesDisplay(ds)\n", + "i_display.add_subplots((1,), figsize = (9.5,5))\n", + "i_ax = i_display.plot(d_variable, subplot_index=(0,), set_title=ds.variables[d_variable].attrs['long_name'],)\n", + "i_ax.grid()\n", + "i_fig 
= i_display.fig\n", + "\n", + "# update plot callback function\n", + "def update_plot(change):\n", + " i_ax.cla()\n", + " i_ax_new = i_display.plot(change.new, subplot_index=(0,), set_title=ds.variables[change.new].attrs['long_name'],)\n", + " i_ax_new.grid()\n", + " i_fig.canvas.draw()\n", + " i_fig.canvas.flush_events()\n", + "\n", + "dropdown.observe(update_plot, names='value')\n", + "\n", + "widgets.AppLayout(\n", + " header=dropdown,\n", + " center=i_fig.canvas,\n", + " pane_heights=[1, 6,1]\n", + ")\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.16" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/BBHRP/1bbhrpripbe1mcfarlane.c1.ipynb b/VAPs/quicklook/BBHRP/1bbhrpripbe1mcfarlane.c1.ipynb new file mode 100644 index 00000000..250c9efc --- /dev/null +++ b/VAPs/quicklook/BBHRP/1bbhrpripbe1mcfarlane.c1.ipynb @@ -0,0 +1,339 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# 1BBHRPRIPBE1MCFARLANE.C1 Plots\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/bbhrp) for more information about this vap." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib widget\n", + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import ipywidgets as widgets\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "# Data archive directory\n", + "DATA_DIR = r'/data/archive/'\n", + "\n", + "# Datastream info\n", + "DATASTREAM_NAME = '1bbhrpripbe1mcfarlane'\n", + "DATA_LEVEL = 'c1'\n", + "LOCATIONS = [{'end_date': '2011-06-05', 'facility': 'C1', 'site': 'sgp', 'start_date': '2002-03-01'}]" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "9faaf875", + "metadata": {}, + "source": [ + "## Define site, facility, and date range" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ac6764f5", + "metadata": {}, + "outputs": [], + "source": [ + "print(\"The following locations and date ranges are available for this VAP:\")\n", + "display(pd.DataFrame(LOCATIONS, columns=['site', 'facility', 'start_date', 'end_date']))" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "8d132223", + "metadata": {}, + "source": [ + "#### Define site, facility, and date range (date format: YYYY-MM-DD) using the variables below:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e563983a", + "metadata": {}, + "outputs": [], + "source": [ + "site_facility = ( 'sgp', 'C1' )\n", + "\n", + "date_start = '2011-06-03'\n", + "date_end = '2011-06-05'" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "bccd3dfe-2f99-49a5-bace-ea37e8dd8fc3", + "metadata": {}, + "source": [ + "## Load data files\n", + "Load data files from /data/archive/" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cb4b9a26-c574-49c0-a521-658fa553e39e", + 
"metadata": {}, + "outputs": [], + "source": [ + "# Compile list of files\n", + "site, facility = site_facility\n", + "d_date_start = datetime.strptime(date_start, '%Y-%m-%d')\n", + "d_date_end = datetime.strptime(date_end, '%Y-%m-%d')\n", + "dir_path = os.path.join(DATA_DIR + site, site + DATASTREAM_NAME + facility + r'.' + DATA_LEVEL )\n", + "dir_path\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6be8f3dc", + "metadata": {}, + "outputs": [], + "source": [ + "from datetime import date, timedelta\n", + "import pandas as pd\n", + "\n", + "def get_ARM_formated_dates(start_date, end_date):\n", + " \"\"\"\n", + " Get a list of ARM conventional formated date lists, based on start_date and end_date(inclusive)\n", + " EXAMPLE:\n", + " get_ARM_formated_dates(start_date=\"20180219\", end_date=\"20180221\")\n", + " >> [\"20180219\", \"20180220\", \"20180221\"] \n", + " \"\"\"\n", + " \n", + " _start_date = pd.to_datetime(start_date)\n", + " _end_date = pd.to_datetime(end_date)\n", + " \n", + " delta = _end_date - _start_date # returns timedelta \n", + " dates = []\n", + "\n", + " for i in range(delta.days + 1):\n", + " day = _start_date + timedelta(days=i)\n", + " day_formated = day.strftime(format=\"%Y%m%d\")\n", + " dates.append(day_formated)\n", + " return dates\n", + "\n", + "\n", + "get_ARM_formated_dates(start_date=date_start, end_date=date_end)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "51feea2e", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter a list of files based on date pattern\n", + "import glob\n", + "dates = get_ARM_formated_dates(start_date=date_start, end_date=date_end)\n", + "files_filter = []\n", + "for date in dates:\n", + " files_filter += glob.glob(f'{dir_path}/*.{date}*.*')\n", + " files_filter\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0e5d0d7", + "metadata": {}, + "outputs": [], + "source": [ + "# Load files as a single 
dataset\n", + "files_list = files_filter \n", + "ds = act.io.armfiles.read_netcdf(files_list)\n", + "ds.clean.cleanup()\n", + "print(f'{len(files_list)} files loaded')\n", + "ds\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "4a551094-9ec0-4b64-b80a-9940573c2f50", + "metadata": {}, + "source": [ + "## Plot time series data\n", + "#### Define the list of variables to be plotted:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d09b789e-84f1-4605-846b-a72c110c8048", + "metadata": {}, + "outputs": [], + "source": [ + "variables_to_plot = ['cloud_flag', 'trop_level', 'long_heating_rate']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3458fb08-035b-4898-9253-0a94e6f9c97b", + "metadata": {}, + "outputs": [], + "source": [ + "ts_display = act.plotting.TimeSeriesDisplay(ds)\n", + "ts_display.add_subplots((len(variables_to_plot),), figsize = (9.5,4*len(variables_to_plot)))\n", + "\n", + "for i,v in enumerate(variables_to_plot):\n", + " ts_ax = ts_display.plot(v, subplot_index=(i,), set_title=ds.variables[v].attrs['long_name'],)\n", + " ts_ax.grid()\n", + "\n", + "plt.show()\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "194399aa-1907-452b-8ba9-bc31d7f60291", + "metadata": {}, + "source": [ + "## Quality check plots\n", + "#### Define variable for QC plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9a0ef63d-3eeb-48fc-a24a-43258b6134b8", + "metadata": {}, + "outputs": [], + "source": [ + "qc_variable = 'down_long_surf_flux'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "532663a3-4dc0-4497-bda8-018c5f91e1c4", + "metadata": {}, + "outputs": [], + "source": [ + "# QC Plot\n", + "if ('qc_'+qc_variable) in ds.variables:\n", + "\n", + " # Plot\n", + " qc_display = act.plotting.TimeSeriesDisplay(ds)\n", + " qc_display.add_subplots((2,), figsize = (9.5,10))\n", + " qc_ax = qc_display.plot(qc_variable, subplot_index=(0,), 
set_title=\"QC results on field: \" + qc_variable,)\n", + " qc_ax.grid()\n", + " qc_display.qc_flag_block_plot(qc_variable, subplot_index=(1,))\n", + "\n", + " plt.show()\n", + "else:\n", + " print(f'QC not available for the selected field: {qc_variable}')\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "2308db16-f362-4033-a11a-c3e5e75ad9ba", + "metadata": {}, + "source": [ + "## Field selection dropdown menu\n", + "Select variable to be plotted from a dropdown menu" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d5792fa8-2253-441a-8ac6-1e07a0345031", + "metadata": {}, + "outputs": [], + "source": [ + "plt.ioff()\n", + "\n", + "# populate dropdown menu with available variables \n", + "available_variables = [v for v in ds.variables if not('time' in v or v.startswith('qc_') or v.startswith('source_')) and 'long_name' in ds.variables[v].attrs]\n", + "d_variable = 'cloud_flag'\n", + "dropdown = widgets.Dropdown(\n", + " options = [(ds.variables[v].attrs['long_name'], v) for v in available_variables],\n", + " value= d_variable,\n", + " description='Field:',\n", + " disabled=False,\n", + ")\n", + "dropdown.layout.margin = '0px 30% 0px 20%'\n", + "dropdown.layout.width = '50%'\n", + "\n", + "# set up display\n", + "i_display = act.plotting.TimeSeriesDisplay(ds)\n", + "i_display.add_subplots((1,), figsize = (9.5,5))\n", + "i_ax = i_display.plot(d_variable, subplot_index=(0,), set_title=ds.variables[d_variable].attrs['long_name'],)\n", + "i_ax.grid()\n", + "i_fig = i_display.fig\n", + "\n", + "# update plot callback function\n", + "def update_plot(change):\n", + " i_ax.cla()\n", + " i_ax_new = i_display.plot(change.new, subplot_index=(0,), set_title=ds.variables[change.new].attrs['long_name'],)\n", + " i_ax_new.grid()\n", + " i_fig.canvas.draw()\n", + " i_fig.canvas.flush_events()\n", + "\n", + "dropdown.observe(update_plot, names='value')\n", + "\n", + "widgets.AppLayout(\n", + " header=dropdown,\n", + " 
center=i_fig.canvas,\n", + " pane_heights=[1, 6,1]\n", + ")\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.12" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/BBHRP/30bbhrpripbe1mcfarlane.c1.ipynb b/VAPs/quicklook/BBHRP/30bbhrpripbe1mcfarlane.c1.ipynb new file mode 100644 index 00000000..e1d1d235 --- /dev/null +++ b/VAPs/quicklook/BBHRP/30bbhrpripbe1mcfarlane.c1.ipynb @@ -0,0 +1,339 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# 30BBHRPRIPBE1MCFARLANE.C1 Plots\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/bbhrp) for more information about this vap." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib widget\n", + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import ipywidgets as widgets\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "# Data archive directory\n", + "DATA_DIR = r'/data/archive/'\n", + "\n", + "# Datastream info\n", + "DATASTREAM_NAME = '30bbhrpripbe1mcfarlane'\n", + "DATA_LEVEL = 'c1'\n", + "LOCATIONS = [{'end_date': '2010-12-30', 'facility': 'C1', 'site': 'sgp', 'start_date': '2002-03-01'}]" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "9faaf875", + "metadata": {}, + "source": [ + "## Define site, facility, and date range" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ac6764f5", + "metadata": {}, + "outputs": [], + "source": [ + "print(\"The following locations and date ranges are available for this VAP:\")\n", + "display(pd.DataFrame(LOCATIONS, columns=['site', 'facility', 'start_date', 'end_date']))" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "8d132223", + "metadata": {}, + "source": [ + "#### Define site, facility, and date range (date format: YYYY-MM-DD) using the variables below:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e563983a", + "metadata": {}, + "outputs": [], + "source": [ + "site_facility = ( 'sgp', 'C1' )\n", + "\n", + "date_start = '2010-12-28'\n", + "date_end = '2010-12-30'" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "bccd3dfe-2f99-49a5-bace-ea37e8dd8fc3", + "metadata": {}, + "source": [ + "## Load data files\n", + "Load data files from /data/archive/" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cb4b9a26-c574-49c0-a521-658fa553e39e", + 
"metadata": {}, + "outputs": [], + "source": [ + "# Compile list of files\n", + "site, facility = site_facility\n", + "d_date_start = datetime.strptime(date_start, '%Y-%m-%d')\n", + "d_date_end = datetime.strptime(date_end, '%Y-%m-%d')\n", + "dir_path = os.path.join(DATA_DIR + site, site + DATASTREAM_NAME + facility + r'.' + DATA_LEVEL )\n", + "dir_path\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6be8f3dc", + "metadata": {}, + "outputs": [], + "source": [ + "from datetime import date, timedelta\n", + "import pandas as pd\n", + "\n", + "def get_ARM_formated_dates(start_date, end_date):\n", + " \"\"\"\n", + " Get a list of ARM conventional formated date lists, based on start_date and end_date(inclusive)\n", + " EXAMPLE:\n", + " get_ARM_formated_dates(start_date=\"20180219\", end_date=\"20180221\")\n", + " >> [\"20180219\", \"20180220\", \"20180221\"] \n", + " \"\"\"\n", + " \n", + " _start_date = pd.to_datetime(start_date)\n", + " _end_date = pd.to_datetime(end_date)\n", + " \n", + " delta = _end_date - _start_date # returns timedelta \n", + " dates = []\n", + "\n", + " for i in range(delta.days + 1):\n", + " day = _start_date + timedelta(days=i)\n", + " day_formated = day.strftime(format=\"%Y%m%d\")\n", + " dates.append(day_formated)\n", + " return dates\n", + "\n", + "\n", + "get_ARM_formated_dates(start_date=date_start, end_date=date_end)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "51feea2e", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter a list of files based on date pattern\n", + "import glob\n", + "dates = get_ARM_formated_dates(start_date=date_start, end_date=date_end)\n", + "files_filter = []\n", + "for date in dates:\n", + " files_filter += glob.glob(f'{dir_path}/*.{date}*.*')\n", + " files_filter\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0e5d0d7", + "metadata": {}, + "outputs": [], + "source": [ + "# Load files as a single 
dataset\n", + "files_list = files_filter \n", + "ds = act.io.armfiles.read_netcdf(files_list)\n", + "ds.clean.cleanup()\n", + "print(f'{len(files_list)} files loaded')\n", + "ds\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "4a551094-9ec0-4b64-b80a-9940573c2f50", + "metadata": {}, + "source": [ + "## Plot time series data\n", + "#### Define the list of variables to be plotted:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d09b789e-84f1-4605-846b-a72c110c8048", + "metadata": {}, + "outputs": [], + "source": [ + "variables_to_plot = ['long_heating_rate', 'long_heating_rate_std', 'long_heating_rate_frac']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3458fb08-035b-4898-9253-0a94e6f9c97b", + "metadata": {}, + "outputs": [], + "source": [ + "ts_display = act.plotting.TimeSeriesDisplay(ds)\n", + "ts_display.add_subplots((len(variables_to_plot),), figsize = (9.5,4*len(variables_to_plot)))\n", + "\n", + "for i,v in enumerate(variables_to_plot):\n", + " ts_ax = ts_display.plot(v, subplot_index=(i,), set_title=ds.variables[v].attrs['long_name'],)\n", + " ts_ax.grid()\n", + "\n", + "plt.show()\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "194399aa-1907-452b-8ba9-bc31d7f60291", + "metadata": {}, + "source": [ + "## Quality check plots\n", + "#### Define variable for QC plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9a0ef63d-3eeb-48fc-a24a-43258b6134b8", + "metadata": {}, + "outputs": [], + "source": [ + "qc_variable = 'long_heating_rate'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "532663a3-4dc0-4497-bda8-018c5f91e1c4", + "metadata": {}, + "outputs": [], + "source": [ + "# QC Plot\n", + "if ('qc_'+qc_variable) in ds.variables:\n", + "\n", + " # Plot\n", + " qc_display = act.plotting.TimeSeriesDisplay(ds)\n", + " qc_display.add_subplots((2,), figsize = (9.5,10))\n", + " qc_ax = qc_display.plot(qc_variable, 
subplot_index=(0,), set_title=\"QC results on field: \" + qc_variable,)\n", + " qc_ax.grid()\n", + " qc_display.qc_flag_block_plot(qc_variable, subplot_index=(1,))\n", + "\n", + " plt.show()\n", + "else:\n", + " print(f'QC not available for the selected field: {qc_variable}')\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "2308db16-f362-4033-a11a-c3e5e75ad9ba", + "metadata": {}, + "source": [ + "## Field selection dropdown menu\n", + "Select variable to be plotted from a dropdown menu" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d5792fa8-2253-441a-8ac6-1e07a0345031", + "metadata": {}, + "outputs": [], + "source": [ + "plt.ioff()\n", + "\n", + "# populate dropdown menu with available variables \n", + "available_variables = [v for v in ds.variables if not('time' in v or v.startswith('qc_') or v.startswith('source_')) and 'long_name' in ds.variables[v].attrs]\n", + "d_variable = 'long_heating_rate'\n", + "dropdown = widgets.Dropdown(\n", + " options = [(ds.variables[v].attrs['long_name'], v) for v in available_variables],\n", + " value= d_variable,\n", + " description='Field:',\n", + " disabled=False,\n", + ")\n", + "dropdown.layout.margin = '0px 30% 0px 20%'\n", + "dropdown.layout.width = '50%'\n", + "\n", + "# set up display\n", + "i_display = act.plotting.TimeSeriesDisplay(ds)\n", + "i_display.add_subplots((1,), figsize = (9.5,5))\n", + "i_ax = i_display.plot(d_variable, subplot_index=(0,), set_title=ds.variables[d_variable].attrs['long_name'],)\n", + "i_ax.grid()\n", + "i_fig = i_display.fig\n", + "\n", + "# update plot callback function\n", + "def update_plot(change):\n", + " i_ax.cla()\n", + " i_ax_new = i_display.plot(change.new, subplot_index=(0,), set_title=ds.variables[change.new].attrs['long_name'],)\n", + " i_ax_new.grid()\n", + " i_fig.canvas.draw()\n", + " i_fig.canvas.flush_events()\n", + "\n", + "dropdown.observe(update_plot, names='value')\n", + "\n", + "widgets.AppLayout(\n", + " 
header=dropdown,\n", + " center=i_fig.canvas,\n", + " pane_heights=[1, 6,1]\n", + ")\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.12" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/BBHRP/BBHRP_tutorial.ipynb b/VAPs/quicklook/BBHRP/BBHRP_tutorial.ipynb new file mode 100644 index 00000000..17e8097d --- /dev/null +++ b/VAPs/quicklook/BBHRP/BBHRP_tutorial.ipynb @@ -0,0 +1,838 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# 1BBHRPRIPBE1MCFARLANE.C1 Notebook\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/bbhrp) for more information about this vap." + ] + }, + { + "cell_type": "markdown", + "id": "97097763", + "metadata": {}, + "source": [ + "In this notebook, we demonstrate the workflow to explore ARM vap data (using 1bbhrpripbe1mcfarlane as an example.) Value-added products (VAPs) are higher-order data products that have been analyzed and processed to ease scientists’ use of ARM data in atmospheric research and global climate models. \n", + "Here is the main content we will cover." 
+ ] + }, + { + "cell_type": "markdown", + "id": "eddec40f", + "metadata": {}, + "source": [ + "# Table of Content\n", + "## Access the data\n", + "* How to retrieve the data\n", + "* Data path and file name conventions\n", + "* Load data\n", + "## Explore the data\n", + "* NetCDF Data structure\n", + "* Xarray essentials\n", + "* Xarray Variable\n", + "## Plot the data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "import random\n", + "\n", + "import glob\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "id": "2372d5be", + "metadata": {}, + "source": [ + "## Access the data" + ] + }, + { + "cell_type": "markdown", + "id": "5b0d9684", + "metadata": {}, + "source": [ + "### How to retrieve the data\n", + "We assume the path \"/data/archive\" is available where you are running this notebook. You can use `os.path.exists(\"/data/archive\")` to verify if the path exists at your machine.\n", + "\n", + "\n", + "### Data path and file name conventions\n", + "There several common terminologies regarding ARM data, for example, data-stream-name, data-level, site, facility, instrument, etc. (For more details, please see [ARM Data File Standards Version 1.2](https://www.google.com/search?q=arm+datastream+facility+cite+definition&rlz=1C1GCEJ_enUS1029US1029&ei=hb41ZICrBPukqtsPvMODIA&ved=0ahUKEwjAgeiS0aL-AhV7kmoFHbzhAAQQ4dUDCBA&uact=5&oq=arm+datastream+facility+cite+definition&gs_lcp=Cgxnd3Mtd2l6LXNlcnAQAzIFCAAQogQyBQgAEKIEMgUIABCiBDIFCAAQogQyBQgAEKIEOgoIABBHENYEELADOgoIIRCgARDDBBAKSgQIQRgAUJ4DWIoOYO0PaAFwAXgAgAGMAYgBuQmSAQMyLjmYAQCgAQHIAQjAAQE&sclient=gws-wiz-serp).) 
\n", + "\n", + "For example, this notebook is called `1bbhrpripbe1mcfarlane.c1`, where `1bbhrpripbe1mcfarlane` is the \"datastream name\", and `c1` is the \"data level\".\n", + "\n", + "This datastream also contains site `sgp` and facility `C1`. (Note: an individual datastream might have multiple site-facility pairs.)\n", + "In such a case, the data of this data-stream is stored at `/data/archive/sgp/sgp1bbhrpripbe1mcfarlaneC1.c1`, which is in the format of `{DATA_DIR}/{site}/{site}{DATASTREAM_NAME}{facility}.{DATA_LEVEL}`. We can use the following method to assign the data-stream directory: `datastream_dir = os.path.join(DATA_DIR, site, site + DATASTREAM_NAME + facility + '.' + DATA_LEVEL )`\n", + "\n", + "The data files under datastream_dir also follow naming conventions. Once you reach the datastream_dir level, the most important file naming convention for differentiating the files is \"yyyyMMdd.hhmmss\", which comes in handy for filtering files by datetime. For example, we can use `glob.glob(f'{datastream_dir}/*.200709*.*')` to select the files from September 2007.\n", + "\n", + "(Note: refer to https://adc.arm.gov/discovery/#/ and the https://adc.arm.gov/solr8/metadata/select API to explore ARM datastreams and their associated available sites and facilities.)\n", + "\n", + "Please see the following examples in action." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d7e9eb85", + "metadata": {}, + "outputs": [], + "source": [ + "# Verify if DATA_DIR path exists\n", + "DATA_DIR = \"/data/archive\"\n", + "os.path.exists(DATA_DIR)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "586993fd", + "metadata": {}, + "outputs": [], + "source": [ + "# Specify datastream_dir following the path conventions and check its existence\n", + "DATASTREAM_NAME = \"1bbhrpripbe1mcfarlane\"\n", + "DATA_LEVEL = \"c1\"\n", + "site = \"sgp\"\n", + "facility = \"C1\"\n", + "datastream_dir = os.path.join(DATA_DIR, site, site + DATASTREAM_NAME + facility + '.' 
+ DATA_LEVEL )\n", + "print(datastream_dir)\n", + "print(os.path.exists(datastream_dir))\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0742f7c1", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: list 5 (random) files under datastream_dir\n", + "files = os.listdir(datastream_dir)\n", + "files[:5]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "39b98a36", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: get most recent file\n", + "list_of_files = glob.glob(f\"{datastream_dir}/*\") # * means all if need specific format then *.csv\n", + "latest_file = max(list_of_files, key=os.path.getctime)\n", + "latest_file" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "902d514e", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus sort datastream files based on datetime\n", + "files = os.listdir(datastream_dir)\n", + "file_sorted = files.copy()\n", + "file_sorted.sort() \n", + "print(file_sorted[:5])\n", + "\n", + "# to reverse\n", + "file_sorted_reverse = files.copy() \n", + "file_sorted_reverse.sort(reverse=True)\n", + "print(file_sorted_reverse[:5])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ec5923b2", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: pattern matching\n", + "# filter the 200709** files under datastream_dir\n", + "files_filter = glob.glob(f'{datastream_dir}/*.200709*.*')\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c4a4aa18", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: use armnotebook_utils.file_filter (TODO) to filter files based on datastream info \n" + ] + }, + { + "cell_type": "markdown", + "id": "cfeb9efc", + "metadata": {}, + "source": [ + "### Load data\n", + "The arm data is stored in [NetCDF](https://en.wikipedia.org/wiki/NetCDF#:~:text=%22NetCDF%20(network%20Common%20Data%20Form,format%20for%20representing%20scientific%20data.) format. 
We can use xarray's `open_dataset` method to load single file, or `open_mfdataset` to open multiple files. (Note: the latter will still return a single xarray dataset object by combining multiple files.)\n", + "\n", + "Note: open_dataset keeps the file handle open and lazy loads its contents. All parameters are passed directly to open_dataset. It is a preferable method over load_dataset for memory-effiency. (more details, please see [xarray.open_dataset](https://docs.xarray.dev/en/stable/generated/xarray.open_dataset.html))\n", + "\n", + "See the following example in action" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a440a329", + "metadata": {}, + "outputs": [], + "source": [ + "# open single file\n", + "full_path = latest_file\n", + "print(full_path)\n", + "ds_single = xr.open_dataset(full_path)\n", + "ds_single" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0143a3d", + "metadata": {}, + "outputs": [], + "source": [ + "print(ds_single)\n", + "print(type(ds_single))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1c0f8939", + "metadata": {}, + "outputs": [], + "source": [ + "# open multiple files\n", + "n_files = 3\n", + "full_paths = [os.path.join(datastream_dir, f_path) for f_path in file_sorted_reverse[:n_files]]\n", + "print(full_paths)\n", + "try: # Note: sometimes multiple files cannot be merged, so we used try except here.\n", + " ds_mutiple = xr.open_mfdataset(full_paths)\n", + " # ds_mutiple\n", + " print(type(ds_mutiple))\n", + " print(ds_mutiple)\n", + "except Exception as e:\n", + " print(e)" + ] + }, + { + "cell_type": "markdown", + "id": "3fa59943", + "metadata": {}, + "source": [ + "## Explore the data" + ] + }, + { + "cell_type": "markdown", + "id": "571f69b9", + "metadata": {}, + "source": [ + "### NetCDF Data structure \n", + "\n", + "\n", + "(If you are confident with NetCDF basics and xarray essentials, feel free to skip this session.)\n", + "Before we dive into 
data exploration, there are some essential concepts we should be familiar with (shown below): \n", + "* Dataset\n", + "* Data array\n", + "* Variable\n", + "* Dimension\n", + "* Coordinate\n", + "* Data Type\n", + "* Meta Data (Attributes)\n", + "\n", + "We will not go into details about NetCDF basics, but here are some references you might find helpful:\n", + "* [Components of a NetCDF Dataset](https://iprc.soest.hawaii.edu/users/xfu/tool/guidef-7.html#)\n", + "* [Network Common Data Form (NetCDF)](https://www.unidata.ucar.edu/software/netcdf/) and [A Brief History of (netCDF) Time](https://www.unidata.ucar.edu/software/netcdf/time/recs.html)\n", + "\n", + "\n", + "\n", + "### Xarray essentials\n", + "Earlier we introduced the xarray package and used `open_dataset` and `open_mfdataset` to retrieve a NetCDF data file as an xarray Dataset object. Recall that you can review the data in a notebook by using `print(ds)` or `ds`. \n", + "\n", + "Using xarray to retrieve the aforementioned NetCDF basics is straightforward. 
In general,\n", + "* Dataset: `ds`\n", + "* Data array: `ds.variables`\n", + "* Variable: `ds[var_name]`\n", + "* Dimension: `ds.dims`\n", + "* Coordinate: `ds.coords`\n", + "* Data Type: `type`\n", + "* Meta Data (Attributes): `ds.attrs`\n", + "\n", + "Also, here are some references if you are new to xarray:\n", + "* [Xarray in 45 minutes](https://tutorial.xarray.dev/overview/xarray-in-45-min.html)\n", + "* [Handling NetCDF Files using XArray for Absolute Beginners](https://towardsdatascience.com/handling-netcdf-files-using-xarray-for-absolute-beginners-111a8ab4463f)\n", + "\n", + "Try the following commands in action.\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b3eac323", + "metadata": {}, + "outputs": [], + "source": [ + "ds = ds_single" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "77ecf85d", + "metadata": {}, + "outputs": [], + "source": [ + "# Dataset \n", + "ds" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "25e7de09", + "metadata": {}, + "outputs": [], + "source": [ + "# Data array, Variable\n", + "# Note: the info can be overwhelming. 
Do not attempt to grasp everything at the first glance.\n", + "ds.variables" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8c41b67e", + "metadata": {}, + "outputs": [], + "source": [ + "# dimensions\n", + "ds.dims" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "156f1dfc", + "metadata": {}, + "outputs": [], + "source": [ + "# coordinates\n", + "ds.coords" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "277d6064", + "metadata": {}, + "outputs": [], + "source": [ + "# Meta Data (Attributes)\n", + "ds.attrs" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d334681f", + "metadata": {}, + "outputs": [], + "source": [ + "# type\n", + "print(type(ds))\n", + "print(type(ds.variables))\n", + "print(type(ds.dims))\n", + "print(type(ds.coords))\n", + "print(type(ds.attrs))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "643399d6", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: retrieve variable names only\n", + "list(ds.variables)" + ] + }, + { + "cell_type": "markdown", + "id": "1cf36878", + "metadata": {}, + "source": [ + "#### Discussion: variable vs. coordinates vs. dimensions. \n", + "From [Components of a NetCDF Dataset](https://iprc.soest.hawaii.edu/users/xfu/tool/guidef-7.html#):\n", + "\n", + "* Dimension: A dimension may be used to represent a real physical dimension, for example, time, latitude, longitude, or height. A dimension might also be used to index other quantities, for example station or model-run-number.\n", + "A netCDF dimension has both a name and a length. A dimension length is an arbitrary positive integer, except that one dimension in a netCDF dataset can have the length UNLIMITED.\n", + "\n", + "* Variables: Variables are used to store the bulk of the data in a netCDF dataset. A variable represents an array of values of the same type. A scalar value is treated as a 0-dimensional array. 
A variable has a name, a data type, and a shape described by its list of dimensions specified when the variable is created. A variable may also have associated attributes, which may be added, deleted or changed after the variable is created.\n", + "\n", + "* Coordinate (Variables): It is legal for a variable to have the same name as a dimension. Such variables have no special meaning to the netCDF library. However there is a convention that such variables should be treated in a special way by software using this library.\n", + "A variable with the same name as a dimension is called a coordinate variable. It typically defines a physical coordinate corresponding to that dimension.\n", + "\n", + "\n", + "Tips: By definition, coordinates and dimensions are also variables. For example, ds.time is a coordinate, but it is also a special variable. In practice, however, when we talk about a variable, we mean a regular (non-coordinate) variable. We will use this convention for the remainder of this notebook. " + ] + }, + { + "cell_type": "markdown", + "id": "d47120f9", + "metadata": {}, + "source": [ + "### Xarray Variable\n", + "\n", + "\n", + "We can use `ds.variables` to access the variables of a dataset. But printing out the whole dataset can often be overwhelming. Instead, we can work on an individual variable (also called an xarray data array) using the `ds[var_name]` syntax, where `var_name` is the name of the variable. See the following examples in action.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5a57e136", + "metadata": {}, + "outputs": [], + "source": [ + "print(type(ds[\"time\"]))\n", + "ds[\"time\"]" + ] + }, + { + "cell_type": "markdown", + "id": "51ec643e", + "metadata": {}, + "source": [ + "#### Variable properties\n", + "For an individual variable, the following properties are the most commonly used, shown with the associated xarray attribute to retrieve them. 
(assuming var = ds[var_name])\n", + "* name: `var.name`\n", + "* data content: `var.data`\n", + "* attributes: `var.attrs`\n", + "* dimensions: `var.dims`\n", + "* data type: `var.data.dtype`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a632a525", + "metadata": {}, + "outputs": [], + "source": [ + "var_name = \"time\"\n", + "var = ds[var_name]\n", + "\n", + "print(\"var.name: \\n\", var.name, \"\\n\")\n", + "print(\"var.data: \\n\", var.data, \"\\n\")\n", + "print(\"var.attrs: \\n\", var.attrs, \"\\n\")\n", + "print(\"var.dims: \\n\", var.dims, \"\\n\")\n", + "print(\"var.data.dtype: \\n\", var.data.dtype, \"\\n\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b256b07f", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: individual variable info table (collect and display variable info as a tabular format)\n", + "df_info = pd.DataFrame()\n", + "df_info[\"var_name\"] = list(ds.variables)\n", + "df_info[\"dims\"] = df_info.var_name.apply(lambda x: ds[x].dims)\n", + "df_info[\"is_dim\"] = df_info.var_name.apply(lambda x: x in ds.dims)\n", + "df_info[\"n_dim\"] = df_info.var_name.apply(lambda x: len(ds[x].dims))\n", + "df_info[\"attrs\"] = df_info.var_name.apply(lambda x: ds[x].attrs)\n", + "df_info[\"dtype\"] = df_info.var_name.apply(lambda x: ds[x].data.dtype)\n", + "df_info" + ] + }, + { + "cell_type": "markdown", + "id": "e6a36b96", + "metadata": {}, + "source": [ + "### Data cleaning/Preprocessing (skipped)\n", + "Data cleaning and preprocessing is an important stage in the data analysis pipeline. However, it is outside the scope of this notebook. For more information about data cleaning and preprocessing basics, here are some references. 
\n", + "* [Xarray Fundamentals](https://earth-env-data-science.github.io/lectures/xarray/xarray.html)\n", + "* [Xarray Tutorial — Pangeo Gallery documentation](http://gallery.pangeo.io/repos/pangeo-data/pangeo-tutorial-gallery/xarray.html)\n", + "* [Pythonic Data Cleaning With pandas and NumPy](https://realpython.com/python-data-cleaning-numpy-pandas/)\n", + "* [Pandas - Cleaning Data](https://www.w3schools.com/python/pandas/pandas_cleaning.asp)" + ] + }, + { + "cell_type": "markdown", + "id": "ae6e1a69", + "metadata": {}, + "source": [ + "### Plotting\n", + "Data visualization (or plotting) is another important data analysis topic and deserves its own discussion. Here in this notebook we will only demonstrate basic tools for simple data visualization tasks.\n", + "\n", + "Here are some references you might find useful:\n", + "* xarray plotting: https://docs.xarray.dev/en/stable/user-guide/plotting.html\n", + "* Atmospheric Community Toolkit (ACT): https://arm-doe.github.io/ACT/index.html\n", + "\n", + "Note: this notebook is auto-generated using a template. It uses a general rule to select the variable(s) to plot and is not customized for each individual notebook. Feel free to change the variables of interest, especially if a figure fails to plot." + ] + }, + { + "cell_type": "markdown", + "id": "7e417fb2", + "metadata": {}, + "source": [ + "#### 1-dimensional basic time series plot\n", + "\n", + "For the following plot we would like to find variables such that\n", + "* it has one and only one dimension\n", + "* \"time\" is its coordinate variable\n", + "* it is not a dimension itself,\n", + "* it is not a special variable with substrings within [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "\n", + "Please see the following example in action. 
(For more details about pandas filtering, please see the following references.)\n", + "* [pandas: multiple conditions while indexing data frame](https://stackoverflow.com/questions/22591174/pandas-multiple-conditions-while-indexing-data-frame-unexpected-behavior)\n", + "* [pandas.DataFrame.apply](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.apply.html)\n", + "* [How to test if a string contains one of the substrings in a list...](https://stackoverflow.com/questions/26577516/how-to-test-if-a-string-contains-one-of-the-substrings-in-a-list-in-pandas)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "30edac2d", + "metadata": {}, + "outputs": [], + "source": [ + "# (pandas query) filter n_dim==1, non-dimensional, and not contains the following substrings\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter = df_info[(df_info.n_dim == 1) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (df_info.is_dim==False) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4777a659", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " var_1d = df_filter.var_name.values[0]\n", + " var_1d\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fb5ae985", + "metadata": {}, + "outputs": [], + "source": [ + "# Note: if failed, change to another variable to plot.\n", + "try:\n", + " ds[var_1d].plot()\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "markdown", + "id": "9e41066e", + "metadata": {}, + "source": [ + "#### 2-dimensional basic plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "09bd4adc", + "metadata": {}, + "outputs": [], + "source": [ + "# (pandas query) filter n_dim==2, non-dimensional, and not contains the 
following substrings\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_2 = df_info[(df_info.n_dim == 2) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (df_info.is_dim==False) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter_2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2cdea1a9", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " var_2d = df_filter_2.var_name.values[0]\n", + " var_2d\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f5f48879", + "metadata": {}, + "outputs": [], + "source": [ + "# Note: if failed, change to another variable to plot.\n", + "try:\n", + " print(ds[var_2d].dims)\n", + " # ds[var_2d].plot()\n", + "\n", + " # conventionally, use \"time\" as x-axis\n", + " ds[var_2d].plot(x=\"time\")\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "markdown", + "id": "7554b2a6", + "metadata": {}, + "source": [ + "#### qc-plotting (optional)\n", + "\n", + "Note: act qc plotting has more strict requirement, one of them is the associated qc variable need to have \"flag_masks\" attributes. 
Which is added by using ds = act.io.armfiles.read_netcdf(files_list), then ds.clean.cleanup()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "71c2096f", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " ds_act = act.io.armfiles.read_netcdf(full_path)\n", + " print(type(ds_act))\n", + " ds_act.clean.cleanup()\n", + "\n", + " # or \n", + " # ds.clean.cleanup()\n", + "except Exception as e:\n", + " print(\"ERROR\", e)\n", + " ds_act = ds" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "844f8505", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter valid variables for ACT qc plotting\n", + "condition = (\"qc_\" + df_info.var_name).apply(lambda x: ds_act[x].attrs.get(\"flag_masks\") is not None \n", + " if x in list(ds.variables) else False)\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_3 = df_info[(df_info.is_dim==False) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings))) &\n", + " condition\n", + " ]\n", + "df_filter_3" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ecd38cc6", + "metadata": {}, + "outputs": [], + "source": [ + "# QC Plot\n", + "try:\n", + " qc_variable = df_filter_3.var_name.values[0]\n", + " print(qc_variable)\n", + "except Exception as e:\n", + " print(e)\n", + " \n", + "try:\n", + "\n", + " # Plot\n", + " qc_display = act.plotting.TimeSeriesDisplay(ds_act)\n", + " qc_display.add_subplots((2,), figsize = (9.5,10))\n", + " qc_ax = qc_display.plot(qc_variable, subplot_index=(0,), set_title=\"QC results on field: \" + qc_variable,)\n", + " qc_ax.grid()\n", + " qc_display.qc_flag_block_plot(qc_variable, subplot_index=(1,))\n", + " qc_ax.set_xlabel(f\"UTC Time starts at {ds.time.data[0]}\")\n", + "\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e)" + ] + }, + { + "cell_type": "markdown", + "id": "d76e4d27", + "metadata": 
{}, + "source": [ + "#### bonus: choose variables to plot from a dropdown menu " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bb733804", + "metadata": {}, + "outputs": [], + "source": [ + "# Valid variables filtering\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_4 = df_info[(df_info.is_dim==False) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter_4" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "82d5c20d", + "metadata": {}, + "outputs": [], + "source": [ + "# example 1: using xarray plot\n", + "\n", + "# Uncomment the following cell to try the interactive plot (ctrl + /)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2fc9aaa4", + "metadata": {}, + "outputs": [], + "source": [ + "# %matplotlib widget\n", + "# plt.clf()\n", + "\n", + "# fig, ax = plt.subplots(figsize=(10, 4))\n", + "\n", + "# available_variables = df_filter_4.var_name.values\n", + "# @widgets.interact(var=available_variables)\n", + "# def update(var = available_variables[0]):\n", + "# fig.clear() # Remove old lines from plot and plot new one\n", + "# if len(ds[var].dims)==2:\n", + "# ds[var].plot(x=\"time\", add_colorbar=False)\n", + "# else:\n", + "# ds[var].plot()\n", + "# plt.grid()\n", + "# plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fabdd802", + "metadata": {}, + "outputs": [], + "source": [ + "# example 2: using act plot\n", + "\n", + "# Uncomment the following cell to try the interactive plot (ctrl + /)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "962b4186", + "metadata": {}, + "outputs": [], + "source": [ + "# %matplotlib widget\n", + "# plt.clf()\n", + "\n", + "# available_variables = df_filter_4.var_name.values\n", + "\n", + "\n", + "# @widgets.interact(var=available_variables)\n", + "# def 
update(var = available_variables[0]):\n", + "\n", + "# i_display = act.plotting.TimeSeriesDisplay(ds_act)\n", + "# i_display.add_subplots((1,), figsize=(10, 4))\n", + "# ax = i_display.plot(var, subplot_index=(0,), set_title=f\"{var} ({ds_act[var].attrs['long_name']})\",)\n", + "\n", + "# ax.set_xlabel(f\"UTC Time starts at {ds.time.data[0]}\")\n", + "# ax.grid()\n", + "# plt.show()\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.16" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/BBHRP/bbhrpavg1mlawer.c1.ipynb b/VAPs/quicklook/BBHRP/bbhrpavg1mlawer.c1.ipynb new file mode 100644 index 00000000..7c0b6d4d --- /dev/null +++ b/VAPs/quicklook/BBHRP/bbhrpavg1mlawer.c1.ipynb @@ -0,0 +1,339 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# BBHRPAVG1MLAWER.C1 Plots\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/bbhrp) for more information about this vap." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib widget\n", + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import ipywidgets as widgets\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "# Data archive directory\n", + "DATA_DIR = r'/data/archive/'\n", + "\n", + "# Datastream info\n", + "DATASTREAM_NAME = 'bbhrpavg1mlawer'\n", + "DATA_LEVEL = 'c1'\n", + "LOCATIONS = [{'end_date': '2006-02-27', 'facility': 'C1', 'site': 'sgp', 'start_date': '2000-03-01'}]" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "9faaf875", + "metadata": {}, + "source": [ + "## Define site, facility, and date range" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ac6764f5", + "metadata": {}, + "outputs": [], + "source": [ + "print(\"The following locations and date ranges are available for this VAP:\")\n", + "display(pd.DataFrame(LOCATIONS, columns=['site', 'facility', 'start_date', 'end_date']))" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "8d132223", + "metadata": {}, + "source": [ + "#### Define site, facility, and date range (date format: YYYY-MM-DD) using the variables below:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e563983a", + "metadata": {}, + "outputs": [], + "source": [ + "site_facility = ( 'sgp', 'C1' )\n", + "\n", + "date_start = '2006-02-25'\n", + "date_end = '2006-02-27'" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "bccd3dfe-2f99-49a5-bace-ea37e8dd8fc3", + "metadata": {}, + "source": [ + "## Load data files\n", + "Load data files from /data/archive/" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cb4b9a26-c574-49c0-a521-658fa553e39e", + 
"metadata": {}, + "outputs": [], + "source": [ + "# Compile list of files\n", + "site, facility = site_facility\n", + "d_date_start = datetime.strptime(date_start, '%Y-%m-%d')\n", + "d_date_end = datetime.strptime(date_end, '%Y-%m-%d')\n", + "dir_path = os.path.join(DATA_DIR + site, site + DATASTREAM_NAME + facility + r'.' + DATA_LEVEL )\n", + "dir_path\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6be8f3dc", + "metadata": {}, + "outputs": [], + "source": [ + "from datetime import date, timedelta\n", + "import pandas as pd\n", + "\n", + "def get_ARM_formated_dates(start_date, end_date):\n", + " \"\"\"\n", + " Get a list of ARM conventional formated date lists, based on start_date and end_date(inclusive)\n", + " EXAMPLE:\n", + " get_ARM_formated_dates(start_date=\"20180219\", end_date=\"20180221\")\n", + " >> [\"20180219\", \"20180220\", \"20180221\"] \n", + " \"\"\"\n", + " \n", + " _start_date = pd.to_datetime(start_date)\n", + " _end_date = pd.to_datetime(end_date)\n", + " \n", + " delta = _end_date - _start_date # returns timedelta \n", + " dates = []\n", + "\n", + " for i in range(delta.days + 1):\n", + " day = _start_date + timedelta(days=i)\n", + " day_formated = day.strftime(format=\"%Y%m%d\")\n", + " dates.append(day_formated)\n", + " return dates\n", + "\n", + "\n", + "get_ARM_formated_dates(start_date=date_start, end_date=date_end)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "51feea2e", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter a list of files based on date pattern\n", + "import glob\n", + "dates = get_ARM_formated_dates(start_date=date_start, end_date=date_end)\n", + "files_filter = []\n", + "for date in dates:\n", + " files_filter += glob.glob(f'{dir_path}/*.{date}*.*')\n", + " files_filter\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0e5d0d7", + "metadata": {}, + "outputs": [], + "source": [ + "# Load files as a single 
dataset\n", + "files_list = files_filter \n", + "ds = act.io.armfiles.read_netcdf(files_list)\n", + "ds.clean.cleanup()\n", + "print(f'{len(files_list)} files loaded')\n", + "ds\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "4a551094-9ec0-4b64-b80a-9940573c2f50", + "metadata": {}, + "source": [ + "## Plot time series data\n", + "#### Define the list of variables to be plotted:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d09b789e-84f1-4605-846b-a72c110c8048", + "metadata": {}, + "outputs": [], + "source": [ + "variables_to_plot = ['pressure', 'temperature', 'column_ozone']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3458fb08-035b-4898-9253-0a94e6f9c97b", + "metadata": {}, + "outputs": [], + "source": [ + "ts_display = act.plotting.TimeSeriesDisplay(ds)\n", + "ts_display.add_subplots((len(variables_to_plot),), figsize = (9.5,4*len(variables_to_plot)))\n", + "\n", + "for i,v in enumerate(variables_to_plot):\n", + " ts_ax = ts_display.plot(v, subplot_index=(i,), set_title=ds.variables[v].attrs['long_name'],)\n", + " ts_ax.grid()\n", + "\n", + "plt.show()\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "194399aa-1907-452b-8ba9-bc31d7f60291", + "metadata": {}, + "source": [ + "## Quality check plots\n", + "#### Define variable for QC plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9a0ef63d-3eeb-48fc-a24a-43258b6134b8", + "metadata": {}, + "outputs": [], + "source": [ + "qc_variable = 'flux'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "532663a3-4dc0-4497-bda8-018c5f91e1c4", + "metadata": {}, + "outputs": [], + "source": [ + "# QC Plot\n", + "if ('qc_'+qc_variable) in ds.variables:\n", + "\n", + " # Plot\n", + " qc_display = act.plotting.TimeSeriesDisplay(ds)\n", + " qc_display.add_subplots((2,), figsize = (9.5,10))\n", + " qc_ax = qc_display.plot(qc_variable, subplot_index=(0,), set_title=\"QC results on 
field: \" + qc_variable,)\n", + " qc_ax.grid()\n", + " qc_display.qc_flag_block_plot(qc_variable, subplot_index=(1,))\n", + "\n", + " plt.show()\n", + "else:\n", + " print(f'QC not available for the selected field: {qc_variable}')\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "2308db16-f362-4033-a11a-c3e5e75ad9ba", + "metadata": {}, + "source": [ + "## Field selection dropdown menu\n", + "Select variable to be plotted from a dropdown menu" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d5792fa8-2253-441a-8ac6-1e07a0345031", + "metadata": {}, + "outputs": [], + "source": [ + "plt.ioff()\n", + "\n", + "# populate dropdown menu with available variables \n", + "available_variables = [v for v in ds.variables if not('time' in v or v.startswith('qc_') or v.startswith('source_')) and 'long_name' in ds.variables[v].attrs]\n", + "d_variable = 'pressure'\n", + "dropdown = widgets.Dropdown(\n", + " options = [(ds.variables[v].attrs['long_name'], v) for v in available_variables],\n", + " value= d_variable,\n", + " description='Field:',\n", + " disabled=False,\n", + ")\n", + "dropdown.layout.margin = '0px 30% 0px 20%'\n", + "dropdown.layout.width = '50%'\n", + "\n", + "# set up display\n", + "i_display = act.plotting.TimeSeriesDisplay(ds)\n", + "i_display.add_subplots((1,), figsize = (9.5,5))\n", + "i_ax = i_display.plot(d_variable, subplot_index=(0,), set_title=ds.variables[d_variable].attrs['long_name'],)\n", + "i_ax.grid()\n", + "i_fig = i_display.fig\n", + "\n", + "# update plot callback function\n", + "def update_plot(change):\n", + " i_ax.cla()\n", + " i_ax_new = i_display.plot(change.new, subplot_index=(0,), set_title=ds.variables[change.new].attrs['long_name'],)\n", + " i_ax_new.grid()\n", + " i_fig.canvas.draw()\n", + " i_fig.canvas.flush_events()\n", + "\n", + "dropdown.observe(update_plot, names='value')\n", + "\n", + "widgets.AppLayout(\n", + " header=dropdown,\n", + " center=i_fig.canvas,\n", + " pane_heights=[1, 
6,1]\n", + ")\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.12" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/BEFLUX/BEFLUX_tutorial.ipynb b/VAPs/quicklook/BEFLUX/BEFLUX_tutorial.ipynb new file mode 100644 index 00000000..83809f02 --- /dev/null +++ b/VAPs/quicklook/BEFLUX/BEFLUX_tutorial.ipynb @@ -0,0 +1,838 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# BEFLUX1LONG.C1 Notebook\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/beflux) for more information about this vap." + ] + }, + { + "cell_type": "markdown", + "id": "97097763", + "metadata": {}, + "source": [ + "In this notebook, we demonstrate the workflow to explore ARM vap data (using beflux1long as an example.) Value-added products (VAPs) are higher-order data products that have been analyzed and processed to ease scientists’ use of ARM data in atmospheric research and global climate models. \n", + "Here is the main content we will cover." 
+ ] + }, + { + "cell_type": "markdown", + "id": "eddec40f", + "metadata": {}, + "source": [ + "# Table of Contents\n", + "## Access the data\n", + "* How to retrieve the data\n", + "* Data path and file name conventions\n", + "* Load data\n", + "## Explore the data\n", + "* NetCDF Data structure\n", + "* Xarray essentials\n", + "* Xarray Variable\n", + "## Plot the data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "import random\n", + "\n", + "import glob\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "id": "2372d5be", + "metadata": {}, + "source": [ + "## Access the data" + ] + }, + { + "cell_type": "markdown", + "id": "5b0d9684", + "metadata": {}, + "source": [ + "### How to retrieve the data\n", + "We assume the path \"/data/archive\" is available where you are running this notebook. You can use `os.path.exists(\"/data/archive\")` to verify that the path exists on your machine.\n", + "\n", + "\n", + "### Data path and file name conventions\n", + "There are several common terms used with ARM data, for example, data-stream-name, data-level, site, facility, instrument, etc. (For more details, please see [ARM Data File Standards Version 1.2](https://www.google.com/search?q=arm+datastream+facility+cite+definition&rlz=1C1GCEJ_enUS1029US1029&ei=hb41ZICrBPukqtsPvMODIA&ved=0ahUKEwjAgeiS0aL-AhV7kmoFHbzhAAQQ4dUDCBA&uact=5&oq=arm+datastream+facility+cite+definition&gs_lcp=Cgxnd3Mtd2l6LXNlcnAQAzIFCAAQogQyBQgAEKIEMgUIABCiBDIFCAAQogQyBQgAEKIEOgoIABBHENYEELADOgoIIRCgARDDBBAKSgQIQRgAUJ4DWIoOYO0PaAFwAXgAgAGMAYgBuQmSAQMyLjmYAQCgAQHIAQjAAQE&sclient=gws-wiz-serp).) 
\n", + "\n", + "For example, this notebook is called `beflux1long.c1`, where `beflux1long` is the \"datastream name\", and `c1` is the \"data level\".\n", + "\n", + "This datastream also contains site `sgp` and facility `C1`. (Note: an individual datastream might have multiple site-facility pairs.)\n", + "In such a case, the data of this data-stream is stored at `/data/archive/sgp/sgpbeflux1longC1.c1`, which is in the format of `<DATA_DIR>/<site>/<site><datastream_name><facility>.<data_level>`. We can use the following method to assign the data-stream directory `datastream_dir = os.path.join(DATA_DIR, site, site + DATASTREAM_NAME + facility + '.' + DATA_LEVEL )`\n", + "\n", + "The data files under datastream_dir also follow naming conventions. But once you reach the datastream_dir level, the most important file naming convention to differentiate the files is \"yyyyMMdd.hhmmss\", which comes in handy to filter files based on datetime. For example, we can use `glob.glob(f'{datastream_dir}/*.200709*.*')` to filter files from September 2007.\n", + "\n", + "(Note: refer to the https://adc.arm.gov/discovery/#/ and https://adc.arm.gov/solr8/metadata/select API to explore ARM datastreams and their associated available sites and facilities.)\n", + "\n", + "Please see the following examples in action" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d7e9eb85", + "metadata": {}, + "outputs": [], + "source": [ + "# Verify if DATA_DIR path exists\n", + "DATA_DIR = \"/data/archive\"\n", + "os.path.exists(DATA_DIR)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "586993fd", + "metadata": {}, + "outputs": [], + "source": [ + "# Speicify datastream_dir following the path conventions and check its existence\n", + "DATASTREAM_NAME = \"beflux1long\"\n", + "DATA_LEVEL = \"c1\"\n", + "site = \"sgp\"\n", + "facility = \"C1\"\n", + "datastream_dir = os.path.join(DATA_DIR, site, site + DATASTREAM_NAME + facility + '.' 
+ DATA_LEVEL )\n", + "print(datastream_dir)\n", + "print(os.path.exists(datastream_dir))\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0742f7c1", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: list 5 (random) files under datastream_dir\n", + "files = os.listdir(datastream_dir)\n", + "files[:5]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "39b98a36", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: get most recent file\n", + "list_of_files = glob.glob(f\"{datastream_dir}/*\") # * means all if need specific format then *.csv\n", + "latest_file = max(list_of_files, key=os.path.getctime)\n", + "latest_file" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "902d514e", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus sort datastream files based on datetime\n", + "files = os.listdir(datastream_dir)\n", + "file_sorted = files.copy()\n", + "file_sorted.sort() \n", + "print(file_sorted[:5])\n", + "\n", + "# to reverse\n", + "file_sorted_reverse = files.copy() \n", + "file_sorted_reverse.sort(reverse=True)\n", + "print(file_sorted_reverse[:5])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ec5923b2", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: pattern matching\n", + "# filter the 200709** files under datastream_dir\n", + "files_filter = glob.glob(f'{datastream_dir}/*.200709*.*')\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c4a4aa18", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: use armnotebook_utils.file_filter (TODO) to filter files based on datastream info \n" + ] + }, + { + "cell_type": "markdown", + "id": "cfeb9efc", + "metadata": {}, + "source": [ + "### Load data\n", + "The arm data is stored in [NetCDF](https://en.wikipedia.org/wiki/NetCDF#:~:text=%22NetCDF%20(network%20Common%20Data%20Form,format%20for%20representing%20scientific%20data.) format. 
We can use xarray's `open_dataset` method to load single file, or `open_mfdataset` to open multiple files. (Note: the latter will still return a single xarray dataset object by combining multiple files.)\n", + "\n", + "Note: open_dataset keeps the file handle open and lazy loads its contents. All parameters are passed directly to open_dataset. It is a preferable method over load_dataset for memory-effiency. (more details, please see [xarray.open_dataset](https://docs.xarray.dev/en/stable/generated/xarray.open_dataset.html))\n", + "\n", + "See the following example in action" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a440a329", + "metadata": {}, + "outputs": [], + "source": [ + "# open single file\n", + "full_path = latest_file\n", + "print(full_path)\n", + "ds_single = xr.open_dataset(full_path)\n", + "ds_single" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0143a3d", + "metadata": {}, + "outputs": [], + "source": [ + "print(ds_single)\n", + "print(type(ds_single))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1c0f8939", + "metadata": {}, + "outputs": [], + "source": [ + "# open multiple files\n", + "n_files = 3\n", + "full_paths = [os.path.join(datastream_dir, f_path) for f_path in file_sorted_reverse[:n_files]]\n", + "print(full_paths)\n", + "try: # Note: sometimes multiple files cannot be merged, so we used try except here.\n", + " ds_mutiple = xr.open_mfdataset(full_paths)\n", + " # ds_mutiple\n", + " print(type(ds_mutiple))\n", + " print(ds_mutiple)\n", + "except Exception as e:\n", + " print(e)" + ] + }, + { + "cell_type": "markdown", + "id": "3fa59943", + "metadata": {}, + "source": [ + "## Explore the data" + ] + }, + { + "cell_type": "markdown", + "id": "571f69b9", + "metadata": {}, + "source": [ + "### NetCDF Data structure \n", + "\n", + "\n", + "(If you are confident with NetCDF basics and xarray essentials, feel free to skip this session.)\n", + "Before we dive into 
data exploration, there are some essential concepts we should be familiar with (shown below): \n", + "* Dataset\n", + "* Data array\n", + "* Variable\n", + "* Dimension\n", + "* Coordinate\n", + "* Data Type\n", + "* Meta Data (Attributes)\n", + "\n", + "We will not go into details about NetCDF basics, but here are some references you might find helpful\n", + "* [Components of a NetCDF Dataset](https://iprc.soest.hawaii.edu/users/xfu/tool/guidef-7.html#)\n", + "* [Network Common Data Form (NetCDF)](https://www.unidata.ucar.edu/software/netcdf/) and [A Brief History of (netCDF) Time](https://www.unidata.ucar.edu/software/netcdf/time/recs.html)\n", + "\n", + "\n", + "\n", + "### Xarray essentials\n", + "Earlier we introduced the xarray package and used `open_dataset` and `open_mfdataset` to load NetCDF data files as an xarray Dataset object. Recall that you can review the data in a notebook by using `print(ds)` or `ds`. \n", + "\n", + "Using xarray to retrieve the aforementioned NetCDF basics is straightforward. 
In general,\n", + "* Dataset: `ds`\n", + "* Data array: `ds[var_name]`\n", + "* Variable: `ds.variables`\n", + "* Dimension: `ds.dims`\n", + "* Coordinate: `ds.coords`\n", + "* Data Type: `type`\n", + "* Meta Data (Attributes): `ds.attrs`\n", + "\n", + "Also, here are some references if you are new to xarray\n", + "* [Xarray in 45 minutes](https://tutorial.xarray.dev/overview/xarray-in-45-min.html)\n", + "* [Handling NetCDF Files using XArray for Absolute Beginners](https://towardsdatascience.com/handling-netcdf-files-using-xarray-for-absolute-beginners-111a8ab4463f)\n", + "\n", + "Try the following commands in action\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b3eac323", + "metadata": {}, + "outputs": [], + "source": [ + "ds = ds_single" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "77ecf85d", + "metadata": {}, + "outputs": [], + "source": [ + "# Dataset \n", + "ds" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "25e7de09", + "metadata": {}, + "outputs": [], + "source": [ + "# Data array, Variable\n", + "# Note: the info can be overwhelming. 
Do not attempt to grasp everything at the first glance.\n", + "ds.variables" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8c41b67e", + "metadata": {}, + "outputs": [], + "source": [ + "# dimenssions\n", + "ds.dims" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "156f1dfc", + "metadata": {}, + "outputs": [], + "source": [ + "# coordinates\n", + "ds.coords" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "277d6064", + "metadata": {}, + "outputs": [], + "source": [ + "# Meta Data (Attributes)\n", + "ds.attrs" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d334681f", + "metadata": {}, + "outputs": [], + "source": [ + "# type\n", + "print(type(ds))\n", + "print(type(ds.variables))\n", + "print(type(ds.dims))\n", + "print(type(ds.coords))\n", + "print(type(ds.attrs))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "643399d6", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: retrieve variable names only\n", + "list(ds.variables)" + ] + }, + { + "cell_type": "markdown", + "id": "1cf36878", + "metadata": {}, + "source": [ + "#### Discussion: variable vs. coordinates vs. dimenssions. \n", + "From [Components of a NetCDF Dataset](https://iprc.soest.hawaii.edu/users/xfu/tool/guidef-7.html#):\n", + "\n", + "* Dimession: A dimension may be used to represent a real physical dimension, for example, time, latitude, longitude, or height. A dimension might also be used to index other quantities, for example station or model-run-number.\n", + "A netCDF dimension has both a name and a length. A dimension length is an arbitrary positive integer, except that one dimension in a netCDF dataset can have the length UNLIMITED.\n", + "\n", + "* Variables: Variables are used to store the bulk of the data in a netCDF dataset. A variable represents an array of values of the same type. A scalar value is treated as a 0-dimensional array. 
A variable has a name, a data type, and a shape described by its list of dimensions specified when the variable is created. A variable may also have associated attributes, which may be added, deleted or changed after the variable is created.\n", + "\n", + "* Coordinate (Variables): It is legal for a variable to have the same name as a dimension. Such variables have no special meaning to the netCDF library. However there is a convention that such variables should be treated in a special way by software using this library.\n", + "A variable with the same name as a dimension is called a coordinate variable. It typically defines a physical coordinate corresponding to that dimension.\n", + "\n", + "\n", + "Tips: By definition, coordinates and dimensions are also variables. For example, ds.time is a coordinate, but it is also a special variable. In practice, however, when we talk about a variable, we mean a regular (non-coordinate) variable. We will use this convention for the remainder of the notebook. " + ] + }, + { + "cell_type": "markdown", + "id": "d47120f9", + "metadata": {}, + "source": [ + "### Xarray Variable\n", + "\n", + "\n", + "We can use `ds.variables` to access the variables of a dataset. But printing out the whole dataset can often be overwhelming. Instead, we work on an individual variable (also called an xarray data array) using the `ds[var_name]` syntax, where `var_name` is the name of the variable. See the following examples in action.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5a57e136", + "metadata": {}, + "outputs": [], + "source": [ + "print(type(ds[\"time\"]))\n", + "ds[\"time\"]" + ] + }, + { + "cell_type": "markdown", + "id": "51ec643e", + "metadata": {}, + "source": [ + "#### Variable properties\n", + "For an individual variable, the following properties are the most commonly used, listed with the associated xarray attribute to retrieve them. 
(assuming var = ds[var_name])\n", + "* name: `var.name`\n", + "* data content: `var.data`\n", + "* attributes: `var.attrs`\n", + "* dimenstions: `var.dims`\n", + "* data type: `var.data.dtype`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a632a525", + "metadata": {}, + "outputs": [], + "source": [ + "var_name = \"time\"\n", + "var = ds[var_name]\n", + "\n", + "print(\"var.name: \\n\", var.name, \"\\n\")\n", + "print(\"var.data: \\n\", var.data, \"\\n\")\n", + "print(\"var.attrs: \\n\", var.attrs, \"\\n\")\n", + "print(\"var.dims: \\n\", var.dims, \"\\n\")\n", + "print(\"var.data.dtype: \\n\", var.data.dtype, \"\\n\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b256b07f", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: indivdual variable info table (collect and display variable info as a tabular format)\n", + "df_info = pd.DataFrame()\n", + "df_info[\"var_name\"] = list(ds.variables)\n", + "df_info[\"dims\"] = df_info.var_name.apply(lambda x: ds[x].dims)\n", + "df_info[\"is_dim\"] = df_info.var_name.apply(lambda x: x in ds.dims)\n", + "df_info[\"n_dim\"] = df_info.var_name.apply(lambda x: len(ds[x].dims))\n", + "df_info[\"attrs\"] = df_info.var_name.apply(lambda x: ds[x].attrs)\n", + "df_info[\"dtype\"] = df_info.var_name.apply(lambda x: ds[x].data.dtype)\n", + "df_info" + ] + }, + { + "cell_type": "markdown", + "id": "e6a36b96", + "metadata": {}, + "source": [ + "### Data cleaning/Preprocessing (skipped)\n", + "Data cleaning and preprocessing is an important stage in the the data analysis pipeline. However it will be out of the scope of this notebook. For more information about data cleaning and preprocessing basics, here are some references. 
\n", + "* [Xarray Fundamentals](https://earth-env-data-science.github.io/lectures/xarray/xarray.html)\n", + "* [Xarray Tutorial — Pangeo Gallery documentation](http://gallery.pangeo.io/repos/pangeo-data/pangeo-tutorial-gallery/xarray.html)\n", + "* [Pythonic Data Cleaning With pandas and NumPy](https://realpython.com/python-data-cleaning-numpy-pandas/)\n", + "* [Pandas - Cleaning Data](https://www.w3schools.com/python/pandas/pandas_cleaning.asp)" + ] + }, + { + "cell_type": "markdown", + "id": "ae6e1a69", + "metadata": {}, + "source": [ + "### Plotting\n", + "Data visualization (or plotting) is another important data analysis topic and deserves its own discussion. Here in this notebook we will only demonstrate basic tools for simple data visualization tasks.\n", + "\n", + "Here are some references you might find useful:\n", + "* xarray plotting: https://docs.xarray.dev/en/stable/user-guide/plotting.html\n", + "* Atmospheric Community Toolkit (ACT): https://arm-doe.github.io/ACT/index.html\n", + "\n", + "Note: this notebook is auto-generated using a template. It uses a general approach to select the variable(s) to plot and is not customized to each individual notebook. Feel free to change the variables of interest, especially if a figure fails to plot." + ] + }, + { + "cell_type": "markdown", + "id": "7e417fb2", + "metadata": {}, + "source": [ + "#### 1-dimensional basic time series plot\n", + "\n", + "For the following plot we would like to find variables such that\n", + "* it has one and only one dimension\n", + "* \"time\" is its coordinate variable\n", + "* it is not a dimension itself,\n", + "* it is not a special variable with substrings within [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "\n", + "Please see the following example in action. 
(For more details about pandas filtering, please see the following references.)\n", + "* [pandas: multiple conditions while indexing data frame](https://stackoverflow.com/questions/22591174/pandas-multiple-conditions-while-indexing-data-frame-unexpected-behavior)\n", + "* [pandas.DataFrame.apply](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.apply.html)\n", + "* [How to test if a string contains one of the substrings in a list...](https://stackoverflow.com/questions/26577516/how-to-test-if-a-string-contains-one-of-the-substrings-in-a-list-in-pandas)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "30edac2d", + "metadata": {}, + "outputs": [], + "source": [ + "# (pandas query) filter n_dim==1, non-dimenssional, and not contains the following substrings\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter = df_info[(df_info.n_dim == 1) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (df_info.is_dim==False) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4777a659", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " var_1d = df_filter.var_name.values[0]\n", + " var_1d\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fb5ae985", + "metadata": {}, + "outputs": [], + "source": [ + "# Note: if failed, change to another variable to plot.\n", + "try:\n", + " ds[var_1d].plot()\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "markdown", + "id": "9e41066e", + "metadata": {}, + "source": [ + "#### 2-dimensional basic plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "09bd4adc", + "metadata": {}, + "outputs": [], + "source": [ + "# (pandas query) filter n_dim==2, non-dimenssional, and not contains the 
following substrings\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_2 = df_info[(df_info.n_dim == 2) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (df_info.is_dim==False) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter_2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2cdea1a9", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " var_2d = df_filter_2.var_name.values[0]\n", + " var_2d\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f5f48879", + "metadata": {}, + "outputs": [], + "source": [ + "# Note: if failed, change to another variable to plot.\n", + "try:\n", + " print(ds[var_2d].dims)\n", + " # ds[var_2d].plot()\n", + "\n", + " # conventionally, use \"time\" as x-axis\n", + " ds[var_2d].plot(x=\"time\")\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "markdown", + "id": "7554b2a6", + "metadata": {}, + "source": [ + "#### qc-plotting (optional)\n", + "\n", + "Note: act qc plotting has more strict requirement, one of them is the associated qc variable need to have \"flag_masks\" attributes. 
Which is added by using ds = act.io.armfiles.read_netcdf(files_list), then ds.clean.cleanup()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "71c2096f", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " ds_act = act.io.armfiles.read_netcdf(full_path)\n", + " print(type(ds_act))\n", + " ds_act.clean.cleanup()\n", + "\n", + " # or \n", + " # ds.clean.cleanup()\n", + "except Exception as e:\n", + " print(\"ERROR\", e)\n", + " ds_act = ds" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "844f8505", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter valid variables for ACT qc plotting\n", + "condition = (\"qc_\" + df_info.var_name).apply(lambda x: ds_act[x].attrs.get(\"flag_masks\") is not None \n", + " if x in list(ds.variables) else False)\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_3 = df_info[(df_info.is_dim==False) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings))) &\n", + " condition\n", + " ]\n", + "df_filter_3" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ecd38cc6", + "metadata": {}, + "outputs": [], + "source": [ + "# QC Plot\n", + "try:\n", + " qc_variable = df_filter_3.var_name.values[0]\n", + " print(qc_variable)\n", + "except Exception as e:\n", + " print(e)\n", + " \n", + "try:\n", + "\n", + " # Plot\n", + " qc_display = act.plotting.TimeSeriesDisplay(ds_act)\n", + " qc_display.add_subplots((2,), figsize = (9.5,10))\n", + " qc_ax = qc_display.plot(qc_variable, subplot_index=(0,), set_title=\"QC results on field: \" + qc_variable,)\n", + " qc_ax.grid()\n", + " qc_display.qc_flag_block_plot(qc_variable, subplot_index=(1,))\n", + " qc_ax.set_xlabel(f\"UTC Time starts at {ds.time.data[0]}\")\n", + "\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e)" + ] + }, + { + "cell_type": "markdown", + "id": "d76e4d27", + "metadata": 
{}, + "source": [ + "#### bonus: choose variables to plot from a dropdown menu " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bb733804", + "metadata": {}, + "outputs": [], + "source": [ + "# Valid variables filtering\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_4 = df_info[(df_info.is_dim==False) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter_4" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "82d5c20d", + "metadata": {}, + "outputs": [], + "source": [ + "# example 1: using xarray plot\n", + "\n", + "# Uncomment the following cell to try the interactive plot (ctrl + /)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2fc9aaa4", + "metadata": {}, + "outputs": [], + "source": [ + "# %matplotlib widget\n", + "# plt.clf()\n", + "\n", + "# fig, ax = plt.subplots(figsize=(10, 4))\n", + "\n", + "# available_variables = df_filter_4.var_name.values\n", + "# @widgets.interact(var=available_variables)\n", + "# def update(var = available_variables[0]):\n", + "# fig.clear() # Remove old lines from plot and plot new one\n", + "# if len(ds[var].dims)==2:\n", + "# ds[var].plot(x=\"time\", add_colorbar=False)\n", + "# else:\n", + "# ds[var].plot()\n", + "# plt.grid()\n", + "# plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fabdd802", + "metadata": {}, + "outputs": [], + "source": [ + "# example 2: using act plot\n", + "\n", + "# Uncomment the following cell to try the interactive plot (ctrl + /)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "962b4186", + "metadata": {}, + "outputs": [], + "source": [ + "# %matplotlib widget\n", + "# plt.clf()\n", + "\n", + "# available_variables = df_filter_4.var_name.values\n", + "\n", + "\n", + "# @widgets.interact(var=available_variables)\n", + "# def 
update(var = available_variables[0]):\n", + "\n", + "# i_display = act.plotting.TimeSeriesDisplay(ds_act)\n", + "# i_display.add_subplots((1,), figsize=(10, 4))\n", + "# ax = i_display.plot(var, subplot_index=(0,), set_title=f\"{var} ({ds_act[var].attrs['long_name']})\",)\n", + "\n", + "# ax.set_xlabel(f\"UTC Time starts at {ds.time.data[0]}\")\n", + "# ax.grid()\n", + "# plt.show()\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.16" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/BEFLUX/beflux1long.c1.ipynb b/VAPs/quicklook/BEFLUX/beflux1long.c1.ipynb new file mode 100644 index 00000000..02a9622a --- /dev/null +++ b/VAPs/quicklook/BEFLUX/beflux1long.c1.ipynb @@ -0,0 +1,297 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# BEFLUX1LONG.C1 Plots\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/beflux) for more information about this vap." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib widget\n", + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import ipywidgets as widgets\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "# Data archive directory\n", + "DATA_DIR = r'/data/archive/'\n", + "\n", + "# Datastream info\n", + "DATASTREAM_NAME = 'beflux1long'\n", + "DATA_LEVEL = 'c1'\n", + "LOCATIONS = [{'end_date': '2023-12-18', 'facility': 'C1', 'site': 'sgp', 'start_date': '1995-05-19'}]" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "9faaf875", + "metadata": {}, + "source": [ + "## Define site, facility, and date range" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ac6764f5", + "metadata": {}, + "outputs": [], + "source": [ + "print(\"The following locations and date ranges are available for this VAP:\")\n", + "display(pd.DataFrame(LOCATIONS, columns=['site', 'facility', 'start_date', 'end_date']))" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "8d132223", + "metadata": {}, + "source": [ + "#### Define site, facility, and date range (date format: YYYY-MM-DD) using the variables below:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e563983a", + "metadata": {}, + "outputs": [], + "source": [ + "site_facility = ( 'sgp', 'C1' )\n", + "\n", + "date_start = '2023-12-16'\n", + "date_end = '2023-12-18'" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "bccd3dfe-2f99-49a5-bace-ea37e8dd8fc3", + "metadata": {}, + "source": [ + "## Load data files\n", + "Load data files from /data/archive/" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cb4b9a26-c574-49c0-a521-658fa553e39e", + "metadata": 
{}, + "outputs": [], + "source": [ + "# Compile list of files\n", + "site, facility = site_facility\n", + "d_date_start = datetime.strptime(date_start, '%Y-%m-%d')\n", + "d_date_end = datetime.strptime(date_end, '%Y-%m-%d')\n", + "dir_path = os.path.join(DATA_DIR + site, site + DATASTREAM_NAME + facility + r'.' + DATA_LEVEL )\n", + "dir_path\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6be8f3dc", + "metadata": {}, + "outputs": [], + "source": [ + "from datetime import date, timedelta\n", + "import pandas as pd\n", + "\n", + "def get_ARM_formated_dates(start_date, end_date):\n", + " \"\"\"\n", + " Get a list of ARM conventional formated date lists, based on start_date and end_date(inclusive)\n", + " EXAMPLE:\n", + " get_ARM_formated_dates(start_date=\"20180219\", end_date=\"20180221\")\n", + " >> [\"20180219\", \"20180220\", \"20180221\"] \n", + " \"\"\"\n", + " \n", + " _start_date = pd.to_datetime(start_date)\n", + " _end_date = pd.to_datetime(end_date)\n", + " \n", + " delta = _end_date - _start_date # returns timedelta \n", + " dates = []\n", + "\n", + " for i in range(delta.days + 1):\n", + " day = _start_date + timedelta(days=i)\n", + " day_formated = day.strftime(format=\"%Y%m%d\")\n", + " dates.append(day_formated)\n", + " return dates\n", + "\n", + "\n", + "get_ARM_formated_dates(start_date=date_start, end_date=date_end)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "51feea2e", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter a list of files based on date pattern\n", + "import glob\n", + "dates = get_ARM_formated_dates(start_date=date_start, end_date=date_end)\n", + "files_filter = []\n", + "for date in dates:\n", + " files_filter += glob.glob(f'{dir_path}/*.{date}*.*')\n", + " files_filter\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0e5d0d7", + "metadata": {}, + "outputs": [], + "source": [ + "# Load files as a single dataset\n", + 
"files_list = files_filter \n", + "ds = act.io.armfiles.read_netcdf(files_list)\n", + "ds.clean.cleanup()\n", + "print(f'{len(files_list)} files loaded')\n", + "ds\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "4a551094-9ec0-4b64-b80a-9940573c2f50", + "metadata": {}, + "source": [ + "## Plot time series data\n", + "#### Define the list of variables to be plotted:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d09b789e-84f1-4605-846b-a72c110c8048", + "metadata": {}, + "outputs": [], + "source": [ + "variables_to_plot = ['down_short_hemisp', 'down_long_hemisp', 'short_direct_normal']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3458fb08-035b-4898-9253-0a94e6f9c97b", + "metadata": {}, + "outputs": [], + "source": [ + "ts_display = act.plotting.TimeSeriesDisplay(ds)\n", + "ts_display.add_subplots((len(variables_to_plot),), figsize = (9.5,4*len(variables_to_plot)))\n", + "\n", + "for i,v in enumerate(variables_to_plot):\n", + " ts_ax = ts_display.plot(v, subplot_index=(i,), set_title=ds.variables[v].attrs['long_name'],)\n", + " ts_ax.grid()\n", + "\n", + "plt.show()\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "2308db16-f362-4033-a11a-c3e5e75ad9ba", + "metadata": {}, + "source": [ + "## Field selection dropdown menu\n", + "Select variable to be plotted from a dropdown menu" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d5792fa8-2253-441a-8ac6-1e07a0345031", + "metadata": {}, + "outputs": [], + "source": [ + "plt.ioff()\n", + "\n", + "# populate dropdown menu with available variables \n", + "available_variables = [v for v in ds.variables if not('time' in v or v.startswith('qc_') or v.startswith('source_')) and 'long_name' in ds.variables[v].attrs]\n", + "d_variable = 'down_short_hemisp'\n", + "dropdown = widgets.Dropdown(\n", + " options = [(ds.variables[v].attrs['long_name'], v) for v in available_variables],\n", + " value= d_variable,\n", 
+ " description='Field:',\n", + " disabled=False,\n", + ")\n", + "dropdown.layout.margin = '0px 30% 0px 20%'\n", + "dropdown.layout.width = '50%'\n", + "\n", + "# set up display\n", + "i_display = act.plotting.TimeSeriesDisplay(ds)\n", + "i_display.add_subplots((1,), figsize = (9.5,5))\n", + "i_ax = i_display.plot(d_variable, subplot_index=(0,), set_title=ds.variables[d_variable].attrs['long_name'],)\n", + "i_ax.grid()\n", + "i_fig = i_display.fig\n", + "\n", + "# update plot callback function\n", + "def update_plot(change):\n", + " i_ax.cla()\n", + " i_ax_new = i_display.plot(change.new, subplot_index=(0,), set_title=ds.variables[change.new].attrs['long_name'],)\n", + " i_ax_new.grid()\n", + " i_fig.canvas.draw()\n", + " i_fig.canvas.flush_events()\n", + "\n", + "dropdown.observe(update_plot, names='value')\n", + "\n", + "widgets.AppLayout(\n", + " header=dropdown,\n", + " center=i_fig.canvas,\n", + " pane_heights=[1, 6,1]\n", + ")\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.12" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/BEFLUX/qcflux1long.c1.ipynb b/VAPs/quicklook/BEFLUX/qcflux1long.c1.ipynb new file mode 100644 index 00000000..c8ae1ac1 --- /dev/null +++ b/VAPs/quicklook/BEFLUX/qcflux1long.c1.ipynb @@ -0,0 +1,297 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# QCFLUX1LONG.C1 Plots\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/beflux) for more 
information about this vap." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib widget\n", + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import ipywidgets as widgets\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "# Data archive directory\n", + "DATA_DIR = r'/data/archive/'\n", + "\n", + "# Datastream info\n", + "DATASTREAM_NAME = 'qcflux1long'\n", + "DATA_LEVEL = 'c1'\n", + "LOCATIONS = [{'end_date': '2023-12-18', 'facility': 'C1', 'site': 'sgp', 'start_date': '1997-03-21'}]" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "9faaf875", + "metadata": {}, + "source": [ + "## Define site, facility, and date range" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ac6764f5", + "metadata": {}, + "outputs": [], + "source": [ + "print(\"The following locations and date ranges are available for this VAP:\")\n", + "display(pd.DataFrame(LOCATIONS, columns=['site', 'facility', 'start_date', 'end_date']))" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "8d132223", + "metadata": {}, + "source": [ + "#### Define site, facility, and date range (date format: YYYY-MM-DD) using the variables below:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e563983a", + "metadata": {}, + "outputs": [], + "source": [ + "site_facility = ( 'sgp', 'C1' )\n", + "\n", + "date_start = '2023-12-16'\n", + "date_end = '2023-12-18'" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "bccd3dfe-2f99-49a5-bace-ea37e8dd8fc3", + "metadata": {}, + "source": [ + "## Load data files\n", + "Load data files from /data/archive/" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": 
"cb4b9a26-c574-49c0-a521-658fa553e39e", + "metadata": {}, + "outputs": [], + "source": [ + "# Compile list of files\n", + "site, facility = site_facility\n", + "d_date_start = datetime.strptime(date_start, '%Y-%m-%d')\n", + "d_date_end = datetime.strptime(date_end, '%Y-%m-%d')\n", + "dir_path = os.path.join(DATA_DIR + site, site + DATASTREAM_NAME + facility + r'.' + DATA_LEVEL )\n", + "dir_path\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6be8f3dc", + "metadata": {}, + "outputs": [], + "source": [ + "from datetime import date, timedelta\n", + "import pandas as pd\n", + "\n", + "def get_ARM_formated_dates(start_date, end_date):\n", + " \"\"\"\n", + " Get a list of ARM conventional formated date lists, based on start_date and end_date(inclusive)\n", + " EXAMPLE:\n", + " get_ARM_formated_dates(start_date=\"20180219\", end_date=\"20180221\")\n", + " >> [\"20180219\", \"20180220\", \"20180221\"] \n", + " \"\"\"\n", + " \n", + " _start_date = pd.to_datetime(start_date)\n", + " _end_date = pd.to_datetime(end_date)\n", + " \n", + " delta = _end_date - _start_date # returns timedelta \n", + " dates = []\n", + "\n", + " for i in range(delta.days + 1):\n", + " day = _start_date + timedelta(days=i)\n", + " day_formated = day.strftime(format=\"%Y%m%d\")\n", + " dates.append(day_formated)\n", + " return dates\n", + "\n", + "\n", + "get_ARM_formated_dates(start_date=date_start, end_date=date_end)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "51feea2e", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter a list of files based on date pattern\n", + "import glob\n", + "dates = get_ARM_formated_dates(start_date=date_start, end_date=date_end)\n", + "files_filter = []\n", + "for date in dates:\n", + " files_filter += glob.glob(f'{dir_path}/*.{date}*.*')\n", + " files_filter\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0e5d0d7", + "metadata": {}, + "outputs": [], + 
"source": [ + "# Load files as a single dataset\n", + "files_list = files_filter \n", + "ds = act.io.armfiles.read_netcdf(files_list)\n", + "ds.clean.cleanup()\n", + "print(f'{len(files_list)} files loaded')\n", + "ds\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "4a551094-9ec0-4b64-b80a-9940573c2f50", + "metadata": {}, + "source": [ + "## Plot time series data\n", + "#### Define the list of variables to be plotted:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d09b789e-84f1-4605-846b-a72c110c8048", + "metadata": {}, + "outputs": [], + "source": [ + "variables_to_plot = ['down_short_hemisp', 'down_long_hemisp', 'short_direct_normal']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3458fb08-035b-4898-9253-0a94e6f9c97b", + "metadata": {}, + "outputs": [], + "source": [ + "ts_display = act.plotting.TimeSeriesDisplay(ds)\n", + "ts_display.add_subplots((len(variables_to_plot),), figsize = (9.5,4*len(variables_to_plot)))\n", + "\n", + "for i,v in enumerate(variables_to_plot):\n", + " ts_ax = ts_display.plot(v, subplot_index=(i,), set_title=ds.variables[v].attrs['long_name'],)\n", + " ts_ax.grid()\n", + "\n", + "plt.show()\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "2308db16-f362-4033-a11a-c3e5e75ad9ba", + "metadata": {}, + "source": [ + "## Field selection dropdown menu\n", + "Select variable to be plotted from a dropdown menu" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d5792fa8-2253-441a-8ac6-1e07a0345031", + "metadata": {}, + "outputs": [], + "source": [ + "plt.ioff()\n", + "\n", + "# populate dropdown menu with available variables \n", + "available_variables = [v for v in ds.variables if not('time' in v or v.startswith('qc_') or v.startswith('source_')) and 'long_name' in ds.variables[v].attrs]\n", + "d_variable = 'down_short_hemisp'\n", + "dropdown = widgets.Dropdown(\n", + " options = [(ds.variables[v].attrs['long_name'], v) for v 
in available_variables],\n", + " value= d_variable,\n", + " description='Field:',\n", + " disabled=False,\n", + ")\n", + "dropdown.layout.margin = '0px 30% 0px 20%'\n", + "dropdown.layout.width = '50%'\n", + "\n", + "# set up display\n", + "i_display = act.plotting.TimeSeriesDisplay(ds)\n", + "i_display.add_subplots((1,), figsize = (9.5,5))\n", + "i_ax = i_display.plot(d_variable, subplot_index=(0,), set_title=ds.variables[d_variable].attrs['long_name'],)\n", + "i_ax.grid()\n", + "i_fig = i_display.fig\n", + "\n", + "# update plot callback function\n", + "def update_plot(change):\n", + " i_ax.cla()\n", + " i_ax_new = i_display.plot(change.new, subplot_index=(0,), set_title=ds.variables[change.new].attrs['long_name'],)\n", + " i_ax_new.grid()\n", + " i_fig.canvas.draw()\n", + " i_fig.canvas.flush_events()\n", + "\n", + "dropdown.observe(update_plot, names='value')\n", + "\n", + "widgets.AppLayout(\n", + " header=dropdown,\n", + " center=i_fig.canvas,\n", + " pane_heights=[1, 6,1]\n", + ")\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.12" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/CCNKAPPA/CCNKAPPA_tutorial.ipynb b/VAPs/quicklook/CCNKAPPA/CCNKAPPA_tutorial.ipynb new file mode 100644 index 00000000..9d9602fc --- /dev/null +++ b/VAPs/quicklook/CCNKAPPA/CCNKAPPA_tutorial.ipynb @@ -0,0 +1,838 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# AOSCCNSMPSKAPPA.C1 Notebook\n", + "\n", + "[Click 
here](https://www.arm.gov/capabilities/vaps/ccnkappa) for more information about this vap." + ] + }, + { + "cell_type": "markdown", + "id": "97097763", + "metadata": {}, + "source": [ + "In this notebook, we demonstrate the workflow to explore ARM vap data (using aosccnsmpskappa as an example.) Value-added products (VAPs) are higher-order data products that have been analyzed and processed to ease scientists’ use of ARM data in atmospheric research and global climate models. \n", + "Here is the main content we will cover." + ] + }, + { + "cell_type": "markdown", + "id": "eddec40f", + "metadata": {}, + "source": [ + "# Table of Content\n", + "## Access the data\n", + "* How to retrieve the data\n", + "* Data path and file name conventions\n", + "* Load data\n", + "## Explore the data\n", + "* NetCDF Data structure\n", + "* Xarray essentials\n", + "* Xarray Variable\n", + "## Plot the data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "import random\n", + "\n", + "import glob\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "id": "2372d5be", + "metadata": {}, + "source": [ + "## Access the data" + ] + }, + { + "cell_type": "markdown", + "id": "5b0d9684", + "metadata": {}, + "source": [ + "### How to retrieve the data\n", + "We assume the path \"/data/archive\" is available where you are running this notebook. You can use `os.path.exists(\"/data/archive\")` to verify if the path exists at your machine.\n", + "\n", + "\n", + "### Data path and file name conventions\n", + "There several common terminologies regarding ARM data, for example, data-stream-name, data-level, site, facility, instrument, etc. 
(For more details, please see [ARM Data File Standards Version 1.2](https://www.google.com/search?q=arm+datastream+facility+cite+definition&rlz=1C1GCEJ_enUS1029US1029&ei=hb41ZICrBPukqtsPvMODIA&ved=0ahUKEwjAgeiS0aL-AhV7kmoFHbzhAAQQ4dUDCBA&uact=5&oq=arm+datastream+facility+cite+definition&gs_lcp=Cgxnd3Mtd2l6LXNlcnAQAzIFCAAQogQyBQgAEKIEMgUIABCiBDIFCAAQogQyBQgAEKIEOgoIABBHENYEELADOgoIIRCgARDDBBAKSgQIQRgAUJ4DWIoOYO0PaAFwAXgAgAGMAYgBuQmSAQMyLjmYAQCgAQHIAQjAAQE&sclient=gws-wiz-serp).) \n", + "\n", + "For example, this notebook is called `aosccnsmpskappa.c1`, where `aosccnsmpskappa` is the \"datastream name\", and the `c1` is the \"data level\".\n", + "\n", + "This datastream also contains site `cor` and facility `M1`. (Note: individual datastream might have multiple site-facility pairs.)\n", + "In such a case, the data of this data-stream is stored at `/data/archive/cor/coraosccnsmpskappaM1.c1`, which is in the format of `<DATA_DIR>/<site>/<site><datastream_name><facility>.<data_level>`. We can use the following method to assign the data-stream directory `datastream_dir = os.path.join(DATA_DIR, site, site + DATASTREAM_NAME + facility + '.' + DATA_LEVEL )`\n", + "\n", + "The data files under datastream_dir also follow naming conventions. But once you reach the datastream_dir level, the most important file naming convention to differentiate the files is \"yyyyMMdd.hhmmss\", which comes in handy to filter out files based on datetime. 
For example, we can use `glob.glob(f'{datastream_dir}/*.200709*.*')` to filter files from September 2007.\n", + "\n", + "(Note: refer to the https://adc.arm.gov/discovery/#/ and https://adc.arm.gov/solr8/metadata/select API to explore ARM datastreams and associated available sites and facilities.)\n", + "\n", + "Please see the following examples in action" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d7e9eb85", + "metadata": {}, + "outputs": [], + "source": [ + "# Verify if DATA_DIR path exists\n", + "DATA_DIR = \"/data/archive\"\n", + "os.path.exists(DATA_DIR)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "586993fd", + "metadata": {}, + "outputs": [], + "source": [ + "# Specify datastream_dir following the path conventions and check its existence\n", + "DATASTREAM_NAME = \"aosccnsmpskappa\"\n", + "DATA_LEVEL = \"c1\"\n", + "site = \"cor\"\n", + "facility = \"M1\"\n", + "datastream_dir = os.path.join(DATA_DIR, site, site + DATASTREAM_NAME + facility + '.' 
+ DATA_LEVEL )\n", + "print(datastream_dir)\n", + "print(os.path.exists(datastream_dir))\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0742f7c1", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: list 5 (random) files under datastream_dir\n", + "files = os.listdir(datastream_dir)\n", + "files[:5]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "39b98a36", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: get most recent file\n", + "list_of_files = glob.glob(f\"{datastream_dir}/*\") # * means all if need specific format then *.csv\n", + "latest_file = max(list_of_files, key=os.path.getctime)\n", + "latest_file" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "902d514e", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus sort datastream files based on datetime\n", + "files = os.listdir(datastream_dir)\n", + "file_sorted = files.copy()\n", + "file_sorted.sort() \n", + "print(file_sorted[:5])\n", + "\n", + "# to reverse\n", + "file_sorted_reverse = files.copy() \n", + "file_sorted_reverse.sort(reverse=True)\n", + "print(file_sorted_reverse[:5])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ec5923b2", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: pattern matching\n", + "# filter the 200709** files under datastream_dir\n", + "files_filter = glob.glob(f'{datastream_dir}/*.200709*.*')\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c4a4aa18", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: use armnotebook_utils.file_filter (TODO) to filter files based on datastream info \n" + ] + }, + { + "cell_type": "markdown", + "id": "cfeb9efc", + "metadata": {}, + "source": [ + "### Load data\n", + "The arm data is stored in [NetCDF](https://en.wikipedia.org/wiki/NetCDF#:~:text=%22NetCDF%20(network%20Common%20Data%20Form,format%20for%20representing%20scientific%20data.) format. 
We can use xarray's `open_dataset` method to load a single file, or `open_mfdataset` to open multiple files. (Note: the latter will still return a single xarray dataset object by combining multiple files.)\n", + "\n", + "Note: open_dataset keeps the file handle open and lazy loads its contents. All parameters are passed directly to open_dataset. It is a preferable method over load_dataset for memory efficiency. (For more details, please see [xarray.open_dataset](https://docs.xarray.dev/en/stable/generated/xarray.open_dataset.html))\n", + "\n", + "See the following example in action" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a440a329", + "metadata": {}, + "outputs": [], + "source": [ + "# open single file\n", + "full_path = latest_file\n", + "print(full_path)\n", + "ds_single = xr.open_dataset(full_path)\n", + "ds_single" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0143a3d", + "metadata": {}, + "outputs": [], + "source": [ + "print(ds_single)\n", + "print(type(ds_single))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1c0f8939", + "metadata": {}, + "outputs": [], + "source": [ + "# open multiple files\n", + "n_files = 3\n", + "full_paths = [os.path.join(datastream_dir, f_path) for f_path in file_sorted_reverse[:n_files]]\n", + "print(full_paths)\n", + "try: # Note: sometimes multiple files cannot be merged, so we used try except here.\n", + " ds_mutiple = xr.open_mfdataset(full_paths)\n", + " # ds_mutiple\n", + " print(type(ds_mutiple))\n", + " print(ds_mutiple)\n", + "except Exception as e:\n", + " print(e)" + ] + }, + { + "cell_type": "markdown", + "id": "3fa59943", + "metadata": {}, + "source": [ + "## Explore the data" + ] + }, + { + "cell_type": "markdown", + "id": "571f69b9", + "metadata": {}, + "source": [ + "### NetCDF Data structure \n", + "\n", + "\n", + "(If you are confident with NetCDF basics and xarray essentials, feel free to skip this section.)\n", + "Before we dive into 
data exploration, there are some essential concepts we should be familiar with (shown below): \n", + "* Dataset\n", + "* Data array\n", + "* Variable\n", + "* Dimension\n", + "* Coordinate\n", + "* Data Type\n", + "* Meta Data (Attributes)\n", + "\n", + "We will not go into details about NetCDF basics, but here are some references you might find helpful\n", + "* [Components of a NetCDF Dataset](https://iprc.soest.hawaii.edu/users/xfu/tool/guidef-7.html#)\n", + "* [Network Common Data Form (NetCDF)](https://www.unidata.ucar.edu/software/netcdf/) and [A Brief History of (netCDF) Time](https://www.unidata.ucar.edu/software/netcdf/time/recs.html)\n", + "\n", + "\n", + "\n", + "### Xarray essentials\n", + "Earlier we introduced the xarray package and used `open_dataset` and `open_mfdataset` to retrieve a NetCDF data file as an xarray Dataset object. Recall that you can review the data in a notebook by using `print(ds)` or `ds`. \n", + "\n", + "Using xarray to retrieve the aforementioned NetCDF basics is straightforward. 
In general,\n", + "* Dataset: `ds`\n", + "* Data array: `ds.variables`\n", + "* Variable: `ds.variables`\n", + "* Dimension: `ds.dims`\n", + "* Coordinate: `ds.coords`\n", + "* Data Type: `type`\n", + "* Meta Data (Attributes): `ds.attrs`\n", + "\n", + "Also, here are some references if you are new to xarray\n", + "* [Xarray in 45 minutes](https://tutorial.xarray.dev/overview/xarray-in-45-min.html)\n", + "* [Handling NetCDF Files using XArray for Absolute Beginners](https://towardsdatascience.com/handling-netcdf-files-using-xarray-for-absolute-beginners-111a8ab4463f)\n", + "\n", + "Try the following commands in action\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b3eac323", + "metadata": {}, + "outputs": [], + "source": [ + "ds = ds_single" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "77ecf85d", + "metadata": {}, + "outputs": [], + "source": [ + "# Dataset \n", + "ds" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "25e7de09", + "metadata": {}, + "outputs": [], + "source": [ + "# Data array, Variable\n", + "# Note: the info can be overwhelming. 
Do not attempt to grasp everything at the first glance.\n", + "ds.variables" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8c41b67e", + "metadata": {}, + "outputs": [], + "source": [ + "# dimensions\n", + "ds.dims" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "156f1dfc", + "metadata": {}, + "outputs": [], + "source": [ + "# coordinates\n", + "ds.coords" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "277d6064", + "metadata": {}, + "outputs": [], + "source": [ + "# Meta Data (Attributes)\n", + "ds.attrs" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d334681f", + "metadata": {}, + "outputs": [], + "source": [ + "# type\n", + "print(type(ds))\n", + "print(type(ds.variables))\n", + "print(type(ds.dims))\n", + "print(type(ds.coords))\n", + "print(type(ds.attrs))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "643399d6", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: retrieve variable names only\n", + "list(ds.variables)" + ] + }, + { + "cell_type": "markdown", + "id": "1cf36878", + "metadata": {}, + "source": [ + "#### Discussion: variable vs. coordinates vs. dimensions. \n", + "From [Components of a NetCDF Dataset](https://iprc.soest.hawaii.edu/users/xfu/tool/guidef-7.html#):\n", + "\n", + "* Dimension: A dimension may be used to represent a real physical dimension, for example, time, latitude, longitude, or height. A dimension might also be used to index other quantities, for example station or model-run-number.\n", + "A netCDF dimension has both a name and a length. A dimension length is an arbitrary positive integer, except that one dimension in a netCDF dataset can have the length UNLIMITED.\n", + "\n", + "* Variables: Variables are used to store the bulk of the data in a netCDF dataset. A variable represents an array of values of the same type. A scalar value is treated as a 0-dimensional array. 
A variable has a name, a data type, and a shape described by its list of dimensions specified when the variable is created. A variable may also have associated attributes, which may be added, deleted or changed after the variable is created.\n", + "\n", + "* Coordinate (Variables): It is legal for a variable to have the same name as a dimension. Such variables have no special meaning to the netCDF library. However there is a convention that such variables should be treated in a special way by software using this library.\n", + "A variable with the same name as a dimension is called a coordinate variable. It typically defines a physical coordinate corresponding to that dimension.\n", + "\n", + "\n", + "Tips: By definition, coordinates and dimensions are also variables. For example, `ds.time` is a coordinate, but it is also a special variable. In practice, however, when we talk about a variable, we mean a regular (non-coordinate) variable. We will use this convention for the rest of the notebook. " + ] + }, + { + "cell_type": "markdown", + "id": "d47120f9", + "metadata": {}, + "source": [ + "### Xarray Variable\n", + "\n", + "\n", + "We can use `ds.variables` to access the variables of a dataset. But printing out the whole dataset can often be overwhelming. Instead, we can work on an individual variable (also called an xarray data array) using the `ds[var_name]` syntax, where `var_name` is the name of the variable. See the following examples in action.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5a57e136", + "metadata": {}, + "outputs": [], + "source": [ + "print(type(ds[\"time\"]))\n", + "ds[\"time\"]" + ] + }, + { + "cell_type": "markdown", + "id": "51ec643e", + "metadata": {}, + "source": [ + "#### Variable properties\n", + "For an individual variable, the following properties are the most commonly used; each is listed with the associated xarray attribute used to retrieve it. 
(assuming var = ds[var_name])\n", + "* name: `var.name`\n", + "* data content: `var.data`\n", + "* attributes: `var.attrs`\n", + "* dimensions: `var.dims`\n", + "* data type: `var.data.dtype`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a632a525", + "metadata": {}, + "outputs": [], + "source": [ + "var_name = \"time\"\n", + "var = ds[var_name]\n", + "\n", + "print(\"var.name: \\n\", var.name, \"\\n\")\n", + "print(\"var.data: \\n\", var.data, \"\\n\")\n", + "print(\"var.attrs: \\n\", var.attrs, \"\\n\")\n", + "print(\"var.dims: \\n\", var.dims, \"\\n\")\n", + "print(\"var.data.dtype: \\n\", var.data.dtype, \"\\n\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b256b07f", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: individual variable info table (collect and display variable info in a tabular format)\n", + "df_info = pd.DataFrame()\n", + "df_info[\"var_name\"] = list(ds.variables)\n", + "df_info[\"dims\"] = df_info.var_name.apply(lambda x: ds[x].dims)\n", + "df_info[\"is_dim\"] = df_info.var_name.apply(lambda x: x in ds.dims)\n", + "df_info[\"n_dim\"] = df_info.var_name.apply(lambda x: len(ds[x].dims))\n", + "df_info[\"attrs\"] = df_info.var_name.apply(lambda x: ds[x].attrs)\n", + "df_info[\"dtype\"] = df_info.var_name.apply(lambda x: ds[x].data.dtype)\n", + "df_info" + ] + }, + { + "cell_type": "markdown", + "id": "e6a36b96", + "metadata": {}, + "source": [ + "### Data cleaning/Preprocessing (skipped)\n", + "Data cleaning and preprocessing is an important stage in the data analysis pipeline. However, it is outside the scope of this notebook. For more information about data cleaning and preprocessing basics, here are some references. 
\n", + "* [Xarray Fundamentals](https://earth-env-data-science.github.io/lectures/xarray/xarray.html)\n", + "* [Xarray Tutorial — Pangeo Gallery documentation](http://gallery.pangeo.io/repos/pangeo-data/pangeo-tutorial-gallery/xarray.html)\n", + "* [Pythonic Data Cleaning With pandas and NumPy](https://realpython.com/python-data-cleaning-numpy-pandas/)\n", + "* [Pandas - Cleaning Data](https://www.w3schools.com/python/pandas/pandas_cleaning.asp)" + ] + }, + { + "cell_type": "markdown", + "id": "ae6e1a69", + "metadata": {}, + "source": [ + "### Plotting\n", + "Data visualization (or plotting) is another important data analysis topic and deserves its own discussion. Here in this notebook we will only demonstrate basic tools for simple data visualization tasks.\n", + "\n", + "Here are some references you might find useful:\n", + "* xarray plotting: https://docs.xarray.dev/en/stable/user-guide/plotting.html\n", + "* Atmospheric Community Toolkit (ACT): https://arm-doe.github.io/ACT/index.html\n", + "\n", + "Note: this notebook is auto-generated from a template. It uses a generic rule to select the variable(s) to plot and is not customized for each individual notebook. Feel free to change the variables of interest, especially if a figure fails to plot." + ] + }, + { + "cell_type": "markdown", + "id": "7e417fb2", + "metadata": {}, + "source": [ + "#### 1-dimensional basic time series plot\n", + "\n", + "For the following plot we would like to find variables such that\n", + "* it has one and only one dimension\n", + "* \"time\" is its coordinate variable\n", + "* it is not a dimension itself,\n", + "* it is not a special variable with substrings within [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "\n", + "Please see the following example in action. 
(For more details about pandas filtering, please see the following references.)\n", + "* [pandas: multiple conditions while indexing data frame](https://stackoverflow.com/questions/22591174/pandas-multiple-conditions-while-indexing-data-frame-unexpected-behavior)\n", + "* [pandas.DataFrame.apply](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.apply.html)\n", + "* [How to test if a string contains one of the substrings in a list...](https://stackoverflow.com/questions/26577516/how-to-test-if-a-string-contains-one-of-the-substrings-in-a-list-in-pandas)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "30edac2d", + "metadata": {}, + "outputs": [], + "source": [ + "# (pandas query) filter n_dim==1, non-dimensional, and not contains the following substrings\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter = df_info[(df_info.n_dim == 1) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (df_info.is_dim==False) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4777a659", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " var_1d = df_filter.var_name.values[0]\n", + " var_1d\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fb5ae985", + "metadata": {}, + "outputs": [], + "source": [ + "# Note: if failed, change to another variable to plot.\n", + "try:\n", + " ds[var_1d].plot()\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "markdown", + "id": "9e41066e", + "metadata": {}, + "source": [ + "#### 2-dimensional basic plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "09bd4adc", + "metadata": {}, + "outputs": [], + "source": [ + "# (pandas query) filter n_dim==2, non-dimensional, and not contains the 
following substrings\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_2 = df_info[(df_info.n_dim == 2) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (df_info.is_dim==False) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter_2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2cdea1a9", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " var_2d = df_filter_2.var_name.values[0]\n", + " var_2d\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f5f48879", + "metadata": {}, + "outputs": [], + "source": [ + "# Note: if failed, change to another variable to plot.\n", + "try:\n", + " print(ds[var_2d].dims)\n", + " # ds[var_2d].plot()\n", + "\n", + " # conventionally, use \"time\" as x-axis\n", + " ds[var_2d].plot(x=\"time\")\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "markdown", + "id": "7554b2a6", + "metadata": {}, + "source": [ + "#### qc-plotting (optional)\n", + "\n", + "Note: act qc plotting has more strict requirement, one of them is the associated qc variable need to have \"flag_masks\" attributes. 
Which is added by using ds = act.io.armfiles.read_netcdf(files_list), then ds.clean.cleanup()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "71c2096f", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " ds_act = act.io.armfiles.read_netcdf(full_path)\n", + " print(type(ds_act))\n", + " ds_act.clean.cleanup()\n", + "\n", + " # or \n", + " # ds.clean.cleanup()\n", + "except Exception as e:\n", + " print(\"ERROR\", e)\n", + " ds_act = ds" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "844f8505", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter valid variables for ACT qc plotting\n", + "condition = (\"qc_\" + df_info.var_name).apply(lambda x: ds_act[x].attrs.get(\"flag_masks\") is not None \n", + " if x in list(ds.variables) else False)\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_3 = df_info[(df_info.is_dim==False) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings))) &\n", + " condition\n", + " ]\n", + "df_filter_3" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ecd38cc6", + "metadata": {}, + "outputs": [], + "source": [ + "# QC Plot\n", + "try:\n", + " qc_variable = df_filter_3.var_name.values[0]\n", + " print(qc_variable)\n", + "except Exception as e:\n", + " print(e)\n", + " \n", + "try:\n", + "\n", + " # Plot\n", + " qc_display = act.plotting.TimeSeriesDisplay(ds_act)\n", + " qc_display.add_subplots((2,), figsize = (9.5,10))\n", + " qc_ax = qc_display.plot(qc_variable, subplot_index=(0,), set_title=\"QC results on field: \" + qc_variable,)\n", + " qc_ax.grid()\n", + " qc_display.qc_flag_block_plot(qc_variable, subplot_index=(1,))\n", + " qc_ax.set_xlabel(f\"UTC Time starts at {ds.time.data[0]}\")\n", + "\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e)" + ] + }, + { + "cell_type": "markdown", + "id": "d76e4d27", + "metadata": 
{}, + "source": [ + "#### bonus: choose variables to plot from a dropdown menu " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bb733804", + "metadata": {}, + "outputs": [], + "source": [ + "# Valid variables filtering\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_4 = df_info[(df_info.is_dim==False) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter_4" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "82d5c20d", + "metadata": {}, + "outputs": [], + "source": [ + "# example 1: using xarray plot\n", + "\n", + "# Uncomment the following cell to try the interactive plot (ctrl + /)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2fc9aaa4", + "metadata": {}, + "outputs": [], + "source": [ + "# %matplotlib widget\n", + "# plt.clf()\n", + "\n", + "# fig, ax = plt.subplots(figsize=(10, 4))\n", + "\n", + "# available_variables = df_filter_4.var_name.values\n", + "# @widgets.interact(var=available_variables)\n", + "# def update(var = available_variables[0]):\n", + "# fig.clear() # Remove old lines from plot and plot new one\n", + "# if len(ds[var].dims)==2:\n", + "# ds[var].plot(x=\"time\", add_colorbar=False)\n", + "# else:\n", + "# ds[var].plot()\n", + "# plt.grid()\n", + "# plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fabdd802", + "metadata": {}, + "outputs": [], + "source": [ + "# example 2: using act plot\n", + "\n", + "# Uncomment the following cell to try the interactive plot (ctrl + /)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "962b4186", + "metadata": {}, + "outputs": [], + "source": [ + "# %matplotlib widget\n", + "# plt.clf()\n", + "\n", + "# available_variables = df_filter_4.var_name.values\n", + "\n", + "\n", + "# @widgets.interact(var=available_variables)\n", + "# def 
update(var = available_variables[0]):\n", + "\n", + "# i_display = act.plotting.TimeSeriesDisplay(ds_act)\n", + "# i_display.add_subplots((1,), figsize=(10, 4))\n", + "# ax = i_display.plot(var, subplot_index=(0,), set_title=f\"{var} ({ds_act[var].attrs['long_name']})\",)\n", + "\n", + "# ax.set_xlabel(f\"UTC Time starts at {ds.time.data[0]}\")\n", + "# ax.grid()\n", + "# plt.show()\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.16" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/CCNKAPPA/aosccnsmpskappa.c1.ipynb b/VAPs/quicklook/CCNKAPPA/aosccnsmpskappa.c1.ipynb new file mode 100644 index 00000000..4a49117e --- /dev/null +++ b/VAPs/quicklook/CCNKAPPA/aosccnsmpskappa.c1.ipynb @@ -0,0 +1,339 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# AOSCCNSMPSKAPPA.C1 Plots\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/ccnkappa) for more information about this vap." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib widget\n", + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import ipywidgets as widgets\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "# Data archive directory\n", + "DATA_DIR = r'/data/archive/'\n", + "\n", + "# Datastream info\n", + "DATASTREAM_NAME = 'aosccnsmpskappa'\n", + "DATA_LEVEL = 'c1'\n", + "LOCATIONS = [{'end_date': '2019-04-30', 'facility': 'M1', 'site': 'cor', 'start_date': '2018-09-28'}, {'end_date': '2017-10-20', 'facility': 'M1', 'site': 'asi', 'start_date': '2016-05-20'}, {'end_date': '2020-04-23', 'facility': 'M1', 'site': 'anx', 'start_date': '2019-12-01'}, {'end_date': '2023-09-16', 'facility': 'M1', 'site': 'epc', 'start_date': '2023-05-23'}, {'end_date': '2022-09-02', 'facility': 'M1', 'site': 'hou', 'start_date': '2021-09-15'}, {'end_date': '2023-08-23', 'facility': 'C1', 'site': 'ena', 'start_date': '2022-11-01'}, {'end_date': '2021-10-15', 'facility': 'M1', 'site': 'guc', 'start_date': '2021-09-01'}, {'end_date': '2020-10-01', 'facility': 'M1', 'site': 'mos', 'start_date': '2019-10-09'}, {'end_date': '2023-08-18', 'facility': 'E13', 'site': 'sgp', 'start_date': '2017-04-12'}]" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "9faaf875", + "metadata": {}, + "source": [ + "## Define site, facility, and date range" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ac6764f5", + "metadata": {}, + "outputs": [], + "source": [ + "print(\"The following locations and date ranges are available for this VAP:\")\n", + "display(pd.DataFrame(LOCATIONS, columns=['site', 'facility', 'start_date', 'end_date']))" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": 
"8d132223", + "metadata": {}, + "source": [ + "#### Define site, facility, and date range (date format: YYYY-MM-DD) using the variables below:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e563983a", + "metadata": {}, + "outputs": [], + "source": [ + "site_facility = ( 'sgp', 'E13' )\n", + "\n", + "date_start = '2023-08-16'\n", + "date_end = '2023-08-18'" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "bccd3dfe-2f99-49a5-bace-ea37e8dd8fc3", + "metadata": {}, + "source": [ + "## Load data files\n", + "Load data files from /data/archive/" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cb4b9a26-c574-49c0-a521-658fa553e39e", + "metadata": {}, + "outputs": [], + "source": [ + "# Compile list of files\n", + "site, facility = site_facility\n", + "d_date_start = datetime.strptime(date_start, '%Y-%m-%d')\n", + "d_date_end = datetime.strptime(date_end, '%Y-%m-%d')\n", + "dir_path = os.path.join(DATA_DIR + site, site + DATASTREAM_NAME + facility + r'.' 
+ DATA_LEVEL )\n", + "dir_path\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6be8f3dc", + "metadata": {}, + "outputs": [], + "source": [ + "from datetime import date, timedelta\n", + "import pandas as pd\n", + "\n", + "def get_ARM_formated_dates(start_date, end_date):\n", + " \"\"\"\n", + " Get a list of ARM conventional formated date lists, based on start_date and end_date(inclusive)\n", + " EXAMPLE:\n", + " get_ARM_formated_dates(start_date=\"20180219\", end_date=\"20180221\")\n", + " >> [\"20180219\", \"20180220\", \"20180221\"] \n", + " \"\"\"\n", + " \n", + " _start_date = pd.to_datetime(start_date)\n", + " _end_date = pd.to_datetime(end_date)\n", + " \n", + " delta = _end_date - _start_date # returns timedelta \n", + " dates = []\n", + "\n", + " for i in range(delta.days + 1):\n", + " day = _start_date + timedelta(days=i)\n", + " day_formated = day.strftime(format=\"%Y%m%d\")\n", + " dates.append(day_formated)\n", + " return dates\n", + "\n", + "\n", + "get_ARM_formated_dates(start_date=date_start, end_date=date_end)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "51feea2e", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter a list of files based on date pattern\n", + "import glob\n", + "dates = get_ARM_formated_dates(start_date=date_start, end_date=date_end)\n", + "files_filter = []\n", + "for date in dates:\n", + " files_filter += glob.glob(f'{dir_path}/*.{date}*.*')\n", + " files_filter\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0e5d0d7", + "metadata": {}, + "outputs": [], + "source": [ + "# Load files as a single dataset\n", + "files_list = files_filter \n", + "ds = act.io.armfiles.read_netcdf(files_list)\n", + "ds.clean.cleanup()\n", + "print(f'{len(files_list)} files loaded')\n", + "ds\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "4a551094-9ec0-4b64-b80a-9940573c2f50", + "metadata": {}, + "source": [ + "## Plot 
time series data\n", + "#### Define the list of variables to be plotted:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d09b789e-84f1-4605-846b-a72c110c8048", + "metadata": {}, + "outputs": [], + "source": [ + "variables_to_plot = ['kappa', 'critical_diameter', 'aerosol_number_concentration']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3458fb08-035b-4898-9253-0a94e6f9c97b", + "metadata": {}, + "outputs": [], + "source": [ + "ts_display = act.plotting.TimeSeriesDisplay(ds)\n", + "ts_display.add_subplots((len(variables_to_plot),), figsize = (9.5,4*len(variables_to_plot)))\n", + "\n", + "for i,v in enumerate(variables_to_plot):\n", + " ts_ax = ts_display.plot(v, subplot_index=(i,), set_title=ds.variables[v].attrs['long_name'],)\n", + " ts_ax.grid()\n", + "\n", + "plt.show()\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "194399aa-1907-452b-8ba9-bc31d7f60291", + "metadata": {}, + "source": [ + "## Quality check plots\n", + "#### Define variable for QC plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9a0ef63d-3eeb-48fc-a24a-43258b6134b8", + "metadata": {}, + "outputs": [], + "source": [ + "qc_variable = 'kappa'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "532663a3-4dc0-4497-bda8-018c5f91e1c4", + "metadata": {}, + "outputs": [], + "source": [ + "# QC Plot\n", + "if ('qc_'+qc_variable) in ds.variables:\n", + "\n", + " # Plot\n", + " qc_display = act.plotting.TimeSeriesDisplay(ds)\n", + " qc_display.add_subplots((2,), figsize = (9.5,10))\n", + " qc_ax = qc_display.plot(qc_variable, subplot_index=(0,), set_title=\"QC results on field: \" + qc_variable,)\n", + " qc_ax.grid()\n", + " qc_display.qc_flag_block_plot(qc_variable, subplot_index=(1,))\n", + "\n", + " plt.show()\n", + "else:\n", + " print(f'QC not available for the selected field: {qc_variable}')\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": 
"2308db16-f362-4033-a11a-c3e5e75ad9ba", + "metadata": {}, + "source": [ + "## Field selection dropdown menu\n", + "Select variable to be plotted from a dropdown menu" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d5792fa8-2253-441a-8ac6-1e07a0345031", + "metadata": {}, + "outputs": [], + "source": [ + "plt.ioff()\n", + "\n", + "# populate dropdown menu with available variables \n", + "available_variables = [v for v in ds.variables if not('time' in v or v.startswith('qc_') or v.startswith('source_')) and 'long_name' in ds.variables[v].attrs]\n", + "d_variable = 'kappa'\n", + "dropdown = widgets.Dropdown(\n", + " options = [(ds.variables[v].attrs['long_name'], v) for v in available_variables],\n", + " value= d_variable,\n", + " description='Field:',\n", + " disabled=False,\n", + ")\n", + "dropdown.layout.margin = '0px 30% 0px 20%'\n", + "dropdown.layout.width = '50%'\n", + "\n", + "# set up display\n", + "i_display = act.plotting.TimeSeriesDisplay(ds)\n", + "i_display.add_subplots((1,), figsize = (9.5,5))\n", + "i_ax = i_display.plot(d_variable, subplot_index=(0,), set_title=ds.variables[d_variable].attrs['long_name'],)\n", + "i_ax.grid()\n", + "i_fig = i_display.fig\n", + "\n", + "# update plot callback function\n", + "def update_plot(change):\n", + " i_ax.cla()\n", + " i_ax_new = i_display.plot(change.new, subplot_index=(0,), set_title=ds.variables[change.new].attrs['long_name'],)\n", + " i_ax_new.grid()\n", + " i_fig.canvas.draw()\n", + " i_fig.canvas.flush_events()\n", + "\n", + "dropdown.observe(update_plot, names='value')\n", + "\n", + "widgets.AppLayout(\n", + " header=dropdown,\n", + " center=i_fig.canvas,\n", + " pane_heights=[1, 6,1]\n", + ")\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + 
"nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.12" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/CCNPROF/.ipynb_checkpoints/rlccnprof1ghan.c1-checkpoint.ipynb b/VAPs/quicklook/CCNPROF/.ipynb_checkpoints/rlccnprof1ghan.c1-checkpoint.ipynb new file mode 100644 index 00000000..696f23b7 --- /dev/null +++ b/VAPs/quicklook/CCNPROF/.ipynb_checkpoints/rlccnprof1ghan.c1-checkpoint.ipynb @@ -0,0 +1,4109 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# RLCCNPROF1GHAN.C1 Plots\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/ccnprof) for more information about this vap." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib widget\n", + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import ipywidgets as widgets\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "# Data archive directory\n", + "DATA_DIR = r'/data/archive/'\n", + "\n", + "# Datastream info\n", + "DATASTREAM_NAME = 'rlccnprof1ghan'\n", + "DATA_LEVEL = 'c1'\n", + "LOCATIONS = [{'end_date': '2014-06-24', 'facility': 'C1', 'site': 'sgp', 'start_date': '2006-09-15'}]" + ] + }, + { + "cell_type": "markdown", + "id": "9faaf875", + "metadata": {}, + "source": [ + "## Define site, facility, and date range" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "ac6764f5", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The following locations and date ranges are available for this VAP:\n" 
+ ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sitefacilitystart_dateend_date
0sgpC12006-09-152014-06-24
\n", + "
" + ], + "text/plain": [ + " site facility start_date end_date\n", + "0 sgp C1 2006-09-15 2014-06-24" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "print(\"The following locations and date ranges are available for this VAP:\")\n", + "display(pd.DataFrame(LOCATIONS, columns=['site', 'facility', 'start_date', 'end_date']))" + ] + }, + { + "cell_type": "markdown", + "id": "8d132223", + "metadata": {}, + "source": [ + "#### Define site, facility, and date range (date format: YYYY-MM-DD) using the variables below:" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "e563983a", + "metadata": {}, + "outputs": [], + "source": [ + "site_facility = ( 'sgp', 'C1' )\n", + "\n", + "date_start = '2014-06-22'\n", + "date_end = '2014-06-24'" + ] + }, + { + "cell_type": "markdown", + "id": "bccd3dfe-2f99-49a5-bace-ea37e8dd8fc3", + "metadata": {}, + "source": [ + "## Load data files\n", + "Load data files from /data/archive/" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "cb4b9a26-c574-49c0-a521-658fa553e39e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'/data/archive/sgp/sgprlccnprof1ghanC1.c1'" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Compile list of files\n", + "site, facility = site_facility\n", + "d_date_start = datetime.strptime(date_start, '%Y-%m-%d')\n", + "d_date_end = datetime.strptime(date_end, '%Y-%m-%d')\n", + "dir_path = os.path.join(DATA_DIR + site, site + DATASTREAM_NAME + facility + r'.' 
+ DATA_LEVEL )\n", + "dir_path\n" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "6be8f3dc", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['20140622', '20140623', '20140624']" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from datetime import date, timedelta\n", + "import pandas as pd\n", + "\n", + "def get_ARM_formated_dates(start_date, end_date):\n", + " \"\"\"\n", + " Get a list of ARM conventional formated date lists, based on start_date and end_date(inclusive)\n", + " EXAMPLE:\n", + " get_ARM_formated_dates(start_date=\"20180219\", end_date=\"20180221\")\n", + " >> [\"20180219\", \"20180220\", \"20180221\"] \n", + " \"\"\"\n", + " \n", + " _start_date = pd.to_datetime(start_date)\n", + " _end_date = pd.to_datetime(end_date)\n", + " \n", + " delta = _end_date - _start_date # returns timedelta \n", + " dates = []\n", + "\n", + " for i in range(delta.days + 1):\n", + " day = _start_date + timedelta(days=i)\n", + " day_formated = day.strftime(format=\"%Y%m%d\")\n", + " dates.append(day_formated)\n", + " return dates\n", + "\n", + "\n", + "get_ARM_formated_dates(start_date=date_start, end_date=date_end)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "51feea2e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['/data/archive/sgp/sgprlccnprof1ghanC1.c1/sgprlccnprof1ghanC1.c1.20140624.000000.cdf']" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Filter a list of files based on date pattern\n", + "import glob\n", + "dates = get_ARM_formated_dates(start_date=date_start, end_date=date_end)\n", + "files_filter = []\n", + "for date in dates:\n", + " files_filter += glob.glob(f'{dir_path}/*.{date}*.*')\n", + " files_filter\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "b0e5d0d7", + "metadata": {}, + "outputs": [ + { 
+ "name": "stdout", + "output_type": "stream", + "text": [ + "1 files loaded\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
<xarray.Dataset>\n",
+       "Dimensions:                           (time: 24, height: 52, ss_step: 7,\n",
+       "                                       param2: 2)\n",
+       "Coordinates:\n",
+       "  * time                              (time) datetime64[ns] 2014-06-24 ... 20...\n",
+       "  * height                            (height) float32 0.15 0.225 ... 3.9 3.975\n",
+       "  * ss_step                           (ss_step) float32 1.0 2.0 3.0 ... 6.0 7.0\n",
+       "Dimensions without coordinates: param2\n",
+       "Data variables: (12/62)\n",
+       "    base_time                         datetime64[ns] 2014-06-24\n",
+       "    time_offset                       (time) datetime64[ns] 2014-06-24 ... 20...\n",
+       "    qc_time                           (time) int32 dask.array<chunksize=(24,), meta=np.ndarray>\n",
+       "    rh_mean                           (time, height) float32 dask.array<chunksize=(24, 52), meta=np.ndarray>\n",
+       "    qc_rh_mean                        (time, height) int32 dask.array<chunksize=(24, 52), meta=np.ndarray>\n",
+       "    rh_std_dev                        (time, height) float32 dask.array<chunksize=(24, 52), meta=np.ndarray>\n",
+       "    ...                                ...\n",
+       "    qc_N_CCN_7                        (time) int32 dask.array<chunksize=(24,), meta=np.ndarray>\n",
+       "    temperature_second_deriv          (time, height) float32 dask.array<chunksize=(24, 52), meta=np.ndarray>\n",
+       "    cbh                               (time) float32 dask.array<chunksize=(24,), meta=np.ndarray>\n",
+       "    lat                               float32 ...\n",
+       "    lon                               float32 ...\n",
+       "    alt                               float32 ...\n",
+       "Attributes: (12/17)\n",
+       "    command_line:                   ccnprof -s sgp -f C1 -b 20140624 -e 20140...\n",
+       "    process_version:                v1.2\n",
+       "    dod_version:                    rlccnprof1ghan-c1-0.5\n",
+       "    site_id:                        sgp\n",
+       "    facility_id:                    C1: Lamont, Oklahoma\n",
+       "    input_datastreams:              sgpaosccn100C1.a1 : 12.9 : 20140624.00000...\n",
+       "    ...                             ...\n",
+       "    history:                        created by user dsmgr on machine iron at ...\n",
+       "    _file_dates:                    ['20140624']\n",
+       "    _file_times:                    ['000000']\n",
+       "    datastream:                     \n",
+       "    _datastream:                    \n",
+       "    _arm_standards_flag:            1
" + ], + "text/plain": [ + "\n", + "Dimensions: (time: 24, height: 52, ss_step: 7,\n", + " param2: 2)\n", + "Coordinates:\n", + " * time (time) datetime64[ns] 2014-06-24 ... 20...\n", + " * height (height) float32 0.15 0.225 ... 3.9 3.975\n", + " * ss_step (ss_step) float32 1.0 2.0 3.0 ... 6.0 7.0\n", + "Dimensions without coordinates: param2\n", + "Data variables: (12/62)\n", + " base_time datetime64[ns] 2014-06-24\n", + " time_offset (time) datetime64[ns] 2014-06-24 ... 20...\n", + " qc_time (time) int32 dask.array\n", + " rh_mean (time, height) float32 dask.array\n", + " qc_rh_mean (time, height) int32 dask.array\n", + " rh_std_dev (time, height) float32 dask.array\n", + " ... ...\n", + " qc_N_CCN_7 (time) int32 dask.array\n", + " temperature_second_deriv (time, height) float32 dask.array\n", + " cbh (time) float32 dask.array\n", + " lat float32 ...\n", + " lon float32 ...\n", + " alt float32 ...\n", + "Attributes: (12/17)\n", + " command_line: ccnprof -s sgp -f C1 -b 20140624 -e 20140...\n", + " process_version: v1.2\n", + " dod_version: rlccnprof1ghan-c1-0.5\n", + " site_id: sgp\n", + " facility_id: C1: Lamont, Oklahoma\n", + " input_datastreams: sgpaosccn100C1.a1 : 12.9 : 20140624.00000...\n", + " ... 
...\n", + " history: created by user dsmgr on machine iron at ...\n", + " _file_dates: ['20140624']\n", + " _file_times: ['000000']\n", + " datastream: \n", + " _datastream: \n", + " _arm_standards_flag: 1" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Load files as a single dataset\n", + "files_list = files_filter \n", + "ds = act.io.armfiles.read_netcdf(files_list)\n", + "ds.clean.cleanup()\n", + "print(f'{len(files_list)} files loaded')\n", + "ds\n" + ] + }, + { + "cell_type": "markdown", + "id": "4a551094-9ec0-4b64-b80a-9940573c2f50", + "metadata": {}, + "source": [ + "## Plot time series data\n", + "#### Define the list of variables to be plotted:" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "d09b789e-84f1-4605-846b-a72c110c8048", + "metadata": {}, + "outputs": [], + "source": [ + "variables_to_plot = ['nsteps', 'rh_mean', 'rh_std_dev']" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "3458fb08-035b-4898-9253-0a94e6f9c97b", + "metadata": {}, + "outputs": [ + { + "ename": "KeyError", + "evalue": "'nsteps'", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[9], line 5\u001b[0m\n\u001b[1;32m 2\u001b[0m ts_display\u001b[38;5;241m.\u001b[39madd_subplots((\u001b[38;5;28mlen\u001b[39m(variables_to_plot),), figsize \u001b[38;5;241m=\u001b[39m (\u001b[38;5;241m9.5\u001b[39m,\u001b[38;5;241m4\u001b[39m\u001b[38;5;241m*\u001b[39m\u001b[38;5;28mlen\u001b[39m(variables_to_plot)))\n\u001b[1;32m 4\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m i,v \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28menumerate\u001b[39m(variables_to_plot):\n\u001b[0;32m----> 5\u001b[0m ts_ax \u001b[38;5;241m=\u001b[39m ts_display\u001b[38;5;241m.\u001b[39mplot(v, 
subplot_index\u001b[38;5;241m=\u001b[39m(i,), set_title\u001b[38;5;241m=\u001b[39m\u001b[43mds\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mvariables\u001b[49m\u001b[43m[\u001b[49m\u001b[43mv\u001b[49m\u001b[43m]\u001b[49m\u001b[38;5;241m.\u001b[39mattrs[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mlong_name\u001b[39m\u001b[38;5;124m'\u001b[39m],)\n\u001b[1;32m 6\u001b[0m ts_ax\u001b[38;5;241m.\u001b[39mgrid()\n\u001b[1;32m 8\u001b[0m plt\u001b[38;5;241m.\u001b[39mshow()\n", + "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/xarray/core/utils.py:453\u001b[0m, in \u001b[0;36mFrozen.__getitem__\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m 452\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m__getitem__\u001b[39m(\u001b[38;5;28mself\u001b[39m, key: K) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m V:\n\u001b[0;32m--> 453\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmapping\u001b[49m\u001b[43m[\u001b[49m\u001b[43mkey\u001b[49m\u001b[43m]\u001b[49m\n", + "\u001b[0;31mKeyError\u001b[0m: 'nsteps'" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "b9705370d4b8484a87f6a3e53aec927d", + "version_major": 2, + "version_minor": 0 + }, + "image/png": "", + "text/html": [ + "\n", + "
\n", + "
\n", + " Figure\n", + "
\n", + " \n", + "
\n", + " " + ], + "text/plain": [ + "Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "ts_display = act.plotting.TimeSeriesDisplay(ds)\n", + "ts_display.add_subplots((len(variables_to_plot),), figsize = (9.5,4*len(variables_to_plot)))\n", + "\n", + "for i,v in enumerate(variables_to_plot):\n", + " ts_ax = ts_display.plot(v, subplot_index=(i,), set_title=ds.variables[v].attrs['long_name'],)\n", + " ts_ax.grid()\n", + "\n", + "plt.show()\n" + ] + }, + { + "cell_type": "markdown", + "id": "194399aa-1907-452b-8ba9-bc31d7f60291", + "metadata": {}, + "source": [ + "## Quality check plots\n", + "#### Define variable for QC plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9a0ef63d-3eeb-48fc-a24a-43258b6134b8", + "metadata": {}, + "outputs": [], + "source": [ + "qc_variable = 'rh_mean'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "532663a3-4dc0-4497-bda8-018c5f91e1c4", + "metadata": {}, + "outputs": [], + "source": [ + "# QC Plot\n", + "if ('qc_'+qc_variable) in ds.variables:\n", + "\n", + " # Plot\n", + " qc_display = act.plotting.TimeSeriesDisplay(ds)\n", + " qc_display.add_subplots((2,), figsize = (9.5,10))\n", + " qc_ax = qc_display.plot(qc_variable, subplot_index=(0,), set_title=\"QC results on field: \" + qc_variable,)\n", + " qc_ax.grid()\n", + " qc_display.qc_flag_block_plot(qc_variable, subplot_index=(1,))\n", + "\n", + " plt.show()\n", + "else:\n", + " print(f'QC not available for the selected field: {qc_variable}')\n" + ] + }, + { + "cell_type": "markdown", + "id": "2308db16-f362-4033-a11a-c3e5e75ad9ba", + "metadata": {}, + "source": [ + "## Field selection dropdown menu\n", + "Select variable to be plotted from a dropdown menu" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d5792fa8-2253-441a-8ac6-1e07a0345031", + "metadata": {}, + 
"outputs": [], + "source": [ + "plt.ioff()\n", + "\n", + "# populate dropdown menu with available variables \n", + "available_variables = [v for v in ds.variables if not('time' in v or v.startswith('qc_') or v.startswith('source_')) and 'long_name' in ds.variables[v].attrs]\n", + "d_variable = 'nsteps'\n", + "dropdown = widgets.Dropdown(\n", + " options = [(ds.variables[v].attrs['long_name'], v) for v in available_variables],\n", + " value= d_variable,\n", + " description='Field:',\n", + " disabled=False,\n", + ")\n", + "dropdown.layout.margin = '0px 30% 0px 20%'\n", + "dropdown.layout.width = '50%'\n", + "\n", + "# set up display\n", + "i_display = act.plotting.TimeSeriesDisplay(ds)\n", + "i_display.add_subplots((1,), figsize = (9.5,5))\n", + "i_ax = i_display.plot(d_variable, subplot_index=(0,), set_title=ds.variables[d_variable].attrs['long_name'],)\n", + "i_ax.grid()\n", + "i_fig = i_display.fig\n", + "\n", + "# update plot callback function\n", + "def update_plot(change):\n", + " i_ax.cla()\n", + " i_ax_new = i_display.plot(change.new, subplot_index=(0,), set_title=ds.variables[change.new].attrs['long_name'],)\n", + " i_ax_new.grid()\n", + " i_fig.canvas.draw()\n", + " i_fig.canvas.flush_events()\n", + "\n", + "dropdown.observe(update_plot, names='value')\n", + "\n", + "widgets.AppLayout(\n", + " header=dropdown,\n", + " center=i_fig.canvas,\n", + " pane_heights=[1, 6,1]\n", + ")\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.16" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git 
a/VAPs/quicklook/CCNPROF/CCNPROF_tutorial.ipynb b/VAPs/quicklook/CCNPROF/CCNPROF_tutorial.ipynb new file mode 100644 index 00000000..4a92854b --- /dev/null +++ b/VAPs/quicklook/CCNPROF/CCNPROF_tutorial.ipynb @@ -0,0 +1,838 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# RLCCNPROF1GHAN.C1 Notebook\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/ccnprof) for more information about this VAP." + ] + }, + { + "cell_type": "markdown", + "id": "97097763", + "metadata": {}, + "source": [ + "In this notebook, we demonstrate the workflow to explore ARM VAP data (using rlccnprof1ghan as an example). Value-added products (VAPs) are higher-order data products that have been analyzed and processed to ease scientists’ use of ARM data in atmospheric research and global climate models. \n", + "Here is the main content we will cover." + ] + }, + { + "cell_type": "markdown", + "id": "eddec40f", + "metadata": {}, + "source": [ + "# Table of Contents\n", + "## Access the data\n", + "* How to retrieve the data\n", + "* Data path and file name conventions\n", + "* Load data\n", + "## Explore the data\n", + "* NetCDF Data structure\n", + "* Xarray essentials\n", + "* Xarray Variable\n", + "## Plot the data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "import random\n", + "\n", + "import glob\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "id": "2372d5be", + "metadata": {}, + "source": [ + "## Access the data" + ] + }, + { + "cell_type": "markdown", + "id": "5b0d9684", + "metadata": {}, + "source": [ + "### How to 
retrieve the data\n", + "We assume the path \"/data/archive\" is available where you are running this notebook. You can use `os.path.exists(\"/data/archive\")` to verify if the path exists on your machine.\n", + "\n", + "\n", + "### Data path and file name conventions\n", + "There are several common terms regarding ARM data, for example, data-stream-name, data-level, site, facility, instrument, etc. (For more details, please see [ARM Data File Standards Version 1.2](https://www.google.com/search?q=arm+datastream+facility+cite+definition&rlz=1C1GCEJ_enUS1029US1029&ei=hb41ZICrBPukqtsPvMODIA&ved=0ahUKEwjAgeiS0aL-AhV7kmoFHbzhAAQQ4dUDCBA&uact=5&oq=arm+datastream+facility+cite+definition&gs_lcp=Cgxnd3Mtd2l6LXNlcnAQAzIFCAAQogQyBQgAEKIEMgUIABCiBDIFCAAQogQyBQgAEKIEOgoIABBHENYEELADOgoIIRCgARDDBBAKSgQIQRgAUJ4DWIoOYO0PaAFwAXgAgAGMAYgBuQmSAQMyLjmYAQCgAQHIAQjAAQE&sclient=gws-wiz-serp).) \n", + "\n", + "For example, this notebook is called `rlccnprof1ghan.c1`, where `rlccnprof1ghan` is the \"datastream name\", and `c1` is the \"data level\".\n", + "\n", + "This datastream also contains site `sgp` and facility `C1`. (Note: an individual datastream might have multiple site-facility pairs.)\n", + "In such a case, the data of this data-stream is stored at `/data/archive/sgp/sgprlccnprof1ghanC1.c1`, which is in the format of `<DATA_DIR>/<site>/<site><datastream_name><facility>.<data_level>`. We can use the following method to assign the data-stream directory `datastream_dir = os.path.join(DATA_DIR, site, site + DATASTREAM_NAME + facility + '.' + DATA_LEVEL )`\n", + "\n", + "The data files under datastream_dir also follow naming conventions. Once you reach the datastream_dir level, the most important file naming convention for differentiating the files is \"yyyyMMdd.hhmmss\", which comes in handy for filtering files based on datetime. 
For example, we can use `glob.glob(f'{datastream_dir}/*.200709*.*')` to filter files from September 2007.\n", + "\n", + "(Note: refer to https://adc.arm.gov/discovery/#/ and the https://adc.arm.gov/solr8/metadata/select API to explore ARM datastreams and their associated sites and facilities.)\n", + "\n", + "Please see the following examples in action." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d7e9eb85", + "metadata": {}, + "outputs": [], + "source": [ + "# Verify if DATA_DIR path exists\n", + "DATA_DIR = \"/data/archive\"\n", + "os.path.exists(DATA_DIR)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "586993fd", + "metadata": {}, + "outputs": [], + "source": [ + "# Specify datastream_dir following the path conventions and check its existence\n", + "DATASTREAM_NAME = \"rlccnprof1ghan\"\n", + "DATA_LEVEL = \"c1\"\n", + "site = \"sgp\"\n", + "facility = \"C1\"\n", + "datastream_dir = os.path.join(DATA_DIR, site, site + DATASTREAM_NAME + facility + '.' 
+ DATA_LEVEL )\n", + "print(datastream_dir)\n", + "print(os.path.exists(datastream_dir))\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0742f7c1", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: list 5 (random) files under datastream_dir\n", + "files = os.listdir(datastream_dir)\n", + "files[:5]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "39b98a36", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: get most recent file\n", + "list_of_files = glob.glob(f\"{datastream_dir}/*\") # * means all if need specific format then *.csv\n", + "latest_file = max(list_of_files, key=os.path.getctime)\n", + "latest_file" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "902d514e", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus sort datastream files based on datetime\n", + "files = os.listdir(datastream_dir)\n", + "file_sorted = files.copy()\n", + "file_sorted.sort() \n", + "print(file_sorted[:5])\n", + "\n", + "# to reverse\n", + "file_sorted_reverse = files.copy() \n", + "file_sorted_reverse.sort(reverse=True)\n", + "print(file_sorted_reverse[:5])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ec5923b2", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: pattern matching\n", + "# filter the 200709** files under datastream_dir\n", + "files_filter = glob.glob(f'{datastream_dir}/*.200709*.*')\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c4a4aa18", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: use armnotebook_utils.file_filter (TODO) to filter files based on datastream info \n" + ] + }, + { + "cell_type": "markdown", + "id": "cfeb9efc", + "metadata": {}, + "source": [ + "### Load data\n", + "The arm data is stored in [NetCDF](https://en.wikipedia.org/wiki/NetCDF#:~:text=%22NetCDF%20(network%20Common%20Data%20Form,format%20for%20representing%20scientific%20data.) format. 
We can use xarray's `open_dataset` method to load a single file, or `open_mfdataset` to open multiple files. (Note: the latter will still return a single xarray dataset object by combining multiple files.)\n", + "\n", + "Note: open_dataset keeps the file handle open and lazily loads its contents. All parameters are passed directly to open_dataset. It is preferable to load_dataset for memory efficiency. (For more details, please see [xarray.open_dataset](https://docs.xarray.dev/en/stable/generated/xarray.open_dataset.html).)\n", + "\n", + "See the following example in action." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a440a329", + "metadata": {}, + "outputs": [], + "source": [ + "# open single file\n", + "full_path = latest_file\n", + "print(full_path)\n", + "ds_single = xr.open_dataset(full_path)\n", + "ds_single" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0143a3d", + "metadata": {}, + "outputs": [], + "source": [ + "print(ds_single)\n", + "print(type(ds_single))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1c0f8939", + "metadata": {}, + "outputs": [], + "source": [ + "# open multiple files\n", + "n_files = 3\n", + "full_paths = [os.path.join(datastream_dir, f_path) for f_path in file_sorted_reverse[:n_files]]\n", + "print(full_paths)\n", + "try: # Note: sometimes multiple files cannot be merged, so we used try except here.\n", + " ds_mutiple = xr.open_mfdataset(full_paths)\n", + " # ds_mutiple\n", + " print(type(ds_mutiple))\n", + " print(ds_mutiple)\n", + "except Exception as e:\n", + " print(e)" + ] + }, + { + "cell_type": "markdown", + "id": "3fa59943", + "metadata": {}, + "source": [ + "## Explore the data" + ] + }, + { + "cell_type": "markdown", + "id": "571f69b9", + "metadata": {}, + "source": [ + "### NetCDF Data structure \n", + "\n", + "\n", + "(If you are confident with NetCDF basics and xarray essentials, feel free to skip this section.)\n", + "Before we dive into 
data exploration, there are some essential concepts we should be familiar with (shown below): \n", + "* Dataset\n", + "* Data array\n", + "* Variable\n", + "* Dimension\n", + "* Coordinate\n", + "* Data Type\n", + "* Meta Data (Attributes)\n", + "\n", + "We will not go into details about NetCDF basics, but here are some references you might find helpful:\n", + "* [Components of a NetCDF Dataset](https://iprc.soest.hawaii.edu/users/xfu/tool/guidef-7.html#)\n", + "* [Network Common Data Form (NetCDF)](https://www.unidata.ucar.edu/software/netcdf/) and [A Brief History of (netCDF) Time](https://www.unidata.ucar.edu/software/netcdf/time/recs.html)\n", + "\n", + "\n", + "\n", + "### Xarray essentials\n", + "Earlier we introduced the xarray package and used `open_dataset` and `open_mfdataset` to retrieve a NetCDF data file as an xarray Dataset object. Recall that you can review the data in a notebook by using `print(ds)` or `ds`. \n", + "\n", + "Using xarray to retrieve the aforementioned NetCDF basics is straightforward. 
In general,\n", + "* Dataset: `ds`\n", + "* Data array: `ds.variables`\n", + "* Variable: `ds.variables`\n", + "* Dimension: `ds.dims`\n", + "* Coordinate: `ds.coords`\n", + "* Data Type: `type`\n", + "* Meta Data (Attributes): `ds.attrs`\n", + "\n", + "Also, here are some references if you are new to xarray:\n", + "* [Xarray in 45 minutes](https://tutorial.xarray.dev/overview/xarray-in-45-min.html)\n", + "* [Handling NetCDF Files using XArray for Absolute Beginners](https://towardsdatascience.com/handling-netcdf-files-using-xarray-for-absolute-beginners-111a8ab4463f)\n", + "\n", + "Try the following commands in action.\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b3eac323", + "metadata": {}, + "outputs": [], + "source": [ + "ds = ds_single" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "77ecf85d", + "metadata": {}, + "outputs": [], + "source": [ + "# Dataset \n", + "ds" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "25e7de09", + "metadata": {}, + "outputs": [], + "source": [ + "# Data array, Variable\n", + "# Note: the info can be overwhelming. 
Do not attempt to grasp everything at first glance.\n", + "ds.variables" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8c41b67e", + "metadata": {}, + "outputs": [], + "source": [ + "# dimensions\n", + "ds.dims" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "156f1dfc", + "metadata": {}, + "outputs": [], + "source": [ + "# coordinates\n", + "ds.coords" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "277d6064", + "metadata": {}, + "outputs": [], + "source": [ + "# Meta Data (Attributes)\n", + "ds.attrs" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d334681f", + "metadata": {}, + "outputs": [], + "source": [ + "# type\n", + "print(type(ds))\n", + "print(type(ds.variables))\n", + "print(type(ds.dims))\n", + "print(type(ds.coords))\n", + "print(type(ds.attrs))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "643399d6", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: retrieve variable names only\n", + "list(ds.variables)" + ] + }, + { + "cell_type": "markdown", + "id": "1cf36878", + "metadata": {}, + "source": [ + "#### Discussion: variable vs. coordinates vs. dimensions. \n", + "From [Components of a NetCDF Dataset](https://iprc.soest.hawaii.edu/users/xfu/tool/guidef-7.html#):\n", + "\n", + "* Dimension: A dimension may be used to represent a real physical dimension, for example, time, latitude, longitude, or height. A dimension might also be used to index other quantities, for example station or model-run-number.\n", + "A netCDF dimension has both a name and a length. A dimension length is an arbitrary positive integer, except that one dimension in a netCDF dataset can have the length UNLIMITED.\n", + "\n", + "* Variables: Variables are used to store the bulk of the data in a netCDF dataset. A variable represents an array of values of the same type. A scalar value is treated as a 0-dimensional array. 
A variable has a name, a data type, and a shape described by its list of dimensions specified when the variable is created. A variable may also have associated attributes, which may be added, deleted or changed after the variable is created.\n", + "\n", + "* Coordinate (Variables): It is legal for a variable to have the same name as a dimension. Such variables have no special meaning to the netCDF library. However there is a convention that such variables should be treated in a special way by software using this library.\n", + "A variable with the same name as a dimension is called a coordinate variable. It typically defines a physical coordinate corresponding to that dimension.\n", + "\n", + "\n", + "Tips: By definition, coordinates and dimensions are also variables. For example, ds.time is a coordinate, but it is also a special variable. In practice, however, when we talk about a variable, we mean a regular (non-coordinate) variable. We will use this convention for the rest of the notebook. " + ] + }, + { + "cell_type": "markdown", + "id": "d47120f9", + "metadata": {}, + "source": [ + "### Xarray Variable\n", + "\n", + "\n", + "We can use `ds.variables` to access the variables of a dataset. But printing out the whole dataset can often be overwhelming. Instead, we can work on an individual variable (also called an xarray data array) using the `ds[var_name]` syntax, where `var_name` is the name of the variable. See the following examples in action.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5a57e136", + "metadata": {}, + "outputs": [], + "source": [ + "print(type(ds[\"time\"]))\n", + "ds[\"time\"]" + ] + }, + { + "cell_type": "markdown", + "id": "51ec643e", + "metadata": {}, + "source": [ + "#### Variable properties\n", + "For an individual variable, the following properties are most commonly used, along with the associated xarray attributes to retrieve them. 
(assuming var = ds[var_name])\n", + "* name: `var.name`\n", + "* data content: `var.data`\n", + "* attributes: `var.attrs`\n", + "* dimensions: `var.dims`\n", + "* data type: `var.data.dtype`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a632a525", + "metadata": {}, + "outputs": [], + "source": [ + "var_name = \"time\"\n", + "var = ds[var_name]\n", + "\n", + "print(\"var.name: \\n\", var.name, \"\\n\")\n", + "print(\"var.data: \\n\", var.data, \"\\n\")\n", + "print(\"var.attrs: \\n\", var.attrs, \"\\n\")\n", + "print(\"var.dims: \\n\", var.dims, \"\\n\")\n", + "print(\"var.data.dtype: \\n\", var.data.dtype, \"\\n\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b256b07f", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: individual variable info table (collect and display variable info in a tabular format)\n", + "df_info = pd.DataFrame()\n", + "df_info[\"var_name\"] = list(ds.variables)\n", + "df_info[\"dims\"] = df_info.var_name.apply(lambda x: ds[x].dims)\n", + "df_info[\"is_dim\"] = df_info.var_name.apply(lambda x: x in ds.dims)\n", + "df_info[\"n_dim\"] = df_info.var_name.apply(lambda x: len(ds[x].dims))\n", + "df_info[\"attrs\"] = df_info.var_name.apply(lambda x: ds[x].attrs)\n", + "df_info[\"dtype\"] = df_info.var_name.apply(lambda x: ds[x].data.dtype)\n", + "df_info" + ] + }, + { + "cell_type": "markdown", + "id": "e6a36b96", + "metadata": {}, + "source": [ + "### Data cleaning/Preprocessing (skipped)\n", + "Data cleaning and preprocessing is an important stage in the data analysis pipeline. However, it is outside the scope of this notebook. For more information about data cleaning and preprocessing basics, here are some references. 
\n", + "* [Xarray Fundamentals](https://earth-env-data-science.github.io/lectures/xarray/xarray.html)\n", + "* [Xarray Tutorial — Pangeo Gallery documentation](http://gallery.pangeo.io/repos/pangeo-data/pangeo-tutorial-gallery/xarray.html)\n", + "* [Pythonic Data Cleaning With pandas and NumPy](https://realpython.com/python-data-cleaning-numpy-pandas/)\n", + "* [Pandas - Cleaning Data](https://www.w3schools.com/python/pandas/pandas_cleaning.asp)" + ] + }, + { + "cell_type": "markdown", + "id": "ae6e1a69", + "metadata": {}, + "source": [ + "### Plotting\n", + "Data visualization (or plotting) is another important data analysis topic and deserves its own discussion. Here in this notebook we will only demonstrate basic tools for simple data visualization tasks.\n", + "\n", + "Here are some references you might find useful:\n", + "* xarray plotting: https://docs.xarray.dev/en/stable/user-guide/plotting.html\n", + "* Atmospheric Community Toolkit (ACT): https://arm-doe.github.io/ACT/index.html\n", + "\n", + "Note: this notebook is auto-generated using a template. It uses a generic heuristic to select the variable(s) to plot and is not customized for each individual notebook. Feel free to change the variables of interest, especially if a figure fails to plot." + ] + }, + { + "cell_type": "markdown", + "id": "7e417fb2", + "metadata": {}, + "source": [ + "#### 1-dimensional basic time series plot\n", + "\n", + "For the following plot we would like to find variables such that\n", + "* it has one and only one dimension\n", + "* \"time\" is its coordinate variable\n", + "* it is not a dimension itself,\n", + "* it is not a special variable with substrings within [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "\n", + "Please see the following example in action. 
(For more details about pandas filtering, please see the following references.)\n", + "* [pandas: multiple conditions while indexing data frame](https://stackoverflow.com/questions/22591174/pandas-multiple-conditions-while-indexing-data-frame-unexpected-behavior)\n", + "* [pandas.DataFrame.apply](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.apply.html)\n", + "* [How to test if a string contains one of the substrings in a list...](https://stackoverflow.com/questions/26577516/how-to-test-if-a-string-contains-one-of-the-substrings-in-a-list-in-pandas)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "30edac2d", + "metadata": {}, + "outputs": [], + "source": [ + "# (pandas query) filter n_dim==1, non-dimensional, and not containing the following substrings\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter = df_info[(df_info.n_dim == 1) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (df_info.is_dim==False) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4777a659", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " var_1d = df_filter.var_name.values[0]\n", + " var_1d\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fb5ae985", + "metadata": {}, + "outputs": [], + "source": [ + "# Note: if failed, change to another variable to plot.\n", + "try:\n", + " ds[var_1d].plot()\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "markdown", + "id": "9e41066e", + "metadata": {}, + "source": [ + "#### 2-dimensional basic plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "09bd4adc", + "metadata": {}, + "outputs": [], + "source": [ + "# (pandas query) filter n_dim==2, non-dimensional, and not containing the 
following substrings\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_2 = df_info[(df_info.n_dim == 2) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (df_info.is_dim==False) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter_2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2cdea1a9", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " var_2d = df_filter_2.var_name.values[0]\n", + " var_2d\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f5f48879", + "metadata": {}, + "outputs": [], + "source": [ + "# Note: if failed, change to another variable to plot.\n", + "try:\n", + " print(ds[var_2d].dims)\n", + " # ds[var_2d].plot()\n", + "\n", + " # conventionally, use \"time\" as x-axis\n", + " ds[var_2d].plot(x=\"time\")\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "markdown", + "id": "7554b2a6", + "metadata": {}, + "source": [ + "#### qc-plotting (optional)\n", + "\n", + "Note: act qc plotting has more strict requirement, one of them is the associated qc variable need to have \"flag_masks\" attributes. 
Which is added by using ds = act.io.armfiles.read_netcdf(files_list), then ds.clean.cleanup()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "71c2096f", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " ds_act = act.io.armfiles.read_netcdf(full_path)\n", + " print(type(ds_act))\n", + " ds_act.clean.cleanup()\n", + "\n", + " # or \n", + " # ds.clean.cleanup()\n", + "except Exception as e:\n", + " print(\"ERROR\", e)\n", + " ds_act = ds" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "844f8505", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter valid variables for ACT qc plotting\n", + "condition = (\"qc_\" + df_info.var_name).apply(lambda x: ds_act[x].attrs.get(\"flag_masks\") is not None \n", + " if x in list(ds.variables) else False)\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_3 = df_info[(df_info.is_dim==False) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings))) &\n", + " condition\n", + " ]\n", + "df_filter_3" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ecd38cc6", + "metadata": {}, + "outputs": [], + "source": [ + "# QC Plot\n", + "try:\n", + " qc_variable = df_filter_3.var_name.values[0]\n", + " print(qc_variable)\n", + "except Exception as e:\n", + " print(e)\n", + " \n", + "try:\n", + "\n", + " # Plot\n", + " qc_display = act.plotting.TimeSeriesDisplay(ds_act)\n", + " qc_display.add_subplots((2,), figsize = (9.5,10))\n", + " qc_ax = qc_display.plot(qc_variable, subplot_index=(0,), set_title=\"QC results on field: \" + qc_variable,)\n", + " qc_ax.grid()\n", + " qc_display.qc_flag_block_plot(qc_variable, subplot_index=(1,))\n", + " qc_ax.set_xlabel(f\"UTC Time starts at {ds.time.data[0]}\")\n", + "\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e)" + ] + }, + { + "cell_type": "markdown", + "id": "d76e4d27", + "metadata": 
{}, + "source": [ + "#### bonus: choose variables to plot from a dropdown menu " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bb733804", + "metadata": {}, + "outputs": [], + "source": [ + "# Valid variables filtering\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_4 = df_info[(df_info.is_dim==False) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter_4" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "82d5c20d", + "metadata": {}, + "outputs": [], + "source": [ + "# example 1: using xarray plot\n", + "\n", + "# Uncomment the following cell to try the interactive plot (ctrl + /)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2fc9aaa4", + "metadata": {}, + "outputs": [], + "source": [ + "# %matplotlib widget\n", + "# plt.clf()\n", + "\n", + "# fig, ax = plt.subplots(figsize=(10, 4))\n", + "\n", + "# available_variables = df_filter_4.var_name.values\n", + "# @widgets.interact(var=available_variables)\n", + "# def update(var = available_variables[0]):\n", + "# fig.clear() # Remove old lines from plot and plot new one\n", + "# if len(ds[var].dims)==2:\n", + "# ds[var].plot(x=\"time\", add_colorbar=False)\n", + "# else:\n", + "# ds[var].plot()\n", + "# plt.grid()\n", + "# plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fabdd802", + "metadata": {}, + "outputs": [], + "source": [ + "# example 2: using act plot\n", + "\n", + "# Uncomment the following cell to try the interactive plot (ctrl + /)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "962b4186", + "metadata": {}, + "outputs": [], + "source": [ + "# %matplotlib widget\n", + "# plt.clf()\n", + "\n", + "# available_variables = df_filter_4.var_name.values\n", + "\n", + "\n", + "# @widgets.interact(var=available_variables)\n", + "# def 
update(var = available_variables[0]):\n", + "\n", + "# i_display = act.plotting.TimeSeriesDisplay(ds_act)\n", + "# i_display.add_subplots((1,), figsize=(10, 4))\n", + "# ax = i_display.plot(var, subplot_index=(0,), set_title=f\"{var} ({ds_act[var].attrs['long_name']})\",)\n", + "\n", + "# ax.set_xlabel(f\"UTC Time starts at {ds.time.data[0]}\")\n", + "# ax.grid()\n", + "# plt.show()\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.16" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/CCNPROF/rlccnprof1ghan.c1.ipynb b/VAPs/quicklook/CCNPROF/rlccnprof1ghan.c1.ipynb new file mode 100644 index 00000000..78c85732 --- /dev/null +++ b/VAPs/quicklook/CCNPROF/rlccnprof1ghan.c1.ipynb @@ -0,0 +1,339 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# RLCCNPROF1GHAN.C1 Plots\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/ccnprof) for more information about this vap." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib widget\n", + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import ipywidgets as widgets\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "# Data archive directory\n", + "DATA_DIR = r'/data/archive/'\n", + "\n", + "# Datastream info\n", + "DATASTREAM_NAME = 'rlccnprof1ghan'\n", + "DATA_LEVEL = 'c1'\n", + "LOCATIONS = [{'end_date': '2014-06-24', 'facility': 'C1', 'site': 'sgp', 'start_date': '2006-09-15'}]" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "9faaf875", + "metadata": {}, + "source": [ + "## Define site, facility, and date range" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ac6764f5", + "metadata": {}, + "outputs": [], + "source": [ + "print(\"The following locations and date ranges are available for this VAP:\")\n", + "display(pd.DataFrame(LOCATIONS, columns=['site', 'facility', 'start_date', 'end_date']))" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "8d132223", + "metadata": {}, + "source": [ + "#### Define site, facility, and date range (date format: YYYY-MM-DD) using the variables below:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e563983a", + "metadata": {}, + "outputs": [], + "source": [ + "site_facility = ( 'sgp', 'C1' )\n", + "\n", + "date_start = '2014-06-22'\n", + "date_end = '2014-06-24'" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "bccd3dfe-2f99-49a5-bace-ea37e8dd8fc3", + "metadata": {}, + "source": [ + "## Load data files\n", + "Load data files from /data/archive/" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cb4b9a26-c574-49c0-a521-658fa553e39e", + 
"metadata": {}, + "outputs": [], + "source": [ + "# Compile list of files\n", + "site, facility = site_facility\n", + "d_date_start = datetime.strptime(date_start, '%Y-%m-%d')\n", + "d_date_end = datetime.strptime(date_end, '%Y-%m-%d')\n", + "dir_path = os.path.join(DATA_DIR + site, site + DATASTREAM_NAME + facility + r'.' + DATA_LEVEL )\n", + "dir_path\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6be8f3dc", + "metadata": {}, + "outputs": [], + "source": [ + "from datetime import date, timedelta\n", + "import pandas as pd\n", + "\n", + "def get_ARM_formated_dates(start_date, end_date):\n", + " \"\"\"\n", + " Get a list of ARM conventional formated date lists, based on start_date and end_date(inclusive)\n", + " EXAMPLE:\n", + " get_ARM_formated_dates(start_date=\"20180219\", end_date=\"20180221\")\n", + " >> [\"20180219\", \"20180220\", \"20180221\"] \n", + " \"\"\"\n", + " \n", + " _start_date = pd.to_datetime(start_date)\n", + " _end_date = pd.to_datetime(end_date)\n", + " \n", + " delta = _end_date - _start_date # returns timedelta \n", + " dates = []\n", + "\n", + " for i in range(delta.days + 1):\n", + " day = _start_date + timedelta(days=i)\n", + " day_formated = day.strftime(format=\"%Y%m%d\")\n", + " dates.append(day_formated)\n", + " return dates\n", + "\n", + "\n", + "get_ARM_formated_dates(start_date=date_start, end_date=date_end)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "51feea2e", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter a list of files based on date pattern\n", + "import glob\n", + "dates = get_ARM_formated_dates(start_date=date_start, end_date=date_end)\n", + "files_filter = []\n", + "for date in dates:\n", + " files_filter += glob.glob(f'{dir_path}/*.{date}*.*')\n", + " files_filter\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0e5d0d7", + "metadata": {}, + "outputs": [], + "source": [ + "# Load files as a single 
dataset\n", + "files_list = files_filter \n", + "ds = act.io.armfiles.read_netcdf(files_list)\n", + "ds.clean.cleanup()\n", + "print(f'{len(files_list)} files loaded')\n", + "ds\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "4a551094-9ec0-4b64-b80a-9940573c2f50", + "metadata": {}, + "source": [ + "## Plot time series data\n", + "#### Define the list of variables to be plotted:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d09b789e-84f1-4605-846b-a72c110c8048", + "metadata": {}, + "outputs": [], + "source": [ + "variables_to_plot = ['nsteps', 'rh_mean', 'rh_std_dev']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3458fb08-035b-4898-9253-0a94e6f9c97b", + "metadata": {}, + "outputs": [], + "source": [ + "ts_display = act.plotting.TimeSeriesDisplay(ds)\n", + "ts_display.add_subplots((len(variables_to_plot),), figsize = (9.5,4*len(variables_to_plot)))\n", + "\n", + "for i,v in enumerate(variables_to_plot):\n", + " ts_ax = ts_display.plot(v, subplot_index=(i,), set_title=ds.variables[v].attrs['long_name'],)\n", + " ts_ax.grid()\n", + "\n", + "plt.show()\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "194399aa-1907-452b-8ba9-bc31d7f60291", + "metadata": {}, + "source": [ + "## Quality check plots\n", + "#### Define variable for QC plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9a0ef63d-3eeb-48fc-a24a-43258b6134b8", + "metadata": {}, + "outputs": [], + "source": [ + "qc_variable = 'rh_mean'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "532663a3-4dc0-4497-bda8-018c5f91e1c4", + "metadata": {}, + "outputs": [], + "source": [ + "# QC Plot\n", + "if ('qc_'+qc_variable) in ds.variables:\n", + "\n", + " # Plot\n", + " qc_display = act.plotting.TimeSeriesDisplay(ds)\n", + " qc_display.add_subplots((2,), figsize = (9.5,10))\n", + " qc_ax = qc_display.plot(qc_variable, subplot_index=(0,), set_title=\"QC results on field: \" 
+ qc_variable,)\n", + " qc_ax.grid()\n", + " qc_display.qc_flag_block_plot(qc_variable, subplot_index=(1,))\n", + "\n", + " plt.show()\n", + "else:\n", + " print(f'QC not available for the selected field: {qc_variable}')\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "2308db16-f362-4033-a11a-c3e5e75ad9ba", + "metadata": {}, + "source": [ + "## Field selection dropdown menu\n", + "Select variable to be plotted from a dropdown menu" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d5792fa8-2253-441a-8ac6-1e07a0345031", + "metadata": {}, + "outputs": [], + "source": [ + "plt.ioff()\n", + "\n", + "# populate dropdown menu with available variables \n", + "available_variables = [v for v in ds.variables if not('time' in v or v.startswith('qc_') or v.startswith('source_')) and 'long_name' in ds.variables[v].attrs]\n", + "d_variable = 'nsteps'\n", + "dropdown = widgets.Dropdown(\n", + " options = [(ds.variables[v].attrs['long_name'], v) for v in available_variables],\n", + " value= d_variable,\n", + " description='Field:',\n", + " disabled=False,\n", + ")\n", + "dropdown.layout.margin = '0px 30% 0px 20%'\n", + "dropdown.layout.width = '50%'\n", + "\n", + "# set up display\n", + "i_display = act.plotting.TimeSeriesDisplay(ds)\n", + "i_display.add_subplots((1,), figsize = (9.5,5))\n", + "i_ax = i_display.plot(d_variable, subplot_index=(0,), set_title=ds.variables[d_variable].attrs['long_name'],)\n", + "i_ax.grid()\n", + "i_fig = i_display.fig\n", + "\n", + "# update plot callback function\n", + "def update_plot(change):\n", + " i_ax.cla()\n", + " i_ax_new = i_display.plot(change.new, subplot_index=(0,), set_title=ds.variables[change.new].attrs['long_name'],)\n", + " i_ax_new.grid()\n", + " i_fig.canvas.draw()\n", + " i_fig.canvas.flush_events()\n", + "\n", + "dropdown.observe(update_plot, names='value')\n", + "\n", + "widgets.AppLayout(\n", + " header=dropdown,\n", + " center=i_fig.canvas,\n", + " pane_heights=[1, 6,1]\n", + 
")\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.12" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/CLAP/CLAP_tutorial.ipynb b/VAPs/quicklook/CLAP/CLAP_tutorial.ipynb new file mode 100644 index 00000000..9ef362ce --- /dev/null +++ b/VAPs/quicklook/CLAP/CLAP_tutorial.ipynb @@ -0,0 +1,838 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# AOSCLAP3W.C1 Notebook\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/clap) for more information about this vap." + ] + }, + { + "cell_type": "markdown", + "id": "97097763", + "metadata": {}, + "source": [ + "In this notebook, we demonstrate the workflow to explore ARM vap data (using aosclap3w as an example.) Value-added products (VAPs) are higher-order data products that have been analyzed and processed to ease scientists’ use of ARM data in atmospheric research and global climate models. \n", + "Here is the main content we will cover." 
+ ] + }, + { + "cell_type": "markdown", + "id": "eddec40f", + "metadata": {}, + "source": [ + "# Table of Content\n", + "## Access the data\n", + "* How to retrieve the data\n", + "* Data path and file name conventions\n", + "* Load data\n", + "## Explore the data\n", + "* NetCDF Data structure\n", + "* Xarray essentials\n", + "* Xarray Variable\n", + "## Plot the data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "import random\n", + "\n", + "import glob\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "id": "2372d5be", + "metadata": {}, + "source": [ + "## Access the data" + ] + }, + { + "cell_type": "markdown", + "id": "5b0d9684", + "metadata": {}, + "source": [ + "### How to retrieve the data\n", + "We assume the path \"/data/archive\" is available where you are running this notebook. You can use `os.path.exists(\"/data/archive\")` to verify if the path exists at your machine.\n", + "\n", + "\n", + "### Data path and file name conventions\n", + "There several common terminologies regarding ARM data, for example, data-stream-name, data-level, site, facility, instrument, etc. (For more details, please see [ARM Data File Standards Version 1.2](https://www.google.com/search?q=arm+datastream+facility+cite+definition&rlz=1C1GCEJ_enUS1029US1029&ei=hb41ZICrBPukqtsPvMODIA&ved=0ahUKEwjAgeiS0aL-AhV7kmoFHbzhAAQQ4dUDCBA&uact=5&oq=arm+datastream+facility+cite+definition&gs_lcp=Cgxnd3Mtd2l6LXNlcnAQAzIFCAAQogQyBQgAEKIEMgUIABCiBDIFCAAQogQyBQgAEKIEOgoIABBHENYEELADOgoIIRCgARDDBBAKSgQIQRgAUJ4DWIoOYO0PaAFwAXgAgAGMAYgBuQmSAQMyLjmYAQCgAQHIAQjAAQE&sclient=gws-wiz-serp).) 
\n", + "\n", + "For example, this notebook is called `aosclap3w.c1`, where `aosclap3w` is the \"datastream name\" and `c1` is the \"data level\".\n", + "\n", + "This datastream also contains site `pvc` and facility `M1`. (Note: an individual datastream might have multiple site-facility pairs.)\n", + "In such a case, the data of this datastream is stored at `/data/archive/pvc/pvcaosclap3wM1.c1`, which is in the format of `{DATA_DIR}/{site}/{site}{DATASTREAM_NAME}{facility}.{DATA_LEVEL}`. We can use the following method to assign the datastream directory: `datastream_dir = os.path.join(DATA_DIR, site, site + DATASTREAM_NAME + facility + '.' + DATA_LEVEL )`\n", + "\n", + "The data files under datastream_dir also follow naming conventions. Once at the datastream_dir level, the most important file naming convention for differentiating the files is \"yyyyMMdd.hhmmss\", which comes in handy for filtering files by datetime. For example, we can use `glob.glob(f'{datastream_dir}/*.200709*.*')` to filter files from September 2007.\n", + "\n", + "(Note: refer to https://adc.arm.gov/discovery/#/ and the https://adc.arm.gov/solr8/metadata/select API to explore ARM datastreams and their associated available sites and facilities.)\n", + "\n", + "Please see the following examples in action" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d7e9eb85", + "metadata": {}, + "outputs": [], + "source": [ + "# Verify if DATA_DIR path exists\n", + "DATA_DIR = \"/data/archive\"\n", + "os.path.exists(DATA_DIR)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "586993fd", + "metadata": {}, + "outputs": [], + "source": [ + "# Specify datastream_dir following the path conventions and check its existence\n", + "DATASTREAM_NAME = \"aosclap3w\"\n", + "DATA_LEVEL = \"c1\"\n", + "site = \"pvc\"\n", + "facility = \"M1\"\n", + "datastream_dir = os.path.join(DATA_DIR, site, site + DATASTREAM_NAME + facility + '.'
+ DATA_LEVEL )\n", + "print(datastream_dir)\n", + "print(os.path.exists(datastream_dir))\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0742f7c1", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: list 5 (random) files under datastream_dir\n", + "files = os.listdir(datastream_dir)\n", + "files[:5]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "39b98a36", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: get most recent file\n", + "list_of_files = glob.glob(f\"{datastream_dir}/*\") # * means all if need specific format then *.csv\n", + "latest_file = max(list_of_files, key=os.path.getctime)\n", + "latest_file" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "902d514e", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus sort datastream files based on datetime\n", + "files = os.listdir(datastream_dir)\n", + "file_sorted = files.copy()\n", + "file_sorted.sort() \n", + "print(file_sorted[:5])\n", + "\n", + "# to reverse\n", + "file_sorted_reverse = files.copy() \n", + "file_sorted_reverse.sort(reverse=True)\n", + "print(file_sorted_reverse[:5])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ec5923b2", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: pattern matching\n", + "# filter the 200709** files under datastream_dir\n", + "files_filter = glob.glob(f'{datastream_dir}/*.200709*.*')\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c4a4aa18", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: use armnotebook_utils.file_filter (TODO) to filter files based on datastream info \n" + ] + }, + { + "cell_type": "markdown", + "id": "cfeb9efc", + "metadata": {}, + "source": [ + "### Load data\n", + "The arm data is stored in [NetCDF](https://en.wikipedia.org/wiki/NetCDF#:~:text=%22NetCDF%20(network%20Common%20Data%20Form,format%20for%20representing%20scientific%20data.) format. 
We can use xarray's `open_dataset` method to load single file, or `open_mfdataset` to open multiple files. (Note: the latter will still return a single xarray dataset object by combining multiple files.)\n", + "\n", + "Note: open_dataset keeps the file handle open and lazy loads its contents. All parameters are passed directly to open_dataset. It is a preferable method over load_dataset for memory-effiency. (more details, please see [xarray.open_dataset](https://docs.xarray.dev/en/stable/generated/xarray.open_dataset.html))\n", + "\n", + "See the following example in action" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a440a329", + "metadata": {}, + "outputs": [], + "source": [ + "# open single file\n", + "full_path = latest_file\n", + "print(full_path)\n", + "ds_single = xr.open_dataset(full_path)\n", + "ds_single" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0143a3d", + "metadata": {}, + "outputs": [], + "source": [ + "print(ds_single)\n", + "print(type(ds_single))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1c0f8939", + "metadata": {}, + "outputs": [], + "source": [ + "# open multiple files\n", + "n_files = 3\n", + "full_paths = [os.path.join(datastream_dir, f_path) for f_path in file_sorted_reverse[:n_files]]\n", + "print(full_paths)\n", + "try: # Note: sometimes multiple files cannot be merged, so we used try except here.\n", + " ds_mutiple = xr.open_mfdataset(full_paths)\n", + " # ds_mutiple\n", + " print(type(ds_mutiple))\n", + " print(ds_mutiple)\n", + "except Exception as e:\n", + " print(e)" + ] + }, + { + "cell_type": "markdown", + "id": "3fa59943", + "metadata": {}, + "source": [ + "## Explore the data" + ] + }, + { + "cell_type": "markdown", + "id": "571f69b9", + "metadata": {}, + "source": [ + "### NetCDF Data structure \n", + "\n", + "\n", + "(If you are confident with NetCDF basics and xarray essentials, feel free to skip this session.)\n", + "Before we dive into 
data exploration, there are some essential concepts we should be familiar with (shown below): \n", + "* Dataset\n", + "* Data array\n", + "* Variable\n", + "* Dimension\n", + "* Coordinate\n", + "* Data Type\n", + "* Meta Data (Attributes)\n", + "\n", + "We will not go into details about NetCDF basics and here are some references you might find helpful\n", + "* [Components of a NetCDF Dataset](https://iprc.soest.hawaii.edu/users/xfu/tool/guidef-7.html#)\n", + "* [Network Common Data Form (NetCDF)](https://www.unidata.ucar.edu/software/netcdf/) and [A Brief History of (netCDF) Time](https://www.unidata.ucar.edu/software/netcdf/time/recs.html)\n", + "\n", + "\n", + "\n", + "### Xarray essentials\n", + "Earlier we introduced the xarray package and used `open_dataset` and `open_mfdataset` to retrieve NetCDF data files as an xarray Dataset object. Recall that you can review the data in a notebook by using `print(ds)` or `ds`. \n", + "\n", + "Using xarray to retrieve the aforementioned NetCDF basics is straightforward.
In general,\n", + "* Dataset: `ds`\n", + "* Data array: `ds.variables`\n", + "* Variable: `ds.variables`\n", + "* Dimension: `ds.dims`\n", + "* Coordinate: `ds.coords`\n", + "* Data Type: `type`\n", + "* Meta Data (Attributes): `ds.attrs`\n", + "\n", + "Also, here are some references if you are new to xarray\n", + "* [Xarray in 45 minutes](https://tutorial.xarray.dev/overview/xarray-in-45-min.html)\n", + "* [Handling NetCDF Files using XArray for Absolute Beginners](https://towardsdatascience.com/handling-netcdf-files-using-xarray-for-absolute-beginners-111a8ab4463f)\n", + "\n", + "Try the following commands in action\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b3eac323", + "metadata": {}, + "outputs": [], + "source": [ + "ds = ds_single" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "77ecf85d", + "metadata": {}, + "outputs": [], + "source": [ + "# Dataset \n", + "ds" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "25e7de09", + "metadata": {}, + "outputs": [], + "source": [ + "# Data array, Variable\n", + "# Note: the info can be overwhelming.
Do not attempt to grasp everything at the first glance.\n", + "ds.variables" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8c41b67e", + "metadata": {}, + "outputs": [], + "source": [ + "# dimensions\n", + "ds.dims" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "156f1dfc", + "metadata": {}, + "outputs": [], + "source": [ + "# coordinates\n", + "ds.coords" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "277d6064", + "metadata": {}, + "outputs": [], + "source": [ + "# Meta Data (Attributes)\n", + "ds.attrs" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d334681f", + "metadata": {}, + "outputs": [], + "source": [ + "# type\n", + "print(type(ds))\n", + "print(type(ds.variables))\n", + "print(type(ds.dims))\n", + "print(type(ds.coords))\n", + "print(type(ds.attrs))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "643399d6", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: retrieve variable names only\n", + "list(ds.variables)" + ] + }, + { + "cell_type": "markdown", + "id": "1cf36878", + "metadata": {}, + "source": [ + "#### Discussion: variables vs. coordinates vs. dimensions\n", + "From [Components of a NetCDF Dataset](https://iprc.soest.hawaii.edu/users/xfu/tool/guidef-7.html#):\n", + "\n", + "* Dimension: A dimension may be used to represent a real physical dimension, for example, time, latitude, longitude, or height. A dimension might also be used to index other quantities, for example station or model-run-number.\n", + "A netCDF dimension has both a name and a length. A dimension length is an arbitrary positive integer, except that one dimension in a netCDF dataset can have the length UNLIMITED.\n", + "\n", + "* Variables: Variables are used to store the bulk of the data in a netCDF dataset. A variable represents an array of values of the same type. A scalar value is treated as a 0-dimensional array.
A variable has a name, a data type, and a shape described by its list of dimensions specified when the variable is created. A variable may also have associated attributes, which may be added, deleted or changed after the variable is created.\n", + "\n", + "* Coordinate (Variables): It is legal for a variable to have the same name as a dimension. Such variables have no special meaning to the netCDF library. However there is a convention that such variables should be treated in a special way by software using this library.\n", + "A variable with the same name as a dimension is called a coordinate variable. It typically defines a physical coordinate corresponding to that dimension.\n", + "\n", + "\n", + "Tip: By definition, coordinates (variables that share a name with a dimension) are also variables. For example, ds.time is a coordinate, but it is also a special variable. In practice, however, when we talk about a variable we mean a regular (non-coordinate) variable. We will use this convention for the remainder of the notebook. " + ] + }, + { + "cell_type": "markdown", + "id": "d47120f9", + "metadata": {}, + "source": [ + "### Xarray Variable\n", + "\n", + "\n", + "We can use `ds.variables` to access the variables of a dataset. But viewing the print-out of the whole dataset can often be overwhelming. Instead, we can work on an individual variable (also called an xarray data array) using the `ds[var_name]` syntax, where `var_name` is the name of the variable. See the following examples in action.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5a57e136", + "metadata": {}, + "outputs": [], + "source": [ + "print(type(ds[\"time\"]))\n", + "ds[\"time\"]" + ] + }, + { + "cell_type": "markdown", + "id": "51ec643e", + "metadata": {}, + "source": [ + "#### Variable properties\n", + "For an individual variable, the following properties are the most commonly used, listed with the associated xarray attribute used to retrieve them.
(assuming var = ds[var_name])\n", + "* name: `var.name`\n", + "* data content: `var.data`\n", + "* attributes: `var.attrs`\n", + "* dimensions: `var.dims`\n", + "* data type: `var.data.dtype`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a632a525", + "metadata": {}, + "outputs": [], + "source": [ + "var_name = \"time\"\n", + "var = ds[var_name]\n", + "\n", + "print(\"var.name: \\n\", var.name, \"\\n\")\n", + "print(\"var.data: \\n\", var.data, \"\\n\")\n", + "print(\"var.attrs: \\n\", var.attrs, \"\\n\")\n", + "print(\"var.dims: \\n\", var.dims, \"\\n\")\n", + "print(\"var.data.dtype: \\n\", var.data.dtype, \"\\n\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b256b07f", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: individual variable info table (collect and display variable info in a tabular format)\n", + "df_info = pd.DataFrame()\n", + "df_info[\"var_name\"] = list(ds.variables)\n", + "df_info[\"dims\"] = df_info.var_name.apply(lambda x: ds[x].dims)\n", + "df_info[\"is_dim\"] = df_info.var_name.apply(lambda x: x in ds.dims)\n", + "df_info[\"n_dim\"] = df_info.var_name.apply(lambda x: len(ds[x].dims))\n", + "df_info[\"attrs\"] = df_info.var_name.apply(lambda x: ds[x].attrs)\n", + "df_info[\"dtype\"] = df_info.var_name.apply(lambda x: ds[x].data.dtype)\n", + "df_info" + ] + }, + { + "cell_type": "markdown", + "id": "e6a36b96", + "metadata": {}, + "source": [ + "### Data cleaning/Preprocessing (skipped)\n", + "Data cleaning and preprocessing is an important stage in the data analysis pipeline. However, it is outside the scope of this notebook. For more information about data cleaning and preprocessing basics, here are some references.
\n", + "* [Xarray Fundamentals](https://earth-env-data-science.github.io/lectures/xarray/xarray.html)\n", + "* [Xarray Tutorial — Pangeo Gallery documentation](http://gallery.pangeo.io/repos/pangeo-data/pangeo-tutorial-gallery/xarray.html)\n", + "* [Pythonic Data Cleaning With pandas and NumPy](https://realpython.com/python-data-cleaning-numpy-pandas/)\n", + "* [Pandas - Cleaning Data](https://www.w3schools.com/python/pandas/pandas_cleaning.asp)" + ] + }, + { + "cell_type": "markdown", + "id": "ae6e1a69", + "metadata": {}, + "source": [ + "### Plotting\n", + "Data visualization (or plotting) is another important data analysis topic and deserves its own discussion. Here in this notebook we will only demonstrate basic tools for simple data visualization tasks.\n", + "\n", + "Here are some references you might find useful:\n", + "* xarray plotting: https://docs.xarray.dev/en/stable/user-guide/plotting.html\n", + "* Atmospheric Community Toolkit (ACT): https://arm-doe.github.io/ACT/index.html\n", + "\n", + "Note: this notebook is auto-generated using a template. It uses a general idea to select variable(s) to plot and is not customized to each individual notebook. Feel free to change the variables of interest, especially if a figure fails to plot." + ] + }, + { + "cell_type": "markdown", + "id": "7e417fb2", + "metadata": {}, + "source": [ + "#### 1-dimensional basic time series plot\n", + "\n", + "For the following plot we would like to find variables such that\n", + "* it has one and only one dimension\n", + "* \"time\" is its coordinate variable\n", + "* it is not a dimension itself,\n", + "* it is not a special variable whose name contains any of the substrings [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "\n", + "Please see the following example in action.
(For more details about pandas filtering, please see the following references.)\n", + "* [pandas: multiple conditions while indexing data frame](https://stackoverflow.com/questions/22591174/pandas-multiple-conditions-while-indexing-data-frame-unexpected-behavior)\n", + "* [pandas.DataFrame.apply](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.apply.html)\n", + "* [How to test if a string contains one of the substrings in a list...](https://stackoverflow.com/questions/26577516/how-to-test-if-a-string-contains-one-of-the-substrings-in-a-list-in-pandas" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "30edac2d", + "metadata": {}, + "outputs": [], + "source": [ + "# (pandas query) filter n_dim==1, non-dimenssional, and not contains the following substrings\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter = df_info[(df_info.n_dim == 1) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (df_info.is_dim==False) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4777a659", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " var_1d = df_filter.var_name.values[0]\n", + " var_1d\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fb5ae985", + "metadata": {}, + "outputs": [], + "source": [ + "# Note: if failed, change to another variable to plot.\n", + "try:\n", + " ds[var_1d].plot()\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "markdown", + "id": "9e41066e", + "metadata": {}, + "source": [ + "#### 2-dimenssional basic plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "09bd4adc", + "metadata": {}, + "outputs": [], + "source": [ + "# (pandas query) filter n_dim==2, non-dimenssional, and not contains the 
following substrings\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_2 = df_info[(df_info.n_dim == 2) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (df_info.is_dim==False) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter_2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2cdea1a9", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " var_2d = df_filter_2.var_name.values[0]\n", + " var_2d\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f5f48879", + "metadata": {}, + "outputs": [], + "source": [ + "# Note: if failed, change to another variable to plot.\n", + "try:\n", + " print(ds[var_2d].dims)\n", + " # ds[var_2d].plot()\n", + "\n", + " # conventionally, use \"time\" as x-axis\n", + " ds[var_2d].plot(x=\"time\")\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "markdown", + "id": "7554b2a6", + "metadata": {}, + "source": [ + "#### qc-plotting (optional)\n", + "\n", + "Note: act qc plotting has more strict requirement, one of them is the associated qc variable need to have \"flag_masks\" attributes. 
Which is added by using ds = act.io.armfiles.read_netcdf(files_list), then ds.clean.cleanup()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "71c2096f", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " ds_act = act.io.armfiles.read_netcdf(full_path)\n", + " print(type(ds_act))\n", + " ds_act.clean.cleanup()\n", + "\n", + " # or \n", + " # ds.clean.cleanup()\n", + "except Exception as e:\n", + " print(\"ERROR\", e)\n", + " ds_act = ds" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "844f8505", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter valid variables for ACT qc plotting\n", + "condition = (\"qc_\" + df_info.var_name).apply(lambda x: ds_act[x].attrs.get(\"flag_masks\") is not None \n", + " if x in list(ds.variables) else False)\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_3 = df_info[(df_info.is_dim==False) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings))) &\n", + " condition\n", + " ]\n", + "df_filter_3" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ecd38cc6", + "metadata": {}, + "outputs": [], + "source": [ + "# QC Plot\n", + "try:\n", + " qc_variable = df_filter_3.var_name.values[0]\n", + " print(qc_variable)\n", + "except Exception as e:\n", + " print(e)\n", + " \n", + "try:\n", + "\n", + " # Plot\n", + " qc_display = act.plotting.TimeSeriesDisplay(ds_act)\n", + " qc_display.add_subplots((2,), figsize = (9.5,10))\n", + " qc_ax = qc_display.plot(qc_variable, subplot_index=(0,), set_title=\"QC results on field: \" + qc_variable,)\n", + " qc_ax.grid()\n", + " qc_display.qc_flag_block_plot(qc_variable, subplot_index=(1,))\n", + " qc_ax.set_xlabel(f\"UTC Time starts at {ds.time.data[0]}\")\n", + "\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e)" + ] + }, + { + "cell_type": "markdown", + "id": "d76e4d27", + "metadata": 
{}, + "source": [ + "#### bonus: choose variables to plot from a dropdown menu " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bb733804", + "metadata": {}, + "outputs": [], + "source": [ + "# Valid variables filtering\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_4 = df_info[(df_info.is_dim==False) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter_4" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "82d5c20d", + "metadata": {}, + "outputs": [], + "source": [ + "# example 1: using xarray plot\n", + "\n", + "# Uncomment the following cell to try the interactive plot (ctrl + /)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2fc9aaa4", + "metadata": {}, + "outputs": [], + "source": [ + "# %matplotlib widget\n", + "# plt.clf()\n", + "\n", + "# fig, ax = plt.subplots(figsize=(10, 4))\n", + "\n", + "# available_variables = df_filter_4.var_name.values\n", + "# @widgets.interact(var=available_variables)\n", + "# def update(var = available_variables[0]):\n", + "# fig.clear() # Remove old lines from plot and plot new one\n", + "# if len(ds[var].dims)==2:\n", + "# ds[var].plot(x=\"time\", add_colorbar=False)\n", + "# else:\n", + "# ds[var].plot()\n", + "# plt.grid()\n", + "# plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fabdd802", + "metadata": {}, + "outputs": [], + "source": [ + "# example 2: using act plot\n", + "\n", + "# Uncomment the following cell to try the interactive plot (ctrl + /)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "962b4186", + "metadata": {}, + "outputs": [], + "source": [ + "# %matplotlib widget\n", + "# plt.clf()\n", + "\n", + "# available_variables = df_filter_4.var_name.values\n", + "\n", + "\n", + "# @widgets.interact(var=available_variables)\n", + "# def 
update(var = available_variables[0]):\n", + "\n", + "# i_display = act.plotting.TimeSeriesDisplay(ds_act)\n", + "# i_display.add_subplots((1,), figsize=(10, 4))\n", + "# ax = i_display.plot(var, subplot_index=(0,), set_title=f\"{var} ({ds_act[var].attrs['long_name']})\",)\n", + "\n", + "# ax.set_xlabel(f\"UTC Time starts at {ds.time.data[0]}\")\n", + "# ax.grid()\n", + "# plt.show()\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.16" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/CLAP/aosclap3w.c1.ipynb b/VAPs/quicklook/CLAP/aosclap3w.c1.ipynb new file mode 100644 index 00000000..728c580e --- /dev/null +++ b/VAPs/quicklook/CLAP/aosclap3w.c1.ipynb @@ -0,0 +1,339 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# AOSCLAP3W.C1 Plots\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/clap) for more information about this vap." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib widget\n", + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import ipywidgets as widgets\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "# Data archive directory\n", + "DATA_DIR = r'/data/archive/'\n", + "\n", + "# Datastream info\n", + "DATASTREAM_NAME = 'aosclap3w'\n", + "DATA_LEVEL = 'c1'\n", + "LOCATIONS = [{'end_date': '2013-06-24', 'facility': 'M1', 'site': 'pvc', 'start_date': '2012-07-16'}]" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "9faaf875", + "metadata": {}, + "source": [ + "## Define site, facility, and date range" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ac6764f5", + "metadata": {}, + "outputs": [], + "source": [ + "print(\"The following locations and date ranges are available for this VAP:\")\n", + "display(pd.DataFrame(LOCATIONS, columns=['site', 'facility', 'start_date', 'end_date']))" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "8d132223", + "metadata": {}, + "source": [ + "#### Define site, facility, and date range (date format: YYYY-MM-DD) using the variables below:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e563983a", + "metadata": {}, + "outputs": [], + "source": [ + "site_facility = ( 'pvc', 'M1' )\n", + "\n", + "date_start = '2013-06-22'\n", + "date_end = '2013-06-24'" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "bccd3dfe-2f99-49a5-bace-ea37e8dd8fc3", + "metadata": {}, + "source": [ + "## Load data files\n", + "Load data files from /data/archive/" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cb4b9a26-c574-49c0-a521-658fa553e39e", + "metadata": {}, 
+ "outputs": [], + "source": [ + "# Compile list of files\n", + "site, facility = site_facility\n", + "d_date_start = datetime.strptime(date_start, '%Y-%m-%d')\n", + "d_date_end = datetime.strptime(date_end, '%Y-%m-%d')\n", + "dir_path = os.path.join(DATA_DIR + site, site + DATASTREAM_NAME + facility + r'.' + DATA_LEVEL )\n", + "dir_path\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6be8f3dc", + "metadata": {}, + "outputs": [], + "source": [ + "from datetime import date, timedelta\n", + "import pandas as pd\n", + "\n", + "def get_ARM_formated_dates(start_date, end_date):\n", + " \"\"\"\n", + " Get a list of ARM conventional formated date lists, based on start_date and end_date(inclusive)\n", + " EXAMPLE:\n", + " get_ARM_formated_dates(start_date=\"20180219\", end_date=\"20180221\")\n", + " >> [\"20180219\", \"20180220\", \"20180221\"] \n", + " \"\"\"\n", + " \n", + " _start_date = pd.to_datetime(start_date)\n", + " _end_date = pd.to_datetime(end_date)\n", + " \n", + " delta = _end_date - _start_date # returns timedelta \n", + " dates = []\n", + "\n", + " for i in range(delta.days + 1):\n", + " day = _start_date + timedelta(days=i)\n", + " day_formated = day.strftime(format=\"%Y%m%d\")\n", + " dates.append(day_formated)\n", + " return dates\n", + "\n", + "\n", + "get_ARM_formated_dates(start_date=date_start, end_date=date_end)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "51feea2e", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter a list of files based on date pattern\n", + "import glob\n", + "dates = get_ARM_formated_dates(start_date=date_start, end_date=date_end)\n", + "files_filter = []\n", + "for date in dates:\n", + " files_filter += glob.glob(f'{dir_path}/*.{date}*.*')\n", + " files_filter\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0e5d0d7", + "metadata": {}, + "outputs": [], + "source": [ + "# Load files as a single dataset\n", + "files_list 
= files_filter \n", + "ds = act.io.armfiles.read_netcdf(files_list)\n", + "ds.clean.cleanup()\n", + "print(f'{len(files_list)} files loaded')\n", + "ds\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "4a551094-9ec0-4b64-b80a-9940573c2f50", + "metadata": {}, + "source": [ + "## Plot time series data\n", + "#### Define the list of variables to be plotted:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d09b789e-84f1-4605-846b-a72c110c8048", + "metadata": {}, + "outputs": [], + "source": [ + "variables_to_plot = ['Ba_B_CLAP3W_1', 'Ba_G_CLAP3W_1', 'Ba_R_CLAP3W_1']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3458fb08-035b-4898-9253-0a94e6f9c97b", + "metadata": {}, + "outputs": [], + "source": [ + "ts_display = act.plotting.TimeSeriesDisplay(ds)\n", + "ts_display.add_subplots((len(variables_to_plot),), figsize = (9.5,4*len(variables_to_plot)))\n", + "\n", + "for i,v in enumerate(variables_to_plot):\n", + " ts_ax = ts_display.plot(v, subplot_index=(i,), set_title=ds.variables[v].attrs['long_name'],)\n", + " ts_ax.grid()\n", + "\n", + "plt.show()\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "194399aa-1907-452b-8ba9-bc31d7f60291", + "metadata": {}, + "source": [ + "## Quality check plots\n", + "#### Define variable for QC plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9a0ef63d-3eeb-48fc-a24a-43258b6134b8", + "metadata": {}, + "outputs": [], + "source": [ + "qc_variable = 'Ba_B_CLAP3W_1'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "532663a3-4dc0-4497-bda8-018c5f91e1c4", + "metadata": {}, + "outputs": [], + "source": [ + "# QC Plot\n", + "if ('qc_'+qc_variable) in ds.variables:\n", + "\n", + " # Plot\n", + " qc_display = act.plotting.TimeSeriesDisplay(ds)\n", + " qc_display.add_subplots((2,), figsize = (9.5,10))\n", + " qc_ax = qc_display.plot(qc_variable, subplot_index=(0,), set_title=\"QC results on field: \" + 
qc_variable,)\n", + " qc_ax.grid()\n", + " qc_display.qc_flag_block_plot(qc_variable, subplot_index=(1,))\n", + "\n", + " plt.show()\n", + "else:\n", + " print(f'QC not available for the selected field: {qc_variable}')\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "2308db16-f362-4033-a11a-c3e5e75ad9ba", + "metadata": {}, + "source": [ + "## Field selection dropdown menu\n", + "Select variable to be plotted from a dropdown menu" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d5792fa8-2253-441a-8ac6-1e07a0345031", + "metadata": {}, + "outputs": [], + "source": [ + "plt.ioff()\n", + "\n", + "# populate dropdown menu with available variables \n", + "available_variables = [v for v in ds.variables if not('time' in v or v.startswith('qc_') or v.startswith('source_')) and 'long_name' in ds.variables[v].attrs]\n", + "d_variable = 'Ba_B_CLAP3W_1'\n", + "dropdown = widgets.Dropdown(\n", + " options = [(ds.variables[v].attrs['long_name'], v) for v in available_variables],\n", + " value= d_variable,\n", + " description='Field:',\n", + " disabled=False,\n", + ")\n", + "dropdown.layout.margin = '0px 30% 0px 20%'\n", + "dropdown.layout.width = '50%'\n", + "\n", + "# set up display\n", + "i_display = act.plotting.TimeSeriesDisplay(ds)\n", + "i_display.add_subplots((1,), figsize = (9.5,5))\n", + "i_ax = i_display.plot(d_variable, subplot_index=(0,), set_title=ds.variables[d_variable].attrs['long_name'],)\n", + "i_ax.grid()\n", + "i_fig = i_display.fig\n", + "\n", + "# update plot callback function\n", + "def update_plot(change):\n", + " i_ax.cla()\n", + " i_ax_new = i_display.plot(change.new, subplot_index=(0,), set_title=ds.variables[change.new].attrs['long_name'],)\n", + " i_ax_new.grid()\n", + " i_fig.canvas.draw()\n", + " i_fig.canvas.flush_events()\n", + "\n", + "dropdown.observe(update_plot, names='value')\n", + "\n", + "widgets.AppLayout(\n", + " header=dropdown,\n", + " center=i_fig.canvas,\n", + " pane_heights=[1, 6,1]\n", 
+ ")\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.12" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/CLDTYPE/CLDTYPE_tutorial.ipynb b/VAPs/quicklook/CLDTYPE/CLDTYPE_tutorial.ipynb new file mode 100644 index 00000000..41ff471b --- /dev/null +++ b/VAPs/quicklook/CLDTYPE/CLDTYPE_tutorial.ipynb @@ -0,0 +1,838 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# CLDTYPE.C1 Notebook\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/cldtype) for more information about this vap." + ] + }, + { + "cell_type": "markdown", + "id": "97097763", + "metadata": {}, + "source": [ + "In this notebook, we demonstrate the workflow to explore ARM vap data (using cldtype as an example.) Value-added products (VAPs) are higher-order data products that have been analyzed and processed to ease scientists’ use of ARM data in atmospheric research and global climate models. \n", + "Here is the main content we will cover." 
+ ] + }, + { + "cell_type": "markdown", + "id": "eddec40f", + "metadata": {}, + "source": [ + "# Table of Content\n", + "## Access the data\n", + "* How to retrieve the data\n", + "* Data path and file name conventions\n", + "* Load data\n", + "## Explore the data\n", + "* NetCDF Data structure\n", + "* Xarray essentials\n", + "* Xarray Variable\n", + "## Plot the data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "import random\n", + "\n", + "import glob\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "id": "2372d5be", + "metadata": {}, + "source": [ + "## Access the data" + ] + }, + { + "cell_type": "markdown", + "id": "5b0d9684", + "metadata": {}, + "source": [ + "### How to retrieve the data\n", + "We assume the path \"/data/archive\" is available where you are running this notebook. You can use `os.path.exists(\"/data/archive\")` to verify if the path exists at your machine.\n", + "\n", + "\n", + "### Data path and file name conventions\n", + "There several common terminologies regarding ARM data, for example, data-stream-name, data-level, site, facility, instrument, etc. (For more details, please see [ARM Data File Standards Version 1.2](https://www.google.com/search?q=arm+datastream+facility+cite+definition&rlz=1C1GCEJ_enUS1029US1029&ei=hb41ZICrBPukqtsPvMODIA&ved=0ahUKEwjAgeiS0aL-AhV7kmoFHbzhAAQQ4dUDCBA&uact=5&oq=arm+datastream+facility+cite+definition&gs_lcp=Cgxnd3Mtd2l6LXNlcnAQAzIFCAAQogQyBQgAEKIEMgUIABCiBDIFCAAQogQyBQgAEKIEOgoIABBHENYEELADOgoIIRCgARDDBBAKSgQIQRgAUJ4DWIoOYO0PaAFwAXgAgAGMAYgBuQmSAQMyLjmYAQCgAQHIAQjAAQE&sclient=gws-wiz-serp).) 
\n", + "\n", + "For example, this notebook is called `cldtype.c1`, where `cldtype` is the \"datastream name\", and `c1` is the \"data level\".\n", + "\n", + "This datastream also contains site `anx` and facility `M1`. (Note: an individual datastream might have multiple site-facility pairs.)\n", + "In such a case, the data of this datastream is stored at `/data/archive/anx/anxcldtypeM1.c1`, which is in the format of `<DATA_DIR>/<site>/<site><DATASTREAM_NAME><facility>.<DATA_LEVEL>`. We can use the following method to assign the datastream directory: `datastream_dir = os.path.join(DATA_DIR, site, site + DATASTREAM_NAME + facility + '.' + DATA_LEVEL )`\n", + "\n", + "The data files under datastream_dir also follow naming conventions. Once at the datastream_dir level, the most important file naming convention to differentiate the files is \"yyyyMMdd.hhmmss\", which comes in handy to filter files based on datetime. For example, we can use `glob.glob(f'{datastream_dir}/*.200709*.*')` to filter files from September 2007.\n", + "\n", + "(Note: refer to https://adc.arm.gov/discovery/#/ and the https://adc.arm.gov/solr8/metadata/select API to explore ARM datastreams and their associated available sites and facilities.)\n", + "\n", + "Please see the following examples in action" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d7e9eb85", + "metadata": {}, + "outputs": [], + "source": [ + "# Verify if DATA_DIR path exists\n", + "DATA_DIR = \"/data/archive\"\n", + "os.path.exists(DATA_DIR)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "586993fd", + "metadata": {}, + "outputs": [], + "source": [ + "# Speicify datastream_dir following the path conventions and check its existence\n", + "DATASTREAM_NAME = \"cldtype\"\n", + "DATA_LEVEL = \"c1\"\n", + "site = \"anx\"\n", + "facility = \"M1\"\n", + "datastream_dir = os.path.join(DATA_DIR, site, site + DATASTREAM_NAME + facility + '.' 
+ DATA_LEVEL )\n", + "print(datastream_dir)\n", + "print(os.path.exists(datastream_dir))\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0742f7c1", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: list 5 (random) files under datastream_dir\n", + "files = os.listdir(datastream_dir)\n", + "files[:5]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "39b98a36", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: get most recent file\n", + "list_of_files = glob.glob(f\"{datastream_dir}/*\") # * means all if need specific format then *.csv\n", + "latest_file = max(list_of_files, key=os.path.getctime)\n", + "latest_file" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "902d514e", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus sort datastream files based on datetime\n", + "files = os.listdir(datastream_dir)\n", + "file_sorted = files.copy()\n", + "file_sorted.sort() \n", + "print(file_sorted[:5])\n", + "\n", + "# to reverse\n", + "file_sorted_reverse = files.copy() \n", + "file_sorted_reverse.sort(reverse=True)\n", + "print(file_sorted_reverse[:5])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ec5923b2", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: pattern matching\n", + "# filter the 200709** files under datastream_dir\n", + "files_filter = glob.glob(f'{datastream_dir}/*.200709*.*')\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c4a4aa18", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: use armnotebook_utils.file_filter (TODO) to filter files based on datastream info \n" + ] + }, + { + "cell_type": "markdown", + "id": "cfeb9efc", + "metadata": {}, + "source": [ + "### Load data\n", + "The arm data is stored in [NetCDF](https://en.wikipedia.org/wiki/NetCDF#:~:text=%22NetCDF%20(network%20Common%20Data%20Form,format%20for%20representing%20scientific%20data.) format. 
We can use xarray's `open_dataset` method to load single file, or `open_mfdataset` to open multiple files. (Note: the latter will still return a single xarray dataset object by combining multiple files.)\n", + "\n", + "Note: open_dataset keeps the file handle open and lazy loads its contents. All parameters are passed directly to open_dataset. It is a preferable method over load_dataset for memory-effiency. (more details, please see [xarray.open_dataset](https://docs.xarray.dev/en/stable/generated/xarray.open_dataset.html))\n", + "\n", + "See the following example in action" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a440a329", + "metadata": {}, + "outputs": [], + "source": [ + "# open single file\n", + "full_path = latest_file\n", + "print(full_path)\n", + "ds_single = xr.open_dataset(full_path)\n", + "ds_single" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0143a3d", + "metadata": {}, + "outputs": [], + "source": [ + "print(ds_single)\n", + "print(type(ds_single))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1c0f8939", + "metadata": {}, + "outputs": [], + "source": [ + "# open multiple files\n", + "n_files = 3\n", + "full_paths = [os.path.join(datastream_dir, f_path) for f_path in file_sorted_reverse[:n_files]]\n", + "print(full_paths)\n", + "try: # Note: sometimes multiple files cannot be merged, so we used try except here.\n", + " ds_mutiple = xr.open_mfdataset(full_paths)\n", + " # ds_mutiple\n", + " print(type(ds_mutiple))\n", + " print(ds_mutiple)\n", + "except Exception as e:\n", + " print(e)" + ] + }, + { + "cell_type": "markdown", + "id": "3fa59943", + "metadata": {}, + "source": [ + "## Explore the data" + ] + }, + { + "cell_type": "markdown", + "id": "571f69b9", + "metadata": {}, + "source": [ + "### NetCDF Data structure \n", + "\n", + "\n", + "(If you are confident with NetCDF basics and xarray essentials, feel free to skip this session.)\n", + "Before we dive into 
data exploration, there are some essential concepts we should be familiar with (shown below): \n", + "* Dataset\n", + "* Data array\n", + "* Variable\n", + "* Dimension\n", + "* Coordinate\n", + "* Data Type\n", + "* Meta Data (Attributes)\n", + "\n", + "We will not go into details about NetCDF basics, but here are some references you might find helpful\n", + "* [Components of a NetCDF Dataset](https://iprc.soest.hawaii.edu/users/xfu/tool/guidef-7.html#)\n", + "* [Network Common Data Form (NetCDF)](https://www.unidata.ucar.edu/software/netcdf/) and [A Brief History of (netCDF) Time](https://www.unidata.ucar.edu/software/netcdf/time/recs.html)\n", + "\n", + "\n", + "\n", + "### Xarray essentials\n", + "Earlier we introduced the xarray package and used `open_dataset` and `open_mfdataset` to retrieve a NetCDF data file as an xarray Dataset object. Recall that you can review the data in a notebook by using `print(ds)` or `ds`. \n", + "\n", + "Using xarray to retrieve the aforementioned NetCDF basics is straightforward. 
In general,\n", + "* Dataset: `ds`\n", + "* Data array: `ds.variables`\n", + "* Variable: `ds.variables`\n", + "* Dimension: `ds.dims`\n", + "* Coordinate: `ds.coords`\n", + "* Data Type: `type(ds)`\n", + "* Meta Data (Attributes): `ds.attrs`\n", + "\n", + "Also, here are some references if you are new to xarray\n", + "* [Xarray in 45 minutes](https://tutorial.xarray.dev/overview/xarray-in-45-min.html)\n", + "* [Handling NetCDF Files using XArray for Absolute Beginners](https://towardsdatascience.com/handling-netcdf-files-using-xarray-for-absolute-beginners-111a8ab4463f)\n", + "\n", + "Try the following commands in action\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b3eac323", + "metadata": {}, + "outputs": [], + "source": [ + "ds = ds_single" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "77ecf85d", + "metadata": {}, + "outputs": [], + "source": [ + "# Dataset \n", + "ds" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "25e7de09", + "metadata": {}, + "outputs": [], + "source": [ + "# Data array, Variable\n", + "# Note: the info can be overwhelming. 
Do not attempt to grasp everything at the first glance.\n", + "ds.variables" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8c41b67e", + "metadata": {}, + "outputs": [], + "source": [ + "# dimenssions\n", + "ds.dims" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "156f1dfc", + "metadata": {}, + "outputs": [], + "source": [ + "# coordinates\n", + "ds.coords" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "277d6064", + "metadata": {}, + "outputs": [], + "source": [ + "# Meta Data (Attributes)\n", + "ds.attrs" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d334681f", + "metadata": {}, + "outputs": [], + "source": [ + "# type\n", + "print(type(ds))\n", + "print(type(ds.variables))\n", + "print(type(ds.dims))\n", + "print(type(ds.coords))\n", + "print(type(ds.attrs))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "643399d6", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: retrieve variable names only\n", + "list(ds.variables)" + ] + }, + { + "cell_type": "markdown", + "id": "1cf36878", + "metadata": {}, + "source": [ + "#### Discussion: variable vs. coordinates vs. dimensions. \n", + "From [Components of a NetCDF Dataset](https://iprc.soest.hawaii.edu/users/xfu/tool/guidef-7.html#):\n", + "\n", + "* Dimension: A dimension may be used to represent a real physical dimension, for example, time, latitude, longitude, or height. A dimension might also be used to index other quantities, for example station or model-run-number.\n", + "A netCDF dimension has both a name and a length. A dimension length is an arbitrary positive integer, except that one dimension in a netCDF dataset can have the length UNLIMITED.\n", + "\n", + "* Variables: Variables are used to store the bulk of the data in a netCDF dataset. A variable represents an array of values of the same type. A scalar value is treated as a 0-dimensional array. 
A variable has a name, a data type, and a shape described by its list of dimensions specified when the variable is created. A variable may also have associated attributes, which may be added, deleted or changed after the variable is created.\n", + "\n", + "* Coordinate (Variables): It is legal for a variable to have the same name as a dimension. Such variables have no special meaning to the netCDF library. However there is a convention that such variables should be treated in a special way by software using this library.\n", + "A variable with the same name as a dimension is called a coordinate variable. It typically defines a physical coordinate corresponding to that dimension.\n", + "\n", + "\n", + "Tips: By definition, coordinates and dimensions are also variables. For example, ds.time is a coordinate but it is also a special variable. In practice, however, when we talk about a variable, we mean a regular (non-coordinate) variable. We will use this convention for the remainder of this notebook. " + ] + }, + { + "cell_type": "markdown", + "id": "d47120f9", + "metadata": {}, + "source": [ + "### Xarray Variable\n", + "\n", + "\n", + "We can use `ds.variables` to access the variables of a dataset. But printing out the whole dataset can often be overwhelming. Instead, we can work on an individual variable (also called an xarray data array) using the `ds[var_name]` syntax, where `var_name` is the name of the variable. See the following examples in action.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5a57e136", + "metadata": {}, + "outputs": [], + "source": [ + "print(type(ds[\"time\"]))\n", + "ds[\"time\"]" + ] + }, + { + "cell_type": "markdown", + "id": "51ec643e", + "metadata": {}, + "source": [ + "#### Variable properties\n", + "For an individual variable, the following properties are the most commonly used, shown with the associated xarray attribute to retrieve them. 
(assuming var = ds[var_name])\n", + "* name: `var.name`\n", + "* data content: `var.data`\n", + "* attributes: `var.attrs`\n", + "* dimensions: `var.dims`\n", + "* data type: `var.data.dtype`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a632a525", + "metadata": {}, + "outputs": [], + "source": [ + "var_name = \"time\"\n", + "var = ds[var_name]\n", + "\n", + "print(\"var.name: \\n\", var.name, \"\\n\")\n", + "print(\"var.data: \\n\", var.data, \"\\n\")\n", + "print(\"var.attrs: \\n\", var.attrs, \"\\n\")\n", + "print(\"var.dims: \\n\", var.dims, \"\\n\")\n", + "print(\"var.data.dtype: \\n\", var.data.dtype, \"\\n\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b256b07f", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: individual variable info table (collect and display variable info in a tabular format)\n", + "df_info = pd.DataFrame()\n", + "df_info[\"var_name\"] = list(ds.variables)\n", + "df_info[\"dims\"] = df_info.var_name.apply(lambda x: ds[x].dims)\n", + "df_info[\"is_dim\"] = df_info.var_name.apply(lambda x: x in ds.dims)\n", + "df_info[\"n_dim\"] = df_info.var_name.apply(lambda x: len(ds[x].dims))\n", + "df_info[\"attrs\"] = df_info.var_name.apply(lambda x: ds[x].attrs)\n", + "df_info[\"dtype\"] = df_info.var_name.apply(lambda x: ds[x].data.dtype)\n", + "df_info" + ] + }, + { + "cell_type": "markdown", + "id": "e6a36b96", + "metadata": {}, + "source": [ + "### Data cleaning/Preprocessing (skipped)\n", + "Data cleaning and preprocessing is an important stage in the data analysis pipeline. However, it is out of the scope of this notebook. For more information about data cleaning and preprocessing basics, here are some references. 
\n", + "* [Xarray Fundamentals](https://earth-env-data-science.github.io/lectures/xarray/xarray.html)\n", + "* [Xarray Tutorial — Pangeo Gallery documentation](http://gallery.pangeo.io/repos/pangeo-data/pangeo-tutorial-gallery/xarray.html)\n", + "* [Pythonic Data Cleaning With pandas and NumPy](https://realpython.com/python-data-cleaning-numpy-pandas/)\n", + "* [Pandas - Cleaning Data](https://www.w3schools.com/python/pandas/pandas_cleaning.asp)" + ] + }, + { + "cell_type": "markdown", + "id": "ae6e1a69", + "metadata": {}, + "source": [ + "### Plotting\n", + "Data visualization (or plotting) is another important data analysis topic and deserves its own discussion. Here in this notebook we will only demonstrate basic tools for simple data visualization tasks.\n", + "\n", + "Here are some references you might find useful:\n", + "* xarray plotting: https://docs.xarray.dev/en/stable/user-guide/plotting.html\n", + "* Atmospheric Community Toolkit (ACT): https://arm-doe.github.io/ACT/index.html\n", + "\n", + "Note: this notebook is auto-generated using a template. It uses a general rule to select variable(s) to plot and is not customized for each individual notebook. Feel free to change the variables of interest, especially if a figure fails to plot." + ] + }, + { + "cell_type": "markdown", + "id": "7e417fb2", + "metadata": {}, + "source": [ + "#### 1-dimensional basic time series plot\n", + "\n", + "For the following plot we would like to find variables such that\n", + "* it has one and only one dimension\n", + "* \"time\" is its coordinate variable\n", + "* it is not a dimension itself,\n", + "* it is not a special variable with substrings within [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "\n", + "Please see the following example in action. 
(For more details about pandas filtering, please see the following references.)\n", + "* [pandas: multiple conditions while indexing data frame](https://stackoverflow.com/questions/22591174/pandas-multiple-conditions-while-indexing-data-frame-unexpected-behavior)\n", + "* [pandas.DataFrame.apply](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.apply.html)\n", + "* [How to test if a string contains one of the substrings in a list...](https://stackoverflow.com/questions/26577516/how-to-test-if-a-string-contains-one-of-the-substrings-in-a-list-in-pandas)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "30edac2d", + "metadata": {}, + "outputs": [], + "source": [ + "# (pandas query) filter n_dim==1, non-dimensional, and not contains the following substrings\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter = df_info[(df_info.n_dim == 1) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (df_info.is_dim==False) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4777a659", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " var_1d = df_filter.var_name.values[0]\n", + " var_1d\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fb5ae985", + "metadata": {}, + "outputs": [], + "source": [ + "# Note: if failed, change to another variable to plot.\n", + "try:\n", + " ds[var_1d].plot()\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "markdown", + "id": "9e41066e", + "metadata": {}, + "source": [ + "#### 2-dimensional basic plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "09bd4adc", + "metadata": {}, + "outputs": [], + "source": [ + "# (pandas query) filter n_dim==2, non-dimensional, and not contains the 
following substrings\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_2 = df_info[(df_info.n_dim == 2) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (df_info.is_dim==False) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter_2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2cdea1a9", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " var_2d = df_filter_2.var_name.values[0]\n", + " var_2d\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f5f48879", + "metadata": {}, + "outputs": [], + "source": [ + "# Note: if failed, change to another variable to plot.\n", + "try:\n", + " print(ds[var_2d].dims)\n", + " # ds[var_2d].plot()\n", + "\n", + " # conventionally, use \"time\" as x-axis\n", + " ds[var_2d].plot(x=\"time\")\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "markdown", + "id": "7554b2a6", + "metadata": {}, + "source": [ + "#### qc-plotting (optional)\n", + "\n", + "Note: act qc plotting has more strict requirement, one of them is the associated qc variable need to have \"flag_masks\" attributes. 
Which is added by using ds = act.io.armfiles.read_netcdf(files_list), then ds.clean.cleanup()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "71c2096f", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " ds_act = act.io.armfiles.read_netcdf(full_path)\n", + " print(type(ds_act))\n", + " ds_act.clean.cleanup()\n", + "\n", + " # or \n", + " # ds.clean.cleanup()\n", + "except Exception as e:\n", + " print(\"ERROR\", e)\n", + " ds_act = ds" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "844f8505", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter valid variables for ACT qc plotting\n", + "condition = (\"qc_\" + df_info.var_name).apply(lambda x: ds_act[x].attrs.get(\"flag_masks\") is not None \n", + " if x in list(ds.variables) else False)\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_3 = df_info[(df_info.is_dim==False) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings))) &\n", + " condition\n", + " ]\n", + "df_filter_3" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ecd38cc6", + "metadata": {}, + "outputs": [], + "source": [ + "# QC Plot\n", + "try:\n", + " qc_variable = df_filter_3.var_name.values[0]\n", + " print(qc_variable)\n", + "except Exception as e:\n", + " print(e)\n", + " \n", + "try:\n", + "\n", + " # Plot\n", + " qc_display = act.plotting.TimeSeriesDisplay(ds_act)\n", + " qc_display.add_subplots((2,), figsize = (9.5,10))\n", + " qc_ax = qc_display.plot(qc_variable, subplot_index=(0,), set_title=\"QC results on field: \" + qc_variable,)\n", + " qc_ax.grid()\n", + " qc_display.qc_flag_block_plot(qc_variable, subplot_index=(1,))\n", + " qc_ax.set_xlabel(f\"UTC Time starts at {ds.time.data[0]}\")\n", + "\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e)" + ] + }, + { + "cell_type": "markdown", + "id": "d76e4d27", + "metadata": 
{}, + "source": [ + "#### bonus: choose variables to plot from a dropdown menu " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bb733804", + "metadata": {}, + "outputs": [], + "source": [ + "# Valid variables filtering\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_4 = df_info[(df_info.is_dim==False) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter_4" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "82d5c20d", + "metadata": {}, + "outputs": [], + "source": [ + "# example 1: using xarray plot\n", + "\n", + "# Uncomment the following cell to try the interactive plot (ctrl + /)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2fc9aaa4", + "metadata": {}, + "outputs": [], + "source": [ + "# %matplotlib widget\n", + "# plt.clf()\n", + "\n", + "# fig, ax = plt.subplots(figsize=(10, 4))\n", + "\n", + "# available_variables = df_filter_4.var_name.values\n", + "# @widgets.interact(var=available_variables)\n", + "# def update(var = available_variables[0]):\n", + "# fig.clear() # Remove old lines from plot and plot new one\n", + "# if len(ds[var].dims)==2:\n", + "# ds[var].plot(x=\"time\", add_colorbar=False)\n", + "# else:\n", + "# ds[var].plot()\n", + "# plt.grid()\n", + "# plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fabdd802", + "metadata": {}, + "outputs": [], + "source": [ + "# example 2: using act plot\n", + "\n", + "# Uncomment the following cell to try the interactive plot (ctrl + /)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "962b4186", + "metadata": {}, + "outputs": [], + "source": [ + "# %matplotlib widget\n", + "# plt.clf()\n", + "\n", + "# available_variables = df_filter_4.var_name.values\n", + "\n", + "\n", + "# @widgets.interact(var=available_variables)\n", + "# def 
update(var = available_variables[0]):\n", + "\n", + "# i_display = act.plotting.TimeSeriesDisplay(ds_act)\n", + "# i_display.add_subplots((1,), figsize=(10, 4))\n", + "# ax = i_display.plot(var, subplot_index=(0,), set_title=f\"{var} ({ds_act[var].attrs['long_name']})\",)\n", + "\n", + "# ax.set_xlabel(f\"UTC Time starts at {ds.time.data[0]}\")\n", + "# ax.grid()\n", + "# plt.show()\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.16" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/CLDTYPE/cldtype.c1.ipynb b/VAPs/quicklook/CLDTYPE/cldtype.c1.ipynb new file mode 100644 index 00000000..c7e2636a --- /dev/null +++ b/VAPs/quicklook/CLDTYPE/cldtype.c1.ipynb @@ -0,0 +1,339 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# CLDTYPE.C1 Plots\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/cldtype) for more information about this vap." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib widget\n", + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import ipywidgets as widgets\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "# Data archive directory\n", + "DATA_DIR = r'/data/archive/'\n", + "\n", + "# Datastream info\n", + "DATASTREAM_NAME = 'cldtype'\n", + "DATA_LEVEL = 'c1'\n", + "LOCATIONS = [{'end_date': '2020-05-31', 'facility': 'M1', 'site': 'anx', 'start_date': '2019-12-01'}, {'end_date': '2023-08-30', 'facility': 'C1', 'site': 'ena', 'start_date': '2015-07-17'}, {'end_date': '2022-09-30', 'facility': 'M1', 'site': 'hou', 'start_date': '2021-10-01'}, {'end_date': '2023-06-15', 'facility': 'M1', 'site': 'guc', 'start_date': '2021-09-14'}, {'end_date': '2019-04-30', 'facility': 'M1', 'site': 'cor', 'start_date': '2018-09-23'}, {'end_date': '2023-08-30', 'facility': 'C1', 'site': 'nsa', 'start_date': '1998-03-25'}, {'end_date': '2023-08-30', 'facility': 'C1', 'site': 'sgp', 'start_date': '1996-11-08'}, {'end_date': '2014-05-03', 'facility': 'C1', 'site': 'twp', 'start_date': '1999-07-01'}, {'end_date': '2009-02-14', 'facility': 'C2', 'site': 'twp', 'start_date': '1998-11-01'}, {'end_date': '2014-05-03', 'facility': 'C3', 'site': 'twp', 'start_date': '2003-01-01'}]" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "9faaf875", + "metadata": {}, + "source": [ + "## Define site, facility, and date range" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ac6764f5", + "metadata": {}, + "outputs": [], + "source": [ + "print(\"The following locations and date ranges are available for this VAP:\")\n", + "display(pd.DataFrame(LOCATIONS, columns=['site', 'facility', 'start_date', 
'end_date']))" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "8d132223", + "metadata": {}, + "source": [ + "#### Define site, facility, and date range (date format: YYYY-MM-DD) using the variables below:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e563983a", + "metadata": {}, + "outputs": [], + "source": [ + "site_facility = ( 'sgp', 'C1' )\n", + "\n", + "date_start = '2023-08-28'\n", + "date_end = '2023-08-30'" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "bccd3dfe-2f99-49a5-bace-ea37e8dd8fc3", + "metadata": {}, + "source": [ + "## Load data files\n", + "Load data files from /data/archive/" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cb4b9a26-c574-49c0-a521-658fa553e39e", + "metadata": {}, + "outputs": [], + "source": [ + "# Compile list of files\n", + "site, facility = site_facility\n", + "d_date_start = datetime.strptime(date_start, '%Y-%m-%d')\n", + "d_date_end = datetime.strptime(date_end, '%Y-%m-%d')\n", + "dir_path = os.path.join(DATA_DIR + site, site + DATASTREAM_NAME + facility + r'.' 
+ DATA_LEVEL )\n", + "dir_path\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6be8f3dc", + "metadata": {}, + "outputs": [], + "source": [ + "from datetime import date, timedelta\n", + "import pandas as pd\n", + "\n", + "def get_ARM_formated_dates(start_date, end_date):\n", + " \"\"\"\n", + " Get a list of ARM conventional formated date lists, based on start_date and end_date(inclusive)\n", + " EXAMPLE:\n", + " get_ARM_formated_dates(start_date=\"20180219\", end_date=\"20180221\")\n", + " >> [\"20180219\", \"20180220\", \"20180221\"] \n", + " \"\"\"\n", + " \n", + " _start_date = pd.to_datetime(start_date)\n", + " _end_date = pd.to_datetime(end_date)\n", + " \n", + " delta = _end_date - _start_date # returns timedelta \n", + " dates = []\n", + "\n", + " for i in range(delta.days + 1):\n", + " day = _start_date + timedelta(days=i)\n", + " day_formated = day.strftime(format=\"%Y%m%d\")\n", + " dates.append(day_formated)\n", + " return dates\n", + "\n", + "\n", + "get_ARM_formated_dates(start_date=date_start, end_date=date_end)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "51feea2e", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter a list of files based on date pattern\n", + "import glob\n", + "dates = get_ARM_formated_dates(start_date=date_start, end_date=date_end)\n", + "files_filter = []\n", + "for date in dates:\n", + " files_filter += glob.glob(f'{dir_path}/*.{date}*.*')\n", + " files_filter\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0e5d0d7", + "metadata": {}, + "outputs": [], + "source": [ + "# Load files as a single dataset\n", + "files_list = files_filter \n", + "ds = act.io.armfiles.read_netcdf(files_list)\n", + "ds.clean.cleanup()\n", + "print(f'{len(files_list)} files loaded')\n", + "ds\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "4a551094-9ec0-4b64-b80a-9940573c2f50", + "metadata": {}, + "source": [ + "## Plot 
time series data\n", + "#### Define the list of variables to be plotted:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d09b789e-84f1-4605-846b-a72c110c8048", + "metadata": {}, + "outputs": [], + "source": [ + "variables_to_plot = ['cloudtype', 'cloud_base_best_estimate', 'cloud_layer_top_height']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3458fb08-035b-4898-9253-0a94e6f9c97b", + "metadata": {}, + "outputs": [], + "source": [ + "ts_display = act.plotting.TimeSeriesDisplay(ds)\n", + "ts_display.add_subplots((len(variables_to_plot),), figsize = (9.5,4*len(variables_to_plot)))\n", + "\n", + "for i,v in enumerate(variables_to_plot):\n", + " ts_ax = ts_display.plot(v, subplot_index=(i,), set_title=ds.variables[v].attrs['long_name'],)\n", + " ts_ax.grid()\n", + "\n", + "plt.show()\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "194399aa-1907-452b-8ba9-bc31d7f60291", + "metadata": {}, + "source": [ + "## Quality check plots\n", + "#### Define variable for QC plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9a0ef63d-3eeb-48fc-a24a-43258b6134b8", + "metadata": {}, + "outputs": [], + "source": [ + "qc_variable = 'cloudtype'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "532663a3-4dc0-4497-bda8-018c5f91e1c4", + "metadata": {}, + "outputs": [], + "source": [ + "# QC Plot\n", + "if ('qc_'+qc_variable) in ds.variables:\n", + "\n", + " # Plot\n", + " qc_display = act.plotting.TimeSeriesDisplay(ds)\n", + " qc_display.add_subplots((2,), figsize = (9.5,10))\n", + " qc_ax = qc_display.plot(qc_variable, subplot_index=(0,), set_title=\"QC results on field: \" + qc_variable,)\n", + " qc_ax.grid()\n", + " qc_display.qc_flag_block_plot(qc_variable, subplot_index=(1,))\n", + "\n", + " plt.show()\n", + "else:\n", + " print(f'QC not available for the selected field: {qc_variable}')\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": 
"2308db16-f362-4033-a11a-c3e5e75ad9ba", + "metadata": {}, + "source": [ + "## Field selection dropdown menu\n", + "Select variable to be plotted from a dropdown menu" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d5792fa8-2253-441a-8ac6-1e07a0345031", + "metadata": {}, + "outputs": [], + "source": [ + "plt.ioff()\n", + "\n", + "# populate dropdown menu with available variables \n", + "available_variables = [v for v in ds.variables if not('time' in v or v.startswith('qc_') or v.startswith('source_')) and 'long_name' in ds.variables[v].attrs]\n", + "d_variable = 'cloudtype'\n", + "dropdown = widgets.Dropdown(\n", + " options = [(ds.variables[v].attrs['long_name'], v) for v in available_variables],\n", + " value= d_variable,\n", + " description='Field:',\n", + " disabled=False,\n", + ")\n", + "dropdown.layout.margin = '0px 30% 0px 20%'\n", + "dropdown.layout.width = '50%'\n", + "\n", + "# set up display\n", + "i_display = act.plotting.TimeSeriesDisplay(ds)\n", + "i_display.add_subplots((1,), figsize = (9.5,5))\n", + "i_ax = i_display.plot(d_variable, subplot_index=(0,), set_title=ds.variables[d_variable].attrs['long_name'],)\n", + "i_ax.grid()\n", + "i_fig = i_display.fig\n", + "\n", + "# update plot callback function\n", + "def update_plot(change):\n", + " i_ax.cla()\n", + " i_ax_new = i_display.plot(change.new, subplot_index=(0,), set_title=ds.variables[change.new].attrs['long_name'],)\n", + " i_ax_new.grid()\n", + " i_fig.canvas.draw()\n", + " i_fig.canvas.flush_events()\n", + "\n", + "dropdown.observe(update_plot, names='value')\n", + "\n", + "widgets.AppLayout(\n", + " header=dropdown,\n", + " center=i_fig.canvas,\n", + " pane_heights=[1, 6,1]\n", + ")\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": 
"python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.12" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/CMAC2/.ipynb_checkpoints/cmac2.c1-checkpoint.ipynb b/VAPs/quicklook/CMAC2/.ipynb_checkpoints/cmac2.c1-checkpoint.ipynb new file mode 100644 index 00000000..425a9cbc --- /dev/null +++ b/VAPs/quicklook/CMAC2/.ipynb_checkpoints/cmac2.c1-checkpoint.ipynb @@ -0,0 +1,3537 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# CMAC2.C1 Plots\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/cmac2) for more information about this vap." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib widget\n", + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import ipywidgets as widgets\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "# Data archive directory\n", + "DATA_DIR = r'/data/archive/'\n", + "\n", + "# Datastream info\n", + "DATASTREAM_NAME = 'cmac2'\n", + "DATA_LEVEL = 'c1'\n", + "LOCATIONS = [{'end_date': '2019-01-22', 'facility': 'I4', 'site': 'sgp', 'start_date': '2018-08-30'}, {'end_date': '2019-04-05', 'facility': 'I5', 'site': 'sgp', 'start_date': '2018-08-30'}, {'end_date': '2019-02-26', 'facility': 'I6', 'site': 'sgp', 'start_date': '2018-08-30'}]" + ] + }, + { + "cell_type": "markdown", + "id": "9faaf875", + "metadata": {}, + "source": [ + "## Define site, facility, and date range" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "ac6764f5", + "metadata": {}, + "outputs": [ 
+ { + "name": "stdout", + "output_type": "stream", + "text": [ + "The following locations and date ranges are available for this VAP:\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sitefacilitystart_dateend_date
0sgpI42018-08-302019-01-22
1sgpI52018-08-302019-04-05
2sgpI62018-08-302019-02-26
\n", + "
" + ], + "text/plain": [ + " site facility start_date end_date\n", + "0 sgp I4 2018-08-30 2019-01-22\n", + "1 sgp I5 2018-08-30 2019-04-05\n", + "2 sgp I6 2018-08-30 2019-02-26" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "print(\"The following locations and date ranges are available for this VAP:\")\n", + "display(pd.DataFrame(LOCATIONS, columns=['site', 'facility', 'start_date', 'end_date']))" + ] + }, + { + "cell_type": "markdown", + "id": "8d132223", + "metadata": {}, + "source": [ + "#### Define site, facility, and date range (date format: YYYY-MM-DD) using the variables below:" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "e563983a", + "metadata": {}, + "outputs": [], + "source": [ + "site_facility = ( 'sgp', 'I4' )\n", + "\n", + "date_start = '2019-01-21'\n", + "date_end = '2019-01-22'" + ] + }, + { + "cell_type": "markdown", + "id": "bccd3dfe-2f99-49a5-bace-ea37e8dd8fc3", + "metadata": {}, + "source": [ + "## Load data files\n", + "Load data files from /data/archive/" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "cb4b9a26-c574-49c0-a521-658fa553e39e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'/data/archive/sgp/sgpcmac2I4.c1'" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Compile list of files\n", + "site, facility = site_facility\n", + "d_date_start = datetime.strptime(date_start, '%Y-%m-%d')\n", + "d_date_end = datetime.strptime(date_end, '%Y-%m-%d')\n", + "dir_path = os.path.join(DATA_DIR + site, site + DATASTREAM_NAME + facility + r'.' 
+ DATA_LEVEL )\n", + "dir_path\n" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "6be8f3dc", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['20190121', '20190122']" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from datetime import date, timedelta\n", + "import pandas as pd\n", + "\n", + "def get_ARM_formated_dates(start_date, end_date):\n", + " \"\"\"\n", + " Get a list of ARM conventional formated date lists, based on start_date and end_date(inclusive)\n", + " EXAMPLE:\n", + " get_ARM_formated_dates(start_date=\"20180219\", end_date=\"20180221\")\n", + " >> [\"20180219\", \"20180220\", \"20180221\"] \n", + " \"\"\"\n", + " \n", + " _start_date = pd.to_datetime(start_date)\n", + " _end_date = pd.to_datetime(end_date)\n", + " \n", + " delta = _end_date - _start_date # returns timedelta \n", + " dates = []\n", + "\n", + " for i in range(delta.days + 1):\n", + " day = _start_date + timedelta(days=i)\n", + " day_formated = day.strftime(format=\"%Y%m%d\")\n", + " dates.append(day_formated)\n", + " return dates\n", + "\n", + "\n", + "get_ARM_formated_dates(start_date=date_start, end_date=date_end)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "51feea2e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['/data/archive/sgp/sgpcmac2I4.c1/sgpcmac2I4.c1.20190122.020009.nc',\n", + " '/data/archive/sgp/sgpcmac2I4.c1/sgpcmac2I4.c1.20190122.044928.nc',\n", + " '/data/archive/sgp/sgpcmac2I4.c1/sgpcmac2I4.c1.20190122.024236.nc',\n", + " '/data/archive/sgp/sgpcmac2I4.c1/sgpcmac2I4.c1.20190122.005559.nc',\n", + " '/data/archive/sgp/sgpcmac2I4.c1/sgpcmac2I4.c1.20190122.003457.nc',\n", + " '/data/archive/sgp/sgpcmac2I4.c1/sgpcmac2I4.c1.20190122.034552.nc',\n", + " '/data/archive/sgp/sgpcmac2I4.c1/sgpcmac2I4.c1.20190122.042813.nc',\n", + " '/data/archive/sgp/sgpcmac2I4.c1/sgpcmac2I4.c1.20190122.022111.nc',\n", + " 
'/data/archive/sgp/sgpcmac2I4.c1/sgpcmac2I4.c1.20190122.030339.nc',\n", + " '/data/archive/sgp/sgpcmac2I4.c1/sgpcmac2I4.c1.20190122.051031.nc',\n", + " '/data/archive/sgp/sgpcmac2I4.c1/sgpcmac2I4.c1.20190122.040656.nc',\n", + " '/data/archive/sgp/sgpcmac2I4.c1/sgpcmac2I4.c1.20190122.011706.nc',\n", + " '/data/archive/sgp/sgpcmac2I4.c1/sgpcmac2I4.c1.20190122.013821.nc',\n", + " '/data/archive/sgp/sgpcmac2I4.c1/sgpcmac2I4.c1.20190122.032444.nc',\n", + " '/data/archive/sgp/sgpcmac2I4.c1/sgpcmac2I4.c1.20190122.001346.nc']" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Filter a list of files based on date pattern\n", + "import glob\n", + "dates = get_ARM_formated_dates(start_date=date_start, end_date=date_end)\n", + "files_filter = []\n", + "for date in dates:\n", + " files_filter += glob.glob(f'{dir_path}/*.{date}*.*')\n", + " files_filter\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "b0e5d0d7", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "15 files loaded\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/kefeimo/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/act/qc/clean.py:234: RuntimeWarning: invalid value encountered in cast\n", + " data = data.astype(dtype)\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
<xarray.Dataset>\n",
+       "Dimensions:                                   (time: 97200, range: 1001,\n",
+       "                                               sweep: 18)\n",
+       "Coordinates:\n",
+       "  * time                                      (time) datetime64[ns] 2019-01-2...\n",
+       "  * range                                     (range) float32 0.0 ... 1e+05\n",
+       "    azimuth                                   (time) float32 dask.array<chunksize=(6480,), meta=np.ndarray>\n",
+       "    elevation                                 (time) float32 dask.array<chunksize=(6480,), meta=np.ndarray>\n",
+       "Dimensions without coordinates: sweep\n",
+       "Data variables: (12/49)\n",
+       "    base_time                                 (time) datetime64[ns] 2019-01-2...\n",
+       "    time_offset                               (time) datetime64[ns] 2019-01-2...\n",
+       "    reflectivity                              (time, range) float32 dask.array<chunksize=(6480, 1001), meta=np.ndarray>\n",
+       "    cross_correlation_ratio_hv                (time, range) float32 dask.array<chunksize=(6480, 1001), meta=np.ndarray>\n",
+       "    normalized_coherent_power                 (time, range) float32 dask.array<chunksize=(6480, 1001), meta=np.ndarray>\n",
+       "    mean_doppler_velocity                     (time, range) float32 dask.array<chunksize=(6480, 1001), meta=np.ndarray>\n",
+       "    ...                                        ...\n",
+       "    path_integrated_attenuation               (time, range) float32 dask.array<chunksize=(6480, 1001), meta=np.ndarray>\n",
+       "    corrected_differential_reflectivity       (time, range) float32 dask.array<chunksize=(6480, 1001), meta=np.ndarray>\n",
+       "    ground_clutter                            (time, range) float32 dask.array<chunksize=(6480, 1001), meta=np.ndarray>\n",
+       "    lat                                       (time) float32 36.58 ... 36.58\n",
+       "    lon                                       (time) float32 -97.36 ... -97.36\n",
+       "    alt                                       (time) float32 330.0 ... 330.0\n",
+       "Attributes: (12/26)\n",
+       "    Conventions:           ARM-1.0 CF/Radial instrument_parameters\n",
+       "    title:                 Atmospheric Radiation Measurement (ARM) program X-...\n",
+       "    institution:           United States Department of Energy - Atmospheric R...\n",
+       "    references:            See XSAPR Instrument Handbook\n",
+       "    source:                Atmospheric Radiation Measurement (ARM) program X-...\n",
+       "    comment:               Data in this file has not be calibrated, corrected...\n",
+       "    ...                    ...\n",
+       "    original_container:    sigmet\n",
+       "    history:               created by user rjackson on machine or-condo-c215....\n",
+       "    _file_dates:           ['20190122', '20190122', '20190122', '20190122', '...\n",
+       "    _file_times:           ['001346', '003457', '005559', '011706', '013821',...\n",
+       "    _datastream:           sgpadicmac2I4.c1\n",
+       "    _arm_standards_flag:   1
" + ], + "text/plain": [ + "\n", + "Dimensions: (time: 97200, range: 1001,\n", + " sweep: 18)\n", + "Coordinates:\n", + " * time (time) datetime64[ns] 2019-01-2...\n", + " * range (range) float32 0.0 ... 1e+05\n", + " azimuth (time) float32 dask.array\n", + " elevation (time) float32 dask.array\n", + "Dimensions without coordinates: sweep\n", + "Data variables: (12/49)\n", + " base_time (time) datetime64[ns] 2019-01-2...\n", + " time_offset (time) datetime64[ns] 2019-01-2...\n", + " reflectivity (time, range) float32 dask.array\n", + " cross_correlation_ratio_hv (time, range) float32 dask.array\n", + " normalized_coherent_power (time, range) float32 dask.array\n", + " mean_doppler_velocity (time, range) float32 dask.array\n", + " ... ...\n", + " path_integrated_attenuation (time, range) float32 dask.array\n", + " corrected_differential_reflectivity (time, range) float32 dask.array\n", + " ground_clutter (time, range) float32 dask.array\n", + " lat (time) float32 36.58 ... 36.58\n", + " lon (time) float32 -97.36 ... -97.36\n", + " alt (time) float32 330.0 ... 330.0\n", + "Attributes: (12/26)\n", + " Conventions: ARM-1.0 CF/Radial instrument_parameters\n", + " title: Atmospheric Radiation Measurement (ARM) program X-...\n", + " institution: United States Department of Energy - Atmospheric R...\n", + " references: See XSAPR Instrument Handbook\n", + " source: Atmospheric Radiation Measurement (ARM) program X-...\n", + " comment: Data in this file has not be calibrated, corrected...\n", + " ... 
...\n", + " original_container: sigmet\n", + " history: created by user rjackson on machine or-condo-c215....\n", + " _file_dates: ['20190122', '20190122', '20190122', '20190122', '...\n", + " _file_times: ['001346', '003457', '005559', '011706', '013821',...\n", + " _datastream: sgpadicmac2I4.c1\n", + " _arm_standards_flag: 1" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Load files as a single dataset\n", + "files_list = files_filter \n", + "ds = act.io.armfiles.read_netcdf(files_list)\n", + "ds.clean.cleanup()\n", + "print(f'{len(files_list)} files loaded')\n", + "ds\n" + ] + }, + { + "cell_type": "markdown", + "id": "4a551094-9ec0-4b64-b80a-9940573c2f50", + "metadata": {}, + "source": [ + "## Plot time series data\n", + "#### Define the list of variables to be plotted:" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "d09b789e-84f1-4605-846b-a72c110c8048", + "metadata": {}, + "outputs": [], + "source": [ + "variables_to_plot = ['reflectivity', 'cross_correlation_ratio_hv', 'normalized_coherent_power']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3458fb08-035b-4898-9253-0a94e6f9c97b", + "metadata": {}, + "outputs": [], + "source": [ + "ts_display = act.plotting.TimeSeriesDisplay(ds)\n", + "ts_display.add_subplots((len(variables_to_plot),), figsize = (9.5,4*len(variables_to_plot)))\n", + "\n", + "for i,v in enumerate(variables_to_plot):\n", + " ts_ax = ts_display.plot(v, subplot_index=(i,), set_title=ds.variables[v].attrs['long_name'],)\n", + " ts_ax.grid()\n", + "\n", + "plt.show()\n" + ] + }, + { + "cell_type": "markdown", + "id": "2308db16-f362-4033-a11a-c3e5e75ad9ba", + "metadata": {}, + "source": [ + "## Field selection dropdown menu\n", + "Select variable to be plotted from a dropdown menu" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d5792fa8-2253-441a-8ac6-1e07a0345031", + "metadata": {}, + "outputs": [], + 
"source": [ + "plt.ioff()\n", + "\n", + "# populate dropdown menu with available variables \n", + "available_variables = [v for v in ds.variables if not('time' in v or v.startswith('qc_') or v.startswith('source_')) and 'long_name' in ds.variables[v].attrs]\n", + "d_variable = 'reflectivity'\n", + "dropdown = widgets.Dropdown(\n", + " options = [(ds.variables[v].attrs['long_name'], v) for v in available_variables],\n", + " value= d_variable,\n", + " description='Field:',\n", + " disabled=False,\n", + ")\n", + "dropdown.layout.margin = '0px 30% 0px 20%'\n", + "dropdown.layout.width = '50%'\n", + "\n", + "# set up display\n", + "i_display = act.plotting.TimeSeriesDisplay(ds)\n", + "i_display.add_subplots((1,), figsize = (9.5,5))\n", + "i_ax = i_display.plot(d_variable, subplot_index=(0,), set_title=ds.variables[d_variable].attrs['long_name'],)\n", + "i_ax.grid()\n", + "i_fig = i_display.fig\n", + "\n", + "# update plot callback function\n", + "def update_plot(change):\n", + " i_ax.cla()\n", + " i_ax_new = i_display.plot(change.new, subplot_index=(0,), set_title=ds.variables[change.new].attrs['long_name'],)\n", + " i_ax_new.grid()\n", + " i_fig.canvas.draw()\n", + " i_fig.canvas.flush_events()\n", + "\n", + "dropdown.observe(update_plot, names='value')\n", + "\n", + "widgets.AppLayout(\n", + " header=dropdown,\n", + " center=i_fig.canvas,\n", + " pane_heights=[1, 6,1]\n", + ")\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.16" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git 
a/VAPs/quicklook/CMAC2/CMAC2_tutorial.ipynb b/VAPs/quicklook/CMAC2/CMAC2_tutorial.ipynb new file mode 100644 index 00000000..144d0787 --- /dev/null +++ b/VAPs/quicklook/CMAC2/CMAC2_tutorial.ipynb @@ -0,0 +1,838 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# CMAC2.C1 Notebook\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/cmac2) for more information about this vap." + ] + }, + { + "cell_type": "markdown", + "id": "97097763", + "metadata": {}, + "source": [ + "In this notebook, we demonstrate the workflow to explore ARM vap data (using cmac2 as an example.) Value-added products (VAPs) are higher-order data products that have been analyzed and processed to ease scientists’ use of ARM data in atmospheric research and global climate models. \n", + "Here is the main content we will cover." + ] + }, + { + "cell_type": "markdown", + "id": "eddec40f", + "metadata": {}, + "source": [ + "# Table of Content\n", + "## Access the data\n", + "* How to retrieve the data\n", + "* Data path and file name conventions\n", + "* Load data\n", + "## Explore the data\n", + "* NetCDF Data structure\n", + "* Xarray essentials\n", + "* Xarray Variable\n", + "## Plot the data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "import random\n", + "\n", + "import glob\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "id": "2372d5be", + "metadata": {}, + "source": [ + "## Access the data" + ] + }, + { + "cell_type": "markdown", + "id": "5b0d9684", + "metadata": {}, + "source": [ + "### How to retrieve the data\n", + "We assume 
the path \"/data/archive\" is available where you are running this notebook. You can use `os.path.exists(\"/data/archive\")` to verify if the path exists on your machine.\n", + "\n", + "\n", + "### Data path and file name conventions\n", + "There are several common terminologies regarding ARM data, for example, data-stream-name, data-level, site, facility, instrument, etc. (For more details, please see [ARM Data File Standards Version 1.2](https://www.google.com/search?q=arm+datastream+facility+cite+definition&rlz=1C1GCEJ_enUS1029US1029&ei=hb41ZICrBPukqtsPvMODIA&ved=0ahUKEwjAgeiS0aL-AhV7kmoFHbzhAAQQ4dUDCBA&uact=5&oq=arm+datastream+facility+cite+definition&gs_lcp=Cgxnd3Mtd2l6LXNlcnAQAzIFCAAQogQyBQgAEKIEMgUIABCiBDIFCAAQogQyBQgAEKIEOgoIABBHENYEELADOgoIIRCgARDDBBAKSgQIQRgAUJ4DWIoOYO0PaAFwAXgAgAGMAYgBuQmSAQMyLjmYAQCgAQHIAQjAAQE&sclient=gws-wiz-serp).) \n", + "\n", + "For example, this notebook is called `cmac2.c1`, where `cmac2` is the \"datastream name\", and `c1` is the \"data level\".\n", + "\n", + "This datastream also contains site `sgp` and facility `I4`. (Note: an individual datastream might have multiple site-facility pairs.)\n", + "In such a case, the data of this data-stream is stored at `/data/archive/sgp/sgpcmac2I4.c1`, which is in the format of `{DATA_DIR}/{site}/{site}{DATASTREAM_NAME}{facility}.{DATA_LEVEL}`. We can use the following method to assign the data-stream directory `datastream_dir = os.path.join(DATA_DIR, site, site + DATASTREAM_NAME + facility + '.' + DATA_LEVEL )`\n", + "\n", + "The data files under datastream_dir also follow naming conventions. But once we reach the datastream_dir level, the most important file naming convention to differentiate the files is \"yyyyMMdd.hhmmss\", which comes in handy to filter out files based on datetime.
For example, we can use `glob.glob(f'{datastream_dir}/*.200709*.*')` to filter files in September 2007.\n", + "\n", + "(Note: refer to https://adc.arm.gov/discovery/#/ and the https://adc.arm.gov/solr8/metadata/select API to explore ARM datastreams and their associated available sites and facilities.)\n", + "\n", + "Please see the following examples in action" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d7e9eb85", + "metadata": {}, + "outputs": [], + "source": [ + "# Verify if DATA_DIR path exists\n", + "DATA_DIR = \"/data/archive\"\n", + "os.path.exists(DATA_DIR)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "586993fd", + "metadata": {}, + "outputs": [], + "source": [ + "# Specify datastream_dir following the path conventions and check its existence\n", + "DATASTREAM_NAME = \"cmac2\"\n", + "DATA_LEVEL = \"c1\"\n", + "site = \"sgp\"\n", + "facility = \"I4\"\n", + "datastream_dir = os.path.join(DATA_DIR, site, site + DATASTREAM_NAME + facility + '.'
+ DATA_LEVEL )\n", + "print(datastream_dir)\n", + "print(os.path.exists(datastream_dir))\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0742f7c1", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: list 5 (random) files under datastream_dir\n", + "files = os.listdir(datastream_dir)\n", + "files[:5]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "39b98a36", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: get most recent file\n", + "list_of_files = glob.glob(f\"{datastream_dir}/*\") # * means all if need specific format then *.csv\n", + "latest_file = max(list_of_files, key=os.path.getctime)\n", + "latest_file" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "902d514e", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus sort datastream files based on datetime\n", + "files = os.listdir(datastream_dir)\n", + "file_sorted = files.copy()\n", + "file_sorted.sort() \n", + "print(file_sorted[:5])\n", + "\n", + "# to reverse\n", + "file_sorted_reverse = files.copy() \n", + "file_sorted_reverse.sort(reverse=True)\n", + "print(file_sorted_reverse[:5])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ec5923b2", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: pattern matching\n", + "# filter the 200709** files under datastream_dir\n", + "files_filter = glob.glob(f'{datastream_dir}/*.200709*.*')\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c4a4aa18", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: use armnotebook_utils.file_filter (TODO) to filter files based on datastream info \n" + ] + }, + { + "cell_type": "markdown", + "id": "cfeb9efc", + "metadata": {}, + "source": [ + "### Load data\n", + "The arm data is stored in [NetCDF](https://en.wikipedia.org/wiki/NetCDF#:~:text=%22NetCDF%20(network%20Common%20Data%20Form,format%20for%20representing%20scientific%20data.) format. 
We can use xarray's `open_dataset` method to load single file, or `open_mfdataset` to open multiple files. (Note: the latter will still return a single xarray dataset object by combining multiple files.)\n", + "\n", + "Note: open_dataset keeps the file handle open and lazy loads its contents. All parameters are passed directly to open_dataset. It is a preferable method over load_dataset for memory-effiency. (more details, please see [xarray.open_dataset](https://docs.xarray.dev/en/stable/generated/xarray.open_dataset.html))\n", + "\n", + "See the following example in action" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a440a329", + "metadata": {}, + "outputs": [], + "source": [ + "# open single file\n", + "full_path = latest_file\n", + "print(full_path)\n", + "ds_single = xr.open_dataset(full_path)\n", + "ds_single" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0143a3d", + "metadata": {}, + "outputs": [], + "source": [ + "print(ds_single)\n", + "print(type(ds_single))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1c0f8939", + "metadata": {}, + "outputs": [], + "source": [ + "# open multiple files\n", + "n_files = 3\n", + "full_paths = [os.path.join(datastream_dir, f_path) for f_path in file_sorted_reverse[:n_files]]\n", + "print(full_paths)\n", + "try: # Note: sometimes multiple files cannot be merged, so we used try except here.\n", + " ds_mutiple = xr.open_mfdataset(full_paths)\n", + " # ds_mutiple\n", + " print(type(ds_mutiple))\n", + " print(ds_mutiple)\n", + "except Exception as e:\n", + " print(e)" + ] + }, + { + "cell_type": "markdown", + "id": "3fa59943", + "metadata": {}, + "source": [ + "## Explore the data" + ] + }, + { + "cell_type": "markdown", + "id": "571f69b9", + "metadata": {}, + "source": [ + "### NetCDF Data structure \n", + "\n", + "\n", + "(If you are confident with NetCDF basics and xarray essentials, feel free to skip this session.)\n", + "Before we dive into 
data exploration, there are some essential concepts we should be familiar with (shown below): \n", + "* Dataset\n", + "* Data array\n", + "* Variable\n", + "* Dimension\n", + "* Coordinate\n", + "* Data Type\n", + "* Meta Data (Attributes)\n", + "\n", + "We will not go into details about NetCDF basics, but here are some references you might find helpful\n", + "* [Components of a NetCDF Dataset](https://iprc.soest.hawaii.edu/users/xfu/tool/guidef-7.html#)\n", + "* [Network Common Data Form (NetCDF)](https://www.unidata.ucar.edu/software/netcdf/) and [A Brief History of (netCDF) Time](https://www.unidata.ucar.edu/software/netcdf/time/recs.html)\n", + "\n", + "\n", + "\n", + "### Xarray essentials\n", + "Earlier we introduced the xarray package and used `open_dataset` and `open_mfdataset` to retrieve NetCDF data files as an xarray Dataset object. Recall that you can review the data in a notebook by using `print(ds)` or `ds`. \n", + "\n", + "Using xarray to retrieve the aforementioned NetCDF basics is straightforward.
In general,\n", + "* Dataset: `ds`\n", + "* Data array: `ds.variables`\n", + "* Variable: `ds.variables`\n", + "* Dimension: `ds.dims`\n", + "* Coordinate: `ds.coords`\n", + "* Data Type: `type`\n", + "* Meta Data (Attributes): `ds.attrs`\n", + "\n", + "Also, here are some references if you are new to xarray\n", + "* [Xarray in 45 minutes](https://tutorial.xarray.dev/overview/xarray-in-45-min.html)\n", + "* [Handling NetCDF Files using XArray for Absolute Beginners](https://towardsdatascience.com/handling-netcdf-files-using-xarray-for-absolute-beginners-111a8ab4463f)\n", + "\n", + "Try the following commands in action\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b3eac323", + "metadata": {}, + "outputs": [], + "source": [ + "ds = ds_single" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "77ecf85d", + "metadata": {}, + "outputs": [], + "source": [ + "# Dataset \n", + "ds" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "25e7de09", + "metadata": {}, + "outputs": [], + "source": [ + "# Data array, Variable\n", + "# Note: the info can be overwhelming.
Do not attempt to grasp everything at first glance.\n", + "ds.variables" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8c41b67e", + "metadata": {}, + "outputs": [], + "source": [ + "# dimensions\n", + "ds.dims" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "156f1dfc", + "metadata": {}, + "outputs": [], + "source": [ + "# coordinates\n", + "ds.coords" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "277d6064", + "metadata": {}, + "outputs": [], + "source": [ + "# Meta Data (Attributes)\n", + "ds.attrs" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d334681f", + "metadata": {}, + "outputs": [], + "source": [ + "# type\n", + "print(type(ds))\n", + "print(type(ds.variables))\n", + "print(type(ds.dims))\n", + "print(type(ds.coords))\n", + "print(type(ds.attrs))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "643399d6", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: retrieve variable names only\n", + "list(ds.variables)" + ] + }, + { + "cell_type": "markdown", + "id": "1cf36878", + "metadata": {}, + "source": [ + "#### Discussion: variable vs. coordinates vs. dimensions. \n", + "From [Components of a NetCDF Dataset](https://iprc.soest.hawaii.edu/users/xfu/tool/guidef-7.html#):\n", + "\n", + "* Dimension: A dimension may be used to represent a real physical dimension, for example, time, latitude, longitude, or height. A dimension might also be used to index other quantities, for example station or model-run-number.\n", + "A netCDF dimension has both a name and a length. A dimension length is an arbitrary positive integer, except that one dimension in a netCDF dataset can have the length UNLIMITED.\n", + "\n", + "* Variables: Variables are used to store the bulk of the data in a netCDF dataset. A variable represents an array of values of the same type. A scalar value is treated as a 0-dimensional array.
A variable has a name, a data type, and a shape described by its list of dimensions specified when the variable is created. A variable may also have associated attributes, which may be added, deleted or changed after the variable is created.\n", + "\n", + "* Coordinate (Variables): It is legal for a variable to have the same name as a dimension. Such variables have no special meaning to the netCDF library. However there is a convention that such variables should be treated in a special way by software using this library.\n", + "A variable with the same name as a dimension is called a coordinate variable. It typically defines a physical coordinate corresponding to that dimension.\n", + "\n", + "\n", + "Tips: By definition, coordinates and dimensions are also variables. For example, ds.time is a coordinate, but it is also a special variable. In practice, however, when we talk about a variable, we mean a regular (non-coordinate) variable. We will use this convention for the remainder of this notebook. " + ] + }, + { + "cell_type": "markdown", + "id": "d47120f9", + "metadata": {}, + "source": [ + "### Xarray Variable\n", + "\n", + "\n", + "We can use `ds.variables` to access the variables of a dataset. But printing out the whole dataset can often be overwhelming. Instead, we can work on an individual variable (also called an xarray data array) using the `ds[var_name]` syntax, where `var_name` is the name of the variable. See the following examples in action.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5a57e136", + "metadata": {}, + "outputs": [], + "source": [ + "print(type(ds[\"time\"]))\n", + "ds[\"time\"]" + ] + }, + { + "cell_type": "markdown", + "id": "51ec643e", + "metadata": {}, + "source": [ + "#### Variable properties\n", + "For an individual variable, the following properties are the most commonly used, listed with the associated xarray attribute to retrieve them.
(assuming var = ds[var_name])\n", + "* name: `var.name`\n", + "* data content: `var.data`\n", + "* attributes: `var.attrs`\n", + "* dimensions: `var.dims`\n", + "* data type: `var.data.dtype`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a632a525", + "metadata": {}, + "outputs": [], + "source": [ + "var_name = \"time\"\n", + "var = ds[var_name]\n", + "\n", + "print(\"var.name: \\n\", var.name, \"\\n\")\n", + "print(\"var.data: \\n\", var.data, \"\\n\")\n", + "print(\"var.attrs: \\n\", var.attrs, \"\\n\")\n", + "print(\"var.dims: \\n\", var.dims, \"\\n\")\n", + "print(\"var.data.dtype: \\n\", var.data.dtype, \"\\n\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b256b07f", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: individual variable info table (collect and display variable info in a tabular format)\n", + "df_info = pd.DataFrame()\n", + "df_info[\"var_name\"] = list(ds.variables)\n", + "df_info[\"dims\"] = df_info.var_name.apply(lambda x: ds[x].dims)\n", + "df_info[\"is_dim\"] = df_info.var_name.apply(lambda x: x in ds.dims)\n", + "df_info[\"n_dim\"] = df_info.var_name.apply(lambda x: len(ds[x].dims))\n", + "df_info[\"attrs\"] = df_info.var_name.apply(lambda x: ds[x].attrs)\n", + "df_info[\"dtype\"] = df_info.var_name.apply(lambda x: ds[x].data.dtype)\n", + "df_info" + ] + }, + { + "cell_type": "markdown", + "id": "e6a36b96", + "metadata": {}, + "source": [ + "### Data cleaning/Preprocessing (skipped)\n", + "Data cleaning and preprocessing is an important stage in the data analysis pipeline. However, it is outside the scope of this notebook. For more information about data cleaning and preprocessing basics, here are some references.
\n", + "* [Xarray Fundamentals](https://earth-env-data-science.github.io/lectures/xarray/xarray.html)\n", + "* [Xarray Tutorial — Pangeo Gallery documentation](http://gallery.pangeo.io/repos/pangeo-data/pangeo-tutorial-gallery/xarray.html)\n", + "* [Pythonic Data Cleaning With pandas and NumPy](https://realpython.com/python-data-cleaning-numpy-pandas/)\n", + "* [Pandas - Cleaning Data](https://www.w3schools.com/python/pandas/pandas_cleaning.asp)" + ] + }, + { + "cell_type": "markdown", + "id": "ae6e1a69", + "metadata": {}, + "source": [ + "### Plotting\n", + "Data visualization (or plotting) is another important data analysis topic and deserves its own discussion. Here in this notebook we will only demonstrate basic tools for simple data visualization tasks.\n", + "\n", + "Here are some references you might find useful:\n", + "* xarray plotting: https://docs.xarray.dev/en/stable/user-guide/plotting.html\n", + "* Atmospheric Community Toolkit (ACT): https://arm-doe.github.io/ACT/index.html\n", + "\n", + "Note: this notebook is auto-generated using a template. It uses a general approach to select the variable(s) to plot and is not customized to each individual notebook. Feel free to change the variables of interest, especially if a figure fails to plot." + ] + }, + { + "cell_type": "markdown", + "id": "7e417fb2", + "metadata": {}, + "source": [ + "#### 1-dimensional basic time series plot\n", + "\n", + "For the following plot we would like to find variables such that\n", + "* it has one and only one dimension\n", + "* \"time\" is its coordinate variable\n", + "* it is not a dimension itself,\n", + "* it is not a special variable with substrings within [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "\n", + "Please see the following example in action.
(For more details about pandas filtering, please see the following references.)\n", + "* [pandas: multiple conditions while indexing data frame](https://stackoverflow.com/questions/22591174/pandas-multiple-conditions-while-indexing-data-frame-unexpected-behavior)\n", + "* [pandas.DataFrame.apply](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.apply.html)\n", + "* [How to test if a string contains one of the substrings in a list...](https://stackoverflow.com/questions/26577516/how-to-test-if-a-string-contains-one-of-the-substrings-in-a-list-in-pandas)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "30edac2d", + "metadata": {}, + "outputs": [], + "source": [ + "# (pandas query) filter n_dim==1, non-dimensional, and not contains the following substrings\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter = df_info[(df_info.n_dim == 1) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (df_info.is_dim==False) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4777a659", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " var_1d = df_filter.var_name.values[0]\n", + " var_1d\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fb5ae985", + "metadata": {}, + "outputs": [], + "source": [ + "# Note: if failed, change to another variable to plot.\n", + "try:\n", + " ds[var_1d].plot()\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "markdown", + "id": "9e41066e", + "metadata": {}, + "source": [ + "#### 2-dimensional basic plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "09bd4adc", + "metadata": {}, + "outputs": [], + "source": [ + "# (pandas query) filter n_dim==2, non-dimensional, and not contains the
following substrings\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_2 = df_info[(df_info.n_dim == 2) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (df_info.is_dim==False) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter_2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2cdea1a9", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " var_2d = df_filter_2.var_name.values[0]\n", + " var_2d\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f5f48879", + "metadata": {}, + "outputs": [], + "source": [ + "# Note: if failed, change to another variable to plot.\n", + "try:\n", + " print(ds[var_2d].dims)\n", + " # ds[var_2d].plot()\n", + "\n", + " # conventionally, use \"time\" as x-axis\n", + " ds[var_2d].plot(x=\"time\")\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "markdown", + "id": "7554b2a6", + "metadata": {}, + "source": [ + "#### qc-plotting (optional)\n", + "\n", + "Note: act qc plotting has more strict requirement, one of them is the associated qc variable need to have \"flag_masks\" attributes. 
Which is added by using ds = act.io.armfiles.read_netcdf(files_list), then ds.clean.cleanup()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "71c2096f", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " ds_act = act.io.armfiles.read_netcdf(full_path)\n", + " print(type(ds_act))\n", + " ds_act.clean.cleanup()\n", + "\n", + " # or \n", + " # ds.clean.cleanup()\n", + "except Exception as e:\n", + " print(\"ERROR\", e)\n", + " ds_act = ds" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "844f8505", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter valid variables for ACT qc plotting\n", + "condition = (\"qc_\" + df_info.var_name).apply(lambda x: ds_act[x].attrs.get(\"flag_masks\") is not None \n", + " if x in list(ds.variables) else False)\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_3 = df_info[(df_info.is_dim==False) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings))) &\n", + " condition\n", + " ]\n", + "df_filter_3" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ecd38cc6", + "metadata": {}, + "outputs": [], + "source": [ + "# QC Plot\n", + "try:\n", + " qc_variable = df_filter_3.var_name.values[0]\n", + " print(qc_variable)\n", + "except Exception as e:\n", + " print(e)\n", + " \n", + "try:\n", + "\n", + " # Plot\n", + " qc_display = act.plotting.TimeSeriesDisplay(ds_act)\n", + " qc_display.add_subplots((2,), figsize = (9.5,10))\n", + " qc_ax = qc_display.plot(qc_variable, subplot_index=(0,), set_title=\"QC results on field: \" + qc_variable,)\n", + " qc_ax.grid()\n", + " qc_display.qc_flag_block_plot(qc_variable, subplot_index=(1,))\n", + " qc_ax.set_xlabel(f\"UTC Time starts at {ds.time.data[0]}\")\n", + "\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e)" + ] + }, + { + "cell_type": "markdown", + "id": "d76e4d27", + "metadata": 
{}, + "source": [ + "#### bonus: choose variables to plot from a dropdown menu " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bb733804", + "metadata": {}, + "outputs": [], + "source": [ + "# Valid variables filtering\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_4 = df_info[(df_info.is_dim==False) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter_4" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "82d5c20d", + "metadata": {}, + "outputs": [], + "source": [ + "# example 1: using xarray plot\n", + "\n", + "# Uncomment the following cell to try the interactive plot (ctrl + /)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2fc9aaa4", + "metadata": {}, + "outputs": [], + "source": [ + "# %matplotlib widget\n", + "# plt.clf()\n", + "\n", + "# fig, ax = plt.subplots(figsize=(10, 4))\n", + "\n", + "# available_variables = df_filter_4.var_name.values\n", + "# @widgets.interact(var=available_variables)\n", + "# def update(var = available_variables[0]):\n", + "# fig.clear() # Remove old lines from plot and plot new one\n", + "# if len(ds[var].dims)==2:\n", + "# ds[var].plot(x=\"time\", add_colorbar=False)\n", + "# else:\n", + "# ds[var].plot()\n", + "# plt.grid()\n", + "# plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fabdd802", + "metadata": {}, + "outputs": [], + "source": [ + "# example 2: using act plot\n", + "\n", + "# Uncomment the following cell to try the interactive plot (ctrl + /)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "962b4186", + "metadata": {}, + "outputs": [], + "source": [ + "# %matplotlib widget\n", + "# plt.clf()\n", + "\n", + "# available_variables = df_filter_4.var_name.values\n", + "\n", + "\n", + "# @widgets.interact(var=available_variables)\n", + "# def 
update(var = available_variables[0]):\n", + "\n", + "# i_display = act.plotting.TimeSeriesDisplay(ds_act)\n", + "# i_display.add_subplots((1,), figsize=(10, 4))\n", + "# ax = i_display.plot(var, subplot_index=(0,), set_title=f\"{var} ({ds_act[var].attrs['long_name']})\",)\n", + "\n", + "# ax.set_xlabel(f\"UTC Time starts at {ds.time.data[0]}\")\n", + "# ax.grid()\n", + "# plt.show()\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.16" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/CMAC2/cmac2.c1.ipynb b/VAPs/quicklook/CMAC2/cmac2.c1.ipynb new file mode 100644 index 00000000..5a555298 --- /dev/null +++ b/VAPs/quicklook/CMAC2/cmac2.c1.ipynb @@ -0,0 +1,297 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# CMAC2.C1 Plots\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/cmac2) for more information about this vap." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib widget\n", + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import ipywidgets as widgets\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "# Data archive directory\n", + "DATA_DIR = r'/data/archive/'\n", + "\n", + "# Datastream info\n", + "DATASTREAM_NAME = 'cmac2'\n", + "DATA_LEVEL = 'c1'\n", + "LOCATIONS = [{'end_date': '2019-01-22', 'facility': 'I4', 'site': 'sgp', 'start_date': '2018-08-30'}, {'end_date': '2019-04-05', 'facility': 'I5', 'site': 'sgp', 'start_date': '2018-08-30'}, {'end_date': '2019-02-26', 'facility': 'I6', 'site': 'sgp', 'start_date': '2018-08-30'}]" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "9faaf875", + "metadata": {}, + "source": [ + "## Define site, facility, and date range" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ac6764f5", + "metadata": {}, + "outputs": [], + "source": [ + "print(\"The following locations and date ranges are available for this VAP:\")\n", + "display(pd.DataFrame(LOCATIONS, columns=['site', 'facility', 'start_date', 'end_date']))" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "8d132223", + "metadata": {}, + "source": [ + "#### Define site, facility, and date range (date format: YYYY-MM-DD) using the variables below:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e563983a", + "metadata": {}, + "outputs": [], + "source": [ + "site_facility = ( 'sgp', 'I4' )\n", + "\n", + "date_start = '2019-01-21'\n", + "date_end = '2019-01-22'" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "bccd3dfe-2f99-49a5-bace-ea37e8dd8fc3", + "metadata": {}, + "source": [ + "## Load data 
files\n", + "Load data files from /data/archive/" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cb4b9a26-c574-49c0-a521-658fa553e39e", + "metadata": {}, + "outputs": [], + "source": [ + "# Compile list of files\n", + "site, facility = site_facility\n", + "d_date_start = datetime.strptime(date_start, '%Y-%m-%d')\n", + "d_date_end = datetime.strptime(date_end, '%Y-%m-%d')\n", + "dir_path = os.path.join(DATA_DIR + site, site + DATASTREAM_NAME + facility + r'.' + DATA_LEVEL )\n", + "dir_path\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6be8f3dc", + "metadata": {}, + "outputs": [], + "source": [ + "from datetime import date, timedelta\n", + "import pandas as pd\n", + "\n", + "def get_ARM_formated_dates(start_date, end_date):\n", + " \"\"\"\n", + " Get a list of ARM conventional formated date lists, based on start_date and end_date(inclusive)\n", + " EXAMPLE:\n", + " get_ARM_formated_dates(start_date=\"20180219\", end_date=\"20180221\")\n", + " >> [\"20180219\", \"20180220\", \"20180221\"] \n", + " \"\"\"\n", + " \n", + " _start_date = pd.to_datetime(start_date)\n", + " _end_date = pd.to_datetime(end_date)\n", + " \n", + " delta = _end_date - _start_date # returns timedelta \n", + " dates = []\n", + "\n", + " for i in range(delta.days + 1):\n", + " day = _start_date + timedelta(days=i)\n", + " day_formated = day.strftime(format=\"%Y%m%d\")\n", + " dates.append(day_formated)\n", + " return dates\n", + "\n", + "\n", + "get_ARM_formated_dates(start_date=date_start, end_date=date_end)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "51feea2e", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter a list of files based on date pattern\n", + "import glob\n", + "dates = get_ARM_formated_dates(start_date=date_start, end_date=date_end)\n", + "files_filter = []\n", + "for date in dates:\n", + " files_filter += glob.glob(f'{dir_path}/*.{date}*.*')\n", + " files_filter\n", + "files_filter" + ] + 
}, + { + "cell_type": "code", + "execution_count": null, + "id": "b0e5d0d7", + "metadata": {}, + "outputs": [], + "source": [ + "# Load files as a single dataset\n", + "files_list = files_filter \n", + "ds = act.io.armfiles.read_netcdf(files_list)\n", + "ds.clean.cleanup()\n", + "print(f'{len(files_list)} files loaded')\n", + "ds\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "4a551094-9ec0-4b64-b80a-9940573c2f50", + "metadata": {}, + "source": [ + "## Plot time series data\n", + "#### Define the list of variables to be plotted:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d09b789e-84f1-4605-846b-a72c110c8048", + "metadata": {}, + "outputs": [], + "source": [ + "variables_to_plot = ['reflectivity', 'cross_correlation_ratio_hv', 'normalized_coherent_power']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3458fb08-035b-4898-9253-0a94e6f9c97b", + "metadata": {}, + "outputs": [], + "source": [ + "ts_display = act.plotting.TimeSeriesDisplay(ds)\n", + "ts_display.add_subplots((len(variables_to_plot),), figsize = (9.5,4*len(variables_to_plot)))\n", + "\n", + "for i,v in enumerate(variables_to_plot):\n", + " ts_ax = ts_display.plot(v, subplot_index=(i,), set_title=ds.variables[v].attrs['long_name'],)\n", + " ts_ax.grid()\n", + "\n", + "plt.show()\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "2308db16-f362-4033-a11a-c3e5e75ad9ba", + "metadata": {}, + "source": [ + "## Field selection dropdown menu\n", + "Select variable to be plotted from a dropdown menu" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d5792fa8-2253-441a-8ac6-1e07a0345031", + "metadata": {}, + "outputs": [], + "source": [ + "plt.ioff()\n", + "\n", + "# populate dropdown menu with available variables \n", + "available_variables = [v for v in ds.variables if not('time' in v or v.startswith('qc_') or v.startswith('source_')) and 'long_name' in ds.variables[v].attrs]\n", + "d_variable 
= 'reflectivity'\n", + "dropdown = widgets.Dropdown(\n", + " options = [(ds.variables[v].attrs['long_name'], v) for v in available_variables],\n", + " value= d_variable,\n", + " description='Field:',\n", + " disabled=False,\n", + ")\n", + "dropdown.layout.margin = '0px 30% 0px 20%'\n", + "dropdown.layout.width = '50%'\n", + "\n", + "# set up display\n", + "i_display = act.plotting.TimeSeriesDisplay(ds)\n", + "i_display.add_subplots((1,), figsize = (9.5,5))\n", + "i_ax = i_display.plot(d_variable, subplot_index=(0,), set_title=ds.variables[d_variable].attrs['long_name'],)\n", + "i_ax.grid()\n", + "i_fig = i_display.fig\n", + "\n", + "# update plot callback function\n", + "def update_plot(change):\n", + " i_ax.cla()\n", + " i_ax_new = i_display.plot(change.new, subplot_index=(0,), set_title=ds.variables[change.new].attrs['long_name'],)\n", + " i_ax_new.grid()\n", + " i_fig.canvas.draw()\n", + " i_fig.canvas.flush_events()\n", + "\n", + "dropdown.observe(update_plot, names='value')\n", + "\n", + "widgets.AppLayout(\n", + " header=dropdown,\n", + " center=i_fig.canvas,\n", + " pane_heights=[1, 6,1]\n", + ")\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.12" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/CO-AIR/CO-AIR_tutorial.ipynb b/VAPs/quicklook/CO-AIR/CO-AIR_tutorial.ipynb new file mode 100644 index 00000000..4c6b0624 --- /dev/null +++ b/VAPs/quicklook/CO-AIR/CO-AIR_tutorial.ipynb @@ -0,0 +1,838 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": 
"70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# AAFCO.C1 Notebook\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/co-air) for more information about this vap." + ] + }, + { + "cell_type": "markdown", + "id": "97097763", + "metadata": {}, + "source": [ + "In this notebook, we demonstrate the workflow to explore ARM vap data (using aafco as an example.) Value-added products (VAPs) are higher-order data products that have been analyzed and processed to ease scientists’ use of ARM data in atmospheric research and global climate models. \n", + "Here is the main content we will cover." + ] + }, + { + "cell_type": "markdown", + "id": "eddec40f", + "metadata": {}, + "source": [ + "# Table of Content\n", + "## Access the data\n", + "* How to retrieve the data\n", + "* Data path and file name conventions\n", + "* Load data\n", + "## Explore the data\n", + "* NetCDF Data structure\n", + "* Xarray essentials\n", + "* Xarray Variable\n", + "## Plot the data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "import random\n", + "\n", + "import glob\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "id": "2372d5be", + "metadata": {}, + "source": [ + "## Access the data" + ] + }, + { + "cell_type": "markdown", + "id": "5b0d9684", + "metadata": {}, + "source": [ + "### How to retrieve the data\n", + "We assume the path \"/data/archive\" is available where you are running this notebook. 
You can use `os.path.exists(\"/data/archive\")` to verify if the path exists on your machine.\n", + "\n", + "\n", + "### Data path and file name conventions\n", + "There are several common terms regarding ARM data, for example, data-stream-name, data-level, site, facility, instrument, etc. (For more details, please see [ARM Data File Standards Version 1.2](https://www.google.com/search?q=arm+datastream+facility+cite+definition&rlz=1C1GCEJ_enUS1029US1029&ei=hb41ZICrBPukqtsPvMODIA&ved=0ahUKEwjAgeiS0aL-AhV7kmoFHbzhAAQQ4dUDCBA&uact=5&oq=arm+datastream+facility+cite+definition&gs_lcp=Cgxnd3Mtd2l6LXNlcnAQAzIFCAAQogQyBQgAEKIEMgUIABCiBDIFCAAQogQyBQgAEKIEOgoIABBHENYEELADOgoIIRCgARDDBBAKSgQIQRgAUJ4DWIoOYO0PaAFwAXgAgAGMAYgBuQmSAQMyLjmYAQCgAQHIAQjAAQE&sclient=gws-wiz-serp).) \n", + "\n", + "For example, this notebook is called `aafco.c1`, where `aafco` is the \"datastream name\", and `c1` is the \"data level\".\n", + "\n", + "This datastream also contains site `cor` and facility `F1`. (Note: an individual datastream might have multiple site-facility pairs.)\n", + "In such a case, the data of this data-stream is stored at `/data/archive/cor/coraafcoF1.c1`, which is in the format of `{DATA_DIR}/{site}/{site}{DATASTREAM_NAME}{facility}.{DATA_LEVEL}`. We can use the following method to assign the data-stream directory `datastream_dir = os.path.join(DATA_DIR, site, site + DATASTREAM_NAME + facility + '.' + DATA_LEVEL )`\n", + "\n", + "The data files under datastream_dir also follow naming conventions. Once you reach the datastream_dir level, the most important file naming convention to differentiate the files is \"yyyyMMdd.hhmmss\", which comes in handy for filtering files based on datetime. 
For example, we can use `glob.glob(f'{datastream_dir}/*.200709*.*')` to filter files from September 2007.\n", + "\n", + "(Note: refer to the https://adc.arm.gov/discovery/#/ and https://adc.arm.gov/solr8/metadata/select API to explore ARM datastreams and their associated available sites and facilities.)\n", + "\n", + "Please see the following examples in action." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d7e9eb85", + "metadata": {}, + "outputs": [], + "source": [ + "# Verify if DATA_DIR path exists\n", + "DATA_DIR = \"/data/archive\"\n", + "os.path.exists(DATA_DIR)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "586993fd", + "metadata": {}, + "outputs": [], + "source": [ + "# Specify datastream_dir following the path conventions and check its existence\n", + "DATASTREAM_NAME = \"aafco\"\n", + "DATA_LEVEL = \"c1\"\n", + "site = \"cor\"\n", + "facility = \"F1\"\n", + "datastream_dir = os.path.join(DATA_DIR, site, site + DATASTREAM_NAME + facility + '.' 
+ DATA_LEVEL )\n", + "print(datastream_dir)\n", + "print(os.path.exists(datastream_dir))\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0742f7c1", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: list 5 (random) files under datastream_dir\n", + "files = os.listdir(datastream_dir)\n", + "files[:5]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "39b98a36", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: get most recent file\n", + "list_of_files = glob.glob(f\"{datastream_dir}/*\") # * means all if need specific format then *.csv\n", + "latest_file = max(list_of_files, key=os.path.getctime)\n", + "latest_file" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "902d514e", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus sort datastream files based on datetime\n", + "files = os.listdir(datastream_dir)\n", + "file_sorted = files.copy()\n", + "file_sorted.sort() \n", + "print(file_sorted[:5])\n", + "\n", + "# to reverse\n", + "file_sorted_reverse = files.copy() \n", + "file_sorted_reverse.sort(reverse=True)\n", + "print(file_sorted_reverse[:5])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ec5923b2", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: pattern matching\n", + "# filter the 200709** files under datastream_dir\n", + "files_filter = glob.glob(f'{datastream_dir}/*.200709*.*')\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c4a4aa18", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: use armnotebook_utils.file_filter (TODO) to filter files based on datastream info \n" + ] + }, + { + "cell_type": "markdown", + "id": "cfeb9efc", + "metadata": {}, + "source": [ + "### Load data\n", + "The arm data is stored in [NetCDF](https://en.wikipedia.org/wiki/NetCDF#:~:text=%22NetCDF%20(network%20Common%20Data%20Form,format%20for%20representing%20scientific%20data.) format. 
We can use xarray's `open_dataset` method to load a single file, or `open_mfdataset` to open multiple files. (Note: the latter will still return a single xarray dataset object by combining multiple files.)\n", + "\n", + "Note: open_dataset keeps the file handle open and lazy loads its contents. All parameters are passed directly to open_dataset. It is a preferable method over load_dataset for memory efficiency. (For more details, please see [xarray.open_dataset](https://docs.xarray.dev/en/stable/generated/xarray.open_dataset.html))\n", + "\n", + "See the following example in action" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a440a329", + "metadata": {}, + "outputs": [], + "source": [ + "# open single file\n", + "full_path = latest_file\n", + "print(full_path)\n", + "ds_single = xr.open_dataset(full_path)\n", + "ds_single" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0143a3d", + "metadata": {}, + "outputs": [], + "source": [ + "print(ds_single)\n", + "print(type(ds_single))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1c0f8939", + "metadata": {}, + "outputs": [], + "source": [ + "# open multiple files\n", + "n_files = 3\n", + "full_paths = [os.path.join(datastream_dir, f_path) for f_path in file_sorted_reverse[:n_files]]\n", + "print(full_paths)\n", + "try: # Note: sometimes multiple files cannot be merged, so we used try except here.\n", + " ds_mutiple = xr.open_mfdataset(full_paths)\n", + " # ds_mutiple\n", + " print(type(ds_mutiple))\n", + " print(ds_mutiple)\n", + "except Exception as e:\n", + " print(e)" + ] + }, + { + "cell_type": "markdown", + "id": "3fa59943", + "metadata": {}, + "source": [ + "## Explore the data" + ] + }, + { + "cell_type": "markdown", + "id": "571f69b9", + "metadata": {}, + "source": [ + "### NetCDF Data structure \n", + "\n", + "\n", + "(If you are confident with NetCDF basics and xarray essentials, feel free to skip this section.)\n", + "Before we dive into 
data exploration, there are some essential concepts we should be familiar with (shown below): \n", + "* Dataset\n", + "* Data array\n", + "* Variable\n", + "* Dimension\n", + "* Coordinate\n", + "* Data Type\n", + "* Meta Data (Attributes)\n", + "\n", + "We will not go into details about NetCDF basics, but here are some references you might find helpful:\n", + "* [Components of a NetCDF Dataset](https://iprc.soest.hawaii.edu/users/xfu/tool/guidef-7.html#)\n", + "* [Network Common Data Form (NetCDF)](https://www.unidata.ucar.edu/software/netcdf/) and [A Brief History of (netCDF) Time](https://www.unidata.ucar.edu/software/netcdf/time/recs.html)\n", + "\n", + "\n", + "\n", + "### Xarray essentials\n", + "Earlier we introduced the xarray package and used `open_dataset` and `open_mfdataset` to retrieve NetCDF data files as an xarray Dataset object. Recall that you can review the data in a notebook by using `print(ds)` or `ds`. \n", + "\n", + "Using xarray to retrieve the aforementioned NetCDF basics is straightforward. 
In general,\n", + "* Dataset: `ds`\n", + "* Data array: `ds.variables`\n", + "* Variable: `ds.variables`\n", + "* Dimension: `ds.dims`\n", + "* Coordinate: `ds.coords`\n", + "* Data Type: `type(ds)`\n", + "* Meta Data (Attributes): `ds.attrs`\n", + "\n", + "Also, here are some references if you are new to xarray:\n", + "* [Xarray in 45 minutes](https://tutorial.xarray.dev/overview/xarray-in-45-min.html)\n", + "* [Handling NetCDF Files using XArray for Absolute Beginners](https://towardsdatascience.com/handling-netcdf-files-using-xarray-for-absolute-beginners-111a8ab4463f)\n", + "\n", + "Try the following commands in action\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b3eac323", + "metadata": {}, + "outputs": [], + "source": [ + "ds = ds_single" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "77ecf85d", + "metadata": {}, + "outputs": [], + "source": [ + "# Dataset \n", + "ds" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "25e7de09", + "metadata": {}, + "outputs": [], + "source": [ + "# Data array, Variable\n", + "# Note: the info can be overwhelming. 
Do not attempt to grasp everything at first glance.\n", + "ds.variables" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8c41b67e", + "metadata": {}, + "outputs": [], + "source": [ + "# dimensions\n", + "ds.dims" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "156f1dfc", + "metadata": {}, + "outputs": [], + "source": [ + "# coordinates\n", + "ds.coords" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "277d6064", + "metadata": {}, + "outputs": [], + "source": [ + "# Meta Data (Attributes)\n", + "ds.attrs" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d334681f", + "metadata": {}, + "outputs": [], + "source": [ + "# type\n", + "print(type(ds))\n", + "print(type(ds.variables))\n", + "print(type(ds.dims))\n", + "print(type(ds.coords))\n", + "print(type(ds.attrs))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "643399d6", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: retrieve variable names only\n", + "list(ds.variables)" + ] + }, + { + "cell_type": "markdown", + "id": "1cf36878", + "metadata": {}, + "source": [ + "#### Discussion: variables vs. coordinates vs. dimensions. \n", + "From [Components of a NetCDF Dataset](https://iprc.soest.hawaii.edu/users/xfu/tool/guidef-7.html#):\n", + "\n", + "* Dimension: A dimension may be used to represent a real physical dimension, for example, time, latitude, longitude, or height. A dimension might also be used to index other quantities, for example station or model-run-number.\n", + "A netCDF dimension has both a name and a length. A dimension length is an arbitrary positive integer, except that one dimension in a netCDF dataset can have the length UNLIMITED.\n", + "\n", + "* Variables: Variables are used to store the bulk of the data in a netCDF dataset. A variable represents an array of values of the same type. A scalar value is treated as a 0-dimensional array. 
A variable has a name, a data type, and a shape described by its list of dimensions specified when the variable is created. A variable may also have associated attributes, which may be added, deleted or changed after the variable is created.\n", + "\n", + "* Coordinate (Variables): It is legal for a variable to have the same name as a dimension. Such variables have no special meaning to the netCDF library. However there is a convention that such variables should be treated in a special way by software using this library.\n", + "A variable with the same name as a dimension is called a coordinate variable. It typically defines a physical coordinate corresponding to that dimension.\n", + "\n", + "\n", + "Tips: By definition, coordinates and dimensions are also variables. For example, ds.time is a coordinate, but it is also a special variable. In practice, however, when we talk about a variable, we mean a regular (non-coordinate) variable. We will use this convention for the rest of the notebook. " + ] + }, + { + "cell_type": "markdown", + "id": "d47120f9", + "metadata": {}, + "source": [ + "### Xarray Variable\n", + "\n", + "\n", + "We can use `ds.variables` to access the variables of a dataset. However, printing out the whole dataset is often overwhelming. Instead, we can work on an individual variable (also called an xarray data array) using the `ds[var_name]` syntax, where `var_name` is the name of the variable. See the following examples in action.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5a57e136", + "metadata": {}, + "outputs": [], + "source": [ + "print(type(ds[\"time\"]))\n", + "ds[\"time\"]" + ] + }, + { + "cell_type": "markdown", + "id": "51ec643e", + "metadata": {}, + "source": [ + "#### Variable properties\n", + "For an individual variable, the following properties are the most commonly used, each shown with the associated xarray expression to retrieve it. 
(assuming var = ds[var_name])\n", + "* name: `var.name`\n", + "* data content: `var.data`\n", + "* attributes: `var.attrs`\n", + "* dimensions: `var.dims`\n", + "* data type: `var.data.dtype`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a632a525", + "metadata": {}, + "outputs": [], + "source": [ + "var_name = \"time\"\n", + "var = ds[var_name]\n", + "\n", + "print(\"var.name: \\n\", var.name, \"\\n\")\n", + "print(\"var.data: \\n\", var.data, \"\\n\")\n", + "print(\"var.attrs: \\n\", var.attrs, \"\\n\")\n", + "print(\"var.dims: \\n\", var.dims, \"\\n\")\n", + "print(\"var.data.dtype: \\n\", var.data.dtype, \"\\n\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b256b07f", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: individual variable info table (collect and display variable info in a tabular format)\n", + "df_info = pd.DataFrame()\n", + "df_info[\"var_name\"] = list(ds.variables)\n", + "df_info[\"dims\"] = df_info.var_name.apply(lambda x: ds[x].dims)\n", + "df_info[\"is_dim\"] = df_info.var_name.apply(lambda x: x in ds.dims)\n", + "df_info[\"n_dim\"] = df_info.var_name.apply(lambda x: len(ds[x].dims))\n", + "df_info[\"attrs\"] = df_info.var_name.apply(lambda x: ds[x].attrs)\n", + "df_info[\"dtype\"] = df_info.var_name.apply(lambda x: ds[x].data.dtype)\n", + "df_info" + ] + }, + { + "cell_type": "markdown", + "id": "e6a36b96", + "metadata": {}, + "source": [ + "### Data cleaning/Preprocessing (skipped)\n", + "Data cleaning and preprocessing is an important stage in the data analysis pipeline. However, it is outside the scope of this notebook. For more information about data cleaning and preprocessing basics, here are some references. 
\n", + "* [Xarray Fundamentals](https://earth-env-data-science.github.io/lectures/xarray/xarray.html)\n", + "* [Xarray Tutorial — Pangeo Gallery documentation](http://gallery.pangeo.io/repos/pangeo-data/pangeo-tutorial-gallery/xarray.html)\n", + "* [Pythonic Data Cleaning With pandas and NumPy](https://realpython.com/python-data-cleaning-numpy-pandas/)\n", + "* [Pandas - Cleaning Data](https://www.w3schools.com/python/pandas/pandas_cleaning.asp)" + ] + }, + { + "cell_type": "markdown", + "id": "ae6e1a69", + "metadata": {}, + "source": [ + "### Plotting\n", + "Data visualization (or plotting) is another important data analysis topic and deserves its own discussion. Here in this notebook we will only demonstrate basic tools for simple data visualization tasks.\n", + "\n", + "Here are some references you might find useful:\n", + "* xarray plotting: https://docs.xarray.dev/en/stable/user-guide/plotting.html\n", + "* Atmospheric Community Toolkit (ACT): https://arm-doe.github.io/ACT/index.html\n", + "\n", + "Note: this notebook is auto-generated using a template. It uses a general idea to select variable(s) to plot and is not customized to each individual notebook. Feel free to change the variables of interest, especially if a figure fails to plot." + ] + }, + { + "cell_type": "markdown", + "id": "7e417fb2", + "metadata": {}, + "source": [ + "#### 1-dimensional basic time series plot\n", + "\n", + "For the following plot, we would like to find a variable such that:\n", + "* it has one and only one dimension\n", + "* \"time\" is its coordinate variable\n", + "* it is not a dimension itself\n", + "* its name does not contain any of the substrings in [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "\n", + "Please see the following example in action. 
(For more details about pandas filtering, please see the following references.)\n", + "* [pandas: multiple conditions while indexing data frame](https://stackoverflow.com/questions/22591174/pandas-multiple-conditions-while-indexing-data-frame-unexpected-behavior)\n", + "* [pandas.DataFrame.apply](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.apply.html)\n", + "* [How to test if a string contains one of the substrings in a list...](https://stackoverflow.com/questions/26577516/how-to-test-if-a-string-contains-one-of-the-substrings-in-a-list-in-pandas)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "30edac2d", + "metadata": {}, + "outputs": [], + "source": [ + "# (pandas query) filter n_dim==1, non-dimension, and not containing the following substrings\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter = df_info[(df_info.n_dim == 1) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (df_info.is_dim==False) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4777a659", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " var_1d = df_filter.var_name.values[0]\n", + " var_1d\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fb5ae985", + "metadata": {}, + "outputs": [], + "source": [ + "# Note: if failed, change to another variable to plot.\n", + "try:\n", + " ds[var_1d].plot()\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "markdown", + "id": "9e41066e", + "metadata": {}, + "source": [ + "#### 2-dimensional basic plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "09bd4adc", + "metadata": {}, + "outputs": [], + "source": [ + "# (pandas query) filter n_dim==2, non-dimension, and not containing the 
following substrings\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_2 = df_info[(df_info.n_dim == 2) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (df_info.is_dim==False) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter_2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2cdea1a9", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " var_2d = df_filter_2.var_name.values[0]\n", + " var_2d\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f5f48879", + "metadata": {}, + "outputs": [], + "source": [ + "# Note: if failed, change to another variable to plot.\n", + "try:\n", + " print(ds[var_2d].dims)\n", + " # ds[var_2d].plot()\n", + "\n", + " # conventionally, use \"time\" as x-axis\n", + " ds[var_2d].plot(x=\"time\")\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "markdown", + "id": "7554b2a6", + "metadata": {}, + "source": [ + "#### qc-plotting (optional)\n", + "\n", + "Note: act qc plotting has more strict requirement, one of them is the associated qc variable need to have \"flag_masks\" attributes. 
Which is added by using ds = act.io.armfiles.read_netcdf(files_list), then ds.clean.cleanup()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "71c2096f", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " ds_act = act.io.armfiles.read_netcdf(full_path)\n", + " print(type(ds_act))\n", + " ds_act.clean.cleanup()\n", + "\n", + " # or \n", + " # ds.clean.cleanup()\n", + "except Exception as e:\n", + " print(\"ERROR\", e)\n", + " ds_act = ds" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "844f8505", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter valid variables for ACT qc plotting\n", + "condition = (\"qc_\" + df_info.var_name).apply(lambda x: ds_act[x].attrs.get(\"flag_masks\") is not None \n", + " if x in list(ds.variables) else False)\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_3 = df_info[(df_info.is_dim==False) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings))) &\n", + " condition\n", + " ]\n", + "df_filter_3" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ecd38cc6", + "metadata": {}, + "outputs": [], + "source": [ + "# QC Plot\n", + "try:\n", + " qc_variable = df_filter_3.var_name.values[0]\n", + " print(qc_variable)\n", + "except Exception as e:\n", + " print(e)\n", + " \n", + "try:\n", + "\n", + " # Plot\n", + " qc_display = act.plotting.TimeSeriesDisplay(ds_act)\n", + " qc_display.add_subplots((2,), figsize = (9.5,10))\n", + " qc_ax = qc_display.plot(qc_variable, subplot_index=(0,), set_title=\"QC results on field: \" + qc_variable,)\n", + " qc_ax.grid()\n", + " qc_display.qc_flag_block_plot(qc_variable, subplot_index=(1,))\n", + " qc_ax.set_xlabel(f\"UTC Time starts at {ds.time.data[0]}\")\n", + "\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e)" + ] + }, + { + "cell_type": "markdown", + "id": "d76e4d27", + "metadata": 
{}, + "source": [ + "#### bonus: choose variables to plot from a dropdown menu " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bb733804", + "metadata": {}, + "outputs": [], + "source": [ + "# Valid variables filtering\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_4 = df_info[(df_info.is_dim==False) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter_4" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "82d5c20d", + "metadata": {}, + "outputs": [], + "source": [ + "# example 1: using xarray plot\n", + "\n", + "# Uncomment the following cell to try the interactive plot (ctrl + /)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2fc9aaa4", + "metadata": {}, + "outputs": [], + "source": [ + "# %matplotlib widget\n", + "# plt.clf()\n", + "\n", + "# fig, ax = plt.subplots(figsize=(10, 4))\n", + "\n", + "# available_variables = df_filter_4.var_name.values\n", + "# @widgets.interact(var=available_variables)\n", + "# def update(var = available_variables[0]):\n", + "# fig.clear() # Remove old lines from plot and plot new one\n", + "# if len(ds[var].dims)==2:\n", + "# ds[var].plot(x=\"time\", add_colorbar=False)\n", + "# else:\n", + "# ds[var].plot()\n", + "# plt.grid()\n", + "# plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fabdd802", + "metadata": {}, + "outputs": [], + "source": [ + "# example 2: using act plot\n", + "\n", + "# Uncomment the following cell to try the interactive plot (ctrl + /)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "962b4186", + "metadata": {}, + "outputs": [], + "source": [ + "# %matplotlib widget\n", + "# plt.clf()\n", + "\n", + "# available_variables = df_filter_4.var_name.values\n", + "\n", + "\n", + "# @widgets.interact(var=available_variables)\n", + "# def 
update(var = available_variables[0]):\n", + "\n", + "# i_display = act.plotting.TimeSeriesDisplay(ds_act)\n", + "# i_display.add_subplots((1,), figsize=(10, 4))\n", + "# ax = i_display.plot(var, subplot_index=(0,), set_title=f\"{var} ({ds_act[var].attrs['long_name']})\",)\n", + "\n", + "# ax.set_xlabel(f\"UTC Time starts at {ds.time.data[0]}\")\n", + "# ax.grid()\n", + "# plt.show()\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.16" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/CO-AIR/aafco.c1.ipynb b/VAPs/quicklook/CO-AIR/aafco.c1.ipynb new file mode 100644 index 00000000..d222d0fb --- /dev/null +++ b/VAPs/quicklook/CO-AIR/aafco.c1.ipynb @@ -0,0 +1,297 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# AAFCO.C1 Plots\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/co-air) for more information about this vap." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib widget\n", + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import ipywidgets as widgets\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "# Data archive directory\n", + "DATA_DIR = r'/data/archive/'\n", + "\n", + "# Datastream info\n", + "DATASTREAM_NAME = 'aafco'\n", + "DATA_LEVEL = 'c1'\n", + "LOCATIONS = [{'end_date': '2018-12-08', 'facility': 'F1', 'site': 'cor', 'start_date': '2018-11-04'}]" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "9faaf875", + "metadata": {}, + "source": [ + "## Define site, facility, and date range" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ac6764f5", + "metadata": {}, + "outputs": [], + "source": [ + "print(\"The following locations and date ranges are available for this VAP:\")\n", + "display(pd.DataFrame(LOCATIONS, columns=['site', 'facility', 'start_date', 'end_date']))" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "8d132223", + "metadata": {}, + "source": [ + "#### Define site, facility, and date range (date format: YYYY-MM-DD) using the variables below:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e563983a", + "metadata": {}, + "outputs": [], + "source": [ + "site_facility = ( 'cor', 'F1' )\n", + "\n", + "date_start = '2018-12-06'\n", + "date_end = '2018-12-08'" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "bccd3dfe-2f99-49a5-bace-ea37e8dd8fc3", + "metadata": {}, + "source": [ + "## Load data files\n", + "Load data files from /data/archive/" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cb4b9a26-c574-49c0-a521-658fa553e39e", + "metadata": {}, + 
"outputs": [], + "source": [ + "# Compile list of files\n", + "site, facility = site_facility\n", + "d_date_start = datetime.strptime(date_start, '%Y-%m-%d')\n", + "d_date_end = datetime.strptime(date_end, '%Y-%m-%d')\n", + "dir_path = os.path.join(DATA_DIR + site, site + DATASTREAM_NAME + facility + r'.' + DATA_LEVEL )\n", + "dir_path\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6be8f3dc", + "metadata": {}, + "outputs": [], + "source": [ + "from datetime import date, timedelta\n", + "import pandas as pd\n", + "\n", + "def get_ARM_formated_dates(start_date, end_date):\n", + " \"\"\"\n", + " Get a list of ARM conventional formated date lists, based on start_date and end_date(inclusive)\n", + " EXAMPLE:\n", + " get_ARM_formated_dates(start_date=\"20180219\", end_date=\"20180221\")\n", + " >> [\"20180219\", \"20180220\", \"20180221\"] \n", + " \"\"\"\n", + " \n", + " _start_date = pd.to_datetime(start_date)\n", + " _end_date = pd.to_datetime(end_date)\n", + " \n", + " delta = _end_date - _start_date # returns timedelta \n", + " dates = []\n", + "\n", + " for i in range(delta.days + 1):\n", + " day = _start_date + timedelta(days=i)\n", + " day_formated = day.strftime(format=\"%Y%m%d\")\n", + " dates.append(day_formated)\n", + " return dates\n", + "\n", + "\n", + "get_ARM_formated_dates(start_date=date_start, end_date=date_end)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "51feea2e", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter a list of files based on date pattern\n", + "import glob\n", + "dates = get_ARM_formated_dates(start_date=date_start, end_date=date_end)\n", + "files_filter = []\n", + "for date in dates:\n", + " files_filter += glob.glob(f'{dir_path}/*.{date}*.*')\n", + " files_filter\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0e5d0d7", + "metadata": {}, + "outputs": [], + "source": [ + "# Load files as a single dataset\n", + "files_list = 
files_filter \n", + "ds = act.io.armfiles.read_netcdf(files_list)\n", + "ds.clean.cleanup()\n", + "print(f'{len(files_list)} files loaded')\n", + "ds\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "4a551094-9ec0-4b64-b80a-9940573c2f50", + "metadata": {}, + "source": [ + "## Plot time series data\n", + "#### Define the list of variables to be plotted:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d09b789e-84f1-4605-846b-a72c110c8048", + "metadata": {}, + "outputs": [], + "source": [ + "variables_to_plot = ['co', 'n2o', 'h2o']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3458fb08-035b-4898-9253-0a94e6f9c97b", + "metadata": {}, + "outputs": [], + "source": [ + "ts_display = act.plotting.TimeSeriesDisplay(ds)\n", + "ts_display.add_subplots((len(variables_to_plot),), figsize = (9.5,4*len(variables_to_plot)))\n", + "\n", + "for i,v in enumerate(variables_to_plot):\n", + " ts_ax = ts_display.plot(v, subplot_index=(i,), set_title=ds.variables[v].attrs['long_name'],)\n", + " ts_ax.grid()\n", + "\n", + "plt.show()\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "2308db16-f362-4033-a11a-c3e5e75ad9ba", + "metadata": {}, + "source": [ + "## Field selection dropdown menu\n", + "Select variable to be plotted from a dropdown menu" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d5792fa8-2253-441a-8ac6-1e07a0345031", + "metadata": {}, + "outputs": [], + "source": [ + "plt.ioff()\n", + "\n", + "# populate dropdown menu with available variables \n", + "available_variables = [v for v in ds.variables if not('time' in v or v.startswith('qc_') or v.startswith('source_')) and 'long_name' in ds.variables[v].attrs]\n", + "d_variable = 'co'\n", + "dropdown = widgets.Dropdown(\n", + " options = [(ds.variables[v].attrs['long_name'], v) for v in available_variables],\n", + " value= d_variable,\n", + " description='Field:',\n", + " disabled=False,\n", + ")\n", + 
"dropdown.layout.margin = '0px 30% 0px 20%'\n", + "dropdown.layout.width = '50%'\n", + "\n", + "# set up display\n", + "i_display = act.plotting.TimeSeriesDisplay(ds)\n", + "i_display.add_subplots((1,), figsize = (9.5,5))\n", + "i_ax = i_display.plot(d_variable, subplot_index=(0,), set_title=ds.variables[d_variable].attrs['long_name'],)\n", + "i_ax.grid()\n", + "i_fig = i_display.fig\n", + "\n", + "# update plot callback function\n", + "def update_plot(change):\n", + " i_ax.cla()\n", + " i_ax_new = i_display.plot(change.new, subplot_index=(0,), set_title=ds.variables[change.new].attrs['long_name'],)\n", + " i_ax_new.grid()\n", + " i_fig.canvas.draw()\n", + " i_fig.canvas.flush_events()\n", + "\n", + "dropdown.observe(update_plot, names='value')\n", + "\n", + "widgets.AppLayout(\n", + " header=dropdown,\n", + " center=i_fig.canvas,\n", + " pane_heights=[1, 6,1]\n", + ")\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.12" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/COGS/COGS_tutorial.ipynb b/VAPs/quicklook/COGS/COGS_tutorial.ipynb new file mode 100644 index 00000000..461cee79 --- /dev/null +++ b/VAPs/quicklook/COGS/COGS_tutorial.ipynb @@ -0,0 +1,838 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# COGS.C1 Notebook\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/cogs) for more information about this vap." 
+ ] + }, + { + "cell_type": "markdown", + "id": "97097763", + "metadata": {}, + "source": [ + "In this notebook, we demonstrate the workflow to explore ARM vap data (using cogs as an example.) Value-added products (VAPs) are higher-order data products that have been analyzed and processed to ease scientists’ use of ARM data in atmospheric research and global climate models. \n", + "Here is the main content we will cover." + ] + }, + { + "cell_type": "markdown", + "id": "eddec40f", + "metadata": {}, + "source": [ + "# Table of Content\n", + "## Access the data\n", + "* How to retrieve the data\n", + "* Data path and file name conventions\n", + "* Load data\n", + "## Explore the data\n", + "* NetCDF Data structure\n", + "* Xarray essentials\n", + "* Xarray Variable\n", + "## Plot the data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "import random\n", + "\n", + "import glob\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "id": "2372d5be", + "metadata": {}, + "source": [ + "## Access the data" + ] + }, + { + "cell_type": "markdown", + "id": "5b0d9684", + "metadata": {}, + "source": [ + "### How to retrieve the data\n", + "We assume the path \"/data/archive\" is available where you are running this notebook. You can use `os.path.exists(\"/data/archive\")` to verify if the path exists at your machine.\n", + "\n", + "\n", + "### Data path and file name conventions\n", + "There several common terminologies regarding ARM data, for example, data-stream-name, data-level, site, facility, instrument, etc. 
(For more details, please see [ARM Data File Standards Version 1.2](https://www.google.com/search?q=arm+datastream+facility+cite+definition&rlz=1C1GCEJ_enUS1029US1029&ei=hb41ZICrBPukqtsPvMODIA&ved=0ahUKEwjAgeiS0aL-AhV7kmoFHbzhAAQQ4dUDCBA&uact=5&oq=arm+datastream+facility+cite+definition&gs_lcp=Cgxnd3Mtd2l6LXNlcnAQAzIFCAAQogQyBQgAEKIEMgUIABCiBDIFCAAQogQyBQgAEKIEOgoIABBHENYEELADOgoIIRCgARDDBBAKSgQIQRgAUJ4DWIoOYO0PaAFwAXgAgAGMAYgBuQmSAQMyLjmYAQCgAQHIAQjAAQE&sclient=gws-wiz-serp).) \n", + "\n", + "For example, this notebook is called `cogs.c1`, where `cogs` is the \"datastream name\", and `c1` is the \"data level\".\n", + "\n", + "This datastream also contains site `sgp` and facility `N1`. (Note: an individual datastream might have multiple site-facility pairs.)\n", + "In such a case, the data of this data-stream is stored at `/data/archive/sgp/sgpcogsN1.c1`, which is in the format of `{DATA_DIR}/{site}/{site}{datastream_name}{facility}.{data_level}`. We can use the following method to assign the data-stream directory `datastream_dir = os.path.join(DATA_DIR, site, site + DATASTREAM_NAME + facility + '.' + DATA_LEVEL )`\n", + "\n", + "The data files under datastream_dir also follow naming conventions. But once you reach the datastream_dir level, the most important file naming convention to differentiate the files is \"yyyyMMdd.hhmmss\", which comes in handy to filter files based on datetime. 
For example, we can use `glob.glob(f'{datastream_dir}/*.200709*.*')` to filter files from September 2007.\n", + "\n", + "(Note: refer to https://adc.arm.gov/discovery/#/ and the https://adc.arm.gov/solr8/metadata/select API to explore ARM datastreams and their associated available sites and facilities.)\n", + "\n", + "Please see the following examples in action" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d7e9eb85", + "metadata": {}, + "outputs": [], + "source": [ + "# Verify if DATA_DIR path exists\n", + "DATA_DIR = \"/data/archive\"\n", + "os.path.exists(DATA_DIR)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "586993fd", + "metadata": {}, + "outputs": [], + "source": [ + "# Specify datastream_dir following the path conventions and check its existence\n", + "DATASTREAM_NAME = \"cogs\"\n", + "DATA_LEVEL = \"c1\"\n", + "site = \"sgp\"\n", + "facility = \"N1\"\n", + "datastream_dir = os.path.join(DATA_DIR, site, site + DATASTREAM_NAME + facility + '.' 
+ DATA_LEVEL )\n", + "print(datastream_dir)\n", + "print(os.path.exists(datastream_dir))\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0742f7c1", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: list 5 (random) files under datastream_dir\n", + "files = os.listdir(datastream_dir)\n", + "files[:5]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "39b98a36", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: get most recent file\n", + "list_of_files = glob.glob(f\"{datastream_dir}/*\") # * means all if need specific format then *.csv\n", + "latest_file = max(list_of_files, key=os.path.getctime)\n", + "latest_file" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "902d514e", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus sort datastream files based on datetime\n", + "files = os.listdir(datastream_dir)\n", + "file_sorted = files.copy()\n", + "file_sorted.sort() \n", + "print(file_sorted[:5])\n", + "\n", + "# to reverse\n", + "file_sorted_reverse = files.copy() \n", + "file_sorted_reverse.sort(reverse=True)\n", + "print(file_sorted_reverse[:5])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ec5923b2", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: pattern matching\n", + "# filter the 200709** files under datastream_dir\n", + "files_filter = glob.glob(f'{datastream_dir}/*.200709*.*')\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c4a4aa18", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: use armnotebook_utils.file_filter (TODO) to filter files based on datastream info \n" + ] + }, + { + "cell_type": "markdown", + "id": "cfeb9efc", + "metadata": {}, + "source": [ + "### Load data\n", + "The arm data is stored in [NetCDF](https://en.wikipedia.org/wiki/NetCDF#:~:text=%22NetCDF%20(network%20Common%20Data%20Form,format%20for%20representing%20scientific%20data.) format. 
We can use xarray's `open_dataset` method to load single file, or `open_mfdataset` to open multiple files. (Note: the latter will still return a single xarray dataset object by combining multiple files.)\n", + "\n", + "Note: open_dataset keeps the file handle open and lazy loads its contents. All parameters are passed directly to open_dataset. It is a preferable method over load_dataset for memory-effiency. (more details, please see [xarray.open_dataset](https://docs.xarray.dev/en/stable/generated/xarray.open_dataset.html))\n", + "\n", + "See the following example in action" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a440a329", + "metadata": {}, + "outputs": [], + "source": [ + "# open single file\n", + "full_path = latest_file\n", + "print(full_path)\n", + "ds_single = xr.open_dataset(full_path)\n", + "ds_single" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0143a3d", + "metadata": {}, + "outputs": [], + "source": [ + "print(ds_single)\n", + "print(type(ds_single))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1c0f8939", + "metadata": {}, + "outputs": [], + "source": [ + "# open multiple files\n", + "n_files = 3\n", + "full_paths = [os.path.join(datastream_dir, f_path) for f_path in file_sorted_reverse[:n_files]]\n", + "print(full_paths)\n", + "try: # Note: sometimes multiple files cannot be merged, so we used try except here.\n", + " ds_mutiple = xr.open_mfdataset(full_paths)\n", + " # ds_mutiple\n", + " print(type(ds_mutiple))\n", + " print(ds_mutiple)\n", + "except Exception as e:\n", + " print(e)" + ] + }, + { + "cell_type": "markdown", + "id": "3fa59943", + "metadata": {}, + "source": [ + "## Explore the data" + ] + }, + { + "cell_type": "markdown", + "id": "571f69b9", + "metadata": {}, + "source": [ + "### NetCDF Data structure \n", + "\n", + "\n", + "(If you are confident with NetCDF basics and xarray essentials, feel free to skip this session.)\n", + "Before we dive into 
data exploration, there are some essential concepts we should be familiar with (shown below): \n", + "* Dataset\n", + "* Data array\n", + "* Variable\n", + "* Dimension\n", + "* Coordinate\n", + "* Data Type\n", + "* Meta Data (Attributes)\n", + "\n", + "We will not go into details about NetCDF basics, but here are some references you might find helpful\n", + "* [Components of a NetCDF Dataset](https://iprc.soest.hawaii.edu/users/xfu/tool/guidef-7.html#)\n", + "* [Network Common Data Form (NetCDF)](https://www.unidata.ucar.edu/software/netcdf/) and [A Brief History of (netCDF) Time](https://www.unidata.ucar.edu/software/netcdf/time/recs.html)\n", + "\n", + "\n", + "\n", + "### Xarray essentials\n", + "Earlier we introduced the xarray package and used `open_dataset` and `open_mfdataset` to load NetCDF data files as an xarray Dataset object. Recall that you can review the data in a notebook by using `print(ds)` or `ds`. \n", + "\n", + "Using xarray to retrieve the aforementioned NetCDF basics is straightforward. 
In general,\n", + "* Dataset: `ds`\n", + "* Data array: `ds.variables`\n", + "* Variable: `ds.variables`\n", + "* Dimension: `ds.dims`\n", + "* Coordinate: `ds.coords`\n", + "* Data Type: `type`\n", + "* Meta Data (Attributes): `ds.attrs`\n", + "\n", + "Also, here are some references if you are new to xarray\n", + "* [Xarray in 45 minutes](https://tutorial.xarray.dev/overview/xarray-in-45-min.html)\n", + "* [Handling NetCDF Files using XArray for Absolute Beginners](https://towardsdatascience.com/handling-netcdf-files-using-xarray-for-absolute-beginners-111a8ab4463f)\n", + "\n", + "Try the following commands in action\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b3eac323", + "metadata": {}, + "outputs": [], + "source": [ + "ds = ds_single" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "77ecf85d", + "metadata": {}, + "outputs": [], + "source": [ + "# Dataset \n", + "ds" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "25e7de09", + "metadata": {}, + "outputs": [], + "source": [ + "# Data array, Variable\n", + "# Note: the info can be overwhelming. 
Do not attempt to grasp everything at the first glance.\n", + "ds.variables" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8c41b67e", + "metadata": {}, + "outputs": [], + "source": [ + "# dimenssions\n", + "ds.dims" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "156f1dfc", + "metadata": {}, + "outputs": [], + "source": [ + "# coordinates\n", + "ds.coords" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "277d6064", + "metadata": {}, + "outputs": [], + "source": [ + "# Meta Data (Attributes)\n", + "ds.attrs" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d334681f", + "metadata": {}, + "outputs": [], + "source": [ + "# type\n", + "print(type(ds))\n", + "print(type(ds.variables))\n", + "print(type(ds.dims))\n", + "print(type(ds.coords))\n", + "print(type(ds.attrs))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "643399d6", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: retrieve variable names only\n", + "list(ds.variables)" + ] + }, + { + "cell_type": "markdown", + "id": "1cf36878", + "metadata": {}, + "source": [ + "#### Discussion: variable vs. coordinates vs. dimenssions. \n", + "From [Components of a NetCDF Dataset](https://iprc.soest.hawaii.edu/users/xfu/tool/guidef-7.html#):\n", + "\n", + "* Dimession: A dimension may be used to represent a real physical dimension, for example, time, latitude, longitude, or height. A dimension might also be used to index other quantities, for example station or model-run-number.\n", + "A netCDF dimension has both a name and a length. A dimension length is an arbitrary positive integer, except that one dimension in a netCDF dataset can have the length UNLIMITED.\n", + "\n", + "* Variables: Variables are used to store the bulk of the data in a netCDF dataset. A variable represents an array of values of the same type. A scalar value is treated as a 0-dimensional array. 
A variable has a name, a data type, and a shape described by its list of dimensions specified when the variable is created. A variable may also have associated attributes, which may be added, deleted or changed after the variable is created.\n", + "\n", + "* Coordinate (Variables): It is legal for a variable to have the same name as a dimension. Such variables have no special meaning to the netCDF library. However there is a convention that such variables should be treated in a special way by software using this library.\n", + "A variable with the same name as a dimension is called a coordinate variable. It typically defines a physical coordinate corresponding to that dimension.\n", + "\n", + "\n", + "Tips: By definition, coordinates and dimensions are also variables. For example, ds.time is a coordinate but it is also a special variable. But in practice, when we talk about a variable, we mean a regular/non-coordinate variable. We will use this convention for the remainder of the notebook. " + ] + }, + { + "cell_type": "markdown", + "id": "d47120f9", + "metadata": {}, + "source": [ + "### Xarray Variable\n", + "\n", + "\n", + "We can use `ds.variables` to access the variables of a dataset. But many times printing out the whole dataset can be overwhelming. Instead, we would work on an individual variable (also called an Xarray data array) using the `ds[var_name]` syntax, where `var_name` is the name of the variable. See the following examples in action.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5a57e136", + "metadata": {}, + "outputs": [], + "source": [ + "print(type(ds[\"time\"]))\n", + "ds[\"time\"]" + ] + }, + { + "cell_type": "markdown", + "id": "51ec643e", + "metadata": {}, + "source": [ + "#### Variable properties\n", + "For an individual variable, the following properties are most often used, shown with the associated xarray method to retrieve them. 
(assuming var = ds[var_name])\n", + "* name: `var.name`\n", + "* data content: `var.data`\n", + "* attributes: `var.attrs`\n", + "* dimenstions: `var.dims`\n", + "* data type: `var.data.dtype`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a632a525", + "metadata": {}, + "outputs": [], + "source": [ + "var_name = \"time\"\n", + "var = ds[var_name]\n", + "\n", + "print(\"var.name: \\n\", var.name, \"\\n\")\n", + "print(\"var.data: \\n\", var.data, \"\\n\")\n", + "print(\"var.attrs: \\n\", var.attrs, \"\\n\")\n", + "print(\"var.dims: \\n\", var.dims, \"\\n\")\n", + "print(\"var.data.dtype: \\n\", var.data.dtype, \"\\n\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b256b07f", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: indivdual variable info table (collect and display variable info as a tabular format)\n", + "df_info = pd.DataFrame()\n", + "df_info[\"var_name\"] = list(ds.variables)\n", + "df_info[\"dims\"] = df_info.var_name.apply(lambda x: ds[x].dims)\n", + "df_info[\"is_dim\"] = df_info.var_name.apply(lambda x: x in ds.dims)\n", + "df_info[\"n_dim\"] = df_info.var_name.apply(lambda x: len(ds[x].dims))\n", + "df_info[\"attrs\"] = df_info.var_name.apply(lambda x: ds[x].attrs)\n", + "df_info[\"dtype\"] = df_info.var_name.apply(lambda x: ds[x].data.dtype)\n", + "df_info" + ] + }, + { + "cell_type": "markdown", + "id": "e6a36b96", + "metadata": {}, + "source": [ + "### Data cleaning/Preprocessing (skipped)\n", + "Data cleaning and preprocessing is an important stage in the the data analysis pipeline. However it will be out of the scope of this notebook. For more information about data cleaning and preprocessing basics, here are some references. 
\n", + "* [Xarray Fundamentals](https://earth-env-data-science.github.io/lectures/xarray/xarray.html)\n", + "* [Xarray Tutorial — Pangeo Gallery documentation](http://gallery.pangeo.io/repos/pangeo-data/pangeo-tutorial-gallery/xarray.html)\n", + "* [Pythonic Data Cleaning With pandas and NumPy](https://realpython.com/python-data-cleaning-numpy-pandas/)\n", + "* [Pandas - Cleaning Data](https://www.w3schools.com/python/pandas/pandas_cleaning.asp)" + ] + }, + { + "cell_type": "markdown", + "id": "ae6e1a69", + "metadata": {}, + "source": [ + "### Plotting\n", + "Data visualization (or plotting) is another important data analysis topic and deserves its own discussion. Here in this notebook we will only demonstrate basic tools for simple data visualization tasks.\n", + "\n", + "Here are some references you might find useful:\n", + "* xarray plotting: https://docs.xarray.dev/en/stable/user-guide/plotting.html\n", + "* Atmospheric Community Toolkit (ACT): https://arm-doe.github.io/ACT/index.html\n", + "\n", + "Note: this notebook is auto-generated using a template. It uses a general idea to select variable(s) to plot and is not customized to each individual notebook. Feel free to change the variables of interest, especially if a certain figure fails to plot." + ] + }, + { + "cell_type": "markdown", + "id": "7e417fb2", + "metadata": {}, + "source": [ + "#### 1-dimensional basic time series plot\n", + "\n", + "For the following plot we would like to find variables such that\n", + "* it has one and only one dimension\n", + "* \"time\" is its coordinate variable\n", + "* it is not a dimension itself\n", + "* it is not a special variable with substrings within [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "\n", + "Please see the following example in action. 
(For more details about pandas filtering, please see the following references.)\n", + "* [pandas: multiple conditions while indexing data frame](https://stackoverflow.com/questions/22591174/pandas-multiple-conditions-while-indexing-data-frame-unexpected-behavior)\n", + "* [pandas.DataFrame.apply](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.apply.html)\n", + "* [How to test if a string contains one of the substrings in a list...](https://stackoverflow.com/questions/26577516/how-to-test-if-a-string-contains-one-of-the-substrings-in-a-list-in-pandas" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "30edac2d", + "metadata": {}, + "outputs": [], + "source": [ + "# (pandas query) filter n_dim==1, non-dimenssional, and not contains the following substrings\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter = df_info[(df_info.n_dim == 1) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (df_info.is_dim==False) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4777a659", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " var_1d = df_filter.var_name.values[0]\n", + " var_1d\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fb5ae985", + "metadata": {}, + "outputs": [], + "source": [ + "# Note: if failed, change to another variable to plot.\n", + "try:\n", + " ds[var_1d].plot()\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "markdown", + "id": "9e41066e", + "metadata": {}, + "source": [ + "#### 2-dimenssional basic plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "09bd4adc", + "metadata": {}, + "outputs": [], + "source": [ + "# (pandas query) filter n_dim==2, non-dimenssional, and not contains the 
following substrings\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_2 = df_info[(df_info.n_dim == 2) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (df_info.is_dim==False) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter_2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2cdea1a9", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " var_2d = df_filter_2.var_name.values[0]\n", + " var_2d\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f5f48879", + "metadata": {}, + "outputs": [], + "source": [ + "# Note: if failed, change to another variable to plot.\n", + "try:\n", + " print(ds[var_2d].dims)\n", + " # ds[var_2d].plot()\n", + "\n", + " # conventionally, use \"time\" as x-axis\n", + " ds[var_2d].plot(x=\"time\")\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "markdown", + "id": "7554b2a6", + "metadata": {}, + "source": [ + "#### qc-plotting (optional)\n", + "\n", + "Note: act qc plotting has more strict requirement, one of them is the associated qc variable need to have \"flag_masks\" attributes. 
Which is added by using ds = act.io.armfiles.read_netcdf(files_list), then ds.clean.cleanup()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "71c2096f", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " ds_act = act.io.armfiles.read_netcdf(full_path)\n", + " print(type(ds_act))\n", + " ds_act.clean.cleanup()\n", + "\n", + " # or \n", + " # ds.clean.cleanup()\n", + "except Exception as e:\n", + " print(\"ERROR\", e)\n", + " ds_act = ds" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "844f8505", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter valid variables for ACT qc plotting\n", + "condition = (\"qc_\" + df_info.var_name).apply(lambda x: ds_act[x].attrs.get(\"flag_masks\") is not None \n", + " if x in list(ds.variables) else False)\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_3 = df_info[(df_info.is_dim==False) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings))) &\n", + " condition\n", + " ]\n", + "df_filter_3" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ecd38cc6", + "metadata": {}, + "outputs": [], + "source": [ + "# QC Plot\n", + "try:\n", + " qc_variable = df_filter_3.var_name.values[0]\n", + " print(qc_variable)\n", + "except Exception as e:\n", + " print(e)\n", + " \n", + "try:\n", + "\n", + " # Plot\n", + " qc_display = act.plotting.TimeSeriesDisplay(ds_act)\n", + " qc_display.add_subplots((2,), figsize = (9.5,10))\n", + " qc_ax = qc_display.plot(qc_variable, subplot_index=(0,), set_title=\"QC results on field: \" + qc_variable,)\n", + " qc_ax.grid()\n", + " qc_display.qc_flag_block_plot(qc_variable, subplot_index=(1,))\n", + " qc_ax.set_xlabel(f\"UTC Time starts at {ds.time.data[0]}\")\n", + "\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e)" + ] + }, + { + "cell_type": "markdown", + "id": "d76e4d27", + "metadata": 
{}, + "source": [ + "#### bonus: choose variables to plot from a dropdown menu " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bb733804", + "metadata": {}, + "outputs": [], + "source": [ + "# Valid variables filtering\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_4 = df_info[(df_info.is_dim==False) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter_4" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "82d5c20d", + "metadata": {}, + "outputs": [], + "source": [ + "# example 1: using xarray plot\n", + "\n", + "# Uncomment the following cell to try the interactive plot (ctrl + /)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2fc9aaa4", + "metadata": {}, + "outputs": [], + "source": [ + "# %matplotlib widget\n", + "# plt.clf()\n", + "\n", + "# fig, ax = plt.subplots(figsize=(10, 4))\n", + "\n", + "# available_variables = df_filter_4.var_name.values\n", + "# @widgets.interact(var=available_variables)\n", + "# def update(var = available_variables[0]):\n", + "# fig.clear() # Remove old lines from plot and plot new one\n", + "# if len(ds[var].dims)==2:\n", + "# ds[var].plot(x=\"time\", add_colorbar=False)\n", + "# else:\n", + "# ds[var].plot()\n", + "# plt.grid()\n", + "# plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fabdd802", + "metadata": {}, + "outputs": [], + "source": [ + "# example 2: using act plot\n", + "\n", + "# Uncomment the following cell to try the interactive plot (ctrl + /)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "962b4186", + "metadata": {}, + "outputs": [], + "source": [ + "# %matplotlib widget\n", + "# plt.clf()\n", + "\n", + "# available_variables = df_filter_4.var_name.values\n", + "\n", + "\n", + "# @widgets.interact(var=available_variables)\n", + "# def 
update(var = available_variables[0]):\n", + "\n", + "# i_display = act.plotting.TimeSeriesDisplay(ds_act)\n", + "# i_display.add_subplots((1,), figsize=(10, 4))\n", + "# ax = i_display.plot(var, subplot_index=(0,), set_title=f\"{var} ({ds_act[var].attrs['long_name']})\",)\n", + "\n", + "# ax.set_xlabel(f\"UTC Time starts at {ds.time.data[0]}\")\n", + "# ax.grid()\n", + "# plt.show()\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.16" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/COGS/cogs.c1.ipynb b/VAPs/quicklook/COGS/cogs.c1.ipynb new file mode 100644 index 00000000..6534f003 --- /dev/null +++ b/VAPs/quicklook/COGS/cogs.c1.ipynb @@ -0,0 +1,297 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# COGS.C1 Plots\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/cogs) for more information about this vap." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib widget\n", + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import ipywidgets as widgets\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "# Data archive directory\n", + "DATA_DIR = r'/data/archive/'\n", + "\n", + "# Datastream info\n", + "DATASTREAM_NAME = 'cogs'\n", + "DATA_LEVEL = 'c1'\n", + "LOCATIONS = [{'end_date': '2019-10-27', 'facility': 'N1', 'site': 'sgp', 'start_date': '2017-09-01'}]" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "9faaf875", + "metadata": {}, + "source": [ + "## Define site, facility, and date range" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ac6764f5", + "metadata": {}, + "outputs": [], + "source": [ + "print(\"The following locations and date ranges are available for this VAP:\")\n", + "display(pd.DataFrame(LOCATIONS, columns=['site', 'facility', 'start_date', 'end_date']))" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "8d132223", + "metadata": {}, + "source": [ + "#### Define site, facility, and date range (date format: YYYY-MM-DD) using the variables below:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e563983a", + "metadata": {}, + "outputs": [], + "source": [ + "site_facility = ( 'sgp', 'N1' )\n", + "\n", + "date_start = '2019-10-25'\n", + "date_end = '2019-10-27'" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "bccd3dfe-2f99-49a5-bace-ea37e8dd8fc3", + "metadata": {}, + "source": [ + "## Load data files\n", + "Load data files from /data/archive/" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cb4b9a26-c574-49c0-a521-658fa553e39e", + "metadata": {}, + 
"outputs": [], + "source": [ + "# Compile list of files\n", + "site, facility = site_facility\n", + "d_date_start = datetime.strptime(date_start, '%Y-%m-%d')\n", + "d_date_end = datetime.strptime(date_end, '%Y-%m-%d')\n", + "dir_path = os.path.join(DATA_DIR + site, site + DATASTREAM_NAME + facility + r'.' + DATA_LEVEL )\n", + "dir_path\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6be8f3dc", + "metadata": {}, + "outputs": [], + "source": [ + "from datetime import date, timedelta\n", + "import pandas as pd\n", + "\n", + "def get_ARM_formated_dates(start_date, end_date):\n", + " \"\"\"\n", + " Get a list of ARM conventional formated date lists, based on start_date and end_date(inclusive)\n", + " EXAMPLE:\n", + " get_ARM_formated_dates(start_date=\"20180219\", end_date=\"20180221\")\n", + " >> [\"20180219\", \"20180220\", \"20180221\"] \n", + " \"\"\"\n", + " \n", + " _start_date = pd.to_datetime(start_date)\n", + " _end_date = pd.to_datetime(end_date)\n", + " \n", + " delta = _end_date - _start_date # returns timedelta \n", + " dates = []\n", + "\n", + " for i in range(delta.days + 1):\n", + " day = _start_date + timedelta(days=i)\n", + " day_formated = day.strftime(format=\"%Y%m%d\")\n", + " dates.append(day_formated)\n", + " return dates\n", + "\n", + "\n", + "get_ARM_formated_dates(start_date=date_start, end_date=date_end)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "51feea2e", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter a list of files based on date pattern\n", + "import glob\n", + "dates = get_ARM_formated_dates(start_date=date_start, end_date=date_end)\n", + "files_filter = []\n", + "for date in dates:\n", + " files_filter += glob.glob(f'{dir_path}/*.{date}*.*')\n", + " files_filter\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0e5d0d7", + "metadata": {}, + "outputs": [], + "source": [ + "# Load files as a single dataset\n", + "files_list = 
files_filter \n", + "ds = act.io.armfiles.read_netcdf(files_list)\n", + "ds.clean.cleanup()\n", + "print(f'{len(files_list)} files loaded')\n", + "ds\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "4a551094-9ec0-4b64-b80a-9940573c2f50", + "metadata": {}, + "source": [ + "## Plot time series data\n", + "#### Define the list of variables to be plotted:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d09b789e-84f1-4605-846b-a72c110c8048", + "metadata": {}, + "outputs": [], + "source": [ + "variables_to_plot = ['cldfrac', 'cbh']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3458fb08-035b-4898-9253-0a94e6f9c97b", + "metadata": {}, + "outputs": [], + "source": [ + "ts_display = act.plotting.TimeSeriesDisplay(ds)\n", + "ts_display.add_subplots((len(variables_to_plot),), figsize = (9.5,4*len(variables_to_plot)))\n", + "\n", + "for i,v in enumerate(variables_to_plot):\n", + " ts_ax = ts_display.plot(v, subplot_index=(i,), set_title=ds.variables[v].attrs['long_name'],)\n", + " ts_ax.grid()\n", + "\n", + "plt.show()\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "2308db16-f362-4033-a11a-c3e5e75ad9ba", + "metadata": {}, + "source": [ + "## Field selection dropdown menu\n", + "Select variable to be plotted from a dropdown menu" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d5792fa8-2253-441a-8ac6-1e07a0345031", + "metadata": {}, + "outputs": [], + "source": [ + "plt.ioff()\n", + "\n", + "# populate dropdown menu with available variables \n", + "available_variables = [v for v in ds.variables if not('time' in v or v.startswith('qc_') or v.startswith('source_')) and 'long_name' in ds.variables[v].attrs]\n", + "d_variable = 'cldfrac'\n", + "dropdown = widgets.Dropdown(\n", + " options = [(ds.variables[v].attrs['long_name'], v) for v in available_variables],\n", + " value= d_variable,\n", + " description='Field:',\n", + " disabled=False,\n", + ")\n", + 
"dropdown.layout.margin = '0px 30% 0px 20%'\n", + "dropdown.layout.width = '50%'\n", + "\n", + "# set up display\n", + "i_display = act.plotting.TimeSeriesDisplay(ds)\n", + "i_display.add_subplots((1,), figsize = (9.5,5))\n", + "i_ax = i_display.plot(d_variable, subplot_index=(0,), set_title=ds.variables[d_variable].attrs['long_name'],)\n", + "i_ax.grid()\n", + "i_fig = i_display.fig\n", + "\n", + "# update plot callback function\n", + "def update_plot(change):\n", + " i_ax.cla()\n", + " i_ax_new = i_display.plot(change.new, subplot_index=(0,), set_title=ds.variables[change.new].attrs['long_name'],)\n", + " i_ax_new.grid()\n", + " i_fig.canvas.draw()\n", + " i_fig.canvas.flush_events()\n", + "\n", + "dropdown.observe(update_plot, names='value')\n", + "\n", + "widgets.AppLayout(\n", + " header=dropdown,\n", + " center=i_fig.canvas,\n", + " pane_heights=[1, 6,1]\n", + ")\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.12" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/DIFFCOR/DIFFCOR_tutorial.ipynb b/VAPs/quicklook/DIFFCOR/DIFFCOR_tutorial.ipynb new file mode 100644 index 00000000..1b872f74 --- /dev/null +++ b/VAPs/quicklook/DIFFCOR/DIFFCOR_tutorial.ipynb @@ -0,0 +1,838 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# BRS1DUTT.C1 Notebook\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/diffcor) for more information about this vap." 
+ ] + }, + { + "cell_type": "markdown", + "id": "97097763", + "metadata": {}, + "source": [ + "In this notebook, we demonstrate the workflow to explore ARM VAP data (using brs1dutt as an example). Value-added products (VAPs) are higher-order data products that have been analyzed and processed to ease scientists’ use of ARM data in atmospheric research and global climate models. \n", + "Here is the main content we will cover." + ] + }, + { + "cell_type": "markdown", + "id": "eddec40f", + "metadata": {}, + "source": [ + "# Table of Contents\n", + "## Access the data\n", + "* How to retrieve the data\n", + "* Data path and file name conventions\n", + "* Load data\n", + "## Explore the data\n", + "* NetCDF Data structure\n", + "* Xarray essentials\n", + "* Xarray Variable\n", + "## Plot the data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "import random\n", + "\n", + "import glob\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "id": "2372d5be", + "metadata": {}, + "source": [ + "## Access the data" + ] + }, + { + "cell_type": "markdown", + "id": "5b0d9684", + "metadata": {}, + "source": [ + "### How to retrieve the data\n", + "We assume the path \"/data/archive\" is available where you are running this notebook. You can use `os.path.exists(\"/data/archive\")` to verify if the path exists on your machine.\n", + "\n", + "\n", + "### Data path and file name conventions\n", + "There are several common terms regarding ARM data, for example, data-stream-name, data-level, site, facility, instrument, etc. 
(For more details, please see [ARM Data File Standards Version 1.2](https://www.google.com/search?q=arm+datastream+facility+cite+definition&rlz=1C1GCEJ_enUS1029US1029&ei=hb41ZICrBPukqtsPvMODIA&ved=0ahUKEwjAgeiS0aL-AhV7kmoFHbzhAAQQ4dUDCBA&uact=5&oq=arm+datastream+facility+cite+definition&gs_lcp=Cgxnd3Mtd2l6LXNlcnAQAzIFCAAQogQyBQgAEKIEMgUIABCiBDIFCAAQogQyBQgAEKIEOgoIABBHENYEELADOgoIIRCgARDDBBAKSgQIQRgAUJ4DWIoOYO0PaAFwAXgAgAGMAYgBuQmSAQMyLjmYAQCgAQHIAQjAAQE&sclient=gws-wiz-serp).) \n", + "\n", + "For example, this notebook is called `brs1dutt.c1`, where `brs1dutt` is the \"datastream name\" and `c1` is the \"data level\".\n", + "\n", + "This datastream also contains site `sgp` and facility `C1`. (Note: an individual datastream might have multiple site-facility pairs.)\n", + "In such a case, the data of this data-stream is stored at `/data/archive/sgp/sgpbrs1duttC1.c1`, which is in the format of `{DATA_DIR}/{site}/{site}{DATASTREAM_NAME}{facility}.{DATA_LEVEL}`. We can use the following method to assign the data-stream directory: `datastream_dir = os.path.join(DATA_DIR, site, site + DATASTREAM_NAME + facility + '.' + DATA_LEVEL )`\n", + "\n", + "The data files under datastream_dir also follow naming conventions. Once you reach the datastream_dir level, the most important file naming convention to differentiate the files is \"yyyyMMdd.hhmmss\", which comes in handy to filter files based on datetime. 
For example, we can use `glob.glob(f'{datastream_dir}/*.200709*.*')` to filter files from September 2007.\n", + "\n", + "(Note: refer to the https://adc.arm.gov/discovery/#/ and https://adc.arm.gov/solr8/metadata/select API to explore ARM datastreams and their associated available sites and facilities.)\n", + "\n", + "Please see the following examples in action" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d7e9eb85", + "metadata": {}, + "outputs": [], + "source": [ + "# Verify if DATA_DIR path exists\n", + "DATA_DIR = \"/data/archive\"\n", + "os.path.exists(DATA_DIR)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "586993fd", + "metadata": {}, + "outputs": [], + "source": [ + "# Specify datastream_dir following the path conventions and check its existence\n", + "DATASTREAM_NAME = \"brs1dutt\"\n", + "DATA_LEVEL = \"c1\"\n", + "site = \"sgp\"\n", + "facility = \"C1\"\n", + "datastream_dir = os.path.join(DATA_DIR, site, site + DATASTREAM_NAME + facility + '.' 
+ DATA_LEVEL )\n", + "print(datastream_dir)\n", + "print(os.path.exists(datastream_dir))\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0742f7c1", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: list 5 (random) files under datastream_dir\n", + "files = os.listdir(datastream_dir)\n", + "files[:5]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "39b98a36", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: get most recent file\n", + "list_of_files = glob.glob(f\"{datastream_dir}/*\") # * means all if need specific format then *.csv\n", + "latest_file = max(list_of_files, key=os.path.getctime)\n", + "latest_file" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "902d514e", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus sort datastream files based on datetime\n", + "files = os.listdir(datastream_dir)\n", + "file_sorted = files.copy()\n", + "file_sorted.sort() \n", + "print(file_sorted[:5])\n", + "\n", + "# to reverse\n", + "file_sorted_reverse = files.copy() \n", + "file_sorted_reverse.sort(reverse=True)\n", + "print(file_sorted_reverse[:5])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ec5923b2", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: pattern matching\n", + "# filter the 200709** files under datastream_dir\n", + "files_filter = glob.glob(f'{datastream_dir}/*.200709*.*')\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c4a4aa18", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: use armnotebook_utils.file_filter (TODO) to filter files based on datastream info \n" + ] + }, + { + "cell_type": "markdown", + "id": "cfeb9efc", + "metadata": {}, + "source": [ + "### Load data\n", + "The arm data is stored in [NetCDF](https://en.wikipedia.org/wiki/NetCDF#:~:text=%22NetCDF%20(network%20Common%20Data%20Form,format%20for%20representing%20scientific%20data.) format. 
We can use xarray's `open_dataset` method to load single file, or `open_mfdataset` to open multiple files. (Note: the latter will still return a single xarray dataset object by combining multiple files.)\n", + "\n", + "Note: open_dataset keeps the file handle open and lazy loads its contents. All parameters are passed directly to open_dataset. It is a preferable method over load_dataset for memory-effiency. (more details, please see [xarray.open_dataset](https://docs.xarray.dev/en/stable/generated/xarray.open_dataset.html))\n", + "\n", + "See the following example in action" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a440a329", + "metadata": {}, + "outputs": [], + "source": [ + "# open single file\n", + "full_path = latest_file\n", + "print(full_path)\n", + "ds_single = xr.open_dataset(full_path)\n", + "ds_single" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0143a3d", + "metadata": {}, + "outputs": [], + "source": [ + "print(ds_single)\n", + "print(type(ds_single))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1c0f8939", + "metadata": {}, + "outputs": [], + "source": [ + "# open multiple files\n", + "n_files = 3\n", + "full_paths = [os.path.join(datastream_dir, f_path) for f_path in file_sorted_reverse[:n_files]]\n", + "print(full_paths)\n", + "try: # Note: sometimes multiple files cannot be merged, so we used try except here.\n", + " ds_mutiple = xr.open_mfdataset(full_paths)\n", + " # ds_mutiple\n", + " print(type(ds_mutiple))\n", + " print(ds_mutiple)\n", + "except Exception as e:\n", + " print(e)" + ] + }, + { + "cell_type": "markdown", + "id": "3fa59943", + "metadata": {}, + "source": [ + "## Explore the data" + ] + }, + { + "cell_type": "markdown", + "id": "571f69b9", + "metadata": {}, + "source": [ + "### NetCDF Data structure \n", + "\n", + "\n", + "(If you are confident with NetCDF basics and xarray essentials, feel free to skip this session.)\n", + "Before we dive into 
data exploration, there are some essential concepts we should be familiar with (shown below): \n", + "* Dataset\n", + "* Data array\n", + "* Variable\n", + "* Dimension\n", + "* Coordinate\n", + "* Data Type\n", + "* Meta Data (Attributes)\n", + "\n", + "We will not go into details about NetCDF basics, but here are some references you might find helpful:\n", + "* [Components of a NetCDF Dataset](https://iprc.soest.hawaii.edu/users/xfu/tool/guidef-7.html#)\n", + "* [Network Common Data Form (NetCDF)](https://www.unidata.ucar.edu/software/netcdf/) and [A Brief History of (netCDF) Time](https://www.unidata.ucar.edu/software/netcdf/time/recs.html)\n", + "\n", + "\n", + "\n", + "### Xarray essentials\n", + "Earlier we introduced the xarray package and used `open_dataset` and `open_mfdataset` to load NetCDF data files as an xarray Dataset object. Recall that you can review the data in a notebook by using `print(ds)` or `ds`. \n", + "\n", + "Using xarray to retrieve the aforementioned NetCDF basics is straightforward. 
In general,\n", + "* Dataset: `ds`\n", + "* Data array: `ds.variables`\n", + "* Variable: `ds.variables`\n", + "* Dimension: `ds.dims`\n", + "* Coordinate: `ds.coords`\n", + "* Data Type: `type`\n", + "* Meta Data (Attributes): `ds.attrs`\n", + "\n", + "Also, here are some references if you are new to xarray:\n", + "* [Xarray in 45 minutes](https://tutorial.xarray.dev/overview/xarray-in-45-min.html)\n", + "* [Handling NetCDF Files using XArray for Absolute Beginners](https://towardsdatascience.com/handling-netcdf-files-using-xarray-for-absolute-beginners-111a8ab4463f)\n", + "\n", + "Try the following commands in action\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b3eac323", + "metadata": {}, + "outputs": [], + "source": [ + "ds = ds_single" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "77ecf85d", + "metadata": {}, + "outputs": [], + "source": [ + "# Dataset \n", + "ds" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "25e7de09", + "metadata": {}, + "outputs": [], + "source": [ + "# Data array, Variable\n", + "# Note: the info can be overwhelming. 
Do not attempt to grasp everything at the first glance.\n", + "ds.variables" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8c41b67e", + "metadata": {}, + "outputs": [], + "source": [ + "# dimenssions\n", + "ds.dims" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "156f1dfc", + "metadata": {}, + "outputs": [], + "source": [ + "# coordinates\n", + "ds.coords" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "277d6064", + "metadata": {}, + "outputs": [], + "source": [ + "# Meta Data (Attributes)\n", + "ds.attrs" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d334681f", + "metadata": {}, + "outputs": [], + "source": [ + "# type\n", + "print(type(ds))\n", + "print(type(ds.variables))\n", + "print(type(ds.dims))\n", + "print(type(ds.coords))\n", + "print(type(ds.attrs))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "643399d6", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: retrieve variable names only\n", + "list(ds.variables)" + ] + }, + { + "cell_type": "markdown", + "id": "1cf36878", + "metadata": {}, + "source": [ + "#### Discussion: variable vs. coordinates vs. dimenssions. \n", + "From [Components of a NetCDF Dataset](https://iprc.soest.hawaii.edu/users/xfu/tool/guidef-7.html#):\n", + "\n", + "* Dimession: A dimension may be used to represent a real physical dimension, for example, time, latitude, longitude, or height. A dimension might also be used to index other quantities, for example station or model-run-number.\n", + "A netCDF dimension has both a name and a length. A dimension length is an arbitrary positive integer, except that one dimension in a netCDF dataset can have the length UNLIMITED.\n", + "\n", + "* Variables: Variables are used to store the bulk of the data in a netCDF dataset. A variable represents an array of values of the same type. A scalar value is treated as a 0-dimensional array. 
A variable has a name, a data type, and a shape described by its list of dimensions specified when the variable is created. A variable may also have associated attributes, which may be added, deleted or changed after the variable is created.\n", + "\n", + "* Coordinate (Variables): It is legal for a variable to have the same name as a dimension. Such variables have no special meaning to the netCDF library. However there is a convention that such variables should be treated in a special way by software using this library.\n", + "A variable with the same name as a dimension is called a coordinate variable. It typically defines a physical coordinate corresponding to that dimension.\n", + "\n", + "\n", + "Tips: By definition, coordinates and dimenssions are also variable. For example ds.time is a coordinate but it is also a special variable. But in practice, when we talk about variable, it implies regular/non-coordinate variable. We will use this convention for the remaining notebook. " + ] + }, + { + "cell_type": "markdown", + "id": "d47120f9", + "metadata": {}, + "source": [ + "### Xarray Variable\n", + "\n", + "\n", + "We can use `ds.variables` to access varialbes of a dataset. But many times viewing print-out the whole datasets can be overwhemling. Instead, we would work on the individual variable (also called Xarray data array.) using the `ds[var_name]` syntax, where `var_name` is the name of the variable. See the following examples in action.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5a57e136", + "metadata": {}, + "outputs": [], + "source": [ + "print(type(ds[\"time\"]))\n", + "ds[\"time\"]" + ] + }, + { + "cell_type": "markdown", + "id": "51ec643e", + "metadata": {}, + "source": [ + "#### Variable properties\n", + "For individual varible, the following properties are mostly used with the assocaited xarray method to retrieve them. 
(assuming var = ds[var_name])\n", + "* name: `var.name`\n", + "* data content: `var.data`\n", + "* attributes: `var.attrs`\n", + "* dimenstions: `var.dims`\n", + "* data type: `var.data.dtype`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a632a525", + "metadata": {}, + "outputs": [], + "source": [ + "var_name = \"time\"\n", + "var = ds[var_name]\n", + "\n", + "print(\"var.name: \\n\", var.name, \"\\n\")\n", + "print(\"var.data: \\n\", var.data, \"\\n\")\n", + "print(\"var.attrs: \\n\", var.attrs, \"\\n\")\n", + "print(\"var.dims: \\n\", var.dims, \"\\n\")\n", + "print(\"var.data.dtype: \\n\", var.data.dtype, \"\\n\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b256b07f", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: indivdual variable info table (collect and display variable info as a tabular format)\n", + "df_info = pd.DataFrame()\n", + "df_info[\"var_name\"] = list(ds.variables)\n", + "df_info[\"dims\"] = df_info.var_name.apply(lambda x: ds[x].dims)\n", + "df_info[\"is_dim\"] = df_info.var_name.apply(lambda x: x in ds.dims)\n", + "df_info[\"n_dim\"] = df_info.var_name.apply(lambda x: len(ds[x].dims))\n", + "df_info[\"attrs\"] = df_info.var_name.apply(lambda x: ds[x].attrs)\n", + "df_info[\"dtype\"] = df_info.var_name.apply(lambda x: ds[x].data.dtype)\n", + "df_info" + ] + }, + { + "cell_type": "markdown", + "id": "e6a36b96", + "metadata": {}, + "source": [ + "### Data cleaning/Preprocessing (skipped)\n", + "Data cleaning and preprocessing is an important stage in the the data analysis pipeline. However it will be out of the scope of this notebook. For more information about data cleaning and preprocessing basics, here are some references. 
\n", + "* [Xarray Fundamentals](https://earth-env-data-science.github.io/lectures/xarray/xarray.html)\n", + "* [Xarray Tutorial — Pangeo Gallery documentation](http://gallery.pangeo.io/repos/pangeo-data/pangeo-tutorial-gallery/xarray.html)\n", + "* [Pythonic Data Cleaning With pandas and NumPy](https://realpython.com/python-data-cleaning-numpy-pandas/)\n", + "* [Pandas - Cleaning Data](https://www.w3schools.com/python/pandas/pandas_cleaning.asp)" + ] + }, + { + "cell_type": "markdown", + "id": "ae6e1a69", + "metadata": {}, + "source": [ + "### Plotting\n", + "Data visualization (or plotting) is another important data analysis topic and deserves its own discussion. Here in this notebook we will only demonstrate basic tools for simple data visualization tasks.\n", + "\n", + "Here are some references you might find useful:\n", + "* xarray plotting: https://docs.xarray.dev/en/stable/user-guide/plotting.html\n", + "* Atmospheric Community Toolkit (ACT): https://arm-doe.github.io/ACT/index.html\n", + "\n", + "Note: this notebook is auto-generated using a template. It uses a general idea to select variable(s) to plot and is not customized to each individual notebook. Feel free to change the variables of interest, especially if a certain figure fails to plot." + ] + }, + { + "cell_type": "markdown", + "id": "7e417fb2", + "metadata": {}, + "source": [ + "#### 1-dimensional basic time series plot\n", + "\n", + "For the following plot we would like to find variables such that\n", + "* it has one and only one dimension\n", + "* \"time\" is its coordinate variable\n", + "* it is not a dimension itself,\n", + "* it is not a special variable with substrings within [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "\n", + "Please see the following example in action. 
(For more details about pandas filtering, please see the following references.)\n", + "* [pandas: multiple conditions while indexing data frame](https://stackoverflow.com/questions/22591174/pandas-multiple-conditions-while-indexing-data-frame-unexpected-behavior)\n", + "* [pandas.DataFrame.apply](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.apply.html)\n", + "* [How to test if a string contains one of the substrings in a list...](https://stackoverflow.com/questions/26577516/how-to-test-if-a-string-contains-one-of-the-substrings-in-a-list-in-pandas" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "30edac2d", + "metadata": {}, + "outputs": [], + "source": [ + "# (pandas query) filter n_dim==1, non-dimenssional, and not contains the following substrings\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter = df_info[(df_info.n_dim == 1) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (df_info.is_dim==False) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4777a659", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " var_1d = df_filter.var_name.values[0]\n", + " var_1d\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fb5ae985", + "metadata": {}, + "outputs": [], + "source": [ + "# Note: if failed, change to another variable to plot.\n", + "try:\n", + " ds[var_1d].plot()\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "markdown", + "id": "9e41066e", + "metadata": {}, + "source": [ + "#### 2-dimenssional basic plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "09bd4adc", + "metadata": {}, + "outputs": [], + "source": [ + "# (pandas query) filter n_dim==2, non-dimenssional, and not contains the 
following substrings\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_2 = df_info[(df_info.n_dim == 2) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (df_info.is_dim==False) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter_2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2cdea1a9", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " var_2d = df_filter_2.var_name.values[0]\n", + " var_2d\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f5f48879", + "metadata": {}, + "outputs": [], + "source": [ + "# Note: if failed, change to another variable to plot.\n", + "try:\n", + " print(ds[var_2d].dims)\n", + " # ds[var_2d].plot()\n", + "\n", + " # conventionally, use \"time\" as x-axis\n", + " ds[var_2d].plot(x=\"time\")\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "markdown", + "id": "7554b2a6", + "metadata": {}, + "source": [ + "#### qc-plotting (optional)\n", + "\n", + "Note: act qc plotting has more strict requirement, one of them is the associated qc variable need to have \"flag_masks\" attributes. 
Which is added by using ds = act.io.armfiles.read_netcdf(files_list), then ds.clean.cleanup()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "71c2096f", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " ds_act = act.io.armfiles.read_netcdf(full_path)\n", + " print(type(ds_act))\n", + " ds_act.clean.cleanup()\n", + "\n", + " # or \n", + " # ds.clean.cleanup()\n", + "except Exception as e:\n", + " print(\"ERROR\", e)\n", + " ds_act = ds" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "844f8505", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter valid variables for ACT qc plotting\n", + "condition = (\"qc_\" + df_info.var_name).apply(lambda x: ds_act[x].attrs.get(\"flag_masks\") is not None \n", + " if x in list(ds.variables) else False)\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_3 = df_info[(df_info.is_dim==False) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings))) &\n", + " condition\n", + " ]\n", + "df_filter_3" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ecd38cc6", + "metadata": {}, + "outputs": [], + "source": [ + "# QC Plot\n", + "try:\n", + " qc_variable = df_filter_3.var_name.values[0]\n", + " print(qc_variable)\n", + "except Exception as e:\n", + " print(e)\n", + " \n", + "try:\n", + "\n", + " # Plot\n", + " qc_display = act.plotting.TimeSeriesDisplay(ds_act)\n", + " qc_display.add_subplots((2,), figsize = (9.5,10))\n", + " qc_ax = qc_display.plot(qc_variable, subplot_index=(0,), set_title=\"QC results on field: \" + qc_variable,)\n", + " qc_ax.grid()\n", + " qc_display.qc_flag_block_plot(qc_variable, subplot_index=(1,))\n", + " qc_ax.set_xlabel(f\"UTC Time starts at {ds.time.data[0]}\")\n", + "\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e)" + ] + }, + { + "cell_type": "markdown", + "id": "d76e4d27", + "metadata": 
{}, + "source": [ + "#### bonus: choose variables to plot from a dropdown menu " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bb733804", + "metadata": {}, + "outputs": [], + "source": [ + "# Valid variables filtering\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_4 = df_info[(df_info.is_dim==False) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter_4" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "82d5c20d", + "metadata": {}, + "outputs": [], + "source": [ + "# example 1: using xarray plot\n", + "\n", + "# Uncomment the following cell to try the interactive plot (ctrl + /)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2fc9aaa4", + "metadata": {}, + "outputs": [], + "source": [ + "# %matplotlib widget\n", + "# plt.clf()\n", + "\n", + "# fig, ax = plt.subplots(figsize=(10, 4))\n", + "\n", + "# available_variables = df_filter_4.var_name.values\n", + "# @widgets.interact(var=available_variables)\n", + "# def update(var = available_variables[0]):\n", + "# fig.clear() # Remove old lines from plot and plot new one\n", + "# if len(ds[var].dims)==2:\n", + "# ds[var].plot(x=\"time\", add_colorbar=False)\n", + "# else:\n", + "# ds[var].plot()\n", + "# plt.grid()\n", + "# plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fabdd802", + "metadata": {}, + "outputs": [], + "source": [ + "# example 2: using act plot\n", + "\n", + "# Uncomment the following cell to try the interactive plot (ctrl + /)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "962b4186", + "metadata": {}, + "outputs": [], + "source": [ + "# %matplotlib widget\n", + "# plt.clf()\n", + "\n", + "# available_variables = df_filter_4.var_name.values\n", + "\n", + "\n", + "# @widgets.interact(var=available_variables)\n", + "# def 
update(var = available_variables[0]):\n", + "\n", + "# i_display = act.plotting.TimeSeriesDisplay(ds_act)\n", + "# i_display.add_subplots((1,), figsize=(10, 4))\n", + "# ax = i_display.plot(var, subplot_index=(0,), set_title=f\"{var} ({ds_act[var].attrs['long_name']})\",)\n", + "\n", + "# ax.set_xlabel(f\"UTC Time starts at {ds.time.data[0]}\")\n", + "# ax.grid()\n", + "# plt.show()\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.16" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/DIFFCOR/brs1dutt.c1.ipynb b/VAPs/quicklook/DIFFCOR/brs1dutt.c1.ipynb new file mode 100644 index 00000000..d33149cf --- /dev/null +++ b/VAPs/quicklook/DIFFCOR/brs1dutt.c1.ipynb @@ -0,0 +1,297 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# BRS1DUTT.C1 Plots\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/diffcor) for more information about this vap." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib widget\n", + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import ipywidgets as widgets\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "# Data archive directory\n", + "DATA_DIR = r'/data/archive/'\n", + "\n", + "# Datastream info\n", + "DATASTREAM_NAME = 'brs1dutt'\n", + "DATA_LEVEL = 'c1'\n", + "LOCATIONS = [{'end_date': '2001-01-22', 'facility': 'C1', 'site': 'sgp', 'start_date': '1993-09-01'}]" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "9faaf875", + "metadata": {}, + "source": [ + "## Define site, facility, and date range" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ac6764f5", + "metadata": {}, + "outputs": [], + "source": [ + "print(\"The following locations and date ranges are available for this VAP:\")\n", + "display(pd.DataFrame(LOCATIONS, columns=['site', 'facility', 'start_date', 'end_date']))" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "8d132223", + "metadata": {}, + "source": [ + "#### Define site, facility, and date range (date format: YYYY-MM-DD) using the variables below:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e563983a", + "metadata": {}, + "outputs": [], + "source": [ + "site_facility = ( 'sgp', 'C1' )\n", + "\n", + "date_start = '2001-01-20'\n", + "date_end = '2001-01-22'" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "bccd3dfe-2f99-49a5-bace-ea37e8dd8fc3", + "metadata": {}, + "source": [ + "## Load data files\n", + "Load data files from /data/archive/" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cb4b9a26-c574-49c0-a521-658fa553e39e", + "metadata": {}, 
+ "outputs": [], + "source": [ + "# Compile list of files\n", + "site, facility = site_facility\n", + "d_date_start = datetime.strptime(date_start, '%Y-%m-%d')\n", + "d_date_end = datetime.strptime(date_end, '%Y-%m-%d')\n", + "dir_path = os.path.join(DATA_DIR + site, site + DATASTREAM_NAME + facility + r'.' + DATA_LEVEL )\n", + "dir_path\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6be8f3dc", + "metadata": {}, + "outputs": [], + "source": [ + "from datetime import date, timedelta\n", + "import pandas as pd\n", + "\n", + "def get_ARM_formated_dates(start_date, end_date):\n", + " \"\"\"\n", + " Get a list of ARM conventional formated date lists, based on start_date and end_date(inclusive)\n", + " EXAMPLE:\n", + " get_ARM_formated_dates(start_date=\"20180219\", end_date=\"20180221\")\n", + " >> [\"20180219\", \"20180220\", \"20180221\"] \n", + " \"\"\"\n", + " \n", + " _start_date = pd.to_datetime(start_date)\n", + " _end_date = pd.to_datetime(end_date)\n", + " \n", + " delta = _end_date - _start_date # returns timedelta \n", + " dates = []\n", + "\n", + " for i in range(delta.days + 1):\n", + " day = _start_date + timedelta(days=i)\n", + " day_formated = day.strftime(format=\"%Y%m%d\")\n", + " dates.append(day_formated)\n", + " return dates\n", + "\n", + "\n", + "get_ARM_formated_dates(start_date=date_start, end_date=date_end)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "51feea2e", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter a list of files based on date pattern\n", + "import glob\n", + "dates = get_ARM_formated_dates(start_date=date_start, end_date=date_end)\n", + "files_filter = []\n", + "for date in dates:\n", + " files_filter += glob.glob(f'{dir_path}/*.{date}*.*')\n", + " files_filter\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0e5d0d7", + "metadata": {}, + "outputs": [], + "source": [ + "# Load files as a single dataset\n", + "files_list 
= files_filter \n", + "ds = act.io.armfiles.read_netcdf(files_list)\n", + "ds.clean.cleanup()\n", + "print(f'{len(files_list)} files loaded')\n", + "ds\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "4a551094-9ec0-4b64-b80a-9940573c2f50", + "metadata": {}, + "source": [ + "## Plot time series data\n", + "#### Define the list of variables to be plotted:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d09b789e-84f1-4605-846b-a72c110c8048", + "metadata": {}, + "outputs": [], + "source": [ + "variables_to_plot = ['down_short_hemisp_sum', 'status_down_short_hemisp_sum', 'up_short_hemisp']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3458fb08-035b-4898-9253-0a94e6f9c97b", + "metadata": {}, + "outputs": [], + "source": [ + "ts_display = act.plotting.TimeSeriesDisplay(ds)\n", + "ts_display.add_subplots((len(variables_to_plot),), figsize = (9.5,4*len(variables_to_plot)))\n", + "\n", + "for i,v in enumerate(variables_to_plot):\n", + " ts_ax = ts_display.plot(v, subplot_index=(i,), set_title=ds.variables[v].attrs['long_name'],)\n", + " ts_ax.grid()\n", + "\n", + "plt.show()\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "2308db16-f362-4033-a11a-c3e5e75ad9ba", + "metadata": {}, + "source": [ + "## Field selection dropdown menu\n", + "Select variable to be plotted from a dropdown menu" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d5792fa8-2253-441a-8ac6-1e07a0345031", + "metadata": {}, + "outputs": [], + "source": [ + "plt.ioff()\n", + "\n", + "# populate dropdown menu with available variables \n", + "available_variables = [v for v in ds.variables if not('time' in v or v.startswith('qc_') or v.startswith('source_')) and 'long_name' in ds.variables[v].attrs]\n", + "d_variable = 'down_short_hemisp_sum'\n", + "dropdown = widgets.Dropdown(\n", + " options = [(ds.variables[v].attrs['long_name'], v) for v in available_variables],\n", + " value= 
d_variable,\n", + " description='Field:',\n", + " disabled=False,\n", + ")\n", + "dropdown.layout.margin = '0px 30% 0px 20%'\n", + "dropdown.layout.width = '50%'\n", + "\n", + "# set up display\n", + "i_display = act.plotting.TimeSeriesDisplay(ds)\n", + "i_display.add_subplots((1,), figsize = (9.5,5))\n", + "i_ax = i_display.plot(d_variable, subplot_index=(0,), set_title=ds.variables[d_variable].attrs['long_name'],)\n", + "i_ax.grid()\n", + "i_fig = i_display.fig\n", + "\n", + "# update plot callback function\n", + "def update_plot(change):\n", + " i_ax.cla()\n", + " i_ax_new = i_display.plot(change.new, subplot_index=(0,), set_title=ds.variables[change.new].attrs['long_name'],)\n", + " i_ax_new.grid()\n", + " i_fig.canvas.draw()\n", + " i_fig.canvas.flush_events()\n", + "\n", + "dropdown.observe(update_plot, names='value')\n", + "\n", + "widgets.AppLayout(\n", + " header=dropdown,\n", + " center=i_fig.canvas,\n", + " pane_heights=[1, 6,1]\n", + ")\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.12" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/DIFFCOR/siros1dutt.c1.ipynb b/VAPs/quicklook/DIFFCOR/siros1dutt.c1.ipynb new file mode 100644 index 00000000..f5e4513d --- /dev/null +++ b/VAPs/quicklook/DIFFCOR/siros1dutt.c1.ipynb @@ -0,0 +1,297 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# SIROS1DUTT.C1 Plots\n", + "\n", + "[Click 
here](https://www.arm.gov/capabilities/vaps/diffcor) for more information about this vap." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib widget\n", + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import ipywidgets as widgets\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "# Data archive directory\n", + "DATA_DIR = r'/data/archive/'\n", + "\n", + "# Datastream info\n", + "DATASTREAM_NAME = 'siros1dutt'\n", + "DATA_LEVEL = 'c1'\n", + "LOCATIONS = [{'end_date': '1997-10-30', 'facility': 'E10', 'site': 'sgp', 'start_date': '1995-07-21'}, {'end_date': '1997-08-21', 'facility': 'E11', 'site': 'sgp', 'start_date': '1995-06-30'}, {'end_date': '1997-10-26', 'facility': 'E12', 'site': 'sgp', 'start_date': '1996-01-19'}, {'end_date': '1997-08-25', 'facility': 'E13', 'site': 'sgp', 'start_date': '1994-01-07'}, {'end_date': '1997-08-14', 'facility': 'E15', 'site': 'sgp', 'start_date': '1994-01-12'}, {'end_date': '1997-08-20', 'facility': 'E16', 'site': 'sgp', 'start_date': '1995-06-02'}, {'end_date': '1997-09-15', 'facility': 'E18', 'site': 'sgp', 'start_date': '1996-06-20'}, {'end_date': '1997-11-20', 'facility': 'E1', 'site': 'sgp', 'start_date': '1995-11-15'}, {'end_date': '1998-02-10', 'facility': 'E20', 'site': 'sgp', 'start_date': '1994-11-03'}, {'end_date': '1997-11-25', 'facility': 'E22', 'site': 'sgp', 'start_date': '1995-03-16'}, {'end_date': '1997-11-25', 'facility': 'E24', 'site': 'sgp', 'start_date': '1995-11-07'}, {'end_date': '1997-11-05', 'facility': 'E2', 'site': 'sgp', 'start_date': '1996-03-07'}, {'end_date': '1997-11-04', 'facility': 'E3', 'site': 'sgp', 'start_date': '1996-03-06'}, {'end_date': '1997-08-07', 'facility': 'E4', 'site': 'sgp', 'start_date': '1995-05-08'}, 
{'end_date': '1997-11-06', 'facility': 'E5', 'site': 'sgp', 'start_date': '1996-06-14'}, {'end_date': '1997-11-05', 'facility': 'E6', 'site': 'sgp', 'start_date': '1996-03-05'}, {'end_date': '1997-10-31', 'facility': 'E7', 'site': 'sgp', 'start_date': '1995-05-18'}, {'end_date': '1997-07-14', 'facility': 'E8', 'site': 'sgp', 'start_date': '1995-09-22'}, {'end_date': '1998-02-03', 'facility': 'E9', 'site': 'sgp', 'start_date': '1994-01-12'}]" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "9faaf875", + "metadata": {}, + "source": [ + "## Define site, facility, and date range" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ac6764f5", + "metadata": {}, + "outputs": [], + "source": [ + "print(\"The following locations and date ranges are available for this VAP:\")\n", + "display(pd.DataFrame(LOCATIONS, columns=['site', 'facility', 'start_date', 'end_date']))" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "8d132223", + "metadata": {}, + "source": [ + "#### Define site, facility, and date range (date format: YYYY-MM-DD) using the variables below:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e563983a", + "metadata": {}, + "outputs": [], + "source": [ + "site_facility = ( 'sgp', 'E10' )\n", + "\n", + "date_start = '1997-10-28'\n", + "date_end = '1997-10-30'" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "bccd3dfe-2f99-49a5-bace-ea37e8dd8fc3", + "metadata": {}, + "source": [ + "## Load data files\n", + "Load data files from /data/archive/" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cb4b9a26-c574-49c0-a521-658fa553e39e", + "metadata": {}, + "outputs": [], + "source": [ + "# Compile list of files\n", + "site, facility = site_facility\n", + "d_date_start = datetime.strptime(date_start, '%Y-%m-%d')\n", + "d_date_end = datetime.strptime(date_end, '%Y-%m-%d')\n", + "dir_path = os.path.join(DATA_DIR + site, site + DATASTREAM_NAME + 
facility + r'.' + DATA_LEVEL )\n", + "dir_path\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6be8f3dc", + "metadata": {}, + "outputs": [], + "source": [ + "from datetime import date, timedelta\n", + "import pandas as pd\n", + "\n", + "def get_ARM_formated_dates(start_date, end_date):\n", + " \"\"\"\n", + " Get a list of ARM conventional formated date lists, based on start_date and end_date(inclusive)\n", + " EXAMPLE:\n", + " get_ARM_formated_dates(start_date=\"20180219\", end_date=\"20180221\")\n", + " >> [\"20180219\", \"20180220\", \"20180221\"] \n", + " \"\"\"\n", + " \n", + " _start_date = pd.to_datetime(start_date)\n", + " _end_date = pd.to_datetime(end_date)\n", + " \n", + " delta = _end_date - _start_date # returns timedelta \n", + " dates = []\n", + "\n", + " for i in range(delta.days + 1):\n", + " day = _start_date + timedelta(days=i)\n", + " day_formated = day.strftime(format=\"%Y%m%d\")\n", + " dates.append(day_formated)\n", + " return dates\n", + "\n", + "\n", + "get_ARM_formated_dates(start_date=date_start, end_date=date_end)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "51feea2e", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter a list of files based on date pattern\n", + "import glob\n", + "dates = get_ARM_formated_dates(start_date=date_start, end_date=date_end)\n", + "files_filter = []\n", + "for date in dates:\n", + " files_filter += glob.glob(f'{dir_path}/*.{date}*.*')\n", + " files_filter\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0e5d0d7", + "metadata": {}, + "outputs": [], + "source": [ + "# Load files as a single dataset\n", + "files_list = files_filter \n", + "ds = act.io.armfiles.read_netcdf(files_list)\n", + "ds.clean.cleanup()\n", + "print(f'{len(files_list)} files loaded')\n", + "ds\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "4a551094-9ec0-4b64-b80a-9940573c2f50", + "metadata": {}, + "source": 
[ + "## Plot time series data\n", + "#### Define the list of variables to be plotted:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d09b789e-84f1-4605-846b-a72c110c8048", + "metadata": {}, + "outputs": [], + "source": [ + "variables_to_plot = ['down_short_hemisp_sum', 'status_down_short_hemisp_sum', 'up_short_hemisp']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3458fb08-035b-4898-9253-0a94e6f9c97b", + "metadata": {}, + "outputs": [], + "source": [ + "ts_display = act.plotting.TimeSeriesDisplay(ds)\n", + "ts_display.add_subplots((len(variables_to_plot),), figsize = (9.5,4*len(variables_to_plot)))\n", + "\n", + "for i,v in enumerate(variables_to_plot):\n", + " ts_ax = ts_display.plot(v, subplot_index=(i,), set_title=ds.variables[v].attrs['long_name'],)\n", + " ts_ax.grid()\n", + "\n", + "plt.show()\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "2308db16-f362-4033-a11a-c3e5e75ad9ba", + "metadata": {}, + "source": [ + "## Field selection dropdown menu\n", + "Select variable to be plotted from a dropdown menu" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d5792fa8-2253-441a-8ac6-1e07a0345031", + "metadata": {}, + "outputs": [], + "source": [ + "plt.ioff()\n", + "\n", + "# populate dropdown menu with available variables \n", + "available_variables = [v for v in ds.variables if not('time' in v or v.startswith('qc_') or v.startswith('source_')) and 'long_name' in ds.variables[v].attrs]\n", + "d_variable = 'down_short_hemisp_sum'\n", + "dropdown = widgets.Dropdown(\n", + " options = [(ds.variables[v].attrs['long_name'], v) for v in available_variables],\n", + " value= d_variable,\n", + " description='Field:',\n", + " disabled=False,\n", + ")\n", + "dropdown.layout.margin = '0px 30% 0px 20%'\n", + "dropdown.layout.width = '50%'\n", + "\n", + "# set up display\n", + "i_display = act.plotting.TimeSeriesDisplay(ds)\n", + "i_display.add_subplots((1,), figsize = 
(9.5,5))\n", + "i_ax = i_display.plot(d_variable, subplot_index=(0,), set_title=ds.variables[d_variable].attrs['long_name'],)\n", + "i_ax.grid()\n", + "i_fig = i_display.fig\n", + "\n", + "# update plot callback function\n", + "def update_plot(change):\n", + " i_ax.cla()\n", + " i_ax_new = i_display.plot(change.new, subplot_index=(0,), set_title=ds.variables[change.new].attrs['long_name'],)\n", + " i_ax_new.grid()\n", + " i_fig.canvas.draw()\n", + " i_fig.canvas.flush_events()\n", + "\n", + "dropdown.observe(update_plot, names='value')\n", + "\n", + "widgets.AppLayout(\n", + " header=dropdown,\n", + " center=i_fig.canvas,\n", + " pane_heights=[1, 6,1]\n", + ")\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.12" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/DIFFCOR/sirs1dutt.c1.ipynb b/VAPs/quicklook/DIFFCOR/sirs1dutt.c1.ipynb new file mode 100644 index 00000000..faca610d --- /dev/null +++ b/VAPs/quicklook/DIFFCOR/sirs1dutt.c1.ipynb @@ -0,0 +1,297 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# SIRS1DUTT.C1 Plots\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/diffcor) for more information about this vap." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib widget\n", + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import ipywidgets as widgets\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "# Data archive directory\n", + "DATA_DIR = r'/data/archive/'\n", + "\n", + "# Datastream info\n", + "DATASTREAM_NAME = 'sirs1dutt'\n", + "DATA_LEVEL = 'c1'\n", + "LOCATIONS = [{'end_date': '1999-04-13', 'facility': 'C1', 'site': 'sgp', 'start_date': '1997-03-21'}, {'end_date': '2001-02-14', 'facility': 'E10', 'site': 'sgp', 'start_date': '1997-11-05'}, {'end_date': '2001-02-20', 'facility': 'E11', 'site': 'sgp', 'start_date': '1997-08-22'}, {'end_date': '2001-02-20', 'facility': 'E12', 'site': 'sgp', 'start_date': '1997-10-31'}, {'end_date': '2001-02-22', 'facility': 'E13', 'site': 'sgp', 'start_date': '1997-08-29'}, {'end_date': '2001-02-20', 'facility': 'E15', 'site': 'sgp', 'start_date': '1997-08-26'}, {'end_date': '2001-02-21', 'facility': 'E16', 'site': 'sgp', 'start_date': '1997-08-21'}, {'end_date': '2001-02-20', 'facility': 'E18', 'site': 'sgp', 'start_date': '1997-10-15'}, {'end_date': '2001-02-22', 'facility': 'E19', 'site': 'sgp', 'start_date': '1998-07-08'}, {'end_date': '2001-02-12', 'facility': 'E1', 'site': 'sgp', 'start_date': '1997-11-21'}, {'end_date': '2001-02-21', 'facility': 'E20', 'site': 'sgp', 'start_date': '1998-02-13'}, {'end_date': '2001-02-20', 'facility': 'E21', 'site': 'sgp', 'start_date': '1999-09-11'}, {'end_date': '2001-02-21', 'facility': 'E22', 'site': 'sgp', 'start_date': '1997-11-25'}, {'end_date': '2001-02-22', 'facility': 'E24', 'site': 'sgp', 'start_date': '1997-12-03'}, {'end_date': '2001-02-21', 'facility': 'E25', 'site': 'sgp', 'start_date': '1997-11-12'}, 
{'end_date': '2001-02-14', 'facility': 'E2', 'site': 'sgp', 'start_date': '1997-11-06'}, {'end_date': '2001-02-14', 'facility': 'E3', 'site': 'sgp', 'start_date': '1997-11-05'}, {'end_date': '2001-02-14', 'facility': 'E4', 'site': 'sgp', 'start_date': '1997-11-10'}, {'end_date': '2001-02-14', 'facility': 'E5', 'site': 'sgp', 'start_date': '1997-12-18'}, {'end_date': '2001-02-15', 'facility': 'E6', 'site': 'sgp', 'start_date': '1997-11-06'}, {'end_date': '2001-02-13', 'facility': 'E7', 'site': 'sgp', 'start_date': '1997-10-31'}, {'end_date': '2001-02-13', 'facility': 'E8', 'site': 'sgp', 'start_date': '1997-08-21'}, {'end_date': '2001-02-13', 'facility': 'E9', 'site': 'sgp', 'start_date': '1998-02-07'}]" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "9faaf875", + "metadata": {}, + "source": [ + "## Define site, facility, and date range" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ac6764f5", + "metadata": {}, + "outputs": [], + "source": [ + "print(\"The following locations and date ranges are available for this VAP:\")\n", + "display(pd.DataFrame(LOCATIONS, columns=['site', 'facility', 'start_date', 'end_date']))" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "8d132223", + "metadata": {}, + "source": [ + "#### Define site, facility, and date range (date format: YYYY-MM-DD) using the variables below:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e563983a", + "metadata": {}, + "outputs": [], + "source": [ + "site_facility = ( 'sgp', 'C1' )\n", + "\n", + "date_start = '1999-04-11'\n", + "date_end = '1999-04-13'" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "bccd3dfe-2f99-49a5-bace-ea37e8dd8fc3", + "metadata": {}, + "source": [ + "## Load data files\n", + "Load data files from /data/archive/" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cb4b9a26-c574-49c0-a521-658fa553e39e", + "metadata": {}, + "outputs": [], + "source": 
[ + "# Compile list of files\n", + "site, facility = site_facility\n", + "d_date_start = datetime.strptime(date_start, '%Y-%m-%d')\n", + "d_date_end = datetime.strptime(date_end, '%Y-%m-%d')\n", + "dir_path = os.path.join(DATA_DIR + site, site + DATASTREAM_NAME + facility + r'.' + DATA_LEVEL )\n", + "dir_path\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6be8f3dc", + "metadata": {}, + "outputs": [], + "source": [ + "from datetime import date, timedelta\n", + "import pandas as pd\n", + "\n", + "def get_ARM_formated_dates(start_date, end_date):\n", + " \"\"\"\n", + " Get a list of ARM conventional formated date lists, based on start_date and end_date(inclusive)\n", + " EXAMPLE:\n", + " get_ARM_formated_dates(start_date=\"20180219\", end_date=\"20180221\")\n", + " >> [\"20180219\", \"20180220\", \"20180221\"] \n", + " \"\"\"\n", + " \n", + " _start_date = pd.to_datetime(start_date)\n", + " _end_date = pd.to_datetime(end_date)\n", + " \n", + " delta = _end_date - _start_date # returns timedelta \n", + " dates = []\n", + "\n", + " for i in range(delta.days + 1):\n", + " day = _start_date + timedelta(days=i)\n", + " day_formated = day.strftime(format=\"%Y%m%d\")\n", + " dates.append(day_formated)\n", + " return dates\n", + "\n", + "\n", + "get_ARM_formated_dates(start_date=date_start, end_date=date_end)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "51feea2e", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter a list of files based on date pattern\n", + "import glob\n", + "dates = get_ARM_formated_dates(start_date=date_start, end_date=date_end)\n", + "files_filter = []\n", + "for date in dates:\n", + " files_filter += glob.glob(f'{dir_path}/*.{date}*.*')\n", + " files_filter\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0e5d0d7", + "metadata": {}, + "outputs": [], + "source": [ + "# Load files as a single dataset\n", + "files_list = files_filter \n", + "ds = 
act.io.armfiles.read_netcdf(files_list)\n", + "ds.clean.cleanup()\n", + "print(f'{len(files_list)} files loaded')\n", + "ds\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "4a551094-9ec0-4b64-b80a-9940573c2f50", + "metadata": {}, + "source": [ + "## Plot time series data\n", + "#### Define the list of variables to be plotted:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d09b789e-84f1-4605-846b-a72c110c8048", + "metadata": {}, + "outputs": [], + "source": [ + "variables_to_plot = ['down_short_hemisp_sum', 'status_down_short_hemisp_sum', 'up_short_hemisp']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3458fb08-035b-4898-9253-0a94e6f9c97b", + "metadata": {}, + "outputs": [], + "source": [ + "ts_display = act.plotting.TimeSeriesDisplay(ds)\n", + "ts_display.add_subplots((len(variables_to_plot),), figsize = (9.5,4*len(variables_to_plot)))\n", + "\n", + "for i,v in enumerate(variables_to_plot):\n", + " ts_ax = ts_display.plot(v, subplot_index=(i,), set_title=ds.variables[v].attrs['long_name'],)\n", + " ts_ax.grid()\n", + "\n", + "plt.show()\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "2308db16-f362-4033-a11a-c3e5e75ad9ba", + "metadata": {}, + "source": [ + "## Field selection dropdown menu\n", + "Select variable to be plotted from a dropdown menu" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d5792fa8-2253-441a-8ac6-1e07a0345031", + "metadata": {}, + "outputs": [], + "source": [ + "plt.ioff()\n", + "\n", + "# populate dropdown menu with available variables \n", + "available_variables = [v for v in ds.variables if not('time' in v or v.startswith('qc_') or v.startswith('source_')) and 'long_name' in ds.variables[v].attrs]\n", + "d_variable = 'down_short_hemisp_sum'\n", + "dropdown = widgets.Dropdown(\n", + " options = [(ds.variables[v].attrs['long_name'], v) for v in available_variables],\n", + " value= d_variable,\n", + " 
description='Field:',\n", + " disabled=False,\n", + ")\n", + "dropdown.layout.margin = '0px 30% 0px 20%'\n", + "dropdown.layout.width = '50%'\n", + "\n", + "# set up display\n", + "i_display = act.plotting.TimeSeriesDisplay(ds)\n", + "i_display.add_subplots((1,), figsize = (9.5,5))\n", + "i_ax = i_display.plot(d_variable, subplot_index=(0,), set_title=ds.variables[d_variable].attrs['long_name'],)\n", + "i_ax.grid()\n", + "i_fig = i_display.fig\n", + "\n", + "# update plot callback function\n", + "def update_plot(change):\n", + " i_ax.cla()\n", + " i_ax_new = i_display.plot(change.new, subplot_index=(0,), set_title=ds.variables[change.new].attrs['long_name'],)\n", + " i_ax_new.grid()\n", + " i_fig.canvas.draw()\n", + " i_fig.canvas.flush_events()\n", + "\n", + "dropdown.observe(update_plot, names='value')\n", + "\n", + "widgets.AppLayout(\n", + " header=dropdown,\n", + " center=i_fig.canvas,\n", + " pane_heights=[1, 6,1]\n", + ")\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.12" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/DLPROF-WIND/DLPROF-WIND_tutorial.ipynb b/VAPs/quicklook/DLPROF-WIND/DLPROF-WIND_tutorial.ipynb new file mode 100644 index 00000000..7e46950f --- /dev/null +++ b/VAPs/quicklook/DLPROF-WIND/DLPROF-WIND_tutorial.ipynb @@ -0,0 +1,838 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# DLPROFWIND4NEWS.C1 Notebook\n", + "\n", + "[Click 
here](https://www.arm.gov/capabilities/vaps/dlprof-wind) for more information about this vap." + ] + }, + { + "cell_type": "markdown", + "id": "97097763", + "metadata": {}, + "source": [ + "In this notebook, we demonstrate the workflow to explore ARM vap data (using dlprofwind4news as an example.) Value-added products (VAPs) are higher-order data products that have been analyzed and processed to ease scientists’ use of ARM data in atmospheric research and global climate models. \n", + "Here is the main content we will cover." + ] + }, + { + "cell_type": "markdown", + "id": "eddec40f", + "metadata": {}, + "source": [ + "# Table of Contents\n", + "## Access the data\n", + "* How to retrieve the data\n", + "* Data path and file name conventions\n", + "* Load data\n", + "## Explore the data\n", + "* NetCDF Data structure\n", + "* Xarray essentials\n", + "* Xarray Variable\n", + "## Plot the data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "import random\n", + "\n", + "import glob\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "id": "2372d5be", + "metadata": {}, + "source": [ + "## Access the data" + ] + }, + { + "cell_type": "markdown", + "id": "5b0d9684", + "metadata": {}, + "source": [ + "### How to retrieve the data\n", + "We assume the path \"/data/archive\" is available where you are running this notebook. You can use `os.path.exists(\"/data/archive\")` to verify if the path exists on your machine.\n", + "\n", + "\n", + "### Data path and file name conventions\n", + "There are several common terms regarding ARM data, for example, data-stream-name, data-level, site, facility, instrument, etc. 
(For more details, please see [ARM Data File Standards Version 1.2](https://www.google.com/search?q=arm+datastream+facility+cite+definition&rlz=1C1GCEJ_enUS1029US1029&ei=hb41ZICrBPukqtsPvMODIA&ved=0ahUKEwjAgeiS0aL-AhV7kmoFHbzhAAQQ4dUDCBA&uact=5&oq=arm+datastream+facility+cite+definition&gs_lcp=Cgxnd3Mtd2l6LXNlcnAQAzIFCAAQogQyBQgAEKIEMgUIABCiBDIFCAAQogQyBQgAEKIEOgoIABBHENYEELADOgoIIRCgARDDBBAKSgQIQRgAUJ4DWIoOYO0PaAFwAXgAgAGMAYgBuQmSAQMyLjmYAQCgAQHIAQjAAQE&sclient=gws-wiz-serp).) \n", + "\n", + "For example, this notebook is called `dlprofwind4news.c1`, where `dlprofwind4news` is the \"datastream name\", and `c1` is the \"data level\".\n", + "\n", + "This datastream also contains site `asi` and facility `M1`. (Note: an individual datastream might have multiple site-facility pairs.)\n", + "In such a case, the data of this data-stream is stored at `/data/archive/asi/asidlprofwind4newsM1.c1`, which is in the format of `<DATA_DIR>/<site>/<site><datastream_name><facility>.<data_level>`. We can use the following method to assign the data-stream directory `datastream_dir = os.path.join(DATA_DIR, site, site + DATASTREAM_NAME + facility + '.' + DATA_LEVEL )`\n", + "\n", + "The data files under datastream_dir also follow naming conventions. But once you reach the datastream_dir level, the most important file naming convention to differentiate the files is \"yyyyMMdd.hhmmss\", which comes in handy to filter out files based on datetime. 
For example, we can use `glob.glob(f'{datastream_dir}/*.200709*.*')` to select files from September 2007.\n", + "\n", + "(Note: refer to https://adc.arm.gov/discovery/#/ and the https://adc.arm.gov/solr8/metadata/select API to explore ARM datastreams and their associated sites and facilities.)\n", + "\n", + "Please see the following examples in action" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d7e9eb85", + "metadata": {}, + "outputs": [], + "source": [ + "# Verify if DATA_DIR path exists\n", + "DATA_DIR = \"/data/archive\"\n", + "os.path.exists(DATA_DIR)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "586993fd", + "metadata": {}, + "outputs": [], + "source": [ + "# Speicify datastream_dir following the path conventions and check its existence\n", + "DATASTREAM_NAME = \"dlprofwind4news\"\n", + "DATA_LEVEL = \"c1\"\n", + "site = \"asi\"\n", + "facility = \"M1\"\n", + "datastream_dir = os.path.join(DATA_DIR, site, site + DATASTREAM_NAME + facility + '.' 
+ DATA_LEVEL )\n", + "print(datastream_dir)\n", + "print(os.path.exists(datastream_dir))\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0742f7c1", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: list 5 (random) files under datastream_dir\n", + "files = os.listdir(datastream_dir)\n", + "files[:5]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "39b98a36", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: get most recent file\n", + "list_of_files = glob.glob(f\"{datastream_dir}/*\") # * means all if need specific format then *.csv\n", + "latest_file = max(list_of_files, key=os.path.getctime)\n", + "latest_file" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "902d514e", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus sort datastream files based on datetime\n", + "files = os.listdir(datastream_dir)\n", + "file_sorted = files.copy()\n", + "file_sorted.sort() \n", + "print(file_sorted[:5])\n", + "\n", + "# to reverse\n", + "file_sorted_reverse = files.copy() \n", + "file_sorted_reverse.sort(reverse=True)\n", + "print(file_sorted_reverse[:5])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ec5923b2", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: pattern matching\n", + "# filter the 200709** files under datastream_dir\n", + "files_filter = glob.glob(f'{datastream_dir}/*.200709*.*')\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c4a4aa18", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: use armnotebook_utils.file_filter (TODO) to filter files based on datastream info \n" + ] + }, + { + "cell_type": "markdown", + "id": "cfeb9efc", + "metadata": {}, + "source": [ + "### Load data\n", + "The arm data is stored in [NetCDF](https://en.wikipedia.org/wiki/NetCDF#:~:text=%22NetCDF%20(network%20Common%20Data%20Form,format%20for%20representing%20scientific%20data.) format. 
We can use xarray's `open_dataset` method to load a single file, or `open_mfdataset` to open multiple files. (Note: the latter will still return a single xarray dataset object by combining multiple files.)\n", + "\n", + "Note: open_dataset keeps the file handle open and lazy loads its contents. All parameters are passed directly to open_dataset. It is a preferable method over load_dataset for memory efficiency. (For more details, please see [xarray.open_dataset](https://docs.xarray.dev/en/stable/generated/xarray.open_dataset.html))\n", + "\n", + "See the following example in action" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a440a329", + "metadata": {}, + "outputs": [], + "source": [ + "# open single file\n", + "full_path = latest_file\n", + "print(full_path)\n", + "ds_single = xr.open_dataset(full_path)\n", + "ds_single" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0143a3d", + "metadata": {}, + "outputs": [], + "source": [ + "print(ds_single)\n", + "print(type(ds_single))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1c0f8939", + "metadata": {}, + "outputs": [], + "source": [ + "# open multiple files\n", + "n_files = 3\n", + "full_paths = [os.path.join(datastream_dir, f_path) for f_path in file_sorted_reverse[:n_files]]\n", + "print(full_paths)\n", + "try: # Note: sometimes multiple files cannot be merged, so we used try except here.\n", + " ds_mutiple = xr.open_mfdataset(full_paths)\n", + " # ds_mutiple\n", + " print(type(ds_mutiple))\n", + " print(ds_mutiple)\n", + "except Exception as e:\n", + " print(e)" + ] + }, + { + "cell_type": "markdown", + "id": "3fa59943", + "metadata": {}, + "source": [ + "## Explore the data" + ] + }, + { + "cell_type": "markdown", + "id": "571f69b9", + "metadata": {}, + "source": [ + "### NetCDF Data structure \n", + "\n", + "\n", + "(If you are confident with NetCDF basics and xarray essentials, feel free to skip this section.)\n", + "Before we dive into 
data exploration, there are some essential concepts we should be familiar with (shown below): \n", + "* Dataset\n", + "* Data array\n", + "* Variable\n", + "* Dimension\n", + "* Coordinate\n", + "* Data Type\n", + "* Meta Data (Attributes)\n", + "\n", + "We will not go into details about NetCDF basics, but here are some references you might find helpful:\n", + "* [Components of a NetCDF Dataset](https://iprc.soest.hawaii.edu/users/xfu/tool/guidef-7.html#)\n", + "* [Network Common Data Form (NetCDF)](https://www.unidata.ucar.edu/software/netcdf/) and [A Brief History of (netCDF) Time](https://www.unidata.ucar.edu/software/netcdf/time/recs.html)\n", + "\n", + "\n", + "\n", + "### Xarray essentials\n", + "Earlier we introduced the xarray package and used `open_dataset` and `open_mfdataset` to retrieve a NetCDF data file as an xarray Dataset object. Recall that you can review the data in a notebook by using `print(ds)` or `ds`. \n", + "\n", + "Using xarray to retrieve the aforementioned NetCDF basics is straightforward. 
In general,\n", + "* Dataset: `ds`\n", + "* Data array: `ds.variables`\n", + "* Variable: `ds.variables`\n", + "* Dimension: `ds.dims`\n", + "* Coordinate: `ds.coords`\n", + "* Data Type: `type`\n", + "* Meta Data (Attributes): `ds.attrs`\n", + "\n", + "Also, here are some references if you are new to xarray:\n", + "* [Xarray in 45 minutes](https://tutorial.xarray.dev/overview/xarray-in-45-min.html)\n", + "* [Handling NetCDF Files using XArray for Absolute Beginners](https://towardsdatascience.com/handling-netcdf-files-using-xarray-for-absolute-beginners-111a8ab4463f)\n", + "\n", + "Try the following commands in action\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b3eac323", + "metadata": {}, + "outputs": [], + "source": [ + "ds = ds_single" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "77ecf85d", + "metadata": {}, + "outputs": [], + "source": [ + "# Dataset \n", + "ds" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "25e7de09", + "metadata": {}, + "outputs": [], + "source": [ + "# Data array, Variable\n", + "# Note: the info can be overwhelming. 
Do not attempt to grasp everything at the first glance.\n", + "ds.variables" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8c41b67e", + "metadata": {}, + "outputs": [], + "source": [ + "# dimenssions\n", + "ds.dims" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "156f1dfc", + "metadata": {}, + "outputs": [], + "source": [ + "# coordinates\n", + "ds.coords" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "277d6064", + "metadata": {}, + "outputs": [], + "source": [ + "# Meta Data (Attributes)\n", + "ds.attrs" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d334681f", + "metadata": {}, + "outputs": [], + "source": [ + "# type\n", + "print(type(ds))\n", + "print(type(ds.variables))\n", + "print(type(ds.dims))\n", + "print(type(ds.coords))\n", + "print(type(ds.attrs))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "643399d6", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: retrieve variable names only\n", + "list(ds.variables)" + ] + }, + { + "cell_type": "markdown", + "id": "1cf36878", + "metadata": {}, + "source": [ + "#### Discussion: variable vs. coordinates vs. dimenssions. \n", + "From [Components of a NetCDF Dataset](https://iprc.soest.hawaii.edu/users/xfu/tool/guidef-7.html#):\n", + "\n", + "* Dimession: A dimension may be used to represent a real physical dimension, for example, time, latitude, longitude, or height. A dimension might also be used to index other quantities, for example station or model-run-number.\n", + "A netCDF dimension has both a name and a length. A dimension length is an arbitrary positive integer, except that one dimension in a netCDF dataset can have the length UNLIMITED.\n", + "\n", + "* Variables: Variables are used to store the bulk of the data in a netCDF dataset. A variable represents an array of values of the same type. A scalar value is treated as a 0-dimensional array. 
A variable has a name, a data type, and a shape described by its list of dimensions specified when the variable is created. A variable may also have associated attributes, which may be added, deleted or changed after the variable is created.\n", + "\n", + "* Coordinate (Variables): It is legal for a variable to have the same name as a dimension. Such variables have no special meaning to the netCDF library. However there is a convention that such variables should be treated in a special way by software using this library.\n", + "A variable with the same name as a dimension is called a coordinate variable. It typically defines a physical coordinate corresponding to that dimension.\n", + "\n", + "\n", + "Tips: By definition, coordinates and dimensions are also variables. For example, ds.time is a coordinate, but it is also a special variable. In practice, however, when we talk about a variable, we mean a regular (non-coordinate) variable. We will use this convention for the rest of the notebook. " + ] + }, + { + "cell_type": "markdown", + "id": "d47120f9", + "metadata": {}, + "source": [ + "### Xarray Variable\n", + "\n", + "\n", + "We can use `ds.variables` to access the variables of a dataset. But printing out the whole dataset can often be overwhelming. Instead, we will work on an individual variable (also called an xarray data array) using the `ds[var_name]` syntax, where `var_name` is the name of the variable. See the following examples in action.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5a57e136", + "metadata": {}, + "outputs": [], + "source": [ + "print(type(ds[\"time\"]))\n", + "ds[\"time\"]" + ] + }, + { + "cell_type": "markdown", + "id": "51ec643e", + "metadata": {}, + "source": [ + "#### Variable properties\n", + "For an individual variable, the following properties are most commonly used, along with the associated xarray attributes to retrieve them. 
(assuming var = ds[var_name])\n", + "* name: `var.name`\n", + "* data content: `var.data`\n", + "* attributes: `var.attrs`\n", + "* dimenstions: `var.dims`\n", + "* data type: `var.data.dtype`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a632a525", + "metadata": {}, + "outputs": [], + "source": [ + "var_name = \"time\"\n", + "var = ds[var_name]\n", + "\n", + "print(\"var.name: \\n\", var.name, \"\\n\")\n", + "print(\"var.data: \\n\", var.data, \"\\n\")\n", + "print(\"var.attrs: \\n\", var.attrs, \"\\n\")\n", + "print(\"var.dims: \\n\", var.dims, \"\\n\")\n", + "print(\"var.data.dtype: \\n\", var.data.dtype, \"\\n\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b256b07f", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: indivdual variable info table (collect and display variable info as a tabular format)\n", + "df_info = pd.DataFrame()\n", + "df_info[\"var_name\"] = list(ds.variables)\n", + "df_info[\"dims\"] = df_info.var_name.apply(lambda x: ds[x].dims)\n", + "df_info[\"is_dim\"] = df_info.var_name.apply(lambda x: x in ds.dims)\n", + "df_info[\"n_dim\"] = df_info.var_name.apply(lambda x: len(ds[x].dims))\n", + "df_info[\"attrs\"] = df_info.var_name.apply(lambda x: ds[x].attrs)\n", + "df_info[\"dtype\"] = df_info.var_name.apply(lambda x: ds[x].data.dtype)\n", + "df_info" + ] + }, + { + "cell_type": "markdown", + "id": "e6a36b96", + "metadata": {}, + "source": [ + "### Data cleaning/Preprocessing (skipped)\n", + "Data cleaning and preprocessing is an important stage in the the data analysis pipeline. However it will be out of the scope of this notebook. For more information about data cleaning and preprocessing basics, here are some references. 
\n", + "* [Xarray Fundamentals](https://earth-env-data-science.github.io/lectures/xarray/xarray.html)\n", + "* [Xarray Tutorial — Pangeo Gallery documentation](http://gallery.pangeo.io/repos/pangeo-data/pangeo-tutorial-gallery/xarray.html)\n", + "* [Pythonic Data Cleaning With pandas and NumPy](https://realpython.com/python-data-cleaning-numpy-pandas/)\n", + "* [Pandas - Cleaning Data](https://www.w3schools.com/python/pandas/pandas_cleaning.asp)" + ] + }, + { + "cell_type": "markdown", + "id": "ae6e1a69", + "metadata": {}, + "source": [ + "### Plotting\n", + "Data visualization (or plotting) is another important data analysis topic and deserves its own discussion. Here in this notebook we will only demonstrate basic tools for simple data visualization tasks.\n", + "\n", + "Here are some references you might find useful:\n", + "* xarray plotting: https://docs.xarray.dev/en/stable/user-guide/plotting.html\n", + "* Atmospheric Community Toolkit (ACT): https://arm-doe.github.io/ACT/index.html\n", + "\n", + "Note: this notebook is auto-generated using a template. It uses a general idea to select variable(s) to plot and is not customized to each individual notebook. Feel free to change the variables of interest, especially if a figure fails to plot." + ] + }, + { + "cell_type": "markdown", + "id": "7e417fb2", + "metadata": {}, + "source": [ + "#### 1-dimensional basic time series plot\n", + "\n", + "For the following plot we would like to find variables such that\n", + "* it has one and only one dimension\n", + "* \"time\" is its coordinate variable\n", + "* it is not a dimension itself,\n", + "* it is not a special variable whose name contains any of the substrings [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "\n", + "Please see the following example in action. 
(For more details about pandas filtering, please see the following references.)\n", + "* [pandas: multiple conditions while indexing data frame](https://stackoverflow.com/questions/22591174/pandas-multiple-conditions-while-indexing-data-frame-unexpected-behavior)\n", + "* [pandas.DataFrame.apply](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.apply.html)\n", + "* [How to test if a string contains one of the substrings in a list...](https://stackoverflow.com/questions/26577516/how-to-test-if-a-string-contains-one-of-the-substrings-in-a-list-in-pandas)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "30edac2d", + "metadata": {}, + "outputs": [], + "source": [ + "# (pandas query) filter n_dim==1, non-dimenssional, and not contains the following substrings\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter = df_info[(df_info.n_dim == 1) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (df_info.is_dim==False) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4777a659", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " var_1d = df_filter.var_name.values[0]\n", + " var_1d\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fb5ae985", + "metadata": {}, + "outputs": [], + "source": [ + "# Note: if failed, change to another variable to plot.\n", + "try:\n", + " ds[var_1d].plot()\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "markdown", + "id": "9e41066e", + "metadata": {}, + "source": [ + "#### 2-dimensional basic plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "09bd4adc", + "metadata": {}, + "outputs": [], + "source": [ + "# (pandas query) filter n_dim==2, non-dimenssional, and not contains the 
following substrings\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_2 = df_info[(df_info.n_dim == 2) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (df_info.is_dim==False) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter_2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2cdea1a9", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " var_2d = df_filter_2.var_name.values[0]\n", + " var_2d\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f5f48879", + "metadata": {}, + "outputs": [], + "source": [ + "# Note: if failed, change to another variable to plot.\n", + "try:\n", + " print(ds[var_2d].dims)\n", + " # ds[var_2d].plot()\n", + "\n", + " # conventionally, use \"time\" as x-axis\n", + " ds[var_2d].plot(x=\"time\")\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "markdown", + "id": "7554b2a6", + "metadata": {}, + "source": [ + "#### qc-plotting (optional)\n", + "\n", + "Note: act qc plotting has more strict requirement, one of them is the associated qc variable need to have \"flag_masks\" attributes. 
Which is added by using ds = act.io.armfiles.read_netcdf(files_list), then ds.clean.cleanup()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "71c2096f", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " ds_act = act.io.armfiles.read_netcdf(full_path)\n", + " print(type(ds_act))\n", + " ds_act.clean.cleanup()\n", + "\n", + " # or \n", + " # ds.clean.cleanup()\n", + "except Exception as e:\n", + " print(\"ERROR\", e)\n", + " ds_act = ds" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "844f8505", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter valid variables for ACT qc plotting\n", + "condition = (\"qc_\" + df_info.var_name).apply(lambda x: ds_act[x].attrs.get(\"flag_masks\") is not None \n", + " if x in list(ds.variables) else False)\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_3 = df_info[(df_info.is_dim==False) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings))) &\n", + " condition\n", + " ]\n", + "df_filter_3" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ecd38cc6", + "metadata": {}, + "outputs": [], + "source": [ + "# QC Plot\n", + "try:\n", + " qc_variable = df_filter_3.var_name.values[0]\n", + " print(qc_variable)\n", + "except Exception as e:\n", + " print(e)\n", + " \n", + "try:\n", + "\n", + " # Plot\n", + " qc_display = act.plotting.TimeSeriesDisplay(ds_act)\n", + " qc_display.add_subplots((2,), figsize = (9.5,10))\n", + " qc_ax = qc_display.plot(qc_variable, subplot_index=(0,), set_title=\"QC results on field: \" + qc_variable,)\n", + " qc_ax.grid()\n", + " qc_display.qc_flag_block_plot(qc_variable, subplot_index=(1,))\n", + " qc_ax.set_xlabel(f\"UTC Time starts at {ds.time.data[0]}\")\n", + "\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e)" + ] + }, + { + "cell_type": "markdown", + "id": "d76e4d27", + "metadata": 
{}, + "source": [ + "#### bonus: choose variables to plot from a dropdown menu " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bb733804", + "metadata": {}, + "outputs": [], + "source": [ + "# Valid variables filtering\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_4 = df_info[(df_info.is_dim==False) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter_4" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "82d5c20d", + "metadata": {}, + "outputs": [], + "source": [ + "# example 1: using xarray plot\n", + "\n", + "# Uncomment the following cell to try the interactive plot (ctrl + /)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2fc9aaa4", + "metadata": {}, + "outputs": [], + "source": [ + "# %matplotlib widget\n", + "# plt.clf()\n", + "\n", + "# fig, ax = plt.subplots(figsize=(10, 4))\n", + "\n", + "# available_variables = df_filter_4.var_name.values\n", + "# @widgets.interact(var=available_variables)\n", + "# def update(var = available_variables[0]):\n", + "# fig.clear() # Remove old lines from plot and plot new one\n", + "# if len(ds[var].dims)==2:\n", + "# ds[var].plot(x=\"time\", add_colorbar=False)\n", + "# else:\n", + "# ds[var].plot()\n", + "# plt.grid()\n", + "# plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fabdd802", + "metadata": {}, + "outputs": [], + "source": [ + "# example 2: using act plot\n", + "\n", + "# Uncomment the following cell to try the interactive plot (ctrl + /)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "962b4186", + "metadata": {}, + "outputs": [], + "source": [ + "# %matplotlib widget\n", + "# plt.clf()\n", + "\n", + "# available_variables = df_filter_4.var_name.values\n", + "\n", + "\n", + "# @widgets.interact(var=available_variables)\n", + "# def 
update(var = available_variables[0]):\n", + "\n", + "# i_display = act.plotting.TimeSeriesDisplay(ds_act)\n", + "# i_display.add_subplots((1,), figsize=(10, 4))\n", + "# ax = i_display.plot(var, subplot_index=(0,), set_title=f\"{var} ({ds_act[var].attrs['long_name']})\",)\n", + "\n", + "# ax.set_xlabel(f\"UTC Time starts at {ds.time.data[0]}\")\n", + "# ax.grid()\n", + "# plt.show()\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.16" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/DLPROF-WIND/dlprofwind4news.c1.ipynb b/VAPs/quicklook/DLPROF-WIND/dlprofwind4news.c1.ipynb new file mode 100644 index 00000000..6cfd8dfe --- /dev/null +++ b/VAPs/quicklook/DLPROF-WIND/dlprofwind4news.c1.ipynb @@ -0,0 +1,297 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# DLPROFWIND4NEWS.C1 Plots\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/dlprof-wind) for more information about this vap." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib widget\n", + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import ipywidgets as widgets\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "# Data archive directory\n", + "DATA_DIR = r'/data/archive/'\n", + "\n", + "# Datastream info\n", + "DATASTREAM_NAME = 'dlprofwind4news'\n", + "DATA_LEVEL = 'c1'\n", + "LOCATIONS = [{'end_date': '2017-10-29', 'facility': 'M1', 'site': 'asi', 'start_date': '2016-09-06'}, {'end_date': '2020-06-01', 'facility': 'S2', 'site': 'anx', 'start_date': '2019-12-01'}, {'end_date': '2023-09-12', 'facility': 'C1', 'site': 'ena', 'start_date': '2014-10-21'}, {'end_date': '2022-09-30', 'facility': 'M1', 'site': 'hou', 'start_date': '2021-08-05'}, {'end_date': '2015-08-27', 'facility': 'M1', 'site': 'mao', 'start_date': '2014-01-31'}, {'end_date': '2023-06-15', 'facility': 'M1', 'site': 'guc', 'start_date': '2021-09-01'}, {'end_date': '2019-04-30', 'facility': 'M1', 'site': 'cor', 'start_date': '2018-09-28'}, {'end_date': '2012-03-31', 'facility': 'M1', 'site': 'pgh', 'start_date': '2011-07-21'}, {'end_date': '2023-12-17', 'facility': 'C1', 'site': 'nsa', 'start_date': '2014-07-30'}, {'end_date': '2021-06-14', 'facility': 'M1', 'site': 'oli', 'start_date': '2014-07-28'}, {'end_date': '2023-12-18', 'facility': 'C1', 'site': 'sgp', 'start_date': '2010-11-02'}, {'end_date': '2022-09-26', 'facility': 'E32', 'site': 'sgp', 'start_date': '2016-05-03'}, {'end_date': '2023-10-31', 'facility': 'E37', 'site': 'sgp', 'start_date': '2016-05-03'}, {'end_date': '2023-08-23', 'facility': 'E39', 'site': 'sgp', 'start_date': '2016-04-01'}, {'end_date': '2022-10-30', 'facility': 'E41', 'site': 'sgp', 'start_date': '2016-05-03'}, 
{'end_date': '2013-06-19', 'facility': 'M1', 'site': 'pvc', 'start_date': '2012-06-29'}, {'end_date': '2015-01-03', 'facility': 'C3', 'site': 'twp', 'start_date': '2010-12-13'}]" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "9faaf875", + "metadata": {}, + "source": [ + "## Define site, facility, and date range" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ac6764f5", + "metadata": {}, + "outputs": [], + "source": [ + "print(\"The following locations and date ranges are available for this VAP:\")\n", + "display(pd.DataFrame(LOCATIONS, columns=['site', 'facility', 'start_date', 'end_date']))" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "8d132223", + "metadata": {}, + "source": [ + "#### Define site, facility, and date range (date format: YYYY-MM-DD) using the variables below:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e563983a", + "metadata": {}, + "outputs": [], + "source": [ + "site_facility = ( 'sgp', 'C1' )\n", + "\n", + "date_start = '2023-12-16'\n", + "date_end = '2023-12-18'" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "bccd3dfe-2f99-49a5-bace-ea37e8dd8fc3", + "metadata": {}, + "source": [ + "## Load data files\n", + "Load data files from /data/archive/" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cb4b9a26-c574-49c0-a521-658fa553e39e", + "metadata": {}, + "outputs": [], + "source": [ + "# Compile list of files\n", + "site, facility = site_facility\n", + "d_date_start = datetime.strptime(date_start, '%Y-%m-%d')\n", + "d_date_end = datetime.strptime(date_end, '%Y-%m-%d')\n", + "dir_path = os.path.join(DATA_DIR + site, site + DATASTREAM_NAME + facility + r'.' 
+ DATA_LEVEL )\n", + "dir_path\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6be8f3dc", + "metadata": {}, + "outputs": [], + "source": [ + "from datetime import date, timedelta\n", + "import pandas as pd\n", + "\n", + "def get_ARM_formated_dates(start_date, end_date):\n", + " \"\"\"\n", + " Get a list of ARM conventional formated date lists, based on start_date and end_date(inclusive)\n", + " EXAMPLE:\n", + " get_ARM_formated_dates(start_date=\"20180219\", end_date=\"20180221\")\n", + " >> [\"20180219\", \"20180220\", \"20180221\"] \n", + " \"\"\"\n", + " \n", + " _start_date = pd.to_datetime(start_date)\n", + " _end_date = pd.to_datetime(end_date)\n", + " \n", + " delta = _end_date - _start_date # returns timedelta \n", + " dates = []\n", + "\n", + " for i in range(delta.days + 1):\n", + " day = _start_date + timedelta(days=i)\n", + " day_formated = day.strftime(format=\"%Y%m%d\")\n", + " dates.append(day_formated)\n", + " return dates\n", + "\n", + "\n", + "get_ARM_formated_dates(start_date=date_start, end_date=date_end)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "51feea2e", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter a list of files based on date pattern\n", + "import glob\n", + "dates = get_ARM_formated_dates(start_date=date_start, end_date=date_end)\n", + "files_filter = []\n", + "for date in dates:\n", + " files_filter += glob.glob(f'{dir_path}/*.{date}*.*')\n", + " files_filter\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0e5d0d7", + "metadata": {}, + "outputs": [], + "source": [ + "# Load files as a single dataset\n", + "files_list = files_filter \n", + "ds = act.io.armfiles.read_netcdf(files_list)\n", + "ds.clean.cleanup()\n", + "print(f'{len(files_list)} files loaded')\n", + "ds\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "4a551094-9ec0-4b64-b80a-9940573c2f50", + "metadata": {}, + "source": [ + "## Plot 
time series data\n", + "#### Define the list of variables to be plotted:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d09b789e-84f1-4605-846b-a72c110c8048", + "metadata": {}, + "outputs": [], + "source": [ + "variables_to_plot = ['u', 'v', 'wind_speed']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3458fb08-035b-4898-9253-0a94e6f9c97b", + "metadata": {}, + "outputs": [], + "source": [ + "ts_display = act.plotting.TimeSeriesDisplay(ds)\n", + "ts_display.add_subplots((len(variables_to_plot),), figsize = (9.5,4*len(variables_to_plot)))\n", + "\n", + "for i,v in enumerate(variables_to_plot):\n", + " ts_ax = ts_display.plot(v, subplot_index=(i,), set_title=ds.variables[v].attrs['long_name'],)\n", + " ts_ax.grid()\n", + "\n", + "plt.show()\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "2308db16-f362-4033-a11a-c3e5e75ad9ba", + "metadata": {}, + "source": [ + "## Field selection dropdown menu\n", + "Select variable to be plotted from a dropdown menu" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d5792fa8-2253-441a-8ac6-1e07a0345031", + "metadata": {}, + "outputs": [], + "source": [ + "plt.ioff()\n", + "\n", + "# populate dropdown menu with available variables \n", + "available_variables = [v for v in ds.variables if not('time' in v or v.startswith('qc_') or v.startswith('source_')) and 'long_name' in ds.variables[v].attrs]\n", + "d_variable = 'u'\n", + "dropdown = widgets.Dropdown(\n", + " options = [(ds.variables[v].attrs['long_name'], v) for v in available_variables],\n", + " value= d_variable,\n", + " description='Field:',\n", + " disabled=False,\n", + ")\n", + "dropdown.layout.margin = '0px 30% 0px 20%'\n", + "dropdown.layout.width = '50%'\n", + "\n", + "# set up display\n", + "i_display = act.plotting.TimeSeriesDisplay(ds)\n", + "i_display.add_subplots((1,), figsize = (9.5,5))\n", + "i_ax = i_display.plot(d_variable, subplot_index=(0,), 
set_title=ds.variables[d_variable].attrs['long_name'],)\n", + "i_ax.grid()\n", + "i_fig = i_display.fig\n", + "\n", + "# update plot callback function\n", + "def update_plot(change):\n", + " i_ax.cla()\n", + " i_ax_new = i_display.plot(change.new, subplot_index=(0,), set_title=ds.variables[change.new].attrs['long_name'],)\n", + " i_ax_new.grid()\n", + " i_fig.canvas.draw()\n", + " i_fig.canvas.flush_events()\n", + "\n", + "dropdown.observe(update_plot, names='value')\n", + "\n", + "widgets.AppLayout(\n", + " header=dropdown,\n", + " center=i_fig.canvas,\n", + " pane_heights=[1, 6,1]\n", + ")\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.12" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/DLPROF-WSTATS/DLPROF-WSTATS_tutorial.ipynb b/VAPs/quicklook/DLPROF-WSTATS/DLPROF-WSTATS_tutorial.ipynb new file mode 100644 index 00000000..8724b42e --- /dev/null +++ b/VAPs/quicklook/DLPROF-WSTATS/DLPROF-WSTATS_tutorial.ipynb @@ -0,0 +1,838 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# DLPROFWSTATS4NEWS.C1 Notebook\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/dlprof-wstats) for more information about this vap." + ] + }, + { + "cell_type": "markdown", + "id": "97097763", + "metadata": {}, + "source": [ + "In this notebook, we demonstrate the workflow to explore ARM vap data (using dlprofwstats4news as an example.) 
Value-added products (VAPs) are higher-order data products that have been analyzed and processed to ease scientists’ use of ARM data in atmospheric research and global climate models. \n", + "Here is the main content we will cover." + ] + }, + { + "cell_type": "markdown", + "id": "eddec40f", + "metadata": {}, + "source": [ + "# Table of Content\n", + "## Access the data\n", + "* How to retrieve the data\n", + "* Data path and file name conventions\n", + "* Load data\n", + "## Explore the data\n", + "* NetCDF Data structure\n", + "* Xarray essentials\n", + "* Xarray Variable\n", + "## Plot the data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "import random\n", + "\n", + "import glob\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "id": "2372d5be", + "metadata": {}, + "source": [ + "## Access the data" + ] + }, + { + "cell_type": "markdown", + "id": "5b0d9684", + "metadata": {}, + "source": [ + "### How to retrieve the data\n", + "We assume the path \"/data/archive\" is available where you are running this notebook. You can use `os.path.exists(\"/data/archive\")` to verify if the path exists at your machine.\n", + "\n", + "\n", + "### Data path and file name conventions\n", + "There several common terminologies regarding ARM data, for example, data-stream-name, data-level, site, facility, instrument, etc. 
(For more details, please see [ARM Data File Standards Version 1.2](https://www.google.com/search?q=arm+datastream+facility+cite+definition&rlz=1C1GCEJ_enUS1029US1029&ei=hb41ZICrBPukqtsPvMODIA&ved=0ahUKEwjAgeiS0aL-AhV7kmoFHbzhAAQQ4dUDCBA&uact=5&oq=arm+datastream+facility+cite+definition&gs_lcp=Cgxnd3Mtd2l6LXNlcnAQAzIFCAAQogQyBQgAEKIEMgUIABCiBDIFCAAQogQyBQgAEKIEOgoIABBHENYEELADOgoIIRCgARDDBBAKSgQIQRgAUJ4DWIoOYO0PaAFwAXgAgAGMAYgBuQmSAQMyLjmYAQCgAQHIAQjAAQE&sclient=gws-wiz-serp).) \n", + "\n", + "For example, this notebook is called `dlprofwstats4news.c1`, where `dlprofwstats4news` is the \"datastream name\", and `c1` is the \"data level\".\n", + "\n", + "This datastream also contains site `asi` and facility `M1`. (Note: individual datastream might have multiple site-facility pairs.)\n", + "In such a case, the data of this data-stream is stored at `/data/archive/asi/asidlprofwstats4newsM1.c1`, which is in the format of `<DATA_DIR>/<site>/<site><datastream_name><facility>.<data_level>`. We can use the following method to assign the data-stream directory `datastream_dir = os.path.join(DATA_DIR, site, site + DATASTREAM_NAME + facility + '.' + DATA_LEVEL )`\n", + "\n", + "The data files under datastream_dir also follow naming conventions. Once you reach the datastream_dir level, the most important file naming convention to differentiate the files is \"yyyyMMdd.hhmmss\", which comes in handy to filter out files based on datetime. 
For example, we can use `glob.glob(f'{datastream_dir}/*.200709*.*')` to filter files from September 2007.\n", + "\n", + "(Note: refer to the https://adc.arm.gov/discovery/#/ and https://adc.arm.gov/solr8/metadata/select API to explore ARM datastream and associated available sites and facilities.)\n", + "\n", + "Please see the following examples in action" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d7e9eb85", + "metadata": {}, + "outputs": [], + "source": [ + "# Verify if DATA_DIR path exists\n", + "DATA_DIR = \"/data/archive\"\n", + "os.path.exists(DATA_DIR)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "586993fd", + "metadata": {}, + "outputs": [], + "source": [ + "# Specify datastream_dir following the path conventions and check its existence\n", + "DATASTREAM_NAME = \"dlprofwstats4news\"\n", + "DATA_LEVEL = \"c1\"\n", + "site = \"asi\"\n", + "facility = \"M1\"\n", + "datastream_dir = os.path.join(DATA_DIR, site, site + DATASTREAM_NAME + facility + '.' 
+ DATA_LEVEL )\n", + "print(datastream_dir)\n", + "print(os.path.exists(datastream_dir))\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0742f7c1", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: list 5 (random) files under datastream_dir\n", + "files = os.listdir(datastream_dir)\n", + "files[:5]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "39b98a36", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: get most recent file\n", + "list_of_files = glob.glob(f\"{datastream_dir}/*\") # * means all if need specific format then *.csv\n", + "latest_file = max(list_of_files, key=os.path.getctime)\n", + "latest_file" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "902d514e", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus sort datastream files based on datetime\n", + "files = os.listdir(datastream_dir)\n", + "file_sorted = files.copy()\n", + "file_sorted.sort() \n", + "print(file_sorted[:5])\n", + "\n", + "# to reverse\n", + "file_sorted_reverse = files.copy() \n", + "file_sorted_reverse.sort(reverse=True)\n", + "print(file_sorted_reverse[:5])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ec5923b2", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: pattern matching\n", + "# filter the 200709** files under datastream_dir\n", + "files_filter = glob.glob(f'{datastream_dir}/*.200709*.*')\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c4a4aa18", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: use armnotebook_utils.file_filter (TODO) to filter files based on datastream info \n" + ] + }, + { + "cell_type": "markdown", + "id": "cfeb9efc", + "metadata": {}, + "source": [ + "### Load data\n", + "The arm data is stored in [NetCDF](https://en.wikipedia.org/wiki/NetCDF#:~:text=%22NetCDF%20(network%20Common%20Data%20Form,format%20for%20representing%20scientific%20data.) format. 
We can use xarray's `open_dataset` method to load single file, or `open_mfdataset` to open multiple files. (Note: the latter will still return a single xarray dataset object by combining multiple files.)\n", + "\n", + "Note: open_dataset keeps the file handle open and lazy loads its contents. All parameters are passed directly to open_dataset. It is a preferable method over load_dataset for memory-effiency. (more details, please see [xarray.open_dataset](https://docs.xarray.dev/en/stable/generated/xarray.open_dataset.html))\n", + "\n", + "See the following example in action" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a440a329", + "metadata": {}, + "outputs": [], + "source": [ + "# open single file\n", + "full_path = latest_file\n", + "print(full_path)\n", + "ds_single = xr.open_dataset(full_path)\n", + "ds_single" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0143a3d", + "metadata": {}, + "outputs": [], + "source": [ + "print(ds_single)\n", + "print(type(ds_single))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1c0f8939", + "metadata": {}, + "outputs": [], + "source": [ + "# open multiple files\n", + "n_files = 3\n", + "full_paths = [os.path.join(datastream_dir, f_path) for f_path in file_sorted_reverse[:n_files]]\n", + "print(full_paths)\n", + "try: # Note: sometimes multiple files cannot be merged, so we used try except here.\n", + " ds_mutiple = xr.open_mfdataset(full_paths)\n", + " # ds_mutiple\n", + " print(type(ds_mutiple))\n", + " print(ds_mutiple)\n", + "except Exception as e:\n", + " print(e)" + ] + }, + { + "cell_type": "markdown", + "id": "3fa59943", + "metadata": {}, + "source": [ + "## Explore the data" + ] + }, + { + "cell_type": "markdown", + "id": "571f69b9", + "metadata": {}, + "source": [ + "### NetCDF Data structure \n", + "\n", + "\n", + "(If you are confident with NetCDF basics and xarray essentials, feel free to skip this session.)\n", + "Before we dive into 
data exploration, there are some essential concepts we should be familiar with (shown below): \n", + "* Dataset\n", + "* Data array\n", + "* Variable\n", + "* Dimension\n", + "* Coordinate\n", + "* Data Type\n", + "* Meta Data (Attributes)\n", + "\n", + "We will not go into details about NetCDF basics and here are some references you might find helpful\n", + "* [Components of a NetCDF Dataset](https://iprc.soest.hawaii.edu/users/xfu/tool/guidef-7.html#)\n", + "* [Network Common Data Form (NetCDF)](https://www.unidata.ucar.edu/software/netcdf/) and [A Brief History of (netCDF) Time](https://www.unidata.ucar.edu/software/netcdf/time/recs.html)\n", + "\n", + "\n", + "\n", + "### Xarray essentials\n", + "Earlier we introduced the xarray package and used `open_dataset` and `open_mfdataset` to retrieve NetCDF data files as an xarray Dataset object. Recall that you can review the data in a notebook by using `print(ds)` or `ds`. \n", + "\n", + "Using xarray to retrieve the aforementioned NetCDF basics is straightforward. 
In general,\n", + "* Dataset: `ds`\n", + "* Data array: `ds.variables`\n", + "* Variable: `ds.variables`\n", + "* Dimension: `ds.dims`\n", + "* Coordinate: `ds.coords`\n", + "* Data Type: `type`\n", + "* Meta Data (Attributes): `ds.attrs`\n", + "\n", + "Also, here are some references if you are new to xarray\n", + "* [Xarray in 45 minutes](https://tutorial.xarray.dev/overview/xarray-in-45-min.html)\n", + "* [Handling NetCDF Files using XArray for Absolute Beginners](https://towardsdatascience.com/handling-netcdf-files-using-xarray-for-absolute-beginners-111a8ab4463f)\n", + "\n", + "Try the following commands in action\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b3eac323", + "metadata": {}, + "outputs": [], + "source": [ + "ds = ds_single" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "77ecf85d", + "metadata": {}, + "outputs": [], + "source": [ + "# Dataset \n", + "ds" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "25e7de09", + "metadata": {}, + "outputs": [], + "source": [ + "# Data array, Variable\n", + "# Note: the info can be overwhelming. 
Do not attempt to grasp everything at the first glance.\n", + "ds.variables" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8c41b67e", + "metadata": {}, + "outputs": [], + "source": [ + "# dimenssions\n", + "ds.dims" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "156f1dfc", + "metadata": {}, + "outputs": [], + "source": [ + "# coordinates\n", + "ds.coords" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "277d6064", + "metadata": {}, + "outputs": [], + "source": [ + "# Meta Data (Attributes)\n", + "ds.attrs" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d334681f", + "metadata": {}, + "outputs": [], + "source": [ + "# type\n", + "print(type(ds))\n", + "print(type(ds.variables))\n", + "print(type(ds.dims))\n", + "print(type(ds.coords))\n", + "print(type(ds.attrs))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "643399d6", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: retrieve variable names only\n", + "list(ds.variables)" + ] + }, + { + "cell_type": "markdown", + "id": "1cf36878", + "metadata": {}, + "source": [ + "#### Discussion: variable vs. coordinates vs. dimenssions. \n", + "From [Components of a NetCDF Dataset](https://iprc.soest.hawaii.edu/users/xfu/tool/guidef-7.html#):\n", + "\n", + "* Dimession: A dimension may be used to represent a real physical dimension, for example, time, latitude, longitude, or height. A dimension might also be used to index other quantities, for example station or model-run-number.\n", + "A netCDF dimension has both a name and a length. A dimension length is an arbitrary positive integer, except that one dimension in a netCDF dataset can have the length UNLIMITED.\n", + "\n", + "* Variables: Variables are used to store the bulk of the data in a netCDF dataset. A variable represents an array of values of the same type. A scalar value is treated as a 0-dimensional array. 
A variable has a name, a data type, and a shape described by its list of dimensions specified when the variable is created. A variable may also have associated attributes, which may be added, deleted or changed after the variable is created.\n", + "\n", + "* Coordinate (Variables): It is legal for a variable to have the same name as a dimension. Such variables have no special meaning to the netCDF library. However there is a convention that such variables should be treated in a special way by software using this library.\n", + "A variable with the same name as a dimension is called a coordinate variable. It typically defines a physical coordinate corresponding to that dimension.\n", + "\n", + "\n", + "Tips: By definition, coordinates and dimensions are also variables. For example ds.time is a coordinate but it is also a special variable. But in practice, when we talk about a variable, it implies a regular/non-coordinate variable. We will use this convention for the remainder of this notebook. " + ] + }, + { + "cell_type": "markdown", + "id": "d47120f9", + "metadata": {}, + "source": [ + "### Xarray Variable\n", + "\n", + "\n", + "We can use `ds.variables` to access the variables of a dataset. But printing out the whole dataset is often overwhelming. Instead, we would work on the individual variable (also called Xarray data array.) using the `ds[var_name]` syntax, where `var_name` is the name of the variable. See the following examples in action.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5a57e136", + "metadata": {}, + "outputs": [], + "source": [ + "print(type(ds[\"time\"]))\n", + "ds[\"time\"]" + ] + }, + { + "cell_type": "markdown", + "id": "51ec643e", + "metadata": {}, + "source": [ + "#### Variable properties\n", + "For an individual variable, the following properties are mostly used with the associated xarray method to retrieve them. 
(assuming var = ds[var_name])\n", + "* name: `var.name`\n", + "* data content: `var.data`\n", + "* attributes: `var.attrs`\n", + "* dimenstions: `var.dims`\n", + "* data type: `var.data.dtype`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a632a525", + "metadata": {}, + "outputs": [], + "source": [ + "var_name = \"time\"\n", + "var = ds[var_name]\n", + "\n", + "print(\"var.name: \\n\", var.name, \"\\n\")\n", + "print(\"var.data: \\n\", var.data, \"\\n\")\n", + "print(\"var.attrs: \\n\", var.attrs, \"\\n\")\n", + "print(\"var.dims: \\n\", var.dims, \"\\n\")\n", + "print(\"var.data.dtype: \\n\", var.data.dtype, \"\\n\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b256b07f", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: indivdual variable info table (collect and display variable info as a tabular format)\n", + "df_info = pd.DataFrame()\n", + "df_info[\"var_name\"] = list(ds.variables)\n", + "df_info[\"dims\"] = df_info.var_name.apply(lambda x: ds[x].dims)\n", + "df_info[\"is_dim\"] = df_info.var_name.apply(lambda x: x in ds.dims)\n", + "df_info[\"n_dim\"] = df_info.var_name.apply(lambda x: len(ds[x].dims))\n", + "df_info[\"attrs\"] = df_info.var_name.apply(lambda x: ds[x].attrs)\n", + "df_info[\"dtype\"] = df_info.var_name.apply(lambda x: ds[x].data.dtype)\n", + "df_info" + ] + }, + { + "cell_type": "markdown", + "id": "e6a36b96", + "metadata": {}, + "source": [ + "### Data cleaning/Preprocessing (skipped)\n", + "Data cleaning and preprocessing is an important stage in the the data analysis pipeline. However it will be out of the scope of this notebook. For more information about data cleaning and preprocessing basics, here are some references. 
\n", + "* [Xarray Fundamentals](https://earth-env-data-science.github.io/lectures/xarray/xarray.html)\n", + "* [Xarray Tutorial — Pangeo Gallery documentation](http://gallery.pangeo.io/repos/pangeo-data/pangeo-tutorial-gallery/xarray.html)\n", + "* [Pythonic Data Cleaning With pandas and NumPy](https://realpython.com/python-data-cleaning-numpy-pandas/)\n", + "* [Pandas - Cleaning Data](https://www.w3schools.com/python/pandas/pandas_cleaning.asp)" + ] + }, + { + "cell_type": "markdown", + "id": "ae6e1a69", + "metadata": {}, + "source": [ + "### Plotting\n", + "Data visualization (or plotting) is another important data analysis topic and deserves its own discussion. Here in this notebook we will only demonstrate basic tool for simple data visualization tasks.\n", + "\n", + "Here are some reference you might find useful:\n", + "* xarray plotting: https://docs.xarray.dev/en/stable/user-guide/plotting.html\n", + "* Atmospheric Community Toolkit (ACT): https://arm-doe.github.io/ACT/index.html\n", + "\n", + "Note: this notebook is auto-generated using a template. It uses a general idea to select variable(s) to plot and is not customized to each indivdual notebook. Feel free to change the variables in intrrests, especially certain figure is failed to plot." + ] + }, + { + "cell_type": "markdown", + "id": "7e417fb2", + "metadata": {}, + "source": [ + "#### 1-dimenssional basic time series plot\n", + "\n", + "For the following plot we would like to find variables such that\n", + "* it has one and only one dimession\n", + "* \"time\" is its coordinate variable\n", + "* it is not a dimenssion itself,\n", + "* it is not a special variable with substrings within [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "\n", + "Please see the following example in action. 
(For more details about pandas filtering, please see the following references.)\n", + "* [pandas: multiple conditions while indexing data frame](https://stackoverflow.com/questions/22591174/pandas-multiple-conditions-while-indexing-data-frame-unexpected-behavior)\n", + "* [pandas.DataFrame.apply](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.apply.html)\n", + "* [How to test if a string contains one of the substrings in a list...](https://stackoverflow.com/questions/26577516/how-to-test-if-a-string-contains-one-of-the-substrings-in-a-list-in-pandas" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "30edac2d", + "metadata": {}, + "outputs": [], + "source": [ + "# (pandas query) filter n_dim==1, non-dimenssional, and not contains the following substrings\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter = df_info[(df_info.n_dim == 1) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (df_info.is_dim==False) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4777a659", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " var_1d = df_filter.var_name.values[0]\n", + " var_1d\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fb5ae985", + "metadata": {}, + "outputs": [], + "source": [ + "# Note: if failed, change to another variable to plot.\n", + "try:\n", + " ds[var_1d].plot()\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "markdown", + "id": "9e41066e", + "metadata": {}, + "source": [ + "#### 2-dimenssional basic plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "09bd4adc", + "metadata": {}, + "outputs": [], + "source": [ + "# (pandas query) filter n_dim==2, non-dimenssional, and not contains the 
following substrings\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_2 = df_info[(df_info.n_dim == 2) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (df_info.is_dim==False) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter_2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2cdea1a9", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " var_2d = df_filter_2.var_name.values[0]\n", + " var_2d\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f5f48879", + "metadata": {}, + "outputs": [], + "source": [ + "# Note: if failed, change to another variable to plot.\n", + "try:\n", + " print(ds[var_2d].dims)\n", + " # ds[var_2d].plot()\n", + "\n", + " # conventionally, use \"time\" as x-axis\n", + " ds[var_2d].plot(x=\"time\")\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "markdown", + "id": "7554b2a6", + "metadata": {}, + "source": [ + "#### qc-plotting (optional)\n", + "\n", + "Note: act qc plotting has more strict requirement, one of them is the associated qc variable need to have \"flag_masks\" attributes. 
Which is added by using ds = act.io.armfiles.read_netcdf(files_list), then ds.clean.cleanup()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "71c2096f", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " ds_act = act.io.armfiles.read_netcdf(full_path)\n", + " print(type(ds_act))\n", + " ds_act.clean.cleanup()\n", + "\n", + " # or \n", + " # ds.clean.cleanup()\n", + "except Exception as e:\n", + " print(\"ERROR\", e)\n", + " ds_act = ds" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "844f8505", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter valid variables for ACT qc plotting\n", + "condition = (\"qc_\" + df_info.var_name).apply(lambda x: ds_act[x].attrs.get(\"flag_masks\") is not None \n", + " if x in list(ds.variables) else False)\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_3 = df_info[(df_info.is_dim==False) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings))) &\n", + " condition\n", + " ]\n", + "df_filter_3" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ecd38cc6", + "metadata": {}, + "outputs": [], + "source": [ + "# QC Plot\n", + "try:\n", + " qc_variable = df_filter_3.var_name.values[0]\n", + " print(qc_variable)\n", + "except Exception as e:\n", + " print(e)\n", + " \n", + "try:\n", + "\n", + " # Plot\n", + " qc_display = act.plotting.TimeSeriesDisplay(ds_act)\n", + " qc_display.add_subplots((2,), figsize = (9.5,10))\n", + " qc_ax = qc_display.plot(qc_variable, subplot_index=(0,), set_title=\"QC results on field: \" + qc_variable,)\n", + " qc_ax.grid()\n", + " qc_display.qc_flag_block_plot(qc_variable, subplot_index=(1,))\n", + " qc_ax.set_xlabel(f\"UTC Time starts at {ds.time.data[0]}\")\n", + "\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e)" + ] + }, + { + "cell_type": "markdown", + "id": "d76e4d27", + "metadata": 
{}, + "source": [ + "#### bonus: choose variables to plot from a dropdown menu " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bb733804", + "metadata": {}, + "outputs": [], + "source": [ + "# Valid variables filtering\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_4 = df_info[(df_info.is_dim==False) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter_4" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "82d5c20d", + "metadata": {}, + "outputs": [], + "source": [ + "# example 1: using xarray plot\n", + "\n", + "# Uncomment the following cell to try the interactive plot (ctrl + /)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2fc9aaa4", + "metadata": {}, + "outputs": [], + "source": [ + "# %matplotlib widget\n", + "# plt.clf()\n", + "\n", + "# fig, ax = plt.subplots(figsize=(10, 4))\n", + "\n", + "# available_variables = df_filter_4.var_name.values\n", + "# @widgets.interact(var=available_variables)\n", + "# def update(var = available_variables[0]):\n", + "# fig.clear() # Remove old lines from plot and plot new one\n", + "# if len(ds[var].dims)==2:\n", + "# ds[var].plot(x=\"time\", add_colorbar=False)\n", + "# else:\n", + "# ds[var].plot()\n", + "# plt.grid()\n", + "# plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fabdd802", + "metadata": {}, + "outputs": [], + "source": [ + "# example 2: using act plot\n", + "\n", + "# Uncomment the following cell to try the interactive plot (ctrl + /)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "962b4186", + "metadata": {}, + "outputs": [], + "source": [ + "# %matplotlib widget\n", + "# plt.clf()\n", + "\n", + "# available_variables = df_filter_4.var_name.values\n", + "\n", + "\n", + "# @widgets.interact(var=available_variables)\n", + "# def 
update(var = available_variables[0]):\n", + "\n", + "# i_display = act.plotting.TimeSeriesDisplay(ds_act)\n", + "# i_display.add_subplots((1,), figsize=(10, 4))\n", + "# ax = i_display.plot(var, subplot_index=(0,), set_title=f\"{var} ({ds_act[var].attrs['long_name']})\",)\n", + "\n", + "# ax.set_xlabel(f\"UTC Time starts at {ds.time.data[0]}\")\n", + "# ax.grid()\n", + "# plt.show()\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.16" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/DLPROF-WSTATS/dlprofwstats4news.c1.ipynb b/VAPs/quicklook/DLPROF-WSTATS/dlprofwstats4news.c1.ipynb new file mode 100644 index 00000000..68dd8077 --- /dev/null +++ b/VAPs/quicklook/DLPROF-WSTATS/dlprofwstats4news.c1.ipynb @@ -0,0 +1,297 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# DLPROFWSTATS4NEWS.C1 Plots\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/dlprof-wstats) for more information about this vap." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib widget\n", + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import ipywidgets as widgets\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "# Data archive directory\n", + "DATA_DIR = r'/data/archive/'\n", + "\n", + "# Datastream info\n", + "DATASTREAM_NAME = 'dlprofwstats4news'\n", + "DATA_LEVEL = 'c1'\n", + "LOCATIONS = [{'end_date': '2017-10-30', 'facility': 'M1', 'site': 'asi', 'start_date': '2016-09-06'}, {'end_date': '2020-06-01', 'facility': 'S2', 'site': 'anx', 'start_date': '2019-12-01'}, {'end_date': '2023-09-28', 'facility': 'C1', 'site': 'ena', 'start_date': '2014-10-21'}, {'end_date': '2022-09-29', 'facility': 'M1', 'site': 'hou', 'start_date': '2021-08-05'}, {'end_date': '2015-08-26', 'facility': 'M1', 'site': 'mao', 'start_date': '2014-01-01'}, {'end_date': '2023-06-14', 'facility': 'M1', 'site': 'guc', 'start_date': '2021-09-01'}, {'end_date': '2019-04-29', 'facility': 'M1', 'site': 'cor', 'start_date': '2018-09-23'}, {'end_date': '2012-03-30', 'facility': 'M1', 'site': 'pgh', 'start_date': '2011-06-21'}, {'end_date': '2023-12-17', 'facility': 'C1', 'site': 'nsa', 'start_date': '2014-07-30'}, {'end_date': '2021-06-13', 'facility': 'M1', 'site': 'oli', 'start_date': '2014-07-28'}, {'end_date': '2023-10-23', 'facility': 'C1', 'site': 'sgp', 'start_date': '2010-10-22'}, {'end_date': '2022-09-25', 'facility': 'E32', 'site': 'sgp', 'start_date': '2016-05-03'}, {'end_date': '2023-10-30', 'facility': 'E37', 'site': 'sgp', 'start_date': '2016-05-03'}, {'end_date': '2023-08-22', 'facility': 'E39', 'site': 'sgp', 'start_date': '2016-04-01'}, {'end_date': '2022-10-29', 'facility': 'E41', 'site': 'sgp', 'start_date': '2016-05-03'}, 
{'end_date': '2013-06-18', 'facility': 'M1', 'site': 'pvc', 'start_date': '2012-06-29'}, {'end_date': '2015-01-04', 'facility': 'C3', 'site': 'twp', 'start_date': '2010-12-13'}]" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "9faaf875", + "metadata": {}, + "source": [ + "## Define site, facility, and date range" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ac6764f5", + "metadata": {}, + "outputs": [], + "source": [ + "print(\"The following locations and date ranges are available for this VAP:\")\n", + "display(pd.DataFrame(LOCATIONS, columns=['site', 'facility', 'start_date', 'end_date']))" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "8d132223", + "metadata": {}, + "source": [ + "#### Define site, facility, and date range (date format: YYYY-MM-DD) using the variables below:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e563983a", + "metadata": {}, + "outputs": [], + "source": [ + "site_facility = ( 'sgp', 'C1' )\n", + "\n", + "date_start = '2023-10-21'\n", + "date_end = '2023-10-23'" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "bccd3dfe-2f99-49a5-bace-ea37e8dd8fc3", + "metadata": {}, + "source": [ + "## Load data files\n", + "Load data files from /data/archive/" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cb4b9a26-c574-49c0-a521-658fa553e39e", + "metadata": {}, + "outputs": [], + "source": [ + "# Compile list of files\n", + "site, facility = site_facility\n", + "d_date_start = datetime.strptime(date_start, '%Y-%m-%d')\n", + "d_date_end = datetime.strptime(date_end, '%Y-%m-%d')\n", + "dir_path = os.path.join(DATA_DIR + site, site + DATASTREAM_NAME + facility + r'.' 
+ DATA_LEVEL )\n", + "dir_path\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6be8f3dc", + "metadata": {}, + "outputs": [], + "source": [ + "from datetime import date, timedelta\n", + "import pandas as pd\n", + "\n", + "def get_ARM_formated_dates(start_date, end_date):\n", + " \"\"\"\n", + " Get a list of ARM conventional formated date lists, based on start_date and end_date(inclusive)\n", + " EXAMPLE:\n", + " get_ARM_formated_dates(start_date=\"20180219\", end_date=\"20180221\")\n", + " >> [\"20180219\", \"20180220\", \"20180221\"] \n", + " \"\"\"\n", + " \n", + " _start_date = pd.to_datetime(start_date)\n", + " _end_date = pd.to_datetime(end_date)\n", + " \n", + " delta = _end_date - _start_date # returns timedelta \n", + " dates = []\n", + "\n", + " for i in range(delta.days + 1):\n", + " day = _start_date + timedelta(days=i)\n", + " day_formated = day.strftime(format=\"%Y%m%d\")\n", + " dates.append(day_formated)\n", + " return dates\n", + "\n", + "\n", + "get_ARM_formated_dates(start_date=date_start, end_date=date_end)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "51feea2e", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter a list of files based on date pattern\n", + "import glob\n", + "dates = get_ARM_formated_dates(start_date=date_start, end_date=date_end)\n", + "files_filter = []\n", + "for date in dates:\n", + " files_filter += glob.glob(f'{dir_path}/*.{date}*.*')\n", + " files_filter\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0e5d0d7", + "metadata": {}, + "outputs": [], + "source": [ + "# Load files as a single dataset\n", + "files_list = files_filter \n", + "ds = act.io.armfiles.read_netcdf(files_list)\n", + "ds.clean.cleanup()\n", + "print(f'{len(files_list)} files loaded')\n", + "ds\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "4a551094-9ec0-4b64-b80a-9940573c2f50", + "metadata": {}, + "source": [ + "## Plot 
time series data\n", + "#### Define the list of variables to be plotted:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d09b789e-84f1-4605-846b-a72c110c8048", + "metadata": {}, + "outputs": [], + "source": [ + "variables_to_plot = ['w_variance', 'w_skewness', 'w_kurtosis']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3458fb08-035b-4898-9253-0a94e6f9c97b", + "metadata": {}, + "outputs": [], + "source": [ + "ts_display = act.plotting.TimeSeriesDisplay(ds)\n", + "ts_display.add_subplots((len(variables_to_plot),), figsize = (9.5,4*len(variables_to_plot)))\n", + "\n", + "for i,v in enumerate(variables_to_plot):\n", + " ts_ax = ts_display.plot(v, subplot_index=(i,), set_title=ds.variables[v].attrs['long_name'],)\n", + " ts_ax.grid()\n", + "\n", + "plt.show()\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "2308db16-f362-4033-a11a-c3e5e75ad9ba", + "metadata": {}, + "source": [ + "## Field selection dropdown menu\n", + "Select variable to be plotted from a dropdown menu" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d5792fa8-2253-441a-8ac6-1e07a0345031", + "metadata": {}, + "outputs": [], + "source": [ + "plt.ioff()\n", + "\n", + "# populate dropdown menu with available variables \n", + "available_variables = [v for v in ds.variables if not('time' in v or v.startswith('qc_') or v.startswith('source_')) and 'long_name' in ds.variables[v].attrs]\n", + "d_variable = 'w_variance'\n", + "dropdown = widgets.Dropdown(\n", + " options = [(ds.variables[v].attrs['long_name'], v) for v in available_variables],\n", + " value= d_variable,\n", + " description='Field:',\n", + " disabled=False,\n", + ")\n", + "dropdown.layout.margin = '0px 30% 0px 20%'\n", + "dropdown.layout.width = '50%'\n", + "\n", + "# set up display\n", + "i_display = act.plotting.TimeSeriesDisplay(ds)\n", + "i_display.add_subplots((1,), figsize = (9.5,5))\n", + "i_ax = i_display.plot(d_variable, subplot_index=(0,), 
set_title=ds.variables[d_variable].attrs['long_name'],)\n", + "i_ax.grid()\n", + "i_fig = i_display.fig\n", + "\n", + "# update plot callback function\n", + "def update_plot(change):\n", + " i_ax.cla()\n", + " i_ax_new = i_display.plot(change.new, subplot_index=(0,), set_title=ds.variables[change.new].attrs['long_name'],)\n", + " i_ax_new.grid()\n", + " i_fig.canvas.draw()\n", + " i_fig.canvas.flush_events()\n", + "\n", + "dropdown.observe(update_plot, names='value')\n", + "\n", + "widgets.AppLayout(\n", + " header=dropdown,\n", + " center=i_fig.canvas,\n", + " pane_heights=[1, 6,1]\n", + ")\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.12" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/FCDP-AIR/FCDP-AIR_tutorial.ipynb b/VAPs/quicklook/FCDP-AIR/FCDP-AIR_tutorial.ipynb new file mode 100644 index 00000000..efe3ad1b --- /dev/null +++ b/VAPs/quicklook/FCDP-AIR/FCDP-AIR_tutorial.ipynb @@ -0,0 +1,838 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# AAFFCDP.C1 Notebook\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/fcdp-air) for more information about this vap." + ] + }, + { + "cell_type": "markdown", + "id": "97097763", + "metadata": {}, + "source": [ + "In this notebook, we demonstrate the workflow to explore ARM vap data (using aaffcdp as an example.) 
Value-added products (VAPs) are higher-order data products that have been analyzed and processed to ease scientists’ use of ARM data in atmospheric research and global climate models. \n", + "Here is the main content we will cover." + ] + }, + { + "cell_type": "markdown", + "id": "eddec40f", + "metadata": {}, + "source": [ + "# Table of Contents\n", + "## Access the data\n", + "* How to retrieve the data\n", + "* Data path and file name conventions\n", + "* Load data\n", + "## Explore the data\n", + "* NetCDF Data structure\n", + "* Xarray essentials\n", + "* Xarray Variable\n", + "## Plot the data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "import random\n", + "\n", + "import glob\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "id": "2372d5be", + "metadata": {}, + "source": [ + "## Access the data" + ] + }, + { + "cell_type": "markdown", + "id": "5b0d9684", + "metadata": {}, + "source": [ + "### How to retrieve the data\n", + "We assume the path \"/data/archive\" is available where you are running this notebook. You can use `os.path.exists(\"/data/archive\")` to verify if the path exists on your machine.\n", + "\n", + "\n", + "### Data path and file name conventions\n", + "There are several common terms regarding ARM data, for example, data-stream-name, data-level, site, facility, instrument, etc. 
(For more details, please see [ARM Data File Standards Version 1.2](https://www.google.com/search?q=arm+datastream+facility+cite+definition&rlz=1C1GCEJ_enUS1029US1029&ei=hb41ZICrBPukqtsPvMODIA&ved=0ahUKEwjAgeiS0aL-AhV7kmoFHbzhAAQQ4dUDCBA&uact=5&oq=arm+datastream+facility+cite+definition&gs_lcp=Cgxnd3Mtd2l6LXNlcnAQAzIFCAAQogQyBQgAEKIEMgUIABCiBDIFCAAQogQyBQgAEKIEOgoIABBHENYEELADOgoIIRCgARDDBBAKSgQIQRgAUJ4DWIoOYO0PaAFwAXgAgAGMAYgBuQmSAQMyLjmYAQCgAQHIAQjAAQE&sclient=gws-wiz-serp).) \n", + "\n", + "For example, this notebook is called `aaffcdp.c1`, where `aaffcdp` is the \"datastream name\", and `c1` is the \"data level\".\n", + "\n", + "This datastream also contains site `cor` and facility `F1`. (Note: an individual datastream might have multiple site-facility pairs.)\n", + "In such a case, the data of this data-stream is stored at `/data/archive/cor/coraaffcdpF1.c1`, which is in the format of `<DATA_DIR>/<site>/<site><datastream_name><facility>.<data_level>`. We can use the following method to assign the data-stream directory `datastream_dir = os.path.join(DATA_DIR, site, site + DATASTREAM_NAME + facility + '.' + DATA_LEVEL )`\n", + "\n", + "The data files under datastream_dir also follow naming conventions. But once you reach the datastream_dir level, the most important file naming convention to differentiate the files is \"yyyyMMdd.hhmmss\", which comes in handy for filtering files based on datetime. 
For example, we can use `glob.glob(f'{datastream_dir}/*.200709*.*')` to filter files from September 2007.\n", + "\n", + "(Note: refer to https://adc.arm.gov/discovery/#/ and the https://adc.arm.gov/solr8/metadata/select API to explore ARM datastreams and their associated available sites and facilities.)\n", + "\n", + "Please see the following examples in action" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d7e9eb85", + "metadata": {}, + "outputs": [], + "source": [ + "# Verify if DATA_DIR path exists\n", + "DATA_DIR = \"/data/archive\"\n", + "os.path.exists(DATA_DIR)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "586993fd", + "metadata": {}, + "outputs": [], + "source": [ + "# Speicify datastream_dir following the path conventions and check its existence\n", + "DATASTREAM_NAME = \"aaffcdp\"\n", + "DATA_LEVEL = \"c1\"\n", + "site = \"cor\"\n", + "facility = \"F1\"\n", + "datastream_dir = os.path.join(DATA_DIR, site, site + DATASTREAM_NAME + facility + '.' 
+ DATA_LEVEL )\n", + "print(datastream_dir)\n", + "print(os.path.exists(datastream_dir))\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0742f7c1", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: list 5 (random) files under datastream_dir\n", + "files = os.listdir(datastream_dir)\n", + "files[:5]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "39b98a36", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: get most recent file\n", + "list_of_files = glob.glob(f\"{datastream_dir}/*\") # * means all if need specific format then *.csv\n", + "latest_file = max(list_of_files, key=os.path.getctime)\n", + "latest_file" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "902d514e", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus sort datastream files based on datetime\n", + "files = os.listdir(datastream_dir)\n", + "file_sorted = files.copy()\n", + "file_sorted.sort() \n", + "print(file_sorted[:5])\n", + "\n", + "# to reverse\n", + "file_sorted_reverse = files.copy() \n", + "file_sorted_reverse.sort(reverse=True)\n", + "print(file_sorted_reverse[:5])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ec5923b2", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: pattern matching\n", + "# filter the 200709** files under datastream_dir\n", + "files_filter = glob.glob(f'{datastream_dir}/*.200709*.*')\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c4a4aa18", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: use armnotebook_utils.file_filter (TODO) to filter files based on datastream info \n" + ] + }, + { + "cell_type": "markdown", + "id": "cfeb9efc", + "metadata": {}, + "source": [ + "### Load data\n", + "The arm data is stored in [NetCDF](https://en.wikipedia.org/wiki/NetCDF#:~:text=%22NetCDF%20(network%20Common%20Data%20Form,format%20for%20representing%20scientific%20data.) format. 
We can use xarray's `open_dataset` method to load single file, or `open_mfdataset` to open multiple files. (Note: the latter will still return a single xarray dataset object by combining multiple files.)\n", + "\n", + "Note: open_dataset keeps the file handle open and lazy loads its contents. All parameters are passed directly to open_dataset. It is a preferable method over load_dataset for memory-effiency. (more details, please see [xarray.open_dataset](https://docs.xarray.dev/en/stable/generated/xarray.open_dataset.html))\n", + "\n", + "See the following example in action" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a440a329", + "metadata": {}, + "outputs": [], + "source": [ + "# open single file\n", + "full_path = latest_file\n", + "print(full_path)\n", + "ds_single = xr.open_dataset(full_path)\n", + "ds_single" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0143a3d", + "metadata": {}, + "outputs": [], + "source": [ + "print(ds_single)\n", + "print(type(ds_single))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1c0f8939", + "metadata": {}, + "outputs": [], + "source": [ + "# open multiple files\n", + "n_files = 3\n", + "full_paths = [os.path.join(datastream_dir, f_path) for f_path in file_sorted_reverse[:n_files]]\n", + "print(full_paths)\n", + "try: # Note: sometimes multiple files cannot be merged, so we used try except here.\n", + " ds_mutiple = xr.open_mfdataset(full_paths)\n", + " # ds_mutiple\n", + " print(type(ds_mutiple))\n", + " print(ds_mutiple)\n", + "except Exception as e:\n", + " print(e)" + ] + }, + { + "cell_type": "markdown", + "id": "3fa59943", + "metadata": {}, + "source": [ + "## Explore the data" + ] + }, + { + "cell_type": "markdown", + "id": "571f69b9", + "metadata": {}, + "source": [ + "### NetCDF Data structure \n", + "\n", + "\n", + "(If you are confident with NetCDF basics and xarray essentials, feel free to skip this session.)\n", + "Before we dive into 
data exploration, there are some essential concepts we should be familiar with (shown below): \n", + "* Dataset\n", + "* Data array\n", + "* Variable\n", + "* Dimension\n", + "* Coordinate\n", + "* Data Type\n", + "* Meta Data (Attributes)\n", + "\n", + "We will not go into details about NetCDF basics, but here are some references you might find helpful\n", + "* [Components of a NetCDF Dataset](https://iprc.soest.hawaii.edu/users/xfu/tool/guidef-7.html#)\n", + "* [Network Common Data Form (NetCDF)](https://www.unidata.ucar.edu/software/netcdf/) and [A Brief History of (netCDF) Time](https://www.unidata.ucar.edu/software/netcdf/time/recs.html)\n", + "\n", + "\n", + "\n", + "### Xarray essentials\n", + "Earlier we introduced the xarray package and used `open_dataset` and `open_mfdataset` to retrieve NetCDF data files as an xarray Dataset object. Recall that you can review the data in a notebook by using `print(ds)` or `ds`. \n", + "\n", + "Using xarray to retrieve the aforementioned NetCDF basics is straightforward. 
In general,\n", + "* Dataset: `ds`\n", + "* Data array: `ds.variables`\n", + "* Variable: `ds.variables`\n", + "* Dimension: `ds.dims`\n", + "* Coordinate: `ds.coords`\n", + "* Data Type: `type`\n", + "* Meta Data (Attributes): `ds.attrs`\n", + "\n", + "Also, here are some references if you are new to xarray\n", + "* [Xarray in 45 minutes](https://tutorial.xarray.dev/overview/xarray-in-45-min.html)\n", + "* [Handling NetCDF Files using XArray for Absolute Beginners](https://towardsdatascience.com/handling-netcdf-files-using-xarray-for-absolute-beginners-111a8ab4463f)\n", + "\n", + "Try the following commands in action\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b3eac323", + "metadata": {}, + "outputs": [], + "source": [ + "ds = ds_single" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "77ecf85d", + "metadata": {}, + "outputs": [], + "source": [ + "# Dataset \n", + "ds" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "25e7de09", + "metadata": {}, + "outputs": [], + "source": [ + "# Data array, Variable\n", + "# Note: the info can be overwhelming. 
Do not attempt to grasp everything at the first glance.\n", + "ds.variables" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8c41b67e", + "metadata": {}, + "outputs": [], + "source": [ + "# dimenssions\n", + "ds.dims" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "156f1dfc", + "metadata": {}, + "outputs": [], + "source": [ + "# coordinates\n", + "ds.coords" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "277d6064", + "metadata": {}, + "outputs": [], + "source": [ + "# Meta Data (Attributes)\n", + "ds.attrs" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d334681f", + "metadata": {}, + "outputs": [], + "source": [ + "# type\n", + "print(type(ds))\n", + "print(type(ds.variables))\n", + "print(type(ds.dims))\n", + "print(type(ds.coords))\n", + "print(type(ds.attrs))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "643399d6", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: retrieve variable names only\n", + "list(ds.variables)" + ] + }, + { + "cell_type": "markdown", + "id": "1cf36878", + "metadata": {}, + "source": [ + "#### Discussion: variable vs. coordinates vs. dimenssions. \n", + "From [Components of a NetCDF Dataset](https://iprc.soest.hawaii.edu/users/xfu/tool/guidef-7.html#):\n", + "\n", + "* Dimession: A dimension may be used to represent a real physical dimension, for example, time, latitude, longitude, or height. A dimension might also be used to index other quantities, for example station or model-run-number.\n", + "A netCDF dimension has both a name and a length. A dimension length is an arbitrary positive integer, except that one dimension in a netCDF dataset can have the length UNLIMITED.\n", + "\n", + "* Variables: Variables are used to store the bulk of the data in a netCDF dataset. A variable represents an array of values of the same type. A scalar value is treated as a 0-dimensional array. 
A variable has a name, a data type, and a shape described by its list of dimensions specified when the variable is created. A variable may also have associated attributes, which may be added, deleted or changed after the variable is created.\n", + "\n", + "* Coordinate (Variables): It is legal for a variable to have the same name as a dimension. Such variables have no special meaning to the netCDF library. However there is a convention that such variables should be treated in a special way by software using this library.\n", + "A variable with the same name as a dimension is called a coordinate variable. It typically defines a physical coordinate corresponding to that dimension.\n", + "\n", + "\n", + "Tips: By definition, coordinates and dimensions are also variables. For example, ds.time is a coordinate, but it is also a special variable. In practice, however, when we talk about a variable, we mean a regular (non-coordinate) variable. We will use this convention for the remainder of the notebook. " + ] + }, + { + "cell_type": "markdown", + "id": "d47120f9", + "metadata": {}, + "source": [ + "### Xarray Variable\n", + "\n", + "\n", + "We can use `ds.variables` to access the variables of a dataset. But printing out the whole dataset can often be overwhelming. Instead, we can work on an individual variable (also called an xarray data array) using the `ds[var_name]` syntax, where `var_name` is the name of the variable. See the following examples in action.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5a57e136", + "metadata": {}, + "outputs": [], + "source": [ + "print(type(ds[\"time\"]))\n", + "ds[\"time\"]" + ] + }, + { + "cell_type": "markdown", + "id": "51ec643e", + "metadata": {}, + "source": [ + "#### Variable properties\n", + "For an individual variable, the following properties are the most commonly used, listed with the associated xarray attribute to retrieve them. 
(assuming var = ds[var_name])\n", + "* name: `var.name`\n", + "* data content: `var.data`\n", + "* attributes: `var.attrs`\n", + "* dimenstions: `var.dims`\n", + "* data type: `var.data.dtype`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a632a525", + "metadata": {}, + "outputs": [], + "source": [ + "var_name = \"time\"\n", + "var = ds[var_name]\n", + "\n", + "print(\"var.name: \\n\", var.name, \"\\n\")\n", + "print(\"var.data: \\n\", var.data, \"\\n\")\n", + "print(\"var.attrs: \\n\", var.attrs, \"\\n\")\n", + "print(\"var.dims: \\n\", var.dims, \"\\n\")\n", + "print(\"var.data.dtype: \\n\", var.data.dtype, \"\\n\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b256b07f", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: indivdual variable info table (collect and display variable info as a tabular format)\n", + "df_info = pd.DataFrame()\n", + "df_info[\"var_name\"] = list(ds.variables)\n", + "df_info[\"dims\"] = df_info.var_name.apply(lambda x: ds[x].dims)\n", + "df_info[\"is_dim\"] = df_info.var_name.apply(lambda x: x in ds.dims)\n", + "df_info[\"n_dim\"] = df_info.var_name.apply(lambda x: len(ds[x].dims))\n", + "df_info[\"attrs\"] = df_info.var_name.apply(lambda x: ds[x].attrs)\n", + "df_info[\"dtype\"] = df_info.var_name.apply(lambda x: ds[x].data.dtype)\n", + "df_info" + ] + }, + { + "cell_type": "markdown", + "id": "e6a36b96", + "metadata": {}, + "source": [ + "### Data cleaning/Preprocessing (skipped)\n", + "Data cleaning and preprocessing is an important stage in the the data analysis pipeline. However it will be out of the scope of this notebook. For more information about data cleaning and preprocessing basics, here are some references. 
 \n", + "* [Xarray Fundamentals](https://earth-env-data-science.github.io/lectures/xarray/xarray.html)\n", + "* [Xarray Tutorial — Pangeo Gallery documentation](http://gallery.pangeo.io/repos/pangeo-data/pangeo-tutorial-gallery/xarray.html)\n", + "* [Pythonic Data Cleaning With pandas and NumPy](https://realpython.com/python-data-cleaning-numpy-pandas/)\n", + "* [Pandas - Cleaning Data](https://www.w3schools.com/python/pandas/pandas_cleaning.asp)" + ] + }, + { + "cell_type": "markdown", + "id": "ae6e1a69", + "metadata": {}, + "source": [ + "### Plotting\n", + "Data visualization (or plotting) is another important data analysis topic and deserves its own discussion. Here in this notebook we will only demonstrate basic tools for simple data visualization tasks.\n", + "\n", + "Here are some references you might find useful:\n", + "* xarray plotting: https://docs.xarray.dev/en/stable/user-guide/plotting.html\n", + "* Atmospheric Community Toolkit (ACT): https://arm-doe.github.io/ACT/index.html\n", + "\n", + "Note: this notebook is auto-generated using a template. It uses a general heuristic to select the variable(s) to plot and is not customized to each individual notebook. Feel free to change the variables of interest, especially if a certain figure fails to plot." + ] + }, + { + "cell_type": "markdown", + "id": "7e417fb2", + "metadata": {}, + "source": [ + "#### 1-dimensional basic time series plot\n", + "\n", + "For the following plot we would like to find variables such that\n", + "* it has one and only one dimension\n", + "* \"time\" is its coordinate variable\n", + "* it is not a dimension itself,\n", + "* it is not a special variable with substrings within [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "\n", + "Please see the following example in action. 
(For more details about pandas filtering, please see the following references.)\n", + "* [pandas: multiple conditions while indexing data frame](https://stackoverflow.com/questions/22591174/pandas-multiple-conditions-while-indexing-data-frame-unexpected-behavior)\n", + "* [pandas.DataFrame.apply](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.apply.html)\n", + "* [How to test if a string contains one of the substrings in a list...](https://stackoverflow.com/questions/26577516/how-to-test-if-a-string-contains-one-of-the-substrings-in-a-list-in-pandas)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "30edac2d", + "metadata": {}, + "outputs": [], + "source": [ + "# (pandas query) filter n_dim==1, non-dimenssional, and not contains the following substrings\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter = df_info[(df_info.n_dim == 1) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (df_info.is_dim==False) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4777a659", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " var_1d = df_filter.var_name.values[0]\n", + " var_1d\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fb5ae985", + "metadata": {}, + "outputs": [], + "source": [ + "# Note: if failed, change to another variable to plot.\n", + "try:\n", + " ds[var_1d].plot()\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "markdown", + "id": "9e41066e", + "metadata": {}, + "source": [ + "#### 2-dimensional basic plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "09bd4adc", + "metadata": {}, + "outputs": [], + "source": [ + "# (pandas query) filter n_dim==2, non-dimenssional, and not contains the 
following substrings\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_2 = df_info[(df_info.n_dim == 2) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (df_info.is_dim==False) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter_2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2cdea1a9", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " var_2d = df_filter_2.var_name.values[0]\n", + " var_2d\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f5f48879", + "metadata": {}, + "outputs": [], + "source": [ + "# Note: if failed, change to another variable to plot.\n", + "try:\n", + " print(ds[var_2d].dims)\n", + " # ds[var_2d].plot()\n", + "\n", + " # conventionally, use \"time\" as x-axis\n", + " ds[var_2d].plot(x=\"time\")\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "markdown", + "id": "7554b2a6", + "metadata": {}, + "source": [ + "#### qc-plotting (optional)\n", + "\n", + "Note: act qc plotting has more strict requirement, one of them is the associated qc variable need to have \"flag_masks\" attributes. 
Which is added by using ds = act.io.armfiles.read_netcdf(files_list), then ds.clean.cleanup()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "71c2096f", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " ds_act = act.io.armfiles.read_netcdf(full_path)\n", + " print(type(ds_act))\n", + " ds_act.clean.cleanup()\n", + "\n", + " # or \n", + " # ds.clean.cleanup()\n", + "except Exception as e:\n", + " print(\"ERROR\", e)\n", + " ds_act = ds" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "844f8505", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter valid variables for ACT qc plotting\n", + "condition = (\"qc_\" + df_info.var_name).apply(lambda x: ds_act[x].attrs.get(\"flag_masks\") is not None \n", + " if x in list(ds.variables) else False)\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_3 = df_info[(df_info.is_dim==False) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings))) &\n", + " condition\n", + " ]\n", + "df_filter_3" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ecd38cc6", + "metadata": {}, + "outputs": [], + "source": [ + "# QC Plot\n", + "try:\n", + " qc_variable = df_filter_3.var_name.values[0]\n", + " print(qc_variable)\n", + "except Exception as e:\n", + " print(e)\n", + " \n", + "try:\n", + "\n", + " # Plot\n", + " qc_display = act.plotting.TimeSeriesDisplay(ds_act)\n", + " qc_display.add_subplots((2,), figsize = (9.5,10))\n", + " qc_ax = qc_display.plot(qc_variable, subplot_index=(0,), set_title=\"QC results on field: \" + qc_variable,)\n", + " qc_ax.grid()\n", + " qc_display.qc_flag_block_plot(qc_variable, subplot_index=(1,))\n", + " qc_ax.set_xlabel(f\"UTC Time starts at {ds.time.data[0]}\")\n", + "\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e)" + ] + }, + { + "cell_type": "markdown", + "id": "d76e4d27", + "metadata": 
{}, + "source": [ + "#### bonus: choose variables to plot from a dropdown menu " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bb733804", + "metadata": {}, + "outputs": [], + "source": [ + "# Valid variables filtering\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_4 = df_info[(df_info.is_dim==False) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter_4" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "82d5c20d", + "metadata": {}, + "outputs": [], + "source": [ + "# example 1: using xarray plot\n", + "\n", + "# Uncomment the following cell to try the interactive plot (ctrl + /)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2fc9aaa4", + "metadata": {}, + "outputs": [], + "source": [ + "# %matplotlib widget\n", + "# plt.clf()\n", + "\n", + "# fig, ax = plt.subplots(figsize=(10, 4))\n", + "\n", + "# available_variables = df_filter_4.var_name.values\n", + "# @widgets.interact(var=available_variables)\n", + "# def update(var = available_variables[0]):\n", + "# fig.clear() # Remove old lines from plot and plot new one\n", + "# if len(ds[var].dims)==2:\n", + "# ds[var].plot(x=\"time\", add_colorbar=False)\n", + "# else:\n", + "# ds[var].plot()\n", + "# plt.grid()\n", + "# plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fabdd802", + "metadata": {}, + "outputs": [], + "source": [ + "# example 2: using act plot\n", + "\n", + "# Uncomment the following cell to try the interactive plot (ctrl + /)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "962b4186", + "metadata": {}, + "outputs": [], + "source": [ + "# %matplotlib widget\n", + "# plt.clf()\n", + "\n", + "# available_variables = df_filter_4.var_name.values\n", + "\n", + "\n", + "# @widgets.interact(var=available_variables)\n", + "# def 
update(var = available_variables[0]):\n", + "\n", + "# i_display = act.plotting.TimeSeriesDisplay(ds_act)\n", + "# i_display.add_subplots((1,), figsize=(10, 4))\n", + "# ax = i_display.plot(var, subplot_index=(0,), set_title=f\"{var} ({ds_act[var].attrs['long_name']})\",)\n", + "\n", + "# ax.set_xlabel(f\"UTC Time starts at {ds.time.data[0]}\")\n", + "# ax.grid()\n", + "# plt.show()\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.16" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/FCDP-AIR/aaffcdp.c1.ipynb b/VAPs/quicklook/FCDP-AIR/aaffcdp.c1.ipynb new file mode 100644 index 00000000..013560ce --- /dev/null +++ b/VAPs/quicklook/FCDP-AIR/aaffcdp.c1.ipynb @@ -0,0 +1,297 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# AAFFCDP.C1 Plots\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/fcdp-air) for more information about this vap." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib widget\n", + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import ipywidgets as widgets\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "# Data archive directory\n", + "DATA_DIR = r'/data/archive/'\n", + "\n", + "# Datastream info\n", + "DATASTREAM_NAME = 'aaffcdp'\n", + "DATA_LEVEL = 'c1'\n", + "LOCATIONS = [{'end_date': '2018-12-08', 'facility': 'F1', 'site': 'cor', 'start_date': '2018-11-04'}, {'end_date': '2018-02-19', 'facility': 'F1', 'site': 'ena', 'start_date': '2017-06-21'}]" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "9faaf875", + "metadata": {}, + "source": [ + "## Define site, facility, and date range" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ac6764f5", + "metadata": {}, + "outputs": [], + "source": [ + "print(\"The following locations and date ranges are available for this VAP:\")\n", + "display(pd.DataFrame(LOCATIONS, columns=['site', 'facility', 'start_date', 'end_date']))" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "8d132223", + "metadata": {}, + "source": [ + "#### Define site, facility, and date range (date format: YYYY-MM-DD) using the variables below:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e563983a", + "metadata": {}, + "outputs": [], + "source": [ + "site_facility = ( 'cor', 'F1' )\n", + "\n", + "date_start = '2018-12-06'\n", + "date_end = '2018-12-08'" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "bccd3dfe-2f99-49a5-bace-ea37e8dd8fc3", + "metadata": {}, + "source": [ + "## Load data files\n", + "Load data files from /data/archive/" + ] + }, + { + "cell_type": "code", + 
"execution_count": null, + "id": "cb4b9a26-c574-49c0-a521-658fa553e39e", + "metadata": {}, + "outputs": [], + "source": [ + "# Compile list of files\n", + "site, facility = site_facility\n", + "d_date_start = datetime.strptime(date_start, '%Y-%m-%d')\n", + "d_date_end = datetime.strptime(date_end, '%Y-%m-%d')\n", + "dir_path = os.path.join(DATA_DIR + site, site + DATASTREAM_NAME + facility + r'.' + DATA_LEVEL )\n", + "dir_path\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6be8f3dc", + "metadata": {}, + "outputs": [], + "source": [ + "from datetime import date, timedelta\n", + "import pandas as pd\n", + "\n", + "def get_ARM_formated_dates(start_date, end_date):\n", + " \"\"\"\n", + " Get a list of ARM conventional formated date lists, based on start_date and end_date(inclusive)\n", + " EXAMPLE:\n", + " get_ARM_formated_dates(start_date=\"20180219\", end_date=\"20180221\")\n", + " >> [\"20180219\", \"20180220\", \"20180221\"] \n", + " \"\"\"\n", + " \n", + " _start_date = pd.to_datetime(start_date)\n", + " _end_date = pd.to_datetime(end_date)\n", + " \n", + " delta = _end_date - _start_date # returns timedelta \n", + " dates = []\n", + "\n", + " for i in range(delta.days + 1):\n", + " day = _start_date + timedelta(days=i)\n", + " day_formated = day.strftime(format=\"%Y%m%d\")\n", + " dates.append(day_formated)\n", + " return dates\n", + "\n", + "\n", + "get_ARM_formated_dates(start_date=date_start, end_date=date_end)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "51feea2e", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter a list of files based on date pattern\n", + "import glob\n", + "dates = get_ARM_formated_dates(start_date=date_start, end_date=date_end)\n", + "files_filter = []\n", + "for date in dates:\n", + " files_filter += glob.glob(f'{dir_path}/*.{date}*.*')\n", + " files_filter\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0e5d0d7", + 
"metadata": {}, + "outputs": [], + "source": [ + "# Load files as a single dataset\n", + "files_list = files_filter \n", + "ds = act.io.armfiles.read_netcdf(files_list)\n", + "ds.clean.cleanup()\n", + "print(f'{len(files_list)} files loaded')\n", + "ds\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "4a551094-9ec0-4b64-b80a-9940573c2f50", + "metadata": {}, + "source": [ + "## Plot time series data\n", + "#### Define the list of variables to be plotted:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d09b789e-84f1-4605-846b-a72c110c8048", + "metadata": {}, + "outputs": [], + "source": [ + "variables_to_plot = ['total_number_concentration', 'number_concentration']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3458fb08-035b-4898-9253-0a94e6f9c97b", + "metadata": {}, + "outputs": [], + "source": [ + "ts_display = act.plotting.TimeSeriesDisplay(ds)\n", + "ts_display.add_subplots((len(variables_to_plot),), figsize = (9.5,4*len(variables_to_plot)))\n", + "\n", + "for i,v in enumerate(variables_to_plot):\n", + " ts_ax = ts_display.plot(v, subplot_index=(i,), set_title=ds.variables[v].attrs['long_name'],)\n", + " ts_ax.grid()\n", + "\n", + "plt.show()\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "2308db16-f362-4033-a11a-c3e5e75ad9ba", + "metadata": {}, + "source": [ + "## Field selection dropdown menu\n", + "Select variable to be plotted from a dropdown menu" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d5792fa8-2253-441a-8ac6-1e07a0345031", + "metadata": {}, + "outputs": [], + "source": [ + "plt.ioff()\n", + "\n", + "# populate dropdown menu with available variables \n", + "available_variables = [v for v in ds.variables if not('time' in v or v.startswith('qc_') or v.startswith('source_')) and 'long_name' in ds.variables[v].attrs]\n", + "d_variable = 'total_number_concentration'\n", + "dropdown = widgets.Dropdown(\n", + " options = 
[(ds.variables[v].attrs['long_name'], v) for v in available_variables],\n", + " value= d_variable,\n", + " description='Field:',\n", + " disabled=False,\n", + ")\n", + "dropdown.layout.margin = '0px 30% 0px 20%'\n", + "dropdown.layout.width = '50%'\n", + "\n", + "# set up display\n", + "i_display = act.plotting.TimeSeriesDisplay(ds)\n", + "i_display.add_subplots((1,), figsize = (9.5,5))\n", + "i_ax = i_display.plot(d_variable, subplot_index=(0,), set_title=ds.variables[d_variable].attrs['long_name'],)\n", + "i_ax.grid()\n", + "i_fig = i_display.fig\n", + "\n", + "# update plot callback function\n", + "def update_plot(change):\n", + " i_ax.cla()\n", + " i_ax_new = i_display.plot(change.new, subplot_index=(0,), set_title=ds.variables[change.new].attrs['long_name'],)\n", + " i_ax_new.grid()\n", + " i_fig.canvas.draw()\n", + " i_fig.canvas.flush_events()\n", + "\n", + "dropdown.observe(update_plot, names='value')\n", + "\n", + "widgets.AppLayout(\n", + " header=dropdown,\n", + " center=i_fig.canvas,\n", + " pane_heights=[1, 6,1]\n", + ")\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.12" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/GVR/GVR_tutorial.ipynb b/VAPs/quicklook/GVR/GVR_tutorial.ipynb new file mode 100644 index 00000000..d3ad529a --- /dev/null +++ b/VAPs/quicklook/GVR/GVR_tutorial.ipynb @@ -0,0 +1,838 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# GVR.C1 Notebook\n", + "\n", + "[Click 
here](https://www.arm.gov/capabilities/vaps/gvr) for more information about this vap." + ] + }, + { + "cell_type": "markdown", + "id": "97097763", + "metadata": {}, + "source": [ + "In this notebook, we demonstrate the workflow to explore ARM VAP data (using gvr as an example). Value-added products (VAPs) are higher-order data products that have been analyzed and processed to ease scientists’ use of ARM data in atmospheric research and global climate models. \n", + "Here is the main content we will cover." + ] + }, + { + "cell_type": "markdown", + "id": "eddec40f", + "metadata": {}, + "source": [ + "# Table of Contents\n", + "## Access the data\n", + "* How to retrieve the data\n", + "* Data path and file name conventions\n", + "* Load data\n", + "## Explore the data\n", + "* NetCDF Data structure\n", + "* Xarray essentials\n", + "* Xarray Variable\n", + "## Plot the data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "import random\n", + "\n", + "import glob\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "id": "2372d5be", + "metadata": {}, + "source": [ + "## Access the data" + ] + }, + { + "cell_type": "markdown", + "id": "5b0d9684", + "metadata": {}, + "source": [ + "### How to retrieve the data\n", + "We assume the path \"/data/archive\" is available where you are running this notebook. You can use `os.path.exists(\"/data/archive\")` to verify if the path exists on your machine.\n", + "\n", + "\n", + "### Data path and file name conventions\n", + "There are several common terminologies regarding ARM data, for example, data-stream-name, data-level, site, facility, instrument, etc.
(For more details, please see [ARM Data File Standards Version 1.2](https://www.google.com/search?q=arm+datastream+facility+cite+definition&rlz=1C1GCEJ_enUS1029US1029&ei=hb41ZICrBPukqtsPvMODIA&ved=0ahUKEwjAgeiS0aL-AhV7kmoFHbzhAAQQ4dUDCBA&uact=5&oq=arm+datastream+facility+cite+definition&gs_lcp=Cgxnd3Mtd2l6LXNlcnAQAzIFCAAQogQyBQgAEKIEMgUIABCiBDIFCAAQogQyBQgAEKIEOgoIABBHENYEELADOgoIIRCgARDDBBAKSgQIQRgAUJ4DWIoOYO0PaAFwAXgAgAGMAYgBuQmSAQMyLjmYAQCgAQHIAQjAAQE&sclient=gws-wiz-serp).) \n", + "\n", + "For example, this notebook is called `gvr.c1`, where `gvr` is the \"datastream name\", and `c1` is the \"data level\".\n", + "\n", + "This datastream also contains site `nsa` and facility `C1`. (Note: an individual datastream might have multiple site-facility pairs.)\n", + "In such a case, the data of this data-stream is stored at `/data/archive/nsa/nsagvrC1.c1`, which is in the format of `<DATA_DIR>/<site>/<site><datastream_name><facility>.<data_level>`. We can use the following method to assign the data-stream directory `datastream_dir = os.path.join(DATA_DIR, site, site + DATASTREAM_NAME + facility + '.' + DATA_LEVEL )`\n", + "\n", + "The data files under datastream_dir also follow naming conventions. But once you reach the datastream_dir level, the most important file naming convention to differentiate the files is \"yyyyMMdd.hhmmss\", which comes in handy to filter out files based on datetime.
For example, we can use `glob.glob(f'{datastream_dir}/*.200709*.*')` to filter files from September 2007.\n", + "\n", + "(Note: refer to the https://adc.arm.gov/discovery/#/ and https://adc.arm.gov/solr8/metadata/select API to explore ARM datastreams and associated available sites and facilities.)\n", + "\n", + "Please see the following examples in action" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d7e9eb85", + "metadata": {}, + "outputs": [], + "source": [ + "# Verify if DATA_DIR path exists\n", + "DATA_DIR = \"/data/archive\"\n", + "os.path.exists(DATA_DIR)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "586993fd", + "metadata": {}, + "outputs": [], + "source": [ + "# Specify datastream_dir following the path conventions and check its existence\n", + "DATASTREAM_NAME = \"gvr\"\n", + "DATA_LEVEL = \"c1\"\n", + "site = \"nsa\"\n", + "facility = \"C1\"\n", + "datastream_dir = os.path.join(DATA_DIR, site, site + DATASTREAM_NAME + facility + '.'
+ DATA_LEVEL )\n", + "print(datastream_dir)\n", + "print(os.path.exists(datastream_dir))\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0742f7c1", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: list 5 (random) files under datastream_dir\n", + "files = os.listdir(datastream_dir)\n", + "files[:5]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "39b98a36", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: get most recent file\n", + "list_of_files = glob.glob(f\"{datastream_dir}/*\") # * means all if need specific format then *.csv\n", + "latest_file = max(list_of_files, key=os.path.getctime)\n", + "latest_file" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "902d514e", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: sort datastream files based on datetime\n", + "files = os.listdir(datastream_dir)\n", + "file_sorted = files.copy()\n", + "file_sorted.sort() \n", + "print(file_sorted[:5])\n", + "\n", + "# to reverse\n", + "file_sorted_reverse = files.copy() \n", + "file_sorted_reverse.sort(reverse=True)\n", + "print(file_sorted_reverse[:5])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ec5923b2", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: pattern matching\n", + "# filter the 200709** files under datastream_dir\n", + "files_filter = glob.glob(f'{datastream_dir}/*.200709*.*')\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c4a4aa18", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: use armnotebook_utils.file_filter (TODO) to filter files based on datastream info \n" + ] + }, + { + "cell_type": "markdown", + "id": "cfeb9efc", + "metadata": {}, + "source": [ + "### Load data\n", + "ARM data is stored in [NetCDF](https://en.wikipedia.org/wiki/NetCDF) format.
We can use xarray's `open_dataset` method to load a single file, or `open_mfdataset` to open multiple files. (Note: the latter will still return a single xarray dataset object by combining multiple files.)\n", + "\n", + "Note: open_dataset keeps the file handle open and lazily loads its contents. All parameters are passed directly to open_dataset. It is a preferable method over load_dataset for memory efficiency. (For more details, please see [xarray.open_dataset](https://docs.xarray.dev/en/stable/generated/xarray.open_dataset.html))\n", + "\n", + "See the following example in action" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a440a329", + "metadata": {}, + "outputs": [], + "source": [ + "# open single file\n", + "full_path = latest_file\n", + "print(full_path)\n", + "ds_single = xr.open_dataset(full_path)\n", + "ds_single" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0143a3d", + "metadata": {}, + "outputs": [], + "source": [ + "print(ds_single)\n", + "print(type(ds_single))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1c0f8939", + "metadata": {}, + "outputs": [], + "source": [ + "# open multiple files\n", + "n_files = 3\n", + "full_paths = [os.path.join(datastream_dir, f_path) for f_path in file_sorted_reverse[:n_files]]\n", + "print(full_paths)\n", + "try: # Note: sometimes multiple files cannot be merged, so we used try except here.\n", + " ds_mutiple = xr.open_mfdataset(full_paths)\n", + " # ds_mutiple\n", + " print(type(ds_mutiple))\n", + " print(ds_mutiple)\n", + "except Exception as e:\n", + " print(e)" + ] + }, + { + "cell_type": "markdown", + "id": "3fa59943", + "metadata": {}, + "source": [ + "## Explore the data" + ] + }, + { + "cell_type": "markdown", + "id": "571f69b9", + "metadata": {}, + "source": [ + "### NetCDF Data structure \n", + "\n", + "\n", + "(If you are confident with NetCDF basics and xarray essentials, feel free to skip this section.)\n", + "Before we dive into
data exploration, there are some essential concepts we should be familiar with (shown below): \n", + "* Dataset\n", + "* Data array\n", + "* Variable\n", + "* Dimension\n", + "* Coordinate\n", + "* Data Type\n", + "* Meta Data (Attributes)\n", + "\n", + "We will not go into details about NetCDF basics, but here are some references you might find helpful\n", + "* [Components of a NetCDF Dataset](https://iprc.soest.hawaii.edu/users/xfu/tool/guidef-7.html#)\n", + "* [Network Common Data Form (NetCDF)](https://www.unidata.ucar.edu/software/netcdf/) and [A Brief History of (netCDF) Time](https://www.unidata.ucar.edu/software/netcdf/time/recs.html)\n", + "\n", + "\n", + "\n", + "### Xarray essentials\n", + "Earlier we introduced the xarray package and used `open_dataset` and `open_mfdataset` to retrieve NetCDF data files as an xarray Dataset object. Recall that you can review the data in a notebook by using `print(ds)` or `ds`. \n", + "\n", + "Using xarray to retrieve the aforementioned NetCDF basics is straightforward.
In general,\n", + "* Dataset: `ds`\n", + "* Data array: `ds.variables`\n", + "* Variable: `ds.variables`\n", + "* Dimension: `ds.dims`\n", + "* Coordinate: `ds.coords`\n", + "* Data Type: `type`\n", + "* Meta Data (Attributes): `ds.attrs`\n", + "\n", + "Also, here are some references if you are new to xarray\n", + "* [Xarray in 45 minutes](https://tutorial.xarray.dev/overview/xarray-in-45-min.html)\n", + "* [Handling NetCDF Files using XArray for Absolute Beginners](https://towardsdatascience.com/handling-netcdf-files-using-xarray-for-absolute-beginners-111a8ab4463f)\n", + "\n", + "Try the following commands in action\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b3eac323", + "metadata": {}, + "outputs": [], + "source": [ + "ds = ds_single" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "77ecf85d", + "metadata": {}, + "outputs": [], + "source": [ + "# Dataset \n", + "ds" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "25e7de09", + "metadata": {}, + "outputs": [], + "source": [ + "# Data array, Variable\n", + "# Note: the info can be overwhelming.
Do not attempt to grasp everything at first glance.\n", + "ds.variables" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8c41b67e", + "metadata": {}, + "outputs": [], + "source": [ + "# dimensions\n", + "ds.dims" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "156f1dfc", + "metadata": {}, + "outputs": [], + "source": [ + "# coordinates\n", + "ds.coords" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "277d6064", + "metadata": {}, + "outputs": [], + "source": [ + "# Meta Data (Attributes)\n", + "ds.attrs" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d334681f", + "metadata": {}, + "outputs": [], + "source": [ + "# type\n", + "print(type(ds))\n", + "print(type(ds.variables))\n", + "print(type(ds.dims))\n", + "print(type(ds.coords))\n", + "print(type(ds.attrs))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "643399d6", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: retrieve variable names only\n", + "list(ds.variables)" + ] + }, + { + "cell_type": "markdown", + "id": "1cf36878", + "metadata": {}, + "source": [ + "#### Discussion: variable vs. coordinates vs. dimensions. \n", + "From [Components of a NetCDF Dataset](https://iprc.soest.hawaii.edu/users/xfu/tool/guidef-7.html#):\n", + "\n", + "* Dimension: A dimension may be used to represent a real physical dimension, for example, time, latitude, longitude, or height. A dimension might also be used to index other quantities, for example station or model-run-number.\n", + "A netCDF dimension has both a name and a length. A dimension length is an arbitrary positive integer, except that one dimension in a netCDF dataset can have the length UNLIMITED.\n", + "\n", + "* Variables: Variables are used to store the bulk of the data in a netCDF dataset. A variable represents an array of values of the same type. A scalar value is treated as a 0-dimensional array.
A variable has a name, a data type, and a shape described by its list of dimensions specified when the variable is created. A variable may also have associated attributes, which may be added, deleted or changed after the variable is created.\n", + "\n", + "* Coordinate (Variables): It is legal for a variable to have the same name as a dimension. Such variables have no special meaning to the netCDF library. However there is a convention that such variables should be treated in a special way by software using this library.\n", + "A variable with the same name as a dimension is called a coordinate variable. It typically defines a physical coordinate corresponding to that dimension.\n", + "\n", + "\n", + "Tips: By definition, coordinates and dimensions are also variables. For example ds.time is a coordinate but it is also a special variable. But in practice, when we talk about a variable, we mean a regular (non-coordinate) variable. We will use this convention for the remainder of this notebook. " + ] + }, + { + "cell_type": "markdown", + "id": "d47120f9", + "metadata": {}, + "source": [ + "### Xarray Variable\n", + "\n", + "\n", + "We can use `ds.variables` to access the variables of a dataset. But printing out the whole dataset can often be overwhelming. Instead, we would work on an individual variable (also called an xarray data array) using the `ds[var_name]` syntax, where `var_name` is the name of the variable. See the following examples in action.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5a57e136", + "metadata": {}, + "outputs": [], + "source": [ + "print(type(ds[\"time\"]))\n", + "ds[\"time\"]" + ] + }, + { + "cell_type": "markdown", + "id": "51ec643e", + "metadata": {}, + "source": [ + "#### Variable properties\n", + "For an individual variable, the following properties are most commonly used; each is listed with the associated xarray attribute used to retrieve it.
(assuming var = ds[var_name])\n", + "* name: `var.name`\n", + "* data content: `var.data`\n", + "* attributes: `var.attrs`\n", + "* dimensions: `var.dims`\n", + "* data type: `var.data.dtype`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a632a525", + "metadata": {}, + "outputs": [], + "source": [ + "var_name = \"time\"\n", + "var = ds[var_name]\n", + "\n", + "print(\"var.name: \\n\", var.name, \"\\n\")\n", + "print(\"var.data: \\n\", var.data, \"\\n\")\n", + "print(\"var.attrs: \\n\", var.attrs, \"\\n\")\n", + "print(\"var.dims: \\n\", var.dims, \"\\n\")\n", + "print(\"var.data.dtype: \\n\", var.data.dtype, \"\\n\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b256b07f", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: individual variable info table (collect and display variable info in a tabular format)\n", + "df_info = pd.DataFrame()\n", + "df_info[\"var_name\"] = list(ds.variables)\n", + "df_info[\"dims\"] = df_info.var_name.apply(lambda x: ds[x].dims)\n", + "df_info[\"is_dim\"] = df_info.var_name.apply(lambda x: x in ds.dims)\n", + "df_info[\"n_dim\"] = df_info.var_name.apply(lambda x: len(ds[x].dims))\n", + "df_info[\"attrs\"] = df_info.var_name.apply(lambda x: ds[x].attrs)\n", + "df_info[\"dtype\"] = df_info.var_name.apply(lambda x: ds[x].data.dtype)\n", + "df_info" + ] + }, + { + "cell_type": "markdown", + "id": "e6a36b96", + "metadata": {}, + "source": [ + "### Data cleaning/Preprocessing (skipped)\n", + "Data cleaning and preprocessing is an important stage in the data analysis pipeline. However, it is outside the scope of this notebook. For more information about data cleaning and preprocessing basics, here are some references.
\n", + "* [Xarray Fundamentals](https://earth-env-data-science.github.io/lectures/xarray/xarray.html)\n", + "* [Xarray Tutorial — Pangeo Gallery documentation](http://gallery.pangeo.io/repos/pangeo-data/pangeo-tutorial-gallery/xarray.html)\n", + "* [Pythonic Data Cleaning With pandas and NumPy](https://realpython.com/python-data-cleaning-numpy-pandas/)\n", + "* [Pandas - Cleaning Data](https://www.w3schools.com/python/pandas/pandas_cleaning.asp)" + ] + }, + { + "cell_type": "markdown", + "id": "ae6e1a69", + "metadata": {}, + "source": [ + "### Plotting\n", + "Data visualization (or plotting) is another important data analysis topic and deserves its own discussion. Here in this notebook we will only demonstrate basic tools for simple data visualization tasks.\n", + "\n", + "Here are some references you might find useful:\n", + "* xarray plotting: https://docs.xarray.dev/en/stable/user-guide/plotting.html\n", + "* Atmospheric Community Toolkit (ACT): https://arm-doe.github.io/ACT/index.html\n", + "\n", + "Note: this notebook is auto-generated using a template. It uses a general idea to select variable(s) to plot and is not customized to each individual notebook. Feel free to change the variables of interest, especially if a figure fails to plot." + ] + }, + { + "cell_type": "markdown", + "id": "7e417fb2", + "metadata": {}, + "source": [ + "#### 1-dimensional basic time series plot\n", + "\n", + "For the following plot we would like to find variables such that\n", + "* it has one and only one dimension\n", + "* \"time\" is its coordinate variable\n", + "* it is not a dimension itself,\n", + "* it is not a special variable with substrings within [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "\n", + "Please see the following example in action.
(For more details about pandas filtering, please see the following references.)\n", + "* [pandas: multiple conditions while indexing data frame](https://stackoverflow.com/questions/22591174/pandas-multiple-conditions-while-indexing-data-frame-unexpected-behavior)\n", + "* [pandas.DataFrame.apply](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.apply.html)\n", + "* [How to test if a string contains one of the substrings in a list...](https://stackoverflow.com/questions/26577516/how-to-test-if-a-string-contains-one-of-the-substrings-in-a-list-in-pandas)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "30edac2d", + "metadata": {}, + "outputs": [], + "source": [ + "# (pandas query) filter n_dim==1, non-dimensional, and not containing the following substrings\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter = df_info[(df_info.n_dim == 1) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (df_info.is_dim==False) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4777a659", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " var_1d = df_filter.var_name.values[0]\n", + " var_1d\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fb5ae985", + "metadata": {}, + "outputs": [], + "source": [ + "# Note: if failed, change to another variable to plot.\n", + "try:\n", + " ds[var_1d].plot()\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "markdown", + "id": "9e41066e", + "metadata": {}, + "source": [ + "#### 2-dimensional basic plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "09bd4adc", + "metadata": {}, + "outputs": [], + "source": [ + "# (pandas query) filter n_dim==2, non-dimensional, and not containing the
following substrings\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_2 = df_info[(df_info.n_dim == 2) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (df_info.is_dim==False) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter_2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2cdea1a9", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " var_2d = df_filter_2.var_name.values[0]\n", + " var_2d\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f5f48879", + "metadata": {}, + "outputs": [], + "source": [ + "# Note: if failed, change to another variable to plot.\n", + "try:\n", + " print(ds[var_2d].dims)\n", + " # ds[var_2d].plot()\n", + "\n", + " # conventionally, use \"time\" as x-axis\n", + " ds[var_2d].plot(x=\"time\")\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "markdown", + "id": "7554b2a6", + "metadata": {}, + "source": [ + "#### qc-plotting (optional)\n", + "\n", + "Note: act qc plotting has more strict requirement, one of them is the associated qc variable need to have \"flag_masks\" attributes. 
Which is added by using ds = act.io.armfiles.read_netcdf(files_list), then ds.clean.cleanup()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "71c2096f", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " ds_act = act.io.armfiles.read_netcdf(full_path)\n", + " print(type(ds_act))\n", + " ds_act.clean.cleanup()\n", + "\n", + " # or \n", + " # ds.clean.cleanup()\n", + "except Exception as e:\n", + " print(\"ERROR\", e)\n", + " ds_act = ds" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "844f8505", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter valid variables for ACT qc plotting\n", + "condition = (\"qc_\" + df_info.var_name).apply(lambda x: ds_act[x].attrs.get(\"flag_masks\") is not None \n", + " if x in list(ds.variables) else False)\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_3 = df_info[(df_info.is_dim==False) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings))) &\n", + " condition\n", + " ]\n", + "df_filter_3" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ecd38cc6", + "metadata": {}, + "outputs": [], + "source": [ + "# QC Plot\n", + "try:\n", + " qc_variable = df_filter_3.var_name.values[0]\n", + " print(qc_variable)\n", + "except Exception as e:\n", + " print(e)\n", + " \n", + "try:\n", + "\n", + " # Plot\n", + " qc_display = act.plotting.TimeSeriesDisplay(ds_act)\n", + " qc_display.add_subplots((2,), figsize = (9.5,10))\n", + " qc_ax = qc_display.plot(qc_variable, subplot_index=(0,), set_title=\"QC results on field: \" + qc_variable,)\n", + " qc_ax.grid()\n", + " qc_display.qc_flag_block_plot(qc_variable, subplot_index=(1,))\n", + " qc_ax.set_xlabel(f\"UTC Time starts at {ds.time.data[0]}\")\n", + "\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e)" + ] + }, + { + "cell_type": "markdown", + "id": "d76e4d27", + "metadata": 
{}, + "source": [ + "#### bonus: choose variables to plot from a dropdown menu " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bb733804", + "metadata": {}, + "outputs": [], + "source": [ + "# Valid variables filtering\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_4 = df_info[(df_info.is_dim==False) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter_4" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "82d5c20d", + "metadata": {}, + "outputs": [], + "source": [ + "# example 1: using xarray plot\n", + "\n", + "# Uncomment the following cell to try the interactive plot (ctrl + /)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2fc9aaa4", + "metadata": {}, + "outputs": [], + "source": [ + "# %matplotlib widget\n", + "# plt.clf()\n", + "\n", + "# fig, ax = plt.subplots(figsize=(10, 4))\n", + "\n", + "# available_variables = df_filter_4.var_name.values\n", + "# @widgets.interact(var=available_variables)\n", + "# def update(var = available_variables[0]):\n", + "# fig.clear() # Remove old lines from plot and plot new one\n", + "# if len(ds[var].dims)==2:\n", + "# ds[var].plot(x=\"time\", add_colorbar=False)\n", + "# else:\n", + "# ds[var].plot()\n", + "# plt.grid()\n", + "# plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fabdd802", + "metadata": {}, + "outputs": [], + "source": [ + "# example 2: using act plot\n", + "\n", + "# Uncomment the following cell to try the interactive plot (ctrl + /)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "962b4186", + "metadata": {}, + "outputs": [], + "source": [ + "# %matplotlib widget\n", + "# plt.clf()\n", + "\n", + "# available_variables = df_filter_4.var_name.values\n", + "\n", + "\n", + "# @widgets.interact(var=available_variables)\n", + "# def 
update(var = available_variables[0]):\n", + "\n", + "# i_display = act.plotting.TimeSeriesDisplay(ds_act)\n", + "# i_display.add_subplots((1,), figsize=(10, 4))\n", + "# ax = i_display.plot(var, subplot_index=(0,), set_title=f\"{var} ({ds_act[var].attrs['long_name']})\",)\n", + "\n", + "# ax.set_xlabel(f\"UTC Time starts at {ds.time.data[0]}\")\n", + "# ax.grid()\n", + "# plt.show()\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.16" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/GVR/gvr.c1.ipynb b/VAPs/quicklook/GVR/gvr.c1.ipynb new file mode 100644 index 00000000..57a84088 --- /dev/null +++ b/VAPs/quicklook/GVR/gvr.c1.ipynb @@ -0,0 +1,339 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# GVR.C1 Plots\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/gvr) for more information about this vap." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib widget\n", + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import ipywidgets as widgets\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "# Data archive directory\n", + "DATA_DIR = r'/data/archive/'\n", + "\n", + "# Datastream info\n", + "DATASTREAM_NAME = 'gvr'\n", + "DATA_LEVEL = 'c1'\n", + "LOCATIONS = [{'end_date': '2023-12-16', 'facility': 'C1', 'site': 'nsa', 'start_date': '2006-09-28'}]" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "9faaf875", + "metadata": {}, + "source": [ + "## Define site, facility, and date range" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ac6764f5", + "metadata": {}, + "outputs": [], + "source": [ + "print(\"The following locations and date ranges are available for this VAP:\")\n", + "display(pd.DataFrame(LOCATIONS, columns=['site', 'facility', 'start_date', 'end_date']))" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "8d132223", + "metadata": {}, + "source": [ + "#### Define site, facility, and date range (date format: YYYY-MM-DD) using the variables below:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e563983a", + "metadata": {}, + "outputs": [], + "source": [ + "site_facility = ( 'nsa', 'C1' )\n", + "\n", + "date_start = '2023-12-14'\n", + "date_end = '2023-12-16'" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "bccd3dfe-2f99-49a5-bace-ea37e8dd8fc3", + "metadata": {}, + "source": [ + "## Load data files\n", + "Load data files from /data/archive/" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cb4b9a26-c574-49c0-a521-658fa553e39e", + "metadata": {}, + 
"outputs": [], + "source": [ + "# Compile list of files\n", + "site, facility = site_facility\n", + "d_date_start = datetime.strptime(date_start, '%Y-%m-%d')\n", + "d_date_end = datetime.strptime(date_end, '%Y-%m-%d')\n", + "dir_path = os.path.join(DATA_DIR + site, site + DATASTREAM_NAME + facility + r'.' + DATA_LEVEL )\n", + "dir_path\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6be8f3dc", + "metadata": {}, + "outputs": [], + "source": [ + "from datetime import date, timedelta\n", + "import pandas as pd\n", + "\n", + "def get_ARM_formated_dates(start_date, end_date):\n", + " \"\"\"\n", + " Get a list of ARM conventional formated date lists, based on start_date and end_date(inclusive)\n", + " EXAMPLE:\n", + " get_ARM_formated_dates(start_date=\"20180219\", end_date=\"20180221\")\n", + " >> [\"20180219\", \"20180220\", \"20180221\"] \n", + " \"\"\"\n", + " \n", + " _start_date = pd.to_datetime(start_date)\n", + " _end_date = pd.to_datetime(end_date)\n", + " \n", + " delta = _end_date - _start_date # returns timedelta \n", + " dates = []\n", + "\n", + " for i in range(delta.days + 1):\n", + " day = _start_date + timedelta(days=i)\n", + " day_formated = day.strftime(format=\"%Y%m%d\")\n", + " dates.append(day_formated)\n", + " return dates\n", + "\n", + "\n", + "get_ARM_formated_dates(start_date=date_start, end_date=date_end)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "51feea2e", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter a list of files based on date pattern\n", + "import glob\n", + "dates = get_ARM_formated_dates(start_date=date_start, end_date=date_end)\n", + "files_filter = []\n", + "for date in dates:\n", + " files_filter += glob.glob(f'{dir_path}/*.{date}*.*')\n", + " files_filter\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0e5d0d7", + "metadata": {}, + "outputs": [], + "source": [ + "# Load files as a single dataset\n", + "files_list = 
files_filter \n", + "ds = act.io.armfiles.read_netcdf(files_list)\n", + "ds.clean.cleanup()\n", + "print(f'{len(files_list)} files loaded')\n", + "ds\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "4a551094-9ec0-4b64-b80a-9940573c2f50", + "metadata": {}, + "source": [ + "## Plot time series data\n", + "#### Define the list of variables to be plotted:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d09b789e-84f1-4605-846b-a72c110c8048", + "metadata": {}, + "outputs": [], + "source": [ + "variables_to_plot = ['tbsky1', 'tbsky3', 'tbsky7']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3458fb08-035b-4898-9253-0a94e6f9c97b", + "metadata": {}, + "outputs": [], + "source": [ + "ts_display = act.plotting.TimeSeriesDisplay(ds)\n", + "ts_display.add_subplots((len(variables_to_plot),), figsize = (9.5,4*len(variables_to_plot)))\n", + "\n", + "for i,v in enumerate(variables_to_plot):\n", + " ts_ax = ts_display.plot(v, subplot_index=(i,), set_title=ds.variables[v].attrs['long_name'],)\n", + " ts_ax.grid()\n", + "\n", + "plt.show()\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "194399aa-1907-452b-8ba9-bc31d7f60291", + "metadata": {}, + "source": [ + "## Quality check plots\n", + "#### Define variable for QC plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9a0ef63d-3eeb-48fc-a24a-43258b6134b8", + "metadata": {}, + "outputs": [], + "source": [ + "qc_variable = 'tbsky1'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "532663a3-4dc0-4497-bda8-018c5f91e1c4", + "metadata": {}, + "outputs": [], + "source": [ + "# QC Plot\n", + "if ('qc_'+qc_variable) in ds.variables:\n", + "\n", + " # Plot\n", + " qc_display = act.plotting.TimeSeriesDisplay(ds)\n", + " qc_display.add_subplots((2,), figsize = (9.5,10))\n", + " qc_ax = qc_display.plot(qc_variable, subplot_index=(0,), set_title=\"QC results on field: \" + qc_variable,)\n", + " 
qc_ax.grid()\n", + " qc_display.qc_flag_block_plot(qc_variable, subplot_index=(1,))\n", + "\n", + " plt.show()\n", + "else:\n", + " print(f'QC not available for the selected field: {qc_variable}')\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "2308db16-f362-4033-a11a-c3e5e75ad9ba", + "metadata": {}, + "source": [ + "## Field selection dropdown menu\n", + "Select variable to be plotted from a dropdown menu" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d5792fa8-2253-441a-8ac6-1e07a0345031", + "metadata": {}, + "outputs": [], + "source": [ + "plt.ioff()\n", + "\n", + "# populate dropdown menu with available variables \n", + "available_variables = [v for v in ds.variables if not('time' in v or v.startswith('qc_') or v.startswith('source_')) and 'long_name' in ds.variables[v].attrs]\n", + "d_variable = 'tbsky1'\n", + "dropdown = widgets.Dropdown(\n", + " options = [(ds.variables[v].attrs['long_name'], v) for v in available_variables],\n", + " value= d_variable,\n", + " description='Field:',\n", + " disabled=False,\n", + ")\n", + "dropdown.layout.margin = '0px 30% 0px 20%'\n", + "dropdown.layout.width = '50%'\n", + "\n", + "# set up display\n", + "i_display = act.plotting.TimeSeriesDisplay(ds)\n", + "i_display.add_subplots((1,), figsize = (9.5,5))\n", + "i_ax = i_display.plot(d_variable, subplot_index=(0,), set_title=ds.variables[d_variable].attrs['long_name'],)\n", + "i_ax.grid()\n", + "i_fig = i_display.fig\n", + "\n", + "# update plot callback function\n", + "def update_plot(change):\n", + " i_ax.cla()\n", + " i_ax_new = i_display.plot(change.new, subplot_index=(0,), set_title=ds.variables[change.new].attrs['long_name'],)\n", + " i_ax_new.grid()\n", + " i_fig.canvas.draw()\n", + " i_fig.canvas.flush_events()\n", + "\n", + "dropdown.observe(update_plot, names='value')\n", + "\n", + "widgets.AppLayout(\n", + " header=dropdown,\n", + " center=i_fig.canvas,\n", + " pane_heights=[1, 6,1]\n", + ")\n" + ] + } + ], + 
"metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.12" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/HVPS-AIR/HVPS-AIR_tutorial.ipynb b/VAPs/quicklook/HVPS-AIR/HVPS-AIR_tutorial.ipynb new file mode 100644 index 00000000..2cf77f39 --- /dev/null +++ b/VAPs/quicklook/HVPS-AIR/HVPS-AIR_tutorial.ipynb @@ -0,0 +1,838 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# AAFHVPS.C1 Notebook\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/hvps-air) for more information about this vap." + ] + }, + { + "cell_type": "markdown", + "id": "97097763", + "metadata": {}, + "source": [ + "In this notebook, we demonstrate the workflow to explore ARM vap data (using aafhvps as an example.) Value-added products (VAPs) are higher-order data products that have been analyzed and processed to ease scientists’ use of ARM data in atmospheric research and global climate models. \n", + "Here is the main content we will cover." 
+ ] + }, + { + "cell_type": "markdown", + "id": "eddec40f", + "metadata": {}, + "source": [ + "# Table of Contents\n", + "## Access the data\n", + "* How to retrieve the data\n", + "* Data path and file name conventions\n", + "* Load data\n", + "## Explore the data\n", + "* NetCDF Data structure\n", + "* Xarray essentials\n", + "* Xarray Variable\n", + "## Plot the data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "import random\n", + "\n", + "import glob\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "id": "2372d5be", + "metadata": {}, + "source": [ + "## Access the data" + ] + }, + { + "cell_type": "markdown", + "id": "5b0d9684", + "metadata": {}, + "source": [ + "### How to retrieve the data\n", + "We assume the path \"/data/archive\" is available where you are running this notebook. You can use `os.path.exists(\"/data/archive\")` to verify if the path exists on your machine.\n", + "\n", + "\n", + "### Data path and file name conventions\n", + "There are several common terminologies regarding ARM data, for example, data-stream-name, data-level, site, facility, instrument, etc. (For more details, please see [ARM Data File Standards Version 1.2](https://www.google.com/search?q=arm+datastream+facility+cite+definition&rlz=1C1GCEJ_enUS1029US1029&ei=hb41ZICrBPukqtsPvMODIA&ved=0ahUKEwjAgeiS0aL-AhV7kmoFHbzhAAQQ4dUDCBA&uact=5&oq=arm+datastream+facility+cite+definition&gs_lcp=Cgxnd3Mtd2l6LXNlcnAQAzIFCAAQogQyBQgAEKIEMgUIABCiBDIFCAAQogQyBQgAEKIEOgoIABBHENYEELADOgoIIRCgARDDBBAKSgQIQRgAUJ4DWIoOYO0PaAFwAXgAgAGMAYgBuQmSAQMyLjmYAQCgAQHIAQjAAQE&sclient=gws-wiz-serp).) 
\n", + "\n", + "For example, this notebook is called `aafhvps.c1`, where `aafhvps` is the \"datastream name\", and `c1` is the \"data level\".\n", + "\n", + "This datastream also contains site `cor` and facility `F1`. (Note: an individual datastream might have multiple site-facility pairs.)\n", + "In such a case, the data of this data-stream is stored at `/data/archive/cor/coraafhvpsF1.c1`, which is in the format of `{DATA_DIR}/{site}/{site}{DATASTREAM_NAME}{facility}.{DATA_LEVEL}`. We can use the following method to assign the data-stream directory `datastream_dir = os.path.join(DATA_DIR, site, site + DATASTREAM_NAME + facility + '.' + DATA_LEVEL )`\n", + "\n", + "The data files under datastream_dir also follow naming conventions. Once you reach the datastream_dir level, the most important file naming convention for differentiating the files is \"yyyyMMdd.hhmmss\", which comes in handy to filter files based on datetime. For example, we can use `glob.glob(f'{datastream_dir}/*.200709*.*')` to filter files from September 2007.\n", + "\n", + "(Note: refer to https://adc.arm.gov/discovery/#/ and the https://adc.arm.gov/solr8/metadata/select API to explore ARM datastreams and their associated available sites and facilities.)\n", + "\n", + "Please see the following examples in action" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d7e9eb85", + "metadata": {}, + "outputs": [], + "source": [ + "# Verify if DATA_DIR path exists\n", + "DATA_DIR = \"/data/archive\"\n", + "os.path.exists(DATA_DIR)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "586993fd", + "metadata": {}, + "outputs": [], + "source": [ + "# Specify datastream_dir following the path conventions and check its existence\n", + "DATASTREAM_NAME = \"aafhvps\"\n", + "DATA_LEVEL = \"c1\"\n", + "site = \"cor\"\n", + "facility = \"F1\"\n", + "datastream_dir = os.path.join(DATA_DIR, site, site + DATASTREAM_NAME + facility + '.' 
+ DATA_LEVEL )\n", + "print(datastream_dir)\n", + "print(os.path.exists(datastream_dir))\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0742f7c1", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: list 5 (random) files under datastream_dir\n", + "files = os.listdir(datastream_dir)\n", + "files[:5]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "39b98a36", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: get most recent file\n", + "list_of_files = glob.glob(f\"{datastream_dir}/*\") # * means all if need specific format then *.csv\n", + "latest_file = max(list_of_files, key=os.path.getctime)\n", + "latest_file" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "902d514e", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus sort datastream files based on datetime\n", + "files = os.listdir(datastream_dir)\n", + "file_sorted = files.copy()\n", + "file_sorted.sort() \n", + "print(file_sorted[:5])\n", + "\n", + "# to reverse\n", + "file_sorted_reverse = files.copy() \n", + "file_sorted_reverse.sort(reverse=True)\n", + "print(file_sorted_reverse[:5])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ec5923b2", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: pattern matching\n", + "# filter the 200709** files under datastream_dir\n", + "files_filter = glob.glob(f'{datastream_dir}/*.200709*.*')\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c4a4aa18", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: use armnotebook_utils.file_filter (TODO) to filter files based on datastream info \n" + ] + }, + { + "cell_type": "markdown", + "id": "cfeb9efc", + "metadata": {}, + "source": [ + "### Load data\n", + "The arm data is stored in [NetCDF](https://en.wikipedia.org/wiki/NetCDF#:~:text=%22NetCDF%20(network%20Common%20Data%20Form,format%20for%20representing%20scientific%20data.) format. 
We can use xarray's `open_dataset` method to load single file, or `open_mfdataset` to open multiple files. (Note: the latter will still return a single xarray dataset object by combining multiple files.)\n", + "\n", + "Note: open_dataset keeps the file handle open and lazy loads its contents. All parameters are passed directly to open_dataset. It is a preferable method over load_dataset for memory-effiency. (more details, please see [xarray.open_dataset](https://docs.xarray.dev/en/stable/generated/xarray.open_dataset.html))\n", + "\n", + "See the following example in action" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a440a329", + "metadata": {}, + "outputs": [], + "source": [ + "# open single file\n", + "full_path = latest_file\n", + "print(full_path)\n", + "ds_single = xr.open_dataset(full_path)\n", + "ds_single" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0143a3d", + "metadata": {}, + "outputs": [], + "source": [ + "print(ds_single)\n", + "print(type(ds_single))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1c0f8939", + "metadata": {}, + "outputs": [], + "source": [ + "# open multiple files\n", + "n_files = 3\n", + "full_paths = [os.path.join(datastream_dir, f_path) for f_path in file_sorted_reverse[:n_files]]\n", + "print(full_paths)\n", + "try: # Note: sometimes multiple files cannot be merged, so we used try except here.\n", + " ds_mutiple = xr.open_mfdataset(full_paths)\n", + " # ds_mutiple\n", + " print(type(ds_mutiple))\n", + " print(ds_mutiple)\n", + "except Exception as e:\n", + " print(e)" + ] + }, + { + "cell_type": "markdown", + "id": "3fa59943", + "metadata": {}, + "source": [ + "## Explore the data" + ] + }, + { + "cell_type": "markdown", + "id": "571f69b9", + "metadata": {}, + "source": [ + "### NetCDF Data structure \n", + "\n", + "\n", + "(If you are confident with NetCDF basics and xarray essentials, feel free to skip this session.)\n", + "Before we dive into 
data exploration, there are some essential concepts we should be familiar with (shown below): \n", + "* Dataset\n", + "* Data array\n", + "* Variable\n", + "* Dimension\n", + "* Coordinate\n", + "* Data Type\n", + "* Meta Data (Attributes)\n", + "\n", + "We will not go into details about NetCDF basics and here are some references you might find helpful\n", + "* [Components of a NetCDF Dataset](https://iprc.soest.hawaii.edu/users/xfu/tool/guidef-7.html#)\n", + "* [Network Common Data Form (NetCDF)](https://www.unidata.ucar.edu/software/netcdf/) and [A Brief History of (netCDF) Time](https://www.unidata.ucar.edu/software/netcdf/time/recs.html)\n", + "\n", + "\n", + "\n", + "### Xarray essentials\n", + "Earlier we introduced the xarray package and used `open_dataset` and `open_mfdataset` to retrieve NetCDF data files as an xarray Dataset object. Recall that you can review the data in a notebook by using `print(ds)` or `ds`. \n", + "\n", + "Using xarray to retrieve the aforementioned NetCDF basics is straightforward. 
In general,\n", + "* Dataset: `ds`\n", + "* Data array: `ds.variables`\n", + "* Variable: `ds.variables`\n", + "* Dimension: `ds.dims`\n", + "* Coordinate: `ds.coords`\n", + "* Data Type: `type`\n", + "* Meta Data (Attributes): `ds.attrs`\n", + "\n", + "Also, here are some references if you are new to xarray\n", + "* [Xarray in 45 minutes](https://tutorial.xarray.dev/overview/xarray-in-45-min.html)\n", + "* [Handling NetCDF Files using XArray for Absolute Beginners](https://towardsdatascience.com/handling-netcdf-files-using-xarray-for-absolute-beginners-111a8ab4463f)\n", + "\n", + "Try the following commands in action\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b3eac323", + "metadata": {}, + "outputs": [], + "source": [ + "ds = ds_single" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "77ecf85d", + "metadata": {}, + "outputs": [], + "source": [ + "# Dataset \n", + "ds" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "25e7de09", + "metadata": {}, + "outputs": [], + "source": [ + "# Data array, Variable\n", + "# Note: the info can be overwhelming. 
Do not attempt to grasp everything at the first glance.\n", + "ds.variables" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8c41b67e", + "metadata": {}, + "outputs": [], + "source": [ + "# dimensions\n", + "ds.dims" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "156f1dfc", + "metadata": {}, + "outputs": [], + "source": [ + "# coordinates\n", + "ds.coords" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "277d6064", + "metadata": {}, + "outputs": [], + "source": [ + "# Meta Data (Attributes)\n", + "ds.attrs" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d334681f", + "metadata": {}, + "outputs": [], + "source": [ + "# type\n", + "print(type(ds))\n", + "print(type(ds.variables))\n", + "print(type(ds.dims))\n", + "print(type(ds.coords))\n", + "print(type(ds.attrs))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "643399d6", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: retrieve variable names only\n", + "list(ds.variables)" + ] + }, + { + "cell_type": "markdown", + "id": "1cf36878", + "metadata": {}, + "source": [ + "#### Discussion: variable vs. coordinates vs. dimensions. \n", + "From [Components of a NetCDF Dataset](https://iprc.soest.hawaii.edu/users/xfu/tool/guidef-7.html#):\n", + "\n", + "* Dimension: A dimension may be used to represent a real physical dimension, for example, time, latitude, longitude, or height. A dimension might also be used to index other quantities, for example station or model-run-number.\n", + "A netCDF dimension has both a name and a length. A dimension length is an arbitrary positive integer, except that one dimension in a netCDF dataset can have the length UNLIMITED.\n", + "\n", + "* Variables: Variables are used to store the bulk of the data in a netCDF dataset. A variable represents an array of values of the same type. A scalar value is treated as a 0-dimensional array. 
A variable has a name, a data type, and a shape described by its list of dimensions specified when the variable is created. A variable may also have associated attributes, which may be added, deleted or changed after the variable is created.\n", + "\n", + "* Coordinate (Variables): It is legal for a variable to have the same name as a dimension. Such variables have no special meaning to the netCDF library. However there is a convention that such variables should be treated in a special way by software using this library.\n", + "A variable with the same name as a dimension is called a coordinate variable. It typically defines a physical coordinate corresponding to that dimension.\n", + "\n", + "\n", + "Tip: By definition, coordinates and dimensions are also variables. For example, `ds.time` is a coordinate, but it is also a special variable. In practice, however, when we talk about a variable, we mean a regular (non-coordinate) variable. We will use this convention for the remainder of the notebook. " + ] + }, + { + "cell_type": "markdown", + "id": "d47120f9", + "metadata": {}, + "source": [ + "### Xarray Variable\n", + "\n", + "\n", + "We can use `ds.variables` to access the variables of a dataset. However, printing out the whole dataset is often overwhelming. Instead, we can work on an individual variable (also called an xarray data array) using the `ds[var_name]` syntax, where `var_name` is the name of the variable. See the following examples in action.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5a57e136", + "metadata": {}, + "outputs": [], + "source": [ + "print(type(ds[\"time\"]))\n", + "ds[\"time\"]" + ] + }, + { + "cell_type": "markdown", + "id": "51ec643e", + "metadata": {}, + "source": [ + "#### Variable properties\n", + "For an individual variable, the following properties are the most commonly used, shown with the associated xarray attribute used to retrieve them. 
(assuming var = ds[var_name])\n", + "* name: `var.name`\n", + "* data content: `var.data`\n", + "* attributes: `var.attrs`\n", + "* dimensions: `var.dims`\n", + "* data type: `var.data.dtype`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a632a525", + "metadata": {}, + "outputs": [], + "source": [ + "var_name = \"time\"\n", + "var = ds[var_name]\n", + "\n", + "print(\"var.name: \\n\", var.name, \"\\n\")\n", + "print(\"var.data: \\n\", var.data, \"\\n\")\n", + "print(\"var.attrs: \\n\", var.attrs, \"\\n\")\n", + "print(\"var.dims: \\n\", var.dims, \"\\n\")\n", + "print(\"var.data.dtype: \\n\", var.data.dtype, \"\\n\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b256b07f", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: individual variable info table (collect and display variable info in a tabular format)\n", + "df_info = pd.DataFrame()\n", + "df_info[\"var_name\"] = list(ds.variables)\n", + "df_info[\"dims\"] = df_info.var_name.apply(lambda x: ds[x].dims)\n", + "df_info[\"is_dim\"] = df_info.var_name.apply(lambda x: x in ds.dims)\n", + "df_info[\"n_dim\"] = df_info.var_name.apply(lambda x: len(ds[x].dims))\n", + "df_info[\"attrs\"] = df_info.var_name.apply(lambda x: ds[x].attrs)\n", + "df_info[\"dtype\"] = df_info.var_name.apply(lambda x: ds[x].data.dtype)\n", + "df_info" + ] + }, + { + "cell_type": "markdown", + "id": "e6a36b96", + "metadata": {}, + "source": [ + "### Data cleaning/Preprocessing (skipped)\n", + "Data cleaning and preprocessing is an important stage in the data analysis pipeline. However, it is outside the scope of this notebook. For more information about data cleaning and preprocessing basics, here are some references. 
\n", + "* [Xarray Fundamentals](https://earth-env-data-science.github.io/lectures/xarray/xarray.html)\n", + "* [Xarray Tutorial — Pangeo Gallery documentation](http://gallery.pangeo.io/repos/pangeo-data/pangeo-tutorial-gallery/xarray.html)\n", + "* [Pythonic Data Cleaning With pandas and NumPy](https://realpython.com/python-data-cleaning-numpy-pandas/)\n", + "* [Pandas - Cleaning Data](https://www.w3schools.com/python/pandas/pandas_cleaning.asp)" + ] + }, + { + "cell_type": "markdown", + "id": "ae6e1a69", + "metadata": {}, + "source": [ + "### Plotting\n", + "Data visualization (or plotting) is another important data analysis topic and deserves its own discussion. Here in this notebook we will only demonstrate basic tools for simple data visualization tasks.\n", + "\n", + "Here are some references you might find useful:\n", + "* xarray plotting: https://docs.xarray.dev/en/stable/user-guide/plotting.html\n", + "* Atmospheric Community Toolkit (ACT): https://arm-doe.github.io/ACT/index.html\n", + "\n", + "Note: this notebook is auto-generated using a template. It uses a generic heuristic to select the variable(s) to plot and is not customized for each individual notebook. Feel free to change the variables of interest, especially if a figure fails to plot." + ] + }, + { + "cell_type": "markdown", + "id": "7e417fb2", + "metadata": {}, + "source": [ + "#### 1-dimensional basic time series plot\n", + "\n", + "For the following plot we would like to find variables such that\n", + "* it has one and only one dimension\n", + "* \"time\" is its coordinate variable\n", + "* it is not a dimension itself,\n", + "* its name does not contain any of the substrings [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "\n", + "Please see the following example in action. 
(For more details about pandas filtering, please see the following references.)\n", + "* [pandas: multiple conditions while indexing data frame](https://stackoverflow.com/questions/22591174/pandas-multiple-conditions-while-indexing-data-frame-unexpected-behavior)\n", + "* [pandas.DataFrame.apply](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.apply.html)\n", + "* [How to test if a string contains one of the substrings in a list...](https://stackoverflow.com/questions/26577516/how-to-test-if-a-string-contains-one-of-the-substrings-in-a-list-in-pandas)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "30edac2d", + "metadata": {}, + "outputs": [], + "source": [ + "# (pandas query) filter n_dim==1, non-dimension variables whose names do not contain the following substrings\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter = df_info[(df_info.n_dim == 1) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (df_info.is_dim==False) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4777a659", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " var_1d = df_filter.var_name.values[0]\n", + " var_1d\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fb5ae985", + "metadata": {}, + "outputs": [], + "source": [ + "# Note: if failed, change to another variable to plot.\n", + "try:\n", + " ds[var_1d].plot()\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "markdown", + "id": "9e41066e", + "metadata": {}, + "source": [ + "#### 2-dimensional basic plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "09bd4adc", + "metadata": {}, + "outputs": [], + "source": [ + "# (pandas query) filter n_dim==2, non-dimension variables whose names do not contain the 
following substrings\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_2 = df_info[(df_info.n_dim == 2) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (df_info.is_dim==False) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter_2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2cdea1a9", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " var_2d = df_filter_2.var_name.values[0]\n", + " var_2d\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f5f48879", + "metadata": {}, + "outputs": [], + "source": [ + "# Note: if failed, change to another variable to plot.\n", + "try:\n", + " print(ds[var_2d].dims)\n", + " # ds[var_2d].plot()\n", + "\n", + " # conventionally, use \"time\" as x-axis\n", + " ds[var_2d].plot(x=\"time\")\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "markdown", + "id": "7554b2a6", + "metadata": {}, + "source": [ + "#### qc-plotting (optional)\n", + "\n", + "Note: act qc plotting has more strict requirement, one of them is the associated qc variable need to have \"flag_masks\" attributes. 
Which is added by using ds = act.io.armfiles.read_netcdf(files_list), then ds.clean.cleanup()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "71c2096f", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " ds_act = act.io.armfiles.read_netcdf(full_path)\n", + " print(type(ds_act))\n", + " ds_act.clean.cleanup()\n", + "\n", + " # or \n", + " # ds.clean.cleanup()\n", + "except Exception as e:\n", + " print(\"ERROR\", e)\n", + " ds_act = ds" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "844f8505", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter valid variables for ACT qc plotting\n", + "condition = (\"qc_\" + df_info.var_name).apply(lambda x: ds_act[x].attrs.get(\"flag_masks\") is not None \n", + " if x in list(ds.variables) else False)\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_3 = df_info[(df_info.is_dim==False) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings))) &\n", + " condition\n", + " ]\n", + "df_filter_3" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ecd38cc6", + "metadata": {}, + "outputs": [], + "source": [ + "# QC Plot\n", + "try:\n", + " qc_variable = df_filter_3.var_name.values[0]\n", + " print(qc_variable)\n", + "except Exception as e:\n", + " print(e)\n", + " \n", + "try:\n", + "\n", + " # Plot\n", + " qc_display = act.plotting.TimeSeriesDisplay(ds_act)\n", + " qc_display.add_subplots((2,), figsize = (9.5,10))\n", + " qc_ax = qc_display.plot(qc_variable, subplot_index=(0,), set_title=\"QC results on field: \" + qc_variable,)\n", + " qc_ax.grid()\n", + " qc_display.qc_flag_block_plot(qc_variable, subplot_index=(1,))\n", + " qc_ax.set_xlabel(f\"UTC Time starts at {ds.time.data[0]}\")\n", + "\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e)" + ] + }, + { + "cell_type": "markdown", + "id": "d76e4d27", + "metadata": 
{}, + "source": [ + "#### bonus: choose variables to plot from a dropdown menu " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bb733804", + "metadata": {}, + "outputs": [], + "source": [ + "# Valid variables filtering\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_4 = df_info[(df_info.is_dim==False) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter_4" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "82d5c20d", + "metadata": {}, + "outputs": [], + "source": [ + "# example 1: using xarray plot\n", + "\n", + "# Uncomment the following cell to try the interactive plot (ctrl + /)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2fc9aaa4", + "metadata": {}, + "outputs": [], + "source": [ + "# %matplotlib widget\n", + "# plt.clf()\n", + "\n", + "# fig, ax = plt.subplots(figsize=(10, 4))\n", + "\n", + "# available_variables = df_filter_4.var_name.values\n", + "# @widgets.interact(var=available_variables)\n", + "# def update(var = available_variables[0]):\n", + "# fig.clear() # Remove old lines from plot and plot new one\n", + "# if len(ds[var].dims)==2:\n", + "# ds[var].plot(x=\"time\", add_colorbar=False)\n", + "# else:\n", + "# ds[var].plot()\n", + "# plt.grid()\n", + "# plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fabdd802", + "metadata": {}, + "outputs": [], + "source": [ + "# example 2: using act plot\n", + "\n", + "# Uncomment the following cell to try the interactive plot (ctrl + /)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "962b4186", + "metadata": {}, + "outputs": [], + "source": [ + "# %matplotlib widget\n", + "# plt.clf()\n", + "\n", + "# available_variables = df_filter_4.var_name.values\n", + "\n", + "\n", + "# @widgets.interact(var=available_variables)\n", + "# def 
update(var = available_variables[0]):\n", + "\n", + "# i_display = act.plotting.TimeSeriesDisplay(ds_act)\n", + "# i_display.add_subplots((1,), figsize=(10, 4))\n", + "# ax = i_display.plot(var, subplot_index=(0,), set_title=f\"{var} ({ds_act[var].attrs['long_name']})\",)\n", + "\n", + "# ax.set_xlabel(f\"UTC Time starts at {ds.time.data[0]}\")\n", + "# ax.grid()\n", + "# plt.show()\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.16" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/HVPS-AIR/aafhvps.c1.ipynb b/VAPs/quicklook/HVPS-AIR/aafhvps.c1.ipynb new file mode 100644 index 00000000..28960894 --- /dev/null +++ b/VAPs/quicklook/HVPS-AIR/aafhvps.c1.ipynb @@ -0,0 +1,297 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# AAFHVPS.C1 Plots\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/hvps-air) for more information about this vap." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib widget\n", + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import ipywidgets as widgets\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "# Data archive directory\n", + "DATA_DIR = r'/data/archive/'\n", + "\n", + "# Datastream info\n", + "DATASTREAM_NAME = 'aafhvps'\n", + "DATA_LEVEL = 'c1'\n", + "LOCATIONS = [{'end_date': '2018-12-08', 'facility': 'F1', 'site': 'cor', 'start_date': '2018-11-04'}, {'end_date': '2018-02-19', 'facility': 'F1', 'site': 'ena', 'start_date': '2017-06-21'}]" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "9faaf875", + "metadata": {}, + "source": [ + "## Define site, facility, and date range" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ac6764f5", + "metadata": {}, + "outputs": [], + "source": [ + "print(\"The following locations and date ranges are available for this VAP:\")\n", + "display(pd.DataFrame(LOCATIONS, columns=['site', 'facility', 'start_date', 'end_date']))" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "8d132223", + "metadata": {}, + "source": [ + "#### Define site, facility, and date range (date format: YYYY-MM-DD) using the variables below:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e563983a", + "metadata": {}, + "outputs": [], + "source": [ + "site_facility = ( 'cor', 'F1' )\n", + "\n", + "date_start = '2018-12-06'\n", + "date_end = '2018-12-08'" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "bccd3dfe-2f99-49a5-bace-ea37e8dd8fc3", + "metadata": {}, + "source": [ + "## Load data files\n", + "Load data files from /data/archive/" + ] + }, + { + "cell_type": "code", + 
"execution_count": null, + "id": "cb4b9a26-c574-49c0-a521-658fa553e39e", + "metadata": {}, + "outputs": [], + "source": [ + "# Compile list of files\n", + "site, facility = site_facility\n", + "d_date_start = datetime.strptime(date_start, '%Y-%m-%d')\n", + "d_date_end = datetime.strptime(date_end, '%Y-%m-%d')\n", + "dir_path = os.path.join(DATA_DIR + site, site + DATASTREAM_NAME + facility + r'.' + DATA_LEVEL )\n", + "dir_path\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6be8f3dc", + "metadata": {}, + "outputs": [], + "source": [ + "from datetime import date, timedelta\n", + "import pandas as pd\n", + "\n", + "def get_ARM_formated_dates(start_date, end_date):\n", + " \"\"\"\n", + " Get a list of ARM conventional formated date lists, based on start_date and end_date(inclusive)\n", + " EXAMPLE:\n", + " get_ARM_formated_dates(start_date=\"20180219\", end_date=\"20180221\")\n", + " >> [\"20180219\", \"20180220\", \"20180221\"] \n", + " \"\"\"\n", + " \n", + " _start_date = pd.to_datetime(start_date)\n", + " _end_date = pd.to_datetime(end_date)\n", + " \n", + " delta = _end_date - _start_date # returns timedelta \n", + " dates = []\n", + "\n", + " for i in range(delta.days + 1):\n", + " day = _start_date + timedelta(days=i)\n", + " day_formated = day.strftime(format=\"%Y%m%d\")\n", + " dates.append(day_formated)\n", + " return dates\n", + "\n", + "\n", + "get_ARM_formated_dates(start_date=date_start, end_date=date_end)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "51feea2e", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter a list of files based on date pattern\n", + "import glob\n", + "dates = get_ARM_formated_dates(start_date=date_start, end_date=date_end)\n", + "files_filter = []\n", + "for date in dates:\n", + " files_filter += glob.glob(f'{dir_path}/*.{date}*.*')\n", + " files_filter\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0e5d0d7", + 
"metadata": {}, + "outputs": [], + "source": [ + "# Load files as a single dataset\n", + "files_list = files_filter \n", + "ds = act.io.armfiles.read_netcdf(files_list)\n", + "ds.clean.cleanup()\n", + "print(f'{len(files_list)} files loaded')\n", + "ds\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "4a551094-9ec0-4b64-b80a-9940573c2f50", + "metadata": {}, + "source": [ + "## Plot time series data\n", + "#### Define the list of variables to be plotted:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d09b789e-84f1-4605-846b-a72c110c8048", + "metadata": {}, + "outputs": [], + "source": [ + "variables_to_plot = ['total_number_concentration', 'number_concentration']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3458fb08-035b-4898-9253-0a94e6f9c97b", + "metadata": {}, + "outputs": [], + "source": [ + "ts_display = act.plotting.TimeSeriesDisplay(ds)\n", + "ts_display.add_subplots((len(variables_to_plot),), figsize = (9.5,4*len(variables_to_plot)))\n", + "\n", + "for i,v in enumerate(variables_to_plot):\n", + " ts_ax = ts_display.plot(v, subplot_index=(i,), set_title=ds.variables[v].attrs['long_name'],)\n", + " ts_ax.grid()\n", + "\n", + "plt.show()\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "2308db16-f362-4033-a11a-c3e5e75ad9ba", + "metadata": {}, + "source": [ + "## Field selection dropdown menu\n", + "Select variable to be plotted from a dropdown menu" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d5792fa8-2253-441a-8ac6-1e07a0345031", + "metadata": {}, + "outputs": [], + "source": [ + "plt.ioff()\n", + "\n", + "# populate dropdown menu with available variables \n", + "available_variables = [v for v in ds.variables if not('time' in v or v.startswith('qc_') or v.startswith('source_')) and 'long_name' in ds.variables[v].attrs]\n", + "d_variable = 'total_number_concentration'\n", + "dropdown = widgets.Dropdown(\n", + " options = 
[(ds.variables[v].attrs['long_name'], v) for v in available_variables],\n", + " value= d_variable,\n", + " description='Field:',\n", + " disabled=False,\n", + ")\n", + "dropdown.layout.margin = '0px 30% 0px 20%'\n", + "dropdown.layout.width = '50%'\n", + "\n", + "# set up display\n", + "i_display = act.plotting.TimeSeriesDisplay(ds)\n", + "i_display.add_subplots((1,), figsize = (9.5,5))\n", + "i_ax = i_display.plot(d_variable, subplot_index=(0,), set_title=ds.variables[d_variable].attrs['long_name'],)\n", + "i_ax.grid()\n", + "i_fig = i_display.fig\n", + "\n", + "# update plot callback function\n", + "def update_plot(change):\n", + " i_ax.cla()\n", + " i_ax_new = i_display.plot(change.new, subplot_index=(0,), set_title=ds.variables[change.new].attrs['long_name'],)\n", + " i_ax_new.grid()\n", + " i_fig.canvas.draw()\n", + " i_fig.canvas.flush_events()\n", + "\n", + "dropdown.observe(update_plot, names='value')\n", + "\n", + "widgets.AppLayout(\n", + " header=dropdown,\n", + " center=i_fig.canvas,\n", + " pane_heights=[1, 6,1]\n", + ")\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.12" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/INLETCVI-AIR/INLETCVI-AIR_tutorial.ipynb b/VAPs/quicklook/INLETCVI-AIR/INLETCVI-AIR_tutorial.ipynb new file mode 100644 index 00000000..d345342a --- /dev/null +++ b/VAPs/quicklook/INLETCVI-AIR/INLETCVI-AIR_tutorial.ipynb @@ -0,0 +1,838 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + 
"source": [ + "# AAFINLETCVI.C1 Notebook\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/inletcvi-air) for more information about this vap." + ] + }, + { + "cell_type": "markdown", + "id": "97097763", + "metadata": {}, + "source": [ + "In this notebook, we demonstrate the workflow to explore ARM vap data (using aafinletcvi as an example.) Value-added products (VAPs) are higher-order data products that have been analyzed and processed to ease scientists’ use of ARM data in atmospheric research and global climate models. \n", + "Here is the main content we will cover." + ] + }, + { + "cell_type": "markdown", + "id": "eddec40f", + "metadata": {}, + "source": [ + "# Table of Content\n", + "## Access the data\n", + "* How to retrieve the data\n", + "* Data path and file name conventions\n", + "* Load data\n", + "## Explore the data\n", + "* NetCDF Data structure\n", + "* Xarray essentials\n", + "* Xarray Variable\n", + "## Plot the data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "import random\n", + "\n", + "import glob\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "id": "2372d5be", + "metadata": {}, + "source": [ + "## Access the data" + ] + }, + { + "cell_type": "markdown", + "id": "5b0d9684", + "metadata": {}, + "source": [ + "### How to retrieve the data\n", + "We assume the path \"/data/archive\" is available where you are running this notebook. 
You can use `os.path.exists(\"/data/archive\")` to verify if the path exists on your machine.\n", + "\n", + "\n", + "### Data path and file name conventions\n", + "There are several common terms regarding ARM data, for example, data-stream-name, data-level, site, facility, instrument, etc. (For more details, please see [ARM Data File Standards Version 1.2](https://www.google.com/search?q=arm+datastream+facility+cite+definition&rlz=1C1GCEJ_enUS1029US1029&ei=hb41ZICrBPukqtsPvMODIA&ved=0ahUKEwjAgeiS0aL-AhV7kmoFHbzhAAQQ4dUDCBA&uact=5&oq=arm+datastream+facility+cite+definition&gs_lcp=Cgxnd3Mtd2l6LXNlcnAQAzIFCAAQogQyBQgAEKIEMgUIABCiBDIFCAAQogQyBQgAEKIEOgoIABBHENYEELADOgoIIRCgARDDBBAKSgQIQRgAUJ4DWIoOYO0PaAFwAXgAgAGMAYgBuQmSAQMyLjmYAQCgAQHIAQjAAQE&sclient=gws-wiz-serp).) \n", + "\n", + "For example, this notebook is called `aafinletcvi.c1`, where `aafinletcvi` is the \"datastream name\", and `c1` is the \"data level\".\n", + "\n", + "This datastream also contains site `cor` and facility `F1`. (Note: an individual datastream might have multiple site-facility pairs.)\n", + "In such a case, the data of this data-stream is stored at `/data/archive/cor/coraafinletcviF1.c1`, which is in the format of `{DATA_DIR}/{site}/{site}{datastream_name}{facility}.{data_level}`. We can use the following method to assign the data-stream directory `datastream_dir = os.path.join(DATA_DIR, site, site + DATASTREAM_NAME + facility + '.' + DATA_LEVEL )`\n", + "\n", + "The data files under datastream_dir also follow naming conventions. Once you reach the datastream_dir level, the most important file naming convention for differentiating the files is \"yyyyMMdd.hhmmss\", which comes in handy for filtering files by datetime. 
For example, we can use `glob.glob(f'{datastream_dir}/*.200709*.*')` to filter files from September 2007.\n", + "\n", + "(Note: refer to https://adc.arm.gov/discovery/#/ and the https://adc.arm.gov/solr8/metadata/select API to explore ARM datastreams and their associated available sites and facilities.)\n", + "\n", + "Please see the following examples in action" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d7e9eb85", + "metadata": {}, + "outputs": [], + "source": [ + "# Verify if DATA_DIR path exists\n", + "DATA_DIR = \"/data/archive\"\n", + "os.path.exists(DATA_DIR)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "586993fd", + "metadata": {}, + "outputs": [], + "source": [ + "# Specify datastream_dir following the path conventions and check its existence\n", + "DATASTREAM_NAME = \"aafinletcvi\"\n", + "DATA_LEVEL = \"c1\"\n", + "site = \"cor\"\n", + "facility = \"F1\"\n", + "datastream_dir = os.path.join(DATA_DIR, site, site + DATASTREAM_NAME + facility + '.' 
+ DATA_LEVEL )\n", + "print(datastream_dir)\n", + "print(os.path.exists(datastream_dir))\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0742f7c1", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: list 5 (random) files under datastream_dir\n", + "files = os.listdir(datastream_dir)\n", + "files[:5]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "39b98a36", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: get most recent file\n", + "list_of_files = glob.glob(f\"{datastream_dir}/*\") # * means all if need specific format then *.csv\n", + "latest_file = max(list_of_files, key=os.path.getctime)\n", + "latest_file" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "902d514e", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus sort datastream files based on datetime\n", + "files = os.listdir(datastream_dir)\n", + "file_sorted = files.copy()\n", + "file_sorted.sort() \n", + "print(file_sorted[:5])\n", + "\n", + "# to reverse\n", + "file_sorted_reverse = files.copy() \n", + "file_sorted_reverse.sort(reverse=True)\n", + "print(file_sorted_reverse[:5])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ec5923b2", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: pattern matching\n", + "# filter the 200709** files under datastream_dir\n", + "files_filter = glob.glob(f'{datastream_dir}/*.200709*.*')\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c4a4aa18", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: use armnotebook_utils.file_filter (TODO) to filter files based on datastream info \n" + ] + }, + { + "cell_type": "markdown", + "id": "cfeb9efc", + "metadata": {}, + "source": [ + "### Load data\n", + "The arm data is stored in [NetCDF](https://en.wikipedia.org/wiki/NetCDF#:~:text=%22NetCDF%20(network%20Common%20Data%20Form,format%20for%20representing%20scientific%20data.) format. 
We can use xarray's `open_dataset` method to load single file, or `open_mfdataset` to open multiple files. (Note: the latter will still return a single xarray dataset object by combining multiple files.)\n", + "\n", + "Note: open_dataset keeps the file handle open and lazy loads its contents. All parameters are passed directly to open_dataset. It is a preferable method over load_dataset for memory-effiency. (more details, please see [xarray.open_dataset](https://docs.xarray.dev/en/stable/generated/xarray.open_dataset.html))\n", + "\n", + "See the following example in action" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a440a329", + "metadata": {}, + "outputs": [], + "source": [ + "# open single file\n", + "full_path = latest_file\n", + "print(full_path)\n", + "ds_single = xr.open_dataset(full_path)\n", + "ds_single" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0143a3d", + "metadata": {}, + "outputs": [], + "source": [ + "print(ds_single)\n", + "print(type(ds_single))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1c0f8939", + "metadata": {}, + "outputs": [], + "source": [ + "# open multiple files\n", + "n_files = 3\n", + "full_paths = [os.path.join(datastream_dir, f_path) for f_path in file_sorted_reverse[:n_files]]\n", + "print(full_paths)\n", + "try: # Note: sometimes multiple files cannot be merged, so we used try except here.\n", + " ds_mutiple = xr.open_mfdataset(full_paths)\n", + " # ds_mutiple\n", + " print(type(ds_mutiple))\n", + " print(ds_mutiple)\n", + "except Exception as e:\n", + " print(e)" + ] + }, + { + "cell_type": "markdown", + "id": "3fa59943", + "metadata": {}, + "source": [ + "## Explore the data" + ] + }, + { + "cell_type": "markdown", + "id": "571f69b9", + "metadata": {}, + "source": [ + "### NetCDF Data structure \n", + "\n", + "\n", + "(If you are confident with NetCDF basics and xarray essentials, feel free to skip this session.)\n", + "Before we dive into 
data exploration, there are some essential concepts we should be familiar with (shown below): \n", + "* Dataset\n", + "* Data array\n", + "* Variable\n", + "* Dimension\n", + "* Coordinate\n", + "* Data Type\n", + "* Meta Data (Attributes)\n", + "\n", + "We will not go into details about NetCDF basics, but here are some references you might find helpful:\n", + "* [Components of a NetCDF Dataset](https://iprc.soest.hawaii.edu/users/xfu/tool/guidef-7.html#)\n", + "* [Network Common Data Form (NetCDF)](https://www.unidata.ucar.edu/software/netcdf/) and [A Brief History of (netCDF) Time](https://www.unidata.ucar.edu/software/netcdf/time/recs.html)\n", + "\n", + "\n", + "\n", + "### Xarray essentials\n", + "Earlier we introduced the xarray package and used `open_dataset` and `open_mfdataset` to retrieve a NetCDF data file as an xarray Dataset object. Recall that you can review the data in a notebook by using `print(ds)` or `ds`. \n", + "\n", + "Using xarray to retrieve the aforementioned NetCDF basics is straightforward. 
In general,\n", + "* Dataset: `ds`\n", + "* Data array: `ds.variables`\n", + "* Variable: `ds.variables`\n", + "* Dimension: `ds.dims`\n", + "* Coordinate: `ds.coords`\n", + "* Data Type: `type`\n", + "* Meta Data (Attributes): `ds.attrs`\n", + "\n", + "Also, here are some references if you are new to xarray\n", + "* [Xarray in 45 minutes](https://tutorial.xarray.dev/overview/xarray-in-45-min.html)\n", + "* [Handling NetCDF Files using XArray for Absolute Beginners](https://towardsdatascience.com/handling-netcdf-files-using-xarray-for-absolute-beginners-111a8ab4463f)\n", + "\n", + "Try the following commands in action\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b3eac323", + "metadata": {}, + "outputs": [], + "source": [ + "ds = ds_single" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "77ecf85d", + "metadata": {}, + "outputs": [], + "source": [ + "# Dataset \n", + "ds" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "25e7de09", + "metadata": {}, + "outputs": [], + "source": [ + "# Data array, Variable\n", + "# Note: the info can be overwhelming. 
Do not attempt to grasp everything at the first glance.\n", + "ds.variables" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8c41b67e", + "metadata": {}, + "outputs": [], + "source": [ + "# dimenssions\n", + "ds.dims" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "156f1dfc", + "metadata": {}, + "outputs": [], + "source": [ + "# coordinates\n", + "ds.coords" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "277d6064", + "metadata": {}, + "outputs": [], + "source": [ + "# Meta Data (Attributes)\n", + "ds.attrs" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d334681f", + "metadata": {}, + "outputs": [], + "source": [ + "# type\n", + "print(type(ds))\n", + "print(type(ds.variables))\n", + "print(type(ds.dims))\n", + "print(type(ds.coords))\n", + "print(type(ds.attrs))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "643399d6", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: retrieve variable names only\n", + "list(ds.variables)" + ] + }, + { + "cell_type": "markdown", + "id": "1cf36878", + "metadata": {}, + "source": [ + "#### Discussion: variable vs. coordinates vs. dimenssions. \n", + "From [Components of a NetCDF Dataset](https://iprc.soest.hawaii.edu/users/xfu/tool/guidef-7.html#):\n", + "\n", + "* Dimession: A dimension may be used to represent a real physical dimension, for example, time, latitude, longitude, or height. A dimension might also be used to index other quantities, for example station or model-run-number.\n", + "A netCDF dimension has both a name and a length. A dimension length is an arbitrary positive integer, except that one dimension in a netCDF dataset can have the length UNLIMITED.\n", + "\n", + "* Variables: Variables are used to store the bulk of the data in a netCDF dataset. A variable represents an array of values of the same type. A scalar value is treated as a 0-dimensional array. 
A variable has a name, a data type, and a shape described by its list of dimensions specified when the variable is created. A variable may also have associated attributes, which may be added, deleted or changed after the variable is created.\n", + "\n", + "* Coordinate (Variables): It is legal for a variable to have the same name as a dimension. Such variables have no special meaning to the netCDF library. However there is a convention that such variables should be treated in a special way by software using this library.\n", + "A variable with the same name as a dimension is called a coordinate variable. It typically defines a physical coordinate corresponding to that dimension.\n", + "\n", + "\n", + "Tips: By definition, coordinates and dimensions are also variables. For example, ds.time is a coordinate but it is also a special variable. But in practice, when we talk about a variable, it implies a regular/non-coordinate variable. We will use this convention for the rest of the notebook. " + ] + }, + { + "cell_type": "markdown", + "id": "d47120f9", + "metadata": {}, + "source": [ + "### Xarray Variable\n", + "\n", + "\n", + "We can use `ds.variables` to access the variables of a dataset. But printing out the whole dataset can often be overwhelming. Instead, we work on an individual variable (also called an xarray data array) using the `ds[var_name]` syntax, where `var_name` is the name of the variable. See the following examples in action.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5a57e136", + "metadata": {}, + "outputs": [], + "source": [ + "print(type(ds[\"time\"]))\n", + "ds[\"time\"]" + ] + }, + { + "cell_type": "markdown", + "id": "51ec643e", + "metadata": {}, + "source": [ + "#### Variable properties\n", + "For an individual variable, the following properties are most commonly used, along with the associated xarray attribute to retrieve them. 
(assuming var = ds[var_name])\n", + "* name: `var.name`\n", + "* data content: `var.data`\n", + "* attributes: `var.attrs`\n", + "* dimenstions: `var.dims`\n", + "* data type: `var.data.dtype`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a632a525", + "metadata": {}, + "outputs": [], + "source": [ + "var_name = \"time\"\n", + "var = ds[var_name]\n", + "\n", + "print(\"var.name: \\n\", var.name, \"\\n\")\n", + "print(\"var.data: \\n\", var.data, \"\\n\")\n", + "print(\"var.attrs: \\n\", var.attrs, \"\\n\")\n", + "print(\"var.dims: \\n\", var.dims, \"\\n\")\n", + "print(\"var.data.dtype: \\n\", var.data.dtype, \"\\n\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b256b07f", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: indivdual variable info table (collect and display variable info as a tabular format)\n", + "df_info = pd.DataFrame()\n", + "df_info[\"var_name\"] = list(ds.variables)\n", + "df_info[\"dims\"] = df_info.var_name.apply(lambda x: ds[x].dims)\n", + "df_info[\"is_dim\"] = df_info.var_name.apply(lambda x: x in ds.dims)\n", + "df_info[\"n_dim\"] = df_info.var_name.apply(lambda x: len(ds[x].dims))\n", + "df_info[\"attrs\"] = df_info.var_name.apply(lambda x: ds[x].attrs)\n", + "df_info[\"dtype\"] = df_info.var_name.apply(lambda x: ds[x].data.dtype)\n", + "df_info" + ] + }, + { + "cell_type": "markdown", + "id": "e6a36b96", + "metadata": {}, + "source": [ + "### Data cleaning/Preprocessing (skipped)\n", + "Data cleaning and preprocessing is an important stage in the the data analysis pipeline. However it will be out of the scope of this notebook. For more information about data cleaning and preprocessing basics, here are some references. 
\n", + "* [Xarray Fundamentals](https://earth-env-data-science.github.io/lectures/xarray/xarray.html)\n", + "* [Xarray Tutorial — Pangeo Gallery documentation](http://gallery.pangeo.io/repos/pangeo-data/pangeo-tutorial-gallery/xarray.html)\n", + "* [Pythonic Data Cleaning With pandas and NumPy](https://realpython.com/python-data-cleaning-numpy-pandas/)\n", + "* [Pandas - Cleaning Data](https://www.w3schools.com/python/pandas/pandas_cleaning.asp)" + ] + }, + { + "cell_type": "markdown", + "id": "ae6e1a69", + "metadata": {}, + "source": [ + "### Plotting\n", + "Data visualization (or plotting) is another important data analysis topic and deserves its own discussion. In this notebook we will only demonstrate basic tools for simple data visualization tasks.\n", + "\n", + "Here are some references you might find useful:\n", + "* xarray plotting: https://docs.xarray.dev/en/stable/user-guide/plotting.html\n", + "* Atmospheric Community Toolkit (ACT): https://arm-doe.github.io/ACT/index.html\n", + "\n", + "Note: this notebook is auto-generated using a template. It uses a general idea to select variable(s) to plot and is not customized to each individual notebook. Feel free to change the variables of interest, especially if a certain figure fails to plot." + ] + }, + { + "cell_type": "markdown", + "id": "7e417fb2", + "metadata": {}, + "source": [ + "#### 1-dimensional basic time series plot\n", + "\n", + "For the following plot we would like to find variables such that\n", + "* it has one and only one dimension\n", + "* \"time\" is its coordinate variable\n", + "* it is not a dimension itself,\n", + "* it is not a special variable with substrings within [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "\n", + "Please see the following example in action. 
(For more details about pandas filtering, please see the following references.)\n", + "* [pandas: multiple conditions while indexing data frame](https://stackoverflow.com/questions/22591174/pandas-multiple-conditions-while-indexing-data-frame-unexpected-behavior)\n", + "* [pandas.DataFrame.apply](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.apply.html)\n", + "* [How to test if a string contains one of the substrings in a list...](https://stackoverflow.com/questions/26577516/how-to-test-if-a-string-contains-one-of-the-substrings-in-a-list-in-pandas" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "30edac2d", + "metadata": {}, + "outputs": [], + "source": [ + "# (pandas query) filter n_dim==1, non-dimenssional, and not contains the following substrings\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter = df_info[(df_info.n_dim == 1) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (df_info.is_dim==False) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4777a659", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " var_1d = df_filter.var_name.values[0]\n", + " var_1d\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fb5ae985", + "metadata": {}, + "outputs": [], + "source": [ + "# Note: if failed, change to another variable to plot.\n", + "try:\n", + " ds[var_1d].plot()\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "markdown", + "id": "9e41066e", + "metadata": {}, + "source": [ + "#### 2-dimenssional basic plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "09bd4adc", + "metadata": {}, + "outputs": [], + "source": [ + "# (pandas query) filter n_dim==2, non-dimenssional, and not contains the 
following substrings\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_2 = df_info[(df_info.n_dim == 2) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (df_info.is_dim==False) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter_2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2cdea1a9", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " var_2d = df_filter_2.var_name.values[0]\n", + " var_2d\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f5f48879", + "metadata": {}, + "outputs": [], + "source": [ + "# Note: if failed, change to another variable to plot.\n", + "try:\n", + " print(ds[var_2d].dims)\n", + " # ds[var_2d].plot()\n", + "\n", + " # conventionally, use \"time\" as x-axis\n", + " ds[var_2d].plot(x=\"time\")\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "markdown", + "id": "7554b2a6", + "metadata": {}, + "source": [ + "#### qc-plotting (optional)\n", + "\n", + "Note: act qc plotting has more strict requirement, one of them is the associated qc variable need to have \"flag_masks\" attributes. 
Which is added by using ds = act.io.armfiles.read_netcdf(files_list), then ds.clean.cleanup()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "71c2096f", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " ds_act = act.io.armfiles.read_netcdf(full_path)\n", + " print(type(ds_act))\n", + " ds_act.clean.cleanup()\n", + "\n", + " # or \n", + " # ds.clean.cleanup()\n", + "except Exception as e:\n", + " print(\"ERROR\", e)\n", + " ds_act = ds" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "844f8505", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter valid variables for ACT qc plotting\n", + "condition = (\"qc_\" + df_info.var_name).apply(lambda x: ds_act[x].attrs.get(\"flag_masks\") is not None \n", + " if x in list(ds.variables) else False)\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_3 = df_info[(df_info.is_dim==False) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings))) &\n", + " condition\n", + " ]\n", + "df_filter_3" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ecd38cc6", + "metadata": {}, + "outputs": [], + "source": [ + "# QC Plot\n", + "try:\n", + " qc_variable = df_filter_3.var_name.values[0]\n", + " print(qc_variable)\n", + "except Exception as e:\n", + " print(e)\n", + " \n", + "try:\n", + "\n", + " # Plot\n", + " qc_display = act.plotting.TimeSeriesDisplay(ds_act)\n", + " qc_display.add_subplots((2,), figsize = (9.5,10))\n", + " qc_ax = qc_display.plot(qc_variable, subplot_index=(0,), set_title=\"QC results on field: \" + qc_variable,)\n", + " qc_ax.grid()\n", + " qc_display.qc_flag_block_plot(qc_variable, subplot_index=(1,))\n", + " qc_ax.set_xlabel(f\"UTC Time starts at {ds.time.data[0]}\")\n", + "\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e)" + ] + }, + { + "cell_type": "markdown", + "id": "d76e4d27", + "metadata": 
{}, + "source": [ + "#### bonus: choose variables to plot from a dropdown menu " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bb733804", + "metadata": {}, + "outputs": [], + "source": [ + "# Valid variables filtering\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_4 = df_info[(df_info.is_dim==False) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter_4" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "82d5c20d", + "metadata": {}, + "outputs": [], + "source": [ + "# example 1: using xarray plot\n", + "\n", + "# Uncomment the following cell to try the interactive plot (ctrl + /)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2fc9aaa4", + "metadata": {}, + "outputs": [], + "source": [ + "# %matplotlib widget\n", + "# plt.clf()\n", + "\n", + "# fig, ax = plt.subplots(figsize=(10, 4))\n", + "\n", + "# available_variables = df_filter_4.var_name.values\n", + "# @widgets.interact(var=available_variables)\n", + "# def update(var = available_variables[0]):\n", + "# fig.clear() # Remove old lines from plot and plot new one\n", + "# if len(ds[var].dims)==2:\n", + "# ds[var].plot(x=\"time\", add_colorbar=False)\n", + "# else:\n", + "# ds[var].plot()\n", + "# plt.grid()\n", + "# plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fabdd802", + "metadata": {}, + "outputs": [], + "source": [ + "# example 2: using act plot\n", + "\n", + "# Uncomment the following cell to try the interactive plot (ctrl + /)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "962b4186", + "metadata": {}, + "outputs": [], + "source": [ + "# %matplotlib widget\n", + "# plt.clf()\n", + "\n", + "# available_variables = df_filter_4.var_name.values\n", + "\n", + "\n", + "# @widgets.interact(var=available_variables)\n", + "# def 
update(var = available_variables[0]):\n", + "\n", + "# i_display = act.plotting.TimeSeriesDisplay(ds_act)\n", + "# i_display.add_subplots((1,), figsize=(10, 4))\n", + "# ax = i_display.plot(var, subplot_index=(0,), set_title=f\"{var} ({ds_act[var].attrs['long_name']})\",)\n", + "\n", + "# ax.set_xlabel(f\"UTC Time starts at {ds.time.data[0]}\")\n", + "# ax.grid()\n", + "# plt.show()\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.16" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/INLETCVI-AIR/aafinletcvi.c1.ipynb b/VAPs/quicklook/INLETCVI-AIR/aafinletcvi.c1.ipynb new file mode 100644 index 00000000..5f7787c7 --- /dev/null +++ b/VAPs/quicklook/INLETCVI-AIR/aafinletcvi.c1.ipynb @@ -0,0 +1,339 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# AAFINLETCVI.C1 Plots\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/inletcvi-air) for more information about this vap." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib widget\n", + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import ipywidgets as widgets\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "# Data archive directory\n", + "DATA_DIR = r'/data/archive/'\n", + "\n", + "# Datastream info\n", + "DATASTREAM_NAME = 'aafinletcvi'\n", + "DATA_LEVEL = 'c1'\n", + "LOCATIONS = [{'end_date': '2018-12-08', 'facility': 'F1', 'site': 'cor', 'start_date': '2018-11-04'}, {'end_date': '2018-02-19', 'facility': 'F1', 'site': 'ena', 'start_date': '2017-06-21'}, {'end_date': '2016-09-22', 'facility': 'F1', 'site': 'sgp', 'start_date': '2016-04-25'}]" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "9faaf875", + "metadata": {}, + "source": [ + "## Define site, facility, and date range" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ac6764f5", + "metadata": {}, + "outputs": [], + "source": [ + "print(\"The following locations and date ranges are available for this VAP:\")\n", + "display(pd.DataFrame(LOCATIONS, columns=['site', 'facility', 'start_date', 'end_date']))" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "8d132223", + "metadata": {}, + "source": [ + "#### Define site, facility, and date range (date format: YYYY-MM-DD) using the variables below:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e563983a", + "metadata": {}, + "outputs": [], + "source": [ + "site_facility = ( 'sgp', 'F1' )\n", + "\n", + "date_start = '2016-09-20'\n", + "date_end = '2016-09-22'" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "bccd3dfe-2f99-49a5-bace-ea37e8dd8fc3", + "metadata": {}, + "source": [ + "## Load 
data files\n", + "Load data files from /data/archive/" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cb4b9a26-c574-49c0-a521-658fa553e39e", + "metadata": {}, + "outputs": [], + "source": [ + "# Compile list of files\n", + "site, facility = site_facility\n", + "d_date_start = datetime.strptime(date_start, '%Y-%m-%d')\n", + "d_date_end = datetime.strptime(date_end, '%Y-%m-%d')\n", + "dir_path = os.path.join(DATA_DIR + site, site + DATASTREAM_NAME + facility + r'.' + DATA_LEVEL )\n", + "dir_path\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6be8f3dc", + "metadata": {}, + "outputs": [], + "source": [ + "from datetime import date, timedelta\n", + "import pandas as pd\n", + "\n", + "def get_ARM_formated_dates(start_date, end_date):\n", + " \"\"\"\n", + " Get a list of ARM conventional formated date lists, based on start_date and end_date(inclusive)\n", + " EXAMPLE:\n", + " get_ARM_formated_dates(start_date=\"20180219\", end_date=\"20180221\")\n", + " >> [\"20180219\", \"20180220\", \"20180221\"] \n", + " \"\"\"\n", + " \n", + " _start_date = pd.to_datetime(start_date)\n", + " _end_date = pd.to_datetime(end_date)\n", + " \n", + " delta = _end_date - _start_date # returns timedelta \n", + " dates = []\n", + "\n", + " for i in range(delta.days + 1):\n", + " day = _start_date + timedelta(days=i)\n", + " day_formated = day.strftime(format=\"%Y%m%d\")\n", + " dates.append(day_formated)\n", + " return dates\n", + "\n", + "\n", + "get_ARM_formated_dates(start_date=date_start, end_date=date_end)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "51feea2e", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter a list of files based on date pattern\n", + "import glob\n", + "dates = get_ARM_formated_dates(start_date=date_start, end_date=date_end)\n", + "files_filter = []\n", + "for date in dates:\n", + " files_filter += glob.glob(f'{dir_path}/*.{date}*.*')\n", + " files_filter\n", + "files_filter" + 
] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0e5d0d7", + "metadata": {}, + "outputs": [], + "source": [ + "# Load files as a single dataset\n", + "files_list = files_filter \n", + "ds = act.io.armfiles.read_netcdf(files_list)\n", + "ds.clean.cleanup()\n", + "print(f'{len(files_list)} files loaded')\n", + "ds\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "4a551094-9ec0-4b64-b80a-9940573c2f50", + "metadata": {}, + "source": [ + "## Plot time series data\n", + "#### Define the list of variables to be plotted:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d09b789e-84f1-4605-846b-a72c110c8048", + "metadata": {}, + "outputs": [], + "source": [ + "variables_to_plot = ['cvi_cut_size', 'enhancement_factor']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3458fb08-035b-4898-9253-0a94e6f9c97b", + "metadata": {}, + "outputs": [], + "source": [ + "ts_display = act.plotting.TimeSeriesDisplay(ds)\n", + "ts_display.add_subplots((len(variables_to_plot),), figsize = (9.5,4*len(variables_to_plot)))\n", + "\n", + "for i,v in enumerate(variables_to_plot):\n", + " ts_ax = ts_display.plot(v, subplot_index=(i,), set_title=ds.variables[v].attrs['long_name'],)\n", + " ts_ax.grid()\n", + "\n", + "plt.show()\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "194399aa-1907-452b-8ba9-bc31d7f60291", + "metadata": {}, + "source": [ + "## Quality check plots\n", + "#### Define variable for QC plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9a0ef63d-3eeb-48fc-a24a-43258b6134b8", + "metadata": {}, + "outputs": [], + "source": [ + "qc_variable = 'cvi_cut_size'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "532663a3-4dc0-4497-bda8-018c5f91e1c4", + "metadata": {}, + "outputs": [], + "source": [ + "# QC Plot\n", + "if ('qc_'+qc_variable) in ds.variables:\n", + "\n", + " # Plot\n", + " qc_display = 
act.plotting.TimeSeriesDisplay(ds)\n", + " qc_display.add_subplots((2,), figsize = (9.5,10))\n", + " qc_ax = qc_display.plot(qc_variable, subplot_index=(0,), set_title=\"QC results on field: \" + qc_variable,)\n", + " qc_ax.grid()\n", + " qc_display.qc_flag_block_plot(qc_variable, subplot_index=(1,))\n", + "\n", + " plt.show()\n", + "else:\n", + " print(f'QC not available for the selected field: {qc_variable}')\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "2308db16-f362-4033-a11a-c3e5e75ad9ba", + "metadata": {}, + "source": [ + "## Field selection dropdown menu\n", + "Select variable to be plotted from a dropdown menu" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d5792fa8-2253-441a-8ac6-1e07a0345031", + "metadata": {}, + "outputs": [], + "source": [ + "plt.ioff()\n", + "\n", + "# populate dropdown menu with available variables \n", + "available_variables = [v for v in ds.variables if not('time' in v or v.startswith('qc_') or v.startswith('source_')) and 'long_name' in ds.variables[v].attrs]\n", + "d_variable = 'cvi_cut_size'\n", + "dropdown = widgets.Dropdown(\n", + " options = [(ds.variables[v].attrs['long_name'], v) for v in available_variables],\n", + " value= d_variable,\n", + " description='Field:',\n", + " disabled=False,\n", + ")\n", + "dropdown.layout.margin = '0px 30% 0px 20%'\n", + "dropdown.layout.width = '50%'\n", + "\n", + "# set up display\n", + "i_display = act.plotting.TimeSeriesDisplay(ds)\n", + "i_display.add_subplots((1,), figsize = (9.5,5))\n", + "i_ax = i_display.plot(d_variable, subplot_index=(0,), set_title=ds.variables[d_variable].attrs['long_name'],)\n", + "i_ax.grid()\n", + "i_fig = i_display.fig\n", + "\n", + "# update plot callback function\n", + "def update_plot(change):\n", + " i_ax.cla()\n", + " i_ax_new = i_display.plot(change.new, subplot_index=(0,), set_title=ds.variables[change.new].attrs['long_name'],)\n", + " i_ax_new.grid()\n", + " i_fig.canvas.draw()\n", + " 
i_fig.canvas.flush_events()\n", + "\n", + "dropdown.observe(update_plot, names='value')\n", + "\n", + "widgets.AppLayout(\n", + " header=dropdown,\n", + " center=i_fig.canvas,\n", + " pane_heights=[1, 6,1]\n", + ")\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.12" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/INTERPSONDE/INTERPSONDE_tutorial.ipynb b/VAPs/quicklook/INTERPSONDE/INTERPSONDE_tutorial.ipynb new file mode 100644 index 00000000..7c751ae1 --- /dev/null +++ b/VAPs/quicklook/INTERPSONDE/INTERPSONDE_tutorial.ipynb @@ -0,0 +1,838 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# INTERPOLATEDSONDE.C1 Notebook\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/interpsonde) for more information about this vap." + ] + }, + { + "cell_type": "markdown", + "id": "97097763", + "metadata": {}, + "source": [ + "In this notebook, we demonstrate the workflow to explore ARM vap data (using interpolatedsonde as an example.) Value-added products (VAPs) are higher-order data products that have been analyzed and processed to ease scientists’ use of ARM data in atmospheric research and global climate models. \n", + "Here is the main content we will cover." 
+ ] + }, + { + "cell_type": "markdown", + "id": "eddec40f", + "metadata": {}, + "source": [ + "# Table of Content\n", + "## Access the data\n", + "* How to retrieve the data\n", + "* Data path and file name conventions\n", + "* Load data\n", + "## Explore the data\n", + "* NetCDF Data structure\n", + "* Xarray essentials\n", + "* Xarray Variable\n", + "## Plot the data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "import random\n", + "\n", + "import glob\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "id": "2372d5be", + "metadata": {}, + "source": [ + "## Access the data" + ] + }, + { + "cell_type": "markdown", + "id": "5b0d9684", + "metadata": {}, + "source": [ + "### How to retrieve the data\n", + "We assume the path \"/data/archive\" is available where you are running this notebook. You can use `os.path.exists(\"/data/archive\")` to verify if the path exists at your machine.\n", + "\n", + "\n", + "### Data path and file name conventions\n", + "There several common terminologies regarding ARM data, for example, data-stream-name, data-level, site, facility, instrument, etc. (For more details, please see [ARM Data File Standards Version 1.2](https://www.google.com/search?q=arm+datastream+facility+cite+definition&rlz=1C1GCEJ_enUS1029US1029&ei=hb41ZICrBPukqtsPvMODIA&ved=0ahUKEwjAgeiS0aL-AhV7kmoFHbzhAAQQ4dUDCBA&uact=5&oq=arm+datastream+facility+cite+definition&gs_lcp=Cgxnd3Mtd2l6LXNlcnAQAzIFCAAQogQyBQgAEKIEMgUIABCiBDIFCAAQogQyBQgAEKIEOgoIABBHENYEELADOgoIIRCgARDDBBAKSgQIQRgAUJ4DWIoOYO0PaAFwAXgAgAGMAYgBuQmSAQMyLjmYAQCgAQHIAQjAAQE&sclient=gws-wiz-serp).) 
\n", + "\n", + "For example, this notebook is called `interpolatedsonde.c1`, where `interpolatedsonde` is the \"datastream name\", and `c1` is the \"data level\".\n", + "\n", + "This datastream also contains site `awr` and facility `M1`. (Note: an individual datastream might have multiple site-facility pairs.)\n", + "In such a case, the data of this datastream is stored at `/data/archive/awr/awrinterpolatedsondeM1.c1`, which is in the format of `{DATA_DIR}/{site}/{site}{DATASTREAM_NAME}{facility}.{DATA_LEVEL}`. We can use the following method to assign the datastream directory `datastream_dir = os.path.join(DATA_DIR, site, site + DATASTREAM_NAME + facility + '.' + DATA_LEVEL )`\n", + "\n", + "The data files under datastream_dir also follow naming conventions. Once you reach the datastream_dir level, the most important file naming convention for differentiating the files is \"yyyyMMdd.hhmmss\", which comes in handy for filtering files by datetime. For example, we can use `glob.glob(f'{datastream_dir}/*.200709*.*')` to filter files from September 2007.\n", + "\n", + "(Note: refer to https://adc.arm.gov/discovery/#/ and the https://adc.arm.gov/solr8/metadata/select API to explore ARM datastreams and their associated available sites and facilities.)\n", + "\n", + "Please see the following examples in action" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d7e9eb85", + "metadata": {}, + "outputs": [], + "source": [ + "# Verify if DATA_DIR path exists\n", + "DATA_DIR = \"/data/archive\"\n", + "os.path.exists(DATA_DIR)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "586993fd", + "metadata": {}, + "outputs": [], + "source": [ + "# Specify datastream_dir following the path conventions and check its existence\n", + "DATASTREAM_NAME = \"interpolatedsonde\"\n", + "DATA_LEVEL = \"c1\"\n", + "site = \"awr\"\n", + "facility = \"M1\"\n", + "datastream_dir = os.path.join(DATA_DIR, site, site + DATASTREAM_NAME + facility + '.' 
+ DATA_LEVEL )\n", + "print(datastream_dir)\n", + "print(os.path.exists(datastream_dir))\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0742f7c1", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: list 5 (random) files under datastream_dir\n", + "files = os.listdir(datastream_dir)\n", + "files[:5]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "39b98a36", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: get most recent file\n", + "list_of_files = glob.glob(f\"{datastream_dir}/*\") # * means all if need specific format then *.csv\n", + "latest_file = max(list_of_files, key=os.path.getctime)\n", + "latest_file" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "902d514e", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus sort datastream files based on datetime\n", + "files = os.listdir(datastream_dir)\n", + "file_sorted = files.copy()\n", + "file_sorted.sort() \n", + "print(file_sorted[:5])\n", + "\n", + "# to reverse\n", + "file_sorted_reverse = files.copy() \n", + "file_sorted_reverse.sort(reverse=True)\n", + "print(file_sorted_reverse[:5])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ec5923b2", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: pattern matching\n", + "# filter the 200709** files under datastream_dir\n", + "files_filter = glob.glob(f'{datastream_dir}/*.200709*.*')\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c4a4aa18", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: use armnotebook_utils.file_filter (TODO) to filter files based on datastream info \n" + ] + }, + { + "cell_type": "markdown", + "id": "cfeb9efc", + "metadata": {}, + "source": [ + "### Load data\n", + "The arm data is stored in [NetCDF](https://en.wikipedia.org/wiki/NetCDF#:~:text=%22NetCDF%20(network%20Common%20Data%20Form,format%20for%20representing%20scientific%20data.) format. 
We can use xarray's `open_dataset` method to load single file, or `open_mfdataset` to open multiple files. (Note: the latter will still return a single xarray dataset object by combining multiple files.)\n", + "\n", + "Note: open_dataset keeps the file handle open and lazy loads its contents. All parameters are passed directly to open_dataset. It is a preferable method over load_dataset for memory-effiency. (more details, please see [xarray.open_dataset](https://docs.xarray.dev/en/stable/generated/xarray.open_dataset.html))\n", + "\n", + "See the following example in action" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a440a329", + "metadata": {}, + "outputs": [], + "source": [ + "# open single file\n", + "full_path = latest_file\n", + "print(full_path)\n", + "ds_single = xr.open_dataset(full_path)\n", + "ds_single" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0143a3d", + "metadata": {}, + "outputs": [], + "source": [ + "print(ds_single)\n", + "print(type(ds_single))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1c0f8939", + "metadata": {}, + "outputs": [], + "source": [ + "# open multiple files\n", + "n_files = 3\n", + "full_paths = [os.path.join(datastream_dir, f_path) for f_path in file_sorted_reverse[:n_files]]\n", + "print(full_paths)\n", + "try: # Note: sometimes multiple files cannot be merged, so we used try except here.\n", + " ds_mutiple = xr.open_mfdataset(full_paths)\n", + " # ds_mutiple\n", + " print(type(ds_mutiple))\n", + " print(ds_mutiple)\n", + "except Exception as e:\n", + " print(e)" + ] + }, + { + "cell_type": "markdown", + "id": "3fa59943", + "metadata": {}, + "source": [ + "## Explore the data" + ] + }, + { + "cell_type": "markdown", + "id": "571f69b9", + "metadata": {}, + "source": [ + "### NetCDF Data structure \n", + "\n", + "\n", + "(If you are confident with NetCDF basics and xarray essentials, feel free to skip this session.)\n", + "Before we dive into 
data exploration, there are some essential concepts we should be familiar with (shown below): \n", + "* Dataset\n", + "* Data array\n", + "* Variable\n", + "* Dimension\n", + "* Coordinate\n", + "* Data Type\n", + "* Meta Data (Attributes)\n", + "\n", + "We will not go into details about NetCDF basics and here are some references you might find helpful\n", + "* [Components of a NetCDF Dataset](https://iprc.soest.hawaii.edu/users/xfu/tool/guidef-7.html#)\n", + "* [Network Common Data Form (NetCDF)](https://www.unidata.ucar.edu/software/netcdf/) and [A Brief History of (netCDF) Time](https://www.unidata.ucar.edu/software/netcdf/time/recs.html)\n", + "\n", + "\n", + "\n", + "### Xarray essentials\n", + "Earlier we introduced the xarray package and used `open_dataset` and `open_mfdataset` to retrieve a NetCDF data file as an xarray Dataset object. Recall that you can review the data in a notebook by using `print(ds)` or `ds`. \n", + "\n", + "Using xarray to retrieve the aforementioned NetCDF basics is straightforward. 
In general,\n", + "* Dataset: `ds`\n", + "* Data array: `ds.variables`\n", + "* Variable: `ds.variables`\n", + "* Dimension: `ds.dims`\n", + "* Coordinate: `ds.coords`\n", + "* Data Type: `type`\n", + "* Meta Data (Attributes): `ds.attrs`\n", + "\n", + "Also, here are some references if you are new to xarray\n", + "* [Xarray in 45 minutes](https://tutorial.xarray.dev/overview/xarray-in-45-min.html)\n", + "* [Handling NetCDF Files using XArray for Absolute Beginners](https://towardsdatascience.com/handling-netcdf-files-using-xarray-for-absolute-beginners-111a8ab4463f)\n", + "\n", + "Try the following commands in action\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b3eac323", + "metadata": {}, + "outputs": [], + "source": [ + "ds = ds_single" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "77ecf85d", + "metadata": {}, + "outputs": [], + "source": [ + "# Dataset \n", + "ds" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "25e7de09", + "metadata": {}, + "outputs": [], + "source": [ + "# Data array, Variable\n", + "# Note: the info can be overwhelming. 
Do not attempt to grasp everything at first glance.\n", + "ds.variables" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8c41b67e", + "metadata": {}, + "outputs": [], + "source": [ + "# dimensions\n", + "ds.dims" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "156f1dfc", + "metadata": {}, + "outputs": [], + "source": [ + "# coordinates\n", + "ds.coords" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "277d6064", + "metadata": {}, + "outputs": [], + "source": [ + "# Meta Data (Attributes)\n", + "ds.attrs" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d334681f", + "metadata": {}, + "outputs": [], + "source": [ + "# type\n", + "print(type(ds))\n", + "print(type(ds.variables))\n", + "print(type(ds.dims))\n", + "print(type(ds.coords))\n", + "print(type(ds.attrs))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "643399d6", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: retrieve variable names only\n", + "list(ds.variables)" + ] + }, + { + "cell_type": "markdown", + "id": "1cf36878", + "metadata": {}, + "source": [ + "#### Discussion: variables vs. coordinates vs. dimensions. \n", + "From [Components of a NetCDF Dataset](https://iprc.soest.hawaii.edu/users/xfu/tool/guidef-7.html#):\n", + "\n", + "* Dimension: A dimension may be used to represent a real physical dimension, for example, time, latitude, longitude, or height. A dimension might also be used to index other quantities, for example station or model-run-number.\n", + "A netCDF dimension has both a name and a length. A dimension length is an arbitrary positive integer, except that one dimension in a netCDF dataset can have the length UNLIMITED.\n", + "\n", + "* Variables: Variables are used to store the bulk of the data in a netCDF dataset. A variable represents an array of values of the same type. A scalar value is treated as a 0-dimensional array. 
A variable has a name, a data type, and a shape described by its list of dimensions specified when the variable is created. A variable may also have associated attributes, which may be added, deleted or changed after the variable is created.\n", + "\n", + "* Coordinate (Variables): It is legal for a variable to have the same name as a dimension. Such variables have no special meaning to the netCDF library. However there is a convention that such variables should be treated in a special way by software using this library.\n", + "A variable with the same name as a dimension is called a coordinate variable. It typically defines a physical coordinate corresponding to that dimension.\n", + "\n", + "\n", + "Tip: By definition, coordinates and dimensions are also variables. For example, ds.time is a coordinate but it is also a special variable. But in practice, when we talk about a variable, we mean a regular/non-coordinate variable. We will use this convention for the remainder of this notebook. " + ] + }, + { + "cell_type": "markdown", + "id": "d47120f9", + "metadata": {}, + "source": [ + "### Xarray Variable\n", + "\n", + "\n", + "We can use `ds.variables` to access the variables of a dataset. But many times printing out the whole dataset can be overwhelming. Instead, we would work on an individual variable (also called an Xarray data array) using the `ds[var_name]` syntax, where `var_name` is the name of the variable. See the following examples in action.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5a57e136", + "metadata": {}, + "outputs": [], + "source": [ + "print(type(ds[\"time\"]))\n", + "ds[\"time\"]" + ] + }, + { + "cell_type": "markdown", + "id": "51ec643e", + "metadata": {}, + "source": [ + "#### Variable properties\n", + "For an individual variable, the following properties are most commonly used, shown with the associated xarray accessor to retrieve them. 
(assuming var = ds[var_name])\n", + "* name: `var.name`\n", + "* data content: `var.data`\n", + "* attributes: `var.attrs`\n", + "* dimenstions: `var.dims`\n", + "* data type: `var.data.dtype`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a632a525", + "metadata": {}, + "outputs": [], + "source": [ + "var_name = \"time\"\n", + "var = ds[var_name]\n", + "\n", + "print(\"var.name: \\n\", var.name, \"\\n\")\n", + "print(\"var.data: \\n\", var.data, \"\\n\")\n", + "print(\"var.attrs: \\n\", var.attrs, \"\\n\")\n", + "print(\"var.dims: \\n\", var.dims, \"\\n\")\n", + "print(\"var.data.dtype: \\n\", var.data.dtype, \"\\n\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b256b07f", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: indivdual variable info table (collect and display variable info as a tabular format)\n", + "df_info = pd.DataFrame()\n", + "df_info[\"var_name\"] = list(ds.variables)\n", + "df_info[\"dims\"] = df_info.var_name.apply(lambda x: ds[x].dims)\n", + "df_info[\"is_dim\"] = df_info.var_name.apply(lambda x: x in ds.dims)\n", + "df_info[\"n_dim\"] = df_info.var_name.apply(lambda x: len(ds[x].dims))\n", + "df_info[\"attrs\"] = df_info.var_name.apply(lambda x: ds[x].attrs)\n", + "df_info[\"dtype\"] = df_info.var_name.apply(lambda x: ds[x].data.dtype)\n", + "df_info" + ] + }, + { + "cell_type": "markdown", + "id": "e6a36b96", + "metadata": {}, + "source": [ + "### Data cleaning/Preprocessing (skipped)\n", + "Data cleaning and preprocessing is an important stage in the the data analysis pipeline. However it will be out of the scope of this notebook. For more information about data cleaning and preprocessing basics, here are some references. 
\n", + "* [Xarray Fundamentals](https://earth-env-data-science.github.io/lectures/xarray/xarray.html)\n", + "* [Xarray Tutorial — Pangeo Gallery documentation](http://gallery.pangeo.io/repos/pangeo-data/pangeo-tutorial-gallery/xarray.html)\n", + "* [Pythonic Data Cleaning With pandas and NumPy](https://realpython.com/python-data-cleaning-numpy-pandas/)\n", + "* [Pandas - Cleaning Data](https://www.w3schools.com/python/pandas/pandas_cleaning.asp)" + ] + }, + { + "cell_type": "markdown", + "id": "ae6e1a69", + "metadata": {}, + "source": [ + "### Plotting\n", + "Data visualization (or plotting) is another important data analysis topic and deserves its own discussion. Here in this notebook we will only demonstrate basic tools for simple data visualization tasks.\n", + "\n", + "Here are some references you might find useful:\n", + "* xarray plotting: https://docs.xarray.dev/en/stable/user-guide/plotting.html\n", + "* Atmospheric Community Toolkit (ACT): https://arm-doe.github.io/ACT/index.html\n", + "\n", + "Note: this notebook is auto-generated using a template. It uses a general heuristic to select the variable(s) to plot and is not customized for each individual notebook. Feel free to change the variables of interest, especially if a figure fails to plot." + ] + }, + { + "cell_type": "markdown", + "id": "7e417fb2", + "metadata": {}, + "source": [ + "#### 1-dimensional basic time series plot\n", + "\n", + "For the following plot we would like to find variables such that\n", + "* it has one and only one dimension\n", + "* \"time\" is its coordinate variable\n", + "* it is not a dimension itself,\n", + "* it is not a special variable with substrings within [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "\n", + "Please see the following example in action. 
(For more details about pandas filtering, please see the following references.)\n", + "* [pandas: multiple conditions while indexing data frame](https://stackoverflow.com/questions/22591174/pandas-multiple-conditions-while-indexing-data-frame-unexpected-behavior)\n", + "* [pandas.DataFrame.apply](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.apply.html)\n", + "* [How to test if a string contains one of the substrings in a list...](https://stackoverflow.com/questions/26577516/how-to-test-if-a-string-contains-one-of-the-substrings-in-a-list-in-pandas)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "30edac2d", + "metadata": {}, + "outputs": [], + "source": [ + "# (pandas query) filter n_dim==1, non-dimenssional, and not contains the following substrings\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter = df_info[(df_info.n_dim == 1) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (df_info.is_dim==False) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4777a659", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " var_1d = df_filter.var_name.values[0]\n", + " var_1d\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fb5ae985", + "metadata": {}, + "outputs": [], + "source": [ + "# Note: if failed, change to another variable to plot.\n", + "try:\n", + " ds[var_1d].plot()\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "markdown", + "id": "9e41066e", + "metadata": {}, + "source": [ + "#### 2-dimensional basic plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "09bd4adc", + "metadata": {}, + "outputs": [], + "source": [ + "# (pandas query) filter n_dim==2, non-dimenssional, and not contains the 
following substrings\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_2 = df_info[(df_info.n_dim == 2) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (df_info.is_dim==False) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter_2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2cdea1a9", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " var_2d = df_filter_2.var_name.values[0]\n", + " var_2d\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f5f48879", + "metadata": {}, + "outputs": [], + "source": [ + "# Note: if failed, change to another variable to plot.\n", + "try:\n", + " print(ds[var_2d].dims)\n", + " # ds[var_2d].plot()\n", + "\n", + " # conventionally, use \"time\" as x-axis\n", + " ds[var_2d].plot(x=\"time\")\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "markdown", + "id": "7554b2a6", + "metadata": {}, + "source": [ + "#### qc-plotting (optional)\n", + "\n", + "Note: act qc plotting has more strict requirement, one of them is the associated qc variable need to have \"flag_masks\" attributes. 
Which is added by using ds = act.io.armfiles.read_netcdf(files_list), then ds.clean.cleanup()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "71c2096f", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " ds_act = act.io.armfiles.read_netcdf(full_path)\n", + " print(type(ds_act))\n", + " ds_act.clean.cleanup()\n", + "\n", + " # or \n", + " # ds.clean.cleanup()\n", + "except Exception as e:\n", + " print(\"ERROR\", e)\n", + " ds_act = ds" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "844f8505", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter valid variables for ACT qc plotting\n", + "condition = (\"qc_\" + df_info.var_name).apply(lambda x: ds_act[x].attrs.get(\"flag_masks\") is not None \n", + " if x in list(ds.variables) else False)\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_3 = df_info[(df_info.is_dim==False) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings))) &\n", + " condition\n", + " ]\n", + "df_filter_3" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ecd38cc6", + "metadata": {}, + "outputs": [], + "source": [ + "# QC Plot\n", + "try:\n", + " qc_variable = df_filter_3.var_name.values[0]\n", + " print(qc_variable)\n", + "except Exception as e:\n", + " print(e)\n", + " \n", + "try:\n", + "\n", + " # Plot\n", + " qc_display = act.plotting.TimeSeriesDisplay(ds_act)\n", + " qc_display.add_subplots((2,), figsize = (9.5,10))\n", + " qc_ax = qc_display.plot(qc_variable, subplot_index=(0,), set_title=\"QC results on field: \" + qc_variable,)\n", + " qc_ax.grid()\n", + " qc_display.qc_flag_block_plot(qc_variable, subplot_index=(1,))\n", + " qc_ax.set_xlabel(f\"UTC Time starts at {ds.time.data[0]}\")\n", + "\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e)" + ] + }, + { + "cell_type": "markdown", + "id": "d76e4d27", + "metadata": 
{}, + "source": [ + "#### bonus: choose variables to plot from a dropdown menu " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bb733804", + "metadata": {}, + "outputs": [], + "source": [ + "# Valid variables filtering\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_4 = df_info[(df_info.is_dim==False) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter_4" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "82d5c20d", + "metadata": {}, + "outputs": [], + "source": [ + "# example 1: using xarray plot\n", + "\n", + "# Uncomment the following cell to try the interactive plot (ctrl + /)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2fc9aaa4", + "metadata": {}, + "outputs": [], + "source": [ + "# %matplotlib widget\n", + "# plt.clf()\n", + "\n", + "# fig, ax = plt.subplots(figsize=(10, 4))\n", + "\n", + "# available_variables = df_filter_4.var_name.values\n", + "# @widgets.interact(var=available_variables)\n", + "# def update(var = available_variables[0]):\n", + "# fig.clear() # Remove old lines from plot and plot new one\n", + "# if len(ds[var].dims)==2:\n", + "# ds[var].plot(x=\"time\", add_colorbar=False)\n", + "# else:\n", + "# ds[var].plot()\n", + "# plt.grid()\n", + "# plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fabdd802", + "metadata": {}, + "outputs": [], + "source": [ + "# example 2: using act plot\n", + "\n", + "# Uncomment the following cell to try the interactive plot (ctrl + /)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "962b4186", + "metadata": {}, + "outputs": [], + "source": [ + "# %matplotlib widget\n", + "# plt.clf()\n", + "\n", + "# available_variables = df_filter_4.var_name.values\n", + "\n", + "\n", + "# @widgets.interact(var=available_variables)\n", + "# def 
update(var = available_variables[0]):\n", + "\n", + "# i_display = act.plotting.TimeSeriesDisplay(ds_act)\n", + "# i_display.add_subplots((1,), figsize=(10, 4))\n", + "# ax = i_display.plot(var, subplot_index=(0,), set_title=f\"{var} ({ds_act[var].attrs['long_name']})\",)\n", + "\n", + "# ax.set_xlabel(f\"UTC Time starts at {ds.time.data[0]}\")\n", + "# ax.grid()\n", + "# plt.show()\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.16" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/INTERPSONDE/interpolatedsonde.c1.ipynb b/VAPs/quicklook/INTERPSONDE/interpolatedsonde.c1.ipynb new file mode 100644 index 00000000..d0faebd8 --- /dev/null +++ b/VAPs/quicklook/INTERPSONDE/interpolatedsonde.c1.ipynb @@ -0,0 +1,339 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# INTERPOLATEDSONDE.C1 Plots\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/interpsonde) for more information about this vap." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib widget\n", + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import ipywidgets as widgets\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "# Data archive directory\n", + "DATA_DIR = r'/data/archive/'\n", + "\n", + "# Datastream info\n", + "DATASTREAM_NAME = 'interpolatedsonde'\n", + "DATA_LEVEL = 'c1'\n", + "LOCATIONS = [{'end_date': '2017-01-04', 'facility': 'M1', 'site': 'awr', 'start_date': '2015-11-30'}, {'end_date': '2016-01-16', 'facility': 'S1', 'site': 'awr', 'start_date': '2015-12-04'}, {'end_date': '2017-11-01', 'facility': 'S1', 'site': 'asi', 'start_date': '2016-05-01'}, {'end_date': '2020-05-31', 'facility': 'M1', 'site': 'anx', 'start_date': '2019-12-01'}, {'end_date': '2023-06-17', 'facility': 'M1', 'site': 'guc', 'start_date': '2021-09-01'}, {'end_date': '2023-12-11', 'facility': 'C1', 'site': 'ena', 'start_date': '2013-09-28'}, {'end_date': '2022-10-02', 'facility': 'M1', 'site': 'hou', 'start_date': '2021-10-01'}, {'end_date': '2023-12-11', 'facility': 'M1', 'site': 'epc', 'start_date': '2023-02-06'}, {'end_date': '2015-11-30', 'facility': 'M1', 'site': 'mao', 'start_date': '2014-01-01'}, {'end_date': '2018-03-24', 'facility': 'M1', 'site': 'mar', 'start_date': '2017-10-31'}, {'end_date': '2013-10-02', 'facility': 'M1', 'site': 'mag', 'start_date': '2012-09-30'}, {'end_date': '2019-05-01', 'facility': 'M1', 'site': 'cor', 'start_date': '2018-09-28'}, {'end_date': '2019-05-01', 'facility': 'S1', 'site': 'cor', 'start_date': '2018-09-23'}, {'end_date': '2012-04-09', 'facility': 'M1', 'site': 'gan', 'start_date': '2011-09-15'}, {'end_date': '2013-06-29', 'facility': 'M1', 'site': 'pvc', 'start_date': '2012-07-01'}, 
{'end_date': '2020-10-01', 'facility': 'M1', 'site': 'mos', 'start_date': '2019-10-11'}, {'end_date': '2023-12-11', 'facility': 'C1', 'site': 'nsa', 'start_date': '2002-04-28'}, {'end_date': '2021-06-16', 'facility': 'M1', 'site': 'oli', 'start_date': '2013-10-01'}, {'end_date': '2023-12-11', 'facility': 'C1', 'site': 'sgp', 'start_date': '1999-07-23'}, {'end_date': '2014-09-10', 'facility': 'M1', 'site': 'tmp', 'start_date': '2014-02-01'}, {'end_date': '2014-07-09', 'facility': 'C1', 'site': 'twp', 'start_date': '2001-04-05'}, {'end_date': '2013-09-08', 'facility': 'C2', 'site': 'twp', 'start_date': '1998-10-21'}, {'end_date': '2015-01-16', 'facility': 'C3', 'site': 'twp', 'start_date': '2002-04-03'}]" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "9faaf875", + "metadata": {}, + "source": [ + "## Define site, facility, and date range" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ac6764f5", + "metadata": {}, + "outputs": [], + "source": [ + "print(\"The following locations and date ranges are available for this VAP:\")\n", + "display(pd.DataFrame(LOCATIONS, columns=['site', 'facility', 'start_date', 'end_date']))" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "8d132223", + "metadata": {}, + "source": [ + "#### Define site, facility, and date range (date format: YYYY-MM-DD) using the variables below:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e563983a", + "metadata": {}, + "outputs": [], + "source": [ + "site_facility = ( 'sgp', 'C1' )\n", + "\n", + "date_start = '2023-12-09'\n", + "date_end = '2023-12-11'" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "bccd3dfe-2f99-49a5-bace-ea37e8dd8fc3", + "metadata": {}, + "source": [ + "## Load data files\n", + "Load data files from /data/archive/" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cb4b9a26-c574-49c0-a521-658fa553e39e", + "metadata": {}, + "outputs": [], + "source": 
[ + "# Compile list of files\n", + "site, facility = site_facility\n", + "d_date_start = datetime.strptime(date_start, '%Y-%m-%d')\n", + "d_date_end = datetime.strptime(date_end, '%Y-%m-%d')\n", + "dir_path = os.path.join(DATA_DIR + site, site + DATASTREAM_NAME + facility + r'.' + DATA_LEVEL )\n", + "dir_path\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6be8f3dc", + "metadata": {}, + "outputs": [], + "source": [ + "from datetime import date, timedelta\n", + "import pandas as pd\n", + "\n", + "def get_ARM_formated_dates(start_date, end_date):\n", + " \"\"\"\n", + " Get a list of ARM conventional formated date lists, based on start_date and end_date(inclusive)\n", + " EXAMPLE:\n", + " get_ARM_formated_dates(start_date=\"20180219\", end_date=\"20180221\")\n", + " >> [\"20180219\", \"20180220\", \"20180221\"] \n", + " \"\"\"\n", + " \n", + " _start_date = pd.to_datetime(start_date)\n", + " _end_date = pd.to_datetime(end_date)\n", + " \n", + " delta = _end_date - _start_date # returns timedelta \n", + " dates = []\n", + "\n", + " for i in range(delta.days + 1):\n", + " day = _start_date + timedelta(days=i)\n", + " day_formated = day.strftime(format=\"%Y%m%d\")\n", + " dates.append(day_formated)\n", + " return dates\n", + "\n", + "\n", + "get_ARM_formated_dates(start_date=date_start, end_date=date_end)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "51feea2e", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter a list of files based on date pattern\n", + "import glob\n", + "dates = get_ARM_formated_dates(start_date=date_start, end_date=date_end)\n", + "files_filter = []\n", + "for date in dates:\n", + " files_filter += glob.glob(f'{dir_path}/*.{date}*.*')\n", + " files_filter\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0e5d0d7", + "metadata": {}, + "outputs": [], + "source": [ + "# Load files as a single dataset\n", + "files_list = files_filter \n", + "ds = 
act.io.armfiles.read_netcdf(files_list)\n", + "ds.clean.cleanup()\n", + "print(f'{len(files_list)} files loaded')\n", + "ds\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "4a551094-9ec0-4b64-b80a-9940573c2f50", + "metadata": {}, + "source": [ + "## Plot time series data\n", + "#### Define the list of variables to be plotted:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d09b789e-84f1-4605-846b-a72c110c8048", + "metadata": {}, + "outputs": [], + "source": [ + "variables_to_plot = ['temp', 'rh', 'vap_pres']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3458fb08-035b-4898-9253-0a94e6f9c97b", + "metadata": {}, + "outputs": [], + "source": [ + "ts_display = act.plotting.TimeSeriesDisplay(ds)\n", + "ts_display.add_subplots((len(variables_to_plot),), figsize = (9.5,4*len(variables_to_plot)))\n", + "\n", + "for i,v in enumerate(variables_to_plot):\n", + " ts_ax = ts_display.plot(v, subplot_index=(i,), set_title=ds.variables[v].attrs['long_name'],)\n", + " ts_ax.grid()\n", + "\n", + "plt.show()\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "194399aa-1907-452b-8ba9-bc31d7f60291", + "metadata": {}, + "source": [ + "## Quality check plots\n", + "#### Define variable for QC plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9a0ef63d-3eeb-48fc-a24a-43258b6134b8", + "metadata": {}, + "outputs": [], + "source": [ + "qc_variable = 'precip'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "532663a3-4dc0-4497-bda8-018c5f91e1c4", + "metadata": {}, + "outputs": [], + "source": [ + "# QC Plot\n", + "if ('qc_'+qc_variable) in ds.variables:\n", + "\n", + " # Plot\n", + " qc_display = act.plotting.TimeSeriesDisplay(ds)\n", + " qc_display.add_subplots((2,), figsize = (9.5,10))\n", + " qc_ax = qc_display.plot(qc_variable, subplot_index=(0,), set_title=\"QC results on field: \" + qc_variable,)\n", + " qc_ax.grid()\n", + " 
qc_display.qc_flag_block_plot(qc_variable, subplot_index=(1,))\n", + "\n", + " plt.show()\n", + "else:\n", + " print(f'QC not available for the selected field: {qc_variable}')\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "2308db16-f362-4033-a11a-c3e5e75ad9ba", + "metadata": {}, + "source": [ + "## Field selection dropdown menu\n", + "Select variable to be plotted from a dropdown menu" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d5792fa8-2253-441a-8ac6-1e07a0345031", + "metadata": {}, + "outputs": [], + "source": [ + "plt.ioff()\n", + "\n", + "# populate dropdown menu with available variables \n", + "available_variables = [v for v in ds.variables if not('time' in v or v.startswith('qc_') or v.startswith('source_')) and 'long_name' in ds.variables[v].attrs]\n", + "d_variable = 'temp'\n", + "dropdown = widgets.Dropdown(\n", + " options = [(ds.variables[v].attrs['long_name'], v) for v in available_variables],\n", + " value= d_variable,\n", + " description='Field:',\n", + " disabled=False,\n", + ")\n", + "dropdown.layout.margin = '0px 30% 0px 20%'\n", + "dropdown.layout.width = '50%'\n", + "\n", + "# set up display\n", + "i_display = act.plotting.TimeSeriesDisplay(ds)\n", + "i_display.add_subplots((1,), figsize = (9.5,5))\n", + "i_ax = i_display.plot(d_variable, subplot_index=(0,), set_title=ds.variables[d_variable].attrs['long_name'],)\n", + "i_ax.grid()\n", + "i_fig = i_display.fig\n", + "\n", + "# update plot callback function\n", + "def update_plot(change):\n", + " i_ax.cla()\n", + " i_ax_new = i_display.plot(change.new, subplot_index=(0,), set_title=ds.variables[change.new].attrs['long_name'],)\n", + " i_ax_new.grid()\n", + " i_fig.canvas.draw()\n", + " i_fig.canvas.flush_events()\n", + "\n", + "dropdown.observe(update_plot, names='value')\n", + "\n", + "widgets.AppLayout(\n", + " header=dropdown,\n", + " center=i_fig.canvas,\n", + " pane_heights=[1, 6,1]\n", + ")\n" + ] + } + ], + "metadata": { + "kernelspec": 
{ + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.12" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/KAZRARSCL/KAZRARSCL_tutorial.ipynb b/VAPs/quicklook/KAZRARSCL/KAZRARSCL_tutorial.ipynb new file mode 100644 index 00000000..021f11ec --- /dev/null +++ b/VAPs/quicklook/KAZRARSCL/KAZRARSCL_tutorial.ipynb @@ -0,0 +1,838 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# ARSCLKAZR1KOLLIAS.C1 Notebook\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/kazrarscl) for more information about this vap." + ] + }, + { + "cell_type": "markdown", + "id": "97097763", + "metadata": {}, + "source": [ + "In this notebook, we demonstrate the workflow to explore ARM vap data (using arsclkazr1kollias as an example.) Value-added products (VAPs) are higher-order data products that have been analyzed and processed to ease scientists’ use of ARM data in atmospheric research and global climate models. \n", + "Here is the main content we will cover." 
+ ] + }, + { + "cell_type": "markdown", + "id": "eddec40f", + "metadata": {}, + "source": [ + "# Table of Contents\n", + "## Access the data\n", + "* How to retrieve the data\n", + "* Data path and file name conventions\n", + "* Load data\n", + "## Explore the data\n", + "* NetCDF Data structure\n", + "* Xarray essentials\n", + "* Xarray Variable\n", + "## Plot the data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "import random\n", + "\n", + "import glob\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "id": "2372d5be", + "metadata": {}, + "source": [ + "## Access the data" + ] + }, + { + "cell_type": "markdown", + "id": "5b0d9684", + "metadata": {}, + "source": [ + "### How to retrieve the data\n", + "We assume the path \"/data/archive\" is available where you are running this notebook. You can use `os.path.exists(\"/data/archive\")` to verify whether the path exists on your machine.\n", + "\n", + "\n", + "### Data path and file name conventions\n", + "There are several common terms regarding ARM data, for example, data-stream-name, data-level, site, facility, instrument, etc. (For more details, please see [ARM Data File Standards Version 1.2](https://www.google.com/search?q=arm+datastream+facility+cite+definition&rlz=1C1GCEJ_enUS1029US1029&ei=hb41ZICrBPukqtsPvMODIA&ved=0ahUKEwjAgeiS0aL-AhV7kmoFHbzhAAQQ4dUDCBA&uact=5&oq=arm+datastream+facility+cite+definition&gs_lcp=Cgxnd3Mtd2l6LXNlcnAQAzIFCAAQogQyBQgAEKIEMgUIABCiBDIFCAAQogQyBQgAEKIEOgoIABBHENYEELADOgoIIRCgARDDBBAKSgQIQRgAUJ4DWIoOYO0PaAFwAXgAgAGMAYgBuQmSAQMyLjmYAQCgAQHIAQjAAQE&sclient=gws-wiz-serp).) 
\n", + "\n", + "For example, this notebook is called `arsclkazr1kollias.c1`, where `arsclkazr1kollias` is the \"datastream name\", and `c1` is the \"data level\".\n", + "\n", + "This datastream also contains site `anx` and facility `M1`. (Note: individual datastream might have multiple site-facility pairs.)\n", + "In such a case, the data of this data-stream is stored at `/data/archive/anx/anxarsclkazr1kolliasM1.c1`, which is in the format of `{DATA_DIR}/{site}/{site}{DATASTREAM_NAME}{facility}.{DATA_LEVEL}`. We can use the following method to assign the data-stream directory `datastream_dir = os.path.join(DATA_DIR, site, site + DATASTREAM_NAME + facility + '.' + DATA_LEVEL )`\n", + "\n", + "The data files under datastream_dir also follow naming conventions. Once you reach the datastream_dir level, the most important file naming convention to differentiate the files is \"yyyyMMdd.hhmmss\", which comes in handy to filter files based on datetime. For example, we can use `glob.glob(f'{datastream_dir}/*.200709*.*')` to filter files from September 2007.\n", + "\n", + "(Note: refer to the https://adc.arm.gov/discovery/#/ and https://adc.arm.gov/solr8/metadata/select API to explore ARM datastreams and their associated available sites and facilities.)\n", + "\n", + "Please see the following examples in action" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d7e9eb85", + "metadata": {}, + "outputs": [], + "source": [ + "# Verify if DATA_DIR path exists\n", + "DATA_DIR = \"/data/archive\"\n", + "os.path.exists(DATA_DIR)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "586993fd", + "metadata": {}, + "outputs": [], + "source": [ + "# Specify datastream_dir following the path conventions and check its existence\n", + "DATASTREAM_NAME = \"arsclkazr1kollias\"\n", + "DATA_LEVEL = \"c1\"\n", + "site = \"anx\"\n", + "facility = \"M1\"\n", + "datastream_dir = os.path.join(DATA_DIR, site, site + DATASTREAM_NAME + facility + '.' 
+ DATA_LEVEL )\n", + "print(datastream_dir)\n", + "print(os.path.exists(datastream_dir))\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0742f7c1", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: list 5 (random) files under datastream_dir\n", + "files = os.listdir(datastream_dir)\n", + "files[:5]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "39b98a36", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: get most recent file\n", + "list_of_files = glob.glob(f\"{datastream_dir}/*\") # * means all if need specific format then *.csv\n", + "latest_file = max(list_of_files, key=os.path.getctime)\n", + "latest_file" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "902d514e", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus sort datastream files based on datetime\n", + "files = os.listdir(datastream_dir)\n", + "file_sorted = files.copy()\n", + "file_sorted.sort() \n", + "print(file_sorted[:5])\n", + "\n", + "# to reverse\n", + "file_sorted_reverse = files.copy() \n", + "file_sorted_reverse.sort(reverse=True)\n", + "print(file_sorted_reverse[:5])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ec5923b2", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: pattern matching\n", + "# filter the 200709** files under datastream_dir\n", + "files_filter = glob.glob(f'{datastream_dir}/*.200709*.*')\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c4a4aa18", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: use armnotebook_utils.file_filter (TODO) to filter files based on datastream info \n" + ] + }, + { + "cell_type": "markdown", + "id": "cfeb9efc", + "metadata": {}, + "source": [ + "### Load data\n", + "The arm data is stored in [NetCDF](https://en.wikipedia.org/wiki/NetCDF#:~:text=%22NetCDF%20(network%20Common%20Data%20Form,format%20for%20representing%20scientific%20data.) format. 
We can use xarray's `open_dataset` method to load a single file, or `open_mfdataset` to open multiple files. (Note: the latter will still return a single xarray dataset object by combining multiple files.)\n", + "\n", + "Note: `open_dataset` keeps the file handle open and lazily loads its contents, whereas `load_dataset` passes all of its parameters directly to `open_dataset` and then reads the whole dataset into memory. `open_dataset` is therefore preferable to `load_dataset` for memory efficiency. (For more details, please see [xarray.open_dataset](https://docs.xarray.dev/en/stable/generated/xarray.open_dataset.html))\n", + "\n", + "See the following example in action" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a440a329", + "metadata": {}, + "outputs": [], + "source": [ + "# open single file\n", + "full_path = latest_file\n", + "print(full_path)\n", + "ds_single = xr.open_dataset(full_path)\n", + "ds_single" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0143a3d", + "metadata": {}, + "outputs": [], + "source": [ + "print(ds_single)\n", + "print(type(ds_single))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1c0f8939", + "metadata": {}, + "outputs": [], + "source": [ + "# open multiple files\n", + "n_files = 3\n", + "full_paths = [os.path.join(datastream_dir, f_path) for f_path in file_sorted_reverse[:n_files]]\n", + "print(full_paths)\n", + "try: # Note: sometimes multiple files cannot be merged, so we used try except here.\n", + " ds_mutiple = xr.open_mfdataset(full_paths)\n", + " # ds_mutiple\n", + " print(type(ds_mutiple))\n", + " print(ds_mutiple)\n", + "except Exception as e:\n", + " print(e)" + ] + }, + { + "cell_type": "markdown", + "id": "3fa59943", + "metadata": {}, + "source": [ + "## Explore the data" + ] + }, + { + "cell_type": "markdown", + "id": "571f69b9", + "metadata": {}, + "source": [ + "### NetCDF Data structure \n", + "\n", + "\n", + "(If you are confident with NetCDF basics and xarray essentials, feel free to skip this section.)\n", + "Before we dive into 
data exploration, there are some essential concepts we should be familiar with (shown below): \n", + "* Dataset\n", + "* Data array\n", + "* Variable\n", + "* Dimension\n", + "* Coordinate\n", + "* Data Type\n", + "* Meta Data (Attributes)\n", + "\n", + "We will not go into details about NetCDF basics, but here are some references you might find helpful:\n", + "* [Components of a NetCDF Dataset](https://iprc.soest.hawaii.edu/users/xfu/tool/guidef-7.html#)\n", + "* [Network Common Data Form (NetCDF)](https://www.unidata.ucar.edu/software/netcdf/) and [A Brief History of (netCDF) Time](https://www.unidata.ucar.edu/software/netcdf/time/recs.html)\n", + "\n", + "\n", + "\n", + "### Xarray essentials\n", + "Earlier we introduced the xarray package and used `open_dataset` and `open_mfdataset` to load a NetCDF data file as an xarray Dataset object. Recall that you can review the data in a notebook by using `print(ds)` or `ds`. \n", + "\n", + "Using xarray to retrieve the aforementioned NetCDF basics is straightforward. 
In general,\n", + "* Dataset: `ds`\n", + "* Data array: `ds.variables`\n", + "* Variable: `ds.variables`\n", + "* Dimension: `ds.dims`\n", + "* Coordinate: `ds.coords`\n", + "* Data Type: `type`\n", + "* Meta Data (Attributes): `ds.attrs`\n", + "\n", + "Also, here are some references if you are new to xarray\n", + "* [Xarray in 45 minutes](https://tutorial.xarray.dev/overview/xarray-in-45-min.html)\n", + "* [Handling NetCDF Files using XArray for Absolute Beginners](https://towardsdatascience.com/handling-netcdf-files-using-xarray-for-absolute-beginners-111a8ab4463f)\n", + "\n", + "Try the following commands in action\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b3eac323", + "metadata": {}, + "outputs": [], + "source": [ + "ds = ds_single" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "77ecf85d", + "metadata": {}, + "outputs": [], + "source": [ + "# Dataset \n", + "ds" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "25e7de09", + "metadata": {}, + "outputs": [], + "source": [ + "# Data array, Variable\n", + "# Note: the info can be overwhelming. 
Do not attempt to grasp everything at the first glance.\n", + "ds.variables" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8c41b67e", + "metadata": {}, + "outputs": [], + "source": [ + "# dimenssions\n", + "ds.dims" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "156f1dfc", + "metadata": {}, + "outputs": [], + "source": [ + "# coordinates\n", + "ds.coords" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "277d6064", + "metadata": {}, + "outputs": [], + "source": [ + "# Meta Data (Attributes)\n", + "ds.attrs" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d334681f", + "metadata": {}, + "outputs": [], + "source": [ + "# type\n", + "print(type(ds))\n", + "print(type(ds.variables))\n", + "print(type(ds.dims))\n", + "print(type(ds.coords))\n", + "print(type(ds.attrs))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "643399d6", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: retrieve variable names only\n", + "list(ds.variables)" + ] + }, + { + "cell_type": "markdown", + "id": "1cf36878", + "metadata": {}, + "source": [ + "#### Discussion: variable vs. coordinates vs. dimensions. \n", + "From [Components of a NetCDF Dataset](https://iprc.soest.hawaii.edu/users/xfu/tool/guidef-7.html#):\n", + "\n", + "* Dimension: A dimension may be used to represent a real physical dimension, for example, time, latitude, longitude, or height. A dimension might also be used to index other quantities, for example station or model-run-number.\n", + "A netCDF dimension has both a name and a length. A dimension length is an arbitrary positive integer, except that one dimension in a netCDF dataset can have the length UNLIMITED.\n", + "\n", + "* Variables: Variables are used to store the bulk of the data in a netCDF dataset. A variable represents an array of values of the same type. A scalar value is treated as a 0-dimensional array. 
A variable has a name, a data type, and a shape described by its list of dimensions specified when the variable is created. A variable may also have associated attributes, which may be added, deleted or changed after the variable is created.\n", + "\n", + "* Coordinate (Variables): It is legal for a variable to have the same name as a dimension. Such variables have no special meaning to the netCDF library. However there is a convention that such variables should be treated in a special way by software using this library.\n", + "A variable with the same name as a dimension is called a coordinate variable. It typically defines a physical coordinate corresponding to that dimension.\n", + "\n", + "\n", + "Tips: By definition, coordinates and dimensions are also variables. For example, ds.time is a coordinate, but it is also a special variable. In practice, however, when we talk about a variable, we mean a regular/non-coordinate variable. We will use this convention for the remainder of this notebook. " + ] + }, + { + "cell_type": "markdown", + "id": "d47120f9", + "metadata": {}, + "source": [ + "### Xarray Variable\n", + "\n", + "\n", + "We can use `ds.variables` to access the variables of a dataset. But printing out the whole dataset can often be overwhelming. Instead, we will work on an individual variable (also called an Xarray data array) using the `ds[var_name]` syntax, where `var_name` is the name of the variable. See the following examples in action.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5a57e136", + "metadata": {}, + "outputs": [], + "source": [ + "print(type(ds[\"time\"]))\n", + "ds[\"time\"]" + ] + }, + { + "cell_type": "markdown", + "id": "51ec643e", + "metadata": {}, + "source": [ + "#### Variable properties\n", + "For an individual variable, the following properties are most commonly used, shown with the associated xarray attribute to retrieve them. 
(assuming var = ds[var_name])\n", + "* name: `var.name`\n", + "* data content: `var.data`\n", + "* attributes: `var.attrs`\n", + "* dimenstions: `var.dims`\n", + "* data type: `var.data.dtype`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a632a525", + "metadata": {}, + "outputs": [], + "source": [ + "var_name = \"time\"\n", + "var = ds[var_name]\n", + "\n", + "print(\"var.name: \\n\", var.name, \"\\n\")\n", + "print(\"var.data: \\n\", var.data, \"\\n\")\n", + "print(\"var.attrs: \\n\", var.attrs, \"\\n\")\n", + "print(\"var.dims: \\n\", var.dims, \"\\n\")\n", + "print(\"var.data.dtype: \\n\", var.data.dtype, \"\\n\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b256b07f", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: indivdual variable info table (collect and display variable info as a tabular format)\n", + "df_info = pd.DataFrame()\n", + "df_info[\"var_name\"] = list(ds.variables)\n", + "df_info[\"dims\"] = df_info.var_name.apply(lambda x: ds[x].dims)\n", + "df_info[\"is_dim\"] = df_info.var_name.apply(lambda x: x in ds.dims)\n", + "df_info[\"n_dim\"] = df_info.var_name.apply(lambda x: len(ds[x].dims))\n", + "df_info[\"attrs\"] = df_info.var_name.apply(lambda x: ds[x].attrs)\n", + "df_info[\"dtype\"] = df_info.var_name.apply(lambda x: ds[x].data.dtype)\n", + "df_info" + ] + }, + { + "cell_type": "markdown", + "id": "e6a36b96", + "metadata": {}, + "source": [ + "### Data cleaning/Preprocessing (skipped)\n", + "Data cleaning and preprocessing is an important stage in the the data analysis pipeline. However it will be out of the scope of this notebook. For more information about data cleaning and preprocessing basics, here are some references. 
\n", + "* [Xarray Fundamentals](https://earth-env-data-science.github.io/lectures/xarray/xarray.html)\n", + "* [Xarray Tutorial — Pangeo Gallery documentation](http://gallery.pangeo.io/repos/pangeo-data/pangeo-tutorial-gallery/xarray.html)\n", + "* [Pythonic Data Cleaning With pandas and NumPy](https://realpython.com/python-data-cleaning-numpy-pandas/)\n", + "* [Pandas - Cleaning Data](https://www.w3schools.com/python/pandas/pandas_cleaning.asp)" + ] + }, + { + "cell_type": "markdown", + "id": "ae6e1a69", + "metadata": {}, + "source": [ + "### Plotting\n", + "Data visualization (or plotting) is another important data analysis topic and deserves its own discussion. Here in this notebook we will only demonstrate basic tools for simple data visualization tasks.\n", + "\n", + "Here are some references you might find useful:\n", + "* xarray plotting: https://docs.xarray.dev/en/stable/user-guide/plotting.html\n", + "* Atmospheric Community Toolkit (ACT): https://arm-doe.github.io/ACT/index.html\n", + "\n", + "Note: this notebook is auto-generated using a template. It uses a general heuristic to select the variable(s) to plot and is not customized to each individual notebook. Feel free to change the variables of interest, especially if a certain figure fails to plot." + ] + }, + { + "cell_type": "markdown", + "id": "7e417fb2", + "metadata": {}, + "source": [ + "#### 1-dimensional basic time series plot\n", + "\n", + "For the following plot we would like to find variables such that\n", + "* it has one and only one dimension\n", + "* \"time\" is its coordinate variable\n", + "* it is not a dimension itself,\n", + "* it is not a special variable whose name contains any of the substrings [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "\n", + "Please see the following example in action. 
(For more details about pandas filtering, please see the following references.)\n", + "* [pandas: multiple conditions while indexing data frame](https://stackoverflow.com/questions/22591174/pandas-multiple-conditions-while-indexing-data-frame-unexpected-behavior)\n", + "* [pandas.DataFrame.apply](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.apply.html)\n", + "* [How to test if a string contains one of the substrings in a list...](https://stackoverflow.com/questions/26577516/how-to-test-if-a-string-contains-one-of-the-substrings-in-a-list-in-pandas)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "30edac2d", + "metadata": {}, + "outputs": [], + "source": [ + "# (pandas query) filter n_dim==1, non-dimenssional, and not contains the following substrings\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter = df_info[(df_info.n_dim == 1) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (df_info.is_dim==False) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4777a659", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " var_1d = df_filter.var_name.values[0]\n", + " var_1d\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fb5ae985", + "metadata": {}, + "outputs": [], + "source": [ + "# Note: if failed, change to another variable to plot.\n", + "try:\n", + " ds[var_1d].plot()\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "markdown", + "id": "9e41066e", + "metadata": {}, + "source": [ + "#### 2-dimensional basic plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "09bd4adc", + "metadata": {}, + "outputs": [], + "source": [ + "# (pandas query) filter n_dim==2, non-dimenssional, and not contains the 
following substrings\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_2 = df_info[(df_info.n_dim == 2) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (df_info.is_dim==False) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter_2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2cdea1a9", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " var_2d = df_filter_2.var_name.values[0]\n", + " var_2d\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f5f48879", + "metadata": {}, + "outputs": [], + "source": [ + "# Note: if failed, change to another variable to plot.\n", + "try:\n", + " print(ds[var_2d].dims)\n", + " # ds[var_2d].plot()\n", + "\n", + " # conventionally, use \"time\" as x-axis\n", + " ds[var_2d].plot(x=\"time\")\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "markdown", + "id": "7554b2a6", + "metadata": {}, + "source": [ + "#### qc-plotting (optional)\n", + "\n", + "Note: act qc plotting has more strict requirement, one of them is the associated qc variable need to have \"flag_masks\" attributes. 
Which is added by using ds = act.io.armfiles.read_netcdf(files_list), then ds.clean.cleanup()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "71c2096f", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " ds_act = act.io.armfiles.read_netcdf(full_path)\n", + " print(type(ds_act))\n", + " ds_act.clean.cleanup()\n", + "\n", + " # or \n", + " # ds.clean.cleanup()\n", + "except Exception as e:\n", + " print(\"ERROR\", e)\n", + " ds_act = ds" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "844f8505", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter valid variables for ACT qc plotting\n", + "condition = (\"qc_\" + df_info.var_name).apply(lambda x: ds_act[x].attrs.get(\"flag_masks\") is not None \n", + " if x in list(ds.variables) else False)\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_3 = df_info[(df_info.is_dim==False) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings))) &\n", + " condition\n", + " ]\n", + "df_filter_3" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ecd38cc6", + "metadata": {}, + "outputs": [], + "source": [ + "# QC Plot\n", + "try:\n", + " qc_variable = df_filter_3.var_name.values[0]\n", + " print(qc_variable)\n", + "except Exception as e:\n", + " print(e)\n", + " \n", + "try:\n", + "\n", + " # Plot\n", + " qc_display = act.plotting.TimeSeriesDisplay(ds_act)\n", + " qc_display.add_subplots((2,), figsize = (9.5,10))\n", + " qc_ax = qc_display.plot(qc_variable, subplot_index=(0,), set_title=\"QC results on field: \" + qc_variable,)\n", + " qc_ax.grid()\n", + " qc_display.qc_flag_block_plot(qc_variable, subplot_index=(1,))\n", + " qc_ax.set_xlabel(f\"UTC Time starts at {ds.time.data[0]}\")\n", + "\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e)" + ] + }, + { + "cell_type": "markdown", + "id": "d76e4d27", + "metadata": 
{}, + "source": [ + "#### bonus: choose variables to plot from a dropdown menu " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bb733804", + "metadata": {}, + "outputs": [], + "source": [ + "# Valid variables filtering\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_4 = df_info[(df_info.is_dim==False) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter_4" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "82d5c20d", + "metadata": {}, + "outputs": [], + "source": [ + "# example 1: using xarray plot\n", + "\n", + "# Uncomment the following cell to try the interactive plot (ctrl + /)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2fc9aaa4", + "metadata": {}, + "outputs": [], + "source": [ + "# %matplotlib widget\n", + "# plt.clf()\n", + "\n", + "# fig, ax = plt.subplots(figsize=(10, 4))\n", + "\n", + "# available_variables = df_filter_4.var_name.values\n", + "# @widgets.interact(var=available_variables)\n", + "# def update(var = available_variables[0]):\n", + "# fig.clear() # Remove old lines from plot and plot new one\n", + "# if len(ds[var].dims)==2:\n", + "# ds[var].plot(x=\"time\", add_colorbar=False)\n", + "# else:\n", + "# ds[var].plot()\n", + "# plt.grid()\n", + "# plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fabdd802", + "metadata": {}, + "outputs": [], + "source": [ + "# example 2: using act plot\n", + "\n", + "# Uncomment the following cell to try the interactive plot (ctrl + /)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "962b4186", + "metadata": {}, + "outputs": [], + "source": [ + "# %matplotlib widget\n", + "# plt.clf()\n", + "\n", + "# available_variables = df_filter_4.var_name.values\n", + "\n", + "\n", + "# @widgets.interact(var=available_variables)\n", + "# def 
update(var = available_variables[0]):\n", + "\n", + "# i_display = act.plotting.TimeSeriesDisplay(ds_act)\n", + "# i_display.add_subplots((1,), figsize=(10, 4))\n", + "# ax = i_display.plot(var, subplot_index=(0,), set_title=f\"{var} ({ds_act[var].attrs['long_name']})\",)\n", + "\n", + "# ax.set_xlabel(f\"UTC Time starts at {ds.time.data[0]}\")\n", + "# ax.grid()\n", + "# plt.show()\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.16" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/KAZRARSCL/arsclkazr1kollias.c1.ipynb b/VAPs/quicklook/KAZRARSCL/arsclkazr1kollias.c1.ipynb new file mode 100644 index 00000000..6b5f5017 --- /dev/null +++ b/VAPs/quicklook/KAZRARSCL/arsclkazr1kollias.c1.ipynb @@ -0,0 +1,339 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# ARSCLKAZR1KOLLIAS.C1 Plots\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/kazrarscl) for more information about this vap." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib widget\n", + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import ipywidgets as widgets\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "# Data archive directory\n", + "DATA_DIR = r'/data/archive/'\n", + "\n", + "# Datastream info\n", + "DATASTREAM_NAME = 'arsclkazr1kollias'\n", + "DATA_LEVEL = 'c1'\n", + "LOCATIONS = [{'end_date': '2020-05-31', 'facility': 'M1', 'site': 'anx', 'start_date': '2019-12-01'}, {'end_date': '2014-02-07', 'facility': 'C1', 'site': 'nsa', 'start_date': '2011-11-11'}, {'end_date': '2019-04-30', 'facility': 'M1', 'site': 'cor', 'start_date': '2018-10-15'}, {'end_date': '2012-02-07', 'facility': 'M1', 'site': 'gan', 'start_date': '2011-10-09'}, {'end_date': '2016-09-30', 'facility': 'M1', 'site': 'oli', 'start_date': '2015-10-01'}, {'end_date': '2014-03-15', 'facility': 'C1', 'site': 'sgp', 'start_date': '2011-01-18'}, {'end_date': '2014-05-03', 'facility': 'C1', 'site': 'twp', 'start_date': '2011-03-12'}, {'end_date': '2014-05-03', 'facility': 'C3', 'site': 'twp', 'start_date': '2011-01-27'}]" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "9faaf875", + "metadata": {}, + "source": [ + "## Define site, facility, and date range" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ac6764f5", + "metadata": {}, + "outputs": [], + "source": [ + "print(\"The following locations and date ranges are available for this VAP:\")\n", + "display(pd.DataFrame(LOCATIONS, columns=['site', 'facility', 'start_date', 'end_date']))" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "8d132223", + "metadata": {}, + "source": [ + "#### Define site, facility, and date 
range (date format: YYYY-MM-DD) using the variables below:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e563983a", + "metadata": {}, + "outputs": [], + "source": [ + "site_facility = ( 'sgp', 'C1' )\n", + "\n", + "date_start = '2014-03-14'\n", + "date_end = '2014-03-15'" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "bccd3dfe-2f99-49a5-bace-ea37e8dd8fc3", + "metadata": {}, + "source": [ + "## Load data files\n", + "Load data files from /data/archive/" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cb4b9a26-c574-49c0-a521-658fa553e39e", + "metadata": {}, + "outputs": [], + "source": [ + "# Compile list of files\n", + "site, facility = site_facility\n", + "d_date_start = datetime.strptime(date_start, '%Y-%m-%d')\n", + "d_date_end = datetime.strptime(date_end, '%Y-%m-%d')\n", + "dir_path = os.path.join(DATA_DIR + site, site + DATASTREAM_NAME + facility + r'.' + DATA_LEVEL )\n", + "dir_path\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6be8f3dc", + "metadata": {}, + "outputs": [], + "source": [ + "from datetime import date, timedelta\n", + "import pandas as pd\n", + "\n", + "def get_ARM_formated_dates(start_date, end_date):\n", + " \"\"\"\n", + " Get a list of ARM conventional formated date lists, based on start_date and end_date(inclusive)\n", + " EXAMPLE:\n", + " get_ARM_formated_dates(start_date=\"20180219\", end_date=\"20180221\")\n", + " >> [\"20180219\", \"20180220\", \"20180221\"] \n", + " \"\"\"\n", + " \n", + " _start_date = pd.to_datetime(start_date)\n", + " _end_date = pd.to_datetime(end_date)\n", + " \n", + " delta = _end_date - _start_date # returns timedelta \n", + " dates = []\n", + "\n", + " for i in range(delta.days + 1):\n", + " day = _start_date + timedelta(days=i)\n", + " day_formated = day.strftime(format=\"%Y%m%d\")\n", + " dates.append(day_formated)\n", + " return dates\n", + "\n", + "\n", + "get_ARM_formated_dates(start_date=date_start, 
end_date=date_end)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "51feea2e", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter a list of files based on date pattern\n", + "import glob\n", + "dates = get_ARM_formated_dates(start_date=date_start, end_date=date_end)\n", + "files_filter = []\n", + "for date in dates:\n", + " files_filter += glob.glob(f'{dir_path}/*.{date}*.*')\n", + " files_filter\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0e5d0d7", + "metadata": {}, + "outputs": [], + "source": [ + "# Load files as a single dataset\n", + "files_list = files_filter \n", + "ds = act.io.armfiles.read_netcdf(files_list)\n", + "ds.clean.cleanup()\n", + "print(f'{len(files_list)} files loaded')\n", + "ds\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "4a551094-9ec0-4b64-b80a-9940573c2f50", + "metadata": {}, + "source": [ + "## Plot time series data\n", + "#### Define the list of variables to be plotted:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d09b789e-84f1-4605-846b-a72c110c8048", + "metadata": {}, + "outputs": [], + "source": [ + "variables_to_plot = ['reflectivity_best_estimate', 'reflectivity', 'mean_doppler_velocity']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3458fb08-035b-4898-9253-0a94e6f9c97b", + "metadata": {}, + "outputs": [], + "source": [ + "ts_display = act.plotting.TimeSeriesDisplay(ds)\n", + "ts_display.add_subplots((len(variables_to_plot),), figsize = (9.5,4*len(variables_to_plot)))\n", + "\n", + "for i,v in enumerate(variables_to_plot):\n", + " ts_ax = ts_display.plot(v, subplot_index=(i,), set_title=ds.variables[v].attrs['long_name'],)\n", + " ts_ax.grid()\n", + "\n", + "plt.show()\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "194399aa-1907-452b-8ba9-bc31d7f60291", + "metadata": {}, + "source": [ + "## Quality check plots\n", + "#### Define variable for QC 
plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9a0ef63d-3eeb-48fc-a24a-43258b6134b8", + "metadata": {}, + "outputs": [], + "source": [ + "qc_variable = 'reflectivity_best_estimate'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "532663a3-4dc0-4497-bda8-018c5f91e1c4", + "metadata": {}, + "outputs": [], + "source": [ + "# QC Plot\n", + "if ('qc_'+qc_variable) in ds.variables:\n", + "\n", + " # Plot\n", + " qc_display = act.plotting.TimeSeriesDisplay(ds)\n", + " qc_display.add_subplots((2,), figsize = (9.5,10))\n", + " qc_ax = qc_display.plot(qc_variable, subplot_index=(0,), set_title=\"QC results on field: \" + qc_variable,)\n", + " qc_ax.grid()\n", + " qc_display.qc_flag_block_plot(qc_variable, subplot_index=(1,))\n", + "\n", + " plt.show()\n", + "else:\n", + " print(f'QC not available for the selected field: {qc_variable}')\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "2308db16-f362-4033-a11a-c3e5e75ad9ba", + "metadata": {}, + "source": [ + "## Field selection dropdown menu\n", + "Select variable to be plotted from a dropdown menu" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d5792fa8-2253-441a-8ac6-1e07a0345031", + "metadata": {}, + "outputs": [], + "source": [ + "plt.ioff()\n", + "\n", + "# populate dropdown menu with available variables \n", + "available_variables = [v for v in ds.variables if not('time' in v or v.startswith('qc_') or v.startswith('source_')) and 'long_name' in ds.variables[v].attrs]\n", + "d_variable = 'reflectivity_best_estimate'\n", + "dropdown = widgets.Dropdown(\n", + " options = [(ds.variables[v].attrs['long_name'], v) for v in available_variables],\n", + " value= d_variable,\n", + " description='Field:',\n", + " disabled=False,\n", + ")\n", + "dropdown.layout.margin = '0px 30% 0px 20%'\n", + "dropdown.layout.width = '50%'\n", + "\n", + "# set up display\n", + "i_display = act.plotting.TimeSeriesDisplay(ds)\n", + 
"i_display.add_subplots((1,), figsize = (9.5,5))\n", + "i_ax = i_display.plot(d_variable, subplot_index=(0,), set_title=ds.variables[d_variable].attrs['long_name'],)\n", + "i_ax.grid()\n", + "i_fig = i_display.fig\n", + "\n", + "# update plot callback function\n", + "def update_plot(change):\n", + " i_ax.cla()\n", + " i_ax_new = i_display.plot(change.new, subplot_index=(0,), set_title=ds.variables[change.new].attrs['long_name'],)\n", + " i_ax_new.grid()\n", + " i_fig.canvas.draw()\n", + " i_fig.canvas.flush_events()\n", + "\n", + "dropdown.observe(update_plot, names='value')\n", + "\n", + "widgets.AppLayout(\n", + " header=dropdown,\n", + " center=i_fig.canvas,\n", + " pane_heights=[1, 6,1]\n", + ")\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.12" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/KAZRARSCL/arsclkazrbnd1kollias.c1.ipynb b/VAPs/quicklook/KAZRARSCL/arsclkazrbnd1kollias.c1.ipynb new file mode 100644 index 00000000..6f5afc71 --- /dev/null +++ b/VAPs/quicklook/KAZRARSCL/arsclkazrbnd1kollias.c1.ipynb @@ -0,0 +1,297 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# ARSCLKAZRBND1KOLLIAS.C1 Plots\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/kazrarscl) for more information about this vap." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib widget\n", + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import ipywidgets as widgets\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "# Data archive directory\n", + "DATA_DIR = r'/data/archive/'\n", + "\n", + "# Datastream info\n", + "DATASTREAM_NAME = 'arsclkazrbnd1kollias'\n", + "DATA_LEVEL = 'c1'\n", + "LOCATIONS = [{'end_date': '2020-05-31', 'facility': 'M1', 'site': 'anx', 'start_date': '2019-12-01'}, {'end_date': '2014-02-07', 'facility': 'C1', 'site': 'nsa', 'start_date': '2011-11-11'}, {'end_date': '2019-04-30', 'facility': 'M1', 'site': 'cor', 'start_date': '2018-10-15'}, {'end_date': '2012-02-07', 'facility': 'M1', 'site': 'gan', 'start_date': '2011-10-09'}, {'end_date': '2014-03-15', 'facility': 'C1', 'site': 'sgp', 'start_date': '2011-01-18'}, {'end_date': '2014-05-03', 'facility': 'C1', 'site': 'twp', 'start_date': '2011-03-12'}, {'end_date': '2014-05-03', 'facility': 'C3', 'site': 'twp', 'start_date': '2011-01-27'}]" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "9faaf875", + "metadata": {}, + "source": [ + "## Define site, facility, and date range" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ac6764f5", + "metadata": {}, + "outputs": [], + "source": [ + "print(\"The following locations and date ranges are available for this VAP:\")\n", + "display(pd.DataFrame(LOCATIONS, columns=['site', 'facility', 'start_date', 'end_date']))" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "8d132223", + "metadata": {}, + "source": [ + "#### Define site, facility, and date range (date format: YYYY-MM-DD) using the variables below:" + ] + }, + { + "cell_type": 
"code", + "execution_count": null, + "id": "e563983a", + "metadata": {}, + "outputs": [], + "source": [ + "site_facility = ( 'sgp', 'C1' )\n", + "\n", + "date_start = '2014-03-14'\n", + "date_end = '2014-03-15'" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "bccd3dfe-2f99-49a5-bace-ea37e8dd8fc3", + "metadata": {}, + "source": [ + "## Load data files\n", + "Load data files from /data/archive/" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cb4b9a26-c574-49c0-a521-658fa553e39e", + "metadata": {}, + "outputs": [], + "source": [ + "# Compile list of files\n", + "site, facility = site_facility\n", + "d_date_start = datetime.strptime(date_start, '%Y-%m-%d')\n", + "d_date_end = datetime.strptime(date_end, '%Y-%m-%d')\n", + "dir_path = os.path.join(DATA_DIR + site, site + DATASTREAM_NAME + facility + r'.' + DATA_LEVEL )\n", + "dir_path\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6be8f3dc", + "metadata": {}, + "outputs": [], + "source": [ + "from datetime import date, timedelta\n", + "import pandas as pd\n", + "\n", + "def get_ARM_formated_dates(start_date, end_date):\n", + " \"\"\"\n", + " Get a list of ARM conventional formated date lists, based on start_date and end_date(inclusive)\n", + " EXAMPLE:\n", + " get_ARM_formated_dates(start_date=\"20180219\", end_date=\"20180221\")\n", + " >> [\"20180219\", \"20180220\", \"20180221\"] \n", + " \"\"\"\n", + " \n", + " _start_date = pd.to_datetime(start_date)\n", + " _end_date = pd.to_datetime(end_date)\n", + " \n", + " delta = _end_date - _start_date # returns timedelta \n", + " dates = []\n", + "\n", + " for i in range(delta.days + 1):\n", + " day = _start_date + timedelta(days=i)\n", + " day_formated = day.strftime(format=\"%Y%m%d\")\n", + " dates.append(day_formated)\n", + " return dates\n", + "\n", + "\n", + "get_ARM_formated_dates(start_date=date_start, end_date=date_end)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": 
"51feea2e", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter a list of files based on date pattern\n", + "import glob\n", + "dates = get_ARM_formated_dates(start_date=date_start, end_date=date_end)\n", + "files_filter = []\n", + "for date in dates:\n", + " files_filter += glob.glob(f'{dir_path}/*.{date}*.*')\n", + " files_filter\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0e5d0d7", + "metadata": {}, + "outputs": [], + "source": [ + "# Load files as a single dataset\n", + "files_list = files_filter \n", + "ds = act.io.armfiles.read_netcdf(files_list)\n", + "ds.clean.cleanup()\n", + "print(f'{len(files_list)} files loaded')\n", + "ds\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "4a551094-9ec0-4b64-b80a-9940573c2f50", + "metadata": {}, + "source": [ + "## Plot time series data\n", + "#### Define the list of variables to be plotted:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d09b789e-84f1-4605-846b-a72c110c8048", + "metadata": {}, + "outputs": [], + "source": [ + "variables_to_plot = ['cloud_base_best_estimate', 'cloud_layer_base_height', 'cloud_layer_top_height']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3458fb08-035b-4898-9253-0a94e6f9c97b", + "metadata": {}, + "outputs": [], + "source": [ + "ts_display = act.plotting.TimeSeriesDisplay(ds)\n", + "ts_display.add_subplots((len(variables_to_plot),), figsize = (9.5,4*len(variables_to_plot)))\n", + "\n", + "for i,v in enumerate(variables_to_plot):\n", + " ts_ax = ts_display.plot(v, subplot_index=(i,), set_title=ds.variables[v].attrs['long_name'],)\n", + " ts_ax.grid()\n", + "\n", + "plt.show()\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "2308db16-f362-4033-a11a-c3e5e75ad9ba", + "metadata": {}, + "source": [ + "## Field selection dropdown menu\n", + "Select variable to be plotted from a dropdown menu" + ] + }, + { + "cell_type": "code", + 
"execution_count": null, + "id": "d5792fa8-2253-441a-8ac6-1e07a0345031", + "metadata": {}, + "outputs": [], + "source": [ + "plt.ioff()\n", + "\n", + "# populate dropdown menu with available variables \n", + "available_variables = [v for v in ds.variables if not('time' in v or v.startswith('qc_') or v.startswith('source_')) and 'long_name' in ds.variables[v].attrs]\n", + "d_variable = 'cloud_base_best_estimate'\n", + "dropdown = widgets.Dropdown(\n", + " options = [(ds.variables[v].attrs['long_name'], v) for v in available_variables],\n", + " value= d_variable,\n", + " description='Field:',\n", + " disabled=False,\n", + ")\n", + "dropdown.layout.margin = '0px 30% 0px 20%'\n", + "dropdown.layout.width = '50%'\n", + "\n", + "# set up display\n", + "i_display = act.plotting.TimeSeriesDisplay(ds)\n", + "i_display.add_subplots((1,), figsize = (9.5,5))\n", + "i_ax = i_display.plot(d_variable, subplot_index=(0,), set_title=ds.variables[d_variable].attrs['long_name'],)\n", + "i_ax.grid()\n", + "i_fig = i_display.fig\n", + "\n", + "# update plot callback function\n", + "def update_plot(change):\n", + " i_ax.cla()\n", + " i_ax_new = i_display.plot(change.new, subplot_index=(0,), set_title=ds.variables[change.new].attrs['long_name'],)\n", + " i_ax_new.grid()\n", + " i_fig.canvas.draw()\n", + " i_fig.canvas.flush_events()\n", + "\n", + "dropdown.observe(update_plot, names='value')\n", + "\n", + "widgets.AppLayout(\n", + " header=dropdown,\n", + " center=i_fig.canvas,\n", + " pane_heights=[1, 6,1]\n", + ")\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.12" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + 
"version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/KAZRARSCLCLOUDSAT/KAZRARSCLCLOUDSAT_tutorial.ipynb b/VAPs/quicklook/KAZRARSCLCLOUDSAT/KAZRARSCLCLOUDSAT_tutorial.ipynb new file mode 100644 index 00000000..2d5b8bcf --- /dev/null +++ b/VAPs/quicklook/KAZRARSCLCLOUDSAT/KAZRARSCLCLOUDSAT_tutorial.ipynb @@ -0,0 +1,838 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# ARSCLKAZRCLOUDSAT.C1 Notebook\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/kazrarsclcloudsat) for more information about this vap." + ] + }, + { + "cell_type": "markdown", + "id": "97097763", + "metadata": {}, + "source": [ + "In this notebook, we demonstrate the workflow to explore ARM vap data (using arsclkazrcloudsat as an example.) Value-added products (VAPs) are higher-order data products that have been analyzed and processed to ease scientists’ use of ARM data in atmospheric research and global climate models. \n", + "Here is the main content we will cover." 
+ ] + }, + { + "cell_type": "markdown", + "id": "eddec40f", + "metadata": {}, + "source": [ + "# Table of Contents\n", + "## Access the data\n", + "* How to retrieve the data\n", + "* Data path and file name conventions\n", + "* Load data\n", + "## Explore the data\n", + "* NetCDF Data structure\n", + "* Xarray essentials\n", + "* Xarray Variable\n", + "## Plot the data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "import random\n", + "\n", + "import glob\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "id": "2372d5be", + "metadata": {}, + "source": [ + "## Access the data" + ] + }, + { + "cell_type": "markdown", + "id": "5b0d9684", + "metadata": {}, + "source": [ + "### How to retrieve the data\n", + "We assume the path \"/data/archive\" is available where you are running this notebook. You can use `os.path.exists(\"/data/archive\")` to verify that the path exists on your machine.\n", + "\n", + "\n", + "### Data path and file name conventions\n", + "There are several common terms regarding ARM data, for example, data-stream-name, data-level, site, facility, instrument, etc. (For more details, please see [ARM Data File Standards Version 1.2](https://www.google.com/search?q=arm+datastream+facility+cite+definition&rlz=1C1GCEJ_enUS1029US1029&ei=hb41ZICrBPukqtsPvMODIA&ved=0ahUKEwjAgeiS0aL-AhV7kmoFHbzhAAQQ4dUDCBA&uact=5&oq=arm+datastream+facility+cite+definition&gs_lcp=Cgxnd3Mtd2l6LXNlcnAQAzIFCAAQogQyBQgAEKIEMgUIABCiBDIFCAAQogQyBQgAEKIEOgoIABBHENYEELADOgoIIRCgARDDBBAKSgQIQRgAUJ4DWIoOYO0PaAFwAXgAgAGMAYgBuQmSAQMyLjmYAQCgAQHIAQjAAQE&sclient=gws-wiz-serp).) 
\n", + "\n", + "For example, this notebook is called `arsclkazrcloudsat.c1`, where `arsclkazrcloudsat` is the \"datastream name\", and `c1` is the \"data level\".\n", + "\n", + "This datastream also contains site `awr` and facility `M1`. (Note: an individual datastream might have multiple site-facility pairs.)\n", + "In such a case, the data of this data-stream is stored at `/data/archive/awr/awrarsclkazrcloudsatM1.c1`, which is in the format of `{DATA_DIR}/{site}/{site}{DATASTREAM_NAME}{facility}.{DATA_LEVEL}`. We can use the following method to assign the data-stream directory: `datastream_dir = os.path.join(DATA_DIR, site, site + DATASTREAM_NAME + facility + '.' + DATA_LEVEL )`\n", + "\n", + "The data files under datastream_dir also follow naming conventions. But once you reach the datastream_dir level, the most important file naming convention to differentiate the files is \"yyyyMMdd.hhmmss\", which comes in handy to filter files based on datetime. For example, we can use `glob.glob(f'{datastream_dir}/*.200709*.*')` to select the files from September 2007.\n", + "\n", + "(Note: refer to the https://adc.arm.gov/discovery/#/ and https://adc.arm.gov/solr8/metadata/select API to explore ARM datastreams and their associated available sites and facilities.)\n", + "\n", + "Please see the following examples in action" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d7e9eb85", + "metadata": {}, + "outputs": [], + "source": [ + "# Verify if DATA_DIR path exists\n", + "DATA_DIR = \"/data/archive\"\n", + "os.path.exists(DATA_DIR)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "586993fd", + "metadata": {}, + "outputs": [], + "source": [ + "# Speicify datastream_dir following the path conventions and check its existence\n", + "DATASTREAM_NAME = \"arsclkazrcloudsat\"\n", + "DATA_LEVEL = \"c1\"\n", + "site = \"awr\"\n", + "facility = \"M1\"\n", + "datastream_dir = os.path.join(DATA_DIR, site, site + DATASTREAM_NAME + facility + '.' 
+ DATA_LEVEL )\n", + "print(datastream_dir)\n", + "print(os.path.exists(datastream_dir))\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0742f7c1", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: list 5 (random) files under datastream_dir\n", + "files = os.listdir(datastream_dir)\n", + "files[:5]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "39b98a36", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: get most recent file\n", + "list_of_files = glob.glob(f\"{datastream_dir}/*\") # * means all if need specific format then *.csv\n", + "latest_file = max(list_of_files, key=os.path.getctime)\n", + "latest_file" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "902d514e", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus sort datastream files based on datetime\n", + "files = os.listdir(datastream_dir)\n", + "file_sorted = files.copy()\n", + "file_sorted.sort() \n", + "print(file_sorted[:5])\n", + "\n", + "# to reverse\n", + "file_sorted_reverse = files.copy() \n", + "file_sorted_reverse.sort(reverse=True)\n", + "print(file_sorted_reverse[:5])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ec5923b2", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: pattern matching\n", + "# filter the 200709** files under datastream_dir\n", + "files_filter = glob.glob(f'{datastream_dir}/*.200709*.*')\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c4a4aa18", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: use armnotebook_utils.file_filter (TODO) to filter files based on datastream info \n" + ] + }, + { + "cell_type": "markdown", + "id": "cfeb9efc", + "metadata": {}, + "source": [ + "### Load data\n", + "The arm data is stored in [NetCDF](https://en.wikipedia.org/wiki/NetCDF#:~:text=%22NetCDF%20(network%20Common%20Data%20Form,format%20for%20representing%20scientific%20data.) format. 
We can use xarray's `open_dataset` method to load a single file, or `open_mfdataset` to open multiple files. (Note: the latter will still return a single xarray dataset object by combining multiple files.)\n", + "\n", + "Note: `open_dataset` keeps the file handle open and lazy-loads its contents, whereas `load_dataset` reads the whole file into memory (it passes all of its parameters directly to `open_dataset`). `open_dataset` is therefore preferable to `load_dataset` for memory efficiency. (For more details, please see [xarray.open_dataset](https://docs.xarray.dev/en/stable/generated/xarray.open_dataset.html))\n", + "\n", + "See the following example in action" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a440a329", + "metadata": {}, + "outputs": [], + "source": [ + "# open single file\n", + "full_path = latest_file\n", + "print(full_path)\n", + "ds_single = xr.open_dataset(full_path)\n", + "ds_single" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0143a3d", + "metadata": {}, + "outputs": [], + "source": [ + "print(ds_single)\n", + "print(type(ds_single))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1c0f8939", + "metadata": {}, + "outputs": [], + "source": [ + "# open multiple files\n", + "n_files = 3\n", + "full_paths = [os.path.join(datastream_dir, f_path) for f_path in file_sorted_reverse[:n_files]]\n", + "print(full_paths)\n", + "try: # Note: sometimes multiple files cannot be merged, so we used try except here.\n", + " ds_mutiple = xr.open_mfdataset(full_paths)\n", + " # ds_mutiple\n", + " print(type(ds_mutiple))\n", + " print(ds_mutiple)\n", + "except Exception as e:\n", + " print(e)" + ] + }, + { + "cell_type": "markdown", + "id": "3fa59943", + "metadata": {}, + "source": [ + "## Explore the data" + ] + }, + { + "cell_type": "markdown", + "id": "571f69b9", + "metadata": {}, + "source": [ + "### NetCDF Data structure \n", + "\n", + "\n", + "(If you are confident with NetCDF basics and xarray essentials, feel free to skip this section.)\n", + "Before we dive into 
data exploration, there are some essential concepts we should be familiar with (shown below): \n", + "* Dataset\n", + "* Data array\n", + "* Variable\n", + "* Dimension\n", + "* Coordinate\n", + "* Data Type\n", + "* Meta Data (Attributes)\n", + "\n", + "We will not go into details about NetCDF basics and here are some references you might find helpful\n", + "* [Components of a NetCDF Dataset](https://iprc.soest.hawaii.edu/users/xfu/tool/guidef-7.html#)\n", + "* [Network Common Data Form (NetCDF)](https://www.unidata.ucar.edu/software/netcdf/) and [A Brief History of (netCDF) Time](https://www.unidata.ucar.edu/software/netcdf/time/recs.html)\n", + "\n", + "\n", + "\n", + "### Xarray essentials\n", + "Earlier we introduced the xarray package and used `open_dataset` and `open_mfdataset` to retrieve a NetCDF data file as an xarray Dataset object. Recall that you can review the data in a notebook by using `print(ds)` or `ds`. \n", + "\n", + "Using xarray to retrieve the aforementioned NetCDF basics is straightforward. 
In general,\n", + "* Dataset: `ds`\n", + "* Data array: `ds.variables`\n", + "* Variable: `ds.variables`\n", + "* Dimension: `ds.dims`\n", + "* Coordinate: `ds.coords`\n", + "* Data Type: `type(ds)`\n", + "* Meta Data (Attributes): `ds.attrs`\n", + "\n", + "Also, here are some references if you are new to xarray\n", + "* [Xarray in 45 minutes](https://tutorial.xarray.dev/overview/xarray-in-45-min.html)\n", + "* [Handling NetCDF Files using XArray for Absolute Beginners](https://towardsdatascience.com/handling-netcdf-files-using-xarray-for-absolute-beginners-111a8ab4463f)\n", + "\n", + "Try the following commands in action\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b3eac323", + "metadata": {}, + "outputs": [], + "source": [ + "ds = ds_single" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "77ecf85d", + "metadata": {}, + "outputs": [], + "source": [ + "# Dataset \n", + "ds" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "25e7de09", + "metadata": {}, + "outputs": [], + "source": [ + "# Data array, Variable\n", + "# Note: the info can be overwhelming. 
Do not attempt to grasp everything at the first glance.\n", + "ds.variables" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8c41b67e", + "metadata": {}, + "outputs": [], + "source": [ + "# dimenssions\n", + "ds.dims" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "156f1dfc", + "metadata": {}, + "outputs": [], + "source": [ + "# coordinates\n", + "ds.coords" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "277d6064", + "metadata": {}, + "outputs": [], + "source": [ + "# Meta Data (Attributes)\n", + "ds.attrs" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d334681f", + "metadata": {}, + "outputs": [], + "source": [ + "# type\n", + "print(type(ds))\n", + "print(type(ds.variables))\n", + "print(type(ds.dims))\n", + "print(type(ds.coords))\n", + "print(type(ds.attrs))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "643399d6", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: retrieve variable names only\n", + "list(ds.variables)" + ] + }, + { + "cell_type": "markdown", + "id": "1cf36878", + "metadata": {}, + "source": [ + "#### Discussion: variables vs. coordinates vs. dimensions. \n", + "From [Components of a NetCDF Dataset](https://iprc.soest.hawaii.edu/users/xfu/tool/guidef-7.html#):\n", + "\n", + "* Dimensions: A dimension may be used to represent a real physical dimension, for example, time, latitude, longitude, or height. A dimension might also be used to index other quantities, for example station or model-run-number.\n", + "A netCDF dimension has both a name and a length. A dimension length is an arbitrary positive integer, except that one dimension in a netCDF dataset can have the length UNLIMITED.\n", + "\n", + "* Variables: Variables are used to store the bulk of the data in a netCDF dataset. A variable represents an array of values of the same type. A scalar value is treated as a 0-dimensional array. 
A variable has a name, a data type, and a shape described by its list of dimensions specified when the variable is created. A variable may also have associated attributes, which may be added, deleted or changed after the variable is created.\n", + "\n", + "* Coordinate (Variables): It is legal for a variable to have the same name as a dimension. Such variables have no special meaning to the netCDF library. However there is a convention that such variables should be treated in a special way by software using this library.\n", + "A variable with the same name as a dimension is called a coordinate variable. It typically defines a physical coordinate corresponding to that dimension.\n", + "\n", + "\n", + "Tip: By definition, coordinates (variables named after a dimension) are also variables. For example, `ds.time` is a coordinate, but it is also a special variable. In practice, however, when we talk about a variable, we mean a regular (non-coordinate) variable. We will use this convention for the remainder of the notebook. " + ] + }, + { + "cell_type": "markdown", + "id": "d47120f9", + "metadata": {}, + "source": [ + "### Xarray Variable\n", + "\n", + "\n", + "We can use `ds.variables` to access the variables of a dataset. But printing out the whole dataset can often be overwhelming. Instead, we will work on an individual variable (also called an xarray DataArray) using the `ds[var_name]` syntax, where `var_name` is the name of the variable. See the following examples in action.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5a57e136", + "metadata": {}, + "outputs": [], + "source": [ + "print(type(ds[\"time\"]))\n", + "ds[\"time\"]" + ] + }, + { + "cell_type": "markdown", + "id": "51ec643e", + "metadata": {}, + "source": [ + "#### Variable properties\n", + "For an individual variable, the following properties are the most commonly used, listed with the associated xarray attribute to retrieve them. 
(assuming var = ds[var_name])\n", + "* name: `var.name`\n", + "* data content: `var.data`\n", + "* attributes: `var.attrs`\n", + "* dimensions: `var.dims`\n", + "* data type: `var.data.dtype`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a632a525", + "metadata": {}, + "outputs": [], + "source": [ + "var_name = \"time\"\n", + "var = ds[var_name]\n", + "\n", + "print(\"var.name: \\n\", var.name, \"\\n\")\n", + "print(\"var.data: \\n\", var.data, \"\\n\")\n", + "print(\"var.attrs: \\n\", var.attrs, \"\\n\")\n", + "print(\"var.dims: \\n\", var.dims, \"\\n\")\n", + "print(\"var.data.dtype: \\n\", var.data.dtype, \"\\n\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b256b07f", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: indivdual variable info table (collect and display variable info as a tabular format)\n", + "df_info = pd.DataFrame()\n", + "df_info[\"var_name\"] = list(ds.variables)\n", + "df_info[\"dims\"] = df_info.var_name.apply(lambda x: ds[x].dims)\n", + "df_info[\"is_dim\"] = df_info.var_name.apply(lambda x: x in ds.dims)\n", + "df_info[\"n_dim\"] = df_info.var_name.apply(lambda x: len(ds[x].dims))\n", + "df_info[\"attrs\"] = df_info.var_name.apply(lambda x: ds[x].attrs)\n", + "df_info[\"dtype\"] = df_info.var_name.apply(lambda x: ds[x].data.dtype)\n", + "df_info" + ] + }, + { + "cell_type": "markdown", + "id": "e6a36b96", + "metadata": {}, + "source": [ + "### Data cleaning/Preprocessing (skipped)\n", + "Data cleaning and preprocessing is an important stage in the data analysis pipeline. However, it is out of the scope of this notebook. For more information about data cleaning and preprocessing basics, here are some references. 
\n", + "* [Xarray Fundamentals](https://earth-env-data-science.github.io/lectures/xarray/xarray.html)\n", + "* [Xarray Tutorial — Pangeo Gallery documentation](http://gallery.pangeo.io/repos/pangeo-data/pangeo-tutorial-gallery/xarray.html)\n", + "* [Pythonic Data Cleaning With pandas and NumPy](https://realpython.com/python-data-cleaning-numpy-pandas/)\n", + "* [Pandas - Cleaning Data](https://www.w3schools.com/python/pandas/pandas_cleaning.asp)" + ] + }, + { + "cell_type": "markdown", + "id": "ae6e1a69", + "metadata": {}, + "source": [ + "### Plotting\n", + "Data visualization (or plotting) is another important data analysis topic and deserves its own discussion. Here in this notebook we will only demonstrate basic tools for simple data visualization tasks.\n", + "\n", + "Here are some references you might find useful:\n", + "* xarray plotting: https://docs.xarray.dev/en/stable/user-guide/plotting.html\n", + "* Atmospheric Community Toolkit (ACT): https://arm-doe.github.io/ACT/index.html\n", + "\n", + "Note: this notebook is auto-generated using a template. It uses a general heuristic to select variable(s) to plot and is not customized to each individual notebook. Feel free to change the variables of interest, especially if a certain figure fails to plot." + ] + }, + { + "cell_type": "markdown", + "id": "7e417fb2", + "metadata": {}, + "source": [ + "#### 1-dimensional basic time series plot\n", + "\n", + "For the following plot we would like to find a variable such that\n", + "* it has one and only one dimension\n", + "* \"time\" is its coordinate variable\n", + "* it is not a dimension itself,\n", + "* its name does not contain any of the substrings [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "\n", + "Please see the following example in action. 
(For more details about pandas filtering, please see the following references.)\n", + "* [pandas: multiple conditions while indexing data frame](https://stackoverflow.com/questions/22591174/pandas-multiple-conditions-while-indexing-data-frame-unexpected-behavior)\n", + "* [pandas.DataFrame.apply](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.apply.html)\n", + "* [How to test if a string contains one of the substrings in a list...](https://stackoverflow.com/questions/26577516/how-to-test-if-a-string-contains-one-of-the-substrings-in-a-list-in-pandas)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "30edac2d", + "metadata": {}, + "outputs": [], + "source": [ + "# (pandas query) filter n_dim==1, non-dimenssional, and not contains the following substrings\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter = df_info[(df_info.n_dim == 1) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (df_info.is_dim==False) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4777a659", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " var_1d = df_filter.var_name.values[0]\n", + " var_1d\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fb5ae985", + "metadata": {}, + "outputs": [], + "source": [ + "# Note: if failed, change to another variable to plot.\n", + "try:\n", + " ds[var_1d].plot()\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "markdown", + "id": "9e41066e", + "metadata": {}, + "source": [ + "#### 2-dimensional basic plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "09bd4adc", + "metadata": {}, + "outputs": [], + "source": [ + "# (pandas query) filter n_dim==2, non-dimenssional, and not contains the 
following substrings\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_2 = df_info[(df_info.n_dim == 2) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (df_info.is_dim==False) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter_2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2cdea1a9", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " var_2d = df_filter_2.var_name.values[0]\n", + " var_2d\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f5f48879", + "metadata": {}, + "outputs": [], + "source": [ + "# Note: if failed, change to another variable to plot.\n", + "try:\n", + " print(ds[var_2d].dims)\n", + " # ds[var_2d].plot()\n", + "\n", + " # conventionally, use \"time\" as x-axis\n", + " ds[var_2d].plot(x=\"time\")\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "markdown", + "id": "7554b2a6", + "metadata": {}, + "source": [ + "#### qc-plotting (optional)\n", + "\n", + "Note: act qc plotting has more strict requirement, one of them is the associated qc variable need to have \"flag_masks\" attributes. 
Which is added by using ds = act.io.armfiles.read_netcdf(files_list), then ds.clean.cleanup()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "71c2096f", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " ds_act = act.io.armfiles.read_netcdf(full_path)\n", + " print(type(ds_act))\n", + " ds_act.clean.cleanup()\n", + "\n", + " # or \n", + " # ds.clean.cleanup()\n", + "except Exception as e:\n", + " print(\"ERROR\", e)\n", + " ds_act = ds" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "844f8505", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter valid variables for ACT qc plotting\n", + "condition = (\"qc_\" + df_info.var_name).apply(lambda x: ds_act[x].attrs.get(\"flag_masks\") is not None \n", + " if x in list(ds.variables) else False)\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_3 = df_info[(df_info.is_dim==False) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings))) &\n", + " condition\n", + " ]\n", + "df_filter_3" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ecd38cc6", + "metadata": {}, + "outputs": [], + "source": [ + "# QC Plot\n", + "try:\n", + " qc_variable = df_filter_3.var_name.values[0]\n", + " print(qc_variable)\n", + "except Exception as e:\n", + " print(e)\n", + " \n", + "try:\n", + "\n", + " # Plot\n", + " qc_display = act.plotting.TimeSeriesDisplay(ds_act)\n", + " qc_display.add_subplots((2,), figsize = (9.5,10))\n", + " qc_ax = qc_display.plot(qc_variable, subplot_index=(0,), set_title=\"QC results on field: \" + qc_variable,)\n", + " qc_ax.grid()\n", + " qc_display.qc_flag_block_plot(qc_variable, subplot_index=(1,))\n", + " qc_ax.set_xlabel(f\"UTC Time starts at {ds.time.data[0]}\")\n", + "\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e)" + ] + }, + { + "cell_type": "markdown", + "id": "d76e4d27", + "metadata": 
{}, + "source": [ + "#### bonus: choose variables to plot from a dropdown menu " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bb733804", + "metadata": {}, + "outputs": [], + "source": [ + "# Valid variables filtering\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_4 = df_info[(df_info.is_dim==False) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter_4" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "82d5c20d", + "metadata": {}, + "outputs": [], + "source": [ + "# example 1: using xarray plot\n", + "\n", + "# Uncomment the following cell to try the interactive plot (ctrl + /)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2fc9aaa4", + "metadata": {}, + "outputs": [], + "source": [ + "# %matplotlib widget\n", + "# plt.clf()\n", + "\n", + "# fig, ax = plt.subplots(figsize=(10, 4))\n", + "\n", + "# available_variables = df_filter_4.var_name.values\n", + "# @widgets.interact(var=available_variables)\n", + "# def update(var = available_variables[0]):\n", + "# fig.clear() # Remove old lines from plot and plot new one\n", + "# if len(ds[var].dims)==2:\n", + "# ds[var].plot(x=\"time\", add_colorbar=False)\n", + "# else:\n", + "# ds[var].plot()\n", + "# plt.grid()\n", + "# plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fabdd802", + "metadata": {}, + "outputs": [], + "source": [ + "# example 2: using act plot\n", + "\n", + "# Uncomment the following cell to try the interactive plot (ctrl + /)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "962b4186", + "metadata": {}, + "outputs": [], + "source": [ + "# %matplotlib widget\n", + "# plt.clf()\n", + "\n", + "# available_variables = df_filter_4.var_name.values\n", + "\n", + "\n", + "# @widgets.interact(var=available_variables)\n", + "# def 
update(var = available_variables[0]):\n", + "\n", + "# i_display = act.plotting.TimeSeriesDisplay(ds_act)\n", + "# i_display.add_subplots((1,), figsize=(10, 4))\n", + "# ax = i_display.plot(var, subplot_index=(0,), set_title=f\"{var} ({ds_act[var].attrs['long_name']})\",)\n", + "\n", + "# ax.set_xlabel(f\"UTC Time starts at {ds.time.data[0]}\")\n", + "# ax.grid()\n", + "# plt.show()\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.16" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/KAZRARSCLCLOUDSAT/arsclkazrcloudsat.c1.ipynb b/VAPs/quicklook/KAZRARSCLCLOUDSAT/arsclkazrcloudsat.c1.ipynb new file mode 100644 index 00000000..6db5a260 --- /dev/null +++ b/VAPs/quicklook/KAZRARSCLCLOUDSAT/arsclkazrcloudsat.c1.ipynb @@ -0,0 +1,339 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# ARSCLKAZRCLOUDSAT.C1 Plots\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/kazrarsclcloudsat) for more information about this vap." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib widget\n", + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import ipywidgets as widgets\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "# Data archive directory\n", + "DATA_DIR = r'/data/archive/'\n", + "\n", + "# Datastream info\n", + "DATASTREAM_NAME = 'arsclkazrcloudsat'\n", + "DATA_LEVEL = 'c1'\n", + "LOCATIONS = [{'end_date': '2016-12-31', 'facility': 'M1', 'site': 'awr', 'start_date': '2016-04-01'}, {'end_date': '2017-06-30', 'facility': 'C1', 'site': 'nsa', 'start_date': '2012-03-01'}, {'end_date': '2017-11-29', 'facility': 'M1', 'site': 'oli', 'start_date': '2015-11-01'}, {'end_date': '2017-08-31', 'facility': 'C1', 'site': 'sgp', 'start_date': '2012-03-01'}]" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "9faaf875", + "metadata": {}, + "source": [ + "## Define site, facility, and date range" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ac6764f5", + "metadata": {}, + "outputs": [], + "source": [ + "print(\"The following locations and date ranges are available for this VAP:\")\n", + "display(pd.DataFrame(LOCATIONS, columns=['site', 'facility', 'start_date', 'end_date']))" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "8d132223", + "metadata": {}, + "source": [ + "#### Define site, facility, and date range (date format: YYYY-MM-DD) using the variables below:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e563983a", + "metadata": {}, + "outputs": [], + "source": [ + "site_facility = ( 'sgp', 'C1' )\n", + "\n", + "date_start = '2017-08-30'\n", + "date_end = '2017-08-31'" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", 
+ "id": "bccd3dfe-2f99-49a5-bace-ea37e8dd8fc3", + "metadata": {}, + "source": [ + "## Load data files\n", + "Load data files from /data/archive/" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cb4b9a26-c574-49c0-a521-658fa553e39e", + "metadata": {}, + "outputs": [], + "source": [ + "# Compile list of files\n", + "site, facility = site_facility\n", + "d_date_start = datetime.strptime(date_start, '%Y-%m-%d')\n", + "d_date_end = datetime.strptime(date_end, '%Y-%m-%d')\n", + "dir_path = os.path.join(DATA_DIR + site, site + DATASTREAM_NAME + facility + r'.' + DATA_LEVEL )\n", + "dir_path\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6be8f3dc", + "metadata": {}, + "outputs": [], + "source": [ + "from datetime import date, timedelta\n", + "import pandas as pd\n", + "\n", + "def get_ARM_formated_dates(start_date, end_date):\n", + " \"\"\"\n", + " Get a list of ARM conventional formated date lists, based on start_date and end_date(inclusive)\n", + " EXAMPLE:\n", + " get_ARM_formated_dates(start_date=\"20180219\", end_date=\"20180221\")\n", + " >> [\"20180219\", \"20180220\", \"20180221\"] \n", + " \"\"\"\n", + " \n", + " _start_date = pd.to_datetime(start_date)\n", + " _end_date = pd.to_datetime(end_date)\n", + " \n", + " delta = _end_date - _start_date # returns timedelta \n", + " dates = []\n", + "\n", + " for i in range(delta.days + 1):\n", + " day = _start_date + timedelta(days=i)\n", + " day_formated = day.strftime(format=\"%Y%m%d\")\n", + " dates.append(day_formated)\n", + " return dates\n", + "\n", + "\n", + "get_ARM_formated_dates(start_date=date_start, end_date=date_end)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "51feea2e", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter a list of files based on date pattern\n", + "import glob\n", + "dates = get_ARM_formated_dates(start_date=date_start, end_date=date_end)\n", + "files_filter = []\n", + "for date in dates:\n", + " 
files_filter += glob.glob(f'{dir_path}/*.{date}*.*')\n", + " files_filter\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0e5d0d7", + "metadata": {}, + "outputs": [], + "source": [ + "# Load files as a single dataset\n", + "files_list = files_filter \n", + "ds = act.io.armfiles.read_netcdf(files_list)\n", + "ds.clean.cleanup()\n", + "print(f'{len(files_list)} files loaded')\n", + "ds\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "4a551094-9ec0-4b64-b80a-9940573c2f50", + "metadata": {}, + "source": [ + "## Plot time series data\n", + "#### Define the list of variables to be plotted:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d09b789e-84f1-4605-846b-a72c110c8048", + "metadata": {}, + "outputs": [], + "source": [ + "variables_to_plot = ['reflectivity_best_estimate', 'reflectivity', 'mean_doppler_velocity']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3458fb08-035b-4898-9253-0a94e6f9c97b", + "metadata": {}, + "outputs": [], + "source": [ + "ts_display = act.plotting.TimeSeriesDisplay(ds)\n", + "ts_display.add_subplots((len(variables_to_plot),), figsize = (9.5,4*len(variables_to_plot)))\n", + "\n", + "for i,v in enumerate(variables_to_plot):\n", + " ts_ax = ts_display.plot(v, subplot_index=(i,), set_title=ds.variables[v].attrs['long_name'],)\n", + " ts_ax.grid()\n", + "\n", + "plt.show()\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "194399aa-1907-452b-8ba9-bc31d7f60291", + "metadata": {}, + "source": [ + "## Quality check plots\n", + "#### Define variable for QC plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9a0ef63d-3eeb-48fc-a24a-43258b6134b8", + "metadata": {}, + "outputs": [], + "source": [ + "qc_variable = 'reflectivity_best_estimate'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "532663a3-4dc0-4497-bda8-018c5f91e1c4", + "metadata": {}, + "outputs": [], + 
"source": [ + "# QC Plot\n", + "if ('qc_'+qc_variable) in ds.variables:\n", + "\n", + " # Plot\n", + " qc_display = act.plotting.TimeSeriesDisplay(ds)\n", + " qc_display.add_subplots((2,), figsize = (9.5,10))\n", + " qc_ax = qc_display.plot(qc_variable, subplot_index=(0,), set_title=\"QC results on field: \" + qc_variable,)\n", + " qc_ax.grid()\n", + " qc_display.qc_flag_block_plot(qc_variable, subplot_index=(1,))\n", + "\n", + " plt.show()\n", + "else:\n", + " print(f'QC not available for the selected field: {qc_variable}')\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "2308db16-f362-4033-a11a-c3e5e75ad9ba", + "metadata": {}, + "source": [ + "## Field selection dropdown menu\n", + "Select variable to be plotted from a dropdown menu" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d5792fa8-2253-441a-8ac6-1e07a0345031", + "metadata": {}, + "outputs": [], + "source": [ + "plt.ioff()\n", + "\n", + "# populate dropdown menu with available variables \n", + "available_variables = [v for v in ds.variables if not('time' in v or v.startswith('qc_') or v.startswith('source_')) and 'long_name' in ds.variables[v].attrs]\n", + "d_variable = 'reflectivity_best_estimate'\n", + "dropdown = widgets.Dropdown(\n", + " options = [(ds.variables[v].attrs['long_name'], v) for v in available_variables],\n", + " value= d_variable,\n", + " description='Field:',\n", + " disabled=False,\n", + ")\n", + "dropdown.layout.margin = '0px 30% 0px 20%'\n", + "dropdown.layout.width = '50%'\n", + "\n", + "# set up display\n", + "i_display = act.plotting.TimeSeriesDisplay(ds)\n", + "i_display.add_subplots((1,), figsize = (9.5,5))\n", + "i_ax = i_display.plot(d_variable, subplot_index=(0,), set_title=ds.variables[d_variable].attrs['long_name'],)\n", + "i_ax.grid()\n", + "i_fig = i_display.fig\n", + "\n", + "# update plot callback function\n", + "def update_plot(change):\n", + " i_ax.cla()\n", + " i_ax_new = i_display.plot(change.new, subplot_index=(0,), 
set_title=ds.variables[change.new].attrs['long_name'],)\n", + " i_ax_new.grid()\n", + " i_fig.canvas.draw()\n", + " i_fig.canvas.flush_events()\n", + "\n", + "dropdown.observe(update_plot, names='value')\n", + "\n", + "widgets.AppLayout(\n", + " header=dropdown,\n", + " center=i_fig.canvas,\n", + " pane_heights=[1, 6,1]\n", + ")\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.12" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/KAZRCFRCOR/KAZRCFRCOR_tutorial.ipynb b/VAPs/quicklook/KAZRCFRCOR/KAZRCFRCOR_tutorial.ipynb new file mode 100644 index 00000000..fec24d1b --- /dev/null +++ b/VAPs/quicklook/KAZRCFRCOR/KAZRCFRCOR_tutorial.ipynb @@ -0,0 +1,838 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# KAZRCFRCORGE.C1 Notebook\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/kazrcfrcor) for more information about this vap." + ] + }, + { + "cell_type": "markdown", + "id": "97097763", + "metadata": {}, + "source": [ + "In this notebook, we demonstrate the workflow to explore ARM vap data (using kazrcfrcorge as an example.) Value-added products (VAPs) are higher-order data products that have been analyzed and processed to ease scientists’ use of ARM data in atmospheric research and global climate models. \n", + "Here is the main content we will cover." 
+ ] + }, + { + "cell_type": "markdown", + "id": "eddec40f", + "metadata": {}, + "source": [ + "# Table of Contents\n", + "## Access the data\n", + "* How to retrieve the data\n", + "* Data path and file name conventions\n", + "* Load data\n", + "## Explore the data\n", + "* NetCDF Data structure\n", + "* Xarray essentials\n", + "* Xarray Variable\n", + "## Plot the data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "import random\n", + "\n", + "import glob\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "id": "2372d5be", + "metadata": {}, + "source": [ + "## Access the data" + ] + }, + { + "cell_type": "markdown", + "id": "5b0d9684", + "metadata": {}, + "source": [ + "### How to retrieve the data\n", + "We assume the path \"/data/archive\" is available where you are running this notebook. You can use `os.path.exists(\"/data/archive\")` to verify that the path exists on your machine.\n", + "\n", + "\n", + "### Data path and file name conventions\n", + "There are several common terms regarding ARM data, for example, data-stream-name, data-level, site, facility, instrument, etc. (For more details, please see [ARM Data File Standards Version 1.2](https://www.google.com/search?q=arm+datastream+facility+cite+definition&rlz=1C1GCEJ_enUS1029US1029&ei=hb41ZICrBPukqtsPvMODIA&ved=0ahUKEwjAgeiS0aL-AhV7kmoFHbzhAAQQ4dUDCBA&uact=5&oq=arm+datastream+facility+cite+definition&gs_lcp=Cgxnd3Mtd2l6LXNlcnAQAzIFCAAQogQyBQgAEKIEMgUIABCiBDIFCAAQogQyBQgAEKIEOgoIABBHENYEELADOgoIIRCgARDDBBAKSgQIQRgAUJ4DWIoOYO0PaAFwAXgAgAGMAYgBuQmSAQMyLjmYAQCgAQHIAQjAAQE&sclient=gws-wiz-serp).) 
\n", + "\n", + "For example, this notebook is called `kazrcfrcorge.c1`, where `kazrcfrcorge` is the \"datastream name\", and `c1` is the \"data level\".\n", + "\n", + "This datastream also contains site `cor` and facility `M1`. (Note: an individual datastream might have multiple site-facility pairs.)\n", + "In such a case, the data of this data-stream is stored at `/data/archive/cor/corkazrcfrcorgeM1.c1`, which is in the format of `{DATA_DIR}/{site}/{site}{DATASTREAM_NAME}{facility}.{DATA_LEVEL}`. We can use the following method to assign the data-stream directory `datastream_dir = os.path.join(DATA_DIR, site, site + DATASTREAM_NAME + facility + '.' + DATA_LEVEL )`\n", + "\n", + "The data files under datastream_dir also follow naming conventions. Once you reach the datastream_dir level, the most important file naming convention to differentiate the files is \"yyyyMMdd.hhmmss\", which comes in handy to filter files based on datetime. For example, we can use `glob.glob(f'{datastream_dir}/*.200709*.*')` to select files from September 2007.\n", + "\n", + "(Note: refer to the https://adc.arm.gov/discovery/#/ and https://adc.arm.gov/solr8/metadata/select API to explore ARM datastreams and their associated available sites and facilities.)\n", + "\n", + "Please see the following examples in action" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d7e9eb85", + "metadata": {}, + "outputs": [], + "source": [ + "# Verify if DATA_DIR path exists\n", + "DATA_DIR = \"/data/archive\"\n", + "os.path.exists(DATA_DIR)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "586993fd", + "metadata": {}, + "outputs": [], + "source": [ + "# Speicify datastream_dir following the path conventions and check its existence\n", + "DATASTREAM_NAME = \"kazrcfrcorge\"\n", + "DATA_LEVEL = \"c1\"\n", + "site = \"cor\"\n", + "facility = \"M1\"\n", + "datastream_dir = os.path.join(DATA_DIR, site, site + DATASTREAM_NAME + facility + '.' 
+ DATA_LEVEL )\n", + "print(datastream_dir)\n", + "print(os.path.exists(datastream_dir))\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0742f7c1", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: list 5 (random) files under datastream_dir\n", + "files = os.listdir(datastream_dir)\n", + "files[:5]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "39b98a36", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: get most recent file\n", + "list_of_files = glob.glob(f\"{datastream_dir}/*\") # * means all if need specific format then *.csv\n", + "latest_file = max(list_of_files, key=os.path.getctime)\n", + "latest_file" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "902d514e", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus sort datastream files based on datetime\n", + "files = os.listdir(datastream_dir)\n", + "file_sorted = files.copy()\n", + "file_sorted.sort() \n", + "print(file_sorted[:5])\n", + "\n", + "# to reverse\n", + "file_sorted_reverse = files.copy() \n", + "file_sorted_reverse.sort(reverse=True)\n", + "print(file_sorted_reverse[:5])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ec5923b2", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: pattern matching\n", + "# filter the 200709** files under datastream_dir\n", + "files_filter = glob.glob(f'{datastream_dir}/*.200709*.*')\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c4a4aa18", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: use armnotebook_utils.file_filter (TODO) to filter files based on datastream info \n" + ] + }, + { + "cell_type": "markdown", + "id": "cfeb9efc", + "metadata": {}, + "source": [ + "### Load data\n", + "The arm data is stored in [NetCDF](https://en.wikipedia.org/wiki/NetCDF#:~:text=%22NetCDF%20(network%20Common%20Data%20Form,format%20for%20representing%20scientific%20data.) format. 
We can use xarray's `open_dataset` method to load a single file, or `open_mfdataset` to open multiple files. (Note: the latter will still return a single xarray dataset object by combining multiple files.)\n", + "\n", + "Note: open_dataset keeps the file handle open and lazy loads its contents. All parameters are passed directly to open_dataset. It is a preferable method over load_dataset for memory efficiency. (For more details, please see [xarray.open_dataset](https://docs.xarray.dev/en/stable/generated/xarray.open_dataset.html))\n", + "\n", + "See the following example in action" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a440a329", + "metadata": {}, + "outputs": [], + "source": [ + "# open single file\n", + "full_path = latest_file\n", + "print(full_path)\n", + "ds_single = xr.open_dataset(full_path)\n", + "ds_single" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0143a3d", + "metadata": {}, + "outputs": [], + "source": [ + "print(ds_single)\n", + "print(type(ds_single))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1c0f8939", + "metadata": {}, + "outputs": [], + "source": [ + "# open multiple files\n", + "n_files = 3\n", + "full_paths = [os.path.join(datastream_dir, f_path) for f_path in file_sorted_reverse[:n_files]]\n", + "print(full_paths)\n", + "try: # Note: sometimes multiple files cannot be merged, so we used try except here.\n", + " ds_mutiple = xr.open_mfdataset(full_paths)\n", + " # ds_mutiple\n", + " print(type(ds_mutiple))\n", + " print(ds_mutiple)\n", + "except Exception as e:\n", + " print(e)" + ] + }, + { + "cell_type": "markdown", + "id": "3fa59943", + "metadata": {}, + "source": [ + "## Explore the data" + ] + }, + { + "cell_type": "markdown", + "id": "571f69b9", + "metadata": {}, + "source": [ + "### NetCDF Data structure \n", + "\n", + "\n", + "(If you are confident with NetCDF basics and xarray essentials, feel free to skip this section.)\n", + "Before we dive into 
data exploration, there are some essential concepts we should be familiar with (shown below): \n", + "* Dataset\n", + "* Data array\n", + "* Variable\n", + "* Dimension\n", + "* Coordinate\n", + "* Data Type\n", + "* Meta Data (Attributes)\n", + "\n", + "We will not go into details about NetCDF basics, but here are some references you might find helpful:\n", + "* [Components of a NetCDF Dataset](https://iprc.soest.hawaii.edu/users/xfu/tool/guidef-7.html#)\n", + "* [Network Common Data Form (NetCDF)](https://www.unidata.ucar.edu/software/netcdf/) and [A Brief History of (netCDF) Time](https://www.unidata.ucar.edu/software/netcdf/time/recs.html)\n", + "\n", + "\n", + "\n", + "### Xarray essentials\n", + "Earlier we introduced the xarray package and used `open_dataset` and `open_mfdataset` to retrieve a NetCDF data file as an xarray Dataset object. Recall that you can review the data in a notebook by using `print(ds)` or `ds`. \n", + "\n", + "Using xarray to retrieve the aforementioned NetCDF basics is straightforward. 
In general,\n", + "* Dataset: `ds`\n", + "* Data array: `ds.variables`\n", + "* Variable: `ds.variables`\n", + "* Dimension: `ds.dims`\n", + "* Coordinate: `ds.coords`\n", + "* Data Type: `type`\n", + "* Meta Data (Attributes): `ds.attrs`\n", + "\n", + "Also, here are some references if you are new to xarray\n", + "* [Xarray in 45 minutes](https://tutorial.xarray.dev/overview/xarray-in-45-min.html)\n", + "* [Handling NetCDF Files using XArray for Absolute Beginners](https://towardsdatascience.com/handling-netcdf-files-using-xarray-for-absolute-beginners-111a8ab4463f)\n", + "\n", + "Try the following commands in action\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b3eac323", + "metadata": {}, + "outputs": [], + "source": [ + "ds = ds_single" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "77ecf85d", + "metadata": {}, + "outputs": [], + "source": [ + "# Dataset \n", + "ds" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "25e7de09", + "metadata": {}, + "outputs": [], + "source": [ + "# Data array, Variable\n", + "# Note: the info can be overwhelming. 
Do not attempt to grasp everything at the first glance.\n", + "ds.variables" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8c41b67e", + "metadata": {}, + "outputs": [], + "source": [ + "# dimenssions\n", + "ds.dims" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "156f1dfc", + "metadata": {}, + "outputs": [], + "source": [ + "# coordinates\n", + "ds.coords" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "277d6064", + "metadata": {}, + "outputs": [], + "source": [ + "# Meta Data (Attributes)\n", + "ds.attrs" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d334681f", + "metadata": {}, + "outputs": [], + "source": [ + "# type\n", + "print(type(ds))\n", + "print(type(ds.variables))\n", + "print(type(ds.dims))\n", + "print(type(ds.coords))\n", + "print(type(ds.attrs))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "643399d6", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: retrieve variable names only\n", + "list(ds.variables)" + ] + }, + { + "cell_type": "markdown", + "id": "1cf36878", + "metadata": {}, + "source": [ + "#### Discussion: variable vs. coordinates vs. dimensions. \n", + "From [Components of a NetCDF Dataset](https://iprc.soest.hawaii.edu/users/xfu/tool/guidef-7.html#):\n", + "\n", + "* Dimension: A dimension may be used to represent a real physical dimension, for example, time, latitude, longitude, or height. A dimension might also be used to index other quantities, for example station or model-run-number.\n", + "A netCDF dimension has both a name and a length. A dimension length is an arbitrary positive integer, except that one dimension in a netCDF dataset can have the length UNLIMITED.\n", + "\n", + "* Variables: Variables are used to store the bulk of the data in a netCDF dataset. A variable represents an array of values of the same type. A scalar value is treated as a 0-dimensional array. 
A variable has a name, a data type, and a shape described by its list of dimensions specified when the variable is created. A variable may also have associated attributes, which may be added, deleted or changed after the variable is created.\n", + "\n", + "* Coordinate (Variables): It is legal for a variable to have the same name as a dimension. Such variables have no special meaning to the netCDF library. However there is a convention that such variables should be treated in a special way by software using this library.\n", + "A variable with the same name as a dimension is called a coordinate variable. It typically defines a physical coordinate corresponding to that dimension.\n", + "\n", + "\n", + "Tips: By definition, coordinates and dimensions are also variables. For example, ds.time is a coordinate but it is also a special variable. But in practice, when we talk about a variable, we mean a regular/non-coordinate variable. We will use this convention for the rest of the notebook. " + ] + }, + { + "cell_type": "markdown", + "id": "d47120f9", + "metadata": {}, + "source": [ + "### Xarray Variable\n", + "\n", + "\n", + "We can use `ds.variables` to access the variables of a dataset. But printing out the whole dataset can often be overwhelming. Instead, we can work on an individual variable (also called an Xarray data array) using the `ds[var_name]` syntax, where `var_name` is the name of the variable. See the following examples in action.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5a57e136", + "metadata": {}, + "outputs": [], + "source": [ + "print(type(ds[\"time\"]))\n", + "ds[\"time\"]" + ] + }, + { + "cell_type": "markdown", + "id": "51ec643e", + "metadata": {}, + "source": [ + "#### Variable properties\n", + "For an individual variable, the following properties are the most commonly used, shown with the associated xarray accessor to retrieve them. 
(assuming var = ds[var_name])\n", + "* name: `var.name`\n", + "* data content: `var.data`\n", + "* attributes: `var.attrs`\n", + "* dimensions: `var.dims`\n", + "* data type: `var.data.dtype`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a632a525", + "metadata": {}, + "outputs": [], + "source": [ + "var_name = \"time\"\n", + "var = ds[var_name]\n", + "\n", + "print(\"var.name: \\n\", var.name, \"\\n\")\n", + "print(\"var.data: \\n\", var.data, \"\\n\")\n", + "print(\"var.attrs: \\n\", var.attrs, \"\\n\")\n", + "print(\"var.dims: \\n\", var.dims, \"\\n\")\n", + "print(\"var.data.dtype: \\n\", var.data.dtype, \"\\n\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b256b07f", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: indivdual variable info table (collect and display variable info as a tabular format)\n", + "df_info = pd.DataFrame()\n", + "df_info[\"var_name\"] = list(ds.variables)\n", + "df_info[\"dims\"] = df_info.var_name.apply(lambda x: ds[x].dims)\n", + "df_info[\"is_dim\"] = df_info.var_name.apply(lambda x: x in ds.dims)\n", + "df_info[\"n_dim\"] = df_info.var_name.apply(lambda x: len(ds[x].dims))\n", + "df_info[\"attrs\"] = df_info.var_name.apply(lambda x: ds[x].attrs)\n", + "df_info[\"dtype\"] = df_info.var_name.apply(lambda x: ds[x].data.dtype)\n", + "df_info" + ] + }, + { + "cell_type": "markdown", + "id": "e6a36b96", + "metadata": {}, + "source": [ + "### Data cleaning/Preprocessing (skipped)\n", + "Data cleaning and preprocessing is an important stage in the data analysis pipeline. However, it is outside the scope of this notebook. For more information about data cleaning and preprocessing basics, here are some references. 
\n", + "* [Xarray Fundamentals](https://earth-env-data-science.github.io/lectures/xarray/xarray.html)\n", + "* [Xarray Tutorial — Pangeo Gallery documentation](http://gallery.pangeo.io/repos/pangeo-data/pangeo-tutorial-gallery/xarray.html)\n", + "* [Pythonic Data Cleaning With pandas and NumPy](https://realpython.com/python-data-cleaning-numpy-pandas/)\n", + "* [Pandas - Cleaning Data](https://www.w3schools.com/python/pandas/pandas_cleaning.asp)" + ] + }, + { + "cell_type": "markdown", + "id": "ae6e1a69", + "metadata": {}, + "source": [ + "### Plotting\n", + "Data visualization (or plotting) is another important data analysis topic and deserves its own discussion. Here in this notebook we will only demonstrate basic tools for simple data visualization tasks.\n", + "\n", + "Here are some references you might find useful:\n", + "* xarray plotting: https://docs.xarray.dev/en/stable/user-guide/plotting.html\n", + "* Atmospheric Community Toolkit (ACT): https://arm-doe.github.io/ACT/index.html\n", + "\n", + "Note: this notebook is auto-generated using a template. It uses a general idea to select variable(s) to plot and is not customized to each individual notebook. Feel free to change the variables of interest, especially if a figure fails to plot." + ] + }, + { + "cell_type": "markdown", + "id": "7e417fb2", + "metadata": {}, + "source": [ + "#### 1-dimensional basic time series plot\n", + "\n", + "For the following plot we would like to find variables such that\n", + "* it has one and only one dimension\n", + "* \"time\" is its coordinate variable\n", + "* it is not a dimension itself,\n", + "* it is not a special variable with substrings within [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "\n", + "Please see the following example in action. 
(For more details about pandas filtering, please see the following references.)\n", + "* [pandas: multiple conditions while indexing data frame](https://stackoverflow.com/questions/22591174/pandas-multiple-conditions-while-indexing-data-frame-unexpected-behavior)\n", + "* [pandas.DataFrame.apply](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.apply.html)\n", + "* [How to test if a string contains one of the substrings in a list...](https://stackoverflow.com/questions/26577516/how-to-test-if-a-string-contains-one-of-the-substrings-in-a-list-in-pandas)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "30edac2d", + "metadata": {}, + "outputs": [], + "source": [ + "# (pandas query) filter n_dim==1, non-dimenssional, and not contains the following substrings\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter = df_info[(df_info.n_dim == 1) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (df_info.is_dim==False) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4777a659", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " var_1d = df_filter.var_name.values[0]\n", + " var_1d\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fb5ae985", + "metadata": {}, + "outputs": [], + "source": [ + "# Note: if failed, change to another variable to plot.\n", + "try:\n", + " ds[var_1d].plot()\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "markdown", + "id": "9e41066e", + "metadata": {}, + "source": [ + "#### 2-dimensional basic plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "09bd4adc", + "metadata": {}, + "outputs": [], + "source": [ + "# (pandas query) filter n_dim==2, non-dimenssional, and not contains the 
following substrings\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_2 = df_info[(df_info.n_dim == 2) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (df_info.is_dim==False) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter_2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2cdea1a9", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " var_2d = df_filter_2.var_name.values[0]\n", + " var_2d\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f5f48879", + "metadata": {}, + "outputs": [], + "source": [ + "# Note: if failed, change to another variable to plot.\n", + "try:\n", + " print(ds[var_2d].dims)\n", + " # ds[var_2d].plot()\n", + "\n", + " # conventionally, use \"time\" as x-axis\n", + " ds[var_2d].plot(x=\"time\")\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "markdown", + "id": "7554b2a6", + "metadata": {}, + "source": [ + "#### qc-plotting (optional)\n", + "\n", + "Note: act qc plotting has more strict requirement, one of them is the associated qc variable need to have \"flag_masks\" attributes. 
Which is added by using ds = act.io.armfiles.read_netcdf(files_list), then ds.clean.cleanup()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "71c2096f", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " ds_act = act.io.armfiles.read_netcdf(full_path)\n", + " print(type(ds_act))\n", + " ds_act.clean.cleanup()\n", + "\n", + " # or \n", + " # ds.clean.cleanup()\n", + "except Exception as e:\n", + " print(\"ERROR\", e)\n", + " ds_act = ds" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "844f8505", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter valid variables for ACT qc plotting\n", + "condition = (\"qc_\" + df_info.var_name).apply(lambda x: ds_act[x].attrs.get(\"flag_masks\") is not None \n", + " if x in list(ds.variables) else False)\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_3 = df_info[(df_info.is_dim==False) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings))) &\n", + " condition\n", + " ]\n", + "df_filter_3" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ecd38cc6", + "metadata": {}, + "outputs": [], + "source": [ + "# QC Plot\n", + "try:\n", + " qc_variable = df_filter_3.var_name.values[0]\n", + " print(qc_variable)\n", + "except Exception as e:\n", + " print(e)\n", + " \n", + "try:\n", + "\n", + " # Plot\n", + " qc_display = act.plotting.TimeSeriesDisplay(ds_act)\n", + " qc_display.add_subplots((2,), figsize = (9.5,10))\n", + " qc_ax = qc_display.plot(qc_variable, subplot_index=(0,), set_title=\"QC results on field: \" + qc_variable,)\n", + " qc_ax.grid()\n", + " qc_display.qc_flag_block_plot(qc_variable, subplot_index=(1,))\n", + " qc_ax.set_xlabel(f\"UTC Time starts at {ds.time.data[0]}\")\n", + "\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e)" + ] + }, + { + "cell_type": "markdown", + "id": "d76e4d27", + "metadata": 
{}, + "source": [ + "#### bonus: choose variables to plot from a dropdown menu " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bb733804", + "metadata": {}, + "outputs": [], + "source": [ + "# Valid variables filtering\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_4 = df_info[(df_info.is_dim==False) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter_4" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "82d5c20d", + "metadata": {}, + "outputs": [], + "source": [ + "# example 1: using xarray plot\n", + "\n", + "# Uncomment the following cell to try the interactive plot (ctrl + /)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2fc9aaa4", + "metadata": {}, + "outputs": [], + "source": [ + "# %matplotlib widget\n", + "# plt.clf()\n", + "\n", + "# fig, ax = plt.subplots(figsize=(10, 4))\n", + "\n", + "# available_variables = df_filter_4.var_name.values\n", + "# @widgets.interact(var=available_variables)\n", + "# def update(var = available_variables[0]):\n", + "# fig.clear() # Remove old lines from plot and plot new one\n", + "# if len(ds[var].dims)==2:\n", + "# ds[var].plot(x=\"time\", add_colorbar=False)\n", + "# else:\n", + "# ds[var].plot()\n", + "# plt.grid()\n", + "# plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fabdd802", + "metadata": {}, + "outputs": [], + "source": [ + "# example 2: using act plot\n", + "\n", + "# Uncomment the following cell to try the interactive plot (ctrl + /)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "962b4186", + "metadata": {}, + "outputs": [], + "source": [ + "# %matplotlib widget\n", + "# plt.clf()\n", + "\n", + "# available_variables = df_filter_4.var_name.values\n", + "\n", + "\n", + "# @widgets.interact(var=available_variables)\n", + "# def 
update(var = available_variables[0]):\n", + "\n", + "# i_display = act.plotting.TimeSeriesDisplay(ds_act)\n", + "# i_display.add_subplots((1,), figsize=(10, 4))\n", + "# ax = i_display.plot(var, subplot_index=(0,), set_title=f\"{var} ({ds_act[var].attrs['long_name']})\",)\n", + "\n", + "# ax.set_xlabel(f\"UTC Time starts at {ds.time.data[0]}\")\n", + "# ax.grid()\n", + "# plt.show()\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.16" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/KAZRCFRCOR/kazrcfrcorge.c1.ipynb b/VAPs/quicklook/KAZRCFRCOR/kazrcfrcorge.c1.ipynb new file mode 100644 index 00000000..211aad8d --- /dev/null +++ b/VAPs/quicklook/KAZRCFRCOR/kazrcfrcorge.c1.ipynb @@ -0,0 +1,339 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# KAZRCFRCORGE.C1 Plots\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/kazrcfrcor) for more information about this vap." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib widget\n", + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import ipywidgets as widgets\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "# Data archive directory\n", + "DATA_DIR = r'/data/archive/'\n", + "\n", + "# Datastream info\n", + "DATASTREAM_NAME = 'kazrcfrcorge'\n", + "DATA_LEVEL = 'c1'\n", + "LOCATIONS = [{'end_date': '2019-04-30', 'facility': 'M1', 'site': 'cor', 'start_date': '2018-10-15'}]" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "9faaf875", + "metadata": {}, + "source": [ + "## Define site, facility, and date range" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ac6764f5", + "metadata": {}, + "outputs": [], + "source": [ + "print(\"The following locations and date ranges are available for this VAP:\")\n", + "display(pd.DataFrame(LOCATIONS, columns=['site', 'facility', 'start_date', 'end_date']))" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "8d132223", + "metadata": {}, + "source": [ + "#### Define site, facility, and date range (date format: YYYY-MM-DD) using the variables below:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e563983a", + "metadata": {}, + "outputs": [], + "source": [ + "site_facility = ( 'cor', 'M1' )\n", + "\n", + "date_start = '2019-04-29'\n", + "date_end = '2019-04-30'" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "bccd3dfe-2f99-49a5-bace-ea37e8dd8fc3", + "metadata": {}, + "source": [ + "## Load data files\n", + "Load data files from /data/archive/" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cb4b9a26-c574-49c0-a521-658fa553e39e", + "metadata": 
{}, + "outputs": [], + "source": [ + "# Compile list of files\n", + "site, facility = site_facility\n", + "d_date_start = datetime.strptime(date_start, '%Y-%m-%d')\n", + "d_date_end = datetime.strptime(date_end, '%Y-%m-%d')\n", + "dir_path = os.path.join(DATA_DIR + site, site + DATASTREAM_NAME + facility + r'.' + DATA_LEVEL )\n", + "dir_path\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6be8f3dc", + "metadata": {}, + "outputs": [], + "source": [ + "from datetime import date, timedelta\n", + "import pandas as pd\n", + "\n", + "def get_ARM_formated_dates(start_date, end_date):\n", + " \"\"\"\n", + " Get a list of ARM conventional formated date lists, based on start_date and end_date(inclusive)\n", + " EXAMPLE:\n", + " get_ARM_formated_dates(start_date=\"20180219\", end_date=\"20180221\")\n", + " >> [\"20180219\", \"20180220\", \"20180221\"] \n", + " \"\"\"\n", + " \n", + " _start_date = pd.to_datetime(start_date)\n", + " _end_date = pd.to_datetime(end_date)\n", + " \n", + " delta = _end_date - _start_date # returns timedelta \n", + " dates = []\n", + "\n", + " for i in range(delta.days + 1):\n", + " day = _start_date + timedelta(days=i)\n", + " day_formated = day.strftime(format=\"%Y%m%d\")\n", + " dates.append(day_formated)\n", + " return dates\n", + "\n", + "\n", + "get_ARM_formated_dates(start_date=date_start, end_date=date_end)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "51feea2e", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter a list of files based on date pattern\n", + "import glob\n", + "dates = get_ARM_formated_dates(start_date=date_start, end_date=date_end)\n", + "files_filter = []\n", + "for date in dates:\n", + " files_filter += glob.glob(f'{dir_path}/*.{date}*.*')\n", + " files_filter\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0e5d0d7", + "metadata": {}, + "outputs": [], + "source": [ + "# Load files as a single dataset\n", + 
"files_list = files_filter \n", + "ds = act.io.armfiles.read_netcdf(files_list)\n", + "ds.clean.cleanup()\n", + "print(f'{len(files_list)} files loaded')\n", + "ds\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "4a551094-9ec0-4b64-b80a-9940573c2f50", + "metadata": {}, + "source": [ + "## Plot time series data\n", + "#### Define the list of variables to be plotted:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d09b789e-84f1-4605-846b-a72c110c8048", + "metadata": {}, + "outputs": [], + "source": [ + "variables_to_plot = ['linear_depolarization_ratio', 'mean_doppler_velocity', 'mean_doppler_velocity_crosspolar_v']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3458fb08-035b-4898-9253-0a94e6f9c97b", + "metadata": {}, + "outputs": [], + "source": [ + "ts_display = act.plotting.TimeSeriesDisplay(ds)\n", + "ts_display.add_subplots((len(variables_to_plot),), figsize = (9.5,4*len(variables_to_plot)))\n", + "\n", + "for i,v in enumerate(variables_to_plot):\n", + " ts_ax = ts_display.plot(v, subplot_index=(i,), set_title=ds.variables[v].attrs['long_name'],)\n", + " ts_ax.grid()\n", + "\n", + "plt.show()\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "194399aa-1907-452b-8ba9-bc31d7f60291", + "metadata": {}, + "source": [ + "## Quality check plots\n", + "#### Define variable for QC plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9a0ef63d-3eeb-48fc-a24a-43258b6134b8", + "metadata": {}, + "outputs": [], + "source": [ + "qc_variable = 'mean_doppler_velocity'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "532663a3-4dc0-4497-bda8-018c5f91e1c4", + "metadata": {}, + "outputs": [], + "source": [ + "# QC Plot\n", + "if ('qc_'+qc_variable) in ds.variables:\n", + "\n", + " # Plot\n", + " qc_display = act.plotting.TimeSeriesDisplay(ds)\n", + " qc_display.add_subplots((2,), figsize = (9.5,10))\n", + " qc_ax = 
qc_display.plot(qc_variable, subplot_index=(0,), set_title=\"QC results on field: \" + qc_variable,)\n", + " qc_ax.grid()\n", + " qc_display.qc_flag_block_plot(qc_variable, subplot_index=(1,))\n", + "\n", + " plt.show()\n", + "else:\n", + " print(f'QC not available for the selected field: {qc_variable}')\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "2308db16-f362-4033-a11a-c3e5e75ad9ba", + "metadata": {}, + "source": [ + "## Field selection dropdown menu\n", + "Select variable to be plotted from a dropdown menu" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d5792fa8-2253-441a-8ac6-1e07a0345031", + "metadata": {}, + "outputs": [], + "source": [ + "plt.ioff()\n", + "\n", + "# populate dropdown menu with available variables \n", + "available_variables = [v for v in ds.variables if not('time' in v or v.startswith('qc_') or v.startswith('source_')) and 'long_name' in ds.variables[v].attrs]\n", + "d_variable = 'linear_depolarization_ratio'\n", + "dropdown = widgets.Dropdown(\n", + " options = [(ds.variables[v].attrs['long_name'], v) for v in available_variables],\n", + " value= d_variable,\n", + " description='Field:',\n", + " disabled=False,\n", + ")\n", + "dropdown.layout.margin = '0px 30% 0px 20%'\n", + "dropdown.layout.width = '50%'\n", + "\n", + "# set up display\n", + "i_display = act.plotting.TimeSeriesDisplay(ds)\n", + "i_display.add_subplots((1,), figsize = (9.5,5))\n", + "i_ax = i_display.plot(d_variable, subplot_index=(0,), set_title=ds.variables[d_variable].attrs['long_name'],)\n", + "i_ax.grid()\n", + "i_fig = i_display.fig\n", + "\n", + "# update plot callback function\n", + "def update_plot(change):\n", + " i_ax.cla()\n", + " i_ax_new = i_display.plot(change.new, subplot_index=(0,), set_title=ds.variables[change.new].attrs['long_name'],)\n", + " i_ax_new.grid()\n", + " i_fig.canvas.draw()\n", + " i_fig.canvas.flush_events()\n", + "\n", + "dropdown.observe(update_plot, names='value')\n", + "\n", + 
"widgets.AppLayout(\n", + " header=dropdown,\n", + " center=i_fig.canvas,\n", + " pane_heights=[1, 6,1]\n", + ")\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.12" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/KAZRCFRCOR/kazrcfrcormd.c1.ipynb b/VAPs/quicklook/KAZRCFRCOR/kazrcfrcormd.c1.ipynb new file mode 100644 index 00000000..9bf125ae --- /dev/null +++ b/VAPs/quicklook/KAZRCFRCOR/kazrcfrcormd.c1.ipynb @@ -0,0 +1,339 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# KAZRCFRCORMD.C1 Plots\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/kazrcfrcor) for more information about this vap." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib widget\n", + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import ipywidgets as widgets\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "# Data archive directory\n", + "DATA_DIR = r'/data/archive/'\n", + "\n", + "# Datastream info\n", + "DATASTREAM_NAME = 'kazrcfrcormd'\n", + "DATA_LEVEL = 'c1'\n", + "LOCATIONS = [{'end_date': '2019-04-30', 'facility': 'M1', 'site': 'cor', 'start_date': '2018-10-15'}]" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "9faaf875", + "metadata": {}, + "source": [ + "## Define site, facility, and date range" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ac6764f5", + "metadata": {}, + "outputs": [], + "source": [ + "print(\"The following locations and date ranges are available for this VAP:\")\n", + "display(pd.DataFrame(LOCATIONS, columns=['site', 'facility', 'start_date', 'end_date']))" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "8d132223", + "metadata": {}, + "source": [ + "#### Define site, facility, and date range (date format: YYYY-MM-DD) using the variables below:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e563983a", + "metadata": {}, + "outputs": [], + "source": [ + "site_facility = ( 'cor', 'M1' )\n", + "\n", + "date_start = '2019-04-29'\n", + "date_end = '2019-04-30'" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "bccd3dfe-2f99-49a5-bace-ea37e8dd8fc3", + "metadata": {}, + "source": [ + "## Load data files\n", + "Load data files from /data/archive/" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cb4b9a26-c574-49c0-a521-658fa553e39e", + "metadata": 
{}, + "outputs": [], + "source": [ + "# Compile list of files\n", + "site, facility = site_facility\n", + "d_date_start = datetime.strptime(date_start, '%Y-%m-%d')\n", + "d_date_end = datetime.strptime(date_end, '%Y-%m-%d')\n", + "dir_path = os.path.join(DATA_DIR + site, site + DATASTREAM_NAME + facility + r'.' + DATA_LEVEL )\n", + "dir_path\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6be8f3dc", + "metadata": {}, + "outputs": [], + "source": [ + "from datetime import date, timedelta\n", + "import pandas as pd\n", + "\n", + "def get_ARM_formated_dates(start_date, end_date):\n", + " \"\"\"\n", + " Get a list of ARM conventional formated date lists, based on start_date and end_date(inclusive)\n", + " EXAMPLE:\n", + " get_ARM_formated_dates(start_date=\"20180219\", end_date=\"20180221\")\n", + " >> [\"20180219\", \"20180220\", \"20180221\"] \n", + " \"\"\"\n", + " \n", + " _start_date = pd.to_datetime(start_date)\n", + " _end_date = pd.to_datetime(end_date)\n", + " \n", + " delta = _end_date - _start_date # returns timedelta \n", + " dates = []\n", + "\n", + " for i in range(delta.days + 1):\n", + " day = _start_date + timedelta(days=i)\n", + " day_formated = day.strftime(format=\"%Y%m%d\")\n", + " dates.append(day_formated)\n", + " return dates\n", + "\n", + "\n", + "get_ARM_formated_dates(start_date=date_start, end_date=date_end)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "51feea2e", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter a list of files based on date pattern\n", + "import glob\n", + "dates = get_ARM_formated_dates(start_date=date_start, end_date=date_end)\n", + "files_filter = []\n", + "for date in dates:\n", + " files_filter += glob.glob(f'{dir_path}/*.{date}*.*')\n", + " files_filter\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0e5d0d7", + "metadata": {}, + "outputs": [], + "source": [ + "# Load files as a single dataset\n", + 
"files_list = files_filter \n", + "ds = act.io.armfiles.read_netcdf(files_list)\n", + "ds.clean.cleanup()\n", + "print(f'{len(files_list)} files loaded')\n", + "ds\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "4a551094-9ec0-4b64-b80a-9940573c2f50", + "metadata": {}, + "source": [ + "## Plot time series data\n", + "#### Define the list of variables to be plotted:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d09b789e-84f1-4605-846b-a72c110c8048", + "metadata": {}, + "outputs": [], + "source": [ + "variables_to_plot = ['linear_depolarization_ratio', 'mean_doppler_velocity', 'mean_doppler_velocity_crosspolar_v']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3458fb08-035b-4898-9253-0a94e6f9c97b", + "metadata": {}, + "outputs": [], + "source": [ + "ts_display = act.plotting.TimeSeriesDisplay(ds)\n", + "ts_display.add_subplots((len(variables_to_plot),), figsize = (9.5,4*len(variables_to_plot)))\n", + "\n", + "for i,v in enumerate(variables_to_plot):\n", + " ts_ax = ts_display.plot(v, subplot_index=(i,), set_title=ds.variables[v].attrs['long_name'],)\n", + " ts_ax.grid()\n", + "\n", + "plt.show()\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "194399aa-1907-452b-8ba9-bc31d7f60291", + "metadata": {}, + "source": [ + "## Quality check plots\n", + "#### Define variable for QC plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9a0ef63d-3eeb-48fc-a24a-43258b6134b8", + "metadata": {}, + "outputs": [], + "source": [ + "qc_variable = 'mean_doppler_velocity'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "532663a3-4dc0-4497-bda8-018c5f91e1c4", + "metadata": {}, + "outputs": [], + "source": [ + "# QC Plot\n", + "if ('qc_'+qc_variable) in ds.variables:\n", + "\n", + " # Plot\n", + " qc_display = act.plotting.TimeSeriesDisplay(ds)\n", + " qc_display.add_subplots((2,), figsize = (9.5,10))\n", + " qc_ax = 
qc_display.plot(qc_variable, subplot_index=(0,), set_title=\"QC results on field: \" + qc_variable,)\n", + " qc_ax.grid()\n", + " qc_display.qc_flag_block_plot(qc_variable, subplot_index=(1,))\n", + "\n", + " plt.show()\n", + "else:\n", + " print(f'QC not available for the selected field: {qc_variable}')\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "2308db16-f362-4033-a11a-c3e5e75ad9ba", + "metadata": {}, + "source": [ + "## Field selection dropdown menu\n", + "Select variable to be plotted from a dropdown menu" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d5792fa8-2253-441a-8ac6-1e07a0345031", + "metadata": {}, + "outputs": [], + "source": [ + "plt.ioff()\n", + "\n", + "# populate dropdown menu with available variables \n", + "available_variables = [v for v in ds.variables if not('time' in v or v.startswith('qc_') or v.startswith('source_')) and 'long_name' in ds.variables[v].attrs]\n", + "d_variable = 'linear_depolarization_ratio'\n", + "dropdown = widgets.Dropdown(\n", + " options = [(ds.variables[v].attrs['long_name'], v) for v in available_variables],\n", + " value= d_variable,\n", + " description='Field:',\n", + " disabled=False,\n", + ")\n", + "dropdown.layout.margin = '0px 30% 0px 20%'\n", + "dropdown.layout.width = '50%'\n", + "\n", + "# set up display\n", + "i_display = act.plotting.TimeSeriesDisplay(ds)\n", + "i_display.add_subplots((1,), figsize = (9.5,5))\n", + "i_ax = i_display.plot(d_variable, subplot_index=(0,), set_title=ds.variables[d_variable].attrs['long_name'],)\n", + "i_ax.grid()\n", + "i_fig = i_display.fig\n", + "\n", + "# update plot callback function\n", + "def update_plot(change):\n", + " i_ax.cla()\n", + " i_ax_new = i_display.plot(change.new, subplot_index=(0,), set_title=ds.variables[change.new].attrs['long_name'],)\n", + " i_ax_new.grid()\n", + " i_fig.canvas.draw()\n", + " i_fig.canvas.flush_events()\n", + "\n", + "dropdown.observe(update_plot, names='value')\n", + "\n", + 
"widgets.AppLayout(\n", + " header=dropdown,\n", + " center=i_fig.canvas,\n", + " pane_heights=[1, 6,1]\n", + ")\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.12" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/KAZRCOR/.ipynb_checkpoints/kazrcorge.c1-checkpoint.ipynb b/VAPs/quicklook/KAZRCOR/.ipynb_checkpoints/kazrcorge.c1-checkpoint.ipynb new file mode 100644 index 00000000..bf322ff2 --- /dev/null +++ b/VAPs/quicklook/KAZRCOR/.ipynb_checkpoints/kazrcorge.c1-checkpoint.ipynb @@ -0,0 +1,445 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# KAZRCORGE.C1 Plots\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/kazrcor) for more information about this vap." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib widget\n", + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import ipywidgets as widgets\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "# Data archive directory\n", + "DATA_DIR = r'/data/archive/'\n", + "\n", + "# Datastream info\n", + "DATASTREAM_NAME = 'kazrcorge'\n", + "DATA_LEVEL = 'c1'\n", + "LOCATIONS = [{'end_date': '2014-02-07', 'facility': 'C1', 'site': 'nsa', 'start_date': '2011-11-11'}, {'end_date': '2014-03-15', 'facility': 'C1', 'site': 'sgp', 'start_date': '2011-01-18'}, {'end_date': '2014-03-16', 'facility': 'C1', 'site': 'twp', 'start_date': '2011-03-12'}, {'end_date': '2014-05-03', 'facility': 'C3', 'site': 'twp', 'start_date': '2011-01-27'}]" + ] + }, + { + "cell_type": "markdown", + "id": "9faaf875", + "metadata": {}, + "source": [ + "## Define site, facility, and date range" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "ac6764f5", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The following locations and date ranges are available for this VAP:\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sitefacilitystart_dateend_date
0nsaC12011-11-112014-02-07
1sgpC12011-01-182014-03-15
2twpC12011-03-122014-03-16
3twpC32011-01-272014-05-03
\n", + "
" + ], + "text/plain": [ + " site facility start_date end_date\n", + "0 nsa C1 2011-11-11 2014-02-07\n", + "1 sgp C1 2011-01-18 2014-03-15\n", + "2 twp C1 2011-03-12 2014-03-16\n", + "3 twp C3 2011-01-27 2014-05-03" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "print(\"The following locations and date ranges are available for this VAP:\")\n", + "display(pd.DataFrame(LOCATIONS, columns=['site', 'facility', 'start_date', 'end_date']))" + ] + }, + { + "cell_type": "markdown", + "id": "8d132223", + "metadata": {}, + "source": [ + "#### Define site, facility, and date range (date format: YYYY-MM-DD) using the variables below:" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "e563983a", + "metadata": {}, + "outputs": [], + "source": [ + "site_facility = ( 'sgp', 'C1' )\n", + "\n", + "date_start = '2014-03-14'\n", + "date_end = '2014-03-15'" + ] + }, + { + "cell_type": "markdown", + "id": "bccd3dfe-2f99-49a5-bace-ea37e8dd8fc3", + "metadata": {}, + "source": [ + "## Load data files\n", + "Load data files from /data/archive/" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "cb4b9a26-c574-49c0-a521-658fa553e39e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'/data/archive/sgp/sgpkazrcorgeC1.c1'" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Compile list of files\n", + "site, facility = site_facility\n", + "d_date_start = datetime.strptime(date_start, '%Y-%m-%d')\n", + "d_date_end = datetime.strptime(date_end, '%Y-%m-%d')\n", + "dir_path = os.path.join(DATA_DIR + site, site + DATASTREAM_NAME + facility + r'.' 
+ DATA_LEVEL )\n", + "dir_path\n" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "6be8f3dc", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['20140314', '20140315']" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from datetime import date, timedelta\n", + "import pandas as pd\n", + "\n", + "def get_ARM_formated_dates(start_date, end_date):\n", + " \"\"\"\n", + " Get a list of ARM conventional formated date lists, based on start_date and end_date(inclusive)\n", + " EXAMPLE:\n", + " get_ARM_formated_dates(start_date=\"20180219\", end_date=\"20180221\")\n", + " >> [\"20180219\", \"20180220\", \"20180221\"] \n", + " \"\"\"\n", + " \n", + " _start_date = pd.to_datetime(start_date)\n", + " _end_date = pd.to_datetime(end_date)\n", + " \n", + " delta = _end_date - _start_date # returns timedelta \n", + " dates = []\n", + "\n", + " for i in range(delta.days + 1):\n", + " day = _start_date + timedelta(days=i)\n", + " day_formated = day.strftime(format=\"%Y%m%d\")\n", + " dates.append(day_formated)\n", + " return dates\n", + "\n", + "\n", + "get_ARM_formated_dates(start_date=date_start, end_date=date_end)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "51feea2e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['/data/archive/sgp/sgpkazrcorgeC1.c1/sgpkazrcorgeC1.c1.20140314.000001.nc',\n", + " '/data/archive/sgp/sgpkazrcorgeC1.c1/sgpkazrcorgeC1.c1.20140315.000002.nc']" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Filter a list of files based on date pattern\n", + "import glob\n", + "dates = get_ARM_formated_dates(start_date=date_start, end_date=date_end)\n", + "files_filter = []\n", + "for date in dates:\n", + " files_filter += glob.glob(f'{dir_path}/*.{date}*.*')\n", + " files_filter\n", + "files_filter" + ] + }, + { + "cell_type": "code", + 
"execution_count": null, + "id": "b0e5d0d7", + "metadata": {}, + "outputs": [], + "source": [ + "# Load files as a single dataset\n", + "files_list = files_filter \n", + "ds = act.io.armfiles.read_netcdf(files_list)\n", + "ds.clean.cleanup()\n", + "print(f'{len(files_list)} files loaded')\n", + "ds\n" + ] + }, + { + "cell_type": "markdown", + "id": "4a551094-9ec0-4b64-b80a-9940573c2f50", + "metadata": {}, + "source": [ + "## Plot time series data\n", + "#### Define the list of variables to be plotted:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d09b789e-84f1-4605-846b-a72c110c8048", + "metadata": {}, + "outputs": [], + "source": [ + "variables_to_plot = ['reflectivity', 'mean_doppler_velocity', 'spectral_width']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3458fb08-035b-4898-9253-0a94e6f9c97b", + "metadata": {}, + "outputs": [], + "source": [ + "ts_display = act.plotting.TimeSeriesDisplay(ds)\n", + "ts_display.add_subplots((len(variables_to_plot),), figsize = (9.5,4*len(variables_to_plot)))\n", + "\n", + "for i,v in enumerate(variables_to_plot):\n", + " ts_ax = ts_display.plot(v, subplot_index=(i,), set_title=ds.variables[v].attrs['long_name'],)\n", + " ts_ax.grid()\n", + "\n", + "plt.show()\n" + ] + }, + { + "cell_type": "markdown", + "id": "194399aa-1907-452b-8ba9-bc31d7f60291", + "metadata": {}, + "source": [ + "## Quality check plots\n", + "#### Define variable for QC plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9a0ef63d-3eeb-48fc-a24a-43258b6134b8", + "metadata": {}, + "outputs": [], + "source": [ + "qc_variable = 'reflectivity'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "532663a3-4dc0-4497-bda8-018c5f91e1c4", + "metadata": {}, + "outputs": [], + "source": [ + "# QC Plot\n", + "if ('qc_'+qc_variable) in ds.variables:\n", + "\n", + " # Plot\n", + " qc_display = act.plotting.TimeSeriesDisplay(ds)\n", + " qc_display.add_subplots((2,), figsize = 
(9.5,10))\n", + " qc_ax = qc_display.plot(qc_variable, subplot_index=(0,), set_title=\"QC results on field: \" + qc_variable,)\n", + " qc_ax.grid()\n", + " qc_display.qc_flag_block_plot(qc_variable, subplot_index=(1,))\n", + "\n", + " plt.show()\n", + "else:\n", + " print(f'QC not available for the selected field: {qc_variable}')\n" + ] + }, + { + "cell_type": "markdown", + "id": "2308db16-f362-4033-a11a-c3e5e75ad9ba", + "metadata": {}, + "source": [ + "## Field selection dropdown menu\n", + "Select variable to be plotted from a dropdown menu" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d5792fa8-2253-441a-8ac6-1e07a0345031", + "metadata": {}, + "outputs": [], + "source": [ + "plt.ioff()\n", + "\n", + "# populate dropdown menu with available variables \n", + "available_variables = [v for v in ds.variables if not('time' in v or v.startswith('qc_') or v.startswith('source_')) and 'long_name' in ds.variables[v].attrs]\n", + "d_variable = 'reflectivity'\n", + "dropdown = widgets.Dropdown(\n", + " options = [(ds.variables[v].attrs['long_name'], v) for v in available_variables],\n", + " value= d_variable,\n", + " description='Field:',\n", + " disabled=False,\n", + ")\n", + "dropdown.layout.margin = '0px 30% 0px 20%'\n", + "dropdown.layout.width = '50%'\n", + "\n", + "# set up display\n", + "i_display = act.plotting.TimeSeriesDisplay(ds)\n", + "i_display.add_subplots((1,), figsize = (9.5,5))\n", + "i_ax = i_display.plot(d_variable, subplot_index=(0,), set_title=ds.variables[d_variable].attrs['long_name'],)\n", + "i_ax.grid()\n", + "i_fig = i_display.fig\n", + "\n", + "# update plot callback function\n", + "def update_plot(change):\n", + " i_ax.cla()\n", + " i_ax_new = i_display.plot(change.new, subplot_index=(0,), set_title=ds.variables[change.new].attrs['long_name'],)\n", + " i_ax_new.grid()\n", + " i_fig.canvas.draw()\n", + " i_fig.canvas.flush_events()\n", + "\n", + "dropdown.observe(update_plot, names='value')\n", + "\n", + 
"widgets.AppLayout(\n", + " header=dropdown,\n", + " center=i_fig.canvas,\n", + " pane_heights=[1, 6,1]\n", + ")\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.16" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/KAZRCOR/.ipynb_checkpoints/kazrcorhi.c1-checkpoint.ipynb b/VAPs/quicklook/KAZRCOR/.ipynb_checkpoints/kazrcorhi.c1-checkpoint.ipynb new file mode 100644 index 00000000..5595ed42 --- /dev/null +++ b/VAPs/quicklook/KAZRCOR/.ipynb_checkpoints/kazrcorhi.c1-checkpoint.ipynb @@ -0,0 +1,1856 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# KAZRCORHI.C1 Plots\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/kazrcor) for more information about this vap." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib widget\n", + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import ipywidgets as widgets\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "# Data archive directory\n", + "DATA_DIR = r'/data/archive/'\n", + "\n", + "# Datastream info\n", + "DATASTREAM_NAME = 'kazrcorhi'\n", + "DATA_LEVEL = 'c1'\n", + "LOCATIONS = [{'end_date': '2014-03-16', 'facility': 'C1', 'site': 'twp', 'start_date': '2011-03-12'}, {'end_date': '2014-05-03', 'facility': 'C3', 'site': 'twp', 'start_date': '2011-01-27'}]" + ] + }, + { + "cell_type": "markdown", + "id": "9faaf875", + "metadata": {}, + "source": [ + "## Define site, facility, and date range" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "ac6764f5", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The following locations and date ranges are available for this VAP:\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sitefacilitystart_dateend_date
0twpC12011-03-122014-03-16
1twpC32011-01-272014-05-03
\n", + "
" + ], + "text/plain": [ + " site facility start_date end_date\n", + "0 twp C1 2011-03-12 2014-03-16\n", + "1 twp C3 2011-01-27 2014-05-03" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "print(\"The following locations and date ranges are available for this VAP:\")\n", + "display(pd.DataFrame(LOCATIONS, columns=['site', 'facility', 'start_date', 'end_date']))" + ] + }, + { + "cell_type": "markdown", + "id": "8d132223", + "metadata": {}, + "source": [ + "#### Define site, facility, and date range (date format: YYYY-MM-DD) using the variables below:" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "e563983a", + "metadata": {}, + "outputs": [], + "source": [ + "site_facility = ( 'twp', 'C1' )\n", + "\n", + "date_start = '2014-03-15'\n", + "date_end = '2014-03-16'" + ] + }, + { + "cell_type": "markdown", + "id": "bccd3dfe-2f99-49a5-bace-ea37e8dd8fc3", + "metadata": {}, + "source": [ + "## Load data files\n", + "Load data files from /data/archive/" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "cb4b9a26-c574-49c0-a521-658fa553e39e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'/data/archive/twp/twpkazrcorhiC1.c1'" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Compile list of files\n", + "site, facility = site_facility\n", + "d_date_start = datetime.strptime(date_start, '%Y-%m-%d')\n", + "d_date_end = datetime.strptime(date_end, '%Y-%m-%d')\n", + "dir_path = os.path.join(DATA_DIR + site, site + DATASTREAM_NAME + facility + r'.' 
+ DATA_LEVEL )\n", + "dir_path\n" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "6be8f3dc", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['20140315', '20140316']" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from datetime import date, timedelta\n", + "import pandas as pd\n", + "\n", + "def get_ARM_formated_dates(start_date, end_date):\n", + " \"\"\"\n", + " Get a list of ARM conventional formated date lists, based on start_date and end_date(inclusive)\n", + " EXAMPLE:\n", + " get_ARM_formated_dates(start_date=\"20180219\", end_date=\"20180221\")\n", + " >> [\"20180219\", \"20180220\", \"20180221\"] \n", + " \"\"\"\n", + " \n", + " _start_date = pd.to_datetime(start_date)\n", + " _end_date = pd.to_datetime(end_date)\n", + " \n", + " delta = _end_date - _start_date # returns timedelta \n", + " dates = []\n", + "\n", + " for i in range(delta.days + 1):\n", + " day = _start_date + timedelta(days=i)\n", + " day_formated = day.strftime(format=\"%Y%m%d\")\n", + " dates.append(day_formated)\n", + " return dates\n", + "\n", + "\n", + "get_ARM_formated_dates(start_date=date_start, end_date=date_end)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "51feea2e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['/data/archive/twp/twpkazrcorhiC1.c1/twpkazrcorhiC1.c1.20140315.000001.nc',\n", + " '/data/archive/twp/twpkazrcorhiC1.c1/twpkazrcorhiC1.c1.20140316.000001.nc']" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Filter a list of files based on date pattern\n", + "import glob\n", + "dates = get_ARM_formated_dates(start_date=date_start, end_date=date_end)\n", + "files_filter = []\n", + "for date in dates:\n", + " files_filter += glob.glob(f'{dir_path}/*.{date}*.*')\n", + " files_filter\n", + "files_filter" + ] + }, + { + "cell_type": "code", + 
"execution_count": 7, + "id": "b0e5d0d7", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "2 files loaded\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
<xarray.Dataset>\n",
+       "Dimensions:                                   (time: 89645, range: 516)\n",
+       "Coordinates:\n",
+       "  * time                                      (time) datetime64[ns] 2014-03-1...\n",
+       "  * range                                     (range) float32 2.007e+03 ... 1...\n",
+       "Data variables: (12/23)\n",
+       "    base_time                                 (time) datetime64[ns] 2014-03-1...\n",
+       "    time_offset                               (time) datetime64[ns] 2014-03-1...\n",
+       "    reflectivity_copol                        (time, range) float32 dask.array<chunksize=(46771, 516), meta=np.ndarray>\n",
+       "    qc_reflectivity_copol                     (time, range) int32 dask.array<chunksize=(46771, 516), meta=np.ndarray>\n",
+       "    gaseous_attenuation_correction_copol      (time, range) float32 dask.array<chunksize=(46771, 516), meta=np.ndarray>\n",
+       "    qc_gaseous_attenuation_correction_copol   (time, range) int32 dask.array<chunksize=(46771, 516), meta=np.ndarray>\n",
+       "    ...                                        ...\n",
+       "    qc_rh                                     (time, range) int32 dask.array<chunksize=(46771, 516), meta=np.ndarray>\n",
+       "    bar_pres                                  (time, range) float32 dask.array<chunksize=(46771, 516), meta=np.ndarray>\n",
+       "    qc_bar_pres                               (time, range) int32 dask.array<chunksize=(46771, 516), meta=np.ndarray>\n",
+       "    lat                                       (time) float32 -2.06 ... -2.06\n",
+       "    lon                                       (time) float32 147.4 ... 147.4\n",
+       "    alt                                       (time) float32 4.0 4.0 ... 4.0 4.0\n",
+       "Attributes: (12/32)\n",
+       "    command_line:                idl -R -n kazrcor -s twp -f C1 -b 20140315 -...\n",
+       "    Conventions:                 ARM-1.1\n",
+       "    process_version:             vap-kazrcor-1.6-0.el6\n",
+       "    input_datastreams:           twpkazrgeC1.b1 : 1.3 : 20140315.000001\\ntwpk...\n",
+       "    dod_version:                 kazrcorhi-c1-1.3\n",
+       "    site_id:                     twp\n",
+       "    ...                          ...\n",
+       "    doi:                         10.5439/1228772\n",
+       "    history:                     created by user ttoto on machine chalk at 20...\n",
+       "    _file_dates:                 ['20140315', '20140316']\n",
+       "    _file_times:                 ['000001', '000001']\n",
+       "    _datastream:                 twpkazrcorhiC1.c1\n",
+       "    _arm_standards_flag:         1
" + ], + "text/plain": [ + "\n", + "Dimensions: (time: 89645, range: 516)\n", + "Coordinates:\n", + " * time (time) datetime64[ns] 2014-03-1...\n", + " * range (range) float32 2.007e+03 ... 1...\n", + "Data variables: (12/23)\n", + " base_time (time) datetime64[ns] 2014-03-1...\n", + " time_offset (time) datetime64[ns] 2014-03-1...\n", + " reflectivity_copol (time, range) float32 dask.array\n", + " qc_reflectivity_copol (time, range) int32 dask.array\n", + " gaseous_attenuation_correction_copol (time, range) float32 dask.array\n", + " qc_gaseous_attenuation_correction_copol (time, range) int32 dask.array\n", + " ... ...\n", + " qc_rh (time, range) int32 dask.array\n", + " bar_pres (time, range) float32 dask.array\n", + " qc_bar_pres (time, range) int32 dask.array\n", + " lat (time) float32 -2.06 ... -2.06\n", + " lon (time) float32 147.4 ... 147.4\n", + " alt (time) float32 4.0 4.0 ... 4.0 4.0\n", + "Attributes: (12/32)\n", + " command_line: idl -R -n kazrcor -s twp -f C1 -b 20140315 -...\n", + " Conventions: ARM-1.1\n", + " process_version: vap-kazrcor-1.6-0.el6\n", + " input_datastreams: twpkazrgeC1.b1 : 1.3 : 20140315.000001\\ntwpk...\n", + " dod_version: kazrcorhi-c1-1.3\n", + " site_id: twp\n", + " ... 
...\n", + " doi: 10.5439/1228772\n", + " history: created by user ttoto on machine chalk at 20...\n", + " _file_dates: ['20140315', '20140316']\n", + " _file_times: ['000001', '000001']\n", + " _datastream: twpkazrcorhiC1.c1\n", + " _arm_standards_flag: 1" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Load files as a single dataset\n", + "files_list = files_filter \n", + "ds = act.io.armfiles.read_netcdf(files_list)\n", + "ds.clean.cleanup()\n", + "print(f'{len(files_list)} files loaded')\n", + "ds\n" + ] + }, + { + "cell_type": "markdown", + "id": "4a551094-9ec0-4b64-b80a-9940573c2f50", + "metadata": {}, + "source": [ + "## Plot time series data\n", + "#### Define the list of variables to be plotted:" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "d09b789e-84f1-4605-846b-a72c110c8048", + "metadata": {}, + "outputs": [], + "source": [ + "variables_to_plot = ['reflectivity', 'mean_doppler_velocity', 'spectral_width']" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "3458fb08-035b-4898-9253-0a94e6f9c97b", + "metadata": {}, + "outputs": [ + { + "ename": "KeyError", + "evalue": "'reflectivity'", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[9], line 5\u001b[0m\n\u001b[1;32m 2\u001b[0m ts_display\u001b[38;5;241m.\u001b[39madd_subplots((\u001b[38;5;28mlen\u001b[39m(variables_to_plot),), figsize \u001b[38;5;241m=\u001b[39m (\u001b[38;5;241m9.5\u001b[39m,\u001b[38;5;241m4\u001b[39m\u001b[38;5;241m*\u001b[39m\u001b[38;5;28mlen\u001b[39m(variables_to_plot)))\n\u001b[1;32m 4\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m i,v \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28menumerate\u001b[39m(variables_to_plot):\n\u001b[0;32m----> 5\u001b[0m ts_ax \u001b[38;5;241m=\u001b[39m 
ts_display\u001b[38;5;241m.\u001b[39mplot(v, subplot_index\u001b[38;5;241m=\u001b[39m(i,), set_title\u001b[38;5;241m=\u001b[39m\u001b[43mds\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mvariables\u001b[49m\u001b[43m[\u001b[49m\u001b[43mv\u001b[49m\u001b[43m]\u001b[49m\u001b[38;5;241m.\u001b[39mattrs[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mlong_name\u001b[39m\u001b[38;5;124m'\u001b[39m],)\n\u001b[1;32m 6\u001b[0m ts_ax\u001b[38;5;241m.\u001b[39mgrid()\n\u001b[1;32m 8\u001b[0m plt\u001b[38;5;241m.\u001b[39mshow()\n", + "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/xarray/core/utils.py:453\u001b[0m, in \u001b[0;36mFrozen.__getitem__\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m 452\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m__getitem__\u001b[39m(\u001b[38;5;28mself\u001b[39m, key: K) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m V:\n\u001b[0;32m--> 453\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmapping\u001b[49m\u001b[43m[\u001b[49m\u001b[43mkey\u001b[49m\u001b[43m]\u001b[49m\n", + "\u001b[0;31mKeyError\u001b[0m: 'reflectivity'" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "f666994b48ed49da969503777e133ba7", + "version_major": 2, + "version_minor": 0 + }, + "image/png": "", + "text/html": [ + "\n", + "
\n", + "
\n", + " Figure\n", + "
\n", + " \n", + "
\n", + " " + ], + "text/plain": [ + "Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "ts_display = act.plotting.TimeSeriesDisplay(ds)\n", + "ts_display.add_subplots((len(variables_to_plot),), figsize = (9.5,4*len(variables_to_plot)))\n", + "\n", + "for i,v in enumerate(variables_to_plot):\n", + " ts_ax = ts_display.plot(v, subplot_index=(i,), set_title=ds.variables[v].attrs['long_name'],)\n", + " ts_ax.grid()\n", + "\n", + "plt.show()\n" + ] + }, + { + "cell_type": "markdown", + "id": "194399aa-1907-452b-8ba9-bc31d7f60291", + "metadata": {}, + "source": [ + "## Quality check plots\n", + "#### Define variable for QC plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9a0ef63d-3eeb-48fc-a24a-43258b6134b8", + "metadata": {}, + "outputs": [], + "source": [ + "qc_variable = 'reflectivity'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "532663a3-4dc0-4497-bda8-018c5f91e1c4", + "metadata": {}, + "outputs": [], + "source": [ + "# QC Plot\n", + "if ('qc_'+qc_variable) in ds.variables:\n", + "\n", + " # Plot\n", + " qc_display = act.plotting.TimeSeriesDisplay(ds)\n", + " qc_display.add_subplots((2,), figsize = (9.5,10))\n", + " qc_ax = qc_display.plot(qc_variable, subplot_index=(0,), set_title=\"QC results on field: \" + qc_variable,)\n", + " qc_ax.grid()\n", + " qc_display.qc_flag_block_plot(qc_variable, subplot_index=(1,))\n", + "\n", + " plt.show()\n", + "else:\n", + " print(f'QC not available for the selected field: {qc_variable}')\n" + ] + }, + { + "cell_type": "markdown", + "id": "2308db16-f362-4033-a11a-c3e5e75ad9ba", + "metadata": {}, + "source": [ + "## Field selection dropdown menu\n", + "Select variable to be plotted from a dropdown menu" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d5792fa8-2253-441a-8ac6-1e07a0345031", + "metadata": {}, + 
"outputs": [], + "source": [ + "plt.ioff()\n", + "\n", + "# populate dropdown menu with available variables \n", + "available_variables = [v for v in ds.variables if not('time' in v or v.startswith('qc_') or v.startswith('source_')) and 'long_name' in ds.variables[v].attrs]\n", + "d_variable = 'reflectivity'\n", + "dropdown = widgets.Dropdown(\n", + " options = [(ds.variables[v].attrs['long_name'], v) for v in available_variables],\n", + " value= d_variable,\n", + " description='Field:',\n", + " disabled=False,\n", + ")\n", + "dropdown.layout.margin = '0px 30% 0px 20%'\n", + "dropdown.layout.width = '50%'\n", + "\n", + "# set up display\n", + "i_display = act.plotting.TimeSeriesDisplay(ds)\n", + "i_display.add_subplots((1,), figsize = (9.5,5))\n", + "i_ax = i_display.plot(d_variable, subplot_index=(0,), set_title=ds.variables[d_variable].attrs['long_name'],)\n", + "i_ax.grid()\n", + "i_fig = i_display.fig\n", + "\n", + "# update plot callback function\n", + "def update_plot(change):\n", + " i_ax.cla()\n", + " i_ax_new = i_display.plot(change.new, subplot_index=(0,), set_title=ds.variables[change.new].attrs['long_name'],)\n", + " i_ax_new.grid()\n", + " i_fig.canvas.draw()\n", + " i_fig.canvas.flush_events()\n", + "\n", + "dropdown.observe(update_plot, names='value')\n", + "\n", + "widgets.AppLayout(\n", + " header=dropdown,\n", + " center=i_fig.canvas,\n", + " pane_heights=[1, 6,1]\n", + ")\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.16" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git 
a/VAPs/quicklook/KAZRCOR/.ipynb_checkpoints/kazrcormd.c1-checkpoint.ipynb b/VAPs/quicklook/KAZRCOR/.ipynb_checkpoints/kazrcormd.c1-checkpoint.ipynb new file mode 100644 index 00000000..284e0d7b --- /dev/null +++ b/VAPs/quicklook/KAZRCOR/.ipynb_checkpoints/kazrcormd.c1-checkpoint.ipynb @@ -0,0 +1,2667 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# KAZRCORMD.C1 Plots\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/kazrcor) for more information about this vap." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib widget\n", + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import ipywidgets as widgets\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "# Data archive directory\n", + "DATA_DIR = r'/data/archive/'\n", + "\n", + "# Datastream info\n", + "DATASTREAM_NAME = 'kazrcormd'\n", + "DATA_LEVEL = 'c1'\n", + "LOCATIONS = [{'end_date': '2014-02-07', 'facility': 'C1', 'site': 'nsa', 'start_date': '2011-11-11'}, {'end_date': '2014-03-15', 'facility': 'C1', 'site': 'sgp', 'start_date': '2011-05-03'}]" + ] + }, + { + "cell_type": "markdown", + "id": "9faaf875", + "metadata": {}, + "source": [ + "## Define site, facility, and date range" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "ac6764f5", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The following locations and date ranges are available for this VAP:\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sitefacilitystart_dateend_date
0nsaC12011-11-112014-02-07
1sgpC12011-05-032014-03-15
\n", + "
" + ], + "text/plain": [ + " site facility start_date end_date\n", + "0 nsa C1 2011-11-11 2014-02-07\n", + "1 sgp C1 2011-05-03 2014-03-15" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "print(\"The following locations and date ranges are available for this VAP:\")\n", + "display(pd.DataFrame(LOCATIONS, columns=['site', 'facility', 'start_date', 'end_date']))" + ] + }, + { + "cell_type": "markdown", + "id": "8d132223", + "metadata": {}, + "source": [ + "#### Define site, facility, and date range (date format: YYYY-MM-DD) using the variables below:" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "e563983a", + "metadata": {}, + "outputs": [], + "source": [ + "site_facility = ( 'sgp', 'C1' )\n", + "\n", + "date_start = '2014-03-14'\n", + "date_end = '2014-03-15'" + ] + }, + { + "cell_type": "markdown", + "id": "bccd3dfe-2f99-49a5-bace-ea37e8dd8fc3", + "metadata": {}, + "source": [ + "## Load data files\n", + "Load data files from /data/archive/" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "cb4b9a26-c574-49c0-a521-658fa553e39e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'/data/archive/sgp/sgpkazrcormdC1.c1'" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Compile list of files\n", + "site, facility = site_facility\n", + "d_date_start = datetime.strptime(date_start, '%Y-%m-%d')\n", + "d_date_end = datetime.strptime(date_end, '%Y-%m-%d')\n", + "dir_path = os.path.join(DATA_DIR + site, site + DATASTREAM_NAME + facility + r'.' 
+ DATA_LEVEL )\n", + "dir_path\n" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "6be8f3dc", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['20140314', '20140315']" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from datetime import date, timedelta\n", + "import pandas as pd\n", + "\n", + "def get_ARM_formated_dates(start_date, end_date):\n", + " \"\"\"\n", + " Get a list of ARM conventional formated date lists, based on start_date and end_date(inclusive)\n", + " EXAMPLE:\n", + " get_ARM_formated_dates(start_date=\"20180219\", end_date=\"20180221\")\n", + " >> [\"20180219\", \"20180220\", \"20180221\"] \n", + " \"\"\"\n", + " \n", + " _start_date = pd.to_datetime(start_date)\n", + " _end_date = pd.to_datetime(end_date)\n", + " \n", + " delta = _end_date - _start_date # returns timedelta \n", + " dates = []\n", + "\n", + " for i in range(delta.days + 1):\n", + " day = _start_date + timedelta(days=i)\n", + " day_formated = day.strftime(format=\"%Y%m%d\")\n", + " dates.append(day_formated)\n", + " return dates\n", + "\n", + "\n", + "get_ARM_formated_dates(start_date=date_start, end_date=date_end)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "51feea2e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['/data/archive/sgp/sgpkazrcormdC1.c1/sgpkazrcormdC1.c1.20140314.000001.nc',\n", + " '/data/archive/sgp/sgpkazrcormdC1.c1/sgpkazrcormdC1.c1.20140315.000002.nc']" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Filter a list of files based on date pattern\n", + "import glob\n", + "dates = get_ARM_formated_dates(start_date=date_start, end_date=date_end)\n", + "files_filter = []\n", + "for date in dates:\n", + " files_filter += glob.glob(f'{dir_path}/*.{date}*.*')\n", + " files_filter\n", + "files_filter" + ] + }, + { + "cell_type": "code", + 
"execution_count": 7, + "id": "b0e5d0d7", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "72 files loaded\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
<xarray.Dataset>\n",
+       "Dimensions:                                   (time: 23386, range: 652)\n",
+       "Coordinates:\n",
+       "  * time                                      (time) datetime64[ns] 2014-03-1...\n",
+       "  * range                                     (range) float32 718.1 ... 2.023...\n",
+       "Data variables: (12/39)\n",
+       "    base_time                                 datetime64[ns] 2014-03-14\n",
+       "    time_offset                               (time) datetime64[ns] 2014-03-1...\n",
+       "    reflectivity_copol                        (time, range) float32 dask.array<chunksize=(23386, 652), meta=np.ndarray>\n",
+       "    qc_reflectivity_copol                     (time, range) int32 dask.array<chunksize=(23386, 652), meta=np.ndarray>\n",
+       "    gaseous_attenuation_correction_copol      (time, range) float32 dask.array<chunksize=(23386, 652), meta=np.ndarray>\n",
+       "    qc_gaseous_attenuation_correction_copol   (time, range) int32 dask.array<chunksize=(23386, 652), meta=np.ndarray>\n",
+       "    ...                                        ...\n",
+       "    qc_rh                                     (time, range) int32 dask.array<chunksize=(23386, 652), meta=np.ndarray>\n",
+       "    bar_pres                                  (time, range) float32 dask.array<chunksize=(23386, 652), meta=np.ndarray>\n",
+       "    qc_bar_pres                               (time, range) int32 dask.array<chunksize=(23386, 652), meta=np.ndarray>\n",
+       "    lat                                       float32 ...\n",
+       "    lon                                       float32 ...\n",
+       "    alt                                       float32 ...\n",
+       "Attributes: (12/33)\n",
+       "    command_line:                idl -R -n kazrcor -s sgp -f C1 -b 20140314 -...\n",
+       "    Conventions:                 ARM-1.1\n",
+       "    process_version:             vap-kazrcor-1.6-0.el6\n",
+       "    input_datastreams:           sgpkazrgeC1.b1 : 1.3 : 20140314.000001\\nsgpk...\n",
+       "    dod_version:                 kazrcormd-c1-2.0\n",
+       "    site_id:                     sgp\n",
+       "    ...                          ...\n",
+       "    doi:                         10.5439/1228771\n",
+       "    history:                     created by user ttoto on machine chalk at 20...\n",
+       "    _file_dates:                 ['20140314']\n",
+       "    _file_times:                 ['000001']\n",
+       "    _datastream:                 sgpkazrcormdC1.c1\n",
+       "    _arm_standards_flag:         1
" + ], + "text/plain": [ + "\n", + "Dimensions: (time: 23386, range: 652)\n", + "Coordinates:\n", + " * time (time) datetime64[ns] 2014-03-1...\n", + " * range (range) float32 718.1 ... 2.023...\n", + "Data variables: (12/39)\n", + " base_time datetime64[ns] 2014-03-14\n", + " time_offset (time) datetime64[ns] 2014-03-1...\n", + " reflectivity_copol (time, range) float32 dask.array\n", + " qc_reflectivity_copol (time, range) int32 dask.array\n", + " gaseous_attenuation_correction_copol (time, range) float32 dask.array\n", + " qc_gaseous_attenuation_correction_copol (time, range) int32 dask.array\n", + " ... ...\n", + " qc_rh (time, range) int32 dask.array\n", + " bar_pres (time, range) float32 dask.array\n", + " qc_bar_pres (time, range) int32 dask.array\n", + " lat float32 ...\n", + " lon float32 ...\n", + " alt float32 ...\n", + "Attributes: (12/33)\n", + " command_line: idl -R -n kazrcor -s sgp -f C1 -b 20140314 -...\n", + " Conventions: ARM-1.1\n", + " process_version: vap-kazrcor-1.6-0.el6\n", + " input_datastreams: sgpkazrgeC1.b1 : 1.3 : 20140314.000001\\nsgpk...\n", + " dod_version: kazrcormd-c1-2.0\n", + " site_id: sgp\n", + " ... 
...\n", + " doi: 10.5439/1228771\n", + " history: created by user ttoto on machine chalk at 20...\n", + " _file_dates: ['20140314']\n", + " _file_times: ['000001']\n", + " _datastream: sgpkazrcormdC1.c1\n", + " _arm_standards_flag: 1" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Load files as a single dataset\n", + "files_list = files_filter [0]\n", + "ds = act.io.armfiles.read_netcdf(files_list)\n", + "ds.clean.cleanup()\n", + "print(f'{len(files_list)} files loaded')\n", + "ds\n" + ] + }, + { + "cell_type": "markdown", + "id": "4a551094-9ec0-4b64-b80a-9940573c2f50", + "metadata": {}, + "source": [ + "## Plot time series data\n", + "#### Define the list of variables to be plotted:" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "d09b789e-84f1-4605-846b-a72c110c8048", + "metadata": {}, + "outputs": [], + "source": [ + "variables_to_plot = ['reflectivity', 'mean_doppler_velocity', 'spectral_width']" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "3458fb08-035b-4898-9253-0a94e6f9c97b", + "metadata": {}, + "outputs": [ + { + "ename": "KeyError", + "evalue": "'reflectivity'", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[9], line 5\u001b[0m\n\u001b[1;32m 2\u001b[0m ts_display\u001b[38;5;241m.\u001b[39madd_subplots((\u001b[38;5;28mlen\u001b[39m(variables_to_plot),), figsize \u001b[38;5;241m=\u001b[39m (\u001b[38;5;241m9.5\u001b[39m,\u001b[38;5;241m4\u001b[39m\u001b[38;5;241m*\u001b[39m\u001b[38;5;28mlen\u001b[39m(variables_to_plot)))\n\u001b[1;32m 4\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m i,v \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28menumerate\u001b[39m(variables_to_plot):\n\u001b[0;32m----> 5\u001b[0m ts_ax \u001b[38;5;241m=\u001b[39m 
ts_display\u001b[38;5;241m.\u001b[39mplot(v, subplot_index\u001b[38;5;241m=\u001b[39m(i,), set_title\u001b[38;5;241m=\u001b[39m\u001b[43mds\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mvariables\u001b[49m\u001b[43m[\u001b[49m\u001b[43mv\u001b[49m\u001b[43m]\u001b[49m\u001b[38;5;241m.\u001b[39mattrs[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mlong_name\u001b[39m\u001b[38;5;124m'\u001b[39m],)\n\u001b[1;32m 6\u001b[0m ts_ax\u001b[38;5;241m.\u001b[39mgrid()\n\u001b[1;32m 8\u001b[0m plt\u001b[38;5;241m.\u001b[39mshow()\n", + "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/xarray/core/utils.py:453\u001b[0m, in \u001b[0;36mFrozen.__getitem__\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m 452\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m__getitem__\u001b[39m(\u001b[38;5;28mself\u001b[39m, key: K) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m V:\n\u001b[0;32m--> 453\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmapping\u001b[49m\u001b[43m[\u001b[49m\u001b[43mkey\u001b[49m\u001b[43m]\u001b[49m\n", + "\u001b[0;31mKeyError\u001b[0m: 'reflectivity'" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "fe6a6740ed58487aa977d2bfc40e0bdc", + "version_major": 2, + "version_minor": 0 + }, + "image/png": "", + "text/html": [ + "\n", + "
\n", + "
\n", + " Figure\n", + "
\n", + " \n", + "
\n", + " " + ], + "text/plain": [ + "Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "ts_display = act.plotting.TimeSeriesDisplay(ds)\n", + "ts_display.add_subplots((len(variables_to_plot),), figsize = (9.5,4*len(variables_to_plot)))\n", + "\n", + "for i,v in enumerate(variables_to_plot):\n", + " ts_ax = ts_display.plot(v, subplot_index=(i,), set_title=ds.variables[v].attrs['long_name'],)\n", + " ts_ax.grid()\n", + "\n", + "plt.show()\n" + ] + }, + { + "cell_type": "markdown", + "id": "194399aa-1907-452b-8ba9-bc31d7f60291", + "metadata": {}, + "source": [ + "## Quality check plots\n", + "#### Define variable for QC plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9a0ef63d-3eeb-48fc-a24a-43258b6134b8", + "metadata": {}, + "outputs": [], + "source": [ + "qc_variable = 'reflectivity'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "532663a3-4dc0-4497-bda8-018c5f91e1c4", + "metadata": {}, + "outputs": [], + "source": [ + "# QC Plot\n", + "if ('qc_'+qc_variable) in ds.variables:\n", + "\n", + " # Plot\n", + " qc_display = act.plotting.TimeSeriesDisplay(ds)\n", + " qc_display.add_subplots((2,), figsize = (9.5,10))\n", + " qc_ax = qc_display.plot(qc_variable, subplot_index=(0,), set_title=\"QC results on field: \" + qc_variable,)\n", + " qc_ax.grid()\n", + " qc_display.qc_flag_block_plot(qc_variable, subplot_index=(1,))\n", + "\n", + " plt.show()\n", + "else:\n", + " print(f'QC not available for the selected field: {qc_variable}')\n" + ] + }, + { + "cell_type": "markdown", + "id": "2308db16-f362-4033-a11a-c3e5e75ad9ba", + "metadata": {}, + "source": [ + "## Field selection dropdown menu\n", + "Select variable to be plotted from a dropdown menu" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d5792fa8-2253-441a-8ac6-1e07a0345031", + "metadata": {}, + 
"outputs": [], + "source": [ + "plt.ioff()\n", + "\n", + "# populate dropdown menu with available variables \n", + "available_variables = [v for v in ds.variables if not('time' in v or v.startswith('qc_') or v.startswith('source_')) and 'long_name' in ds.variables[v].attrs]\n", + "d_variable = 'reflectivity'\n", + "dropdown = widgets.Dropdown(\n", + " options = [(ds.variables[v].attrs['long_name'], v) for v in available_variables],\n", + " value= d_variable,\n", + " description='Field:',\n", + " disabled=False,\n", + ")\n", + "dropdown.layout.margin = '0px 30% 0px 20%'\n", + "dropdown.layout.width = '50%'\n", + "\n", + "# set up display\n", + "i_display = act.plotting.TimeSeriesDisplay(ds)\n", + "i_display.add_subplots((1,), figsize = (9.5,5))\n", + "i_ax = i_display.plot(d_variable, subplot_index=(0,), set_title=ds.variables[d_variable].attrs['long_name'],)\n", + "i_ax.grid()\n", + "i_fig = i_display.fig\n", + "\n", + "# update plot callback function\n", + "def update_plot(change):\n", + " i_ax.cla()\n", + " i_ax_new = i_display.plot(change.new, subplot_index=(0,), set_title=ds.variables[change.new].attrs['long_name'],)\n", + " i_ax_new.grid()\n", + " i_fig.canvas.draw()\n", + " i_fig.canvas.flush_events()\n", + "\n", + "dropdown.observe(update_plot, names='value')\n", + "\n", + "widgets.AppLayout(\n", + " header=dropdown,\n", + " center=i_fig.canvas,\n", + " pane_heights=[1, 6,1]\n", + ")\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.16" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git 
a/VAPs/quicklook/KAZRCOR/KAZRCOR_tutorial.ipynb b/VAPs/quicklook/KAZRCOR/KAZRCOR_tutorial.ipynb new file mode 100644 index 00000000..6ad0056b --- /dev/null +++ b/VAPs/quicklook/KAZRCOR/KAZRCOR_tutorial.ipynb @@ -0,0 +1,838 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# KAZRCORGE.C1 Notebook\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/kazrcor) for more information about this VAP." + ] + }, + { + "cell_type": "markdown", + "id": "97097763", + "metadata": {}, + "source": [ + "In this notebook, we demonstrate the workflow to explore ARM VAP data (using kazrcorge as an example). Value-added products (VAPs) are higher-order data products that have been analyzed and processed to ease scientists’ use of ARM data in atmospheric research and global climate models. \n", + "Here is the main content we will cover." + ] + }, + { + "cell_type": "markdown", + "id": "eddec40f", + "metadata": {}, + "source": [ + "# Table of Contents\n", + "## Access the data\n", + "* How to retrieve the data\n", + "* Data path and file name conventions\n", + "* Load data\n", + "## Explore the data\n", + "* NetCDF Data structure\n", + "* Xarray essentials\n", + "* Xarray Variable\n", + "## Plot the data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "import random\n", + "\n", + "import glob\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "id": "2372d5be", + "metadata": {}, + "source": [ + "## Access the data" + ] + }, + { + "cell_type": "markdown", + "id": "5b0d9684", + "metadata": {}, + "source": [ + "### How to retrieve the
data\n", + "We assume the path \"/data/archive\" is available where you are running this notebook. You can use `os.path.exists(\"/data/archive\")` to verify if the path exists on your machine.\n", + "\n", + "\n", + "### Data path and file name conventions\n", + "There are several common terms regarding ARM data, for example, data-stream-name, data-level, site, facility, instrument, etc. (For more details, please see [ARM Data File Standards Version 1.2](https://www.google.com/search?q=arm+datastream+facility+cite+definition&rlz=1C1GCEJ_enUS1029US1029&ei=hb41ZICrBPukqtsPvMODIA&ved=0ahUKEwjAgeiS0aL-AhV7kmoFHbzhAAQQ4dUDCBA&uact=5&oq=arm+datastream+facility+cite+definition&gs_lcp=Cgxnd3Mtd2l6LXNlcnAQAzIFCAAQogQyBQgAEKIEMgUIABCiBDIFCAAQogQyBQgAEKIEOgoIABBHENYEELADOgoIIRCgARDDBBAKSgQIQRgAUJ4DWIoOYO0PaAFwAXgAgAGMAYgBuQmSAQMyLjmYAQCgAQHIAQjAAQE&sclient=gws-wiz-serp).) \n", + "\n", + "For example, this notebook is called `kazrcorge.c1`, where `kazrcorge` is the \"datastream name\", and the `c1` is the \"data level\".\n", + "\n", + "This datastream also contains site `nsa` and facility `C1`. (Note: an individual datastream might have multiple site-facility pairs.)\n", + "In such a case, the data of this data-stream is stored at `/data/archive/nsa/nsakazrcorgeC1.c1`, which is in the format of `<DATA_DIR>/<site>/<site><datastream_name><facility>.<data_level>`. We can use the following method to assign the data-stream directory `datastream_dir = os.path.join(DATA_DIR, site, site + DATASTREAM_NAME + facility + '.' + DATA_LEVEL )`\n", + "\n", + "The data files under datastream_dir also follow naming conventions. But once you reach the datastream_dir level, the most important file naming convention to differentiate the files is \"yyyyMMdd.hhmmss\", which comes in handy to filter files based on datetime. 
For example, we can use `glob.glob(f'{datastream_dir}/*.200709*.*')` to filter files from September 2007.\n", + "\n", + "(Note: refer to the https://adc.arm.gov/discovery/#/ and https://adc.arm.gov/solr8/metadata/select API to explore ARM datastreams and their associated available sites and facilities.)\n", + "\n", + "Please see the following examples in action" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d7e9eb85", + "metadata": {}, + "outputs": [], + "source": [ + "# Verify if DATA_DIR path exists\n", + "DATA_DIR = \"/data/archive\"\n", + "os.path.exists(DATA_DIR)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "586993fd", + "metadata": {}, + "outputs": [], + "source": [ + "# Specify datastream_dir following the path conventions and check its existence\n", + "DATASTREAM_NAME = \"kazrcorge\"\n", + "DATA_LEVEL = \"c1\"\n", + "site = \"nsa\"\n", + "facility = \"C1\"\n", + "datastream_dir = os.path.join(DATA_DIR, site, site + DATASTREAM_NAME + facility + '.' 
+ DATA_LEVEL )\n", + "print(datastream_dir)\n", + "print(os.path.exists(datastream_dir))\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0742f7c1", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: list 5 (random) files under datastream_dir\n", + "files = os.listdir(datastream_dir)\n", + "files[:5]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "39b98a36", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: get most recent file\n", + "list_of_files = glob.glob(f\"{datastream_dir}/*\") # * means all if need specific format then *.csv\n", + "latest_file = max(list_of_files, key=os.path.getctime)\n", + "latest_file" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "902d514e", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus sort datastream files based on datetime\n", + "files = os.listdir(datastream_dir)\n", + "file_sorted = files.copy()\n", + "file_sorted.sort() \n", + "print(file_sorted[:5])\n", + "\n", + "# to reverse\n", + "file_sorted_reverse = files.copy() \n", + "file_sorted_reverse.sort(reverse=True)\n", + "print(file_sorted_reverse[:5])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ec5923b2", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: pattern matching\n", + "# filter the 200709** files under datastream_dir\n", + "files_filter = glob.glob(f'{datastream_dir}/*.200709*.*')\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c4a4aa18", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: use armnotebook_utils.file_filter (TODO) to filter files based on datastream info \n" + ] + }, + { + "cell_type": "markdown", + "id": "cfeb9efc", + "metadata": {}, + "source": [ + "### Load data\n", + "The arm data is stored in [NetCDF](https://en.wikipedia.org/wiki/NetCDF#:~:text=%22NetCDF%20(network%20Common%20Data%20Form,format%20for%20representing%20scientific%20data.) format. 
We can use xarray's `open_dataset` method to load a single file, or `open_mfdataset` to open multiple files. (Note: the latter will still return a single xarray dataset object by combining multiple files.)\n", + "\n", + "Note: open_dataset keeps the file handle open and lazy loads its contents. All parameters are passed directly to open_dataset. It is preferable to load_dataset for memory efficiency. (For more details, please see [xarray.open_dataset](https://docs.xarray.dev/en/stable/generated/xarray.open_dataset.html))\n", + "\n", + "See the following example in action" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a440a329", + "metadata": {}, + "outputs": [], + "source": [ + "# open single file\n", + "full_path = latest_file\n", + "print(full_path)\n", + "ds_single = xr.open_dataset(full_path)\n", + "ds_single" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0143a3d", + "metadata": {}, + "outputs": [], + "source": [ + "print(ds_single)\n", + "print(type(ds_single))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1c0f8939", + "metadata": {}, + "outputs": [], + "source": [ + "# open multiple files\n", + "n_files = 3\n", + "full_paths = [os.path.join(datastream_dir, f_path) for f_path in file_sorted_reverse[:n_files]]\n", + "print(full_paths)\n", + "try: # Note: sometimes multiple files cannot be merged, so we used try except here.\n", + " ds_mutiple = xr.open_mfdataset(full_paths)\n", + " # ds_mutiple\n", + " print(type(ds_mutiple))\n", + " print(ds_mutiple)\n", + "except Exception as e:\n", + " print(e)" + ] + }, + { + "cell_type": "markdown", + "id": "3fa59943", + "metadata": {}, + "source": [ + "## Explore the data" + ] + }, + { + "cell_type": "markdown", + "id": "571f69b9", + "metadata": {}, + "source": [ + "### NetCDF Data structure \n", + "\n", + "\n", + "(If you are confident with NetCDF basics and xarray essentials, feel free to skip this section.)\n", + "Before we dive into 
data exploration, there are some essential concepts we should be familiar with (shown below): \n", + "* Dataset\n", + "* Data array\n", + "* Variable\n", + "* Dimension\n", + "* Coordinate\n", + "* Data Type\n", + "* Meta Data (Attributes)\n", + "\n", + "We will not go into details about NetCDF basics, but here are some references you might find helpful\n", + "* [Components of a NetCDF Dataset](https://iprc.soest.hawaii.edu/users/xfu/tool/guidef-7.html#)\n", + "* [Network Common Data Form (NetCDF)](https://www.unidata.ucar.edu/software/netcdf/) and [A Brief History of (netCDF) Time](https://www.unidata.ucar.edu/software/netcdf/time/recs.html)\n", + "\n", + "\n", + "\n", + "### Xarray essentials\n", + "Earlier we introduced the xarray package and used `open_dataset` and `open_mfdataset` to retrieve NetCDF data files as an xarray Dataset object. Recall that you can review the data in a notebook by using `print(ds)` or `ds`. \n", + "\n", + "Using xarray to retrieve the aforementioned NetCDF basics is straightforward. 
In general,\n", + "* Dataset: `ds`\n", + "* Data array: `ds.variables`\n", + "* Variable: `ds.variables`\n", + "* Dimension: `ds.dims`\n", + "* Coordinate: `ds.coords`\n", + "* Data Type: `type`\n", + "* Meta Data (Attributes): `ds.attrs`\n", + "\n", + "Also, here are some references if you are new to xarray\n", + "* [Xarray in 45 minutes](https://tutorial.xarray.dev/overview/xarray-in-45-min.html)\n", + "* [Handling NetCDF Files using XArray for Absolute Beginners](https://towardsdatascience.com/handling-netcdf-files-using-xarray-for-absolute-beginners-111a8ab4463f)\n", + "\n", + "Try the following commands in action\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b3eac323", + "metadata": {}, + "outputs": [], + "source": [ + "ds = ds_single" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "77ecf85d", + "metadata": {}, + "outputs": [], + "source": [ + "# Dataset \n", + "ds" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "25e7de09", + "metadata": {}, + "outputs": [], + "source": [ + "# Data array, Variable\n", + "# Note: the info can be overwhelming. 
Do not attempt to grasp everything at first glance.\n", + "ds.variables" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8c41b67e", + "metadata": {}, + "outputs": [], + "source": [ + "# dimensions\n", + "ds.dims" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "156f1dfc", + "metadata": {}, + "outputs": [], + "source": [ + "# coordinates\n", + "ds.coords" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "277d6064", + "metadata": {}, + "outputs": [], + "source": [ + "# Meta Data (Attributes)\n", + "ds.attrs" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d334681f", + "metadata": {}, + "outputs": [], + "source": [ + "# type\n", + "print(type(ds))\n", + "print(type(ds.variables))\n", + "print(type(ds.dims))\n", + "print(type(ds.coords))\n", + "print(type(ds.attrs))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "643399d6", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: retrieve variable names only\n", + "list(ds.variables)" + ] + }, + { + "cell_type": "markdown", + "id": "1cf36878", + "metadata": {}, + "source": [ + "#### Discussion: variable vs. coordinates vs. dimensions. \n", + "From [Components of a NetCDF Dataset](https://iprc.soest.hawaii.edu/users/xfu/tool/guidef-7.html#):\n", + "\n", + "* Dimension: A dimension may be used to represent a real physical dimension, for example, time, latitude, longitude, or height. A dimension might also be used to index other quantities, for example station or model-run-number.\n", + "A netCDF dimension has both a name and a length. A dimension length is an arbitrary positive integer, except that one dimension in a netCDF dataset can have the length UNLIMITED.\n", + "\n", + "* Variables: Variables are used to store the bulk of the data in a netCDF dataset. A variable represents an array of values of the same type. A scalar value is treated as a 0-dimensional array. 
A variable has a name, a data type, and a shape described by its list of dimensions specified when the variable is created. A variable may also have associated attributes, which may be added, deleted or changed after the variable is created.\n", + "\n", + "* Coordinate (Variables): It is legal for a variable to have the same name as a dimension. Such variables have no special meaning to the netCDF library. However there is a convention that such variables should be treated in a special way by software using this library.\n", + "A variable with the same name as a dimension is called a coordinate variable. It typically defines a physical coordinate corresponding to that dimension.\n", + "\n", + "\n", + "Tips: By definition, coordinates and dimensions are also variables. For example, ds.time is a coordinate but it is also a special variable. But in practice, when we talk about a variable, we mean a regular (non-coordinate) variable. We will use this convention for the remainder of the notebook. " + ] + }, + { + "cell_type": "markdown", + "id": "d47120f9", + "metadata": {}, + "source": [ + "### Xarray Variable\n", + "\n", + "\n", + "We can use `ds.variables` to access the variables of a dataset. But many times viewing a print-out of the whole dataset can be overwhelming. Instead, we would work on the individual variable (also called an xarray data array) using the `ds[var_name]` syntax, where `var_name` is the name of the variable. See the following examples in action.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5a57e136", + "metadata": {}, + "outputs": [], + "source": [ + "print(type(ds[\"time\"]))\n", + "ds[\"time\"]" + ] + }, + { + "cell_type": "markdown", + "id": "51ec643e", + "metadata": {}, + "source": [ + "#### Variable properties\n", + "For an individual variable, the following properties are the most commonly used, shown with the associated xarray attribute to retrieve them. 
(assuming var = ds[var_name])\n", + "* name: `var.name`\n", + "* data content: `var.data`\n", + "* attributes: `var.attrs`\n", + "* dimensions: `var.dims`\n", + "* data type: `var.data.dtype`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a632a525", + "metadata": {}, + "outputs": [], + "source": [ + "var_name = \"time\"\n", + "var = ds[var_name]\n", + "\n", + "print(\"var.name: \\n\", var.name, \"\\n\")\n", + "print(\"var.data: \\n\", var.data, \"\\n\")\n", + "print(\"var.attrs: \\n\", var.attrs, \"\\n\")\n", + "print(\"var.dims: \\n\", var.dims, \"\\n\")\n", + "print(\"var.data.dtype: \\n\", var.data.dtype, \"\\n\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b256b07f", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: individual variable info table (collect and display variable info in a tabular format)\n", + "df_info = pd.DataFrame()\n", + "df_info[\"var_name\"] = list(ds.variables)\n", + "df_info[\"dims\"] = df_info.var_name.apply(lambda x: ds[x].dims)\n", + "df_info[\"is_dim\"] = df_info.var_name.apply(lambda x: x in ds.dims)\n", + "df_info[\"n_dim\"] = df_info.var_name.apply(lambda x: len(ds[x].dims))\n", + "df_info[\"attrs\"] = df_info.var_name.apply(lambda x: ds[x].attrs)\n", + "df_info[\"dtype\"] = df_info.var_name.apply(lambda x: ds[x].data.dtype)\n", + "df_info" + ] + }, + { + "cell_type": "markdown", + "id": "e6a36b96", + "metadata": {}, + "source": [ + "### Data cleaning/Preprocessing (skipped)\n", + "Data cleaning and preprocessing is an important stage in the data analysis pipeline. However, it is outside the scope of this notebook. For more information about data cleaning and preprocessing basics, here are some references. 
\n", + "* [Xarray Fundamentals](https://earth-env-data-science.github.io/lectures/xarray/xarray.html)\n", + "* [Xarray Tutorial — Pangeo Gallery documentation](http://gallery.pangeo.io/repos/pangeo-data/pangeo-tutorial-gallery/xarray.html)\n", + "* [Pythonic Data Cleaning With pandas and NumPy](https://realpython.com/python-data-cleaning-numpy-pandas/)\n", + "* [Pandas - Cleaning Data](https://www.w3schools.com/python/pandas/pandas_cleaning.asp)" + ] + }, + { + "cell_type": "markdown", + "id": "ae6e1a69", + "metadata": {}, + "source": [ + "### Plotting\n", + "Data visualization (or plotting) is another important data analysis topic and deserves its own discussion. Here in this notebook we will only demonstrate basic tools for simple data visualization tasks.\n", + "\n", + "Here are some references you might find useful:\n", + "* xarray plotting: https://docs.xarray.dev/en/stable/user-guide/plotting.html\n", + "* Atmospheric Community Toolkit (ACT): https://arm-doe.github.io/ACT/index.html\n", + "\n", + "Note: this notebook is auto-generated using a template. It uses a general idea to select variable(s) to plot and is not customized to each individual notebook. Feel free to change the variables of interest, especially if a certain figure fails to plot." + ] + }, + { + "cell_type": "markdown", + "id": "7e417fb2", + "metadata": {}, + "source": [ + "#### 1-dimensional basic time series plot\n", + "\n", + "For the following plot we would like to find variables such that\n", + "* it has one and only one dimension\n", + "* \"time\" is its coordinate variable\n", + "* it is not a dimension itself,\n", + "* it is not a special variable with substrings within [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "\n", + "Please see the following example in action. 
(For more details about pandas filtering, please see the following references.)\n", + "* [pandas: multiple conditions while indexing data frame](https://stackoverflow.com/questions/22591174/pandas-multiple-conditions-while-indexing-data-frame-unexpected-behavior)\n", + "* [pandas.DataFrame.apply](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.apply.html)\n", + "* [How to test if a string contains one of the substrings in a list...](https://stackoverflow.com/questions/26577516/how-to-test-if-a-string-contains-one-of-the-substrings-in-a-list-in-pandas)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "30edac2d", + "metadata": {}, + "outputs": [], + "source": [ + "# (pandas query) filter n_dim==1, non-dimensional, and does not contain the following substrings\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter = df_info[(df_info.n_dim == 1) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (df_info.is_dim==False) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4777a659", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " var_1d = df_filter.var_name.values[0]\n", + " var_1d\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fb5ae985", + "metadata": {}, + "outputs": [], + "source": [ + "# Note: if failed, change to another variable to plot.\n", + "try:\n", + " ds[var_1d].plot()\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "markdown", + "id": "9e41066e", + "metadata": {}, + "source": [ + "#### 2-dimensional basic plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "09bd4adc", + "metadata": {}, + "outputs": [], + "source": [ + "# (pandas query) filter n_dim==2, non-dimensional, and does not contain the 
following substrings\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_2 = df_info[(df_info.n_dim == 2) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (df_info.is_dim==False) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter_2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2cdea1a9", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " var_2d = df_filter_2.var_name.values[0]\n", + " var_2d\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f5f48879", + "metadata": {}, + "outputs": [], + "source": [ + "# Note: if failed, change to another variable to plot.\n", + "try:\n", + " print(ds[var_2d].dims)\n", + " # ds[var_2d].plot()\n", + "\n", + " # conventionally, use \"time\" as x-axis\n", + " ds[var_2d].plot(x=\"time\")\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "markdown", + "id": "7554b2a6", + "metadata": {}, + "source": [ + "#### qc-plotting (optional)\n", + "\n", + "Note: act qc plotting has more strict requirement, one of them is the associated qc variable need to have \"flag_masks\" attributes. 
Which is added by using ds = act.io.armfiles.read_netcdf(files_list), then ds.clean.cleanup()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "71c2096f", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " ds_act = act.io.armfiles.read_netcdf(full_path)\n", + " print(type(ds_act))\n", + " ds_act.clean.cleanup()\n", + "\n", + " # or \n", + " # ds.clean.cleanup()\n", + "except Exception as e:\n", + " print(\"ERROR\", e)\n", + " ds_act = ds" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "844f8505", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter valid variables for ACT qc plotting\n", + "condition = (\"qc_\" + df_info.var_name).apply(lambda x: ds_act[x].attrs.get(\"flag_masks\") is not None \n", + " if x in list(ds.variables) else False)\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_3 = df_info[(df_info.is_dim==False) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings))) &\n", + " condition\n", + " ]\n", + "df_filter_3" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ecd38cc6", + "metadata": {}, + "outputs": [], + "source": [ + "# QC Plot\n", + "try:\n", + " qc_variable = df_filter_3.var_name.values[0]\n", + " print(qc_variable)\n", + "except Exception as e:\n", + " print(e)\n", + " \n", + "try:\n", + "\n", + " # Plot\n", + " qc_display = act.plotting.TimeSeriesDisplay(ds_act)\n", + " qc_display.add_subplots((2,), figsize = (9.5,10))\n", + " qc_ax = qc_display.plot(qc_variable, subplot_index=(0,), set_title=\"QC results on field: \" + qc_variable,)\n", + " qc_ax.grid()\n", + " qc_display.qc_flag_block_plot(qc_variable, subplot_index=(1,))\n", + " qc_ax.set_xlabel(f\"UTC Time starts at {ds.time.data[0]}\")\n", + "\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e)" + ] + }, + { + "cell_type": "markdown", + "id": "d76e4d27", + "metadata": 
{}, + "source": [ + "#### bonus: choose variables to plot from a dropdown menu " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bb733804", + "metadata": {}, + "outputs": [], + "source": [ + "# Valid variables filtering\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_4 = df_info[(df_info.is_dim==False) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter_4" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "82d5c20d", + "metadata": {}, + "outputs": [], + "source": [ + "# example 1: using xarray plot\n", + "\n", + "# Uncomment the following cell to try the interactive plot (ctrl + /)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2fc9aaa4", + "metadata": {}, + "outputs": [], + "source": [ + "# %matplotlib widget\n", + "# plt.clf()\n", + "\n", + "# fig, ax = plt.subplots(figsize=(10, 4))\n", + "\n", + "# available_variables = df_filter_4.var_name.values\n", + "# @widgets.interact(var=available_variables)\n", + "# def update(var = available_variables[0]):\n", + "# fig.clear() # Remove old lines from plot and plot new one\n", + "# if len(ds[var].dims)==2:\n", + "# ds[var].plot(x=\"time\", add_colorbar=False)\n", + "# else:\n", + "# ds[var].plot()\n", + "# plt.grid()\n", + "# plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fabdd802", + "metadata": {}, + "outputs": [], + "source": [ + "# example 2: using act plot\n", + "\n", + "# Uncomment the following cell to try the interactive plot (ctrl + /)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "962b4186", + "metadata": {}, + "outputs": [], + "source": [ + "# %matplotlib widget\n", + "# plt.clf()\n", + "\n", + "# available_variables = df_filter_4.var_name.values\n", + "\n", + "\n", + "# @widgets.interact(var=available_variables)\n", + "# def 
update(var = available_variables[0]):\n", + "\n", + "# i_display = act.plotting.TimeSeriesDisplay(ds_act)\n", + "# i_display.add_subplots((1,), figsize=(10, 4))\n", + "# ax = i_display.plot(var, subplot_index=(0,), set_title=f\"{var} ({ds_act[var].attrs['long_name']})\",)\n", + "\n", + "# ax.set_xlabel(f\"UTC Time starts at {ds.time.data[0]}\")\n", + "# ax.grid()\n", + "# plt.show()\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.16" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/KAZRCOR/kazrcorge.c1.ipynb b/VAPs/quicklook/KAZRCOR/kazrcorge.c1.ipynb new file mode 100644 index 00000000..c984815f --- /dev/null +++ b/VAPs/quicklook/KAZRCOR/kazrcorge.c1.ipynb @@ -0,0 +1,339 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# KAZRCORGE.C1 Plots\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/kazrcor) for more information about this vap." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib widget\n", + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import ipywidgets as widgets\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "# Data archive directory\n", + "DATA_DIR = r'/data/archive/'\n", + "\n", + "# Datastream info\n", + "DATASTREAM_NAME = 'kazrcorge'\n", + "DATA_LEVEL = 'c1'\n", + "LOCATIONS = [{'end_date': '2014-02-07', 'facility': 'C1', 'site': 'nsa', 'start_date': '2011-11-11'}, {'end_date': '2014-03-15', 'facility': 'C1', 'site': 'sgp', 'start_date': '2011-01-18'}, {'end_date': '2014-03-16', 'facility': 'C1', 'site': 'twp', 'start_date': '2011-03-12'}, {'end_date': '2014-05-03', 'facility': 'C3', 'site': 'twp', 'start_date': '2011-01-27'}]" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "9faaf875", + "metadata": {}, + "source": [ + "## Define site, facility, and date range" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ac6764f5", + "metadata": {}, + "outputs": [], + "source": [ + "print(\"The following locations and date ranges are available for this VAP:\")\n", + "display(pd.DataFrame(LOCATIONS, columns=['site', 'facility', 'start_date', 'end_date']))" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "8d132223", + "metadata": {}, + "source": [ + "#### Define site, facility, and date range (date format: YYYY-MM-DD) using the variables below:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e563983a", + "metadata": {}, + "outputs": [], + "source": [ + "site_facility = ( 'sgp', 'C1' )\n", + "\n", + "date_start = '2014-03-14'\n", + "date_end = '2014-03-15'" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": 
"bccd3dfe-2f99-49a5-bace-ea37e8dd8fc3", + "metadata": {}, + "source": [ + "## Load data files\n", + "Load data files from /data/archive/" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cb4b9a26-c574-49c0-a521-658fa553e39e", + "metadata": {}, + "outputs": [], + "source": [ + "# Compile list of files\n", + "site, facility = site_facility\n", + "d_date_start = datetime.strptime(date_start, '%Y-%m-%d')\n", + "d_date_end = datetime.strptime(date_end, '%Y-%m-%d')\n", + "dir_path = os.path.join(DATA_DIR + site, site + DATASTREAM_NAME + facility + r'.' + DATA_LEVEL )\n", + "dir_path\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6be8f3dc", + "metadata": {}, + "outputs": [], + "source": [ + "from datetime import date, timedelta\n", + "import pandas as pd\n", + "\n", + "def get_ARM_formated_dates(start_date, end_date):\n", + " \"\"\"\n", + " Get a list of ARM conventional formated date lists, based on start_date and end_date(inclusive)\n", + " EXAMPLE:\n", + " get_ARM_formated_dates(start_date=\"20180219\", end_date=\"20180221\")\n", + " >> [\"20180219\", \"20180220\", \"20180221\"] \n", + " \"\"\"\n", + " \n", + " _start_date = pd.to_datetime(start_date)\n", + " _end_date = pd.to_datetime(end_date)\n", + " \n", + " delta = _end_date - _start_date # returns timedelta \n", + " dates = []\n", + "\n", + " for i in range(delta.days + 1):\n", + " day = _start_date + timedelta(days=i)\n", + " day_formated = day.strftime(format=\"%Y%m%d\")\n", + " dates.append(day_formated)\n", + " return dates\n", + "\n", + "\n", + "get_ARM_formated_dates(start_date=date_start, end_date=date_end)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "51feea2e", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter a list of files based on date pattern\n", + "import glob\n", + "dates = get_ARM_formated_dates(start_date=date_start, end_date=date_end)\n", + "files_filter = []\n", + "for date in dates:\n", + " files_filter 
+= glob.glob(f'{dir_path}/*.{date}*.*')\n", + " files_filter\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0e5d0d7", + "metadata": {}, + "outputs": [], + "source": [ + "# Load files as a single dataset\n", + "files_list = files_filter \n", + "ds = act.io.armfiles.read_netcdf(files_list)\n", + "ds.clean.cleanup()\n", + "print(f'{len(files_list)} files loaded')\n", + "ds\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "4a551094-9ec0-4b64-b80a-9940573c2f50", + "metadata": {}, + "source": [ + "## Plot time series data\n", + "#### Define the list of variables to be plotted:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d09b789e-84f1-4605-846b-a72c110c8048", + "metadata": {}, + "outputs": [], + "source": [ + "variables_to_plot = ['reflectivity', 'mean_doppler_velocity', 'spectral_width']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3458fb08-035b-4898-9253-0a94e6f9c97b", + "metadata": {}, + "outputs": [], + "source": [ + "ts_display = act.plotting.TimeSeriesDisplay(ds)\n", + "ts_display.add_subplots((len(variables_to_plot),), figsize = (9.5,4*len(variables_to_plot)))\n", + "\n", + "for i,v in enumerate(variables_to_plot):\n", + " ts_ax = ts_display.plot(v, subplot_index=(i,), set_title=ds.variables[v].attrs['long_name'],)\n", + " ts_ax.grid()\n", + "\n", + "plt.show()\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "194399aa-1907-452b-8ba9-bc31d7f60291", + "metadata": {}, + "source": [ + "## Quality check plots\n", + "#### Define variable for QC plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9a0ef63d-3eeb-48fc-a24a-43258b6134b8", + "metadata": {}, + "outputs": [], + "source": [ + "qc_variable = 'reflectivity'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "532663a3-4dc0-4497-bda8-018c5f91e1c4", + "metadata": {}, + "outputs": [], + "source": [ + "# QC Plot\n", + "if 
('qc_'+qc_variable) in ds.variables:\n", + "\n", + " # Plot\n", + " qc_display = act.plotting.TimeSeriesDisplay(ds)\n", + " qc_display.add_subplots((2,), figsize = (9.5,10))\n", + " qc_ax = qc_display.plot(qc_variable, subplot_index=(0,), set_title=\"QC results on field: \" + qc_variable,)\n", + " qc_ax.grid()\n", + " qc_display.qc_flag_block_plot(qc_variable, subplot_index=(1,))\n", + "\n", + " plt.show()\n", + "else:\n", + " print(f'QC not available for the selected field: {qc_variable}')\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "2308db16-f362-4033-a11a-c3e5e75ad9ba", + "metadata": {}, + "source": [ + "## Field selection dropdown menu\n", + "Select variable to be plotted from a dropdown menu" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d5792fa8-2253-441a-8ac6-1e07a0345031", + "metadata": {}, + "outputs": [], + "source": [ + "plt.ioff()\n", + "\n", + "# populate dropdown menu with available variables \n", + "available_variables = [v for v in ds.variables if not('time' in v or v.startswith('qc_') or v.startswith('source_')) and 'long_name' in ds.variables[v].attrs]\n", + "d_variable = 'reflectivity'\n", + "dropdown = widgets.Dropdown(\n", + " options = [(ds.variables[v].attrs['long_name'], v) for v in available_variables],\n", + " value= d_variable,\n", + " description='Field:',\n", + " disabled=False,\n", + ")\n", + "dropdown.layout.margin = '0px 30% 0px 20%'\n", + "dropdown.layout.width = '50%'\n", + "\n", + "# set up display\n", + "i_display = act.plotting.TimeSeriesDisplay(ds)\n", + "i_display.add_subplots((1,), figsize = (9.5,5))\n", + "i_ax = i_display.plot(d_variable, subplot_index=(0,), set_title=ds.variables[d_variable].attrs['long_name'],)\n", + "i_ax.grid()\n", + "i_fig = i_display.fig\n", + "\n", + "# update plot callback function\n", + "def update_plot(change):\n", + " i_ax.cla()\n", + " i_ax_new = i_display.plot(change.new, subplot_index=(0,), 
set_title=ds.variables[change.new].attrs['long_name'],)\n", + " i_ax_new.grid()\n", + " i_fig.canvas.draw()\n", + " i_fig.canvas.flush_events()\n", + "\n", + "dropdown.observe(update_plot, names='value')\n", + "\n", + "widgets.AppLayout(\n", + " header=dropdown,\n", + " center=i_fig.canvas,\n", + " pane_heights=[1, 6,1]\n", + ")\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.12" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/KAZRCOR/kazrcorhi.c1.ipynb b/VAPs/quicklook/KAZRCOR/kazrcorhi.c1.ipynb new file mode 100644 index 00000000..8ba736ed --- /dev/null +++ b/VAPs/quicklook/KAZRCOR/kazrcorhi.c1.ipynb @@ -0,0 +1,339 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# KAZRCORHI.C1 Plots\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/kazrcor) for more information about this vap." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib widget\n", + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import ipywidgets as widgets\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "# Data archive directory\n", + "DATA_DIR = r'/data/archive/'\n", + "\n", + "# Datastream info\n", + "DATASTREAM_NAME = 'kazrcorhi'\n", + "DATA_LEVEL = 'c1'\n", + "LOCATIONS = [{'end_date': '2014-03-16', 'facility': 'C1', 'site': 'twp', 'start_date': '2011-03-12'}, {'end_date': '2014-05-03', 'facility': 'C3', 'site': 'twp', 'start_date': '2011-01-27'}]" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "9faaf875", + "metadata": {}, + "source": [ + "## Define site, facility, and date range" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ac6764f5", + "metadata": {}, + "outputs": [], + "source": [ + "print(\"The following locations and date ranges are available for this VAP:\")\n", + "display(pd.DataFrame(LOCATIONS, columns=['site', 'facility', 'start_date', 'end_date']))" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "8d132223", + "metadata": {}, + "source": [ + "#### Define site, facility, and date range (date format: YYYY-MM-DD) using the variables below:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e563983a", + "metadata": {}, + "outputs": [], + "source": [ + "site_facility = ( 'twp', 'C1' )\n", + "\n", + "date_start = '2014-03-15'\n", + "date_end = '2014-03-16'" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "bccd3dfe-2f99-49a5-bace-ea37e8dd8fc3", + "metadata": {}, + "source": [ + "## Load data files\n", + "Load data files from /data/archive/" + ] + }, + { + "cell_type": "code", + 
"execution_count": null, + "id": "cb4b9a26-c574-49c0-a521-658fa553e39e", + "metadata": {}, + "outputs": [], + "source": [ + "# Compile list of files\n", + "site, facility = site_facility\n", + "d_date_start = datetime.strptime(date_start, '%Y-%m-%d')\n", + "d_date_end = datetime.strptime(date_end, '%Y-%m-%d')\n", + "dir_path = os.path.join(DATA_DIR + site, site + DATASTREAM_NAME + facility + r'.' + DATA_LEVEL )\n", + "dir_path\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6be8f3dc", + "metadata": {}, + "outputs": [], + "source": [ + "from datetime import date, timedelta\n", + "import pandas as pd\n", + "\n", + "def get_ARM_formated_dates(start_date, end_date):\n", + " \"\"\"\n", + " Get a list of ARM conventional formated date lists, based on start_date and end_date(inclusive)\n", + " EXAMPLE:\n", + " get_ARM_formated_dates(start_date=\"20180219\", end_date=\"20180221\")\n", + " >> [\"20180219\", \"20180220\", \"20180221\"] \n", + " \"\"\"\n", + " \n", + " _start_date = pd.to_datetime(start_date)\n", + " _end_date = pd.to_datetime(end_date)\n", + " \n", + " delta = _end_date - _start_date # returns timedelta \n", + " dates = []\n", + "\n", + " for i in range(delta.days + 1):\n", + " day = _start_date + timedelta(days=i)\n", + " day_formated = day.strftime(format=\"%Y%m%d\")\n", + " dates.append(day_formated)\n", + " return dates\n", + "\n", + "\n", + "get_ARM_formated_dates(start_date=date_start, end_date=date_end)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "51feea2e", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter a list of files based on date pattern\n", + "import glob\n", + "dates = get_ARM_formated_dates(start_date=date_start, end_date=date_end)\n", + "files_filter = []\n", + "for date in dates:\n", + " files_filter += glob.glob(f'{dir_path}/*.{date}*.*')\n", + " files_filter\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0e5d0d7", + 
"metadata": {}, + "outputs": [], + "source": [ + "# Load files as a single dataset\n", + "files_list = files_filter \n", + "ds = act.io.armfiles.read_netcdf(files_list)\n", + "ds.clean.cleanup()\n", + "print(f'{len(files_list)} files loaded')\n", + "ds\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "4a551094-9ec0-4b64-b80a-9940573c2f50", + "metadata": {}, + "source": [ + "## Plot time series data\n", + "#### Define the list of variables to be plotted:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d09b789e-84f1-4605-846b-a72c110c8048", + "metadata": {}, + "outputs": [], + "source": [ + "variables_to_plot = ['reflectivity', 'mean_doppler_velocity', 'spectral_width']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3458fb08-035b-4898-9253-0a94e6f9c97b", + "metadata": {}, + "outputs": [], + "source": [ + "ts_display = act.plotting.TimeSeriesDisplay(ds)\n", + "ts_display.add_subplots((len(variables_to_plot),), figsize = (9.5,4*len(variables_to_plot)))\n", + "\n", + "for i,v in enumerate(variables_to_plot):\n", + " ts_ax = ts_display.plot(v, subplot_index=(i,), set_title=ds.variables[v].attrs['long_name'],)\n", + " ts_ax.grid()\n", + "\n", + "plt.show()\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "194399aa-1907-452b-8ba9-bc31d7f60291", + "metadata": {}, + "source": [ + "## Quality check plots\n", + "#### Define variable for QC plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9a0ef63d-3eeb-48fc-a24a-43258b6134b8", + "metadata": {}, + "outputs": [], + "source": [ + "qc_variable = 'reflectivity'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "532663a3-4dc0-4497-bda8-018c5f91e1c4", + "metadata": {}, + "outputs": [], + "source": [ + "# QC Plot\n", + "if ('qc_'+qc_variable) in ds.variables:\n", + "\n", + " # Plot\n", + " qc_display = act.plotting.TimeSeriesDisplay(ds)\n", + " qc_display.add_subplots((2,), figsize = 
(9.5,10))\n", + " qc_ax = qc_display.plot(qc_variable, subplot_index=(0,), set_title=\"QC results on field: \" + qc_variable,)\n", + " qc_ax.grid()\n", + " qc_display.qc_flag_block_plot(qc_variable, subplot_index=(1,))\n", + "\n", + " plt.show()\n", + "else:\n", + " print(f'QC not available for the selected field: {qc_variable}')\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "2308db16-f362-4033-a11a-c3e5e75ad9ba", + "metadata": {}, + "source": [ + "## Field selection dropdown menu\n", + "Select variable to be plotted from a dropdown menu" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d5792fa8-2253-441a-8ac6-1e07a0345031", + "metadata": {}, + "outputs": [], + "source": [ + "plt.ioff()\n", + "\n", + "# populate dropdown menu with available variables \n", + "available_variables = [v for v in ds.variables if not('time' in v or v.startswith('qc_') or v.startswith('source_')) and 'long_name' in ds.variables[v].attrs]\n", + "d_variable = 'reflectivity'\n", + "dropdown = widgets.Dropdown(\n", + " options = [(ds.variables[v].attrs['long_name'], v) for v in available_variables],\n", + " value= d_variable,\n", + " description='Field:',\n", + " disabled=False,\n", + ")\n", + "dropdown.layout.margin = '0px 30% 0px 20%'\n", + "dropdown.layout.width = '50%'\n", + "\n", + "# set up display\n", + "i_display = act.plotting.TimeSeriesDisplay(ds)\n", + "i_display.add_subplots((1,), figsize = (9.5,5))\n", + "i_ax = i_display.plot(d_variable, subplot_index=(0,), set_title=ds.variables[d_variable].attrs['long_name'],)\n", + "i_ax.grid()\n", + "i_fig = i_display.fig\n", + "\n", + "# update plot callback function\n", + "def update_plot(change):\n", + " i_ax.cla()\n", + " i_ax_new = i_display.plot(change.new, subplot_index=(0,), set_title=ds.variables[change.new].attrs['long_name'],)\n", + " i_ax_new.grid()\n", + " i_fig.canvas.draw()\n", + " i_fig.canvas.flush_events()\n", + "\n", + "dropdown.observe(update_plot, names='value')\n", + 
"\n", + "widgets.AppLayout(\n", + " header=dropdown,\n", + " center=i_fig.canvas,\n", + " pane_heights=[1, 6,1]\n", + ")\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.12" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/KAZRCOR/kazrcormd.c1.ipynb b/VAPs/quicklook/KAZRCOR/kazrcormd.c1.ipynb new file mode 100644 index 00000000..aaa33290 --- /dev/null +++ b/VAPs/quicklook/KAZRCOR/kazrcormd.c1.ipynb @@ -0,0 +1,339 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# KAZRCORMD.C1 Plots\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/kazrcor) for more information about this vap." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib widget\n", + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import ipywidgets as widgets\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "# Data archive directory\n", + "DATA_DIR = r'/data/archive/'\n", + "\n", + "# Datastream info\n", + "DATASTREAM_NAME = 'kazrcormd'\n", + "DATA_LEVEL = 'c1'\n", + "LOCATIONS = [{'end_date': '2014-02-07', 'facility': 'C1', 'site': 'nsa', 'start_date': '2011-11-11'}, {'end_date': '2014-03-15', 'facility': 'C1', 'site': 'sgp', 'start_date': '2011-05-03'}]" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "9faaf875", + "metadata": {}, + "source": [ + "## Define site, facility, and date range" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ac6764f5", + "metadata": {}, + "outputs": [], + "source": [ + "print(\"The following locations and date ranges are available for this VAP:\")\n", + "display(pd.DataFrame(LOCATIONS, columns=['site', 'facility', 'start_date', 'end_date']))" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "8d132223", + "metadata": {}, + "source": [ + "#### Define site, facility, and date range (date format: YYYY-MM-DD) using the variables below:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e563983a", + "metadata": {}, + "outputs": [], + "source": [ + "site_facility = ( 'sgp', 'C1' )\n", + "\n", + "date_start = '2014-03-14'\n", + "date_end = '2014-03-15'" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "bccd3dfe-2f99-49a5-bace-ea37e8dd8fc3", + "metadata": {}, + "source": [ + "## Load data files\n", + "Load data files from /data/archive/" + ] + }, + { + "cell_type": "code", + 
"execution_count": null, + "id": "cb4b9a26-c574-49c0-a521-658fa553e39e", + "metadata": {}, + "outputs": [], + "source": [ + "# Compile list of files\n", + "site, facility = site_facility\n", + "d_date_start = datetime.strptime(date_start, '%Y-%m-%d')\n", + "d_date_end = datetime.strptime(date_end, '%Y-%m-%d')\n", + "dir_path = os.path.join(DATA_DIR + site, site + DATASTREAM_NAME + facility + r'.' + DATA_LEVEL )\n", + "dir_path\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6be8f3dc", + "metadata": {}, + "outputs": [], + "source": [ + "from datetime import date, timedelta\n", + "import pandas as pd\n", + "\n", + "def get_ARM_formated_dates(start_date, end_date):\n", + " \"\"\"\n", + " Get a list of ARM conventional formated date lists, based on start_date and end_date(inclusive)\n", + " EXAMPLE:\n", + " get_ARM_formated_dates(start_date=\"20180219\", end_date=\"20180221\")\n", + " >> [\"20180219\", \"20180220\", \"20180221\"] \n", + " \"\"\"\n", + " \n", + " _start_date = pd.to_datetime(start_date)\n", + " _end_date = pd.to_datetime(end_date)\n", + " \n", + " delta = _end_date - _start_date # returns timedelta \n", + " dates = []\n", + "\n", + " for i in range(delta.days + 1):\n", + " day = _start_date + timedelta(days=i)\n", + " day_formated = day.strftime(format=\"%Y%m%d\")\n", + " dates.append(day_formated)\n", + " return dates\n", + "\n", + "\n", + "get_ARM_formated_dates(start_date=date_start, end_date=date_end)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "51feea2e", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter a list of files based on date pattern\n", + "import glob\n", + "dates = get_ARM_formated_dates(start_date=date_start, end_date=date_end)\n", + "files_filter = []\n", + "for date in dates:\n", + " files_filter += glob.glob(f'{dir_path}/*.{date}*.*')\n", + " files_filter\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0e5d0d7", + 
"metadata": {}, + "outputs": [], + "source": [ + "# Load files as a single dataset\n", + "files_list = files_filter \n", + "ds = act.io.armfiles.read_netcdf(files_list)\n", + "ds.clean.cleanup()\n", + "print(f'{len(files_list)} files loaded')\n", + "ds\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "4a551094-9ec0-4b64-b80a-9940573c2f50", + "metadata": {}, + "source": [ + "## Plot time series data\n", + "#### Define the list of variables to be plotted:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d09b789e-84f1-4605-846b-a72c110c8048", + "metadata": {}, + "outputs": [], + "source": [ + "variables_to_plot = ['reflectivity', 'mean_doppler_velocity', 'spectral_width']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3458fb08-035b-4898-9253-0a94e6f9c97b", + "metadata": {}, + "outputs": [], + "source": [ + "ts_display = act.plotting.TimeSeriesDisplay(ds)\n", + "ts_display.add_subplots((len(variables_to_plot),), figsize = (9.5,4*len(variables_to_plot)))\n", + "\n", + "for i,v in enumerate(variables_to_plot):\n", + " ts_ax = ts_display.plot(v, subplot_index=(i,), set_title=ds.variables[v].attrs['long_name'],)\n", + " ts_ax.grid()\n", + "\n", + "plt.show()\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "194399aa-1907-452b-8ba9-bc31d7f60291", + "metadata": {}, + "source": [ + "## Quality check plots\n", + "#### Define variable for QC plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9a0ef63d-3eeb-48fc-a24a-43258b6134b8", + "metadata": {}, + "outputs": [], + "source": [ + "qc_variable = 'reflectivity'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "532663a3-4dc0-4497-bda8-018c5f91e1c4", + "metadata": {}, + "outputs": [], + "source": [ + "# QC Plot\n", + "if ('qc_'+qc_variable) in ds.variables:\n", + "\n", + " # Plot\n", + " qc_display = act.plotting.TimeSeriesDisplay(ds)\n", + " qc_display.add_subplots((2,), figsize = 
(9.5,10))\n", + " qc_ax = qc_display.plot(qc_variable, subplot_index=(0,), set_title=\"QC results on field: \" + qc_variable,)\n", + " qc_ax.grid()\n", + " qc_display.qc_flag_block_plot(qc_variable, subplot_index=(1,))\n", + "\n", + " plt.show()\n", + "else:\n", + " print(f'QC not available for the selected field: {qc_variable}')\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "2308db16-f362-4033-a11a-c3e5e75ad9ba", + "metadata": {}, + "source": [ + "## Field selection dropdown menu\n", + "Select variable to be plotted from a dropdown menu" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d5792fa8-2253-441a-8ac6-1e07a0345031", + "metadata": {}, + "outputs": [], + "source": [ + "plt.ioff()\n", + "\n", + "# populate dropdown menu with available variables \n", + "available_variables = [v for v in ds.variables if not('time' in v or v.startswith('qc_') or v.startswith('source_')) and 'long_name' in ds.variables[v].attrs]\n", + "d_variable = 'reflectivity'\n", + "dropdown = widgets.Dropdown(\n", + " options = [(ds.variables[v].attrs['long_name'], v) for v in available_variables],\n", + " value= d_variable,\n", + " description='Field:',\n", + " disabled=False,\n", + ")\n", + "dropdown.layout.margin = '0px 30% 0px 20%'\n", + "dropdown.layout.width = '50%'\n", + "\n", + "# set up display\n", + "i_display = act.plotting.TimeSeriesDisplay(ds)\n", + "i_display.add_subplots((1,), figsize = (9.5,5))\n", + "i_ax = i_display.plot(d_variable, subplot_index=(0,), set_title=ds.variables[d_variable].attrs['long_name'],)\n", + "i_ax.grid()\n", + "i_fig = i_display.fig\n", + "\n", + "# update plot callback function\n", + "def update_plot(change):\n", + " i_ax.cla()\n", + " i_ax_new = i_display.plot(change.new, subplot_index=(0,), set_title=ds.variables[change.new].attrs['long_name'],)\n", + " i_ax_new.grid()\n", + " i_fig.canvas.draw()\n", + " i_fig.canvas.flush_events()\n", + "\n", + "dropdown.observe(update_plot, names='value')\n", + 
"\n", + "widgets.AppLayout(\n", + " header=dropdown,\n", + " center=i_fig.canvas,\n", + " pane_heights=[1, 6,1]\n", + ")\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.12" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/LCLHEIGHT/LCLHEIGHT_tutorial.ipynb b/VAPs/quicklook/LCLHEIGHT/LCLHEIGHT_tutorial.ipynb new file mode 100644 index 00000000..f2282e3b --- /dev/null +++ b/VAPs/quicklook/LCLHEIGHT/LCLHEIGHT_tutorial.ipynb @@ -0,0 +1,838 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# LCL.C1 Notebook\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/lclheight) for more information about this vap." + ] + }, + { + "cell_type": "markdown", + "id": "97097763", + "metadata": {}, + "source": [ + "In this notebook, we demonstrate the workflow to explore ARM vap data (using lcl as an example.) Value-added products (VAPs) are higher-order data products that have been analyzed and processed to ease scientists’ use of ARM data in atmospheric research and global climate models. \n", + "Here is the main content we will cover." 
+ ] + }, + { + "cell_type": "markdown", + "id": "eddec40f", + "metadata": {}, + "source": [ + "# Table of Contents\n", + "## Access the data\n", + "* How to retrieve the data\n", + "* Data path and file name conventions\n", + "* Load data\n", + "## Explore the data\n", + "* NetCDF Data structure\n", + "* Xarray essentials\n", + "* Xarray Variable\n", + "## Plot the data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "import random\n", + "\n", + "import glob\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "id": "2372d5be", + "metadata": {}, + "source": [ + "## Access the data" + ] + }, + { + "cell_type": "markdown", + "id": "5b0d9684", + "metadata": {}, + "source": [ + "### How to retrieve the data\n", + "We assume the path \"/data/archive\" is available where you are running this notebook. You can use `os.path.exists(\"/data/archive\")` to verify if the path exists on your machine.\n", + "\n", + "\n", + "### Data path and file name conventions\n", + "There are several common terms regarding ARM data, for example, data-stream-name, data-level, site, facility, instrument, etc. (For more details, please see [ARM Data File Standards Version 1.2](https://www.google.com/search?q=arm+datastream+facility+cite+definition&rlz=1C1GCEJ_enUS1029US1029&ei=hb41ZICrBPukqtsPvMODIA&ved=0ahUKEwjAgeiS0aL-AhV7kmoFHbzhAAQQ4dUDCBA&uact=5&oq=arm+datastream+facility+cite+definition&gs_lcp=Cgxnd3Mtd2l6LXNlcnAQAzIFCAAQogQyBQgAEKIEMgUIABCiBDIFCAAQogQyBQgAEKIEOgoIABBHENYEELADOgoIIRCgARDDBBAKSgQIQRgAUJ4DWIoOYO0PaAFwAXgAgAGMAYgBuQmSAQMyLjmYAQCgAQHIAQjAAQE&sclient=gws-wiz-serp).) 
\n", + "\n", + "For example, this notebook is called `lcl.c1`, where `lcl` is the \"datastream name\", and `c1` is the \"data level\".\n", + "\n", + "This datastream also contains site `sgp` and facility `C1`. (Note: an individual datastream might have multiple site-facility pairs.)\n", + "In such a case, the data of this data-stream is stored at `/data/archive/sgp/sgplclC1.c1`, which is in the format of `<DATA_DIR>/<site>/<site><datastream_name><facility>.<data_level>`. We can use the following method to assign the data-stream directory `datastream_dir = os.path.join(DATA_DIR, site, site + DATASTREAM_NAME + facility + '.' + DATA_LEVEL )`\n", + "\n", + "The data files under datastream_dir also follow naming conventions. But once you reach the datastream_dir level, the most important file naming convention to differentiate the files is \"yyyyMMdd.hhmmss\", which comes in handy to filter out files based on datetime. For example, we can use `glob.glob(f'{datastream_dir}/*.200709*.*')` to filter files from September 2007.\n", + "\n", + "(Note: refer to the https://adc.arm.gov/discovery/#/ and https://adc.arm.gov/solr8/metadata/select API to explore ARM datastreams and their associated available sites and facilities.)\n", + "\n", + "Please see the following examples in action" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d7e9eb85", + "metadata": {}, + "outputs": [], + "source": [ + "# Verify if DATA_DIR path exists\n", + "DATA_DIR = \"/data/archive\"\n", + "os.path.exists(DATA_DIR)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "586993fd", + "metadata": {}, + "outputs": [], + "source": [ + "# Specify datastream_dir following the path conventions and check its existence\n", + "DATASTREAM_NAME = \"lcl\"\n", + "DATA_LEVEL = \"c1\"\n", + "site = \"sgp\"\n", + "facility = \"C1\"\n", + "datastream_dir = os.path.join(DATA_DIR, site, site + DATASTREAM_NAME + facility + '.' 
+ DATA_LEVEL )\n", + "print(datastream_dir)\n", + "print(os.path.exists(datastream_dir))\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0742f7c1", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: list 5 (random) files under datastream_dir\n", + "files = os.listdir(datastream_dir)\n", + "files[:5]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "39b98a36", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: get most recent file\n", + "list_of_files = glob.glob(f\"{datastream_dir}/*\") # * means all if need specific format then *.csv\n", + "latest_file = max(list_of_files, key=os.path.getctime)\n", + "latest_file" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "902d514e", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus sort datastream files based on datetime\n", + "files = os.listdir(datastream_dir)\n", + "file_sorted = files.copy()\n", + "file_sorted.sort() \n", + "print(file_sorted[:5])\n", + "\n", + "# to reverse\n", + "file_sorted_reverse = files.copy() \n", + "file_sorted_reverse.sort(reverse=True)\n", + "print(file_sorted_reverse[:5])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ec5923b2", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: pattern matching\n", + "# filter the 200709** files under datastream_dir\n", + "files_filter = glob.glob(f'{datastream_dir}/*.200709*.*')\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c4a4aa18", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: use armnotebook_utils.file_filter (TODO) to filter files based on datastream info \n" + ] + }, + { + "cell_type": "markdown", + "id": "cfeb9efc", + "metadata": {}, + "source": [ + "### Load data\n", + "The arm data is stored in [NetCDF](https://en.wikipedia.org/wiki/NetCDF#:~:text=%22NetCDF%20(network%20Common%20Data%20Form,format%20for%20representing%20scientific%20data.) format. 
We can use xarray's `open_dataset` method to load a single file, or `open_mfdataset` to open multiple files. (Note: the latter will still return a single xarray dataset object by combining multiple files.)\n", + "\n", + "Note: open_dataset keeps the file handle open and lazy loads its contents. All parameters are passed directly to open_dataset. It is a preferable method over load_dataset for memory efficiency. (For more details, please see [xarray.open_dataset](https://docs.xarray.dev/en/stable/generated/xarray.open_dataset.html))\n", + "\n", + "See the following example in action" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a440a329", + "metadata": {}, + "outputs": [], + "source": [ + "# open single file\n", + "full_path = latest_file\n", + "print(full_path)\n", + "ds_single = xr.open_dataset(full_path)\n", + "ds_single" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0143a3d", + "metadata": {}, + "outputs": [], + "source": [ + "print(ds_single)\n", + "print(type(ds_single))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1c0f8939", + "metadata": {}, + "outputs": [], + "source": [ + "# open multiple files\n", + "n_files = 3\n", + "full_paths = [os.path.join(datastream_dir, f_path) for f_path in file_sorted_reverse[:n_files]]\n", + "print(full_paths)\n", + "try: # Note: sometimes multiple files cannot be merged, so we used try except here.\n", + " ds_mutiple = xr.open_mfdataset(full_paths)\n", + " # ds_mutiple\n", + " print(type(ds_mutiple))\n", + " print(ds_mutiple)\n", + "except Exception as e:\n", + " print(e)" + ] + }, + { + "cell_type": "markdown", + "id": "3fa59943", + "metadata": {}, + "source": [ + "## Explore the data" + ] + }, + { + "cell_type": "markdown", + "id": "571f69b9", + "metadata": {}, + "source": [ + "### NetCDF Data structure \n", + "\n", + "\n", + "(If you are confident with NetCDF basics and xarray essentials, feel free to skip this section.)\n", + "Before we dive into 
data exploration, there are some essential concepts we should be familiar with (shown below): \n", + "* Dataset\n", + "* Data array\n", + "* Variable\n", + "* Dimension\n", + "* Coordinate\n", + "* Data Type\n", + "* Meta Data (Attributes)\n", + "\n", + "We will not go into details about NetCDF basics and here are some references you might find helpful\n", + "* [Components of a NetCDF Dataset](https://iprc.soest.hawaii.edu/users/xfu/tool/guidef-7.html#)\n", + "* [Network Common Data Form (NetCDF)](https://www.unidata.ucar.edu/software/netcdf/) and [A Brief History of (netCDF) Time](https://www.unidata.ucar.edu/software/netcdf/time/recs.html)\n", + "\n", + "\n", + "\n", + "### Xarray essentials\n", + "Earlier we introduced the xarray package and used `open_dataset` and `open_mfdataset` to retrieve a NetCDF data file as an xarray Dataset object. Recall that you can review the data in a notebook by using `print(ds)` or `ds`. \n", + "\n", + "Using xarray to retrieve the aforementioned NetCDF basics is straightforward. 
In general,\n", + "* Dataset: `ds`\n", + "* Data array: `ds.variables`\n", + "* Variable: `ds.variables`\n", + "* Dimension: `ds.dims`\n", + "* Coordinate: `ds.coords`\n", + "* Data Type: `type`\n", + "* Meta Data (Attributes): `ds.attrs`\n", + "\n", + "Also, here are some references if you are new to xarray\n", + "* [Xarray in 45 minutes](https://tutorial.xarray.dev/overview/xarray-in-45-min.html)\n", + "* [Handling NetCDF Files using XArray for Absolute Beginners](https://towardsdatascience.com/handling-netcdf-files-using-xarray-for-absolute-beginners-111a8ab4463f)\n", + "\n", + "Try the following commands in action\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b3eac323", + "metadata": {}, + "outputs": [], + "source": [ + "ds = ds_single" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "77ecf85d", + "metadata": {}, + "outputs": [], + "source": [ + "# Dataset \n", + "ds" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "25e7de09", + "metadata": {}, + "outputs": [], + "source": [ + "# Data array, Variable\n", + "# Note: the info can be overwhelming. 
Do not attempt to grasp everything at first glance.\n", + "ds.variables" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8c41b67e", + "metadata": {}, + "outputs": [], + "source": [ + "# dimensions\n", + "ds.dims" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "156f1dfc", + "metadata": {}, + "outputs": [], + "source": [ + "# coordinates\n", + "ds.coords" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "277d6064", + "metadata": {}, + "outputs": [], + "source": [ + "# Meta Data (Attributes)\n", + "ds.attrs" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d334681f", + "metadata": {}, + "outputs": [], + "source": [ + "# type\n", + "print(type(ds))\n", + "print(type(ds.variables))\n", + "print(type(ds.dims))\n", + "print(type(ds.coords))\n", + "print(type(ds.attrs))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "643399d6", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: retrieve variable names only\n", + "list(ds.variables)" + ] + }, + { + "cell_type": "markdown", + "id": "1cf36878", + "metadata": {}, + "source": [ + "#### Discussion: variables vs. coordinates vs. dimensions. \n", + "From [Components of a NetCDF Dataset](https://iprc.soest.hawaii.edu/users/xfu/tool/guidef-7.html#):\n", + "\n", + "* Dimension: A dimension may be used to represent a real physical dimension, for example, time, latitude, longitude, or height. A dimension might also be used to index other quantities, for example station or model-run-number.\n", + "A netCDF dimension has both a name and a length. A dimension length is an arbitrary positive integer, except that one dimension in a netCDF dataset can have the length UNLIMITED.\n", + "\n", + "* Variables: Variables are used to store the bulk of the data in a netCDF dataset. A variable represents an array of values of the same type. A scalar value is treated as a 0-dimensional array. 
A variable has a name, a data type, and a shape described by its list of dimensions specified when the variable is created. A variable may also have associated attributes, which may be added, deleted or changed after the variable is created.\n", + "\n", + "* Coordinate (Variables): It is legal for a variable to have the same name as a dimension. Such variables have no special meaning to the netCDF library. However there is a convention that such variables should be treated in a special way by software using this library.\n", + "A variable with the same name as a dimension is called a coordinate variable. It typically defines a physical coordinate corresponding to that dimension.\n", + "\n", + "\n", + "Tips: By definition, coordinates and dimensions are also variables. For example ds.time is a coordinate but it is also a special variable. But in practice, when we talk about a variable, we mean a regular (non-coordinate) variable. We will use this convention for the remainder of this notebook. " + ] + }, + { + "cell_type": "markdown", + "id": "d47120f9", + "metadata": {}, + "source": [ + "### Xarray Variable\n", + "\n", + "\n", + "We can use `ds.variables` to access the variables of a dataset. But printing out the whole dataset can often be overwhelming. Instead, we would work on the individual variable (also called Xarray data array.) using the `ds[var_name]` syntax, where `var_name` is the name of the variable. See the following examples in action.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5a57e136", + "metadata": {}, + "outputs": [], + "source": [ + "print(type(ds[\"time\"]))\n", + "ds[\"time\"]" + ] + }, + { + "cell_type": "markdown", + "id": "51ec643e", + "metadata": {}, + "source": [ + "#### Variable properties\n", + "For an individual variable, the following properties are the most commonly used, each shown with the associated xarray attribute to retrieve it. 
(assuming var = ds[var_name])\n", + "* name: `var.name`\n", + "* data content: `var.data`\n", + "* attributes: `var.attrs`\n", + "* dimenstions: `var.dims`\n", + "* data type: `var.data.dtype`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a632a525", + "metadata": {}, + "outputs": [], + "source": [ + "var_name = \"time\"\n", + "var = ds[var_name]\n", + "\n", + "print(\"var.name: \\n\", var.name, \"\\n\")\n", + "print(\"var.data: \\n\", var.data, \"\\n\")\n", + "print(\"var.attrs: \\n\", var.attrs, \"\\n\")\n", + "print(\"var.dims: \\n\", var.dims, \"\\n\")\n", + "print(\"var.data.dtype: \\n\", var.data.dtype, \"\\n\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b256b07f", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: indivdual variable info table (collect and display variable info as a tabular format)\n", + "df_info = pd.DataFrame()\n", + "df_info[\"var_name\"] = list(ds.variables)\n", + "df_info[\"dims\"] = df_info.var_name.apply(lambda x: ds[x].dims)\n", + "df_info[\"is_dim\"] = df_info.var_name.apply(lambda x: x in ds.dims)\n", + "df_info[\"n_dim\"] = df_info.var_name.apply(lambda x: len(ds[x].dims))\n", + "df_info[\"attrs\"] = df_info.var_name.apply(lambda x: ds[x].attrs)\n", + "df_info[\"dtype\"] = df_info.var_name.apply(lambda x: ds[x].data.dtype)\n", + "df_info" + ] + }, + { + "cell_type": "markdown", + "id": "e6a36b96", + "metadata": {}, + "source": [ + "### Data cleaning/Preprocessing (skipped)\n", + "Data cleaning and preprocessing is an important stage in the the data analysis pipeline. However it will be out of the scope of this notebook. For more information about data cleaning and preprocessing basics, here are some references. 
\n", + "* [Xarray Fundamentals](https://earth-env-data-science.github.io/lectures/xarray/xarray.html)\n", + "* [Xarray Tutorial — Pangeo Gallery documentation](http://gallery.pangeo.io/repos/pangeo-data/pangeo-tutorial-gallery/xarray.html)\n", + "* [Pythonic Data Cleaning With pandas and NumPy](https://realpython.com/python-data-cleaning-numpy-pandas/)\n", + "* [Pandas - Cleaning Data](https://www.w3schools.com/python/pandas/pandas_cleaning.asp)" + ] + }, + { + "cell_type": "markdown", + "id": "ae6e1a69", + "metadata": {}, + "source": [ + "### Plotting\n", + "Data visualization (or plotting) is another important data analysis topic and deserves its own discussion. Here in this notebook we will only demonstrate basic tools for simple data visualization tasks.\n", + "\n", + "Here are some references you might find useful:\n", + "* xarray plotting: https://docs.xarray.dev/en/stable/user-guide/plotting.html\n", + "* Atmospheric Community Toolkit (ACT): https://arm-doe.github.io/ACT/index.html\n", + "\n", + "Note: this notebook is auto-generated using a template. It uses a general idea to select variable(s) to plot and is not customized to each individual notebook. Feel free to change the variables of interest, especially if a certain figure fails to plot." + ] + }, + { + "cell_type": "markdown", + "id": "7e417fb2", + "metadata": {}, + "source": [ + "#### 1-dimensional basic time series plot\n", + "\n", + "For the following plot we would like to find variables such that\n", + "* it has one and only one dimension\n", + "* \"time\" is its coordinate variable\n", + "* it is not a dimension itself,\n", + "* it is not a special variable whose name contains any of the substrings [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "\n", + "Please see the following example in action. 
(For more details about pandas filtering, please see the following references.)\n", + "* [pandas: multiple conditions while indexing data frame](https://stackoverflow.com/questions/22591174/pandas-multiple-conditions-while-indexing-data-frame-unexpected-behavior)\n", + "* [pandas.DataFrame.apply](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.apply.html)\n", + "* [How to test if a string contains one of the substrings in a list...](https://stackoverflow.com/questions/26577516/how-to-test-if-a-string-contains-one-of-the-substrings-in-a-list-in-pandas)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "30edac2d", + "metadata": {}, + "outputs": [], + "source": [ + "# (pandas query) filter n_dim==1, non-dimenssional, and not contains the following substrings\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter = df_info[(df_info.n_dim == 1) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (df_info.is_dim==False) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4777a659", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " var_1d = df_filter.var_name.values[0]\n", + " var_1d\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fb5ae985", + "metadata": {}, + "outputs": [], + "source": [ + "# Note: if failed, change to another variable to plot.\n", + "try:\n", + " ds[var_1d].plot()\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "markdown", + "id": "9e41066e", + "metadata": {}, + "source": [ + "#### 2-dimensional basic plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "09bd4adc", + "metadata": {}, + "outputs": [], + "source": [ + "# (pandas query) filter n_dim==2, non-dimenssional, and not contains the 
following substrings\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_2 = df_info[(df_info.n_dim == 2) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (df_info.is_dim==False) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter_2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2cdea1a9", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " var_2d = df_filter_2.var_name.values[0]\n", + " var_2d\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f5f48879", + "metadata": {}, + "outputs": [], + "source": [ + "# Note: if failed, change to another variable to plot.\n", + "try:\n", + " print(ds[var_2d].dims)\n", + " # ds[var_2d].plot()\n", + "\n", + " # conventionally, use \"time\" as x-axis\n", + " ds[var_2d].plot(x=\"time\")\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "markdown", + "id": "7554b2a6", + "metadata": {}, + "source": [ + "#### qc-plotting (optional)\n", + "\n", + "Note: act qc plotting has more strict requirement, one of them is the associated qc variable need to have \"flag_masks\" attributes. 
Which is added by using ds = act.io.armfiles.read_netcdf(files_list), then ds.clean.cleanup()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "71c2096f", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " ds_act = act.io.armfiles.read_netcdf(full_path)\n", + " print(type(ds_act))\n", + " ds_act.clean.cleanup()\n", + "\n", + " # or \n", + " # ds.clean.cleanup()\n", + "except Exception as e:\n", + " print(\"ERROR\", e)\n", + " ds_act = ds" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "844f8505", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter valid variables for ACT qc plotting\n", + "condition = (\"qc_\" + df_info.var_name).apply(lambda x: ds_act[x].attrs.get(\"flag_masks\") is not None \n", + " if x in list(ds.variables) else False)\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_3 = df_info[(df_info.is_dim==False) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings))) &\n", + " condition\n", + " ]\n", + "df_filter_3" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ecd38cc6", + "metadata": {}, + "outputs": [], + "source": [ + "# QC Plot\n", + "try:\n", + " qc_variable = df_filter_3.var_name.values[0]\n", + " print(qc_variable)\n", + "except Exception as e:\n", + " print(e)\n", + " \n", + "try:\n", + "\n", + " # Plot\n", + " qc_display = act.plotting.TimeSeriesDisplay(ds_act)\n", + " qc_display.add_subplots((2,), figsize = (9.5,10))\n", + " qc_ax = qc_display.plot(qc_variable, subplot_index=(0,), set_title=\"QC results on field: \" + qc_variable,)\n", + " qc_ax.grid()\n", + " qc_display.qc_flag_block_plot(qc_variable, subplot_index=(1,))\n", + " qc_ax.set_xlabel(f\"UTC Time starts at {ds.time.data[0]}\")\n", + "\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e)" + ] + }, + { + "cell_type": "markdown", + "id": "d76e4d27", + "metadata": 
{}, + "source": [ + "#### bonus: choose variables to plot from a dropdown menu " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bb733804", + "metadata": {}, + "outputs": [], + "source": [ + "# Valid variables filtering\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_4 = df_info[(df_info.is_dim==False) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter_4" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "82d5c20d", + "metadata": {}, + "outputs": [], + "source": [ + "# example 1: using xarray plot\n", + "\n", + "# Uncomment the following cell to try the interactive plot (ctrl + /)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2fc9aaa4", + "metadata": {}, + "outputs": [], + "source": [ + "# %matplotlib widget\n", + "# plt.clf()\n", + "\n", + "# fig, ax = plt.subplots(figsize=(10, 4))\n", + "\n", + "# available_variables = df_filter_4.var_name.values\n", + "# @widgets.interact(var=available_variables)\n", + "# def update(var = available_variables[0]):\n", + "# fig.clear() # Remove old lines from plot and plot new one\n", + "# if len(ds[var].dims)==2:\n", + "# ds[var].plot(x=\"time\", add_colorbar=False)\n", + "# else:\n", + "# ds[var].plot()\n", + "# plt.grid()\n", + "# plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fabdd802", + "metadata": {}, + "outputs": [], + "source": [ + "# example 2: using act plot\n", + "\n", + "# Uncomment the following cell to try the interactive plot (ctrl + /)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "962b4186", + "metadata": {}, + "outputs": [], + "source": [ + "# %matplotlib widget\n", + "# plt.clf()\n", + "\n", + "# available_variables = df_filter_4.var_name.values\n", + "\n", + "\n", + "# @widgets.interact(var=available_variables)\n", + "# def 
update(var = available_variables[0]):\n", + "\n", + "# i_display = act.plotting.TimeSeriesDisplay(ds_act)\n", + "# i_display.add_subplots((1,), figsize=(10, 4))\n", + "# ax = i_display.plot(var, subplot_index=(0,), set_title=f\"{var} ({ds_act[var].attrs['long_name']})\",)\n", + "\n", + "# ax.set_xlabel(f\"UTC Time starts at {ds.time.data[0]}\")\n", + "# ax.grid()\n", + "# plt.show()\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.16" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/LCLHEIGHT/lcl.c1.ipynb b/VAPs/quicklook/LCLHEIGHT/lcl.c1.ipynb new file mode 100644 index 00000000..27d7893a --- /dev/null +++ b/VAPs/quicklook/LCLHEIGHT/lcl.c1.ipynb @@ -0,0 +1,339 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# LCL.C1 Plots\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/lclheight) for more information about this vap." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib widget\n", + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import ipywidgets as widgets\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "# Data archive directory\n", + "DATA_DIR = r'/data/archive/'\n", + "\n", + "# Datastream info\n", + "DATASTREAM_NAME = 'lcl'\n", + "DATA_LEVEL = 'c1'\n", + "LOCATIONS = [{'end_date': '2023-06-01', 'facility': 'C1', 'site': 'sgp', 'start_date': '2017-01-01'}]" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "9faaf875", + "metadata": {}, + "source": [ + "## Define site, facility, and date range" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ac6764f5", + "metadata": {}, + "outputs": [], + "source": [ + "print(\"The following locations and date ranges are available for this VAP:\")\n", + "display(pd.DataFrame(LOCATIONS, columns=['site', 'facility', 'start_date', 'end_date']))" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "8d132223", + "metadata": {}, + "source": [ + "#### Define site, facility, and date range (date format: YYYY-MM-DD) using the variables below:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e563983a", + "metadata": {}, + "outputs": [], + "source": [ + "site_facility = ( 'sgp', 'C1' )\n", + "\n", + "date_start = '2023-05-30'\n", + "date_end = '2023-06-01'" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "bccd3dfe-2f99-49a5-bace-ea37e8dd8fc3", + "metadata": {}, + "source": [ + "## Load data files\n", + "Load data files from /data/archive/" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cb4b9a26-c574-49c0-a521-658fa553e39e", + "metadata": {}, + 
"outputs": [], + "source": [ + "# Compile list of files\n", + "site, facility = site_facility\n", + "d_date_start = datetime.strptime(date_start, '%Y-%m-%d')\n", + "d_date_end = datetime.strptime(date_end, '%Y-%m-%d')\n", + "dir_path = os.path.join(DATA_DIR + site, site + DATASTREAM_NAME + facility + r'.' + DATA_LEVEL )\n", + "dir_path\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6be8f3dc", + "metadata": {}, + "outputs": [], + "source": [ + "from datetime import date, timedelta\n", + "import pandas as pd\n", + "\n", + "def get_ARM_formated_dates(start_date, end_date):\n", + " \"\"\"\n", + " Get a list of ARM conventional formated date lists, based on start_date and end_date(inclusive)\n", + " EXAMPLE:\n", + " get_ARM_formated_dates(start_date=\"20180219\", end_date=\"20180221\")\n", + " >> [\"20180219\", \"20180220\", \"20180221\"] \n", + " \"\"\"\n", + " \n", + " _start_date = pd.to_datetime(start_date)\n", + " _end_date = pd.to_datetime(end_date)\n", + " \n", + " delta = _end_date - _start_date # returns timedelta \n", + " dates = []\n", + "\n", + " for i in range(delta.days + 1):\n", + " day = _start_date + timedelta(days=i)\n", + " day_formated = day.strftime(format=\"%Y%m%d\")\n", + " dates.append(day_formated)\n", + " return dates\n", + "\n", + "\n", + "get_ARM_formated_dates(start_date=date_start, end_date=date_end)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "51feea2e", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter a list of files based on date pattern\n", + "import glob\n", + "dates = get_ARM_formated_dates(start_date=date_start, end_date=date_end)\n", + "files_filter = []\n", + "for date in dates:\n", + " files_filter += glob.glob(f'{dir_path}/*.{date}*.*')\n", + " files_filter\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0e5d0d7", + "metadata": {}, + "outputs": [], + "source": [ + "# Load files as a single dataset\n", + "files_list = 
files_filter \n", + "ds = act.io.armfiles.read_netcdf(files_list)\n", + "ds.clean.cleanup()\n", + "print(f'{len(files_list)} files loaded')\n", + "ds\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "4a551094-9ec0-4b64-b80a-9940573c2f50", + "metadata": {}, + "source": [ + "## Plot time series data\n", + "#### Define the list of variables to be plotted:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d09b789e-84f1-4605-846b-a72c110c8048", + "metadata": {}, + "outputs": [], + "source": [ + "variables_to_plot = ['lcl']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3458fb08-035b-4898-9253-0a94e6f9c97b", + "metadata": {}, + "outputs": [], + "source": [ + "ts_display = act.plotting.TimeSeriesDisplay(ds)\n", + "ts_display.add_subplots((len(variables_to_plot),), figsize = (9.5,4*len(variables_to_plot)))\n", + "\n", + "for i,v in enumerate(variables_to_plot):\n", + " ts_ax = ts_display.plot(v, subplot_index=(i,), set_title=ds.variables[v].attrs['long_name'],)\n", + " ts_ax.grid()\n", + "\n", + "plt.show()\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "194399aa-1907-452b-8ba9-bc31d7f60291", + "metadata": {}, + "source": [ + "## Quality check plots\n", + "#### Define variable for QC plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9a0ef63d-3eeb-48fc-a24a-43258b6134b8", + "metadata": {}, + "outputs": [], + "source": [ + "qc_variable = 'temperature'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "532663a3-4dc0-4497-bda8-018c5f91e1c4", + "metadata": {}, + "outputs": [], + "source": [ + "# QC Plot\n", + "if ('qc_'+qc_variable) in ds.variables:\n", + "\n", + " # Plot\n", + " qc_display = act.plotting.TimeSeriesDisplay(ds)\n", + " qc_display.add_subplots((2,), figsize = (9.5,10))\n", + " qc_ax = qc_display.plot(qc_variable, subplot_index=(0,), set_title=\"QC results on field: \" + qc_variable,)\n", + " qc_ax.grid()\n", + " 
qc_display.qc_flag_block_plot(qc_variable, subplot_index=(1,))\n", + "\n", + " plt.show()\n", + "else:\n", + " print(f'QC not available for the selected field: {qc_variable}')\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "2308db16-f362-4033-a11a-c3e5e75ad9ba", + "metadata": {}, + "source": [ + "## Field selection dropdown menu\n", + "Select variable to be plotted from a dropdown menu" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d5792fa8-2253-441a-8ac6-1e07a0345031", + "metadata": {}, + "outputs": [], + "source": [ + "plt.ioff()\n", + "\n", + "# populate dropdown menu with available variables \n", + "available_variables = [v for v in ds.variables if not('time' in v or v.startswith('qc_') or v.startswith('source_')) and 'long_name' in ds.variables[v].attrs]\n", + "d_variable = 'lcl'\n", + "dropdown = widgets.Dropdown(\n", + " options = [(ds.variables[v].attrs['long_name'], v) for v in available_variables],\n", + " value= d_variable,\n", + " description='Field:',\n", + " disabled=False,\n", + ")\n", + "dropdown.layout.margin = '0px 30% 0px 20%'\n", + "dropdown.layout.width = '50%'\n", + "\n", + "# set up display\n", + "i_display = act.plotting.TimeSeriesDisplay(ds)\n", + "i_display.add_subplots((1,), figsize = (9.5,5))\n", + "i_ax = i_display.plot(d_variable, subplot_index=(0,), set_title=ds.variables[d_variable].attrs['long_name'],)\n", + "i_ax.grid()\n", + "i_fig = i_display.fig\n", + "\n", + "# update plot callback function\n", + "def update_plot(change):\n", + " i_ax.cla()\n", + " i_ax_new = i_display.plot(change.new, subplot_index=(0,), set_title=ds.variables[change.new].attrs['long_name'],)\n", + " i_ax_new.grid()\n", + " i_fig.canvas.draw()\n", + " i_fig.canvas.flush_events()\n", + "\n", + "dropdown.observe(update_plot, names='value')\n", + "\n", + "widgets.AppLayout(\n", + " header=dropdown,\n", + " center=i_fig.canvas,\n", + " pane_heights=[1, 6,1]\n", + ")\n" + ] + } + ], + "metadata": { + "kernelspec": 
{ + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.12" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/LDQUANTS/LDQUANTS_tutorial.ipynb b/VAPs/quicklook/LDQUANTS/LDQUANTS_tutorial.ipynb new file mode 100644 index 00000000..d22480c3 --- /dev/null +++ b/VAPs/quicklook/LDQUANTS/LDQUANTS_tutorial.ipynb @@ -0,0 +1,838 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# LDQUANTS.C1 Notebook\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/ldquants) for more information about this vap." + ] + }, + { + "cell_type": "markdown", + "id": "97097763", + "metadata": {}, + "source": [ + "In this notebook, we demonstrate the workflow to explore ARM vap data (using ldquants as an example.) Value-added products (VAPs) are higher-order data products that have been analyzed and processed to ease scientists’ use of ARM data in atmospheric research and global climate models. \n", + "Here is the main content we will cover." 
+ ] + }, + { + "cell_type": "markdown", + "id": "eddec40f", + "metadata": {}, + "source": [ + "# Table of Content\n", + "## Access the data\n", + "* How to retrieve the data\n", + "* Data path and file name conventions\n", + "* Load data\n", + "## Explore the data\n", + "* NetCDF Data structure\n", + "* Xarray essentials\n", + "* Xarray Variable\n", + "## Plot the data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "import random\n", + "\n", + "import glob\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "id": "2372d5be", + "metadata": {}, + "source": [ + "## Access the data" + ] + }, + { + "cell_type": "markdown", + "id": "5b0d9684", + "metadata": {}, + "source": [ + "### How to retrieve the data\n", + "We assume the path \"/data/archive\" is available where you are running this notebook. You can use `os.path.exists(\"/data/archive\")` to verify if the path exists at your machine.\n", + "\n", + "\n", + "### Data path and file name conventions\n", + "There several common terminologies regarding ARM data, for example, data-stream-name, data-level, site, facility, instrument, etc. (For more details, please see [ARM Data File Standards Version 1.2](https://www.google.com/search?q=arm+datastream+facility+cite+definition&rlz=1C1GCEJ_enUS1029US1029&ei=hb41ZICrBPukqtsPvMODIA&ved=0ahUKEwjAgeiS0aL-AhV7kmoFHbzhAAQQ4dUDCBA&uact=5&oq=arm+datastream+facility+cite+definition&gs_lcp=Cgxnd3Mtd2l6LXNlcnAQAzIFCAAQogQyBQgAEKIEMgUIABCiBDIFCAAQogQyBQgAEKIEOgoIABBHENYEELADOgoIIRCgARDDBBAKSgQIQRgAUJ4DWIoOYO0PaAFwAXgAgAGMAYgBuQmSAQMyLjmYAQCgAQHIAQjAAQE&sclient=gws-wiz-serp).) 
\n", + "\n", + "For example, this notebook is called `ldquants.c1`, where `ldquants` is the \"datastream name\", and `c1` is the \"data level\".\n", + "\n", + "This datastream also contains site `guc` and facility `M1`. (Note: individual datastream might have multiple site-facility pairs.)\n", + "In such a case, the data of this data-stream is stored at `/data/archive/guc/gucldquantsM1.c1`, which is in the format of `<DATA_DIR>/<site>/<site><datastream_name><facility>.<data_level>`. We can use the following method to assign the data-stream directory `datastream_dir = os.path.join(DATA_DIR, site, site + DATASTREAM_NAME + facility + '.' + DATA_LEVEL )`\n", + "\n", + "The data files under datastream_dir also follow naming conventions. But once you reach the datastream_dir level, the most important file naming convention to differentiate the files is \"yyyyMMdd.hhmmss\", which comes in handy to filter out files based on datetime. For example, we can use `glob.glob(f'{datastream_dir}/*.200709*.*')` to filter files in 2007 September.\n", + "\n", + "(Note: refer to the https://adc.arm.gov/discovery/#/ and https://adc.arm.gov/solr8/metadata/select API to explore ARM datastream and associated available sites and facilities.)\n", + "\n", + "Please see the following examples in action" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d7e9eb85", + "metadata": {}, + "outputs": [], + "source": [ + "# Verify if DATA_DIR path exists\n", + "DATA_DIR = \"/data/archive\"\n", + "os.path.exists(DATA_DIR)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "586993fd", + "metadata": {}, + "outputs": [], + "source": [ + "# Speicify datastream_dir following the path conventions and check its existence\n", + "DATASTREAM_NAME = \"ldquants\"\n", + "DATA_LEVEL = \"c1\"\n", + "site = \"guc\"\n", + "facility = \"M1\"\n", + "datastream_dir = os.path.join(DATA_DIR, site, site + DATASTREAM_NAME + facility + '.' 
+ DATA_LEVEL )\n", + "print(datastream_dir)\n", + "print(os.path.exists(datastream_dir))\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0742f7c1", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: list 5 (random) files under datastream_dir\n", + "files = os.listdir(datastream_dir)\n", + "files[:5]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "39b98a36", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: get most recent file\n", + "list_of_files = glob.glob(f\"{datastream_dir}/*\") # * means all if need specific format then *.csv\n", + "latest_file = max(list_of_files, key=os.path.getctime)\n", + "latest_file" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "902d514e", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus sort datastream files based on datetime\n", + "files = os.listdir(datastream_dir)\n", + "file_sorted = files.copy()\n", + "file_sorted.sort() \n", + "print(file_sorted[:5])\n", + "\n", + "# to reverse\n", + "file_sorted_reverse = files.copy() \n", + "file_sorted_reverse.sort(reverse=True)\n", + "print(file_sorted_reverse[:5])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ec5923b2", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: pattern matching\n", + "# filter the 200709** files under datastream_dir\n", + "files_filter = glob.glob(f'{datastream_dir}/*.200709*.*')\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c4a4aa18", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: use armnotebook_utils.file_filter (TODO) to filter files based on datastream info \n" + ] + }, + { + "cell_type": "markdown", + "id": "cfeb9efc", + "metadata": {}, + "source": [ + "### Load data\n", + "The arm data is stored in [NetCDF](https://en.wikipedia.org/wiki/NetCDF#:~:text=%22NetCDF%20(network%20Common%20Data%20Form,format%20for%20representing%20scientific%20data.) format. 
We can use xarray's `open_dataset` method to load single file, or `open_mfdataset` to open multiple files. (Note: the latter will still return a single xarray dataset object by combining multiple files.)\n", + "\n", + "Note: open_dataset keeps the file handle open and lazy loads its contents. All parameters are passed directly to open_dataset. It is a preferable method over load_dataset for memory-effiency. (more details, please see [xarray.open_dataset](https://docs.xarray.dev/en/stable/generated/xarray.open_dataset.html))\n", + "\n", + "See the following example in action" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a440a329", + "metadata": {}, + "outputs": [], + "source": [ + "# open single file\n", + "full_path = latest_file\n", + "print(full_path)\n", + "ds_single = xr.open_dataset(full_path)\n", + "ds_single" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0143a3d", + "metadata": {}, + "outputs": [], + "source": [ + "print(ds_single)\n", + "print(type(ds_single))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1c0f8939", + "metadata": {}, + "outputs": [], + "source": [ + "# open multiple files\n", + "n_files = 3\n", + "full_paths = [os.path.join(datastream_dir, f_path) for f_path in file_sorted_reverse[:n_files]]\n", + "print(full_paths)\n", + "try: # Note: sometimes multiple files cannot be merged, so we used try except here.\n", + " ds_mutiple = xr.open_mfdataset(full_paths)\n", + " # ds_mutiple\n", + " print(type(ds_mutiple))\n", + " print(ds_mutiple)\n", + "except Exception as e:\n", + " print(e)" + ] + }, + { + "cell_type": "markdown", + "id": "3fa59943", + "metadata": {}, + "source": [ + "## Explore the data" + ] + }, + { + "cell_type": "markdown", + "id": "571f69b9", + "metadata": {}, + "source": [ + "### NetCDF Data structure \n", + "\n", + "\n", + "(If you are confident with NetCDF basics and xarray essentials, feel free to skip this session.)\n", + "Before we dive into 
data exploration, there are some essential concepts we should be familiar with (shown below): \n", + "* Dataset\n", + "* Data array\n", + "* Variable\n", + "* Dimension\n", + "* Coordinate\n", + "* Data Type\n", + "* Meta Data (Attributes)\n", + "\n", + "We will not go into details about NetCDF basics and here are some references you might find helpful\n", + "* [Components of a NetCDF Dataset](https://iprc.soest.hawaii.edu/users/xfu/tool/guidef-7.html#)\n", + "* [Network Common Data Form (NetCDF)](https://www.unidata.ucar.edu/software/netcdf/) and [A Brief History of (netCDF) Time](https://www.unidata.ucar.edu/software/netcdf/time/recs.html)\n", + "\n", + "\n", + "\n", + "### Xarray essentials\n", + "Earlier we introduced the xarray package and used `open_dataset` and `open_mfdataset` to retrieve NetCDF data files as an xarray Dataset object. Recall that you can review the data in a notebook by using `print(ds)` or `ds`. \n", + "\n", + "Using xarray to retrieve the aforementioned NetCDF basics is straightforward. 
In general,\n", + "* Dataset: `ds`\n", + "* Data array: `ds.variables`\n", + "* Variable: `ds.variables`\n", + "* Dimension: `ds.dims`\n", + "* Coordinate: `ds.coords`\n", + "* Data Type: `type`\n", + "* Meta Data (Attributes): `ds.attrs`\n", + "\n", + "Also, here are some references if you are new to xarray\n", + "* [Xarray in 45 minutes](https://tutorial.xarray.dev/overview/xarray-in-45-min.html)\n", + "* [Handling NetCDF Files using XArray for Absolute Beginners](https://towardsdatascience.com/handling-netcdf-files-using-xarray-for-absolute-beginners-111a8ab4463f)\n", + "\n", + "Try the following commands in action\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b3eac323", + "metadata": {}, + "outputs": [], + "source": [ + "ds = ds_single" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "77ecf85d", + "metadata": {}, + "outputs": [], + "source": [ + "# Dataset \n", + "ds" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "25e7de09", + "metadata": {}, + "outputs": [], + "source": [ + "# Data array, Variable\n", + "# Note: the info can be overwhelming. 
Do not attempt to grasp everything at the first glance.\n", + "ds.variables" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8c41b67e", + "metadata": {}, + "outputs": [], + "source": [ + "# dimenssions\n", + "ds.dims" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "156f1dfc", + "metadata": {}, + "outputs": [], + "source": [ + "# coordinates\n", + "ds.coords" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "277d6064", + "metadata": {}, + "outputs": [], + "source": [ + "# Meta Data (Attributes)\n", + "ds.attrs" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d334681f", + "metadata": {}, + "outputs": [], + "source": [ + "# type\n", + "print(type(ds))\n", + "print(type(ds.variables))\n", + "print(type(ds.dims))\n", + "print(type(ds.coords))\n", + "print(type(ds.attrs))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "643399d6", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: retrieve variable names only\n", + "list(ds.variables)" + ] + }, + { + "cell_type": "markdown", + "id": "1cf36878", + "metadata": {}, + "source": [ + "#### Discussion: variable vs. coordinates vs. dimenssions. \n", + "From [Components of a NetCDF Dataset](https://iprc.soest.hawaii.edu/users/xfu/tool/guidef-7.html#):\n", + "\n", + "* Dimession: A dimension may be used to represent a real physical dimension, for example, time, latitude, longitude, or height. A dimension might also be used to index other quantities, for example station or model-run-number.\n", + "A netCDF dimension has both a name and a length. A dimension length is an arbitrary positive integer, except that one dimension in a netCDF dataset can have the length UNLIMITED.\n", + "\n", + "* Variables: Variables are used to store the bulk of the data in a netCDF dataset. A variable represents an array of values of the same type. A scalar value is treated as a 0-dimensional array. 
A variable has a name, a data type, and a shape described by its list of dimensions specified when the variable is created. A variable may also have associated attributes, which may be added, deleted or changed after the variable is created.\n", + "\n", + "* Coordinate (Variables): It is legal for a variable to have the same name as a dimension. Such variables have no special meaning to the netCDF library. However there is a convention that such variables should be treated in a special way by software using this library.\n", + "A variable with the same name as a dimension is called a coordinate variable. It typically defines a physical coordinate corresponding to that dimension.\n", + "\n", + "\n", + "Tip: By definition, coordinates and dimensions are also variables. For example, `ds.time` is a coordinate, but it is also a special variable. In practice, however, when we talk about a variable, we mean a regular (non-coordinate) variable. We will use this convention for the remainder of this notebook. " + ] + }, + { + "cell_type": "markdown", + "id": "d47120f9", + "metadata": {}, + "source": [ + "### Xarray Variable\n", + "\n", + "\n", + "We can use `ds.variables` to access the variables of a dataset. However, printing out the whole dataset can often be overwhelming. Instead, we can work on an individual variable (also called an xarray data array) using the `ds[var_name]` syntax, where `var_name` is the name of the variable. See the following examples in action.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5a57e136", + "metadata": {}, + "outputs": [], + "source": [ + "print(type(ds[\"time\"]))\n", + "ds[\"time\"]" + ] + }, + { + "cell_type": "markdown", + "id": "51ec643e", + "metadata": {}, + "source": [ + "#### Variable properties\n", + "For an individual variable, the following properties are most commonly used, shown with the associated xarray attribute used to retrieve them. 
(assuming var = ds[var_name])\n", + "* name: `var.name`\n", + "* data content: `var.data`\n", + "* attributes: `var.attrs`\n", + "* dimensions: `var.dims`\n", + "* data type: `var.data.dtype`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a632a525", + "metadata": {}, + "outputs": [], + "source": [ + "var_name = \"time\"\n", + "var = ds[var_name]\n", + "\n", + "print(\"var.name: \\n\", var.name, \"\\n\")\n", + "print(\"var.data: \\n\", var.data, \"\\n\")\n", + "print(\"var.attrs: \\n\", var.attrs, \"\\n\")\n", + "print(\"var.dims: \\n\", var.dims, \"\\n\")\n", + "print(\"var.data.dtype: \\n\", var.data.dtype, \"\\n\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b256b07f", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: individual variable info table (collect and display variable info as a tabular format)\n", + "df_info = pd.DataFrame()\n", + "df_info[\"var_name\"] = list(ds.variables)\n", + "df_info[\"dims\"] = df_info.var_name.apply(lambda x: ds[x].dims)\n", + "df_info[\"is_dim\"] = df_info.var_name.apply(lambda x: x in ds.dims)\n", + "df_info[\"n_dim\"] = df_info.var_name.apply(lambda x: len(ds[x].dims))\n", + "df_info[\"attrs\"] = df_info.var_name.apply(lambda x: ds[x].attrs)\n", + "df_info[\"dtype\"] = df_info.var_name.apply(lambda x: ds[x].data.dtype)\n", + "df_info" + ] + }, + { + "cell_type": "markdown", + "id": "e6a36b96", + "metadata": {}, + "source": [ + "### Data cleaning/Preprocessing (skipped)\n", + "Data cleaning and preprocessing is an important stage in the data analysis pipeline. However, it is out of the scope of this notebook. For more information about data cleaning and preprocessing basics, here are some references. 
\n", + "* [Xarray Fundamentals](https://earth-env-data-science.github.io/lectures/xarray/xarray.html)\n", + "* [Xarray Tutorial — Pangeo Gallery documentation](http://gallery.pangeo.io/repos/pangeo-data/pangeo-tutorial-gallery/xarray.html)\n", + "* [Pythonic Data Cleaning With pandas and NumPy](https://realpython.com/python-data-cleaning-numpy-pandas/)\n", + "* [Pandas - Cleaning Data](https://www.w3schools.com/python/pandas/pandas_cleaning.asp)" + ] + }, + { + "cell_type": "markdown", + "id": "ae6e1a69", + "metadata": {}, + "source": [ + "### Plotting\n", + "Data visualization (or plotting) is another important data analysis topic and deserves its own discussion. Here in this notebook we will only demonstrate basic tools for simple data visualization tasks.\n", + "\n", + "Here are some references you might find useful:\n", + "* xarray plotting: https://docs.xarray.dev/en/stable/user-guide/plotting.html\n", + "* Atmospheric Community Toolkit (ACT): https://arm-doe.github.io/ACT/index.html\n", + "\n", + "Note: this notebook is auto-generated using a template. It uses a general idea to select variable(s) to plot and is not customized to each individual notebook. Feel free to change the variables of interest, especially if a figure fails to plot." + ] + }, + { + "cell_type": "markdown", + "id": "7e417fb2", + "metadata": {}, + "source": [ + "#### 1-dimensional basic time series plot\n", + "\n", + "For the following plot we would like to find variables such that\n", + "* it has one and only one dimension\n", + "* \"time\" is its coordinate variable\n", + "* it is not a dimension itself,\n", + "* it is not a special variable with substrings within [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "\n", + "Please see the following example in action. 
(For more details about pandas filtering, please see the following references.)\n", + "* [pandas: multiple conditions while indexing data frame](https://stackoverflow.com/questions/22591174/pandas-multiple-conditions-while-indexing-data-frame-unexpected-behavior)\n", + "* [pandas.DataFrame.apply](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.apply.html)\n", + "* [How to test if a string contains one of the substrings in a list...](https://stackoverflow.com/questions/26577516/how-to-test-if-a-string-contains-one-of-the-substrings-in-a-list-in-pandas)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "30edac2d", + "metadata": {}, + "outputs": [], + "source": [ + "# (pandas query) filter n_dim==1, non-dimensional, and not contains the following substrings\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter = df_info[(df_info.n_dim == 1) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (df_info.is_dim==False) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4777a659", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " var_1d = df_filter.var_name.values[0]\n", + " var_1d\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fb5ae985", + "metadata": {}, + "outputs": [], + "source": [ + "# Note: if failed, change to another variable to plot.\n", + "try:\n", + " ds[var_1d].plot()\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "markdown", + "id": "9e41066e", + "metadata": {}, + "source": [ + "#### 2-dimensional basic plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "09bd4adc", + "metadata": {}, + "outputs": [], + "source": [ + "# (pandas query) filter n_dim==2, non-dimensional, and not contains the 
following substrings\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_2 = df_info[(df_info.n_dim == 2) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (df_info.is_dim==False) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter_2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2cdea1a9", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " var_2d = df_filter_2.var_name.values[0]\n", + " var_2d\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f5f48879", + "metadata": {}, + "outputs": [], + "source": [ + "# Note: if failed, change to another variable to plot.\n", + "try:\n", + " print(ds[var_2d].dims)\n", + " # ds[var_2d].plot()\n", + "\n", + " # conventionally, use \"time\" as x-axis\n", + " ds[var_2d].plot(x=\"time\")\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "markdown", + "id": "7554b2a6", + "metadata": {}, + "source": [ + "#### qc-plotting (optional)\n", + "\n", + "Note: act qc plotting has more strict requirement, one of them is the associated qc variable need to have \"flag_masks\" attributes. 
Which is added by using ds = act.io.armfiles.read_netcdf(files_list), then ds.clean.cleanup()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "71c2096f", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " ds_act = act.io.armfiles.read_netcdf(full_path)\n", + " print(type(ds_act))\n", + " ds_act.clean.cleanup()\n", + "\n", + " # or \n", + " # ds.clean.cleanup()\n", + "except Exception as e:\n", + " print(\"ERROR\", e)\n", + " ds_act = ds" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "844f8505", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter valid variables for ACT qc plotting\n", + "condition = (\"qc_\" + df_info.var_name).apply(lambda x: ds_act[x].attrs.get(\"flag_masks\") is not None \n", + " if x in list(ds.variables) else False)\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_3 = df_info[(df_info.is_dim==False) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings))) &\n", + " condition\n", + " ]\n", + "df_filter_3" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ecd38cc6", + "metadata": {}, + "outputs": [], + "source": [ + "# QC Plot\n", + "try:\n", + " qc_variable = df_filter_3.var_name.values[0]\n", + " print(qc_variable)\n", + "except Exception as e:\n", + " print(e)\n", + " \n", + "try:\n", + "\n", + " # Plot\n", + " qc_display = act.plotting.TimeSeriesDisplay(ds_act)\n", + " qc_display.add_subplots((2,), figsize = (9.5,10))\n", + " qc_ax = qc_display.plot(qc_variable, subplot_index=(0,), set_title=\"QC results on field: \" + qc_variable,)\n", + " qc_ax.grid()\n", + " qc_display.qc_flag_block_plot(qc_variable, subplot_index=(1,))\n", + " qc_ax.set_xlabel(f\"UTC Time starts at {ds.time.data[0]}\")\n", + "\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e)" + ] + }, + { + "cell_type": "markdown", + "id": "d76e4d27", + "metadata": 
{}, + "source": [ + "#### bonus: choose variables to plot from a dropdown menu " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bb733804", + "metadata": {}, + "outputs": [], + "source": [ + "# Valid variables filtering\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_4 = df_info[(df_info.is_dim==False) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter_4" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "82d5c20d", + "metadata": {}, + "outputs": [], + "source": [ + "# example 1: using xarray plot\n", + "\n", + "# Uncomment the following cell to try the interactive plot (ctrl + /)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2fc9aaa4", + "metadata": {}, + "outputs": [], + "source": [ + "# %matplotlib widget\n", + "# plt.clf()\n", + "\n", + "# fig, ax = plt.subplots(figsize=(10, 4))\n", + "\n", + "# available_variables = df_filter_4.var_name.values\n", + "# @widgets.interact(var=available_variables)\n", + "# def update(var = available_variables[0]):\n", + "# fig.clear() # Remove old lines from plot and plot new one\n", + "# if len(ds[var].dims)==2:\n", + "# ds[var].plot(x=\"time\", add_colorbar=False)\n", + "# else:\n", + "# ds[var].plot()\n", + "# plt.grid()\n", + "# plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fabdd802", + "metadata": {}, + "outputs": [], + "source": [ + "# example 2: using act plot\n", + "\n", + "# Uncomment the following cell to try the interactive plot (ctrl + /)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "962b4186", + "metadata": {}, + "outputs": [], + "source": [ + "# %matplotlib widget\n", + "# plt.clf()\n", + "\n", + "# available_variables = df_filter_4.var_name.values\n", + "\n", + "\n", + "# @widgets.interact(var=available_variables)\n", + "# def 
update(var = available_variables[0]):\n", + "\n", + "# i_display = act.plotting.TimeSeriesDisplay(ds_act)\n", + "# i_display.add_subplots((1,), figsize=(10, 4))\n", + "# ax = i_display.plot(var, subplot_index=(0,), set_title=f\"{var} ({ds_act[var].attrs['long_name']})\",)\n", + "\n", + "# ax.set_xlabel(f\"UTC Time starts at {ds.time.data[0]}\")\n", + "# ax.grid()\n", + "# plt.show()\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.16" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/LDQUANTS/ldquants.c1.ipynb b/VAPs/quicklook/LDQUANTS/ldquants.c1.ipynb new file mode 100644 index 00000000..b34c8218 --- /dev/null +++ b/VAPs/quicklook/LDQUANTS/ldquants.c1.ipynb @@ -0,0 +1,297 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# LDQUANTS.C1 Plots\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/ldquants) for more information about this vap." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib widget\n", + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import ipywidgets as widgets\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "# Data archive directory\n", + "DATA_DIR = r'/data/archive/'\n", + "\n", + "# Datastream info\n", + "DATASTREAM_NAME = 'ldquants'\n", + "DATA_LEVEL = 'c1'\n", + "LOCATIONS = [{'end_date': '2023-06-15', 'facility': 'M1', 'site': 'guc', 'start_date': '2021-09-01'}, {'end_date': '2023-06-15', 'facility': 'S2', 'site': 'guc', 'start_date': '2021-09-03'}, {'end_date': '2023-12-18', 'facility': 'C1', 'site': 'ena', 'start_date': '2014-02-27'}, {'end_date': '2022-09-30', 'facility': 'M1', 'site': 'hou', 'start_date': '2021-08-05'}, {'end_date': '2022-10-01', 'facility': 'S1', 'site': 'hou', 'start_date': '2021-08-18'}, {'end_date': '2022-05-12', 'facility': 'S2', 'site': 'hou', 'start_date': '2022-04-12'}, {'end_date': '2022-09-30', 'facility': 'S3', 'site': 'hou', 'start_date': '2022-05-05'}, {'end_date': '2023-12-10', 'facility': 'M1', 'site': 'epc', 'start_date': '2023-01-15'}, {'end_date': '2023-12-10', 'facility': 'S2', 'site': 'epc', 'start_date': '2023-01-15'}, {'end_date': '2015-12-01', 'facility': 'S10', 'site': 'mao', 'start_date': '2014-09-24'}, {'end_date': '2023-12-18', 'facility': 'C1', 'site': 'sgp', 'start_date': '2016-11-02'}, {'end_date': '2023-09-28', 'facility': 'E13', 'site': 'sgp', 'start_date': '2016-11-04'}, {'end_date': '2023-09-28', 'facility': 'I10', 'site': 'sgp', 'start_date': '2016-11-28'}, {'end_date': '2023-09-28', 'facility': 'I8', 'site': 'sgp', 'start_date': '2016-12-05'}, {'end_date': '2023-09-28', 'facility': 'I9', 'site': 'sgp', 'start_date': '2016-11-28'}]" + ] + }, 
+ { + "attachments": {}, + "cell_type": "markdown", + "id": "9faaf875", + "metadata": {}, + "source": [ + "## Define site, facility, and date range" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ac6764f5", + "metadata": {}, + "outputs": [], + "source": [ + "print(\"The following locations and date ranges are available for this VAP:\")\n", + "display(pd.DataFrame(LOCATIONS, columns=['site', 'facility', 'start_date', 'end_date']))" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "8d132223", + "metadata": {}, + "source": [ + "#### Define site, facility, and date range (date format: YYYY-MM-DD) using the variables below:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e563983a", + "metadata": {}, + "outputs": [], + "source": [ + "site_facility = ( 'sgp', 'C1' )\n", + "\n", + "date_start = '2023-12-16'\n", + "date_end = '2023-12-18'" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "bccd3dfe-2f99-49a5-bace-ea37e8dd8fc3", + "metadata": {}, + "source": [ + "## Load data files\n", + "Load data files from /data/archive/" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cb4b9a26-c574-49c0-a521-658fa553e39e", + "metadata": {}, + "outputs": [], + "source": [ + "# Compile list of files\n", + "site, facility = site_facility\n", + "d_date_start = datetime.strptime(date_start, '%Y-%m-%d')\n", + "d_date_end = datetime.strptime(date_end, '%Y-%m-%d')\n", + "dir_path = os.path.join(DATA_DIR + site, site + DATASTREAM_NAME + facility + r'.' 
+ DATA_LEVEL )\n", + "dir_path\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6be8f3dc", + "metadata": {}, + "outputs": [], + "source": [ + "from datetime import date, timedelta\n", + "import pandas as pd\n", + "\n", + "def get_ARM_formated_dates(start_date, end_date):\n", + " \"\"\"\n", + " Get a list of ARM conventional formated date lists, based on start_date and end_date(inclusive)\n", + " EXAMPLE:\n", + " get_ARM_formated_dates(start_date=\"20180219\", end_date=\"20180221\")\n", + " >> [\"20180219\", \"20180220\", \"20180221\"] \n", + " \"\"\"\n", + " \n", + " _start_date = pd.to_datetime(start_date)\n", + " _end_date = pd.to_datetime(end_date)\n", + " \n", + " delta = _end_date - _start_date # returns timedelta \n", + " dates = []\n", + "\n", + " for i in range(delta.days + 1):\n", + " day = _start_date + timedelta(days=i)\n", + " day_formated = day.strftime(format=\"%Y%m%d\")\n", + " dates.append(day_formated)\n", + " return dates\n", + "\n", + "\n", + "get_ARM_formated_dates(start_date=date_start, end_date=date_end)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "51feea2e", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter a list of files based on date pattern\n", + "import glob\n", + "dates = get_ARM_formated_dates(start_date=date_start, end_date=date_end)\n", + "files_filter = []\n", + "for date in dates:\n", + " files_filter += glob.glob(f'{dir_path}/*.{date}*.*')\n", + " files_filter\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0e5d0d7", + "metadata": {}, + "outputs": [], + "source": [ + "# Load files as a single dataset\n", + "files_list = files_filter \n", + "ds = act.io.armfiles.read_netcdf(files_list)\n", + "ds.clean.cleanup()\n", + "print(f'{len(files_list)} files loaded')\n", + "ds\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "4a551094-9ec0-4b64-b80a-9940573c2f50", + "metadata": {}, + "source": [ + "## Plot 
time series data\n", + "#### Define the list of variables to be plotted:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d09b789e-84f1-4605-846b-a72c110c8048", + "metadata": {}, + "outputs": [], + "source": [ + "variables_to_plot = ['rain_rate', 'reflectivity_factor_sband20c', 'reflectivity_factor_cband20c']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3458fb08-035b-4898-9253-0a94e6f9c97b", + "metadata": {}, + "outputs": [], + "source": [ + "ts_display = act.plotting.TimeSeriesDisplay(ds)\n", + "ts_display.add_subplots((len(variables_to_plot),), figsize = (9.5,4*len(variables_to_plot)))\n", + "\n", + "for i,v in enumerate(variables_to_plot):\n", + " ts_ax = ts_display.plot(v, subplot_index=(i,), set_title=ds.variables[v].attrs['long_name'],)\n", + " ts_ax.grid()\n", + "\n", + "plt.show()\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "2308db16-f362-4033-a11a-c3e5e75ad9ba", + "metadata": {}, + "source": [ + "## Field selection dropdown menu\n", + "Select variable to be plotted from a dropdown menu" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d5792fa8-2253-441a-8ac6-1e07a0345031", + "metadata": {}, + "outputs": [], + "source": [ + "plt.ioff()\n", + "\n", + "# populate dropdown menu with available variables \n", + "available_variables = [v for v in ds.variables if not('time' in v or v.startswith('qc_') or v.startswith('source_')) and 'long_name' in ds.variables[v].attrs]\n", + "d_variable = 'rain_rate'\n", + "dropdown = widgets.Dropdown(\n", + " options = [(ds.variables[v].attrs['long_name'], v) for v in available_variables],\n", + " value= d_variable,\n", + " description='Field:',\n", + " disabled=False,\n", + ")\n", + "dropdown.layout.margin = '0px 30% 0px 20%'\n", + "dropdown.layout.width = '50%'\n", + "\n", + "# set up display\n", + "i_display = act.plotting.TimeSeriesDisplay(ds)\n", + "i_display.add_subplots((1,), figsize = (9.5,5))\n", + "i_ax = 
i_display.plot(d_variable, subplot_index=(0,), set_title=ds.variables[d_variable].attrs['long_name'],)\n", + "i_ax.grid()\n", + "i_fig = i_display.fig\n", + "\n", + "# update plot callback function\n", + "def update_plot(change):\n", + " i_ax.cla()\n", + " i_ax_new = i_display.plot(change.new, subplot_index=(0,), set_title=ds.variables[change.new].attrs['long_name'],)\n", + " i_ax_new.grid()\n", + " i_fig.canvas.draw()\n", + " i_fig.canvas.flush_events()\n", + "\n", + "dropdown.observe(update_plot, names='value')\n", + "\n", + "widgets.AppLayout(\n", + " header=dropdown,\n", + " center=i_fig.canvas,\n", + " pane_heights=[1, 6,1]\n", + ")\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.12" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/LSSONDE/LSSONDE_tutorial.ipynb b/VAPs/quicklook/LSSONDE/LSSONDE_tutorial.ipynb new file mode 100644 index 00000000..9d53a719 --- /dev/null +++ b/VAPs/quicklook/LSSONDE/LSSONDE_tutorial.ipynb @@ -0,0 +1,838 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# LSSONDE.C1 Notebook\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/lssonde) for more information about this vap." + ] + }, + { + "cell_type": "markdown", + "id": "97097763", + "metadata": {}, + "source": [ + "In this notebook, we demonstrate the workflow to explore ARM vap data (using lssonde as an example.) 
Value-added products (VAPs) are higher-order data products that have been analyzed and processed to ease scientists’ use of ARM data in atmospheric research and global climate models. \n", + "Here is the main content we will cover." + ] + }, + { + "cell_type": "markdown", + "id": "eddec40f", + "metadata": {}, + "source": [ + "# Table of Contents\n", + "## Access the data\n", + "* How to retrieve the data\n", + "* Data path and file name conventions\n", + "* Load data\n", + "## Explore the data\n", + "* NetCDF Data structure\n", + "* Xarray essentials\n", + "* Xarray Variable\n", + "## Plot the data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "import random\n", + "\n", + "import glob\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "id": "2372d5be", + "metadata": {}, + "source": [ + "## Access the data" + ] + }, + { + "cell_type": "markdown", + "id": "5b0d9684", + "metadata": {}, + "source": [ + "### How to retrieve the data\n", + "We assume the path \"/data/archive\" is available where you are running this notebook. You can use `os.path.exists(\"/data/archive\")` to verify if the path exists on your machine.\n", + "\n", + "\n", + "### Data path and file name conventions\n", + "There are several common terms regarding ARM data, for example, data-stream-name, data-level, site, facility, instrument, etc. 
(For more details, please see [ARM Data File Standards Version 1.2](https://www.google.com/search?q=arm+datastream+facility+cite+definition&rlz=1C1GCEJ_enUS1029US1029&ei=hb41ZICrBPukqtsPvMODIA&ved=0ahUKEwjAgeiS0aL-AhV7kmoFHbzhAAQQ4dUDCBA&uact=5&oq=arm+datastream+facility+cite+definition&gs_lcp=Cgxnd3Mtd2l6LXNlcnAQAzIFCAAQogQyBQgAEKIEMgUIABCiBDIFCAAQogQyBQgAEKIEOgoIABBHENYEELADOgoIIRCgARDDBBAKSgQIQRgAUJ4DWIoOYO0PaAFwAXgAgAGMAYgBuQmSAQMyLjmYAQCgAQHIAQjAAQE&sclient=gws-wiz-serp).) \n", + "\n", + "For example, this notebook is called `lssonde.c1`, where `lssonde` is the \"datastream name\", and `c1` is the \"data level\".\n", + "\n", + "This datastream also contains site `acx` and facility `M1`. (Note: individual datastream might have multiple site-facility pairs.)\n", + "In such a case, the data of this data-stream is stored at `/data/archive/acx/acxlssondeM1.c1`, which is in the format of `<DATA_DIR>/<site>/<site><datastream_name><facility>.<data_level>`. We can use the following method to assign the data-stream directory: `datastream_dir = os.path.join(DATA_DIR, site, site + DATASTREAM_NAME + facility + '.' + DATA_LEVEL )`\n", + "\n", + "The data files under datastream_dir also follow naming conventions. Once you reach the datastream_dir level, the most important file naming convention to differentiate the files is \"yyyyMMdd.hhmmss\", which comes in handy for filtering files based on datetime. 
For example, we can use `glob.glob(f'{datastream_dir}/*.200709*.*')` to filter files from September 2007.\n", + "\n", + "(Note: refer to https://adc.arm.gov/discovery/#/ and the https://adc.arm.gov/solr8/metadata/select API to explore ARM datastreams and their associated available sites and facilities.)\n", + "\n", + "Please see the following examples in action" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d7e9eb85", + "metadata": {}, + "outputs": [], + "source": [ + "# Verify if DATA_DIR path exists\n", + "DATA_DIR = \"/data/archive\"\n", + "os.path.exists(DATA_DIR)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "586993fd", + "metadata": {}, + "outputs": [], + "source": [ + "# Specify datastream_dir following the path conventions and check its existence\n", + "DATASTREAM_NAME = \"lssonde\"\n", + "DATA_LEVEL = \"c1\"\n", + "site = \"acx\"\n", + "facility = \"M1\"\n", + "datastream_dir = os.path.join(DATA_DIR, site, site + DATASTREAM_NAME + facility + '.' 
+ DATA_LEVEL )\n", + "print(datastream_dir)\n", + "print(os.path.exists(datastream_dir))\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0742f7c1", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: list 5 (random) files under datastream_dir\n", + "files = os.listdir(datastream_dir)\n", + "files[:5]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "39b98a36", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: get most recent file\n", + "list_of_files = glob.glob(f\"{datastream_dir}/*\") # * means all if need specific format then *.csv\n", + "latest_file = max(list_of_files, key=os.path.getctime)\n", + "latest_file" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "902d514e", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus sort datastream files based on datetime\n", + "files = os.listdir(datastream_dir)\n", + "file_sorted = files.copy()\n", + "file_sorted.sort() \n", + "print(file_sorted[:5])\n", + "\n", + "# to reverse\n", + "file_sorted_reverse = files.copy() \n", + "file_sorted_reverse.sort(reverse=True)\n", + "print(file_sorted_reverse[:5])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ec5923b2", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: pattern matching\n", + "# filter the 200709** files under datastream_dir\n", + "files_filter = glob.glob(f'{datastream_dir}/*.200709*.*')\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c4a4aa18", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: use armnotebook_utils.file_filter (TODO) to filter files based on datastream info \n" + ] + }, + { + "cell_type": "markdown", + "id": "cfeb9efc", + "metadata": {}, + "source": [ + "### Load data\n", + "The arm data is stored in [NetCDF](https://en.wikipedia.org/wiki/NetCDF#:~:text=%22NetCDF%20(network%20Common%20Data%20Form,format%20for%20representing%20scientific%20data.) format. 
We can use xarray's `open_dataset` method to load a single file, or `open_mfdataset` to open multiple files. (Note: the latter will still return a single xarray dataset object by combining multiple files.)\n", + "\n", + "Note: `open_dataset` keeps the file handle open and lazily loads its contents. `load_dataset` is a thin wrapper that passes all of its parameters directly to `open_dataset`, but it loads the whole dataset into memory and closes the file. `open_dataset` is therefore preferable to `load_dataset` for memory efficiency. (For more details, please see [xarray.open_dataset](https://docs.xarray.dev/en/stable/generated/xarray.open_dataset.html).)\n", + "\n", + "See the following example in action" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a440a329", + "metadata": {}, + "outputs": [], + "source": [ + "# open single file\n", + "full_path = latest_file\n", + "print(full_path)\n", + "ds_single = xr.open_dataset(full_path)\n", + "ds_single" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0143a3d", + "metadata": {}, + "outputs": [], + "source": [ + "print(ds_single)\n", + "print(type(ds_single))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1c0f8939", + "metadata": {}, + "outputs": [], + "source": [ + "# open multiple files\n", + "n_files = 3\n", + "full_paths = [os.path.join(datastream_dir, f_path) for f_path in file_sorted_reverse[:n_files]]\n", + "print(full_paths)\n", + "try: # Note: sometimes multiple files cannot be merged, so we used try except here.\n", + " ds_mutiple = xr.open_mfdataset(full_paths)\n", + " # ds_mutiple\n", + " print(type(ds_mutiple))\n", + " print(ds_mutiple)\n", + "except Exception as e:\n", + " print(e)" + ] + }, + { + "cell_type": "markdown", + "id": "3fa59943", + "metadata": {}, + "source": [ + "## Explore the data" + ] + }, + { + "cell_type": "markdown", + "id": "571f69b9", + "metadata": {}, + "source": [ + "### NetCDF Data structure \n", + "\n", + "\n", + "(If you are confident with NetCDF basics and xarray essentials, feel free to skip this section.)\n", + "Before we dive into 
data exploration, there are some essential concepts we should be familiar with (shown below): \n", + "* Dataset\n", + "* Data array\n", + "* Variable\n", + "* Dimension\n", + "* Coordinate\n", + "* Data Type\n", + "* Meta Data (Attributes)\n", + "\n", + "We will not go into details about NetCDF basics, but here are some references you might find helpful:\n", + "* [Components of a NetCDF Dataset](https://iprc.soest.hawaii.edu/users/xfu/tool/guidef-7.html#)\n", + "* [Network Common Data Form (NetCDF)](https://www.unidata.ucar.edu/software/netcdf/) and [A Brief History of (netCDF) Time](https://www.unidata.ucar.edu/software/netcdf/time/recs.html)\n", + "\n", + "\n", + "\n", + "### Xarray essentials\n", + "Earlier we introduced the xarray package and used `open_dataset` and `open_mfdataset` to retrieve a NetCDF data file as an xarray Dataset object. Recall that you can review the data in a notebook by using `print(ds)` or `ds`. \n", + "\n", + "Using xarray to retrieve the aforementioned NetCDF basics is straightforward. 
In general,\n", + "* Dataset: `ds`\n", + "* Data array: `ds[var_name]`\n", + "* Variable: `ds.variables`\n", + "* Dimension: `ds.dims`\n", + "* Coordinate: `ds.coords`\n", + "* Data Type: `type`\n", + "* Meta Data (Attributes): `ds.attrs`\n", + "\n", + "Also, here are some references if you are new to xarray\n", + "* [Xarray in 45 minutes](https://tutorial.xarray.dev/overview/xarray-in-45-min.html)\n", + "* [Handling NetCDF Files using XArray for Absolute Beginners](https://towardsdatascience.com/handling-netcdf-files-using-xarray-for-absolute-beginners-111a8ab4463f)\n", + "\n", + "Try the following commands in action\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b3eac323", + "metadata": {}, + "outputs": [], + "source": [ + "ds = ds_single" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "77ecf85d", + "metadata": {}, + "outputs": [], + "source": [ + "# Dataset \n", + "ds" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "25e7de09", + "metadata": {}, + "outputs": [], + "source": [ + "# Data array, Variable\n", + "# Note: the info can be overwhelming. 
Do not attempt to grasp everything at the first glance.\n", + "ds.variables" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8c41b67e", + "metadata": {}, + "outputs": [], + "source": [ + "# dimenssions\n", + "ds.dims" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "156f1dfc", + "metadata": {}, + "outputs": [], + "source": [ + "# coordinates\n", + "ds.coords" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "277d6064", + "metadata": {}, + "outputs": [], + "source": [ + "# Meta Data (Attributes)\n", + "ds.attrs" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d334681f", + "metadata": {}, + "outputs": [], + "source": [ + "# type\n", + "print(type(ds))\n", + "print(type(ds.variables))\n", + "print(type(ds.dims))\n", + "print(type(ds.coords))\n", + "print(type(ds.attrs))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "643399d6", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: retrieve variable names only\n", + "list(ds.variables)" + ] + }, + { + "cell_type": "markdown", + "id": "1cf36878", + "metadata": {}, + "source": [ + "#### Discussion: variable vs. coordinates vs. dimenssions. \n", + "From [Components of a NetCDF Dataset](https://iprc.soest.hawaii.edu/users/xfu/tool/guidef-7.html#):\n", + "\n", + "* Dimession: A dimension may be used to represent a real physical dimension, for example, time, latitude, longitude, or height. A dimension might also be used to index other quantities, for example station or model-run-number.\n", + "A netCDF dimension has both a name and a length. A dimension length is an arbitrary positive integer, except that one dimension in a netCDF dataset can have the length UNLIMITED.\n", + "\n", + "* Variables: Variables are used to store the bulk of the data in a netCDF dataset. A variable represents an array of values of the same type. A scalar value is treated as a 0-dimensional array. 
A variable has a name, a data type, and a shape described by its list of dimensions specified when the variable is created. A variable may also have associated attributes, which may be added, deleted or changed after the variable is created.\n", + "\n", + "* Coordinate (Variables): It is legal for a variable to have the same name as a dimension. Such variables have no special meaning to the netCDF library. However there is a convention that such variables should be treated in a special way by software using this library.\n", + "A variable with the same name as a dimension is called a coordinate variable. It typically defines a physical coordinate corresponding to that dimension.\n", + "\n", + "\n", + "Tips: By definition, coordinates and dimensions are also variables. For example, `ds.time` is a coordinate, but it is also a special variable. In practice, however, when we talk about a variable, we mean a regular (non-coordinate) variable. We will use this convention for the remainder of the notebook. " + ] + }, + { + "cell_type": "markdown", + "id": "d47120f9", + "metadata": {}, + "source": [ + "### Xarray Variable\n", + "\n", + "\n", + "We can use `ds.variables` to access the variables of a dataset. But printing out the whole dataset can often be overwhelming. Instead, we can work on an individual variable (also called an xarray data array) using the `ds[var_name]` syntax, where `var_name` is the name of the variable. See the following examples in action.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5a57e136", + "metadata": {}, + "outputs": [], + "source": [ + "print(type(ds[\"time\"]))\n", + "ds[\"time\"]" + ] + }, + { + "cell_type": "markdown", + "id": "51ec643e", + "metadata": {}, + "source": [ + "#### Variable properties\n", + "For an individual variable, the following properties are the most commonly used, listed with the associated xarray attribute to retrieve them. 
(assuming var = ds[var_name])\n", + "* name: `var.name`\n", + "* data content: `var.data`\n", + "* attributes: `var.attrs`\n", + "* dimensions: `var.dims`\n", + "* data type: `var.data.dtype`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a632a525", + "metadata": {}, + "outputs": [], + "source": [ + "var_name = \"time\"\n", + "var = ds[var_name]\n", + "\n", + "print(\"var.name: \\n\", var.name, \"\\n\")\n", + "print(\"var.data: \\n\", var.data, \"\\n\")\n", + "print(\"var.attrs: \\n\", var.attrs, \"\\n\")\n", + "print(\"var.dims: \\n\", var.dims, \"\\n\")\n", + "print(\"var.data.dtype: \\n\", var.data.dtype, \"\\n\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b256b07f", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: indivdual variable info table (collect and display variable info as a tabular format)\n", + "df_info = pd.DataFrame()\n", + "df_info[\"var_name\"] = list(ds.variables)\n", + "df_info[\"dims\"] = df_info.var_name.apply(lambda x: ds[x].dims)\n", + "df_info[\"is_dim\"] = df_info.var_name.apply(lambda x: x in ds.dims)\n", + "df_info[\"n_dim\"] = df_info.var_name.apply(lambda x: len(ds[x].dims))\n", + "df_info[\"attrs\"] = df_info.var_name.apply(lambda x: ds[x].attrs)\n", + "df_info[\"dtype\"] = df_info.var_name.apply(lambda x: ds[x].data.dtype)\n", + "df_info" + ] + }, + { + "cell_type": "markdown", + "id": "e6a36b96", + "metadata": {}, + "source": [ + "### Data cleaning/Preprocessing (skipped)\n", + "Data cleaning and preprocessing is an important stage in the data analysis pipeline. However, it is out of the scope of this notebook. For more information about data cleaning and preprocessing basics, here are some references. 
\n", + "* [Xarray Fundamentals](https://earth-env-data-science.github.io/lectures/xarray/xarray.html)\n", + "* [Xarray Tutorial — Pangeo Gallery documentation](http://gallery.pangeo.io/repos/pangeo-data/pangeo-tutorial-gallery/xarray.html)\n", + "* [Pythonic Data Cleaning With pandas and NumPy](https://realpython.com/python-data-cleaning-numpy-pandas/)\n", + "* [Pandas - Cleaning Data](https://www.w3schools.com/python/pandas/pandas_cleaning.asp)" + ] + }, + { + "cell_type": "markdown", + "id": "ae6e1a69", + "metadata": {}, + "source": [ + "### Plotting\n", + "Data visualization (or plotting) is another important data analysis topic and deserves its own discussion. Here in this notebook we will only demonstrate basic tools for simple data visualization tasks.\n", + "\n", + "Here are some references you might find useful:\n", + "* xarray plotting: https://docs.xarray.dev/en/stable/user-guide/plotting.html\n", + "* Atmospheric Community Toolkit (ACT): https://arm-doe.github.io/ACT/index.html\n", + "\n", + "Note: this notebook is auto-generated using a template. It uses a general idea to select variable(s) to plot and is not customized to each individual notebook. Feel free to change the variables of interest, especially if a certain figure fails to plot." + ] + }, + { + "cell_type": "markdown", + "id": "7e417fb2", + "metadata": {}, + "source": [ + "#### 1-dimensional basic time series plot\n", + "\n", + "For the following plot we would like to find variables such that\n", + "* it has one and only one dimension\n", + "* \"time\" is its coordinate variable\n", + "* it is not a dimension itself,\n", + "* it is not a special variable whose name contains any of the substrings [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "\n", + "Please see the following example in action. 
(For more details about pandas filtering, please see the following references.)\n", + "* [pandas: multiple conditions while indexing data frame](https://stackoverflow.com/questions/22591174/pandas-multiple-conditions-while-indexing-data-frame-unexpected-behavior)\n", + "* [pandas.DataFrame.apply](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.apply.html)\n", + "* [How to test if a string contains one of the substrings in a list...](https://stackoverflow.com/questions/26577516/how-to-test-if-a-string-contains-one-of-the-substrings-in-a-list-in-pandas)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "30edac2d", + "metadata": {}, + "outputs": [], + "source": [ + "# (pandas query) filter n_dim==1, non-dimenssional, and not contains the following substrings\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter = df_info[(df_info.n_dim == 1) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (df_info.is_dim==False) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4777a659", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " var_1d = df_filter.var_name.values[0]\n", + " var_1d\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fb5ae985", + "metadata": {}, + "outputs": [], + "source": [ + "# Note: if failed, change to another variable to plot.\n", + "try:\n", + " ds[var_1d].plot()\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "markdown", + "id": "9e41066e", + "metadata": {}, + "source": [ + "#### 2-dimensional basic plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "09bd4adc", + "metadata": {}, + "outputs": [], + "source": [ + "# (pandas query) filter n_dim==2, non-dimenssional, and not contains the 
following substrings\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_2 = df_info[(df_info.n_dim == 2) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (df_info.is_dim==False) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter_2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2cdea1a9", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " var_2d = df_filter_2.var_name.values[0]\n", + " var_2d\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f5f48879", + "metadata": {}, + "outputs": [], + "source": [ + "# Note: if failed, change to another variable to plot.\n", + "try:\n", + " print(ds[var_2d].dims)\n", + " # ds[var_2d].plot()\n", + "\n", + " # conventionally, use \"time\" as x-axis\n", + " ds[var_2d].plot(x=\"time\")\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "markdown", + "id": "7554b2a6", + "metadata": {}, + "source": [ + "#### qc-plotting (optional)\n", + "\n", + "Note: act qc plotting has more strict requirement, one of them is the associated qc variable need to have \"flag_masks\" attributes. 
Which is added by using ds = act.io.armfiles.read_netcdf(files_list), then ds.clean.cleanup()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "71c2096f", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " ds_act = act.io.armfiles.read_netcdf(full_path)\n", + " print(type(ds_act))\n", + " ds_act.clean.cleanup()\n", + "\n", + " # or \n", + " # ds.clean.cleanup()\n", + "except Exception as e:\n", + " print(\"ERROR\", e)\n", + " ds_act = ds" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "844f8505", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter valid variables for ACT qc plotting\n", + "condition = (\"qc_\" + df_info.var_name).apply(lambda x: ds_act[x].attrs.get(\"flag_masks\") is not None \n", + " if x in list(ds.variables) else False)\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_3 = df_info[(df_info.is_dim==False) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings))) &\n", + " condition\n", + " ]\n", + "df_filter_3" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ecd38cc6", + "metadata": {}, + "outputs": [], + "source": [ + "# QC Plot\n", + "try:\n", + " qc_variable = df_filter_3.var_name.values[0]\n", + " print(qc_variable)\n", + "except Exception as e:\n", + " print(e)\n", + " \n", + "try:\n", + "\n", + " # Plot\n", + " qc_display = act.plotting.TimeSeriesDisplay(ds_act)\n", + " qc_display.add_subplots((2,), figsize = (9.5,10))\n", + " qc_ax = qc_display.plot(qc_variable, subplot_index=(0,), set_title=\"QC results on field: \" + qc_variable,)\n", + " qc_ax.grid()\n", + " qc_display.qc_flag_block_plot(qc_variable, subplot_index=(1,))\n", + " qc_ax.set_xlabel(f\"UTC Time starts at {ds.time.data[0]}\")\n", + "\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e)" + ] + }, + { + "cell_type": "markdown", + "id": "d76e4d27", + "metadata": 
{}, + "source": [ + "#### bonus: choose variables to plot from a dropdown menu " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bb733804", + "metadata": {}, + "outputs": [], + "source": [ + "# Valid variables filtering\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_4 = df_info[(df_info.is_dim==False) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter_4" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "82d5c20d", + "metadata": {}, + "outputs": [], + "source": [ + "# example 1: using xarray plot\n", + "\n", + "# Uncomment the following cell to try the interactive plot (ctrl + /)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2fc9aaa4", + "metadata": {}, + "outputs": [], + "source": [ + "# %matplotlib widget\n", + "# plt.clf()\n", + "\n", + "# fig, ax = plt.subplots(figsize=(10, 4))\n", + "\n", + "# available_variables = df_filter_4.var_name.values\n", + "# @widgets.interact(var=available_variables)\n", + "# def update(var = available_variables[0]):\n", + "# fig.clear() # Remove old lines from plot and plot new one\n", + "# if len(ds[var].dims)==2:\n", + "# ds[var].plot(x=\"time\", add_colorbar=False)\n", + "# else:\n", + "# ds[var].plot()\n", + "# plt.grid()\n", + "# plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fabdd802", + "metadata": {}, + "outputs": [], + "source": [ + "# example 2: using act plot\n", + "\n", + "# Uncomment the following cell to try the interactive plot (ctrl + /)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "962b4186", + "metadata": {}, + "outputs": [], + "source": [ + "# %matplotlib widget\n", + "# plt.clf()\n", + "\n", + "# available_variables = df_filter_4.var_name.values\n", + "\n", + "\n", + "# @widgets.interact(var=available_variables)\n", + "# def 
update(var = available_variables[0]):\n", + "\n", + "# i_display = act.plotting.TimeSeriesDisplay(ds_act)\n", + "# i_display.add_subplots((1,), figsize=(10, 4))\n", + "# ax = i_display.plot(var, subplot_index=(0,), set_title=f\"{var} ({ds_act[var].attrs['long_name']})\",)\n", + "\n", + "# ax.set_xlabel(f\"UTC Time starts at {ds.time.data[0]}\")\n", + "# ax.grid()\n", + "# plt.show()\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.16" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/LSSONDE/lssonde.c1.ipynb b/VAPs/quicklook/LSSONDE/lssonde.c1.ipynb new file mode 100644 index 00000000..3a9c6d6c --- /dev/null +++ b/VAPs/quicklook/LSSONDE/lssonde.c1.ipynb @@ -0,0 +1,297 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# LSSONDE.C1 Plots\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/lssonde) for more information about this vap." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib widget\n", + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import ipywidgets as widgets\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "# Data archive directory\n", + "DATA_DIR = r'/data/archive/'\n", + "\n", + "# Datastream info\n", + "DATASTREAM_NAME = 'lssonde'\n", + "DATA_LEVEL = 'c1'\n", + "LOCATIONS = [{'end_date': '2015-02-10', 'facility': 'M1', 'site': 'acx', 'start_date': '2015-01-12'}, {'end_date': '2000-10-08', 'facility': 'B1', 'site': 'sgp', 'start_date': '1997-06-16'}, {'end_date': '2000-10-08', 'facility': 'B4', 'site': 'sgp', 'start_date': '1997-06-16'}, {'end_date': '2000-10-08', 'facility': 'B5', 'site': 'sgp', 'start_date': '1997-06-16'}, {'end_date': '2000-10-05', 'facility': 'B6', 'site': 'sgp', 'start_date': '1997-06-16'}, {'end_date': '2023-12-18', 'facility': 'C1', 'site': 'sgp', 'start_date': '1994-04-11'}]" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "9faaf875", + "metadata": {}, + "source": [ + "## Define site, facility, and date range" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ac6764f5", + "metadata": {}, + "outputs": [], + "source": [ + "print(\"The following locations and date ranges are available for this VAP:\")\n", + "display(pd.DataFrame(LOCATIONS, columns=['site', 'facility', 'start_date', 'end_date']))" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "8d132223", + "metadata": {}, + "source": [ + "#### Define site, facility, and date range (date format: YYYY-MM-DD) using the variables below:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e563983a", + "metadata": {}, + "outputs": [], + "source": [ 
+ "site_facility = ( 'sgp', 'B1' )\n", + "\n", + "date_start = '2000-10-07'\n", + "date_end = '2000-10-08'" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "bccd3dfe-2f99-49a5-bace-ea37e8dd8fc3", + "metadata": {}, + "source": [ + "## Load data files\n", + "Load data files from /data/archive/" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cb4b9a26-c574-49c0-a521-658fa553e39e", + "metadata": {}, + "outputs": [], + "source": [ + "# Compile list of files\n", + "site, facility = site_facility\n", + "d_date_start = datetime.strptime(date_start, '%Y-%m-%d')\n", + "d_date_end = datetime.strptime(date_end, '%Y-%m-%d')\n", + "dir_path = os.path.join(DATA_DIR + site, site + DATASTREAM_NAME + facility + r'.' + DATA_LEVEL )\n", + "dir_path\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6be8f3dc", + "metadata": {}, + "outputs": [], + "source": [ + "from datetime import date, timedelta\n", + "import pandas as pd\n", + "\n", + "def get_ARM_formated_dates(start_date, end_date):\n", + " \"\"\"\n", + " Get a list of ARM conventional formated date lists, based on start_date and end_date(inclusive)\n", + " EXAMPLE:\n", + " get_ARM_formated_dates(start_date=\"20180219\", end_date=\"20180221\")\n", + " >> [\"20180219\", \"20180220\", \"20180221\"] \n", + " \"\"\"\n", + " \n", + " _start_date = pd.to_datetime(start_date)\n", + " _end_date = pd.to_datetime(end_date)\n", + " \n", + " delta = _end_date - _start_date # returns timedelta \n", + " dates = []\n", + "\n", + " for i in range(delta.days + 1):\n", + " day = _start_date + timedelta(days=i)\n", + " day_formated = day.strftime(format=\"%Y%m%d\")\n", + " dates.append(day_formated)\n", + " return dates\n", + "\n", + "\n", + "get_ARM_formated_dates(start_date=date_start, end_date=date_end)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "51feea2e", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter a list of files based on date 
pattern\n", + "import glob\n", + "dates = get_ARM_formated_dates(start_date=date_start, end_date=date_end)\n", + "files_filter = []\n", + "for date in dates:\n", + " files_filter += glob.glob(f'{dir_path}/*.{date}*.*')\n", + " files_filter\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0e5d0d7", + "metadata": {}, + "outputs": [], + "source": [ + "# Load files as a single dataset\n", + "files_list = files_filter \n", + "ds = act.io.armfiles.read_netcdf(files_list)\n", + "ds.clean.cleanup()\n", + "print(f'{len(files_list)} files loaded')\n", + "ds\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "4a551094-9ec0-4b64-b80a-9940573c2f50", + "metadata": {}, + "source": [ + "## Plot time series data\n", + "#### Define the list of variables to be plotted:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d09b789e-84f1-4605-846b-a72c110c8048", + "metadata": {}, + "outputs": [], + "source": [ + "variables_to_plot = ['pres', 'tdry', 'rh']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3458fb08-035b-4898-9253-0a94e6f9c97b", + "metadata": {}, + "outputs": [], + "source": [ + "ts_display = act.plotting.TimeSeriesDisplay(ds)\n", + "ts_display.add_subplots((len(variables_to_plot),), figsize = (9.5,4*len(variables_to_plot)))\n", + "\n", + "for i,v in enumerate(variables_to_plot):\n", + " ts_ax = ts_display.plot(v, subplot_index=(i,), set_title=ds.variables[v].attrs['long_name'],linestyle='None')\n", + " ts_ax.grid()\n", + "\n", + "plt.show()\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "2308db16-f362-4033-a11a-c3e5e75ad9ba", + "metadata": {}, + "source": [ + "## Field selection dropdown menu\n", + "Select variable to be plotted from a dropdown menu" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d5792fa8-2253-441a-8ac6-1e07a0345031", + "metadata": {}, + "outputs": [], + "source": [ + "plt.ioff()\n", + "\n", + "# 
populate dropdown menu with available variables \n", + "available_variables = [v for v in ds.variables if not('time' in v or v.startswith('qc_') or v.startswith('source_')) and 'long_name' in ds.variables[v].attrs]\n", + "d_variable = 'pres'\n", + "dropdown = widgets.Dropdown(\n", + " options = [(ds.variables[v].attrs['long_name'], v) for v in available_variables],\n", + " value= d_variable,\n", + " description='Field:',\n", + " disabled=False,\n", + ")\n", + "dropdown.layout.margin = '0px 30% 0px 20%'\n", + "dropdown.layout.width = '50%'\n", + "\n", + "# set up display\n", + "i_display = act.plotting.TimeSeriesDisplay(ds)\n", + "i_display.add_subplots((1,), figsize = (9.5,5))\n", + "i_ax = i_display.plot(d_variable, subplot_index=(0,), set_title=ds.variables[d_variable].attrs['long_name'],linestyle='None')\n", + "i_ax.grid()\n", + "i_fig = i_display.fig\n", + "\n", + "# update plot callback function\n", + "def update_plot(change):\n", + " i_ax.cla()\n", + " i_ax_new = i_display.plot(change.new, subplot_index=(0,), set_title=ds.variables[change.new].attrs['long_name'],linestyle='None')\n", + " i_ax_new.grid()\n", + " i_fig.canvas.draw()\n", + " i_fig.canvas.flush_events()\n", + "\n", + "dropdown.observe(update_plot, names='value')\n", + "\n", + "widgets.AppLayout(\n", + " header=dropdown,\n", + " center=i_fig.canvas,\n", + " pane_heights=[1, 6,1]\n", + ")\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.12" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git 
a/VAPs/quicklook/MASCPARTICLES/MASCPARTICLES_tutorial.ipynb b/VAPs/quicklook/MASCPARTICLES/MASCPARTICLES_tutorial.ipynb new file mode 100644 index 00000000..1b42502a --- /dev/null +++ b/VAPs/quicklook/MASCPARTICLES/MASCPARTICLES_tutorial.ipynb @@ -0,0 +1,838 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# MASCPARTICLES.C1 Notebook\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/mascparticles) for more information about this VAP." + ] + }, + { + "cell_type": "markdown", + "id": "97097763", + "metadata": {}, + "source": [ + "In this notebook, we demonstrate the workflow to explore ARM VAP data (using mascparticles as an example). Value-added products (VAPs) are higher-order data products that have been analyzed and processed to ease scientists’ use of ARM data in atmospheric research and global climate models. \n", + "Here is the main content we will cover." + ] + }, + { + "cell_type": "markdown", + "id": "eddec40f", + "metadata": {}, + "source": [ + "# Table of Contents\n", + "## Access the data\n", + "* How to retrieve the data\n", + "* Data path and file name conventions\n", + "* Load data\n", + "## Explore the data\n", + "* NetCDF Data structure\n", + "* Xarray essentials\n", + "* Xarray Variable\n", + "## Plot the data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "import random\n", + "\n", + "import glob\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "id": "2372d5be", + "metadata": {}, + "source": [ + "## Access the data" + ] + }, + { + "cell_type": "markdown", + "id": "5b0d9684", + 
"metadata": {}, + "source": [ + "### How to retrieve the data\n", + "We assume the path \"/data/archive\" is available where you are running this notebook. You can use `os.path.exists(\"/data/archive\")` to verify if the path exists on your machine.\n", + "\n", + "\n", + "### Data path and file name conventions\n", + "There are several common terms regarding ARM data, for example, data-stream-name, data-level, site, facility, instrument, etc. (For more details, please see [ARM Data File Standards Version 1.2](https://www.google.com/search?q=arm+datastream+facility+cite+definition&rlz=1C1GCEJ_enUS1029US1029&ei=hb41ZICrBPukqtsPvMODIA&ved=0ahUKEwjAgeiS0aL-AhV7kmoFHbzhAAQQ4dUDCBA&uact=5&oq=arm+datastream+facility+cite+definition&gs_lcp=Cgxnd3Mtd2l6LXNlcnAQAzIFCAAQogQyBQgAEKIEMgUIABCiBDIFCAAQogQyBQgAEKIEOgoIABBHENYEELADOgoIIRCgARDDBBAKSgQIQRgAUJ4DWIoOYO0PaAFwAXgAgAGMAYgBuQmSAQMyLjmYAQCgAQHIAQjAAQE&sclient=gws-wiz-serp).) \n", + "\n", + "For example, this notebook is called `mascparticles.c1`, where `mascparticles` is the \"datastream name\", and `c1` is the \"data level\".\n", + "\n", + "This datastream also contains site `oli` and facility `M1`. (Note: an individual datastream might have multiple site-facility pairs.)\n", + "In such a case, the data of this data-stream is stored at `/data/archive/oli/olimascparticlesM1.c1`, which is in the format of `{DATA_DIR}/{site}/{site}{DATASTREAM_NAME}{facility}.{DATA_LEVEL}`. We can use the following method to assign the data-stream directory `datastream_dir = os.path.join(DATA_DIR, site, site + DATASTREAM_NAME + facility + '.' + DATA_LEVEL )`\n", + "\n", + "The data files under datastream_dir also follow naming conventions. But once you reach the datastream_dir level, the most important file naming convention to differentiate the files is \"yyyyMMdd.hhmmss\", which comes in handy for filtering files based on datetime. 
For example, we can use `glob.glob(f'{datastream_dir}/*.200709*.*')` to select the files from September 2007.\n", + "\n", + "(Note: refer to https://adc.arm.gov/discovery/#/ and the https://adc.arm.gov/solr8/metadata/select API to explore ARM datastreams and their associated available sites and facilities.)\n", + "\n", + "Please see the following examples in action." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d7e9eb85", + "metadata": {}, + "outputs": [], + "source": [ + "# Verify if DATA_DIR path exists\n", + "DATA_DIR = \"/data/archive\"\n", + "os.path.exists(DATA_DIR)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "586993fd", + "metadata": {}, + "outputs": [], + "source": [ + "# Specify datastream_dir following the path conventions and check its existence\n", + "DATASTREAM_NAME = \"mascparticles\"\n", + "DATA_LEVEL = \"c1\"\n", + "site = \"oli\"\n", + "facility = \"M1\"\n", + "datastream_dir = os.path.join(DATA_DIR, site, site + DATASTREAM_NAME + facility + '.' 
+ DATA_LEVEL )\n", + "print(datastream_dir)\n", + "print(os.path.exists(datastream_dir))\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0742f7c1", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: list 5 (random) files under datastream_dir\n", + "files = os.listdir(datastream_dir)\n", + "files[:5]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "39b98a36", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: get most recent file\n", + "list_of_files = glob.glob(f\"{datastream_dir}/*\") # * means all if need specific format then *.csv\n", + "latest_file = max(list_of_files, key=os.path.getctime)\n", + "latest_file" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "902d514e", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus sort datastream files based on datetime\n", + "files = os.listdir(datastream_dir)\n", + "file_sorted = files.copy()\n", + "file_sorted.sort() \n", + "print(file_sorted[:5])\n", + "\n", + "# to reverse\n", + "file_sorted_reverse = files.copy() \n", + "file_sorted_reverse.sort(reverse=True)\n", + "print(file_sorted_reverse[:5])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ec5923b2", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: pattern matching\n", + "# filter the 200709** files under datastream_dir\n", + "files_filter = glob.glob(f'{datastream_dir}/*.200709*.*')\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c4a4aa18", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: use armnotebook_utils.file_filter (TODO) to filter files based on datastream info \n" + ] + }, + { + "cell_type": "markdown", + "id": "cfeb9efc", + "metadata": {}, + "source": [ + "### Load data\n", + "ARM data is stored in [NetCDF](https://en.wikipedia.org/wiki/NetCDF) format. 
We can use xarray's `open_dataset` method to load a single file, or `open_mfdataset` to open multiple files. (Note: the latter will still return a single xarray dataset object by combining multiple files.)\n", + "\n", + "Note: `open_dataset` keeps the file handle open and lazily loads its contents, whereas `load_dataset` (a thin wrapper that passes all of its parameters directly to `open_dataset`) loads the whole dataset into memory and closes the file. `open_dataset` is therefore preferable to `load_dataset` for memory efficiency. (For more details, please see [xarray.open_dataset](https://docs.xarray.dev/en/stable/generated/xarray.open_dataset.html).)\n", + "\n", + "See the following example in action." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a440a329", + "metadata": {}, + "outputs": [], + "source": [ + "# open single file\n", + "full_path = latest_file\n", + "print(full_path)\n", + "ds_single = xr.open_dataset(full_path)\n", + "ds_single" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0143a3d", + "metadata": {}, + "outputs": [], + "source": [ + "print(ds_single)\n", + "print(type(ds_single))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1c0f8939", + "metadata": {}, + "outputs": [], + "source": [ + "# open multiple files\n", + "n_files = 3\n", + "full_paths = [os.path.join(datastream_dir, f_path) for f_path in file_sorted_reverse[:n_files]]\n", + "print(full_paths)\n", + "try: # Note: sometimes multiple files cannot be merged, so we used try except here.\n", + " ds_mutiple = xr.open_mfdataset(full_paths)\n", + " # ds_mutiple\n", + " print(type(ds_mutiple))\n", + " print(ds_mutiple)\n", + "except Exception as e:\n", + " print(e)" + ] + }, + { + "cell_type": "markdown", + "id": "3fa59943", + "metadata": {}, + "source": [ + "## Explore the data" + ] + }, + { + "cell_type": "markdown", + "id": "571f69b9", + "metadata": {}, + "source": [ + "### NetCDF Data structure \n", + "\n", + "\n", + "(If you are confident with NetCDF basics and xarray essentials, feel free to skip this section.)\n", + "Before we dive into 
data exploration, there are some essential concepts we should be familiar with (shown below): \n", + "* Dataset\n", + "* Data array\n", + "* Variable\n", + "* Dimension\n", + "* Coordinate\n", + "* Data Type\n", + "* Meta Data (Attributes)\n", + "\n", + "We will not go into details about NetCDF basics, but here are some references you might find helpful:\n", + "* [Components of a NetCDF Dataset](https://iprc.soest.hawaii.edu/users/xfu/tool/guidef-7.html#)\n", + "* [Network Common Data Form (NetCDF)](https://www.unidata.ucar.edu/software/netcdf/) and [A Brief History of (netCDF) Time](https://www.unidata.ucar.edu/software/netcdf/time/recs.html)\n", + "\n", + "\n", + "\n", + "### Xarray essentials\n", + "Earlier we introduced the xarray package and used `open_dataset` and `open_mfdataset` to retrieve NetCDF data files as an xarray Dataset object. Recall that you can review the data in a notebook by using `print(ds)` or `ds`. \n", + "\n", + "Using xarray to retrieve the aforementioned NetCDF basics is straightforward. 
In general,\n", + "* Dataset: `ds`\n", + "* Data array: `ds.variables`\n", + "* Variable: `ds.variables`\n", + "* Dimension: `ds.dims`\n", + "* Coordinate: `ds.coords`\n", + "* Data Type: `type`\n", + "* Meta Data (Attributes): `ds.attrs`\n", + "\n", + "Also, here are some references if you are new to xarray:\n", + "* [Xarray in 45 minutes](https://tutorial.xarray.dev/overview/xarray-in-45-min.html)\n", + "* [Handling NetCDF Files using XArray for Absolute Beginners](https://towardsdatascience.com/handling-netcdf-files-using-xarray-for-absolute-beginners-111a8ab4463f)\n", + "\n", + "Try the following commands in action.\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b3eac323", + "metadata": {}, + "outputs": [], + "source": [ + "ds = ds_single" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "77ecf85d", + "metadata": {}, + "outputs": [], + "source": [ + "# Dataset \n", + "ds" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "25e7de09", + "metadata": {}, + "outputs": [], + "source": [ + "# Data array, Variable\n", + "# Note: the info can be overwhelming. 
Do not attempt to grasp everything at the first glance.\n", + "ds.variables" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8c41b67e", + "metadata": {}, + "outputs": [], + "source": [ + "# dimensions\n", + "ds.dims" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "156f1dfc", + "metadata": {}, + "outputs": [], + "source": [ + "# coordinates\n", + "ds.coords" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "277d6064", + "metadata": {}, + "outputs": [], + "source": [ + "# Meta Data (Attributes)\n", + "ds.attrs" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d334681f", + "metadata": {}, + "outputs": [], + "source": [ + "# type\n", + "print(type(ds))\n", + "print(type(ds.variables))\n", + "print(type(ds.dims))\n", + "print(type(ds.coords))\n", + "print(type(ds.attrs))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "643399d6", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: retrieve variable names only\n", + "list(ds.variables)" + ] + }, + { + "cell_type": "markdown", + "id": "1cf36878", + "metadata": {}, + "source": [ + "#### Discussion: variables vs. coordinates vs. dimensions. \n", + "From [Components of a NetCDF Dataset](https://iprc.soest.hawaii.edu/users/xfu/tool/guidef-7.html#):\n", + "\n", + "* Dimension: A dimension may be used to represent a real physical dimension, for example, time, latitude, longitude, or height. A dimension might also be used to index other quantities, for example station or model-run-number.\n", + "A netCDF dimension has both a name and a length. A dimension length is an arbitrary positive integer, except that one dimension in a netCDF dataset can have the length UNLIMITED.\n", + "\n", + "* Variables: Variables are used to store the bulk of the data in a netCDF dataset. A variable represents an array of values of the same type. A scalar value is treated as a 0-dimensional array. 
A variable has a name, a data type, and a shape described by its list of dimensions specified when the variable is created. A variable may also have associated attributes, which may be added, deleted or changed after the variable is created.\n", + "\n", + "* Coordinate (Variables): It is legal for a variable to have the same name as a dimension. Such variables have no special meaning to the netCDF library. However there is a convention that such variables should be treated in a special way by software using this library.\n", + "A variable with the same name as a dimension is called a coordinate variable. It typically defines a physical coordinate corresponding to that dimension.\n", + "\n", + "\n", + "Tips: By definition, coordinates and dimensions are also variables. For example, ds.time is a coordinate, but it is also a special variable. In practice, however, when we talk about a variable, we mean a regular (non-coordinate) variable. We will use this convention for the rest of this notebook. " + ] + }, + { + "cell_type": "markdown", + "id": "d47120f9", + "metadata": {}, + "source": [ + "### Xarray Variable\n", + "\n", + "\n", + "We can use `ds.variables` to access the variables of a dataset. But printing out the whole dataset can often be overwhelming. Instead, we will work on an individual variable (also called an Xarray data array) using the `ds[var_name]` syntax, where `var_name` is the name of the variable. See the following examples in action.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5a57e136", + "metadata": {}, + "outputs": [], + "source": [ + "print(type(ds[\"time\"]))\n", + "ds[\"time\"]" + ] + }, + { + "cell_type": "markdown", + "id": "51ec643e", + "metadata": {}, + "source": [ + "#### Variable properties\n", + "For an individual variable, the following properties are the most commonly used, listed with the associated xarray attribute to retrieve them. 
(assuming var = ds[var_name])\n", + "* name: `var.name`\n", + "* data content: `var.data`\n", + "* attributes: `var.attrs`\n", + "* dimensions: `var.dims`\n", + "* data type: `var.data.dtype`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a632a525", + "metadata": {}, + "outputs": [], + "source": [ + "var_name = \"time\"\n", + "var = ds[var_name]\n", + "\n", + "print(\"var.name: \\n\", var.name, \"\\n\")\n", + "print(\"var.data: \\n\", var.data, \"\\n\")\n", + "print(\"var.attrs: \\n\", var.attrs, \"\\n\")\n", + "print(\"var.dims: \\n\", var.dims, \"\\n\")\n", + "print(\"var.data.dtype: \\n\", var.data.dtype, \"\\n\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b256b07f", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: individual variable info table (collect and display variable info in a tabular format)\n", + "df_info = pd.DataFrame()\n", + "df_info[\"var_name\"] = list(ds.variables)\n", + "df_info[\"dims\"] = df_info.var_name.apply(lambda x: ds[x].dims)\n", + "df_info[\"is_dim\"] = df_info.var_name.apply(lambda x: x in ds.dims)\n", + "df_info[\"n_dim\"] = df_info.var_name.apply(lambda x: len(ds[x].dims))\n", + "df_info[\"attrs\"] = df_info.var_name.apply(lambda x: ds[x].attrs)\n", + "df_info[\"dtype\"] = df_info.var_name.apply(lambda x: ds[x].data.dtype)\n", + "df_info" + ] + }, + { + "cell_type": "markdown", + "id": "e6a36b96", + "metadata": {}, + "source": [ + "### Data cleaning/Preprocessing (skipped)\n", + "Data cleaning and preprocessing is an important stage in the data analysis pipeline. However, it is outside the scope of this notebook. For more information about data cleaning and preprocessing basics, here are some references. 
\n", + "* [Xarray Fundamentals](https://earth-env-data-science.github.io/lectures/xarray/xarray.html)\n", + "* [Xarray Tutorial — Pangeo Gallery documentation](http://gallery.pangeo.io/repos/pangeo-data/pangeo-tutorial-gallery/xarray.html)\n", + "* [Pythonic Data Cleaning With pandas and NumPy](https://realpython.com/python-data-cleaning-numpy-pandas/)\n", + "* [Pandas - Cleaning Data](https://www.w3schools.com/python/pandas/pandas_cleaning.asp)" + ] + }, + { + "cell_type": "markdown", + "id": "ae6e1a69", + "metadata": {}, + "source": [ + "### Plotting\n", + "Data visualization (or plotting) is another important data analysis topic and deserves its own discussion. In this notebook we will only demonstrate basic tools for simple data visualization tasks.\n", + "\n", + "Here are some references you might find useful:\n", + "* xarray plotting: https://docs.xarray.dev/en/stable/user-guide/plotting.html\n", + "* Atmospheric Community Toolkit (ACT): https://arm-doe.github.io/ACT/index.html\n", + "\n", + "Note: this notebook is auto-generated using a template. It uses a general heuristic to select the variable(s) to plot and is not customized to each individual notebook. Feel free to change the variables of interest, especially if a certain figure fails to plot." + ] + }, + { + "cell_type": "markdown", + "id": "7e417fb2", + "metadata": {}, + "source": [ + "#### 1-dimensional basic time series plot\n", + "\n", + "For the following plot we would like to find variables such that\n", + "* it has one and only one dimension\n", + "* \"time\" is its coordinate variable\n", + "* it is not a dimension itself,\n", + "* it is not a special variable with substrings within [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "\n", + "Please see the following example in action. 
(For more details about pandas filtering, please see the following references.)\n", + "* [pandas: multiple conditions while indexing data frame](https://stackoverflow.com/questions/22591174/pandas-multiple-conditions-while-indexing-data-frame-unexpected-behavior)\n", + "* [pandas.DataFrame.apply](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.apply.html)\n", + "* [How to test if a string contains one of the substrings in a list...](https://stackoverflow.com/questions/26577516/how-to-test-if-a-string-contains-one-of-the-substrings-in-a-list-in-pandas)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "30edac2d", + "metadata": {}, + "outputs": [], + "source": [ + "# (pandas query) filter n_dim==1, non-dimensional, and not contains the following substrings\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter = df_info[(df_info.n_dim == 1) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (df_info.is_dim==False) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4777a659", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " var_1d = df_filter.var_name.values[0]\n", + " var_1d\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fb5ae985", + "metadata": {}, + "outputs": [], + "source": [ + "# Note: if failed, change to another variable to plot.\n", + "try:\n", + " ds[var_1d].plot()\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "markdown", + "id": "9e41066e", + "metadata": {}, + "source": [ + "#### 2-dimensional basic plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "09bd4adc", + "metadata": {}, + "outputs": [], + "source": [ + "# (pandas query) filter n_dim==2, non-dimensional, and not contains the 
following substrings\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_2 = df_info[(df_info.n_dim == 2) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (df_info.is_dim==False) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter_2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2cdea1a9", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " var_2d = df_filter_2.var_name.values[0]\n", + " var_2d\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f5f48879", + "metadata": {}, + "outputs": [], + "source": [ + "# Note: if failed, change to another variable to plot.\n", + "try:\n", + " print(ds[var_2d].dims)\n", + " # ds[var_2d].plot()\n", + "\n", + " # conventionally, use \"time\" as x-axis\n", + " ds[var_2d].plot(x=\"time\")\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "markdown", + "id": "7554b2a6", + "metadata": {}, + "source": [ + "#### qc-plotting (optional)\n", + "\n", + "Note: act qc plotting has more strict requirement, one of them is the associated qc variable need to have \"flag_masks\" attributes. 
Which is added by using ds = act.io.armfiles.read_netcdf(files_list), then ds.clean.cleanup()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "71c2096f", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " ds_act = act.io.armfiles.read_netcdf(full_path)\n", + " print(type(ds_act))\n", + " ds_act.clean.cleanup()\n", + "\n", + " # or \n", + " # ds.clean.cleanup()\n", + "except Exception as e:\n", + " print(\"ERROR\", e)\n", + " ds_act = ds" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "844f8505", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter valid variables for ACT qc plotting\n", + "condition = (\"qc_\" + df_info.var_name).apply(lambda x: ds_act[x].attrs.get(\"flag_masks\") is not None \n", + " if x in list(ds.variables) else False)\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_3 = df_info[(df_info.is_dim==False) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings))) &\n", + " condition\n", + " ]\n", + "df_filter_3" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ecd38cc6", + "metadata": {}, + "outputs": [], + "source": [ + "# QC Plot\n", + "try:\n", + " qc_variable = df_filter_3.var_name.values[0]\n", + " print(qc_variable)\n", + "except Exception as e:\n", + " print(e)\n", + " \n", + "try:\n", + "\n", + " # Plot\n", + " qc_display = act.plotting.TimeSeriesDisplay(ds_act)\n", + " qc_display.add_subplots((2,), figsize = (9.5,10))\n", + " qc_ax = qc_display.plot(qc_variable, subplot_index=(0,), set_title=\"QC results on field: \" + qc_variable,)\n", + " qc_ax.grid()\n", + " qc_display.qc_flag_block_plot(qc_variable, subplot_index=(1,))\n", + " qc_ax.set_xlabel(f\"UTC Time starts at {ds.time.data[0]}\")\n", + "\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e)" + ] + }, + { + "cell_type": "markdown", + "id": "d76e4d27", + "metadata": 
{}, + "source": [ + "#### bonus: choose variables to plot from a dropdown menu " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bb733804", + "metadata": {}, + "outputs": [], + "source": [ + "# Valid variables filtering\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_4 = df_info[(df_info.is_dim==False) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter_4" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "82d5c20d", + "metadata": {}, + "outputs": [], + "source": [ + "# example 1: using xarray plot\n", + "\n", + "# Uncomment the following cell to try the interactive plot (ctrl + /)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2fc9aaa4", + "metadata": {}, + "outputs": [], + "source": [ + "# %matplotlib widget\n", + "# plt.clf()\n", + "\n", + "# fig, ax = plt.subplots(figsize=(10, 4))\n", + "\n", + "# available_variables = df_filter_4.var_name.values\n", + "# @widgets.interact(var=available_variables)\n", + "# def update(var = available_variables[0]):\n", + "# fig.clear() # Remove old lines from plot and plot new one\n", + "# if len(ds[var].dims)==2:\n", + "# ds[var].plot(x=\"time\", add_colorbar=False)\n", + "# else:\n", + "# ds[var].plot()\n", + "# plt.grid()\n", + "# plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fabdd802", + "metadata": {}, + "outputs": [], + "source": [ + "# example 2: using act plot\n", + "\n", + "# Uncomment the following cell to try the interactive plot (ctrl + /)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "962b4186", + "metadata": {}, + "outputs": [], + "source": [ + "# %matplotlib widget\n", + "# plt.clf()\n", + "\n", + "# available_variables = df_filter_4.var_name.values\n", + "\n", + "\n", + "# @widgets.interact(var=available_variables)\n", + "# def 
update(var = available_variables[0]):\n", + "\n", + "# i_display = act.plotting.TimeSeriesDisplay(ds_act)\n", + "# i_display.add_subplots((1,), figsize=(10, 4))\n", + "# ax = i_display.plot(var, subplot_index=(0,), set_title=f\"{var} ({ds_act[var].attrs['long_name']})\",)\n", + "\n", + "# ax.set_xlabel(f\"UTC Time starts at {ds.time.data[0]}\")\n", + "# ax.grid()\n", + "# plt.show()\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.16" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/MASCPARTICLES/mascparticles.c1.ipynb b/VAPs/quicklook/MASCPARTICLES/mascparticles.c1.ipynb new file mode 100644 index 00000000..0c674843 --- /dev/null +++ b/VAPs/quicklook/MASCPARTICLES/mascparticles.c1.ipynb @@ -0,0 +1,339 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# MASCPARTICLES.C1 Plots\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/mascparticles) for more information about this vap." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib widget\n", + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import ipywidgets as widgets\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "# Data archive directory\n", + "DATA_DIR = r'/data/archive/'\n", + "\n", + "# Datastream info\n", + "DATASTREAM_NAME = 'mascparticles'\n", + "DATA_LEVEL = 'c1'\n", + "LOCATIONS = [{'end_date': '2018-07-16', 'facility': 'M1', 'site': 'oli', 'start_date': '2015-11-01'}, {'end_date': '2023-12-18', 'facility': 'C1', 'site': 'nsa', 'start_date': '2021-04-27'}]" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "9faaf875", + "metadata": {}, + "source": [ + "## Define site, facility, and date range" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ac6764f5", + "metadata": {}, + "outputs": [], + "source": [ + "print(\"The following locations and date ranges are available for this VAP:\")\n", + "display(pd.DataFrame(LOCATIONS, columns=['site', 'facility', 'start_date', 'end_date']))" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "8d132223", + "metadata": {}, + "source": [ + "#### Define site, facility, and date range (date format: YYYY-MM-DD) using the variables below:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e563983a", + "metadata": {}, + "outputs": [], + "source": [ + "site_facility = ( 'oli', 'M1' )\n", + "\n", + "date_start = '2018-07-14'\n", + "date_end = '2018-07-16'" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "bccd3dfe-2f99-49a5-bace-ea37e8dd8fc3", + "metadata": {}, + "source": [ + "## Load data files\n", + "Load data files from /data/archive/" + ] + }, + { + "cell_type": "code", 
+ "execution_count": null, + "id": "cb4b9a26-c574-49c0-a521-658fa553e39e", + "metadata": {}, + "outputs": [], + "source": [ + "# Compile list of files\n", + "site, facility = site_facility\n", + "d_date_start = datetime.strptime(date_start, '%Y-%m-%d')\n", + "d_date_end = datetime.strptime(date_end, '%Y-%m-%d')\n", + "dir_path = os.path.join(DATA_DIR + site, site + DATASTREAM_NAME + facility + r'.' + DATA_LEVEL )\n", + "dir_path\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6be8f3dc", + "metadata": {}, + "outputs": [], + "source": [ + "from datetime import date, timedelta\n", + "import pandas as pd\n", + "\n", + "def get_ARM_formated_dates(start_date, end_date):\n", + " \"\"\"\n", + " Get a list of ARM conventional formated date lists, based on start_date and end_date(inclusive)\n", + " EXAMPLE:\n", + " get_ARM_formated_dates(start_date=\"20180219\", end_date=\"20180221\")\n", + " >> [\"20180219\", \"20180220\", \"20180221\"] \n", + " \"\"\"\n", + " \n", + " _start_date = pd.to_datetime(start_date)\n", + " _end_date = pd.to_datetime(end_date)\n", + " \n", + " delta = _end_date - _start_date # returns timedelta \n", + " dates = []\n", + "\n", + " for i in range(delta.days + 1):\n", + " day = _start_date + timedelta(days=i)\n", + " day_formated = day.strftime(format=\"%Y%m%d\")\n", + " dates.append(day_formated)\n", + " return dates\n", + "\n", + "\n", + "get_ARM_formated_dates(start_date=date_start, end_date=date_end)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "51feea2e", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter a list of files based on date pattern\n", + "import glob\n", + "dates = get_ARM_formated_dates(start_date=date_start, end_date=date_end)\n", + "files_filter = []\n", + "for date in dates:\n", + " files_filter += glob.glob(f'{dir_path}/*.{date}*.*')\n", + " files_filter\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0e5d0d7", + 
"metadata": {}, + "outputs": [], + "source": [ + "# Load files as a single dataset\n", + "files_list = files_filter \n", + "ds = act.io.armfiles.read_netcdf(files_list)\n", + "ds.clean.cleanup()\n", + "print(f'{len(files_list)} files loaded')\n", + "ds\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "4a551094-9ec0-4b64-b80a-9940573c2f50", + "metadata": {}, + "source": [ + "## Plot time series data\n", + "#### Define the list of variables to be plotted:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d09b789e-84f1-4605-846b-a72c110c8048", + "metadata": {}, + "outputs": [], + "source": [ + "variables_to_plot = ['snowflake_fall_speed', 'maximum_dimension_avg']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3458fb08-035b-4898-9253-0a94e6f9c97b", + "metadata": {}, + "outputs": [], + "source": [ + "ts_display = act.plotting.TimeSeriesDisplay(ds)\n", + "ts_display.add_subplots((len(variables_to_plot),), figsize = (9.5,4*len(variables_to_plot)))\n", + "\n", + "for i,v in enumerate(variables_to_plot):\n", + " ts_ax = ts_display.plot(v, subplot_index=(i,), set_title=ds.variables[v].attrs['long_name'],)\n", + " ts_ax.grid()\n", + "\n", + "plt.show()\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "194399aa-1907-452b-8ba9-bc31d7f60291", + "metadata": {}, + "source": [ + "## Quality check plots\n", + "#### Define variable for QC plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9a0ef63d-3eeb-48fc-a24a-43258b6134b8", + "metadata": {}, + "outputs": [], + "source": [ + "qc_variable = 'snowflake_fall_speed'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "532663a3-4dc0-4497-bda8-018c5f91e1c4", + "metadata": {}, + "outputs": [], + "source": [ + "# QC Plot\n", + "if ('qc_'+qc_variable) in ds.variables:\n", + "\n", + " # Plot\n", + " qc_display = act.plotting.TimeSeriesDisplay(ds)\n", + " qc_display.add_subplots((2,), figsize = 
(9.5,10))\n", + " qc_ax = qc_display.plot(qc_variable, subplot_index=(0,), set_title=\"QC results on field: \" + qc_variable,)\n", + " qc_ax.grid()\n", + " qc_display.qc_flag_block_plot(qc_variable, subplot_index=(1,))\n", + "\n", + " plt.show()\n", + "else:\n", + " print(f'QC not available for the selected field: {qc_variable}')\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "2308db16-f362-4033-a11a-c3e5e75ad9ba", + "metadata": {}, + "source": [ + "## Field selection dropdown menu\n", + "Select variable to be plotted from a dropdown menu" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d5792fa8-2253-441a-8ac6-1e07a0345031", + "metadata": {}, + "outputs": [], + "source": [ + "plt.ioff()\n", + "\n", + "# populate dropdown menu with available variables \n", + "available_variables = [v for v in ds.variables if not('time' in v or v.startswith('qc_') or v.startswith('source_')) and 'long_name' in ds.variables[v].attrs]\n", + "d_variable = 'snowflake_fall_speed'\n", + "dropdown = widgets.Dropdown(\n", + " options = [(ds.variables[v].attrs['long_name'], v) for v in available_variables],\n", + " value= d_variable,\n", + " description='Field:',\n", + " disabled=False,\n", + ")\n", + "dropdown.layout.margin = '0px 30% 0px 20%'\n", + "dropdown.layout.width = '50%'\n", + "\n", + "# set up display\n", + "i_display = act.plotting.TimeSeriesDisplay(ds)\n", + "i_display.add_subplots((1,), figsize = (9.5,5))\n", + "i_ax = i_display.plot(d_variable, subplot_index=(0,), set_title=ds.variables[d_variable].attrs['long_name'],)\n", + "i_ax.grid()\n", + "i_fig = i_display.fig\n", + "\n", + "# update plot callback function\n", + "def update_plot(change):\n", + " i_ax.cla()\n", + " i_ax_new = i_display.plot(change.new, subplot_index=(0,), set_title=ds.variables[change.new].attrs['long_name'],)\n", + " i_ax_new.grid()\n", + " i_fig.canvas.draw()\n", + " i_fig.canvas.flush_events()\n", + "\n", + "dropdown.observe(update_plot, 
names='value')\n", + "\n", + "widgets.AppLayout(\n", + " header=dropdown,\n", + " center=i_fig.canvas,\n", + " pane_heights=[1, 6,1]\n", + ")\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.12" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/MASCPARTICLES/mascparticlesavg.c1.ipynb b/VAPs/quicklook/MASCPARTICLES/mascparticlesavg.c1.ipynb new file mode 100644 index 00000000..4af2c6c2 --- /dev/null +++ b/VAPs/quicklook/MASCPARTICLES/mascparticlesavg.c1.ipynb @@ -0,0 +1,339 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# MASCPARTICLESAVG.C1 Plots\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/mascparticles) for more information about this vap." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib widget\n", + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import ipywidgets as widgets\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "# Data archive directory\n", + "DATA_DIR = r'/data/archive/'\n", + "\n", + "# Datastream info\n", + "DATASTREAM_NAME = 'mascparticlesavg'\n", + "DATA_LEVEL = 'c1'\n", + "LOCATIONS = [{'end_date': '2018-07-18', 'facility': 'M1', 'site': 'oli', 'start_date': '2015-11-04'}, {'end_date': '2023-12-18', 'facility': 'C1', 'site': 'nsa', 'start_date': '2021-05-06'}]" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "9faaf875", + "metadata": {}, + "source": [ + "## Define site, facility, and date range" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ac6764f5", + "metadata": {}, + "outputs": [], + "source": [ + "print(\"The following locations and date ranges are available for this VAP:\")\n", + "display(pd.DataFrame(LOCATIONS, columns=['site', 'facility', 'start_date', 'end_date']))" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "8d132223", + "metadata": {}, + "source": [ + "#### Define site, facility, and date range (date format: YYYY-MM-DD) using the variables below:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e563983a", + "metadata": {}, + "outputs": [], + "source": [ + "site_facility = ( 'oli', 'M1' )\n", + "\n", + "date_start = '2018-07-16'\n", + "date_end = '2018-07-18'" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "bccd3dfe-2f99-49a5-bace-ea37e8dd8fc3", + "metadata": {}, + "source": [ + "## Load data files\n", + "Load data files from /data/archive/" + ] + }, + { + "cell_type": 
"code", + "execution_count": null, + "id": "cb4b9a26-c574-49c0-a521-658fa553e39e", + "metadata": {}, + "outputs": [], + "source": [ + "# Compile list of files\n", + "site, facility = site_facility\n", + "d_date_start = datetime.strptime(date_start, '%Y-%m-%d')\n", + "d_date_end = datetime.strptime(date_end, '%Y-%m-%d')\n", + "dir_path = os.path.join(DATA_DIR + site, site + DATASTREAM_NAME + facility + r'.' + DATA_LEVEL )\n", + "dir_path\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6be8f3dc", + "metadata": {}, + "outputs": [], + "source": [ + "from datetime import date, timedelta\n", + "import pandas as pd\n", + "\n", + "def get_ARM_formated_dates(start_date, end_date):\n", + " \"\"\"\n", + " Get a list of ARM conventional formated date lists, based on start_date and end_date(inclusive)\n", + " EXAMPLE:\n", + " get_ARM_formated_dates(start_date=\"20180219\", end_date=\"20180221\")\n", + " >> [\"20180219\", \"20180220\", \"20180221\"] \n", + " \"\"\"\n", + " \n", + " _start_date = pd.to_datetime(start_date)\n", + " _end_date = pd.to_datetime(end_date)\n", + " \n", + " delta = _end_date - _start_date # returns timedelta \n", + " dates = []\n", + "\n", + " for i in range(delta.days + 1):\n", + " day = _start_date + timedelta(days=i)\n", + " day_formated = day.strftime(format=\"%Y%m%d\")\n", + " dates.append(day_formated)\n", + " return dates\n", + "\n", + "\n", + "get_ARM_formated_dates(start_date=date_start, end_date=date_end)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "51feea2e", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter a list of files based on date pattern\n", + "import glob\n", + "dates = get_ARM_formated_dates(start_date=date_start, end_date=date_end)\n", + "files_filter = []\n", + "for date in dates:\n", + " files_filter += glob.glob(f'{dir_path}/*.{date}*.*')\n", + " files_filter\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0e5d0d7", + 
"metadata": {}, + "outputs": [], + "source": [ + "# Load files as a single dataset\n", + "files_list = files_filter \n", + "ds = act.io.armfiles.read_netcdf(files_list)\n", + "ds.clean.cleanup()\n", + "print(f'{len(files_list)} files loaded')\n", + "ds\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "4a551094-9ec0-4b64-b80a-9940573c2f50", + "metadata": {}, + "source": [ + "## Plot time series data\n", + "#### Define the list of variables to be plotted:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d09b789e-84f1-4605-846b-a72c110c8048", + "metadata": {}, + "outputs": [], + "source": [ + "variables_to_plot = ['fall_speed_avg', 'maximum_dimension_avg']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3458fb08-035b-4898-9253-0a94e6f9c97b", + "metadata": {}, + "outputs": [], + "source": [ + "ts_display = act.plotting.TimeSeriesDisplay(ds)\n", + "ts_display.add_subplots((len(variables_to_plot),), figsize = (9.5,4*len(variables_to_plot)))\n", + "\n", + "for i,v in enumerate(variables_to_plot):\n", + " ts_ax = ts_display.plot(v, subplot_index=(i,), set_title=ds.variables[v].attrs['long_name'],)\n", + " ts_ax.grid()\n", + "\n", + "plt.show()\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "194399aa-1907-452b-8ba9-bc31d7f60291", + "metadata": {}, + "source": [ + "## Quality check plots\n", + "#### Define variable for QC plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9a0ef63d-3eeb-48fc-a24a-43258b6134b8", + "metadata": {}, + "outputs": [], + "source": [ + "qc_variable = 'num_particles_total'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "532663a3-4dc0-4497-bda8-018c5f91e1c4", + "metadata": {}, + "outputs": [], + "source": [ + "# QC Plot\n", + "if ('qc_'+qc_variable) in ds.variables:\n", + "\n", + " # Plot\n", + " qc_display = act.plotting.TimeSeriesDisplay(ds)\n", + " qc_display.add_subplots((2,), figsize = (9.5,10))\n", + 
" qc_ax = qc_display.plot(qc_variable, subplot_index=(0,), set_title=\"QC results on field: \" + qc_variable,)\n", + " qc_ax.grid()\n", + " qc_display.qc_flag_block_plot(qc_variable, subplot_index=(1,))\n", + "\n", + " plt.show()\n", + "else:\n", + " print(f'QC not available for the selected field: {qc_variable}')\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "2308db16-f362-4033-a11a-c3e5e75ad9ba", + "metadata": {}, + "source": [ + "## Field selection dropdown menu\n", + "Select variable to be plotted from a dropdown menu" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d5792fa8-2253-441a-8ac6-1e07a0345031", + "metadata": {}, + "outputs": [], + "source": [ + "plt.ioff()\n", + "\n", + "# populate dropdown menu with available variables \n", + "available_variables = [v for v in ds.variables if not('time' in v or v.startswith('qc_') or v.startswith('source_')) and 'long_name' in ds.variables[v].attrs]\n", + "d_variable = 'fall_speed_avg'\n", + "dropdown = widgets.Dropdown(\n", + " options = [(ds.variables[v].attrs['long_name'], v) for v in available_variables],\n", + " value= d_variable,\n", + " description='Field:',\n", + " disabled=False,\n", + ")\n", + "dropdown.layout.margin = '0px 30% 0px 20%'\n", + "dropdown.layout.width = '50%'\n", + "\n", + "# set up display\n", + "i_display = act.plotting.TimeSeriesDisplay(ds)\n", + "i_display.add_subplots((1,), figsize = (9.5,5))\n", + "i_ax = i_display.plot(d_variable, subplot_index=(0,), set_title=ds.variables[d_variable].attrs['long_name'],)\n", + "i_ax.grid()\n", + "i_fig = i_display.fig\n", + "\n", + "# update plot callback function\n", + "def update_plot(change):\n", + " i_ax.cla()\n", + " i_ax_new = i_display.plot(change.new, subplot_index=(0,), set_title=ds.variables[change.new].attrs['long_name'],)\n", + " i_ax_new.grid()\n", + " i_fig.canvas.draw()\n", + " i_fig.canvas.flush_events()\n", + "\n", + "dropdown.observe(update_plot, names='value')\n", + "\n", + 
"widgets.AppLayout(\n", + " header=dropdown,\n", + " center=i_fig.canvas,\n", + " pane_heights=[1, 6,1]\n", + ")\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.12" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/MERGED-COMMON/MERGED-COMMON_tutorial.ipynb b/VAPs/quicklook/MERGED-COMMON/MERGED-COMMON_tutorial.ipynb new file mode 100644 index 00000000..085cc66b --- /dev/null +++ b/VAPs/quicklook/MERGED-COMMON/MERGED-COMMON_tutorial.ipynb @@ -0,0 +1,838 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# AAFMERGEDCLDSD.C1 Notebook\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/merged-common) for more information about this vap." + ] + }, + { + "cell_type": "markdown", + "id": "97097763", + "metadata": {}, + "source": [ + "In this notebook, we demonstrate the workflow to explore ARM vap data (using aafmergedcldsd as an example.) Value-added products (VAPs) are higher-order data products that have been analyzed and processed to ease scientists’ use of ARM data in atmospheric research and global climate models. \n", + "Here is the main content we will cover." 
+ ] + }, + { + "cell_type": "markdown", + "id": "eddec40f", + "metadata": {}, + "source": [ + "# Table of Content\n", + "## Access the data\n", + "* How to retrieve the data\n", + "* Data path and file name conventions\n", + "* Load data\n", + "## Explore the data\n", + "* NetCDF Data structure\n", + "* Xarray essentials\n", + "* Xarray Variable\n", + "## Plot the data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "import random\n", + "\n", + "import glob\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "id": "2372d5be", + "metadata": {}, + "source": [ + "## Access the data" + ] + }, + { + "cell_type": "markdown", + "id": "5b0d9684", + "metadata": {}, + "source": [ + "### How to retrieve the data\n", + "We assume the path \"/data/archive\" is available where you are running this notebook. You can use `os.path.exists(\"/data/archive\")` to verify if the path exists at your machine.\n", + "\n", + "\n", + "### Data path and file name conventions\n", + "There several common terminologies regarding ARM data, for example, data-stream-name, data-level, site, facility, instrument, etc. (For more details, please see [ARM Data File Standards Version 1.2](https://www.google.com/search?q=arm+datastream+facility+cite+definition&rlz=1C1GCEJ_enUS1029US1029&ei=hb41ZICrBPukqtsPvMODIA&ved=0ahUKEwjAgeiS0aL-AhV7kmoFHbzhAAQQ4dUDCBA&uact=5&oq=arm+datastream+facility+cite+definition&gs_lcp=Cgxnd3Mtd2l6LXNlcnAQAzIFCAAQogQyBQgAEKIEMgUIABCiBDIFCAAQogQyBQgAEKIEOgoIABBHENYEELADOgoIIRCgARDDBBAKSgQIQRgAUJ4DWIoOYO0PaAFwAXgAgAGMAYgBuQmSAQMyLjmYAQCgAQHIAQjAAQE&sclient=gws-wiz-serp).) 
\n", + "\n", + "For example, this notebook is called `aafmergedcldsd.c1`, where `aafmergedcldsd` is the \"datastream name\", and `c1` is the \"data level\".\n", + "\n", + "This datastream also contains site `cor` and facility `F1`. (Note: an individual datastream might have multiple site-facility pairs.)\n", + "In such a case, the data of this datastream is stored at `/data/archive/cor/coraafmergedcldsdF1.c1`, which is in the format of `{DATA_DIR}/{site}/{site}{DATASTREAM_NAME}{facility}.{DATA_LEVEL}`. We can use the following method to assign the datastream directory: `datastream_dir = os.path.join(DATA_DIR, site, site + DATASTREAM_NAME + facility + '.' + DATA_LEVEL )`\n", + "\n", + "The data files under datastream_dir also follow naming conventions. Once at the datastream_dir level, the most important file naming convention for differentiating the files is \"yyyyMMdd.hhmmss\", which comes in handy for filtering files by datetime. For example, we can use `glob.glob(f'{datastream_dir}/*.200709*.*')` to select files from September 2007.\n", + "\n", + "(Note: refer to https://adc.arm.gov/discovery/#/ and the https://adc.arm.gov/solr8/metadata/select API to explore ARM datastreams and their associated available sites and facilities.)\n", + "\n", + "Please see the following examples in action" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d7e9eb85", + "metadata": {}, + "outputs": [], + "source": [ + "# Verify if DATA_DIR path exists\n", + "DATA_DIR = \"/data/archive\"\n", + "os.path.exists(DATA_DIR)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "586993fd", + "metadata": {}, + "outputs": [], + "source": [ + "# Speicify datastream_dir following the path conventions and check its existence\n", + "DATASTREAM_NAME = \"aafmergedcldsd\"\n", + "DATA_LEVEL = \"c1\"\n", + "site = \"cor\"\n", + "facility = \"F1\"\n", + "datastream_dir = os.path.join(DATA_DIR, site, site + DATASTREAM_NAME + facility + '.' 
+ DATA_LEVEL )\n", + "print(datastream_dir)\n", + "print(os.path.exists(datastream_dir))\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0742f7c1", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: list 5 (random) files under datastream_dir\n", + "files = os.listdir(datastream_dir)\n", + "files[:5]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "39b98a36", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: get most recent file\n", + "list_of_files = glob.glob(f\"{datastream_dir}/*\") # * means all if need specific format then *.csv\n", + "latest_file = max(list_of_files, key=os.path.getctime)\n", + "latest_file" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "902d514e", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus sort datastream files based on datetime\n", + "files = os.listdir(datastream_dir)\n", + "file_sorted = files.copy()\n", + "file_sorted.sort() \n", + "print(file_sorted[:5])\n", + "\n", + "# to reverse\n", + "file_sorted_reverse = files.copy() \n", + "file_sorted_reverse.sort(reverse=True)\n", + "print(file_sorted_reverse[:5])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ec5923b2", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: pattern matching\n", + "# filter the 200709** files under datastream_dir\n", + "files_filter = glob.glob(f'{datastream_dir}/*.200709*.*')\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c4a4aa18", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: use armnotebook_utils.file_filter (TODO) to filter files based on datastream info \n" + ] + }, + { + "cell_type": "markdown", + "id": "cfeb9efc", + "metadata": {}, + "source": [ + "### Load data\n", + "The arm data is stored in [NetCDF](https://en.wikipedia.org/wiki/NetCDF#:~:text=%22NetCDF%20(network%20Common%20Data%20Form,format%20for%20representing%20scientific%20data.) format. 
We can use xarray's `open_dataset` method to load single file, or `open_mfdataset` to open multiple files. (Note: the latter will still return a single xarray dataset object by combining multiple files.)\n", + "\n", + "Note: open_dataset keeps the file handle open and lazy loads its contents. All parameters are passed directly to open_dataset. It is a preferable method over load_dataset for memory-effiency. (more details, please see [xarray.open_dataset](https://docs.xarray.dev/en/stable/generated/xarray.open_dataset.html))\n", + "\n", + "See the following example in action" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a440a329", + "metadata": {}, + "outputs": [], + "source": [ + "# open single file\n", + "full_path = latest_file\n", + "print(full_path)\n", + "ds_single = xr.open_dataset(full_path)\n", + "ds_single" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0143a3d", + "metadata": {}, + "outputs": [], + "source": [ + "print(ds_single)\n", + "print(type(ds_single))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1c0f8939", + "metadata": {}, + "outputs": [], + "source": [ + "# open multiple files\n", + "n_files = 3\n", + "full_paths = [os.path.join(datastream_dir, f_path) for f_path in file_sorted_reverse[:n_files]]\n", + "print(full_paths)\n", + "try: # Note: sometimes multiple files cannot be merged, so we used try except here.\n", + " ds_mutiple = xr.open_mfdataset(full_paths)\n", + " # ds_mutiple\n", + " print(type(ds_mutiple))\n", + " print(ds_mutiple)\n", + "except Exception as e:\n", + " print(e)" + ] + }, + { + "cell_type": "markdown", + "id": "3fa59943", + "metadata": {}, + "source": [ + "## Explore the data" + ] + }, + { + "cell_type": "markdown", + "id": "571f69b9", + "metadata": {}, + "source": [ + "### NetCDF Data structure \n", + "\n", + "\n", + "(If you are confident with NetCDF basics and xarray essentials, feel free to skip this session.)\n", + "Before we dive into 
data exploration, there are some essential concepts we should be familiar with (shown below): \n", + "* Dataset\n", + "* Data array\n", + "* Variable\n", + "* Dimension\n", + "* Coordinate\n", + "* Data Type\n", + "* Meta Data (Attributes)\n", + "\n", + "We will not go into detail about NetCDF basics, but here are some references you might find helpful:\n", + "* [Components of a NetCDF Dataset](https://iprc.soest.hawaii.edu/users/xfu/tool/guidef-7.html#)\n", + "* [Network Common Data Form (NetCDF)](https://www.unidata.ucar.edu/software/netcdf/) and [A Brief History of (netCDF) Time](https://www.unidata.ucar.edu/software/netcdf/time/recs.html)\n", + "\n", + "\n", + "\n", + "### Xarray essentials\n", + "Earlier we introduced the xarray package and used `open_dataset` and `open_mfdataset` to load NetCDF data files as an xarray Dataset object. Recall that you can review the data in a notebook by using `print(ds)` or `ds`. \n", + "\n", + "Using xarray to retrieve the aforementioned NetCDF basics is straightforward. 
In general,\n", + "* Dataset: `ds`\n", + "* Data array: `ds[var_name]`\n", + "* Variable: `ds.variables`\n", + "* Dimension: `ds.dims`\n", + "* Coordinate: `ds.coords`\n", + "* Data Type: `type(ds)`\n", + "* Meta Data (Attributes): `ds.attrs`\n", + "\n", + "Also, here are some references if you are new to xarray:\n", + "* [Xarray in 45 minutes](https://tutorial.xarray.dev/overview/xarray-in-45-min.html)\n", + "* [Handling NetCDF Files using XArray for Absolute Beginners](https://towardsdatascience.com/handling-netcdf-files-using-xarray-for-absolute-beginners-111a8ab4463f)\n", + "\n", + "Try the following commands in action\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b3eac323", + "metadata": {}, + "outputs": [], + "source": [ + "ds = ds_single" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "77ecf85d", + "metadata": {}, + "outputs": [], + "source": [ + "# Dataset \n", + "ds" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "25e7de09", + "metadata": {}, + "outputs": [], + "source": [ + "# Data array, Variable\n", + "# Note: the info can be overwhelming. 
Do not attempt to grasp everything at the first glance.\n", + "ds.variables" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8c41b67e", + "metadata": {}, + "outputs": [], + "source": [ + "# dimenssions\n", + "ds.dims" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "156f1dfc", + "metadata": {}, + "outputs": [], + "source": [ + "# coordinates\n", + "ds.coords" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "277d6064", + "metadata": {}, + "outputs": [], + "source": [ + "# Meta Data (Attributes)\n", + "ds.attrs" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d334681f", + "metadata": {}, + "outputs": [], + "source": [ + "# type\n", + "print(type(ds))\n", + "print(type(ds.variables))\n", + "print(type(ds.dims))\n", + "print(type(ds.coords))\n", + "print(type(ds.attrs))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "643399d6", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: retrieve variable names only\n", + "list(ds.variables)" + ] + }, + { + "cell_type": "markdown", + "id": "1cf36878", + "metadata": {}, + "source": [ + "#### Discussion: variable vs. coordinates vs. dimenssions. \n", + "From [Components of a NetCDF Dataset](https://iprc.soest.hawaii.edu/users/xfu/tool/guidef-7.html#):\n", + "\n", + "* Dimession: A dimension may be used to represent a real physical dimension, for example, time, latitude, longitude, or height. A dimension might also be used to index other quantities, for example station or model-run-number.\n", + "A netCDF dimension has both a name and a length. A dimension length is an arbitrary positive integer, except that one dimension in a netCDF dataset can have the length UNLIMITED.\n", + "\n", + "* Variables: Variables are used to store the bulk of the data in a netCDF dataset. A variable represents an array of values of the same type. A scalar value is treated as a 0-dimensional array. 
A variable has a name, a data type, and a shape described by its list of dimensions specified when the variable is created. A variable may also have associated attributes, which may be added, deleted or changed after the variable is created.\n", + "\n", + "* Coordinate (Variables): It is legal for a variable to have the same name as a dimension. Such variables have no special meaning to the netCDF library. However there is a convention that such variables should be treated in a special way by software using this library.\n", + "A variable with the same name as a dimension is called a coordinate variable. It typically defines a physical coordinate corresponding to that dimension.\n", + "\n", + "\n", + "Tip: By definition, coordinates and dimensions are also variables. For example, `ds.time` is a coordinate, but it is also a special variable. In practice, however, when we talk about a variable, we mean a regular (non-coordinate) variable. We will use this convention for the remainder of the notebook. " + ] + }, + { + "cell_type": "markdown", + "id": "d47120f9", + "metadata": {}, + "source": [ + "### Xarray Variable\n", + "\n", + "\n", + "We can use `ds.variables` to access the variables of a dataset. However, printing out the whole dataset can often be overwhelming. Instead, we can work with an individual variable (also called an xarray data array) using the `ds[var_name]` syntax, where `var_name` is the name of the variable. See the following examples in action.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5a57e136", + "metadata": {}, + "outputs": [], + "source": [ + "print(type(ds[\"time\"]))\n", + "ds[\"time\"]" + ] + }, + { + "cell_type": "markdown", + "id": "51ec643e", + "metadata": {}, + "source": [ + "#### Variable properties\n", + "For an individual variable, the following properties are the most commonly used, listed with the associated xarray attribute to retrieve each. 
(assuming var = ds[var_name])\n", + "* name: `var.name`\n", + "* data content: `var.data`\n", + "* attributes: `var.attrs`\n", + "* dimenstions: `var.dims`\n", + "* data type: `var.data.dtype`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a632a525", + "metadata": {}, + "outputs": [], + "source": [ + "var_name = \"time\"\n", + "var = ds[var_name]\n", + "\n", + "print(\"var.name: \\n\", var.name, \"\\n\")\n", + "print(\"var.data: \\n\", var.data, \"\\n\")\n", + "print(\"var.attrs: \\n\", var.attrs, \"\\n\")\n", + "print(\"var.dims: \\n\", var.dims, \"\\n\")\n", + "print(\"var.data.dtype: \\n\", var.data.dtype, \"\\n\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b256b07f", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: indivdual variable info table (collect and display variable info as a tabular format)\n", + "df_info = pd.DataFrame()\n", + "df_info[\"var_name\"] = list(ds.variables)\n", + "df_info[\"dims\"] = df_info.var_name.apply(lambda x: ds[x].dims)\n", + "df_info[\"is_dim\"] = df_info.var_name.apply(lambda x: x in ds.dims)\n", + "df_info[\"n_dim\"] = df_info.var_name.apply(lambda x: len(ds[x].dims))\n", + "df_info[\"attrs\"] = df_info.var_name.apply(lambda x: ds[x].attrs)\n", + "df_info[\"dtype\"] = df_info.var_name.apply(lambda x: ds[x].data.dtype)\n", + "df_info" + ] + }, + { + "cell_type": "markdown", + "id": "e6a36b96", + "metadata": {}, + "source": [ + "### Data cleaning/Preprocessing (skipped)\n", + "Data cleaning and preprocessing is an important stage in the the data analysis pipeline. However it will be out of the scope of this notebook. For more information about data cleaning and preprocessing basics, here are some references. 
\n", + "* [Xarray Fundamentals](https://earth-env-data-science.github.io/lectures/xarray/xarray.html)\n", + "* [Xarray Tutorial — Pangeo Gallery documentation](http://gallery.pangeo.io/repos/pangeo-data/pangeo-tutorial-gallery/xarray.html)\n", + "* [Pythonic Data Cleaning With pandas and NumPy](https://realpython.com/python-data-cleaning-numpy-pandas/)\n", + "* [Pandas - Cleaning Data](https://www.w3schools.com/python/pandas/pandas_cleaning.asp)" + ] + }, + { + "cell_type": "markdown", + "id": "ae6e1a69", + "metadata": {}, + "source": [ + "### Plotting\n", + "Data visualization (or plotting) is another important data analysis topic and deserves its own discussion. In this notebook we will only demonstrate basic tools for simple data visualization tasks.\n", + "\n", + "Here are some references you might find useful:\n", + "* xarray plotting: https://docs.xarray.dev/en/stable/user-guide/plotting.html\n", + "* Atmospheric Community Toolkit (ACT): https://arm-doe.github.io/ACT/index.html\n", + "\n", + "Note: this notebook is auto-generated using a template. It uses a general approach to select variable(s) to plot and is not customized for each individual notebook. Feel free to change the variables of interest, especially if a certain figure fails to plot." + ] + }, + { + "cell_type": "markdown", + "id": "7e417fb2", + "metadata": {}, + "source": [ + "#### 1-dimensional basic time series plot\n", + "\n", + "For the following plot we would like to find a variable such that\n", + "* it has one and only one dimension\n", + "* \"time\" is its coordinate variable\n", + "* it is not a dimension itself,\n", + "* it is not a special variable, i.e. its name contains none of the substrings [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "\n", + "Please see the following example in action. 
(For more details about pandas filtering, please see the following references.)\n", + "* [pandas: multiple conditions while indexing data frame](https://stackoverflow.com/questions/22591174/pandas-multiple-conditions-while-indexing-data-frame-unexpected-behavior)\n", + "* [pandas.DataFrame.apply](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.apply.html)\n", + "* [How to test if a string contains one of the substrings in a list...](https://stackoverflow.com/questions/26577516/how-to-test-if-a-string-contains-one-of-the-substrings-in-a-list-in-pandas)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "30edac2d", + "metadata": {}, + "outputs": [], + "source": [ + "# (pandas query) filter n_dim==1, non-dimenssional, and not contains the following substrings\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter = df_info[(df_info.n_dim == 1) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (df_info.is_dim==False) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4777a659", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " var_1d = df_filter.var_name.values[0]\n", + " var_1d\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fb5ae985", + "metadata": {}, + "outputs": [], + "source": [ + "# Note: if failed, change to another variable to plot.\n", + "try:\n", + " ds[var_1d].plot()\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "markdown", + "id": "9e41066e", + "metadata": {}, + "source": [ + "#### 2-dimensional basic plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "09bd4adc", + "metadata": {}, + "outputs": [], + "source": [ + "# (pandas query) filter n_dim==2, non-dimenssional, and not contains the 
following substrings\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_2 = df_info[(df_info.n_dim == 2) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (df_info.is_dim==False) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter_2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2cdea1a9", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " var_2d = df_filter_2.var_name.values[0]\n", + " var_2d\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f5f48879", + "metadata": {}, + "outputs": [], + "source": [ + "# Note: if failed, change to another variable to plot.\n", + "try:\n", + " print(ds[var_2d].dims)\n", + " # ds[var_2d].plot()\n", + "\n", + " # conventionally, use \"time\" as x-axis\n", + " ds[var_2d].plot(x=\"time\")\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "markdown", + "id": "7554b2a6", + "metadata": {}, + "source": [ + "#### qc-plotting (optional)\n", + "\n", + "Note: act qc plotting has more strict requirement, one of them is the associated qc variable need to have \"flag_masks\" attributes. 
Which is added by using ds = act.io.armfiles.read_netcdf(files_list), then ds.clean.cleanup()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "71c2096f", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " ds_act = act.io.armfiles.read_netcdf(full_path)\n", + " print(type(ds_act))\n", + " ds_act.clean.cleanup()\n", + "\n", + " # or \n", + " # ds.clean.cleanup()\n", + "except Exception as e:\n", + " print(\"ERROR\", e)\n", + " ds_act = ds" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "844f8505", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter valid variables for ACT qc plotting\n", + "condition = (\"qc_\" + df_info.var_name).apply(lambda x: ds_act[x].attrs.get(\"flag_masks\") is not None \n", + " if x in list(ds.variables) else False)\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_3 = df_info[(df_info.is_dim==False) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings))) &\n", + " condition\n", + " ]\n", + "df_filter_3" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ecd38cc6", + "metadata": {}, + "outputs": [], + "source": [ + "# QC Plot\n", + "try:\n", + " qc_variable = df_filter_3.var_name.values[0]\n", + " print(qc_variable)\n", + "except Exception as e:\n", + " print(e)\n", + " \n", + "try:\n", + "\n", + " # Plot\n", + " qc_display = act.plotting.TimeSeriesDisplay(ds_act)\n", + " qc_display.add_subplots((2,), figsize = (9.5,10))\n", + " qc_ax = qc_display.plot(qc_variable, subplot_index=(0,), set_title=\"QC results on field: \" + qc_variable,)\n", + " qc_ax.grid()\n", + " qc_display.qc_flag_block_plot(qc_variable, subplot_index=(1,))\n", + " qc_ax.set_xlabel(f\"UTC Time starts at {ds.time.data[0]}\")\n", + "\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e)" + ] + }, + { + "cell_type": "markdown", + "id": "d76e4d27", + "metadata": 
{}, + "source": [ + "#### bonus: choose variables to plot from a dropdown menu " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bb733804", + "metadata": {}, + "outputs": [], + "source": [ + "# Valid variables filtering\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_4 = df_info[(df_info.is_dim==False) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter_4" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "82d5c20d", + "metadata": {}, + "outputs": [], + "source": [ + "# example 1: using xarray plot\n", + "\n", + "# Uncomment the following cell to try the interactive plot (ctrl + /)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2fc9aaa4", + "metadata": {}, + "outputs": [], + "source": [ + "# %matplotlib widget\n", + "# plt.clf()\n", + "\n", + "# fig, ax = plt.subplots(figsize=(10, 4))\n", + "\n", + "# available_variables = df_filter_4.var_name.values\n", + "# @widgets.interact(var=available_variables)\n", + "# def update(var = available_variables[0]):\n", + "# fig.clear() # Remove old lines from plot and plot new one\n", + "# if len(ds[var].dims)==2:\n", + "# ds[var].plot(x=\"time\", add_colorbar=False)\n", + "# else:\n", + "# ds[var].plot()\n", + "# plt.grid()\n", + "# plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fabdd802", + "metadata": {}, + "outputs": [], + "source": [ + "# example 2: using act plot\n", + "\n", + "# Uncomment the following cell to try the interactive plot (ctrl + /)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "962b4186", + "metadata": {}, + "outputs": [], + "source": [ + "# %matplotlib widget\n", + "# plt.clf()\n", + "\n", + "# available_variables = df_filter_4.var_name.values\n", + "\n", + "\n", + "# @widgets.interact(var=available_variables)\n", + "# def 
update(var = available_variables[0]):\n", + "\n", + "# i_display = act.plotting.TimeSeriesDisplay(ds_act)\n", + "# i_display.add_subplots((1,), figsize=(10, 4))\n", + "# ax = i_display.plot(var, subplot_index=(0,), set_title=f\"{var} ({ds_act[var].attrs['long_name']})\",)\n", + "\n", + "# ax.set_xlabel(f\"UTC Time starts at {ds.time.data[0]}\")\n", + "# ax.grid()\n", + "# plt.show()\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.16" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/MERGED-COMMON/aafmergedcldsd.c1.ipynb b/VAPs/quicklook/MERGED-COMMON/aafmergedcldsd.c1.ipynb new file mode 100644 index 00000000..09576f45 --- /dev/null +++ b/VAPs/quicklook/MERGED-COMMON/aafmergedcldsd.c1.ipynb @@ -0,0 +1,297 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# AAFMERGEDCLDSD.C1 Plots\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/merged-common) for more information about this vap." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib widget\n", + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import ipywidgets as widgets\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "# Data archive directory\n", + "DATA_DIR = r'/data/archive/'\n", + "\n", + "# Datastream info\n", + "DATASTREAM_NAME = 'aafmergedcldsd'\n", + "DATA_LEVEL = 'c1'\n", + "LOCATIONS = [{'end_date': '2018-12-08', 'facility': 'F1', 'site': 'cor', 'start_date': '2018-11-04'}, {'end_date': '2018-02-19', 'facility': 'F1', 'site': 'ena', 'start_date': '2017-06-21'}]" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "9faaf875", + "metadata": {}, + "source": [ + "## Define site, facility, and date range" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ac6764f5", + "metadata": {}, + "outputs": [], + "source": [ + "print(\"The following locations and date ranges are available for this VAP:\")\n", + "display(pd.DataFrame(LOCATIONS, columns=['site', 'facility', 'start_date', 'end_date']))" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "8d132223", + "metadata": {}, + "source": [ + "#### Define site, facility, and date range (date format: YYYY-MM-DD) using the variables below:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e563983a", + "metadata": {}, + "outputs": [], + "source": [ + "site_facility = ( 'cor', 'F1' )\n", + "\n", + "date_start = '2018-12-06'\n", + "date_end = '2018-12-08'" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "bccd3dfe-2f99-49a5-bace-ea37e8dd8fc3", + "metadata": {}, + "source": [ + "## Load data files\n", + "Load data files from /data/archive/" + ] + }, + { + "cell_type": 
"code", + "execution_count": null, + "id": "cb4b9a26-c574-49c0-a521-658fa553e39e", + "metadata": {}, + "outputs": [], + "source": [ + "# Compile list of files\n", + "site, facility = site_facility\n", + "d_date_start = datetime.strptime(date_start, '%Y-%m-%d')\n", + "d_date_end = datetime.strptime(date_end, '%Y-%m-%d')\n", + "dir_path = os.path.join(DATA_DIR + site, site + DATASTREAM_NAME + facility + r'.' + DATA_LEVEL )\n", + "dir_path\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6be8f3dc", + "metadata": {}, + "outputs": [], + "source": [ + "from datetime import date, timedelta\n", + "import pandas as pd\n", + "\n", + "def get_ARM_formated_dates(start_date, end_date):\n", + " \"\"\"\n", + " Get a list of ARM conventional formated date lists, based on start_date and end_date(inclusive)\n", + " EXAMPLE:\n", + " get_ARM_formated_dates(start_date=\"20180219\", end_date=\"20180221\")\n", + " >> [\"20180219\", \"20180220\", \"20180221\"] \n", + " \"\"\"\n", + " \n", + " _start_date = pd.to_datetime(start_date)\n", + " _end_date = pd.to_datetime(end_date)\n", + " \n", + " delta = _end_date - _start_date # returns timedelta \n", + " dates = []\n", + "\n", + " for i in range(delta.days + 1):\n", + " day = _start_date + timedelta(days=i)\n", + " day_formated = day.strftime(format=\"%Y%m%d\")\n", + " dates.append(day_formated)\n", + " return dates\n", + "\n", + "\n", + "get_ARM_formated_dates(start_date=date_start, end_date=date_end)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "51feea2e", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter a list of files based on date pattern\n", + "import glob\n", + "dates = get_ARM_formated_dates(start_date=date_start, end_date=date_end)\n", + "files_filter = []\n", + "for date in dates:\n", + " files_filter += glob.glob(f'{dir_path}/*.{date}*.*')\n", + " files_filter\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0e5d0d7", + 
"metadata": {}, + "outputs": [], + "source": [ + "# Load files as a single dataset\n", + "files_list = files_filter \n", + "ds = act.io.armfiles.read_netcdf(files_list)\n", + "ds.clean.cleanup()\n", + "print(f'{len(files_list)} files loaded')\n", + "ds\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "4a551094-9ec0-4b64-b80a-9940573c2f50", + "metadata": {}, + "source": [ + "## Plot time series data\n", + "#### Define the list of variables to be plotted:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d09b789e-84f1-4605-846b-a72c110c8048", + "metadata": {}, + "outputs": [], + "source": [ + "variables_to_plot = ['total_number_concentration', 'number_concentration']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3458fb08-035b-4898-9253-0a94e6f9c97b", + "metadata": {}, + "outputs": [], + "source": [ + "ts_display = act.plotting.TimeSeriesDisplay(ds)\n", + "ts_display.add_subplots((len(variables_to_plot),), figsize = (9.5,4*len(variables_to_plot)))\n", + "\n", + "for i,v in enumerate(variables_to_plot):\n", + " ts_ax = ts_display.plot(v, subplot_index=(i,), set_title=ds.variables[v].attrs['long_name'],)\n", + " ts_ax.grid()\n", + "\n", + "plt.show()\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "2308db16-f362-4033-a11a-c3e5e75ad9ba", + "metadata": {}, + "source": [ + "## Field selection dropdown menu\n", + "Select variable to be plotted from a dropdown menu" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d5792fa8-2253-441a-8ac6-1e07a0345031", + "metadata": {}, + "outputs": [], + "source": [ + "plt.ioff()\n", + "\n", + "# populate dropdown menu with available variables \n", + "available_variables = [v for v in ds.variables if not('time' in v or v.startswith('qc_') or v.startswith('source_')) and 'long_name' in ds.variables[v].attrs]\n", + "d_variable = 'total_number_concentration'\n", + "dropdown = widgets.Dropdown(\n", + " options = 
[(ds.variables[v].attrs['long_name'], v) for v in available_variables],\n", + " value= d_variable,\n", + " description='Field:',\n", + " disabled=False,\n", + ")\n", + "dropdown.layout.margin = '0px 30% 0px 20%'\n", + "dropdown.layout.width = '50%'\n", + "\n", + "# set up display\n", + "i_display = act.plotting.TimeSeriesDisplay(ds)\n", + "i_display.add_subplots((1,), figsize = (9.5,5))\n", + "i_ax = i_display.plot(d_variable, subplot_index=(0,), set_title=ds.variables[d_variable].attrs['long_name'],)\n", + "i_ax.grid()\n", + "i_fig = i_display.fig\n", + "\n", + "# update plot callback function\n", + "def update_plot(change):\n", + " i_ax.cla()\n", + " i_ax_new = i_display.plot(change.new, subplot_index=(0,), set_title=ds.variables[change.new].attrs['long_name'],)\n", + " i_ax_new.grid()\n", + " i_fig.canvas.draw()\n", + " i_fig.canvas.flush_events()\n", + "\n", + "dropdown.observe(update_plot, names='value')\n", + "\n", + "widgets.AppLayout(\n", + " header=dropdown,\n", + " center=i_fig.canvas,\n", + " pane_heights=[1, 6,1]\n", + ")\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.12" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/MERGEDSMPSAPS/MERGEDSMPSAPS_tutorial.ipynb b/VAPs/quicklook/MERGEDSMPSAPS/MERGEDSMPSAPS_tutorial.ipynb new file mode 100644 index 00000000..0b305fe9 --- /dev/null +++ b/VAPs/quicklook/MERGEDSMPSAPS/MERGEDSMPSAPS_tutorial.ipynb @@ -0,0 +1,838 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + 
"source": [ + "# MERGEDSMPSAPS.C1 Notebook\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/mergedsmpsaps) for more information about this vap." + ] + }, + { + "cell_type": "markdown", + "id": "97097763", + "metadata": {}, + "source": [ + "In this notebook, we demonstrate the workflow to explore ARM vap data (using mergedsmpsaps as an example.) Value-added products (VAPs) are higher-order data products that have been analyzed and processed to ease scientists’ use of ARM data in atmospheric research and global climate models. \n", + "Here is the main content we will cover." + ] + }, + { + "cell_type": "markdown", + "id": "eddec40f", + "metadata": {}, + "source": [ + "# Table of Content\n", + "## Access the data\n", + "* How to retrieve the data\n", + "* Data path and file name conventions\n", + "* Load data\n", + "## Explore the data\n", + "* NetCDF Data structure\n", + "* Xarray essentials\n", + "* Xarray Variable\n", + "## Plot the data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "import random\n", + "\n", + "import glob\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "id": "2372d5be", + "metadata": {}, + "source": [ + "## Access the data" + ] + }, + { + "cell_type": "markdown", + "id": "5b0d9684", + "metadata": {}, + "source": [ + "### How to retrieve the data\n", + "We assume the path \"/data/archive\" is available where you are running this notebook. 
You can use `os.path.exists(\"/data/archive\")` to verify if the path exists on your machine.\n", + "\n", + "\n", + "### Data path and file name conventions\n", + "There are several common terminologies regarding ARM data, for example, data-stream-name, data-level, site, facility, instrument, etc. (For more details, please see [ARM Data File Standards Version 1.2](https://www.google.com/search?q=arm+datastream+facility+cite+definition&rlz=1C1GCEJ_enUS1029US1029&ei=hb41ZICrBPukqtsPvMODIA&ved=0ahUKEwjAgeiS0aL-AhV7kmoFHbzhAAQQ4dUDCBA&uact=5&oq=arm+datastream+facility+cite+definition&gs_lcp=Cgxnd3Mtd2l6LXNlcnAQAzIFCAAQogQyBQgAEKIEMgUIABCiBDIFCAAQogQyBQgAEKIEOgoIABBHENYEELADOgoIIRCgARDDBBAKSgQIQRgAUJ4DWIoOYO0PaAFwAXgAgAGMAYgBuQmSAQMyLjmYAQCgAQHIAQjAAQE&sclient=gws-wiz-serp).) \n", + "\n", + "For example, this notebook is called `mergedsmpsaps.c1`, where `mergedsmpsaps` is the \"datastream name\", and `c1` is the \"data level\".\n", + "\n", + "This datastream also contains site `hou` and facility `M1`. (Note: individual datastream might have multiple site-facility pairs.)\n", + "In such a case, the data of this data-stream is stored at `/data/archive/hou/houmergedsmpsapsM1.c1`, which is in the format of `{DATA_DIR}/{site}/{site}{DATASTREAM_NAME}{facility}.{DATA_LEVEL}`. We can use the following method to assign the data-stream directory `datastream_dir = os.path.join(DATA_DIR, site, site + DATASTREAM_NAME + facility + '.' + DATA_LEVEL )`\n", + "\n", + "The data files under datastream_dir also follow naming conventions. But once you reach the datastream_dir level, the most important file naming convention to differentiate the files is \"yyyyMMdd.hhmmss\", which comes in handy to filter out files based on datetime. 
For example, we can use `glob.glob(f'{datastream_dir}/*.200709*.*')` to filter files from September 2007.\n", + "\n", + "(Note: refer to https://adc.arm.gov/discovery/#/ and the https://adc.arm.gov/solr8/metadata/select API to explore ARM datastreams and their associated available sites and facilities.)\n", + "\n", + "Please see the following examples in action" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d7e9eb85", + "metadata": {}, + "outputs": [], + "source": [ + "# Verify if DATA_DIR path exists\n", + "DATA_DIR = \"/data/archive\"\n", + "os.path.exists(DATA_DIR)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "586993fd", + "metadata": {}, + "outputs": [], + "source": [ + "# Specify datastream_dir following the path conventions and check its existence\n", + "DATASTREAM_NAME = \"mergedsmpsaps\"\n", + "DATA_LEVEL = \"c1\"\n", + "site = \"hou\"\n", + "facility = \"M1\"\n", + "datastream_dir = os.path.join(DATA_DIR, site, site + DATASTREAM_NAME + facility + '.' 
+ DATA_LEVEL )\n", + "print(datastream_dir)\n", + "print(os.path.exists(datastream_dir))\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0742f7c1", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: list 5 (random) files under datastream_dir\n", + "files = os.listdir(datastream_dir)\n", + "files[:5]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "39b98a36", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: get most recent file\n", + "list_of_files = glob.glob(f\"{datastream_dir}/*\") # * means all if need specific format then *.csv\n", + "latest_file = max(list_of_files, key=os.path.getctime)\n", + "latest_file" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "902d514e", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus sort datastream files based on datetime\n", + "files = os.listdir(datastream_dir)\n", + "file_sorted = files.copy()\n", + "file_sorted.sort() \n", + "print(file_sorted[:5])\n", + "\n", + "# to reverse\n", + "file_sorted_reverse = files.copy() \n", + "file_sorted_reverse.sort(reverse=True)\n", + "print(file_sorted_reverse[:5])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ec5923b2", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: pattern matching\n", + "# filter the 200709** files under datastream_dir\n", + "files_filter = glob.glob(f'{datastream_dir}/*.200709*.*')\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c4a4aa18", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: use armnotebook_utils.file_filter (TODO) to filter files based on datastream info \n" + ] + }, + { + "cell_type": "markdown", + "id": "cfeb9efc", + "metadata": {}, + "source": [ + "### Load data\n", + "The arm data is stored in [NetCDF](https://en.wikipedia.org/wiki/NetCDF#:~:text=%22NetCDF%20(network%20Common%20Data%20Form,format%20for%20representing%20scientific%20data.) format. 
We can use xarray's `open_dataset` method to load single file, or `open_mfdataset` to open multiple files. (Note: the latter will still return a single xarray dataset object by combining multiple files.)\n", + "\n", + "Note: open_dataset keeps the file handle open and lazy loads its contents. All parameters are passed directly to open_dataset. It is a preferable method over load_dataset for memory-effiency. (more details, please see [xarray.open_dataset](https://docs.xarray.dev/en/stable/generated/xarray.open_dataset.html))\n", + "\n", + "See the following example in action" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a440a329", + "metadata": {}, + "outputs": [], + "source": [ + "# open single file\n", + "full_path = latest_file\n", + "print(full_path)\n", + "ds_single = xr.open_dataset(full_path)\n", + "ds_single" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0143a3d", + "metadata": {}, + "outputs": [], + "source": [ + "print(ds_single)\n", + "print(type(ds_single))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1c0f8939", + "metadata": {}, + "outputs": [], + "source": [ + "# open multiple files\n", + "n_files = 3\n", + "full_paths = [os.path.join(datastream_dir, f_path) for f_path in file_sorted_reverse[:n_files]]\n", + "print(full_paths)\n", + "try: # Note: sometimes multiple files cannot be merged, so we used try except here.\n", + " ds_mutiple = xr.open_mfdataset(full_paths)\n", + " # ds_mutiple\n", + " print(type(ds_mutiple))\n", + " print(ds_mutiple)\n", + "except Exception as e:\n", + " print(e)" + ] + }, + { + "cell_type": "markdown", + "id": "3fa59943", + "metadata": {}, + "source": [ + "## Explore the data" + ] + }, + { + "cell_type": "markdown", + "id": "571f69b9", + "metadata": {}, + "source": [ + "### NetCDF Data structure \n", + "\n", + "\n", + "(If you are confident with NetCDF basics and xarray essentials, feel free to skip this session.)\n", + "Before we dive into 
data exploration, there are some essential concepts we should be familiar with (shown below): \n", + "* Dataset\n", + "* Data array\n", + "* Variable\n", + "* Dimension\n", + "* Coordinate\n", + "* Data Type\n", + "* Meta Data (Attributes)\n", + "\n", + "We will not go into details about NetCDF basics and here are some references you might find helpful\n", + "* [Components of a NetCDF Dataset](https://iprc.soest.hawaii.edu/users/xfu/tool/guidef-7.html#)\n", + "* [Network Common Data Form (NetCDF)](https://www.unidata.ucar.edu/software/netcdf/) and [A Brief History of (netCDF) Time](https://www.unidata.ucar.edu/software/netcdf/time/recs.html)\n", + "\n", + "\n", + "\n", + "### Xarray essentials\n", + "Earlier we introduced the xarray package and used `open_dataset` and `open_mfdataset` to retrieve NetCDF data file as an xarray Dataset object. Recall that you can review the data in a notebook by using `print(ds)` or `ds`. \n", + "\n", + "Using xarray to retrieve the aforementioned NetCDF basics is straightforward. 
In general,\n", + "* Dataset: `ds`\n", + "* Data array: `ds.variables`\n", + "* Variable: `ds.variables`\n", + "* Dimension: `ds.dims`\n", + "* Coordinate: `ds.coords`\n", + "* Data Type: `type`\n", + "* Meta Data (Attributes): `ds.attrs`\n", + "\n", + "Also, here are some references if you are new to xarray\n", + "* [Xarray in 45 minutes](https://tutorial.xarray.dev/overview/xarray-in-45-min.html)\n", + "* [Handling NetCDF Files using XArray for Absolute Beginners](https://towardsdatascience.com/handling-netcdf-files-using-xarray-for-absolute-beginners-111a8ab4463f)\n", + "\n", + "Try the following commands in action\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b3eac323", + "metadata": {}, + "outputs": [], + "source": [ + "ds = ds_single" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "77ecf85d", + "metadata": {}, + "outputs": [], + "source": [ + "# Dataset \n", + "ds" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "25e7de09", + "metadata": {}, + "outputs": [], + "source": [ + "# Data array, Variable\n", + "# Note: the info can be overwhelming. 
Do not attempt to grasp everything at the first glance.\n", + "ds.variables" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8c41b67e", + "metadata": {}, + "outputs": [], + "source": [ + "# dimenssions\n", + "ds.dims" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "156f1dfc", + "metadata": {}, + "outputs": [], + "source": [ + "# coordinates\n", + "ds.coords" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "277d6064", + "metadata": {}, + "outputs": [], + "source": [ + "# Meta Data (Attributes)\n", + "ds.attrs" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d334681f", + "metadata": {}, + "outputs": [], + "source": [ + "# type\n", + "print(type(ds))\n", + "print(type(ds.variables))\n", + "print(type(ds.dims))\n", + "print(type(ds.coords))\n", + "print(type(ds.attrs))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "643399d6", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: retrieve variable names only\n", + "list(ds.variables)" + ] + }, + { + "cell_type": "markdown", + "id": "1cf36878", + "metadata": {}, + "source": [ + "#### Discussion: variable vs. coordinates vs. dimenssions. \n", + "From [Components of a NetCDF Dataset](https://iprc.soest.hawaii.edu/users/xfu/tool/guidef-7.html#):\n", + "\n", + "* Dimession: A dimension may be used to represent a real physical dimension, for example, time, latitude, longitude, or height. A dimension might also be used to index other quantities, for example station or model-run-number.\n", + "A netCDF dimension has both a name and a length. A dimension length is an arbitrary positive integer, except that one dimension in a netCDF dataset can have the length UNLIMITED.\n", + "\n", + "* Variables: Variables are used to store the bulk of the data in a netCDF dataset. A variable represents an array of values of the same type. A scalar value is treated as a 0-dimensional array. 
A variable has a name, a data type, and a shape described by its list of dimensions specified when the variable is created. A variable may also have associated attributes, which may be added, deleted or changed after the variable is created.\n", + "\n", + "* Coordinate (Variables): It is legal for a variable to have the same name as a dimension. Such variables have no special meaning to the netCDF library. However there is a convention that such variables should be treated in a special way by software using this library.\n", + "A variable with the same name as a dimension is called a coordinate variable. It typically defines a physical coordinate corresponding to that dimension.\n", + "\n", + "\n", + "Tips: By definition, coordinates and dimensions are also variables. For example ds.time is a coordinate but it is also a special variable. But in practice, when we talk about a variable, it implies a regular/non-coordinate variable. We will use this convention for the remainder of the notebook. " + ] + }, + { + "cell_type": "markdown", + "id": "d47120f9", + "metadata": {}, + "source": [ + "### Xarray Variable\n", + "\n", + "\n", + "We can use `ds.variables` to access variables of a dataset. But printing out the whole dataset can often be overwhelming. Instead, we would work on the individual variable (also called Xarray data array.) using the `ds[var_name]` syntax, where `var_name` is the name of the variable. See the following examples in action.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5a57e136", + "metadata": {}, + "outputs": [], + "source": [ + "print(type(ds[\"time\"]))\n", + "ds[\"time\"]" + ] + }, + { + "cell_type": "markdown", + "id": "51ec643e", + "metadata": {}, + "source": [ + "#### Variable properties\n", + "For an individual variable, the following properties are most commonly used, along with the associated xarray syntax to retrieve them. 
(assuming var = ds[var_name])\n", + "* name: `var.name`\n", + "* data content: `var.data`\n", + "* attributes: `var.attrs`\n", + "* dimenstions: `var.dims`\n", + "* data type: `var.data.dtype`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a632a525", + "metadata": {}, + "outputs": [], + "source": [ + "var_name = \"time\"\n", + "var = ds[var_name]\n", + "\n", + "print(\"var.name: \\n\", var.name, \"\\n\")\n", + "print(\"var.data: \\n\", var.data, \"\\n\")\n", + "print(\"var.attrs: \\n\", var.attrs, \"\\n\")\n", + "print(\"var.dims: \\n\", var.dims, \"\\n\")\n", + "print(\"var.data.dtype: \\n\", var.data.dtype, \"\\n\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b256b07f", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: indivdual variable info table (collect and display variable info as a tabular format)\n", + "df_info = pd.DataFrame()\n", + "df_info[\"var_name\"] = list(ds.variables)\n", + "df_info[\"dims\"] = df_info.var_name.apply(lambda x: ds[x].dims)\n", + "df_info[\"is_dim\"] = df_info.var_name.apply(lambda x: x in ds.dims)\n", + "df_info[\"n_dim\"] = df_info.var_name.apply(lambda x: len(ds[x].dims))\n", + "df_info[\"attrs\"] = df_info.var_name.apply(lambda x: ds[x].attrs)\n", + "df_info[\"dtype\"] = df_info.var_name.apply(lambda x: ds[x].data.dtype)\n", + "df_info" + ] + }, + { + "cell_type": "markdown", + "id": "e6a36b96", + "metadata": {}, + "source": [ + "### Data cleaning/Preprocessing (skipped)\n", + "Data cleaning and preprocessing is an important stage in the the data analysis pipeline. However it will be out of the scope of this notebook. For more information about data cleaning and preprocessing basics, here are some references. 
\n", + "* [Xarray Fundamentals](https://earth-env-data-science.github.io/lectures/xarray/xarray.html)\n", + "* [Xarray Tutorial — Pangeo Gallery documentation](http://gallery.pangeo.io/repos/pangeo-data/pangeo-tutorial-gallery/xarray.html)\n", + "* [Pythonic Data Cleaning With pandas and NumPy](https://realpython.com/python-data-cleaning-numpy-pandas/)\n", + "* [Pandas - Cleaning Data](https://www.w3schools.com/python/pandas/pandas_cleaning.asp)" + ] + }, + { + "cell_type": "markdown", + "id": "ae6e1a69", + "metadata": {}, + "source": [ + "### Plotting\n", + "Data visualization (or plotting) is another important data analysis topic and deserves its own discussion. Here in this notebook we will only demonstrate basic tools for simple data visualization tasks.\n", + "\n", + "Here are some references you might find useful:\n", + "* xarray plotting: https://docs.xarray.dev/en/stable/user-guide/plotting.html\n", + "* Atmospheric Community Toolkit (ACT): https://arm-doe.github.io/ACT/index.html\n", + "\n", + "Note: this notebook is auto-generated using a template. It uses a general idea to select variable(s) to plot and is not customized to each individual notebook. Feel free to change the variables of interest, especially if a certain figure fails to plot." + ] + }, + { + "cell_type": "markdown", + "id": "7e417fb2", + "metadata": {}, + "source": [ + "#### 1-dimensional basic time series plot\n", + "\n", + "For the following plot we would like to find variables such that\n", + "* it has one and only one dimension\n", + "* \"time\" is its coordinate variable\n", + "* it is not a dimension itself,\n", + "* it is not a special variable with substrings within [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "\n", + "Please see the following example in action. 
(For more details about pandas filtering, please see the following references.)\n", + "* [pandas: multiple conditions while indexing data frame](https://stackoverflow.com/questions/22591174/pandas-multiple-conditions-while-indexing-data-frame-unexpected-behavior)\n", + "* [pandas.DataFrame.apply](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.apply.html)\n", + "* [How to test if a string contains one of the substrings in a list...](https://stackoverflow.com/questions/26577516/how-to-test-if-a-string-contains-one-of-the-substrings-in-a-list-in-pandas)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "30edac2d", + "metadata": {}, + "outputs": [], + "source": [ + "# (pandas query) filter n_dim==1, non-dimensional, and not containing the following substrings\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter = df_info[(df_info.n_dim == 1) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (df_info.is_dim==False) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4777a659", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " var_1d = df_filter.var_name.values[0]\n", + " var_1d\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fb5ae985", + "metadata": {}, + "outputs": [], + "source": [ + "# Note: if failed, change to another variable to plot.\n", + "try:\n", + " ds[var_1d].plot()\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "markdown", + "id": "9e41066e", + "metadata": {}, + "source": [ + "#### 2-dimensional basic plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "09bd4adc", + "metadata": {}, + "outputs": [], + "source": [ + "# (pandas query) filter n_dim==2, non-dimensional, and not containing the 
following substrings\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_2 = df_info[(df_info.n_dim == 2) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (df_info.is_dim==False) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter_2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2cdea1a9", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " var_2d = df_filter_2.var_name.values[0]\n", + " var_2d\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f5f48879", + "metadata": {}, + "outputs": [], + "source": [ + "# Note: if failed, change to another variable to plot.\n", + "try:\n", + " print(ds[var_2d].dims)\n", + " # ds[var_2d].plot()\n", + "\n", + " # conventionally, use \"time\" as x-axis\n", + " ds[var_2d].plot(x=\"time\")\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "markdown", + "id": "7554b2a6", + "metadata": {}, + "source": [ + "#### qc-plotting (optional)\n", + "\n", + "Note: act qc plotting has more strict requirement, one of them is the associated qc variable need to have \"flag_masks\" attributes. 
Which is added by using ds = act.io.armfiles.read_netcdf(files_list), then ds.clean.cleanup()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "71c2096f", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " ds_act = act.io.armfiles.read_netcdf(full_path)\n", + " print(type(ds_act))\n", + " ds_act.clean.cleanup()\n", + "\n", + " # or \n", + " # ds.clean.cleanup()\n", + "except Exception as e:\n", + " print(\"ERROR\", e)\n", + " ds_act = ds" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "844f8505", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter valid variables for ACT qc plotting\n", + "condition = (\"qc_\" + df_info.var_name).apply(lambda x: ds_act[x].attrs.get(\"flag_masks\") is not None \n", + " if x in list(ds.variables) else False)\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_3 = df_info[(df_info.is_dim==False) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings))) &\n", + " condition\n", + " ]\n", + "df_filter_3" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ecd38cc6", + "metadata": {}, + "outputs": [], + "source": [ + "# QC Plot\n", + "try:\n", + " qc_variable = df_filter_3.var_name.values[0]\n", + " print(qc_variable)\n", + "except Exception as e:\n", + " print(e)\n", + " \n", + "try:\n", + "\n", + " # Plot\n", + " qc_display = act.plotting.TimeSeriesDisplay(ds_act)\n", + " qc_display.add_subplots((2,), figsize = (9.5,10))\n", + " qc_ax = qc_display.plot(qc_variable, subplot_index=(0,), set_title=\"QC results on field: \" + qc_variable,)\n", + " qc_ax.grid()\n", + " qc_display.qc_flag_block_plot(qc_variable, subplot_index=(1,))\n", + " qc_ax.set_xlabel(f\"UTC Time starts at {ds.time.data[0]}\")\n", + "\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e)" + ] + }, + { + "cell_type": "markdown", + "id": "d76e4d27", + "metadata": 
{}, + "source": [ + "#### bonus: choose variables to plot from a dropdown menu " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bb733804", + "metadata": {}, + "outputs": [], + "source": [ + "# Valid variables filtering\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_4 = df_info[(df_info.is_dim==False) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter_4" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "82d5c20d", + "metadata": {}, + "outputs": [], + "source": [ + "# example 1: using xarray plot\n", + "\n", + "# Uncomment the following cell to try the interactive plot (ctrl + /)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2fc9aaa4", + "metadata": {}, + "outputs": [], + "source": [ + "# %matplotlib widget\n", + "# plt.clf()\n", + "\n", + "# fig, ax = plt.subplots(figsize=(10, 4))\n", + "\n", + "# available_variables = df_filter_4.var_name.values\n", + "# @widgets.interact(var=available_variables)\n", + "# def update(var = available_variables[0]):\n", + "# fig.clear() # Remove old lines from plot and plot new one\n", + "# if len(ds[var].dims)==2:\n", + "# ds[var].plot(x=\"time\", add_colorbar=False)\n", + "# else:\n", + "# ds[var].plot()\n", + "# plt.grid()\n", + "# plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fabdd802", + "metadata": {}, + "outputs": [], + "source": [ + "# example 2: using act plot\n", + "\n", + "# Uncomment the following cell to try the interactive plot (ctrl + /)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "962b4186", + "metadata": {}, + "outputs": [], + "source": [ + "# %matplotlib widget\n", + "# plt.clf()\n", + "\n", + "# available_variables = df_filter_4.var_name.values\n", + "\n", + "\n", + "# @widgets.interact(var=available_variables)\n", + "# def 
update(var = available_variables[0]):\n", + "\n", + "# i_display = act.plotting.TimeSeriesDisplay(ds_act)\n", + "# i_display.add_subplots((1,), figsize=(10, 4))\n", + "# ax = i_display.plot(var, subplot_index=(0,), set_title=f\"{var} ({ds_act[var].attrs['long_name']})\",)\n", + "\n", + "# ax.set_xlabel(f\"UTC Time starts at {ds.time.data[0]}\")\n", + "# ax.grid()\n", + "# plt.show()\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.16" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/MERGEDSMPSAPS/mergedsmpsaps.c1.ipynb b/VAPs/quicklook/MERGEDSMPSAPS/mergedsmpsaps.c1.ipynb new file mode 100644 index 00000000..ff408f74 --- /dev/null +++ b/VAPs/quicklook/MERGEDSMPSAPS/mergedsmpsaps.c1.ipynb @@ -0,0 +1,339 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# MERGEDSMPSAPS.C1 Plots\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/mergedsmpsaps) for more information about this vap." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib widget\n", + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import ipywidgets as widgets\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "# Data archive directory\n", + "DATA_DIR = r'/data/archive/'\n", + "\n", + "# Datastream info\n", + "DATASTREAM_NAME = 'mergedsmpsaps'\n", + "DATA_LEVEL = 'c1'\n", + "LOCATIONS = [{'end_date': '2022-09-30', 'facility': 'M1', 'site': 'hou', 'start_date': '2021-09-08'}, {'end_date': '2023-11-16', 'facility': 'M1', 'site': 'epc', 'start_date': '2023-01-19'}, {'end_date': '2023-12-09', 'facility': 'E13', 'site': 'sgp', 'start_date': '2016-11-15'}]" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "9faaf875", + "metadata": {}, + "source": [ + "## Define site, facility, and date range" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ac6764f5", + "metadata": {}, + "outputs": [], + "source": [ + "print(\"The following locations and date ranges are available for this VAP:\")\n", + "display(pd.DataFrame(LOCATIONS, columns=['site', 'facility', 'start_date', 'end_date']))" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "8d132223", + "metadata": {}, + "source": [ + "#### Define site, facility, and date range (date format: YYYY-MM-DD) using the variables below:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e563983a", + "metadata": {}, + "outputs": [], + "source": [ + "site_facility = ( 'sgp', 'E13' )\n", + "\n", + "date_start = '2023-12-07'\n", + "date_end = '2023-12-09'" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "bccd3dfe-2f99-49a5-bace-ea37e8dd8fc3", + "metadata": {}, + "source": [ + "## Load 
data files\n", + "Load data files from /data/archive/" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cb4b9a26-c574-49c0-a521-658fa553e39e", + "metadata": {}, + "outputs": [], + "source": [ + "# Compile list of files\n", + "site, facility = site_facility\n", + "d_date_start = datetime.strptime(date_start, '%Y-%m-%d')\n", + "d_date_end = datetime.strptime(date_end, '%Y-%m-%d')\n", + "dir_path = os.path.join(DATA_DIR + site, site + DATASTREAM_NAME + facility + r'.' + DATA_LEVEL )\n", + "dir_path\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6be8f3dc", + "metadata": {}, + "outputs": [], + "source": [ + "from datetime import date, timedelta\n", + "import pandas as pd\n", + "\n", + "def get_ARM_formated_dates(start_date, end_date):\n", + " \"\"\"\n", + " Get a list of ARM conventional formated date lists, based on start_date and end_date(inclusive)\n", + " EXAMPLE:\n", + " get_ARM_formated_dates(start_date=\"20180219\", end_date=\"20180221\")\n", + " >> [\"20180219\", \"20180220\", \"20180221\"] \n", + " \"\"\"\n", + " \n", + " _start_date = pd.to_datetime(start_date)\n", + " _end_date = pd.to_datetime(end_date)\n", + " \n", + " delta = _end_date - _start_date # returns timedelta \n", + " dates = []\n", + "\n", + " for i in range(delta.days + 1):\n", + " day = _start_date + timedelta(days=i)\n", + " day_formated = day.strftime(format=\"%Y%m%d\")\n", + " dates.append(day_formated)\n", + " return dates\n", + "\n", + "\n", + "get_ARM_formated_dates(start_date=date_start, end_date=date_end)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "51feea2e", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter a list of files based on date pattern\n", + "import glob\n", + "dates = get_ARM_formated_dates(start_date=date_start, end_date=date_end)\n", + "files_filter = []\n", + "for date in dates:\n", + " files_filter += glob.glob(f'{dir_path}/*.{date}*.*')\n", + " files_filter\n", + "files_filter" + 
] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0e5d0d7", + "metadata": {}, + "outputs": [], + "source": [ + "# Load files as a single dataset\n", + "files_list = files_filter \n", + "ds = act.io.armfiles.read_netcdf(files_list)\n", + "ds.clean.cleanup()\n", + "print(f'{len(files_list)} files loaded')\n", + "ds\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "4a551094-9ec0-4b64-b80a-9940573c2f50", + "metadata": {}, + "source": [ + "## Plot time series data\n", + "#### Define the list of variables to be plotted:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d09b789e-84f1-4605-846b-a72c110c8048", + "metadata": {}, + "outputs": [], + "source": [ + "variables_to_plot = ['merged_diameter_mobility', 'merged_dN_dlogDp', 'merged_total_N_conc']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3458fb08-035b-4898-9253-0a94e6f9c97b", + "metadata": {}, + "outputs": [], + "source": [ + "ts_display = act.plotting.TimeSeriesDisplay(ds)\n", + "ts_display.add_subplots((len(variables_to_plot),), figsize = (9.5,4*len(variables_to_plot)))\n", + "\n", + "for i,v in enumerate(variables_to_plot):\n", + " ts_ax = ts_display.plot(v, subplot_index=(i,), set_title=ds.variables[v].attrs['long_name'],)\n", + " ts_ax.grid()\n", + "\n", + "plt.show()\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "194399aa-1907-452b-8ba9-bc31d7f60291", + "metadata": {}, + "source": [ + "## Quality check plots\n", + "#### Define variable for QC plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9a0ef63d-3eeb-48fc-a24a-43258b6134b8", + "metadata": {}, + "outputs": [], + "source": [ + "qc_variable = 'effective_density'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "532663a3-4dc0-4497-bda8-018c5f91e1c4", + "metadata": {}, + "outputs": [], + "source": [ + "# QC Plot\n", + "if ('qc_'+qc_variable) in ds.variables:\n", + "\n", + " # Plot\n", + " 
qc_display = act.plotting.TimeSeriesDisplay(ds)\n", + " qc_display.add_subplots((2,), figsize = (9.5,10))\n", + " qc_ax = qc_display.plot(qc_variable, subplot_index=(0,), set_title=\"QC results on field: \" + qc_variable,)\n", + " qc_ax.grid()\n", + " qc_display.qc_flag_block_plot(qc_variable, subplot_index=(1,))\n", + "\n", + " plt.show()\n", + "else:\n", + " print(f'QC not available for the selected field: {qc_variable}')\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "2308db16-f362-4033-a11a-c3e5e75ad9ba", + "metadata": {}, + "source": [ + "## Field selection dropdown menu\n", + "Select variable to be plotted from a dropdown menu" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d5792fa8-2253-441a-8ac6-1e07a0345031", + "metadata": {}, + "outputs": [], + "source": [ + "plt.ioff()\n", + "\n", + "# populate dropdown menu with available variables \n", + "available_variables = [v for v in ds.variables if not('time' in v or v.startswith('qc_') or v.startswith('source_')) and 'long_name' in ds.variables[v].attrs]\n", + "d_variable = 'merged_diameter_mobility'\n", + "dropdown = widgets.Dropdown(\n", + " options = [(ds.variables[v].attrs['long_name'], v) for v in available_variables],\n", + " value= d_variable,\n", + " description='Field:',\n", + " disabled=False,\n", + ")\n", + "dropdown.layout.margin = '0px 30% 0px 20%'\n", + "dropdown.layout.width = '50%'\n", + "\n", + "# set up display\n", + "i_display = act.plotting.TimeSeriesDisplay(ds)\n", + "i_display.add_subplots((1,), figsize = (9.5,5))\n", + "i_ax = i_display.plot(d_variable, subplot_index=(0,), set_title=ds.variables[d_variable].attrs['long_name'],)\n", + "i_ax.grid()\n", + "i_fig = i_display.fig\n", + "\n", + "# update plot callback function\n", + "def update_plot(change):\n", + " i_ax.cla()\n", + " i_ax_new = i_display.plot(change.new, subplot_index=(0,), set_title=ds.variables[change.new].attrs['long_name'],)\n", + " i_ax_new.grid()\n", + " 
i_fig.canvas.draw()\n", + " i_fig.canvas.flush_events()\n", + "\n", + "dropdown.observe(update_plot, names='value')\n", + "\n", + "widgets.AppLayout(\n", + " header=dropdown,\n", + " center=i_fig.canvas,\n", + " pane_heights=[1, 6,1]\n", + ")\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.12" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/MERGESONDE/MERGESONDE_tutorial.ipynb b/VAPs/quicklook/MERGESONDE/MERGESONDE_tutorial.ipynb new file mode 100644 index 00000000..c2f745f8 --- /dev/null +++ b/VAPs/quicklook/MERGESONDE/MERGESONDE_tutorial.ipynb @@ -0,0 +1,838 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# MERGESONDE1MACE.C1 Notebook\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/mergesonde) for more information about this vap." + ] + }, + { + "cell_type": "markdown", + "id": "97097763", + "metadata": {}, + "source": [ + "In this notebook, we demonstrate the workflow to explore ARM vap data (using mergesonde1mace as an example.) Value-added products (VAPs) are higher-order data products that have been analyzed and processed to ease scientists’ use of ARM data in atmospheric research and global climate models. \n", + "Here is the main content we will cover." 
+ ] + }, + { + "cell_type": "markdown", + "id": "eddec40f", + "metadata": {}, + "source": [ + "# Table of Content\n", + "## Access the data\n", + "* How to retrieve the data\n", + "* Data path and file name conventions\n", + "* Load data\n", + "## Explore the data\n", + "* NetCDF Data structure\n", + "* Xarray essentials\n", + "* Xarray Variable\n", + "## Plot the data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "import random\n", + "\n", + "import glob\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "id": "2372d5be", + "metadata": {}, + "source": [ + "## Access the data" + ] + }, + { + "cell_type": "markdown", + "id": "5b0d9684", + "metadata": {}, + "source": [ + "### How to retrieve the data\n", + "We assume the path \"/data/archive\" is available where you are running this notebook. You can use `os.path.exists(\"/data/archive\")` to verify if the path exists at your machine.\n", + "\n", + "\n", + "### Data path and file name conventions\n", + "There several common terminologies regarding ARM data, for example, data-stream-name, data-level, site, facility, instrument, etc. (For more details, please see [ARM Data File Standards Version 1.2](https://www.google.com/search?q=arm+datastream+facility+cite+definition&rlz=1C1GCEJ_enUS1029US1029&ei=hb41ZICrBPukqtsPvMODIA&ved=0ahUKEwjAgeiS0aL-AhV7kmoFHbzhAAQQ4dUDCBA&uact=5&oq=arm+datastream+facility+cite+definition&gs_lcp=Cgxnd3Mtd2l6LXNlcnAQAzIFCAAQogQyBQgAEKIEMgUIABCiBDIFCAAQogQyBQgAEKIEOgoIABBHENYEELADOgoIIRCgARDDBBAKSgQIQRgAUJ4DWIoOYO0PaAFwAXgAgAGMAYgBuQmSAQMyLjmYAQCgAQHIAQjAAQE&sclient=gws-wiz-serp).) 
\n", + "\n", + "For example, this notebook is called `mergesonde1mace.c1`, where `mergesonde1mace` is the \"datastream name\", and `c1` is the \"data level\".\n", + "\n", + "This datastream also contains site `fkb` and facility `M1`. (Note: an individual datastream might have multiple site-facility pairs.)\n", + "In such a case, the data of this data-stream is stored at `/data/archive/fkb/fkbmergesonde1maceM1.c1`, which is in the format of `{DATA_DIR}/{site}/{site}{DATASTREAM_NAME}{facility}.{DATA_LEVEL}`. We can use the following method to assign the data-stream directory: `datastream_dir = os.path.join(DATA_DIR, site, site + DATASTREAM_NAME + facility + '.' + DATA_LEVEL )`\n", + "\n", + "The data files under datastream_dir also follow naming conventions. Once you reach the datastream_dir level, the most important file naming convention for differentiating the files is \"yyyyMMdd.hhmmss\", which comes in handy for filtering files by datetime. For example, we can use `glob.glob(f'{datastream_dir}/*.200709*.*')` to filter files from September 2007.\n", + "\n", + "(Note: refer to https://adc.arm.gov/discovery/#/ and the https://adc.arm.gov/solr8/metadata/select API to explore ARM datastreams and their associated available sites and facilities.)\n", + "\n", + "Please see the following examples in action." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d7e9eb85", + "metadata": {}, + "outputs": [], + "source": [ + "# Verify if DATA_DIR path exists\n", + "DATA_DIR = \"/data/archive\"\n", + "os.path.exists(DATA_DIR)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "586993fd", + "metadata": {}, + "outputs": [], + "source": [ + "# Speicify datastream_dir following the path conventions and check its existence\n", + "DATASTREAM_NAME = \"mergesonde1mace\"\n", + "DATA_LEVEL = \"c1\"\n", + "site = \"fkb\"\n", + "facility = \"M1\"\n", + "datastream_dir = os.path.join(DATA_DIR, site, site + DATASTREAM_NAME + facility + '.'
+ DATA_LEVEL )\n", + "print(datastream_dir)\n", + "print(os.path.exists(datastream_dir))\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0742f7c1", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: list 5 (random) files under datastream_dir\n", + "files = os.listdir(datastream_dir)\n", + "files[:5]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "39b98a36", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: get most recent file\n", + "list_of_files = glob.glob(f\"{datastream_dir}/*\") # * means all if need specific format then *.csv\n", + "latest_file = max(list_of_files, key=os.path.getctime)\n", + "latest_file" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "902d514e", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus sort datastream files based on datetime\n", + "files = os.listdir(datastream_dir)\n", + "file_sorted = files.copy()\n", + "file_sorted.sort() \n", + "print(file_sorted[:5])\n", + "\n", + "# to reverse\n", + "file_sorted_reverse = files.copy() \n", + "file_sorted_reverse.sort(reverse=True)\n", + "print(file_sorted_reverse[:5])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ec5923b2", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: pattern matching\n", + "# filter the 200709** files under datastream_dir\n", + "files_filter = glob.glob(f'{datastream_dir}/*.200709*.*')\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c4a4aa18", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: use armnotebook_utils.file_filter (TODO) to filter files based on datastream info \n" + ] + }, + { + "cell_type": "markdown", + "id": "cfeb9efc", + "metadata": {}, + "source": [ + "### Load data\n", + "The arm data is stored in [NetCDF](https://en.wikipedia.org/wiki/NetCDF#:~:text=%22NetCDF%20(network%20Common%20Data%20Form,format%20for%20representing%20scientific%20data.) format. 
We can use xarray's `open_dataset` method to load single file, or `open_mfdataset` to open multiple files. (Note: the latter will still return a single xarray dataset object by combining multiple files.)\n", + "\n", + "Note: open_dataset keeps the file handle open and lazy loads its contents. All parameters are passed directly to open_dataset. It is a preferable method over load_dataset for memory-effiency. (more details, please see [xarray.open_dataset](https://docs.xarray.dev/en/stable/generated/xarray.open_dataset.html))\n", + "\n", + "See the following example in action" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a440a329", + "metadata": {}, + "outputs": [], + "source": [ + "# open single file\n", + "full_path = latest_file\n", + "print(full_path)\n", + "ds_single = xr.open_dataset(full_path)\n", + "ds_single" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0143a3d", + "metadata": {}, + "outputs": [], + "source": [ + "print(ds_single)\n", + "print(type(ds_single))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1c0f8939", + "metadata": {}, + "outputs": [], + "source": [ + "# open multiple files\n", + "n_files = 3\n", + "full_paths = [os.path.join(datastream_dir, f_path) for f_path in file_sorted_reverse[:n_files]]\n", + "print(full_paths)\n", + "try: # Note: sometimes multiple files cannot be merged, so we used try except here.\n", + " ds_mutiple = xr.open_mfdataset(full_paths)\n", + " # ds_mutiple\n", + " print(type(ds_mutiple))\n", + " print(ds_mutiple)\n", + "except Exception as e:\n", + " print(e)" + ] + }, + { + "cell_type": "markdown", + "id": "3fa59943", + "metadata": {}, + "source": [ + "## Explore the data" + ] + }, + { + "cell_type": "markdown", + "id": "571f69b9", + "metadata": {}, + "source": [ + "### NetCDF Data structure \n", + "\n", + "\n", + "(If you are confident with NetCDF basics and xarray essentials, feel free to skip this session.)\n", + "Before we dive into 
data exploration, there are some essential concepts we should be familiar with (shown below): \n", + "* Dataset\n", + "* Data array\n", + "* Variable\n", + "* Dimension\n", + "* Coordinate\n", + "* Data Type\n", + "* Meta Data (Attributes)\n", + "\n", + "We will not go into details about NetCDF basics, but here are some references you might find helpful:\n", + "* [Components of a NetCDF Dataset](https://iprc.soest.hawaii.edu/users/xfu/tool/guidef-7.html#)\n", + "* [Network Common Data Form (NetCDF)](https://www.unidata.ucar.edu/software/netcdf/) and [A Brief History of (netCDF) Time](https://www.unidata.ucar.edu/software/netcdf/time/recs.html)\n", + "\n", + "\n", + "\n", + "### Xarray essentials\n", + "Earlier we introduced the xarray package and used `open_dataset` and `open_mfdataset` to retrieve a NetCDF data file as an xarray Dataset object. Recall that you can review the data in a notebook by using `print(ds)` or `ds`. \n", + "\n", + "Using xarray to retrieve the aforementioned NetCDF basics is straightforward.
In general,\n", + "* Dataset: `ds`\n", + "* Data array: `ds.variables`\n", + "* Variable: `ds.variables`\n", + "* Dimension: `ds.dims`\n", + "* Coordinate: `ds.coords`\n", + "* Data Type: `type`\n", + "* Meta Data (Attributes): `ds.attrs`\n", + "\n", + "Also, here are some references if you are new to xarray:\n", + "* [Xarray in 45 minutes](https://tutorial.xarray.dev/overview/xarray-in-45-min.html)\n", + "* [Handling NetCDF Files using XArray for Absolute Beginners](https://towardsdatascience.com/handling-netcdf-files-using-xarray-for-absolute-beginners-111a8ab4463f)\n", + "\n", + "Try the following commands in action\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b3eac323", + "metadata": {}, + "outputs": [], + "source": [ + "ds = ds_single" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "77ecf85d", + "metadata": {}, + "outputs": [], + "source": [ + "# Dataset \n", + "ds" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "25e7de09", + "metadata": {}, + "outputs": [], + "source": [ + "# Data array, Variable\n", + "# Note: the info can be overwhelming.
Do not attempt to grasp everything at the first glance.\n", + "ds.variables" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8c41b67e", + "metadata": {}, + "outputs": [], + "source": [ + "# dimenssions\n", + "ds.dims" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "156f1dfc", + "metadata": {}, + "outputs": [], + "source": [ + "# coordinates\n", + "ds.coords" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "277d6064", + "metadata": {}, + "outputs": [], + "source": [ + "# Meta Data (Attributes)\n", + "ds.attrs" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d334681f", + "metadata": {}, + "outputs": [], + "source": [ + "# type\n", + "print(type(ds))\n", + "print(type(ds.variables))\n", + "print(type(ds.dims))\n", + "print(type(ds.coords))\n", + "print(type(ds.attrs))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "643399d6", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: retrieve variable names only\n", + "list(ds.variables)" + ] + }, + { + "cell_type": "markdown", + "id": "1cf36878", + "metadata": {}, + "source": [ + "#### Discussion: variable vs. coordinates vs. dimenssions. \n", + "From [Components of a NetCDF Dataset](https://iprc.soest.hawaii.edu/users/xfu/tool/guidef-7.html#):\n", + "\n", + "* Dimession: A dimension may be used to represent a real physical dimension, for example, time, latitude, longitude, or height. A dimension might also be used to index other quantities, for example station or model-run-number.\n", + "A netCDF dimension has both a name and a length. A dimension length is an arbitrary positive integer, except that one dimension in a netCDF dataset can have the length UNLIMITED.\n", + "\n", + "* Variables: Variables are used to store the bulk of the data in a netCDF dataset. A variable represents an array of values of the same type. A scalar value is treated as a 0-dimensional array. 
A variable has a name, a data type, and a shape described by its list of dimensions specified when the variable is created. A variable may also have associated attributes, which may be added, deleted or changed after the variable is created.\n", + "\n", + "* Coordinate (Variables): It is legal for a variable to have the same name as a dimension. Such variables have no special meaning to the netCDF library. However there is a convention that such variables should be treated in a special way by software using this library.\n", + "A variable with the same name as a dimension is called a coordinate variable. It typically defines a physical coordinate corresponding to that dimension.\n", + "\n", + "\n", + "Tips: By definition, coordinates and dimensions are also variables. For example, ds.time is a coordinate but it is also a special variable. But in practice, when we talk about a variable, it implies a regular/non-coordinate variable. We will use this convention for the remainder of the notebook. " + ] + }, + { + "cell_type": "markdown", + "id": "d47120f9", + "metadata": {}, + "source": [ + "### Xarray Variable\n", + "\n", + "\n", + "We can use `ds.variables` to access the variables of a dataset. But many times viewing the print-out of the whole dataset can be overwhelming. Instead, we would work on an individual variable (also called an Xarray data array) using the `ds[var_name]` syntax, where `var_name` is the name of the variable. See the following examples in action.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5a57e136", + "metadata": {}, + "outputs": [], + "source": [ + "print(type(ds[\"time\"]))\n", + "ds[\"time\"]" + ] + }, + { + "cell_type": "markdown", + "id": "51ec643e", + "metadata": {}, + "source": [ + "#### Variable properties\n", + "For an individual variable, the following properties are mostly used, shown with the associated xarray method to retrieve them.
(assuming var = ds[var_name])\n", + "* name: `var.name`\n", + "* data content: `var.data`\n", + "* attributes: `var.attrs`\n", + "* dimensions: `var.dims`\n", + "* data type: `var.data.dtype`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a632a525", + "metadata": {}, + "outputs": [], + "source": [ + "var_name = \"time\"\n", + "var = ds[var_name]\n", + "\n", + "print(\"var.name: \\n\", var.name, \"\\n\")\n", + "print(\"var.data: \\n\", var.data, \"\\n\")\n", + "print(\"var.attrs: \\n\", var.attrs, \"\\n\")\n", + "print(\"var.dims: \\n\", var.dims, \"\\n\")\n", + "print(\"var.data.dtype: \\n\", var.data.dtype, \"\\n\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b256b07f", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: indivdual variable info table (collect and display variable info as a tabular format)\n", + "df_info = pd.DataFrame()\n", + "df_info[\"var_name\"] = list(ds.variables)\n", + "df_info[\"dims\"] = df_info.var_name.apply(lambda x: ds[x].dims)\n", + "df_info[\"is_dim\"] = df_info.var_name.apply(lambda x: x in ds.dims)\n", + "df_info[\"n_dim\"] = df_info.var_name.apply(lambda x: len(ds[x].dims))\n", + "df_info[\"attrs\"] = df_info.var_name.apply(lambda x: ds[x].attrs)\n", + "df_info[\"dtype\"] = df_info.var_name.apply(lambda x: ds[x].data.dtype)\n", + "df_info" + ] + }, + { + "cell_type": "markdown", + "id": "e6a36b96", + "metadata": {}, + "source": [ + "### Data cleaning/Preprocessing (skipped)\n", + "Data cleaning and preprocessing is an important stage in the data analysis pipeline. However, it is out of the scope of this notebook. For more information about data cleaning and preprocessing basics, here are some references.
\n", + "* [Xarray Fundamentals](https://earth-env-data-science.github.io/lectures/xarray/xarray.html)\n", + "* [Xarray Tutorial — Pangeo Gallery documentation](http://gallery.pangeo.io/repos/pangeo-data/pangeo-tutorial-gallery/xarray.html)\n", + "* [Pythonic Data Cleaning With pandas and NumPy](https://realpython.com/python-data-cleaning-numpy-pandas/)\n", + "* [Pandas - Cleaning Data](https://www.w3schools.com/python/pandas/pandas_cleaning.asp)" + ] + }, + { + "cell_type": "markdown", + "id": "ae6e1a69", + "metadata": {}, + "source": [ + "### Plotting\n", + "Data visualization (or plotting) is another important data analysis topic and deserves its own discussion. Here in this notebook we will only demonstrate basic tool for simple data visualization tasks.\n", + "\n", + "Here are some reference you might find useful:\n", + "* xarray plotting: https://docs.xarray.dev/en/stable/user-guide/plotting.html\n", + "* Atmospheric Community Toolkit (ACT): https://arm-doe.github.io/ACT/index.html\n", + "\n", + "Note: this notebook is auto-generated using a template. It uses a general idea to select variable(s) to plot and is not customized to each indivdual notebook. Feel free to change the variables in intrrests, especially certain figure is failed to plot." + ] + }, + { + "cell_type": "markdown", + "id": "7e417fb2", + "metadata": {}, + "source": [ + "#### 1-dimenssional basic time series plot\n", + "\n", + "For the following plot we would like to find variables such that\n", + "* it has one and only one dimession\n", + "* \"time\" is its coordinate variable\n", + "* it is not a dimenssion itself,\n", + "* it is not a special variable with substrings within [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "\n", + "Please see the following example in action. 
(For more details about pandas filtering, please see the following references.)\n", + "* [pandas: multiple conditions while indexing data frame](https://stackoverflow.com/questions/22591174/pandas-multiple-conditions-while-indexing-data-frame-unexpected-behavior)\n", + "* [pandas.DataFrame.apply](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.apply.html)\n", + "* [How to test if a string contains one of the substrings in a list...](https://stackoverflow.com/questions/26577516/how-to-test-if-a-string-contains-one-of-the-substrings-in-a-list-in-pandas)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "30edac2d", + "metadata": {}, + "outputs": [], + "source": [ + "# (pandas query) filter n_dim==1, non-dimensional, and does not contain the following substrings\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter = df_info[(df_info.n_dim == 1) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (df_info.is_dim==False) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4777a659", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " var_1d = df_filter.var_name.values[0]\n", + " var_1d\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fb5ae985", + "metadata": {}, + "outputs": [], + "source": [ + "# Note: if failed, change to another variable to plot.\n", + "try:\n", + " ds[var_1d].plot()\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "markdown", + "id": "9e41066e", + "metadata": {}, + "source": [ + "#### 2-dimensional basic plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "09bd4adc", + "metadata": {}, + "outputs": [], + "source": [ + "# (pandas query) filter n_dim==2, non-dimensional, and does not contain the 
following substrings\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_2 = df_info[(df_info.n_dim == 2) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (df_info.is_dim==False) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter_2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2cdea1a9", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " var_2d = df_filter_2.var_name.values[0]\n", + " var_2d\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f5f48879", + "metadata": {}, + "outputs": [], + "source": [ + "# Note: if failed, change to another variable to plot.\n", + "try:\n", + " print(ds[var_2d].dims)\n", + " # ds[var_2d].plot()\n", + "\n", + " # conventionally, use \"time\" as x-axis\n", + " ds[var_2d].plot(x=\"time\")\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "markdown", + "id": "7554b2a6", + "metadata": {}, + "source": [ + "#### qc-plotting (optional)\n", + "\n", + "Note: act qc plotting has more strict requirement, one of them is the associated qc variable need to have \"flag_masks\" attributes. 
Which is added by using ds = act.io.armfiles.read_netcdf(files_list), then ds.clean.cleanup()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "71c2096f", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " ds_act = act.io.armfiles.read_netcdf(full_path)\n", + " print(type(ds_act))\n", + " ds_act.clean.cleanup()\n", + "\n", + " # or \n", + " # ds.clean.cleanup()\n", + "except Exception as e:\n", + " print(\"ERROR\", e)\n", + " ds_act = ds" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "844f8505", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter valid variables for ACT qc plotting\n", + "condition = (\"qc_\" + df_info.var_name).apply(lambda x: ds_act[x].attrs.get(\"flag_masks\") is not None \n", + " if x in list(ds.variables) else False)\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_3 = df_info[(df_info.is_dim==False) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings))) &\n", + " condition\n", + " ]\n", + "df_filter_3" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ecd38cc6", + "metadata": {}, + "outputs": [], + "source": [ + "# QC Plot\n", + "try:\n", + " qc_variable = df_filter_3.var_name.values[0]\n", + " print(qc_variable)\n", + "except Exception as e:\n", + " print(e)\n", + " \n", + "try:\n", + "\n", + " # Plot\n", + " qc_display = act.plotting.TimeSeriesDisplay(ds_act)\n", + " qc_display.add_subplots((2,), figsize = (9.5,10))\n", + " qc_ax = qc_display.plot(qc_variable, subplot_index=(0,), set_title=\"QC results on field: \" + qc_variable,)\n", + " qc_ax.grid()\n", + " qc_display.qc_flag_block_plot(qc_variable, subplot_index=(1,))\n", + " qc_ax.set_xlabel(f\"UTC Time starts at {ds.time.data[0]}\")\n", + "\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e)" + ] + }, + { + "cell_type": "markdown", + "id": "d76e4d27", + "metadata": 
{}, + "source": [ + "#### bonus: choose variables to plot from a dropdown menu " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bb733804", + "metadata": {}, + "outputs": [], + "source": [ + "# Valid variables filtering\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_4 = df_info[(df_info.is_dim==False) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter_4" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "82d5c20d", + "metadata": {}, + "outputs": [], + "source": [ + "# example 1: using xarray plot\n", + "\n", + "# Uncomment the following cell to try the interactive plot (ctrl + /)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2fc9aaa4", + "metadata": {}, + "outputs": [], + "source": [ + "# %matplotlib widget\n", + "# plt.clf()\n", + "\n", + "# fig, ax = plt.subplots(figsize=(10, 4))\n", + "\n", + "# available_variables = df_filter_4.var_name.values\n", + "# @widgets.interact(var=available_variables)\n", + "# def update(var = available_variables[0]):\n", + "# fig.clear() # Remove old lines from plot and plot new one\n", + "# if len(ds[var].dims)==2:\n", + "# ds[var].plot(x=\"time\", add_colorbar=False)\n", + "# else:\n", + "# ds[var].plot()\n", + "# plt.grid()\n", + "# plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fabdd802", + "metadata": {}, + "outputs": [], + "source": [ + "# example 2: using act plot\n", + "\n", + "# Uncomment the following cell to try the interactive plot (ctrl + /)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "962b4186", + "metadata": {}, + "outputs": [], + "source": [ + "# %matplotlib widget\n", + "# plt.clf()\n", + "\n", + "# available_variables = df_filter_4.var_name.values\n", + "\n", + "\n", + "# @widgets.interact(var=available_variables)\n", + "# def 
update(var = available_variables[0]):\n", + "\n", + "# i_display = act.plotting.TimeSeriesDisplay(ds_act)\n", + "# i_display.add_subplots((1,), figsize=(10, 4))\n", + "# ax = i_display.plot(var, subplot_index=(0,), set_title=f\"{var} ({ds_act[var].attrs['long_name']})\",)\n", + "\n", + "# ax.set_xlabel(f\"UTC Time starts at {ds.time.data[0]}\")\n", + "# ax.grid()\n", + "# plt.show()\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.16" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/MERGESONDE/mergesonde1mace.c1.ipynb b/VAPs/quicklook/MERGESONDE/mergesonde1mace.c1.ipynb new file mode 100644 index 00000000..b8abfc16 --- /dev/null +++ b/VAPs/quicklook/MERGESONDE/mergesonde1mace.c1.ipynb @@ -0,0 +1,339 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# MERGESONDE1MACE.C1 Plots\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/mergesonde) for more information about this vap." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib widget\n", + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import ipywidgets as widgets\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "# Data archive directory\n", + "DATA_DIR = r'/data/archive/'\n", + "\n", + "# Datastream info\n", + "DATASTREAM_NAME = 'mergesonde1mace'\n", + "DATA_LEVEL = 'c1'\n", + "LOCATIONS = [{'end_date': '2007-12-31', 'facility': 'M1', 'site': 'fkb', 'start_date': '2007-04-01'}, {'end_date': '2008-12-28', 'facility': 'M1', 'site': 'hfe', 'start_date': '2008-05-15'}, {'end_date': '2012-02-09', 'facility': 'M1', 'site': 'gan', 'start_date': '2011-10-01'}, {'end_date': '2010-12-30', 'facility': 'M1', 'site': 'grw', 'start_date': '2009-05-02'}, {'end_date': '2005-09-15', 'facility': 'M1', 'site': 'pye', 'start_date': '2005-04-20'}, {'end_date': '2015-06-29', 'facility': 'C1', 'site': 'nsa', 'start_date': '2001-04-01'}, {'end_date': '2006-12-31', 'facility': 'M1', 'site': 'nim', 'start_date': '2006-01-10'}, {'end_date': '2015-06-29', 'facility': 'C1', 'site': 'sgp', 'start_date': '1996-07-15'}, {'end_date': '2014-06-29', 'facility': 'C1', 'site': 'twp', 'start_date': '2000-01-01'}, {'end_date': '2013-09-08', 'facility': 'C2', 'site': 'twp', 'start_date': '2002-01-01'}, {'end_date': '2015-01-05', 'facility': 'C3', 'site': 'twp', 'start_date': '2011-01-01'}]" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "9faaf875", + "metadata": {}, + "source": [ + "## Define site, facility, and date range" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ac6764f5", + "metadata": {}, + "outputs": [], + "source": [ + "print(\"The following locations and date ranges are available for 
this VAP:\")\n", + "display(pd.DataFrame(LOCATIONS, columns=['site', 'facility', 'start_date', 'end_date']))" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "8d132223", + "metadata": {}, + "source": [ + "#### Define site, facility, and date range (date format: YYYY-MM-DD) using the variables below:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e563983a", + "metadata": {}, + "outputs": [], + "source": [ + "site_facility = ( 'sgp', 'C1' )\n", + "\n", + "date_start = '2015-06-27'\n", + "date_end = '2015-06-29'" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "bccd3dfe-2f99-49a5-bace-ea37e8dd8fc3", + "metadata": {}, + "source": [ + "## Load data files\n", + "Load data files from /data/archive/" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cb4b9a26-c574-49c0-a521-658fa553e39e", + "metadata": {}, + "outputs": [], + "source": [ + "# Compile list of files\n", + "site, facility = site_facility\n", + "d_date_start = datetime.strptime(date_start, '%Y-%m-%d')\n", + "d_date_end = datetime.strptime(date_end, '%Y-%m-%d')\n", + "dir_path = os.path.join(DATA_DIR + site, site + DATASTREAM_NAME + facility + r'.' 
+ DATA_LEVEL )\n", + "dir_path\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6be8f3dc", + "metadata": {}, + "outputs": [], + "source": [ + "from datetime import date, timedelta\n", + "import pandas as pd\n", + "\n", + "def get_ARM_formated_dates(start_date, end_date):\n", + " \"\"\"\n", + " Get a list of ARM conventional formated date lists, based on start_date and end_date(inclusive)\n", + " EXAMPLE:\n", + " get_ARM_formated_dates(start_date=\"20180219\", end_date=\"20180221\")\n", + " >> [\"20180219\", \"20180220\", \"20180221\"] \n", + " \"\"\"\n", + " \n", + " _start_date = pd.to_datetime(start_date)\n", + " _end_date = pd.to_datetime(end_date)\n", + " \n", + " delta = _end_date - _start_date # returns timedelta \n", + " dates = []\n", + "\n", + " for i in range(delta.days + 1):\n", + " day = _start_date + timedelta(days=i)\n", + " day_formated = day.strftime(format=\"%Y%m%d\")\n", + " dates.append(day_formated)\n", + " return dates\n", + "\n", + "\n", + "get_ARM_formated_dates(start_date=date_start, end_date=date_end)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "51feea2e", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter a list of files based on date pattern\n", + "import glob\n", + "dates = get_ARM_formated_dates(start_date=date_start, end_date=date_end)\n", + "files_filter = []\n", + "for date in dates:\n", + " files_filter += glob.glob(f'{dir_path}/*.{date}*.*')\n", + " files_filter\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0e5d0d7", + "metadata": {}, + "outputs": [], + "source": [ + "# Load files as a single dataset\n", + "files_list = files_filter \n", + "ds = act.io.armfiles.read_netcdf(files_list)\n", + "ds.clean.cleanup()\n", + "print(f'{len(files_list)} files loaded')\n", + "ds\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "4a551094-9ec0-4b64-b80a-9940573c2f50", + "metadata": {}, + "source": [ + "## Plot 
time series data\n", + "#### Define the list of variables to be plotted:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d09b789e-84f1-4605-846b-a72c110c8048", + "metadata": {}, + "outputs": [], + "source": [ + "variables_to_plot = ['temp', 'rh', 'vap_pres']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3458fb08-035b-4898-9253-0a94e6f9c97b", + "metadata": {}, + "outputs": [], + "source": [ + "ts_display = act.plotting.TimeSeriesDisplay(ds)\n", + "ts_display.add_subplots((len(variables_to_plot),), figsize = (9.5,4*len(variables_to_plot)))\n", + "\n", + "for i,v in enumerate(variables_to_plot):\n", + " ts_ax = ts_display.plot(v, subplot_index=(i,), set_title=ds.variables[v].attrs['long_name'],)\n", + " ts_ax.grid()\n", + "\n", + "plt.show()\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "194399aa-1907-452b-8ba9-bc31d7f60291", + "metadata": {}, + "source": [ + "## Quality check plots\n", + "#### Define variable for QC plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9a0ef63d-3eeb-48fc-a24a-43258b6134b8", + "metadata": {}, + "outputs": [], + "source": [ + "qc_variable = 'precip'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "532663a3-4dc0-4497-bda8-018c5f91e1c4", + "metadata": {}, + "outputs": [], + "source": [ + "# QC Plot\n", + "if ('qc_'+qc_variable) in ds.variables:\n", + "\n", + " # Plot\n", + " qc_display = act.plotting.TimeSeriesDisplay(ds)\n", + " qc_display.add_subplots((2,), figsize = (9.5,10))\n", + " qc_ax = qc_display.plot(qc_variable, subplot_index=(0,), set_title=\"QC results on field: \" + qc_variable,)\n", + " qc_ax.grid()\n", + " qc_display.qc_flag_block_plot(qc_variable, subplot_index=(1,))\n", + "\n", + " plt.show()\n", + "else:\n", + " print(f'QC not available for the selected field: {qc_variable}')\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "2308db16-f362-4033-a11a-c3e5e75ad9ba", + 
"metadata": {}, + "source": [ + "## Field selection dropdown menu\n", + "Select variable to be plotted from a dropdown menu" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d5792fa8-2253-441a-8ac6-1e07a0345031", + "metadata": {}, + "outputs": [], + "source": [ + "plt.ioff()\n", + "\n", + "# populate dropdown menu with available variables \n", + "available_variables = [v for v in ds.variables if not('time' in v or v.startswith('qc_') or v.startswith('source_')) and 'long_name' in ds.variables[v].attrs]\n", + "d_variable = 'temp'\n", + "dropdown = widgets.Dropdown(\n", + " options = [(ds.variables[v].attrs['long_name'], v) for v in available_variables],\n", + " value= d_variable,\n", + " description='Field:',\n", + " disabled=False,\n", + ")\n", + "dropdown.layout.margin = '0px 30% 0px 20%'\n", + "dropdown.layout.width = '50%'\n", + "\n", + "# set up display\n", + "i_display = act.plotting.TimeSeriesDisplay(ds)\n", + "i_display.add_subplots((1,), figsize = (9.5,5))\n", + "i_ax = i_display.plot(d_variable, subplot_index=(0,), set_title=ds.variables[d_variable].attrs['long_name'],)\n", + "i_ax.grid()\n", + "i_fig = i_display.fig\n", + "\n", + "# update plot callback function\n", + "def update_plot(change):\n", + " i_ax.cla()\n", + " i_ax_new = i_display.plot(change.new, subplot_index=(0,), set_title=ds.variables[change.new].attrs['long_name'],)\n", + " i_ax_new.grid()\n", + " i_fig.canvas.draw()\n", + " i_fig.canvas.flush_events()\n", + "\n", + "dropdown.observe(update_plot, names='value')\n", + "\n", + "widgets.AppLayout(\n", + " header=dropdown,\n", + " center=i_fig.canvas,\n", + " pane_heights=[1, 6,1]\n", + ")\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + 
"pygments_lexer": "ipython3", + "version": "3.9.12" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/MERGESONDE/mergesonde2mace.c1.ipynb b/VAPs/quicklook/MERGESONDE/mergesonde2mace.c1.ipynb new file mode 100644 index 00000000..8c73a979 --- /dev/null +++ b/VAPs/quicklook/MERGESONDE/mergesonde2mace.c1.ipynb @@ -0,0 +1,339 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# MERGESONDE2MACE.C1 Plots\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/mergesonde) for more information about this vap." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib widget\n", + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import ipywidgets as widgets\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "# Data archive directory\n", + "DATA_DIR = r'/data/archive/'\n", + "\n", + "# Datastream info\n", + "DATASTREAM_NAME = 'mergesonde2mace'\n", + "DATA_LEVEL = 'c1'\n", + "LOCATIONS = [{'end_date': '2005-09-15', 'facility': 'M1', 'site': 'pye', 'start_date': '2005-04-20'}, {'end_date': '2007-01-07', 'facility': 'M1', 'site': 'nim', 'start_date': '2006-01-10'}]" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "9faaf875", + "metadata": {}, + "source": [ + "## Define site, facility, and date range" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ac6764f5", + "metadata": {}, + "outputs": [], + "source": [ + "print(\"The following locations and date ranges are available for this VAP:\")\n", + 
"display(pd.DataFrame(LOCATIONS, columns=['site', 'facility', 'start_date', 'end_date']))" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "8d132223", + "metadata": {}, + "source": [ + "#### Define site, facility, and date range (date format: YYYY-MM-DD) using the variables below:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e563983a", + "metadata": {}, + "outputs": [], + "source": [ + "site_facility = ( 'pye', 'M1' )\n", + "\n", + "date_start = '2005-09-13'\n", + "date_end = '2005-09-15'" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "bccd3dfe-2f99-49a5-bace-ea37e8dd8fc3", + "metadata": {}, + "source": [ + "## Load data files\n", + "Load data files from /data/archive/" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cb4b9a26-c574-49c0-a521-658fa553e39e", + "metadata": {}, + "outputs": [], + "source": [ + "# Compile list of files\n", + "site, facility = site_facility\n", + "d_date_start = datetime.strptime(date_start, '%Y-%m-%d')\n", + "d_date_end = datetime.strptime(date_end, '%Y-%m-%d')\n", + "dir_path = os.path.join(DATA_DIR + site, site + DATASTREAM_NAME + facility + r'.' 
+ DATA_LEVEL )\n", + "dir_path\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6be8f3dc", + "metadata": {}, + "outputs": [], + "source": [ + "from datetime import date, timedelta\n", + "import pandas as pd\n", + "\n", + "def get_ARM_formated_dates(start_date, end_date):\n", + " \"\"\"\n", + " Get a list of ARM conventional formated date lists, based on start_date and end_date(inclusive)\n", + " EXAMPLE:\n", + " get_ARM_formated_dates(start_date=\"20180219\", end_date=\"20180221\")\n", + " >> [\"20180219\", \"20180220\", \"20180221\"] \n", + " \"\"\"\n", + " \n", + " _start_date = pd.to_datetime(start_date)\n", + " _end_date = pd.to_datetime(end_date)\n", + " \n", + " delta = _end_date - _start_date # returns timedelta \n", + " dates = []\n", + "\n", + " for i in range(delta.days + 1):\n", + " day = _start_date + timedelta(days=i)\n", + " day_formated = day.strftime(format=\"%Y%m%d\")\n", + " dates.append(day_formated)\n", + " return dates\n", + "\n", + "\n", + "get_ARM_formated_dates(start_date=date_start, end_date=date_end)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "51feea2e", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter a list of files based on date pattern\n", + "import glob\n", + "dates = get_ARM_formated_dates(start_date=date_start, end_date=date_end)\n", + "files_filter = []\n", + "for date in dates:\n", + " files_filter += glob.glob(f'{dir_path}/*.{date}*.*')\n", + " files_filter\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0e5d0d7", + "metadata": {}, + "outputs": [], + "source": [ + "# Load files as a single dataset\n", + "files_list = files_filter \n", + "ds = act.io.armfiles.read_netcdf(files_list)\n", + "ds.clean.cleanup()\n", + "print(f'{len(files_list)} files loaded')\n", + "ds\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "4a551094-9ec0-4b64-b80a-9940573c2f50", + "metadata": {}, + "source": [ + "## Plot 
time series data\n", + "#### Define the list of variables to be plotted:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d09b789e-84f1-4605-846b-a72c110c8048", + "metadata": {}, + "outputs": [], + "source": [ + "variables_to_plot = ['temp', 'rh', 'vap_pres']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3458fb08-035b-4898-9253-0a94e6f9c97b", + "metadata": {}, + "outputs": [], + "source": [ + "ts_display = act.plotting.TimeSeriesDisplay(ds)\n", + "ts_display.add_subplots((len(variables_to_plot),), figsize = (9.5,4*len(variables_to_plot)))\n", + "\n", + "for i,v in enumerate(variables_to_plot):\n", + " ts_ax = ts_display.plot(v, subplot_index=(i,), set_title=ds.variables[v].attrs['long_name'],)\n", + " ts_ax.grid()\n", + "\n", + "plt.show()\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "194399aa-1907-452b-8ba9-bc31d7f60291", + "metadata": {}, + "source": [ + "## Quality check plots\n", + "#### Define variable for QC plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9a0ef63d-3eeb-48fc-a24a-43258b6134b8", + "metadata": {}, + "outputs": [], + "source": [ + "qc_variable = 'precip'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "532663a3-4dc0-4497-bda8-018c5f91e1c4", + "metadata": {}, + "outputs": [], + "source": [ + "# QC Plot\n", + "if ('qc_'+qc_variable) in ds.variables:\n", + "\n", + " # Plot\n", + " qc_display = act.plotting.TimeSeriesDisplay(ds)\n", + " qc_display.add_subplots((2,), figsize = (9.5,10))\n", + " qc_ax = qc_display.plot(qc_variable, subplot_index=(0,), set_title=\"QC results on field: \" + qc_variable,)\n", + " qc_ax.grid()\n", + " qc_display.qc_flag_block_plot(qc_variable, subplot_index=(1,))\n", + "\n", + " plt.show()\n", + "else:\n", + " print(f'QC not available for the selected field: {qc_variable}')\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "2308db16-f362-4033-a11a-c3e5e75ad9ba", + 
"metadata": {}, + "source": [ + "## Field selection dropdown menu\n", + "Select variable to be plotted from a dropdown menu" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d5792fa8-2253-441a-8ac6-1e07a0345031", + "metadata": {}, + "outputs": [], + "source": [ + "plt.ioff()\n", + "\n", + "# populate dropdown menu with available variables \n", + "available_variables = [v for v in ds.variables if not('time' in v or v.startswith('qc_') or v.startswith('source_')) and 'long_name' in ds.variables[v].attrs]\n", + "d_variable = 'temp'\n", + "dropdown = widgets.Dropdown(\n", + " options = [(ds.variables[v].attrs['long_name'], v) for v in available_variables],\n", + " value= d_variable,\n", + " description='Field:',\n", + " disabled=False,\n", + ")\n", + "dropdown.layout.margin = '0px 30% 0px 20%'\n", + "dropdown.layout.width = '50%'\n", + "\n", + "# set up display\n", + "i_display = act.plotting.TimeSeriesDisplay(ds)\n", + "i_display.add_subplots((1,), figsize = (9.5,5))\n", + "i_ax = i_display.plot(d_variable, subplot_index=(0,), set_title=ds.variables[d_variable].attrs['long_name'],)\n", + "i_ax.grid()\n", + "i_fig = i_display.fig\n", + "\n", + "# update plot callback function\n", + "def update_plot(change):\n", + " i_ax.cla()\n", + " i_ax_new = i_display.plot(change.new, subplot_index=(0,), set_title=ds.variables[change.new].attrs['long_name'],)\n", + " i_ax_new.grid()\n", + " i_fig.canvas.draw()\n", + " i_fig.canvas.flush_events()\n", + "\n", + "dropdown.observe(update_plot, names='value')\n", + "\n", + "widgets.AppLayout(\n", + " header=dropdown,\n", + " center=i_fig.canvas,\n", + " pane_heights=[1, 6,1]\n", + ")\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + 
"pygments_lexer": "ipython3", + "version": "3.9.12" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/MFRSRCLDOD/.ipynb_checkpoints/mfrsrcldod1min.c1-checkpoint.ipynb b/VAPs/quicklook/MFRSRCLDOD/.ipynb_checkpoints/mfrsrcldod1min.c1-checkpoint.ipynb new file mode 100644 index 00000000..41b70061 --- /dev/null +++ b/VAPs/quicklook/MFRSRCLDOD/.ipynb_checkpoints/mfrsrcldod1min.c1-checkpoint.ipynb @@ -0,0 +1,799 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# MFRSRCLDOD1MIN.C1 Plots\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/mfrsrcldod) for more information about this vap." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib widget\n", + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import ipywidgets as widgets\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "# Data archive directory\n", + "DATA_DIR = r'/data/archive/'\n", + "\n", + "# Datastream info\n", + "DATASTREAM_NAME = 'mfrsrcldod1min'\n", + "DATA_LEVEL = 'c1'\n", + "LOCATIONS = [{'end_date': '2017-11-01', 'facility': 'M1', 'site': 'asi', 'start_date': '2016-05-02'}, {'end_date': '2019-04-30', 'facility': 'M1', 'site': 'cor', 'start_date': '2018-09-18'}, {'end_date': '2013-07-01', 'facility': 'M1', 'site': 'pvc', 'start_date': '2012-07-09'}, {'end_date': '2015-08-29', 'facility': 'M1', 'site': 'mao', 'start_date': '2015-04-17'}, {'end_date': '2012-04-01', 'facility': 'M1', 'site': 'pgh', 'start_date': '2011-07-19'}, {'end_date': '2012-02-05', 'facility': 'M1', 'site': 
'gan', 'start_date': '2011-10-09'}, {'end_date': '2010-12-29', 'facility': 'M1', 'site': 'grw', 'start_date': '2009-05-04'}, {'end_date': '2018-03-13', 'facility': 'S1', 'site': 'mcq', 'start_date': '2016-04-01'}, {'end_date': '2007-11-15', 'facility': 'M1', 'site': 'fkb', 'start_date': '2007-05-07'}, {'end_date': '2019-10-27', 'facility': 'C1', 'site': 'ena', 'start_date': '2014-06-01'}, {'end_date': '2005-09-15', 'facility': 'M1', 'site': 'pye', 'start_date': '2005-02-19'}, {'end_date': '2021-01-29', 'facility': 'C1', 'site': 'sgp', 'start_date': '1998-01-01'}, {'end_date': '2011-10-19', 'facility': 'E10', 'site': 'sgp', 'start_date': '1997-10-31'}, {'end_date': '2021-09-21', 'facility': 'E11', 'site': 'sgp', 'start_date': '1997-08-23'}, {'end_date': '2021-09-21', 'facility': 'E12', 'site': 'sgp', 'start_date': '2001-07-24'}, {'end_date': '2022-06-30', 'facility': 'E13', 'site': 'sgp', 'start_date': '1998-07-10'}, {'end_date': '2021-09-21', 'facility': 'E15', 'site': 'sgp', 'start_date': '1997-09-10'}, {'end_date': '2011-11-15', 'facility': 'E16', 'site': 'sgp', 'start_date': '1997-08-21'}, {'end_date': '2009-11-17', 'facility': 'E18', 'site': 'sgp', 'start_date': '1997-10-17'}, {'end_date': '2011-05-23', 'facility': 'E19', 'site': 'sgp', 'start_date': '1998-07-09'}, {'end_date': '2009-10-14', 'facility': 'E1', 'site': 'sgp', 'start_date': '1997-11-21'}, {'end_date': '2011-11-17', 'facility': 'E20', 'site': 'sgp', 'start_date': '1999-04-24'}, {'end_date': '2009-12-01', 'facility': 'E22', 'site': 'sgp', 'start_date': '1999-01-15'}, {'end_date': '2009-11-14', 'facility': 'E24', 'site': 'sgp', 'start_date': '1997-11-26'}, {'end_date': '2002-04-08', 'facility': 'E25', 'site': 'sgp', 'start_date': '1998-01-11'}, {'end_date': '2009-12-04', 'facility': 'E27', 'site': 'sgp', 'start_date': '2003-12-30'}, {'end_date': '2009-10-20', 'facility': 'E2', 'site': 'sgp', 'start_date': '1997-11-05'}, {'end_date': '2021-06-22', 'facility': 'E31', 'site': 'sgp', 'start_date': 
'2011-11-26'}, {'end_date': '2021-09-21', 'facility': 'E32', 'site': 'sgp', 'start_date': '2011-11-26'}, {'end_date': '2021-09-21', 'facility': 'E33', 'site': 'sgp', 'start_date': '2011-09-27'}, {'end_date': '2021-09-21', 'facility': 'E34', 'site': 'sgp', 'start_date': '2011-09-28'}, {'end_date': '2021-09-21', 'facility': 'E35', 'site': 'sgp', 'start_date': '2011-09-28'}, {'end_date': '2021-09-21', 'facility': 'E36', 'site': 'sgp', 'start_date': '2011-10-18'}, {'end_date': '2021-09-21', 'facility': 'E37', 'site': 'sgp', 'start_date': '2011-11-02'}, {'end_date': '2017-10-15', 'facility': 'E38', 'site': 'sgp', 'start_date': '2011-12-15'}, {'end_date': '2009-10-28', 'facility': 'E3', 'site': 'sgp', 'start_date': '1998-07-24'}, {'end_date': '2011-09-26', 'facility': 'E4', 'site': 'sgp', 'start_date': '1997-12-20'}, {'end_date': '2009-11-02', 'facility': 'E5', 'site': 'sgp', 'start_date': '1998-03-22'}, {'end_date': '2011-10-18', 'facility': 'E6', 'site': 'sgp', 'start_date': '2003-12-19'}, {'end_date': '2011-11-14', 'facility': 'E7', 'site': 'sgp', 'start_date': '1999-07-12'}, {'end_date': '2009-11-10', 'facility': 'E8', 'site': 'sgp', 'start_date': '1997-09-03'}, {'end_date': '2021-09-21', 'facility': 'E9', 'site': 'sgp', 'start_date': '2008-03-25'}, {'end_date': '2014-06-04', 'facility': 'C1', 'site': 'twp', 'start_date': '1999-10-22'}, {'end_date': '2013-09-09', 'facility': 'C2', 'site': 'twp', 'start_date': '1999-09-08'}, {'end_date': '2014-10-05', 'facility': 'C3', 'site': 'twp', 'start_date': '2002-03-07'}]" + ] + }, + { + "cell_type": "markdown", + "id": "9faaf875", + "metadata": {}, + "source": [ + "## Define site, facility, and date range" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "ac6764f5", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The following locations and date ranges are available for this VAP:\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sitefacilitystart_dateend_date
0asiM12016-05-022017-11-01
1corM12018-09-182019-04-30
2pvcM12012-07-092013-07-01
3maoM12015-04-172015-08-29
4pghM12011-07-192012-04-01
5ganM12011-10-092012-02-05
6grwM12009-05-042010-12-29
7mcqS12016-04-012018-03-13
8fkbM12007-05-072007-11-15
9enaC12014-06-012019-10-27
10pyeM12005-02-192005-09-15
11sgpC11998-01-012021-01-29
12sgpE101997-10-312011-10-19
13sgpE111997-08-232021-09-21
14sgpE122001-07-242021-09-21
15sgpE131998-07-102022-06-30
16sgpE151997-09-102021-09-21
17sgpE161997-08-212011-11-15
18sgpE181997-10-172009-11-17
19sgpE191998-07-092011-05-23
20sgpE11997-11-212009-10-14
21sgpE201999-04-242011-11-17
22sgpE221999-01-152009-12-01
23sgpE241997-11-262009-11-14
24sgpE251998-01-112002-04-08
25sgpE272003-12-302009-12-04
26sgpE21997-11-052009-10-20
27sgpE312011-11-262021-06-22
28sgpE322011-11-262021-09-21
29sgpE332011-09-272021-09-21
30sgpE342011-09-282021-09-21
31sgpE352011-09-282021-09-21
32sgpE362011-10-182021-09-21
33sgpE372011-11-022021-09-21
34sgpE382011-12-152017-10-15
35sgpE31998-07-242009-10-28
36sgpE41997-12-202011-09-26
37sgpE51998-03-222009-11-02
38sgpE62003-12-192011-10-18
39sgpE71999-07-122011-11-14
40sgpE81997-09-032009-11-10
41sgpE92008-03-252021-09-21
42twpC11999-10-222014-06-04
43twpC21999-09-082013-09-09
44twpC32002-03-072014-10-05
\n", + "
" + ], + "text/plain": [ + " site facility start_date end_date\n", + "0 asi M1 2016-05-02 2017-11-01\n", + "1 cor M1 2018-09-18 2019-04-30\n", + "2 pvc M1 2012-07-09 2013-07-01\n", + "3 mao M1 2015-04-17 2015-08-29\n", + "4 pgh M1 2011-07-19 2012-04-01\n", + "5 gan M1 2011-10-09 2012-02-05\n", + "6 grw M1 2009-05-04 2010-12-29\n", + "7 mcq S1 2016-04-01 2018-03-13\n", + "8 fkb M1 2007-05-07 2007-11-15\n", + "9 ena C1 2014-06-01 2019-10-27\n", + "10 pye M1 2005-02-19 2005-09-15\n", + "11 sgp C1 1998-01-01 2021-01-29\n", + "12 sgp E10 1997-10-31 2011-10-19\n", + "13 sgp E11 1997-08-23 2021-09-21\n", + "14 sgp E12 2001-07-24 2021-09-21\n", + "15 sgp E13 1998-07-10 2022-06-30\n", + "16 sgp E15 1997-09-10 2021-09-21\n", + "17 sgp E16 1997-08-21 2011-11-15\n", + "18 sgp E18 1997-10-17 2009-11-17\n", + "19 sgp E19 1998-07-09 2011-05-23\n", + "20 sgp E1 1997-11-21 2009-10-14\n", + "21 sgp E20 1999-04-24 2011-11-17\n", + "22 sgp E22 1999-01-15 2009-12-01\n", + "23 sgp E24 1997-11-26 2009-11-14\n", + "24 sgp E25 1998-01-11 2002-04-08\n", + "25 sgp E27 2003-12-30 2009-12-04\n", + "26 sgp E2 1997-11-05 2009-10-20\n", + "27 sgp E31 2011-11-26 2021-06-22\n", + "28 sgp E32 2011-11-26 2021-09-21\n", + "29 sgp E33 2011-09-27 2021-09-21\n", + "30 sgp E34 2011-09-28 2021-09-21\n", + "31 sgp E35 2011-09-28 2021-09-21\n", + "32 sgp E36 2011-10-18 2021-09-21\n", + "33 sgp E37 2011-11-02 2021-09-21\n", + "34 sgp E38 2011-12-15 2017-10-15\n", + "35 sgp E3 1998-07-24 2009-10-28\n", + "36 sgp E4 1997-12-20 2011-09-26\n", + "37 sgp E5 1998-03-22 2009-11-02\n", + "38 sgp E6 2003-12-19 2011-10-18\n", + "39 sgp E7 1999-07-12 2011-11-14\n", + "40 sgp E8 1997-09-03 2009-11-10\n", + "41 sgp E9 2008-03-25 2021-09-21\n", + "42 twp C1 1999-10-22 2014-06-04\n", + "43 twp C2 1999-09-08 2013-09-09\n", + "44 twp C3 2002-03-07 2014-10-05" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "print(\"The following locations and date ranges are available for this VAP:\")\n", + 
"display(pd.DataFrame(LOCATIONS, columns=['site', 'facility', 'start_date', 'end_date']))" + ] + }, + { + "cell_type": "markdown", + "id": "8d132223", + "metadata": {}, + "source": [ + "#### Define site, facility, and date range (date format: YYYY-MM-DD) using the variables below:" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "e563983a", + "metadata": {}, + "outputs": [], + "source": [ + "site_facility = ( 'sgp', 'C1' )\n", + "\n", + "date_start = '2021-01-27'\n", + "date_end = '2021-01-29'" + ] + }, + { + "cell_type": "markdown", + "id": "bccd3dfe-2f99-49a5-bace-ea37e8dd8fc3", + "metadata": {}, + "source": [ + "## Load data files\n", + "Load data files from /data/archive/" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "cb4b9a26-c574-49c0-a521-658fa553e39e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'/data/archive/sgp/sgpmfrsrcldod1minC1.c1'" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Compile list of files\n", + "site, facility = site_facility\n", + "d_date_start = datetime.strptime(date_start, '%Y-%m-%d')\n", + "d_date_end = datetime.strptime(date_end, '%Y-%m-%d')\n", + "dir_path = os.path.join(DATA_DIR + site, site + DATASTREAM_NAME + facility + r'.' 
+ DATA_LEVEL )\n", + "dir_path\n" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "6be8f3dc", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['20210127', '20210128', '20210129']" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from datetime import date, timedelta\n", + "import pandas as pd\n", + "\n", + "def get_ARM_formated_dates(start_date, end_date):\n", + " \"\"\"\n", + " Get a list of ARM conventional formated date lists, based on start_date and end_date(inclusive)\n", + " EXAMPLE:\n", + " get_ARM_formated_dates(start_date=\"20180219\", end_date=\"20180221\")\n", + " >> [\"20180219\", \"20180220\", \"20180221\"] \n", + " \"\"\"\n", + " \n", + " _start_date = pd.to_datetime(start_date)\n", + " _end_date = pd.to_datetime(end_date)\n", + " \n", + " delta = _end_date - _start_date # returns timedelta \n", + " dates = []\n", + "\n", + " for i in range(delta.days + 1):\n", + " day = _start_date + timedelta(days=i)\n", + " day_formated = day.strftime(format=\"%Y%m%d\")\n", + " dates.append(day_formated)\n", + " return dates\n", + "\n", + "\n", + "get_ARM_formated_dates(start_date=date_start, end_date=date_end)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "51feea2e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['/data/archive/sgp/sgpmfrsrcldod1minC1.c1/sgpmfrsrcldod1minC1.c1.20210127.000000.cdf',\n", + " '/data/archive/sgp/sgpmfrsrcldod1minC1.c1/sgpmfrsrcldod1minC1.c1.20210128.000000.cdf',\n", + " '/data/archive/sgp/sgpmfrsrcldod1minC1.c1/sgpmfrsrcldod1minC1.c1.20210129.000000.cdf']" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Filter a list of files based on date pattern\n", + "import glob\n", + "dates = get_ARM_formated_dates(start_date=date_start, end_date=date_end)\n", + "files_filter = []\n", + "for date in dates:\n", + " files_filter += 
glob.glob(f'{dir_path}/*.{date}*.*')\n", + " files_filter\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "b0e5d0d7", + "metadata": {}, + "outputs": [ + { + "ename": "ValueError", + "evalue": "cannot reindex or align along dimension 'n_Io' because of conflicting dimension sizes: {113, 114}", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[7], line 3\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;66;03m# Load files as a single dataset\u001b[39;00m\n\u001b[1;32m 2\u001b[0m files_list \u001b[38;5;241m=\u001b[39m files_filter \n\u001b[0;32m----> 3\u001b[0m ds \u001b[38;5;241m=\u001b[39m \u001b[43mact\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mio\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43marmfiles\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mread_netcdf\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfiles_list\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 4\u001b[0m ds\u001b[38;5;241m.\u001b[39mclean\u001b[38;5;241m.\u001b[39mcleanup()\n\u001b[1;32m 5\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mlen\u001b[39m(files_list)\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m files loaded\u001b[39m\u001b[38;5;124m'\u001b[39m)\n", + "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/act/io/armfiles.py:168\u001b[0m, in \u001b[0;36mread_netcdf\u001b[0;34m(filenames, concat_dim, return_None, combine, decode_times, use_cftime, use_base_time, combine_attrs, cleanup_qc, keep_variables, **kwargs)\u001b[0m\n\u001b[1;32m 164\u001b[0m ds \u001b[38;5;241m=\u001b[39m xr\u001b[38;5;241m.\u001b[39mopen_mfdataset(filenames, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n\u001b[1;32m 166\u001b[0m 
\u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 167\u001b[0m \u001b[38;5;66;03m# When all else fails raise the orginal exception\u001b[39;00m\n\u001b[0;32m--> 168\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m exception\n\u001b[1;32m 170\u001b[0m \u001b[38;5;66;03m# If requested use base_time and time_offset to derive time. Assumes that the units\u001b[39;00m\n\u001b[1;32m 171\u001b[0m \u001b[38;5;66;03m# of both are in seconds and that the value is number of seconds since epoch.\u001b[39;00m\n\u001b[1;32m 172\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m use_base_time:\n", + "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/act/io/armfiles.py:143\u001b[0m, in \u001b[0;36mread_netcdf\u001b[0;34m(filenames, concat_dim, return_None, combine, decode_times, use_cftime, use_base_time, combine_attrs, cleanup_qc, keep_variables, **kwargs)\u001b[0m\n\u001b[1;32m 139\u001b[0m except_tuple \u001b[38;5;241m=\u001b[39m except_tuple \u001b[38;5;241m+\u001b[39m (\u001b[38;5;167;01mFileNotFoundError\u001b[39;00m, \u001b[38;5;167;01mOSError\u001b[39;00m)\n\u001b[1;32m 141\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m 142\u001b[0m \u001b[38;5;66;03m# Read data file with Xarray function\u001b[39;00m\n\u001b[0;32m--> 143\u001b[0m ds \u001b[38;5;241m=\u001b[39m \u001b[43mxr\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mopen_mfdataset\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfilenames\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 145\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m except_tuple \u001b[38;5;28;01mas\u001b[39;00m exception:\n\u001b[1;32m 146\u001b[0m \u001b[38;5;66;03m# If requested return None for File not found error\u001b[39;00m\n\u001b[1;32m 147\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mtype\u001b[39m(exception)\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__name__\u001b[39m 
\u001b[38;5;241m==\u001b[39m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mFileNotFoundError\u001b[39m\u001b[38;5;124m'\u001b[39m:\n", + "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/xarray/backends/api.py:1026\u001b[0m, in \u001b[0;36mopen_mfdataset\u001b[0;34m(paths, chunks, concat_dim, compat, preprocess, engine, data_vars, coords, combine, parallel, join, attrs_file, combine_attrs, **kwargs)\u001b[0m\n\u001b[1;32m 1013\u001b[0m combined \u001b[38;5;241m=\u001b[39m _nested_combine(\n\u001b[1;32m 1014\u001b[0m datasets,\n\u001b[1;32m 1015\u001b[0m concat_dims\u001b[38;5;241m=\u001b[39mconcat_dim,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 1021\u001b[0m combine_attrs\u001b[38;5;241m=\u001b[39mcombine_attrs,\n\u001b[1;32m 1022\u001b[0m )\n\u001b[1;32m 1023\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m combine \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mby_coords\u001b[39m\u001b[38;5;124m\"\u001b[39m:\n\u001b[1;32m 1024\u001b[0m \u001b[38;5;66;03m# Redo ordering from coordinates, ignoring how they were ordered\u001b[39;00m\n\u001b[1;32m 1025\u001b[0m \u001b[38;5;66;03m# previously\u001b[39;00m\n\u001b[0;32m-> 1026\u001b[0m combined \u001b[38;5;241m=\u001b[39m \u001b[43mcombine_by_coords\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1027\u001b[0m \u001b[43m \u001b[49m\u001b[43mdatasets\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1028\u001b[0m \u001b[43m \u001b[49m\u001b[43mcompat\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcompat\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1029\u001b[0m \u001b[43m \u001b[49m\u001b[43mdata_vars\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdata_vars\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1030\u001b[0m \u001b[43m \u001b[49m\u001b[43mcoords\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcoords\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1031\u001b[0m \u001b[43m 
\u001b[49m\u001b[43mjoin\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mjoin\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1032\u001b[0m \u001b[43m \u001b[49m\u001b[43mcombine_attrs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcombine_attrs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1033\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1034\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 1035\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[1;32m 1036\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m{}\u001b[39;00m\u001b[38;5;124m is an invalid option for the keyword argument\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 1037\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m ``combine``\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;241m.\u001b[39mformat(combine)\n\u001b[1;32m 1038\u001b[0m )\n", + "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/xarray/core/combine.py:982\u001b[0m, in \u001b[0;36mcombine_by_coords\u001b[0;34m(data_objects, compat, data_vars, coords, fill_value, join, combine_attrs, datasets)\u001b[0m\n\u001b[1;32m 980\u001b[0m concatenated_grouped_by_data_vars \u001b[38;5;241m=\u001b[39m []\n\u001b[1;32m 981\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m \u001b[38;5;28mvars\u001b[39m, datasets_with_same_vars \u001b[38;5;129;01min\u001b[39;00m grouped_by_vars:\n\u001b[0;32m--> 982\u001b[0m concatenated \u001b[38;5;241m=\u001b[39m \u001b[43m_combine_single_variable_hypercube\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 983\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;28;43mlist\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mdatasets_with_same_vars\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 984\u001b[0m \u001b[43m \u001b[49m\u001b[43mfill_value\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mfill_value\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 985\u001b[0m \u001b[43m 
\u001b[49m\u001b[43mdata_vars\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdata_vars\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 986\u001b[0m \u001b[43m \u001b[49m\u001b[43mcoords\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcoords\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 987\u001b[0m \u001b[43m \u001b[49m\u001b[43mcompat\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcompat\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 988\u001b[0m \u001b[43m \u001b[49m\u001b[43mjoin\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mjoin\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 989\u001b[0m \u001b[43m \u001b[49m\u001b[43mcombine_attrs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcombine_attrs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 990\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 991\u001b[0m concatenated_grouped_by_data_vars\u001b[38;5;241m.\u001b[39mappend(concatenated)\n\u001b[1;32m 993\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m merge(\n\u001b[1;32m 994\u001b[0m concatenated_grouped_by_data_vars,\n\u001b[1;32m 995\u001b[0m compat\u001b[38;5;241m=\u001b[39mcompat,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 998\u001b[0m combine_attrs\u001b[38;5;241m=\u001b[39mcombine_attrs,\n\u001b[1;32m 999\u001b[0m )\n", + "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/xarray/core/combine.py:640\u001b[0m, in \u001b[0;36m_combine_single_variable_hypercube\u001b[0;34m(datasets, fill_value, data_vars, coords, compat, join, combine_attrs)\u001b[0m\n\u001b[1;32m 637\u001b[0m _check_dimension_depth_tile_ids(combined_ids)\n\u001b[1;32m 639\u001b[0m \u001b[38;5;66;03m# Concatenate along all of concat_dims one by one to create single ds\u001b[39;00m\n\u001b[0;32m--> 640\u001b[0m concatenated \u001b[38;5;241m=\u001b[39m \u001b[43m_combine_nd\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 641\u001b[0m \u001b[43m \u001b[49m\u001b[43mcombined_ids\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 642\u001b[0m \u001b[43m 
\u001b[49m\u001b[43mconcat_dims\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mconcat_dims\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 643\u001b[0m \u001b[43m \u001b[49m\u001b[43mdata_vars\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdata_vars\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 644\u001b[0m \u001b[43m \u001b[49m\u001b[43mcoords\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcoords\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 645\u001b[0m \u001b[43m \u001b[49m\u001b[43mcompat\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcompat\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 646\u001b[0m \u001b[43m \u001b[49m\u001b[43mfill_value\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mfill_value\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 647\u001b[0m \u001b[43m \u001b[49m\u001b[43mjoin\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mjoin\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 648\u001b[0m \u001b[43m \u001b[49m\u001b[43mcombine_attrs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcombine_attrs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 649\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 651\u001b[0m \u001b[38;5;66;03m# Check the overall coordinates are monotonically increasing\u001b[39;00m\n\u001b[1;32m 652\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m dim \u001b[38;5;129;01min\u001b[39;00m concat_dims:\n", + "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/xarray/core/combine.py:239\u001b[0m, in \u001b[0;36m_combine_nd\u001b[0;34m(combined_ids, concat_dims, data_vars, coords, compat, fill_value, join, combine_attrs)\u001b[0m\n\u001b[1;32m 235\u001b[0m \u001b[38;5;66;03m# Each iteration of this loop reduces the length of the tile_ids tuples\u001b[39;00m\n\u001b[1;32m 236\u001b[0m \u001b[38;5;66;03m# by one. 
It always combines along the first dimension, removing the first\u001b[39;00m\n\u001b[1;32m 237\u001b[0m \u001b[38;5;66;03m# element of the tuple\u001b[39;00m\n\u001b[1;32m 238\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m concat_dim \u001b[38;5;129;01min\u001b[39;00m concat_dims:\n\u001b[0;32m--> 239\u001b[0m combined_ids \u001b[38;5;241m=\u001b[39m \u001b[43m_combine_all_along_first_dim\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 240\u001b[0m \u001b[43m \u001b[49m\u001b[43mcombined_ids\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 241\u001b[0m \u001b[43m \u001b[49m\u001b[43mdim\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mconcat_dim\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 242\u001b[0m \u001b[43m \u001b[49m\u001b[43mdata_vars\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdata_vars\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 243\u001b[0m \u001b[43m \u001b[49m\u001b[43mcoords\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcoords\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 244\u001b[0m \u001b[43m \u001b[49m\u001b[43mcompat\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcompat\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 245\u001b[0m \u001b[43m \u001b[49m\u001b[43mfill_value\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mfill_value\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 246\u001b[0m \u001b[43m \u001b[49m\u001b[43mjoin\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mjoin\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 247\u001b[0m \u001b[43m \u001b[49m\u001b[43mcombine_attrs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcombine_attrs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 248\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 249\u001b[0m (combined_ds,) \u001b[38;5;241m=\u001b[39m combined_ids\u001b[38;5;241m.\u001b[39mvalues()\n\u001b[1;32m 250\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m combined_ds\n", + "File 
\u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/xarray/core/combine.py:275\u001b[0m, in \u001b[0;36m_combine_all_along_first_dim\u001b[0;34m(combined_ids, dim, data_vars, coords, compat, fill_value, join, combine_attrs)\u001b[0m\n\u001b[1;32m 273\u001b[0m combined_ids \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mdict\u001b[39m(\u001b[38;5;28msorted\u001b[39m(group))\n\u001b[1;32m 274\u001b[0m datasets \u001b[38;5;241m=\u001b[39m combined_ids\u001b[38;5;241m.\u001b[39mvalues()\n\u001b[0;32m--> 275\u001b[0m new_combined_ids[new_id] \u001b[38;5;241m=\u001b[39m \u001b[43m_combine_1d\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 276\u001b[0m \u001b[43m \u001b[49m\u001b[43mdatasets\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdim\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcompat\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdata_vars\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcoords\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mfill_value\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mjoin\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcombine_attrs\u001b[49m\n\u001b[1;32m 277\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 278\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m new_combined_ids\n", + "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/xarray/core/combine.py:298\u001b[0m, in \u001b[0;36m_combine_1d\u001b[0;34m(datasets, concat_dim, compat, data_vars, coords, fill_value, join, combine_attrs)\u001b[0m\n\u001b[1;32m 296\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m concat_dim \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 297\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 298\u001b[0m combined \u001b[38;5;241m=\u001b[39m \u001b[43mconcat\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 299\u001b[0m \u001b[43m 
\u001b[49m\u001b[43mdatasets\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 300\u001b[0m \u001b[43m \u001b[49m\u001b[43mdim\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mconcat_dim\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 301\u001b[0m \u001b[43m \u001b[49m\u001b[43mdata_vars\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdata_vars\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 302\u001b[0m \u001b[43m \u001b[49m\u001b[43mcoords\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcoords\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 303\u001b[0m \u001b[43m \u001b[49m\u001b[43mcompat\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcompat\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 304\u001b[0m \u001b[43m \u001b[49m\u001b[43mfill_value\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mfill_value\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 305\u001b[0m \u001b[43m \u001b[49m\u001b[43mjoin\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mjoin\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 306\u001b[0m \u001b[43m \u001b[49m\u001b[43mcombine_attrs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcombine_attrs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 307\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 308\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m err:\n\u001b[1;32m 309\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mencountered unexpected variable\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mstr\u001b[39m(err):\n", + "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/xarray/core/concat.py:248\u001b[0m, in \u001b[0;36mconcat\u001b[0;34m(objs, dim, data_vars, coords, compat, positions, fill_value, join, combine_attrs)\u001b[0m\n\u001b[1;32m 236\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m _dataarray_concat(\n\u001b[1;32m 237\u001b[0m 
objs,\n\u001b[1;32m 238\u001b[0m dim\u001b[38;5;241m=\u001b[39mdim,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 245\u001b[0m combine_attrs\u001b[38;5;241m=\u001b[39mcombine_attrs,\n\u001b[1;32m 246\u001b[0m )\n\u001b[1;32m 247\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(first_obj, Dataset):\n\u001b[0;32m--> 248\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43m_dataset_concat\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 249\u001b[0m \u001b[43m \u001b[49m\u001b[43mobjs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 250\u001b[0m \u001b[43m \u001b[49m\u001b[43mdim\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdim\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 251\u001b[0m \u001b[43m \u001b[49m\u001b[43mdata_vars\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdata_vars\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 252\u001b[0m \u001b[43m \u001b[49m\u001b[43mcoords\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcoords\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 253\u001b[0m \u001b[43m \u001b[49m\u001b[43mcompat\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcompat\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 254\u001b[0m \u001b[43m \u001b[49m\u001b[43mpositions\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mpositions\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 255\u001b[0m \u001b[43m \u001b[49m\u001b[43mfill_value\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mfill_value\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 256\u001b[0m \u001b[43m \u001b[49m\u001b[43mjoin\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mjoin\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 257\u001b[0m \u001b[43m \u001b[49m\u001b[43mcombine_attrs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcombine_attrs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 258\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 259\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 260\u001b[0m 
\u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mTypeError\u001b[39;00m(\n\u001b[1;32m 261\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcan only concatenate xarray Dataset and DataArray \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 262\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mobjects, got \u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mtype\u001b[39m(first_obj)\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 263\u001b[0m )\n", + "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/xarray/core/concat.py:471\u001b[0m, in \u001b[0;36m_dataset_concat\u001b[0;34m(datasets, dim, data_vars, coords, compat, positions, fill_value, join, combine_attrs)\u001b[0m\n\u001b[1;32m 468\u001b[0m \u001b[38;5;66;03m# Make sure we're working on a copy (we'll be loading variables)\u001b[39;00m\n\u001b[1;32m 469\u001b[0m datasets \u001b[38;5;241m=\u001b[39m [ds\u001b[38;5;241m.\u001b[39mcopy() \u001b[38;5;28;01mfor\u001b[39;00m ds \u001b[38;5;129;01min\u001b[39;00m datasets]\n\u001b[1;32m 470\u001b[0m datasets \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mlist\u001b[39m(\n\u001b[0;32m--> 471\u001b[0m \u001b[43malign\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mdatasets\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mjoin\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mjoin\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcopy\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mexclude\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m[\u001b[49m\u001b[43mdim\u001b[49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mfill_value\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mfill_value\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 472\u001b[0m )\n\u001b[1;32m 474\u001b[0m dim_coords, dims_sizes, coord_names, 
data_names \u001b[38;5;241m=\u001b[39m _parse_datasets(datasets)\n\u001b[1;32m 475\u001b[0m dim_names \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mset\u001b[39m(dim_coords)\n", + "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/xarray/core/alignment.py:797\u001b[0m, in \u001b[0;36malign\u001b[0;34m(join, copy, indexes, exclude, fill_value, *objects)\u001b[0m\n\u001b[1;32m 601\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 602\u001b[0m \u001b[38;5;124;03mGiven any number of Dataset and/or DataArray objects, returns new\u001b[39;00m\n\u001b[1;32m 603\u001b[0m \u001b[38;5;124;03mobjects with aligned indexes and dimension sizes.\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 787\u001b[0m \n\u001b[1;32m 788\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 789\u001b[0m aligner \u001b[38;5;241m=\u001b[39m Aligner(\n\u001b[1;32m 790\u001b[0m objects,\n\u001b[1;32m 791\u001b[0m join\u001b[38;5;241m=\u001b[39mjoin,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 795\u001b[0m fill_value\u001b[38;5;241m=\u001b[39mfill_value,\n\u001b[1;32m 796\u001b[0m )\n\u001b[0;32m--> 797\u001b[0m \u001b[43maligner\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43malign\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 798\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m aligner\u001b[38;5;241m.\u001b[39mresults\n", + "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/xarray/core/alignment.py:585\u001b[0m, in \u001b[0;36mAligner.align\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 583\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39massert_no_index_conflict()\n\u001b[1;32m 584\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39malign_indexes()\n\u001b[0;32m--> 585\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43massert_unindexed_dim_sizes_equal\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 
587\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mjoin \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124moverride\u001b[39m\u001b[38;5;124m\"\u001b[39m:\n\u001b[1;32m 588\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39moverride_indexes()\n", + "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/xarray/core/alignment.py:484\u001b[0m, in \u001b[0;36mAligner.assert_unindexed_dim_sizes_equal\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 482\u001b[0m add_err_msg \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 483\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(sizes) \u001b[38;5;241m>\u001b[39m \u001b[38;5;241m1\u001b[39m:\n\u001b[0;32m--> 484\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[1;32m 485\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcannot reindex or align along dimension \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mdim\u001b[38;5;132;01m!r}\u001b[39;00m\u001b[38;5;124m \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 486\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mbecause of conflicting dimension sizes: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00msizes\u001b[38;5;132;01m!r}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;241m+\u001b[39m add_err_msg\n\u001b[1;32m 487\u001b[0m )\n", + "\u001b[0;31mValueError\u001b[0m: cannot reindex or align along dimension 'n_Io' because of conflicting dimension sizes: {113, 114}" + ] + } + ], + "source": [ + "# Load files as a single dataset\n", + "files_list = files_filter \n", + "ds = act.io.armfiles.read_netcdf(files_list)\n", + "ds.clean.cleanup()\n", + "print(f'{len(files_list)} files loaded')\n", + "ds\n" + ] + }, + { + "cell_type": "markdown", + "id": "4a551094-9ec0-4b64-b80a-9940573c2f50", + "metadata": {}, + 
"source": [ + "## Plot time series data\n", + "#### Define the list of variables to be plotted:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d09b789e-84f1-4605-846b-a72c110c8048", + "metadata": {}, + "outputs": [], + "source": [ + "variables_to_plot = ['optical_depth_instantaneous']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3458fb08-035b-4898-9253-0a94e6f9c97b", + "metadata": {}, + "outputs": [], + "source": [ + "ts_display = act.plotting.TimeSeriesDisplay(ds)\n", + "ts_display.add_subplots((len(variables_to_plot),), figsize = (9.5,4*len(variables_to_plot)))\n", + "\n", + "for i,v in enumerate(variables_to_plot):\n", + " ts_ax = ts_display.plot(v, subplot_index=(i,), set_title=ds.variables[v].attrs['long_name'],)\n", + " ts_ax.grid()\n", + "\n", + "plt.show()\n" + ] + }, + { + "cell_type": "markdown", + "id": "194399aa-1907-452b-8ba9-bc31d7f60291", + "metadata": {}, + "source": [ + "## Quality check plots\n", + "#### Define variable for QC plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9a0ef63d-3eeb-48fc-a24a-43258b6134b8", + "metadata": {}, + "outputs": [], + "source": [ + "qc_variable = 'optical_depth_instantaneous'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "532663a3-4dc0-4497-bda8-018c5f91e1c4", + "metadata": {}, + "outputs": [], + "source": [ + "# QC Plot\n", + "if ('qc_'+qc_variable) in ds.variables:\n", + "\n", + " # Plot\n", + " qc_display = act.plotting.TimeSeriesDisplay(ds)\n", + " qc_display.add_subplots((2,), figsize = (9.5,10))\n", + " qc_ax = qc_display.plot(qc_variable, subplot_index=(0,), set_title=\"QC results on field: \" + qc_variable,)\n", + " qc_ax.grid()\n", + " qc_display.qc_flag_block_plot(qc_variable, subplot_index=(1,))\n", + "\n", + " plt.show()\n", + "else:\n", + " print(f'QC not available for the selected field: {qc_variable}')\n" + ] + }, + { + "cell_type": "markdown", + "id": "2308db16-f362-4033-a11a-c3e5e75ad9ba", + 
"metadata": {}, + "source": [ + "## Field selection dropdown menu\n", + "Select variable to be plotted from a dropdown menu" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d5792fa8-2253-441a-8ac6-1e07a0345031", + "metadata": {}, + "outputs": [], + "source": [ + "plt.ioff()\n", + "\n", + "# populate dropdown menu with available variables \n", + "available_variables = [v for v in ds.variables if not('time' in v or v.startswith('qc_') or v.startswith('source_')) and 'long_name' in ds.variables[v].attrs]\n", + "d_variable = 'optical_depth_instantaneous'\n", + "dropdown = widgets.Dropdown(\n", + " options = [(ds.variables[v].attrs['long_name'], v) for v in available_variables],\n", + " value= d_variable,\n", + " description='Field:',\n", + " disabled=False,\n", + ")\n", + "dropdown.layout.margin = '0px 30% 0px 20%'\n", + "dropdown.layout.width = '50%'\n", + "\n", + "# set up display\n", + "i_display = act.plotting.TimeSeriesDisplay(ds)\n", + "i_display.add_subplots((1,), figsize = (9.5,5))\n", + "i_ax = i_display.plot(d_variable, subplot_index=(0,), set_title=ds.variables[d_variable].attrs['long_name'],)\n", + "i_ax.grid()\n", + "i_fig = i_display.fig\n", + "\n", + "# update plot callback function\n", + "def update_plot(change):\n", + " i_ax.cla()\n", + " i_ax_new = i_display.plot(change.new, subplot_index=(0,), set_title=ds.variables[change.new].attrs['long_name'],)\n", + " i_ax_new.grid()\n", + " i_fig.canvas.draw()\n", + " i_fig.canvas.flush_events()\n", + "\n", + "dropdown.observe(update_plot, names='value')\n", + "\n", + "widgets.AppLayout(\n", + " header=dropdown,\n", + " center=i_fig.canvas,\n", + " pane_heights=[1, 6,1]\n", + ")\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + 
"nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.16" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/MFRSRCLDOD/MFRSRCLDOD_tutorial.ipynb b/VAPs/quicklook/MFRSRCLDOD/MFRSRCLDOD_tutorial.ipynb new file mode 100644 index 00000000..ecd26e71 --- /dev/null +++ b/VAPs/quicklook/MFRSRCLDOD/MFRSRCLDOD_tutorial.ipynb @@ -0,0 +1,838 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# MFRSRCLDOD1MIN.C1 Notebook\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/mfrsrcldod) for more information about this vap." + ] + }, + { + "cell_type": "markdown", + "id": "97097763", + "metadata": {}, + "source": [ + "In this notebook, we demonstrate the workflow to explore ARM vap data (using mfrsrcldod1min as an example.) Value-added products (VAPs) are higher-order data products that have been analyzed and processed to ease scientists’ use of ARM data in atmospheric research and global climate models. \n", + "Here is the main content we will cover." 
+ ] + }, + { + "cell_type": "markdown", + "id": "eddec40f", + "metadata": {}, + "source": [ + "# Table of Content\n", + "## Access the data\n", + "* How to retrieve the data\n", + "* Data path and file name conventions\n", + "* Load data\n", + "## Explore the data\n", + "* NetCDF Data structure\n", + "* Xarray essentials\n", + "* Xarray Variable\n", + "## Plot the data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "import random\n", + "\n", + "import glob\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "id": "2372d5be", + "metadata": {}, + "source": [ + "## Access the data" + ] + }, + { + "cell_type": "markdown", + "id": "5b0d9684", + "metadata": {}, + "source": [ + "### How to retrieve the data\n", + "We assume the path \"/data/archive\" is available where you are running this notebook. You can use `os.path.exists(\"/data/archive\")` to verify if the path exists at your machine.\n", + "\n", + "\n", + "### Data path and file name conventions\n", + "There several common terminologies regarding ARM data, for example, data-stream-name, data-level, site, facility, instrument, etc. (For more details, please see [ARM Data File Standards Version 1.2](https://www.google.com/search?q=arm+datastream+facility+cite+definition&rlz=1C1GCEJ_enUS1029US1029&ei=hb41ZICrBPukqtsPvMODIA&ved=0ahUKEwjAgeiS0aL-AhV7kmoFHbzhAAQQ4dUDCBA&uact=5&oq=arm+datastream+facility+cite+definition&gs_lcp=Cgxnd3Mtd2l6LXNlcnAQAzIFCAAQogQyBQgAEKIEMgUIABCiBDIFCAAQogQyBQgAEKIEOgoIABBHENYEELADOgoIIRCgARDDBBAKSgQIQRgAUJ4DWIoOYO0PaAFwAXgAgAGMAYgBuQmSAQMyLjmYAQCgAQHIAQjAAQE&sclient=gws-wiz-serp).) 
\n", + "\n", + "For example, this notebook is called `mfrsrcldod1min.c1`, where `mfrsrcldod1min` is the \"datastream name\", and `c1` is the \"data level\".\n", + "\n", + "This datastream also contains site `asi` and facility `M1`. (Note: an individual datastream might have multiple site-facility pairs.)\n", + "In such a case, the data of this datastream is stored at `/data/archive/asi/asimfrsrcldod1minM1.c1`, which is in the format of `{DATA_DIR}/{site}/{site}{DATASTREAM_NAME}{facility}.{DATA_LEVEL}`. We can use the following method to assign the datastream directory `datastream_dir = os.path.join(DATA_DIR, site, site + DATASTREAM_NAME + facility + '.' + DATA_LEVEL )`\n", + "\n", + "The data files under datastream_dir also follow naming conventions. Once you reach the datastream_dir level, the most important file naming convention to differentiate the files is \"yyyyMMdd.hhmmss\", which comes in handy to filter out files based on datetime. For example, we can use `glob.glob(f'{datastream_dir}/*.200709*.*')` to filter files from September 2007.\n", + "\n", + "(Note: refer to https://adc.arm.gov/discovery/#/ and the https://adc.arm.gov/solr8/metadata/select API to explore ARM datastreams and their associated available sites and facilities.)\n", + "\n", + "Please see the following examples in action" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d7e9eb85", + "metadata": {}, + "outputs": [], + "source": [ + "# Verify if DATA_DIR path exists\n", + "DATA_DIR = \"/data/archive\"\n", + "os.path.exists(DATA_DIR)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "586993fd", + "metadata": {}, + "outputs": [], + "source": [ + "# Specify datastream_dir following the path conventions and check its existence\n", + "DATASTREAM_NAME = \"mfrsrcldod1min\"\n", + "DATA_LEVEL = \"c1\"\n", + "site = \"asi\"\n", + "facility = \"M1\"\n", + "datastream_dir = os.path.join(DATA_DIR, site, site + DATASTREAM_NAME + facility + '.' 
+ DATA_LEVEL )\n", + "print(datastream_dir)\n", + "print(os.path.exists(datastream_dir))\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0742f7c1", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: list 5 (random) files under datastream_dir\n", + "files = os.listdir(datastream_dir)\n", + "files[:5]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "39b98a36", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: get most recent file\n", + "list_of_files = glob.glob(f\"{datastream_dir}/*\") # * means all if need specific format then *.csv\n", + "latest_file = max(list_of_files, key=os.path.getctime)\n", + "latest_file" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "902d514e", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus sort datastream files based on datetime\n", + "files = os.listdir(datastream_dir)\n", + "file_sorted = files.copy()\n", + "file_sorted.sort() \n", + "print(file_sorted[:5])\n", + "\n", + "# to reverse\n", + "file_sorted_reverse = files.copy() \n", + "file_sorted_reverse.sort(reverse=True)\n", + "print(file_sorted_reverse[:5])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ec5923b2", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: pattern matching\n", + "# filter the 200709** files under datastream_dir\n", + "files_filter = glob.glob(f'{datastream_dir}/*.200709*.*')\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c4a4aa18", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: use armnotebook_utils.file_filter (TODO) to filter files based on datastream info \n" + ] + }, + { + "cell_type": "markdown", + "id": "cfeb9efc", + "metadata": {}, + "source": [ + "### Load data\n", + "The arm data is stored in [NetCDF](https://en.wikipedia.org/wiki/NetCDF#:~:text=%22NetCDF%20(network%20Common%20Data%20Form,format%20for%20representing%20scientific%20data.) format. 
We can use xarray's `open_dataset` method to load single file, or `open_mfdataset` to open multiple files. (Note: the latter will still return a single xarray dataset object by combining multiple files.)\n", + "\n", + "Note: open_dataset keeps the file handle open and lazy loads its contents. All parameters are passed directly to open_dataset. It is a preferable method over load_dataset for memory-effiency. (more details, please see [xarray.open_dataset](https://docs.xarray.dev/en/stable/generated/xarray.open_dataset.html))\n", + "\n", + "See the following example in action" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a440a329", + "metadata": {}, + "outputs": [], + "source": [ + "# open single file\n", + "full_path = latest_file\n", + "print(full_path)\n", + "ds_single = xr.open_dataset(full_path)\n", + "ds_single" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0143a3d", + "metadata": {}, + "outputs": [], + "source": [ + "print(ds_single)\n", + "print(type(ds_single))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1c0f8939", + "metadata": {}, + "outputs": [], + "source": [ + "# open multiple files\n", + "n_files = 3\n", + "full_paths = [os.path.join(datastream_dir, f_path) for f_path in file_sorted_reverse[:n_files]]\n", + "print(full_paths)\n", + "try: # Note: sometimes multiple files cannot be merged, so we used try except here.\n", + " ds_mutiple = xr.open_mfdataset(full_paths)\n", + " # ds_mutiple\n", + " print(type(ds_mutiple))\n", + " print(ds_mutiple)\n", + "except Exception as e:\n", + " print(e)" + ] + }, + { + "cell_type": "markdown", + "id": "3fa59943", + "metadata": {}, + "source": [ + "## Explore the data" + ] + }, + { + "cell_type": "markdown", + "id": "571f69b9", + "metadata": {}, + "source": [ + "### NetCDF Data structure \n", + "\n", + "\n", + "(If you are confident with NetCDF basics and xarray essentials, feel free to skip this session.)\n", + "Before we dive into 
data exploration, there are some essential concepts we should be familiar with (shown below): \n", + "* Dataset\n", + "* Data array\n", + "* Variable\n", + "* Dimension\n", + "* Coordinate\n", + "* Data Type\n", + "* Meta Data (Attributes)\n", + "\n", + "We will not go into details about NetCDF basics, but here are some references you might find helpful:\n", + "* [Components of a NetCDF Dataset](https://iprc.soest.hawaii.edu/users/xfu/tool/guidef-7.html#)\n", + "* [Network Common Data Form (NetCDF)](https://www.unidata.ucar.edu/software/netcdf/) and [A Brief History of (netCDF) Time](https://www.unidata.ucar.edu/software/netcdf/time/recs.html)\n", + "\n", + "\n", + "\n", + "### Xarray essentials\n", + "Earlier we introduced the xarray package and used `open_dataset` and `open_mfdataset` to retrieve NetCDF data files as an xarray Dataset object. Recall that you can review the data in a notebook by using `print(ds)` or `ds`. \n", + "\n", + "Using xarray to retrieve the aforementioned NetCDF basics is straightforward. 
In general,\n", + "* Dataset: `ds`\n", + "* Data array: `ds.variables`\n", + "* Variable: `ds.variables`\n", + "* Dimension: `ds.dims`\n", + "* Coordinate: `ds.coords`\n", + "* Data Type: `type`\n", + "* Meta Data (Attributes): `ds.attrs`\n", + "\n", + "Also, here are some references if you are new to xarray:\n", + "* [Xarray in 45 minutes](https://tutorial.xarray.dev/overview/xarray-in-45-min.html)\n", + "* [Handling NetCDF Files using XArray for Absolute Beginners](https://towardsdatascience.com/handling-netcdf-files-using-xarray-for-absolute-beginners-111a8ab4463f)\n", + "\n", + "Try the following commands in action\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b3eac323", + "metadata": {}, + "outputs": [], + "source": [ + "ds = ds_single" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "77ecf85d", + "metadata": {}, + "outputs": [], + "source": [ + "# Dataset \n", + "ds" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "25e7de09", + "metadata": {}, + "outputs": [], + "source": [ + "# Data array, Variable\n", + "# Note: the info can be overwhelming. 
Do not attempt to grasp everything at the first glance.\n", + "ds.variables" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8c41b67e", + "metadata": {}, + "outputs": [], + "source": [ + "# dimenssions\n", + "ds.dims" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "156f1dfc", + "metadata": {}, + "outputs": [], + "source": [ + "# coordinates\n", + "ds.coords" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "277d6064", + "metadata": {}, + "outputs": [], + "source": [ + "# Meta Data (Attributes)\n", + "ds.attrs" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d334681f", + "metadata": {}, + "outputs": [], + "source": [ + "# type\n", + "print(type(ds))\n", + "print(type(ds.variables))\n", + "print(type(ds.dims))\n", + "print(type(ds.coords))\n", + "print(type(ds.attrs))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "643399d6", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: retrieve variable names only\n", + "list(ds.variables)" + ] + }, + { + "cell_type": "markdown", + "id": "1cf36878", + "metadata": {}, + "source": [ + "#### Discussion: variable vs. coordinates vs. dimenssions. \n", + "From [Components of a NetCDF Dataset](https://iprc.soest.hawaii.edu/users/xfu/tool/guidef-7.html#):\n", + "\n", + "* Dimession: A dimension may be used to represent a real physical dimension, for example, time, latitude, longitude, or height. A dimension might also be used to index other quantities, for example station or model-run-number.\n", + "A netCDF dimension has both a name and a length. A dimension length is an arbitrary positive integer, except that one dimension in a netCDF dataset can have the length UNLIMITED.\n", + "\n", + "* Variables: Variables are used to store the bulk of the data in a netCDF dataset. A variable represents an array of values of the same type. A scalar value is treated as a 0-dimensional array. 
A variable has a name, a data type, and a shape described by its list of dimensions specified when the variable is created. A variable may also have associated attributes, which may be added, deleted or changed after the variable is created.\n", + "\n", + "* Coordinate (Variables): It is legal for a variable to have the same name as a dimension. Such variables have no special meaning to the netCDF library. However there is a convention that such variables should be treated in a special way by software using this library.\n", + "A variable with the same name as a dimension is called a coordinate variable. It typically defines a physical coordinate corresponding to that dimension.\n", + "\n", + "\n", + "Tips: By definition, coordinates and dimensions are also variables. For example, `ds.time` is a coordinate, but it is also a special variable. In practice, however, when we talk about a variable, we mean a regular (non-coordinate) variable. We will use this convention for the rest of this notebook. " + ] + }, + { + "cell_type": "markdown", + "id": "d47120f9", + "metadata": {}, + "source": [ + "### Xarray Variable\n", + "\n", + "\n", + "We can use `ds.variables` to access the variables of a dataset. But printing out the whole dataset can often be overwhelming. Instead, we can work on an individual variable (also called an xarray data array) using the `ds[var_name]` syntax, where `var_name` is the name of the variable. See the following examples in action.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5a57e136", + "metadata": {}, + "outputs": [], + "source": [ + "print(type(ds[\"time\"]))\n", + "ds[\"time\"]" + ] + }, + { + "cell_type": "markdown", + "id": "51ec643e", + "metadata": {}, + "source": [ + "#### Variable properties\n", + "For an individual variable, the following properties are most commonly used, along with the associated xarray attribute to retrieve them. 
(assuming var = ds[var_name])\n", + "* name: `var.name`\n", + "* data content: `var.data`\n", + "* attributes: `var.attrs`\n", + "* dimensions: `var.dims`\n", + "* data type: `var.data.dtype`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a632a525", + "metadata": {}, + "outputs": [], + "source": [ + "var_name = \"time\"\n", + "var = ds[var_name]\n", + "\n", + "print(\"var.name: \\n\", var.name, \"\\n\")\n", + "print(\"var.data: \\n\", var.data, \"\\n\")\n", + "print(\"var.attrs: \\n\", var.attrs, \"\\n\")\n", + "print(\"var.dims: \\n\", var.dims, \"\\n\")\n", + "print(\"var.data.dtype: \\n\", var.data.dtype, \"\\n\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b256b07f", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: individual variable info table (collect and display variable info in a tabular format)\n", + "df_info = pd.DataFrame()\n", + "df_info[\"var_name\"] = list(ds.variables)\n", + "df_info[\"dims\"] = df_info.var_name.apply(lambda x: ds[x].dims)\n", + "df_info[\"is_dim\"] = df_info.var_name.apply(lambda x: x in ds.dims)\n", + "df_info[\"n_dim\"] = df_info.var_name.apply(lambda x: len(ds[x].dims))\n", + "df_info[\"attrs\"] = df_info.var_name.apply(lambda x: ds[x].attrs)\n", + "df_info[\"dtype\"] = df_info.var_name.apply(lambda x: ds[x].data.dtype)\n", + "df_info" + ] + }, + { + "cell_type": "markdown", + "id": "e6a36b96", + "metadata": {}, + "source": [ + "### Data cleaning/Preprocessing (skipped)\n", + "Data cleaning and preprocessing is an important stage in the data analysis pipeline. However, it is out of the scope of this notebook. For more information about data cleaning and preprocessing basics, here are some references. 
\n", + "* [Xarray Fundamentals](https://earth-env-data-science.github.io/lectures/xarray/xarray.html)\n", + "* [Xarray Tutorial — Pangeo Gallery documentation](http://gallery.pangeo.io/repos/pangeo-data/pangeo-tutorial-gallery/xarray.html)\n", + "* [Pythonic Data Cleaning With pandas and NumPy](https://realpython.com/python-data-cleaning-numpy-pandas/)\n", + "* [Pandas - Cleaning Data](https://www.w3schools.com/python/pandas/pandas_cleaning.asp)" + ] + }, + { + "cell_type": "markdown", + "id": "ae6e1a69", + "metadata": {}, + "source": [ + "### Plotting\n", + "Data visualization (or plotting) is another important data analysis topic and deserves its own discussion. Here in this notebook we will only demonstrate basic tools for simple data visualization tasks.\n", + "\n", + "Here are some references you might find useful:\n", + "* xarray plotting: https://docs.xarray.dev/en/stable/user-guide/plotting.html\n", + "* Atmospheric Community Toolkit (ACT): https://arm-doe.github.io/ACT/index.html\n", + "\n", + "Note: this notebook is auto-generated using a template. It uses a general heuristic to select the variable(s) to plot and is not customized to each individual notebook. Feel free to change the variables of interest, especially if a figure fails to plot." + ] + }, + { + "cell_type": "markdown", + "id": "7e417fb2", + "metadata": {}, + "source": [ + "#### 1-dimensional basic time series plot\n", + "\n", + "For the following plot we would like to find variables such that\n", + "* it has one and only one dimension\n", + "* \"time\" is its coordinate variable\n", + "* it is not a dimension itself,\n", + "* it is not a special variable whose name contains any of the substrings in [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "\n", + "Please see the following example in action. 
(For more details about pandas filtering, please see the following references.)\n", + "* [pandas: multiple conditions while indexing data frame](https://stackoverflow.com/questions/22591174/pandas-multiple-conditions-while-indexing-data-frame-unexpected-behavior)\n", + "* [pandas.DataFrame.apply](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.apply.html)\n", + "* [How to test if a string contains one of the substrings in a list...](https://stackoverflow.com/questions/26577516/how-to-test-if-a-string-contains-one-of-the-substrings-in-a-list-in-pandas)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "30edac2d", + "metadata": {}, + "outputs": [], + "source": [ + "# (pandas query) filter n_dim==1, non-dimensional, and does not contain the following substrings\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter = df_info[(df_info.n_dim == 1) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (df_info.is_dim==False) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4777a659", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " var_1d = df_filter.var_name.values[0]\n", + " var_1d\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fb5ae985", + "metadata": {}, + "outputs": [], + "source": [ + "# Note: if failed, change to another variable to plot.\n", + "try:\n", + " ds[var_1d].plot()\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "markdown", + "id": "9e41066e", + "metadata": {}, + "source": [ + "#### 2-dimensional basic plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "09bd4adc", + "metadata": {}, + "outputs": [], + "source": [ + "# (pandas query) filter n_dim==2, non-dimensional, and does not contain the 
following substrings\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_2 = df_info[(df_info.n_dim == 2) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (df_info.is_dim==False) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter_2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2cdea1a9", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " var_2d = df_filter_2.var_name.values[0]\n", + " var_2d\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f5f48879", + "metadata": {}, + "outputs": [], + "source": [ + "# Note: if failed, change to another variable to plot.\n", + "try:\n", + " print(ds[var_2d].dims)\n", + " # ds[var_2d].plot()\n", + "\n", + " # conventionally, use \"time\" as x-axis\n", + " ds[var_2d].plot(x=\"time\")\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "markdown", + "id": "7554b2a6", + "metadata": {}, + "source": [ + "#### qc-plotting (optional)\n", + "\n", + "Note: act qc plotting has more strict requirement, one of them is the associated qc variable need to have \"flag_masks\" attributes. 
Which is added by using ds = act.io.armfiles.read_netcdf(files_list), then ds.clean.cleanup()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "71c2096f", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " ds_act = act.io.armfiles.read_netcdf(full_path)\n", + " print(type(ds_act))\n", + " ds_act.clean.cleanup()\n", + "\n", + " # or \n", + " # ds.clean.cleanup()\n", + "except Exception as e:\n", + " print(\"ERROR\", e)\n", + " ds_act = ds" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "844f8505", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter valid variables for ACT qc plotting\n", + "condition = (\"qc_\" + df_info.var_name).apply(lambda x: ds_act[x].attrs.get(\"flag_masks\") is not None \n", + " if x in list(ds.variables) else False)\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_3 = df_info[(df_info.is_dim==False) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings))) &\n", + " condition\n", + " ]\n", + "df_filter_3" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ecd38cc6", + "metadata": {}, + "outputs": [], + "source": [ + "# QC Plot\n", + "try:\n", + " qc_variable = df_filter_3.var_name.values[0]\n", + " print(qc_variable)\n", + "except Exception as e:\n", + " print(e)\n", + " \n", + "try:\n", + "\n", + " # Plot\n", + " qc_display = act.plotting.TimeSeriesDisplay(ds_act)\n", + " qc_display.add_subplots((2,), figsize = (9.5,10))\n", + " qc_ax = qc_display.plot(qc_variable, subplot_index=(0,), set_title=\"QC results on field: \" + qc_variable,)\n", + " qc_ax.grid()\n", + " qc_display.qc_flag_block_plot(qc_variable, subplot_index=(1,))\n", + " qc_ax.set_xlabel(f\"UTC Time starts at {ds.time.data[0]}\")\n", + "\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e)" + ] + }, + { + "cell_type": "markdown", + "id": "d76e4d27", + "metadata": 
{}, + "source": [ + "#### bonus: choose variables to plot from a dropdown menu " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bb733804", + "metadata": {}, + "outputs": [], + "source": [ + "# Valid variables filtering\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_4 = df_info[(df_info.is_dim==False) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter_4" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "82d5c20d", + "metadata": {}, + "outputs": [], + "source": [ + "# example 1: using xarray plot\n", + "\n", + "# Uncomment the following cell to try the interactive plot (ctrl + /)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2fc9aaa4", + "metadata": {}, + "outputs": [], + "source": [ + "# %matplotlib widget\n", + "# plt.clf()\n", + "\n", + "# fig, ax = plt.subplots(figsize=(10, 4))\n", + "\n", + "# available_variables = df_filter_4.var_name.values\n", + "# @widgets.interact(var=available_variables)\n", + "# def update(var = available_variables[0]):\n", + "# fig.clear() # Remove old lines from plot and plot new one\n", + "# if len(ds[var].dims)==2:\n", + "# ds[var].plot(x=\"time\", add_colorbar=False)\n", + "# else:\n", + "# ds[var].plot()\n", + "# plt.grid()\n", + "# plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fabdd802", + "metadata": {}, + "outputs": [], + "source": [ + "# example 2: using act plot\n", + "\n", + "# Uncomment the following cell to try the interactive plot (ctrl + /)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "962b4186", + "metadata": {}, + "outputs": [], + "source": [ + "# %matplotlib widget\n", + "# plt.clf()\n", + "\n", + "# available_variables = df_filter_4.var_name.values\n", + "\n", + "\n", + "# @widgets.interact(var=available_variables)\n", + "# def 
update(var = available_variables[0]):\n", + "\n", + "# i_display = act.plotting.TimeSeriesDisplay(ds_act)\n", + "# i_display.add_subplots((1,), figsize=(10, 4))\n", + "# ax = i_display.plot(var, subplot_index=(0,), set_title=f\"{var} ({ds_act[var].attrs['long_name']})\",)\n", + "\n", + "# ax.set_xlabel(f\"UTC Time starts at {ds.time.data[0]}\")\n", + "# ax.grid()\n", + "# plt.show()\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.16" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/MFRSRCLDOD/mfrsrcldod1min.c1.ipynb b/VAPs/quicklook/MFRSRCLDOD/mfrsrcldod1min.c1.ipynb new file mode 100644 index 00000000..8b384458 --- /dev/null +++ b/VAPs/quicklook/MFRSRCLDOD/mfrsrcldod1min.c1.ipynb @@ -0,0 +1,339 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# MFRSRCLDOD1MIN.C1 Plots\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/mfrsrcldod) for more information about this vap." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib widget\n", + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import ipywidgets as widgets\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "# Data archive directory\n", + "DATA_DIR = r'/data/archive/'\n", + "\n", + "# Datastream info\n", + "DATASTREAM_NAME = 'mfrsrcldod1min'\n", + "DATA_LEVEL = 'c1'\n", + "LOCATIONS = [{'end_date': '2017-11-01', 'facility': 'M1', 'site': 'asi', 'start_date': '2016-05-02'}, {'end_date': '2019-10-27', 'facility': 'C1', 'site': 'ena', 'start_date': '2014-06-01'}, {'end_date': '2007-11-15', 'facility': 'M1', 'site': 'fkb', 'start_date': '2007-05-07'}, {'end_date': '2015-08-29', 'facility': 'M1', 'site': 'mao', 'start_date': '2015-04-17'}, {'end_date': '2018-03-13', 'facility': 'S1', 'site': 'mcq', 'start_date': '2016-04-01'}, {'end_date': '2019-04-30', 'facility': 'M1', 'site': 'cor', 'start_date': '2018-09-18'}, {'end_date': '2012-02-05', 'facility': 'M1', 'site': 'gan', 'start_date': '2011-10-09'}, {'end_date': '2010-12-29', 'facility': 'M1', 'site': 'grw', 'start_date': '2009-05-04'}, {'end_date': '2013-07-01', 'facility': 'M1', 'site': 'pvc', 'start_date': '2012-07-09'}, {'end_date': '2005-09-15', 'facility': 'M1', 'site': 'pye', 'start_date': '2005-02-19'}, {'end_date': '2012-04-01', 'facility': 'M1', 'site': 'pgh', 'start_date': '2011-07-19'}, {'end_date': '2021-01-29', 'facility': 'C1', 'site': 'sgp', 'start_date': '1998-01-01'}, {'end_date': '2011-10-19', 'facility': 'E10', 'site': 'sgp', 'start_date': '1997-10-31'}, {'end_date': '2021-09-21', 'facility': 'E11', 'site': 'sgp', 'start_date': '1997-08-23'}, {'end_date': '2021-09-21', 'facility': 'E12', 'site': 'sgp', 'start_date': '2001-07-24'}, 
{'end_date': '2022-06-30', 'facility': 'E13', 'site': 'sgp', 'start_date': '1998-07-10'}, {'end_date': '2021-09-21', 'facility': 'E15', 'site': 'sgp', 'start_date': '1997-09-10'}, {'end_date': '2011-11-15', 'facility': 'E16', 'site': 'sgp', 'start_date': '1997-08-21'}, {'end_date': '2009-11-17', 'facility': 'E18', 'site': 'sgp', 'start_date': '1997-10-17'}, {'end_date': '2011-05-23', 'facility': 'E19', 'site': 'sgp', 'start_date': '1998-07-09'}, {'end_date': '2009-10-14', 'facility': 'E1', 'site': 'sgp', 'start_date': '1997-11-21'}, {'end_date': '2011-11-17', 'facility': 'E20', 'site': 'sgp', 'start_date': '1999-04-24'}, {'end_date': '2009-12-01', 'facility': 'E22', 'site': 'sgp', 'start_date': '1999-01-15'}, {'end_date': '2009-11-14', 'facility': 'E24', 'site': 'sgp', 'start_date': '1997-11-26'}, {'end_date': '2002-04-08', 'facility': 'E25', 'site': 'sgp', 'start_date': '1998-01-11'}, {'end_date': '2009-12-04', 'facility': 'E27', 'site': 'sgp', 'start_date': '2003-12-30'}, {'end_date': '2009-10-20', 'facility': 'E2', 'site': 'sgp', 'start_date': '1997-11-05'}, {'end_date': '2021-06-22', 'facility': 'E31', 'site': 'sgp', 'start_date': '2011-11-26'}, {'end_date': '2021-09-21', 'facility': 'E32', 'site': 'sgp', 'start_date': '2011-11-26'}, {'end_date': '2021-09-21', 'facility': 'E33', 'site': 'sgp', 'start_date': '2011-09-27'}, {'end_date': '2021-09-21', 'facility': 'E34', 'site': 'sgp', 'start_date': '2011-09-28'}, {'end_date': '2021-09-21', 'facility': 'E35', 'site': 'sgp', 'start_date': '2011-09-28'}, {'end_date': '2021-09-21', 'facility': 'E36', 'site': 'sgp', 'start_date': '2011-10-18'}, {'end_date': '2021-09-21', 'facility': 'E37', 'site': 'sgp', 'start_date': '2011-11-02'}, {'end_date': '2017-10-15', 'facility': 'E38', 'site': 'sgp', 'start_date': '2011-12-15'}, {'end_date': '2009-10-28', 'facility': 'E3', 'site': 'sgp', 'start_date': '1998-07-24'}, {'end_date': '2011-09-26', 'facility': 'E4', 'site': 'sgp', 'start_date': '1997-12-20'}, {'end_date': 
'2009-11-02', 'facility': 'E5', 'site': 'sgp', 'start_date': '1998-03-22'}, {'end_date': '2011-10-18', 'facility': 'E6', 'site': 'sgp', 'start_date': '2003-12-19'}, {'end_date': '2011-11-14', 'facility': 'E7', 'site': 'sgp', 'start_date': '1999-07-12'}, {'end_date': '2009-11-10', 'facility': 'E8', 'site': 'sgp', 'start_date': '1997-09-03'}, {'end_date': '2021-09-21', 'facility': 'E9', 'site': 'sgp', 'start_date': '2008-03-25'}, {'end_date': '2014-06-04', 'facility': 'C1', 'site': 'twp', 'start_date': '1999-10-22'}, {'end_date': '2013-09-09', 'facility': 'C2', 'site': 'twp', 'start_date': '1999-09-08'}, {'end_date': '2014-10-05', 'facility': 'C3', 'site': 'twp', 'start_date': '2002-03-07'}]" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "9faaf875", + "metadata": {}, + "source": [ + "## Define site, facility, and date range" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ac6764f5", + "metadata": {}, + "outputs": [], + "source": [ + "print(\"The following locations and date ranges are available for this VAP:\")\n", + "display(pd.DataFrame(LOCATIONS, columns=['site', 'facility', 'start_date', 'end_date']))" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "8d132223", + "metadata": {}, + "source": [ + "#### Define site, facility, and date range (date format: YYYY-MM-DD) using the variables below:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e563983a", + "metadata": {}, + "outputs": [], + "source": [ + "site_facility = ( 'sgp', 'C1' )\n", + "\n", + "date_start = '2021-01-27'\n", + "date_end = '2021-01-29'" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "bccd3dfe-2f99-49a5-bace-ea37e8dd8fc3", + "metadata": {}, + "source": [ + "## Load data files\n", + "Load data files from /data/archive/" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cb4b9a26-c574-49c0-a521-658fa553e39e", + "metadata": {}, + "outputs": [], + "source": [ + "# 
Compile list of files\n", + "site, facility = site_facility\n", + "d_date_start = datetime.strptime(date_start, '%Y-%m-%d')\n", + "d_date_end = datetime.strptime(date_end, '%Y-%m-%d')\n", + "dir_path = os.path.join(DATA_DIR + site, site + DATASTREAM_NAME + facility + r'.' + DATA_LEVEL )\n", + "dir_path\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6be8f3dc", + "metadata": {}, + "outputs": [], + "source": [ + "from datetime import date, timedelta\n", + "import pandas as pd\n", + "\n", + "def get_ARM_formated_dates(start_date, end_date):\n", + " \"\"\"\n", + " Get a list of ARM conventional formated date lists, based on start_date and end_date(inclusive)\n", + " EXAMPLE:\n", + " get_ARM_formated_dates(start_date=\"20180219\", end_date=\"20180221\")\n", + " >> [\"20180219\", \"20180220\", \"20180221\"] \n", + " \"\"\"\n", + " \n", + " _start_date = pd.to_datetime(start_date)\n", + " _end_date = pd.to_datetime(end_date)\n", + " \n", + " delta = _end_date - _start_date # returns timedelta \n", + " dates = []\n", + "\n", + " for i in range(delta.days + 1):\n", + " day = _start_date + timedelta(days=i)\n", + " day_formated = day.strftime(format=\"%Y%m%d\")\n", + " dates.append(day_formated)\n", + " return dates\n", + "\n", + "\n", + "get_ARM_formated_dates(start_date=date_start, end_date=date_end)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "51feea2e", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter a list of files based on date pattern\n", + "import glob\n", + "dates = get_ARM_formated_dates(start_date=date_start, end_date=date_end)\n", + "files_filter = []\n", + "for date in dates:\n", + " files_filter += glob.glob(f'{dir_path}/*.{date}*.*')\n", + " files_filter\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0e5d0d7", + "metadata": {}, + "outputs": [], + "source": [ + "# Load files as a single dataset\n", + "files_list = files_filter \n", + "ds = 
act.io.armfiles.read_netcdf(files_list)\n", + "ds.clean.cleanup()\n", + "print(f'{len(files_list)} files loaded')\n", + "ds\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "4a551094-9ec0-4b64-b80a-9940573c2f50", + "metadata": {}, + "source": [ + "## Plot time series data\n", + "#### Define the list of variables to be plotted:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d09b789e-84f1-4605-846b-a72c110c8048", + "metadata": {}, + "outputs": [], + "source": [ + "variables_to_plot = ['optical_depth_instantaneous']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3458fb08-035b-4898-9253-0a94e6f9c97b", + "metadata": {}, + "outputs": [], + "source": [ + "ts_display = act.plotting.TimeSeriesDisplay(ds)\n", + "ts_display.add_subplots((len(variables_to_plot),), figsize = (9.5,4*len(variables_to_plot)))\n", + "\n", + "for i,v in enumerate(variables_to_plot):\n", + " ts_ax = ts_display.plot(v, subplot_index=(i,), set_title=ds.variables[v].attrs['long_name'],)\n", + " ts_ax.grid()\n", + "\n", + "plt.show()\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "194399aa-1907-452b-8ba9-bc31d7f60291", + "metadata": {}, + "source": [ + "## Quality check plots\n", + "#### Define variable for QC plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9a0ef63d-3eeb-48fc-a24a-43258b6134b8", + "metadata": {}, + "outputs": [], + "source": [ + "qc_variable = 'optical_depth_instantaneous'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "532663a3-4dc0-4497-bda8-018c5f91e1c4", + "metadata": {}, + "outputs": [], + "source": [ + "# QC Plot\n", + "if ('qc_'+qc_variable) in ds.variables:\n", + "\n", + " # Plot\n", + " qc_display = act.plotting.TimeSeriesDisplay(ds)\n", + " qc_display.add_subplots((2,), figsize = (9.5,10))\n", + " qc_ax = qc_display.plot(qc_variable, subplot_index=(0,), set_title=\"QC results on field: \" + qc_variable,)\n", + " 
qc_ax.grid()\n", + " qc_display.qc_flag_block_plot(qc_variable, subplot_index=(1,))\n", + "\n", + " plt.show()\n", + "else:\n", + " print(f'QC not available for the selected field: {qc_variable}')\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "2308db16-f362-4033-a11a-c3e5e75ad9ba", + "metadata": {}, + "source": [ + "## Field selection dropdown menu\n", + "Select variable to be plotted from a dropdown menu" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d5792fa8-2253-441a-8ac6-1e07a0345031", + "metadata": {}, + "outputs": [], + "source": [ + "plt.ioff()\n", + "\n", + "# populate dropdown menu with available variables \n", + "available_variables = [v for v in ds.variables if not('time' in v or v.startswith('qc_') or v.startswith('source_')) and 'long_name' in ds.variables[v].attrs]\n", + "d_variable = 'optical_depth_instantaneous'\n", + "dropdown = widgets.Dropdown(\n", + " options = [(ds.variables[v].attrs['long_name'], v) for v in available_variables],\n", + " value= d_variable,\n", + " description='Field:',\n", + " disabled=False,\n", + ")\n", + "dropdown.layout.margin = '0px 30% 0px 20%'\n", + "dropdown.layout.width = '50%'\n", + "\n", + "# set up display\n", + "i_display = act.plotting.TimeSeriesDisplay(ds)\n", + "i_display.add_subplots((1,), figsize = (9.5,5))\n", + "i_ax = i_display.plot(d_variable, subplot_index=(0,), set_title=ds.variables[d_variable].attrs['long_name'],)\n", + "i_ax.grid()\n", + "i_fig = i_display.fig\n", + "\n", + "# update plot callback function\n", + "def update_plot(change):\n", + " i_ax.cla()\n", + " i_ax_new = i_display.plot(change.new, subplot_index=(0,), set_title=ds.variables[change.new].attrs['long_name'],)\n", + " i_ax_new.grid()\n", + " i_fig.canvas.draw()\n", + " i_fig.canvas.flush_events()\n", + "\n", + "dropdown.observe(update_plot, names='value')\n", + "\n", + "widgets.AppLayout(\n", + " header=dropdown,\n", + " center=i_fig.canvas,\n", + " pane_heights=[1, 6,1]\n", + ")\n" 
+ ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.12" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/MICROBASE/.ipynb_checkpoints/microbasepi2.c1-checkpoint.ipynb b/VAPs/quicklook/MICROBASE/.ipynb_checkpoints/microbasepi2.c1-checkpoint.ipynb new file mode 100644 index 00000000..72c386b4 --- /dev/null +++ b/VAPs/quicklook/MICROBASE/.ipynb_checkpoints/microbasepi2.c1-checkpoint.ipynb @@ -0,0 +1,468 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# MICROBASEPI2.C1 Plots\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/microbase) for more information about this vap." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib widget\n", + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import ipywidgets as widgets\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "# Data archive directory\n", + "DATA_DIR = r'/data/archive/'\n", + "\n", + "# Datastream info\n", + "DATASTREAM_NAME = 'microbasepi2'\n", + "DATA_LEVEL = 'c1'\n", + "LOCATIONS = [{'end_date': '2011-03-22', 'facility': 'C1', 'site': 'nsa', 'start_date': '2002-01-01'}, {'end_date': '2010-12-30', 'facility': 'C1', 'site': 'sgp', 'start_date': '1996-11-08'}, {'end_date': '2011-02-25', 'facility': 'C1', 'site': 'twp', 'start_date': '1999-07-01'}, {'end_date': '2009-02-13', 'facility': 'C2', 'site': 'twp', 'start_date': '2002-01-01'}, {'end_date': '2011-02-27', 'facility': 'C3', 'site': 'twp', 'start_date': '2005-11-04'}]" + ] + }, + { + "cell_type": "markdown", + "id": "9faaf875", + "metadata": {}, + "source": [ + "## Define site, facility, and date range" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "ac6764f5", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The following locations and date ranges are available for this VAP:\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sitefacilitystart_dateend_date
0nsaC12002-01-012011-03-22
1sgpC11996-11-082010-12-30
2twpC11999-07-012011-02-25
3twpC22002-01-012009-02-13
4twpC32005-11-042011-02-27
\n", + "
" + ], + "text/plain": [ + " site facility start_date end_date\n", + "0 nsa C1 2002-01-01 2011-03-22\n", + "1 sgp C1 1996-11-08 2010-12-30\n", + "2 twp C1 1999-07-01 2011-02-25\n", + "3 twp C2 2002-01-01 2009-02-13\n", + "4 twp C3 2005-11-04 2011-02-27" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "print(\"The following locations and date ranges are available for this VAP:\")\n", + "display(pd.DataFrame(LOCATIONS, columns=['site', 'facility', 'start_date', 'end_date']))" + ] + }, + { + "cell_type": "markdown", + "id": "8d132223", + "metadata": {}, + "source": [ + "#### Define site, facility, and date range (date format: YYYY-MM-DD) using the variables below:" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "e563983a", + "metadata": {}, + "outputs": [], + "source": [ + "site_facility = ( 'sgp', 'C1' )\n", + "\n", + "date_start = '2010-12-28'\n", + "date_end = '2010-12-30'" + ] + }, + { + "cell_type": "markdown", + "id": "bccd3dfe-2f99-49a5-bace-ea37e8dd8fc3", + "metadata": {}, + "source": [ + "## Load data files\n", + "Load data files from /data/archive/" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "cb4b9a26-c574-49c0-a521-658fa553e39e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'/data/archive/sgp/sgpmicrobasepi2C1.c1'" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Compile list of files\n", + "site, facility = site_facility\n", + "d_date_start = datetime.strptime(date_start, '%Y-%m-%d')\n", + "d_date_end = datetime.strptime(date_end, '%Y-%m-%d')\n", + "dir_path = os.path.join(DATA_DIR + site, site + DATASTREAM_NAME + facility + r'.' 
+ DATA_LEVEL )\n", + "dir_path\n" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "6be8f3dc", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['20101228', '20101229', '20101230']" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from datetime import date, timedelta\n", + "import pandas as pd\n", + "\n", + "def get_ARM_formated_dates(start_date, end_date):\n", + " \"\"\"\n", + " Get a list of ARM conventional formated date lists, based on start_date and end_date(inclusive)\n", + " EXAMPLE:\n", + " get_ARM_formated_dates(start_date=\"20180219\", end_date=\"20180221\")\n", + " >> [\"20180219\", \"20180220\", \"20180221\"] \n", + " \"\"\"\n", + " \n", + " _start_date = pd.to_datetime(start_date)\n", + " _end_date = pd.to_datetime(end_date)\n", + " \n", + " delta = _end_date - _start_date # returns timedelta \n", + " dates = []\n", + "\n", + " for i in range(delta.days + 1):\n", + " day = _start_date + timedelta(days=i)\n", + " day_formated = day.strftime(format=\"%Y%m%d\")\n", + " dates.append(day_formated)\n", + " return dates\n", + "\n", + "\n", + "get_ARM_formated_dates(start_date=date_start, end_date=date_end)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "51feea2e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['/data/archive/sgp/sgpmicrobasepi2C1.c1/sgpmicrobasepi2C1.c1.20101228.000000.cdf',\n", + " '/data/archive/sgp/sgpmicrobasepi2C1.c1/sgpmicrobasepi2C1.c1.20101229.000000.cdf',\n", + " '/data/archive/sgp/sgpmicrobasepi2C1.c1/sgpmicrobasepi2C1.c1.20101230.000000.cdf']" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Filter a list of files based on date pattern\n", + "import glob\n", + "dates = get_ARM_formated_dates(start_date=date_start, end_date=date_end)\n", + "files_filter = []\n", + "for date in dates:\n", + " files_filter += 
glob.glob(f'{dir_path}/*.{date}*.*')\n", + " files_filter\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "2d9b10cd", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Requirement already satisfied: cftime in /home/kefeimo/.conda/envs/jupyter-vaps/lib/python3.8/site-packages (1.6.2)\n", + "Requirement already satisfied: numpy>1.13.3 in /home/kefeimo/.conda/envs/jupyter-vaps/lib/python3.8/site-packages (from cftime) (1.24.2)\n" + ] + } + ], + "source": [ + "! pip install cftime " + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "b0e5d0d7", + "metadata": {}, + "outputs": [ + { + "ename": "ValueError", + "evalue": "Failed to decode variable 'time_offset': unable to decode time units 'seconds since base_time' with 'the default calendar'. Try opening your dataset with decode_times=False or installing cftime if it is not installed.", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", + "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/xarray/coding/times.py:184\u001b[0m, in \u001b[0;36m_decode_cf_datetime_dtype\u001b[0;34m(data, units, calendar, use_cftime)\u001b[0m\n\u001b[1;32m 183\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 184\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[43mdecode_cf_datetime\u001b[49m\u001b[43m(\u001b[49m\u001b[43mexample_value\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43munits\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcalendar\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43muse_cftime\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 185\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m:\n", + "File 
\u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/xarray/coding/times.py:308\u001b[0m, in \u001b[0;36mdecode_cf_datetime\u001b[0;34m(num_dates, units, calendar, use_cftime)\u001b[0m\n\u001b[1;32m 307\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m use_cftime:\n\u001b[0;32m--> 308\u001b[0m dates \u001b[38;5;241m=\u001b[39m \u001b[43m_decode_datetime_with_cftime\u001b[49m\u001b[43m(\u001b[49m\u001b[43mflat_num_dates\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43munits\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcalendar\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 309\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n", + "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/xarray/coding/times.py:208\u001b[0m, in \u001b[0;36m_decode_datetime_with_cftime\u001b[0;34m(num_dates, units, calendar)\u001b[0m\n\u001b[1;32m 206\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m num_dates\u001b[38;5;241m.\u001b[39msize \u001b[38;5;241m>\u001b[39m \u001b[38;5;241m0\u001b[39m:\n\u001b[1;32m 207\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m np\u001b[38;5;241m.\u001b[39masarray(\n\u001b[0;32m--> 208\u001b[0m \u001b[43mcftime\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mnum2date\u001b[49m\u001b[43m(\u001b[49m\u001b[43mnum_dates\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43munits\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcalendar\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43monly_use_cftime_datetimes\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m)\u001b[49m\n\u001b[1;32m 209\u001b[0m )\n\u001b[1;32m 210\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n", + "File \u001b[0;32msrc/cftime/_cftime.pyx:580\u001b[0m, in \u001b[0;36mcftime._cftime.num2date\u001b[0;34m()\u001b[0m\n", + "File \u001b[0;32msrc/cftime/_cftime.pyx:110\u001b[0m, in \u001b[0;36mcftime._cftime._dateparse\u001b[0;34m()\u001b[0m\n", + "File 
\u001b[0;32msrc/cftime/_cftime.pyx:767\u001b[0m, in \u001b[0;36mcftime._cftime._parse_date\u001b[0;34m()\u001b[0m\n", + "\u001b[0;31mValueError\u001b[0m: Unable to parse date string 'base_time'", + "\nDuring handling of the above exception, another exception occurred:\n", + "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", + "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/xarray/conventions.py:551\u001b[0m, in \u001b[0;36mdecode_cf_variables\u001b[0;34m(variables, attributes, concat_characters, mask_and_scale, decode_times, decode_coords, drop_variables, use_cftime, decode_timedelta)\u001b[0m\n\u001b[1;32m 550\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 551\u001b[0m new_vars[k] \u001b[38;5;241m=\u001b[39m \u001b[43mdecode_cf_variable\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 552\u001b[0m \u001b[43m \u001b[49m\u001b[43mk\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 553\u001b[0m \u001b[43m \u001b[49m\u001b[43mv\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 554\u001b[0m \u001b[43m \u001b[49m\u001b[43mconcat_characters\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mconcat_characters\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 555\u001b[0m \u001b[43m \u001b[49m\u001b[43mmask_and_scale\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmask_and_scale\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 556\u001b[0m \u001b[43m \u001b[49m\u001b[43mdecode_times\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdecode_times\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 557\u001b[0m \u001b[43m \u001b[49m\u001b[43mstack_char_dim\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstack_char_dim\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 558\u001b[0m \u001b[43m \u001b[49m\u001b[43muse_cftime\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43muse_cftime\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 559\u001b[0m \u001b[43m 
\u001b[49m\u001b[43mdecode_timedelta\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdecode_timedelta\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 560\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 561\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n", + "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/xarray/conventions.py:397\u001b[0m, in \u001b[0;36mdecode_cf_variable\u001b[0;34m(name, var, concat_characters, mask_and_scale, decode_times, decode_endianness, stack_char_dim, use_cftime, decode_timedelta)\u001b[0m\n\u001b[1;32m 396\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m decode_times:\n\u001b[0;32m--> 397\u001b[0m var \u001b[38;5;241m=\u001b[39m \u001b[43mtimes\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mCFDatetimeCoder\u001b[49m\u001b[43m(\u001b[49m\u001b[43muse_cftime\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43muse_cftime\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdecode\u001b[49m\u001b[43m(\u001b[49m\u001b[43mvar\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mname\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mname\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 399\u001b[0m dimensions, data, attributes, encoding \u001b[38;5;241m=\u001b[39m variables\u001b[38;5;241m.\u001b[39munpack_for_decoding(var)\n", + "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/xarray/coding/times.py:716\u001b[0m, in \u001b[0;36mCFDatetimeCoder.decode\u001b[0;34m(self, variable, name)\u001b[0m\n\u001b[1;32m 715\u001b[0m calendar \u001b[38;5;241m=\u001b[39m pop_to(attrs, encoding, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcalendar\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m--> 716\u001b[0m dtype \u001b[38;5;241m=\u001b[39m \u001b[43m_decode_cf_datetime_dtype\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdata\u001b[49m\u001b[43m,\u001b[49m\u001b[43m 
\u001b[49m\u001b[43munits\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcalendar\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43muse_cftime\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 717\u001b[0m transform \u001b[38;5;241m=\u001b[39m partial(\n\u001b[1;32m 718\u001b[0m decode_cf_datetime,\n\u001b[1;32m 719\u001b[0m units\u001b[38;5;241m=\u001b[39munits,\n\u001b[1;32m 720\u001b[0m calendar\u001b[38;5;241m=\u001b[39mcalendar,\n\u001b[1;32m 721\u001b[0m use_cftime\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39muse_cftime,\n\u001b[1;32m 722\u001b[0m )\n", + "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/xarray/coding/times.py:194\u001b[0m, in \u001b[0;36m_decode_cf_datetime_dtype\u001b[0;34m(data, units, calendar, use_cftime)\u001b[0m\n\u001b[1;32m 189\u001b[0m msg \u001b[38;5;241m=\u001b[39m (\n\u001b[1;32m 190\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124munable to decode time units \u001b[39m\u001b[38;5;132;01m{\u001b[39;00munits\u001b[38;5;132;01m!r}\u001b[39;00m\u001b[38;5;124m with \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mcalendar_msg\u001b[38;5;132;01m!r}\u001b[39;00m\u001b[38;5;124m. Try \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 191\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mopening your dataset with decode_times=False or installing cftime \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 192\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mif it is not installed.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 193\u001b[0m )\n\u001b[0;32m--> 194\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(msg)\n\u001b[1;32m 195\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n", + "\u001b[0;31mValueError\u001b[0m: unable to decode time units 'seconds since base_time' with 'the default calendar'. 
Try opening your dataset with decode_times=False or installing cftime if it is not installed.", + "\nDuring handling of the above exception, another exception occurred:\n", + "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[7], line 3\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;66;03m# Load files as a single dataset\u001b[39;00m\n\u001b[1;32m 2\u001b[0m files_list \u001b[38;5;241m=\u001b[39m files_filter \n\u001b[0;32m----> 3\u001b[0m ds \u001b[38;5;241m=\u001b[39m \u001b[43mact\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mio\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43marmfiles\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mread_netcdf\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfiles_list\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 4\u001b[0m ds\u001b[38;5;241m.\u001b[39mclean\u001b[38;5;241m.\u001b[39mcleanup()\n\u001b[1;32m 5\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mlen\u001b[39m(files_list)\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m files loaded\u001b[39m\u001b[38;5;124m'\u001b[39m)\n", + "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/act/io/armfiles.py:168\u001b[0m, in \u001b[0;36mread_netcdf\u001b[0;34m(filenames, concat_dim, return_None, combine, decode_times, use_cftime, use_base_time, combine_attrs, cleanup_qc, keep_variables, **kwargs)\u001b[0m\n\u001b[1;32m 164\u001b[0m ds \u001b[38;5;241m=\u001b[39m xr\u001b[38;5;241m.\u001b[39mopen_mfdataset(filenames, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n\u001b[1;32m 166\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 167\u001b[0m \u001b[38;5;66;03m# When all else fails raise the orginal exception\u001b[39;00m\n\u001b[0;32m--> 168\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m exception\n\u001b[1;32m 170\u001b[0m \u001b[38;5;66;03m# If requested use base_time and time_offset to derive time. 
Assumes that the units\u001b[39;00m\n\u001b[1;32m 171\u001b[0m \u001b[38;5;66;03m# of both are in seconds and that the value is number of seconds since epoch.\u001b[39;00m\n\u001b[1;32m 172\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m use_base_time:\n", + "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/act/io/armfiles.py:143\u001b[0m, in \u001b[0;36mread_netcdf\u001b[0;34m(filenames, concat_dim, return_None, combine, decode_times, use_cftime, use_base_time, combine_attrs, cleanup_qc, keep_variables, **kwargs)\u001b[0m\n\u001b[1;32m 139\u001b[0m except_tuple \u001b[38;5;241m=\u001b[39m except_tuple \u001b[38;5;241m+\u001b[39m (\u001b[38;5;167;01mFileNotFoundError\u001b[39;00m, \u001b[38;5;167;01mOSError\u001b[39;00m)\n\u001b[1;32m 141\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m 142\u001b[0m \u001b[38;5;66;03m# Read data file with Xarray function\u001b[39;00m\n\u001b[0;32m--> 143\u001b[0m ds \u001b[38;5;241m=\u001b[39m \u001b[43mxr\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mopen_mfdataset\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfilenames\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 145\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m except_tuple \u001b[38;5;28;01mas\u001b[39;00m exception:\n\u001b[1;32m 146\u001b[0m \u001b[38;5;66;03m# If requested return None for File not found error\u001b[39;00m\n\u001b[1;32m 147\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mtype\u001b[39m(exception)\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__name__\u001b[39m \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mFileNotFoundError\u001b[39m\u001b[38;5;124m'\u001b[39m:\n", + "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/xarray/backends/api.py:998\u001b[0m, in \u001b[0;36mopen_mfdataset\u001b[0;34m(paths, chunks, concat_dim, compat, preprocess, engine, 
data_vars, coords, combine, parallel, join, attrs_file, combine_attrs, **kwargs)\u001b[0m\n\u001b[1;32m 995\u001b[0m open_ \u001b[38;5;241m=\u001b[39m open_dataset\n\u001b[1;32m 996\u001b[0m getattr_ \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mgetattr\u001b[39m\n\u001b[0;32m--> 998\u001b[0m datasets \u001b[38;5;241m=\u001b[39m [open_(p, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mopen_kwargs) \u001b[38;5;28;01mfor\u001b[39;00m p \u001b[38;5;129;01min\u001b[39;00m paths]\n\u001b[1;32m 999\u001b[0m closers \u001b[38;5;241m=\u001b[39m [getattr_(ds, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m_close\u001b[39m\u001b[38;5;124m\"\u001b[39m) \u001b[38;5;28;01mfor\u001b[39;00m ds \u001b[38;5;129;01min\u001b[39;00m datasets]\n\u001b[1;32m 1000\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m preprocess \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n", + "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/xarray/backends/api.py:998\u001b[0m, in \u001b[0;36m\u001b[0;34m(.0)\u001b[0m\n\u001b[1;32m 995\u001b[0m open_ \u001b[38;5;241m=\u001b[39m open_dataset\n\u001b[1;32m 996\u001b[0m getattr_ \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mgetattr\u001b[39m\n\u001b[0;32m--> 998\u001b[0m datasets \u001b[38;5;241m=\u001b[39m [\u001b[43mopen_\u001b[49m\u001b[43m(\u001b[49m\u001b[43mp\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mopen_kwargs\u001b[49m\u001b[43m)\u001b[49m \u001b[38;5;28;01mfor\u001b[39;00m p \u001b[38;5;129;01min\u001b[39;00m paths]\n\u001b[1;32m 999\u001b[0m closers \u001b[38;5;241m=\u001b[39m [getattr_(ds, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m_close\u001b[39m\u001b[38;5;124m\"\u001b[39m) \u001b[38;5;28;01mfor\u001b[39;00m ds \u001b[38;5;129;01min\u001b[39;00m datasets]\n\u001b[1;32m 1000\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m preprocess \u001b[38;5;129;01mis\u001b[39;00m 
\u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n", + "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/xarray/backends/api.py:541\u001b[0m, in \u001b[0;36mopen_dataset\u001b[0;34m(filename_or_obj, engine, chunks, cache, decode_cf, mask_and_scale, decode_times, decode_timedelta, use_cftime, concat_characters, decode_coords, drop_variables, inline_array, backend_kwargs, **kwargs)\u001b[0m\n\u001b[1;32m 529\u001b[0m decoders \u001b[38;5;241m=\u001b[39m _resolve_decoders_kwargs(\n\u001b[1;32m 530\u001b[0m decode_cf,\n\u001b[1;32m 531\u001b[0m open_backend_dataset_parameters\u001b[38;5;241m=\u001b[39mbackend\u001b[38;5;241m.\u001b[39mopen_dataset_parameters,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 537\u001b[0m decode_coords\u001b[38;5;241m=\u001b[39mdecode_coords,\n\u001b[1;32m 538\u001b[0m )\n\u001b[1;32m 540\u001b[0m overwrite_encoded_chunks \u001b[38;5;241m=\u001b[39m kwargs\u001b[38;5;241m.\u001b[39mpop(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124moverwrite_encoded_chunks\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;28;01mNone\u001b[39;00m)\n\u001b[0;32m--> 541\u001b[0m backend_ds \u001b[38;5;241m=\u001b[39m \u001b[43mbackend\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mopen_dataset\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 542\u001b[0m \u001b[43m \u001b[49m\u001b[43mfilename_or_obj\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 543\u001b[0m \u001b[43m \u001b[49m\u001b[43mdrop_variables\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdrop_variables\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 544\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mdecoders\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 545\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 546\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 547\u001b[0m ds 
\u001b[38;5;241m=\u001b[39m _dataset_from_backend_dataset(\n\u001b[1;32m 548\u001b[0m backend_ds,\n\u001b[1;32m 549\u001b[0m filename_or_obj,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 557\u001b[0m \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs,\n\u001b[1;32m 558\u001b[0m )\n\u001b[1;32m 559\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m ds\n", + "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/xarray/backends/netCDF4_.py:592\u001b[0m, in \u001b[0;36mNetCDF4BackendEntrypoint.open_dataset\u001b[0;34m(self, filename_or_obj, mask_and_scale, decode_times, concat_characters, decode_coords, drop_variables, use_cftime, decode_timedelta, group, mode, format, clobber, diskless, persist, lock, autoclose)\u001b[0m\n\u001b[1;32m 590\u001b[0m store_entrypoint \u001b[38;5;241m=\u001b[39m StoreBackendEntrypoint()\n\u001b[1;32m 591\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m close_on_error(store):\n\u001b[0;32m--> 592\u001b[0m ds \u001b[38;5;241m=\u001b[39m \u001b[43mstore_entrypoint\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mopen_dataset\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 593\u001b[0m \u001b[43m \u001b[49m\u001b[43mstore\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 594\u001b[0m \u001b[43m \u001b[49m\u001b[43mmask_and_scale\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmask_and_scale\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 595\u001b[0m \u001b[43m \u001b[49m\u001b[43mdecode_times\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdecode_times\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 596\u001b[0m \u001b[43m \u001b[49m\u001b[43mconcat_characters\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mconcat_characters\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 597\u001b[0m \u001b[43m \u001b[49m\u001b[43mdecode_coords\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdecode_coords\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 598\u001b[0m \u001b[43m 
\u001b[49m\u001b[43mdrop_variables\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdrop_variables\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 599\u001b[0m \u001b[43m \u001b[49m\u001b[43muse_cftime\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43muse_cftime\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 600\u001b[0m \u001b[43m \u001b[49m\u001b[43mdecode_timedelta\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdecode_timedelta\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 601\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 602\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m ds\n", + "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/xarray/backends/store.py:35\u001b[0m, in \u001b[0;36mStoreBackendEntrypoint.open_dataset\u001b[0;34m(self, store, mask_and_scale, decode_times, concat_characters, decode_coords, drop_variables, use_cftime, decode_timedelta)\u001b[0m\n\u001b[1;32m 32\u001b[0m \u001b[38;5;28mvars\u001b[39m, attrs \u001b[38;5;241m=\u001b[39m store\u001b[38;5;241m.\u001b[39mload()\n\u001b[1;32m 33\u001b[0m encoding \u001b[38;5;241m=\u001b[39m store\u001b[38;5;241m.\u001b[39mget_encoding()\n\u001b[0;32m---> 35\u001b[0m \u001b[38;5;28mvars\u001b[39m, attrs, coord_names \u001b[38;5;241m=\u001b[39m \u001b[43mconventions\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdecode_cf_variables\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 36\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;28;43mvars\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 37\u001b[0m \u001b[43m \u001b[49m\u001b[43mattrs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 38\u001b[0m \u001b[43m \u001b[49m\u001b[43mmask_and_scale\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmask_and_scale\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 39\u001b[0m \u001b[43m \u001b[49m\u001b[43mdecode_times\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdecode_times\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 40\u001b[0m \u001b[43m 
\u001b[49m\u001b[43mconcat_characters\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mconcat_characters\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 41\u001b[0m \u001b[43m \u001b[49m\u001b[43mdecode_coords\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdecode_coords\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 42\u001b[0m \u001b[43m \u001b[49m\u001b[43mdrop_variables\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdrop_variables\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 43\u001b[0m \u001b[43m \u001b[49m\u001b[43muse_cftime\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43muse_cftime\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 44\u001b[0m \u001b[43m \u001b[49m\u001b[43mdecode_timedelta\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdecode_timedelta\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 45\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 47\u001b[0m ds \u001b[38;5;241m=\u001b[39m Dataset(\u001b[38;5;28mvars\u001b[39m, attrs\u001b[38;5;241m=\u001b[39mattrs)\n\u001b[1;32m 48\u001b[0m ds \u001b[38;5;241m=\u001b[39m ds\u001b[38;5;241m.\u001b[39mset_coords(coord_names\u001b[38;5;241m.\u001b[39mintersection(\u001b[38;5;28mvars\u001b[39m))\n", + "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/xarray/conventions.py:562\u001b[0m, in \u001b[0;36mdecode_cf_variables\u001b[0;34m(variables, attributes, concat_characters, mask_and_scale, decode_times, decode_coords, drop_variables, use_cftime, decode_timedelta)\u001b[0m\n\u001b[1;32m 551\u001b[0m new_vars[k] \u001b[38;5;241m=\u001b[39m decode_cf_variable(\n\u001b[1;32m 552\u001b[0m k,\n\u001b[1;32m 553\u001b[0m v,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 559\u001b[0m decode_timedelta\u001b[38;5;241m=\u001b[39mdecode_timedelta,\n\u001b[1;32m 560\u001b[0m )\n\u001b[1;32m 561\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[0;32m--> 562\u001b[0m 
\u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;28mtype\u001b[39m(e)(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mFailed to decode variable \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mk\u001b[38;5;132;01m!r}\u001b[39;00m\u001b[38;5;124m: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00me\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 563\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m decode_coords \u001b[38;5;129;01min\u001b[39;00m [\u001b[38;5;28;01mTrue\u001b[39;00m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcoordinates\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mall\u001b[39m\u001b[38;5;124m\"\u001b[39m]:\n\u001b[1;32m 564\u001b[0m var_attrs \u001b[38;5;241m=\u001b[39m new_vars[k]\u001b[38;5;241m.\u001b[39mattrs\n", + "\u001b[0;31mValueError\u001b[0m: Failed to decode variable 'time_offset': unable to decode time units 'seconds since base_time' with 'the default calendar'. Try opening your dataset with decode_times=False or installing cftime if it is not installed." 
+ ] + } + ], + "source": [ + "# Load files as a single dataset\n", + "files_list = files_filter \n", + "ds = act.io.armfiles.read_netcdf(files_list)\n", + "ds.clean.cleanup()\n", + "print(f'{len(files_list)} files loaded')\n", + "ds\n" + ] + }, + { + "cell_type": "markdown", + "id": "4a551094-9ec0-4b64-b80a-9940573c2f50", + "metadata": {}, + "source": [ + "## Plot time series data\n", + "#### Define the list of variables to be plotted:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d09b789e-84f1-4605-846b-a72c110c8048", + "metadata": {}, + "outputs": [], + "source": [ + "variables_to_plot = ['mwr_scale_factor', 'liquid_water_content', 'aqc_liquid_water_content']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3458fb08-035b-4898-9253-0a94e6f9c97b", + "metadata": {}, + "outputs": [], + "source": [ + "ts_display = act.plotting.TimeSeriesDisplay(ds)\n", + "ts_display.add_subplots((len(variables_to_plot),), figsize = (9.5,4*len(variables_to_plot)))\n", + "\n", + "for i,v in enumerate(variables_to_plot):\n", + " ts_ax = ts_display.plot(v, subplot_index=(i,), set_title=ds.variables[v].attrs['long_name'],)\n", + " ts_ax.grid()\n", + "\n", + "plt.show()\n" + ] + }, + { + "cell_type": "markdown", + "id": "2308db16-f362-4033-a11a-c3e5e75ad9ba", + "metadata": {}, + "source": [ + "## Field selection dropdown menu\n", + "Select variable to be plotted from a dropdown menu" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d5792fa8-2253-441a-8ac6-1e07a0345031", + "metadata": {}, + "outputs": [], + "source": [ + "plt.ioff()\n", + "\n", + "# populate dropdown menu with available variables \n", + "available_variables = [v for v in ds.variables if not('time' in v or v.startswith('qc_') or v.startswith('source_')) and 'long_name' in ds.variables[v].attrs]\n", + "d_variable = 'mwr_scale_factor'\n", + "dropdown = widgets.Dropdown(\n", + " options = [(ds.variables[v].attrs['long_name'], v) for v in 
available_variables],\n", + " value= d_variable,\n", + " description='Field:',\n", + " disabled=False,\n", + ")\n", + "dropdown.layout.margin = '0px 30% 0px 20%'\n", + "dropdown.layout.width = '50%'\n", + "\n", + "# set up display\n", + "i_display = act.plotting.TimeSeriesDisplay(ds)\n", + "i_display.add_subplots((1,), figsize = (9.5,5))\n", + "i_ax = i_display.plot(d_variable, subplot_index=(0,), set_title=ds.variables[d_variable].attrs['long_name'],)\n", + "i_ax.grid()\n", + "i_fig = i_display.fig\n", + "\n", + "# update plot callback function\n", + "def update_plot(change):\n", + " i_ax.cla()\n", + " i_ax_new = i_display.plot(change.new, subplot_index=(0,), set_title=ds.variables[change.new].attrs['long_name'],)\n", + " i_ax_new.grid()\n", + " i_fig.canvas.draw()\n", + " i_fig.canvas.flush_events()\n", + "\n", + "dropdown.observe(update_plot, names='value')\n", + "\n", + "widgets.AppLayout(\n", + " header=dropdown,\n", + " center=i_fig.canvas,\n", + " pane_heights=[1, 6,1]\n", + ")\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.16" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/MICROBASE/.ipynb_checkpoints/microbasepiavg.c1-checkpoint.ipynb b/VAPs/quicklook/MICROBASE/.ipynb_checkpoints/microbasepiavg.c1-checkpoint.ipynb new file mode 100644 index 00000000..6f4b3ae0 --- /dev/null +++ b/VAPs/quicklook/MICROBASE/.ipynb_checkpoints/microbasepiavg.c1-checkpoint.ipynb @@ -0,0 +1,1757 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": 
"70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# MICROBASEPIAVG.C1 Plots\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/microbase) for more information about this vap." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib widget\n", + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import ipywidgets as widgets\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "# Data archive directory\n", + "DATA_DIR = r'/data/archive/'\n", + "\n", + "# Datastream info\n", + "DATASTREAM_NAME = 'microbasepiavg'\n", + "DATA_LEVEL = 'c1'\n", + "LOCATIONS = [{'end_date': '2011-03-22', 'facility': 'C1', 'site': 'nsa', 'start_date': '2002-01-01'}, {'end_date': '2010-12-30', 'facility': 'C1', 'site': 'sgp', 'start_date': '1996-11-08'}, {'end_date': '2011-02-25', 'facility': 'C1', 'site': 'twp', 'start_date': '1999-07-01'}, {'end_date': '2009-02-13', 'facility': 'C2', 'site': 'twp', 'start_date': '2002-01-01'}, {'end_date': '2011-02-27', 'facility': 'C3', 'site': 'twp', 'start_date': '2005-11-04'}]" + ] + }, + { + "cell_type": "markdown", + "id": "9faaf875", + "metadata": {}, + "source": [ + "## Define site, facility, and date range" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "ac6764f5", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The following locations and date ranges are available for this VAP:\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sitefacilitystart_dateend_date
0nsaC12002-01-012011-03-22
1sgpC11996-11-082010-12-30
2twpC11999-07-012011-02-25
3twpC22002-01-012009-02-13
4twpC32005-11-042011-02-27
\n", + "
" + ], + "text/plain": [ + " site facility start_date end_date\n", + "0 nsa C1 2002-01-01 2011-03-22\n", + "1 sgp C1 1996-11-08 2010-12-30\n", + "2 twp C1 1999-07-01 2011-02-25\n", + "3 twp C2 2002-01-01 2009-02-13\n", + "4 twp C3 2005-11-04 2011-02-27" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "print(\"The following locations and date ranges are available for this VAP:\")\n", + "display(pd.DataFrame(LOCATIONS, columns=['site', 'facility', 'start_date', 'end_date']))" + ] + }, + { + "cell_type": "markdown", + "id": "8d132223", + "metadata": {}, + "source": [ + "#### Define site, facility, and date range (date format: YYYY-MM-DD) using the variables below:" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "e563983a", + "metadata": {}, + "outputs": [], + "source": [ + "site_facility = ( 'sgp', 'C1' )\n", + "\n", + "date_start = '2010-12-28'\n", + "date_end = '2010-12-30'" + ] + }, + { + "cell_type": "markdown", + "id": "bccd3dfe-2f99-49a5-bace-ea37e8dd8fc3", + "metadata": {}, + "source": [ + "## Load data files\n", + "Load data files from /data/archive/" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "cb4b9a26-c574-49c0-a521-658fa553e39e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'/data/archive/sgp/sgpmicrobasepiavgC1.c1'" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Compile list of files\n", + "site, facility = site_facility\n", + "d_date_start = datetime.strptime(date_start, '%Y-%m-%d')\n", + "d_date_end = datetime.strptime(date_end, '%Y-%m-%d')\n", + "dir_path = os.path.join(DATA_DIR + site, site + DATASTREAM_NAME + facility + r'.' 
+ DATA_LEVEL )\n", + "dir_path\n" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "6be8f3dc", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['20101228', '20101229', '20101230']" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from datetime import date, timedelta\n", + "import pandas as pd\n", + "\n", + "def get_ARM_formated_dates(start_date, end_date):\n", + " \"\"\"\n", + " Get a list of ARM conventional formated date lists, based on start_date and end_date(inclusive)\n", + " EXAMPLE:\n", + " get_ARM_formated_dates(start_date=\"20180219\", end_date=\"20180221\")\n", + " >> [\"20180219\", \"20180220\", \"20180221\"] \n", + " \"\"\"\n", + " \n", + " _start_date = pd.to_datetime(start_date)\n", + " _end_date = pd.to_datetime(end_date)\n", + " \n", + " delta = _end_date - _start_date # returns timedelta \n", + " dates = []\n", + "\n", + " for i in range(delta.days + 1):\n", + " day = _start_date + timedelta(days=i)\n", + " day_formated = day.strftime(format=\"%Y%m%d\")\n", + " dates.append(day_formated)\n", + " return dates\n", + "\n", + "\n", + "get_ARM_formated_dates(start_date=date_start, end_date=date_end)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "51feea2e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['/data/archive/sgp/sgpmicrobasepiavgC1.c1/sgpmicrobasepiavgC1.c1.20101228.001000.cdf',\n", + " '/data/archive/sgp/sgpmicrobasepiavgC1.c1/sgpmicrobasepiavgC1.c1.20101229.001000.cdf',\n", + " '/data/archive/sgp/sgpmicrobasepiavgC1.c1/sgpmicrobasepiavgC1.c1.20101230.001000.cdf']" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Filter a list of files based on date pattern\n", + "import glob\n", + "dates = get_ARM_formated_dates(start_date=date_start, end_date=date_end)\n", + "files_filter = []\n", + "for date in dates:\n", + " files_filter += 
glob.glob(f'{dir_path}/*.{date}*.*')\n", + " files_filter\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "b0e5d0d7", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "3 files loaded\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
<xarray.Dataset>\n",
+       "Dimensions:                    (time: 216, nheights: 233)\n",
+       "Coordinates:\n",
+       "  * time                       (time) datetime64[ns] 2010-12-28T00:10:00 ... ...\n",
+       "Dimensions without coordinates: nheights\n",
+       "Data variables: (12/17)\n",
+       "    base_time                  (time) datetime64[ns] 2010-12-28T00:10:00 ... ...\n",
+       "    time_offset                (time) datetime64[ns] 2010-12-28T00:10:00 ... ...\n",
+       "    Heights                    (time, nheights) float32 dask.array<chunksize=(72, 233), meta=np.ndarray>\n",
+       "    Avg_Retrieved_LWC          (time, nheights) float32 dask.array<chunksize=(72, 233), meta=np.ndarray>\n",
+       "    Avg_Retrieved_IWC          (time, nheights) float32 dask.array<chunksize=(72, 233), meta=np.ndarray>\n",
+       "    Avg_LiqEffectiveRadius     (time, nheights) float32 dask.array<chunksize=(72, 233), meta=np.ndarray>\n",
+       "    ...                         ...\n",
+       "    Integrated_CloudFraction   (time) float32 dask.array<chunksize=(72,), meta=np.ndarray>\n",
+       "    aqc_CloudFraction          (time) float32 dask.array<chunksize=(72,), meta=np.ndarray>\n",
+       "    aqc_CloudMissing           (time) float32 dask.array<chunksize=(72,), meta=np.ndarray>\n",
+       "    lat                        (time) float32 36.61 36.61 36.61 ... 36.61 36.61\n",
+       "    lon                        (time) float32 -97.49 -97.49 ... -97.49 -97.49\n",
+       "    alt                        (time) float32 318.0 318.0 318.0 ... 318.0 318.0\n",
+       "Attributes: (12/13)\n",
+       "    process_version:                $State: vap-microbasepi-1.2-1.sol5_10 $\n",
+       "    command_line:                   microbasepi -d 20101228 -f sgpC1\n",
+       "    site_id:                        sgp\n",
+       "    facility_id:                    C1: Lamont, Oklahoma\n",
+       "    input_datastreams_description:  A string consisting of the datastream(s),...\n",
+       "    input_datastreams_num:          3\n",
+       "    ...                             ...\n",
+       "    history:                        created by user dsmgr on machine garnet a...\n",
+       "    _file_dates:                    ['20101228', '20101229', '20101230']\n",
+       "    _file_times:                    ['001000', '001000', '001000']\n",
+       "    datastream:                     sgpmicrobasepiavgC1.c1\n",
+       "    _datastream:                    sgpmicrobasepiavgC1.c1\n",
+       "    _arm_standards_flag:            1
" + ], + "text/plain": [ + "\n", + "Dimensions: (time: 216, nheights: 233)\n", + "Coordinates:\n", + " * time (time) datetime64[ns] 2010-12-28T00:10:00 ... ...\n", + "Dimensions without coordinates: nheights\n", + "Data variables: (12/17)\n", + " base_time (time) datetime64[ns] 2010-12-28T00:10:00 ... ...\n", + " time_offset (time) datetime64[ns] 2010-12-28T00:10:00 ... ...\n", + " Heights (time, nheights) float32 dask.array\n", + " Avg_Retrieved_LWC (time, nheights) float32 dask.array\n", + " Avg_Retrieved_IWC (time, nheights) float32 dask.array\n", + " Avg_LiqEffectiveRadius (time, nheights) float32 dask.array\n", + " ... ...\n", + " Integrated_CloudFraction (time) float32 dask.array\n", + " aqc_CloudFraction (time) float32 dask.array\n", + " aqc_CloudMissing (time) float32 dask.array\n", + " lat (time) float32 36.61 36.61 36.61 ... 36.61 36.61\n", + " lon (time) float32 -97.49 -97.49 ... -97.49 -97.49\n", + " alt (time) float32 318.0 318.0 318.0 ... 318.0 318.0\n", + "Attributes: (12/13)\n", + " process_version: $State: vap-microbasepi-1.2-1.sol5_10 $\n", + " command_line: microbasepi -d 20101228 -f sgpC1\n", + " site_id: sgp\n", + " facility_id: C1: Lamont, Oklahoma\n", + " input_datastreams_description: A string consisting of the datastream(s),...\n", + " input_datastreams_num: 3\n", + " ... 
...\n", + " history: created by user dsmgr on machine garnet a...\n", + " _file_dates: ['20101228', '20101229', '20101230']\n", + " _file_times: ['001000', '001000', '001000']\n", + " datastream: sgpmicrobasepiavgC1.c1\n", + " _datastream: sgpmicrobasepiavgC1.c1\n", + " _arm_standards_flag: 1" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Load files as a single dataset\n", + "files_list = files_filter \n", + "ds = act.io.armfiles.read_netcdf(files_list)\n", + "ds.clean.cleanup()\n", + "print(f'{len(files_list)} files loaded')\n", + "ds\n" + ] + }, + { + "cell_type": "markdown", + "id": "4a551094-9ec0-4b64-b80a-9940573c2f50", + "metadata": {}, + "source": [ + "## Plot time series data\n", + "#### Define the list of variables to be plotted:" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "d09b789e-84f1-4605-846b-a72c110c8048", + "metadata": {}, + "outputs": [], + "source": [ + "variables_to_plot = ['avg_retrieved_lwc', 'avg_retrieved_iwc', 'avg_liq_effective_radius']\n", + "variables_to_plot = ['avg_retrieved_iwc', 'avg_liq_effective_radius']" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "3458fb08-035b-4898-9253-0a94e6f9c97b", + "metadata": {}, + "outputs": [ + { + "ename": "KeyError", + "evalue": "'avg_retrieved_iwc'", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[11], line 5\u001b[0m\n\u001b[1;32m 2\u001b[0m ts_display\u001b[38;5;241m.\u001b[39madd_subplots((\u001b[38;5;28mlen\u001b[39m(variables_to_plot),), figsize \u001b[38;5;241m=\u001b[39m (\u001b[38;5;241m9.5\u001b[39m,\u001b[38;5;241m4\u001b[39m\u001b[38;5;241m*\u001b[39m\u001b[38;5;28mlen\u001b[39m(variables_to_plot)))\n\u001b[1;32m 4\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m i,v 
\u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28menumerate\u001b[39m(variables_to_plot):\n\u001b[0;32m----> 5\u001b[0m ts_ax \u001b[38;5;241m=\u001b[39m ts_display\u001b[38;5;241m.\u001b[39mplot(v, subplot_index\u001b[38;5;241m=\u001b[39m(i,), set_title\u001b[38;5;241m=\u001b[39m\u001b[43mds\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mvariables\u001b[49m\u001b[43m[\u001b[49m\u001b[43mv\u001b[49m\u001b[43m]\u001b[49m\u001b[38;5;241m.\u001b[39mattrs[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mlong_name\u001b[39m\u001b[38;5;124m'\u001b[39m],)\n\u001b[1;32m 6\u001b[0m ts_ax\u001b[38;5;241m.\u001b[39mgrid()\n\u001b[1;32m 8\u001b[0m plt\u001b[38;5;241m.\u001b[39mshow()\n", + "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/xarray/core/utils.py:453\u001b[0m, in \u001b[0;36mFrozen.__getitem__\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m 452\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m__getitem__\u001b[39m(\u001b[38;5;28mself\u001b[39m, key: K) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m V:\n\u001b[0;32m--> 453\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmapping\u001b[49m\u001b[43m[\u001b[49m\u001b[43mkey\u001b[49m\u001b[43m]\u001b[49m\n", + "\u001b[0;31mKeyError\u001b[0m: 'avg_retrieved_iwc'" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "fa59298dbac54ea38a2f036fff77b0e0", + "version_major": 2, + "version_minor": 0 + }, + "image/png": "", + "text/html": [ + "\n", + "
\n", + "
\n", + " Figure\n", + "
\n", + " \n", + "
\n", + " " + ], + "text/plain": [ + "Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "ts_display = act.plotting.TimeSeriesDisplay(ds)\n", + "ts_display.add_subplots((len(variables_to_plot),), figsize = (9.5,4*len(variables_to_plot)))\n", + "\n", + "for i,v in enumerate(variables_to_plot):\n", + " ts_ax = ts_display.plot(v, subplot_index=(i,), set_title=ds.variables[v].attrs['long_name'],)\n", + " ts_ax.grid()\n", + "\n", + "plt.show()\n" + ] + }, + { + "cell_type": "markdown", + "id": "2308db16-f362-4033-a11a-c3e5e75ad9ba", + "metadata": {}, + "source": [ + "## Field selection dropdown menu\n", + "Select variable to be plotted from a dropdown menu" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d5792fa8-2253-441a-8ac6-1e07a0345031", + "metadata": {}, + "outputs": [], + "source": [ + "plt.ioff()\n", + "\n", + "# populate dropdown menu with available variables \n", + "available_variables = [v for v in ds.variables if not('time' in v or v.startswith('qc_') or v.startswith('source_')) and 'long_name' in ds.variables[v].attrs]\n", + "d_variable = 'avg_retrieved_lwc'\n", + "dropdown = widgets.Dropdown(\n", + " options = [(ds.variables[v].attrs['long_name'], v) for v in available_variables],\n", + " value= d_variable,\n", + " description='Field:',\n", + " disabled=False,\n", + ")\n", + "dropdown.layout.margin = '0px 30% 0px 20%'\n", + "dropdown.layout.width = '50%'\n", + "\n", + "# set up display\n", + "i_display = act.plotting.TimeSeriesDisplay(ds)\n", + "i_display.add_subplots((1,), figsize = (9.5,5))\n", + "i_ax = i_display.plot(d_variable, subplot_index=(0,), set_title=ds.variables[d_variable].attrs['long_name'],)\n", + "i_ax.grid()\n", + "i_fig = i_display.fig\n", + "\n", + "# update plot callback function\n", + "def update_plot(change):\n", + " i_ax.cla()\n", + " i_ax_new = 
i_display.plot(change.new, subplot_index=(0,), set_title=ds.variables[change.new].attrs['long_name'],)\n", + " i_ax_new.grid()\n", + " i_fig.canvas.draw()\n", + " i_fig.canvas.flush_events()\n", + "\n", + "dropdown.observe(update_plot, names='value')\n", + "\n", + "widgets.AppLayout(\n", + " header=dropdown,\n", + " center=i_fig.canvas,\n", + " pane_heights=[1, 6,1]\n", + ")\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.16" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/MICROBASE/MICROBASE_tutorial.ipynb b/VAPs/quicklook/MICROBASE/MICROBASE_tutorial.ipynb new file mode 100644 index 00000000..0db35219 --- /dev/null +++ b/VAPs/quicklook/MICROBASE/MICROBASE_tutorial.ipynb @@ -0,0 +1,838 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# MICROBASEPI.C1 Notebook\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/microbase) for more information about this vap." + ] + }, + { + "cell_type": "markdown", + "id": "97097763", + "metadata": {}, + "source": [ + "In this notebook, we demonstrate the workflow to explore ARM vap data (using microbasepi as an example.) Value-added products (VAPs) are higher-order data products that have been analyzed and processed to ease scientists’ use of ARM data in atmospheric research and global climate models. \n", + "Here is the main content we will cover." 
+ ] + }, + { + "cell_type": "markdown", + "id": "eddec40f", + "metadata": {}, + "source": [ + "# Table of Contents\n", + "## Access the data\n", + "* How to retrieve the data\n", + "* Data path and file name conventions\n", + "* Load data\n", + "## Explore the data\n", + "* NetCDF Data structure\n", + "* Xarray essentials\n", + "* Xarray Variable\n", + "## Plot the data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "import random\n", + "\n", + "import glob\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "id": "2372d5be", + "metadata": {}, + "source": [ + "## Access the data" + ] + }, + { + "cell_type": "markdown", + "id": "5b0d9684", + "metadata": {}, + "source": [ + "### How to retrieve the data\n", + "We assume the path \"/data/archive\" is available where you are running this notebook. You can use `os.path.exists(\"/data/archive\")` to verify that the path exists on your machine.\n", + "\n", + "\n", + "### Data path and file name conventions\n", + "There are several common terms regarding ARM data, for example, data-stream-name, data-level, site, facility, instrument, etc. (For more details, please see [ARM Data File Standards Version 1.2](https://www.google.com/search?q=arm+datastream+facility+cite+definition&rlz=1C1GCEJ_enUS1029US1029&ei=hb41ZICrBPukqtsPvMODIA&ved=0ahUKEwjAgeiS0aL-AhV7kmoFHbzhAAQQ4dUDCBA&uact=5&oq=arm+datastream+facility+cite+definition&gs_lcp=Cgxnd3Mtd2l6LXNlcnAQAzIFCAAQogQyBQgAEKIEMgUIABCiBDIFCAAQogQyBQgAEKIEOgoIABBHENYEELADOgoIIRCgARDDBBAKSgQIQRgAUJ4DWIoOYO0PaAFwAXgAgAGMAYgBuQmSAQMyLjmYAQCgAQHIAQjAAQE&sclient=gws-wiz-serp).) 
\n", + "\n", + "For example, this notebook is called `microbasepi.c1`, where `microbasepi` is the \"datastream name\", and `c1` is the \"data level\".\n", + "\n", + "This datastream also contains site `nsa` and facility `C1`. (Note: an individual datastream might have multiple site-facility pairs.)\n", + "In such a case, the data of this data-stream is stored at `/data/archive/nsa/nsamicrobasepiC1.c1`, which is in the format of `{DATA_DIR}/{site}/{site}{DATASTREAM_NAME}{facility}.{DATA_LEVEL}`. We can use the following method to assign the data-stream directory `datastream_dir = os.path.join(DATA_DIR, site, site + DATASTREAM_NAME + facility + '.' + DATA_LEVEL )`\n", + "\n", + "The data files under datastream_dir also follow naming conventions. But once you reach the datastream_dir level, the most important file naming convention to differentiate the files is \"yyyyMMdd.hhmmss\", which comes in handy to filter out files based on datetime. For example, we can use `glob.glob(f'{datastream_dir}/*.200709*.*')` to filter files in September 2007.\n", + "\n", + "(Note: refer to the https://adc.arm.gov/discovery/#/ and https://adc.arm.gov/solr8/metadata/select API to explore ARM datastreams and associated available sites and facilities.)\n", + "\n", + "Please see the following examples in action" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d7e9eb85", + "metadata": {}, + "outputs": [], + "source": [ + "# Verify if DATA_DIR path exists\n", + "DATA_DIR = \"/data/archive\"\n", + "os.path.exists(DATA_DIR)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "586993fd", + "metadata": {}, + "outputs": [], + "source": [ + "# Specify datastream_dir following the path conventions and check its existence\n", + "DATASTREAM_NAME = \"microbasepi\"\n", + "DATA_LEVEL = \"c1\"\n", + "site = \"nsa\"\n", + "facility = \"C1\"\n", + "datastream_dir = os.path.join(DATA_DIR, site, site + DATASTREAM_NAME + facility + '.' 
+ DATA_LEVEL )\n", + "print(datastream_dir)\n", + "print(os.path.exists(datastream_dir))\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0742f7c1", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: list 5 (random) files under datastream_dir\n", + "files = os.listdir(datastream_dir)\n", + "files[:5]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "39b98a36", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: get most recent file\n", + "list_of_files = glob.glob(f\"{datastream_dir}/*\") # * means all if need specific format then *.csv\n", + "latest_file = max(list_of_files, key=os.path.getctime)\n", + "latest_file" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "902d514e", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus sort datastream files based on datetime\n", + "files = os.listdir(datastream_dir)\n", + "file_sorted = files.copy()\n", + "file_sorted.sort() \n", + "print(file_sorted[:5])\n", + "\n", + "# to reverse\n", + "file_sorted_reverse = files.copy() \n", + "file_sorted_reverse.sort(reverse=True)\n", + "print(file_sorted_reverse[:5])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ec5923b2", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: pattern matching\n", + "# filter the 200709** files under datastream_dir\n", + "files_filter = glob.glob(f'{datastream_dir}/*.200709*.*')\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c4a4aa18", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: use armnotebook_utils.file_filter (TODO) to filter files based on datastream info \n" + ] + }, + { + "cell_type": "markdown", + "id": "cfeb9efc", + "metadata": {}, + "source": [ + "### Load data\n", + "The arm data is stored in [NetCDF](https://en.wikipedia.org/wiki/NetCDF#:~:text=%22NetCDF%20(network%20Common%20Data%20Form,format%20for%20representing%20scientific%20data.) format. 
We can use xarray's `open_dataset` method to load a single file, or `open_mfdataset` to open multiple files. (Note: the latter will still return a single xarray dataset object by combining multiple files.)\n", + "\n", + "Note: open_dataset keeps the file handle open and lazy loads its contents. All parameters are passed directly to open_dataset. It is a preferable method over load_dataset for memory efficiency. (For more details, please see [xarray.open_dataset](https://docs.xarray.dev/en/stable/generated/xarray.open_dataset.html))\n", + "\n", + "See the following example in action" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a440a329", + "metadata": {}, + "outputs": [], + "source": [ + "# open single file\n", + "full_path = latest_file\n", + "print(full_path)\n", + "ds_single = xr.open_dataset(full_path)\n", + "ds_single" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0143a3d", + "metadata": {}, + "outputs": [], + "source": [ + "print(ds_single)\n", + "print(type(ds_single))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1c0f8939", + "metadata": {}, + "outputs": [], + "source": [ + "# open multiple files\n", + "n_files = 3\n", + "full_paths = [os.path.join(datastream_dir, f_path) for f_path in file_sorted_reverse[:n_files]]\n", + "print(full_paths)\n", + "try: # Note: sometimes multiple files cannot be merged, so we used try except here.\n", + " ds_mutiple = xr.open_mfdataset(full_paths)\n", + " # ds_mutiple\n", + " print(type(ds_mutiple))\n", + " print(ds_mutiple)\n", + "except Exception as e:\n", + " print(e)" + ] + }, + { + "cell_type": "markdown", + "id": "3fa59943", + "metadata": {}, + "source": [ + "## Explore the data" + ] + }, + { + "cell_type": "markdown", + "id": "571f69b9", + "metadata": {}, + "source": [ + "### NetCDF Data structure \n", + "\n", + "\n", + "(If you are confident with NetCDF basics and xarray essentials, feel free to skip this section.)\n", + "Before we dive into 
data exploration, there are some essential concepts we should be familiar with (shown below): \n", + "* Dataset\n", + "* Data array\n", + "* Variable\n", + "* Dimension\n", + "* Coordinate\n", + "* Data Type\n", + "* Meta Data (Attributes)\n", + "\n", + "We will not go into details about NetCDF basics, but here are some references you might find helpful:\n", + "* [Components of a NetCDF Dataset](https://iprc.soest.hawaii.edu/users/xfu/tool/guidef-7.html#)\n", + "* [Network Common Data Form (NetCDF)](https://www.unidata.ucar.edu/software/netcdf/) and [A Brief History of (netCDF) Time](https://www.unidata.ucar.edu/software/netcdf/time/recs.html)\n", + "\n", + "\n", + "\n", + "### Xarray essentials\n", + "Earlier we introduced the xarray package and used `open_dataset` and `open_mfdataset` to load NetCDF data files as an xarray Dataset object. Recall that you can review the data in a notebook by using `print(ds)` or `ds`. \n", + "\n", + "Using xarray to retrieve the aforementioned NetCDF basics is straightforward. 
In general,\n", + "* Dataset: `ds`\n", + "* Data array: `ds[var_name]`\n", + "* Variable: `ds.variables`\n", + "* Dimension: `ds.dims`\n", + "* Coordinate: `ds.coords`\n", + "* Data Type: `type(ds)`\n", + "* Meta Data (Attributes): `ds.attrs`\n", + "\n", + "Also, here are some references if you are new to xarray\n", + "* [Xarray in 45 minutes](https://tutorial.xarray.dev/overview/xarray-in-45-min.html)\n", + "* [Handling NetCDF Files using XArray for Absolute Beginners](https://towardsdatascience.com/handling-netcdf-files-using-xarray-for-absolute-beginners-111a8ab4463f)\n", + "\n", + "Try the following commands in action\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b3eac323", + "metadata": {}, + "outputs": [], + "source": [ + "ds = ds_single" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "77ecf85d", + "metadata": {}, + "outputs": [], + "source": [ + "# Dataset \n", + "ds" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "25e7de09", + "metadata": {}, + "outputs": [], + "source": [ + "# Data array, Variable\n", + "# Note: the info can be overwhelming. 
Do not attempt to grasp everything at the first glance.\n", + "ds.variables" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8c41b67e", + "metadata": {}, + "outputs": [], + "source": [ + "# dimensions\n", + "ds.dims" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "156f1dfc", + "metadata": {}, + "outputs": [], + "source": [ + "# coordinates\n", + "ds.coords" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "277d6064", + "metadata": {}, + "outputs": [], + "source": [ + "# Meta Data (Attributes)\n", + "ds.attrs" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d334681f", + "metadata": {}, + "outputs": [], + "source": [ + "# type\n", + "print(type(ds))\n", + "print(type(ds.variables))\n", + "print(type(ds.dims))\n", + "print(type(ds.coords))\n", + "print(type(ds.attrs))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "643399d6", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: retrieve variable names only\n", + "list(ds.variables)" + ] + }, + { + "cell_type": "markdown", + "id": "1cf36878", + "metadata": {}, + "source": [ + "#### Discussion: variable vs. coordinates vs. dimensions. \n", + "From [Components of a NetCDF Dataset](https://iprc.soest.hawaii.edu/users/xfu/tool/guidef-7.html#):\n", + "\n", + "* Dimension: A dimension may be used to represent a real physical dimension, for example, time, latitude, longitude, or height. A dimension might also be used to index other quantities, for example station or model-run-number.\n", + "A netCDF dimension has both a name and a length. A dimension length is an arbitrary positive integer, except that one dimension in a netCDF dataset can have the length UNLIMITED.\n", + "\n", + "* Variables: Variables are used to store the bulk of the data in a netCDF dataset. A variable represents an array of values of the same type. A scalar value is treated as a 0-dimensional array. 
A variable has a name, a data type, and a shape described by its list of dimensions specified when the variable is created. A variable may also have associated attributes, which may be added, deleted or changed after the variable is created.\n", + "\n", + "* Coordinate (Variables): It is legal for a variable to have the same name as a dimension. Such variables have no special meaning to the netCDF library. However there is a convention that such variables should be treated in a special way by software using this library.\n", + "A variable with the same name as a dimension is called a coordinate variable. It typically defines a physical coordinate corresponding to that dimension.\n", + "\n", + "\n", + "Tips: By definition, coordinates and dimensions are also variables. For example, ds.time is a coordinate but it is also a special variable. But in practice, when we talk about a variable, we mean a regular (non-coordinate) variable. We will use this convention for the remainder of this notebook. " + ] + }, + { + "cell_type": "markdown", + "id": "d47120f9", + "metadata": {}, + "source": [ + "### Xarray Variable\n", + "\n", + "\n", + "We can use `ds.variables` to access the variables of a dataset. But printing out the whole dataset can often be overwhelming. Instead, we can work on an individual variable (also called an xarray data array) using the `ds[var_name]` syntax, where `var_name` is the name of the variable. See the following examples in action.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5a57e136", + "metadata": {}, + "outputs": [], + "source": [ + "print(type(ds[\"time\"]))\n", + "ds[\"time\"]" + ] + }, + { + "cell_type": "markdown", + "id": "51ec643e", + "metadata": {}, + "source": [ + "#### Variable properties\n", + "For an individual variable, the following properties are the most commonly used, listed with the associated xarray attribute to retrieve them. 
(assuming var = ds[var_name])\n", + "* name: `var.name`\n", + "* data content: `var.data`\n", + "* attributes: `var.attrs`\n", + "* dimensions: `var.dims`\n", + "* data type: `var.data.dtype`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a632a525", + "metadata": {}, + "outputs": [], + "source": [ + "var_name = \"time\"\n", + "var = ds[var_name]\n", + "\n", + "print(\"var.name: \\n\", var.name, \"\\n\")\n", + "print(\"var.data: \\n\", var.data, \"\\n\")\n", + "print(\"var.attrs: \\n\", var.attrs, \"\\n\")\n", + "print(\"var.dims: \\n\", var.dims, \"\\n\")\n", + "print(\"var.data.dtype: \\n\", var.data.dtype, \"\\n\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b256b07f", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: individual variable info table (collect and display variable info in a tabular format)\n", + "df_info = pd.DataFrame()\n", + "df_info[\"var_name\"] = list(ds.variables)\n", + "df_info[\"dims\"] = df_info.var_name.apply(lambda x: ds[x].dims)\n", + "df_info[\"is_dim\"] = df_info.var_name.apply(lambda x: x in ds.dims)\n", + "df_info[\"n_dim\"] = df_info.var_name.apply(lambda x: len(ds[x].dims))\n", + "df_info[\"attrs\"] = df_info.var_name.apply(lambda x: ds[x].attrs)\n", + "df_info[\"dtype\"] = df_info.var_name.apply(lambda x: ds[x].data.dtype)\n", + "df_info" + ] + }, + { + "cell_type": "markdown", + "id": "e6a36b96", + "metadata": {}, + "source": [ + "### Data cleaning/Preprocessing (skipped)\n", + "Data cleaning and preprocessing is an important stage in the data analysis pipeline. However, it is out of the scope of this notebook. For more information about data cleaning and preprocessing basics, here are some references. 
\n", + "* [Xarray Fundamentals](https://earth-env-data-science.github.io/lectures/xarray/xarray.html)\n", + "* [Xarray Tutorial — Pangeo Gallery documentation](http://gallery.pangeo.io/repos/pangeo-data/pangeo-tutorial-gallery/xarray.html)\n", + "* [Pythonic Data Cleaning With pandas and NumPy](https://realpython.com/python-data-cleaning-numpy-pandas/)\n", + "* [Pandas - Cleaning Data](https://www.w3schools.com/python/pandas/pandas_cleaning.asp)" + ] + }, + { + "cell_type": "markdown", + "id": "ae6e1a69", + "metadata": {}, + "source": [ + "### Plotting\n", + "Data visualization (or plotting) is another important data analysis topic and deserves its own discussion. Here in this notebook we will only demonstrate basic tools for simple data visualization tasks.\n", + "\n", + "Here are some references you might find useful:\n", + "* xarray plotting: https://docs.xarray.dev/en/stable/user-guide/plotting.html\n", + "* Atmospheric Community Toolkit (ACT): https://arm-doe.github.io/ACT/index.html\n", + "\n", + "Note: this notebook is auto-generated using a template. It uses a generic approach to select variable(s) to plot and is not customized for each individual notebook. Feel free to change the variables of interest, especially if a figure fails to plot." + ] + }, + { + "cell_type": "markdown", + "id": "7e417fb2", + "metadata": {}, + "source": [ + "#### 1-dimensional basic time series plot\n", + "\n", + "For the following plot we would like to find variables such that\n", + "* it has one and only one dimension\n", + "* \"time\" is its coordinate variable\n", + "* it is not a dimension itself,\n", + "* it is not a special variable with substrings within [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "\n", + "Please see the following example in action. 
(For more details about pandas filtering, please see the following references.)\n", + "* [pandas: multiple conditions while indexing data frame](https://stackoverflow.com/questions/22591174/pandas-multiple-conditions-while-indexing-data-frame-unexpected-behavior)\n", + "* [pandas.DataFrame.apply](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.apply.html)\n", + "* [How to test if a string contains one of the substrings in a list...](https://stackoverflow.com/questions/26577516/how-to-test-if-a-string-contains-one-of-the-substrings-in-a-list-in-pandas)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "30edac2d", + "metadata": {}, + "outputs": [], + "source": [ + "# (pandas query) filter n_dim==1, non-dimensional, and does not contain the following substrings\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter = df_info[(df_info.n_dim == 1) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (df_info.is_dim==False) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4777a659", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " var_1d = df_filter.var_name.values[0]\n", + " var_1d\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fb5ae985", + "metadata": {}, + "outputs": [], + "source": [ + "# Note: if failed, change to another variable to plot.\n", + "try:\n", + " ds[var_1d].plot()\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "markdown", + "id": "9e41066e", + "metadata": {}, + "source": [ + "#### 2-dimensional basic plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "09bd4adc", + "metadata": {}, + "outputs": [], + "source": [ + "# (pandas query) filter n_dim==2, non-dimensional, and does not contain the 
following substrings\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_2 = df_info[(df_info.n_dim == 2) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (df_info.is_dim==False) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter_2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2cdea1a9", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " var_2d = df_filter_2.var_name.values[0]\n", + " var_2d\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f5f48879", + "metadata": {}, + "outputs": [], + "source": [ + "# Note: if failed, change to another variable to plot.\n", + "try:\n", + " print(ds[var_2d].dims)\n", + " # ds[var_2d].plot()\n", + "\n", + " # conventionally, use \"time\" as x-axis\n", + " ds[var_2d].plot(x=\"time\")\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "markdown", + "id": "7554b2a6", + "metadata": {}, + "source": [ + "#### qc-plotting (optional)\n", + "\n", + "Note: act qc plotting has more strict requirement, one of them is the associated qc variable need to have \"flag_masks\" attributes. 
Which is added by using ds = act.io.armfiles.read_netcdf(files_list), then ds.clean.cleanup()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "71c2096f", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " ds_act = act.io.armfiles.read_netcdf(full_path)\n", + " print(type(ds_act))\n", + " ds_act.clean.cleanup()\n", + "\n", + " # or \n", + " # ds.clean.cleanup()\n", + "except Exception as e:\n", + " print(\"ERROR\", e)\n", + " ds_act = ds" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "844f8505", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter valid variables for ACT qc plotting\n", + "condition = (\"qc_\" + df_info.var_name).apply(lambda x: ds_act[x].attrs.get(\"flag_masks\") is not None \n", + " if x in list(ds.variables) else False)\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_3 = df_info[(df_info.is_dim==False) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings))) &\n", + " condition\n", + " ]\n", + "df_filter_3" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ecd38cc6", + "metadata": {}, + "outputs": [], + "source": [ + "# QC Plot\n", + "try:\n", + " qc_variable = df_filter_3.var_name.values[0]\n", + " print(qc_variable)\n", + "except Exception as e:\n", + " print(e)\n", + " \n", + "try:\n", + "\n", + " # Plot\n", + " qc_display = act.plotting.TimeSeriesDisplay(ds_act)\n", + " qc_display.add_subplots((2,), figsize = (9.5,10))\n", + " qc_ax = qc_display.plot(qc_variable, subplot_index=(0,), set_title=\"QC results on field: \" + qc_variable,)\n", + " qc_ax.grid()\n", + " qc_display.qc_flag_block_plot(qc_variable, subplot_index=(1,))\n", + " qc_ax.set_xlabel(f\"UTC Time starts at {ds.time.data[0]}\")\n", + "\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e)" + ] + }, + { + "cell_type": "markdown", + "id": "d76e4d27", + "metadata": 
{}, + "source": [ + "#### bonus: choose variables to plot from a dropdown menu " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bb733804", + "metadata": {}, + "outputs": [], + "source": [ + "# Valid variables filtering\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_4 = df_info[(df_info.is_dim==False) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter_4" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "82d5c20d", + "metadata": {}, + "outputs": [], + "source": [ + "# example 1: using xarray plot\n", + "\n", + "# Uncomment the following cell to try the interactive plot (ctrl + /)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2fc9aaa4", + "metadata": {}, + "outputs": [], + "source": [ + "# %matplotlib widget\n", + "# plt.clf()\n", + "\n", + "# fig, ax = plt.subplots(figsize=(10, 4))\n", + "\n", + "# available_variables = df_filter_4.var_name.values\n", + "# @widgets.interact(var=available_variables)\n", + "# def update(var = available_variables[0]):\n", + "# fig.clear() # Remove old lines from plot and plot new one\n", + "# if len(ds[var].dims)==2:\n", + "# ds[var].plot(x=\"time\", add_colorbar=False)\n", + "# else:\n", + "# ds[var].plot()\n", + "# plt.grid()\n", + "# plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fabdd802", + "metadata": {}, + "outputs": [], + "source": [ + "# example 2: using act plot\n", + "\n", + "# Uncomment the following cell to try the interactive plot (ctrl + /)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "962b4186", + "metadata": {}, + "outputs": [], + "source": [ + "# %matplotlib widget\n", + "# plt.clf()\n", + "\n", + "# available_variables = df_filter_4.var_name.values\n", + "\n", + "\n", + "# @widgets.interact(var=available_variables)\n", + "# def 
update(var = available_variables[0]):\n", + "\n", + "# i_display = act.plotting.TimeSeriesDisplay(ds_act)\n", + "# i_display.add_subplots((1,), figsize=(10, 4))\n", + "# ax = i_display.plot(var, subplot_index=(0,), set_title=f\"{var} ({ds_act[var].attrs['long_name']})\",)\n", + "\n", + "# ax.set_xlabel(f\"UTC Time starts at {ds.time.data[0]}\")\n", + "# ax.grid()\n", + "# plt.show()\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.16" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/MICROBASE/microbasepi.c1.ipynb b/VAPs/quicklook/MICROBASE/microbasepi.c1.ipynb new file mode 100644 index 00000000..a88ae73e --- /dev/null +++ b/VAPs/quicklook/MICROBASE/microbasepi.c1.ipynb @@ -0,0 +1,297 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# MICROBASEPI.C1 Plots\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/microbase) for more information about this vap." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib widget\n", + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import ipywidgets as widgets\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "# Data archive directory\n", + "DATA_DIR = r'/data/archive/'\n", + "\n", + "# Datastream info\n", + "DATASTREAM_NAME = 'microbasepi'\n", + "DATA_LEVEL = 'c1'\n", + "LOCATIONS = [{'end_date': '2011-03-22', 'facility': 'C1', 'site': 'nsa', 'start_date': '2009-01-01'}, {'end_date': '2010-12-30', 'facility': 'C1', 'site': 'sgp', 'start_date': '2010-01-01'}, {'end_date': '2011-02-25', 'facility': 'C1', 'site': 'twp', 'start_date': '2005-03-01'}, {'end_date': '2009-02-13', 'facility': 'C2', 'site': 'twp', 'start_date': '2007-01-01'}, {'end_date': '2011-02-27', 'facility': 'C3', 'site': 'twp', 'start_date': '2010-01-01'}]" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "9faaf875", + "metadata": {}, + "source": [ + "## Define site, facility, and date range" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ac6764f5", + "metadata": {}, + "outputs": [], + "source": [ + "print(\"The following locations and date ranges are available for this VAP:\")\n", + "display(pd.DataFrame(LOCATIONS, columns=['site', 'facility', 'start_date', 'end_date']))" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "8d132223", + "metadata": {}, + "source": [ + "#### Define site, facility, and date range (date format: YYYY-MM-DD) using the variables below:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e563983a", + "metadata": {}, + "outputs": [], + "source": [ + "site_facility = ( 'sgp', 'C1' )\n", + "\n", + "date_start = '2010-12-28'\n", + 
"date_end = '2010-12-30'" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "bccd3dfe-2f99-49a5-bace-ea37e8dd8fc3", + "metadata": {}, + "source": [ + "## Load data files\n", + "Load data files from /data/archive/" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cb4b9a26-c574-49c0-a521-658fa553e39e", + "metadata": {}, + "outputs": [], + "source": [ + "# Compile list of files\n", + "site, facility = site_facility\n", + "d_date_start = datetime.strptime(date_start, '%Y-%m-%d')\n", + "d_date_end = datetime.strptime(date_end, '%Y-%m-%d')\n", + "dir_path = os.path.join(DATA_DIR + site, site + DATASTREAM_NAME + facility + r'.' + DATA_LEVEL )\n", + "dir_path\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6be8f3dc", + "metadata": {}, + "outputs": [], + "source": [ + "from datetime import date, timedelta\n", + "import pandas as pd\n", + "\n", + "def get_ARM_formated_dates(start_date, end_date):\n", + " \"\"\"\n", + " Get a list of ARM conventional formated date lists, based on start_date and end_date(inclusive)\n", + " EXAMPLE:\n", + " get_ARM_formated_dates(start_date=\"20180219\", end_date=\"20180221\")\n", + " >> [\"20180219\", \"20180220\", \"20180221\"] \n", + " \"\"\"\n", + " \n", + " _start_date = pd.to_datetime(start_date)\n", + " _end_date = pd.to_datetime(end_date)\n", + " \n", + " delta = _end_date - _start_date # returns timedelta \n", + " dates = []\n", + "\n", + " for i in range(delta.days + 1):\n", + " day = _start_date + timedelta(days=i)\n", + " day_formated = day.strftime(format=\"%Y%m%d\")\n", + " dates.append(day_formated)\n", + " return dates\n", + "\n", + "\n", + "get_ARM_formated_dates(start_date=date_start, end_date=date_end)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "51feea2e", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter a list of files based on date pattern\n", + "import glob\n", + "dates = 
get_ARM_formated_dates(start_date=date_start, end_date=date_end)\n", + "files_filter = []\n", + "for date in dates:\n", + " files_filter += glob.glob(f'{dir_path}/*.{date}*.*')\n", + " files_filter\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0e5d0d7", + "metadata": {}, + "outputs": [], + "source": [ + "# Load files as a single dataset\n", + "files_list = files_filter \n", + "ds = act.io.armfiles.read_netcdf(files_list)\n", + "ds.clean.cleanup()\n", + "print(f'{len(files_list)} files loaded')\n", + "ds\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "4a551094-9ec0-4b64-b80a-9940573c2f50", + "metadata": {}, + "source": [ + "## Plot time series data\n", + "#### Define the list of variables to be plotted:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d09b789e-84f1-4605-846b-a72c110c8048", + "metadata": {}, + "outputs": [], + "source": [ + "variables_to_plot = ['MwrScaleFactor', 'Retrieved_LWC', 'Retrieved_IWC']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3458fb08-035b-4898-9253-0a94e6f9c97b", + "metadata": {}, + "outputs": [], + "source": [ + "ts_display = act.plotting.TimeSeriesDisplay(ds)\n", + "ts_display.add_subplots((len(variables_to_plot),), figsize = (9.5,4*len(variables_to_plot)))\n", + "\n", + "for i,v in enumerate(variables_to_plot):\n", + " ts_ax = ts_display.plot(v, subplot_index=(i,), set_title=ds.variables[v].attrs['long_name'],)\n", + " ts_ax.grid()\n", + "\n", + "plt.show()\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "2308db16-f362-4033-a11a-c3e5e75ad9ba", + "metadata": {}, + "source": [ + "## Field selection dropdown menu\n", + "Select variable to be plotted from a dropdown menu" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d5792fa8-2253-441a-8ac6-1e07a0345031", + "metadata": {}, + "outputs": [], + "source": [ + "plt.ioff()\n", + "\n", + "# populate dropdown menu with 
available variables \n", + "available_variables = [v for v in ds.variables if not('time' in v or v.startswith('qc_') or v.startswith('source_')) and 'long_name' in ds.variables[v].attrs]\n", + "d_variable = 'MwrScaleFactor'\n", + "dropdown = widgets.Dropdown(\n", + " options = [(ds.variables[v].attrs['long_name'], v) for v in available_variables],\n", + " value= d_variable,\n", + " description='Field:',\n", + " disabled=False,\n", + ")\n", + "dropdown.layout.margin = '0px 30% 0px 20%'\n", + "dropdown.layout.width = '50%'\n", + "\n", + "# set up display\n", + "i_display = act.plotting.TimeSeriesDisplay(ds)\n", + "i_display.add_subplots((1,), figsize = (9.5,5))\n", + "i_ax = i_display.plot(d_variable, subplot_index=(0,), set_title=ds.variables[d_variable].attrs['long_name'],)\n", + "i_ax.grid()\n", + "i_fig = i_display.fig\n", + "\n", + "# update plot callback function\n", + "def update_plot(change):\n", + " i_ax.cla()\n", + " i_ax_new = i_display.plot(change.new, subplot_index=(0,), set_title=ds.variables[change.new].attrs['long_name'],)\n", + " i_ax_new.grid()\n", + " i_fig.canvas.draw()\n", + " i_fig.canvas.flush_events()\n", + "\n", + "dropdown.observe(update_plot, names='value')\n", + "\n", + "widgets.AppLayout(\n", + " header=dropdown,\n", + " center=i_fig.canvas,\n", + " pane_heights=[1, 6,1]\n", + ")\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.12" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/MICROBASE/microbasepi2.c1.ipynb b/VAPs/quicklook/MICROBASE/microbasepi2.c1.ipynb 
new file mode 100644 index 00000000..f4a219e1 --- /dev/null +++ b/VAPs/quicklook/MICROBASE/microbasepi2.c1.ipynb @@ -0,0 +1,297 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# MICROBASEPI2.C1 Plots\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/microbase) for more information about this vap." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib widget\n", + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import ipywidgets as widgets\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "# Data archive directory\n", + "DATA_DIR = r'/data/archive/'\n", + "\n", + "# Datastream info\n", + "DATASTREAM_NAME = 'microbasepi2'\n", + "DATA_LEVEL = 'c1'\n", + "LOCATIONS = [{'end_date': '2011-03-22', 'facility': 'C1', 'site': 'nsa', 'start_date': '2002-01-01'}, {'end_date': '2010-12-30', 'facility': 'C1', 'site': 'sgp', 'start_date': '1996-11-08'}, {'end_date': '2011-02-25', 'facility': 'C1', 'site': 'twp', 'start_date': '1999-07-01'}, {'end_date': '2009-02-13', 'facility': 'C2', 'site': 'twp', 'start_date': '2002-01-01'}, {'end_date': '2011-02-27', 'facility': 'C3', 'site': 'twp', 'start_date': '2005-11-04'}]" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "9faaf875", + "metadata": {}, + "source": [ + "## Define site, facility, and date range" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ac6764f5", + "metadata": {}, + "outputs": [], + "source": [ + "print(\"The following locations and date ranges are available for this VAP:\")\n", + "display(pd.DataFrame(LOCATIONS, columns=['site', 'facility', 'start_date', 'end_date']))" + ] + }, 
+ { + "attachments": {}, + "cell_type": "markdown", + "id": "8d132223", + "metadata": {}, + "source": [ + "#### Define site, facility, and date range (date format: YYYY-MM-DD) using the variables below:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e563983a", + "metadata": {}, + "outputs": [], + "source": [ + "site_facility = ( 'sgp', 'C1' )\n", + "\n", + "date_start = '2010-12-28'\n", + "date_end = '2010-12-30'" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "bccd3dfe-2f99-49a5-bace-ea37e8dd8fc3", + "metadata": {}, + "source": [ + "## Load data files\n", + "Load data files from /data/archive/" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cb4b9a26-c574-49c0-a521-658fa553e39e", + "metadata": {}, + "outputs": [], + "source": [ + "# Compile list of files\n", + "site, facility = site_facility\n", + "d_date_start = datetime.strptime(date_start, '%Y-%m-%d')\n", + "d_date_end = datetime.strptime(date_end, '%Y-%m-%d')\n", + "dir_path = os.path.join(DATA_DIR + site, site + DATASTREAM_NAME + facility + r'.' 
+ DATA_LEVEL )\n", + "dir_path\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6be8f3dc", + "metadata": {}, + "outputs": [], + "source": [ + "from datetime import date, timedelta\n", + "import pandas as pd\n", + "\n", + "def get_ARM_formated_dates(start_date, end_date):\n", + " \"\"\"\n", + " Get a list of ARM conventional formated date lists, based on start_date and end_date(inclusive)\n", + " EXAMPLE:\n", + " get_ARM_formated_dates(start_date=\"20180219\", end_date=\"20180221\")\n", + " >> [\"20180219\", \"20180220\", \"20180221\"] \n", + " \"\"\"\n", + " \n", + " _start_date = pd.to_datetime(start_date)\n", + " _end_date = pd.to_datetime(end_date)\n", + " \n", + " delta = _end_date - _start_date # returns timedelta \n", + " dates = []\n", + "\n", + " for i in range(delta.days + 1):\n", + " day = _start_date + timedelta(days=i)\n", + " day_formated = day.strftime(format=\"%Y%m%d\")\n", + " dates.append(day_formated)\n", + " return dates\n", + "\n", + "\n", + "get_ARM_formated_dates(start_date=date_start, end_date=date_end)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "51feea2e", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter a list of files based on date pattern\n", + "import glob\n", + "dates = get_ARM_formated_dates(start_date=date_start, end_date=date_end)\n", + "files_filter = []\n", + "for date in dates:\n", + " files_filter += glob.glob(f'{dir_path}/*.{date}*.*')\n", + " files_filter\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0e5d0d7", + "metadata": {}, + "outputs": [], + "source": [ + "# Load files as a single dataset\n", + "files_list = files_filter \n", + "ds = act.io.armfiles.read_netcdf(files_list)\n", + "ds.clean.cleanup()\n", + "print(f'{len(files_list)} files loaded')\n", + "ds\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "4a551094-9ec0-4b64-b80a-9940573c2f50", + "metadata": {}, + "source": [ + "## Plot 
time series data\n", + "#### Define the list of variables to be plotted:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d09b789e-84f1-4605-846b-a72c110c8048", + "metadata": {}, + "outputs": [], + "source": [ + "variables_to_plot = ['mwr_scale_factor', 'liquid_water_content', 'aqc_liquid_water_content']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3458fb08-035b-4898-9253-0a94e6f9c97b", + "metadata": {}, + "outputs": [], + "source": [ + "ts_display = act.plotting.TimeSeriesDisplay(ds)\n", + "ts_display.add_subplots((len(variables_to_plot),), figsize = (9.5,4*len(variables_to_plot)))\n", + "\n", + "for i,v in enumerate(variables_to_plot):\n", + " ts_ax = ts_display.plot(v, subplot_index=(i,), set_title=ds.variables[v].attrs['long_name'],)\n", + " ts_ax.grid()\n", + "\n", + "plt.show()\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "2308db16-f362-4033-a11a-c3e5e75ad9ba", + "metadata": {}, + "source": [ + "## Field selection dropdown menu\n", + "Select variable to be plotted from a dropdown menu" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d5792fa8-2253-441a-8ac6-1e07a0345031", + "metadata": {}, + "outputs": [], + "source": [ + "plt.ioff()\n", + "\n", + "# populate dropdown menu with available variables \n", + "available_variables = [v for v in ds.variables if not('time' in v or v.startswith('qc_') or v.startswith('source_')) and 'long_name' in ds.variables[v].attrs]\n", + "d_variable = 'mwr_scale_factor'\n", + "dropdown = widgets.Dropdown(\n", + " options = [(ds.variables[v].attrs['long_name'], v) for v in available_variables],\n", + " value= d_variable,\n", + " description='Field:',\n", + " disabled=False,\n", + ")\n", + "dropdown.layout.margin = '0px 30% 0px 20%'\n", + "dropdown.layout.width = '50%'\n", + "\n", + "# set up display\n", + "i_display = act.plotting.TimeSeriesDisplay(ds)\n", + "i_display.add_subplots((1,), figsize = (9.5,5))\n", + "i_ax = 
i_display.plot(d_variable, subplot_index=(0,), set_title=ds.variables[d_variable].attrs['long_name'],)\n", + "i_ax.grid()\n", + "i_fig = i_display.fig\n", + "\n", + "# update plot callback function\n", + "def update_plot(change):\n", + " i_ax.cla()\n", + " i_ax_new = i_display.plot(change.new, subplot_index=(0,), set_title=ds.variables[change.new].attrs['long_name'],)\n", + " i_ax_new.grid()\n", + " i_fig.canvas.draw()\n", + " i_fig.canvas.flush_events()\n", + "\n", + "dropdown.observe(update_plot, names='value')\n", + "\n", + "widgets.AppLayout(\n", + " header=dropdown,\n", + " center=i_fig.canvas,\n", + " pane_heights=[1, 6,1]\n", + ")\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.12" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/MICROBASE/microbasepiavg.c1.ipynb b/VAPs/quicklook/MICROBASE/microbasepiavg.c1.ipynb new file mode 100644 index 00000000..2f07f5fb --- /dev/null +++ b/VAPs/quicklook/MICROBASE/microbasepiavg.c1.ipynb @@ -0,0 +1,297 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# MICROBASEPIAVG.C1 Plots\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/microbase) for more information about this vap." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib widget\n", + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import ipywidgets as widgets\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "# Data archive directory\n", + "DATA_DIR = r'/data/archive/'\n", + "\n", + "# Datastream info\n", + "DATASTREAM_NAME = 'microbasepiavg'\n", + "DATA_LEVEL = 'c1'\n", + "LOCATIONS = [{'end_date': '2011-03-22', 'facility': 'C1', 'site': 'nsa', 'start_date': '2002-01-01'}, {'end_date': '2010-12-30', 'facility': 'C1', 'site': 'sgp', 'start_date': '1996-11-08'}, {'end_date': '2011-02-25', 'facility': 'C1', 'site': 'twp', 'start_date': '1999-07-01'}, {'end_date': '2009-02-13', 'facility': 'C2', 'site': 'twp', 'start_date': '2002-01-01'}, {'end_date': '2011-02-27', 'facility': 'C3', 'site': 'twp', 'start_date': '2005-11-04'}]" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "9faaf875", + "metadata": {}, + "source": [ + "## Define site, facility, and date range" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ac6764f5", + "metadata": {}, + "outputs": [], + "source": [ + "print(\"The following locations and date ranges are available for this VAP:\")\n", + "display(pd.DataFrame(LOCATIONS, columns=['site', 'facility', 'start_date', 'end_date']))" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "8d132223", + "metadata": {}, + "source": [ + "#### Define site, facility, and date range (date format: YYYY-MM-DD) using the variables below:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e563983a", + "metadata": {}, + "outputs": [], + "source": [ + "site_facility = ( 'sgp', 'C1' )\n", + "\n", + "date_start = '2010-12-28'\n", + 
"date_end = '2010-12-30'" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "bccd3dfe-2f99-49a5-bace-ea37e8dd8fc3", + "metadata": {}, + "source": [ + "## Load data files\n", + "Load data files from /data/archive/" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cb4b9a26-c574-49c0-a521-658fa553e39e", + "metadata": {}, + "outputs": [], + "source": [ + "# Compile list of files\n", + "site, facility = site_facility\n", + "d_date_start = datetime.strptime(date_start, '%Y-%m-%d')\n", + "d_date_end = datetime.strptime(date_end, '%Y-%m-%d')\n", + "dir_path = os.path.join(DATA_DIR + site, site + DATASTREAM_NAME + facility + r'.' + DATA_LEVEL )\n", + "dir_path\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6be8f3dc", + "metadata": {}, + "outputs": [], + "source": [ + "from datetime import date, timedelta\n", + "import pandas as pd\n", + "\n", + "def get_ARM_formated_dates(start_date, end_date):\n", + " \"\"\"\n", + " Get a list of ARM conventional formated date lists, based on start_date and end_date(inclusive)\n", + " EXAMPLE:\n", + " get_ARM_formated_dates(start_date=\"20180219\", end_date=\"20180221\")\n", + " >> [\"20180219\", \"20180220\", \"20180221\"] \n", + " \"\"\"\n", + " \n", + " _start_date = pd.to_datetime(start_date)\n", + " _end_date = pd.to_datetime(end_date)\n", + " \n", + " delta = _end_date - _start_date # returns timedelta \n", + " dates = []\n", + "\n", + " for i in range(delta.days + 1):\n", + " day = _start_date + timedelta(days=i)\n", + " day_formated = day.strftime(format=\"%Y%m%d\")\n", + " dates.append(day_formated)\n", + " return dates\n", + "\n", + "\n", + "get_ARM_formated_dates(start_date=date_start, end_date=date_end)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "51feea2e", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter a list of files based on date pattern\n", + "import glob\n", + "dates = 
get_ARM_formated_dates(start_date=date_start, end_date=date_end)\n", + "files_filter = []\n", + "for date in dates:\n", + " files_filter += glob.glob(f'{dir_path}/*.{date}*.*')\n", + " files_filter\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0e5d0d7", + "metadata": {}, + "outputs": [], + "source": [ + "# Load files as a single dataset\n", + "files_list = files_filter \n", + "ds = act.io.armfiles.read_netcdf(files_list)\n", + "ds.clean.cleanup()\n", + "print(f'{len(files_list)} files loaded')\n", + "ds\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "4a551094-9ec0-4b64-b80a-9940573c2f50", + "metadata": {}, + "source": [ + "## Plot time series data\n", + "#### Define the list of variables to be plotted:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d09b789e-84f1-4605-846b-a72c110c8048", + "metadata": {}, + "outputs": [], + "source": [ + "variables_to_plot = ['avg_retrieved_lwc', 'avg_retrieved_iwc', 'avg_liq_effective_radius']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3458fb08-035b-4898-9253-0a94e6f9c97b", + "metadata": {}, + "outputs": [], + "source": [ + "ts_display = act.plotting.TimeSeriesDisplay(ds)\n", + "ts_display.add_subplots((len(variables_to_plot),), figsize = (9.5,4*len(variables_to_plot)))\n", + "\n", + "for i,v in enumerate(variables_to_plot):\n", + " ts_ax = ts_display.plot(v, subplot_index=(i,), set_title=ds.variables[v].attrs['long_name'],)\n", + " ts_ax.grid()\n", + "\n", + "plt.show()\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "2308db16-f362-4033-a11a-c3e5e75ad9ba", + "metadata": {}, + "source": [ + "## Field selection dropdown menu\n", + "Select variable to be plotted from a dropdown menu" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d5792fa8-2253-441a-8ac6-1e07a0345031", + "metadata": {}, + "outputs": [], + "source": [ + "plt.ioff()\n", + "\n", + "# populate 
dropdown menu with available variables \n", + "available_variables = [v for v in ds.variables if not('time' in v or v.startswith('qc_') or v.startswith('source_')) and 'long_name' in ds.variables[v].attrs]\n", + "d_variable = 'avg_retrieved_lwc'\n", + "dropdown = widgets.Dropdown(\n", + " options = [(ds.variables[v].attrs['long_name'], v) for v in available_variables],\n", + " value= d_variable,\n", + " description='Field:',\n", + " disabled=False,\n", + ")\n", + "dropdown.layout.margin = '0px 30% 0px 20%'\n", + "dropdown.layout.width = '50%'\n", + "\n", + "# set up display\n", + "i_display = act.plotting.TimeSeriesDisplay(ds)\n", + "i_display.add_subplots((1,), figsize = (9.5,5))\n", + "i_ax = i_display.plot(d_variable, subplot_index=(0,), set_title=ds.variables[d_variable].attrs['long_name'],)\n", + "i_ax.grid()\n", + "i_fig = i_display.fig\n", + "\n", + "# update plot callback function\n", + "def update_plot(change):\n", + " i_ax.cla()\n", + " i_ax_new = i_display.plot(change.new, subplot_index=(0,), set_title=ds.variables[change.new].attrs['long_name'],)\n", + " i_ax_new.grid()\n", + " i_fig.canvas.draw()\n", + " i_fig.canvas.flush_events()\n", + "\n", + "dropdown.observe(update_plot, names='value')\n", + "\n", + "widgets.AppLayout(\n", + " header=dropdown,\n", + " center=i_fig.canvas,\n", + " pane_heights=[1, 6,1]\n", + ")\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.12" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/MPLAVG/MPLAVG_tutorial.ipynb 
b/VAPs/quicklook/MPLAVG/MPLAVG_tutorial.ipynb new file mode 100644 index 00000000..eaaa4b29 --- /dev/null +++ b/VAPs/quicklook/MPLAVG/MPLAVG_tutorial.ipynb @@ -0,0 +1,838 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# MPLPOLAVG.C1 Notebook\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/mplavg) for more information about this vap." + ] + }, + { + "cell_type": "markdown", + "id": "97097763", + "metadata": {}, + "source": [ + "In this notebook, we demonstrate the workflow to explore ARM vap data (using mplpolavg as an example.) Value-added products (VAPs) are higher-order data products that have been analyzed and processed to ease scientists’ use of ARM data in atmospheric research and global climate models. \n", + "Here is the main content we will cover." + ] + }, + { + "cell_type": "markdown", + "id": "eddec40f", + "metadata": {}, + "source": [ + "# Table of Content\n", + "## Access the data\n", + "* How to retrieve the data\n", + "* Data path and file name conventions\n", + "* Load data\n", + "## Explore the data\n", + "* NetCDF Data structure\n", + "* Xarray essentials\n", + "* Xarray Variable\n", + "## Plot the data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "import random\n", + "\n", + "import glob\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "id": "2372d5be", + "metadata": {}, + "source": [ + "## Access the data" + ] + }, + { + "cell_type": "markdown", + "id": "5b0d9684", + "metadata": {}, + "source": [ + "### How to retrieve the data\n", + "We assume the path \"/data/archive\" is 
available where you are running this notebook. You can use `os.path.exists(\"/data/archive\")` to verify if the path exists on your machine.\n", + "\n", + "\n", + "### Data path and file name conventions\n", + "There are several common terminologies regarding ARM data, for example, data-stream-name, data-level, site, facility, instrument, etc. (For more details, please see [ARM Data File Standards Version 1.2](https://www.google.com/search?q=arm+datastream+facility+cite+definition&rlz=1C1GCEJ_enUS1029US1029&ei=hb41ZICrBPukqtsPvMODIA&ved=0ahUKEwjAgeiS0aL-AhV7kmoFHbzhAAQQ4dUDCBA&uact=5&oq=arm+datastream+facility+cite+definition&gs_lcp=Cgxnd3Mtd2l6LXNlcnAQAzIFCAAQogQyBQgAEKIEMgUIABCiBDIFCAAQogQyBQgAEKIEOgoIABBHENYEELADOgoIIRCgARDDBBAKSgQIQRgAUJ4DWIoOYO0PaAFwAXgAgAGMAYgBuQmSAQMyLjmYAQCgAQHIAQjAAQE&sclient=gws-wiz-serp).) \n", + "\n", + "For example, this notebook is called `mplpolavg.c1`, where `mplpolavg` is the \"datastream name\", and `c1` is the \"data level\".\n", + "\n", + "This datastream also contains site `fkb` and facility `M1`. (Note: individual datastream might have multiple site-facility pairs.)\n", + "In such a case, the data of this data-stream is stored at `/data/archive/fkb/fkbmplpolavgM1.c1`, which is in the format of `<DATA_DIR>/<site>/<site><DATASTREAM_NAME><facility>.<DATA_LEVEL>`. We can use the following method to assign the data-stream directory `datastream_dir = os.path.join(DATA_DIR, site, site + DATASTREAM_NAME + facility + '.' + DATA_LEVEL )`\n", + "\n", + "The data files under datastream_dir also follow naming conventions. But once you reach the datastream_dir level, the most important file naming convention to differentiate the files is \"yyyyMMdd.hhmmss\", which comes in handy to filter out files based on datetime. 
For example, we can use `glob.glob(f'{datastream_dir}/*.200709*.*')` to filter files from September 2007.\n", + "\n", + "(Note: refer to https://adc.arm.gov/discovery/#/ and the https://adc.arm.gov/solr8/metadata/select API to explore ARM datastreams and their associated available sites and facilities.)\n", + "\n", + "Please see the following examples in action" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d7e9eb85", + "metadata": {}, + "outputs": [], + "source": [ + "# Verify if DATA_DIR path exists\n", + "DATA_DIR = \"/data/archive\"\n", + "os.path.exists(DATA_DIR)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "586993fd", + "metadata": {}, + "outputs": [], + "source": [ + "# Specify datastream_dir following the path conventions and check its existence\n", + "DATASTREAM_NAME = \"mplpolavg\"\n", + "DATA_LEVEL = \"c1\"\n", + "site = \"fkb\"\n", + "facility = \"M1\"\n", + "datastream_dir = os.path.join(DATA_DIR, site, site + DATASTREAM_NAME + facility + '.' 
+ DATA_LEVEL )\n", + "print(datastream_dir)\n", + "print(os.path.exists(datastream_dir))\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0742f7c1", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: list 5 (random) files under datastream_dir\n", + "files = os.listdir(datastream_dir)\n", + "files[:5]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "39b98a36", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: get most recent file\n", + "list_of_files = glob.glob(f\"{datastream_dir}/*\") # * means all if need specific format then *.csv\n", + "latest_file = max(list_of_files, key=os.path.getctime)\n", + "latest_file" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "902d514e", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus sort datastream files based on datetime\n", + "files = os.listdir(datastream_dir)\n", + "file_sorted = files.copy()\n", + "file_sorted.sort() \n", + "print(file_sorted[:5])\n", + "\n", + "# to reverse\n", + "file_sorted_reverse = files.copy() \n", + "file_sorted_reverse.sort(reverse=True)\n", + "print(file_sorted_reverse[:5])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ec5923b2", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: pattern matching\n", + "# filter the 200709** files under datastream_dir\n", + "files_filter = glob.glob(f'{datastream_dir}/*.200709*.*')\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c4a4aa18", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: use armnotebook_utils.file_filter (TODO) to filter files based on datastream info \n" + ] + }, + { + "cell_type": "markdown", + "id": "cfeb9efc", + "metadata": {}, + "source": [ + "### Load data\n", + "The arm data is stored in [NetCDF](https://en.wikipedia.org/wiki/NetCDF#:~:text=%22NetCDF%20(network%20Common%20Data%20Form,format%20for%20representing%20scientific%20data.) format. 
We can use xarray's `open_dataset` method to load single file, or `open_mfdataset` to open multiple files. (Note: the latter will still return a single xarray dataset object by combining multiple files.)\n", + "\n", + "Note: open_dataset keeps the file handle open and lazy loads its contents. All parameters are passed directly to open_dataset. It is a preferable method over load_dataset for memory-effiency. (more details, please see [xarray.open_dataset](https://docs.xarray.dev/en/stable/generated/xarray.open_dataset.html))\n", + "\n", + "See the following example in action" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a440a329", + "metadata": {}, + "outputs": [], + "source": [ + "# open single file\n", + "full_path = latest_file\n", + "print(full_path)\n", + "ds_single = xr.open_dataset(full_path)\n", + "ds_single" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0143a3d", + "metadata": {}, + "outputs": [], + "source": [ + "print(ds_single)\n", + "print(type(ds_single))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1c0f8939", + "metadata": {}, + "outputs": [], + "source": [ + "# open multiple files\n", + "n_files = 3\n", + "full_paths = [os.path.join(datastream_dir, f_path) for f_path in file_sorted_reverse[:n_files]]\n", + "print(full_paths)\n", + "try: # Note: sometimes multiple files cannot be merged, so we used try except here.\n", + " ds_mutiple = xr.open_mfdataset(full_paths)\n", + " # ds_mutiple\n", + " print(type(ds_mutiple))\n", + " print(ds_mutiple)\n", + "except Exception as e:\n", + " print(e)" + ] + }, + { + "cell_type": "markdown", + "id": "3fa59943", + "metadata": {}, + "source": [ + "## Explore the data" + ] + }, + { + "cell_type": "markdown", + "id": "571f69b9", + "metadata": {}, + "source": [ + "### NetCDF Data structure \n", + "\n", + "\n", + "(If you are confident with NetCDF basics and xarray essentials, feel free to skip this session.)\n", + "Before we dive into 
data exploration, there are some essential concepts we should be familiar with (shown below): \n", + "* Dataset\n", + "* Data array\n", + "* Variable\n", + "* Dimension\n", + "* Coordinate\n", + "* Data Type\n", + "* Meta Data (Attributes)\n", + "\n", + "We will not go into details about NetCDF basics, but here are some references you might find helpful:\n", + "* [Components of a NetCDF Dataset](https://iprc.soest.hawaii.edu/users/xfu/tool/guidef-7.html#)\n", + "* [Network Common Data Form (NetCDF)](https://www.unidata.ucar.edu/software/netcdf/) and [A Brief History of (netCDF) Time](https://www.unidata.ucar.edu/software/netcdf/time/recs.html)\n", + "\n", + "\n", + "\n", + "### Xarray essentials\n", + "Earlier we introduced the xarray package and used `open_dataset` and `open_mfdataset` to retrieve a NetCDF data file as an xarray Dataset object. Recall that you can review the data in a notebook by using `print(ds)` or `ds`. \n", + "\n", + "Using xarray to retrieve the aforementioned NetCDF basics is straightforward. 
In general,\n", + "* Dataset: `ds`\n", + "* Data array: `ds.variables`\n", + "* Variable: `ds.variables`\n", + "* Dimension: `ds.dims`\n", + "* Coordinate: `ds.coords`\n", + "* Data Type: `type`\n", + "* Meta Data (Attributes): `ds.attrs`\n", + "\n", + "Also, here are some references if you are new to xarray:\n", + "* [Xarray in 45 minutes](https://tutorial.xarray.dev/overview/xarray-in-45-min.html)\n", + "* [Handling NetCDF Files using XArray for Absolute Beginners](https://towardsdatascience.com/handling-netcdf-files-using-xarray-for-absolute-beginners-111a8ab4463f)\n", + "\n", + "Try the following commands in action\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b3eac323", + "metadata": {}, + "outputs": [], + "source": [ + "ds = ds_single" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "77ecf85d", + "metadata": {}, + "outputs": [], + "source": [ + "# Dataset \n", + "ds" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "25e7de09", + "metadata": {}, + "outputs": [], + "source": [ + "# Data array, Variable\n", + "# Note: the info can be overwhelming. 
Do not attempt to grasp everything at first glance.\n", + "ds.variables" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8c41b67e", + "metadata": {}, + "outputs": [], + "source": [ + "# dimensions\n", + "ds.dims" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "156f1dfc", + "metadata": {}, + "outputs": [], + "source": [ + "# coordinates\n", + "ds.coords" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "277d6064", + "metadata": {}, + "outputs": [], + "source": [ + "# Meta Data (Attributes)\n", + "ds.attrs" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d334681f", + "metadata": {}, + "outputs": [], + "source": [ + "# type\n", + "print(type(ds))\n", + "print(type(ds.variables))\n", + "print(type(ds.dims))\n", + "print(type(ds.coords))\n", + "print(type(ds.attrs))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "643399d6", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: retrieve variable names only\n", + "list(ds.variables)" + ] + }, + { + "cell_type": "markdown", + "id": "1cf36878", + "metadata": {}, + "source": [ + "#### Discussion: variable vs. coordinates vs. dimensions. \n", + "From [Components of a NetCDF Dataset](https://iprc.soest.hawaii.edu/users/xfu/tool/guidef-7.html#):\n", + "\n", + "* Dimension: A dimension may be used to represent a real physical dimension, for example, time, latitude, longitude, or height. A dimension might also be used to index other quantities, for example station or model-run-number.\n", + "A netCDF dimension has both a name and a length. A dimension length is an arbitrary positive integer, except that one dimension in a netCDF dataset can have the length UNLIMITED.\n", + "\n", + "* Variables: Variables are used to store the bulk of the data in a netCDF dataset. A variable represents an array of values of the same type. A scalar value is treated as a 0-dimensional array. 
A variable has a name, a data type, and a shape described by its list of dimensions specified when the variable is created. A variable may also have associated attributes, which may be added, deleted or changed after the variable is created.\n", + "\n", + "* Coordinate (Variables): It is legal for a variable to have the same name as a dimension. Such variables have no special meaning to the netCDF library. However there is a convention that such variables should be treated in a special way by software using this library.\n", + "A variable with the same name as a dimension is called a coordinate variable. It typically defines a physical coordinate corresponding to that dimension.\n", + "\n", + "\n", + "Tip: By definition, coordinates and dimensions are also variables. For example, `ds.time` is a coordinate, but it is also a special variable. In practice, however, when we talk about a variable, we mean a regular (non-coordinate) variable. We will use this convention for the remainder of the notebook. " + ] + }, + { + "cell_type": "markdown", + "id": "d47120f9", + "metadata": {}, + "source": [ + "### Xarray Variable\n", + "\n", + "\n", + "We can use `ds.variables` to access the variables of a dataset. But printing out the whole dataset can often be overwhelming. Instead, we work on an individual variable (also called an xarray data array) using the `ds[var_name]` syntax, where `var_name` is the name of the variable. See the following examples in action.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5a57e136", + "metadata": {}, + "outputs": [], + "source": [ + "print(type(ds[\"time\"]))\n", + "ds[\"time\"]" + ] + }, + { + "cell_type": "markdown", + "id": "51ec643e", + "metadata": {}, + "source": [ + "#### Variable properties\n", + "For an individual variable, the following properties are most commonly used, along with the associated xarray attribute to retrieve them. 
(assuming var = ds[var_name])\n", + "* name: `var.name`\n", + "* data content: `var.data`\n", + "* attributes: `var.attrs`\n", + "* dimensions: `var.dims`\n", + "* data type: `var.data.dtype`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a632a525", + "metadata": {}, + "outputs": [], + "source": [ + "var_name = \"time\"\n", + "var = ds[var_name]\n", + "\n", + "print(\"var.name: \\n\", var.name, \"\\n\")\n", + "print(\"var.data: \\n\", var.data, \"\\n\")\n", + "print(\"var.attrs: \\n\", var.attrs, \"\\n\")\n", + "print(\"var.dims: \\n\", var.dims, \"\\n\")\n", + "print(\"var.data.dtype: \\n\", var.data.dtype, \"\\n\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b256b07f", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: individual variable info table (collect and display variable info in a tabular format)\n", + "df_info = pd.DataFrame()\n", + "df_info[\"var_name\"] = list(ds.variables)\n", + "df_info[\"dims\"] = df_info.var_name.apply(lambda x: ds[x].dims)\n", + "df_info[\"is_dim\"] = df_info.var_name.apply(lambda x: x in ds.dims)\n", + "df_info[\"n_dim\"] = df_info.var_name.apply(lambda x: len(ds[x].dims))\n", + "df_info[\"attrs\"] = df_info.var_name.apply(lambda x: ds[x].attrs)\n", + "df_info[\"dtype\"] = df_info.var_name.apply(lambda x: ds[x].data.dtype)\n", + "df_info" + ] + }, + { + "cell_type": "markdown", + "id": "e6a36b96", + "metadata": {}, + "source": [ + "### Data cleaning/Preprocessing (skipped)\n", + "Data cleaning and preprocessing is an important stage in the data analysis pipeline. However, it is outside the scope of this notebook. For more information about data cleaning and preprocessing basics, here are some references. 
\n", + "* [Xarray Fundamentals](https://earth-env-data-science.github.io/lectures/xarray/xarray.html)\n", + "* [Xarray Tutorial — Pangeo Gallery documentation](http://gallery.pangeo.io/repos/pangeo-data/pangeo-tutorial-gallery/xarray.html)\n", + "* [Pythonic Data Cleaning With pandas and NumPy](https://realpython.com/python-data-cleaning-numpy-pandas/)\n", + "* [Pandas - Cleaning Data](https://www.w3schools.com/python/pandas/pandas_cleaning.asp)" + ] + }, + { + "cell_type": "markdown", + "id": "ae6e1a69", + "metadata": {}, + "source": [ + "### Plotting\n", + "Data visualization (or plotting) is another important data analysis topic and deserves its own discussion. Here in this notebook we will only demonstrate basic tools for simple data visualization tasks.\n", + "\n", + "Here are some references you might find useful:\n", + "* xarray plotting: https://docs.xarray.dev/en/stable/user-guide/plotting.html\n", + "* Atmospheric Community Toolkit (ACT): https://arm-doe.github.io/ACT/index.html\n", + "\n", + "Note: this notebook is auto-generated using a template. It uses a general rule to select the variable(s) to plot and is not customized for each individual notebook. Feel free to change the variables of interest, especially if a figure fails to plot." + ] + }, + { + "cell_type": "markdown", + "id": "7e417fb2", + "metadata": {}, + "source": [ + "#### 1-dimensional basic time series plot\n", + "\n", + "For the following plot we would like to find variables such that\n", + "* it has one and only one dimension\n", + "* \"time\" is its coordinate variable\n", + "* it is not a dimension itself,\n", + "* it is not a special variable whose name contains any of the substrings [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "\n", + "Please see the following example in action. 
(For more details about pandas filtering, please see the following references.)\n", + "* [pandas: multiple conditions while indexing data frame](https://stackoverflow.com/questions/22591174/pandas-multiple-conditions-while-indexing-data-frame-unexpected-behavior)\n", + "* [pandas.DataFrame.apply](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.apply.html)\n", + "* [How to test if a string contains one of the substrings in a list...](https://stackoverflow.com/questions/26577516/how-to-test-if-a-string-contains-one-of-the-substrings-in-a-list-in-pandas)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "30edac2d", + "metadata": {}, + "outputs": [], + "source": [ + "# (pandas query) filter n_dim==1, non-dimensional, and not contains the following substrings\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter = df_info[(df_info.n_dim == 1) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (df_info.is_dim==False) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4777a659", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " var_1d = df_filter.var_name.values[0]\n", + " var_1d\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fb5ae985", + "metadata": {}, + "outputs": [], + "source": [ + "# Note: if failed, change to another variable to plot.\n", + "try:\n", + " ds[var_1d].plot()\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "markdown", + "id": "9e41066e", + "metadata": {}, + "source": [ + "#### 2-dimensional basic plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "09bd4adc", + "metadata": {}, + "outputs": [], + "source": [ + "# (pandas query) filter n_dim==2, non-dimensional, and not contains the 
following substrings\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_2 = df_info[(df_info.n_dim == 2) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (df_info.is_dim==False) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter_2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2cdea1a9", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " var_2d = df_filter_2.var_name.values[0]\n", + " var_2d\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f5f48879", + "metadata": {}, + "outputs": [], + "source": [ + "# Note: if failed, change to another variable to plot.\n", + "try:\n", + " print(ds[var_2d].dims)\n", + " # ds[var_2d].plot()\n", + "\n", + " # conventionally, use \"time\" as x-axis\n", + " ds[var_2d].plot(x=\"time\")\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "markdown", + "id": "7554b2a6", + "metadata": {}, + "source": [ + "#### qc-plotting (optional)\n", + "\n", + "Note: act qc plotting has more strict requirement, one of them is the associated qc variable need to have \"flag_masks\" attributes. 
Which is added by using ds = act.io.armfiles.read_netcdf(files_list), then ds.clean.cleanup()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "71c2096f", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " ds_act = act.io.armfiles.read_netcdf(full_path)\n", + " print(type(ds_act))\n", + " ds_act.clean.cleanup()\n", + "\n", + " # or \n", + " # ds.clean.cleanup()\n", + "except Exception as e:\n", + " print(\"ERROR\", e)\n", + " ds_act = ds" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "844f8505", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter valid variables for ACT qc plotting\n", + "condition = (\"qc_\" + df_info.var_name).apply(lambda x: ds_act[x].attrs.get(\"flag_masks\") is not None \n", + " if x in list(ds.variables) else False)\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_3 = df_info[(df_info.is_dim==False) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings))) &\n", + " condition\n", + " ]\n", + "df_filter_3" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ecd38cc6", + "metadata": {}, + "outputs": [], + "source": [ + "# QC Plot\n", + "try:\n", + " qc_variable = df_filter_3.var_name.values[0]\n", + " print(qc_variable)\n", + "except Exception as e:\n", + " print(e)\n", + " \n", + "try:\n", + "\n", + " # Plot\n", + " qc_display = act.plotting.TimeSeriesDisplay(ds_act)\n", + " qc_display.add_subplots((2,), figsize = (9.5,10))\n", + " qc_ax = qc_display.plot(qc_variable, subplot_index=(0,), set_title=\"QC results on field: \" + qc_variable,)\n", + " qc_ax.grid()\n", + " qc_display.qc_flag_block_plot(qc_variable, subplot_index=(1,))\n", + " qc_ax.set_xlabel(f\"UTC Time starts at {ds.time.data[0]}\")\n", + "\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e)" + ] + }, + { + "cell_type": "markdown", + "id": "d76e4d27", + "metadata": 
{}, + "source": [ + "#### bonus: choose variables to plot from a dropdown menu " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bb733804", + "metadata": {}, + "outputs": [], + "source": [ + "# Valid variables filtering\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_4 = df_info[(df_info.is_dim==False) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter_4" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "82d5c20d", + "metadata": {}, + "outputs": [], + "source": [ + "# example 1: using xarray plot\n", + "\n", + "# Uncomment the following cell to try the interactive plot (ctrl + /)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2fc9aaa4", + "metadata": {}, + "outputs": [], + "source": [ + "# %matplotlib widget\n", + "# plt.clf()\n", + "\n", + "# fig, ax = plt.subplots(figsize=(10, 4))\n", + "\n", + "# available_variables = df_filter_4.var_name.values\n", + "# @widgets.interact(var=available_variables)\n", + "# def update(var = available_variables[0]):\n", + "# fig.clear() # Remove old lines from plot and plot new one\n", + "# if len(ds[var].dims)==2:\n", + "# ds[var].plot(x=\"time\", add_colorbar=False)\n", + "# else:\n", + "# ds[var].plot()\n", + "# plt.grid()\n", + "# plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fabdd802", + "metadata": {}, + "outputs": [], + "source": [ + "# example 2: using act plot\n", + "\n", + "# Uncomment the following cell to try the interactive plot (ctrl + /)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "962b4186", + "metadata": {}, + "outputs": [], + "source": [ + "# %matplotlib widget\n", + "# plt.clf()\n", + "\n", + "# available_variables = df_filter_4.var_name.values\n", + "\n", + "\n", + "# @widgets.interact(var=available_variables)\n", + "# def 
update(var = available_variables[0]):\n", + "\n", + "# i_display = act.plotting.TimeSeriesDisplay(ds_act)\n", + "# i_display.add_subplots((1,), figsize=(10, 4))\n", + "# ax = i_display.plot(var, subplot_index=(0,), set_title=f\"{var} ({ds_act[var].attrs['long_name']})\",)\n", + "\n", + "# ax.set_xlabel(f\"UTC Time starts at {ds.time.data[0]}\")\n", + "# ax.grid()\n", + "# plt.show()\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.16" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/MPLAVG/mplpolavg.c1.ipynb b/VAPs/quicklook/MPLAVG/mplpolavg.c1.ipynb new file mode 100644 index 00000000..60b74172 --- /dev/null +++ b/VAPs/quicklook/MPLAVG/mplpolavg.c1.ipynb @@ -0,0 +1,339 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# MPLPOLAVG.C1 Plots\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/mplavg) for more information about this vap." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib widget\n", + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import ipywidgets as widgets\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "# Data archive directory\n", + "DATA_DIR = r'/data/archive/'\n", + "\n", + "# Datastream info\n", + "DATASTREAM_NAME = 'mplpolavg'\n", + "DATA_LEVEL = 'c1'\n", + "LOCATIONS = [{'end_date': '2007-12-31', 'facility': 'M1', 'site': 'fkb', 'start_date': '2007-03-17'}, {'end_date': '2008-12-28', 'facility': 'M1', 'site': 'hfe', 'start_date': '2008-05-14'}, {'end_date': '2011-01-05', 'facility': 'M1', 'site': 'grw', 'start_date': '2009-04-11'}, {'end_date': '2010-07-06', 'facility': 'C1', 'site': 'nsa', 'start_date': '2006-09-28'}, {'end_date': '2010-05-31', 'facility': 'C1', 'site': 'sgp', 'start_date': '2006-06-23'}, {'end_date': '2010-11-03', 'facility': 'C1', 'site': 'twp', 'start_date': '2007-02-01'}, {'end_date': '2009-02-13', 'facility': 'C2', 'site': 'twp', 'start_date': '2006-11-27'}, {'end_date': '2011-08-17', 'facility': 'C3', 'site': 'twp', 'start_date': '2006-08-31'}]" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "9faaf875", + "metadata": {}, + "source": [ + "## Define site, facility, and date range" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ac6764f5", + "metadata": {}, + "outputs": [], + "source": [ + "print(\"The following locations and date ranges are available for this VAP:\")\n", + "display(pd.DataFrame(LOCATIONS, columns=['site', 'facility', 'start_date', 'end_date']))" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "8d132223", + "metadata": {}, + "source": [ + "#### Define site, facility, and date range (date 
format: YYYY-MM-DD) using the variables below:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e563983a", + "metadata": {}, + "outputs": [], + "source": [ + "site_facility = ( 'sgp', 'C1' )\n", + "\n", + "date_start = '2010-05-29'\n", + "date_end = '2010-05-31'" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "bccd3dfe-2f99-49a5-bace-ea37e8dd8fc3", + "metadata": {}, + "source": [ + "## Load data files\n", + "Load data files from /data/archive/" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cb4b9a26-c574-49c0-a521-658fa553e39e", + "metadata": {}, + "outputs": [], + "source": [ + "# Compile list of files\n", + "site, facility = site_facility\n", + "d_date_start = datetime.strptime(date_start, '%Y-%m-%d')\n", + "d_date_end = datetime.strptime(date_end, '%Y-%m-%d')\n", + "dir_path = os.path.join(DATA_DIR + site, site + DATASTREAM_NAME + facility + r'.' + DATA_LEVEL )\n", + "dir_path\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6be8f3dc", + "metadata": {}, + "outputs": [], + "source": [ + "from datetime import date, timedelta\n", + "import pandas as pd\n", + "\n", + "def get_ARM_formated_dates(start_date, end_date):\n", + " \"\"\"\n", + " Get a list of ARM conventional formated date lists, based on start_date and end_date(inclusive)\n", + " EXAMPLE:\n", + " get_ARM_formated_dates(start_date=\"20180219\", end_date=\"20180221\")\n", + " >> [\"20180219\", \"20180220\", \"20180221\"] \n", + " \"\"\"\n", + " \n", + " _start_date = pd.to_datetime(start_date)\n", + " _end_date = pd.to_datetime(end_date)\n", + " \n", + " delta = _end_date - _start_date # returns timedelta \n", + " dates = []\n", + "\n", + " for i in range(delta.days + 1):\n", + " day = _start_date + timedelta(days=i)\n", + " day_formated = day.strftime(format=\"%Y%m%d\")\n", + " dates.append(day_formated)\n", + " return dates\n", + "\n", + "\n", + "get_ARM_formated_dates(start_date=date_start, 
end_date=date_end)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "51feea2e", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter a list of files based on date pattern\n", + "import glob\n", + "dates = get_ARM_formated_dates(start_date=date_start, end_date=date_end)\n", + "files_filter = []\n", + "for date in dates:\n", + " files_filter += glob.glob(f'{dir_path}/*.{date}*.*')\n", + " files_filter\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0e5d0d7", + "metadata": {}, + "outputs": [], + "source": [ + "# Load files as a single dataset\n", + "files_list = files_filter \n", + "ds = act.io.armfiles.read_netcdf(files_list)\n", + "ds.clean.cleanup()\n", + "print(f'{len(files_list)} files loaded')\n", + "ds\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "4a551094-9ec0-4b64-b80a-9940573c2f50", + "metadata": {}, + "source": [ + "## Plot time series data\n", + "#### Define the list of variables to be plotted:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d09b789e-84f1-4605-846b-a72c110c8048", + "metadata": {}, + "outputs": [], + "source": [ + "variables_to_plot = ['preliminary_cbh_cross_pol', 'preliminary_cbh_co_pol']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3458fb08-035b-4898-9253-0a94e6f9c97b", + "metadata": {}, + "outputs": [], + "source": [ + "ts_display = act.plotting.TimeSeriesDisplay(ds)\n", + "ts_display.add_subplots((len(variables_to_plot),), figsize = (9.5,4*len(variables_to_plot)))\n", + "\n", + "for i,v in enumerate(variables_to_plot):\n", + " ts_ax = ts_display.plot(v, subplot_index=(i,), set_title=ds.variables[v].attrs['long_name'],)\n", + " ts_ax.grid()\n", + "\n", + "plt.show()\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "194399aa-1907-452b-8ba9-bc31d7f60291", + "metadata": {}, + "source": [ + "## Quality check plots\n", + "#### Define variable for QC plot" + ] + }, + 
{ + "cell_type": "code", + "execution_count": null, + "id": "9a0ef63d-3eeb-48fc-a24a-43258b6134b8", + "metadata": {}, + "outputs": [], + "source": [ + "qc_variable = 'range_offset'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "532663a3-4dc0-4497-bda8-018c5f91e1c4", + "metadata": {}, + "outputs": [], + "source": [ + "# QC Plot\n", + "if ('qc_'+qc_variable) in ds.variables:\n", + "\n", + " # Plot\n", + " qc_display = act.plotting.TimeSeriesDisplay(ds)\n", + " qc_display.add_subplots((2,), figsize = (9.5,10))\n", + " qc_ax = qc_display.plot(qc_variable, subplot_index=(0,), set_title=\"QC results on field: \" + qc_variable,)\n", + " qc_ax.grid()\n", + " qc_display.qc_flag_block_plot(qc_variable, subplot_index=(1,))\n", + "\n", + " plt.show()\n", + "else:\n", + " print(f'QC not available for the selected field: {qc_variable}')\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "2308db16-f362-4033-a11a-c3e5e75ad9ba", + "metadata": {}, + "source": [ + "## Field selection dropdown menu\n", + "Select variable to be plotted from a dropdown menu" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d5792fa8-2253-441a-8ac6-1e07a0345031", + "metadata": {}, + "outputs": [], + "source": [ + "plt.ioff()\n", + "\n", + "# populate dropdown menu with available variables \n", + "available_variables = [v for v in ds.variables if not('time' in v or v.startswith('qc_') or v.startswith('source_')) and 'long_name' in ds.variables[v].attrs]\n", + "d_variable = 'preliminary_cbh_cross_pol'\n", + "dropdown = widgets.Dropdown(\n", + " options = [(ds.variables[v].attrs['long_name'], v) for v in available_variables],\n", + " value= d_variable,\n", + " description='Field:',\n", + " disabled=False,\n", + ")\n", + "dropdown.layout.margin = '0px 30% 0px 20%'\n", + "dropdown.layout.width = '50%'\n", + "\n", + "# set up display\n", + "i_display = act.plotting.TimeSeriesDisplay(ds)\n", + "i_display.add_subplots((1,), figsize = 
(9.5,5))\n", + "i_ax = i_display.plot(d_variable, subplot_index=(0,), set_title=ds.variables[d_variable].attrs['long_name'],)\n", + "i_ax.grid()\n", + "i_fig = i_display.fig\n", + "\n", + "# update plot callback function\n", + "def update_plot(change):\n", + " i_ax.cla()\n", + " i_ax_new = i_display.plot(change.new, subplot_index=(0,), set_title=ds.variables[change.new].attrs['long_name'],)\n", + " i_ax_new.grid()\n", + " i_fig.canvas.draw()\n", + " i_fig.canvas.flush_events()\n", + "\n", + "dropdown.observe(update_plot, names='value')\n", + "\n", + "widgets.AppLayout(\n", + " header=dropdown,\n", + " center=i_fig.canvas,\n", + " pane_heights=[1, 6,1]\n", + ")\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.12" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/MPLCMASK/30smplcmask1zwang.c1.ipynb b/VAPs/quicklook/MPLCMASK/30smplcmask1zwang.c1.ipynb new file mode 100644 index 00000000..fc3c4094 --- /dev/null +++ b/VAPs/quicklook/MPLCMASK/30smplcmask1zwang.c1.ipynb @@ -0,0 +1,368 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# 30SMPLCMASK1ZWANG.C1 Plots\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/mplcmask) for more information about this vap." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib widget\n", + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import ipywidgets as widgets\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "# Data archive directory\n", + "DATA_DIR = r'/data/archive/'\n", + "\n", + "# Datastream info\n", + "DATASTREAM_NAME = '30smplcmask1zwang'\n", + "DATA_LEVEL = 'c1'\n", + "LOCATIONS = [{'end_date': '2019-05-01', 'facility': 'M1', 'site': 'cor', 'start_date': '2018-09-24'}, {'end_date': '2017-10-31', 'facility': 'M1', 'site': 'asi', 'start_date': '2016-07-06'}, {'end_date': '2017-01-03', 'facility': 'M1', 'site': 'awr', 'start_date': '2015-12-01'}, {'end_date': '2016-01-18', 'facility': 'S1', 'site': 'awr', 'start_date': '2015-12-06'}, {'end_date': '2015-02-12', 'facility': 'M1', 'site': 'acx', 'start_date': '2015-01-09'}, {'end_date': '2020-06-01', 'facility': 'M1', 'site': 'anx', 'start_date': '2020-02-11'}, {'end_date': '2020-06-01', 'facility': 'S2', 'site': 'anx', 'start_date': '2020-01-03'}, {'end_date': '2008-12-28', 'facility': 'M1', 'site': 'hfe', 'start_date': '2008-05-15'}, {'end_date': '2023-12-05', 'facility': 'M1', 'site': 'epc', 'start_date': '2023-01-15'}, {'end_date': '2007-12-31', 'facility': 'M1', 'site': 'fkb', 'start_date': '2007-03-17'}, {'end_date': '2018-03-24', 'facility': 'M1', 'site': 'mar', 'start_date': '2017-10-19'}, {'end_date': '2022-09-30', 'facility': 'M1', 'site': 'hou', 'start_date': '2021-08-12'}, {'end_date': '2023-12-16', 'facility': 'C1', 'site': 'ena', 'start_date': '2013-10-03'}, {'end_date': '2013-08-12', 'facility': 'M1', 'site': 'mag', 'start_date': '2012-12-13'}, {'end_date': '2015-12-01', 'facility': 'M1', 'site': 'mao', 'start_date': '2014-01-03'}, 
{'end_date': '2012-02-09', 'facility': 'M1', 'site': 'gan', 'start_date': '2011-09-30'}, {'end_date': '2011-01-05', 'facility': 'M1', 'site': 'grw', 'start_date': '2009-04-11'}, {'end_date': '2023-06-15', 'facility': 'M1', 'site': 'guc', 'start_date': '2021-09-01'}, {'end_date': '2020-09-04', 'facility': 'M1', 'site': 'mos', 'start_date': '2019-10-11'}, {'end_date': '2013-07-02', 'facility': 'M1', 'site': 'pvc', 'start_date': '2012-06-24'}, {'end_date': '2021-06-14', 'facility': 'M1', 'site': 'oli', 'start_date': '2013-09-12'}, {'end_date': '2023-12-16', 'facility': 'C1', 'site': 'nsa', 'start_date': '1998-05-22'}, {'end_date': '2023-12-16', 'facility': 'C1', 'site': 'sgp', 'start_date': '2010-06-01'}, {'end_date': '2014-09-13', 'facility': 'M1', 'site': 'tmp', 'start_date': '2014-01-16'}, {'end_date': '2014-07-07', 'facility': 'C1', 'site': 'twp', 'start_date': '1999-08-11'}, {'end_date': '2009-02-13', 'facility': 'C2', 'site': 'twp', 'start_date': '1998-11-19'}, {'end_date': '2015-01-06', 'facility': 'C3', 'site': 'twp', 'start_date': '2002-08-08'}]" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "9faaf875", + "metadata": {}, + "source": [ + "## Define site, facility, and date range" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ac6764f5", + "metadata": {}, + "outputs": [], + "source": [ + "print(\"The following locations and date ranges are available for this VAP:\")\n", + "display(pd.DataFrame(LOCATIONS, columns=['site', 'facility', 'start_date', 'end_date']))" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "8d132223", + "metadata": {}, + "source": [ + "#### Define site, facility, and date range (date format: YYYY-MM-DD) using the variables below:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e563983a", + "metadata": {}, + "outputs": [], + "source": [ + "site_facility = ( 'sgp', 'C1' )\n", + "\n", + "date_start = '2023-12-14'\n", + "date_end = '2023-12-16'" + ] + }, 
+ { + "attachments": {}, + "cell_type": "markdown", + "id": "bccd3dfe-2f99-49a5-bace-ea37e8dd8fc3", + "metadata": {}, + "source": [ + "## Load data files\n", + "Load data files from /data/archive/" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cb4b9a26-c574-49c0-a521-658fa553e39e", + "metadata": {}, + "outputs": [], + "source": [ + "# Compile list of files\n", + "site, facility = site_facility\n", + "d_date_start = datetime.strptime(date_start, '%Y-%m-%d')\n", + "d_date_end = datetime.strptime(date_end, '%Y-%m-%d')\n", + "dir_path = os.path.join(DATA_DIR + site, site + DATASTREAM_NAME + facility + r'.' + DATA_LEVEL )\n", + "dir_path\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6be8f3dc", + "metadata": {}, + "outputs": [], + "source": [ + "from datetime import date, timedelta\n", + "import pandas as pd\n", + "\n", + "def get_ARM_formated_dates(start_date, end_date):\n", + " \"\"\"\n", + " Get a list of ARM conventional formated date lists, based on start_date and end_date(inclusive)\n", + " EXAMPLE:\n", + " get_ARM_formated_dates(start_date=\"20180219\", end_date=\"20180221\")\n", + " >> [\"20180219\", \"20180220\", \"20180221\"] \n", + " \"\"\"\n", + " \n", + " _start_date = pd.to_datetime(start_date)\n", + " _end_date = pd.to_datetime(end_date)\n", + " \n", + " delta = _end_date - _start_date # returns timedelta \n", + " dates = []\n", + "\n", + " for i in range(delta.days + 1):\n", + " day = _start_date + timedelta(days=i)\n", + " day_formated = day.strftime(format=\"%Y%m%d\")\n", + " dates.append(day_formated)\n", + " return dates\n", + "\n", + "\n", + "get_ARM_formated_dates(start_date=date_start, end_date=date_end)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "51feea2e", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter a list of files based on date pattern\n", + "import glob\n", + "dates = get_ARM_formated_dates(start_date=date_start, end_date=date_end)\n", + 
"files_filter = []\n", + "for date in dates:\n", + " files_filter += glob.glob(f'{dir_path}/*.{date}*.*')\n", + " files_filter\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0e5d0d7", + "metadata": {}, + "outputs": [], + "source": [ + "# Load files as a single dataset\n", + "files_list = files_filter \n", + "ds = act.io.armfiles.read_netcdf(files_list)\n", + "ds.clean.cleanup()\n", + "print(f'{len(files_list)} files loaded')\n", + "ds\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "4a551094-9ec0-4b64-b80a-9940573c2f50", + "metadata": {}, + "source": [ + "## Plot time series data\n", + "#### Define the list of variables to be plotted:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d09b789e-84f1-4605-846b-a72c110c8048", + "metadata": {}, + "outputs": [], + "source": [ + "variables_to_plot = ['cloud_base', 'cloud_top', 'cloud_mask']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3458fb08-035b-4898-9253-0a94e6f9c97b", + "metadata": {}, + "outputs": [], + "source": [ + "ts_display = act.plotting.TimeSeriesDisplay(ds)\n", + "ts_display.add_subplots((len(variables_to_plot),), figsize = (9.5,4*len(variables_to_plot)))\n", + "\n", + "for i,v in enumerate(variables_to_plot):\n", + " ts_ax = ts_display.plot(v, subplot_index=(i,), set_title=ds.variables[v].attrs['long_name'],)\n", + " ts_ax.grid()\n", + "\n", + "plt.show()\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "194399aa-1907-452b-8ba9-bc31d7f60291", + "metadata": {}, + "source": [ + "## Quality check plots\n", + "#### Define variable for QC plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9a0ef63d-3eeb-48fc-a24a-43258b6134b8", + "metadata": {}, + "outputs": [], + "source": [ + "qc_variable = 'linear_depol_ratio'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "532663a3-4dc0-4497-bda8-018c5f91e1c4", + "metadata": {}, + 
"outputs": [], + "source": [ + "# QC Plot\n", + "if ('qc_'+qc_variable) in ds.variables:\n", + "\n", + " # Plot\n", + " qc_display = act.plotting.TimeSeriesDisplay(ds)\n", + " qc_display.add_subplots((2,), figsize = (9.5,10))\n", + " qc_ax = qc_display.plot(qc_variable, subplot_index=(0,), set_title=\"QC results on field: \" + qc_variable,)\n", + " qc_ax.grid()\n", + " qc_display.qc_flag_block_plot(qc_variable, subplot_index=(1,))\n", + "\n", + " plt.show()\n", + "else:\n", + " print(f'QC not available for the selected field: {qc_variable}')\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "2308db16-f362-4033-a11a-c3e5e75ad9ba", + "metadata": {}, + "source": [ + "## Field selection dropdown menu\n", + "Select variable to be plotted from a dropdown menu" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d5792fa8-2253-441a-8ac6-1e07a0345031", + "metadata": {}, + "outputs": [], + "source": [ + "plt.ioff()\n", + "\n", + "# populate dropdown menu with available variables \n", + "available_variables = [v for v in ds.variables if not('time' in v or v.startswith('qc_') or v.startswith('source_')) and 'long_name' in ds.variables[v].attrs]\n", + "d_variable = 'cloud_base'\n", + "dropdown = widgets.Dropdown(\n", + " options = [(ds.variables[v].attrs['long_name'], v) for v in available_variables],\n", + " value= d_variable,\n", + " description='Field:',\n", + " disabled=False,\n", + ")\n", + "dropdown.layout.margin = '0px 30% 0px 20%'\n", + "dropdown.layout.width = '50%'\n", + "\n", + "# set up display\n", + "i_display = act.plotting.TimeSeriesDisplay(ds)\n", + "i_display.add_subplots((1,), figsize = (9.5,5))\n", + "i_ax = i_display.plot(d_variable, subplot_index=(0,), set_title=ds.variables[d_variable].attrs['long_name'],)\n", + "i_ax.grid()\n", + "i_fig = i_display.fig\n", + "\n", + "# update plot callback function\n", + "def update_plot(change):\n", + " i_ax.cla()\n", + " i_ax_new = i_display.plot(change.new, subplot_index=(0,), 
set_title=ds.variables[change.new].attrs['long_name'],)\n", + " i_ax_new.grid()\n", + " i_fig.canvas.draw()\n", + " i_fig.canvas.flush_events()\n", + "\n", + "dropdown.observe(update_plot, names='value')\n", + "\n", + "widgets.AppLayout(\n", + " header=dropdown,\n", + " center=i_fig.canvas,\n", + " pane_heights=[1, 6,1]\n", + ")\n" + ] + }, + { + "cell_type": "markdown", + "id": "706c3051", + "metadata": {}, + "source": [ + "## Backscatter Plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5f84df8e", + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "backscatter_var = 'backscatter'\n", + "backscatter_fill_val = -9999.0\n", + "\n", + "# apply log function to backscatter variable\n", + "if not 'log' in ds[backscatter_var].attrs['units']:\n", + " ds = act.corrections.ceil.correct_ceil(ds, fill_value=backscatter_fill_val, var_name=backscatter_var)\n", + "\n", + "backscatter_display = act.plotting.TimeSeriesDisplay(ds, subplot_shape=(1,), figsize=(9.5, 5))\n", + "backscatter_ax = backscatter_display.plot(backscatter_var, subplot_index=(0,), set_title=ds.variables[backscatter_var].attrs['long_name'])\n", + "\n", + "plt.show()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.12" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/MPLCMASK/MPLCMASK_tutorial.ipynb b/VAPs/quicklook/MPLCMASK/MPLCMASK_tutorial.ipynb new file mode 100644 index 00000000..acaf16c9 --- /dev/null +++ b/VAPs/quicklook/MPLCMASK/MPLCMASK_tutorial.ipynb @@ -0,0 +1,867 @@ +{ + "cells": [ + { + 
"cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# 30SMPLCMASK1ZWANG.C1 Notebook\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/mplcmask) for more information about this vap." + ] + }, + { + "cell_type": "markdown", + "id": "97097763", + "metadata": {}, + "source": [ + "In this notebook, we demonstrate the workflow to explore ARM vap data (using 30smplcmask1zwang as an example.) Value-added products (VAPs) are higher-order data products that have been analyzed and processed to ease scientists’ use of ARM data in atmospheric research and global climate models. \n", + "Here is the main content we will cover." + ] + }, + { + "cell_type": "markdown", + "id": "eddec40f", + "metadata": {}, + "source": [ + "# Table of Content\n", + "## Access the data\n", + "* How to retrieve the data\n", + "* Data path and file name conventions\n", + "* Load data\n", + "## Explore the data\n", + "* NetCDF Data structure\n", + "* Xarray essentials\n", + "* Xarray Variable\n", + "## Plot the data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "import random\n", + "\n", + "import glob\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "id": "2372d5be", + "metadata": {}, + "source": [ + "## Access the data" + ] + }, + { + "cell_type": "markdown", + "id": "5b0d9684", + "metadata": {}, + "source": [ + "### How to retrieve the data\n", + "We assume the path \"/data/archive\" is available where you are running this notebook. 
You can use `os.path.exists(\"/data/archive\")` to verify if the path exists on your machine.\n", + "\n", + "\n", + "### Data path and file name conventions\n", + "There are several common terms regarding ARM data, for example, data-stream-name, data-level, site, facility, instrument, etc. (For more details, please see [ARM Data File Standards Version 1.2](https://www.google.com/search?q=arm+datastream+facility+cite+definition&rlz=1C1GCEJ_enUS1029US1029&ei=hb41ZICrBPukqtsPvMODIA&ved=0ahUKEwjAgeiS0aL-AhV7kmoFHbzhAAQQ4dUDCBA&uact=5&oq=arm+datastream+facility+cite+definition&gs_lcp=Cgxnd3Mtd2l6LXNlcnAQAzIFCAAQogQyBQgAEKIEMgUIABCiBDIFCAAQogQyBQgAEKIEOgoIABBHENYEELADOgoIIRCgARDDBBAKSgQIQRgAUJ4DWIoOYO0PaAFwAXgAgAGMAYgBuQmSAQMyLjmYAQCgAQHIAQjAAQE&sclient=gws-wiz-serp).) \n", + "\n", + "For example, this notebook is called `30smplcmask1zwang.c1`, where `30smplcmask1zwang` is the \"datastream name\", and `c1` is the \"data level\".\n", + "\n", + "This datastream also contains site `cor` and facility `M1`. (Note: an individual datastream might have multiple site-facility pairs.)\n", + "In such a case, the data of this data-stream is stored at `/data/archive/cor/cor30smplcmask1zwangM1.c1`, which is in the format of `<DATA_DIR>/<site>/<site><datastream_name><facility>.<data_level>`. We can use the following method to assign the data-stream directory `datastream_dir = os.path.join(DATA_DIR, site, site + DATASTREAM_NAME + facility + '.' + DATA_LEVEL )`\n", + "\n", + "The data files under datastream_dir also follow naming conventions. But once you reach the datastream_dir level, the most important file naming convention to differentiate the files is \"yyyyMMdd.hhmmss\", which comes in handy to filter out files based on datetime.
For example, we can use `glob.glob(f'{datastream_dir}/*.200709*.*')` to filter files from September 2007.\n", + "\n", + "(Note: refer to the https://adc.arm.gov/discovery/#/ and https://adc.arm.gov/solr8/metadata/select API to explore ARM datastream and associated available sites and facilities.)\n", + "\n", + "Please see the following examples in action" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d7e9eb85", + "metadata": {}, + "outputs": [], + "source": [ + "# Verify if DATA_DIR path exists\n", + "DATA_DIR = \"/data/archive\"\n", + "os.path.exists(DATA_DIR)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "586993fd", + "metadata": {}, + "outputs": [], + "source": [ + "# Specify datastream_dir following the path conventions and check its existence\n", + "DATASTREAM_NAME = \"30smplcmask1zwang\"\n", + "DATA_LEVEL = \"c1\"\n", + "site = \"cor\"\n", + "facility = \"M1\"\n", + "datastream_dir = os.path.join(DATA_DIR, site, site + DATASTREAM_NAME + facility + '.'
+ DATA_LEVEL )\n", + "print(datastream_dir)\n", + "print(os.path.exists(datastream_dir))\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0742f7c1", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: list 5 (random) files under datastream_dir\n", + "files = os.listdir(datastream_dir)\n", + "files[:5]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "39b98a36", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: get most recent file\n", + "list_of_files = glob.glob(f\"{datastream_dir}/*\") # * means all if need specific format then *.csv\n", + "latest_file = max(list_of_files, key=os.path.getctime)\n", + "latest_file" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "902d514e", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus sort datastream files based on datetime\n", + "files = os.listdir(datastream_dir)\n", + "file_sorted = files.copy()\n", + "file_sorted.sort() \n", + "print(file_sorted[:5])\n", + "\n", + "# to reverse\n", + "file_sorted_reverse = files.copy() \n", + "file_sorted_reverse.sort(reverse=True)\n", + "print(file_sorted_reverse[:5])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ec5923b2", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: pattern matching\n", + "# filter the 200709** files under datastream_dir\n", + "files_filter = glob.glob(f'{datastream_dir}/*.200709*.*')\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c4a4aa18", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: use armnotebook_utils.file_filter (TODO) to filter files based on datastream info \n" + ] + }, + { + "cell_type": "markdown", + "id": "cfeb9efc", + "metadata": {}, + "source": [ + "### Load data\n", + "The arm data is stored in [NetCDF](https://en.wikipedia.org/wiki/NetCDF#:~:text=%22NetCDF%20(network%20Common%20Data%20Form,format%20for%20representing%20scientific%20data.) format. 
We can use xarray's `open_dataset` method to load single file, or `open_mfdataset` to open multiple files. (Note: the latter will still return a single xarray dataset object by combining multiple files.)\n", + "\n", + "Note: open_dataset keeps the file handle open and lazy loads its contents. All parameters are passed directly to open_dataset. It is a preferable method over load_dataset for memory-effiency. (more details, please see [xarray.open_dataset](https://docs.xarray.dev/en/stable/generated/xarray.open_dataset.html))\n", + "\n", + "See the following example in action" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a440a329", + "metadata": {}, + "outputs": [], + "source": [ + "# open single file\n", + "full_path = latest_file\n", + "print(full_path)\n", + "ds_single = xr.open_dataset(full_path)\n", + "ds_single" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0143a3d", + "metadata": {}, + "outputs": [], + "source": [ + "print(ds_single)\n", + "print(type(ds_single))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1c0f8939", + "metadata": {}, + "outputs": [], + "source": [ + "# open multiple files\n", + "n_files = 3\n", + "full_paths = [os.path.join(datastream_dir, f_path) for f_path in file_sorted_reverse[:n_files]]\n", + "print(full_paths)\n", + "try: # Note: sometimes multiple files cannot be merged, so we used try except here.\n", + " ds_mutiple = xr.open_mfdataset(full_paths)\n", + " # ds_mutiple\n", + " print(type(ds_mutiple))\n", + " print(ds_mutiple)\n", + "except Exception as e:\n", + " print(e)" + ] + }, + { + "cell_type": "markdown", + "id": "3fa59943", + "metadata": {}, + "source": [ + "## Explore the data" + ] + }, + { + "cell_type": "markdown", + "id": "571f69b9", + "metadata": {}, + "source": [ + "### NetCDF Data structure \n", + "\n", + "\n", + "(If you are confident with NetCDF basics and xarray essentials, feel free to skip this session.)\n", + "Before we dive into 
data exploration, there are some essential concepts we should be familiar with (shown below): \n", + "* Dataset\n", + "* Data array\n", + "* Variable\n", + "* Dimension\n", + "* Coordinate\n", + "* Data Type\n", + "* Meta Data (Attributes)\n", + "\n", + "We will not go into details about NetCDF basics, but here are some references you might find helpful:\n", + "* [Components of a NetCDF Dataset](https://iprc.soest.hawaii.edu/users/xfu/tool/guidef-7.html#)\n", + "* [Network Common Data Form (NetCDF)](https://www.unidata.ucar.edu/software/netcdf/) and [A Brief History of (netCDF) Time](https://www.unidata.ucar.edu/software/netcdf/time/recs.html)\n", + "\n", + "\n", + "\n", + "### Xarray essentials\n", + "Earlier we introduced the xarray package and used `open_dataset` and `open_mfdataset` to retrieve a NetCDF data file as an xarray Dataset object. Recall that you can review the data in a notebook by using `print(ds)` or `ds`. \n", + "\n", + "Using xarray to retrieve the aforementioned NetCDF basics is straightforward.
In general,\n", + "* Dataset: `ds`\n", + "* Data array: `ds.variables`\n", + "* Variable: `ds.variables`\n", + "* Dimension: `ds.dims`\n", + "* Coordinate: `ds.coords`\n", + "* Data Type: `type`\n", + "* Meta Data (Attributes): `ds.attrs`\n", + "\n", + "Also, here are some references if you are new to xarray\n", + "* [Xarray in 45 minutes](https://tutorial.xarray.dev/overview/xarray-in-45-min.html)\n", + "* [Handling NetCDF Files using XArray for Absolute Beginners](https://towardsdatascience.com/handling-netcdf-files-using-xarray-for-absolute-beginners-111a8ab4463f)\n", + "\n", + "Try the following commands in action\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b3eac323", + "metadata": {}, + "outputs": [], + "source": [ + "ds = ds_single" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "77ecf85d", + "metadata": {}, + "outputs": [], + "source": [ + "# Dataset \n", + "ds" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "25e7de09", + "metadata": {}, + "outputs": [], + "source": [ + "# Data array, Variable\n", + "# Note: the info can be overwhelming.
Do not attempt to grasp everything at the first glance.\n", + "ds.variables" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8c41b67e", + "metadata": {}, + "outputs": [], + "source": [ + "# dimenssions\n", + "ds.dims" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "156f1dfc", + "metadata": {}, + "outputs": [], + "source": [ + "# coordinates\n", + "ds.coords" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "277d6064", + "metadata": {}, + "outputs": [], + "source": [ + "# Meta Data (Attributes)\n", + "ds.attrs" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d334681f", + "metadata": {}, + "outputs": [], + "source": [ + "# type\n", + "print(type(ds))\n", + "print(type(ds.variables))\n", + "print(type(ds.dims))\n", + "print(type(ds.coords))\n", + "print(type(ds.attrs))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "643399d6", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: retrieve variable names only\n", + "list(ds.variables)" + ] + }, + { + "cell_type": "markdown", + "id": "1cf36878", + "metadata": {}, + "source": [ + "#### Discussion: variable vs. coordinates vs. dimenssions. \n", + "From [Components of a NetCDF Dataset](https://iprc.soest.hawaii.edu/users/xfu/tool/guidef-7.html#):\n", + "\n", + "* Dimession: A dimension may be used to represent a real physical dimension, for example, time, latitude, longitude, or height. A dimension might also be used to index other quantities, for example station or model-run-number.\n", + "A netCDF dimension has both a name and a length. A dimension length is an arbitrary positive integer, except that one dimension in a netCDF dataset can have the length UNLIMITED.\n", + "\n", + "* Variables: Variables are used to store the bulk of the data in a netCDF dataset. A variable represents an array of values of the same type. A scalar value is treated as a 0-dimensional array. 
A variable has a name, a data type, and a shape described by its list of dimensions specified when the variable is created. A variable may also have associated attributes, which may be added, deleted or changed after the variable is created.\n", + "\n", + "* Coordinate (Variables): It is legal for a variable to have the same name as a dimension. Such variables have no special meaning to the netCDF library. However there is a convention that such variables should be treated in a special way by software using this library.\n", + "A variable with the same name as a dimension is called a coordinate variable. It typically defines a physical coordinate corresponding to that dimension.\n", + "\n", + "\n", + "Tips: By definition, coordinates and dimensions are also variables. For example, ds.time is a coordinate but it is also a special variable. But in practice, when we talk about a variable, we mean a regular/non-coordinate variable. We will use this convention for the rest of this notebook. " + ] + }, + { + "cell_type": "markdown", + "id": "d47120f9", + "metadata": {}, + "source": [ + "### Xarray Variable\n", + "\n", + "\n", + "We can use `ds.variables` to access the variables of a dataset. But printing out the whole dataset can often be overwhelming. Instead, we would work on an individual variable (also called an xarray data array) using the `ds[var_name]` syntax, where `var_name` is the name of the variable. See the following examples in action.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5a57e136", + "metadata": {}, + "outputs": [], + "source": [ + "print(type(ds[\"time\"]))\n", + "ds[\"time\"]" + ] + }, + { + "cell_type": "markdown", + "id": "51ec643e", + "metadata": {}, + "source": [ + "#### Variable properties\n", + "For an individual variable, the following properties are the most commonly used, listed with the associated xarray attribute to retrieve them.
(assuming var = ds[var_name])\n", + "* name: `var.name`\n", + "* data content: `var.data`\n", + "* attributes: `var.attrs`\n", + "* dimenstions: `var.dims`\n", + "* data type: `var.data.dtype`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a632a525", + "metadata": {}, + "outputs": [], + "source": [ + "var_name = \"time\"\n", + "var = ds[var_name]\n", + "\n", + "print(\"var.name: \\n\", var.name, \"\\n\")\n", + "print(\"var.data: \\n\", var.data, \"\\n\")\n", + "print(\"var.attrs: \\n\", var.attrs, \"\\n\")\n", + "print(\"var.dims: \\n\", var.dims, \"\\n\")\n", + "print(\"var.data.dtype: \\n\", var.data.dtype, \"\\n\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b256b07f", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: indivdual variable info table (collect and display variable info as a tabular format)\n", + "df_info = pd.DataFrame()\n", + "df_info[\"var_name\"] = list(ds.variables)\n", + "df_info[\"dims\"] = df_info.var_name.apply(lambda x: ds[x].dims)\n", + "df_info[\"is_dim\"] = df_info.var_name.apply(lambda x: x in ds.dims)\n", + "df_info[\"n_dim\"] = df_info.var_name.apply(lambda x: len(ds[x].dims))\n", + "df_info[\"attrs\"] = df_info.var_name.apply(lambda x: ds[x].attrs)\n", + "df_info[\"dtype\"] = df_info.var_name.apply(lambda x: ds[x].data.dtype)\n", + "df_info" + ] + }, + { + "cell_type": "markdown", + "id": "e6a36b96", + "metadata": {}, + "source": [ + "### Data cleaning/Preprocessing (skipped)\n", + "Data cleaning and preprocessing is an important stage in the the data analysis pipeline. However it will be out of the scope of this notebook. For more information about data cleaning and preprocessing basics, here are some references. 
\n", + "* [Xarray Fundamentals](https://earth-env-data-science.github.io/lectures/xarray/xarray.html)\n", + "* [Xarray Tutorial — Pangeo Gallery documentation](http://gallery.pangeo.io/repos/pangeo-data/pangeo-tutorial-gallery/xarray.html)\n", + "* [Pythonic Data Cleaning With pandas and NumPy](https://realpython.com/python-data-cleaning-numpy-pandas/)\n", + "* [Pandas - Cleaning Data](https://www.w3schools.com/python/pandas/pandas_cleaning.asp)" + ] + }, + { + "cell_type": "markdown", + "id": "ae6e1a69", + "metadata": {}, + "source": [ + "### Plotting\n", + "Data visualization (or plotting) is another important data analysis topic and deserves its own discussion. Here in this notebook we will only demonstrate basic tools for simple data visualization tasks.\n", + "\n", + "Here are some references you might find useful:\n", + "* xarray plotting: https://docs.xarray.dev/en/stable/user-guide/plotting.html\n", + "* Atmospheric Community Toolkit (ACT): https://arm-doe.github.io/ACT/index.html\n", + "\n", + "Note: this notebook is auto-generated using a template. It uses a generic heuristic to select the variable(s) to plot and is not customized for each individual notebook. Feel free to change the variables of interest, especially if a figure fails to plot." + ] + }, + { + "cell_type": "markdown", + "id": "7e417fb2", + "metadata": {}, + "source": [ + "#### 1-dimensional basic time series plot\n", + "\n", + "For the following plot we would like to find variables such that\n", + "* it has one and only one dimension\n", + "* \"time\" is its coordinate variable\n", + "* it is not a dimension itself,\n", + "* it is not a special variable with substrings within [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "\n", + "Please see the following example in action.
(For more details about pandas filtering, please see the following references.)\n", + "* [pandas: multiple conditions while indexing data frame](https://stackoverflow.com/questions/22591174/pandas-multiple-conditions-while-indexing-data-frame-unexpected-behavior)\n", + "* [pandas.DataFrame.apply](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.apply.html)\n", + "* [How to test if a string contains one of the substrings in a list...](https://stackoverflow.com/questions/26577516/how-to-test-if-a-string-contains-one-of-the-substrings-in-a-list-in-pandas" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "30edac2d", + "metadata": {}, + "outputs": [], + "source": [ + "# (pandas query) filter n_dim==1, non-dimenssional, and not contains the following substrings\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter = df_info[(df_info.n_dim == 1) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (df_info.is_dim==False) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4777a659", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " var_1d = df_filter.var_name.values[0]\n", + " var_1d\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fb5ae985", + "metadata": {}, + "outputs": [], + "source": [ + "# Note: if failed, change to another variable to plot.\n", + "try:\n", + " ds[var_1d].plot()\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "markdown", + "id": "9e41066e", + "metadata": {}, + "source": [ + "#### 2-dimenssional basic plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "09bd4adc", + "metadata": {}, + "outputs": [], + "source": [ + "# (pandas query) filter n_dim==2, non-dimenssional, and not contains the 
following substrings\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_2 = df_info[(df_info.n_dim == 2) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (df_info.is_dim==False) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter_2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2cdea1a9", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " var_2d = df_filter_2.var_name.values[0]\n", + " var_2d\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f5f48879", + "metadata": {}, + "outputs": [], + "source": [ + "# Note: if failed, change to another variable to plot.\n", + "try:\n", + " print(ds[var_2d].dims)\n", + " # ds[var_2d].plot()\n", + "\n", + " # conventionally, use \"time\" as x-axis\n", + " ds[var_2d].plot(x=\"time\")\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "markdown", + "id": "7554b2a6", + "metadata": {}, + "source": [ + "#### qc-plotting (optional)\n", + "\n", + "Note: act qc plotting has more strict requirement, one of them is the associated qc variable need to have \"flag_masks\" attributes. 
Which is added by using ds = act.io.armfiles.read_netcdf(files_list), then ds.clean.cleanup()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "71c2096f", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " ds_act = act.io.armfiles.read_netcdf(full_path)\n", + " print(type(ds_act))\n", + " ds_act.clean.cleanup()\n", + "\n", + " # or \n", + " # ds.clean.cleanup()\n", + "except Exception as e:\n", + " print(\"ERROR\", e)\n", + " ds_act = ds" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "844f8505", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter valid variables for ACT qc plotting\n", + "condition = (\"qc_\" + df_info.var_name).apply(lambda x: ds_act[x].attrs.get(\"flag_masks\") is not None \n", + " if x in list(ds.variables) else False)\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_3 = df_info[(df_info.is_dim==False) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings))) &\n", + " condition\n", + " ]\n", + "df_filter_3" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ecd38cc6", + "metadata": {}, + "outputs": [], + "source": [ + "# QC Plot\n", + "try:\n", + " qc_variable = df_filter_3.var_name.values[0]\n", + " print(qc_variable)\n", + "except Exception as e:\n", + " print(e)\n", + " \n", + "try:\n", + "\n", + " # Plot\n", + " qc_display = act.plotting.TimeSeriesDisplay(ds_act)\n", + " qc_display.add_subplots((2,), figsize = (9.5,10))\n", + " qc_ax = qc_display.plot(qc_variable, subplot_index=(0,), set_title=\"QC results on field: \" + qc_variable,)\n", + " qc_ax.grid()\n", + " qc_display.qc_flag_block_plot(qc_variable, subplot_index=(1,))\n", + " qc_ax.set_xlabel(f\"UTC Time starts at {ds.time.data[0]}\")\n", + "\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e)" + ] + }, + { + "cell_type": "markdown", + "id": "d76e4d27", + "metadata": 
{}, + "source": [ + "#### bonus: choose variables to plot from a dropdown menu " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bb733804", + "metadata": {}, + "outputs": [], + "source": [ + "# Valid variables filtering\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_4 = df_info[(df_info.is_dim==False) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter_4" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "82d5c20d", + "metadata": {}, + "outputs": [], + "source": [ + "# example 1: using xarray plot\n", + "\n", + "# Uncomment the following cell to try the interactive plot (ctrl + /)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2fc9aaa4", + "metadata": {}, + "outputs": [], + "source": [ + "# %matplotlib widget\n", + "# plt.clf()\n", + "\n", + "# fig, ax = plt.subplots(figsize=(10, 4))\n", + "\n", + "# available_variables = df_filter_4.var_name.values\n", + "# @widgets.interact(var=available_variables)\n", + "# def update(var = available_variables[0]):\n", + "# fig.clear() # Remove old lines from plot and plot new one\n", + "# if len(ds[var].dims)==2:\n", + "# ds[var].plot(x=\"time\", add_colorbar=False)\n", + "# else:\n", + "# ds[var].plot()\n", + "# plt.grid()\n", + "# plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fabdd802", + "metadata": {}, + "outputs": [], + "source": [ + "# example 2: using act plot\n", + "\n", + "# Uncomment the following cell to try the interactive plot (ctrl + /)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "962b4186", + "metadata": {}, + "outputs": [], + "source": [ + "# %matplotlib widget\n", + "# plt.clf()\n", + "\n", + "# available_variables = df_filter_4.var_name.values\n", + "\n", + "\n", + "# @widgets.interact(var=available_variables)\n", + "# def 
update(var = available_variables[0]):\n", + "\n", + "# i_display = act.plotting.TimeSeriesDisplay(ds_act)\n", + "# i_display.add_subplots((1,), figsize=(10, 4))\n", + "# ax = i_display.plot(var, subplot_index=(0,), set_title=f\"{var} ({ds_act[var].attrs['long_name']})\",)\n", + "\n", + "# ax.set_xlabel(f\"UTC Time starts at {ds.time.data[0]}\")\n", + "# ax.grid()\n", + "# plt.show()\n" + ] + }, + { + "cell_type": "markdown", + "id": "2e2746a4", + "metadata": {}, + "source": [ + "## Backscatter Plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1eaa2114", + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "backscatter_var = 'backscatter'\n", + "backscatter_fill_val = -9999.0\n", + "\n", + "# apply log function to backscatter variable\n", + "if not 'log' in ds[backscatter_var].attrs['units']:\n", + " ds = act.corrections.ceil.correct_ceil(ds, fill_value=backscatter_fill_val, var_name=backscatter_var)\n", + "\n", + "backscatter_display = act.plotting.TimeSeriesDisplay(ds, subplot_shape=(1,), figsize=(9.5, 5))\n", + "backscatter_ax = backscatter_display.plot(backscatter_var, subplot_index=(0,), set_title=ds.variables[backscatter_var].attrs['long_name'])\n", + "\n", + "plt.show()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.16" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/MPLCMASKML/MPLCMASKML_tutorial.ipynb b/VAPs/quicklook/MPLCMASKML/MPLCMASKML_tutorial.ipynb new file mode 100644 index 00000000..6e0e9573 --- /dev/null +++ 
b/VAPs/quicklook/MPLCMASKML/MPLCMASKML_tutorial.ipynb @@ -0,0 +1,838 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# MPLCMASKML.C1 Notebook\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/mplcmaskml) for more information about this vap." + ] + }, + { + "cell_type": "markdown", + "id": "97097763", + "metadata": {}, + "source": [ + "In this notebook, we demonstrate the workflow to explore ARM vap data (using mplcmaskml as an example.) Value-added products (VAPs) are higher-order data products that have been analyzed and processed to ease scientists’ use of ARM data in atmospheric research and global climate models. \n", + "Here is the main content we will cover." + ] + }, + { + "cell_type": "markdown", + "id": "eddec40f", + "metadata": {}, + "source": [ + "# Table of Content\n", + "## Access the data\n", + "* How to retrieve the data\n", + "* Data path and file name conventions\n", + "* Load data\n", + "## Explore the data\n", + "* NetCDF Data structure\n", + "* Xarray essentials\n", + "* Xarray Variable\n", + "## Plot the data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "import random\n", + "\n", + "import glob\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "id": "2372d5be", + "metadata": {}, + "source": [ + "## Access the data" + ] + }, + { + "cell_type": "markdown", + "id": "5b0d9684", + "metadata": {}, + "source": [ + "### How to retrieve the data\n", + "We assume the path \"/data/archive\" is available where you are running this notebook. 
You can use `os.path.exists(\"/data/archive\")` to verify if the path exists on your machine.\n", + "\n", + "\n", + "### Data path and file name conventions\n", + "There are several common terms regarding ARM data, for example, data-stream-name, data-level, site, facility, instrument, etc. (For more details, please see [ARM Data File Standards Version 1.2](https://www.google.com/search?q=arm+datastream+facility+cite+definition&rlz=1C1GCEJ_enUS1029US1029&ei=hb41ZICrBPukqtsPvMODIA&ved=0ahUKEwjAgeiS0aL-AhV7kmoFHbzhAAQQ4dUDCBA&uact=5&oq=arm+datastream+facility+cite+definition&gs_lcp=Cgxnd3Mtd2l6LXNlcnAQAzIFCAAQogQyBQgAEKIEMgUIABCiBDIFCAAQogQyBQgAEKIEOgoIABBHENYEELADOgoIIRCgARDDBBAKSgQIQRgAUJ4DWIoOYO0PaAFwAXgAgAGMAYgBuQmSAQMyLjmYAQCgAQHIAQjAAQE&sclient=gws-wiz-serp).) \n", + "\n", + "For example, this notebook is called `mplcmaskml.c1`, where `mplcmaskml` is the \"datastream name\", and `c1` is the \"data level\".\n", + "\n", + "This datastream also contains site `awr` and facility `M1`. (Note: an individual datastream might have multiple site-facility pairs.)\n", + "In such a case, the data of this data-stream is stored at `/data/archive/awr/awrmplcmaskmlM1.c1`, which is in the format of `<DATA_DIR>/<site>/<site><datastream_name><facility>.<data_level>`. We can use the following method to assign the data-stream directory `datastream_dir = os.path.join(DATA_DIR, site, site + DATASTREAM_NAME + facility + '.' + DATA_LEVEL )`\n", + "\n", + "The data files under datastream_dir also follow naming conventions. But once you reach the datastream_dir level, the most important file naming convention to differentiate the files is \"yyyyMMdd.hhmmss\", which comes in handy to filter out files based on datetime.
For example, we can use `glob.glob(f'{datastream_dir}/*.200709*.*')` to filter files from September 2007.\n", + "\n", + "(Note: refer to https://adc.arm.gov/discovery/#/ and the https://adc.arm.gov/solr8/metadata/select API to explore ARM datastreams and their associated available sites and facilities.)\n", + "\n", + "Please see the following examples in action" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d7e9eb85", + "metadata": {}, + "outputs": [], + "source": [ + "# Verify if DATA_DIR path exists\n", + "DATA_DIR = \"/data/archive\"\n", + "os.path.exists(DATA_DIR)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "586993fd", + "metadata": {}, + "outputs": [], + "source": [ + "# Specify datastream_dir following the path conventions and check its existence\n", + "DATASTREAM_NAME = \"mplcmaskml\"\n", + "DATA_LEVEL = \"c1\"\n", + "site = \"awr\"\n", + "facility = \"M1\"\n", + "datastream_dir = os.path.join(DATA_DIR, site, site + DATASTREAM_NAME + facility + '.' 
+ DATA_LEVEL )\n", + "print(datastream_dir)\n", + "print(os.path.exists(datastream_dir))\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0742f7c1", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: list 5 (random) files under datastream_dir\n", + "files = os.listdir(datastream_dir)\n", + "files[:5]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "39b98a36", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: get most recent file\n", + "list_of_files = glob.glob(f\"{datastream_dir}/*\") # * means all if need specific format then *.csv\n", + "latest_file = max(list_of_files, key=os.path.getctime)\n", + "latest_file" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "902d514e", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus sort datastream files based on datetime\n", + "files = os.listdir(datastream_dir)\n", + "file_sorted = files.copy()\n", + "file_sorted.sort() \n", + "print(file_sorted[:5])\n", + "\n", + "# to reverse\n", + "file_sorted_reverse = files.copy() \n", + "file_sorted_reverse.sort(reverse=True)\n", + "print(file_sorted_reverse[:5])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ec5923b2", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: pattern matching\n", + "# filter the 200709** files under datastream_dir\n", + "files_filter = glob.glob(f'{datastream_dir}/*.200709*.*')\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c4a4aa18", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: use armnotebook_utils.file_filter (TODO) to filter files based on datastream info \n" + ] + }, + { + "cell_type": "markdown", + "id": "cfeb9efc", + "metadata": {}, + "source": [ + "### Load data\n", + "The arm data is stored in [NetCDF](https://en.wikipedia.org/wiki/NetCDF#:~:text=%22NetCDF%20(network%20Common%20Data%20Form,format%20for%20representing%20scientific%20data.) format. 
We can use xarray's `open_dataset` method to load single file, or `open_mfdataset` to open multiple files. (Note: the latter will still return a single xarray dataset object by combining multiple files.)\n", + "\n", + "Note: open_dataset keeps the file handle open and lazy loads its contents. All parameters are passed directly to open_dataset. It is a preferable method over load_dataset for memory-effiency. (more details, please see [xarray.open_dataset](https://docs.xarray.dev/en/stable/generated/xarray.open_dataset.html))\n", + "\n", + "See the following example in action" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a440a329", + "metadata": {}, + "outputs": [], + "source": [ + "# open single file\n", + "full_path = latest_file\n", + "print(full_path)\n", + "ds_single = xr.open_dataset(full_path)\n", + "ds_single" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0143a3d", + "metadata": {}, + "outputs": [], + "source": [ + "print(ds_single)\n", + "print(type(ds_single))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1c0f8939", + "metadata": {}, + "outputs": [], + "source": [ + "# open multiple files\n", + "n_files = 3\n", + "full_paths = [os.path.join(datastream_dir, f_path) for f_path in file_sorted_reverse[:n_files]]\n", + "print(full_paths)\n", + "try: # Note: sometimes multiple files cannot be merged, so we used try except here.\n", + " ds_mutiple = xr.open_mfdataset(full_paths)\n", + " # ds_mutiple\n", + " print(type(ds_mutiple))\n", + " print(ds_mutiple)\n", + "except Exception as e:\n", + " print(e)" + ] + }, + { + "cell_type": "markdown", + "id": "3fa59943", + "metadata": {}, + "source": [ + "## Explore the data" + ] + }, + { + "cell_type": "markdown", + "id": "571f69b9", + "metadata": {}, + "source": [ + "### NetCDF Data structure \n", + "\n", + "\n", + "(If you are confident with NetCDF basics and xarray essentials, feel free to skip this session.)\n", + "Before we dive into 
data exploration, there are some essential concepts we should be familiar with (shown below): \n", + "* Dataset\n", + "* Data array\n", + "* Variable\n", + "* Dimension\n", + "* Coordinate\n", + "* Data Type\n", + "* Meta Data (Attributes)\n", + "\n", + "We will not go into details about NetCDF basics, but here are some references you might find helpful:\n", + "* [Components of a NetCDF Dataset](https://iprc.soest.hawaii.edu/users/xfu/tool/guidef-7.html#)\n", + "* [Network Common Data Form (NetCDF)](https://www.unidata.ucar.edu/software/netcdf/) and [A Brief History of (netCDF) Time](https://www.unidata.ucar.edu/software/netcdf/time/recs.html)\n", + "\n", + "\n", + "\n", + "### Xarray essentials\n", + "Earlier we introduced the xarray package and used `open_dataset` and `open_mfdataset` to load NetCDF data files as an xarray Dataset object. Recall that you can review the data in a notebook by using `print(ds)` or `ds`. \n", + "\n", + "Using xarray to retrieve the aforementioned NetCDF basics is straightforward. 
In general,\n", + "* Dataset: `ds`\n", + "* Data array: `ds.variables`\n", + "* Variable: `ds.variables`\n", + "* Dimension: `ds.dims`\n", + "* Coordinate: `ds.coords`\n", + "* Data Type: `type(ds)`\n", + "* Meta Data (Attributes): `ds.attrs`\n", + "\n", + "Also, here are some references if you are new to xarray:\n", + "* [Xarray in 45 minutes](https://tutorial.xarray.dev/overview/xarray-in-45-min.html)\n", + "* [Handling NetCDF Files using XArray for Absolute Beginners](https://towardsdatascience.com/handling-netcdf-files-using-xarray-for-absolute-beginners-111a8ab4463f)\n", + "\n", + "Try the following commands in action\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b3eac323", + "metadata": {}, + "outputs": [], + "source": [ + "ds = ds_single" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "77ecf85d", + "metadata": {}, + "outputs": [], + "source": [ + "# Dataset \n", + "ds" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "25e7de09", + "metadata": {}, + "outputs": [], + "source": [ + "# Data array, Variable\n", + "# Note: the info can be overwhelming. 
Do not attempt to grasp everything at the first glance.\n", + "ds.variables" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8c41b67e", + "metadata": {}, + "outputs": [], + "source": [ + "# dimenssions\n", + "ds.dims" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "156f1dfc", + "metadata": {}, + "outputs": [], + "source": [ + "# coordinates\n", + "ds.coords" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "277d6064", + "metadata": {}, + "outputs": [], + "source": [ + "# Meta Data (Attributes)\n", + "ds.attrs" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d334681f", + "metadata": {}, + "outputs": [], + "source": [ + "# type\n", + "print(type(ds))\n", + "print(type(ds.variables))\n", + "print(type(ds.dims))\n", + "print(type(ds.coords))\n", + "print(type(ds.attrs))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "643399d6", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: retrieve variable names only\n", + "list(ds.variables)" + ] + }, + { + "cell_type": "markdown", + "id": "1cf36878", + "metadata": {}, + "source": [ + "#### Discussion: variable vs. coordinates vs. dimenssions. \n", + "From [Components of a NetCDF Dataset](https://iprc.soest.hawaii.edu/users/xfu/tool/guidef-7.html#):\n", + "\n", + "* Dimession: A dimension may be used to represent a real physical dimension, for example, time, latitude, longitude, or height. A dimension might also be used to index other quantities, for example station or model-run-number.\n", + "A netCDF dimension has both a name and a length. A dimension length is an arbitrary positive integer, except that one dimension in a netCDF dataset can have the length UNLIMITED.\n", + "\n", + "* Variables: Variables are used to store the bulk of the data in a netCDF dataset. A variable represents an array of values of the same type. A scalar value is treated as a 0-dimensional array. 
A variable has a name, a data type, and a shape described by its list of dimensions specified when the variable is created. A variable may also have associated attributes, which may be added, deleted or changed after the variable is created.\n", + "\n", + "* Coordinate (Variables): It is legal for a variable to have the same name as a dimension. Such variables have no special meaning to the netCDF library. However there is a convention that such variables should be treated in a special way by software using this library.\n", + "A variable with the same name as a dimension is called a coordinate variable. It typically defines a physical coordinate corresponding to that dimension.\n", + "\n", + "\n", + "Tips: By definition, coordinates and dimensions are also variables. For example, ds.time is a coordinate but it is also a special variable. But in practice, when we talk about a variable, we mean a regular (non-coordinate) variable. We will use this convention for the remainder of this notebook. " + ] + }, + { + "cell_type": "markdown", + "id": "d47120f9", + "metadata": {}, + "source": [ + "### Xarray Variable\n", + "\n", + "\n", + "We can use `ds.variables` to access the variables of a dataset. But printing out the whole dataset can often be overwhelming. Instead, we can work on an individual variable (also called an xarray data array) using the `ds[var_name]` syntax, where `var_name` is the name of the variable. See the following examples in action.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5a57e136", + "metadata": {}, + "outputs": [], + "source": [ + "print(type(ds[\"time\"]))\n", + "ds[\"time\"]" + ] + }, + { + "cell_type": "markdown", + "id": "51ec643e", + "metadata": {}, + "source": [ + "#### Variable properties\n", + "For an individual variable, the following properties are most commonly used, listed with the associated xarray attribute used to retrieve them. 
(assuming var = ds[var_name])\n", + "* name: `var.name`\n", + "* data content: `var.data`\n", + "* attributes: `var.attrs`\n", + "* dimenstions: `var.dims`\n", + "* data type: `var.data.dtype`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a632a525", + "metadata": {}, + "outputs": [], + "source": [ + "var_name = \"time\"\n", + "var = ds[var_name]\n", + "\n", + "print(\"var.name: \\n\", var.name, \"\\n\")\n", + "print(\"var.data: \\n\", var.data, \"\\n\")\n", + "print(\"var.attrs: \\n\", var.attrs, \"\\n\")\n", + "print(\"var.dims: \\n\", var.dims, \"\\n\")\n", + "print(\"var.data.dtype: \\n\", var.data.dtype, \"\\n\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b256b07f", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: indivdual variable info table (collect and display variable info as a tabular format)\n", + "df_info = pd.DataFrame()\n", + "df_info[\"var_name\"] = list(ds.variables)\n", + "df_info[\"dims\"] = df_info.var_name.apply(lambda x: ds[x].dims)\n", + "df_info[\"is_dim\"] = df_info.var_name.apply(lambda x: x in ds.dims)\n", + "df_info[\"n_dim\"] = df_info.var_name.apply(lambda x: len(ds[x].dims))\n", + "df_info[\"attrs\"] = df_info.var_name.apply(lambda x: ds[x].attrs)\n", + "df_info[\"dtype\"] = df_info.var_name.apply(lambda x: ds[x].data.dtype)\n", + "df_info" + ] + }, + { + "cell_type": "markdown", + "id": "e6a36b96", + "metadata": {}, + "source": [ + "### Data cleaning/Preprocessing (skipped)\n", + "Data cleaning and preprocessing is an important stage in the the data analysis pipeline. However it will be out of the scope of this notebook. For more information about data cleaning and preprocessing basics, here are some references. 
\n", + "* [Xarray Fundamentals](https://earth-env-data-science.github.io/lectures/xarray/xarray.html)\n", + "* [Xarray Tutorial — Pangeo Gallery documentation](http://gallery.pangeo.io/repos/pangeo-data/pangeo-tutorial-gallery/xarray.html)\n", + "* [Pythonic Data Cleaning With pandas and NumPy](https://realpython.com/python-data-cleaning-numpy-pandas/)\n", + "* [Pandas - Cleaning Data](https://www.w3schools.com/python/pandas/pandas_cleaning.asp)" + ] + }, + { + "cell_type": "markdown", + "id": "ae6e1a69", + "metadata": {}, + "source": [ + "### Plotting\n", + "Data visualization (or plotting) is another important data analysis topic and deserves its own discussion. Here in this notebook we will only demonstrate basic tool for simple data visualization tasks.\n", + "\n", + "Here are some reference you might find useful:\n", + "* xarray plotting: https://docs.xarray.dev/en/stable/user-guide/plotting.html\n", + "* Atmospheric Community Toolkit (ACT): https://arm-doe.github.io/ACT/index.html\n", + "\n", + "Note: this notebook is auto-generated using a template. It uses a general idea to select variable(s) to plot and is not customized to each indivdual notebook. Feel free to change the variables in intrrests, especially certain figure is failed to plot." + ] + }, + { + "cell_type": "markdown", + "id": "7e417fb2", + "metadata": {}, + "source": [ + "#### 1-dimenssional basic time series plot\n", + "\n", + "For the following plot we would like to find variables such that\n", + "* it has one and only one dimession\n", + "* \"time\" is its coordinate variable\n", + "* it is not a dimenssion itself,\n", + "* it is not a special variable with substrings within [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "\n", + "Please see the following example in action. 
(For more details about pandas filtering, please see the following references.)\n", + "* [pandas: multiple conditions while indexing data frame](https://stackoverflow.com/questions/22591174/pandas-multiple-conditions-while-indexing-data-frame-unexpected-behavior)\n", + "* [pandas.DataFrame.apply](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.apply.html)\n", + "* [How to test if a string contains one of the substrings in a list...](https://stackoverflow.com/questions/26577516/how-to-test-if-a-string-contains-one-of-the-substrings-in-a-list-in-pandas" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "30edac2d", + "metadata": {}, + "outputs": [], + "source": [ + "# (pandas query) filter n_dim==1, non-dimenssional, and not contains the following substrings\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter = df_info[(df_info.n_dim == 1) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (df_info.is_dim==False) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4777a659", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " var_1d = df_filter.var_name.values[0]\n", + " var_1d\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fb5ae985", + "metadata": {}, + "outputs": [], + "source": [ + "# Note: if failed, change to another variable to plot.\n", + "try:\n", + " ds[var_1d].plot()\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "markdown", + "id": "9e41066e", + "metadata": {}, + "source": [ + "#### 2-dimenssional basic plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "09bd4adc", + "metadata": {}, + "outputs": [], + "source": [ + "# (pandas query) filter n_dim==2, non-dimenssional, and not contains the 
following substrings\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_2 = df_info[(df_info.n_dim == 2) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (df_info.is_dim==False) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter_2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2cdea1a9", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " var_2d = df_filter_2.var_name.values[0]\n", + " var_2d\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f5f48879", + "metadata": {}, + "outputs": [], + "source": [ + "# Note: if failed, change to another variable to plot.\n", + "try:\n", + " print(ds[var_2d].dims)\n", + " # ds[var_2d].plot()\n", + "\n", + " # conventionally, use \"time\" as x-axis\n", + " ds[var_2d].plot(x=\"time\")\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "markdown", + "id": "7554b2a6", + "metadata": {}, + "source": [ + "#### qc-plotting (optional)\n", + "\n", + "Note: act qc plotting has more strict requirement, one of them is the associated qc variable need to have \"flag_masks\" attributes. 
Which is added by using ds = act.io.armfiles.read_netcdf(files_list), then ds.clean.cleanup()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "71c2096f", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " ds_act = act.io.armfiles.read_netcdf(full_path)\n", + " print(type(ds_act))\n", + " ds_act.clean.cleanup()\n", + "\n", + " # or \n", + " # ds.clean.cleanup()\n", + "except Exception as e:\n", + " print(\"ERROR\", e)\n", + " ds_act = ds" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "844f8505", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter valid variables for ACT qc plotting\n", + "condition = (\"qc_\" + df_info.var_name).apply(lambda x: ds_act[x].attrs.get(\"flag_masks\") is not None \n", + " if x in list(ds.variables) else False)\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_3 = df_info[(df_info.is_dim==False) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings))) &\n", + " condition\n", + " ]\n", + "df_filter_3" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ecd38cc6", + "metadata": {}, + "outputs": [], + "source": [ + "# QC Plot\n", + "try:\n", + " qc_variable = df_filter_3.var_name.values[0]\n", + " print(qc_variable)\n", + "except Exception as e:\n", + " print(e)\n", + " \n", + "try:\n", + "\n", + " # Plot\n", + " qc_display = act.plotting.TimeSeriesDisplay(ds_act)\n", + " qc_display.add_subplots((2,), figsize = (9.5,10))\n", + " qc_ax = qc_display.plot(qc_variable, subplot_index=(0,), set_title=\"QC results on field: \" + qc_variable,)\n", + " qc_ax.grid()\n", + " qc_display.qc_flag_block_plot(qc_variable, subplot_index=(1,))\n", + " qc_ax.set_xlabel(f\"UTC Time starts at {ds.time.data[0]}\")\n", + "\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e)" + ] + }, + { + "cell_type": "markdown", + "id": "d76e4d27", + "metadata": 
{}, + "source": [ + "#### bonus: choose variables to plot from a dropdown menu " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bb733804", + "metadata": {}, + "outputs": [], + "source": [ + "# Valid variables filtering\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_4 = df_info[(df_info.is_dim==False) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter_4" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "82d5c20d", + "metadata": {}, + "outputs": [], + "source": [ + "# example 1: using xarray plot\n", + "\n", + "# Uncomment the following cell to try the interactive plot (ctrl + /)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2fc9aaa4", + "metadata": {}, + "outputs": [], + "source": [ + "# %matplotlib widget\n", + "# plt.clf()\n", + "\n", + "# fig, ax = plt.subplots(figsize=(10, 4))\n", + "\n", + "# available_variables = df_filter_4.var_name.values\n", + "# @widgets.interact(var=available_variables)\n", + "# def update(var = available_variables[0]):\n", + "# fig.clear() # Remove old lines from plot and plot new one\n", + "# if len(ds[var].dims)==2:\n", + "# ds[var].plot(x=\"time\", add_colorbar=False)\n", + "# else:\n", + "# ds[var].plot()\n", + "# plt.grid()\n", + "# plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fabdd802", + "metadata": {}, + "outputs": [], + "source": [ + "# example 2: using act plot\n", + "\n", + "# Uncomment the following cell to try the interactive plot (ctrl + /)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "962b4186", + "metadata": {}, + "outputs": [], + "source": [ + "# %matplotlib widget\n", + "# plt.clf()\n", + "\n", + "# available_variables = df_filter_4.var_name.values\n", + "\n", + "\n", + "# @widgets.interact(var=available_variables)\n", + "# def 
update(var = available_variables[0]):\n", + "\n", + "# i_display = act.plotting.TimeSeriesDisplay(ds_act)\n", + "# i_display.add_subplots((1,), figsize=(10, 4))\n", + "# ax = i_display.plot(var, subplot_index=(0,), set_title=f\"{var} ({ds_act[var].attrs['long_name']})\",)\n", + "\n", + "# ax.set_xlabel(f\"UTC Time starts at {ds.time.data[0]}\")\n", + "# ax.grid()\n", + "# plt.show()\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.16" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/MPLCMASKML/mplcmaskml.c1.ipynb b/VAPs/quicklook/MPLCMASKML/mplcmaskml.c1.ipynb new file mode 100644 index 00000000..795f305a --- /dev/null +++ b/VAPs/quicklook/MPLCMASKML/mplcmaskml.c1.ipynb @@ -0,0 +1,339 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# MPLCMASKML.C1 Plots\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/mplcmaskml) for more information about this vap." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib widget\n", + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import ipywidgets as widgets\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "# Data archive directory\n", + "DATA_DIR = r'/data/archive/'\n", + "\n", + "# Datastream info\n", + "DATASTREAM_NAME = 'mplcmaskml'\n", + "DATA_LEVEL = 'c1'\n", + "LOCATIONS = [{'end_date': '2017-01-02', 'facility': 'M1', 'site': 'awr', 'start_date': '2015-12-01'}, {'end_date': '2020-06-01', 'facility': 'M1', 'site': 'anx', 'start_date': '2020-02-13'}, {'end_date': '2020-06-01', 'facility': 'S2', 'site': 'anx', 'start_date': '2020-01-03'}, {'end_date': '2021-06-13', 'facility': 'M1', 'site': 'oli', 'start_date': '2013-09-13'}, {'end_date': '2023-12-04', 'facility': 'C1', 'site': 'ena', 'start_date': '2013-10-03'}, {'end_date': '2022-09-29', 'facility': 'M1', 'site': 'hou', 'start_date': '2021-08-13'}, {'end_date': '2019-04-30', 'facility': 'M1', 'site': 'cor', 'start_date': '2018-09-27'}, {'end_date': '2023-12-15', 'facility': 'C1', 'site': 'nsa', 'start_date': '2010-10-09'}, {'end_date': '2023-12-14', 'facility': 'C1', 'site': 'sgp', 'start_date': '2010-07-10'}]" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "9faaf875", + "metadata": {}, + "source": [ + "## Define site, facility, and date range" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ac6764f5", + "metadata": {}, + "outputs": [], + "source": [ + "print(\"The following locations and date ranges are available for this VAP:\")\n", + "display(pd.DataFrame(LOCATIONS, columns=['site', 'facility', 'start_date', 'end_date']))" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": 
"8d132223", + "metadata": {}, + "source": [ + "#### Define site, facility, and date range (date format: YYYY-MM-DD) using the variables below:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e563983a", + "metadata": {}, + "outputs": [], + "source": [ + "site_facility = ( 'sgp', 'C1' )\n", + "\n", + "date_start = '2023-12-12'\n", + "date_end = '2023-12-14'" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "bccd3dfe-2f99-49a5-bace-ea37e8dd8fc3", + "metadata": {}, + "source": [ + "## Load data files\n", + "Load data files from /data/archive/" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cb4b9a26-c574-49c0-a521-658fa553e39e", + "metadata": {}, + "outputs": [], + "source": [ + "# Compile list of files\n", + "site, facility = site_facility\n", + "d_date_start = datetime.strptime(date_start, '%Y-%m-%d')\n", + "d_date_end = datetime.strptime(date_end, '%Y-%m-%d')\n", + "dir_path = os.path.join(DATA_DIR + site, site + DATASTREAM_NAME + facility + r'.' 
+ DATA_LEVEL )\n", + "dir_path\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6be8f3dc", + "metadata": {}, + "outputs": [], + "source": [ + "from datetime import date, timedelta\n", + "import pandas as pd\n", + "\n", + "def get_ARM_formated_dates(start_date, end_date):\n", + " \"\"\"\n", + " Get a list of ARM conventional formated date lists, based on start_date and end_date(inclusive)\n", + " EXAMPLE:\n", + " get_ARM_formated_dates(start_date=\"20180219\", end_date=\"20180221\")\n", + " >> [\"20180219\", \"20180220\", \"20180221\"] \n", + " \"\"\"\n", + " \n", + " _start_date = pd.to_datetime(start_date)\n", + " _end_date = pd.to_datetime(end_date)\n", + " \n", + " delta = _end_date - _start_date # returns timedelta \n", + " dates = []\n", + "\n", + " for i in range(delta.days + 1):\n", + " day = _start_date + timedelta(days=i)\n", + " day_formated = day.strftime(format=\"%Y%m%d\")\n", + " dates.append(day_formated)\n", + " return dates\n", + "\n", + "\n", + "get_ARM_formated_dates(start_date=date_start, end_date=date_end)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "51feea2e", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter a list of files based on date pattern\n", + "import glob\n", + "dates = get_ARM_formated_dates(start_date=date_start, end_date=date_end)\n", + "files_filter = []\n", + "for date in dates:\n", + " files_filter += glob.glob(f'{dir_path}/*.{date}*.*')\n", + " files_filter\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0e5d0d7", + "metadata": {}, + "outputs": [], + "source": [ + "# Load files as a single dataset\n", + "files_list = files_filter \n", + "ds = act.io.armfiles.read_netcdf(files_list)\n", + "ds.clean.cleanup()\n", + "print(f'{len(files_list)} files loaded')\n", + "ds\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "4a551094-9ec0-4b64-b80a-9940573c2f50", + "metadata": {}, + "source": [ + "## Plot 
time series data\n", + "#### Define the list of variables to be plotted:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d09b789e-84f1-4605-846b-a72c110c8048", + "metadata": {}, + "outputs": [], + "source": [ + "variables_to_plot = ['cloud_mask', 'cloud_base', 'cloud_top']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3458fb08-035b-4898-9253-0a94e6f9c97b", + "metadata": {}, + "outputs": [], + "source": [ + "ts_display = act.plotting.TimeSeriesDisplay(ds)\n", + "ts_display.add_subplots((len(variables_to_plot),), figsize = (9.5,4*len(variables_to_plot)))\n", + "\n", + "for i,v in enumerate(variables_to_plot):\n", + " ts_ax = ts_display.plot(v, subplot_index=(i,), set_title=ds.variables[v].attrs['long_name'],)\n", + " ts_ax.grid()\n", + "\n", + "plt.show()\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "194399aa-1907-452b-8ba9-bc31d7f60291", + "metadata": {}, + "source": [ + "## Quality check plots\n", + "#### Define variable for QC plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9a0ef63d-3eeb-48fc-a24a-43258b6134b8", + "metadata": {}, + "outputs": [], + "source": [ + "qc_variable = 'backscatter'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "532663a3-4dc0-4497-bda8-018c5f91e1c4", + "metadata": {}, + "outputs": [], + "source": [ + "# QC Plot\n", + "if ('qc_'+qc_variable) in ds.variables:\n", + "\n", + " # Plot\n", + " qc_display = act.plotting.TimeSeriesDisplay(ds)\n", + " qc_display.add_subplots((2,), figsize = (9.5,10))\n", + " qc_ax = qc_display.plot(qc_variable, subplot_index=(0,), set_title=\"QC results on field: \" + qc_variable,)\n", + " qc_ax.grid()\n", + " qc_display.qc_flag_block_plot(qc_variable, subplot_index=(1,))\n", + "\n", + " plt.show()\n", + "else:\n", + " print(f'QC not available for the selected field: {qc_variable}')\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": 
"2308db16-f362-4033-a11a-c3e5e75ad9ba", + "metadata": {}, + "source": [ + "## Field selection dropdown menu\n", + "Select variable to be plotted from a dropdown menu" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d5792fa8-2253-441a-8ac6-1e07a0345031", + "metadata": {}, + "outputs": [], + "source": [ + "plt.ioff()\n", + "\n", + "# populate dropdown menu with available variables \n", + "available_variables = [v for v in ds.variables if not('time' in v or v.startswith('qc_') or v.startswith('source_')) and 'long_name' in ds.variables[v].attrs]\n", + "d_variable = 'cloud_mask'\n", + "dropdown = widgets.Dropdown(\n", + " options = [(ds.variables[v].attrs['long_name'], v) for v in available_variables],\n", + " value= d_variable,\n", + " description='Field:',\n", + " disabled=False,\n", + ")\n", + "dropdown.layout.margin = '0px 30% 0px 20%'\n", + "dropdown.layout.width = '50%'\n", + "\n", + "# set up display\n", + "i_display = act.plotting.TimeSeriesDisplay(ds)\n", + "i_display.add_subplots((1,), figsize = (9.5,5))\n", + "i_ax = i_display.plot(d_variable, subplot_index=(0,), set_title=ds.variables[d_variable].attrs['long_name'],)\n", + "i_ax.grid()\n", + "i_fig = i_display.fig\n", + "\n", + "# update plot callback function\n", + "def update_plot(change):\n", + " i_ax.cla()\n", + " i_ax_new = i_display.plot(change.new, subplot_index=(0,), set_title=ds.variables[change.new].attrs['long_name'],)\n", + " i_ax_new.grid()\n", + " i_fig.canvas.draw()\n", + " i_fig.canvas.flush_events()\n", + "\n", + "dropdown.observe(update_plot, names='value')\n", + "\n", + "widgets.AppLayout(\n", + " header=dropdown,\n", + " center=i_fig.canvas,\n", + " pane_heights=[1, 6,1]\n", + ")\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": 
"python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.12" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/MPLNOR/.ipynb_checkpoints/mplnor1camp.c1-checkpoint.ipynb b/VAPs/quicklook/MPLNOR/.ipynb_checkpoints/mplnor1camp.c1-checkpoint.ipynb new file mode 100644 index 00000000..d37d9027 --- /dev/null +++ b/VAPs/quicklook/MPLNOR/.ipynb_checkpoints/mplnor1camp.c1-checkpoint.ipynb @@ -0,0 +1,1732 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# MPLNOR1CAMP.C1 Plots\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/mplnor) for more information about this vap." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib widget\n", + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import ipywidgets as widgets\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "# Data archive directory\n", + "DATA_DIR = r'/data/archive/'\n", + "\n", + "# Datastream info\n", + "DATASTREAM_NAME = 'mplnor1camp'\n", + "DATA_LEVEL = 'c1'\n", + "LOCATIONS = [{'end_date': '2004-05-11', 'facility': 'C1', 'site': 'sgp', 'start_date': '1996-05-01'}, {'end_date': '1999-11-18', 'facility': 'C2', 'site': 'twp', 'start_date': '1998-11-20'}]" + ] + }, + { + "cell_type": "markdown", + "id": "9faaf875", + "metadata": {}, + "source": [ + "## Define site, facility, and date range" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "ac6764f5", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + 
"text": [ + "The following locations and date ranges are available for this VAP:\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sitefacilitystart_dateend_date
0sgpC11996-05-012004-05-11
1twpC21998-11-201999-11-18
\n", + "
" + ], + "text/plain": [ + " site facility start_date end_date\n", + "0 sgp C1 1996-05-01 2004-05-11\n", + "1 twp C2 1998-11-20 1999-11-18" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "print(\"The following locations and date ranges are available for this VAP:\")\n", + "display(pd.DataFrame(LOCATIONS, columns=['site', 'facility', 'start_date', 'end_date']))" + ] + }, + { + "cell_type": "markdown", + "id": "8d132223", + "metadata": {}, + "source": [ + "#### Define site, facility, and date range (date format: YYYY-MM-DD) using the variables below:" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "e563983a", + "metadata": {}, + "outputs": [], + "source": [ + "site_facility = ( 'sgp', 'C1' )\n", + "\n", + "date_start = '2004-05-10'\n", + "date_end = '2004-05-11'" + ] + }, + { + "cell_type": "markdown", + "id": "bccd3dfe-2f99-49a5-bace-ea37e8dd8fc3", + "metadata": {}, + "source": [ + "## Load data files\n", + "Load data files from /data/archive/" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "cb4b9a26-c574-49c0-a521-658fa553e39e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'/data/archive/sgp/sgpmplnor1campC1.c1'" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Compile list of files\n", + "site, facility = site_facility\n", + "d_date_start = datetime.strptime(date_start, '%Y-%m-%d')\n", + "d_date_end = datetime.strptime(date_end, '%Y-%m-%d')\n", + "dir_path = os.path.join(DATA_DIR + site, site + DATASTREAM_NAME + facility + r'.' 
+ DATA_LEVEL )\n", + "dir_path\n" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "6be8f3dc", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['20040510', '20040511']" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from datetime import date, timedelta\n", + "import pandas as pd\n", + "\n", + "def get_ARM_formated_dates(start_date, end_date):\n", + " \"\"\"\n", + " Get a list of ARM conventional formated date lists, based on start_date and end_date(inclusive)\n", + " EXAMPLE:\n", + " get_ARM_formated_dates(start_date=\"20180219\", end_date=\"20180221\")\n", + " >> [\"20180219\", \"20180220\", \"20180221\"] \n", + " \"\"\"\n", + " \n", + " _start_date = pd.to_datetime(start_date)\n", + " _end_date = pd.to_datetime(end_date)\n", + " \n", + " delta = _end_date - _start_date # returns timedelta \n", + " dates = []\n", + "\n", + " for i in range(delta.days + 1):\n", + " day = _start_date + timedelta(days=i)\n", + " day_formated = day.strftime(format=\"%Y%m%d\")\n", + " dates.append(day_formated)\n", + " return dates\n", + "\n", + "\n", + "get_ARM_formated_dates(start_date=date_start, end_date=date_end)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "51feea2e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['/data/archive/sgp/sgpmplnor1campC1.c1/sgpmplnor1campC1.c1.20040510.000020.cdf',\n", + " '/data/archive/sgp/sgpmplnor1campC1.c1/sgpmplnor1campC1.c1.20040511.000011.cdf']" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Filter a list of files based on date pattern\n", + "import glob\n", + "dates = get_ARM_formated_dates(start_date=date_start, end_date=date_end)\n", + "files_filter = []\n", + "for date in dates:\n", + " files_filter += glob.glob(f'{dir_path}/*.{date}*.*')\n", + " files_filter\n", + "files_filter" + ] + }, + { + "cell_type": "code", + 
"execution_count": 9, + "id": "b0e5d0d7", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "77 files loaded\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
<xarray.Dataset>\n",
+       "Dimensions:                     (time: 1420, height: 445, nlayers: 5)\n",
+       "Coordinates:\n",
+       "  * height                      (height) float32 0.02998 0.1199 ... 39.88 39.97\n",
+       "  * time                        (time) datetime64[ns] 2004-05-10T00:00:20 ......\n",
+       "Dimensions without coordinates: nlayers\n",
+       "Data variables: (12/20)\n",
+       "    base_time                   datetime64[ns] 2004-05-10T00:00:20\n",
+       "    time_offset                 (time) datetime64[ns] 2004-05-10T00:00:20 ......\n",
+       "    backscatter                 (time, height) float32 dask.array<chunksize=(1420, 445), meta=np.ndarray>\n",
+       "    background_signal           (time) float32 dask.array<chunksize=(1420,), meta=np.ndarray>\n",
+       "    cloud_base_height           (time, nlayers) float32 dask.array<chunksize=(1420, 5), meta=np.ndarray>\n",
+       "    cloud_top_height            (time, nlayers) float32 dask.array<chunksize=(1420, 5), meta=np.ndarray>\n",
+       "    ...                          ...\n",
+       "    detector_temp               (time) float32 dask.array<chunksize=(1420,), meta=np.ndarray>\n",
+       "    instrument_temp             (time) float32 dask.array<chunksize=(1420,), meta=np.ndarray>\n",
+       "    laser_temp                  (time) float32 dask.array<chunksize=(1420,), meta=np.ndarray>\n",
+       "    lat                         float32 ...\n",
+       "    lon                         float32 ...\n",
+       "    alt                         float32 ...\n",
+       "Attributes: (12/24)\n",
+       "    Date:                        Tue May 11 19:11:20 2004\n",
+       "    Version:                     $State: process-vap-mplnor-2.9-0 $\n",
+       "    Command_Line:                mplnor -d 20040510\n",
+       "    Input_Platforms:             sgpmplC1.a1\n",
+       "    BW_Version:                  Working_4_1\n",
+       "    Comment:                     Pass-through VAP to improve the data quality\n",
+       "    ...                          ...\n",
+       "    history:                     created by user dsmgr on machine fore at 11-...\n",
+       "    _file_dates:                 ['20040510']\n",
+       "    _file_times:                 ['000020']\n",
+       "    datastream:                  sgpmplnor1campC1.c1\n",
+       "    _datastream:                 sgpmplnor1campC1.c1\n",
+       "    _arm_standards_flag:         1
" + ], + "text/plain": [ + "\n", + "Dimensions: (time: 1420, height: 445, nlayers: 5)\n", + "Coordinates:\n", + " * height (height) float32 0.02998 0.1199 ... 39.88 39.97\n", + " * time (time) datetime64[ns] 2004-05-10T00:00:20 ......\n", + "Dimensions without coordinates: nlayers\n", + "Data variables: (12/20)\n", + " base_time datetime64[ns] 2004-05-10T00:00:20\n", + " time_offset (time) datetime64[ns] 2004-05-10T00:00:20 ......\n", + " backscatter (time, height) float32 dask.array\n", + " background_signal (time) float32 dask.array\n", + " cloud_base_height (time, nlayers) float32 dask.array\n", + " cloud_top_height (time, nlayers) float32 dask.array\n", + " ... ...\n", + " detector_temp (time) float32 dask.array\n", + " instrument_temp (time) float32 dask.array\n", + " laser_temp (time) float32 dask.array\n", + " lat float32 ...\n", + " lon float32 ...\n", + " alt float32 ...\n", + "Attributes: (12/24)\n", + " Date: Tue May 11 19:11:20 2004\n", + " Version: $State: process-vap-mplnor-2.9-0 $\n", + " Command_Line: mplnor -d 20040510\n", + " Input_Platforms: sgpmplC1.a1\n", + " BW_Version: Working_4_1\n", + " Comment: Pass-through VAP to improve the data quality\n", + " ... 
...\n", + " history: created by user dsmgr on machine fore at 11-...\n", + " _file_dates: ['20040510']\n", + " _file_times: ['000020']\n", + " datastream: sgpmplnor1campC1.c1\n", + " _datastream: sgpmplnor1campC1.c1\n", + " _arm_standards_flag: 1" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Load files as a single dataset\n", + "files_list = files_filter[0]\n", + "ds = act.io.armfiles.read_netcdf(files_list)\n", + "ds.clean.cleanup()\n", + "print(f'{len(files_list)} files loaded')\n", + "ds\n" + ] + }, + { + "cell_type": "markdown", + "id": "4a551094-9ec0-4b64-b80a-9940573c2f50", + "metadata": {}, + "source": [ + "## Plot time series data\n", + "#### Define the list of variables to be plotted:" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "d09b789e-84f1-4605-846b-a72c110c8048", + "metadata": {}, + "outputs": [], + "source": [ + "variables_to_plot = ['backscatter', 'cloud_base_height', 'cloud_top_height']" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "3458fb08-035b-4898-9253-0a94e6f9c97b", + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "b62b704e7cb241cea9f9f03bf8d662ea", + "version_major": 2, + "version_minor": 0 + }, + "image/png": "", + "text/html": [ + "\n", + "
\n", + "
\n", + " Figure\n", + "
\n", + " \n", + "
\n", + " " + ], + "text/plain": [ + "Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "ts_display = act.plotting.TimeSeriesDisplay(ds)\n", + "ts_display.add_subplots((len(variables_to_plot),), figsize = (9.5,4*len(variables_to_plot)))\n", + "\n", + "for i,v in enumerate(variables_to_plot):\n", + " ts_ax = ts_display.plot(v, subplot_index=(i,), set_title=ds.variables[v].attrs['long_name'],)\n", + " ts_ax.grid()\n", + "\n", + "plt.show()\n" + ] + }, + { + "cell_type": "markdown", + "id": "2308db16-f362-4033-a11a-c3e5e75ad9ba", + "metadata": {}, + "source": [ + "## Field selection dropdown menu\n", + "Select variable to be plotted from a dropdown menu" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "d5792fa8-2253-441a-8ac6-1e07a0345031", + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "ec47d67b4aed4584b5a963f8c19161df", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "AppLayout(children=(Dropdown(description='Field:', index=1, layout=Layout(grid_area='header', margin='0px 30% …" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "plt.ioff()\n", + "\n", + "# populate dropdown menu with available variables \n", + "available_variables = [v for v in ds.variables if not('time' in v or v.startswith('qc_') or v.startswith('source_')) and 'long_name' in ds.variables[v].attrs]\n", + "d_variable = 'backscatter'\n", + "dropdown = widgets.Dropdown(\n", + " options = [(ds.variables[v].attrs['long_name'], v) for v in available_variables],\n", + " value= d_variable,\n", + " description='Field:',\n", + " disabled=False,\n", + ")\n", + "dropdown.layout.margin = '0px 30% 0px 20%'\n", + "dropdown.layout.width = '50%'\n", + "\n", + "# set up display\n", + "i_display = 
act.plotting.TimeSeriesDisplay(ds)\n", + "i_display.add_subplots((1,), figsize = (9.5,5))\n", + "i_ax = i_display.plot(d_variable, subplot_index=(0,), set_title=ds.variables[d_variable].attrs['long_name'],)\n", + "i_ax.grid()\n", + "i_fig = i_display.fig\n", + "\n", + "# update plot callback function\n", + "def update_plot(change):\n", + " i_ax.cla()\n", + " i_ax_new = i_display.plot(change.new, subplot_index=(0,), set_title=ds.variables[change.new].attrs['long_name'],)\n", + " i_ax_new.grid()\n", + " i_fig.canvas.draw()\n", + " i_fig.canvas.flush_events()\n", + "\n", + "dropdown.observe(update_plot, names='value')\n", + "\n", + "widgets.AppLayout(\n", + " header=dropdown,\n", + " center=i_fig.canvas,\n", + " pane_heights=[1, 6,1]\n", + ")\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.16" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/MPLNOR/MPLNOR_tutorial.ipynb b/VAPs/quicklook/MPLNOR/MPLNOR_tutorial.ipynb new file mode 100644 index 00000000..13ff4d43 --- /dev/null +++ b/VAPs/quicklook/MPLNOR/MPLNOR_tutorial.ipynb @@ -0,0 +1,838 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# MPLNOR1CAMP.C1 Notebook\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/mplnor) for more information about this vap." 
+ ] + }, + { + "cell_type": "markdown", + "id": "97097763", + "metadata": {}, + "source": [ + "In this notebook, we demonstrate the workflow to explore ARM VAP data (using mplnor1camp as an example). Value-added products (VAPs) are higher-order data products that have been analyzed and processed to ease scientists’ use of ARM data in atmospheric research and global climate models. \n", + "Here is the main content we will cover." + ] + }, + { + "cell_type": "markdown", + "id": "eddec40f", + "metadata": {}, + "source": [ + "# Table of Contents\n", + "## Access the data\n", + "* How to retrieve the data\n", + "* Data path and file name conventions\n", + "* Load data\n", + "## Explore the data\n", + "* NetCDF Data structure\n", + "* Xarray essentials\n", + "* Xarray Variable\n", + "## Plot the data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "import random\n", + "\n", + "import glob\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "id": "2372d5be", + "metadata": {}, + "source": [ + "## Access the data" + ] + }, + { + "cell_type": "markdown", + "id": "5b0d9684", + "metadata": {}, + "source": [ + "### How to retrieve the data\n", + "We assume the path \"/data/archive\" is available where you are running this notebook. You can use `os.path.exists(\"/data/archive\")` to verify if the path exists on your machine.\n", + "\n", + "\n", + "### Data path and file name conventions\n", + "There are several common terms regarding ARM data, for example, data-stream-name, data-level, site, facility, instrument, etc. 
(For more details, please see [ARM Data File Standards Version 1.2](https://www.google.com/search?q=arm+datastream+facility+cite+definition&rlz=1C1GCEJ_enUS1029US1029&ei=hb41ZICrBPukqtsPvMODIA&ved=0ahUKEwjAgeiS0aL-AhV7kmoFHbzhAAQQ4dUDCBA&uact=5&oq=arm+datastream+facility+cite+definition&gs_lcp=Cgxnd3Mtd2l6LXNlcnAQAzIFCAAQogQyBQgAEKIEMgUIABCiBDIFCAAQogQyBQgAEKIEOgoIABBHENYEELADOgoIIRCgARDDBBAKSgQIQRgAUJ4DWIoOYO0PaAFwAXgAgAGMAYgBuQmSAQMyLjmYAQCgAQHIAQjAAQE&sclient=gws-wiz-serp).) \n", + "\n", + "For example, this notebook is called `mplnor1camp.c1`, where `mplnor1camp` is the \"datastream name\", and `c1` is the \"data level\".\n", + "\n", + "This datastream also contains site `sgp` and facility `C1`. (Note: individual datastream might have multiple site-facility pairs.)\n", + "In such a case, the data of this data-stream is stored at `/data/archive/sgp/sgpmplnor1campC1.c1`, which is in the format of `{DATA_DIR}/{site}/{site}{DATASTREAM_NAME}{facility}.{DATA_LEVEL}`. We can use the following method to assign the data-stream directory `datastream_dir = os.path.join(DATA_DIR, site, site + DATASTREAM_NAME + facility + '.' + DATA_LEVEL )`\n", + "\n", + "The data files under datastream_dir also follow naming conventions. Once you reach the datastream_dir level, the most important file naming convention to differentiate the files is \"yyyyMMdd.hhmmss\", which comes in handy to filter files based on datetime. 
For example, we can use `glob.glob(f'{datastream_dir}/*.200709*.*')` to filter files in 2007 September.\n", + "\n", + "(Note: refer to the https://adc.arm.gov/discovery/#/ and https://adc.arm.gov/solr8/metadata/select API to explore ARM datastreams and their associated available sites and facilities.)\n", + "\n", + "Please see the following examples in action" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d7e9eb85", + "metadata": {}, + "outputs": [], + "source": [ + "# Verify if DATA_DIR path exists\n", + "DATA_DIR = \"/data/archive\"\n", + "os.path.exists(DATA_DIR)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "586993fd", + "metadata": {}, + "outputs": [], + "source": [ + "# Speicify datastream_dir following the path conventions and check its existence\n", + "DATASTREAM_NAME = \"mplnor1camp\"\n", + "DATA_LEVEL = \"c1\"\n", + "site = \"sgp\"\n", + "facility = \"C1\"\n", + "datastream_dir = os.path.join(DATA_DIR, site, site + DATASTREAM_NAME + facility + '.' 
+ DATA_LEVEL )\n", + "print(datastream_dir)\n", + "print(os.path.exists(datastream_dir))\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0742f7c1", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: list 5 (random) files under datastream_dir\n", + "files = os.listdir(datastream_dir)\n", + "files[:5]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "39b98a36", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: get most recent file\n", + "list_of_files = glob.glob(f\"{datastream_dir}/*\") # * means all if need specific format then *.csv\n", + "latest_file = max(list_of_files, key=os.path.getctime)\n", + "latest_file" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "902d514e", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus sort datastream files based on datetime\n", + "files = os.listdir(datastream_dir)\n", + "file_sorted = files.copy()\n", + "file_sorted.sort() \n", + "print(file_sorted[:5])\n", + "\n", + "# to reverse\n", + "file_sorted_reverse = files.copy() \n", + "file_sorted_reverse.sort(reverse=True)\n", + "print(file_sorted_reverse[:5])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ec5923b2", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: pattern matching\n", + "# filter the 200709** files under datastream_dir\n", + "files_filter = glob.glob(f'{datastream_dir}/*.200709*.*')\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c4a4aa18", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: use armnotebook_utils.file_filter (TODO) to filter files based on datastream info \n" + ] + }, + { + "cell_type": "markdown", + "id": "cfeb9efc", + "metadata": {}, + "source": [ + "### Load data\n", + "The arm data is stored in [NetCDF](https://en.wikipedia.org/wiki/NetCDF#:~:text=%22NetCDF%20(network%20Common%20Data%20Form,format%20for%20representing%20scientific%20data.) format. 
We can use xarray's `open_dataset` method to load a single file, or `open_mfdataset` to open multiple files. (Note: the latter will still return a single xarray dataset object by combining multiple files.)\n", + "\n", + "Note: open_dataset keeps the file handle open and lazy loads its contents. All parameters are passed directly to open_dataset. It is a preferable method over load_dataset for memory efficiency. (For more details, please see [xarray.open_dataset](https://docs.xarray.dev/en/stable/generated/xarray.open_dataset.html))\n", + "\n", + "See the following example in action" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a440a329", + "metadata": {}, + "outputs": [], + "source": [ + "# open single file\n", + "full_path = latest_file\n", + "print(full_path)\n", + "ds_single = xr.open_dataset(full_path)\n", + "ds_single" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0143a3d", + "metadata": {}, + "outputs": [], + "source": [ + "print(ds_single)\n", + "print(type(ds_single))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1c0f8939", + "metadata": {}, + "outputs": [], + "source": [ + "# open multiple files\n", + "n_files = 3\n", + "full_paths = [os.path.join(datastream_dir, f_path) for f_path in file_sorted_reverse[:n_files]]\n", + "print(full_paths)\n", + "try: # Note: sometimes multiple files cannot be merged, so we used try except here.\n", + " ds_mutiple = xr.open_mfdataset(full_paths)\n", + " # ds_mutiple\n", + " print(type(ds_mutiple))\n", + " print(ds_mutiple)\n", + "except Exception as e:\n", + " print(e)" + ] + }, + { + "cell_type": "markdown", + "id": "3fa59943", + "metadata": {}, + "source": [ + "## Explore the data" + ] + }, + { + "cell_type": "markdown", + "id": "571f69b9", + "metadata": {}, + "source": [ + "### NetCDF Data structure \n", + "\n", + "\n", + "(If you are confident with NetCDF basics and xarray essentials, feel free to skip this section.)\n", + "Before we dive into 
data exploration, there are some essential concepts we should be familiar with (shown below): \n", + "* Dataset\n", + "* Data array\n", + "* Variable\n", + "* Dimension\n", + "* Coordinate\n", + "* Data Type\n", + "* Meta Data (Attributes)\n", + "\n", + "We will not go into details about NetCDF basics, but here are some references you might find helpful\n", + "* [Components of a NetCDF Dataset](https://iprc.soest.hawaii.edu/users/xfu/tool/guidef-7.html#)\n", + "* [Network Common Data Form (NetCDF)](https://www.unidata.ucar.edu/software/netcdf/) and [A Brief History of (netCDF) Time](https://www.unidata.ucar.edu/software/netcdf/time/recs.html)\n", + "\n", + "\n", + "\n", + "### Xarray essentials\n", + "Earlier we introduced the xarray package and used `open_dataset` and `open_mfdataset` to retrieve NetCDF data files as an xarray Dataset object. Recall that you can review the data in a notebook by using `print(ds)` or `ds`. \n", + "\n", + "Using xarray to retrieve the aforementioned NetCDF basics is straightforward. 
In general,\n", + "* Dataset: `ds`\n", + "* Data array: `ds[var_name]`\n", + "* Variable: `ds.variables`\n", + "* Dimension: `ds.dims`\n", + "* Coordinate: `ds.coords`\n", + "* Data Type: `type`\n", + "* Meta Data (Attributes): `ds.attrs`\n", + "\n", + "Also, here are some references if you are new to xarray\n", + "* [Xarray in 45 minutes](https://tutorial.xarray.dev/overview/xarray-in-45-min.html)\n", + "* [Handling NetCDF Files using XArray for Absolute Beginners](https://towardsdatascience.com/handling-netcdf-files-using-xarray-for-absolute-beginners-111a8ab4463f)\n", + "\n", + "Try the following commands in action\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b3eac323", + "metadata": {}, + "outputs": [], + "source": [ + "ds = ds_single" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "77ecf85d", + "metadata": {}, + "outputs": [], + "source": [ + "# Dataset \n", + "ds" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "25e7de09", + "metadata": {}, + "outputs": [], + "source": [ + "# Data array, Variable\n", + "# Note: the info can be overwhelming. 
Do not attempt to grasp everything at the first glance.\n", + "ds.variables" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8c41b67e", + "metadata": {}, + "outputs": [], + "source": [ + "# dimenssions\n", + "ds.dims" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "156f1dfc", + "metadata": {}, + "outputs": [], + "source": [ + "# coordinates\n", + "ds.coords" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "277d6064", + "metadata": {}, + "outputs": [], + "source": [ + "# Meta Data (Attributes)\n", + "ds.attrs" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d334681f", + "metadata": {}, + "outputs": [], + "source": [ + "# type\n", + "print(type(ds))\n", + "print(type(ds.variables))\n", + "print(type(ds.dims))\n", + "print(type(ds.coords))\n", + "print(type(ds.attrs))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "643399d6", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: retrieve variable names only\n", + "list(ds.variables)" + ] + }, + { + "cell_type": "markdown", + "id": "1cf36878", + "metadata": {}, + "source": [ + "#### Discussion: variable vs. coordinates vs. dimensions. \n", + "From [Components of a NetCDF Dataset](https://iprc.soest.hawaii.edu/users/xfu/tool/guidef-7.html#):\n", + "\n", + "* Dimension: A dimension may be used to represent a real physical dimension, for example, time, latitude, longitude, or height. A dimension might also be used to index other quantities, for example station or model-run-number.\n", + "A netCDF dimension has both a name and a length. A dimension length is an arbitrary positive integer, except that one dimension in a netCDF dataset can have the length UNLIMITED.\n", + "\n", + "* Variables: Variables are used to store the bulk of the data in a netCDF dataset. A variable represents an array of values of the same type. A scalar value is treated as a 0-dimensional array. 
A variable has a name, a data type, and a shape described by its list of dimensions specified when the variable is created. A variable may also have associated attributes, which may be added, deleted or changed after the variable is created.\n", + "\n", + "* Coordinate (Variables): It is legal for a variable to have the same name as a dimension. Such variables have no special meaning to the netCDF library. However there is a convention that such variables should be treated in a special way by software using this library.\n", + "A variable with the same name as a dimension is called a coordinate variable. It typically defines a physical coordinate corresponding to that dimension.\n", + "\n", + "\n", + "Tip: By definition, coordinates and dimensions are also variables. For example, ds.time is a coordinate, but it is also a special variable. In practice, however, when we talk about a variable, we mean a regular (non-coordinate) variable. We will use this convention for the remainder of the notebook. " + ] + }, + { + "cell_type": "markdown", + "id": "d47120f9", + "metadata": {}, + "source": [ + "### Xarray Variable\n", + "\n", + "\n", + "We can use `ds.variables` to access the variables of a dataset. But printing out the whole dataset can often be overwhelming. Instead, we can work on an individual variable (also called an Xarray data array) using the `ds[var_name]` syntax, where `var_name` is the name of the variable. See the following examples in action.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5a57e136", + "metadata": {}, + "outputs": [], + "source": [ + "print(type(ds[\"time\"]))\n", + "ds[\"time\"]" + ] + }, + { + "cell_type": "markdown", + "id": "51ec643e", + "metadata": {}, + "source": [ + "#### Variable properties\n", + "For an individual variable, the following properties are most commonly used, shown with the associated xarray attribute to retrieve them. 
(assuming var = ds[var_name])\n", + "* name: `var.name`\n", + "* data content: `var.data`\n", + "* attributes: `var.attrs`\n", + "* dimensions: `var.dims`\n", + "* data type: `var.data.dtype`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a632a525", + "metadata": {}, + "outputs": [], + "source": [ + "var_name = \"time\"\n", + "var = ds[var_name]\n", + "\n", + "print(\"var.name: \\n\", var.name, \"\\n\")\n", + "print(\"var.data: \\n\", var.data, \"\\n\")\n", + "print(\"var.attrs: \\n\", var.attrs, \"\\n\")\n", + "print(\"var.dims: \\n\", var.dims, \"\\n\")\n", + "print(\"var.data.dtype: \\n\", var.data.dtype, \"\\n\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b256b07f", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: indivdual variable info table (collect and display variable info as a tabular format)\n", + "df_info = pd.DataFrame()\n", + "df_info[\"var_name\"] = list(ds.variables)\n", + "df_info[\"dims\"] = df_info.var_name.apply(lambda x: ds[x].dims)\n", + "df_info[\"is_dim\"] = df_info.var_name.apply(lambda x: x in ds.dims)\n", + "df_info[\"n_dim\"] = df_info.var_name.apply(lambda x: len(ds[x].dims))\n", + "df_info[\"attrs\"] = df_info.var_name.apply(lambda x: ds[x].attrs)\n", + "df_info[\"dtype\"] = df_info.var_name.apply(lambda x: ds[x].data.dtype)\n", + "df_info" + ] + }, + { + "cell_type": "markdown", + "id": "e6a36b96", + "metadata": {}, + "source": [ + "### Data cleaning/Preprocessing (skipped)\n", + "Data cleaning and preprocessing is an important stage in the data analysis pipeline. However, it is outside the scope of this notebook. For more information about data cleaning and preprocessing basics, here are some references. 
\n", + "* [Xarray Fundamentals](https://earth-env-data-science.github.io/lectures/xarray/xarray.html)\n", + "* [Xarray Tutorial — Pangeo Gallery documentation](http://gallery.pangeo.io/repos/pangeo-data/pangeo-tutorial-gallery/xarray.html)\n", + "* [Pythonic Data Cleaning With pandas and NumPy](https://realpython.com/python-data-cleaning-numpy-pandas/)\n", + "* [Pandas - Cleaning Data](https://www.w3schools.com/python/pandas/pandas_cleaning.asp)" + ] + }, + { + "cell_type": "markdown", + "id": "ae6e1a69", + "metadata": {}, + "source": [ + "### Plotting\n", + "Data visualization (or plotting) is another important data analysis topic and deserves its own discussion. Here in this notebook we will only demonstrate basic tools for simple data visualization tasks.\n", + "\n", + "Here are some references you might find useful:\n", + "* xarray plotting: https://docs.xarray.dev/en/stable/user-guide/plotting.html\n", + "* Atmospheric Community Toolkit (ACT): https://arm-doe.github.io/ACT/index.html\n", + "\n", + "Note: this notebook is auto-generated using a template. It uses a general heuristic to select the variable(s) to plot and is not customized to each individual notebook. Feel free to change the variables of interest, especially if a certain figure fails to plot." + ] + }, + { + "cell_type": "markdown", + "id": "7e417fb2", + "metadata": {}, + "source": [ + "#### 1-dimensional basic time series plot\n", + "\n", + "For the following plot we would like to find variables such that\n", + "* it has one and only one dimension\n", + "* \"time\" is its coordinate variable\n", + "* it is not a dimension itself,\n", + "* it is not a special variable with substrings within [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "\n", + "Please see the following example in action. 
(For more details about pandas filtering, please see the following references.)\n", + "* [pandas: multiple conditions while indexing data frame](https://stackoverflow.com/questions/22591174/pandas-multiple-conditions-while-indexing-data-frame-unexpected-behavior)\n", + "* [pandas.DataFrame.apply](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.apply.html)\n", + "* [How to test if a string contains one of the substrings in a list...](https://stackoverflow.com/questions/26577516/how-to-test-if-a-string-contains-one-of-the-substrings-in-a-list-in-pandas)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "30edac2d", + "metadata": {}, + "outputs": [], + "source": [ + "# (pandas query) filter n_dim==1, non-dimenssional, and not contains the following substrings\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter = df_info[(df_info.n_dim == 1) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (df_info.is_dim==False) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4777a659", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " var_1d = df_filter.var_name.values[0]\n", + " var_1d\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fb5ae985", + "metadata": {}, + "outputs": [], + "source": [ + "# Note: if failed, change to another variable to plot.\n", + "try:\n", + " ds[var_1d].plot()\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "markdown", + "id": "9e41066e", + "metadata": {}, + "source": [ + "#### 2-dimensional basic plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "09bd4adc", + "metadata": {}, + "outputs": [], + "source": [ + "# (pandas query) filter n_dim==2, non-dimenssional, and not contains the 
following substrings\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_2 = df_info[(df_info.n_dim == 2) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (df_info.is_dim==False) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter_2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2cdea1a9", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " var_2d = df_filter_2.var_name.values[0]\n", + " var_2d\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f5f48879", + "metadata": {}, + "outputs": [], + "source": [ + "# Note: if failed, change to another variable to plot.\n", + "try:\n", + " print(ds[var_2d].dims)\n", + " # ds[var_2d].plot()\n", + "\n", + " # conventionally, use \"time\" as x-axis\n", + " ds[var_2d].plot(x=\"time\")\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "markdown", + "id": "7554b2a6", + "metadata": {}, + "source": [ + "#### qc-plotting (optional)\n", + "\n", + "Note: act qc plotting has more strict requirement, one of them is the associated qc variable need to have \"flag_masks\" attributes. 
Which is added by using ds = act.io.armfiles.read_netcdf(files_list), then ds.clean.cleanup()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "71c2096f", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " ds_act = act.io.armfiles.read_netcdf(full_path)\n", + " print(type(ds_act))\n", + " ds_act.clean.cleanup()\n", + "\n", + " # or \n", + " # ds.clean.cleanup()\n", + "except Exception as e:\n", + " print(\"ERROR\", e)\n", + " ds_act = ds" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "844f8505", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter valid variables for ACT qc plotting\n", + "condition = (\"qc_\" + df_info.var_name).apply(lambda x: ds_act[x].attrs.get(\"flag_masks\") is not None \n", + " if x in list(ds.variables) else False)\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_3 = df_info[(df_info.is_dim==False) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings))) &\n", + " condition\n", + " ]\n", + "df_filter_3" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ecd38cc6", + "metadata": {}, + "outputs": [], + "source": [ + "# QC Plot\n", + "try:\n", + " qc_variable = df_filter_3.var_name.values[0]\n", + " print(qc_variable)\n", + "except Exception as e:\n", + " print(e)\n", + " \n", + "try:\n", + "\n", + " # Plot\n", + " qc_display = act.plotting.TimeSeriesDisplay(ds_act)\n", + " qc_display.add_subplots((2,), figsize = (9.5,10))\n", + " qc_ax = qc_display.plot(qc_variable, subplot_index=(0,), set_title=\"QC results on field: \" + qc_variable,)\n", + " qc_ax.grid()\n", + " qc_display.qc_flag_block_plot(qc_variable, subplot_index=(1,))\n", + " qc_ax.set_xlabel(f\"UTC Time starts at {ds.time.data[0]}\")\n", + "\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e)" + ] + }, + { + "cell_type": "markdown", + "id": "d76e4d27", + "metadata": 
{}, + "source": [ + "#### bonus: choose variables to plot from a dropdown menu " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bb733804", + "metadata": {}, + "outputs": [], + "source": [ + "# Valid variables filtering\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_4 = df_info[(df_info.is_dim==False) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter_4" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "82d5c20d", + "metadata": {}, + "outputs": [], + "source": [ + "# example 1: using xarray plot\n", + "\n", + "# Uncomment the following cell to try the interactive plot (ctrl + /)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2fc9aaa4", + "metadata": {}, + "outputs": [], + "source": [ + "# %matplotlib widget\n", + "# plt.clf()\n", + "\n", + "# fig, ax = plt.subplots(figsize=(10, 4))\n", + "\n", + "# available_variables = df_filter_4.var_name.values\n", + "# @widgets.interact(var=available_variables)\n", + "# def update(var = available_variables[0]):\n", + "# fig.clear() # Remove old lines from plot and plot new one\n", + "# if len(ds[var].dims)==2:\n", + "# ds[var].plot(x=\"time\", add_colorbar=False)\n", + "# else:\n", + "# ds[var].plot()\n", + "# plt.grid()\n", + "# plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fabdd802", + "metadata": {}, + "outputs": [], + "source": [ + "# example 2: using act plot\n", + "\n", + "# Uncomment the following cell to try the interactive plot (ctrl + /)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "962b4186", + "metadata": {}, + "outputs": [], + "source": [ + "# %matplotlib widget\n", + "# plt.clf()\n", + "\n", + "# available_variables = df_filter_4.var_name.values\n", + "\n", + "\n", + "# @widgets.interact(var=available_variables)\n", + "# def 
update(var = available_variables[0]):\n", + "\n", + "# i_display = act.plotting.TimeSeriesDisplay(ds_act)\n", + "# i_display.add_subplots((1,), figsize=(10, 4))\n", + "# ax = i_display.plot(var, subplot_index=(0,), set_title=f\"{var} ({ds_act[var].attrs['long_name']})\",)\n", + "\n", + "# ax.set_xlabel(f\"UTC Time starts at {ds.time.data[0]}\")\n", + "# ax.grid()\n", + "# plt.show()\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.16" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/MPLNOR/mplnor1camp.c1.ipynb b/VAPs/quicklook/MPLNOR/mplnor1camp.c1.ipynb new file mode 100644 index 00000000..ebfd1041 --- /dev/null +++ b/VAPs/quicklook/MPLNOR/mplnor1camp.c1.ipynb @@ -0,0 +1,297 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# MPLNOR1CAMP.C1 Plots\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/mplnor) for more information about this vap." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib widget\n", + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import ipywidgets as widgets\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "# Data archive directory\n", + "DATA_DIR = r'/data/archive/'\n", + "\n", + "# Datastream info\n", + "DATASTREAM_NAME = 'mplnor1camp'\n", + "DATA_LEVEL = 'c1'\n", + "LOCATIONS = [{'end_date': '2004-05-11', 'facility': 'C1', 'site': 'sgp', 'start_date': '1996-05-01'}, {'end_date': '1999-11-18', 'facility': 'C2', 'site': 'twp', 'start_date': '1998-11-20'}]" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "9faaf875", + "metadata": {}, + "source": [ + "## Define site, facility, and date range" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ac6764f5", + "metadata": {}, + "outputs": [], + "source": [ + "print(\"The following locations and date ranges are available for this VAP:\")\n", + "display(pd.DataFrame(LOCATIONS, columns=['site', 'facility', 'start_date', 'end_date']))" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "8d132223", + "metadata": {}, + "source": [ + "#### Define site, facility, and date range (date format: YYYY-MM-DD) using the variables below:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e563983a", + "metadata": {}, + "outputs": [], + "source": [ + "site_facility = ( 'sgp', 'C1' )\n", + "\n", + "date_start = '2004-05-10'\n", + "date_end = '2004-05-11'" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "bccd3dfe-2f99-49a5-bace-ea37e8dd8fc3", + "metadata": {}, + "source": [ + "## Load data files\n", + "Load data files from /data/archive/" + ] + }, + { + "cell_type": "code", + 
"execution_count": null, + "id": "cb4b9a26-c574-49c0-a521-658fa553e39e", + "metadata": {}, + "outputs": [], + "source": [ + "# Compile list of files\n", + "site, facility = site_facility\n", + "d_date_start = datetime.strptime(date_start, '%Y-%m-%d')\n", + "d_date_end = datetime.strptime(date_end, '%Y-%m-%d')\n", + "dir_path = os.path.join(DATA_DIR + site, site + DATASTREAM_NAME + facility + r'.' + DATA_LEVEL )\n", + "dir_path\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6be8f3dc", + "metadata": {}, + "outputs": [], + "source": [ + "from datetime import date, timedelta\n", + "import pandas as pd\n", + "\n", + "def get_ARM_formated_dates(start_date, end_date):\n", + " \"\"\"\n", + " Get a list of ARM conventional formated date lists, based on start_date and end_date(inclusive)\n", + " EXAMPLE:\n", + " get_ARM_formated_dates(start_date=\"20180219\", end_date=\"20180221\")\n", + " >> [\"20180219\", \"20180220\", \"20180221\"] \n", + " \"\"\"\n", + " \n", + " _start_date = pd.to_datetime(start_date)\n", + " _end_date = pd.to_datetime(end_date)\n", + " \n", + " delta = _end_date - _start_date # returns timedelta \n", + " dates = []\n", + "\n", + " for i in range(delta.days + 1):\n", + " day = _start_date + timedelta(days=i)\n", + " day_formated = day.strftime(format=\"%Y%m%d\")\n", + " dates.append(day_formated)\n", + " return dates\n", + "\n", + "\n", + "get_ARM_formated_dates(start_date=date_start, end_date=date_end)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "51feea2e", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter a list of files based on date pattern\n", + "import glob\n", + "dates = get_ARM_formated_dates(start_date=date_start, end_date=date_end)\n", + "files_filter = []\n", + "for date in dates:\n", + " files_filter += glob.glob(f'{dir_path}/*.{date}*.*')\n", + " files_filter\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0e5d0d7", + 
"metadata": {}, + "outputs": [], + "source": [ + "# Load files as a single dataset\n", + "files_list = files_filter \n", + "ds = act.io.armfiles.read_netcdf(files_list)\n", + "ds.clean.cleanup()\n", + "print(f'{len(files_list)} files loaded')\n", + "ds\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "4a551094-9ec0-4b64-b80a-9940573c2f50", + "metadata": {}, + "source": [ + "## Plot time series data\n", + "#### Define the list of variables to be plotted:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d09b789e-84f1-4605-846b-a72c110c8048", + "metadata": {}, + "outputs": [], + "source": [ + "variables_to_plot = ['backscatter', 'cloud_base_height', 'cloud_top_height']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3458fb08-035b-4898-9253-0a94e6f9c97b", + "metadata": {}, + "outputs": [], + "source": [ + "ts_display = act.plotting.TimeSeriesDisplay(ds)\n", + "ts_display.add_subplots((len(variables_to_plot),), figsize = (9.5,4*len(variables_to_plot)))\n", + "\n", + "for i,v in enumerate(variables_to_plot):\n", + " ts_ax = ts_display.plot(v, subplot_index=(i,), set_title=ds.variables[v].attrs['long_name'],)\n", + " ts_ax.grid()\n", + "\n", + "plt.show()\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "2308db16-f362-4033-a11a-c3e5e75ad9ba", + "metadata": {}, + "source": [ + "## Field selection dropdown menu\n", + "Select variable to be plotted from a dropdown menu" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d5792fa8-2253-441a-8ac6-1e07a0345031", + "metadata": {}, + "outputs": [], + "source": [ + "plt.ioff()\n", + "\n", + "# populate dropdown menu with available variables \n", + "available_variables = [v for v in ds.variables if not('time' in v or v.startswith('qc_') or v.startswith('source_')) and 'long_name' in ds.variables[v].attrs]\n", + "d_variable = 'backscatter'\n", + "dropdown = widgets.Dropdown(\n", + " options = 
[(ds.variables[v].attrs['long_name'], v) for v in available_variables],\n", + " value= d_variable,\n", + " description='Field:',\n", + " disabled=False,\n", + ")\n", + "dropdown.layout.margin = '0px 30% 0px 20%'\n", + "dropdown.layout.width = '50%'\n", + "\n", + "# set up display\n", + "i_display = act.plotting.TimeSeriesDisplay(ds)\n", + "i_display.add_subplots((1,), figsize = (9.5,5))\n", + "i_ax = i_display.plot(d_variable, subplot_index=(0,), set_title=ds.variables[d_variable].attrs['long_name'],)\n", + "i_ax.grid()\n", + "i_fig = i_display.fig\n", + "\n", + "# update plot callback function\n", + "def update_plot(change):\n", + " i_ax.cla()\n", + " i_ax_new = i_display.plot(change.new, subplot_index=(0,), set_title=ds.variables[change.new].attrs['long_name'],)\n", + " i_ax_new.grid()\n", + " i_fig.canvas.draw()\n", + " i_fig.canvas.flush_events()\n", + "\n", + "dropdown.observe(update_plot, names='value')\n", + "\n", + "widgets.AppLayout(\n", + " header=dropdown,\n", + " center=i_fig.canvas,\n", + " pane_heights=[1, 6,1]\n", + ")\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.12" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/MWRRET/MWRRET_tutorial.ipynb b/VAPs/quicklook/MWRRET/MWRRET_tutorial.ipynb new file mode 100644 index 00000000..735e20d6 --- /dev/null +++ b/VAPs/quicklook/MWRRET/MWRRET_tutorial.ipynb @@ -0,0 +1,838 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# MWRRET1LILJCLOU.C1 
Notebook\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/mwrret) for more information about this vap." + ] + }, + { + "cell_type": "markdown", + "id": "97097763", + "metadata": {}, + "source": [ + "In this notebook, we demonstrate the workflow to explore ARM vap data (using mwrret1liljclou as an example.) Value-added products (VAPs) are higher-order data products that have been analyzed and processed to ease scientists’ use of ARM data in atmospheric research and global climate models. \n", + "Here is the main content we will cover." + ] + }, + { + "cell_type": "markdown", + "id": "eddec40f", + "metadata": {}, + "source": [ + "# Table of Content\n", + "## Access the data\n", + "* How to retrieve the data\n", + "* Data path and file name conventions\n", + "* Load data\n", + "## Explore the data\n", + "* NetCDF Data structure\n", + "* Xarray essentials\n", + "* Xarray Variable\n", + "## Plot the data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "import random\n", + "\n", + "import glob\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "id": "2372d5be", + "metadata": {}, + "source": [ + "## Access the data" + ] + }, + { + "cell_type": "markdown", + "id": "5b0d9684", + "metadata": {}, + "source": [ + "### How to retrieve the data\n", + "We assume the path \"/data/archive\" is available where you are running this notebook. 
You can use `os.path.exists(\"/data/archive\")` to verify if the path exists at your machine.\n", + "\n", + "\n", + "### Data path and file name conventions\n", + "There are several common terminologies regarding ARM data, for example, data-stream-name, data-level, site, facility, instrument, etc. (For more details, please see [ARM Data File Standards Version 1.2](https://www.google.com/search?q=arm+datastream+facility+cite+definition&rlz=1C1GCEJ_enUS1029US1029&ei=hb41ZICrBPukqtsPvMODIA&ved=0ahUKEwjAgeiS0aL-AhV7kmoFHbzhAAQQ4dUDCBA&uact=5&oq=arm+datastream+facility+cite+definition&gs_lcp=Cgxnd3Mtd2l6LXNlcnAQAzIFCAAQogQyBQgAEKIEMgUIABCiBDIFCAAQogQyBQgAEKIEOgoIABBHENYEELADOgoIIRCgARDDBBAKSgQIQRgAUJ4DWIoOYO0PaAFwAXgAgAGMAYgBuQmSAQMyLjmYAQCgAQHIAQjAAQE&sclient=gws-wiz-serp).) \n", + "\n", + "For example, this notebook is called `mwrret1liljclou.c1`, where `mwrret1liljclou` is the \"datastream name\", and `c1` is the \"data level\".\n", + "\n", + "This datastream also contains site `asi` and facility `M1`. (Note: individual datastream might have multiple site-facility pairs.)\n", + "In such a case, the data of this data-stream is stored at `/data/archive/asi/asimwrret1liljclouM1.c1`, which is in the format of `<DATA_DIR>/<site>/<site><datastream_name><facility>.<data_level>`. We can use the following method to assign the data-stream directory `datastream_dir = os.path.join(DATA_DIR, site, site + DATASTREAM_NAME + facility + '.' + DATA_LEVEL )`\n", + "\n", + "The data files under datastream_dir also follow naming conventions. Once you reach the datastream_dir level, the most important file naming convention to differentiate the files is \"yyyyMMdd.hhmmss\", which comes in handy to filter out files based on datetime. 
For example, we can use `glob.glob(f'{datastream_dir}/*.200709*.*')` to filter files from September 2007.\n", + "\n", + "(Note: refer to the https://adc.arm.gov/discovery/#/ and https://adc.arm.gov/solr8/metadata/select API to explore ARM datastreams and associated available sites and facilities.)\n", + "\n", + "Please see the following examples in action" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d7e9eb85", + "metadata": {}, + "outputs": [], + "source": [ + "# Verify if DATA_DIR path exists\n", + "DATA_DIR = \"/data/archive\"\n", + "os.path.exists(DATA_DIR)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "586993fd", + "metadata": {}, + "outputs": [], + "source": [ + "# Speicify datastream_dir following the path conventions and check its existence\n", + "DATASTREAM_NAME = \"mwrret1liljclou\"\n", + "DATA_LEVEL = \"c1\"\n", + "site = \"asi\"\n", + "facility = \"M1\"\n", + "datastream_dir = os.path.join(DATA_DIR, site, site + DATASTREAM_NAME + facility + '.' 
+ DATA_LEVEL )\n", + "print(datastream_dir)\n", + "print(os.path.exists(datastream_dir))\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0742f7c1", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: list 5 (random) files under datastream_dir\n", + "files = os.listdir(datastream_dir)\n", + "files[:5]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "39b98a36", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: get most recent file\n", + "list_of_files = glob.glob(f\"{datastream_dir}/*\") # * means all if need specific format then *.csv\n", + "latest_file = max(list_of_files, key=os.path.getctime)\n", + "latest_file" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "902d514e", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus sort datastream files based on datetime\n", + "files = os.listdir(datastream_dir)\n", + "file_sorted = files.copy()\n", + "file_sorted.sort() \n", + "print(file_sorted[:5])\n", + "\n", + "# to reverse\n", + "file_sorted_reverse = files.copy() \n", + "file_sorted_reverse.sort(reverse=True)\n", + "print(file_sorted_reverse[:5])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ec5923b2", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: pattern matching\n", + "# filter the 200709** files under datastream_dir\n", + "files_filter = glob.glob(f'{datastream_dir}/*.200709*.*')\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c4a4aa18", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: use armnotebook_utils.file_filter (TODO) to filter files based on datastream info \n" + ] + }, + { + "cell_type": "markdown", + "id": "cfeb9efc", + "metadata": {}, + "source": [ + "### Load data\n", + "The arm data is stored in [NetCDF](https://en.wikipedia.org/wiki/NetCDF#:~:text=%22NetCDF%20(network%20Common%20Data%20Form,format%20for%20representing%20scientific%20data.) format. 
We can use xarray's `open_dataset` method to load single file, or `open_mfdataset` to open multiple files. (Note: the latter will still return a single xarray dataset object by combining multiple files.)\n", + "\n", + "Note: open_dataset keeps the file handle open and lazy loads its contents. All parameters are passed directly to open_dataset. It is a preferable method over load_dataset for memory-effiency. (more details, please see [xarray.open_dataset](https://docs.xarray.dev/en/stable/generated/xarray.open_dataset.html))\n", + "\n", + "See the following example in action" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a440a329", + "metadata": {}, + "outputs": [], + "source": [ + "# open single file\n", + "full_path = latest_file\n", + "print(full_path)\n", + "ds_single = xr.open_dataset(full_path)\n", + "ds_single" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0143a3d", + "metadata": {}, + "outputs": [], + "source": [ + "print(ds_single)\n", + "print(type(ds_single))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1c0f8939", + "metadata": {}, + "outputs": [], + "source": [ + "# open multiple files\n", + "n_files = 3\n", + "full_paths = [os.path.join(datastream_dir, f_path) for f_path in file_sorted_reverse[:n_files]]\n", + "print(full_paths)\n", + "try: # Note: sometimes multiple files cannot be merged, so we used try except here.\n", + " ds_mutiple = xr.open_mfdataset(full_paths)\n", + " # ds_mutiple\n", + " print(type(ds_mutiple))\n", + " print(ds_mutiple)\n", + "except Exception as e:\n", + " print(e)" + ] + }, + { + "cell_type": "markdown", + "id": "3fa59943", + "metadata": {}, + "source": [ + "## Explore the data" + ] + }, + { + "cell_type": "markdown", + "id": "571f69b9", + "metadata": {}, + "source": [ + "### NetCDF Data structure \n", + "\n", + "\n", + "(If you are confident with NetCDF basics and xarray essentials, feel free to skip this session.)\n", + "Before we dive into 
data exploration, there are some essential concepts we should be familiar with (shown below): \n", + "* Dataset\n", + "* Data array\n", + "* Variable\n", + "* Dimension\n", + "* Coordinate\n", + "* Data Type\n", + "* Meta Data (Attributes)\n", + "\n", + "We will not go into details about NetCDF basics, but here are some references you might find helpful:\n", + "* [Components of a NetCDF Dataset](https://iprc.soest.hawaii.edu/users/xfu/tool/guidef-7.html#)\n", + "* [Network Common Data Form (NetCDF)](https://www.unidata.ucar.edu/software/netcdf/) and [A Brief History of (netCDF) Time](https://www.unidata.ucar.edu/software/netcdf/time/recs.html)\n", + "\n", + "\n", + "\n", + "### Xarray essentials\n", + "Earlier we introduced the xarray package and used `open_dataset` and `open_mfdataset` to load NetCDF data files as an xarray Dataset object. Recall that you can review the data in a notebook by using `print(ds)` or `ds`. \n", + "\n", + "Using xarray to retrieve the aforementioned NetCDF basics is straightforward. 
In general,\n", + "* Dataset: `ds`\n", + "* Data array: `ds.variables`\n", + "* Variable: `ds.variables`\n", + "* Dimension: `ds.dims`\n", + "* Coordinate: `ds.coords`\n", + "* Data Type: `type(ds)`\n", + "* Meta Data (Attributes): `ds.attrs`\n", + "\n", + "Also, here are some references if you are new to xarray:\n", + "* [Xarray in 45 minutes](https://tutorial.xarray.dev/overview/xarray-in-45-min.html)\n", + "* [Handling NetCDF Files using XArray for Absolute Beginners](https://towardsdatascience.com/handling-netcdf-files-using-xarray-for-absolute-beginners-111a8ab4463f)\n", + "\n", + "Try the following commands in action\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b3eac323", + "metadata": {}, + "outputs": [], + "source": [ + "ds = ds_single" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "77ecf85d", + "metadata": {}, + "outputs": [], + "source": [ + "# Dataset \n", + "ds" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "25e7de09", + "metadata": {}, + "outputs": [], + "source": [ + "# Data array, Variable\n", + "# Note: the info can be overwhelming. 
Do not attempt to grasp everything at the first glance.\n", + "ds.variables" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8c41b67e", + "metadata": {}, + "outputs": [], + "source": [ + "# dimenssions\n", + "ds.dims" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "156f1dfc", + "metadata": {}, + "outputs": [], + "source": [ + "# coordinates\n", + "ds.coords" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "277d6064", + "metadata": {}, + "outputs": [], + "source": [ + "# Meta Data (Attributes)\n", + "ds.attrs" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d334681f", + "metadata": {}, + "outputs": [], + "source": [ + "# type\n", + "print(type(ds))\n", + "print(type(ds.variables))\n", + "print(type(ds.dims))\n", + "print(type(ds.coords))\n", + "print(type(ds.attrs))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "643399d6", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: retrieve variable names only\n", + "list(ds.variables)" + ] + }, + { + "cell_type": "markdown", + "id": "1cf36878", + "metadata": {}, + "source": [ + "#### Discussion: variable vs. coordinates vs. dimenssions. \n", + "From [Components of a NetCDF Dataset](https://iprc.soest.hawaii.edu/users/xfu/tool/guidef-7.html#):\n", + "\n", + "* Dimession: A dimension may be used to represent a real physical dimension, for example, time, latitude, longitude, or height. A dimension might also be used to index other quantities, for example station or model-run-number.\n", + "A netCDF dimension has both a name and a length. A dimension length is an arbitrary positive integer, except that one dimension in a netCDF dataset can have the length UNLIMITED.\n", + "\n", + "* Variables: Variables are used to store the bulk of the data in a netCDF dataset. A variable represents an array of values of the same type. A scalar value is treated as a 0-dimensional array. 
A variable has a name, a data type, and a shape described by its list of dimensions specified when the variable is created. A variable may also have associated attributes, which may be added, deleted or changed after the variable is created.\n", + "\n", + "* Coordinate (Variables): It is legal for a variable to have the same name as a dimension. Such variables have no special meaning to the netCDF library. However there is a convention that such variables should be treated in a special way by software using this library.\n", + "A variable with the same name as a dimension is called a coordinate variable. It typically defines a physical coordinate corresponding to that dimension.\n", + "\n", + "\n", + "Tips: By definition, coordinates and dimensions are also variables. For example, `ds.time` is a coordinate but it is also a special variable. But in practice, when we talk about a variable, we mean a regular/non-coordinate variable. We will use this convention for the remainder of this notebook. " + ] + }, + { + "cell_type": "markdown", + "id": "d47120f9", + "metadata": {}, + "source": [ + "### Xarray Variable\n", + "\n", + "\n", + "We can use `ds.variables` to access the variables of a dataset. But printing out the whole dataset can often be overwhelming. Instead, we would work on an individual variable (also called an Xarray data array) using the `ds[var_name]` syntax, where `var_name` is the name of the variable. See the following examples in action.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5a57e136", + "metadata": {}, + "outputs": [], + "source": [ + "print(type(ds[\"time\"]))\n", + "ds[\"time\"]" + ] + }, + { + "cell_type": "markdown", + "id": "51ec643e", + "metadata": {}, + "source": [ + "#### Variable properties\n", + "For an individual variable, the following properties are the most commonly used, listed with the associated xarray attribute to retrieve them. 
(assuming var = ds[var_name])\n", + "* name: `var.name`\n", + "* data content: `var.data`\n", + "* attributes: `var.attrs`\n", + "* dimenstions: `var.dims`\n", + "* data type: `var.data.dtype`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a632a525", + "metadata": {}, + "outputs": [], + "source": [ + "var_name = \"time\"\n", + "var = ds[var_name]\n", + "\n", + "print(\"var.name: \\n\", var.name, \"\\n\")\n", + "print(\"var.data: \\n\", var.data, \"\\n\")\n", + "print(\"var.attrs: \\n\", var.attrs, \"\\n\")\n", + "print(\"var.dims: \\n\", var.dims, \"\\n\")\n", + "print(\"var.data.dtype: \\n\", var.data.dtype, \"\\n\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b256b07f", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: indivdual variable info table (collect and display variable info as a tabular format)\n", + "df_info = pd.DataFrame()\n", + "df_info[\"var_name\"] = list(ds.variables)\n", + "df_info[\"dims\"] = df_info.var_name.apply(lambda x: ds[x].dims)\n", + "df_info[\"is_dim\"] = df_info.var_name.apply(lambda x: x in ds.dims)\n", + "df_info[\"n_dim\"] = df_info.var_name.apply(lambda x: len(ds[x].dims))\n", + "df_info[\"attrs\"] = df_info.var_name.apply(lambda x: ds[x].attrs)\n", + "df_info[\"dtype\"] = df_info.var_name.apply(lambda x: ds[x].data.dtype)\n", + "df_info" + ] + }, + { + "cell_type": "markdown", + "id": "e6a36b96", + "metadata": {}, + "source": [ + "### Data cleaning/Preprocessing (skipped)\n", + "Data cleaning and preprocessing is an important stage in the the data analysis pipeline. However it will be out of the scope of this notebook. For more information about data cleaning and preprocessing basics, here are some references. 
\n", + "* [Xarray Fundamentals](https://earth-env-data-science.github.io/lectures/xarray/xarray.html)\n", + "* [Xarray Tutorial — Pangeo Gallery documentation](http://gallery.pangeo.io/repos/pangeo-data/pangeo-tutorial-gallery/xarray.html)\n", + "* [Pythonic Data Cleaning With pandas and NumPy](https://realpython.com/python-data-cleaning-numpy-pandas/)\n", + "* [Pandas - Cleaning Data](https://www.w3schools.com/python/pandas/pandas_cleaning.asp)" + ] + }, + { + "cell_type": "markdown", + "id": "ae6e1a69", + "metadata": {}, + "source": [ + "### Plotting\n", + "Data visualization (or plotting) is another important data analysis topic and deserves its own discussion. Here in this notebook we will only demonstrate basic tools for simple data visualization tasks.\n", + "\n", + "Here are some references you might find useful:\n", + "* xarray plotting: https://docs.xarray.dev/en/stable/user-guide/plotting.html\n", + "* Atmospheric Community Toolkit (ACT): https://arm-doe.github.io/ACT/index.html\n", + "\n", + "Note: this notebook is auto-generated using a template. It uses a generic rule to select the variable(s) to plot and is not customized for each individual notebook. Feel free to change the variables of interest, especially if a certain figure fails to plot." + ] + }, + { + "cell_type": "markdown", + "id": "7e417fb2", + "metadata": {}, + "source": [ + "#### 1-dimensional basic time series plot\n", + "\n", + "For the following plot we would like to find variables such that\n", + "* it has one and only one dimension\n", + "* \"time\" is its coordinate variable\n", + "* it is not a dimension itself,\n", + "* it is not a special variable with substrings within [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "\n", + "Please see the following example in action.
(For more details about pandas filtering, please see the following references.)\n", + "* [pandas: multiple conditions while indexing data frame](https://stackoverflow.com/questions/22591174/pandas-multiple-conditions-while-indexing-data-frame-unexpected-behavior)\n", + "* [pandas.DataFrame.apply](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.apply.html)\n", + "* [How to test if a string contains one of the substrings in a list...](https://stackoverflow.com/questions/26577516/how-to-test-if-a-string-contains-one-of-the-substrings-in-a-list-in-pandas)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "30edac2d", + "metadata": {}, + "outputs": [], + "source": [ + "# (pandas query) filter n_dim==1, non-dimenssional, and not contains the following substrings\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter = df_info[(df_info.n_dim == 1) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (df_info.is_dim==False) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4777a659", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " var_1d = df_filter.var_name.values[0]\n", + " var_1d\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fb5ae985", + "metadata": {}, + "outputs": [], + "source": [ + "# Note: if failed, change to another variable to plot.\n", + "try:\n", + " ds[var_1d].plot()\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "markdown", + "id": "9e41066e", + "metadata": {}, + "source": [ + "#### 2-dimensional basic plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "09bd4adc", + "metadata": {}, + "outputs": [], + "source": [ + "# (pandas query) filter n_dim==2, non-dimenssional, and not contains the
following substrings\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_2 = df_info[(df_info.n_dim == 2) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (df_info.is_dim==False) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter_2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2cdea1a9", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " var_2d = df_filter_2.var_name.values[0]\n", + " var_2d\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f5f48879", + "metadata": {}, + "outputs": [], + "source": [ + "# Note: if failed, change to another variable to plot.\n", + "try:\n", + " print(ds[var_2d].dims)\n", + " # ds[var_2d].plot()\n", + "\n", + " # conventionally, use \"time\" as x-axis\n", + " ds[var_2d].plot(x=\"time\")\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "markdown", + "id": "7554b2a6", + "metadata": {}, + "source": [ + "#### qc-plotting (optional)\n", + "\n", + "Note: act qc plotting has more strict requirement, one of them is the associated qc variable need to have \"flag_masks\" attributes. 
Which is added by using ds = act.io.armfiles.read_netcdf(files_list), then ds.clean.cleanup()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "71c2096f", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " ds_act = act.io.armfiles.read_netcdf(full_path)\n", + " print(type(ds_act))\n", + " ds_act.clean.cleanup()\n", + "\n", + " # or \n", + " # ds.clean.cleanup()\n", + "except Exception as e:\n", + " print(\"ERROR\", e)\n", + " ds_act = ds" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "844f8505", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter valid variables for ACT qc plotting\n", + "condition = (\"qc_\" + df_info.var_name).apply(lambda x: ds_act[x].attrs.get(\"flag_masks\") is not None \n", + " if x in list(ds.variables) else False)\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_3 = df_info[(df_info.is_dim==False) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings))) &\n", + " condition\n", + " ]\n", + "df_filter_3" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ecd38cc6", + "metadata": {}, + "outputs": [], + "source": [ + "# QC Plot\n", + "try:\n", + " qc_variable = df_filter_3.var_name.values[0]\n", + " print(qc_variable)\n", + "except Exception as e:\n", + " print(e)\n", + " \n", + "try:\n", + "\n", + " # Plot\n", + " qc_display = act.plotting.TimeSeriesDisplay(ds_act)\n", + " qc_display.add_subplots((2,), figsize = (9.5,10))\n", + " qc_ax = qc_display.plot(qc_variable, subplot_index=(0,), set_title=\"QC results on field: \" + qc_variable,)\n", + " qc_ax.grid()\n", + " qc_display.qc_flag_block_plot(qc_variable, subplot_index=(1,))\n", + " qc_ax.set_xlabel(f\"UTC Time starts at {ds.time.data[0]}\")\n", + "\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e)" + ] + }, + { + "cell_type": "markdown", + "id": "d76e4d27", + "metadata": 
{}, + "source": [ + "#### bonus: choose variables to plot from a dropdown menu " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bb733804", + "metadata": {}, + "outputs": [], + "source": [ + "# Valid variables filtering\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_4 = df_info[(df_info.is_dim==False) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter_4" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "82d5c20d", + "metadata": {}, + "outputs": [], + "source": [ + "# example 1: using xarray plot\n", + "\n", + "# Uncomment the following cell to try the interactive plot (ctrl + /)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2fc9aaa4", + "metadata": {}, + "outputs": [], + "source": [ + "# %matplotlib widget\n", + "# plt.clf()\n", + "\n", + "# fig, ax = plt.subplots(figsize=(10, 4))\n", + "\n", + "# available_variables = df_filter_4.var_name.values\n", + "# @widgets.interact(var=available_variables)\n", + "# def update(var = available_variables[0]):\n", + "# fig.clear() # Remove old lines from plot and plot new one\n", + "# if len(ds[var].dims)==2:\n", + "# ds[var].plot(x=\"time\", add_colorbar=False)\n", + "# else:\n", + "# ds[var].plot()\n", + "# plt.grid()\n", + "# plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fabdd802", + "metadata": {}, + "outputs": [], + "source": [ + "# example 2: using act plot\n", + "\n", + "# Uncomment the following cell to try the interactive plot (ctrl + /)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "962b4186", + "metadata": {}, + "outputs": [], + "source": [ + "# %matplotlib widget\n", + "# plt.clf()\n", + "\n", + "# available_variables = df_filter_4.var_name.values\n", + "\n", + "\n", + "# @widgets.interact(var=available_variables)\n", + "# def 
update(var = available_variables[0]):\n", + "\n", + "# i_display = act.plotting.TimeSeriesDisplay(ds_act)\n", + "# i_display.add_subplots((1,), figsize=(10, 4))\n", + "# ax = i_display.plot(var, subplot_index=(0,), set_title=f\"{var} ({ds_act[var].attrs['long_name']})\",)\n", + "\n", + "# ax.set_xlabel(f\"UTC Time starts at {ds.time.data[0]}\")\n", + "# ax.grid()\n", + "# plt.show()\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.16" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/MWRRET/mwrret1liljclou.c1.ipynb b/VAPs/quicklook/MWRRET/mwrret1liljclou.c1.ipynb new file mode 100644 index 00000000..409a8fc3 --- /dev/null +++ b/VAPs/quicklook/MWRRET/mwrret1liljclou.c1.ipynb @@ -0,0 +1,339 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# MWRRET1LILJCLOU.C1 Plots\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/mwrret) for more information about this vap." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib widget\n", + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import ipywidgets as widgets\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "# Data archive directory\n", + "DATA_DIR = r'/data/archive/'\n", + "\n", + "# Datastream info\n", + "DATASTREAM_NAME = 'mwrret1liljclou'\n", + "DATA_LEVEL = 'c1'\n", + "LOCATIONS = [{'end_date': '2017-11-01', 'facility': 'M1', 'site': 'asi', 'start_date': '2016-05-21'}, {'end_date': '2017-10-31', 'facility': 'S1', 'site': 'asi', 'start_date': '2016-05-28'}, {'end_date': '2012-02-08', 'facility': 'M1', 'site': 'gan', 'start_date': '2011-10-05'}, {'end_date': '2023-12-14', 'facility': 'C1', 'site': 'nsa', 'start_date': '2011-01-01'}, {'end_date': '2023-12-14', 'facility': 'C1', 'site': 'sgp', 'start_date': '2011-01-01'}, {'end_date': '2014-05-19', 'facility': 'C1', 'site': 'twp', 'start_date': '2011-01-01'}, {'end_date': '2013-09-08', 'facility': 'C2', 'site': 'twp', 'start_date': '2009-02-15'}, {'end_date': '2015-01-04', 'facility': 'C3', 'site': 'twp', 'start_date': '2011-01-01'}]" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "9faaf875", + "metadata": {}, + "source": [ + "## Define site, facility, and date range" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ac6764f5", + "metadata": {}, + "outputs": [], + "source": [ + "print(\"The following locations and date ranges are available for this VAP:\")\n", + "display(pd.DataFrame(LOCATIONS, columns=['site', 'facility', 'start_date', 'end_date']))" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "8d132223", + "metadata": {}, + "source": [ + "#### Define site, facility, and date range 
(date format: YYYY-MM-DD) using the variables below:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e563983a", + "metadata": {}, + "outputs": [], + "source": [ + "site_facility = ( 'sgp', 'C1' )\n", + "\n", + "date_start = '2023-12-12'\n", + "date_end = '2023-12-14'" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "bccd3dfe-2f99-49a5-bace-ea37e8dd8fc3", + "metadata": {}, + "source": [ + "## Load data files\n", + "Load data files from /data/archive/" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cb4b9a26-c574-49c0-a521-658fa553e39e", + "metadata": {}, + "outputs": [], + "source": [ + "# Compile list of files\n", + "site, facility = site_facility\n", + "d_date_start = datetime.strptime(date_start, '%Y-%m-%d')\n", + "d_date_end = datetime.strptime(date_end, '%Y-%m-%d')\n", + "dir_path = os.path.join(DATA_DIR + site, site + DATASTREAM_NAME + facility + r'.' + DATA_LEVEL )\n", + "dir_path\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6be8f3dc", + "metadata": {}, + "outputs": [], + "source": [ + "from datetime import date, timedelta\n", + "import pandas as pd\n", + "\n", + "def get_ARM_formated_dates(start_date, end_date):\n", + " \"\"\"\n", + " Get a list of ARM conventional formated date lists, based on start_date and end_date(inclusive)\n", + " EXAMPLE:\n", + " get_ARM_formated_dates(start_date=\"20180219\", end_date=\"20180221\")\n", + " >> [\"20180219\", \"20180220\", \"20180221\"] \n", + " \"\"\"\n", + " \n", + " _start_date = pd.to_datetime(start_date)\n", + " _end_date = pd.to_datetime(end_date)\n", + " \n", + " delta = _end_date - _start_date # returns timedelta \n", + " dates = []\n", + "\n", + " for i in range(delta.days + 1):\n", + " day = _start_date + timedelta(days=i)\n", + " day_formated = day.strftime(format=\"%Y%m%d\")\n", + " dates.append(day_formated)\n", + " return dates\n", + "\n", + "\n", + "get_ARM_formated_dates(start_date=date_start, 
end_date=date_end)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "51feea2e", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter a list of files based on date pattern\n", + "import glob\n", + "dates = get_ARM_formated_dates(start_date=date_start, end_date=date_end)\n", + "files_filter = []\n", + "for date in dates:\n", + " files_filter += glob.glob(f'{dir_path}/*.{date}*.*')\n", + " files_filter\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0e5d0d7", + "metadata": {}, + "outputs": [], + "source": [ + "# Load files as a single dataset\n", + "files_list = files_filter \n", + "ds = act.io.armfiles.read_netcdf(files_list)\n", + "ds.clean.cleanup()\n", + "print(f'{len(files_list)} files loaded')\n", + "ds\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "4a551094-9ec0-4b64-b80a-9940573c2f50", + "metadata": {}, + "source": [ + "## Plot time series data\n", + "#### Define the list of variables to be plotted:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d09b789e-84f1-4605-846b-a72c110c8048", + "metadata": {}, + "outputs": [], + "source": [ + "variables_to_plot = ['be_pwv', 'be_lwp']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3458fb08-035b-4898-9253-0a94e6f9c97b", + "metadata": {}, + "outputs": [], + "source": [ + "ts_display = act.plotting.TimeSeriesDisplay(ds)\n", + "ts_display.add_subplots((len(variables_to_plot),), figsize = (9.5,4*len(variables_to_plot)))\n", + "\n", + "for i,v in enumerate(variables_to_plot):\n", + " ts_ax = ts_display.plot(v, subplot_index=(i,), set_title=ds.variables[v].attrs['long_name'],)\n", + " ts_ax.grid()\n", + "\n", + "plt.show()\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "194399aa-1907-452b-8ba9-bc31d7f60291", + "metadata": {}, + "source": [ + "## Quality check plots\n", + "#### Define variable for QC plot" + ] + }, + { + "cell_type": "code", + 
"execution_count": null, + "id": "9a0ef63d-3eeb-48fc-a24a-43258b6134b8", + "metadata": {}, + "outputs": [], + "source": [ + "qc_variable = 'be_pwv'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "532663a3-4dc0-4497-bda8-018c5f91e1c4", + "metadata": {}, + "outputs": [], + "source": [ + "# QC Plot\n", + "if ('qc_'+qc_variable) in ds.variables:\n", + "\n", + " # Plot\n", + " qc_display = act.plotting.TimeSeriesDisplay(ds)\n", + " qc_display.add_subplots((2,), figsize = (9.5,10))\n", + " qc_ax = qc_display.plot(qc_variable, subplot_index=(0,), set_title=\"QC results on field: \" + qc_variable,)\n", + " qc_ax.grid()\n", + " qc_display.qc_flag_block_plot(qc_variable, subplot_index=(1,))\n", + "\n", + " plt.show()\n", + "else:\n", + " print(f'QC not available for the selected field: {qc_variable}')\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "2308db16-f362-4033-a11a-c3e5e75ad9ba", + "metadata": {}, + "source": [ + "## Field selection dropdown menu\n", + "Select variable to be plotted from a dropdown menu" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d5792fa8-2253-441a-8ac6-1e07a0345031", + "metadata": {}, + "outputs": [], + "source": [ + "plt.ioff()\n", + "\n", + "# populate dropdown menu with available variables \n", + "available_variables = [v for v in ds.variables if not('time' in v or v.startswith('qc_') or v.startswith('source_')) and 'long_name' in ds.variables[v].attrs]\n", + "d_variable = 'be_pwv'\n", + "dropdown = widgets.Dropdown(\n", + " options = [(ds.variables[v].attrs['long_name'], v) for v in available_variables],\n", + " value= d_variable,\n", + " description='Field:',\n", + " disabled=False,\n", + ")\n", + "dropdown.layout.margin = '0px 30% 0px 20%'\n", + "dropdown.layout.width = '50%'\n", + "\n", + "# set up display\n", + "i_display = act.plotting.TimeSeriesDisplay(ds)\n", + "i_display.add_subplots((1,), figsize = (9.5,5))\n", + "i_ax = i_display.plot(d_variable, 
subplot_index=(0,), set_title=ds.variables[d_variable].attrs['long_name'],)\n", + "i_ax.grid()\n", + "i_fig = i_display.fig\n", + "\n", + "# update plot callback function\n", + "def update_plot(change):\n", + " i_ax.cla()\n", + " i_ax_new = i_display.plot(change.new, subplot_index=(0,), set_title=ds.variables[change.new].attrs['long_name'],)\n", + " i_ax_new.grid()\n", + " i_fig.canvas.draw()\n", + " i_fig.canvas.flush_events()\n", + "\n", + "dropdown.observe(update_plot, names='value')\n", + "\n", + "widgets.AppLayout(\n", + " header=dropdown,\n", + " center=i_fig.canvas,\n", + " pane_heights=[1, 6,1]\n", + ")\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.12" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/MWRRET/mwrret1liljclou.c2.ipynb b/VAPs/quicklook/MWRRET/mwrret1liljclou.c2.ipynb new file mode 100644 index 00000000..182a4cd1 --- /dev/null +++ b/VAPs/quicklook/MWRRET/mwrret1liljclou.c2.ipynb @@ -0,0 +1,339 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# MWRRET1LILJCLOU.C2 Plots\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/mwrret) for more information about this vap." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib widget\n", + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import ipywidgets as widgets\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "# Data archive directory\n", + "DATA_DIR = r'/data/archive/'\n", + "\n", + "# Datastream info\n", + "DATASTREAM_NAME = 'mwrret1liljclou'\n", + "DATA_LEVEL = 'c2'\n", + "LOCATIONS = [{'end_date': '2017-01-01', 'facility': 'M1', 'site': 'awr', 'start_date': '2016-01-30'}, {'end_date': '2016-01-17', 'facility': 'S1', 'site': 'awr', 'start_date': '2015-12-06'}, {'end_date': '2020-06-01', 'facility': 'M1', 'site': 'anx', 'start_date': '2019-12-01'}, {'end_date': '2023-06-16', 'facility': 'M1', 'site': 'guc', 'start_date': '2021-09-01'}, {'end_date': '2022-10-01', 'facility': 'M1', 'site': 'hou', 'start_date': '2021-08-28'}, {'end_date': '2008-01-01', 'facility': 'M1', 'site': 'fkb', 'start_date': '2007-03-14'}, {'end_date': '2015-12-01', 'facility': 'M1', 'site': 'mao', 'start_date': '2014-01-08'}, {'end_date': '2018-03-23', 'facility': 'M1', 'site': 'mar', 'start_date': '2017-10-21'}, {'end_date': '2018-03-09', 'facility': 'S1', 'site': 'mcq', 'start_date': '2016-12-10'}, {'end_date': '2008-12-24', 'facility': 'M1', 'site': 'hfe', 'start_date': '2008-05-09'}, {'end_date': '2019-05-01', 'facility': 'M1', 'site': 'cor', 'start_date': '2018-09-23'}, {'end_date': '2013-09-26', 'facility': 'M1', 'site': 'mag', 'start_date': '2012-10-05'}, {'end_date': '2011-01-06', 'facility': 'M1', 'site': 'grw', 'start_date': '2009-04-30'}, {'end_date': '2013-05-20', 'facility': 'M1', 'site': 'pvc', 'start_date': '2012-06-29'}, {'end_date': '2005-09-15', 'facility': 'M1', 'site': 'pye', 'start_date': '2005-02-01'}, 
{'end_date': '2011-04-25', 'facility': 'M1', 'site': 'sbs', 'start_date': '2010-09-27'}, {'end_date': '2020-10-01', 'facility': 'M1', 'site': 'mos', 'start_date': '2019-10-11'}, {'end_date': '2012-03-27', 'facility': 'M1', 'site': 'pgh', 'start_date': '2011-06-20'}, {'end_date': '2022-12-31', 'facility': 'C1', 'site': 'nsa', 'start_date': '2001-04-01'}, {'end_date': '2010-12-31', 'facility': 'C2', 'site': 'nsa', 'start_date': '2001-04-01'}, {'end_date': '2007-01-08', 'facility': 'M1', 'site': 'nim', 'start_date': '2005-11-28'}, {'end_date': '2009-10-16', 'facility': 'B1', 'site': 'sgp', 'start_date': '2001-04-01'}, {'end_date': '2009-04-20', 'facility': 'B4', 'site': 'sgp', 'start_date': '2001-04-01'}, {'end_date': '2009-04-21', 'facility': 'B5', 'site': 'sgp', 'start_date': '2001-04-01'}, {'end_date': '2009-01-29', 'facility': 'B6', 'site': 'sgp', 'start_date': '2001-04-01'}, {'end_date': '2021-12-31', 'facility': 'C1', 'site': 'sgp', 'start_date': '1996-09-01'}, {'end_date': '2014-05-18', 'facility': 'C1', 'site': 'twp', 'start_date': '1996-10-11'}, {'end_date': '2009-02-14', 'facility': 'C2', 'site': 'twp', 'start_date': '1998-10-28'}, {'end_date': '2013-12-31', 'facility': 'C3', 'site': 'twp', 'start_date': '2002-03-12'}]" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "9faaf875", + "metadata": {}, + "source": [ + "## Define site, facility, and date range" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ac6764f5", + "metadata": {}, + "outputs": [], + "source": [ + "print(\"The following locations and date ranges are available for this VAP:\")\n", + "display(pd.DataFrame(LOCATIONS, columns=['site', 'facility', 'start_date', 'end_date']))" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "8d132223", + "metadata": {}, + "source": [ + "#### Define site, facility, and date range (date format: YYYY-MM-DD) using the variables below:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": 
"e563983a", + "metadata": {}, + "outputs": [], + "source": [ + "site_facility = ( 'sgp', 'B1' )\n", + "\n", + "date_start = '2009-10-14'\n", + "date_end = '2009-10-16'" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "bccd3dfe-2f99-49a5-bace-ea37e8dd8fc3", + "metadata": {}, + "source": [ + "## Load data files\n", + "Load data files from /data/archive/" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cb4b9a26-c574-49c0-a521-658fa553e39e", + "metadata": {}, + "outputs": [], + "source": [ + "# Compile list of files\n", + "site, facility = site_facility\n", + "d_date_start = datetime.strptime(date_start, '%Y-%m-%d')\n", + "d_date_end = datetime.strptime(date_end, '%Y-%m-%d')\n", + "dir_path = os.path.join(DATA_DIR + site, site + DATASTREAM_NAME + facility + r'.' + DATA_LEVEL )\n", + "dir_path\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6be8f3dc", + "metadata": {}, + "outputs": [], + "source": [ + "from datetime import date, timedelta\n", + "import pandas as pd\n", + "\n", + "def get_ARM_formated_dates(start_date, end_date):\n", + " \"\"\"\n", + " Get a list of ARM conventional formated date lists, based on start_date and end_date(inclusive)\n", + " EXAMPLE:\n", + " get_ARM_formated_dates(start_date=\"20180219\", end_date=\"20180221\")\n", + " >> [\"20180219\", \"20180220\", \"20180221\"] \n", + " \"\"\"\n", + " \n", + " _start_date = pd.to_datetime(start_date)\n", + " _end_date = pd.to_datetime(end_date)\n", + " \n", + " delta = _end_date - _start_date # returns timedelta \n", + " dates = []\n", + "\n", + " for i in range(delta.days + 1):\n", + " day = _start_date + timedelta(days=i)\n", + " day_formated = day.strftime(format=\"%Y%m%d\")\n", + " dates.append(day_formated)\n", + " return dates\n", + "\n", + "\n", + "get_ARM_formated_dates(start_date=date_start, end_date=date_end)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "51feea2e", + "metadata": {}, + "outputs": 
[], + "source": [ + "# Filter a list of files based on date pattern\n", + "import glob\n", + "dates = get_ARM_formated_dates(start_date=date_start, end_date=date_end)\n", + "files_filter = []\n", + "for date in dates:\n", + " files_filter += glob.glob(f'{dir_path}/*.{date}*.*')\n", + " files_filter\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0e5d0d7", + "metadata": {}, + "outputs": [], + "source": [ + "# Load files as a single dataset\n", + "files_list = files_filter \n", + "ds = act.io.armfiles.read_netcdf(files_list)\n", + "ds.clean.cleanup()\n", + "print(f'{len(files_list)} files loaded')\n", + "ds\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "4a551094-9ec0-4b64-b80a-9940573c2f50", + "metadata": {}, + "source": [ + "## Plot time series data\n", + "#### Define the list of variables to be plotted:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d09b789e-84f1-4605-846b-a72c110c8048", + "metadata": {}, + "outputs": [], + "source": [ + "variables_to_plot = ['be_pwv', 'be_lwp']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3458fb08-035b-4898-9253-0a94e6f9c97b", + "metadata": {}, + "outputs": [], + "source": [ + "ts_display = act.plotting.TimeSeriesDisplay(ds)\n", + "ts_display.add_subplots((len(variables_to_plot),), figsize = (9.5,4*len(variables_to_plot)))\n", + "\n", + "for i,v in enumerate(variables_to_plot):\n", + " ts_ax = ts_display.plot(v, subplot_index=(i,), set_title=ds.variables[v].attrs['long_name'],)\n", + " ts_ax.grid()\n", + "\n", + "plt.show()\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "194399aa-1907-452b-8ba9-bc31d7f60291", + "metadata": {}, + "source": [ + "## Quality check plots\n", + "#### Define variable for QC plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9a0ef63d-3eeb-48fc-a24a-43258b6134b8", + "metadata": {}, + "outputs": [], + "source": [ + "qc_variable = 
'be_pwv'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "532663a3-4dc0-4497-bda8-018c5f91e1c4", + "metadata": {}, + "outputs": [], + "source": [ + "# QC Plot\n", + "if ('qc_'+qc_variable) in ds.variables:\n", + "\n", + " # Plot\n", + " qc_display = act.plotting.TimeSeriesDisplay(ds)\n", + " qc_display.add_subplots((2,), figsize = (9.5,10))\n", + " qc_ax = qc_display.plot(qc_variable, subplot_index=(0,), set_title=\"QC results on field: \" + qc_variable,)\n", + " qc_ax.grid()\n", + " qc_display.qc_flag_block_plot(qc_variable, subplot_index=(1,))\n", + "\n", + " plt.show()\n", + "else:\n", + " print(f'QC not available for the selected field: {qc_variable}')\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "2308db16-f362-4033-a11a-c3e5e75ad9ba", + "metadata": {}, + "source": [ + "## Field selection dropdown menu\n", + "Select variable to be plotted from a dropdown menu" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d5792fa8-2253-441a-8ac6-1e07a0345031", + "metadata": {}, + "outputs": [], + "source": [ + "plt.ioff()\n", + "\n", + "# populate dropdown menu with available variables \n", + "available_variables = [v for v in ds.variables if not('time' in v or v.startswith('qc_') or v.startswith('source_')) and 'long_name' in ds.variables[v].attrs]\n", + "d_variable = 'be_pwv'\n", + "dropdown = widgets.Dropdown(\n", + " options = [(ds.variables[v].attrs['long_name'], v) for v in available_variables],\n", + " value= d_variable,\n", + " description='Field:',\n", + " disabled=False,\n", + ")\n", + "dropdown.layout.margin = '0px 30% 0px 20%'\n", + "dropdown.layout.width = '50%'\n", + "\n", + "# set up display\n", + "i_display = act.plotting.TimeSeriesDisplay(ds)\n", + "i_display.add_subplots((1,), figsize = (9.5,5))\n", + "i_ax = i_display.plot(d_variable, subplot_index=(0,), set_title=ds.variables[d_variable].attrs['long_name'],)\n", + "i_ax.grid()\n", + "i_fig = i_display.fig\n", + "\n", + "# update 
plot callback function\n", + "def update_plot(change):\n", + " i_ax.cla()\n", + " i_ax_new = i_display.plot(change.new, subplot_index=(0,), set_title=ds.variables[change.new].attrs['long_name'],)\n", + " i_ax_new.grid()\n", + " i_fig.canvas.draw()\n", + " i_fig.canvas.flush_events()\n", + "\n", + "dropdown.observe(update_plot, names='value')\n", + "\n", + "widgets.AppLayout(\n", + " header=dropdown,\n", + " center=i_fig.canvas,\n", + " pane_heights=[1, 6,1]\n", + ")\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.12" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/MWRRETV2/MWRRETV2_tutorial.ipynb b/VAPs/quicklook/MWRRETV2/MWRRETV2_tutorial.ipynb new file mode 100644 index 00000000..9f78a297 --- /dev/null +++ b/VAPs/quicklook/MWRRETV2/MWRRETV2_tutorial.ipynb @@ -0,0 +1,838 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# MWRRET2TURN.C1 Notebook\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/mwrretv2) for more information about this vap." + ] + }, + { + "cell_type": "markdown", + "id": "97097763", + "metadata": {}, + "source": [ + "In this notebook, we demonstrate the workflow to explore ARM vap data (using mwrret2turn as an example.) Value-added products (VAPs) are higher-order data products that have been analyzed and processed to ease scientists’ use of ARM data in atmospheric research and global climate models. \n", + "Here is the main content we will cover." 
+ ] + }, + { + "cell_type": "markdown", + "id": "eddec40f", + "metadata": {}, + "source": [ + "# Table of Content\n", + "## Access the data\n", + "* How to retrieve the data\n", + "* Data path and file name conventions\n", + "* Load data\n", + "## Explore the data\n", + "* NetCDF Data structure\n", + "* Xarray essentials\n", + "* Xarray Variable\n", + "## Plot the data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "import random\n", + "\n", + "import glob\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "id": "2372d5be", + "metadata": {}, + "source": [ + "## Access the data" + ] + }, + { + "cell_type": "markdown", + "id": "5b0d9684", + "metadata": {}, + "source": [ + "### How to retrieve the data\n", + "We assume the path \"/data/archive\" is available where you are running this notebook. You can use `os.path.exists(\"/data/archive\")` to verify if the path exists at your machine.\n", + "\n", + "\n", + "### Data path and file name conventions\n", + "There several common terminologies regarding ARM data, for example, data-stream-name, data-level, site, facility, instrument, etc. (For more details, please see [ARM Data File Standards Version 1.2](https://www.google.com/search?q=arm+datastream+facility+cite+definition&rlz=1C1GCEJ_enUS1029US1029&ei=hb41ZICrBPukqtsPvMODIA&ved=0ahUKEwjAgeiS0aL-AhV7kmoFHbzhAAQQ4dUDCBA&uact=5&oq=arm+datastream+facility+cite+definition&gs_lcp=Cgxnd3Mtd2l6LXNlcnAQAzIFCAAQogQyBQgAEKIEMgUIABCiBDIFCAAQogQyBQgAEKIEOgoIABBHENYEELADOgoIIRCgARDDBBAKSgQIQRgAUJ4DWIoOYO0PaAFwAXgAgAGMAYgBuQmSAQMyLjmYAQCgAQHIAQjAAQE&sclient=gws-wiz-serp).) 
\n", + "\n", + "For example, this notebook is called `mwrret2turn.c1`, where `mwrret2turn` is the \"datastream name\", and `c1` is the \"data level\".\n", + "\n", + "This datastream also contains site `oli` and facility `M1`. (Note: an individual datastream might have multiple site-facility pairs.)\n", + "In such a case, the data of this data-stream is stored at `/data/archive/oli/olimwrret2turnM1.c1`, which is in the format of `<DATA_DIR>/<site>/<site><datastream_name><facility>.<data_level>`. We can use the following method to assign the data-stream directory `datastream_dir = os.path.join(DATA_DIR, site, site + DATASTREAM_NAME + facility + '.' + DATA_LEVEL )`\n", + "\n", + "The data files under datastream_dir also follow naming conventions. But once you reach the datastream_dir level, the most important file naming convention to differentiate the files is \"yyyyMMdd.hhmmss\", which comes in handy to filter out files based on datetime. For example, we can use `glob.glob(f'{datastream_dir}/*.200709*.*')` to filter files from September 2007.\n", + "\n", + "(Note: refer to the https://adc.arm.gov/discovery/#/ and https://adc.arm.gov/solr8/metadata/select API to explore ARM datastreams and their associated available sites and facilities.)\n", + "\n", + "Please see the following examples in action" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d7e9eb85", + "metadata": {}, + "outputs": [], + "source": [ + "# Verify if DATA_DIR path exists\n", + "DATA_DIR = \"/data/archive\"\n", + "os.path.exists(DATA_DIR)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "586993fd", + "metadata": {}, + "outputs": [], + "source": [ + "# Speicify datastream_dir following the path conventions and check its existence\n", + "DATASTREAM_NAME = \"mwrret2turn\"\n", + "DATA_LEVEL = \"c1\"\n", + "site = \"oli\"\n", + "facility = \"M1\"\n", + "datastream_dir = os.path.join(DATA_DIR, site, site + DATASTREAM_NAME + facility + '.'
+ DATA_LEVEL )\n", + "print(datastream_dir)\n", + "print(os.path.exists(datastream_dir))\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0742f7c1", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: list 5 (random) files under datastream_dir\n", + "files = os.listdir(datastream_dir)\n", + "files[:5]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "39b98a36", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: get most recent file\n", + "list_of_files = glob.glob(f\"{datastream_dir}/*\") # * means all if need specific format then *.csv\n", + "latest_file = max(list_of_files, key=os.path.getctime)\n", + "latest_file" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "902d514e", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus sort datastream files based on datetime\n", + "files = os.listdir(datastream_dir)\n", + "file_sorted = files.copy()\n", + "file_sorted.sort() \n", + "print(file_sorted[:5])\n", + "\n", + "# to reverse\n", + "file_sorted_reverse = files.copy() \n", + "file_sorted_reverse.sort(reverse=True)\n", + "print(file_sorted_reverse[:5])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ec5923b2", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: pattern matching\n", + "# filter the 200709** files under datastream_dir\n", + "files_filter = glob.glob(f'{datastream_dir}/*.200709*.*')\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c4a4aa18", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: use armnotebook_utils.file_filter (TODO) to filter files based on datastream info \n" + ] + }, + { + "cell_type": "markdown", + "id": "cfeb9efc", + "metadata": {}, + "source": [ + "### Load data\n", + "The arm data is stored in [NetCDF](https://en.wikipedia.org/wiki/NetCDF#:~:text=%22NetCDF%20(network%20Common%20Data%20Form,format%20for%20representing%20scientific%20data.) format. 
We can use xarray's `open_dataset` method to load single file, or `open_mfdataset` to open multiple files. (Note: the latter will still return a single xarray dataset object by combining multiple files.)\n", + "\n", + "Note: open_dataset keeps the file handle open and lazy loads its contents. All parameters are passed directly to open_dataset. It is a preferable method over load_dataset for memory-effiency. (more details, please see [xarray.open_dataset](https://docs.xarray.dev/en/stable/generated/xarray.open_dataset.html))\n", + "\n", + "See the following example in action" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a440a329", + "metadata": {}, + "outputs": [], + "source": [ + "# open single file\n", + "full_path = latest_file\n", + "print(full_path)\n", + "ds_single = xr.open_dataset(full_path)\n", + "ds_single" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0143a3d", + "metadata": {}, + "outputs": [], + "source": [ + "print(ds_single)\n", + "print(type(ds_single))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1c0f8939", + "metadata": {}, + "outputs": [], + "source": [ + "# open multiple files\n", + "n_files = 3\n", + "full_paths = [os.path.join(datastream_dir, f_path) for f_path in file_sorted_reverse[:n_files]]\n", + "print(full_paths)\n", + "try: # Note: sometimes multiple files cannot be merged, so we used try except here.\n", + " ds_mutiple = xr.open_mfdataset(full_paths)\n", + " # ds_mutiple\n", + " print(type(ds_mutiple))\n", + " print(ds_mutiple)\n", + "except Exception as e:\n", + " print(e)" + ] + }, + { + "cell_type": "markdown", + "id": "3fa59943", + "metadata": {}, + "source": [ + "## Explore the data" + ] + }, + { + "cell_type": "markdown", + "id": "571f69b9", + "metadata": {}, + "source": [ + "### NetCDF Data structure \n", + "\n", + "\n", + "(If you are confident with NetCDF basics and xarray essentials, feel free to skip this session.)\n", + "Before we dive into 
data exploration, there are some essential concepts we should be familiar with (shown below): \n", + "* Dataset\n", + "* Data array\n", + "* Variable\n", + "* Dimension\n", + "* Coordinate\n", + "* Data Type\n", + "* Meta Data (Attributes)\n", + "\n", + "We will not go into details about NetCDF basics, but here are some references you might find helpful:\n", + "* [Components of a NetCDF Dataset](https://iprc.soest.hawaii.edu/users/xfu/tool/guidef-7.html#)\n", + "* [Network Common Data Form (NetCDF)](https://www.unidata.ucar.edu/software/netcdf/) and [A Brief History of (netCDF) Time](https://www.unidata.ucar.edu/software/netcdf/time/recs.html)\n", + "\n", + "\n", + "\n", + "### Xarray essentials\n", + "Earlier we introduced the xarray package and used `open_dataset` and `open_mfdataset` to retrieve a NetCDF data file as an xarray Dataset object. Recall that you can review the data in a notebook by using `print(ds)` or `ds`. \n", + "\n", + "Using xarray to retrieve the aforementioned NetCDF basics is straightforward. 
In general,\n", + "* Dataset: `ds`\n", + "* Data array: `ds.variables`\n", + "* Variable: `ds.variables`\n", + "* Dimension: `ds.dims`\n", + "* Coordinate: `ds.coords`\n", + "* Data Type: `type`\n", + "* Meta Data (Attributes): `ds.attrs`\n", + "\n", + "Also, here are some references if you are new to xarray\n", + "* [Xarray in 45 minutes](https://tutorial.xarray.dev/overview/xarray-in-45-min.html)\n", + "* [Handling NetCDF Files using XArray for Absolute Beginners](https://towardsdatascience.com/handling-netcdf-files-using-xarray-for-absolute-beginners-111a8ab4463f)\n", + "\n", + "Try the following commands in action\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b3eac323", + "metadata": {}, + "outputs": [], + "source": [ + "ds = ds_single" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "77ecf85d", + "metadata": {}, + "outputs": [], + "source": [ + "# Dataset \n", + "ds" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "25e7de09", + "metadata": {}, + "outputs": [], + "source": [ + "# Data array, Variable\n", + "# Note: the info can be overwhelming. 
Do not attempt to grasp everything at the first glance.\n", + "ds.variables" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8c41b67e", + "metadata": {}, + "outputs": [], + "source": [ + "# dimenssions\n", + "ds.dims" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "156f1dfc", + "metadata": {}, + "outputs": [], + "source": [ + "# coordinates\n", + "ds.coords" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "277d6064", + "metadata": {}, + "outputs": [], + "source": [ + "# Meta Data (Attributes)\n", + "ds.attrs" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d334681f", + "metadata": {}, + "outputs": [], + "source": [ + "# type\n", + "print(type(ds))\n", + "print(type(ds.variables))\n", + "print(type(ds.dims))\n", + "print(type(ds.coords))\n", + "print(type(ds.attrs))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "643399d6", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: retrieve variable names only\n", + "list(ds.variables)" + ] + }, + { + "cell_type": "markdown", + "id": "1cf36878", + "metadata": {}, + "source": [ + "#### Discussion: variable vs. coordinates vs. dimenssions. \n", + "From [Components of a NetCDF Dataset](https://iprc.soest.hawaii.edu/users/xfu/tool/guidef-7.html#):\n", + "\n", + "* Dimession: A dimension may be used to represent a real physical dimension, for example, time, latitude, longitude, or height. A dimension might also be used to index other quantities, for example station or model-run-number.\n", + "A netCDF dimension has both a name and a length. A dimension length is an arbitrary positive integer, except that one dimension in a netCDF dataset can have the length UNLIMITED.\n", + "\n", + "* Variables: Variables are used to store the bulk of the data in a netCDF dataset. A variable represents an array of values of the same type. A scalar value is treated as a 0-dimensional array. 
A variable has a name, a data type, and a shape described by its list of dimensions specified when the variable is created. A variable may also have associated attributes, which may be added, deleted or changed after the variable is created.\n", + "\n", + "* Coordinate (Variables): It is legal for a variable to have the same name as a dimension. Such variables have no special meaning to the netCDF library. However there is a convention that such variables should be treated in a special way by software using this library.\n", + "A variable with the same name as a dimension is called a coordinate variable. It typically defines a physical coordinate corresponding to that dimension.\n", + "\n", + "\n", + "Tips: By definition, coordinates and dimensions are also variables. For example, ds.time is a coordinate, but it is also a special variable. In practice, however, when we talk about a variable, we mean a regular (non-coordinate) variable. We will use this convention for the remainder of this notebook. " + ] + }, + { + "cell_type": "markdown", + "id": "d47120f9", + "metadata": {}, + "source": [ + "### Xarray Variable\n", + "\n", + "\n", + "We can use `ds.variables` to access the variables of a dataset. But printing out the whole dataset is often overwhelming. Instead, we can work on an individual variable (also called an xarray data array) using the `ds[var_name]` syntax, where `var_name` is the name of the variable. See the following examples in action.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5a57e136", + "metadata": {}, + "outputs": [], + "source": [ + "print(type(ds[\"time\"]))\n", + "ds[\"time\"]" + ] + }, + { + "cell_type": "markdown", + "id": "51ec643e", + "metadata": {}, + "source": [ + "#### Variable properties\n", + "For an individual variable, the following properties are the most commonly used, listed with the associated xarray attribute used to retrieve them. 
(assuming var = ds[var_name])\n", + "* name: `var.name`\n", + "* data content: `var.data`\n", + "* attributes: `var.attrs`\n", + "* dimenstions: `var.dims`\n", + "* data type: `var.data.dtype`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a632a525", + "metadata": {}, + "outputs": [], + "source": [ + "var_name = \"time\"\n", + "var = ds[var_name]\n", + "\n", + "print(\"var.name: \\n\", var.name, \"\\n\")\n", + "print(\"var.data: \\n\", var.data, \"\\n\")\n", + "print(\"var.attrs: \\n\", var.attrs, \"\\n\")\n", + "print(\"var.dims: \\n\", var.dims, \"\\n\")\n", + "print(\"var.data.dtype: \\n\", var.data.dtype, \"\\n\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b256b07f", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: indivdual variable info table (collect and display variable info as a tabular format)\n", + "df_info = pd.DataFrame()\n", + "df_info[\"var_name\"] = list(ds.variables)\n", + "df_info[\"dims\"] = df_info.var_name.apply(lambda x: ds[x].dims)\n", + "df_info[\"is_dim\"] = df_info.var_name.apply(lambda x: x in ds.dims)\n", + "df_info[\"n_dim\"] = df_info.var_name.apply(lambda x: len(ds[x].dims))\n", + "df_info[\"attrs\"] = df_info.var_name.apply(lambda x: ds[x].attrs)\n", + "df_info[\"dtype\"] = df_info.var_name.apply(lambda x: ds[x].data.dtype)\n", + "df_info" + ] + }, + { + "cell_type": "markdown", + "id": "e6a36b96", + "metadata": {}, + "source": [ + "### Data cleaning/Preprocessing (skipped)\n", + "Data cleaning and preprocessing is an important stage in the the data analysis pipeline. However it will be out of the scope of this notebook. For more information about data cleaning and preprocessing basics, here are some references. 
\n", + "* [Xarray Fundamentals](https://earth-env-data-science.github.io/lectures/xarray/xarray.html)\n", + "* [Xarray Tutorial — Pangeo Gallery documentation](http://gallery.pangeo.io/repos/pangeo-data/pangeo-tutorial-gallery/xarray.html)\n", + "* [Pythonic Data Cleaning With pandas and NumPy](https://realpython.com/python-data-cleaning-numpy-pandas/)\n", + "* [Pandas - Cleaning Data](https://www.w3schools.com/python/pandas/pandas_cleaning.asp)" + ] + }, + { + "cell_type": "markdown", + "id": "ae6e1a69", + "metadata": {}, + "source": [ + "### Plotting\n", + "Data visualization (or plotting) is another important data analysis topic and deserves its own discussion. Here in this notebook we will only demonstrate basic tools for simple data visualization tasks.\n", + "\n", + "Here are some references you might find useful:\n", + "* xarray plotting: https://docs.xarray.dev/en/stable/user-guide/plotting.html\n", + "* Atmospheric Community Toolkit (ACT): https://arm-doe.github.io/ACT/index.html\n", + "\n", + "Note: this notebook is auto-generated using a template. It uses a generic rule to select variable(s) to plot and is not customized for each individual notebook. Feel free to change the variables of interest, especially if a figure fails to plot." + ] + }, + { + "cell_type": "markdown", + "id": "7e417fb2", + "metadata": {}, + "source": [ + "#### 1-dimensional basic time series plot\n", + "\n", + "For the following plot we would like to find variables such that\n", + "* it has one and only one dimension\n", + "* \"time\" is its coordinate variable\n", + "* it is not a dimension itself\n", + "* it is not a special variable with substrings within [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "\n", + "Please see the following example in action. 
(For more details about pandas filtering, please see the following references.)\n", + "* [pandas: multiple conditions while indexing data frame](https://stackoverflow.com/questions/22591174/pandas-multiple-conditions-while-indexing-data-frame-unexpected-behavior)\n", + "* [pandas.DataFrame.apply](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.apply.html)\n", + "* [How to test if a string contains one of the substrings in a list...](https://stackoverflow.com/questions/26577516/how-to-test-if-a-string-contains-one-of-the-substrings-in-a-list-in-pandas)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "30edac2d", + "metadata": {}, + "outputs": [], + "source": [ + "# (pandas query) filter n_dim==1, non-dimenssional, and not contains the following substrings\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter = df_info[(df_info.n_dim == 1) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (df_info.is_dim==False) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4777a659", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " var_1d = df_filter.var_name.values[0]\n", + " var_1d\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fb5ae985", + "metadata": {}, + "outputs": [], + "source": [ + "# Note: if failed, change to another variable to plot.\n", + "try:\n", + " ds[var_1d].plot()\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "markdown", + "id": "9e41066e", + "metadata": {}, + "source": [ + "#### 2-dimensional basic plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "09bd4adc", + "metadata": {}, + "outputs": [], + "source": [ + "# (pandas query) filter n_dim==2, non-dimenssional, and not contains the 
following substrings\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_2 = df_info[(df_info.n_dim == 2) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (df_info.is_dim==False) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter_2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2cdea1a9", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " var_2d = df_filter_2.var_name.values[0]\n", + " var_2d\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f5f48879", + "metadata": {}, + "outputs": [], + "source": [ + "# Note: if failed, change to another variable to plot.\n", + "try:\n", + " print(ds[var_2d].dims)\n", + " # ds[var_2d].plot()\n", + "\n", + " # conventionally, use \"time\" as x-axis\n", + " ds[var_2d].plot(x=\"time\")\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "markdown", + "id": "7554b2a6", + "metadata": {}, + "source": [ + "#### qc-plotting (optional)\n", + "\n", + "Note: act qc plotting has more strict requirement, one of them is the associated qc variable need to have \"flag_masks\" attributes. 
Which is added by using ds = act.io.armfiles.read_netcdf(files_list), then ds.clean.cleanup()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "71c2096f", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " ds_act = act.io.armfiles.read_netcdf(full_path)\n", + " print(type(ds_act))\n", + " ds_act.clean.cleanup()\n", + "\n", + " # or \n", + " # ds.clean.cleanup()\n", + "except Exception as e:\n", + " print(\"ERROR\", e)\n", + " ds_act = ds" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "844f8505", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter valid variables for ACT qc plotting\n", + "condition = (\"qc_\" + df_info.var_name).apply(lambda x: ds_act[x].attrs.get(\"flag_masks\") is not None \n", + " if x in list(ds.variables) else False)\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_3 = df_info[(df_info.is_dim==False) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings))) &\n", + " condition\n", + " ]\n", + "df_filter_3" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ecd38cc6", + "metadata": {}, + "outputs": [], + "source": [ + "# QC Plot\n", + "try:\n", + " qc_variable = df_filter_3.var_name.values[0]\n", + " print(qc_variable)\n", + "except Exception as e:\n", + " print(e)\n", + " \n", + "try:\n", + "\n", + " # Plot\n", + " qc_display = act.plotting.TimeSeriesDisplay(ds_act)\n", + " qc_display.add_subplots((2,), figsize = (9.5,10))\n", + " qc_ax = qc_display.plot(qc_variable, subplot_index=(0,), set_title=\"QC results on field: \" + qc_variable,)\n", + " qc_ax.grid()\n", + " qc_display.qc_flag_block_plot(qc_variable, subplot_index=(1,))\n", + " qc_ax.set_xlabel(f\"UTC Time starts at {ds.time.data[0]}\")\n", + "\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e)" + ] + }, + { + "cell_type": "markdown", + "id": "d76e4d27", + "metadata": 
{}, + "source": [ + "#### bonus: choose variables to plot from a dropdown menu " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bb733804", + "metadata": {}, + "outputs": [], + "source": [ + "# Valid variables filtering\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_4 = df_info[(df_info.is_dim==False) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter_4" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "82d5c20d", + "metadata": {}, + "outputs": [], + "source": [ + "# example 1: using xarray plot\n", + "\n", + "# Uncomment the following cell to try the interactive plot (ctrl + /)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2fc9aaa4", + "metadata": {}, + "outputs": [], + "source": [ + "# %matplotlib widget\n", + "# plt.clf()\n", + "\n", + "# fig, ax = plt.subplots(figsize=(10, 4))\n", + "\n", + "# available_variables = df_filter_4.var_name.values\n", + "# @widgets.interact(var=available_variables)\n", + "# def update(var = available_variables[0]):\n", + "# fig.clear() # Remove old lines from plot and plot new one\n", + "# if len(ds[var].dims)==2:\n", + "# ds[var].plot(x=\"time\", add_colorbar=False)\n", + "# else:\n", + "# ds[var].plot()\n", + "# plt.grid()\n", + "# plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fabdd802", + "metadata": {}, + "outputs": [], + "source": [ + "# example 2: using act plot\n", + "\n", + "# Uncomment the following cell to try the interactive plot (ctrl + /)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "962b4186", + "metadata": {}, + "outputs": [], + "source": [ + "# %matplotlib widget\n", + "# plt.clf()\n", + "\n", + "# available_variables = df_filter_4.var_name.values\n", + "\n", + "\n", + "# @widgets.interact(var=available_variables)\n", + "# def 
update(var = available_variables[0]):\n", + "\n", + "# i_display = act.plotting.TimeSeriesDisplay(ds_act)\n", + "# i_display.add_subplots((1,), figsize=(10, 4))\n", + "# ax = i_display.plot(var, subplot_index=(0,), set_title=f\"{var} ({ds_act[var].attrs['long_name']})\",)\n", + "\n", + "# ax.set_xlabel(f\"UTC Time starts at {ds.time.data[0]}\")\n", + "# ax.grid()\n", + "# plt.show()\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.16" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/MWRRETV2/mwrret2turn.c1.ipynb b/VAPs/quicklook/MWRRETV2/mwrret2turn.c1.ipynb new file mode 100644 index 00000000..a7c4f415 --- /dev/null +++ b/VAPs/quicklook/MWRRETV2/mwrret2turn.c1.ipynb @@ -0,0 +1,339 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# MWRRET2TURN.C1 Plots\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/mwrretv2) for more information about this vap." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib widget\n", + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import ipywidgets as widgets\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "# Data archive directory\n", + "DATA_DIR = r'/data/archive/'\n", + "\n", + "# Datastream info\n", + "DATASTREAM_NAME = 'mwrret2turn'\n", + "DATA_LEVEL = 'c1'\n", + "LOCATIONS = [{'end_date': '2019-09-30', 'facility': 'M1', 'site': 'oli', 'start_date': '2014-11-11'}, {'end_date': '2022-07-31', 'facility': 'C1', 'site': 'ena', 'start_date': '2014-05-01'}, {'end_date': '2022-10-01', 'facility': 'M1', 'site': 'hou', 'start_date': '2021-10-01'}, {'end_date': '2019-10-31', 'facility': 'C1', 'site': 'sgp', 'start_date': '2011-10-01'}, {'end_date': '2022-08-31', 'facility': 'E32', 'site': 'sgp', 'start_date': '2016-06-29'}, {'end_date': '2022-07-08', 'facility': 'E37', 'site': 'sgp', 'start_date': '2016-05-01'}]" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "9faaf875", + "metadata": {}, + "source": [ + "## Define site, facility, and date range" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ac6764f5", + "metadata": {}, + "outputs": [], + "source": [ + "print(\"The following locations and date ranges are available for this VAP:\")\n", + "display(pd.DataFrame(LOCATIONS, columns=['site', 'facility', 'start_date', 'end_date']))" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "8d132223", + "metadata": {}, + "source": [ + "#### Define site, facility, and date range (date format: YYYY-MM-DD) using the variables below:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e563983a", + "metadata": {}, + "outputs": [], + 
"source": [ + "site_facility = ( 'sgp', 'C1' )\n", + "\n", + "date_start = '2019-10-29'\n", + "date_end = '2019-10-31'" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "bccd3dfe-2f99-49a5-bace-ea37e8dd8fc3", + "metadata": {}, + "source": [ + "## Load data files\n", + "Load data files from /data/archive/" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cb4b9a26-c574-49c0-a521-658fa553e39e", + "metadata": {}, + "outputs": [], + "source": [ + "# Compile list of files\n", + "site, facility = site_facility\n", + "d_date_start = datetime.strptime(date_start, '%Y-%m-%d')\n", + "d_date_end = datetime.strptime(date_end, '%Y-%m-%d')\n", + "dir_path = os.path.join(DATA_DIR + site, site + DATASTREAM_NAME + facility + r'.' + DATA_LEVEL )\n", + "dir_path\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6be8f3dc", + "metadata": {}, + "outputs": [], + "source": [ + "from datetime import date, timedelta\n", + "import pandas as pd\n", + "\n", + "def get_ARM_formated_dates(start_date, end_date):\n", + " \"\"\"\n", + " Get a list of ARM conventional formated date lists, based on start_date and end_date(inclusive)\n", + " EXAMPLE:\n", + " get_ARM_formated_dates(start_date=\"20180219\", end_date=\"20180221\")\n", + " >> [\"20180219\", \"20180220\", \"20180221\"] \n", + " \"\"\"\n", + " \n", + " _start_date = pd.to_datetime(start_date)\n", + " _end_date = pd.to_datetime(end_date)\n", + " \n", + " delta = _end_date - _start_date # returns timedelta \n", + " dates = []\n", + "\n", + " for i in range(delta.days + 1):\n", + " day = _start_date + timedelta(days=i)\n", + " day_formated = day.strftime(format=\"%Y%m%d\")\n", + " dates.append(day_formated)\n", + " return dates\n", + "\n", + "\n", + "get_ARM_formated_dates(start_date=date_start, end_date=date_end)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "51feea2e", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter a list of files based 
on date pattern\n", + "import glob\n", + "dates = get_ARM_formated_dates(start_date=date_start, end_date=date_end)\n", + "files_filter = []\n", + "for date in dates:\n", + " files_filter += glob.glob(f'{dir_path}/*.{date}*.*')\n", + " files_filter\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0e5d0d7", + "metadata": {}, + "outputs": [], + "source": [ + "# Load files as a single dataset\n", + "files_list = files_filter \n", + "ds = act.io.armfiles.read_netcdf(files_list)\n", + "ds.clean.cleanup()\n", + "print(f'{len(files_list)} files loaded')\n", + "ds\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "4a551094-9ec0-4b64-b80a-9940573c2f50", + "metadata": {}, + "source": [ + "## Plot time series data\n", + "#### Define the list of variables to be plotted:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d09b789e-84f1-4605-846b-a72c110c8048", + "metadata": {}, + "outputs": [], + "source": [ + "variables_to_plot = ['phys_pwv', 'phys_pwv_uncertainty', 'phys_lwp']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3458fb08-035b-4898-9253-0a94e6f9c97b", + "metadata": {}, + "outputs": [], + "source": [ + "ts_display = act.plotting.TimeSeriesDisplay(ds)\n", + "ts_display.add_subplots((len(variables_to_plot),), figsize = (9.5,4*len(variables_to_plot)))\n", + "\n", + "for i,v in enumerate(variables_to_plot):\n", + " ts_ax = ts_display.plot(v, subplot_index=(i,), set_title=ds.variables[v].attrs['long_name'],)\n", + " ts_ax.grid()\n", + "\n", + "plt.show()\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "194399aa-1907-452b-8ba9-bc31d7f60291", + "metadata": {}, + "source": [ + "## Quality check plots\n", + "#### Define variable for QC plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9a0ef63d-3eeb-48fc-a24a-43258b6134b8", + "metadata": {}, + "outputs": [], + "source": [ + "qc_variable = 'phys_pwv'" + ] + }, + { + 
"cell_type": "code", + "execution_count": null, + "id": "532663a3-4dc0-4497-bda8-018c5f91e1c4", + "metadata": {}, + "outputs": [], + "source": [ + "# QC Plot\n", + "if ('qc_'+qc_variable) in ds.variables:\n", + "\n", + " # Plot\n", + " qc_display = act.plotting.TimeSeriesDisplay(ds)\n", + " qc_display.add_subplots((2,), figsize = (9.5,10))\n", + " qc_ax = qc_display.plot(qc_variable, subplot_index=(0,), set_title=\"QC results on field: \" + qc_variable,)\n", + " qc_ax.grid()\n", + " qc_display.qc_flag_block_plot(qc_variable, subplot_index=(1,))\n", + "\n", + " plt.show()\n", + "else:\n", + " print(f'QC not available for the selected field: {qc_variable}')\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "2308db16-f362-4033-a11a-c3e5e75ad9ba", + "metadata": {}, + "source": [ + "## Field selection dropdown menu\n", + "Select variable to be plotted from a dropdown menu" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d5792fa8-2253-441a-8ac6-1e07a0345031", + "metadata": {}, + "outputs": [], + "source": [ + "plt.ioff()\n", + "\n", + "# populate dropdown menu with available variables \n", + "available_variables = [v for v in ds.variables if not('time' in v or v.startswith('qc_') or v.startswith('source_')) and 'long_name' in ds.variables[v].attrs]\n", + "d_variable = 'phys_pwv'\n", + "dropdown = widgets.Dropdown(\n", + " options = [(ds.variables[v].attrs['long_name'], v) for v in available_variables],\n", + " value= d_variable,\n", + " description='Field:',\n", + " disabled=False,\n", + ")\n", + "dropdown.layout.margin = '0px 30% 0px 20%'\n", + "dropdown.layout.width = '50%'\n", + "\n", + "# set up display\n", + "i_display = act.plotting.TimeSeriesDisplay(ds)\n", + "i_display.add_subplots((1,), figsize = (9.5,5))\n", + "i_ax = i_display.plot(d_variable, subplot_index=(0,), set_title=ds.variables[d_variable].attrs['long_name'],)\n", + "i_ax.grid()\n", + "i_fig = i_display.fig\n", + "\n", + "# update plot callback function\n", 
+ "def update_plot(change):\n", + " i_ax.cla()\n", + " i_ax_new = i_display.plot(change.new, subplot_index=(0,), set_title=ds.variables[change.new].attrs['long_name'],)\n", + " i_ax_new.grid()\n", + " i_fig.canvas.draw()\n", + " i_fig.canvas.flush_events()\n", + "\n", + "dropdown.observe(update_plot, names='value')\n", + "\n", + "widgets.AppLayout(\n", + " header=dropdown,\n", + " center=i_fig.canvas,\n", + " pane_heights=[1, 6,1]\n", + ")\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.12" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/NAVMET-AIR/NAVMET-AIR_tutorial.ipynb b/VAPs/quicklook/NAVMET-AIR/NAVMET-AIR_tutorial.ipynb new file mode 100644 index 00000000..9cc0606e --- /dev/null +++ b/VAPs/quicklook/NAVMET-AIR/NAVMET-AIR_tutorial.ipynb @@ -0,0 +1,838 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# AAFNAVIWG.C1 Notebook\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/navmet-air) for more information about this vap." + ] + }, + { + "cell_type": "markdown", + "id": "97097763", + "metadata": {}, + "source": [ + "In this notebook, we demonstrate the workflow to explore ARM vap data (using aafnaviwg as an example.) Value-added products (VAPs) are higher-order data products that have been analyzed and processed to ease scientists’ use of ARM data in atmospheric research and global climate models. \n", + "Here is the main content we will cover." 
+ ] + }, + { + "cell_type": "markdown", + "id": "eddec40f", + "metadata": {}, + "source": [ + "# Table of Contents\n", + "## Access the data\n", + "* How to retrieve the data\n", + "* Data path and file name conventions\n", + "* Load data\n", + "## Explore the data\n", + "* NetCDF Data structure\n", + "* Xarray essentials\n", + "* Xarray Variable\n", + "## Plot the data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "import random\n", + "\n", + "import glob\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "id": "2372d5be", + "metadata": {}, + "source": [ + "## Access the data" + ] + }, + { + "cell_type": "markdown", + "id": "5b0d9684", + "metadata": {}, + "source": [ + "### How to retrieve the data\n", + "We assume the path \"/data/archive\" is available where you are running this notebook. You can use `os.path.exists(\"/data/archive\")` to verify that the path exists on your machine.\n", + "\n", + "\n", + "### Data path and file name conventions\n", + "There are several common terms regarding ARM data, for example, data-stream-name, data-level, site, facility, instrument, etc. (For more details, please see [ARM Data File Standards Version 1.2](https://www.google.com/search?q=arm+datastream+facility+cite+definition&rlz=1C1GCEJ_enUS1029US1029&ei=hb41ZICrBPukqtsPvMODIA&ved=0ahUKEwjAgeiS0aL-AhV7kmoFHbzhAAQQ4dUDCBA&uact=5&oq=arm+datastream+facility+cite+definition&gs_lcp=Cgxnd3Mtd2l6LXNlcnAQAzIFCAAQogQyBQgAEKIEMgUIABCiBDIFCAAQogQyBQgAEKIEOgoIABBHENYEELADOgoIIRCgARDDBBAKSgQIQRgAUJ4DWIoOYO0PaAFwAXgAgAGMAYgBuQmSAQMyLjmYAQCgAQHIAQjAAQE&sclient=gws-wiz-serp).) 
\n", + "\n", + "For example, this notebook is called `aafnaviwg.c1`, where `aafnaviwg` is the \"datastream name\", and `c1` is the \"data level\".\n", + "\n", + "This datastream also contains site `cor` and facility `F1`. (Note: an individual datastream might have multiple site-facility pairs.)\n", + "In such a case, the data of this data-stream is stored at `/data/archive/cor/coraafnaviwgF1.c1`, which is in the format of `<DATA_DIR>/<site>/<site><datastream_name><facility>.<data_level>`. We can use the following method to assign the data-stream directory: `datastream_dir = os.path.join(DATA_DIR, site, site + DATASTREAM_NAME + facility + '.' + DATA_LEVEL )`\n", + "\n", + "The data files under datastream_dir also follow naming conventions. Once you reach the datastream_dir level, the most important file naming convention to differentiate the files is \"yyyyMMdd.hhmmss\", which comes in handy to filter files based on datetime. For example, we can use `glob.glob(f'{datastream_dir}/*.200709*.*')` to filter files in September 2007.\n", + "\n", + "(Note: refer to https://adc.arm.gov/discovery/#/ and the https://adc.arm.gov/solr8/metadata/select API to explore ARM datastreams and their associated available sites and facilities.)\n", + "\n", + "Please see the following examples in action" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d7e9eb85", + "metadata": {}, + "outputs": [], + "source": [ + "# Verify if DATA_DIR path exists\n", + "DATA_DIR = \"/data/archive\"\n", + "os.path.exists(DATA_DIR)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "586993fd", + "metadata": {}, + "outputs": [], + "source": [ + "# Speicify datastream_dir following the path conventions and check its existence\n", + "DATASTREAM_NAME = \"aafnaviwg\"\n", + "DATA_LEVEL = \"c1\"\n", + "site = \"cor\"\n", + "facility = \"F1\"\n", + "datastream_dir = os.path.join(DATA_DIR, site, site + DATASTREAM_NAME + facility + '.' 
+ DATA_LEVEL )\n", + "print(datastream_dir)\n", + "print(os.path.exists(datastream_dir))\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0742f7c1", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: list 5 (random) files under datastream_dir\n", + "files = os.listdir(datastream_dir)\n", + "files[:5]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "39b98a36", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: get most recent file\n", + "list_of_files = glob.glob(f\"{datastream_dir}/*\") # * means all if need specific format then *.csv\n", + "latest_file = max(list_of_files, key=os.path.getctime)\n", + "latest_file" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "902d514e", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus sort datastream files based on datetime\n", + "files = os.listdir(datastream_dir)\n", + "file_sorted = files.copy()\n", + "file_sorted.sort() \n", + "print(file_sorted[:5])\n", + "\n", + "# to reverse\n", + "file_sorted_reverse = files.copy() \n", + "file_sorted_reverse.sort(reverse=True)\n", + "print(file_sorted_reverse[:5])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ec5923b2", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: pattern matching\n", + "# filter the 200709** files under datastream_dir\n", + "files_filter = glob.glob(f'{datastream_dir}/*.200709*.*')\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c4a4aa18", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: use armnotebook_utils.file_filter (TODO) to filter files based on datastream info \n" + ] + }, + { + "cell_type": "markdown", + "id": "cfeb9efc", + "metadata": {}, + "source": [ + "### Load data\n", + "The arm data is stored in [NetCDF](https://en.wikipedia.org/wiki/NetCDF#:~:text=%22NetCDF%20(network%20Common%20Data%20Form,format%20for%20representing%20scientific%20data.) format. 
We can use xarray's `open_dataset` method to load single file, or `open_mfdataset` to open multiple files. (Note: the latter will still return a single xarray dataset object by combining multiple files.)\n", + "\n", + "Note: open_dataset keeps the file handle open and lazy loads its contents. All parameters are passed directly to open_dataset. It is a preferable method over load_dataset for memory-effiency. (more details, please see [xarray.open_dataset](https://docs.xarray.dev/en/stable/generated/xarray.open_dataset.html))\n", + "\n", + "See the following example in action" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a440a329", + "metadata": {}, + "outputs": [], + "source": [ + "# open single file\n", + "full_path = latest_file\n", + "print(full_path)\n", + "ds_single = xr.open_dataset(full_path)\n", + "ds_single" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0143a3d", + "metadata": {}, + "outputs": [], + "source": [ + "print(ds_single)\n", + "print(type(ds_single))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1c0f8939", + "metadata": {}, + "outputs": [], + "source": [ + "# open multiple files\n", + "n_files = 3\n", + "full_paths = [os.path.join(datastream_dir, f_path) for f_path in file_sorted_reverse[:n_files]]\n", + "print(full_paths)\n", + "try: # Note: sometimes multiple files cannot be merged, so we used try except here.\n", + " ds_mutiple = xr.open_mfdataset(full_paths)\n", + " # ds_mutiple\n", + " print(type(ds_mutiple))\n", + " print(ds_mutiple)\n", + "except Exception as e:\n", + " print(e)" + ] + }, + { + "cell_type": "markdown", + "id": "3fa59943", + "metadata": {}, + "source": [ + "## Explore the data" + ] + }, + { + "cell_type": "markdown", + "id": "571f69b9", + "metadata": {}, + "source": [ + "### NetCDF Data structure \n", + "\n", + "\n", + "(If you are confident with NetCDF basics and xarray essentials, feel free to skip this session.)\n", + "Before we dive into 
data exploration, there are some essential concepts we should be familiar with (shown below): \n", + "* Dataset\n", + "* Data array\n", + "* Variable\n", + "* Dimension\n", + "* Coordinate\n", + "* Data Type\n", + "* Meta Data (Attributes)\n", + "\n", + "We will not go into details about NetCDF basics; here are some references you might find helpful\n", + "* [Components of a NetCDF Dataset](https://iprc.soest.hawaii.edu/users/xfu/tool/guidef-7.html#)\n", + "* [Network Common Data Form (NetCDF)](https://www.unidata.ucar.edu/software/netcdf/) and [A Brief History of (netCDF) Time](https://www.unidata.ucar.edu/software/netcdf/time/recs.html)\n", + "\n", + "\n", + "\n", + "### Xarray essentials\n", + "Earlier we introduced the xarray package and used `open_dataset` and `open_mfdataset` to load NetCDF data files as an xarray Dataset object. Recall that you can review the data in a notebook by using `print(ds)` or `ds`. \n", + "\n", + "Using xarray to retrieve the aforementioned NetCDF basics is straightforward. 
In general,\n", + "* Dataset: `ds`\n", + "* Data array: `ds.variables`\n", + "* Variable: `ds.variables`\n", + "* Dimension: `ds.dims`\n", + "* Coordinate: `ds.coords`\n", + "* Data Type: `type`\n", + "* Meta Data (Attributes): `ds.attrs`\n", + "\n", + "Also, here are some references if you are new to xarray\n", + "* [Xarray in 45 minutes](https://tutorial.xarray.dev/overview/xarray-in-45-min.html)\n", + "* [Handling NetCDF Files using XArray for Absolute Beginners](https://towardsdatascience.com/handling-netcdf-files-using-xarray-for-absolute-beginners-111a8ab4463f)\n", + "\n", + "Try the following commands in action\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b3eac323", + "metadata": {}, + "outputs": [], + "source": [ + "ds = ds_single" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "77ecf85d", + "metadata": {}, + "outputs": [], + "source": [ + "# Dataset \n", + "ds" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "25e7de09", + "metadata": {}, + "outputs": [], + "source": [ + "# Data array, Variable\n", + "# Note: the info can be overwhelming. 
Do not attempt to grasp everything at the first glance.\n", + "ds.variables" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8c41b67e", + "metadata": {}, + "outputs": [], + "source": [ + "# dimenssions\n", + "ds.dims" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "156f1dfc", + "metadata": {}, + "outputs": [], + "source": [ + "# coordinates\n", + "ds.coords" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "277d6064", + "metadata": {}, + "outputs": [], + "source": [ + "# Meta Data (Attributes)\n", + "ds.attrs" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d334681f", + "metadata": {}, + "outputs": [], + "source": [ + "# type\n", + "print(type(ds))\n", + "print(type(ds.variables))\n", + "print(type(ds.dims))\n", + "print(type(ds.coords))\n", + "print(type(ds.attrs))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "643399d6", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: retrieve variable names only\n", + "list(ds.variables)" + ] + }, + { + "cell_type": "markdown", + "id": "1cf36878", + "metadata": {}, + "source": [ + "#### Discussion: variable vs. coordinates vs. dimenssions. \n", + "From [Components of a NetCDF Dataset](https://iprc.soest.hawaii.edu/users/xfu/tool/guidef-7.html#):\n", + "\n", + "* Dimession: A dimension may be used to represent a real physical dimension, for example, time, latitude, longitude, or height. A dimension might also be used to index other quantities, for example station or model-run-number.\n", + "A netCDF dimension has both a name and a length. A dimension length is an arbitrary positive integer, except that one dimension in a netCDF dataset can have the length UNLIMITED.\n", + "\n", + "* Variables: Variables are used to store the bulk of the data in a netCDF dataset. A variable represents an array of values of the same type. A scalar value is treated as a 0-dimensional array. 
A variable has a name, a data type, and a shape described by its list of dimensions specified when the variable is created. A variable may also have associated attributes, which may be added, deleted or changed after the variable is created.\n", + "\n", + "* Coordinate (Variables): It is legal for a variable to have the same name as a dimension. Such variables have no special meaning to the netCDF library. However there is a convention that such variables should be treated in a special way by software using this library.\n", + "A variable with the same name as a dimension is called a coordinate variable. It typically defines a physical coordinate corresponding to that dimension.\n", + "\n", + "\n", + "Tips: By definition, coordinates and dimensions are also variables. For example, ds.time is a coordinate, but it is also a special variable. In practice, however, when we talk about a variable, we mean a regular (non-coordinate) variable. We will use this convention for the rest of the notebook. " + ] + }, + { + "cell_type": "markdown", + "id": "d47120f9", + "metadata": {}, + "source": [ + "### Xarray Variable\n", + "\n", + "\n", + "We can use `ds.variables` to access the variables of a dataset. However, printing out the whole dataset can often be overwhelming. Instead, we can work on an individual variable (also called an xarray data array) using the `ds[var_name]` syntax, where `var_name` is the name of the variable. See the following examples in action.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5a57e136", + "metadata": {}, + "outputs": [], + "source": [ + "print(type(ds[\"time\"]))\n", + "ds[\"time\"]" + ] + }, + { + "cell_type": "markdown", + "id": "51ec643e", + "metadata": {}, + "source": [ + "#### Variable properties\n", + "For an individual variable, the following properties are most commonly used, listed with the associated xarray syntax to retrieve them. 
(assuming var = ds[var_name])\n", + "* name: `var.name`\n", + "* data content: `var.data`\n", + "* attributes: `var.attrs`\n", + "* dimenstions: `var.dims`\n", + "* data type: `var.data.dtype`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a632a525", + "metadata": {}, + "outputs": [], + "source": [ + "var_name = \"time\"\n", + "var = ds[var_name]\n", + "\n", + "print(\"var.name: \\n\", var.name, \"\\n\")\n", + "print(\"var.data: \\n\", var.data, \"\\n\")\n", + "print(\"var.attrs: \\n\", var.attrs, \"\\n\")\n", + "print(\"var.dims: \\n\", var.dims, \"\\n\")\n", + "print(\"var.data.dtype: \\n\", var.data.dtype, \"\\n\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b256b07f", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: indivdual variable info table (collect and display variable info as a tabular format)\n", + "df_info = pd.DataFrame()\n", + "df_info[\"var_name\"] = list(ds.variables)\n", + "df_info[\"dims\"] = df_info.var_name.apply(lambda x: ds[x].dims)\n", + "df_info[\"is_dim\"] = df_info.var_name.apply(lambda x: x in ds.dims)\n", + "df_info[\"n_dim\"] = df_info.var_name.apply(lambda x: len(ds[x].dims))\n", + "df_info[\"attrs\"] = df_info.var_name.apply(lambda x: ds[x].attrs)\n", + "df_info[\"dtype\"] = df_info.var_name.apply(lambda x: ds[x].data.dtype)\n", + "df_info" + ] + }, + { + "cell_type": "markdown", + "id": "e6a36b96", + "metadata": {}, + "source": [ + "### Data cleaning/Preprocessing (skipped)\n", + "Data cleaning and preprocessing is an important stage in the the data analysis pipeline. However it will be out of the scope of this notebook. For more information about data cleaning and preprocessing basics, here are some references. 
\n", + "* [Xarray Fundamentals](https://earth-env-data-science.github.io/lectures/xarray/xarray.html)\n", + "* [Xarray Tutorial — Pangeo Gallery documentation](http://gallery.pangeo.io/repos/pangeo-data/pangeo-tutorial-gallery/xarray.html)\n", + "* [Pythonic Data Cleaning With pandas and NumPy](https://realpython.com/python-data-cleaning-numpy-pandas/)\n", + "* [Pandas - Cleaning Data](https://www.w3schools.com/python/pandas/pandas_cleaning.asp)" + ] + }, + { + "cell_type": "markdown", + "id": "ae6e1a69", + "metadata": {}, + "source": [ + "### Plotting\n", + "Data visualization (or plotting) is another important data analysis topic and deserves its own discussion. Here in this notebook we will only demonstrate basic tools for simple data visualization tasks.\n", + "\n", + "Here are some references you might find useful:\n", + "* xarray plotting: https://docs.xarray.dev/en/stable/user-guide/plotting.html\n", + "* Atmospheric Community Toolkit (ACT): https://arm-doe.github.io/ACT/index.html\n", + "\n", + "Note: this notebook is auto-generated using a template. It uses a generic heuristic to select the variable(s) to plot and is not customized for each individual notebook. Feel free to change the variables of interest, especially if a figure fails to plot." + ] + }, + { + "cell_type": "markdown", + "id": "7e417fb2", + "metadata": {}, + "source": [ + "#### 1-dimensional basic time series plot\n", + "\n", + "For the following plot we would like to find variables such that\n", + "* it has one and only one dimension\n", + "* \"time\" is its coordinate variable\n", + "* it is not a dimension itself\n", + "* its name does not contain any of the substrings [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "\n", + "Please see the following example in action. 
(For more details about pandas filtering, please see the following references.)\n", + "* [pandas: multiple conditions while indexing data frame](https://stackoverflow.com/questions/22591174/pandas-multiple-conditions-while-indexing-data-frame-unexpected-behavior)\n", + "* [pandas.DataFrame.apply](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.apply.html)\n", + "* [How to test if a string contains one of the substrings in a list...](https://stackoverflow.com/questions/26577516/how-to-test-if-a-string-contains-one-of-the-substrings-in-a-list-in-pandas" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "30edac2d", + "metadata": {}, + "outputs": [], + "source": [ + "# (pandas query) filter n_dim==1, non-dimenssional, and not contains the following substrings\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter = df_info[(df_info.n_dim == 1) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (df_info.is_dim==False) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4777a659", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " var_1d = df_filter.var_name.values[0]\n", + " var_1d\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fb5ae985", + "metadata": {}, + "outputs": [], + "source": [ + "# Note: if failed, change to another variable to plot.\n", + "try:\n", + " ds[var_1d].plot()\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "markdown", + "id": "9e41066e", + "metadata": {}, + "source": [ + "#### 2-dimenssional basic plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "09bd4adc", + "metadata": {}, + "outputs": [], + "source": [ + "# (pandas query) filter n_dim==2, non-dimenssional, and not contains the 
following substrings\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_2 = df_info[(df_info.n_dim == 2) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (df_info.is_dim==False) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter_2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2cdea1a9", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " var_2d = df_filter_2.var_name.values[0]\n", + " var_2d\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f5f48879", + "metadata": {}, + "outputs": [], + "source": [ + "# Note: if failed, change to another variable to plot.\n", + "try:\n", + " print(ds[var_2d].dims)\n", + " # ds[var_2d].plot()\n", + "\n", + " # conventionally, use \"time\" as x-axis\n", + " ds[var_2d].plot(x=\"time\")\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "markdown", + "id": "7554b2a6", + "metadata": {}, + "source": [ + "#### qc-plotting (optional)\n", + "\n", + "Note: act qc plotting has more strict requirement, one of them is the associated qc variable need to have \"flag_masks\" attributes. 
Which is added by using ds = act.io.armfiles.read_netcdf(files_list), then ds.clean.cleanup()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "71c2096f", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " ds_act = act.io.armfiles.read_netcdf(full_path)\n", + " print(type(ds_act))\n", + " ds_act.clean.cleanup()\n", + "\n", + " # or \n", + " # ds.clean.cleanup()\n", + "except Exception as e:\n", + " print(\"ERROR\", e)\n", + " ds_act = ds" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "844f8505", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter valid variables for ACT qc plotting\n", + "condition = (\"qc_\" + df_info.var_name).apply(lambda x: ds_act[x].attrs.get(\"flag_masks\") is not None \n", + " if x in list(ds.variables) else False)\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_3 = df_info[(df_info.is_dim==False) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings))) &\n", + " condition\n", + " ]\n", + "df_filter_3" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ecd38cc6", + "metadata": {}, + "outputs": [], + "source": [ + "# QC Plot\n", + "try:\n", + " qc_variable = df_filter_3.var_name.values[0]\n", + " print(qc_variable)\n", + "except Exception as e:\n", + " print(e)\n", + " \n", + "try:\n", + "\n", + " # Plot\n", + " qc_display = act.plotting.TimeSeriesDisplay(ds_act)\n", + " qc_display.add_subplots((2,), figsize = (9.5,10))\n", + " qc_ax = qc_display.plot(qc_variable, subplot_index=(0,), set_title=\"QC results on field: \" + qc_variable,)\n", + " qc_ax.grid()\n", + " qc_display.qc_flag_block_plot(qc_variable, subplot_index=(1,))\n", + " qc_ax.set_xlabel(f\"UTC Time starts at {ds.time.data[0]}\")\n", + "\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e)" + ] + }, + { + "cell_type": "markdown", + "id": "d76e4d27", + "metadata": 
{}, + "source": [ + "#### bonus: choose variables to plot from a dropdown menu " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bb733804", + "metadata": {}, + "outputs": [], + "source": [ + "# Valid variables filtering\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_4 = df_info[(df_info.is_dim==False) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter_4" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "82d5c20d", + "metadata": {}, + "outputs": [], + "source": [ + "# example 1: using xarray plot\n", + "\n", + "# Uncomment the following cell to try the interactive plot (ctrl + /)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2fc9aaa4", + "metadata": {}, + "outputs": [], + "source": [ + "# %matplotlib widget\n", + "# plt.clf()\n", + "\n", + "# fig, ax = plt.subplots(figsize=(10, 4))\n", + "\n", + "# available_variables = df_filter_4.var_name.values\n", + "# @widgets.interact(var=available_variables)\n", + "# def update(var = available_variables[0]):\n", + "# fig.clear() # Remove old lines from plot and plot new one\n", + "# if len(ds[var].dims)==2:\n", + "# ds[var].plot(x=\"time\", add_colorbar=False)\n", + "# else:\n", + "# ds[var].plot()\n", + "# plt.grid()\n", + "# plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fabdd802", + "metadata": {}, + "outputs": [], + "source": [ + "# example 2: using act plot\n", + "\n", + "# Uncomment the following cell to try the interactive plot (ctrl + /)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "962b4186", + "metadata": {}, + "outputs": [], + "source": [ + "# %matplotlib widget\n", + "# plt.clf()\n", + "\n", + "# available_variables = df_filter_4.var_name.values\n", + "\n", + "\n", + "# @widgets.interact(var=available_variables)\n", + "# def 
update(var = available_variables[0]):\n", + "\n", + "# i_display = act.plotting.TimeSeriesDisplay(ds_act)\n", + "# i_display.add_subplots((1,), figsize=(10, 4))\n", + "# ax = i_display.plot(var, subplot_index=(0,), set_title=f\"{var} ({ds_act[var].attrs['long_name']})\",)\n", + "\n", + "# ax.set_xlabel(f\"UTC Time starts at {ds.time.data[0]}\")\n", + "# ax.grid()\n", + "# plt.show()\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.16" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/NAVMET-AIR/aafnaviwg.c1.ipynb b/VAPs/quicklook/NAVMET-AIR/aafnaviwg.c1.ipynb new file mode 100644 index 00000000..b8e4dbdc --- /dev/null +++ b/VAPs/quicklook/NAVMET-AIR/aafnaviwg.c1.ipynb @@ -0,0 +1,297 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# AAFNAVIWG.C1 Plots\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/navmet-air) for more information about this vap." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib widget\n", + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import ipywidgets as widgets\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "# Data archive directory\n", + "DATA_DIR = r'/data/archive/'\n", + "\n", + "# Datastream info\n", + "DATASTREAM_NAME = 'aafnaviwg'\n", + "DATA_LEVEL = 'c1'\n", + "LOCATIONS = [{'end_date': '2018-12-08', 'facility': 'F1', 'site': 'cor', 'start_date': '2018-11-04'}, {'end_date': '2018-02-19', 'facility': 'F1', 'site': 'ena', 'start_date': '2017-03-31'}, {'end_date': '2016-09-22', 'facility': 'F1', 'site': 'sgp', 'start_date': '2016-04-25'}]" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "9faaf875", + "metadata": {}, + "source": [ + "## Define site, facility, and date range" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ac6764f5", + "metadata": {}, + "outputs": [], + "source": [ + "print(\"The following locations and date ranges are available for this VAP:\")\n", + "display(pd.DataFrame(LOCATIONS, columns=['site', 'facility', 'start_date', 'end_date']))" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "8d132223", + "metadata": {}, + "source": [ + "#### Define site, facility, and date range (date format: YYYY-MM-DD) using the variables below:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e563983a", + "metadata": {}, + "outputs": [], + "source": [ + "site_facility = ( 'sgp', 'F1' )\n", + "\n", + "date_start = '2016-09-20'\n", + "date_end = '2016-09-22'" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "bccd3dfe-2f99-49a5-bace-ea37e8dd8fc3", + "metadata": {}, + "source": [ + "## Load data 
files\n", + "Load data files from /data/archive/" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cb4b9a26-c574-49c0-a521-658fa553e39e", + "metadata": {}, + "outputs": [], + "source": [ + "# Compile list of files\n", + "site, facility = site_facility\n", + "d_date_start = datetime.strptime(date_start, '%Y-%m-%d')\n", + "d_date_end = datetime.strptime(date_end, '%Y-%m-%d')\n", + "dir_path = os.path.join(DATA_DIR + site, site + DATASTREAM_NAME + facility + r'.' + DATA_LEVEL )\n", + "dir_path\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6be8f3dc", + "metadata": {}, + "outputs": [], + "source": [ + "from datetime import date, timedelta\n", + "import pandas as pd\n", + "\n", + "def get_ARM_formated_dates(start_date, end_date):\n", + " \"\"\"\n", + " Get a list of ARM conventional formated date lists, based on start_date and end_date(inclusive)\n", + " EXAMPLE:\n", + " get_ARM_formated_dates(start_date=\"20180219\", end_date=\"20180221\")\n", + " >> [\"20180219\", \"20180220\", \"20180221\"] \n", + " \"\"\"\n", + " \n", + " _start_date = pd.to_datetime(start_date)\n", + " _end_date = pd.to_datetime(end_date)\n", + " \n", + " delta = _end_date - _start_date # returns timedelta \n", + " dates = []\n", + "\n", + " for i in range(delta.days + 1):\n", + " day = _start_date + timedelta(days=i)\n", + " day_formated = day.strftime(format=\"%Y%m%d\")\n", + " dates.append(day_formated)\n", + " return dates\n", + "\n", + "\n", + "get_ARM_formated_dates(start_date=date_start, end_date=date_end)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "51feea2e", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter a list of files based on date pattern\n", + "import glob\n", + "dates = get_ARM_formated_dates(start_date=date_start, end_date=date_end)\n", + "files_filter = []\n", + "for date in dates:\n", + " files_filter += glob.glob(f'{dir_path}/*.{date}*.*')\n", + " files_filter\n", + "files_filter" + ] + 
}, + { + "cell_type": "code", + "execution_count": null, + "id": "b0e5d0d7", + "metadata": {}, + "outputs": [], + "source": [ + "# Load files as a single dataset\n", + "files_list = files_filter \n", + "ds = act.io.armfiles.read_netcdf(files_list)\n", + "ds.clean.cleanup()\n", + "print(f'{len(files_list)} files loaded')\n", + "ds\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "4a551094-9ec0-4b64-b80a-9940573c2f50", + "metadata": {}, + "source": [ + "## Plot time series data\n", + "#### Define the list of variables to be plotted:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d09b789e-84f1-4605-846b-a72c110c8048", + "metadata": {}, + "outputs": [], + "source": [ + "variables_to_plot = ['wgs_alt', 'press_alt', 'radar_alt']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3458fb08-035b-4898-9253-0a94e6f9c97b", + "metadata": {}, + "outputs": [], + "source": [ + "ts_display = act.plotting.TimeSeriesDisplay(ds)\n", + "ts_display.add_subplots((len(variables_to_plot),), figsize = (9.5,4*len(variables_to_plot)))\n", + "\n", + "for i,v in enumerate(variables_to_plot):\n", + " ts_ax = ts_display.plot(v, subplot_index=(i,), set_title=ds.variables[v].attrs['long_name'],)\n", + " ts_ax.grid()\n", + "\n", + "plt.show()\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "2308db16-f362-4033-a11a-c3e5e75ad9ba", + "metadata": {}, + "source": [ + "## Field selection dropdown menu\n", + "Select variable to be plotted from a dropdown menu" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d5792fa8-2253-441a-8ac6-1e07a0345031", + "metadata": {}, + "outputs": [], + "source": [ + "plt.ioff()\n", + "\n", + "# populate dropdown menu with available variables \n", + "available_variables = [v for v in ds.variables if not('time' in v or v.startswith('qc_') or v.startswith('source_')) and 'long_name' in ds.variables[v].attrs]\n", + "d_variable = 'wgs_alt'\n", + "dropdown = 
widgets.Dropdown(\n", + " options = [(ds.variables[v].attrs['long_name'], v) for v in available_variables],\n", + " value= d_variable,\n", + " description='Field:',\n", + " disabled=False,\n", + ")\n", + "dropdown.layout.margin = '0px 30% 0px 20%'\n", + "dropdown.layout.width = '50%'\n", + "\n", + "# set up display\n", + "i_display = act.plotting.TimeSeriesDisplay(ds)\n", + "i_display.add_subplots((1,), figsize = (9.5,5))\n", + "i_ax = i_display.plot(d_variable, subplot_index=(0,), set_title=ds.variables[d_variable].attrs['long_name'],)\n", + "i_ax.grid()\n", + "i_fig = i_display.fig\n", + "\n", + "# update plot callback function\n", + "def update_plot(change):\n", + " i_ax.cla()\n", + " i_ax_new = i_display.plot(change.new, subplot_index=(0,), set_title=ds.variables[change.new].attrs['long_name'],)\n", + " i_ax_new.grid()\n", + " i_fig.canvas.draw()\n", + " i_fig.canvas.flush_events()\n", + "\n", + "dropdown.observe(update_plot, names='value')\n", + "\n", + "widgets.AppLayout(\n", + " header=dropdown,\n", + " center=i_fig.canvas,\n", + " pane_heights=[1, 6,1]\n", + ")\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.12" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/NDROP/NDROP_tutorial.ipynb b/VAPs/quicklook/NDROP/NDROP_tutorial.ipynb new file mode 100644 index 00000000..6916edf6 --- /dev/null +++ b/VAPs/quicklook/NDROP/NDROP_tutorial.ipynb @@ -0,0 +1,838 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ 
+ "# NDROPMFRSR.C1 Notebook\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/ndrop) for more information about this vap." + ] + }, + { + "cell_type": "markdown", + "id": "97097763", + "metadata": {}, + "source": [ + "In this notebook, we demonstrate the workflow to explore ARM vap data (using ndropmfrsr as an example.) Value-added products (VAPs) are higher-order data products that have been analyzed and processed to ease scientists’ use of ARM data in atmospheric research and global climate models. \n", + "Here is the main content we will cover." + ] + }, + { + "cell_type": "markdown", + "id": "eddec40f", + "metadata": {}, + "source": [ + "# Table of Content\n", + "## Access the data\n", + "* How to retrieve the data\n", + "* Data path and file name conventions\n", + "* Load data\n", + "## Explore the data\n", + "* NetCDF Data structure\n", + "* Xarray essentials\n", + "* Xarray Variable\n", + "## Plot the data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "import random\n", + "\n", + "import glob\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "id": "2372d5be", + "metadata": {}, + "source": [ + "## Access the data" + ] + }, + { + "cell_type": "markdown", + "id": "5b0d9684", + "metadata": {}, + "source": [ + "### How to retrieve the data\n", + "We assume the path \"/data/archive\" is available where you are running this notebook. 
You can use `os.path.exists(\"/data/archive\")` to verify if the path exists on your machine.\n", + "\n", + "\n", + "### Data path and file name conventions\n", + "There are several common terms regarding ARM data, for example, data-stream-name, data-level, site, facility, instrument, etc. (For more details, please see [ARM Data File Standards Version 1.2](https://www.google.com/search?q=arm+datastream+facility+cite+definition&rlz=1C1GCEJ_enUS1029US1029&ei=hb41ZICrBPukqtsPvMODIA&ved=0ahUKEwjAgeiS0aL-AhV7kmoFHbzhAAQQ4dUDCBA&uact=5&oq=arm+datastream+facility+cite+definition&gs_lcp=Cgxnd3Mtd2l6LXNlcnAQAzIFCAAQogQyBQgAEKIEMgUIABCiBDIFCAAQogQyBQgAEKIEOgoIABBHENYEELADOgoIIRCgARDDBBAKSgQIQRgAUJ4DWIoOYO0PaAFwAXgAgAGMAYgBuQmSAQMyLjmYAQCgAQHIAQjAAQE&sclient=gws-wiz-serp).) \n", + "\n", + "For example, this notebook is called `ndropmfrsr.c1`, where `ndropmfrsr` is the \"datastream name\", and `c1` is the \"data level\".\n", + "\n", + "This datastream also contains site `asi` and facility `M1`. (Note: an individual datastream might have multiple site-facility pairs.)\n", + "In such a case, the data of this data-stream is stored at `/data/archive/asi/asindropmfrsrM1.c1`, which is in the format of `{DATA_DIR}/{site}/{site}{DATASTREAM_NAME}{facility}.{DATA_LEVEL}`. We can use the following method to assign the data-stream directory `datastream_dir = os.path.join(DATA_DIR, site, site + DATASTREAM_NAME + facility + '.' + DATA_LEVEL )`\n", + "\n", + "The data files under datastream_dir also follow naming conventions. Once you reach the datastream_dir level, the most important file naming convention for differentiating the files is \"yyyyMMdd.hhmmss\", which comes in handy to filter files based on datetime. 
For example, we can use `glob.glob(f'{datastream_dir}/*.200709*.*')` to select files from September 2007.\n", + "\n", + "(Note: refer to https://adc.arm.gov/discovery/#/ and the https://adc.arm.gov/solr8/metadata/select API to explore ARM datastreams and their associated available sites and facilities.)\n", + "\n", + "Please see the following examples in action" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d7e9eb85", + "metadata": {}, + "outputs": [], + "source": [ + "# Verify if DATA_DIR path exists\n", + "DATA_DIR = \"/data/archive\"\n", + "os.path.exists(DATA_DIR)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "586993fd", + "metadata": {}, + "outputs": [], + "source": [ + "# Specify datastream_dir following the path conventions and check its existence\n", + "DATASTREAM_NAME = \"ndropmfrsr\"\n", + "DATA_LEVEL = \"c1\"\n", + "site = \"asi\"\n", + "facility = \"M1\"\n", + "datastream_dir = os.path.join(DATA_DIR, site, site + DATASTREAM_NAME + facility + '.' 
+ DATA_LEVEL )\n", + "print(datastream_dir)\n", + "print(os.path.exists(datastream_dir))\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0742f7c1", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: list 5 (random) files under datastream_dir\n", + "files = os.listdir(datastream_dir)\n", + "files[:5]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "39b98a36", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: get most recent file\n", + "list_of_files = glob.glob(f\"{datastream_dir}/*\") # * means all if need specific format then *.csv\n", + "latest_file = max(list_of_files, key=os.path.getctime)\n", + "latest_file" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "902d514e", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus sort datastream files based on datetime\n", + "files = os.listdir(datastream_dir)\n", + "file_sorted = files.copy()\n", + "file_sorted.sort() \n", + "print(file_sorted[:5])\n", + "\n", + "# to reverse\n", + "file_sorted_reverse = files.copy() \n", + "file_sorted_reverse.sort(reverse=True)\n", + "print(file_sorted_reverse[:5])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ec5923b2", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: pattern matching\n", + "# filter the 200709** files under datastream_dir\n", + "files_filter = glob.glob(f'{datastream_dir}/*.200709*.*')\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c4a4aa18", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: use armnotebook_utils.file_filter (TODO) to filter files based on datastream info \n" + ] + }, + { + "cell_type": "markdown", + "id": "cfeb9efc", + "metadata": {}, + "source": [ + "### Load data\n", + "The arm data is stored in [NetCDF](https://en.wikipedia.org/wiki/NetCDF#:~:text=%22NetCDF%20(network%20Common%20Data%20Form,format%20for%20representing%20scientific%20data.) format. 
We can use xarray's `open_dataset` method to load a single file, or `open_mfdataset` to open multiple files. (Note: the latter will still return a single xarray dataset object by combining multiple files.)\n", + "\n", + "Note: open_dataset keeps the file handle open and lazily loads its contents. All parameters are passed directly to open_dataset. It is a preferable method over load_dataset for memory efficiency. (For more details, please see [xarray.open_dataset](https://docs.xarray.dev/en/stable/generated/xarray.open_dataset.html))\n", + "\n", + "See the following example in action" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a440a329", + "metadata": {}, + "outputs": [], + "source": [ + "# open single file\n", + "full_path = latest_file\n", + "print(full_path)\n", + "ds_single = xr.open_dataset(full_path)\n", + "ds_single" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0143a3d", + "metadata": {}, + "outputs": [], + "source": [ + "print(ds_single)\n", + "print(type(ds_single))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1c0f8939", + "metadata": {}, + "outputs": [], + "source": [ + "# open multiple files\n", + "n_files = 3\n", + "full_paths = [os.path.join(datastream_dir, f_path) for f_path in file_sorted_reverse[:n_files]]\n", + "print(full_paths)\n", + "try: # Note: sometimes multiple files cannot be merged, so we used try except here.\n", + " ds_mutiple = xr.open_mfdataset(full_paths)\n", + " # ds_mutiple\n", + " print(type(ds_mutiple))\n", + " print(ds_mutiple)\n", + "except Exception as e:\n", + " print(e)" + ] + }, + { + "cell_type": "markdown", + "id": "3fa59943", + "metadata": {}, + "source": [ + "## Explore the data" + ] + }, + { + "cell_type": "markdown", + "id": "571f69b9", + "metadata": {}, + "source": [ + "### NetCDF Data structure \n", + "\n", + "\n", + "(If you are confident with NetCDF basics and xarray essentials, feel free to skip this section.)\n", + "Before we dive into 
data exploration, there are some essential concepts we should be familiar with (shown below): \n", + "* Dataset\n", + "* Data array\n", + "* Variable\n", + "* Dimension\n", + "* Coordinate\n", + "* Data Type\n", + "* Meta Data (Attributes)\n", + "\n", + "We will not go into details about NetCDF basics, but here are some references you might find helpful:\n", + "* [Components of a NetCDF Dataset](https://iprc.soest.hawaii.edu/users/xfu/tool/guidef-7.html#)\n", + "* [Network Common Data Form (NetCDF)](https://www.unidata.ucar.edu/software/netcdf/) and [A Brief History of (netCDF) Time](https://www.unidata.ucar.edu/software/netcdf/time/recs.html)\n", + "\n", + "\n", + "\n", + "### Xarray essentials\n", + "Earlier we introduced the xarray package and used `open_dataset` and `open_mfdataset` to retrieve a NetCDF data file as an xarray Dataset object. Recall that you can review the data in a notebook by using `print(ds)` or `ds`. \n", + "\n", + "Using xarray to retrieve the aforementioned NetCDF basics is straightforward. 
In general,\n", + "* Dataset: `ds`\n", + "* Data array: `ds.variables`\n", + "* Variable: `ds.variables`\n", + "* Dimension: `ds.dims`\n", + "* Coordinate: `ds.coords`\n", + "* Data Type: `type`\n", + "* Meta Data (Attributes): `ds.attrs`\n", + "\n", + "Also, here are some references if you are new to xarray\n", + "* [Xarray in 45 minutes](https://tutorial.xarray.dev/overview/xarray-in-45-min.html)\n", + "* [Handling NetCDF Files using XArray for Absolute Beginners](https://towardsdatascience.com/handling-netcdf-files-using-xarray-for-absolute-beginners-111a8ab4463f)\n", + "\n", + "Try the following commands in action\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b3eac323", + "metadata": {}, + "outputs": [], + "source": [ + "ds = ds_single" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "77ecf85d", + "metadata": {}, + "outputs": [], + "source": [ + "# Dataset \n", + "ds" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "25e7de09", + "metadata": {}, + "outputs": [], + "source": [ + "# Data array, Variable\n", + "# Note: the info can be overwhelming. 
Do not attempt to grasp everything at the first glance.\n", + "ds.variables" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8c41b67e", + "metadata": {}, + "outputs": [], + "source": [ + "# dimensions\n", + "ds.dims" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "156f1dfc", + "metadata": {}, + "outputs": [], + "source": [ + "# coordinates\n", + "ds.coords" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "277d6064", + "metadata": {}, + "outputs": [], + "source": [ + "# Meta Data (Attributes)\n", + "ds.attrs" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d334681f", + "metadata": {}, + "outputs": [], + "source": [ + "# type\n", + "print(type(ds))\n", + "print(type(ds.variables))\n", + "print(type(ds.dims))\n", + "print(type(ds.coords))\n", + "print(type(ds.attrs))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "643399d6", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: retrieve variable names only\n", + "list(ds.variables)" + ] + }, + { + "cell_type": "markdown", + "id": "1cf36878", + "metadata": {}, + "source": [ + "#### Discussion: variable vs. coordinates vs. dimensions. \n", + "From [Components of a NetCDF Dataset](https://iprc.soest.hawaii.edu/users/xfu/tool/guidef-7.html#):\n", + "\n", + "* Dimension: A dimension may be used to represent a real physical dimension, for example, time, latitude, longitude, or height. A dimension might also be used to index other quantities, for example station or model-run-number.\n", + "A netCDF dimension has both a name and a length. A dimension length is an arbitrary positive integer, except that one dimension in a netCDF dataset can have the length UNLIMITED.\n", + "\n", + "* Variables: Variables are used to store the bulk of the data in a netCDF dataset. A variable represents an array of values of the same type. A scalar value is treated as a 0-dimensional array. 
A variable has a name, a data type, and a shape described by its list of dimensions specified when the variable is created. A variable may also have associated attributes, which may be added, deleted or changed after the variable is created.\n", + "\n", + "* Coordinate (Variables): It is legal for a variable to have the same name as a dimension. Such variables have no special meaning to the netCDF library. However there is a convention that such variables should be treated in a special way by software using this library.\n", + "A variable with the same name as a dimension is called a coordinate variable. It typically defines a physical coordinate corresponding to that dimension.\n", + "\n", + "\n", + "Tips: By definition, coordinates and dimensions are also variables. For example, ds.time is a coordinate but it is also a special variable. In practice, however, when we talk about a variable, we mean a regular (non-coordinate) variable. We will use this convention for the remainder of the notebook. " + ] + }, + { + "cell_type": "markdown", + "id": "d47120f9", + "metadata": {}, + "source": [ + "### Xarray Variable\n", + "\n", + "\n", + "We can use `ds.variables` to access the variables of a dataset. But printing out the whole dataset can often be overwhelming. Instead, we can work on an individual variable (also called an xarray data array) using the `ds[var_name]` syntax, where `var_name` is the name of the variable. See the following examples in action.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5a57e136", + "metadata": {}, + "outputs": [], + "source": [ + "print(type(ds[\"time\"]))\n", + "ds[\"time\"]" + ] + }, + { + "cell_type": "markdown", + "id": "51ec643e", + "metadata": {}, + "source": [ + "#### Variable properties\n", + "For an individual variable, the following properties are the most commonly used, shown with the associated xarray syntax to retrieve them. 
(assuming var = ds[var_name])\n", + "* name: `var.name`\n", + "* data content: `var.data`\n", + "* attributes: `var.attrs`\n", + "* dimensions: `var.dims`\n", + "* data type: `var.data.dtype`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a632a525", + "metadata": {}, + "outputs": [], + "source": [ + "var_name = \"time\"\n", + "var = ds[var_name]\n", + "\n", + "print(\"var.name: \\n\", var.name, \"\\n\")\n", + "print(\"var.data: \\n\", var.data, \"\\n\")\n", + "print(\"var.attrs: \\n\", var.attrs, \"\\n\")\n", + "print(\"var.dims: \\n\", var.dims, \"\\n\")\n", + "print(\"var.data.dtype: \\n\", var.data.dtype, \"\\n\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b256b07f", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: individual variable info table (collect and display variable info in a tabular format)\n", + "df_info = pd.DataFrame()\n", + "df_info[\"var_name\"] = list(ds.variables)\n", + "df_info[\"dims\"] = df_info.var_name.apply(lambda x: ds[x].dims)\n", + "df_info[\"is_dim\"] = df_info.var_name.apply(lambda x: x in ds.dims)\n", + "df_info[\"n_dim\"] = df_info.var_name.apply(lambda x: len(ds[x].dims))\n", + "df_info[\"attrs\"] = df_info.var_name.apply(lambda x: ds[x].attrs)\n", + "df_info[\"dtype\"] = df_info.var_name.apply(lambda x: ds[x].data.dtype)\n", + "df_info" + ] + }, + { + "cell_type": "markdown", + "id": "e6a36b96", + "metadata": {}, + "source": [ + "### Data cleaning/Preprocessing (skipped)\n", + "Data cleaning and preprocessing is an important stage in the data analysis pipeline. However, it is out of the scope of this notebook. For more information about data cleaning and preprocessing basics, here are some references. 
\n", + "* [Xarray Fundamentals](https://earth-env-data-science.github.io/lectures/xarray/xarray.html)\n", + "* [Xarray Tutorial — Pangeo Gallery documentation](http://gallery.pangeo.io/repos/pangeo-data/pangeo-tutorial-gallery/xarray.html)\n", + "* [Pythonic Data Cleaning With pandas and NumPy](https://realpython.com/python-data-cleaning-numpy-pandas/)\n", + "* [Pandas - Cleaning Data](https://www.w3schools.com/python/pandas/pandas_cleaning.asp)" + ] + }, + { + "cell_type": "markdown", + "id": "ae6e1a69", + "metadata": {}, + "source": [ + "### Plotting\n", + "Data visualization (or plotting) is another important data analysis topic and deserves its own discussion. Here in this notebook we will only demonstrate basic tools for simple data visualization tasks.\n", + "\n", + "Here are some references you might find useful:\n", + "* xarray plotting: https://docs.xarray.dev/en/stable/user-guide/plotting.html\n", + "* Atmospheric Community Toolkit (ACT): https://arm-doe.github.io/ACT/index.html\n", + "\n", + "Note: this notebook is auto-generated using a template. It uses a general heuristic to select the variable(s) to plot and is not customized for each individual notebook. Feel free to change the variables of interest, especially if a certain figure fails to plot." + ] + }, + { + "cell_type": "markdown", + "id": "7e417fb2", + "metadata": {}, + "source": [ + "#### 1-dimensional basic time series plot\n", + "\n", + "For the following plot we would like to find variables such that\n", + "* it has one and only one dimension\n", + "* \"time\" is its coordinate variable\n", + "* it is not a dimension itself,\n", + "* it is not a special variable with substrings within [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "\n", + "Please see the following example in action. 
(For more details about pandas filtering, please see the following references.)\n", + "* [pandas: multiple conditions while indexing data frame](https://stackoverflow.com/questions/22591174/pandas-multiple-conditions-while-indexing-data-frame-unexpected-behavior)\n", + "* [pandas.DataFrame.apply](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.apply.html)\n", + "* [How to test if a string contains one of the substrings in a list...](https://stackoverflow.com/questions/26577516/how-to-test-if-a-string-contains-one-of-the-substrings-in-a-list-in-pandas)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "30edac2d", + "metadata": {}, + "outputs": [], + "source": [ + "# (pandas query) filter n_dim==1, non-dimensional, and not containing the following substrings\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter = df_info[(df_info.n_dim == 1) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (df_info.is_dim==False) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4777a659", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " var_1d = df_filter.var_name.values[0]\n", + " var_1d\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fb5ae985", + "metadata": {}, + "outputs": [], + "source": [ + "# Note: if failed, change to another variable to plot.\n", + "try:\n", + " ds[var_1d].plot()\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "markdown", + "id": "9e41066e", + "metadata": {}, + "source": [ + "#### 2-dimensional basic plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "09bd4adc", + "metadata": {}, + "outputs": [], + "source": [ + "# (pandas query) filter n_dim==2, non-dimensional, and not containing the 
following substrings\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_2 = df_info[(df_info.n_dim == 2) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (df_info.is_dim==False) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter_2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2cdea1a9", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " var_2d = df_filter_2.var_name.values[0]\n", + " var_2d\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f5f48879", + "metadata": {}, + "outputs": [], + "source": [ + "# Note: if failed, change to another variable to plot.\n", + "try:\n", + " print(ds[var_2d].dims)\n", + " # ds[var_2d].plot()\n", + "\n", + " # conventionally, use \"time\" as x-axis\n", + " ds[var_2d].plot(x=\"time\")\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "markdown", + "id": "7554b2a6", + "metadata": {}, + "source": [ + "#### qc-plotting (optional)\n", + "\n", + "Note: act qc plotting has more strict requirement, one of them is the associated qc variable need to have \"flag_masks\" attributes. 
Which is added by using ds = act.io.armfiles.read_netcdf(files_list), then ds.clean.cleanup()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "71c2096f", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " ds_act = act.io.armfiles.read_netcdf(full_path)\n", + " print(type(ds_act))\n", + " ds_act.clean.cleanup()\n", + "\n", + " # or \n", + " # ds.clean.cleanup()\n", + "except Exception as e:\n", + " print(\"ERROR\", e)\n", + " ds_act = ds" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "844f8505", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter valid variables for ACT qc plotting\n", + "condition = (\"qc_\" + df_info.var_name).apply(lambda x: ds_act[x].attrs.get(\"flag_masks\") is not None \n", + " if x in list(ds.variables) else False)\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_3 = df_info[(df_info.is_dim==False) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings))) &\n", + " condition\n", + " ]\n", + "df_filter_3" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ecd38cc6", + "metadata": {}, + "outputs": [], + "source": [ + "# QC Plot\n", + "try:\n", + " qc_variable = df_filter_3.var_name.values[0]\n", + " print(qc_variable)\n", + "except Exception as e:\n", + " print(e)\n", + " \n", + "try:\n", + "\n", + " # Plot\n", + " qc_display = act.plotting.TimeSeriesDisplay(ds_act)\n", + " qc_display.add_subplots((2,), figsize = (9.5,10))\n", + " qc_ax = qc_display.plot(qc_variable, subplot_index=(0,), set_title=\"QC results on field: \" + qc_variable,)\n", + " qc_ax.grid()\n", + " qc_display.qc_flag_block_plot(qc_variable, subplot_index=(1,))\n", + " qc_ax.set_xlabel(f\"UTC Time starts at {ds.time.data[0]}\")\n", + "\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e)" + ] + }, + { + "cell_type": "markdown", + "id": "d76e4d27", + "metadata": 
{}, + "source": [ + "#### bonus: choose variables to plot from a dropdown menu " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bb733804", + "metadata": {}, + "outputs": [], + "source": [ + "# Valid variables filtering\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_4 = df_info[(df_info.is_dim==False) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter_4" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "82d5c20d", + "metadata": {}, + "outputs": [], + "source": [ + "# example 1: using xarray plot\n", + "\n", + "# Uncomment the following cell to try the interactive plot (ctrl + /)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2fc9aaa4", + "metadata": {}, + "outputs": [], + "source": [ + "# %matplotlib widget\n", + "# plt.clf()\n", + "\n", + "# fig, ax = plt.subplots(figsize=(10, 4))\n", + "\n", + "# available_variables = df_filter_4.var_name.values\n", + "# @widgets.interact(var=available_variables)\n", + "# def update(var = available_variables[0]):\n", + "# fig.clear() # Remove old lines from plot and plot new one\n", + "# if len(ds[var].dims)==2:\n", + "# ds[var].plot(x=\"time\", add_colorbar=False)\n", + "# else:\n", + "# ds[var].plot()\n", + "# plt.grid()\n", + "# plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fabdd802", + "metadata": {}, + "outputs": [], + "source": [ + "# example 2: using act plot\n", + "\n", + "# Uncomment the following cell to try the interactive plot (ctrl + /)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "962b4186", + "metadata": {}, + "outputs": [], + "source": [ + "# %matplotlib widget\n", + "# plt.clf()\n", + "\n", + "# available_variables = df_filter_4.var_name.values\n", + "\n", + "\n", + "# @widgets.interact(var=available_variables)\n", + "# def 
update(var = available_variables[0]):\n", + "\n", + "# i_display = act.plotting.TimeSeriesDisplay(ds_act)\n", + "# i_display.add_subplots((1,), figsize=(10, 4))\n", + "# ax = i_display.plot(var, subplot_index=(0,), set_title=f\"{var} ({ds_act[var].attrs['long_name']})\",)\n", + "\n", + "# ax.set_xlabel(f\"UTC Time starts at {ds.time.data[0]}\")\n", + "# ax.grid()\n", + "# plt.show()\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.16" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/NDROP/ndropmfrsr.c1.ipynb b/VAPs/quicklook/NDROP/ndropmfrsr.c1.ipynb new file mode 100644 index 00000000..82b42fe9 --- /dev/null +++ b/VAPs/quicklook/NDROP/ndropmfrsr.c1.ipynb @@ -0,0 +1,339 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# NDROPMFRSR.C1 Plots\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/ndrop) for more information about this vap." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib widget\n", + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import ipywidgets as widgets\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "# Data archive directory\n", + "DATA_DIR = r'/data/archive/'\n", + "\n", + "# Datastream info\n", + "DATASTREAM_NAME = 'ndropmfrsr'\n", + "DATA_LEVEL = 'c1'\n", + "LOCATIONS = [{'end_date': '2017-10-31', 'facility': 'M1', 'site': 'asi', 'start_date': '2016-05-23'}, {'end_date': '2019-10-30', 'facility': 'C1', 'site': 'ena', 'start_date': '2014-06-01'}, {'end_date': '2010-12-29', 'facility': 'M1', 'site': 'grw', 'start_date': '2009-05-04'}, {'end_date': '2015-05-18', 'facility': 'E13', 'site': 'sgp', 'start_date': '1999-01-01'}, {'end_date': '2020-09-30', 'facility': 'C1', 'site': 'sgp', 'start_date': '1998-01-01'}]" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "9faaf875", + "metadata": {}, + "source": [ + "## Define site, facility, and date range" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ac6764f5", + "metadata": {}, + "outputs": [], + "source": [ + "print(\"The following locations and date ranges are available for this VAP:\")\n", + "display(pd.DataFrame(LOCATIONS, columns=['site', 'facility', 'start_date', 'end_date']))" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "8d132223", + "metadata": {}, + "source": [ + "#### Define site, facility, and date range (date format: YYYY-MM-DD) using the variables below:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e563983a", + "metadata": {}, + "outputs": [], + "source": [ + "site_facility = ( 'sgp', 'E13' )\n", + "\n", + "date_start = '2015-05-16'\n", + 
"date_end = '2015-05-18'" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "bccd3dfe-2f99-49a5-bace-ea37e8dd8fc3", + "metadata": {}, + "source": [ + "## Load data files\n", + "Load data files from /data/archive/" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cb4b9a26-c574-49c0-a521-658fa553e39e", + "metadata": {}, + "outputs": [], + "source": [ + "# Compile list of files\n", + "site, facility = site_facility\n", + "d_date_start = datetime.strptime(date_start, '%Y-%m-%d')\n", + "d_date_end = datetime.strptime(date_end, '%Y-%m-%d')\n", + "dir_path = os.path.join(DATA_DIR + site, site + DATASTREAM_NAME + facility + r'.' + DATA_LEVEL )\n", + "dir_path\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6be8f3dc", + "metadata": {}, + "outputs": [], + "source": [ + "from datetime import date, timedelta\n", + "import pandas as pd\n", + "\n", + "def get_ARM_formated_dates(start_date, end_date):\n", + " \"\"\"\n", + " Get a list of ARM conventional formated date lists, based on start_date and end_date(inclusive)\n", + " EXAMPLE:\n", + " get_ARM_formated_dates(start_date=\"20180219\", end_date=\"20180221\")\n", + " >> [\"20180219\", \"20180220\", \"20180221\"] \n", + " \"\"\"\n", + " \n", + " _start_date = pd.to_datetime(start_date)\n", + " _end_date = pd.to_datetime(end_date)\n", + " \n", + " delta = _end_date - _start_date # returns timedelta \n", + " dates = []\n", + "\n", + " for i in range(delta.days + 1):\n", + " day = _start_date + timedelta(days=i)\n", + " day_formated = day.strftime(format=\"%Y%m%d\")\n", + " dates.append(day_formated)\n", + " return dates\n", + "\n", + "\n", + "get_ARM_formated_dates(start_date=date_start, end_date=date_end)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "51feea2e", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter a list of files based on date pattern\n", + "import glob\n", + "dates = 
get_ARM_formated_dates(start_date=date_start, end_date=date_end)\n", + "files_filter = []\n", + "for date in dates:\n", + " files_filter += glob.glob(f'{dir_path}/*.{date}*.*')\n", + " files_filter\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0e5d0d7", + "metadata": {}, + "outputs": [], + "source": [ + "# Load files as a single dataset\n", + "files_list = files_filter \n", + "ds = act.io.armfiles.read_netcdf(files_list)\n", + "ds.clean.cleanup()\n", + "print(f'{len(files_list)} files loaded')\n", + "ds\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "4a551094-9ec0-4b64-b80a-9940573c2f50", + "metadata": {}, + "source": [ + "## Plot time series data\n", + "#### Define the list of variables to be plotted:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d09b789e-84f1-4605-846b-a72c110c8048", + "metadata": {}, + "outputs": [], + "source": [ + "variables_to_plot = ['drop_number_conc', 'drop_number_conc_adiabatic', 'lwp_adiabatic']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3458fb08-035b-4898-9253-0a94e6f9c97b", + "metadata": {}, + "outputs": [], + "source": [ + "ts_display = act.plotting.TimeSeriesDisplay(ds)\n", + "ts_display.add_subplots((len(variables_to_plot),), figsize = (9.5,4*len(variables_to_plot)))\n", + "\n", + "for i,v in enumerate(variables_to_plot):\n", + " ts_ax = ts_display.plot(v, subplot_index=(i,), set_title=ds.variables[v].attrs['long_name'],)\n", + " ts_ax.grid()\n", + "\n", + "plt.show()\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "194399aa-1907-452b-8ba9-bc31d7f60291", + "metadata": {}, + "source": [ + "## Quality check plots\n", + "#### Define variable for QC plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9a0ef63d-3eeb-48fc-a24a-43258b6134b8", + "metadata": {}, + "outputs": [], + "source": [ + "qc_variable = 'optical_depth_instantaneous'" + ] + }, + { + 
"cell_type": "code", + "execution_count": null, + "id": "532663a3-4dc0-4497-bda8-018c5f91e1c4", + "metadata": {}, + "outputs": [], + "source": [ + "# QC Plot\n", + "if ('qc_'+qc_variable) in ds.variables:\n", + "\n", + " # Plot\n", + " qc_display = act.plotting.TimeSeriesDisplay(ds)\n", + " qc_display.add_subplots((2,), figsize = (9.5,10))\n", + " qc_ax = qc_display.plot(qc_variable, subplot_index=(0,), set_title=\"QC results on field: \" + qc_variable,)\n", + " qc_ax.grid()\n", + " qc_display.qc_flag_block_plot(qc_variable, subplot_index=(1,))\n", + "\n", + " plt.show()\n", + "else:\n", + " print(f'QC not available for the selected field: {qc_variable}')\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "2308db16-f362-4033-a11a-c3e5e75ad9ba", + "metadata": {}, + "source": [ + "## Field selection dropdown menu\n", + "Select variable to be plotted from a dropdown menu" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d5792fa8-2253-441a-8ac6-1e07a0345031", + "metadata": {}, + "outputs": [], + "source": [ + "plt.ioff()\n", + "\n", + "# populate dropdown menu with available variables \n", + "available_variables = [v for v in ds.variables if not('time' in v or v.startswith('qc_') or v.startswith('source_')) and 'long_name' in ds.variables[v].attrs]\n", + "d_variable = 'drop_number_conc'\n", + "dropdown = widgets.Dropdown(\n", + " options = [(ds.variables[v].attrs['long_name'], v) for v in available_variables],\n", + " value= d_variable,\n", + " description='Field:',\n", + " disabled=False,\n", + ")\n", + "dropdown.layout.margin = '0px 30% 0px 20%'\n", + "dropdown.layout.width = '50%'\n", + "\n", + "# set up display\n", + "i_display = act.plotting.TimeSeriesDisplay(ds)\n", + "i_display.add_subplots((1,), figsize = (9.5,5))\n", + "i_ax = i_display.plot(d_variable, subplot_index=(0,), set_title=ds.variables[d_variable].attrs['long_name'],)\n", + "i_ax.grid()\n", + "i_fig = i_display.fig\n", + "\n", + "# update plot callback 
function\n", + "def update_plot(change):\n", + " i_ax.cla()\n", + " i_ax_new = i_display.plot(change.new, subplot_index=(0,), set_title=ds.variables[change.new].attrs['long_name'],)\n", + " i_ax_new.grid()\n", + " i_fig.canvas.draw()\n", + " i_fig.canvas.flush_events()\n", + "\n", + "dropdown.observe(update_plot, names='value')\n", + "\n", + "widgets.AppLayout(\n", + " header=dropdown,\n", + " center=i_fig.canvas,\n", + " pane_heights=[1, 6,1]\n", + ")\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.12" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/NEPHELOMETER/NEPHELOMETER_tutorial.ipynb b/VAPs/quicklook/NEPHELOMETER/NEPHELOMETER_tutorial.ipynb new file mode 100644 index 00000000..d0272bcb --- /dev/null +++ b/VAPs/quicklook/NEPHELOMETER/NEPHELOMETER_tutorial.ipynb @@ -0,0 +1,838 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# AOSNEPHDRY.C1 Notebook\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/nephelometer) for more information about this vap." + ] + }, + { + "cell_type": "markdown", + "id": "97097763", + "metadata": {}, + "source": [ + "In this notebook, we demonstrate the workflow to explore ARM vap data (using aosnephdry as an example.) Value-added products (VAPs) are higher-order data products that have been analyzed and processed to ease scientists’ use of ARM data in atmospheric research and global climate models. \n", + "Here is the main content we will cover." 
+ ] + }, + { + "cell_type": "markdown", + "id": "eddec40f", + "metadata": {}, + "source": [ + "# Table of Contents\n", + "## Access the data\n", + "* How to retrieve the data\n", + "* Data path and file name conventions\n", + "* Load data\n", + "## Explore the data\n", + "* NetCDF Data structure\n", + "* Xarray essentials\n", + "* Xarray Variable\n", + "## Plot the data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "import random\n", + "\n", + "import glob\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "id": "2372d5be", + "metadata": {}, + "source": [ + "## Access the data" + ] + }, + { + "cell_type": "markdown", + "id": "5b0d9684", + "metadata": {}, + "source": [ + "### How to retrieve the data\n", + "We assume the path \"/data/archive\" is available where you are running this notebook. You can use `os.path.exists(\"/data/archive\")` to verify if the path exists on your machine.\n", + "\n", + "\n", + "### Data path and file name conventions\n", + "There are several common terms regarding ARM data, for example, data-stream-name, data-level, site, facility, instrument, etc. (For more details, please see [ARM Data File Standards Version 1.2](https://www.google.com/search?q=arm+datastream+facility+cite+definition&rlz=1C1GCEJ_enUS1029US1029&ei=hb41ZICrBPukqtsPvMODIA&ved=0ahUKEwjAgeiS0aL-AhV7kmoFHbzhAAQQ4dUDCBA&uact=5&oq=arm+datastream+facility+cite+definition&gs_lcp=Cgxnd3Mtd2l6LXNlcnAQAzIFCAAQogQyBQgAEKIEMgUIABCiBDIFCAAQogQyBQgAEKIEOgoIABBHENYEELADOgoIIRCgARDDBBAKSgQIQRgAUJ4DWIoOYO0PaAFwAXgAgAGMAYgBuQmSAQMyLjmYAQCgAQHIAQjAAQE&sclient=gws-wiz-serp).) 
\n", + "\n", + "For example, this notebook is called `aosnephdry.c1`, where `aosnephdry` is the \"datastream name\", and `c1` is the \"data level\".\n", + "\n", + "This datastream also contains site `pvc` and facility `M1`. (Note: an individual datastream might have multiple site-facility pairs.)\n", + "In such a case, the data of this datastream is stored at `/data/archive/pvc/pvcaosnephdryM1.c1`, which is in the format of `{DATA_DIR}/{site}/{site}{DATASTREAM_NAME}{facility}.{DATA_LEVEL}`. We can use the following method to assign the datastream directory: `datastream_dir = os.path.join(DATA_DIR, site, site + DATASTREAM_NAME + facility + '.' + DATA_LEVEL )`\n", + "\n", + "The data files under datastream_dir also follow naming conventions. Once you reach the datastream_dir level, the most important file naming convention for differentiating the files is \"yyyyMMdd.hhmmss\", which comes in handy for filtering files based on datetime. For example, we can use `glob.glob(f'{datastream_dir}/*.200709*.*')` to filter files from September 2007.\n", + "\n", + "(Note: refer to https://adc.arm.gov/discovery/#/ and the https://adc.arm.gov/solr8/metadata/select API to explore ARM datastreams and their associated available sites and facilities.)\n", + "\n", + "Please see the following examples in action" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d7e9eb85", + "metadata": {}, + "outputs": [], + "source": [ + "# Verify if DATA_DIR path exists\n", + "DATA_DIR = \"/data/archive\"\n", + "os.path.exists(DATA_DIR)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "586993fd", + "metadata": {}, + "outputs": [], + "source": [ + "# Speicify datastream_dir following the path conventions and check its existence\n", + "DATASTREAM_NAME = \"aosnephdry\"\n", + "DATA_LEVEL = \"c1\"\n", + "site = \"pvc\"\n", + "facility = \"M1\"\n", + "datastream_dir = os.path.join(DATA_DIR, site, site + DATASTREAM_NAME + facility + '.' 
+ DATA_LEVEL )\n", + "print(datastream_dir)\n", + "print(os.path.exists(datastream_dir))\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0742f7c1", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: list 5 (random) files under datastream_dir\n", + "files = os.listdir(datastream_dir)\n", + "files[:5]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "39b98a36", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: get most recent file\n", + "list_of_files = glob.glob(f\"{datastream_dir}/*\") # * means all if need specific format then *.csv\n", + "latest_file = max(list_of_files, key=os.path.getctime)\n", + "latest_file" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "902d514e", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus sort datastream files based on datetime\n", + "files = os.listdir(datastream_dir)\n", + "file_sorted = files.copy()\n", + "file_sorted.sort() \n", + "print(file_sorted[:5])\n", + "\n", + "# to reverse\n", + "file_sorted_reverse = files.copy() \n", + "file_sorted_reverse.sort(reverse=True)\n", + "print(file_sorted_reverse[:5])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ec5923b2", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: pattern matching\n", + "# filter the 200709** files under datastream_dir\n", + "files_filter = glob.glob(f'{datastream_dir}/*.200709*.*')\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c4a4aa18", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: use armnotebook_utils.file_filter (TODO) to filter files based on datastream info \n" + ] + }, + { + "cell_type": "markdown", + "id": "cfeb9efc", + "metadata": {}, + "source": [ + "### Load data\n", + "The arm data is stored in [NetCDF](https://en.wikipedia.org/wiki/NetCDF#:~:text=%22NetCDF%20(network%20Common%20Data%20Form,format%20for%20representing%20scientific%20data.) format. 
We can use xarray's `open_dataset` method to load single file, or `open_mfdataset` to open multiple files. (Note: the latter will still return a single xarray dataset object by combining multiple files.)\n", + "\n", + "Note: open_dataset keeps the file handle open and lazy loads its contents. All parameters are passed directly to open_dataset. It is a preferable method over load_dataset for memory-effiency. (more details, please see [xarray.open_dataset](https://docs.xarray.dev/en/stable/generated/xarray.open_dataset.html))\n", + "\n", + "See the following example in action" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a440a329", + "metadata": {}, + "outputs": [], + "source": [ + "# open single file\n", + "full_path = latest_file\n", + "print(full_path)\n", + "ds_single = xr.open_dataset(full_path)\n", + "ds_single" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0143a3d", + "metadata": {}, + "outputs": [], + "source": [ + "print(ds_single)\n", + "print(type(ds_single))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1c0f8939", + "metadata": {}, + "outputs": [], + "source": [ + "# open multiple files\n", + "n_files = 3\n", + "full_paths = [os.path.join(datastream_dir, f_path) for f_path in file_sorted_reverse[:n_files]]\n", + "print(full_paths)\n", + "try: # Note: sometimes multiple files cannot be merged, so we used try except here.\n", + " ds_mutiple = xr.open_mfdataset(full_paths)\n", + " # ds_mutiple\n", + " print(type(ds_mutiple))\n", + " print(ds_mutiple)\n", + "except Exception as e:\n", + " print(e)" + ] + }, + { + "cell_type": "markdown", + "id": "3fa59943", + "metadata": {}, + "source": [ + "## Explore the data" + ] + }, + { + "cell_type": "markdown", + "id": "571f69b9", + "metadata": {}, + "source": [ + "### NetCDF Data structure \n", + "\n", + "\n", + "(If you are confident with NetCDF basics and xarray essentials, feel free to skip this session.)\n", + "Before we dive into 
data exploration, there are some essential concepts we should be familiar with (shown below): \n", + "* Dataset\n", + "* Data array\n", + "* Variable\n", + "* Dimension\n", + "* Coordinate\n", + "* Data Type\n", + "* Meta Data (Attributes)\n", + "\n", + "We will not go into details about NetCDF basics and here are some references you might find helpful\n", + "* [Components of a NetCDF Dataset](https://iprc.soest.hawaii.edu/users/xfu/tool/guidef-7.html#)\n", + "* [Network Common Data Form (NetCDF)](https://www.unidata.ucar.edu/software/netcdf/) and [A Brief History of (netCDF) Time](https://www.unidata.ucar.edu/software/netcdf/time/recs.html)\n", + "\n", + "\n", + "\n", + "### Xarray essentials\n", + "Earlier we introduced the xarray package and used `open_dataset` and `open_mfdataset` to load a NetCDF data file as an xarray Dataset object. Recall that you can review the data in a notebook by using `print(ds)` or `ds`. \n", + "\n", + "Using xarray to retrieve the aforementioned NetCDF basics is straightforward. 
In general,\n", + "* Dataset: `ds`\n", + "* Data array: `ds[var_name]`\n", + "* Variable: `ds.variables`\n", + "* Dimension: `ds.dims`\n", + "* Coordinate: `ds.coords`\n", + "* Data Type: `type`\n", + "* Meta Data (Attributes): `ds.attrs`\n", + "\n", + "Also, here are some references if you are new to xarray\n", + "* [Xarray in 45 minutes](https://tutorial.xarray.dev/overview/xarray-in-45-min.html)\n", + "* [Handling NetCDF Files using XArray for Absolute Beginners](https://towardsdatascience.com/handling-netcdf-files-using-xarray-for-absolute-beginners-111a8ab4463f)\n", + "\n", + "Try the following commands in action\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b3eac323", + "metadata": {}, + "outputs": [], + "source": [ + "ds = ds_single" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "77ecf85d", + "metadata": {}, + "outputs": [], + "source": [ + "# Dataset \n", + "ds" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "25e7de09", + "metadata": {}, + "outputs": [], + "source": [ + "# Data array, Variable\n", + "# Note: the info can be overwhelming. 
Do not attempt to grasp everything at the first glance.\n", + "ds.variables" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8c41b67e", + "metadata": {}, + "outputs": [], + "source": [ + "# dimenssions\n", + "ds.dims" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "156f1dfc", + "metadata": {}, + "outputs": [], + "source": [ + "# coordinates\n", + "ds.coords" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "277d6064", + "metadata": {}, + "outputs": [], + "source": [ + "# Meta Data (Attributes)\n", + "ds.attrs" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d334681f", + "metadata": {}, + "outputs": [], + "source": [ + "# type\n", + "print(type(ds))\n", + "print(type(ds.variables))\n", + "print(type(ds.dims))\n", + "print(type(ds.coords))\n", + "print(type(ds.attrs))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "643399d6", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: retrieve variable names only\n", + "list(ds.variables)" + ] + }, + { + "cell_type": "markdown", + "id": "1cf36878", + "metadata": {}, + "source": [ + "#### Discussion: variable vs. coordinates vs. dimensions. \n", + "From [Components of a NetCDF Dataset](https://iprc.soest.hawaii.edu/users/xfu/tool/guidef-7.html#):\n", + "\n", + "* Dimension: A dimension may be used to represent a real physical dimension, for example, time, latitude, longitude, or height. A dimension might also be used to index other quantities, for example station or model-run-number.\n", + "A netCDF dimension has both a name and a length. A dimension length is an arbitrary positive integer, except that one dimension in a netCDF dataset can have the length UNLIMITED.\n", + "\n", + "* Variables: Variables are used to store the bulk of the data in a netCDF dataset. A variable represents an array of values of the same type. A scalar value is treated as a 0-dimensional array. 
A variable has a name, a data type, and a shape described by its list of dimensions specified when the variable is created. A variable may also have associated attributes, which may be added, deleted or changed after the variable is created.\n", + "\n", + "* Coordinate (Variables): It is legal for a variable to have the same name as a dimension. Such variables have no special meaning to the netCDF library. However there is a convention that such variables should be treated in a special way by software using this library.\n", + "A variable with the same name as a dimension is called a coordinate variable. It typically defines a physical coordinate corresponding to that dimension.\n", + "\n", + "\n", + "Tip: By definition, coordinates and dimensions are also variables. For example, `ds.time` is a coordinate, but it is also a special variable. In practice, however, when we talk about a variable, we mean a regular (non-coordinate) variable. We will use this convention for the remainder of this notebook. " + ] + }, + { + "cell_type": "markdown", + "id": "d47120f9", + "metadata": {}, + "source": [ + "### Xarray Variable\n", + "\n", + "\n", + "We can use `ds.variables` to access the variables of a dataset. But printing out the whole dataset can often be overwhelming. Instead, we can work on an individual variable (also called an xarray data array) using the `ds[var_name]` syntax, where `var_name` is the name of the variable. See the following examples in action.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5a57e136", + "metadata": {}, + "outputs": [], + "source": [ + "print(type(ds[\"time\"]))\n", + "ds[\"time\"]" + ] + }, + { + "cell_type": "markdown", + "id": "51ec643e", + "metadata": {}, + "source": [ + "#### Variable properties\n", + "For an individual variable, the following properties are the most commonly used, shown with the associated xarray attribute to retrieve them. 
(assuming var = ds[var_name])\n", + "* name: `var.name`\n", + "* data content: `var.data`\n", + "* attributes: `var.attrs`\n", + "* dimensions: `var.dims`\n", + "* data type: `var.data.dtype`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a632a525", + "metadata": {}, + "outputs": [], + "source": [ + "var_name = \"time\"\n", + "var = ds[var_name]\n", + "\n", + "print(\"var.name: \\n\", var.name, \"\\n\")\n", + "print(\"var.data: \\n\", var.data, \"\\n\")\n", + "print(\"var.attrs: \\n\", var.attrs, \"\\n\")\n", + "print(\"var.dims: \\n\", var.dims, \"\\n\")\n", + "print(\"var.data.dtype: \\n\", var.data.dtype, \"\\n\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b256b07f", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: indivdual variable info table (collect and display variable info as a tabular format)\n", + "df_info = pd.DataFrame()\n", + "df_info[\"var_name\"] = list(ds.variables)\n", + "df_info[\"dims\"] = df_info.var_name.apply(lambda x: ds[x].dims)\n", + "df_info[\"is_dim\"] = df_info.var_name.apply(lambda x: x in ds.dims)\n", + "df_info[\"n_dim\"] = df_info.var_name.apply(lambda x: len(ds[x].dims))\n", + "df_info[\"attrs\"] = df_info.var_name.apply(lambda x: ds[x].attrs)\n", + "df_info[\"dtype\"] = df_info.var_name.apply(lambda x: ds[x].data.dtype)\n", + "df_info" + ] + }, + { + "cell_type": "markdown", + "id": "e6a36b96", + "metadata": {}, + "source": [ + "### Data cleaning/Preprocessing (skipped)\n", + "Data cleaning and preprocessing is an important stage in the data analysis pipeline. However, it is outside the scope of this notebook. For more information about data cleaning and preprocessing basics, here are some references. 
\n", + "* [Xarray Fundamentals](https://earth-env-data-science.github.io/lectures/xarray/xarray.html)\n", + "* [Xarray Tutorial — Pangeo Gallery documentation](http://gallery.pangeo.io/repos/pangeo-data/pangeo-tutorial-gallery/xarray.html)\n", + "* [Pythonic Data Cleaning With pandas and NumPy](https://realpython.com/python-data-cleaning-numpy-pandas/)\n", + "* [Pandas - Cleaning Data](https://www.w3schools.com/python/pandas/pandas_cleaning.asp)" + ] + }, + { + "cell_type": "markdown", + "id": "ae6e1a69", + "metadata": {}, + "source": [ + "### Plotting\n", + "Data visualization (or plotting) is another important data analysis topic and deserves its own discussion. Here in this notebook we will only demonstrate basic tools for simple data visualization tasks.\n", + "\n", + "Here are some references you might find useful:\n", + "* xarray plotting: https://docs.xarray.dev/en/stable/user-guide/plotting.html\n", + "* Atmospheric Community Toolkit (ACT): https://arm-doe.github.io/ACT/index.html\n", + "\n", + "Note: this notebook is auto-generated using a template. It uses a general idea to select variable(s) to plot and is not customized to each individual notebook. Feel free to change the variables of interest, especially if a figure fails to plot." + ] + }, + { + "cell_type": "markdown", + "id": "7e417fb2", + "metadata": {}, + "source": [ + "#### 1-dimensional basic time series plot\n", + "\n", + "For the following plot we would like to find variables such that\n", + "* it has one and only one dimension\n", + "* \"time\" is its coordinate variable\n", + "* it is not a dimension itself,\n", + "* it is not a special variable with substrings within [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "\n", + "Please see the following example in action. 
(For more details about pandas filtering, please see the following references.)\n", + "* [pandas: multiple conditions while indexing data frame](https://stackoverflow.com/questions/22591174/pandas-multiple-conditions-while-indexing-data-frame-unexpected-behavior)\n", + "* [pandas.DataFrame.apply](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.apply.html)\n", + "* [How to test if a string contains one of the substrings in a list...](https://stackoverflow.com/questions/26577516/how-to-test-if-a-string-contains-one-of-the-substrings-in-a-list-in-pandas)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "30edac2d", + "metadata": {}, + "outputs": [], + "source": [ + "# (pandas query) filter n_dim==1, non-dimenssional, and not contains the following substrings\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter = df_info[(df_info.n_dim == 1) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (df_info.is_dim==False) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4777a659", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " var_1d = df_filter.var_name.values[0]\n", + " var_1d\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fb5ae985", + "metadata": {}, + "outputs": [], + "source": [ + "# Note: if failed, change to another variable to plot.\n", + "try:\n", + " ds[var_1d].plot()\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "markdown", + "id": "9e41066e", + "metadata": {}, + "source": [ + "#### 2-dimensional basic plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "09bd4adc", + "metadata": {}, + "outputs": [], + "source": [ + "# (pandas query) filter n_dim==2, non-dimenssional, and not contains the 
following substrings\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_2 = df_info[(df_info.n_dim == 2) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (df_info.is_dim==False) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter_2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2cdea1a9", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " var_2d = df_filter_2.var_name.values[0]\n", + " var_2d\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f5f48879", + "metadata": {}, + "outputs": [], + "source": [ + "# Note: if failed, change to another variable to plot.\n", + "try:\n", + " print(ds[var_2d].dims)\n", + " # ds[var_2d].plot()\n", + "\n", + " # conventionally, use \"time\" as x-axis\n", + " ds[var_2d].plot(x=\"time\")\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "markdown", + "id": "7554b2a6", + "metadata": {}, + "source": [ + "#### qc-plotting (optional)\n", + "\n", + "Note: act qc plotting has more strict requirement, one of them is the associated qc variable need to have \"flag_masks\" attributes. 
Which is added by using ds = act.io.armfiles.read_netcdf(files_list), then ds.clean.cleanup()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "71c2096f", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " ds_act = act.io.armfiles.read_netcdf(full_path)\n", + " print(type(ds_act))\n", + " ds_act.clean.cleanup()\n", + "\n", + " # or \n", + " # ds.clean.cleanup()\n", + "except Exception as e:\n", + " print(\"ERROR\", e)\n", + " ds_act = ds" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "844f8505", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter valid variables for ACT qc plotting\n", + "condition = (\"qc_\" + df_info.var_name).apply(lambda x: ds_act[x].attrs.get(\"flag_masks\") is not None \n", + " if x in list(ds.variables) else False)\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_3 = df_info[(df_info.is_dim==False) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings))) &\n", + " condition\n", + " ]\n", + "df_filter_3" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ecd38cc6", + "metadata": {}, + "outputs": [], + "source": [ + "# QC Plot\n", + "try:\n", + " qc_variable = df_filter_3.var_name.values[0]\n", + " print(qc_variable)\n", + "except Exception as e:\n", + " print(e)\n", + " \n", + "try:\n", + "\n", + " # Plot\n", + " qc_display = act.plotting.TimeSeriesDisplay(ds_act)\n", + " qc_display.add_subplots((2,), figsize = (9.5,10))\n", + " qc_ax = qc_display.plot(qc_variable, subplot_index=(0,), set_title=\"QC results on field: \" + qc_variable,)\n", + " qc_ax.grid()\n", + " qc_display.qc_flag_block_plot(qc_variable, subplot_index=(1,))\n", + " qc_ax.set_xlabel(f\"UTC Time starts at {ds.time.data[0]}\")\n", + "\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e)" + ] + }, + { + "cell_type": "markdown", + "id": "d76e4d27", + "metadata": 
{}, + "source": [ + "#### bonus: choose variables to plot from a dropdown menu " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bb733804", + "metadata": {}, + "outputs": [], + "source": [ + "# Valid variables filtering\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_4 = df_info[(df_info.is_dim==False) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter_4" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "82d5c20d", + "metadata": {}, + "outputs": [], + "source": [ + "# example 1: using xarray plot\n", + "\n", + "# Uncomment the following cell to try the interactive plot (ctrl + /)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2fc9aaa4", + "metadata": {}, + "outputs": [], + "source": [ + "# %matplotlib widget\n", + "# plt.clf()\n", + "\n", + "# fig, ax = plt.subplots(figsize=(10, 4))\n", + "\n", + "# available_variables = df_filter_4.var_name.values\n", + "# @widgets.interact(var=available_variables)\n", + "# def update(var = available_variables[0]):\n", + "# fig.clear() # Remove old lines from plot and plot new one\n", + "# if len(ds[var].dims)==2:\n", + "# ds[var].plot(x=\"time\", add_colorbar=False)\n", + "# else:\n", + "# ds[var].plot()\n", + "# plt.grid()\n", + "# plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fabdd802", + "metadata": {}, + "outputs": [], + "source": [ + "# example 2: using act plot\n", + "\n", + "# Uncomment the following cell to try the interactive plot (ctrl + /)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "962b4186", + "metadata": {}, + "outputs": [], + "source": [ + "# %matplotlib widget\n", + "# plt.clf()\n", + "\n", + "# available_variables = df_filter_4.var_name.values\n", + "\n", + "\n", + "# @widgets.interact(var=available_variables)\n", + "# def 
update(var = available_variables[0]):\n", + "\n", + "# i_display = act.plotting.TimeSeriesDisplay(ds_act)\n", + "# i_display.add_subplots((1,), figsize=(10, 4))\n", + "# ax = i_display.plot(var, subplot_index=(0,), set_title=f\"{var} ({ds_act[var].attrs['long_name']})\",)\n", + "\n", + "# ax.set_xlabel(f\"UTC Time starts at {ds.time.data[0]}\")\n", + "# ax.grid()\n", + "# plt.show()\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.16" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/NEPHELOMETER/aosnephdry.c1.ipynb b/VAPs/quicklook/NEPHELOMETER/aosnephdry.c1.ipynb new file mode 100644 index 00000000..67145537 --- /dev/null +++ b/VAPs/quicklook/NEPHELOMETER/aosnephdry.c1.ipynb @@ -0,0 +1,339 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# AOSNEPHDRY.C1 Plots\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/nephelometer) for more information about this vap." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib widget\n", + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import ipywidgets as widgets\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "# Data archive directory\n", + "DATA_DIR = r'/data/archive/'\n", + "\n", + "# Datastream info\n", + "DATASTREAM_NAME = 'aosnephdry'\n", + "DATA_LEVEL = 'c1'\n", + "LOCATIONS = [{'end_date': '2013-06-24', 'facility': 'M1', 'site': 'pvc', 'start_date': '2012-07-16'}]" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "9faaf875", + "metadata": {}, + "source": [ + "## Define site, facility, and date range" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ac6764f5", + "metadata": {}, + "outputs": [], + "source": [ + "print(\"The following locations and date ranges are available for this VAP:\")\n", + "display(pd.DataFrame(LOCATIONS, columns=['site', 'facility', 'start_date', 'end_date']))" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "8d132223", + "metadata": {}, + "source": [ + "#### Define site, facility, and date range (date format: YYYY-MM-DD) using the variables below:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e563983a", + "metadata": {}, + "outputs": [], + "source": [ + "site_facility = ( 'pvc', 'M1' )\n", + "\n", + "date_start = '2013-06-22'\n", + "date_end = '2013-06-24'" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "bccd3dfe-2f99-49a5-bace-ea37e8dd8fc3", + "metadata": {}, + "source": [ + "## Load data files\n", + "Load data files from /data/archive/" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cb4b9a26-c574-49c0-a521-658fa553e39e", + "metadata": 
{}, + "outputs": [], + "source": [ + "# Compile list of files\n", + "site, facility = site_facility\n", + "d_date_start = datetime.strptime(date_start, '%Y-%m-%d')\n", + "d_date_end = datetime.strptime(date_end, '%Y-%m-%d')\n", + "dir_path = os.path.join(DATA_DIR + site, site + DATASTREAM_NAME + facility + r'.' + DATA_LEVEL )\n", + "dir_path\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6be8f3dc", + "metadata": {}, + "outputs": [], + "source": [ + "from datetime import date, timedelta\n", + "import pandas as pd\n", + "\n", + "def get_ARM_formated_dates(start_date, end_date):\n", + " \"\"\"\n", + " Get a list of ARM conventional formated date lists, based on start_date and end_date(inclusive)\n", + " EXAMPLE:\n", + " get_ARM_formated_dates(start_date=\"20180219\", end_date=\"20180221\")\n", + " >> [\"20180219\", \"20180220\", \"20180221\"] \n", + " \"\"\"\n", + " \n", + " _start_date = pd.to_datetime(start_date)\n", + " _end_date = pd.to_datetime(end_date)\n", + " \n", + " delta = _end_date - _start_date # returns timedelta \n", + " dates = []\n", + "\n", + " for i in range(delta.days + 1):\n", + " day = _start_date + timedelta(days=i)\n", + " day_formated = day.strftime(format=\"%Y%m%d\")\n", + " dates.append(day_formated)\n", + " return dates\n", + "\n", + "\n", + "get_ARM_formated_dates(start_date=date_start, end_date=date_end)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "51feea2e", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter a list of files based on date pattern\n", + "import glob\n", + "dates = get_ARM_formated_dates(start_date=date_start, end_date=date_end)\n", + "files_filter = []\n", + "for date in dates:\n", + " files_filter += glob.glob(f'{dir_path}/*.{date}*.*')\n", + " files_filter\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0e5d0d7", + "metadata": {}, + "outputs": [], + "source": [ + "# Load files as a single dataset\n", + 
"files_list = files_filter \n", + "ds = act.io.armfiles.read_netcdf(files_list)\n", + "ds.clean.cleanup()\n", + "print(f'{len(files_list)} files loaded')\n", + "ds\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "4a551094-9ec0-4b64-b80a-9940573c2f50", + "metadata": {}, + "source": [ + "## Plot time series data\n", + "#### Define the list of variables to be plotted:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d09b789e-84f1-4605-846b-a72c110c8048", + "metadata": {}, + "outputs": [], + "source": [ + "variables_to_plot = ['Bs_B_Dry_Neph3W_1', 'Bs_G_Dry_Neph3W_1', 'Bs_R_Dry_Neph3W_1']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3458fb08-035b-4898-9253-0a94e6f9c97b", + "metadata": {}, + "outputs": [], + "source": [ + "ts_display = act.plotting.TimeSeriesDisplay(ds)\n", + "ts_display.add_subplots((len(variables_to_plot),), figsize = (9.5,4*len(variables_to_plot)))\n", + "\n", + "for i,v in enumerate(variables_to_plot):\n", + " ts_ax = ts_display.plot(v, subplot_index=(i,), set_title=ds.variables[v].attrs['long_name'],)\n", + " ts_ax.grid()\n", + "\n", + "plt.show()\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "194399aa-1907-452b-8ba9-bc31d7f60291", + "metadata": {}, + "source": [ + "## Quality check plots\n", + "#### Define variable for QC plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9a0ef63d-3eeb-48fc-a24a-43258b6134b8", + "metadata": {}, + "outputs": [], + "source": [ + "qc_variable = 'Bs_B_Dry_Neph3W_1'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "532663a3-4dc0-4497-bda8-018c5f91e1c4", + "metadata": {}, + "outputs": [], + "source": [ + "# QC Plot\n", + "if ('qc_'+qc_variable) in ds.variables:\n", + "\n", + " # Plot\n", + " qc_display = act.plotting.TimeSeriesDisplay(ds)\n", + " qc_display.add_subplots((2,), figsize = (9.5,10))\n", + " qc_ax = qc_display.plot(qc_variable, subplot_index=(0,), 
set_title=\"QC results on field: \" + qc_variable,)\n", + " qc_ax.grid()\n", + " qc_display.qc_flag_block_plot(qc_variable, subplot_index=(1,))\n", + "\n", + " plt.show()\n", + "else:\n", + " print(f'QC not available for the selected field: {qc_variable}')\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "2308db16-f362-4033-a11a-c3e5e75ad9ba", + "metadata": {}, + "source": [ + "## Field selection dropdown menu\n", + "Select variable to be plotted from a dropdown menu" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d5792fa8-2253-441a-8ac6-1e07a0345031", + "metadata": {}, + "outputs": [], + "source": [ + "plt.ioff()\n", + "\n", + "# populate dropdown menu with available variables \n", + "available_variables = [v for v in ds.variables if not('time' in v or v.startswith('qc_') or v.startswith('source_')) and 'long_name' in ds.variables[v].attrs]\n", + "d_variable = 'Bs_B_Dry_Neph3W_1'\n", + "dropdown = widgets.Dropdown(\n", + " options = [(ds.variables[v].attrs['long_name'], v) for v in available_variables],\n", + " value= d_variable,\n", + " description='Field:',\n", + " disabled=False,\n", + ")\n", + "dropdown.layout.margin = '0px 30% 0px 20%'\n", + "dropdown.layout.width = '50%'\n", + "\n", + "# set up display\n", + "i_display = act.plotting.TimeSeriesDisplay(ds)\n", + "i_display.add_subplots((1,), figsize = (9.5,5))\n", + "i_ax = i_display.plot(d_variable, subplot_index=(0,), set_title=ds.variables[d_variable].attrs['long_name'],)\n", + "i_ax.grid()\n", + "i_fig = i_display.fig\n", + "\n", + "# update plot callback function\n", + "def update_plot(change):\n", + " i_ax.cla()\n", + " i_ax_new = i_display.plot(change.new, subplot_index=(0,), set_title=ds.variables[change.new].attrs['long_name'],)\n", + " i_ax_new.grid()\n", + " i_fig.canvas.draw()\n", + " i_fig.canvas.flush_events()\n", + "\n", + "dropdown.observe(update_plot, names='value')\n", + "\n", + "widgets.AppLayout(\n", + " header=dropdown,\n", + " 
center=i_fig.canvas,\n", + " pane_heights=[1, 6,1]\n", + ")\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.12" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/NEPHELOMETER/aosnephwet.c1.ipynb b/VAPs/quicklook/NEPHELOMETER/aosnephwet.c1.ipynb new file mode 100644 index 00000000..44b1a62f --- /dev/null +++ b/VAPs/quicklook/NEPHELOMETER/aosnephwet.c1.ipynb @@ -0,0 +1,339 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# AOSNEPHWET.C1 Plots\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/nephelometer) for more information about this vap." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib widget\n", + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import ipywidgets as widgets\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "# Data archive directory\n", + "DATA_DIR = r'/data/archive/'\n", + "\n", + "# Datastream info\n", + "DATASTREAM_NAME = 'aosnephwet'\n", + "DATA_LEVEL = 'c1'\n", + "LOCATIONS = [{'end_date': '2013-06-24', 'facility': 'M1', 'site': 'pvc', 'start_date': '2012-07-16'}]" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "9faaf875", + "metadata": {}, + "source": [ + "## Define site, facility, and date range" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ac6764f5", + "metadata": {}, + "outputs": [], + "source": [ + "print(\"The following locations and date ranges are available for this VAP:\")\n", + "display(pd.DataFrame(LOCATIONS, columns=['site', 'facility', 'start_date', 'end_date']))" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "8d132223", + "metadata": {}, + "source": [ + "#### Define site, facility, and date range (date format: YYYY-MM-DD) using the variables below:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e563983a", + "metadata": {}, + "outputs": [], + "source": [ + "site_facility = ( 'pvc', 'M1' )\n", + "\n", + "date_start = '2013-06-22'\n", + "date_end = '2013-06-24'" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "bccd3dfe-2f99-49a5-bace-ea37e8dd8fc3", + "metadata": {}, + "source": [ + "## Load data files\n", + "Load data files from /data/archive/" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cb4b9a26-c574-49c0-a521-658fa553e39e", + "metadata": 
{}, + "outputs": [], + "source": [ + "# Compile list of files\n", + "site, facility = site_facility\n", + "d_date_start = datetime.strptime(date_start, '%Y-%m-%d')\n", + "d_date_end = datetime.strptime(date_end, '%Y-%m-%d')\n", + "dir_path = os.path.join(DATA_DIR + site, site + DATASTREAM_NAME + facility + r'.' + DATA_LEVEL )\n", + "dir_path\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6be8f3dc", + "metadata": {}, + "outputs": [], + "source": [ + "from datetime import date, timedelta\n", + "import pandas as pd\n", + "\n", + "def get_ARM_formated_dates(start_date, end_date):\n", + " \"\"\"\n", + " Get a list of ARM conventional formated date lists, based on start_date and end_date(inclusive)\n", + " EXAMPLE:\n", + " get_ARM_formated_dates(start_date=\"20180219\", end_date=\"20180221\")\n", + " >> [\"20180219\", \"20180220\", \"20180221\"] \n", + " \"\"\"\n", + " \n", + " _start_date = pd.to_datetime(start_date)\n", + " _end_date = pd.to_datetime(end_date)\n", + " \n", + " delta = _end_date - _start_date # returns timedelta \n", + " dates = []\n", + "\n", + " for i in range(delta.days + 1):\n", + " day = _start_date + timedelta(days=i)\n", + " day_formated = day.strftime(format=\"%Y%m%d\")\n", + " dates.append(day_formated)\n", + " return dates\n", + "\n", + "\n", + "get_ARM_formated_dates(start_date=date_start, end_date=date_end)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "51feea2e", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter a list of files based on date pattern\n", + "import glob\n", + "dates = get_ARM_formated_dates(start_date=date_start, end_date=date_end)\n", + "files_filter = []\n", + "for date in dates:\n", + " files_filter += glob.glob(f'{dir_path}/*.{date}*.*')\n", + " files_filter\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0e5d0d7", + "metadata": {}, + "outputs": [], + "source": [ + "# Load files as a single dataset\n", + 
"files_list = files_filter \n", + "ds = act.io.armfiles.read_netcdf(files_list)\n", + "ds.clean.cleanup()\n", + "print(f'{len(files_list)} files loaded')\n", + "ds\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "4a551094-9ec0-4b64-b80a-9940573c2f50", + "metadata": {}, + "source": [ + "## Plot time series data\n", + "#### Define the list of variables to be plotted:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d09b789e-84f1-4605-846b-a72c110c8048", + "metadata": {}, + "outputs": [], + "source": [ + "variables_to_plot = ['Bs_B_Wet_Neph3W_2', 'Bs_G_Wet_Neph3W_2', 'Bs_R_Wet_Neph3W_2']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3458fb08-035b-4898-9253-0a94e6f9c97b", + "metadata": {}, + "outputs": [], + "source": [ + "ts_display = act.plotting.TimeSeriesDisplay(ds)\n", + "ts_display.add_subplots((len(variables_to_plot),), figsize = (9.5,4*len(variables_to_plot)))\n", + "\n", + "for i,v in enumerate(variables_to_plot):\n", + " ts_ax = ts_display.plot(v, subplot_index=(i,), set_title=ds.variables[v].attrs['long_name'],)\n", + " ts_ax.grid()\n", + "\n", + "plt.show()\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "194399aa-1907-452b-8ba9-bc31d7f60291", + "metadata": {}, + "source": [ + "## Quality check plots\n", + "#### Define variable for QC plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9a0ef63d-3eeb-48fc-a24a-43258b6134b8", + "metadata": {}, + "outputs": [], + "source": [ + "qc_variable = 'Bs_B_Wet_Neph3W_2'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "532663a3-4dc0-4497-bda8-018c5f91e1c4", + "metadata": {}, + "outputs": [], + "source": [ + "# QC Plot\n", + "if ('qc_'+qc_variable) in ds.variables:\n", + "\n", + " # Plot\n", + " qc_display = act.plotting.TimeSeriesDisplay(ds)\n", + " qc_display.add_subplots((2,), figsize = (9.5,10))\n", + " qc_ax = qc_display.plot(qc_variable, subplot_index=(0,), 
set_title=\"QC results on field: \" + qc_variable,)\n", + " qc_ax.grid()\n", + " qc_display.qc_flag_block_plot(qc_variable, subplot_index=(1,))\n", + "\n", + " plt.show()\n", + "else:\n", + " print(f'QC not available for the selected field: {qc_variable}')\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "2308db16-f362-4033-a11a-c3e5e75ad9ba", + "metadata": {}, + "source": [ + "## Field selection dropdown menu\n", + "Select variable to be plotted from a dropdown menu" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d5792fa8-2253-441a-8ac6-1e07a0345031", + "metadata": {}, + "outputs": [], + "source": [ + "plt.ioff()\n", + "\n", + "# populate dropdown menu with available variables \n", + "available_variables = [v for v in ds.variables if not('time' in v or v.startswith('qc_') or v.startswith('source_')) and 'long_name' in ds.variables[v].attrs]\n", + "d_variable = 'Bs_B_Wet_Neph3W_2'\n", + "dropdown = widgets.Dropdown(\n", + " options = [(ds.variables[v].attrs['long_name'], v) for v in available_variables],\n", + " value= d_variable,\n", + " description='Field:',\n", + " disabled=False,\n", + ")\n", + "dropdown.layout.margin = '0px 30% 0px 20%'\n", + "dropdown.layout.width = '50%'\n", + "\n", + "# set up display\n", + "i_display = act.plotting.TimeSeriesDisplay(ds)\n", + "i_display.add_subplots((1,), figsize = (9.5,5))\n", + "i_ax = i_display.plot(d_variable, subplot_index=(0,), set_title=ds.variables[d_variable].attrs['long_name'],)\n", + "i_ax.grid()\n", + "i_fig = i_display.fig\n", + "\n", + "# update plot callback function\n", + "def update_plot(change):\n", + " i_ax.cla()\n", + " i_ax_new = i_display.plot(change.new, subplot_index=(0,), set_title=ds.variables[change.new].attrs['long_name'],)\n", + " i_ax_new.grid()\n", + " i_fig.canvas.draw()\n", + " i_fig.canvas.flush_events()\n", + "\n", + "dropdown.observe(update_plot, names='value')\n", + "\n", + "widgets.AppLayout(\n", + " header=dropdown,\n", + " 
center=i_fig.canvas,\n", + " pane_heights=[1, 6,1]\n", + ")\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.12" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/OKMSOIL/.ipynb_checkpoints/okmsoil.c1-checkpoint.ipynb b/VAPs/quicklook/OKMSOIL/.ipynb_checkpoints/okmsoil.c1-checkpoint.ipynb new file mode 100644 index 00000000..b2aa8667 --- /dev/null +++ b/VAPs/quicklook/OKMSOIL/.ipynb_checkpoints/okmsoil.c1-checkpoint.ipynb @@ -0,0 +1,2048 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# OKMSOIL.C1 Plots\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/okmsoil) for more information about this vap." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib widget\n", + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import ipywidgets as widgets\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "# Data archive directory\n", + "DATA_DIR = r'/data/archive/'\n", + "\n", + "# Datastream info\n", + "DATASTREAM_NAME = 'okmsoil'\n", + "DATA_LEVEL = 'c1'\n", + "LOCATIONS = [{'end_date': '2020-10-22', 'facility': 'X1', 'site': 'sgp', 'start_date': '1998-01-01'}]" + ] + }, + { + "cell_type": "markdown", + "id": "9faaf875", + "metadata": {}, + "source": [ + "## Define site, facility, and date range" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "ac6764f5", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The following locations and date ranges are available for this VAP:\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sitefacilitystart_dateend_date
0sgpX11998-01-012020-10-22
\n", + "
" + ], + "text/plain": [ + " site facility start_date end_date\n", + "0 sgp X1 1998-01-01 2020-10-22" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "print(\"The following locations and date ranges are available for this VAP:\")\n", + "display(pd.DataFrame(LOCATIONS, columns=['site', 'facility', 'start_date', 'end_date']))" + ] + }, + { + "cell_type": "markdown", + "id": "8d132223", + "metadata": {}, + "source": [ + "#### Define site, facility, and date range (date format: YYYY-MM-DD) using the variables below:" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "e563983a", + "metadata": {}, + "outputs": [], + "source": [ + "site_facility = ( 'sgp', 'X1' )\n", + "\n", + "date_start = '2020-10-20'\n", + "date_end = '2020-10-22'" + ] + }, + { + "cell_type": "markdown", + "id": "bccd3dfe-2f99-49a5-bace-ea37e8dd8fc3", + "metadata": {}, + "source": [ + "## Load data files\n", + "Load data files from /data/archive/" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "cb4b9a26-c574-49c0-a521-658fa553e39e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'/data/archive/sgp/sgpokmsoilX1.c1'" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Compile list of files\n", + "site, facility = site_facility\n", + "d_date_start = datetime.strptime(date_start, '%Y-%m-%d')\n", + "d_date_end = datetime.strptime(date_end, '%Y-%m-%d')\n", + "dir_path = os.path.join(DATA_DIR + site, site + DATASTREAM_NAME + facility + r'.' 
+ DATA_LEVEL )\n", + "dir_path\n" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "6be8f3dc", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['20201020', '20201021', '20201022']" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from datetime import date, timedelta\n", + "import pandas as pd\n", + "\n", + "def get_ARM_formated_dates(start_date, end_date):\n", + " \"\"\"\n", + " Get a list of ARM conventional formated date lists, based on start_date and end_date(inclusive)\n", + " EXAMPLE:\n", + " get_ARM_formated_dates(start_date=\"20180219\", end_date=\"20180221\")\n", + " >> [\"20180219\", \"20180220\", \"20180221\"] \n", + " \"\"\"\n", + " \n", + " _start_date = pd.to_datetime(start_date)\n", + " _end_date = pd.to_datetime(end_date)\n", + " \n", + " delta = _end_date - _start_date # returns timedelta \n", + " dates = []\n", + "\n", + " for i in range(delta.days + 1):\n", + " day = _start_date + timedelta(days=i)\n", + " day_formated = day.strftime(format=\"%Y%m%d\")\n", + " dates.append(day_formated)\n", + " return dates\n", + "\n", + "\n", + "get_ARM_formated_dates(start_date=date_start, end_date=date_end)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "51feea2e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['/data/archive/sgp/sgpokmsoilX1.c1/sgpokmsoilX1.c1.20201020.000000.nc',\n", + " '/data/archive/sgp/sgpokmsoilX1.c1/sgpokmsoilX1.c1.20201021.000000.nc',\n", + " '/data/archive/sgp/sgpokmsoilX1.c1/sgpokmsoilX1.c1.20201022.000000.nc']" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Filter a list of files based on date pattern\n", + "import glob\n", + "dates = get_ARM_formated_dates(start_date=date_start, end_date=date_end)\n", + "files_filter = []\n", + "for date in dates:\n", + " files_filter += glob.glob(f'{dir_path}/*.{date}*.*')\n", + " 
files_filter\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "b0e5d0d7", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "3 files loaded\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
<xarray.Dataset>\n",
+       "Dimensions:                      (time: 144, bound: 2, station_number: 133,\n",
+       "                                  depth: 3)\n",
+       "Coordinates:\n",
+       "  * time                         (time) datetime64[ns] 2020-10-20 ... 2020-10...\n",
+       "  * station_number               (station_number) float32 110.0 1.0 ... 108.0\n",
+       "  * depth                        (depth) int32 5 25 60\n",
+       "Dimensions without coordinates: bound\n",
+       "Data variables: (12/15)\n",
+       "    base_time                    (time) datetime64[ns] 2020-10-20 ... 2020-10-22\n",
+       "    time_offset                  (time) datetime64[ns] 2020-10-20 ... 2020-10...\n",
+       "    time_bounds                  (time, bound) object dask.array<chunksize=(48, 2), meta=np.ndarray>\n",
+       "    sensor_temperature_rise      (time, depth, station_number) float32 dask.array<chunksize=(48, 3, 133), meta=np.ndarray>\n",
+       "    qc_sensor_temperature_rise   (time, depth, station_number) int32 dask.array<chunksize=(48, 3, 133), meta=np.ndarray>\n",
+       "    matric_potential             (time, depth, station_number) float32 dask.array<chunksize=(48, 3, 133), meta=np.ndarray>\n",
+       "    ...                           ...\n",
+       "    fractional_water_index       (time, depth, station_number) float32 dask.array<chunksize=(48, 3, 133), meta=np.ndarray>\n",
+       "    qc_fractional_water_index    (time, depth, station_number) int32 dask.array<chunksize=(48, 3, 133), meta=np.ndarray>\n",
+       "    station                      (time, station_number) |S20 dask.array<chunksize=(48, 133), meta=np.ndarray>\n",
+       "    lat                          (time, station_number) float32 dask.array<chunksize=(48, 133), meta=np.ndarray>\n",
+       "    lon                          (time, station_number) float32 dask.array<chunksize=(48, 133), meta=np.ndarray>\n",
+       "    alt                          (time, station_number) float32 dask.array<chunksize=(48, 133), meta=np.ndarray>\n",
+       "Attributes: (12/53)\n",
+       "    Version:                          $State: xdc-sgp30okm-9.0-1.el5 $\n",
+       "    command_line:                     idl -R -n okmsoil -s sgp -f X1 -b 20201...\n",
+       "    dod_version:                      okmsoil-c1-1.0\n",
+       "    date:                             \n",
+       "    process_version:                  vap-okmsoil-1.0-1.el7\n",
+       "    idl_version:                      \n",
+       "    ...                               ...\n",
+       "    doi:                              10.5439/1432043\n",
+       "    history:                          created by user dsmgr on machine flint ...\n",
+       "    _file_dates:                      ['20201020', '20201021', '20201022']\n",
+       "    _file_times:                      ['000000', '000000', '000000']\n",
+       "    _datastream:                      sgpokmsoilX1.c1\n",
+       "    _arm_standards_flag:              1
" + ], + "text/plain": [ + "\n", + "Dimensions: (time: 144, bound: 2, station_number: 133,\n", + " depth: 3)\n", + "Coordinates:\n", + " * time (time) datetime64[ns] 2020-10-20 ... 2020-10...\n", + " * station_number (station_number) float32 110.0 1.0 ... 108.0\n", + " * depth (depth) int32 5 25 60\n", + "Dimensions without coordinates: bound\n", + "Data variables: (12/15)\n", + " base_time (time) datetime64[ns] 2020-10-20 ... 2020-10-22\n", + " time_offset (time) datetime64[ns] 2020-10-20 ... 2020-10...\n", + " time_bounds (time, bound) object dask.array\n", + " sensor_temperature_rise (time, depth, station_number) float32 dask.array\n", + " qc_sensor_temperature_rise (time, depth, station_number) int32 dask.array\n", + " matric_potential (time, depth, station_number) float32 dask.array\n", + " ... ...\n", + " fractional_water_index (time, depth, station_number) float32 dask.array\n", + " qc_fractional_water_index (time, depth, station_number) int32 dask.array\n", + " station (time, station_number) |S20 dask.array\n", + " lat (time, station_number) float32 dask.array\n", + " lon (time, station_number) float32 dask.array\n", + " alt (time, station_number) float32 dask.array\n", + "Attributes: (12/53)\n", + " Version: $State: xdc-sgp30okm-9.0-1.el5 $\n", + " command_line: idl -R -n okmsoil -s sgp -f X1 -b 20201...\n", + " dod_version: okmsoil-c1-1.0\n", + " date: \n", + " process_version: vap-okmsoil-1.0-1.el7\n", + " idl_version: \n", + " ... 
...\n", + " doi: 10.5439/1432043\n", + " history: created by user dsmgr on machine flint ...\n", + " _file_dates: ['20201020', '20201021', '20201022']\n", + " _file_times: ['000000', '000000', '000000']\n", + " _datastream: sgpokmsoilX1.c1\n", + " _arm_standards_flag: 1" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Load files as a single dataset\n", + "files_list = files_filter \n", + "ds = act.io.armfiles.read_netcdf(files_list)\n", + "ds.clean.cleanup()\n", + "print(f'{len(files_list)} files loaded')\n", + "ds\n" + ] + }, + { + "cell_type": "markdown", + "id": "4a551094-9ec0-4b64-b80a-9940573c2f50", + "metadata": {}, + "source": [ + "## Plot time series data\n", + "#### Define the list of variables to be plotted:" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "d09b789e-84f1-4605-846b-a72c110c8048", + "metadata": {}, + "outputs": [], + "source": [ + "variables_to_plot = ['sensor_temperature_rise', 'matric_potential', 'volumetric_water_content']" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "3458fb08-035b-4898-9253-0a94e6f9c97b", + "metadata": {}, + "outputs": [ + { + "ename": "TypeError", + "evalue": "Dimensions of C (133, 3, 144) should be one smaller than X(144) and Y(3) while using shading='flat' see help(pcolormesh)", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[9], line 5\u001b[0m\n\u001b[1;32m 2\u001b[0m ts_display\u001b[38;5;241m.\u001b[39madd_subplots((\u001b[38;5;28mlen\u001b[39m(variables_to_plot),), figsize \u001b[38;5;241m=\u001b[39m (\u001b[38;5;241m9.5\u001b[39m,\u001b[38;5;241m4\u001b[39m\u001b[38;5;241m*\u001b[39m\u001b[38;5;28mlen\u001b[39m(variables_to_plot)))\n\u001b[1;32m 4\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m i,v 
\u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28menumerate\u001b[39m(variables_to_plot):\n\u001b[0;32m----> 5\u001b[0m ts_ax \u001b[38;5;241m=\u001b[39m \u001b[43mts_display\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mplot\u001b[49m\u001b[43m(\u001b[49m\u001b[43mv\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43msubplot_index\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mi\u001b[49m\u001b[43m,\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mset_title\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mds\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mvariables\u001b[49m\u001b[43m[\u001b[49m\u001b[43mv\u001b[49m\u001b[43m]\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mattrs\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mlong_name\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 6\u001b[0m ts_ax\u001b[38;5;241m.\u001b[39mgrid()\n\u001b[1;32m 8\u001b[0m plt\u001b[38;5;241m.\u001b[39mshow()\n", + "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/act/plotting/timeseriesdisplay.py:588\u001b[0m, in \u001b[0;36mTimeSeriesDisplay.plot\u001b[0;34m(self, field, dsname, subplot_index, cmap, set_title, add_nan, day_night_background, invert_y_axis, abs_limits, time_rng, y_rng, use_var_for_y, set_shading, assessment_overplot, overplot_marker, overplot_behind, overplot_markersize, assessment_overplot_category, assessment_overplot_category_color, force_line_plot, labels, cbar_label, cbar_h_adjust, secondary_y, y_axis_flag_meanings, colorbar_labels, cb_friendly, **kwargs)\u001b[0m\n\u001b[1;32m 586\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124medgecolors\u001b[39m\u001b[38;5;124m'\u001b[39m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m kwargs\u001b[38;5;241m.\u001b[39mkeys():\n\u001b[1;32m 
587\u001b[0m kwargs[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124medgecolors\u001b[39m\u001b[38;5;124m'\u001b[39m] \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mface\u001b[39m\u001b[38;5;124m'\u001b[39m\n\u001b[0;32m--> 588\u001b[0m mesh \u001b[38;5;241m=\u001b[39m \u001b[43max\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mpcolormesh\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 589\u001b[0m \u001b[43m \u001b[49m\u001b[43mnp\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43masarray\u001b[49m\u001b[43m(\u001b[49m\u001b[43mxdata\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 590\u001b[0m \u001b[43m \u001b[49m\u001b[43mydata\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 591\u001b[0m \u001b[43m \u001b[49m\u001b[43mdata\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mtranspose\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 592\u001b[0m \u001b[43m \u001b[49m\u001b[43mshading\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mset_shading\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 593\u001b[0m \u001b[43m \u001b[49m\u001b[43mcmap\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcmap\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 594\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 595\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 597\u001b[0m \u001b[38;5;66;03m# Set Title\u001b[39;00m\n\u001b[1;32m 598\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m set_title \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n", + "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/matplotlib/__init__.py:1442\u001b[0m, in \u001b[0;36m_preprocess_data..inner\u001b[0;34m(ax, data, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1439\u001b[0m \u001b[38;5;129m@functools\u001b[39m\u001b[38;5;241m.\u001b[39mwraps(func)\n\u001b[1;32m 1440\u001b[0m 
\u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21minner\u001b[39m(ax, \u001b[38;5;241m*\u001b[39margs, data\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs):\n\u001b[1;32m 1441\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m data \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m-> 1442\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[43max\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;28;43mmap\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43msanitize_sequence\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43margs\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1444\u001b[0m bound \u001b[38;5;241m=\u001b[39m new_sig\u001b[38;5;241m.\u001b[39mbind(ax, \u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n\u001b[1;32m 1445\u001b[0m auto_label \u001b[38;5;241m=\u001b[39m (bound\u001b[38;5;241m.\u001b[39marguments\u001b[38;5;241m.\u001b[39mget(label_namer)\n\u001b[1;32m 1446\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m bound\u001b[38;5;241m.\u001b[39mkwargs\u001b[38;5;241m.\u001b[39mget(label_namer))\n", + "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/matplotlib/axes/_axes.py:6220\u001b[0m, in \u001b[0;36mAxes.pcolormesh\u001b[0;34m(self, alpha, norm, cmap, vmin, vmax, shading, antialiased, *args, **kwargs)\u001b[0m\n\u001b[1;32m 6217\u001b[0m shading \u001b[38;5;241m=\u001b[39m shading\u001b[38;5;241m.\u001b[39mlower()\n\u001b[1;32m 6218\u001b[0m kwargs\u001b[38;5;241m.\u001b[39msetdefault(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124medgecolors\u001b[39m\u001b[38;5;124m'\u001b[39m, 
\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mnone\u001b[39m\u001b[38;5;124m'\u001b[39m)\n\u001b[0;32m-> 6220\u001b[0m X, Y, C, shading \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_pcolorargs\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mpcolormesh\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 6221\u001b[0m \u001b[43m \u001b[49m\u001b[43mshading\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mshading\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mkwargs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 6222\u001b[0m coords \u001b[38;5;241m=\u001b[39m np\u001b[38;5;241m.\u001b[39mstack([X, Y], axis\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m-\u001b[39m\u001b[38;5;241m1\u001b[39m)\n\u001b[1;32m 6223\u001b[0m \u001b[38;5;66;03m# convert to one dimensional array, except for 3D RGB(A) arrays\u001b[39;00m\n", + "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/matplotlib/axes/_axes.py:5751\u001b[0m, in \u001b[0;36mAxes._pcolorargs\u001b[0;34m(self, funcname, shading, *args, **kwargs)\u001b[0m\n\u001b[1;32m 5749\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m shading \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mflat\u001b[39m\u001b[38;5;124m'\u001b[39m:\n\u001b[1;32m 5750\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m (Nx, Ny) \u001b[38;5;241m!=\u001b[39m (ncols \u001b[38;5;241m+\u001b[39m \u001b[38;5;241m1\u001b[39m, nrows \u001b[38;5;241m+\u001b[39m \u001b[38;5;241m1\u001b[39m):\n\u001b[0;32m-> 5751\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mTypeError\u001b[39;00m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mDimensions of C 
\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mC\u001b[38;5;241m.\u001b[39mshape\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m should\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 5752\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m be one smaller than X(\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mNx\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m) and Y(\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mNy\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m)\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 5753\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m while using shading=\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mflat\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 5754\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m see help(\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mfuncname\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m)\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 5755\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m: \u001b[38;5;66;03m# ['nearest', 'gouraud']:\u001b[39;00m\n\u001b[1;32m 5756\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m (Nx, Ny) \u001b[38;5;241m!=\u001b[39m (ncols, nrows):\n", + "\u001b[0;31mTypeError\u001b[0m: Dimensions of C (133, 3, 144) should be one smaller than X(144) and Y(3) while using shading='flat' see help(pcolormesh)" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "dc399e8005384c0e9f15675a731f43f7", + "version_major": 2, + "version_minor": 0 + }, + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAA7YAAASwCAYAAADPBNYLAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/bCgiHAAAACXBIWXMAAA9hAAAPYQGoP6dpAABi0UlEQVR4nO3df3RV5Z3o/08gkKA1sUAJIIjRaqXlqkO4IrF8Ha3GQceWGeeKY5eog/c2Vy0DqV5FZvmD5axMO6tO6w9Qr6DjXWgz/hxmbkbNzFhFwRlJg+MIrb1CDWgiTRwT1DYI7O8fLjJNE5Rf5yQPvF5rnT/Ow7OTZ283cb/Z5+QUZFmWBQAAACRqUH8vAAAAAPaHsAUAACBpwhYAAICkCVsAAACSJmwBAABImrAFAAAgacIWAACApAlbAAAAkiZsAQAASJqwBQAAIGnCFgAAgKQJWwAAAJImbAEAAEiasAUAACBpwhYAAICkCVsAAACSJmwBAABImrAFAAAgacIWAACApAlbAAAAkiZsAQAASJqwBQAAIGnCFgAAgKQJWwAAAJImbAEAAEiasAUAACBpwhYAAICkCVsAAACSJmwBAABImrAFAAAgacIWAACApAlbAAAAkiZsAQAASJqwBQAAIGnCFgAAgKQJWwAAAJImbAEAAEiasAUAACBpwhYAAICkCVsAAACSJmwBAABImrAFAAAgacIWAACApAlbAAAAkiZsAQAASJqwBQAAIGnCFgAAgKQJWwAAAJImbAEAAEiasAUAACBpwhYAAICkCVsAAACSJmwBAABImrAFAAAgacIWAACApAlbAAAAkiZsAQAASJqwBQAAIGnCFgAAgKQJWwAAAJImbAEAAEiasAUAACBpwhYAAICkCVsAAACSJmwBAABImrAFAAAgacIWAACApAlbAAAAkiZsAQAASJqwBQAAIGnCFgAAgKQJWwAAAJImbAEAAEiasAUAACBpwhYAAICkCVsAAACSJmwBAABImrAFAAAgacIWAACApAlbAAAAkiZsAQAASJqwBQAAIGnCFgAAgKQJWwAAAJImbAEAAEiasAUAACBpwhYAAICkCVsAAACSJmwBAABImrAFAAAgacIWAACApAlbAAAAkiZsAQAASJqwBQAAIGnCFgAAgKQJWwAAAJImbAEAAEiasAUAACBpwhYAAICkCVsAAACSJmwBAABImrAFAAAgacIWAACApAlbAAAAkiZsAQAASJqwBQAAIGnCFgAAgKQJWwAAAJImbAEAAEiasAUAACBpwhYAAICkCVsAAACSJmwBAABImrAFAAAgacIWAACApAlbAAAAkiZsAQAASJqwBQAAIGnCFgAAgKQJWwAAAJImbAEAAEiasAUAACBpwhYAAICkCVsAAACSJmwBAABImrAFAAAgacIWAACApAlbAAAAkiZsAQAASJqwBQAAIGnCFgAAgKQJWwAAAJImbAEAAEiasAUAACBpwhYAAICkCVsAAACSJmwBAABImrAFAAAgacIWAACApAnbHHvhhRfiggsuiLFjx0ZBQUE89dRTn7nN888/HxUVFVFcXBzHHnts3HPPPblfKAAAQKKEbY59+OGHcfLJJ8ddd921R/M3btwY5513XkyfPj2amprixhtvjLlz58bjjz+e45UCAACkqSDLsqy/F3GoKCgoiCeffDJmzpy52znXX399rFixItavX989Vl1dHa+++mqsXr06D6sEAABIS2F/L4CeVq9eHVVVVT3Gzj333Fi6dGl8/PHHMWTIkD636+rqiq6uru7nO3fujPfeey9GjBgRBQUFOV0zAAAc6rIsi61bt8bYsWNj0CAvjM03YTvAtLa2RllZWY+xsrKy2L59e7S1tcWYMWP63K62tjZuvfXWfCwRAADYjU2bNsW4ceP6exmHHGE7AP32HdZdrxb/tDuvCxYsiJqamu7nHR0dcfTRR8emTZuipKQkNwsFAAAiIqKzszPGjx8fRxxxRH8v5ZAkbAeY0aN
HR2tra4+xLVu2RGFhYYwYMWK32xUVFUVRUVGv8ZKSEmELAAB54m2A/cOLvweYadOmRUNDQ4+xZ599NqZMmbLb99cCAAAcyoRtjn3wwQexdu3aWLt2bUR88nE+a9eujebm5oj45CXEs2fP7p5fXV0db731VtTU1MT69etj2bJlsXTp0rj22mv7Y/kAAAADnpci59iaNWvizDPP7H6+632wl112WTz44IPR0tLSHbkREeXl5VFfXx/z58+Pu+++O8aOHRt33HFHXHjhhXlfOwAAQAp8ju1BqrOzM0pLS6Ojo8N7bAEAIMdcf/cvL0UGAAAgacIWAACApAlbAAAAkiZsAQAASJqwBQAAIGnCFgAAgKQJWwAAAJImbAEAAEiasAUAACBpwhYAAICkCVsAAACSJmwBAABImrAFAAAgacIWAACApAlbAAAAkiZsAQAASJqwBQAAIGnCFgAAgKQJWwAAAJImbAEAAEiasAUAACBpwhYAAICkCVsAAACSJmwBAABImrAFAAAgacIWAACApAlbAAAAkiZsAQAASJqwBQAAIGnCFgAAgKQJWwAAAJImbAEAAEiasAUAACBpwhYAAICkCVsAAACSJmwBAABImrAFAAAgacIWAACApAlbAAAAkiZsAQAASJqwBQAAIGnCFgAAgKQJWwAAAJImbPNk8eLFUV5eHsXFxVFRURErV6781PnLly+Pk08+OQ477LAYM2ZMXHHFFdHe3p6n1QIAAKRD2OZBXV1dzJs3LxYuXBhNTU0xffr0mDFjRjQ3N/c5/8UXX4zZs2fHnDlz4vXXX49HH300XnnllbjyyivzvHIAAICBT9jmwe233x5z5syJK6+8MiZOnBg/+MEPYvz48bFkyZI+57/88stxzDHHxNy5c6O8vDy++tWvxre+9a1Ys2ZNnlcOAAAw8AnbHNu2bVs0NjZGVVVVj/GqqqpYtWpVn9tUVlbG5s2bo76+PrIsi3fffTcee+yxOP/883f7fbq6uqKzs7PHAwAA4FAgbHOsra0tduzYEWVlZT3Gy8rKorW1tc9tKisrY/ny5TFr1qwYOnRojB49Oo488si48847d/t9amtro7S0tPsxfvz4A7ofAAAAA5WwzZOCgoIez7Ms6zW2y7p162Lu3Llx0003RWNjYzz99NOxcePGqK6u3u3XX7BgQXR0dHQ/Nm3adEDXDwAAMFAV9vcCDnYjR46MwYMH97o7u2XLll53cXepra2N008/Pa677rqIiDjppJPi8MMPj+nTp8dtt90WY8aM6bVNUVFRFBUVHfgdAAAAGODcsc2xoUOHRkVFRTQ0NPQYb2hoiMrKyj63+eijj2LQoJ7/aQYPHhwRn9zpBQAA4D8J2zyoqamJ+++/P5YtWxbr16+P+fPnR3Nzc/dLixcsWBCzZ8/unn/BBRfEE088EUuWLIkNGzbESy+9FHPnzo1TTz01xo4d21+7AQAAMCB5KXIezJo1K9rb22PRokXR0tISkyZNivr6+pgwYUJERLS0tPT4TNvLL788tm7dGnfddVd85zvfiSOPPDLOOuus+O53v9tfuwAAADBgFWRe23pQ6uzsjNLS0ujo6IiSkpL+Xg4AABzUXH/3Ly9FBgAAIGnCFgAAgKQJWwAAAJImbAEAAEiasAUAACBpwhYAAICkCVsAAACSJmwBAABImrAFAAAgacIWAACApAlbAAAAkiZsAQAASJqwBQAAIGnCFgAAgKQJWwAAAJImbAEAAEiasAUAACBpwhYAAICkCVsAAACSJmwBAABImrAFAAAgacIWAACApAlbAAAAkiZsAQAASJqwBQAAIGnCFgAAgKQJWwAAAJImbAEAAEiasAUAACBpwhYAAICkCVsAAACSJmwBAABImrAFAAAgacIWAACApAlbAAAAkiZsAQAASJqwBQAAIGnCFgAAgKQJWwAAAJImbAEAAEiasAUAACBpwhYAAICkCVsAAACSJmzzZPHixVFeXh7FxcVRUVERK1eu/NT
5XV1dsXDhwpgwYUIUFRXFcccdF8uWLcvTagEAANJR2N8LOBTU1dXFvHnzYvHixXH66afHvffeGzNmzIh169bF0Ucf3ec2F110Ubz77ruxdOnS+OIXvxhbtmyJ7du353nlAAAAA19BlmVZfy/iYDd16tSYPHlyLFmypHts4sSJMXPmzKitre01/+mnn46LL744NmzYEMOHD9+n79nZ2RmlpaXR0dERJSUl+7x2AADgs7n+7l9eipxj27Zti8bGxqiqquoxXlVVFatWrepzmxUrVsSUKVPie9/7Xhx11FFxwgknxLXXXhu/+tWv8rFkAACApHgpco61tbXFjh07oqysrMd4WVlZtLa29rnNhg0b4sUXX4zi4uJ48skno62tLa666qp47733dvs+266urujq6up+3tnZeeB2AgAAYABzxzZPCgoKejzPsqzX2C47d+6MgoKCWL58eZx66qlx3nnnxe233x4PPvjgbu/a1tbWRmlpafdj/PjxB3wfAAAABiJhm2MjR46MwYMH97o7u2XLll53cXcZM2ZMHHXUUVFaWto9NnHixMiyLDZv3tznNgsWLIiOjo7ux6ZNmw7cTgAAAAxgwjbHhg4dGhUVFdHQ0NBjvKGhISorK/vc5vTTT4933nknPvjgg+6xN954IwYNGhTjxo3rc5uioqIoKSnp8QAAADgUCNs8qKmpifvvvz+WLVsW69evj/nz50dzc3NUV1dHxCd3W2fPnt09/5JLLokRI0bEFVdcEevWrYsXXnghrrvuuviTP/mTGDZsWH/tBgAAwIDkl0flwaxZs6K9vT0WLVoULS0tMWnSpKivr48JEyZERERLS0s0Nzd3z//c5z4XDQ0N8e1vfzumTJkSI0aMiIsuuihuu+22/toFAACAAcvn2B6kfI4WAADkj+vv/uWlyAAAACRN2AIAAJA0YQsAAEDShC0AAABJE7YAAAAkTdgCAACQNGELAABA0oQtAAAASRO2AAAAJE3YAgAAkDRhCwAAQNKELQAAAEkTtgAAACRN2AIAAJA0YQsAAEDShC0AAABJE7YAAAAkTdgCAACQNGELAABA0oQtAAAASRO2AAAAJE3YAgAAkDRhCwAAQNKELQAAAEkTtgAAACRN2AIAAJA0YQsAAEDShC0AAABJE7YAAAAkTdgCAACQNGELAABA0oQtAAAASRO2AAAAJE3YAgAAkDRhCwAAQNKELQAAAEkTtgAAACRN2AIAAJA0YQsAAEDShC0AAABJE7YAAAAkTdgCAACQNGELAABA0oQtAAAASRO2ebJ48eIoLy+P4uLiqKioiJUrV+7Rdi+99FIUFhbGKaecktsFAgAAJErY5kFdXV3MmzcvFi5cGE1NTTF9+vSYMWNGNDc3f+p2HR0dMXv27Pja176Wp5UCAACkpyDLsqy/F3Gwmzp1akyePDmWLFnSPTZx4sSYOXNm1NbW7na7iy++OI4//vgYPHhwPPXUU7F27do9/p6dnZ1RWloaHR0dUVJSsj/LBwAAPoPr7/7ljm2Obdu2LRobG6OqqqrHeFVVVaxatWq32z3wwAPx5ptvxs0337xH36erqys6Ozt7PAAAAA4FwjbH2traYseOHVFWVtZjvKysLFpbW/vc5uc//3nccMMNsXz58igsLNyj71NbWxulpaXdj/Hjx+/32gEAAFIgbPOkoKCgx/Msy3qNRUTs2LEjLrnkkrj11lvjhBNO2OOvv2DBgujo6Oh+bNq0ab/XDAAAkII9ux3IPhs5cmQMHjy4193ZLVu29LqLGxGxdevWWLNmTTQ1NcU111wTERE7d+6MLMuisLAwnn322TjrrLN6bVdUVBRFRUW52QkAAIABzB3bHBs6dGhUVFREQ0NDj/GGhoaorKzsNb+kpCRee+21WLt2bfejuro6vvSlL8XatWtj6tSp+Vo6AABAEtyxzYOampq49NJLY8qUKTFt2rS47777orm5OaqrqyPik5cRv/322/HQQw/FoEGDYtKkST2
2HzVqVBQXF/caBwAAQNjmxaxZs6K9vT0WLVoULS0tMWnSpKivr48JEyZERERLS8tnfqYtAAAAffM5tgcpn6MFAAD54/q7f3mPLQAAAEkTtgAAACRN2AIAAJA0YQsAAEDShC0AAABJE7YAAAAkTdgCAACQNGELAABA0oQtAAAASRO2AAAAJE3YAgAAkDRhCwAAQNKELQAAAEkTtgAAACRN2AIAAJA0YQsAAEDShC0AAABJE7YAAAAkTdgCAACQNGELAABA0oQtAAAASRO2AAAAJE3YAgAAkDRhCwAAQNKELQAAAEkTtgAAACRN2AIAAJA0YQsAAEDShC0AAABJE7YAAAAkTdgCAACQNGELAABA0oQtAAAASRO2AAAAJE3YAgAAkDRhCwAAQNKELQAAAEkTtgAAACRN2AIAAJA0YQsAAEDShC0AAABJE7YAAAAkTdgCAACQNGGbJ4sXL47y8vIoLi6OioqKWLly5W7nPvHEE3HOOefEF77whSgpKYlp06bFM888k8fVAgAApEPY5kFdXV3MmzcvFi5cGE1NTTF9+vSYMWNGNDc39zn/hRdeiHPOOSfq6+ujsbExzjzzzLjggguiqakpzysHAAAY+AqyLMv6exEHu6lTp8bkyZNjyZIl3WMTJ06MmTNnRm1t7R59ja985Ssxa9asuOmmm/ZofmdnZ5SWlkZHR0eUlJTs07oBAIA94/q7f7ljm2Pbtm2LxsbGqKqq6jFeVVUVq1at2qOvsXPnzti6dWsMHz58t3O6urqis7OzxwMAAOBQIGxzrK2tLXbs2BFlZWU9xsvKyqK1tXWPvsb3v//9+PDDD+Oiiy7a7Zza2tooLS3tfowfP36/1g0AAJAKYZsnBQUFPZ5nWdZrrC+PPPJI3HLLLVFXVxejRo3a7bwFCxZER0dH92PTpk37vWYAAIAUFPb3Ag52I0eOjMGDB/e6O7tly5Zed3F/W11dXcyZMyceffTROPvssz91blFRURQVFe33egEAAFLjjm2ODR06NCoqKqKhoaHHeENDQ1RWVu52u0ceeSQuv/zyePjhh+P888/P9TIBAACS5Y5tHtTU1MSll14aU6ZMiWnTpsV9990Xzc3NUV1dHRGfvIz47bffjoceeigiPona2bNnxw9/+MM47bTTuu/2Dhs2LEpLS/ttPwAAAAYiYZsHs2bNivb29li0aFG0tLTEpEmTor6+PiZMmBARES0tLT0+0/bee++N7du3x9VXXx1XX3119/hll10WDz74YL6XDwAAMKD5HNuDlM/RAgCA/HH93b+8xxYAAICkCVsAAACSJmwBAABImrAFAAAgacIWAACApAlbAAAAkiZsAQAASJqwBQAAIGnCFgAAgKQJWwAAAJImbAEAAEiasAUAACBpwhYAAICkCVsAAACSJmwBAABImrAFAAAgacIWAACApAlbAAAAkiZsAQAASJqwBQAAIGnCFgAAgKQJWwAAAJImbAEAAEiasAUAACBpwhYAAICkCVsAAACSJmwBAABImrAFAAAgacIWAACApAlbAAAAkiZsAQAASJqwBQAAIGnCFgAAgKQJWwAAAJImbAEAAEiasAUAACBpwhYAAICkCVsAAACSJmwBAABImrAFAAAgacIWAACApAlbAAAAkiZsAQAASJqwzZPFixdHeXl5FBcXR0VFRaxcufJT5z///PNRUVERxcXFceyxx8Y999yTp5UCAACkRdjmQV1dXcybNy8WLlwYTU1NMX369JgxY0Y0Nzf3OX/jxo1x3nnnxfTp06OpqSluvPHGmDt3bjz++ON5XjkAAMDAV5BlWdbfizjYTZ06NSZPnhxLlizpHps4cWLMnDkzamtre82//vrrY8WKFbF+/fruserq6nj11Vdj9erVe/Q9Ozs7o7S0NDo6OqKkpGT/dwIAANgt19/9yx3bHNu2bVs0NjZGVVVVj/GqqqpYtWpVn9usXr261/xzzz031qxZEx9//HHO1goAAJCiwv5ewMGura0
tduzYEWVlZT3Gy8rKorW1tc9tWltb+5y/ffv2aGtrizFjxvTapqurK7q6urqfd3R0RMQn/3IEAADk1q7rbi+I7R/CNk8KCgp6PM+yrNfYZ83va3yX2trauPXWW3uNjx8/fm+XCgAA7KP29vYoLS3t72UccoRtjo0cOTIGDx7c6+7sli1bet2V3WX06NF9zi8sLIwRI0b0uc2CBQuipqam+/n7778fEyZMiObmZn+xPkVnZ2eMHz8+Nm3a5L0Qu+EY7RnH6bM5RnvGcdozjtNnc4z2jOP02RyjPdPR0RFHH310DB8+vL+XckgStjk2dOjQqKioiIaGhviDP/iD7vGGhob4xje+0ec206ZNi7/7u7/rMfbss8/GlClTYsiQIX1uU1RUFEVFRb3GS0tL/QDaAyUlJY7TZ3CM9ozj9Nkcoz3jOO0Zx+mzOUZ7xnH6bI7Rnhk0yK8x6g+Oeh7U1NTE/fffH8uWLYv169fH/Pnzo7m5OaqrqyPik7uts2fP7p5fXV0db731VtTU1MT69etj2bJlsXTp0rj22mv7axcAAAAGLHds82DWrFnR3t4eixYtipaWlpg0aVLU19fHhAkTIiKipaWlx2falpeXR319fcyfPz/uvvvuGDt2bNxxxx1x4YUX9tcuAAAADFjCNk+uuuqquOqqq/r8swcffLDX2BlnnBE/+clP9vn7FRUVxc0339zny5P5T47TZ3OM9ozj9Nkcoz3jOO0Zx+mzOUZ7xnH6bI7RnnGc+ldB5vdRAwAAkDDvsQUAACBpwhYAAICkCVsAAACSJmwHqNra2viv//W/xhFHHBGjRo2KmTNnxs9+9rMec7Isi1tuuSXGjh0bw4YNi9/93d+N119/vcecrq6u+Pa3vx0jR46Mww8/PL7+9a/H5s2be8z5j//4j7j00kujtLQ0SktL49JLL433338/17t4QOTzOP35n/95VFZWxmGHHRZHHnlkrnftgMnXMfrFL34Rc+bMifLy8hg2bFgcd9xxcfPNN8e2bdvysp/7K5/n0te//vU4+uijo7i4OMaMGROXXnppvPPOOznfx/2Vz2P0m3NPOeWUKCgoiLVr1+Zq1w6ofB6nY445JgoKCno8brjhhpzv44GQ7/Pp//7f/xtTp06NYcOGxciRI+MP//APc7p/B0K+jtGPf/zjXufRrscrr7ySl33dH/k8l9544434xje+ESNHjoySkpI4/fTT47nnnsv5Ph4I+TxOP/nJT+Kcc86JI488MkaMGBH/43/8j/jggw9yvo/760Ado/vuuy9+93d/N0pKSqKgoKDP6+qUr78HrIwB6dxzz80eeOCB7N///d+ztWvXZueff3529NFHZx988EH3nL/4i7/IjjjiiOzxxx/PXnvttWzWrFnZmDFjss7Ozu451dXV2VFHHZU1NDRkP/nJT7IzzzwzO/nkk7Pt27d3z/m93/u9bNKkSdmqVauyVatWZZMmTcp+//d/P6/7u6/yeZxuuumm7Pbbb89qamqy0tLSfO7mfsnXMfqHf/iH7PLLL8+eeeaZ7M0338z+9m//Nhs1alT2ne98J+/7vC/yeS7dfvvt2erVq7Nf/OIX2UsvvZRNmzYtmzZtWl73d1/k8xjtMnfu3GzGjBlZRGRNTU352M39ls/jNGHChGzRokVZS0tL92Pr1q153d99lc/j9Nhjj2Wf//znsyVLlmQ/+9nPsp/+9KfZo48+mtf93Rf5OkZdXV09zqGWlpbsyiuvzI455phs586ded/vvZXPc+mLX/xidt5552Wvvvpq9sYbb2RXXXVVdthhh2UtLS153ed9ka/j9Pbbb2ef//zns+rq6uynP/1p9q//+q9ZZWVlduGFF+Z9n/fWgTpGf/VXf5XV1tZmtbW1WURk//Ef/9Hre6V8/T1QCdtEbNmyJYuI7Pnnn8+yLMt27tyZjR49OvuLv/iL7jm//vWvs9LS0uyee+7JsizL3n///WzIkCHZj370o+45b7/9djZo0KD
s6aefzrIsy9atW5dFRPbyyy93z1m9enUWEdlPf/rTfOzaAZWr4/SbHnjggaTC9rfl4xjt8r3vfS8rLy/P0Z7kVj6P09/+7d9mBQUF2bZt23K0N7mR62NUX1+fnXjiidnrr7+eVNj+tlwepwkTJmR/9Vd/lZ8dybFcHaePP/44O+qoo7L7778/j3uTG/n6ubRt27Zs1KhR2aJFi3K4N7mTq+P0y1/+MouI7IUXXuie09nZmUVE9o//+I/52LUDKlfH6d57781GjRqV7dixo3tOU1NTFhHZz3/+83zs2gGzL8foNz333HN9hu3Bdv09UHgpciI6OjoiImL48OEREbFx48ZobW2Nqqqq7jlFRUVxxhlnxKpVqyIiorGxMT7++OMec8aOHRuTJk3qnrN69eooLS2NqVOnds857bTTorS0tHtOSnJ1nA4m+TxGHR0d3d8nNfk6Tu+9914sX748KisrY8iQIbnanZzI5TF6991347//9/8e/+f//J847LDD8rE7OZPrc+m73/1ujBgxIk455ZT48z//82Re/v/bcnWcfvKTn8Tbb78dgwYNit/5nd+JMWPGxIwZM3q9dDAF+fq5tGLFimhra4vLL788R3uSW7k6TiNGjIiJEyfGQw89FB9++GFs37497r333igrK4uKiop87d4Bk6vj1NXVFUOHDo1Bg/4zM4YNGxYRES+++GJud+oA25djtCcOtuvvgULYJiDLsqipqYmvfvWrMWnSpIiIaG1tjYiIsrKyHnPLysq6/6y1tTWGDh0an//85z91zqhRo3p9z1GjRnXPSUUuj9PBIp/H6M0334w777wzqqurD/Ru5Fw+jtP1118fhx9+eIwYMSKam5vjb//2b3O1OzmRy2OUZVlcfvnlUV1dHVOmTMn1ruRUrs+lP/3TP40f/ehH8dxzz8U111wTP/jBD+Kqq67K5S7lRC6P04YNGyIi4pZbbok/+7M/i7//+7+Pz3/+83HGGWfEe++9l9P9OpDy+fN76dKlce6558b48eMP9G7kXC6PU0FBQTQ0NERTU1McccQRUVxcHH/1V38VTz/9dFK/eyMit8fprLPOitbW1vjLv/zL2LZtW/zHf/xH3HjjjRER0dLSktP9OpD29RjtiYPp+nsgEbYJuOaaa+Lf/u3f4pFHHun1ZwUFBT2eZ1nWa+y3/facvubvydcZaHJ9nA4G+TpG77zzTvze7/1e/Lf/9t/iyiuv3L9F94N8HKfrrrsumpqa4tlnn43BgwfH7NmzI8uy/V98nuTyGN15553R2dkZCxYsOHAL7ie5Ppfmz58fZ5xxRpx00klx5ZVXxj333BNLly6N9vb2A7MDeZLL47Rz586IiFi4cGFceOGFUVFREQ888EAUFBTEo48+eoD2IPfy9fN78+bN8cwzz8ScOXP2b8H9JJfHKcuyuOqqq2LUqFGxcuXK+Nd//df4xje+Eb//+7+fVLBF5PY4feUrX4m//uu/ju9///tx2GGHxejRo+PYY4+NsrKyGDx48IHbiRw70Mfos77Gvn4d/pOwHeC+/e1vx4oVK+K5556LcePGdY+PHj06IqLXv+ps2bKl+1+RRo8e3f0vZZ8259133+31fX/5y1/2+teogSzXx+lgkK9j9M4778SZZ54Z06ZNi/vuuy8Xu5JT+TpOI0eOjBNOOCHOOeec+NGPfhT19fXx8ssv52KXDrhcH6N//ud/jpdffjmKioqisLAwvvjFL0ZExJQpU+Kyyy7L2X4daP3xc+m0006LiIj/9//+3wHZh3zI9XEaM2ZMRER8+ctf7v7zoqKiOPbYY6O5ufnA71AO5PNceuCBB2LEiBHx9a9//UDvRs7l42fT3//938ePfvSjOP3002Py5MmxePHiGDZsWPz1X/91LnftgMrH+XTJJZdEa2trvP3229He3h633HJL/PKXv4zy8vJc7dYBtT/HaE8cLNffA05O38HLPtu5c2d29dVXZ2PHjs3eeOONPv989OjR2Xe
/+93usa6urj7f4F9XV9c955133unzl0f9y7/8S/ecl19+OZk3r+frOP2m1H55VD6P0ebNm7Pjjz8+u/jii/v8DbcDWX+cS7s0NzdnEZE999xzB26HciBfx+itt97KXnvtte7HM888k0VE9thjj2WbNm3K8V7uv/48l/7u7/4ui4jsrbfeOoB7lBv5Ok4dHR1ZUVFRj18eteuXI91777252r0DIt/n0s6dO7Py8vJkfpv9Lvk6TitWrMgGDRrU6zePn3DCCdmf//mf52LXDqj+/Nm0dOnS7LDDDuvztwMPJAfiGP2mz/rlUalefw9UwnaA+p//839mpaWl2Y9//OMev37/o48+6p7zF3/xF1lpaWn2xBNPZK+99lr2x3/8x33+SvZx48Zl//iP/5j95Cc/yc4666w+P+7npJNOylavXp2tXr06+y//5b8k8+vG83mc3nrrraypqSm79dZbs8997nNZU1NT1tTUNOA/WiNfx+jtt9/OvvjFL2ZnnXVWtnnz5h7fKwX5Ok7/8i//kt15551ZU1NT9otf/CL753/+5+yrX/1qdtxxx2W//vWv877feyOff99+08aNG5P6rcj5Ok6rVq3Kbr/99qypqSnbsGFDVldXl40dOzb7+te/nvd93hf5PJ/+9E//NDvqqKOyZ555JvvpT3+azZkzJxs1alT23nvv5XWf91a+/8794z/+YxYR2bp16/K2jwdCvo7TL3/5y2zEiBHZH/7hH2Zr167Nfvazn2XXXnttNmTIkGzt2rV53++9lc/z6c4778waGxuzn/3sZ9ldd92VDRs2LPvhD3+Y1/3dFwfqGLW0tGRNTU3Z//7f/7v7N2k3NTVl7e3t3XNSvv4eqITtABURfT4eeOCB7jk7d+7Mbr755mz06NFZUVFR9v/9f/9f9tprr/X4Or/61a+ya665Jhs+fHg2bNiw7Pd///ez5ubmHnPa29uzb37zm9kRRxyRHXHEEdk3v/nNAf8varvk8zhddtllfX6vgX6XLV/H6IEHHtjt90pBvo7Tv/3bv2VnnnlmNnz48KyoqCg75phjsurq6mzz5s352tV9ls+/b78ptbDN13FqbGzMpk6dmpWWlmbFxcXZl770pezmm2/OPvzww3zt6n7J5/m0bdu27Dvf+U42atSo7IgjjsjOPvvs7N///d/zsZv7Jd9/5/74j/84q6yszPVuHXD5PE6vvPJKVlVVlQ0fPjw74ogjstNOOy2rr6/Px27ut3wep0svvTQbPnx4NnTo0Oykk07KHnrooXzs4n47UMfo5ptv/syvk/L190BVkGUJ/bYSAAAA+C1+eRQAAABJE7YAAAAkTdgCAACQNGELAABA0oQtAAAASRO2AAAAJE3YAgAAkDRhCwAAQNKELQAAAEkTtgAAACRN2AIAAJA0YQsAAEDShC0AAABJE7YAAAAkTdgCAACQNGELAABA0oQtAAAASRO2AAAAJE3YAgAAkDRhCwAAQNKELQAAAEkTtgAAACRN2AIAAJA0YQsAAEDShC0AAABJE7YAAAAkTdgCAACQNGELAABA0oQtAAAASRO2OfbCCy/EBRdcEGPHjo2CgoJ46qmnPnOb559/PioqKqK4uDiOPfbYuOeee3K/UAAAgEQJ2xz78MMP4+STT4677rprj+Zv3LgxzjvvvJg+fXo0NTXFjTfeGHPnzo3HH388xysFAABIU0GWZVl/L+JQUVBQEE8++WTMnDlzt3Ouv/76WLFiRaxfv757rLq6Ol599dVYvXp1HlYJAACQlsL+XgA9rV69OqqqqnqMnXvuubF06dL4+OOPY8iQIX1u19XVFV1dXd3Pd+7cGe+9916MGDEiCgoKcrpmAAA41GVZFlu3bo2xY8fGoEFeGJtvwnaAaW1tjbKysh5jZWVlsX379mhra4sxY8b0uV1tbW3ceuut+VgiAACwG5s2bYpx48b19zIOOcJ2APrtO6y7Xi3+aXdeFyxYEDU1Nd3POzo64uijj45NmzZ
FSUlJbhYKAABERERnZ2eMHz8+jjjiiP5eyiFJ2A4wo0ePjtbW1h5jW7ZsicLCwhgxYsRutysqKoqioqJe4yUlJcIWAADyxNsA+4cXfw8w06ZNi4aGhh5jzz77bEyZMmW3768FAAA4lAnbHPvggw9i7dq1sXbt2oj45ON81q5dG83NzRHxyUuIZ8+e3T2/uro63nrrraipqYn169fHsmXLYunSpXHttdf2x/IBAAAGPC9FzrE1a9bEmWee2f181/tgL7vssnjwwQejpaWlO3IjIsrLy6O+vj7mz58fd999d4wdOzbuuOOOuPDCC/O+dgAAgBT4HNuDVGdnZ5SWlkZHR4f32AIAQI65/u5fXooMAABA0oQtAAAASRO2AAAAJE3YAgAAkDRhCwAAQNKELQAAAEkTtgAAACRN2AIAAJA0YQsAAEDShC0AAABJE7YAAAAkTdgCAACQNGELAABA0oQtAAAASRO2AAAAJE3YAgAAkDRhCwAAQNKELQAAAEkTtgAAACRN2AIAAJA0YQsAAEDShC0AAABJE7YAAAAkTdgCAACQNGELAABA0oQtAAAASRO2AAAAJE3YAgAAkDRhCwAAQNKELQAAAEkTtgAAACRN2AIAAJA0YQsAAEDShC0AAABJE7YAAAAkTdgCAACQNGELAABA0oQtAAAASRO2AAAAJE3YAgAAkDRhCwAAQNKELQAAAEkTtgAAACRN2ObJ4sWLo7y8PIqLi6OioiJWrlz5qfOXL18eJ598chx22GExZsyYuOKKK6K9vT1PqwUAAEiHsM2Durq6mDdvXixcuDCamppi+vTpMWPGjGhubu5z/osvvhizZ8+OOXPmxOuvvx6PPvpovPLKK3HllVfmeeUAAAADn7DNg9tvvz3mzJkTV155ZUycODF+8IMfxPjx42PJkiV9zn/55ZfjmGOOiblz50Z5eXl89atfjW9961uxZs2aPK8cAABg4BO2ObZt27ZobGyMqqqqHuNVVVWxatWqPreprKyMzZs3R319fWRZFu+++2489thjcf755+/2+3R1dUVnZ2ePBwAAwKFA2OZYW1tb7NixI8rKynqMl5WVRWtra5/bVFZWxvLly2PWrFkxdOjQGD16dBx55JFx55137vb71NbWRmlpafdj/PjxB3Q/AAAABiphmycFBQU9nmdZ1mtsl3Xr1sXcuXPjpptuisbGxnj66adj48aNUV1dvduvv2DBgujo6Oh+bNq06YCuHwAAYKAq7O8FHOxGjhwZgwcP7nV3dsuWLb3u4u5SW1sbp59+elx33XUREXHSSSfF4YcfHtOnT4/bbrstxowZ02uboqKiKCoqOvA7AAAAMMC5Y5tjQ4cOjYqKimhoaOgx3tDQEJWVlX1u89FHH8WgQT3/0wwePDgiPrnTCwAAwH8StnlQU1MT999/fyxbtizWr18f8+fPj+bm5u6XFi9YsCBmz57dPf+CCy6IJ554IpYsWRIbNmyIl156KebOnRunnnpqjB07tr92AwAAYEDyUuQ8mDVrVrS3t8eiRYuipaUlJk2aFPX19TFhwoSIiGhpaenxmbaXX355bN26Ne666674zne+E0ceeWScddZZ8d3vfre/dgEAAGDAKsi8tvWg1NnZGaWlpdHR0RElJSX9vRwAADiouf7uX16KDAAAQNKELQAAAEkTtgAAACRN2AIAAJA0YQsAAEDShC0AAABJE7YAAAAkTdgCAACQNGELAABA0oQtAAAASRO2AAAAJE3YAgAAkDRhCwAAQNKELQAAAEkTtgAAACRN2AIAAJA0YQsAAEDShC0AAABJE7YAAAAkTdgCAACQNGELAABA0oQtAAAASRO2AAAAJE3YAgAAkDRhCwAAQNKELQAAAEkTtgAAACRN2AIAAJA0YQsAAEDShC0AAABJE7YAAAAkTdgCAACQNGELAABA0oQtAAAASRO2AAAAJE3YAgAAkDRhCwAAQNKELQAAAEkTtgAAACRN2AIAAJA0YQsAAEDShC0
AAABJE7YAAAAkTdjmyeLFi6O8vDyKi4ujoqIiVq5c+anzu7q6YuHChTFhwoQoKiqK4447LpYtW5an1QIAAKSjsL8XcCioq6uLefPmxeLFi+P000+Pe++9N2bMmBHr1q2Lo48+us9tLrroonj33Xdj6dKl8cUvfjG2bNkS27dvz/PKAQAABr6CLMuy/l7EwW7q1KkxefLkWLJkSffYxIkTY+bMmVFbW9tr/tNPPx0XX3xxbNiwIYYPH75P37OzszNKS0ujo6MjSkpK9nntAADAZ3P93b+8FDnHtm3bFo2NjVFVVdVjvKqqKlatWtXnNitWrIgpU6bE9773vTjqqKPihBNOiGuvvTZ+9atf5WPJAAAASfFS5Bxra2uLHTt2RFlZWY/xsrKyaG1t7XObDRs2xIsvvhjFxcXx5JNPRltbW1x11VXx3nvv7fZ9tl1dXdHV1dX9vLOz88DtBAAAwADmjm2eFBQU9HieZVmvsV127twZBQUFsXz58jj11FPjvPPOi9tvvz0efPDB3d61ra2tjdLS0u7H+PHjD/g+AAAADETCNsdGjhwZgwcP7nV3dsuWLb3u4u4yZsyYOOqoo6K0tLR7bOLEiZFlWWzevLnPbRYsWBAdHR3dj02bNh24nQAAABjAhG2ODR06NCoqKqKhoaHHeENDQ1RWVva5zemnnx7vvPNOfPDBB91jb7zxRgwaNCjGjRvX5zZFRUVRUlLS4wEAAHAoELZ5UFNTE/fff38sW7Ys1q9fH/Pnz4/m5uaorq6OiE/uts6ePbt7/iWXXBIjRoyIK664ItatWxcvvPBCXHfddfEnf/InMWzYsP7aDQAAgAHJL4/Kg1mzZkV7e3ssWrQoWlpaYtKkSVFfXx8TJkyIiIiWlpZobm7unv+5z30uGhoa4tvf/nZMmTIlRowYERdddFHcdttt/bULAAAAA5bPsT1I+RwtAADIH9ff/ctLkQEAAEiasAUAACBpwhYAAICkCVsAAACSJmwBAABImrAFAAAgacIWAACApAlbAAAAkiZsAQAASJqwBQAAIGnCFgAAgKQJWwAAAJImbAEAAEiasAUAACBpwhYAAICkCVsAAACSJmwBAABImrAFAAAgacIWAACApAlbAAAAkiZsAQAASJqwBQAAIGnCFgAAgKQJWwAAAJImbAEAAEiasAUAACBpwhYAAICkCVsAAACSJmwBAABImrAFAAAgacIWAACApAlbAAAAkiZsAQAASJqwBQAAIGnCFgAAgKQJWwAAAJImbAEAAEiasAUAACBpwhYAAICkCVsAAACSJmwBAABImrAFAAAgacIWAACApAlbAAAAkiZs82Tx4sVRXl4excXFUVFREStXrtyj7V566aUoLCyMU045JbcLBAAASJSwzYO6urqYN29eLFy4MJqammL69OkxY8aMaG5u/tTtOjo6Yvbs2fG1r30tTysFAABIT0GWZVl/L+JgN3Xq1Jg8eXIsWbKke2zixIkxc+bMqK2t3e12F198cRx//PExePDgeOqpp2Lt2rV7/D07OzujtLQ0Ojo6oqSkZH+WDwAAfAbX3/3LHdsc27ZtWzQ2NkZVVVWP8aqqqli1atVut3vggQfizTffjJtvvnmPvk9XV1d0dnb2eAAAABwKhG2OtbW1xY4dO6KsrKzHeFlZWbS2tva5zc9//vO44YYbYvny5VFYWLhH36e2tjZKS0u7H+PHj9/vtQMAAKRA2OZJQUFBj+dZlvUai4jYsWNHXHLJJXHrrbfGCSecsMdff8GCBdHR0dH92LRp036vGQAAIAV7djuQfTZy5MgYPHhwr7uzW7Zs6XUXNyJi69atsWbNmmhqaoprrrkmIiJ27twZWZZFYWFhPPvss3HWWWf12q6oqCiKiopysxMAAAADmDu2OTZ06NCoqKiIhoaGHuMNDQ1RWVnZa35JSUm89tprsXbt2u5HdXV1fOlLX4q1a9fG1KlT87V0AACAJLhjmwc1NTVx6aWXxpQpU2LatGl
x3333RXNzc1RXV0fEJy8jfvvtt+Ohhx6KQYMGxaRJk3psP2rUqCguLu41DgAAgLDNi1mzZkV7e3ssWrQoWlpaYtKkSVFfXx8TJkyIiIiWlpbP/ExbAAAA+uZzbA9SPkcLAADyx/V3//IeWwAAAJImbAEAAEiasAUAACBpwhYAAICkCVsAAACSJmwBAABImrAFAAAgacIWAACApAlbAAAAkiZsAQAASJqwBQAAIGnCFgAAgKQJWwAAAJImbAEAAEiasAUAACBpwhYAAICkCVsAAACSJmwBAABImrAFAAAgacIWAACApAlbAAAAkiZsAQAASJqwBQAAIGnCFgAAgKQJWwAAAJImbAEAAEiasAUAACBpwhYAAICkCVsAAACSJmwBAABImrAFAAAgacIWAACApAlbAAAAkiZsAQAASJqwBQAAIGnCFgAAgKQJWwAAAJImbAEAAEiasAUAACBpwhYAAICkCVsAAACSJmwBAABImrAFAAAgacI2TxYvXhzl5eVRXFwcFRUVsXLlyt3OfeKJJ+Kcc86JL3zhC1FSUhLTpk2LZ555Jo+rBQAASIewzYO6urqYN29eLFy4MJqammL69OkxY8aMaG5u7nP+Cy+8EOecc07U19dHY2NjnHnmmXHBBRdEU1NTnlcOAAAw8BVkWZb19yIOdlOnTo3JkyfHkiVLuscmTpwYM2fOjNra2j36Gl/5yldi1qxZcdNNN+3R/M7OzigtLY2Ojo4oKSnZp3UDAAB7xvV3/3LHNse2bdsWjY2NUVVV1WO8qqoqVq1atUdfY+fOnbF169YYPnx4LpYIAACQtML+XsDBrq2tLXbs2BFlZWU9xsvKyqK1tXWPvsb3v//9+PDDD+Oiiy7a7Zyurq7o6urqft7Z2blvCwYAAEiMO7Z5UlBQ0ON5lmW9xvryyCOPxC233BJ1dXUxatSo3c6rra2N0tLS7sf48eP3e80AAAApELY5NnLkyBg8eHCvu7NbtmzpdRf3t9XV1cWcOXPib/7mb+Lss8/+1LkLFiyIjo6O7semTZv2e+0AAAApELY5NnTo0KioqIiGhoYe4w0NDVFZWbnb7R555JG4/PLL4+GHH47zzz//M79PUVFRlJSU9HgAAAAcCrzHNg9qamri0ksvjSlTpsS0adPivvvui+bm5qiuro6IT+62vv322/HQQw9FxCdRO3v27PjhD38Yp512Wvfd3mHDhkVpaWm/7QcAAMBAJGzzYNasWdHe3h6LFi2KlpaWmDRpUtTX18eECRMiIqKlpaXHZ9ree++9sX379rj66qvj6quv7h6/7LLL4sEHH8z38gEAAAY0n2N7kPI5WgAAkD+uv/uX99gCAACQNGELAABA0oQtAAAASRO2AAAAJE3YAgAAkDRhCwAAQNKELQAAAEkTtgAAACRN2AIAAJA0YQsAAEDShC0AAABJE7YAAAAkTdgCAACQNGELAABA0oQtAAAASRO2AAAAJE3YAgAAkDRhCwAAQNKELQAAAEkTtgAAACRN2AIAAJA0YQsAAEDShC0AAABJE7YAAAAkTdgCAACQNGELAABA0oQtAAAASRO2AAAAJE3YAgAAkDRhCwAAQNKELQAAAEkTtgAAACRN2AIAAJA0YQsAAEDShC0AAABJE7YAAAAkTdgCAACQNGELAABA0oQtAAAASRO2AAAAJE3YAgAAkDRhCwAAQNKELQAAAEkTtnmyePHiKC8vj+Li4qioqIiVK1d+6vznn38+Kioqori4OI499ti455578rRSAACAtAjbPKirq4t58+bFwoULo6mpKaZPnx4zZsyI5ubmPudv3LgxzjvvvJg+fXo0NTXFjTfeGHPnzo3HH388zysHAAAY+AqyLMv6exEHu6lTp8bkyZNjyZIl3WMTJ06MmTNnRm1tba/5119/faxYsSLWr1/fPVZdXR2vvvpqrF69eo++Z2dnZ5SWlkZHR0eUlJTs/04AAAC75fq7f7ljm2Pbtm2LxsbGqKqq6jFeVVU
Vq1at6nOb1atX95p/7rnnxpo1a+Ljjz/O2VoBAABSVNjfCzjYtbW1xY4dO6KsrKzHeFlZWbS2tva5TWtra5/zt2/fHm1tbTFmzJhe23R1dUVXV1f3846Ojoj45F+OAACA3Np13e0Fsf1D2OZJQUFBj+dZlvUa+6z5fY3vUltbG7feemuv8fHjx+/tUgEAgH3U3t4epaWl/b2MQ46wzbGRI0fG4MGDe92d3bJlS6+7sruMHj26z/mFhYUxYsSIPrdZsGBB1NTUdD9///33Y8KECdHc3OwvFvuls7Mzxo8fH5s2bfJ+EfaLc4kDyfnEgeJc4kDp6OiIo48+OoYPH97fSzkkCdscGzp0aFRUVERDQ0P8wR/8Qfd4Q0NDfOMb3+hzm2nTpsXf/d3f9Rh79tlnY8qUKTFkyJA+tykqKoqioqJe46WlpX5Ic0CUlJQ4lzggnEscSM4nDhTnEgfKoEF+jVF/cNTzoKamJu6///5YtmxZrF+/PubPnx/Nzc1RXV0dEZ/cbZ09e3b3/Orq6njrrbeipqYm1q9fH8uWLYulS5fGtdde21+7AAAAMGC5Y5sHs2bNivb29li0aFG0tLTEpEmTor6+PiZMmBARES0tLT0+07a8vDzq6+tj/vz5cffdd8fYsWPjjjvuiAsvvLC/dgEAAGDAErZ5ctVVV8VVV13V5589+OCDvcbOOOOM+MlPfrLP36+oqChuvvnmPl+eDHvDucSB4lziQHI+caA4lzhQnEv9qyDz+6gBAABImPfYAgAAkDRhCwAAQNKELQAAAEkTtgAAACRN2CZs8eLFUV5eHsXFxVFRURErV6781PnPP/98VFRURHFxcRx77LFxzz335GmlDHR7cy498cQTcc4558QXvvCFKCkpiWnTpsUzzzyTx9UykO3tz6VdXnrppSgsLIxTTjkltwskGXt7LnV1dcXChQtjwoQJUVRUFMcdd1wsW7YsT6tloNvb82n58uVx8sknx2GHHRZjxoyJK664Itrb2/O0WgaqF154IS644IIYO3ZsFBQUxFNPPfWZ27j+zh9hm6i6urqYN29eLFy4MJqammL69OkxY8aMHp+H+5s2btwY5513XkyfPj2amprixhtvjLlz58bjjz+e55Uz0OztufTCCy/EOeecE/X19dHY2BhnnnlmXHDBBdHU1JTnlTPQ7O25tEtHR0fMnj07vva1r+VppQx0+3IuXXTRRfFP//RPsXTp0vjZz34WjzzySJx44ol5XDUD1d6eTy+++GLMnj075syZE6+//no8+uij8corr8SVV16Z55Uz0Hz44Ydx8sknx1133bVH811/51lGkk499dSsurq6x9iJJ56Y3XDDDX3O/1//639lJ554Yo+xb33rW9lpp52WszWShr09l/ry5S9/Obv11lsP9NJIzL6eS7Nmzcr+7M/+LLv55puzk08+OYcrJBV7ey79wz/8Q1ZaWpq1t7fnY3kkZm/Pp7/8y7/Mjj322B5jd9xxRzZu3LicrZH0RET25JNPfuoc19/55Y5tgrZt2xaNjY1RVVXVY7yqqipWrVrV5zarV6/uNf/cc8+NNWvWxMcff5yztTKw7cu59Nt27twZW7dujeHDh+diiSRiX8+lBx54IN588824+eabc71EErEv59KKFStiypQp8b3vfS+OOuqoOOGEE+Laa6+NX/3qV/lYMgPYvpxPlZWVsXnz5qivr48sy+Ldd9+Nxx57LM4///x8LJmDiOvv/Crs7wWw99ra2mLHjh1RVlbWY7ysrCxaW1v73Ka1tbXP+du3b4+2trYYM2ZMztbLwLUv59Jv+/73vx8ffvhhXHTRRblYIonYl3Pp5z//edxwww2xcuXKKCz0vyM+sS/n0oYNG+LFF1+M4uLiePLJJ6OtrS2uuuqqeO+997zP9hC3L+dTZWVlLF++PGbNmhW//vWvY/v27fH1r3897rzzznwsmYOI6+/8csc2YQUFBT2eZ1nWa+yz5vc1zqFnb8+lXR5
55JG45ZZboq6uLkaNGpWr5ZGQPT2XduzYEZdccknceuutccIJJ+RreSRkb34u7dy5MwoKCmL58uVx6qmnxnnnnRe33357PPjgg+7aEhF7dz6tW7cu5s6dGzfddFM0NjbG008/HRs3bozq6up8LJWDjOvv/PFP5AkaOXJkDB48uNe/NG7ZsqXXvwrtMnr06D7nFxYWxogRI3K2Vga2fTmXdqmrq4s5c+bEo48+GmeffXYul0kC9vZc2rp1a6xZsyaamprimmuuiYhP4iTLsigsLIxnn302zjrrrLysnYFlX34ujRkzJo466qgoLS3tHps4cWJkWRabN2+O448/PqdrZuDal/OptrY2Tj/99LjuuusiIuKkk06Kww8/PKZPnx633Xabu2zsMdff+eWObYKGDh0aFRUV0dDQ0GO8oaEhKisr+9xm2rRpveY/++yzMWXKlBgyZEjO1srAti/nUsQnd2ovv/zyePjhh73niIjY+3OppKQkXnvttVi7dm33o7q6Or70pS/F2rVrY+rUqflaOgPMvvxcOv300+Odd96JDz74oHvsjTfeiEGDBsW4ceNyul4Gtn05nz766KMYNKjnJfLgwYMj4j/vtsGecP2dZ/30S6vYTz/60Y+yIUOGZEuXLs3WrVuXzZs3Lzv88MOzX/ziF1mWZdkNN9yQXXrppd3zN2zYkB122GHZ/Pnzs3Xr1mVLly7NhgwZkj322GP9tQsMEHt7Lj388MNZYWFhdvfdd2ctLS3dj/fff7+/doEBYm/Ppd/mtyKzy96eS1u3bs3GjRuX/dEf/VH2+uuvZ88//3x2/PHHZ1deeWV/7QIDyN6eTw888EBWWFiYLV68OHvzzTezF198MZsyZUp26qmn9tcuMEBs3bo1a2pqypqamrKIyG6//fasqakpe+utt7Isc/3d34Rtwu6+++5swoQJ2dChQ7PJkydnzz//fPefXXbZZdkZZ5zRY/6Pf/zj7Hd+53eyoUOHZsccc0y2ZMmSPK+YgWpvzqUzzjgji4hej8suuyz/C2fA2dufS79J2PKb9vZcWr9+fXb22Wdnw4YNy8aNG5fV1NRkH330UZ5XzUC1t+fTHXfckX35y1/Ohg0blo0ZMyb75je/mW3evDnPq2agee655z71Gsj1d/8qyDKvqQAAACBd3mMLAABA0oQtAAAASRO2AAAAJE3YAgAAkDRhCwAAQNKELQAAAEkTtgAAACRN2AIAAJA0YQsAAEDShC0AAABJE7YAAAAkTdgCAACQNGELAABA0oQtAAAASRO2AAAAJE3YAgAAkDRhCwAAQNKELQAAAEkTtgAAACRN2AIAAJA0YQsAAEDShC0AAABJE7YAAAAkTdgCAACQNGELAABA0oQtAAAASRO2AAAAJE3YAgAAkDRhm2MvvPBCXHDBBTF27NgoKCiIp5566jO3ef7556OioiKKi4vj2GOPjXvuuSf3CwUAAEiUsM2xDz/8ME4++eS466679mj+xo0b47zzzovp06dHU1NT3HjjjTF37tx4/PHHc7xSAACANBVkWZb19yIOFQUFBfHkk0/GzJkzdzvn+uuvjxUrVsT69eu7x6qrq+PVV1+N1atX52GVAAAAaSns7wXQ0+rVq6OqqqrH2LnnnhtLly6Njz/+OIYMGdLndl1dXdHV1dX9fOfOnfHee+/FiBEjoqCgIKdrBgCAQ12WZbF169YYO3ZsDBrkhbH5JmwHmNbW1igrK+sxVlZWFtu3b4+2trYYM2ZMn9vV1tbGrbfemo8lAgAAu7Fp06YYN25cfy/jkCNsB6DfvsO669Xin3bndcGCBVFTU9P9vKOjI44++ujYtGlTlJSU5GahAABARER0dnbG+PHj44gjjujvpRyShO0AM3r06Ghtbe0xtmXLligsLIwRI0bsdruioqIoKirqNV5SUiJsAQAgT7wNsH948fcAM23atGhoaOgx9uyzz8aUKVN2+/5aAACAQ5mwzbEPPvgg1q5dG2vXro2ITz7
OZ+3atdHc3BwRn7yEePbs2d3zq6ur46233oqamppYv359LFu2LJYuXRrXXnttfywfAABgwPNS5Bxbs2ZNnHnmmd3Pd70P9rLLLosHH3wwWlpauiM3IqK8vDzq6+tj/vz5cffdd8fYsWPjjjvuiAsvvDDvawcAAEiBz7E9SHV2dkZpaWl0dHR4jy0AAOSY6+/+5aXIAAAAJE3YAgAAkDRhCwAAQNKELQAAAEkTtgAAACRN2AIAAJA0YQsAAEDShC0AAABJE7YAAAAkTdgCAACQNGELAABA0oQtAAAASRO2AAAAJE3YAgAAkDRhCwAAQNKELQAAAEkTtgAAACRN2AIAAJA0YQsAAEDShC0AAABJE7YAAAAkTdgCAACQNGELAABA0oQtAAAASRO2AAAAJE3YAgAAkDRhCwAAQNKELQAAAEkTtgAAACRN2AIAAJA0YQsAAEDShC0AAABJE7YAAAAkTdgCAACQNGELAABA0oQtAAAASRO2AAAAJE3YAgAAkDRhCwAAQNKELQAAAEkTtgAAACRN2AIAAJA0YQsAAEDShG2eLF68OMrLy6O4uDgqKipi5cqVnzp/+fLlcfLJJ8dhhx0WY8aMiSuuuCLa29vztFoAAIB0CNs8qKuri3nz5sXChQujqakppk+fHjNmzIjm5uY+57/44osxe/bsmDNnTrz++uvx6KOPxiuvvBJXXnllnlcOAAAw8AnbPLj99ttjzpw5ceWVV8bEiRPjBz/4QYwfPz6WLFnS5/yXX345jjnmmJg7d26Ul5fHV7/61fjWt74Va9asyfPKAQAABj5hm2Pbtm2LxsbGqKqq6jFeVVUVq1at6nObysrK2Lx5c9TX10eWZfHuu+/GY489Fueff34+lgwAAJAUYZtjbW1tsWPHjigrK+sxXlZWFq2trX1uU1lZGcuXL49Zs2bF0KFDY/To0XHkkUfGnXfeudvv09XVFZ2dnT0eAAAAhwJhmycFBQU9nmdZ1mtsl3Xr1sXcuXPjpptuisbGxnj66adj48aNUV1dvduvX1tbG6Wlpd2P8ePHH9D1AwAADFQFWZZl/b2Ig9m2bdvisMMOi0cffTT+4A/+oHv8T//0T2Pt2rXx/PPP99rm0ksvjV//+tfx6KOPdo+9+OKLMX369HjnnXdizJgxvbbp6uqKrq6u7uednZ0xfvz46OjoiJKSkgO8VwAAwG/q7OyM0tJS19/9xB3bHBs6dGhUVFREQ0NDj/GGhoaorKzsc5uPPvooBg3q+Z9m8ODBEfHJnd6+FBUVRUlJSY8HAADAoUDY5kFNTU3cf//9sWzZsli/fn3Mnz8/mpubu19avGDBgpg9e3b3/AsuuCCeeOKJWLJkSWzYsCFeeumlmDt3bpx66qkxduzY/toNAACAAamwvxdwKJg1a1a0t7fHokWLoqWlJSZNmhT19fUxYcKEiIhoaWnp8Zm2l19+eWzdujXuuuuu+M53vhNHHnlknHXWWfHd7363v3YBAABgwPIe24OU1/gDAED+uP7uX16KDAAAQNKELQAAAEkTtgAAACRN2AIAAJA0YQsAAEDShC0AAABJE7YAAAAkTdgCAACQNGELAABA0oQtAAAASRO2AAAAJE3YAgAAkDRhCwAAQNKELQAAAEkTtgAAACRN2AIAAJA0YQsAAEDShC0AAABJE7YAAAAkTdgCAACQNGELAABA0oQtAAAASRO2AAAAJE3YAgAAkDRhCwAAQNKELQAAAEkTtgAAACRN2AIAAJA0YQsAAEDShC0AAABJE7YAAAAkTdgCAACQNGELAABA0oQtAAAASRO2AAAAJE3YAgAAkDRhCwAAQNKELQAAAEkTtgAAACRN2AIAAJA0YQsAAEDShC0AAABJE7YAAAAkTdjmyeLFi6O8vDyKi4ujoqIiVq5c+anzu7q6YuHChTFhwoQoKiqK4447LpYtW5an1QIAAKSjsL8XcCioq6uLefPmxeLFi+P000+Pe++9N2bMmBHr1q2Lo48+us9
tLrroonj33Xdj6dKl8cUvfjG2bNkS27dvz/PKAQAABr6CLMuy/l7EwW7q1KkxefLkWLJkSffYxIkTY+bMmVFbW9tr/tNPPx0XX3xxbNiwIYYPH75P37OzszNKS0ujo6MjSkpK9nntAADAZ3P93b+8FDnHtm3bFo2NjVFVVdVjvKqqKlatWtXnNitWrIgpU6bE9773vTjqqKPihBNOiGuvvTZ+9atf5WPJAAAASfFS5Bxra2uLHTt2RFlZWY/xsrKyaG1t7XObDRs2xIsvvhjFxcXx5JNPRltbW1x11VXx3nvv7fZ9tl1dXdHV1dX9vLOz88DtBAAAwADmjm2eFBQU9HieZVmvsV127twZBQUFsXz58jj11FPjvPPOi9tvvz0efPDB3d61ra2tjdLS0u7H+PHjD/g+AAAADETCNsdGjhwZgwcP7nV3dsuWLb3u4u4yZsyYOOqoo6K0tLR7bOLEiZFlWWzevLnPbRYsWBAdHR3dj02bNh24nQAAABjAhG2ODR06NCoqKqKhoaHHeENDQ1RWVva5zemnnx7vvPNOfPDBB91jb7zxRgwaNCjGjRvX5zZFRUVRUlLS4wEAAHAoELZ5UFNTE/fff38sW7Ys1q9fH/Pnz4/m5uaorq6OiE/uts6ePbt7/iWXXBIjRoyIK664ItatWxcvvPBCXHfddfEnf/InMWzYsP7aDQAAgAHJL4/Kg1mzZkV7e3ssWrQoWlpaYtKkSVFfXx8TJkyIiIiWlpZobm7unv+5z30uGhoa4tvf/nZMmTIlRowYERdddFHcdttt/bULAAAAA5bPsT1I+RwtAADIH9ff/ctLkQEAAEiasAUAACBpwhYAAICkCVsAAACSJmwBAABImrAFAAAgacIWAACApAlbAAAAkiZsAQAASJqwBQAAIGnCFgAAgKQJWwAAAJImbAEAAEiasAUAACBpwhYAAICkCVsAAACSJmwBAABImrAFAAAgacIWAACApAlbAAAAkiZsAQAASJqwBQAAIGnCFgAAgKQJWwAAAJImbAEAAEiasAUAACBpwhYAAICkCVsAAACSJmwBAABImrAFAAAgacIWAACApAlbAAAAkiZsAQAASJqwBQAAIGnCFgAAgKQJWwAAAJImbAEAAEiasAUAACBpwhYAAICkCVsAAACSJmwBAABImrAFAAAgacIWAACApAlbAAAAkiZs82Tx4sVRXl4excXFUVFREStXrtyj7V566aUoLCyMU045JbcLBAAASJSwzYO6urqYN29eLFy4MJqammL69OkxY8aMaG5u/tTtOjo6Yvbs2fG1r30tTysFAABIT0GWZVl/L+JgN3Xq1Jg8eXIsWbKke2zixIkxc+bMqK2t3e12F198cRx//PExePDgeOqpp2Lt2rV7/D07OzujtLQ0Ojo6oqSkZH+WDwAAfAbX3/3LHdsc27ZtWzQ2NkZVVVWP8aqqqli1atVut3vggQfizTffjJtvvnmPvk9XV1d0dnb2eAAAABwKhG2OtbW1xY4dO6KsrKzHeFlZWbS2tva5zc9//vO44YYbYvny5VFYWLhH36e2tjZKS0u7H+PHj9/vtQMAAKRA2OZJQUFBj+dZlvUai4jYsWNHXHLJJXHrrbfGCSecsMdff8GCBdHR0dH92LRp036vGQAAIAV7djuQfTZy5MgYPHhwr7uzW7Zs6XUXNyJi69atsWbNmmhqaoprrrkmIiJ27twZWZZFYWFhPPvss3HWWWf12q6oqCiKiopysxMAAAADmDu2OTZ06NCoqKiIhoaGHuMNDQ1RWVnZa35JSUm89tprsXbt2u5HdXV1fOlLX4q1a9fG1KlT87V0AACAJLhjmwc1NTVx6aWXxpQpU2LatGlx3333RXNzc1RXV0fEJy8jfvvtt+Ohhx6KQYMGxaRJk3psP2rUqCguLu41DgAAgLDNi1mzZkV7e3ssWrQoWlpaYtKkSVFfXx8TJkyIiIiWlpbP/ExbAAAA+uZzbA9SPkcLAADyx/V
3//IeWwAAAJImbAEAAEiasAUAACBpwhYAAICkCVsAAACSJmwBAABImrAFAAAgacIWAACApAlbAAAAkiZsAQAASJqwBQAAIGnCFgAAgKQJWwAAAJImbAEAAEiasAUAACBpwhYAAICkCVsAAACSJmwBAABImrAFAAAgacIWAACApAlbAAAAkiZsAQAASJqwBQAAIGnCFgAAgKQJWwAAAJImbAEAAEiasAUAACBpwhYAAICkCVsAAACSJmwBAABImrAFAAAgacIWAACApAlbAAAAkiZsAQAASJqwBQAAIGnCFgAAgKQJWwAAAJImbAEAAEiasAUAACBpwhYAAICkCVsAAACSJmwBAABImrAFAAAgacI2TxYvXhzl5eVRXFwcFRUVsXLlyt3OfeKJJ+Kcc86JL3zhC1FSUhLTpk2LZ555Jo+rBQAASIewzYO6urqYN29eLFy4MJqammL69OkxY8aMaG5u7nP+Cy+8EOecc07U19dHY2NjnHnmmXHBBRdEU1NTnlcOAAAw8BVkWZb19yIOdlOnTo3JkyfHkiVLuscmTpwYM2fOjNra2j36Gl/5yldi1qxZcdNNN+3R/M7OzigtLY2Ojo4oKSnZp3UDAAB7xvV3/3LHNse2bdsWjY2NUVVV1WO8qqoqVq1atUdfY+fOnbF169YYPnx4LpYIAACQtML+XsDBrq2tLXbs2BFlZWU9xsvKyqK1tXWPvsb3v//9+PDDD+Oiiy7a7Zyurq7o6urqft7Z2blvCwYAAEiMO7Z5UlBQ0ON5lmW9xvryyCOPxC233BJ1dXUxatSo3c6rra2N0tLS7sf48eP3e80AAAApELY5NnLkyBg8eHCvu7NbtmzpdRf3t9XV1cWcOXPib/7mb+Lss8/+1LkLFiyIjo6O7semTZv2e+0AAAApELY5NnTo0KioqIiGhoYe4w0NDVFZWbnb7R555JG4/PLL4+GHH47zzz//M79PUVFRlJSU9HgAAAAcCrzHNg9qamri0ksvjSlTpsS0adPivvvui+bm5qiuro6IT+62vv322/HQQw9FxCdRO3v27PjhD38Yp512Wvfd3mHDhkVpaWm/7QcAAMBAJGzzYNasWdHe3h6LFi2KlpaWmDRpUtTX18eECRMiIqKlpaXHZ9ree++9sX379rj66qvj6quv7h6/7LLL4sEHH8z38gEAAAY0n2N7kPI5WgAAkD+uv/uX99gCAACQNGELAABA0oQtAAAASRO2AAAAJE3YAgAAkDRhCwAAQNKELQAAAEkTtgAAACRN2AIAAJA0YQsAAEDShC0AAABJE7YAAAAkTdgCAACQNGELAABA0oQtAAAASRO2AAAAJE3YAgAAkDRhCwAAQNKELQAAAEkTtgAAACRN2AIAAJA0YQsAAEDShC0AAABJE7YAAAAkTdgCAACQNGELAABA0oQtAAAASRO2AAAAJE3YAgAAkDRhCwAAQNKELQAAAEkTtgAAACRN2AIAAJA0YQsAAEDShC0AAABJE7YAAAAkTdgCAACQNGELAABA0oQtAAAASRO2AAAAJE3YAgAAkDRhCwAAQNKELQAAAEkTtgAAACRN2ObJ4sWLo7y8PIqLi6OioiJWrlz5qfOff/75qKioiOLi4jj22GPjnnvuydNKAQAA0iJs86Curi7mzZsXCxcujKamppg+fXrMmDEjmpub+5y/cePGOO+882L69OnR1NQUN954Y8ydOzcef/zxPK8cAABg4CvIsizr70Uc7KZOnRqTJ0+OJUuWdI9NnDgxZs6cGbW1tb3mX3/99bFixYpYv35991h1dXW8+uqrsXr16j36np2dnVFaWhodHR1RUlKy/zsBAADsluvv/lXY3ws42G3bti0aGxvjhhtu6DFeVVUVq1at6nOb1atXR1VVVY+xc889N5YuXRoff/xxDBkypNc2XV1d0dXV1f28o6MjIj75CwYAAOTWrutu9w37h7DNsba2ttixY0eUlZX1GC8rK4vW1tY+t2ltbe1z/vb
t26OtrS3GjBnTa5va2tq49dZbe42PHz9+P1YPAADsjfb29igtLe3vZRxyhG2eFBQU9HieZVmvsc+a39f4LgsWLIiampru5++//35MmDAhmpub/cViv3R2dsb48eNj06ZNXlbDfnEucSA5nzhQnEscKB0dHXH00UfH8OHD+3sphyRhm2MjR46MwYMH97o7u2XLll53ZXcZPXp0n/MLCwtjxIgRfW5TVFQURUVFvcZLS0v9kOaAKCkpcS5xQDiXOJCcTxwoziUOlEGD/H7e/uCo59jQoUOjoqIiGhoaeow3NDREZWVln9tMmzat1/xnn302pkyZ0uf7awEAAA5lwjYPampq4v77749ly5bF+vXrY/78+dHc3BzV1dUR8cnLiGfPnt09v7q6Ot56662oqamJ9evXx7Jly2Lp0qVx7bXX9tcuAAAADFheipwHs2bNivb29li0aFG0tLTEpEmTor6+PiZMmBARES0tLT0+07a8vDzq6+tj/vz5cffdd8fYsWPjjjvuiAsvvHCPv2dRUVHcfPPNfb48GfaGc4kDxbnEgeR84kBxLnGgOJf6l8+xBQAAIGleigwAAEDShC0AAABJE7YAAAAkTdgCAACQNGGbsMWLF0d5eXkUFxdHRUVFrFy58lPnP//881FRURHFxcVx7LHHxj333JOnlTLQ7c259MQTT8Q555wTX/jCF6KkpCSmTZsWzzzzTB5Xy0C2tz+XdnnppZeisLAwTjnllNwukGTs7bnU1dUVCxcujAkTJkRRUVEcd9xxsWzZsjytloFub8+n5cuXx8knnxyHHXZYjBkzJq644opob2/P02oZqF544YW44IILYuzYsVFQUBBPPfXUZ27j+jt/hG2i6urqYt68ebFw4cJoamqK6dOnx4wZM3p8bNBv2rhxY5x33nkxffr0aGpqihtvvDHmzp0bjz/+eJ5XzkCzt+fSCy+8EOecc07U19dHY2NjnHnmmXHBBRdEU1NTnlfOQLO359IuHR0dMXv27Pja176Wp5Uy0O3LuXTRRRfFP/3TP8XSpUvjZz/7WTzyyCNx4okn5nHVDFR7ez69+OKLMXv27JgzZ068/vrr8eijj8Yrr7wSV155ZZ5XzkDz4Ycfxsknnxx33XXXHs13/Z1nGUk69dRTs+rq6h5jJ554YnbDDTf0Of9//a//lZ144ok9xr71rW9lp512Ws7WSBr29lzqy5e//OXs1ltvPdBLIzH7ei7NmjUr+7M/+7Ps5ptvzk4++eQcrpBU7O259A//8A9ZaWlp1t7eno/lkZi9PZ/+8i//Mjv22GN7jN1xxx3ZuHHjcrZG0hMR2ZNPPvmpc1x/55c7tgnatm1bNDY2RlVVVY/xqqqqWLVqVZ/brF69utf8c889N9asWRMff/xxztbKwLYv59Jv27lzZ2zdujWGDx+eiyWSiH09lx544IF488034+abb871EknEvpxLK1asiClTpsT3vve9OOqoo+KEE06Ia6+9Nn71q1/lY8kMYPtyPlVWVsbmzZujvr4+siyLd999Nx577LE4//zz87FkDiKuv/OrsL8XwN5ra2uLHTt2RFlZWY/xsrKyaG1t7XOb1tbWPudv37492traYsyYMTlbLwPXvpxLv+373/9+fPjhh3HRRRflYokkYl/OpZ///Odxww03xMqVK6Ow0P+O+MS+nEsbNmyIF198MYqLi+PJJ5+Mtra2uOqqq+K9997zPttD3L6cT5WVlbF8+fKYNWtW/PrXv47t27fH17/+9bjzzjvzsWQOIq6/88sd24QVFBT0eJ5lWa+xz5rf1ziHnr09l3Z55JFH4pZbbom6uroYNWpUrpZHQvb0XNqxY0dccsklceutt8YJJ5yQr+WRkL35ubRz584oKCiI5cuXx6mnnhrnnXde3H777fHggw+6a0tE7N35tG7dupg7d27cdNNN0djYGE8//XRs3Lgxqqur87FUDjKuv/PHP5EnaOTIkTF48OBe/9K4ZcuWXv8qtMv
o0aP7nF9YWBgjRozI2VoZ2PblXNqlrq4u5syZE48++micffbZuVwmCdjbc2nr1q2xZs2aaGpqimuuuSYiPomTLMuisLAwnn322TjrrLPysnYGln35uTRmzJg46qijorS0tHts4sSJkWVZbN68OY4//vicrpmBa1/Op9ra2jj99NPjuuuui4iIk046KQ4//PCYPn163Hbbbe6yscdcf+eXO7YJGjp0aFRUVERDQ0OP8YaGhqisrOxzm2nTpvWa/+yzz8aUKVNiyJAhOVsrA9u+nEsRn9ypvfzyy+Phhx/2niMiYu/PpZKSknjttddi7dq13Y/q6ur40pe+FGvXro2pU6fma+kMMPvyc+n000+Pd955Jz744IPusTfeeCMGDRoU48aNy+l6Gdj25Xz66KOPYtCgnpfIgwcPjoj/vNsGe8L1d5710y+tYj/96Ec/yoYMGZItXbo0W7duXTZv3rzs8MMPz37xi19kWZZlN9xwQ3bppZd2z9+wYUN22GGHZfPnz8/WrVuXLV26NBsyZEj22GOP9dcuMEDs7bn08MMPZ4WFhdndd9+dtbS0dD/ef//9/toFBoi9PZd+m9+KzC57ey5t3bo1GzduXPZHf/RH2euvv549//zz2fHHH59deeWV/bULDCB7ez498MADWWFhYbZ48eLszTffzF588cVsypQp2amnntpfu8AAsXXr1qypqSlramrKIiK7/fbbs6ampuytt97Kssz1d38Ttgm7++67swkTJmRDhw7NJk+enD3//PPdf3bZZZdlZ5xxRo/5P/7xj7Pf+Z3fyYYOHZodc8wx2ZIlS/K8YgaqvTmXzjjjjCwiej0uu+yy/C+cAWdvfy79JmHLb9rbc2n9+vXZ2WefnQ0bNiwbN25cVlNTk3300Ud5XjUD1d6eT3fccUf25S9/ORs2bFg2ZsyY7Jvf/Ga2efPmPK+agea555771Gsg19/9qyDLvKYCAACAdHmPLQAAAEkTtgAAACRN2AIAAJA0YQsAAEDShC0AAABJE7YAAAAkTdgCAACQNGELAABA0oQtAAAASRO2AAAAJE3YAgAAkDRhCwAAQNKELQAAAEkTtgAAACRN2AIAAJA0YQsAAEDShC0AAABJE7YAAAAkTdgCAACQNGELAABA0oQtAAAASRO2AAAAJE3YAgAAkDRhCwAAQNKELQAAAEkTtgAAACRN2AIAAJA0YQsAAEDShC0AAABJE7YAAAAkTdgCAACQNGELAABA0oQtAAAASRO2AAAAJE3YAgAAkDRhCwAAQNKELQAAAEkTtgAAACRN2AIAAJA0YQsAAEDShC0AAABJE7YAAAAkTdgCAACQNGELAABA0oQtAAAASRO2AAAAJE3YAgAAkDRhCwAAQNKELQAAAEkTtgAAACRN2AIAAJA0YQsAAEDShC0AAABJE7YAAAAkTdgCAACQNGELAABA0oQtAAAASRO2AAAAJE3YAgAAkDRhCwAAQNKELQAAAEkTtgAAACRN2AIAAJA0YQsAAEDShC0AAABJE7YAAAAkTdgCAACQNGELAABA0oQtAAAASRO2AAAAJE3YAgAAkDRhCwAAQNKELQAAAEkTtgAAACRN2AIAAJA0YQsAAEDShC0AAABJE7YAAAAkTdgCAACQNGELAABA0oQtAAAASRO2AAAAJE3YAgAAkDRhCwAAQNKELQAAAEkTtgAAACRN2AIAAJA0YQsAAEDShC0AAABJE7YAAAAkTdgCAACQNGELAABA0oQtAAAASRO2AAAAJE3YAgAAkDRhCwAAQNKELQAAAEkTtgAAACRN2AIAAJA0YQsAAEDShC0AAABJE7YAAAAkTdgCAACQNGELAABA0oQtAAAASRO2AAAAJE3YAgAAkDRhCwAAQNKELQAAAEkTtgAAACTt/wcLAF7/XlacegAAAABJRU5ErkJggg==", + "text/html": [ + "\n", + "
\n", + "
\n", + " Figure\n", + "
\n", + " \n", + "
\n", + " " + ], + "text/plain": [ + "Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "ts_display = act.plotting.TimeSeriesDisplay(ds)\n", + "ts_display.add_subplots((len(variables_to_plot),), figsize = (9.5,4*len(variables_to_plot)))\n", + "\n", + "for i,v in enumerate(variables_to_plot):\n", + " ts_ax = ts_display.plot(v, subplot_index=(i,), set_title=ds.variables[v].attrs['long_name'],)\n", + " ts_ax.grid()\n", + "\n", + "plt.show()\n" + ] + }, + { + "cell_type": "markdown", + "id": "194399aa-1907-452b-8ba9-bc31d7f60291", + "metadata": {}, + "source": [ + "## Quality check plots\n", + "#### Define variable for QC plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9a0ef63d-3eeb-48fc-a24a-43258b6134b8", + "metadata": {}, + "outputs": [], + "source": [ + "qc_variable = 'sensor_temperature_rise'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "532663a3-4dc0-4497-bda8-018c5f91e1c4", + "metadata": {}, + "outputs": [], + "source": [ + "# QC Plot\n", + "if ('qc_'+qc_variable) in ds.variables:\n", + "\n", + " # Plot\n", + " qc_display = act.plotting.TimeSeriesDisplay(ds)\n", + " qc_display.add_subplots((2,), figsize = (9.5,10))\n", + " qc_ax = qc_display.plot(qc_variable, subplot_index=(0,), set_title=\"QC results on field: \" + qc_variable,)\n", + " qc_ax.grid()\n", + " qc_display.qc_flag_block_plot(qc_variable, subplot_index=(1,))\n", + "\n", + " plt.show()\n", + "else:\n", + " print(f'QC not available for the selected field: {qc_variable}')\n" + ] + }, + { + "cell_type": "markdown", + "id": "2308db16-f362-4033-a11a-c3e5e75ad9ba", + "metadata": {}, + "source": [ + "## Field selection dropdown menu\n", + "Select variable to be plotted from a dropdown menu" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d5792fa8-2253-441a-8ac6-1e07a0345031", + 
"metadata": {}, + "outputs": [], + "source": [ + "plt.ioff()\n", + "\n", + "# populate dropdown menu with available variables \n", + "available_variables = [v for v in ds.variables if not('time' in v or v.startswith('qc_') or v.startswith('source_')) and 'long_name' in ds.variables[v].attrs]\n", + "d_variable = 'sensor_temperature_rise'\n", + "dropdown = widgets.Dropdown(\n", + " options = [(ds.variables[v].attrs['long_name'], v) for v in available_variables],\n", + " value= d_variable,\n", + " description='Field:',\n", + " disabled=False,\n", + ")\n", + "dropdown.layout.margin = '0px 30% 0px 20%'\n", + "dropdown.layout.width = '50%'\n", + "\n", + "# set up display\n", + "i_display = act.plotting.TimeSeriesDisplay(ds)\n", + "i_display.add_subplots((1,), figsize = (9.5,5))\n", + "i_ax = i_display.plot(d_variable, subplot_index=(0,), set_title=ds.variables[d_variable].attrs['long_name'],)\n", + "i_ax.grid()\n", + "i_fig = i_display.fig\n", + "\n", + "# update plot callback function\n", + "def update_plot(change):\n", + " i_ax.cla()\n", + " i_ax_new = i_display.plot(change.new, subplot_index=(0,), set_title=ds.variables[change.new].attrs['long_name'],)\n", + " i_ax_new.grid()\n", + " i_fig.canvas.draw()\n", + " i_fig.canvas.flush_events()\n", + "\n", + "dropdown.observe(update_plot, names='value')\n", + "\n", + "widgets.AppLayout(\n", + " header=dropdown,\n", + " center=i_fig.canvas,\n", + " pane_heights=[1, 6,1]\n", + ")\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.16" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + 
"nbformat_minor": 5 +} diff --git a/VAPs/quicklook/OKMSOIL/OKMSOIL_tutorial.ipynb b/VAPs/quicklook/OKMSOIL/OKMSOIL_tutorial.ipynb new file mode 100644 index 00000000..444b458d --- /dev/null +++ b/VAPs/quicklook/OKMSOIL/OKMSOIL_tutorial.ipynb @@ -0,0 +1,838 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# OKMSOIL.C1 Notebook\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/okmsoil) for more information about this vap." + ] + }, + { + "cell_type": "markdown", + "id": "97097763", + "metadata": {}, + "source": [ + "In this notebook, we demonstrate the workflow to explore ARM vap data (using okmsoil as an example.) Value-added products (VAPs) are higher-order data products that have been analyzed and processed to ease scientists’ use of ARM data in atmospheric research and global climate models. \n", + "Here is the main content we will cover." + ] + }, + { + "cell_type": "markdown", + "id": "eddec40f", + "metadata": {}, + "source": [ + "# Table of Content\n", + "## Access the data\n", + "* How to retrieve the data\n", + "* Data path and file name conventions\n", + "* Load data\n", + "## Explore the data\n", + "* NetCDF Data structure\n", + "* Xarray essentials\n", + "* Xarray Variable\n", + "## Plot the data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "import random\n", + "\n", + "import glob\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "id": "2372d5be", + "metadata": {}, + "source": [ + "## Access the data" + ] + }, + { + "cell_type": "markdown", + "id": "5b0d9684", + "metadata": {}, + "source": [ 
+ "### How to retrieve the data\n", + "We assume the path \"/data/archive\" is available where you are running this notebook. You can use `os.path.exists(\"/data/archive\")` to verify if the path exists on your machine.\n", + "\n", + "\n", + "### Data path and file name conventions\n", + "There are several common terms regarding ARM data, for example, data-stream-name, data-level, site, facility, instrument, etc. (For more details, please see [ARM Data File Standards Version 1.2](https://www.google.com/search?q=arm+datastream+facility+cite+definition&rlz=1C1GCEJ_enUS1029US1029&ei=hb41ZICrBPukqtsPvMODIA&ved=0ahUKEwjAgeiS0aL-AhV7kmoFHbzhAAQQ4dUDCBA&uact=5&oq=arm+datastream+facility+cite+definition&gs_lcp=Cgxnd3Mtd2l6LXNlcnAQAzIFCAAQogQyBQgAEKIEMgUIABCiBDIFCAAQogQyBQgAEKIEOgoIABBHENYEELADOgoIIRCgARDDBBAKSgQIQRgAUJ4DWIoOYO0PaAFwAXgAgAGMAYgBuQmSAQMyLjmYAQCgAQHIAQjAAQE&sclient=gws-wiz-serp).) \n", + "\n", + "For example, this notebook is called `okmsoil.c1`, where `okmsoil` is the \"datastream name\", and `c1` is the \"data level\".\n", + "\n", + "This datastream also contains site `sgp` and facility `X1`. (Note: an individual datastream might have multiple site-facility pairs.)\n", + "In such a case, the data of this data-stream is stored at `/data/archive/sgp/sgpokmsoilX1.c1`, which is in the format of `<DATA_DIR>/<site>/<site><DATASTREAM_NAME><facility>.<DATA_LEVEL>`. We can use the following method to assign the data-stream directory `datastream_dir = os.path.join(DATA_DIR, site, site + DATASTREAM_NAME + facility + '.' + DATA_LEVEL )`\n", + "\n", + "The data files under datastream_dir also follow naming conventions. But once you reach the datastream_dir level, the most important file naming convention to differentiate the files is \"yyyyMMdd.hhmmss\", which comes in handy to filter out files based on datetime. 
For example, we can use `glob.glob(f'{datastream_dir}/*.200709*.*')` to filter files in September 2007.\n", + "\n", + "(Note: refer to https://adc.arm.gov/discovery/#/ and the https://adc.arm.gov/solr8/metadata/select API to explore ARM datastreams and their associated available sites and facilities.)\n", + "\n", + "Please see the following examples in action" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d7e9eb85", + "metadata": {}, + "outputs": [], + "source": [ + "# Verify if DATA_DIR path exists\n", + "DATA_DIR = \"/data/archive\"\n", + "os.path.exists(DATA_DIR)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "586993fd", + "metadata": {}, + "outputs": [], + "source": [ + "# Specify datastream_dir following the path conventions and check its existence\n", + "DATASTREAM_NAME = \"okmsoil\"\n", + "DATA_LEVEL = \"c1\"\n", + "site = \"sgp\"\n", + "facility = \"X1\"\n", + "datastream_dir = os.path.join(DATA_DIR, site, site + DATASTREAM_NAME + facility + '.' 
+ DATA_LEVEL )\n", + "print(datastream_dir)\n", + "print(os.path.exists(datastream_dir))\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0742f7c1", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: list 5 (random) files under datastream_dir\n", + "files = os.listdir(datastream_dir)\n", + "files[:5]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "39b98a36", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: get most recent file\n", + "list_of_files = glob.glob(f\"{datastream_dir}/*\") # * means all if need specific format then *.csv\n", + "latest_file = max(list_of_files, key=os.path.getctime)\n", + "latest_file" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "902d514e", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus sort datastream files based on datetime\n", + "files = os.listdir(datastream_dir)\n", + "file_sorted = files.copy()\n", + "file_sorted.sort() \n", + "print(file_sorted[:5])\n", + "\n", + "# to reverse\n", + "file_sorted_reverse = files.copy() \n", + "file_sorted_reverse.sort(reverse=True)\n", + "print(file_sorted_reverse[:5])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ec5923b2", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: pattern matching\n", + "# filter the 200709** files under datastream_dir\n", + "files_filter = glob.glob(f'{datastream_dir}/*.200709*.*')\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c4a4aa18", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: use armnotebook_utils.file_filter (TODO) to filter files based on datastream info \n" + ] + }, + { + "cell_type": "markdown", + "id": "cfeb9efc", + "metadata": {}, + "source": [ + "### Load data\n", + "The arm data is stored in [NetCDF](https://en.wikipedia.org/wiki/NetCDF#:~:text=%22NetCDF%20(network%20Common%20Data%20Form,format%20for%20representing%20scientific%20data.) format. 
We can use xarray's `open_dataset` method to load a single file, or `open_mfdataset` to open multiple files. (Note: the latter will still return a single xarray dataset object by combining multiple files.)\n", + "\n", + "Note: open_dataset keeps the file handle open and lazy loads its contents. All parameters are passed directly to open_dataset. It is a preferable method over load_dataset for memory efficiency. (For more details, please see [xarray.open_dataset](https://docs.xarray.dev/en/stable/generated/xarray.open_dataset.html))\n", + "\n", + "See the following example in action" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a440a329", + "metadata": {}, + "outputs": [], + "source": [ + "# open single file\n", + "full_path = latest_file\n", + "print(full_path)\n", + "ds_single = xr.open_dataset(full_path)\n", + "ds_single" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0143a3d", + "metadata": {}, + "outputs": [], + "source": [ + "print(ds_single)\n", + "print(type(ds_single))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1c0f8939", + "metadata": {}, + "outputs": [], + "source": [ + "# open multiple files\n", + "n_files = 3\n", + "full_paths = [os.path.join(datastream_dir, f_path) for f_path in file_sorted_reverse[:n_files]]\n", + "print(full_paths)\n", + "try: # Note: sometimes multiple files cannot be merged, so we used try except here.\n", + " ds_mutiple = xr.open_mfdataset(full_paths)\n", + " # ds_mutiple\n", + " print(type(ds_mutiple))\n", + " print(ds_mutiple)\n", + "except Exception as e:\n", + " print(e)" + ] + }, + { + "cell_type": "markdown", + "id": "3fa59943", + "metadata": {}, + "source": [ + "## Explore the data" + ] + }, + { + "cell_type": "markdown", + "id": "571f69b9", + "metadata": {}, + "source": [ + "### NetCDF Data structure \n", + "\n", + "\n", + "(If you are confident with NetCDF basics and xarray essentials, feel free to skip this section.)\n", + "Before we dive into 
data exploration, there are some essential concepts we should be familiar with (shown below): \n", + "* Dataset\n", + "* Data array\n", + "* Variable\n", + "* Dimension\n", + "* Coordinate\n", + "* Data Type\n", + "* Meta Data (Attributes)\n", + "\n", + "We will not go into details about NetCDF basics, but here are some references you might find helpful:\n", + "* [Components of a NetCDF Dataset](https://iprc.soest.hawaii.edu/users/xfu/tool/guidef-7.html#)\n", + "* [Network Common Data Form (NetCDF)](https://www.unidata.ucar.edu/software/netcdf/) and [A Brief History of (netCDF) Time](https://www.unidata.ucar.edu/software/netcdf/time/recs.html)\n", + "\n", + "\n", + "\n", + "### Xarray essentials\n", + "Earlier we introduced the xarray package and used `open_dataset` and `open_mfdataset` to retrieve NetCDF data files as an xarray Dataset object. Recall that you can review the data in a notebook by using `print(ds)` or `ds`. \n", + "\n", + "Using xarray to retrieve the aforementioned NetCDF basics is straightforward. 
In general,\n", + "* Dataset: `ds`\n", + "* Data array: `ds.variables`\n", + "* Variable: `ds.variables`\n", + "* Dimension: `ds.dims`\n", + "* Coordinate: `ds.coords`\n", + "* Data Type: `type`\n", + "* Meta Data (Attributes): `ds.attrs`\n", + "\n", + "Also, here are some references if you are new to xarray\n", + "* [Xarray in 45 minutes](https://tutorial.xarray.dev/overview/xarray-in-45-min.html)\n", + "* [Handling NetCDF Files using XArray for Absolute Beginners](https://towardsdatascience.com/handling-netcdf-files-using-xarray-for-absolute-beginners-111a8ab4463f)\n", + "\n", + "Try the following commands in action\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b3eac323", + "metadata": {}, + "outputs": [], + "source": [ + "ds = ds_single" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "77ecf85d", + "metadata": {}, + "outputs": [], + "source": [ + "# Dataset \n", + "ds" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "25e7de09", + "metadata": {}, + "outputs": [], + "source": [ + "# Data array, Variable\n", + "# Note: the info can be overwhelming. 
Do not attempt to grasp everything at first glance.\n", + "ds.variables" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8c41b67e", + "metadata": {}, + "outputs": [], + "source": [ + "# dimensions\n", + "ds.dims" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "156f1dfc", + "metadata": {}, + "outputs": [], + "source": [ + "# coordinates\n", + "ds.coords" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "277d6064", + "metadata": {}, + "outputs": [], + "source": [ + "# Meta Data (Attributes)\n", + "ds.attrs" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d334681f", + "metadata": {}, + "outputs": [], + "source": [ + "# type\n", + "print(type(ds))\n", + "print(type(ds.variables))\n", + "print(type(ds.dims))\n", + "print(type(ds.coords))\n", + "print(type(ds.attrs))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "643399d6", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: retrieve variable names only\n", + "list(ds.variables)" + ] + }, + { + "cell_type": "markdown", + "id": "1cf36878", + "metadata": {}, + "source": [ + "#### Discussion: variable vs. coordinates vs. dimensions. \n", + "From [Components of a NetCDF Dataset](https://iprc.soest.hawaii.edu/users/xfu/tool/guidef-7.html#):\n", + "\n", + "* Dimension: A dimension may be used to represent a real physical dimension, for example, time, latitude, longitude, or height. A dimension might also be used to index other quantities, for example station or model-run-number.\n", + "A netCDF dimension has both a name and a length. A dimension length is an arbitrary positive integer, except that one dimension in a netCDF dataset can have the length UNLIMITED.\n", + "\n", + "* Variables: Variables are used to store the bulk of the data in a netCDF dataset. A variable represents an array of values of the same type. A scalar value is treated as a 0-dimensional array. 
A variable has a name, a data type, and a shape described by its list of dimensions specified when the variable is created. A variable may also have associated attributes, which may be added, deleted or changed after the variable is created.\n", + "\n", + "* Coordinate (Variables): It is legal for a variable to have the same name as a dimension. Such variables have no special meaning to the netCDF library. However there is a convention that such variables should be treated in a special way by software using this library.\n", + "A variable with the same name as a dimension is called a coordinate variable. It typically defines a physical coordinate corresponding to that dimension.\n", + "\n", + "\n", + "Tips: By definition, coordinates and dimensions are also variables. For example, ds.time is a coordinate, but it is also a special variable. But in practice, when we talk about a variable, we mean a regular/non-coordinate variable. We will use this convention for the rest of the notebook. " + ] + }, + { + "cell_type": "markdown", + "id": "d47120f9", + "metadata": {}, + "source": [ + "### Xarray Variable\n", + "\n", + "\n", + "We can use `ds.variables` to access the variables of a dataset. But printing out the whole dataset can often be overwhelming. Instead, we can work on an individual variable (also called an xarray data array) using the `ds[var_name]` syntax, where `var_name` is the name of the variable. See the following examples in action.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5a57e136", + "metadata": {}, + "outputs": [], + "source": [ + "print(type(ds[\"time\"]))\n", + "ds[\"time\"]" + ] + }, + { + "cell_type": "markdown", + "id": "51ec643e", + "metadata": {}, + "source": [ + "#### Variable properties\n", + "For an individual variable, the following properties are the most commonly used, listed with the associated xarray attribute to retrieve them. 
(assuming var = ds[var_name])\n", + "* name: `var.name`\n", + "* data content: `var.data`\n", + "* attributes: `var.attrs`\n", + "* dimensions: `var.dims`\n", + "* data type: `var.data.dtype`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a632a525", + "metadata": {}, + "outputs": [], + "source": [ + "var_name = \"time\"\n", + "var = ds[var_name]\n", + "\n", + "print(\"var.name: \\n\", var.name, \"\\n\")\n", + "print(\"var.data: \\n\", var.data, \"\\n\")\n", + "print(\"var.attrs: \\n\", var.attrs, \"\\n\")\n", + "print(\"var.dims: \\n\", var.dims, \"\\n\")\n", + "print(\"var.data.dtype: \\n\", var.data.dtype, \"\\n\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b256b07f", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: individual variable info table (collect and display variable info in a tabular format)\n", + "df_info = pd.DataFrame()\n", + "df_info[\"var_name\"] = list(ds.variables)\n", + "df_info[\"dims\"] = df_info.var_name.apply(lambda x: ds[x].dims)\n", + "df_info[\"is_dim\"] = df_info.var_name.apply(lambda x: x in ds.dims)\n", + "df_info[\"n_dim\"] = df_info.var_name.apply(lambda x: len(ds[x].dims))\n", + "df_info[\"attrs\"] = df_info.var_name.apply(lambda x: ds[x].attrs)\n", + "df_info[\"dtype\"] = df_info.var_name.apply(lambda x: ds[x].data.dtype)\n", + "df_info" + ] + }, + { + "cell_type": "markdown", + "id": "e6a36b96", + "metadata": {}, + "source": [ + "### Data cleaning/Preprocessing (skipped)\n", + "Data cleaning and preprocessing is an important stage in the data analysis pipeline. However, it is out of the scope of this notebook. For more information about data cleaning and preprocessing basics, here are some references. 
\n", + "* [Xarray Fundamentals](https://earth-env-data-science.github.io/lectures/xarray/xarray.html)\n", + "* [Xarray Tutorial — Pangeo Gallery documentation](http://gallery.pangeo.io/repos/pangeo-data/pangeo-tutorial-gallery/xarray.html)\n", + "* [Pythonic Data Cleaning With pandas and NumPy](https://realpython.com/python-data-cleaning-numpy-pandas/)\n", + "* [Pandas - Cleaning Data](https://www.w3schools.com/python/pandas/pandas_cleaning.asp)" + ] + }, + { + "cell_type": "markdown", + "id": "ae6e1a69", + "metadata": {}, + "source": [ + "### Plotting\n", + "Data visualization (or plotting) is another important data analysis topic and deserves its own discussion. Here in this notebook we will only demonstrate basic tools for simple data visualization tasks.\n", + "\n", + "Here are some references you might find useful:\n", + "* xarray plotting: https://docs.xarray.dev/en/stable/user-guide/plotting.html\n", + "* Atmospheric Community Toolkit (ACT): https://arm-doe.github.io/ACT/index.html\n", + "\n", + "Note: this notebook is auto-generated using a template. It uses a general idea to select variable(s) to plot and is not customized to each individual notebook. Feel free to change the variables of interest, especially if a certain figure fails to plot." + ] + }, + { + "cell_type": "markdown", + "id": "7e417fb2", + "metadata": {}, + "source": [ + "#### 1-dimensional basic time series plot\n", + "\n", + "For the following plot we would like to find variables such that\n", + "* it has one and only one dimension\n", + "* \"time\" is its coordinate variable\n", + "* it is not a dimension itself,\n", + "* it is not a special variable with substrings within [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "\n", + "Please see the following example in action. 
(For more details about pandas filtering, please see the following references.)\n", + "* [pandas: multiple conditions while indexing data frame](https://stackoverflow.com/questions/22591174/pandas-multiple-conditions-while-indexing-data-frame-unexpected-behavior)\n", + "* [pandas.DataFrame.apply](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.apply.html)\n", + "* [How to test if a string contains one of the substrings in a list...](https://stackoverflow.com/questions/26577516/how-to-test-if-a-string-contains-one-of-the-substrings-in-a-list-in-pandas)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "30edac2d", + "metadata": {}, + "outputs": [], + "source": [ + "# (pandas query) filter n_dim==1, non-dimenssional, and not contains the following substrings\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter = df_info[(df_info.n_dim == 1) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (df_info.is_dim==False) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4777a659", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " var_1d = df_filter.var_name.values[0]\n", + " var_1d\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fb5ae985", + "metadata": {}, + "outputs": [], + "source": [ + "# Note: if failed, change to another variable to plot.\n", + "try:\n", + " ds[var_1d].plot()\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "markdown", + "id": "9e41066e", + "metadata": {}, + "source": [ + "#### 2-dimensional basic plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "09bd4adc", + "metadata": {}, + "outputs": [], + "source": [ + "# (pandas query) filter n_dim==2, non-dimenssional, and not contains the 
following substrings\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_2 = df_info[(df_info.n_dim == 2) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (df_info.is_dim==False) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter_2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2cdea1a9", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " var_2d = df_filter_2.var_name.values[0]\n", + " var_2d\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f5f48879", + "metadata": {}, + "outputs": [], + "source": [ + "# Note: if failed, change to another variable to plot.\n", + "try:\n", + " print(ds[var_2d].dims)\n", + " # ds[var_2d].plot()\n", + "\n", + " # conventionally, use \"time\" as x-axis\n", + " ds[var_2d].plot(x=\"time\")\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "markdown", + "id": "7554b2a6", + "metadata": {}, + "source": [ + "#### qc-plotting (optional)\n", + "\n", + "Note: act qc plotting has more strict requirement, one of them is the associated qc variable need to have \"flag_masks\" attributes. 
Which is added by using ds = act.io.armfiles.read_netcdf(files_list), then ds.clean.cleanup()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "71c2096f", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " ds_act = act.io.armfiles.read_netcdf(full_path)\n", + " print(type(ds_act))\n", + " ds_act.clean.cleanup()\n", + "\n", + " # or \n", + " # ds.clean.cleanup()\n", + "except Exception as e:\n", + " print(\"ERROR\", e)\n", + " ds_act = ds" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "844f8505", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter valid variables for ACT qc plotting\n", + "condition = (\"qc_\" + df_info.var_name).apply(lambda x: ds_act[x].attrs.get(\"flag_masks\") is not None \n", + " if x in list(ds.variables) else False)\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_3 = df_info[(df_info.is_dim==False) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings))) &\n", + " condition\n", + " ]\n", + "df_filter_3" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ecd38cc6", + "metadata": {}, + "outputs": [], + "source": [ + "# QC Plot\n", + "try:\n", + " qc_variable = df_filter_3.var_name.values[0]\n", + " print(qc_variable)\n", + "except Exception as e:\n", + " print(e)\n", + " \n", + "try:\n", + "\n", + " # Plot\n", + " qc_display = act.plotting.TimeSeriesDisplay(ds_act)\n", + " qc_display.add_subplots((2,), figsize = (9.5,10))\n", + " qc_ax = qc_display.plot(qc_variable, subplot_index=(0,), set_title=\"QC results on field: \" + qc_variable,)\n", + " qc_ax.grid()\n", + " qc_display.qc_flag_block_plot(qc_variable, subplot_index=(1,))\n", + " qc_ax.set_xlabel(f\"UTC Time starts at {ds.time.data[0]}\")\n", + "\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e)" + ] + }, + { + "cell_type": "markdown", + "id": "d76e4d27", + "metadata": 
{}, + "source": [ + "#### bonus: choose variables to plot from a dropdown menu " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bb733804", + "metadata": {}, + "outputs": [], + "source": [ + "# Valid variables filtering\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_4 = df_info[(df_info.is_dim==False) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter_4" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "82d5c20d", + "metadata": {}, + "outputs": [], + "source": [ + "# example 1: using xarray plot\n", + "\n", + "# Uncomment the following cell to try the interactive plot (ctrl + /)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2fc9aaa4", + "metadata": {}, + "outputs": [], + "source": [ + "# %matplotlib widget\n", + "# plt.clf()\n", + "\n", + "# fig, ax = plt.subplots(figsize=(10, 4))\n", + "\n", + "# available_variables = df_filter_4.var_name.values\n", + "# @widgets.interact(var=available_variables)\n", + "# def update(var = available_variables[0]):\n", + "# fig.clear() # Remove old lines from plot and plot new one\n", + "# if len(ds[var].dims)==2:\n", + "# ds[var].plot(x=\"time\", add_colorbar=False)\n", + "# else:\n", + "# ds[var].plot()\n", + "# plt.grid()\n", + "# plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fabdd802", + "metadata": {}, + "outputs": [], + "source": [ + "# example 2: using act plot\n", + "\n", + "# Uncomment the following cell to try the interactive plot (ctrl + /)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "962b4186", + "metadata": {}, + "outputs": [], + "source": [ + "# %matplotlib widget\n", + "# plt.clf()\n", + "\n", + "# available_variables = df_filter_4.var_name.values\n", + "\n", + "\n", + "# @widgets.interact(var=available_variables)\n", + "# def 
update(var = available_variables[0]):\n", + "\n", + "# i_display = act.plotting.TimeSeriesDisplay(ds_act)\n", + "# i_display.add_subplots((1,), figsize=(10, 4))\n", + "# ax = i_display.plot(var, subplot_index=(0,), set_title=f\"{var} ({ds_act[var].attrs['long_name']})\",)\n", + "\n", + "# ax.set_xlabel(f\"UTC Time starts at {ds.time.data[0]}\")\n", + "# ax.grid()\n", + "# plt.show()\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.16" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/OKMSOIL/okmsoil.c1.ipynb b/VAPs/quicklook/OKMSOIL/okmsoil.c1.ipynb new file mode 100644 index 00000000..57911f93 --- /dev/null +++ b/VAPs/quicklook/OKMSOIL/okmsoil.c1.ipynb @@ -0,0 +1,339 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# OKMSOIL.C1 Plots\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/okmsoil) for more information about this vap." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib widget\n", + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import ipywidgets as widgets\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "# Data archive directory\n", + "DATA_DIR = r'/data/archive/'\n", + "\n", + "# Datastream info\n", + "DATASTREAM_NAME = 'okmsoil'\n", + "DATA_LEVEL = 'c1'\n", + "LOCATIONS = [{'end_date': '2020-10-22', 'facility': 'X1', 'site': 'sgp', 'start_date': '1998-01-01'}]" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "9faaf875", + "metadata": {}, + "source": [ + "## Define site, facility, and date range" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ac6764f5", + "metadata": {}, + "outputs": [], + "source": [ + "print(\"The following locations and date ranges are available for this VAP:\")\n", + "display(pd.DataFrame(LOCATIONS, columns=['site', 'facility', 'start_date', 'end_date']))" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "8d132223", + "metadata": {}, + "source": [ + "#### Define site, facility, and date range (date format: YYYY-MM-DD) using the variables below:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e563983a", + "metadata": {}, + "outputs": [], + "source": [ + "site_facility = ( 'sgp', 'X1' )\n", + "\n", + "date_start = '2020-10-20'\n", + "date_end = '2020-10-22'" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "bccd3dfe-2f99-49a5-bace-ea37e8dd8fc3", + "metadata": {}, + "source": [ + "## Load data files\n", + "Load data files from /data/archive/" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cb4b9a26-c574-49c0-a521-658fa553e39e", + "metadata": {}, + 
"outputs": [], + "source": [ + "# Compile list of files\n", + "site, facility = site_facility\n", + "d_date_start = datetime.strptime(date_start, '%Y-%m-%d')\n", + "d_date_end = datetime.strptime(date_end, '%Y-%m-%d')\n", + "dir_path = os.path.join(DATA_DIR + site, site + DATASTREAM_NAME + facility + r'.' + DATA_LEVEL )\n", + "dir_path\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6be8f3dc", + "metadata": {}, + "outputs": [], + "source": [ + "from datetime import date, timedelta\n", + "import pandas as pd\n", + "\n", + "def get_ARM_formated_dates(start_date, end_date):\n", + " \"\"\"\n", + " Get a list of ARM conventional formated date lists, based on start_date and end_date(inclusive)\n", + " EXAMPLE:\n", + " get_ARM_formated_dates(start_date=\"20180219\", end_date=\"20180221\")\n", + " >> [\"20180219\", \"20180220\", \"20180221\"] \n", + " \"\"\"\n", + " \n", + " _start_date = pd.to_datetime(start_date)\n", + " _end_date = pd.to_datetime(end_date)\n", + " \n", + " delta = _end_date - _start_date # returns timedelta \n", + " dates = []\n", + "\n", + " for i in range(delta.days + 1):\n", + " day = _start_date + timedelta(days=i)\n", + " day_formated = day.strftime(format=\"%Y%m%d\")\n", + " dates.append(day_formated)\n", + " return dates\n", + "\n", + "\n", + "get_ARM_formated_dates(start_date=date_start, end_date=date_end)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "51feea2e", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter a list of files based on date pattern\n", + "import glob\n", + "dates = get_ARM_formated_dates(start_date=date_start, end_date=date_end)\n", + "files_filter = []\n", + "for date in dates:\n", + " files_filter += glob.glob(f'{dir_path}/*.{date}*.*')\n", + " files_filter\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0e5d0d7", + "metadata": {}, + "outputs": [], + "source": [ + "# Load files as a single dataset\n", + "files_list = 
files_filter \n", + "ds = act.io.armfiles.read_netcdf(files_list)\n", + "ds.clean.cleanup()\n", + "print(f'{len(files_list)} files loaded')\n", + "ds\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "4a551094-9ec0-4b64-b80a-9940573c2f50", + "metadata": {}, + "source": [ + "## Plot time series data\n", + "#### Define the list of variables to be plotted:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d09b789e-84f1-4605-846b-a72c110c8048", + "metadata": {}, + "outputs": [], + "source": [ + "variables_to_plot = ['sensor_temperature_rise', 'matric_potential', 'volumetric_water_content']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3458fb08-035b-4898-9253-0a94e6f9c97b", + "metadata": {}, + "outputs": [], + "source": [ + "ts_display = act.plotting.TimeSeriesDisplay(ds)\n", + "ts_display.add_subplots((len(variables_to_plot),), figsize = (9.5,4*len(variables_to_plot)))\n", + "\n", + "for i,v in enumerate(variables_to_plot):\n", + " ts_ax = ts_display.plot(v, subplot_index=(i,), set_title=ds.variables[v].attrs['long_name'],)\n", + " ts_ax.grid()\n", + "\n", + "plt.show()\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "194399aa-1907-452b-8ba9-bc31d7f60291", + "metadata": {}, + "source": [ + "## Quality check plots\n", + "#### Define variable for QC plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9a0ef63d-3eeb-48fc-a24a-43258b6134b8", + "metadata": {}, + "outputs": [], + "source": [ + "qc_variable = 'sensor_temperature_rise'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "532663a3-4dc0-4497-bda8-018c5f91e1c4", + "metadata": {}, + "outputs": [], + "source": [ + "# QC Plot\n", + "if ('qc_'+qc_variable) in ds.variables:\n", + "\n", + " # Plot\n", + " qc_display = act.plotting.TimeSeriesDisplay(ds)\n", + " qc_display.add_subplots((2,), figsize = (9.5,10))\n", + " qc_ax = qc_display.plot(qc_variable, subplot_index=(0,), 
set_title=\"QC results on field: \" + qc_variable,)\n", + " qc_ax.grid()\n", + " qc_display.qc_flag_block_plot(qc_variable, subplot_index=(1,))\n", + "\n", + " plt.show()\n", + "else:\n", + " print(f'QC not available for the selected field: {qc_variable}')\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "2308db16-f362-4033-a11a-c3e5e75ad9ba", + "metadata": {}, + "source": [ + "## Field selection dropdown menu\n", + "Select variable to be plotted from a dropdown menu" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d5792fa8-2253-441a-8ac6-1e07a0345031", + "metadata": {}, + "outputs": [], + "source": [ + "plt.ioff()\n", + "\n", + "# populate dropdown menu with available variables \n", + "available_variables = [v for v in ds.variables if not('time' in v or v.startswith('qc_') or v.startswith('source_')) and 'long_name' in ds.variables[v].attrs]\n", + "d_variable = 'sensor_temperature_rise'\n", + "dropdown = widgets.Dropdown(\n", + " options = [(ds.variables[v].attrs['long_name'], v) for v in available_variables],\n", + " value= d_variable,\n", + " description='Field:',\n", + " disabled=False,\n", + ")\n", + "dropdown.layout.margin = '0px 30% 0px 20%'\n", + "dropdown.layout.width = '50%'\n", + "\n", + "# set up display\n", + "i_display = act.plotting.TimeSeriesDisplay(ds)\n", + "i_display.add_subplots((1,), figsize = (9.5,5))\n", + "i_ax = i_display.plot(d_variable, subplot_index=(0,), set_title=ds.variables[d_variable].attrs['long_name'],)\n", + "i_ax.grid()\n", + "i_fig = i_display.fig\n", + "\n", + "# update plot callback function\n", + "def update_plot(change):\n", + " i_ax.cla()\n", + " i_ax_new = i_display.plot(change.new, subplot_index=(0,), set_title=ds.variables[change.new].attrs['long_name'],)\n", + " i_ax_new.grid()\n", + " i_fig.canvas.draw()\n", + " i_fig.canvas.flush_events()\n", + "\n", + "dropdown.observe(update_plot, names='value')\n", + "\n", + "widgets.AppLayout(\n", + " header=dropdown,\n", + " 
center=i_fig.canvas,\n", + " pane_heights=[1, 6,1]\n", + ")\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.12" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/OZONE-AIR/OZONE-AIR_tutorial.ipynb b/VAPs/quicklook/OZONE-AIR/OZONE-AIR_tutorial.ipynb new file mode 100644 index 00000000..34aa8f95 --- /dev/null +++ b/VAPs/quicklook/OZONE-AIR/OZONE-AIR_tutorial.ipynb @@ -0,0 +1,838 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# AAFO3.C1 Notebook\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/ozone-air) for more information about this vap." + ] + }, + { + "cell_type": "markdown", + "id": "97097763", + "metadata": {}, + "source": [ + "In this notebook, we demonstrate the workflow to explore ARM vap data (using aafo3 as an example.) Value-added products (VAPs) are higher-order data products that have been analyzed and processed to ease scientists’ use of ARM data in atmospheric research and global climate models. \n", + "Here is the main content we will cover." 
+ ] + }, + { + "cell_type": "markdown", + "id": "eddec40f", + "metadata": {}, + "source": [ + "# Table of Content\n", + "## Access the data\n", + "* How to retrieve the data\n", + "* Data path and file name conventions\n", + "* Load data\n", + "## Explore the data\n", + "* NetCDF Data structure\n", + "* Xarray essentials\n", + "* Xarray Variable\n", + "## Plot the data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "import random\n", + "\n", + "import glob\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "id": "2372d5be", + "metadata": {}, + "source": [ + "## Access the data" + ] + }, + { + "cell_type": "markdown", + "id": "5b0d9684", + "metadata": {}, + "source": [ + "### How to retrieve the data\n", + "We assume the path \"/data/archive\" is available where you are running this notebook. You can use `os.path.exists(\"/data/archive\")` to verify if the path exists at your machine.\n", + "\n", + "\n", + "### Data path and file name conventions\n", + "There several common terminologies regarding ARM data, for example, data-stream-name, data-level, site, facility, instrument, etc. (For more details, please see [ARM Data File Standards Version 1.2](https://www.google.com/search?q=arm+datastream+facility+cite+definition&rlz=1C1GCEJ_enUS1029US1029&ei=hb41ZICrBPukqtsPvMODIA&ved=0ahUKEwjAgeiS0aL-AhV7kmoFHbzhAAQQ4dUDCBA&uact=5&oq=arm+datastream+facility+cite+definition&gs_lcp=Cgxnd3Mtd2l6LXNlcnAQAzIFCAAQogQyBQgAEKIEMgUIABCiBDIFCAAQogQyBQgAEKIEOgoIABBHENYEELADOgoIIRCgARDDBBAKSgQIQRgAUJ4DWIoOYO0PaAFwAXgAgAGMAYgBuQmSAQMyLjmYAQCgAQHIAQjAAQE&sclient=gws-wiz-serp).) 
\n", + "\n", + "For example, this notebook is called `aafo3.c1`, where `aafo3` is the \"datastream name\" and `c1` is the \"data level\".\n", + "\n", + "This datastream also contains site `cor` and facility `F1`. (Note: an individual datastream might have multiple site-facility pairs.)\n", + "In such a case, the data of this datastream is stored at `/data/archive/cor/coraafo3F1.c1`, which is in the format of `<DATA_DIR>/<site>/<site><DATASTREAM_NAME><facility>.<DATA_LEVEL>`. We can use the following method to assign the datastream directory: `datastream_dir = os.path.join(DATA_DIR, site, site + DATASTREAM_NAME + facility + '.' + DATA_LEVEL )`\n", + "\n", + "The data files under datastream_dir also follow naming conventions. Once you reach the datastream_dir level, the most important part of the file name for differentiating the files is the \"yyyyMMdd.hhmmss\" timestamp, which comes in handy for filtering files based on datetime. For example, we can use `glob.glob(f'{datastream_dir}/*.200709*.*')` to filter files from September 2007.\n", + "\n", + "(Note: refer to https://adc.arm.gov/discovery/#/ and the https://adc.arm.gov/solr8/metadata/select API to explore ARM datastreams and their associated available sites and facilities.)\n", + "\n", + "Please see the following examples in action" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d7e9eb85", + "metadata": {}, + "outputs": [], + "source": [ + "# Verify if DATA_DIR path exists\n", + "DATA_DIR = \"/data/archive\"\n", + "os.path.exists(DATA_DIR)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "586993fd", + "metadata": {}, + "outputs": [], + "source": [ + "# Speicify datastream_dir following the path conventions and check its existence\n", + "DATASTREAM_NAME = \"aafo3\"\n", + "DATA_LEVEL = \"c1\"\n", + "site = \"cor\"\n", + "facility = \"F1\"\n", + "datastream_dir = os.path.join(DATA_DIR, site, site + DATASTREAM_NAME + facility + '.' 
+ DATA_LEVEL )\n", + "print(datastream_dir)\n", + "print(os.path.exists(datastream_dir))\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0742f7c1", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: list 5 (random) files under datastream_dir\n", + "files = os.listdir(datastream_dir)\n", + "files[:5]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "39b98a36", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: get most recent file\n", + "list_of_files = glob.glob(f\"{datastream_dir}/*\") # * means all if need specific format then *.csv\n", + "latest_file = max(list_of_files, key=os.path.getctime)\n", + "latest_file" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "902d514e", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus sort datastream files based on datetime\n", + "files = os.listdir(datastream_dir)\n", + "file_sorted = files.copy()\n", + "file_sorted.sort() \n", + "print(file_sorted[:5])\n", + "\n", + "# to reverse\n", + "file_sorted_reverse = files.copy() \n", + "file_sorted_reverse.sort(reverse=True)\n", + "print(file_sorted_reverse[:5])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ec5923b2", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: pattern matching\n", + "# filter the 200709** files under datastream_dir\n", + "files_filter = glob.glob(f'{datastream_dir}/*.200709*.*')\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c4a4aa18", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: use armnotebook_utils.file_filter (TODO) to filter files based on datastream info \n" + ] + }, + { + "cell_type": "markdown", + "id": "cfeb9efc", + "metadata": {}, + "source": [ + "### Load data\n", + "The arm data is stored in [NetCDF](https://en.wikipedia.org/wiki/NetCDF#:~:text=%22NetCDF%20(network%20Common%20Data%20Form,format%20for%20representing%20scientific%20data.) format. 
We can use xarray's `open_dataset` method to load single file, or `open_mfdataset` to open multiple files. (Note: the latter will still return a single xarray dataset object by combining multiple files.)\n", + "\n", + "Note: open_dataset keeps the file handle open and lazy loads its contents. All parameters are passed directly to open_dataset. It is a preferable method over load_dataset for memory-effiency. (more details, please see [xarray.open_dataset](https://docs.xarray.dev/en/stable/generated/xarray.open_dataset.html))\n", + "\n", + "See the following example in action" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a440a329", + "metadata": {}, + "outputs": [], + "source": [ + "# open single file\n", + "full_path = latest_file\n", + "print(full_path)\n", + "ds_single = xr.open_dataset(full_path)\n", + "ds_single" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0143a3d", + "metadata": {}, + "outputs": [], + "source": [ + "print(ds_single)\n", + "print(type(ds_single))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1c0f8939", + "metadata": {}, + "outputs": [], + "source": [ + "# open multiple files\n", + "n_files = 3\n", + "full_paths = [os.path.join(datastream_dir, f_path) for f_path in file_sorted_reverse[:n_files]]\n", + "print(full_paths)\n", + "try: # Note: sometimes multiple files cannot be merged, so we used try except here.\n", + " ds_mutiple = xr.open_mfdataset(full_paths)\n", + " # ds_mutiple\n", + " print(type(ds_mutiple))\n", + " print(ds_mutiple)\n", + "except Exception as e:\n", + " print(e)" + ] + }, + { + "cell_type": "markdown", + "id": "3fa59943", + "metadata": {}, + "source": [ + "## Explore the data" + ] + }, + { + "cell_type": "markdown", + "id": "571f69b9", + "metadata": {}, + "source": [ + "### NetCDF Data structure \n", + "\n", + "\n", + "(If you are confident with NetCDF basics and xarray essentials, feel free to skip this session.)\n", + "Before we dive into 
data exploration, there are some essential concepts we should be familiar with (shown below): \n", + "* Dataset\n", + "* Data array\n", + "* Variable\n", + "* Dimension\n", + "* Coordinate\n", + "* Data Type\n", + "* Meta Data (Attributes)\n", + "\n", + "We will not go into detail about NetCDF basics; here are some references you might find helpful:\n", + "* [Components of a NetCDF Dataset](https://iprc.soest.hawaii.edu/users/xfu/tool/guidef-7.html#)\n", + "* [Network Common Data Form (NetCDF)](https://www.unidata.ucar.edu/software/netcdf/) and [A Brief History of (netCDF) Time](https://www.unidata.ucar.edu/software/netcdf/time/recs.html)\n", + "\n", + "\n", + "\n", + "### Xarray essentials\n", + "Earlier we introduced the xarray package and used `open_dataset` and `open_mfdataset` to load NetCDF data files as an xarray Dataset object. Recall that you can review the data in a notebook by using `print(ds)` or `ds`. \n", + "\n", + "Using xarray to retrieve the aforementioned NetCDF basics is straightforward. 
In general,\n", + "* Dataset: `ds`\n", + "* Data array: `ds.variables`\n", + "* Variable: `ds.variables`\n", + "* Dimension: `ds.dims`\n", + "* Coordinate: `ds.coords`\n", + "* Data Type: `type(ds)`\n", + "* Meta Data (Attributes): `ds.attrs`\n", + "\n", + "Also, here are some references if you are new to xarray:\n", + "* [Xarray in 45 minutes](https://tutorial.xarray.dev/overview/xarray-in-45-min.html)\n", + "* [Handling NetCDF Files using XArray for Absolute Beginners](https://towardsdatascience.com/handling-netcdf-files-using-xarray-for-absolute-beginners-111a8ab4463f)\n", + "\n", + "Try the following commands in action\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b3eac323", + "metadata": {}, + "outputs": [], + "source": [ + "ds = ds_single" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "77ecf85d", + "metadata": {}, + "outputs": [], + "source": [ + "# Dataset \n", + "ds" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "25e7de09", + "metadata": {}, + "outputs": [], + "source": [ + "# Data array, Variable\n", + "# Note: the info can be overwhelming. 
Do not attempt to grasp everything at the first glance.\n", + "ds.variables" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8c41b67e", + "metadata": {}, + "outputs": [], + "source": [ + "# dimenssions\n", + "ds.dims" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "156f1dfc", + "metadata": {}, + "outputs": [], + "source": [ + "# coordinates\n", + "ds.coords" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "277d6064", + "metadata": {}, + "outputs": [], + "source": [ + "# Meta Data (Attributes)\n", + "ds.attrs" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d334681f", + "metadata": {}, + "outputs": [], + "source": [ + "# type\n", + "print(type(ds))\n", + "print(type(ds.variables))\n", + "print(type(ds.dims))\n", + "print(type(ds.coords))\n", + "print(type(ds.attrs))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "643399d6", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: retrieve variable names only\n", + "list(ds.variables)" + ] + }, + { + "cell_type": "markdown", + "id": "1cf36878", + "metadata": {}, + "source": [ + "#### Discussion: variable vs. coordinates vs. dimenssions. \n", + "From [Components of a NetCDF Dataset](https://iprc.soest.hawaii.edu/users/xfu/tool/guidef-7.html#):\n", + "\n", + "* Dimession: A dimension may be used to represent a real physical dimension, for example, time, latitude, longitude, or height. A dimension might also be used to index other quantities, for example station or model-run-number.\n", + "A netCDF dimension has both a name and a length. A dimension length is an arbitrary positive integer, except that one dimension in a netCDF dataset can have the length UNLIMITED.\n", + "\n", + "* Variables: Variables are used to store the bulk of the data in a netCDF dataset. A variable represents an array of values of the same type. A scalar value is treated as a 0-dimensional array. 
A variable has a name, a data type, and a shape described by its list of dimensions specified when the variable is created. A variable may also have associated attributes, which may be added, deleted or changed after the variable is created.\n", + "\n", + "* Coordinate (Variables): It is legal for a variable to have the same name as a dimension. Such variables have no special meaning to the netCDF library. However there is a convention that such variables should be treated in a special way by software using this library.\n", + "A variable with the same name as a dimension is called a coordinate variable. It typically defines a physical coordinate corresponding to that dimension.\n", + "\n", + "\n", + "Tip: By definition, coordinates and dimensions are also variables. For example, ds.time is a coordinate, but it is also a special variable. In practice, however, when we talk about a variable, we mean a regular (non-coordinate) variable. We will use this convention for the remainder of the notebook. " + ] + }, + { + "cell_type": "markdown", + "id": "d47120f9", + "metadata": {}, + "source": [ + "### Xarray Variable\n", + "\n", + "\n", + "We can use `ds.variables` to access the variables of a dataset. However, printing out the whole dataset can often be overwhelming. Instead, we can work on an individual variable (also called an xarray data array) using the `ds[var_name]` syntax, where `var_name` is the name of the variable. See the following examples in action.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5a57e136", + "metadata": {}, + "outputs": [], + "source": [ + "print(type(ds[\"time\"]))\n", + "ds[\"time\"]" + ] + }, + { + "cell_type": "markdown", + "id": "51ec643e", + "metadata": {}, + "source": [ + "#### Variable properties\n", + "For an individual variable, the following properties are the most commonly used, listed with the associated xarray attribute used to retrieve them. 
(assuming var = ds[var_name])\n", + "* name: `var.name`\n", + "* data content: `var.data`\n", + "* attributes: `var.attrs`\n", + "* dimensions: `var.dims`\n", + "* data type: `var.data.dtype`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a632a525", + "metadata": {}, + "outputs": [], + "source": [ + "var_name = \"time\"\n", + "var = ds[var_name]\n", + "\n", + "print(\"var.name: \\n\", var.name, \"\\n\")\n", + "print(\"var.data: \\n\", var.data, \"\\n\")\n", + "print(\"var.attrs: \\n\", var.attrs, \"\\n\")\n", + "print(\"var.dims: \\n\", var.dims, \"\\n\")\n", + "print(\"var.data.dtype: \\n\", var.data.dtype, \"\\n\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b256b07f", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: individual variable info table (collect and display variable info in a tabular format)\n", + "df_info = pd.DataFrame()\n", + "df_info[\"var_name\"] = list(ds.variables)\n", + "df_info[\"dims\"] = df_info.var_name.apply(lambda x: ds[x].dims)\n", + "df_info[\"is_dim\"] = df_info.var_name.apply(lambda x: x in ds.dims)\n", + "df_info[\"n_dim\"] = df_info.var_name.apply(lambda x: len(ds[x].dims))\n", + "df_info[\"attrs\"] = df_info.var_name.apply(lambda x: ds[x].attrs)\n", + "df_info[\"dtype\"] = df_info.var_name.apply(lambda x: ds[x].data.dtype)\n", + "df_info" + ] + }, + { + "cell_type": "markdown", + "id": "e6a36b96", + "metadata": {}, + "source": [ + "### Data cleaning/Preprocessing (skipped)\n", + "Data cleaning and preprocessing is an important stage in the data analysis pipeline. However, it is outside the scope of this notebook. For more information about data cleaning and preprocessing basics, here are some references. 
\n", + "* [Xarray Fundamentals](https://earth-env-data-science.github.io/lectures/xarray/xarray.html)\n", + "* [Xarray Tutorial — Pangeo Gallery documentation](http://gallery.pangeo.io/repos/pangeo-data/pangeo-tutorial-gallery/xarray.html)\n", + "* [Pythonic Data Cleaning With pandas and NumPy](https://realpython.com/python-data-cleaning-numpy-pandas/)\n", + "* [Pandas - Cleaning Data](https://www.w3schools.com/python/pandas/pandas_cleaning.asp)" + ] + }, + { + "cell_type": "markdown", + "id": "ae6e1a69", + "metadata": {}, + "source": [ + "### Plotting\n", + "Data visualization (or plotting) is another important data analysis topic and deserves its own discussion. In this notebook we will only demonstrate basic tools for simple data visualization tasks.\n", + "\n", + "Here are some references you might find useful:\n", + "* xarray plotting: https://docs.xarray.dev/en/stable/user-guide/plotting.html\n", + "* Atmospheric Community Toolkit (ACT): https://arm-doe.github.io/ACT/index.html\n", + "\n", + "Note: this notebook is auto-generated from a template. It uses a general heuristic to select the variable(s) to plot and is not customized for each individual notebook. Feel free to change the variables of interest, especially if a figure fails to plot." + ] + }, + { + "cell_type": "markdown", + "id": "7e417fb2", + "metadata": {}, + "source": [ + "#### 1-dimensional basic time series plot\n", + "\n", + "For the following plot we would like to find variables such that\n", + "* it has one and only one dimension\n", + "* \"time\" is its coordinate variable\n", + "* it is not a dimension itself,\n", + "* it is not a special variable with substrings within [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "\n", + "Please see the following example in action. 
(For more details about pandas filtering, please see the following references.)\n", + "* [pandas: multiple conditions while indexing data frame](https://stackoverflow.com/questions/22591174/pandas-multiple-conditions-while-indexing-data-frame-unexpected-behavior)\n", + "* [pandas.DataFrame.apply](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.apply.html)\n", + "* [How to test if a string contains one of the substrings in a list...](https://stackoverflow.com/questions/26577516/how-to-test-if-a-string-contains-one-of-the-substrings-in-a-list-in-pandas)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "30edac2d", + "metadata": {}, + "outputs": [], + "source": [ + "# (pandas query) filter n_dim==1, non-dimensional, and not contains the following substrings\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter = df_info[(df_info.n_dim == 1) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (df_info.is_dim==False) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4777a659", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " var_1d = df_filter.var_name.values[0]\n", + " var_1d\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fb5ae985", + "metadata": {}, + "outputs": [], + "source": [ + "# Note: if failed, change to another variable to plot.\n", + "try:\n", + " ds[var_1d].plot()\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "markdown", + "id": "9e41066e", + "metadata": {}, + "source": [ + "#### 2-dimensional basic plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "09bd4adc", + "metadata": {}, + "outputs": [], + "source": [ + "# (pandas query) filter n_dim==2, non-dimensional, and not contains the 
following substrings\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_2 = df_info[(df_info.n_dim == 2) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (df_info.is_dim==False) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter_2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2cdea1a9", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " var_2d = df_filter_2.var_name.values[0]\n", + " var_2d\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f5f48879", + "metadata": {}, + "outputs": [], + "source": [ + "# Note: if failed, change to another variable to plot.\n", + "try:\n", + " print(ds[var_2d].dims)\n", + " # ds[var_2d].plot()\n", + "\n", + " # conventionally, use \"time\" as x-axis\n", + " ds[var_2d].plot(x=\"time\")\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "markdown", + "id": "7554b2a6", + "metadata": {}, + "source": [ + "#### qc-plotting (optional)\n", + "\n", + "Note: act qc plotting has more strict requirement, one of them is the associated qc variable need to have \"flag_masks\" attributes. 
Which is added by using ds = act.io.armfiles.read_netcdf(files_list), then ds.clean.cleanup()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "71c2096f", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " ds_act = act.io.armfiles.read_netcdf(full_path)\n", + " print(type(ds_act))\n", + " ds_act.clean.cleanup()\n", + "\n", + " # or \n", + " # ds.clean.cleanup()\n", + "except Exception as e:\n", + " print(\"ERROR\", e)\n", + " ds_act = ds" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "844f8505", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter valid variables for ACT qc plotting\n", + "condition = (\"qc_\" + df_info.var_name).apply(lambda x: ds_act[x].attrs.get(\"flag_masks\") is not None \n", + " if x in list(ds.variables) else False)\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_3 = df_info[(df_info.is_dim==False) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings))) &\n", + " condition\n", + " ]\n", + "df_filter_3" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ecd38cc6", + "metadata": {}, + "outputs": [], + "source": [ + "# QC Plot\n", + "try:\n", + " qc_variable = df_filter_3.var_name.values[0]\n", + " print(qc_variable)\n", + "except Exception as e:\n", + " print(e)\n", + " \n", + "try:\n", + "\n", + " # Plot\n", + " qc_display = act.plotting.TimeSeriesDisplay(ds_act)\n", + " qc_display.add_subplots((2,), figsize = (9.5,10))\n", + " qc_ax = qc_display.plot(qc_variable, subplot_index=(0,), set_title=\"QC results on field: \" + qc_variable,)\n", + " qc_ax.grid()\n", + " qc_display.qc_flag_block_plot(qc_variable, subplot_index=(1,))\n", + " qc_ax.set_xlabel(f\"UTC Time starts at {ds.time.data[0]}\")\n", + "\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e)" + ] + }, + { + "cell_type": "markdown", + "id": "d76e4d27", + "metadata": 
{}, + "source": [ + "#### bonus: choose variables to plot from a dropdown menu " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bb733804", + "metadata": {}, + "outputs": [], + "source": [ + "# Valid variables filtering\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_4 = df_info[(df_info.is_dim==False) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter_4" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "82d5c20d", + "metadata": {}, + "outputs": [], + "source": [ + "# example 1: using xarray plot\n", + "\n", + "# Uncomment the following cell to try the interactive plot (ctrl + /)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2fc9aaa4", + "metadata": {}, + "outputs": [], + "source": [ + "# %matplotlib widget\n", + "# plt.clf()\n", + "\n", + "# fig, ax = plt.subplots(figsize=(10, 4))\n", + "\n", + "# available_variables = df_filter_4.var_name.values\n", + "# @widgets.interact(var=available_variables)\n", + "# def update(var = available_variables[0]):\n", + "# fig.clear() # Remove old lines from plot and plot new one\n", + "# if len(ds[var].dims)==2:\n", + "# ds[var].plot(x=\"time\", add_colorbar=False)\n", + "# else:\n", + "# ds[var].plot()\n", + "# plt.grid()\n", + "# plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fabdd802", + "metadata": {}, + "outputs": [], + "source": [ + "# example 2: using act plot\n", + "\n", + "# Uncomment the following cell to try the interactive plot (ctrl + /)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "962b4186", + "metadata": {}, + "outputs": [], + "source": [ + "# %matplotlib widget\n", + "# plt.clf()\n", + "\n", + "# available_variables = df_filter_4.var_name.values\n", + "\n", + "\n", + "# @widgets.interact(var=available_variables)\n", + "# def 
update(var = available_variables[0]):\n", + "\n", + "# i_display = act.plotting.TimeSeriesDisplay(ds_act)\n", + "# i_display.add_subplots((1,), figsize=(10, 4))\n", + "# ax = i_display.plot(var, subplot_index=(0,), set_title=f\"{var} ({ds_act[var].attrs['long_name']})\",)\n", + "\n", + "# ax.set_xlabel(f\"UTC Time starts at {ds.time.data[0]}\")\n", + "# ax.grid()\n", + "# plt.show()\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.16" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/OZONE-AIR/aafo3.c1.ipynb b/VAPs/quicklook/OZONE-AIR/aafo3.c1.ipynb new file mode 100644 index 00000000..07c81973 --- /dev/null +++ b/VAPs/quicklook/OZONE-AIR/aafo3.c1.ipynb @@ -0,0 +1,297 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# AAFO3.C1 Plots\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/ozone-air) for more information about this vap." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib widget\n", + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import ipywidgets as widgets\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "# Data archive directory\n", + "DATA_DIR = r'/data/archive/'\n", + "\n", + "# Datastream info\n", + "DATASTREAM_NAME = 'aafo3'\n", + "DATA_LEVEL = 'c1'\n", + "LOCATIONS = [{'end_date': '2018-12-08', 'facility': 'F1', 'site': 'cor', 'start_date': '2018-11-04'}]" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "9faaf875", + "metadata": {}, + "source": [ + "## Define site, facility, and date range" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ac6764f5", + "metadata": {}, + "outputs": [], + "source": [ + "print(\"The following locations and date ranges are available for this VAP:\")\n", + "display(pd.DataFrame(LOCATIONS, columns=['site', 'facility', 'start_date', 'end_date']))" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "8d132223", + "metadata": {}, + "source": [ + "#### Define site, facility, and date range (date format: YYYY-MM-DD) using the variables below:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e563983a", + "metadata": {}, + "outputs": [], + "source": [ + "site_facility = ( 'cor', 'F1' )\n", + "\n", + "date_start = '2018-12-06'\n", + "date_end = '2018-12-08'" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "bccd3dfe-2f99-49a5-bace-ea37e8dd8fc3", + "metadata": {}, + "source": [ + "## Load data files\n", + "Load data files from /data/archive/" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cb4b9a26-c574-49c0-a521-658fa553e39e", + "metadata": {}, + 
"outputs": [], + "source": [ + "# Compile list of files\n", + "site, facility = site_facility\n", + "d_date_start = datetime.strptime(date_start, '%Y-%m-%d')\n", + "d_date_end = datetime.strptime(date_end, '%Y-%m-%d')\n", + "dir_path = os.path.join(DATA_DIR + site, site + DATASTREAM_NAME + facility + r'.' + DATA_LEVEL )\n", + "dir_path\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6be8f3dc", + "metadata": {}, + "outputs": [], + "source": [ + "from datetime import date, timedelta\n", + "import pandas as pd\n", + "\n", + "def get_ARM_formated_dates(start_date, end_date):\n", + " \"\"\"\n", + " Get a list of ARM conventional formated date lists, based on start_date and end_date(inclusive)\n", + " EXAMPLE:\n", + " get_ARM_formated_dates(start_date=\"20180219\", end_date=\"20180221\")\n", + " >> [\"20180219\", \"20180220\", \"20180221\"] \n", + " \"\"\"\n", + " \n", + " _start_date = pd.to_datetime(start_date)\n", + " _end_date = pd.to_datetime(end_date)\n", + " \n", + " delta = _end_date - _start_date # returns timedelta \n", + " dates = []\n", + "\n", + " for i in range(delta.days + 1):\n", + " day = _start_date + timedelta(days=i)\n", + " day_formated = day.strftime(format=\"%Y%m%d\")\n", + " dates.append(day_formated)\n", + " return dates\n", + "\n", + "\n", + "get_ARM_formated_dates(start_date=date_start, end_date=date_end)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "51feea2e", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter a list of files based on date pattern\n", + "import glob\n", + "dates = get_ARM_formated_dates(start_date=date_start, end_date=date_end)\n", + "files_filter = []\n", + "for date in dates:\n", + " files_filter += glob.glob(f'{dir_path}/*.{date}*.*')\n", + " files_filter\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0e5d0d7", + "metadata": {}, + "outputs": [], + "source": [ + "# Load files as a single dataset\n", + "files_list = 
files_filter \n", + "ds = act.io.armfiles.read_netcdf(files_list)\n", + "ds.clean.cleanup()\n", + "print(f'{len(files_list)} files loaded')\n", + "ds\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "4a551094-9ec0-4b64-b80a-9940573c2f50", + "metadata": {}, + "source": [ + "## Plot time series data\n", + "#### Define the list of variables to be plotted:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d09b789e-84f1-4605-846b-a72c110c8048", + "metadata": {}, + "outputs": [], + "source": [ + "variables_to_plot = ['o3']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3458fb08-035b-4898-9253-0a94e6f9c97b", + "metadata": {}, + "outputs": [], + "source": [ + "ts_display = act.plotting.TimeSeriesDisplay(ds)\n", + "ts_display.add_subplots((len(variables_to_plot),), figsize = (9.5,4*len(variables_to_plot)))\n", + "\n", + "for i,v in enumerate(variables_to_plot):\n", + " ts_ax = ts_display.plot(v, subplot_index=(i,), set_title=ds.variables[v].attrs['long_name'],)\n", + " ts_ax.grid()\n", + "\n", + "plt.show()\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "2308db16-f362-4033-a11a-c3e5e75ad9ba", + "metadata": {}, + "source": [ + "## Field selection dropdown menu\n", + "Select variable to be plotted from a dropdown menu" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d5792fa8-2253-441a-8ac6-1e07a0345031", + "metadata": {}, + "outputs": [], + "source": [ + "plt.ioff()\n", + "\n", + "# populate dropdown menu with available variables \n", + "available_variables = [v for v in ds.variables if not('time' in v or v.startswith('qc_') or v.startswith('source_')) and 'long_name' in ds.variables[v].attrs]\n", + "d_variable = 'o3'\n", + "dropdown = widgets.Dropdown(\n", + " options = [(ds.variables[v].attrs['long_name'], v) for v in available_variables],\n", + " value= d_variable,\n", + " description='Field:',\n", + " disabled=False,\n", + ")\n", + 
"dropdown.layout.margin = '0px 30% 0px 20%'\n", + "dropdown.layout.width = '50%'\n", + "\n", + "# set up display\n", + "i_display = act.plotting.TimeSeriesDisplay(ds)\n", + "i_display.add_subplots((1,), figsize = (9.5,5))\n", + "i_ax = i_display.plot(d_variable, subplot_index=(0,), set_title=ds.variables[d_variable].attrs['long_name'],)\n", + "i_ax.grid()\n", + "i_fig = i_display.fig\n", + "\n", + "# update plot callback function\n", + "def update_plot(change):\n", + " i_ax.cla()\n", + " i_ax_new = i_display.plot(change.new, subplot_index=(0,), set_title=ds.variables[change.new].attrs['long_name'],)\n", + " i_ax_new.grid()\n", + " i_fig.canvas.draw()\n", + " i_fig.canvas.flush_events()\n", + "\n", + "dropdown.observe(update_plot, names='value')\n", + "\n", + "widgets.AppLayout(\n", + " header=dropdown,\n", + " center=i_fig.canvas,\n", + " pane_heights=[1, 6,1]\n", + ")\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.12" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/PBLHT/.ipynb_checkpoints/pblhtsonde1mcfarl.c1-checkpoint.ipynb b/VAPs/quicklook/PBLHT/.ipynb_checkpoints/pblhtsonde1mcfarl.c1-checkpoint.ipynb new file mode 100644 index 00000000..b0b96878 --- /dev/null +++ b/VAPs/quicklook/PBLHT/.ipynb_checkpoints/pblhtsonde1mcfarl.c1-checkpoint.ipynb @@ -0,0 +1,679 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# PBLHTSONDE1MCFARL.C1 Plots\n", + "\n", + "[Click 
here](https://www.arm.gov/capabilities/vaps/pblht) for more information about this vap." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib widget\n", + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import ipywidgets as widgets\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "# Data archive directory\n", + "DATA_DIR = r'/data/archive/'\n", + "\n", + "# Datastream info\n", + "DATASTREAM_NAME = 'pblhtsonde1mcfarl'\n", + "DATA_LEVEL = 'c1'\n", + "LOCATIONS = [{'end_date': '2017-01-02', 'facility': 'M1', 'site': 'awr', 'start_date': '2015-11-30'}, {'end_date': '2016-01-18', 'facility': 'S1', 'site': 'awr', 'start_date': '2015-12-01'}, {'end_date': '2017-10-31', 'facility': 'S1', 'site': 'asi', 'start_date': '2016-04-29'}, {'end_date': '2015-02-09', 'facility': 'M1', 'site': 'acx', 'start_date': '2015-01-12'}, {'end_date': '2020-05-31', 'facility': 'M1', 'site': 'anx', 'start_date': '2019-12-02'}, {'end_date': '2023-12-12', 'facility': 'C1', 'site': 'nsa', 'start_date': '2002-04-28'}, {'end_date': '2012-03-31', 'facility': 'M1', 'site': 'pgh', 'start_date': '2011-06-15'}, {'end_date': '2013-06-29', 'facility': 'M1', 'site': 'pvc', 'start_date': '2012-06-25'}, {'end_date': '2021-06-14', 'facility': 'M1', 'site': 'oli', 'start_date': '2013-10-03'}, {'end_date': '2020-10-01', 'facility': 'M1', 'site': 'mos', 'start_date': '2019-10-11'}, {'end_date': '2015-12-01', 'facility': 'M1', 'site': 'mao', 'start_date': '2014-01-01'}, {'end_date': '2012-04-08', 'facility': 'M1', 'site': 'gan', 'start_date': '2011-09-15'}, {'end_date': '2011-01-05', 'facility': 'M1', 'site': 'grw', 'start_date': '2009-04-16'}, {'end_date': '2013-10-03', 'facility': 'M1', 'site': 'mag', 'start_date': '2012-10-01'}, 
{'end_date': '2018-03-24', 'facility': 'M1', 'site': 'mar', 'start_date': '2017-10-31'}, {'end_date': '2023-06-15', 'facility': 'M1', 'site': 'guc', 'start_date': '2021-09-01'}, {'end_date': '2023-12-12', 'facility': 'M1', 'site': 'epc', 'start_date': '2023-02-06'}, {'end_date': '2008-12-28', 'facility': 'M1', 'site': 'hfe', 'start_date': '2008-05-14'}, {'end_date': '2019-04-29', 'facility': 'M1', 'site': 'cor', 'start_date': '2018-09-27'}, {'end_date': '2022-10-01', 'facility': 'M1', 'site': 'hou', 'start_date': '2021-09-18'}, {'end_date': '2022-09-25', 'facility': 'S1', 'site': 'hou', 'start_date': '2021-08-28'}, {'end_date': '2023-12-13', 'facility': 'C1', 'site': 'ena', 'start_date': '2013-09-28'}, {'end_date': '2008-01-01', 'facility': 'M1', 'site': 'fkb', 'start_date': '2007-03-24'}, {'end_date': '2007-01-08', 'facility': 'M1', 'site': 'nim', 'start_date': '2006-01-07'}, {'end_date': '2005-09-15', 'facility': 'M1', 'site': 'pye', 'start_date': '2005-02-25'}, {'end_date': '2011-04-24', 'facility': 'M1', 'site': 'sbs', 'start_date': '2010-11-08'}, {'end_date': '2007-06-13', 'facility': 'B1', 'site': 'sgp', 'start_date': '2002-05-13'}, {'end_date': '2007-06-29', 'facility': 'B4', 'site': 'sgp', 'start_date': '2002-05-20'}, {'end_date': '2007-06-22', 'facility': 'B5', 'site': 'sgp', 'start_date': '2002-05-20'}, {'end_date': '2002-11-26', 'facility': 'B6', 'site': 'sgp', 'start_date': '2001-06-20'}, {'end_date': '2023-12-11', 'facility': 'C1', 'site': 'sgp', 'start_date': '2001-04-01'}, {'end_date': '2014-09-12', 'facility': 'M1', 'site': 'tmp', 'start_date': '2014-02-01'}, {'end_date': '2014-07-07', 'facility': 'C1', 'site': 'twp', 'start_date': '2001-04-03'}, {'end_date': '2013-08-25', 'facility': 'C2', 'site': 'twp', 'start_date': '2001-04-01'}, {'end_date': '2015-01-14', 'facility': 'C3', 'site': 'twp', 'start_date': '2002-04-28'}]" + ] + }, + { + "cell_type": "markdown", + "id": "9faaf875", + "metadata": {}, + "source": [ + "## Define site, facility, and date 
range" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "ac6764f5", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The following locations and date ranges are available for this VAP:\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sitefacilitystart_dateend_date
0awrM12015-11-302017-01-02
1awrS12015-12-012016-01-18
2asiS12016-04-292017-10-31
3acxM12015-01-122015-02-09
4anxM12019-12-022020-05-31
5nsaC12002-04-282023-12-12
6pghM12011-06-152012-03-31
7pvcM12012-06-252013-06-29
8oliM12013-10-032021-06-14
9mosM12019-10-112020-10-01
10maoM12014-01-012015-12-01
11ganM12011-09-152012-04-08
12grwM12009-04-162011-01-05
13magM12012-10-012013-10-03
14marM12017-10-312018-03-24
15gucM12021-09-012023-06-15
16epcM12023-02-062023-12-12
17hfeM12008-05-142008-12-28
18corM12018-09-272019-04-29
19houM12021-09-182022-10-01
20houS12021-08-282022-09-25
21enaC12013-09-282023-12-13
22fkbM12007-03-242008-01-01
23nimM12006-01-072007-01-08
24pyeM12005-02-252005-09-15
25sbsM12010-11-082011-04-24
26sgpB12002-05-132007-06-13
27sgpB42002-05-202007-06-29
28sgpB52002-05-202007-06-22
29sgpB62001-06-202002-11-26
30sgpC12001-04-012023-12-11
31tmpM12014-02-012014-09-12
32twpC12001-04-032014-07-07
33twpC22001-04-012013-08-25
34twpC32002-04-282015-01-14
\n", + "
" + ], + "text/plain": [ + " site facility start_date end_date\n", + "0 awr M1 2015-11-30 2017-01-02\n", + "1 awr S1 2015-12-01 2016-01-18\n", + "2 asi S1 2016-04-29 2017-10-31\n", + "3 acx M1 2015-01-12 2015-02-09\n", + "4 anx M1 2019-12-02 2020-05-31\n", + "5 nsa C1 2002-04-28 2023-12-12\n", + "6 pgh M1 2011-06-15 2012-03-31\n", + "7 pvc M1 2012-06-25 2013-06-29\n", + "8 oli M1 2013-10-03 2021-06-14\n", + "9 mos M1 2019-10-11 2020-10-01\n", + "10 mao M1 2014-01-01 2015-12-01\n", + "11 gan M1 2011-09-15 2012-04-08\n", + "12 grw M1 2009-04-16 2011-01-05\n", + "13 mag M1 2012-10-01 2013-10-03\n", + "14 mar M1 2017-10-31 2018-03-24\n", + "15 guc M1 2021-09-01 2023-06-15\n", + "16 epc M1 2023-02-06 2023-12-12\n", + "17 hfe M1 2008-05-14 2008-12-28\n", + "18 cor M1 2018-09-27 2019-04-29\n", + "19 hou M1 2021-09-18 2022-10-01\n", + "20 hou S1 2021-08-28 2022-09-25\n", + "21 ena C1 2013-09-28 2023-12-13\n", + "22 fkb M1 2007-03-24 2008-01-01\n", + "23 nim M1 2006-01-07 2007-01-08\n", + "24 pye M1 2005-02-25 2005-09-15\n", + "25 sbs M1 2010-11-08 2011-04-24\n", + "26 sgp B1 2002-05-13 2007-06-13\n", + "27 sgp B4 2002-05-20 2007-06-29\n", + "28 sgp B5 2002-05-20 2007-06-22\n", + "29 sgp B6 2001-06-20 2002-11-26\n", + "30 sgp C1 2001-04-01 2023-12-11\n", + "31 tmp M1 2014-02-01 2014-09-12\n", + "32 twp C1 2001-04-03 2014-07-07\n", + "33 twp C2 2001-04-01 2013-08-25\n", + "34 twp C3 2002-04-28 2015-01-14" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "print(\"The following locations and date ranges are available for this VAP:\")\n", + "display(pd.DataFrame(LOCATIONS, columns=['site', 'facility', 'start_date', 'end_date']))" + ] + }, + { + "cell_type": "markdown", + "id": "8d132223", + "metadata": {}, + "source": [ + "#### Define site, facility, and date range (date format: YYYY-MM-DD) using the variables below:" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "e563983a", + "metadata": {}, + "outputs": [], + "source": 
[ + "site_facility = ( 'sgp', 'B1' )\n", + "\n", + "date_start = '2007-06-10'\n", + "date_end = '2007-06-12'" + ] + }, + { + "cell_type": "markdown", + "id": "bccd3dfe-2f99-49a5-bace-ea37e8dd8fc3", + "metadata": {}, + "source": [ + "## Load data files\n", + "Load data files from /data/archive/" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "cb4b9a26-c574-49c0-a521-658fa553e39e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'/data/archive/sgp/sgppblhtsonde1mcfarlB1.c1'" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Compile list of files\n", + "site, facility = site_facility\n", + "d_date_start = datetime.strptime(date_start, '%Y-%m-%d')\n", + "d_date_end = datetime.strptime(date_end, '%Y-%m-%d')\n", + "dir_path = os.path.join(DATA_DIR + site, site + DATASTREAM_NAME + facility + r'.' + DATA_LEVEL )\n", + "dir_path\n" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "6be8f3dc", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['20070610', '20070611', '20070612']" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from datetime import date, timedelta\n", + "import pandas as pd\n", + "\n", + "def get_ARM_formated_dates(start_date, end_date):\n", + " \"\"\"\n", + " Get a list of ARM conventional formated date lists, based on start_date and end_date(inclusive)\n", + " EXAMPLE:\n", + " get_ARM_formated_dates(start_date=\"20180219\", end_date=\"20180221\")\n", + " >> [\"20180219\", \"20180220\", \"20180221\"] \n", + " \"\"\"\n", + " \n", + " _start_date = pd.to_datetime(start_date)\n", + " _end_date = pd.to_datetime(end_date)\n", + " \n", + " delta = _end_date - _start_date # returns timedelta \n", + " dates = []\n", + "\n", + " for i in range(delta.days + 1):\n", + " day = _start_date + timedelta(days=i)\n", + " day_formated = day.strftime(format=\"%Y%m%d\")\n", 
+ " dates.append(day_formated)\n", + " return dates\n", + "\n", + "\n", + "get_ARM_formated_dates(start_date=date_start, end_date=date_end)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "51feea2e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['/data/archive/sgp/sgppblhtsonde1mcfarlB1.c1/sgppblhtsonde1mcfarlB1.c1.20070611.150200.cdf',\n", + " '/data/archive/sgp/sgppblhtsonde1mcfarlB1.c1/sgppblhtsonde1mcfarlB1.c1.20070611.173000.cdf',\n", + " '/data/archive/sgp/sgppblhtsonde1mcfarlB1.c1/sgppblhtsonde1mcfarlB1.c1.20070611.202900.cdf',\n", + " '/data/archive/sgp/sgppblhtsonde1mcfarlB1.c1/sgppblhtsonde1mcfarlB1.c1.20070611.232800.cdf',\n", + " '/data/archive/sgp/sgppblhtsonde1mcfarlB1.c1/sgppblhtsonde1mcfarlB1.c1.20070612.052900.cdf',\n", + " '/data/archive/sgp/sgppblhtsonde1mcfarlB1.c1/sgppblhtsonde1mcfarlB1.c1.20070612.112900.cdf',\n", + " '/data/archive/sgp/sgppblhtsonde1mcfarlB1.c1/sgppblhtsonde1mcfarlB1.c1.20070612.143100.cdf',\n", + " '/data/archive/sgp/sgppblhtsonde1mcfarlB1.c1/sgppblhtsonde1mcfarlB1.c1.20070612.082900.cdf',\n", + " '/data/archive/sgp/sgppblhtsonde1mcfarlB1.c1/sgppblhtsonde1mcfarlB1.c1.20070612.233900.cdf',\n", + " '/data/archive/sgp/sgppblhtsonde1mcfarlB1.c1/sgppblhtsonde1mcfarlB1.c1.20070612.173100.cdf',\n", + " '/data/archive/sgp/sgppblhtsonde1mcfarlB1.c1/sgppblhtsonde1mcfarlB1.c1.20070612.023100.cdf']" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Filter a list of files based on date pattern\n", + "import glob\n", + "dates = get_ARM_formated_dates(start_date=date_start, end_date=date_end)\n", + "files_filter = []\n", + "for date in dates:\n", + " files_filter += glob.glob(f'{dir_path}/*.{date}*.*')\n", + " files_filter\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "b0e5d0d7", + "metadata": {}, + "outputs": [ + { + "ename": "ValueError", + "evalue": "Coordinate variable height_ss is 
neither monotonically increasing nor monotonically decreasing on all datasets", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[7], line 3\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;66;03m# Load files as a single dataset\u001b[39;00m\n\u001b[1;32m 2\u001b[0m files_list \u001b[38;5;241m=\u001b[39m files_filter \n\u001b[0;32m----> 3\u001b[0m ds \u001b[38;5;241m=\u001b[39m \u001b[43mact\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mio\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43marmfiles\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mread_netcdf\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfiles_list\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 4\u001b[0m ds\u001b[38;5;241m.\u001b[39mclean\u001b[38;5;241m.\u001b[39mcleanup()\n\u001b[1;32m 5\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mlen\u001b[39m(files_list)\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m files loaded\u001b[39m\u001b[38;5;124m'\u001b[39m)\n", + "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/act/io/armfiles.py:168\u001b[0m, in \u001b[0;36mread_netcdf\u001b[0;34m(filenames, concat_dim, return_None, combine, decode_times, use_cftime, use_base_time, combine_attrs, cleanup_qc, keep_variables, **kwargs)\u001b[0m\n\u001b[1;32m 164\u001b[0m ds \u001b[38;5;241m=\u001b[39m xr\u001b[38;5;241m.\u001b[39mopen_mfdataset(filenames, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n\u001b[1;32m 166\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 167\u001b[0m \u001b[38;5;66;03m# When all else fails raise the orginal exception\u001b[39;00m\n\u001b[0;32m--> 168\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m exception\n\u001b[1;32m 170\u001b[0m \u001b[38;5;66;03m# If 
requested use base_time and time_offset to derive time. Assumes that the units\u001b[39;00m\n\u001b[1;32m 171\u001b[0m \u001b[38;5;66;03m# of both are in seconds and that the value is number of seconds since epoch.\u001b[39;00m\n\u001b[1;32m 172\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m use_base_time:\n", + "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/act/io/armfiles.py:143\u001b[0m, in \u001b[0;36mread_netcdf\u001b[0;34m(filenames, concat_dim, return_None, combine, decode_times, use_cftime, use_base_time, combine_attrs, cleanup_qc, keep_variables, **kwargs)\u001b[0m\n\u001b[1;32m 139\u001b[0m except_tuple \u001b[38;5;241m=\u001b[39m except_tuple \u001b[38;5;241m+\u001b[39m (\u001b[38;5;167;01mFileNotFoundError\u001b[39;00m, \u001b[38;5;167;01mOSError\u001b[39;00m)\n\u001b[1;32m 141\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m 142\u001b[0m \u001b[38;5;66;03m# Read data file with Xarray function\u001b[39;00m\n\u001b[0;32m--> 143\u001b[0m ds \u001b[38;5;241m=\u001b[39m \u001b[43mxr\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mopen_mfdataset\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfilenames\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 145\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m except_tuple \u001b[38;5;28;01mas\u001b[39;00m exception:\n\u001b[1;32m 146\u001b[0m \u001b[38;5;66;03m# If requested return None for File not found error\u001b[39;00m\n\u001b[1;32m 147\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mtype\u001b[39m(exception)\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__name__\u001b[39m \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mFileNotFoundError\u001b[39m\u001b[38;5;124m'\u001b[39m:\n", + "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/xarray/backends/api.py:1026\u001b[0m, in 
\u001b[0;36mopen_mfdataset\u001b[0;34m(paths, chunks, concat_dim, compat, preprocess, engine, data_vars, coords, combine, parallel, join, attrs_file, combine_attrs, **kwargs)\u001b[0m\n\u001b[1;32m 1013\u001b[0m combined \u001b[38;5;241m=\u001b[39m _nested_combine(\n\u001b[1;32m 1014\u001b[0m datasets,\n\u001b[1;32m 1015\u001b[0m concat_dims\u001b[38;5;241m=\u001b[39mconcat_dim,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 1021\u001b[0m combine_attrs\u001b[38;5;241m=\u001b[39mcombine_attrs,\n\u001b[1;32m 1022\u001b[0m )\n\u001b[1;32m 1023\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m combine \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mby_coords\u001b[39m\u001b[38;5;124m\"\u001b[39m:\n\u001b[1;32m 1024\u001b[0m \u001b[38;5;66;03m# Redo ordering from coordinates, ignoring how they were ordered\u001b[39;00m\n\u001b[1;32m 1025\u001b[0m \u001b[38;5;66;03m# previously\u001b[39;00m\n\u001b[0;32m-> 1026\u001b[0m combined \u001b[38;5;241m=\u001b[39m \u001b[43mcombine_by_coords\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1027\u001b[0m \u001b[43m \u001b[49m\u001b[43mdatasets\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1028\u001b[0m \u001b[43m \u001b[49m\u001b[43mcompat\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcompat\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1029\u001b[0m \u001b[43m \u001b[49m\u001b[43mdata_vars\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdata_vars\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1030\u001b[0m \u001b[43m \u001b[49m\u001b[43mcoords\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcoords\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1031\u001b[0m \u001b[43m \u001b[49m\u001b[43mjoin\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mjoin\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1032\u001b[0m \u001b[43m \u001b[49m\u001b[43mcombine_attrs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcombine_attrs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1033\u001b[0m \u001b[43m 
\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1034\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 1035\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[1;32m 1036\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m{}\u001b[39;00m\u001b[38;5;124m is an invalid option for the keyword argument\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 1037\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m ``combine``\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;241m.\u001b[39mformat(combine)\n\u001b[1;32m 1038\u001b[0m )\n", + "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/xarray/core/combine.py:982\u001b[0m, in \u001b[0;36mcombine_by_coords\u001b[0;34m(data_objects, compat, data_vars, coords, fill_value, join, combine_attrs, datasets)\u001b[0m\n\u001b[1;32m 980\u001b[0m concatenated_grouped_by_data_vars \u001b[38;5;241m=\u001b[39m []\n\u001b[1;32m 981\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m \u001b[38;5;28mvars\u001b[39m, datasets_with_same_vars \u001b[38;5;129;01min\u001b[39;00m grouped_by_vars:\n\u001b[0;32m--> 982\u001b[0m concatenated \u001b[38;5;241m=\u001b[39m \u001b[43m_combine_single_variable_hypercube\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 983\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;28;43mlist\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mdatasets_with_same_vars\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 984\u001b[0m \u001b[43m \u001b[49m\u001b[43mfill_value\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mfill_value\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 985\u001b[0m \u001b[43m \u001b[49m\u001b[43mdata_vars\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdata_vars\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 986\u001b[0m \u001b[43m \u001b[49m\u001b[43mcoords\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcoords\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 987\u001b[0m \u001b[43m 
\u001b[49m\u001b[43mcompat\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcompat\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 988\u001b[0m \u001b[43m \u001b[49m\u001b[43mjoin\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mjoin\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 989\u001b[0m \u001b[43m \u001b[49m\u001b[43mcombine_attrs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcombine_attrs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 990\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 991\u001b[0m concatenated_grouped_by_data_vars\u001b[38;5;241m.\u001b[39mappend(concatenated)\n\u001b[1;32m 993\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m merge(\n\u001b[1;32m 994\u001b[0m concatenated_grouped_by_data_vars,\n\u001b[1;32m 995\u001b[0m compat\u001b[38;5;241m=\u001b[39mcompat,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 998\u001b[0m combine_attrs\u001b[38;5;241m=\u001b[39mcombine_attrs,\n\u001b[1;32m 999\u001b[0m )\n", + "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/xarray/core/combine.py:629\u001b[0m, in \u001b[0;36m_combine_single_variable_hypercube\u001b[0;34m(datasets, fill_value, data_vars, coords, compat, join, combine_attrs)\u001b[0m\n\u001b[1;32m 623\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(datasets) \u001b[38;5;241m==\u001b[39m \u001b[38;5;241m0\u001b[39m:\n\u001b[1;32m 624\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[1;32m 625\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mAt least one Dataset is required to resolve variable names \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 626\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mfor combined hypercube.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 627\u001b[0m )\n\u001b[0;32m--> 629\u001b[0m combined_ids, concat_dims \u001b[38;5;241m=\u001b[39m 
\u001b[43m_infer_concat_order_from_coords\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mlist\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mdatasets\u001b[49m\u001b[43m)\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 631\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m fill_value \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 632\u001b[0m \u001b[38;5;66;03m# check that datasets form complete hypercube\u001b[39;00m\n\u001b[1;32m 633\u001b[0m _check_shape_tile_ids(combined_ids)\n", + "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/xarray/core/combine.py:116\u001b[0m, in \u001b[0;36m_infer_concat_order_from_coords\u001b[0;34m(datasets)\u001b[0m\n\u001b[1;32m 114\u001b[0m ascending \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mFalse\u001b[39;00m\n\u001b[1;32m 115\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m--> 116\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[1;32m 117\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mCoordinate variable \u001b[39m\u001b[38;5;132;01m{}\u001b[39;00m\u001b[38;5;124m is neither \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 118\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mmonotonically increasing nor \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 119\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mmonotonically decreasing on all datasets\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;241m.\u001b[39mformat(dim)\n\u001b[1;32m 120\u001b[0m )\n\u001b[1;32m 122\u001b[0m \u001b[38;5;66;03m# Assume that any two datasets whose coord along dim starts\u001b[39;00m\n\u001b[1;32m 123\u001b[0m \u001b[38;5;66;03m# with the same value have the same coord values throughout.\u001b[39;00m\n\u001b[1;32m 124\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28many\u001b[39m(index\u001b[38;5;241m.\u001b[39msize \u001b[38;5;241m==\u001b[39m \u001b[38;5;241m0\u001b[39m \u001b[38;5;28;01mfor\u001b[39;00m 
index \u001b[38;5;129;01min\u001b[39;00m indexes):\n", + "\u001b[0;31mValueError\u001b[0m: Coordinate variable height_ss is neither monotonically increasing nor monotonically decreasing on all datasets" + ] + } + ], + "source": [ + "# Load files as a single dataset\n", + "files_list = files_filter \n", + "ds = act.io.armfiles.read_netcdf(files_list)\n", + "ds.clean.cleanup()\n", + "print(f'{len(files_list)} files loaded')\n", + "ds\n" + ] + }, + { + "cell_type": "markdown", + "id": "4a551094-9ec0-4b64-b80a-9940573c2f50", + "metadata": {}, + "source": [ + "## Plot time series data\n", + "#### Define the list of variables to be plotted:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d09b789e-84f1-4605-846b-a72c110c8048", + "metadata": {}, + "outputs": [], + "source": [ + "variables_to_plot = ['pbl_height_heffter', 'pbl_height_liu_liang', 'pbl_height_bulk_richardson_pt25']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3458fb08-035b-4898-9253-0a94e6f9c97b", + "metadata": {}, + "outputs": [], + "source": [ + "ts_display = act.plotting.TimeSeriesDisplay(ds)\n", + "ts_display.add_subplots((len(variables_to_plot),), figsize = (9.5,4*len(variables_to_plot)))\n", + "\n", + "for i,v in enumerate(variables_to_plot):\n", + " ts_ax = ts_display.plot(v, subplot_index=(i,), set_title=ds.variables[v].attrs['long_name'],)\n", + " ts_ax.grid()\n", + "\n", + "plt.show()\n" + ] + }, + { + "cell_type": "markdown", + "id": "2308db16-f362-4033-a11a-c3e5e75ad9ba", + "metadata": {}, + "source": [ + "## Field selection dropdown menu\n", + "Select variable to be plotted from a dropdown menu" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d5792fa8-2253-441a-8ac6-1e07a0345031", + "metadata": {}, + "outputs": [], + "source": [ + "plt.ioff()\n", + "\n", + "# populate dropdown menu with available variables \n", + "available_variables = [v for v in ds.variables if not('time' in v or v.startswith('qc_') or 
v.startswith('source_')) and 'long_name' in ds.variables[v].attrs]\n", + "d_variable = 'pbl_height_heffter'\n", + "dropdown = widgets.Dropdown(\n", + " options = [(ds.variables[v].attrs['long_name'], v) for v in available_variables],\n", + " value= d_variable,\n", + " description='Field:',\n", + " disabled=False,\n", + ")\n", + "dropdown.layout.margin = '0px 30% 0px 20%'\n", + "dropdown.layout.width = '50%'\n", + "\n", + "# set up display\n", + "i_display = act.plotting.TimeSeriesDisplay(ds)\n", + "i_display.add_subplots((1,), figsize = (9.5,5))\n", + "i_ax = i_display.plot(d_variable, subplot_index=(0,), set_title=ds.variables[d_variable].attrs['long_name'],)\n", + "i_ax.grid()\n", + "i_fig = i_display.fig\n", + "\n", + "# update plot callback function\n", + "def update_plot(change):\n", + " i_ax.cla()\n", + " i_ax_new = i_display.plot(change.new, subplot_index=(0,), set_title=ds.variables[change.new].attrs['long_name'],)\n", + " i_ax_new.grid()\n", + " i_fig.canvas.draw()\n", + " i_fig.canvas.flush_events()\n", + "\n", + "dropdown.observe(update_plot, names='value')\n", + "\n", + "widgets.AppLayout(\n", + " header=dropdown,\n", + " center=i_fig.canvas,\n", + " pane_heights=[1, 6,1]\n", + ")\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.16" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/PBLHT/PBLHT_tutorial.ipynb b/VAPs/quicklook/PBLHT/PBLHT_tutorial.ipynb new file mode 100644 index 00000000..8594a837 --- /dev/null +++ b/VAPs/quicklook/PBLHT/PBLHT_tutorial.ipynb @@ 
-0,0 +1,838 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# PBLHTSONDE1MCFARL.C1 Notebook\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/pblht) for more information about this VAP." + ] + }, + { + "cell_type": "markdown", + "id": "97097763", + "metadata": {}, + "source": [ + "In this notebook, we demonstrate the workflow to explore ARM VAP data (using pblhtsonde1mcfarl as an example). Value-added products (VAPs) are higher-order data products that have been analyzed and processed to ease scientists’ use of ARM data in atmospheric research and global climate models. \n", + "Here is the main content we will cover." + ] + }, + { + "cell_type": "markdown", + "id": "eddec40f", + "metadata": {}, + "source": [ + "# Table of Contents\n", + "## Access the data\n", + "* How to retrieve the data\n", + "* Data path and file name conventions\n", + "* Load data\n", + "## Explore the data\n", + "* NetCDF Data structure\n", + "* Xarray essentials\n", + "* Xarray Variable\n", + "## Plot the data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "import random\n", + "\n", + "import glob\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "id": "2372d5be", + "metadata": {}, + "source": [ + "## Access the data" + ] + }, + { + "cell_type": "markdown", + "id": "5b0d9684", + "metadata": {}, + "source": [ + "### How to retrieve the data\n", + "We assume the path \"/data/archive\" is available where you are running this notebook. 
You can use `os.path.exists(\"/data/archive\")` to verify if the path exists on your machine.\n", + "\n", + "\n", + "### Data path and file name conventions\n", + "There are several common terminologies regarding ARM data, for example, data-stream-name, data-level, site, facility, instrument, etc. (For more details, please see [ARM Data File Standards Version 1.2](https://www.google.com/search?q=arm+datastream+facility+cite+definition&rlz=1C1GCEJ_enUS1029US1029&ei=hb41ZICrBPukqtsPvMODIA&ved=0ahUKEwjAgeiS0aL-AhV7kmoFHbzhAAQQ4dUDCBA&uact=5&oq=arm+datastream+facility+cite+definition&gs_lcp=Cgxnd3Mtd2l6LXNlcnAQAzIFCAAQogQyBQgAEKIEMgUIABCiBDIFCAAQogQyBQgAEKIEOgoIABBHENYEELADOgoIIRCgARDDBBAKSgQIQRgAUJ4DWIoOYO0PaAFwAXgAgAGMAYgBuQmSAQMyLjmYAQCgAQHIAQjAAQE&sclient=gws-wiz-serp).) \n", + "\n", + "For example, this notebook is called `pblhtsonde1mcfarl.c1`, where `pblhtsonde1mcfarl` is the \"datastream name\", and `c1` is the \"data level\".\n", + "\n", + "This datastream also contains site `awr` and facility `M1`. (Note: individual datastream might have multiple site-facility pairs.)\n", + "In such a case, the data of this data-stream is stored at `/data/archive/awr/awrpblhtsonde1mcfarlM1.c1`, which is in the format of `<DATA_DIR>/<site>/<site><datastream_name><facility>.<data_level>`. We can use the following method to assign the data-stream directory `datastream_dir = os.path.join(DATA_DIR, site, site + DATASTREAM_NAME + facility + '.' + DATA_LEVEL )`\n", + "\n", + "The data files under datastream_dir also follow naming conventions. But once you reach the datastream_dir level, the most important file naming convention to differentiate the files is \"yyyyMMdd.hhmmss\", which comes in handy for filtering files based on datetime. 
For example, we can use `glob.glob(f'{datastream_dir}/*.200709*.*')` to filter files from September 2007.\n", + "\n", + "(Note: refer to the https://adc.arm.gov/discovery/#/ and https://adc.arm.gov/solr8/metadata/select API to explore ARM datastreams and associated available sites and facilities.)\n", + "\n", + "Please see the following examples in action." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d7e9eb85", + "metadata": {}, + "outputs": [], + "source": [ + "# Verify if DATA_DIR path exists\n", + "DATA_DIR = \"/data/archive\"\n", + "os.path.exists(DATA_DIR)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "586993fd", + "metadata": {}, + "outputs": [], + "source": [ + "# Specify datastream_dir following the path conventions and check its existence\n", + "DATASTREAM_NAME = \"pblhtsonde1mcfarl\"\n", + "DATA_LEVEL = \"c1\"\n", + "site = \"awr\"\n", + "facility = \"M1\"\n", + "datastream_dir = os.path.join(DATA_DIR, site, site + DATASTREAM_NAME + facility + '.' 
+ DATA_LEVEL )\n", + "print(datastream_dir)\n", + "print(os.path.exists(datastream_dir))\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0742f7c1", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: list 5 (random) files under datastream_dir\n", + "files = os.listdir(datastream_dir)\n", + "files[:5]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "39b98a36", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: get most recent file\n", + "list_of_files = glob.glob(f\"{datastream_dir}/*\") # * means all if need specific format then *.csv\n", + "latest_file = max(list_of_files, key=os.path.getctime)\n", + "latest_file" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "902d514e", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus sort datastream files based on datetime\n", + "files = os.listdir(datastream_dir)\n", + "file_sorted = files.copy()\n", + "file_sorted.sort() \n", + "print(file_sorted[:5])\n", + "\n", + "# to reverse\n", + "file_sorted_reverse = files.copy() \n", + "file_sorted_reverse.sort(reverse=True)\n", + "print(file_sorted_reverse[:5])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ec5923b2", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: pattern matching\n", + "# filter the 200709** files under datastream_dir\n", + "files_filter = glob.glob(f'{datastream_dir}/*.200709*.*')\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c4a4aa18", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: use armnotebook_utils.file_filter (TODO) to filter files based on datastream info \n" + ] + }, + { + "cell_type": "markdown", + "id": "cfeb9efc", + "metadata": {}, + "source": [ + "### Load data\n", + "The arm data is stored in [NetCDF](https://en.wikipedia.org/wiki/NetCDF#:~:text=%22NetCDF%20(network%20Common%20Data%20Form,format%20for%20representing%20scientific%20data.) format. 
We can use xarray's `open_dataset` method to load a single file, or `open_mfdataset` to open multiple files. (Note: the latter will still return a single xarray dataset object by combining multiple files.)\n", + "\n", + "Note: open_dataset keeps the file handle open and lazy loads its contents. All parameters are passed directly to open_dataset. It is a preferable method over load_dataset for memory efficiency. (For more details, please see [xarray.open_dataset](https://docs.xarray.dev/en/stable/generated/xarray.open_dataset.html))\n", + "\n", + "See the following example in action" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a440a329", + "metadata": {}, + "outputs": [], + "source": [ + "# open single file\n", + "full_path = latest_file\n", + "print(full_path)\n", + "ds_single = xr.open_dataset(full_path)\n", + "ds_single" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0143a3d", + "metadata": {}, + "outputs": [], + "source": [ + "print(ds_single)\n", + "print(type(ds_single))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1c0f8939", + "metadata": {}, + "outputs": [], + "source": [ + "# open multiple files\n", + "n_files = 3\n", + "full_paths = [os.path.join(datastream_dir, f_path) for f_path in file_sorted_reverse[:n_files]]\n", + "print(full_paths)\n", + "try: # Note: sometimes multiple files cannot be merged, so we used try except here.\n", + " ds_mutiple = xr.open_mfdataset(full_paths)\n", + " # ds_mutiple\n", + " print(type(ds_mutiple))\n", + " print(ds_mutiple)\n", + "except Exception as e:\n", + " print(e)" + ] + }, + { + "cell_type": "markdown", + "id": "3fa59943", + "metadata": {}, + "source": [ + "## Explore the data" + ] + }, + { + "cell_type": "markdown", + "id": "571f69b9", + "metadata": {}, + "source": [ + "### NetCDF Data structure \n", + "\n", + "\n", + "(If you are confident with NetCDF basics and xarray essentials, feel free to skip this section.)\n", + "Before we dive into 
data exploration, there are some essential concepts we should be familiar with (shown below): \n", + "* Dataset\n", + "* Data array\n", + "* Variable\n", + "* Dimension\n", + "* Coordinate\n", + "* Data Type\n", + "* Meta Data (Attributes)\n", + "\n", + "We will not go into details about NetCDF basics, but here are some references you might find helpful:\n", + "* [Components of a NetCDF Dataset](https://iprc.soest.hawaii.edu/users/xfu/tool/guidef-7.html#)\n", + "* [Network Common Data Form (NetCDF)](https://www.unidata.ucar.edu/software/netcdf/) and [A Brief History of (netCDF) Time](https://www.unidata.ucar.edu/software/netcdf/time/recs.html)\n", + "\n", + "\n", + "\n", + "### Xarray essentials\n", + "Earlier we introduced the xarray package and used `open_dataset` and `open_mfdataset` to retrieve NetCDF data files as an xarray Dataset object. Recall that you can review the data in a notebook by using `print(ds)` or `ds`. \n", + "\n", + "Using xarray to retrieve the aforementioned NetCDF basics is straightforward. 
In general,\n", + "* Dataset: `ds`\n", + "* Data array: `ds.variables`\n", + "* Variable: `ds.variables`\n", + "* Dimension: `ds.dims`\n", + "* Coordinate: `ds.coords`\n", + "* Data Type: `type`\n", + "* Meta Data (Attributes): `ds.attrs`\n", + "\n", + "Also, here are some references if you are new to xarray\n", + "* [Xarray in 45 minutes](https://tutorial.xarray.dev/overview/xarray-in-45-min.html)\n", + "* [Handling NetCDF Files using XArray for Absolute Beginners](https://towardsdatascience.com/handling-netcdf-files-using-xarray-for-absolute-beginners-111a8ab4463f)\n", + "\n", + "Try the following commands in action\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b3eac323", + "metadata": {}, + "outputs": [], + "source": [ + "ds = ds_single" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "77ecf85d", + "metadata": {}, + "outputs": [], + "source": [ + "# Dataset \n", + "ds" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "25e7de09", + "metadata": {}, + "outputs": [], + "source": [ + "# Data array, Variable\n", + "# Note: the info can be overwhelming. 
Do not attempt to grasp everything at first glance.\n", + "ds.variables" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8c41b67e", + "metadata": {}, + "outputs": [], + "source": [ + "# dimensions\n", + "ds.dims" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "156f1dfc", + "metadata": {}, + "outputs": [], + "source": [ + "# coordinates\n", + "ds.coords" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "277d6064", + "metadata": {}, + "outputs": [], + "source": [ + "# Meta Data (Attributes)\n", + "ds.attrs" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d334681f", + "metadata": {}, + "outputs": [], + "source": [ + "# type\n", + "print(type(ds))\n", + "print(type(ds.variables))\n", + "print(type(ds.dims))\n", + "print(type(ds.coords))\n", + "print(type(ds.attrs))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "643399d6", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: retrieve variable names only\n", + "list(ds.variables)" + ] + }, + { + "cell_type": "markdown", + "id": "1cf36878", + "metadata": {}, + "source": [ + "#### Discussion: variable vs. coordinates vs. dimensions. \n", + "From [Components of a NetCDF Dataset](https://iprc.soest.hawaii.edu/users/xfu/tool/guidef-7.html#):\n", + "\n", + "* Dimension: A dimension may be used to represent a real physical dimension, for example, time, latitude, longitude, or height. A dimension might also be used to index other quantities, for example station or model-run-number.\n", + "A netCDF dimension has both a name and a length. A dimension length is an arbitrary positive integer, except that one dimension in a netCDF dataset can have the length UNLIMITED.\n", + "\n", + "* Variables: Variables are used to store the bulk of the data in a netCDF dataset. A variable represents an array of values of the same type. A scalar value is treated as a 0-dimensional array. 
A variable has a name, a data type, and a shape described by its list of dimensions specified when the variable is created. A variable may also have associated attributes, which may be added, deleted or changed after the variable is created.\n", + "\n", + "* Coordinate (Variables): It is legal for a variable to have the same name as a dimension. Such variables have no special meaning to the netCDF library. However there is a convention that such variables should be treated in a special way by software using this library.\n", + "A variable with the same name as a dimension is called a coordinate variable. It typically defines a physical coordinate corresponding to that dimension.\n", + "\n", + "\n", + "Tips: By definition, coordinates and dimensions are also variables. For example, ds.time is a coordinate, but it is also a special variable. In practice, however, when we talk about a variable, we mean a regular (non-coordinate) variable. We will use this convention for the remainder of the notebook. " + ] + }, + { + "cell_type": "markdown", + "id": "d47120f9", + "metadata": {}, + "source": [ + "### Xarray Variable\n", + "\n", + "\n", + "We can use `ds.variables` to access the variables of a dataset. But printing out the whole dataset can often be overwhelming. Instead, we would work on an individual variable (also called an Xarray data array) using the `ds[var_name]` syntax, where `var_name` is the name of the variable. See the following examples in action.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5a57e136", + "metadata": {}, + "outputs": [], + "source": [ + "print(type(ds[\"time\"]))\n", + "ds[\"time\"]" + ] + }, + { + "cell_type": "markdown", + "id": "51ec643e", + "metadata": {}, + "source": [ + "#### Variable properties\n", + "For an individual variable, the following properties are the most commonly used, listed with the associated xarray attributes to retrieve them. 
(assuming var = ds[var_name])\n", + "* name: `var.name`\n", + "* data content: `var.data`\n", + "* attributes: `var.attrs`\n", + "* dimensions: `var.dims`\n", + "* data type: `var.data.dtype`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a632a525", + "metadata": {}, + "outputs": [], + "source": [ + "var_name = \"time\"\n", + "var = ds[var_name]\n", + "\n", + "print(\"var.name: \\n\", var.name, \"\\n\")\n", + "print(\"var.data: \\n\", var.data, \"\\n\")\n", + "print(\"var.attrs: \\n\", var.attrs, \"\\n\")\n", + "print(\"var.dims: \\n\", var.dims, \"\\n\")\n", + "print(\"var.data.dtype: \\n\", var.data.dtype, \"\\n\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b256b07f", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: individual variable info table (collect and display variable info in a tabular format)\n", + "df_info = pd.DataFrame()\n", + "df_info[\"var_name\"] = list(ds.variables)\n", + "df_info[\"dims\"] = df_info.var_name.apply(lambda x: ds[x].dims)\n", + "df_info[\"is_dim\"] = df_info.var_name.apply(lambda x: x in ds.dims)\n", + "df_info[\"n_dim\"] = df_info.var_name.apply(lambda x: len(ds[x].dims))\n", + "df_info[\"attrs\"] = df_info.var_name.apply(lambda x: ds[x].attrs)\n", + "df_info[\"dtype\"] = df_info.var_name.apply(lambda x: ds[x].data.dtype)\n", + "df_info" + ] + }, + { + "cell_type": "markdown", + "id": "e6a36b96", + "metadata": {}, + "source": [ + "### Data cleaning/Preprocessing (skipped)\n", + "Data cleaning and preprocessing is an important stage in the data analysis pipeline. However, it is outside the scope of this notebook. For more information about data cleaning and preprocessing basics, here are some references. 
\n", + "* [Xarray Fundamentals](https://earth-env-data-science.github.io/lectures/xarray/xarray.html)\n", + "* [Xarray Tutorial — Pangeo Gallery documentation](http://gallery.pangeo.io/repos/pangeo-data/pangeo-tutorial-gallery/xarray.html)\n", + "* [Pythonic Data Cleaning With pandas and NumPy](https://realpython.com/python-data-cleaning-numpy-pandas/)\n", + "* [Pandas - Cleaning Data](https://www.w3schools.com/python/pandas/pandas_cleaning.asp)" + ] + }, + { + "cell_type": "markdown", + "id": "ae6e1a69", + "metadata": {}, + "source": [ + "### Plotting\n", + "Data visualization (or plotting) is another important data analysis topic and deserves its own discussion. In this notebook we will only demonstrate basic tools for simple data visualization tasks.\n", + "\n", + "Here are some references you might find useful:\n", + "* xarray plotting: https://docs.xarray.dev/en/stable/user-guide/plotting.html\n", + "* Atmospheric Community Toolkit (ACT): https://arm-doe.github.io/ACT/index.html\n", + "\n", + "Note: this notebook is auto-generated using a template. It uses a generic rule to select the variable(s) to plot and is not customized for each individual notebook. Feel free to change the variables of interest, especially if a figure fails to plot." + ] + }, + { + "cell_type": "markdown", + "id": "7e417fb2", + "metadata": {}, + "source": [ + "#### 1-dimensional basic time series plot\n", + "\n", + "For the following plot we would like to find variables such that\n", + "* it has one and only one dimension\n", + "* \"time\" is its coordinate variable\n", + "* it is not a dimension itself,\n", + "* it is not a special variable whose name contains any of the substrings [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "\n", + "Please see the following example in action. 
(For more details about pandas filtering, please see the following references.)\n", + "* [pandas: multiple conditions while indexing data frame](https://stackoverflow.com/questions/22591174/pandas-multiple-conditions-while-indexing-data-frame-unexpected-behavior)\n", + "* [pandas.DataFrame.apply](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.apply.html)\n", + "* [How to test if a string contains one of the substrings in a list...](https://stackoverflow.com/questions/26577516/how-to-test-if-a-string-contains-one-of-the-substrings-in-a-list-in-pandas)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "30edac2d", + "metadata": {}, + "outputs": [], + "source": [ + "# (pandas query) filter n_dim==1, non-dimensional, and not contains the following substrings\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter = df_info[(df_info.n_dim == 1) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (df_info.is_dim==False) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4777a659", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " var_1d = df_filter.var_name.values[0]\n", + " var_1d\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fb5ae985", + "metadata": {}, + "outputs": [], + "source": [ + "# Note: if failed, change to another variable to plot.\n", + "try:\n", + " ds[var_1d].plot()\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "markdown", + "id": "9e41066e", + "metadata": {}, + "source": [ + "#### 2-dimensional basic plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "09bd4adc", + "metadata": {}, + "outputs": [], + "source": [ + "# (pandas query) filter n_dim==2, non-dimensional, and not contains the 
following substrings\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_2 = df_info[(df_info.n_dim == 2) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (df_info.is_dim==False) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter_2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2cdea1a9", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " var_2d = df_filter_2.var_name.values[0]\n", + " var_2d\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f5f48879", + "metadata": {}, + "outputs": [], + "source": [ + "# Note: if failed, change to another variable to plot.\n", + "try:\n", + " print(ds[var_2d].dims)\n", + " # ds[var_2d].plot()\n", + "\n", + " # conventionally, use \"time\" as x-axis\n", + " ds[var_2d].plot(x=\"time\")\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "markdown", + "id": "7554b2a6", + "metadata": {}, + "source": [ + "#### qc-plotting (optional)\n", + "\n", + "Note: ACT QC plotting has stricter requirements; one of them is that the associated qc variable needs to have a \"flag_masks\" attribute. 
Which is added by using ds = act.io.armfiles.read_netcdf(files_list), then ds.clean.cleanup()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "71c2096f", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " ds_act = act.io.armfiles.read_netcdf(full_path)\n", + " print(type(ds_act))\n", + " ds_act.clean.cleanup()\n", + "\n", + " # or \n", + " # ds.clean.cleanup()\n", + "except Exception as e:\n", + " print(\"ERROR\", e)\n", + " ds_act = ds" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "844f8505", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter valid variables for ACT qc plotting\n", + "condition = (\"qc_\" + df_info.var_name).apply(lambda x: ds_act[x].attrs.get(\"flag_masks\") is not None \n", + " if x in list(ds.variables) else False)\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_3 = df_info[(df_info.is_dim==False) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings))) &\n", + " condition\n", + " ]\n", + "df_filter_3" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ecd38cc6", + "metadata": {}, + "outputs": [], + "source": [ + "# QC Plot\n", + "try:\n", + " qc_variable = df_filter_3.var_name.values[0]\n", + " print(qc_variable)\n", + "except Exception as e:\n", + " print(e)\n", + " \n", + "try:\n", + "\n", + " # Plot\n", + " qc_display = act.plotting.TimeSeriesDisplay(ds_act)\n", + " qc_display.add_subplots((2,), figsize = (9.5,10))\n", + " qc_ax = qc_display.plot(qc_variable, subplot_index=(0,), set_title=\"QC results on field: \" + qc_variable,)\n", + " qc_ax.grid()\n", + " qc_display.qc_flag_block_plot(qc_variable, subplot_index=(1,))\n", + " qc_ax.set_xlabel(f\"UTC Time starts at {ds.time.data[0]}\")\n", + "\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e)" + ] + }, + { + "cell_type": "markdown", + "id": "d76e4d27", + "metadata": 
{}, + "source": [ + "#### bonus: choose variables to plot from a dropdown menu " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bb733804", + "metadata": {}, + "outputs": [], + "source": [ + "# Valid variables filtering\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_4 = df_info[(df_info.is_dim==False) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter_4" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "82d5c20d", + "metadata": {}, + "outputs": [], + "source": [ + "# example 1: using xarray plot\n", + "\n", + "# Uncomment the following cell to try the interactive plot (ctrl + /)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2fc9aaa4", + "metadata": {}, + "outputs": [], + "source": [ + "# %matplotlib widget\n", + "# plt.clf()\n", + "\n", + "# fig, ax = plt.subplots(figsize=(10, 4))\n", + "\n", + "# available_variables = df_filter_4.var_name.values\n", + "# @widgets.interact(var=available_variables)\n", + "# def update(var = available_variables[0]):\n", + "# fig.clear() # Remove old lines from plot and plot new one\n", + "# if len(ds[var].dims)==2:\n", + "# ds[var].plot(x=\"time\", add_colorbar=False)\n", + "# else:\n", + "# ds[var].plot()\n", + "# plt.grid()\n", + "# plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fabdd802", + "metadata": {}, + "outputs": [], + "source": [ + "# example 2: using act plot\n", + "\n", + "# Uncomment the following cell to try the interactive plot (ctrl + /)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "962b4186", + "metadata": {}, + "outputs": [], + "source": [ + "# %matplotlib widget\n", + "# plt.clf()\n", + "\n", + "# available_variables = df_filter_4.var_name.values\n", + "\n", + "\n", + "# @widgets.interact(var=available_variables)\n", + "# def 
update(var = available_variables[0]):\n", + "\n", + "# i_display = act.plotting.TimeSeriesDisplay(ds_act)\n", + "# i_display.add_subplots((1,), figsize=(10, 4))\n", + "# ax = i_display.plot(var, subplot_index=(0,), set_title=f\"{var} ({ds_act[var].attrs['long_name']})\",)\n", + "\n", + "# ax.set_xlabel(f\"UTC Time starts at {ds.time.data[0]}\")\n", + "# ax.grid()\n", + "# plt.show()\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.16" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/PBLHT/pblhtsonde1mcfarl.c1.ipynb b/VAPs/quicklook/PBLHT/pblhtsonde1mcfarl.c1.ipynb new file mode 100644 index 00000000..b10fdef4 --- /dev/null +++ b/VAPs/quicklook/PBLHT/pblhtsonde1mcfarl.c1.ipynb @@ -0,0 +1,297 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# PBLHTSONDE1MCFARL.C1 Plots\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/pblht) for more information about this vap." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib widget\n", + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import ipywidgets as widgets\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "# Data archive directory\n", + "DATA_DIR = r'/data/archive/'\n", + "\n", + "# Datastream info\n", + "DATASTREAM_NAME = 'pblhtsonde1mcfarl'\n", + "DATA_LEVEL = 'c1'\n", + "LOCATIONS = [{'end_date': '2017-01-02', 'facility': 'M1', 'site': 'awr', 'start_date': '2015-11-30'}, {'end_date': '2016-01-18', 'facility': 'S1', 'site': 'awr', 'start_date': '2015-12-01'}, {'end_date': '2017-10-31', 'facility': 'S1', 'site': 'asi', 'start_date': '2016-04-29'}, {'end_date': '2020-05-31', 'facility': 'M1', 'site': 'anx', 'start_date': '2019-12-02'}, {'end_date': '2015-02-09', 'facility': 'M1', 'site': 'acx', 'start_date': '2015-01-12'}, {'end_date': '2021-06-14', 'facility': 'M1', 'site': 'oli', 'start_date': '2013-10-03'}, {'end_date': '2023-06-15', 'facility': 'M1', 'site': 'guc', 'start_date': '2021-09-01'}, {'end_date': '2023-12-19', 'facility': 'C1', 'site': 'ena', 'start_date': '2013-09-28'}, {'end_date': '2022-10-01', 'facility': 'M1', 'site': 'hou', 'start_date': '2021-09-18'}, {'end_date': '2022-09-25', 'facility': 'S1', 'site': 'hou', 'start_date': '2021-08-28'}, {'end_date': '2023-12-18', 'facility': 'M1', 'site': 'epc', 'start_date': '2023-02-06'}, {'end_date': '2008-01-01', 'facility': 'M1', 'site': 'fkb', 'start_date': '2007-03-24'}, {'end_date': '2015-12-01', 'facility': 'M1', 'site': 'mao', 'start_date': '2014-01-01'}, {'end_date': '2018-03-24', 'facility': 'M1', 'site': 'mar', 'start_date': '2017-10-31'}, {'end_date': '2008-12-28', 'facility': 'M1', 'site': 'hfe', 'start_date': '2008-05-14'}, 
{'end_date': '2019-04-29', 'facility': 'M1', 'site': 'cor', 'start_date': '2018-09-27'}, {'end_date': '2013-10-03', 'facility': 'M1', 'site': 'mag', 'start_date': '2012-10-01'}, {'end_date': '2012-04-08', 'facility': 'M1', 'site': 'gan', 'start_date': '2011-09-15'}, {'end_date': '2011-01-05', 'facility': 'M1', 'site': 'grw', 'start_date': '2009-04-16'}, {'end_date': '2013-06-29', 'facility': 'M1', 'site': 'pvc', 'start_date': '2012-06-25'}, {'end_date': '2005-09-15', 'facility': 'M1', 'site': 'pye', 'start_date': '2005-02-25'}, {'end_date': '2011-04-24', 'facility': 'M1', 'site': 'sbs', 'start_date': '2010-11-08'}, {'end_date': '2020-10-01', 'facility': 'M1', 'site': 'mos', 'start_date': '2019-10-11'}, {'end_date': '2012-03-31', 'facility': 'M1', 'site': 'pgh', 'start_date': '2011-06-15'}, {'end_date': '2023-12-18', 'facility': 'C1', 'site': 'nsa', 'start_date': '2002-04-28'}, {'end_date': '2007-01-08', 'facility': 'M1', 'site': 'nim', 'start_date': '2006-01-07'}, {'end_date': '2007-06-13', 'facility': 'B1', 'site': 'sgp', 'start_date': '2002-05-13'}, {'end_date': '2007-06-29', 'facility': 'B4', 'site': 'sgp', 'start_date': '2002-05-20'}, {'end_date': '2007-06-22', 'facility': 'B5', 'site': 'sgp', 'start_date': '2002-05-20'}, {'end_date': '2002-11-26', 'facility': 'B6', 'site': 'sgp', 'start_date': '2001-06-20'}, {'end_date': '2023-12-18', 'facility': 'C1', 'site': 'sgp', 'start_date': '2001-04-01'}, {'end_date': '2014-09-12', 'facility': 'M1', 'site': 'tmp', 'start_date': '2014-02-01'}, {'end_date': '2014-07-07', 'facility': 'C1', 'site': 'twp', 'start_date': '2001-04-03'}, {'end_date': '2013-08-25', 'facility': 'C2', 'site': 'twp', 'start_date': '2001-04-01'}, {'end_date': '2015-01-14', 'facility': 'C3', 'site': 'twp', 'start_date': '2002-04-28'}]" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "9faaf875", + "metadata": {}, + "source": [ + "## Define site, facility, and date range" + ] + }, + { + "cell_type": "code", + "execution_count": 
null, + "id": "ac6764f5", + "metadata": {}, + "outputs": [], + "source": [ + "print(\"The following locations and date ranges are available for this VAP:\")\n", + "display(pd.DataFrame(LOCATIONS, columns=['site', 'facility', 'start_date', 'end_date']))" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "8d132223", + "metadata": {}, + "source": [ + "#### Define site, facility, and date range (date format: YYYY-MM-DD) using the variables below:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e563983a", + "metadata": {}, + "outputs": [], + "source": [ + "site_facility = ( 'sgp', 'B1' )\n", + "\n", + "date_start = '2007-06-10'\n", + "date_end = '2007-06-12'" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "bccd3dfe-2f99-49a5-bace-ea37e8dd8fc3", + "metadata": {}, + "source": [ + "## Load data files\n", + "Load data files from /data/archive/" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cb4b9a26-c574-49c0-a521-658fa553e39e", + "metadata": {}, + "outputs": [], + "source": [ + "# Compile list of files\n", + "site, facility = site_facility\n", + "d_date_start = datetime.strptime(date_start, '%Y-%m-%d')\n", + "d_date_end = datetime.strptime(date_end, '%Y-%m-%d')\n", + "dir_path = os.path.join(DATA_DIR + site, site + DATASTREAM_NAME + facility + r'.' 
+ DATA_LEVEL )\n", + "dir_path\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6be8f3dc", + "metadata": {}, + "outputs": [], + "source": [ + "from datetime import date, timedelta\n", + "import pandas as pd\n", + "\n", + "def get_ARM_formated_dates(start_date, end_date):\n", + " \"\"\"\n", + " Get a list of ARM conventional formated date lists, based on start_date and end_date(inclusive)\n", + " EXAMPLE:\n", + " get_ARM_formated_dates(start_date=\"20180219\", end_date=\"20180221\")\n", + " >> [\"20180219\", \"20180220\", \"20180221\"] \n", + " \"\"\"\n", + " \n", + " _start_date = pd.to_datetime(start_date)\n", + " _end_date = pd.to_datetime(end_date)\n", + " \n", + " delta = _end_date - _start_date # returns timedelta \n", + " dates = []\n", + "\n", + " for i in range(delta.days + 1):\n", + " day = _start_date + timedelta(days=i)\n", + " day_formated = day.strftime(format=\"%Y%m%d\")\n", + " dates.append(day_formated)\n", + " return dates\n", + "\n", + "\n", + "get_ARM_formated_dates(start_date=date_start, end_date=date_end)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "51feea2e", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter a list of files based on date pattern\n", + "import glob\n", + "dates = get_ARM_formated_dates(start_date=date_start, end_date=date_end)\n", + "files_filter = []\n", + "for date in dates:\n", + " files_filter += glob.glob(f'{dir_path}/*.{date}*.*')\n", + " files_filter\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0e5d0d7", + "metadata": {}, + "outputs": [], + "source": [ + "# Load files as a single dataset\n", + "files_list = files_filter \n", + "ds = act.io.armfiles.read_netcdf(files_list)\n", + "ds.clean.cleanup()\n", + "print(f'{len(files_list)} files loaded')\n", + "ds\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "4a551094-9ec0-4b64-b80a-9940573c2f50", + "metadata": {}, + "source": [ + "## Plot 
time series data\n", + "#### Define the list of variables to be plotted:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d09b789e-84f1-4605-846b-a72c110c8048", + "metadata": {}, + "outputs": [], + "source": [ + "variables_to_plot = ['pbl_height_heffter', 'pbl_height_liu_liang', 'pbl_height_bulk_richardson_pt25']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3458fb08-035b-4898-9253-0a94e6f9c97b", + "metadata": {}, + "outputs": [], + "source": [ + "ts_display = act.plotting.TimeSeriesDisplay(ds)\n", + "ts_display.add_subplots((len(variables_to_plot),), figsize = (9.5,4*len(variables_to_plot)))\n", + "\n", + "for i,v in enumerate(variables_to_plot):\n", + " ts_ax = ts_display.plot(v, subplot_index=(i,), set_title=ds.variables[v].attrs['long_name'],)\n", + " ts_ax.grid()\n", + "\n", + "plt.show()\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "2308db16-f362-4033-a11a-c3e5e75ad9ba", + "metadata": {}, + "source": [ + "## Field selection dropdown menu\n", + "Select variable to be plotted from a dropdown menu" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d5792fa8-2253-441a-8ac6-1e07a0345031", + "metadata": {}, + "outputs": [], + "source": [ + "plt.ioff()\n", + "\n", + "# populate dropdown menu with available variables \n", + "available_variables = [v for v in ds.variables if not('time' in v or v.startswith('qc_') or v.startswith('source_')) and 'long_name' in ds.variables[v].attrs]\n", + "d_variable = 'pbl_height_heffter'\n", + "dropdown = widgets.Dropdown(\n", + " options = [(ds.variables[v].attrs['long_name'], v) for v in available_variables],\n", + " value= d_variable,\n", + " description='Field:',\n", + " disabled=False,\n", + ")\n", + "dropdown.layout.margin = '0px 30% 0px 20%'\n", + "dropdown.layout.width = '50%'\n", + "\n", + "# set up display\n", + "i_display = act.plotting.TimeSeriesDisplay(ds)\n", + "i_display.add_subplots((1,), figsize = (9.5,5))\n", + "i_ax = 
i_display.plot(d_variable, subplot_index=(0,), set_title=ds.variables[d_variable].attrs['long_name'],)\n", + "i_ax.grid()\n", + "i_fig = i_display.fig\n", + "\n", + "# update plot callback function\n", + "def update_plot(change):\n", + " i_ax.cla()\n", + " i_ax_new = i_display.plot(change.new, subplot_index=(0,), set_title=ds.variables[change.new].attrs['long_name'],)\n", + " i_ax_new.grid()\n", + " i_fig.canvas.draw()\n", + " i_fig.canvas.flush_events()\n", + "\n", + "dropdown.observe(update_plot, names='value')\n", + "\n", + "widgets.AppLayout(\n", + " header=dropdown,\n", + " center=i_fig.canvas,\n", + " pane_heights=[1, 6,1]\n", + ")\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.12" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/PBLHT/pblhtsondeyr1mcfarl.c1.ipynb b/VAPs/quicklook/PBLHT/pblhtsondeyr1mcfarl.c1.ipynb new file mode 100644 index 00000000..f5f5858e --- /dev/null +++ b/VAPs/quicklook/PBLHT/pblhtsondeyr1mcfarl.c1.ipynb @@ -0,0 +1,339 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# PBLHTSONDEYR1MCFARL.C1 Plots\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/pblht) for more information about this vap." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib widget\n", + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import ipywidgets as widgets\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "# Data archive directory\n", + "DATA_DIR = r'/data/archive/'\n", + "\n", + "# Datastream info\n", + "DATASTREAM_NAME = 'pblhtsondeyr1mcfarl'\n", + "DATA_LEVEL = 'c1'\n", + "LOCATIONS = [{'end_date': '2016-12-30', 'facility': 'M1', 'site': 'awr', 'start_date': '2015-11-30'}, {'end_date': '2016-01-16', 'facility': 'S1', 'site': 'awr', 'start_date': '2015-12-01'}, {'end_date': '2016-12-31', 'facility': 'S1', 'site': 'asi', 'start_date': '2016-04-29'}, {'end_date': '2015-02-08', 'facility': 'M1', 'site': 'acx', 'start_date': '2015-01-12'}, {'end_date': '2021-06-13', 'facility': 'M1', 'site': 'oli', 'start_date': '2014-02-28'}, {'end_date': '2021-12-31', 'facility': 'C1', 'site': 'ena', 'start_date': '2013-09-28'}, {'end_date': '2022-09-30', 'facility': 'M1', 'site': 'hou', 'start_date': '2021-09-18'}, {'end_date': '2022-09-25', 'facility': 'S1', 'site': 'hou', 'start_date': '2021-08-28'}, {'end_date': '2007-12-31', 'facility': 'M1', 'site': 'fkb', 'start_date': '2007-03-24'}, {'end_date': '2014-12-31', 'facility': 'M1', 'site': 'mao', 'start_date': '2014-01-01'}, {'end_date': '2018-03-23', 'facility': 'M1', 'site': 'mar', 'start_date': '2017-10-31'}, {'end_date': '2008-12-28', 'facility': 'M1', 'site': 'hfe', 'start_date': '2008-05-14'}, {'end_date': '2019-04-29', 'facility': 'M1', 'site': 'cor', 'start_date': '2018-09-27'}, {'end_date': '2013-10-01', 'facility': 'M1', 'site': 'mag', 'start_date': '2012-10-01'}, {'end_date': '2010-03-15', 'facility': 'M1', 'site': 'grw', 'start_date': '2009-04-16'}, 
{'end_date': '2005-09-15', 'facility': 'M1', 'site': 'pye', 'start_date': '2005-02-25'}, {'end_date': '2020-09-16', 'facility': 'M1', 'site': 'mos', 'start_date': '2020-07-31'}, {'end_date': '2012-03-31', 'facility': 'M1', 'site': 'pgh', 'start_date': '2011-06-15'}, {'end_date': '2021-12-31', 'facility': 'C1', 'site': 'nsa', 'start_date': '2002-04-28'}, {'end_date': '2006-12-20', 'facility': 'M1', 'site': 'nim', 'start_date': '2006-01-07'}, {'end_date': '2007-06-12', 'facility': 'B1', 'site': 'sgp', 'start_date': '2002-05-13'}, {'end_date': '2007-06-29', 'facility': 'B4', 'site': 'sgp', 'start_date': '2002-05-20'}, {'end_date': '2007-06-21', 'facility': 'B5', 'site': 'sgp', 'start_date': '2002-05-20'}, {'end_date': '2002-11-25', 'facility': 'B6', 'site': 'sgp', 'start_date': '2001-06-20'}, {'end_date': '2021-12-31', 'facility': 'C1', 'site': 'sgp', 'start_date': '2001-04-01'}, {'end_date': '2013-12-02', 'facility': 'C1', 'site': 'twp', 'start_date': '2001-04-03'}, {'end_date': '2013-08-24', 'facility': 'C2', 'site': 'twp', 'start_date': '2001-04-01'}, {'end_date': '2015-01-13', 'facility': 'C3', 'site': 'twp', 'start_date': '2002-04-28'}]" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "9faaf875", + "metadata": {}, + "source": [ + "## Define site, facility, and date range" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ac6764f5", + "metadata": {}, + "outputs": [], + "source": [ + "print(\"The following locations and date ranges are available for this VAP:\")\n", + "display(pd.DataFrame(LOCATIONS, columns=['site', 'facility', 'start_date', 'end_date']))" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "8d132223", + "metadata": {}, + "source": [ + "#### Define site, facility, and date range (date format: YYYY-MM-DD) using the variables below:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e563983a", + "metadata": {}, + "outputs": [], + "source": [ + "site_facility = ( 'sgp', 
'B1' )\n", + "\n", + "date_start = '2007-06-03'\n", + "date_end = '2007-06-05'" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "bccd3dfe-2f99-49a5-bace-ea37e8dd8fc3", + "metadata": {}, + "source": [ + "## Load data files\n", + "Load data files from /data/archive/" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cb4b9a26-c574-49c0-a521-658fa553e39e", + "metadata": {}, + "outputs": [], + "source": [ + "# Compile list of files\n", + "site, facility = site_facility\n", + "d_date_start = datetime.strptime(date_start, '%Y-%m-%d')\n", + "d_date_end = datetime.strptime(date_end, '%Y-%m-%d')\n", + "dir_path = os.path.join(DATA_DIR + site, site + DATASTREAM_NAME + facility + r'.' + DATA_LEVEL )\n", + "dir_path\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6be8f3dc", + "metadata": {}, + "outputs": [], + "source": [ + "from datetime import date, timedelta\n", + "import pandas as pd\n", + "\n", + "def get_ARM_formated_dates(start_date, end_date):\n", + " \"\"\"\n", + " Get a list of ARM conventional formated date lists, based on start_date and end_date(inclusive)\n", + " EXAMPLE:\n", + " get_ARM_formated_dates(start_date=\"20180219\", end_date=\"20180221\")\n", + " >> [\"20180219\", \"20180220\", \"20180221\"] \n", + " \"\"\"\n", + " \n", + " _start_date = pd.to_datetime(start_date)\n", + " _end_date = pd.to_datetime(end_date)\n", + " \n", + " delta = _end_date - _start_date # returns timedelta \n", + " dates = []\n", + "\n", + " for i in range(delta.days + 1):\n", + " day = _start_date + timedelta(days=i)\n", + " day_formated = day.strftime(format=\"%Y%m%d\")\n", + " dates.append(day_formated)\n", + " return dates\n", + "\n", + "\n", + "get_ARM_formated_dates(start_date=date_start, end_date=date_end)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "51feea2e", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter a list of files based on date pattern\n", + "import glob\n", 
+ "dates = get_ARM_formated_dates(start_date=date_start, end_date=date_end)\n", + "files_filter = []\n", + "for date in dates:\n", + " files_filter += glob.glob(f'{dir_path}/*.{date}*.*')\n", + " files_filter\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0e5d0d7", + "metadata": {}, + "outputs": [], + "source": [ + "# Load files as a single dataset\n", + "files_list = files_filter \n", + "ds = act.io.armfiles.read_netcdf(files_list)\n", + "ds.clean.cleanup()\n", + "print(f'{len(files_list)} files loaded')\n", + "ds\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "4a551094-9ec0-4b64-b80a-9940573c2f50", + "metadata": {}, + "source": [ + "## Plot time series data\n", + "#### Define the list of variables to be plotted:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d09b789e-84f1-4605-846b-a72c110c8048", + "metadata": {}, + "outputs": [], + "source": [ + "variables_to_plot = ['pbl_height_heffter', 'pbl_height_liu_liang', 'pbl_height_bulk_richardson_pt25']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3458fb08-035b-4898-9253-0a94e6f9c97b", + "metadata": {}, + "outputs": [], + "source": [ + "ts_display = act.plotting.TimeSeriesDisplay(ds)\n", + "ts_display.add_subplots((len(variables_to_plot),), figsize = (9.5,4*len(variables_to_plot)))\n", + "\n", + "for i,v in enumerate(variables_to_plot):\n", + " ts_ax = ts_display.plot(v, subplot_index=(i,), set_title=ds.variables[v].attrs['long_name'],)\n", + " ts_ax.grid()\n", + "\n", + "plt.show()\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "194399aa-1907-452b-8ba9-bc31d7f60291", + "metadata": {}, + "source": [ + "## Quality check plots\n", + "#### Define variable for QC plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9a0ef63d-3eeb-48fc-a24a-43258b6134b8", + "metadata": {}, + "outputs": [], + "source": [ + "qc_variable = 'pbl_height_heffter'" + ] + }, + 
{ + "cell_type": "code", + "execution_count": null, + "id": "532663a3-4dc0-4497-bda8-018c5f91e1c4", + "metadata": {}, + "outputs": [], + "source": [ + "# QC Plot\n", + "if ('qc_'+qc_variable) in ds.variables:\n", + "\n", + " # Plot\n", + " qc_display = act.plotting.TimeSeriesDisplay(ds)\n", + " qc_display.add_subplots((2,), figsize = (9.5,10))\n", + " qc_ax = qc_display.plot(qc_variable, subplot_index=(0,), set_title=\"QC results on field: \" + qc_variable,)\n", + " qc_ax.grid()\n", + " qc_display.qc_flag_block_plot(qc_variable, subplot_index=(1,))\n", + "\n", + " plt.show()\n", + "else:\n", + " print(f'QC not available for the selected field: {qc_variable}')\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "2308db16-f362-4033-a11a-c3e5e75ad9ba", + "metadata": {}, + "source": [ + "## Field selection dropdown menu\n", + "Select variable to be plotted from a dropdown menu" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d5792fa8-2253-441a-8ac6-1e07a0345031", + "metadata": {}, + "outputs": [], + "source": [ + "plt.ioff()\n", + "\n", + "# populate dropdown menu with available variables \n", + "available_variables = [v for v in ds.variables if not('time' in v or v.startswith('qc_') or v.startswith('source_')) and 'long_name' in ds.variables[v].attrs]\n", + "d_variable = 'pbl_height_heffter'\n", + "dropdown = widgets.Dropdown(\n", + " options = [(ds.variables[v].attrs['long_name'], v) for v in available_variables],\n", + " value= d_variable,\n", + " description='Field:',\n", + " disabled=False,\n", + ")\n", + "dropdown.layout.margin = '0px 30% 0px 20%'\n", + "dropdown.layout.width = '50%'\n", + "\n", + "# set up display\n", + "i_display = act.plotting.TimeSeriesDisplay(ds)\n", + "i_display.add_subplots((1,), figsize = (9.5,5))\n", + "i_ax = i_display.plot(d_variable, subplot_index=(0,), set_title=ds.variables[d_variable].attrs['long_name'],)\n", + "i_ax.grid()\n", + "i_fig = i_display.fig\n", + "\n", + "# update plot 
callback function\n", + "def update_plot(change):\n", + " i_ax.cla()\n", + " i_ax_new = i_display.plot(change.new, subplot_index=(0,), set_title=ds.variables[change.new].attrs['long_name'],)\n", + " i_ax_new.grid()\n", + " i_fig.canvas.draw()\n", + " i_fig.canvas.flush_events()\n", + "\n", + "dropdown.observe(update_plot, names='value')\n", + "\n", + "widgets.AppLayout(\n", + " header=dropdown,\n", + " center=i_fig.canvas,\n", + " pane_heights=[1, 6,1]\n", + ")\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.12" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/PCCP/PCCP_tutorial.ipynb b/VAPs/quicklook/PCCP/PCCP_tutorial.ipynb new file mode 100644 index 00000000..e8b0ca67 --- /dev/null +++ b/VAPs/quicklook/PCCP/PCCP_tutorial.ipynb @@ -0,0 +1,943 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# PCCP.C1 Notebook\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/pccp) for more information about this vap." + ] + }, + { + "cell_type": "markdown", + "id": "97097763", + "metadata": {}, + "source": [ + "In this notebook, we demonstrate the workflow to explore ARM vap data (using pccp as an example.) Value-added products (VAPs) are higher-order data products that have been analyzed and processed to ease scientists’ use of ARM data in atmospheric research and global climate models. \n", + "Here is the main content we will cover." 
+ ] + }, + { + "cell_type": "markdown", + "id": "eddec40f", + "metadata": {}, + "source": [ + "# Table of Content\n", + "## Access the data\n", + "* How to retrieve the data\n", + "* Data path and file name conventions\n", + "* Load data\n", + "## Explore the data\n", + "* NetCDF Data structure\n", + "* Xarray essentials\n", + "* Xarray Variable\n", + "## Plot the data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "import random\n", + "\n", + "import glob\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "id": "2372d5be", + "metadata": {}, + "source": [ + "## Access the data" + ] + }, + { + "cell_type": "markdown", + "id": "5b0d9684", + "metadata": {}, + "source": [ + "### How to retrieve the data\n", + "We assume the path \"/data/archive\" is available where you are running this notebook. You can use `os.path.exists(\"/data/archive\")` to verify if the path exists at your machine.\n", + "\n", + "\n", + "### Data path and file name conventions\n", + "There several common terminologies regarding ARM data, for example, data-stream-name, data-level, site, facility, instrument, etc. (For more details, please see [ARM Data File Standards Version 1.2](https://www.google.com/search?q=arm+datastream+facility+cite+definition&rlz=1C1GCEJ_enUS1029US1029&ei=hb41ZICrBPukqtsPvMODIA&ved=0ahUKEwjAgeiS0aL-AhV7kmoFHbzhAAQQ4dUDCBA&uact=5&oq=arm+datastream+facility+cite+definition&gs_lcp=Cgxnd3Mtd2l6LXNlcnAQAzIFCAAQogQyBQgAEKIEMgUIABCiBDIFCAAQogQyBQgAEKIEOgoIABBHENYEELADOgoIIRCgARDDBBAKSgQIQRgAUJ4DWIoOYO0PaAFwAXgAgAGMAYgBuQmSAQMyLjmYAQCgAQHIAQjAAQE&sclient=gws-wiz-serp).) 
\n", + "\n", + "For example, this notebook is called `pccp.c1`, where `pccp` is the \"datastream name\", and `c1` is the \"data level\".\n", + "\n", + "This datastream also contains site `hou` and facility `S5`. (Note: an individual datastream might have multiple site-facility pairs.)\n", + "In such a case, the data of this data-stream is stored at `/data/archive/hou/houpccpS5.c1`, which is in the format of `<DATA_DIR>/<site>/<site><datastream_name><facility>.<data_level>`. We can use the following method to assign the data-stream directory `datastream_dir = os.path.join(DATA_DIR, site, site + DATASTREAM_NAME + facility + '.' + DATA_LEVEL )`\n", + "\n", + "The data files under datastream_dir also follow naming conventions. Once at the datastream_dir level, the most important file naming convention to differentiate the files is \"yyyyMMdd.hhmmss\", which comes in handy to filter files based on datetime. For example, we can use `glob.glob(f'{datastream_dir}/*.200709*.*')` to filter files in September 2007.\n", + "\n", + "(Note: refer to the https://adc.arm.gov/discovery/#/ and https://adc.arm.gov/solr8/metadata/select API to explore ARM datastreams and associated available sites and facilities.)\n", + "\n", + "Please see the following examples in action" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d7e9eb85", + "metadata": {}, + "outputs": [], + "source": [ + "# Verify if DATA_DIR path exists\n", + "DATA_DIR = \"/data/archive\"\n", + "os.path.exists(DATA_DIR)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "586993fd", + "metadata": {}, + "outputs": [], + "source": [ + "# Speicify datastream_dir following the path conventions and check its existence\n", + "DATASTREAM_NAME = \"pccp\"\n", + "DATA_LEVEL = \"c1\"\n", + "site = \"hou\"\n", + "facility = \"S5\"\n", + "datastream_dir = os.path.join(DATA_DIR, site, site + DATASTREAM_NAME + facility + '.' 
+ DATA_LEVEL )\n", + "print(datastream_dir)\n", + "print(os.path.exists(datastream_dir))\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0742f7c1", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: list 5 (random) files under datastream_dir\n", + "files = os.listdir(datastream_dir)\n", + "files[:5]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "39b98a36", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: get most recent file\n", + "list_of_files = glob.glob(f\"{datastream_dir}/*\") # * means all if need specific format then *.csv\n", + "latest_file = max(list_of_files, key=os.path.getctime)\n", + "latest_file" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "902d514e", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus sort datastream files based on datetime\n", + "files = os.listdir(datastream_dir)\n", + "file_sorted = files.copy()\n", + "file_sorted.sort() \n", + "print(file_sorted[:5])\n", + "\n", + "# to reverse\n", + "file_sorted_reverse = files.copy() \n", + "file_sorted_reverse.sort(reverse=True)\n", + "print(file_sorted_reverse[:5])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ec5923b2", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: pattern matching\n", + "# filter the 200709** files under datastream_dir\n", + "files_filter = glob.glob(f'{datastream_dir}/*.200709*.*')\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c4a4aa18", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: use armnotebook_utils.file_filter (TODO) to filter files based on datastream info \n" + ] + }, + { + "cell_type": "markdown", + "id": "cfeb9efc", + "metadata": {}, + "source": [ + "### Load data\n", + "The arm data is stored in [NetCDF](https://en.wikipedia.org/wiki/NetCDF#:~:text=%22NetCDF%20(network%20Common%20Data%20Form,format%20for%20representing%20scientific%20data.) format. 
We can use xarray's `open_dataset` method to load single file, or `open_mfdataset` to open multiple files. (Note: the latter will still return a single xarray dataset object by combining multiple files.)\n", + "\n", + "Note: open_dataset keeps the file handle open and lazy loads its contents. All parameters are passed directly to open_dataset. It is a preferable method over load_dataset for memory-effiency. (more details, please see [xarray.open_dataset](https://docs.xarray.dev/en/stable/generated/xarray.open_dataset.html))\n", + "\n", + "See the following example in action" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a440a329", + "metadata": {}, + "outputs": [], + "source": [ + "# open single file\n", + "full_path = latest_file\n", + "print(full_path)\n", + "ds_single = xr.open_dataset(full_path)\n", + "ds_single" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0143a3d", + "metadata": {}, + "outputs": [], + "source": [ + "print(ds_single)\n", + "print(type(ds_single))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1c0f8939", + "metadata": {}, + "outputs": [], + "source": [ + "# open multiple files\n", + "n_files = 3\n", + "full_paths = [os.path.join(datastream_dir, f_path) for f_path in file_sorted_reverse[:n_files]]\n", + "print(full_paths)\n", + "try: # Note: sometimes multiple files cannot be merged, so we used try except here.\n", + " ds_mutiple = xr.open_mfdataset(full_paths)\n", + " # ds_mutiple\n", + " print(type(ds_mutiple))\n", + " print(ds_mutiple)\n", + "except Exception as e:\n", + " print(e)" + ] + }, + { + "cell_type": "markdown", + "id": "3fa59943", + "metadata": {}, + "source": [ + "## Explore the data" + ] + }, + { + "cell_type": "markdown", + "id": "571f69b9", + "metadata": {}, + "source": [ + "### NetCDF Data structure \n", + "\n", + "\n", + "(If you are confident with NetCDF basics and xarray essentials, feel free to skip this session.)\n", + "Before we dive into 
data exploration, there are some essential concepts we should be familiar with (shown below): \n", + "* Dataset\n", + "* Data array\n", + "* Variable\n", + "* Dimension\n", + "* Coordinate\n", + "* Data Type\n", + "* Meta Data (Attributes)\n", + "\n", + "We will not go into details about NetCDF basics and here are some references you might find helpful\n", + "* [Components of a NetCDF Dataset](https://iprc.soest.hawaii.edu/users/xfu/tool/guidef-7.html#)\n", + "* [Network Common Data Form (NetCDF)](https://www.unidata.ucar.edu/software/netcdf/) and [A Brief History of (netCDF) Time](https://www.unidata.ucar.edu/software/netcdf/time/recs.html)\n", + "\n", + "\n", + "\n", + "### Xarray essentials\n", + "Earlier we introduced the xarray package and used `open_dataset` and `open_mfdataset` to retrieve NetCDF data file as an xarray Dataset object. Recall that you can review the data in a notebook by using `print(ds)` or `ds`. \n", + "\n", + "Using xarray to retrieve the aforementioned NetCDF basics is straightforward. 
In general,\n", + "* Dataset: `ds`\n", + "* Data array: `ds.variables`\n", + "* Variable: `ds.variables`\n", + "* Dimension: `ds.dims`\n", + "* Coordinate: `ds.coords`\n", + "* Data Type: `type`\n", + "* Meta Data (Attributes): `ds.attrs`\n", + "\n", + "Also, here are some references if you are new to xarray\n", + "* [Xarray in 45 minutes](https://tutorial.xarray.dev/overview/xarray-in-45-min.html)\n", + "* [Handling NetCDF Files using XArray for Absolute Beginners](https://towardsdatascience.com/handling-netcdf-files-using-xarray-for-absolute-beginners-111a8ab4463f)\n", + "\n", + "Try the following commands in action\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b3eac323", + "metadata": {}, + "outputs": [], + "source": [ + "ds = ds_single" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "77ecf85d", + "metadata": {}, + "outputs": [], + "source": [ + "# Dataset \n", + "ds" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "25e7de09", + "metadata": {}, + "outputs": [], + "source": [ + "# Data array, Variable\n", + "# Note: the info can be overwhelming. 
Do not attempt to grasp everything at the first glance.\n", + "ds.variables" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8c41b67e", + "metadata": {}, + "outputs": [], + "source": [ + "# dimenssions\n", + "ds.dims" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "156f1dfc", + "metadata": {}, + "outputs": [], + "source": [ + "# coordinates\n", + "ds.coords" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "277d6064", + "metadata": {}, + "outputs": [], + "source": [ + "# Meta Data (Attributes)\n", + "ds.attrs" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d334681f", + "metadata": {}, + "outputs": [], + "source": [ + "# type\n", + "print(type(ds))\n", + "print(type(ds.variables))\n", + "print(type(ds.dims))\n", + "print(type(ds.coords))\n", + "print(type(ds.attrs))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "643399d6", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: retrieve variable names only\n", + "list(ds.variables)" + ] + }, + { + "cell_type": "markdown", + "id": "1cf36878", + "metadata": {}, + "source": [ + "#### Discussion: variable vs. coordinates vs. dimenssions. \n", + "From [Components of a NetCDF Dataset](https://iprc.soest.hawaii.edu/users/xfu/tool/guidef-7.html#):\n", + "\n", + "* Dimession: A dimension may be used to represent a real physical dimension, for example, time, latitude, longitude, or height. A dimension might also be used to index other quantities, for example station or model-run-number.\n", + "A netCDF dimension has both a name and a length. A dimension length is an arbitrary positive integer, except that one dimension in a netCDF dataset can have the length UNLIMITED.\n", + "\n", + "* Variables: Variables are used to store the bulk of the data in a netCDF dataset. A variable represents an array of values of the same type. A scalar value is treated as a 0-dimensional array. 
A variable has a name, a data type, and a shape described by its list of dimensions specified when the variable is created. A variable may also have associated attributes, which may be added, deleted or changed after the variable is created.\n", + "\n", + "* Coordinate (Variables): It is legal for a variable to have the same name as a dimension. Such variables have no special meaning to the netCDF library. However there is a convention that such variables should be treated in a special way by software using this library.\n", + "A variable with the same name as a dimension is called a coordinate variable. It typically defines a physical coordinate corresponding to that dimension.\n", + "\n", + "\n", + "Tips: By definition, coordinates and dimensions are also variables. For example ds.time is a coordinate but it is also a special variable. But in practice, when we talk about a variable, it implies a regular/non-coordinate variable. We will use this convention for the remainder of the notebook. " + ] + }, + { + "cell_type": "markdown", + "id": "d47120f9", + "metadata": {}, + "source": [ + "### Xarray Variable\n", + "\n", + "\n", + "We can use `ds.variables` to access variables of a dataset. But many times printing out the whole dataset can be overwhelming. Instead, we would work on the individual variable (also called Xarray data array.) using the `ds[var_name]` syntax, where `var_name` is the name of the variable. See the following examples in action.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5a57e136", + "metadata": {}, + "outputs": [], + "source": [ + "print(type(ds[\"time\"]))\n", + "ds[\"time\"]" + ] + }, + { + "cell_type": "markdown", + "id": "51ec643e", + "metadata": {}, + "source": [ + "#### Variable properties\n", + "For an individual variable, the following properties are mostly used with the associated xarray method to retrieve them. 
(assuming var = ds[var_name])\n", + "* name: `var.name`\n", + "* data content: `var.data`\n", + "* attributes: `var.attrs`\n", + "* dimenstions: `var.dims`\n", + "* data type: `var.data.dtype`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a632a525", + "metadata": {}, + "outputs": [], + "source": [ + "var_name = \"time\"\n", + "var = ds[var_name]\n", + "\n", + "print(\"var.name: \\n\", var.name, \"\\n\")\n", + "print(\"var.data: \\n\", var.data, \"\\n\")\n", + "print(\"var.attrs: \\n\", var.attrs, \"\\n\")\n", + "print(\"var.dims: \\n\", var.dims, \"\\n\")\n", + "print(\"var.data.dtype: \\n\", var.data.dtype, \"\\n\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b256b07f", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: indivdual variable info table (collect and display variable info as a tabular format)\n", + "df_info = pd.DataFrame()\n", + "df_info[\"var_name\"] = list(ds.variables)\n", + "df_info[\"dims\"] = df_info.var_name.apply(lambda x: ds[x].dims)\n", + "df_info[\"is_dim\"] = df_info.var_name.apply(lambda x: x in ds.dims)\n", + "df_info[\"n_dim\"] = df_info.var_name.apply(lambda x: len(ds[x].dims))\n", + "df_info[\"attrs\"] = df_info.var_name.apply(lambda x: ds[x].attrs)\n", + "df_info[\"dtype\"] = df_info.var_name.apply(lambda x: ds[x].data.dtype)\n", + "df_info" + ] + }, + { + "cell_type": "markdown", + "id": "e6a36b96", + "metadata": {}, + "source": [ + "### Data cleaning/Preprocessing (skipped)\n", + "Data cleaning and preprocessing is an important stage in the the data analysis pipeline. However it will be out of the scope of this notebook. For more information about data cleaning and preprocessing basics, here are some references. 
\n", + "* [Xarray Fundamentals](https://earth-env-data-science.github.io/lectures/xarray/xarray.html)\n", + "* [Xarray Tutorial — Pangeo Gallery documentation](http://gallery.pangeo.io/repos/pangeo-data/pangeo-tutorial-gallery/xarray.html)\n", + "* [Pythonic Data Cleaning With pandas and NumPy](https://realpython.com/python-data-cleaning-numpy-pandas/)\n", + "* [Pandas - Cleaning Data](https://www.w3schools.com/python/pandas/pandas_cleaning.asp)" + ] + }, + { + "cell_type": "markdown", + "id": "ae6e1a69", + "metadata": {}, + "source": [ + "### Plotting\n", + "Data visualization (or plotting) is another important data analysis topic and deserves its own discussion. Here in this notebook we will only demonstrate basic tools for simple data visualization tasks.\n", + "\n", + "Here are some references you might find useful:\n", + "* xarray plotting: https://docs.xarray.dev/en/stable/user-guide/plotting.html\n", + "* Atmospheric Community Toolkit (ACT): https://arm-doe.github.io/ACT/index.html\n", + "\n", + "Note: this notebook is auto-generated using a template. It uses a general idea to select variable(s) to plot and is not customized to each individual notebook. Feel free to change the variables of interest, especially if a certain figure fails to plot." + ] + }, + { + "cell_type": "markdown", + "id": "7e417fb2", + "metadata": {}, + "source": [ + "#### 1-dimensional basic time series plot\n", + "\n", + "For the following plot we would like to find variables such that\n", + "* it has one and only one dimension\n", + "* \"time\" is its coordinate variable\n", + "* it is not a dimension itself,\n", + "* it is not a special variable with substrings within [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "\n", + "Please see the following example in action. 
(For more details about pandas filtering, please see the following references.)\n", + "* [pandas: multiple conditions while indexing data frame](https://stackoverflow.com/questions/22591174/pandas-multiple-conditions-while-indexing-data-frame-unexpected-behavior)\n", + "* [pandas.DataFrame.apply](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.apply.html)\n", + "* [How to test if a string contains one of the substrings in a list...](https://stackoverflow.com/questions/26577516/how-to-test-if-a-string-contains-one-of-the-substrings-in-a-list-in-pandas" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "30edac2d", + "metadata": {}, + "outputs": [], + "source": [ + "# (pandas query) filter n_dim==1, non-dimenssional, and not contains the following substrings\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter = df_info[(df_info.n_dim == 1) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (df_info.is_dim==False) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4777a659", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " var_1d = df_filter.var_name.values[0]\n", + " var_1d\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fb5ae985", + "metadata": {}, + "outputs": [], + "source": [ + "# Note: if failed, change to another variable to plot.\n", + "try:\n", + " ds[var_1d].plot()\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "markdown", + "id": "9e41066e", + "metadata": {}, + "source": [ + "#### 2-dimenssional basic plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "09bd4adc", + "metadata": {}, + "outputs": [], + "source": [ + "# (pandas query) filter n_dim==2, non-dimenssional, and not contains the 
following substrings\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_2 = df_info[(df_info.n_dim == 2) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (df_info.is_dim==False) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter_2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2cdea1a9", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " var_2d = df_filter_2.var_name.values[0]\n", + " var_2d\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f5f48879", + "metadata": {}, + "outputs": [], + "source": [ + "# Note: if failed, change to another variable to plot.\n", + "try:\n", + " print(ds[var_2d].dims)\n", + " # ds[var_2d].plot()\n", + "\n", + " # conventionally, use \"time\" as x-axis\n", + " ds[var_2d].plot(x=\"time\")\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "markdown", + "id": "7554b2a6", + "metadata": {}, + "source": [ + "#### qc-plotting (optional)\n", + "\n", + "Note: act qc plotting has more strict requirement, one of them is the associated qc variable need to have \"flag_masks\" attributes. 
Which is added by using ds = act.io.armfiles.read_netcdf(files_list), then ds.clean.cleanup()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "71c2096f", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " ds_act = act.io.armfiles.read_netcdf(full_path)\n", + " print(type(ds_act))\n", + " ds_act.clean.cleanup()\n", + "\n", + " # or \n", + " # ds.clean.cleanup()\n", + "except Exception as e:\n", + " print(\"ERROR\", e)\n", + " ds_act = ds" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "844f8505", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter valid variables for ACT qc plotting\n", + "condition = (\"qc_\" + df_info.var_name).apply(lambda x: ds_act[x].attrs.get(\"flag_masks\") is not None \n", + " if x in list(ds.variables) else False)\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_3 = df_info[(df_info.is_dim==False) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings))) &\n", + " condition\n", + " ]\n", + "df_filter_3" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ecd38cc6", + "metadata": {}, + "outputs": [], + "source": [ + "# QC Plot\n", + "try:\n", + " qc_variable = df_filter_3.var_name.values[0]\n", + " print(qc_variable)\n", + "except Exception as e:\n", + " print(e)\n", + " \n", + "try:\n", + "\n", + " # Plot\n", + " qc_display = act.plotting.TimeSeriesDisplay(ds_act)\n", + " qc_display.add_subplots((2,), figsize = (9.5,10))\n", + " qc_ax = qc_display.plot(qc_variable, subplot_index=(0,), set_title=\"QC results on field: \" + qc_variable,)\n", + " qc_ax.grid()\n", + " qc_display.qc_flag_block_plot(qc_variable, subplot_index=(1,))\n", + " qc_ax.set_xlabel(f\"UTC Time starts at {ds.time.data[0]}\")\n", + "\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e)" + ] + }, + { + "cell_type": "markdown", + "id": "d76e4d27", + "metadata": 
{}, + "source": [ + "#### bonus: choose variables to plot from a dropdown menu " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bb733804", + "metadata": {}, + "outputs": [], + "source": [ + "# Valid variables filtering\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_4 = df_info[(df_info.is_dim==False) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter_4" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "82d5c20d", + "metadata": {}, + "outputs": [], + "source": [ + "# example 1: using xarray plot\n", + "\n", + "# Uncomment the following cell to try the interactive plot (ctrl + /)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2fc9aaa4", + "metadata": {}, + "outputs": [], + "source": [ + "# %matplotlib widget\n", + "# plt.clf()\n", + "\n", + "# fig, ax = plt.subplots(figsize=(10, 4))\n", + "\n", + "# available_variables = df_filter_4.var_name.values\n", + "# @widgets.interact(var=available_variables)\n", + "# def update(var = available_variables[0]):\n", + "# fig.clear() # Remove old lines from plot and plot new one\n", + "# if len(ds[var].dims)==2:\n", + "# ds[var].plot(x=\"time\", add_colorbar=False)\n", + "# else:\n", + "# ds[var].plot()\n", + "# plt.grid()\n", + "# plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fabdd802", + "metadata": {}, + "outputs": [], + "source": [ + "# example 2: using act plot\n", + "\n", + "# Uncomment the following cell to try the interactive plot (ctrl + /)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "962b4186", + "metadata": {}, + "outputs": [], + "source": [ + "# %matplotlib widget\n", + "# plt.clf()\n", + "\n", + "# available_variables = df_filter_4.var_name.values\n", + "\n", + "\n", + "# @widgets.interact(var=available_variables)\n", + "# def 
update(var = available_variables[0]):\n", + "\n", + "# i_display = act.plotting.TimeSeriesDisplay(ds_act)\n", + "# i_display.add_subplots((1,), figsize=(10, 4))\n", + "# ax = i_display.plot(var, subplot_index=(0,), set_title=f\"{var} ({ds_act[var].attrs['long_name']})\",)\n", + "\n", + "# ax.set_xlabel(f\"UTC Time starts at {ds.time.data[0]}\")\n", + "# ax.grid()\n", + "# plt.show()\n" + ] + }, + { + "cell_type": "markdown", + "id": "237bfd37", + "metadata": {}, + "source": [ + "## Point Cloud of Cloud of Points" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1d0eb162", + "metadata": {}, + "outputs": [], + "source": [ + "# this variable represents the index of the data point that is being shown\n", + "print(f\"Available time values: {ds.time.dt.strftime(r'%Y-%m-%d %H:%M:%S').values[0]} -- {ds.time.dt.strftime(r'%Y-%m-%d %H:%M:%S').values[-1]}\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "34ab9eb0", + "metadata": {}, + "outputs": [], + "source": [ + "# Enter timestamp to plot (format: YYYY-MM-DD hh:mm:ss)\n", + "display_time = '2020-03-02 15:00:00'\n", + "\n", + "# list available time stamps\n", + "display_dt = datetime.strptime(display_time, r'%Y-%m-%d %H:%M:%S')\n", + "available_times = np.array([datetime.combine(d,t) for d, t in zip(ds.time.dt.date.values,ds.time.dt.time.values)])\n", + "# get closest time \n", + "time_index = np.argmin(np.abs(available_times - display_dt))\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ad6a8741", + "metadata": {}, + "outputs": [], + "source": [ + "x_relative_var = ds.variables['x_relative'][time_index]\n", + "y_relative_var = ds.variables['y_relative'][time_index]\n", + "z_relative_var = ds.variables['z_relative'][time_index]\n", + "\n", + "# # Filter out values that exceed 50 km\n", + "ind_nonzero = tuple(np.nonzero((np.abs(x_relative_var) < 50000)))\n", + "print(len(x_relative_var[0]),' cloud points are extracted')\n" + ] + }, + { + 
"cell_type": "code", + "execution_count": null, + "id": "f84281f5", + "metadata": {}, + "outputs": [], + "source": [ + "x_slice = np.array(x_relative_var[ind_nonzero])/1000 # convert to km \n", + "y_slice = np.array(y_relative_var[ind_nonzero])/1000\n", + "z_slice = np.array(z_relative_var[ind_nonzero])/1000\n", + "\n", + "###2D plot of x,y variables in subplot(2,2,si)\n", + "def plot2D(x,y,fig,si,xlabel,ylabel):\n", + " ax = fig.add_subplot(2,2,si)\n", + " ax.scatter(x, y, s=4, marker='o', c= 'gray')\n", + " ax.set_xlabel(xlabel)\n", + " ax.set_ylabel(ylabel)\n", + " ax.xaxis.labelpad = 5\n", + " ax.yaxis.labelpad = 5\n", + "\n", + "###3D plot of x,y,z variables in subplot(2,2,si)\n", + "def plot3D(x,y,z,fig,si):\n", + " #check if data point count is sufficient for display\n", + " if (len(x)>10):\n", + " ax = fig.add_subplot(1,1,si, projection='3d')\n", + " x = [x[0:len(x)]]\n", + " y = [y[0:len(y)]]\n", + " z = [z[0:len(z)]]\n", + " x1 = int(min(min(x)))\n", + " x2 = int(max(max(x)))\n", + " y1 = int(min(min(y)))\n", + " y2 = int(max(max(y)))\n", + " z2 = int(max(max(z)))\n", + " \n", + " ax.scatter(x, y, z, c='gray', marker='o')\n", + " # ax.xaxis.set_ticks(np.arange(x1,x2,int((x2-x1+2)/4)+0.5))\n", + " # ax.yaxis.set_ticks(np.arange(y1,y2,int((y2-y1+2)/4)+0.5))\n", + " ax.view_init(elev=15, azim=-70)\n", + " ax.set_xlabel('X [km] ')\n", + " ax.set_ylabel('Y [km] ')\n", + " ax.xaxis.labelpad = 15\n", + " ax.yaxis.labelpad = 15\n", + " ax.zaxis.set_ticks(np.arange(0,int(z2+1),.5))\n", + " ax.set_zlabel('Z [km] ')\n", + " \n", + "fig = plt.figure(figsize=(9.5,10))\n", + "plot2D(x_slice,z_slice,fig,1,'direction eastward [km]','altitude above the ground [km]')\n", + "plot2D(y_slice,z_slice,fig,2,'direction northward [km]','altitude above the ground [km]')\n", + "plot2D(x_slice,y_slice,fig,3,'direction eastward [km]','direction northward [km]')\n", + "fig = plt.figure(figsize=(9.5,10))\n", + "plot3D(x_slice,y_slice,z_slice,fig,1)" + ] + } + ], + "metadata": { + 
"kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.16" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/PCCP/pccp.c1.ipynb b/VAPs/quicklook/PCCP/pccp.c1.ipynb new file mode 100644 index 00000000..0b3b934a --- /dev/null +++ b/VAPs/quicklook/PCCP/pccp.c1.ipynb @@ -0,0 +1,310 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# PCCP.C1 Plots\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/pccp) for more information about this vap." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib widget\n", + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import ipywidgets as widgets\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "# Data archive directory\n", + "DATA_DIR = r'/data/archive/'\n", + "\n", + "# Datastream info\n", + "DATASTREAM_NAME = 'pccp'\n", + "DATA_LEVEL = 'c1'\n", + "LOCATIONS = [{'end_date': '2022-09-26', 'facility': 'S5', 'site': 'hou', 'start_date': '2021-09-17'}, {'end_date': '2020-03-03', 'facility': 'E43', 'site': 'sgp', 'start_date': '2017-09-01'}, {'end_date': '2019-12-01', 'facility': 'E44', 'site': 'sgp', 'start_date': '2017-09-01'}, {'end_date': '2019-10-30', 'facility': 'E45', 'site': 'sgp', 'start_date': '2017-09-01'}]" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "9faaf875", + "metadata": {}, + "source": [ + "## Define site, facility, and date range" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ac6764f5", + "metadata": {}, + "outputs": [], + "source": [ + "print(\"The following locations and date ranges are available for this VAP:\")\n", + "display(pd.DataFrame(LOCATIONS, columns=['site', 'facility', 'start_date', 'end_date']))" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "8d132223", + "metadata": {}, + "source": [ + "#### Define site, facility, and date range (date format: YYYY-MM-DD) using the variables below:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e563983a", + "metadata": {}, + "outputs": [], + "source": [ + "site_facility = ( 'sgp', 'E43' )\n", + "\n", + "date_start = '2020-03-02'\n", + "date_end = '2020-03-03'" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": 
"bccd3dfe-2f99-49a5-bace-ea37e8dd8fc3", + "metadata": {}, + "source": [ + "## Load data files\n", + "Load data files from /data/archive/" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cb4b9a26-c574-49c0-a521-658fa553e39e", + "metadata": {}, + "outputs": [], + "source": [ + "# Compile list of files\n", + "site, facility = site_facility\n", + "d_date_start = datetime.strptime(date_start, '%Y-%m-%d')\n", + "d_date_end = datetime.strptime(date_end, '%Y-%m-%d')\n", + "dir_path = os.path.join(DATA_DIR + site, site + DATASTREAM_NAME + facility + r'.' + DATA_LEVEL )\n", + "dir_path\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6be8f3dc", + "metadata": {}, + "outputs": [], + "source": [ + "from datetime import date, timedelta\n", + "import pandas as pd\n", + "\n", + "def get_ARM_formated_dates(start_date, end_date):\n", + " \"\"\"\n", + " Get a list of ARM conventional formated date lists, based on start_date and end_date(inclusive)\n", + " EXAMPLE:\n", + " get_ARM_formated_dates(start_date=\"20180219\", end_date=\"20180221\")\n", + " >> [\"20180219\", \"20180220\", \"20180221\"] \n", + " \"\"\"\n", + " \n", + " _start_date = pd.to_datetime(start_date)\n", + " _end_date = pd.to_datetime(end_date)\n", + " \n", + " delta = _end_date - _start_date # returns timedelta \n", + " dates = []\n", + "\n", + " for i in range(delta.days + 1):\n", + " day = _start_date + timedelta(days=i)\n", + " day_formated = day.strftime(format=\"%Y%m%d\")\n", + " dates.append(day_formated)\n", + " return dates\n", + "\n", + "\n", + "get_ARM_formated_dates(start_date=date_start, end_date=date_end)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "51feea2e", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter a list of files based on date pattern\n", + "import glob\n", + "dates = get_ARM_formated_dates(start_date=date_start, end_date=date_end)\n", + "files_filter = []\n", + "for date in dates:\n", + " files_filter 
+= glob.glob(f'{dir_path}/*.{date}*.*')\n", + " files_filter\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0e5d0d7", + "metadata": {}, + "outputs": [], + "source": [ + "# Load files as a single dataset\n", + "files_list = files_filter \n", + "ds = act.io.armfiles.read_netcdf(files_list)\n", + "ds.clean.cleanup()\n", + "print(f'{len(files_list)} files loaded')\n", + "ds\n" + ] + }, + { + "cell_type": "markdown", + "id": "822caf5f", + "metadata": {}, + "source": [ + "## Point Cloud of Cloud of Points" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1fe5cb65", + "metadata": {}, + "outputs": [], + "source": [ + "# this variable represents the index of the data point that is being shown\n", + "print(f\"Available time values: {ds.time.dt.strftime(r'%Y-%m-%d %H:%M:%S').values[0]} -- {ds.time.dt.strftime(r'%Y-%m-%d %H:%M:%S').values[-1]}\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b1c97574", + "metadata": {}, + "outputs": [], + "source": [ + "# Enter timestamp to plot (format: YYYY-MM-DD hh:mm:ss)\n", + "display_time = '2020-03-02 15:00:00'\n", + "\n", + "# list available time stamps\n", + "display_dt = datetime.strptime(display_time, r'%Y-%m-%d %H:%M:%S')\n", + "available_times = np.array([datetime.combine(d,t) for d, t in zip(ds.time.dt.date.values,ds.time.dt.time.values)])\n", + "# get closest time \n", + "time_index = np.argmin(np.abs(available_times - display_dt))\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "dc2401b0", + "metadata": {}, + "outputs": [], + "source": [ + "x_relative_var = ds.variables['x_relative'][time_index]\n", + "y_relative_var = ds.variables['y_relative'][time_index]\n", + "z_relative_var = ds.variables['z_relative'][time_index]\n", + "\n", + "# # Filter out values that exceed 50 km\n", + "ind_nonzero = tuple(np.nonzero((np.abs(x_relative_var) < 50000)))\n", + "print(len(x_relative_var[0]),' cloud points are 
extracted')\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8c0587f1", + "metadata": {}, + "outputs": [], + "source": [ + "x_slice = np.array(x_relative_var[ind_nonzero])/1000 # convert to km \n", + "y_slice = np.array(y_relative_var[ind_nonzero])/1000\n", + "z_slice = np.array(z_relative_var[ind_nonzero])/1000\n", + "\n", + "###2D plot of x,y variables in subplot(2,2,si)\n", + "def plot2D(x,y,fig,si,xlabel,ylabel):\n", + " ax = fig.add_subplot(2,2,si)\n", + " ax.scatter(x, y, s=4, marker='o', c= 'gray')\n", + " ax.set_xlabel(xlabel)\n", + " ax.set_ylabel(ylabel)\n", + " ax.xaxis.labelpad = 5\n", + " ax.yaxis.labelpad = 5\n", + "\n", + "###3D plot of x,y,z variables in subplot(2,2,si)\n", + "def plot3D(x,y,z,fig,si):\n", + " #check if data point count is sufficient for display\n", + " if (len(x)>10):\n", + " ax = fig.add_subplot(1,1,si, projection='3d')\n", + " x = [x[0:len(x)]]\n", + " y = [y[0:len(y)]]\n", + " z = [z[0:len(z)]]\n", + " x1 = int(min(min(x)))\n", + " x2 = int(max(max(x)))\n", + " y1 = int(min(min(y)))\n", + " y2 = int(max(max(y)))\n", + " z2 = int(max(max(z)))\n", + " \n", + " ax.scatter(x, y, z, c='gray', marker='o')\n", + " # ax.xaxis.set_ticks(np.arange(x1,x2,int((x2-x1+2)/4)+0.5))\n", + " # ax.yaxis.set_ticks(np.arange(y1,y2,int((y2-y1+2)/4)+0.5))\n", + " ax.view_init(elev=15, azim=-70)\n", + " ax.set_xlabel('X [km] ')\n", + " ax.set_ylabel('Y [km] ')\n", + " ax.xaxis.labelpad = 15\n", + " ax.yaxis.labelpad = 15\n", + " ax.zaxis.set_ticks(np.arange(0,int(z2+1),.5))\n", + " ax.set_zlabel('Z [km] ')\n", + " \n", + "fig = plt.figure(figsize=(9.5,10))\n", + "plot2D(x_slice,z_slice,fig,1,'direction eastward [km]','altitude above the ground [km]')\n", + "plot2D(y_slice,z_slice,fig,2,'direction northward [km]','altitude above the ground [km]')\n", + "plot2D(x_slice,y_slice,fig,3,'direction eastward [km]','direction northward [km]')\n", + "fig = plt.figure(figsize=(9.5,10))\n", + "plot3D(x_slice,y_slice,z_slice,fig,1)" + ] + 
} + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.12" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/PSAP/.ipynb_checkpoints/aospsap3w.c1-checkpoint.ipynb b/VAPs/quicklook/PSAP/.ipynb_checkpoints/aospsap3w.c1-checkpoint.ipynb new file mode 100644 index 00000000..73feaecb --- /dev/null +++ b/VAPs/quicklook/PSAP/.ipynb_checkpoints/aospsap3w.c1-checkpoint.ipynb @@ -0,0 +1,1841 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# AOSPSAP3W.C1 Plots\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/psap) for more information about this vap." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib widget\n", + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import ipywidgets as widgets\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "# Data archive directory\n", + "DATA_DIR = r'/data/archive/'\n", + "\n", + "# Datastream info\n", + "DATASTREAM_NAME = 'aospsap3w'\n", + "DATA_LEVEL = 'c1'\n", + "LOCATIONS = [{'end_date': '2013-06-24', 'facility': 'M1', 'site': 'pvc', 'start_date': '2012-07-16'}]" + ] + }, + { + "cell_type": "markdown", + "id": "9faaf875", + "metadata": {}, + "source": [ + "## Define site, facility, and date range" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "ac6764f5", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The following locations and date ranges are available for this VAP:\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sitefacilitystart_dateend_date
0pvcM12012-07-162013-06-24
\n", + "
" + ], + "text/plain": [ + " site facility start_date end_date\n", + "0 pvc M1 2012-07-16 2013-06-24" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "print(\"The following locations and date ranges are available for this VAP:\")\n", + "display(pd.DataFrame(LOCATIONS, columns=['site', 'facility', 'start_date', 'end_date']))" + ] + }, + { + "cell_type": "markdown", + "id": "8d132223", + "metadata": {}, + "source": [ + "#### Define site, facility, and date range (date format: YYYY-MM-DD) using the variables below:" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "e563983a", + "metadata": {}, + "outputs": [], + "source": [ + "site_facility = ( 'pvc', 'M1' )\n", + "\n", + "date_start = '2013-06-22'\n", + "date_end = '2013-06-24'" + ] + }, + { + "cell_type": "markdown", + "id": "bccd3dfe-2f99-49a5-bace-ea37e8dd8fc3", + "metadata": {}, + "source": [ + "## Load data files\n", + "Load data files from /data/archive/" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "cb4b9a26-c574-49c0-a521-658fa553e39e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'/data/archive/pvc/pvcaospsap3wM1.c1'" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Compile list of files\n", + "site, facility = site_facility\n", + "d_date_start = datetime.strptime(date_start, '%Y-%m-%d')\n", + "d_date_end = datetime.strptime(date_end, '%Y-%m-%d')\n", + "dir_path = os.path.join(DATA_DIR + site, site + DATASTREAM_NAME + facility + r'.' 
+ DATA_LEVEL )\n", + "dir_path\n" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "6be8f3dc", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['20130622', '20130623', '20130624']" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from datetime import date, timedelta\n", + "import pandas as pd\n", + "\n", + "def get_ARM_formated_dates(start_date, end_date):\n", + " \"\"\"\n", + " Get a list of ARM conventional formated date lists, based on start_date and end_date(inclusive)\n", + " EXAMPLE:\n", + " get_ARM_formated_dates(start_date=\"20180219\", end_date=\"20180221\")\n", + " >> [\"20180219\", \"20180220\", \"20180221\"] \n", + " \"\"\"\n", + " \n", + " _start_date = pd.to_datetime(start_date)\n", + " _end_date = pd.to_datetime(end_date)\n", + " \n", + " delta = _end_date - _start_date # returns timedelta \n", + " dates = []\n", + "\n", + " for i in range(delta.days + 1):\n", + " day = _start_date + timedelta(days=i)\n", + " day_formated = day.strftime(format=\"%Y%m%d\")\n", + " dates.append(day_formated)\n", + " return dates\n", + "\n", + "\n", + "get_ARM_formated_dates(start_date=date_start, end_date=date_end)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "51feea2e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['/data/archive/pvc/pvcaospsap3wM1.c1/pvcaospsap3wM1.c1.20130622.000000.cdf',\n", + " '/data/archive/pvc/pvcaospsap3wM1.c1/pvcaospsap3wM1.c1.20130623.000000.cdf',\n", + " '/data/archive/pvc/pvcaospsap3wM1.c1/pvcaospsap3wM1.c1.20130624.000000.cdf']" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Filter a list of files based on date pattern\n", + "import glob\n", + "dates = get_ARM_formated_dates(start_date=date_start, end_date=date_end)\n", + "files_filter = []\n", + "for date in dates:\n", + " files_filter += 
glob.glob(f'{dir_path}/*.{date}*.*')\n", + " files_filter\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "b0e5d0d7", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "3 files loaded\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
<xarray.Dataset>\n",
+       "Dimensions:              (time: 3923)\n",
+       "Coordinates:\n",
+       "  * time                 (time) datetime64[ns] 2013-06-22 ... 2013-06-24T18:0...\n",
+       "Data variables: (12/21)\n",
+       "    base_time            (time) datetime64[ns] 2013-06-22 ... 2013-06-24\n",
+       "    time_offset          (time) datetime64[ns] 2013-06-22 ... 2013-06-24T18:0...\n",
+       "    Ba_B_PSAP3W          (time) float32 dask.array<chunksize=(1440,), meta=np.ndarray>\n",
+       "    qc_Ba_B_PSAP3W       (time) int32 dask.array<chunksize=(1440,), meta=np.ndarray>\n",
+       "    Ba_G_PSAP3W          (time) float32 dask.array<chunksize=(1440,), meta=np.ndarray>\n",
+       "    qc_Ba_G_PSAP3W       (time) int32 dask.array<chunksize=(1440,), meta=np.ndarray>\n",
+       "    ...                   ...\n",
+       "    qc_sample_length     (time) int32 dask.array<chunksize=(1440,), meta=np.ndarray>\n",
+       "    impactor_setting     (time) float32 dask.array<chunksize=(1440,), meta=np.ndarray>\n",
+       "    qc_impactor_setting  (time) int32 dask.array<chunksize=(1440,), meta=np.ndarray>\n",
+       "    lat                  (time) float32 42.03 42.03 42.03 ... 42.03 42.03 42.03\n",
+       "    lon                  (time) float32 -70.05 -70.05 -70.05 ... -70.05 -70.05\n",
+       "    alt                  (time) float32 43.0 43.0 43.0 43.0 ... 43.0 43.0 43.0\n",
+       "Attributes: (12/21)\n",
+       "    command_line:             aosmqc_ingest -s pvc -f M1 -n aosmqc -R -D\n",
+       "    process_version:          ingest-aosmqc-1.2-0.el6\n",
+       "    dod_version:              aospsap3w-c1-1.3\n",
+       "    site_id:                  pvc\n",
+       "    facility_id:              M1: Cape Cod, Massachusetts\n",
+       "    data_level:               c1\n",
+       "    ...                       ...\n",
+       "    datastream:               pvcaospsap3wM1.c1\n",
+       "    history:                  created by user dsmgr on machine tin at 2014-06...\n",
+       "    _file_dates:              ['20130622', '20130623', '20130624']\n",
+       "    _file_times:              ['000000', '000000', '000000']\n",
+       "    _datastream:              pvcaospsap3wM1.c1\n",
+       "    _arm_standards_flag:      1
" + ], + "text/plain": [ + "\n", + "Dimensions: (time: 3923)\n", + "Coordinates:\n", + " * time (time) datetime64[ns] 2013-06-22 ... 2013-06-24T18:0...\n", + "Data variables: (12/21)\n", + " base_time (time) datetime64[ns] 2013-06-22 ... 2013-06-24\n", + " time_offset (time) datetime64[ns] 2013-06-22 ... 2013-06-24T18:0...\n", + " Ba_B_PSAP3W (time) float32 dask.array\n", + " qc_Ba_B_PSAP3W (time) int32 dask.array\n", + " Ba_G_PSAP3W (time) float32 dask.array\n", + " qc_Ba_G_PSAP3W (time) int32 dask.array\n", + " ... ...\n", + " qc_sample_length (time) int32 dask.array\n", + " impactor_setting (time) float32 dask.array\n", + " qc_impactor_setting (time) int32 dask.array\n", + " lat (time) float32 42.03 42.03 42.03 ... 42.03 42.03 42.03\n", + " lon (time) float32 -70.05 -70.05 -70.05 ... -70.05 -70.05\n", + " alt (time) float32 43.0 43.0 43.0 43.0 ... 43.0 43.0 43.0\n", + "Attributes: (12/21)\n", + " command_line: aosmqc_ingest -s pvc -f M1 -n aosmqc -R -D\n", + " process_version: ingest-aosmqc-1.2-0.el6\n", + " dod_version: aospsap3w-c1-1.3\n", + " site_id: pvc\n", + " facility_id: M1: Cape Cod, Massachusetts\n", + " data_level: c1\n", + " ... 
...\n", + " datastream: pvcaospsap3wM1.c1\n", + " history: created by user dsmgr on machine tin at 2014-06...\n", + " _file_dates: ['20130622', '20130623', '20130624']\n", + " _file_times: ['000000', '000000', '000000']\n", + " _datastream: pvcaospsap3wM1.c1\n", + " _arm_standards_flag: 1" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Load files as a single dataset\n", + "files_list = files_filter \n", + "ds = act.io.armfiles.read_netcdf(files_list)\n", + "ds.clean.cleanup()\n", + "print(f'{len(files_list)} files loaded')\n", + "ds\n" + ] + }, + { + "cell_type": "markdown", + "id": "4a551094-9ec0-4b64-b80a-9940573c2f50", + "metadata": {}, + "source": [ + "## Plot time series data\n", + "#### Define the list of variables to be plotted:" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "d09b789e-84f1-4605-846b-a72c110c8048", + "metadata": {}, + "outputs": [], + "source": [ + "variables_to_plot = ['transmittance_blue', 'dqrvar_transmittance_blue', 'transmittance_green']" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "3458fb08-035b-4898-9253-0a94e6f9c97b", + "metadata": {}, + "outputs": [ + { + "ename": "KeyError", + "evalue": "'transmittance_blue'", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[9], line 5\u001b[0m\n\u001b[1;32m 2\u001b[0m ts_display\u001b[38;5;241m.\u001b[39madd_subplots((\u001b[38;5;28mlen\u001b[39m(variables_to_plot),), figsize \u001b[38;5;241m=\u001b[39m (\u001b[38;5;241m9.5\u001b[39m,\u001b[38;5;241m4\u001b[39m\u001b[38;5;241m*\u001b[39m\u001b[38;5;28mlen\u001b[39m(variables_to_plot)))\n\u001b[1;32m 4\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m i,v \u001b[38;5;129;01min\u001b[39;00m 
\u001b[38;5;28menumerate\u001b[39m(variables_to_plot):\n\u001b[0;32m----> 5\u001b[0m ts_ax \u001b[38;5;241m=\u001b[39m ts_display\u001b[38;5;241m.\u001b[39mplot(v, subplot_index\u001b[38;5;241m=\u001b[39m(i,), set_title\u001b[38;5;241m=\u001b[39m\u001b[43mds\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mvariables\u001b[49m\u001b[43m[\u001b[49m\u001b[43mv\u001b[49m\u001b[43m]\u001b[49m\u001b[38;5;241m.\u001b[39mattrs[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mlong_name\u001b[39m\u001b[38;5;124m'\u001b[39m],)\n\u001b[1;32m 6\u001b[0m ts_ax\u001b[38;5;241m.\u001b[39mgrid()\n\u001b[1;32m 8\u001b[0m plt\u001b[38;5;241m.\u001b[39mshow()\n", + "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/xarray/core/utils.py:453\u001b[0m, in \u001b[0;36mFrozen.__getitem__\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m 452\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m__getitem__\u001b[39m(\u001b[38;5;28mself\u001b[39m, key: K) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m V:\n\u001b[0;32m--> 453\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmapping\u001b[49m\u001b[43m[\u001b[49m\u001b[43mkey\u001b[49m\u001b[43m]\u001b[49m\n", + "\u001b[0;31mKeyError\u001b[0m: 'transmittance_blue'" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "f164bc0d8b484beaa248597fc1245960", + "version_major": 2, + "version_minor": 0 + }, + "image/png": "", + "text/html": [ + "\n", + "
\n", + "
\n", + " Figure\n", + "
\n", + " \n", + "
\n", + " " + ], + "text/plain": [ + "Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "ts_display = act.plotting.TimeSeriesDisplay(ds)\n", + "ts_display.add_subplots((len(variables_to_plot),), figsize = (9.5,4*len(variables_to_plot)))\n", + "\n", + "for i,v in enumerate(variables_to_plot):\n", + " ts_ax = ts_display.plot(v, subplot_index=(i,), set_title=ds.variables[v].attrs['long_name'],)\n", + " ts_ax.grid()\n", + "\n", + "plt.show()\n" + ] + }, + { + "cell_type": "markdown", + "id": "194399aa-1907-452b-8ba9-bc31d7f60291", + "metadata": {}, + "source": [ + "## Quality check plots\n", + "#### Define variable for QC plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9a0ef63d-3eeb-48fc-a24a-43258b6134b8", + "metadata": {}, + "outputs": [], + "source": [ + "qc_variable = 'transmittance_blue'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "532663a3-4dc0-4497-bda8-018c5f91e1c4", + "metadata": {}, + "outputs": [], + "source": [ + "# QC Plot\n", + "if ('qc_'+qc_variable) in ds.variables:\n", + "\n", + " # Plot\n", + " qc_display = act.plotting.TimeSeriesDisplay(ds)\n", + " qc_display.add_subplots((2,), figsize = (9.5,10))\n", + " qc_ax = qc_display.plot(qc_variable, subplot_index=(0,), set_title=\"QC results on field: \" + qc_variable,)\n", + " qc_ax.grid()\n", + " qc_display.qc_flag_block_plot(qc_variable, subplot_index=(1,))\n", + "\n", + " plt.show()\n", + "else:\n", + " print(f'QC not available for the selected field: {qc_variable}')\n" + ] + }, + { + "cell_type": "markdown", + "id": "2308db16-f362-4033-a11a-c3e5e75ad9ba", + "metadata": {}, + "source": [ + "## Field selection dropdown menu\n", + "Select variable to be plotted from a dropdown menu" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d5792fa8-2253-441a-8ac6-1e07a0345031", + "metadata": 
{}, + "outputs": [], + "source": [ + "plt.ioff()\n", + "\n", + "# populate dropdown menu with available variables \n", + "available_variables = [v for v in ds.variables if not('time' in v or v.startswith('qc_') or v.startswith('source_')) and 'long_name' in ds.variables[v].attrs]\n", + "d_variable = 'transmittance_blue'\n", + "dropdown = widgets.Dropdown(\n", + " options = [(ds.variables[v].attrs['long_name'], v) for v in available_variables],\n", + " value= d_variable,\n", + " description='Field:',\n", + " disabled=False,\n", + ")\n", + "dropdown.layout.margin = '0px 30% 0px 20%'\n", + "dropdown.layout.width = '50%'\n", + "\n", + "# set up display\n", + "i_display = act.plotting.TimeSeriesDisplay(ds)\n", + "i_display.add_subplots((1,), figsize = (9.5,5))\n", + "i_ax = i_display.plot(d_variable, subplot_index=(0,), set_title=ds.variables[d_variable].attrs['long_name'],)\n", + "i_ax.grid()\n", + "i_fig = i_display.fig\n", + "\n", + "# update plot callback function\n", + "def update_plot(change):\n", + " i_ax.cla()\n", + " i_ax_new = i_display.plot(change.new, subplot_index=(0,), set_title=ds.variables[change.new].attrs['long_name'],)\n", + " i_ax_new.grid()\n", + " i_fig.canvas.draw()\n", + " i_fig.canvas.flush_events()\n", + "\n", + "dropdown.observe(update_plot, names='value')\n", + "\n", + "widgets.AppLayout(\n", + " header=dropdown,\n", + " center=i_fig.canvas,\n", + " pane_heights=[1, 6,1]\n", + ")\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.16" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} 
diff --git a/VAPs/quicklook/PSAP/PSAP_tutorial.ipynb b/VAPs/quicklook/PSAP/PSAP_tutorial.ipynb new file mode 100644 index 00000000..3eb3aa7b --- /dev/null +++ b/VAPs/quicklook/PSAP/PSAP_tutorial.ipynb @@ -0,0 +1,838 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# AOSPSAP3W.C1 Notebook\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/psap) for more information about this VAP." + ] + }, + { + "cell_type": "markdown", + "id": "97097763", + "metadata": {}, + "source": [ + "In this notebook, we demonstrate the workflow to explore ARM VAP data (using aospsap3w as an example). Value-added products (VAPs) are higher-order data products that have been analyzed and processed to ease scientists’ use of ARM data in atmospheric research and global climate models. \n", + "Here is the main content we will cover." + ] + }, + { + "cell_type": "markdown", + "id": "eddec40f", + "metadata": {}, + "source": [ + "# Table of Contents\n", + "## Access the data\n", + "* How to retrieve the data\n", + "* Data path and file name conventions\n", + "* Load data\n", + "## Explore the data\n", + "* NetCDF Data structure\n", + "* Xarray essentials\n", + "* Xarray Variable\n", + "## Plot the data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "import random\n", + "\n", + "import glob\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "id": "2372d5be", + "metadata": {}, + "source": [ + "## Access the data" + ] + }, + { + "cell_type": "markdown", + "id": "5b0d9684", + "metadata": {}, + "source": [ + "### How to retrieve the data\n", + 
"We assume the path \"/data/archive\" is available where you are running this notebook. You can use `os.path.exists(\"/data/archive\")` to verify if the path exists at your machine.\n", + "\n", + "\n", + "### Data path and file name conventions\n", + "There are several common terminologies regarding ARM data, for example, data-stream-name, data-level, site, facility, instrument, etc. (For more details, please see [ARM Data File Standards Version 1.2](https://www.google.com/search?q=arm+datastream+facility+cite+definition&rlz=1C1GCEJ_enUS1029US1029&ei=hb41ZICrBPukqtsPvMODIA&ved=0ahUKEwjAgeiS0aL-AhV7kmoFHbzhAAQQ4dUDCBA&uact=5&oq=arm+datastream+facility+cite+definition&gs_lcp=Cgxnd3Mtd2l6LXNlcnAQAzIFCAAQogQyBQgAEKIEMgUIABCiBDIFCAAQogQyBQgAEKIEOgoIABBHENYEELADOgoIIRCgARDDBBAKSgQIQRgAUJ4DWIoOYO0PaAFwAXgAgAGMAYgBuQmSAQMyLjmYAQCgAQHIAQjAAQE&sclient=gws-wiz-serp).) \n", + "\n", + "For example, this notebook is called `aospsap3w.c1`, where `aospsap3w` is the \"datastream name\", and `c1` is the \"data level\".\n", + "\n", + "This datastream also contains site `pvc` and facility `M1`. (Note: individual datastream might have multiple site-facility pairs.)\n", + "In such a case, the data of this data-stream is stored at `/data/archive/pvc/pvcaospsap3wM1.c1`, which is in the format of `<DATA_DIR>/<site>/<site><datastream_name><facility>.<data_level>`. We can use the following method to assign the data-stream directory `datastream_dir = os.path.join(DATA_DIR, site, site + DATASTREAM_NAME + facility + '.' + DATA_LEVEL )`\n", + "\n", + "The data files under datastream_dir also follow naming conventions. But once you reach the datastream_dir level, the most important file naming convention to differentiate the files is \"yyyyMMdd.hhmmss\", which comes in handy to filter out files based on datetime. 
For example, we can use `glob.glob(f'{datastream_dir}/*.200709*.*')` to filter files in September 2007.\n", + "\n", + "(Note: refer to the https://adc.arm.gov/discovery/#/ and https://adc.arm.gov/solr8/metadata/select API to explore ARM datastream and associated available sites and facilities.)\n", + "\n", + "Please see the following examples in action" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d7e9eb85", + "metadata": {}, + "outputs": [], + "source": [ + "# Verify if DATA_DIR path exists\n", + "DATA_DIR = \"/data/archive\"\n", + "os.path.exists(DATA_DIR)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "586993fd", + "metadata": {}, + "outputs": [], + "source": [ + "# Specify datastream_dir following the path conventions and check its existence\n", + "DATASTREAM_NAME = \"aospsap3w\"\n", + "DATA_LEVEL = \"c1\"\n", + "site = \"pvc\"\n", + "facility = \"M1\"\n", + "datastream_dir = os.path.join(DATA_DIR, site, site + DATASTREAM_NAME + facility + '.' 
+ DATA_LEVEL )\n", + "print(datastream_dir)\n", + "print(os.path.exists(datastream_dir))\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0742f7c1", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: list 5 (random) files under datastream_dir\n", + "files = os.listdir(datastream_dir)\n", + "files[:5]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "39b98a36", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: get most recent file\n", + "list_of_files = glob.glob(f\"{datastream_dir}/*\") # * means all if need specific format then *.csv\n", + "latest_file = max(list_of_files, key=os.path.getctime)\n", + "latest_file" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "902d514e", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus sort datastream files based on datetime\n", + "files = os.listdir(datastream_dir)\n", + "file_sorted = files.copy()\n", + "file_sorted.sort() \n", + "print(file_sorted[:5])\n", + "\n", + "# to reverse\n", + "file_sorted_reverse = files.copy() \n", + "file_sorted_reverse.sort(reverse=True)\n", + "print(file_sorted_reverse[:5])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ec5923b2", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: pattern matching\n", + "# filter the 200709** files under datastream_dir\n", + "files_filter = glob.glob(f'{datastream_dir}/*.200709*.*')\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c4a4aa18", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: use armnotebook_utils.file_filter (TODO) to filter files based on datastream info \n" + ] + }, + { + "cell_type": "markdown", + "id": "cfeb9efc", + "metadata": {}, + "source": [ + "### Load data\n", + "The arm data is stored in [NetCDF](https://en.wikipedia.org/wiki/NetCDF#:~:text=%22NetCDF%20(network%20Common%20Data%20Form,format%20for%20representing%20scientific%20data.) format. 
We can use xarray's `open_dataset` method to load a single file, or `open_mfdataset` to open multiple files. (Note: the latter will still return a single xarray dataset object by combining multiple files.)\n", + "\n", + "Note: open_dataset keeps the file handle open and lazy loads its contents. All parameters are passed directly to open_dataset. It is a preferable method over load_dataset for memory efficiency. (For more details, please see [xarray.open_dataset](https://docs.xarray.dev/en/stable/generated/xarray.open_dataset.html))\n", + "\n", + "See the following example in action" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a440a329", + "metadata": {}, + "outputs": [], + "source": [ + "# open single file\n", + "full_path = latest_file\n", + "print(full_path)\n", + "ds_single = xr.open_dataset(full_path)\n", + "ds_single" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0143a3d", + "metadata": {}, + "outputs": [], + "source": [ + "print(ds_single)\n", + "print(type(ds_single))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1c0f8939", + "metadata": {}, + "outputs": [], + "source": [ + "# open multiple files\n", + "n_files = 3\n", + "full_paths = [os.path.join(datastream_dir, f_path) for f_path in file_sorted_reverse[:n_files]]\n", + "print(full_paths)\n", + "try: # Note: sometimes multiple files cannot be merged, so we used try except here.\n", + " ds_mutiple = xr.open_mfdataset(full_paths)\n", + " # ds_mutiple\n", + " print(type(ds_mutiple))\n", + " print(ds_mutiple)\n", + "except Exception as e:\n", + " print(e)" + ] + }, + { + "cell_type": "markdown", + "id": "3fa59943", + "metadata": {}, + "source": [ + "## Explore the data" + ] + }, + { + "cell_type": "markdown", + "id": "571f69b9", + "metadata": {}, + "source": [ + "### NetCDF Data structure \n", + "\n", + "\n", + "(If you are confident with NetCDF basics and xarray essentials, feel free to skip this section.)\n", + "Before we dive into 
data exploration, there are some essential concepts we should be familiar with (shown below): \n", + "* Dataset\n", + "* Data array\n", + "* Variable\n", + "* Dimension\n", + "* Coordinate\n", + "* Data Type\n", + "* Meta Data (Attributes)\n", + "\n", + "We will not go into details about NetCDF basics, but here are some references you might find helpful\n", + "* [Components of a NetCDF Dataset](https://iprc.soest.hawaii.edu/users/xfu/tool/guidef-7.html#)\n", + "* [Network Common Data Form (NetCDF)](https://www.unidata.ucar.edu/software/netcdf/) and [A Brief History of (netCDF) Time](https://www.unidata.ucar.edu/software/netcdf/time/recs.html)\n", + "\n", + "\n", + "\n", + "### Xarray essentials\n", + "Earlier we introduced the xarray package and used `open_dataset` and `open_mfdataset` to load a NetCDF data file as an xarray Dataset object. Recall that you can review the data in a notebook by using `print(ds)` or `ds`. \n", + "\n", + "Using xarray to retrieve the aforementioned NetCDF basics is straightforward. 
In general,\n", + "* Dataset: `ds`\n", + "* Data array: `ds.variables`\n", + "* Variable: `ds.variables`\n", + "* Dimension: `ds.dims`\n", + "* Coordinate: `ds.coords`\n", + "* Data Type: `type`\n", + "* Meta Data (Attributes): `ds.attrs`\n", + "\n", + "Also, here are some references if you are new to xarray\n", + "* [Xarray in 45 minutes](https://tutorial.xarray.dev/overview/xarray-in-45-min.html)\n", + "* [Handling NetCDF Files using XArray for Absolute Beginners](https://towardsdatascience.com/handling-netcdf-files-using-xarray-for-absolute-beginners-111a8ab4463f)\n", + "\n", + "Try the following commands in action\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b3eac323", + "metadata": {}, + "outputs": [], + "source": [ + "ds = ds_single" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "77ecf85d", + "metadata": {}, + "outputs": [], + "source": [ + "# Dataset \n", + "ds" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "25e7de09", + "metadata": {}, + "outputs": [], + "source": [ + "# Data array, Variable\n", + "# Note: the info can be overwhelming. 
Do not attempt to grasp everything at the first glance.\n", + "ds.variables" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8c41b67e", + "metadata": {}, + "outputs": [], + "source": [ + "# dimenssions\n", + "ds.dims" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "156f1dfc", + "metadata": {}, + "outputs": [], + "source": [ + "# coordinates\n", + "ds.coords" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "277d6064", + "metadata": {}, + "outputs": [], + "source": [ + "# Meta Data (Attributes)\n", + "ds.attrs" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d334681f", + "metadata": {}, + "outputs": [], + "source": [ + "# type\n", + "print(type(ds))\n", + "print(type(ds.variables))\n", + "print(type(ds.dims))\n", + "print(type(ds.coords))\n", + "print(type(ds.attrs))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "643399d6", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: retrieve variable names only\n", + "list(ds.variables)" + ] + }, + { + "cell_type": "markdown", + "id": "1cf36878", + "metadata": {}, + "source": [ + "#### Discussion: variable vs. coordinates vs. dimenssions. \n", + "From [Components of a NetCDF Dataset](https://iprc.soest.hawaii.edu/users/xfu/tool/guidef-7.html#):\n", + "\n", + "* Dimession: A dimension may be used to represent a real physical dimension, for example, time, latitude, longitude, or height. A dimension might also be used to index other quantities, for example station or model-run-number.\n", + "A netCDF dimension has both a name and a length. A dimension length is an arbitrary positive integer, except that one dimension in a netCDF dataset can have the length UNLIMITED.\n", + "\n", + "* Variables: Variables are used to store the bulk of the data in a netCDF dataset. A variable represents an array of values of the same type. A scalar value is treated as a 0-dimensional array. 
A variable has a name, a data type, and a shape described by its list of dimensions specified when the variable is created. A variable may also have associated attributes, which may be added, deleted or changed after the variable is created.\n", + "\n", + "* Coordinate (Variables): It is legal for a variable to have the same name as a dimension. Such variables have no special meaning to the netCDF library. However there is a convention that such variables should be treated in a special way by software using this library.\n", + "A variable with the same name as a dimension is called a coordinate variable. It typically defines a physical coordinate corresponding to that dimension.\n", + "\n", + "\n", + "Tips: By definition, coordinates and dimensions are also variables. For example ds.time is a coordinate but it is also a special variable. But in practice, when we talk about a variable, it implies a regular/non-coordinate variable. We will use this convention for the remainder of the notebook. " + ] + }, + { + "cell_type": "markdown", + "id": "d47120f9", + "metadata": {}, + "source": [ + "### Xarray Variable\n", + "\n", + "\n", + "We can use `ds.variables` to access the variables of a dataset. But many times printing out the whole dataset can be overwhelming. Instead, we would work on the individual variable (also called Xarray data array.) using the `ds[var_name]` syntax, where `var_name` is the name of the variable. See the following examples in action.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5a57e136", + "metadata": {}, + "outputs": [], + "source": [ + "print(type(ds[\"time\"]))\n", + "ds[\"time\"]" + ] + }, + { + "cell_type": "markdown", + "id": "51ec643e", + "metadata": {}, + "source": [ + "#### Variable properties\n", + "For an individual variable, the following properties are mostly used with the associated xarray method to retrieve them. 
(assuming var = ds[var_name])\n", + "* name: `var.name`\n", + "* data content: `var.data`\n", + "* attributes: `var.attrs`\n", + "* dimenstions: `var.dims`\n", + "* data type: `var.data.dtype`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a632a525", + "metadata": {}, + "outputs": [], + "source": [ + "var_name = \"time\"\n", + "var = ds[var_name]\n", + "\n", + "print(\"var.name: \\n\", var.name, \"\\n\")\n", + "print(\"var.data: \\n\", var.data, \"\\n\")\n", + "print(\"var.attrs: \\n\", var.attrs, \"\\n\")\n", + "print(\"var.dims: \\n\", var.dims, \"\\n\")\n", + "print(\"var.data.dtype: \\n\", var.data.dtype, \"\\n\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b256b07f", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: indivdual variable info table (collect and display variable info as a tabular format)\n", + "df_info = pd.DataFrame()\n", + "df_info[\"var_name\"] = list(ds.variables)\n", + "df_info[\"dims\"] = df_info.var_name.apply(lambda x: ds[x].dims)\n", + "df_info[\"is_dim\"] = df_info.var_name.apply(lambda x: x in ds.dims)\n", + "df_info[\"n_dim\"] = df_info.var_name.apply(lambda x: len(ds[x].dims))\n", + "df_info[\"attrs\"] = df_info.var_name.apply(lambda x: ds[x].attrs)\n", + "df_info[\"dtype\"] = df_info.var_name.apply(lambda x: ds[x].data.dtype)\n", + "df_info" + ] + }, + { + "cell_type": "markdown", + "id": "e6a36b96", + "metadata": {}, + "source": [ + "### Data cleaning/Preprocessing (skipped)\n", + "Data cleaning and preprocessing is an important stage in the the data analysis pipeline. However it will be out of the scope of this notebook. For more information about data cleaning and preprocessing basics, here are some references. 
\n", + "* [Xarray Fundamentals](https://earth-env-data-science.github.io/lectures/xarray/xarray.html)\n", + "* [Xarray Tutorial — Pangeo Gallery documentation](http://gallery.pangeo.io/repos/pangeo-data/pangeo-tutorial-gallery/xarray.html)\n", + "* [Pythonic Data Cleaning With pandas and NumPy](https://realpython.com/python-data-cleaning-numpy-pandas/)\n", + "* [Pandas - Cleaning Data](https://www.w3schools.com/python/pandas/pandas_cleaning.asp)" + ] + }, + { + "cell_type": "markdown", + "id": "ae6e1a69", + "metadata": {}, + "source": [ + "### Plotting\n", + "Data visualization (or plotting) is another important data analysis topic and deserves its own discussion. Here in this notebook we will only demonstrate basic tools for simple data visualization tasks.\n", + "\n", + "Here are some references you might find useful:\n", + "* xarray plotting: https://docs.xarray.dev/en/stable/user-guide/plotting.html\n", + "* Atmospheric Community Toolkit (ACT): https://arm-doe.github.io/ACT/index.html\n", + "\n", + "Note: this notebook is auto-generated using a template. It uses a general idea to select variable(s) to plot and is not customized to each individual notebook. Feel free to change the variables of interest, especially if a certain figure fails to plot." + ] + }, + { + "cell_type": "markdown", + "id": "7e417fb2", + "metadata": {}, + "source": [ + "#### 1-dimensional basic time series plot\n", + "\n", + "For the following plot we would like to find variables such that\n", + "* it has one and only one dimension\n", + "* \"time\" is its coordinate variable\n", + "* it is not a dimension itself,\n", + "* it is not a special variable with substrings within [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "\n", + "Please see the following example in action. 
(For more details about pandas filtering, please see the following references.)\n", + "* [pandas: multiple conditions while indexing data frame](https://stackoverflow.com/questions/22591174/pandas-multiple-conditions-while-indexing-data-frame-unexpected-behavior)\n", + "* [pandas.DataFrame.apply](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.apply.html)\n", + "* [How to test if a string contains one of the substrings in a list...](https://stackoverflow.com/questions/26577516/how-to-test-if-a-string-contains-one-of-the-substrings-in-a-list-in-pandas)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "30edac2d", + "metadata": {}, + "outputs": [], + "source": [ + "# (pandas query) filter n_dim==1, non-dimensional, and not contains the following substrings\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter = df_info[(df_info.n_dim == 1) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (df_info.is_dim==False) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4777a659", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " var_1d = df_filter.var_name.values[0]\n", + " var_1d\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fb5ae985", + "metadata": {}, + "outputs": [], + "source": [ + "# Note: if failed, change to another variable to plot.\n", + "try:\n", + " ds[var_1d].plot()\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "markdown", + "id": "9e41066e", + "metadata": {}, + "source": [ + "#### 2-dimensional basic plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "09bd4adc", + "metadata": {}, + "outputs": [], + "source": [ + "# (pandas query) filter n_dim==2, non-dimensional, and not contains the 
following substrings\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_2 = df_info[(df_info.n_dim == 2) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (df_info.is_dim==False) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter_2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2cdea1a9", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " var_2d = df_filter_2.var_name.values[0]\n", + " var_2d\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f5f48879", + "metadata": {}, + "outputs": [], + "source": [ + "# Note: if failed, change to another variable to plot.\n", + "try:\n", + " print(ds[var_2d].dims)\n", + " # ds[var_2d].plot()\n", + "\n", + " # conventionally, use \"time\" as x-axis\n", + " ds[var_2d].plot(x=\"time\")\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "markdown", + "id": "7554b2a6", + "metadata": {}, + "source": [ + "#### qc-plotting (optional)\n", + "\n", + "Note: act qc plotting has more strict requirement, one of them is the associated qc variable need to have \"flag_masks\" attributes. 
Which is added by using ds = act.io.armfiles.read_netcdf(files_list), then ds.clean.cleanup()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "71c2096f", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " ds_act = act.io.armfiles.read_netcdf(full_path)\n", + " print(type(ds_act))\n", + " ds_act.clean.cleanup()\n", + "\n", + " # or \n", + " # ds.clean.cleanup()\n", + "except Exception as e:\n", + " print(\"ERROR\", e)\n", + " ds_act = ds" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "844f8505", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter valid variables for ACT qc plotting\n", + "condition = (\"qc_\" + df_info.var_name).apply(lambda x: ds_act[x].attrs.get(\"flag_masks\") is not None \n", + " if x in list(ds.variables) else False)\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_3 = df_info[(df_info.is_dim==False) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings))) &\n", + " condition\n", + " ]\n", + "df_filter_3" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ecd38cc6", + "metadata": {}, + "outputs": [], + "source": [ + "# QC Plot\n", + "try:\n", + " qc_variable = df_filter_3.var_name.values[0]\n", + " print(qc_variable)\n", + "except Exception as e:\n", + " print(e)\n", + " \n", + "try:\n", + "\n", + " # Plot\n", + " qc_display = act.plotting.TimeSeriesDisplay(ds_act)\n", + " qc_display.add_subplots((2,), figsize = (9.5,10))\n", + " qc_ax = qc_display.plot(qc_variable, subplot_index=(0,), set_title=\"QC results on field: \" + qc_variable,)\n", + " qc_ax.grid()\n", + " qc_display.qc_flag_block_plot(qc_variable, subplot_index=(1,))\n", + " qc_ax.set_xlabel(f\"UTC Time starts at {ds.time.data[0]}\")\n", + "\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e)" + ] + }, + { + "cell_type": "markdown", + "id": "d76e4d27", + "metadata": 
{}, + "source": [ + "#### bonus: choose variables to plot from a dropdown menu " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bb733804", + "metadata": {}, + "outputs": [], + "source": [ + "# Valid variables filtering\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_4 = df_info[(df_info.is_dim==False) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter_4" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "82d5c20d", + "metadata": {}, + "outputs": [], + "source": [ + "# example 1: using xarray plot\n", + "\n", + "# Uncomment the following cell to try the interactive plot (ctrl + /)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2fc9aaa4", + "metadata": {}, + "outputs": [], + "source": [ + "# %matplotlib widget\n", + "# plt.clf()\n", + "\n", + "# fig, ax = plt.subplots(figsize=(10, 4))\n", + "\n", + "# available_variables = df_filter_4.var_name.values\n", + "# @widgets.interact(var=available_variables)\n", + "# def update(var = available_variables[0]):\n", + "# fig.clear() # Remove old lines from plot and plot new one\n", + "# if len(ds[var].dims)==2:\n", + "# ds[var].plot(x=\"time\", add_colorbar=False)\n", + "# else:\n", + "# ds[var].plot()\n", + "# plt.grid()\n", + "# plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fabdd802", + "metadata": {}, + "outputs": [], + "source": [ + "# example 2: using act plot\n", + "\n", + "# Uncomment the following cell to try the interactive plot (ctrl + /)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "962b4186", + "metadata": {}, + "outputs": [], + "source": [ + "# %matplotlib widget\n", + "# plt.clf()\n", + "\n", + "# available_variables = df_filter_4.var_name.values\n", + "\n", + "\n", + "# @widgets.interact(var=available_variables)\n", + "# def 
update(var = available_variables[0]):\n", + "\n", + "# i_display = act.plotting.TimeSeriesDisplay(ds_act)\n", + "# i_display.add_subplots((1,), figsize=(10, 4))\n", + "# ax = i_display.plot(var, subplot_index=(0,), set_title=f\"{var} ({ds_act[var].attrs['long_name']})\",)\n", + "\n", + "# ax.set_xlabel(f\"UTC Time starts at {ds.time.data[0]}\")\n", + "# ax.grid()\n", + "# plt.show()\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.16" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/PSAP/aospsap3w.c1.ipynb b/VAPs/quicklook/PSAP/aospsap3w.c1.ipynb new file mode 100644 index 00000000..32b774ed --- /dev/null +++ b/VAPs/quicklook/PSAP/aospsap3w.c1.ipynb @@ -0,0 +1,339 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# AOSPSAP3W.C1 Plots\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/psap) for more information about this vap." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib widget\n", + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import ipywidgets as widgets\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "# Data archive directory\n", + "DATA_DIR = r'/data/archive/'\n", + "\n", + "# Datastream info\n", + "DATASTREAM_NAME = 'aospsap3w'\n", + "DATA_LEVEL = 'c1'\n", + "LOCATIONS = [{'end_date': '2013-06-24', 'facility': 'M1', 'site': 'pvc', 'start_date': '2012-07-16'}]" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "9faaf875", + "metadata": {}, + "source": [ + "## Define site, facility, and date range" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ac6764f5", + "metadata": {}, + "outputs": [], + "source": [ + "print(\"The following locations and date ranges are available for this VAP:\")\n", + "display(pd.DataFrame(LOCATIONS, columns=['site', 'facility', 'start_date', 'end_date']))" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "8d132223", + "metadata": {}, + "source": [ + "#### Define site, facility, and date range (date format: YYYY-MM-DD) using the variables below:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e563983a", + "metadata": {}, + "outputs": [], + "source": [ + "site_facility = ( 'pvc', 'M1' )\n", + "\n", + "date_start = '2013-06-22'\n", + "date_end = '2013-06-24'" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "bccd3dfe-2f99-49a5-bace-ea37e8dd8fc3", + "metadata": {}, + "source": [ + "## Load data files\n", + "Load data files from /data/archive/" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cb4b9a26-c574-49c0-a521-658fa553e39e", + "metadata": {}, 
+ "outputs": [], + "source": [ + "# Compile list of files\n", + "site, facility = site_facility\n", + "d_date_start = datetime.strptime(date_start, '%Y-%m-%d')\n", + "d_date_end = datetime.strptime(date_end, '%Y-%m-%d')\n", + "dir_path = os.path.join(DATA_DIR + site, site + DATASTREAM_NAME + facility + r'.' + DATA_LEVEL )\n", + "dir_path\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6be8f3dc", + "metadata": {}, + "outputs": [], + "source": [ + "from datetime import date, timedelta\n", + "import pandas as pd\n", + "\n", + "def get_ARM_formated_dates(start_date, end_date):\n", + " \"\"\"\n", + " Get a list of ARM conventional formated date lists, based on start_date and end_date(inclusive)\n", + " EXAMPLE:\n", + " get_ARM_formated_dates(start_date=\"20180219\", end_date=\"20180221\")\n", + " >> [\"20180219\", \"20180220\", \"20180221\"] \n", + " \"\"\"\n", + " \n", + " _start_date = pd.to_datetime(start_date)\n", + " _end_date = pd.to_datetime(end_date)\n", + " \n", + " delta = _end_date - _start_date # returns timedelta \n", + " dates = []\n", + "\n", + " for i in range(delta.days + 1):\n", + " day = _start_date + timedelta(days=i)\n", + " day_formated = day.strftime(format=\"%Y%m%d\")\n", + " dates.append(day_formated)\n", + " return dates\n", + "\n", + "\n", + "get_ARM_formated_dates(start_date=date_start, end_date=date_end)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "51feea2e", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter a list of files based on date pattern\n", + "import glob\n", + "dates = get_ARM_formated_dates(start_date=date_start, end_date=date_end)\n", + "files_filter = []\n", + "for date in dates:\n", + " files_filter += glob.glob(f'{dir_path}/*.{date}*.*')\n", + " files_filter\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0e5d0d7", + "metadata": {}, + "outputs": [], + "source": [ + "# Load files as a single dataset\n", + "files_list 
= files_filter \n", + "ds = act.io.armfiles.read_netcdf(files_list)\n", + "ds.clean.cleanup()\n", + "print(f'{len(files_list)} files loaded')\n", + "ds\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "4a551094-9ec0-4b64-b80a-9940573c2f50", + "metadata": {}, + "source": [ + "## Plot time series data\n", + "#### Define the list of variables to be plotted:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d09b789e-84f1-4605-846b-a72c110c8048", + "metadata": {}, + "outputs": [], + "source": [ + "variables_to_plot = ['transmittance_blue', 'dqrvar_transmittance_blue', 'transmittance_green']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3458fb08-035b-4898-9253-0a94e6f9c97b", + "metadata": {}, + "outputs": [], + "source": [ + "ts_display = act.plotting.TimeSeriesDisplay(ds)\n", + "ts_display.add_subplots((len(variables_to_plot),), figsize = (9.5,4*len(variables_to_plot)))\n", + "\n", + "for i,v in enumerate(variables_to_plot):\n", + " ts_ax = ts_display.plot(v, subplot_index=(i,), set_title=ds.variables[v].attrs['long_name'],)\n", + " ts_ax.grid()\n", + "\n", + "plt.show()\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "194399aa-1907-452b-8ba9-bc31d7f60291", + "metadata": {}, + "source": [ + "## Quality check plots\n", + "#### Define variable for QC plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9a0ef63d-3eeb-48fc-a24a-43258b6134b8", + "metadata": {}, + "outputs": [], + "source": [ + "qc_variable = 'transmittance_blue'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "532663a3-4dc0-4497-bda8-018c5f91e1c4", + "metadata": {}, + "outputs": [], + "source": [ + "# QC Plot\n", + "if ('qc_'+qc_variable) in ds.variables:\n", + "\n", + " # Plot\n", + " qc_display = act.plotting.TimeSeriesDisplay(ds)\n", + " qc_display.add_subplots((2,), figsize = (9.5,10))\n", + " qc_ax = qc_display.plot(qc_variable, subplot_index=(0,), 
set_title=\"QC results on field: \" + qc_variable,)\n", + " qc_ax.grid()\n", + " qc_display.qc_flag_block_plot(qc_variable, subplot_index=(1,))\n", + "\n", + " plt.show()\n", + "else:\n", + " print(f'QC not available for the selected field: {qc_variable}')\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "2308db16-f362-4033-a11a-c3e5e75ad9ba", + "metadata": {}, + "source": [ + "## Field selection dropdown menu\n", + "Select variable to be plotted from a dropdown menu" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d5792fa8-2253-441a-8ac6-1e07a0345031", + "metadata": {}, + "outputs": [], + "source": [ + "plt.ioff()\n", + "\n", + "# populate dropdown menu with available variables \n", + "available_variables = [v for v in ds.variables if not('time' in v or v.startswith('qc_') or v.startswith('source_')) and 'long_name' in ds.variables[v].attrs]\n", + "d_variable = 'transmittance_blue'\n", + "dropdown = widgets.Dropdown(\n", + " options = [(ds.variables[v].attrs['long_name'], v) for v in available_variables],\n", + " value= d_variable,\n", + " description='Field:',\n", + " disabled=False,\n", + ")\n", + "dropdown.layout.margin = '0px 30% 0px 20%'\n", + "dropdown.layout.width = '50%'\n", + "\n", + "# set up display\n", + "i_display = act.plotting.TimeSeriesDisplay(ds)\n", + "i_display.add_subplots((1,), figsize = (9.5,5))\n", + "i_ax = i_display.plot(d_variable, subplot_index=(0,), set_title=ds.variables[d_variable].attrs['long_name'],)\n", + "i_ax.grid()\n", + "i_fig = i_display.fig\n", + "\n", + "# update plot callback function\n", + "def update_plot(change):\n", + " i_ax.cla()\n", + " i_ax_new = i_display.plot(change.new, subplot_index=(0,), set_title=ds.variables[change.new].attrs['long_name'],)\n", + " i_ax_new.grid()\n", + " i_fig.canvas.draw()\n", + " i_fig.canvas.flush_events()\n", + "\n", + "dropdown.observe(update_plot, names='value')\n", + "\n", + "widgets.AppLayout(\n", + " header=dropdown,\n", + " 
center=i_fig.canvas,\n", + " pane_heights=[1, 6,1]\n", + ")\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.12" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/QCRAD/QCRAD_tutorial.ipynb b/VAPs/quicklook/QCRAD/QCRAD_tutorial.ipynb new file mode 100644 index 00000000..cc100e4a --- /dev/null +++ b/VAPs/quicklook/QCRAD/QCRAD_tutorial.ipynb @@ -0,0 +1,838 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# QCRAD1LONG.C1 Notebook\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/qcrad) for more information about this vap." + ] + }, + { + "cell_type": "markdown", + "id": "97097763", + "metadata": {}, + "source": [ + "In this notebook, we demonstrate the workflow to explore ARM vap data (using qcrad1long as an example.) Value-added products (VAPs) are higher-order data products that have been analyzed and processed to ease scientists’ use of ARM data in atmospheric research and global climate models. \n", + "Here is the main content we will cover." 
+ ] + }, + { + "cell_type": "markdown", + "id": "eddec40f", + "metadata": {}, + "source": [ + "# Table of Content\n", + "## Access the data\n", + "* How to retrieve the data\n", + "* Data path and file name conventions\n", + "* Load data\n", + "## Explore the data\n", + "* NetCDF Data structure\n", + "* Xarray essentials\n", + "* Xarray Variable\n", + "## Plot the data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "import random\n", + "\n", + "import glob\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "id": "2372d5be", + "metadata": {}, + "source": [ + "## Access the data" + ] + }, + { + "cell_type": "markdown", + "id": "5b0d9684", + "metadata": {}, + "source": [ + "### How to retrieve the data\n", + "We assume the path \"/data/archive\" is available where you are running this notebook. You can use `os.path.exists(\"/data/archive\")` to verify if the path exists at your machine.\n", + "\n", + "\n", + "### Data path and file name conventions\n", + "There several common terminologies regarding ARM data, for example, data-stream-name, data-level, site, facility, instrument, etc. (For more details, please see [ARM Data File Standards Version 1.2](https://www.google.com/search?q=arm+datastream+facility+cite+definition&rlz=1C1GCEJ_enUS1029US1029&ei=hb41ZICrBPukqtsPvMODIA&ved=0ahUKEwjAgeiS0aL-AhV7kmoFHbzhAAQQ4dUDCBA&uact=5&oq=arm+datastream+facility+cite+definition&gs_lcp=Cgxnd3Mtd2l6LXNlcnAQAzIFCAAQogQyBQgAEKIEMgUIABCiBDIFCAAQogQyBQgAEKIEOgoIABBHENYEELADOgoIIRCgARDDBBAKSgQIQRgAUJ4DWIoOYO0PaAFwAXgAgAGMAYgBuQmSAQMyLjmYAQCgAQHIAQjAAQE&sclient=gws-wiz-serp).) 
\n", + "\n", + "For example, this notebook is called `qcrad1long.c1`, where `qcrad1long` is the \"datastream name\", and `c1` is the \"data level\".\n", + "\n", + "This datastream also contains site `awr` and facility `S1`. (Note: an individual datastream might have multiple site-facility pairs.)\n", + "In such a case, the data of this data-stream is stored at `/data/archive/awr/awrqcrad1longS1.c1`, which is in the format of `{DATA_DIR}/{site}/{site}{datastream_name}{facility}.{data_level}`. We can use the following method to assign the data-stream directory `datastream_dir = os.path.join(DATA_DIR, site, site + DATASTREAM_NAME + facility + '.' + DATA_LEVEL )`\n", + "\n", + "The data files under datastream_dir also follow naming conventions. But once you reach the datastream_dir level, the most important file naming convention to differentiate the files is \"yyyyMMdd.hhmmss\", which comes in handy to filter out files based on datetime. For example, we can use `glob.glob(f'{datastream_dir}/*.200709*.*')` to filter files in 2007 September.\n", + "\n", + "(Note: refer to the https://adc.arm.gov/discovery/#/ and https://adc.arm.gov/solr8/metadata/select API to explore ARM datastream and associated available sites and facilities.)\n", + "\n", + "Please see the following examples in action" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d7e9eb85", + "metadata": {}, + "outputs": [], + "source": [ + "# Verify if DATA_DIR path exists\n", + "DATA_DIR = \"/data/archive\"\n", + "os.path.exists(DATA_DIR)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "586993fd", + "metadata": {}, + "outputs": [], + "source": [ + "# Specify datastream_dir following the path conventions and check its existence\n", + "DATASTREAM_NAME = \"qcrad1long\"\n", + "DATA_LEVEL = \"c1\"\n", + "site = \"awr\"\n", + "facility = \"S1\"\n", + "datastream_dir = os.path.join(DATA_DIR, site, site + DATASTREAM_NAME + facility + '.' 
+ DATA_LEVEL )\n", + "print(datastream_dir)\n", + "print(os.path.exists(datastream_dir))\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0742f7c1", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: list 5 (random) files under datastream_dir\n", + "files = os.listdir(datastream_dir)\n", + "files[:5]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "39b98a36", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: get most recent file\n", + "list_of_files = glob.glob(f\"{datastream_dir}/*\") # * means all if need specific format then *.csv\n", + "latest_file = max(list_of_files, key=os.path.getctime)\n", + "latest_file" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "902d514e", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus sort datastream files based on datetime\n", + "files = os.listdir(datastream_dir)\n", + "file_sorted = files.copy()\n", + "file_sorted.sort() \n", + "print(file_sorted[:5])\n", + "\n", + "# to reverse\n", + "file_sorted_reverse = files.copy() \n", + "file_sorted_reverse.sort(reverse=True)\n", + "print(file_sorted_reverse[:5])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ec5923b2", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: pattern matching\n", + "# filter the 200709** files under datastream_dir\n", + "files_filter = glob.glob(f'{datastream_dir}/*.200709*.*')\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c4a4aa18", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: use armnotebook_utils.file_filter (TODO) to filter files based on datastream info \n" + ] + }, + { + "cell_type": "markdown", + "id": "cfeb9efc", + "metadata": {}, + "source": [ + "### Load data\n", + "The arm data is stored in [NetCDF](https://en.wikipedia.org/wiki/NetCDF#:~:text=%22NetCDF%20(network%20Common%20Data%20Form,format%20for%20representing%20scientific%20data.) format. 
We can use xarray's `open_dataset` method to load single file, or `open_mfdataset` to open multiple files. (Note: the latter will still return a single xarray dataset object by combining multiple files.)\n", + "\n", + "Note: open_dataset keeps the file handle open and lazy loads its contents. All parameters are passed directly to open_dataset. It is a preferable method over load_dataset for memory-effiency. (more details, please see [xarray.open_dataset](https://docs.xarray.dev/en/stable/generated/xarray.open_dataset.html))\n", + "\n", + "See the following example in action" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a440a329", + "metadata": {}, + "outputs": [], + "source": [ + "# open single file\n", + "full_path = latest_file\n", + "print(full_path)\n", + "ds_single = xr.open_dataset(full_path)\n", + "ds_single" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0143a3d", + "metadata": {}, + "outputs": [], + "source": [ + "print(ds_single)\n", + "print(type(ds_single))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1c0f8939", + "metadata": {}, + "outputs": [], + "source": [ + "# open multiple files\n", + "n_files = 3\n", + "full_paths = [os.path.join(datastream_dir, f_path) for f_path in file_sorted_reverse[:n_files]]\n", + "print(full_paths)\n", + "try: # Note: sometimes multiple files cannot be merged, so we used try except here.\n", + " ds_mutiple = xr.open_mfdataset(full_paths)\n", + " # ds_mutiple\n", + " print(type(ds_mutiple))\n", + " print(ds_mutiple)\n", + "except Exception as e:\n", + " print(e)" + ] + }, + { + "cell_type": "markdown", + "id": "3fa59943", + "metadata": {}, + "source": [ + "## Explore the data" + ] + }, + { + "cell_type": "markdown", + "id": "571f69b9", + "metadata": {}, + "source": [ + "### NetCDF Data structure \n", + "\n", + "\n", + "(If you are confident with NetCDF basics and xarray essentials, feel free to skip this session.)\n", + "Before we dive into 
data exploration, there are some essential concepts we should be familiar with (shown below): \n", + "* Dataset\n", + "* Data array\n", + "* Variable\n", + "* Dimension\n", + "* Coordinate\n", + "* Data Type\n", + "* Meta Data (Attributes)\n", + "\n", + "We will not go into details about NetCDF basics, but here are some references you might find helpful:\n", + "* [Components of a NetCDF Dataset](https://iprc.soest.hawaii.edu/users/xfu/tool/guidef-7.html#)\n", + "* [Network Common Data Form (NetCDF)](https://www.unidata.ucar.edu/software/netcdf/) and [A Brief History of (netCDF) Time](https://www.unidata.ucar.edu/software/netcdf/time/recs.html)\n", + "\n", + "\n", + "\n", + "### Xarray essentials\n", + "Earlier we introduced the xarray package and used `open_dataset` and `open_mfdataset` to retrieve a NetCDF data file as an xarray Dataset object. Recall that you can review the data in a notebook by using `print(ds)` or `ds`. \n", + "\n", + "Using xarray to retrieve the aforementioned NetCDF basics is straightforward. 
In general,\n", + "* Dataset: `ds`\n", + "* Data array: `ds.variables`\n", + "* Variable: `ds.variables`\n", + "* Dimension: `ds.dims`\n", + "* Coordinate: `ds.coords`\n", + "* Data Type: `type(ds)`\n", + "* Meta Data (Attributes): `ds.attrs`\n", + "\n", + "Also, here are some references if you are new to xarray:\n", + "* [Xarray in 45 minutes](https://tutorial.xarray.dev/overview/xarray-in-45-min.html)\n", + "* [Handling NetCDF Files using XArray for Absolute Beginners](https://towardsdatascience.com/handling-netcdf-files-using-xarray-for-absolute-beginners-111a8ab4463f)\n", + "\n", + "Try the following commands in action\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b3eac323", + "metadata": {}, + "outputs": [], + "source": [ + "ds = ds_single" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "77ecf85d", + "metadata": {}, + "outputs": [], + "source": [ + "# Dataset \n", + "ds" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "25e7de09", + "metadata": {}, + "outputs": [], + "source": [ + "# Data array, Variable\n", + "# Note: the info can be overwhelming. 
Do not attempt to grasp everything at the first glance.\n", + "ds.variables" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8c41b67e", + "metadata": {}, + "outputs": [], + "source": [ + "# dimensions\n", + "ds.dims" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "156f1dfc", + "metadata": {}, + "outputs": [], + "source": [ + "# coordinates\n", + "ds.coords" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "277d6064", + "metadata": {}, + "outputs": [], + "source": [ + "# Meta Data (Attributes)\n", + "ds.attrs" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d334681f", + "metadata": {}, + "outputs": [], + "source": [ + "# type\n", + "print(type(ds))\n", + "print(type(ds.variables))\n", + "print(type(ds.dims))\n", + "print(type(ds.coords))\n", + "print(type(ds.attrs))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "643399d6", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: retrieve variable names only\n", + "list(ds.variables)" + ] + }, + { + "cell_type": "markdown", + "id": "1cf36878", + "metadata": {}, + "source": [ + "#### Discussion: variable vs. coordinates vs. dimensions. \n", + "From [Components of a NetCDF Dataset](https://iprc.soest.hawaii.edu/users/xfu/tool/guidef-7.html#):\n", + "\n", + "* Dimension: A dimension may be used to represent a real physical dimension, for example, time, latitude, longitude, or height. A dimension might also be used to index other quantities, for example station or model-run-number.\n", + "A netCDF dimension has both a name and a length. A dimension length is an arbitrary positive integer, except that one dimension in a netCDF dataset can have the length UNLIMITED.\n", + "\n", + "* Variables: Variables are used to store the bulk of the data in a netCDF dataset. A variable represents an array of values of the same type. A scalar value is treated as a 0-dimensional array. 
A variable has a name, a data type, and a shape described by its list of dimensions specified when the variable is created. A variable may also have associated attributes, which may be added, deleted or changed after the variable is created.\n", + "\n", + "* Coordinate (Variables): It is legal for a variable to have the same name as a dimension. Such variables have no special meaning to the netCDF library. However there is a convention that such variables should be treated in a special way by software using this library.\n", + "A variable with the same name as a dimension is called a coordinate variable. It typically defines a physical coordinate corresponding to that dimension.\n", + "\n", + "\n", + "Tips: By definition, coordinates and dimensions are also variables. For example, ds.time is a coordinate, but it is also a special variable. In practice, however, when we talk about a variable, we mean a regular/non-coordinate variable. We will use this convention for the rest of the notebook. " + ] + }, + { + "cell_type": "markdown", + "id": "d47120f9", + "metadata": {}, + "source": [ + "### Xarray Variable\n", + "\n", + "\n", + "We can use `ds.variables` to access the variables of a dataset. But printing out the whole dataset can often be overwhelming. Instead, we work on an individual variable (also called an xarray data array) using the `ds[var_name]` syntax, where `var_name` is the name of the variable. See the following examples in action.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5a57e136", + "metadata": {}, + "outputs": [], + "source": [ + "print(type(ds[\"time\"]))\n", + "ds[\"time\"]" + ] + }, + { + "cell_type": "markdown", + "id": "51ec643e", + "metadata": {}, + "source": [ + "#### Variable properties\n", + "For an individual variable, the following properties are most commonly used, shown with the associated xarray attribute to retrieve them. 
(assuming var = ds[var_name])\n", + "* name: `var.name`\n", + "* data content: `var.data`\n", + "* attributes: `var.attrs`\n", + "* dimensions: `var.dims`\n", + "* data type: `var.data.dtype`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a632a525", + "metadata": {}, + "outputs": [], + "source": [ + "var_name = \"time\"\n", + "var = ds[var_name]\n", + "\n", + "print(\"var.name: \\n\", var.name, \"\\n\")\n", + "print(\"var.data: \\n\", var.data, \"\\n\")\n", + "print(\"var.attrs: \\n\", var.attrs, \"\\n\")\n", + "print(\"var.dims: \\n\", var.dims, \"\\n\")\n", + "print(\"var.data.dtype: \\n\", var.data.dtype, \"\\n\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b256b07f", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: individual variable info table (collect and display variable info as a tabular format)\n", + "df_info = pd.DataFrame()\n", + "df_info[\"var_name\"] = list(ds.variables)\n", + "df_info[\"dims\"] = df_info.var_name.apply(lambda x: ds[x].dims)\n", + "df_info[\"is_dim\"] = df_info.var_name.apply(lambda x: x in ds.dims)\n", + "df_info[\"n_dim\"] = df_info.var_name.apply(lambda x: len(ds[x].dims))\n", + "df_info[\"attrs\"] = df_info.var_name.apply(lambda x: ds[x].attrs)\n", + "df_info[\"dtype\"] = df_info.var_name.apply(lambda x: ds[x].data.dtype)\n", + "df_info" + ] + }, + { + "cell_type": "markdown", + "id": "e6a36b96", + "metadata": {}, + "source": [ + "### Data cleaning/Preprocessing (skipped)\n", + "Data cleaning and preprocessing is an important stage in the data analysis pipeline. However, it is outside the scope of this notebook. For more information about data cleaning and preprocessing basics, here are some references. 
\n", + "* [Xarray Fundamentals](https://earth-env-data-science.github.io/lectures/xarray/xarray.html)\n", + "* [Xarray Tutorial — Pangeo Gallery documentation](http://gallery.pangeo.io/repos/pangeo-data/pangeo-tutorial-gallery/xarray.html)\n", + "* [Pythonic Data Cleaning With pandas and NumPy](https://realpython.com/python-data-cleaning-numpy-pandas/)\n", + "* [Pandas - Cleaning Data](https://www.w3schools.com/python/pandas/pandas_cleaning.asp)" + ] + }, + { + "cell_type": "markdown", + "id": "ae6e1a69", + "metadata": {}, + "source": [ + "### Plotting\n", + "Data visualization (or plotting) is another important data analysis topic and deserves its own discussion. Here in this notebook we will only demonstrate basic tools for simple data visualization tasks.\n", + "\n", + "Here are some references you might find useful:\n", + "* xarray plotting: https://docs.xarray.dev/en/stable/user-guide/plotting.html\n", + "* Atmospheric Community Toolkit (ACT): https://arm-doe.github.io/ACT/index.html\n", + "\n", + "Note: this notebook is auto-generated using a template. It uses a general idea to select variable(s) to plot and is not customized to each individual notebook. Feel free to change the variables of interest, especially if a figure fails to plot." + ] + }, + { + "cell_type": "markdown", + "id": "7e417fb2", + "metadata": {}, + "source": [ + "#### 1-dimensional basic time series plot\n", + "\n", + "For the following plot we would like to find variables such that\n", + "* it has one and only one dimension\n", + "* \"time\" is its coordinate variable\n", + "* it is not a dimension itself,\n", + "* it is not a special variable with substrings within [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "\n", + "Please see the following example in action. 
(For more details about pandas filtering, please see the following references.)\n", + "* [pandas: multiple conditions while indexing data frame](https://stackoverflow.com/questions/22591174/pandas-multiple-conditions-while-indexing-data-frame-unexpected-behavior)\n", + "* [pandas.DataFrame.apply](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.apply.html)\n", + "* [How to test if a string contains one of the substrings in a list...](https://stackoverflow.com/questions/26577516/how-to-test-if-a-string-contains-one-of-the-substrings-in-a-list-in-pandas)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "30edac2d", + "metadata": {}, + "outputs": [], + "source": [ + "# (pandas query) filter n_dim==1, non-dimenssional, and not contains the following substrings\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter = df_info[(df_info.n_dim == 1) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (df_info.is_dim==False) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4777a659", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " var_1d = df_filter.var_name.values[0]\n", + " var_1d\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fb5ae985", + "metadata": {}, + "outputs": [], + "source": [ + "# Note: if failed, change to another variable to plot.\n", + "try:\n", + " ds[var_1d].plot()\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "markdown", + "id": "9e41066e", + "metadata": {}, + "source": [ + "#### 2-dimensional basic plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "09bd4adc", + "metadata": {}, + "outputs": [], + "source": [ + "# (pandas query) filter n_dim==2, non-dimenssional, and not contains the 
following substrings\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_2 = df_info[(df_info.n_dim == 2) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (df_info.is_dim==False) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter_2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2cdea1a9", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " var_2d = df_filter_2.var_name.values[0]\n", + " var_2d\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f5f48879", + "metadata": {}, + "outputs": [], + "source": [ + "# Note: if failed, change to another variable to plot.\n", + "try:\n", + " print(ds[var_2d].dims)\n", + " # ds[var_2d].plot()\n", + "\n", + " # conventionally, use \"time\" as x-axis\n", + " ds[var_2d].plot(x=\"time\")\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "markdown", + "id": "7554b2a6", + "metadata": {}, + "source": [ + "#### qc-plotting (optional)\n", + "\n", + "Note: act qc plotting has more strict requirement, one of them is the associated qc variable need to have \"flag_masks\" attributes. 
Which is added by using ds = act.io.armfiles.read_netcdf(files_list), then ds.clean.cleanup()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "71c2096f", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " ds_act = act.io.armfiles.read_netcdf(full_path)\n", + " print(type(ds_act))\n", + " ds_act.clean.cleanup()\n", + "\n", + " # or \n", + " # ds.clean.cleanup()\n", + "except Exception as e:\n", + " print(\"ERROR\", e)\n", + " ds_act = ds" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "844f8505", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter valid variables for ACT qc plotting\n", + "condition = (\"qc_\" + df_info.var_name).apply(lambda x: ds_act[x].attrs.get(\"flag_masks\") is not None \n", + " if x in list(ds.variables) else False)\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_3 = df_info[(df_info.is_dim==False) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings))) &\n", + " condition\n", + " ]\n", + "df_filter_3" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ecd38cc6", + "metadata": {}, + "outputs": [], + "source": [ + "# QC Plot\n", + "try:\n", + " qc_variable = df_filter_3.var_name.values[0]\n", + " print(qc_variable)\n", + "except Exception as e:\n", + " print(e)\n", + " \n", + "try:\n", + "\n", + " # Plot\n", + " qc_display = act.plotting.TimeSeriesDisplay(ds_act)\n", + " qc_display.add_subplots((2,), figsize = (9.5,10))\n", + " qc_ax = qc_display.plot(qc_variable, subplot_index=(0,), set_title=\"QC results on field: \" + qc_variable,)\n", + " qc_ax.grid()\n", + " qc_display.qc_flag_block_plot(qc_variable, subplot_index=(1,))\n", + " qc_ax.set_xlabel(f\"UTC Time starts at {ds.time.data[0]}\")\n", + "\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e)" + ] + }, + { + "cell_type": "markdown", + "id": "d76e4d27", + "metadata": 
{}, + "source": [ + "#### bonus: choose variables to plot from a dropdown menu " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bb733804", + "metadata": {}, + "outputs": [], + "source": [ + "# Valid variables filtering\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_4 = df_info[(df_info.is_dim==False) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter_4" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "82d5c20d", + "metadata": {}, + "outputs": [], + "source": [ + "# example 1: using xarray plot\n", + "\n", + "# Uncomment the following cell to try the interactive plot (ctrl + /)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2fc9aaa4", + "metadata": {}, + "outputs": [], + "source": [ + "# %matplotlib widget\n", + "# plt.clf()\n", + "\n", + "# fig, ax = plt.subplots(figsize=(10, 4))\n", + "\n", + "# available_variables = df_filter_4.var_name.values\n", + "# @widgets.interact(var=available_variables)\n", + "# def update(var = available_variables[0]):\n", + "# fig.clear() # Remove old lines from plot and plot new one\n", + "# if len(ds[var].dims)==2:\n", + "# ds[var].plot(x=\"time\", add_colorbar=False)\n", + "# else:\n", + "# ds[var].plot()\n", + "# plt.grid()\n", + "# plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fabdd802", + "metadata": {}, + "outputs": [], + "source": [ + "# example 2: using act plot\n", + "\n", + "# Uncomment the following cell to try the interactive plot (ctrl + /)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "962b4186", + "metadata": {}, + "outputs": [], + "source": [ + "# %matplotlib widget\n", + "# plt.clf()\n", + "\n", + "# available_variables = df_filter_4.var_name.values\n", + "\n", + "\n", + "# @widgets.interact(var=available_variables)\n", + "# def 
update(var = available_variables[0]):\n", + "\n", + "# i_display = act.plotting.TimeSeriesDisplay(ds_act)\n", + "# i_display.add_subplots((1,), figsize=(10, 4))\n", + "# ax = i_display.plot(var, subplot_index=(0,), set_title=f\"{var} ({ds_act[var].attrs['long_name']})\",)\n", + "\n", + "# ax.set_xlabel(f\"UTC Time starts at {ds.time.data[0]}\")\n", + "# ax.grid()\n", + "# plt.show()\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.16" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/QCRAD/qcrad1long.c1.ipynb b/VAPs/quicklook/QCRAD/qcrad1long.c1.ipynb new file mode 100644 index 00000000..f8779930 --- /dev/null +++ b/VAPs/quicklook/QCRAD/qcrad1long.c1.ipynb @@ -0,0 +1,339 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# QCRAD1LONG.C1 Plots\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/qcrad) for more information about this vap." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib widget\n", + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import ipywidgets as widgets\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "# Data archive directory\n", + "DATA_DIR = r'/data/archive/'\n", + "\n", + "# Datastream info\n", + "DATASTREAM_NAME = 'qcrad1long'\n", + "DATA_LEVEL = 'c1'\n", + "LOCATIONS = [{'end_date': '2016-01-19', 'facility': 'S1', 'site': 'awr', 'start_date': '2015-12-05'}, {'end_date': '2020-08-06', 'facility': 'M1', 'site': 'oli', 'start_date': '2019-08-26'}, {'end_date': '2023-12-18', 'facility': 'C1', 'site': 'ena', 'start_date': '2022-10-29'}, {'end_date': '2008-01-01', 'facility': 'M1', 'site': 'fkb', 'start_date': '2007-03-01'}, {'end_date': '2015-12-01', 'facility': 'M1', 'site': 'mao', 'start_date': '2014-01-01'}, {'end_date': '2008-12-01', 'facility': 'M1', 'site': 'hfe', 'start_date': '2008-06-01'}, {'end_date': '2012-02-01', 'facility': 'M1', 'site': 'gan', 'start_date': '2011-10-01'}, {'end_date': '2011-01-01', 'facility': 'M1', 'site': 'grw', 'start_date': '2009-04-01'}, {'end_date': '2013-07-08', 'facility': 'M1', 'site': 'pvc', 'start_date': '2012-06-26'}, {'end_date': '2005-09-01', 'facility': 'M1', 'site': 'pye', 'start_date': '2005-02-01'}, {'end_date': '2011-05-01', 'facility': 'M1', 'site': 'sbs', 'start_date': '2010-09-23'}, {'end_date': '2012-03-27', 'facility': 'M1', 'site': 'pgh', 'start_date': '2011-06-16'}, {'end_date': '2023-12-17', 'facility': 'C1', 'site': 'nsa', 'start_date': '1998-02-15'}, {'end_date': '2011-01-16', 'facility': 'C2', 'site': 'nsa', 'start_date': '1999-08-21'}, {'end_date': '2007-01-01', 'facility': 'M1', 'site': 'nim', 'start_date': '2005-12-01'}, 
{'end_date': '2020-05-26', 'facility': 'C1', 'site': 'sgp', 'start_date': '2016-10-03'}, {'end_date': '2023-12-18', 'facility': 'E39', 'site': 'sgp', 'start_date': '2015-08-23'}, {'end_date': '2023-09-28', 'facility': 'E40', 'site': 'sgp', 'start_date': '2015-10-07'}, {'end_date': '2023-09-28', 'facility': 'E11', 'site': 'sgp', 'start_date': '2022-08-05'}, {'end_date': '2023-12-17', 'facility': 'E12', 'site': 'sgp', 'start_date': '2019-08-26'}, {'end_date': '2023-12-18', 'facility': 'E13', 'site': 'sgp', 'start_date': '2022-08-10'}, {'end_date': '2023-09-28', 'facility': 'E15', 'site': 'sgp', 'start_date': '2019-08-20'}, {'end_date': '2019-05-01', 'facility': 'E21', 'site': 'sgp', 'start_date': '2018-05-23'}, {'end_date': '2020-06-08', 'facility': 'E31', 'site': 'sgp', 'start_date': '2016-12-02'}, {'end_date': '2023-12-18', 'facility': 'E32', 'site': 'sgp', 'start_date': '2019-06-13'}, {'end_date': '2023-12-18', 'facility': 'E33', 'site': 'sgp', 'start_date': '2017-07-14'}, {'end_date': '2023-09-28', 'facility': 'E34', 'site': 'sgp', 'start_date': '2017-07-15'}, {'end_date': '2023-09-28', 'facility': 'E35', 'site': 'sgp', 'start_date': '2017-07-20'}, {'end_date': '2023-09-28', 'facility': 'E36', 'site': 'sgp', 'start_date': '2019-06-25'}, {'end_date': '2023-12-18', 'facility': 'E37', 'site': 'sgp', 'start_date': '2019-06-21'}, {'end_date': '2019-07-26', 'facility': 'E38', 'site': 'sgp', 'start_date': '2017-07-19'}, {'end_date': '2023-08-02', 'facility': 'E41', 'site': 'sgp', 'start_date': '2016-04-13'}, {'end_date': '2023-09-28', 'facility': 'E9', 'site': 'sgp', 'start_date': '2018-05-31'}, {'end_date': '2023-12-18', 'facility': 'S01', 'site': 'sgp', 'start_date': '2020-05-26'}, {'end_date': '2014-07-06', 'facility': 'C1', 'site': 'twp', 'start_date': '1996-10-01'}, {'end_date': '2011-10-01', 'facility': 'C2', 'site': 'twp', 'start_date': '1998-10-01'}, {'end_date': '2015-01-05', 'facility': 'C3', 'site': 'twp', 'start_date': '2002-03-01'}]" + ] + }, + { + 
"attachments": {}, + "cell_type": "markdown", + "id": "9faaf875", + "metadata": {}, + "source": [ + "## Define site, facility, and date range" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ac6764f5", + "metadata": {}, + "outputs": [], + "source": [ + "print(\"The following locations and date ranges are available for this VAP:\")\n", + "display(pd.DataFrame(LOCATIONS, columns=['site', 'facility', 'start_date', 'end_date']))" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "8d132223", + "metadata": {}, + "source": [ + "#### Define site, facility, and date range (date format: YYYY-MM-DD) using the variables below:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e563983a", + "metadata": {}, + "outputs": [], + "source": [ + "site_facility = ( 'sgp', 'C1' )\n", + "\n", + "date_start = '2020-05-24'\n", + "date_end = '2020-05-26'" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "bccd3dfe-2f99-49a5-bace-ea37e8dd8fc3", + "metadata": {}, + "source": [ + "## Load data files\n", + "Load data files from /data/archive/" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cb4b9a26-c574-49c0-a521-658fa553e39e", + "metadata": {}, + "outputs": [], + "source": [ + "# Compile list of files\n", + "site, facility = site_facility\n", + "d_date_start = datetime.strptime(date_start, '%Y-%m-%d')\n", + "d_date_end = datetime.strptime(date_end, '%Y-%m-%d')\n", + "dir_path = os.path.join(DATA_DIR + site, site + DATASTREAM_NAME + facility + r'.' 
+ DATA_LEVEL )\n", + "dir_path\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6be8f3dc", + "metadata": {}, + "outputs": [], + "source": [ + "from datetime import date, timedelta\n", + "import pandas as pd\n", + "\n", + "def get_ARM_formated_dates(start_date, end_date):\n", + " \"\"\"\n", + " Get a list of ARM conventional formated date lists, based on start_date and end_date(inclusive)\n", + " EXAMPLE:\n", + " get_ARM_formated_dates(start_date=\"20180219\", end_date=\"20180221\")\n", + " >> [\"20180219\", \"20180220\", \"20180221\"] \n", + " \"\"\"\n", + " \n", + " _start_date = pd.to_datetime(start_date)\n", + " _end_date = pd.to_datetime(end_date)\n", + " \n", + " delta = _end_date - _start_date # returns timedelta \n", + " dates = []\n", + "\n", + " for i in range(delta.days + 1):\n", + " day = _start_date + timedelta(days=i)\n", + " day_formated = day.strftime(format=\"%Y%m%d\")\n", + " dates.append(day_formated)\n", + " return dates\n", + "\n", + "\n", + "get_ARM_formated_dates(start_date=date_start, end_date=date_end)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "51feea2e", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter a list of files based on date pattern\n", + "import glob\n", + "dates = get_ARM_formated_dates(start_date=date_start, end_date=date_end)\n", + "files_filter = []\n", + "for date in dates:\n", + " files_filter += glob.glob(f'{dir_path}/*.{date}*.*')\n", + " files_filter\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0e5d0d7", + "metadata": {}, + "outputs": [], + "source": [ + "# Load files as a single dataset\n", + "files_list = files_filter \n", + "ds = act.io.armfiles.read_netcdf(files_list)\n", + "ds.clean.cleanup()\n", + "print(f'{len(files_list)} files loaded')\n", + "ds\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "4a551094-9ec0-4b64-b80a-9940573c2f50", + "metadata": {}, + "source": [ + "## Plot 
time series data\n", + "#### Define the list of variables to be plotted:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d09b789e-84f1-4605-846b-a72c110c8048", + "metadata": {}, + "outputs": [], + "source": [ + "variables_to_plot = ['BestEstimate_down_short_hemisp', 'down_short_hemisp', 'down_short_diffuse_hemisp']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3458fb08-035b-4898-9253-0a94e6f9c97b", + "metadata": {}, + "outputs": [], + "source": [ + "ts_display = act.plotting.TimeSeriesDisplay(ds)\n", + "ts_display.add_subplots((len(variables_to_plot),), figsize = (9.5,4*len(variables_to_plot)))\n", + "\n", + "for i,v in enumerate(variables_to_plot):\n", + " ts_ax = ts_display.plot(v, subplot_index=(i,), set_title=ds.variables[v].attrs['long_name'],)\n", + " ts_ax.grid()\n", + "\n", + "plt.show()\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "194399aa-1907-452b-8ba9-bc31d7f60291", + "metadata": {}, + "source": [ + "## Quality check plots\n", + "#### Define variable for QC plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9a0ef63d-3eeb-48fc-a24a-43258b6134b8", + "metadata": {}, + "outputs": [], + "source": [ + "qc_variable = 'BestEstimate_down_short_hemisp'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "532663a3-4dc0-4497-bda8-018c5f91e1c4", + "metadata": {}, + "outputs": [], + "source": [ + "# QC Plot\n", + "if ('qc_'+qc_variable) in ds.variables:\n", + "\n", + " # Plot\n", + " qc_display = act.plotting.TimeSeriesDisplay(ds)\n", + " qc_display.add_subplots((2,), figsize = (9.5,10))\n", + " qc_ax = qc_display.plot(qc_variable, subplot_index=(0,), set_title=\"QC results on field: \" + qc_variable,)\n", + " qc_ax.grid()\n", + " qc_display.qc_flag_block_plot(qc_variable, subplot_index=(1,))\n", + "\n", + " plt.show()\n", + "else:\n", + " print(f'QC not available for the selected field: {qc_variable}')\n" + ] + }, + { + "attachments": {}, + 
"cell_type": "markdown", + "id": "2308db16-f362-4033-a11a-c3e5e75ad9ba", + "metadata": {}, + "source": [ + "## Field selection dropdown menu\n", + "Select variable to be plotted from a dropdown menu" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d5792fa8-2253-441a-8ac6-1e07a0345031", + "metadata": {}, + "outputs": [], + "source": [ + "plt.ioff()\n", + "\n", + "# populate dropdown menu with available variables \n", + "available_variables = [v for v in ds.variables if not('time' in v or v.startswith('qc_') or v.startswith('source_')) and 'long_name' in ds.variables[v].attrs]\n", + "d_variable = 'BestEstimate_down_short_hemisp'\n", + "dropdown = widgets.Dropdown(\n", + " options = [(ds.variables[v].attrs['long_name'], v) for v in available_variables],\n", + " value= d_variable,\n", + " description='Field:',\n", + " disabled=False,\n", + ")\n", + "dropdown.layout.margin = '0px 30% 0px 20%'\n", + "dropdown.layout.width = '50%'\n", + "\n", + "# set up display\n", + "i_display = act.plotting.TimeSeriesDisplay(ds)\n", + "i_display.add_subplots((1,), figsize = (9.5,5))\n", + "i_ax = i_display.plot(d_variable, subplot_index=(0,), set_title=ds.variables[d_variable].attrs['long_name'],)\n", + "i_ax.grid()\n", + "i_fig = i_display.fig\n", + "\n", + "# update plot callback function\n", + "def update_plot(change):\n", + " i_ax.cla()\n", + " i_ax_new = i_display.plot(change.new, subplot_index=(0,), set_title=ds.variables[change.new].attrs['long_name'],)\n", + " i_ax_new.grid()\n", + " i_fig.canvas.draw()\n", + " i_fig.canvas.flush_events()\n", + "\n", + "dropdown.observe(update_plot, names='value')\n", + "\n", + "widgets.AppLayout(\n", + " header=dropdown,\n", + " center=i_fig.canvas,\n", + " pane_heights=[1, 6,1]\n", + ")\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": 
".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.12" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/QCRAD/qcrad1long.c2.ipynb b/VAPs/quicklook/QCRAD/qcrad1long.c2.ipynb new file mode 100644 index 00000000..b877f2d1 --- /dev/null +++ b/VAPs/quicklook/QCRAD/qcrad1long.c2.ipynb @@ -0,0 +1,339 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# QCRAD1LONG.C2 Plots\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/qcrad) for more information about this vap." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib widget\n", + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import ipywidgets as widgets\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "# Data archive directory\n", + "DATA_DIR = r'/data/archive/'\n", + "\n", + "# Datastream info\n", + "DATASTREAM_NAME = 'qcrad1long'\n", + "DATA_LEVEL = 'c2'\n", + "LOCATIONS = [{'end_date': '2017-01-02', 'facility': 'M1', 'site': 'awr', 'start_date': '2015-11-24'}, {'end_date': '2017-11-01', 'facility': 'M1', 'site': 'asi', 'start_date': '2016-05-05'}, {'end_date': '2020-06-02', 'facility': 'M1', 'site': 'anx', 'start_date': '2019-12-01'}, {'end_date': '2023-06-15', 'facility': 'M1', 'site': 'guc', 'start_date': '2021-09-01'}, {'end_date': '2022-10-28', 'facility': 'C1', 'site': 'ena', 'start_date': '2013-10-03'}, {'end_date': '2022-10-01', 'facility': 'M1', 'site': 'hou', 
'start_date': '2021-08-17'}, {'end_date': '2008-01-01', 'facility': 'M1', 'site': 'fkb', 'start_date': '2007-03-15'}, {'end_date': '2015-12-01', 'facility': 'M1', 'site': 'mao', 'start_date': '2013-12-20'}, {'end_date': '2018-03-13', 'facility': 'S1', 'site': 'mcq', 'start_date': '2016-04-04'}, {'end_date': '2008-12-28', 'facility': 'M1', 'site': 'hfe', 'start_date': '2008-05-16'}, {'end_date': '2019-05-01', 'facility': 'M1', 'site': 'cor', 'start_date': '2018-09-23'}, {'end_date': '2012-02-08', 'facility': 'M1', 'site': 'gan', 'start_date': '2011-09-23'}, {'end_date': '2011-01-06', 'facility': 'M1', 'site': 'grw', 'start_date': '2009-04-16'}, {'end_date': '2013-07-09', 'facility': 'M1', 'site': 'pvc', 'start_date': '2012-06-25'}, {'end_date': '2005-09-15', 'facility': 'M1', 'site': 'pye', 'start_date': '2005-02-14'}, {'end_date': '2011-05-02', 'facility': 'M1', 'site': 'sbs', 'start_date': '2010-09-22'}, {'end_date': '2021-06-15', 'facility': 'M1', 'site': 'oli', 'start_date': '2013-10-01'}, {'end_date': '2012-03-27', 'facility': 'M1', 'site': 'pgh', 'start_date': '2011-06-04'}, {'end_date': '2022-08-20', 'facility': 'C1', 'site': 'nsa', 'start_date': '2003-09-20'}, {'end_date': '2011-01-17', 'facility': 'C2', 'site': 'nsa', 'start_date': '2004-11-17'}, {'end_date': '2007-01-07', 'facility': 'M1', 'site': 'nim', 'start_date': '2005-11-26'}, {'end_date': '2020-05-25', 'facility': 'C1', 'site': 'sgp', 'start_date': '1997-03-21'}, {'end_date': '2022-08-05', 'facility': 'E39', 'site': 'sgp', 'start_date': '2015-08-23'}, {'end_date': '2009-10-28', 'facility': 'E3', 'site': 'sgp', 'start_date': '1996-03-06'}, {'end_date': '2022-08-09', 'facility': 'E40', 'site': 'sgp', 'start_date': '2015-10-07'}, {'end_date': '2011-10-19', 'facility': 'E10', 'site': 'sgp', 'start_date': '1995-07-21'}, {'end_date': '2022-08-04', 'facility': 'E11', 'site': 'sgp', 'start_date': '1995-06-30'}, {'end_date': '2022-08-02', 'facility': 'E12', 'site': 'sgp', 'start_date': '1996-01-19'}, 
{'end_date': '2022-08-09', 'facility': 'E13', 'site': 'sgp', 'start_date': '1994-01-07'}, {'end_date': '2022-08-08', 'facility': 'E15', 'site': 'sgp', 'start_date': '1994-01-12'}, {'end_date': '2011-11-15', 'facility': 'E16', 'site': 'sgp', 'start_date': '1995-06-02'}, {'end_date': '2009-11-17', 'facility': 'E18', 'site': 'sgp', 'start_date': '1996-06-20'}, {'end_date': '2011-05-23', 'facility': 'E19', 'site': 'sgp', 'start_date': '1998-07-08'}, {'end_date': '2009-10-14', 'facility': 'E1', 'site': 'sgp', 'start_date': '1995-11-15'}, {'end_date': '2011-11-17', 'facility': 'E20', 'site': 'sgp', 'start_date': '1994-11-03'}, {'end_date': '2019-05-01', 'facility': 'E21', 'site': 'sgp', 'start_date': '1999-09-11'}, {'end_date': '2009-12-01', 'facility': 'E22', 'site': 'sgp', 'start_date': '1995-03-16'}, {'end_date': '2009-11-14', 'facility': 'E24', 'site': 'sgp', 'start_date': '1995-11-07'}, {'end_date': '2002-04-03', 'facility': 'E25', 'site': 'sgp', 'start_date': '1997-11-12'}, {'end_date': '2009-12-04', 'facility': 'E27', 'site': 'sgp', 'start_date': '2003-05-15'}, {'end_date': '2009-10-20', 'facility': 'E2', 'site': 'sgp', 'start_date': '1996-03-07'}, {'end_date': '2021-09-21', 'facility': 'E31', 'site': 'sgp', 'start_date': '2011-10-13'}, {'end_date': '2022-08-04', 'facility': 'E32', 'site': 'sgp', 'start_date': '2012-02-04'}, {'end_date': '2022-08-01', 'facility': 'E33', 'site': 'sgp', 'start_date': '2011-08-17'}, {'end_date': '2022-08-08', 'facility': 'E34', 'site': 'sgp', 'start_date': '2011-09-02'}, {'end_date': '2022-08-05', 'facility': 'E35', 'site': 'sgp', 'start_date': '2011-10-05'}, {'end_date': '2022-08-01', 'facility': 'E36', 'site': 'sgp', 'start_date': '2011-09-28'}, {'end_date': '2022-08-02', 'facility': 'E37', 'site': 'sgp', 'start_date': '2011-09-28'}, {'end_date': '2021-06-07', 'facility': 'E38', 'site': 'sgp', 'start_date': '2011-08-23'}, {'end_date': '2022-08-02', 'facility': 'E41', 'site': 'sgp', 'start_date': '2016-04-13'}, {'end_date': 
'2011-09-26', 'facility': 'E4', 'site': 'sgp', 'start_date': '1995-05-08'}, {'end_date': '2009-11-02', 'facility': 'E5', 'site': 'sgp', 'start_date': '1996-06-14'}, {'end_date': '2011-10-18', 'facility': 'E6', 'site': 'sgp', 'start_date': '1996-03-05'}, {'end_date': '2011-11-14', 'facility': 'E7', 'site': 'sgp', 'start_date': '1995-05-18'}, {'end_date': '2009-11-10', 'facility': 'E8', 'site': 'sgp', 'start_date': '1995-09-22'}, {'end_date': '2022-08-03', 'facility': 'E9', 'site': 'sgp', 'start_date': '1994-01-12'}, {'end_date': '2014-07-06', 'facility': 'C1', 'site': 'twp', 'start_date': '1996-10-10'}, {'end_date': '2013-09-08', 'facility': 'C2', 'site': 'twp', 'start_date': '1998-10-29'}, {'end_date': '2015-01-06', 'facility': 'C3', 'site': 'twp', 'start_date': '2002-03-12'}]" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "9faaf875", + "metadata": {}, + "source": [ + "## Define site, facility, and date range" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ac6764f5", + "metadata": {}, + "outputs": [], + "source": [ + "print(\"The following locations and date ranges are available for this VAP:\")\n", + "display(pd.DataFrame(LOCATIONS, columns=['site', 'facility', 'start_date', 'end_date']))" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "8d132223", + "metadata": {}, + "source": [ + "#### Define site, facility, and date range (date format: YYYY-MM-DD) using the variables below:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e563983a", + "metadata": {}, + "outputs": [], + "source": [ + "site_facility = ( 'sgp', 'C1' )\n", + "\n", + "date_start = '2020-05-23'\n", + "date_end = '2020-05-25'" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "bccd3dfe-2f99-49a5-bace-ea37e8dd8fc3", + "metadata": {}, + "source": [ + "## Load data files\n", + "Load data files from /data/archive/" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": 
"cb4b9a26-c574-49c0-a521-658fa553e39e", + "metadata": {}, + "outputs": [], + "source": [ + "# Compile list of files\n", + "site, facility = site_facility\n", + "d_date_start = datetime.strptime(date_start, '%Y-%m-%d')\n", + "d_date_end = datetime.strptime(date_end, '%Y-%m-%d')\n", + "dir_path = os.path.join(DATA_DIR + site, site + DATASTREAM_NAME + facility + r'.' + DATA_LEVEL )\n", + "dir_path\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6be8f3dc", + "metadata": {}, + "outputs": [], + "source": [ + "from datetime import date, timedelta\n", + "import pandas as pd\n", + "\n", + "def get_ARM_formated_dates(start_date, end_date):\n", + " \"\"\"\n", + " Get a list of ARM conventional formated date lists, based on start_date and end_date(inclusive)\n", + " EXAMPLE:\n", + " get_ARM_formated_dates(start_date=\"20180219\", end_date=\"20180221\")\n", + " >> [\"20180219\", \"20180220\", \"20180221\"] \n", + " \"\"\"\n", + " \n", + " _start_date = pd.to_datetime(start_date)\n", + " _end_date = pd.to_datetime(end_date)\n", + " \n", + " delta = _end_date - _start_date # returns timedelta \n", + " dates = []\n", + "\n", + " for i in range(delta.days + 1):\n", + " day = _start_date + timedelta(days=i)\n", + " day_formated = day.strftime(format=\"%Y%m%d\")\n", + " dates.append(day_formated)\n", + " return dates\n", + "\n", + "\n", + "get_ARM_formated_dates(start_date=date_start, end_date=date_end)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "51feea2e", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter a list of files based on date pattern\n", + "import glob\n", + "dates = get_ARM_formated_dates(start_date=date_start, end_date=date_end)\n", + "files_filter = []\n", + "for date in dates:\n", + " files_filter += glob.glob(f'{dir_path}/*.{date}*.*')\n", + " files_filter\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0e5d0d7", + "metadata": {}, + "outputs": [], + 
"source": [ + "# Load files as a single dataset\n", + "files_list = files_filter \n", + "ds = act.io.armfiles.read_netcdf(files_list)\n", + "ds.clean.cleanup()\n", + "print(f'{len(files_list)} files loaded')\n", + "ds\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "4a551094-9ec0-4b64-b80a-9940573c2f50", + "metadata": {}, + "source": [ + "## Plot time series data\n", + "#### Define the list of variables to be plotted:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d09b789e-84f1-4605-846b-a72c110c8048", + "metadata": {}, + "outputs": [], + "source": [ + "variables_to_plot = ['BestEstimate_down_short_hemisp', 'down_short_hemisp', 'down_short_diffuse_hemisp']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3458fb08-035b-4898-9253-0a94e6f9c97b", + "metadata": {}, + "outputs": [], + "source": [ + "ts_display = act.plotting.TimeSeriesDisplay(ds)\n", + "ts_display.add_subplots((len(variables_to_plot),), figsize = (9.5,4*len(variables_to_plot)))\n", + "\n", + "for i,v in enumerate(variables_to_plot):\n", + " ts_ax = ts_display.plot(v, subplot_index=(i,), set_title=ds.variables[v].attrs['long_name'],)\n", + " ts_ax.grid()\n", + "\n", + "plt.show()\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "194399aa-1907-452b-8ba9-bc31d7f60291", + "metadata": {}, + "source": [ + "## Quality check plots\n", + "#### Define variable for QC plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9a0ef63d-3eeb-48fc-a24a-43258b6134b8", + "metadata": {}, + "outputs": [], + "source": [ + "qc_variable = 'BestEstimate_down_short_hemisp'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "532663a3-4dc0-4497-bda8-018c5f91e1c4", + "metadata": {}, + "outputs": [], + "source": [ + "# QC Plot\n", + "if ('qc_'+qc_variable) in ds.variables:\n", + "\n", + " # Plot\n", + " qc_display = act.plotting.TimeSeriesDisplay(ds)\n", + " qc_display.add_subplots((2,), figsize 
= (9.5,10))\n", + " qc_ax = qc_display.plot(qc_variable, subplot_index=(0,), set_title=\"QC results on field: \" + qc_variable,)\n", + " qc_ax.grid()\n", + " qc_display.qc_flag_block_plot(qc_variable, subplot_index=(1,))\n", + "\n", + " plt.show()\n", + "else:\n", + " print(f'QC not available for the selected field: {qc_variable}')\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "2308db16-f362-4033-a11a-c3e5e75ad9ba", + "metadata": {}, + "source": [ + "## Field selection dropdown menu\n", + "Select variable to be plotted from a dropdown menu" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d5792fa8-2253-441a-8ac6-1e07a0345031", + "metadata": {}, + "outputs": [], + "source": [ + "plt.ioff()\n", + "\n", + "# populate dropdown menu with available variables \n", + "available_variables = [v for v in ds.variables if not('time' in v or v.startswith('qc_') or v.startswith('source_')) and 'long_name' in ds.variables[v].attrs]\n", + "d_variable = 'BestEstimate_down_short_hemisp'\n", + "dropdown = widgets.Dropdown(\n", + " options = [(ds.variables[v].attrs['long_name'], v) for v in available_variables],\n", + " value= d_variable,\n", + " description='Field:',\n", + " disabled=False,\n", + ")\n", + "dropdown.layout.margin = '0px 30% 0px 20%'\n", + "dropdown.layout.width = '50%'\n", + "\n", + "# set up display\n", + "i_display = act.plotting.TimeSeriesDisplay(ds)\n", + "i_display.add_subplots((1,), figsize = (9.5,5))\n", + "i_ax = i_display.plot(d_variable, subplot_index=(0,), set_title=ds.variables[d_variable].attrs['long_name'],)\n", + "i_ax.grid()\n", + "i_fig = i_display.fig\n", + "\n", + "# update plot callback function\n", + "def update_plot(change):\n", + " i_ax.cla()\n", + " i_ax_new = i_display.plot(change.new, subplot_index=(0,), set_title=ds.variables[change.new].attrs['long_name'],)\n", + " i_ax_new.grid()\n", + " i_fig.canvas.draw()\n", + " i_fig.canvas.flush_events()\n", + "\n", + "dropdown.observe(update_plot, 
names='value')\n", + "\n", + "widgets.AppLayout(\n", + " header=dropdown,\n", + " center=i_fig.canvas,\n", + " pane_heights=[1, 6,1]\n", + ")\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.12" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/QCRAD/qcradbeflux1long.c1.ipynb b/VAPs/quicklook/QCRAD/qcradbeflux1long.c1.ipynb new file mode 100644 index 00000000..6645ebb3 --- /dev/null +++ b/VAPs/quicklook/QCRAD/qcradbeflux1long.c1.ipynb @@ -0,0 +1,339 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# QCRADBEFLUX1LONG.C1 Plots\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/qcrad) for more information about this vap." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib widget\n", + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import ipywidgets as widgets\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "# Data archive directory\n", + "DATA_DIR = r'/data/archive/'\n", + "\n", + "# Datastream info\n", + "DATASTREAM_NAME = 'qcradbeflux1long'\n", + "DATA_LEVEL = 'c1'\n", + "LOCATIONS = [{'end_date': '2023-12-17', 'facility': 'C1', 'site': 'sgp', 'start_date': '2011-07-18'}]" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "9faaf875", + "metadata": {}, + "source": [ + "## Define site, facility, and date range" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ac6764f5", + "metadata": {}, + "outputs": [], + "source": [ + "print(\"The following locations and date ranges are available for this VAP:\")\n", + "display(pd.DataFrame(LOCATIONS, columns=['site', 'facility', 'start_date', 'end_date']))" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "8d132223", + "metadata": {}, + "source": [ + "#### Define site, facility, and date range (date format: YYYY-MM-DD) using the variables below:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e563983a", + "metadata": {}, + "outputs": [], + "source": [ + "site_facility = ( 'sgp', 'C1' )\n", + "\n", + "date_start = '2023-12-15'\n", + "date_end = '2023-12-17'" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "bccd3dfe-2f99-49a5-bace-ea37e8dd8fc3", + "metadata": {}, + "source": [ + "## Load data files\n", + "Load data files from /data/archive/" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cb4b9a26-c574-49c0-a521-658fa553e39e", + 
"metadata": {}, + "outputs": [], + "source": [ + "# Compile list of files\n", + "site, facility = site_facility\n", + "d_date_start = datetime.strptime(date_start, '%Y-%m-%d')\n", + "d_date_end = datetime.strptime(date_end, '%Y-%m-%d')\n", + "dir_path = os.path.join(DATA_DIR + site, site + DATASTREAM_NAME + facility + r'.' + DATA_LEVEL )\n", + "dir_path\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6be8f3dc", + "metadata": {}, + "outputs": [], + "source": [ + "from datetime import date, timedelta\n", + "import pandas as pd\n", + "\n", + "def get_ARM_formated_dates(start_date, end_date):\n", + " \"\"\"\n", + " Get a list of ARM conventional formated date lists, based on start_date and end_date(inclusive)\n", + " EXAMPLE:\n", + " get_ARM_formated_dates(start_date=\"20180219\", end_date=\"20180221\")\n", + " >> [\"20180219\", \"20180220\", \"20180221\"] \n", + " \"\"\"\n", + " \n", + " _start_date = pd.to_datetime(start_date)\n", + " _end_date = pd.to_datetime(end_date)\n", + " \n", + " delta = _end_date - _start_date # returns timedelta \n", + " dates = []\n", + "\n", + " for i in range(delta.days + 1):\n", + " day = _start_date + timedelta(days=i)\n", + " day_formated = day.strftime(format=\"%Y%m%d\")\n", + " dates.append(day_formated)\n", + " return dates\n", + "\n", + "\n", + "get_ARM_formated_dates(start_date=date_start, end_date=date_end)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "51feea2e", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter a list of files based on date pattern\n", + "import glob\n", + "dates = get_ARM_formated_dates(start_date=date_start, end_date=date_end)\n", + "files_filter = []\n", + "for date in dates:\n", + " files_filter += glob.glob(f'{dir_path}/*.{date}*.*')\n", + " files_filter\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0e5d0d7", + "metadata": {}, + "outputs": [], + "source": [ + "# Load files as a single 
dataset\n", + "files_list = files_filter \n", + "ds = act.io.armfiles.read_netcdf(files_list)\n", + "ds.clean.cleanup()\n", + "print(f'{len(files_list)} files loaded')\n", + "ds\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "4a551094-9ec0-4b64-b80a-9940573c2f50", + "metadata": {}, + "source": [ + "## Plot time series data\n", + "#### Define the list of variables to be plotted:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d09b789e-84f1-4605-846b-a72c110c8048", + "metadata": {}, + "outputs": [], + "source": [ + "variables_to_plot = ['BestEstimate_down_short_hemisp', 'down_short_hemisp', 'down_short_diffuse_hemisp']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3458fb08-035b-4898-9253-0a94e6f9c97b", + "metadata": {}, + "outputs": [], + "source": [ + "ts_display = act.plotting.TimeSeriesDisplay(ds)\n", + "ts_display.add_subplots((len(variables_to_plot),), figsize = (9.5,4*len(variables_to_plot)))\n", + "\n", + "for i,v in enumerate(variables_to_plot):\n", + " ts_ax = ts_display.plot(v, subplot_index=(i,), set_title=ds.variables[v].attrs['long_name'],)\n", + " ts_ax.grid()\n", + "\n", + "plt.show()\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "194399aa-1907-452b-8ba9-bc31d7f60291", + "metadata": {}, + "source": [ + "## Quality check plots\n", + "#### Define variable for QC plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9a0ef63d-3eeb-48fc-a24a-43258b6134b8", + "metadata": {}, + "outputs": [], + "source": [ + "qc_variable = 'BestEstimate_down_short_hemisp'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "532663a3-4dc0-4497-bda8-018c5f91e1c4", + "metadata": {}, + "outputs": [], + "source": [ + "# QC Plot\n", + "if ('qc_'+qc_variable) in ds.variables:\n", + "\n", + " # Plot\n", + " qc_display = act.plotting.TimeSeriesDisplay(ds)\n", + " qc_display.add_subplots((2,), figsize = (9.5,10))\n", + " qc_ax = 
qc_display.plot(qc_variable, subplot_index=(0,), set_title=\"QC results on field: \" + qc_variable,)\n", + " qc_ax.grid()\n", + " qc_display.qc_flag_block_plot(qc_variable, subplot_index=(1,))\n", + "\n", + " plt.show()\n", + "else:\n", + " print(f'QC not available for the selected field: {qc_variable}')\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "2308db16-f362-4033-a11a-c3e5e75ad9ba", + "metadata": {}, + "source": [ + "## Field selection dropdown menu\n", + "Select variable to be plotted from a dropdown menu" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d5792fa8-2253-441a-8ac6-1e07a0345031", + "metadata": {}, + "outputs": [], + "source": [ + "plt.ioff()\n", + "\n", + "# populate dropdown menu with available variables \n", + "available_variables = [v for v in ds.variables if not('time' in v or v.startswith('qc_') or v.startswith('source_')) and 'long_name' in ds.variables[v].attrs]\n", + "d_variable = 'BestEstimate_down_short_hemisp'\n", + "dropdown = widgets.Dropdown(\n", + " options = [(ds.variables[v].attrs['long_name'], v) for v in available_variables],\n", + " value= d_variable,\n", + " description='Field:',\n", + " disabled=False,\n", + ")\n", + "dropdown.layout.margin = '0px 30% 0px 20%'\n", + "dropdown.layout.width = '50%'\n", + "\n", + "# set up display\n", + "i_display = act.plotting.TimeSeriesDisplay(ds)\n", + "i_display.add_subplots((1,), figsize = (9.5,5))\n", + "i_ax = i_display.plot(d_variable, subplot_index=(0,), set_title=ds.variables[d_variable].attrs['long_name'],)\n", + "i_ax.grid()\n", + "i_fig = i_display.fig\n", + "\n", + "# update plot callback function\n", + "def update_plot(change):\n", + " i_ax.cla()\n", + " i_ax_new = i_display.plot(change.new, subplot_index=(0,), set_title=ds.variables[change.new].attrs['long_name'],)\n", + " i_ax_new.grid()\n", + " i_fig.canvas.draw()\n", + " i_fig.canvas.flush_events()\n", + "\n", + "dropdown.observe(update_plot, names='value')\n", + "\n", + 
"widgets.AppLayout(\n", + " header=dropdown,\n", + " center=i_fig.canvas,\n", + " pane_heights=[1, 6,1]\n", + ")\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.12" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/QCRAD/qcradbeflux1long.c2.ipynb b/VAPs/quicklook/QCRAD/qcradbeflux1long.c2.ipynb new file mode 100644 index 00000000..0cb12c6d --- /dev/null +++ b/VAPs/quicklook/QCRAD/qcradbeflux1long.c2.ipynb @@ -0,0 +1,339 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# QCRADBEFLUX1LONG.C2 Plots\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/qcrad) for more information about this vap." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib widget\n", + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import ipywidgets as widgets\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "# Data archive directory\n", + "DATA_DIR = r'/data/archive/'\n", + "\n", + "# Datastream info\n", + "DATASTREAM_NAME = 'qcradbeflux1long'\n", + "DATA_LEVEL = 'c2'\n", + "LOCATIONS = [{'end_date': '2020-05-25', 'facility': 'C1', 'site': 'sgp', 'start_date': '1995-05-19'}]" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "9faaf875", + "metadata": {}, + "source": [ + "## Define site, facility, and date range" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ac6764f5", + "metadata": {}, + "outputs": [], + "source": [ + "print(\"The following locations and date ranges are available for this VAP:\")\n", + "display(pd.DataFrame(LOCATIONS, columns=['site', 'facility', 'start_date', 'end_date']))" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "8d132223", + "metadata": {}, + "source": [ + "#### Define site, facility, and date range (date format: YYYY-MM-DD) using the variables below:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e563983a", + "metadata": {}, + "outputs": [], + "source": [ + "site_facility = ( 'sgp', 'C1' )\n", + "\n", + "date_start = '2020-05-23'\n", + "date_end = '2020-05-25'" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "bccd3dfe-2f99-49a5-bace-ea37e8dd8fc3", + "metadata": {}, + "source": [ + "## Load data files\n", + "Load data files from /data/archive/" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cb4b9a26-c574-49c0-a521-658fa553e39e", + 
"metadata": {}, + "outputs": [], + "source": [ + "# Compile list of files\n", + "site, facility = site_facility\n", + "d_date_start = datetime.strptime(date_start, '%Y-%m-%d')\n", + "d_date_end = datetime.strptime(date_end, '%Y-%m-%d')\n", + "dir_path = os.path.join(DATA_DIR + site, site + DATASTREAM_NAME + facility + r'.' + DATA_LEVEL )\n", + "dir_path\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6be8f3dc", + "metadata": {}, + "outputs": [], + "source": [ + "from datetime import date, timedelta\n", + "import pandas as pd\n", + "\n", + "def get_ARM_formated_dates(start_date, end_date):\n", + " \"\"\"\n", + " Get a list of ARM conventional formated date lists, based on start_date and end_date(inclusive)\n", + " EXAMPLE:\n", + " get_ARM_formated_dates(start_date=\"20180219\", end_date=\"20180221\")\n", + " >> [\"20180219\", \"20180220\", \"20180221\"] \n", + " \"\"\"\n", + " \n", + " _start_date = pd.to_datetime(start_date)\n", + " _end_date = pd.to_datetime(end_date)\n", + " \n", + " delta = _end_date - _start_date # returns timedelta \n", + " dates = []\n", + "\n", + " for i in range(delta.days + 1):\n", + " day = _start_date + timedelta(days=i)\n", + " day_formated = day.strftime(format=\"%Y%m%d\")\n", + " dates.append(day_formated)\n", + " return dates\n", + "\n", + "\n", + "get_ARM_formated_dates(start_date=date_start, end_date=date_end)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "51feea2e", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter a list of files based on date pattern\n", + "import glob\n", + "dates = get_ARM_formated_dates(start_date=date_start, end_date=date_end)\n", + "files_filter = []\n", + "for date in dates:\n", + " files_filter += glob.glob(f'{dir_path}/*.{date}*.*')\n", + " files_filter\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0e5d0d7", + "metadata": {}, + "outputs": [], + "source": [ + "# Load files as a single 
dataset\n", + "files_list = files_filter \n", + "ds = act.io.armfiles.read_netcdf(files_list)\n", + "ds.clean.cleanup()\n", + "print(f'{len(files_list)} files loaded')\n", + "ds\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "4a551094-9ec0-4b64-b80a-9940573c2f50", + "metadata": {}, + "source": [ + "## Plot time series data\n", + "#### Define the list of variables to be plotted:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d09b789e-84f1-4605-846b-a72c110c8048", + "metadata": {}, + "outputs": [], + "source": [ + "variables_to_plot = ['BestEstimate_down_short_hemisp', 'down_short_hemisp', 'down_short_diffuse_hemisp']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3458fb08-035b-4898-9253-0a94e6f9c97b", + "metadata": {}, + "outputs": [], + "source": [ + "ts_display = act.plotting.TimeSeriesDisplay(ds)\n", + "ts_display.add_subplots((len(variables_to_plot),), figsize = (9.5,4*len(variables_to_plot)))\n", + "\n", + "for i,v in enumerate(variables_to_plot):\n", + " ts_ax = ts_display.plot(v, subplot_index=(i,), set_title=ds.variables[v].attrs['long_name'],)\n", + " ts_ax.grid()\n", + "\n", + "plt.show()\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "194399aa-1907-452b-8ba9-bc31d7f60291", + "metadata": {}, + "source": [ + "## Quality check plots\n", + "#### Define variable for QC plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9a0ef63d-3eeb-48fc-a24a-43258b6134b8", + "metadata": {}, + "outputs": [], + "source": [ + "qc_variable = 'BestEstimate_down_short_hemisp'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "532663a3-4dc0-4497-bda8-018c5f91e1c4", + "metadata": {}, + "outputs": [], + "source": [ + "# QC Plot\n", + "if ('qc_'+qc_variable) in ds.variables:\n", + "\n", + " # Plot\n", + " qc_display = act.plotting.TimeSeriesDisplay(ds)\n", + " qc_display.add_subplots((2,), figsize = (9.5,10))\n", + " qc_ax = 
qc_display.plot(qc_variable, subplot_index=(0,), set_title=\"QC results on field: \" + qc_variable,)\n", + " qc_ax.grid()\n", + " qc_display.qc_flag_block_plot(qc_variable, subplot_index=(1,))\n", + "\n", + " plt.show()\n", + "else:\n", + " print(f'QC not available for the selected field: {qc_variable}')\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "2308db16-f362-4033-a11a-c3e5e75ad9ba", + "metadata": {}, + "source": [ + "## Field selection dropdown menu\n", + "Select variable to be plotted from a dropdown menu" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d5792fa8-2253-441a-8ac6-1e07a0345031", + "metadata": {}, + "outputs": [], + "source": [ + "plt.ioff()\n", + "\n", + "# populate dropdown menu with available variables \n", + "available_variables = [v for v in ds.variables if not('time' in v or v.startswith('qc_') or v.startswith('source_')) and 'long_name' in ds.variables[v].attrs]\n", + "d_variable = 'BestEstimate_down_short_hemisp'\n", + "dropdown = widgets.Dropdown(\n", + " options = [(ds.variables[v].attrs['long_name'], v) for v in available_variables],\n", + " value= d_variable,\n", + " description='Field:',\n", + " disabled=False,\n", + ")\n", + "dropdown.layout.margin = '0px 30% 0px 20%'\n", + "dropdown.layout.width = '50%'\n", + "\n", + "# set up display\n", + "i_display = act.plotting.TimeSeriesDisplay(ds)\n", + "i_display.add_subplots((1,), figsize = (9.5,5))\n", + "i_ax = i_display.plot(d_variable, subplot_index=(0,), set_title=ds.variables[d_variable].attrs['long_name'],)\n", + "i_ax.grid()\n", + "i_fig = i_display.fig\n", + "\n", + "# update plot callback function\n", + "def update_plot(change):\n", + " i_ax.cla()\n", + " i_ax_new = i_display.plot(change.new, subplot_index=(0,), set_title=ds.variables[change.new].attrs['long_name'],)\n", + " i_ax_new.grid()\n", + " i_fig.canvas.draw()\n", + " i_fig.canvas.flush_events()\n", + "\n", + "dropdown.observe(update_plot, names='value')\n", + "\n", + 
"widgets.AppLayout(\n", + " header=dropdown,\n", + " center=i_fig.canvas,\n", + " pane_heights=[1, 6,1]\n", + ")\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.12" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/QCRAD/qcradbrs1long.c1.ipynb b/VAPs/quicklook/QCRAD/qcradbrs1long.c1.ipynb new file mode 100644 index 00000000..2cd5ba7a --- /dev/null +++ b/VAPs/quicklook/QCRAD/qcradbrs1long.c1.ipynb @@ -0,0 +1,339 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# QCRADBRS1LONG.C1 Plots\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/qcrad) for more information about this vap." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib widget\n", + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import ipywidgets as widgets\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "# Data archive directory\n", + "DATA_DIR = r'/data/archive/'\n", + "\n", + "# Datastream info\n", + "DATASTREAM_NAME = 'qcradbrs1long'\n", + "DATA_LEVEL = 'c1'\n", + "LOCATIONS = [{'end_date': '2023-12-18', 'facility': 'C1', 'site': 'sgp', 'start_date': '1993-09-01'}]" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "9faaf875", + "metadata": {}, + "source": [ + "## Define site, facility, and date range" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ac6764f5", + "metadata": {}, + "outputs": [], + "source": [ + "print(\"The following locations and date ranges are available for this VAP:\")\n", + "display(pd.DataFrame(LOCATIONS, columns=['site', 'facility', 'start_date', 'end_date']))" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "8d132223", + "metadata": {}, + "source": [ + "#### Define site, facility, and date range (date format: YYYY-MM-DD) using the variables below:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e563983a", + "metadata": {}, + "outputs": [], + "source": [ + "site_facility = ( 'sgp', 'C1' )\n", + "\n", + "date_start = '2023-12-16'\n", + "date_end = '2023-12-18'" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "bccd3dfe-2f99-49a5-bace-ea37e8dd8fc3", + "metadata": {}, + "source": [ + "## Load data files\n", + "Load data files from /data/archive/" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cb4b9a26-c574-49c0-a521-658fa553e39e", + "metadata": 
{}, + "outputs": [], + "source": [ + "# Compile list of files\n", + "site, facility = site_facility\n", + "d_date_start = datetime.strptime(date_start, '%Y-%m-%d')\n", + "d_date_end = datetime.strptime(date_end, '%Y-%m-%d')\n", + "dir_path = os.path.join(DATA_DIR + site, site + DATASTREAM_NAME + facility + r'.' + DATA_LEVEL )\n", + "dir_path\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6be8f3dc", + "metadata": {}, + "outputs": [], + "source": [ + "from datetime import date, timedelta\n", + "import pandas as pd\n", + "\n", + "def get_ARM_formated_dates(start_date, end_date):\n", + " \"\"\"\n", + " Get a list of ARM conventional formated date lists, based on start_date and end_date(inclusive)\n", + " EXAMPLE:\n", + " get_ARM_formated_dates(start_date=\"20180219\", end_date=\"20180221\")\n", + " >> [\"20180219\", \"20180220\", \"20180221\"] \n", + " \"\"\"\n", + " \n", + " _start_date = pd.to_datetime(start_date)\n", + " _end_date = pd.to_datetime(end_date)\n", + " \n", + " delta = _end_date - _start_date # returns timedelta \n", + " dates = []\n", + "\n", + " for i in range(delta.days + 1):\n", + " day = _start_date + timedelta(days=i)\n", + " day_formated = day.strftime(format=\"%Y%m%d\")\n", + " dates.append(day_formated)\n", + " return dates\n", + "\n", + "\n", + "get_ARM_formated_dates(start_date=date_start, end_date=date_end)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "51feea2e", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter a list of files based on date pattern\n", + "import glob\n", + "dates = get_ARM_formated_dates(start_date=date_start, end_date=date_end)\n", + "files_filter = []\n", + "for date in dates:\n", + " files_filter += glob.glob(f'{dir_path}/*.{date}*.*')\n", + " files_filter\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0e5d0d7", + "metadata": {}, + "outputs": [], + "source": [ + "# Load files as a single dataset\n", + 
"files_list = files_filter \n", + "ds = act.io.armfiles.read_netcdf(files_list)\n", + "ds.clean.cleanup()\n", + "print(f'{len(files_list)} files loaded')\n", + "ds\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "4a551094-9ec0-4b64-b80a-9940573c2f50", + "metadata": {}, + "source": [ + "## Plot time series data\n", + "#### Define the list of variables to be plotted:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d09b789e-84f1-4605-846b-a72c110c8048", + "metadata": {}, + "outputs": [], + "source": [ + "variables_to_plot = ['BestEstimate_down_short_hemisp', 'down_short_hemisp', 'down_short_diffuse_hemisp']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3458fb08-035b-4898-9253-0a94e6f9c97b", + "metadata": {}, + "outputs": [], + "source": [ + "ts_display = act.plotting.TimeSeriesDisplay(ds)\n", + "ts_display.add_subplots((len(variables_to_plot),), figsize = (9.5,4*len(variables_to_plot)))\n", + "\n", + "for i,v in enumerate(variables_to_plot):\n", + " ts_ax = ts_display.plot(v, subplot_index=(i,), set_title=ds.variables[v].attrs['long_name'],)\n", + " ts_ax.grid()\n", + "\n", + "plt.show()\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "194399aa-1907-452b-8ba9-bc31d7f60291", + "metadata": {}, + "source": [ + "## Quality check plots\n", + "#### Define variable for QC plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9a0ef63d-3eeb-48fc-a24a-43258b6134b8", + "metadata": {}, + "outputs": [], + "source": [ + "qc_variable = 'BestEstimate_down_short_hemisp'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "532663a3-4dc0-4497-bda8-018c5f91e1c4", + "metadata": {}, + "outputs": [], + "source": [ + "# QC Plot\n", + "if ('qc_'+qc_variable) in ds.variables:\n", + "\n", + " # Plot\n", + " qc_display = act.plotting.TimeSeriesDisplay(ds)\n", + " qc_display.add_subplots((2,), figsize = (9.5,10))\n", + " qc_ax = 
qc_display.plot(qc_variable, subplot_index=(0,), set_title=\"QC results on field: \" + qc_variable,)\n", + " qc_ax.grid()\n", + " qc_display.qc_flag_block_plot(qc_variable, subplot_index=(1,))\n", + "\n", + " plt.show()\n", + "else:\n", + " print(f'QC not available for the selected field: {qc_variable}')\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "2308db16-f362-4033-a11a-c3e5e75ad9ba", + "metadata": {}, + "source": [ + "## Field selection dropdown menu\n", + "Select variable to be plotted from a dropdown menu" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d5792fa8-2253-441a-8ac6-1e07a0345031", + "metadata": {}, + "outputs": [], + "source": [ + "plt.ioff()\n", + "\n", + "# populate dropdown menu with available variables \n", + "available_variables = [v for v in ds.variables if not('time' in v or v.startswith('qc_') or v.startswith('source_')) and 'long_name' in ds.variables[v].attrs]\n", + "d_variable = 'BestEstimate_down_short_hemisp'\n", + "dropdown = widgets.Dropdown(\n", + " options = [(ds.variables[v].attrs['long_name'], v) for v in available_variables],\n", + " value= d_variable,\n", + " description='Field:',\n", + " disabled=False,\n", + ")\n", + "dropdown.layout.margin = '0px 30% 0px 20%'\n", + "dropdown.layout.width = '50%'\n", + "\n", + "# set up display\n", + "i_display = act.plotting.TimeSeriesDisplay(ds)\n", + "i_display.add_subplots((1,), figsize = (9.5,5))\n", + "i_ax = i_display.plot(d_variable, subplot_index=(0,), set_title=ds.variables[d_variable].attrs['long_name'],)\n", + "i_ax.grid()\n", + "i_fig = i_display.fig\n", + "\n", + "# update plot callback function\n", + "def update_plot(change):\n", + " i_ax.cla()\n", + " i_ax_new = i_display.plot(change.new, subplot_index=(0,), set_title=ds.variables[change.new].attrs['long_name'],)\n", + " i_ax_new.grid()\n", + " i_fig.canvas.draw()\n", + " i_fig.canvas.flush_events()\n", + "\n", + "dropdown.observe(update_plot, names='value')\n", + "\n", + 
"widgets.AppLayout(\n", + " header=dropdown,\n", + " center=i_fig.canvas,\n", + " pane_heights=[1, 6,1]\n", + ")\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.12" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/QCRAD/qcradbrs1long.c2.ipynb b/VAPs/quicklook/QCRAD/qcradbrs1long.c2.ipynb new file mode 100644 index 00000000..9ea94c7b --- /dev/null +++ b/VAPs/quicklook/QCRAD/qcradbrs1long.c2.ipynb @@ -0,0 +1,339 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# QCRADBRS1LONG.C2 Plots\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/qcrad) for more information about this vap." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib widget\n", + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import ipywidgets as widgets\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "# Data archive directory\n", + "DATA_DIR = r'/data/archive/'\n", + "\n", + "# Datastream info\n", + "DATASTREAM_NAME = 'qcradbrs1long'\n", + "DATA_LEVEL = 'c2'\n", + "LOCATIONS = [{'end_date': '2022-08-09', 'facility': 'C1', 'site': 'sgp', 'start_date': '1993-09-01'}]" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "9faaf875", + "metadata": {}, + "source": [ + "## Define site, facility, and date range" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ac6764f5", + "metadata": {}, + "outputs": [], + "source": [ + "print(\"The following locations and date ranges are available for this VAP:\")\n", + "display(pd.DataFrame(LOCATIONS, columns=['site', 'facility', 'start_date', 'end_date']))" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "8d132223", + "metadata": {}, + "source": [ + "#### Define site, facility, and date range (date format: YYYY-MM-DD) using the variables below:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e563983a", + "metadata": {}, + "outputs": [], + "source": [ + "site_facility = ( 'sgp', 'C1' )\n", + "\n", + "date_start = '2022-08-07'\n", + "date_end = '2022-08-09'" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "bccd3dfe-2f99-49a5-bace-ea37e8dd8fc3", + "metadata": {}, + "source": [ + "## Load data files\n", + "Load data files from /data/archive/" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cb4b9a26-c574-49c0-a521-658fa553e39e", + "metadata": 
{}, + "outputs": [], + "source": [ + "# Compile list of files\n", + "site, facility = site_facility\n", + "d_date_start = datetime.strptime(date_start, '%Y-%m-%d')\n", + "d_date_end = datetime.strptime(date_end, '%Y-%m-%d')\n", + "dir_path = os.path.join(DATA_DIR + site, site + DATASTREAM_NAME + facility + r'.' + DATA_LEVEL )\n", + "dir_path\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6be8f3dc", + "metadata": {}, + "outputs": [], + "source": [ + "from datetime import date, timedelta\n", + "import pandas as pd\n", + "\n", + "def get_ARM_formated_dates(start_date, end_date):\n", + " \"\"\"\n", + " Get a list of ARM conventional formated date lists, based on start_date and end_date(inclusive)\n", + " EXAMPLE:\n", + " get_ARM_formated_dates(start_date=\"20180219\", end_date=\"20180221\")\n", + " >> [\"20180219\", \"20180220\", \"20180221\"] \n", + " \"\"\"\n", + " \n", + " _start_date = pd.to_datetime(start_date)\n", + " _end_date = pd.to_datetime(end_date)\n", + " \n", + " delta = _end_date - _start_date # returns timedelta \n", + " dates = []\n", + "\n", + " for i in range(delta.days + 1):\n", + " day = _start_date + timedelta(days=i)\n", + " day_formated = day.strftime(format=\"%Y%m%d\")\n", + " dates.append(day_formated)\n", + " return dates\n", + "\n", + "\n", + "get_ARM_formated_dates(start_date=date_start, end_date=date_end)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "51feea2e", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter a list of files based on date pattern\n", + "import glob\n", + "dates = get_ARM_formated_dates(start_date=date_start, end_date=date_end)\n", + "files_filter = []\n", + "for date in dates:\n", + " files_filter += glob.glob(f'{dir_path}/*.{date}*.*')\n", + " files_filter\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0e5d0d7", + "metadata": {}, + "outputs": [], + "source": [ + "# Load files as a single dataset\n", + 
"files_list = files_filter \n", + "ds = act.io.armfiles.read_netcdf(files_list)\n", + "ds.clean.cleanup()\n", + "print(f'{len(files_list)} files loaded')\n", + "ds\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "4a551094-9ec0-4b64-b80a-9940573c2f50", + "metadata": {}, + "source": [ + "## Plot time series data\n", + "#### Define the list of variables to be plotted:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d09b789e-84f1-4605-846b-a72c110c8048", + "metadata": {}, + "outputs": [], + "source": [ + "variables_to_plot = ['BestEstimate_down_short_hemisp', 'down_short_hemisp', 'down_short_diffuse_hemisp']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3458fb08-035b-4898-9253-0a94e6f9c97b", + "metadata": {}, + "outputs": [], + "source": [ + "ts_display = act.plotting.TimeSeriesDisplay(ds)\n", + "ts_display.add_subplots((len(variables_to_plot),), figsize = (9.5,4*len(variables_to_plot)))\n", + "\n", + "for i,v in enumerate(variables_to_plot):\n", + " ts_ax = ts_display.plot(v, subplot_index=(i,), set_title=ds.variables[v].attrs['long_name'],)\n", + " ts_ax.grid()\n", + "\n", + "plt.show()\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "194399aa-1907-452b-8ba9-bc31d7f60291", + "metadata": {}, + "source": [ + "## Quality check plots\n", + "#### Define variable for QC plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9a0ef63d-3eeb-48fc-a24a-43258b6134b8", + "metadata": {}, + "outputs": [], + "source": [ + "qc_variable = 'BestEstimate_down_short_hemisp'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "532663a3-4dc0-4497-bda8-018c5f91e1c4", + "metadata": {}, + "outputs": [], + "source": [ + "# QC Plot\n", + "if ('qc_'+qc_variable) in ds.variables:\n", + "\n", + " # Plot\n", + " qc_display = act.plotting.TimeSeriesDisplay(ds)\n", + " qc_display.add_subplots((2,), figsize = (9.5,10))\n", + " qc_ax = 
qc_display.plot(qc_variable, subplot_index=(0,), set_title=\"QC results on field: \" + qc_variable,)\n", + " qc_ax.grid()\n", + " qc_display.qc_flag_block_plot(qc_variable, subplot_index=(1,))\n", + "\n", + " plt.show()\n", + "else:\n", + " print(f'QC not available for the selected field: {qc_variable}')\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "2308db16-f362-4033-a11a-c3e5e75ad9ba", + "metadata": {}, + "source": [ + "## Field selection dropdown menu\n", + "Select variable to be plotted from a dropdown menu" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d5792fa8-2253-441a-8ac6-1e07a0345031", + "metadata": {}, + "outputs": [], + "source": [ + "plt.ioff()\n", + "\n", + "# populate dropdown menu with available variables \n", + "available_variables = [v for v in ds.variables if not('time' in v or v.startswith('qc_') or v.startswith('source_')) and 'long_name' in ds.variables[v].attrs]\n", + "d_variable = 'BestEstimate_down_short_hemisp'\n", + "dropdown = widgets.Dropdown(\n", + " options = [(ds.variables[v].attrs['long_name'], v) for v in available_variables],\n", + " value= d_variable,\n", + " description='Field:',\n", + " disabled=False,\n", + ")\n", + "dropdown.layout.margin = '0px 30% 0px 20%'\n", + "dropdown.layout.width = '50%'\n", + "\n", + "# set up display\n", + "i_display = act.plotting.TimeSeriesDisplay(ds)\n", + "i_display.add_subplots((1,), figsize = (9.5,5))\n", + "i_ax = i_display.plot(d_variable, subplot_index=(0,), set_title=ds.variables[d_variable].attrs['long_name'],)\n", + "i_ax.grid()\n", + "i_fig = i_display.fig\n", + "\n", + "# update plot callback function\n", + "def update_plot(change):\n", + " i_ax.cla()\n", + " i_ax_new = i_display.plot(change.new, subplot_index=(0,), set_title=ds.variables[change.new].attrs['long_name'],)\n", + " i_ax_new.grid()\n", + " i_fig.canvas.draw()\n", + " i_fig.canvas.flush_events()\n", + "\n", + "dropdown.observe(update_plot, names='value')\n", + "\n", + 
"widgets.AppLayout(\n", + " header=dropdown,\n", + " center=i_fig.canvas,\n", + " pane_heights=[1, 6,1]\n", + ")\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.12" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/RADFLUXANAL/.ipynb_checkpoints/radflux1long.c1-checkpoint.ipynb b/VAPs/quicklook/RADFLUXANAL/.ipynb_checkpoints/radflux1long.c1-checkpoint.ipynb new file mode 100644 index 00000000..771298d9 --- /dev/null +++ b/VAPs/quicklook/RADFLUXANAL/.ipynb_checkpoints/radflux1long.c1-checkpoint.ipynb @@ -0,0 +1,3763 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# RADFLUX1LONG.C1 Plots\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/radfluxanal) for more information about this vap." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib widget\n", + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import ipywidgets as widgets\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "# Data archive directory\n", + "DATA_DIR = r'/data/archive/'\n", + "\n", + "# Datastream info\n", + "DATASTREAM_NAME = 'radflux1long'\n", + "DATA_LEVEL = 'c1'\n", + "LOCATIONS = [{'end_date': '2023-09-14', 'facility': 'C1', 'site': 'nsa', 'start_date': '2021-07-04'}, {'end_date': '2021-06-12', 'facility': 'M1', 'site': 'oli', 'start_date': '2020-10-16'}, {'end_date': '2023-09-28', 'facility': 'E11', 'site': 'sgp', 'start_date': '2020-06-08'}, {'end_date': '2023-10-30', 'facility': 'E12', 'site': 'sgp', 'start_date': '2020-06-07'}, {'end_date': '2023-10-26', 'facility': 'E13', 'site': 'sgp', 'start_date': '2020-06-03'}, {'end_date': '2023-09-28', 'facility': 'E15', 'site': 'sgp', 'start_date': '2021-07-12'}, {'end_date': '2023-09-28', 'facility': 'E9', 'site': 'sgp', 'start_date': '2021-07-04'}, {'end_date': '2021-09-20', 'facility': 'E31', 'site': 'sgp', 'start_date': '2020-06-08'}, {'end_date': '2023-10-31', 'facility': 'E32', 'site': 'sgp', 'start_date': '2021-07-03'}, {'end_date': '2023-10-31', 'facility': 'E33', 'site': 'sgp', 'start_date': '2020-06-11'}, {'end_date': '2023-09-28', 'facility': 'E34', 'site': 'sgp', 'start_date': '2020-06-18'}, {'end_date': '2023-09-28', 'facility': 'E35', 'site': 'sgp', 'start_date': '2019-06-24'}, {'end_date': '2023-09-28', 'facility': 'E36', 'site': 'sgp', 'start_date': '2020-06-26'}, {'end_date': '2023-10-31', 'facility': 'E37', 'site': 'sgp', 'start_date': '2020-07-01'}, {'end_date': '2021-05-29', 'facility': 'E38', 'site': 'sgp', 'start_date': '2020-06-25'}, 
{'end_date': '2023-10-30', 'facility': 'E39', 'site': 'sgp', 'start_date': '2020-09-07'}, {'end_date': '2023-09-28', 'facility': 'E40', 'site': 'sgp', 'start_date': '2020-09-07'}, {'end_date': '2023-08-02', 'facility': 'E41', 'site': 'sgp', 'start_date': '2021-07-31'}]" + ] + }, + { + "cell_type": "markdown", + "id": "9faaf875", + "metadata": {}, + "source": [ + "## Define site, facility, and date range" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "ac6764f5", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The following locations and date ranges are available for this VAP:\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sitefacilitystart_dateend_date
0nsaC12021-07-042023-09-14
1oliM12020-10-162021-06-12
2sgpE112020-06-082023-09-28
3sgpE122020-06-072023-10-30
4sgpE132020-06-032023-10-26
5sgpE152021-07-122023-09-28
6sgpE92021-07-042023-09-28
7sgpE312020-06-082021-09-20
8sgpE322021-07-032023-10-31
9sgpE332020-06-112023-10-31
10sgpE342020-06-182023-09-28
11sgpE352019-06-242023-09-28
12sgpE362020-06-262023-09-28
13sgpE372020-07-012023-10-31
14sgpE382020-06-252021-05-29
15sgpE392020-09-072023-10-30
16sgpE402020-09-072023-09-28
17sgpE412021-07-312023-08-02
\n", + "
" + ], + "text/plain": [ + " site facility start_date end_date\n", + "0 nsa C1 2021-07-04 2023-09-14\n", + "1 oli M1 2020-10-16 2021-06-12\n", + "2 sgp E11 2020-06-08 2023-09-28\n", + "3 sgp E12 2020-06-07 2023-10-30\n", + "4 sgp E13 2020-06-03 2023-10-26\n", + "5 sgp E15 2021-07-12 2023-09-28\n", + "6 sgp E9 2021-07-04 2023-09-28\n", + "7 sgp E31 2020-06-08 2021-09-20\n", + "8 sgp E32 2021-07-03 2023-10-31\n", + "9 sgp E33 2020-06-11 2023-10-31\n", + "10 sgp E34 2020-06-18 2023-09-28\n", + "11 sgp E35 2019-06-24 2023-09-28\n", + "12 sgp E36 2020-06-26 2023-09-28\n", + "13 sgp E37 2020-07-01 2023-10-31\n", + "14 sgp E38 2020-06-25 2021-05-29\n", + "15 sgp E39 2020-09-07 2023-10-30\n", + "16 sgp E40 2020-09-07 2023-09-28\n", + "17 sgp E41 2021-07-31 2023-08-02" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "print(\"The following locations and date ranges are available for this VAP:\")\n", + "display(pd.DataFrame(LOCATIONS, columns=['site', 'facility', 'start_date', 'end_date']))" + ] + }, + { + "cell_type": "markdown", + "id": "8d132223", + "metadata": {}, + "source": [ + "#### Define site, facility, and date range (date format: YYYY-MM-DD) using the variables below:" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "e563983a", + "metadata": {}, + "outputs": [], + "source": [ + "site_facility = ( 'sgp', 'E11' )\n", + "\n", + "date_start = '2023-09-25'\n", + "date_end = '2023-09-27'" + ] + }, + { + "cell_type": "markdown", + "id": "bccd3dfe-2f99-49a5-bace-ea37e8dd8fc3", + "metadata": {}, + "source": [ + "## Load data files\n", + "Load data files from /data/archive/" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "cb4b9a26-c574-49c0-a521-658fa553e39e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'/data/archive/sgp/sgpradflux1longE11.c1'" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Compile list of 
files\n", + "site, facility = site_facility\n", + "d_date_start = datetime.strptime(date_start, '%Y-%m-%d')\n", + "d_date_end = datetime.strptime(date_end, '%Y-%m-%d')\n", + "dir_path = os.path.join(DATA_DIR + site, site + DATASTREAM_NAME + facility + r'.' + DATA_LEVEL )\n", + "dir_path\n" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "6be8f3dc", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['20230925', '20230926', '20230927']" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from datetime import date, timedelta\n", + "import pandas as pd\n", + "\n", + "def get_ARM_formated_dates(start_date, end_date):\n", + " \"\"\"\n", + " Get a list of ARM conventional formated date lists, based on start_date and end_date(inclusive)\n", + " EXAMPLE:\n", + " get_ARM_formated_dates(start_date=\"20180219\", end_date=\"20180221\")\n", + " >> [\"20180219\", \"20180220\", \"20180221\"] \n", + " \"\"\"\n", + " \n", + " _start_date = pd.to_datetime(start_date)\n", + " _end_date = pd.to_datetime(end_date)\n", + " \n", + " delta = _end_date - _start_date # returns timedelta \n", + " dates = []\n", + "\n", + " for i in range(delta.days + 1):\n", + " day = _start_date + timedelta(days=i)\n", + " day_formated = day.strftime(format=\"%Y%m%d\")\n", + " dates.append(day_formated)\n", + " return dates\n", + "\n", + "\n", + "get_ARM_formated_dates(start_date=date_start, end_date=date_end)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "51feea2e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['/data/archive/sgp/sgpradflux1longE11.c1/sgpradflux1longE11.c1.20230925.070000.nc',\n", + " '/data/archive/sgp/sgpradflux1longE11.c1/sgpradflux1longE11.c1.20230926.070000.nc',\n", + " '/data/archive/sgp/sgpradflux1longE11.c1/sgpradflux1longE11.c1.20230927.070000.nc']" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } 
+ ], + "source": [ + "# Filter a list of files based on date pattern\n", + "import glob\n", + "dates = get_ARM_formated_dates(start_date=date_start, end_date=date_end)\n", + "files_filter = []\n", + "for date in dates:\n", + " files_filter += glob.glob(f'{dir_path}/*.{date}*.*')\n", + " files_filter\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "b0e5d0d7", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "3 files loaded\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
<xarray.Dataset>\n",
+       "Dimensions:                                        (time: 4320, bound: 2)\n",
+       "Coordinates:\n",
+       "  * time                                           (time) datetime64[ns] 2023...\n",
+       "Dimensions without coordinates: bound\n",
+       "Data variables: (12/54)\n",
+       "    base_time                                      (time) datetime64[ns] 2023...\n",
+       "    time_offset                                    (time) datetime64[ns] 2023...\n",
+       "    time_bounds                                    (time, bound) object dask.array<chunksize=(1440, 2), meta=np.ndarray>\n",
+       "    downwelling_shortwave                          (time) float32 dask.array<chunksize=(1440,), meta=np.ndarray>\n",
+       "    source_downwelling_shortwave                   (time) int32 dask.array<chunksize=(1440,), meta=np.ndarray>\n",
+       "    qc_downwelling_shortwave                       (time) int32 dask.array<chunksize=(1440,), meta=np.ndarray>\n",
+       "    ...                                             ...\n",
+       "    qc_pressure                                    (time) int32 dask.array<chunksize=(1440,), meta=np.ndarray>\n",
+       "    precipitation                                  (time) float32 dask.array<chunksize=(1440,), meta=np.ndarray>\n",
+       "    qc_precipitation                               (time) int32 dask.array<chunksize=(1440,), meta=np.ndarray>\n",
+       "    lat                                            (time) float32 36.88 ... 3...\n",
+       "    lon                                            (time) float32 -98.29 ... ...\n",
+       "    alt                                            (time) float32 360.0 ... 3...\n",
+       "Attributes: (12/21)\n",
+       "    command_line:            radflux1long -s sgp -f E11 -b 20230901 -e 202310...\n",
+       "    Conventions:             ARM-1.3\n",
+       "    process_version:         radflux1long-3.16.0\n",
+       "    dod_version:             radflux1long-c1-1.6\n",
+       "    input_datastreams:       sgpqcrad1longE11.c1 : 6.6 : 20230629.000000-2023...\n",
+       "    site_id:                 sgp\n",
+       "    ...                      ...\n",
+       "    fitmode_comment:         01 = daily_fit 00 =  1_fit\n",
+       "    history:                 created by user dsmgr on machine prod-proc2.adc....\n",
+       "    _file_dates:             ['20230925', '20230926', '20230927']\n",
+       "    _file_times:             ['070000', '070000', '070000']\n",
+       "    _datastream:             sgpradflux1longE11.c1\n",
+       "    _arm_standards_flag:     1
" + ], + "text/plain": [ + "\n", + "Dimensions: (time: 4320, bound: 2)\n", + "Coordinates:\n", + " * time (time) datetime64[ns] 2023...\n", + "Dimensions without coordinates: bound\n", + "Data variables: (12/54)\n", + " base_time (time) datetime64[ns] 2023...\n", + " time_offset (time) datetime64[ns] 2023...\n", + " time_bounds (time, bound) object dask.array\n", + " downwelling_shortwave (time) float32 dask.array\n", + " source_downwelling_shortwave (time) int32 dask.array\n", + " qc_downwelling_shortwave (time) int32 dask.array\n", + " ... ...\n", + " qc_pressure (time) int32 dask.array\n", + " precipitation (time) float32 dask.array\n", + " qc_precipitation (time) int32 dask.array\n", + " lat (time) float32 36.88 ... 3...\n", + " lon (time) float32 -98.29 ... ...\n", + " alt (time) float32 360.0 ... 3...\n", + "Attributes: (12/21)\n", + " command_line: radflux1long -s sgp -f E11 -b 20230901 -e 202310...\n", + " Conventions: ARM-1.3\n", + " process_version: radflux1long-3.16.0\n", + " dod_version: radflux1long-c1-1.6\n", + " input_datastreams: sgpqcrad1longE11.c1 : 6.6 : 20230629.000000-2023...\n", + " site_id: sgp\n", + " ... 
...\n", + " fitmode_comment: 01 = daily_fit 00 = 1_fit\n", + " history: created by user dsmgr on machine prod-proc2.adc....\n", + " _file_dates: ['20230925', '20230926', '20230927']\n", + " _file_times: ['070000', '070000', '070000']\n", + " _datastream: sgpradflux1longE11.c1\n", + " _arm_standards_flag: 1" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Load files as a single dataset\n", + "files_list = files_filter \n", + "ds = act.io.armfiles.read_netcdf(files_list)\n", + "ds.clean.cleanup()\n", + "print(f'{len(files_list)} files loaded')\n", + "ds\n" + ] + }, + { + "cell_type": "markdown", + "id": "4a551094-9ec0-4b64-b80a-9940573c2f50", + "metadata": {}, + "source": [ + "## Plot time series data\n", + "#### Define the list of variables to be plotted:" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "d09b789e-84f1-4605-846b-a72c110c8048", + "metadata": {}, + "outputs": [], + "source": [ + "variables_to_plot = ['downwelling_shortwave', 'clearsky_downwelling_shortwave', 'downwelling_longwave']" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "3458fb08-035b-4898-9253-0a94e6f9c97b", + "metadata": {}, + "outputs": [], + "source": [ + "ts_display = act.plotting.TimeSeriesDisplay(ds)\n", + "ts_display.add_subplots((len(variables_to_plot),), figsize = (9.5,4*len(variables_to_plot)))\n", + "\n", + "for i,v in enumerate(variables_to_plot):\n", + " ts_ax = ts_display.plot(v, subplot_index=(i,), set_title=ds.variables[v].attrs['long_name'],)\n", + " ts_ax.grid()\n", + "\n", + "plt.show()\n" + ] + }, + { + "cell_type": "markdown", + "id": "194399aa-1907-452b-8ba9-bc31d7f60291", + "metadata": {}, + "source": [ + "## Quality check plots\n", + "#### Define variable for QC plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9a0ef63d-3eeb-48fc-a24a-43258b6134b8", + "metadata": {}, + "outputs": [], + "source": [ + "qc_variable = 
'downwelling_shortwave'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "532663a3-4dc0-4497-bda8-018c5f91e1c4", + "metadata": {}, + "outputs": [], + "source": [ + "# QC Plot\n", + "if ('qc_'+qc_variable) in ds.variables:\n", + "\n", + " # Plot\n", + " qc_display = act.plotting.TimeSeriesDisplay(ds)\n", + " qc_display.add_subplots((2,), figsize = (9.5,10))\n", + " qc_ax = qc_display.plot(qc_variable, subplot_index=(0,), set_title=\"QC results on field: \" + qc_variable,)\n", + " qc_ax.grid()\n", + " qc_display.qc_flag_block_plot(qc_variable, subplot_index=(1,))\n", + "\n", + " plt.show()\n", + "else:\n", + " print(f'QC not available for the selected field: {qc_variable}')\n" + ] + }, + { + "cell_type": "markdown", + "id": "2308db16-f362-4033-a11a-c3e5e75ad9ba", + "metadata": {}, + "source": [ + "## Field selection dropdown menu\n", + "Select variable to be plotted from a dropdown menu" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d5792fa8-2253-441a-8ac6-1e07a0345031", + "metadata": {}, + "outputs": [], + "source": [ + "plt.ioff()\n", + "\n", + "# populate dropdown menu with available variables \n", + "available_variables = [v for v in ds.variables if not('time' in v or v.startswith('qc_') or v.startswith('source_')) and 'long_name' in ds.variables[v].attrs]\n", + "d_variable = 'downwelling_shortwave'\n", + "dropdown = widgets.Dropdown(\n", + " options = [(ds.variables[v].attrs['long_name'], v) for v in available_variables],\n", + " value= d_variable,\n", + " description='Field:',\n", + " disabled=False,\n", + ")\n", + "dropdown.layout.margin = '0px 30% 0px 20%'\n", + "dropdown.layout.width = '50%'\n", + "\n", + "# set up display\n", + "i_display = act.plotting.TimeSeriesDisplay(ds)\n", + "i_display.add_subplots((1,), figsize = (9.5,5))\n", + "i_ax = i_display.plot(d_variable, subplot_index=(0,), set_title=ds.variables[d_variable].attrs['long_name'],)\n", + "i_ax.grid()\n", + "i_fig = i_display.fig\n", + "\n", + "# 
update plot callback function\n", + "def update_plot(change):\n", + " i_ax.cla()\n", + " i_ax_new = i_display.plot(change.new, subplot_index=(0,), set_title=ds.variables[change.new].attrs['long_name'],)\n", + " i_ax_new.grid()\n", + " i_fig.canvas.draw()\n", + " i_fig.canvas.flush_events()\n", + "\n", + "dropdown.observe(update_plot, names='value')\n", + "\n", + "widgets.AppLayout(\n", + " header=dropdown,\n", + " center=i_fig.canvas,\n", + " pane_heights=[1, 6,1]\n", + ")\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.12" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/RADFLUXANAL/RADFLUXANAL_tutorial.ipynb b/VAPs/quicklook/RADFLUXANAL/RADFLUXANAL_tutorial.ipynb new file mode 100644 index 00000000..89cd6553 --- /dev/null +++ b/VAPs/quicklook/RADFLUXANAL/RADFLUXANAL_tutorial.ipynb @@ -0,0 +1,838 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# RADFLUX1LONG.C1 Notebook\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/radfluxanal) for more information about this vap." + ] + }, + { + "cell_type": "markdown", + "id": "97097763", + "metadata": {}, + "source": [ + "In this notebook, we demonstrate the workflow to explore ARM vap data (using radflux1long as an example.) Value-added products (VAPs) are higher-order data products that have been analyzed and processed to ease scientists’ use of ARM data in atmospheric research and global climate models. 
\n", + "Here is the main content we will cover." + ] + }, + { + "cell_type": "markdown", + "id": "eddec40f", + "metadata": {}, + "source": [ + "# Table of Content\n", + "## Access the data\n", + "* How to retrieve the data\n", + "* Data path and file name conventions\n", + "* Load data\n", + "## Explore the data\n", + "* NetCDF Data structure\n", + "* Xarray essentials\n", + "* Xarray Variable\n", + "## Plot the data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "import random\n", + "\n", + "import glob\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "id": "2372d5be", + "metadata": {}, + "source": [ + "## Access the data" + ] + }, + { + "cell_type": "markdown", + "id": "5b0d9684", + "metadata": {}, + "source": [ + "### How to retrieve the data\n", + "We assume the path \"/data/archive\" is available where you are running this notebook. You can use `os.path.exists(\"/data/archive\")` to verify if the path exists at your machine.\n", + "\n", + "\n", + "### Data path and file name conventions\n", + "There several common terminologies regarding ARM data, for example, data-stream-name, data-level, site, facility, instrument, etc. (For more details, please see [ARM Data File Standards Version 1.2](https://www.google.com/search?q=arm+datastream+facility+cite+definition&rlz=1C1GCEJ_enUS1029US1029&ei=hb41ZICrBPukqtsPvMODIA&ved=0ahUKEwjAgeiS0aL-AhV7kmoFHbzhAAQQ4dUDCBA&uact=5&oq=arm+datastream+facility+cite+definition&gs_lcp=Cgxnd3Mtd2l6LXNlcnAQAzIFCAAQogQyBQgAEKIEMgUIABCiBDIFCAAQogQyBQgAEKIEOgoIABBHENYEELADOgoIIRCgARDDBBAKSgQIQRgAUJ4DWIoOYO0PaAFwAXgAgAGMAYgBuQmSAQMyLjmYAQCgAQHIAQjAAQE&sclient=gws-wiz-serp).) 
\n", + "\n", + "For example, this notebook is called `radflux1long.c1`, where `radflux1long` is the \"datastream name\", and `c1` is the \"data level\".\n", + "\n", + "This datastream also contains site `oli` and facility `M1`. (Note: an individual datastream might have multiple site-facility pairs.)\n", + "In such a case, the data of this data-stream is stored at `/data/archive/oli/oliradflux1longM1.c1`, which is in the format of `{DATA_DIR}/{site}/{site}{datastream_name}{facility}.{data_level}`. We can use the following method to assign the data-stream directory `datastream_dir = os.path.join(DATA_DIR, site, site + DATASTREAM_NAME + facility + '.' + DATA_LEVEL )`\n", + "\n", + "The data files under datastream_dir also follow naming conventions. Once you reach the datastream_dir level, the most important file naming convention to differentiate the files is \"yyyyMMdd.hhmmss\", which comes in handy to filter files based on datetime. For example, we can use `glob.glob(f'{datastream_dir}/*.200709*.*')` to filter files from September 2007.\n", + "\n", + "(Note: refer to https://adc.arm.gov/discovery/#/ and the https://adc.arm.gov/solr8/metadata/select API to explore ARM datastreams and their associated available sites and facilities.)\n", + "\n", + "Please see the following examples in action" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d7e9eb85", + "metadata": {}, + "outputs": [], + "source": [ + "# Verify if DATA_DIR path exists\n", + "DATA_DIR = \"/data/archive\"\n", + "os.path.exists(DATA_DIR)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "586993fd", + "metadata": {}, + "outputs": [], + "source": [ + "# Specify datastream_dir following the path conventions and check its existence\n", + "DATASTREAM_NAME = \"radflux1long\"\n", + "DATA_LEVEL = \"c1\"\n", + "site = \"oli\"\n", + "facility = \"M1\"\n", + "datastream_dir = os.path.join(DATA_DIR, site, site + DATASTREAM_NAME + facility + '.' 
+ DATA_LEVEL )\n", + "print(datastream_dir)\n", + "print(os.path.exists(datastream_dir))\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0742f7c1", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: list 5 (random) files under datastream_dir\n", + "files = os.listdir(datastream_dir)\n", + "files[:5]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "39b98a36", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: get most recent file\n", + "list_of_files = glob.glob(f\"{datastream_dir}/*\") # * means all if need specific format then *.csv\n", + "latest_file = max(list_of_files, key=os.path.getctime)\n", + "latest_file" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "902d514e", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus sort datastream files based on datetime\n", + "files = os.listdir(datastream_dir)\n", + "file_sorted = files.copy()\n", + "file_sorted.sort() \n", + "print(file_sorted[:5])\n", + "\n", + "# to reverse\n", + "file_sorted_reverse = files.copy() \n", + "file_sorted_reverse.sort(reverse=True)\n", + "print(file_sorted_reverse[:5])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ec5923b2", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: pattern matching\n", + "# filter the 200709** files under datastream_dir\n", + "files_filter = glob.glob(f'{datastream_dir}/*.200709*.*')\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c4a4aa18", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: use armnotebook_utils.file_filter (TODO) to filter files based on datastream info \n" + ] + }, + { + "cell_type": "markdown", + "id": "cfeb9efc", + "metadata": {}, + "source": [ + "### Load data\n", + "The arm data is stored in [NetCDF](https://en.wikipedia.org/wiki/NetCDF#:~:text=%22NetCDF%20(network%20Common%20Data%20Form,format%20for%20representing%20scientific%20data.) format. 
We can use xarray's `open_dataset` method to load single file, or `open_mfdataset` to open multiple files. (Note: the latter will still return a single xarray dataset object by combining multiple files.)\n", + "\n", + "Note: open_dataset keeps the file handle open and lazy loads its contents. All parameters are passed directly to open_dataset. It is a preferable method over load_dataset for memory-effiency. (more details, please see [xarray.open_dataset](https://docs.xarray.dev/en/stable/generated/xarray.open_dataset.html))\n", + "\n", + "See the following example in action" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a440a329", + "metadata": {}, + "outputs": [], + "source": [ + "# open single file\n", + "full_path = latest_file\n", + "print(full_path)\n", + "ds_single = xr.open_dataset(full_path)\n", + "ds_single" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0143a3d", + "metadata": {}, + "outputs": [], + "source": [ + "print(ds_single)\n", + "print(type(ds_single))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1c0f8939", + "metadata": {}, + "outputs": [], + "source": [ + "# open multiple files\n", + "n_files = 3\n", + "full_paths = [os.path.join(datastream_dir, f_path) for f_path in file_sorted_reverse[:n_files]]\n", + "print(full_paths)\n", + "try: # Note: sometimes multiple files cannot be merged, so we used try except here.\n", + " ds_mutiple = xr.open_mfdataset(full_paths)\n", + " # ds_mutiple\n", + " print(type(ds_mutiple))\n", + " print(ds_mutiple)\n", + "except Exception as e:\n", + " print(e)" + ] + }, + { + "cell_type": "markdown", + "id": "3fa59943", + "metadata": {}, + "source": [ + "## Explore the data" + ] + }, + { + "cell_type": "markdown", + "id": "571f69b9", + "metadata": {}, + "source": [ + "### NetCDF Data structure \n", + "\n", + "\n", + "(If you are confident with NetCDF basics and xarray essentials, feel free to skip this session.)\n", + "Before we dive into 
data exploration, there are some essential concepts we should be familiar with (shown below): \n", + "* Dataset\n", + "* Data array\n", + "* Variable\n", + "* Dimension\n", + "* Coordinate\n", + "* Data Type\n", + "* Meta Data (Attributes)\n", + "\n", + "We will not go into details about NetCDF basics; here are some references you might find helpful\n", + "* [Components of a NetCDF Dataset](https://iprc.soest.hawaii.edu/users/xfu/tool/guidef-7.html#)\n", + "* [Network Common Data Form (NetCDF)](https://www.unidata.ucar.edu/software/netcdf/) and [A Brief History of (netCDF) Time](https://www.unidata.ucar.edu/software/netcdf/time/recs.html)\n", + "\n", + "\n", + "\n", + "### Xarray essentials\n", + "Earlier we introduced the xarray package and used `open_dataset` and `open_mfdataset` to retrieve NetCDF data files as an xarray Dataset object. Recall that you can review the data in a notebook by using `print(ds)` or `ds`. \n", + "\n", + "Using xarray to retrieve the aforementioned NetCDF basics is straightforward. 
In general,\n", + "* Dataset: `ds`\n", + "* Data array: `ds[var_name]`\n", + "* Variable: `ds.variables`\n", + "* Dimension: `ds.dims`\n", + "* Coordinate: `ds.coords`\n", + "* Data Type: `type`\n", + "* Meta Data (Attributes): `ds.attrs`\n", + "\n", + "Also, here are some references if you are new to xarray\n", + "* [Xarray in 45 minutes](https://tutorial.xarray.dev/overview/xarray-in-45-min.html)\n", + "* [Handling NetCDF Files using XArray for Absolute Beginners](https://towardsdatascience.com/handling-netcdf-files-using-xarray-for-absolute-beginners-111a8ab4463f)\n", + "\n", + "Try the following commands in action\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b3eac323", + "metadata": {}, + "outputs": [], + "source": [ + "ds = ds_single" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "77ecf85d", + "metadata": {}, + "outputs": [], + "source": [ + "# Dataset \n", + "ds" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "25e7de09", + "metadata": {}, + "outputs": [], + "source": [ + "# Data array, Variable\n", + "# Note: the info can be overwhelming. 
Do not attempt to grasp everything at first glance.\n", + "ds.variables" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8c41b67e", + "metadata": {}, + "outputs": [], + "source": [ + "# dimensions\n", + "ds.dims" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "156f1dfc", + "metadata": {}, + "outputs": [], + "source": [ + "# coordinates\n", + "ds.coords" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "277d6064", + "metadata": {}, + "outputs": [], + "source": [ + "# Meta Data (Attributes)\n", + "ds.attrs" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d334681f", + "metadata": {}, + "outputs": [], + "source": [ + "# type\n", + "print(type(ds))\n", + "print(type(ds.variables))\n", + "print(type(ds.dims))\n", + "print(type(ds.coords))\n", + "print(type(ds.attrs))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "643399d6", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: retrieve variable names only\n", + "list(ds.variables)" + ] + }, + { + "cell_type": "markdown", + "id": "1cf36878", + "metadata": {}, + "source": [ + "#### Discussion: variables vs. coordinates vs. dimensions. \n", + "From [Components of a NetCDF Dataset](https://iprc.soest.hawaii.edu/users/xfu/tool/guidef-7.html#):\n", + "\n", + "* Dimension: A dimension may be used to represent a real physical dimension, for example, time, latitude, longitude, or height. A dimension might also be used to index other quantities, for example station or model-run-number.\n", + "A netCDF dimension has both a name and a length. A dimension length is an arbitrary positive integer, except that one dimension in a netCDF dataset can have the length UNLIMITED.\n", + "\n", + "* Variables: Variables are used to store the bulk of the data in a netCDF dataset. A variable represents an array of values of the same type. A scalar value is treated as a 0-dimensional array. 
A variable has a name, a data type, and a shape described by its list of dimensions specified when the variable is created. A variable may also have associated attributes, which may be added, deleted or changed after the variable is created.\n", + "\n", + "* Coordinate (Variables): It is legal for a variable to have the same name as a dimension. Such variables have no special meaning to the netCDF library. However there is a convention that such variables should be treated in a special way by software using this library.\n", + "A variable with the same name as a dimension is called a coordinate variable. It typically defines a physical coordinate corresponding to that dimension.\n", + "\n", + "\n", + "Tip: By definition, coordinates and dimensions are also variables. For example, ds.time is a coordinate, but it is also a special variable. In practice, however, when we talk about a variable, we mean a regular (non-coordinate) variable. We will use this convention for the remainder of the notebook. " + ] + }, + { + "cell_type": "markdown", + "id": "d47120f9", + "metadata": {}, + "source": [ + "### Xarray Variable\n", + "\n", + "\n", + "We can use `ds.variables` to access the variables of a dataset. But printing out the whole dataset can often be overwhelming. Instead, we can work on an individual variable (also called an xarray data array) using the `ds[var_name]` syntax, where `var_name` is the name of the variable. See the following examples in action.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5a57e136", + "metadata": {}, + "outputs": [], + "source": [ + "print(type(ds[\"time\"]))\n", + "ds[\"time\"]" + ] + }, + { + "cell_type": "markdown", + "id": "51ec643e", + "metadata": {}, + "source": [ + "#### Variable properties\n", + "For an individual variable, the following properties are the most commonly used, listed with the associated xarray attribute to retrieve each of them. 
(assuming var = ds[var_name])\n", + "* name: `var.name`\n", + "* data content: `var.data`\n", + "* attributes: `var.attrs`\n", + "* dimensions: `var.dims`\n", + "* data type: `var.data.dtype`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a632a525", + "metadata": {}, + "outputs": [], + "source": [ + "var_name = \"time\"\n", + "var = ds[var_name]\n", + "\n", + "print(\"var.name: \\n\", var.name, \"\\n\")\n", + "print(\"var.data: \\n\", var.data, \"\\n\")\n", + "print(\"var.attrs: \\n\", var.attrs, \"\\n\")\n", + "print(\"var.dims: \\n\", var.dims, \"\\n\")\n", + "print(\"var.data.dtype: \\n\", var.data.dtype, \"\\n\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b256b07f", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: individual variable info table (collect and display variable info in a tabular format)\n", + "df_info = pd.DataFrame()\n", + "df_info[\"var_name\"] = list(ds.variables)\n", + "df_info[\"dims\"] = df_info.var_name.apply(lambda x: ds[x].dims)\n", + "df_info[\"is_dim\"] = df_info.var_name.apply(lambda x: x in ds.dims)\n", + "df_info[\"n_dim\"] = df_info.var_name.apply(lambda x: len(ds[x].dims))\n", + "df_info[\"attrs\"] = df_info.var_name.apply(lambda x: ds[x].attrs)\n", + "df_info[\"dtype\"] = df_info.var_name.apply(lambda x: ds[x].data.dtype)\n", + "df_info" + ] + }, + { + "cell_type": "markdown", + "id": "e6a36b96", + "metadata": {}, + "source": [ + "### Data cleaning/Preprocessing (skipped)\n", + "Data cleaning and preprocessing is an important stage in the data analysis pipeline. However, it is outside the scope of this notebook. For more information about data cleaning and preprocessing basics, here are some references. 
\n", + "* [Xarray Fundamentals](https://earth-env-data-science.github.io/lectures/xarray/xarray.html)\n", + "* [Xarray Tutorial — Pangeo Gallery documentation](http://gallery.pangeo.io/repos/pangeo-data/pangeo-tutorial-gallery/xarray.html)\n", + "* [Pythonic Data Cleaning With pandas and NumPy](https://realpython.com/python-data-cleaning-numpy-pandas/)\n", + "* [Pandas - Cleaning Data](https://www.w3schools.com/python/pandas/pandas_cleaning.asp)" + ] + }, + { + "cell_type": "markdown", + "id": "ae6e1a69", + "metadata": {}, + "source": [ + "### Plotting\n", + "Data visualization (or plotting) is another important data analysis topic and deserves its own discussion. Here in this notebook we will only demonstrate basic tools for simple data visualization tasks.\n", + "\n", + "Here are some references you might find useful:\n", + "* xarray plotting: https://docs.xarray.dev/en/stable/user-guide/plotting.html\n", + "* Atmospheric Community Toolkit (ACT): https://arm-doe.github.io/ACT/index.html\n", + "\n", + "Note: this notebook is auto-generated using a template. It uses a generic rule to select variable(s) to plot and is not customized to each individual notebook. Feel free to change the variables of interest, especially if a figure fails to plot." + ] + }, + { + "cell_type": "markdown", + "id": "7e417fb2", + "metadata": {}, + "source": [ + "#### 1-dimensional basic time series plot\n", + "\n", + "For the following plot we would like to find variables such that\n", + "* it has one and only one dimension\n", + "* \"time\" is its coordinate variable\n", + "* it is not a dimension itself,\n", + "* its name does not contain any of the substrings in [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "\n", + "Please see the following example in action. 
(For more details about pandas filtering, please see the following references.)\n", + "* [pandas: multiple conditions while indexing data frame](https://stackoverflow.com/questions/22591174/pandas-multiple-conditions-while-indexing-data-frame-unexpected-behavior)\n", + "* [pandas.DataFrame.apply](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.apply.html)\n", + "* [How to test if a string contains one of the substrings in a list...](https://stackoverflow.com/questions/26577516/how-to-test-if-a-string-contains-one-of-the-substrings-in-a-list-in-pandas)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "30edac2d", + "metadata": {}, + "outputs": [], + "source": [ + "# (pandas query) filter n_dim==1, non-dimensional, and not containing the following substrings\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter = df_info[(df_info.n_dim == 1) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (df_info.is_dim==False) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4777a659", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " var_1d = df_filter.var_name.values[0]\n", + " var_1d\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fb5ae985", + "metadata": {}, + "outputs": [], + "source": [ + "# Note: if failed, change to another variable to plot.\n", + "try:\n", + " ds[var_1d].plot()\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "markdown", + "id": "9e41066e", + "metadata": {}, + "source": [ + "#### 2-dimensional basic plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "09bd4adc", + "metadata": {}, + "outputs": [], + "source": [ + "# (pandas query) filter n_dim==2, non-dimensional, and not contains the 
following substrings\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_2 = df_info[(df_info.n_dim == 2) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (df_info.is_dim==False) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter_2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2cdea1a9", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " var_2d = df_filter_2.var_name.values[0]\n", + " var_2d\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f5f48879", + "metadata": {}, + "outputs": [], + "source": [ + "# Note: if failed, change to another variable to plot.\n", + "try:\n", + " print(ds[var_2d].dims)\n", + " # ds[var_2d].plot()\n", + "\n", + " # conventionally, use \"time\" as x-axis\n", + " ds[var_2d].plot(x=\"time\")\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "markdown", + "id": "7554b2a6", + "metadata": {}, + "source": [ + "#### qc-plotting (optional)\n", + "\n", + "Note: act qc plotting has more strict requirement, one of them is the associated qc variable need to have \"flag_masks\" attributes. 
Which is added by using ds = act.io.armfiles.read_netcdf(files_list), then ds.clean.cleanup()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "71c2096f", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " ds_act = act.io.armfiles.read_netcdf(full_path)\n", + " print(type(ds_act))\n", + " ds_act.clean.cleanup()\n", + "\n", + " # or \n", + " # ds.clean.cleanup()\n", + "except Exception as e:\n", + " print(\"ERROR\", e)\n", + " ds_act = ds" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "844f8505", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter valid variables for ACT qc plotting\n", + "condition = (\"qc_\" + df_info.var_name).apply(lambda x: ds_act[x].attrs.get(\"flag_masks\") is not None \n", + " if x in list(ds.variables) else False)\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_3 = df_info[(df_info.is_dim==False) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings))) &\n", + " condition\n", + " ]\n", + "df_filter_3" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ecd38cc6", + "metadata": {}, + "outputs": [], + "source": [ + "# QC Plot\n", + "try:\n", + " qc_variable = df_filter_3.var_name.values[0]\n", + " print(qc_variable)\n", + "except Exception as e:\n", + " print(e)\n", + " \n", + "try:\n", + "\n", + " # Plot\n", + " qc_display = act.plotting.TimeSeriesDisplay(ds_act)\n", + " qc_display.add_subplots((2,), figsize = (9.5,10))\n", + " qc_ax = qc_display.plot(qc_variable, subplot_index=(0,), set_title=\"QC results on field: \" + qc_variable,)\n", + " qc_ax.grid()\n", + " qc_display.qc_flag_block_plot(qc_variable, subplot_index=(1,))\n", + " qc_ax.set_xlabel(f\"UTC Time starts at {ds.time.data[0]}\")\n", + "\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e)" + ] + }, + { + "cell_type": "markdown", + "id": "d76e4d27", + "metadata": 
{}, + "source": [ + "#### bonus: choose variables to plot from a dropdown menu " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bb733804", + "metadata": {}, + "outputs": [], + "source": [ + "# Valid variables filtering\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_4 = df_info[(df_info.is_dim==False) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter_4" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "82d5c20d", + "metadata": {}, + "outputs": [], + "source": [ + "# example 1: using xarray plot\n", + "\n", + "# Uncomment the following cell to try the interactive plot (ctrl + /)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2fc9aaa4", + "metadata": {}, + "outputs": [], + "source": [ + "# %matplotlib widget\n", + "# plt.clf()\n", + "\n", + "# fig, ax = plt.subplots(figsize=(10, 4))\n", + "\n", + "# available_variables = df_filter_4.var_name.values\n", + "# @widgets.interact(var=available_variables)\n", + "# def update(var = available_variables[0]):\n", + "# fig.clear() # Remove old lines from plot and plot new one\n", + "# if len(ds[var].dims)==2:\n", + "# ds[var].plot(x=\"time\", add_colorbar=False)\n", + "# else:\n", + "# ds[var].plot()\n", + "# plt.grid()\n", + "# plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fabdd802", + "metadata": {}, + "outputs": [], + "source": [ + "# example 2: using act plot\n", + "\n", + "# Uncomment the following cell to try the interactive plot (ctrl + /)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "962b4186", + "metadata": {}, + "outputs": [], + "source": [ + "# %matplotlib widget\n", + "# plt.clf()\n", + "\n", + "# available_variables = df_filter_4.var_name.values\n", + "\n", + "\n", + "# @widgets.interact(var=available_variables)\n", + "# def 
update(var = available_variables[0]):\n", + "\n", + "# i_display = act.plotting.TimeSeriesDisplay(ds_act)\n", + "# i_display.add_subplots((1,), figsize=(10, 4))\n", + "# ax = i_display.plot(var, subplot_index=(0,), set_title=f\"{var} ({ds_act[var].attrs['long_name']})\",)\n", + "\n", + "# ax.set_xlabel(f\"UTC Time starts at {ds.time.data[0]}\")\n", + "# ax.grid()\n", + "# plt.show()\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.16" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/RADFLUXANAL/radflux1long.c1.ipynb b/VAPs/quicklook/RADFLUXANAL/radflux1long.c1.ipynb new file mode 100644 index 00000000..c709b441 --- /dev/null +++ b/VAPs/quicklook/RADFLUXANAL/radflux1long.c1.ipynb @@ -0,0 +1,339 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# RADFLUX1LONG.C1 Plots\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/radfluxanal) for more information about this vap." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib widget\n", + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import ipywidgets as widgets\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "# Data archive directory\n", + "DATA_DIR = r'/data/archive/'\n", + "\n", + "# Datastream info\n", + "DATASTREAM_NAME = 'radflux1long'\n", + "DATA_LEVEL = 'c1'\n", + "LOCATIONS = [{'end_date': '2021-06-12', 'facility': 'M1', 'site': 'oli', 'start_date': '2020-10-16'}, {'end_date': '2023-09-14', 'facility': 'C1', 'site': 'nsa', 'start_date': '2021-07-04'}, {'end_date': '2023-09-28', 'facility': 'E36', 'site': 'sgp', 'start_date': '2020-06-26'}, {'end_date': '2023-10-31', 'facility': 'E37', 'site': 'sgp', 'start_date': '2020-07-01'}, {'end_date': '2021-05-29', 'facility': 'E38', 'site': 'sgp', 'start_date': '2020-06-25'}, {'end_date': '2023-09-28', 'facility': 'E11', 'site': 'sgp', 'start_date': '2020-06-08'}, {'end_date': '2023-10-30', 'facility': 'E12', 'site': 'sgp', 'start_date': '2020-06-07'}, {'end_date': '2023-10-26', 'facility': 'E13', 'site': 'sgp', 'start_date': '2020-06-03'}, {'end_date': '2023-09-28', 'facility': 'E15', 'site': 'sgp', 'start_date': '2021-07-12'}, {'end_date': '2021-09-20', 'facility': 'E31', 'site': 'sgp', 'start_date': '2020-06-08'}, {'end_date': '2023-10-31', 'facility': 'E32', 'site': 'sgp', 'start_date': '2021-07-03'}, {'end_date': '2023-10-31', 'facility': 'E33', 'site': 'sgp', 'start_date': '2020-06-11'}, {'end_date': '2023-09-28', 'facility': 'E34', 'site': 'sgp', 'start_date': '2020-06-18'}, {'end_date': '2023-09-28', 'facility': 'E35', 'site': 'sgp', 'start_date': '2019-06-24'}, {'end_date': '2023-10-30', 'facility': 'E39', 'site': 'sgp', 'start_date': 
'2020-09-07'}, {'end_date': '2023-09-28', 'facility': 'E40', 'site': 'sgp', 'start_date': '2020-09-07'}, {'end_date': '2023-08-02', 'facility': 'E41', 'site': 'sgp', 'start_date': '2021-07-31'}, {'end_date': '2023-09-28', 'facility': 'E9', 'site': 'sgp', 'start_date': '2021-07-04'}]" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "9faaf875", + "metadata": {}, + "source": [ + "## Define site, facility, and date range" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ac6764f5", + "metadata": {}, + "outputs": [], + "source": [ + "print(\"The following locations and date ranges are available for this VAP:\")\n", + "display(pd.DataFrame(LOCATIONS, columns=['site', 'facility', 'start_date', 'end_date']))" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "8d132223", + "metadata": {}, + "source": [ + "#### Define site, facility, and date range (date format: YYYY-MM-DD) using the variables below:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e563983a", + "metadata": {}, + "outputs": [], + "source": [ + "site_facility = ( 'sgp', 'E36' )\n", + "\n", + "date_start = '2023-09-25'\n", + "date_end = '2023-09-27'" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "bccd3dfe-2f99-49a5-bace-ea37e8dd8fc3", + "metadata": {}, + "source": [ + "## Load data files\n", + "Load data files from /data/archive/" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cb4b9a26-c574-49c0-a521-658fa553e39e", + "metadata": {}, + "outputs": [], + "source": [ + "# Compile list of files\n", + "site, facility = site_facility\n", + "d_date_start = datetime.strptime(date_start, '%Y-%m-%d')\n", + "d_date_end = datetime.strptime(date_end, '%Y-%m-%d')\n", + "dir_path = os.path.join(DATA_DIR + site, site + DATASTREAM_NAME + facility + r'.' 
+ DATA_LEVEL )\n", + "dir_path\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6be8f3dc", + "metadata": {}, + "outputs": [], + "source": [ + "from datetime import date, timedelta\n", + "import pandas as pd\n", + "\n", + "def get_ARM_formated_dates(start_date, end_date):\n", + " \"\"\"\n", + " Get a list of ARM conventional formated date lists, based on start_date and end_date(inclusive)\n", + " EXAMPLE:\n", + " get_ARM_formated_dates(start_date=\"20180219\", end_date=\"20180221\")\n", + " >> [\"20180219\", \"20180220\", \"20180221\"] \n", + " \"\"\"\n", + " \n", + " _start_date = pd.to_datetime(start_date)\n", + " _end_date = pd.to_datetime(end_date)\n", + " \n", + " delta = _end_date - _start_date # returns timedelta \n", + " dates = []\n", + "\n", + " for i in range(delta.days + 1):\n", + " day = _start_date + timedelta(days=i)\n", + " day_formated = day.strftime(format=\"%Y%m%d\")\n", + " dates.append(day_formated)\n", + " return dates\n", + "\n", + "\n", + "get_ARM_formated_dates(start_date=date_start, end_date=date_end)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "51feea2e", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter a list of files based on date pattern\n", + "import glob\n", + "dates = get_ARM_formated_dates(start_date=date_start, end_date=date_end)\n", + "files_filter = []\n", + "for date in dates:\n", + " files_filter += glob.glob(f'{dir_path}/*.{date}*.*')\n", + " files_filter\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0e5d0d7", + "metadata": {}, + "outputs": [], + "source": [ + "# Load files as a single dataset\n", + "files_list = files_filter \n", + "ds = act.io.armfiles.read_netcdf(files_list)\n", + "ds.clean.cleanup()\n", + "print(f'{len(files_list)} files loaded')\n", + "ds\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "4a551094-9ec0-4b64-b80a-9940573c2f50", + "metadata": {}, + "source": [ + "## Plot 
time series data\n", + "#### Define the list of variables to be plotted:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d09b789e-84f1-4605-846b-a72c110c8048", + "metadata": {}, + "outputs": [], + "source": [ + "variables_to_plot = ['downwelling_shortwave', 'clearsky_downwelling_shortwave', 'downwelling_longwave']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3458fb08-035b-4898-9253-0a94e6f9c97b", + "metadata": {}, + "outputs": [], + "source": [ + "ts_display = act.plotting.TimeSeriesDisplay(ds)\n", + "ts_display.add_subplots((len(variables_to_plot),), figsize = (9.5,4*len(variables_to_plot)))\n", + "\n", + "for i,v in enumerate(variables_to_plot):\n", + " ts_ax = ts_display.plot(v, subplot_index=(i,), set_title=ds.variables[v].attrs['long_name'],)\n", + " ts_ax.grid()\n", + "\n", + "plt.show()\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "194399aa-1907-452b-8ba9-bc31d7f60291", + "metadata": {}, + "source": [ + "## Quality check plots\n", + "#### Define variable for QC plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9a0ef63d-3eeb-48fc-a24a-43258b6134b8", + "metadata": {}, + "outputs": [], + "source": [ + "qc_variable = 'downwelling_shortwave'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "532663a3-4dc0-4497-bda8-018c5f91e1c4", + "metadata": {}, + "outputs": [], + "source": [ + "# QC Plot\n", + "if ('qc_'+qc_variable) in ds.variables:\n", + "\n", + " # Plot\n", + " qc_display = act.plotting.TimeSeriesDisplay(ds)\n", + " qc_display.add_subplots((2,), figsize = (9.5,10))\n", + " qc_ax = qc_display.plot(qc_variable, subplot_index=(0,), set_title=\"QC results on field: \" + qc_variable,)\n", + " qc_ax.grid()\n", + " qc_display.qc_flag_block_plot(qc_variable, subplot_index=(1,))\n", + "\n", + " plt.show()\n", + "else:\n", + " print(f'QC not available for the selected field: {qc_variable}')\n" + ] + }, + { + "attachments": {}, + 
"cell_type": "markdown", + "id": "2308db16-f362-4033-a11a-c3e5e75ad9ba", + "metadata": {}, + "source": [ + "## Field selection dropdown menu\n", + "Select variable to be plotted from a dropdown menu" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d5792fa8-2253-441a-8ac6-1e07a0345031", + "metadata": {}, + "outputs": [], + "source": [ + "plt.ioff()\n", + "\n", + "# populate dropdown menu with available variables \n", + "available_variables = [v for v in ds.variables if not('time' in v or v.startswith('qc_') or v.startswith('source_')) and 'long_name' in ds.variables[v].attrs]\n", + "d_variable = 'downwelling_shortwave'\n", + "dropdown = widgets.Dropdown(\n", + " options = [(ds.variables[v].attrs['long_name'], v) for v in available_variables],\n", + " value= d_variable,\n", + " description='Field:',\n", + " disabled=False,\n", + ")\n", + "dropdown.layout.margin = '0px 30% 0px 20%'\n", + "dropdown.layout.width = '50%'\n", + "\n", + "# set up display\n", + "i_display = act.plotting.TimeSeriesDisplay(ds)\n", + "i_display.add_subplots((1,), figsize = (9.5,5))\n", + "i_ax = i_display.plot(d_variable, subplot_index=(0,), set_title=ds.variables[d_variable].attrs['long_name'],)\n", + "i_ax.grid()\n", + "i_fig = i_display.fig\n", + "\n", + "# update plot callback function\n", + "def update_plot(change):\n", + " i_ax.cla()\n", + " i_ax_new = i_display.plot(change.new, subplot_index=(0,), set_title=ds.variables[change.new].attrs['long_name'],)\n", + " i_ax_new.grid()\n", + " i_fig.canvas.draw()\n", + " i_fig.canvas.flush_events()\n", + "\n", + "dropdown.observe(update_plot, names='value')\n", + "\n", + "widgets.AppLayout(\n", + " header=dropdown,\n", + " center=i_fig.canvas,\n", + " pane_heights=[1, 6,1]\n", + ")\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + 
"mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.12" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/RADFLUXANAL/radflux1long.c2.ipynb b/VAPs/quicklook/RADFLUXANAL/radflux1long.c2.ipynb new file mode 100644 index 00000000..0ff95eed --- /dev/null +++ b/VAPs/quicklook/RADFLUXANAL/radflux1long.c2.ipynb @@ -0,0 +1,339 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# RADFLUX1LONG.C2 Plots\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/radfluxanal) for more information about this vap." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib widget\n", + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import ipywidgets as widgets\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "# Data archive directory\n", + "DATA_DIR = r'/data/archive/'\n", + "\n", + "# Datastream info\n", + "DATASTREAM_NAME = 'radflux1long'\n", + "DATA_LEVEL = 'c2'\n", + "LOCATIONS = [{'end_date': '2016-12-20', 'facility': 'M1', 'site': 'awr', 'start_date': '2015-11-24'}, {'end_date': '2017-11-01', 'facility': 'M1', 'site': 'asi', 'start_date': '2016-05-12'}, {'end_date': '2020-05-31', 'facility': 'M1', 'site': 'anx', 'start_date': '2019-12-01'}, {'end_date': '2021-08-20', 'facility': 'C1', 'site': 'ena', 'start_date': '2013-10-03'}, {'end_date': '2022-09-30', 'facility': 'M1', 'site': 'hou', 'start_date': '2021-08-22'}, {'end_date': '2007-12-23', 'facility': 
'M1', 'site': 'fkb', 'start_date': '2007-03-25'}, {'end_date': '2015-12-01', 'facility': 'M1', 'site': 'mao', 'start_date': '2014-01-01'}, {'end_date': '2018-03-13', 'facility': 'S1', 'site': 'mcq', 'start_date': '2016-04-10'}, {'end_date': '2008-12-08', 'facility': 'M1', 'site': 'hfe', 'start_date': '2008-11-19'}, {'end_date': '2019-04-28', 'facility': 'M1', 'site': 'cor', 'start_date': '2018-09-23'}, {'end_date': '2012-01-11', 'facility': 'M1', 'site': 'gan', 'start_date': '2011-11-07'}, {'end_date': '2011-01-06', 'facility': 'M1', 'site': 'grw', 'start_date': '2009-04-23'}, {'end_date': '2005-09-10', 'facility': 'M1', 'site': 'pye', 'start_date': '2005-02-22'}, {'end_date': '2019-07-12', 'facility': 'M1', 'site': 'oli', 'start_date': '2014-02-20'}, {'end_date': '2021-07-04', 'facility': 'C1', 'site': 'nsa', 'start_date': '2003-09-22'}, {'end_date': '2010-10-14', 'facility': 'C2', 'site': 'nsa', 'start_date': '2005-02-18'}, {'end_date': '2006-12-26', 'facility': 'M1', 'site': 'nim', 'start_date': '2005-12-14'}, {'end_date': '2021-07-25', 'facility': 'E35', 'site': 'sgp', 'start_date': '2011-10-06'}, {'end_date': '2021-07-27', 'facility': 'E36', 'site': 'sgp', 'start_date': '2011-09-28'}, {'end_date': '2021-07-27', 'facility': 'E37', 'site': 'sgp', 'start_date': '2011-09-28'}, {'end_date': '2020-06-25', 'facility': 'E38', 'site': 'sgp', 'start_date': '2011-08-25'}, {'end_date': '2020-05-23', 'facility': 'C1', 'site': 'sgp', 'start_date': '1997-03-25'}, {'end_date': '2011-10-16', 'facility': 'E10', 'site': 'sgp', 'start_date': '1995-08-10'}, {'end_date': '2021-07-05', 'facility': 'E11', 'site': 'sgp', 'start_date': '1995-09-26'}, {'end_date': '2021-07-03', 'facility': 'E12', 'site': 'sgp', 'start_date': '1996-01-21'}, {'end_date': '2021-07-12', 'facility': 'E13', 'site': 'sgp', 'start_date': '1994-01-07'}, {'end_date': '2021-07-12', 'facility': 'E15', 'site': 'sgp', 'start_date': '1994-03-31'}, {'end_date': '2011-11-10', 'facility': 'E16', 'site': 'sgp', 
'start_date': '1995-06-12'}, {'end_date': '2009-11-07', 'facility': 'E18', 'site': 'sgp', 'start_date': '1996-06-20'}, {'end_date': '2011-05-21', 'facility': 'E19', 'site': 'sgp', 'start_date': '1998-07-19'}, {'end_date': '2009-05-13', 'facility': 'E1', 'site': 'sgp', 'start_date': '1995-11-15'}, {'end_date': '2011-11-15', 'facility': 'E20', 'site': 'sgp', 'start_date': '1995-04-01'}, {'end_date': '2019-04-27', 'facility': 'E21', 'site': 'sgp', 'start_date': '1999-09-13'}, {'end_date': '2009-11-30', 'facility': 'E22', 'site': 'sgp', 'start_date': '1995-06-12'}, {'end_date': '2009-11-07', 'facility': 'E24', 'site': 'sgp', 'start_date': '1995-11-08'}, {'end_date': '2002-04-03', 'facility': 'E25', 'site': 'sgp', 'start_date': '1997-11-18'}, {'end_date': '2009-07-15', 'facility': 'E27', 'site': 'sgp', 'start_date': '2003-05-29'}, {'end_date': '2009-10-19', 'facility': 'E2', 'site': 'sgp', 'start_date': '1996-03-25'}, {'end_date': '2021-07-12', 'facility': 'E31', 'site': 'sgp', 'start_date': '2011-10-13'}, {'end_date': '2021-07-03', 'facility': 'E32', 'site': 'sgp', 'start_date': '2012-02-05'}, {'end_date': '2021-07-31', 'facility': 'E33', 'site': 'sgp', 'start_date': '2011-08-25'}, {'end_date': '2021-07-29', 'facility': 'E34', 'site': 'sgp', 'start_date': '2011-09-04'}, {'end_date': '2021-07-12', 'facility': 'E39', 'site': 'sgp', 'start_date': '2015-10-06'}, {'end_date': '2009-10-24', 'facility': 'E3', 'site': 'sgp', 'start_date': '1996-03-07'}, {'end_date': '2021-07-23', 'facility': 'E40', 'site': 'sgp', 'start_date': '2015-10-08'}, {'end_date': '2021-07-31', 'facility': 'E41', 'site': 'sgp', 'start_date': '2016-04-21'}, {'end_date': '2011-09-25', 'facility': 'E4', 'site': 'sgp', 'start_date': '1996-03-07'}, {'end_date': '2009-11-01', 'facility': 'E5', 'site': 'sgp', 'start_date': '1996-06-17'}, {'end_date': '2011-10-16', 'facility': 'E6', 'site': 'sgp', 'start_date': '1996-03-07'}, {'end_date': '2011-11-13', 'facility': 'E7', 'site': 'sgp', 'start_date': 
'1995-10-20'}, {'end_date': '2009-11-05', 'facility': 'E8', 'site': 'sgp', 'start_date': '1995-09-29'}, {'end_date': '2021-07-03', 'facility': 'E9', 'site': 'sgp', 'start_date': '1994-01-19'}, {'end_date': '2014-06-29', 'facility': 'C1', 'site': 'twp', 'start_date': '1996-10-14'}, {'end_date': '2013-09-08', 'facility': 'C2', 'site': 'twp', 'start_date': '1998-10-30'}, {'end_date': '2015-01-06', 'facility': 'C3', 'site': 'twp', 'start_date': '2002-03-12'}]" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "9faaf875", + "metadata": {}, + "source": [ + "## Define site, facility, and date range" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ac6764f5", + "metadata": {}, + "outputs": [], + "source": [ + "print(\"The following locations and date ranges are available for this VAP:\")\n", + "display(pd.DataFrame(LOCATIONS, columns=['site', 'facility', 'start_date', 'end_date']))" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "8d132223", + "metadata": {}, + "source": [ + "#### Define site, facility, and date range (date format: YYYY-MM-DD) using the variables below:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e563983a", + "metadata": {}, + "outputs": [], + "source": [ + "site_facility = ( 'sgp', 'E35' )\n", + "\n", + "date_start = '2021-07-22'\n", + "date_end = '2021-07-24'" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "bccd3dfe-2f99-49a5-bace-ea37e8dd8fc3", + "metadata": {}, + "source": [ + "## Load data files\n", + "Load data files from /data/archive/" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cb4b9a26-c574-49c0-a521-658fa553e39e", + "metadata": {}, + "outputs": [], + "source": [ + "# Compile list of files\n", + "site, facility = site_facility\n", + "d_date_start = datetime.strptime(date_start, '%Y-%m-%d')\n", + "d_date_end = datetime.strptime(date_end, '%Y-%m-%d')\n", + "dir_path = os.path.join(DATA_DIR + site, site + 
DATASTREAM_NAME + facility + r'.' + DATA_LEVEL )\n", + "dir_path\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6be8f3dc", + "metadata": {}, + "outputs": [], + "source": [ + "from datetime import date, timedelta\n", + "import pandas as pd\n", + "\n", + "def get_ARM_formated_dates(start_date, end_date):\n", + " \"\"\"\n", + " Get a list of ARM conventional formated date lists, based on start_date and end_date(inclusive)\n", + " EXAMPLE:\n", + " get_ARM_formated_dates(start_date=\"20180219\", end_date=\"20180221\")\n", + " >> [\"20180219\", \"20180220\", \"20180221\"] \n", + " \"\"\"\n", + " \n", + " _start_date = pd.to_datetime(start_date)\n", + " _end_date = pd.to_datetime(end_date)\n", + " \n", + " delta = _end_date - _start_date # returns timedelta \n", + " dates = []\n", + "\n", + " for i in range(delta.days + 1):\n", + " day = _start_date + timedelta(days=i)\n", + " day_formated = day.strftime(format=\"%Y%m%d\")\n", + " dates.append(day_formated)\n", + " return dates\n", + "\n", + "\n", + "get_ARM_formated_dates(start_date=date_start, end_date=date_end)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "51feea2e", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter a list of files based on date pattern\n", + "import glob\n", + "dates = get_ARM_formated_dates(start_date=date_start, end_date=date_end)\n", + "files_filter = []\n", + "for date in dates:\n", + " files_filter += glob.glob(f'{dir_path}/*.{date}*.*')\n", + " files_filter\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0e5d0d7", + "metadata": {}, + "outputs": [], + "source": [ + "# Load files as a single dataset\n", + "files_list = files_filter \n", + "ds = act.io.armfiles.read_netcdf(files_list)\n", + "ds.clean.cleanup()\n", + "print(f'{len(files_list)} files loaded')\n", + "ds\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "4a551094-9ec0-4b64-b80a-9940573c2f50", + 
"metadata": {}, + "source": [ + "## Plot time series data\n", + "#### Define the list of variables to be plotted:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d09b789e-84f1-4605-846b-a72c110c8048", + "metadata": {}, + "outputs": [], + "source": [ + "variables_to_plot = ['downwelling_shortwave', 'clearsky_downwelling_shortwave', 'downwelling_longwave']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3458fb08-035b-4898-9253-0a94e6f9c97b", + "metadata": {}, + "outputs": [], + "source": [ + "ts_display = act.plotting.TimeSeriesDisplay(ds)\n", + "ts_display.add_subplots((len(variables_to_plot),), figsize = (9.5,4*len(variables_to_plot)))\n", + "\n", + "for i,v in enumerate(variables_to_plot):\n", + " ts_ax = ts_display.plot(v, subplot_index=(i,), set_title=ds.variables[v].attrs['long_name'],)\n", + " ts_ax.grid()\n", + "\n", + "plt.show()\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "194399aa-1907-452b-8ba9-bc31d7f60291", + "metadata": {}, + "source": [ + "## Quality check plots\n", + "#### Define variable for QC plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9a0ef63d-3eeb-48fc-a24a-43258b6134b8", + "metadata": {}, + "outputs": [], + "source": [ + "qc_variable = 'downwelling_shortwave'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "532663a3-4dc0-4497-bda8-018c5f91e1c4", + "metadata": {}, + "outputs": [], + "source": [ + "# QC Plot\n", + "if ('qc_'+qc_variable) in ds.variables:\n", + "\n", + " # Plot\n", + " qc_display = act.plotting.TimeSeriesDisplay(ds)\n", + " qc_display.add_subplots((2,), figsize = (9.5,10))\n", + " qc_ax = qc_display.plot(qc_variable, subplot_index=(0,), set_title=\"QC results on field: \" + qc_variable,)\n", + " qc_ax.grid()\n", + " qc_display.qc_flag_block_plot(qc_variable, subplot_index=(1,))\n", + "\n", + " plt.show()\n", + "else:\n", + " print(f'QC not available for the selected field: {qc_variable}')\n" + ] + 
}, + { + "attachments": {}, + "cell_type": "markdown", + "id": "2308db16-f362-4033-a11a-c3e5e75ad9ba", + "metadata": {}, + "source": [ + "## Field selection dropdown menu\n", + "Select variable to be plotted from a dropdown menu" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d5792fa8-2253-441a-8ac6-1e07a0345031", + "metadata": {}, + "outputs": [], + "source": [ + "plt.ioff()\n", + "\n", + "# populate dropdown menu with available variables \n", + "available_variables = [v for v in ds.variables if not('time' in v or v.startswith('qc_') or v.startswith('source_')) and 'long_name' in ds.variables[v].attrs]\n", + "d_variable = 'downwelling_shortwave'\n", + "dropdown = widgets.Dropdown(\n", + " options = [(ds.variables[v].attrs['long_name'], v) for v in available_variables],\n", + " value= d_variable,\n", + " description='Field:',\n", + " disabled=False,\n", + ")\n", + "dropdown.layout.margin = '0px 30% 0px 20%'\n", + "dropdown.layout.width = '50%'\n", + "\n", + "# set up display\n", + "i_display = act.plotting.TimeSeriesDisplay(ds)\n", + "i_display.add_subplots((1,), figsize = (9.5,5))\n", + "i_ax = i_display.plot(d_variable, subplot_index=(0,), set_title=ds.variables[d_variable].attrs['long_name'],)\n", + "i_ax.grid()\n", + "i_fig = i_display.fig\n", + "\n", + "# update plot callback function\n", + "def update_plot(change):\n", + " i_ax.cla()\n", + " i_ax_new = i_display.plot(change.new, subplot_index=(0,), set_title=ds.variables[change.new].attrs['long_name'],)\n", + " i_ax_new.grid()\n", + " i_fig.canvas.draw()\n", + " i_fig.canvas.flush_events()\n", + "\n", + "dropdown.observe(update_plot, names='value')\n", + "\n", + "widgets.AppLayout(\n", + " header=dropdown,\n", + " center=i_fig.canvas,\n", + " pane_heights=[1, 6,1]\n", + ")\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + 
"file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.12" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/RADFLUXANAL/radfluxbrs1long.c2.ipynb b/VAPs/quicklook/RADFLUXANAL/radfluxbrs1long.c2.ipynb new file mode 100644 index 00000000..98d71f59 --- /dev/null +++ b/VAPs/quicklook/RADFLUXANAL/radfluxbrs1long.c2.ipynb @@ -0,0 +1,339 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# RADFLUXBRS1LONG.C2 Plots\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/radfluxanal) for more information about this vap." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib widget\n", + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import ipywidgets as widgets\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "# Data archive directory\n", + "DATA_DIR = r'/data/archive/'\n", + "\n", + "# Datastream info\n", + "DATASTREAM_NAME = 'radfluxbrs1long'\n", + "DATA_LEVEL = 'c2'\n", + "LOCATIONS = [{'end_date': '2021-07-12', 'facility': 'C1', 'site': 'sgp', 'start_date': '1997-03-21'}]" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "9faaf875", + "metadata": {}, + "source": [ + "## Define site, facility, and date range" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ac6764f5", + "metadata": {}, + "outputs": [], + "source": [ + "print(\"The following locations and date ranges are available 
for this VAP:\")\n", + "display(pd.DataFrame(LOCATIONS, columns=['site', 'facility', 'start_date', 'end_date']))" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "8d132223", + "metadata": {}, + "source": [ + "#### Define site, facility, and date range (date format: YYYY-MM-DD) using the variables below:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e563983a", + "metadata": {}, + "outputs": [], + "source": [ + "site_facility = ( 'sgp', 'C1' )\n", + "\n", + "date_start = '2021-07-09'\n", + "date_end = '2021-07-11'" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "bccd3dfe-2f99-49a5-bace-ea37e8dd8fc3", + "metadata": {}, + "source": [ + "## Load data files\n", + "Load data files from /data/archive/" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cb4b9a26-c574-49c0-a521-658fa553e39e", + "metadata": {}, + "outputs": [], + "source": [ + "# Compile list of files\n", + "site, facility = site_facility\n", + "d_date_start = datetime.strptime(date_start, '%Y-%m-%d')\n", + "d_date_end = datetime.strptime(date_end, '%Y-%m-%d')\n", + "dir_path = os.path.join(DATA_DIR + site, site + DATASTREAM_NAME + facility + r'.' 
+ DATA_LEVEL )\n", + "dir_path\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6be8f3dc", + "metadata": {}, + "outputs": [], + "source": [ + "from datetime import date, timedelta\n", + "import pandas as pd\n", + "\n", + "def get_ARM_formated_dates(start_date, end_date):\n", + " \"\"\"\n", + " Get a list of ARM conventional formated date lists, based on start_date and end_date(inclusive)\n", + " EXAMPLE:\n", + " get_ARM_formated_dates(start_date=\"20180219\", end_date=\"20180221\")\n", + " >> [\"20180219\", \"20180220\", \"20180221\"] \n", + " \"\"\"\n", + " \n", + " _start_date = pd.to_datetime(start_date)\n", + " _end_date = pd.to_datetime(end_date)\n", + " \n", + " delta = _end_date - _start_date # returns timedelta \n", + " dates = []\n", + "\n", + " for i in range(delta.days + 1):\n", + " day = _start_date + timedelta(days=i)\n", + " day_formated = day.strftime(format=\"%Y%m%d\")\n", + " dates.append(day_formated)\n", + " return dates\n", + "\n", + "\n", + "get_ARM_formated_dates(start_date=date_start, end_date=date_end)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "51feea2e", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter a list of files based on date pattern\n", + "import glob\n", + "dates = get_ARM_formated_dates(start_date=date_start, end_date=date_end)\n", + "files_filter = []\n", + "for date in dates:\n", + " files_filter += glob.glob(f'{dir_path}/*.{date}*.*')\n", + " files_filter\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0e5d0d7", + "metadata": {}, + "outputs": [], + "source": [ + "# Load files as a single dataset\n", + "files_list = files_filter \n", + "ds = act.io.armfiles.read_netcdf(files_list)\n", + "ds.clean.cleanup()\n", + "print(f'{len(files_list)} files loaded')\n", + "ds\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "4a551094-9ec0-4b64-b80a-9940573c2f50", + "metadata": {}, + "source": [ + "## Plot 
time series data\n", + "#### Define the list of variables to be plotted:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d09b789e-84f1-4605-846b-a72c110c8048", + "metadata": {}, + "outputs": [], + "source": [ + "variables_to_plot = ['downwelling_shortwave', 'clearsky_downwelling_shortwave', 'downwelling_longwave']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3458fb08-035b-4898-9253-0a94e6f9c97b", + "metadata": {}, + "outputs": [], + "source": [ + "ts_display = act.plotting.TimeSeriesDisplay(ds)\n", + "ts_display.add_subplots((len(variables_to_plot),), figsize = (9.5,4*len(variables_to_plot)))\n", + "\n", + "for i,v in enumerate(variables_to_plot):\n", + " ts_ax = ts_display.plot(v, subplot_index=(i,), set_title=ds.variables[v].attrs['long_name'],)\n", + " ts_ax.grid()\n", + "\n", + "plt.show()\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "194399aa-1907-452b-8ba9-bc31d7f60291", + "metadata": {}, + "source": [ + "## Quality check plots\n", + "#### Define variable for QC plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9a0ef63d-3eeb-48fc-a24a-43258b6134b8", + "metadata": {}, + "outputs": [], + "source": [ + "qc_variable = 'downwelling_shortwave'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "532663a3-4dc0-4497-bda8-018c5f91e1c4", + "metadata": {}, + "outputs": [], + "source": [ + "# QC Plot\n", + "if ('qc_'+qc_variable) in ds.variables:\n", + "\n", + " # Plot\n", + " qc_display = act.plotting.TimeSeriesDisplay(ds)\n", + " qc_display.add_subplots((2,), figsize = (9.5,10))\n", + " qc_ax = qc_display.plot(qc_variable, subplot_index=(0,), set_title=\"QC results on field: \" + qc_variable,)\n", + " qc_ax.grid()\n", + " qc_display.qc_flag_block_plot(qc_variable, subplot_index=(1,))\n", + "\n", + " plt.show()\n", + "else:\n", + " print(f'QC not available for the selected field: {qc_variable}')\n" + ] + }, + { + "attachments": {}, + 
"cell_type": "markdown", + "id": "2308db16-f362-4033-a11a-c3e5e75ad9ba", + "metadata": {}, + "source": [ + "## Field selection dropdown menu\n", + "Select variable to be plotted from a dropdown menu" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d5792fa8-2253-441a-8ac6-1e07a0345031", + "metadata": {}, + "outputs": [], + "source": [ + "plt.ioff()\n", + "\n", + "# populate dropdown menu with available variables \n", + "available_variables = [v for v in ds.variables if not('time' in v or v.startswith('qc_') or v.startswith('source_')) and 'long_name' in ds.variables[v].attrs]\n", + "d_variable = 'downwelling_shortwave'\n", + "dropdown = widgets.Dropdown(\n", + " options = [(ds.variables[v].attrs['long_name'], v) for v in available_variables],\n", + " value= d_variable,\n", + " description='Field:',\n", + " disabled=False,\n", + ")\n", + "dropdown.layout.margin = '0px 30% 0px 20%'\n", + "dropdown.layout.width = '50%'\n", + "\n", + "# set up display\n", + "i_display = act.plotting.TimeSeriesDisplay(ds)\n", + "i_display.add_subplots((1,), figsize = (9.5,5))\n", + "i_ax = i_display.plot(d_variable, subplot_index=(0,), set_title=ds.variables[d_variable].attrs['long_name'],)\n", + "i_ax.grid()\n", + "i_fig = i_display.fig\n", + "\n", + "# update plot callback function\n", + "def update_plot(change):\n", + " i_ax.cla()\n", + " i_ax_new = i_display.plot(change.new, subplot_index=(0,), set_title=ds.variables[change.new].attrs['long_name'],)\n", + " i_ax_new.grid()\n", + " i_fig.canvas.draw()\n", + " i_fig.canvas.flush_events()\n", + "\n", + "dropdown.observe(update_plot, names='value')\n", + "\n", + "widgets.AppLayout(\n", + " header=dropdown,\n", + " center=i_fig.canvas,\n", + " pane_heights=[1, 6,1]\n", + ")\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + 
"mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.12" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/RIPBE/30ripbe1mcfarlane.c1.ipynb b/VAPs/quicklook/RIPBE/30ripbe1mcfarlane.c1.ipynb new file mode 100644 index 00000000..3b769c15 --- /dev/null +++ b/VAPs/quicklook/RIPBE/30ripbe1mcfarlane.c1.ipynb @@ -0,0 +1,339 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# 30RIPBE1MCFARLANE.C1 Plots\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/ripbe) for more information about this vap." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib widget\n", + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import ipywidgets as widgets\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "# Data archive directory\n", + "DATA_DIR = r'/data/archive/'\n", + "\n", + "# Datastream info\n", + "DATASTREAM_NAME = '30ripbe1mcfarlane'\n", + "DATA_LEVEL = 'c1'\n", + "LOCATIONS = [{'end_date': '2007-07-31', 'facility': 'C1', 'site': 'sgp', 'start_date': '2002-03-01'}]" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "9faaf875", + "metadata": {}, + "source": [ + "## Define site, facility, and date range" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ac6764f5", + "metadata": {}, + "outputs": [], + "source": [ + "print(\"The following locations and date ranges are available for this VAP:\")\n", + 
"display(pd.DataFrame(LOCATIONS, columns=['site', 'facility', 'start_date', 'end_date']))" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "8d132223", + "metadata": {}, + "source": [ + "#### Define site, facility, and date range (date format: YYYY-MM-DD) using the variables below:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e563983a", + "metadata": {}, + "outputs": [], + "source": [ + "site_facility = ( 'sgp', 'C1' )\n", + "\n", + "date_start = '2007-07-29'\n", + "date_end = '2007-07-31'" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "bccd3dfe-2f99-49a5-bace-ea37e8dd8fc3", + "metadata": {}, + "source": [ + "## Load data files\n", + "Load data files from /data/archive/" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cb4b9a26-c574-49c0-a521-658fa553e39e", + "metadata": {}, + "outputs": [], + "source": [ + "# Compile list of files\n", + "site, facility = site_facility\n", + "d_date_start = datetime.strptime(date_start, '%Y-%m-%d')\n", + "d_date_end = datetime.strptime(date_end, '%Y-%m-%d')\n", + "dir_path = os.path.join(DATA_DIR + site, site + DATASTREAM_NAME + facility + r'.' 
+ DATA_LEVEL )\n", + "dir_path\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6be8f3dc", + "metadata": {}, + "outputs": [], + "source": [ + "from datetime import date, timedelta\n", + "import pandas as pd\n", + "\n", + "def get_ARM_formated_dates(start_date, end_date):\n", + " \"\"\"\n", + " Get a list of ARM conventional formated date lists, based on start_date and end_date(inclusive)\n", + " EXAMPLE:\n", + " get_ARM_formated_dates(start_date=\"20180219\", end_date=\"20180221\")\n", + " >> [\"20180219\", \"20180220\", \"20180221\"] \n", + " \"\"\"\n", + " \n", + " _start_date = pd.to_datetime(start_date)\n", + " _end_date = pd.to_datetime(end_date)\n", + " \n", + " delta = _end_date - _start_date # returns timedelta \n", + " dates = []\n", + "\n", + " for i in range(delta.days + 1):\n", + " day = _start_date + timedelta(days=i)\n", + " day_formated = day.strftime(format=\"%Y%m%d\")\n", + " dates.append(day_formated)\n", + " return dates\n", + "\n", + "\n", + "get_ARM_formated_dates(start_date=date_start, end_date=date_end)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "51feea2e", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter a list of files based on date pattern\n", + "import glob\n", + "dates = get_ARM_formated_dates(start_date=date_start, end_date=date_end)\n", + "files_filter = []\n", + "for date in dates:\n", + " files_filter += glob.glob(f'{dir_path}/*.{date}*.*')\n", + " files_filter\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0e5d0d7", + "metadata": {}, + "outputs": [], + "source": [ + "# Load files as a single dataset\n", + "files_list = files_filter \n", + "ds = act.io.armfiles.read_netcdf(files_list)\n", + "ds.clean.cleanup()\n", + "print(f'{len(files_list)} files loaded')\n", + "ds\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "4a551094-9ec0-4b64-b80a-9940573c2f50", + "metadata": {}, + "source": [ + "## Plot 
time series data\n", + "#### Define the list of variables to be plotted:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d09b789e-84f1-4605-846b-a72c110c8048", + "metadata": {}, + "outputs": [], + "source": [ + "variables_to_plot = ['solar_zenith', 'solar_distance_factor', 'clear_sky_frac']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3458fb08-035b-4898-9253-0a94e6f9c97b", + "metadata": {}, + "outputs": [], + "source": [ + "ts_display = act.plotting.TimeSeriesDisplay(ds)\n", + "ts_display.add_subplots((len(variables_to_plot),), figsize = (9.5,4*len(variables_to_plot)))\n", + "\n", + "for i,v in enumerate(variables_to_plot):\n", + " ts_ax = ts_display.plot(v, subplot_index=(i,), set_title=ds.variables[v].attrs['long_name'],)\n", + " ts_ax.grid()\n", + "\n", + "plt.show()\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "194399aa-1907-452b-8ba9-bc31d7f60291", + "metadata": {}, + "source": [ + "## Quality check plots\n", + "#### Define variable for QC plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9a0ef63d-3eeb-48fc-a24a-43258b6134b8", + "metadata": {}, + "outputs": [], + "source": [ + "qc_variable = 'clear_sky_frac'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "532663a3-4dc0-4497-bda8-018c5f91e1c4", + "metadata": {}, + "outputs": [], + "source": [ + "# QC Plot\n", + "if ('qc_'+qc_variable) in ds.variables:\n", + "\n", + " # Plot\n", + " qc_display = act.plotting.TimeSeriesDisplay(ds)\n", + " qc_display.add_subplots((2,), figsize = (9.5,10))\n", + " qc_ax = qc_display.plot(qc_variable, subplot_index=(0,), set_title=\"QC results on field: \" + qc_variable,)\n", + " qc_ax.grid()\n", + " qc_display.qc_flag_block_plot(qc_variable, subplot_index=(1,))\n", + "\n", + " plt.show()\n", + "else:\n", + " print(f'QC not available for the selected field: {qc_variable}')\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": 
"2308db16-f362-4033-a11a-c3e5e75ad9ba", + "metadata": {}, + "source": [ + "## Field selection dropdown menu\n", + "Select variable to be plotted from a dropdown menu" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d5792fa8-2253-441a-8ac6-1e07a0345031", + "metadata": {}, + "outputs": [], + "source": [ + "plt.ioff()\n", + "\n", + "# populate dropdown menu with available variables \n", + "available_variables = [v for v in ds.variables if not('time' in v or v.startswith('qc_') or v.startswith('source_')) and 'long_name' in ds.variables[v].attrs]\n", + "d_variable = 'solar_zenith'\n", + "dropdown = widgets.Dropdown(\n", + " options = [(ds.variables[v].attrs['long_name'], v) for v in available_variables],\n", + " value= d_variable,\n", + " description='Field:',\n", + " disabled=False,\n", + ")\n", + "dropdown.layout.margin = '0px 30% 0px 20%'\n", + "dropdown.layout.width = '50%'\n", + "\n", + "# set up display\n", + "i_display = act.plotting.TimeSeriesDisplay(ds)\n", + "i_display.add_subplots((1,), figsize = (9.5,5))\n", + "i_ax = i_display.plot(d_variable, subplot_index=(0,), set_title=ds.variables[d_variable].attrs['long_name'],)\n", + "i_ax.grid()\n", + "i_fig = i_display.fig\n", + "\n", + "# update plot callback function\n", + "def update_plot(change):\n", + " i_ax.cla()\n", + " i_ax_new = i_display.plot(change.new, subplot_index=(0,), set_title=ds.variables[change.new].attrs['long_name'],)\n", + " i_ax_new.grid()\n", + " i_fig.canvas.draw()\n", + " i_fig.canvas.flush_events()\n", + "\n", + "dropdown.observe(update_plot, names='value')\n", + "\n", + "widgets.AppLayout(\n", + " header=dropdown,\n", + " center=i_fig.canvas,\n", + " pane_heights=[1, 6,1]\n", + ")\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": 
"python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.12" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/RIPBE/RIPBE_tutorial.ipynb b/VAPs/quicklook/RIPBE/RIPBE_tutorial.ipynb new file mode 100644 index 00000000..dffde185 --- /dev/null +++ b/VAPs/quicklook/RIPBE/RIPBE_tutorial.ipynb @@ -0,0 +1,838 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# 30RIPBE1MCFARLANE.C1 Notebook\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/ripbe) for more information about this vap." + ] + }, + { + "cell_type": "markdown", + "id": "97097763", + "metadata": {}, + "source": [ + "In this notebook, we demonstrate the workflow to explore ARM vap data (using 30ripbe1mcfarlane as an example.) Value-added products (VAPs) are higher-order data products that have been analyzed and processed to ease scientists’ use of ARM data in atmospheric research and global climate models. \n", + "Here is the main content we will cover." 
+ ] + }, + { + "cell_type": "markdown", + "id": "eddec40f", + "metadata": {}, + "source": [ + "# Table of Content\n", + "## Access the data\n", + "* How to retrieve the data\n", + "* Data path and file name conventions\n", + "* Load data\n", + "## Explore the data\n", + "* NetCDF Data structure\n", + "* Xarray essentials\n", + "* Xarray Variable\n", + "## Plot the data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "import random\n", + "\n", + "import glob\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "id": "2372d5be", + "metadata": {}, + "source": [ + "## Access the data" + ] + }, + { + "cell_type": "markdown", + "id": "5b0d9684", + "metadata": {}, + "source": [ + "### How to retrieve the data\n", + "We assume the path \"/data/archive\" is available where you are running this notebook. You can use `os.path.exists(\"/data/archive\")` to verify if the path exists at your machine.\n", + "\n", + "\n", + "### Data path and file name conventions\n", + "There several common terminologies regarding ARM data, for example, data-stream-name, data-level, site, facility, instrument, etc. (For more details, please see [ARM Data File Standards Version 1.2](https://www.google.com/search?q=arm+datastream+facility+cite+definition&rlz=1C1GCEJ_enUS1029US1029&ei=hb41ZICrBPukqtsPvMODIA&ved=0ahUKEwjAgeiS0aL-AhV7kmoFHbzhAAQQ4dUDCBA&uact=5&oq=arm+datastream+facility+cite+definition&gs_lcp=Cgxnd3Mtd2l6LXNlcnAQAzIFCAAQogQyBQgAEKIEMgUIABCiBDIFCAAQogQyBQgAEKIEOgoIABBHENYEELADOgoIIRCgARDDBBAKSgQIQRgAUJ4DWIoOYO0PaAFwAXgAgAGMAYgBuQmSAQMyLjmYAQCgAQHIAQjAAQE&sclient=gws-wiz-serp).) 
\n", + "\n", + "For example, this notebook is called `30ripbe1mcfarlane.c1`, where `30ripbe1mcfarlane` is the \"datastream name\", and `c1` is the \"data level\".\n", + "\n", + "This datastream also contains site `sgp` and facility `C1`. (Note: an individual datastream might have multiple site-facility pairs.)\n", + "In such a case, the data of this data-stream is stored at `/data/archive/sgp/sgp30ripbe1mcfarlaneC1.c1`, which is in the format of `{DATA_DIR}/{site}/{site}{datastream_name}{facility}.{data_level}`. We can use the following method to assign the data-stream directory: `datastream_dir = os.path.join(DATA_DIR, site, site + DATASTREAM_NAME + facility + '.' + DATA_LEVEL )`\n", + "\n", + "The data files under datastream_dir also follow naming conventions. Once you reach the datastream_dir level, the most important file naming convention for differentiating the files is \"yyyyMMdd.hhmmss\", which comes in handy for filtering files based on datetime. For example, we can use `glob.glob(f'{datastream_dir}/*.200709*.*')` to select files from September 2007.\n", + "\n", + "(Note: refer to https://adc.arm.gov/discovery/#/ and the https://adc.arm.gov/solr8/metadata/select API to explore ARM datastreams and their associated available sites and facilities.)\n", + "\n", + "Please see the following examples in action." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d7e9eb85", + "metadata": {}, + "outputs": [], + "source": [ + "# Verify if DATA_DIR path exists\n", + "DATA_DIR = \"/data/archive\"\n", + "os.path.exists(DATA_DIR)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "586993fd", + "metadata": {}, + "outputs": [], + "source": [ + "# Specify datastream_dir following the path conventions and check its existence\n", + "DATASTREAM_NAME = \"30ripbe1mcfarlane\"\n", + "DATA_LEVEL = \"c1\"\n", + "site = \"sgp\"\n", + "facility = \"C1\"\n", + "datastream_dir = os.path.join(DATA_DIR, site, site + DATASTREAM_NAME + facility + '.' 
+ DATA_LEVEL )\n", + "print(datastream_dir)\n", + "print(os.path.exists(datastream_dir))\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0742f7c1", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: list 5 (random) files under datastream_dir\n", + "files = os.listdir(datastream_dir)\n", + "files[:5]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "39b98a36", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: get most recent file\n", + "list_of_files = glob.glob(f\"{datastream_dir}/*\") # * means all if need specific format then *.csv\n", + "latest_file = max(list_of_files, key=os.path.getctime)\n", + "latest_file" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "902d514e", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus sort datastream files based on datetime\n", + "files = os.listdir(datastream_dir)\n", + "file_sorted = files.copy()\n", + "file_sorted.sort() \n", + "print(file_sorted[:5])\n", + "\n", + "# to reverse\n", + "file_sorted_reverse = files.copy() \n", + "file_sorted_reverse.sort(reverse=True)\n", + "print(file_sorted_reverse[:5])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ec5923b2", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: pattern matching\n", + "# filter the 200709** files under datastream_dir\n", + "files_filter = glob.glob(f'{datastream_dir}/*.200709*.*')\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c4a4aa18", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: use armnotebook_utils.file_filter (TODO) to filter files based on datastream info \n" + ] + }, + { + "cell_type": "markdown", + "id": "cfeb9efc", + "metadata": {}, + "source": [ + "### Load data\n", + "The arm data is stored in [NetCDF](https://en.wikipedia.org/wiki/NetCDF#:~:text=%22NetCDF%20(network%20Common%20Data%20Form,format%20for%20representing%20scientific%20data.) format. 
We can use xarray's `open_dataset` method to load single file, or `open_mfdataset` to open multiple files. (Note: the latter will still return a single xarray dataset object by combining multiple files.)\n", + "\n", + "Note: open_dataset keeps the file handle open and lazy loads its contents. All parameters are passed directly to open_dataset. It is a preferable method over load_dataset for memory-effiency. (more details, please see [xarray.open_dataset](https://docs.xarray.dev/en/stable/generated/xarray.open_dataset.html))\n", + "\n", + "See the following example in action" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a440a329", + "metadata": {}, + "outputs": [], + "source": [ + "# open single file\n", + "full_path = latest_file\n", + "print(full_path)\n", + "ds_single = xr.open_dataset(full_path)\n", + "ds_single" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0143a3d", + "metadata": {}, + "outputs": [], + "source": [ + "print(ds_single)\n", + "print(type(ds_single))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1c0f8939", + "metadata": {}, + "outputs": [], + "source": [ + "# open multiple files\n", + "n_files = 3\n", + "full_paths = [os.path.join(datastream_dir, f_path) for f_path in file_sorted_reverse[:n_files]]\n", + "print(full_paths)\n", + "try: # Note: sometimes multiple files cannot be merged, so we used try except here.\n", + " ds_mutiple = xr.open_mfdataset(full_paths)\n", + " # ds_mutiple\n", + " print(type(ds_mutiple))\n", + " print(ds_mutiple)\n", + "except Exception as e:\n", + " print(e)" + ] + }, + { + "cell_type": "markdown", + "id": "3fa59943", + "metadata": {}, + "source": [ + "## Explore the data" + ] + }, + { + "cell_type": "markdown", + "id": "571f69b9", + "metadata": {}, + "source": [ + "### NetCDF Data structure \n", + "\n", + "\n", + "(If you are confident with NetCDF basics and xarray essentials, feel free to skip this session.)\n", + "Before we dive into 
data exploration, there are some essential concepts we should be familiar with (shown below): \n", + "* Dataset\n", + "* Data array\n", + "* Variable\n", + "* Dimension\n", + "* Coordinate\n", + "* Data Type\n", + "* Meta Data (Attributes)\n", + "\n", + "We will not go into details about NetCDF basics, but here are some references you might find helpful:\n", + "* [Components of a NetCDF Dataset](https://iprc.soest.hawaii.edu/users/xfu/tool/guidef-7.html#)\n", + "* [Network Common Data Form (NetCDF)](https://www.unidata.ucar.edu/software/netcdf/) and [A Brief History of (netCDF) Time](https://www.unidata.ucar.edu/software/netcdf/time/recs.html)\n", + "\n", + "\n", + "\n", + "### Xarray essentials\n", + "Earlier we introduced the xarray package and used `open_dataset` and `open_mfdataset` to retrieve a NetCDF data file as an xarray Dataset object. Recall that you can review the data in a notebook by using `print(ds)` or `ds`. \n", + "\n", + "Using xarray to retrieve the aforementioned NetCDF basics is straightforward. 
In general,\n", + "* Dataset: `ds`\n", + "* Data array: `ds.variables`\n", + "* Variable: `ds.variables`\n", + "* Dimension: `ds.dims`\n", + "* Coordinate: `ds.coords`\n", + "* Data Type: `type`\n", + "* Meta Data (Attributes): `ds.attrs`\n", + "\n", + "Also, here are some references if you are new to xarray:\n", + "* [Xarray in 45 minutes](https://tutorial.xarray.dev/overview/xarray-in-45-min.html)\n", + "* [Handling NetCDF Files using XArray for Absolute Beginners](https://towardsdatascience.com/handling-netcdf-files-using-xarray-for-absolute-beginners-111a8ab4463f)\n", + "\n", + "Try the following commands in action\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b3eac323", + "metadata": {}, + "outputs": [], + "source": [ + "ds = ds_single" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "77ecf85d", + "metadata": {}, + "outputs": [], + "source": [ + "# Dataset \n", + "ds" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "25e7de09", + "metadata": {}, + "outputs": [], + "source": [ + "# Data array, Variable\n", + "# Note: the info can be overwhelming. 
Do not attempt to grasp everything at the first glance.\n", + "ds.variables" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8c41b67e", + "metadata": {}, + "outputs": [], + "source": [ + "# dimensions\n", + "ds.dims" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "156f1dfc", + "metadata": {}, + "outputs": [], + "source": [ + "# coordinates\n", + "ds.coords" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "277d6064", + "metadata": {}, + "outputs": [], + "source": [ + "# Meta Data (Attributes)\n", + "ds.attrs" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d334681f", + "metadata": {}, + "outputs": [], + "source": [ + "# type\n", + "print(type(ds))\n", + "print(type(ds.variables))\n", + "print(type(ds.dims))\n", + "print(type(ds.coords))\n", + "print(type(ds.attrs))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "643399d6", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: retrieve variable names only\n", + "list(ds.variables)" + ] + }, + { + "cell_type": "markdown", + "id": "1cf36878", + "metadata": {}, + "source": [ + "#### Discussion: variable vs. coordinates vs. dimensions. \n", + "From [Components of a NetCDF Dataset](https://iprc.soest.hawaii.edu/users/xfu/tool/guidef-7.html#):\n", + "\n", + "* Dimension: A dimension may be used to represent a real physical dimension, for example, time, latitude, longitude, or height. A dimension might also be used to index other quantities, for example station or model-run-number.\n", + "A netCDF dimension has both a name and a length. A dimension length is an arbitrary positive integer, except that one dimension in a netCDF dataset can have the length UNLIMITED.\n", + "\n", + "* Variables: Variables are used to store the bulk of the data in a netCDF dataset. A variable represents an array of values of the same type. A scalar value is treated as a 0-dimensional array. 
A variable has a name, a data type, and a shape described by its list of dimensions specified when the variable is created. A variable may also have associated attributes, which may be added, deleted or changed after the variable is created.\n", + "\n", + "* Coordinate (Variables): It is legal for a variable to have the same name as a dimension. Such variables have no special meaning to the netCDF library. However there is a convention that such variables should be treated in a special way by software using this library.\n", + "A variable with the same name as a dimension is called a coordinate variable. It typically defines a physical coordinate corresponding to that dimension.\n", + "\n", + "\n", + "Tips: By definition, coordinates and dimensions are also variables. For example, ds.time is a coordinate, but it is also a special variable. In practice, however, when we talk about a variable, we mean a regular/non-coordinate variable. We will use this convention for the remainder of the notebook. " + ] + }, + { + "cell_type": "markdown", + "id": "d47120f9", + "metadata": {}, + "source": [ + "### Xarray Variable\n", + "\n", + "\n", + "We can use `ds.variables` to access the variables of a dataset. But printing out the whole dataset can often be overwhelming. Instead, we will work on an individual variable (also called an xarray data array) using the `ds[var_name]` syntax, where `var_name` is the name of the variable. See the following examples in action.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5a57e136", + "metadata": {}, + "outputs": [], + "source": [ + "print(type(ds[\"time\"]))\n", + "ds[\"time\"]" + ] + }, + { + "cell_type": "markdown", + "id": "51ec643e", + "metadata": {}, + "source": [ + "#### Variable properties\n", + "For an individual variable, the following properties are the most commonly used, listed with the associated xarray attribute used to retrieve them. 
(assuming var = ds[var_name])\n", + "* name: `var.name`\n", + "* data content: `var.data`\n", + "* attributes: `var.attrs`\n", + "* dimensions: `var.dims`\n", + "* data type: `var.data.dtype`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a632a525", + "metadata": {}, + "outputs": [], + "source": [ + "var_name = \"time\"\n", + "var = ds[var_name]\n", + "\n", + "print(\"var.name: \\n\", var.name, \"\\n\")\n", + "print(\"var.data: \\n\", var.data, \"\\n\")\n", + "print(\"var.attrs: \\n\", var.attrs, \"\\n\")\n", + "print(\"var.dims: \\n\", var.dims, \"\\n\")\n", + "print(\"var.data.dtype: \\n\", var.data.dtype, \"\\n\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b256b07f", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: individual variable info table (collect and display variable info as a tabular format)\n", + "df_info = pd.DataFrame()\n", + "df_info[\"var_name\"] = list(ds.variables)\n", + "df_info[\"dims\"] = df_info.var_name.apply(lambda x: ds[x].dims)\n", + "df_info[\"is_dim\"] = df_info.var_name.apply(lambda x: x in ds.dims)\n", + "df_info[\"n_dim\"] = df_info.var_name.apply(lambda x: len(ds[x].dims))\n", + "df_info[\"attrs\"] = df_info.var_name.apply(lambda x: ds[x].attrs)\n", + "df_info[\"dtype\"] = df_info.var_name.apply(lambda x: ds[x].data.dtype)\n", + "df_info" + ] + }, + { + "cell_type": "markdown", + "id": "e6a36b96", + "metadata": {}, + "source": [ + "### Data cleaning/Preprocessing (skipped)\n", + "Data cleaning and preprocessing is an important stage in the data analysis pipeline. However, it is out of the scope of this notebook. For more information about data cleaning and preprocessing basics, here are some references. 
\n", + "* [Xarray Fundamentals](https://earth-env-data-science.github.io/lectures/xarray/xarray.html)\n", + "* [Xarray Tutorial — Pangeo Gallery documentation](http://gallery.pangeo.io/repos/pangeo-data/pangeo-tutorial-gallery/xarray.html)\n", + "* [Pythonic Data Cleaning With pandas and NumPy](https://realpython.com/python-data-cleaning-numpy-pandas/)\n", + "* [Pandas - Cleaning Data](https://www.w3schools.com/python/pandas/pandas_cleaning.asp)" + ] + }, + { + "cell_type": "markdown", + "id": "ae6e1a69", + "metadata": {}, + "source": [ + "### Plotting\n", + "Data visualization (or plotting) is another important data analysis topic and deserves its own discussion. Here in this notebook we will only demonstrate basic tools for simple data visualization tasks.\n", + "\n", + "Here are some references you might find useful:\n", + "* xarray plotting: https://docs.xarray.dev/en/stable/user-guide/plotting.html\n", + "* Atmospheric Community Toolkit (ACT): https://arm-doe.github.io/ACT/index.html\n", + "\n", + "Note: this notebook is auto-generated using a template. It uses a general idea to select variable(s) to plot and is not customized to each individual notebook. Feel free to change the variables of interest, especially if a figure fails to plot." + ] + }, + { + "cell_type": "markdown", + "id": "7e417fb2", + "metadata": {}, + "source": [ + "#### 1-dimensional basic time series plot\n", + "\n", + "For the following plot we would like to find variables such that\n", + "* it has one and only one dimension\n", + "* \"time\" is its coordinate variable\n", + "* it is not a dimension itself,\n", + "* it is not a special variable with substrings within [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "\n", + "Please see the following example in action. 
(For more details about pandas filtering, please see the following references.)\n", + "* [pandas: multiple conditions while indexing data frame](https://stackoverflow.com/questions/22591174/pandas-multiple-conditions-while-indexing-data-frame-unexpected-behavior)\n", + "* [pandas.DataFrame.apply](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.apply.html)\n", + "* [How to test if a string contains one of the substrings in a list...](https://stackoverflow.com/questions/26577516/how-to-test-if-a-string-contains-one-of-the-substrings-in-a-list-in-pandas" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "30edac2d", + "metadata": {}, + "outputs": [], + "source": [ + "# (pandas query) filter n_dim==1, non-dimenssional, and not contains the following substrings\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter = df_info[(df_info.n_dim == 1) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (df_info.is_dim==False) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4777a659", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " var_1d = df_filter.var_name.values[0]\n", + " var_1d\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fb5ae985", + "metadata": {}, + "outputs": [], + "source": [ + "# Note: if failed, change to another variable to plot.\n", + "try:\n", + " ds[var_1d].plot()\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "markdown", + "id": "9e41066e", + "metadata": {}, + "source": [ + "#### 2-dimenssional basic plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "09bd4adc", + "metadata": {}, + "outputs": [], + "source": [ + "# (pandas query) filter n_dim==2, non-dimenssional, and not contains the 
following substrings\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_2 = df_info[(df_info.n_dim == 2) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (df_info.is_dim==False) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter_2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2cdea1a9", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " var_2d = df_filter_2.var_name.values[0]\n", + " var_2d\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f5f48879", + "metadata": {}, + "outputs": [], + "source": [ + "# Note: if failed, change to another variable to plot.\n", + "try:\n", + " print(ds[var_2d].dims)\n", + " # ds[var_2d].plot()\n", + "\n", + " # conventionally, use \"time\" as x-axis\n", + " ds[var_2d].plot(x=\"time\")\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "markdown", + "id": "7554b2a6", + "metadata": {}, + "source": [ + "#### qc-plotting (optional)\n", + "\n", + "Note: act qc plotting has more strict requirement, one of them is the associated qc variable need to have \"flag_masks\" attributes. 
Which is added by using ds = act.io.armfiles.read_netcdf(files_list), then ds.clean.cleanup()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "71c2096f", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " ds_act = act.io.armfiles.read_netcdf(full_path)\n", + " print(type(ds_act))\n", + " ds_act.clean.cleanup()\n", + "\n", + " # or \n", + " # ds.clean.cleanup()\n", + "except Exception as e:\n", + " print(\"ERROR\", e)\n", + " ds_act = ds" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "844f8505", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter valid variables for ACT qc plotting\n", + "condition = (\"qc_\" + df_info.var_name).apply(lambda x: ds_act[x].attrs.get(\"flag_masks\") is not None \n", + " if x in list(ds.variables) else False)\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_3 = df_info[(df_info.is_dim==False) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings))) &\n", + " condition\n", + " ]\n", + "df_filter_3" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ecd38cc6", + "metadata": {}, + "outputs": [], + "source": [ + "# QC Plot\n", + "try:\n", + " qc_variable = df_filter_3.var_name.values[0]\n", + " print(qc_variable)\n", + "except Exception as e:\n", + " print(e)\n", + " \n", + "try:\n", + "\n", + " # Plot\n", + " qc_display = act.plotting.TimeSeriesDisplay(ds_act)\n", + " qc_display.add_subplots((2,), figsize = (9.5,10))\n", + " qc_ax = qc_display.plot(qc_variable, subplot_index=(0,), set_title=\"QC results on field: \" + qc_variable,)\n", + " qc_ax.grid()\n", + " qc_display.qc_flag_block_plot(qc_variable, subplot_index=(1,))\n", + " qc_ax.set_xlabel(f\"UTC Time starts at {ds.time.data[0]}\")\n", + "\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e)" + ] + }, + { + "cell_type": "markdown", + "id": "d76e4d27", + "metadata": 
{}, + "source": [ + "#### bonus: choose variables to plot from a dropdown menu " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bb733804", + "metadata": {}, + "outputs": [], + "source": [ + "# Valid variables filtering\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_4 = df_info[(df_info.is_dim==False) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter_4" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "82d5c20d", + "metadata": {}, + "outputs": [], + "source": [ + "# example 1: using xarray plot\n", + "\n", + "# Uncomment the following cell to try the interactive plot (ctrl + /)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2fc9aaa4", + "metadata": {}, + "outputs": [], + "source": [ + "# %matplotlib widget\n", + "# plt.clf()\n", + "\n", + "# fig, ax = plt.subplots(figsize=(10, 4))\n", + "\n", + "# available_variables = df_filter_4.var_name.values\n", + "# @widgets.interact(var=available_variables)\n", + "# def update(var = available_variables[0]):\n", + "# fig.clear() # Remove old lines from plot and plot new one\n", + "# if len(ds[var].dims)==2:\n", + "# ds[var].plot(x=\"time\", add_colorbar=False)\n", + "# else:\n", + "# ds[var].plot()\n", + "# plt.grid()\n", + "# plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fabdd802", + "metadata": {}, + "outputs": [], + "source": [ + "# example 2: using act plot\n", + "\n", + "# Uncomment the following cell to try the interactive plot (ctrl + /)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "962b4186", + "metadata": {}, + "outputs": [], + "source": [ + "# %matplotlib widget\n", + "# plt.clf()\n", + "\n", + "# available_variables = df_filter_4.var_name.values\n", + "\n", + "\n", + "# @widgets.interact(var=available_variables)\n", + "# def 
update(var = available_variables[0]):\n", + "\n", + "# i_display = act.plotting.TimeSeriesDisplay(ds_act)\n", + "# i_display.add_subplots((1,), figsize=(10, 4))\n", + "# ax = i_display.plot(var, subplot_index=(0,), set_title=f\"{var} ({ds_act[var].attrs['long_name']})\",)\n", + "\n", + "# ax.set_xlabel(f\"UTC Time starts at {ds.time.data[0]}\")\n", + "# ax.grid()\n", + "# plt.show()\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.16" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/RIPBE/ripbe1mcfarlane.c1.ipynb b/VAPs/quicklook/RIPBE/ripbe1mcfarlane.c1.ipynb new file mode 100644 index 00000000..912409e3 --- /dev/null +++ b/VAPs/quicklook/RIPBE/ripbe1mcfarlane.c1.ipynb @@ -0,0 +1,339 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# RIPBE1MCFARLANE.C1 Plots\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/ripbe) for more information about this vap." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib widget\n", + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import ipywidgets as widgets\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "# Data archive directory\n", + "DATA_DIR = r'/data/archive/'\n", + "\n", + "# Datastream info\n", + "DATASTREAM_NAME = 'ripbe1mcfarlane'\n", + "DATA_LEVEL = 'c1'\n", + "LOCATIONS = [{'end_date': '2011-06-05', 'facility': 'C1', 'site': 'sgp', 'start_date': '2002-03-01'}]" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "9faaf875", + "metadata": {}, + "source": [ + "## Define site, facility, and date range" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ac6764f5", + "metadata": {}, + "outputs": [], + "source": [ + "print(\"The following locations and date ranges are available for this VAP:\")\n", + "display(pd.DataFrame(LOCATIONS, columns=['site', 'facility', 'start_date', 'end_date']))" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "8d132223", + "metadata": {}, + "source": [ + "#### Define site, facility, and date range (date format: YYYY-MM-DD) using the variables below:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e563983a", + "metadata": {}, + "outputs": [], + "source": [ + "site_facility = ( 'sgp', 'C1' )\n", + "\n", + "date_start = '2011-06-03'\n", + "date_end = '2011-06-05'" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "bccd3dfe-2f99-49a5-bace-ea37e8dd8fc3", + "metadata": {}, + "source": [ + "## Load data files\n", + "Load data files from /data/archive/" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cb4b9a26-c574-49c0-a521-658fa553e39e", + 
"metadata": {}, + "outputs": [], + "source": [ + "# Compile list of files\n", + "site, facility = site_facility\n", + "d_date_start = datetime.strptime(date_start, '%Y-%m-%d')\n", + "d_date_end = datetime.strptime(date_end, '%Y-%m-%d')\n", + "dir_path = os.path.join(DATA_DIR + site, site + DATASTREAM_NAME + facility + r'.' + DATA_LEVEL )\n", + "dir_path\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6be8f3dc", + "metadata": {}, + "outputs": [], + "source": [ + "from datetime import date, timedelta\n", + "import pandas as pd\n", + "\n", + "def get_ARM_formated_dates(start_date, end_date):\n", + " \"\"\"\n", + " Get a list of ARM conventional formated date lists, based on start_date and end_date(inclusive)\n", + " EXAMPLE:\n", + " get_ARM_formated_dates(start_date=\"20180219\", end_date=\"20180221\")\n", + " >> [\"20180219\", \"20180220\", \"20180221\"] \n", + " \"\"\"\n", + " \n", + " _start_date = pd.to_datetime(start_date)\n", + " _end_date = pd.to_datetime(end_date)\n", + " \n", + " delta = _end_date - _start_date # returns timedelta \n", + " dates = []\n", + "\n", + " for i in range(delta.days + 1):\n", + " day = _start_date + timedelta(days=i)\n", + " day_formated = day.strftime(format=\"%Y%m%d\")\n", + " dates.append(day_formated)\n", + " return dates\n", + "\n", + "\n", + "get_ARM_formated_dates(start_date=date_start, end_date=date_end)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "51feea2e", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter a list of files based on date pattern\n", + "import glob\n", + "dates = get_ARM_formated_dates(start_date=date_start, end_date=date_end)\n", + "files_filter = []\n", + "for date in dates:\n", + " files_filter += glob.glob(f'{dir_path}/*.{date}*.*')\n", + " files_filter\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0e5d0d7", + "metadata": {}, + "outputs": [], + "source": [ + "# Load files as a single 
dataset\n", + "files_list = files_filter \n", + "ds = act.io.armfiles.read_netcdf(files_list)\n", + "ds.clean.cleanup()\n", + "print(f'{len(files_list)} files loaded')\n", + "ds\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "4a551094-9ec0-4b64-b80a-9940573c2f50", + "metadata": {}, + "source": [ + "## Plot time series data\n", + "#### Define the list of variables to be plotted:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d09b789e-84f1-4605-846b-a72c110c8048", + "metadata": {}, + "outputs": [], + "source": [ + "variables_to_plot = ['pressure_level', 'pressure_layer', 'temperature_level']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3458fb08-035b-4898-9253-0a94e6f9c97b", + "metadata": {}, + "outputs": [], + "source": [ + "ts_display = act.plotting.TimeSeriesDisplay(ds)\n", + "ts_display.add_subplots((len(variables_to_plot),), figsize = (9.5,4*len(variables_to_plot)))\n", + "\n", + "for i,v in enumerate(variables_to_plot):\n", + " ts_ax = ts_display.plot(v, subplot_index=(i,), set_title=ds.variables[v].attrs['long_name'],)\n", + " ts_ax.grid()\n", + "\n", + "plt.show()\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "194399aa-1907-452b-8ba9-bc31d7f60291", + "metadata": {}, + "source": [ + "## Quality check plots\n", + "#### Define variable for QC plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9a0ef63d-3eeb-48fc-a24a-43258b6134b8", + "metadata": {}, + "outputs": [], + "source": [ + "qc_variable = 'clear_sky_flag'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "532663a3-4dc0-4497-bda8-018c5f91e1c4", + "metadata": {}, + "outputs": [], + "source": [ + "# QC Plot\n", + "if ('qc_'+qc_variable) in ds.variables:\n", + "\n", + " # Plot\n", + " qc_display = act.plotting.TimeSeriesDisplay(ds)\n", + " qc_display.add_subplots((2,), figsize = (9.5,10))\n", + " qc_ax = qc_display.plot(qc_variable, subplot_index=(0,), 
set_title=\"QC results on field: \" + qc_variable,)\n", + " qc_ax.grid()\n", + " qc_display.qc_flag_block_plot(qc_variable, subplot_index=(1,))\n", + "\n", + " plt.show()\n", + "else:\n", + " print(f'QC not available for the selected field: {qc_variable}')\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "2308db16-f362-4033-a11a-c3e5e75ad9ba", + "metadata": {}, + "source": [ + "## Field selection dropdown menu\n", + "Select variable to be plotted from a dropdown menu" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d5792fa8-2253-441a-8ac6-1e07a0345031", + "metadata": {}, + "outputs": [], + "source": [ + "plt.ioff()\n", + "\n", + "# populate dropdown menu with available variables \n", + "available_variables = [v for v in ds.variables if not('time' in v or v.startswith('qc_') or v.startswith('source_')) and 'long_name' in ds.variables[v].attrs]\n", + "d_variable = 'pressure_level'\n", + "dropdown = widgets.Dropdown(\n", + " options = [(ds.variables[v].attrs['long_name'], v) for v in available_variables],\n", + " value= d_variable,\n", + " description='Field:',\n", + " disabled=False,\n", + ")\n", + "dropdown.layout.margin = '0px 30% 0px 20%'\n", + "dropdown.layout.width = '50%'\n", + "\n", + "# set up display\n", + "i_display = act.plotting.TimeSeriesDisplay(ds)\n", + "i_display.add_subplots((1,), figsize = (9.5,5))\n", + "i_ax = i_display.plot(d_variable, subplot_index=(0,), set_title=ds.variables[d_variable].attrs['long_name'],)\n", + "i_ax.grid()\n", + "i_fig = i_display.fig\n", + "\n", + "# update plot callback function\n", + "def update_plot(change):\n", + " i_ax.cla()\n", + " i_ax_new = i_display.plot(change.new, subplot_index=(0,), set_title=ds.variables[change.new].attrs['long_name'],)\n", + " i_ax_new.grid()\n", + " i_fig.canvas.draw()\n", + " i_fig.canvas.flush_events()\n", + "\n", + "dropdown.observe(update_plot, names='value')\n", + "\n", + "widgets.AppLayout(\n", + " header=dropdown,\n", + " 
center=i_fig.canvas,\n", + " pane_heights=[1, 6,1]\n", + ")\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.12" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/RLPROF/10rlprofbe1news.c1.ipynb b/VAPs/quicklook/RLPROF/10rlprofbe1news.c1.ipynb new file mode 100644 index 00000000..30df2e97 --- /dev/null +++ b/VAPs/quicklook/RLPROF/10rlprofbe1news.c1.ipynb @@ -0,0 +1,339 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# 10RLPROFBE1NEWS.C1 Plots\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/rlprof) for more information about this vap." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib widget\n", + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import ipywidgets as widgets\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "# Data archive directory\n", + "DATA_DIR = r'/data/archive/'\n", + "\n", + "# Datastream info\n", + "DATASTREAM_NAME = '10rlprofbe1news'\n", + "DATA_LEVEL = 'c1'\n", + "LOCATIONS = [{'end_date': '2015-09-21', 'facility': 'C1', 'site': 'sgp', 'start_date': '2004-10-01'}, {'end_date': '2015-01-01', 'facility': 'C3', 'site': 'twp', 'start_date': '2010-12-15'}]" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "9faaf875", + "metadata": {}, + "source": [ + "## Define site, facility, and date range" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ac6764f5", + "metadata": {}, + "outputs": [], + "source": [ + "print(\"The following locations and date ranges are available for this VAP:\")\n", + "display(pd.DataFrame(LOCATIONS, columns=['site', 'facility', 'start_date', 'end_date']))" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "8d132223", + "metadata": {}, + "source": [ + "#### Define site, facility, and date range (date format: YYYY-MM-DD) using the variables below:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e563983a", + "metadata": {}, + "outputs": [], + "source": [ + "site_facility = ( 'sgp', 'C1' )\n", + "\n", + "date_start = '2015-09-21'\n", + "date_end = '2015-09-21'" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "bccd3dfe-2f99-49a5-bace-ea37e8dd8fc3", + "metadata": {}, + "source": [ + "## Load data files\n", + "Load data files from /data/archive/" + ] + }, + { + "cell_type": 
"code", + "execution_count": null, + "id": "cb4b9a26-c574-49c0-a521-658fa553e39e", + "metadata": {}, + "outputs": [], + "source": [ + "# Compile list of files\n", + "site, facility = site_facility\n", + "d_date_start = datetime.strptime(date_start, '%Y-%m-%d')\n", + "d_date_end = datetime.strptime(date_end, '%Y-%m-%d')\n", + "dir_path = os.path.join(DATA_DIR + site, site + DATASTREAM_NAME + facility + r'.' + DATA_LEVEL )\n", + "dir_path\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6be8f3dc", + "metadata": {}, + "outputs": [], + "source": [ + "from datetime import date, timedelta\n", + "import pandas as pd\n", + "\n", + "def get_ARM_formated_dates(start_date, end_date):\n", + " \"\"\"\n", + " Get a list of ARM conventional formated date lists, based on start_date and end_date(inclusive)\n", + " EXAMPLE:\n", + " get_ARM_formated_dates(start_date=\"20180219\", end_date=\"20180221\")\n", + " >> [\"20180219\", \"20180220\", \"20180221\"] \n", + " \"\"\"\n", + " \n", + " _start_date = pd.to_datetime(start_date)\n", + " _end_date = pd.to_datetime(end_date)\n", + " \n", + " delta = _end_date - _start_date # returns timedelta \n", + " dates = []\n", + "\n", + " for i in range(delta.days + 1):\n", + " day = _start_date + timedelta(days=i)\n", + " day_formated = day.strftime(format=\"%Y%m%d\")\n", + " dates.append(day_formated)\n", + " return dates\n", + "\n", + "\n", + "get_ARM_formated_dates(start_date=date_start, end_date=date_end)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "51feea2e", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter a list of files based on date pattern\n", + "import glob\n", + "dates = get_ARM_formated_dates(start_date=date_start, end_date=date_end)\n", + "files_filter = []\n", + "for date in dates:\n", + " files_filter += glob.glob(f'{dir_path}/*.{date}*.*')\n", + " files_filter\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0e5d0d7", + 
"metadata": {}, + "outputs": [], + "source": [ + "# Load files as a single dataset\n", + "files_list = files_filter \n", + "ds = act.io.armfiles.read_netcdf(files_list)\n", + "ds.clean.cleanup()\n", + "print(f'{len(files_list)} files loaded')\n", + "ds\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "4a551094-9ec0-4b64-b80a-9940573c2f50", + "metadata": {}, + "source": [ + "## Plot time series data\n", + "#### Define the list of variables to be plotted:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d09b789e-84f1-4605-846b-a72c110c8048", + "metadata": {}, + "outputs": [], + "source": [ + "variables_to_plot = ['asr', 'bscat', 'ext']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3458fb08-035b-4898-9253-0a94e6f9c97b", + "metadata": {}, + "outputs": [], + "source": [ + "ts_display = act.plotting.TimeSeriesDisplay(ds)\n", + "ts_display.add_subplots((len(variables_to_plot),), figsize = (9.5,4*len(variables_to_plot)))\n", + "\n", + "for i,v in enumerate(variables_to_plot):\n", + " ts_ax = ts_display.plot(v, subplot_index=(i,), set_title=ds.variables[v].attrs['long_name'],)\n", + " ts_ax.grid()\n", + "\n", + "plt.show()\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "194399aa-1907-452b-8ba9-bc31d7f60291", + "metadata": {}, + "source": [ + "## Quality check plots\n", + "#### Define variable for QC plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9a0ef63d-3eeb-48fc-a24a-43258b6134b8", + "metadata": {}, + "outputs": [], + "source": [ + "qc_variable = 'asr'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "532663a3-4dc0-4497-bda8-018c5f91e1c4", + "metadata": {}, + "outputs": [], + "source": [ + "# QC Plot\n", + "if ('qc_'+qc_variable) in ds.variables:\n", + "\n", + " # Plot\n", + " qc_display = act.plotting.TimeSeriesDisplay(ds)\n", + " qc_display.add_subplots((2,), figsize = (9.5,10))\n", + " qc_ax = 
qc_display.plot(qc_variable, subplot_index=(0,), set_title=\"QC results on field: \" + qc_variable,)\n", + " qc_ax.grid()\n", + " qc_display.qc_flag_block_plot(qc_variable, subplot_index=(1,))\n", + "\n", + " plt.show()\n", + "else:\n", + " print(f'QC not available for the selected field: {qc_variable}')\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "2308db16-f362-4033-a11a-c3e5e75ad9ba", + "metadata": {}, + "source": [ + "## Field selection dropdown menu\n", + "Select variable to be plotted from a dropdown menu" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d5792fa8-2253-441a-8ac6-1e07a0345031", + "metadata": {}, + "outputs": [], + "source": [ + "plt.ioff()\n", + "\n", + "# populate dropdown menu with available variables \n", + "available_variables = [v for v in ds.variables if not('time' in v or v.startswith('qc_') or v.startswith('source_')) and 'long_name' in ds.variables[v].attrs]\n", + "d_variable = 'asr'\n", + "dropdown = widgets.Dropdown(\n", + " options = [(ds.variables[v].attrs['long_name'], v) for v in available_variables],\n", + " value= d_variable,\n", + " description='Field:',\n", + " disabled=False,\n", + ")\n", + "dropdown.layout.margin = '0px 30% 0px 20%'\n", + "dropdown.layout.width = '50%'\n", + "\n", + "# set up display\n", + "i_display = act.plotting.TimeSeriesDisplay(ds)\n", + "i_display.add_subplots((1,), figsize = (9.5,5))\n", + "i_ax = i_display.plot(d_variable, subplot_index=(0,), set_title=ds.variables[d_variable].attrs['long_name'],)\n", + "i_ax.grid()\n", + "i_fig = i_display.fig\n", + "\n", + "# update plot callback function\n", + "def update_plot(change):\n", + " i_ax.cla()\n", + " i_ax_new = i_display.plot(change.new, subplot_index=(0,), set_title=ds.variables[change.new].attrs['long_name'],)\n", + " i_ax_new.grid()\n", + " i_fig.canvas.draw()\n", + " i_fig.canvas.flush_events()\n", + "\n", + "dropdown.observe(update_plot, names='value')\n", + "\n", + "widgets.AppLayout(\n", + " 
header=dropdown,\n", + " center=i_fig.canvas,\n", + " pane_heights=[1, 6,1]\n", + ")\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.12" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/RLPROF/RLPROF_tutorial.ipynb b/VAPs/quicklook/RLPROF/RLPROF_tutorial.ipynb new file mode 100644 index 00000000..cc7dac76 --- /dev/null +++ b/VAPs/quicklook/RLPROF/RLPROF_tutorial.ipynb @@ -0,0 +1,838 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# 10RLPROFBE1NEWS.C1 Notebook\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/rlprof) for more information about this vap." + ] + }, + { + "cell_type": "markdown", + "id": "97097763", + "metadata": {}, + "source": [ + "In this notebook, we demonstrate the workflow to explore ARM vap data (using 10rlprofbe1news as an example.) Value-added products (VAPs) are higher-order data products that have been analyzed and processed to ease scientists’ use of ARM data in atmospheric research and global climate models. \n", + "Here is the main content we will cover." 
+ ] + }, + { + "cell_type": "markdown", + "id": "eddec40f", + "metadata": {}, + "source": [ + "# Table of Contents\n", + "## Access the data\n", + "* How to retrieve the data\n", + "* Data path and file name conventions\n", + "* Load data\n", + "## Explore the data\n", + "* NetCDF Data structure\n", + "* Xarray essentials\n", + "* Xarray Variable\n", + "## Plot the data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "import random\n", + "\n", + "import glob\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "id": "2372d5be", + "metadata": {}, + "source": [ + "## Access the data" + ] + }, + { + "cell_type": "markdown", + "id": "5b0d9684", + "metadata": {}, + "source": [ + "### How to retrieve the data\n", + "We assume the path \"/data/archive\" is available where you are running this notebook. You can use `os.path.exists(\"/data/archive\")` to verify if the path exists on your machine.\n", + "\n", + "\n", + "### Data path and file name conventions\n", + "There are several common terminologies regarding ARM data, for example, data-stream-name, data-level, site, facility, instrument, etc. (For more details, please see [ARM Data File Standards Version 1.2](https://www.google.com/search?q=arm+datastream+facility+cite+definition&rlz=1C1GCEJ_enUS1029US1029&ei=hb41ZICrBPukqtsPvMODIA&ved=0ahUKEwjAgeiS0aL-AhV7kmoFHbzhAAQQ4dUDCBA&uact=5&oq=arm+datastream+facility+cite+definition&gs_lcp=Cgxnd3Mtd2l6LXNlcnAQAzIFCAAQogQyBQgAEKIEMgUIABCiBDIFCAAQogQyBQgAEKIEOgoIABBHENYEELADOgoIIRCgARDDBBAKSgQIQRgAUJ4DWIoOYO0PaAFwAXgAgAGMAYgBuQmSAQMyLjmYAQCgAQHIAQjAAQE&sclient=gws-wiz-serp).) 
\n", + "\n", + "For example, this notebook is called `10rlprofbe1news.c1`, where `10rlprofbe1news` is the \"datastream name\", and `c1` is the \"data level\".\n", + "\n", + "This datastream also contains site `sgp` and facility `C1`. (Note: an individual datastream might have multiple site-facility pairs.)\n", + "In such a case, the data of this data-stream is stored at `/data/archive/sgp/sgp10rlprofbe1newsC1.c1`, which is in the format of `{DATA_DIR}/{site}/{site}{DATASTREAM_NAME}{facility}.{DATA_LEVEL}`. We can use the following method to assign the data-stream directory `datastream_dir = os.path.join(DATA_DIR, site, site + DATASTREAM_NAME + facility + '.' + DATA_LEVEL )`\n", + "\n", + "The data files under datastream_dir also follow naming conventions. Once you reach the datastream_dir level, the most important file naming convention to differentiate the files is \"yyyyMMdd.hhmmss\", which comes in handy for filtering files based on datetime. For example, we can use `glob.glob(f'{datastream_dir}/*.200709*.*')` to filter files in September 2007.\n", + "\n", + "(Note: refer to https://adc.arm.gov/discovery/#/ and the https://adc.arm.gov/solr8/metadata/select API to explore ARM datastreams and their associated available sites and facilities.)\n", + "\n", + "Please see the following examples in action" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d7e9eb85", + "metadata": {}, + "outputs": [], + "source": [ + "# Verify if DATA_DIR path exists\n", + "DATA_DIR = \"/data/archive\"\n", + "os.path.exists(DATA_DIR)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "586993fd", + "metadata": {}, + "outputs": [], + "source": [ + "# Speicify datastream_dir following the path conventions and check its existence\n", + "DATASTREAM_NAME = \"10rlprofbe1news\"\n", + "DATA_LEVEL = \"c1\"\n", + "site = \"sgp\"\n", + "facility = \"C1\"\n", + "datastream_dir = os.path.join(DATA_DIR, site, site + DATASTREAM_NAME + facility + '.' 
+ DATA_LEVEL )\n", + "print(datastream_dir)\n", + "print(os.path.exists(datastream_dir))\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0742f7c1", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: list 5 (random) files under datastream_dir\n", + "files = os.listdir(datastream_dir)\n", + "files[:5]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "39b98a36", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: get most recent file\n", + "list_of_files = glob.glob(f\"{datastream_dir}/*\") # * means all if need specific format then *.csv\n", + "latest_file = max(list_of_files, key=os.path.getctime)\n", + "latest_file" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "902d514e", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus sort datastream files based on datetime\n", + "files = os.listdir(datastream_dir)\n", + "file_sorted = files.copy()\n", + "file_sorted.sort() \n", + "print(file_sorted[:5])\n", + "\n", + "# to reverse\n", + "file_sorted_reverse = files.copy() \n", + "file_sorted_reverse.sort(reverse=True)\n", + "print(file_sorted_reverse[:5])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ec5923b2", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: pattern matching\n", + "# filter the 200709** files under datastream_dir\n", + "files_filter = glob.glob(f'{datastream_dir}/*.200709*.*')\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c4a4aa18", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: use armnotebook_utils.file_filter (TODO) to filter files based on datastream info \n" + ] + }, + { + "cell_type": "markdown", + "id": "cfeb9efc", + "metadata": {}, + "source": [ + "### Load data\n", + "The arm data is stored in [NetCDF](https://en.wikipedia.org/wiki/NetCDF#:~:text=%22NetCDF%20(network%20Common%20Data%20Form,format%20for%20representing%20scientific%20data.) format. 
We can use xarray's `open_dataset` method to load a single file, or `open_mfdataset` to open multiple files. (Note: the latter will still return a single xarray dataset object by combining multiple files.)\n", + "\n", + "Note: open_dataset keeps the file handle open and lazy loads its contents. All parameters are passed directly to open_dataset. It is a preferable method over load_dataset for memory efficiency. (For more details, please see [xarray.open_dataset](https://docs.xarray.dev/en/stable/generated/xarray.open_dataset.html))\n", + "\n", + "See the following example in action" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a440a329", + "metadata": {}, + "outputs": [], + "source": [ + "# open single file\n", + "full_path = latest_file\n", + "print(full_path)\n", + "ds_single = xr.open_dataset(full_path)\n", + "ds_single" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0143a3d", + "metadata": {}, + "outputs": [], + "source": [ + "print(ds_single)\n", + "print(type(ds_single))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1c0f8939", + "metadata": {}, + "outputs": [], + "source": [ + "# open multiple files\n", + "n_files = 3\n", + "full_paths = [os.path.join(datastream_dir, f_path) for f_path in file_sorted_reverse[:n_files]]\n", + "print(full_paths)\n", + "try: # Note: sometimes multiple files cannot be merged, so we used try except here.\n", + " ds_mutiple = xr.open_mfdataset(full_paths)\n", + " # ds_mutiple\n", + " print(type(ds_mutiple))\n", + " print(ds_mutiple)\n", + "except Exception as e:\n", + " print(e)" + ] + }, + { + "cell_type": "markdown", + "id": "3fa59943", + "metadata": {}, + "source": [ + "## Explore the data" + ] + }, + { + "cell_type": "markdown", + "id": "571f69b9", + "metadata": {}, + "source": [ + "### NetCDF Data structure \n", + "\n", + "\n", + "(If you are confident with NetCDF basics and xarray essentials, feel free to skip this section.)\n", + "Before we dive into 
data exploration, there are some essential concepts we should be familiar with (shown below): \n", + "* Dataset\n", + "* Data array\n", + "* Variable\n", + "* Dimension\n", + "* Coordinate\n", + "* Data Type\n", + "* Meta Data (Attributes)\n", + "\n", + "We will not go into details about NetCDF basics, but here are some references you might find helpful:\n", + "* [Components of a NetCDF Dataset](https://iprc.soest.hawaii.edu/users/xfu/tool/guidef-7.html#)\n", + "* [Network Common Data Form (NetCDF)](https://www.unidata.ucar.edu/software/netcdf/) and [A Brief History of (netCDF) Time](https://www.unidata.ucar.edu/software/netcdf/time/recs.html)\n", + "\n", + "\n", + "\n", + "### Xarray essentials\n", + "Earlier we introduced the xarray package and used `open_dataset` and `open_mfdataset` to retrieve a NetCDF data file as an xarray Dataset object. Recall that you can review the data in a notebook by using `print(ds)` or `ds`. \n", + "\n", + "Using xarray to retrieve the aforementioned NetCDF basics is straightforward. 
In general,\n", + "* Dataset: `ds`\n", + "* Data array: `ds[var_name]`\n", + "* Variable: `ds.variables`\n", + "* Dimension: `ds.dims`\n", + "* Coordinate: `ds.coords`\n", + "* Data Type: `type`\n", + "* Meta Data (Attributes): `ds.attrs`\n", + "\n", + "Also, here are some references if you are new to xarray:\n", + "* [Xarray in 45 minutes](https://tutorial.xarray.dev/overview/xarray-in-45-min.html)\n", + "* [Handling NetCDF Files using XArray for Absolute Beginners](https://towardsdatascience.com/handling-netcdf-files-using-xarray-for-absolute-beginners-111a8ab4463f)\n", + "\n", + "Try the following commands in action\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b3eac323", + "metadata": {}, + "outputs": [], + "source": [ + "ds = ds_single" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "77ecf85d", + "metadata": {}, + "outputs": [], + "source": [ + "# Dataset \n", + "ds" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "25e7de09", + "metadata": {}, + "outputs": [], + "source": [ + "# Data array, Variable\n", + "# Note: the info can be overwhelming. 
Do not attempt to grasp everything at the first glance.\n", + "ds.variables" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8c41b67e", + "metadata": {}, + "outputs": [], + "source": [ + "# dimenssions\n", + "ds.dims" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "156f1dfc", + "metadata": {}, + "outputs": [], + "source": [ + "# coordinates\n", + "ds.coords" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "277d6064", + "metadata": {}, + "outputs": [], + "source": [ + "# Meta Data (Attributes)\n", + "ds.attrs" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d334681f", + "metadata": {}, + "outputs": [], + "source": [ + "# type\n", + "print(type(ds))\n", + "print(type(ds.variables))\n", + "print(type(ds.dims))\n", + "print(type(ds.coords))\n", + "print(type(ds.attrs))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "643399d6", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: retrieve variable names only\n", + "list(ds.variables)" + ] + }, + { + "cell_type": "markdown", + "id": "1cf36878", + "metadata": {}, + "source": [ + "#### Discussion: variable vs. coordinates vs. dimensions. \n", + "From [Components of a NetCDF Dataset](https://iprc.soest.hawaii.edu/users/xfu/tool/guidef-7.html#):\n", + "\n", + "* Dimension: A dimension may be used to represent a real physical dimension, for example, time, latitude, longitude, or height. A dimension might also be used to index other quantities, for example station or model-run-number.\n", + "A netCDF dimension has both a name and a length. A dimension length is an arbitrary positive integer, except that one dimension in a netCDF dataset can have the length UNLIMITED.\n", + "\n", + "* Variables: Variables are used to store the bulk of the data in a netCDF dataset. A variable represents an array of values of the same type. A scalar value is treated as a 0-dimensional array. 
A variable has a name, a data type, and a shape described by its list of dimensions specified when the variable is created. A variable may also have associated attributes, which may be added, deleted or changed after the variable is created.\n", + "\n", + "* Coordinate (Variables): It is legal for a variable to have the same name as a dimension. Such variables have no special meaning to the netCDF library. However there is a convention that such variables should be treated in a special way by software using this library.\n", + "A variable with the same name as a dimension is called a coordinate variable. It typically defines a physical coordinate corresponding to that dimension.\n", + "\n", + "\n", + "Tips: By definition, coordinates and dimensions are also variables. For example ds.time is a coordinate but it is also a special variable. But in practice, when we talk about a variable, we mean a regular (non-coordinate) variable. We will use this convention for the remainder of the notebook. " + ] + }, + { + "cell_type": "markdown", + "id": "d47120f9", + "metadata": {}, + "source": [ + "### Xarray Variable\n", + "\n", + "\n", + "We can use `ds.variables` to access the variables of a dataset. But printing out the whole dataset can often be overwhelming. Instead, we would work on the individual variable (also called Xarray data array.) using the `ds[var_name]` syntax, where `var_name` is the name of the variable. See the following examples in action.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5a57e136", + "metadata": {}, + "outputs": [], + "source": [ + "print(type(ds[\"time\"]))\n", + "ds[\"time\"]" + ] + }, + { + "cell_type": "markdown", + "id": "51ec643e", + "metadata": {}, + "source": [ + "#### Variable properties\n", + "For an individual variable, the following properties are most commonly used, along with the associated xarray attribute to retrieve them. 
(assuming var = ds[var_name])\n", + "* name: `var.name`\n", + "* data content: `var.data`\n", + "* attributes: `var.attrs`\n", + "* dimenstions: `var.dims`\n", + "* data type: `var.data.dtype`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a632a525", + "metadata": {}, + "outputs": [], + "source": [ + "var_name = \"time\"\n", + "var = ds[var_name]\n", + "\n", + "print(\"var.name: \\n\", var.name, \"\\n\")\n", + "print(\"var.data: \\n\", var.data, \"\\n\")\n", + "print(\"var.attrs: \\n\", var.attrs, \"\\n\")\n", + "print(\"var.dims: \\n\", var.dims, \"\\n\")\n", + "print(\"var.data.dtype: \\n\", var.data.dtype, \"\\n\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b256b07f", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: indivdual variable info table (collect and display variable info as a tabular format)\n", + "df_info = pd.DataFrame()\n", + "df_info[\"var_name\"] = list(ds.variables)\n", + "df_info[\"dims\"] = df_info.var_name.apply(lambda x: ds[x].dims)\n", + "df_info[\"is_dim\"] = df_info.var_name.apply(lambda x: x in ds.dims)\n", + "df_info[\"n_dim\"] = df_info.var_name.apply(lambda x: len(ds[x].dims))\n", + "df_info[\"attrs\"] = df_info.var_name.apply(lambda x: ds[x].attrs)\n", + "df_info[\"dtype\"] = df_info.var_name.apply(lambda x: ds[x].data.dtype)\n", + "df_info" + ] + }, + { + "cell_type": "markdown", + "id": "e6a36b96", + "metadata": {}, + "source": [ + "### Data cleaning/Preprocessing (skipped)\n", + "Data cleaning and preprocessing is an important stage in the the data analysis pipeline. However it will be out of the scope of this notebook. For more information about data cleaning and preprocessing basics, here are some references. 
\n", + "* [Xarray Fundamentals](https://earth-env-data-science.github.io/lectures/xarray/xarray.html)\n", + "* [Xarray Tutorial — Pangeo Gallery documentation](http://gallery.pangeo.io/repos/pangeo-data/pangeo-tutorial-gallery/xarray.html)\n", + "* [Pythonic Data Cleaning With pandas and NumPy](https://realpython.com/python-data-cleaning-numpy-pandas/)\n", + "* [Pandas - Cleaning Data](https://www.w3schools.com/python/pandas/pandas_cleaning.asp)" + ] + }, + { + "cell_type": "markdown", + "id": "ae6e1a69", + "metadata": {}, + "source": [ + "### Plotting\n", + "Data visualization (or plotting) is another important data analysis topic and deserves its own discussion. Here in this notebook we will only demonstrate basic tools for simple data visualization tasks.\n", + "\n", + "Here are some references you might find useful:\n", + "* xarray plotting: https://docs.xarray.dev/en/stable/user-guide/plotting.html\n", + "* Atmospheric Community Toolkit (ACT): https://arm-doe.github.io/ACT/index.html\n", + "\n", + "Note: this notebook is auto-generated using a template. It uses a generic heuristic to select variable(s) to plot and is not customized for each individual notebook. Feel free to change the variables of interest, especially if a figure fails to plot." + ] + }, + { + "cell_type": "markdown", + "id": "7e417fb2", + "metadata": {}, + "source": [ + "#### 1-dimensional basic time series plot\n", + "\n", + "For the following plot we would like to find variables such that\n", + "* it has one and only one dimension\n", + "* \"time\" is its coordinate variable\n", + "* it is not a dimension itself,\n", + "* it is not a special variable with substrings within [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "\n", + "Please see the following example in action. 
(For more details about pandas filtering, please see the following references.)\n", + "* [pandas: multiple conditions while indexing data frame](https://stackoverflow.com/questions/22591174/pandas-multiple-conditions-while-indexing-data-frame-unexpected-behavior)\n", + "* [pandas.DataFrame.apply](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.apply.html)\n", + "* [How to test if a string contains one of the substrings in a list...](https://stackoverflow.com/questions/26577516/how-to-test-if-a-string-contains-one-of-the-substrings-in-a-list-in-pandas)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "30edac2d", + "metadata": {}, + "outputs": [], + "source": [ + "# (pandas query) filter n_dim==1, non-dimensional, and not containing the following substrings\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter = df_info[(df_info.n_dim == 1) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (df_info.is_dim==False) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4777a659", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " var_1d = df_filter.var_name.values[0]\n", + " var_1d\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fb5ae985", + "metadata": {}, + "outputs": [], + "source": [ + "# Note: if failed, change to another variable to plot.\n", + "try:\n", + " ds[var_1d].plot()\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "markdown", + "id": "9e41066e", + "metadata": {}, + "source": [ + "#### 2-dimensional basic plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "09bd4adc", + "metadata": {}, + "outputs": [], + "source": [ + "# (pandas query) filter n_dim==2, non-dimensional, and not containing the 
following substrings\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_2 = df_info[(df_info.n_dim == 2) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (df_info.is_dim==False) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter_2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2cdea1a9", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " var_2d = df_filter_2.var_name.values[0]\n", + " var_2d\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f5f48879", + "metadata": {}, + "outputs": [], + "source": [ + "# Note: if failed, change to another variable to plot.\n", + "try:\n", + " print(ds[var_2d].dims)\n", + " # ds[var_2d].plot()\n", + "\n", + " # conventionally, use \"time\" as x-axis\n", + " ds[var_2d].plot(x=\"time\")\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "markdown", + "id": "7554b2a6", + "metadata": {}, + "source": [ + "#### qc-plotting (optional)\n", + "\n", + "Note: ACT QC plotting has stricter requirements; one of them is that the associated qc variable needs to have a \"flag_masks\" attribute. 
Which is added by using ds = act.io.armfiles.read_netcdf(files_list), then ds.clean.cleanup()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "71c2096f", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " ds_act = act.io.armfiles.read_netcdf(full_path)\n", + " print(type(ds_act))\n", + " ds_act.clean.cleanup()\n", + "\n", + " # or \n", + " # ds.clean.cleanup()\n", + "except Exception as e:\n", + " print(\"ERROR\", e)\n", + " ds_act = ds" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "844f8505", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter valid variables for ACT qc plotting\n", + "condition = (\"qc_\" + df_info.var_name).apply(lambda x: ds_act[x].attrs.get(\"flag_masks\") is not None \n", + " if x in list(ds.variables) else False)\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_3 = df_info[(df_info.is_dim==False) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings))) &\n", + " condition\n", + " ]\n", + "df_filter_3" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ecd38cc6", + "metadata": {}, + "outputs": [], + "source": [ + "# QC Plot\n", + "try:\n", + " qc_variable = df_filter_3.var_name.values[0]\n", + " print(qc_variable)\n", + "except Exception as e:\n", + " print(e)\n", + " \n", + "try:\n", + "\n", + " # Plot\n", + " qc_display = act.plotting.TimeSeriesDisplay(ds_act)\n", + " qc_display.add_subplots((2,), figsize = (9.5,10))\n", + " qc_ax = qc_display.plot(qc_variable, subplot_index=(0,), set_title=\"QC results on field: \" + qc_variable,)\n", + " qc_ax.grid()\n", + " qc_display.qc_flag_block_plot(qc_variable, subplot_index=(1,))\n", + " qc_ax.set_xlabel(f\"UTC Time starts at {ds.time.data[0]}\")\n", + "\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e)" + ] + }, + { + "cell_type": "markdown", + "id": "d76e4d27", + "metadata": 
{}, + "source": [ + "#### bonus: choose variables to plot from a dropdown menu " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bb733804", + "metadata": {}, + "outputs": [], + "source": [ + "# Valid variables filtering\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_4 = df_info[(df_info.is_dim==False) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter_4" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "82d5c20d", + "metadata": {}, + "outputs": [], + "source": [ + "# example 1: using xarray plot\n", + "\n", + "# Uncomment the following cell to try the interactive plot (ctrl + /)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2fc9aaa4", + "metadata": {}, + "outputs": [], + "source": [ + "# %matplotlib widget\n", + "# plt.clf()\n", + "\n", + "# fig, ax = plt.subplots(figsize=(10, 4))\n", + "\n", + "# available_variables = df_filter_4.var_name.values\n", + "# @widgets.interact(var=available_variables)\n", + "# def update(var = available_variables[0]):\n", + "# fig.clear() # Remove old lines from plot and plot new one\n", + "# if len(ds[var].dims)==2:\n", + "# ds[var].plot(x=\"time\", add_colorbar=False)\n", + "# else:\n", + "# ds[var].plot()\n", + "# plt.grid()\n", + "# plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fabdd802", + "metadata": {}, + "outputs": [], + "source": [ + "# example 2: using act plot\n", + "\n", + "# Uncomment the following cell to try the interactive plot (ctrl + /)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "962b4186", + "metadata": {}, + "outputs": [], + "source": [ + "# %matplotlib widget\n", + "# plt.clf()\n", + "\n", + "# available_variables = df_filter_4.var_name.values\n", + "\n", + "\n", + "# @widgets.interact(var=available_variables)\n", + "# def 
update(var = available_variables[0]):\n", + "\n", + "# i_display = act.plotting.TimeSeriesDisplay(ds_act)\n", + "# i_display.add_subplots((1,), figsize=(10, 4))\n", + "# ax = i_display.plot(var, subplot_index=(0,), set_title=f\"{var} ({ds_act[var].attrs['long_name']})\",)\n", + "\n", + "# ax.set_xlabel(f\"UTC Time starts at {ds.time.data[0]}\")\n", + "# ax.grid()\n", + "# plt.show()\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.16" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/SACRADV3D3C/.ipynb_checkpoints/kasacradv3d3c.c1-checkpoint.ipynb b/VAPs/quicklook/SACRADV3D3C/.ipynb_checkpoints/kasacradv3d3c.c1-checkpoint.ipynb new file mode 100644 index 00000000..8d364ba7 --- /dev/null +++ b/VAPs/quicklook/SACRADV3D3C/.ipynb_checkpoints/kasacradv3d3c.c1-checkpoint.ipynb @@ -0,0 +1,2574 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# KASACRADV3D3C.C1 Plots\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/sacradv3d3c) for more information about this vap." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib widget\n", + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import ipywidgets as widgets\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "# Data archive directory\n", + "DATA_DIR = r'/data/archive/'\n", + "\n", + "# Datastream info\n", + "DATASTREAM_NAME = 'kasacradv3d3c'\n", + "DATA_LEVEL = 'c1'\n", + "LOCATIONS = [{'end_date': '2012-08-31', 'facility': 'C1', 'site': 'sgp', 'start_date': '2012-08-01'}]" + ] + }, + { + "cell_type": "markdown", + "id": "9faaf875", + "metadata": {}, + "source": [ + "## Define site, facility, and date range" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "ac6764f5", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The following locations and date ranges are available for this VAP:\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sitefacilitystart_dateend_date
0sgpC12012-08-012012-08-31
\n", + "
" + ], + "text/plain": [ + " site facility start_date end_date\n", + "0 sgp C1 2012-08-01 2012-08-31" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "print(\"The following locations and date ranges are available for this VAP:\")\n", + "display(pd.DataFrame(LOCATIONS, columns=['site', 'facility', 'start_date', 'end_date']))" + ] + }, + { + "cell_type": "markdown", + "id": "8d132223", + "metadata": {}, + "source": [ + "#### Define site, facility, and date range (date format: YYYY-MM-DD) using the variables below:" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "e563983a", + "metadata": {}, + "outputs": [], + "source": [ + "site_facility = ( 'sgp', 'C1' )\n", + "\n", + "date_start = '2012-08-29'\n", + "date_end = '2012-08-31'" + ] + }, + { + "cell_type": "markdown", + "id": "bccd3dfe-2f99-49a5-bace-ea37e8dd8fc3", + "metadata": {}, + "source": [ + "## Load data files\n", + "Load data files from /data/archive/" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "cb4b9a26-c574-49c0-a521-658fa553e39e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'/data/archive/sgp/sgpkasacradv3d3cC1.c1'" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Compile list of files\n", + "site, facility = site_facility\n", + "d_date_start = datetime.strptime(date_start, '%Y-%m-%d')\n", + "d_date_end = datetime.strptime(date_end, '%Y-%m-%d')\n", + "dir_path = os.path.join(DATA_DIR + site, site + DATASTREAM_NAME + facility + r'.' 
+ DATA_LEVEL )\n", + "dir_path\n" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "6be8f3dc", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['20120829', '20120830', '20120831']" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from datetime import date, timedelta\n", + "import pandas as pd\n", + "\n", + "def get_ARM_formated_dates(start_date, end_date):\n", + " \"\"\"\n", + " Get a list of ARM conventional formated date lists, based on start_date and end_date(inclusive)\n", + " EXAMPLE:\n", + " get_ARM_formated_dates(start_date=\"20180219\", end_date=\"20180221\")\n", + " >> [\"20180219\", \"20180220\", \"20180221\"] \n", + " \"\"\"\n", + " \n", + " _start_date = pd.to_datetime(start_date)\n", + " _end_date = pd.to_datetime(end_date)\n", + " \n", + " delta = _end_date - _start_date # returns timedelta \n", + " dates = []\n", + "\n", + " for i in range(delta.days + 1):\n", + " day = _start_date + timedelta(days=i)\n", + " day_formated = day.strftime(format=\"%Y%m%d\")\n", + " dates.append(day_formated)\n", + " return dates\n", + "\n", + "\n", + "get_ARM_formated_dates(start_date=date_start, end_date=date_end)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "51feea2e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['/data/archive/sgp/sgpkasacradv3d3cC1.c1/sgpkasacradv3d3cC1.c1.20120829.025008.nc',\n", + " '/data/archive/sgp/sgpkasacradv3d3cC1.c1/sgpkasacradv3d3cC1.c1.20120829.153009.nc',\n", + " '/data/archive/sgp/sgpkasacradv3d3cC1.c1/sgpkasacradv3d3cC1.c1.20120829.201628.nc',\n", + " '/data/archive/sgp/sgpkasacradv3d3cC1.c1/sgpkasacradv3d3cC1.c1.20120829.002402.nc',\n", + " '/data/archive/sgp/sgpkasacradv3d3cC1.c1/sgpkasacradv3d3cC1.c1.20120829.175243.nc',\n", + " '/data/archive/sgp/sgpkasacradv3d3cC1.c1/sgpkasacradv3d3cC1.c1.20120829.195648.nc',\n", + " 
'/data/archive/sgp/sgpkasacradv3d3cC1.c1/sgpkasacradv3d3cC1.c1.20120829.130446.nc',\n", + " '/data/archive/sgp/sgpkasacradv3d3cC1.c1/sgpkasacradv3d3cC1.c1.20120829.081435.nc',\n", + " '/data/archive/sgp/sgpkasacradv3d3cC1.c1/sgpkasacradv3d3cC1.c1.20120829.151031.nc',\n", + " '/data/archive/sgp/sgpkasacradv3d3cC1.c1/sgpkasacradv3d3cC1.c1.20120829.173303.nc',\n", + " '/data/archive/sgp/sgpkasacradv3d3cC1.c1/sgpkasacradv3d3cC1.c1.20120829.030947.nc',\n", + " '/data/archive/sgp/sgpkasacradv3d3cC1.c1/sgpkasacradv3d3cC1.c1.20120829.054747.nc',\n", + " '/data/archive/sgp/sgpkasacradv3d3cC1.c1/sgpkasacradv3d3cC1.c1.20120829.124506.nc',\n", + " '/data/archive/sgp/sgpkasacradv3d3cC1.c1/sgpkasacradv3d3cC1.c1.20120829.104113.nc',\n", + " '/data/archive/sgp/sgpkasacradv3d3cC1.c1/sgpkasacradv3d3cC1.c1.20120829.075456.nc',\n", + " '/data/archive/sgp/sgpkasacradv3d3cC1.c1/sgpkasacradv3d3cC1.c1.20120829.004340.nc',\n", + " '/data/archive/sgp/sgpkasacradv3d3cC1.c1/sgpkasacradv3d3cC1.c1.20120829.102134.nc',\n", + " '/data/archive/sgp/sgpkasacradv3d3cC1.c1/sgpkasacradv3d3cC1.c1.20120829.052807.nc',\n", + " '/data/archive/sgp/sgpkasacradv3d3cC1.c1/sgpkasacradv3d3cC1.c1.20120829.221741.nc',\n", + " '/data/archive/sgp/sgpkasacradv3d3cC1.c1/sgpkasacradv3d3cC1.c1.20120829.051559.nc',\n", + " '/data/archive/sgp/sgpkasacradv3d3cC1.c1/sgpkasacradv3d3cC1.c1.20120829.223721.nc',\n", + " '/data/archive/sgp/sgpkasacradv3d3cC1.c1/sgpkasacradv3d3cC1.c1.20120830.202247.nc',\n", + " '/data/archive/sgp/sgpkasacradv3d3cC1.c1/sgpkasacradv3d3cC1.c1.20120830.104350.nc',\n", + " '/data/archive/sgp/sgpkasacradv3d3cC1.c1/sgpkasacradv3d3cC1.c1.20120830.131114.nc',\n", + " '/data/archive/sgp/sgpkasacradv3d3cC1.c1/sgpkasacradv3d3cC1.c1.20120830.030911.nc',\n", + " '/data/archive/sgp/sgpkasacradv3d3cC1.c1/sgpkasacradv3d3cC1.c1.20120830.010234.nc',\n", + " '/data/archive/sgp/sgpkasacradv3d3cC1.c1/sgpkasacradv3d3cC1.c1.20120830.173849.nc',\n", + " 
'/data/archive/sgp/sgpkasacradv3d3cC1.c1/sgpkasacradv3d3cC1.c1.20120830.224833.nc',\n", + " '/data/archive/sgp/sgpkasacradv3d3cC1.c1/sgpkasacradv3d3cC1.c1.20120830.004254.nc',\n", + " '/data/archive/sgp/sgpkasacradv3d3cC1.c1/sgpkasacradv3d3cC1.c1.20120830.075837.nc',\n", + " '/data/archive/sgp/sgpkasacradv3d3cC1.c1/sgpkasacradv3d3cC1.c1.20120830.081817.nc',\n", + " '/data/archive/sgp/sgpkasacradv3d3cC1.c1/sgpkasacradv3d3cC1.c1.20120830.053252.nc',\n", + " '/data/archive/sgp/sgpkasacradv3d3cC1.c1/sgpkasacradv3d3cC1.c1.20120830.125133.nc',\n", + " '/data/archive/sgp/sgpkasacradv3d3cC1.c1/sgpkasacradv3d3cC1.c1.20120830.102411.nc',\n", + " '/data/archive/sgp/sgpkasacradv3d3cC1.c1/sgpkasacradv3d3cC1.c1.20120830.153540.nc',\n", + " '/data/archive/sgp/sgpkasacradv3d3cC1.c1/sgpkasacradv3d3cC1.c1.20120830.055231.nc',\n", + " '/data/archive/sgp/sgpkasacradv3d3cC1.c1/sgpkasacradv3d3cC1.c1.20120830.151600.nc',\n", + " '/data/archive/sgp/sgpkasacradv3d3cC1.c1/sgpkasacradv3d3cC1.c1.20120830.222853.nc',\n", + " '/data/archive/sgp/sgpkasacradv3d3cC1.c1/sgpkasacradv3d3cC1.c1.20120830.175829.nc',\n", + " '/data/archive/sgp/sgpkasacradv3d3cC1.c1/sgpkasacradv3d3cC1.c1.20120830.200306.nc',\n", + " '/data/archive/sgp/sgpkasacradv3d3cC1.c1/sgpkasacradv3d3cC1.c1.20120830.032851.nc',\n", + " '/data/archive/sgp/sgpkasacradv3d3cC1.c1/sgpkasacradv3d3cC1.c1.20120831.060543.nc',\n", + " '/data/archive/sgp/sgpkasacradv3d3cC1.c1/sgpkasacradv3d3cC1.c1.20120831.054603.nc',\n", + " '/data/archive/sgp/sgpkasacradv3d3cC1.c1/sgpkasacradv3d3cC1.c1.20120831.230808.nc',\n", + " '/data/archive/sgp/sgpkasacradv3d3cC1.c1/sgpkasacradv3d3cC1.c1.20120831.202355.nc',\n", + " '/data/archive/sgp/sgpkasacradv3d3cC1.c1/sgpkasacradv3d3cC1.c1.20120831.103937.nc',\n", + " '/data/archive/sgp/sgpkasacradv3d3cC1.c1/sgpkasacradv3d3cC1.c1.20120831.130632.nc',\n", + " '/data/archive/sgp/sgpkasacradv3d3cC1.c1/sgpkasacradv3d3cC1.c1.20120831.105916.nc',\n", + " 
'/data/archive/sgp/sgpkasacradv3d3cC1.c1/sgpkasacradv3d3cC1.c1.20120831.224825.nc',\n", + " '/data/archive/sgp/sgpkasacradv3d3cC1.c1/sgpkasacradv3d3cC1.c1.20120831.011234.nc',\n", + " '/data/archive/sgp/sgpkasacradv3d3cC1.c1/sgpkasacradv3d3cC1.c1.20120831.175952.nc',\n", + " '/data/archive/sgp/sgpkasacradv3d3cC1.c1/sgpkasacradv3d3cC1.c1.20120831.031811.nc',\n", + " '/data/archive/sgp/sgpkasacradv3d3cC1.c1/sgpkasacradv3d3cC1.c1.20120831.033753.nc',\n", + " '/data/archive/sgp/sgpkasacradv3d3cC1.c1/sgpkasacradv3d3cC1.c1.20120831.005255.nc',\n", + " '/data/archive/sgp/sgpkasacradv3d3cC1.c1/sgpkasacradv3d3cC1.c1.20120831.132611.nc',\n", + " '/data/archive/sgp/sgpkasacradv3d3cC1.c1/sgpkasacradv3d3cC1.c1.20120831.181932.nc',\n", + " '/data/archive/sgp/sgpkasacradv3d3cC1.c1/sgpkasacradv3d3cC1.c1.20120831.153200.nc',\n", + " '/data/archive/sgp/sgpkasacradv3d3cC1.c1/sgpkasacradv3d3cC1.c1.20120831.204334.nc',\n", + " '/data/archive/sgp/sgpkasacradv3d3cC1.c1/sgpkasacradv3d3cC1.c1.20120831.083147.nc',\n", + " '/data/archive/sgp/sgpkasacradv3d3cC1.c1/sgpkasacradv3d3cC1.c1.20120831.155139.nc',\n", + " '/data/archive/sgp/sgpkasacradv3d3cC1.c1/sgpkasacradv3d3cC1.c1.20120831.081207.nc']" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Filter a list of files based on date pattern\n", + "import glob\n", + "dates = get_ARM_formated_dates(start_date=date_start, end_date=date_end)\n", + "files_filter = []\n", + "for date in dates:\n", + " files_filter += glob.glob(f'{dir_path}/*.{date}*.*')\n", + " files_filter\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "b0e5d0d7", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "61 files loaded\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
<xarray.Dataset>\n",
+       "Dimensions:                                       (time: 23687, bound: 2,\n",
+       "                                                   height: 201, bin: 28,\n",
+       "                                                   isoline: 4, h_distance: 401,\n",
+       "                                                   frequency: 1)\n",
+       "Coordinates:\n",
+       "  * time                                          (time) datetime64[ns] 2012-...\n",
+       "  * height                                        (height) float32 0.0 ... 10.0\n",
+       "  * bin                                           (bin) float32 -47.5 ... 20.0\n",
+       "  * isoline                                       (isoline) float32 5.0 ... 20.0\n",
+       "  * h_distance                                    (h_distance) float32 -1e+04...\n",
+       "  * frequency                                     (frequency) float32 3.529e+10\n",
+       "Dimensions without coordinates: bound\n",
+       "Data variables: (12/20)\n",
+       "    base_time                                     (time) datetime64[ns] 2012-...\n",
+       "    time_offset                                   (time) datetime64[ns] 2012-...\n",
+       "    time_bounds                                   (time, bound) object dask.array<chunksize=(392, 2), meta=np.ndarray>\n",
+       "    height_bounds                                 (time, height, bound) float32 dask.array<chunksize=(392, 201, 2), meta=np.ndarray>\n",
+       "    bin_bounds                                    (time, bin, bound) float32 dask.array<chunksize=(392, 28, 2), meta=np.ndarray>\n",
+       "    isoline_bounds                                (time, isoline, bound) float32 dask.array<chunksize=(392, 4, 2), meta=np.ndarray>\n",
+       "    ...                                            ...\n",
+       "    cloud_fraction                                (time, isoline, height) float32 dask.array<chunksize=(392, 4, 201), meta=np.ndarray>\n",
+       "    cloud_fraction_std                            (time, isoline, height) float32 dask.array<chunksize=(392, 4, 201), meta=np.ndarray>\n",
+       "    cfad                                          (time, bin, height) float32 dask.array<chunksize=(392, 28, 201), meta=np.ndarray>\n",
+       "    lat                                           (time) float32 36.6 ... 36.6\n",
+       "    lon                                           (time) float32 -97.49 ... -...\n",
+       "    alt                                           (time) float32 318.0 ... 318.0\n",
+       "Attributes: (12/20)\n",
+       "    command_line:          sacradv3d3c -s sgp -f C1 -b 20120829 -n sacradv3d3...\n",
+       "    process_version:       vap-sacradv3d3c-1.1-0.el6\n",
+       "    dod_version:           kasacradv3d3c-c1-1.2\n",
+       "    input_datastreams:     sgpkasacrcorcwrhiC1.c1 : 1.0 : 20120829.002403-201...\n",
+       "    site_id:               sgp\n",
+       "    platform_id:           kasacradv3d3c\n",
+       "    ...                    ...\n",
+       "    radar_beam_width_h:    0.311\n",
+       "    history:               created by user singh on machine amber at 2018-12-...\n",
+       "    _file_dates:           ['20120829', '20120829', '20120829', '20120829', '...\n",
+       "    _file_times:           ['002402', '004340', '025008', '030947', '051559',...\n",
+       "    _datastream:           sgpkasacradv3d3cC1.c1\n",
+       "    _arm_standards_flag:   1
" + ], + "text/plain": [ + "\n", + "Dimensions: (time: 23687, bound: 2,\n", + " height: 201, bin: 28,\n", + " isoline: 4, h_distance: 401,\n", + " frequency: 1)\n", + "Coordinates:\n", + " * time (time) datetime64[ns] 2012-...\n", + " * height (height) float32 0.0 ... 10.0\n", + " * bin (bin) float32 -47.5 ... 20.0\n", + " * isoline (isoline) float32 5.0 ... 20.0\n", + " * h_distance (h_distance) float32 -1e+04...\n", + " * frequency (frequency) float32 3.529e+10\n", + "Dimensions without coordinates: bound\n", + "Data variables: (12/20)\n", + " base_time (time) datetime64[ns] 2012-...\n", + " time_offset (time) datetime64[ns] 2012-...\n", + " time_bounds (time, bound) object dask.array\n", + " height_bounds (time, height, bound) float32 dask.array\n", + " bin_bounds (time, bin, bound) float32 dask.array\n", + " isoline_bounds (time, isoline, bound) float32 dask.array\n", + " ... ...\n", + " cloud_fraction (time, isoline, height) float32 dask.array\n", + " cloud_fraction_std (time, isoline, height) float32 dask.array\n", + " cfad (time, bin, height) float32 dask.array\n", + " lat (time) float32 36.6 ... 36.6\n", + " lon (time) float32 -97.49 ... -...\n", + " alt (time) float32 318.0 ... 318.0\n", + "Attributes: (12/20)\n", + " command_line: sacradv3d3c -s sgp -f C1 -b 20120829 -n sacradv3d3...\n", + " process_version: vap-sacradv3d3c-1.1-0.el6\n", + " dod_version: kasacradv3d3c-c1-1.2\n", + " input_datastreams: sgpkasacrcorcwrhiC1.c1 : 1.0 : 20120829.002403-201...\n", + " site_id: sgp\n", + " platform_id: kasacradv3d3c\n", + " ... 
...\n", + " radar_beam_width_h: 0.311\n", + " history: created by user singh on machine amber at 2018-12-...\n", + " _file_dates: ['20120829', '20120829', '20120829', '20120829', '...\n", + " _file_times: ['002402', '004340', '025008', '030947', '051559',...\n", + " _datastream: sgpkasacradv3d3cC1.c1\n", + " _arm_standards_flag: 1" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Load files as a single dataset\n", + "files_list = files_filter \n", + "ds = act.io.armfiles.read_netcdf(files_list)\n", + "ds.clean.cleanup()\n", + "print(f'{len(files_list)} files loaded')\n", + "ds\n" + ] + }, + { + "cell_type": "markdown", + "id": "4a551094-9ec0-4b64-b80a-9940573c2f50", + "metadata": {}, + "source": [ + "## Plot time series data\n", + "#### Define the list of variables to be plotted:" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "d09b789e-84f1-4605-846b-a72c110c8048", + "metadata": {}, + "outputs": [], + "source": [ + "variables_to_plot = ['wind_speed', 'wind_direction', 'cloud_fraction']" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "3458fb08-035b-4898-9253-0a94e6f9c97b", + "metadata": {}, + "outputs": [ + { + "ename": "TypeError", + "evalue": "Dimensions of C (201, 4, 23687) should be one smaller than X(23687) and Y(4) while using shading='flat' see help(pcolormesh)", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[9], line 5\u001b[0m\n\u001b[1;32m 2\u001b[0m ts_display\u001b[38;5;241m.\u001b[39madd_subplots((\u001b[38;5;28mlen\u001b[39m(variables_to_plot),), figsize \u001b[38;5;241m=\u001b[39m (\u001b[38;5;241m9.5\u001b[39m,\u001b[38;5;241m4\u001b[39m\u001b[38;5;241m*\u001b[39m\u001b[38;5;28mlen\u001b[39m(variables_to_plot)))\n\u001b[1;32m 4\u001b[0m 
\u001b[38;5;28;01mfor\u001b[39;00m i,v \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28menumerate\u001b[39m(variables_to_plot):\n\u001b[0;32m----> 5\u001b[0m ts_ax \u001b[38;5;241m=\u001b[39m \u001b[43mts_display\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mplot\u001b[49m\u001b[43m(\u001b[49m\u001b[43mv\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43msubplot_index\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mi\u001b[49m\u001b[43m,\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mset_title\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mds\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mvariables\u001b[49m\u001b[43m[\u001b[49m\u001b[43mv\u001b[49m\u001b[43m]\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mattrs\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mlong_name\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 6\u001b[0m ts_ax\u001b[38;5;241m.\u001b[39mgrid()\n\u001b[1;32m 8\u001b[0m plt\u001b[38;5;241m.\u001b[39mshow()\n", + "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/act/plotting/timeseriesdisplay.py:588\u001b[0m, in \u001b[0;36mTimeSeriesDisplay.plot\u001b[0;34m(self, field, dsname, subplot_index, cmap, set_title, add_nan, day_night_background, invert_y_axis, abs_limits, time_rng, y_rng, use_var_for_y, set_shading, assessment_overplot, overplot_marker, overplot_behind, overplot_markersize, assessment_overplot_category, assessment_overplot_category_color, force_line_plot, labels, cbar_label, cbar_h_adjust, secondary_y, y_axis_flag_meanings, colorbar_labels, cb_friendly, **kwargs)\u001b[0m\n\u001b[1;32m 586\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124medgecolors\u001b[39m\u001b[38;5;124m'\u001b[39m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m 
kwargs\u001b[38;5;241m.\u001b[39mkeys():\n\u001b[1;32m 587\u001b[0m kwargs[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124medgecolors\u001b[39m\u001b[38;5;124m'\u001b[39m] \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mface\u001b[39m\u001b[38;5;124m'\u001b[39m\n\u001b[0;32m--> 588\u001b[0m mesh \u001b[38;5;241m=\u001b[39m \u001b[43max\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mpcolormesh\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 589\u001b[0m \u001b[43m \u001b[49m\u001b[43mnp\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43masarray\u001b[49m\u001b[43m(\u001b[49m\u001b[43mxdata\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 590\u001b[0m \u001b[43m \u001b[49m\u001b[43mydata\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 591\u001b[0m \u001b[43m \u001b[49m\u001b[43mdata\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mtranspose\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 592\u001b[0m \u001b[43m \u001b[49m\u001b[43mshading\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mset_shading\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 593\u001b[0m \u001b[43m \u001b[49m\u001b[43mcmap\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcmap\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 594\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 595\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 597\u001b[0m \u001b[38;5;66;03m# Set Title\u001b[39;00m\n\u001b[1;32m 598\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m set_title \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n", + "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/matplotlib/__init__.py:1442\u001b[0m, in \u001b[0;36m_preprocess_data..inner\u001b[0;34m(ax, data, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1439\u001b[0m 
\u001b[38;5;129m@functools\u001b[39m\u001b[38;5;241m.\u001b[39mwraps(func)\n\u001b[1;32m 1440\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21minner\u001b[39m(ax, \u001b[38;5;241m*\u001b[39margs, data\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs):\n\u001b[1;32m 1441\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m data \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m-> 1442\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[43max\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;28;43mmap\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43msanitize_sequence\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43margs\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1444\u001b[0m bound \u001b[38;5;241m=\u001b[39m new_sig\u001b[38;5;241m.\u001b[39mbind(ax, \u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n\u001b[1;32m 1445\u001b[0m auto_label \u001b[38;5;241m=\u001b[39m (bound\u001b[38;5;241m.\u001b[39marguments\u001b[38;5;241m.\u001b[39mget(label_namer)\n\u001b[1;32m 1446\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m bound\u001b[38;5;241m.\u001b[39mkwargs\u001b[38;5;241m.\u001b[39mget(label_namer))\n", + "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/matplotlib/axes/_axes.py:6220\u001b[0m, in \u001b[0;36mAxes.pcolormesh\u001b[0;34m(self, alpha, norm, cmap, vmin, vmax, shading, antialiased, *args, **kwargs)\u001b[0m\n\u001b[1;32m 6217\u001b[0m shading \u001b[38;5;241m=\u001b[39m shading\u001b[38;5;241m.\u001b[39mlower()\n\u001b[1;32m 6218\u001b[0m 
kwargs\u001b[38;5;241m.\u001b[39msetdefault(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124medgecolors\u001b[39m\u001b[38;5;124m'\u001b[39m, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mnone\u001b[39m\u001b[38;5;124m'\u001b[39m)\n\u001b[0;32m-> 6220\u001b[0m X, Y, C, shading \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_pcolorargs\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mpcolormesh\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 6221\u001b[0m \u001b[43m \u001b[49m\u001b[43mshading\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mshading\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mkwargs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 6222\u001b[0m coords \u001b[38;5;241m=\u001b[39m np\u001b[38;5;241m.\u001b[39mstack([X, Y], axis\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m-\u001b[39m\u001b[38;5;241m1\u001b[39m)\n\u001b[1;32m 6223\u001b[0m \u001b[38;5;66;03m# convert to one dimensional array, except for 3D RGB(A) arrays\u001b[39;00m\n", + "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/matplotlib/axes/_axes.py:5751\u001b[0m, in \u001b[0;36mAxes._pcolorargs\u001b[0;34m(self, funcname, shading, *args, **kwargs)\u001b[0m\n\u001b[1;32m 5749\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m shading \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mflat\u001b[39m\u001b[38;5;124m'\u001b[39m:\n\u001b[1;32m 5750\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m (Nx, Ny) \u001b[38;5;241m!=\u001b[39m (ncols \u001b[38;5;241m+\u001b[39m \u001b[38;5;241m1\u001b[39m, nrows \u001b[38;5;241m+\u001b[39m \u001b[38;5;241m1\u001b[39m):\n\u001b[0;32m-> 5751\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m 
\u001b[38;5;167;01mTypeError\u001b[39;00m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mDimensions of C \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mC\u001b[38;5;241m.\u001b[39mshape\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m should\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 5752\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m be one smaller than X(\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mNx\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m) and Y(\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mNy\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m)\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 5753\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m while using shading=\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mflat\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 5754\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m see help(\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mfuncname\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m)\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 5755\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m: \u001b[38;5;66;03m# ['nearest', 'gouraud']:\u001b[39;00m\n\u001b[1;32m 5756\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m (Nx, Ny) \u001b[38;5;241m!=\u001b[39m (ncols, nrows):\n", + "\u001b[0;31mTypeError\u001b[0m: Dimensions of C (201, 4, 23687) should be one smaller than X(23687) and Y(4) while using shading='flat' see help(pcolormesh)" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "8189dde8d6e0443cb4732d2453c2a30b", + "version_major": 2, + "version_minor": 0 + }, + "image/png": "", + "text/html": [ + "\n", + "
\n", + "
\n", + " Figure\n", + "
\n", + " \n", + "
\n", + " " + ], + "text/plain": [ + "Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "ts_display = act.plotting.TimeSeriesDisplay(ds)\n", + "ts_display.add_subplots((len(variables_to_plot),), figsize = (9.5,4*len(variables_to_plot)))\n", + "\n", + "for i,v in enumerate(variables_to_plot):\n", + " ts_ax = ts_display.plot(v, subplot_index=(i,), set_title=ds.variables[v].attrs['long_name'],)\n", + " ts_ax.grid()\n", + "\n", + "plt.show()\n" + ] + }, + { + "cell_type": "markdown", + "id": "2308db16-f362-4033-a11a-c3e5e75ad9ba", + "metadata": {}, + "source": [ + "## Field selection dropdown menu\n", + "Select variable to be plotted from a dropdown menu" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d5792fa8-2253-441a-8ac6-1e07a0345031", + "metadata": {}, + "outputs": [], + "source": [ + "plt.ioff()\n", + "\n", + "# populate dropdown menu with available variables \n", + "available_variables = [v for v in ds.variables if not('time' in v or v.startswith('qc_') or v.startswith('source_')) and 'long_name' in ds.variables[v].attrs]\n", + "d_variable = 'wind_speed'\n", + "dropdown = widgets.Dropdown(\n", + " options = [(ds.variables[v].attrs['long_name'], v) for v in available_variables],\n", + " value= d_variable,\n", + " description='Field:',\n", + " disabled=False,\n", + ")\n", + "dropdown.layout.margin = '0px 30% 0px 20%'\n", + "dropdown.layout.width = '50%'\n", + "\n", + "# set up display\n", + "i_display = act.plotting.TimeSeriesDisplay(ds)\n", + "i_display.add_subplots((1,), figsize = (9.5,5))\n", + "i_ax = i_display.plot(d_variable, subplot_index=(0,), set_title=ds.variables[d_variable].attrs['long_name'],)\n", + "i_ax.grid()\n", + "i_fig = i_display.fig\n", + "\n", + "# update plot callback function\n", + "def update_plot(change):\n", + " i_ax.cla()\n", + " i_ax_new = i_display.plot(change.new, 
subplot_index=(0,), set_title=ds.variables[change.new].attrs['long_name'],)\n", + " i_ax_new.grid()\n", + " i_fig.canvas.draw()\n", + " i_fig.canvas.flush_events()\n", + "\n", + "dropdown.observe(update_plot, names='value')\n", + "\n", + "widgets.AppLayout(\n", + " header=dropdown,\n", + " center=i_fig.canvas,\n", + " pane_heights=[1, 6,1]\n", + ")\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.16" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/SACRADV3D3C/SACRADV3D3C_tutorial.ipynb b/VAPs/quicklook/SACRADV3D3C/SACRADV3D3C_tutorial.ipynb new file mode 100644 index 00000000..8186ebb2 --- /dev/null +++ b/VAPs/quicklook/SACRADV3D3C/SACRADV3D3C_tutorial.ipynb @@ -0,0 +1,838 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# KASACRADV3D3C.C1 Notebook\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/sacradv3d3c) for more information about this vap." + ] + }, + { + "cell_type": "markdown", + "id": "97097763", + "metadata": {}, + "source": [ + "In this notebook, we demonstrate the workflow to explore ARM vap data (using kasacradv3d3c as an example.) Value-added products (VAPs) are higher-order data products that have been analyzed and processed to ease scientists’ use of ARM data in atmospheric research and global climate models. \n", + "Here is the main content we will cover." 
+ ] + }, + { + "cell_type": "markdown", + "id": "eddec40f", + "metadata": {}, + "source": [ + "# Table of Content\n", + "## Access the data\n", + "* How to retrieve the data\n", + "* Data path and file name conventions\n", + "* Load data\n", + "## Explore the data\n", + "* NetCDF Data structure\n", + "* Xarray essentials\n", + "* Xarray Variable\n", + "## Plot the data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "import random\n", + "\n", + "import glob\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "id": "2372d5be", + "metadata": {}, + "source": [ + "## Access the data" + ] + }, + { + "cell_type": "markdown", + "id": "5b0d9684", + "metadata": {}, + "source": [ + "### How to retrieve the data\n", + "We assume the path \"/data/archive\" is available where you are running this notebook. You can use `os.path.exists(\"/data/archive\")` to verify if the path exists at your machine.\n", + "\n", + "\n", + "### Data path and file name conventions\n", + "There several common terminologies regarding ARM data, for example, data-stream-name, data-level, site, facility, instrument, etc. (For more details, please see [ARM Data File Standards Version 1.2](https://www.google.com/search?q=arm+datastream+facility+cite+definition&rlz=1C1GCEJ_enUS1029US1029&ei=hb41ZICrBPukqtsPvMODIA&ved=0ahUKEwjAgeiS0aL-AhV7kmoFHbzhAAQQ4dUDCBA&uact=5&oq=arm+datastream+facility+cite+definition&gs_lcp=Cgxnd3Mtd2l6LXNlcnAQAzIFCAAQogQyBQgAEKIEMgUIABCiBDIFCAAQogQyBQgAEKIEOgoIABBHENYEELADOgoIIRCgARDDBBAKSgQIQRgAUJ4DWIoOYO0PaAFwAXgAgAGMAYgBuQmSAQMyLjmYAQCgAQHIAQjAAQE&sclient=gws-wiz-serp).) 
\n", + "\n", + "For example, this notebook is called `kasacradv3d3c.c1`, where `kasacradv3d3c` is the \"datastream name\", and `c1` is the \"data level\".\n", + "\n", + "This datastream also contains site `sgp` and facility `C1`. (Note: an individual datastream might have multiple site-facility pairs.)\n", + "In such a case, the data of this data-stream is stored at `/data/archive/sgp/sgpkasacradv3d3cC1.c1`, which is in the format of `<DATA_DIR>/<site>/<site><datastream_name><facility>.<data_level>`. We can use the following method to assign the data-stream directory: `datastream_dir = os.path.join(DATA_DIR, site, site + DATASTREAM_NAME + facility + '.' + DATA_LEVEL )`\n", + "\n", + "The data files under datastream_dir also follow naming conventions. But once you reach the datastream_dir level, the most important file naming convention to differentiate the files is \"yyyyMMdd.hhmmss\", which comes in handy to filter files based on datetime. For example, we can use `glob.glob(f'{datastream_dir}/*.200709*.*')` to filter files from September 2007.\n", + "\n", + "(Note: refer to the https://adc.arm.gov/discovery/#/ and https://adc.arm.gov/solr8/metadata/select API to explore ARM datastreams and associated available sites and facilities.)\n", + "\n", + "Please see the following examples in action" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d7e9eb85", + "metadata": {}, + "outputs": [], + "source": [ + "# Verify if DATA_DIR path exists\n", + "DATA_DIR = \"/data/archive\"\n", + "os.path.exists(DATA_DIR)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "586993fd", + "metadata": {}, + "outputs": [], + "source": [ + "# Specify datastream_dir following the path conventions and check its existence\n", + "DATASTREAM_NAME = \"kasacradv3d3c\"\n", + "DATA_LEVEL = \"c1\"\n", + "site = \"sgp\"\n", + "facility = \"C1\"\n", + "datastream_dir = os.path.join(DATA_DIR, site, site + DATASTREAM_NAME + facility + '.' 
+ DATA_LEVEL )\n", + "print(datastream_dir)\n", + "print(os.path.exists(datastream_dir))\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0742f7c1", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: list 5 (random) files under datastream_dir\n", + "files = os.listdir(datastream_dir)\n", + "files[:5]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "39b98a36", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: get most recent file\n", + "list_of_files = glob.glob(f\"{datastream_dir}/*\") # * means all if need specific format then *.csv\n", + "latest_file = max(list_of_files, key=os.path.getctime)\n", + "latest_file" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "902d514e", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus sort datastream files based on datetime\n", + "files = os.listdir(datastream_dir)\n", + "file_sorted = files.copy()\n", + "file_sorted.sort() \n", + "print(file_sorted[:5])\n", + "\n", + "# to reverse\n", + "file_sorted_reverse = files.copy() \n", + "file_sorted_reverse.sort(reverse=True)\n", + "print(file_sorted_reverse[:5])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ec5923b2", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: pattern matching\n", + "# filter the 200709** files under datastream_dir\n", + "files_filter = glob.glob(f'{datastream_dir}/*.200709*.*')\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c4a4aa18", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: use armnotebook_utils.file_filter (TODO) to filter files based on datastream info \n" + ] + }, + { + "cell_type": "markdown", + "id": "cfeb9efc", + "metadata": {}, + "source": [ + "### Load data\n", + "The arm data is stored in [NetCDF](https://en.wikipedia.org/wiki/NetCDF#:~:text=%22NetCDF%20(network%20Common%20Data%20Form,format%20for%20representing%20scientific%20data.) format. 
We can use xarray's `open_dataset` method to load single file, or `open_mfdataset` to open multiple files. (Note: the latter will still return a single xarray dataset object by combining multiple files.)\n", + "\n", + "Note: open_dataset keeps the file handle open and lazy loads its contents. All parameters are passed directly to open_dataset. It is a preferable method over load_dataset for memory-effiency. (more details, please see [xarray.open_dataset](https://docs.xarray.dev/en/stable/generated/xarray.open_dataset.html))\n", + "\n", + "See the following example in action" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a440a329", + "metadata": {}, + "outputs": [], + "source": [ + "# open single file\n", + "full_path = latest_file\n", + "print(full_path)\n", + "ds_single = xr.open_dataset(full_path)\n", + "ds_single" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0143a3d", + "metadata": {}, + "outputs": [], + "source": [ + "print(ds_single)\n", + "print(type(ds_single))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1c0f8939", + "metadata": {}, + "outputs": [], + "source": [ + "# open multiple files\n", + "n_files = 3\n", + "full_paths = [os.path.join(datastream_dir, f_path) for f_path in file_sorted_reverse[:n_files]]\n", + "print(full_paths)\n", + "try: # Note: sometimes multiple files cannot be merged, so we used try except here.\n", + " ds_mutiple = xr.open_mfdataset(full_paths)\n", + " # ds_mutiple\n", + " print(type(ds_mutiple))\n", + " print(ds_mutiple)\n", + "except Exception as e:\n", + " print(e)" + ] + }, + { + "cell_type": "markdown", + "id": "3fa59943", + "metadata": {}, + "source": [ + "## Explore the data" + ] + }, + { + "cell_type": "markdown", + "id": "571f69b9", + "metadata": {}, + "source": [ + "### NetCDF Data structure \n", + "\n", + "\n", + "(If you are confident with NetCDF basics and xarray essentials, feel free to skip this session.)\n", + "Before we dive into 
data exploration, there are some essential concepts we should be familiar with (shown below): \n", + "* Dataset\n", + "* Data array\n", + "* Variable\n", + "* Dimension\n", + "* Coordinate\n", + "* Data Type\n", + "* Meta Data (Attributes)\n", + "\n", + "We will not go into details about NetCDF basics, but here are some references you might find helpful:\n", + "* [Components of a NetCDF Dataset](https://iprc.soest.hawaii.edu/users/xfu/tool/guidef-7.html#)\n", + "* [Network Common Data Form (NetCDF)](https://www.unidata.ucar.edu/software/netcdf/) and [A Brief History of (netCDF) Time](https://www.unidata.ucar.edu/software/netcdf/time/recs.html)\n", + "\n", + "\n", + "\n", + "### Xarray essentials\n", + "Earlier we introduced the xarray package and used `open_dataset` and `open_mfdataset` to retrieve a NetCDF data file as an xarray Dataset object. Recall that you can review the data in a notebook by using `print(ds)` or `ds`. \n", + "\n", + "Using xarray to retrieve the aforementioned NetCDF basics is straightforward. 
In general,\n", + "* Dataset: `ds`\n", + "* Data array: `ds.variables`\n", + "* Variable: `ds.variables`\n", + "* Dimension: `ds.dims`\n", + "* Coordinate: `ds.coords`\n", + "* Data Type: `type`\n", + "* Meta Data (Attributes): `ds.attrs`\n", + "\n", + "Also, here are some references if you are new to xarray:\n", + "* [Xarray in 45 minutes](https://tutorial.xarray.dev/overview/xarray-in-45-min.html)\n", + "* [Handling NetCDF Files using XArray for Absolute Beginners](https://towardsdatascience.com/handling-netcdf-files-using-xarray-for-absolute-beginners-111a8ab4463f)\n", + "\n", + "Try the following commands in action\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b3eac323", + "metadata": {}, + "outputs": [], + "source": [ + "ds = ds_single" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "77ecf85d", + "metadata": {}, + "outputs": [], + "source": [ + "# Dataset \n", + "ds" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "25e7de09", + "metadata": {}, + "outputs": [], + "source": [ + "# Data array, Variable\n", + "# Note: the info can be overwhelming. 
Do not attempt to grasp everything at first glance.\n", + "ds.variables" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8c41b67e", + "metadata": {}, + "outputs": [], + "source": [ + "# dimensions\n", + "ds.dims" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "156f1dfc", + "metadata": {}, + "outputs": [], + "source": [ + "# coordinates\n", + "ds.coords" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "277d6064", + "metadata": {}, + "outputs": [], + "source": [ + "# Meta Data (Attributes)\n", + "ds.attrs" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d334681f", + "metadata": {}, + "outputs": [], + "source": [ + "# type\n", + "print(type(ds))\n", + "print(type(ds.variables))\n", + "print(type(ds.dims))\n", + "print(type(ds.coords))\n", + "print(type(ds.attrs))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "643399d6", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: retrieve variable names only\n", + "list(ds.variables)" + ] + }, + { + "cell_type": "markdown", + "id": "1cf36878", + "metadata": {}, + "source": [ + "#### Discussion: variable vs. coordinates vs. dimensions. \n", + "From [Components of a NetCDF Dataset](https://iprc.soest.hawaii.edu/users/xfu/tool/guidef-7.html#):\n", + "\n", + "* Dimension: A dimension may be used to represent a real physical dimension, for example, time, latitude, longitude, or height. A dimension might also be used to index other quantities, for example station or model-run-number.\n", + "A netCDF dimension has both a name and a length. A dimension length is an arbitrary positive integer, except that one dimension in a netCDF dataset can have the length UNLIMITED.\n", + "\n", + "* Variables: Variables are used to store the bulk of the data in a netCDF dataset. A variable represents an array of values of the same type. A scalar value is treated as a 0-dimensional array. 
A variable has a name, a data type, and a shape described by its list of dimensions specified when the variable is created. A variable may also have associated attributes, which may be added, deleted or changed after the variable is created.\n", + "\n", + "* Coordinate (Variables): It is legal for a variable to have the same name as a dimension. Such variables have no special meaning to the netCDF library. However there is a convention that such variables should be treated in a special way by software using this library.\n", + "A variable with the same name as a dimension is called a coordinate variable. It typically defines a physical coordinate corresponding to that dimension.\n", + "\n", + "\n", + "Tips: By definition, coordinates and dimensions are also variables. For example, ds.time is a coordinate but it is also a special variable. But in practice, when we talk about a variable, we mean a regular/non-coordinate variable. We will use this convention for the remainder of the notebook. " + ] + }, + { + "cell_type": "markdown", + "id": "d47120f9", + "metadata": {}, + "source": [ + "### Xarray Variable\n", + "\n", + "\n", + "We can use `ds.variables` to access the variables of a dataset. But many times printing out the whole dataset can be overwhelming. Instead, we would work on an individual variable (also called an Xarray data array) using the `ds[var_name]` syntax, where `var_name` is the name of the variable. See the following examples in action.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5a57e136", + "metadata": {}, + "outputs": [], + "source": [ + "print(type(ds[\"time\"]))\n", + "ds[\"time\"]" + ] + }, + { + "cell_type": "markdown", + "id": "51ec643e", + "metadata": {}, + "source": [ + "#### Variable properties\n", + "For an individual variable, the following properties are mostly used, with the associated xarray method to retrieve them. 
(assuming var = ds[var_name])\n", + "* name: `var.name`\n", + "* data content: `var.data`\n", + "* attributes: `var.attrs`\n", + "* dimensions: `var.dims`\n", + "* data type: `var.data.dtype`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a632a525", + "metadata": {}, + "outputs": [], + "source": [ + "var_name = \"time\"\n", + "var = ds[var_name]\n", + "\n", + "print(\"var.name: \\n\", var.name, \"\\n\")\n", + "print(\"var.data: \\n\", var.data, \"\\n\")\n", + "print(\"var.attrs: \\n\", var.attrs, \"\\n\")\n", + "print(\"var.dims: \\n\", var.dims, \"\\n\")\n", + "print(\"var.data.dtype: \\n\", var.data.dtype, \"\\n\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b256b07f", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: individual variable info table (collect and display variable info in a tabular format)\n", + "df_info = pd.DataFrame()\n", + "df_info[\"var_name\"] = list(ds.variables)\n", + "df_info[\"dims\"] = df_info.var_name.apply(lambda x: ds[x].dims)\n", + "df_info[\"is_dim\"] = df_info.var_name.apply(lambda x: x in ds.dims)\n", + "df_info[\"n_dim\"] = df_info.var_name.apply(lambda x: len(ds[x].dims))\n", + "df_info[\"attrs\"] = df_info.var_name.apply(lambda x: ds[x].attrs)\n", + "df_info[\"dtype\"] = df_info.var_name.apply(lambda x: ds[x].data.dtype)\n", + "df_info" + ] + }, + { + "cell_type": "markdown", + "id": "e6a36b96", + "metadata": {}, + "source": [ + "### Data cleaning/Preprocessing (skipped)\n", + "Data cleaning and preprocessing is an important stage in the data analysis pipeline. However, it is out of the scope of this notebook. For more information about data cleaning and preprocessing basics, here are some references. 
\n", + "* [Xarray Fundamentals](https://earth-env-data-science.github.io/lectures/xarray/xarray.html)\n", + "* [Xarray Tutorial — Pangeo Gallery documentation](http://gallery.pangeo.io/repos/pangeo-data/pangeo-tutorial-gallery/xarray.html)\n", + "* [Pythonic Data Cleaning With pandas and NumPy](https://realpython.com/python-data-cleaning-numpy-pandas/)\n", + "* [Pandas - Cleaning Data](https://www.w3schools.com/python/pandas/pandas_cleaning.asp)" + ] + }, + { + "cell_type": "markdown", + "id": "ae6e1a69", + "metadata": {}, + "source": [ + "### Plotting\n", + "Data visualization (or plotting) is another important data analysis topic and deserves its own discussion. Here in this notebook we will only demonstrate basic tools for simple data visualization tasks.\n", + "\n", + "Here are some references you might find useful:\n", + "* xarray plotting: https://docs.xarray.dev/en/stable/user-guide/plotting.html\n", + "* Atmospheric Community Toolkit (ACT): https://arm-doe.github.io/ACT/index.html\n", + "\n", + "Note: this notebook is auto-generated using a template. It uses a general idea to select variable(s) to plot and is not customized to each individual notebook. Feel free to change the variables of interest, especially if a certain figure fails to plot." + ] + }, + { + "cell_type": "markdown", + "id": "7e417fb2", + "metadata": {}, + "source": [ + "#### 1-dimensional basic time series plot\n", + "\n", + "For the following plot we would like to find variables such that\n", + "* it has one and only one dimension\n", + "* \"time\" is its coordinate variable\n", + "* it is not a dimension itself,\n", + "* it is not a special variable with substrings within [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "\n", + "Please see the following example in action. 
(For more details about pandas filtering, please see the following references.)\n", + "* [pandas: multiple conditions while indexing data frame](https://stackoverflow.com/questions/22591174/pandas-multiple-conditions-while-indexing-data-frame-unexpected-behavior)\n", + "* [pandas.DataFrame.apply](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.apply.html)\n", + "* [How to test if a string contains one of the substrings in a list...](https://stackoverflow.com/questions/26577516/how-to-test-if-a-string-contains-one-of-the-substrings-in-a-list-in-pandas" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "30edac2d", + "metadata": {}, + "outputs": [], + "source": [ + "# (pandas query) filter n_dim==1, non-dimenssional, and not contains the following substrings\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter = df_info[(df_info.n_dim == 1) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (df_info.is_dim==False) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4777a659", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " var_1d = df_filter.var_name.values[0]\n", + " var_1d\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fb5ae985", + "metadata": {}, + "outputs": [], + "source": [ + "# Note: if failed, change to another variable to plot.\n", + "try:\n", + " ds[var_1d].plot()\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "markdown", + "id": "9e41066e", + "metadata": {}, + "source": [ + "#### 2-dimenssional basic plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "09bd4adc", + "metadata": {}, + "outputs": [], + "source": [ + "# (pandas query) filter n_dim==2, non-dimenssional, and not contains the 
following substrings\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_2 = df_info[(df_info.n_dim == 2) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (df_info.is_dim==False) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter_2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2cdea1a9", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " var_2d = df_filter_2.var_name.values[0]\n", + " var_2d\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f5f48879", + "metadata": {}, + "outputs": [], + "source": [ + "# Note: if failed, change to another variable to plot.\n", + "try:\n", + " print(ds[var_2d].dims)\n", + " # ds[var_2d].plot()\n", + "\n", + " # conventionally, use \"time\" as x-axis\n", + " ds[var_2d].plot(x=\"time\")\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "markdown", + "id": "7554b2a6", + "metadata": {}, + "source": [ + "#### qc-plotting (optional)\n", + "\n", + "Note: act qc plotting has more strict requirement, one of them is the associated qc variable need to have \"flag_masks\" attributes. 
Which is added by using ds = act.io.armfiles.read_netcdf(files_list), then ds.clean.cleanup()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "71c2096f", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " ds_act = act.io.armfiles.read_netcdf(full_path)\n", + " print(type(ds_act))\n", + " ds_act.clean.cleanup()\n", + "\n", + " # or \n", + " # ds.clean.cleanup()\n", + "except Exception as e:\n", + " print(\"ERROR\", e)\n", + " ds_act = ds" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "844f8505", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter valid variables for ACT qc plotting\n", + "condition = (\"qc_\" + df_info.var_name).apply(lambda x: ds_act[x].attrs.get(\"flag_masks\") is not None \n", + " if x in list(ds.variables) else False)\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_3 = df_info[(df_info.is_dim==False) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings))) &\n", + " condition\n", + " ]\n", + "df_filter_3" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ecd38cc6", + "metadata": {}, + "outputs": [], + "source": [ + "# QC Plot\n", + "try:\n", + " qc_variable = df_filter_3.var_name.values[0]\n", + " print(qc_variable)\n", + "except Exception as e:\n", + " print(e)\n", + " \n", + "try:\n", + "\n", + " # Plot\n", + " qc_display = act.plotting.TimeSeriesDisplay(ds_act)\n", + " qc_display.add_subplots((2,), figsize = (9.5,10))\n", + " qc_ax = qc_display.plot(qc_variable, subplot_index=(0,), set_title=\"QC results on field: \" + qc_variable,)\n", + " qc_ax.grid()\n", + " qc_display.qc_flag_block_plot(qc_variable, subplot_index=(1,))\n", + " qc_ax.set_xlabel(f\"UTC Time starts at {ds.time.data[0]}\")\n", + "\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e)" + ] + }, + { + "cell_type": "markdown", + "id": "d76e4d27", + "metadata": 
{}, + "source": [ + "#### bonus: choose variables to plot from a dropdown menu " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bb733804", + "metadata": {}, + "outputs": [], + "source": [ + "# Valid variables filtering\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_4 = df_info[(df_info.is_dim==False) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter_4" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "82d5c20d", + "metadata": {}, + "outputs": [], + "source": [ + "# example 1: using xarray plot\n", + "\n", + "# Uncomment the following cell to try the interactive plot (ctrl + /)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2fc9aaa4", + "metadata": {}, + "outputs": [], + "source": [ + "# %matplotlib widget\n", + "# plt.clf()\n", + "\n", + "# fig, ax = plt.subplots(figsize=(10, 4))\n", + "\n", + "# available_variables = df_filter_4.var_name.values\n", + "# @widgets.interact(var=available_variables)\n", + "# def update(var = available_variables[0]):\n", + "# fig.clear() # Remove old lines from plot and plot new one\n", + "# if len(ds[var].dims)==2:\n", + "# ds[var].plot(x=\"time\", add_colorbar=False)\n", + "# else:\n", + "# ds[var].plot()\n", + "# plt.grid()\n", + "# plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fabdd802", + "metadata": {}, + "outputs": [], + "source": [ + "# example 2: using act plot\n", + "\n", + "# Uncomment the following cell to try the interactive plot (ctrl + /)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "962b4186", + "metadata": {}, + "outputs": [], + "source": [ + "# %matplotlib widget\n", + "# plt.clf()\n", + "\n", + "# available_variables = df_filter_4.var_name.values\n", + "\n", + "\n", + "# @widgets.interact(var=available_variables)\n", + "# def 
update(var = available_variables[0]):\n", + "\n", + "# i_display = act.plotting.TimeSeriesDisplay(ds_act)\n", + "# i_display.add_subplots((1,), figsize=(10, 4))\n", + "# ax = i_display.plot(var, subplot_index=(0,), set_title=f\"{var} ({ds_act[var].attrs['long_name']})\",)\n", + "\n", + "# ax.set_xlabel(f\"UTC Time starts at {ds.time.data[0]}\")\n", + "# ax.grid()\n", + "# plt.show()\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.16" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/SACRADV3D3C/kasacradv3d3c.c1.ipynb b/VAPs/quicklook/SACRADV3D3C/kasacradv3d3c.c1.ipynb new file mode 100644 index 00000000..681c8eee --- /dev/null +++ b/VAPs/quicklook/SACRADV3D3C/kasacradv3d3c.c1.ipynb @@ -0,0 +1,297 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# KASACRADV3D3C.C1 Plots\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/sacradv3d3c) for more information about this vap." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib widget\n", + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import ipywidgets as widgets\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "# Data archive directory\n", + "DATA_DIR = r'/data/archive/'\n", + "\n", + "# Datastream info\n", + "DATASTREAM_NAME = 'kasacradv3d3c'\n", + "DATA_LEVEL = 'c1'\n", + "LOCATIONS = [{'end_date': '2012-08-31', 'facility': 'C1', 'site': 'sgp', 'start_date': '2012-08-01'}]" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "9faaf875", + "metadata": {}, + "source": [ + "## Define site, facility, and date range" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ac6764f5", + "metadata": {}, + "outputs": [], + "source": [ + "print(\"The following locations and date ranges are available for this VAP:\")\n", + "display(pd.DataFrame(LOCATIONS, columns=['site', 'facility', 'start_date', 'end_date']))" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "8d132223", + "metadata": {}, + "source": [ + "#### Define site, facility, and date range (date format: YYYY-MM-DD) using the variables below:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e563983a", + "metadata": {}, + "outputs": [], + "source": [ + "site_facility = ( 'sgp', 'C1' )\n", + "\n", + "date_start = '2012-08-29'\n", + "date_end = '2012-08-31'" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "bccd3dfe-2f99-49a5-bace-ea37e8dd8fc3", + "metadata": {}, + "source": [ + "## Load data files\n", + "Load data files from /data/archive/" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cb4b9a26-c574-49c0-a521-658fa553e39e", + "metadata": 
{}, + "outputs": [], + "source": [ + "# Compile list of files\n", + "site, facility = site_facility\n", + "d_date_start = datetime.strptime(date_start, '%Y-%m-%d')\n", + "d_date_end = datetime.strptime(date_end, '%Y-%m-%d')\n", + "dir_path = os.path.join(DATA_DIR + site, site + DATASTREAM_NAME + facility + r'.' + DATA_LEVEL )\n", + "dir_path\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6be8f3dc", + "metadata": {}, + "outputs": [], + "source": [ + "from datetime import date, timedelta\n", + "import pandas as pd\n", + "\n", + "def get_ARM_formated_dates(start_date, end_date):\n", + " \"\"\"\n", + " Get a list of ARM conventional formated date lists, based on start_date and end_date(inclusive)\n", + " EXAMPLE:\n", + " get_ARM_formated_dates(start_date=\"20180219\", end_date=\"20180221\")\n", + " >> [\"20180219\", \"20180220\", \"20180221\"] \n", + " \"\"\"\n", + " \n", + " _start_date = pd.to_datetime(start_date)\n", + " _end_date = pd.to_datetime(end_date)\n", + " \n", + " delta = _end_date - _start_date # returns timedelta \n", + " dates = []\n", + "\n", + " for i in range(delta.days + 1):\n", + " day = _start_date + timedelta(days=i)\n", + " day_formated = day.strftime(format=\"%Y%m%d\")\n", + " dates.append(day_formated)\n", + " return dates\n", + "\n", + "\n", + "get_ARM_formated_dates(start_date=date_start, end_date=date_end)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "51feea2e", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter a list of files based on date pattern\n", + "import glob\n", + "dates = get_ARM_formated_dates(start_date=date_start, end_date=date_end)\n", + "files_filter = []\n", + "for date in dates:\n", + " files_filter += glob.glob(f'{dir_path}/*.{date}*.*')\n", + " files_filter\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0e5d0d7", + "metadata": {}, + "outputs": [], + "source": [ + "# Load files as a single dataset\n", + 
"files_list = files_filter \n", + "ds = act.io.armfiles.read_netcdf(files_list)\n", + "ds.clean.cleanup()\n", + "print(f'{len(files_list)} files loaded')\n", + "ds\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "4a551094-9ec0-4b64-b80a-9940573c2f50", + "metadata": {}, + "source": [ + "## Plot time series data\n", + "#### Define the list of variables to be plotted:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d09b789e-84f1-4605-846b-a72c110c8048", + "metadata": {}, + "outputs": [], + "source": [ + "variables_to_plot = ['wind_speed', 'wind_direction', 'cloud_fraction']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3458fb08-035b-4898-9253-0a94e6f9c97b", + "metadata": {}, + "outputs": [], + "source": [ + "ts_display = act.plotting.TimeSeriesDisplay(ds)\n", + "ts_display.add_subplots((len(variables_to_plot),), figsize = (9.5,4*len(variables_to_plot)))\n", + "\n", + "for i,v in enumerate(variables_to_plot):\n", + " ts_ax = ts_display.plot(v, subplot_index=(i,), set_title=ds.variables[v].attrs['long_name'],)\n", + " ts_ax.grid()\n", + "\n", + "plt.show()\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "2308db16-f362-4033-a11a-c3e5e75ad9ba", + "metadata": {}, + "source": [ + "## Field selection dropdown menu\n", + "Select variable to be plotted from a dropdown menu" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d5792fa8-2253-441a-8ac6-1e07a0345031", + "metadata": {}, + "outputs": [], + "source": [ + "plt.ioff()\n", + "\n", + "# populate dropdown menu with available variables \n", + "available_variables = [v for v in ds.variables if not('time' in v or v.startswith('qc_') or v.startswith('source_')) and 'long_name' in ds.variables[v].attrs]\n", + "d_variable = 'wind_speed'\n", + "dropdown = widgets.Dropdown(\n", + " options = [(ds.variables[v].attrs['long_name'], v) for v in available_variables],\n", + " value= d_variable,\n", + " 
description='Field:',\n", + " disabled=False,\n", + ")\n", + "dropdown.layout.margin = '0px 30% 0px 20%'\n", + "dropdown.layout.width = '50%'\n", + "\n", + "# set up display\n", + "i_display = act.plotting.TimeSeriesDisplay(ds)\n", + "i_display.add_subplots((1,), figsize = (9.5,5))\n", + "i_ax = i_display.plot(d_variable, subplot_index=(0,), set_title=ds.variables[d_variable].attrs['long_name'],)\n", + "i_ax.grid()\n", + "i_fig = i_display.fig\n", + "\n", + "# update plot callback function\n", + "def update_plot(change):\n", + " i_ax.cla()\n", + " i_ax_new = i_display.plot(change.new, subplot_index=(0,), set_title=ds.variables[change.new].attrs['long_name'],)\n", + " i_ax_new.grid()\n", + " i_fig.canvas.draw()\n", + " i_fig.canvas.flush_events()\n", + "\n", + "dropdown.observe(update_plot, names='value')\n", + "\n", + "widgets.AppLayout(\n", + " header=dropdown,\n", + " center=i_fig.canvas,\n", + " pane_heights=[1, 6,1]\n", + ")\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.12" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/SACRADVVAD/SACRADVVAD_tutorial.ipynb b/VAPs/quicklook/SACRADVVAD/SACRADVVAD_tutorial.ipynb new file mode 100644 index 00000000..c100b65a --- /dev/null +++ b/VAPs/quicklook/SACRADVVAD/SACRADVVAD_tutorial.ipynb @@ -0,0 +1,838 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# KASACRADVVAD.C1 Notebook\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/sacradvvad) for 
more information about this vap." + ] + }, + { + "cell_type": "markdown", + "id": "97097763", + "metadata": {}, + "source": [ + "In this notebook, we demonstrate the workflow to explore ARM VAP data (using kasacradvvad as an example). Value-added products (VAPs) are higher-order data products that have been analyzed and processed to ease scientists’ use of ARM data in atmospheric research and global climate models. \n", + "Here is the main content we will cover." + ] + }, + { + "cell_type": "markdown", + "id": "eddec40f", + "metadata": {}, + "source": [ + "# Table of Contents\n", + "## Access the data\n", + "* How to retrieve the data\n", + "* Data path and file name conventions\n", + "* Load data\n", + "## Explore the data\n", + "* NetCDF Data structure\n", + "* Xarray essentials\n", + "* Xarray Variable\n", + "## Plot the data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "import random\n", + "\n", + "import glob\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "id": "2372d5be", + "metadata": {}, + "source": [ + "## Access the data" + ] + }, + { + "cell_type": "markdown", + "id": "5b0d9684", + "metadata": {}, + "source": [ + "### How to retrieve the data\n", + "We assume the path \"/data/archive\" is available where you are running this notebook. You can use `os.path.exists(\"/data/archive\")` to verify if the path exists on your machine.\n", + "\n", + "\n", + "### Data path and file name conventions\n", + "There are several common terms regarding ARM data, for example, data-stream-name, data-level, site, facility, instrument, etc. 
(For more details, please see [ARM Data File Standards Version 1.2](https://www.google.com/search?q=arm+datastream+facility+cite+definition&rlz=1C1GCEJ_enUS1029US1029&ei=hb41ZICrBPukqtsPvMODIA&ved=0ahUKEwjAgeiS0aL-AhV7kmoFHbzhAAQQ4dUDCBA&uact=5&oq=arm+datastream+facility+cite+definition&gs_lcp=Cgxnd3Mtd2l6LXNlcnAQAzIFCAAQogQyBQgAEKIEMgUIABCiBDIFCAAQogQyBQgAEKIEOgoIABBHENYEELADOgoIIRCgARDDBBAKSgQIQRgAUJ4DWIoOYO0PaAFwAXgAgAGMAYgBuQmSAQMyLjmYAQCgAQHIAQjAAQE&sclient=gws-wiz-serp).) \n", + "\n", + "For example, this notebook is called `kasacradvvad.c1`, where `kasacradvvad` is the \"datastream name\", and `c1` is the \"data level\".\n", + "\n", + "This datastream also contains site `pvc` and facility `M1`. (Note: an individual datastream might have multiple site-facility pairs.)\n", + "In such a case, the data of this data-stream is stored at `/data/archive/pvc/pvckasacradvvadM1.c1`, which is in the format of `<DATA_DIR>/<site>/<site><datastream_name><facility>.<data_level>`. We can use the following method to assign the data-stream directory `datastream_dir = os.path.join(DATA_DIR, site, site + DATASTREAM_NAME + facility + '.' + DATA_LEVEL )`\n", + "\n", + "The data files under datastream_dir also follow naming conventions. Once you reach the datastream_dir level, the most important file naming convention to differentiate the files is \"yyyyMMdd.hhmmss\", which comes in handy for filtering files based on datetime. 
For example, we can use `glob.glob(f'{datastream_dir}/*.200709*.*')` to filter files from September 2007.\n", + "\n", + "(Note: refer to https://adc.arm.gov/discovery/#/ and the https://adc.arm.gov/solr8/metadata/select API to explore ARM datastreams and their associated available sites and facilities.)\n", + "\n", + "Please see the following examples in action." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d7e9eb85", + "metadata": {}, + "outputs": [], + "source": [ + "# Verify if DATA_DIR path exists\n", + "DATA_DIR = \"/data/archive\"\n", + "os.path.exists(DATA_DIR)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "586993fd", + "metadata": {}, + "outputs": [], + "source": [ + "# Specify datastream_dir following the path conventions and check its existence\n", + "DATASTREAM_NAME = \"kasacradvvad\"\n", + "DATA_LEVEL = \"c1\"\n", + "site = \"pvc\"\n", + "facility = \"M1\"\n", + "datastream_dir = os.path.join(DATA_DIR, site, site + DATASTREAM_NAME + facility + '.' 
+ DATA_LEVEL )\n", + "print(datastream_dir)\n", + "print(os.path.exists(datastream_dir))\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0742f7c1", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: list 5 (random) files under datastream_dir\n", + "files = os.listdir(datastream_dir)\n", + "files[:5]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "39b98a36", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: get most recent file\n", + "list_of_files = glob.glob(f\"{datastream_dir}/*\") # * means all if need specific format then *.csv\n", + "latest_file = max(list_of_files, key=os.path.getctime)\n", + "latest_file" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "902d514e", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus sort datastream files based on datetime\n", + "files = os.listdir(datastream_dir)\n", + "file_sorted = files.copy()\n", + "file_sorted.sort() \n", + "print(file_sorted[:5])\n", + "\n", + "# to reverse\n", + "file_sorted_reverse = files.copy() \n", + "file_sorted_reverse.sort(reverse=True)\n", + "print(file_sorted_reverse[:5])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ec5923b2", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: pattern matching\n", + "# filter the 200709** files under datastream_dir\n", + "files_filter = glob.glob(f'{datastream_dir}/*.200709*.*')\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c4a4aa18", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: use armnotebook_utils.file_filter (TODO) to filter files based on datastream info \n" + ] + }, + { + "cell_type": "markdown", + "id": "cfeb9efc", + "metadata": {}, + "source": [ + "### Load data\n", + "The arm data is stored in [NetCDF](https://en.wikipedia.org/wiki/NetCDF#:~:text=%22NetCDF%20(network%20Common%20Data%20Form,format%20for%20representing%20scientific%20data.) format. 
We can use xarray's `open_dataset` method to load a single file, or `open_mfdataset` to open multiple files. (Note: the latter will still return a single xarray dataset object by combining multiple files.)\n", + "\n", + "Note: open_dataset keeps the file handle open and lazy loads its contents. All parameters are passed directly to open_dataset. It is preferable to load_dataset for memory efficiency. (For more details, please see [xarray.open_dataset](https://docs.xarray.dev/en/stable/generated/xarray.open_dataset.html))\n", + "\n", + "See the following example in action" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a440a329", + "metadata": {}, + "outputs": [], + "source": [ + "# open single file\n", + "full_path = latest_file\n", + "print(full_path)\n", + "ds_single = xr.open_dataset(full_path)\n", + "ds_single" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0143a3d", + "metadata": {}, + "outputs": [], + "source": [ + "print(ds_single)\n", + "print(type(ds_single))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1c0f8939", + "metadata": {}, + "outputs": [], + "source": [ + "# open multiple files\n", + "n_files = 3\n", + "full_paths = [os.path.join(datastream_dir, f_path) for f_path in file_sorted_reverse[:n_files]]\n", + "print(full_paths)\n", + "try: # Note: sometimes multiple files cannot be merged, so we used try except here.\n", + " ds_mutiple = xr.open_mfdataset(full_paths)\n", + " # ds_mutiple\n", + " print(type(ds_mutiple))\n", + " print(ds_mutiple)\n", + "except Exception as e:\n", + " print(e)" + ] + }, + { + "cell_type": "markdown", + "id": "3fa59943", + "metadata": {}, + "source": [ + "## Explore the data" + ] + }, + { + "cell_type": "markdown", + "id": "571f69b9", + "metadata": {}, + "source": [ + "### NetCDF Data structure \n", + "\n", + "\n", + "(If you are confident with NetCDF basics and xarray essentials, feel free to skip this section.)\n", + "Before we dive into 
data exploration, there are some essential concepts we should be familiar with (shown below): \n", + "* Dataset\n", + "* Data array\n", + "* Variable\n", + "* Dimension\n", + "* Coordinate\n", + "* Data Type\n", + "* Meta Data (Attributes)\n", + "\n", + "We will not go into details about NetCDF basics, but here are some references you might find helpful:\n", + "* [Components of a NetCDF Dataset](https://iprc.soest.hawaii.edu/users/xfu/tool/guidef-7.html#)\n", + "* [Network Common Data Form (NetCDF)](https://www.unidata.ucar.edu/software/netcdf/) and [A Brief History of (netCDF) Time](https://www.unidata.ucar.edu/software/netcdf/time/recs.html)\n", + "\n", + "\n", + "\n", + "### Xarray essentials\n", + "Earlier we introduced the xarray package and used `open_dataset` and `open_mfdataset` to retrieve a NetCDF data file as an xarray Dataset object. Recall that you can review the data in a notebook by using `print(ds)` or `ds`. \n", + "\n", + "Using xarray to retrieve the aforementioned NetCDF basics is straightforward. 
In general,\n", + "* Dataset: `ds`\n", + "* Data array: `ds[var_name]`\n", + "* Variable: `ds.variables`\n", + "* Dimension: `ds.dims`\n", + "* Coordinate: `ds.coords`\n", + "* Data Type: `type`\n", + "* Meta Data (Attributes): `ds.attrs`\n", + "\n", + "Also, here are some references if you are new to xarray\n", + "* [Xarray in 45 minutes](https://tutorial.xarray.dev/overview/xarray-in-45-min.html)\n", + "* [Handling NetCDF Files using XArray for Absolute Beginners](https://towardsdatascience.com/handling-netcdf-files-using-xarray-for-absolute-beginners-111a8ab4463f)\n", + "\n", + "Try the following commands in action\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b3eac323", + "metadata": {}, + "outputs": [], + "source": [ + "ds = ds_single" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "77ecf85d", + "metadata": {}, + "outputs": [], + "source": [ + "# Dataset \n", + "ds" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "25e7de09", + "metadata": {}, + "outputs": [], + "source": [ + "# Data array, Variable\n", + "# Note: the info can be overwhelming. 
Do not attempt to grasp everything at the first glance.\n", + "ds.variables" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8c41b67e", + "metadata": {}, + "outputs": [], + "source": [ + "# dimensions\n", + "ds.dims" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "156f1dfc", + "metadata": {}, + "outputs": [], + "source": [ + "# coordinates\n", + "ds.coords" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "277d6064", + "metadata": {}, + "outputs": [], + "source": [ + "# Meta Data (Attributes)\n", + "ds.attrs" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d334681f", + "metadata": {}, + "outputs": [], + "source": [ + "# type\n", + "print(type(ds))\n", + "print(type(ds.variables))\n", + "print(type(ds.dims))\n", + "print(type(ds.coords))\n", + "print(type(ds.attrs))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "643399d6", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: retrieve variable names only\n", + "list(ds.variables)" + ] + }, + { + "cell_type": "markdown", + "id": "1cf36878", + "metadata": {}, + "source": [ + "#### Discussion: variable vs. coordinates vs. dimensions. \n", + "From [Components of a NetCDF Dataset](https://iprc.soest.hawaii.edu/users/xfu/tool/guidef-7.html#):\n", + "\n", + "* Dimension: A dimension may be used to represent a real physical dimension, for example, time, latitude, longitude, or height. A dimension might also be used to index other quantities, for example station or model-run-number.\n", + "A netCDF dimension has both a name and a length. A dimension length is an arbitrary positive integer, except that one dimension in a netCDF dataset can have the length UNLIMITED.\n", + "\n", + "* Variables: Variables are used to store the bulk of the data in a netCDF dataset. A variable represents an array of values of the same type. A scalar value is treated as a 0-dimensional array. 
A variable has a name, a data type, and a shape described by its list of dimensions specified when the variable is created. A variable may also have associated attributes, which may be added, deleted or changed after the variable is created.\n", + "\n", + "* Coordinate (Variables): It is legal for a variable to have the same name as a dimension. Such variables have no special meaning to the netCDF library. However there is a convention that such variables should be treated in a special way by software using this library.\n", + "A variable with the same name as a dimension is called a coordinate variable. It typically defines a physical coordinate corresponding to that dimension.\n", + "\n", + "\n", + "Tips: By definition, coordinates and dimensions are also variables. For example, ds.time is a coordinate but it is also a special variable. In practice, however, when we talk about a variable, we mean a regular/non-coordinate variable. We will use this convention for the remainder of the notebook. " + ] + }, + { + "cell_type": "markdown", + "id": "d47120f9", + "metadata": {}, + "source": [ + "### Xarray Variable\n", + "\n", + "\n", + "We can use `ds.variables` to access the variables of a dataset. But printing out the whole dataset is often overwhelming. Instead, we would work on an individual variable (also called an Xarray data array) using the `ds[var_name]` syntax, where `var_name` is the name of the variable. See the following examples in action.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5a57e136", + "metadata": {}, + "outputs": [], + "source": [ + "print(type(ds[\"time\"]))\n", + "ds[\"time\"]" + ] + }, + { + "cell_type": "markdown", + "id": "51ec643e", + "metadata": {}, + "source": [ + "#### Variable properties\n", + "For an individual variable, the following properties are the most commonly used, shown with the associated xarray method to retrieve them. 
(assuming var = ds[var_name])\n", + "* name: `var.name`\n", + "* data content: `var.data`\n", + "* attributes: `var.attrs`\n", + "* dimensions: `var.dims`\n", + "* data type: `var.data.dtype`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a632a525", + "metadata": {}, + "outputs": [], + "source": [ + "var_name = \"time\"\n", + "var = ds[var_name]\n", + "\n", + "print(\"var.name: \\n\", var.name, \"\\n\")\n", + "print(\"var.data: \\n\", var.data, \"\\n\")\n", + "print(\"var.attrs: \\n\", var.attrs, \"\\n\")\n", + "print(\"var.dims: \\n\", var.dims, \"\\n\")\n", + "print(\"var.data.dtype: \\n\", var.data.dtype, \"\\n\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b256b07f", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: individual variable info table (collect and display variable info in a tabular format)\n", + "df_info = pd.DataFrame()\n", + "df_info[\"var_name\"] = list(ds.variables)\n", + "df_info[\"dims\"] = df_info.var_name.apply(lambda x: ds[x].dims)\n", + "df_info[\"is_dim\"] = df_info.var_name.apply(lambda x: x in ds.dims)\n", + "df_info[\"n_dim\"] = df_info.var_name.apply(lambda x: len(ds[x].dims))\n", + "df_info[\"attrs\"] = df_info.var_name.apply(lambda x: ds[x].attrs)\n", + "df_info[\"dtype\"] = df_info.var_name.apply(lambda x: ds[x].data.dtype)\n", + "df_info" + ] + }, + { + "cell_type": "markdown", + "id": "e6a36b96", + "metadata": {}, + "source": [ + "### Data cleaning/Preprocessing (skipped)\n", + "Data cleaning and preprocessing is an important stage in the data analysis pipeline. However, it is outside the scope of this notebook. For more information about data cleaning and preprocessing basics, here are some references. 
\n", + "* [Xarray Fundamentals](https://earth-env-data-science.github.io/lectures/xarray/xarray.html)\n", + "* [Xarray Tutorial — Pangeo Gallery documentation](http://gallery.pangeo.io/repos/pangeo-data/pangeo-tutorial-gallery/xarray.html)\n", + "* [Pythonic Data Cleaning With pandas and NumPy](https://realpython.com/python-data-cleaning-numpy-pandas/)\n", + "* [Pandas - Cleaning Data](https://www.w3schools.com/python/pandas/pandas_cleaning.asp)" + ] + }, + { + "cell_type": "markdown", + "id": "ae6e1a69", + "metadata": {}, + "source": [ + "### Plotting\n", + "Data visualization (or plotting) is another important data analysis topic and deserves its own discussion. Here in this notebook we will only demonstrate basic tools for simple data visualization tasks.\n", + "\n", + "Here are some references you might find useful:\n", + "* xarray plotting: https://docs.xarray.dev/en/stable/user-guide/plotting.html\n", + "* Atmospheric Community Toolkit (ACT): https://arm-doe.github.io/ACT/index.html\n", + "\n", + "Note: this notebook is auto-generated using a template. It uses a general heuristic to select the variable(s) to plot and is not customized for each individual notebook. Feel free to change the variables of interest, especially if a figure fails to plot." + ] + }, + { + "cell_type": "markdown", + "id": "7e417fb2", + "metadata": {}, + "source": [ + "#### 1-dimensional basic time series plot\n", + "\n", + "For the following plot we would like to find variables such that\n", + "* it has one and only one dimension\n", + "* \"time\" is its coordinate variable\n", + "* it is not a dimension itself,\n", + "* it is not a special variable with substrings within [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "\n", + "Please see the following example in action. 
(For more details about pandas filtering, please see the following references.)\n", + "* [pandas: multiple conditions while indexing data frame](https://stackoverflow.com/questions/22591174/pandas-multiple-conditions-while-indexing-data-frame-unexpected-behavior)\n", + "* [pandas.DataFrame.apply](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.apply.html)\n", + "* [How to test if a string contains one of the substrings in a list...](https://stackoverflow.com/questions/26577516/how-to-test-if-a-string-contains-one-of-the-substrings-in-a-list-in-pandas)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "30edac2d", + "metadata": {}, + "outputs": [], + "source": [ + "# (pandas query) filter n_dim==1, non-dimensional, and not containing the following substrings\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter = df_info[(df_info.n_dim == 1) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (df_info.is_dim==False) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4777a659", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " var_1d = df_filter.var_name.values[0]\n", + " var_1d\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fb5ae985", + "metadata": {}, + "outputs": [], + "source": [ + "# Note: if failed, change to another variable to plot.\n", + "try:\n", + " ds[var_1d].plot()\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "markdown", + "id": "9e41066e", + "metadata": {}, + "source": [ + "#### 2-dimensional basic plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "09bd4adc", + "metadata": {}, + "outputs": [], + "source": [ + "# (pandas query) filter n_dim==2, non-dimensional, and not containing the 
following substrings\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_2 = df_info[(df_info.n_dim == 2) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (df_info.is_dim==False) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter_2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2cdea1a9", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " var_2d = df_filter_2.var_name.values[0]\n", + " var_2d\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f5f48879", + "metadata": {}, + "outputs": [], + "source": [ + "# Note: if failed, change to another variable to plot.\n", + "try:\n", + " print(ds[var_2d].dims)\n", + " # ds[var_2d].plot()\n", + "\n", + " # conventionally, use \"time\" as x-axis\n", + " ds[var_2d].plot(x=\"time\")\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "markdown", + "id": "7554b2a6", + "metadata": {}, + "source": [ + "#### qc-plotting (optional)\n", + "\n", + "Note: act qc plotting has more strict requirement, one of them is the associated qc variable need to have \"flag_masks\" attributes. 
The \"flag_masks\" attribute is added by reading the data with `ds = act.io.armfiles.read_netcdf(files_list)` and then calling `ds.clean.cleanup()`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "71c2096f", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " ds_act = act.io.armfiles.read_netcdf(full_path)\n", + " print(type(ds_act))\n", + " ds_act.clean.cleanup()\n", + "\n", + " # or \n", + " # ds.clean.cleanup()\n", + "except Exception as e:\n", + " print(\"ERROR\", e)\n", + " ds_act = ds" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "844f8505", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter valid variables for ACT qc plotting\n", + "condition = (\"qc_\" + df_info.var_name).apply(lambda x: ds_act[x].attrs.get(\"flag_masks\") is not None \n", + " if x in list(ds.variables) else False)\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_3 = df_info[(df_info.is_dim==False) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings))) &\n", + " condition\n", + " ]\n", + "df_filter_3" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ecd38cc6", + "metadata": {}, + "outputs": [], + "source": [ + "# QC Plot\n", + "try:\n", + " qc_variable = df_filter_3.var_name.values[0]\n", + " print(qc_variable)\n", + "except Exception as e:\n", + " print(e)\n", + " \n", + "try:\n", + "\n", + " # Plot\n", + " qc_display = act.plotting.TimeSeriesDisplay(ds_act)\n", + " qc_display.add_subplots((2,), figsize = (9.5,10))\n", + " qc_ax = qc_display.plot(qc_variable, subplot_index=(0,), set_title=\"QC results on field: \" + qc_variable,)\n", + " qc_ax.grid()\n", + " qc_display.qc_flag_block_plot(qc_variable, subplot_index=(1,))\n", + " qc_ax.set_xlabel(f\"UTC Time starts at {ds.time.data[0]}\")\n", + "\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e)" + ] + }, + { + "cell_type": "markdown", + "id": "d76e4d27", + "metadata": 
{}, + "source": [ + "#### bonus: choose variables to plot from a dropdown menu " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bb733804", + "metadata": {}, + "outputs": [], + "source": [ + "# Valid variables filtering\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_4 = df_info[(df_info.is_dim==False) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter_4" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "82d5c20d", + "metadata": {}, + "outputs": [], + "source": [ + "# example 1: using xarray plot\n", + "\n", + "# Uncomment the following cell to try the interactive plot (ctrl + /)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2fc9aaa4", + "metadata": {}, + "outputs": [], + "source": [ + "# %matplotlib widget\n", + "# plt.clf()\n", + "\n", + "# fig, ax = plt.subplots(figsize=(10, 4))\n", + "\n", + "# available_variables = df_filter_4.var_name.values\n", + "# @widgets.interact(var=available_variables)\n", + "# def update(var = available_variables[0]):\n", + "# fig.clear() # Remove old lines from plot and plot new one\n", + "# if len(ds[var].dims)==2:\n", + "# ds[var].plot(x=\"time\", add_colorbar=False)\n", + "# else:\n", + "# ds[var].plot()\n", + "# plt.grid()\n", + "# plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fabdd802", + "metadata": {}, + "outputs": [], + "source": [ + "# example 2: using act plot\n", + "\n", + "# Uncomment the following cell to try the interactive plot (ctrl + /)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "962b4186", + "metadata": {}, + "outputs": [], + "source": [ + "# %matplotlib widget\n", + "# plt.clf()\n", + "\n", + "# available_variables = df_filter_4.var_name.values\n", + "\n", + "\n", + "# @widgets.interact(var=available_variables)\n", + "# def 
update(var = available_variables[0]):\n", + "\n", + "# i_display = act.plotting.TimeSeriesDisplay(ds_act)\n", + "# i_display.add_subplots((1,), figsize=(10, 4))\n", + "# ax = i_display.plot(var, subplot_index=(0,), set_title=f\"{var} ({ds_act[var].attrs['long_name']})\",)\n", + "\n", + "# ax.set_xlabel(f\"UTC Time starts at {ds.time.data[0]}\")\n", + "# ax.grid()\n", + "# plt.show()\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.16" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/SACRADVVAD/kasacradvvad.c1.ipynb b/VAPs/quicklook/SACRADVVAD/kasacradvvad.c1.ipynb new file mode 100644 index 00000000..2ff9b73f --- /dev/null +++ b/VAPs/quicklook/SACRADVVAD/kasacradvvad.c1.ipynb @@ -0,0 +1,297 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# KASACRADVVAD.C1 Plots\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/sacradvvad) for more information about this vap." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib widget\n", + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import ipywidgets as widgets\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "# Data archive directory\n", + "DATA_DIR = r'/data/archive/'\n", + "\n", + "# Datastream info\n", + "DATASTREAM_NAME = 'kasacradvvad'\n", + "DATA_LEVEL = 'c1'\n", + "LOCATIONS = [{'end_date': '2012-09-30', 'facility': 'M1', 'site': 'pvc', 'start_date': '2012-09-01'}, {'end_date': '2012-08-31', 'facility': 'C1', 'site': 'sgp', 'start_date': '2012-08-01'}, {'end_date': '2014-08-30', 'facility': 'M1', 'site': 'tmp', 'start_date': '2014-08-01'}]" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "9faaf875", + "metadata": {}, + "source": [ + "## Define site, facility, and date range" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ac6764f5", + "metadata": {}, + "outputs": [], + "source": [ + "print(\"The following locations and date ranges are available for this VAP:\")\n", + "display(pd.DataFrame(LOCATIONS, columns=['site', 'facility', 'start_date', 'end_date']))" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "8d132223", + "metadata": {}, + "source": [ + "#### Define site, facility, and date range (date format: YYYY-MM-DD) using the variables below:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e563983a", + "metadata": {}, + "outputs": [], + "source": [ + "site_facility = ( 'sgp', 'C1' )\n", + "\n", + "date_start = '2012-08-29'\n", + "date_end = '2012-08-31'" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "bccd3dfe-2f99-49a5-bace-ea37e8dd8fc3", + "metadata": {}, + "source": [ + "## Load 
data files\n", + "Load data files from /data/archive/" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cb4b9a26-c574-49c0-a521-658fa553e39e", + "metadata": {}, + "outputs": [], + "source": [ + "# Compile list of files\n", + "site, facility = site_facility\n", + "d_date_start = datetime.strptime(date_start, '%Y-%m-%d')\n", + "d_date_end = datetime.strptime(date_end, '%Y-%m-%d')\n", + "dir_path = os.path.join(DATA_DIR + site, site + DATASTREAM_NAME + facility + r'.' + DATA_LEVEL )\n", + "dir_path\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6be8f3dc", + "metadata": {}, + "outputs": [], + "source": [ + "from datetime import date, timedelta\n", + "import pandas as pd\n", + "\n", + "def get_ARM_formated_dates(start_date, end_date):\n", + " \"\"\"\n", + " Get a list of ARM conventional formated date lists, based on start_date and end_date(inclusive)\n", + " EXAMPLE:\n", + " get_ARM_formated_dates(start_date=\"20180219\", end_date=\"20180221\")\n", + " >> [\"20180219\", \"20180220\", \"20180221\"] \n", + " \"\"\"\n", + " \n", + " _start_date = pd.to_datetime(start_date)\n", + " _end_date = pd.to_datetime(end_date)\n", + " \n", + " delta = _end_date - _start_date # returns timedelta \n", + " dates = []\n", + "\n", + " for i in range(delta.days + 1):\n", + " day = _start_date + timedelta(days=i)\n", + " day_formated = day.strftime(format=\"%Y%m%d\")\n", + " dates.append(day_formated)\n", + " return dates\n", + "\n", + "\n", + "get_ARM_formated_dates(start_date=date_start, end_date=date_end)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "51feea2e", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter a list of files based on date pattern\n", + "import glob\n", + "dates = get_ARM_formated_dates(start_date=date_start, end_date=date_end)\n", + "files_filter = []\n", + "for date in dates:\n", + " files_filter += glob.glob(f'{dir_path}/*.{date}*.*')\n", + " files_filter\n", + "files_filter" + 
] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0e5d0d7", + "metadata": {}, + "outputs": [], + "source": [ + "# Load files as a single dataset\n", + "files_list = files_filter \n", + "ds = act.io.armfiles.read_netcdf(files_list)\n", + "ds.clean.cleanup()\n", + "print(f'{len(files_list)} files loaded')\n", + "ds\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "4a551094-9ec0-4b64-b80a-9940573c2f50", + "metadata": {}, + "source": [ + "## Plot time series data\n", + "#### Define the list of variables to be plotted:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d09b789e-84f1-4605-846b-a72c110c8048", + "metadata": {}, + "outputs": [], + "source": [ + "variables_to_plot = ['horizontal_wind_magnitude_at_cloud_level', 'horizontal_wind_direction_at_cloud_level']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3458fb08-035b-4898-9253-0a94e6f9c97b", + "metadata": {}, + "outputs": [], + "source": [ + "ts_display = act.plotting.TimeSeriesDisplay(ds)\n", + "ts_display.add_subplots((len(variables_to_plot),), figsize = (9.5,4*len(variables_to_plot)))\n", + "\n", + "for i,v in enumerate(variables_to_plot):\n", + " ts_ax = ts_display.plot(v, subplot_index=(i,), set_title=ds.variables[v].attrs['long_name'],)\n", + " ts_ax.grid()\n", + "\n", + "plt.show()\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "2308db16-f362-4033-a11a-c3e5e75ad9ba", + "metadata": {}, + "source": [ + "## Field selection dropdown menu\n", + "Select variable to be plotted from a dropdown menu" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d5792fa8-2253-441a-8ac6-1e07a0345031", + "metadata": {}, + "outputs": [], + "source": [ + "plt.ioff()\n", + "\n", + "# populate dropdown menu with available variables \n", + "available_variables = [v for v in ds.variables if not('time' in v or v.startswith('qc_') or v.startswith('source_')) and 'long_name' in 
ds.variables[v].attrs]\n", + "d_variable = 'horizontal_wind_magnitude_at_cloud_level'\n", + "dropdown = widgets.Dropdown(\n", + " options = [(ds.variables[v].attrs['long_name'], v) for v in available_variables],\n", + " value= d_variable,\n", + " description='Field:',\n", + " disabled=False,\n", + ")\n", + "dropdown.layout.margin = '0px 30% 0px 20%'\n", + "dropdown.layout.width = '50%'\n", + "\n", + "# set up display\n", + "i_display = act.plotting.TimeSeriesDisplay(ds)\n", + "i_display.add_subplots((1,), figsize = (9.5,5))\n", + "i_ax = i_display.plot(d_variable, subplot_index=(0,), set_title=ds.variables[d_variable].attrs['long_name'],)\n", + "i_ax.grid()\n", + "i_fig = i_display.fig\n", + "\n", + "# update plot callback function\n", + "def update_plot(change):\n", + " i_ax.cla()\n", + " i_ax_new = i_display.plot(change.new, subplot_index=(0,), set_title=ds.variables[change.new].attrs['long_name'],)\n", + " i_ax_new.grid()\n", + " i_fig.canvas.draw()\n", + " i_fig.canvas.flush_events()\n", + "\n", + "dropdown.observe(update_plot, names='value')\n", + "\n", + "widgets.AppLayout(\n", + " header=dropdown,\n", + " center=i_fig.canvas,\n", + " pane_heights=[1, 6,1]\n", + ")\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.12" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/SFCCLDGRID/.ipynb_checkpoints/15swfcldgrid1long.c1-checkpoint.ipynb b/VAPs/quicklook/SFCCLDGRID/.ipynb_checkpoints/15swfcldgrid1long.c1-checkpoint.ipynb new file mode 100644 index 00000000..0ed9cccf --- /dev/null +++ 
b/VAPs/quicklook/SFCCLDGRID/.ipynb_checkpoints/15swfcldgrid1long.c1-checkpoint.ipynb @@ -0,0 +1,2384 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# 15SWFCLDGRID1LONG.C1 Plots\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/sfccldgrid) for more information about this vap." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib widget\n", + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import ipywidgets as widgets\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "# Data archive directory\n", + "DATA_DIR = r'/data/archive/'\n", + "\n", + "# Datastream info\n", + "DATASTREAM_NAME = '15swfcldgrid1long'\n", + "DATA_LEVEL = 'c1'\n", + "LOCATIONS = [{'end_date': '2009-11-25', 'facility': 'N1', 'site': 'sgp', 'start_date': '1997-01-01'}]" + ] + }, + { + "cell_type": "markdown", + "id": "9faaf875", + "metadata": {}, + "source": [ + "## Define site, facility, and date range" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "ac6764f5", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The following locations and date ranges are available for this VAP:\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sitefacilitystart_dateend_date
0sgpN11997-01-012009-11-25
\n", + "
" + ], + "text/plain": [ + " site facility start_date end_date\n", + "0 sgp N1 1997-01-01 2009-11-25" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "print(\"The following locations and date ranges are available for this VAP:\")\n", + "display(pd.DataFrame(LOCATIONS, columns=['site', 'facility', 'start_date', 'end_date']))" + ] + }, + { + "cell_type": "markdown", + "id": "8d132223", + "metadata": {}, + "source": [ + "#### Define site, facility, and date range (date format: YYYY-MM-DD) using the variables below:" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "e563983a", + "metadata": {}, + "outputs": [], + "source": [ + "site_facility = ( 'sgp', 'N1' )\n", + "\n", + "date_start = '2009-11-23'\n", + "date_end = '2009-11-25'" + ] + }, + { + "cell_type": "markdown", + "id": "bccd3dfe-2f99-49a5-bace-ea37e8dd8fc3", + "metadata": {}, + "source": [ + "## Load data files\n", + "Load data files from /data/archive/" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "cb4b9a26-c574-49c0-a521-658fa553e39e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'/data/archive/sgp/sgp15swfcldgrid1longN1.c1'" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Compile list of files\n", + "site, facility = site_facility\n", + "d_date_start = datetime.strptime(date_start, '%Y-%m-%d')\n", + "d_date_end = datetime.strptime(date_end, '%Y-%m-%d')\n", + "dir_path = os.path.join(DATA_DIR + site, site + DATASTREAM_NAME + facility + r'.' 
+ DATA_LEVEL )\n", + "dir_path\n" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "6be8f3dc", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['20091123', '20091124', '20091125']" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from datetime import date, timedelta\n", + "import pandas as pd\n", + "\n", + "def get_ARM_formated_dates(start_date, end_date):\n", + " \"\"\"\n", + " Get a list of ARM conventional formated date lists, based on start_date and end_date(inclusive)\n", + " EXAMPLE:\n", + " get_ARM_formated_dates(start_date=\"20180219\", end_date=\"20180221\")\n", + " >> [\"20180219\", \"20180220\", \"20180221\"] \n", + " \"\"\"\n", + " \n", + " _start_date = pd.to_datetime(start_date)\n", + " _end_date = pd.to_datetime(end_date)\n", + " \n", + " delta = _end_date - _start_date # returns timedelta \n", + " dates = []\n", + "\n", + " for i in range(delta.days + 1):\n", + " day = _start_date + timedelta(days=i)\n", + " day_formated = day.strftime(format=\"%Y%m%d\")\n", + " dates.append(day_formated)\n", + " return dates\n", + "\n", + "\n", + "get_ARM_formated_dates(start_date=date_start, end_date=date_end)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "51feea2e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['/data/archive/sgp/sgp15swfcldgrid1longN1.c1/sgp15swfcldgrid1longN1.c1.20091123.180000.cdf',\n", + " '/data/archive/sgp/sgp15swfcldgrid1longN1.c1/sgp15swfcldgrid1longN1.c1.20091124.144500.cdf',\n", + " '/data/archive/sgp/sgp15swfcldgrid1longN1.c1/sgp15swfcldgrid1longN1.c1.20091125.144500.cdf']" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Filter a list of files based on date pattern\n", + "import glob\n", + "dates = get_ARM_formated_dates(start_date=date_start, end_date=date_end)\n", + "files_filter = []\n", + "for date in dates:\n", + " 
files_filter += glob.glob(f'{dir_path}/*.{date}*.*')\n", + " files_filter\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "b0e5d0d7", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "3 files loaded\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
<xarray.Dataset>\n",
+       "Dimensions:           (time: 33, lat: 15, lon: 17)\n",
+       "Coordinates:\n",
+       "  * time              (time) datetime64[ns] 2009-11-23T18:00:00 ... 2009-11-2...\n",
+       "  * lat               (lat) float32 38.5 38.25 38.0 37.75 ... 35.5 35.25 35.0\n",
+       "  * lon               (lon) float32 99.5 99.25 99.0 98.75 ... 96.0 95.75 95.5\n",
+       "Data variables: (12/22)\n",
+       "    base_time         (time) datetime64[ns] 2009-11-23T18:00:00 ... 2009-11-2...\n",
+       "    time_offset       (time) datetime64[ns] 2009-11-23T18:00:00 ... 2009-11-2...\n",
+       "    cloudfraction     (time, lat, lon) float32 dask.array<chunksize=(2, 15, 17), meta=np.ndarray>\n",
+       "    qc_cloudfraction  (time, lat, lon) int32 dask.array<chunksize=(2, 15, 17), meta=np.ndarray>\n",
+       "    cf_cloudfraction  (time) int32 dask.array<chunksize=(2,), meta=np.ndarray>\n",
+       "    tswfluxdn         (time, lat, lon) float32 dask.array<chunksize=(2, 15, 17), meta=np.ndarray>\n",
+       "    ...                ...\n",
+       "    cf_clrfluxdn      (time) int32 dask.array<chunksize=(2,), meta=np.ndarray>\n",
+       "    cdirfluxdn        (time, lat, lon) float32 dask.array<chunksize=(2, 15, 17), meta=np.ndarray>\n",
+       "    qc_cdirfluxdn     (time, lat, lon) int32 dask.array<chunksize=(2, 15, 17), meta=np.ndarray>\n",
+       "    cf_cdirfluxdn     (time) int32 dask.array<chunksize=(2,), meta=np.ndarray>\n",
+       "    azimuth           (time) float32 dask.array<chunksize=(2,), meta=np.ndarray>\n",
+       "    alt               (time) float32 318.0 318.0 318.0 ... 318.0 318.0 318.0\n",
+       "Attributes: (12/18)\n",
+       "    Date:                           Wed Jun 16 21:58:59 2010\n",
+       "    Version:                        $State: process-vap-sfccldgrid1long-2.0-0 $\n",
+       "    Command_Line:                   sfccldgrid1long -d 20091123\n",
+       "    Input_Platforms:                sgp15swfanalbrs1longC1.c1, sgp15swfanalsi...\n",
+       "    BW_Version:                     $State: ds-dsutil-bw-4.3-0 $\n",
+       "    qc_format_version:              0.1\n",
+       "    ...                             ...\n",
+       "    history:                        created by user dsmgr on machine zinc at ...\n",
+       "    _file_dates:                    ['20091123', '20091124', '20091125']\n",
+       "    _file_times:                    ['180000', '144500', '144500']\n",
+       "    datastream:                     sgp15swfcldgrid1longN1.c1\n",
+       "    _datastream:                    sgp15swfcldgrid1longN1.c1\n",
+       "    _arm_standards_flag:            1
" + ], + "text/plain": [ + "\n", + "Dimensions: (time: 33, lat: 15, lon: 17)\n", + "Coordinates:\n", + " * time (time) datetime64[ns] 2009-11-23T18:00:00 ... 2009-11-2...\n", + " * lat (lat) float32 38.5 38.25 38.0 37.75 ... 35.5 35.25 35.0\n", + " * lon (lon) float32 99.5 99.25 99.0 98.75 ... 96.0 95.75 95.5\n", + "Data variables: (12/22)\n", + " base_time (time) datetime64[ns] 2009-11-23T18:00:00 ... 2009-11-2...\n", + " time_offset (time) datetime64[ns] 2009-11-23T18:00:00 ... 2009-11-2...\n", + " cloudfraction (time, lat, lon) float32 dask.array\n", + " qc_cloudfraction (time, lat, lon) int32 dask.array\n", + " cf_cloudfraction (time) int32 dask.array\n", + " tswfluxdn (time, lat, lon) float32 dask.array\n", + " ... ...\n", + " cf_clrfluxdn (time) int32 dask.array\n", + " cdirfluxdn (time, lat, lon) float32 dask.array\n", + " qc_cdirfluxdn (time, lat, lon) int32 dask.array\n", + " cf_cdirfluxdn (time) int32 dask.array\n", + " azimuth (time) float32 dask.array\n", + " alt (time) float32 318.0 318.0 318.0 ... 318.0 318.0 318.0\n", + "Attributes: (12/18)\n", + " Date: Wed Jun 16 21:58:59 2010\n", + " Version: $State: process-vap-sfccldgrid1long-2.0-0 $\n", + " Command_Line: sfccldgrid1long -d 20091123\n", + " Input_Platforms: sgp15swfanalbrs1longC1.c1, sgp15swfanalsi...\n", + " BW_Version: $State: ds-dsutil-bw-4.3-0 $\n", + " qc_format_version: 0.1\n", + " ... 
...\n", + " history: created by user dsmgr on machine zinc at ...\n", + " _file_dates: ['20091123', '20091124', '20091125']\n", + " _file_times: ['180000', '144500', '144500']\n", + " datastream: sgp15swfcldgrid1longN1.c1\n", + " _datastream: sgp15swfcldgrid1longN1.c1\n", + " _arm_standards_flag: 1" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Load files as a single dataset\n", + "files_list = files_filter \n", + "ds = act.io.armfiles.read_netcdf(files_list)\n", + "ds.clean.cleanup()\n", + "print(f'{len(files_list)} files loaded')\n", + "ds\n" + ] + }, + { + "cell_type": "markdown", + "id": "4a551094-9ec0-4b64-b80a-9940573c2f50", + "metadata": {}, + "source": [ + "## Plot time series data\n", + "#### Define the list of variables to be plotted:" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "d09b789e-84f1-4605-846b-a72c110c8048", + "metadata": {}, + "outputs": [], + "source": [ + "variables_to_plot = ['cloudfraction', 'cf_cloudfraction', 'tswfluxdn']" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "3458fb08-035b-4898-9253-0a94e6f9c97b", + "metadata": {}, + "outputs": [ + { + "ename": "TypeError", + "evalue": "Dimensions of C (17, 15, 33) should be one smaller than X(33) and Y(15) while using shading='flat' see help(pcolormesh)", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[9], line 5\u001b[0m\n\u001b[1;32m 2\u001b[0m ts_display\u001b[38;5;241m.\u001b[39madd_subplots((\u001b[38;5;28mlen\u001b[39m(variables_to_plot),), figsize \u001b[38;5;241m=\u001b[39m (\u001b[38;5;241m9.5\u001b[39m,\u001b[38;5;241m4\u001b[39m\u001b[38;5;241m*\u001b[39m\u001b[38;5;28mlen\u001b[39m(variables_to_plot)))\n\u001b[1;32m 4\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m i,v 
\u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28menumerate\u001b[39m(variables_to_plot):\n\u001b[0;32m----> 5\u001b[0m ts_ax \u001b[38;5;241m=\u001b[39m \u001b[43mts_display\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mplot\u001b[49m\u001b[43m(\u001b[49m\u001b[43mv\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43msubplot_index\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mi\u001b[49m\u001b[43m,\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mset_title\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mds\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mvariables\u001b[49m\u001b[43m[\u001b[49m\u001b[43mv\u001b[49m\u001b[43m]\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mattrs\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mlong_name\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 6\u001b[0m ts_ax\u001b[38;5;241m.\u001b[39mgrid()\n\u001b[1;32m 8\u001b[0m plt\u001b[38;5;241m.\u001b[39mshow()\n", + "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/act/plotting/timeseriesdisplay.py:588\u001b[0m, in \u001b[0;36mTimeSeriesDisplay.plot\u001b[0;34m(self, field, dsname, subplot_index, cmap, set_title, add_nan, day_night_background, invert_y_axis, abs_limits, time_rng, y_rng, use_var_for_y, set_shading, assessment_overplot, overplot_marker, overplot_behind, overplot_markersize, assessment_overplot_category, assessment_overplot_category_color, force_line_plot, labels, cbar_label, cbar_h_adjust, secondary_y, y_axis_flag_meanings, colorbar_labels, cb_friendly, **kwargs)\u001b[0m\n\u001b[1;32m 586\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124medgecolors\u001b[39m\u001b[38;5;124m'\u001b[39m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m kwargs\u001b[38;5;241m.\u001b[39mkeys():\n\u001b[1;32m 
587\u001b[0m kwargs[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124medgecolors\u001b[39m\u001b[38;5;124m'\u001b[39m] \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mface\u001b[39m\u001b[38;5;124m'\u001b[39m\n\u001b[0;32m--> 588\u001b[0m mesh \u001b[38;5;241m=\u001b[39m \u001b[43max\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mpcolormesh\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 589\u001b[0m \u001b[43m \u001b[49m\u001b[43mnp\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43masarray\u001b[49m\u001b[43m(\u001b[49m\u001b[43mxdata\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 590\u001b[0m \u001b[43m \u001b[49m\u001b[43mydata\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 591\u001b[0m \u001b[43m \u001b[49m\u001b[43mdata\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mtranspose\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 592\u001b[0m \u001b[43m \u001b[49m\u001b[43mshading\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mset_shading\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 593\u001b[0m \u001b[43m \u001b[49m\u001b[43mcmap\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcmap\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 594\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 595\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 597\u001b[0m \u001b[38;5;66;03m# Set Title\u001b[39;00m\n\u001b[1;32m 598\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m set_title \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n", + "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/matplotlib/__init__.py:1442\u001b[0m, in \u001b[0;36m_preprocess_data..inner\u001b[0;34m(ax, data, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1439\u001b[0m \u001b[38;5;129m@functools\u001b[39m\u001b[38;5;241m.\u001b[39mwraps(func)\n\u001b[1;32m 1440\u001b[0m 
\u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21minner\u001b[39m(ax, \u001b[38;5;241m*\u001b[39margs, data\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs):\n\u001b[1;32m 1441\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m data \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m-> 1442\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[43max\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;28;43mmap\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43msanitize_sequence\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43margs\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1444\u001b[0m bound \u001b[38;5;241m=\u001b[39m new_sig\u001b[38;5;241m.\u001b[39mbind(ax, \u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n\u001b[1;32m 1445\u001b[0m auto_label \u001b[38;5;241m=\u001b[39m (bound\u001b[38;5;241m.\u001b[39marguments\u001b[38;5;241m.\u001b[39mget(label_namer)\n\u001b[1;32m 1446\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m bound\u001b[38;5;241m.\u001b[39mkwargs\u001b[38;5;241m.\u001b[39mget(label_namer))\n", + "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/matplotlib/axes/_axes.py:6220\u001b[0m, in \u001b[0;36mAxes.pcolormesh\u001b[0;34m(self, alpha, norm, cmap, vmin, vmax, shading, antialiased, *args, **kwargs)\u001b[0m\n\u001b[1;32m 6217\u001b[0m shading \u001b[38;5;241m=\u001b[39m shading\u001b[38;5;241m.\u001b[39mlower()\n\u001b[1;32m 6218\u001b[0m kwargs\u001b[38;5;241m.\u001b[39msetdefault(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124medgecolors\u001b[39m\u001b[38;5;124m'\u001b[39m, 
\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mnone\u001b[39m\u001b[38;5;124m'\u001b[39m)\n\u001b[0;32m-> 6220\u001b[0m X, Y, C, shading \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_pcolorargs\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mpcolormesh\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 6221\u001b[0m \u001b[43m \u001b[49m\u001b[43mshading\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mshading\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mkwargs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 6222\u001b[0m coords \u001b[38;5;241m=\u001b[39m np\u001b[38;5;241m.\u001b[39mstack([X, Y], axis\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m-\u001b[39m\u001b[38;5;241m1\u001b[39m)\n\u001b[1;32m 6223\u001b[0m \u001b[38;5;66;03m# convert to one dimensional array, except for 3D RGB(A) arrays\u001b[39;00m\n", + "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/matplotlib/axes/_axes.py:5751\u001b[0m, in \u001b[0;36mAxes._pcolorargs\u001b[0;34m(self, funcname, shading, *args, **kwargs)\u001b[0m\n\u001b[1;32m 5749\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m shading \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mflat\u001b[39m\u001b[38;5;124m'\u001b[39m:\n\u001b[1;32m 5750\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m (Nx, Ny) \u001b[38;5;241m!=\u001b[39m (ncols \u001b[38;5;241m+\u001b[39m \u001b[38;5;241m1\u001b[39m, nrows \u001b[38;5;241m+\u001b[39m \u001b[38;5;241m1\u001b[39m):\n\u001b[0;32m-> 5751\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mTypeError\u001b[39;00m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mDimensions of C 
\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mC\u001b[38;5;241m.\u001b[39mshape\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m should\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 5752\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m be one smaller than X(\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mNx\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m) and Y(\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mNy\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m)\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 5753\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m while using shading=\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mflat\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 5754\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m see help(\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mfuncname\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m)\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 5755\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m: \u001b[38;5;66;03m# ['nearest', 'gouraud']:\u001b[39;00m\n\u001b[1;32m 5756\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m (Nx, Ny) \u001b[38;5;241m!=\u001b[39m (ncols, nrows):\n", + "\u001b[0;31mTypeError\u001b[0m: Dimensions of C (17, 15, 33) should be one smaller than X(33) and Y(15) while using shading='flat' see help(pcolormesh)" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "5ec59307f5704277961c72d61c88ebc6", + "version_major": 2, + "version_minor": 0 + }, + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAA7YAAASwCAYAAADPBNYLAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/bCgiHAAAACXBIWXMAAA9hAAAPYQGoP6dpAABi0UlEQVR4nO3df3RV5Z3o/08gkKA1sUAJIIjRaqXlqkO4IrF8Ha3GQceWGeeKY5eog/c2Vy0DqV5FZvmD5axMO6tO6w9Qr6DjXWgz/hxmbkbNzFhFwRlJg+MIrb1CDWgiTRwT1DYI7O8fLjJNE5Rf5yQPvF5rnT/Ow7OTZ283cb/Z5+QUZFmWBQAAACRqUH8vAAAAAPaHsAUAACBpwhYAAICkCVsAAACSJmwBAABImrAFAAAgacIWAACApAlbAAAAkiZsAQAASJqwBQAAIGnCFgAAgKQJWwAAAJImbAEAAEiasAUAACBpwhYAAICkCVsAAACSJmwBAABImrAFAAAgacIWAACApAlbAAAAkiZsAQAASJqwBQAAIGnCFgAAgKQJWwAAAJImbAEAAEiasAUAACBpwhYAAICkCVsAAACSJmwBAABImrAFAAAgacIWAACApAlbAAAAkiZsAQAASJqwBQAAIGnCFgAAgKQJWwAAAJImbAEAAEiasAUAACBpwhYAAICkCVsAAACSJmwBAABImrAFAAAgacIWAACApAlbAAAAkiZsAQAASJqwBQAAIGnCFgAAgKQJWwAAAJImbAEAAEiasAUAACBpwhYAAICkCVsAAACSJmwBAABImrAFAAAgacIWAACApAlbAAAAkiZsAQAASJqwBQAAIGnCFgAAgKQJWwAAAJImbAEAAEiasAUAACBpwhYAAICkCVsAAACSJmwBAABImrAFAAAgacIWAACApAlbAAAAkiZsAQAASJqwBQAAIGnCFgAAgKQJWwAAAJImbAEAAEiasAUAACBpwhYAAICkCVsAAACSJmwBAABImrAFAAAgacIWAACApAlbAAAAkiZsAQAASJqwBQAAIGnCFgAAgKQJWwAAAJImbAEAAEiasAUAACBpwhYAAICkCVsAAACSJmwBAABImrAFAAAgacIWAACApAlbAAAAkiZsAQAASJqwBQAAIGnCFgAAgKQJWwAAAJImbAEAAEiasAUAACBpwhYAAICkCVsAAACSJmwBAABImrAFAAAgacIWAACApAlbAAAAkiZsAQAASJqwBQAAIGnCFgAAgKQJWwAAAJImbAEAAEiasAUAACBpwhYAAICkCVsAAACSJmwBAABImrAFAAAgacIWAACApAlbAAAAkiZsAQAASJqwBQAAIGnCFgAAgKQJWwAAAJImbAEAAEiasAUAACBpwhYAAICkCVsAAACSJmwBAABImrAFAAAgacIWAACApAlbAAAAkiZsAQAASJqwBQAAIGnCFgAAgKQJWwAAAJImbAEAAEiasAUAACBpwhYAAICkCVsAAACSJmwBAABImrAFAAAgacIWAACApAnbHHvhhRfiggsuiLFjx0ZBQUE89dRTn7nN888/HxUVFVFcXBzHHnts3HPPPblfKAAAQKKEbY59+OGHcfLJJ8ddd921R/M3btwY5513XkyfPj2amprixhtvjLlz58bjjz+e45UCAACkqSDLsqy/F3GoKCgoiCeffDJmzpy52znXX399rFixItavX989Vl1dHa+++mqsXr06D6sEAABIS2F/L4CeVq9eHVVVVT3Gzj333Fi6dGl8/PHHMWTIkD636+rqiq6uru7nO3fujPfeey9GjBgRBQUFOV0zAAAc6rIsi61bt8bYsWNj0CAvjM03YTvAtLa2RllZWY+xsrKy2L59e7S1tcWYMWP63K62tjZuvfXWfCwRAADYjU2bNsW4ceP6exmHHGE7AP32HdZdrxb/tDuvCxYsiJqamu7nHR0dcfTRR8emTZuipKQkNwsFAAAiIqKzszPGjx8fRxxxRH8v5ZAkbAeY0aN
HR2tra4+xLVu2RGFhYYwYMWK32xUVFUVRUVGv8ZKSEmELAAB54m2A/cOLvweYadOmRUNDQ4+xZ599NqZMmbLb99cCAAAcyoRtjn3wwQexdu3aWLt2bUR88nE+a9eujebm5oj45CXEs2fP7p5fXV0db731VtTU1MT69etj2bJlsXTp0rj22mv7Y/kAAAADnpci59iaNWvizDPP7H6+632wl112WTz44IPR0tLSHbkREeXl5VFfXx/z58+Pu+++O8aOHRt33HFHXHjhhXlfOwAAQAp8ju1BqrOzM0pLS6Ojo8N7bAEAIMdcf/cvL0UGAAAgacIWAACApAlbAAAAkiZsAQAASJqwBQAAIGnCFgAAgKQJWwAAAJImbAEAAEiasAUAACBpwhYAAICkCVsAAACSJmwBAABImrAFAAAgacIWAACApAlbAAAAkiZsAQAASJqwBQAAIGnCFgAAgKQJWwAAAJImbAEAAEiasAUAACBpwhYAAICkCVsAAACSJmwBAABImrAFAAAgacIWAACApAlbAAAAkiZsAQAASJqwBQAAIGnCFgAAgKQJWwAAAJImbAEAAEiasAUAACBpwhYAAICkCVsAAACSJmwBAABImrAFAAAgacIWAACApAlbAAAAkiZsAQAASJqwBQAAIGnCFgAAgKQJWwAAAJImbPNk8eLFUV5eHsXFxVFRURErV6781PnLly+Pk08+OQ477LAYM2ZMXHHFFdHe3p6n1QIAAKRD2OZBXV1dzJs3LxYuXBhNTU0xffr0mDFjRjQ3N/c5/8UXX4zZs2fHnDlz4vXXX49HH300XnnllbjyyivzvHIAAICBT9jmwe233x5z5syJK6+8MiZOnBg/+MEPYvz48bFkyZI+57/88stxzDHHxNy5c6O8vDy++tWvxre+9a1Ys2ZNnlcOAAAw8AnbHNu2bVs0NjZGVVVVj/GqqqpYtWpVn9tUVlbG5s2bo76+PrIsi3fffTcee+yxOP/883f7fbq6uqKzs7PHAwAA4FAgbHOsra0tduzYEWVlZT3Gy8rKorW1tc9tKisrY/ny5TFr1qwYOnRojB49Oo488si48847d/t9amtro7S0tPsxfvz4A7ofAAAAA5WwzZOCgoIez7Ms6zW2y7p162Lu3Llx0003RWNjYzz99NOxcePGqK6u3u3XX7BgQXR0dHQ/Nm3adEDXDwAAMFAV9vcCDnYjR46MwYMH97o7u2XLll53cXepra2N008/Pa677rqIiDjppJPi8MMPj+nTp8dtt90WY8aM6bVNUVFRFBUVHfgdAAAAGODcsc2xoUOHRkVFRTQ0NPQYb2hoiMrKyj63+eijj2LQoJ7/aQYPHhwRn9zpBQAA4D8J2zyoqamJ+++/P5YtWxbr16+P+fPnR3Nzc/dLixcsWBCzZ8/unn/BBRfEE088EUuWLIkNGzbESy+9FHPnzo1TTz01xo4d21+7AQAAMCB5KXIezJo1K9rb22PRokXR0tISkyZNivr6+pgwYUJERLS0tPT4TNvLL788tm7dGnfddVd85zvfiSOPPDLOOuus+O53v9tfuwAAADBgFWRe23pQ6uzsjNLS0ujo6IiSkpL+Xg4AABzUXH/3Ly9FBgAAIGnCFgAAgKQJWwAAAJImbAEAAEiasAUAACBpwhYAAICkCVsAAACSJmwBAABImrAFAAAgacIWAACApAlbAAAAkiZsAQAASJqwBQAAIGnCFgAAgKQJWwAAAJImbAEAAEiasAUAACBpwhYAAICkCVsAAACSJmwBAABImrAFAAAgacIWAACApAlbAAAAkiZsAQAASJqwBQAAIGnCFgAAgKQJWwAAAJImbAEAAEiasAUAACBpwhYAAICkCVsAAACSJmwBAABImrAFAAAgacIWAACApAlbAAAAkiZsAQAASJqwBQAAIGnCFgAAgKQJWwAAAJImbAEAAEiasAUAACBpwhYAAICkCVsAAACSJmzzZPHixVFeXh7FxcVRUVERK1eu/NT
5XV1dsXDhwpgwYUIUFRXFcccdF8uWLcvTagEAANJR2N8LOBTU1dXFvHnzYvHixXH66afHvffeGzNmzIh169bF0Ucf3ec2F110Ubz77ruxdOnS+OIXvxhbtmyJ7du353nlAAAAA19BlmVZfy/iYDd16tSYPHlyLFmypHts4sSJMXPmzKitre01/+mnn46LL744NmzYEMOHD9+n79nZ2RmlpaXR0dERJSUl+7x2AADgs7n+7l9eipxj27Zti8bGxqiqquoxXlVVFatWrepzmxUrVsSUKVPie9/7Xhx11FFxwgknxLXXXhu/+tWv8rFkAACApHgpco61tbXFjh07oqysrMd4WVlZtLa29rnNhg0b4sUXX4zi4uJ48skno62tLa666qp47733dvs+266urujq6up+3tnZeeB2AgAAYABzxzZPCgoKejzPsqzX2C47d+6MgoKCWL58eZx66qlx3nnnxe233x4PPvjgbu/a1tbWRmlpafdj/PjxB3wfAAAABiJhm2MjR46MwYMH97o7u2XLll53cXcZM2ZMHHXUUVFaWto9NnHixMiyLDZv3tznNgsWLIiOjo7ux6ZNmw7cTgAAAAxgwjbHhg4dGhUVFdHQ0NBjvKGhISorK/vc5vTTT4933nknPvjgg+6xN954IwYNGhTjxo3rc5uioqIoKSnp8QAAADgUCNs8qKmpifvvvz+WLVsW69evj/nz50dzc3NUV1dHxCd3W2fPnt09/5JLLokRI0bEFVdcEevWrYsXXnghrrvuuviTP/mTGDZsWH/tBgAAwIDkl0flwaxZs6K9vT0WLVoULS0tMWnSpKivr48JEyZERERLS0s0Nzd3z//c5z4XDQ0N8e1vfzumTJkSI0aMiIsuuihuu+22/toFAACAAcvn2B6kfI4WAADkj+vv/uWlyAAAACRN2AIAAJA0YQsAAEDShC0AAABJE7YAAAAkTdgCAACQNGELAABA0oQtAAAASRO2AAAAJE3YAgAAkDRhCwAAQNKELQAAAEkTtgAAACRN2AIAAJA0YQsAAEDShC0AAABJE7YAAAAkTdgCAACQNGELAABA0oQtAAAASRO2AAAAJE3YAgAAkDRhCwAAQNKELQAAAEkTtgAAACRN2AIAAJA0YQsAAEDShC0AAABJE7YAAAAkTdgCAACQNGELAABA0oQtAAAASRO2AAAAJE3YAgAAkDRhCwAAQNKELQAAAEkTtgAAACRN2AIAAJA0YQsAAEDShC0AAABJE7YAAAAkTdgCAACQNGELAABA0oQtAAAASRO2ebJ48eIoLy+P4uLiqKioiJUrV+7Rdi+99FIUFhbGKaecktsFAgAAJErY5kFdXV3MmzcvFi5cGE1NTTF9+vSYMWNGNDc3f+p2HR0dMXv27Pja176Wp5UCAACkpyDLsqy/F3Gwmzp1akyePDmWLFnSPTZx4sSYOXNm1NbW7na7iy++OI4//vgYPHhwPPXUU7F27do9/p6dnZ1RWloaHR0dUVJSsj/LBwAAPoPr7/7ljm2Obdu2LRobG6OqqqrHeFVVVaxatWq32z3wwAPx5ptvxs0337xH36erqys6Ozt7PAAAAA4FwjbH2traYseOHVFWVtZjvKysLFpbW/vc5uc//3nccMMNsXz58igsLNyj71NbWxulpaXdj/Hjx+/32gEAAFIgbPOkoKCgx/Msy3qNRUTs2LEjLrnkkrj11lvjhBNO2OOvv2DBgujo6Oh+bNq0ab/XDAAAkII9ux3IPhs5cmQMHjy4193ZLVu29LqLGxGxdevWWLNmTTQ1NcU111wTERE7d+6MLMuisLAwnn322TjrrLN6bVdUVBRFRUW52QkAAIABzB3bHBs6dGhUVFREQ0NDj/GGhoaorKzsNb+kpCRee+21WLt2bfejuro6vvSlL8XatWtj6tSp+Vo6AABAEtyxzYOampq49NJLY8qUKTFt2rS47777orm5OaqrqyPik5cRv/322/HQQw/FoEGDYtKkST2
2HzVqVBQXF/caBwAAQNjmxaxZs6K9vT0WLVoULS0tMWnSpKivr48JEyZERERLS8tnfqYtAAAAffM5tgcpn6MFAAD54/q7f3mPLQAAAEkTtgAAACRN2AIAAJA0YQsAAEDShC0AAABJE7YAAAAkTdgCAACQNGELAABA0oQtAAAASRO2AAAAJE3YAgAAkDRhCwAAQNKELQAAAEkTtgAAACRN2AIAAJA0YQsAAEDShC0AAABJE7YAAAAkTdgCAACQNGELAABA0oQtAAAASRO2AAAAJE3YAgAAkDRhCwAAQNKELQAAAEkTtgAAACRN2AIAAJA0YQsAAEDShC0AAABJE7YAAAAkTdgCAACQNGELAABA0oQtAAAASRO2AAAAJE3YAgAAkDRhCwAAQNKELQAAAEkTtgAAACRN2AIAAJA0YQsAAEDShC0AAABJE7YAAAAkTdgCAACQNGGbJ4sXL47y8vIoLi6OioqKWLly5W7nPvHEE3HOOefEF77whSgpKYlp06bFM888k8fVAgAApEPY5kFdXV3MmzcvFi5cGE1NTTF9+vSYMWNGNDc39zn/hRdeiHPOOSfq6+ujsbExzjzzzLjggguiqakpzysHAAAY+AqyLMv6exEHu6lTp8bkyZNjyZIl3WMTJ06MmTNnRm1t7R59ja985Ssxa9asuOmmm/ZofmdnZ5SWlkZHR0eUlJTs07oBAIA94/q7f7ljm2Pbtm2LxsbGqKqq6jFeVVUVq1at2qOvsXPnzti6dWsMHz58t3O6urqis7OzxwMAAOBQIGxzrK2tLXbs2BFlZWU9xsvKyqK1tXWPvsb3v//9+PDDD+Oiiy7a7Zza2tooLS3tfowfP36/1g0AAJAKYZsnBQUFPZ5nWdZrrC+PPPJI3HLLLVFXVxejRo3a7bwFCxZER0dH92PTpk37vWYAAIAUFPb3Ag52I0eOjMGDB/e6O7tly5Zed3F/W11dXcyZMyceffTROPvssz91blFRURQVFe33egEAAFLjjm2ODR06NCoqKqKhoaHHeENDQ1RWVu52u0ceeSQuv/zyePjhh+P888/P9TIBAACS5Y5tHtTU1MSll14aU6ZMiWnTpsV9990Xzc3NUV1dHRGfvIz47bffjoceeigiPona2bNnxw9/+MM47bTTuu/2Dhs2LEpLS/ttPwAAAAYiYZsHs2bNivb29li0aFG0tLTEpEmTor6+PiZMmBARES0tLT0+0/bee++N7du3x9VXXx1XX3119/hll10WDz74YL6XDwAAMKD5HNuDlM/RAgCA/HH93b+8xxYAAICkCVsAAACSJmwBAABImrAFAAAgacIWAACApAlbAAAAkiZsAQAASJqwBQAAIGnCFgAAgKQJWwAAAJImbAEAAEiasAUAACBpwhYAAICkCVsAAACSJmwBAABImrAFAAAgacIWAACApAlbAAAAkiZsAQAASJqwBQAAIGnCFgAAgKQJWwAAAJImbAEAAEiasAUAACBpwhYAAICkCVsAAACSJmwBAABImrAFAAAgacIWAACApAlbAAAAkiZsAQAASJqwBQAAIGnCFgAAgKQJWwAAAJImbAEAAEiasAUAACBpwhYAAICkCVsAAACSJmwBAABImrAFAAAgacIWAACApAlbAAAAkiZsAQAASJqwzZPFixdHeXl5FBcXR0VFRaxcufJT5z///PNRUVERxcXFceyxx8Y999yTp5UCAACkRdjmQV1dXcybNy8WLlwYTU1NMX369JgxY0Y0Nzf3OX/jxo1x3nnnxfTp06OpqSluvPHGmDt3bjz++ON5XjkAAMDAV5BlWdbfizjYTZ06NSZPnhxLlizpHps4cWLMnDkzamtre82//vrrY8WKFbF+/fruserq6nj11Vdj9erVe/Q9Ozs7o7S0NDo6OqKkpGT/dwIAANgt19/9yx3bHNu2bVs0NjZGVVVVj/GqqqpYtWpVn9usXr261/xzzz031qxZEx9//HHO1goAAJCiwv5ewMGura0
tduzYEWVlZT3Gy8rKorW1tc9tWltb+5y/ffv2aGtrizFjxvTapqurK7q6urqfd3R0RMQn/3IEAADk1q7rbi+I7R/CNk8KCgp6PM+yrNfYZ83va3yX2trauPXWW3uNjx8/fm+XCgAA7KP29vYoLS3t72UccoRtjo0cOTIGDx7c6+7sli1bet2V3WX06NF9zi8sLIwRI0b0uc2CBQuipqam+/n7778fEyZMiObmZn+xPkVnZ2eMHz8+Nm3a5L0Qu+EY7RnH6bM5RnvGcdozjtNnc4z2jOP02RyjPdPR0RFHH310DB8+vL+XckgStjk2dOjQqKioiIaGhviDP/iD7vGGhob4xje+0ec206ZNi7/7u7/rMfbss8/GlClTYsiQIX1uU1RUFEVFRb3GS0tL/QDaAyUlJY7TZ3CM9ozj9Nkcoz3jOO0Zx+mzOUZ7xnH6bI7Rnhk0yK8x6g+Oeh7U1NTE/fffH8uWLYv169fH/Pnzo7m5OaqrqyPik7uts2fP7p5fXV0db731VtTU1MT69etj2bJlsXTp0rj22mv7axcAAAAGLHds82DWrFnR3t4eixYtipaWlpg0aVLU19fHhAkTIiKipaWlx2falpeXR319fcyfPz/uvvvuGDt2bNxxxx1x4YUX9tcuAAAADFjCNk+uuuqquOqqq/r8swcffLDX2BlnnBE/+clP9vn7FRUVxc0339zny5P5T47TZ3OM9ozj9Nkcoz3jOO0Zx+mzOUZ7xnH6bI7RnnGc+ldB5vdRAwAAkDDvsQUAACBpwhYAAICkCVsAAACSJmwHqNra2viv//W/xhFHHBGjRo2KmTNnxs9+9rMec7Isi1tuuSXGjh0bw4YNi9/93d+N119/vcecrq6u+Pa3vx0jR46Mww8/PL7+9a/H5s2be8z5j//4j7j00kujtLQ0SktL49JLL433338/17t4QOTzOP35n/95VFZWxmGHHRZHHnlkrnftgMnXMfrFL34Rc+bMifLy8hg2bFgcd9xxcfPNN8e2bdvysp/7K5/n0te//vU4+uijo7i4OMaMGROXXnppvPPOOznfx/2Vz2P0m3NPOeWUKCgoiLVr1+Zq1w6ofB6nY445JgoKCno8brjhhpzv44GQ7/Pp//7f/xtTp06NYcOGxciRI+MP//APc7p/B0K+jtGPf/zjXufRrscrr7ySl33dH/k8l9544434xje+ESNHjoySkpI4/fTT47nnnsv5Ph4I+TxOP/nJT+Kcc86JI488MkaMGBH/43/8j/jggw9yvo/760Ado/vuuy9+93d/N0pKSqKgoKDP6+qUr78HrIwB6dxzz80eeOCB7N///d+ztWvXZueff3529NFHZx988EH3nL/4i7/IjjjiiOzxxx/PXnvttWzWrFnZmDFjss7Ozu451dXV2VFHHZU1NDRkP/nJT7IzzzwzO/nkk7Pt27d3z/m93/u9bNKkSdmqVauyVatWZZMmTcp+//d/P6/7u6/yeZxuuumm7Pbbb89qamqy0tLSfO7mfsnXMfqHf/iH7PLLL8+eeeaZ7M0338z+9m//Nhs1alT2ne98J+/7vC/yeS7dfvvt2erVq7Nf/OIX2UsvvZRNmzYtmzZtWl73d1/k8xjtMnfu3GzGjBlZRGRNTU352M39ls/jNGHChGzRokVZS0tL92Pr1q153d99lc/j9Nhjj2Wf//znsyVLlmQ/+9nPsp/+9KfZo48+mtf93Rf5OkZdXV09zqGWlpbsyiuvzI455phs586ded/vvZXPc+mLX/xidt5552Wvvvpq9sYbb2RXXXVVdthhh2UtLS153ed9ka/j9Pbbb2ef//zns+rq6uynP/1p9q//+q9ZZWVlduGFF+Z9n/fWgTpGf/VXf5XV1tZmtbW1WURk//Ef/9Hre6V8/T1QCdtEbNmyJYuI7Pnnn8+yLMt27tyZjR49OvuLv/iL7jm//vWvs9LS0uyee+7JsizL3n///WzIkCHZj370o+45b7/9djZo0KD
s6aefzrIsy9atW5dFRPbyyy93z1m9enUWEdlPf/rTfOzaAZWr4/SbHnjggaTC9rfl4xjt8r3vfS8rLy/P0Z7kVj6P09/+7d9mBQUF2bZt23K0N7mR62NUX1+fnXjiidnrr7+eVNj+tlwepwkTJmR/9Vd/lZ8dybFcHaePP/44O+qoo7L7778/j3uTG/n6ubRt27Zs1KhR2aJFi3K4N7mTq+P0y1/+MouI7IUXXuie09nZmUVE9o//+I/52LUDKlfH6d57781GjRqV7dixo3tOU1NTFhHZz3/+83zs2gGzL8foNz333HN9hu3Bdv09UHgpciI6OjoiImL48OEREbFx48ZobW2Nqqqq7jlFRUVxxhlnxKpVqyIiorGxMT7++OMec8aOHRuTJk3qnrN69eooLS2NqVOnds857bTTorS0tHtOSnJ1nA4m+TxGHR0d3d8nNfk6Tu+9914sX748KisrY8iQIbnanZzI5TF6991347//9/8e/+f//J847LDD8rE7OZPrc+m73/1ujBgxIk455ZT48z//82Re/v/bcnWcfvKTn8Tbb78dgwYNit/5nd+JMWPGxIwZM3q9dDAF+fq5tGLFimhra4vLL788R3uSW7k6TiNGjIiJEyfGQw89FB9++GFs37497r333igrK4uKiop87d4Bk6vj1NXVFUOHDo1Bg/4zM4YNGxYRES+++GJud+oA25djtCcOtuvvgULYJiDLsqipqYmvfvWrMWnSpIiIaG1tjYiIsrKyHnPLysq6/6y1tTWGDh0an//85z91zqhRo3p9z1GjRnXPSUUuj9PBIp/H6M0334w777wzqqurD/Ru5Fw+jtP1118fhx9+eIwYMSKam5vjb//2b3O1OzmRy2OUZVlcfvnlUV1dHVOmTMn1ruRUrs+lP/3TP40f/ehH8dxzz8U111wTP/jBD+Kqq67K5S7lRC6P04YNGyIi4pZbbok/+7M/i7//+7+Pz3/+83HGGWfEe++9l9P9OpDy+fN76dKlce6558b48eMP9G7kXC6PU0FBQTQ0NERTU1McccQRUVxcHH/1V38VTz/9dFK/eyMit8fprLPOitbW1vjLv/zL2LZtW/zHf/xH3HjjjRER0dLSktP9OpD29RjtiYPp+nsgEbYJuOaaa+Lf/u3f4pFHHun1ZwUFBT2eZ1nWa+y3/facvubvydcZaHJ9nA4G+TpG77zzTvze7/1e/Lf/9t/iyiuv3L9F94N8HKfrrrsumpqa4tlnn43BgwfH7NmzI8uy/V98nuTyGN15553R2dkZCxYsOHAL7ie5Ppfmz58fZ5xxRpx00klx5ZVXxj333BNLly6N9vb2A7MDeZLL47Rz586IiFi4cGFceOGFUVFREQ888EAUFBTEo48+eoD2IPfy9fN78+bN8cwzz8ScOXP2b8H9JJfHKcuyuOqqq2LUqFGxcuXK+Nd//df4xje+Eb//+7+fVLBF5PY4feUrX4m//uu/ju9///tx2GGHxejRo+PYY4+NsrKyGDx48IHbiRw70Mfos77Gvn4d/pOwHeC+/e1vx4oVK+K5556LcePGdY+PHj06IqLXv+ps2bKl+1+RRo8e3f0vZZ8259133+31fX/5y1/2+teogSzXx+lgkK9j9M4778SZZ54Z06ZNi/vuuy8Xu5JT+TpOI0eOjBNOOCHOOeec+NGPfhT19fXx8ssv52KXDrhcH6N//ud/jpdffjmKioqisLAwvvjFL0ZExJQpU+Kyyy7L2X4daP3xc+m0006LiIj/9//+3wHZh3zI9XEaM2ZMRER8+ctf7v7zoqKiOPbYY6O5ufnA71AO5PNceuCBB2LEiBHx9a9//UDvRs7l42fT3//938ePfvSjOP3002Py5MmxePHiGDZsWPz1X/91LnftgMrH+XTJJZdEa2trvP3229He3h633HJL/PKXv4zy8vJc7dYBtT/HaE8cLNffA05O38HLPtu5c2d29dVXZ2PHjs3eeOONPv989OjR2Xe
/+93usa6urj7f4F9XV9c955133unzl0f9y7/8S/ecl19+OZk3r+frOP2m1H55VD6P0ebNm7Pjjz8+u/jii/v8DbcDWX+cS7s0NzdnEZE999xzB26HciBfx+itt97KXnvtte7HM888k0VE9thjj2WbNm3K8V7uv/48l/7u7/4ui4jsrbfeOoB7lBv5Ok4dHR1ZUVFRj18eteuXI91777252r0DIt/n0s6dO7Py8vJkfpv9Lvk6TitWrMgGDRrU6zePn3DCCdmf//mf52LXDqj+/Nm0dOnS7LDDDuvztwMPJAfiGP2mz/rlUalefw9UwnaA+p//839mpaWl2Y9//OMev37/o48+6p7zF3/xF1lpaWn2xBNPZK+99lr2x3/8x33+SvZx48Zl//iP/5j95Cc/yc4666w+P+7npJNOylavXp2tXr06+y//5b8k8+vG83mc3nrrraypqSm79dZbs8997nNZU1NT1tTUNOA/WiNfx+jtt9/OvvjFL2ZnnXVWtnnz5h7fKwX5Ok7/8i//kt15551ZU1NT9otf/CL753/+5+yrX/1qdtxxx2W//vWv877feyOff99+08aNG5P6rcj5Ok6rVq3Kbr/99qypqSnbsGFDVldXl40dOzb7+te/nvd93hf5PJ/+9E//NDvqqKOyZ555JvvpT3+azZkzJxs1alT23nvv5XWf91a+/8794z/+YxYR2bp16/K2jwdCvo7TL3/5y2zEiBHZH/7hH2Zr167Nfvazn2XXXnttNmTIkGzt2rV53++9lc/z6c4778waGxuzn/3sZ9ldd92VDRs2LPvhD3+Y1/3dFwfqGLW0tGRNTU3Z//7f/7v7N2k3NTVl7e3t3XNSvv4eqITtABURfT4eeOCB7jk7d+7Mbr755mz06NFZUVFR9v/9f/9f9tprr/X4Or/61a+ya665Jhs+fHg2bNiw7Pd///ez5ubmHnPa29uzb37zm9kRRxyRHXHEEdk3v/nNAf8varvk8zhddtllfX6vgX6XLV/H6IEHHtjt90pBvo7Tv/3bv2VnnnlmNnz48KyoqCg75phjsurq6mzz5s352tV9ls+/b78ptbDN13FqbGzMpk6dmpWWlmbFxcXZl770pezmm2/OPvzww3zt6n7J5/m0bdu27Dvf+U42atSo7IgjjsjOPvvs7N///d/zsZv7Jd9/5/74j/84q6yszPVuHXD5PE6vvPJKVlVVlQ0fPjw74ogjstNOOy2rr6/Px27ut3wep0svvTQbPnx4NnTo0Oykk07KHnrooXzs4n47UMfo5ptv/syvk/L190BVkGUJ/bYSAAAA+C1+eRQAAABJE7YAAAAkTdgCAACQNGELAABA0oQtAAAASRO2AAAAJE3YAgAAkDRhCwAAQNKELQAAAEkTtgAAACRN2AIAAJA0YQsAAEDShC0AAABJE7YAAAAkTdgCAACQNGELAABA0oQtAAAASRO2AAAAJE3YAgAAkDRhCwAAQNKELQAAAEkTtgAAACRN2AIAAJA0YQsAAEDShC0AAABJE7YAAAAkTdgCAACQNGELAABA0oQtAAAASRO2OfbCCy/EBRdcEGPHjo2CgoJ46qmnPnOb559/PioqKqK4uDiOPfbYuOeee3K/UAAAgEQJ2xz78MMP4+STT4677rprj+Zv3LgxzjvvvJg+fXo0NTXFjTfeGHPnzo3HH388xysFAABIU0GWZVl/L+JQUVBQEE8++WTMnDlzt3Ouv/76WLFiRaxfv757rLq6Ol599dVYvXp1HlYJAACQlsL+XgA9rV69OqqqqnqMnXvuubF06dL4+OOPY8iQIX1u19XVFV1dXd3Pd+7cGe+9916MGDEiCgoKcrpmAAA41GVZFlu3bo2xY8fGoEFeGJtvwnaAaW1tjbKysh5jZWVlsX379mhra4sxY8b0uV1tbW3ceuut+VgiAACwG5s2bYpx48b19zIOOcJ2APrtO6y7Xi3+aXdeFyxYEDU1Nd3POzo64uijj45NmzZ
FSUlJbhYKAABERERnZ2eMHz8+jjjiiP5eyiFJ2A4wo0ePjtbW1h5jW7ZsicLCwhgxYsRutysqKoqioqJe4yUlJcIWAADyxNsA+4cXfw8w06ZNi4aGhh5jzz77bEyZMmW3768FAAA4lAnbHPvggw9i7dq1sXbt2oj45ON81q5dG83NzRHxyUuIZ8+e3T2/uro63nrrraipqYn169fHsmXLYunSpXHttdf2x/IBAAAGPC9FzrE1a9bEmWee2f181/tgL7vssnjwwQejpaWlO3IjIsrLy6O+vj7mz58fd999d4wdOzbuuOOOuPDCC/O+dgAAgBT4HNuDVGdnZ5SWlkZHR4f32AIAQI65/u5fXooMAABA0oQtAAAASRO2AAAAJE3YAgAAkDRhCwAAQNKELQAAAEkTtgAAACRN2AIAAJA0YQsAAEDShC0AAABJE7YAAAAkTdgCAACQNGELAABA0oQtAAAASRO2AAAAJE3YAgAAkDRhCwAAQNKELQAAAEkTtgAAACRN2AIAAJA0YQsAAEDShC0AAABJE7YAAAAkTdgCAACQNGELAABA0oQtAAAASRO2AAAAJE3YAgAAkDRhCwAAQNKELQAAAEkTtgAAACRN2AIAAJA0YQsAAEDShC0AAABJE7YAAAAkTdgCAACQNGELAABA0oQtAAAASRO2AAAAJE3YAgAAkDRhCwAAQNKELQAAAEkTtgAAACRN2ObJ4sWLo7y8PIqLi6OioiJWrlz5qfOXL18eJ598chx22GExZsyYuOKKK6K9vT1PqwUAAEiHsM2Durq6mDdvXixcuDCamppi+vTpMWPGjGhubu5z/osvvhizZ8+OOXPmxOuvvx6PPvpovPLKK3HllVfmeeUAAAADn7DNg9tvvz3mzJkTV155ZUycODF+8IMfxPjx42PJkiV9zn/55ZfjmGOOiblz50Z5eXl89atfjW9961uxZs2aPK8cAABg4BO2ObZt27ZobGyMqqqqHuNVVVWxatWqPreprKyMzZs3R319fWRZFu+++2489thjcf755+/2+3R1dUVnZ2ePBwAAwKFA2OZYW1tb7NixI8rKynqMl5WVRWtra5/bVFZWxvLly2PWrFkxdOjQGD16dBx55JFx55137vb71NbWRmlpafdj/PjxB3Q/AAAABiphmycFBQU9nmdZ1mtsl3Xr1sXcuXPjpptuisbGxnj66adj48aNUV1dvduvv2DBgujo6Oh+bNq06YCuHwAAYKAq7O8FHOxGjhwZgwcP7nV3dsuWLb3u4u5SW1sbp59+elx33XUREXHSSSfF4YcfHtOnT4/bbrstxowZ02uboqKiKCoqOvA7AAAAMMC5Y5tjQ4cOjYqKimhoaOgx3tDQEJWVlX1u89FHH8WgQT3/0wwePDgiPrnTCwAAwH8StnlQU1MT999/fyxbtizWr18f8+fPj+bm5u6XFi9YsCBmz57dPf+CCy6IJ554IpYsWRIbNmyIl156KebOnRunnnpqjB07tr92AwAAYEDyUuQ8mDVrVrS3t8eiRYuipaUlJk2aFPX19TFhwoSIiGhpaenxmbaXX355bN26Ne666674zne+E0ceeWScddZZ8d3vfre/dgEAAGDAKsi8tvWg1NnZGaWlpdHR0RElJSX9vRwAADiouf7uX16KDAAAQNKELQAAAEkTtgAAACRN2AIAAJA0YQsAAEDShC0AAABJE7YAAAAkTdgCAACQNGELAABA0oQtAAAASRO2AAAAJE3YAgAAkDRhCwAAQNKELQAAAEkTtgAAACRN2AIAAJA0YQsAAEDShC0AAABJE7YAAAAkTdgCAACQNGELAABA0oQtAAAASRO2AAAAJE3YAgAAkDRhCwAAQNKELQAAAEkTtgAAACRN2AIAAJA0YQsAAEDShC0AAABJE7YAAAAkTdgCAACQNGELAABA0oQtAAAASRO2AAAAJE3YAgAAkDRhCwAAQNKELQAAAEkTtgAAACRN2AIAAJA0YQsAAEDShC0
AAABJE7YAAAAkTdjmyeLFi6O8vDyKi4ujoqIiVq5c+anzu7q6YuHChTFhwoQoKiqK4447LpYtW5an1QIAAKSjsL8XcCioq6uLefPmxeLFi+P000+Pe++9N2bMmBHr1q2Lo48+us9tLrroonj33Xdj6dKl8cUvfjG2bNkS27dvz/PKAQAABr6CLMuy/l7EwW7q1KkxefLkWLJkSffYxIkTY+bMmVFbW9tr/tNPPx0XX3xxbNiwIYYPH75P37OzszNKS0ujo6MjSkpK9nntAADAZ3P93b+8FDnHtm3bFo2NjVFVVdVjvKqqKlatWtXnNitWrIgpU6bE9773vTjqqKPihBNOiGuvvTZ+9atf5WPJAAAASfFS5Bxra2uLHTt2RFlZWY/xsrKyaG1t7XObDRs2xIsvvhjFxcXx5JNPRltbW1x11VXx3nvv7fZ9tl1dXdHV1dX9vLOz88DtBAAAwADmjm2eFBQU9HieZVmvsV127twZBQUFsXz58jj11FPjvPPOi9tvvz0efPDB3d61ra2tjdLS0u7H+PHjD/g+AAAADETCNsdGjhwZgwcP7nV3dsuWLb3u4u4yZsyYOOqoo6K0tLR7bOLEiZFlWWzevLnPbRYsWBAdHR3dj02bNh24nQAAABjAhG2ODR06NCoqKqKhoaHHeENDQ1RWVva5zemnnx7vvPNOfPDBB91jb7zxRgwaNCjGjRvX5zZFRUVRUlLS4wEAAHAoELZ5UFNTE/fff38sW7Ys1q9fH/Pnz4/m5uaorq6OiE/uts6ePbt7/iWXXBIjRoyIK664ItatWxcvvPBCXHfddfEnf/InMWzYsP7aDQAAgAHJL4/Kg1mzZkV7e3ssWrQoWlpaYtKkSVFfXx8TJkyIiIiWlpZobm7unv+5z30uGhoa4tvf/nZMmTIlRowYERdddFHcdttt/bULAAAAA5bPsT1I+RwtAADIH9ff/ctLkQEAAEiasAUAACBpwhYAAICkCVsAAACSJmwBAABImrAFAAAgacIWAACApAlbAAAAkiZsAQAASJqwBQAAIGnCFgAAgKQJWwAAAJImbAEAAEiasAUAACBpwhYAAICkCVsAAACSJmwBAABImrAFAAAgacIWAACApAlbAAAAkiZsAQAASJqwBQAAIGnCFgAAgKQJWwAAAJImbAEAAEiasAUAACBpwhYAAICkCVsAAACSJmwBAABImrAFAAAgacIWAACApAlbAAAAkiZsAQAASJqwBQAAIGnCFgAAgKQJWwAAAJImbAEAAEiasAUAACBpwhYAAICkCVsAAACSJmwBAABImrAFAAAgacIWAACApAlbAAAAkiZs82Tx4sVRXl4excXFUVFREStXrtyj7V566aUoLCyMU045JbcLBAAASJSwzYO6urqYN29eLFy4MJqammL69OkxY8aMaG5u/tTtOjo6Yvbs2fG1r30tTysFAABIT0GWZVl/L+JgN3Xq1Jg8eXIsWbKke2zixIkxc+bMqK2t3e12F198cRx//PExePDgeOqpp2Lt2rV7/D07OzujtLQ0Ojo6oqSkZH+WDwAAfAbX3/3LHdsc27ZtWzQ2NkZVVVWP8aqqqli1atVut3vggQfizTffjJtvvnmPvk9XV1d0dnb2eAAAABwKhG2OtbW1xY4dO6KsrKzHeFlZWbS2tva5zc9//vO44YYbYvny5VFYWLhH36e2tjZKS0u7H+PHj9/vtQMAAKRA2OZJQUFBj+dZlvUai4jYsWNHXHLJJXHrrbfGCSecsMdff8GCBdHR0dH92LRp036vGQAAIAV7djuQfTZy5MgYPHhwr7uzW7Zs6XUXNyJi69atsWbNmmhqaoprrrkmIiJ27twZWZZFYWFhPPvss3HWWWf12q6oqCiKiopysxMAAAADmDu2OTZ06NCoqKiIhoaGHuMNDQ1RWVnZa35JSUm89tprsXbt2u5HdXV1fOlLX4q1a9fG1KlT87V0AACAJLhjmwc1NTVx6aWXxpQpU2LatGl
x3333RXNzc1RXV0fEJy8jfvvtt+Ohhx6KQYMGxaRJk3psP2rUqCguLu41DgAAgLDNi1mzZkV7e3ssWrQoWlpaYtKkSVFfXx8TJkyIiIiWlpbP/ExbAAAA+uZzbA9SPkcLAADyx/V3//IeWwAAAJImbAEAAEiasAUAACBpwhYAAICkCVsAAACSJmwBAABImrAFAAAgacIWAACApAlbAAAAkiZsAQAASJqwBQAAIGnCFgAAgKQJWwAAAJImbAEAAEiasAUAACBpwhYAAICkCVsAAACSJmwBAABImrAFAAAgacIWAACApAlbAAAAkiZsAQAASJqwBQAAIGnCFgAAgKQJWwAAAJImbAEAAEiasAUAACBpwhYAAICkCVsAAACSJmwBAABImrAFAAAgacIWAACApAlbAAAAkiZsAQAASJqwBQAAIGnCFgAAgKQJWwAAAJImbAEAAEiasAUAACBpwhYAAICkCVsAAACSJmwBAABImrAFAAAgacI2TxYvXhzl5eVRXFwcFRUVsXLlyt3OfeKJJ+Kcc86JL3zhC1FSUhLTpk2LZ555Jo+rBQAASIewzYO6urqYN29eLFy4MJqammL69OkxY8aMaG5u7nP+Cy+8EOecc07U19dHY2NjnHnmmXHBBRdEU1NTnlcOAAAw8BVkWZb19yIOdlOnTo3JkyfHkiVLuscmTpwYM2fOjNra2j36Gl/5yldi1qxZcdNNN+3R/M7OzigtLY2Ojo4oKSnZp3UDAAB7xvV3/3LHNse2bdsWjY2NUVVV1WO8qqoqVq1atUdfY+fOnbF169YYPnx4LpYIAACQtML+XsDBrq2tLXbs2BFlZWU9xsvKyqK1tXWPvsb3v//9+PDDD+Oiiy7a7Zyurq7o6urqft7Z2blvCwYAAEiMO7Z5UlBQ0ON5lmW9xvryyCOPxC233BJ1dXUxatSo3c6rra2N0tLS7sf48eP3e80AAAApELY5NnLkyBg8eHCvu7NbtmzpdRf3t9XV1cWcOXPib/7mb+Lss8/+1LkLFiyIjo6O7semTZv2e+0AAAApELY5NnTo0KioqIiGhoYe4w0NDVFZWbnb7R555JG4/PLL4+GHH47zzz//M79PUVFRlJSU9HgAAAAcCrzHNg9qamri0ksvjSlTpsS0adPivvvui+bm5qiuro6IT+62vv322/HQQw9FxCdRO3v27PjhD38Yp512Wvfd3mHDhkVpaWm/7QcAAMBAJGzzYNasWdHe3h6LFi2KlpaWmDRpUtTX18eECRMiIqKlpaXHZ9ree++9sX379rj66qvj6quv7h6/7LLL4sEHH8z38gEAAAY0n2N7kPI5WgAAkD+uv/uX99gCAACQNGELAABA0oQtAAAASRO2AAAAJE3YAgAAkDRhCwAAQNKELQAAAEkTtgAAACRN2AIAAJA0YQsAAEDShC0AAABJE7YAAAAkTdgCAACQNGELAABA0oQtAAAASRO2AAAAJE3YAgAAkDRhCwAAQNKELQAAAEkTtgAAACRN2AIAAJA0YQsAAEDShC0AAABJE7YAAAAkTdgCAACQNGELAABA0oQtAAAASRO2AAAAJE3YAgAAkDRhCwAAQNKELQAAAEkTtgAAACRN2AIAAJA0YQsAAEDShC0AAABJE7YAAAAkTdgCAACQNGELAABA0oQtAAAASRO2AAAAJE3YAgAAkDRhCwAAQNKELQAAAEkTtnmyePHiKC8vj+Li4qioqIiVK1d+6vznn38+Kioqori4OI499ti455578rRSAACAtAjbPKirq4t58+bFwoULo6mpKaZPnx4zZsyI5ubmPudv3LgxzjvvvJg+fXo0NTXFjTfeGHPnzo3HH388zysHAAAY+AqyLMv6exEHu6lTp8bkyZNjyZIl3WMTJ06MmTNnRm1tba/5119/faxYsSLWr1/fPVZdXR2vvvpqrF69eo++Z2dnZ5SWlkZHR0eUlJTs/04AAAC75fq7f7ljm2Pbtm2LxsbGqKqq6jFeVVU
Vq1at6nOb1atX95p/7rnnxpo1a+Ljjz/O2VoBAABSVNjfCzjYtbW1xY4dO6KsrKzHeFlZWbS2tva5TWtra5/zt2/fHm1tbTFmzJhe23R1dUVXV1f3846Ojoj45F+OAACA3Np13e0Fsf1D2OZJQUFBj+dZlvUa+6z5fY3vUltbG7feemuv8fHjx+/tUgEAgH3U3t4epaWl/b2MQ46wzbGRI0fG4MGDe92d3bJlS6+7sruMHj26z/mFhYUxYsSIPrdZsGBB1NTUdD9///33Y8KECdHc3OwvFvuls7Mzxo8fH5s2bfJ+EfaLc4kDyfnEgeJc4kDp6OiIo48+OoYPH97fSzkkCdscGzp0aFRUVERDQ0P8wR/8Qfd4Q0NDfOMb3+hzm2nTpsXf/d3f9Rh79tlnY8qUKTFkyJA+tykqKoqioqJe46WlpX5Ic0CUlJQ4lzggnEscSM4nDhTnEgfKoEF+jVF/cNTzoKamJu6///5YtmxZrF+/PubPnx/Nzc1RXV0dEZ/cbZ09e3b3/Orq6njrrbeipqYm1q9fH8uWLYulS5fGtdde21+7AAAAMGC5Y5sHs2bNivb29li0aFG0tLTEpEmTor6+PiZMmBARES0tLT0+07a8vDzq6+tj/vz5cffdd8fYsWPjjjvuiAsvvLC/dgEAAGDAErZ5ctVVV8VVV13V5589+OCDvcbOOOOM+MlPfrLP36+oqChuvvnmPl+eDHvDucSB4lziQHI+caA4lzhQnEv9qyDz+6gBAABImPfYAgAAkDRhCwAAQNKELQAAAEkTtgAAACRN2CZs8eLFUV5eHsXFxVFRURErV6781PnPP/98VFRURHFxcRx77LFxzz335GmlDHR7cy498cQTcc4558QXvvCFKCkpiWnTpsUzzzyTx9UykO3tz6VdXnrppSgsLIxTTjkltwskGXt7LnV1dcXChQtjwoQJUVRUFMcdd1wsW7YsT6tloNvb82n58uVx8sknx2GHHRZjxoyJK664Itrb2/O0WgaqF154IS644IIYO3ZsFBQUxFNPPfWZ27j+zh9hm6i6urqYN29eLFy4MJqammL69OkxY8aMHp+H+5s2btwY5513XkyfPj2amprixhtvjLlz58bjjz+e55Uz0OztufTCCy/EOeecE/X19dHY2BhnnnlmXHDBBdHU1JTnlTPQ7O25tEtHR0fMnj07vva1r+VppQx0+3IuXXTRRfFP//RPsXTp0vjZz34WjzzySJx44ol5XDUD1d6eTy+++GLMnj075syZE6+//no8+uij8corr8SVV16Z55Uz0Hz44Ydx8sknx1133bVH811/51lGkk499dSsurq6x9iJJ56Y3XDDDX3O/1//639lJ554Yo+xb33rW9lpp52WszWShr09l/ry5S9/Obv11lsP9NJIzL6eS7Nmzcr+7M/+LLv55puzk08+OYcrJBV7ey79wz/8Q1ZaWpq1t7fnY3kkZm/Pp7/8y7/Mjj322B5jd9xxRzZu3LicrZH0RET25JNPfuoc19/55Y5tgrZt2xaNjY1RVVXVY7yqqipWrVrV5zarV6/uNf/cc8+NNWvWxMcff5yztTKw7cu59Nt27twZW7dujeHDh+diiSRiX8+lBx54IN588824+eabc71EErEv59KKFStiypQp8b3vfS+OOuqoOOGEE+Laa6+NX/3qV/lYMgPYvpxPlZWVsXnz5qivr48sy+Ldd9+Nxx57LM4///x8LJmDiOvv/Crs7wWw99ra2mLHjh1RVlbWY7ysrCxaW1v73Ka1tbXP+du3b4+2trYYM2ZMztbLwLUv59Jv+/73vx8ffvhhXHTRRblYIonYl3Pp5z//edxwww2xcuXKKCz0vyM+sS/n0oYNG+LFF1+M4uLiePLJJ6OtrS2uuuqqeO+997zP9hC3L+dTZWVlLF++PGbNmhW//vWvY/v27fH1r3897rzzznwsmYOI6+/8csc2YQUFBT2eZ1nWa+yz5vc1zqFnb8+lXR5
55JG45ZZboq6uLkaNGpWr5ZGQPT2XduzYEZdccknceuutccIJJ+RreSRkb34u7dy5MwoKCmL58uVx6qmnxnnnnRe33357PPjgg+7aEhF7dz6tW7cu5s6dGzfddFM0NjbG008/HRs3bozq6up8LJWDjOvv/PFP5AkaOXJkDB48uNe/NG7ZsqXXvwrtMnr06D7nFxYWxogRI3K2Vga2fTmXdqmrq4s5c+bEo48+GmeffXYul0kC9vZc2rp1a6xZsyaamprimmuuiYhP4iTLsigsLIxnn302zjrrrLysnYFlX34ujRkzJo466qgoLS3tHps4cWJkWRabN2+O448/PqdrZuDal/OptrY2Tj/99LjuuusiIuKkk06Kww8/PKZPnx633Xabu2zsMdff+eWObYKGDh0aFRUV0dDQ0GO8oaEhKisr+9xm2rRpveY/++yzMWXKlBgyZEjO1srAti/nUsQnd2ovv/zyePjhh73niIjY+3OppKQkXnvttVi7dm33o7q6Or70pS/F2rVrY+rUqflaOgPMvvxcOv300+Odd96JDz74oHvsjTfeiEGDBsW4ceNyul4Gtn05nz766KMYNKjnJfLgwYMj4j/vtsGecP2dZ/30S6vYTz/60Y+yIUOGZEuXLs3WrVuXzZs3Lzv88MOzX/ziF1mWZdkNN9yQXXrppd3zN2zYkB122GHZ/Pnzs3Xr1mVLly7NhgwZkj322GP9tQsMEHt7Lj388MNZYWFhdvfdd2ctLS3dj/fff7+/doEBYm/Ppd/mtyKzy96eS1u3bs3GjRuX/dEf/VH2+uuvZ88//3x2/PHHZ1deeWV/7QIDyN6eTw888EBWWFiYLV68OHvzzTezF198MZsyZUp26qmn9tcuMEBs3bo1a2pqypqamrKIyG6//fasqakpe+utt7Isc/3d34Rtwu6+++5swoQJ2dChQ7PJkydnzz//fPefXXbZZdkZZ5zRY/6Pf/zj7Hd+53eyoUOHZsccc0y2ZMmSPK+YgWpvzqUzzjgji4hej8suuyz/C2fA2dufS79J2PKb9vZcWr9+fXb22Wdnw4YNy8aNG5fV1NRkH330UZ5XzUC1t+fTHXfckX35y1/Ohg0blo0ZMyb75je/mW3evDnPq2agee655z71Gsj1d/8qyDKvqQAAACBd3mMLAABA0oQtAAAASRO2AAAAJE3YAgAAkDRhCwAAQNKELQAAAEkTtgAAACRN2AIAAJA0YQsAAEDShC0AAABJE7YAAAAkTdgCAACQNGELAABA0oQtAAAASRO2AAAAJE3YAgAAkDRhCwAAQNKELQAAAEkTtgAAACRN2AIAAJA0YQsAAEDShC0AAABJE7YAAAAkTdgCAACQNGELAABA0oQtAAAASRO2AAAAJE3YAgAAkDRhm2MvvPBCXHDBBTF27NgoKCiIp5566jO3ef7556OioiKKi4vj2GOPjXvuuSf3CwUAAEiUsM2xDz/8ME4++eS466679mj+xo0b47zzzovp06dHU1NT3HjjjTF37tx4/PHHc7xSAACANBVkWZb19yIOFQUFBfHkk0/GzJkzdzvn+uuvjxUrVsT69eu7x6qrq+PVV1+N1atX52GVAAAAaSns7wXQ0+rVq6OqqqrH2LnnnhtLly6Njz/+OIYMGdLndl1dXdHV1dX9fOfOnfHee+/FiBEjoqCgIKdrBgCAQ12WZbF169YYO3ZsDBrkhbH5JmwHmNbW1igrK+sxVlZWFtu3b4+2trYYM2ZMn9vV1tbGrbfemo8lAgAAu7Fp06YYN25cfy/jkCNsB6DfvsO669Xin3bndcGCBVFTU9P9vKOjI44++ujYtGlTlJSU5GahAABARER0dnbG+PHj44gjjujvpRyShO0AM3r06Ghtbe0xtmXLligsLIwRI0bsdruioqIoKirqNV5SUiJsAQAgT7wNsH948fcAM23atGhoaOgx9uyzz8aUKVN2+/5aAACAQ5mwzbEPPvgg1q5dG2vXro2ITz7
OZ+3atdHc3BwRn7yEePbs2d3zq6ur46233oqamppYv359LFu2LJYuXRrXXnttfywfAABgwPNS5Bxbs2ZNnHnmmd3Pd70P9rLLLosHH3wwWlpauiM3IqK8vDzq6+tj/vz5cffdd8fYsWPjjjvuiAsvvDDvawcAAEiBz7E9SHV2dkZpaWl0dHR4jy0AAOSY6+/+5aXIAAAAJE3YAgAAkDRhCwAAQNKELQAAAEkTtgAAACRN2AIAAJA0YQsAAEDShC0AAABJE7YAAAAkTdgCAACQNGELAABA0oQtAAAASRO2AAAAJE3YAgAAkDRhCwAAQNKELQAAAEkTtgAAACRN2AIAAJA0YQsAAEDShC0AAABJE7YAAAAkTdgCAACQNGELAABA0oQtAAAASRO2AAAAJE3YAgAAkDRhCwAAQNKELQAAAEkTtgAAACRN2AIAAJA0YQsAAEDShC0AAABJE7YAAAAkTdgCAACQNGELAABA0oQtAAAASRO2AAAAJE3YAgAAkDRhCwAAQNKELQAAAEkTtgAAACRN2AIAAJA0YQsAAEDShG2eLF68OMrLy6O4uDgqKipi5cqVnzp/+fLlcfLJJ8dhhx0WY8aMiSuuuCLa29vztFoAAIB0CNs8qKuri3nz5sXChQujqakppk+fHjNmzIjm5uY+57/44osxe/bsmDNnTrz++uvx6KOPxiuvvBJXXnllnlcOAAAw8AnbPLj99ttjzpw5ceWVV8bEiRPjBz/4QYwfPz6WLFnS5/yXX345jjnmmJg7d26Ul5fHV7/61fjWt74Va9asyfPKAQAABj5hm2Pbtm2LxsbGqKqq6jFeVVUVq1at6nObysrK2Lx5c9TX10eWZfHuu+/GY489Fueff34+lgwAAJAUYZtjbW1tsWPHjigrK+sxXlZWFq2trX1uU1lZGcuXL49Zs2bF0KFDY/To0XHkkUfGnXfeudvv09XVFZ2dnT0eAAAAhwJhmycFBQU9nmdZ1mtsl3Xr1sXcuXPjpptuisbGxnj66adj48aNUV1dvduvX1tbG6Wlpd2P8ePHH9D1AwAADFQFWZZl/b2Ig9m2bdvisMMOi0cffTT+4A/+oHv8T//0T2Pt2rXx/PPP99rm0ksvjV//+tfx6KOPdo+9+OKLMX369HjnnXdizJgxvbbp6uqKrq6u7uednZ0xfvz46OjoiJKSkgO8VwAAwG/q7OyM0tJS19/9xB3bHBs6dGhUVFREQ0NDj/GGhoaorKzsc5uPPvooBg3q+Z9m8ODBEfHJnd6+FBUVRUlJSY8HAADAoUDY5kFNTU3cf//9sWzZsli/fn3Mnz8/mpubu19avGDBgpg9e3b3/AsuuCCeeOKJWLJkSWzYsCFeeumlmDt3bpx66qkxduzY/toNAACAAamwvxdwKJg1a1a0t7fHokWLoqWlJSZNmhT19fUxYcKEiIhoaWnp8Zm2l19+eWzdujXuuuuu+M53vhNHHnlknHXWWfHd7363v3YBAABgwPIe24OU1/gDAED+uP7uX16KDAAAQNKELQAAAEkTtgAAACRN2AIAAJA0YQsAAEDShC0AAABJE7YAAAAkTdgCAACQNGELAABA0oQtAAAASRO2AAAAJE3YAgAAkDRhCwAAQNKELQAAAEkTtgAAACRN2AIAAJA0YQsAAEDShC0AAABJE7YAAAAkTdgCAACQNGELAABA0oQtAAAASRO2AAAAJE3YAgAAkDRhCwAAQNKELQAAAEkTtgAAACRN2AIAAJA0YQsAAEDShC0AAABJE7YAAAAkTdgCAACQNGELAABA0oQtAAAASRO2AAAAJE3YAgAAkDRhCwAAQNKELQAAAEkTtgAAACRN2AIAAJA0YQsAAEDShC0AAABJE7YAAAAkTdjmyeLFi6O8vDyKi4ujoqIiVq5c+anzu7q6YuHChTFhwoQoKiqK4447LpYtW5an1QIAAKSjsL8XcCioq6uLefPmxeLFi+P000+Pe++9N2bMmBHr1q2Lo48+us9
tLrroonj33Xdj6dKl8cUvfjG2bNkS27dvz/PKAQAABr6CLMuy/l7EwW7q1KkxefLkWLJkSffYxIkTY+bMmVFbW9tr/tNPPx0XX3xxbNiwIYYPH75P37OzszNKS0ujo6MjSkpK9nntAADAZ3P93b+8FDnHtm3bFo2NjVFVVdVjvKqqKlatWtXnNitWrIgpU6bE9773vTjqqKPihBNOiGuvvTZ+9atf5WPJAAAASfFS5Bxra2uLHTt2RFlZWY/xsrKyaG1t7XObDRs2xIsvvhjFxcXx5JNPRltbW1x11VXx3nvv7fZ9tl1dXdHV1dX9vLOz88DtBAAAwADmjm2eFBQU9HieZVmvsV127twZBQUFsXz58jj11FPjvPPOi9tvvz0efPDB3d61ra2tjdLS0u7H+PHjD/g+AAAADETCNsdGjhwZgwcP7nV3dsuWLb3u4u4yZsyYOOqoo6K0tLR7bOLEiZFlWWzevLnPbRYsWBAdHR3dj02bNh24nQAAABjAhG2ODR06NCoqKqKhoaHHeENDQ1RWVva5zemnnx7vvPNOfPDBB91jb7zxRgwaNCjGjRvX5zZFRUVRUlLS4wEAAHAoELZ5UFNTE/fff38sW7Ys1q9fH/Pnz4/m5uaorq6OiE/uts6ePbt7/iWXXBIjRoyIK664ItatWxcvvPBCXHfddfEnf/InMWzYsP7aDQAAgAHJL4/Kg1mzZkV7e3ssWrQoWlpaYtKkSVFfXx8TJkyIiIiWlpZobm7unv+5z30uGhoa4tvf/nZMmTIlRowYERdddFHcdttt/bULAAAAA5bPsT1I+RwtAADIH9ff/ctLkQEAAEiasAUAACBpwhYAAICkCVsAAACSJmwBAABImrAFAAAgacIWAACApAlbAAAAkiZsAQAASJqwBQAAIGnCFgAAgKQJWwAAAJImbAEAAEiasAUAACBpwhYAAICkCVsAAACSJmwBAABImrAFAAAgacIWAACApAlbAAAAkiZsAQAASJqwBQAAIGnCFgAAgKQJWwAAAJImbAEAAEiasAUAACBpwhYAAICkCVsAAACSJmwBAABImrAFAAAgacIWAACApAlbAAAAkiZsAQAASJqwBQAAIGnCFgAAgKQJWwAAAJImbAEAAEiasAUAACBpwhYAAICkCVsAAACSJmwBAABImrAFAAAgacIWAACApAlbAAAAkiZs82Tx4sVRXl4excXFUVFREStXrtyj7V566aUoLCyMU045JbcLBAAASJSwzYO6urqYN29eLFy4MJqammL69OkxY8aMaG5u/tTtOjo6Yvbs2fG1r30tTysFAABIT0GWZVl/L+JgN3Xq1Jg8eXIsWbKke2zixIkxc+bMqK2t3e12F198cRx//PExePDgeOqpp2Lt2rV7/D07OzujtLQ0Ojo6oqSkZH+WDwAAfAbX3/3LHdsc27ZtWzQ2NkZVVVWP8aqqqli1atVut3vggQfizTffjJtvvnmPvk9XV1d0dnb2eAAAABwKhG2OtbW1xY4dO6KsrKzHeFlZWbS2tva5zc9//vO44YYbYvny5VFYWLhH36e2tjZKS0u7H+PHj9/vtQMAAKRA2OZJQUFBj+dZlvUai4jYsWNHXHLJJXHrrbfGCSecsMdff8GCBdHR0dH92LRp036vGQAAIAV7djuQfTZy5MgYPHhwr7uzW7Zs6XUXNyJi69atsWbNmmhqaoprrrkmIiJ27twZWZZFYWFhPPvss3HWWWf12q6oqCiKiopysxMAAAADmDu2OTZ06NCoqKiIhoaGHuMNDQ1RWVnZa35JSUm89tprsXbt2u5HdXV1fOlLX4q1a9fG1KlT87V0AACAJLhjmwc1NTVx6aWXxpQpU2LatGlx3333RXNzc1RXV0fEJy8jfvvtt+Ohhx6KQYMGxaRJk3psP2rUqCguLu41DgAAgLDNi1mzZkV7e3ssWrQoWlpaYtKkSVFfXx8TJkyIiIiWlpbP/ExbAAAA+uZzbA9SPkcLAADyx/V
3//IeWwAAAJImbAEAAEiasAUAACBpwhYAAICkCVsAAACSJmwBAABImrAFAAAgacIWAACApAlbAAAAkiZsAQAASJqwBQAAIGnCFgAAgKQJWwAAAJImbAEAAEiasAUAACBpwhYAAICkCVsAAACSJmwBAABImrAFAAAgacIWAACApAlbAAAAkiZsAQAASJqwBQAAIGnCFgAAgKQJWwAAAJImbAEAAEiasAUAACBpwhYAAICkCVsAAACSJmwBAABImrAFAAAgacIWAACApAlbAAAAkiZsAQAASJqwBQAAIGnCFgAAgKQJWwAAAJImbAEAAEiasAUAACBpwhYAAICkCVsAAACSJmwBAABImrAFAAAgacI2TxYvXhzl5eVRXFwcFRUVsXLlyt3OfeKJJ+Kcc86JL3zhC1FSUhLTpk2LZ555Jo+rBQAASIewzYO6urqYN29eLFy4MJqammL69OkxY8aMaG5u7nP+Cy+8EOecc07U19dHY2NjnHnmmXHBBRdEU1NTnlcOAAAw8BVkWZb19yIOdlOnTo3JkyfHkiVLuscmTpwYM2fOjNra2j36Gl/5yldi1qxZcdNNN+3R/M7OzigtLY2Ojo4oKSnZp3UDAAB7xvV3/3LHNse2bdsWjY2NUVVV1WO8qqoqVq1atUdfY+fOnbF169YYPnx4LpYIAACQtML+XsDBrq2tLXbs2BFlZWU9xsvKyqK1tXWPvsb3v//9+PDDD+Oiiy7a7Zyurq7o6urqft7Z2blvCwYAAEiMO7Z5UlBQ0ON5lmW9xvryyCOPxC233BJ1dXUxatSo3c6rra2N0tLS7sf48eP3e80AAAApELY5NnLkyBg8eHCvu7NbtmzpdRf3t9XV1cWcOXPib/7mb+Lss8/+1LkLFiyIjo6O7semTZv2e+0AAAApELY5NnTo0KioqIiGhoYe4w0NDVFZWbnb7R555JG4/PLL4+GHH47zzz//M79PUVFRlJSU9HgAAAAcCrzHNg9qamri0ksvjSlTpsS0adPivvvui+bm5qiuro6IT+62vv322/HQQw9FxCdRO3v27PjhD38Yp512Wvfd3mHDhkVpaWm/7QcAAMBAJGzzYNasWdHe3h6LFi2KlpaWmDRpUtTX18eECRMiIqKlpaXHZ9ree++9sX379rj66qvj6quv7h6/7LLL4sEHH8z38gEAAAY0n2N7kPI5WgAAkD+uv/uX99gCAACQNGELAABA0oQtAAAASRO2AAAAJE3YAgAAkDRhCwAAQNKELQAAAEkTtgAAACRN2AIAAJA0YQsAAEDShC0AAABJE7YAAAAkTdgCAACQNGELAABA0oQtAAAASRO2AAAAJE3YAgAAkDRhCwAAQNKELQAAAEkTtgAAACRN2AIAAJA0YQsAAEDShC0AAABJE7YAAAAkTdgCAACQNGELAABA0oQtAAAASRO2AAAAJE3YAgAAkDRhCwAAQNKELQAAAEkTtgAAACRN2AIAAJA0YQsAAEDShC0AAABJE7YAAAAkTdgCAACQNGELAABA0oQtAAAASRO2AAAAJE3YAgAAkDRhCwAAQNKELQAAAEkTtgAAACRN2ObJ4sWLo7y8PIqLi6OioiJWrlz5qfOff/75qKioiOLi4jj22GPjnnvuydNKAQAA0iJs86Curi7mzZsXCxcujKamppg+fXrMmDEjmpub+5y/cePGOO+882L69OnR1NQUN954Y8ydOzcef/zxPK8cAABg4CvIsizr70Uc7KZOnRqTJ0+OJUuWdI9NnDgxZs6cGbW1tb3mX3/99bFixYpYv35991h1dXW8+uqrsXr16j36np2dnVFaWhodHR1RUlKy/zsBAADsluvv/lXY3ws42G3bti0aGxvjhhtu6DFeVVUVq1at6nOb1atXR1VVVY+xc889N5YuXRoff/xxDBkypNc2XV1d0dXV1f28o6MjIj75CwYAAOTWrutu9w37h7DNsba2ttixY0eUlZX1GC8rK4vW1tY+t2ltbe1z/vb
t26OtrS3GjBnTa5va2tq49dZbe42PHz9+P1YPAADsjfb29igtLe3vZRxyhG2eFBQU9HieZVmvsc+a39f4LgsWLIiampru5++//35MmDAhmpub/cViv3R2dsb48eNj06ZNXlbDfnEucSA5nzhQnEscKB0dHXH00UfH8OHD+3sphyRhm2MjR46MwYMH97o7u2XLll53ZXcZPXp0n/MLCwtjxIgRfW5TVFQURUVFvcZLS0v9kOaAKCkpcS5xQDiXOJCcTxwoziUOlEGD/H7e/uCo59jQoUOjoqIiGhoaeow3NDREZWVln9tMmzat1/xnn302pkyZ0uf7awEAAA5lwjYPampq4v77749ly5bF+vXrY/78+dHc3BzV1dUR8cnLiGfPnt09v7q6Ot56662oqamJ9evXx7Jly2Lp0qVx7bXX9tcuAAAADFheipwHs2bNivb29li0aFG0tLTEpEmTor6+PiZMmBARES0tLT0+07a8vDzq6+tj/vz5cffdd8fYsWPjjjvuiAsvvHCPv2dRUVHcfPPNfb48GfaGc4kDxbnEgeR84kBxLnGgOJf6l8+xBQAAIGleigwAAEDShC0AAABJE7YAAAAkTdgCAACQNGGbsMWLF0d5eXkUFxdHRUVFrFy58lPnP//881FRURHFxcVx7LHHxj333JOnlTLQ7c259MQTT8Q555wTX/jCF6KkpCSmTZsWzzzzTB5Xy0C2tz+XdnnppZeisLAwTjnllNwukGTs7bnU1dUVCxcujAkTJkRRUVEcd9xxsWzZsjytloFub8+n5cuXx8knnxyHHXZYjBkzJq644opob2/P02oZqF544YW44IILYuzYsVFQUBBPPfXUZ27j+jt/hG2i6urqYt68ebFw4cJoamqK6dOnx4wZM3p8bNBv2rhxY5x33nkxffr0aGpqihtvvDHmzp0bjz/+eJ5XzkCzt+fSCy+8EOecc07U19dHY2NjnHnmmXHBBRdEU1NTnlfOQLO359IuHR0dMXv27Pja176Wp5Uy0O3LuXTRRRfFP/3TP8XSpUvjZz/7WTzyyCNx4okn5nHVDFR7ez69+OKLMXv27JgzZ068/vrr8eijj8Yrr7wSV155ZZ5XzkDz4Ycfxsknnxx33XXXHs13/Z1nGUk69dRTs+rq6h5jJ554YnbDDTf0Of9//a//lZ144ok9xr71rW9lp512Ws7WSBr29lzqy5e//OXs1ltvPdBLIzH7ei7NmjUr+7M/+7Ps5ptvzk4++eQcrpBU7O259A//8A9ZaWlp1t7eno/lkZi9PZ/+8i//Mjv22GN7jN1xxx3ZuHHjcrZG0hMR2ZNPPvmpc1x/55c7tgnatm1bNDY2RlVVVY/xqqqqWLVqVZ/brF69utf8c889N9asWRMff/xxztbKwLYv59Jv27lzZ2zdujWGDx+eiyWSiH09lx544IF488034+abb871EknEvpxLK1asiClTpsT3vve9OOqoo+KEE06Ia6+9Nn71q1/lY8kMYPtyPlVWVsbmzZujvr4+siyLd999Nx577LE4//zz87FkDiKuv/OrsL8XwN5ra2uLHTt2RFlZWY/xsrKyaG1t7XOb1tbWPudv37492traYsyYMTlbLwPXvpxLv+373/9+fPjhh3HRRRflYokkYl/OpZ///Odxww03xMqVK6Ow0P+O+MS+nEsbNmyIF198MYqLi+PJJ5+Mtra2uOqqq+K9997zPttD3L6cT5WVlbF8+fKYNWtW/PrXv47t27fH17/+9bjzzjvzsWQOIq6/88sd24QVFBT0eJ5lWa+xz5rf1ziHnr09l3Z55JFH4pZbbom6uroYNWpUrpZHQvb0XNqxY0dccsklceutt8YJJ5yQr+WRkL35ubRz584oKCiI5cuXx6mnnhrnnXde3H777fHggw+6a0tE7N35tG7dupg7d27cdNNN0djYGE8//XRs3Lgxqqur87FUDjKuv/PHP5EnaOTIkTF48OBe/9K4ZcuWXv8qtMv
o0aP7nF9YWBgjRozI2VoZ2PblXNqlrq4u5syZE48++micffbZuVwmCdjbc2nr1q2xZs2aaGpqimuuuSYiPomTLMuisLAwnn322TjrrLPysnYGln35uTRmzJg46qijorS0tHts4sSJkWVZbN68OY4//vicrpmBa1/Op9ra2jj99NPjuuuui4iIk046KQ4//PCYPn163Hbbbe6yscdcf+eXO7YJGjp0aFRUVERDQ0OP8YaGhqisrOxzm2nTpvWa/+yzz8aUKVNiyJAhOVsrA9u+nEsRn9ypvfzyy+Phhx/2niMiYu/PpZKSknjttddi7dq13Y/q6ur40pe+FGvXro2pU6fma+kMMPvyc+n000+Pd955Jz744IPusTfeeCMGDRoU48aNy+l6Gdj25Xz66KOPYtCgnpfIgwcPjoj/vNsGe8L1d5710y+tYj/96Ec/yoYMGZItXbo0W7duXTZv3rzs8MMPz37xi19kWZZlN9xwQ3bppZd2z9+wYUN22GGHZfPnz8/WrVuXLV26NBsyZEj22GOP9dcuMEDs7bn08MMPZ4WFhdndd9+dtbS0dD/ef//9/toFBoi9PZd+m9+KzC57ey5t3bo1GzduXPZHf/RH2euvv549//zz2fHHH59deeWV/bULDCB7ez498MADWWFhYbZ48eLszTffzF588cVsypQp2amnntpfu8AAsXXr1qypqSlramrKIiK7/fbbs6ampuytt97Kssz1d38Ttgm7++67swkTJmRDhw7NJk+enD3//PPdf3bZZZdlZ5xxRo/5P/7xj7Pf+Z3fyYYOHZodc8wx2ZIlS/K8YgaqvTmXzjjjjCwiej0uu+yy/C+cAWdvfy79JmHLb9rbc2n9+vXZ2WefnQ0bNiwbN25cVlNTk3300Ud5XjUD1d6eT3fccUf25S9/ORs2bFg2ZsyY7Jvf/Ga2efPmPK+agea555771Gsg19/9qyDLvKYCAACAdHmPLQAAAEkTtgAAACRN2AIAAJA0YQsAAEDShC0AAABJE7YAAAAkTdgCAACQNGELAABA0oQtAAAASRO2AAAAJE3YAgAAkDRhCwAAQNKELQAAAEkTtgAAACRN2AIAAJA0YQsAAEDShC0AAABJE7YAAAAkTdgCAACQNGELAABA0oQtAAAASRO2AAAAJE3YAgAAkDRhCwAAQNKELQAAAEkTtgAAACRN2AIAAJA0YQsAAEDShC0AAABJE7YAAAAkTdgCAACQNGELAABA0oQtAAAASRO2AAAAJE3YAgAAkDRhCwAAQNKELQAAAEkTtgAAACRN2AIAAJA0YQsAAEDShC0AAABJE7YAAAAkTdgCAACQNGELAABA0oQtAAAASRO2AAAAJE3YAgAAkDRhCwAAQNKELQAAAEkTtgAAACRN2AIAAJA0YQsAAEDShC0AAABJE7YAAAAkTdgCAACQNGELAABA0oQtAAAASRO2AAAAJE3YAgAAkDRhCwAAQNKELQAAAEkTtgAAACRN2AIAAJA0YQsAAEDShC0AAABJE7YAAAAkTdgCAACQNGELAABA0oQtAAAASRO2AAAAJE3YAgAAkDRhCwAAQNKELQAAAEkTtgAAACRN2AIAAJA0YQsAAEDShC0AAABJE7YAAAAkTdgCAACQNGELAABA0oQtAAAASRO2AAAAJE3YAgAAkDRhCwAAQNKELQAAAEkTtgAAACRN2AIAAJA0YQsAAEDShC0AAABJE7YAAAAkTdgCAACQNGELAABA0oQtAAAASRO2AAAAJE3YAgAAkDRhCwAAQNKELQAAAEkTtgAAACRN2AIAAJA0YQsAAEDShC0AAABJE7YAAAAkTdgCAACQNGELAABA0oQtAAAASRO2AAAAJE3YAgAAkDRhCwAAQNKELQAAAEkTtgAAACTt/wcLAF7/XlacegAAAABJRU5ErkJggg==", + "text/html": [ + "\n", + "
\n", + "
\n", + " Figure\n", + "
\n", + " \n", + "
\n", + " " + ], + "text/plain": [ + "Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "ts_display = act.plotting.TimeSeriesDisplay(ds)\n", + "ts_display.add_subplots((len(variables_to_plot),), figsize = (9.5,4*len(variables_to_plot)))\n", + "\n", + "for i,v in enumerate(variables_to_plot):\n", + " ts_ax = ts_display.plot(v, subplot_index=(i,), set_title=ds.variables[v].attrs['long_name'],)\n", + " ts_ax.grid()\n", + "\n", + "plt.show()\n" + ] + }, + { + "cell_type": "markdown", + "id": "194399aa-1907-452b-8ba9-bc31d7f60291", + "metadata": {}, + "source": [ + "## Quality check plots\n", + "#### Define variable for QC plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9a0ef63d-3eeb-48fc-a24a-43258b6134b8", + "metadata": {}, + "outputs": [], + "source": [ + "qc_variable = 'cloudfraction'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "532663a3-4dc0-4497-bda8-018c5f91e1c4", + "metadata": {}, + "outputs": [], + "source": [ + "# QC Plot\n", + "if ('qc_'+qc_variable) in ds.variables:\n", + "\n", + " # Plot\n", + " qc_display = act.plotting.TimeSeriesDisplay(ds)\n", + " qc_display.add_subplots((2,), figsize = (9.5,10))\n", + " qc_ax = qc_display.plot(qc_variable, subplot_index=(0,), set_title=\"QC results on field: \" + qc_variable,)\n", + " qc_ax.grid()\n", + " qc_display.qc_flag_block_plot(qc_variable, subplot_index=(1,))\n", + "\n", + " plt.show()\n", + "else:\n", + " print(f'QC not available for the selected field: {qc_variable}')\n" + ] + }, + { + "cell_type": "markdown", + "id": "2308db16-f362-4033-a11a-c3e5e75ad9ba", + "metadata": {}, + "source": [ + "## Field selection dropdown menu\n", + "Select variable to be plotted from a dropdown menu" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d5792fa8-2253-441a-8ac6-1e07a0345031", + "metadata": {}, + 
"outputs": [], + "source": [ + "plt.ioff()\n", + "\n", + "# populate dropdown menu with available variables \n", + "available_variables = [v for v in ds.variables if not('time' in v or v.startswith('qc_') or v.startswith('source_')) and 'long_name' in ds.variables[v].attrs]\n", + "d_variable = 'cloudfraction'\n", + "dropdown = widgets.Dropdown(\n", + " options = [(ds.variables[v].attrs['long_name'], v) for v in available_variables],\n", + " value= d_variable,\n", + " description='Field:',\n", + " disabled=False,\n", + ")\n", + "dropdown.layout.margin = '0px 30% 0px 20%'\n", + "dropdown.layout.width = '50%'\n", + "\n", + "# set up display\n", + "i_display = act.plotting.TimeSeriesDisplay(ds)\n", + "i_display.add_subplots((1,), figsize = (9.5,5))\n", + "i_ax = i_display.plot(d_variable, subplot_index=(0,), set_title=ds.variables[d_variable].attrs['long_name'],)\n", + "i_ax.grid()\n", + "i_fig = i_display.fig\n", + "\n", + "# update plot callback function\n", + "def update_plot(change):\n", + " i_ax.cla()\n", + " i_ax_new = i_display.plot(change.new, subplot_index=(0,), set_title=ds.variables[change.new].attrs['long_name'],)\n", + " i_ax_new.grid()\n", + " i_fig.canvas.draw()\n", + " i_fig.canvas.flush_events()\n", + "\n", + "dropdown.observe(update_plot, names='value')\n", + "\n", + "widgets.AppLayout(\n", + " header=dropdown,\n", + " center=i_fig.canvas,\n", + " pane_heights=[1, 6,1]\n", + ")\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.16" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git 
a/VAPs/quicklook/SFCCLDGRID/.ipynb_checkpoints/sfccldgrid2longcaracena.c1-checkpoint.ipynb b/VAPs/quicklook/SFCCLDGRID/.ipynb_checkpoints/sfccldgrid2longcaracena.c1-checkpoint.ipynb new file mode 100644 index 00000000..d7d3c877 --- /dev/null +++ b/VAPs/quicklook/SFCCLDGRID/.ipynb_checkpoints/sfccldgrid2longcaracena.c1-checkpoint.ipynb @@ -0,0 +1,5150 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# SFCCLDGRID2LONGCARACENA.C1 Plots\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/sfccldgrid) for more information about this vap." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib widget\n", + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import ipywidgets as widgets\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "# Data archive directory\n", + "DATA_DIR = r'/data/archive/'\n", + "\n", + "# Datastream info\n", + "DATASTREAM_NAME = 'sfccldgrid2longcaracena'\n", + "DATA_LEVEL = 'c1'\n", + "LOCATIONS = [{'end_date': '2020-06-01', 'facility': 'N1', 'site': 'sgp', 'start_date': '2011-10-21'}]" + ] + }, + { + "cell_type": "markdown", + "id": "9faaf875", + "metadata": {}, + "source": [ + "## Define site, facility, and date range" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "ac6764f5", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The following locations and date ranges are available for this VAP:\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sitefacilitystart_dateend_date
0sgpN12011-10-212020-06-01
\n", + "
" + ], + "text/plain": [ + " site facility start_date end_date\n", + "0 sgp N1 2011-10-21 2020-06-01" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "print(\"The following locations and date ranges are available for this VAP:\")\n", + "display(pd.DataFrame(LOCATIONS, columns=['site', 'facility', 'start_date', 'end_date']))" + ] + }, + { + "cell_type": "markdown", + "id": "8d132223", + "metadata": {}, + "source": [ + "#### Define site, facility, and date range (date format: YYYY-MM-DD) using the variables below:" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "e563983a", + "metadata": {}, + "outputs": [], + "source": [ + "site_facility = ( 'sgp', 'N1' )\n", + "\n", + "date_start = '2020-05-29'\n", + "date_end = '2020-05-31'" + ] + }, + { + "cell_type": "markdown", + "id": "bccd3dfe-2f99-49a5-bace-ea37e8dd8fc3", + "metadata": {}, + "source": [ + "## Load data files\n", + "Load data files from /data/archive/" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "cb4b9a26-c574-49c0-a521-658fa553e39e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'/data/archive/sgp/sgpsfccldgrid2longcaracenaN1.c1'" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Compile list of files\n", + "site, facility = site_facility\n", + "d_date_start = datetime.strptime(date_start, '%Y-%m-%d')\n", + "d_date_end = datetime.strptime(date_end, '%Y-%m-%d')\n", + "dir_path = os.path.join(DATA_DIR + site, site + DATASTREAM_NAME + facility + r'.' 
+ DATA_LEVEL )\n", + "dir_path\n" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "6be8f3dc", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['20200529', '20200530', '20200531']" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from datetime import date, timedelta\n", + "import pandas as pd\n", + "\n", + "def get_ARM_formated_dates(start_date, end_date):\n", + " \"\"\"\n", + " Get a list of ARM conventional formated date lists, based on start_date and end_date(inclusive)\n", + " EXAMPLE:\n", + " get_ARM_formated_dates(start_date=\"20180219\", end_date=\"20180221\")\n", + " >> [\"20180219\", \"20180220\", \"20180221\"] \n", + " \"\"\"\n", + " \n", + " _start_date = pd.to_datetime(start_date)\n", + " _end_date = pd.to_datetime(end_date)\n", + " \n", + " delta = _end_date - _start_date # returns timedelta \n", + " dates = []\n", + "\n", + " for i in range(delta.days + 1):\n", + " day = _start_date + timedelta(days=i)\n", + " day_formated = day.strftime(format=\"%Y%m%d\")\n", + " dates.append(day_formated)\n", + " return dates\n", + "\n", + "\n", + "get_ARM_formated_dates(start_date=date_start, end_date=date_end)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "51feea2e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['/data/archive/sgp/sgpsfccldgrid2longcaracenaN1.c1/sgpsfccldgrid2longcaracenaN1.c1.20200529.060000.nc',\n", + " '/data/archive/sgp/sgpsfccldgrid2longcaracenaN1.c1/sgpsfccldgrid2longcaracenaN1.c1.20200530.060000.nc',\n", + " '/data/archive/sgp/sgpsfccldgrid2longcaracenaN1.c1/sgpsfccldgrid2longcaracenaN1.c1.20200531.060000.nc']" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Filter a list of files based on date pattern\n", + "import glob\n", + "dates = get_ARM_formated_dates(start_date=date_start, end_date=date_end)\n", + "files_filter = []\n", 
+ "for date in dates:\n", + " files_filter += glob.glob(f'{dir_path}/*.{date}*.*')\n", + " files_filter\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "b0e5d0d7", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "3 files loaded\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
<xarray.Dataset>\n",
+       "Dimensions:                                                         (time: 288,\n",
+       "                                                                     bound: 2,\n",
+       "                                                                     lat: 8,\n",
+       "                                                                     lon: 11)\n",
+       "Coordinates:\n",
+       "  * time                                                            (time) datetime64[ns] ...\n",
+       "  * lat                                                             (lat) float32 ...\n",
+       "  * lon                                                             (lon) float32 ...\n",
+       "Dimensions without coordinates: bound\n",
+       "Data variables: (12/59)\n",
+       "    base_time                                                       (time) datetime64[ns] ...\n",
+       "    time_offset                                                     (time) datetime64[ns] ...\n",
+       "    time_bounds                                                     (time, bound) object dask.array<chunksize=(96, 2), meta=np.ndarray>\n",
+       "    downwelling_shortwave                                           (time, lat, lon) float32 dask.array<chunksize=(96, 8, 11), meta=np.ndarray>\n",
+       "    source_central_facility_downwelling_shortwave                   (time) int32 dask.array<chunksize=(96,), meta=np.ndarray>\n",
+       "    qc_downwelling_shortwave                                        (time, lat, lon) int32 dask.array<chunksize=(96, 8, 11), meta=np.ndarray>\n",
+       "    ...                                                              ...\n",
+       "    qc_visible_cloud_optical_depth                                  (time, lat, lon) int32 dask.array<chunksize=(96, 8, 11), meta=np.ndarray>\n",
+       "    cloud_radiating_temperature                                     (time, lat, lon) float32 dask.array<chunksize=(96, 8, 11), meta=np.ndarray>\n",
+       "    source_central_facility_cloud_radiating_temperature             (time) int32 dask.array<chunksize=(96,), meta=np.ndarray>\n",
+       "    qc_cloud_radiating_temperature                                  (time, lat, lon) int32 dask.array<chunksize=(96, 8, 11), meta=np.ndarray>\n",
+       "    azimuth                                                         (time) float32 dask.array<chunksize=(96,), meta=np.ndarray>\n",
+       "    alt                                                             (time, lat, lon) float32 dask.array<chunksize=(96, 8, 11), meta=np.ndarray>\n",
+       "Attributes: (12/17)\n",
+       "    command_line:          sfccldgrid2long_caracena -s sgp -f N1 -b 20171001 ...\n",
+       "    Conventions:           ARM-1.3\n",
+       "    process_version:       vap-sfccldgrid2long_caracena-1.4-0.el7\n",
+       "    dod_version:           sfccldgrid2longcaracena-c1-1.2\n",
+       "    input_datastreams:     sgpsfccldgrid2longstationN1.c1 : 1.4 : 20200529.06...\n",
+       "    site_id:               sgp\n",
+       "    ...                    ...\n",
+       "    doi:                   10.5439/1393588\n",
+       "    history:               created by user gaustad on machine agate at 2022-0...\n",
+       "    _file_dates:           ['20200529', '20200530', '20200531']\n",
+       "    _file_times:           ['060000', '060000', '060000']\n",
+       "    _datastream:           sgpsfccldgrid2longcaracenaN1.c1\n",
+       "    _arm_standards_flag:   1
" + ], + "text/plain": [ + "\n", + "Dimensions: (time: 288,\n", + " bound: 2,\n", + " lat: 8,\n", + " lon: 11)\n", + "Coordinates:\n", + " * time (time) datetime64[ns] ...\n", + " * lat (lat) float32 ...\n", + " * lon (lon) float32 ...\n", + "Dimensions without coordinates: bound\n", + "Data variables: (12/59)\n", + " base_time (time) datetime64[ns] ...\n", + " time_offset (time) datetime64[ns] ...\n", + " time_bounds (time, bound) object dask.array\n", + " downwelling_shortwave (time, lat, lon) float32 dask.array\n", + " source_central_facility_downwelling_shortwave (time) int32 dask.array\n", + " qc_downwelling_shortwave (time, lat, lon) int32 dask.array\n", + " ... ...\n", + " qc_visible_cloud_optical_depth (time, lat, lon) int32 dask.array\n", + " cloud_radiating_temperature (time, lat, lon) float32 dask.array\n", + " source_central_facility_cloud_radiating_temperature (time) int32 dask.array\n", + " qc_cloud_radiating_temperature (time, lat, lon) int32 dask.array\n", + " azimuth (time) float32 dask.array\n", + " alt (time, lat, lon) float32 dask.array\n", + "Attributes: (12/17)\n", + " command_line: sfccldgrid2long_caracena -s sgp -f N1 -b 20171001 ...\n", + " Conventions: ARM-1.3\n", + " process_version: vap-sfccldgrid2long_caracena-1.4-0.el7\n", + " dod_version: sfccldgrid2longcaracena-c1-1.2\n", + " input_datastreams: sgpsfccldgrid2longstationN1.c1 : 1.4 : 20200529.06...\n", + " site_id: sgp\n", + " ... 
...\n", + " doi: 10.5439/1393588\n", + " history: created by user gaustad on machine agate at 2022-0...\n", + " _file_dates: ['20200529', '20200530', '20200531']\n", + " _file_times: ['060000', '060000', '060000']\n", + " _datastream: sgpsfccldgrid2longcaracenaN1.c1\n", + " _arm_standards_flag: 1" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Load files as a single dataset\n", + "files_list = files_filter \n", + "ds = act.io.armfiles.read_netcdf(files_list)\n", + "ds.clean.cleanup()\n", + "print(f'{len(files_list)} files loaded')\n", + "ds\n" + ] + }, + { + "cell_type": "markdown", + "id": "4a551094-9ec0-4b64-b80a-9940573c2f50", + "metadata": {}, + "source": [ + "## Plot time series data\n", + "#### Define the list of variables to be plotted:" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "d09b789e-84f1-4605-846b-a72c110c8048", + "metadata": {}, + "outputs": [], + "source": [ + "variables_to_plot = ['downwelling_shortwave', 'source_central_facility_downwelling_shortwave', 'clearsky_downwelling_shortwave']" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "3458fb08-035b-4898-9253-0a94e6f9c97b", + "metadata": {}, + "outputs": [ + { + "ename": "TypeError", + "evalue": "Dimensions of C (11, 8, 288) should be one smaller than X(288) and Y(8) while using shading='flat' see help(pcolormesh)", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[9], line 5\u001b[0m\n\u001b[1;32m 2\u001b[0m ts_display\u001b[38;5;241m.\u001b[39madd_subplots((\u001b[38;5;28mlen\u001b[39m(variables_to_plot),), figsize \u001b[38;5;241m=\u001b[39m (\u001b[38;5;241m9.5\u001b[39m,\u001b[38;5;241m4\u001b[39m\u001b[38;5;241m*\u001b[39m\u001b[38;5;28mlen\u001b[39m(variables_to_plot)))\n\u001b[1;32m 4\u001b[0m 
\u001b[38;5;28;01mfor\u001b[39;00m i,v \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28menumerate\u001b[39m(variables_to_plot):\n\u001b[0;32m----> 5\u001b[0m ts_ax \u001b[38;5;241m=\u001b[39m \u001b[43mts_display\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mplot\u001b[49m\u001b[43m(\u001b[49m\u001b[43mv\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43msubplot_index\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mi\u001b[49m\u001b[43m,\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mset_title\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mds\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mvariables\u001b[49m\u001b[43m[\u001b[49m\u001b[43mv\u001b[49m\u001b[43m]\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mattrs\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mlong_name\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 6\u001b[0m ts_ax\u001b[38;5;241m.\u001b[39mgrid()\n\u001b[1;32m 8\u001b[0m plt\u001b[38;5;241m.\u001b[39mshow()\n", + "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/act/plotting/timeseriesdisplay.py:588\u001b[0m, in \u001b[0;36mTimeSeriesDisplay.plot\u001b[0;34m(self, field, dsname, subplot_index, cmap, set_title, add_nan, day_night_background, invert_y_axis, abs_limits, time_rng, y_rng, use_var_for_y, set_shading, assessment_overplot, overplot_marker, overplot_behind, overplot_markersize, assessment_overplot_category, assessment_overplot_category_color, force_line_plot, labels, cbar_label, cbar_h_adjust, secondary_y, y_axis_flag_meanings, colorbar_labels, cb_friendly, **kwargs)\u001b[0m\n\u001b[1;32m 586\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124medgecolors\u001b[39m\u001b[38;5;124m'\u001b[39m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m 
kwargs\u001b[38;5;241m.\u001b[39mkeys():\n\u001b[1;32m 587\u001b[0m kwargs[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124medgecolors\u001b[39m\u001b[38;5;124m'\u001b[39m] \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mface\u001b[39m\u001b[38;5;124m'\u001b[39m\n\u001b[0;32m--> 588\u001b[0m mesh \u001b[38;5;241m=\u001b[39m \u001b[43max\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mpcolormesh\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 589\u001b[0m \u001b[43m \u001b[49m\u001b[43mnp\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43masarray\u001b[49m\u001b[43m(\u001b[49m\u001b[43mxdata\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 590\u001b[0m \u001b[43m \u001b[49m\u001b[43mydata\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 591\u001b[0m \u001b[43m \u001b[49m\u001b[43mdata\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mtranspose\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 592\u001b[0m \u001b[43m \u001b[49m\u001b[43mshading\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mset_shading\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 593\u001b[0m \u001b[43m \u001b[49m\u001b[43mcmap\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcmap\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 594\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 595\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 597\u001b[0m \u001b[38;5;66;03m# Set Title\u001b[39;00m\n\u001b[1;32m 598\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m set_title \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n", + "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/matplotlib/__init__.py:1442\u001b[0m, in \u001b[0;36m_preprocess_data..inner\u001b[0;34m(ax, data, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1439\u001b[0m 
\u001b[38;5;129m@functools\u001b[39m\u001b[38;5;241m.\u001b[39mwraps(func)\n\u001b[1;32m 1440\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21minner\u001b[39m(ax, \u001b[38;5;241m*\u001b[39margs, data\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs):\n\u001b[1;32m 1441\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m data \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m-> 1442\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[43max\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;28;43mmap\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43msanitize_sequence\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43margs\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1444\u001b[0m bound \u001b[38;5;241m=\u001b[39m new_sig\u001b[38;5;241m.\u001b[39mbind(ax, \u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n\u001b[1;32m 1445\u001b[0m auto_label \u001b[38;5;241m=\u001b[39m (bound\u001b[38;5;241m.\u001b[39marguments\u001b[38;5;241m.\u001b[39mget(label_namer)\n\u001b[1;32m 1446\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m bound\u001b[38;5;241m.\u001b[39mkwargs\u001b[38;5;241m.\u001b[39mget(label_namer))\n", + "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/matplotlib/axes/_axes.py:6220\u001b[0m, in \u001b[0;36mAxes.pcolormesh\u001b[0;34m(self, alpha, norm, cmap, vmin, vmax, shading, antialiased, *args, **kwargs)\u001b[0m\n\u001b[1;32m 6217\u001b[0m shading \u001b[38;5;241m=\u001b[39m shading\u001b[38;5;241m.\u001b[39mlower()\n\u001b[1;32m 6218\u001b[0m 
kwargs\u001b[38;5;241m.\u001b[39msetdefault(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124medgecolors\u001b[39m\u001b[38;5;124m'\u001b[39m, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mnone\u001b[39m\u001b[38;5;124m'\u001b[39m)\n\u001b[0;32m-> 6220\u001b[0m X, Y, C, shading \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_pcolorargs\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mpcolormesh\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 6221\u001b[0m \u001b[43m \u001b[49m\u001b[43mshading\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mshading\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mkwargs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 6222\u001b[0m coords \u001b[38;5;241m=\u001b[39m np\u001b[38;5;241m.\u001b[39mstack([X, Y], axis\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m-\u001b[39m\u001b[38;5;241m1\u001b[39m)\n\u001b[1;32m 6223\u001b[0m \u001b[38;5;66;03m# convert to one dimensional array, except for 3D RGB(A) arrays\u001b[39;00m\n", + "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/matplotlib/axes/_axes.py:5751\u001b[0m, in \u001b[0;36mAxes._pcolorargs\u001b[0;34m(self, funcname, shading, *args, **kwargs)\u001b[0m\n\u001b[1;32m 5749\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m shading \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mflat\u001b[39m\u001b[38;5;124m'\u001b[39m:\n\u001b[1;32m 5750\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m (Nx, Ny) \u001b[38;5;241m!=\u001b[39m (ncols \u001b[38;5;241m+\u001b[39m \u001b[38;5;241m1\u001b[39m, nrows \u001b[38;5;241m+\u001b[39m \u001b[38;5;241m1\u001b[39m):\n\u001b[0;32m-> 5751\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m 
\u001b[38;5;167;01mTypeError\u001b[39;00m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mDimensions of C \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mC\u001b[38;5;241m.\u001b[39mshape\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m should\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 5752\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m be one smaller than X(\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mNx\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m) and Y(\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mNy\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m)\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 5753\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m while using shading=\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mflat\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 5754\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m see help(\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mfuncname\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m)\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 5755\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m: \u001b[38;5;66;03m# ['nearest', 'gouraud']:\u001b[39;00m\n\u001b[1;32m 5756\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m (Nx, Ny) \u001b[38;5;241m!=\u001b[39m (ncols, nrows):\n", + "\u001b[0;31mTypeError\u001b[0m: Dimensions of C (11, 8, 288) should be one smaller than X(288) and Y(8) while using shading='flat' see help(pcolormesh)" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "dd49c62706534d8988ce7d6a5c25f646", + "version_major": 2, + "version_minor": 0 + }, + "image/png": "", + "text/html": [ + "\n", + "
\n", + "
\n", + " Figure\n", + "
\n", + " \n", + "
\n", + " " + ], + "text/plain": [ + "Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "ts_display = act.plotting.TimeSeriesDisplay(ds)\n", + "ts_display.add_subplots((len(variables_to_plot),), figsize = (9.5,4*len(variables_to_plot)))\n", + "\n", + "for i,v in enumerate(variables_to_plot):\n", + " ts_ax = ts_display.plot(v, subplot_index=(i,), set_title=ds.variables[v].attrs['long_name'],)\n", + " ts_ax.grid()\n", + "\n", + "plt.show()\n" + ] + }, + { + "cell_type": "markdown", + "id": "194399aa-1907-452b-8ba9-bc31d7f60291", + "metadata": {}, + "source": [ + "## Quality check plots\n", + "#### Define variable for QC plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9a0ef63d-3eeb-48fc-a24a-43258b6134b8", + "metadata": {}, + "outputs": [], + "source": [ + "qc_variable = 'downwelling_shortwave'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "532663a3-4dc0-4497-bda8-018c5f91e1c4", + "metadata": {}, + "outputs": [], + "source": [ + "# QC Plot\n", + "if ('qc_'+qc_variable) in ds.variables:\n", + "\n", + " # Plot\n", + " qc_display = act.plotting.TimeSeriesDisplay(ds)\n", + " qc_display.add_subplots((2,), figsize = (9.5,10))\n", + " qc_ax = qc_display.plot(qc_variable, subplot_index=(0,), set_title=\"QC results on field: \" + qc_variable,)\n", + " qc_ax.grid()\n", + " qc_display.qc_flag_block_plot(qc_variable, subplot_index=(1,))\n", + "\n", + " plt.show()\n", + "else:\n", + " print(f'QC not available for the selected field: {qc_variable}')\n" + ] + }, + { + "cell_type": "markdown", + "id": "2308db16-f362-4033-a11a-c3e5e75ad9ba", + "metadata": {}, + "source": [ + "## Field selection dropdown menu\n", + "Select variable to be plotted from a dropdown menu" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d5792fa8-2253-441a-8ac6-1e07a0345031", + 
"metadata": {}, + "outputs": [], + "source": [ + "plt.ioff()\n", + "\n", + "# populate dropdown menu with available variables \n", + "available_variables = [v for v in ds.variables if not('time' in v or v.startswith('qc_') or v.startswith('source_')) and 'long_name' in ds.variables[v].attrs]\n", + "d_variable = 'downwelling_shortwave'\n", + "dropdown = widgets.Dropdown(\n", + " options = [(ds.variables[v].attrs['long_name'], v) for v in available_variables],\n", + " value= d_variable,\n", + " description='Field:',\n", + " disabled=False,\n", + ")\n", + "dropdown.layout.margin = '0px 30% 0px 20%'\n", + "dropdown.layout.width = '50%'\n", + "\n", + "# set up display\n", + "i_display = act.plotting.TimeSeriesDisplay(ds)\n", + "i_display.add_subplots((1,), figsize = (9.5,5))\n", + "i_ax = i_display.plot(d_variable, subplot_index=(0,), set_title=ds.variables[d_variable].attrs['long_name'],)\n", + "i_ax.grid()\n", + "i_fig = i_display.fig\n", + "\n", + "# update plot callback function\n", + "def update_plot(change):\n", + " i_ax.cla()\n", + " i_ax_new = i_display.plot(change.new, subplot_index=(0,), set_title=ds.variables[change.new].attrs['long_name'],)\n", + " i_ax_new.grid()\n", + " i_fig.canvas.draw()\n", + " i_fig.canvas.flush_events()\n", + "\n", + "dropdown.observe(update_plot, names='value')\n", + "\n", + "widgets.AppLayout(\n", + " header=dropdown,\n", + " center=i_fig.canvas,\n", + " pane_heights=[1, 6,1]\n", + ")\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.16" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + 
"nbformat_minor": 5 +} diff --git a/VAPs/quicklook/SFCCLDGRID/15swfcldgrid1long.c1.ipynb b/VAPs/quicklook/SFCCLDGRID/15swfcldgrid1long.c1.ipynb new file mode 100644 index 00000000..5df1fe58 --- /dev/null +++ b/VAPs/quicklook/SFCCLDGRID/15swfcldgrid1long.c1.ipynb @@ -0,0 +1,339 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# 15SWFCLDGRID1LONG.C1 Plots\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/sfccldgrid) for more information about this vap." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib widget\n", + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import ipywidgets as widgets\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "# Data archive directory\n", + "DATA_DIR = r'/data/archive/'\n", + "\n", + "# Datastream info\n", + "DATASTREAM_NAME = '15swfcldgrid1long'\n", + "DATA_LEVEL = 'c1'\n", + "LOCATIONS = [{'end_date': '2009-11-25', 'facility': 'N1', 'site': 'sgp', 'start_date': '1997-01-01'}]" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "9faaf875", + "metadata": {}, + "source": [ + "## Define site, facility, and date range" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ac6764f5", + "metadata": {}, + "outputs": [], + "source": [ + "print(\"The following locations and date ranges are available for this VAP:\")\n", + "display(pd.DataFrame(LOCATIONS, columns=['site', 'facility', 'start_date', 'end_date']))" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "8d132223", + "metadata": {}, + "source": [ + "#### Define site, facility, and date range (date format: YYYY-MM-DD) using the 
variables below:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e563983a", + "metadata": {}, + "outputs": [], + "source": [ + "site_facility = ( 'sgp', 'N1' )\n", + "\n", + "date_start = '2009-11-23'\n", + "date_end = '2009-11-25'" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "bccd3dfe-2f99-49a5-bace-ea37e8dd8fc3", + "metadata": {}, + "source": [ + "## Load data files\n", + "Load data files from /data/archive/" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cb4b9a26-c574-49c0-a521-658fa553e39e", + "metadata": {}, + "outputs": [], + "source": [ + "# Compile list of files\n", + "site, facility = site_facility\n", + "d_date_start = datetime.strptime(date_start, '%Y-%m-%d')\n", + "d_date_end = datetime.strptime(date_end, '%Y-%m-%d')\n", + "dir_path = os.path.join(DATA_DIR + site, site + DATASTREAM_NAME + facility + r'.' + DATA_LEVEL )\n", + "dir_path\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6be8f3dc", + "metadata": {}, + "outputs": [], + "source": [ + "from datetime import date, timedelta\n", + "import pandas as pd\n", + "\n", + "def get_ARM_formated_dates(start_date, end_date):\n", + " \"\"\"\n", + " Get a list of ARM conventional formated date lists, based on start_date and end_date(inclusive)\n", + " EXAMPLE:\n", + " get_ARM_formated_dates(start_date=\"20180219\", end_date=\"20180221\")\n", + " >> [\"20180219\", \"20180220\", \"20180221\"] \n", + " \"\"\"\n", + " \n", + " _start_date = pd.to_datetime(start_date)\n", + " _end_date = pd.to_datetime(end_date)\n", + " \n", + " delta = _end_date - _start_date # returns timedelta \n", + " dates = []\n", + "\n", + " for i in range(delta.days + 1):\n", + " day = _start_date + timedelta(days=i)\n", + " day_formated = day.strftime(format=\"%Y%m%d\")\n", + " dates.append(day_formated)\n", + " return dates\n", + "\n", + "\n", + "get_ARM_formated_dates(start_date=date_start, end_date=date_end)" + ] + }, + { + "cell_type": 
"code", + "execution_count": null, + "id": "51feea2e", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter a list of files based on date pattern\n", + "import glob\n", + "dates = get_ARM_formated_dates(start_date=date_start, end_date=date_end)\n", + "files_filter = []\n", + "for date in dates:\n", + " files_filter += glob.glob(f'{dir_path}/*.{date}*.*')\n", + " files_filter\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0e5d0d7", + "metadata": {}, + "outputs": [], + "source": [ + "# Load files as a single dataset\n", + "files_list = files_filter \n", + "ds = act.io.armfiles.read_netcdf(files_list)\n", + "ds.clean.cleanup()\n", + "print(f'{len(files_list)} files loaded')\n", + "ds\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "4a551094-9ec0-4b64-b80a-9940573c2f50", + "metadata": {}, + "source": [ + "## Plot time series data\n", + "#### Define the list of variables to be plotted:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d09b789e-84f1-4605-846b-a72c110c8048", + "metadata": {}, + "outputs": [], + "source": [ + "variables_to_plot = ['cloudfraction', 'cf_cloudfraction', 'tswfluxdn']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3458fb08-035b-4898-9253-0a94e6f9c97b", + "metadata": {}, + "outputs": [], + "source": [ + "ts_display = act.plotting.TimeSeriesDisplay(ds)\n", + "ts_display.add_subplots((len(variables_to_plot),), figsize = (9.5,4*len(variables_to_plot)))\n", + "\n", + "for i,v in enumerate(variables_to_plot):\n", + " ts_ax = ts_display.plot(v, subplot_index=(i,), set_title=ds.variables[v].attrs['long_name'],)\n", + " ts_ax.grid()\n", + "\n", + "plt.show()\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "194399aa-1907-452b-8ba9-bc31d7f60291", + "metadata": {}, + "source": [ + "## Quality check plots\n", + "#### Define variable for QC plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, 
+ "id": "9a0ef63d-3eeb-48fc-a24a-43258b6134b8", + "metadata": {}, + "outputs": [], + "source": [ + "qc_variable = 'cloudfraction'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "532663a3-4dc0-4497-bda8-018c5f91e1c4", + "metadata": {}, + "outputs": [], + "source": [ + "# QC Plot\n", + "if ('qc_'+qc_variable) in ds.variables:\n", + "\n", + " # Plot\n", + " qc_display = act.plotting.TimeSeriesDisplay(ds)\n", + " qc_display.add_subplots((2,), figsize = (9.5,10))\n", + " qc_ax = qc_display.plot(qc_variable, subplot_index=(0,), set_title=\"QC results on field: \" + qc_variable,)\n", + " qc_ax.grid()\n", + " qc_display.qc_flag_block_plot(qc_variable, subplot_index=(1,))\n", + "\n", + " plt.show()\n", + "else:\n", + " print(f'QC not available for the selected field: {qc_variable}')\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "2308db16-f362-4033-a11a-c3e5e75ad9ba", + "metadata": {}, + "source": [ + "## Field selection dropdown menu\n", + "Select variable to be plotted from a dropdown menu" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d5792fa8-2253-441a-8ac6-1e07a0345031", + "metadata": {}, + "outputs": [], + "source": [ + "plt.ioff()\n", + "\n", + "# populate dropdown menu with available variables \n", + "available_variables = [v for v in ds.variables if not('time' in v or v.startswith('qc_') or v.startswith('source_')) and 'long_name' in ds.variables[v].attrs]\n", + "d_variable = 'cloudfraction'\n", + "dropdown = widgets.Dropdown(\n", + " options = [(ds.variables[v].attrs['long_name'], v) for v in available_variables],\n", + " value= d_variable,\n", + " description='Field:',\n", + " disabled=False,\n", + ")\n", + "dropdown.layout.margin = '0px 30% 0px 20%'\n", + "dropdown.layout.width = '50%'\n", + "\n", + "# set up display\n", + "i_display = act.plotting.TimeSeriesDisplay(ds)\n", + "i_display.add_subplots((1,), figsize = (9.5,5))\n", + "i_ax = i_display.plot(d_variable, subplot_index=(0,), 
set_title=ds.variables[d_variable].attrs['long_name'],)\n", + "i_ax.grid()\n", + "i_fig = i_display.fig\n", + "\n", + "# update plot callback function\n", + "def update_plot(change):\n", + " i_ax.cla()\n", + " i_ax_new = i_display.plot(change.new, subplot_index=(0,), set_title=ds.variables[change.new].attrs['long_name'],)\n", + " i_ax_new.grid()\n", + " i_fig.canvas.draw()\n", + " i_fig.canvas.flush_events()\n", + "\n", + "dropdown.observe(update_plot, names='value')\n", + "\n", + "widgets.AppLayout(\n", + " header=dropdown,\n", + " center=i_fig.canvas,\n", + " pane_heights=[1, 6,1]\n", + ")\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.12" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/SFCCLDGRID/SFCCLDGRID_tutorial.ipynb b/VAPs/quicklook/SFCCLDGRID/SFCCLDGRID_tutorial.ipynb new file mode 100644 index 00000000..e04e7387 --- /dev/null +++ b/VAPs/quicklook/SFCCLDGRID/SFCCLDGRID_tutorial.ipynb @@ -0,0 +1,838 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# 15SWFCLDGRID1LONG.C1 Notebook\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/sfccldgrid) for more information about this vap." + ] + }, + { + "cell_type": "markdown", + "id": "97097763", + "metadata": {}, + "source": [ + "In this notebook, we demonstrate the workflow to explore ARM vap data (using 15swfcldgrid1long as an example.) 
Value-added products (VAPs) are higher-order data products that have been analyzed and processed to ease scientists’ use of ARM data in atmospheric research and global climate models. \n", + "Here is the main content we will cover." + ] + }, + { + "cell_type": "markdown", + "id": "eddec40f", + "metadata": {}, + "source": [ + "# Table of Contents\n", + "## Access the data\n", + "* How to retrieve the data\n", + "* Data path and file name conventions\n", + "* Load data\n", + "## Explore the data\n", + "* NetCDF Data structure\n", + "* Xarray essentials\n", + "* Xarray Variable\n", + "## Plot the data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "import random\n", + "\n", + "import glob\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "id": "2372d5be", + "metadata": {}, + "source": [ + "## Access the data" + ] + }, + { + "cell_type": "markdown", + "id": "5b0d9684", + "metadata": {}, + "source": [ + "### How to retrieve the data\n", + "We assume the path \"/data/archive\" is available where you are running this notebook. You can use `os.path.exists(\"/data/archive\")` to verify if the path exists on your machine.\n", + "\n", + "\n", + "### Data path and file name conventions\n", + "There are several common terms regarding ARM data, for example, data-stream-name, data-level, site, facility, instrument, etc.
(For more details, please see [ARM Data File Standards Version 1.2](https://www.google.com/search?q=arm+datastream+facility+cite+definition&rlz=1C1GCEJ_enUS1029US1029&ei=hb41ZICrBPukqtsPvMODIA&ved=0ahUKEwjAgeiS0aL-AhV7kmoFHbzhAAQQ4dUDCBA&uact=5&oq=arm+datastream+facility+cite+definition&gs_lcp=Cgxnd3Mtd2l6LXNlcnAQAzIFCAAQogQyBQgAEKIEMgUIABCiBDIFCAAQogQyBQgAEKIEOgoIABBHENYEELADOgoIIRCgARDDBBAKSgQIQRgAUJ4DWIoOYO0PaAFwAXgAgAGMAYgBuQmSAQMyLjmYAQCgAQHIAQjAAQE&sclient=gws-wiz-serp).) \n", + "\n", + "For example, this notebook is called `15swfcldgrid1long.c1`, where `15swfcldgrid1long` is the \"datastream name\", and `c1` is the \"data level\".\n", + "\n", + "This datastream also contains site `sgp` and facility `N1`. (Note: an individual datastream might have multiple site-facility pairs.)\n", + "In such a case, the data of this data-stream is stored at `/data/archive/sgp/sgp15swfcldgrid1longN1.c1`, which is in the format of `<DATA_DIR>/<site>/<site><DATASTREAM_NAME><facility>.<DATA_LEVEL>`. We can use the following method to assign the data-stream directory `datastream_dir = os.path.join(DATA_DIR, site, site + DATASTREAM_NAME + facility + '.' + DATA_LEVEL )`\n", + "\n", + "The data files under datastream_dir also follow naming conventions. But once you reach the datastream_dir level, the most important file naming convention to differentiate the files is \"yyyyMMdd.hhmmss\", which comes in handy to filter out files based on datetime.
For example, we can use `glob.glob(f'{datastream_dir}/*.200709*.*')` to filter files from September 2007.\n", + "\n", + "(Note: refer to https://adc.arm.gov/discovery/#/ and the https://adc.arm.gov/solr8/metadata/select API to explore ARM datastreams and their associated available sites and facilities.)\n", + "\n", + "Please see the following examples in action" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d7e9eb85", + "metadata": {}, + "outputs": [], + "source": [ + "# Verify if DATA_DIR path exists\n", + "DATA_DIR = \"/data/archive\"\n", + "os.path.exists(DATA_DIR)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "586993fd", + "metadata": {}, + "outputs": [], + "source": [ + "# Specify datastream_dir following the path conventions and check its existence\n", + "DATASTREAM_NAME = \"15swfcldgrid1long\"\n", + "DATA_LEVEL = \"c1\"\n", + "site = \"sgp\"\n", + "facility = \"N1\"\n", + "datastream_dir = os.path.join(DATA_DIR, site, site + DATASTREAM_NAME + facility + '.'
+ DATA_LEVEL )\n", + "print(datastream_dir)\n", + "print(os.path.exists(datastream_dir))\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0742f7c1", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: list 5 (random) files under datastream_dir\n", + "files = os.listdir(datastream_dir)\n", + "files[:5]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "39b98a36", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: get most recent file\n", + "list_of_files = glob.glob(f\"{datastream_dir}/*\") # * means all if need specific format then *.csv\n", + "latest_file = max(list_of_files, key=os.path.getctime)\n", + "latest_file" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "902d514e", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus sort datastream files based on datetime\n", + "files = os.listdir(datastream_dir)\n", + "file_sorted = files.copy()\n", + "file_sorted.sort() \n", + "print(file_sorted[:5])\n", + "\n", + "# to reverse\n", + "file_sorted_reverse = files.copy() \n", + "file_sorted_reverse.sort(reverse=True)\n", + "print(file_sorted_reverse[:5])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ec5923b2", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: pattern matching\n", + "# filter the 200709** files under datastream_dir\n", + "files_filter = glob.glob(f'{datastream_dir}/*.200709*.*')\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c4a4aa18", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: use armnotebook_utils.file_filter (TODO) to filter files based on datastream info \n" + ] + }, + { + "cell_type": "markdown", + "id": "cfeb9efc", + "metadata": {}, + "source": [ + "### Load data\n", + "The arm data is stored in [NetCDF](https://en.wikipedia.org/wiki/NetCDF#:~:text=%22NetCDF%20(network%20Common%20Data%20Form,format%20for%20representing%20scientific%20data.) format. 
We can use xarray's `open_dataset` method to load single file, or `open_mfdataset` to open multiple files. (Note: the latter will still return a single xarray dataset object by combining multiple files.)\n", + "\n", + "Note: open_dataset keeps the file handle open and lazy loads its contents. All parameters are passed directly to open_dataset. It is a preferable method over load_dataset for memory-effiency. (more details, please see [xarray.open_dataset](https://docs.xarray.dev/en/stable/generated/xarray.open_dataset.html))\n", + "\n", + "See the following example in action" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a440a329", + "metadata": {}, + "outputs": [], + "source": [ + "# open single file\n", + "full_path = latest_file\n", + "print(full_path)\n", + "ds_single = xr.open_dataset(full_path)\n", + "ds_single" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0143a3d", + "metadata": {}, + "outputs": [], + "source": [ + "print(ds_single)\n", + "print(type(ds_single))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1c0f8939", + "metadata": {}, + "outputs": [], + "source": [ + "# open multiple files\n", + "n_files = 3\n", + "full_paths = [os.path.join(datastream_dir, f_path) for f_path in file_sorted_reverse[:n_files]]\n", + "print(full_paths)\n", + "try: # Note: sometimes multiple files cannot be merged, so we used try except here.\n", + " ds_mutiple = xr.open_mfdataset(full_paths)\n", + " # ds_mutiple\n", + " print(type(ds_mutiple))\n", + " print(ds_mutiple)\n", + "except Exception as e:\n", + " print(e)" + ] + }, + { + "cell_type": "markdown", + "id": "3fa59943", + "metadata": {}, + "source": [ + "## Explore the data" + ] + }, + { + "cell_type": "markdown", + "id": "571f69b9", + "metadata": {}, + "source": [ + "### NetCDF Data structure \n", + "\n", + "\n", + "(If you are confident with NetCDF basics and xarray essentials, feel free to skip this session.)\n", + "Before we dive into 
data exploration, there are some essential concepts we should be familiar with (shown below): \n", + "* Dataset\n", + "* Data array\n", + "* Variable\n", + "* Dimension\n", + "* Coordinate\n", + "* Data Type\n", + "* Meta Data (Attributes)\n", + "\n", + "We will not go into details about NetCDF basics, but here are some references you might find helpful\n", + "* [Components of a NetCDF Dataset](https://iprc.soest.hawaii.edu/users/xfu/tool/guidef-7.html#)\n", + "* [Network Common Data Form (NetCDF)](https://www.unidata.ucar.edu/software/netcdf/) and [A Brief History of (netCDF) Time](https://www.unidata.ucar.edu/software/netcdf/time/recs.html)\n", + "\n", + "\n", + "\n", + "### Xarray essentials\n", + "Earlier we introduced the xarray package and used `open_dataset` and `open_mfdataset` to load NetCDF data files as an xarray Dataset object. Recall that you can review the data in a notebook by using `print(ds)` or `ds`. \n", + "\n", + "Using xarray to retrieve the aforementioned NetCDF basics is straightforward.
In general,\n", + "* Dataset: `ds`\n", + "* Data array: `ds[var_name]`\n", + "* Variable: `ds.variables`\n", + "* Dimension: `ds.dims`\n", + "* Coordinate: `ds.coords`\n", + "* Data Type: `type`\n", + "* Meta Data (Attributes): `ds.attrs`\n", + "\n", + "Also, here are some references if you are new to xarray\n", + "* [Xarray in 45 minutes](https://tutorial.xarray.dev/overview/xarray-in-45-min.html)\n", + "* [Handling NetCDF Files using XArray for Absolute Beginners](https://towardsdatascience.com/handling-netcdf-files-using-xarray-for-absolute-beginners-111a8ab4463f)\n", + "\n", + "Try the following commands in action\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b3eac323", + "metadata": {}, + "outputs": [], + "source": [ + "ds = ds_single" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "77ecf85d", + "metadata": {}, + "outputs": [], + "source": [ + "# Dataset \n", + "ds" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "25e7de09", + "metadata": {}, + "outputs": [], + "source": [ + "# Data array, Variable\n", + "# Note: the info can be overwhelming.
Do not attempt to grasp everything at the first glance.\n", + "ds.variables" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8c41b67e", + "metadata": {}, + "outputs": [], + "source": [ + "# dimensions\n", + "ds.dims" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "156f1dfc", + "metadata": {}, + "outputs": [], + "source": [ + "# coordinates\n", + "ds.coords" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "277d6064", + "metadata": {}, + "outputs": [], + "source": [ + "# Meta Data (Attributes)\n", + "ds.attrs" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d334681f", + "metadata": {}, + "outputs": [], + "source": [ + "# type\n", + "print(type(ds))\n", + "print(type(ds.variables))\n", + "print(type(ds.dims))\n", + "print(type(ds.coords))\n", + "print(type(ds.attrs))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "643399d6", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: retrieve variable names only\n", + "list(ds.variables)" + ] + }, + { + "cell_type": "markdown", + "id": "1cf36878", + "metadata": {}, + "source": [ + "#### Discussion: variable vs. coordinates vs. dimensions. \n", + "From [Components of a NetCDF Dataset](https://iprc.soest.hawaii.edu/users/xfu/tool/guidef-7.html#):\n", + "\n", + "* Dimension: A dimension may be used to represent a real physical dimension, for example, time, latitude, longitude, or height. A dimension might also be used to index other quantities, for example station or model-run-number.\n", + "A netCDF dimension has both a name and a length. A dimension length is an arbitrary positive integer, except that one dimension in a netCDF dataset can have the length UNLIMITED.\n", + "\n", + "* Variables: Variables are used to store the bulk of the data in a netCDF dataset. A variable represents an array of values of the same type. A scalar value is treated as a 0-dimensional array.
A variable has a name, a data type, and a shape described by its list of dimensions specified when the variable is created. A variable may also have associated attributes, which may be added, deleted or changed after the variable is created.\n", + "\n", + "* Coordinate (Variables): It is legal for a variable to have the same name as a dimension. Such variables have no special meaning to the netCDF library. However there is a convention that such variables should be treated in a special way by software using this library.\n", + "A variable with the same name as a dimension is called a coordinate variable. It typically defines a physical coordinate corresponding to that dimension.\n", + "\n", + "\n", + "Tips: By definition, coordinates and dimensions are also variables. For example, `ds.time` is a coordinate, but it is also a special variable. In practice, however, when we talk about a variable, we mean a regular (non-coordinate) variable. We will use this convention for the remainder of this notebook. " + ] + }, + { + "cell_type": "markdown", + "id": "d47120f9", + "metadata": {}, + "source": [ + "### Xarray Variable\n", + "\n", + "\n", + "We can use `ds.variables` to access the variables of a dataset. But printing out the whole dataset can often be overwhelming. Instead, we can work on an individual variable (also called an xarray data array) using the `ds[var_name]` syntax, where `var_name` is the name of the variable. See the following examples in action.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5a57e136", + "metadata": {}, + "outputs": [], + "source": [ + "print(type(ds[\"time\"]))\n", + "ds[\"time\"]" + ] + }, + { + "cell_type": "markdown", + "id": "51ec643e", + "metadata": {}, + "source": [ + "#### Variable properties\n", + "For an individual variable, the following properties are most commonly used, listed with the associated xarray attribute to retrieve them.
(assuming var = ds[var_name])\n", + "* name: `var.name`\n", + "* data content: `var.data`\n", + "* attributes: `var.attrs`\n", + "* dimensions: `var.dims`\n", + "* data type: `var.data.dtype`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a632a525", + "metadata": {}, + "outputs": [], + "source": [ + "var_name = \"time\"\n", + "var = ds[var_name]\n", + "\n", + "print(\"var.name: \\n\", var.name, \"\\n\")\n", + "print(\"var.data: \\n\", var.data, \"\\n\")\n", + "print(\"var.attrs: \\n\", var.attrs, \"\\n\")\n", + "print(\"var.dims: \\n\", var.dims, \"\\n\")\n", + "print(\"var.data.dtype: \\n\", var.data.dtype, \"\\n\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b256b07f", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: individual variable info table (collect and display variable info in a tabular format)\n", + "df_info = pd.DataFrame()\n", + "df_info[\"var_name\"] = list(ds.variables)\n", + "df_info[\"dims\"] = df_info.var_name.apply(lambda x: ds[x].dims)\n", + "df_info[\"is_dim\"] = df_info.var_name.apply(lambda x: x in ds.dims)\n", + "df_info[\"n_dim\"] = df_info.var_name.apply(lambda x: len(ds[x].dims))\n", + "df_info[\"attrs\"] = df_info.var_name.apply(lambda x: ds[x].attrs)\n", + "df_info[\"dtype\"] = df_info.var_name.apply(lambda x: ds[x].data.dtype)\n", + "df_info" + ] + }, + { + "cell_type": "markdown", + "id": "e6a36b96", + "metadata": {}, + "source": [ + "### Data cleaning/Preprocessing (skipped)\n", + "Data cleaning and preprocessing is an important stage in the data analysis pipeline. However, it is outside the scope of this notebook. For more information about data cleaning and preprocessing basics, here are some references.
\n", + "* [Xarray Fundamentals](https://earth-env-data-science.github.io/lectures/xarray/xarray.html)\n", + "* [Xarray Tutorial — Pangeo Gallery documentation](http://gallery.pangeo.io/repos/pangeo-data/pangeo-tutorial-gallery/xarray.html)\n", + "* [Pythonic Data Cleaning With pandas and NumPy](https://realpython.com/python-data-cleaning-numpy-pandas/)\n", + "* [Pandas - Cleaning Data](https://www.w3schools.com/python/pandas/pandas_cleaning.asp)" + ] + }, + { + "cell_type": "markdown", + "id": "ae6e1a69", + "metadata": {}, + "source": [ + "### Plotting\n", + "Data visualization (or plotting) is another important data analysis topic and deserves its own discussion. Here in this notebook we will only demonstrate basic tools for simple data visualization tasks.\n", + "\n", + "Here are some references you might find useful:\n", + "* xarray plotting: https://docs.xarray.dev/en/stable/user-guide/plotting.html\n", + "* Atmospheric Community Toolkit (ACT): https://arm-doe.github.io/ACT/index.html\n", + "\n", + "Note: this notebook is auto-generated using a template. It uses a general idea to select variable(s) to plot and is not customized to each individual notebook. Feel free to change the variables of interest, especially if a certain figure fails to plot." + ] + }, + { + "cell_type": "markdown", + "id": "7e417fb2", + "metadata": {}, + "source": [ + "#### 1-dimensional basic time series plot\n", + "\n", + "For the following plot we would like to find variables such that\n", + "* it has one and only one dimension\n", + "* \"time\" is its coordinate variable\n", + "* it is not a dimension itself,\n", + "* it is not a special variable with substrings within [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "\n", + "Please see the following example in action.
(For more details about pandas filtering, please see the following references.)\n", + "* [pandas: multiple conditions while indexing data frame](https://stackoverflow.com/questions/22591174/pandas-multiple-conditions-while-indexing-data-frame-unexpected-behavior)\n", + "* [pandas.DataFrame.apply](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.apply.html)\n", + "* [How to test if a string contains one of the substrings in a list...](https://stackoverflow.com/questions/26577516/how-to-test-if-a-string-contains-one-of-the-substrings-in-a-list-in-pandas)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "30edac2d", + "metadata": {}, + "outputs": [], + "source": [ + "# (pandas query) filter n_dim==1, non-dimensional, and not containing the following substrings\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter = df_info[(df_info.n_dim == 1) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (df_info.is_dim==False) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4777a659", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " var_1d = df_filter.var_name.values[0]\n", + " var_1d\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fb5ae985", + "metadata": {}, + "outputs": [], + "source": [ + "# Note: if failed, change to another variable to plot.\n", + "try:\n", + " ds[var_1d].plot()\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "markdown", + "id": "9e41066e", + "metadata": {}, + "source": [ + "#### 2-dimensional basic plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "09bd4adc", + "metadata": {}, + "outputs": [], + "source": [ + "# (pandas query) filter n_dim==2, non-dimensional, and not containing the
following substrings\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_2 = df_info[(df_info.n_dim == 2) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (df_info.is_dim==False) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter_2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2cdea1a9", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " var_2d = df_filter_2.var_name.values[0]\n", + " var_2d\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f5f48879", + "metadata": {}, + "outputs": [], + "source": [ + "# Note: if failed, change to another variable to plot.\n", + "try:\n", + " print(ds[var_2d].dims)\n", + " # ds[var_2d].plot()\n", + "\n", + " # conventionally, use \"time\" as x-axis\n", + " ds[var_2d].plot(x=\"time\")\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "markdown", + "id": "7554b2a6", + "metadata": {}, + "source": [ + "#### qc-plotting (optional)\n", + "\n", + "Note: act qc plotting has more strict requirement, one of them is the associated qc variable need to have \"flag_masks\" attributes. 
Which is added by using ds = act.io.armfiles.read_netcdf(files_list), then ds.clean.cleanup()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "71c2096f", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " ds_act = act.io.armfiles.read_netcdf(full_path)\n", + " print(type(ds_act))\n", + " ds_act.clean.cleanup()\n", + "\n", + " # or \n", + " # ds.clean.cleanup()\n", + "except Exception as e:\n", + " print(\"ERROR\", e)\n", + " ds_act = ds" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "844f8505", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter valid variables for ACT qc plotting\n", + "condition = (\"qc_\" + df_info.var_name).apply(lambda x: ds_act[x].attrs.get(\"flag_masks\") is not None \n", + " if x in list(ds.variables) else False)\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_3 = df_info[(df_info.is_dim==False) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings))) &\n", + " condition\n", + " ]\n", + "df_filter_3" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ecd38cc6", + "metadata": {}, + "outputs": [], + "source": [ + "# QC Plot\n", + "try:\n", + " qc_variable = df_filter_3.var_name.values[0]\n", + " print(qc_variable)\n", + "except Exception as e:\n", + " print(e)\n", + " \n", + "try:\n", + "\n", + " # Plot\n", + " qc_display = act.plotting.TimeSeriesDisplay(ds_act)\n", + " qc_display.add_subplots((2,), figsize = (9.5,10))\n", + " qc_ax = qc_display.plot(qc_variable, subplot_index=(0,), set_title=\"QC results on field: \" + qc_variable,)\n", + " qc_ax.grid()\n", + " qc_display.qc_flag_block_plot(qc_variable, subplot_index=(1,))\n", + " qc_ax.set_xlabel(f\"UTC Time starts at {ds.time.data[0]}\")\n", + "\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e)" + ] + }, + { + "cell_type": "markdown", + "id": "d76e4d27", + "metadata": 
{}, + "source": [ + "#### bonus: choose variables to plot from a dropdown menu " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bb733804", + "metadata": {}, + "outputs": [], + "source": [ + "# Valid variables filtering\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_4 = df_info[(df_info.is_dim==False) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter_4" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "82d5c20d", + "metadata": {}, + "outputs": [], + "source": [ + "# example 1: using xarray plot\n", + "\n", + "# Uncomment the following cell to try the interactive plot (ctrl + /)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2fc9aaa4", + "metadata": {}, + "outputs": [], + "source": [ + "# %matplotlib widget\n", + "# plt.clf()\n", + "\n", + "# fig, ax = plt.subplots(figsize=(10, 4))\n", + "\n", + "# available_variables = df_filter_4.var_name.values\n", + "# @widgets.interact(var=available_variables)\n", + "# def update(var = available_variables[0]):\n", + "# fig.clear() # Remove old lines from plot and plot new one\n", + "# if len(ds[var].dims)==2:\n", + "# ds[var].plot(x=\"time\", add_colorbar=False)\n", + "# else:\n", + "# ds[var].plot()\n", + "# plt.grid()\n", + "# plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fabdd802", + "metadata": {}, + "outputs": [], + "source": [ + "# example 2: using act plot\n", + "\n", + "# Uncomment the following cell to try the interactive plot (ctrl + /)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "962b4186", + "metadata": {}, + "outputs": [], + "source": [ + "# %matplotlib widget\n", + "# plt.clf()\n", + "\n", + "# available_variables = df_filter_4.var_name.values\n", + "\n", + "\n", + "# @widgets.interact(var=available_variables)\n", + "# def 
update(var = available_variables[0]):\n", + "\n", + "# i_display = act.plotting.TimeSeriesDisplay(ds_act)\n", + "# i_display.add_subplots((1,), figsize=(10, 4))\n", + "# ax = i_display.plot(var, subplot_index=(0,), set_title=f\"{var} ({ds_act[var].attrs['long_name']})\",)\n", + "\n", + "# ax.set_xlabel(f\"UTC Time starts at {ds.time.data[0]}\")\n", + "# ax.grid()\n", + "# plt.show()\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.16" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/SFCCLDGRID/sfccldgrid2longcaracena.c1.ipynb b/VAPs/quicklook/SFCCLDGRID/sfccldgrid2longcaracena.c1.ipynb new file mode 100644 index 00000000..c3b7dceb --- /dev/null +++ b/VAPs/quicklook/SFCCLDGRID/sfccldgrid2longcaracena.c1.ipynb @@ -0,0 +1,339 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# SFCCLDGRID2LONGCARACENA.C1 Plots\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/sfccldgrid) for more information about this vap." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib widget\n", + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import ipywidgets as widgets\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "# Data archive directory\n", + "DATA_DIR = r'/data/archive/'\n", + "\n", + "# Datastream info\n", + "DATASTREAM_NAME = 'sfccldgrid2longcaracena'\n", + "DATA_LEVEL = 'c1'\n", + "LOCATIONS = [{'end_date': '2020-06-01', 'facility': 'N1', 'site': 'sgp', 'start_date': '2011-10-21'}]" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "9faaf875", + "metadata": {}, + "source": [ + "## Define site, facility, and date range" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ac6764f5", + "metadata": {}, + "outputs": [], + "source": [ + "print(\"The following locations and date ranges are available for this VAP:\")\n", + "display(pd.DataFrame(LOCATIONS, columns=['site', 'facility', 'start_date', 'end_date']))" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "8d132223", + "metadata": {}, + "source": [ + "#### Define site, facility, and date range (date format: YYYY-MM-DD) using the variables below:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e563983a", + "metadata": {}, + "outputs": [], + "source": [ + "site_facility = ( 'sgp', 'N1' )\n", + "\n", + "date_start = '2020-05-29'\n", + "date_end = '2020-05-31'" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "bccd3dfe-2f99-49a5-bace-ea37e8dd8fc3", + "metadata": {}, + "source": [ + "## Load data files\n", + "Load data files from /data/archive/" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cb4b9a26-c574-49c0-a521-658fa553e39e", + 
"metadata": {}, + "outputs": [], + "source": [ + "# Compile list of files\n", + "site, facility = site_facility\n", + "d_date_start = datetime.strptime(date_start, '%Y-%m-%d')\n", + "d_date_end = datetime.strptime(date_end, '%Y-%m-%d')\n", + "dir_path = os.path.join(DATA_DIR + site, site + DATASTREAM_NAME + facility + r'.' + DATA_LEVEL )\n", + "dir_path\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6be8f3dc", + "metadata": {}, + "outputs": [], + "source": [ + "from datetime import date, timedelta\n", + "import pandas as pd\n", + "\n", + "def get_ARM_formated_dates(start_date, end_date):\n", + " \"\"\"\n", + " Get a list of ARM conventional formated date lists, based on start_date and end_date(inclusive)\n", + " EXAMPLE:\n", + " get_ARM_formated_dates(start_date=\"20180219\", end_date=\"20180221\")\n", + " >> [\"20180219\", \"20180220\", \"20180221\"] \n", + " \"\"\"\n", + " \n", + " _start_date = pd.to_datetime(start_date)\n", + " _end_date = pd.to_datetime(end_date)\n", + " \n", + " delta = _end_date - _start_date # returns timedelta \n", + " dates = []\n", + "\n", + " for i in range(delta.days + 1):\n", + " day = _start_date + timedelta(days=i)\n", + " day_formated = day.strftime(format=\"%Y%m%d\")\n", + " dates.append(day_formated)\n", + " return dates\n", + "\n", + "\n", + "get_ARM_formated_dates(start_date=date_start, end_date=date_end)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "51feea2e", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter a list of files based on date pattern\n", + "import glob\n", + "dates = get_ARM_formated_dates(start_date=date_start, end_date=date_end)\n", + "files_filter = []\n", + "for date in dates:\n", + " files_filter += glob.glob(f'{dir_path}/*.{date}*.*')\n", + " files_filter\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0e5d0d7", + "metadata": {}, + "outputs": [], + "source": [ + "# Load files as a single 
dataset\n", + "files_list = files_filter \n", + "ds = act.io.armfiles.read_netcdf(files_list)\n", + "ds.clean.cleanup()\n", + "print(f'{len(files_list)} files loaded')\n", + "ds\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "4a551094-9ec0-4b64-b80a-9940573c2f50", + "metadata": {}, + "source": [ + "## Plot time series data\n", + "#### Define the list of variables to be plotted:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d09b789e-84f1-4605-846b-a72c110c8048", + "metadata": {}, + "outputs": [], + "source": [ + "variables_to_plot = ['downwelling_shortwave', 'source_central_facility_downwelling_shortwave', 'clearsky_downwelling_shortwave']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3458fb08-035b-4898-9253-0a94e6f9c97b", + "metadata": {}, + "outputs": [], + "source": [ + "ts_display = act.plotting.TimeSeriesDisplay(ds)\n", + "ts_display.add_subplots((len(variables_to_plot),), figsize = (9.5,4*len(variables_to_plot)))\n", + "\n", + "for i,v in enumerate(variables_to_plot):\n", + " ts_ax = ts_display.plot(v, subplot_index=(i,), set_title=ds.variables[v].attrs['long_name'],)\n", + " ts_ax.grid()\n", + "\n", + "plt.show()\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "194399aa-1907-452b-8ba9-bc31d7f60291", + "metadata": {}, + "source": [ + "## Quality check plots\n", + "#### Define variable for QC plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9a0ef63d-3eeb-48fc-a24a-43258b6134b8", + "metadata": {}, + "outputs": [], + "source": [ + "qc_variable = 'downwelling_shortwave'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "532663a3-4dc0-4497-bda8-018c5f91e1c4", + "metadata": {}, + "outputs": [], + "source": [ + "# QC Plot\n", + "if ('qc_'+qc_variable) in ds.variables:\n", + "\n", + " # Plot\n", + " qc_display = act.plotting.TimeSeriesDisplay(ds)\n", + " qc_display.add_subplots((2,), figsize = (9.5,10))\n", + " qc_ax 
= qc_display.plot(qc_variable, subplot_index=(0,), set_title=\"QC results on field: \" + qc_variable,)\n", + " qc_ax.grid()\n", + " qc_display.qc_flag_block_plot(qc_variable, subplot_index=(1,))\n", + "\n", + " plt.show()\n", + "else:\n", + " print(f'QC not available for the selected field: {qc_variable}')\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "2308db16-f362-4033-a11a-c3e5e75ad9ba", + "metadata": {}, + "source": [ + "## Field selection dropdown menu\n", + "Select variable to be plotted from a dropdown menu" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d5792fa8-2253-441a-8ac6-1e07a0345031", + "metadata": {}, + "outputs": [], + "source": [ + "plt.ioff()\n", + "\n", + "# populate dropdown menu with available variables \n", + "available_variables = [v for v in ds.variables if not('time' in v or v.startswith('qc_') or v.startswith('source_')) and 'long_name' in ds.variables[v].attrs]\n", + "d_variable = 'downwelling_shortwave'\n", + "dropdown = widgets.Dropdown(\n", + " options = [(ds.variables[v].attrs['long_name'], v) for v in available_variables],\n", + " value= d_variable,\n", + " description='Field:',\n", + " disabled=False,\n", + ")\n", + "dropdown.layout.margin = '0px 30% 0px 20%'\n", + "dropdown.layout.width = '50%'\n", + "\n", + "# set up display\n", + "i_display = act.plotting.TimeSeriesDisplay(ds)\n", + "i_display.add_subplots((1,), figsize = (9.5,5))\n", + "i_ax = i_display.plot(d_variable, subplot_index=(0,), set_title=ds.variables[d_variable].attrs['long_name'],)\n", + "i_ax.grid()\n", + "i_fig = i_display.fig\n", + "\n", + "# update plot callback function\n", + "def update_plot(change):\n", + " i_ax.cla()\n", + " i_ax_new = i_display.plot(change.new, subplot_index=(0,), set_title=ds.variables[change.new].attrs['long_name'],)\n", + " i_ax_new.grid()\n", + " i_fig.canvas.draw()\n", + " i_fig.canvas.flush_events()\n", + "\n", + "dropdown.observe(update_plot, names='value')\n", + "\n", + 
"widgets.AppLayout(\n", + " header=dropdown,\n", + " center=i_fig.canvas,\n", + " pane_heights=[1, 6,1]\n", + ")\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.12" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/SFCCLDGRID/sfccldgrid2longstation.c1.ipynb b/VAPs/quicklook/SFCCLDGRID/sfccldgrid2longstation.c1.ipynb new file mode 100644 index 00000000..bf1a8b6b --- /dev/null +++ b/VAPs/quicklook/SFCCLDGRID/sfccldgrid2longstation.c1.ipynb @@ -0,0 +1,339 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# SFCCLDGRID2LONGSTATION.C1 Plots\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/sfccldgrid) for more information about this vap." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib widget\n", + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import ipywidgets as widgets\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "# Data archive directory\n", + "DATA_DIR = r'/data/archive/'\n", + "\n", + "# Datastream info\n", + "DATASTREAM_NAME = 'sfccldgrid2longstation'\n", + "DATA_LEVEL = 'c1'\n", + "LOCATIONS = [{'end_date': '2020-06-01', 'facility': 'N1', 'site': 'sgp', 'start_date': '2009-01-01'}]" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "9faaf875", + "metadata": {}, + "source": [ + "## Define site, facility, and date range" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ac6764f5", + "metadata": {}, + "outputs": [], + "source": [ + "print(\"The following locations and date ranges are available for this VAP:\")\n", + "display(pd.DataFrame(LOCATIONS, columns=['site', 'facility', 'start_date', 'end_date']))" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "8d132223", + "metadata": {}, + "source": [ + "#### Define site, facility, and date range (date format: YYYY-MM-DD) using the variables below:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e563983a", + "metadata": {}, + "outputs": [], + "source": [ + "site_facility = ( 'sgp', 'N1' )\n", + "\n", + "date_start = '2020-05-29'\n", + "date_end = '2020-05-31'" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "bccd3dfe-2f99-49a5-bace-ea37e8dd8fc3", + "metadata": {}, + "source": [ + "## Load data files\n", + "Load data files from /data/archive/" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cb4b9a26-c574-49c0-a521-658fa553e39e", + 
"metadata": {}, + "outputs": [], + "source": [ + "# Compile list of files\n", + "site, facility = site_facility\n", + "d_date_start = datetime.strptime(date_start, '%Y-%m-%d')\n", + "d_date_end = datetime.strptime(date_end, '%Y-%m-%d')\n", + "dir_path = os.path.join(DATA_DIR + site, site + DATASTREAM_NAME + facility + r'.' + DATA_LEVEL )\n", + "dir_path\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6be8f3dc", + "metadata": {}, + "outputs": [], + "source": [ + "from datetime import date, timedelta\n", + "import pandas as pd\n", + "\n", + "def get_ARM_formated_dates(start_date, end_date):\n", + " \"\"\"\n", + " Get a list of ARM conventional formated date lists, based on start_date and end_date(inclusive)\n", + " EXAMPLE:\n", + " get_ARM_formated_dates(start_date=\"20180219\", end_date=\"20180221\")\n", + " >> [\"20180219\", \"20180220\", \"20180221\"] \n", + " \"\"\"\n", + " \n", + " _start_date = pd.to_datetime(start_date)\n", + " _end_date = pd.to_datetime(end_date)\n", + " \n", + " delta = _end_date - _start_date # returns timedelta \n", + " dates = []\n", + "\n", + " for i in range(delta.days + 1):\n", + " day = _start_date + timedelta(days=i)\n", + " day_formated = day.strftime(format=\"%Y%m%d\")\n", + " dates.append(day_formated)\n", + " return dates\n", + "\n", + "\n", + "get_ARM_formated_dates(start_date=date_start, end_date=date_end)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "51feea2e", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter a list of files based on date pattern\n", + "import glob\n", + "dates = get_ARM_formated_dates(start_date=date_start, end_date=date_end)\n", + "files_filter = []\n", + "for date in dates:\n", + " files_filter += glob.glob(f'{dir_path}/*.{date}*.*')\n", + " files_filter\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0e5d0d7", + "metadata": {}, + "outputs": [], + "source": [ + "# Load files as a single 
dataset\n", + "files_list = files_filter \n", + "ds = act.io.armfiles.read_netcdf(files_list)\n", + "ds.clean.cleanup()\n", + "print(f'{len(files_list)} files loaded')\n", + "ds\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "4a551094-9ec0-4b64-b80a-9940573c2f50", + "metadata": {}, + "source": [ + "## Plot time series data\n", + "#### Define the list of variables to be plotted:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d09b789e-84f1-4605-846b-a72c110c8048", + "metadata": {}, + "outputs": [], + "source": [ + "variables_to_plot = ['swdn', 'cswdn', 'lwdn']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3458fb08-035b-4898-9253-0a94e6f9c97b", + "metadata": {}, + "outputs": [], + "source": [ + "ts_display = act.plotting.TimeSeriesDisplay(ds)\n", + "ts_display.add_subplots((len(variables_to_plot),), figsize = (9.5,4*len(variables_to_plot)))\n", + "\n", + "for i,v in enumerate(variables_to_plot):\n", + " ts_ax = ts_display.plot(v, subplot_index=(i,), set_title=ds.variables[v].attrs['long_name'],)\n", + " ts_ax.grid()\n", + "\n", + "plt.show()\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "194399aa-1907-452b-8ba9-bc31d7f60291", + "metadata": {}, + "source": [ + "## Quality check plots\n", + "#### Define variable for QC plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9a0ef63d-3eeb-48fc-a24a-43258b6134b8", + "metadata": {}, + "outputs": [], + "source": [ + "qc_variable = 'swdn'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "532663a3-4dc0-4497-bda8-018c5f91e1c4", + "metadata": {}, + "outputs": [], + "source": [ + "# QC Plot\n", + "if ('qc_'+qc_variable) in ds.variables:\n", + "\n", + " # Plot\n", + " qc_display = act.plotting.TimeSeriesDisplay(ds)\n", + " qc_display.add_subplots((2,), figsize = (9.5,10))\n", + " qc_ax = qc_display.plot(qc_variable, subplot_index=(0,), set_title=\"QC results on field: \" + 
qc_variable,)\n", + " qc_ax.grid()\n", + " qc_display.qc_flag_block_plot(qc_variable, subplot_index=(1,))\n", + "\n", + " plt.show()\n", + "else:\n", + " print(f'QC not available for the selected field: {qc_variable}')\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "2308db16-f362-4033-a11a-c3e5e75ad9ba", + "metadata": {}, + "source": [ + "## Field selection dropdown menu\n", + "Select variable to be plotted from a dropdown menu" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d5792fa8-2253-441a-8ac6-1e07a0345031", + "metadata": {}, + "outputs": [], + "source": [ + "plt.ioff()\n", + "\n", + "# populate dropdown menu with available variables \n", + "available_variables = [v for v in ds.variables if not('time' in v or v.startswith('qc_') or v.startswith('source_')) and 'long_name' in ds.variables[v].attrs]\n", + "d_variable = 'swdn'\n", + "dropdown = widgets.Dropdown(\n", + " options = [(ds.variables[v].attrs['long_name'], v) for v in available_variables],\n", + " value= d_variable,\n", + " description='Field:',\n", + " disabled=False,\n", + ")\n", + "dropdown.layout.margin = '0px 30% 0px 20%'\n", + "dropdown.layout.width = '50%'\n", + "\n", + "# set up display\n", + "i_display = act.plotting.TimeSeriesDisplay(ds)\n", + "i_display.add_subplots((1,), figsize = (9.5,5))\n", + "i_ax = i_display.plot(d_variable, subplot_index=(0,), set_title=ds.variables[d_variable].attrs['long_name'],)\n", + "i_ax.grid()\n", + "i_fig = i_display.fig\n", + "\n", + "# update plot callback function\n", + "def update_plot(change):\n", + " i_ax.cla()\n", + " i_ax_new = i_display.plot(change.new, subplot_index=(0,), set_title=ds.variables[change.new].attrs['long_name'],)\n", + " i_ax_new.grid()\n", + " i_fig.canvas.draw()\n", + " i_fig.canvas.flush_events()\n", + "\n", + "dropdown.observe(update_plot, names='value')\n", + "\n", + "widgets.AppLayout(\n", + " header=dropdown,\n", + " center=i_fig.canvas,\n", + " pane_heights=[1, 6,1]\n", + ")\n" + 
] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.12" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/SHALLOWCUMULUS/SHALLOWCUMULUS_tutorial.ipynb b/VAPs/quicklook/SHALLOWCUMULUS/SHALLOWCUMULUS_tutorial.ipynb new file mode 100644 index 00000000..01eddf79 --- /dev/null +++ b/VAPs/quicklook/SHALLOWCUMULUS/SHALLOWCUMULUS_tutorial.ipynb @@ -0,0 +1,838 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# SHALLOWCUMULUS.C1 Notebook\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/shallowcumulus) for more information about this vap." + ] + }, + { + "cell_type": "markdown", + "id": "97097763", + "metadata": {}, + "source": [ + "In this notebook, we demonstrate the workflow to explore ARM vap data (using shallowcumulus as an example.) Value-added products (VAPs) are higher-order data products that have been analyzed and processed to ease scientists’ use of ARM data in atmospheric research and global climate models. \n", + "Here is the main content we will cover." 
+ ] + }, + { + "cell_type": "markdown", + "id": "eddec40f", + "metadata": {}, + "source": [ + "# Table of Content\n", + "## Access the data\n", + "* How to retrieve the data\n", + "* Data path and file name conventions\n", + "* Load data\n", + "## Explore the data\n", + "* NetCDF Data structure\n", + "* Xarray essentials\n", + "* Xarray Variable\n", + "## Plot the data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "import random\n", + "\n", + "import glob\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "id": "2372d5be", + "metadata": {}, + "source": [ + "## Access the data" + ] + }, + { + "cell_type": "markdown", + "id": "5b0d9684", + "metadata": {}, + "source": [ + "### How to retrieve the data\n", + "We assume the path \"/data/archive\" is available where you are running this notebook. You can use `os.path.exists(\"/data/archive\")` to verify if the path exists at your machine.\n", + "\n", + "\n", + "### Data path and file name conventions\n", + "There several common terminologies regarding ARM data, for example, data-stream-name, data-level, site, facility, instrument, etc. (For more details, please see [ARM Data File Standards Version 1.2](https://www.google.com/search?q=arm+datastream+facility+cite+definition&rlz=1C1GCEJ_enUS1029US1029&ei=hb41ZICrBPukqtsPvMODIA&ved=0ahUKEwjAgeiS0aL-AhV7kmoFHbzhAAQQ4dUDCBA&uact=5&oq=arm+datastream+facility+cite+definition&gs_lcp=Cgxnd3Mtd2l6LXNlcnAQAzIFCAAQogQyBQgAEKIEMgUIABCiBDIFCAAQogQyBQgAEKIEOgoIABBHENYEELADOgoIIRCgARDDBBAKSgQIQRgAUJ4DWIoOYO0PaAFwAXgAgAGMAYgBuQmSAQMyLjmYAQCgAQHIAQjAAQE&sclient=gws-wiz-serp).) 
\n", + "\n", + "For example, this notebook is called `shallowcumulus.c1`, where `shallowcumulus` is the \"datastream name\", and `c1` is the \"data level\".\n", + "\n", + "This datastream also contains site `sgp` and facility `C1`. (Note: an individual datastream might have multiple site-facility pairs.)\n", + "In such a case, the data of this data-stream is stored at `/data/archive/sgp/sgpshallowcumulusC1.c1`, which is in the format of `{DATA_DIR}/{site}/{site}{DATASTREAM_NAME}{facility}.{DATA_LEVEL}`. We can use the following method to assign the data-stream directory `datastream_dir = os.path.join(DATA_DIR, site, site + DATASTREAM_NAME + facility + '.' + DATA_LEVEL )`\n", + "\n", + "The data files under datastream_dir also follow naming conventions. But once you reach the datastream_dir level, the most important file naming convention to differentiate the files is \"yyyyMMdd.hhmmss\", which comes in handy to filter out files based on datetime. For example, we can use `glob.glob(f'{datastream_dir}/*.200709*.*')` to filter files in September 2007.\n", + "\n", + "(Note: refer to https://adc.arm.gov/discovery/#/ and the https://adc.arm.gov/solr8/metadata/select API to explore ARM datastreams and their associated available sites and facilities.)\n", + "\n", + "Please see the following examples in action" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d7e9eb85", + "metadata": {}, + "outputs": [], + "source": [ + "# Verify if DATA_DIR path exists\n", + "DATA_DIR = \"/data/archive\"\n", + "os.path.exists(DATA_DIR)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "586993fd", + "metadata": {}, + "outputs": [], + "source": [ + "# Specify datastream_dir following the path conventions and check its existence\n", + "DATASTREAM_NAME = \"shallowcumulus\"\n", + "DATA_LEVEL = \"c1\"\n", + "site = \"sgp\"\n", + "facility = \"C1\"\n", + "datastream_dir = os.path.join(DATA_DIR, site, site + DATASTREAM_NAME + facility + '.' 
+ DATA_LEVEL )\n", + "print(datastream_dir)\n", + "print(os.path.exists(datastream_dir))\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0742f7c1", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: list 5 (random) files under datastream_dir\n", + "files = os.listdir(datastream_dir)\n", + "files[:5]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "39b98a36", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: get most recent file\n", + "list_of_files = glob.glob(f\"{datastream_dir}/*\") # * means all if need specific format then *.csv\n", + "latest_file = max(list_of_files, key=os.path.getctime)\n", + "latest_file" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "902d514e", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus sort datastream files based on datetime\n", + "files = os.listdir(datastream_dir)\n", + "file_sorted = files.copy()\n", + "file_sorted.sort() \n", + "print(file_sorted[:5])\n", + "\n", + "# to reverse\n", + "file_sorted_reverse = files.copy() \n", + "file_sorted_reverse.sort(reverse=True)\n", + "print(file_sorted_reverse[:5])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ec5923b2", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: pattern matching\n", + "# filter the 200709** files under datastream_dir\n", + "files_filter = glob.glob(f'{datastream_dir}/*.200709*.*')\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c4a4aa18", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: use armnotebook_utils.file_filter (TODO) to filter files based on datastream info \n" + ] + }, + { + "cell_type": "markdown", + "id": "cfeb9efc", + "metadata": {}, + "source": [ + "### Load data\n", + "The arm data is stored in [NetCDF](https://en.wikipedia.org/wiki/NetCDF#:~:text=%22NetCDF%20(network%20Common%20Data%20Form,format%20for%20representing%20scientific%20data.) format. 
We can use xarray's `open_dataset` method to load single file, or `open_mfdataset` to open multiple files. (Note: the latter will still return a single xarray dataset object by combining multiple files.)\n", + "\n", + "Note: open_dataset keeps the file handle open and lazy loads its contents. All parameters are passed directly to open_dataset. It is a preferable method over load_dataset for memory-effiency. (more details, please see [xarray.open_dataset](https://docs.xarray.dev/en/stable/generated/xarray.open_dataset.html))\n", + "\n", + "See the following example in action" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a440a329", + "metadata": {}, + "outputs": [], + "source": [ + "# open single file\n", + "full_path = latest_file\n", + "print(full_path)\n", + "ds_single = xr.open_dataset(full_path)\n", + "ds_single" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0143a3d", + "metadata": {}, + "outputs": [], + "source": [ + "print(ds_single)\n", + "print(type(ds_single))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1c0f8939", + "metadata": {}, + "outputs": [], + "source": [ + "# open multiple files\n", + "n_files = 3\n", + "full_paths = [os.path.join(datastream_dir, f_path) for f_path in file_sorted_reverse[:n_files]]\n", + "print(full_paths)\n", + "try: # Note: sometimes multiple files cannot be merged, so we used try except here.\n", + " ds_mutiple = xr.open_mfdataset(full_paths)\n", + " # ds_mutiple\n", + " print(type(ds_mutiple))\n", + " print(ds_mutiple)\n", + "except Exception as e:\n", + " print(e)" + ] + }, + { + "cell_type": "markdown", + "id": "3fa59943", + "metadata": {}, + "source": [ + "## Explore the data" + ] + }, + { + "cell_type": "markdown", + "id": "571f69b9", + "metadata": {}, + "source": [ + "### NetCDF Data structure \n", + "\n", + "\n", + "(If you are confident with NetCDF basics and xarray essentials, feel free to skip this session.)\n", + "Before we dive into 
data exploration, there are some essential concepts we should be familiar with (shown below): \n", + "* Dataset\n", + "* Data array\n", + "* Variable\n", + "* Dimension\n", + "* Coordinate\n", + "* Data Type\n", + "* Meta Data (Attributes)\n", + "\n", + "We will not go into details about NetCDF basics, but here are some references you might find helpful:\n", + "* [Components of a NetCDF Dataset](https://iprc.soest.hawaii.edu/users/xfu/tool/guidef-7.html#)\n", + "* [Network Common Data Form (NetCDF)](https://www.unidata.ucar.edu/software/netcdf/) and [A Brief History of (netCDF) Time](https://www.unidata.ucar.edu/software/netcdf/time/recs.html)\n", + "\n", + "\n", + "\n", + "### Xarray essentials\n", + "Earlier we introduced the xarray package and used `open_dataset` and `open_mfdataset` to retrieve NetCDF data files as an xarray Dataset object. Recall that you can review the data in a notebook by using `print(ds)` or `ds`. \n", + "\n", + "Using xarray to retrieve the aforementioned NetCDF basics is straightforward. 
In general,\n", + "* Dataset: `ds`\n", + "* Data array: `ds.variables`\n", + "* Variable: `ds.variables`\n", + "* Dimension: `ds.dims`\n", + "* Coordinate: `ds.coords`\n", + "* Data Type: `type`\n", + "* Meta Data (Attributes): `ds.attrs`\n", + "\n", + "Also, here are some references if you are new to xarray\n", + "* [Xarray in 45 minutes](https://tutorial.xarray.dev/overview/xarray-in-45-min.html)\n", + "* [Handling NetCDF Files using XArray for Absolute Beginners](https://towardsdatascience.com/handling-netcdf-files-using-xarray-for-absolute-beginners-111a8ab4463f)\n", + "\n", + "Try the following commands in action\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b3eac323", + "metadata": {}, + "outputs": [], + "source": [ + "ds = ds_single" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "77ecf85d", + "metadata": {}, + "outputs": [], + "source": [ + "# Dataset  \n", + "ds" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "25e7de09", + "metadata": {}, + "outputs": [], + "source": [ + "# Data array, Variable\n", + "# Note: the info can be overwhelming. 
Do not attempt to grasp everything at first glance.\n", + "ds.variables" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8c41b67e", + "metadata": {}, + "outputs": [], + "source": [ + "# dimensions\n", + "ds.dims" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "156f1dfc", + "metadata": {}, + "outputs": [], + "source": [ + "# coordinates\n", + "ds.coords" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "277d6064", + "metadata": {}, + "outputs": [], + "source": [ + "# Meta Data (Attributes)\n", + "ds.attrs" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d334681f", + "metadata": {}, + "outputs": [], + "source": [ + "# type\n", + "print(type(ds))\n", + "print(type(ds.variables))\n", + "print(type(ds.dims))\n", + "print(type(ds.coords))\n", + "print(type(ds.attrs))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "643399d6", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: retrieve variable names only\n", + "list(ds.variables)" + ] + }, + { + "cell_type": "markdown", + "id": "1cf36878", + "metadata": {}, + "source": [ + "#### Discussion: variable vs. coordinates vs. dimensions. \n", + "From [Components of a NetCDF Dataset](https://iprc.soest.hawaii.edu/users/xfu/tool/guidef-7.html#):\n", + "\n", + "* Dimension: A dimension may be used to represent a real physical dimension, for example, time, latitude, longitude, or height. A dimension might also be used to index other quantities, for example station or model-run-number.\n", + "A netCDF dimension has both a name and a length. A dimension length is an arbitrary positive integer, except that one dimension in a netCDF dataset can have the length UNLIMITED.\n", + "\n", + "* Variables: Variables are used to store the bulk of the data in a netCDF dataset. A variable represents an array of values of the same type. A scalar value is treated as a 0-dimensional array. 
A variable has a name, a data type, and a shape described by its list of dimensions specified when the variable is created. A variable may also have associated attributes, which may be added, deleted or changed after the variable is created.\n", + "\n", + "* Coordinate (Variables): It is legal for a variable to have the same name as a dimension. Such variables have no special meaning to the netCDF library. However there is a convention that such variables should be treated in a special way by software using this library.\n", + "A variable with the same name as a dimension is called a coordinate variable. It typically defines a physical coordinate corresponding to that dimension.\n", + "\n", + "\n", + "Tips: By definition, coordinates and dimensions are also variables. For example, ds.time is a coordinate, but it is also a special variable. In practice, however, when we talk about a variable, we mean a regular/non-coordinate variable. We will use this convention for the remainder of this notebook. " + ] + }, + { + "cell_type": "markdown", + "id": "d47120f9", + "metadata": {}, + "source": [ + "### Xarray Variable\n", + "\n", + "\n", + "We can use `ds.variables` to access the variables of a dataset. But printing out the whole dataset can often be overwhelming. Instead, we can work on an individual variable (also called an xarray data array) using the `ds[var_name]` syntax, where `var_name` is the name of the variable. See the following examples in action.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5a57e136", + "metadata": {}, + "outputs": [], + "source": [ + "print(type(ds[\"time\"]))\n", + "ds[\"time\"]" + ] + }, + { + "cell_type": "markdown", + "id": "51ec643e", + "metadata": {}, + "source": [ + "#### Variable properties\n", + "For an individual variable, the following properties are the most commonly used, shown with the associated xarray attribute to retrieve each of them. 
(assuming var = ds[var_name])\n", + "* name: `var.name`\n", + "* data content: `var.data`\n", + "* attributes: `var.attrs`\n", + "* dimensions: `var.dims`\n", + "* data type: `var.data.dtype`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a632a525", + "metadata": {}, + "outputs": [], + "source": [ + "var_name = \"time\"\n", + "var = ds[var_name]\n", + "\n", + "print(\"var.name: \\n\", var.name, \"\\n\")\n", + "print(\"var.data: \\n\", var.data, \"\\n\")\n", + "print(\"var.attrs: \\n\", var.attrs, \"\\n\")\n", + "print(\"var.dims: \\n\", var.dims, \"\\n\")\n", + "print(\"var.data.dtype: \\n\", var.data.dtype, \"\\n\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b256b07f", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: individual variable info table (collect and display variable info in a tabular format)\n", + "df_info = pd.DataFrame()\n", + "df_info[\"var_name\"] = list(ds.variables)\n", + "df_info[\"dims\"] = df_info.var_name.apply(lambda x: ds[x].dims)\n", + "df_info[\"is_dim\"] = df_info.var_name.apply(lambda x: x in ds.dims)\n", + "df_info[\"n_dim\"] = df_info.var_name.apply(lambda x: len(ds[x].dims))\n", + "df_info[\"attrs\"] = df_info.var_name.apply(lambda x: ds[x].attrs)\n", + "df_info[\"dtype\"] = df_info.var_name.apply(lambda x: ds[x].data.dtype)\n", + "df_info" + ] + }, + { + "cell_type": "markdown", + "id": "e6a36b96", + "metadata": {}, + "source": [ + "### Data cleaning/Preprocessing (skipped)\n", + "Data cleaning and preprocessing is an important stage in the data analysis pipeline. However, it is outside the scope of this notebook. For more information about data cleaning and preprocessing basics, here are some references. 
\n", + "* [Xarray Fundamentals](https://earth-env-data-science.github.io/lectures/xarray/xarray.html)\n", + "* [Xarray Tutorial — Pangeo Gallery documentation](http://gallery.pangeo.io/repos/pangeo-data/pangeo-tutorial-gallery/xarray.html)\n", + "* [Pythonic Data Cleaning With pandas and NumPy](https://realpython.com/python-data-cleaning-numpy-pandas/)\n", + "* [Pandas - Cleaning Data](https://www.w3schools.com/python/pandas/pandas_cleaning.asp)" + ] + }, + { + "cell_type": "markdown", + "id": "ae6e1a69", + "metadata": {}, + "source": [ + "### Plotting\n", + "Data visualization (or plotting) is another important data analysis topic and deserves its own discussion. Here in this notebook we will only demonstrate basic tools for simple data visualization tasks.\n", + "\n", + "Here are some references you might find useful:\n", + "* xarray plotting: https://docs.xarray.dev/en/stable/user-guide/plotting.html\n", + "* Atmospheric Community Toolkit (ACT): https://arm-doe.github.io/ACT/index.html\n", + "\n", + "Note: this notebook is auto-generated using a template. It uses a general idea to select variable(s) to plot and is not customized to each individual notebook. Feel free to change the variables of interest, especially if a certain figure fails to plot." + ] + }, + { + "cell_type": "markdown", + "id": "7e417fb2", + "metadata": {}, + "source": [ + "#### 1-dimensional basic time series plot\n", + "\n", + "For the following plot we would like to find variables such that\n", + "* it has one and only one dimension\n", + "* \"time\" is its coordinate variable\n", + "* it is not a dimension itself,\n", + "* it is not a special variable with substrings within [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "\n", + "Please see the following example in action. 
(For more details about pandas filtering, please see the following references.)\n", + "* [pandas: multiple conditions while indexing data frame](https://stackoverflow.com/questions/22591174/pandas-multiple-conditions-while-indexing-data-frame-unexpected-behavior)\n", + "* [pandas.DataFrame.apply](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.apply.html)\n", + "* [How to test if a string contains one of the substrings in a list...](https://stackoverflow.com/questions/26577516/how-to-test-if-a-string-contains-one-of-the-substrings-in-a-list-in-pandas)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "30edac2d", + "metadata": {}, + "outputs": [], + "source": [ + "# (pandas query) filter n_dim==1, non-dimensional, and does not contain the following substrings\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter = df_info[(df_info.n_dim == 1) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (df_info.is_dim==False) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4777a659", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " var_1d = df_filter.var_name.values[0]\n", + " var_1d\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fb5ae985", + "metadata": {}, + "outputs": [], + "source": [ + "# Note: if failed, change to another variable to plot.\n", + "try:\n", + " ds[var_1d].plot()\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "markdown", + "id": "9e41066e", + "metadata": {}, + "source": [ + "#### 2-dimensional basic plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "09bd4adc", + "metadata": {}, + "outputs": [], + "source": [ + "# (pandas query) filter n_dim==2, non-dimensional, and does not contain the 
following substrings\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_2 = df_info[(df_info.n_dim == 2) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (df_info.is_dim==False) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter_2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2cdea1a9", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " var_2d = df_filter_2.var_name.values[0]\n", + " var_2d\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f5f48879", + "metadata": {}, + "outputs": [], + "source": [ + "# Note: if failed, change to another variable to plot.\n", + "try:\n", + " print(ds[var_2d].dims)\n", + " # ds[var_2d].plot()\n", + "\n", + " # conventionally, use \"time\" as x-axis\n", + " ds[var_2d].plot(x=\"time\")\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "markdown", + "id": "7554b2a6", + "metadata": {}, + "source": [ + "#### qc-plotting (optional)\n", + "\n", + "Note: act qc plotting has more strict requirement, one of them is the associated qc variable need to have \"flag_masks\" attributes. 
Which is added by using ds = act.io.armfiles.read_netcdf(files_list), then ds.clean.cleanup()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "71c2096f", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " ds_act = act.io.armfiles.read_netcdf(full_path)\n", + " print(type(ds_act))\n", + " ds_act.clean.cleanup()\n", + "\n", + " # or \n", + " # ds.clean.cleanup()\n", + "except Exception as e:\n", + " print(\"ERROR\", e)\n", + " ds_act = ds" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "844f8505", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter valid variables for ACT qc plotting\n", + "condition = (\"qc_\" + df_info.var_name).apply(lambda x: ds_act[x].attrs.get(\"flag_masks\") is not None \n", + " if x in list(ds.variables) else False)\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_3 = df_info[(df_info.is_dim==False) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings))) &\n", + " condition\n", + " ]\n", + "df_filter_3" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ecd38cc6", + "metadata": {}, + "outputs": [], + "source": [ + "# QC Plot\n", + "try:\n", + " qc_variable = df_filter_3.var_name.values[0]\n", + " print(qc_variable)\n", + "except Exception as e:\n", + " print(e)\n", + " \n", + "try:\n", + "\n", + " # Plot\n", + " qc_display = act.plotting.TimeSeriesDisplay(ds_act)\n", + " qc_display.add_subplots((2,), figsize = (9.5,10))\n", + " qc_ax = qc_display.plot(qc_variable, subplot_index=(0,), set_title=\"QC results on field: \" + qc_variable,)\n", + " qc_ax.grid()\n", + " qc_display.qc_flag_block_plot(qc_variable, subplot_index=(1,))\n", + " qc_ax.set_xlabel(f\"UTC Time starts at {ds.time.data[0]}\")\n", + "\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e)" + ] + }, + { + "cell_type": "markdown", + "id": "d76e4d27", + "metadata": 
{}, + "source": [ + "#### bonus: choose variables to plot from a dropdown menu " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bb733804", + "metadata": {}, + "outputs": [], + "source": [ + "# Valid variables filtering\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_4 = df_info[(df_info.is_dim==False) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter_4" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "82d5c20d", + "metadata": {}, + "outputs": [], + "source": [ + "# example 1: using xarray plot\n", + "\n", + "# Uncomment the following cell to try the interactive plot (ctrl + /)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2fc9aaa4", + "metadata": {}, + "outputs": [], + "source": [ + "# %matplotlib widget\n", + "# plt.clf()\n", + "\n", + "# fig, ax = plt.subplots(figsize=(10, 4))\n", + "\n", + "# available_variables = df_filter_4.var_name.values\n", + "# @widgets.interact(var=available_variables)\n", + "# def update(var = available_variables[0]):\n", + "# fig.clear() # Remove old lines from plot and plot new one\n", + "# if len(ds[var].dims)==2:\n", + "# ds[var].plot(x=\"time\", add_colorbar=False)\n", + "# else:\n", + "# ds[var].plot()\n", + "# plt.grid()\n", + "# plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fabdd802", + "metadata": {}, + "outputs": [], + "source": [ + "# example 2: using act plot\n", + "\n", + "# Uncomment the following cell to try the interactive plot (ctrl + /)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "962b4186", + "metadata": {}, + "outputs": [], + "source": [ + "# %matplotlib widget\n", + "# plt.clf()\n", + "\n", + "# available_variables = df_filter_4.var_name.values\n", + "\n", + "\n", + "# @widgets.interact(var=available_variables)\n", + "# def 
update(var = available_variables[0]):\n", + "\n", + "# i_display = act.plotting.TimeSeriesDisplay(ds_act)\n", + "# i_display.add_subplots((1,), figsize=(10, 4))\n", + "# ax = i_display.plot(var, subplot_index=(0,), set_title=f\"{var} ({ds_act[var].attrs['long_name']})\",)\n", + "\n", + "# ax.set_xlabel(f\"UTC Time starts at {ds.time.data[0]}\")\n", + "# ax.grid()\n", + "# plt.show()\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.16" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/SHALLOWCUMULUS/shallowcumulus.c1.ipynb b/VAPs/quicklook/SHALLOWCUMULUS/shallowcumulus.c1.ipynb new file mode 100644 index 00000000..e060d9dc --- /dev/null +++ b/VAPs/quicklook/SHALLOWCUMULUS/shallowcumulus.c1.ipynb @@ -0,0 +1,297 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# SHALLOWCUMULUS.C1 Plots\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/shallowcumulus) for more information about this vap." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib widget\n", + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import ipywidgets as widgets\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "# Data archive directory\n", + "DATA_DIR = r'/data/archive/'\n", + "\n", + "# Datastream info\n", + "DATASTREAM_NAME = 'shallowcumulus'\n", + "DATA_LEVEL = 'c1'\n", + "LOCATIONS = [{'end_date': '2022-07-29', 'facility': 'C1', 'site': 'sgp', 'start_date': '2000-07-02'}]" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "9faaf875", + "metadata": {}, + "source": [ + "## Define site, facility, and date range" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ac6764f5", + "metadata": {}, + "outputs": [], + "source": [ + "print(\"The following locations and date ranges are available for this VAP:\")\n", + "display(pd.DataFrame(LOCATIONS, columns=['site', 'facility', 'start_date', 'end_date']))" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "8d132223", + "metadata": {}, + "source": [ + "#### Define site, facility, and date range (date format: YYYY-MM-DD) using the variables below:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e563983a", + "metadata": {}, + "outputs": [], + "source": [ + "site_facility = ( 'sgp', 'C1' )\n", + "\n", + "date_start = '2022-07-27'\n", + "date_end = '2022-07-29'" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "bccd3dfe-2f99-49a5-bace-ea37e8dd8fc3", + "metadata": {}, + "source": [ + "## Load data files\n", + "Load data files from /data/archive/" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cb4b9a26-c574-49c0-a521-658fa553e39e", + 
"metadata": {}, + "outputs": [], + "source": [ + "# Compile list of files\n", + "site, facility = site_facility\n", + "d_date_start = datetime.strptime(date_start, '%Y-%m-%d')\n", + "d_date_end = datetime.strptime(date_end, '%Y-%m-%d')\n", + "dir_path = os.path.join(DATA_DIR + site, site + DATASTREAM_NAME + facility + r'.' + DATA_LEVEL )\n", + "dir_path\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6be8f3dc", + "metadata": {}, + "outputs": [], + "source": [ + "from datetime import date, timedelta\n", + "import pandas as pd\n", + "\n", + "def get_ARM_formated_dates(start_date, end_date):\n", + " \"\"\"\n", + " Get a list of ARM conventional formated date lists, based on start_date and end_date(inclusive)\n", + " EXAMPLE:\n", + " get_ARM_formated_dates(start_date=\"20180219\", end_date=\"20180221\")\n", + " >> [\"20180219\", \"20180220\", \"20180221\"] \n", + " \"\"\"\n", + " \n", + " _start_date = pd.to_datetime(start_date)\n", + " _end_date = pd.to_datetime(end_date)\n", + " \n", + " delta = _end_date - _start_date # returns timedelta \n", + " dates = []\n", + "\n", + " for i in range(delta.days + 1):\n", + " day = _start_date + timedelta(days=i)\n", + " day_formated = day.strftime(format=\"%Y%m%d\")\n", + " dates.append(day_formated)\n", + " return dates\n", + "\n", + "\n", + "get_ARM_formated_dates(start_date=date_start, end_date=date_end)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "51feea2e", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter a list of files based on date pattern\n", + "import glob\n", + "dates = get_ARM_formated_dates(start_date=date_start, end_date=date_end)\n", + "files_filter = []\n", + "for date in dates:\n", + " files_filter += glob.glob(f'{dir_path}/*.{date}*.*')\n", + " files_filter\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0e5d0d7", + "metadata": {}, + "outputs": [], + "source": [ + "# Load files as a single 
dataset\n", + "files_list = files_filter \n", + "ds = act.io.armfiles.read_netcdf(files_list)\n", + "ds.clean.cleanup()\n", + "print(f'{len(files_list)} files loaded')\n", + "ds\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "4a551094-9ec0-4b64-b80a-9940573c2f50", + "metadata": {}, + "source": [ + "## Plot time series data\n", + "#### Define the list of variables to be plotted:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d09b789e-84f1-4605-846b-a72c110c8048", + "metadata": {}, + "outputs": [], + "source": [ + "variables_to_plot = ['flag_shcu', 'flag_st_shcu', 'flag_shcu_st']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3458fb08-035b-4898-9253-0a94e6f9c97b", + "metadata": {}, + "outputs": [], + "source": [ + "ts_display = act.plotting.TimeSeriesDisplay(ds)\n", + "ts_display.add_subplots((len(variables_to_plot),), figsize = (9.5,4*len(variables_to_plot)))\n", + "\n", + "for i,v in enumerate(variables_to_plot):\n", + " ts_ax = ts_display.plot(v, subplot_index=(i,), set_title=ds.variables[v].attrs['long_name'],)\n", + " ts_ax.grid()\n", + "\n", + "plt.show()\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "2308db16-f362-4033-a11a-c3e5e75ad9ba", + "metadata": {}, + "source": [ + "## Field selection dropdown menu\n", + "Select variable to be plotted from a dropdown menu" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d5792fa8-2253-441a-8ac6-1e07a0345031", + "metadata": {}, + "outputs": [], + "source": [ + "plt.ioff()\n", + "\n", + "# populate dropdown menu with available variables \n", + "available_variables = [v for v in ds.variables if not('time' in v or v.startswith('qc_') or v.startswith('source_')) and 'long_name' in ds.variables[v].attrs]\n", + "d_variable = 'flag_shcu'\n", + "dropdown = widgets.Dropdown(\n", + " options = [(ds.variables[v].attrs['long_name'], v) for v in available_variables],\n", + " value= d_variable,\n", + " 
description='Field:',\n", + " disabled=False,\n", + ")\n", + "dropdown.layout.margin = '0px 30% 0px 20%'\n", + "dropdown.layout.width = '50%'\n", + "\n", + "# set up display\n", + "i_display = act.plotting.TimeSeriesDisplay(ds)\n", + "i_display.add_subplots((1,), figsize = (9.5,5))\n", + "i_ax = i_display.plot(d_variable, subplot_index=(0,), set_title=ds.variables[d_variable].attrs['long_name'],)\n", + "i_ax.grid()\n", + "i_fig = i_display.fig\n", + "\n", + "# update plot callback function\n", + "def update_plot(change):\n", + " i_ax.cla()\n", + " i_ax_new = i_display.plot(change.new, subplot_index=(0,), set_title=ds.variables[change.new].attrs['long_name'],)\n", + " i_ax_new.grid()\n", + " i_fig.canvas.draw()\n", + " i_fig.canvas.flush_events()\n", + "\n", + "dropdown.observe(update_plot, names='value')\n", + "\n", + "widgets.AppLayout(\n", + " header=dropdown,\n", + " center=i_fig.canvas,\n", + " pane_heights=[1, 6,1]\n", + ")\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.12" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/SHALLOWCUMULUS/shcusummary.c1.ipynb b/VAPs/quicklook/SHALLOWCUMULUS/shcusummary.c1.ipynb new file mode 100644 index 00000000..f22c2770 --- /dev/null +++ b/VAPs/quicklook/SHALLOWCUMULUS/shcusummary.c1.ipynb @@ -0,0 +1,297 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# SHCUSUMMARY.C1 Plots\n", + "\n", + "[Click 
here](https://www.arm.gov/capabilities/vaps/shallowcumulus) for more information about this vap." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib widget\n", + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import ipywidgets as widgets\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "# Data archive directory\n", + "DATA_DIR = r'/data/archive/'\n", + "\n", + "# Datastream info\n", + "DATASTREAM_NAME = 'shcusummary'\n", + "DATA_LEVEL = 'c1'\n", + "LOCATIONS = [{'end_date': '2023-05-29', 'facility': 'C1', 'site': 'sgp', 'start_date': '2000-07-01'}]" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "9faaf875", + "metadata": {}, + "source": [ + "## Define site, facility, and date range" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ac6764f5", + "metadata": {}, + "outputs": [], + "source": [ + "print(\"The following locations and date ranges are available for this VAP:\")\n", + "display(pd.DataFrame(LOCATIONS, columns=['site', 'facility', 'start_date', 'end_date']))" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "8d132223", + "metadata": {}, + "source": [ + "#### Define site, facility, and date range (date format: YYYY-MM-DD) using the variables below:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e563983a", + "metadata": {}, + "outputs": [], + "source": [ + "site_facility = ( 'sgp', 'C1' )\n", + "\n", + "date_start = '2023-05-26'\n", + "date_end = '2023-05-28'" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "bccd3dfe-2f99-49a5-bace-ea37e8dd8fc3", + "metadata": {}, + "source": [ + "## Load data files\n", + "Load data files from /data/archive/" + ] + }, + { + "cell_type": 
"code", + "execution_count": null, + "id": "cb4b9a26-c574-49c0-a521-658fa553e39e", + "metadata": {}, + "outputs": [], + "source": [ + "# Compile list of files\n", + "site, facility = site_facility\n", + "d_date_start = datetime.strptime(date_start, '%Y-%m-%d')\n", + "d_date_end = datetime.strptime(date_end, '%Y-%m-%d')\n", + "dir_path = os.path.join(DATA_DIR + site, site + DATASTREAM_NAME + facility + r'.' + DATA_LEVEL )\n", + "dir_path\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6be8f3dc", + "metadata": {}, + "outputs": [], + "source": [ + "from datetime import date, timedelta\n", + "import pandas as pd\n", + "\n", + "def get_ARM_formated_dates(start_date, end_date):\n", + " \"\"\"\n", + " Get a list of ARM conventional formated date lists, based on start_date and end_date(inclusive)\n", + " EXAMPLE:\n", + " get_ARM_formated_dates(start_date=\"20180219\", end_date=\"20180221\")\n", + " >> [\"20180219\", \"20180220\", \"20180221\"] \n", + " \"\"\"\n", + " \n", + " _start_date = pd.to_datetime(start_date)\n", + " _end_date = pd.to_datetime(end_date)\n", + " \n", + " delta = _end_date - _start_date # returns timedelta \n", + " dates = []\n", + "\n", + " for i in range(delta.days + 1):\n", + " day = _start_date + timedelta(days=i)\n", + " day_formated = day.strftime(format=\"%Y%m%d\")\n", + " dates.append(day_formated)\n", + " return dates\n", + "\n", + "\n", + "get_ARM_formated_dates(start_date=date_start, end_date=date_end)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "51feea2e", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter a list of files based on date pattern\n", + "import glob\n", + "dates = get_ARM_formated_dates(start_date=date_start, end_date=date_end)\n", + "files_filter = []\n", + "for date in dates:\n", + " files_filter += glob.glob(f'{dir_path}/*.{date}*.*')\n", + " files_filter\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0e5d0d7", + 
"metadata": {}, + "outputs": [], + "source": [ + "# Load files as a single dataset\n", + "files_list = files_filter \n", + "ds = act.io.armfiles.read_netcdf(files_list)\n", + "ds.clean.cleanup()\n", + "print(f'{len(files_list)} files loaded')\n", + "ds\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "4a551094-9ec0-4b64-b80a-9940573c2f50", + "metadata": {}, + "source": [ + "## Plot time series data\n", + "#### Define the list of variables to be plotted:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d09b789e-84f1-4605-846b-a72c110c8048", + "metadata": {}, + "outputs": [], + "source": [ + "variables_to_plot = ['shallowcumulus_event']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3458fb08-035b-4898-9253-0a94e6f9c97b", + "metadata": {}, + "outputs": [], + "source": [ + "ts_display = act.plotting.TimeSeriesDisplay(ds)\n", + "ts_display.add_subplots((len(variables_to_plot),), figsize = (9.5,4*len(variables_to_plot)))\n", + "\n", + "for i,v in enumerate(variables_to_plot):\n", + " ts_ax = ts_display.plot(v, subplot_index=(i,), set_title=ds.variables[v].attrs['long_name'],)\n", + " ts_ax.grid()\n", + "\n", + "plt.show()\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "2308db16-f362-4033-a11a-c3e5e75ad9ba", + "metadata": {}, + "source": [ + "## Field selection dropdown menu\n", + "Select variable to be plotted from a dropdown menu" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d5792fa8-2253-441a-8ac6-1e07a0345031", + "metadata": {}, + "outputs": [], + "source": [ + "plt.ioff()\n", + "\n", + "# populate dropdown menu with available variables \n", + "available_variables = [v for v in ds.variables if not('time' in v or v.startswith('qc_') or v.startswith('source_')) and 'long_name' in ds.variables[v].attrs]\n", + "d_variable = 'shallowcumulus_event'\n", + "dropdown = widgets.Dropdown(\n", + " options = [(ds.variables[v].attrs['long_name'], v) for v in 
available_variables],\n", + " value= d_variable,\n", + " description='Field:',\n", + " disabled=False,\n", + ")\n", + "dropdown.layout.margin = '0px 30% 0px 20%'\n", + "dropdown.layout.width = '50%'\n", + "\n", + "# set up display\n", + "i_display = act.plotting.TimeSeriesDisplay(ds)\n", + "i_display.add_subplots((1,), figsize = (9.5,5))\n", + "i_ax = i_display.plot(d_variable, subplot_index=(0,), set_title=ds.variables[d_variable].attrs['long_name'],)\n", + "i_ax.grid()\n", + "i_fig = i_display.fig\n", + "\n", + "# update plot callback function\n", + "def update_plot(change):\n", + " i_ax.cla()\n", + " i_ax_new = i_display.plot(change.new, subplot_index=(0,), set_title=ds.variables[change.new].attrs['long_name'],)\n", + " i_ax_new.grid()\n", + " i_fig.canvas.draw()\n", + " i_fig.canvas.flush_events()\n", + "\n", + "dropdown.observe(update_plot, names='value')\n", + "\n", + "widgets.AppLayout(\n", + " header=dropdown,\n", + " center=i_fig.canvas,\n", + " pane_heights=[1, 6,1]\n", + ")\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.12" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/SO2-AIR/SO2-AIR_tutorial.ipynb b/VAPs/quicklook/SO2-AIR/SO2-AIR_tutorial.ipynb new file mode 100644 index 00000000..7f0d5340 --- /dev/null +++ b/VAPs/quicklook/SO2-AIR/SO2-AIR_tutorial.ipynb @@ -0,0 +1,838 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# AAFSO2.C1 Notebook\n", + "\n", + "[Click 
here](https://www.arm.gov/capabilities/vaps/so2-air) for more information about this vap." + ] + }, + { + "cell_type": "markdown", + "id": "97097763", + "metadata": {}, + "source": [ + "In this notebook, we demonstrate the workflow to explore ARM VAP data (using aafso2 as an example). Value-added products (VAPs) are higher-order data products that have been analyzed and processed to ease scientists’ use of ARM data in atmospheric research and global climate models. \n", + "Here is the main content we will cover." + ] + }, + { + "cell_type": "markdown", + "id": "eddec40f", + "metadata": {}, + "source": [ + "# Table of Contents\n", + "## Access the data\n", + "* How to retrieve the data\n", + "* Data path and file name conventions\n", + "* Load data\n", + "## Explore the data\n", + "* NetCDF Data structure\n", + "* Xarray essentials\n", + "* Xarray Variable\n", + "## Plot the data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "import random\n", + "\n", + "import glob\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "id": "2372d5be", + "metadata": {}, + "source": [ + "## Access the data" + ] + }, + { + "cell_type": "markdown", + "id": "5b0d9684", + "metadata": {}, + "source": [ + "### How to retrieve the data\n", + "We assume the path \"/data/archive\" is available where you are running this notebook. You can use `os.path.exists(\"/data/archive\")` to verify that the path exists on your machine.\n", + "\n", + "\n", + "### Data path and file name conventions\n", + "There are several common terms regarding ARM data, for example, data-stream-name, data-level, site, facility, instrument, etc. 
(For more details, please see [ARM Data File Standards Version 1.2](https://www.google.com/search?q=arm+datastream+facility+cite+definition&rlz=1C1GCEJ_enUS1029US1029&ei=hb41ZICrBPukqtsPvMODIA&ved=0ahUKEwjAgeiS0aL-AhV7kmoFHbzhAAQQ4dUDCBA&uact=5&oq=arm+datastream+facility+cite+definition&gs_lcp=Cgxnd3Mtd2l6LXNlcnAQAzIFCAAQogQyBQgAEKIEMgUIABCiBDIFCAAQogQyBQgAEKIEOgoIABBHENYEELADOgoIIRCgARDDBBAKSgQIQRgAUJ4DWIoOYO0PaAFwAXgAgAGMAYgBuQmSAQMyLjmYAQCgAQHIAQjAAQE&sclient=gws-wiz-serp).) \n", + "\n", + "For example, this notebook is called `aafso2.c1`, where `aafso2` is the \"datastream name\", and `c1` is the \"data level\".\n", + "\n", + "This datastream also contains site `cor` and facility `F1`. (Note: an individual datastream might have multiple site-facility pairs.)\n", + "In such a case, the data of this data-stream is stored at `/data/archive/cor/coraafso2F1.c1`, which is in the format of `{DATA_DIR}/{site}/{site}{DATASTREAM_NAME}{facility}.{DATA_LEVEL}`. We can use the following method to assign the data-stream directory `datastream_dir = os.path.join(DATA_DIR, site, site + DATASTREAM_NAME + facility + '.' + DATA_LEVEL )`\n", + "\n", + "The data files under datastream_dir also follow naming conventions. Once you reach the datastream_dir level, the most important file naming convention to differentiate the files is \"yyyyMMdd.hhmmss\", which comes in handy to filter files based on datetime. 
For example, we can use `glob.glob(f'{datastream_dir}/*.200709*.*')` to filter files from September 2007.\n", + "\n", + "(Note: refer to https://adc.arm.gov/discovery/#/ and the https://adc.arm.gov/solr8/metadata/select API to explore ARM datastreams and their associated available sites and facilities.)\n", + "\n", + "Please see the following examples in action." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d7e9eb85", + "metadata": {}, + "outputs": [], + "source": [ + "# Verify if DATA_DIR path exists\n", + "DATA_DIR = \"/data/archive\"\n", + "os.path.exists(DATA_DIR)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "586993fd", + "metadata": {}, + "outputs": [], + "source": [ + "# Specify datastream_dir following the path conventions and check its existence\n", + "DATASTREAM_NAME = \"aafso2\"\n", + "DATA_LEVEL = \"c1\"\n", + "site = \"cor\"\n", + "facility = \"F1\"\n", + "datastream_dir = os.path.join(DATA_DIR, site, site + DATASTREAM_NAME + facility + '.' 
+ DATA_LEVEL )\n", + "print(datastream_dir)\n", + "print(os.path.exists(datastream_dir))\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0742f7c1", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: list 5 (random) files under datastream_dir\n", + "files = os.listdir(datastream_dir)\n", + "files[:5]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "39b98a36", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: get most recent file\n", + "list_of_files = glob.glob(f\"{datastream_dir}/*\") # * means all if need specific format then *.csv\n", + "latest_file = max(list_of_files, key=os.path.getctime)\n", + "latest_file" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "902d514e", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus sort datastream files based on datetime\n", + "files = os.listdir(datastream_dir)\n", + "file_sorted = files.copy()\n", + "file_sorted.sort() \n", + "print(file_sorted[:5])\n", + "\n", + "# to reverse\n", + "file_sorted_reverse = files.copy() \n", + "file_sorted_reverse.sort(reverse=True)\n", + "print(file_sorted_reverse[:5])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ec5923b2", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: pattern matching\n", + "# filter the 200709** files under datastream_dir\n", + "files_filter = glob.glob(f'{datastream_dir}/*.200709*.*')\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c4a4aa18", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: use armnotebook_utils.file_filter (TODO) to filter files based on datastream info \n" + ] + }, + { + "cell_type": "markdown", + "id": "cfeb9efc", + "metadata": {}, + "source": [ + "### Load data\n", + "The arm data is stored in [NetCDF](https://en.wikipedia.org/wiki/NetCDF#:~:text=%22NetCDF%20(network%20Common%20Data%20Form,format%20for%20representing%20scientific%20data.) format. 
We can use xarray's `open_dataset` method to load a single file, or `open_mfdataset` to open multiple files. (Note: the latter will still return a single xarray dataset object by combining multiple files.)\n", + "\n", + "Note: open_dataset keeps the file handle open and lazy loads its contents. All parameters are passed directly to open_dataset. It is a preferable method over load_dataset for memory efficiency. (For more details, please see [xarray.open_dataset](https://docs.xarray.dev/en/stable/generated/xarray.open_dataset.html))\n", + "\n", + "See the following example in action" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a440a329", + "metadata": {}, + "outputs": [], + "source": [ + "# open single file\n", + "full_path = latest_file\n", + "print(full_path)\n", + "ds_single = xr.open_dataset(full_path)\n", + "ds_single" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0143a3d", + "metadata": {}, + "outputs": [], + "source": [ + "print(ds_single)\n", + "print(type(ds_single))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1c0f8939", + "metadata": {}, + "outputs": [], + "source": [ + "# open multiple files\n", + "n_files = 3\n", + "full_paths = [os.path.join(datastream_dir, f_path) for f_path in file_sorted_reverse[:n_files]]\n", + "print(full_paths)\n", + "try: # Note: sometimes multiple files cannot be merged, so we used try except here.\n", + " ds_mutiple = xr.open_mfdataset(full_paths)\n", + " # ds_mutiple\n", + " print(type(ds_mutiple))\n", + " print(ds_mutiple)\n", + "except Exception as e:\n", + " print(e)" + ] + }, + { + "cell_type": "markdown", + "id": "3fa59943", + "metadata": {}, + "source": [ + "## Explore the data" + ] + }, + { + "cell_type": "markdown", + "id": "571f69b9", + "metadata": {}, + "source": [ + "### NetCDF Data structure \n", + "\n", + "\n", + "(If you are confident with NetCDF basics and xarray essentials, feel free to skip this section.)\n", + "Before we dive into 
data exploration, there are some essential concepts we should be familiar with (shown below): \n", + "* Dataset\n", + "* Data array\n", + "* Variable\n", + "* Dimension\n", + "* Coordinate\n", + "* Data Type\n", + "* Meta Data (Attributes)\n", + "\n", + "We will not go into details about NetCDF basics, but here are some references you might find helpful:\n", + "* [Components of a NetCDF Dataset](https://iprc.soest.hawaii.edu/users/xfu/tool/guidef-7.html#)\n", + "* [Network Common Data Form (NetCDF)](https://www.unidata.ucar.edu/software/netcdf/) and [A Brief History of (netCDF) Time](https://www.unidata.ucar.edu/software/netcdf/time/recs.html)\n", + "\n", + "\n", + "\n", + "### Xarray essentials\n", + "Earlier we introduced the xarray package and used `open_dataset` and `open_mfdataset` to load NetCDF data files as an xarray Dataset object. Recall that you can review the data in a notebook by using `print(ds)` or `ds`. \n", + "\n", + "Using xarray to retrieve the aforementioned NetCDF basics is straightforward. 
In general,\n", + "* Dataset: `ds`\n", + "* Data array: `ds.variables`\n", + "* Variable: `ds.variables`\n", + "* Dimension: `ds.dims`\n", + "* Coordinate: `ds.coords`\n", + "* Data Type: `type`\n", + "* Meta Data (Attributes): `ds.attrs`\n", + "\n", + "Also, here are some references if you are new to xarray:\n", + "* [Xarray in 45 minutes](https://tutorial.xarray.dev/overview/xarray-in-45-min.html)\n", + "* [Handling NetCDF Files using XArray for Absolute Beginners](https://towardsdatascience.com/handling-netcdf-files-using-xarray-for-absolute-beginners-111a8ab4463f)\n", + "\n", + "Try the following commands in action.\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b3eac323", + "metadata": {}, + "outputs": [], + "source": [ + "ds = ds_single" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "77ecf85d", + "metadata": {}, + "outputs": [], + "source": [ + "# Dataset \n", + "ds" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "25e7de09", + "metadata": {}, + "outputs": [], + "source": [ + "# Data array, Variable\n", + "# Note: the info can be overwhelming. 
Do not attempt to grasp everything at first glance.\n", + "ds.variables" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8c41b67e", + "metadata": {}, + "outputs": [], + "source": [ + "# dimensions\n", + "ds.dims" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "156f1dfc", + "metadata": {}, + "outputs": [], + "source": [ + "# coordinates\n", + "ds.coords" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "277d6064", + "metadata": {}, + "outputs": [], + "source": [ + "# Meta Data (Attributes)\n", + "ds.attrs" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d334681f", + "metadata": {}, + "outputs": [], + "source": [ + "# type\n", + "print(type(ds))\n", + "print(type(ds.variables))\n", + "print(type(ds.dims))\n", + "print(type(ds.coords))\n", + "print(type(ds.attrs))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "643399d6", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: retrieve variable names only\n", + "list(ds.variables)" + ] + }, + { + "cell_type": "markdown", + "id": "1cf36878", + "metadata": {}, + "source": [ + "#### Discussion: variables vs. coordinates vs. dimensions. \n", + "From [Components of a NetCDF Dataset](https://iprc.soest.hawaii.edu/users/xfu/tool/guidef-7.html#):\n", + "\n", + "* Dimensions: A dimension may be used to represent a real physical dimension, for example, time, latitude, longitude, or height. A dimension might also be used to index other quantities, for example station or model-run-number.\n", + "A netCDF dimension has both a name and a length. A dimension length is an arbitrary positive integer, except that one dimension in a netCDF dataset can have the length UNLIMITED.\n", + "\n", + "* Variables: Variables are used to store the bulk of the data in a netCDF dataset. A variable represents an array of values of the same type. A scalar value is treated as a 0-dimensional array. 
A variable has a name, a data type, and a shape described by its list of dimensions specified when the variable is created. A variable may also have associated attributes, which may be added, deleted or changed after the variable is created.\n", + "\n", + "* Coordinate (Variables): It is legal for a variable to have the same name as a dimension. Such variables have no special meaning to the netCDF library. However there is a convention that such variables should be treated in a special way by software using this library.\n", + "A variable with the same name as a dimension is called a coordinate variable. It typically defines a physical coordinate corresponding to that dimension.\n", + "\n", + "\n", + "Tip: By definition, coordinates and dimensions are also variables. For example, ds.time is a coordinate, but it is also a special variable. In practice, however, when we talk about a variable, we mean a regular (non-coordinate) variable. We will use this convention for the rest of the notebook. " + ] + }, + { + "cell_type": "markdown", + "id": "d47120f9", + "metadata": {}, + "source": [ + "### Xarray Variable\n", + "\n", + "\n", + "We can use `ds.variables` to access the variables of a dataset. But printing out the whole dataset can often be overwhelming. Instead, we can work on an individual variable (also called an xarray data array) using the `ds[var_name]` syntax, where `var_name` is the name of the variable. See the following examples in action.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5a57e136", + "metadata": {}, + "outputs": [], + "source": [ + "print(type(ds[\"time\"]))\n", + "ds[\"time\"]" + ] + }, + { + "cell_type": "markdown", + "id": "51ec643e", + "metadata": {}, + "source": [ + "#### Variable properties\n", + "For an individual variable, the following properties are the most commonly used, listed with the associated xarray accessor to retrieve them. 
(assuming var = ds[var_name])\n", + "* name: `var.name`\n", + "* data content: `var.data`\n", + "* attributes: `var.attrs`\n", + "* dimensions: `var.dims`\n", + "* data type: `var.data.dtype`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a632a525", + "metadata": {}, + "outputs": [], + "source": [ + "var_name = \"time\"\n", + "var = ds[var_name]\n", + "\n", + "print(\"var.name: \\n\", var.name, \"\\n\")\n", + "print(\"var.data: \\n\", var.data, \"\\n\")\n", + "print(\"var.attrs: \\n\", var.attrs, \"\\n\")\n", + "print(\"var.dims: \\n\", var.dims, \"\\n\")\n", + "print(\"var.data.dtype: \\n\", var.data.dtype, \"\\n\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b256b07f", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: individual variable info table (collect and display variable info in a tabular format)\n", + "df_info = pd.DataFrame()\n", + "df_info[\"var_name\"] = list(ds.variables)\n", + "df_info[\"dims\"] = df_info.var_name.apply(lambda x: ds[x].dims)\n", + "df_info[\"is_dim\"] = df_info.var_name.apply(lambda x: x in ds.dims)\n", + "df_info[\"n_dim\"] = df_info.var_name.apply(lambda x: len(ds[x].dims))\n", + "df_info[\"attrs\"] = df_info.var_name.apply(lambda x: ds[x].attrs)\n", + "df_info[\"dtype\"] = df_info.var_name.apply(lambda x: ds[x].data.dtype)\n", + "df_info" + ] + }, + { + "cell_type": "markdown", + "id": "e6a36b96", + "metadata": {}, + "source": [ + "### Data cleaning/Preprocessing (skipped)\n", + "Data cleaning and preprocessing is an important stage in the data analysis pipeline. However, it is outside the scope of this notebook. For more information about data cleaning and preprocessing basics, here are some references. 
\n", + "* [Xarray Fundamentals](https://earth-env-data-science.github.io/lectures/xarray/xarray.html)\n", + "* [Xarray Tutorial — Pangeo Gallery documentation](http://gallery.pangeo.io/repos/pangeo-data/pangeo-tutorial-gallery/xarray.html)\n", + "* [Pythonic Data Cleaning With pandas and NumPy](https://realpython.com/python-data-cleaning-numpy-pandas/)\n", + "* [Pandas - Cleaning Data](https://www.w3schools.com/python/pandas/pandas_cleaning.asp)" + ] + }, + { + "cell_type": "markdown", + "id": "ae6e1a69", + "metadata": {}, + "source": [ + "### Plotting\n", + "Data visualization (or plotting) is another important data analysis topic and deserves its own discussion. Here in this notebook we will only demonstrate basic tools for simple data visualization tasks.\n", + "\n", + "Here are some references you might find useful:\n", + "* xarray plotting: https://docs.xarray.dev/en/stable/user-guide/plotting.html\n", + "* Atmospheric Community Toolkit (ACT): https://arm-doe.github.io/ACT/index.html\n", + "\n", + "Note: this notebook is auto-generated using a template. It uses a general approach to select variable(s) to plot and is not customized to each individual notebook. Feel free to change the variables of interest, especially if a figure fails to plot." + ] + }, + { + "cell_type": "markdown", + "id": "7e417fb2", + "metadata": {}, + "source": [ + "#### 1-dimensional basic time series plot\n", + "\n", + "For the following plot we would like to find variables such that\n", + "* it has one and only one dimension\n", + "* \"time\" is its coordinate variable\n", + "* it is not a dimension itself\n", + "* it is not a special variable with substrings within [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "\n", + "Please see the following example in action. 
(For more details about pandas filtering, please see the following references.)\n", + "* [pandas: multiple conditions while indexing data frame](https://stackoverflow.com/questions/22591174/pandas-multiple-conditions-while-indexing-data-frame-unexpected-behavior)\n", + "* [pandas.DataFrame.apply](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.apply.html)\n", + "* [How to test if a string contains one of the substrings in a list...](https://stackoverflow.com/questions/26577516/how-to-test-if-a-string-contains-one-of-the-substrings-in-a-list-in-pandas)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "30edac2d", + "metadata": {}, + "outputs": [], + "source": [ + "# (pandas query) filter n_dim==1, non-dimension, and not containing the following substrings\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter = df_info[(df_info.n_dim == 1) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (df_info.is_dim==False) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4777a659", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " var_1d = df_filter.var_name.values[0]\n", + " var_1d\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fb5ae985", + "metadata": {}, + "outputs": [], + "source": [ + "# Note: if failed, change to another variable to plot.\n", + "try:\n", + " ds[var_1d].plot()\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "markdown", + "id": "9e41066e", + "metadata": {}, + "source": [ + "#### 2-dimensional basic plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "09bd4adc", + "metadata": {}, + "outputs": [], + "source": [ + "# (pandas query) filter n_dim==2, non-dimension, and not containing the 
following substrings\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_2 = df_info[(df_info.n_dim == 2) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (df_info.is_dim==False) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter_2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2cdea1a9", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " var_2d = df_filter_2.var_name.values[0]\n", + " var_2d\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f5f48879", + "metadata": {}, + "outputs": [], + "source": [ + "# Note: if failed, change to another variable to plot.\n", + "try:\n", + " print(ds[var_2d].dims)\n", + " # ds[var_2d].plot()\n", + "\n", + " # conventionally, use \"time\" as x-axis\n", + " ds[var_2d].plot(x=\"time\")\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "markdown", + "id": "7554b2a6", + "metadata": {}, + "source": [ + "#### qc-plotting (optional)\n", + "\n", + "Note: ACT QC plotting has stricter requirements; one of them is that the associated qc variable needs to have the \"flag_masks\" attribute. 
Which is added by using ds = act.io.armfiles.read_netcdf(files_list), then ds.clean.cleanup()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "71c2096f", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " ds_act = act.io.armfiles.read_netcdf(full_path)\n", + " print(type(ds_act))\n", + " ds_act.clean.cleanup()\n", + "\n", + " # or \n", + " # ds.clean.cleanup()\n", + "except Exception as e:\n", + " print(\"ERROR\", e)\n", + " ds_act = ds" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "844f8505", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter valid variables for ACT qc plotting\n", + "condition = (\"qc_\" + df_info.var_name).apply(lambda x: ds_act[x].attrs.get(\"flag_masks\") is not None \n", + " if x in list(ds.variables) else False)\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_3 = df_info[(df_info.is_dim==False) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings))) &\n", + " condition\n", + " ]\n", + "df_filter_3" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ecd38cc6", + "metadata": {}, + "outputs": [], + "source": [ + "# QC Plot\n", + "try:\n", + " qc_variable = df_filter_3.var_name.values[0]\n", + " print(qc_variable)\n", + "except Exception as e:\n", + " print(e)\n", + " \n", + "try:\n", + "\n", + " # Plot\n", + " qc_display = act.plotting.TimeSeriesDisplay(ds_act)\n", + " qc_display.add_subplots((2,), figsize = (9.5,10))\n", + " qc_ax = qc_display.plot(qc_variable, subplot_index=(0,), set_title=\"QC results on field: \" + qc_variable,)\n", + " qc_ax.grid()\n", + " qc_display.qc_flag_block_plot(qc_variable, subplot_index=(1,))\n", + " qc_ax.set_xlabel(f\"UTC Time starts at {ds.time.data[0]}\")\n", + "\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e)" + ] + }, + { + "cell_type": "markdown", + "id": "d76e4d27", + "metadata": 
{}, + "source": [ + "#### bonus: choose variables to plot from a dropdown menu " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bb733804", + "metadata": {}, + "outputs": [], + "source": [ + "# Valid variables filtering\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_4 = df_info[(df_info.is_dim==False) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter_4" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "82d5c20d", + "metadata": {}, + "outputs": [], + "source": [ + "# example 1: using xarray plot\n", + "\n", + "# Uncomment the following cell to try the interactive plot (ctrl + /)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2fc9aaa4", + "metadata": {}, + "outputs": [], + "source": [ + "# %matplotlib widget\n", + "# plt.clf()\n", + "\n", + "# fig, ax = plt.subplots(figsize=(10, 4))\n", + "\n", + "# available_variables = df_filter_4.var_name.values\n", + "# @widgets.interact(var=available_variables)\n", + "# def update(var = available_variables[0]):\n", + "# fig.clear() # Remove old lines from plot and plot new one\n", + "# if len(ds[var].dims)==2:\n", + "# ds[var].plot(x=\"time\", add_colorbar=False)\n", + "# else:\n", + "# ds[var].plot()\n", + "# plt.grid()\n", + "# plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fabdd802", + "metadata": {}, + "outputs": [], + "source": [ + "# example 2: using act plot\n", + "\n", + "# Uncomment the following cell to try the interactive plot (ctrl + /)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "962b4186", + "metadata": {}, + "outputs": [], + "source": [ + "# %matplotlib widget\n", + "# plt.clf()\n", + "\n", + "# available_variables = df_filter_4.var_name.values\n", + "\n", + "\n", + "# @widgets.interact(var=available_variables)\n", + "# def 
update(var = available_variables[0]):\n", + "\n", + "# i_display = act.plotting.TimeSeriesDisplay(ds_act)\n", + "# i_display.add_subplots((1,), figsize=(10, 4))\n", + "# ax = i_display.plot(var, subplot_index=(0,), set_title=f\"{var} ({ds_act[var].attrs['long_name']})\",)\n", + "\n", + "# ax.set_xlabel(f\"UTC Time starts at {ds.time.data[0]}\")\n", + "# ax.grid()\n", + "# plt.show()\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.16" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/SO2-AIR/aafso2.c1.ipynb b/VAPs/quicklook/SO2-AIR/aafso2.c1.ipynb new file mode 100644 index 00000000..64bb0b4c --- /dev/null +++ b/VAPs/quicklook/SO2-AIR/aafso2.c1.ipynb @@ -0,0 +1,297 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# AAFSO2.C1 Plots\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/so2-air) for more information about this vap." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib widget\n", + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import ipywidgets as widgets\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "# Data archive directory\n", + "DATA_DIR = r'/data/archive/'\n", + "\n", + "# Datastream info\n", + "DATASTREAM_NAME = 'aafso2'\n", + "DATA_LEVEL = 'c1'\n", + "LOCATIONS = [{'end_date': '2018-12-08', 'facility': 'F1', 'site': 'cor', 'start_date': '2018-11-04'}]" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "9faaf875", + "metadata": {}, + "source": [ + "## Define site, facility, and date range" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ac6764f5", + "metadata": {}, + "outputs": [], + "source": [ + "print(\"The following locations and date ranges are available for this VAP:\")\n", + "display(pd.DataFrame(LOCATIONS, columns=['site', 'facility', 'start_date', 'end_date']))" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "8d132223", + "metadata": {}, + "source": [ + "#### Define site, facility, and date range (date format: YYYY-MM-DD) using the variables below:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e563983a", + "metadata": {}, + "outputs": [], + "source": [ + "site_facility = ( 'cor', 'F1' )\n", + "\n", + "date_start = '2018-12-06'\n", + "date_end = '2018-12-08'" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "bccd3dfe-2f99-49a5-bace-ea37e8dd8fc3", + "metadata": {}, + "source": [ + "## Load data files\n", + "Load data files from /data/archive/" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cb4b9a26-c574-49c0-a521-658fa553e39e", + "metadata": {}, + 
"outputs": [], + "source": [ + "# Compile list of files\n", + "site, facility = site_facility\n", + "d_date_start = datetime.strptime(date_start, '%Y-%m-%d')\n", + "d_date_end = datetime.strptime(date_end, '%Y-%m-%d')\n", + "dir_path = os.path.join(DATA_DIR + site, site + DATASTREAM_NAME + facility + r'.' + DATA_LEVEL )\n", + "dir_path\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6be8f3dc", + "metadata": {}, + "outputs": [], + "source": [ + "from datetime import date, timedelta\n", + "import pandas as pd\n", + "\n", + "def get_ARM_formated_dates(start_date, end_date):\n", + " \"\"\"\n", + " Get a list of ARM conventional formated date lists, based on start_date and end_date(inclusive)\n", + " EXAMPLE:\n", + " get_ARM_formated_dates(start_date=\"20180219\", end_date=\"20180221\")\n", + " >> [\"20180219\", \"20180220\", \"20180221\"] \n", + " \"\"\"\n", + " \n", + " _start_date = pd.to_datetime(start_date)\n", + " _end_date = pd.to_datetime(end_date)\n", + " \n", + " delta = _end_date - _start_date # returns timedelta \n", + " dates = []\n", + "\n", + " for i in range(delta.days + 1):\n", + " day = _start_date + timedelta(days=i)\n", + " day_formated = day.strftime(format=\"%Y%m%d\")\n", + " dates.append(day_formated)\n", + " return dates\n", + "\n", + "\n", + "get_ARM_formated_dates(start_date=date_start, end_date=date_end)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "51feea2e", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter a list of files based on date pattern\n", + "import glob\n", + "dates = get_ARM_formated_dates(start_date=date_start, end_date=date_end)\n", + "files_filter = []\n", + "for date in dates:\n", + " files_filter += glob.glob(f'{dir_path}/*.{date}*.*')\n", + " files_filter\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0e5d0d7", + "metadata": {}, + "outputs": [], + "source": [ + "# Load files as a single dataset\n", + "files_list = 
files_filter \n", + "ds = act.io.armfiles.read_netcdf(files_list)\n", + "ds.clean.cleanup()\n", + "print(f'{len(files_list)} files loaded')\n", + "ds\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "4a551094-9ec0-4b64-b80a-9940573c2f50", + "metadata": {}, + "source": [ + "## Plot time series data\n", + "#### Define the list of variables to be plotted:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d09b789e-84f1-4605-846b-a72c110c8048", + "metadata": {}, + "outputs": [], + "source": [ + "variables_to_plot = ['so2']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3458fb08-035b-4898-9253-0a94e6f9c97b", + "metadata": {}, + "outputs": [], + "source": [ + "ts_display = act.plotting.TimeSeriesDisplay(ds)\n", + "ts_display.add_subplots((len(variables_to_plot),), figsize = (9.5,4*len(variables_to_plot)))\n", + "\n", + "for i,v in enumerate(variables_to_plot):\n", + " ts_ax = ts_display.plot(v, subplot_index=(i,), set_title=ds.variables[v].attrs['long_name'],)\n", + " ts_ax.grid()\n", + "\n", + "plt.show()\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "2308db16-f362-4033-a11a-c3e5e75ad9ba", + "metadata": {}, + "source": [ + "## Field selection dropdown menu\n", + "Select variable to be plotted from a dropdown menu" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d5792fa8-2253-441a-8ac6-1e07a0345031", + "metadata": {}, + "outputs": [], + "source": [ + "plt.ioff()\n", + "\n", + "# populate dropdown menu with available variables \n", + "available_variables = [v for v in ds.variables if not('time' in v or v.startswith('qc_') or v.startswith('source_')) and 'long_name' in ds.variables[v].attrs]\n", + "d_variable = 'so2'\n", + "dropdown = widgets.Dropdown(\n", + " options = [(ds.variables[v].attrs['long_name'], v) for v in available_variables],\n", + " value= d_variable,\n", + " description='Field:',\n", + " disabled=False,\n", + ")\n", + 
"dropdown.layout.margin = '0px 30% 0px 20%'\n", + "dropdown.layout.width = '50%'\n", + "\n", + "# set up display\n", + "i_display = act.plotting.TimeSeriesDisplay(ds)\n", + "i_display.add_subplots((1,), figsize = (9.5,5))\n", + "i_ax = i_display.plot(d_variable, subplot_index=(0,), set_title=ds.variables[d_variable].attrs['long_name'],)\n", + "i_ax.grid()\n", + "i_fig = i_display.fig\n", + "\n", + "# update plot callback function\n", + "def update_plot(change):\n", + " i_ax.cla()\n", + " i_ax_new = i_display.plot(change.new, subplot_index=(0,), set_title=ds.variables[change.new].attrs['long_name'],)\n", + " i_ax_new.grid()\n", + " i_fig.canvas.draw()\n", + " i_fig.canvas.flush_events()\n", + "\n", + "dropdown.observe(update_plot, names='value')\n", + "\n", + "widgets.AppLayout(\n", + " header=dropdown,\n", + " center=i_fig.canvas,\n", + " pane_heights=[1, 6,1]\n", + ")\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.12" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/SONDEADJUST/.ipynb_checkpoints/SONDEADJUST_tutorial-checkpoint.ipynb b/VAPs/quicklook/SONDEADJUST/.ipynb_checkpoints/SONDEADJUST_tutorial-checkpoint.ipynb new file mode 100644 index 00000000..698be192 --- /dev/null +++ b/VAPs/quicklook/SONDEADJUST/.ipynb_checkpoints/SONDEADJUST_tutorial-checkpoint.ipynb @@ -0,0 +1,4058 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# SONDEADJUST.C1 Notebook\n", + "\n", + "[Click 
here](https://www.arm.gov/capabilities/vaps/sondeadjust) for more information about this vap." + ] + }, + { + "cell_type": "markdown", + "id": "97097763", + "metadata": {}, + "source": [ + "In this notebook, we demonstrate the workflow to explore ARM vap data (using sondeadjust as an example). Value-added products (VAPs) are higher-order data products that have been analyzed and processed to ease scientists’ use of ARM data in atmospheric research and global climate models. \n", + "Here is the main content we will cover." + ] + }, + { + "cell_type": "markdown", + "id": "eddec40f", + "metadata": {}, + "source": [ + "# Table of Contents\n", + "## Access the data\n", + "* How to retrieve the data\n", + "* Data path and file name conventions\n", + "* Load data\n", + "## Explore the data\n", + "* NetCDF Data structure\n", + "* Xarray essentials\n", + "* Xarray Variable\n", + "## Plot the data" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "import random\n", + "\n", + "import glob\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "id": "2372d5be", + "metadata": {}, + "source": [ + "## Access the data" + ] + }, + { + "cell_type": "markdown", + "id": "5b0d9684", + "metadata": {}, + "source": [ + "### How to retrieve the data\n", + "We assume the path \"/data/archive\" is available where you are running this notebook. You can use `os.path.exists(\"/data/archive\")` to verify if the path exists on your machine.\n", + "\n", + "\n", + "### Data path and file name conventions\n", + "There are several common terms regarding ARM data, for example, data-stream-name, data-level, site, facility, instrument, etc. 
(For more details, please see [ARM Data File Standards Version 1.2](https://www.google.com/search?q=arm+datastream+facility+cite+definition&rlz=1C1GCEJ_enUS1029US1029&ei=hb41ZICrBPukqtsPvMODIA&ved=0ahUKEwjAgeiS0aL-AhV7kmoFHbzhAAQQ4dUDCBA&uact=5&oq=arm+datastream+facility+cite+definition&gs_lcp=Cgxnd3Mtd2l6LXNlcnAQAzIFCAAQogQyBQgAEKIEMgUIABCiBDIFCAAQogQyBQgAEKIEOgoIABBHENYEELADOgoIIRCgARDDBBAKSgQIQRgAUJ4DWIoOYO0PaAFwAXgAgAGMAYgBuQmSAQMyLjmYAQCgAQHIAQjAAQE&sclient=gws-wiz-serp).) \n", + "\n", + "For example, this notebook is called `sondeadjust.c1`, where `sondeadjust` is the \"datastream name\", and `c1` is the \"data level\".\n", + "\n", + "This datastream also contains site `nsa` and facility `C1`. (Note: an individual datastream might have multiple site-facility pairs.)\n", + "In such a case, the data of this data-stream is stored at `/data/archive/nsa/nsasondeadjustC1.c1`, which is in the format of `{DATA_DIR}/{site}/{site}{datastream_name}{facility}.{data_level}`. We can use the following method to assign the data-stream directory `datastream_dir = os.path.join(DATA_DIR, site, site + DATASTREAM_NAME + facility + '.' + DATA_LEVEL )`\n", + "\n", + "The data files under datastream_dir also follow naming conventions. But once you reach the datastream_dir level, the most important file naming convention to differentiate the files is \"yyyyMMdd.hhmmss\", which comes in handy to filter out files based on datetime. 
For example, we can use `glob.glob(f'{datastream_dir}/*.200709*.*')` to filter files from September 2007.\n", + "\n", + "(Note: refer to the https://adc.arm.gov/discovery/#/ and https://adc.arm.gov/solr8/metadata/select API to explore ARM datastreams and associated available sites and facilities.)\n", + "\n", + "Please see the following examples in action" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "d7e9eb85", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Verify if DATA_DIR path exists\n", + "DATA_DIR = \"/data/archive\"\n", + "os.path.exists(DATA_DIR)" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "586993fd", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "/data/archive/nsa/nsasondeadjustC1.c1\n", + "True\n" + ] + } + ], + "source": [ + "# Specify datastream_dir following the path conventions and check its existence\n", + "DATASTREAM_NAME = \"sondeadjust\"\n", + "DATA_LEVEL = \"c1\"\n", + "site = \"nsa\"\n", + "facility = \"C1\"\n", + "datastream_dir = os.path.join(DATA_DIR, site, site + DATASTREAM_NAME + facility + '.' 
+ DATA_LEVEL )\n", + "print(datastream_dir)\n", + "print(os.path.exists(datastream_dir))\n" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "0742f7c1", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['nsasondeadjustC1.c1.20110710.054600.cdf',\n", + " 'nsasondeadjustC1.c1.20090319.051600.cdf',\n", + " 'nsasondeadjustC1.c1.20080508.052600.cdf',\n", + " 'nsasondeadjustC1.c1.20070816.181100.cdf',\n", + " 'nsasondeadjustC1.c1.20110923.174200.cdf']" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# bonus: list 5 (random) files under datastream_dir\n", + "files = os.listdir(datastream_dir)\n", + "files[:5]" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "39b98a36", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'/data/archive/nsa/nsasondeadjustC1.c1/nsasondeadjustC1.c1.20080601.053500.cdf'" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# bonus: get most recent file\n", + "list_of_files = glob.glob(f\"{datastream_dir}/*\") # * means all if need specific format then *.csv\n", + "latest_file = max(list_of_files, key=os.path.getctime)\n", + "latest_file" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "902d514e", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['nsasondeadjustC1.c1.20020428.184800.cdf', 'nsasondeadjustC1.c1.20020428.220500.cdf', 'nsasondeadjustC1.c1.20020428.235900.cdf', 'nsasondeadjustC1.c1.20020429.013100.cdf', 'nsasondeadjustC1.c1.20020429.182500.cdf']\n", + "['nsasondeadjustC1.c1.20120716.173000.cdf', 'nsasondeadjustC1.c1.20120716.053000.cdf', 'nsasondeadjustC1.c1.20120715.214900.cdf', 'nsasondeadjustC1.c1.20120715.173000.cdf', 'nsasondeadjustC1.c1.20120715.053000.cdf']\n" + ] + } + ], + "source": [ + "# bonus sort datastream files based on datetime\n", + "files = 
os.listdir(datastream_dir)\n", + "file_sorted = files.copy()\n", + "file_sorted.sort() \n", + "print(file_sorted[:5])\n", + "\n", + "# to reverse\n", + "file_sorted_reverse = files.copy() \n", + "file_sorted_reverse.sort(reverse=True)\n", + "print(file_sorted_reverse[:5])" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "ec5923b2", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['/data/archive/nsa/nsasondeadjustC1.c1/nsasondeadjustC1.c1.20070921.173300.cdf',\n", + " '/data/archive/nsa/nsasondeadjustC1.c1/nsasondeadjustC1.c1.20070929.190700.cdf',\n", + " '/data/archive/nsa/nsasondeadjustC1.c1/nsasondeadjustC1.c1.20070925.201400.cdf',\n", + " '/data/archive/nsa/nsasondeadjustC1.c1/nsasondeadjustC1.c1.20070928.174800.cdf',\n", + " '/data/archive/nsa/nsasondeadjustC1.c1/nsasondeadjustC1.c1.20070930.172800.cdf',\n", + " '/data/archive/nsa/nsasondeadjustC1.c1/nsasondeadjustC1.c1.20070905.052400.cdf',\n", + " '/data/archive/nsa/nsasondeadjustC1.c1/nsasondeadjustC1.c1.20070927.052700.cdf',\n", + " '/data/archive/nsa/nsasondeadjustC1.c1/nsasondeadjustC1.c1.20070920.165900.cdf',\n", + " '/data/archive/nsa/nsasondeadjustC1.c1/nsasondeadjustC1.c1.20070929.053300.cdf',\n", + " '/data/archive/nsa/nsasondeadjustC1.c1/nsasondeadjustC1.c1.20070909.053200.cdf',\n", + " '/data/archive/nsa/nsasondeadjustC1.c1/nsasondeadjustC1.c1.20070904.204700.cdf',\n", + " '/data/archive/nsa/nsasondeadjustC1.c1/nsasondeadjustC1.c1.20070913.210100.cdf',\n", + " '/data/archive/nsa/nsasondeadjustC1.c1/nsasondeadjustC1.c1.20070925.052400.cdf',\n", + " '/data/archive/nsa/nsasondeadjustC1.c1/nsasondeadjustC1.c1.20070907.173500.cdf',\n", + " '/data/archive/nsa/nsasondeadjustC1.c1/nsasondeadjustC1.c1.20070919.053400.cdf',\n", + " '/data/archive/nsa/nsasondeadjustC1.c1/nsasondeadjustC1.c1.20070901.172500.cdf',\n", + " '/data/archive/nsa/nsasondeadjustC1.c1/nsasondeadjustC1.c1.20070919.174900.cdf',\n", + " 
'/data/archive/nsa/nsasondeadjustC1.c1/nsasondeadjustC1.c1.20070903.210800.cdf',\n", + " '/data/archive/nsa/nsasondeadjustC1.c1/nsasondeadjustC1.c1.20070913.180600.cdf',\n", + " '/data/archive/nsa/nsasondeadjustC1.c1/nsasondeadjustC1.c1.20070914.204200.cdf',\n", + " '/data/archive/nsa/nsasondeadjustC1.c1/nsasondeadjustC1.c1.20070916.052500.cdf',\n", + " '/data/archive/nsa/nsasondeadjustC1.c1/nsasondeadjustC1.c1.20070917.213100.cdf',\n", + " '/data/archive/nsa/nsasondeadjustC1.c1/nsasondeadjustC1.c1.20070904.052100.cdf',\n", + " '/data/archive/nsa/nsasondeadjustC1.c1/nsasondeadjustC1.c1.20070915.172400.cdf',\n", + " '/data/archive/nsa/nsasondeadjustC1.c1/nsasondeadjustC1.c1.20070923.172500.cdf',\n", + " '/data/archive/nsa/nsasondeadjustC1.c1/nsasondeadjustC1.c1.20070903.173300.cdf',\n", + " '/data/archive/nsa/nsasondeadjustC1.c1/nsasondeadjustC1.c1.20070928.205300.cdf',\n", + " '/data/archive/nsa/nsasondeadjustC1.c1/nsasondeadjustC1.c1.20070905.174000.cdf',\n", + " '/data/archive/nsa/nsasondeadjustC1.c1/nsasondeadjustC1.c1.20070926.213000.cdf',\n", + " '/data/archive/nsa/nsasondeadjustC1.c1/nsasondeadjustC1.c1.20070918.052700.cdf',\n", + " '/data/archive/nsa/nsasondeadjustC1.c1/nsasondeadjustC1.c1.20070914.173800.cdf',\n", + " '/data/archive/nsa/nsasondeadjustC1.c1/nsasondeadjustC1.c1.20070914.053000.cdf',\n", + " '/data/archive/nsa/nsasondeadjustC1.c1/nsasondeadjustC1.c1.20070918.205800.cdf',\n", + " '/data/archive/nsa/nsasondeadjustC1.c1/nsasondeadjustC1.c1.20070911.052500.cdf',\n", + " '/data/archive/nsa/nsasondeadjustC1.c1/nsasondeadjustC1.c1.20070917.180300.cdf',\n", + " '/data/archive/nsa/nsasondeadjustC1.c1/nsasondeadjustC1.c1.20070904.174000.cdf',\n", + " '/data/archive/nsa/nsasondeadjustC1.c1/nsasondeadjustC1.c1.20070901.052400.cdf',\n", + " '/data/archive/nsa/nsasondeadjustC1.c1/nsasondeadjustC1.c1.20070918.181600.cdf',\n", + " '/data/archive/nsa/nsasondeadjustC1.c1/nsasondeadjustC1.c1.20070911.174500.cdf',\n", + " 
'/data/archive/nsa/nsasondeadjustC1.c1/nsasondeadjustC1.c1.20070907.053800.cdf',\n", + " '/data/archive/nsa/nsasondeadjustC1.c1/nsasondeadjustC1.c1.20070910.213600.cdf',\n", + " '/data/archive/nsa/nsasondeadjustC1.c1/nsasondeadjustC1.c1.20070902.172900.cdf',\n", + " '/data/archive/nsa/nsasondeadjustC1.c1/nsasondeadjustC1.c1.20070924.173400.cdf',\n", + " '/data/archive/nsa/nsasondeadjustC1.c1/nsasondeadjustC1.c1.20070910.052900.cdf',\n", + " '/data/archive/nsa/nsasondeadjustC1.c1/nsasondeadjustC1.c1.20070906.220400.cdf',\n", + " '/data/archive/nsa/nsasondeadjustC1.c1/nsasondeadjustC1.c1.20070920.053400.cdf',\n", + " '/data/archive/nsa/nsasondeadjustC1.c1/nsasondeadjustC1.c1.20070926.173600.cdf',\n", + " '/data/archive/nsa/nsasondeadjustC1.c1/nsasondeadjustC1.c1.20070906.053200.cdf',\n", + " '/data/archive/nsa/nsasondeadjustC1.c1/nsasondeadjustC1.c1.20070923.052600.cdf',\n", + " '/data/archive/nsa/nsasondeadjustC1.c1/nsasondeadjustC1.c1.20070911.221800.cdf',\n", + " '/data/archive/nsa/nsasondeadjustC1.c1/nsasondeadjustC1.c1.20070917.052500.cdf',\n", + " '/data/archive/nsa/nsasondeadjustC1.c1/nsasondeadjustC1.c1.20070927.180200.cdf',\n", + " '/data/archive/nsa/nsasondeadjustC1.c1/nsasondeadjustC1.c1.20070907.213000.cdf',\n", + " '/data/archive/nsa/nsasondeadjustC1.c1/nsasondeadjustC1.c1.20070910.173600.cdf',\n", + " '/data/archive/nsa/nsasondeadjustC1.c1/nsasondeadjustC1.c1.20070912.212300.cdf',\n", + " '/data/archive/nsa/nsasondeadjustC1.c1/nsasondeadjustC1.c1.20070912.052600.cdf',\n", + " '/data/archive/nsa/nsasondeadjustC1.c1/nsasondeadjustC1.c1.20070922.172900.cdf',\n", + " '/data/archive/nsa/nsasondeadjustC1.c1/nsasondeadjustC1.c1.20070925.180300.cdf',\n", + " '/data/archive/nsa/nsasondeadjustC1.c1/nsasondeadjustC1.c1.20070909.172700.cdf',\n", + " '/data/archive/nsa/nsasondeadjustC1.c1/nsasondeadjustC1.c1.20070921.052700.cdf',\n", + " '/data/archive/nsa/nsasondeadjustC1.c1/nsasondeadjustC1.c1.20070928.053300.cdf',\n", + " 
'/data/archive/nsa/nsasondeadjustC1.c1/nsasondeadjustC1.c1.20070903.052400.cdf',\n", + " '/data/archive/nsa/nsasondeadjustC1.c1/nsasondeadjustC1.c1.20070902.052300.cdf',\n", + " '/data/archive/nsa/nsasondeadjustC1.c1/nsasondeadjustC1.c1.20070930.052600.cdf',\n", + " '/data/archive/nsa/nsasondeadjustC1.c1/nsasondeadjustC1.c1.20070912.173700.cdf',\n", + " '/data/archive/nsa/nsasondeadjustC1.c1/nsasondeadjustC1.c1.20070915.052900.cdf',\n", + " '/data/archive/nsa/nsasondeadjustC1.c1/nsasondeadjustC1.c1.20070927.211200.cdf',\n", + " '/data/archive/nsa/nsasondeadjustC1.c1/nsasondeadjustC1.c1.20070916.173200.cdf',\n", + " '/data/archive/nsa/nsasondeadjustC1.c1/nsasondeadjustC1.c1.20070913.053500.cdf',\n", + " '/data/archive/nsa/nsasondeadjustC1.c1/nsasondeadjustC1.c1.20070922.061100.cdf',\n", + " '/data/archive/nsa/nsasondeadjustC1.c1/nsasondeadjustC1.c1.20070908.052500.cdf',\n", + " '/data/archive/nsa/nsasondeadjustC1.c1/nsasondeadjustC1.c1.20070905.210700.cdf',\n", + " '/data/archive/nsa/nsasondeadjustC1.c1/nsasondeadjustC1.c1.20070906.175700.cdf',\n", + " '/data/archive/nsa/nsasondeadjustC1.c1/nsasondeadjustC1.c1.20070908.175400.cdf',\n", + " '/data/archive/nsa/nsasondeadjustC1.c1/nsasondeadjustC1.c1.20070924.052900.cdf',\n", + " '/data/archive/nsa/nsasondeadjustC1.c1/nsasondeadjustC1.c1.20070922.053900.cdf']" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# bonus: pattern matching\n", + "# filter the 200709** files under datastream_dir\n", + "files_filter = glob.glob(f'{datastream_dir}/*.200709*.*')\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "c4a4aa18", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: use armnotebook_utils.file_filter (TODO) to filter files based on datastream info \n" + ] + }, + { + "cell_type": "markdown", + "id": "cfeb9efc", + "metadata": {}, + "source": [ + "### Load data\n", + "The arm data is stored in 
[NetCDF](https://en.wikipedia.org/wiki/NetCDF) format. We can use xarray's `open_dataset` method to load a single file, or `open_mfdataset` to open multiple files. (Note: the latter will still return a single xarray dataset object by combining multiple files.)\n", + "\n", + "Note: open_dataset keeps the file handle open and lazy loads its contents. All parameters are passed directly to open_dataset. It is a preferable method over load_dataset for memory efficiency. (For more details, please see [xarray.open_dataset](https://docs.xarray.dev/en/stable/generated/xarray.open_dataset.html))\n", + "\n", + "See the following example in action" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "a440a329", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "/data/archive/nsa/nsasondeadjustC1.c1/nsasondeadjustC1.c1.20080601.053500.cdf\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
<xarray.Dataset>\n",
+       "Dimensions:       (time: 2975)\n",
+       "Coordinates:\n",
+       "  * time          (time) datetime64[ns] 2008-06-01T05:35:00 ... 2008-06-01T07...\n",
+       "Data variables: (12/35)\n",
+       "    base_time     datetime64[ns] ...\n",
+       "    time_offset   (time) datetime64[ns] ...\n",
+       "    qc_time       (time) int32 ...\n",
+       "    pres          (time) float32 ...\n",
+       "    qc_pres       (time) int32 ...\n",
+       "    tdry          (time) float32 ...\n",
+       "    ...            ...\n",
+       "    qc_rh_scaled  (time) int32 ...\n",
+       "    dp_scaled     (time) float32 ...\n",
+       "    qc_dp_scaled  (time) int32 ...\n",
+       "    lat           (time) float32 ...\n",
+       "    lon           (time) float32 ...\n",
+       "    alt           (time) float32 ...\n",
+       "Attributes: (12/16)\n",
+       "    process_version:                $State: vap-sonde_adjust-8.0-0.sol5_10$\n",
+       "    command_line:                   sonde_adjust -d 20080601 -f nsaC1 -a 0\n",
+       "    site_id:                        nsa\n",
+       "    facility_id:                    C1: Barrow, Alaska\n",
+       "    reference1:                     Wang et.al. 2002. "Corrections of Humidit...\n",
+       "    reference2:                     Miloshevich et.al. 2004. "Development and...\n",
+       "    ...                             ...\n",
+       "    station_elevation:              8 m MSL\n",
+       "    input_datastreams_description:  A string consisting of the datastream(s),...\n",
+       "    input_datastreams_num:          6\n",
+       "    input_datastreams:              nsasondewnpnC1.b1 : 5.190000 : 20080601.0...\n",
+       "    zeb_platform:                   nsasondeadjustC1.c1\n",
+       "    history:                        created by user gervais on machine emeral...
" + ], + "text/plain": [ + "\n", + "Dimensions: (time: 2975)\n", + "Coordinates:\n", + " * time (time) datetime64[ns] 2008-06-01T05:35:00 ... 2008-06-01T07...\n", + "Data variables: (12/35)\n", + " base_time datetime64[ns] ...\n", + " time_offset (time) datetime64[ns] ...\n", + " qc_time (time) int32 ...\n", + " pres (time) float32 ...\n", + " qc_pres (time) int32 ...\n", + " tdry (time) float32 ...\n", + " ... ...\n", + " qc_rh_scaled (time) int32 ...\n", + " dp_scaled (time) float32 ...\n", + " qc_dp_scaled (time) int32 ...\n", + " lat (time) float32 ...\n", + " lon (time) float32 ...\n", + " alt (time) float32 ...\n", + "Attributes: (12/16)\n", + " process_version: $State: vap-sonde_adjust-8.0-0.sol5_10$\n", + " command_line: sonde_adjust -d 20080601 -f nsaC1 -a 0\n", + " site_id: nsa\n", + " facility_id: C1: Barrow, Alaska\n", + " reference1: Wang et.al. 2002. \"Corrections of Humidit...\n", + " reference2: Miloshevich et.al. 2004. \"Development and...\n", + " ... ...\n", + " station_elevation: 8 m MSL\n", + " input_datastreams_description: A string consisting of the datastream(s),...\n", + " input_datastreams_num: 6\n", + " input_datastreams: nsasondewnpnC1.b1 : 5.190000 : 20080601.0...\n", + " zeb_platform: nsasondeadjustC1.c1\n", + " history: created by user gervais on machine emeral..." + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# open single file\n", + "full_path = latest_file\n", + "print(full_path)\n", + "ds_single = xr.open_dataset(full_path)\n", + "ds_single" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "b0143a3d", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Dimensions: (time: 2975)\n", + "Coordinates:\n", + " * time (time) datetime64[ns] 2008-06-01T05:35:00 ... 
2008-06-01T07...\n", + "Data variables: (12/35)\n", + " base_time datetime64[ns] ...\n", + " time_offset (time) datetime64[ns] ...\n", + " qc_time (time) int32 ...\n", + " pres (time) float32 ...\n", + " qc_pres (time) int32 ...\n", + " tdry (time) float32 ...\n", + " ... ...\n", + " qc_rh_scaled (time) int32 ...\n", + " dp_scaled (time) float32 ...\n", + " qc_dp_scaled (time) int32 ...\n", + " lat (time) float32 ...\n", + " lon (time) float32 ...\n", + " alt (time) float32 ...\n", + "Attributes: (12/16)\n", + " process_version: $State: vap-sonde_adjust-8.0-0.sol5_10$\n", + " command_line: sonde_adjust -d 20080601 -f nsaC1 -a 0\n", + " site_id: nsa\n", + " facility_id: C1: Barrow, Alaska\n", + " reference1: Wang et.al. 2002. \"Corrections of Humidit...\n", + " reference2: Miloshevich et.al. 2004. \"Development and...\n", + " ... ...\n", + " station_elevation: 8 m MSL\n", + " input_datastreams_description: A string consisting of the datastream(s),...\n", + " input_datastreams_num: 6\n", + " input_datastreams: nsasondewnpnC1.b1 : 5.190000 : 20080601.0...\n", + " zeb_platform: nsasondeadjustC1.c1\n", + " history: created by user gervais on machine emeral...\n", + "\n" + ] + } + ], + "source": [ + "print(ds_single)\n", + "print(type(ds_single))" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "1c0f8939", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['/data/archive/nsa/nsasondeadjustC1.c1/nsasondeadjustC1.c1.20120716.173000.cdf', '/data/archive/nsa/nsasondeadjustC1.c1/nsasondeadjustC1.c1.20120716.053000.cdf', '/data/archive/nsa/nsasondeadjustC1.c1/nsasondeadjustC1.c1.20120715.214900.cdf']\n", + "\n", + "\n", + "Dimensions: (time: 7872)\n", + "Coordinates:\n", + " * time (time) datetime64[ns] 2012-07-15T21:49:00 ... 2012-07-16T18...\n", + "Data variables: (12/35)\n", + " base_time (time) datetime64[ns] 2012-07-15T21:49:00 ... 
2012-07-16T17...\n", + " time_offset (time) datetime64[ns] dask.array\n", + " qc_time (time) int32 dask.array\n", + " pres (time) float32 dask.array\n", + " qc_pres (time) int32 dask.array\n", + " tdry (time) float32 dask.array\n", + " ... ...\n", + " qc_rh_scaled (time) int32 dask.array\n", + " dp_scaled (time) float32 dask.array\n", + " qc_dp_scaled (time) int32 dask.array\n", + " lat (time) float32 dask.array\n", + " lon (time) float32 dask.array\n", + " alt (time) float32 dask.array\n", + "Attributes: (12/16)\n", + " process_version: $State: vap-sonde_adjust-8.0-0.sol5_10$\n", + " command_line: sonde_adjust -d 20120715 -f nsaC1 -a 0\n", + " site_id: nsa\n", + " facility_id: C1: Barrow, Alaska\n", + " reference1: Wang et.al. 2002. \"Corrections of Humidit...\n", + " reference2: Miloshevich et.al. 2004. \"Development and...\n", + " ... ...\n", + " station_elevation: 8 m MSL\n", + " input_datastreams_description: A string consisting of the datastream(s),...\n", + " input_datastreams_num: 6\n", + " input_datastreams: nsasondewnpnC1.b1 : 10.800000 : 20120715....\n", + " zeb_platform: nsasondeadjustC1.c1\n", + " history: created by user gervais on machine emeral...\n" + ] + } + ], + "source": [ + "# open multiple files\n", + "n_files = 3\n", + "full_paths = [os.path.join(datastream_dir, f_path) for f_path in file_sorted_reverse[:n_files]]\n", + "print(full_paths)\n", + "try: # Note: sometimes multiple files cannot be merged, so we used try except here.\n", + " ds_mutiple = xr.open_mfdataset(full_paths)\n", + " # ds_mutiple\n", + " print(type(ds_mutiple))\n", + " print(ds_mutiple)\n", + "except Exception as e:\n", + " print(e)" + ] + }, + { + "cell_type": "markdown", + "id": "3fa59943", + "metadata": {}, + "source": [ + "## Explore the data" + ] + }, + { + "cell_type": "markdown", + "id": "571f69b9", + "metadata": {}, + "source": [ + "### NetCDF Data structure \n", + "\n", + "\n", + "(If you are confident with NetCDF basics and xarray essentials, feel free to skip 
this section.)\n", + "Before we dive into data exploration, there are some essential concepts we should be familiar with (shown below): \n", + "* Dataset\n", + "* Data array\n", + "* Variable\n", + "* Dimension\n", + "* Coordinate\n", + "* Data Type\n", + "* Meta Data (Attributes)\n", + "\n", + "We will not go into details about NetCDF basics and here are some references you might find helpful\n", + "* [Components of a NetCDF Dataset](https://iprc.soest.hawaii.edu/users/xfu/tool/guidef-7.html#)\n", + "* [Network Common Data Form (NetCDF)](https://www.unidata.ucar.edu/software/netcdf/) and [A Brief History of (netCDF) Time](https://www.unidata.ucar.edu/software/netcdf/time/recs.html)\n", + "\n", + "\n", + "\n", + "### Xarray essentials\n", + "Earlier we introduced the xarray package and used `open_dataset` and `open_mfdataset` to retrieve a NetCDF data file as an xarray Dataset object. Recall that you can review the data in a notebook by using `print(ds)` or `ds`. \n", + "\n", + "Using xarray to retrieve the aforementioned NetCDF basics is straightforward. In general,\n", + "* Dataset: `ds`\n", + "* Data array: `ds.variables`\n", + "* Variable: `ds.variables[name]`\n", + "* Dimension: `ds.dims`\n", + "* Coordinate: `ds.coords`\n", + "* Data Type: `type`\n", + "* Meta Data (Attributes)\n", + "\n", + "Also, here are some references if you are new to xarray\n", + "* [Xarray in 45 minutes](https://tutorial.xarray.dev/overview/xarray-in-45-min.html)\n", + "* [Handling NetCDF Files using XArray for Absolute Beginners](https://towardsdatascience.com/handling-netcdf-files-using-xarray-for-absolute-beginners-111a8ab4463f)\n", + "\n", + "Try the following commands in action\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "b3eac323", + "metadata": {}, + "outputs": [], + "source": [ + "ds = ds_single" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "77ecf85d", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
<xarray.Dataset>\n",
+       "Dimensions:       (time: 2975)\n",
+       "Coordinates:\n",
+       "  * time          (time) datetime64[ns] 2008-06-01T05:35:00 ... 2008-06-01T07...\n",
+       "Data variables: (12/35)\n",
+       "    base_time     datetime64[ns] ...\n",
+       "    time_offset   (time) datetime64[ns] ...\n",
+       "    qc_time       (time) int32 ...\n",
+       "    pres          (time) float32 ...\n",
+       "    qc_pres       (time) int32 ...\n",
+       "    tdry          (time) float32 ...\n",
+       "    ...            ...\n",
+       "    qc_rh_scaled  (time) int32 ...\n",
+       "    dp_scaled     (time) float32 ...\n",
+       "    qc_dp_scaled  (time) int32 ...\n",
+       "    lat           (time) float32 ...\n",
+       "    lon           (time) float32 ...\n",
+       "    alt           (time) float32 ...\n",
+       "Attributes: (12/16)\n",
+       "    process_version:                $State: vap-sonde_adjust-8.0-0.sol5_10$\n",
+       "    command_line:                   sonde_adjust -d 20080601 -f nsaC1 -a 0\n",
+       "    site_id:                        nsa\n",
+       "    facility_id:                    C1: Barrow, Alaska\n",
+       "    reference1:                     Wang et.al. 2002. "Corrections of Humidit...\n",
+       "    reference2:                     Miloshevich et.al. 2004. "Development and...\n",
+       "    ...                             ...\n",
+       "    station_elevation:              8 m MSL\n",
+       "    input_datastreams_description:  A string consisting of the datastream(s),...\n",
+       "    input_datastreams_num:          6\n",
+       "    input_datastreams:              nsasondewnpnC1.b1 : 5.190000 : 20080601.0...\n",
+       "    zeb_platform:                   nsasondeadjustC1.c1\n",
+       "    history:                        created by user gervais on machine emeral...
" + ], + "text/plain": [ + "\n", + "Dimensions: (time: 2975)\n", + "Coordinates:\n", + " * time (time) datetime64[ns] 2008-06-01T05:35:00 ... 2008-06-01T07...\n", + "Data variables: (12/35)\n", + " base_time datetime64[ns] ...\n", + " time_offset (time) datetime64[ns] ...\n", + " qc_time (time) int32 ...\n", + " pres (time) float32 ...\n", + " qc_pres (time) int32 ...\n", + " tdry (time) float32 ...\n", + " ... ...\n", + " qc_rh_scaled (time) int32 ...\n", + " dp_scaled (time) float32 ...\n", + " qc_dp_scaled (time) int32 ...\n", + " lat (time) float32 ...\n", + " lon (time) float32 ...\n", + " alt (time) float32 ...\n", + "Attributes: (12/16)\n", + " process_version: $State: vap-sonde_adjust-8.0-0.sol5_10$\n", + " command_line: sonde_adjust -d 20080601 -f nsaC1 -a 0\n", + " site_id: nsa\n", + " facility_id: C1: Barrow, Alaska\n", + " reference1: Wang et.al. 2002. \"Corrections of Humidit...\n", + " reference2: Miloshevich et.al. 2004. \"Development and...\n", + " ... ...\n", + " station_elevation: 8 m MSL\n", + " input_datastreams_description: A string consisting of the datastream(s),...\n", + " input_datastreams_num: 6\n", + " input_datastreams: nsasondewnpnC1.b1 : 5.190000 : 20080601.0...\n", + " zeb_platform: nsasondeadjustC1.c1\n", + " history: created by user gervais on machine emeral..." 
+ ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Dataset \n", + "ds" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "25e7de09", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Frozen({'base_time': \n", + "[1 values with dtype=datetime64[ns]]\n", + "Attributes:\n", + " string: 1-Jun-2008,5:35:00 GMT\n", + " long_name: Base time in Epoch, 'time_offset': \n", + "[2975 values with dtype=datetime64[ns]]\n", + "Attributes:\n", + " long_name: Time offset from base_time, 'time': \n", + "array(['2008-06-01T05:35:00.000000000', '2008-06-01T05:35:02.000000000',\n", + " '2008-06-01T05:35:04.000000000', ..., '2008-06-01T07:14:04.000000000',\n", + " '2008-06-01T07:14:06.000000000', '2008-06-01T07:14:08.000000000'],\n", + " dtype='datetime64[ns]')\n", + "Attributes:\n", + " long_name: Time offset from midnight, 'qc_time': \n", + "[2975 values with dtype=int32]\n", + "Attributes: (12/13)\n", + " long_name: Quality check results on field: Time offset from mi...\n", + " units: unitless\n", + " description: This field contains bit packed values which should ...\n", + " bit_1_description: Delta time between current and previous samples is ...\n", + " bit_1_assessment: Bad\n", + " bit_2_description: Delta time between current and previous samples is ...\n", + " ... 
...\n", + " bit_3_description: Delta time between current and previous samples is ...\n", + " bit_3_assessment: Bad\n", + " delta_t_lower_limit: 20.0\n", + " delta_t_upper_limit: 20.0\n", + " prior_sample_flag: 1\n", + " comment: If the 'prior_sample_flag' is set the first sample ..., 'pres': \n", + "[2975 values with dtype=float32]\n", + "Attributes:\n", + " long_name: Barometric pressure\n", + " units: hPa\n", + " valid_min: 0.0\n", + " valid_max: 1100.0, 'qc_pres': \n", + "[2975 values with dtype=int32]\n", + "Attributes:\n", + " long_name: Quality check results on field: Barometric pressure\n", + " units: unitless\n", + " description: This field contains bit packed values which should be...\n", + " bit_1_description: Value is less than the valid_min.\n", + " bit_1_assessment: Indeterminate\n", + " bit_2_description: Value is greater than the valid_max.\n", + " bit_2_assessment: Indeterminate\n", + " bit_3_description: Data value not available in input file, data value se...\n", + " bit_3_assessment: Bad, 'tdry': \n", + "[2975 values with dtype=float32]\n", + "Attributes:\n", + " long_name: Dry bulb temperature\n", + " units: C\n", + " valid_min: -80.0\n", + " valid_max: 50.0, 'qc_tdry': \n", + "[2975 values with dtype=int32]\n", + "Attributes:\n", + " long_name: Quality check results on field: Dry bulb temperature\n", + " units: unitless\n", + " description: This field contains bit packed values which should be...\n", + " bit_1_description: Value is less than the valid_min.\n", + " bit_1_assessment: Indeterminate\n", + " bit_2_description: Value is greater than the valid_max.\n", + " bit_2_assessment: Indeterminate\n", + " bit_3_description: Data value not available in input file, data value se...\n", + " bit_3_assessment: Bad, 'dp': \n", + "[2975 values with dtype=float32]\n", + "Attributes:\n", + " long_name: Dewpoint temperature\n", + " units: C\n", + " valid_min: -110.0\n", + " valid_max: 50.0, 'qc_dp': \n", + "[2975 values with dtype=int32]\n", + 
"Attributes:\n", + " long_name: Quality check results on field: Dewpoint temperature\n", + " units: unitless\n", + " description: This field contains bit packed values which should be...\n", + " bit_1_description: Value is less than the valid_min.\n", + " bit_1_assessment: Indeterminate\n", + " bit_2_description: Value is greater than the valid_max.\n", + " bit_2_assessment: Indeterminate\n", + " bit_3_description: Data value not available in input file, data value se...\n", + " bit_3_assessment: Bad, 'wspd': \n", + "[2975 values with dtype=float32]\n", + "Attributes:\n", + " long_name: Wind speed\n", + " units: m/s\n", + " valid_min: 0.0\n", + " valid_max: 100.0, 'qc_wspd': \n", + "[2975 values with dtype=int32]\n", + "Attributes:\n", + " long_name: Quality check results on field: Wind speed\n", + " units: unitless\n", + " description: This field contains bit packed values which should be...\n", + " bit_1_description: Value is less than the valid_min.\n", + " bit_1_assessment: Indeterminate\n", + " bit_2_description: Value is greater than the valid_max.\n", + " bit_2_assessment: Indeterminate\n", + " bit_3_description: Data value not available in input file, data value se...\n", + " bit_3_assessment: Bad, 'deg': \n", + "[2975 values with dtype=float32]\n", + "Attributes:\n", + " long_name: Wind direction\n", + " units: deg\n", + " valid_min: 0.0\n", + " valid_max: 360.0, 'qc_deg': \n", + "[2975 values with dtype=int32]\n", + "Attributes:\n", + " long_name: Quality check results on field: Wind direction\n", + " units: unitless\n", + " description: This field contains bit packed values which should be...\n", + " bit_1_description: Value is less than the valid_min.\n", + " bit_1_assessment: Indeterminate\n", + " bit_2_description: Value is greater than the valid_max.\n", + " bit_2_assessment: Indeterminate\n", + " bit_3_description: Data value not available in input file, data value se...\n", + " bit_3_assessment: Bad, 'rh': \n", + "[2975 values with 
dtype=float32]\n", + "Attributes:\n", + " long_name: Relative humidity\n", + " units: %\n", + " valid_min: 0.0\n", + " valid_max: 105.0, 'qc_rh': \n", + "[2975 values with dtype=int32]\n", + "Attributes:\n", + " long_name: Quality check results on field: Relative humidity\n", + " units: unitless\n", + " description: This field contains bit packed values which should be...\n", + " bit_1_description: Value is less than the valid_min.\n", + " bit_1_assessment: Indeterminate\n", + " bit_2_description: Value is greater than the valid_max.\n", + " bit_2_assessment: Indeterminate\n", + " bit_3_description: Data value not available in input file, data value se...\n", + " bit_3_assessment: Bad, 'u_wind': \n", + "[2975 values with dtype=float32]\n", + "Attributes:\n", + " long_name: Eastward wind component\n", + " units: m/s\n", + " calc: -1 * sin( deg ) * wspd\n", + " valid_min: -75.0\n", + " valid_max: 75.0, 'qc_u_wind': \n", + "[2975 values with dtype=int32]\n", + "Attributes:\n", + " long_name: Quality check results on field: Eastward wind component\n", + " units: unitless\n", + " description: This field contains bit packed values which should be...\n", + " bit_1_description: Value is less than the valid_min.\n", + " bit_1_assessment: Indeterminate\n", + " bit_2_description: Value is greater than the valid_max.\n", + " bit_2_assessment: Indeterminate\n", + " bit_3_description: Data value not available in input file, data value se...\n", + " bit_3_assessment: Bad, 'v_wind': \n", + "[2975 values with dtype=float32]\n", + "Attributes:\n", + " long_name: Northward wind component\n", + " units: m/s\n", + " calc: -1 * cos( deg ) * wspd\n", + " valid_min: -75.0\n", + " valid_max: 75.0, 'qc_v_wind': \n", + "[2975 values with dtype=int32]\n", + "Attributes:\n", + " long_name: Quality check results on field: Northward wind component\n", + " units: unitless\n", + " description: This field contains bit packed values which should be...\n", + " bit_1_description: Value is less than 
the valid_min.\n", + " bit_1_assessment: Indeterminate\n", + " bit_2_description: Value is greater than the valid_max.\n", + " bit_2_assessment: Indeterminate\n", + " bit_3_description: Data value not available in input file, data value se...\n", + " bit_3_assessment: Bad, 'wstat': \n", + "[2975 values with dtype=float32]\n", + "Attributes:\n", + " long_name: Wind status\n", + " units: unitless, 'asc': \n", + "[2975 values with dtype=float32]\n", + "Attributes:\n", + " long_name: Ascent rate\n", + " units: m/s\n", + " valid_min: -10.0\n", + " valid_max: 20.0, 'qc_asc': \n", + "[2975 values with dtype=int32]\n", + "Attributes:\n", + " long_name: Quality check results on field: Ascent rate\n", + " units: unitless\n", + " description: This field contains bit packed values which should be...\n", + " bit_1_description: Value is less than the valid_min.\n", + " bit_1_assessment: Indeterminate\n", + " bit_2_description: Value is greater than the valid_max.\n", + " bit_2_assessment: Indeterminate\n", + " bit_3_description: Data value not available in input file, data value se...\n", + " bit_3_assessment: Bad, 'rh_smooth': \n", + "[2975 values with dtype=float32]\n", + "Attributes:\n", + " long_name: Smoothed original relative humidity\n", + " units: %\n", + " valid_min: 0.0\n", + " valid_max: 100.0\n", + " note: Intermediate RH profile created by smoothing original RH sond..., 'qc_rh_smooth': \n", + "[2975 values with dtype=int32]\n", + "Attributes:\n", + " long_name: Quality check results on field: Smoothed original rel...\n", + " units: unitless\n", + " description: This field contains bit packed values which should be...\n", + " bit_1_description: Value is less than the valid_min.\n", + " bit_1_assessment: Indeterminate\n", + " bit_2_description: Value is greater than the valid_max.\n", + " bit_2_assessment: Indeterminate\n", + " bit_3_description: Data value not available in input file, data value se...\n", + " bit_3_assessment: Bad, 'rh_biased': \n", + "[2975 values 
with dtype=float32]\n", + "Attributes:\n", + " long_name: Dry bias corrected relative humidity\n", + " units: %\n", + " valid_min: 0.0\n", + " valid_max: 100.0\n", + " note1: Eliminates the dry bias as described in Wang 2002\n", + " note2: This field differs from the rh_smooth field for only the RS-8..., 'qc_rh_biased': \n", + "[2975 values with dtype=int32]\n", + "Attributes:\n", + " long_name: Quality check results on field: Dry bias corrected re...\n", + " units: unitless\n", + " description: This field contains bit packed values which should be...\n", + " bit_1_description: Value is less than the valid_min.\n", + " bit_1_assessment: Indeterminate\n", + " bit_2_description: Value is greater than the valid_max.\n", + " bit_2_assessment: Indeterminate\n", + " bit_3_description: Data value not available in input file, data value se...\n", + " bit_3_assessment: Bad, 'rh_adjust': \n", + "[2975 values with dtype=float32]\n", + "Attributes:\n", + " long_name: Final corrected ambient relative humidity\n", + " units: %\n", + " valid_min: 0.0\n", + " valid_max: 100.0\n", + " note: corrects for sensor time-lag (RS-80 sondes) and the solar war..., 'qc_rh_adjust': \n", + "[2975 values with dtype=int32]\n", + "Attributes:\n", + " long_name: Quality check results on field: Final corrected ambie...\n", + " units: unitless\n", + " description: This field contains bit packed values which should be...\n", + " bit_1_description: Value is less than the valid_min.\n", + " bit_1_assessment: Indeterminate\n", + " bit_2_description: Value is greater than the valid_max.\n", + " bit_2_assessment: Indeterminate\n", + " bit_3_description: Data value not available in input file, data value se...\n", + " bit_3_assessment: Bad, 'rh_scaled': \n", + "[2975 values with dtype=float32]\n", + "Attributes:\n", + " long_name: Scaled final corrected ambient relative humidity\n", + " units: %\n", + " valid_min: 0.0\n", + " valid_max: 100.0\n", + " note1: scale factor is the be_pwv from mwrret1liljclou 
datasteam\n", + " note2: when there is no mwr or when pwv < 0.8, values are -9999, 'qc_rh_scaled': \n", + "[2975 values with dtype=int32]\n", + "Attributes:\n", + " long_name: Quality check results on field: Scaled final correcte...\n", + " units: unitless\n", + " description: This field contains bit packed values which should be...\n", + " bit_1_description: Value is less than the valid_min.\n", + " bit_1_assessment: Indeterminate\n", + " bit_2_description: Value is greater than the valid_max.\n", + " bit_2_assessment: Indeterminate\n", + " bit_3_description: Data value not available in input file, data value se...\n", + " bit_3_assessment: Bad\n", + " bit_4_description: The value of the pwv from the mwr file used to scale ...\n", + " bit_4_assessment: Bad, 'dp_scaled': \n", + "[2975 values with dtype=float32]\n", + "Attributes:\n", + " long_name: Scaled dewpoint temperature\n", + " units: C\n", + " valid_min: -110.0\n", + " valid_max: 50.0\n", + " note1: scale factor is the be_pwv from mwrret1liljclou datastream\n", + " note2: when there is no mwr or when pwv < 0.8, values are -9999, 'qc_dp_scaled': \n", + "[2975 values with dtype=int32]\n", + "Attributes:\n", + " long_name: Quality check results on field: Scaled dewpoint tempe...\n", + " units: unitless\n", + " description: This field contains bit packed values which should be...\n", + " bit_1_description: Value is less than the valid_min.\n", + " bit_1_assessment: Indeterminate\n", + " bit_2_description: Value is greater than the valid_max.\n", + " bit_2_assessment: Indeterminate\n", + " bit_3_description: Data value not available in input file, data value se...\n", + " bit_3_assessment: Bad\n", + " bit_4_description: The value of the pwv from the mwr file used to scale ...\n", + " bit_4_assessment: Bad, 'lat': \n", + "[2975 values with dtype=float32]\n", + "Attributes:\n", + " long_name: North latitude\n", + " units: degree_N\n", + " valid_min: -90.0\n", + " valid_max: 90.0, 'lon': \n", + "[2975 values with 
dtype=float32]\n", + "Attributes:\n", + " long_name: East longitude\n", + " units: degree_E\n", + " valid_min: -180.0\n", + " valid_max: 180.0, 'alt': \n", + "[2975 values with dtype=float32]\n", + "Attributes:\n", + " long_name: Altitude above mean sea level\n", + " units: m})" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Data array, Variable\n", + "# Note: the info can be overwhelming. Do not attempt to grasp everything at the first glance.\n", + "ds.variables" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "8c41b67e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Frozen({'time': 2975})" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# dimenssions\n", + "ds.dims" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "156f1dfc", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Coordinates:\n", + " * time (time) datetime64[ns] 2008-06-01T05:35:00 ... 2008-06-01T07:14:08" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# coordinates\n", + "ds.coords" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "277d6064", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'process_version': '$State: vap-sonde_adjust-8.0-0.sol5_10$',\n", + " 'command_line': 'sonde_adjust -d 20080601 -f nsaC1 -a 0',\n", + " 'site_id': 'nsa',\n", + " 'facility_id': 'C1: Barrow, Alaska',\n", + " 'reference1': 'Wang et.al. 2002. \"Corrections of Humidity Measurement Errors from the Vaisala RS80-Radiosonde -- Application to TOGA COARE Data.\" Journal of Atmospheric and Oceanic Technology',\n", + " 'reference2': 'Miloshevich et.al. 2004. 
\"Development and Validation of a Time-Lag Correction for Vaisala Radiosonde Humidity Measurement.\" Journal of Atmospheric and Oceanic Technology',\n", + " 'reference3': 'Miloshevich et.al. 2009. \"Accuracy Assessment and Correction of Vaisala RS92 Radiosonde Water Vapor Measurements.\" Journal of Geophysical Research--Atmospheres',\n", + " 'qc_standards_version': '1.0',\n", + " 'dod_version': '5.0',\n", + " 'sonde_serial_number': 'C3526394',\n", + " 'station_elevation': '8 m MSL',\n", + " 'input_datastreams_description': 'A string consisting of the datastream(s), datastream version(s), and datastream date (range).',\n", + " 'input_datastreams_num': 6,\n", + " 'input_datastreams': 'nsasondewnpnC1.b1 : 5.190000 : 20080601.053500-20080601.175900 ;\\nnsametC1.b1 : 4.100000 : 20080531.234500-20080602.000000 ;',\n", + " 'zeb_platform': 'nsasondeadjustC1.c1',\n", + " 'history': 'created by user gervais on machine emerald at 2-Sep-2014,22:07:00, using $State: zebra-zeblib-4.23-0.el5 $'}" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Meta Data (Attributes)\n", + "ds.attrs" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "d334681f", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\n", + "\n", + "\n", + "\n" + ] + } + ], + "source": [ + "# type\n", + "print(type(ds))\n", + "print(type(ds.variables))\n", + "print(type(ds.dims))\n", + "print(type(ds.coords))\n", + "print(type(ds.attrs))" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "643399d6", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['base_time',\n", + " 'time_offset',\n", + " 'time',\n", + " 'qc_time',\n", + " 'pres',\n", + " 'qc_pres',\n", + " 'tdry',\n", + " 'qc_tdry',\n", + " 'dp',\n", + " 'qc_dp',\n", + " 'wspd',\n", + " 'qc_wspd',\n", + " 'deg',\n", + " 'qc_deg',\n", + " 'rh',\n", + " 'qc_rh',\n", + " 'u_wind',\n", + " 
'qc_u_wind',\n", + " 'v_wind',\n", + " 'qc_v_wind',\n", + " 'wstat',\n", + " 'asc',\n", + " 'qc_asc',\n", + " 'rh_smooth',\n", + " 'qc_rh_smooth',\n", + " 'rh_biased',\n", + " 'qc_rh_biased',\n", + " 'rh_adjust',\n", + " 'qc_rh_adjust',\n", + " 'rh_scaled',\n", + " 'qc_rh_scaled',\n", + " 'dp_scaled',\n", + " 'qc_dp_scaled',\n", + " 'lat',\n", + " 'lon',\n", + " 'alt']" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# bonus: retrieve variable names only\n", + "list(ds.variables)" + ] + }, + { + "cell_type": "markdown", + "id": "1cf36878", + "metadata": {}, + "source": [ + "#### Discussion: variable vs. coordinates vs. dimensions. \n", + "From [Components of a NetCDF Dataset](https://iprc.soest.hawaii.edu/users/xfu/tool/guidef-7.html#):\n", + "\n", + "* Dimension: A dimension may be used to represent a real physical dimension, for example, time, latitude, longitude, or height. A dimension might also be used to index other quantities, for example station or model-run-number.\n", + "A netCDF dimension has both a name and a length. A dimension length is an arbitrary positive integer, except that one dimension in a netCDF dataset can have the length UNLIMITED.\n", + "\n", + "* Variables: Variables are used to store the bulk of the data in a netCDF dataset. A variable represents an array of values of the same type. A scalar value is treated as a 0-dimensional array. A variable has a name, a data type, and a shape described by its list of dimensions specified when the variable is created. A variable may also have associated attributes, which may be added, deleted or changed after the variable is created.\n", + "\n", + "* Coordinate (Variables): It is legal for a variable to have the same name as a dimension. Such variables have no special meaning to the netCDF library. 
However there is a convention that such variables should be treated in a special way by software using this library.\n", + "A variable with the same name as a dimension is called a coordinate variable. It typically defines a physical coordinate corresponding to that dimension.\n", + "\n", + "\n", + "Tips: By definition, coordinates and dimensions are also variables. For example, ds.time is a coordinate, but it is also a special variable. In practice, however, when we talk about a variable, we mean a regular (non-coordinate) variable. We will use this convention for the rest of the notebook. " + ] + }, + { + "cell_type": "markdown", + "id": "d47120f9", + "metadata": {}, + "source": [ + "### Xarray Variable\n", + "\n", + "\n", + "We can use `ds.variables` to access the variables of a dataset. But printing out the whole dataset at once can often be overwhelming. Instead, we will work on an individual variable (also called an Xarray data array) using the `ds[var_name]` syntax, where `var_name` is the name of the variable. See the following examples in action.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "id": "5a57e136", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
<xarray.DataArray 'time' (time: 2975)>\n",
+       "array(['2008-06-01T05:35:00.000000000', '2008-06-01T05:35:02.000000000',\n",
+       "       '2008-06-01T05:35:04.000000000', ..., '2008-06-01T07:14:04.000000000',\n",
+       "       '2008-06-01T07:14:06.000000000', '2008-06-01T07:14:08.000000000'],\n",
+       "      dtype='datetime64[ns]')\n",
+       "Coordinates:\n",
+       "  * time     (time) datetime64[ns] 2008-06-01T05:35:00 ... 2008-06-01T07:14:08\n",
+       "Attributes:\n",
+       "    long_name:  Time offset from midnight
" + ], + "text/plain": [ + "\n", + "array(['2008-06-01T05:35:00.000000000', '2008-06-01T05:35:02.000000000',\n", + " '2008-06-01T05:35:04.000000000', ..., '2008-06-01T07:14:04.000000000',\n", + " '2008-06-01T07:14:06.000000000', '2008-06-01T07:14:08.000000000'],\n", + " dtype='datetime64[ns]')\n", + "Coordinates:\n", + " * time (time) datetime64[ns] 2008-06-01T05:35:00 ... 2008-06-01T07:14:08\n", + "Attributes:\n", + " long_name: Time offset from midnight" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "print(type(ds[\"time\"]))\n", + "ds[\"time\"]" + ] + }, + { + "cell_type": "markdown", + "id": "51ec643e", + "metadata": {}, + "source": [ + "#### Variable properties\n", + "For an individual variable, the following properties are the most commonly used, listed with the associated xarray attribute used to retrieve each of them (assuming var = ds[var_name]).\n", + "* name: `var.name`\n", + "* data content: `var.data`\n", + "* attributes: `var.attrs`\n", + "* dimensions: `var.dims`\n", + "* data type: `var.data.dtype`" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "id": "a632a525", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "var.name: \n", + " time \n", + "\n", + "var.data: \n", + " ['2008-06-01T05:35:00.000000000' '2008-06-01T05:35:02.000000000'\n", + " '2008-06-01T05:35:04.000000000' ... 
'2008-06-01T07:14:04.000000000'\n", + " '2008-06-01T07:14:06.000000000' '2008-06-01T07:14:08.000000000'] \n", + "\n", + "var.attrs: \n", + " {'long_name': 'Time offset from midnight'} \n", + "\n", + "var.dims: \n", + " ('time',) \n", + "\n", + "var.data.dtype: \n", + " datetime64[ns] \n", + "\n" + ] + } + ], + "source": [ + "var_name = \"time\"\n", + "var = ds[var_name]\n", + "\n", + "print(\"var.name: \\n\", var.name, \"\\n\")\n", + "print(\"var.data: \\n\", var.data, \"\\n\")\n", + "print(\"var.attrs: \\n\", var.attrs, \"\\n\")\n", + "print(\"var.dims: \\n\", var.dims, \"\\n\")\n", + "print(\"var.data.dtype: \\n\", var.data.dtype, \"\\n\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "id": "b256b07f", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
var_namedimsis_dimn_dimattrsdtype
0base_time()False0{'string': '1-Jun-2008,5:35:00 GMT', 'long_nam...datetime64[ns]
1time_offset(time,)False1{'long_name': 'Time offset from base_time'}datetime64[ns]
2time(time,)True1{'long_name': 'Time offset from midnight'}datetime64[ns]
3qc_time(time,)False1{'long_name': 'Quality check results on field:...int32
4pres(time,)False1{'long_name': 'Barometric pressure', 'units': ...float32
5qc_pres(time,)False1{'long_name': 'Quality check results on field:...int32
6tdry(time,)False1{'long_name': 'Dry bulb temperature', 'units':...float32
7qc_tdry(time,)False1{'long_name': 'Quality check results on field:...int32
8dp(time,)False1{'long_name': 'Dewpoint temperature', 'units':...float32
9qc_dp(time,)False1{'long_name': 'Quality check results on field:...int32
10wspd(time,)False1{'long_name': 'Wind speed', 'units': 'm/s', 'v...float32
11qc_wspd(time,)False1{'long_name': 'Quality check results on field:...int32
12deg(time,)False1{'long_name': 'Wind direction', 'units': 'deg'...float32
13qc_deg(time,)False1{'long_name': 'Quality check results on field:...int32
14rh(time,)False1{'long_name': 'Relative humidity', 'units': '%...float32
15qc_rh(time,)False1{'long_name': 'Quality check results on field:...int32
16u_wind(time,)False1{'long_name': 'Eastward wind component', 'unit...float32
17qc_u_wind(time,)False1{'long_name': 'Quality check results on field:...int32
18v_wind(time,)False1{'long_name': 'Northward wind component', 'uni...float32
19qc_v_wind(time,)False1{'long_name': 'Quality check results on field:...int32
20wstat(time,)False1{'long_name': 'Wind status', 'units': 'unitless'}float32
21asc(time,)False1{'long_name': 'Ascent rate', 'units': 'm/s', '...float32
22qc_asc(time,)False1{'long_name': 'Quality check results on field:...int32
23rh_smooth(time,)False1{'long_name': 'Smoothed original relative humi...float32
24qc_rh_smooth(time,)False1{'long_name': 'Quality check results on field:...int32
25rh_biased(time,)False1{'long_name': 'Dry bias corrected relative hum...float32
26qc_rh_biased(time,)False1{'long_name': 'Quality check results on field:...int32
27rh_adjust(time,)False1{'long_name': 'Final corrected ambient relativ...float32
28qc_rh_adjust(time,)False1{'long_name': 'Quality check results on field:...int32
29rh_scaled(time,)False1{'long_name': 'Scaled final corrected ambient ...float32
30qc_rh_scaled(time,)False1{'long_name': 'Quality check results on field:...int32
31dp_scaled(time,)False1{'long_name': 'Scaled dewpoint temperature', '...float32
32qc_dp_scaled(time,)False1{'long_name': 'Quality check results on field:...int32
33lat(time,)False1{'long_name': 'North latitude', 'units': 'degr...float32
34lon(time,)False1{'long_name': 'East longitude', 'units': 'degr...float32
35alt(time,)False1{'long_name': 'Altitude above mean sea level',...float32
\n", + "
" + ], + "text/plain": [ + " var_name dims is_dim n_dim \\\n", + "0 base_time () False 0 \n", + "1 time_offset (time,) False 1 \n", + "2 time (time,) True 1 \n", + "3 qc_time (time,) False 1 \n", + "4 pres (time,) False 1 \n", + "5 qc_pres (time,) False 1 \n", + "6 tdry (time,) False 1 \n", + "7 qc_tdry (time,) False 1 \n", + "8 dp (time,) False 1 \n", + "9 qc_dp (time,) False 1 \n", + "10 wspd (time,) False 1 \n", + "11 qc_wspd (time,) False 1 \n", + "12 deg (time,) False 1 \n", + "13 qc_deg (time,) False 1 \n", + "14 rh (time,) False 1 \n", + "15 qc_rh (time,) False 1 \n", + "16 u_wind (time,) False 1 \n", + "17 qc_u_wind (time,) False 1 \n", + "18 v_wind (time,) False 1 \n", + "19 qc_v_wind (time,) False 1 \n", + "20 wstat (time,) False 1 \n", + "21 asc (time,) False 1 \n", + "22 qc_asc (time,) False 1 \n", + "23 rh_smooth (time,) False 1 \n", + "24 qc_rh_smooth (time,) False 1 \n", + "25 rh_biased (time,) False 1 \n", + "26 qc_rh_biased (time,) False 1 \n", + "27 rh_adjust (time,) False 1 \n", + "28 qc_rh_adjust (time,) False 1 \n", + "29 rh_scaled (time,) False 1 \n", + "30 qc_rh_scaled (time,) False 1 \n", + "31 dp_scaled (time,) False 1 \n", + "32 qc_dp_scaled (time,) False 1 \n", + "33 lat (time,) False 1 \n", + "34 lon (time,) False 1 \n", + "35 alt (time,) False 1 \n", + "\n", + " attrs dtype \n", + "0 {'string': '1-Jun-2008,5:35:00 GMT', 'long_nam... datetime64[ns] \n", + "1 {'long_name': 'Time offset from base_time'} datetime64[ns] \n", + "2 {'long_name': 'Time offset from midnight'} datetime64[ns] \n", + "3 {'long_name': 'Quality check results on field:... int32 \n", + "4 {'long_name': 'Barometric pressure', 'units': ... float32 \n", + "5 {'long_name': 'Quality check results on field:... int32 \n", + "6 {'long_name': 'Dry bulb temperature', 'units':... float32 \n", + "7 {'long_name': 'Quality check results on field:... int32 \n", + "8 {'long_name': 'Dewpoint temperature', 'units':... float32 \n", + "9 {'long_name': 'Quality check results on field:... 
int32 \n", + "10 {'long_name': 'Wind speed', 'units': 'm/s', 'v... float32 \n", + "11 {'long_name': 'Quality check results on field:... int32 \n", + "12 {'long_name': 'Wind direction', 'units': 'deg'... float32 \n", + "13 {'long_name': 'Quality check results on field:... int32 \n", + "14 {'long_name': 'Relative humidity', 'units': '%... float32 \n", + "15 {'long_name': 'Quality check results on field:... int32 \n", + "16 {'long_name': 'Eastward wind component', 'unit... float32 \n", + "17 {'long_name': 'Quality check results on field:... int32 \n", + "18 {'long_name': 'Northward wind component', 'uni... float32 \n", + "19 {'long_name': 'Quality check results on field:... int32 \n", + "20 {'long_name': 'Wind status', 'units': 'unitless'} float32 \n", + "21 {'long_name': 'Ascent rate', 'units': 'm/s', '... float32 \n", + "22 {'long_name': 'Quality check results on field:... int32 \n", + "23 {'long_name': 'Smoothed original relative humi... float32 \n", + "24 {'long_name': 'Quality check results on field:... int32 \n", + "25 {'long_name': 'Dry bias corrected relative hum... float32 \n", + "26 {'long_name': 'Quality check results on field:... int32 \n", + "27 {'long_name': 'Final corrected ambient relativ... float32 \n", + "28 {'long_name': 'Quality check results on field:... int32 \n", + "29 {'long_name': 'Scaled final corrected ambient ... float32 \n", + "30 {'long_name': 'Quality check results on field:... int32 \n", + "31 {'long_name': 'Scaled dewpoint temperature', '... float32 \n", + "32 {'long_name': 'Quality check results on field:... int32 \n", + "33 {'long_name': 'North latitude', 'units': 'degr... float32 \n", + "34 {'long_name': 'East longitude', 'units': 'degr... float32 \n", + "35 {'long_name': 'Altitude above mean sea level',... 
float32 " + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# bonus: individual variable info table (collect and display variable info in a tabular format)\n", + "df_info = pd.DataFrame()\n", + "df_info[\"var_name\"] = list(ds.variables)\n", + "df_info[\"dims\"] = df_info.var_name.apply(lambda x: ds[x].dims)\n", + "df_info[\"is_dim\"] = df_info.var_name.apply(lambda x: x in ds.dims)\n", + "df_info[\"n_dim\"] = df_info.var_name.apply(lambda x: len(ds[x].dims))\n", + "df_info[\"attrs\"] = df_info.var_name.apply(lambda x: ds[x].attrs)\n", + "df_info[\"dtype\"] = df_info.var_name.apply(lambda x: ds[x].data.dtype)\n", + "df_info" + ] + }, + { + "cell_type": "markdown", + "id": "e6a36b96", + "metadata": {}, + "source": [ + "### Data cleaning/Preprocessing (skipped)\n", + "Data cleaning and preprocessing is an important stage in the data analysis pipeline. However, it is outside the scope of this notebook. For more information about data cleaning and preprocessing basics, here are some references. \n", + "* [Xarray Fundamentals](https://earth-env-data-science.github.io/lectures/xarray/xarray.html)\n", + "* [Xarray Tutorial — Pangeo Gallery documentation](http://gallery.pangeo.io/repos/pangeo-data/pangeo-tutorial-gallery/xarray.html)\n", + "* [Pythonic Data Cleaning With pandas and NumPy](https://realpython.com/python-data-cleaning-numpy-pandas/)\n", + "* [Pandas - Cleaning Data](https://www.w3schools.com/python/pandas/pandas_cleaning.asp)" + ] + }, + { + "cell_type": "markdown", + "id": "ae6e1a69", + "metadata": {}, + "source": [ + "### Plotting\n", + "Data visualization (or plotting) is another important data analysis topic and deserves its own discussion. 
Here in this notebook we will only demonstrate basic tools for simple data visualization tasks.\n", + "\n", + "Here are some references you might find useful:\n", + "* xarray plotting: https://docs.xarray.dev/en/stable/user-guide/plotting.html\n", + "* Atmospheric Community Toolkit (ACT): https://arm-doe.github.io/ACT/index.html\n", + "\n", + "Note: this notebook is auto-generated from a template. It uses a general heuristic to select the variable(s) to plot and is not customized for each individual notebook. Feel free to change the variables of interest, especially if a certain figure fails to plot." + ] + }, + { + "cell_type": "markdown", + "id": "7e417fb2", + "metadata": {}, + "source": [ + "#### 1-dimensional basic time series plot\n", + "\n", + "For the following plot we would like to find variables such that\n", + "* it has one and only one dimension\n", + "* \"time\" is its coordinate variable\n", + "* it is not a dimension itself,\n", + "* it is not a special variable whose name contains any of the substrings in [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "\n", + "Please see the following example in action. (For more details about pandas filtering, please see the following references.)\n", + "* [pandas: multiple conditions while indexing data frame](https://stackoverflow.com/questions/22591174/pandas-multiple-conditions-while-indexing-data-frame-unexpected-behavior)\n", + "* [pandas.DataFrame.apply](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.apply.html)\n", + "* [How to test if a string contains one of the substrings in a list...](https://stackoverflow.com/questions/26577516/how-to-test-if-a-string-contains-one-of-the-substrings-in-a-list-in-pandas)" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "id": "30edac2d", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
var_namedimsis_dimn_dimattrsdtype
4pres(time,)False1{'long_name': 'Barometric pressure', 'units': ...float32
6tdry(time,)False1{'long_name': 'Dry bulb temperature', 'units':...float32
8dp(time,)False1{'long_name': 'Dewpoint temperature', 'units':...float32
10wspd(time,)False1{'long_name': 'Wind speed', 'units': 'm/s', 'v...float32
12deg(time,)False1{'long_name': 'Wind direction', 'units': 'deg'...float32
14rh(time,)False1{'long_name': 'Relative humidity', 'units': '%...float32
16u_wind(time,)False1{'long_name': 'Eastward wind component', 'unit...float32
18v_wind(time,)False1{'long_name': 'Northward wind component', 'uni...float32
20wstat(time,)False1{'long_name': 'Wind status', 'units': 'unitless'}float32
21asc(time,)False1{'long_name': 'Ascent rate', 'units': 'm/s', '...float32
23rh_smooth(time,)False1{'long_name': 'Smoothed original relative humi...float32
25rh_biased(time,)False1{'long_name': 'Dry bias corrected relative hum...float32
27rh_adjust(time,)False1{'long_name': 'Final corrected ambient relativ...float32
29rh_scaled(time,)False1{'long_name': 'Scaled final corrected ambient ...float32
31dp_scaled(time,)False1{'long_name': 'Scaled dewpoint temperature', '...float32
\n", + "
" + ], + "text/plain": [ + " var_name dims is_dim n_dim \\\n", + "4 pres (time,) False 1 \n", + "6 tdry (time,) False 1 \n", + "8 dp (time,) False 1 \n", + "10 wspd (time,) False 1 \n", + "12 deg (time,) False 1 \n", + "14 rh (time,) False 1 \n", + "16 u_wind (time,) False 1 \n", + "18 v_wind (time,) False 1 \n", + "20 wstat (time,) False 1 \n", + "21 asc (time,) False 1 \n", + "23 rh_smooth (time,) False 1 \n", + "25 rh_biased (time,) False 1 \n", + "27 rh_adjust (time,) False 1 \n", + "29 rh_scaled (time,) False 1 \n", + "31 dp_scaled (time,) False 1 \n", + "\n", + " attrs dtype \n", + "4 {'long_name': 'Barometric pressure', 'units': ... float32 \n", + "6 {'long_name': 'Dry bulb temperature', 'units':... float32 \n", + "8 {'long_name': 'Dewpoint temperature', 'units':... float32 \n", + "10 {'long_name': 'Wind speed', 'units': 'm/s', 'v... float32 \n", + "12 {'long_name': 'Wind direction', 'units': 'deg'... float32 \n", + "14 {'long_name': 'Relative humidity', 'units': '%... float32 \n", + "16 {'long_name': 'Eastward wind component', 'unit... float32 \n", + "18 {'long_name': 'Northward wind component', 'uni... float32 \n", + "20 {'long_name': 'Wind status', 'units': 'unitless'} float32 \n", + "21 {'long_name': 'Ascent rate', 'units': 'm/s', '... float32 \n", + "23 {'long_name': 'Smoothed original relative humi... float32 \n", + "25 {'long_name': 'Dry bias corrected relative hum... float32 \n", + "27 {'long_name': 'Final corrected ambient relativ... float32 \n", + "29 {'long_name': 'Scaled final corrected ambient ... float32 \n", + "31 {'long_name': 'Scaled dewpoint temperature', '... 
float32 " + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# (pandas query) filter n_dim==1, non-dimenssional, and not contains the following substrings\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter = df_info[(df_info.n_dim == 1) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (df_info.is_dim==False) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "id": "4777a659", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " var_1d = df_filter.var_name.values[0]\n", + " var_1d\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "id": "fb5ae985", + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# Note: if failed, change to another variable to plot.\n", + "try:\n", + " ds[var_1d].plot()\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "markdown", + "id": "9e41066e", + "metadata": {}, + "source": [ + "#### 2-dimenssional basic plot" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "id": "09bd4adc", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
var_namedimsis_dimn_dimattrsdtype
\n", + "
" + ], + "text/plain": [ + "Empty DataFrame\n", + "Columns: [var_name, dims, is_dim, n_dim, attrs, dtype]\n", + "Index: []" + ] + }, + "execution_count": 26, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# (pandas query) filter n_dim==2, non-dimenssional, and not contains the following substrings\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_2 = df_info[(df_info.n_dim == 2) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (df_info.is_dim==False) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter_2" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "id": "2cdea1a9", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "index 0 is out of bounds for axis 0 with size 0\n" + ] + } + ], + "source": [ + "try:\n", + " var_2d = df_filter_2.var_name.values[0]\n", + " var_2d\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "id": "f5f48879", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "name 'var_2d' is not defined\n" + ] + } + ], + "source": [ + "# Note: if failed, change to another variable to plot.\n", + "try:\n", + " print(ds[var_2d].dims)\n", + " # ds[var_2d].plot()\n", + "\n", + " # conventionally, use \"time\" as x-axis\n", + " ds[var_2d].plot(x=\"time\")\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "markdown", + "id": "7554b2a6", + "metadata": {}, + "source": [ + "#### qc-plotting (optional)\n", + "\n", + "Note: act qc plotting has more strict requirement, one of them is the associated qc variable need to have \"flag_masks\" attributes. 
Which is added by using ds = act.io.armfiles.read_netcdf(files_list), then ds.clean.cleanup()" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "id": "71c2096f", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n" + ] + } + ], + "source": [ + "try:\n", + " ds_act = act.io.armfiles.read_netcdf(full_path)\n", + " print(type(ds_act))\n", + " ds_act.clean.cleanup()\n", + "\n", + " # or \n", + " # ds.clean.cleanup()\n", + "except Exception as e:\n", + " print(\"ERROR\", e)\n", + " ds_act = ds" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "id": "844f8505", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
var_namedimsis_dimn_dimattrsdtype
4pres(time,)False1{'long_name': 'Barometric pressure', 'units': ...float32
6tdry(time,)False1{'long_name': 'Dry bulb temperature', 'units':...float32
8dp(time,)False1{'long_name': 'Dewpoint temperature', 'units':...float32
10wspd(time,)False1{'long_name': 'Wind speed', 'units': 'm/s', 'v...float32
12deg(time,)False1{'long_name': 'Wind direction', 'units': 'deg'...float32
14rh(time,)False1{'long_name': 'Relative humidity', 'units': '%...float32
16u_wind(time,)False1{'long_name': 'Eastward wind component', 'unit...float32
18v_wind(time,)False1{'long_name': 'Northward wind component', 'uni...float32
21asc(time,)False1{'long_name': 'Ascent rate', 'units': 'm/s', '...float32
23rh_smooth(time,)False1{'long_name': 'Smoothed original relative humi...float32
25rh_biased(time,)False1{'long_name': 'Dry bias corrected relative hum...float32
27rh_adjust(time,)False1{'long_name': 'Final corrected ambient relativ...float32
29rh_scaled(time,)False1{'long_name': 'Scaled final corrected ambient ...float32
31dp_scaled(time,)False1{'long_name': 'Scaled dewpoint temperature', '...float32
\n", + "
" + ], + "text/plain": [ + " var_name dims is_dim n_dim \\\n", + "4 pres (time,) False 1 \n", + "6 tdry (time,) False 1 \n", + "8 dp (time,) False 1 \n", + "10 wspd (time,) False 1 \n", + "12 deg (time,) False 1 \n", + "14 rh (time,) False 1 \n", + "16 u_wind (time,) False 1 \n", + "18 v_wind (time,) False 1 \n", + "21 asc (time,) False 1 \n", + "23 rh_smooth (time,) False 1 \n", + "25 rh_biased (time,) False 1 \n", + "27 rh_adjust (time,) False 1 \n", + "29 rh_scaled (time,) False 1 \n", + "31 dp_scaled (time,) False 1 \n", + "\n", + " attrs dtype \n", + "4 {'long_name': 'Barometric pressure', 'units': ... float32 \n", + "6 {'long_name': 'Dry bulb temperature', 'units':... float32 \n", + "8 {'long_name': 'Dewpoint temperature', 'units':... float32 \n", + "10 {'long_name': 'Wind speed', 'units': 'm/s', 'v... float32 \n", + "12 {'long_name': 'Wind direction', 'units': 'deg'... float32 \n", + "14 {'long_name': 'Relative humidity', 'units': '%... float32 \n", + "16 {'long_name': 'Eastward wind component', 'unit... float32 \n", + "18 {'long_name': 'Northward wind component', 'uni... float32 \n", + "21 {'long_name': 'Ascent rate', 'units': 'm/s', '... float32 \n", + "23 {'long_name': 'Smoothed original relative humi... float32 \n", + "25 {'long_name': 'Dry bias corrected relative hum... float32 \n", + "27 {'long_name': 'Final corrected ambient relativ... float32 \n", + "29 {'long_name': 'Scaled final corrected ambient ... float32 \n", + "31 {'long_name': 'Scaled dewpoint temperature', '... 
float32 " + ] + }, + "execution_count": 30, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Filter valid variables for ACT qc plotting\n", + "condition = (\"qc_\" + df_info.var_name).apply(lambda x: ds_act[x].attrs.get(\"flag_masks\") is not None \n", + " if x in list(ds.variables) else False)\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_3 = df_info[(df_info.is_dim==False) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings))) &\n", + " condition\n", + " ]\n", + "df_filter_3" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "id": "ecd38cc6", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "pres\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/kefeimo/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/act/utils/datetime_utils.py:136: FutureWarning: Unlike other reduction functions (e.g. `skew`, `kurtosis`), the default behavior of `mode` typically preserves the axis it acts along. In SciPy 1.11.0, this behavior will change: the default value of `keepdims` will become False, the `axis` over which the statistic is taken will be eliminated, and the value None will no longer be accepted. Set `keepdims` to True or False to avoid this warning.\n", + " mode = stats.mode(np.diff(time))\n" + ] + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# QC Plot\n", + "try:\n", + " qc_variable = df_filter_3.var_name.values[0]\n", + " print(qc_variable)\n", + "except Exception as e:\n", + " print(e)\n", + " \n", + "try:\n", + "\n", + " # Plot\n", + " qc_display = act.plotting.TimeSeriesDisplay(ds_act)\n", + " qc_display.add_subplots((2,), figsize = (9.5,10))\n", + " qc_ax = qc_display.plot(qc_variable, subplot_index=(0,), set_title=\"QC results on field: \" + qc_variable,)\n", + " qc_ax.grid()\n", + " qc_display.qc_flag_block_plot(qc_variable, subplot_index=(1,))\n", + " qc_ax.set_xlabel(f\"UTC Time starts at {ds.time.data[0]}\")\n", + "\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e)" + ] + }, + { + "cell_type": "markdown", + "id": "d76e4d27", + "metadata": {}, + "source": [ + "#### bonus: choose variables to plot from a dropdown menu " + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "id": "bb733804", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
var_namedimsis_dimn_dimattrsdtype
4pres(time,)False1{'long_name': 'Barometric pressure', 'units': ...float32
6tdry(time,)False1{'long_name': 'Dry bulb temperature', 'units':...float32
8dp(time,)False1{'long_name': 'Dewpoint temperature', 'units':...float32
10wspd(time,)False1{'long_name': 'Wind speed', 'units': 'm/s', 'v...float32
12deg(time,)False1{'long_name': 'Wind direction', 'units': 'deg'...float32
14rh(time,)False1{'long_name': 'Relative humidity', 'units': '%...float32
16u_wind(time,)False1{'long_name': 'Eastward wind component', 'unit...float32
18v_wind(time,)False1{'long_name': 'Northward wind component', 'uni...float32
20wstat(time,)False1{'long_name': 'Wind status', 'units': 'unitless'}float32
21asc(time,)False1{'long_name': 'Ascent rate', 'units': 'm/s', '...float32
23rh_smooth(time,)False1{'long_name': 'Smoothed original relative humi...float32
25rh_biased(time,)False1{'long_name': 'Dry bias corrected relative hum...float32
27rh_adjust(time,)False1{'long_name': 'Final corrected ambient relativ...float32
29rh_scaled(time,)False1{'long_name': 'Scaled final corrected ambient ...float32
31dp_scaled(time,)False1{'long_name': 'Scaled dewpoint temperature', '...float32
\n", + "
" + ], + "text/plain": [ + " var_name dims is_dim n_dim \\\n", + "4 pres (time,) False 1 \n", + "6 tdry (time,) False 1 \n", + "8 dp (time,) False 1 \n", + "10 wspd (time,) False 1 \n", + "12 deg (time,) False 1 \n", + "14 rh (time,) False 1 \n", + "16 u_wind (time,) False 1 \n", + "18 v_wind (time,) False 1 \n", + "20 wstat (time,) False 1 \n", + "21 asc (time,) False 1 \n", + "23 rh_smooth (time,) False 1 \n", + "25 rh_biased (time,) False 1 \n", + "27 rh_adjust (time,) False 1 \n", + "29 rh_scaled (time,) False 1 \n", + "31 dp_scaled (time,) False 1 \n", + "\n", + " attrs dtype \n", + "4 {'long_name': 'Barometric pressure', 'units': ... float32 \n", + "6 {'long_name': 'Dry bulb temperature', 'units':... float32 \n", + "8 {'long_name': 'Dewpoint temperature', 'units':... float32 \n", + "10 {'long_name': 'Wind speed', 'units': 'm/s', 'v... float32 \n", + "12 {'long_name': 'Wind direction', 'units': 'deg'... float32 \n", + "14 {'long_name': 'Relative humidity', 'units': '%... float32 \n", + "16 {'long_name': 'Eastward wind component', 'unit... float32 \n", + "18 {'long_name': 'Northward wind component', 'uni... float32 \n", + "20 {'long_name': 'Wind status', 'units': 'unitless'} float32 \n", + "21 {'long_name': 'Ascent rate', 'units': 'm/s', '... float32 \n", + "23 {'long_name': 'Smoothed original relative humi... float32 \n", + "25 {'long_name': 'Dry bias corrected relative hum... float32 \n", + "27 {'long_name': 'Final corrected ambient relativ... float32 \n", + "29 {'long_name': 'Scaled final corrected ambient ... float32 \n", + "31 {'long_name': 'Scaled dewpoint temperature', '... 
float32 " + ] + }, + "execution_count": 32, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Valid variables filtering\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_4 = df_info[(df_info.is_dim==False) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter_4" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "id": "82d5c20d", + "metadata": {}, + "outputs": [], + "source": [ + "# example 1: using xarray plot\n", + "\n", + "# Uncomment the following cell to try the interactive plot (ctrl + /)" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "id": "2fc9aaa4", + "metadata": {}, + "outputs": [], + "source": [ + "# %matplotlib widget\n", + "# plt.clf()\n", + "\n", + "# fig, ax = plt.subplots(figsize=(10, 4))\n", + "\n", + "# available_variables = df_filter_4.var_name.values\n", + "# @widgets.interact(var=available_variables)\n", + "# def update(var = available_variables[0]):\n", + "# fig.clear() # Remove old lines from plot and plot new one\n", + "# if len(ds[var].dims)==2:\n", + "# ds[var].plot(x=\"time\", add_colorbar=False)\n", + "# else:\n", + "# ds[var].plot()\n", + "# plt.grid()\n", + "# plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "id": "fabdd802", + "metadata": {}, + "outputs": [], + "source": [ + "# example 2: using act plot\n", + "\n", + "# Uncomment the following cell to try the interactive plot (ctrl + /)" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "id": "962b4186", + "metadata": {}, + "outputs": [], + "source": [ + "# %matplotlib widget\n", + "# plt.clf()\n", + "\n", + "# available_variables = df_filter_4.var_name.values\n", + "\n", + "\n", + "# @widgets.interact(var=available_variables)\n", + "# def update(var = available_variables[0]):\n", + "\n", + "# i_display = act.plotting.TimeSeriesDisplay(ds_act)\n", 
+ "# i_display.add_subplots((1,), figsize=(10, 4))\n", + "# ax = i_display.plot(var, subplot_index=(0,), set_title=f\"{var} ({ds_act[var].attrs['long_name']})\",)\n", + "\n", + "# ax.set_xlabel(f\"UTC Time starts at {ds.time.data[0]}\")\n", + "# ax.grid()\n", + "# plt.show()\n" + ] + }, + { + "cell_type": "markdown", + "id": "ba714aa6", + "metadata": {}, + "source": [ + "## Skew-T Plot" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "id": "606dc551", + "metadata": {}, + "outputs": [ + { + "ename": "NameError", + "evalue": "name 'files_list' is not defined", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[37], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m launch_times \u001b[38;5;241m=\u001b[39m [\u001b[38;5;28mstr\u001b[39m(datetime\u001b[38;5;241m.\u001b[39mstrptime(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;241m.\u001b[39mjoin(f\u001b[38;5;241m.\u001b[39msplit(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m.\u001b[39m\u001b[38;5;124m'\u001b[39m)[\u001b[38;5;241m-\u001b[39m\u001b[38;5;241m3\u001b[39m:\u001b[38;5;241m-\u001b[39m\u001b[38;5;241m1\u001b[39m]), \u001b[38;5;124m'\u001b[39m\u001b[38;5;124m%\u001b[39m\u001b[38;5;124mY\u001b[39m\u001b[38;5;124m%\u001b[39m\u001b[38;5;124mm\u001b[39m\u001b[38;5;132;01m%d\u001b[39;00m\u001b[38;5;124m%\u001b[39m\u001b[38;5;124mH\u001b[39m\u001b[38;5;124m%\u001b[39m\u001b[38;5;124mM\u001b[39m\u001b[38;5;124m%\u001b[39m\u001b[38;5;124mS\u001b[39m\u001b[38;5;124m'\u001b[39m)) \u001b[38;5;28;01mfor\u001b[39;00m f \u001b[38;5;129;01min\u001b[39;00m \u001b[43mfiles_list\u001b[49m]\n\u001b[1;32m 2\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mAvailable sonde launch times:\u001b[39m\u001b[38;5;124m'\u001b[39m)\n\u001b[1;32m 3\u001b[0m 
display(pd\u001b[38;5;241m.\u001b[39mDataFrame(launch_times, columns\u001b[38;5;241m=\u001b[39m[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mLaunch Time\u001b[39m\u001b[38;5;124m'\u001b[39m]))\n", + "\u001b[0;31mNameError\u001b[0m: name 'files_list' is not defined" + ] + } + ], + "source": [ + "launch_times = [str(datetime.strptime(''.join(f.split('.')[-3:-1]), '%Y%m%d%H%M%S')) for f in files_list]\n", + "print('Available sonde launch times:')\n", + "display(pd.DataFrame(launch_times, columns=['Launch Time']))\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d85d73a9", + "metadata": {}, + "outputs": [], + "source": [ + "# select sonde launch time from the list\n", + "launch_time_index = 0\n", + "sonde_file = files_list[launch_time_index]\n", + "sonde_ds = act.io.armfiles.read_netcdf(sonde_file)\n", + "\n", + "# Calculate stability indicies\n", + "sonde_ds = act.retrievals.calculate_stability_indicies(\n", + " sonde_ds, temp_name='tdry', td_name='dp', p_name='pres', rh_name='rh'\n", + ")\n", + "\n", + "# Set up plot\n", + "skewt = act.plotting.SkewTDisplay(sonde_ds, figsize=(7, 10))\n", + "\n", + "# Add data\n", + "skewt.plot_from_u_and_v('u_wind', 'v_wind', 'pres', 'tdry', 'dp', set_title=f'Skew-T Plot for {launch_times[launch_time_index]}')\n", + "sonde_ds.close()\n", + "plt.show()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.16" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/SONDEADJUST/SONDEADJUST_tutorial.ipynb 
b/VAPs/quicklook/SONDEADJUST/SONDEADJUST_tutorial.ipynb new file mode 100644 index 00000000..b964d6ae --- /dev/null +++ b/VAPs/quicklook/SONDEADJUST/SONDEADJUST_tutorial.ipynb @@ -0,0 +1,884 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# SONDEADJUST.C1 Notebook\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/sondeadjust) for more information about this vap." + ] + }, + { + "cell_type": "markdown", + "id": "97097763", + "metadata": {}, + "source": [ + "In this notebook, we demonstrate the workflow to explore ARM vap data (using sondeadjust as an example.) Value-added products (VAPs) are higher-order data products that have been analyzed and processed to ease scientists’ use of ARM data in atmospheric research and global climate models. \n", + "Here is the main content we will cover." + ] + }, + { + "cell_type": "markdown", + "id": "eddec40f", + "metadata": {}, + "source": [ + "# Table of Content\n", + "## Access the data\n", + "* How to retrieve the data\n", + "* Data path and file name conventions\n", + "* Load data\n", + "## Explore the data\n", + "* NetCDF Data structure\n", + "* Xarray essentials\n", + "* Xarray Variable\n", + "## Plot the data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "import random\n", + "\n", + "import glob\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "id": "2372d5be", + "metadata": {}, + "source": [ + "## Access the data" + ] + }, + { + "cell_type": "markdown", + "id": "5b0d9684", + "metadata": {}, + "source": [ + "### How to retrieve the data\n", + "We assume the 
path \"/data/archive\" is available where you are running this notebook. You can use `os.path.exists(\"/data/archive\")` to verify if the path exists on your machine.\n", + "\n", + "\n", + "### Data path and file name conventions\n", + "There are several common terms regarding ARM data, for example, data-stream-name, data-level, site, facility, instrument, etc. (For more details, please see [ARM Data File Standards Version 1.2](https://www.google.com/search?q=arm+datastream+facility+cite+definition&rlz=1C1GCEJ_enUS1029US1029&ei=hb41ZICrBPukqtsPvMODIA&ved=0ahUKEwjAgeiS0aL-AhV7kmoFHbzhAAQQ4dUDCBA&uact=5&oq=arm+datastream+facility+cite+definition&gs_lcp=Cgxnd3Mtd2l6LXNlcnAQAzIFCAAQogQyBQgAEKIEMgUIABCiBDIFCAAQogQyBQgAEKIEOgoIABBHENYEELADOgoIIRCgARDDBBAKSgQIQRgAUJ4DWIoOYO0PaAFwAXgAgAGMAYgBuQmSAQMyLjmYAQCgAQHIAQjAAQE&sclient=gws-wiz-serp).) \n", + "\n", + "For example, this notebook is called `sondeadjust.c1`, where `sondeadjust` is the \"datastream name\", and `c1` is the \"data level\".\n", + "\n", + "This datastream also contains site `fkb` and facility `M1`. (Note: an individual datastream might have multiple site-facility pairs.)\n", + "In such a case, the data of this data-stream is stored at `/data/archive/fkb/fkbsondeadjustM1.c1`, which is in the format of `{DATA_DIR}/{site}/{site}{datastream_name}{facility}.{data_level}`. We can use the following method to assign the data-stream directory `datastream_dir = os.path.join(DATA_DIR, site, site + DATASTREAM_NAME + facility + '.' + DATA_LEVEL )`\n", + "\n", + "The data files under datastream_dir also follow naming conventions. Once at the datastream_dir level, the most important part of the file name for differentiating the files is the \"yyyyMMdd.hhmmss\" timestamp, which comes in handy for filtering files based on datetime. 
For example, we can use `glob.glob(f'{datastream_dir}/*.200709*.*')` to filter files from September 2007.\n", + "\n", + "(Note: refer to https://adc.arm.gov/discovery/#/ and the https://adc.arm.gov/solr8/metadata/select API to explore ARM datastreams and their associated available sites and facilities.)\n", + "\n", + "Please see the following examples in action" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d7e9eb85", + "metadata": {}, + "outputs": [], + "source": [ + "# Verify if DATA_DIR path exists\n", + "DATA_DIR = \"/data/archive\"\n", + "os.path.exists(DATA_DIR)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "586993fd", + "metadata": {}, + "outputs": [], + "source": [ + "# Specify datastream_dir following the path conventions and check its existence\n", + "DATASTREAM_NAME = \"sondeadjust\"\n", + "DATA_LEVEL = \"c1\"\n", + "site = \"fkb\"\n", + "facility = \"M1\"\n", + "datastream_dir = os.path.join(DATA_DIR, site, site + DATASTREAM_NAME + facility + '.' 
+ DATA_LEVEL )\n", + "print(datastream_dir)\n", + "print(os.path.exists(datastream_dir))\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0742f7c1", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: list 5 (random) files under datastream_dir\n", + "files = os.listdir(datastream_dir)\n", + "files[:5]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "39b98a36", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: get most recent file\n", + "list_of_files = glob.glob(f\"{datastream_dir}/*\") # * means all if need specific format then *.csv\n", + "latest_file = max(list_of_files, key=os.path.getctime)\n", + "latest_file" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "902d514e", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus sort datastream files based on datetime\n", + "files = os.listdir(datastream_dir)\n", + "file_sorted = files.copy()\n", + "file_sorted.sort() \n", + "print(file_sorted[:5])\n", + "\n", + "# to reverse\n", + "file_sorted_reverse = files.copy() \n", + "file_sorted_reverse.sort(reverse=True)\n", + "print(file_sorted_reverse[:5])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ec5923b2", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: pattern matching\n", + "# filter the 200709** files under datastream_dir\n", + "files_filter = glob.glob(f'{datastream_dir}/*.200709*.*')\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c4a4aa18", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: use armnotebook_utils.file_filter (TODO) to filter files based on datastream info \n" + ] + }, + { + "cell_type": "markdown", + "id": "cfeb9efc", + "metadata": {}, + "source": [ + "### Load data\n", + "The arm data is stored in [NetCDF](https://en.wikipedia.org/wiki/NetCDF#:~:text=%22NetCDF%20(network%20Common%20Data%20Form,format%20for%20representing%20scientific%20data.) format. 
We can use xarray's `open_dataset` method to load a single file, or `open_mfdataset` to open multiple files. (Note: the latter will still return a single xarray dataset object by combining multiple files.)\n", + "\n", + "Note: open_dataset keeps the file handle open and lazy loads its contents. All parameters are passed directly to open_dataset. It is a preferable method over load_dataset for memory efficiency. (For more details, please see [xarray.open_dataset](https://docs.xarray.dev/en/stable/generated/xarray.open_dataset.html))\n", + "\n", + "See the following example in action" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a440a329", + "metadata": {}, + "outputs": [], + "source": [ + "# open single file\n", + "full_path = latest_file\n", + "print(full_path)\n", + "ds_single = xr.open_dataset(full_path)\n", + "ds_single" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0143a3d", + "metadata": {}, + "outputs": [], + "source": [ + "print(ds_single)\n", + "print(type(ds_single))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1c0f8939", + "metadata": {}, + "outputs": [], + "source": [ + "# open multiple files\n", + "n_files = 3\n", + "full_paths = [os.path.join(datastream_dir, f_path) for f_path in file_sorted_reverse[:n_files]]\n", + "print(full_paths)\n", + "try: # Note: sometimes multiple files cannot be merged, so we used try except here.\n", + " ds_mutiple = xr.open_mfdataset(full_paths)\n", + " # ds_mutiple\n", + " print(type(ds_mutiple))\n", + " print(ds_mutiple)\n", + "except Exception as e:\n", + " print(e)" + ] + }, + { + "cell_type": "markdown", + "id": "3fa59943", + "metadata": {}, + "source": [ + "## Explore the data" + ] + }, + { + "cell_type": "markdown", + "id": "571f69b9", + "metadata": {}, + "source": [ + "### NetCDF Data structure \n", + "\n", + "\n", + "(If you are confident with NetCDF basics and xarray essentials, feel free to skip this section.)\n", + "Before we dive into 
data exploration, there are some essential concepts we should be familiar with (shown below): \n", + "* Dataset\n", + "* Data array\n", + "* Variable\n", + "* Dimension\n", + "* Coordinate\n", + "* Data Type\n", + "* Meta Data (Attributes)\n", + "\n", + "We will not go into details about NetCDF basics, but here are some references you might find helpful:\n", + "* [Components of a NetCDF Dataset](https://iprc.soest.hawaii.edu/users/xfu/tool/guidef-7.html#)\n", + "* [Network Common Data Form (NetCDF)](https://www.unidata.ucar.edu/software/netcdf/) and [A Brief History of (netCDF) Time](https://www.unidata.ucar.edu/software/netcdf/time/recs.html)\n", + "\n", + "\n", + "\n", + "### Xarray essentials\n", + "Earlier we introduced the xarray package and used `open_dataset` and `open_mfdataset` to retrieve a NetCDF data file as an xarray Dataset object. Recall that you can review the data in a notebook by using `print(ds)` or `ds`. \n", + "\n", + "Using xarray to retrieve the aforementioned NetCDF basics is straightforward.
In general,\n", + "* Dataset: `ds`\n", + "* Data array: `ds.variables`\n", + "* Variable: `ds.variables`\n", + "* Dimension: `ds.dims`\n", + "* Coordinate: `ds.coords`\n", + "* Data Type: `type`\n", + "* Meta Data (Attributes): `ds.attrs`\n", + "\n", + "Also, here are some references if you are new to xarray\n", + "* [Xarray in 45 minutes](https://tutorial.xarray.dev/overview/xarray-in-45-min.html)\n", + "* [Handling NetCDF Files using XArray for Absolute Beginners](https://towardsdatascience.com/handling-netcdf-files-using-xarray-for-absolute-beginners-111a8ab4463f)\n", + "\n", + "Try the following commands in action\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b3eac323", + "metadata": {}, + "outputs": [], + "source": [ + "ds = ds_single" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "77ecf85d", + "metadata": {}, + "outputs": [], + "source": [ + "# Dataset \n", + "ds" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "25e7de09", + "metadata": {}, + "outputs": [], + "source": [ + "# Data array, Variable\n", + "# Note: the info can be overwhelming.
Do not attempt to grasp everything at the first glance.\n", + "ds.variables" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8c41b67e", + "metadata": {}, + "outputs": [], + "source": [ + "# dimenssions\n", + "ds.dims" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "156f1dfc", + "metadata": {}, + "outputs": [], + "source": [ + "# coordinates\n", + "ds.coords" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "277d6064", + "metadata": {}, + "outputs": [], + "source": [ + "# Meta Data (Attributes)\n", + "ds.attrs" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d334681f", + "metadata": {}, + "outputs": [], + "source": [ + "# type\n", + "print(type(ds))\n", + "print(type(ds.variables))\n", + "print(type(ds.dims))\n", + "print(type(ds.coords))\n", + "print(type(ds.attrs))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "643399d6", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: retrieve variable names only\n", + "list(ds.variables)" + ] + }, + { + "cell_type": "markdown", + "id": "1cf36878", + "metadata": {}, + "source": [ + "#### Discussion: variable vs. coordinates vs. dimenssions. \n", + "From [Components of a NetCDF Dataset](https://iprc.soest.hawaii.edu/users/xfu/tool/guidef-7.html#):\n", + "\n", + "* Dimession: A dimension may be used to represent a real physical dimension, for example, time, latitude, longitude, or height. A dimension might also be used to index other quantities, for example station or model-run-number.\n", + "A netCDF dimension has both a name and a length. A dimension length is an arbitrary positive integer, except that one dimension in a netCDF dataset can have the length UNLIMITED.\n", + "\n", + "* Variables: Variables are used to store the bulk of the data in a netCDF dataset. A variable represents an array of values of the same type. A scalar value is treated as a 0-dimensional array. 
A variable has a name, a data type, and a shape described by its list of dimensions specified when the variable is created. A variable may also have associated attributes, which may be added, deleted or changed after the variable is created.\n", + "\n", + "* Coordinate (Variables): It is legal for a variable to have the same name as a dimension. Such variables have no special meaning to the netCDF library. However there is a convention that such variables should be treated in a special way by software using this library.\n", + "A variable with the same name as a dimension is called a coordinate variable. It typically defines a physical coordinate corresponding to that dimension.\n", + "\n", + "\n", + "Tips: By definition, coordinates and dimensions are also variables. For example, `ds.time` is a coordinate, but it is also a special variable. In practice, however, when we talk about a variable, we mean a regular (non-coordinate) variable. We will use this convention for the remainder of this notebook. " + ] + }, + { + "cell_type": "markdown", + "id": "d47120f9", + "metadata": {}, + "source": [ + "### Xarray Variable\n", + "\n", + "\n", + "We can use `ds.variables` to access the variables of a dataset. However, printing out the whole dataset can often be overwhelming. Instead, we would work on an individual variable (also called an xarray data array) using the `ds[var_name]` syntax, where `var_name` is the name of the variable. See the following examples in action.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5a57e136", + "metadata": {}, + "outputs": [], + "source": [ + "print(type(ds[\"time\"]))\n", + "ds[\"time\"]" + ] + }, + { + "cell_type": "markdown", + "id": "51ec643e", + "metadata": {}, + "source": [ + "#### Variable properties\n", + "For an individual variable, the following properties are most commonly used, listed here with the associated xarray attribute to retrieve them.
(assuming var = ds[var_name])\n", + "* name: `var.name`\n", + "* data content: `var.data`\n", + "* attributes: `var.attrs`\n", + "* dimensions: `var.dims`\n", + "* data type: `var.data.dtype`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a632a525", + "metadata": {}, + "outputs": [], + "source": [ + "var_name = \"time\"\n", + "var = ds[var_name]\n", + "\n", + "print(\"var.name: \\n\", var.name, \"\\n\")\n", + "print(\"var.data: \\n\", var.data, \"\\n\")\n", + "print(\"var.attrs: \\n\", var.attrs, \"\\n\")\n", + "print(\"var.dims: \\n\", var.dims, \"\\n\")\n", + "print(\"var.data.dtype: \\n\", var.data.dtype, \"\\n\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b256b07f", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: individual variable info table (collect and display variable info as a tabular format)\n", + "df_info = pd.DataFrame()\n", + "df_info[\"var_name\"] = list(ds.variables)\n", + "df_info[\"dims\"] = df_info.var_name.apply(lambda x: ds[x].dims)\n", + "df_info[\"is_dim\"] = df_info.var_name.apply(lambda x: x in ds.dims)\n", + "df_info[\"n_dim\"] = df_info.var_name.apply(lambda x: len(ds[x].dims))\n", + "df_info[\"attrs\"] = df_info.var_name.apply(lambda x: ds[x].attrs)\n", + "df_info[\"dtype\"] = df_info.var_name.apply(lambda x: ds[x].data.dtype)\n", + "df_info" + ] + }, + { + "cell_type": "markdown", + "id": "e6a36b96", + "metadata": {}, + "source": [ + "### Data cleaning/Preprocessing (skipped)\n", + "Data cleaning and preprocessing is an important stage in the data analysis pipeline. However, it is out of the scope of this notebook. For more information about data cleaning and preprocessing basics, here are some references.
\n", + "* [Xarray Fundamentals](https://earth-env-data-science.github.io/lectures/xarray/xarray.html)\n", + "* [Xarray Tutorial — Pangeo Gallery documentation](http://gallery.pangeo.io/repos/pangeo-data/pangeo-tutorial-gallery/xarray.html)\n", + "* [Pythonic Data Cleaning With pandas and NumPy](https://realpython.com/python-data-cleaning-numpy-pandas/)\n", + "* [Pandas - Cleaning Data](https://www.w3schools.com/python/pandas/pandas_cleaning.asp)" + ] + }, + { + "cell_type": "markdown", + "id": "ae6e1a69", + "metadata": {}, + "source": [ + "### Plotting\n", + "Data visualization (or plotting) is another important data analysis topic and deserves its own discussion. Here in this notebook we will only demonstrate basic tools for simple data visualization tasks.\n", + "\n", + "Here are some references you might find useful:\n", + "* xarray plotting: https://docs.xarray.dev/en/stable/user-guide/plotting.html\n", + "* Atmospheric Community Toolkit (ACT): https://arm-doe.github.io/ACT/index.html\n", + "\n", + "Note: this notebook is auto-generated using a template. It uses a general idea to select variable(s) to plot and is not customized to each individual notebook. Feel free to change the variables of interest, especially if a certain figure fails to plot." + ] + }, + { + "cell_type": "markdown", + "id": "7e417fb2", + "metadata": {}, + "source": [ + "#### 1-dimensional basic time series plot\n", + "\n", + "For the following plot we would like to find variables such that\n", + "* it has one and only one dimension\n", + "* \"time\" is its coordinate variable\n", + "* it is not a dimension itself,\n", + "* it is not a special variable with substrings within [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "\n", + "Please see the following example in action.
(For more details about pandas filtering, please see the following references.)\n", + "* [pandas: multiple conditions while indexing data frame](https://stackoverflow.com/questions/22591174/pandas-multiple-conditions-while-indexing-data-frame-unexpected-behavior)\n", + "* [pandas.DataFrame.apply](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.apply.html)\n", + "* [How to test if a string contains one of the substrings in a list...](https://stackoverflow.com/questions/26577516/how-to-test-if-a-string-contains-one-of-the-substrings-in-a-list-in-pandas)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "30edac2d", + "metadata": {}, + "outputs": [], + "source": [ + "# (pandas query) filter n_dim==1, non-dimenssional, and not contains the following substrings\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter = df_info[(df_info.n_dim == 1) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (df_info.is_dim==False) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4777a659", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " var_1d = df_filter.var_name.values[0]\n", + " var_1d\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fb5ae985", + "metadata": {}, + "outputs": [], + "source": [ + "# Note: if failed, change to another variable to plot.\n", + "try:\n", + " ds[var_1d].plot()\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "markdown", + "id": "9e41066e", + "metadata": {}, + "source": [ + "#### 2-dimensional basic plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "09bd4adc", + "metadata": {}, + "outputs": [], + "source": [ + "# (pandas query) filter n_dim==2, non-dimenssional, and not contains the
following substrings\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_2 = df_info[(df_info.n_dim == 2) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (df_info.is_dim==False) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter_2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2cdea1a9", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " var_2d = df_filter_2.var_name.values[0]\n", + " var_2d\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f5f48879", + "metadata": {}, + "outputs": [], + "source": [ + "# Note: if failed, change to another variable to plot.\n", + "try:\n", + " print(ds[var_2d].dims)\n", + " # ds[var_2d].plot()\n", + "\n", + " # conventionally, use \"time\" as x-axis\n", + " ds[var_2d].plot(x=\"time\")\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "markdown", + "id": "7554b2a6", + "metadata": {}, + "source": [ + "#### qc-plotting (optional)\n", + "\n", + "Note: act qc plotting has more strict requirement, one of them is the associated qc variable need to have \"flag_masks\" attributes. 
Which is added by using ds = act.io.armfiles.read_netcdf(files_list), then ds.clean.cleanup()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "71c2096f", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " ds_act = act.io.armfiles.read_netcdf(full_path)\n", + " print(type(ds_act))\n", + " ds_act.clean.cleanup()\n", + "\n", + " # or \n", + " # ds.clean.cleanup()\n", + "except Exception as e:\n", + " print(\"ERROR\", e)\n", + " ds_act = ds" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "844f8505", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter valid variables for ACT qc plotting\n", + "condition = (\"qc_\" + df_info.var_name).apply(lambda x: ds_act[x].attrs.get(\"flag_masks\") is not None \n", + " if x in list(ds.variables) else False)\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_3 = df_info[(df_info.is_dim==False) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings))) &\n", + " condition\n", + " ]\n", + "df_filter_3" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ecd38cc6", + "metadata": {}, + "outputs": [], + "source": [ + "# QC Plot\n", + "try:\n", + " qc_variable = df_filter_3.var_name.values[0]\n", + " print(qc_variable)\n", + "except Exception as e:\n", + " print(e)\n", + " \n", + "try:\n", + "\n", + " # Plot\n", + " qc_display = act.plotting.TimeSeriesDisplay(ds_act)\n", + " qc_display.add_subplots((2,), figsize = (9.5,10))\n", + " qc_ax = qc_display.plot(qc_variable, subplot_index=(0,), set_title=\"QC results on field: \" + qc_variable,)\n", + " qc_ax.grid()\n", + " qc_display.qc_flag_block_plot(qc_variable, subplot_index=(1,))\n", + " qc_ax.set_xlabel(f\"UTC Time starts at {ds.time.data[0]}\")\n", + "\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e)" + ] + }, + { + "cell_type": "markdown", + "id": "d76e4d27", + "metadata": 
{}, + "source": [ + "#### bonus: choose variables to plot from a dropdown menu " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bb733804", + "metadata": {}, + "outputs": [], + "source": [ + "# Valid variables filtering\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_4 = df_info[(df_info.is_dim==False) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter_4" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "82d5c20d", + "metadata": {}, + "outputs": [], + "source": [ + "# example 1: using xarray plot\n", + "\n", + "# Uncomment the following cell to try the interactive plot (ctrl + /)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2fc9aaa4", + "metadata": {}, + "outputs": [], + "source": [ + "# %matplotlib widget\n", + "# plt.clf()\n", + "\n", + "# fig, ax = plt.subplots(figsize=(10, 4))\n", + "\n", + "# available_variables = df_filter_4.var_name.values\n", + "# @widgets.interact(var=available_variables)\n", + "# def update(var = available_variables[0]):\n", + "# fig.clear() # Remove old lines from plot and plot new one\n", + "# if len(ds[var].dims)==2:\n", + "# ds[var].plot(x=\"time\", add_colorbar=False)\n", + "# else:\n", + "# ds[var].plot()\n", + "# plt.grid()\n", + "# plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fabdd802", + "metadata": {}, + "outputs": [], + "source": [ + "# example 2: using act plot\n", + "\n", + "# Uncomment the following cell to try the interactive plot (ctrl + /)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "962b4186", + "metadata": {}, + "outputs": [], + "source": [ + "# %matplotlib widget\n", + "# plt.clf()\n", + "\n", + "# available_variables = df_filter_4.var_name.values\n", + "\n", + "\n", + "# @widgets.interact(var=available_variables)\n", + "# def 
update(var = available_variables[0]):\n", + "\n", + "# i_display = act.plotting.TimeSeriesDisplay(ds_act)\n", + "# i_display.add_subplots((1,), figsize=(10, 4))\n", + "# ax = i_display.plot(var, subplot_index=(0,), set_title=f\"{var} ({ds_act[var].attrs['long_name']})\",)\n", + "\n", + "# ax.set_xlabel(f\"UTC Time starts at {ds.time.data[0]}\")\n", + "# ax.grid()\n", + "# plt.show()\n" + ] + }, + { + "cell_type": "markdown", + "id": "10d523d7", + "metadata": {}, + "source": [ + "## Skew-T Plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9133a183", + "metadata": {}, + "outputs": [], + "source": [ + "launch_times = [str(datetime.strptime(''.join(f.split('.')[-3:-1]), '%Y%m%d%H%M%S')) for f in files_list]\n", + "print('Available sonde launch times:')\n", + "display(pd.DataFrame(launch_times, columns=['Launch Time']))\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b3af3f5a", + "metadata": {}, + "outputs": [], + "source": [ + "# select sonde launch time from the list\n", + "launch_time_index = 0\n", + "sonde_file = files_list[launch_time_index]\n", + "sonde_ds = act.io.armfiles.read_netcdf(sonde_file)\n", + "\n", + "# Calculate stability indicies\n", + "sonde_ds = act.retrievals.calculate_stability_indicies(\n", + " sonde_ds, temp_name='tdry', td_name='dp', p_name='pres', rh_name='rh'\n", + ")\n", + "\n", + "# Set up plot\n", + "skewt = act.plotting.SkewTDisplay(sonde_ds, figsize=(7, 10))\n", + "\n", + "# Add data\n", + "skewt.plot_from_u_and_v('u_wind', 'v_wind', 'pres', 'tdry', 'dp', set_title=f'Skew-T Plot for {launch_times[launch_time_index]}')\n", + "sonde_ds.close()\n", + "plt.show()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": 
"python", + "pygments_lexer": "ipython3", + "version": "3.8.16" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/SONDEADJUST/sondeadjust.c1.ipynb b/VAPs/quicklook/SONDEADJUST/sondeadjust.c1.ipynb new file mode 100644 index 00000000..2cda801c --- /dev/null +++ b/VAPs/quicklook/SONDEADJUST/sondeadjust.c1.ipynb @@ -0,0 +1,385 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# SONDEADJUST.C1 Plots\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/sondeadjust) for more information about this vap." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib widget\n", + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import ipywidgets as widgets\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "# Data archive directory\n", + "DATA_DIR = r'/data/archive/'\n", + "\n", + "# Datastream info\n", + "DATASTREAM_NAME = 'sondeadjust'\n", + "DATA_LEVEL = 'c1'\n", + "LOCATIONS = [{'end_date': '2008-01-01', 'facility': 'M1', 'site': 'fkb', 'start_date': '2007-03-24'}, {'end_date': '2008-12-28', 'facility': 'M1', 'site': 'hfe', 'start_date': '2008-05-14'}, {'end_date': '2012-02-09', 'facility': 'M1', 'site': 'gan', 'start_date': '2011-09-17'}, {'end_date': '2011-01-05', 'facility': 'M1', 'site': 'grw', 'start_date': '2009-04-16'}, {'end_date': '2013-06-29', 'facility': 'M1', 'site': 'pvc', 'start_date': '2012-06-25'}, {'end_date': '2005-09-15', 'facility': 'M1', 'site': 'pye', 'start_date': '2005-02-25'}, {'end_date': '2012-03-31', 
'facility': 'M1', 'site': 'pgh', 'start_date': '2011-06-15'}, {'end_date': '2012-07-16', 'facility': 'C1', 'site': 'nsa', 'start_date': '2002-04-28'}, {'end_date': '2007-01-08', 'facility': 'M1', 'site': 'nim', 'start_date': '2006-01-07'}, {'end_date': '2011-04-24', 'facility': 'M1', 'site': 'sbs', 'start_date': '2010-11-08'}, {'end_date': '2012-09-01', 'facility': 'C1', 'site': 'sgp', 'start_date': '1999-07-20'}, {'end_date': '2011-09-09', 'facility': 'C1', 'site': 'twp', 'start_date': '1997-08-28'}, {'end_date': '2012-04-05', 'facility': 'C2', 'site': 'twp', 'start_date': '1998-11-05'}, {'end_date': '2012-12-19', 'facility': 'C3', 'site': 'twp', 'start_date': '2002-04-01'}]" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "9faaf875", + "metadata": {}, + "source": [ + "## Define site, facility, and date range" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ac6764f5", + "metadata": {}, + "outputs": [], + "source": [ + "print(\"The following locations and date ranges are available for this VAP:\")\n", + "display(pd.DataFrame(LOCATIONS, columns=['site', 'facility', 'start_date', 'end_date']))" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "8d132223", + "metadata": {}, + "source": [ + "#### Define site, facility, and date range (date format: YYYY-MM-DD) using the variables below:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e563983a", + "metadata": {}, + "outputs": [], + "source": [ + "site_facility = ( 'sgp', 'C1' )\n", + "\n", + "date_start = '2012-08-30'\n", + "date_end = '2012-08-31'" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "bccd3dfe-2f99-49a5-bace-ea37e8dd8fc3", + "metadata": {}, + "source": [ + "## Load data files\n", + "Load data files from /data/archive/" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cb4b9a26-c574-49c0-a521-658fa553e39e", + "metadata": {}, + "outputs": [], + "source": [ + "# Compile list of 
files\n", + "site, facility = site_facility\n", + "d_date_start = datetime.strptime(date_start, '%Y-%m-%d')\n", + "d_date_end = datetime.strptime(date_end, '%Y-%m-%d')\n", + "dir_path = os.path.join(DATA_DIR + site, site + DATASTREAM_NAME + facility + r'.' + DATA_LEVEL )\n", + "dir_path\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6be8f3dc", + "metadata": {}, + "outputs": [], + "source": [ + "from datetime import date, timedelta\n", + "import pandas as pd\n", + "\n", + "def get_ARM_formated_dates(start_date, end_date):\n", + " \"\"\"\n", + " Get a list of ARM conventional formated date lists, based on start_date and end_date(inclusive)\n", + " EXAMPLE:\n", + " get_ARM_formated_dates(start_date=\"20180219\", end_date=\"20180221\")\n", + " >> [\"20180219\", \"20180220\", \"20180221\"] \n", + " \"\"\"\n", + " \n", + " _start_date = pd.to_datetime(start_date)\n", + " _end_date = pd.to_datetime(end_date)\n", + " \n", + " delta = _end_date - _start_date # returns timedelta \n", + " dates = []\n", + "\n", + " for i in range(delta.days + 1):\n", + " day = _start_date + timedelta(days=i)\n", + " day_formated = day.strftime(format=\"%Y%m%d\")\n", + " dates.append(day_formated)\n", + " return dates\n", + "\n", + "\n", + "get_ARM_formated_dates(start_date=date_start, end_date=date_end)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "51feea2e", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter a list of files based on date pattern\n", + "import glob\n", + "dates = get_ARM_formated_dates(start_date=date_start, end_date=date_end)\n", + "files_filter = []\n", + "for date in dates:\n", + " files_filter += glob.glob(f'{dir_path}/*.{date}*.*')\n", + " files_filter\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0e5d0d7", + "metadata": {}, + "outputs": [], + "source": [ + "# Load files as a single dataset\n", + "files_list = files_filter \n", + "ds = 
act.io.armfiles.read_netcdf(files_list)\n", + "ds.clean.cleanup()\n", + "print(f'{len(files_list)} files loaded')\n", + "ds\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "4a551094-9ec0-4b64-b80a-9940573c2f50", + "metadata": {}, + "source": [ + "## Plot time series data\n", + "#### Define the list of variables to be plotted:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d09b789e-84f1-4605-846b-a72c110c8048", + "metadata": {}, + "outputs": [], + "source": [ + "variables_to_plot = ['pres', 'tdry', 'dp']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3458fb08-035b-4898-9253-0a94e6f9c97b", + "metadata": {}, + "outputs": [], + "source": [ + "ts_display = act.plotting.TimeSeriesDisplay(ds)\n", + "ts_display.add_subplots((len(variables_to_plot),), figsize = (9.5,4*len(variables_to_plot)))\n", + "\n", + "for i,v in enumerate(variables_to_plot):\n", + " ts_ax = ts_display.plot(v, subplot_index=(i,), set_title=ds.variables[v].attrs['long_name'],linestyle='None')\n", + " ts_ax.grid()\n", + "\n", + "plt.show()\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "194399aa-1907-452b-8ba9-bc31d7f60291", + "metadata": {}, + "source": [ + "## Quality check plots\n", + "#### Define variable for QC plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9a0ef63d-3eeb-48fc-a24a-43258b6134b8", + "metadata": {}, + "outputs": [], + "source": [ + "qc_variable = 'pres'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "532663a3-4dc0-4497-bda8-018c5f91e1c4", + "metadata": {}, + "outputs": [], + "source": [ + "# QC Plot\n", + "if ('qc_'+qc_variable) in ds.variables:\n", + "\n", + " # Plot\n", + " qc_display = act.plotting.TimeSeriesDisplay(ds)\n", + " qc_display.add_subplots((2,), figsize = (9.5,10))\n", + " qc_ax = qc_display.plot(qc_variable, subplot_index=(0,), set_title=\"QC results on field: \" + qc_variable,linestyle='None')\n", + " 
qc_ax.grid()\n", + " qc_display.qc_flag_block_plot(qc_variable, subplot_index=(1,))\n", + "\n", + " plt.show()\n", + "else:\n", + " print(f'QC not available for the selected field: {qc_variable}')\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "2308db16-f362-4033-a11a-c3e5e75ad9ba", + "metadata": {}, + "source": [ + "## Field selection dropdown menu\n", + "Select variable to be plotted from a dropdown menu" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d5792fa8-2253-441a-8ac6-1e07a0345031", + "metadata": {}, + "outputs": [], + "source": [ + "plt.ioff()\n", + "\n", + "# populate dropdown menu with available variables \n", + "available_variables = [v for v in ds.variables if not('time' in v or v.startswith('qc_') or v.startswith('source_')) and 'long_name' in ds.variables[v].attrs]\n", + "d_variable = 'pres'\n", + "dropdown = widgets.Dropdown(\n", + " options = [(ds.variables[v].attrs['long_name'], v) for v in available_variables],\n", + " value= d_variable,\n", + " description='Field:',\n", + " disabled=False,\n", + ")\n", + "dropdown.layout.margin = '0px 30% 0px 20%'\n", + "dropdown.layout.width = '50%'\n", + "\n", + "# set up display\n", + "i_display = act.plotting.TimeSeriesDisplay(ds)\n", + "i_display.add_subplots((1,), figsize = (9.5,5))\n", + "i_ax = i_display.plot(d_variable, subplot_index=(0,), set_title=ds.variables[d_variable].attrs['long_name'],linestyle='None')\n", + "i_ax.grid()\n", + "i_fig = i_display.fig\n", + "\n", + "# update plot callback function\n", + "def update_plot(change):\n", + " i_ax.cla()\n", + " i_ax_new = i_display.plot(change.new, subplot_index=(0,), set_title=ds.variables[change.new].attrs['long_name'],linestyle='None')\n", + " i_ax_new.grid()\n", + " i_fig.canvas.draw()\n", + " i_fig.canvas.flush_events()\n", + "\n", + "dropdown.observe(update_plot, names='value')\n", + "\n", + "widgets.AppLayout(\n", + " header=dropdown,\n", + " center=i_fig.canvas,\n", + " pane_heights=[1, 6,1]\n", 
+ ")\n" + ] + }, + { + "cell_type": "markdown", + "id": "bd86dac3", + "metadata": {}, + "source": [ + "## Skew-T Plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c23b6c56", + "metadata": {}, + "outputs": [], + "source": [ + "launch_times = [str(datetime.strptime(''.join(f.split('.')[-3:-1]), '%Y%m%d%H%M%S')) for f in files_list]\n", + "print('Available sonde launch times:')\n", + "display(pd.DataFrame(launch_times, columns=['Launch Time']))\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c958b473", + "metadata": {}, + "outputs": [], + "source": [ + "# select sonde launch time from the list\n", + "launch_time_index = 0\n", + "sonde_file = files_list[launch_time_index]\n", + "sonde_ds = act.io.armfiles.read_netcdf(sonde_file)\n", + "\n", + "# Calculate stability indicies\n", + "sonde_ds = act.retrievals.calculate_stability_indicies(\n", + " sonde_ds, temp_name='tdry', td_name='dp', p_name='pres', rh_name='rh'\n", + ")\n", + "\n", + "# Set up plot\n", + "skewt = act.plotting.SkewTDisplay(sonde_ds, figsize=(7, 10))\n", + "\n", + "# Add data\n", + "skewt.plot_from_u_and_v('u_wind', 'v_wind', 'pres', 'tdry', 'dp', set_title=f'Skew-T Plot for {launch_times[launch_time_index]}')\n", + "sonde_ds.close()\n", + "plt.show()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.12" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/SONDEPARAM/SONDEPARAM_tutorial.ipynb b/VAPs/quicklook/SONDEPARAM/SONDEPARAM_tutorial.ipynb new file mode 100644 index 
00000000..32d9fc85 --- /dev/null +++ b/VAPs/quicklook/SONDEPARAM/SONDEPARAM_tutorial.ipynb @@ -0,0 +1,838 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# SONDEPARAM.C1 Notebook\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/sondeparam) for more information about this vap." + ] + }, + { + "cell_type": "markdown", + "id": "97097763", + "metadata": {}, + "source": [ + "In this notebook, we demonstrate the workflow to explore ARM vap data (using sondeparam as an example.) Value-added products (VAPs) are higher-order data products that have been analyzed and processed to ease scientists’ use of ARM data in atmospheric research and global climate models. \n", + "Here is the main content we will cover." + ] + }, + { + "cell_type": "markdown", + "id": "eddec40f", + "metadata": {}, + "source": [ + "# Table of Content\n", + "## Access the data\n", + "* How to retrieve the data\n", + "* Data path and file name conventions\n", + "* Load data\n", + "## Explore the data\n", + "* NetCDF Data structure\n", + "* Xarray essentials\n", + "* Xarray Variable\n", + "## Plot the data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "import random\n", + "\n", + "import glob\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "id": "2372d5be", + "metadata": {}, + "source": [ + "## Access the data" + ] + }, + { + "cell_type": "markdown", + "id": "5b0d9684", + "metadata": {}, + "source": [ + "### How to retrieve the data\n", + "We assume the path \"/data/archive\" is available where you are running this notebook. 
You can use `os.path.exists(\"/data/archive\")` to verify if the path exists on your machine.\n", + "\n", + "\n", + "### Data path and file name conventions\n", + "There are several common terminologies regarding ARM data, for example, data-stream-name, data-level, site, facility, instrument, etc. (For more details, please see [ARM Data File Standards Version 1.2](https://www.google.com/search?q=arm+datastream+facility+cite+definition&rlz=1C1GCEJ_enUS1029US1029&ei=hb41ZICrBPukqtsPvMODIA&ved=0ahUKEwjAgeiS0aL-AhV7kmoFHbzhAAQQ4dUDCBA&uact=5&oq=arm+datastream+facility+cite+definition&gs_lcp=Cgxnd3Mtd2l6LXNlcnAQAzIFCAAQogQyBQgAEKIEMgUIABCiBDIFCAAQogQyBQgAEKIEOgoIABBHENYEELADOgoIIRCgARDDBBAKSgQIQRgAUJ4DWIoOYO0PaAFwAXgAgAGMAYgBuQmSAQMyLjmYAQCgAQHIAQjAAQE&sclient=gws-wiz-serp).) \n", + "\n", + "For example, this notebook is called `sondeparam.c1`, where `sondeparam` is the \"datastream name\", and `c1` is the \"data level\".\n", + "\n", + "This datastream also contains site `guc` and facility `M1`. (Note: individual datastream might have multiple site-facility pairs.)\n", + "In such a case, the data of this data-stream is stored at `/data/archive/guc/gucsondeparamM1.c1`, which is in the format of `{DATA_DIR}/{site}/{site}{datastream_name}{facility}.{data_level}`. We can use the following method to assign the data-stream directory `datastream_dir = os.path.join(DATA_DIR, site, site + DATASTREAM_NAME + facility + '.' + DATA_LEVEL )`\n", + "\n", + "The data files under datastream_dir also follow naming conventions. But once you reach the datastream_dir level, the most important file naming convention to differentiate the files is \"yyyyMMdd.hhmmss\", which comes in handy to filter out files based on datetime. 
For example, we can use `glob.glob(f'{datastream_dir}/*.200709*.*')` to filter files from September 2007.\n", + "\n", + "(Note: refer to the https://adc.arm.gov/discovery/#/ and https://adc.arm.gov/solr8/metadata/select API to explore ARM datastreams and associated available sites and facilities.)\n", + "\n", + "Please see the following examples in action" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d7e9eb85", + "metadata": {}, + "outputs": [], + "source": [ + "# Verify if DATA_DIR path exists\n", + "DATA_DIR = \"/data/archive\"\n", + "os.path.exists(DATA_DIR)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "586993fd", + "metadata": {}, + "outputs": [], + "source": [ + "# Specify datastream_dir following the path conventions and check its existence\n", + "DATASTREAM_NAME = \"sondeparam\"\n", + "DATA_LEVEL = \"c1\"\n", + "site = \"guc\"\n", + "facility = \"M1\"\n", + "datastream_dir = os.path.join(DATA_DIR, site, site + DATASTREAM_NAME + facility + '.' 
+ DATA_LEVEL )\n", + "print(datastream_dir)\n", + "print(os.path.exists(datastream_dir))\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0742f7c1", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: list 5 (random) files under datastream_dir\n", + "files = os.listdir(datastream_dir)\n", + "files[:5]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "39b98a36", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: get most recent file\n", + "list_of_files = glob.glob(f\"{datastream_dir}/*\") # * means all if need specific format then *.csv\n", + "latest_file = max(list_of_files, key=os.path.getctime)\n", + "latest_file" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "902d514e", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus sort datastream files based on datetime\n", + "files = os.listdir(datastream_dir)\n", + "file_sorted = files.copy()\n", + "file_sorted.sort() \n", + "print(file_sorted[:5])\n", + "\n", + "# to reverse\n", + "file_sorted_reverse = files.copy() \n", + "file_sorted_reverse.sort(reverse=True)\n", + "print(file_sorted_reverse[:5])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ec5923b2", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: pattern matching\n", + "# filter the 200709** files under datastream_dir\n", + "files_filter = glob.glob(f'{datastream_dir}/*.200709*.*')\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c4a4aa18", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: use armnotebook_utils.file_filter (TODO) to filter files based on datastream info \n" + ] + }, + { + "cell_type": "markdown", + "id": "cfeb9efc", + "metadata": {}, + "source": [ + "### Load data\n", + "The arm data is stored in [NetCDF](https://en.wikipedia.org/wiki/NetCDF#:~:text=%22NetCDF%20(network%20Common%20Data%20Form,format%20for%20representing%20scientific%20data.) format. 
We can use xarray's `open_dataset` method to load a single file, or `open_mfdataset` to open multiple files. (Note: the latter will still return a single xarray dataset object by combining multiple files.)\n", + "\n", + "Note: open_dataset keeps the file handle open and lazy loads its contents. All parameters are passed directly to open_dataset. It is a preferable method over load_dataset for memory efficiency. (For more details, please see [xarray.open_dataset](https://docs.xarray.dev/en/stable/generated/xarray.open_dataset.html))\n", + "\n", + "See the following example in action" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a440a329", + "metadata": {}, + "outputs": [], + "source": [ + "# open single file\n", + "full_path = latest_file\n", + "print(full_path)\n", + "ds_single = xr.open_dataset(full_path)\n", + "ds_single" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0143a3d", + "metadata": {}, + "outputs": [], + "source": [ + "print(ds_single)\n", + "print(type(ds_single))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1c0f8939", + "metadata": {}, + "outputs": [], + "source": [ + "# open multiple files\n", + "n_files = 3\n", + "full_paths = [os.path.join(datastream_dir, f_path) for f_path in file_sorted_reverse[:n_files]]\n", + "print(full_paths)\n", + "try: # Note: sometimes multiple files cannot be merged, so we used try except here.\n", + " ds_mutiple = xr.open_mfdataset(full_paths)\n", + " # ds_mutiple\n", + " print(type(ds_mutiple))\n", + " print(ds_mutiple)\n", + "except Exception as e:\n", + " print(e)" + ] + }, + { + "cell_type": "markdown", + "id": "3fa59943", + "metadata": {}, + "source": [ + "## Explore the data" + ] + }, + { + "cell_type": "markdown", + "id": "571f69b9", + "metadata": {}, + "source": [ + "### NetCDF Data structure \n", + "\n", + "\n", + "(If you are confident with NetCDF basics and xarray essentials, feel free to skip this section.)\n", + "Before we dive into 
data exploration, there are some essential concepts we should be familiar with (shown below): \n", + "* Dataset\n", + "* Data array\n", + "* Variable\n", + "* Dimension\n", + "* Coordinate\n", + "* Data Type\n", + "* Meta Data (Attributes)\n", + "\n", + "We will not go into details about NetCDF basics and here are some references you might find helpful\n", + "* [Components of a NetCDF Dataset](https://iprc.soest.hawaii.edu/users/xfu/tool/guidef-7.html#)\n", + "* [Network Common Data Form (NetCDF)](https://www.unidata.ucar.edu/software/netcdf/) and [A Brief History of (netCDF) Time](https://www.unidata.ucar.edu/software/netcdf/time/recs.html)\n", + "\n", + "\n", + "\n", + "### Xarray essentials\n", + "Earlier we introduced the xarray package and used `open_dataset` and `open_mfdataset` to retrieve a NetCDF data file as an xarray Dataset object. Recall that you can review the data in a notebook by using `print(ds)` or `ds`. \n", + "\n", + "Using xarray to retrieve the aforementioned NetCDF basics is straightforward. 
In general,\n", + "* Dataset: `ds`\n", + "* Data array: `ds.variables`\n", + "* Variable: `ds.variables`\n", + "* Dimension: `ds.dims`\n", + "* Coordinate: `ds.coords`\n", + "* Data Type: `type(ds)`\n", + "* Meta Data (Attributes): `ds.attrs`\n", + "\n", + "Also, here are some references if you are new to xarray\n", + "* [Xarray in 45 minutes](https://tutorial.xarray.dev/overview/xarray-in-45-min.html)\n", + "* [Handling NetCDF Files using XArray for Absolute Beginners](https://towardsdatascience.com/handling-netcdf-files-using-xarray-for-absolute-beginners-111a8ab4463f)\n", + "\n", + "Try the following commands in action\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b3eac323", + "metadata": {}, + "outputs": [], + "source": [ + "ds = ds_single" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "77ecf85d", + "metadata": {}, + "outputs": [], + "source": [ + "# Dataset \n", + "ds" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "25e7de09", + "metadata": {}, + "outputs": [], + "source": [ + "# Data array, Variable\n", + "# Note: the info can be overwhelming. 
Do not attempt to grasp everything at the first glance.\n", + "ds.variables" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8c41b67e", + "metadata": {}, + "outputs": [], + "source": [ + "# dimenssions\n", + "ds.dims" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "156f1dfc", + "metadata": {}, + "outputs": [], + "source": [ + "# coordinates\n", + "ds.coords" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "277d6064", + "metadata": {}, + "outputs": [], + "source": [ + "# Meta Data (Attributes)\n", + "ds.attrs" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d334681f", + "metadata": {}, + "outputs": [], + "source": [ + "# type\n", + "print(type(ds))\n", + "print(type(ds.variables))\n", + "print(type(ds.dims))\n", + "print(type(ds.coords))\n", + "print(type(ds.attrs))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "643399d6", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: retrieve variable names only\n", + "list(ds.variables)" + ] + }, + { + "cell_type": "markdown", + "id": "1cf36878", + "metadata": {}, + "source": [ + "#### Discussion: variable vs. coordinates vs. dimenssions. \n", + "From [Components of a NetCDF Dataset](https://iprc.soest.hawaii.edu/users/xfu/tool/guidef-7.html#):\n", + "\n", + "* Dimession: A dimension may be used to represent a real physical dimension, for example, time, latitude, longitude, or height. A dimension might also be used to index other quantities, for example station or model-run-number.\n", + "A netCDF dimension has both a name and a length. A dimension length is an arbitrary positive integer, except that one dimension in a netCDF dataset can have the length UNLIMITED.\n", + "\n", + "* Variables: Variables are used to store the bulk of the data in a netCDF dataset. A variable represents an array of values of the same type. A scalar value is treated as a 0-dimensional array. 
A variable has a name, a data type, and a shape described by its list of dimensions specified when the variable is created. A variable may also have associated attributes, which may be added, deleted or changed after the variable is created.\n", + "\n", + "* Coordinate (Variables): It is legal for a variable to have the same name as a dimension. Such variables have no special meaning to the netCDF library. However there is a convention that such variables should be treated in a special way by software using this library.\n", + "A variable with the same name as a dimension is called a coordinate variable. It typically defines a physical coordinate corresponding to that dimension.\n", + "\n", + "\n", + "Tips: By definition, coordinates and dimensions are also variables. For example ds.time is a coordinate but it is also a special variable. But in practice, when we talk about a variable, we mean a regular (non-coordinate) variable. We will use this convention for the remainder of this notebook. " + ] + }, + { + "cell_type": "markdown", + "id": "d47120f9", + "metadata": {}, + "source": [ + "### Xarray Variable\n", + "\n", + "\n", + "We can use `ds.variables` to access the variables of a dataset. But printing out the whole dataset can often be overwhelming. Instead, we would work on the individual variable (also called Xarray data array.) using the `ds[var_name]` syntax, where `var_name` is the name of the variable. See the following examples in action.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5a57e136", + "metadata": {}, + "outputs": [], + "source": [ + "print(type(ds[\"time\"]))\n", + "ds[\"time\"]" + ] + }, + { + "cell_type": "markdown", + "id": "51ec643e", + "metadata": {}, + "source": [ + "#### Variable properties\n", + "For an individual variable, the following properties are the most commonly used, shown with the associated xarray attribute to retrieve them. 
(assuming var = ds[var_name])\n", + "* name: `var.name`\n", + "* data content: `var.data`\n", + "* attributes: `var.attrs`\n", + "* dimenstions: `var.dims`\n", + "* data type: `var.data.dtype`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a632a525", + "metadata": {}, + "outputs": [], + "source": [ + "var_name = \"time\"\n", + "var = ds[var_name]\n", + "\n", + "print(\"var.name: \\n\", var.name, \"\\n\")\n", + "print(\"var.data: \\n\", var.data, \"\\n\")\n", + "print(\"var.attrs: \\n\", var.attrs, \"\\n\")\n", + "print(\"var.dims: \\n\", var.dims, \"\\n\")\n", + "print(\"var.data.dtype: \\n\", var.data.dtype, \"\\n\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b256b07f", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: indivdual variable info table (collect and display variable info as a tabular format)\n", + "df_info = pd.DataFrame()\n", + "df_info[\"var_name\"] = list(ds.variables)\n", + "df_info[\"dims\"] = df_info.var_name.apply(lambda x: ds[x].dims)\n", + "df_info[\"is_dim\"] = df_info.var_name.apply(lambda x: x in ds.dims)\n", + "df_info[\"n_dim\"] = df_info.var_name.apply(lambda x: len(ds[x].dims))\n", + "df_info[\"attrs\"] = df_info.var_name.apply(lambda x: ds[x].attrs)\n", + "df_info[\"dtype\"] = df_info.var_name.apply(lambda x: ds[x].data.dtype)\n", + "df_info" + ] + }, + { + "cell_type": "markdown", + "id": "e6a36b96", + "metadata": {}, + "source": [ + "### Data cleaning/Preprocessing (skipped)\n", + "Data cleaning and preprocessing is an important stage in the the data analysis pipeline. However it will be out of the scope of this notebook. For more information about data cleaning and preprocessing basics, here are some references. 
\n", + "* [Xarray Fundamentals](https://earth-env-data-science.github.io/lectures/xarray/xarray.html)\n", + "* [Xarray Tutorial — Pangeo Gallery documentation](http://gallery.pangeo.io/repos/pangeo-data/pangeo-tutorial-gallery/xarray.html)\n", + "* [Pythonic Data Cleaning With pandas and NumPy](https://realpython.com/python-data-cleaning-numpy-pandas/)\n", + "* [Pandas - Cleaning Data](https://www.w3schools.com/python/pandas/pandas_cleaning.asp)" + ] + }, + { + "cell_type": "markdown", + "id": "ae6e1a69", + "metadata": {}, + "source": [ + "### Plotting\n", + "Data visualization (or plotting) is another important data analysis topic and deserves its own discussion. Here in this notebook we will only demonstrate basic tools for simple data visualization tasks.\n", + "\n", + "Here are some references you might find useful:\n", + "* xarray plotting: https://docs.xarray.dev/en/stable/user-guide/plotting.html\n", + "* Atmospheric Community Toolkit (ACT): https://arm-doe.github.io/ACT/index.html\n", + "\n", + "Note: this notebook is auto-generated using a template. It uses a general idea to select variable(s) to plot and is not customized to each individual notebook. Feel free to change the variables of interest, especially if a figure fails to plot." + ] + }, + { + "cell_type": "markdown", + "id": "7e417fb2", + "metadata": {}, + "source": [ + "#### 1-dimensional basic time series plot\n", + "\n", + "For the following plot we would like to find variables such that\n", + "* it has one and only one dimension\n", + "* \"time\" is its coordinate variable\n", + "* it is not a dimension itself,\n", + "* it is not a special variable with substrings within [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "\n", + "Please see the following example in action. 
(For more details about pandas filtering, please see the following references.)\n", + "* [pandas: multiple conditions while indexing data frame](https://stackoverflow.com/questions/22591174/pandas-multiple-conditions-while-indexing-data-frame-unexpected-behavior)\n", + "* [pandas.DataFrame.apply](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.apply.html)\n", + "* [How to test if a string contains one of the substrings in a list...](https://stackoverflow.com/questions/26577516/how-to-test-if-a-string-contains-one-of-the-substrings-in-a-list-in-pandas)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "30edac2d", + "metadata": {}, + "outputs": [], + "source": [ + "# (pandas query) filter n_dim==1, non-dimensional, and not contains the following substrings\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter = df_info[(df_info.n_dim == 1) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (df_info.is_dim==False) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4777a659", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " var_1d = df_filter.var_name.values[0]\n", + " var_1d\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fb5ae985", + "metadata": {}, + "outputs": [], + "source": [ + "# Note: if failed, change to another variable to plot.\n", + "try:\n", + " ds[var_1d].plot()\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "markdown", + "id": "9e41066e", + "metadata": {}, + "source": [ + "#### 2-dimensional basic plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "09bd4adc", + "metadata": {}, + "outputs": [], + "source": [ + "# (pandas query) filter n_dim==2, non-dimensional, and not contains the 
following substrings\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_2 = df_info[(df_info.n_dim == 2) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (df_info.is_dim==False) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter_2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2cdea1a9", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " var_2d = df_filter_2.var_name.values[0]\n", + " var_2d\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f5f48879", + "metadata": {}, + "outputs": [], + "source": [ + "# Note: if failed, change to another variable to plot.\n", + "try:\n", + " print(ds[var_2d].dims)\n", + " # ds[var_2d].plot()\n", + "\n", + " # conventionally, use \"time\" as x-axis\n", + " ds[var_2d].plot(x=\"time\")\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "markdown", + "id": "7554b2a6", + "metadata": {}, + "source": [ + "#### qc-plotting (optional)\n", + "\n", + "Note: act qc plotting has more strict requirement, one of them is the associated qc variable need to have \"flag_masks\" attributes. 
Which is added by using ds = act.io.armfiles.read_netcdf(files_list), then ds.clean.cleanup()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "71c2096f", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " ds_act = act.io.armfiles.read_netcdf(full_path)\n", + " print(type(ds_act))\n", + " ds_act.clean.cleanup()\n", + "\n", + " # or \n", + " # ds.clean.cleanup()\n", + "except Exception as e:\n", + " print(\"ERROR\", e)\n", + " ds_act = ds" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "844f8505", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter valid variables for ACT qc plotting\n", + "condition = (\"qc_\" + df_info.var_name).apply(lambda x: ds_act[x].attrs.get(\"flag_masks\") is not None \n", + " if x in list(ds.variables) else False)\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_3 = df_info[(df_info.is_dim==False) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings))) &\n", + " condition\n", + " ]\n", + "df_filter_3" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ecd38cc6", + "metadata": {}, + "outputs": [], + "source": [ + "# QC Plot\n", + "try:\n", + " qc_variable = df_filter_3.var_name.values[0]\n", + " print(qc_variable)\n", + "except Exception as e:\n", + " print(e)\n", + " \n", + "try:\n", + "\n", + " # Plot\n", + " qc_display = act.plotting.TimeSeriesDisplay(ds_act)\n", + " qc_display.add_subplots((2,), figsize = (9.5,10))\n", + " qc_ax = qc_display.plot(qc_variable, subplot_index=(0,), set_title=\"QC results on field: \" + qc_variable,)\n", + " qc_ax.grid()\n", + " qc_display.qc_flag_block_plot(qc_variable, subplot_index=(1,))\n", + " qc_ax.set_xlabel(f\"UTC Time starts at {ds.time.data[0]}\")\n", + "\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e)" + ] + }, + { + "cell_type": "markdown", + "id": "d76e4d27", + "metadata": 
{}, + "source": [ + "#### bonus: choose variables to plot from a dropdown menu " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bb733804", + "metadata": {}, + "outputs": [], + "source": [ + "# Valid variables filtering\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_4 = df_info[(df_info.is_dim==False) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter_4" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "82d5c20d", + "metadata": {}, + "outputs": [], + "source": [ + "# example 1: using xarray plot\n", + "\n", + "# Uncomment the following cell to try the interactive plot (ctrl + /)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2fc9aaa4", + "metadata": {}, + "outputs": [], + "source": [ + "# %matplotlib widget\n", + "# plt.clf()\n", + "\n", + "# fig, ax = plt.subplots(figsize=(10, 4))\n", + "\n", + "# available_variables = df_filter_4.var_name.values\n", + "# @widgets.interact(var=available_variables)\n", + "# def update(var = available_variables[0]):\n", + "# fig.clear() # Remove old lines from plot and plot new one\n", + "# if len(ds[var].dims)==2:\n", + "# ds[var].plot(x=\"time\", add_colorbar=False)\n", + "# else:\n", + "# ds[var].plot()\n", + "# plt.grid()\n", + "# plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fabdd802", + "metadata": {}, + "outputs": [], + "source": [ + "# example 2: using act plot\n", + "\n", + "# Uncomment the following cell to try the interactive plot (ctrl + /)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "962b4186", + "metadata": {}, + "outputs": [], + "source": [ + "# %matplotlib widget\n", + "# plt.clf()\n", + "\n", + "# available_variables = df_filter_4.var_name.values\n", + "\n", + "\n", + "# @widgets.interact(var=available_variables)\n", + "# def 
update(var = available_variables[0]):\n", + "\n", + "# i_display = act.plotting.TimeSeriesDisplay(ds_act)\n", + "# i_display.add_subplots((1,), figsize=(10, 4))\n", + "# ax = i_display.plot(var, subplot_index=(0,), set_title=f\"{var} ({ds_act[var].attrs['long_name']})\",)\n", + "\n", + "# ax.set_xlabel(f\"UTC Time starts at {ds.time.data[0]}\")\n", + "# ax.grid()\n", + "# plt.show()\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.16" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/SONDEPARAM/sondeparam.c1.ipynb b/VAPs/quicklook/SONDEPARAM/sondeparam.c1.ipynb new file mode 100644 index 00000000..e9d8e3c7 --- /dev/null +++ b/VAPs/quicklook/SONDEPARAM/sondeparam.c1.ipynb @@ -0,0 +1,297 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# SONDEPARAM.C1 Plots\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/sondeparam) for more information about this vap." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib widget\n", + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import ipywidgets as widgets\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "# Data archive directory\n", + "DATA_DIR = r'/data/archive/'\n", + "\n", + "# Datastream info\n", + "DATASTREAM_NAME = 'sondeparam'\n", + "DATA_LEVEL = 'c1'\n", + "LOCATIONS = [{'end_date': '2023-06-15', 'facility': 'M1', 'site': 'guc', 'start_date': '2021-09-01'}, {'end_date': '2023-12-18', 'facility': 'C1', 'site': 'ena', 'start_date': '2013-09-28'}, {'end_date': '2022-10-01', 'facility': 'M1', 'site': 'hou', 'start_date': '2021-08-28'}, {'end_date': '2022-09-25', 'facility': 'S1', 'site': 'hou', 'start_date': '2021-08-28'}, {'end_date': '2015-12-01', 'facility': 'M1', 'site': 'mao', 'start_date': '2013-12-17'}, {'end_date': '2023-12-18', 'facility': 'C1', 'site': 'sgp', 'start_date': '2001-04-01'}, {'end_date': '2014-09-12', 'facility': 'M1', 'site': 'tmp', 'start_date': '2014-01-21'}]" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "9faaf875", + "metadata": {}, + "source": [ + "## Define site, facility, and date range" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ac6764f5", + "metadata": {}, + "outputs": [], + "source": [ + "print(\"The following locations and date ranges are available for this VAP:\")\n", + "display(pd.DataFrame(LOCATIONS, columns=['site', 'facility', 'start_date', 'end_date']))" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "8d132223", + "metadata": {}, + "source": [ + "#### Define site, facility, and date range (date format: YYYY-MM-DD) using the variables below:" + ] + }, + { + "cell_type": "code", + 
"execution_count": null, + "id": "e563983a", + "metadata": {}, + "outputs": [], + "source": [ + "site_facility = ( 'sgp', 'C1' )\n", + "\n", + "date_start = '2023-12-16'\n", + "date_end = '2023-12-18'" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "bccd3dfe-2f99-49a5-bace-ea37e8dd8fc3", + "metadata": {}, + "source": [ + "## Load data files\n", + "Load data files from /data/archive/" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cb4b9a26-c574-49c0-a521-658fa553e39e", + "metadata": {}, + "outputs": [], + "source": [ + "# Compile list of files\n", + "site, facility = site_facility\n", + "d_date_start = datetime.strptime(date_start, '%Y-%m-%d')\n", + "d_date_end = datetime.strptime(date_end, '%Y-%m-%d')\n", + "dir_path = os.path.join(DATA_DIR + site, site + DATASTREAM_NAME + facility + r'.' + DATA_LEVEL )\n", + "dir_path\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6be8f3dc", + "metadata": {}, + "outputs": [], + "source": [ + "from datetime import date, timedelta\n", + "import pandas as pd\n", + "\n", + "def get_ARM_formated_dates(start_date, end_date):\n", + " \"\"\"\n", + " Get a list of ARM conventional formated date lists, based on start_date and end_date(inclusive)\n", + " EXAMPLE:\n", + " get_ARM_formated_dates(start_date=\"20180219\", end_date=\"20180221\")\n", + " >> [\"20180219\", \"20180220\", \"20180221\"] \n", + " \"\"\"\n", + " \n", + " _start_date = pd.to_datetime(start_date)\n", + " _end_date = pd.to_datetime(end_date)\n", + " \n", + " delta = _end_date - _start_date # returns timedelta \n", + " dates = []\n", + "\n", + " for i in range(delta.days + 1):\n", + " day = _start_date + timedelta(days=i)\n", + " day_formated = day.strftime(format=\"%Y%m%d\")\n", + " dates.append(day_formated)\n", + " return dates\n", + "\n", + "\n", + "get_ARM_formated_dates(start_date=date_start, end_date=date_end)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "51feea2e", 
+ "metadata": {}, + "outputs": [], + "source": [ + "# Filter a list of files based on date pattern\n", + "import glob\n", + "dates = get_ARM_formated_dates(start_date=date_start, end_date=date_end)\n", + "files_filter = []\n", + "for date in dates:\n", + " files_filter += glob.glob(f'{dir_path}/*.{date}*.*')\n", + " files_filter\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0e5d0d7", + "metadata": {}, + "outputs": [], + "source": [ + "# Load files as a single dataset\n", + "files_list = files_filter \n", + "ds = act.io.armfiles.read_netcdf(files_list)\n", + "ds.clean.cleanup()\n", + "print(f'{len(files_list)} files loaded')\n", + "ds\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "4a551094-9ec0-4b64-b80a-9940573c2f50", + "metadata": {}, + "source": [ + "## Plot time series data\n", + "#### Define the list of variables to be plotted:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d09b789e-84f1-4605-846b-a72c110c8048", + "metadata": {}, + "outputs": [], + "source": [ + "variables_to_plot = ['cape', 'cin', 'lcl']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3458fb08-035b-4898-9253-0a94e6f9c97b", + "metadata": {}, + "outputs": [], + "source": [ + "ts_display = act.plotting.TimeSeriesDisplay(ds)\n", + "ts_display.add_subplots((len(variables_to_plot),), figsize = (9.5,4*len(variables_to_plot)))\n", + "\n", + "for i,v in enumerate(variables_to_plot):\n", + " ts_ax = ts_display.plot(v, subplot_index=(i,), set_title=ds.variables[v].attrs['long_name'],)\n", + " ts_ax.grid()\n", + "\n", + "plt.show()\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "2308db16-f362-4033-a11a-c3e5e75ad9ba", + "metadata": {}, + "source": [ + "## Field selection dropdown menu\n", + "Select variable to be plotted from a dropdown menu" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d5792fa8-2253-441a-8ac6-1e07a0345031", + 
"metadata": {}, + "outputs": [], + "source": [ + "plt.ioff()\n", + "\n", + "# populate dropdown menu with available variables \n", + "available_variables = [v for v in ds.variables if not('time' in v or v.startswith('qc_') or v.startswith('source_')) and 'long_name' in ds.variables[v].attrs]\n", + "d_variable = 'cape'\n", + "dropdown = widgets.Dropdown(\n", + " options = [(ds.variables[v].attrs['long_name'], v) for v in available_variables],\n", + " value= d_variable,\n", + " description='Field:',\n", + " disabled=False,\n", + ")\n", + "dropdown.layout.margin = '0px 30% 0px 20%'\n", + "dropdown.layout.width = '50%'\n", + "\n", + "# set up display\n", + "i_display = act.plotting.TimeSeriesDisplay(ds)\n", + "i_display.add_subplots((1,), figsize = (9.5,5))\n", + "i_ax = i_display.plot(d_variable, subplot_index=(0,), set_title=ds.variables[d_variable].attrs['long_name'],)\n", + "i_ax.grid()\n", + "i_fig = i_display.fig\n", + "\n", + "# update plot callback function\n", + "def update_plot(change):\n", + " i_ax.cla()\n", + " i_ax_new = i_display.plot(change.new, subplot_index=(0,), set_title=ds.variables[change.new].attrs['long_name'],)\n", + " i_ax_new.grid()\n", + " i_fig.canvas.draw()\n", + " i_fig.canvas.flush_events()\n", + "\n", + "dropdown.observe(update_plot, names='value')\n", + "\n", + "widgets.AppLayout(\n", + " header=dropdown,\n", + " center=i_fig.canvas,\n", + " pane_heights=[1, 6,1]\n", + ")\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.12" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git 
a/VAPs/quicklook/SP2-AIR/SP2-AIR_tutorial.ipynb b/VAPs/quicklook/SP2-AIR/SP2-AIR_tutorial.ipynb new file mode 100644 index 00000000..c18b98d1 --- /dev/null +++ b/VAPs/quicklook/SP2-AIR/SP2-AIR_tutorial.ipynb @@ -0,0 +1,838 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# AAFSP2RBC10S.C1 Notebook\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/sp2-air) for more information about this vap." + ] + }, + { + "cell_type": "markdown", + "id": "97097763", + "metadata": {}, + "source": [ + "In this notebook, we demonstrate the workflow to explore ARM vap data (using aafsp2rbc10s as an example.) Value-added products (VAPs) are higher-order data products that have been analyzed and processed to ease scientists’ use of ARM data in atmospheric research and global climate models. \n", + "Here is the main content we will cover." + ] + }, + { + "cell_type": "markdown", + "id": "eddec40f", + "metadata": {}, + "source": [ + "# Table of Content\n", + "## Access the data\n", + "* How to retrieve the data\n", + "* Data path and file name conventions\n", + "* Load data\n", + "## Explore the data\n", + "* NetCDF Data structure\n", + "* Xarray essentials\n", + "* Xarray Variable\n", + "## Plot the data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "import random\n", + "\n", + "import glob\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "id": "2372d5be", + "metadata": {}, + "source": [ + "## Access the data" + ] + }, + { + "cell_type": "markdown", + "id": "5b0d9684", + "metadata": {}, + "source": [ + "### How to retrieve 
the data\n", + "We assume the path \"/data/archive\" is available where you are running this notebook. You can use `os.path.exists(\"/data/archive\")` to verify that the path exists on your machine.\n", + "\n", + "\n", + "### Data path and file name conventions\n", + "There are several common terms regarding ARM data, for example, data-stream-name, data-level, site, facility, instrument, etc. (For more details, please see [ARM Data File Standards Version 1.2](https://www.google.com/search?q=arm+datastream+facility+cite+definition&rlz=1C1GCEJ_enUS1029US1029&ei=hb41ZICrBPukqtsPvMODIA&ved=0ahUKEwjAgeiS0aL-AhV7kmoFHbzhAAQQ4dUDCBA&uact=5&oq=arm+datastream+facility+cite+definition&gs_lcp=Cgxnd3Mtd2l6LXNlcnAQAzIFCAAQogQyBQgAEKIEMgUIABCiBDIFCAAQogQyBQgAEKIEOgoIABBHENYEELADOgoIIRCgARDDBBAKSgQIQRgAUJ4DWIoOYO0PaAFwAXgAgAGMAYgBuQmSAQMyLjmYAQCgAQHIAQjAAQE&sclient=gws-wiz-serp).) \n", + "\n", + "For example, this notebook is called `aafsp2rbc10s.c1`, where `aafsp2rbc10s` is the \"datastream name\", and `c1` is the \"data level\".\n", + "\n", + "This datastream also contains site `cor` and facility `F1`. (Note: an individual datastream might have multiple site-facility pairs.)\n", + "In such a case, the data of this data-stream is stored at `/data/archive/cor/coraafsp2rbc10sF1.c1`, which is in the format of `<DATA_DIR>/<site>/<site><datastream_name><facility>.<data_level>`. We can use the following method to assign the data-stream directory `datastream_dir = os.path.join(DATA_DIR, site, site + DATASTREAM_NAME + facility + '.' + DATA_LEVEL )`\n", + "\n", + "The data files under datastream_dir also follow naming conventions. But once you reach the datastream_dir level, the most important file naming convention to differentiate the files is \"yyyyMMdd.hhmmss\", which comes in handy to filter out files based on datetime. 
For example, we can use `glob.glob(f'{datastream_dir}/*.200709*.*')` to filter files from September 2007.\n", + "\n", + "(Note: refer to the https://adc.arm.gov/discovery/#/ and https://adc.arm.gov/solr8/metadata/select API to explore ARM datastreams and their associated available sites and facilities.)\n", + "\n", + "Please see the following examples in action" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d7e9eb85", + "metadata": {}, + "outputs": [], + "source": [ + "# Verify if DATA_DIR path exists\n", + "DATA_DIR = \"/data/archive\"\n", + "os.path.exists(DATA_DIR)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "586993fd", + "metadata": {}, + "outputs": [], + "source": [ + "# Speicify datastream_dir following the path conventions and check its existence\n", + "DATASTREAM_NAME = \"aafsp2rbc10s\"\n", + "DATA_LEVEL = \"c1\"\n", + "site = \"cor\"\n", + "facility = \"F1\"\n", + "datastream_dir = os.path.join(DATA_DIR, site, site + DATASTREAM_NAME + facility + '.' 
+ DATA_LEVEL )\n", + "print(datastream_dir)\n", + "print(os.path.exists(datastream_dir))\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0742f7c1", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: list 5 (random) files under datastream_dir\n", + "files = os.listdir(datastream_dir)\n", + "files[:5]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "39b98a36", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: get most recent file\n", + "list_of_files = glob.glob(f\"{datastream_dir}/*\") # * means all if need specific format then *.csv\n", + "latest_file = max(list_of_files, key=os.path.getctime)\n", + "latest_file" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "902d514e", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus sort datastream files based on datetime\n", + "files = os.listdir(datastream_dir)\n", + "file_sorted = files.copy()\n", + "file_sorted.sort() \n", + "print(file_sorted[:5])\n", + "\n", + "# to reverse\n", + "file_sorted_reverse = files.copy() \n", + "file_sorted_reverse.sort(reverse=True)\n", + "print(file_sorted_reverse[:5])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ec5923b2", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: pattern matching\n", + "# filter the 200709** files under datastream_dir\n", + "files_filter = glob.glob(f'{datastream_dir}/*.200709*.*')\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c4a4aa18", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: use armnotebook_utils.file_filter (TODO) to filter files based on datastream info \n" + ] + }, + { + "cell_type": "markdown", + "id": "cfeb9efc", + "metadata": {}, + "source": [ + "### Load data\n", + "The arm data is stored in [NetCDF](https://en.wikipedia.org/wiki/NetCDF#:~:text=%22NetCDF%20(network%20Common%20Data%20Form,format%20for%20representing%20scientific%20data.) format. 
We can use xarray's `open_dataset` method to load single file, or `open_mfdataset` to open multiple files. (Note: the latter will still return a single xarray dataset object by combining multiple files.)\n", + "\n", + "Note: open_dataset keeps the file handle open and lazy loads its contents. All parameters are passed directly to open_dataset. It is a preferable method over load_dataset for memory-effiency. (more details, please see [xarray.open_dataset](https://docs.xarray.dev/en/stable/generated/xarray.open_dataset.html))\n", + "\n", + "See the following example in action" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a440a329", + "metadata": {}, + "outputs": [], + "source": [ + "# open single file\n", + "full_path = latest_file\n", + "print(full_path)\n", + "ds_single = xr.open_dataset(full_path)\n", + "ds_single" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0143a3d", + "metadata": {}, + "outputs": [], + "source": [ + "print(ds_single)\n", + "print(type(ds_single))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1c0f8939", + "metadata": {}, + "outputs": [], + "source": [ + "# open multiple files\n", + "n_files = 3\n", + "full_paths = [os.path.join(datastream_dir, f_path) for f_path in file_sorted_reverse[:n_files]]\n", + "print(full_paths)\n", + "try: # Note: sometimes multiple files cannot be merged, so we used try except here.\n", + " ds_mutiple = xr.open_mfdataset(full_paths)\n", + " # ds_mutiple\n", + " print(type(ds_mutiple))\n", + " print(ds_mutiple)\n", + "except Exception as e:\n", + " print(e)" + ] + }, + { + "cell_type": "markdown", + "id": "3fa59943", + "metadata": {}, + "source": [ + "## Explore the data" + ] + }, + { + "cell_type": "markdown", + "id": "571f69b9", + "metadata": {}, + "source": [ + "### NetCDF Data structure \n", + "\n", + "\n", + "(If you are confident with NetCDF basics and xarray essentials, feel free to skip this session.)\n", + "Before we dive into 
data exploration, there are some essential concepts we should be familiar with (shown below): \n", + "* Dataset\n", + "* Data array\n", + "* Variable\n", + "* Dimension\n", + "* Coordinate\n", + "* Data Type\n", + "* Meta Data (Attributes)\n", + "\n", + "We will not go into details about NetCDF basics, but here are some references you might find helpful:\n", + "* [Components of a NetCDF Dataset](https://iprc.soest.hawaii.edu/users/xfu/tool/guidef-7.html#)\n", + "* [Network Common Data Form (NetCDF)](https://www.unidata.ucar.edu/software/netcdf/) and [A Brief History of (netCDF) Time](https://www.unidata.ucar.edu/software/netcdf/time/recs.html)\n", + "\n", + "\n", + "\n", + "### Xarray essentials\n", + "Earlier we introduced the xarray package and used `open_dataset` and `open_mfdataset` to retrieve NetCDF data files as an xarray Dataset object. Recall that you can review the data in a notebook by using `print(ds)` or `ds`. \n", + "\n", + "Using xarray to retrieve the aforementioned NetCDF basics is straightforward. 
In general,\n", + "* Dataset: `ds`\n", + "* Data array: `ds.variables`\n", + "* Variable: `ds.variables`\n", + "* Dimension: `ds.dims`\n", + "* Coordinate: `ds.coords`\n", + "* Data Type: `type`\n", + "* Meta Data (Attributes): `ds.attrs`\n", + "\n", + "Also, here are some references if you are new to xarray:\n", + "* [Xarray in 45 minutes](https://tutorial.xarray.dev/overview/xarray-in-45-min.html)\n", + "* [Handling NetCDF Files using XArray for Absolute Beginners](https://towardsdatascience.com/handling-netcdf-files-using-xarray-for-absolute-beginners-111a8ab4463f)\n", + "\n", + "Try the following commands in action\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b3eac323", + "metadata": {}, + "outputs": [], + "source": [ + "ds = ds_single" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "77ecf85d", + "metadata": {}, + "outputs": [], + "source": [ + "# Dataset \n", + "ds" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "25e7de09", + "metadata": {}, + "outputs": [], + "source": [ + "# Data array, Variable\n", + "# Note: the info can be overwhelming. 
Do not attempt to grasp everything at the first glance.\n", + "ds.variables" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8c41b67e", + "metadata": {}, + "outputs": [], + "source": [ + "# dimenssions\n", + "ds.dims" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "156f1dfc", + "metadata": {}, + "outputs": [], + "source": [ + "# coordinates\n", + "ds.coords" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "277d6064", + "metadata": {}, + "outputs": [], + "source": [ + "# Meta Data (Attributes)\n", + "ds.attrs" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d334681f", + "metadata": {}, + "outputs": [], + "source": [ + "# type\n", + "print(type(ds))\n", + "print(type(ds.variables))\n", + "print(type(ds.dims))\n", + "print(type(ds.coords))\n", + "print(type(ds.attrs))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "643399d6", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: retrieve variable names only\n", + "list(ds.variables)" + ] + }, + { + "cell_type": "markdown", + "id": "1cf36878", + "metadata": {}, + "source": [ + "#### Discussion: variable vs. coordinates vs. dimenssions. \n", + "From [Components of a NetCDF Dataset](https://iprc.soest.hawaii.edu/users/xfu/tool/guidef-7.html#):\n", + "\n", + "* Dimession: A dimension may be used to represent a real physical dimension, for example, time, latitude, longitude, or height. A dimension might also be used to index other quantities, for example station or model-run-number.\n", + "A netCDF dimension has both a name and a length. A dimension length is an arbitrary positive integer, except that one dimension in a netCDF dataset can have the length UNLIMITED.\n", + "\n", + "* Variables: Variables are used to store the bulk of the data in a netCDF dataset. A variable represents an array of values of the same type. A scalar value is treated as a 0-dimensional array. 
A variable has a name, a data type, and a shape described by its list of dimensions specified when the variable is created. A variable may also have associated attributes, which may be added, deleted or changed after the variable is created.\n", + "\n", + "* Coordinate (Variables): It is legal for a variable to have the same name as a dimension. Such variables have no special meaning to the netCDF library. However there is a convention that such variables should be treated in a special way by software using this library.\n", + "A variable with the same name as a dimension is called a coordinate variable. It typically defines a physical coordinate corresponding to that dimension.\n", + "\n", + "\n", + "Tips: By definition, coordinates and dimensions are also variables. For example, `ds.time` is a coordinate, but it is also a special variable. In practice, however, when we talk about a variable, we mean a regular (non-coordinate) variable. We will use this convention for the remainder of the notebook. " + ] + }, + { + "cell_type": "markdown", + "id": "d47120f9", + "metadata": {}, + "source": [ + "### Xarray Variable\n", + "\n", + "\n", + "We can use `ds.variables` to access the variables of a dataset. But printing out the whole dataset can often be overwhelming. Instead, we work on an individual variable (also called an xarray data array) using the `ds[var_name]` syntax, where `var_name` is the name of the variable. See the following examples in action.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5a57e136", + "metadata": {}, + "outputs": [], + "source": [ + "print(type(ds[\"time\"]))\n", + "ds[\"time\"]" + ] + }, + { + "cell_type": "markdown", + "id": "51ec643e", + "metadata": {}, + "source": [ + "#### Variable properties\n", + "For an individual variable, the following properties are the most commonly used, listed with the associated xarray attribute to retrieve them. 
(assuming var = ds[var_name])\n", + "* name: `var.name`\n", + "* data content: `var.data`\n", + "* attributes: `var.attrs`\n", + "* dimenstions: `var.dims`\n", + "* data type: `var.data.dtype`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a632a525", + "metadata": {}, + "outputs": [], + "source": [ + "var_name = \"time\"\n", + "var = ds[var_name]\n", + "\n", + "print(\"var.name: \\n\", var.name, \"\\n\")\n", + "print(\"var.data: \\n\", var.data, \"\\n\")\n", + "print(\"var.attrs: \\n\", var.attrs, \"\\n\")\n", + "print(\"var.dims: \\n\", var.dims, \"\\n\")\n", + "print(\"var.data.dtype: \\n\", var.data.dtype, \"\\n\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b256b07f", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: indivdual variable info table (collect and display variable info as a tabular format)\n", + "df_info = pd.DataFrame()\n", + "df_info[\"var_name\"] = list(ds.variables)\n", + "df_info[\"dims\"] = df_info.var_name.apply(lambda x: ds[x].dims)\n", + "df_info[\"is_dim\"] = df_info.var_name.apply(lambda x: x in ds.dims)\n", + "df_info[\"n_dim\"] = df_info.var_name.apply(lambda x: len(ds[x].dims))\n", + "df_info[\"attrs\"] = df_info.var_name.apply(lambda x: ds[x].attrs)\n", + "df_info[\"dtype\"] = df_info.var_name.apply(lambda x: ds[x].data.dtype)\n", + "df_info" + ] + }, + { + "cell_type": "markdown", + "id": "e6a36b96", + "metadata": {}, + "source": [ + "### Data cleaning/Preprocessing (skipped)\n", + "Data cleaning and preprocessing is an important stage in the the data analysis pipeline. However it will be out of the scope of this notebook. For more information about data cleaning and preprocessing basics, here are some references. 
\n", + "* [Xarray Fundamentals](https://earth-env-data-science.github.io/lectures/xarray/xarray.html)\n", + "* [Xarray Tutorial — Pangeo Gallery documentation](http://gallery.pangeo.io/repos/pangeo-data/pangeo-tutorial-gallery/xarray.html)\n", + "* [Pythonic Data Cleaning With pandas and NumPy](https://realpython.com/python-data-cleaning-numpy-pandas/)\n", + "* [Pandas - Cleaning Data](https://www.w3schools.com/python/pandas/pandas_cleaning.asp)" + ] + }, + { + "cell_type": "markdown", + "id": "ae6e1a69", + "metadata": {}, + "source": [ + "### Plotting\n", + "Data visualization (or plotting) is another important data analysis topic and deserves its own discussion. Here in this notebook we will only demonstrate basic tools for simple data visualization tasks.\n", + "\n", + "Here are some references you might find useful:\n", + "* xarray plotting: https://docs.xarray.dev/en/stable/user-guide/plotting.html\n", + "* Atmospheric Community Toolkit (ACT): https://arm-doe.github.io/ACT/index.html\n", + "\n", + "Note: this notebook is auto-generated using a template. It uses a general approach to select variable(s) to plot and is not customized to each individual notebook. Feel free to change the variables of interest, especially if a certain figure fails to plot." + ] + }, + { + "cell_type": "markdown", + "id": "7e417fb2", + "metadata": {}, + "source": [ + "#### 1-dimensional basic time series plot\n", + "\n", + "For the following plot we would like to find a variable such that:\n", + "* it has one and only one dimension\n", + "* \"time\" is its coordinate variable\n", + "* it is not a dimension itself\n", + "* its name does not contain any of the substrings [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "\n", + "Please see the following example in action. 
(For more details about pandas filtering, please see the following references.)\n", + "* [pandas: multiple conditions while indexing data frame](https://stackoverflow.com/questions/22591174/pandas-multiple-conditions-while-indexing-data-frame-unexpected-behavior)\n", + "* [pandas.DataFrame.apply](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.apply.html)\n", + "* [How to test if a string contains one of the substrings in a list...](https://stackoverflow.com/questions/26577516/how-to-test-if-a-string-contains-one-of-the-substrings-in-a-list-in-pandas)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "30edac2d", + "metadata": {}, + "outputs": [], + "source": [ + "# (pandas query) filter n_dim==1, non-dimenssional, and not contains the following substrings\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter = df_info[(df_info.n_dim == 1) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (df_info.is_dim==False) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4777a659", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " var_1d = df_filter.var_name.values[0]\n", + " var_1d\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fb5ae985", + "metadata": {}, + "outputs": [], + "source": [ + "# Note: if failed, change to another variable to plot.\n", + "try:\n", + " ds[var_1d].plot()\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "markdown", + "id": "9e41066e", + "metadata": {}, + "source": [ + "#### 2-dimensional basic plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "09bd4adc", + "metadata": {}, + "outputs": [], + "source": [ + "# (pandas query) filter n_dim==2, non-dimenssional, and not contains the 
following substrings\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_2 = df_info[(df_info.n_dim == 2) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (df_info.is_dim==False) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter_2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2cdea1a9", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " var_2d = df_filter_2.var_name.values[0]\n", + " var_2d\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f5f48879", + "metadata": {}, + "outputs": [], + "source": [ + "# Note: if failed, change to another variable to plot.\n", + "try:\n", + " print(ds[var_2d].dims)\n", + " # ds[var_2d].plot()\n", + "\n", + " # conventionally, use \"time\" as x-axis\n", + " ds[var_2d].plot(x=\"time\")\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "markdown", + "id": "7554b2a6", + "metadata": {}, + "source": [ + "#### qc-plotting (optional)\n", + "\n", + "Note: act qc plotting has more strict requirement, one of them is the associated qc variable need to have \"flag_masks\" attributes. 
Which is added by using ds = act.io.armfiles.read_netcdf(files_list), then ds.clean.cleanup()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "71c2096f", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " ds_act = act.io.armfiles.read_netcdf(full_path)\n", + " print(type(ds_act))\n", + " ds_act.clean.cleanup()\n", + "\n", + " # or \n", + " # ds.clean.cleanup()\n", + "except Exception as e:\n", + " print(\"ERROR\", e)\n", + " ds_act = ds" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "844f8505", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter valid variables for ACT qc plotting\n", + "condition = (\"qc_\" + df_info.var_name).apply(lambda x: ds_act[x].attrs.get(\"flag_masks\") is not None \n", + " if x in list(ds.variables) else False)\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_3 = df_info[(df_info.is_dim==False) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings))) &\n", + " condition\n", + " ]\n", + "df_filter_3" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ecd38cc6", + "metadata": {}, + "outputs": [], + "source": [ + "# QC Plot\n", + "try:\n", + " qc_variable = df_filter_3.var_name.values[0]\n", + " print(qc_variable)\n", + "except Exception as e:\n", + " print(e)\n", + " \n", + "try:\n", + "\n", + " # Plot\n", + " qc_display = act.plotting.TimeSeriesDisplay(ds_act)\n", + " qc_display.add_subplots((2,), figsize = (9.5,10))\n", + " qc_ax = qc_display.plot(qc_variable, subplot_index=(0,), set_title=\"QC results on field: \" + qc_variable,)\n", + " qc_ax.grid()\n", + " qc_display.qc_flag_block_plot(qc_variable, subplot_index=(1,))\n", + " qc_ax.set_xlabel(f\"UTC Time starts at {ds.time.data[0]}\")\n", + "\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e)" + ] + }, + { + "cell_type": "markdown", + "id": "d76e4d27", + "metadata": 
{}, + "source": [ + "#### bonus: choose variables to plot from a dropdown menu " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bb733804", + "metadata": {}, + "outputs": [], + "source": [ + "# Valid variables filtering\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_4 = df_info[(df_info.is_dim==False) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter_4" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "82d5c20d", + "metadata": {}, + "outputs": [], + "source": [ + "# example 1: using xarray plot\n", + "\n", + "# Uncomment the following cell to try the interactive plot (ctrl + /)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2fc9aaa4", + "metadata": {}, + "outputs": [], + "source": [ + "# %matplotlib widget\n", + "# plt.clf()\n", + "\n", + "# fig, ax = plt.subplots(figsize=(10, 4))\n", + "\n", + "# available_variables = df_filter_4.var_name.values\n", + "# @widgets.interact(var=available_variables)\n", + "# def update(var = available_variables[0]):\n", + "# fig.clear() # Remove old lines from plot and plot new one\n", + "# if len(ds[var].dims)==2:\n", + "# ds[var].plot(x=\"time\", add_colorbar=False)\n", + "# else:\n", + "# ds[var].plot()\n", + "# plt.grid()\n", + "# plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fabdd802", + "metadata": {}, + "outputs": [], + "source": [ + "# example 2: using act plot\n", + "\n", + "# Uncomment the following cell to try the interactive plot (ctrl + /)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "962b4186", + "metadata": {}, + "outputs": [], + "source": [ + "# %matplotlib widget\n", + "# plt.clf()\n", + "\n", + "# available_variables = df_filter_4.var_name.values\n", + "\n", + "\n", + "# @widgets.interact(var=available_variables)\n", + "# def 
update(var = available_variables[0]):\n", + "\n", + "# i_display = act.plotting.TimeSeriesDisplay(ds_act)\n", + "# i_display.add_subplots((1,), figsize=(10, 4))\n", + "# ax = i_display.plot(var, subplot_index=(0,), set_title=f\"{var} ({ds_act[var].attrs['long_name']})\",)\n", + "\n", + "# ax.set_xlabel(f\"UTC Time starts at {ds.time.data[0]}\")\n", + "# ax.grid()\n", + "# plt.show()\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.16" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/SP2-AIR/aafsp2rbc10s.c1.ipynb b/VAPs/quicklook/SP2-AIR/aafsp2rbc10s.c1.ipynb new file mode 100644 index 00000000..27259847 --- /dev/null +++ b/VAPs/quicklook/SP2-AIR/aafsp2rbc10s.c1.ipynb @@ -0,0 +1,339 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# AAFSP2RBC10S.C1 Plots\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/sp2-air) for more information about this vap." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib widget\n", + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import ipywidgets as widgets\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "# Data archive directory\n", + "DATA_DIR = r'/data/archive/'\n", + "\n", + "# Datastream info\n", + "DATASTREAM_NAME = 'aafsp2rbc10s'\n", + "DATA_LEVEL = 'c1'\n", + "LOCATIONS = [{'end_date': '2018-12-08', 'facility': 'F1', 'site': 'cor', 'start_date': '2018-11-04'}, {'end_date': '2018-02-18', 'facility': 'F1', 'site': 'ena', 'start_date': '2017-06-23'}]" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "9faaf875", + "metadata": {}, + "source": [ + "## Define site, facility, and date range" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ac6764f5", + "metadata": {}, + "outputs": [], + "source": [ + "print(\"The following locations and date ranges are available for this VAP:\")\n", + "display(pd.DataFrame(LOCATIONS, columns=['site', 'facility', 'start_date', 'end_date']))" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "8d132223", + "metadata": {}, + "source": [ + "#### Define site, facility, and date range (date format: YYYY-MM-DD) using the variables below:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e563983a", + "metadata": {}, + "outputs": [], + "source": [ + "site_facility = ( 'cor', 'F1' )\n", + "\n", + "date_start = '2018-12-06'\n", + "date_end = '2018-12-08'" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "bccd3dfe-2f99-49a5-bace-ea37e8dd8fc3", + "metadata": {}, + "source": [ + "## Load data files\n", + "Load data files from /data/archive/" + ] + }, + { + "cell_type": "code", 
+ "execution_count": null, + "id": "cb4b9a26-c574-49c0-a521-658fa553e39e", + "metadata": {}, + "outputs": [], + "source": [ + "# Compile list of files\n", + "site, facility = site_facility\n", + "d_date_start = datetime.strptime(date_start, '%Y-%m-%d')\n", + "d_date_end = datetime.strptime(date_end, '%Y-%m-%d')\n", + "dir_path = os.path.join(DATA_DIR + site, site + DATASTREAM_NAME + facility + r'.' + DATA_LEVEL )\n", + "dir_path\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6be8f3dc", + "metadata": {}, + "outputs": [], + "source": [ + "from datetime import date, timedelta\n", + "import pandas as pd\n", + "\n", + "def get_ARM_formated_dates(start_date, end_date):\n", + " \"\"\"\n", + " Get a list of ARM conventional formated date lists, based on start_date and end_date(inclusive)\n", + " EXAMPLE:\n", + " get_ARM_formated_dates(start_date=\"20180219\", end_date=\"20180221\")\n", + " >> [\"20180219\", \"20180220\", \"20180221\"] \n", + " \"\"\"\n", + " \n", + " _start_date = pd.to_datetime(start_date)\n", + " _end_date = pd.to_datetime(end_date)\n", + " \n", + " delta = _end_date - _start_date # returns timedelta \n", + " dates = []\n", + "\n", + " for i in range(delta.days + 1):\n", + " day = _start_date + timedelta(days=i)\n", + " day_formated = day.strftime(format=\"%Y%m%d\")\n", + " dates.append(day_formated)\n", + " return dates\n", + "\n", + "\n", + "get_ARM_formated_dates(start_date=date_start, end_date=date_end)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "51feea2e", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter a list of files based on date pattern\n", + "import glob\n", + "dates = get_ARM_formated_dates(start_date=date_start, end_date=date_end)\n", + "files_filter = []\n", + "for date in dates:\n", + " files_filter += glob.glob(f'{dir_path}/*.{date}*.*')\n", + " files_filter\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0e5d0d7", + 
"metadata": {}, + "outputs": [], + "source": [ + "# Load files as a single dataset\n", + "files_list = files_filter \n", + "ds = act.io.armfiles.read_netcdf(files_list)\n", + "ds.clean.cleanup()\n", + "print(f'{len(files_list)} files loaded')\n", + "ds\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "4a551094-9ec0-4b64-b80a-9940573c2f50", + "metadata": {}, + "source": [ + "## Plot time series data\n", + "#### Define the list of variables to be plotted:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d09b789e-84f1-4605-846b-a72c110c8048", + "metadata": {}, + "outputs": [], + "source": [ + "variables_to_plot = ['rBC', 'N_dN_rBC']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3458fb08-035b-4898-9253-0a94e6f9c97b", + "metadata": {}, + "outputs": [], + "source": [ + "ts_display = act.plotting.TimeSeriesDisplay(ds)\n", + "ts_display.add_subplots((len(variables_to_plot),), figsize = (9.5,4*len(variables_to_plot)))\n", + "\n", + "for i,v in enumerate(variables_to_plot):\n", + " ts_ax = ts_display.plot(v, subplot_index=(i,), set_title=ds.variables[v].attrs['long_name'],)\n", + " ts_ax.grid()\n", + "\n", + "plt.show()\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "194399aa-1907-452b-8ba9-bc31d7f60291", + "metadata": {}, + "source": [ + "## Quality check plots\n", + "#### Define variable for QC plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9a0ef63d-3eeb-48fc-a24a-43258b6134b8", + "metadata": {}, + "outputs": [], + "source": [ + "qc_variable = 'rBC'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "532663a3-4dc0-4497-bda8-018c5f91e1c4", + "metadata": {}, + "outputs": [], + "source": [ + "# QC Plot\n", + "if ('qc_'+qc_variable) in ds.variables:\n", + "\n", + " # Plot\n", + " qc_display = act.plotting.TimeSeriesDisplay(ds)\n", + " qc_display.add_subplots((2,), figsize = (9.5,10))\n", + " qc_ax = qc_display.plot(qc_variable, 
subplot_index=(0,), set_title=\"QC results on field: \" + qc_variable,)\n", + " qc_ax.grid()\n", + " qc_display.qc_flag_block_plot(qc_variable, subplot_index=(1,))\n", + "\n", + " plt.show()\n", + "else:\n", + " print(f'QC not available for the selected field: {qc_variable}')\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "2308db16-f362-4033-a11a-c3e5e75ad9ba", + "metadata": {}, + "source": [ + "## Field selection dropdown menu\n", + "Select variable to be plotted from a dropdown menu" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d5792fa8-2253-441a-8ac6-1e07a0345031", + "metadata": {}, + "outputs": [], + "source": [ + "plt.ioff()\n", + "\n", + "# populate dropdown menu with available variables \n", + "available_variables = [v for v in ds.variables if not('time' in v or v.startswith('qc_') or v.startswith('source_')) and 'long_name' in ds.variables[v].attrs]\n", + "d_variable = 'rBC'\n", + "dropdown = widgets.Dropdown(\n", + " options = [(ds.variables[v].attrs['long_name'], v) for v in available_variables],\n", + " value= d_variable,\n", + " description='Field:',\n", + " disabled=False,\n", + ")\n", + "dropdown.layout.margin = '0px 30% 0px 20%'\n", + "dropdown.layout.width = '50%'\n", + "\n", + "# set up display\n", + "i_display = act.plotting.TimeSeriesDisplay(ds)\n", + "i_display.add_subplots((1,), figsize = (9.5,5))\n", + "i_ax = i_display.plot(d_variable, subplot_index=(0,), set_title=ds.variables[d_variable].attrs['long_name'],)\n", + "i_ax.grid()\n", + "i_fig = i_display.fig\n", + "\n", + "# update plot callback function\n", + "def update_plot(change):\n", + " i_ax.cla()\n", + " i_ax_new = i_display.plot(change.new, subplot_index=(0,), set_title=ds.variables[change.new].attrs['long_name'],)\n", + " i_ax_new.grid()\n", + " i_fig.canvas.draw()\n", + " i_fig.canvas.flush_events()\n", + "\n", + "dropdown.observe(update_plot, names='value')\n", + "\n", + "widgets.AppLayout(\n", + " header=dropdown,\n", + " 
center=i_fig.canvas,\n", + " pane_heights=[1, 6,1]\n", + ")\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.12" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/SPHOTCOD/SPHOTCOD_tutorial.ipynb b/VAPs/quicklook/SPHOTCOD/SPHOTCOD_tutorial.ipynb new file mode 100644 index 00000000..4be5635f --- /dev/null +++ b/VAPs/quicklook/SPHOTCOD/SPHOTCOD_tutorial.ipynb @@ -0,0 +1,838 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# SPHOTCOD2CHIU.C1 Notebook\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/sphotcod) for more information about this vap." + ] + }, + { + "cell_type": "markdown", + "id": "97097763", + "metadata": {}, + "source": [ + "In this notebook, we demonstrate the workflow to explore ARM vap data (using sphotcod2chiu as an example.) Value-added products (VAPs) are higher-order data products that have been analyzed and processed to ease scientists’ use of ARM data in atmospheric research and global climate models. \n", + "Here is the main content we will cover." 
+ ] + }, + { + "cell_type": "markdown", + "id": "eddec40f", + "metadata": {}, + "source": [ + "# Table of Contents\n", + "## Access the data\n", + "* How to retrieve the data\n", + "* Data path and file name conventions\n", + "* Load data\n", + "## Explore the data\n", + "* NetCDF Data structure\n", + "* Xarray essentials\n", + "* Xarray Variable\n", + "## Plot the data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "import random\n", + "\n", + "import glob\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "id": "2372d5be", + "metadata": {}, + "source": [ + "## Access the data" + ] + }, + { + "cell_type": "markdown", + "id": "5b0d9684", + "metadata": {}, + "source": [ + "### How to retrieve the data\n", + "We assume the path \"/data/archive\" is available where you are running this notebook. You can use `os.path.exists(\"/data/archive\")` to verify that the path exists on your machine.\n", + "\n", + "\n", + "### Data path and file name conventions\n", + "There are several common terms regarding ARM data, for example, data-stream-name, data-level, site, facility, instrument, etc. (For more details, please see [ARM Data File Standards Version 1.2](https://www.google.com/search?q=arm+datastream+facility+cite+definition&rlz=1C1GCEJ_enUS1029US1029&ei=hb41ZICrBPukqtsPvMODIA&ved=0ahUKEwjAgeiS0aL-AhV7kmoFHbzhAAQQ4dUDCBA&uact=5&oq=arm+datastream+facility+cite+definition&gs_lcp=Cgxnd3Mtd2l6LXNlcnAQAzIFCAAQogQyBQgAEKIEMgUIABCiBDIFCAAQogQyBQgAEKIEOgoIABBHENYEELADOgoIIRCgARDDBBAKSgQIQRgAUJ4DWIoOYO0PaAFwAXgAgAGMAYgBuQmSAQMyLjmYAQCgAQHIAQjAAQE&sclient=gws-wiz-serp).) 
\n", + "\n", + "For example, this notebook is called `sphotcod2chiu.c1`, where `sphotcod2chiu` is the \"datastream name\", and `c1` is the \"data level\".\n", + "\n", + "This datastream also contains site `ena` and facility `C1`. (Note: an individual datastream might have multiple site-facility pairs.)\n", + "In such a case, the data of this data-stream is stored at `/data/archive/ena/enasphotcod2chiuC1.c1`, which is in the format of `<DATA_DIR>/<site>/<site><datastream_name><facility>.<data_level>`. We can use the following method to assign the data-stream directory `datastream_dir = os.path.join(DATA_DIR, site, site + DATASTREAM_NAME + facility + '.' + DATA_LEVEL )`\n", + "\n", + "The data files under datastream_dir also follow naming conventions. But once you reach the datastream_dir level, the most important file naming convention to differentiate the files is \"yyyyMMdd.hhmmss\", which comes in handy to filter files based on datetime. For example, we can use `glob.glob(f'{datastream_dir}/*.200709*.*')` to filter files from September 2007.\n", + "\n", + "(Note: refer to the https://adc.arm.gov/discovery/#/ and https://adc.arm.gov/solr8/metadata/select API to explore ARM datastreams and associated available sites and facilities.)\n", + "\n", + "Please see the following examples in action" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d7e9eb85", + "metadata": {}, + "outputs": [], + "source": [ + "# Verify if DATA_DIR path exists\n", + "DATA_DIR = \"/data/archive\"\n", + "os.path.exists(DATA_DIR)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "586993fd", + "metadata": {}, + "outputs": [], + "source": [ + "# Specify datastream_dir following the path conventions and check its existence\n", + "DATASTREAM_NAME = \"sphotcod2chiu\"\n", + "DATA_LEVEL = \"c1\"\n", + "site = \"ena\"\n", + "facility = \"C1\"\n", + "datastream_dir = os.path.join(DATA_DIR, site, site + DATASTREAM_NAME + facility + '.' 
+ DATA_LEVEL )\n", + "print(datastream_dir)\n", + "print(os.path.exists(datastream_dir))\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0742f7c1", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: list 5 (random) files under datastream_dir\n", + "files = os.listdir(datastream_dir)\n", + "files[:5]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "39b98a36", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: get most recent file\n", + "list_of_files = glob.glob(f\"{datastream_dir}/*\") # * means all if need specific format then *.csv\n", + "latest_file = max(list_of_files, key=os.path.getctime)\n", + "latest_file" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "902d514e", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus sort datastream files based on datetime\n", + "files = os.listdir(datastream_dir)\n", + "file_sorted = files.copy()\n", + "file_sorted.sort() \n", + "print(file_sorted[:5])\n", + "\n", + "# to reverse\n", + "file_sorted_reverse = files.copy() \n", + "file_sorted_reverse.sort(reverse=True)\n", + "print(file_sorted_reverse[:5])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ec5923b2", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: pattern matching\n", + "# filter the 200709** files under datastream_dir\n", + "files_filter = glob.glob(f'{datastream_dir}/*.200709*.*')\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c4a4aa18", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: use armnotebook_utils.file_filter (TODO) to filter files based on datastream info \n" + ] + }, + { + "cell_type": "markdown", + "id": "cfeb9efc", + "metadata": {}, + "source": [ + "### Load data\n", + "The arm data is stored in [NetCDF](https://en.wikipedia.org/wiki/NetCDF#:~:text=%22NetCDF%20(network%20Common%20Data%20Form,format%20for%20representing%20scientific%20data.) format. 
We can use xarray's `open_dataset` method to load a single file, or `open_mfdataset` to open multiple files. (Note: the latter will still return a single xarray dataset object by combining multiple files.)\n", + "\n", + "Note: open_dataset keeps the file handle open and lazy loads its contents. All parameters are passed directly to open_dataset. It is a preferable method over load_dataset for memory efficiency. (For more details, please see [xarray.open_dataset](https://docs.xarray.dev/en/stable/generated/xarray.open_dataset.html))\n", + "\n", + "See the following example in action" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a440a329", + "metadata": {}, + "outputs": [], + "source": [ + "# open single file\n", + "full_path = latest_file\n", + "print(full_path)\n", + "ds_single = xr.open_dataset(full_path)\n", + "ds_single" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0143a3d", + "metadata": {}, + "outputs": [], + "source": [ + "print(ds_single)\n", + "print(type(ds_single))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1c0f8939", + "metadata": {}, + "outputs": [], + "source": [ + "# open multiple files\n", + "n_files = 3\n", + "full_paths = [os.path.join(datastream_dir, f_path) for f_path in file_sorted_reverse[:n_files]]\n", + "print(full_paths)\n", + "try: # Note: sometimes multiple files cannot be merged, so we used try except here.\n", + " ds_mutiple = xr.open_mfdataset(full_paths)\n", + " # ds_mutiple\n", + " print(type(ds_mutiple))\n", + " print(ds_mutiple)\n", + "except Exception as e:\n", + " print(e)" + ] + }, + { + "cell_type": "markdown", + "id": "3fa59943", + "metadata": {}, + "source": [ + "## Explore the data" + ] + }, + { + "cell_type": "markdown", + "id": "571f69b9", + "metadata": {}, + "source": [ + "### NetCDF Data structure \n", + "\n", + "\n", + "(If you are confident with NetCDF basics and xarray essentials, feel free to skip this section.)\n", + "Before we dive into 
data exploration, there are some essential concepts we should be familiar with (shown below): \n", + "* Dataset\n", + "* Data array\n", + "* Variable\n", + "* Dimension\n", + "* Coordinate\n", + "* Data Type\n", + "* Meta Data (Attributes)\n", + "\n", + "We will not go into details about NetCDF basics, but here are some references you might find helpful\n", + "* [Components of a NetCDF Dataset](https://iprc.soest.hawaii.edu/users/xfu/tool/guidef-7.html#)\n", + "* [Network Common Data Form (NetCDF)](https://www.unidata.ucar.edu/software/netcdf/) and [A Brief History of (netCDF) Time](https://www.unidata.ucar.edu/software/netcdf/time/recs.html)\n", + "\n", + "\n", + "\n", + "### Xarray essentials\n", + "Earlier we introduced the xarray package and used `open_dataset` and `open_mfdataset` to load NetCDF data files as an xarray Dataset object. Recall that you can review the data in a notebook by using `print(ds)` or `ds`. \n", + "\n", + "Using xarray to retrieve the aforementioned NetCDF basics is straightforward. 
In general,\n", + "* Dataset: `ds`\n", + "* Data array: `ds.variables`\n", + "* Variable: `ds.variables`\n", + "* Dimension: `ds.dims`\n", + "* Coordinate: `ds.coords`\n", + "* Data Type: `type`\n", + "* Meta Data (Attributes): `ds.attrs`\n", + "\n", + "Also, here are some references if you are new to xarray\n", + "* [Xarray in 45 minutes](https://tutorial.xarray.dev/overview/xarray-in-45-min.html)\n", + "* [Handling NetCDF Files using XArray for Absolute Beginners](https://towardsdatascience.com/handling-netcdf-files-using-xarray-for-absolute-beginners-111a8ab4463f)\n", + "\n", + "Try the following commands in action\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b3eac323", + "metadata": {}, + "outputs": [], + "source": [ + "ds = ds_single" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "77ecf85d", + "metadata": {}, + "outputs": [], + "source": [ + "# Dataset \n", + "ds" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "25e7de09", + "metadata": {}, + "outputs": [], + "source": [ + "# Data array, Variable\n", + "# Note: the info can be overwhelming. 
Do not attempt to grasp everything at the first glance.\n", + "ds.variables" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8c41b67e", + "metadata": {}, + "outputs": [], + "source": [ + "# dimensions\n", + "ds.dims" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "156f1dfc", + "metadata": {}, + "outputs": [], + "source": [ + "# coordinates\n", + "ds.coords" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "277d6064", + "metadata": {}, + "outputs": [], + "source": [ + "# Meta Data (Attributes)\n", + "ds.attrs" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d334681f", + "metadata": {}, + "outputs": [], + "source": [ + "# type\n", + "print(type(ds))\n", + "print(type(ds.variables))\n", + "print(type(ds.dims))\n", + "print(type(ds.coords))\n", + "print(type(ds.attrs))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "643399d6", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: retrieve variable names only\n", + "list(ds.variables)" + ] + }, + { + "cell_type": "markdown", + "id": "1cf36878", + "metadata": {}, + "source": [ + "#### Discussion: variable vs. coordinates vs. dimensions. \n", + "From [Components of a NetCDF Dataset](https://iprc.soest.hawaii.edu/users/xfu/tool/guidef-7.html#):\n", + "\n", + "* Dimension: A dimension may be used to represent a real physical dimension, for example, time, latitude, longitude, or height. A dimension might also be used to index other quantities, for example station or model-run-number.\n", + "A netCDF dimension has both a name and a length. A dimension length is an arbitrary positive integer, except that one dimension in a netCDF dataset can have the length UNLIMITED.\n", + "\n", + "* Variables: Variables are used to store the bulk of the data in a netCDF dataset. A variable represents an array of values of the same type. A scalar value is treated as a 0-dimensional array. 
A variable has a name, a data type, and a shape described by its list of dimensions specified when the variable is created. A variable may also have associated attributes, which may be added, deleted or changed after the variable is created.\n", + "\n", + "* Coordinate (Variables): It is legal for a variable to have the same name as a dimension. Such variables have no special meaning to the netCDF library. However there is a convention that such variables should be treated in a special way by software using this library.\n", + "A variable with the same name as a dimension is called a coordinate variable. It typically defines a physical coordinate corresponding to that dimension.\n", + "\n", + "\n", + "Tips: By definition, coordinates and dimensions are also variables. For example, ds.time is a coordinate, but it is also a special variable. In practice, however, when we talk about a variable, we mean a regular (non-coordinate) variable. We will use this convention for the remainder of the notebook. " + ] + }, + { + "cell_type": "markdown", + "id": "d47120f9", + "metadata": {}, + "source": [ + "### Xarray Variable\n", + "\n", + "\n", + "We can use `ds.variables` to access the variables of a dataset. But printing out the whole dataset can often be overwhelming. Instead, we work on an individual variable (also called an xarray data array) using the `ds[var_name]` syntax, where `var_name` is the name of the variable. See the following examples in action.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5a57e136", + "metadata": {}, + "outputs": [], + "source": [ + "print(type(ds[\"time\"]))\n", + "ds[\"time\"]" + ] + }, + { + "cell_type": "markdown", + "id": "51ec643e", + "metadata": {}, + "source": [ + "#### Variable properties\n", + "For an individual variable, the following properties are most commonly used, shown with the associated xarray attribute to retrieve them. 
(assuming var = ds[var_name])\n", + "* name: `var.name`\n", + "* data content: `var.data`\n", + "* attributes: `var.attrs`\n", + "* dimensions: `var.dims`\n", + "* data type: `var.data.dtype`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a632a525", + "metadata": {}, + "outputs": [], + "source": [ + "var_name = \"time\"\n", + "var = ds[var_name]\n", + "\n", + "print(\"var.name: \\n\", var.name, \"\\n\")\n", + "print(\"var.data: \\n\", var.data, \"\\n\")\n", + "print(\"var.attrs: \\n\", var.attrs, \"\\n\")\n", + "print(\"var.dims: \\n\", var.dims, \"\\n\")\n", + "print(\"var.data.dtype: \\n\", var.data.dtype, \"\\n\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b256b07f", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: individual variable info table (collect and display variable info as a tabular format)\n", + "df_info = pd.DataFrame()\n", + "df_info[\"var_name\"] = list(ds.variables)\n", + "df_info[\"dims\"] = df_info.var_name.apply(lambda x: ds[x].dims)\n", + "df_info[\"is_dim\"] = df_info.var_name.apply(lambda x: x in ds.dims)\n", + "df_info[\"n_dim\"] = df_info.var_name.apply(lambda x: len(ds[x].dims))\n", + "df_info[\"attrs\"] = df_info.var_name.apply(lambda x: ds[x].attrs)\n", + "df_info[\"dtype\"] = df_info.var_name.apply(lambda x: ds[x].data.dtype)\n", + "df_info" + ] + }, + { + "cell_type": "markdown", + "id": "e6a36b96", + "metadata": {}, + "source": [ + "### Data cleaning/Preprocessing (skipped)\n", + "Data cleaning and preprocessing is an important stage in the data analysis pipeline. However, it is outside the scope of this notebook. For more information about data cleaning and preprocessing basics, here are some references. 
\n", + "* [Xarray Fundamentals](https://earth-env-data-science.github.io/lectures/xarray/xarray.html)\n", + "* [Xarray Tutorial — Pangeo Gallery documentation](http://gallery.pangeo.io/repos/pangeo-data/pangeo-tutorial-gallery/xarray.html)\n", + "* [Pythonic Data Cleaning With pandas and NumPy](https://realpython.com/python-data-cleaning-numpy-pandas/)\n", + "* [Pandas - Cleaning Data](https://www.w3schools.com/python/pandas/pandas_cleaning.asp)" + ] + }, + { + "cell_type": "markdown", + "id": "ae6e1a69", + "metadata": {}, + "source": [ + "### Plotting\n", + "Data visualization (or plotting) is another important data analysis topic and deserves its own discussion. Here in this notebook we will only demonstrate basic tools for simple data visualization tasks.\n", + "\n", + "Here are some references you might find useful:\n", + "* xarray plotting: https://docs.xarray.dev/en/stable/user-guide/plotting.html\n", + "* Atmospheric Community Toolkit (ACT): https://arm-doe.github.io/ACT/index.html\n", + "\n", + "Note: this notebook is auto-generated using a template. It uses a general approach to select variable(s) to plot and is not customized to each individual notebook. Feel free to change the variables of interest, especially if a figure fails to plot." + ] + }, + { + "cell_type": "markdown", + "id": "7e417fb2", + "metadata": {}, + "source": [ + "#### 1-dimensional basic time series plot\n", + "\n", + "For the following plot we would like to find variables such that\n", + "* it has one and only one dimension\n", + "* \"time\" is its coordinate variable\n", + "* it is not a dimension itself,\n", + "* it is not a special variable with substrings within [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "\n", + "Please see the following example in action. 
(For more details about pandas filtering, please see the following references.)\n", + "* [pandas: multiple conditions while indexing data frame](https://stackoverflow.com/questions/22591174/pandas-multiple-conditions-while-indexing-data-frame-unexpected-behavior)\n", + "* [pandas.DataFrame.apply](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.apply.html)\n", + "* [How to test if a string contains one of the substrings in a list...](https://stackoverflow.com/questions/26577516/how-to-test-if-a-string-contains-one-of-the-substrings-in-a-list-in-pandas)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "30edac2d", + "metadata": {}, + "outputs": [], + "source": [ + "# (pandas query) filter n_dim==1, non-dimensional, and not contains the following substrings\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter = df_info[(df_info.n_dim == 1) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (df_info.is_dim==False) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4777a659", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " var_1d = df_filter.var_name.values[0]\n", + " var_1d\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fb5ae985", + "metadata": {}, + "outputs": [], + "source": [ + "# Note: if failed, change to another variable to plot.\n", + "try:\n", + " ds[var_1d].plot()\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "markdown", + "id": "9e41066e", + "metadata": {}, + "source": [ + "#### 2-dimensional basic plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "09bd4adc", + "metadata": {}, + "outputs": [], + "source": [ + "# (pandas query) filter n_dim==2, non-dimensional, and not contains the 
following substrings\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_2 = df_info[(df_info.n_dim == 2) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (df_info.is_dim==False) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter_2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2cdea1a9", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " var_2d = df_filter_2.var_name.values[0]\n", + " var_2d\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f5f48879", + "metadata": {}, + "outputs": [], + "source": [ + "# Note: if failed, change to another variable to plot.\n", + "try:\n", + " print(ds[var_2d].dims)\n", + " # ds[var_2d].plot()\n", + "\n", + " # conventionally, use \"time\" as x-axis\n", + " ds[var_2d].plot(x=\"time\")\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "markdown", + "id": "7554b2a6", + "metadata": {}, + "source": [ + "#### qc-plotting (optional)\n", + "\n", + "Note: act qc plotting has more strict requirement, one of them is the associated qc variable need to have \"flag_masks\" attributes. 
Which is added by using ds = act.io.armfiles.read_netcdf(files_list), then ds.clean.cleanup()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "71c2096f", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " ds_act = act.io.armfiles.read_netcdf(full_path)\n", + " print(type(ds_act))\n", + " ds_act.clean.cleanup()\n", + "\n", + " # or \n", + " # ds.clean.cleanup()\n", + "except Exception as e:\n", + " print(\"ERROR\", e)\n", + " ds_act = ds" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "844f8505", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter valid variables for ACT qc plotting\n", + "condition = (\"qc_\" + df_info.var_name).apply(lambda x: ds_act[x].attrs.get(\"flag_masks\") is not None \n", + " if x in list(ds.variables) else False)\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_3 = df_info[(df_info.is_dim==False) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings))) &\n", + " condition\n", + " ]\n", + "df_filter_3" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ecd38cc6", + "metadata": {}, + "outputs": [], + "source": [ + "# QC Plot\n", + "try:\n", + " qc_variable = df_filter_3.var_name.values[0]\n", + " print(qc_variable)\n", + "except Exception as e:\n", + " print(e)\n", + " \n", + "try:\n", + "\n", + " # Plot\n", + " qc_display = act.plotting.TimeSeriesDisplay(ds_act)\n", + " qc_display.add_subplots((2,), figsize = (9.5,10))\n", + " qc_ax = qc_display.plot(qc_variable, subplot_index=(0,), set_title=\"QC results on field: \" + qc_variable,)\n", + " qc_ax.grid()\n", + " qc_display.qc_flag_block_plot(qc_variable, subplot_index=(1,))\n", + " qc_ax.set_xlabel(f\"UTC Time starts at {ds.time.data[0]}\")\n", + "\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e)" + ] + }, + { + "cell_type": "markdown", + "id": "d76e4d27", + "metadata": 
{}, + "source": [ + "#### bonus: choose variables to plot from a dropdown menu " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bb733804", + "metadata": {}, + "outputs": [], + "source": [ + "# Valid variables filtering\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_4 = df_info[(df_info.is_dim==False) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter_4" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "82d5c20d", + "metadata": {}, + "outputs": [], + "source": [ + "# example 1: using xarray plot\n", + "\n", + "# Uncomment the following cell to try the interactive plot (ctrl + /)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2fc9aaa4", + "metadata": {}, + "outputs": [], + "source": [ + "# %matplotlib widget\n", + "# plt.clf()\n", + "\n", + "# fig, ax = plt.subplots(figsize=(10, 4))\n", + "\n", + "# available_variables = df_filter_4.var_name.values\n", + "# @widgets.interact(var=available_variables)\n", + "# def update(var = available_variables[0]):\n", + "# fig.clear() # Remove old lines from plot and plot new one\n", + "# if len(ds[var].dims)==2:\n", + "# ds[var].plot(x=\"time\", add_colorbar=False)\n", + "# else:\n", + "# ds[var].plot()\n", + "# plt.grid()\n", + "# plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fabdd802", + "metadata": {}, + "outputs": [], + "source": [ + "# example 2: using act plot\n", + "\n", + "# Uncomment the following cell to try the interactive plot (ctrl + /)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "962b4186", + "metadata": {}, + "outputs": [], + "source": [ + "# %matplotlib widget\n", + "# plt.clf()\n", + "\n", + "# available_variables = df_filter_4.var_name.values\n", + "\n", + "\n", + "# @widgets.interact(var=available_variables)\n", + "# def 
update(var = available_variables[0]):\n", + "\n", + "# i_display = act.plotting.TimeSeriesDisplay(ds_act)\n", + "# i_display.add_subplots((1,), figsize=(10, 4))\n", + "# ax = i_display.plot(var, subplot_index=(0,), set_title=f\"{var} ({ds_act[var].attrs['long_name']})\",)\n", + "\n", + "# ax.set_xlabel(f\"UTC Time starts at {ds.time.data[0]}\")\n", + "# ax.grid()\n", + "# plt.show()\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.16" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/SPHOTCOD/sphotcod2chiu.c1.ipynb b/VAPs/quicklook/SPHOTCOD/sphotcod2chiu.c1.ipynb new file mode 100644 index 00000000..54d31aa7 --- /dev/null +++ b/VAPs/quicklook/SPHOTCOD/sphotcod2chiu.c1.ipynb @@ -0,0 +1,297 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# SPHOTCOD2CHIU.C1 Plots\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/sphotcod) for more information about this vap." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib widget\n", + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import ipywidgets as widgets\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "# Data archive directory\n", + "DATA_DIR = r'/data/archive/'\n", + "\n", + "# Datastream info\n", + "DATASTREAM_NAME = 'sphotcod2chiu'\n", + "DATA_LEVEL = 'c1'\n", + "LOCATIONS = [{'end_date': '2019-12-31', 'facility': 'C1', 'site': 'ena', 'start_date': '2014-01-02'}, {'end_date': '2021-05-31', 'facility': 'C1', 'site': 'sgp', 'start_date': '2014-01-01'}]" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "9faaf875", + "metadata": {}, + "source": [ + "## Define site, facility, and date range" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ac6764f5", + "metadata": {}, + "outputs": [], + "source": [ + "print(\"The following locations and date ranges are available for this VAP:\")\n", + "display(pd.DataFrame(LOCATIONS, columns=['site', 'facility', 'start_date', 'end_date']))" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "8d132223", + "metadata": {}, + "source": [ + "#### Define site, facility, and date range (date format: YYYY-MM-DD) using the variables below:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e563983a", + "metadata": {}, + "outputs": [], + "source": [ + "site_facility = ( 'sgp', 'C1' )\n", + "\n", + "date_start = '2021-05-29'\n", + "date_end = '2021-05-31'" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "bccd3dfe-2f99-49a5-bace-ea37e8dd8fc3", + "metadata": {}, + "source": [ + "## Load data files\n", + "Load data files from /data/archive/" + ] + }, + { + "cell_type": "code", 
+ "execution_count": null, + "id": "cb4b9a26-c574-49c0-a521-658fa553e39e", + "metadata": {}, + "outputs": [], + "source": [ + "# Compile list of files\n", + "site, facility = site_facility\n", + "d_date_start = datetime.strptime(date_start, '%Y-%m-%d')\n", + "d_date_end = datetime.strptime(date_end, '%Y-%m-%d')\n", + "dir_path = os.path.join(DATA_DIR + site, site + DATASTREAM_NAME + facility + r'.' + DATA_LEVEL )\n", + "dir_path\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6be8f3dc", + "metadata": {}, + "outputs": [], + "source": [ + "from datetime import date, timedelta\n", + "import pandas as pd\n", + "\n", + "def get_ARM_formated_dates(start_date, end_date):\n", + " \"\"\"\n", + " Get a list of ARM conventional formated date lists, based on start_date and end_date(inclusive)\n", + " EXAMPLE:\n", + " get_ARM_formated_dates(start_date=\"20180219\", end_date=\"20180221\")\n", + " >> [\"20180219\", \"20180220\", \"20180221\"] \n", + " \"\"\"\n", + " \n", + " _start_date = pd.to_datetime(start_date)\n", + " _end_date = pd.to_datetime(end_date)\n", + " \n", + " delta = _end_date - _start_date # returns timedelta \n", + " dates = []\n", + "\n", + " for i in range(delta.days + 1):\n", + " day = _start_date + timedelta(days=i)\n", + " day_formated = day.strftime(format=\"%Y%m%d\")\n", + " dates.append(day_formated)\n", + " return dates\n", + "\n", + "\n", + "get_ARM_formated_dates(start_date=date_start, end_date=date_end)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "51feea2e", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter a list of files based on date pattern\n", + "import glob\n", + "dates = get_ARM_formated_dates(start_date=date_start, end_date=date_end)\n", + "files_filter = []\n", + "for date in dates:\n", + " files_filter += glob.glob(f'{dir_path}/*.{date}*.*')\n", + " files_filter\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0e5d0d7", + 
"metadata": {}, + "outputs": [], + "source": [ + "# Load files as a single dataset\n", + "files_list = files_filter \n", + "ds = act.io.armfiles.read_netcdf(files_list)\n", + "ds.clean.cleanup()\n", + "print(f'{len(files_list)} files loaded')\n", + "ds\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "4a551094-9ec0-4b64-b80a-9940573c2f50", + "metadata": {}, + "source": [ + "## Plot time series data\n", + "#### Define the list of variables to be plotted:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d09b789e-84f1-4605-846b-a72c110c8048", + "metadata": {}, + "outputs": [], + "source": [ + "variables_to_plot = ['cloud_optical_depth', 'liquid_water_path', 'effective_radius']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3458fb08-035b-4898-9253-0a94e6f9c97b", + "metadata": {}, + "outputs": [], + "source": [ + "ts_display = act.plotting.TimeSeriesDisplay(ds)\n", + "ts_display.add_subplots((len(variables_to_plot),), figsize = (9.5,4*len(variables_to_plot)))\n", + "\n", + "for i,v in enumerate(variables_to_plot):\n", + " ts_ax = ts_display.plot(v, subplot_index=(i,), set_title=ds.variables[v].attrs['long_name'],)\n", + " ts_ax.grid()\n", + "\n", + "plt.show()\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "2308db16-f362-4033-a11a-c3e5e75ad9ba", + "metadata": {}, + "source": [ + "## Field selection dropdown menu\n", + "Select variable to be plotted from a dropdown menu" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d5792fa8-2253-441a-8ac6-1e07a0345031", + "metadata": {}, + "outputs": [], + "source": [ + "plt.ioff()\n", + "\n", + "# populate dropdown menu with available variables \n", + "available_variables = [v for v in ds.variables if not('time' in v or v.startswith('qc_') or v.startswith('source_')) and 'long_name' in ds.variables[v].attrs]\n", + "d_variable = 'cloud_optical_depth'\n", + "dropdown = widgets.Dropdown(\n", + " options = 
[(ds.variables[v].attrs['long_name'], v) for v in available_variables],\n", + " value= d_variable,\n", + " description='Field:',\n", + " disabled=False,\n", + ")\n", + "dropdown.layout.margin = '0px 30% 0px 20%'\n", + "dropdown.layout.width = '50%'\n", + "\n", + "# set up display\n", + "i_display = act.plotting.TimeSeriesDisplay(ds)\n", + "i_display.add_subplots((1,), figsize = (9.5,5))\n", + "i_ax = i_display.plot(d_variable, subplot_index=(0,), set_title=ds.variables[d_variable].attrs['long_name'],)\n", + "i_ax.grid()\n", + "i_fig = i_display.fig\n", + "\n", + "# update plot callback function\n", + "def update_plot(change):\n", + " i_ax.cla()\n", + " i_ax_new = i_display.plot(change.new, subplot_index=(0,), set_title=ds.variables[change.new].attrs['long_name'],)\n", + " i_ax_new.grid()\n", + " i_fig.canvas.draw()\n", + " i_fig.canvas.flush_events()\n", + "\n", + "dropdown.observe(update_plot, names='value')\n", + "\n", + "widgets.AppLayout(\n", + " header=dropdown,\n", + " center=i_fig.canvas,\n", + " pane_heights=[1, 6,1]\n", + ")\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.12" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/SURFSPECALB/SURFSPECALB_tutorial.ipynb b/VAPs/quicklook/SURFSPECALB/SURFSPECALB_tutorial.ipynb new file mode 100644 index 00000000..e0e12e11 --- /dev/null +++ b/VAPs/quicklook/SURFSPECALB/SURFSPECALB_tutorial.ipynb @@ -0,0 +1,838 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ 
+ "# SURFSPECALB1MLAWER.C1 Notebook\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/surfspecalb) for more information about this vap." + ] + }, + { + "cell_type": "markdown", + "id": "97097763", + "metadata": {}, + "source": [ + "In this notebook, we demonstrate the workflow to explore ARM vap data (using surfspecalb1mlawer as an example.) Value-added products (VAPs) are higher-order data products that have been analyzed and processed to ease scientists’ use of ARM data in atmospheric research and global climate models. \n", + "Here is the main content we will cover." + ] + }, + { + "cell_type": "markdown", + "id": "eddec40f", + "metadata": {}, + "source": [ + "# Table of Content\n", + "## Access the data\n", + "* How to retrieve the data\n", + "* Data path and file name conventions\n", + "* Load data\n", + "## Explore the data\n", + "* NetCDF Data structure\n", + "* Xarray essentials\n", + "* Xarray Variable\n", + "## Plot the data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "import random\n", + "\n", + "import glob\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "id": "2372d5be", + "metadata": {}, + "source": [ + "## Access the data" + ] + }, + { + "cell_type": "markdown", + "id": "5b0d9684", + "metadata": {}, + "source": [ + "### How to retrieve the data\n", + "We assume the path \"/data/archive\" is available where you are running this notebook. 
You can use `os.path.exists(\"/data/archive\")` to verify if the path exists on your machine.\n", + "\n", + "\n", + "### Data path and file name conventions\n", + "There are several common terms regarding ARM data, for example, data-stream-name, data-level, site, facility, instrument, etc. (For more details, please see [ARM Data File Standards Version 1.2](https://www.google.com/search?q=arm+datastream+facility+cite+definition&rlz=1C1GCEJ_enUS1029US1029&ei=hb41ZICrBPukqtsPvMODIA&ved=0ahUKEwjAgeiS0aL-AhV7kmoFHbzhAAQQ4dUDCBA&uact=5&oq=arm+datastream+facility+cite+definition&gs_lcp=Cgxnd3Mtd2l6LXNlcnAQAzIFCAAQogQyBQgAEKIEMgUIABCiBDIFCAAQogQyBQgAEKIEOgoIABBHENYEELADOgoIIRCgARDDBBAKSgQIQRgAUJ4DWIoOYO0PaAFwAXgAgAGMAYgBuQmSAQMyLjmYAQCgAQHIAQjAAQE&sclient=gws-wiz-serp).) \n", + "\n", + "For example, this notebook is called `surfspecalb1mlawer.c1`, where `surfspecalb1mlawer` is the \"datastream name\", and `c1` is the \"data level\".\n", + "\n", + "This datastream also contains site `nsa` and facility `C1`. (Note: an individual datastream might have multiple site-facility pairs.)\n", + "In such a case, the data of this data-stream is stored at `/data/archive/nsa/nsasurfspecalb1mlawerC1.c1`, which is in the format of `{DATA_DIR}/{site}/{site}{DATASTREAM_NAME}{facility}.{DATA_LEVEL}`. We can use the following method to assign the data-stream directory `datastream_dir = os.path.join(DATA_DIR, site, site + DATASTREAM_NAME + facility + '.' + DATA_LEVEL )`\n", + "\n", + "The data files under datastream_dir also follow naming conventions. Once you reach the datastream_dir level, the most important file naming convention for differentiating the files is \"yyyyMMdd.hhmmss\", which comes in handy for filtering files by datetime. 
For example, we can use `glob.glob(f'{datastream_dir}/*.200709*.*')` to filter files in 2007 September.\n", + "\n", + "(Note: refer to the https://adc.arm.gov/discovery/#/ and https://adc.arm.gov/solr8/metadata/select API to explore ARM datastreams and their associated available sites and facilities.)\n", + "\n", + "Please see the following examples in action" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d7e9eb85", + "metadata": {}, + "outputs": [], + "source": [ + "# Verify if DATA_DIR path exists\n", + "DATA_DIR = \"/data/archive\"\n", + "os.path.exists(DATA_DIR)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "586993fd", + "metadata": {}, + "outputs": [], + "source": [ + "# Specify datastream_dir following the path conventions and check its existence\n", + "DATASTREAM_NAME = \"surfspecalb1mlawer\"\n", + "DATA_LEVEL = \"c1\"\n", + "site = \"nsa\"\n", + "facility = \"C1\"\n", + "datastream_dir = os.path.join(DATA_DIR, site, site + DATASTREAM_NAME + facility + '.' 
+ DATA_LEVEL )\n", + "print(datastream_dir)\n", + "print(os.path.exists(datastream_dir))\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0742f7c1", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: list 5 (random) files under datastream_dir\n", + "files = os.listdir(datastream_dir)\n", + "files[:5]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "39b98a36", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: get most recent file\n", + "list_of_files = glob.glob(f\"{datastream_dir}/*\") # * means all if need specific format then *.csv\n", + "latest_file = max(list_of_files, key=os.path.getctime)\n", + "latest_file" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "902d514e", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus sort datastream files based on datetime\n", + "files = os.listdir(datastream_dir)\n", + "file_sorted = files.copy()\n", + "file_sorted.sort() \n", + "print(file_sorted[:5])\n", + "\n", + "# to reverse\n", + "file_sorted_reverse = files.copy() \n", + "file_sorted_reverse.sort(reverse=True)\n", + "print(file_sorted_reverse[:5])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ec5923b2", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: pattern matching\n", + "# filter the 200709** files under datastream_dir\n", + "files_filter = glob.glob(f'{datastream_dir}/*.200709*.*')\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c4a4aa18", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: use armnotebook_utils.file_filter (TODO) to filter files based on datastream info \n" + ] + }, + { + "cell_type": "markdown", + "id": "cfeb9efc", + "metadata": {}, + "source": [ + "### Load data\n", + "The arm data is stored in [NetCDF](https://en.wikipedia.org/wiki/NetCDF#:~:text=%22NetCDF%20(network%20Common%20Data%20Form,format%20for%20representing%20scientific%20data.) format. 
We can use xarray's `open_dataset` method to load single file, or `open_mfdataset` to open multiple files. (Note: the latter will still return a single xarray dataset object by combining multiple files.)\n", + "\n", + "Note: open_dataset keeps the file handle open and lazy loads its contents. All parameters are passed directly to open_dataset. It is a preferable method over load_dataset for memory-effiency. (more details, please see [xarray.open_dataset](https://docs.xarray.dev/en/stable/generated/xarray.open_dataset.html))\n", + "\n", + "See the following example in action" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a440a329", + "metadata": {}, + "outputs": [], + "source": [ + "# open single file\n", + "full_path = latest_file\n", + "print(full_path)\n", + "ds_single = xr.open_dataset(full_path)\n", + "ds_single" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0143a3d", + "metadata": {}, + "outputs": [], + "source": [ + "print(ds_single)\n", + "print(type(ds_single))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1c0f8939", + "metadata": {}, + "outputs": [], + "source": [ + "# open multiple files\n", + "n_files = 3\n", + "full_paths = [os.path.join(datastream_dir, f_path) for f_path in file_sorted_reverse[:n_files]]\n", + "print(full_paths)\n", + "try: # Note: sometimes multiple files cannot be merged, so we used try except here.\n", + " ds_mutiple = xr.open_mfdataset(full_paths)\n", + " # ds_mutiple\n", + " print(type(ds_mutiple))\n", + " print(ds_mutiple)\n", + "except Exception as e:\n", + " print(e)" + ] + }, + { + "cell_type": "markdown", + "id": "3fa59943", + "metadata": {}, + "source": [ + "## Explore the data" + ] + }, + { + "cell_type": "markdown", + "id": "571f69b9", + "metadata": {}, + "source": [ + "### NetCDF Data structure \n", + "\n", + "\n", + "(If you are confident with NetCDF basics and xarray essentials, feel free to skip this session.)\n", + "Before we dive into 
data exploration, there are some essential concepts we should be familiar with (shown below): \n", + "* Dataset\n", + "* Data array\n", + "* Variable\n", + "* Dimension\n", + "* Coordinate\n", + "* Data Type\n", + "* Meta Data (Attributes)\n", + "\n", + "We will not go into details about NetCDF basics and here are some references you might find helpful\n", + "* [Components of a NetCDF Dataset](https://iprc.soest.hawaii.edu/users/xfu/tool/guidef-7.html#)\n", + "* [Network Common Data Form (NetCDF)](https://www.unidata.ucar.edu/software/netcdf/) and [A Brief History of (netCDF) Time](https://www.unidata.ucar.edu/software/netcdf/time/recs.html)\n", + "\n", + "\n", + "\n", + "### Xarray essentials\n", + "Earlier we introduced the xarray package and used `open_dataset` and `open_mfdataset` to retrieve NetCDF data file as an xarray Dataset object. Recall that you can review the data in a notebook by using `print(ds)` or `ds`. \n", + "\n", + "Using xarray to retrieve the aforementioned NetCDF basics is straightforward. 
In general,\n", + "* Dataset: `ds`\n", + "* Data array: `ds.variables`\n", + "* Variable: `ds.variables`\n", + "* Dimension: `ds.dims`\n", + "* Coordinate: `ds.coords`\n", + "* Data Type: `type`\n", + "* Meta Data (Attributes): `ds.attrs`\n", + "\n", + "Also, here are some references if you are new to xarray\n", + "* [Xarray in 45 minutes](https://tutorial.xarray.dev/overview/xarray-in-45-min.html)\n", + "* [Handling NetCDF Files using XArray for Absolute Beginners](https://towardsdatascience.com/handling-netcdf-files-using-xarray-for-absolute-beginners-111a8ab4463f)\n", + "\n", + "Try the following commands in action\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b3eac323", + "metadata": {}, + "outputs": [], + "source": [ + "ds = ds_single" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "77ecf85d", + "metadata": {}, + "outputs": [], + "source": [ + "# Dataset \n", + "ds" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "25e7de09", + "metadata": {}, + "outputs": [], + "source": [ + "# Data array, Variable\n", + "# Note: the info can be overwhelming. 
Do not attempt to grasp everything at the first glance.\n", + "ds.variables" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8c41b67e", + "metadata": {}, + "outputs": [], + "source": [ + "# dimenssions\n", + "ds.dims" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "156f1dfc", + "metadata": {}, + "outputs": [], + "source": [ + "# coordinates\n", + "ds.coords" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "277d6064", + "metadata": {}, + "outputs": [], + "source": [ + "# Meta Data (Attributes)\n", + "ds.attrs" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d334681f", + "metadata": {}, + "outputs": [], + "source": [ + "# type\n", + "print(type(ds))\n", + "print(type(ds.variables))\n", + "print(type(ds.dims))\n", + "print(type(ds.coords))\n", + "print(type(ds.attrs))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "643399d6", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: retrieve variable names only\n", + "list(ds.variables)" + ] + }, + { + "cell_type": "markdown", + "id": "1cf36878", + "metadata": {}, + "source": [ + "#### Discussion: variable vs. coordinates vs. dimenssions. \n", + "From [Components of a NetCDF Dataset](https://iprc.soest.hawaii.edu/users/xfu/tool/guidef-7.html#):\n", + "\n", + "* Dimession: A dimension may be used to represent a real physical dimension, for example, time, latitude, longitude, or height. A dimension might also be used to index other quantities, for example station or model-run-number.\n", + "A netCDF dimension has both a name and a length. A dimension length is an arbitrary positive integer, except that one dimension in a netCDF dataset can have the length UNLIMITED.\n", + "\n", + "* Variables: Variables are used to store the bulk of the data in a netCDF dataset. A variable represents an array of values of the same type. A scalar value is treated as a 0-dimensional array. 
A variable has a name, a data type, and a shape described by its list of dimensions specified when the variable is created. A variable may also have associated attributes, which may be added, deleted or changed after the variable is created.\n", + "\n", + "* Coordinate (Variables): It is legal for a variable to have the same name as a dimension. Such variables have no special meaning to the netCDF library. However there is a convention that such variables should be treated in a special way by software using this library.\n", + "A variable with the same name as a dimension is called a coordinate variable. It typically defines a physical coordinate corresponding to that dimension.\n", + "\n", + "\n", + "Tips: By definition, coordinates and dimensions are also variables. For example ds.time is a coordinate but it is also a special variable. But in practice, when we talk about a variable, we mean a regular (non-coordinate) variable. We will use this convention for the remainder of the notebook. " + ] + }, + { + "cell_type": "markdown", + "id": "d47120f9", + "metadata": {}, + "source": [ + "### Xarray Variable\n", + "\n", + "\n", + "We can use `ds.variables` to access the variables of a dataset. But printing out the whole dataset can often be overwhelming. Instead, we work with an individual variable (also called an xarray data array) using the `ds[var_name]` syntax, where `var_name` is the name of the variable. See the following examples in action.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5a57e136", + "metadata": {}, + "outputs": [], + "source": [ + "print(type(ds[\"time\"]))\n", + "ds[\"time\"]" + ] + }, + { + "cell_type": "markdown", + "id": "51ec643e", + "metadata": {}, + "source": [ + "#### Variable properties\n", + "For an individual variable, the following properties are the most commonly used, listed with the associated xarray attribute to retrieve them. 
(assuming var = ds[var_name])\n", + "* name: `var.name`\n", + "* data content: `var.data`\n", + "* attributes: `var.attrs`\n", + "* dimenstions: `var.dims`\n", + "* data type: `var.data.dtype`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a632a525", + "metadata": {}, + "outputs": [], + "source": [ + "var_name = \"time\"\n", + "var = ds[var_name]\n", + "\n", + "print(\"var.name: \\n\", var.name, \"\\n\")\n", + "print(\"var.data: \\n\", var.data, \"\\n\")\n", + "print(\"var.attrs: \\n\", var.attrs, \"\\n\")\n", + "print(\"var.dims: \\n\", var.dims, \"\\n\")\n", + "print(\"var.data.dtype: \\n\", var.data.dtype, \"\\n\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b256b07f", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: indivdual variable info table (collect and display variable info as a tabular format)\n", + "df_info = pd.DataFrame()\n", + "df_info[\"var_name\"] = list(ds.variables)\n", + "df_info[\"dims\"] = df_info.var_name.apply(lambda x: ds[x].dims)\n", + "df_info[\"is_dim\"] = df_info.var_name.apply(lambda x: x in ds.dims)\n", + "df_info[\"n_dim\"] = df_info.var_name.apply(lambda x: len(ds[x].dims))\n", + "df_info[\"attrs\"] = df_info.var_name.apply(lambda x: ds[x].attrs)\n", + "df_info[\"dtype\"] = df_info.var_name.apply(lambda x: ds[x].data.dtype)\n", + "df_info" + ] + }, + { + "cell_type": "markdown", + "id": "e6a36b96", + "metadata": {}, + "source": [ + "### Data cleaning/Preprocessing (skipped)\n", + "Data cleaning and preprocessing is an important stage in the the data analysis pipeline. However it will be out of the scope of this notebook. For more information about data cleaning and preprocessing basics, here are some references. 
\n", + "* [Xarray Fundamentals](https://earth-env-data-science.github.io/lectures/xarray/xarray.html)\n", + "* [Xarray Tutorial — Pangeo Gallery documentation](http://gallery.pangeo.io/repos/pangeo-data/pangeo-tutorial-gallery/xarray.html)\n", + "* [Pythonic Data Cleaning With pandas and NumPy](https://realpython.com/python-data-cleaning-numpy-pandas/)\n", + "* [Pandas - Cleaning Data](https://www.w3schools.com/python/pandas/pandas_cleaning.asp)" + ] + }, + { + "cell_type": "markdown", + "id": "ae6e1a69", + "metadata": {}, + "source": [ + "### Plotting\n", + "Data visualization (or plotting) is another important data analysis topic and deserves its own discussion. Here in this notebook we will only demonstrate basic tool for simple data visualization tasks.\n", + "\n", + "Here are some reference you might find useful:\n", + "* xarray plotting: https://docs.xarray.dev/en/stable/user-guide/plotting.html\n", + "* Atmospheric Community Toolkit (ACT): https://arm-doe.github.io/ACT/index.html\n", + "\n", + "Note: this notebook is auto-generated using a template. It uses a general idea to select variable(s) to plot and is not customized to each indivdual notebook. Feel free to change the variables in intrrests, especially certain figure is failed to plot." + ] + }, + { + "cell_type": "markdown", + "id": "7e417fb2", + "metadata": {}, + "source": [ + "#### 1-dimenssional basic time series plot\n", + "\n", + "For the following plot we would like to find variables such that\n", + "* it has one and only one dimession\n", + "* \"time\" is its coordinate variable\n", + "* it is not a dimenssion itself,\n", + "* it is not a special variable with substrings within [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "\n", + "Please see the following example in action. 
(For more details about pandas filtering, please see the following references.)\n", + "* [pandas: multiple conditions while indexing data frame](https://stackoverflow.com/questions/22591174/pandas-multiple-conditions-while-indexing-data-frame-unexpected-behavior)\n", + "* [pandas.DataFrame.apply](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.apply.html)\n", + "* [How to test if a string contains one of the substrings in a list...](https://stackoverflow.com/questions/26577516/how-to-test-if-a-string-contains-one-of-the-substrings-in-a-list-in-pandas)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "30edac2d", + "metadata": {}, + "outputs": [], + "source": [ + "# (pandas query) filter n_dim==1, non-dimensional, and not contains the following substrings\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter = df_info[(df_info.n_dim == 1) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (df_info.is_dim==False) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4777a659", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " var_1d = df_filter.var_name.values[0]\n", + " var_1d\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fb5ae985", + "metadata": {}, + "outputs": [], + "source": [ + "# Note: if failed, change to another variable to plot.\n", + "try:\n", + " ds[var_1d].plot()\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "markdown", + "id": "9e41066e", + "metadata": {}, + "source": [ + "#### 2-dimensional basic plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "09bd4adc", + "metadata": {}, + "outputs": [], + "source": [ + "# (pandas query) filter n_dim==2, non-dimensional, and not contains the 
following substrings\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_2 = df_info[(df_info.n_dim == 2) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (df_info.is_dim==False) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter_2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2cdea1a9", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " var_2d = df_filter_2.var_name.values[0]\n", + " var_2d\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f5f48879", + "metadata": {}, + "outputs": [], + "source": [ + "# Note: if failed, change to another variable to plot.\n", + "try:\n", + " print(ds[var_2d].dims)\n", + " # ds[var_2d].plot()\n", + "\n", + " # conventionally, use \"time\" as x-axis\n", + " ds[var_2d].plot(x=\"time\")\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "markdown", + "id": "7554b2a6", + "metadata": {}, + "source": [ + "#### qc-plotting (optional)\n", + "\n", + "Note: act qc plotting has more strict requirement, one of them is the associated qc variable need to have \"flag_masks\" attributes. 
Which is added by using ds = act.io.armfiles.read_netcdf(files_list), then ds.clean.cleanup()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "71c2096f", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " ds_act = act.io.armfiles.read_netcdf(full_path)\n", + " print(type(ds_act))\n", + " ds_act.clean.cleanup()\n", + "\n", + " # or \n", + " # ds.clean.cleanup()\n", + "except Exception as e:\n", + " print(\"ERROR\", e)\n", + " ds_act = ds" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "844f8505", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter valid variables for ACT qc plotting\n", + "condition = (\"qc_\" + df_info.var_name).apply(lambda x: ds_act[x].attrs.get(\"flag_masks\") is not None \n", + " if x in list(ds.variables) else False)\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_3 = df_info[(df_info.is_dim==False) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings))) &\n", + " condition\n", + " ]\n", + "df_filter_3" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ecd38cc6", + "metadata": {}, + "outputs": [], + "source": [ + "# QC Plot\n", + "try:\n", + " qc_variable = df_filter_3.var_name.values[0]\n", + " print(qc_variable)\n", + "except Exception as e:\n", + " print(e)\n", + " \n", + "try:\n", + "\n", + " # Plot\n", + " qc_display = act.plotting.TimeSeriesDisplay(ds_act)\n", + " qc_display.add_subplots((2,), figsize = (9.5,10))\n", + " qc_ax = qc_display.plot(qc_variable, subplot_index=(0,), set_title=\"QC results on field: \" + qc_variable,)\n", + " qc_ax.grid()\n", + " qc_display.qc_flag_block_plot(qc_variable, subplot_index=(1,))\n", + " qc_ax.set_xlabel(f\"UTC Time starts at {ds.time.data[0]}\")\n", + "\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e)" + ] + }, + { + "cell_type": "markdown", + "id": "d76e4d27", + "metadata": 
{}, + "source": [ + "#### bonus: choose variables to plot from a dropdown menu " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bb733804", + "metadata": {}, + "outputs": [], + "source": [ + "# Valid variables filtering\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_4 = df_info[(df_info.is_dim==False) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter_4" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "82d5c20d", + "metadata": {}, + "outputs": [], + "source": [ + "# example 1: using xarray plot\n", + "\n", + "# Uncomment the following cell to try the interactive plot (ctrl + /)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2fc9aaa4", + "metadata": {}, + "outputs": [], + "source": [ + "# %matplotlib widget\n", + "# plt.clf()\n", + "\n", + "# fig, ax = plt.subplots(figsize=(10, 4))\n", + "\n", + "# available_variables = df_filter_4.var_name.values\n", + "# @widgets.interact(var=available_variables)\n", + "# def update(var = available_variables[0]):\n", + "# fig.clear() # Remove old lines from plot and plot new one\n", + "# if len(ds[var].dims)==2:\n", + "# ds[var].plot(x=\"time\", add_colorbar=False)\n", + "# else:\n", + "# ds[var].plot()\n", + "# plt.grid()\n", + "# plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fabdd802", + "metadata": {}, + "outputs": [], + "source": [ + "# example 2: using act plot\n", + "\n", + "# Uncomment the following cell to try the interactive plot (ctrl + /)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "962b4186", + "metadata": {}, + "outputs": [], + "source": [ + "# %matplotlib widget\n", + "# plt.clf()\n", + "\n", + "# available_variables = df_filter_4.var_name.values\n", + "\n", + "\n", + "# @widgets.interact(var=available_variables)\n", + "# def 
update(var = available_variables[0]):\n", + "\n", + "# i_display = act.plotting.TimeSeriesDisplay(ds_act)\n", + "# i_display.add_subplots((1,), figsize=(10, 4))\n", + "# ax = i_display.plot(var, subplot_index=(0,), set_title=f\"{var} ({ds_act[var].attrs['long_name']})\",)\n", + "\n", + "# ax.set_xlabel(f\"UTC Time starts at {ds.time.data[0]}\")\n", + "# ax.grid()\n", + "# plt.show()\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.16" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/SURFSPECALB/surfspecalb1mlawer.c1.ipynb b/VAPs/quicklook/SURFSPECALB/surfspecalb1mlawer.c1.ipynb new file mode 100644 index 00000000..55d77626 --- /dev/null +++ b/VAPs/quicklook/SURFSPECALB/surfspecalb1mlawer.c1.ipynb @@ -0,0 +1,339 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# SURFSPECALB1MLAWER.C1 Plots\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/surfspecalb) for more information about this vap." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib widget\n", + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import ipywidgets as widgets\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "# Data archive directory\n", + "DATA_DIR = r'/data/archive/'\n", + "\n", + "# Datastream info\n", + "DATASTREAM_NAME = 'surfspecalb1mlawer'\n", + "DATA_LEVEL = 'c1'\n", + "LOCATIONS = [{'end_date': '2020-10-06', 'facility': 'C1', 'site': 'nsa', 'start_date': '1998-04-07'}, {'end_date': '2021-01-04', 'facility': 'C1', 'site': 'sgp', 'start_date': '2004-01-01'}]" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "9faaf875", + "metadata": {}, + "source": [ + "## Define site, facility, and date range" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ac6764f5", + "metadata": {}, + "outputs": [], + "source": [ + "print(\"The following locations and date ranges are available for this VAP:\")\n", + "display(pd.DataFrame(LOCATIONS, columns=['site', 'facility', 'start_date', 'end_date']))" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "8d132223", + "metadata": {}, + "source": [ + "#### Define site, facility, and date range (date format: YYYY-MM-DD) using the variables below:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e563983a", + "metadata": {}, + "outputs": [], + "source": [ + "site_facility = ( 'sgp', 'C1' )\n", + "\n", + "date_start = '2021-01-01'\n", + "date_end = '2021-01-03'" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "bccd3dfe-2f99-49a5-bace-ea37e8dd8fc3", + "metadata": {}, + "source": [ + "## Load data files\n", + "Load data files from /data/archive/" + ] + }, + { + "cell_type": 
"code", + "execution_count": null, + "id": "cb4b9a26-c574-49c0-a521-658fa553e39e", + "metadata": {}, + "outputs": [], + "source": [ + "# Compile list of files\n", + "site, facility = site_facility\n", + "d_date_start = datetime.strptime(date_start, '%Y-%m-%d')\n", + "d_date_end = datetime.strptime(date_end, '%Y-%m-%d')\n", + "dir_path = os.path.join(DATA_DIR + site, site + DATASTREAM_NAME + facility + r'.' + DATA_LEVEL )\n", + "dir_path\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6be8f3dc", + "metadata": {}, + "outputs": [], + "source": [ + "from datetime import date, timedelta\n", + "import pandas as pd\n", + "\n", + "def get_ARM_formated_dates(start_date, end_date):\n", + " \"\"\"\n", + " Get a list of ARM conventional formated date lists, based on start_date and end_date(inclusive)\n", + " EXAMPLE:\n", + " get_ARM_formated_dates(start_date=\"20180219\", end_date=\"20180221\")\n", + " >> [\"20180219\", \"20180220\", \"20180221\"] \n", + " \"\"\"\n", + " \n", + " _start_date = pd.to_datetime(start_date)\n", + " _end_date = pd.to_datetime(end_date)\n", + " \n", + " delta = _end_date - _start_date # returns timedelta \n", + " dates = []\n", + "\n", + " for i in range(delta.days + 1):\n", + " day = _start_date + timedelta(days=i)\n", + " day_formated = day.strftime(format=\"%Y%m%d\")\n", + " dates.append(day_formated)\n", + " return dates\n", + "\n", + "\n", + "get_ARM_formated_dates(start_date=date_start, end_date=date_end)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "51feea2e", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter a list of files based on date pattern\n", + "import glob\n", + "dates = get_ARM_formated_dates(start_date=date_start, end_date=date_end)\n", + "files_filter = []\n", + "for date in dates:\n", + " files_filter += glob.glob(f'{dir_path}/*.{date}*.*')\n", + " files_filter\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0e5d0d7", + 
"metadata": {}, + "outputs": [], + "source": [ + "# Load files as a single dataset\n", + "files_list = files_filter \n", + "ds = act.io.armfiles.read_netcdf(files_list)\n", + "ds.clean.cleanup()\n", + "print(f'{len(files_list)} files loaded')\n", + "ds\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "4a551094-9ec0-4b64-b80a-9940573c2f50", + "metadata": {}, + "source": [ + "## Plot time series data\n", + "#### Define the list of variables to be plotted:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d09b789e-84f1-4605-846b-a72c110c8048", + "metadata": {}, + "outputs": [], + "source": [ + "variables_to_plot = ['be_surface_albedo_mfr_broadband_10m', 'be_surface_albedo_mfr_narrowband_10m', 'be_surface_albedo_psp_broadband_10m']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3458fb08-035b-4898-9253-0a94e6f9c97b", + "metadata": {}, + "outputs": [], + "source": [ + "ts_display = act.plotting.TimeSeriesDisplay(ds)\n", + "ts_display.add_subplots((len(variables_to_plot),), figsize = (9.5,4*len(variables_to_plot)))\n", + "\n", + "for i,v in enumerate(variables_to_plot):\n", + " ts_ax = ts_display.plot(v, subplot_index=(i,), set_title=ds.variables[v].attrs['long_name'],force_line_plot=True)\n", + " ts_ax.grid()\n", + "\n", + "plt.show()\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "194399aa-1907-452b-8ba9-bc31d7f60291", + "metadata": {}, + "source": [ + "## Quality check plots\n", + "#### Define variable for QC plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9a0ef63d-3eeb-48fc-a24a-43258b6134b8", + "metadata": {}, + "outputs": [], + "source": [ + "qc_variable = 'hemisp_broadband_mfrsrC1'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "532663a3-4dc0-4497-bda8-018c5f91e1c4", + "metadata": {}, + "outputs": [], + "source": [ + "# QC Plot\n", + "if ('qc_'+qc_variable) in ds.variables:\n", + "\n", + " # Plot\n", + " qc_display 
= act.plotting.TimeSeriesDisplay(ds)\n", + " qc_display.add_subplots((2,), figsize = (9.5,10))\n", + " qc_ax = qc_display.plot(qc_variable, subplot_index=(0,), set_title=\"QC results on field: \" + qc_variable,force_line_plot=True)\n", + " qc_ax.grid()\n", + " qc_display.qc_flag_block_plot(qc_variable, subplot_index=(1,))\n", + "\n", + " plt.show()\n", + "else:\n", + " print(f'QC not available for the selected field: {qc_variable}')\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "2308db16-f362-4033-a11a-c3e5e75ad9ba", + "metadata": {}, + "source": [ + "## Field selection dropdown menu\n", + "Select variable to be plotted from a dropdown menu" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d5792fa8-2253-441a-8ac6-1e07a0345031", + "metadata": {}, + "outputs": [], + "source": [ + "plt.ioff()\n", + "\n", + "# populate dropdown menu with available variables \n", + "available_variables = [v for v in ds.variables if not('time' in v or v.startswith('qc_') or v.startswith('source_')) and 'long_name' in ds.variables[v].attrs]\n", + "d_variable = 'be_surface_albedo_mfr_broadband_10m'\n", + "dropdown = widgets.Dropdown(\n", + " options = [(ds.variables[v].attrs['long_name'], v) for v in available_variables],\n", + " value= d_variable,\n", + " description='Field:',\n", + " disabled=False,\n", + ")\n", + "dropdown.layout.margin = '0px 30% 0px 20%'\n", + "dropdown.layout.width = '50%'\n", + "\n", + "# set up display\n", + "i_display = act.plotting.TimeSeriesDisplay(ds)\n", + "i_display.add_subplots((1,), figsize = (9.5,5))\n", + "i_ax = i_display.plot(d_variable, subplot_index=(0,), set_title=ds.variables[d_variable].attrs['long_name'],force_line_plot=True)\n", + "i_ax.grid()\n", + "i_fig = i_display.fig\n", + "\n", + "# update plot callback function\n", + "def update_plot(change):\n", + " i_ax.cla()\n", + " i_ax_new = i_display.plot(change.new, subplot_index=(0,), 
set_title=ds.variables[change.new].attrs['long_name'],force_line_plot=True)\n", + " i_ax_new.grid()\n", + " i_fig.canvas.draw()\n", + " i_fig.canvas.flush_events()\n", + "\n", + "dropdown.observe(update_plot, names='value')\n", + "\n", + "widgets.AppLayout(\n", + " header=dropdown,\n", + " center=i_fig.canvas,\n", + " pane_heights=[1, 6,1]\n", + ")\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.12" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/SURFSPECALB/surfspecalb7nch1mlawer.c1.ipynb b/VAPs/quicklook/SURFSPECALB/surfspecalb7nch1mlawer.c1.ipynb new file mode 100644 index 00000000..11e217f4 --- /dev/null +++ b/VAPs/quicklook/SURFSPECALB/surfspecalb7nch1mlawer.c1.ipynb @@ -0,0 +1,339 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# SURFSPECALB7NCH1MLAWER.C1 Plots\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/surfspecalb) for more information about this vap." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib widget\n", + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import ipywidgets as widgets\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "# Data archive directory\n", + "DATA_DIR = r'/data/archive/'\n", + "\n", + "# Datastream info\n", + "DATASTREAM_NAME = 'surfspecalb7nch1mlawer'\n", + "DATA_LEVEL = 'c1'\n", + "LOCATIONS = [{'end_date': '2023-08-17', 'facility': 'C1', 'site': 'nsa', 'start_date': '2021-06-23'}, {'end_date': '2023-08-01', 'facility': 'C1', 'site': 'sgp', 'start_date': '2021-01-13'}]" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "9faaf875", + "metadata": {}, + "source": [ + "## Define site, facility, and date range" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ac6764f5", + "metadata": {}, + "outputs": [], + "source": [ + "print(\"The following locations and date ranges are available for this VAP:\")\n", + "display(pd.DataFrame(LOCATIONS, columns=['site', 'facility', 'start_date', 'end_date']))" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "8d132223", + "metadata": {}, + "source": [ + "#### Define site, facility, and date range (date format: YYYY-MM-DD) using the variables below:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e563983a", + "metadata": {}, + "outputs": [], + "source": [ + "site_facility = ( 'sgp', 'C1' )\n", + "\n", + "date_start = '2023-07-29'\n", + "date_end = '2023-07-31'" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "bccd3dfe-2f99-49a5-bace-ea37e8dd8fc3", + "metadata": {}, + "source": [ + "## Load data files\n", + "Load data files from /data/archive/" + ] + }, + { + 
"cell_type": "code", + "execution_count": null, + "id": "cb4b9a26-c574-49c0-a521-658fa553e39e", + "metadata": {}, + "outputs": [], + "source": [ + "# Compile list of files\n", + "site, facility = site_facility\n", + "d_date_start = datetime.strptime(date_start, '%Y-%m-%d')\n", + "d_date_end = datetime.strptime(date_end, '%Y-%m-%d')\n", + "dir_path = os.path.join(DATA_DIR + site, site + DATASTREAM_NAME + facility + r'.' + DATA_LEVEL )\n", + "dir_path\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6be8f3dc", + "metadata": {}, + "outputs": [], + "source": [ + "from datetime import date, timedelta\n", + "import pandas as pd\n", + "\n", + "def get_ARM_formated_dates(start_date, end_date):\n", + " \"\"\"\n", + " Get a list of ARM conventional formated date lists, based on start_date and end_date(inclusive)\n", + " EXAMPLE:\n", + " get_ARM_formated_dates(start_date=\"20180219\", end_date=\"20180221\")\n", + " >> [\"20180219\", \"20180220\", \"20180221\"] \n", + " \"\"\"\n", + " \n", + " _start_date = pd.to_datetime(start_date)\n", + " _end_date = pd.to_datetime(end_date)\n", + " \n", + " delta = _end_date - _start_date # returns timedelta \n", + " dates = []\n", + "\n", + " for i in range(delta.days + 1):\n", + " day = _start_date + timedelta(days=i)\n", + " day_formated = day.strftime(format=\"%Y%m%d\")\n", + " dates.append(day_formated)\n", + " return dates\n", + "\n", + "\n", + "get_ARM_formated_dates(start_date=date_start, end_date=date_end)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "51feea2e", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter a list of files based on date pattern\n", + "import glob\n", + "dates = get_ARM_formated_dates(start_date=date_start, end_date=date_end)\n", + "files_filter = []\n", + "for date in dates:\n", + " files_filter += glob.glob(f'{dir_path}/*.{date}*.*')\n", + " files_filter\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": 
"b0e5d0d7", + "metadata": {}, + "outputs": [], + "source": [ + "# Load files as a single dataset\n", + "files_list = files_filter \n", + "ds = act.io.armfiles.read_netcdf(files_list)\n", + "ds.clean.cleanup()\n", + "print(f'{len(files_list)} files loaded')\n", + "ds\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "4a551094-9ec0-4b64-b80a-9940573c2f50", + "metadata": {}, + "source": [ + "## Plot time series data\n", + "#### Define the list of variables to be plotted:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d09b789e-84f1-4605-846b-a72c110c8048", + "metadata": {}, + "outputs": [], + "source": [ + "variables_to_plot = ['be_surface_albedo_mfr_narrowband_10m', 'be_surface_albedo_psp_broadband_10m', 'estimated_spectral_albedo_10m']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3458fb08-035b-4898-9253-0a94e6f9c97b", + "metadata": {}, + "outputs": [], + "source": [ + "ts_display = act.plotting.TimeSeriesDisplay(ds)\n", + "ts_display.add_subplots((len(variables_to_plot),), figsize = (9.5,4*len(variables_to_plot)))\n", + "\n", + "for i,v in enumerate(variables_to_plot):\n", + " ts_ax = ts_display.plot(v, subplot_index=(i,), set_title=ds.variables[v].attrs['long_name'],)\n", + " ts_ax.grid()\n", + "\n", + "plt.show()\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "194399aa-1907-452b-8ba9-bc31d7f60291", + "metadata": {}, + "source": [ + "## Quality check plots\n", + "#### Define variable for QC plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9a0ef63d-3eeb-48fc-a24a-43258b6134b8", + "metadata": {}, + "outputs": [], + "source": [ + "qc_variable = 'hemisp_narrowband_mfrsrC1'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "532663a3-4dc0-4497-bda8-018c5f91e1c4", + "metadata": {}, + "outputs": [], + "source": [ + "# QC Plot\n", + "if ('qc_'+qc_variable) in ds.variables:\n", + "\n", + " # Plot\n", + " qc_display = 
act.plotting.TimeSeriesDisplay(ds)\n", + " qc_display.add_subplots((2,), figsize = (9.5,10))\n", + " qc_ax = qc_display.plot(qc_variable, subplot_index=(0,), set_title=\"QC results on field: \" + qc_variable,)\n", + " qc_ax.grid()\n", + " qc_display.qc_flag_block_plot(qc_variable, subplot_index=(1,))\n", + "\n", + " plt.show()\n", + "else:\n", + " print(f'QC not available for the selected field: {qc_variable}')\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "2308db16-f362-4033-a11a-c3e5e75ad9ba", + "metadata": {}, + "source": [ + "## Field selection dropdown menu\n", + "Select variable to be plotted from a dropdown menu" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d5792fa8-2253-441a-8ac6-1e07a0345031", + "metadata": {}, + "outputs": [], + "source": [ + "plt.ioff()\n", + "\n", + "# populate dropdown menu with available variables \n", + "available_variables = [v for v in ds.variables if not('time' in v or v.startswith('qc_') or v.startswith('source_')) and 'long_name' in ds.variables[v].attrs]\n", + "d_variable = 'be_surface_albedo_mfr_narrowband_10m'\n", + "dropdown = widgets.Dropdown(\n", + " options = [(ds.variables[v].attrs['long_name'], v) for v in available_variables],\n", + " value= d_variable,\n", + " description='Field:',\n", + " disabled=False,\n", + ")\n", + "dropdown.layout.margin = '0px 30% 0px 20%'\n", + "dropdown.layout.width = '50%'\n", + "\n", + "# set up display\n", + "i_display = act.plotting.TimeSeriesDisplay(ds)\n", + "i_display.add_subplots((1,), figsize = (9.5,5))\n", + "i_ax = i_display.plot(d_variable, subplot_index=(0,), set_title=ds.variables[d_variable].attrs['long_name'],)\n", + "i_ax.grid()\n", + "i_fig = i_display.fig\n", + "\n", + "# update plot callback function\n", + "def update_plot(change):\n", + " i_ax.cla()\n", + " i_ax_new = i_display.plot(change.new, subplot_index=(0,), set_title=ds.variables[change.new].attrs['long_name'],)\n", + " i_ax_new.grid()\n", + " 
i_fig.canvas.draw()\n", + " i_fig.canvas.flush_events()\n", + "\n", + "dropdown.observe(update_plot, names='value')\n", + "\n", + "widgets.AppLayout(\n", + " header=dropdown,\n", + " center=i_fig.canvas,\n", + " pane_heights=[1, 6,1]\n", + ")\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.12" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/SWFLUXANAL/.ipynb_checkpoints/1swfanalsirs1long.c1-checkpoint.ipynb b/VAPs/quicklook/SWFLUXANAL/.ipynb_checkpoints/1swfanalsirs1long.c1-checkpoint.ipynb new file mode 100644 index 00000000..108d3647 --- /dev/null +++ b/VAPs/quicklook/SWFLUXANAL/.ipynb_checkpoints/1swfanalsirs1long.c1-checkpoint.ipynb @@ -0,0 +1,2654 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# 1SWFANALSIRS1LONG.C1 Plots\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/swfluxanal) for more information about this vap." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib widget\n", + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import ipywidgets as widgets\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "# Data archive directory\n", + "DATA_DIR = r'/data/archive/'\n", + "\n", + "# Datastream info\n", + "DATASTREAM_NAME = '1swfanalsirs1long'\n", + "DATA_LEVEL = 'c1'\n", + "LOCATIONS = [{'end_date': '2015-05-18', 'facility': 'C1', 'site': 'sgp', 'start_date': '1997-03-25'}, {'end_date': '2011-10-15', 'facility': 'E10', 'site': 'sgp', 'start_date': '1997-02-16'}, {'end_date': '2015-05-18', 'facility': 'E11', 'site': 'sgp', 'start_date': '1995-09-26'}, {'end_date': '2015-05-26', 'facility': 'E12', 'site': 'sgp', 'start_date': '1996-01-21'}, {'end_date': '2015-05-18', 'facility': 'E13', 'site': 'sgp', 'start_date': '1994-01-07'}, {'end_date': '2015-05-18', 'facility': 'E15', 'site': 'sgp', 'start_date': '1994-03-31'}, {'end_date': '2011-11-09', 'facility': 'E16', 'site': 'sgp', 'start_date': '1995-09-22'}, {'end_date': '2009-11-06', 'facility': 'E18', 'site': 'sgp', 'start_date': '1996-06-20'}, {'end_date': '2011-05-21', 'facility': 'E19', 'site': 'sgp', 'start_date': '1998-07-21'}, {'end_date': '2009-05-07', 'facility': 'E1', 'site': 'sgp', 'start_date': '1995-11-16'}, {'end_date': '2011-11-14', 'facility': 'E20', 'site': 'sgp', 'start_date': '1995-04-02'}, {'end_date': '2015-05-11', 'facility': 'E21', 'site': 'sgp', 'start_date': '1999-09-13'}, {'end_date': '2009-11-29', 'facility': 'E22', 'site': 'sgp', 'start_date': '1995-11-09'}, {'end_date': '2009-11-06', 'facility': 'E24', 'site': 'sgp', 'start_date': '1995-11-08'}, {'end_date': '2002-04-03', 'facility': 'E25', 'site': 'sgp', 'start_date': 
'1997-11-19'}, {'end_date': '2009-07-15', 'facility': 'E27', 'site': 'sgp', 'start_date': '2003-05-16'}, {'end_date': '2009-10-18', 'facility': 'E2', 'site': 'sgp', 'start_date': '1996-04-02'}, {'end_date': '2015-05-12', 'facility': 'E31', 'site': 'sgp', 'start_date': '2011-10-13'}, {'end_date': '2015-05-18', 'facility': 'E32', 'site': 'sgp', 'start_date': '2012-02-05'}, {'end_date': '2015-05-18', 'facility': 'E33', 'site': 'sgp', 'start_date': '2011-08-26'}, {'end_date': '2015-05-26', 'facility': 'E34', 'site': 'sgp', 'start_date': '2011-09-04'}, {'end_date': '2015-05-18', 'facility': 'E35', 'site': 'sgp', 'start_date': '2011-10-06'}, {'end_date': '2015-05-18', 'facility': 'E36', 'site': 'sgp', 'start_date': '2011-09-29'}, {'end_date': '2015-05-18', 'facility': 'E37', 'site': 'sgp', 'start_date': '2011-09-30'}, {'end_date': '2015-05-27', 'facility': 'E38', 'site': 'sgp', 'start_date': '2011-09-05'}, {'end_date': '2009-08-30', 'facility': 'E3', 'site': 'sgp', 'start_date': '1996-03-07'}, {'end_date': '2011-09-25', 'facility': 'E4', 'site': 'sgp', 'start_date': '1996-03-07'}, {'end_date': '2009-10-31', 'facility': 'E5', 'site': 'sgp', 'start_date': '1996-06-17'}, {'end_date': '2011-10-15', 'facility': 'E6', 'site': 'sgp', 'start_date': '1996-03-07'}, {'end_date': '2011-11-12', 'facility': 'E7', 'site': 'sgp', 'start_date': '1995-10-20'}, {'end_date': '2009-11-04', 'facility': 'E8', 'site': 'sgp', 'start_date': '1995-09-29'}, {'end_date': '2015-05-26', 'facility': 'E9', 'site': 'sgp', 'start_date': '1994-01-19'}]" + ] + }, + { + "cell_type": "markdown", + "id": "9faaf875", + "metadata": {}, + "source": [ + "## Define site, facility, and date range" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "ac6764f5", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The following locations and date ranges are available for this VAP:\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sitefacilitystart_dateend_date
0sgpC11997-03-252015-05-18
1sgpE101997-02-162011-10-15
2sgpE111995-09-262015-05-18
3sgpE121996-01-212015-05-26
4sgpE131994-01-072015-05-18
5sgpE151994-03-312015-05-18
6sgpE161995-09-222011-11-09
7sgpE181996-06-202009-11-06
8sgpE191998-07-212011-05-21
9sgpE11995-11-162009-05-07
10sgpE201995-04-022011-11-14
11sgpE211999-09-132015-05-11
12sgpE221995-11-092009-11-29
13sgpE241995-11-082009-11-06
14sgpE251997-11-192002-04-03
15sgpE272003-05-162009-07-15
16sgpE21996-04-022009-10-18
17sgpE312011-10-132015-05-12
18sgpE322012-02-052015-05-18
19sgpE332011-08-262015-05-18
20sgpE342011-09-042015-05-26
21sgpE352011-10-062015-05-18
22sgpE362011-09-292015-05-18
23sgpE372011-09-302015-05-18
24sgpE382011-09-052015-05-27
25sgpE31996-03-072009-08-30
26sgpE41996-03-072011-09-25
27sgpE51996-06-172009-10-31
28sgpE61996-03-072011-10-15
29sgpE71995-10-202011-11-12
30sgpE81995-09-292009-11-04
31sgpE91994-01-192015-05-26
\n", + "
" + ], + "text/plain": [ + " site facility start_date end_date\n", + "0 sgp C1 1997-03-25 2015-05-18\n", + "1 sgp E10 1997-02-16 2011-10-15\n", + "2 sgp E11 1995-09-26 2015-05-18\n", + "3 sgp E12 1996-01-21 2015-05-26\n", + "4 sgp E13 1994-01-07 2015-05-18\n", + "5 sgp E15 1994-03-31 2015-05-18\n", + "6 sgp E16 1995-09-22 2011-11-09\n", + "7 sgp E18 1996-06-20 2009-11-06\n", + "8 sgp E19 1998-07-21 2011-05-21\n", + "9 sgp E1 1995-11-16 2009-05-07\n", + "10 sgp E20 1995-04-02 2011-11-14\n", + "11 sgp E21 1999-09-13 2015-05-11\n", + "12 sgp E22 1995-11-09 2009-11-29\n", + "13 sgp E24 1995-11-08 2009-11-06\n", + "14 sgp E25 1997-11-19 2002-04-03\n", + "15 sgp E27 2003-05-16 2009-07-15\n", + "16 sgp E2 1996-04-02 2009-10-18\n", + "17 sgp E31 2011-10-13 2015-05-12\n", + "18 sgp E32 2012-02-05 2015-05-18\n", + "19 sgp E33 2011-08-26 2015-05-18\n", + "20 sgp E34 2011-09-04 2015-05-26\n", + "21 sgp E35 2011-10-06 2015-05-18\n", + "22 sgp E36 2011-09-29 2015-05-18\n", + "23 sgp E37 2011-09-30 2015-05-18\n", + "24 sgp E38 2011-09-05 2015-05-27\n", + "25 sgp E3 1996-03-07 2009-08-30\n", + "26 sgp E4 1996-03-07 2011-09-25\n", + "27 sgp E5 1996-06-17 2009-10-31\n", + "28 sgp E6 1996-03-07 2011-10-15\n", + "29 sgp E7 1995-10-20 2011-11-12\n", + "30 sgp E8 1995-09-29 2009-11-04\n", + "31 sgp E9 1994-01-19 2015-05-26" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "print(\"The following locations and date ranges are available for this VAP:\")\n", + "display(pd.DataFrame(LOCATIONS, columns=['site', 'facility', 'start_date', 'end_date']))" + ] + }, + { + "cell_type": "markdown", + "id": "8d132223", + "metadata": {}, + "source": [ + "#### Define site, facility, and date range (date format: YYYY-MM-DD) using the variables below:" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "e563983a", + "metadata": {}, + "outputs": [], + "source": [ + "site_facility = ( 'sgp', 'C1' )\n", + "\n", + "date_start = '2015-05-15'\n", + "date_end = 
'2015-05-17'" + ] + }, + { + "cell_type": "markdown", + "id": "bccd3dfe-2f99-49a5-bace-ea37e8dd8fc3", + "metadata": {}, + "source": [ + "## Load data files\n", + "Load data files from /data/archive/" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "cb4b9a26-c574-49c0-a521-658fa553e39e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'/data/archive/sgp/sgp1swfanalsirs1longC1.c1'" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Compile list of files\n", + "site, facility = site_facility\n", + "d_date_start = datetime.strptime(date_start, '%Y-%m-%d')\n", + "d_date_end = datetime.strptime(date_end, '%Y-%m-%d')\n", + "dir_path = os.path.join(DATA_DIR + site, site + DATASTREAM_NAME + facility + r'.' + DATA_LEVEL )\n", + "dir_path\n" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "6be8f3dc", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['20150515', '20150516', '20150517']" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from datetime import date, timedelta\n", + "import pandas as pd\n", + "\n", + "def get_ARM_formated_dates(start_date, end_date):\n", + " \"\"\"\n", + " Get a list of ARM conventional formated date lists, based on start_date and end_date(inclusive)\n", + " EXAMPLE:\n", + " get_ARM_formated_dates(start_date=\"20180219\", end_date=\"20180221\")\n", + " >> [\"20180219\", \"20180220\", \"20180221\"] \n", + " \"\"\"\n", + " \n", + " _start_date = pd.to_datetime(start_date)\n", + " _end_date = pd.to_datetime(end_date)\n", + " \n", + " delta = _end_date - _start_date # returns timedelta \n", + " dates = []\n", + "\n", + " for i in range(delta.days + 1):\n", + " day = _start_date + timedelta(days=i)\n", + " day_formated = day.strftime(format=\"%Y%m%d\")\n", + " dates.append(day_formated)\n", + " return dates\n", + "\n", + "\n", + 
"get_ARM_formated_dates(start_date=date_start, end_date=date_end)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "51feea2e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['/data/archive/sgp/sgp1swfanalsirs1longC1.c1/sgp1swfanalsirs1longC1.c1.20150515.112900.cdf',\n", + " '/data/archive/sgp/sgp1swfanalsirs1longC1.c1/sgp1swfanalsirs1longC1.c1.20150516.112800.cdf',\n", + " '/data/archive/sgp/sgp1swfanalsirs1longC1.c1/sgp1swfanalsirs1longC1.c1.20150517.112700.cdf']" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Filter a list of files based on date pattern\n", + "import glob\n", + "dates = get_ARM_formated_dates(start_date=date_start, end_date=date_end)\n", + "files_filter = []\n", + "for date in dates:\n", + " files_filter += glob.glob(f'{dir_path}/*.{date}*.*')\n", + " files_filter\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "b0e5d0d7", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "89 files loaded\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
<xarray.Dataset>\n",
+       "Dimensions:                          (time: 838)\n",
+       "Coordinates:\n",
+       "  * time                             (time) timedelta64[ns] 00:00:00 ... 13:5...\n",
+       "Data variables: (12/47)\n",
+       "    base_time                        object ...\n",
+       "    time_offset                      (time) timedelta64[ns] dask.array<chunksize=(838,), meta=np.ndarray>\n",
+       "    base_time_LST                    object ...\n",
+       "    time_offset_LST                  (time) timedelta64[ns] dask.array<chunksize=(838,), meta=np.ndarray>\n",
+       "    site                             |S64 ...\n",
+       "    coef_date                        float64 ...\n",
+       "    ...                               ...\n",
+       "    qc_difswfluxdn                   (time) int16 dask.array<chunksize=(838,), meta=np.ndarray>\n",
+       "    qc_dirswfluxdn                   (time) int16 dask.array<chunksize=(838,), meta=np.ndarray>\n",
+       "    qc_sswfluxdn                     (time) int16 dask.array<chunksize=(838,), meta=np.ndarray>\n",
+       "    lat                              float32 ...\n",
+       "    lon                              float32 ...\n",
+       "    alt                              float32 ...\n",
+       "Attributes: (12/14)\n",
+       "    Date:                      Sat Jun 20 17:08:21 GMT 2015\n",
+       "    Fitmode:                   01\n",
+       "    Version:                   $State: vap-swfanal1long-3.12-0.sol5_10$\n",
+       "    Number_Input_Platforms:    1\n",
+       "    Input_Platforms:           sgpsirsC1.b1\n",
+       "    Input_Platforms_Versions:  /usr/lib/ld.so.1\n",
+       "    ...                        ...\n",
+       "    comment:                   fitmode=01 indicates a daily fit, fitmode=00 i...\n",
+       "    _file_dates:               ['20150515']\n",
+       "    _file_times:               ['112900']\n",
+       "    datastream:                sgp1swfanalsirs1longC1.c1\n",
+       "    _datastream:               sgp1swfanalsirs1longC1.c1\n",
+       "    _arm_standards_flag:       1
" + ], + "text/plain": [ + "\n", + "Dimensions: (time: 838)\n", + "Coordinates:\n", + " * time (time) timedelta64[ns] 00:00:00 ... 13:5...\n", + "Data variables: (12/47)\n", + " base_time object ...\n", + " time_offset (time) timedelta64[ns] dask.array\n", + " base_time_LST object ...\n", + " time_offset_LST (time) timedelta64[ns] dask.array\n", + " site |S64 ...\n", + " coef_date float64 ...\n", + " ... ...\n", + " qc_difswfluxdn (time) int16 dask.array\n", + " qc_dirswfluxdn (time) int16 dask.array\n", + " qc_sswfluxdn (time) int16 dask.array\n", + " lat float32 ...\n", + " lon float32 ...\n", + " alt float32 ...\n", + "Attributes: (12/14)\n", + " Date: Sat Jun 20 17:08:21 GMT 2015\n", + " Fitmode: 01\n", + " Version: $State: vap-swfanal1long-3.12-0.sol5_10$\n", + " Number_Input_Platforms: 1\n", + " Input_Platforms: sgpsirsC1.b1\n", + " Input_Platforms_Versions: /usr/lib/ld.so.1\n", + " ... ...\n", + " comment: fitmode=01 indicates a daily fit, fitmode=00 i...\n", + " _file_dates: ['20150515']\n", + " _file_times: ['112900']\n", + " datastream: sgp1swfanalsirs1longC1.c1\n", + " _datastream: sgp1swfanalsirs1longC1.c1\n", + " _arm_standards_flag: 1" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Load files as a single dataset\n", + "files_list = files_filter [0]\n", + "ds = act.io.armfiles.read_netcdf(files_list)\n", + "ds.clean.cleanup()\n", + "print(f'{len(files_list)} files loaded')\n", + "ds\n" + ] + }, + { + "cell_type": "markdown", + "id": "4a551094-9ec0-4b64-b80a-9940573c2f50", + "metadata": {}, + "source": [ + "## Plot time series data\n", + "#### Define the list of variables to be plotted:" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "d09b789e-84f1-4605-846b-a72c110c8048", + "metadata": {}, + "outputs": [], + "source": [ + "variables_to_plot = ['gswfluxdn_measured', 'gswfluxdn_clearskyfit', 'difswfluxdn_measured']" + ] + }, + { + "cell_type": "code", + 
"execution_count": 15, + "id": "3458fb08-035b-4898-9253-0a94e6f9c97b", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/kefeimo/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/xarray/coding/variables.py:147: FutureWarning: elementwise comparison failed; returning scalar instead, but in the future will perform elementwise comparison\n", + " condition |= data == fv\n" + ] + }, + { + "ename": "OverflowError", + "evalue": "int too big to convert", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mOverflowError\u001b[0m Traceback (most recent call last)", + "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/IPython/core/formatters.py:972\u001b[0m, in \u001b[0;36mMimeBundleFormatter.__call__\u001b[0;34m(self, obj, include, exclude)\u001b[0m\n\u001b[1;32m 969\u001b[0m method \u001b[38;5;241m=\u001b[39m get_real_method(obj, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mprint_method)\n\u001b[1;32m 971\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m method \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m--> 972\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mmethod\u001b[49m\u001b[43m(\u001b[49m\u001b[43minclude\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43minclude\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mexclude\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mexclude\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 973\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m 974\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n", + "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/ipympl/backend_nbagg.py:336\u001b[0m, in \u001b[0;36mCanvas._repr_mimebundle_\u001b[0;34m(self, **kwargs)\u001b[0m\n\u001b[1;32m 
333\u001b[0m plaintext \u001b[38;5;241m=\u001b[39m plaintext[:\u001b[38;5;241m110\u001b[39m] \u001b[38;5;241m+\u001b[39m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124m…\u001b[39m\u001b[38;5;124m'\u001b[39m\n\u001b[1;32m 335\u001b[0m buf \u001b[38;5;241m=\u001b[39m io\u001b[38;5;241m.\u001b[39mBytesIO()\n\u001b[0;32m--> 336\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfigure\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msavefig\u001b[49m\u001b[43m(\u001b[49m\u001b[43mbuf\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mformat\u001b[39;49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mpng\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdpi\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mfigure\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[1;32m 338\u001b[0m base64_image \u001b[38;5;241m=\u001b[39m b64encode(buf\u001b[38;5;241m.\u001b[39mgetvalue())\u001b[38;5;241m.\u001b[39mdecode(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mutf-8\u001b[39m\u001b[38;5;124m'\u001b[39m)\n\u001b[1;32m 339\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_data_url \u001b[38;5;241m=\u001b[39m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mdata:image/png;base64,\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mbase64_image\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m'\u001b[39m\n", + "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/matplotlib/figure.py:3343\u001b[0m, in \u001b[0;36mFigure.savefig\u001b[0;34m(self, fname, transparent, **kwargs)\u001b[0m\n\u001b[1;32m 3339\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m ax \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39maxes:\n\u001b[1;32m 3340\u001b[0m stack\u001b[38;5;241m.\u001b[39menter_context(\n\u001b[1;32m 
3341\u001b[0m ax\u001b[38;5;241m.\u001b[39mpatch\u001b[38;5;241m.\u001b[39m_cm_set(facecolor\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mnone\u001b[39m\u001b[38;5;124m'\u001b[39m, edgecolor\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mnone\u001b[39m\u001b[38;5;124m'\u001b[39m))\n\u001b[0;32m-> 3343\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcanvas\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mprint_figure\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfname\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/matplotlib/backend_bases.py:2366\u001b[0m, in \u001b[0;36mFigureCanvasBase.print_figure\u001b[0;34m(self, filename, dpi, facecolor, edgecolor, orientation, format, bbox_inches, pad_inches, bbox_extra_artists, backend, **kwargs)\u001b[0m\n\u001b[1;32m 2362\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m 2363\u001b[0m \u001b[38;5;66;03m# _get_renderer may change the figure dpi (as vector formats\u001b[39;00m\n\u001b[1;32m 2364\u001b[0m \u001b[38;5;66;03m# force the figure dpi to 72), so we need to set it again here.\u001b[39;00m\n\u001b[1;32m 2365\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m cbook\u001b[38;5;241m.\u001b[39m_setattr_cm(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mfigure, dpi\u001b[38;5;241m=\u001b[39mdpi):\n\u001b[0;32m-> 2366\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[43mprint_method\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 2367\u001b[0m \u001b[43m \u001b[49m\u001b[43mfilename\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 2368\u001b[0m \u001b[43m \u001b[49m\u001b[43mfacecolor\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mfacecolor\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 2369\u001b[0m \u001b[43m 
\u001b[49m\u001b[43medgecolor\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43medgecolor\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 2370\u001b[0m \u001b[43m \u001b[49m\u001b[43morientation\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43morientation\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 2371\u001b[0m \u001b[43m \u001b[49m\u001b[43mbbox_inches_restore\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m_bbox_inches_restore\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 2372\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 2373\u001b[0m \u001b[38;5;28;01mfinally\u001b[39;00m:\n\u001b[1;32m 2374\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m bbox_inches \u001b[38;5;129;01mand\u001b[39;00m restore_bbox:\n", + "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/matplotlib/backend_bases.py:2232\u001b[0m, in \u001b[0;36mFigureCanvasBase._switch_canvas_and_return_print_method..\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 2228\u001b[0m optional_kws \u001b[38;5;241m=\u001b[39m { \u001b[38;5;66;03m# Passed by print_figure for other renderers.\u001b[39;00m\n\u001b[1;32m 2229\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mdpi\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mfacecolor\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124medgecolor\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124morientation\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m 2230\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mbbox_inches_restore\u001b[39m\u001b[38;5;124m\"\u001b[39m}\n\u001b[1;32m 2231\u001b[0m skip \u001b[38;5;241m=\u001b[39m optional_kws \u001b[38;5;241m-\u001b[39m {\u001b[38;5;241m*\u001b[39minspect\u001b[38;5;241m.\u001b[39msignature(meth)\u001b[38;5;241m.\u001b[39mparameters}\n\u001b[0;32m-> 2232\u001b[0m print_method 
\u001b[38;5;241m=\u001b[39m functools\u001b[38;5;241m.\u001b[39mwraps(meth)(\u001b[38;5;28;01mlambda\u001b[39;00m \u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs: \u001b[43mmeth\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 2233\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43m{\u001b[49m\u001b[43mk\u001b[49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mv\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mfor\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mk\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mv\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;129;43;01min\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mkwargs\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mitems\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mif\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mk\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;129;43;01mnot\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;129;43;01min\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mskip\u001b[49m\u001b[43m}\u001b[49m\u001b[43m)\u001b[49m)\n\u001b[1;32m 2234\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m: \u001b[38;5;66;03m# Let third-parties do as they see fit.\u001b[39;00m\n\u001b[1;32m 2235\u001b[0m print_method \u001b[38;5;241m=\u001b[39m meth\n", + "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/matplotlib/backends/backend_agg.py:509\u001b[0m, in \u001b[0;36mFigureCanvasAgg.print_png\u001b[0;34m(self, filename_or_obj, metadata, pil_kwargs)\u001b[0m\n\u001b[1;32m 462\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mprint_png\u001b[39m(\u001b[38;5;28mself\u001b[39m, filename_or_obj, \u001b[38;5;241m*\u001b[39m, metadata\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m, 
pil_kwargs\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m):\n\u001b[1;32m 463\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 464\u001b[0m \u001b[38;5;124;03m Write the figure to a PNG file.\u001b[39;00m\n\u001b[1;32m 465\u001b[0m \n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 507\u001b[0m \u001b[38;5;124;03m *metadata*, including the default 'Software' key.\u001b[39;00m\n\u001b[1;32m 508\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[0;32m--> 509\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_print_pil\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfilename_or_obj\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mpng\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mpil_kwargs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmetadata\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/matplotlib/backends/backend_agg.py:457\u001b[0m, in \u001b[0;36mFigureCanvasAgg._print_pil\u001b[0;34m(self, filename_or_obj, fmt, pil_kwargs, metadata)\u001b[0m\n\u001b[1;32m 452\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m_print_pil\u001b[39m(\u001b[38;5;28mself\u001b[39m, filename_or_obj, fmt, pil_kwargs, metadata\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m):\n\u001b[1;32m 453\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 454\u001b[0m \u001b[38;5;124;03m Draw the canvas, then save it using `.image.imsave` (to which\u001b[39;00m\n\u001b[1;32m 455\u001b[0m \u001b[38;5;124;03m *pil_kwargs* and *metadata* are forwarded).\u001b[39;00m\n\u001b[1;32m 456\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[0;32m--> 457\u001b[0m 
\u001b[43mFigureCanvasAgg\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdraw\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[1;32m 458\u001b[0m mpl\u001b[38;5;241m.\u001b[39mimage\u001b[38;5;241m.\u001b[39mimsave(\n\u001b[1;32m 459\u001b[0m filename_or_obj, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mbuffer_rgba(), \u001b[38;5;28mformat\u001b[39m\u001b[38;5;241m=\u001b[39mfmt, origin\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mupper\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m 460\u001b[0m dpi\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mfigure\u001b[38;5;241m.\u001b[39mdpi, metadata\u001b[38;5;241m=\u001b[39mmetadata, pil_kwargs\u001b[38;5;241m=\u001b[39mpil_kwargs)\n", + "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/matplotlib/backends/backend_agg.py:400\u001b[0m, in \u001b[0;36mFigureCanvasAgg.draw\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 396\u001b[0m \u001b[38;5;66;03m# Acquire a lock on the shared font cache.\u001b[39;00m\n\u001b[1;32m 397\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m RendererAgg\u001b[38;5;241m.\u001b[39mlock, \\\n\u001b[1;32m 398\u001b[0m (\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mtoolbar\u001b[38;5;241m.\u001b[39m_wait_cursor_for_draw_cm() \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mtoolbar\n\u001b[1;32m 399\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m nullcontext()):\n\u001b[0;32m--> 400\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfigure\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdraw\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrenderer\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 401\u001b[0m \u001b[38;5;66;03m# A GUI class may be need to update a window using this draw, so\u001b[39;00m\n\u001b[1;32m 
402\u001b[0m \u001b[38;5;66;03m# don't forget to call the superclass.\u001b[39;00m\n\u001b[1;32m 403\u001b[0m \u001b[38;5;28msuper\u001b[39m()\u001b[38;5;241m.\u001b[39mdraw()\n", + "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/matplotlib/artist.py:95\u001b[0m, in \u001b[0;36m_finalize_rasterization..draw_wrapper\u001b[0;34m(artist, renderer, *args, **kwargs)\u001b[0m\n\u001b[1;32m 93\u001b[0m \u001b[38;5;129m@wraps\u001b[39m(draw)\n\u001b[1;32m 94\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mdraw_wrapper\u001b[39m(artist, renderer, \u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs):\n\u001b[0;32m---> 95\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[43mdraw\u001b[49m\u001b[43m(\u001b[49m\u001b[43martist\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mrenderer\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 96\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m renderer\u001b[38;5;241m.\u001b[39m_rasterizing:\n\u001b[1;32m 97\u001b[0m renderer\u001b[38;5;241m.\u001b[39mstop_rasterizing()\n", + "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/matplotlib/artist.py:72\u001b[0m, in \u001b[0;36mallow_rasterization..draw_wrapper\u001b[0;34m(artist, renderer)\u001b[0m\n\u001b[1;32m 69\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m artist\u001b[38;5;241m.\u001b[39mget_agg_filter() \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 70\u001b[0m renderer\u001b[38;5;241m.\u001b[39mstart_filter()\n\u001b[0;32m---> 72\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mdraw\u001b[49m\u001b[43m(\u001b[49m\u001b[43martist\u001b[49m\u001b[43m,\u001b[49m\u001b[43m 
\u001b[49m\u001b[43mrenderer\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 73\u001b[0m \u001b[38;5;28;01mfinally\u001b[39;00m:\n\u001b[1;32m 74\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m artist\u001b[38;5;241m.\u001b[39mget_agg_filter() \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n", + "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/matplotlib/figure.py:3140\u001b[0m, in \u001b[0;36mFigure.draw\u001b[0;34m(self, renderer)\u001b[0m\n\u001b[1;32m 3137\u001b[0m \u001b[38;5;66;03m# ValueError can occur when resizing a window.\u001b[39;00m\n\u001b[1;32m 3139\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mpatch\u001b[38;5;241m.\u001b[39mdraw(renderer)\n\u001b[0;32m-> 3140\u001b[0m \u001b[43mmimage\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_draw_list_compositing_images\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 3141\u001b[0m \u001b[43m \u001b[49m\u001b[43mrenderer\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43martists\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msuppressComposite\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 3143\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m sfig \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39msubfigs:\n\u001b[1;32m 3144\u001b[0m sfig\u001b[38;5;241m.\u001b[39mdraw(renderer)\n", + "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/matplotlib/image.py:131\u001b[0m, in \u001b[0;36m_draw_list_compositing_images\u001b[0;34m(renderer, parent, artists, suppress_composite)\u001b[0m\n\u001b[1;32m 129\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m not_composite \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m has_images:\n\u001b[1;32m 130\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m 
a \u001b[38;5;129;01min\u001b[39;00m artists:\n\u001b[0;32m--> 131\u001b[0m \u001b[43ma\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdraw\u001b[49m\u001b[43m(\u001b[49m\u001b[43mrenderer\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 132\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 133\u001b[0m \u001b[38;5;66;03m# Composite any adjacent images together\u001b[39;00m\n\u001b[1;32m 134\u001b[0m image_group \u001b[38;5;241m=\u001b[39m []\n", + "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/matplotlib/artist.py:72\u001b[0m, in \u001b[0;36mallow_rasterization..draw_wrapper\u001b[0;34m(artist, renderer)\u001b[0m\n\u001b[1;32m 69\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m artist\u001b[38;5;241m.\u001b[39mget_agg_filter() \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 70\u001b[0m renderer\u001b[38;5;241m.\u001b[39mstart_filter()\n\u001b[0;32m---> 72\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mdraw\u001b[49m\u001b[43m(\u001b[49m\u001b[43martist\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mrenderer\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 73\u001b[0m \u001b[38;5;28;01mfinally\u001b[39;00m:\n\u001b[1;32m 74\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m artist\u001b[38;5;241m.\u001b[39mget_agg_filter() \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n", + "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/matplotlib/axes/_base.py:3064\u001b[0m, in \u001b[0;36m_AxesBase.draw\u001b[0;34m(self, renderer)\u001b[0m\n\u001b[1;32m 3061\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m artists_rasterized:\n\u001b[1;32m 3062\u001b[0m _draw_rasterized(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mfigure, artists_rasterized, renderer)\n\u001b[0;32m-> 3064\u001b[0m 
\u001b[43mmimage\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_draw_list_compositing_images\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 3065\u001b[0m \u001b[43m \u001b[49m\u001b[43mrenderer\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43martists\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfigure\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msuppressComposite\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 3067\u001b[0m renderer\u001b[38;5;241m.\u001b[39mclose_group(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124maxes\u001b[39m\u001b[38;5;124m'\u001b[39m)\n\u001b[1;32m 3068\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mstale \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mFalse\u001b[39;00m\n", + "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/matplotlib/image.py:131\u001b[0m, in \u001b[0;36m_draw_list_compositing_images\u001b[0;34m(renderer, parent, artists, suppress_composite)\u001b[0m\n\u001b[1;32m 129\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m not_composite \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m has_images:\n\u001b[1;32m 130\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m a \u001b[38;5;129;01min\u001b[39;00m artists:\n\u001b[0;32m--> 131\u001b[0m \u001b[43ma\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdraw\u001b[49m\u001b[43m(\u001b[49m\u001b[43mrenderer\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 132\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 133\u001b[0m \u001b[38;5;66;03m# Composite any adjacent images together\u001b[39;00m\n\u001b[1;32m 134\u001b[0m image_group \u001b[38;5;241m=\u001b[39m []\n", + "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/matplotlib/artist.py:72\u001b[0m, in \u001b[0;36mallow_rasterization..draw_wrapper\u001b[0;34m(artist, 
renderer)\u001b[0m\n\u001b[1;32m 69\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m artist\u001b[38;5;241m.\u001b[39mget_agg_filter() \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 70\u001b[0m renderer\u001b[38;5;241m.\u001b[39mstart_filter()\n\u001b[0;32m---> 72\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mdraw\u001b[49m\u001b[43m(\u001b[49m\u001b[43martist\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mrenderer\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 73\u001b[0m \u001b[38;5;28;01mfinally\u001b[39;00m:\n\u001b[1;32m 74\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m artist\u001b[38;5;241m.\u001b[39mget_agg_filter() \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n", + "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/matplotlib/axis.py:1376\u001b[0m, in \u001b[0;36mAxis.draw\u001b[0;34m(self, renderer, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1373\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m\n\u001b[1;32m 1374\u001b[0m renderer\u001b[38;5;241m.\u001b[39mopen_group(\u001b[38;5;18m__name__\u001b[39m, gid\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mget_gid())\n\u001b[0;32m-> 1376\u001b[0m ticks_to_draw \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_update_ticks\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1377\u001b[0m tlb1, tlb2 \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_get_ticklabel_bboxes(ticks_to_draw, renderer)\n\u001b[1;32m 1379\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m tick \u001b[38;5;129;01min\u001b[39;00m ticks_to_draw:\n", + "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/matplotlib/axis.py:1263\u001b[0m, in \u001b[0;36mAxis._update_ticks\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 1258\u001b[0m 
\u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 1259\u001b[0m \u001b[38;5;124;03mUpdate ticks (position and labels) using the current data interval of\u001b[39;00m\n\u001b[1;32m 1260\u001b[0m \u001b[38;5;124;03mthe axes. Return the list of ticks that will be drawn.\u001b[39;00m\n\u001b[1;32m 1261\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 1262\u001b[0m major_locs \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mget_majorticklocs()\n\u001b[0;32m-> 1263\u001b[0m major_labels \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmajor\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mformatter\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mformat_ticks\u001b[49m\u001b[43m(\u001b[49m\u001b[43mmajor_locs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1264\u001b[0m major_ticks \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mget_major_ticks(\u001b[38;5;28mlen\u001b[39m(major_locs))\n\u001b[1;32m 1265\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mmajor\u001b[38;5;241m.\u001b[39mformatter\u001b[38;5;241m.\u001b[39mset_locs(major_locs)\n", + "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/matplotlib/ticker.py:218\u001b[0m, in \u001b[0;36mFormatter.format_ticks\u001b[0;34m(self, values)\u001b[0m\n\u001b[1;32m 216\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"Return the tick labels for all the ticks at once.\"\"\"\u001b[39;00m\n\u001b[1;32m 217\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mset_locs(values)\n\u001b[0;32m--> 218\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m [\u001b[38;5;28mself\u001b[39m(value, i) \u001b[38;5;28;01mfor\u001b[39;00m i, value \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28menumerate\u001b[39m(values)]\n", + "File 
\u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/matplotlib/ticker.py:218\u001b[0m, in \u001b[0;36m\u001b[0;34m(.0)\u001b[0m\n\u001b[1;32m 216\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"Return the tick labels for all the ticks at once.\"\"\"\u001b[39;00m\n\u001b[1;32m 217\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mset_locs(values)\n\u001b[0;32m--> 218\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m [\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mvalue\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mi\u001b[49m\u001b[43m)\u001b[49m \u001b[38;5;28;01mfor\u001b[39;00m i, value \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28menumerate\u001b[39m(values)]\n", + "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/matplotlib/dates.py:651\u001b[0m, in \u001b[0;36mDateFormatter.__call__\u001b[0;34m(self, x, pos)\u001b[0m\n\u001b[1;32m 650\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m__call__\u001b[39m(\u001b[38;5;28mself\u001b[39m, x, pos\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m0\u001b[39m):\n\u001b[0;32m--> 651\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[43mnum2date\u001b[49m\u001b[43m(\u001b[49m\u001b[43mx\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mtz\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241m.\u001b[39mstrftime(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mfmt)\n\u001b[1;32m 652\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m _wrap_in_tex(result) \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_usetex \u001b[38;5;28;01melse\u001b[39;00m result\n", + "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/matplotlib/dates.py:544\u001b[0m, in \u001b[0;36mnum2date\u001b[0;34m(x, tz)\u001b[0m\n\u001b[1;32m 518\u001b[0m 
\u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 519\u001b[0m \u001b[38;5;124;03mConvert Matplotlib dates to `~datetime.datetime` objects.\u001b[39;00m\n\u001b[1;32m 520\u001b[0m \n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 541\u001b[0m \u001b[38;5;124;03mFor details, see the module docstring.\u001b[39;00m\n\u001b[1;32m 542\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 543\u001b[0m tz \u001b[38;5;241m=\u001b[39m _get_tzinfo(tz)\n\u001b[0;32m--> 544\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43m_from_ordinalf_np_vectorized\u001b[49m\u001b[43m(\u001b[49m\u001b[43mx\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtz\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241m.\u001b[39mtolist()\n", + "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/numpy/lib/function_base.py:2329\u001b[0m, in \u001b[0;36mvectorize.__call__\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 2326\u001b[0m vargs \u001b[38;5;241m=\u001b[39m [args[_i] \u001b[38;5;28;01mfor\u001b[39;00m _i \u001b[38;5;129;01min\u001b[39;00m inds]\n\u001b[1;32m 2327\u001b[0m vargs\u001b[38;5;241m.\u001b[39mextend([kwargs[_n] \u001b[38;5;28;01mfor\u001b[39;00m _n \u001b[38;5;129;01min\u001b[39;00m names])\n\u001b[0;32m-> 2329\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_vectorize_call\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfunc\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mfunc\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43margs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mvargs\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/numpy/lib/function_base.py:2412\u001b[0m, in \u001b[0;36mvectorize._vectorize_call\u001b[0;34m(self, func, args)\u001b[0m\n\u001b[1;32m 2409\u001b[0m \u001b[38;5;66;03m# Convert args to object arrays first\u001b[39;00m\n\u001b[1;32m 
2410\u001b[0m inputs \u001b[38;5;241m=\u001b[39m [asanyarray(a, dtype\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mobject\u001b[39m) \u001b[38;5;28;01mfor\u001b[39;00m a \u001b[38;5;129;01min\u001b[39;00m args]\n\u001b[0;32m-> 2412\u001b[0m outputs \u001b[38;5;241m=\u001b[39m \u001b[43mufunc\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43minputs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 2414\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m ufunc\u001b[38;5;241m.\u001b[39mnout \u001b[38;5;241m==\u001b[39m \u001b[38;5;241m1\u001b[39m:\n\u001b[1;32m 2415\u001b[0m res \u001b[38;5;241m=\u001b[39m asanyarray(outputs, dtype\u001b[38;5;241m=\u001b[39motypes[\u001b[38;5;241m0\u001b[39m])\n", + "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/matplotlib/dates.py:359\u001b[0m, in \u001b[0;36m_from_ordinalf\u001b[0;34m(x, tz)\u001b[0m\n\u001b[1;32m 346\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 347\u001b[0m \u001b[38;5;124;03mConvert Gregorian float of the date, preserving hours, minutes,\u001b[39;00m\n\u001b[1;32m 348\u001b[0m \u001b[38;5;124;03mseconds and microseconds. 
Return value is a `.datetime`.\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 353\u001b[0m \u001b[38;5;124;03m:rc:`timezone`.\u001b[39;00m\n\u001b[1;32m 354\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 356\u001b[0m tz \u001b[38;5;241m=\u001b[39m _get_tzinfo(tz)\n\u001b[1;32m 358\u001b[0m dt \u001b[38;5;241m=\u001b[39m (np\u001b[38;5;241m.\u001b[39mdatetime64(get_epoch()) \u001b[38;5;241m+\u001b[39m\n\u001b[0;32m--> 359\u001b[0m \u001b[43mnp\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mtimedelta64\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mint\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mnp\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mround\u001b[49m\u001b[43m(\u001b[49m\u001b[43mx\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43m \u001b[49m\u001b[43mMUSECONDS_PER_DAY\u001b[49m\u001b[43m)\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mus\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m)\u001b[49m)\n\u001b[1;32m 360\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m dt \u001b[38;5;241m<\u001b[39m np\u001b[38;5;241m.\u001b[39mdatetime64(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m0001-01-01\u001b[39m\u001b[38;5;124m'\u001b[39m) \u001b[38;5;129;01mor\u001b[39;00m dt \u001b[38;5;241m>\u001b[39m\u001b[38;5;241m=\u001b[39m np\u001b[38;5;241m.\u001b[39mdatetime64(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m10000-01-01\u001b[39m\u001b[38;5;124m'\u001b[39m):\n\u001b[1;32m 361\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mDate ordinal \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mx\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m converts to \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mdt\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m (using \u001b[39m\u001b[38;5;124m'\u001b[39m\n\u001b[1;32m 362\u001b[0m 
\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mepoch \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mget_epoch()\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m), but Matplotlib dates must be \u001b[39m\u001b[38;5;124m'\u001b[39m\n\u001b[1;32m 363\u001b[0m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mbetween year 0001 and 9999.\u001b[39m\u001b[38;5;124m'\u001b[39m)\n", + "\u001b[0;31mOverflowError\u001b[0m: int too big to convert" + ] + }, + { + "data": { + "text/plain": [ + "Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous view', 'arrow-left', 'back'), ('Forward', 'Forward to next view', 'arrow-right', 'forward'), ('Pan', 'Left button pans, Right button zooms\\nx/y fixes axis, CTRL fixes aspect', 'arrows', 'pan'), ('Zoom', 'Zoom to rectangle\\nx/y fixes axis', 'square-o', 'zoom'), ('Download', 'Download plot', 'floppy-o', 'save_figure')]))" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "ts_display = act.plotting.TimeSeriesDisplay(ds)\n", + "ts_display.add_subplots((len(variables_to_plot),), figsize = (9.5,4*len(variables_to_plot)))\n", + "\n", + "for i,v in enumerate(variables_to_plot):\n", + " ts_ax = ts_display.plot(v, subplot_index=(i,), set_title=ds.variables[v].attrs['long_name'],)\n", + " ts_ax.grid()\n", + "\n", + "plt.show()\n" + ] + }, + { + "cell_type": "markdown", + "id": "194399aa-1907-452b-8ba9-bc31d7f60291", + "metadata": {}, + "source": [ + "## Quality check plots\n", + "#### Define variable for QC plot" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "9a0ef63d-3eeb-48fc-a24a-43258b6134b8", + "metadata": {}, + "outputs": [], + "source": [ + "qc_variable = 'gswfluxdn'" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "532663a3-4dc0-4497-bda8-018c5f91e1c4", + "metadata": {}, + "outputs": [ + { + "ename": "KeyError", + "evalue": "'gswfluxdn'", + "output_type": "error", + "traceback": [ + 
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)", + "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/xarray/core/dataset.py:1348\u001b[0m, in \u001b[0;36mDataset._construct_dataarray\u001b[0;34m(self, name)\u001b[0m\n\u001b[1;32m 1347\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m-> 1348\u001b[0m variable \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_variables\u001b[49m\u001b[43m[\u001b[49m\u001b[43mname\u001b[49m\u001b[43m]\u001b[49m\n\u001b[1;32m 1349\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m:\n", + "\u001b[0;31mKeyError\u001b[0m: 'gswfluxdn'", + "\nDuring handling of the above exception, another exception occurred:\n", + "\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[12], line 7\u001b[0m\n\u001b[1;32m 5\u001b[0m qc_display \u001b[38;5;241m=\u001b[39m act\u001b[38;5;241m.\u001b[39mplotting\u001b[38;5;241m.\u001b[39mTimeSeriesDisplay(ds)\n\u001b[1;32m 6\u001b[0m qc_display\u001b[38;5;241m.\u001b[39madd_subplots((\u001b[38;5;241m2\u001b[39m,), figsize \u001b[38;5;241m=\u001b[39m (\u001b[38;5;241m9.5\u001b[39m,\u001b[38;5;241m10\u001b[39m))\n\u001b[0;32m----> 7\u001b[0m qc_ax \u001b[38;5;241m=\u001b[39m \u001b[43mqc_display\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mplot\u001b[49m\u001b[43m(\u001b[49m\u001b[43mqc_variable\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43msubplot_index\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m0\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mset_title\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mQC results on field: 
\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m \u001b[49m\u001b[38;5;241;43m+\u001b[39;49m\u001b[43m \u001b[49m\u001b[43mqc_variable\u001b[49m\u001b[43m,\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 8\u001b[0m qc_ax\u001b[38;5;241m.\u001b[39mgrid()\n\u001b[1;32m 9\u001b[0m qc_display\u001b[38;5;241m.\u001b[39mqc_flag_block_plot(qc_variable, subplot_index\u001b[38;5;241m=\u001b[39m(\u001b[38;5;241m1\u001b[39m,))\n", + "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/act/plotting/timeseriesdisplay.py:418\u001b[0m, in \u001b[0;36mTimeSeriesDisplay.plot\u001b[0;34m(self, field, dsname, subplot_index, cmap, set_title, add_nan, day_night_background, invert_y_axis, abs_limits, time_rng, y_rng, use_var_for_y, set_shading, assessment_overplot, overplot_marker, overplot_behind, overplot_markersize, assessment_overplot_category, assessment_overplot_category_color, force_line_plot, labels, cbar_label, cbar_h_adjust, secondary_y, y_axis_flag_meanings, colorbar_labels, cb_friendly, **kwargs)\u001b[0m\n\u001b[1;32m 415\u001b[0m assessment_overplot_category_color[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mAcceptable\u001b[39m\u001b[38;5;124m'\u001b[39m] \u001b[38;5;241m=\u001b[39m (\u001b[38;5;241m0.0\u001b[39m, \u001b[38;5;241m0.4240129715562796\u001b[39m, \u001b[38;5;241m0.4240129715562796\u001b[39m),\n\u001b[1;32m 417\u001b[0m \u001b[38;5;66;03m# Get data and dimensions\u001b[39;00m\n\u001b[0;32m--> 418\u001b[0m data \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_obj\u001b[49m\u001b[43m[\u001b[49m\u001b[43mdsname\u001b[49m\u001b[43m]\u001b[49m\u001b[43m[\u001b[49m\u001b[43mfield\u001b[49m\u001b[43m]\u001b[49m\n\u001b[1;32m 419\u001b[0m dim \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mlist\u001b[39m(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_obj[dsname][field]\u001b[38;5;241m.\u001b[39mdims)\n\u001b[1;32m 420\u001b[0m xdata \u001b[38;5;241m=\u001b[39m 
\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_obj[dsname][dim[\u001b[38;5;241m0\u001b[39m]]\n", + "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/xarray/core/dataset.py:1439\u001b[0m, in \u001b[0;36mDataset.__getitem__\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m 1437\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39misel(\u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkey)\n\u001b[1;32m 1438\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m utils\u001b[38;5;241m.\u001b[39mhashable(key):\n\u001b[0;32m-> 1439\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_construct_dataarray\u001b[49m\u001b[43m(\u001b[49m\u001b[43mkey\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1440\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m utils\u001b[38;5;241m.\u001b[39miterable_of_hashable(key):\n\u001b[1;32m 1441\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_copy_listed(key)\n", + "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/xarray/core/dataset.py:1350\u001b[0m, in \u001b[0;36mDataset._construct_dataarray\u001b[0;34m(self, name)\u001b[0m\n\u001b[1;32m 1348\u001b[0m variable \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_variables[name]\n\u001b[1;32m 1349\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m:\n\u001b[0;32m-> 1350\u001b[0m _, name, variable \u001b[38;5;241m=\u001b[39m \u001b[43m_get_virtual_variable\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_variables\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mname\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdims\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 
1352\u001b[0m needed_dims \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mset\u001b[39m(variable\u001b[38;5;241m.\u001b[39mdims)\n\u001b[1;32m 1354\u001b[0m coords: \u001b[38;5;28mdict\u001b[39m[Hashable, Variable] \u001b[38;5;241m=\u001b[39m {}\n", + "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/xarray/core/dataset.py:186\u001b[0m, in \u001b[0;36m_get_virtual_variable\u001b[0;34m(variables, key, dim_sizes)\u001b[0m\n\u001b[1;32m 184\u001b[0m split_key \u001b[38;5;241m=\u001b[39m key\u001b[38;5;241m.\u001b[39msplit(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m.\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;241m1\u001b[39m)\n\u001b[1;32m 185\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(split_key) \u001b[38;5;241m!=\u001b[39m \u001b[38;5;241m2\u001b[39m:\n\u001b[0;32m--> 186\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m(key)\n\u001b[1;32m 188\u001b[0m ref_name, var_name \u001b[38;5;241m=\u001b[39m split_key\n\u001b[1;32m 189\u001b[0m ref_var \u001b[38;5;241m=\u001b[39m variables[ref_name]\n", + "\u001b[0;31mKeyError\u001b[0m: 'gswfluxdn'" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "fd79e0ba342048999f2bf386b4218b60", + "version_major": 2, + "version_minor": 0 + }, + "image/png": "", + "text/html": [ + "\n", + "
\n", + "
\n", + " Figure\n", + "
\n", + " \n", + "
\n", + " " + ], + "text/plain": [ + "Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# QC Plot\n", + "if ('qc_'+qc_variable) in ds.variables:\n", + "\n", + " # Plot\n", + " qc_display = act.plotting.TimeSeriesDisplay(ds)\n", + " qc_display.add_subplots((2,), figsize = (9.5,10))\n", + " qc_ax = qc_display.plot(qc_variable, subplot_index=(0,), set_title=\"QC results on field: \" + qc_variable,)\n", + " qc_ax.grid()\n", + " qc_display.qc_flag_block_plot(qc_variable, subplot_index=(1,))\n", + "\n", + " plt.show()\n", + "else:\n", + " print(f'QC not available for the selected field: {qc_variable}')\n" + ] + }, + { + "cell_type": "markdown", + "id": "2308db16-f362-4033-a11a-c3e5e75ad9ba", + "metadata": {}, + "source": [ + "## Field selection dropdown menu\n", + "Select variable to be plotted from a dropdown menu" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d5792fa8-2253-441a-8ac6-1e07a0345031", + "metadata": {}, + "outputs": [], + "source": [ + "plt.ioff()\n", + "\n", + "# populate dropdown menu with available variables \n", + "available_variables = [v for v in ds.variables if not('time' in v or v.startswith('qc_') or v.startswith('source_')) and 'long_name' in ds.variables[v].attrs]\n", + "d_variable = 'gswfluxdn_measured'\n", + "dropdown = widgets.Dropdown(\n", + " options = [(ds.variables[v].attrs['long_name'], v) for v in available_variables],\n", + " value= d_variable,\n", + " description='Field:',\n", + " disabled=False,\n", + ")\n", + "dropdown.layout.margin = '0px 30% 0px 20%'\n", + "dropdown.layout.width = '50%'\n", + "\n", + "# set up display\n", + "i_display = act.plotting.TimeSeriesDisplay(ds)\n", + "i_display.add_subplots((1,), figsize = (9.5,5))\n", + "i_ax = i_display.plot(d_variable, subplot_index=(0,), set_title=ds.variables[d_variable].attrs['long_name'],)\n", + "i_ax.grid()\n", 
+ "i_fig = i_display.fig\n", + "\n", + "# update plot callback function\n", + "def update_plot(change):\n", + " i_ax.cla()\n", + " i_ax_new = i_display.plot(change.new, subplot_index=(0,), set_title=ds.variables[change.new].attrs['long_name'],)\n", + " i_ax_new.grid()\n", + " i_fig.canvas.draw()\n", + " i_fig.canvas.flush_events()\n", + "\n", + "dropdown.observe(update_plot, names='value')\n", + "\n", + "widgets.AppLayout(\n", + " header=dropdown,\n", + " center=i_fig.canvas,\n", + " pane_heights=[1, 6,1]\n", + ")\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.16" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/TBSMERGED/TBSMERGED_tutorial.ipynb b/VAPs/quicklook/TBSMERGED/TBSMERGED_tutorial.ipynb new file mode 100644 index 00000000..5ffca6bd --- /dev/null +++ b/VAPs/quicklook/TBSMERGED/TBSMERGED_tutorial.ipynb @@ -0,0 +1,838 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# TBSMERGED.C1 Notebook\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/tbsmerged) for more information about this vap." + ] + }, + { + "cell_type": "markdown", + "id": "97097763", + "metadata": {}, + "source": [ + "In this notebook, we demonstrate the workflow to explore ARM vap data (using tbsmerged as an example.) Value-added products (VAPs) are higher-order data products that have been analyzed and processed to ease scientists’ use of ARM data in atmospheric research and global climate models. 
\n", + "Here is the main content we will cover." + ] + }, + { + "cell_type": "markdown", + "id": "eddec40f", + "metadata": {}, + "source": [ + "# Table of Contents\n", + "## Access the data\n", + "* How to retrieve the data\n", + "* Data path and file name conventions\n", + "* Load data\n", + "## Explore the data\n", + "* NetCDF Data structure\n", + "* Xarray essentials\n", + "* Xarray Variable\n", + "## Plot the data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "import random\n", + "\n", + "import glob\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "id": "2372d5be", + "metadata": {}, + "source": [ + "## Access the data" + ] + }, + { + "cell_type": "markdown", + "id": "5b0d9684", + "metadata": {}, + "source": [ + "### How to retrieve the data\n", + "We assume the path \"/data/archive\" is available where you are running this notebook. You can use `os.path.exists(\"/data/archive\")` to verify if the path exists on your machine.\n", + "\n", + "\n", + "### Data path and file name conventions\n", + "There are several common terms regarding ARM data, for example, data-stream-name, data-level, site, facility, instrument, etc. (For more details, please see [ARM Data File Standards Version 1.2](https://www.google.com/search?q=arm+datastream+facility+cite+definition&rlz=1C1GCEJ_enUS1029US1029&ei=hb41ZICrBPukqtsPvMODIA&ved=0ahUKEwjAgeiS0aL-AhV7kmoFHbzhAAQQ4dUDCBA&uact=5&oq=arm+datastream+facility+cite+definition&gs_lcp=Cgxnd3Mtd2l6LXNlcnAQAzIFCAAQogQyBQgAEKIEMgUIABCiBDIFCAAQogQyBQgAEKIEOgoIABBHENYEELADOgoIIRCgARDDBBAKSgQIQRgAUJ4DWIoOYO0PaAFwAXgAgAGMAYgBuQmSAQMyLjmYAQCgAQHIAQjAAQE&sclient=gws-wiz-serp).) 
\n", + "\n", + "For example, this notebook is called `tbsmerged.c1`, where `tbsmerged` is the \"datastream name\", and the `c1` is the \"data level\".\n", + "\n", + "This datastream also contains site `guc` and facility `S4`. (Note: individual datastream might have multiple site-facility pairs.)\n", + "In such a case, the data of this data-stream is stored at `/data/archive/guc/guctbsmergedS4.c1`, which is in the format of `<DATA_DIR>/<site>/<site><datastream_name><facility>.<data_level>`. We can use the following method to assign the data-stream directory `datastream_dir = os.path.join(DATA_DIR, site, site + DATASTREAM_NAME + facility + '.' + DATA_LEVEL )`\n", + "\n", + "The data files under datastream_dir also follow naming conventions. Once you reach the datastream_dir level, the most important file naming convention to differentiate the files is \"yyyyMMdd.hhmmss\", which comes in handy to filter out files based on datetime. For example, we can use `glob.glob(f'{datastream_dir}/*.200709*.*')` to filter files in September 2007.\n", + "\n", + "(Note: refer to the https://adc.arm.gov/discovery/#/ and https://adc.arm.gov/solr8/metadata/select API to explore ARM datastream and associated available sites and facilities.)\n", + "\n", + "Please see the following examples in action" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d7e9eb85", + "metadata": {}, + "outputs": [], + "source": [ + "# Verify if DATA_DIR path exists\n", + "DATA_DIR = \"/data/archive\"\n", + "os.path.exists(DATA_DIR)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "586993fd", + "metadata": {}, + "outputs": [], + "source": [ + "# Specify datastream_dir following the path conventions and check its existence\n", + "DATASTREAM_NAME = \"tbsmerged\"\n", + "DATA_LEVEL = \"c1\"\n", + "site = \"guc\"\n", + "facility = \"S4\"\n", + "datastream_dir = os.path.join(DATA_DIR, site, site + DATASTREAM_NAME + facility + '.' 
+ DATA_LEVEL )\n", + "print(datastream_dir)\n", + "print(os.path.exists(datastream_dir))\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0742f7c1", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: list 5 (random) files under datastream_dir\n", + "files = os.listdir(datastream_dir)\n", + "files[:5]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "39b98a36", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: get most recent file\n", + "list_of_files = glob.glob(f\"{datastream_dir}/*\") # * means all if need specific format then *.csv\n", + "latest_file = max(list_of_files, key=os.path.getctime)\n", + "latest_file" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "902d514e", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus sort datastream files based on datetime\n", + "files = os.listdir(datastream_dir)\n", + "file_sorted = files.copy()\n", + "file_sorted.sort() \n", + "print(file_sorted[:5])\n", + "\n", + "# to reverse\n", + "file_sorted_reverse = files.copy() \n", + "file_sorted_reverse.sort(reverse=True)\n", + "print(file_sorted_reverse[:5])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ec5923b2", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: pattern matching\n", + "# filter the 200709** files under datastream_dir\n", + "files_filter = glob.glob(f'{datastream_dir}/*.200709*.*')\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c4a4aa18", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: use armnotebook_utils.file_filter (TODO) to filter files based on datastream info \n" + ] + }, + { + "cell_type": "markdown", + "id": "cfeb9efc", + "metadata": {}, + "source": [ + "### Load data\n", + "The arm data is stored in [NetCDF](https://en.wikipedia.org/wiki/NetCDF#:~:text=%22NetCDF%20(network%20Common%20Data%20Form,format%20for%20representing%20scientific%20data.) format. 
We can use xarray's `open_dataset` method to load a single file, or `open_mfdataset` to open multiple files. (Note: the latter will still return a single xarray dataset object by combining multiple files.)\n", + "\n", + "Note: open_dataset keeps the file handle open and lazy loads its contents. All parameters are passed directly to open_dataset. It is a preferable method over load_dataset for memory efficiency. (For more details, please see [xarray.open_dataset](https://docs.xarray.dev/en/stable/generated/xarray.open_dataset.html))\n", + "\n", + "See the following example in action" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a440a329", + "metadata": {}, + "outputs": [], + "source": [ + "# open single file\n", + "full_path = latest_file\n", + "print(full_path)\n", + "ds_single = xr.open_dataset(full_path)\n", + "ds_single" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0143a3d", + "metadata": {}, + "outputs": [], + "source": [ + "print(ds_single)\n", + "print(type(ds_single))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1c0f8939", + "metadata": {}, + "outputs": [], + "source": [ + "# open multiple files\n", + "n_files = 3\n", + "full_paths = [os.path.join(datastream_dir, f_path) for f_path in file_sorted_reverse[:n_files]]\n", + "print(full_paths)\n", + "try: # Note: sometimes multiple files cannot be merged, so we used try except here.\n", + " ds_mutiple = xr.open_mfdataset(full_paths)\n", + " # ds_mutiple\n", + " print(type(ds_mutiple))\n", + " print(ds_mutiple)\n", + "except Exception as e:\n", + " print(e)" + ] + }, + { + "cell_type": "markdown", + "id": "3fa59943", + "metadata": {}, + "source": [ + "## Explore the data" + ] + }, + { + "cell_type": "markdown", + "id": "571f69b9", + "metadata": {}, + "source": [ + "### NetCDF Data structure \n", + "\n", + "\n", + "(If you are confident with NetCDF basics and xarray essentials, feel free to skip this section.)\n", + "Before we dive into 
data exploration, there are some essential concepts we should be familiar with (shown below): \n", + "* Dataset\n", + "* Data array\n", + "* Variable\n", + "* Dimension\n", + "* Coordinate\n", + "* Data Type\n", + "* Meta Data (Attributes)\n", + "\n", + "We will not go into details about NetCDF basics, but here are some references you might find helpful\n", + "* [Components of a NetCDF Dataset](https://iprc.soest.hawaii.edu/users/xfu/tool/guidef-7.html#)\n", + "* [Network Common Data Form (NetCDF)](https://www.unidata.ucar.edu/software/netcdf/) and [A Brief History of (netCDF) Time](https://www.unidata.ucar.edu/software/netcdf/time/recs.html)\n", + "\n", + "\n", + "\n", + "### Xarray essentials\n", + "Earlier we introduced the xarray package and used `open_dataset` and `open_mfdataset` to load a NetCDF data file as an xarray Dataset object. Recall that you can review the data in a notebook by using `print(ds)` or `ds`. \n", + "\n", + "Using xarray to retrieve the aforementioned NetCDF basics is straightforward. 
In general,\n", + "* Dataset: `ds`\n", + "* Data array: `ds.variables`\n", + "* Variable: `ds.variables`\n", + "* Dimension: `ds.dims`\n", + "* Coordinate: `ds.coords`\n", + "* Data Type: `type`\n", + "* Meta Data (Attributes): `ds.attrs`\n", + "\n", + "Also, here are some references if you are new to xarray\n", + "* [Xarray in 45 minutes](https://tutorial.xarray.dev/overview/xarray-in-45-min.html)\n", + "* [Handling NetCDF Files using XArray for Absolute Beginners](https://towardsdatascience.com/handling-netcdf-files-using-xarray-for-absolute-beginners-111a8ab4463f)\n", + "\n", + "Try the following commands in action\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b3eac323", + "metadata": {}, + "outputs": [], + "source": [ + "ds = ds_single" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "77ecf85d", + "metadata": {}, + "outputs": [], + "source": [ + "# Dataset \n", + "ds" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "25e7de09", + "metadata": {}, + "outputs": [], + "source": [ + "# Data array, Variable\n", + "# Note: the info can be overwhelming. 
Do not attempt to grasp everything at first glance.\n", + "ds.variables" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8c41b67e", + "metadata": {}, + "outputs": [], + "source": [ + "# dimensions\n", + "ds.dims" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "156f1dfc", + "metadata": {}, + "outputs": [], + "source": [ + "# coordinates\n", + "ds.coords" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "277d6064", + "metadata": {}, + "outputs": [], + "source": [ + "# Meta Data (Attributes)\n", + "ds.attrs" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d334681f", + "metadata": {}, + "outputs": [], + "source": [ + "# type\n", + "print(type(ds))\n", + "print(type(ds.variables))\n", + "print(type(ds.dims))\n", + "print(type(ds.coords))\n", + "print(type(ds.attrs))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "643399d6", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: retrieve variable names only\n", + "list(ds.variables)" + ] + }, + { + "cell_type": "markdown", + "id": "1cf36878", + "metadata": {}, + "source": [ + "#### Discussion: variable vs. coordinates vs. dimensions. \n", + "From [Components of a NetCDF Dataset](https://iprc.soest.hawaii.edu/users/xfu/tool/guidef-7.html#):\n", + "\n", + "* Dimension: A dimension may be used to represent a real physical dimension, for example, time, latitude, longitude, or height. A dimension might also be used to index other quantities, for example station or model-run-number.\n", + "A netCDF dimension has both a name and a length. A dimension length is an arbitrary positive integer, except that one dimension in a netCDF dataset can have the length UNLIMITED.\n", + "\n", + "* Variables: Variables are used to store the bulk of the data in a netCDF dataset. A variable represents an array of values of the same type. A scalar value is treated as a 0-dimensional array. 
A variable has a name, a data type, and a shape described by its list of dimensions specified when the variable is created. A variable may also have associated attributes, which may be added, deleted or changed after the variable is created.\n", + "\n", + "* Coordinate (Variables): It is legal for a variable to have the same name as a dimension. Such variables have no special meaning to the netCDF library. However there is a convention that such variables should be treated in a special way by software using this library.\n", + "A variable with the same name as a dimension is called a coordinate variable. It typically defines a physical coordinate corresponding to that dimension.\n", + "\n", + "\n", + "Tips: By definition, coordinates and dimensions are also variables. For example, ds.time is a coordinate, but it is also a special variable. In practice, however, when we talk about a variable, we mean a regular/non-coordinate variable. We will use this convention for the remainder of this notebook. " + ] + }, + { + "cell_type": "markdown", + "id": "d47120f9", + "metadata": {}, + "source": [ + "### Xarray Variable\n", + "\n", + "\n", + "We can use `ds.variables` to access the variables of a dataset. But printing out the whole dataset can often be overwhelming. Instead, we can work on an individual variable (also called an xarray data array) using the `ds[var_name]` syntax, where `var_name` is the name of the variable. See the following examples in action.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5a57e136", + "metadata": {}, + "outputs": [], + "source": [ + "print(type(ds[\"time\"]))\n", + "ds[\"time\"]" + ] + }, + { + "cell_type": "markdown", + "id": "51ec643e", + "metadata": {}, + "source": [ + "#### Variable properties\n", + "For an individual variable, the following properties are most commonly used, listed with the associated xarray syntax to retrieve them. 
(assuming var = ds[var_name])\n", + "* name: `var.name`\n", + "* data content: `var.data`\n", + "* attributes: `var.attrs`\n", + "* dimensions: `var.dims`\n", + "* data type: `var.data.dtype`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a632a525", + "metadata": {}, + "outputs": [], + "source": [ + "var_name = \"time\"\n", + "var = ds[var_name]\n", + "\n", + "print(\"var.name: \\n\", var.name, \"\\n\")\n", + "print(\"var.data: \\n\", var.data, \"\\n\")\n", + "print(\"var.attrs: \\n\", var.attrs, \"\\n\")\n", + "print(\"var.dims: \\n\", var.dims, \"\\n\")\n", + "print(\"var.data.dtype: \\n\", var.data.dtype, \"\\n\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b256b07f", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: individual variable info table (collect and display variable info in a tabular format)\n", + "df_info = pd.DataFrame()\n", + "df_info[\"var_name\"] = list(ds.variables)\n", + "df_info[\"dims\"] = df_info.var_name.apply(lambda x: ds[x].dims)\n", + "df_info[\"is_dim\"] = df_info.var_name.apply(lambda x: x in ds.dims)\n", + "df_info[\"n_dim\"] = df_info.var_name.apply(lambda x: len(ds[x].dims))\n", + "df_info[\"attrs\"] = df_info.var_name.apply(lambda x: ds[x].attrs)\n", + "df_info[\"dtype\"] = df_info.var_name.apply(lambda x: ds[x].data.dtype)\n", + "df_info" + ] + }, + { + "cell_type": "markdown", + "id": "e6a36b96", + "metadata": {}, + "source": [ + "### Data cleaning/Preprocessing (skipped)\n", + "Data cleaning and preprocessing is an important stage in the data analysis pipeline. However, it is outside the scope of this notebook. For more information about data cleaning and preprocessing basics, here are some references. 
\n", + "* [Xarray Fundamentals](https://earth-env-data-science.github.io/lectures/xarray/xarray.html)\n", + "* [Xarray Tutorial — Pangeo Gallery documentation](http://gallery.pangeo.io/repos/pangeo-data/pangeo-tutorial-gallery/xarray.html)\n", + "* [Pythonic Data Cleaning With pandas and NumPy](https://realpython.com/python-data-cleaning-numpy-pandas/)\n", + "* [Pandas - Cleaning Data](https://www.w3schools.com/python/pandas/pandas_cleaning.asp)" + ] + }, + { + "cell_type": "markdown", + "id": "ae6e1a69", + "metadata": {}, + "source": [ + "### Plotting\n", + "Data visualization (or plotting) is another important data analysis topic and deserves its own discussion. Here in this notebook we will only demonstrate basic tools for simple data visualization tasks.\n", + "\n", + "Here are some references you might find useful:\n", + "* xarray plotting: https://docs.xarray.dev/en/stable/user-guide/plotting.html\n", + "* Atmospheric Community Toolkit (ACT): https://arm-doe.github.io/ACT/index.html\n", + "\n", + "Note: this notebook is auto-generated using a template. It uses a general approach to select variable(s) to plot and is not customized to each individual notebook. Feel free to change the variables of interest, especially if a certain figure fails to plot." + ] + }, + { + "cell_type": "markdown", + "id": "7e417fb2", + "metadata": {}, + "source": [ + "#### 1-dimensional basic time series plot\n", + "\n", + "For the following plot we would like to find variables such that\n", + "* it has one and only one dimension\n", + "* \"time\" is its coordinate variable\n", + "* it is not a dimension itself,\n", + "* it is not a special variable with substrings within [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "\n", + "Please see the following example in action. 
(For more details about pandas filtering, please see the following references.)\n", + "* [pandas: multiple conditions while indexing data frame](https://stackoverflow.com/questions/22591174/pandas-multiple-conditions-while-indexing-data-frame-unexpected-behavior)\n", + "* [pandas.DataFrame.apply](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.apply.html)\n", + "* [How to test if a string contains one of the substrings in a list...](https://stackoverflow.com/questions/26577516/how-to-test-if-a-string-contains-one-of-the-substrings-in-a-list-in-pandas)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "30edac2d", + "metadata": {}, + "outputs": [], + "source": [ + "# (pandas query) filter n_dim==1, not a dimension, and not containing the following substrings\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter = df_info[(df_info.n_dim == 1) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (df_info.is_dim==False) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4777a659", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " var_1d = df_filter.var_name.values[0]\n", + " var_1d\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fb5ae985", + "metadata": {}, + "outputs": [], + "source": [ + "# Note: if failed, change to another variable to plot.\n", + "try:\n", + " ds[var_1d].plot()\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "markdown", + "id": "9e41066e", + "metadata": {}, + "source": [ + "#### 2-dimensional basic plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "09bd4adc", + "metadata": {}, + "outputs": [], + "source": [ + "# (pandas query) filter n_dim==2, not a dimension, and not containing the 
following substrings\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_2 = df_info[(df_info.n_dim == 2) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (df_info.is_dim==False) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter_2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2cdea1a9", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " var_2d = df_filter_2.var_name.values[0]\n", + " var_2d\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f5f48879", + "metadata": {}, + "outputs": [], + "source": [ + "# Note: if failed, change to another variable to plot.\n", + "try:\n", + " print(ds[var_2d].dims)\n", + " # ds[var_2d].plot()\n", + "\n", + " # conventionally, use \"time\" as x-axis\n", + " ds[var_2d].plot(x=\"time\")\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "markdown", + "id": "7554b2a6", + "metadata": {}, + "source": [ + "#### qc-plotting (optional)\n", + "\n", + "Note: act qc plotting has more strict requirement, one of them is the associated qc variable need to have \"flag_masks\" attributes. 
Which is added by using ds = act.io.armfiles.read_netcdf(files_list), then ds.clean.cleanup()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "71c2096f", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " ds_act = act.io.armfiles.read_netcdf(full_path)\n", + " print(type(ds_act))\n", + " ds_act.clean.cleanup()\n", + "\n", + " # or \n", + " # ds.clean.cleanup()\n", + "except Exception as e:\n", + " print(\"ERROR\", e)\n", + " ds_act = ds" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "844f8505", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter valid variables for ACT qc plotting\n", + "condition = (\"qc_\" + df_info.var_name).apply(lambda x: ds_act[x].attrs.get(\"flag_masks\") is not None \n", + " if x in list(ds.variables) else False)\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_3 = df_info[(df_info.is_dim==False) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings))) &\n", + " condition\n", + " ]\n", + "df_filter_3" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ecd38cc6", + "metadata": {}, + "outputs": [], + "source": [ + "# QC Plot\n", + "try:\n", + " qc_variable = df_filter_3.var_name.values[0]\n", + " print(qc_variable)\n", + "except Exception as e:\n", + " print(e)\n", + " \n", + "try:\n", + "\n", + " # Plot\n", + " qc_display = act.plotting.TimeSeriesDisplay(ds_act)\n", + " qc_display.add_subplots((2,), figsize = (9.5,10))\n", + " qc_ax = qc_display.plot(qc_variable, subplot_index=(0,), set_title=\"QC results on field: \" + qc_variable,)\n", + " qc_ax.grid()\n", + " qc_display.qc_flag_block_plot(qc_variable, subplot_index=(1,))\n", + " qc_ax.set_xlabel(f\"UTC Time starts at {ds.time.data[0]}\")\n", + "\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e)" + ] + }, + { + "cell_type": "markdown", + "id": "d76e4d27", + "metadata": 
{}, + "source": [ + "#### bonus: choose variables to plot from a dropdown menu " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bb733804", + "metadata": {}, + "outputs": [], + "source": [ + "# Valid variables filtering\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_4 = df_info[(df_info.is_dim==False) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter_4" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "82d5c20d", + "metadata": {}, + "outputs": [], + "source": [ + "# example 1: using xarray plot\n", + "\n", + "# Uncomment the following cell to try the interactive plot (ctrl + /)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2fc9aaa4", + "metadata": {}, + "outputs": [], + "source": [ + "# %matplotlib widget\n", + "# plt.clf()\n", + "\n", + "# fig, ax = plt.subplots(figsize=(10, 4))\n", + "\n", + "# available_variables = df_filter_4.var_name.values\n", + "# @widgets.interact(var=available_variables)\n", + "# def update(var = available_variables[0]):\n", + "# fig.clear() # Remove old lines from plot and plot new one\n", + "# if len(ds[var].dims)==2:\n", + "# ds[var].plot(x=\"time\", add_colorbar=False)\n", + "# else:\n", + "# ds[var].plot()\n", + "# plt.grid()\n", + "# plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fabdd802", + "metadata": {}, + "outputs": [], + "source": [ + "# example 2: using act plot\n", + "\n", + "# Uncomment the following cell to try the interactive plot (ctrl + /)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "962b4186", + "metadata": {}, + "outputs": [], + "source": [ + "# %matplotlib widget\n", + "# plt.clf()\n", + "\n", + "# available_variables = df_filter_4.var_name.values\n", + "\n", + "\n", + "# @widgets.interact(var=available_variables)\n", + "# def 
update(var = available_variables[0]):\n", + "\n", + "# i_display = act.plotting.TimeSeriesDisplay(ds_act)\n", + "# i_display.add_subplots((1,), figsize=(10, 4))\n", + "# ax = i_display.plot(var, subplot_index=(0,), set_title=f\"{var} ({ds_act[var].attrs['long_name']})\",)\n", + "\n", + "# ax.set_xlabel(f\"UTC Time starts at {ds.time.data[0]}\")\n", + "# ax.grid()\n", + "# plt.show()\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.16" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/TBSMERGED/tbsmerged.c1.ipynb b/VAPs/quicklook/TBSMERGED/tbsmerged.c1.ipynb new file mode 100644 index 00000000..e0216b35 --- /dev/null +++ b/VAPs/quicklook/TBSMERGED/tbsmerged.c1.ipynb @@ -0,0 +1,339 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# TBSMERGED.C1 Plots\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/tbsmerged) for more information about this vap." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib widget\n", + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import ipywidgets as widgets\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "# Data archive directory\n", + "DATA_DIR = r'/data/archive/'\n", + "\n", + "# Datastream info\n", + "DATASTREAM_NAME = 'tbsmerged'\n", + "DATA_LEVEL = 'c1'\n", + "LOCATIONS = [{'end_date': '2023-06-16', 'facility': 'S4', 'site': 'guc', 'start_date': '2023-01-21'}, {'end_date': '2022-09-14', 'facility': 'S3', 'site': 'hou', 'start_date': '2022-06-03'}, {'end_date': '2023-07-16', 'facility': 'C1', 'site': 'sgp', 'start_date': '2019-04-25'}]" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "9faaf875", + "metadata": {}, + "source": [ + "## Define site, facility, and date range" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ac6764f5", + "metadata": {}, + "outputs": [], + "source": [ + "print(\"The following locations and date ranges are available for this VAP:\")\n", + "display(pd.DataFrame(LOCATIONS, columns=['site', 'facility', 'start_date', 'end_date']))" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "8d132223", + "metadata": {}, + "source": [ + "#### Define site, facility, and date range (date format: YYYY-MM-DD) using the variables below:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e563983a", + "metadata": {}, + "outputs": [], + "source": [ + "site_facility = ( 'sgp', 'C1' )\n", + "\n", + "date_start = '2023-07-14'\n", + "date_end = '2023-07-16'" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "bccd3dfe-2f99-49a5-bace-ea37e8dd8fc3", + "metadata": {}, + "source": [ + "## Load data 
files\n", + "Load data files from /data/archive/" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cb4b9a26-c574-49c0-a521-658fa553e39e", + "metadata": {}, + "outputs": [], + "source": [ + "# Compile list of files\n", + "site, facility = site_facility\n", + "d_date_start = datetime.strptime(date_start, '%Y-%m-%d')\n", + "d_date_end = datetime.strptime(date_end, '%Y-%m-%d')\n", + "dir_path = os.path.join(DATA_DIR + site, site + DATASTREAM_NAME + facility + r'.' + DATA_LEVEL )\n", + "dir_path\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6be8f3dc", + "metadata": {}, + "outputs": [], + "source": [ + "from datetime import date, timedelta\n", + "import pandas as pd\n", + "\n", + "def get_ARM_formated_dates(start_date, end_date):\n", + " \"\"\"\n", + " Get a list of ARM conventional formated date lists, based on start_date and end_date(inclusive)\n", + " EXAMPLE:\n", + " get_ARM_formated_dates(start_date=\"20180219\", end_date=\"20180221\")\n", + " >> [\"20180219\", \"20180220\", \"20180221\"] \n", + " \"\"\"\n", + " \n", + " _start_date = pd.to_datetime(start_date)\n", + " _end_date = pd.to_datetime(end_date)\n", + " \n", + " delta = _end_date - _start_date # returns timedelta \n", + " dates = []\n", + "\n", + " for i in range(delta.days + 1):\n", + " day = _start_date + timedelta(days=i)\n", + " day_formated = day.strftime(format=\"%Y%m%d\")\n", + " dates.append(day_formated)\n", + " return dates\n", + "\n", + "\n", + "get_ARM_formated_dates(start_date=date_start, end_date=date_end)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "51feea2e", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter a list of files based on date pattern\n", + "import glob\n", + "dates = get_ARM_formated_dates(start_date=date_start, end_date=date_end)\n", + "files_filter = []\n", + "for date in dates:\n", + " files_filter += glob.glob(f'{dir_path}/*.{date}*.*')\n", + " files_filter\n", + "files_filter" + ] + 
}, + { + "cell_type": "code", + "execution_count": null, + "id": "b0e5d0d7", + "metadata": {}, + "outputs": [], + "source": [ + "# Load files as a single dataset\n", + "files_list = files_filter \n", + "ds = act.io.armfiles.read_netcdf(files_list)\n", + "ds.clean.cleanup()\n", + "print(f'{len(files_list)} files loaded')\n", + "ds\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "4a551094-9ec0-4b64-b80a-9940573c2f50", + "metadata": {}, + "source": [ + "## Plot time series data\n", + "#### Define the list of variables to be plotted:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d09b789e-84f1-4605-846b-a72c110c8048", + "metadata": {}, + "outputs": [], + "source": [ + "variables_to_plot = ['tbscpc_total_concentration', 'tbsimet_air_temperature', 'tbsimet_rh']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3458fb08-035b-4898-9253-0a94e6f9c97b", + "metadata": {}, + "outputs": [], + "source": [ + "ts_display = act.plotting.TimeSeriesDisplay(ds)\n", + "ts_display.add_subplots((len(variables_to_plot),), figsize = (9.5,4*len(variables_to_plot)))\n", + "\n", + "for i,v in enumerate(variables_to_plot):\n", + " ts_ax = ts_display.plot(v, subplot_index=(i,), set_title=ds.variables[v].attrs['long_name'],)\n", + " ts_ax.grid()\n", + "\n", + "plt.show()\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "194399aa-1907-452b-8ba9-bc31d7f60291", + "metadata": {}, + "source": [ + "## Quality check plots\n", + "#### Define variable for QC plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9a0ef63d-3eeb-48fc-a24a-43258b6134b8", + "metadata": {}, + "outputs": [], + "source": [ + "qc_variable = 'tbscpc_total_concentration'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "532663a3-4dc0-4497-bda8-018c5f91e1c4", + "metadata": {}, + "outputs": [], + "source": [ + "# QC Plot\n", + "if ('qc_'+qc_variable) in ds.variables:\n", + "\n", + " # Plot\n", + 
" qc_display = act.plotting.TimeSeriesDisplay(ds)\n", + " qc_display.add_subplots((2,), figsize = (9.5,10))\n", + " qc_ax = qc_display.plot(qc_variable, subplot_index=(0,), set_title=\"QC results on field: \" + qc_variable,)\n", + " qc_ax.grid()\n", + " qc_display.qc_flag_block_plot(qc_variable, subplot_index=(1,))\n", + "\n", + " plt.show()\n", + "else:\n", + " print(f'QC not available for the selected field: {qc_variable}')\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "2308db16-f362-4033-a11a-c3e5e75ad9ba", + "metadata": {}, + "source": [ + "## Field selection dropdown menu\n", + "Select variable to be plotted from a dropdown menu" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d5792fa8-2253-441a-8ac6-1e07a0345031", + "metadata": {}, + "outputs": [], + "source": [ + "plt.ioff()\n", + "\n", + "# populate dropdown menu with available variables \n", + "available_variables = [v for v in ds.variables if not('time' in v or v.startswith('qc_') or v.startswith('source_')) and 'long_name' in ds.variables[v].attrs]\n", + "d_variable = 'tbscpc_total_concentration'\n", + "dropdown = widgets.Dropdown(\n", + " options = [(ds.variables[v].attrs['long_name'], v) for v in available_variables],\n", + " value= d_variable,\n", + " description='Field:',\n", + " disabled=False,\n", + ")\n", + "dropdown.layout.margin = '0px 30% 0px 20%'\n", + "dropdown.layout.width = '50%'\n", + "\n", + "# set up display\n", + "i_display = act.plotting.TimeSeriesDisplay(ds)\n", + "i_display.add_subplots((1,), figsize = (9.5,5))\n", + "i_ax = i_display.plot(d_variable, subplot_index=(0,), set_title=ds.variables[d_variable].attrs['long_name'],)\n", + "i_ax.grid()\n", + "i_fig = i_display.fig\n", + "\n", + "# update plot callback function\n", + "def update_plot(change):\n", + " i_ax.cla()\n", + " i_ax_new = i_display.plot(change.new, subplot_index=(0,), set_title=ds.variables[change.new].attrs['long_name'],)\n", + " i_ax_new.grid()\n", + " 
i_fig.canvas.draw()\n", + " i_fig.canvas.flush_events()\n", + "\n", + "dropdown.observe(update_plot, names='value')\n", + "\n", + "widgets.AppLayout(\n", + " header=dropdown,\n", + " center=i_fig.canvas,\n", + " pane_heights=[1, 6,1]\n", + ")\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.12" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/TBSMERGED/tbsmergedincloud.c1.ipynb b/VAPs/quicklook/TBSMERGED/tbsmergedincloud.c1.ipynb new file mode 100644 index 00000000..97b44116 --- /dev/null +++ b/VAPs/quicklook/TBSMERGED/tbsmergedincloud.c1.ipynb @@ -0,0 +1,339 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# TBSMERGEDINCLOUD.C1 Plots\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/tbsmerged) for more information about this vap." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib widget\n", + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import ipywidgets as widgets\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "# Data archive directory\n", + "DATA_DIR = r'/data/archive/'\n", + "\n", + "# Datastream info\n", + "DATASTREAM_NAME = 'tbsmergedincloud'\n", + "DATA_LEVEL = 'c1'\n", + "LOCATIONS = [{'end_date': '2020-11-20', 'facility': 'M1', 'site': 'oli', 'start_date': '2017-04-09'}]" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "9faaf875", + "metadata": {}, + "source": [ + "## Define site, facility, and date range" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ac6764f5", + "metadata": {}, + "outputs": [], + "source": [ + "print(\"The following locations and date ranges are available for this VAP:\")\n", + "display(pd.DataFrame(LOCATIONS, columns=['site', 'facility', 'start_date', 'end_date']))" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "8d132223", + "metadata": {}, + "source": [ + "#### Define site, facility, and date range (date format: YYYY-MM-DD) using the variables below:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e563983a", + "metadata": {}, + "outputs": [], + "source": [ + "site_facility = ( 'oli', 'M1' )\n", + "\n", + "date_start = '2020-11-18'\n", + "date_end = '2020-11-20'" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "bccd3dfe-2f99-49a5-bace-ea37e8dd8fc3", + "metadata": {}, + "source": [ + "## Load data files\n", + "Load data files from /data/archive/" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cb4b9a26-c574-49c0-a521-658fa553e39e", + 
"metadata": {}, + "outputs": [], + "source": [ + "# Compile list of files\n", + "site, facility = site_facility\n", + "d_date_start = datetime.strptime(date_start, '%Y-%m-%d')\n", + "d_date_end = datetime.strptime(date_end, '%Y-%m-%d')\n", + "dir_path = os.path.join(DATA_DIR + site, site + DATASTREAM_NAME + facility + r'.' + DATA_LEVEL )\n", + "dir_path\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6be8f3dc", + "metadata": {}, + "outputs": [], + "source": [ + "from datetime import date, timedelta\n", + "import pandas as pd\n", + "\n", + "def get_ARM_formated_dates(start_date, end_date):\n", + " \"\"\"\n", + " Get a list of ARM conventional formated date lists, based on start_date and end_date(inclusive)\n", + " EXAMPLE:\n", + " get_ARM_formated_dates(start_date=\"20180219\", end_date=\"20180221\")\n", + " >> [\"20180219\", \"20180220\", \"20180221\"] \n", + " \"\"\"\n", + " \n", + " _start_date = pd.to_datetime(start_date)\n", + " _end_date = pd.to_datetime(end_date)\n", + " \n", + " delta = _end_date - _start_date # returns timedelta \n", + " dates = []\n", + "\n", + " for i in range(delta.days + 1):\n", + " day = _start_date + timedelta(days=i)\n", + " day_formated = day.strftime(format=\"%Y%m%d\")\n", + " dates.append(day_formated)\n", + " return dates\n", + "\n", + "\n", + "get_ARM_formated_dates(start_date=date_start, end_date=date_end)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "51feea2e", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter a list of files based on date pattern\n", + "import glob\n", + "dates = get_ARM_formated_dates(start_date=date_start, end_date=date_end)\n", + "files_filter = []\n", + "for date in dates:\n", + " files_filter += glob.glob(f'{dir_path}/*.{date}*.*')\n", + " files_filter\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0e5d0d7", + "metadata": {}, + "outputs": [], + "source": [ + "# Load files as a single 
dataset\n", + "files_list = files_filter \n", + "ds = act.io.armfiles.read_netcdf(files_list)\n", + "ds.clean.cleanup()\n", + "print(f'{len(files_list)} files loaded')\n", + "ds\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "4a551094-9ec0-4b64-b80a-9940573c2f50", + "metadata": {}, + "source": [ + "## Plot time series data\n", + "#### Define the list of variables to be plotted:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d09b789e-84f1-4605-846b-a72c110c8048", + "metadata": {}, + "outputs": [], + "source": [ + "variables_to_plot = ['bl_height_1', 'first_cbh', 'tbscpc_total_concentration']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3458fb08-035b-4898-9253-0a94e6f9c97b", + "metadata": {}, + "outputs": [], + "source": [ + "ts_display = act.plotting.TimeSeriesDisplay(ds)\n", + "ts_display.add_subplots((len(variables_to_plot),), figsize = (9.5,4*len(variables_to_plot)))\n", + "\n", + "for i,v in enumerate(variables_to_plot):\n", + " ts_ax = ts_display.plot(v, subplot_index=(i,), set_title=ds.variables[v].attrs['long_name'],)\n", + " ts_ax.grid()\n", + "\n", + "plt.show()\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "194399aa-1907-452b-8ba9-bc31d7f60291", + "metadata": {}, + "source": [ + "## Quality check plots\n", + "#### Define variable for QC plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9a0ef63d-3eeb-48fc-a24a-43258b6134b8", + "metadata": {}, + "outputs": [], + "source": [ + "qc_variable = 'bl_height_1'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "532663a3-4dc0-4497-bda8-018c5f91e1c4", + "metadata": {}, + "outputs": [], + "source": [ + "# QC Plot\n", + "if ('qc_'+qc_variable) in ds.variables:\n", + "\n", + " # Plot\n", + " qc_display = act.plotting.TimeSeriesDisplay(ds)\n", + " qc_display.add_subplots((2,), figsize = (9.5,10))\n", + " qc_ax = qc_display.plot(qc_variable, subplot_index=(0,), 
set_title=\"QC results on field: \" + qc_variable,)\n", + " qc_ax.grid()\n", + " qc_display.qc_flag_block_plot(qc_variable, subplot_index=(1,))\n", + "\n", + " plt.show()\n", + "else:\n", + " print(f'QC not available for the selected field: {qc_variable}')\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "2308db16-f362-4033-a11a-c3e5e75ad9ba", + "metadata": {}, + "source": [ + "## Field selection dropdown menu\n", + "Select variable to be plotted from a dropdown menu" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d5792fa8-2253-441a-8ac6-1e07a0345031", + "metadata": {}, + "outputs": [], + "source": [ + "plt.ioff()\n", + "\n", + "# populate dropdown menu with available variables \n", + "available_variables = [v for v in ds.variables if not('time' in v or v.startswith('qc_') or v.startswith('source_')) and 'long_name' in ds.variables[v].attrs]\n", + "d_variable = 'bl_height_1'\n", + "dropdown = widgets.Dropdown(\n", + " options = [(ds.variables[v].attrs['long_name'], v) for v in available_variables],\n", + " value= d_variable,\n", + " description='Field:',\n", + " disabled=False,\n", + ")\n", + "dropdown.layout.margin = '0px 30% 0px 20%'\n", + "dropdown.layout.width = '50%'\n", + "\n", + "# set up display\n", + "i_display = act.plotting.TimeSeriesDisplay(ds)\n", + "i_display.add_subplots((1,), figsize = (9.5,5))\n", + "i_ax = i_display.plot(d_variable, subplot_index=(0,), set_title=ds.variables[d_variable].attrs['long_name'],)\n", + "i_ax.grid()\n", + "i_fig = i_display.fig\n", + "\n", + "# update plot callback function\n", + "def update_plot(change):\n", + " i_ax.cla()\n", + " i_ax_new = i_display.plot(change.new, subplot_index=(0,), set_title=ds.variables[change.new].attrs['long_name'],)\n", + " i_ax_new.grid()\n", + " i_fig.canvas.draw()\n", + " i_fig.canvas.flush_events()\n", + "\n", + "dropdown.observe(update_plot, names='value')\n", + "\n", + "widgets.AppLayout(\n", + " header=dropdown,\n", + " 
center=i_fig.canvas,\n", + " pane_heights=[1, 6,1]\n", + ")\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.12" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/TDMA/TDMA_tutorial.ipynb b/VAPs/quicklook/TDMA/TDMA_tutorial.ipynb new file mode 100644 index 00000000..a4748c35 --- /dev/null +++ b/VAPs/quicklook/TDMA/TDMA_tutorial.ipynb @@ -0,0 +1,838 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# TDMAAPSSIZE.C1 Notebook\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/tdma) for more information about this vap." + ] + }, + { + "cell_type": "markdown", + "id": "97097763", + "metadata": {}, + "source": [ + "In this notebook, we demonstrate the workflow to explore ARM vap data (using tdmaapssize as an example.) Value-added products (VAPs) are higher-order data products that have been analyzed and processed to ease scientists’ use of ARM data in atmospheric research and global climate models. \n", + "Here is the main content we will cover." 
+ ] + }, + { + "cell_type": "markdown", + "id": "eddec40f", + "metadata": {}, + "source": [ + "# Table of Content\n", + "## Access the data\n", + "* How to retrieve the data\n", + "* Data path and file name conventions\n", + "* Load data\n", + "## Explore the data\n", + "* NetCDF Data structure\n", + "* Xarray essentials\n", + "* Xarray Variable\n", + "## Plot the data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "import random\n", + "\n", + "import glob\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "id": "2372d5be", + "metadata": {}, + "source": [ + "## Access the data" + ] + }, + { + "cell_type": "markdown", + "id": "5b0d9684", + "metadata": {}, + "source": [ + "### How to retrieve the data\n", + "We assume the path \"/data/archive\" is available where you are running this notebook. You can use `os.path.exists(\"/data/archive\")` to verify if the path exists at your machine.\n", + "\n", + "\n", + "### Data path and file name conventions\n", + "There several common terminologies regarding ARM data, for example, data-stream-name, data-level, site, facility, instrument, etc. (For more details, please see [ARM Data File Standards Version 1.2](https://www.google.com/search?q=arm+datastream+facility+cite+definition&rlz=1C1GCEJ_enUS1029US1029&ei=hb41ZICrBPukqtsPvMODIA&ved=0ahUKEwjAgeiS0aL-AhV7kmoFHbzhAAQQ4dUDCBA&uact=5&oq=arm+datastream+facility+cite+definition&gs_lcp=Cgxnd3Mtd2l6LXNlcnAQAzIFCAAQogQyBQgAEKIEMgUIABCiBDIFCAAQogQyBQgAEKIEOgoIABBHENYEELADOgoIIRCgARDDBBAKSgQIQRgAUJ4DWIoOYO0PaAFwAXgAgAGMAYgBuQmSAQMyLjmYAQCgAQHIAQjAAQE&sclient=gws-wiz-serp).) 
\n", + "\n", + "For example, this notebook is called `tdmaapssize.c1`, where `tdmaapssize` is the \"datastream name\", and the `c1` is the \"data level\".\n", + "\n", + "This datastream also contains site `sgp` and facility `C1`. (Note: individual datastream might have multiple site-facility pairs.)\n", + "In such a case, the data of this data-stream is stored at `/data/archive/sgp/sgptdmaapssizeC1.c1`, which is in the format of `<DATA_DIR>/<site>/<site><datastream_name><facility>.<data_level>`. We can use the following method to assign the data-stream directory `datastream_dir = os.path.join(DATA_DIR, site, site + DATASTREAM_NAME + facility + '.' + DATA_LEVEL )`\n", + "\n", + "The data files under datastream_dir also follow naming conventions. Once you reach the datastream_dir level, the most important file naming convention to differentiate the files is \"yyyyMMdd.hhmmss\", which comes in handy to filter out files based on datetime. For example, we can use `glob.glob(f'{datastream_dir}/*.200709*.*')` to filter files in September 2007.\n", + "\n", + "(Note: refer to https://adc.arm.gov/discovery/#/ and the https://adc.arm.gov/solr8/metadata/select API to explore ARM datastreams and their associated available sites and facilities.)\n", + "\n", + "Please see the following examples in action" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d7e9eb85", + "metadata": {}, + "outputs": [], + "source": [ + "# Verify if DATA_DIR path exists\n", + "DATA_DIR = \"/data/archive\"\n", + "os.path.exists(DATA_DIR)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "586993fd", + "metadata": {}, + "outputs": [], + "source": [ + "# Specify datastream_dir following the path conventions and check its existence\n", + "DATASTREAM_NAME = \"tdmaapssize\"\n", + "DATA_LEVEL = \"c1\"\n", + "site = \"sgp\"\n", + "facility = \"C1\"\n", + "datastream_dir = os.path.join(DATA_DIR, site, site + DATASTREAM_NAME + facility + '.' 
+ DATA_LEVEL )\n", + "print(datastream_dir)\n", + "print(os.path.exists(datastream_dir))\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0742f7c1", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: list 5 (random) files under datastream_dir\n", + "files = os.listdir(datastream_dir)\n", + "files[:5]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "39b98a36", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: get most recent file\n", + "list_of_files = glob.glob(f\"{datastream_dir}/*\") # * means all if need specific format then *.csv\n", + "latest_file = max(list_of_files, key=os.path.getctime)\n", + "latest_file" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "902d514e", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus sort datastream files based on datetime\n", + "files = os.listdir(datastream_dir)\n", + "file_sorted = files.copy()\n", + "file_sorted.sort() \n", + "print(file_sorted[:5])\n", + "\n", + "# to reverse\n", + "file_sorted_reverse = files.copy() \n", + "file_sorted_reverse.sort(reverse=True)\n", + "print(file_sorted_reverse[:5])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ec5923b2", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: pattern matching\n", + "# filter the 200709** files under datastream_dir\n", + "files_filter = glob.glob(f'{datastream_dir}/*.200709*.*')\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c4a4aa18", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: use armnotebook_utils.file_filter (TODO) to filter files based on datastream info \n" + ] + }, + { + "cell_type": "markdown", + "id": "cfeb9efc", + "metadata": {}, + "source": [ + "### Load data\n", + "The arm data is stored in [NetCDF](https://en.wikipedia.org/wiki/NetCDF#:~:text=%22NetCDF%20(network%20Common%20Data%20Form,format%20for%20representing%20scientific%20data.) format. 
We can use xarray's `open_dataset` method to load single file, or `open_mfdataset` to open multiple files. (Note: the latter will still return a single xarray dataset object by combining multiple files.)\n", + "\n", + "Note: open_dataset keeps the file handle open and lazy loads its contents. All parameters are passed directly to open_dataset. It is a preferable method over load_dataset for memory-effiency. (more details, please see [xarray.open_dataset](https://docs.xarray.dev/en/stable/generated/xarray.open_dataset.html))\n", + "\n", + "See the following example in action" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a440a329", + "metadata": {}, + "outputs": [], + "source": [ + "# open single file\n", + "full_path = latest_file\n", + "print(full_path)\n", + "ds_single = xr.open_dataset(full_path)\n", + "ds_single" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0143a3d", + "metadata": {}, + "outputs": [], + "source": [ + "print(ds_single)\n", + "print(type(ds_single))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1c0f8939", + "metadata": {}, + "outputs": [], + "source": [ + "# open multiple files\n", + "n_files = 3\n", + "full_paths = [os.path.join(datastream_dir, f_path) for f_path in file_sorted_reverse[:n_files]]\n", + "print(full_paths)\n", + "try: # Note: sometimes multiple files cannot be merged, so we used try except here.\n", + " ds_mutiple = xr.open_mfdataset(full_paths)\n", + " # ds_mutiple\n", + " print(type(ds_mutiple))\n", + " print(ds_mutiple)\n", + "except Exception as e:\n", + " print(e)" + ] + }, + { + "cell_type": "markdown", + "id": "3fa59943", + "metadata": {}, + "source": [ + "## Explore the data" + ] + }, + { + "cell_type": "markdown", + "id": "571f69b9", + "metadata": {}, + "source": [ + "### NetCDF Data structure \n", + "\n", + "\n", + "(If you are confident with NetCDF basics and xarray essentials, feel free to skip this session.)\n", + "Before we dive into 
data exploration, there are some essential concepts we should be familiar with (shown below): \n", + "* Dataset\n", + "* Data array\n", + "* Variable\n", + "* Dimension\n", + "* Coordinate\n", + "* Data Type\n", + "* Meta Data (Attributes)\n", + "\n", + "We will not go into details about NetCDF basics and here are some references you might find helpful\n", + "* [Components of a NetCDF Dataset](https://iprc.soest.hawaii.edu/users/xfu/tool/guidef-7.html#)\n", + "* [Network Common Data Form (NetCDF)](https://www.unidata.ucar.edu/software/netcdf/) and [A Brief History of (netCDF) Time](https://www.unidata.ucar.edu/software/netcdf/time/recs.html)\n", + "\n", + "\n", + "\n", + "### Xarray essentials\n", + "Earlier we introduced the xarray package and used `open_dataset` and `open_mfdataset` to retrieve NetCDF data file as an xarray Dataset object. Recall that you can review the data in a notebook by using `print(ds)` or `ds`. \n", + "\n", + "Using xarray to retrieve the aforementioned NetCDF basics is straightforward. 
In general,\n", + "* Dataset: `ds`\n", + "* Data array: `ds.variables`\n", + "* Variable: `ds.variables`\n", + "* Dimension: `ds.dims`\n", + "* Coordinate: `ds.coords`\n", + "* Data Type: `type`\n", + "* Meta Data (Attributes): `ds.attrs`\n", + "\n", + "Also, here are some references if you are new to xarray\n", + "* [Xarray in 45 minutes](https://tutorial.xarray.dev/overview/xarray-in-45-min.html)\n", + "* [Handling NetCDF Files using XArray for Absolute Beginners](https://towardsdatascience.com/handling-netcdf-files-using-xarray-for-absolute-beginners-111a8ab4463f)\n", + "\n", + "Try the following commands in action\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b3eac323", + "metadata": {}, + "outputs": [], + "source": [ + "ds = ds_single" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "77ecf85d", + "metadata": {}, + "outputs": [], + "source": [ + "# Dataset \n", + "ds" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "25e7de09", + "metadata": {}, + "outputs": [], + "source": [ + "# Data array, Variable\n", + "# Note: the info can be overwhelming. 
Do not attempt to grasp everything at first glance.\n", + "ds.variables" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8c41b67e", + "metadata": {}, + "outputs": [], + "source": [ + "# dimensions\n", + "ds.dims" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "156f1dfc", + "metadata": {}, + "outputs": [], + "source": [ + "# coordinates\n", + "ds.coords" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "277d6064", + "metadata": {}, + "outputs": [], + "source": [ + "# Meta Data (Attributes)\n", + "ds.attrs" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d334681f", + "metadata": {}, + "outputs": [], + "source": [ + "# type\n", + "print(type(ds))\n", + "print(type(ds.variables))\n", + "print(type(ds.dims))\n", + "print(type(ds.coords))\n", + "print(type(ds.attrs))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "643399d6", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: retrieve variable names only\n", + "list(ds.variables)" + ] + }, + { + "cell_type": "markdown", + "id": "1cf36878", + "metadata": {}, + "source": [ + "#### Discussion: variable vs. coordinates vs. dimensions. \n", + "From [Components of a NetCDF Dataset](https://iprc.soest.hawaii.edu/users/xfu/tool/guidef-7.html#):\n", + "\n", + "* Dimension: A dimension may be used to represent a real physical dimension, for example, time, latitude, longitude, or height. A dimension might also be used to index other quantities, for example station or model-run-number.\n", + "A netCDF dimension has both a name and a length. A dimension length is an arbitrary positive integer, except that one dimension in a netCDF dataset can have the length UNLIMITED.\n", + "\n", + "* Variables: Variables are used to store the bulk of the data in a netCDF dataset. A variable represents an array of values of the same type. A scalar value is treated as a 0-dimensional array. 
A variable has a name, a data type, and a shape described by its list of dimensions specified when the variable is created. A variable may also have associated attributes, which may be added, deleted or changed after the variable is created.\n", + "\n", + "* Coordinate (Variables): It is legal for a variable to have the same name as a dimension. Such variables have no special meaning to the netCDF library. However there is a convention that such variables should be treated in a special way by software using this library.\n", + "A variable with the same name as a dimension is called a coordinate variable. It typically defines a physical coordinate corresponding to that dimension.\n", + "\n", + "\n", + "Tips: By definition, coordinates and dimensions are also variables. For example ds.time is a coordinate but it is also a special variable. But in practice, when we talk about a variable, we mean a regular (non-coordinate) variable. We will use this convention for the remainder of this notebook. " + ] + }, + { + "cell_type": "markdown", + "id": "d47120f9", + "metadata": {}, + "source": [ + "### Xarray Variable\n", + "\n", + "\n", + "We can use `ds.variables` to access the variables of a dataset. But printing out the whole dataset can often be overwhelming. Instead, we would work on an individual variable (also called an xarray data array) using the `ds[var_name]` syntax, where `var_name` is the name of the variable. See the following examples in action.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5a57e136", + "metadata": {}, + "outputs": [], + "source": [ + "print(type(ds[\"time\"]))\n", + "ds[\"time\"]" + ] + }, + { + "cell_type": "markdown", + "id": "51ec643e", + "metadata": {}, + "source": [ + "#### Variable properties\n", + "For an individual variable, the following properties are most commonly used, along with the associated xarray method to retrieve them. 
(assuming var = ds[var_name])\n", + "* name: `var.name`\n", + "* data content: `var.data`\n", + "* attributes: `var.attrs`\n", + "* dimensions: `var.dims`\n", + "* data type: `var.data.dtype`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a632a525", + "metadata": {}, + "outputs": [], + "source": [ + "var_name = \"time\"\n", + "var = ds[var_name]\n", + "\n", + "print(\"var.name: \\n\", var.name, \"\\n\")\n", + "print(\"var.data: \\n\", var.data, \"\\n\")\n", + "print(\"var.attrs: \\n\", var.attrs, \"\\n\")\n", + "print(\"var.dims: \\n\", var.dims, \"\\n\")\n", + "print(\"var.data.dtype: \\n\", var.data.dtype, \"\\n\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b256b07f", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: individual variable info table (collect and display variable info in a tabular format)\n", + "df_info = pd.DataFrame()\n", + "df_info[\"var_name\"] = list(ds.variables)\n", + "df_info[\"dims\"] = df_info.var_name.apply(lambda x: ds[x].dims)\n", + "df_info[\"is_dim\"] = df_info.var_name.apply(lambda x: x in ds.dims)\n", + "df_info[\"n_dim\"] = df_info.var_name.apply(lambda x: len(ds[x].dims))\n", + "df_info[\"attrs\"] = df_info.var_name.apply(lambda x: ds[x].attrs)\n", + "df_info[\"dtype\"] = df_info.var_name.apply(lambda x: ds[x].data.dtype)\n", + "df_info" + ] + }, + { + "cell_type": "markdown", + "id": "e6a36b96", + "metadata": {}, + "source": [ + "### Data cleaning/Preprocessing (skipped)\n", + "Data cleaning and preprocessing is an important stage in the data analysis pipeline. However it will be out of the scope of this notebook. For more information about data cleaning and preprocessing basics, here are some references. 
\n", + "* [Xarray Fundamentals](https://earth-env-data-science.github.io/lectures/xarray/xarray.html)\n", + "* [Xarray Tutorial — Pangeo Gallery documentation](http://gallery.pangeo.io/repos/pangeo-data/pangeo-tutorial-gallery/xarray.html)\n", + "* [Pythonic Data Cleaning With pandas and NumPy](https://realpython.com/python-data-cleaning-numpy-pandas/)\n", + "* [Pandas - Cleaning Data](https://www.w3schools.com/python/pandas/pandas_cleaning.asp)" + ] + }, + { + "cell_type": "markdown", + "id": "ae6e1a69", + "metadata": {}, + "source": [ + "### Plotting\n", + "Data visualization (or plotting) is another important data analysis topic and deserves its own discussion. Here in this notebook we will only demonstrate basic tools for simple data visualization tasks.\n", + "\n", + "Here are some references you might find useful:\n", + "* xarray plotting: https://docs.xarray.dev/en/stable/user-guide/plotting.html\n", + "* Atmospheric Community Toolkit (ACT): https://arm-doe.github.io/ACT/index.html\n", + "\n", + "Note: this notebook is auto-generated using a template. It uses a general idea to select variable(s) to plot and is not customized to each individual notebook. Feel free to change the variables of interest, especially if a figure fails to plot." + ] + }, + { + "cell_type": "markdown", + "id": "7e417fb2", + "metadata": {}, + "source": [ + "#### 1-dimensional basic time series plot\n", + "\n", + "For the following plot we would like to find variables such that\n", + "* it has one and only one dimension\n", + "* \"time\" is its coordinate variable\n", + "* it is not a dimension itself,\n", + "* it is not a special variable with substrings within [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "\n", + "Please see the following example in action. 
(For more details about pandas filtering, please see the following references.)\n", + "* [pandas: multiple conditions while indexing data frame](https://stackoverflow.com/questions/22591174/pandas-multiple-conditions-while-indexing-data-frame-unexpected-behavior)\n", + "* [pandas.DataFrame.apply](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.apply.html)\n", + "* [How to test if a string contains one of the substrings in a list...](https://stackoverflow.com/questions/26577516/how-to-test-if-a-string-contains-one-of-the-substrings-in-a-list-in-pandas)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "30edac2d", + "metadata": {}, + "outputs": [], + "source": [ + "# (pandas query) filter n_dim==1, non-dimensional, and not contains the following substrings\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter = df_info[(df_info.n_dim == 1) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (df_info.is_dim==False) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4777a659", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " var_1d = df_filter.var_name.values[0]\n", + " var_1d\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fb5ae985", + "metadata": {}, + "outputs": [], + "source": [ + "# Note: if failed, change to another variable to plot.\n", + "try:\n", + " ds[var_1d].plot()\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "markdown", + "id": "9e41066e", + "metadata": {}, + "source": [ + "#### 2-dimensional basic plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "09bd4adc", + "metadata": {}, + "outputs": [], + "source": [ + "# (pandas query) filter n_dim==2, non-dimensional, and not contains the 
following substrings\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_2 = df_info[(df_info.n_dim == 2) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (df_info.is_dim==False) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter_2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2cdea1a9", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " var_2d = df_filter_2.var_name.values[0]\n", + " var_2d\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f5f48879", + "metadata": {}, + "outputs": [], + "source": [ + "# Note: if failed, change to another variable to plot.\n", + "try:\n", + " print(ds[var_2d].dims)\n", + " # ds[var_2d].plot()\n", + "\n", + " # conventionally, use \"time\" as x-axis\n", + " ds[var_2d].plot(x=\"time\")\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "markdown", + "id": "7554b2a6", + "metadata": {}, + "source": [ + "#### qc-plotting (optional)\n", + "\n", + "Note: act qc plotting has more strict requirement, one of them is the associated qc variable need to have \"flag_masks\" attributes. 
Which is added by using ds = act.io.armfiles.read_netcdf(files_list), then ds.clean.cleanup()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "71c2096f", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " ds_act = act.io.armfiles.read_netcdf(full_path)\n", + " print(type(ds_act))\n", + " ds_act.clean.cleanup()\n", + "\n", + " # or \n", + " # ds.clean.cleanup()\n", + "except Exception as e:\n", + " print(\"ERROR\", e)\n", + " ds_act = ds" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "844f8505", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter valid variables for ACT qc plotting\n", + "condition = (\"qc_\" + df_info.var_name).apply(lambda x: ds_act[x].attrs.get(\"flag_masks\") is not None \n", + " if x in list(ds.variables) else False)\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_3 = df_info[(df_info.is_dim==False) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings))) &\n", + " condition\n", + " ]\n", + "df_filter_3" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ecd38cc6", + "metadata": {}, + "outputs": [], + "source": [ + "# QC Plot\n", + "try:\n", + " qc_variable = df_filter_3.var_name.values[0]\n", + " print(qc_variable)\n", + "except Exception as e:\n", + " print(e)\n", + " \n", + "try:\n", + "\n", + " # Plot\n", + " qc_display = act.plotting.TimeSeriesDisplay(ds_act)\n", + " qc_display.add_subplots((2,), figsize = (9.5,10))\n", + " qc_ax = qc_display.plot(qc_variable, subplot_index=(0,), set_title=\"QC results on field: \" + qc_variable,)\n", + " qc_ax.grid()\n", + " qc_display.qc_flag_block_plot(qc_variable, subplot_index=(1,))\n", + " qc_ax.set_xlabel(f\"UTC Time starts at {ds.time.data[0]}\")\n", + "\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e)" + ] + }, + { + "cell_type": "markdown", + "id": "d76e4d27", + "metadata": 
{}, + "source": [ + "#### bonus: choose variables to plot from a dropdown menu " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bb733804", + "metadata": {}, + "outputs": [], + "source": [ + "# Valid variables filtering\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_4 = df_info[(df_info.is_dim==False) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter_4" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "82d5c20d", + "metadata": {}, + "outputs": [], + "source": [ + "# example 1: using xarray plot\n", + "\n", + "# Uncomment the following cell to try the interactive plot (ctrl + /)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2fc9aaa4", + "metadata": {}, + "outputs": [], + "source": [ + "# %matplotlib widget\n", + "# plt.clf()\n", + "\n", + "# fig, ax = plt.subplots(figsize=(10, 4))\n", + "\n", + "# available_variables = df_filter_4.var_name.values\n", + "# @widgets.interact(var=available_variables)\n", + "# def update(var = available_variables[0]):\n", + "# fig.clear() # Remove old lines from plot and plot new one\n", + "# if len(ds[var].dims)==2:\n", + "# ds[var].plot(x=\"time\", add_colorbar=False)\n", + "# else:\n", + "# ds[var].plot()\n", + "# plt.grid()\n", + "# plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fabdd802", + "metadata": {}, + "outputs": [], + "source": [ + "# example 2: using act plot\n", + "\n", + "# Uncomment the following cell to try the interactive plot (ctrl + /)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "962b4186", + "metadata": {}, + "outputs": [], + "source": [ + "# %matplotlib widget\n", + "# plt.clf()\n", + "\n", + "# available_variables = df_filter_4.var_name.values\n", + "\n", + "\n", + "# @widgets.interact(var=available_variables)\n", + "# def 
update(var = available_variables[0]):\n", + "\n", + "# i_display = act.plotting.TimeSeriesDisplay(ds_act)\n", + "# i_display.add_subplots((1,), figsize=(10, 4))\n", + "# ax = i_display.plot(var, subplot_index=(0,), set_title=f\"{var} ({ds_act[var].attrs['long_name']})\",)\n", + "\n", + "# ax.set_xlabel(f\"UTC Time starts at {ds.time.data[0]}\")\n", + "# ax.grid()\n", + "# plt.show()\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.16" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/TDMA/tdmaapssize.c1.ipynb b/VAPs/quicklook/TDMA/tdmaapssize.c1.ipynb new file mode 100644 index 00000000..02a9cff9 --- /dev/null +++ b/VAPs/quicklook/TDMA/tdmaapssize.c1.ipynb @@ -0,0 +1,339 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# TDMAAPSSIZE.C1 Plots\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/tdma) for more information about this vap." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib widget\n", + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import ipywidgets as widgets\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "# Data archive directory\n", + "DATA_DIR = r'/data/archive/'\n", + "\n", + "# Datastream info\n", + "DATASTREAM_NAME = 'tdmaapssize'\n", + "DATA_LEVEL = 'c1'\n", + "LOCATIONS = [{'end_date': '2014-11-20', 'facility': 'C1', 'site': 'sgp', 'start_date': '2010-01-01'}]" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "9faaf875", + "metadata": {}, + "source": [ + "## Define site, facility, and date range" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ac6764f5", + "metadata": {}, + "outputs": [], + "source": [ + "print(\"The following locations and date ranges are available for this VAP:\")\n", + "display(pd.DataFrame(LOCATIONS, columns=['site', 'facility', 'start_date', 'end_date']))" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "8d132223", + "metadata": {}, + "source": [ + "#### Define site, facility, and date range (date format: YYYY-MM-DD) using the variables below:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e563983a", + "metadata": {}, + "outputs": [], + "source": [ + "site_facility = ( 'sgp', 'C1' )\n", + "\n", + "date_start = '2014-11-18'\n", + "date_end = '2014-11-20'" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "bccd3dfe-2f99-49a5-bace-ea37e8dd8fc3", + "metadata": {}, + "source": [ + "## Load data files\n", + "Load data files from /data/archive/" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cb4b9a26-c574-49c0-a521-658fa553e39e", + "metadata": 
{}, + "outputs": [], + "source": [ + "# Compile list of files\n", + "site, facility = site_facility\n", + "d_date_start = datetime.strptime(date_start, '%Y-%m-%d')\n", + "d_date_end = datetime.strptime(date_end, '%Y-%m-%d')\n", + "dir_path = os.path.join(DATA_DIR + site, site + DATASTREAM_NAME + facility + r'.' + DATA_LEVEL )\n", + "dir_path\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6be8f3dc", + "metadata": {}, + "outputs": [], + "source": [ + "from datetime import date, timedelta\n", + "import pandas as pd\n", + "\n", + "def get_ARM_formated_dates(start_date, end_date):\n", + " \"\"\"\n", + " Get a list of ARM conventional formated date lists, based on start_date and end_date(inclusive)\n", + " EXAMPLE:\n", + " get_ARM_formated_dates(start_date=\"20180219\", end_date=\"20180221\")\n", + " >> [\"20180219\", \"20180220\", \"20180221\"] \n", + " \"\"\"\n", + " \n", + " _start_date = pd.to_datetime(start_date)\n", + " _end_date = pd.to_datetime(end_date)\n", + " \n", + " delta = _end_date - _start_date # returns timedelta \n", + " dates = []\n", + "\n", + " for i in range(delta.days + 1):\n", + " day = _start_date + timedelta(days=i)\n", + " day_formated = day.strftime(format=\"%Y%m%d\")\n", + " dates.append(day_formated)\n", + " return dates\n", + "\n", + "\n", + "get_ARM_formated_dates(start_date=date_start, end_date=date_end)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "51feea2e", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter a list of files based on date pattern\n", + "import glob\n", + "dates = get_ARM_formated_dates(start_date=date_start, end_date=date_end)\n", + "files_filter = []\n", + "for date in dates:\n", + " files_filter += glob.glob(f'{dir_path}/*.{date}*.*')\n", + " files_filter\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0e5d0d7", + "metadata": {}, + "outputs": [], + "source": [ + "# Load files as a single dataset\n", + 
"files_list = files_filter \n", + "ds = act.io.armfiles.read_netcdf(files_list)\n", + "ds.clean.cleanup()\n", + "print(f'{len(files_list)} files loaded')\n", + "ds\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "4a551094-9ec0-4b64-b80a-9940573c2f50", + "metadata": {}, + "source": [ + "## Plot time series data\n", + "#### Define the list of variables to be plotted:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d09b789e-84f1-4605-846b-a72c110c8048", + "metadata": {}, + "outputs": [], + "source": [ + "variables_to_plot = ['number_concentration_DMA_APS', 'integrated_number_concentration_DMA']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3458fb08-035b-4898-9253-0a94e6f9c97b", + "metadata": {}, + "outputs": [], + "source": [ + "ts_display = act.plotting.TimeSeriesDisplay(ds)\n", + "ts_display.add_subplots((len(variables_to_plot),), figsize = (9.5,4*len(variables_to_plot)))\n", + "\n", + "for i,v in enumerate(variables_to_plot):\n", + " ts_ax = ts_display.plot(v, subplot_index=(i,), set_title=ds.variables[v].attrs['long_name'],)\n", + " ts_ax.grid()\n", + "\n", + "plt.show()\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "194399aa-1907-452b-8ba9-bc31d7f60291", + "metadata": {}, + "source": [ + "## Quality check plots\n", + "#### Define variable for QC plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9a0ef63d-3eeb-48fc-a24a-43258b6134b8", + "metadata": {}, + "outputs": [], + "source": [ + "qc_variable = 'number_concentration_DMA_APS'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "532663a3-4dc0-4497-bda8-018c5f91e1c4", + "metadata": {}, + "outputs": [], + "source": [ + "# QC Plot\n", + "if ('qc_'+qc_variable) in ds.variables:\n", + "\n", + " # Plot\n", + " qc_display = act.plotting.TimeSeriesDisplay(ds)\n", + " qc_display.add_subplots((2,), figsize = (9.5,10))\n", + " qc_ax = qc_display.plot(qc_variable, 
subplot_index=(0,), set_title=\"QC results on field: \" + qc_variable,)\n", + " qc_ax.grid()\n", + " qc_display.qc_flag_block_plot(qc_variable, subplot_index=(1,))\n", + "\n", + " plt.show()\n", + "else:\n", + " print(f'QC not available for the selected field: {qc_variable}')\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "2308db16-f362-4033-a11a-c3e5e75ad9ba", + "metadata": {}, + "source": [ + "## Field selection dropdown menu\n", + "Select variable to be plotted from a dropdown menu" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d5792fa8-2253-441a-8ac6-1e07a0345031", + "metadata": {}, + "outputs": [], + "source": [ + "plt.ioff()\n", + "\n", + "# populate dropdown menu with available variables \n", + "available_variables = [v for v in ds.variables if not('time' in v or v.startswith('qc_') or v.startswith('source_')) and 'long_name' in ds.variables[v].attrs]\n", + "d_variable = 'number_concentration_DMA_APS'\n", + "dropdown = widgets.Dropdown(\n", + " options = [(ds.variables[v].attrs['long_name'], v) for v in available_variables],\n", + " value= d_variable,\n", + " description='Field:',\n", + " disabled=False,\n", + ")\n", + "dropdown.layout.margin = '0px 30% 0px 20%'\n", + "dropdown.layout.width = '50%'\n", + "\n", + "# set up display\n", + "i_display = act.plotting.TimeSeriesDisplay(ds)\n", + "i_display.add_subplots((1,), figsize = (9.5,5))\n", + "i_ax = i_display.plot(d_variable, subplot_index=(0,), set_title=ds.variables[d_variable].attrs['long_name'],)\n", + "i_ax.grid()\n", + "i_fig = i_display.fig\n", + "\n", + "# update plot callback function\n", + "def update_plot(change):\n", + " i_ax.cla()\n", + " i_ax_new = i_display.plot(change.new, subplot_index=(0,), set_title=ds.variables[change.new].attrs['long_name'],)\n", + " i_ax_new.grid()\n", + " i_fig.canvas.draw()\n", + " i_fig.canvas.flush_events()\n", + "\n", + "dropdown.observe(update_plot, names='value')\n", + "\n", + "widgets.AppLayout(\n", + " 
header=dropdown,\n", + " center=i_fig.canvas,\n", + " pane_heights=[1, 6,1]\n", + ")\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.12" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/TWRMR/1twrmr.c1.ipynb b/VAPs/quicklook/TWRMR/1twrmr.c1.ipynb new file mode 100644 index 00000000..7a36af24 --- /dev/null +++ b/VAPs/quicklook/TWRMR/1twrmr.c1.ipynb @@ -0,0 +1,339 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# 1TWRMR.C1 Plots\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/twrmr) for more information about this vap." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib widget\n", + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import ipywidgets as widgets\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "# Data archive directory\n", + "DATA_DIR = r'/data/archive/'\n", + "\n", + "# Datastream info\n", + "DATASTREAM_NAME = '1twrmr'\n", + "DATA_LEVEL = 'c1'\n", + "LOCATIONS = [{'end_date': '2023-12-17', 'facility': 'C1', 'site': 'sgp', 'start_date': '1998-04-01'}]" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "9faaf875", + "metadata": {}, + "source": [ + "## Define site, facility, and date range" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ac6764f5", + "metadata": {}, + "outputs": [], + "source": [ + "print(\"The following locations and date ranges are available for this VAP:\")\n", + "display(pd.DataFrame(LOCATIONS, columns=['site', 'facility', 'start_date', 'end_date']))" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "8d132223", + "metadata": {}, + "source": [ + "#### Define site, facility, and date range (date format: YYYY-MM-DD) using the variables below:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e563983a", + "metadata": {}, + "outputs": [], + "source": [ + "site_facility = ( 'sgp', 'C1' )\n", + "\n", + "date_start = '2023-12-15'\n", + "date_end = '2023-12-17'" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "bccd3dfe-2f99-49a5-bace-ea37e8dd8fc3", + "metadata": {}, + "source": [ + "## Load data files\n", + "Load data files from /data/archive/" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cb4b9a26-c574-49c0-a521-658fa553e39e", + "metadata": {}, + 
"outputs": [], + "source": [ + "# Compile list of files\n", + "site, facility = site_facility\n", + "d_date_start = datetime.strptime(date_start, '%Y-%m-%d')\n", + "d_date_end = datetime.strptime(date_end, '%Y-%m-%d')\n", + "dir_path = os.path.join(DATA_DIR + site, site + DATASTREAM_NAME + facility + r'.' + DATA_LEVEL )\n", + "dir_path\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6be8f3dc", + "metadata": {}, + "outputs": [], + "source": [ + "from datetime import date, timedelta\n", + "import pandas as pd\n", + "\n", + "def get_ARM_formated_dates(start_date, end_date):\n", + " \"\"\"\n", + " Get a list of ARM conventional formated date lists, based on start_date and end_date(inclusive)\n", + " EXAMPLE:\n", + " get_ARM_formated_dates(start_date=\"20180219\", end_date=\"20180221\")\n", + " >> [\"20180219\", \"20180220\", \"20180221\"] \n", + " \"\"\"\n", + " \n", + " _start_date = pd.to_datetime(start_date)\n", + " _end_date = pd.to_datetime(end_date)\n", + " \n", + " delta = _end_date - _start_date # returns timedelta \n", + " dates = []\n", + "\n", + " for i in range(delta.days + 1):\n", + " day = _start_date + timedelta(days=i)\n", + " day_formated = day.strftime(format=\"%Y%m%d\")\n", + " dates.append(day_formated)\n", + " return dates\n", + "\n", + "\n", + "get_ARM_formated_dates(start_date=date_start, end_date=date_end)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "51feea2e", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter a list of files based on date pattern\n", + "import glob\n", + "dates = get_ARM_formated_dates(start_date=date_start, end_date=date_end)\n", + "files_filter = []\n", + "for date in dates:\n", + " files_filter += glob.glob(f'{dir_path}/*.{date}*.*')\n", + " files_filter\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0e5d0d7", + "metadata": {}, + "outputs": [], + "source": [ + "# Load files as a single dataset\n", + "files_list = 
files_filter \n", + "ds = act.io.armfiles.read_netcdf(files_list)\n", + "ds.clean.cleanup()\n", + "print(f'{len(files_list)} files loaded')\n", + "ds\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "4a551094-9ec0-4b64-b80a-9940573c2f50", + "metadata": {}, + "source": [ + "## Plot time series data\n", + "#### Define the list of variables to be plotted:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d09b789e-84f1-4605-846b-a72c110c8048", + "metadata": {}, + "outputs": [], + "source": [ + "variables_to_plot = ['pres_02m', 'pres_25m', 'pres_60m']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3458fb08-035b-4898-9253-0a94e6f9c97b", + "metadata": {}, + "outputs": [], + "source": [ + "ts_display = act.plotting.TimeSeriesDisplay(ds)\n", + "ts_display.add_subplots((len(variables_to_plot),), figsize = (9.5,4*len(variables_to_plot)))\n", + "\n", + "for i,v in enumerate(variables_to_plot):\n", + " ts_ax = ts_display.plot(v, subplot_index=(i,), set_title=ds.variables[v].attrs['long_name'],)\n", + " ts_ax.grid()\n", + "\n", + "plt.show()\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "194399aa-1907-452b-8ba9-bc31d7f60291", + "metadata": {}, + "source": [ + "## Quality check plots\n", + "#### Define variable for QC plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9a0ef63d-3eeb-48fc-a24a-43258b6134b8", + "metadata": {}, + "outputs": [], + "source": [ + "qc_variable = 'pres_02m'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "532663a3-4dc0-4497-bda8-018c5f91e1c4", + "metadata": {}, + "outputs": [], + "source": [ + "# QC Plot\n", + "if ('qc_'+qc_variable) in ds.variables:\n", + "\n", + " # Plot\n", + " qc_display = act.plotting.TimeSeriesDisplay(ds)\n", + " qc_display.add_subplots((2,), figsize = (9.5,10))\n", + " qc_ax = qc_display.plot(qc_variable, subplot_index=(0,), set_title=\"QC results on field: \" + qc_variable,)\n", + " 
qc_ax.grid()\n", + " qc_display.qc_flag_block_plot(qc_variable, subplot_index=(1,))\n", + "\n", + " plt.show()\n", + "else:\n", + " print(f'QC not available for the selected field: {qc_variable}')\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "2308db16-f362-4033-a11a-c3e5e75ad9ba", + "metadata": {}, + "source": [ + "## Field selection dropdown menu\n", + "Select variable to be plotted from a dropdown menu" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d5792fa8-2253-441a-8ac6-1e07a0345031", + "metadata": {}, + "outputs": [], + "source": [ + "plt.ioff()\n", + "\n", + "# populate dropdown menu with available variables \n", + "available_variables = [v for v in ds.variables if not('time' in v or v.startswith('qc_') or v.startswith('source_')) and 'long_name' in ds.variables[v].attrs]\n", + "d_variable = 'pres_02m'\n", + "dropdown = widgets.Dropdown(\n", + " options = [(ds.variables[v].attrs['long_name'], v) for v in available_variables],\n", + " value= d_variable,\n", + " description='Field:',\n", + " disabled=False,\n", + ")\n", + "dropdown.layout.margin = '0px 30% 0px 20%'\n", + "dropdown.layout.width = '50%'\n", + "\n", + "# set up display\n", + "i_display = act.plotting.TimeSeriesDisplay(ds)\n", + "i_display.add_subplots((1,), figsize = (9.5,5))\n", + "i_ax = i_display.plot(d_variable, subplot_index=(0,), set_title=ds.variables[d_variable].attrs['long_name'],)\n", + "i_ax.grid()\n", + "i_fig = i_display.fig\n", + "\n", + "# update plot callback function\n", + "def update_plot(change):\n", + " i_ax.cla()\n", + " i_ax_new = i_display.plot(change.new, subplot_index=(0,), set_title=ds.variables[change.new].attrs['long_name'],)\n", + " i_ax_new.grid()\n", + " i_fig.canvas.draw()\n", + " i_fig.canvas.flush_events()\n", + "\n", + "dropdown.observe(update_plot, names='value')\n", + "\n", + "widgets.AppLayout(\n", + " header=dropdown,\n", + " center=i_fig.canvas,\n", + " pane_heights=[1, 6,1]\n", + ")\n" + ] + } + ], + 
"metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.12" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/TWRMR/30twrmr.c1.ipynb b/VAPs/quicklook/TWRMR/30twrmr.c1.ipynb new file mode 100644 index 00000000..92c0c433 --- /dev/null +++ b/VAPs/quicklook/TWRMR/30twrmr.c1.ipynb @@ -0,0 +1,297 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# 30TWRMR.C1 Plots\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/twrmr) for more information about this vap." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib widget\n", + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import ipywidgets as widgets\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "# Data archive directory\n", + "DATA_DIR = r'/data/archive/'\n", + "\n", + "# Datastream info\n", + "DATASTREAM_NAME = '30twrmr'\n", + "DATA_LEVEL = 'c1'\n", + "LOCATIONS = [{'end_date': '2009-09-29', 'facility': 'C1', 'site': 'sgp', 'start_date': '1998-04-01'}]" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "9faaf875", + "metadata": {}, + "source": [ + "## Define site, facility, and date range" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ac6764f5", + "metadata": {}, + "outputs": [], + "source": [ + "print(\"The following locations and date ranges are available for this VAP:\")\n", + "display(pd.DataFrame(LOCATIONS, columns=['site', 'facility', 'start_date', 'end_date']))" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "8d132223", + "metadata": {}, + "source": [ + "#### Define site, facility, and date range (date format: YYYY-MM-DD) using the variables below:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e563983a", + "metadata": {}, + "outputs": [], + "source": [ + "site_facility = ( 'sgp', 'C1' )\n", + "\n", + "date_start = '2009-09-28'\n", + "date_end = '2009-09-29'" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "bccd3dfe-2f99-49a5-bace-ea37e8dd8fc3", + "metadata": {}, + "source": [ + "## Load data files\n", + "Load data files from /data/archive/" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cb4b9a26-c574-49c0-a521-658fa553e39e", + "metadata": {}, + 
"outputs": [], + "source": [ + "# Compile list of files\n", + "site, facility = site_facility\n", + "d_date_start = datetime.strptime(date_start, '%Y-%m-%d')\n", + "d_date_end = datetime.strptime(date_end, '%Y-%m-%d')\n", + "dir_path = os.path.join(DATA_DIR + site, site + DATASTREAM_NAME + facility + r'.' + DATA_LEVEL )\n", + "dir_path\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6be8f3dc", + "metadata": {}, + "outputs": [], + "source": [ + "from datetime import date, timedelta\n", + "import pandas as pd\n", + "\n", + "def get_ARM_formated_dates(start_date, end_date):\n", + " \"\"\"\n", + " Get a list of ARM conventional formated date lists, based on start_date and end_date(inclusive)\n", + " EXAMPLE:\n", + " get_ARM_formated_dates(start_date=\"20180219\", end_date=\"20180221\")\n", + " >> [\"20180219\", \"20180220\", \"20180221\"] \n", + " \"\"\"\n", + " \n", + " _start_date = pd.to_datetime(start_date)\n", + " _end_date = pd.to_datetime(end_date)\n", + " \n", + " delta = _end_date - _start_date # returns timedelta \n", + " dates = []\n", + "\n", + " for i in range(delta.days + 1):\n", + " day = _start_date + timedelta(days=i)\n", + " day_formated = day.strftime(format=\"%Y%m%d\")\n", + " dates.append(day_formated)\n", + " return dates\n", + "\n", + "\n", + "get_ARM_formated_dates(start_date=date_start, end_date=date_end)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "51feea2e", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter a list of files based on date pattern\n", + "import glob\n", + "dates = get_ARM_formated_dates(start_date=date_start, end_date=date_end)\n", + "files_filter = []\n", + "for date in dates:\n", + " files_filter += glob.glob(f'{dir_path}/*.{date}*.*')\n", + " files_filter\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0e5d0d7", + "metadata": {}, + "outputs": [], + "source": [ + "# Load files as a single dataset\n", + "files_list = 
files_filter \n", + "ds = act.io.armfiles.read_netcdf(files_list)\n", + "ds.clean.cleanup()\n", + "print(f'{len(files_list)} files loaded')\n", + "ds\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "4a551094-9ec0-4b64-b80a-9940573c2f50", + "metadata": {}, + "source": [ + "## Plot time series data\n", + "#### Define the list of variables to be plotted:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d09b789e-84f1-4605-846b-a72c110c8048", + "metadata": {}, + "outputs": [], + "source": [ + "variables_to_plot = ['pres_02m', 'pres_25m', 'pres_60m']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3458fb08-035b-4898-9253-0a94e6f9c97b", + "metadata": {}, + "outputs": [], + "source": [ + "ts_display = act.plotting.TimeSeriesDisplay(ds)\n", + "ts_display.add_subplots((len(variables_to_plot),), figsize = (9.5,4*len(variables_to_plot)))\n", + "\n", + "for i,v in enumerate(variables_to_plot):\n", + " ts_ax = ts_display.plot(v, subplot_index=(i,), set_title=ds.variables[v].attrs['long_name'],)\n", + " ts_ax.grid()\n", + "\n", + "plt.show()\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "2308db16-f362-4033-a11a-c3e5e75ad9ba", + "metadata": {}, + "source": [ + "## Field selection dropdown menu\n", + "Select variable to be plotted from a dropdown menu" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d5792fa8-2253-441a-8ac6-1e07a0345031", + "metadata": {}, + "outputs": [], + "source": [ + "plt.ioff()\n", + "\n", + "# populate dropdown menu with available variables \n", + "available_variables = [v for v in ds.variables if not('time' in v or v.startswith('qc_') or v.startswith('source_')) and 'long_name' in ds.variables[v].attrs]\n", + "d_variable = 'pres_02m'\n", + "dropdown = widgets.Dropdown(\n", + " options = [(ds.variables[v].attrs['long_name'], v) for v in available_variables],\n", + " value= d_variable,\n", + " description='Field:',\n", + " 
disabled=False,\n", + ")\n", + "dropdown.layout.margin = '0px 30% 0px 20%'\n", + "dropdown.layout.width = '50%'\n", + "\n", + "# set up display\n", + "i_display = act.plotting.TimeSeriesDisplay(ds)\n", + "i_display.add_subplots((1,), figsize = (9.5,5))\n", + "i_ax = i_display.plot(d_variable, subplot_index=(0,), set_title=ds.variables[d_variable].attrs['long_name'],)\n", + "i_ax.grid()\n", + "i_fig = i_display.fig\n", + "\n", + "# update plot callback function\n", + "def update_plot(change):\n", + " i_ax.cla()\n", + " i_ax_new = i_display.plot(change.new, subplot_index=(0,), set_title=ds.variables[change.new].attrs['long_name'],)\n", + " i_ax_new.grid()\n", + " i_fig.canvas.draw()\n", + " i_fig.canvas.flush_events()\n", + "\n", + "dropdown.observe(update_plot, names='value')\n", + "\n", + "widgets.AppLayout(\n", + " header=dropdown,\n", + " center=i_fig.canvas,\n", + " pane_heights=[1, 6,1]\n", + ")\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.12" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/TWRMR/TWRMR_tutorial.ipynb b/VAPs/quicklook/TWRMR/TWRMR_tutorial.ipynb new file mode 100644 index 00000000..e13c8a9c --- /dev/null +++ b/VAPs/quicklook/TWRMR/TWRMR_tutorial.ipynb @@ -0,0 +1,838 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# 1TWRMR.C1 Notebook\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/twrmr) for more information about this vap." 
+ ] + }, + { + "cell_type": "markdown", + "id": "97097763", + "metadata": {}, + "source": [ + "In this notebook, we demonstrate the workflow to explore ARM vap data (using 1twrmr as an example.) Value-added products (VAPs) are higher-order data products that have been analyzed and processed to ease scientists’ use of ARM data in atmospheric research and global climate models. \n", + "Here is the main content we will cover." + ] + }, + { + "cell_type": "markdown", + "id": "eddec40f", + "metadata": {}, + "source": [ + "# Table of Content\n", + "## Access the data\n", + "* How to retrieve the data\n", + "* Data path and file name conventions\n", + "* Load data\n", + "## Explore the data\n", + "* NetCDF Data structure\n", + "* Xarray essentials\n", + "* Xarray Variable\n", + "## Plot the data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "import random\n", + "\n", + "import glob\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "id": "2372d5be", + "metadata": {}, + "source": [ + "## Access the data" + ] + }, + { + "cell_type": "markdown", + "id": "5b0d9684", + "metadata": {}, + "source": [ + "### How to retrieve the data\n", + "We assume the path \"/data/archive\" is available where you are running this notebook. You can use `os.path.exists(\"/data/archive\")` to verify if the path exists at your machine.\n", + "\n", + "\n", + "### Data path and file name conventions\n", + "There several common terminologies regarding ARM data, for example, data-stream-name, data-level, site, facility, instrument, etc. 
(For more details, please see [ARM Data File Standards Version 1.2](https://www.google.com/search?q=arm+datastream+facility+cite+definition&rlz=1C1GCEJ_enUS1029US1029&ei=hb41ZICrBPukqtsPvMODIA&ved=0ahUKEwjAgeiS0aL-AhV7kmoFHbzhAAQQ4dUDCBA&uact=5&oq=arm+datastream+facility+cite+definition&gs_lcp=Cgxnd3Mtd2l6LXNlcnAQAzIFCAAQogQyBQgAEKIEMgUIABCiBDIFCAAQogQyBQgAEKIEOgoIABBHENYEELADOgoIIRCgARDDBBAKSgQIQRgAUJ4DWIoOYO0PaAFwAXgAgAGMAYgBuQmSAQMyLjmYAQCgAQHIAQjAAQE&sclient=gws-wiz-serp).) \n", + "\n", + "For example, this notebook is called `1twrmr.c1`, where `1twrmr` is the \"datastream name\", and `c1` is the \"data level\".\n", + "\n", + "This datastream also contains site `sgp` and facility `C1`. (Note: individual datastream might have multiple site-facility pairs.)\n", + "In such a case, the data of this data-stream is stored at `/data/archive/sgp/sgp1twrmrC1.c1`, which is in the format of `{DATA_DIR}/{site}/{site}{DATASTREAM_NAME}{facility}.{DATA_LEVEL}`. We can use the following method to assign the data-stream directory `datastream_dir = os.path.join(DATA_DIR, site, site + DATASTREAM_NAME + facility + '.' + DATA_LEVEL )`\n", + "\n", + "The data files under datastream_dir also follow naming conventions. But once you reach the datastream_dir level, the most important file naming convention to differentiate the files is \"yyyyMMdd.hhmmss\", which comes in handy to filter out files based on datetime. 
For example, we can use `glob.glob(f'{datastream_dir}/*.200709*.*')` to filter files in 2007 September.\n", + "\n", + "(Note: refer to the https://adc.arm.gov/discovery/#/ and https://adc.arm.gov/solr8/metadata/select API to explore ARM datastream and associated available sites and facilities.)\n", + "\n", + "Please see the following examples in action" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d7e9eb85", + "metadata": {}, + "outputs": [], + "source": [ + "# Verify if DATA_DIR path exists\n", + "DATA_DIR = \"/data/archive\"\n", + "os.path.exists(DATA_DIR)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "586993fd", + "metadata": {}, + "outputs": [], + "source": [ + "# Specify datastream_dir following the path conventions and check its existence\n", + "DATASTREAM_NAME = \"1twrmr\"\n", + "DATA_LEVEL = \"c1\"\n", + "site = \"sgp\"\n", + "facility = \"C1\"\n", + "datastream_dir = os.path.join(DATA_DIR, site, site + DATASTREAM_NAME + facility + '.' 
+ DATA_LEVEL )\n", + "print(datastream_dir)\n", + "print(os.path.exists(datastream_dir))\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0742f7c1", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: list 5 (random) files under datastream_dir\n", + "files = os.listdir(datastream_dir)\n", + "files[:5]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "39b98a36", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: get most recent file\n", + "list_of_files = glob.glob(f\"{datastream_dir}/*\") # * means all if need specific format then *.csv\n", + "latest_file = max(list_of_files, key=os.path.getctime)\n", + "latest_file" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "902d514e", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus sort datastream files based on datetime\n", + "files = os.listdir(datastream_dir)\n", + "file_sorted = files.copy()\n", + "file_sorted.sort() \n", + "print(file_sorted[:5])\n", + "\n", + "# to reverse\n", + "file_sorted_reverse = files.copy() \n", + "file_sorted_reverse.sort(reverse=True)\n", + "print(file_sorted_reverse[:5])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ec5923b2", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: pattern matching\n", + "# filter the 200709** files under datastream_dir\n", + "files_filter = glob.glob(f'{datastream_dir}/*.200709*.*')\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c4a4aa18", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: use armnotebook_utils.file_filter (TODO) to filter files based on datastream info \n" + ] + }, + { + "cell_type": "markdown", + "id": "cfeb9efc", + "metadata": {}, + "source": [ + "### Load data\n", + "The arm data is stored in [NetCDF](https://en.wikipedia.org/wiki/NetCDF#:~:text=%22NetCDF%20(network%20Common%20Data%20Form,format%20for%20representing%20scientific%20data.) format. 
We can use xarray's `open_dataset` method to load single file, or `open_mfdataset` to open multiple files. (Note: the latter will still return a single xarray dataset object by combining multiple files.)\n", + "\n", + "Note: open_dataset keeps the file handle open and lazy loads its contents. All parameters are passed directly to open_dataset. It is a preferable method over load_dataset for memory-effiency. (more details, please see [xarray.open_dataset](https://docs.xarray.dev/en/stable/generated/xarray.open_dataset.html))\n", + "\n", + "See the following example in action" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a440a329", + "metadata": {}, + "outputs": [], + "source": [ + "# open single file\n", + "full_path = latest_file\n", + "print(full_path)\n", + "ds_single = xr.open_dataset(full_path)\n", + "ds_single" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0143a3d", + "metadata": {}, + "outputs": [], + "source": [ + "print(ds_single)\n", + "print(type(ds_single))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1c0f8939", + "metadata": {}, + "outputs": [], + "source": [ + "# open multiple files\n", + "n_files = 3\n", + "full_paths = [os.path.join(datastream_dir, f_path) for f_path in file_sorted_reverse[:n_files]]\n", + "print(full_paths)\n", + "try: # Note: sometimes multiple files cannot be merged, so we used try except here.\n", + " ds_mutiple = xr.open_mfdataset(full_paths)\n", + " # ds_mutiple\n", + " print(type(ds_mutiple))\n", + " print(ds_mutiple)\n", + "except Exception as e:\n", + " print(e)" + ] + }, + { + "cell_type": "markdown", + "id": "3fa59943", + "metadata": {}, + "source": [ + "## Explore the data" + ] + }, + { + "cell_type": "markdown", + "id": "571f69b9", + "metadata": {}, + "source": [ + "### NetCDF Data structure \n", + "\n", + "\n", + "(If you are confident with NetCDF basics and xarray essentials, feel free to skip this session.)\n", + "Before we dive into 
data exploration, there are some essential concepts we should be familiar with (shown below): \n", + "* Dataset\n", + "* Data array\n", + "* Variable\n", + "* Dimension\n", + "* Coordinate\n", + "* Data Type\n", + "* Meta Data (Attributes)\n", + "\n", + "We will not go into details about NetCDF basics and here are some references you might find helpful\n", + "* [Components of a NetCDF Dataset](https://iprc.soest.hawaii.edu/users/xfu/tool/guidef-7.html#)\n", + "* [Network Common Data Form (NetCDF)](https://www.unidata.ucar.edu/software/netcdf/) and [A Brief History of (netCDF) Time](https://www.unidata.ucar.edu/software/netcdf/time/recs.html)\n", + "\n", + "\n", + "\n", + "### Xarray essentials\n", + "Earlier we introduced the xarray package and used `open_dataset` and `open_mfdataset` to retrieve a NetCDF data file as an xarray Dataset object. Recall that you can review the data in a notebook by using `print(ds)` or `ds`. \n", + "\n", + "Using xarray to retrieve the aforementioned NetCDF basics is straightforward. 
In general,\n", + "* Dataset: `ds`\n", + "* Data array: `ds.variables`\n", + "* Variable: `ds.variables`\n", + "* Dimension: `ds.dims`\n", + "* Coordinate: `ds.coords`\n", + "* Data Type: `type`\n", + "* Meta Data (Attributes): `ds.attrs`\n", + "\n", + "Also, here are some references if you are new to xarray\n", + "* [Xarray in 45 minutes](https://tutorial.xarray.dev/overview/xarray-in-45-min.html)\n", + "* [Handling NetCDF Files using XArray for Absolute Beginners](https://towardsdatascience.com/handling-netcdf-files-using-xarray-for-absolute-beginners-111a8ab4463f)\n", + "\n", + "Try the following commands in action\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b3eac323", + "metadata": {}, + "outputs": [], + "source": [ + "ds = ds_single" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "77ecf85d", + "metadata": {}, + "outputs": [], + "source": [ + "# Dataset \n", + "ds" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "25e7de09", + "metadata": {}, + "outputs": [], + "source": [ + "# Data array, Variable\n", + "# Note: the info can be overwhelming. 
Do not attempt to grasp everything at the first glance.\n", + "ds.variables" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8c41b67e", + "metadata": {}, + "outputs": [], + "source": [ + "# dimenssions\n", + "ds.dims" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "156f1dfc", + "metadata": {}, + "outputs": [], + "source": [ + "# coordinates\n", + "ds.coords" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "277d6064", + "metadata": {}, + "outputs": [], + "source": [ + "# Meta Data (Attributes)\n", + "ds.attrs" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d334681f", + "metadata": {}, + "outputs": [], + "source": [ + "# type\n", + "print(type(ds))\n", + "print(type(ds.variables))\n", + "print(type(ds.dims))\n", + "print(type(ds.coords))\n", + "print(type(ds.attrs))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "643399d6", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: retrieve variable names only\n", + "list(ds.variables)" + ] + }, + { + "cell_type": "markdown", + "id": "1cf36878", + "metadata": {}, + "source": [ + "#### Discussion: variable vs. coordinates vs. dimenssions. \n", + "From [Components of a NetCDF Dataset](https://iprc.soest.hawaii.edu/users/xfu/tool/guidef-7.html#):\n", + "\n", + "* Dimession: A dimension may be used to represent a real physical dimension, for example, time, latitude, longitude, or height. A dimension might also be used to index other quantities, for example station or model-run-number.\n", + "A netCDF dimension has both a name and a length. A dimension length is an arbitrary positive integer, except that one dimension in a netCDF dataset can have the length UNLIMITED.\n", + "\n", + "* Variables: Variables are used to store the bulk of the data in a netCDF dataset. A variable represents an array of values of the same type. A scalar value is treated as a 0-dimensional array. 
A variable has a name, a data type, and a shape described by its list of dimensions specified when the variable is created. A variable may also have associated attributes, which may be added, deleted or changed after the variable is created.\n", + "\n", + "* Coordinate (Variables): It is legal for a variable to have the same name as a dimension. Such variables have no special meaning to the netCDF library. However there is a convention that such variables should be treated in a special way by software using this library.\n", + "A variable with the same name as a dimension is called a coordinate variable. It typically defines a physical coordinate corresponding to that dimension.\n", + "\n", + "\n", + "Tips: By definition, coordinates and dimensions are also variables. For example ds.time is a coordinate but it is also a special variable. But in practice, when we talk about a variable, it implies a regular/non-coordinate variable. We will use this convention for the remainder of the notebook. " + ] + }, + { + "cell_type": "markdown", + "id": "d47120f9", + "metadata": {}, + "source": [ + "### Xarray Variable\n", + "\n", + "\n", + "We can use `ds.variables` to access variables of a dataset. But printing out the whole dataset can often be overwhelming. Instead, we would work on the individual variable (also called Xarray data array.) using the `ds[var_name]` syntax, where `var_name` is the name of the variable. See the following examples in action.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5a57e136", + "metadata": {}, + "outputs": [], + "source": [ + "print(type(ds[\"time\"]))\n", + "ds[\"time\"]" + ] + }, + { + "cell_type": "markdown", + "id": "51ec643e", + "metadata": {}, + "source": [ + "#### Variable properties\n", + "For an individual variable, the following properties are mostly used with the associated xarray method to retrieve them. 
(assuming var = ds[var_name])\n", + "* name: `var.name`\n", + "* data content: `var.data`\n", + "* attributes: `var.attrs`\n", + "* dimensions: `var.dims`\n", + "* data type: `var.data.dtype`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a632a525", + "metadata": {}, + "outputs": [], + "source": [ + "var_name = \"time\"\n", + "var = ds[var_name]\n", + "\n", + "print(\"var.name: \\n\", var.name, \"\\n\")\n", + "print(\"var.data: \\n\", var.data, \"\\n\")\n", + "print(\"var.attrs: \\n\", var.attrs, \"\\n\")\n", + "print(\"var.dims: \\n\", var.dims, \"\\n\")\n", + "print(\"var.data.dtype: \\n\", var.data.dtype, \"\\n\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b256b07f", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: individual variable info table (collect and display variable info in a tabular format)\n", + "df_info = pd.DataFrame()\n", + "df_info[\"var_name\"] = list(ds.variables)\n", + "df_info[\"dims\"] = df_info.var_name.apply(lambda x: ds[x].dims)\n", + "df_info[\"is_dim\"] = df_info.var_name.apply(lambda x: x in ds.dims)\n", + "df_info[\"n_dim\"] = df_info.var_name.apply(lambda x: len(ds[x].dims))\n", + "df_info[\"attrs\"] = df_info.var_name.apply(lambda x: ds[x].attrs)\n", + "df_info[\"dtype\"] = df_info.var_name.apply(lambda x: ds[x].data.dtype)\n", + "df_info" + ] + }, + { + "cell_type": "markdown", + "id": "e6a36b96", + "metadata": {}, + "source": [ + "### Data cleaning/Preprocessing (skipped)\n", + "Data cleaning and preprocessing is an important stage in the data analysis pipeline. However it will be out of the scope of this notebook. For more information about data cleaning and preprocessing basics, here are some references. 
\n", + "* [Xarray Fundamentals](https://earth-env-data-science.github.io/lectures/xarray/xarray.html)\n", + "* [Xarray Tutorial — Pangeo Gallery documentation](http://gallery.pangeo.io/repos/pangeo-data/pangeo-tutorial-gallery/xarray.html)\n", + "* [Pythonic Data Cleaning With pandas and NumPy](https://realpython.com/python-data-cleaning-numpy-pandas/)\n", + "* [Pandas - Cleaning Data](https://www.w3schools.com/python/pandas/pandas_cleaning.asp)" + ] + }, + { + "cell_type": "markdown", + "id": "ae6e1a69", + "metadata": {}, + "source": [ + "### Plotting\n", + "Data visualization (or plotting) is another important data analysis topic and deserves its own discussion. Here in this notebook we will only demonstrate basic tools for simple data visualization tasks.\n", + "\n", + "Here are some references you might find useful:\n", + "* xarray plotting: https://docs.xarray.dev/en/stable/user-guide/plotting.html\n", + "* Atmospheric Community Toolkit (ACT): https://arm-doe.github.io/ACT/index.html\n", + "\n", + "Note: this notebook is auto-generated using a template. It uses a general idea to select variable(s) to plot and is not customized to each individual notebook. Feel free to change the variables of interest, especially if a certain figure fails to plot." + ] + }, + { + "cell_type": "markdown", + "id": "7e417fb2", + "metadata": {}, + "source": [ + "#### 1-dimensional basic time series plot\n", + "\n", + "For the following plot we would like to find variables such that\n", + "* it has one and only one dimension\n", + "* \"time\" is its coordinate variable\n", + "* it is not a dimension itself,\n", + "* it is not a special variable with substrings within [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "\n", + "Please see the following example in action. 
(For more details about pandas filtering, please see the following references.)\n", + "* [pandas: multiple conditions while indexing data frame](https://stackoverflow.com/questions/22591174/pandas-multiple-conditions-while-indexing-data-frame-unexpected-behavior)\n", + "* [pandas.DataFrame.apply](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.apply.html)\n", + "* [How to test if a string contains one of the substrings in a list...](https://stackoverflow.com/questions/26577516/how-to-test-if-a-string-contains-one-of-the-substrings-in-a-list-in-pandas)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "30edac2d", + "metadata": {}, + "outputs": [], + "source": [ + "# (pandas query) filter n_dim==1, non-dimensional, and does not contain the following substrings\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter = df_info[(df_info.n_dim == 1) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (df_info.is_dim==False) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4777a659", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " var_1d = df_filter.var_name.values[0]\n", + " var_1d\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fb5ae985", + "metadata": {}, + "outputs": [], + "source": [ + "# Note: if failed, change to another variable to plot.\n", + "try:\n", + " ds[var_1d].plot()\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "markdown", + "id": "9e41066e", + "metadata": {}, + "source": [ + "#### 2-dimensional basic plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "09bd4adc", + "metadata": {}, + "outputs": [], + "source": [ + "# (pandas query) filter n_dim==2, non-dimensional, and not contains the 
following substrings\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_2 = df_info[(df_info.n_dim == 2) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (df_info.is_dim==False) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter_2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2cdea1a9", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " var_2d = df_filter_2.var_name.values[0]\n", + " var_2d\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f5f48879", + "metadata": {}, + "outputs": [], + "source": [ + "# Note: if failed, change to another variable to plot.\n", + "try:\n", + " print(ds[var_2d].dims)\n", + " # ds[var_2d].plot()\n", + "\n", + " # conventionally, use \"time\" as x-axis\n", + " ds[var_2d].plot(x=\"time\")\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "markdown", + "id": "7554b2a6", + "metadata": {}, + "source": [ + "#### qc-plotting (optional)\n", + "\n", + "Note: act qc plotting has more strict requirement, one of them is the associated qc variable need to have \"flag_masks\" attributes. 
Which is added by using ds = act.io.armfiles.read_netcdf(files_list), then ds.clean.cleanup()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "71c2096f", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " ds_act = act.io.armfiles.read_netcdf(full_path)\n", + " print(type(ds_act))\n", + " ds_act.clean.cleanup()\n", + "\n", + " # or \n", + " # ds.clean.cleanup()\n", + "except Exception as e:\n", + " print(\"ERROR\", e)\n", + " ds_act = ds" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "844f8505", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter valid variables for ACT qc plotting\n", + "condition = (\"qc_\" + df_info.var_name).apply(lambda x: ds_act[x].attrs.get(\"flag_masks\") is not None \n", + " if x in list(ds.variables) else False)\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_3 = df_info[(df_info.is_dim==False) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings))) &\n", + " condition\n", + " ]\n", + "df_filter_3" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ecd38cc6", + "metadata": {}, + "outputs": [], + "source": [ + "# QC Plot\n", + "try:\n", + " qc_variable = df_filter_3.var_name.values[0]\n", + " print(qc_variable)\n", + "except Exception as e:\n", + " print(e)\n", + " \n", + "try:\n", + "\n", + " # Plot\n", + " qc_display = act.plotting.TimeSeriesDisplay(ds_act)\n", + " qc_display.add_subplots((2,), figsize = (9.5,10))\n", + " qc_ax = qc_display.plot(qc_variable, subplot_index=(0,), set_title=\"QC results on field: \" + qc_variable,)\n", + " qc_ax.grid()\n", + " qc_display.qc_flag_block_plot(qc_variable, subplot_index=(1,))\n", + " qc_ax.set_xlabel(f\"UTC Time starts at {ds.time.data[0]}\")\n", + "\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e)" + ] + }, + { + "cell_type": "markdown", + "id": "d76e4d27", + "metadata": 
{}, + "source": [ + "#### bonus: choose variables to plot from a dropdown menu " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bb733804", + "metadata": {}, + "outputs": [], + "source": [ + "# Valid variables filtering\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_4 = df_info[(df_info.is_dim==False) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter_4" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "82d5c20d", + "metadata": {}, + "outputs": [], + "source": [ + "# example 1: using xarray plot\n", + "\n", + "# Uncomment the following cell to try the interactive plot (ctrl + /)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2fc9aaa4", + "metadata": {}, + "outputs": [], + "source": [ + "# %matplotlib widget\n", + "# plt.clf()\n", + "\n", + "# fig, ax = plt.subplots(figsize=(10, 4))\n", + "\n", + "# available_variables = df_filter_4.var_name.values\n", + "# @widgets.interact(var=available_variables)\n", + "# def update(var = available_variables[0]):\n", + "# fig.clear() # Remove old lines from plot and plot new one\n", + "# if len(ds[var].dims)==2:\n", + "# ds[var].plot(x=\"time\", add_colorbar=False)\n", + "# else:\n", + "# ds[var].plot()\n", + "# plt.grid()\n", + "# plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fabdd802", + "metadata": {}, + "outputs": [], + "source": [ + "# example 2: using act plot\n", + "\n", + "# Uncomment the following cell to try the interactive plot (ctrl + /)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "962b4186", + "metadata": {}, + "outputs": [], + "source": [ + "# %matplotlib widget\n", + "# plt.clf()\n", + "\n", + "# available_variables = df_filter_4.var_name.values\n", + "\n", + "\n", + "# @widgets.interact(var=available_variables)\n", + "# def 
update(var = available_variables[0]):\n", + "\n", + "# i_display = act.plotting.TimeSeriesDisplay(ds_act)\n", + "# i_display.add_subplots((1,), figsize=(10, 4))\n", + "# ax = i_display.plot(var, subplot_index=(0,), set_title=f\"{var} ({ds_act[var].attrs['long_name']})\",)\n", + "\n", + "# ax.set_xlabel(f\"UTC Time starts at {ds.time.data[0]}\")\n", + "# ax.grid()\n", + "# plt.show()\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.16" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/VARANAL/180varanaecmwf.c1.ipynb b/VAPs/quicklook/VARANAL/180varanaecmwf.c1.ipynb new file mode 100644 index 00000000..96a3022c --- /dev/null +++ b/VAPs/quicklook/VARANAL/180varanaecmwf.c1.ipynb @@ -0,0 +1,297 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# 180VARANAECMWF.C1 Plots\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/varanal) for more information about this vap." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib widget\n", + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import ipywidgets as widgets\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "# Data archive directory\n", + "DATA_DIR = r'/data/archive/'\n", + "\n", + "# Datastream info\n", + "DATASTREAM_NAME = '180varanaecmwf'\n", + "DATA_LEVEL = 'c1'\n", + "LOCATIONS = [{'end_date': '2016-05-31', 'facility': 'M1', 'site': 'awr', 'start_date': '2016-01-01'}, {'end_date': '2018-02-28', 'facility': 'C1', 'site': 'ena', 'start_date': '2017-06-01'}]" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "9faaf875", + "metadata": {}, + "source": [ + "## Define site, facility, and date range" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ac6764f5", + "metadata": {}, + "outputs": [], + "source": [ + "print(\"The following locations and date ranges are available for this VAP:\")\n", + "display(pd.DataFrame(LOCATIONS, columns=['site', 'facility', 'start_date', 'end_date']))" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "8d132223", + "metadata": {}, + "source": [ + "#### Define site, facility, and date range (date format: YYYY-MM-DD) using the variables below:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e563983a", + "metadata": {}, + "outputs": [], + "source": [ + "site_facility = ( 'awr', 'M1' )\n", + "\n", + "date_start = '2016-04-29'\n", + "date_end = '2016-05-01'" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "bccd3dfe-2f99-49a5-bace-ea37e8dd8fc3", + "metadata": {}, + "source": [ + "## Load data files\n", + "Load data files from /data/archive/" + ] + }, + { + "cell_type": 
"code", + "execution_count": null, + "id": "cb4b9a26-c574-49c0-a521-658fa553e39e", + "metadata": {}, + "outputs": [], + "source": [ + "# Compile list of files\n", + "site, facility = site_facility\n", + "d_date_start = datetime.strptime(date_start, '%Y-%m-%d')\n", + "d_date_end = datetime.strptime(date_end, '%Y-%m-%d')\n", + "dir_path = os.path.join(DATA_DIR + site, site + DATASTREAM_NAME + facility + r'.' + DATA_LEVEL )\n", + "dir_path\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6be8f3dc", + "metadata": {}, + "outputs": [], + "source": [ + "from datetime import date, timedelta\n", + "import pandas as pd\n", + "\n", + "def get_ARM_formated_dates(start_date, end_date):\n", + " \"\"\"\n", + " Get a list of ARM conventional formated date lists, based on start_date and end_date(inclusive)\n", + " EXAMPLE:\n", + " get_ARM_formated_dates(start_date=\"20180219\", end_date=\"20180221\")\n", + " >> [\"20180219\", \"20180220\", \"20180221\"] \n", + " \"\"\"\n", + " \n", + " _start_date = pd.to_datetime(start_date)\n", + " _end_date = pd.to_datetime(end_date)\n", + " \n", + " delta = _end_date - _start_date # returns timedelta \n", + " dates = []\n", + "\n", + " for i in range(delta.days + 1):\n", + " day = _start_date + timedelta(days=i)\n", + " day_formated = day.strftime(format=\"%Y%m%d\")\n", + " dates.append(day_formated)\n", + " return dates\n", + "\n", + "\n", + "get_ARM_formated_dates(start_date=date_start, end_date=date_end)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "51feea2e", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter a list of files based on date pattern\n", + "import glob\n", + "dates = get_ARM_formated_dates(start_date=date_start, end_date=date_end)\n", + "files_filter = []\n", + "for date in dates:\n", + " files_filter += glob.glob(f'{dir_path}/*.{date}*.*')\n", + " files_filter\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0e5d0d7", + 
"metadata": {}, + "outputs": [], + "source": [ + "# Load files as a single dataset\n", + "files_list = files_filter \n", + "ds = act.io.armfiles.read_netcdf(files_list)\n", + "ds.clean.cleanup()\n", + "print(f'{len(files_list)} files loaded')\n", + "ds\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "4a551094-9ec0-4b64-b80a-9940573c2f50", + "metadata": {}, + "source": [ + "## Plot time series data\n", + "#### Define the list of variables to be plotted:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d09b789e-84f1-4605-846b-a72c110c8048", + "metadata": {}, + "outputs": [], + "source": [ + "variables_to_plot = ['T_adv_h', 'T_adv_v', 'q_adv_h']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3458fb08-035b-4898-9253-0a94e6f9c97b", + "metadata": {}, + "outputs": [], + "source": [ + "ts_display = act.plotting.TimeSeriesDisplay(ds)\n", + "ts_display.add_subplots((len(variables_to_plot),), figsize = (9.5,4*len(variables_to_plot)))\n", + "\n", + "for i,v in enumerate(variables_to_plot):\n", + " ts_ax = ts_display.plot(v, subplot_index=(i,), set_title=ds.variables[v].attrs['long_name'],)\n", + " ts_ax.grid()\n", + "\n", + "plt.show()\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "2308db16-f362-4033-a11a-c3e5e75ad9ba", + "metadata": {}, + "source": [ + "## Field selection dropdown menu\n", + "Select variable to be plotted from a dropdown menu" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d5792fa8-2253-441a-8ac6-1e07a0345031", + "metadata": {}, + "outputs": [], + "source": [ + "plt.ioff()\n", + "\n", + "# populate dropdown menu with available variables \n", + "available_variables = [v for v in ds.variables if not('time' in v or v.startswith('qc_') or v.startswith('source_')) and 'long_name' in ds.variables[v].attrs]\n", + "d_variable = 'T_adv_h'\n", + "dropdown = widgets.Dropdown(\n", + " options = [(ds.variables[v].attrs['long_name'], v) for v in 
available_variables],\n", + " value= d_variable,\n", + " description='Field:',\n", + " disabled=False,\n", + ")\n", + "dropdown.layout.margin = '0px 30% 0px 20%'\n", + "dropdown.layout.width = '50%'\n", + "\n", + "# set up display\n", + "i_display = act.plotting.TimeSeriesDisplay(ds)\n", + "i_display.add_subplots((1,), figsize = (9.5,5))\n", + "i_ax = i_display.plot(d_variable, subplot_index=(0,), set_title=ds.variables[d_variable].attrs['long_name'],)\n", + "i_ax.grid()\n", + "i_fig = i_display.fig\n", + "\n", + "# update plot callback function\n", + "def update_plot(change):\n", + " i_ax.cla()\n", + " i_ax_new = i_display.plot(change.new, subplot_index=(0,), set_title=ds.variables[change.new].attrs['long_name'],)\n", + " i_ax_new.grid()\n", + " i_fig.canvas.draw()\n", + " i_fig.canvas.flush_events()\n", + "\n", + "dropdown.observe(update_plot, names='value')\n", + "\n", + "widgets.AppLayout(\n", + " header=dropdown,\n", + " center=i_fig.canvas,\n", + " pane_heights=[1, 6,1]\n", + ")\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.12" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/VARANAL/180varanamerra001.c1.ipynb b/VAPs/quicklook/VARANAL/180varanamerra001.c1.ipynb new file mode 100644 index 00000000..a523a3bc --- /dev/null +++ b/VAPs/quicklook/VARANAL/180varanamerra001.c1.ipynb @@ -0,0 +1,297 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# 180VARANAMERRA001.C1 Plots\n", + 
"\n", + "[Click here](https://www.arm.gov/capabilities/vaps/varanal) for more information about this vap." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib widget\n", + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import ipywidgets as widgets\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "# Data archive directory\n", + "DATA_DIR = r'/data/archive/'\n", + "\n", + "# Datastream info\n", + "DATASTREAM_NAME = '180varanamerra001'\n", + "DATA_LEVEL = 'c1'\n", + "LOCATIONS = [{'end_date': '2008-11-30', 'facility': 'M1', 'site': 'hfe', 'start_date': '2008-11-01'}]" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "9faaf875", + "metadata": {}, + "source": [ + "## Define site, facility, and date range" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ac6764f5", + "metadata": {}, + "outputs": [], + "source": [ + "print(\"The following locations and date ranges are available for this VAP:\")\n", + "display(pd.DataFrame(LOCATIONS, columns=['site', 'facility', 'start_date', 'end_date']))" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "8d132223", + "metadata": {}, + "source": [ + "#### Define site, facility, and date range (date format: YYYY-MM-DD) using the variables below:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e563983a", + "metadata": {}, + "outputs": [], + "source": [ + "site_facility = ( 'hfe', 'M1' )\n", + "\n", + "date_start = '2008-10-30'\n", + "date_end = '2008-11-01'" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "bccd3dfe-2f99-49a5-bace-ea37e8dd8fc3", + "metadata": {}, + "source": [ + "## Load data files\n", + "Load data files from /data/archive/" + ] + }, + { 
+ "cell_type": "code", + "execution_count": null, + "id": "cb4b9a26-c574-49c0-a521-658fa553e39e", + "metadata": {}, + "outputs": [], + "source": [ + "# Compile list of files\n", + "site, facility = site_facility\n", + "d_date_start = datetime.strptime(date_start, '%Y-%m-%d')\n", + "d_date_end = datetime.strptime(date_end, '%Y-%m-%d')\n", + "dir_path = os.path.join(DATA_DIR + site, site + DATASTREAM_NAME + facility + r'.' + DATA_LEVEL )\n", + "dir_path\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6be8f3dc", + "metadata": {}, + "outputs": [], + "source": [ + "from datetime import date, timedelta\n", + "import pandas as pd\n", + "\n", + "def get_ARM_formated_dates(start_date, end_date):\n", + " \"\"\"\n", + " Get a list of ARM conventional formated date lists, based on start_date and end_date(inclusive)\n", + " EXAMPLE:\n", + " get_ARM_formated_dates(start_date=\"20180219\", end_date=\"20180221\")\n", + " >> [\"20180219\", \"20180220\", \"20180221\"] \n", + " \"\"\"\n", + " \n", + " _start_date = pd.to_datetime(start_date)\n", + " _end_date = pd.to_datetime(end_date)\n", + " \n", + " delta = _end_date - _start_date # returns timedelta \n", + " dates = []\n", + "\n", + " for i in range(delta.days + 1):\n", + " day = _start_date + timedelta(days=i)\n", + " day_formated = day.strftime(format=\"%Y%m%d\")\n", + " dates.append(day_formated)\n", + " return dates\n", + "\n", + "\n", + "get_ARM_formated_dates(start_date=date_start, end_date=date_end)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "51feea2e", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter a list of files based on date pattern\n", + "import glob\n", + "dates = get_ARM_formated_dates(start_date=date_start, end_date=date_end)\n", + "files_filter = []\n", + "for date in dates:\n", + " files_filter += glob.glob(f'{dir_path}/*.{date}*.*')\n", + " files_filter\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": 
"b0e5d0d7", + "metadata": {}, + "outputs": [], + "source": [ + "# Load files as a single dataset\n", + "files_list = files_filter \n", + "ds = act.io.armfiles.read_netcdf(files_list)\n", + "ds.clean.cleanup()\n", + "print(f'{len(files_list)} files loaded')\n", + "ds\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "4a551094-9ec0-4b64-b80a-9940573c2f50", + "metadata": {}, + "source": [ + "## Plot time series data\n", + "#### Define the list of variables to be plotted:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d09b789e-84f1-4605-846b-a72c110c8048", + "metadata": {}, + "outputs": [], + "source": [ + "variables_to_plot = ['year', 'month', 'day']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3458fb08-035b-4898-9253-0a94e6f9c97b", + "metadata": {}, + "outputs": [], + "source": [ + "ts_display = act.plotting.TimeSeriesDisplay(ds)\n", + "ts_display.add_subplots((len(variables_to_plot),), figsize = (9.5,4*len(variables_to_plot)))\n", + "\n", + "for i,v in enumerate(variables_to_plot):\n", + " ts_ax = ts_display.plot(v, subplot_index=(i,), set_title=ds.variables[v].attrs['long_name'],)\n", + " ts_ax.grid()\n", + "\n", + "plt.show()\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "2308db16-f362-4033-a11a-c3e5e75ad9ba", + "metadata": {}, + "source": [ + "## Field selection dropdown menu\n", + "Select variable to be plotted from a dropdown menu" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d5792fa8-2253-441a-8ac6-1e07a0345031", + "metadata": {}, + "outputs": [], + "source": [ + "plt.ioff()\n", + "\n", + "# populate dropdown menu with available variables \n", + "available_variables = [v for v in ds.variables if not('time' in v or v.startswith('qc_') or v.startswith('source_')) and 'long_name' in ds.variables[v].attrs]\n", + "d_variable = 'year'\n", + "dropdown = widgets.Dropdown(\n", + " options = [(ds.variables[v].attrs['long_name'], v) for v in 
available_variables],\n", + " value= d_variable,\n", + " description='Field:',\n", + " disabled=False,\n", + ")\n", + "dropdown.layout.margin = '0px 30% 0px 20%'\n", + "dropdown.layout.width = '50%'\n", + "\n", + "# set up display\n", + "i_display = act.plotting.TimeSeriesDisplay(ds)\n", + "i_display.add_subplots((1,), figsize = (9.5,5))\n", + "i_ax = i_display.plot(d_variable, subplot_index=(0,), set_title=ds.variables[d_variable].attrs['long_name'],)\n", + "i_ax.grid()\n", + "i_fig = i_display.fig\n", + "\n", + "# update plot callback function\n", + "def update_plot(change):\n", + " i_ax.cla()\n", + " i_ax_new = i_display.plot(change.new, subplot_index=(0,), set_title=ds.variables[change.new].attrs['long_name'],)\n", + " i_ax_new.grid()\n", + " i_fig.canvas.draw()\n", + " i_fig.canvas.flush_events()\n", + "\n", + "dropdown.observe(update_plot, names='value')\n", + "\n", + "widgets.AppLayout(\n", + " header=dropdown,\n", + " center=i_fig.canvas,\n", + " pane_heights=[1, 6,1]\n", + ")\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.12" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/VARANAL/VARANAL_tutorial.ipynb b/VAPs/quicklook/VARANAL/VARANAL_tutorial.ipynb new file mode 100644 index 00000000..622bb767 --- /dev/null +++ b/VAPs/quicklook/VARANAL/VARANAL_tutorial.ipynb @@ -0,0 +1,838 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# 180VARANAECMWF.C1 Notebook\n", + "\n", + "[Click 
here](https://www.arm.gov/capabilities/vaps/varanal) for more information about this vap." + ] + }, + { + "cell_type": "markdown", + "id": "97097763", + "metadata": {}, + "source": [ + "In this notebook, we demonstrate the workflow to explore ARM vap data (using 180varanaecmwf as an example.) Value-added products (VAPs) are higher-order data products that have been analyzed and processed to ease scientists’ use of ARM data in atmospheric research and global climate models. \n", + "Here is the main content we will cover." + ] + }, + { + "cell_type": "markdown", + "id": "eddec40f", + "metadata": {}, + "source": [ + "# Table of Contents\n", + "## Access the data\n", + "* How to retrieve the data\n", + "* Data path and file name conventions\n", + "* Load data\n", + "## Explore the data\n", + "* NetCDF Data structure\n", + "* Xarray essentials\n", + "* Xarray Variable\n", + "## Plot the data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "import random\n", + "\n", + "import glob\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "id": "2372d5be", + "metadata": {}, + "source": [ + "## Access the data" + ] + }, + { + "cell_type": "markdown", + "id": "5b0d9684", + "metadata": {}, + "source": [ + "### How to retrieve the data\n", + "We assume the path \"/data/archive\" is available where you are running this notebook. You can use `os.path.exists(\"/data/archive\")` to verify if the path exists on your machine.\n", + "\n", + "\n", + "### Data path and file name conventions\n", + "There are several common terms regarding ARM data, for example, data-stream-name, data-level, site, facility, instrument, etc. 
(For more details, please see [ARM Data File Standards Version 1.2](https://www.google.com/search?q=arm+datastream+facility+cite+definition&rlz=1C1GCEJ_enUS1029US1029&ei=hb41ZICrBPukqtsPvMODIA&ved=0ahUKEwjAgeiS0aL-AhV7kmoFHbzhAAQQ4dUDCBA&uact=5&oq=arm+datastream+facility+cite+definition&gs_lcp=Cgxnd3Mtd2l6LXNlcnAQAzIFCAAQogQyBQgAEKIEMgUIABCiBDIFCAAQogQyBQgAEKIEOgoIABBHENYEELADOgoIIRCgARDDBBAKSgQIQRgAUJ4DWIoOYO0PaAFwAXgAgAGMAYgBuQmSAQMyLjmYAQCgAQHIAQjAAQE&sclient=gws-wiz-serp).) \n", + "\n", + "For example, this notebook is called `180varanaecmwf.c1`, where `180varanaecmwf` is the \"datastream name\", and `c1` is the \"data level\".\n", + "\n", + "This datastream also contains site `awr` and facility `M1`. (Note: an individual datastream might have multiple site-facility pairs.)\n", + "In such a case, the data of this data-stream is stored at `/data/archive/awr/awr180varanaecmwfM1.c1`, which is in the format of `{DATA_DIR}/{site}/{site}{DATASTREAM_NAME}{facility}.{DATA_LEVEL}`. We can use the following method to assign the data-stream directory: `datastream_dir = os.path.join(DATA_DIR, site, site + DATASTREAM_NAME + facility + '.' + DATA_LEVEL )`\n", + "\n", + "The data files under datastream_dir also follow naming conventions. But once you reach the datastream_dir level, the most important file naming convention to differentiate the files is \"yyyyMMdd.hhmmss\", which comes in handy to filter out files based on datetime. 
For example, we can use `glob.glob(f'{datastream_dir}/*.200709*.*')` to filter files from September 2007.\n", + "\n", + "(Note: refer to https://adc.arm.gov/discovery/#/ and the https://adc.arm.gov/solr8/metadata/select API to explore ARM datastreams and their associated available sites and facilities.)\n", + "\n", + "Please see the following examples in action" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d7e9eb85", + "metadata": {}, + "outputs": [], + "source": [ + "# Verify if DATA_DIR path exists\n", + "DATA_DIR = \"/data/archive\"\n", + "os.path.exists(DATA_DIR)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "586993fd", + "metadata": {}, + "outputs": [], + "source": [ + "# Specify datastream_dir following the path conventions and check its existence\n", + "DATASTREAM_NAME = \"180varanaecmwf\"\n", + "DATA_LEVEL = \"c1\"\n", + "site = \"awr\"\n", + "facility = \"M1\"\n", + "datastream_dir = os.path.join(DATA_DIR, site, site + DATASTREAM_NAME + facility + '.' 
+ DATA_LEVEL )\n", + "print(datastream_dir)\n", + "print(os.path.exists(datastream_dir))\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0742f7c1", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: list 5 (random) files under datastream_dir\n", + "files = os.listdir(datastream_dir)\n", + "files[:5]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "39b98a36", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: get most recent file\n", + "list_of_files = glob.glob(f\"{datastream_dir}/*\") # * means all if need specific format then *.csv\n", + "latest_file = max(list_of_files, key=os.path.getctime)\n", + "latest_file" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "902d514e", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus sort datastream files based on datetime\n", + "files = os.listdir(datastream_dir)\n", + "file_sorted = files.copy()\n", + "file_sorted.sort() \n", + "print(file_sorted[:5])\n", + "\n", + "# to reverse\n", + "file_sorted_reverse = files.copy() \n", + "file_sorted_reverse.sort(reverse=True)\n", + "print(file_sorted_reverse[:5])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ec5923b2", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: pattern matching\n", + "# filter the 200709** files under datastream_dir\n", + "files_filter = glob.glob(f'{datastream_dir}/*.200709*.*')\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c4a4aa18", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: use armnotebook_utils.file_filter (TODO) to filter files based on datastream info \n" + ] + }, + { + "cell_type": "markdown", + "id": "cfeb9efc", + "metadata": {}, + "source": [ + "### Load data\n", + "The arm data is stored in [NetCDF](https://en.wikipedia.org/wiki/NetCDF#:~:text=%22NetCDF%20(network%20Common%20Data%20Form,format%20for%20representing%20scientific%20data.) format. 
We can use xarray's `open_dataset` method to load single file, or `open_mfdataset` to open multiple files. (Note: the latter will still return a single xarray dataset object by combining multiple files.)\n", + "\n", + "Note: open_dataset keeps the file handle open and lazy loads its contents. All parameters are passed directly to open_dataset. It is a preferable method over load_dataset for memory-effiency. (more details, please see [xarray.open_dataset](https://docs.xarray.dev/en/stable/generated/xarray.open_dataset.html))\n", + "\n", + "See the following example in action" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a440a329", + "metadata": {}, + "outputs": [], + "source": [ + "# open single file\n", + "full_path = latest_file\n", + "print(full_path)\n", + "ds_single = xr.open_dataset(full_path)\n", + "ds_single" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0143a3d", + "metadata": {}, + "outputs": [], + "source": [ + "print(ds_single)\n", + "print(type(ds_single))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1c0f8939", + "metadata": {}, + "outputs": [], + "source": [ + "# open multiple files\n", + "n_files = 3\n", + "full_paths = [os.path.join(datastream_dir, f_path) for f_path in file_sorted_reverse[:n_files]]\n", + "print(full_paths)\n", + "try: # Note: sometimes multiple files cannot be merged, so we used try except here.\n", + " ds_mutiple = xr.open_mfdataset(full_paths)\n", + " # ds_mutiple\n", + " print(type(ds_mutiple))\n", + " print(ds_mutiple)\n", + "except Exception as e:\n", + " print(e)" + ] + }, + { + "cell_type": "markdown", + "id": "3fa59943", + "metadata": {}, + "source": [ + "## Explore the data" + ] + }, + { + "cell_type": "markdown", + "id": "571f69b9", + "metadata": {}, + "source": [ + "### NetCDF Data structure \n", + "\n", + "\n", + "(If you are confident with NetCDF basics and xarray essentials, feel free to skip this session.)\n", + "Before we dive into 
data exploration, there are some essential concepts we should be familiar with (shown below): \n", + "* Dataset\n", + "* Data array\n", + "* Variable\n", + "* Dimension\n", + "* Coordinate\n", + "* Data Type\n", + "* Meta Data (Attributes)\n", + "\n", + "We will not go into details about NetCDF basics, but here are some references you might find helpful:\n", + "* [Components of a NetCDF Dataset](https://iprc.soest.hawaii.edu/users/xfu/tool/guidef-7.html#)\n", + "* [Network Common Data Form (NetCDF)](https://www.unidata.ucar.edu/software/netcdf/) and [A Brief History of (netCDF) Time](https://www.unidata.ucar.edu/software/netcdf/time/recs.html)\n", + "\n", + "\n", + "\n", + "### Xarray essentials\n", + "Earlier we introduced the xarray package and used `open_dataset` and `open_mfdataset` to load NetCDF data files as an xarray Dataset object. Recall that you can review the data in a notebook by using `print(ds)` or `ds`. \n", + "\n", + "Using xarray to retrieve the aforementioned NetCDF basics is straightforward. 
In general,\n", + "* Dataset: `ds`\n", + "* Data array: `ds.variables`\n", + "* Variable: `ds.variables`\n", + "* Dimension: `ds.dims`\n", + "* Coordinate: `ds.coords`\n", + "* Data Type: `type(ds)`\n", + "* Meta Data (Attributes): `ds.attrs`\n", + "\n", + "Also, here are some references if you are new to xarray:\n", + "* [Xarray in 45 minutes](https://tutorial.xarray.dev/overview/xarray-in-45-min.html)\n", + "* [Handling NetCDF Files using XArray for Absolute Beginners](https://towardsdatascience.com/handling-netcdf-files-using-xarray-for-absolute-beginners-111a8ab4463f)\n", + "\n", + "Try the following commands in action\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b3eac323", + "metadata": {}, + "outputs": [], + "source": [ + "ds = ds_single" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "77ecf85d", + "metadata": {}, + "outputs": [], + "source": [ + "# Dataset \n", + "ds" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "25e7de09", + "metadata": {}, + "outputs": [], + "source": [ + "# Data array, Variable\n", + "# Note: the info can be overwhelming. 
Do not attempt to grasp everything at first glance.\n", + "ds.variables" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8c41b67e", + "metadata": {}, + "outputs": [], + "source": [ + "# dimensions\n", + "ds.dims" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "156f1dfc", + "metadata": {}, + "outputs": [], + "source": [ + "# coordinates\n", + "ds.coords" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "277d6064", + "metadata": {}, + "outputs": [], + "source": [ + "# Meta Data (Attributes)\n", + "ds.attrs" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d334681f", + "metadata": {}, + "outputs": [], + "source": [ + "# type\n", + "print(type(ds))\n", + "print(type(ds.variables))\n", + "print(type(ds.dims))\n", + "print(type(ds.coords))\n", + "print(type(ds.attrs))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "643399d6", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: retrieve variable names only\n", + "list(ds.variables)" + ] + }, + { + "cell_type": "markdown", + "id": "1cf36878", + "metadata": {}, + "source": [ + "#### Discussion: variables vs. coordinates vs. dimensions. \n", + "From [Components of a NetCDF Dataset](https://iprc.soest.hawaii.edu/users/xfu/tool/guidef-7.html#):\n", + "\n", + "* Dimension: A dimension may be used to represent a real physical dimension, for example, time, latitude, longitude, or height. A dimension might also be used to index other quantities, for example station or model-run-number.\n", + "A netCDF dimension has both a name and a length. A dimension length is an arbitrary positive integer, except that one dimension in a netCDF dataset can have the length UNLIMITED.\n", + "\n", + "* Variables: Variables are used to store the bulk of the data in a netCDF dataset. A variable represents an array of values of the same type. A scalar value is treated as a 0-dimensional array. 
A variable has a name, a data type, and a shape described by its list of dimensions specified when the variable is created. A variable may also have associated attributes, which may be added, deleted or changed after the variable is created.\n", + "\n", + "* Coordinate (Variables): It is legal for a variable to have the same name as a dimension. Such variables have no special meaning to the netCDF library. However there is a convention that such variables should be treated in a special way by software using this library.\n", + "A variable with the same name as a dimension is called a coordinate variable. It typically defines a physical coordinate corresponding to that dimension.\n", + "\n", + "\n", + "Tips: By definition, coordinates and dimensions are also variables. For example, ds.time is a coordinate but it is also a special variable. In practice, however, when we talk about a variable, we mean a regular (non-coordinate) variable. We will use this convention for the remainder of this notebook. " + ] + }, + { + "cell_type": "markdown", + "id": "d47120f9", + "metadata": {}, + "source": [ + "### Xarray Variable\n", + "\n", + "\n", + "We can use `ds.variables` to access the variables of a dataset. However, printing out the whole dataset can often be overwhelming. Instead, we will work on an individual variable (also called an xarray data array) using the `ds[var_name]` syntax, where `var_name` is the name of the variable. See the following examples in action.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5a57e136", + "metadata": {}, + "outputs": [], + "source": [ + "print(type(ds[\"time\"]))\n", + "ds[\"time\"]" + ] + }, + { + "cell_type": "markdown", + "id": "51ec643e", + "metadata": {}, + "source": [ + "#### Variable properties\n", + "For an individual variable, the following properties are the most commonly used, listed with the associated xarray attribute to retrieve them. 
(assuming var = ds[var_name])\n", + "* name: `var.name`\n", + "* data content: `var.data`\n", + "* attributes: `var.attrs`\n", + "* dimensions: `var.dims`\n", + "* data type: `var.data.dtype`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a632a525", + "metadata": {}, + "outputs": [], + "source": [ + "var_name = \"time\"\n", + "var = ds[var_name]\n", + "\n", + "print(\"var.name: \\n\", var.name, \"\\n\")\n", + "print(\"var.data: \\n\", var.data, \"\\n\")\n", + "print(\"var.attrs: \\n\", var.attrs, \"\\n\")\n", + "print(\"var.dims: \\n\", var.dims, \"\\n\")\n", + "print(\"var.data.dtype: \\n\", var.data.dtype, \"\\n\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b256b07f", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: individual variable info table (collect and display variable info in a tabular format)\n", + "df_info = pd.DataFrame()\n", + "df_info[\"var_name\"] = list(ds.variables)\n", + "df_info[\"dims\"] = df_info.var_name.apply(lambda x: ds[x].dims)\n", + "df_info[\"is_dim\"] = df_info.var_name.apply(lambda x: x in ds.dims)\n", + "df_info[\"n_dim\"] = df_info.var_name.apply(lambda x: len(ds[x].dims))\n", + "df_info[\"attrs\"] = df_info.var_name.apply(lambda x: ds[x].attrs)\n", + "df_info[\"dtype\"] = df_info.var_name.apply(lambda x: ds[x].data.dtype)\n", + "df_info" + ] + }, + { + "cell_type": "markdown", + "id": "e6a36b96", + "metadata": {}, + "source": [ + "### Data cleaning/Preprocessing (skipped)\n", + "Data cleaning and preprocessing is an important stage in the data analysis pipeline. However, it is outside the scope of this notebook. For more information about data cleaning and preprocessing basics, here are some references. 
\n", + "* [Xarray Fundamentals](https://earth-env-data-science.github.io/lectures/xarray/xarray.html)\n", + "* [Xarray Tutorial — Pangeo Gallery documentation](http://gallery.pangeo.io/repos/pangeo-data/pangeo-tutorial-gallery/xarray.html)\n", + "* [Pythonic Data Cleaning With pandas and NumPy](https://realpython.com/python-data-cleaning-numpy-pandas/)\n", + "* [Pandas - Cleaning Data](https://www.w3schools.com/python/pandas/pandas_cleaning.asp)" + ] + }, + { + "cell_type": "markdown", + "id": "ae6e1a69", + "metadata": {}, + "source": [ + "### Plotting\n", + "Data visualization (or plotting) is another important data analysis topic and deserves its own discussion. In this notebook we will only demonstrate basic tools for simple data visualization tasks.\n", + "\n", + "Here are some references you might find useful:\n", + "* xarray plotting: https://docs.xarray.dev/en/stable/user-guide/plotting.html\n", + "* Atmospheric Community Toolkit (ACT): https://arm-doe.github.io/ACT/index.html\n", + "\n", + "Note: this notebook is auto-generated using a template. It uses a generic heuristic to select the variable(s) to plot and is not customized for each individual notebook. Feel free to change the variables of interest, especially if a certain figure fails to plot." + ] + }, + { + "cell_type": "markdown", + "id": "7e417fb2", + "metadata": {}, + "source": [ + "#### 1-dimensional basic time series plot\n", + "\n", + "For the following plot we would like to find variables such that\n", + "* it has one and only one dimension\n", + "* \"time\" is its coordinate variable\n", + "* it is not a dimension itself,\n", + "* it is not a special variable with substrings within [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "\n", + "Please see the following example in action. 
(For more details about pandas filtering, please see the following references.)\n", + "* [pandas: multiple conditions while indexing data frame](https://stackoverflow.com/questions/22591174/pandas-multiple-conditions-while-indexing-data-frame-unexpected-behavior)\n", + "* [pandas.DataFrame.apply](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.apply.html)\n", + "* [How to test if a string contains one of the substrings in a list...](https://stackoverflow.com/questions/26577516/how-to-test-if-a-string-contains-one-of-the-substrings-in-a-list-in-pandas)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "30edac2d", + "metadata": {}, + "outputs": [], + "source": [ + "# (pandas query) filter n_dim==1, non-dimensional, and does not contain the following substrings\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter = df_info[(df_info.n_dim == 1) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (df_info.is_dim==False) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4777a659", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " var_1d = df_filter.var_name.values[0]\n", + " var_1d\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fb5ae985", + "metadata": {}, + "outputs": [], + "source": [ + "# Note: if failed, change to another variable to plot.\n", + "try:\n", + " ds[var_1d].plot()\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "markdown", + "id": "9e41066e", + "metadata": {}, + "source": [ + "#### 2-dimensional basic plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "09bd4adc", + "metadata": {}, + "outputs": [], + "source": [ + "# (pandas query) filter n_dim==2, non-dimensional, and does not contain the 
following substrings\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_2 = df_info[(df_info.n_dim == 2) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (df_info.is_dim==False) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter_2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2cdea1a9", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " var_2d = df_filter_2.var_name.values[0]\n", + " var_2d\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f5f48879", + "metadata": {}, + "outputs": [], + "source": [ + "# Note: if failed, change to another variable to plot.\n", + "try:\n", + " print(ds[var_2d].dims)\n", + " # ds[var_2d].plot()\n", + "\n", + " # conventionally, use \"time\" as x-axis\n", + " ds[var_2d].plot(x=\"time\")\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "markdown", + "id": "7554b2a6", + "metadata": {}, + "source": [ + "#### qc-plotting (optional)\n", + "\n", + "Note: act qc plotting has more strict requirement, one of them is the associated qc variable need to have \"flag_masks\" attributes. 
Which is added by using ds = act.io.armfiles.read_netcdf(files_list), then ds.clean.cleanup()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "71c2096f", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " ds_act = act.io.armfiles.read_netcdf(full_path)\n", + " print(type(ds_act))\n", + " ds_act.clean.cleanup()\n", + "\n", + " # or \n", + " # ds.clean.cleanup()\n", + "except Exception as e:\n", + " print(\"ERROR\", e)\n", + " ds_act = ds" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "844f8505", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter valid variables for ACT qc plotting\n", + "condition = (\"qc_\" + df_info.var_name).apply(lambda x: ds_act[x].attrs.get(\"flag_masks\") is not None \n", + " if x in list(ds.variables) else False)\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_3 = df_info[(df_info.is_dim==False) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings))) &\n", + " condition\n", + " ]\n", + "df_filter_3" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ecd38cc6", + "metadata": {}, + "outputs": [], + "source": [ + "# QC Plot\n", + "try:\n", + " qc_variable = df_filter_3.var_name.values[0]\n", + " print(qc_variable)\n", + "except Exception as e:\n", + " print(e)\n", + " \n", + "try:\n", + "\n", + " # Plot\n", + " qc_display = act.plotting.TimeSeriesDisplay(ds_act)\n", + " qc_display.add_subplots((2,), figsize = (9.5,10))\n", + " qc_ax = qc_display.plot(qc_variable, subplot_index=(0,), set_title=\"QC results on field: \" + qc_variable,)\n", + " qc_ax.grid()\n", + " qc_display.qc_flag_block_plot(qc_variable, subplot_index=(1,))\n", + " qc_ax.set_xlabel(f\"UTC Time starts at {ds.time.data[0]}\")\n", + "\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e)" + ] + }, + { + "cell_type": "markdown", + "id": "d76e4d27", + "metadata": 
{}, + "source": [ + "#### bonus: choose variables to plot from a dropdown menu " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bb733804", + "metadata": {}, + "outputs": [], + "source": [ + "# Valid variables filtering\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_4 = df_info[(df_info.is_dim==False) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter_4" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "82d5c20d", + "metadata": {}, + "outputs": [], + "source": [ + "# example 1: using xarray plot\n", + "\n", + "# Uncomment the following cell to try the interactive plot (ctrl + /)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2fc9aaa4", + "metadata": {}, + "outputs": [], + "source": [ + "# %matplotlib widget\n", + "# plt.clf()\n", + "\n", + "# fig, ax = plt.subplots(figsize=(10, 4))\n", + "\n", + "# available_variables = df_filter_4.var_name.values\n", + "# @widgets.interact(var=available_variables)\n", + "# def update(var = available_variables[0]):\n", + "# fig.clear() # Remove old lines from plot and plot new one\n", + "# if len(ds[var].dims)==2:\n", + "# ds[var].plot(x=\"time\", add_colorbar=False)\n", + "# else:\n", + "# ds[var].plot()\n", + "# plt.grid()\n", + "# plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fabdd802", + "metadata": {}, + "outputs": [], + "source": [ + "# example 2: using act plot\n", + "\n", + "# Uncomment the following cell to try the interactive plot (ctrl + /)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "962b4186", + "metadata": {}, + "outputs": [], + "source": [ + "# %matplotlib widget\n", + "# plt.clf()\n", + "\n", + "# available_variables = df_filter_4.var_name.values\n", + "\n", + "\n", + "# @widgets.interact(var=available_variables)\n", + "# def 
update(var = available_variables[0]):\n", + "\n", + "# i_display = act.plotting.TimeSeriesDisplay(ds_act)\n", + "# i_display.add_subplots((1,), figsize=(10, 4))\n", + "# ax = i_display.plot(var, subplot_index=(0,), set_title=f\"{var} ({ds_act[var].attrs['long_name']})\",)\n", + "\n", + "# ax.set_xlabel(f\"UTC Time starts at {ds.time.data[0]}\")\n", + "# ax.grid()\n", + "# plt.show()\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.16" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/VARANAL3D/180varanal3dera5.c1.ipynb b/VAPs/quicklook/VARANAL3D/180varanal3dera5.c1.ipynb new file mode 100644 index 00000000..662c5dee --- /dev/null +++ b/VAPs/quicklook/VARANAL3D/180varanal3dera5.c1.ipynb @@ -0,0 +1,297 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# 180VARANAL3DERA5.C1 Plots\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/varanal3d) for more information about this vap." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib widget\n", + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import ipywidgets as widgets\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "# Data archive directory\n", + "DATA_DIR = r'/data/archive/'\n", + "\n", + "# Datastream info\n", + "DATASTREAM_NAME = '180varanal3dera5'\n", + "DATA_LEVEL = 'c1'\n", + "LOCATIONS = [{'end_date': '2011-06-06', 'facility': 'C1', 'site': 'sgp', 'start_date': '2011-04-22'}]" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "9faaf875", + "metadata": {}, + "source": [ + "## Define site, facility, and date range" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ac6764f5", + "metadata": {}, + "outputs": [], + "source": [ + "print(\"The following locations and date ranges are available for this VAP:\")\n", + "display(pd.DataFrame(LOCATIONS, columns=['site', 'facility', 'start_date', 'end_date']))" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "8d132223", + "metadata": {}, + "source": [ + "#### Define site, facility, and date range (date format: YYYY-MM-DD) using the variables below:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e563983a", + "metadata": {}, + "outputs": [], + "source": [ + "site_facility = ( 'sgp', 'C1' )\n", + "\n", + "date_start = '2011-04-20'\n", + "date_end = '2011-04-22'" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "bccd3dfe-2f99-49a5-bace-ea37e8dd8fc3", + "metadata": {}, + "source": [ + "## Load data files\n", + "Load data files from /data/archive/" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cb4b9a26-c574-49c0-a521-658fa553e39e", + 
"metadata": {}, + "outputs": [], + "source": [ + "# Compile list of files\n", + "site, facility = site_facility\n", + "d_date_start = datetime.strptime(date_start, '%Y-%m-%d')\n", + "d_date_end = datetime.strptime(date_end, '%Y-%m-%d')\n", + "dir_path = os.path.join(DATA_DIR + site, site + DATASTREAM_NAME + facility + r'.' + DATA_LEVEL )\n", + "dir_path\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6be8f3dc", + "metadata": {}, + "outputs": [], + "source": [ + "from datetime import date, timedelta\n", + "import pandas as pd\n", + "\n", + "def get_ARM_formated_dates(start_date, end_date):\n", + " \"\"\"\n", + " Get a list of ARM conventional formated date lists, based on start_date and end_date(inclusive)\n", + " EXAMPLE:\n", + " get_ARM_formated_dates(start_date=\"20180219\", end_date=\"20180221\")\n", + " >> [\"20180219\", \"20180220\", \"20180221\"] \n", + " \"\"\"\n", + " \n", + " _start_date = pd.to_datetime(start_date)\n", + " _end_date = pd.to_datetime(end_date)\n", + " \n", + " delta = _end_date - _start_date # returns timedelta \n", + " dates = []\n", + "\n", + " for i in range(delta.days + 1):\n", + " day = _start_date + timedelta(days=i)\n", + " day_formated = day.strftime(format=\"%Y%m%d\")\n", + " dates.append(day_formated)\n", + " return dates\n", + "\n", + "\n", + "get_ARM_formated_dates(start_date=date_start, end_date=date_end)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "51feea2e", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter a list of files based on date pattern\n", + "import glob\n", + "dates = get_ARM_formated_dates(start_date=date_start, end_date=date_end)\n", + "files_filter = []\n", + "for date in dates:\n", + " files_filter += glob.glob(f'{dir_path}/*.{date}*.*')\n", + " files_filter\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0e5d0d7", + "metadata": {}, + "outputs": [], + "source": [ + "# Load files as a single 
dataset\n", + "files_list = files_filter \n", + "ds = act.io.armfiles.read_netcdf(files_list)\n", + "ds.clean.cleanup()\n", + "print(f'{len(files_list)} files loaded')\n", + "ds\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "4a551094-9ec0-4b64-b80a-9940573c2f50", + "metadata": {}, + "source": [ + "## Plot time series data\n", + "#### Define the list of variables to be plotted:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d09b789e-84f1-4605-846b-a72c110c8048", + "metadata": {}, + "outputs": [], + "source": [ + "variables_to_plot = ['u', 'v', 'T']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3458fb08-035b-4898-9253-0a94e6f9c97b", + "metadata": {}, + "outputs": [], + "source": [ + "ts_display = act.plotting.TimeSeriesDisplay(ds)\n", + "ts_display.add_subplots((len(variables_to_plot),), figsize = (9.5,4*len(variables_to_plot)))\n", + "\n", + "for i,v in enumerate(variables_to_plot):\n", + " ts_ax = ts_display.plot(v, subplot_index=(i,), set_title=ds.variables[v].attrs['long_name'],)\n", + " ts_ax.grid()\n", + "\n", + "plt.show()\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "2308db16-f362-4033-a11a-c3e5e75ad9ba", + "metadata": {}, + "source": [ + "## Field selection dropdown menu\n", + "Select variable to be plotted from a dropdown menu" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d5792fa8-2253-441a-8ac6-1e07a0345031", + "metadata": {}, + "outputs": [], + "source": [ + "plt.ioff()\n", + "\n", + "# populate dropdown menu with available variables \n", + "available_variables = [v for v in ds.variables if not('time' in v or v.startswith('qc_') or v.startswith('source_')) and 'long_name' in ds.variables[v].attrs]\n", + "d_variable = 'u'\n", + "dropdown = widgets.Dropdown(\n", + " options = [(ds.variables[v].attrs['long_name'], v) for v in available_variables],\n", + " value= d_variable,\n", + " description='Field:',\n", + " 
disabled=False,\n", + ")\n", + "dropdown.layout.margin = '0px 30% 0px 20%'\n", + "dropdown.layout.width = '50%'\n", + "\n", + "# set up display\n", + "i_display = act.plotting.TimeSeriesDisplay(ds)\n", + "i_display.add_subplots((1,), figsize = (9.5,5))\n", + "i_ax = i_display.plot(d_variable, subplot_index=(0,), set_title=ds.variables[d_variable].attrs['long_name'],)\n", + "i_ax.grid()\n", + "i_fig = i_display.fig\n", + "\n", + "# update plot callback function\n", + "def update_plot(change):\n", + " i_ax.cla()\n", + " i_ax_new = i_display.plot(change.new, subplot_index=(0,), set_title=ds.variables[change.new].attrs['long_name'],)\n", + " i_ax_new.grid()\n", + " i_fig.canvas.draw()\n", + " i_fig.canvas.flush_events()\n", + "\n", + "dropdown.observe(update_plot, names='value')\n", + "\n", + "widgets.AppLayout(\n", + " header=dropdown,\n", + " center=i_fig.canvas,\n", + " pane_heights=[1, 6,1]\n", + ")\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.12" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/VARANAL3D/180varanal3dncep.c1.ipynb b/VAPs/quicklook/VARANAL3D/180varanal3dncep.c1.ipynb new file mode 100644 index 00000000..8879ec8a --- /dev/null +++ b/VAPs/quicklook/VARANAL3D/180varanal3dncep.c1.ipynb @@ -0,0 +1,297 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# 180VARANAL3DNCEP.C1 Plots\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/varanal3d) for more 
information about this vap." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib widget\n", + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import ipywidgets as widgets\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "# Data archive directory\n", + "DATA_DIR = r'/data/archive/'\n", + "\n", + "# Datastream info\n", + "DATASTREAM_NAME = '180varanal3dncep'\n", + "DATA_LEVEL = 'c1'\n", + "LOCATIONS = [{'end_date': '2011-06-06', 'facility': 'C1', 'site': 'sgp', 'start_date': '2011-04-22'}]" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "9faaf875", + "metadata": {}, + "source": [ + "## Define site, facility, and date range" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ac6764f5", + "metadata": {}, + "outputs": [], + "source": [ + "print(\"The following locations and date ranges are available for this VAP:\")\n", + "display(pd.DataFrame(LOCATIONS, columns=['site', 'facility', 'start_date', 'end_date']))" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "8d132223", + "metadata": {}, + "source": [ + "#### Define site, facility, and date range (date format: YYYY-MM-DD) using the variables below:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e563983a", + "metadata": {}, + "outputs": [], + "source": [ + "site_facility = ( 'sgp', 'C1' )\n", + "\n", + "date_start = '2011-04-20'\n", + "date_end = '2011-04-22'" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "bccd3dfe-2f99-49a5-bace-ea37e8dd8fc3", + "metadata": {}, + "source": [ + "## Load data files\n", + "Load data files from /data/archive/" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": 
"cb4b9a26-c574-49c0-a521-658fa553e39e", + "metadata": {}, + "outputs": [], + "source": [ + "# Compile list of files\n", + "site, facility = site_facility\n", + "d_date_start = datetime.strptime(date_start, '%Y-%m-%d')\n", + "d_date_end = datetime.strptime(date_end, '%Y-%m-%d')\n", + "dir_path = os.path.join(DATA_DIR + site, site + DATASTREAM_NAME + facility + r'.' + DATA_LEVEL )\n", + "dir_path\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6be8f3dc", + "metadata": {}, + "outputs": [], + "source": [ + "from datetime import date, timedelta\n", + "import pandas as pd\n", + "\n", + "def get_ARM_formated_dates(start_date, end_date):\n", + " \"\"\"\n", + " Get a list of ARM conventional formated date lists, based on start_date and end_date(inclusive)\n", + " EXAMPLE:\n", + " get_ARM_formated_dates(start_date=\"20180219\", end_date=\"20180221\")\n", + " >> [\"20180219\", \"20180220\", \"20180221\"] \n", + " \"\"\"\n", + " \n", + " _start_date = pd.to_datetime(start_date)\n", + " _end_date = pd.to_datetime(end_date)\n", + " \n", + " delta = _end_date - _start_date # returns timedelta \n", + " dates = []\n", + "\n", + " for i in range(delta.days + 1):\n", + " day = _start_date + timedelta(days=i)\n", + " day_formated = day.strftime(format=\"%Y%m%d\")\n", + " dates.append(day_formated)\n", + " return dates\n", + "\n", + "\n", + "get_ARM_formated_dates(start_date=date_start, end_date=date_end)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "51feea2e", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter a list of files based on date pattern\n", + "import glob\n", + "dates = get_ARM_formated_dates(start_date=date_start, end_date=date_end)\n", + "files_filter = []\n", + "for date in dates:\n", + " files_filter += glob.glob(f'{dir_path}/*.{date}*.*')\n", + " files_filter\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0e5d0d7", + "metadata": {}, + "outputs": [], + 
"source": [ + "# Load files as a single dataset\n", + "files_list = files_filter \n", + "ds = act.io.armfiles.read_netcdf(files_list)\n", + "ds.clean.cleanup()\n", + "print(f'{len(files_list)} files loaded')\n", + "ds\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "4a551094-9ec0-4b64-b80a-9940573c2f50", + "metadata": {}, + "source": [ + "## Plot time series data\n", + "#### Define the list of variables to be plotted:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d09b789e-84f1-4605-846b-a72c110c8048", + "metadata": {}, + "outputs": [], + "source": [ + "variables_to_plot = ['u', 'v', 'T']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3458fb08-035b-4898-9253-0a94e6f9c97b", + "metadata": {}, + "outputs": [], + "source": [ + "ts_display = act.plotting.TimeSeriesDisplay(ds)\n", + "ts_display.add_subplots((len(variables_to_plot),), figsize = (9.5,4*len(variables_to_plot)))\n", + "\n", + "for i,v in enumerate(variables_to_plot):\n", + " ts_ax = ts_display.plot(v, subplot_index=(i,), set_title=ds.variables[v].attrs['long_name'],)\n", + " ts_ax.grid()\n", + "\n", + "plt.show()\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "2308db16-f362-4033-a11a-c3e5e75ad9ba", + "metadata": {}, + "source": [ + "## Field selection dropdown menu\n", + "Select variable to be plotted from a dropdown menu" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d5792fa8-2253-441a-8ac6-1e07a0345031", + "metadata": {}, + "outputs": [], + "source": [ + "plt.ioff()\n", + "\n", + "# populate dropdown menu with available variables \n", + "available_variables = [v for v in ds.variables if not('time' in v or v.startswith('qc_') or v.startswith('source_')) and 'long_name' in ds.variables[v].attrs]\n", + "d_variable = 'u'\n", + "dropdown = widgets.Dropdown(\n", + " options = [(ds.variables[v].attrs['long_name'], v) for v in available_variables],\n", + " value= d_variable,\n", + " 
description='Field:',\n", + " disabled=False,\n", + ")\n", + "dropdown.layout.margin = '0px 30% 0px 20%'\n", + "dropdown.layout.width = '50%'\n", + "\n", + "# set up display\n", + "i_display = act.plotting.TimeSeriesDisplay(ds)\n", + "i_display.add_subplots((1,), figsize = (9.5,5))\n", + "i_ax = i_display.plot(d_variable, subplot_index=(0,), set_title=ds.variables[d_variable].attrs['long_name'],)\n", + "i_ax.grid()\n", + "i_fig = i_display.fig\n", + "\n", + "# update plot callback function\n", + "def update_plot(change):\n", + " i_ax.cla()\n", + " i_ax_new = i_display.plot(change.new, subplot_index=(0,), set_title=ds.variables[change.new].attrs['long_name'],)\n", + " i_ax_new.grid()\n", + " i_fig.canvas.draw()\n", + " i_fig.canvas.flush_events()\n", + "\n", + "dropdown.observe(update_plot, names='value')\n", + "\n", + "widgets.AppLayout(\n", + " header=dropdown,\n", + " center=i_fig.canvas,\n", + " pane_heights=[1, 6,1]\n", + ")\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.12" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/VARANAL3D/VARANAL3D_tutorial.ipynb b/VAPs/quicklook/VARANAL3D/VARANAL3D_tutorial.ipynb new file mode 100644 index 00000000..5dfa64cf --- /dev/null +++ b/VAPs/quicklook/VARANAL3D/VARANAL3D_tutorial.ipynb @@ -0,0 +1,838 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# 180VARANAL3DERA5.C1 Notebook\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/varanal3d) for 
more information about this vap." + ] + }, + { + "cell_type": "markdown", + "id": "97097763", + "metadata": {}, + "source": [ + "In this notebook, we demonstrate the workflow to explore ARM vap data (using 180varanal3dera5 as an example.) Value-added products (VAPs) are higher-order data products that have been analyzed and processed to ease scientists’ use of ARM data in atmospheric research and global climate models. \n", + "Here is the main content we will cover." + ] + }, + { + "cell_type": "markdown", + "id": "eddec40f", + "metadata": {}, + "source": [ + "# Table of Content\n", + "## Access the data\n", + "* How to retrieve the data\n", + "* Data path and file name conventions\n", + "* Load data\n", + "## Explore the data\n", + "* NetCDF Data structure\n", + "* Xarray essentials\n", + "* Xarray Variable\n", + "## Plot the data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "import random\n", + "\n", + "import glob\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "id": "2372d5be", + "metadata": {}, + "source": [ + "## Access the data" + ] + }, + { + "cell_type": "markdown", + "id": "5b0d9684", + "metadata": {}, + "source": [ + "### How to retrieve the data\n", + "We assume the path \"/data/archive\" is available where you are running this notebook. You can use `os.path.exists(\"/data/archive\")` to verify if the path exists at your machine.\n", + "\n", + "\n", + "### Data path and file name conventions\n", + "There several common terminologies regarding ARM data, for example, data-stream-name, data-level, site, facility, instrument, etc. 
(For more details, please see [ARM Data File Standards Version 1.2](https://www.google.com/search?q=arm+datastream+facility+cite+definition&rlz=1C1GCEJ_enUS1029US1029&ei=hb41ZICrBPukqtsPvMODIA&ved=0ahUKEwjAgeiS0aL-AhV7kmoFHbzhAAQQ4dUDCBA&uact=5&oq=arm+datastream+facility+cite+definition&gs_lcp=Cgxnd3Mtd2l6LXNlcnAQAzIFCAAQogQyBQgAEKIEMgUIABCiBDIFCAAQogQyBQgAEKIEOgoIABBHENYEELADOgoIIRCgARDDBBAKSgQIQRgAUJ4DWIoOYO0PaAFwAXgAgAGMAYgBuQmSAQMyLjmYAQCgAQHIAQjAAQE&sclient=gws-wiz-serp).) \n", + "\n", + "For example, this notebook is called `180varanal3dera5.c1`, where `180varanal3dera5` is the \"datastream name\" and `c1` is the \"data level\".\n", + "\n", + "This datastream also contains site `sgp` and facility `C1`. (Note: an individual datastream might have multiple site-facility pairs.)\n", + "In such a case, the data of this data-stream is stored at `/data/archive/sgp/sgp180varanal3dera5C1.c1`, which is in the format of `<DATA_DIR>/<site>/<site><datastream_name><facility>.<data_level>`. We can use the following method to assign the data-stream directory: `datastream_dir = os.path.join(DATA_DIR, site, site + DATASTREAM_NAME + facility + '.' + DATA_LEVEL )`\n", + "\n", + "The data files under datastream_dir also follow naming conventions. Once you reach the datastream_dir level, the most important file naming convention for differentiating the files is \"yyyyMMdd.hhmmss\", which comes in handy for filtering files based on datetime. 
For example, we can use `glob.glob(f'{datastream_dir}/*.200709*.*')` to select the files from September 2007.\n", + "\n", + "(Note: refer to https://adc.arm.gov/discovery/#/ and the https://adc.arm.gov/solr8/metadata/select API to explore ARM datastreams and their associated sites and facilities.)\n", + "\n", + "Please see the following examples in action" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d7e9eb85", + "metadata": {}, + "outputs": [], + "source": [ + "# Verify if DATA_DIR path exists\n", + "DATA_DIR = \"/data/archive\"\n", + "os.path.exists(DATA_DIR)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "586993fd", + "metadata": {}, + "outputs": [], + "source": [ + "# Specify datastream_dir following the path conventions and check its existence\n", + "DATASTREAM_NAME = \"180varanal3dera5\"\n", + "DATA_LEVEL = \"c1\"\n", + "site = \"sgp\"\n", + "facility = \"C1\"\n", + "datastream_dir = os.path.join(DATA_DIR, site, site + DATASTREAM_NAME + facility + '.' 
+ DATA_LEVEL )\n", + "print(datastream_dir)\n", + "print(os.path.exists(datastream_dir))\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0742f7c1", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: list 5 (random) files under datastream_dir\n", + "files = os.listdir(datastream_dir)\n", + "files[:5]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "39b98a36", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: get most recent file\n", + "list_of_files = glob.glob(f\"{datastream_dir}/*\") # * means all if need specific format then *.csv\n", + "latest_file = max(list_of_files, key=os.path.getctime)\n", + "latest_file" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "902d514e", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus sort datastream files based on datetime\n", + "files = os.listdir(datastream_dir)\n", + "file_sorted = files.copy()\n", + "file_sorted.sort() \n", + "print(file_sorted[:5])\n", + "\n", + "# to reverse\n", + "file_sorted_reverse = files.copy() \n", + "file_sorted_reverse.sort(reverse=True)\n", + "print(file_sorted_reverse[:5])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ec5923b2", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: pattern matching\n", + "# filter the 200709** files under datastream_dir\n", + "files_filter = glob.glob(f'{datastream_dir}/*.200709*.*')\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c4a4aa18", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: use armnotebook_utils.file_filter (TODO) to filter files based on datastream info \n" + ] + }, + { + "cell_type": "markdown", + "id": "cfeb9efc", + "metadata": {}, + "source": [ + "### Load data\n", + "The arm data is stored in [NetCDF](https://en.wikipedia.org/wiki/NetCDF#:~:text=%22NetCDF%20(network%20Common%20Data%20Form,format%20for%20representing%20scientific%20data.) format. 
We can use xarray's `open_dataset` method to load single file, or `open_mfdataset` to open multiple files. (Note: the latter will still return a single xarray dataset object by combining multiple files.)\n", + "\n", + "Note: open_dataset keeps the file handle open and lazy loads its contents. All parameters are passed directly to open_dataset. It is a preferable method over load_dataset for memory-effiency. (more details, please see [xarray.open_dataset](https://docs.xarray.dev/en/stable/generated/xarray.open_dataset.html))\n", + "\n", + "See the following example in action" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a440a329", + "metadata": {}, + "outputs": [], + "source": [ + "# open single file\n", + "full_path = latest_file\n", + "print(full_path)\n", + "ds_single = xr.open_dataset(full_path)\n", + "ds_single" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0143a3d", + "metadata": {}, + "outputs": [], + "source": [ + "print(ds_single)\n", + "print(type(ds_single))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1c0f8939", + "metadata": {}, + "outputs": [], + "source": [ + "# open multiple files\n", + "n_files = 3\n", + "full_paths = [os.path.join(datastream_dir, f_path) for f_path in file_sorted_reverse[:n_files]]\n", + "print(full_paths)\n", + "try: # Note: sometimes multiple files cannot be merged, so we used try except here.\n", + " ds_mutiple = xr.open_mfdataset(full_paths)\n", + " # ds_mutiple\n", + " print(type(ds_mutiple))\n", + " print(ds_mutiple)\n", + "except Exception as e:\n", + " print(e)" + ] + }, + { + "cell_type": "markdown", + "id": "3fa59943", + "metadata": {}, + "source": [ + "## Explore the data" + ] + }, + { + "cell_type": "markdown", + "id": "571f69b9", + "metadata": {}, + "source": [ + "### NetCDF Data structure \n", + "\n", + "\n", + "(If you are confident with NetCDF basics and xarray essentials, feel free to skip this session.)\n", + "Before we dive into 
data exploration, there are some essential concepts we should be familiar with (shown below): \n", + "* Dataset\n", + "* Data array\n", + "* Variable\n", + "* Dimension\n", + "* Coordinate\n", + "* Data Type\n", + "* Meta Data (Attributes)\n", + "\n", + "We will not go into details about NetCDF basics, but here are some references you might find helpful:\n", + "* [Components of a NetCDF Dataset](https://iprc.soest.hawaii.edu/users/xfu/tool/guidef-7.html#)\n", + "* [Network Common Data Form (NetCDF)](https://www.unidata.ucar.edu/software/netcdf/) and [A Brief History of (netCDF) Time](https://www.unidata.ucar.edu/software/netcdf/time/recs.html)\n", + "\n", + "\n", + "\n", + "### Xarray essentials\n", + "Earlier we introduced the xarray package and used `open_dataset` and `open_mfdataset` to retrieve NetCDF data files as an xarray Dataset object. Recall that you can review the data in a notebook by using `print(ds)` or `ds`. \n", + "\n", + "Using xarray to retrieve the aforementioned NetCDF basics is straightforward. 
In general,\n", + "* Dataset: `ds`\n", + "* Data array: `ds.variables`\n", + "* Variable: `ds.ds.variables`\n", + "* Dimenssion: `ds.dims`\n", + "* Coordinate: `ds.coords`\n", + "* Data Type: `type`\n", + "* Meta Data (Attributes)\n", + "\n", + "Also, here are some references if you are new to xarray\n", + "* [Xarray in 45 minutes](https://tutorial.xarray.dev/overview/xarray-in-45-min.html)\n", + "* [Handling NetCDF Files using XArray for Absolute Beginners](https://towardsdatascience.com/handling-netcdf-files-using-xarray-for-absolute-beginners-111a8ab4463f)\n", + "\n", + "Try the following commands in action\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b3eac323", + "metadata": {}, + "outputs": [], + "source": [ + "ds = ds_single" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "77ecf85d", + "metadata": {}, + "outputs": [], + "source": [ + "# Dataset \n", + "ds" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "25e7de09", + "metadata": {}, + "outputs": [], + "source": [ + "# Data array, Variable\n", + "# Note: the info can be overwhelming. 
Do not attempt to grasp everything at the first glance.\n", + "ds.variables" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8c41b67e", + "metadata": {}, + "outputs": [], + "source": [ + "# dimenssions\n", + "ds.dims" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "156f1dfc", + "metadata": {}, + "outputs": [], + "source": [ + "# coordinates\n", + "ds.coords" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "277d6064", + "metadata": {}, + "outputs": [], + "source": [ + "# Meta Data (Attributes)\n", + "ds.attrs" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d334681f", + "metadata": {}, + "outputs": [], + "source": [ + "# type\n", + "print(type(ds))\n", + "print(type(ds.variables))\n", + "print(type(ds.dims))\n", + "print(type(ds.coords))\n", + "print(type(ds.attrs))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "643399d6", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: retrieve variable names only\n", + "list(ds.variables)" + ] + }, + { + "cell_type": "markdown", + "id": "1cf36878", + "metadata": {}, + "source": [ + "#### Discussion: variable vs. coordinates vs. dimenssions. \n", + "From [Components of a NetCDF Dataset](https://iprc.soest.hawaii.edu/users/xfu/tool/guidef-7.html#):\n", + "\n", + "* Dimession: A dimension may be used to represent a real physical dimension, for example, time, latitude, longitude, or height. A dimension might also be used to index other quantities, for example station or model-run-number.\n", + "A netCDF dimension has both a name and a length. A dimension length is an arbitrary positive integer, except that one dimension in a netCDF dataset can have the length UNLIMITED.\n", + "\n", + "* Variables: Variables are used to store the bulk of the data in a netCDF dataset. A variable represents an array of values of the same type. A scalar value is treated as a 0-dimensional array. 
A variable has a name, a data type, and a shape described by its list of dimensions specified when the variable is created. A variable may also have associated attributes, which may be added, deleted or changed after the variable is created.\n", + "\n", + "* Coordinate (Variables): It is legal for a variable to have the same name as a dimension. Such variables have no special meaning to the netCDF library. However there is a convention that such variables should be treated in a special way by software using this library.\n", + "A variable with the same name as a dimension is called a coordinate variable. It typically defines a physical coordinate corresponding to that dimension.\n", + "\n", + "\n", + "Tips: By definition, coordinates and dimenssions are also variable. For example ds.time is a coordinate but it is also a special variable. But in practice, when we talk about variable, it implies regular/non-coordinate variable. We will use this convention for the remaining notebook. " + ] + }, + { + "cell_type": "markdown", + "id": "d47120f9", + "metadata": {}, + "source": [ + "### Xarray Variable\n", + "\n", + "\n", + "We can use `ds.variables` to access varialbes of a dataset. But many times viewing print-out the whole datasets can be overwhemling. Instead, we would work on the individual variable (also called Xarray data array.) using the `ds[var_name]` syntax, where `var_name` is the name of the variable. See the following examples in action.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5a57e136", + "metadata": {}, + "outputs": [], + "source": [ + "print(type(ds[\"time\"]))\n", + "ds[\"time\"]" + ] + }, + { + "cell_type": "markdown", + "id": "51ec643e", + "metadata": {}, + "source": [ + "#### Variable properties\n", + "For individual varible, the following properties are mostly used with the assocaited xarray method to retrieve them. 
(assuming var = ds[var_name])\n", + "* name: `var.name`\n", + "* data content: `var.data`\n", + "* attributes: `var.attrs`\n", + "* dimenstions: `var.dims`\n", + "* data type: `var.data.dtype`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a632a525", + "metadata": {}, + "outputs": [], + "source": [ + "var_name = \"time\"\n", + "var = ds[var_name]\n", + "\n", + "print(\"var.name: \\n\", var.name, \"\\n\")\n", + "print(\"var.data: \\n\", var.data, \"\\n\")\n", + "print(\"var.attrs: \\n\", var.attrs, \"\\n\")\n", + "print(\"var.dims: \\n\", var.dims, \"\\n\")\n", + "print(\"var.data.dtype: \\n\", var.data.dtype, \"\\n\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b256b07f", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: indivdual variable info table (collect and display variable info as a tabular format)\n", + "df_info = pd.DataFrame()\n", + "df_info[\"var_name\"] = list(ds.variables)\n", + "df_info[\"dims\"] = df_info.var_name.apply(lambda x: ds[x].dims)\n", + "df_info[\"is_dim\"] = df_info.var_name.apply(lambda x: x in ds.dims)\n", + "df_info[\"n_dim\"] = df_info.var_name.apply(lambda x: len(ds[x].dims))\n", + "df_info[\"attrs\"] = df_info.var_name.apply(lambda x: ds[x].attrs)\n", + "df_info[\"dtype\"] = df_info.var_name.apply(lambda x: ds[x].data.dtype)\n", + "df_info" + ] + }, + { + "cell_type": "markdown", + "id": "e6a36b96", + "metadata": {}, + "source": [ + "### Data cleaning/Preprocessing (skipped)\n", + "Data cleaning and preprocessing is an important stage in the the data analysis pipeline. However it will be out of the scope of this notebook. For more information about data cleaning and preprocessing basics, here are some references. 
\n", + "* [Xarray Fundamentals](https://earth-env-data-science.github.io/lectures/xarray/xarray.html)\n", + "* [Xarray Tutorial — Pangeo Gallery documentation](http://gallery.pangeo.io/repos/pangeo-data/pangeo-tutorial-gallery/xarray.html)\n", + "* [Pythonic Data Cleaning With pandas and NumPy](https://realpython.com/python-data-cleaning-numpy-pandas/)\n", + "* [Pandas - Cleaning Data](https://www.w3schools.com/python/pandas/pandas_cleaning.asp)" + ] + }, + { + "cell_type": "markdown", + "id": "ae6e1a69", + "metadata": {}, + "source": [ + "### Plotting\n", + "Data visualization (or plotting) is another important data analysis topic and deserves its own discussion. Here in this notebook we will only demonstrate basic tool for simple data visualization tasks.\n", + "\n", + "Here are some reference you might find useful:\n", + "* xarray plotting: https://docs.xarray.dev/en/stable/user-guide/plotting.html\n", + "* Atmospheric Community Toolkit (ACT): https://arm-doe.github.io/ACT/index.html\n", + "\n", + "Note: this notebook is auto-generated using a template. It uses a general idea to select variable(s) to plot and is not customized to each indivdual notebook. Feel free to change the variables in intrrests, especially certain figure is failed to plot." + ] + }, + { + "cell_type": "markdown", + "id": "7e417fb2", + "metadata": {}, + "source": [ + "#### 1-dimenssional basic time series plot\n", + "\n", + "For the following plot we would like to find variables such that\n", + "* it has one and only one dimession\n", + "* \"time\" is its coordinate variable\n", + "* it is not a dimenssion itself,\n", + "* it is not a special variable with substrings within [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "\n", + "Please see the following example in action. 
(For more details about pandas filtering, please see the following references.)\n", + "* [pandas: multiple conditions while indexing data frame](https://stackoverflow.com/questions/22591174/pandas-multiple-conditions-while-indexing-data-frame-unexpected-behavior)\n", + "* [pandas.DataFrame.apply](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.apply.html)\n", + "* [How to test if a string contains one of the substrings in a list...](https://stackoverflow.com/questions/26577516/how-to-test-if-a-string-contains-one-of-the-substrings-in-a-list-in-pandas" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "30edac2d", + "metadata": {}, + "outputs": [], + "source": [ + "# (pandas query) filter n_dim==1, non-dimenssional, and not contains the following substrings\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter = df_info[(df_info.n_dim == 1) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (df_info.is_dim==False) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4777a659", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " var_1d = df_filter.var_name.values[0]\n", + " var_1d\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fb5ae985", + "metadata": {}, + "outputs": [], + "source": [ + "# Note: if failed, change to another variable to plot.\n", + "try:\n", + " ds[var_1d].plot()\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "markdown", + "id": "9e41066e", + "metadata": {}, + "source": [ + "#### 2-dimenssional basic plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "09bd4adc", + "metadata": {}, + "outputs": [], + "source": [ + "# (pandas query) filter n_dim==2, non-dimenssional, and not contains the 
following substrings\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_2 = df_info[(df_info.n_dim == 2) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (df_info.is_dim==False) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter_2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2cdea1a9", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " var_2d = df_filter_2.var_name.values[0]\n", + " var_2d\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f5f48879", + "metadata": {}, + "outputs": [], + "source": [ + "# Note: if failed, change to another variable to plot.\n", + "try:\n", + " print(ds[var_2d].dims)\n", + " # ds[var_2d].plot()\n", + "\n", + " # conventionally, use \"time\" as x-axis\n", + " ds[var_2d].plot(x=\"time\")\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "markdown", + "id": "7554b2a6", + "metadata": {}, + "source": [ + "#### qc-plotting (optional)\n", + "\n", + "Note: act qc plotting has more strict requirement, one of them is the associated qc variable need to have \"flag_masks\" attributes. 
Which is added by using ds = act.io.armfiles.read_netcdf(files_list), then ds.clean.cleanup()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "71c2096f", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " ds_act = act.io.armfiles.read_netcdf(full_path)\n", + " print(type(ds_act))\n", + " ds_act.clean.cleanup()\n", + "\n", + " # or \n", + " # ds.clean.cleanup()\n", + "except Exception as e:\n", + " print(\"ERROR\", e)\n", + " ds_act = ds" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "844f8505", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter valid variables for ACT qc plotting\n", + "condition = (\"qc_\" + df_info.var_name).apply(lambda x: ds_act[x].attrs.get(\"flag_masks\") is not None \n", + " if x in list(ds.variables) else False)\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_3 = df_info[(df_info.is_dim==False) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings))) &\n", + " condition\n", + " ]\n", + "df_filter_3" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ecd38cc6", + "metadata": {}, + "outputs": [], + "source": [ + "# QC Plot\n", + "try:\n", + " qc_variable = df_filter_3.var_name.values[0]\n", + " print(qc_variable)\n", + "except Exception as e:\n", + " print(e)\n", + " \n", + "try:\n", + "\n", + " # Plot\n", + " qc_display = act.plotting.TimeSeriesDisplay(ds_act)\n", + " qc_display.add_subplots((2,), figsize = (9.5,10))\n", + " qc_ax = qc_display.plot(qc_variable, subplot_index=(0,), set_title=\"QC results on field: \" + qc_variable,)\n", + " qc_ax.grid()\n", + " qc_display.qc_flag_block_plot(qc_variable, subplot_index=(1,))\n", + " qc_ax.set_xlabel(f\"UTC Time starts at {ds.time.data[0]}\")\n", + "\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e)" + ] + }, + { + "cell_type": "markdown", + "id": "d76e4d27", + "metadata": 
{}, + "source": [ + "#### bonus: choose variables to plot from a dropdown menu " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bb733804", + "metadata": {}, + "outputs": [], + "source": [ + "# Valid variables filtering\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_4 = df_info[(df_info.is_dim==False) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter_4" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "82d5c20d", + "metadata": {}, + "outputs": [], + "source": [ + "# example 1: using xarray plot\n", + "\n", + "# Uncomment the following cell to try the interactive plot (ctrl + /)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2fc9aaa4", + "metadata": {}, + "outputs": [], + "source": [ + "# %matplotlib widget\n", + "# plt.clf()\n", + "\n", + "# fig, ax = plt.subplots(figsize=(10, 4))\n", + "\n", + "# available_variables = df_filter_4.var_name.values\n", + "# @widgets.interact(var=available_variables)\n", + "# def update(var = available_variables[0]):\n", + "# fig.clear() # Remove old lines from plot and plot new one\n", + "# if len(ds[var].dims)==2:\n", + "# ds[var].plot(x=\"time\", add_colorbar=False)\n", + "# else:\n", + "# ds[var].plot()\n", + "# plt.grid()\n", + "# plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fabdd802", + "metadata": {}, + "outputs": [], + "source": [ + "# example 2: using act plot\n", + "\n", + "# Uncomment the following cell to try the interactive plot (ctrl + /)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "962b4186", + "metadata": {}, + "outputs": [], + "source": [ + "# %matplotlib widget\n", + "# plt.clf()\n", + "\n", + "# available_variables = df_filter_4.var_name.values\n", + "\n", + "\n", + "# @widgets.interact(var=available_variables)\n", + "# def 
update(var = available_variables[0]):\n", + "\n", + "# i_display = act.plotting.TimeSeriesDisplay(ds_act)\n", + "# i_display.add_subplots((1,), figsize=(10, 4))\n", + "# ax = i_display.plot(var, subplot_index=(0,), set_title=f\"{var} ({ds_act[var].attrs['long_name']})\",)\n", + "\n", + "# ax.set_xlabel(f\"UTC Time starts at {ds.time.data[0]}\")\n", + "# ax.grid()\n", + "# plt.show()\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.16" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/VDISQUANTS/VDISQUANTS_tutorial.ipynb b/VAPs/quicklook/VDISQUANTS/VDISQUANTS_tutorial.ipynb new file mode 100644 index 00000000..f36f368b --- /dev/null +++ b/VAPs/quicklook/VDISQUANTS/VDISQUANTS_tutorial.ipynb @@ -0,0 +1,838 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# VDISQUANTS.C1 Notebook\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/vdisquants) for more information about this vap." + ] + }, + { + "cell_type": "markdown", + "id": "97097763", + "metadata": {}, + "source": [ + "In this notebook, we demonstrate the workflow to explore ARM vap data (using vdisquants as an example.) Value-added products (VAPs) are higher-order data products that have been analyzed and processed to ease scientists’ use of ARM data in atmospheric research and global climate models. \n", + "Here is the main content we will cover." 
+ ] + }, + { + "cell_type": "markdown", + "id": "eddec40f", + "metadata": {}, + "source": [ + "# Table of Content\n", + "## Access the data\n", + "* How to retrieve the data\n", + "* Data path and file name conventions\n", + "* Load data\n", + "## Explore the data\n", + "* NetCDF Data structure\n", + "* Xarray essentials\n", + "* Xarray Variable\n", + "## Plot the data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "import random\n", + "\n", + "import glob\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "id": "2372d5be", + "metadata": {}, + "source": [ + "## Access the data" + ] + }, + { + "cell_type": "markdown", + "id": "5b0d9684", + "metadata": {}, + "source": [ + "### How to retrieve the data\n", + "We assume the path \"/data/archive\" is available where you are running this notebook. You can use `os.path.exists(\"/data/archive\")` to verify if the path exists at your machine.\n", + "\n", + "\n", + "### Data path and file name conventions\n", + "There several common terminologies regarding ARM data, for example, data-stream-name, data-level, site, facility, instrument, etc. (For more details, please see [ARM Data File Standards Version 1.2](https://www.google.com/search?q=arm+datastream+facility+cite+definition&rlz=1C1GCEJ_enUS1029US1029&ei=hb41ZICrBPukqtsPvMODIA&ved=0ahUKEwjAgeiS0aL-AhV7kmoFHbzhAAQQ4dUDCBA&uact=5&oq=arm+datastream+facility+cite+definition&gs_lcp=Cgxnd3Mtd2l6LXNlcnAQAzIFCAAQogQyBQgAEKIEMgUIABCiBDIFCAAQogQyBQgAEKIEOgoIABBHENYEELADOgoIIRCgARDDBBAKSgQIQRgAUJ4DWIoOYO0PaAFwAXgAgAGMAYgBuQmSAQMyLjmYAQCgAQHIAQjAAQE&sclient=gws-wiz-serp).) 
\n", + "\n", + "For example, this notebook is called `vdisquants.c1`, where `vdisquants` is the \"datastream name\" and `c1` is the \"data level\".\n", + "\n", + "This datastream also contains site `ena` and facility `C1`. (Note: an individual datastream might have multiple site-facility pairs.)\n", + "In such a case, the data of this data-stream is stored at `/data/archive/ena/enavdisquantsC1.c1`, which is in the format of `<DATA_DIR>/<site>/<site><datastream_name><facility>.<data_level>`. We can use the following method to assign the data-stream directory: `datastream_dir = os.path.join(DATA_DIR, site, site + DATASTREAM_NAME + facility + '.' + DATA_LEVEL )`\n", + "\n", + "The data files under datastream_dir also follow naming conventions. Once you reach the datastream_dir level, the most important file naming convention for differentiating the files is \"yyyyMMdd.hhmmss\", which comes in handy for filtering files based on datetime. For example, we can use `glob.glob(f'{datastream_dir}/*.200709*.*')` to select the files from September 2007.\n", + "\n", + "(Note: refer to https://adc.arm.gov/discovery/#/ and the https://adc.arm.gov/solr8/metadata/select API to explore ARM datastreams and their associated sites and facilities.)\n", + "\n", + "Please see the following examples in action" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d7e9eb85", + "metadata": {}, + "outputs": [], + "source": [ + "# Verify if DATA_DIR path exists\n", + "DATA_DIR = \"/data/archive\"\n", + "os.path.exists(DATA_DIR)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "586993fd", + "metadata": {}, + "outputs": [], + "source": [ + "# Specify datastream_dir following the path conventions and check its existence\n", + "DATASTREAM_NAME = \"vdisquants\"\n", + "DATA_LEVEL = \"c1\"\n", + "site = \"ena\"\n", + "facility = \"C1\"\n", + "datastream_dir = os.path.join(DATA_DIR, site, site + DATASTREAM_NAME + facility + '.' 
+ DATA_LEVEL )\n", + "print(datastream_dir)\n", + "print(os.path.exists(datastream_dir))\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0742f7c1", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: list 5 (random) files under datastream_dir\n", + "files = os.listdir(datastream_dir)\n", + "files[:5]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "39b98a36", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: get most recent file\n", + "list_of_files = glob.glob(f\"{datastream_dir}/*\") # * means all if need specific format then *.csv\n", + "latest_file = max(list_of_files, key=os.path.getctime)\n", + "latest_file" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "902d514e", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus sort datastream files based on datetime\n", + "files = os.listdir(datastream_dir)\n", + "file_sorted = files.copy()\n", + "file_sorted.sort() \n", + "print(file_sorted[:5])\n", + "\n", + "# to reverse\n", + "file_sorted_reverse = files.copy() \n", + "file_sorted_reverse.sort(reverse=True)\n", + "print(file_sorted_reverse[:5])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ec5923b2", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: pattern matching\n", + "# filter the 200709** files under datastream_dir\n", + "files_filter = glob.glob(f'{datastream_dir}/*.200709*.*')\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c4a4aa18", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: use armnotebook_utils.file_filter (TODO) to filter files based on datastream info \n" + ] + }, + { + "cell_type": "markdown", + "id": "cfeb9efc", + "metadata": {}, + "source": [ + "### Load data\n", + "The arm data is stored in [NetCDF](https://en.wikipedia.org/wiki/NetCDF#:~:text=%22NetCDF%20(network%20Common%20Data%20Form,format%20for%20representing%20scientific%20data.) format. 
We can use xarray's `open_dataset` method to load single file, or `open_mfdataset` to open multiple files. (Note: the latter will still return a single xarray dataset object by combining multiple files.)\n", + "\n", + "Note: open_dataset keeps the file handle open and lazy loads its contents. All parameters are passed directly to open_dataset. It is a preferable method over load_dataset for memory-effiency. (more details, please see [xarray.open_dataset](https://docs.xarray.dev/en/stable/generated/xarray.open_dataset.html))\n", + "\n", + "See the following example in action" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a440a329", + "metadata": {}, + "outputs": [], + "source": [ + "# open single file\n", + "full_path = latest_file\n", + "print(full_path)\n", + "ds_single = xr.open_dataset(full_path)\n", + "ds_single" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0143a3d", + "metadata": {}, + "outputs": [], + "source": [ + "print(ds_single)\n", + "print(type(ds_single))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1c0f8939", + "metadata": {}, + "outputs": [], + "source": [ + "# open multiple files\n", + "n_files = 3\n", + "full_paths = [os.path.join(datastream_dir, f_path) for f_path in file_sorted_reverse[:n_files]]\n", + "print(full_paths)\n", + "try: # Note: sometimes multiple files cannot be merged, so we used try except here.\n", + " ds_mutiple = xr.open_mfdataset(full_paths)\n", + " # ds_mutiple\n", + " print(type(ds_mutiple))\n", + " print(ds_mutiple)\n", + "except Exception as e:\n", + " print(e)" + ] + }, + { + "cell_type": "markdown", + "id": "3fa59943", + "metadata": {}, + "source": [ + "## Explore the data" + ] + }, + { + "cell_type": "markdown", + "id": "571f69b9", + "metadata": {}, + "source": [ + "### NetCDF Data structure \n", + "\n", + "\n", + "(If you are confident with NetCDF basics and xarray essentials, feel free to skip this session.)\n", + "Before we dive into 
data exploration, there are some essential concepts we should be familiar with (shown below): \n", + "* Dataset\n", + "* Data array\n", + "* Variable\n", + "* Dimension\n", + "* Coordinate\n", + "* Data Type\n", + "* Meta Data (Attributes)\n", + "\n", + "We will not go into details about NetCDF basics, but here are some references you might find helpful:\n", + "* [Components of a NetCDF Dataset](https://iprc.soest.hawaii.edu/users/xfu/tool/guidef-7.html#)\n", + "* [Network Common Data Form (NetCDF)](https://www.unidata.ucar.edu/software/netcdf/) and [A Brief History of (netCDF) Time](https://www.unidata.ucar.edu/software/netcdf/time/recs.html)\n", + "\n", + "\n", + "\n", + "### Xarray essentials\n", + "Earlier we introduced the xarray package and used `open_dataset` and `open_mfdataset` to retrieve NetCDF data files as an xarray Dataset object. Recall that you can review the data in a notebook by using `print(ds)` or `ds`. \n", + "\n", + "Using xarray to retrieve the aforementioned NetCDF basics is straightforward. 
In general,\n", + "* Dataset: `ds`\n", + "* Data array: `ds[var_name]`\n", + "* Variable: `ds.variables`\n", + "* Dimension: `ds.dims`\n", + "* Coordinate: `ds.coords`\n", + "* Data Type: `type`\n", + "* Meta Data (Attributes): `ds.attrs`\n", + "\n", + "Also, here are some references if you are new to xarray\n", + "* [Xarray in 45 minutes](https://tutorial.xarray.dev/overview/xarray-in-45-min.html)\n", + "* [Handling NetCDF Files using XArray for Absolute Beginners](https://towardsdatascience.com/handling-netcdf-files-using-xarray-for-absolute-beginners-111a8ab4463f)\n", + "\n", + "Try the following commands in action\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b3eac323", + "metadata": {}, + "outputs": [], + "source": [ + "ds = ds_single" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "77ecf85d", + "metadata": {}, + "outputs": [], + "source": [ + "# Dataset \n", + "ds" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "25e7de09", + "metadata": {}, + "outputs": [], + "source": [ + "# Data array, Variable\n", + "# Note: the info can be overwhelming. 
Do not attempt to grasp everything at the first glance.\n", + "ds.variables" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8c41b67e", + "metadata": {}, + "outputs": [], + "source": [ + "# dimenssions\n", + "ds.dims" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "156f1dfc", + "metadata": {}, + "outputs": [], + "source": [ + "# coordinates\n", + "ds.coords" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "277d6064", + "metadata": {}, + "outputs": [], + "source": [ + "# Meta Data (Attributes)\n", + "ds.attrs" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d334681f", + "metadata": {}, + "outputs": [], + "source": [ + "# type\n", + "print(type(ds))\n", + "print(type(ds.variables))\n", + "print(type(ds.dims))\n", + "print(type(ds.coords))\n", + "print(type(ds.attrs))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "643399d6", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: retrieve variable names only\n", + "list(ds.variables)" + ] + }, + { + "cell_type": "markdown", + "id": "1cf36878", + "metadata": {}, + "source": [ + "#### Discussion: variable vs. coordinates vs. dimenssions. \n", + "From [Components of a NetCDF Dataset](https://iprc.soest.hawaii.edu/users/xfu/tool/guidef-7.html#):\n", + "\n", + "* Dimession: A dimension may be used to represent a real physical dimension, for example, time, latitude, longitude, or height. A dimension might also be used to index other quantities, for example station or model-run-number.\n", + "A netCDF dimension has both a name and a length. A dimension length is an arbitrary positive integer, except that one dimension in a netCDF dataset can have the length UNLIMITED.\n", + "\n", + "* Variables: Variables are used to store the bulk of the data in a netCDF dataset. A variable represents an array of values of the same type. A scalar value is treated as a 0-dimensional array. 
A variable has a name, a data type, and a shape described by its list of dimensions specified when the variable is created. A variable may also have associated attributes, which may be added, deleted or changed after the variable is created.\n", + "\n", + "* Coordinate (Variables): It is legal for a variable to have the same name as a dimension. Such variables have no special meaning to the netCDF library. However there is a convention that such variables should be treated in a special way by software using this library.\n", + "A variable with the same name as a dimension is called a coordinate variable. It typically defines a physical coordinate corresponding to that dimension.\n", + "\n", + "\n", + "Tips: By definition, coordinates and dimenssions are also variable. For example ds.time is a coordinate but it is also a special variable. But in practice, when we talk about variable, it implies regular/non-coordinate variable. We will use this convention for the remaining notebook. " + ] + }, + { + "cell_type": "markdown", + "id": "d47120f9", + "metadata": {}, + "source": [ + "### Xarray Variable\n", + "\n", + "\n", + "We can use `ds.variables` to access varialbes of a dataset. But many times viewing print-out the whole datasets can be overwhemling. Instead, we would work on the individual variable (also called Xarray data array.) using the `ds[var_name]` syntax, where `var_name` is the name of the variable. See the following examples in action.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5a57e136", + "metadata": {}, + "outputs": [], + "source": [ + "print(type(ds[\"time\"]))\n", + "ds[\"time\"]" + ] + }, + { + "cell_type": "markdown", + "id": "51ec643e", + "metadata": {}, + "source": [ + "#### Variable properties\n", + "For individual varible, the following properties are mostly used with the assocaited xarray method to retrieve them. 
(assuming var = ds[var_name])\n", + "* name: `var.name`\n", + "* data content: `var.data`\n", + "* attributes: `var.attrs`\n", + "* dimensions: `var.dims`\n", + "* data type: `var.data.dtype`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a632a525", + "metadata": {}, + "outputs": [], + "source": [ + "var_name = \"time\"\n", + "var = ds[var_name]\n", + "\n", + "print(\"var.name: \\n\", var.name, \"\\n\")\n", + "print(\"var.data: \\n\", var.data, \"\\n\")\n", + "print(\"var.attrs: \\n\", var.attrs, \"\\n\")\n", + "print(\"var.dims: \\n\", var.dims, \"\\n\")\n", + "print(\"var.data.dtype: \\n\", var.data.dtype, \"\\n\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b256b07f", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: individual variable info table (collect and display variable info in a tabular format)\n", + "df_info = pd.DataFrame()\n", + "df_info[\"var_name\"] = list(ds.variables)\n", + "df_info[\"dims\"] = df_info.var_name.apply(lambda x: ds[x].dims)\n", + "df_info[\"is_dim\"] = df_info.var_name.apply(lambda x: x in ds.dims)\n", + "df_info[\"n_dim\"] = df_info.var_name.apply(lambda x: len(ds[x].dims))\n", + "df_info[\"attrs\"] = df_info.var_name.apply(lambda x: ds[x].attrs)\n", + "df_info[\"dtype\"] = df_info.var_name.apply(lambda x: ds[x].data.dtype)\n", + "df_info" + ] + }, + { + "cell_type": "markdown", + "id": "e6a36b96", + "metadata": {}, + "source": [ + "### Data cleaning/Preprocessing (skipped)\n", + "Data cleaning and preprocessing is an important stage in the data analysis pipeline. However, it is out of the scope of this notebook. For more information about data cleaning and preprocessing basics, here are some references. 
\n", + "* [Xarray Fundamentals](https://earth-env-data-science.github.io/lectures/xarray/xarray.html)\n", + "* [Xarray Tutorial — Pangeo Gallery documentation](http://gallery.pangeo.io/repos/pangeo-data/pangeo-tutorial-gallery/xarray.html)\n", + "* [Pythonic Data Cleaning With pandas and NumPy](https://realpython.com/python-data-cleaning-numpy-pandas/)\n", + "* [Pandas - Cleaning Data](https://www.w3schools.com/python/pandas/pandas_cleaning.asp)" + ] + }, + { + "cell_type": "markdown", + "id": "ae6e1a69", + "metadata": {}, + "source": [ + "### Plotting\n", + "Data visualization (or plotting) is another important data analysis topic and deserves its own discussion. Here in this notebook we will only demonstrate basic tool for simple data visualization tasks.\n", + "\n", + "Here are some reference you might find useful:\n", + "* xarray plotting: https://docs.xarray.dev/en/stable/user-guide/plotting.html\n", + "* Atmospheric Community Toolkit (ACT): https://arm-doe.github.io/ACT/index.html\n", + "\n", + "Note: this notebook is auto-generated using a template. It uses a general idea to select variable(s) to plot and is not customized to each indivdual notebook. Feel free to change the variables in intrrests, especially certain figure is failed to plot." + ] + }, + { + "cell_type": "markdown", + "id": "7e417fb2", + "metadata": {}, + "source": [ + "#### 1-dimenssional basic time series plot\n", + "\n", + "For the following plot we would like to find variables such that\n", + "* it has one and only one dimession\n", + "* \"time\" is its coordinate variable\n", + "* it is not a dimenssion itself,\n", + "* it is not a special variable with substrings within [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "\n", + "Please see the following example in action. 
(For more details about pandas filtering, please see the following references.)\n", + "* [pandas: multiple conditions while indexing data frame](https://stackoverflow.com/questions/22591174/pandas-multiple-conditions-while-indexing-data-frame-unexpected-behavior)\n", + "* [pandas.DataFrame.apply](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.apply.html)\n", + "* [How to test if a string contains one of the substrings in a list...](https://stackoverflow.com/questions/26577516/how-to-test-if-a-string-contains-one-of-the-substrings-in-a-list-in-pandas)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "30edac2d", + "metadata": {}, + "outputs": [], + "source": [ + "# (pandas query) filter n_dim==1, non-dimensional, and not contains the following substrings\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter = df_info[(df_info.n_dim == 1) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (df_info.is_dim==False) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4777a659", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " var_1d = df_filter.var_name.values[0]\n", + " var_1d\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fb5ae985", + "metadata": {}, + "outputs": [], + "source": [ + "# Note: if failed, change to another variable to plot.\n", + "try:\n", + " ds[var_1d].plot()\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "markdown", + "id": "9e41066e", + "metadata": {}, + "source": [ + "#### 2-dimensional basic plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "09bd4adc", + "metadata": {}, + "outputs": [], + "source": [ + "# (pandas query) filter n_dim==2, non-dimensional, and not contains the 
following substrings\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_2 = df_info[(df_info.n_dim == 2) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (df_info.is_dim==False) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter_2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2cdea1a9", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " var_2d = df_filter_2.var_name.values[0]\n", + " var_2d\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f5f48879", + "metadata": {}, + "outputs": [], + "source": [ + "# Note: if failed, change to another variable to plot.\n", + "try:\n", + " print(ds[var_2d].dims)\n", + " # ds[var_2d].plot()\n", + "\n", + " # conventionally, use \"time\" as x-axis\n", + " ds[var_2d].plot(x=\"time\")\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "markdown", + "id": "7554b2a6", + "metadata": {}, + "source": [ + "#### qc-plotting (optional)\n", + "\n", + "Note: act qc plotting has more strict requirement, one of them is the associated qc variable need to have \"flag_masks\" attributes. 
Which is added by using ds = act.io.armfiles.read_netcdf(files_list), then ds.clean.cleanup()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "71c2096f", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " ds_act = act.io.armfiles.read_netcdf(full_path)\n", + " print(type(ds_act))\n", + " ds_act.clean.cleanup()\n", + "\n", + " # or \n", + " # ds.clean.cleanup()\n", + "except Exception as e:\n", + " print(\"ERROR\", e)\n", + " ds_act = ds" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "844f8505", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter valid variables for ACT qc plotting\n", + "condition = (\"qc_\" + df_info.var_name).apply(lambda x: ds_act[x].attrs.get(\"flag_masks\") is not None \n", + " if x in list(ds.variables) else False)\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_3 = df_info[(df_info.is_dim==False) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings))) &\n", + " condition\n", + " ]\n", + "df_filter_3" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ecd38cc6", + "metadata": {}, + "outputs": [], + "source": [ + "# QC Plot\n", + "try:\n", + " qc_variable = df_filter_3.var_name.values[0]\n", + " print(qc_variable)\n", + "except Exception as e:\n", + " print(e)\n", + " \n", + "try:\n", + "\n", + " # Plot\n", + " qc_display = act.plotting.TimeSeriesDisplay(ds_act)\n", + " qc_display.add_subplots((2,), figsize = (9.5,10))\n", + " qc_ax = qc_display.plot(qc_variable, subplot_index=(0,), set_title=\"QC results on field: \" + qc_variable,)\n", + " qc_ax.grid()\n", + " qc_display.qc_flag_block_plot(qc_variable, subplot_index=(1,))\n", + " qc_ax.set_xlabel(f\"UTC Time starts at {ds.time.data[0]}\")\n", + "\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e)" + ] + }, + { + "cell_type": "markdown", + "id": "d76e4d27", + "metadata": 
{}, + "source": [ + "#### bonus: choose variables to plot from a dropdown menu " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bb733804", + "metadata": {}, + "outputs": [], + "source": [ + "# Valid variables filtering\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_4 = df_info[(df_info.is_dim==False) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter_4" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "82d5c20d", + "metadata": {}, + "outputs": [], + "source": [ + "# example 1: using xarray plot\n", + "\n", + "# Uncomment the following cell to try the interactive plot (ctrl + /)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2fc9aaa4", + "metadata": {}, + "outputs": [], + "source": [ + "# %matplotlib widget\n", + "# plt.clf()\n", + "\n", + "# fig, ax = plt.subplots(figsize=(10, 4))\n", + "\n", + "# available_variables = df_filter_4.var_name.values\n", + "# @widgets.interact(var=available_variables)\n", + "# def update(var = available_variables[0]):\n", + "# fig.clear() # Remove old lines from plot and plot new one\n", + "# if len(ds[var].dims)==2:\n", + "# ds[var].plot(x=\"time\", add_colorbar=False)\n", + "# else:\n", + "# ds[var].plot()\n", + "# plt.grid()\n", + "# plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fabdd802", + "metadata": {}, + "outputs": [], + "source": [ + "# example 2: using act plot\n", + "\n", + "# Uncomment the following cell to try the interactive plot (ctrl + /)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "962b4186", + "metadata": {}, + "outputs": [], + "source": [ + "# %matplotlib widget\n", + "# plt.clf()\n", + "\n", + "# available_variables = df_filter_4.var_name.values\n", + "\n", + "\n", + "# @widgets.interact(var=available_variables)\n", + "# def 
update(var = available_variables[0]):\n", + "\n", + "# i_display = act.plotting.TimeSeriesDisplay(ds_act)\n", + "# i_display.add_subplots((1,), figsize=(10, 4))\n", + "# ax = i_display.plot(var, subplot_index=(0,), set_title=f\"{var} ({ds_act[var].attrs['long_name']})\",)\n", + "\n", + "# ax.set_xlabel(f\"UTC Time starts at {ds.time.data[0]}\")\n", + "# ax.grid()\n", + "# plt.show()\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.16" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/VDISQUANTS/vdisquants.c1.ipynb b/VAPs/quicklook/VDISQUANTS/vdisquants.c1.ipynb new file mode 100644 index 00000000..278606b5 --- /dev/null +++ b/VAPs/quicklook/VDISQUANTS/vdisquants.c1.ipynb @@ -0,0 +1,297 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# VDISQUANTS.C1 Plots\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/vdisquants) for more information about this vap." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib widget\n", + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import ipywidgets as widgets\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "# Data archive directory\n", + "DATA_DIR = r'/data/archive/'\n", + "\n", + "# Datastream info\n", + "DATASTREAM_NAME = 'vdisquants'\n", + "DATA_LEVEL = 'c1'\n", + "LOCATIONS = [{'end_date': '2023-12-15', 'facility': 'C1', 'site': 'ena', 'start_date': '2014-10-31'}, {'end_date': '2023-12-12', 'facility': 'M1', 'site': 'epc', 'start_date': '2023-01-15'}, {'end_date': '2019-04-29', 'facility': 'M1', 'site': 'cor', 'start_date': '2018-09-26'}, {'end_date': '2022-09-29', 'facility': 'M1', 'site': 'hou', 'start_date': '2021-08-07'}, {'end_date': '2023-12-16', 'facility': 'C1', 'site': 'sgp', 'start_date': '2016-10-01'}, {'end_date': '2023-06-20', 'facility': 'E13', 'site': 'sgp', 'start_date': '2018-06-20'}]" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "9faaf875", + "metadata": {}, + "source": [ + "## Define site, facility, and date range" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ac6764f5", + "metadata": {}, + "outputs": [], + "source": [ + "print(\"The following locations and date ranges are available for this VAP:\")\n", + "display(pd.DataFrame(LOCATIONS, columns=['site', 'facility', 'start_date', 'end_date']))" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "8d132223", + "metadata": {}, + "source": [ + "#### Define site, facility, and date range (date format: YYYY-MM-DD) using the variables below:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e563983a", + "metadata": {}, + "outputs": [], + 
"source": [ + "site_facility = ( 'sgp', 'C1' )\n", + "\n", + "date_start = '2023-12-14'\n", + "date_end = '2023-12-16'" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "bccd3dfe-2f99-49a5-bace-ea37e8dd8fc3", + "metadata": {}, + "source": [ + "## Load data files\n", + "Load data files from /data/archive/" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cb4b9a26-c574-49c0-a521-658fa553e39e", + "metadata": {}, + "outputs": [], + "source": [ + "# Compile list of files\n", + "site, facility = site_facility\n", + "d_date_start = datetime.strptime(date_start, '%Y-%m-%d')\n", + "d_date_end = datetime.strptime(date_end, '%Y-%m-%d')\n", + "dir_path = os.path.join(DATA_DIR + site, site + DATASTREAM_NAME + facility + r'.' + DATA_LEVEL )\n", + "dir_path\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6be8f3dc", + "metadata": {}, + "outputs": [], + "source": [ + "from datetime import date, timedelta\n", + "import pandas as pd\n", + "\n", + "def get_ARM_formated_dates(start_date, end_date):\n", + " \"\"\"\n", + " Get a list of ARM conventional formated date lists, based on start_date and end_date(inclusive)\n", + " EXAMPLE:\n", + " get_ARM_formated_dates(start_date=\"20180219\", end_date=\"20180221\")\n", + " >> [\"20180219\", \"20180220\", \"20180221\"] \n", + " \"\"\"\n", + " \n", + " _start_date = pd.to_datetime(start_date)\n", + " _end_date = pd.to_datetime(end_date)\n", + " \n", + " delta = _end_date - _start_date # returns timedelta \n", + " dates = []\n", + "\n", + " for i in range(delta.days + 1):\n", + " day = _start_date + timedelta(days=i)\n", + " day_formated = day.strftime(format=\"%Y%m%d\")\n", + " dates.append(day_formated)\n", + " return dates\n", + "\n", + "\n", + "get_ARM_formated_dates(start_date=date_start, end_date=date_end)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "51feea2e", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter a list of files based 
on date pattern\n", + "import glob\n", + "dates = get_ARM_formated_dates(start_date=date_start, end_date=date_end)\n", + "files_filter = []\n", + "for date in dates:\n", + " files_filter += glob.glob(f'{dir_path}/*.{date}*.*')\n", + " files_filter\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0e5d0d7", + "metadata": {}, + "outputs": [], + "source": [ + "# Load files as a single dataset\n", + "files_list = files_filter \n", + "ds = act.io.armfiles.read_netcdf(files_list)\n", + "ds.clean.cleanup()\n", + "print(f'{len(files_list)} files loaded')\n", + "ds\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "4a551094-9ec0-4b64-b80a-9940573c2f50", + "metadata": {}, + "source": [ + "## Plot time series data\n", + "#### Define the list of variables to be plotted:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d09b789e-84f1-4605-846b-a72c110c8048", + "metadata": {}, + "outputs": [], + "source": [ + "variables_to_plot = ['rain_rate', 'reflectivity_factor_sband20c', 'reflectivity_factor_cband20c']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3458fb08-035b-4898-9253-0a94e6f9c97b", + "metadata": {}, + "outputs": [], + "source": [ + "ts_display = act.plotting.TimeSeriesDisplay(ds)\n", + "ts_display.add_subplots((len(variables_to_plot),), figsize = (9.5,4*len(variables_to_plot)))\n", + "\n", + "for i,v in enumerate(variables_to_plot):\n", + " ts_ax = ts_display.plot(v, subplot_index=(i,), set_title=ds.variables[v].attrs['long_name'],)\n", + " ts_ax.grid()\n", + "\n", + "plt.show()\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "2308db16-f362-4033-a11a-c3e5e75ad9ba", + "metadata": {}, + "source": [ + "## Field selection dropdown menu\n", + "Select variable to be plotted from a dropdown menu" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d5792fa8-2253-441a-8ac6-1e07a0345031", + "metadata": {}, + "outputs": [], 
+ "source": [ + "plt.ioff()\n", + "\n", + "# populate dropdown menu with available variables \n", + "available_variables = [v for v in ds.variables if not('time' in v or v.startswith('qc_') or v.startswith('source_')) and 'long_name' in ds.variables[v].attrs]\n", + "d_variable = 'rain_rate'\n", + "dropdown = widgets.Dropdown(\n", + " options = [(ds.variables[v].attrs['long_name'], v) for v in available_variables],\n", + " value= d_variable,\n", + " description='Field:',\n", + " disabled=False,\n", + ")\n", + "dropdown.layout.margin = '0px 30% 0px 20%'\n", + "dropdown.layout.width = '50%'\n", + "\n", + "# set up display\n", + "i_display = act.plotting.TimeSeriesDisplay(ds)\n", + "i_display.add_subplots((1,), figsize = (9.5,5))\n", + "i_ax = i_display.plot(d_variable, subplot_index=(0,), set_title=ds.variables[d_variable].attrs['long_name'],)\n", + "i_ax.grid()\n", + "i_fig = i_display.fig\n", + "\n", + "# update plot callback function\n", + "def update_plot(change):\n", + " i_ax.cla()\n", + " i_ax_new = i_display.plot(change.new, subplot_index=(0,), set_title=ds.variables[change.new].attrs['long_name'],)\n", + " i_ax_new.grid()\n", + " i_fig.canvas.draw()\n", + " i_fig.canvas.flush_events()\n", + "\n", + "dropdown.observe(update_plot, names='value')\n", + "\n", + "widgets.AppLayout(\n", + " header=dropdown,\n", + " center=i_fig.canvas,\n", + " pane_heights=[1, 6,1]\n", + ")\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.12" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git 
a/VAPs/quicklook/WACRARSCL/WACRARSCL_tutorial.ipynb b/VAPs/quicklook/WACRARSCL/WACRARSCL_tutorial.ipynb new file mode 100644 index 00000000..05ccaaa1 --- /dev/null +++ b/VAPs/quicklook/WACRARSCL/WACRARSCL_tutorial.ipynb @@ -0,0 +1,838 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# ARSCLWACR1KOLLIAS.C1 Notebook\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/wacrarscl) for more information about this vap." + ] + }, + { + "cell_type": "markdown", + "id": "97097763", + "metadata": {}, + "source": [ + "In this notebook, we demonstrate the workflow to explore ARM vap data (using arsclwacr1kollias as an example.) Value-added products (VAPs) are higher-order data products that have been analyzed and processed to ease scientists’ use of ARM data in atmospheric research and global climate models. \n", + "Here is the main content we will cover." + ] + }, + { + "cell_type": "markdown", + "id": "eddec40f", + "metadata": {}, + "source": [ + "# Table of Content\n", + "## Access the data\n", + "* How to retrieve the data\n", + "* Data path and file name conventions\n", + "* Load data\n", + "## Explore the data\n", + "* NetCDF Data structure\n", + "* Xarray essentials\n", + "* Xarray Variable\n", + "## Plot the data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "import random\n", + "\n", + "import glob\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "id": "2372d5be", + "metadata": {}, + "source": [ + "## Access the data" + ] + }, + { + "cell_type": "markdown", + "id": "5b0d9684", + "metadata": {}, + "source": [ 
+ "### How to retrieve the data\n", + "We assume the path \"/data/archive\" is available where you are running this notebook. You can use `os.path.exists(\"/data/archive\")` to verify if the path exists on your machine.\n", + "\n", + "\n", + "### Data path and file name conventions\n", + "There are several common terminologies regarding ARM data, for example, data-stream-name, data-level, site, facility, instrument, etc. (For more details, please see [ARM Data File Standards Version 1.2](https://www.google.com/search?q=arm+datastream+facility+cite+definition&rlz=1C1GCEJ_enUS1029US1029&ei=hb41ZICrBPukqtsPvMODIA&ved=0ahUKEwjAgeiS0aL-AhV7kmoFHbzhAAQQ4dUDCBA&uact=5&oq=arm+datastream+facility+cite+definition&gs_lcp=Cgxnd3Mtd2l6LXNlcnAQAzIFCAAQogQyBQgAEKIEMgUIABCiBDIFCAAQogQyBQgAEKIEOgoIABBHENYEELADOgoIIRCgARDDBBAKSgQIQRgAUJ4DWIoOYO0PaAFwAXgAgAGMAYgBuQmSAQMyLjmYAQCgAQHIAQjAAQE&sclient=gws-wiz-serp).) \n", + "\n", + "For example, this notebook is called `arsclwacr1kollias.c1`, where `arsclwacr1kollias` is the \"datastream name\", and `c1` is the \"data level\".\n", + "\n", + "This datastream also contains site `hfe` and facility `M1`. (Note: individual datastream might have multiple site-facility pairs.)\n", + "In such a case, the data of this data-stream is stored at `/data/archive/hfe/hfearsclwacr1kolliasM1.c1`, which is in the format of `<DATA_DIR>/<site>/<site><datastream_name><facility>.<data_level>`. We can use the following method to assign the data-stream directory `datastream_dir = os.path.join(DATA_DIR, site, site + DATASTREAM_NAME + facility + '.' + DATA_LEVEL )`\n", + "\n", + "The data files under datastream_dir also follow naming conventions. Once you reach the datastream_dir level, the most important file naming convention to differentiate the files is \"yyyyMMdd.hhmmss\", which comes in handy to filter out files based on datetime. 
For example, we can use `glob.glob(f'{datastream_dir}/*.200709*.*')` to filter files from September 2007.\n", + "\n", + "(Note: refer to https://adc.arm.gov/discovery/#/ and the https://adc.arm.gov/solr8/metadata/select API to explore ARM datastreams and their associated available sites and facilities.)\n", + "\n", + "Please see the following examples in action" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d7e9eb85", + "metadata": {}, + "outputs": [], + "source": [ + "# Verify if DATA_DIR path exists\n", + "DATA_DIR = \"/data/archive\"\n", + "os.path.exists(DATA_DIR)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "586993fd", + "metadata": {}, + "outputs": [], + "source": [ + "# Specify datastream_dir following the path conventions and check its existence\n", + "DATASTREAM_NAME = \"arsclwacr1kollias\"\n", + "DATA_LEVEL = \"c1\"\n", + "site = \"hfe\"\n", + "facility = \"M1\"\n", + "datastream_dir = os.path.join(DATA_DIR, site, site + DATASTREAM_NAME + facility + '.' 
+ DATA_LEVEL )\n", + "print(datastream_dir)\n", + "print(os.path.exists(datastream_dir))\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0742f7c1", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: list 5 (random) files under datastream_dir\n", + "files = os.listdir(datastream_dir)\n", + "files[:5]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "39b98a36", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: get most recent file\n", + "list_of_files = glob.glob(f\"{datastream_dir}/*\") # * means all if need specific format then *.csv\n", + "latest_file = max(list_of_files, key=os.path.getctime)\n", + "latest_file" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "902d514e", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus sort datastream files based on datetime\n", + "files = os.listdir(datastream_dir)\n", + "file_sorted = files.copy()\n", + "file_sorted.sort() \n", + "print(file_sorted[:5])\n", + "\n", + "# to reverse\n", + "file_sorted_reverse = files.copy() \n", + "file_sorted_reverse.sort(reverse=True)\n", + "print(file_sorted_reverse[:5])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ec5923b2", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: pattern matching\n", + "# filter the 200709** files under datastream_dir\n", + "files_filter = glob.glob(f'{datastream_dir}/*.200709*.*')\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c4a4aa18", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: use armnotebook_utils.file_filter (TODO) to filter files based on datastream info \n" + ] + }, + { + "cell_type": "markdown", + "id": "cfeb9efc", + "metadata": {}, + "source": [ + "### Load data\n", + "The arm data is stored in [NetCDF](https://en.wikipedia.org/wiki/NetCDF#:~:text=%22NetCDF%20(network%20Common%20Data%20Form,format%20for%20representing%20scientific%20data.) format. 
We can use xarray's `open_dataset` method to load single file, or `open_mfdataset` to open multiple files. (Note: the latter will still return a single xarray dataset object by combining multiple files.)\n", + "\n", + "Note: open_dataset keeps the file handle open and lazy loads its contents. All parameters are passed directly to open_dataset. It is a preferable method over load_dataset for memory-effiency. (more details, please see [xarray.open_dataset](https://docs.xarray.dev/en/stable/generated/xarray.open_dataset.html))\n", + "\n", + "See the following example in action" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a440a329", + "metadata": {}, + "outputs": [], + "source": [ + "# open single file\n", + "full_path = latest_file\n", + "print(full_path)\n", + "ds_single = xr.open_dataset(full_path)\n", + "ds_single" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0143a3d", + "metadata": {}, + "outputs": [], + "source": [ + "print(ds_single)\n", + "print(type(ds_single))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1c0f8939", + "metadata": {}, + "outputs": [], + "source": [ + "# open multiple files\n", + "n_files = 3\n", + "full_paths = [os.path.join(datastream_dir, f_path) for f_path in file_sorted_reverse[:n_files]]\n", + "print(full_paths)\n", + "try: # Note: sometimes multiple files cannot be merged, so we used try except here.\n", + " ds_mutiple = xr.open_mfdataset(full_paths)\n", + " # ds_mutiple\n", + " print(type(ds_mutiple))\n", + " print(ds_mutiple)\n", + "except Exception as e:\n", + " print(e)" + ] + }, + { + "cell_type": "markdown", + "id": "3fa59943", + "metadata": {}, + "source": [ + "## Explore the data" + ] + }, + { + "cell_type": "markdown", + "id": "571f69b9", + "metadata": {}, + "source": [ + "### NetCDF Data structure \n", + "\n", + "\n", + "(If you are confident with NetCDF basics and xarray essentials, feel free to skip this session.)\n", + "Before we dive into 
data exploration, there are some essential concepts we should be familiar with (shown below): \n", + "* Dataset\n", + "* Data array\n", + "* Variable\n", + "* Dimension\n", + "* Coordinate\n", + "* Data Type\n", + "* Meta Data (Attributes)\n", + "\n", + "We will not go into details about NetCDF basics, but here are some references you might find helpful\n", + "* [Components of a NetCDF Dataset](https://iprc.soest.hawaii.edu/users/xfu/tool/guidef-7.html#)\n", + "* [Network Common Data Form (NetCDF)](https://www.unidata.ucar.edu/software/netcdf/) and [A Brief History of (netCDF) Time](https://www.unidata.ucar.edu/software/netcdf/time/recs.html)\n", + "\n", + "\n", + "\n", + "### Xarray essentials\n", + "Earlier we introduced the xarray package and used `open_dataset` and `open_mfdataset` to retrieve NetCDF data file as an xarray Dataset object. Recall that you can review the data in a notebook by using `print(ds)` or `ds`. \n", + "\n", + "Using xarray to retrieve the aforementioned NetCDF basics is straightforward. 
In general,\n", + "* Dataset: `ds`\n", + "* Data array: `ds.variables`\n", + "* Variable: `ds.variables`\n", + "* Dimension: `ds.dims`\n", + "* Coordinate: `ds.coords`\n", + "* Data Type: `type`\n", + "* Meta Data (Attributes): `ds.attrs`\n", + "\n", + "Also, here are some references if you are new to xarray\n", + "* [Xarray in 45 minutes](https://tutorial.xarray.dev/overview/xarray-in-45-min.html)\n", + "* [Handling NetCDF Files using XArray for Absolute Beginners](https://towardsdatascience.com/handling-netcdf-files-using-xarray-for-absolute-beginners-111a8ab4463f)\n", + "\n", + "Try the following commands in action\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b3eac323", + "metadata": {}, + "outputs": [], + "source": [ + "ds = ds_single" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "77ecf85d", + "metadata": {}, + "outputs": [], + "source": [ + "# Dataset \n", + "ds" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "25e7de09", + "metadata": {}, + "outputs": [], + "source": [ + "# Data array, Variable\n", + "# Note: the info can be overwhelming. 
Do not attempt to grasp everything at first glance.\n", + "ds.variables" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8c41b67e", + "metadata": {}, + "outputs": [], + "source": [ + "# dimensions\n", + "ds.dims" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "156f1dfc", + "metadata": {}, + "outputs": [], + "source": [ + "# coordinates\n", + "ds.coords" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "277d6064", + "metadata": {}, + "outputs": [], + "source": [ + "# Meta Data (Attributes)\n", + "ds.attrs" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d334681f", + "metadata": {}, + "outputs": [], + "source": [ + "# type\n", + "print(type(ds))\n", + "print(type(ds.variables))\n", + "print(type(ds.dims))\n", + "print(type(ds.coords))\n", + "print(type(ds.attrs))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "643399d6", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: retrieve variable names only\n", + "list(ds.variables)" + ] + }, + { + "cell_type": "markdown", + "id": "1cf36878", + "metadata": {}, + "source": [ + "#### Discussion: variable vs. coordinates vs. dimensions. \n", + "From [Components of a NetCDF Dataset](https://iprc.soest.hawaii.edu/users/xfu/tool/guidef-7.html#):\n", + "\n", + "* Dimension: A dimension may be used to represent a real physical dimension, for example, time, latitude, longitude, or height. A dimension might also be used to index other quantities, for example station or model-run-number.\n", + "A netCDF dimension has both a name and a length. A dimension length is an arbitrary positive integer, except that one dimension in a netCDF dataset can have the length UNLIMITED.\n", + "\n", + "* Variables: Variables are used to store the bulk of the data in a netCDF dataset. A variable represents an array of values of the same type. A scalar value is treated as a 0-dimensional array. 
A variable has a name, a data type, and a shape described by its list of dimensions specified when the variable is created. A variable may also have associated attributes, which may be added, deleted or changed after the variable is created.\n", + "\n", + "* Coordinate (Variables): It is legal for a variable to have the same name as a dimension. Such variables have no special meaning to the netCDF library. However there is a convention that such variables should be treated in a special way by software using this library.\n", + "A variable with the same name as a dimension is called a coordinate variable. It typically defines a physical coordinate corresponding to that dimension.\n", + "\n", + "\n", + "Tips: By definition, coordinates and dimensions are also variables. For example, ds.time is a coordinate but it is also a special variable. But in practice, when we talk about a variable, we mean a regular/non-coordinate variable. We will use this convention for the remainder of this notebook. " + ] + }, + { + "cell_type": "markdown", + "id": "d47120f9", + "metadata": {}, + "source": [ + "### Xarray Variable\n", + "\n", + "\n", + "We can use `ds.variables` to access the variables of a dataset. But printing out the whole dataset can often be overwhelming. Instead, we can work on an individual variable (also called an xarray data array) using the `ds[var_name]` syntax, where `var_name` is the name of the variable. See the following examples in action.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5a57e136", + "metadata": {}, + "outputs": [], + "source": [ + "print(type(ds[\"time\"]))\n", + "ds[\"time\"]" + ] + }, + { + "cell_type": "markdown", + "id": "51ec643e", + "metadata": {}, + "source": [ + "#### Variable properties\n", + "For an individual variable, the following properties are most commonly used, listed with the associated xarray attribute to retrieve them. 
(assuming var = ds[var_name])\n", + "* name: `var.name`\n", + "* data content: `var.data`\n", + "* attributes: `var.attrs`\n", + "* dimensions: `var.dims`\n", + "* data type: `var.data.dtype`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a632a525", + "metadata": {}, + "outputs": [], + "source": [ + "var_name = \"time\"\n", + "var = ds[var_name]\n", + "\n", + "print(\"var.name: \\n\", var.name, \"\\n\")\n", + "print(\"var.data: \\n\", var.data, \"\\n\")\n", + "print(\"var.attrs: \\n\", var.attrs, \"\\n\")\n", + "print(\"var.dims: \\n\", var.dims, \"\\n\")\n", + "print(\"var.data.dtype: \\n\", var.data.dtype, \"\\n\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b256b07f", + "metadata": {}, + "outputs": [], + "source": [ + "# bonus: individual variable info table (collect and display variable info in a tabular format)\n", + "df_info = pd.DataFrame()\n", + "df_info[\"var_name\"] = list(ds.variables)\n", + "df_info[\"dims\"] = df_info.var_name.apply(lambda x: ds[x].dims)\n", + "df_info[\"is_dim\"] = df_info.var_name.apply(lambda x: x in ds.dims)\n", + "df_info[\"n_dim\"] = df_info.var_name.apply(lambda x: len(ds[x].dims))\n", + "df_info[\"attrs\"] = df_info.var_name.apply(lambda x: ds[x].attrs)\n", + "df_info[\"dtype\"] = df_info.var_name.apply(lambda x: ds[x].data.dtype)\n", + "df_info" + ] + }, + { + "cell_type": "markdown", + "id": "e6a36b96", + "metadata": {}, + "source": [ + "### Data cleaning/Preprocessing (skipped)\n", + "Data cleaning and preprocessing is an important stage in the data analysis pipeline. However, it is out of the scope of this notebook. For more information about data cleaning and preprocessing basics, here are some references. 
\n", + "* [Xarray Fundamentals](https://earth-env-data-science.github.io/lectures/xarray/xarray.html)\n", + "* [Xarray Tutorial — Pangeo Gallery documentation](http://gallery.pangeo.io/repos/pangeo-data/pangeo-tutorial-gallery/xarray.html)\n", + "* [Pythonic Data Cleaning With pandas and NumPy](https://realpython.com/python-data-cleaning-numpy-pandas/)\n", + "* [Pandas - Cleaning Data](https://www.w3schools.com/python/pandas/pandas_cleaning.asp)" + ] + }, + { + "cell_type": "markdown", + "id": "ae6e1a69", + "metadata": {}, + "source": [ + "### Plotting\n", + "Data visualization (or plotting) is another important data analysis topic and deserves its own discussion. Here in this notebook we will only demonstrate basic tools for simple data visualization tasks.\n", + "\n", + "Here are some references you might find useful:\n", + "* xarray plotting: https://docs.xarray.dev/en/stable/user-guide/plotting.html\n", + "* Atmospheric Community Toolkit (ACT): https://arm-doe.github.io/ACT/index.html\n", + "\n", + "Note: this notebook is auto-generated using a template. It uses a general idea to select variable(s) to plot and is not customized to each individual notebook. Feel free to change the variables of interest, especially if a certain figure fails to plot." + ] + }, + { + "cell_type": "markdown", + "id": "7e417fb2", + "metadata": {}, + "source": [ + "#### 1-dimensional basic time series plot\n", + "\n", + "For the following plot we would like to find variables such that\n", + "* it has one and only one dimension\n", + "* \"time\" is its coordinate variable\n", + "* it is not a dimension itself,\n", + "* it is not a special variable with substrings within [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "\n", + "Please see the following example in action. 
(For more details about pandas filtering, please see the following references.)\n", + "* [pandas: multiple conditions while indexing data frame](https://stackoverflow.com/questions/22591174/pandas-multiple-conditions-while-indexing-data-frame-unexpected-behavior)\n", + "* [pandas.DataFrame.apply](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.apply.html)\n", + "* [How to test if a string contains one of the substrings in a list...](https://stackoverflow.com/questions/26577516/how-to-test-if-a-string-contains-one-of-the-substrings-in-a-list-in-pandas)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "30edac2d", + "metadata": {}, + "outputs": [], + "source": [ + "# (pandas query) filter n_dim==1, non-dimensional, and not contains the following substrings\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter = df_info[(df_info.n_dim == 1) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (df_info.is_dim==False) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4777a659", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " var_1d = df_filter.var_name.values[0]\n", + " var_1d\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fb5ae985", + "metadata": {}, + "outputs": [], + "source": [ + "# Note: if failed, change to another variable to plot.\n", + "try:\n", + " ds[var_1d].plot()\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "markdown", + "id": "9e41066e", + "metadata": {}, + "source": [ + "#### 2-dimensional basic plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "09bd4adc", + "metadata": {}, + "outputs": [], + "source": [ + "# (pandas query) filter n_dim==2, non-dimensional, and not contains the 
following substrings\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_2 = df_info[(df_info.n_dim == 2) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (df_info.is_dim==False) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter_2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2cdea1a9", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " var_2d = df_filter_2.var_name.values[0]\n", + " var_2d\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f5f48879", + "metadata": {}, + "outputs": [], + "source": [ + "# Note: if failed, change to another variable to plot.\n", + "try:\n", + " print(ds[var_2d].dims)\n", + " # ds[var_2d].plot()\n", + "\n", + " # conventionally, use \"time\" as x-axis\n", + " ds[var_2d].plot(x=\"time\")\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e) " + ] + }, + { + "cell_type": "markdown", + "id": "7554b2a6", + "metadata": {}, + "source": [ + "#### qc-plotting (optional)\n", + "\n", + "Note: act qc plotting has more strict requirement, one of them is the associated qc variable need to have \"flag_masks\" attributes. 
Which is added by using ds = act.io.armfiles.read_netcdf(files_list), then ds.clean.cleanup()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "71c2096f", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " ds_act = act.io.armfiles.read_netcdf(full_path)\n", + " print(type(ds_act))\n", + " ds_act.clean.cleanup()\n", + "\n", + " # or \n", + " # ds.clean.cleanup()\n", + "except Exception as e:\n", + " print(\"ERROR\", e)\n", + " ds_act = ds" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "844f8505", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter valid variables for ACT qc plotting\n", + "condition = (\"qc_\" + df_info.var_name).apply(lambda x: ds_act[x].attrs.get(\"flag_masks\") is not None \n", + " if x in list(ds.variables) else False)\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_3 = df_info[(df_info.is_dim==False) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings))) &\n", + " condition\n", + " ]\n", + "df_filter_3" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ecd38cc6", + "metadata": {}, + "outputs": [], + "source": [ + "# QC Plot\n", + "try:\n", + " qc_variable = df_filter_3.var_name.values[0]\n", + " print(qc_variable)\n", + "except Exception as e:\n", + " print(e)\n", + " \n", + "try:\n", + "\n", + " # Plot\n", + " qc_display = act.plotting.TimeSeriesDisplay(ds_act)\n", + " qc_display.add_subplots((2,), figsize = (9.5,10))\n", + " qc_ax = qc_display.plot(qc_variable, subplot_index=(0,), set_title=\"QC results on field: \" + qc_variable,)\n", + " qc_ax.grid()\n", + " qc_display.qc_flag_block_plot(qc_variable, subplot_index=(1,))\n", + " qc_ax.set_xlabel(f\"UTC Time starts at {ds.time.data[0]}\")\n", + "\n", + " plt.show()\n", + "except Exception as e:\n", + " print(e)" + ] + }, + { + "cell_type": "markdown", + "id": "d76e4d27", + "metadata": 
{}, + "source": [ + "#### bonus: choose variables to plot from a dropdown menu " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bb733804", + "metadata": {}, + "outputs": [], + "source": [ + "# Valid variables filtering\n", + "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", + "df_filter_4 = df_info[(df_info.is_dim==False) &\n", + " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", + " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", + " ]\n", + "df_filter_4" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "82d5c20d", + "metadata": {}, + "outputs": [], + "source": [ + "# example 1: using xarray plot\n", + "\n", + "# Uncomment the following cell to try the interactive plot (ctrl + /)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2fc9aaa4", + "metadata": {}, + "outputs": [], + "source": [ + "# %matplotlib widget\n", + "# plt.clf()\n", + "\n", + "# fig, ax = plt.subplots(figsize=(10, 4))\n", + "\n", + "# available_variables = df_filter_4.var_name.values\n", + "# @widgets.interact(var=available_variables)\n", + "# def update(var = available_variables[0]):\n", + "# fig.clear() # Remove old lines from plot and plot new one\n", + "# if len(ds[var].dims)==2:\n", + "# ds[var].plot(x=\"time\", add_colorbar=False)\n", + "# else:\n", + "# ds[var].plot()\n", + "# plt.grid()\n", + "# plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fabdd802", + "metadata": {}, + "outputs": [], + "source": [ + "# example 2: using act plot\n", + "\n", + "# Uncomment the following cell to try the interactive plot (ctrl + /)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "962b4186", + "metadata": {}, + "outputs": [], + "source": [ + "# %matplotlib widget\n", + "# plt.clf()\n", + "\n", + "# available_variables = df_filter_4.var_name.values\n", + "\n", + "\n", + "# @widgets.interact(var=available_variables)\n", + "# def 
update(var = available_variables[0]):\n", + "\n", + "# i_display = act.plotting.TimeSeriesDisplay(ds_act)\n", + "# i_display.add_subplots((1,), figsize=(10, 4))\n", + "# ax = i_display.plot(var, subplot_index=(0,), set_title=f\"{var} ({ds_act[var].attrs['long_name']})\",)\n", + "\n", + "# ax.set_xlabel(f\"UTC Time starts at {ds.time.data[0]}\")\n", + "# ax.grid()\n", + "# plt.show()\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.16" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/WACRARSCL/arsclwacr1kollias.c1.ipynb b/VAPs/quicklook/WACRARSCL/arsclwacr1kollias.c1.ipynb new file mode 100644 index 00000000..b2a8d160 --- /dev/null +++ b/VAPs/quicklook/WACRARSCL/arsclwacr1kollias.c1.ipynb @@ -0,0 +1,339 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# ARSCLWACR1KOLLIAS.C1 Plots\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/wacrarscl) for more information about this vap." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib widget\n", + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import ipywidgets as widgets\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "# Data archive directory\n", + "DATA_DIR = r'/data/archive/'\n", + "\n", + "# Datastream info\n", + "DATASTREAM_NAME = 'arsclwacr1kollias'\n", + "DATA_LEVEL = 'c1'\n", + "LOCATIONS = [{'end_date': '2008-12-15', 'facility': 'M1', 'site': 'hfe', 'start_date': '2008-10-21'}, {'end_date': '2008-01-01', 'facility': 'M1', 'site': 'fkb', 'start_date': '2007-03-29'}, {'end_date': '2018-03-24', 'facility': 'M1', 'site': 'mar', 'start_date': '2017-10-29'}, {'end_date': '2010-12-31', 'facility': 'M1', 'site': 'grw', 'start_date': '2009-06-06'}, {'end_date': '2006-12-29', 'facility': 'M1', 'site': 'nim', 'start_date': '2006-03-16'}, {'end_date': '2013-06-14', 'facility': 'M1', 'site': 'pvc', 'start_date': '2012-10-12'}, {'end_date': '2014-09-13', 'facility': 'M1', 'site': 'tmp', 'start_date': '2014-02-01'}]" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "9faaf875", + "metadata": {}, + "source": [ + "## Define site, facility, and date range" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ac6764f5", + "metadata": {}, + "outputs": [], + "source": [ + "print(\"The following locations and date ranges are available for this VAP:\")\n", + "display(pd.DataFrame(LOCATIONS, columns=['site', 'facility', 'start_date', 'end_date']))" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "8d132223", + "metadata": {}, + "source": [ + "#### Define site, facility, and date range (date format: YYYY-MM-DD) using the variables below:" + ] + }, + { + "cell_type": 
"code", + "execution_count": null, + "id": "e563983a", + "metadata": {}, + "outputs": [], + "source": [ + "site_facility = ( 'hfe', 'M1' )\n", + "\n", + "date_start = '2008-12-13'\n", + "date_end = '2008-12-15'" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "bccd3dfe-2f99-49a5-bace-ea37e8dd8fc3", + "metadata": {}, + "source": [ + "## Load data files\n", + "Load data files from /data/archive/" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cb4b9a26-c574-49c0-a521-658fa553e39e", + "metadata": {}, + "outputs": [], + "source": [ + "# Compile list of files\n", + "site, facility = site_facility\n", + "d_date_start = datetime.strptime(date_start, '%Y-%m-%d')\n", + "d_date_end = datetime.strptime(date_end, '%Y-%m-%d')\n", + "dir_path = os.path.join(DATA_DIR + site, site + DATASTREAM_NAME + facility + r'.' + DATA_LEVEL )\n", + "dir_path\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6be8f3dc", + "metadata": {}, + "outputs": [], + "source": [ + "from datetime import date, timedelta\n", + "import pandas as pd\n", + "\n", + "def get_ARM_formated_dates(start_date, end_date):\n", + " \"\"\"\n", + " Get a list of ARM conventional formated date lists, based on start_date and end_date(inclusive)\n", + " EXAMPLE:\n", + " get_ARM_formated_dates(start_date=\"20180219\", end_date=\"20180221\")\n", + " >> [\"20180219\", \"20180220\", \"20180221\"] \n", + " \"\"\"\n", + " \n", + " _start_date = pd.to_datetime(start_date)\n", + " _end_date = pd.to_datetime(end_date)\n", + " \n", + " delta = _end_date - _start_date # returns timedelta \n", + " dates = []\n", + "\n", + " for i in range(delta.days + 1):\n", + " day = _start_date + timedelta(days=i)\n", + " day_formated = day.strftime(format=\"%Y%m%d\")\n", + " dates.append(day_formated)\n", + " return dates\n", + "\n", + "\n", + "get_ARM_formated_dates(start_date=date_start, end_date=date_end)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": 
"51feea2e", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter a list of files based on date pattern\n", + "import glob\n", + "dates = get_ARM_formated_dates(start_date=date_start, end_date=date_end)\n", + "files_filter = []\n", + "for date in dates:\n", + " files_filter += glob.glob(f'{dir_path}/*.{date}*.*')\n", + " files_filter\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0e5d0d7", + "metadata": {}, + "outputs": [], + "source": [ + "# Load files as a single dataset\n", + "files_list = files_filter \n", + "ds = act.io.armfiles.read_netcdf(files_list)\n", + "ds.clean.cleanup()\n", + "print(f'{len(files_list)} files loaded')\n", + "ds\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "4a551094-9ec0-4b64-b80a-9940573c2f50", + "metadata": {}, + "source": [ + "## Plot time series data\n", + "#### Define the list of variables to be plotted:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d09b789e-84f1-4605-846b-a72c110c8048", + "metadata": {}, + "outputs": [], + "source": [ + "variables_to_plot = ['reflectivity', 'reflectivity_best_estimate', 'mean_doppler_velocity']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3458fb08-035b-4898-9253-0a94e6f9c97b", + "metadata": {}, + "outputs": [], + "source": [ + "ts_display = act.plotting.TimeSeriesDisplay(ds)\n", + "ts_display.add_subplots((len(variables_to_plot),), figsize = (9.5,4*len(variables_to_plot)))\n", + "\n", + "for i,v in enumerate(variables_to_plot):\n", + " ts_ax = ts_display.plot(v, subplot_index=(i,), set_title=ds.variables[v].attrs['long_name'],)\n", + " ts_ax.grid()\n", + "\n", + "plt.show()\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "194399aa-1907-452b-8ba9-bc31d7f60291", + "metadata": {}, + "source": [ + "## Quality check plots\n", + "#### Define variable for QC plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": 
"9a0ef63d-3eeb-48fc-a24a-43258b6134b8", + "metadata": {}, + "outputs": [], + "source": [ + "qc_variable = 'radar_first_top'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "532663a3-4dc0-4497-bda8-018c5f91e1c4", + "metadata": {}, + "outputs": [], + "source": [ + "# QC Plot\n", + "if ('qc_'+qc_variable) in ds.variables:\n", + "\n", + " # Plot\n", + " qc_display = act.plotting.TimeSeriesDisplay(ds)\n", + " qc_display.add_subplots((2,), figsize = (9.5,10))\n", + " qc_ax = qc_display.plot(qc_variable, subplot_index=(0,), set_title=\"QC results on field: \" + qc_variable,)\n", + " qc_ax.grid()\n", + " qc_display.qc_flag_block_plot(qc_variable, subplot_index=(1,))\n", + "\n", + " plt.show()\n", + "else:\n", + " print(f'QC not available for the selected field: {qc_variable}')\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "2308db16-f362-4033-a11a-c3e5e75ad9ba", + "metadata": {}, + "source": [ + "## Field selection dropdown menu\n", + "Select variable to be plotted from a dropdown menu" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d5792fa8-2253-441a-8ac6-1e07a0345031", + "metadata": {}, + "outputs": [], + "source": [ + "plt.ioff()\n", + "\n", + "# populate dropdown menu with available variables \n", + "available_variables = [v for v in ds.variables if not('time' in v or v.startswith('qc_') or v.startswith('source_')) and 'long_name' in ds.variables[v].attrs]\n", + "d_variable = 'reflectivity'\n", + "dropdown = widgets.Dropdown(\n", + " options = [(ds.variables[v].attrs['long_name'], v) for v in available_variables],\n", + " value= d_variable,\n", + " description='Field:',\n", + " disabled=False,\n", + ")\n", + "dropdown.layout.margin = '0px 30% 0px 20%'\n", + "dropdown.layout.width = '50%'\n", + "\n", + "# set up display\n", + "i_display = act.plotting.TimeSeriesDisplay(ds)\n", + "i_display.add_subplots((1,), figsize = (9.5,5))\n", + "i_ax = i_display.plot(d_variable, subplot_index=(0,), 
set_title=ds.variables[d_variable].attrs['long_name'],)\n", + "i_ax.grid()\n", + "i_fig = i_display.fig\n", + "\n", + "# update plot callback function\n", + "def update_plot(change):\n", + " i_ax.cla()\n", + " i_ax_new = i_display.plot(change.new, subplot_index=(0,), set_title=ds.variables[change.new].attrs['long_name'],)\n", + " i_ax_new.grid()\n", + " i_fig.canvas.draw()\n", + " i_fig.canvas.flush_events()\n", + "\n", + "dropdown.observe(update_plot, names='value')\n", + "\n", + "widgets.AppLayout(\n", + " header=dropdown,\n", + " center=i_fig.canvas,\n", + " pane_heights=[1, 6,1]\n", + ")\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.12" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/quicklook/WACRARSCL/arsclwacrbnd1kollias.c1.ipynb b/VAPs/quicklook/WACRARSCL/arsclwacrbnd1kollias.c1.ipynb new file mode 100644 index 00000000..fde3c46a --- /dev/null +++ b/VAPs/quicklook/WACRARSCL/arsclwacrbnd1kollias.c1.ipynb @@ -0,0 +1,339 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "id": "70840257-70e4-45e2-b491-14bff5a257a3", + "metadata": {}, + "source": [ + "# ARSCLWACRBND1KOLLIAS.C1 Plots\n", + "\n", + "[Click here](https://www.arm.gov/capabilities/vaps/wacrarscl) for more information about this vap." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460fd89f-e034-452c-b837-f65c5958264f", + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib widget\n", + "import ipywidgets as widgets\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import ipywidgets as widgets\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "import act\n", + "import xarray as xr\n", + "\n", + "# Data archive directory\n", + "DATA_DIR = r'/data/archive/'\n", + "\n", + "# Datastream info\n", + "DATASTREAM_NAME = 'arsclwacrbnd1kollias'\n", + "DATA_LEVEL = 'c1'\n", + "LOCATIONS = [{'end_date': '2008-01-02', 'facility': 'M1', 'site': 'fkb', 'start_date': '2007-03-29'}, {'end_date': '2018-03-24', 'facility': 'M1', 'site': 'mar', 'start_date': '2017-10-29'}, {'end_date': '2008-12-15', 'facility': 'M1', 'site': 'hfe', 'start_date': '2008-10-14'}, {'end_date': '2010-12-31', 'facility': 'M1', 'site': 'grw', 'start_date': '2009-06-06'}, {'end_date': '2006-12-29', 'facility': 'M1', 'site': 'nim', 'start_date': '2006-03-16'}, {'end_date': '2013-06-14', 'facility': 'M1', 'site': 'pvc', 'start_date': '2012-10-12'}, {'end_date': '2014-09-13', 'facility': 'M1', 'site': 'tmp', 'start_date': '2014-02-01'}]" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "9faaf875", + "metadata": {}, + "source": [ + "## Define site, facility, and date range" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ac6764f5", + "metadata": {}, + "outputs": [], + "source": [ + "print(\"The following locations and date ranges are available for this VAP:\")\n", + "display(pd.DataFrame(LOCATIONS, columns=['site', 'facility', 'start_date', 'end_date']))" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "8d132223", + "metadata": {}, + "source": [ + "#### Define site, facility, and date range (date format: YYYY-MM-DD) using the variables below:" + ] + }, + { + "cell_type": 
"code", + "execution_count": null, + "id": "e563983a", + "metadata": {}, + "outputs": [], + "source": [ + "site_facility = ( 'fkb', 'M1' )\n", + "\n", + "date_start = '2007-12-31'\n", + "date_end = '2008-01-02'" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "bccd3dfe-2f99-49a5-bace-ea37e8dd8fc3", + "metadata": {}, + "source": [ + "## Load data files\n", + "Load data files from /data/archive/" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cb4b9a26-c574-49c0-a521-658fa553e39e", + "metadata": {}, + "outputs": [], + "source": [ + "# Compile list of files\n", + "site, facility = site_facility\n", + "d_date_start = datetime.strptime(date_start, '%Y-%m-%d')\n", + "d_date_end = datetime.strptime(date_end, '%Y-%m-%d')\n", + "dir_path = os.path.join(DATA_DIR + site, site + DATASTREAM_NAME + facility + r'.' + DATA_LEVEL )\n", + "dir_path\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6be8f3dc", + "metadata": {}, + "outputs": [], + "source": [ + "from datetime import date, timedelta\n", + "import pandas as pd\n", + "\n", + "def get_ARM_formated_dates(start_date, end_date):\n", + " \"\"\"\n", + " Get a list of dates in the ARM conventional format (YYYYMMDD), from start_date through end_date (inclusive)\n", + " EXAMPLE:\n", + " get_ARM_formated_dates(start_date=\"20180219\", end_date=\"20180221\")\n", + " >> [\"20180219\", \"20180220\", \"20180221\"] \n", + " \"\"\"\n", + " \n", + " _start_date = pd.to_datetime(start_date)\n", + " _end_date = pd.to_datetime(end_date)\n", + " \n", + " delta = _end_date - _start_date # returns timedelta \n", + " dates = []\n", + "\n", + " for i in range(delta.days + 1):\n", + " day = _start_date + timedelta(days=i)\n", + " day_formated = day.strftime(format=\"%Y%m%d\")\n", + " dates.append(day_formated)\n", + " return dates\n", + "\n", + "\n", + "get_ARM_formated_dates(start_date=date_start, end_date=date_end)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id":
"51feea2e", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter a list of files based on date pattern\n", + "import glob\n", + "dates = get_ARM_formated_dates(start_date=date_start, end_date=date_end)\n", + "files_filter = []\n", + "for date in dates:\n", + " files_filter += glob.glob(f'{dir_path}/*.{date}*.*')\n", + " files_filter\n", + "files_filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0e5d0d7", + "metadata": {}, + "outputs": [], + "source": [ + "# Load files as a single dataset\n", + "files_list = files_filter \n", + "ds = act.io.armfiles.read_netcdf(files_list)\n", + "ds.clean.cleanup()\n", + "print(f'{len(files_list)} files loaded')\n", + "ds\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "4a551094-9ec0-4b64-b80a-9940573c2f50", + "metadata": {}, + "source": [ + "## Plot time series data\n", + "#### Define the list of variables to be plotted:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d09b789e-84f1-4605-846b-a72c110c8048", + "metadata": {}, + "outputs": [], + "source": [ + "variables_to_plot = ['cloud_base_best_estimate', 'cloud_layer_base_height', 'cloud_layer_top_height']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3458fb08-035b-4898-9253-0a94e6f9c97b", + "metadata": {}, + "outputs": [], + "source": [ + "ts_display = act.plotting.TimeSeriesDisplay(ds)\n", + "ts_display.add_subplots((len(variables_to_plot),), figsize = (9.5,4*len(variables_to_plot)))\n", + "\n", + "for i,v in enumerate(variables_to_plot):\n", + " ts_ax = ts_display.plot(v, subplot_index=(i,), set_title=ds.variables[v].attrs['long_name'],)\n", + " ts_ax.grid()\n", + "\n", + "plt.show()\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "194399aa-1907-452b-8ba9-bc31d7f60291", + "metadata": {}, + "source": [ + "## Quality check plots\n", + "#### Define variable for QC plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": 
"9a0ef63d-3eeb-48fc-a24a-43258b6134b8", + "metadata": {}, + "outputs": [], + "source": [ + "qc_variable = 'radar_first_top'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "532663a3-4dc0-4497-bda8-018c5f91e1c4", + "metadata": {}, + "outputs": [], + "source": [ + "# QC Plot\n", + "if ('qc_'+qc_variable) in ds.variables:\n", + "\n", + " # Plot\n", + " qc_display = act.plotting.TimeSeriesDisplay(ds)\n", + " qc_display.add_subplots((2,), figsize = (9.5,10))\n", + " qc_ax = qc_display.plot(qc_variable, subplot_index=(0,), set_title=\"QC results on field: \" + qc_variable,)\n", + " qc_ax.grid()\n", + " qc_display.qc_flag_block_plot(qc_variable, subplot_index=(1,))\n", + "\n", + " plt.show()\n", + "else:\n", + " print(f'QC not available for the selected field: {qc_variable}')\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "2308db16-f362-4033-a11a-c3e5e75ad9ba", + "metadata": {}, + "source": [ + "## Field selection dropdown menu\n", + "Select variable to be plotted from a dropdown menu" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d5792fa8-2253-441a-8ac6-1e07a0345031", + "metadata": {}, + "outputs": [], + "source": [ + "plt.ioff()\n", + "\n", + "# populate dropdown menu with available variables \n", + "available_variables = [v for v in ds.variables if not('time' in v or v.startswith('qc_') or v.startswith('source_')) and 'long_name' in ds.variables[v].attrs]\n", + "d_variable = 'cloud_base_best_estimate'\n", + "dropdown = widgets.Dropdown(\n", + " options = [(ds.variables[v].attrs['long_name'], v) for v in available_variables],\n", + " value= d_variable,\n", + " description='Field:',\n", + " disabled=False,\n", + ")\n", + "dropdown.layout.margin = '0px 30% 0px 20%'\n", + "dropdown.layout.width = '50%'\n", + "\n", + "# set up display\n", + "i_display = act.plotting.TimeSeriesDisplay(ds)\n", + "i_display.add_subplots((1,), figsize = (9.5,5))\n", + "i_ax = i_display.plot(d_variable, 
subplot_index=(0,), set_title=ds.variables[d_variable].attrs['long_name'],)\n", + "i_ax.grid()\n", + "i_fig = i_display.fig\n", + "\n", + "# update plot callback function\n", + "def update_plot(change):\n", + " i_ax.cla()\n", + " i_ax_new = i_display.plot(change.new, subplot_index=(0,), set_title=ds.variables[change.new].attrs['long_name'],)\n", + " i_ax_new.grid()\n", + " i_fig.canvas.draw()\n", + " i_fig.canvas.flush_events()\n", + "\n", + "dropdown.observe(update_plot, names='value')\n", + "\n", + "widgets.AppLayout(\n", + " header=dropdown,\n", + " center=i_fig.canvas,\n", + " pane_heights=[1, 6,1]\n", + ")\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.12" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VAPs/vap_notebook_list.md b/VAPs/vap_notebook_list.md new file mode 100644 index 00000000..a94c2ea7 --- /dev/null +++ b/VAPs/vap_notebook_list.md @@ -0,0 +1,1010 @@ +# ARM VAPs Jupyter Notebooks +List of available ARM VAPs Notebooks. + +You must have access to the [ARM JupyterHub Server](https://jupyterhub.arm.gov/) to open these notebooks. + + + +
+ 2DS-AIR + +* [2DS-AIR tutorial](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/2DS-AIR/2DS-AIR_tutorial.ipynb&branch=main) + +* [aaf2dsh.c1](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/2DS-AIR/aaf2dsh.c1.ipynb&branch=main) + +* [aaf2dsv.c1](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/2DS-AIR/aaf2dsv.c1.ipynb&branch=main) + + +
+ + +
+ ACSMCDCE + +* [ACSMCDCE tutorial](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/ACSMCDCE/ACSMCDCE_tutorial.ipynb&branch=main) + +* [acsmcdce.c1](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/ACSMCDCE/acsmcdce.c1.ipynb&branch=main) + +* [acsmcdce.c2](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/ACSMCDCE/acsmcdce.c2.ipynb&branch=main) + +* [acsmtofcdce.c1](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/ACSMCDCE/acsmtofcdce.c1.ipynb&branch=main) + + +
+ + +
+ AERINF + +* [AERINF tutorial](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/AERINF/AERINF_tutorial.ipynb&branch=main) + +* [aerich1nf1turn.c1](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/AERINF/aerich1nf1turn.c1.ipynb&branch=main) + +* [aerich2nf1turn.c1](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/AERINF/aerich2nf1turn.c1.ipynb&branch=main) + + +
+ + +
+ AERIOE + +* [AERIOE tutorial](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/AERIOE/AERIOE_tutorial.ipynb&branch=main) + +* [aerioe1turn.c1](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/AERIOE/aerioe1turn.c1.ipynb&branch=main) + + +
+ + +
+ AERIPROF + +* [AERIPROF tutorial](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/AERIPROF/AERIPROF_tutorial.ipynb&branch=main) + +* [aeri01prof3feltz.c1](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/AERIPROF/aeri01prof3feltz.c1.ipynb&branch=main) + +* [aeriprof3feltz.c1](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/AERIPROF/aeriprof3feltz.c1.ipynb&branch=main) + +* [qmeaeriprof.c1](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/AERIPROF/qmeaeriprof.c1.ipynb&branch=main) + + +
+ + +
+ AEROSOLBE + +* [AEROSOLBE tutorial](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/AEROSOLBE/AEROSOLBE_tutorial.ipynb&branch=main) + +* [aerosolbe1turn.c1](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/AEROSOLBE/aerosolbe1turn.c1.ipynb&branch=main) + + +
+ + +
+ AIP + +* [AIP tutorial](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/AIP/AIP_tutorial.ipynb&branch=main) + +* [aip1ogren.c1](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/AIP/aip1ogren.c1.ipynb&branch=main) + +* [aipavg1ogren.c1](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/AIP/aipavg1ogren.c1.ipynb&branch=main) + +* [aipfitrh1ogren.c1](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/AIP/aipfitrh1ogren.c1.ipynb&branch=main) + + +
+ + +
+ AOD + +* [AOD tutorial](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/AOD/AOD_tutorial.ipynb&branch=main) + +* [sasheniraod.c1](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/AOD/sasheniraod.c1.ipynb&branch=main) + +* [sashevisaod.c1](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/AOD/sashevisaod.c1.ipynb&branch=main) + + +
+ + +
+ AOD-MFRSR + +* [AOD-MFRSR tutorial](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/AOD-MFRSR/AOD-MFRSR_tutorial.ipynb&branch=main) + +* [mfrsr7nchaod1mich.c1](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/AOD-MFRSR/mfrsr7nchaod1mich.c1.ipynb&branch=main) + +* [mfrsr7nchcal.c1](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/AOD-MFRSR/mfrsr7nchcal.c1.ipynb&branch=main) + +* [mfrsraod1mich.c1](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/AOD-MFRSR/mfrsraod1mich.c1.ipynb&branch=main) + +* [mfrsrcal.c1](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/AOD-MFRSR/mfrsrcal.c1.ipynb&branch=main) + + +
+ + +
+ AOD-NIMFR + +* [AOD-NIMFR tutorial](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/AOD-NIMFR/AOD-NIMFR_tutorial.ipynb&branch=main) + +* [nimfraod1mich.c1](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/AOD-NIMFR/nimfraod1mich.c1.ipynb&branch=main) + + +
+ + +
+ AOP + +* [AOP tutorial](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/AOP/AOP_tutorial.ipynb&branch=main) + +* [aopclap1flynn1m.c1](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/AOP/aopclap1flynn1m.c1.ipynb&branch=main) + +* [aoppsap1flynn1h.c1](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/AOP/aoppsap1flynn1h.c1.ipynb&branch=main) + +* [aoppsap1flynn1m.c1](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/AOP/aoppsap1flynn1m.c1.ipynb&branch=main) + + +
+ + +
+ AOSCCNAVG + +* [AOSCCNAVG tutorial](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/AOSCCNAVG/AOSCCNAVG_tutorial.ipynb&branch=main) + +* [aosccnavg.c1](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/AOSCCNAVG/aosccnavg.c1.ipynb&branch=main) + +* [aosccnavg.c2](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/AOSCCNAVG/aosccnavg.c2.ipynb&branch=main) + + +
+ + +
+ AOSSP2BC + +* [AOSSP2BC tutorial](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/AOSSP2BC/AOSSP2BC_tutorial.ipynb&branch=main) + +* [aossp2rbc1m.c1](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/AOSSP2BC/aossp2rbc1m.c1.ipynb&branch=main) + + +
+ + +
+ ARMBE + +* [ARMBE tutorial](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/ARMBE/ARMBE_tutorial.ipynb&branch=main) + +* [armbeatm.c1](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/ARMBE/armbeatm.c1.ipynb&branch=main) + +* [armbecldrad.c1](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/ARMBE/armbecldrad.c1.ipynb&branch=main) + + +
+ + +
+ ARSCL + +* [ARSCL tutorial](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/ARSCL/ARSCL_tutorial.ipynb&branch=main) + +* [arscl1cloth.c1](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/ARSCL/arscl1cloth.c1.ipynb&branch=main) + +* [arsclbnd1cloth.c1](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/ARSCL/arsclbnd1cloth.c1.ipynb&branch=main) + + +
+ + +
+ ASDBE-AIR + +* [ASDBE-AIR tutorial](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/ASDBE-AIR/ASDBE-AIR_tutorial.ipynb&branch=main) + +* [aafmergedaerosolsd.c1](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/ASDBE-AIR/aafmergedaerosolsd.c1.ipynb&branch=main) + + +
+ + +
+ BAEBBR + +* [BAEBBR tutorial](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/BAEBBR/BAEBBR_tutorial.ipynb&branch=main) + +* [30baebbr.c1](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/BAEBBR/30baebbr.c1.ipynb&branch=main) + + +
+ + +
+ BBHRP + +* [BBHRP tutorial](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/BBHRP/BBHRP_tutorial.ipynb&branch=main) + +* [1bbhrpripbe1mcfarlane.c1](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/BBHRP/1bbhrpripbe1mcfarlane.c1.ipynb&branch=main) + +* [30bbhrpripbe1mcfarlane.c1](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/BBHRP/30bbhrpripbe1mcfarlane.c1.ipynb&branch=main) + +* [bbhrpavg1mlawer.c1](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/BBHRP/bbhrpavg1mlawer.c1.ipynb&branch=main) + + +
+ + +
+ BEFLUX + +* [BEFLUX tutorial](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/BEFLUX/BEFLUX_tutorial.ipynb&branch=main) + +* [beflux1long.c1](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/BEFLUX/beflux1long.c1.ipynb&branch=main) + +* [qcflux1long.c1](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/BEFLUX/qcflux1long.c1.ipynb&branch=main) + + +
+ + +
+ CCNKAPPA + +* [CCNKAPPA tutorial](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/CCNKAPPA/CCNKAPPA_tutorial.ipynb&branch=main) + +* [aosccnsmpskappa.c1](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/CCNKAPPA/aosccnsmpskappa.c1.ipynb&branch=main) + + +
+ + +
+ CCNPROF + +* [CCNPROF tutorial](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/CCNPROF/CCNPROF_tutorial.ipynb&branch=main) + +* [rlccnprof1ghan.c1](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/CCNPROF/rlccnprof1ghan.c1.ipynb&branch=main) + + +
+ + +
+ CLAP + +* [CLAP tutorial](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/CLAP/CLAP_tutorial.ipynb&branch=main) + +* [aosclap3w.c1](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/CLAP/aosclap3w.c1.ipynb&branch=main) + + +
+ + +
+ CLDTYPE + +* [CLDTYPE tutorial](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/CLDTYPE/CLDTYPE_tutorial.ipynb&branch=main) + +* [cldtype.c1](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/CLDTYPE/cldtype.c1.ipynb&branch=main) + + +
+ + +
+ CMAC2 + +* [CMAC2 tutorial](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/CMAC2/CMAC2_tutorial.ipynb&branch=main) + +* [cmac2.c1](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/CMAC2/cmac2.c1.ipynb&branch=main) + + +
+ + +
+ CO-AIR + +* [CO-AIR tutorial](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/CO-AIR/CO-AIR_tutorial.ipynb&branch=main) + +* [aafco.c1](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/CO-AIR/aafco.c1.ipynb&branch=main) + + +
+ + +
+ COGS + +* [COGS tutorial](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/COGS/COGS_tutorial.ipynb&branch=main) + +* [cogs.c1](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/COGS/cogs.c1.ipynb&branch=main) + + +
+ + +
+ DIFFCOR + +* [DIFFCOR tutorial](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/DIFFCOR/DIFFCOR_tutorial.ipynb&branch=main) + +* [brs1dutt.c1](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/DIFFCOR/brs1dutt.c1.ipynb&branch=main) + +* [siros1dutt.c1](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/DIFFCOR/siros1dutt.c1.ipynb&branch=main) + +* [sirs1dutt.c1](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/DIFFCOR/sirs1dutt.c1.ipynb&branch=main) + + +
+ + +
+ DLPROF-WIND + +* [DLPROF-WIND tutorial](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/DLPROF-WIND/DLPROF-WIND_tutorial.ipynb&branch=main) + +* [dlprofwind4news.c1](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/DLPROF-WIND/dlprofwind4news.c1.ipynb&branch=main) + + +
+ + +
+ DLPROF-WSTATS + +* [DLPROF-WSTATS tutorial](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/DLPROF-WSTATS/DLPROF-WSTATS_tutorial.ipynb&branch=main) + +* [dlprofwstats4news.c1](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/DLPROF-WSTATS/dlprofwstats4news.c1.ipynb&branch=main) + + +
+ + +
+ FCDP-AIR + +* [FCDP-AIR tutorial](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/FCDP-AIR/FCDP-AIR_tutorial.ipynb&branch=main) + +* [aaffcdp.c1](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/FCDP-AIR/aaffcdp.c1.ipynb&branch=main) + + +
+ + +
+ GVR + +* [GVR tutorial](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/GVR/GVR_tutorial.ipynb&branch=main) + +* [gvr.c1](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/GVR/gvr.c1.ipynb&branch=main) + + +
+ + +
+ HVPS-AIR + +* [HVPS-AIR tutorial](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/HVPS-AIR/HVPS-AIR_tutorial.ipynb&branch=main) + +* [aafhvps.c1](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/HVPS-AIR/aafhvps.c1.ipynb&branch=main) + + +
+ + +
+ INLETCVI-AIR + +* [INLETCVI-AIR tutorial](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/INLETCVI-AIR/INLETCVI-AIR_tutorial.ipynb&branch=main) + +* [aafinletcvi.c1](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/INLETCVI-AIR/aafinletcvi.c1.ipynb&branch=main) + + +
+ + +
+ INTERPSONDE + +* [INTERPSONDE tutorial](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/INTERPSONDE/INTERPSONDE_tutorial.ipynb&branch=main) + +* [interpolatedsonde.c1](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/INTERPSONDE/interpolatedsonde.c1.ipynb&branch=main) + + +
+ + +
+ KAZRARSCL + +* [KAZRARSCL tutorial](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/KAZRARSCL/KAZRARSCL_tutorial.ipynb&branch=main) + +* [arsclkazr1kollias.c1](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/KAZRARSCL/arsclkazr1kollias.c1.ipynb&branch=main) + +* [arsclkazrbnd1kollias.c1](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/KAZRARSCL/arsclkazrbnd1kollias.c1.ipynb&branch=main) + + +
+ + +
+ KAZRARSCLCLOUDSAT + +* [KAZRARSCLCLOUDSAT tutorial](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/KAZRARSCLCLOUDSAT/KAZRARSCLCLOUDSAT_tutorial.ipynb&branch=main) + +* [arsclkazrcloudsat.c1](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/KAZRARSCLCLOUDSAT/arsclkazrcloudsat.c1.ipynb&branch=main) + + +
+ + +
+ KAZRCFRCOR + +* [KAZRCFRCOR tutorial](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/KAZRCFRCOR/KAZRCFRCOR_tutorial.ipynb&branch=main) + +* [kazrcfrcorge.c1](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/KAZRCFRCOR/kazrcfrcorge.c1.ipynb&branch=main) + +* [kazrcfrcormd.c1](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/KAZRCFRCOR/kazrcfrcormd.c1.ipynb&branch=main) + + +
+ + +
+ KAZRCOR + +* [KAZRCOR tutorial](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/KAZRCOR/KAZRCOR_tutorial.ipynb&branch=main) + +* [kazrcorge.c1](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/KAZRCOR/kazrcorge.c1.ipynb&branch=main) + +* [kazrcorhi.c1](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/KAZRCOR/kazrcorhi.c1.ipynb&branch=main) + +* [kazrcormd.c1](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/KAZRCOR/kazrcormd.c1.ipynb&branch=main) + + +
+ + +
+ LCLHEIGHT + +* [LCLHEIGHT tutorial](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/LCLHEIGHT/LCLHEIGHT_tutorial.ipynb&branch=main) + +* [lcl.c1](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/LCLHEIGHT/lcl.c1.ipynb&branch=main) + + +
+ + +
+ LDQUANTS + +* [LDQUANTS tutorial](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/LDQUANTS/LDQUANTS_tutorial.ipynb&branch=main) + +* [ldquants.c1](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/LDQUANTS/ldquants.c1.ipynb&branch=main) + + +
+ + +
+ LSSONDE + +* [LSSONDE tutorial](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/LSSONDE/LSSONDE_tutorial.ipynb&branch=main) + +* [lssonde.c1](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/LSSONDE/lssonde.c1.ipynb&branch=main) + + +
+ + +
+ MASCPARTICLES + +* [MASCPARTICLES tutorial](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/MASCPARTICLES/MASCPARTICLES_tutorial.ipynb&branch=main) + +* [mascparticles.c1](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/MASCPARTICLES/mascparticles.c1.ipynb&branch=main) + +* [mascparticlesavg.c1](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/MASCPARTICLES/mascparticlesavg.c1.ipynb&branch=main) + + +
+ + +
+ MERGED-COMMON + +* [MERGED-COMMON tutorial](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/MERGED-COMMON/MERGED-COMMON_tutorial.ipynb&branch=main) + +* [aafmergedcldsd.c1](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/MERGED-COMMON/aafmergedcldsd.c1.ipynb&branch=main) + + +
+ + +
+ MERGEDSMPSAPS + +* [MERGEDSMPSAPS tutorial](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/MERGEDSMPSAPS/MERGEDSMPSAPS_tutorial.ipynb&branch=main) + +* [mergedsmpsaps.c1](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/MERGEDSMPSAPS/mergedsmpsaps.c1.ipynb&branch=main) + + +
+ + +
+ MERGESONDE + +* [MERGESONDE tutorial](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/MERGESONDE/MERGESONDE_tutorial.ipynb&branch=main) + +* [mergesonde1mace.c1](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/MERGESONDE/mergesonde1mace.c1.ipynb&branch=main) + +* [mergesonde2mace.c1](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/MERGESONDE/mergesonde2mace.c1.ipynb&branch=main) + + +
+ + +
+ MFRSRCLDOD + +* [MFRSRCLDOD tutorial](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/MFRSRCLDOD/MFRSRCLDOD_tutorial.ipynb&branch=main) + +* [mfrsrcldod1min.c1](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/MFRSRCLDOD/mfrsrcldod1min.c1.ipynb&branch=main) + + +
+ + +
+ MICROBASE + +* [MICROBASE tutorial](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/MICROBASE/MICROBASE_tutorial.ipynb&branch=main) + +* [microbasepi.c1](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/MICROBASE/microbasepi.c1.ipynb&branch=main) + +* [microbasepi2.c1](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/MICROBASE/microbasepi2.c1.ipynb&branch=main) + +* [microbasepiavg.c1](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/MICROBASE/microbasepiavg.c1.ipynb&branch=main) + + +
+ + +
+ MPLAVG + +* [MPLAVG tutorial](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/MPLAVG/MPLAVG_tutorial.ipynb&branch=main) + +* [mplpolavg.c1](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/MPLAVG/mplpolavg.c1.ipynb&branch=main) + + +
+ + +
+ MPLCMASK + +* [MPLCMASK tutorial](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/MPLCMASK/MPLCMASK_tutorial.ipynb&branch=main) + +* [30smplcmask1zwang.c1](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/MPLCMASK/30smplcmask1zwang.c1.ipynb&branch=main) + + +
+ + +
+ MPLCMASKML + +* [MPLCMASKML tutorial](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/MPLCMASKML/MPLCMASKML_tutorial.ipynb&branch=main) + +* [mplcmaskml.c1](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/MPLCMASKML/mplcmaskml.c1.ipynb&branch=main) + + +
+ + +
+ MPLNOR + +* [MPLNOR tutorial](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/MPLNOR/MPLNOR_tutorial.ipynb&branch=main) + +* [mplnor1camp.c1](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/MPLNOR/mplnor1camp.c1.ipynb&branch=main) + + +
+ + +
+ MWRRET + +* [MWRRET tutorial](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/MWRRET/MWRRET_tutorial.ipynb&branch=main) + +* [mwrret1liljclou.c1](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/MWRRET/mwrret1liljclou.c1.ipynb&branch=main) + +* [mwrret1liljclou.c2](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/MWRRET/mwrret1liljclou.c2.ipynb&branch=main) + + +
+ + +
+ MWRRETV2 + +* [MWRRETV2 tutorial](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/MWRRETV2/MWRRETV2_tutorial.ipynb&branch=main) + +* [mwrret2turn.c1](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/MWRRETV2/mwrret2turn.c1.ipynb&branch=main) + + +
+ + +
+ NAVMET-AIR + +* [NAVMET-AIR tutorial](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/NAVMET-AIR/NAVMET-AIR_tutorial.ipynb&branch=main) + +* [aafnaviwg.c1](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/NAVMET-AIR/aafnaviwg.c1.ipynb&branch=main) + + +
+ + +
+ NDROP + +* [NDROP tutorial](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/NDROP/NDROP_tutorial.ipynb&branch=main) + +* [ndropmfrsr.c1](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/NDROP/ndropmfrsr.c1.ipynb&branch=main) + + +
+ + +
+ NEPHELOMETER + +* [NEPHELOMETER tutorial](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/NEPHELOMETER/NEPHELOMETER_tutorial.ipynb&branch=main) + +* [aosnephdry.c1](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/NEPHELOMETER/aosnephdry.c1.ipynb&branch=main) + +* [aosnephwet.c1](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/NEPHELOMETER/aosnephwet.c1.ipynb&branch=main) + + +
+ + +
+ OKMSOIL + +* [OKMSOIL tutorial](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/OKMSOIL/OKMSOIL_tutorial.ipynb&branch=main) + +* [okmsoil.c1](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/OKMSOIL/okmsoil.c1.ipynb&branch=main) + + +
+ + +
+ OZONE-AIR + +* [OZONE-AIR tutorial](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/OZONE-AIR/OZONE-AIR_tutorial.ipynb&branch=main) + +* [aafo3.c1](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/OZONE-AIR/aafo3.c1.ipynb&branch=main) + + +
+ + +
+ PBLHT + +* [PBLHT tutorial](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/PBLHT/PBLHT_tutorial.ipynb&branch=main) + +* [pblhtsonde1mcfarl.c1](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/PBLHT/pblhtsonde1mcfarl.c1.ipynb&branch=main) + +* [pblhtsondeyr1mcfarl.c1](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/PBLHT/pblhtsondeyr1mcfarl.c1.ipynb&branch=main) + + +
+ + +
+ PCCP + +* [PCCP tutorial](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/PCCP/PCCP_tutorial.ipynb&branch=main) + +* [pccp.c1](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/PCCP/pccp.c1.ipynb&branch=main) + + +
+ + +
+ PSAP + +* [PSAP tutorial](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/PSAP/PSAP_tutorial.ipynb&branch=main) + +* [aospsap3w.c1](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/PSAP/aospsap3w.c1.ipynb&branch=main) + + +
+ + +
+ QCRAD + +* [QCRAD tutorial](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/QCRAD/QCRAD_tutorial.ipynb&branch=main) + +* [qcrad1long.c1](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/QCRAD/qcrad1long.c1.ipynb&branch=main) + +* [qcrad1long.c2](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/QCRAD/qcrad1long.c2.ipynb&branch=main) + +* [qcradbeflux1long.c1](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/QCRAD/qcradbeflux1long.c1.ipynb&branch=main) + +* [qcradbeflux1long.c2](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/QCRAD/qcradbeflux1long.c2.ipynb&branch=main) + +* [qcradbrs1long.c1](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/QCRAD/qcradbrs1long.c1.ipynb&branch=main) + +* [qcradbrs1long.c2](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/QCRAD/qcradbrs1long.c2.ipynb&branch=main) + + +
+ + +
+ RADFLUXANAL + +* [RADFLUXANAL tutorial](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/RADFLUXANAL/RADFLUXANAL_tutorial.ipynb&branch=main) + +* [radflux1long.c1](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/RADFLUXANAL/radflux1long.c1.ipynb&branch=main) + +* [radflux1long.c2](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/RADFLUXANAL/radflux1long.c2.ipynb&branch=main) + +* [radfluxbrs1long.c2](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/RADFLUXANAL/radfluxbrs1long.c2.ipynb&branch=main) + + +
+ + +
+ RIPBE + +* [RIPBE tutorial](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/RIPBE/RIPBE_tutorial.ipynb&branch=main) + +* [30ripbe1mcfarlane.c1](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/RIPBE/30ripbe1mcfarlane.c1.ipynb&branch=main) + +* [ripbe1mcfarlane.c1](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/RIPBE/ripbe1mcfarlane.c1.ipynb&branch=main) + + +
+ + +
+ RLPROF + +* [RLPROF tutorial](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/RLPROF/RLPROF_tutorial.ipynb&branch=main) + +* [10rlprofbe1news.c1](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/RLPROF/10rlprofbe1news.c1.ipynb&branch=main) + + +
+ + +
+ SACRADV3D3C + +* [SACRADV3D3C tutorial](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/SACRADV3D3C/SACRADV3D3C_tutorial.ipynb&branch=main) + +* [kasacradv3d3c.c1](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/SACRADV3D3C/kasacradv3d3c.c1.ipynb&branch=main) + + +
+ + +
+ SACRADVVAD + +* [SACRADVVAD tutorial](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/SACRADVVAD/SACRADVVAD_tutorial.ipynb&branch=main) + +* [kasacradvvad.c1](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/SACRADVVAD/kasacradvvad.c1.ipynb&branch=main) + + +
+ + +
+ SFCCLDGRID + +* [SFCCLDGRID tutorial](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/SFCCLDGRID/SFCCLDGRID_tutorial.ipynb&branch=main) + +* [15swfcldgrid1long.c1](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/SFCCLDGRID/15swfcldgrid1long.c1.ipynb&branch=main) + +* [sfccldgrid2longcaracena.c1](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/SFCCLDGRID/sfccldgrid2longcaracena.c1.ipynb&branch=main) + +* [sfccldgrid2longstation.c1](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/SFCCLDGRID/sfccldgrid2longstation.c1.ipynb&branch=main) + + +
+ + +
+ SHALLOWCUMULUS + +* [SHALLOWCUMULUS tutorial](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/SHALLOWCUMULUS/SHALLOWCUMULUS_tutorial.ipynb&branch=main) + +* [shallowcumulus.c1](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/SHALLOWCUMULUS/shallowcumulus.c1.ipynb&branch=main) + +* [shcusummary.c1](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/SHALLOWCUMULUS/shcusummary.c1.ipynb&branch=main) + + +
+ + +
+ SO2-AIR + +* [SO2-AIR tutorial](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/SO2-AIR/SO2-AIR_tutorial.ipynb&branch=main) + +* [aafso2.c1](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/SO2-AIR/aafso2.c1.ipynb&branch=main) + + +
+ + +
+ SONDEADJUST + +* [SONDEADJUST tutorial](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/SONDEADJUST/SONDEADJUST_tutorial.ipynb&branch=main) + +* [sondeadjust.c1](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/SONDEADJUST/sondeadjust.c1.ipynb&branch=main) + + +
+ + +
+ SONDEPARAM + +* [SONDEPARAM tutorial](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/SONDEPARAM/SONDEPARAM_tutorial.ipynb&branch=main) + +* [sondeparam.c1](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/SONDEPARAM/sondeparam.c1.ipynb&branch=main) + + +
+ + +
+ SP2-AIR + +* [SP2-AIR tutorial](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/SP2-AIR/SP2-AIR_tutorial.ipynb&branch=main) + +* [aafsp2rbc10s.c1](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/SP2-AIR/aafsp2rbc10s.c1.ipynb&branch=main) + + +
+ + +
+ SPHOTCOD + +* [SPHOTCOD tutorial](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/SPHOTCOD/SPHOTCOD_tutorial.ipynb&branch=main) + +* [sphotcod2chiu.c1](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/SPHOTCOD/sphotcod2chiu.c1.ipynb&branch=main) + + +
+ + +
+ SURFSPECALB + +* [SURFSPECALB tutorial](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/SURFSPECALB/SURFSPECALB_tutorial.ipynb&branch=main) + +* [surfspecalb1mlawer.c1](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/SURFSPECALB/surfspecalb1mlawer.c1.ipynb&branch=main) + +* [surfspecalb7nch1mlawer.c1](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/SURFSPECALB/surfspecalb7nch1mlawer.c1.ipynb&branch=main) + + +
+ + +
+ TBSMERGED + +* [TBSMERGED tutorial](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/TBSMERGED/TBSMERGED_tutorial.ipynb&branch=main) + +* [tbsmerged.c1](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/TBSMERGED/tbsmerged.c1.ipynb&branch=main) + +* [tbsmergedincloud.c1](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/TBSMERGED/tbsmergedincloud.c1.ipynb&branch=main) + + +
+ + +
+ TDMA + +* [TDMA tutorial](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/TDMA/TDMA_tutorial.ipynb&branch=main) + +* [tdmaapssize.c1](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/TDMA/tdmaapssize.c1.ipynb&branch=main) + + +
+ + +
+ TWRMR + +* [TWRMR tutorial](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/TWRMR/TWRMR_tutorial.ipynb&branch=main) + +* [1twrmr.c1](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/TWRMR/1twrmr.c1.ipynb&branch=main) + +* [30twrmr.c1](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/TWRMR/30twrmr.c1.ipynb&branch=main) + + +
+ + +
+ VARANAL + +* [VARANAL tutorial](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/VARANAL/VARANAL_tutorial.ipynb&branch=main) + +* [180varanaecmwf.c1](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/VARANAL/180varanaecmwf.c1.ipynb&branch=main) + +* [180varanamerra001.c1](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/VARANAL/180varanamerra001.c1.ipynb&branch=main) + + +
+ + +
+ VARANAL3D + +* [VARANAL3D tutorial](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/VARANAL3D/VARANAL3D_tutorial.ipynb&branch=main) + +* [180varanal3dera5.c1](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/VARANAL3D/180varanal3dera5.c1.ipynb&branch=main) + +* [180varanal3dncep.c1](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/VARANAL3D/180varanal3dncep.c1.ipynb&branch=main) + + +
+ + +
+ VDISQUANTS + +* [VDISQUANTS tutorial](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/VDISQUANTS/VDISQUANTS_tutorial.ipynb&branch=main) + +* [vdisquants.c1](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/VDISQUANTS/vdisquants.c1.ipynb&branch=main) + + +
+ + +
+ WACRARSCL + +* [WACRARSCL tutorial](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/WACRARSCL/WACRARSCL_tutorial.ipynb&branch=main) + +* [arsclwacr1kollias.c1](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/WACRARSCL/arsclwacr1kollias.c1.ipynb&branch=main) + +* [arsclwacrbnd1kollias.c1](https://jupyterhub.arm.gov/hub/user-redirect/git-pull?repo=https%3A//github.com/ARM-Development/ARM-Notebooks&urlpath=lab/tree/ARM-Notebooks/../user-data-home/ARM-Notebooks/VAPs/quicklook/WACRARSCL/arsclwacrbnd1kollias.c1.ipynb&branch=main) + + +
+ + + diff --git a/_config.yml b/_config.yml index a6978ab6..92893b03 100644 --- a/_config.yml +++ b/_config.yml @@ -13,3 +13,4 @@ repository: launch_buttons: jupyterhub_url: "https://jupyterhub.arm.gov" notebook_interface: "jupyterlab" +exclude_patterns: ["VAPs/quicklook/*"] # exclude ipynb files to build \ No newline at end of file diff --git a/_toc.yml b/_toc.yml index a54de287..4d978202 100644 --- a/_toc.yml +++ b/_toc.yml @@ -14,6 +14,7 @@ parts: - file: VAPs/README.md sections: - file: VAPs/squire/intro-to-squire.ipynb + - file: VAPs/vap_notebook_list.md - caption: ARM/ASR PI Meeting 2023 chapters: - file: Tutorials/arm-asr-pi-meeting-2023/README.md From 437bbf6ef6e135117545d507ee9da69833b0b339 Mon Sep 17 00:00:00 2001 From: mgrover1 Date: Fri, 10 May 2024 10:53:04 -0500 Subject: [PATCH 2/2] DEL: Remove the checkpoints --- .gitignore | 5 +- .../aaf2dsh.c1-checkpoint.ipynb | 1271 --- .../aaf2dsv.c1-checkpoint.ipynb | 1798 ---- .../aoppsap1flynn1m.c1-checkpoint.ipynb | 8265 ----------------- .../arscl1cloth.c1-checkpoint.ipynb | 2631 ------ .../arsclbnd1cloth.c1-checkpoint.ipynb | 1937 ---- .../bbhrpavg1mlawer.c1-checkpoint.ipynb | 3768 -------- .../rlccnprof1ghan.c1-checkpoint.ipynb | 4109 -------- .../cmac2.c1-checkpoint.ipynb | 3537 ------- .../kazrcorge.c1-checkpoint.ipynb | 445 - .../kazrcorhi.c1-checkpoint.ipynb | 1856 ---- .../kazrcormd.c1-checkpoint.ipynb | 2667 ------ .../mfrsrcldod1min.c1-checkpoint.ipynb | 799 -- .../microbasepi2.c1-checkpoint.ipynb | 468 - .../microbasepiavg.c1-checkpoint.ipynb | 1757 ---- .../mplnor1camp.c1-checkpoint.ipynb | 1732 ---- .../okmsoil.c1-checkpoint.ipynb | 2048 ---- .../pblhtsonde1mcfarl.c1-checkpoint.ipynb | 679 -- .../aospsap3w.c1-checkpoint.ipynb | 1841 ---- .../radflux1long.c1-checkpoint.ipynb | 3763 -------- .../kasacradv3d3c.c1-checkpoint.ipynb | 2574 ----- .../15swfcldgrid1long.c1-checkpoint.ipynb | 2384 ----- ...fccldgrid2longcaracena.c1-checkpoint.ipynb | 5150 ---------- 
.../SONDEADJUST_tutorial-checkpoint.ipynb | 4058 -------- .../1swfanalsirs1long.c1-checkpoint.ipynb | 2654 ------ 25 files changed, 4 insertions(+), 62192 deletions(-) delete mode 100644 VAPs/quicklook/2DS-AIR/.ipynb_checkpoints/aaf2dsh.c1-checkpoint.ipynb delete mode 100644 VAPs/quicklook/2DS-AIR/.ipynb_checkpoints/aaf2dsv.c1-checkpoint.ipynb delete mode 100644 VAPs/quicklook/AOP/.ipynb_checkpoints/aoppsap1flynn1m.c1-checkpoint.ipynb delete mode 100644 VAPs/quicklook/ARSCL/.ipynb_checkpoints/arscl1cloth.c1-checkpoint.ipynb delete mode 100644 VAPs/quicklook/ARSCL/.ipynb_checkpoints/arsclbnd1cloth.c1-checkpoint.ipynb delete mode 100644 VAPs/quicklook/BBHRP/.ipynb_checkpoints/bbhrpavg1mlawer.c1-checkpoint.ipynb delete mode 100644 VAPs/quicklook/CCNPROF/.ipynb_checkpoints/rlccnprof1ghan.c1-checkpoint.ipynb delete mode 100644 VAPs/quicklook/CMAC2/.ipynb_checkpoints/cmac2.c1-checkpoint.ipynb delete mode 100644 VAPs/quicklook/KAZRCOR/.ipynb_checkpoints/kazrcorge.c1-checkpoint.ipynb delete mode 100644 VAPs/quicklook/KAZRCOR/.ipynb_checkpoints/kazrcorhi.c1-checkpoint.ipynb delete mode 100644 VAPs/quicklook/KAZRCOR/.ipynb_checkpoints/kazrcormd.c1-checkpoint.ipynb delete mode 100644 VAPs/quicklook/MFRSRCLDOD/.ipynb_checkpoints/mfrsrcldod1min.c1-checkpoint.ipynb delete mode 100644 VAPs/quicklook/MICROBASE/.ipynb_checkpoints/microbasepi2.c1-checkpoint.ipynb delete mode 100644 VAPs/quicklook/MICROBASE/.ipynb_checkpoints/microbasepiavg.c1-checkpoint.ipynb delete mode 100644 VAPs/quicklook/MPLNOR/.ipynb_checkpoints/mplnor1camp.c1-checkpoint.ipynb delete mode 100644 VAPs/quicklook/OKMSOIL/.ipynb_checkpoints/okmsoil.c1-checkpoint.ipynb delete mode 100644 VAPs/quicklook/PBLHT/.ipynb_checkpoints/pblhtsonde1mcfarl.c1-checkpoint.ipynb delete mode 100644 VAPs/quicklook/PSAP/.ipynb_checkpoints/aospsap3w.c1-checkpoint.ipynb delete mode 100644 VAPs/quicklook/RADFLUXANAL/.ipynb_checkpoints/radflux1long.c1-checkpoint.ipynb delete mode 100644 
VAPs/quicklook/SACRADV3D3C/.ipynb_checkpoints/kasacradv3d3c.c1-checkpoint.ipynb delete mode 100644 VAPs/quicklook/SFCCLDGRID/.ipynb_checkpoints/15swfcldgrid1long.c1-checkpoint.ipynb delete mode 100644 VAPs/quicklook/SFCCLDGRID/.ipynb_checkpoints/sfccldgrid2longcaracena.c1-checkpoint.ipynb delete mode 100644 VAPs/quicklook/SONDEADJUST/.ipynb_checkpoints/SONDEADJUST_tutorial-checkpoint.ipynb delete mode 100644 VAPs/quicklook/SWFLUXANAL/.ipynb_checkpoints/1swfanalsirs1long.c1-checkpoint.ipynb diff --git a/.gitignore b/.gitignore index c71e30af..742b8373 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,5 @@ .DS_Store -_build/ \ No newline at end of file +_build/ + +# Jupyter Notebook +.ipynb_checkpoints diff --git a/VAPs/quicklook/2DS-AIR/.ipynb_checkpoints/aaf2dsh.c1-checkpoint.ipynb b/VAPs/quicklook/2DS-AIR/.ipynb_checkpoints/aaf2dsh.c1-checkpoint.ipynb deleted file mode 100644 index 66d434d8..00000000 --- a/VAPs/quicklook/2DS-AIR/.ipynb_checkpoints/aaf2dsh.c1-checkpoint.ipynb +++ /dev/null @@ -1,1271 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "70840257-70e4-45e2-b491-14bff5a257a3", - "metadata": {}, - "source": [ - "# AAF2DSH.C1 Plots\n", - "\n", - "[Click here](https://www.arm.gov/capabilities/vaps/2ds-air) for more information about this vap." 
- ] - }, - { - "cell_type": "code", - "execution_count": 1, - "id": "460fd89f-e034-452c-b837-f65c5958264f", - "metadata": {}, - "outputs": [], - "source": [ - "%matplotlib widget\n", - "import ipywidgets as widgets\n", - "\n", - "import matplotlib.pyplot as plt\n", - "import ipywidgets as widgets\n", - "import numpy as np\n", - "import pandas as pd\n", - "import os\n", - "from datetime import datetime\n", - "\n", - "import act\n", - "import xarray as xr\n", - "\n", - "# Data archive directory\n", - "DATA_DIR = r'/data/archive/'\n", - "\n", - "# Datastream info\n", - "DATASTREAM_NAME = 'aaf2dsh'\n", - "DATA_LEVEL = 'c1'\n", - "LOCATIONS = [{'end_date': '2018-12-08', 'facility': 'F1', 'site': 'cor', 'start_date': '2018-11-04'}, {'end_date': '2018-02-19', 'facility': 'F1', 'site': 'ena', 'start_date': '2017-06-21'}, {'end_date': '2016-09-22', 'facility': 'F1', 'site': 'sgp', 'start_date': '2016-04-25'}]" - ] - }, - { - "cell_type": "markdown", - "id": "9faaf875", - "metadata": {}, - "source": [ - "## Define site, facility, and date range" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "ac6764f5", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The following locations and date ranges are available for this VAP:\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
sitefacilitystart_dateend_date
0corF12018-11-042018-12-08
1enaF12017-06-212018-02-19
2sgpF12016-04-252016-09-22
\n", - "
" - ], - "text/plain": [ - " site facility start_date end_date\n", - "0 cor F1 2018-11-04 2018-12-08\n", - "1 ena F1 2017-06-21 2018-02-19\n", - "2 sgp F1 2016-04-25 2016-09-22" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "print(\"The following locations and date ranges are available for this VAP:\")\n", - "display(pd.DataFrame(LOCATIONS, columns=['site', 'facility', 'start_date', 'end_date']))" - ] - }, - { - "cell_type": "markdown", - "id": "8d132223", - "metadata": {}, - "source": [ - "#### Define site, facility, and date range (date format: YYYY-MM-DD) using the variables below:" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "e563983a", - "metadata": {}, - "outputs": [], - "source": [ - "site_facility = ( 'sgp', 'F1' )\n", - "\n", - "date_start = '2016-09-21'\n", - "date_end = '2016-09-22'" - ] - }, - { - "cell_type": "markdown", - "id": "bccd3dfe-2f99-49a5-bace-ea37e8dd8fc3", - "metadata": {}, - "source": [ - "## Load data files\n", - "Load data files from /data/archive/" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "cb4b9a26-c574-49c0-a521-658fa553e39e", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "'/data/archive/sgp/sgpaaf2dshF1.c1'" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Compile list of files\n", - "site, facility = site_facility\n", - "d_date_start = datetime.strptime(date_start, '%Y-%m-%d')\n", - "d_date_end = datetime.strptime(date_end, '%Y-%m-%d')\n", - "dir_path = os.path.join(DATA_DIR + site, site + DATASTREAM_NAME + facility + r'.' 
+ DATA_LEVEL )\n", - "dir_path\n" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "6be8f3dc", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "['20160921', '20160922']" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "from datetime import date, timedelta\n", - "import pandas as pd\n", - "\n", - "def get_ARM_formated_dates(start_date, end_date):\n", - " \"\"\"\n", - " Get a list of ARM conventional formated date lists, based on start_date and end_date(inclusive)\n", - " EXAMPLE:\n", - " get_ARM_formated_dates(start_date=\"20180219\", end_date=\"20180221\")\n", - " >> [\"20180219\", \"20180220\", \"20180221\"] \n", - " \"\"\"\n", - " \n", - " _start_date = pd.to_datetime(start_date)\n", - " _end_date = pd.to_datetime(end_date)\n", - " \n", - " delta = _end_date - _start_date # returns timedelta \n", - " dates = []\n", - "\n", - " for i in range(delta.days + 1):\n", - " day = _start_date + timedelta(days=i)\n", - " day_formated = day.strftime(format=\"%Y%m%d\")\n", - " dates.append(day_formated)\n", - " return dates\n", - "\n", - "\n", - "get_ARM_formated_dates(start_date=date_start, end_date=date_end)" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "51feea2e", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "['/data/archive/sgp/sgpaaf2dshF1.c1/sgpaaf2dshF1.c1.20160921.163940.nc',\n", - " '/data/archive/sgp/sgpaaf2dshF1.c1/sgpaaf2dshF1.c1.20160922.160625.nc']" - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Filter a list of files based on date pattern\n", - "import glob\n", - "dates = get_ARM_formated_dates(start_date=date_start, end_date=date_end)\n", - "files_filter = []\n", - "for date in dates:\n", - " files_filter += glob.glob(f'{dir_path}/*.{date}*.*')\n", - " files_filter\n", - "files_filter" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - 
"id": "b0e5d0d7", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "2 files loaded\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "
<xarray.Dataset>\n",
-       "Dimensions:                     (time: 18380, optical_diameter: 61, bound: 2)\n",
-       "Coordinates:\n",
-       "  * time                        (time) datetime64[ns] 2016-09-21T16:39:40 ......\n",
-       "  * optical_diameter            (optical_diameter) float32 10.0 20.0 ... inf\n",
-       "Dimensions without coordinates: bound\n",
-       "Data variables:\n",
-       "    base_time                   (time) datetime64[ns] 2016-09-21 ... 2016-09-22\n",
-       "    time_offset                 (time) datetime64[ns] 2016-09-21T16:39:40 ......\n",
-       "    optical_diameter_bounds     (time, optical_diameter, bound) float32 dask.array<chunksize=(8283, 61, 2), meta=np.ndarray>\n",
-       "    total_number_concentration  (time) float32 dask.array<chunksize=(8283,), meta=np.ndarray>\n",
-       "    number_concentration        (time, optical_diameter) float32 dask.array<chunksize=(8283, 61), meta=np.ndarray>\n",
-       "    lat                         (time) float32 dask.array<chunksize=(8283,), meta=np.ndarray>\n",
-       "    lon                         (time) float32 dask.array<chunksize=(8283,), meta=np.ndarray>\n",
-       "    alt                         (time) float32 dask.array<chunksize=(8283,), meta=np.ndarray>\n",
-       "Attributes: (12/17)\n",
-       "    command_line:          aaf2dsme_ingest -s sgp -f F1 -D -R\n",
-       "    Conventions:           ARM-1.3\n",
-       "    process_version:       ingest-aaf2dsme-1.2-0.el7\n",
-       "    dod_version:           aaf2dsh-c1-1.1\n",
-       "    input_source:          /data/project/ENG0004504/collection/sgp/sgpaaf2dsF...\n",
-       "    site_id:               sgp\n",
-       "    ...                    ...\n",
-       "    doi:                   10.5439/1419322\n",
-       "    history:               created by user burk on machine prod-proc5.adc.arm...\n",
-       "    _file_dates:           ['20160921', '20160922']\n",
-       "    _file_times:           ['163940', '160625']\n",
-       "    _datastream:           sgpaaf2dshF1.c1\n",
-       "    _arm_standards_flag:   1
" - ], - "text/plain": [ - "\n", - "Dimensions: (time: 18380, optical_diameter: 61, bound: 2)\n", - "Coordinates:\n", - " * time (time) datetime64[ns] 2016-09-21T16:39:40 ......\n", - " * optical_diameter (optical_diameter) float32 10.0 20.0 ... inf\n", - "Dimensions without coordinates: bound\n", - "Data variables:\n", - " base_time (time) datetime64[ns] 2016-09-21 ... 2016-09-22\n", - " time_offset (time) datetime64[ns] 2016-09-21T16:39:40 ......\n", - " optical_diameter_bounds (time, optical_diameter, bound) float32 dask.array\n", - " total_number_concentration (time) float32 dask.array\n", - " number_concentration (time, optical_diameter) float32 dask.array\n", - " lat (time) float32 dask.array\n", - " lon (time) float32 dask.array\n", - " alt (time) float32 dask.array\n", - "Attributes: (12/17)\n", - " command_line: aaf2dsme_ingest -s sgp -f F1 -D -R\n", - " Conventions: ARM-1.3\n", - " process_version: ingest-aaf2dsme-1.2-0.el7\n", - " dod_version: aaf2dsh-c1-1.1\n", - " input_source: /data/project/ENG0004504/collection/sgp/sgpaaf2dsF...\n", - " site_id: sgp\n", - " ... 
...\n", - " doi: 10.5439/1419322\n", - " history: created by user burk on machine prod-proc5.adc.arm...\n", - " _file_dates: ['20160921', '20160922']\n", - " _file_times: ['163940', '160625']\n", - " _datastream: sgpaaf2dshF1.c1\n", - " _arm_standards_flag: 1" - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Load files as a single dataset\n", - "files_list = files_filter \n", - "ds = act.io.armfiles.read_netcdf(files_list)\n", - "ds.clean.cleanup()\n", - "print(f'{len(files_list)} files loaded')\n", - "ds\n" - ] - }, - { - "cell_type": "markdown", - "id": "4a551094-9ec0-4b64-b80a-9940573c2f50", - "metadata": {}, - "source": [ - "## Plot time series data\n", - "#### Define the list of variables to be plotted:" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "id": "d09b789e-84f1-4605-846b-a72c110c8048", - "metadata": {}, - "outputs": [], - "source": [ - "variables_to_plot = ['total_number_concentration', 'number_concentration']" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "id": "3458fb08-035b-4898-9253-0a94e6f9c97b", - "metadata": {}, - "outputs": [ - { - "ename": "ValueError", - "evalue": "x and y arguments to pcolormesh cannot have non-finite values or be of type numpy.ma.core.MaskedArray with masked values", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[9], line 5\u001b[0m\n\u001b[1;32m 2\u001b[0m ts_display\u001b[38;5;241m.\u001b[39madd_subplots((\u001b[38;5;28mlen\u001b[39m(variables_to_plot),), figsize \u001b[38;5;241m=\u001b[39m (\u001b[38;5;241m9.5\u001b[39m,\u001b[38;5;241m4\u001b[39m\u001b[38;5;241m*\u001b[39m\u001b[38;5;28mlen\u001b[39m(variables_to_plot)))\n\u001b[1;32m 4\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m i,v \u001b[38;5;129;01min\u001b[39;00m 
\u001b[38;5;28menumerate\u001b[39m(variables_to_plot):\n\u001b[0;32m----> 5\u001b[0m ts_ax \u001b[38;5;241m=\u001b[39m \u001b[43mts_display\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mplot\u001b[49m\u001b[43m(\u001b[49m\u001b[43mv\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43msubplot_index\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mi\u001b[49m\u001b[43m,\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mset_title\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mds\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mvariables\u001b[49m\u001b[43m[\u001b[49m\u001b[43mv\u001b[49m\u001b[43m]\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mattrs\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mlong_name\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 6\u001b[0m ts_ax\u001b[38;5;241m.\u001b[39mgrid()\n\u001b[1;32m 8\u001b[0m plt\u001b[38;5;241m.\u001b[39mshow()\n", - "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/act/plotting/timeseriesdisplay.py:588\u001b[0m, in \u001b[0;36mTimeSeriesDisplay.plot\u001b[0;34m(self, field, dsname, subplot_index, cmap, set_title, add_nan, day_night_background, invert_y_axis, abs_limits, time_rng, y_rng, use_var_for_y, set_shading, assessment_overplot, overplot_marker, overplot_behind, overplot_markersize, assessment_overplot_category, assessment_overplot_category_color, force_line_plot, labels, cbar_label, cbar_h_adjust, secondary_y, y_axis_flag_meanings, colorbar_labels, cb_friendly, **kwargs)\u001b[0m\n\u001b[1;32m 586\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124medgecolors\u001b[39m\u001b[38;5;124m'\u001b[39m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m kwargs\u001b[38;5;241m.\u001b[39mkeys():\n\u001b[1;32m 587\u001b[0m 
kwargs[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124medgecolors\u001b[39m\u001b[38;5;124m'\u001b[39m] \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mface\u001b[39m\u001b[38;5;124m'\u001b[39m\n\u001b[0;32m--> 588\u001b[0m mesh \u001b[38;5;241m=\u001b[39m \u001b[43max\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mpcolormesh\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 589\u001b[0m \u001b[43m \u001b[49m\u001b[43mnp\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43masarray\u001b[49m\u001b[43m(\u001b[49m\u001b[43mxdata\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 590\u001b[0m \u001b[43m \u001b[49m\u001b[43mydata\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 591\u001b[0m \u001b[43m \u001b[49m\u001b[43mdata\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mtranspose\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 592\u001b[0m \u001b[43m \u001b[49m\u001b[43mshading\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mset_shading\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 593\u001b[0m \u001b[43m \u001b[49m\u001b[43mcmap\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcmap\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 594\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 595\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 597\u001b[0m \u001b[38;5;66;03m# Set Title\u001b[39;00m\n\u001b[1;32m 598\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m set_title \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n", - "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/matplotlib/__init__.py:1442\u001b[0m, in \u001b[0;36m_preprocess_data..inner\u001b[0;34m(ax, data, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1439\u001b[0m \u001b[38;5;129m@functools\u001b[39m\u001b[38;5;241m.\u001b[39mwraps(func)\n\u001b[1;32m 1440\u001b[0m 
\u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21minner\u001b[39m(ax, \u001b[38;5;241m*\u001b[39margs, data\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs):\n\u001b[1;32m 1441\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m data \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m-> 1442\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[43max\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;28;43mmap\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43msanitize_sequence\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43margs\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1444\u001b[0m bound \u001b[38;5;241m=\u001b[39m new_sig\u001b[38;5;241m.\u001b[39mbind(ax, \u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n\u001b[1;32m 1445\u001b[0m auto_label \u001b[38;5;241m=\u001b[39m (bound\u001b[38;5;241m.\u001b[39marguments\u001b[38;5;241m.\u001b[39mget(label_namer)\n\u001b[1;32m 1446\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m bound\u001b[38;5;241m.\u001b[39mkwargs\u001b[38;5;241m.\u001b[39mget(label_namer))\n", - "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/matplotlib/axes/_axes.py:6220\u001b[0m, in \u001b[0;36mAxes.pcolormesh\u001b[0;34m(self, alpha, norm, cmap, vmin, vmax, shading, antialiased, *args, **kwargs)\u001b[0m\n\u001b[1;32m 6217\u001b[0m shading \u001b[38;5;241m=\u001b[39m shading\u001b[38;5;241m.\u001b[39mlower()\n\u001b[1;32m 6218\u001b[0m kwargs\u001b[38;5;241m.\u001b[39msetdefault(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124medgecolors\u001b[39m\u001b[38;5;124m'\u001b[39m, 
\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mnone\u001b[39m\u001b[38;5;124m'\u001b[39m)\n\u001b[0;32m-> 6220\u001b[0m X, Y, C, shading \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_pcolorargs\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mpcolormesh\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 6221\u001b[0m \u001b[43m \u001b[49m\u001b[43mshading\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mshading\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mkwargs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 6222\u001b[0m coords \u001b[38;5;241m=\u001b[39m np\u001b[38;5;241m.\u001b[39mstack([X, Y], axis\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m-\u001b[39m\u001b[38;5;241m1\u001b[39m)\n\u001b[1;32m 6223\u001b[0m \u001b[38;5;66;03m# convert to one dimensional array, except for 3D RGB(A) arrays\u001b[39;00m\n", - "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/matplotlib/axes/_axes.py:5717\u001b[0m, in \u001b[0;36mAxes._pcolorargs\u001b[0;34m(self, funcname, shading, *args, **kwargs)\u001b[0m\n\u001b[1;32m 5715\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m funcname \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mpcolormesh\u001b[39m\u001b[38;5;124m'\u001b[39m:\n\u001b[1;32m 5716\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m np\u001b[38;5;241m.\u001b[39mma\u001b[38;5;241m.\u001b[39mis_masked(X) \u001b[38;5;129;01mor\u001b[39;00m np\u001b[38;5;241m.\u001b[39mma\u001b[38;5;241m.\u001b[39mis_masked(Y):\n\u001b[0;32m-> 5717\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[1;32m 5718\u001b[0m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mx and y arguments to pcolormesh cannot have 
\u001b[39m\u001b[38;5;124m'\u001b[39m\n\u001b[1;32m 5719\u001b[0m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mnon-finite values or be of type \u001b[39m\u001b[38;5;124m'\u001b[39m\n\u001b[1;32m 5720\u001b[0m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mnumpy.ma.core.MaskedArray with masked values\u001b[39m\u001b[38;5;124m'\u001b[39m)\n\u001b[1;32m 5721\u001b[0m \u001b[38;5;66;03m# safe_masked_invalid() returns an ndarray for dtypes other\u001b[39;00m\n\u001b[1;32m 5722\u001b[0m \u001b[38;5;66;03m# than floating point.\u001b[39;00m\n\u001b[1;32m 5723\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(X, np\u001b[38;5;241m.\u001b[39mma\u001b[38;5;241m.\u001b[39mcore\u001b[38;5;241m.\u001b[39mMaskedArray):\n", - "\u001b[0;31mValueError\u001b[0m: x and y arguments to pcolormesh cannot have non-finite values or be of type numpy.ma.core.MaskedArray with masked values" - ] - }, - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "eae83e9e4798482083ecdb12aeb73cf7", - "version_major": 2, - "version_minor": 0 - }, - "image/png": "", - "text/html": [ - "\n", - "
\n", - "
\n", - " Figure\n", - "
\n", - " \n", - "
\n", - " " - ], - "text/plain": [ - "Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "ts_display = act.plotting.TimeSeriesDisplay(ds)\n", - "ts_display.add_subplots((len(variables_to_plot),), figsize = (9.5,4*len(variables_to_plot)))\n", - "\n", - "for i,v in enumerate(variables_to_plot):\n", - " ts_ax = ts_display.plot(v, subplot_index=(i,), set_title=ds.variables[v].attrs['long_name'],)\n", - " ts_ax.grid()\n", - "\n", - "plt.show()\n" - ] - }, - { - "cell_type": "markdown", - "id": "2308db16-f362-4033-a11a-c3e5e75ad9ba", - "metadata": {}, - "source": [ - "## Field selection dropdown menu\n", - "Select variable to be plotted from a dropdown menu" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "d5792fa8-2253-441a-8ac6-1e07a0345031", - "metadata": {}, - "outputs": [], - "source": [ - "plt.ioff()\n", - "\n", - "# populate dropdown menu with available variables \n", - "available_variables = [v for v in ds.variables if not('time' in v or v.startswith('qc_') or v.startswith('source_')) and 'long_name' in ds.variables[v].attrs]\n", - "d_variable = 'total_number_concentration'\n", - "dropdown = widgets.Dropdown(\n", - " options = [(ds.variables[v].attrs['long_name'], v) for v in available_variables],\n", - " value= d_variable,\n", - " description='Field:',\n", - " disabled=False,\n", - ")\n", - "dropdown.layout.margin = '0px 30% 0px 20%'\n", - "dropdown.layout.width = '50%'\n", - "\n", - "# set up display\n", - "i_display = act.plotting.TimeSeriesDisplay(ds)\n", - "i_display.add_subplots((1,), figsize = (9.5,5))\n", - "i_ax = i_display.plot(d_variable, subplot_index=(0,), set_title=ds.variables[d_variable].attrs['long_name'],)\n", - "i_ax.grid()\n", - "i_fig = i_display.fig\n", - "\n", - "# update plot callback function\n", - "def update_plot(change):\n", - " i_ax.cla()\n", - " i_ax_new = 
i_display.plot(change.new, subplot_index=(0,), set_title=ds.variables[change.new].attrs['long_name'],)\n", - " i_ax_new.grid()\n", - " i_fig.canvas.draw()\n", - " i_fig.canvas.flush_events()\n", - "\n", - "dropdown.observe(update_plot, names='value')\n", - "\n", - "widgets.AppLayout(\n", - " header=dropdown,\n", - " center=i_fig.canvas,\n", - " pane_heights=[1, 6,1]\n", - ")\n" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.16" - }, - "widgets": { - "application/vnd.jupyter.widget-state+json": { - "state": {}, - "version_major": 2, - "version_minor": 0 - } - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/VAPs/quicklook/2DS-AIR/.ipynb_checkpoints/aaf2dsv.c1-checkpoint.ipynb b/VAPs/quicklook/2DS-AIR/.ipynb_checkpoints/aaf2dsv.c1-checkpoint.ipynb deleted file mode 100644 index 9a856041..00000000 --- a/VAPs/quicklook/2DS-AIR/.ipynb_checkpoints/aaf2dsv.c1-checkpoint.ipynb +++ /dev/null @@ -1,1798 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "70840257-70e4-45e2-b491-14bff5a257a3", - "metadata": {}, - "source": [ - "# AAF2DSV.C1 Plots\n", - "\n", - "[Click here](https://www.arm.gov/capabilities/vaps/2ds-air) for more information about this vap." 
- ] - }, - { - "cell_type": "code", - "execution_count": 1, - "id": "460fd89f-e034-452c-b837-f65c5958264f", - "metadata": {}, - "outputs": [], - "source": [ - "%matplotlib widget\n", - "import ipywidgets as widgets\n", - "\n", - "import matplotlib.pyplot as plt\n", - "import ipywidgets as widgets\n", - "import numpy as np\n", - "import pandas as pd\n", - "import os\n", - "from datetime import datetime\n", - "\n", - "import act\n", - "import xarray as xr\n", - "\n", - "# Data archive directory\n", - "DATA_DIR = r'/data/archive/'\n", - "\n", - "# Datastream info\n", - "DATASTREAM_NAME = 'aaf2dsv'\n", - "DATA_LEVEL = 'c1'\n", - "LOCATIONS = [{'end_date': '2018-12-08', 'facility': 'F1', 'site': 'cor', 'start_date': '2018-11-04'}, {'end_date': '2018-02-19', 'facility': 'F1', 'site': 'ena', 'start_date': '2017-06-21'}, {'end_date': '2016-09-22', 'facility': 'F1', 'site': 'sgp', 'start_date': '2016-04-25'}]" - ] - }, - { - "cell_type": "markdown", - "id": "9faaf875", - "metadata": {}, - "source": [ - "## Define site, facility, and date range" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "ac6764f5", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The following locations and date ranges are available for this VAP:\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
sitefacilitystart_dateend_date
0corF12018-11-042018-12-08
1enaF12017-06-212018-02-19
2sgpF12016-04-252016-09-22
\n", - "
" - ], - "text/plain": [ - " site facility start_date end_date\n", - "0 cor F1 2018-11-04 2018-12-08\n", - "1 ena F1 2017-06-21 2018-02-19\n", - "2 sgp F1 2016-04-25 2016-09-22" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "print(\"The following locations and date ranges are available for this VAP:\")\n", - "display(pd.DataFrame(LOCATIONS, columns=['site', 'facility', 'start_date', 'end_date']))" - ] - }, - { - "cell_type": "markdown", - "id": "8d132223", - "metadata": {}, - "source": [ - "#### Define site, facility, and date range (date format: YYYY-MM-DD) using the variables below:" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "e563983a", - "metadata": {}, - "outputs": [], - "source": [ - "site_facility = ( 'sgp', 'F1' )\n", - "\n", - "date_start = '2016-09-21'\n", - "date_end = '2016-09-22'" - ] - }, - { - "cell_type": "markdown", - "id": "bccd3dfe-2f99-49a5-bace-ea37e8dd8fc3", - "metadata": {}, - "source": [ - "## Load data files\n", - "Load data files from /data/archive/" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "cb4b9a26-c574-49c0-a521-658fa553e39e", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "'/data/archive/sgp/sgpaaf2dsvF1.c1'" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Compile list of files\n", - "site, facility = site_facility\n", - "d_date_start = datetime.strptime(date_start, '%Y-%m-%d')\n", - "d_date_end = datetime.strptime(date_end, '%Y-%m-%d')\n", - "dir_path = os.path.join(DATA_DIR + site, site + DATASTREAM_NAME + facility + r'.' 
+ DATA_LEVEL )\n", - "dir_path\n" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "6be8f3dc", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "['20160921', '20160922']" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "from datetime import date, timedelta\n", - "import pandas as pd\n", - "\n", - "def get_ARM_formated_dates(start_date, end_date):\n", - " \"\"\"\n", - " Get a list of ARM conventional formated date lists, based on start_date and end_date(inclusive)\n", - " EXAMPLE:\n", - " get_ARM_formated_dates(start_date=\"20180219\", end_date=\"20180221\")\n", - " >> [\"20180219\", \"20180220\", \"20180221\"] \n", - " \"\"\"\n", - " \n", - " _start_date = pd.to_datetime(start_date)\n", - " _end_date = pd.to_datetime(end_date)\n", - " \n", - " delta = _end_date - _start_date # returns timedelta \n", - " dates = []\n", - "\n", - " for i in range(delta.days + 1):\n", - " day = _start_date + timedelta(days=i)\n", - " day_formated = day.strftime(format=\"%Y%m%d\")\n", - " dates.append(day_formated)\n", - " return dates\n", - "\n", - "\n", - "get_ARM_formated_dates(start_date=date_start, end_date=date_end)" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "51feea2e", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "['/data/archive/sgp/sgpaaf2dsvF1.c1/sgpaaf2dsvF1.c1.20160921.163940.nc',\n", - " '/data/archive/sgp/sgpaaf2dsvF1.c1/sgpaaf2dsvF1.c1.20160922.160625.nc']" - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Filter a list of files based on date pattern\n", - "import glob\n", - "dates = get_ARM_formated_dates(start_date=date_start, end_date=date_end)\n", - "files_filter = []\n", - "for date in dates:\n", - " files_filter += glob.glob(f'{dir_path}/*.{date}*.*')\n", - " files_filter\n", - "files_filter" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - 
"id": "226f29ae", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "
<xarray.Dataset>\n",
-       "Dimensions:                     (time: 10097, optical_diameter: 61, bound: 2)\n",
-       "Coordinates:\n",
-       "  * time                        (time) datetime64[ns] 2016-09-22T16:06:25 ......\n",
-       "  * optical_diameter            (optical_diameter) float32 10.0 20.0 ... inf\n",
-       "Dimensions without coordinates: bound\n",
-       "Data variables:\n",
-       "    base_time                   datetime64[ns] 2016-09-22\n",
-       "    time_offset                 (time) datetime64[ns] 2016-09-22T16:06:25 ......\n",
-       "    optical_diameter_bounds     (optical_diameter, bound) float32 5.0 ... inf\n",
-       "    total_number_concentration  (time) float32 26.59 0.0 22.4 ... 0.0 0.0 nan\n",
-       "    number_concentration        (time, optical_diameter) float32 2.24 ... nan\n",
-       "    lat                         (time) float32 36.76 36.76 36.76 ... 36.74 36.74\n",
-       "    lon                         (time) float32 -96.01 -96.01 ... -96.02 -96.02\n",
-       "    alt                         (time) float32 220.0 224.0 228.0 ... 677.0 674.0\n",
-       "Attributes: (12/13)\n",
-       "    command_line:          aaf2dsme_ingest -s sgp -f F1 -D -R\n",
-       "    Conventions:           ARM-1.3\n",
-       "    process_version:       ingest-aaf2dsme-1.2-0.el7\n",
-       "    dod_version:           aaf2dsv-c1-1.1\n",
-       "    input_source:          /data/project/ENG0004504/collection/sgp/sgpaaf2dsF...\n",
-       "    site_id:               sgp\n",
-       "    ...                    ...\n",
-       "    facility_id:           F1\n",
-       "    data_level:            c1\n",
-       "    location_description:  Southern Great Plains (SGP), Gulfstream 159 ("G1")...\n",
-       "    datastream:            sgpaaf2dsvF1.c1\n",
-       "    doi:                   10.5439/1419323\n",
-       "    history:               created by user burk on machine prod-proc5.adc.arm...
" - ], - "text/plain": [ - "\n", - "Dimensions: (time: 10097, optical_diameter: 61, bound: 2)\n", - "Coordinates:\n", - " * time (time) datetime64[ns] 2016-09-22T16:06:25 ......\n", - " * optical_diameter (optical_diameter) float32 10.0 20.0 ... inf\n", - "Dimensions without coordinates: bound\n", - "Data variables:\n", - " base_time datetime64[ns] 2016-09-22\n", - " time_offset (time) datetime64[ns] 2016-09-22T16:06:25 ......\n", - " optical_diameter_bounds (optical_diameter, bound) float32 5.0 ... inf\n", - " total_number_concentration (time) float32 26.59 0.0 22.4 ... 0.0 0.0 nan\n", - " number_concentration (time, optical_diameter) float32 2.24 ... nan\n", - " lat (time) float32 36.76 36.76 36.76 ... 36.74 36.74\n", - " lon (time) float32 -96.01 -96.01 ... -96.02 -96.02\n", - " alt (time) float32 220.0 224.0 228.0 ... 677.0 674.0\n", - "Attributes: (12/13)\n", - " command_line: aaf2dsme_ingest -s sgp -f F1 -D -R\n", - " Conventions: ARM-1.3\n", - " process_version: ingest-aaf2dsme-1.2-0.el7\n", - " dod_version: aaf2dsv-c1-1.1\n", - " input_source: /data/project/ENG0004504/collection/sgp/sgpaaf2dsF...\n", - " site_id: sgp\n", - " ... ...\n", - " facility_id: F1\n", - " data_level: c1\n", - " location_description: Southern Great Plains (SGP), Gulfstream 159 (\"G1\")...\n", - " datastream: sgpaaf2dsvF1.c1\n", - " doi: 10.5439/1419323\n", - " history: created by user burk on machine prod-proc5.adc.arm..." 
- ] - }, - "execution_count": 10, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "ds_single_1 = xr.load_dataset(\"/data/archive/sgp/sgpaaf2dsvF1.c1/sgpaaf2dsvF1.c1.20160921.163940.nc\")\n", - "ds_single_1\n", - "\n", - "ds_single_2 = xr.load_dataset(\"/data/archive/sgp/sgpaaf2dsvF1.c1/sgpaaf2dsvF1.c1.20160922.160625.nc\")\n", - "ds_single_2" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "id": "b0e5d0d7", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "2 files loaded\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "
<xarray.Dataset>\n",
-       "Dimensions:                     (time: 18380, optical_diameter: 61, bound: 2)\n",
-       "Coordinates:\n",
-       "  * time                        (time) datetime64[ns] 2016-09-21T16:39:40 ......\n",
-       "  * optical_diameter            (optical_diameter) float32 10.0 20.0 ... inf\n",
-       "Dimensions without coordinates: bound\n",
-       "Data variables:\n",
-       "    base_time                   (time) datetime64[ns] 2016-09-21 ... 2016-09-22\n",
-       "    time_offset                 (time) datetime64[ns] 2016-09-21T16:39:40 ......\n",
-       "    optical_diameter_bounds     (time, optical_diameter, bound) float32 dask.array<chunksize=(8283, 61, 2), meta=np.ndarray>\n",
-       "    total_number_concentration  (time) float32 dask.array<chunksize=(8283,), meta=np.ndarray>\n",
-       "    number_concentration        (time, optical_diameter) float32 dask.array<chunksize=(8283, 61), meta=np.ndarray>\n",
-       "    lat                         (time) float32 dask.array<chunksize=(8283,), meta=np.ndarray>\n",
-       "    lon                         (time) float32 dask.array<chunksize=(8283,), meta=np.ndarray>\n",
-       "    alt                         (time) float32 dask.array<chunksize=(8283,), meta=np.ndarray>\n",
-       "Attributes: (12/17)\n",
-       "    command_line:          aaf2dsme_ingest -s sgp -f F1 -D -R\n",
-       "    Conventions:           ARM-1.3\n",
-       "    process_version:       ingest-aaf2dsme-1.2-0.el7\n",
-       "    dod_version:           aaf2dsv-c1-1.1\n",
-       "    input_source:          /data/project/ENG0004504/collection/sgp/sgpaaf2dsF...\n",
-       "    site_id:               sgp\n",
-       "    ...                    ...\n",
-       "    doi:                   10.5439/1419323\n",
-       "    history:               created by user burk on machine prod-proc5.adc.arm...\n",
-       "    _file_dates:           ['20160921', '20160922']\n",
-       "    _file_times:           ['163940', '160625']\n",
-       "    _datastream:           sgpaaf2dsvF1.c1\n",
-       "    _arm_standards_flag:   1
" - ], - "text/plain": [ - "\n", - "Dimensions: (time: 18380, optical_diameter: 61, bound: 2)\n", - "Coordinates:\n", - " * time (time) datetime64[ns] 2016-09-21T16:39:40 ......\n", - " * optical_diameter (optical_diameter) float32 10.0 20.0 ... inf\n", - "Dimensions without coordinates: bound\n", - "Data variables:\n", - " base_time (time) datetime64[ns] 2016-09-21 ... 2016-09-22\n", - " time_offset (time) datetime64[ns] 2016-09-21T16:39:40 ......\n", - " optical_diameter_bounds (time, optical_diameter, bound) float32 dask.array\n", - " total_number_concentration (time) float32 dask.array\n", - " number_concentration (time, optical_diameter) float32 dask.array\n", - " lat (time) float32 dask.array\n", - " lon (time) float32 dask.array\n", - " alt (time) float32 dask.array\n", - "Attributes: (12/17)\n", - " command_line: aaf2dsme_ingest -s sgp -f F1 -D -R\n", - " Conventions: ARM-1.3\n", - " process_version: ingest-aaf2dsme-1.2-0.el7\n", - " dod_version: aaf2dsv-c1-1.1\n", - " input_source: /data/project/ENG0004504/collection/sgp/sgpaaf2dsF...\n", - " site_id: sgp\n", - " ... 
...\n", - " doi: 10.5439/1419323\n", - " history: created by user burk on machine prod-proc5.adc.arm...\n", - " _file_dates: ['20160921', '20160922']\n", - " _file_times: ['163940', '160625']\n", - " _datastream: sgpaaf2dsvF1.c1\n", - " _arm_standards_flag: 1" - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Load files as a single dataset\n", - "files_list = files_filter \n", - "ds = act.io.armfiles.read_netcdf(files_list)\n", - "ds.clean.cleanup()\n", - "print(f'{len(files_list)} files loaded')\n", - "ds\n" - ] - }, - { - "cell_type": "markdown", - "id": "4a551094-9ec0-4b64-b80a-9940573c2f50", - "metadata": {}, - "source": [ - "## Plot time series data\n", - "#### Define the list of variables to be plotted:" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "id": "d09b789e-84f1-4605-846b-a72c110c8048", - "metadata": {}, - "outputs": [], - "source": [ - "variables_to_plot = ['total_number_concentration', 'number_concentration']" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "id": "3458fb08-035b-4898-9253-0a94e6f9c97b", - "metadata": {}, - "outputs": [ - { - "ename": "ValueError", - "evalue": "x and y arguments to pcolormesh cannot have non-finite values or be of type numpy.ma.core.MaskedArray with masked values", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[9], line 5\u001b[0m\n\u001b[1;32m 2\u001b[0m ts_display\u001b[38;5;241m.\u001b[39madd_subplots((\u001b[38;5;28mlen\u001b[39m(variables_to_plot),), figsize \u001b[38;5;241m=\u001b[39m (\u001b[38;5;241m9.5\u001b[39m,\u001b[38;5;241m4\u001b[39m\u001b[38;5;241m*\u001b[39m\u001b[38;5;28mlen\u001b[39m(variables_to_plot)))\n\u001b[1;32m 4\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m i,v \u001b[38;5;129;01min\u001b[39;00m 
\u001b[38;5;28menumerate\u001b[39m(variables_to_plot):\n\u001b[0;32m----> 5\u001b[0m ts_ax \u001b[38;5;241m=\u001b[39m \u001b[43mts_display\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mplot\u001b[49m\u001b[43m(\u001b[49m\u001b[43mv\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43msubplot_index\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mi\u001b[49m\u001b[43m,\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mset_title\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mds\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mvariables\u001b[49m\u001b[43m[\u001b[49m\u001b[43mv\u001b[49m\u001b[43m]\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mattrs\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mlong_name\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 6\u001b[0m ts_ax\u001b[38;5;241m.\u001b[39mgrid()\n\u001b[1;32m 8\u001b[0m plt\u001b[38;5;241m.\u001b[39mshow()\n", - "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/act/plotting/timeseriesdisplay.py:588\u001b[0m, in \u001b[0;36mTimeSeriesDisplay.plot\u001b[0;34m(self, field, dsname, subplot_index, cmap, set_title, add_nan, day_night_background, invert_y_axis, abs_limits, time_rng, y_rng, use_var_for_y, set_shading, assessment_overplot, overplot_marker, overplot_behind, overplot_markersize, assessment_overplot_category, assessment_overplot_category_color, force_line_plot, labels, cbar_label, cbar_h_adjust, secondary_y, y_axis_flag_meanings, colorbar_labels, cb_friendly, **kwargs)\u001b[0m\n\u001b[1;32m 586\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124medgecolors\u001b[39m\u001b[38;5;124m'\u001b[39m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m kwargs\u001b[38;5;241m.\u001b[39mkeys():\n\u001b[1;32m 587\u001b[0m 
kwargs[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124medgecolors\u001b[39m\u001b[38;5;124m'\u001b[39m] \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mface\u001b[39m\u001b[38;5;124m'\u001b[39m\n\u001b[0;32m--> 588\u001b[0m mesh \u001b[38;5;241m=\u001b[39m \u001b[43max\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mpcolormesh\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 589\u001b[0m \u001b[43m \u001b[49m\u001b[43mnp\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43masarray\u001b[49m\u001b[43m(\u001b[49m\u001b[43mxdata\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 590\u001b[0m \u001b[43m \u001b[49m\u001b[43mydata\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 591\u001b[0m \u001b[43m \u001b[49m\u001b[43mdata\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mtranspose\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 592\u001b[0m \u001b[43m \u001b[49m\u001b[43mshading\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mset_shading\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 593\u001b[0m \u001b[43m \u001b[49m\u001b[43mcmap\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcmap\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 594\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 595\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 597\u001b[0m \u001b[38;5;66;03m# Set Title\u001b[39;00m\n\u001b[1;32m 598\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m set_title \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n", - "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/matplotlib/__init__.py:1442\u001b[0m, in \u001b[0;36m_preprocess_data..inner\u001b[0;34m(ax, data, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1439\u001b[0m \u001b[38;5;129m@functools\u001b[39m\u001b[38;5;241m.\u001b[39mwraps(func)\n\u001b[1;32m 1440\u001b[0m 
\u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21minner\u001b[39m(ax, \u001b[38;5;241m*\u001b[39margs, data\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs):\n\u001b[1;32m 1441\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m data \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m-> 1442\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[43max\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;28;43mmap\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43msanitize_sequence\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43margs\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1444\u001b[0m bound \u001b[38;5;241m=\u001b[39m new_sig\u001b[38;5;241m.\u001b[39mbind(ax, \u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n\u001b[1;32m 1445\u001b[0m auto_label \u001b[38;5;241m=\u001b[39m (bound\u001b[38;5;241m.\u001b[39marguments\u001b[38;5;241m.\u001b[39mget(label_namer)\n\u001b[1;32m 1446\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m bound\u001b[38;5;241m.\u001b[39mkwargs\u001b[38;5;241m.\u001b[39mget(label_namer))\n", - "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/matplotlib/axes/_axes.py:6220\u001b[0m, in \u001b[0;36mAxes.pcolormesh\u001b[0;34m(self, alpha, norm, cmap, vmin, vmax, shading, antialiased, *args, **kwargs)\u001b[0m\n\u001b[1;32m 6217\u001b[0m shading \u001b[38;5;241m=\u001b[39m shading\u001b[38;5;241m.\u001b[39mlower()\n\u001b[1;32m 6218\u001b[0m kwargs\u001b[38;5;241m.\u001b[39msetdefault(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124medgecolors\u001b[39m\u001b[38;5;124m'\u001b[39m, 
\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mnone\u001b[39m\u001b[38;5;124m'\u001b[39m)\n\u001b[0;32m-> 6220\u001b[0m X, Y, C, shading \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_pcolorargs\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mpcolormesh\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 6221\u001b[0m \u001b[43m \u001b[49m\u001b[43mshading\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mshading\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mkwargs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 6222\u001b[0m coords \u001b[38;5;241m=\u001b[39m np\u001b[38;5;241m.\u001b[39mstack([X, Y], axis\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m-\u001b[39m\u001b[38;5;241m1\u001b[39m)\n\u001b[1;32m 6223\u001b[0m \u001b[38;5;66;03m# convert to one dimensional array, except for 3D RGB(A) arrays\u001b[39;00m\n", - "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/matplotlib/axes/_axes.py:5717\u001b[0m, in \u001b[0;36mAxes._pcolorargs\u001b[0;34m(self, funcname, shading, *args, **kwargs)\u001b[0m\n\u001b[1;32m 5715\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m funcname \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mpcolormesh\u001b[39m\u001b[38;5;124m'\u001b[39m:\n\u001b[1;32m 5716\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m np\u001b[38;5;241m.\u001b[39mma\u001b[38;5;241m.\u001b[39mis_masked(X) \u001b[38;5;129;01mor\u001b[39;00m np\u001b[38;5;241m.\u001b[39mma\u001b[38;5;241m.\u001b[39mis_masked(Y):\n\u001b[0;32m-> 5717\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[1;32m 5718\u001b[0m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mx and y arguments to pcolormesh cannot have 
\u001b[39m\u001b[38;5;124m'\u001b[39m\n\u001b[1;32m 5719\u001b[0m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mnon-finite values or be of type \u001b[39m\u001b[38;5;124m'\u001b[39m\n\u001b[1;32m 5720\u001b[0m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mnumpy.ma.core.MaskedArray with masked values\u001b[39m\u001b[38;5;124m'\u001b[39m)\n\u001b[1;32m 5721\u001b[0m \u001b[38;5;66;03m# safe_masked_invalid() returns an ndarray for dtypes other\u001b[39;00m\n\u001b[1;32m 5722\u001b[0m \u001b[38;5;66;03m# than floating point.\u001b[39;00m\n\u001b[1;32m 5723\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(X, np\u001b[38;5;241m.\u001b[39mma\u001b[38;5;241m.\u001b[39mcore\u001b[38;5;241m.\u001b[39mMaskedArray):\n", - "\u001b[0;31mValueError\u001b[0m: x and y arguments to pcolormesh cannot have non-finite values or be of type numpy.ma.core.MaskedArray with masked values" - ] - }, - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "b4ce530bc3b34d038b85608090b4b719", - "version_major": 2, - "version_minor": 0 - }, - "image/png": "", - "text/html": [ - "\n", - "
\n", - "
\n", - " Figure\n", - "
\n", - " \n", - "
\n", - " " - ], - "text/plain": [ - "Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "ts_display = act.plotting.TimeSeriesDisplay(ds)\n", - "ts_display.add_subplots((len(variables_to_plot),), figsize = (9.5,4*len(variables_to_plot)))\n", - "\n", - "for i,v in enumerate(variables_to_plot):\n", - " ts_ax = ts_display.plot(v, subplot_index=(i,), set_title=ds.variables[v].attrs['long_name'],)\n", - " ts_ax.grid()\n", - "\n", - "plt.show()\n" - ] - }, - { - "cell_type": "markdown", - "id": "2308db16-f362-4033-a11a-c3e5e75ad9ba", - "metadata": {}, - "source": [ - "## Field selection dropdown menu\n", - "Select variable to be plotted from a dropdown menu" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "d5792fa8-2253-441a-8ac6-1e07a0345031", - "metadata": {}, - "outputs": [], - "source": [ - "plt.ioff()\n", - "\n", - "# populate dropdown menu with available variables \n", - "available_variables = [v for v in ds.variables if not('time' in v or v.startswith('qc_') or v.startswith('source_')) and 'long_name' in ds.variables[v].attrs]\n", - "d_variable = 'total_number_concentration'\n", - "dropdown = widgets.Dropdown(\n", - " options = [(ds.variables[v].attrs['long_name'], v) for v in available_variables],\n", - " value= d_variable,\n", - " description='Field:',\n", - " disabled=False,\n", - ")\n", - "dropdown.layout.margin = '0px 30% 0px 20%'\n", - "dropdown.layout.width = '50%'\n", - "\n", - "# set up display\n", - "i_display = act.plotting.TimeSeriesDisplay(ds)\n", - "i_display.add_subplots((1,), figsize = (9.5,5))\n", - "i_ax = i_display.plot(d_variable, subplot_index=(0,), set_title=ds.variables[d_variable].attrs['long_name'],)\n", - "i_ax.grid()\n", - "i_fig = i_display.fig\n", - "\n", - "# update plot callback function\n", - "def update_plot(change):\n", - " i_ax.cla()\n", - " i_ax_new = 
i_display.plot(change.new, subplot_index=(0,), set_title=ds.variables[change.new].attrs['long_name'],)\n", - " i_ax_new.grid()\n", - " i_fig.canvas.draw()\n", - " i_fig.canvas.flush_events()\n", - "\n", - "dropdown.observe(update_plot, names='value')\n", - "\n", - "widgets.AppLayout(\n", - " header=dropdown,\n", - " center=i_fig.canvas,\n", - " pane_heights=[1, 6,1]\n", - ")\n" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.16" - }, - "widgets": { - "application/vnd.jupyter.widget-state+json": { - "state": {}, - "version_major": 2, - "version_minor": 0 - } - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/VAPs/quicklook/AOP/.ipynb_checkpoints/aoppsap1flynn1m.c1-checkpoint.ipynb b/VAPs/quicklook/AOP/.ipynb_checkpoints/aoppsap1flynn1m.c1-checkpoint.ipynb deleted file mode 100644 index 927b1391..00000000 --- a/VAPs/quicklook/AOP/.ipynb_checkpoints/aoppsap1flynn1m.c1-checkpoint.ipynb +++ /dev/null @@ -1,8265 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "70840257-70e4-45e2-b491-14bff5a257a3", - "metadata": {}, - "source": [ - "# AOPPSAP1FLYNN1M.C1 Plots\n", - "\n", - "[Click here](https://www.arm.gov/capabilities/vaps/aop) for more information about this vap." 
- ] - }, - { - "cell_type": "code", - "execution_count": 1, - "id": "460fd89f-e034-452c-b837-f65c5958264f", - "metadata": {}, - "outputs": [], - "source": [ - "%matplotlib widget\n", - "import ipywidgets as widgets\n", - "\n", - "import matplotlib.pyplot as plt\n", - "import ipywidgets as widgets\n", - "import numpy as np\n", - "import pandas as pd\n", - "import os\n", - "from datetime import datetime\n", - "\n", - "import act\n", - "import xarray as xr\n", - "\n", - "# Data archive directory\n", - "DATA_DIR = r'/data/archive/'\n", - "\n", - "# Datastream info\n", - "DATASTREAM_NAME = 'aoppsap1flynn1m'\n", - "DATA_LEVEL = 'c1'\n", - "LOCATIONS = [{'end_date': '2017-11-01', 'facility': 'M1', 'site': 'asi', 'start_date': '2016-04-23'}, {'end_date': '2020-06-01', 'facility': 'M1', 'site': 'anx', 'start_date': '2019-12-01'}, {'end_date': '2021-10-14', 'facility': 'M1', 'site': 'guc', 'start_date': '2021-09-02'}, {'end_date': '2023-06-15', 'facility': 'S2', 'site': 'guc', 'start_date': '2021-10-27'}, {'end_date': '2021-06-14', 'facility': 'M1', 'site': 'oli', 'start_date': '2016-08-06'}, {'end_date': '2019-04-30', 'facility': 'M1', 'site': 'cor', 'start_date': '2018-09-23'}, {'end_date': '2015-12-01', 'facility': 'S1', 'site': 'mao', 'start_date': '2014-02-06'}, {'end_date': '2018-01-11', 'facility': 'M1', 'site': 'mar', 'start_date': '2017-10-29'}, {'end_date': '2020-10-01', 'facility': 'M1', 'site': 'mos', 'start_date': '2019-10-11'}, {'end_date': '2023-12-05', 'facility': 'M1', 'site': 'epc', 'start_date': '2023-01-15'}, {'end_date': '2022-09-30', 'facility': 'M1', 'site': 'hou', 'start_date': '2021-09-08'}, {'end_date': '2023-12-11', 'facility': 'C1', 'site': 'ena', 'start_date': '2013-10-09'}, {'end_date': '2017-09-29', 'facility': 'C1', 'site': 'sgp', 'start_date': '2015-10-01'}, {'end_date': '2023-12-12', 'facility': 'E13', 'site': 'sgp', 'start_date': '2016-11-15'}]" - ] - }, - { - "cell_type": "markdown", - "id": "9faaf875", - "metadata": {}, - "source": [ - 
"## Define site, facility, and date range" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "ac6764f5", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The following locations and date ranges are available for this VAP:\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
sitefacilitystart_dateend_date
0asiM12016-04-232017-11-01
1anxM12019-12-012020-06-01
2gucM12021-09-022021-10-14
3gucS22021-10-272023-06-15
4oliM12016-08-062021-06-14
5corM12018-09-232019-04-30
6maoS12014-02-062015-12-01
7marM12017-10-292018-01-11
8mosM12019-10-112020-10-01
9epcM12023-01-152023-12-05
10houM12021-09-082022-09-30
11enaC12013-10-092023-12-11
12sgpC12015-10-012017-09-29
13sgpE132016-11-152023-12-12
\n", - "
" - ], - "text/plain": [ - " site facility start_date end_date\n", - "0 asi M1 2016-04-23 2017-11-01\n", - "1 anx M1 2019-12-01 2020-06-01\n", - "2 guc M1 2021-09-02 2021-10-14\n", - "3 guc S2 2021-10-27 2023-06-15\n", - "4 oli M1 2016-08-06 2021-06-14\n", - "5 cor M1 2018-09-23 2019-04-30\n", - "6 mao S1 2014-02-06 2015-12-01\n", - "7 mar M1 2017-10-29 2018-01-11\n", - "8 mos M1 2019-10-11 2020-10-01\n", - "9 epc M1 2023-01-15 2023-12-05\n", - "10 hou M1 2021-09-08 2022-09-30\n", - "11 ena C1 2013-10-09 2023-12-11\n", - "12 sgp C1 2015-10-01 2017-09-29\n", - "13 sgp E13 2016-11-15 2023-12-12" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "print(\"The following locations and date ranges are available for this VAP:\")\n", - "display(pd.DataFrame(LOCATIONS, columns=['site', 'facility', 'start_date', 'end_date']))" - ] - }, - { - "cell_type": "markdown", - "id": "8d132223", - "metadata": {}, - "source": [ - "#### Define site, facility, and date range (date format: YYYY-MM-DD) using the variables below:" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "e563983a", - "metadata": {}, - "outputs": [], - "source": [ - "site_facility = ( 'sgp', 'C1' )\n", - "\n", - "date_start = '2017-09-27'\n", - "date_end = '2017-09-29'" - ] - }, - { - "cell_type": "markdown", - "id": "bccd3dfe-2f99-49a5-bace-ea37e8dd8fc3", - "metadata": {}, - "source": [ - "## Load data files\n", - "Load data files from /data/archive/" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "cb4b9a26-c574-49c0-a521-658fa553e39e", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "'/data/archive/sgp/sgpaoppsap1flynn1mC1.c1'" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Compile list of files\n", - "site, facility = site_facility\n", - "d_date_start = datetime.strptime(date_start, '%Y-%m-%d')\n", - "d_date_end = datetime.strptime(date_end, 
'%Y-%m-%d')\n", - "dir_path = os.path.join(DATA_DIR + site, site + DATASTREAM_NAME + facility + r'.' + DATA_LEVEL )\n", - "dir_path\n" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "6be8f3dc", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "['20170927', '20170928', '20170929']" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "from datetime import date, timedelta\n", - "import pandas as pd\n", - "\n", - "def get_ARM_formated_dates(start_date, end_date):\n", - " \"\"\"\n", - " Get a list of ARM conventional formated date lists, based on start_date and end_date(inclusive)\n", - " EXAMPLE:\n", - " get_ARM_formated_dates(start_date=\"20180219\", end_date=\"20180221\")\n", - " >> [\"20180219\", \"20180220\", \"20180221\"] \n", - " \"\"\"\n", - " \n", - " _start_date = pd.to_datetime(start_date)\n", - " _end_date = pd.to_datetime(end_date)\n", - " \n", - " delta = _end_date - _start_date # returns timedelta \n", - " dates = []\n", - "\n", - " for i in range(delta.days + 1):\n", - " day = _start_date + timedelta(days=i)\n", - " day_formated = day.strftime(format=\"%Y%m%d\")\n", - " dates.append(day_formated)\n", - " return dates\n", - "\n", - "\n", - "get_ARM_formated_dates(start_date=date_start, end_date=date_end)" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "51feea2e", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "['/data/archive/sgp/sgpaoppsap1flynn1mC1.c1/sgpaoppsap1flynn1mC1.c1.20170927.000030.nc',\n", - " '/data/archive/sgp/sgpaoppsap1flynn1mC1.c1/sgpaoppsap1flynn1mC1.c1.20170928.000030.nc',\n", - " '/data/archive/sgp/sgpaoppsap1flynn1mC1.c1/sgpaoppsap1flynn1mC1.c1.20170929.000030.nc']" - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Filter a list of files based on date pattern\n", - "import glob\n", - "dates = 
get_ARM_formated_dates(start_date=date_start, end_date=date_end)\n", - "files_filter = []\n", - "for date in dates:\n", - " files_filter += glob.glob(f'{dir_path}/*.{date}*.*')\n", - " files_filter\n", - "files_filter" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "id": "b0e5d0d7", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "3 files loaded\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "
<xarray.Dataset>\n",
-       "Dimensions:                         (time: 4320, bound: 2)\n",
-       "Coordinates:\n",
-       "  * time                            (time) datetime64[ns] 2017-09-27T00:00:30...\n",
-       "Dimensions without coordinates: bound\n",
-       "Data variables: (12/132)\n",
-       "    base_time                       (time) datetime64[ns] 2017-09-27 ... 2017...\n",
-       "    time_offset                     (time) datetime64[ns] 2017-09-27T00:00:30...\n",
-       "    time_bounds                     (time, bound) object dask.array<chunksize=(1440, 2), meta=np.ndarray>\n",
-       "    impactor_state                  (time) int32 10 10 10 ... -9999 -9999 -9999\n",
-       "    Bs_B                            (time) float32 dask.array<chunksize=(1440,), meta=np.ndarray>\n",
-       "    qc_Bs_B                         (time) int32 dask.array<chunksize=(1440,), meta=np.ndarray>\n",
-       "    ...                              ...\n",
-       "    K1_B                            (time) float32 dask.array<chunksize=(1440,), meta=np.ndarray>\n",
-       "    K1_G                            (time) float32 dask.array<chunksize=(1440,), meta=np.ndarray>\n",
-       "    K1_R                            (time) float32 dask.array<chunksize=(1440,), meta=np.ndarray>\n",
-       "    lat                             (time) float32 36.6 36.6 36.6 ... 36.6 36.6\n",
-       "    lon                             (time) float32 -97.49 -97.49 ... -97.49\n",
-       "    alt                             (time) float32 318.0 318.0 ... 318.0 318.0\n",
-       "Attributes: (12/20)\n",
-       "    command_line:                    aosaop -n aosaoppsap -s sgp -f C1 -D -b ...\n",
-       "    Conventions:                     ARM-1.2\n",
-       "    process_version:                 vap-aosaop-1.2-0.el6\n",
-       "    dod_version:                     aoppsap1flynn1m-c1-1.2\n",
-       "    input_datastreams:               sgpaosnephdry1mC1.b1 : 1.0 : 20170927.00...\n",
-       "    site_id:                         sgp\n",
-       "    ...                              ...\n",
-       "    doi:                             10.5439/1369240\n",
-       "    history:                         created by user dsmgr on machine ruby at...\n",
-       "    _file_dates:                     ['20170927', '20170928', '20170929']\n",
-       "    _file_times:                     ['000030', '000030', '000030']\n",
-       "    _datastream:                     sgpaoppsap1flynn1mC1.c1\n",
-       "    _arm_standards_flag:             1
" - ], - "text/plain": [ - "\n", - "Dimensions: (time: 4320, bound: 2)\n", - "Coordinates:\n", - " * time (time) datetime64[ns] 2017-09-27T00:00:30...\n", - "Dimensions without coordinates: bound\n", - "Data variables: (12/132)\n", - " base_time (time) datetime64[ns] 2017-09-27 ... 2017...\n", - " time_offset (time) datetime64[ns] 2017-09-27T00:00:30...\n", - " time_bounds (time, bound) object dask.array\n", - " impactor_state (time) int32 10 10 10 ... -9999 -9999 -9999\n", - " Bs_B (time) float32 dask.array\n", - " qc_Bs_B (time) int32 dask.array\n", - " ... ...\n", - " K1_B (time) float32 dask.array\n", - " K1_G (time) float32 dask.array\n", - " K1_R (time) float32 dask.array\n", - " lat (time) float32 36.6 36.6 36.6 ... 36.6 36.6\n", - " lon (time) float32 -97.49 -97.49 ... -97.49\n", - " alt (time) float32 318.0 318.0 ... 318.0 318.0\n", - "Attributes: (12/20)\n", - " command_line: aosaop -n aosaoppsap -s sgp -f C1 -D -b ...\n", - " Conventions: ARM-1.2\n", - " process_version: vap-aosaop-1.2-0.el6\n", - " dod_version: aoppsap1flynn1m-c1-1.2\n", - " input_datastreams: sgpaosnephdry1mC1.b1 : 1.0 : 20170927.00...\n", - " site_id: sgp\n", - " ... 
...\n", - " doi: 10.5439/1369240\n", - " history: created by user dsmgr on machine ruby at...\n", - " _file_dates: ['20170927', '20170928', '20170929']\n", - " _file_times: ['000030', '000030', '000030']\n", - " _datastream: sgpaoppsap1flynn1mC1.c1\n", - " _arm_standards_flag: 1" - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Load files as a single dataset\n", - "files_list = files_filter \n", - "ds = act.io.armfiles.read_netcdf(files_list)\n", - "ds.clean.cleanup()\n", - "print(f'{len(files_list)} files loaded')\n", - "ds\n" - ] - }, - { - "cell_type": "markdown", - "id": "4a551094-9ec0-4b64-b80a-9940573c2f50", - "metadata": {}, - "source": [ - "## Plot time series data\n", - "#### Define the list of variables to be plotted:" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "id": "d09b789e-84f1-4605-846b-a72c110c8048", - "metadata": {}, - "outputs": [], - "source": [ - "variables_to_plot = ['Bs_B', 'Bs_G', 'Bs_R']" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "id": "3458fb08-035b-4898-9253-0a94e6f9c97b", - "metadata": {}, - "outputs": [ - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "49f2c7463c164a46aa13bd6286315713", - "version_major": 2, - "version_minor": 0 - }, - "image/png": "", - "text/html": [ - "\n", - "
\n", - "
\n", - " Figure\n", - "
\n", - " \n", - "
\n", - " " - ], - "text/plain": [ - "Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "ts_display = act.plotting.TimeSeriesDisplay(ds)\n", - "ts_display.add_subplots((len(variables_to_plot),), figsize = (9.5,4*len(variables_to_plot)))\n", - "\n", - "for i,v in enumerate(variables_to_plot):\n", - " ts_ax = ts_display.plot(v, subplot_index=(i,), set_title=ds.variables[v].attrs['long_name'],)\n", - " ts_ax.grid()\n", - "\n", - "plt.show()\n" - ] - }, - { - "cell_type": "markdown", - "id": "194399aa-1907-452b-8ba9-bc31d7f60291", - "metadata": {}, - "source": [ - "## Quality check plots\n", - "#### Define variable for QC plot" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "id": "9a0ef63d-3eeb-48fc-a24a-43258b6134b8", - "metadata": {}, - "outputs": [], - "source": [ - "qc_variable = 'Bs_B'" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "id": "532663a3-4dc0-4497-bda8-018c5f91e1c4", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/home/kefeimo/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/act/utils/datetime_utils.py:136: FutureWarning: Unlike other reduction functions (e.g. `skew`, `kurtosis`), the default behavior of `mode` typically preserves the axis it acts along. In SciPy 1.11.0, this behavior will change: the default value of `keepdims` will become False, the `axis` over which the statistic is taken will be eliminated, and the value None will no longer be accepted. Set `keepdims` to True or False to avoid this warning.\n", - " mode = stats.mode(np.diff(time))\n" - ] - }, - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "1b8f0eadc61146b1b9944c7cadc77104", - "version_major": 2, - "version_minor": 0 - }, - "image/png": "", - "text/html": [ - "\n", - "
\n", - "
\n", - " Figure\n", - "
\n", - " \n", - "
\n", - " " - ], - "text/plain": [ - "Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "# QC Plot\n", - "if ('qc_'+qc_variable) in ds.variables:\n", - "\n", - " # Plot\n", - " qc_display = act.plotting.TimeSeriesDisplay(ds)\n", - " qc_display.add_subplots((2,), figsize = (9.5,10))\n", - " qc_ax = qc_display.plot(qc_variable, subplot_index=(0,), set_title=\"QC results on field: \" + qc_variable,)\n", - " qc_ax.grid()\n", - " qc_display.qc_flag_block_plot(qc_variable, subplot_index=(1,))\n", - "\n", - " plt.show()\n", - "else:\n", - " print(f'QC not available for the selected field: {qc_variable}')\n" - ] - }, - { - "cell_type": "markdown", - "id": "2308db16-f362-4033-a11a-c3e5e75ad9ba", - "metadata": {}, - "source": [ - "## Field selection dropdown menu\n", - "Select variable to be plotted from a dropdown menu" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "id": "d5792fa8-2253-441a-8ac6-1e07a0345031", - "metadata": {}, - "outputs": [ - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "3d3082e5808440dc89d3713bcdf41d09", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "AppLayout(children=(Dropdown(description='Field:', index=1, layout=Layout(grid_area='header', margin='0px 30% …" - ] - }, - "execution_count": 13, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "plt.ioff()\n", - "\n", - "# populate dropdown menu with available variables \n", - "available_variables = [v for v in ds.variables if not('time' in v or v.startswith('qc_') or v.startswith('source_')) and 'long_name' in ds.variables[v].attrs]\n", - "d_variable = 'Bs_B'\n", - "dropdown = widgets.Dropdown(\n", - " options = [(ds.variables[v].attrs['long_name'], v) for v in available_variables],\n", - " value= d_variable,\n", - " description='Field:',\n", - " disabled=False,\n", 
- ")\n", - "dropdown.layout.margin = '0px 30% 0px 20%'\n", - "dropdown.layout.width = '50%'\n", - "\n", - "# set up display\n", - "i_display = act.plotting.TimeSeriesDisplay(ds)\n", - "i_display.add_subplots((1,), figsize = (9.5,5))\n", - "i_ax = i_display.plot(d_variable, subplot_index=(0,), set_title=ds.variables[d_variable].attrs['long_name'],)\n", - "i_ax.grid()\n", - "i_fig = i_display.fig\n", - "\n", - "# update plot callback function\n", - "def update_plot(change):\n", - " i_ax.cla()\n", - " i_ax_new = i_display.plot(change.new, subplot_index=(0,), set_title=ds.variables[change.new].attrs['long_name'],)\n", - " i_ax_new.grid()\n", - " i_fig.canvas.draw()\n", - " i_fig.canvas.flush_events()\n", - "\n", - "dropdown.observe(update_plot, names='value')\n", - "\n", - "widgets.AppLayout(\n", - " header=dropdown,\n", - " center=i_fig.canvas,\n", - " pane_heights=[1, 6,1]\n", - ")\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "3c0aba93", - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.16" - }, - "widgets": { - "application/vnd.jupyter.widget-state+json": { - "state": {}, - "version_major": 2, - "version_minor": 0 - } - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/VAPs/quicklook/ARSCL/.ipynb_checkpoints/arscl1cloth.c1-checkpoint.ipynb b/VAPs/quicklook/ARSCL/.ipynb_checkpoints/arscl1cloth.c1-checkpoint.ipynb deleted file mode 100644 index 4502aea5..00000000 --- a/VAPs/quicklook/ARSCL/.ipynb_checkpoints/arscl1cloth.c1-checkpoint.ipynb +++ /dev/null @@ -1,2631 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": 
"70840257-70e4-45e2-b491-14bff5a257a3", - "metadata": {}, - "source": [ - "# ARSCL1CLOTH.C1 Plots\n", - "\n", - "[Click here](https://www.arm.gov/capabilities/vaps/arscl) for more information about this vap." - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "id": "460fd89f-e034-452c-b837-f65c5958264f", - "metadata": {}, - "outputs": [], - "source": [ - "%matplotlib widget\n", - "import ipywidgets as widgets\n", - "\n", - "import matplotlib.pyplot as plt\n", - "import ipywidgets as widgets\n", - "import numpy as np\n", - "import pandas as pd\n", - "import os\n", - "from datetime import datetime\n", - "\n", - "import act\n", - "import xarray as xr\n", - "\n", - "# Data archive directory\n", - "DATA_DIR = r'/data/archive/'\n", - "\n", - "# Datastream info\n", - "DATASTREAM_NAME = 'arscl1cloth'\n", - "DATA_LEVEL = 'c1'\n", - "LOCATIONS = [{'end_date': '2011-03-23', 'facility': 'C1', 'site': 'nsa', 'start_date': '1998-03-25'}, {'end_date': '2011-01-04', 'facility': 'C1', 'site': 'sgp', 'start_date': '1996-11-08'}, {'end_date': '2011-03-07', 'facility': 'C1', 'site': 'twp', 'start_date': '1999-07-01'}, {'end_date': '2009-02-14', 'facility': 'C2', 'site': 'twp', 'start_date': '1998-11-01'}, {'end_date': '2011-02-28', 'facility': 'C3', 'site': 'twp', 'start_date': '2003-01-01'}]" - ] - }, - { - "cell_type": "markdown", - "id": "9faaf875", - "metadata": {}, - "source": [ - "## Define site, facility, and date range" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "ac6764f5", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The following locations and date ranges are available for this VAP:\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
sitefacilitystart_dateend_date
0nsaC11998-03-252011-03-23
1sgpC11996-11-082011-01-04
2twpC11999-07-012011-03-07
3twpC21998-11-012009-02-14
4twpC32003-01-012011-02-28
\n", - "
" - ], - "text/plain": [ - " site facility start_date end_date\n", - "0 nsa C1 1998-03-25 2011-03-23\n", - "1 sgp C1 1996-11-08 2011-01-04\n", - "2 twp C1 1999-07-01 2011-03-07\n", - "3 twp C2 1998-11-01 2009-02-14\n", - "4 twp C3 2003-01-01 2011-02-28" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "print(\"The following locations and date ranges are available for this VAP:\")\n", - "display(pd.DataFrame(LOCATIONS, columns=['site', 'facility', 'start_date', 'end_date']))" - ] - }, - { - "cell_type": "markdown", - "id": "8d132223", - "metadata": {}, - "source": [ - "#### Define site, facility, and date range (date format: YYYY-MM-DD) using the variables below:" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "e563983a", - "metadata": {}, - "outputs": [], - "source": [ - "site_facility = ( 'sgp', 'C1' )\n", - "\n", - "date_start = '2011-01-03'\n", - "date_end = '2011-01-04'" - ] - }, - { - "cell_type": "markdown", - "id": "bccd3dfe-2f99-49a5-bace-ea37e8dd8fc3", - "metadata": {}, - "source": [ - "## Load data files\n", - "Load data files from /data/archive/" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "cb4b9a26-c574-49c0-a521-658fa553e39e", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "'/data/archive/sgp/sgparscl1clothC1.c1'" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Compile list of files\n", - "site, facility = site_facility\n", - "d_date_start = datetime.strptime(date_start, '%Y-%m-%d')\n", - "d_date_end = datetime.strptime(date_end, '%Y-%m-%d')\n", - "dir_path = os.path.join(DATA_DIR + site, site + DATASTREAM_NAME + facility + r'.' 
+ DATA_LEVEL )\n", - "dir_path\n" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "6be8f3dc", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "['20110103', '20110104']" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "from datetime import date, timedelta\n", - "import pandas as pd\n", - "\n", - "def get_ARM_formated_dates(start_date, end_date):\n", - " \"\"\"\n", - " Get a list of ARM conventional formated date lists, based on start_date and end_date(inclusive)\n", - " EXAMPLE:\n", - " get_ARM_formated_dates(start_date=\"20180219\", end_date=\"20180221\")\n", - " >> [\"20180219\", \"20180220\", \"20180221\"] \n", - " \"\"\"\n", - " \n", - " _start_date = pd.to_datetime(start_date)\n", - " _end_date = pd.to_datetime(end_date)\n", - " \n", - " delta = _end_date - _start_date # returns timedelta \n", - " dates = []\n", - "\n", - " for i in range(delta.days + 1):\n", - " day = _start_date + timedelta(days=i)\n", - " day_formated = day.strftime(format=\"%Y%m%d\")\n", - " dates.append(day_formated)\n", - " return dates\n", - "\n", - "\n", - "get_ARM_formated_dates(start_date=date_start, end_date=date_end)" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "51feea2e", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "['/data/archive/sgp/sgparscl1clothC1.c1/sgparscl1clothC1.c1.20110103.000000.cdf',\n", - " '/data/archive/sgp/sgparscl1clothC1.c1/sgparscl1clothC1.c1.20110104.000000.cdf']" - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Filter a list of files based on date pattern\n", - "import glob\n", - "dates = get_ARM_formated_dates(start_date=date_start, end_date=date_end)\n", - "files_filter = []\n", - "for date in dates:\n", - " files_filter += glob.glob(f'{dir_path}/*.{date}*.*')\n", - " files_filter\n", - "files_filter" - ] - }, - { - "cell_type": "code", - 
"execution_count": 9, - "id": "b0e5d0d7", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "77 files loaded\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "
<xarray.Dataset>\n",
-       "Dimensions:                         (time: 8640, nheights: 512, numlayers: 10)\n",
-       "Coordinates:\n",
-       "  * time                            (time) timedelta64[ns] 00:00:00 ... 23:59:50\n",
-       "Dimensions without coordinates: nheights, numlayers\n",
-       "Data variables: (12/23)\n",
-       "    base_time                       object ...\n",
-       "    time_offset                     (time) timedelta64[ns] dask.array<chunksize=(8640,), meta=np.ndarray>\n",
-       "    Heights                         (nheights) float32 dask.array<chunksize=(512,), meta=np.ndarray>\n",
-       "    Reflectivity                    (time, nheights) int16 dask.array<chunksize=(8640, 512), meta=np.ndarray>\n",
-       "    ReflectivityNoClutter           (time, nheights) int16 dask.array<chunksize=(8640, 512), meta=np.ndarray>\n",
-       "    ReflectivityBestEstimate        (time, nheights) int16 dask.array<chunksize=(8640, 512), meta=np.ndarray>\n",
-       "    ...                              ...\n",
-       "    CloudLayerBottomHeightMplZwang  (time, numlayers) float32 dask.array<chunksize=(8640, 10), meta=np.ndarray>\n",
-       "    CloudLayerTopHeightMplZwang     (time, numlayers) float32 dask.array<chunksize=(8640, 10), meta=np.ndarray>\n",
-       "    qc_RadarArtifacts               (time, nheights) |S1 dask.array<chunksize=(8640, 512), meta=np.ndarray>\n",
-       "    qc_ReflectivityClutterFlag      (time, nheights) |S1 dask.array<chunksize=(8640, 512), meta=np.ndarray>\n",
-       "    qc_CloudLayerTopHeightMplZwang  (time, numlayers) float32 dask.array<chunksize=(8640, 10), meta=np.ndarray>\n",
-       "    qc_BeamAttenuationMplZwang      (time) float32 dask.array<chunksize=(8640,), meta=np.ndarray>\n",
-       "Attributes: (12/18)\n",
-       "    Date:                      Wed Jul 13 16:28:55 GMT 2011\n",
-       "    Version:                   $State: Exp $\n",
-       "    Number_Input_Platforms:    3\n",
-       "    Input_Platforms:           sgp30smplcmask1zwangC1.c1,sgpvceil25kC1.b1,sgp...\n",
-       "    Input_Platforms_Versions:  ?????,10.2,1.16\n",
-       "    Command_Line:              arsc1/arscl2 -s YYYYMMDD -e YYYYMMDD SITE FACI...\n",
-       "    ...                        ...\n",
-       "    commentf:                  Note that -32768 is also used for the geophysi...\n",
-       "    _file_dates:               ['20110103']\n",
-       "    _file_times:               ['000000']\n",
-       "    datastream:                sgparscl1clothC1.c1\n",
-       "    _datastream:               sgparscl1clothC1.c1\n",
-       "    _arm_standards_flag:       1
" - ], - "text/plain": [ - "\n", - "Dimensions: (time: 8640, nheights: 512, numlayers: 10)\n", - "Coordinates:\n", - " * time (time) timedelta64[ns] 00:00:00 ... 23:59:50\n", - "Dimensions without coordinates: nheights, numlayers\n", - "Data variables: (12/23)\n", - " base_time object ...\n", - " time_offset (time) timedelta64[ns] dask.array\n", - " Heights (nheights) float32 dask.array\n", - " Reflectivity (time, nheights) int16 dask.array\n", - " ReflectivityNoClutter (time, nheights) int16 dask.array\n", - " ReflectivityBestEstimate (time, nheights) int16 dask.array\n", - " ... ...\n", - " CloudLayerBottomHeightMplZwang (time, numlayers) float32 dask.array\n", - " CloudLayerTopHeightMplZwang (time, numlayers) float32 dask.array\n", - " qc_RadarArtifacts (time, nheights) |S1 dask.array\n", - " qc_ReflectivityClutterFlag (time, nheights) |S1 dask.array\n", - " qc_CloudLayerTopHeightMplZwang (time, numlayers) float32 dask.array\n", - " qc_BeamAttenuationMplZwang (time) float32 dask.array\n", - "Attributes: (12/18)\n", - " Date: Wed Jul 13 16:28:55 GMT 2011\n", - " Version: $State: Exp $\n", - " Number_Input_Platforms: 3\n", - " Input_Platforms: sgp30smplcmask1zwangC1.c1,sgpvceil25kC1.b1,sgp...\n", - " Input_Platforms_Versions: ?????,10.2,1.16\n", - " Command_Line: arsc1/arscl2 -s YYYYMMDD -e YYYYMMDD SITE FACI...\n", - " ... 
...\n", - " commentf: Note that -32768 is also used for the geophysi...\n", - " _file_dates: ['20110103']\n", - " _file_times: ['000000']\n", - " datastream: sgparscl1clothC1.c1\n", - " _datastream: sgparscl1clothC1.c1\n", - " _arm_standards_flag: 1" - ] - }, - "execution_count": 9, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Load files as a single dataset\n", - "files_list = files_filter[0]\n", - "ds = act.io.armfiles.read_netcdf(files_list)\n", - "ds.clean.cleanup()\n", - "print(f'{len(files_list)} files loaded')\n", - "ds\n" - ] - }, - { - "cell_type": "markdown", - "id": "4a551094-9ec0-4b64-b80a-9940573c2f50", - "metadata": {}, - "source": [ - "## Plot time series data\n", - "#### Define the list of variables to be plotted:" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "id": "d09b789e-84f1-4605-846b-a72c110c8048", - "metadata": {}, - "outputs": [], - "source": [ - "variables_to_plot = ['Reflectivity', 'ReflectivityNoClutter', 'ReflectivityBestEstimate']" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "id": "de5b8b3d", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "
<xarray.DataArray 'ReflectivityNoClutter' (time: 8640, nheights: 512)>\n",
-       "dask.array<open_dataset-86c2f04dd636517ce84998ed679d9bf5ReflectivityNoClutter, shape=(8640, 512), dtype=int16, chunksize=(8640, 512), chunktype=numpy.ndarray>\n",
-       "Coordinates:\n",
-       "  * time     (time) timedelta64[ns] 00:00:00 00:00:10 ... 23:59:40 23:59:50\n",
-       "Dimensions without coordinates: nheights\n",
-       "Attributes:\n",
-       "    long_name:  MMCR Reflectivity with Clutter Removed\n",
-       "    units:      dBZ (X100)\n",
-       "    comment:    Divide ReflectivityNoClutter by 100 to get dBZ
" - ], - "text/plain": [ - "\n", - "dask.array\n", - "Coordinates:\n", - " * time (time) timedelta64[ns] 00:00:00 00:00:10 ... 23:59:40 23:59:50\n", - "Dimensions without coordinates: nheights\n", - "Attributes:\n", - " long_name: MMCR Reflectivity with Clutter Removed\n", - " units: dBZ (X100)\n", - " comment: Divide ReflectivityNoClutter by 100 to get dBZ" - ] - }, - "execution_count": 14, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "ds.ReflectivityNoClutter.data" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "id": "3458fb08-035b-4898-9253-0a94e6f9c97b", - "metadata": {}, - "outputs": [ - { - "ename": "UFuncTypeError", - "evalue": "Cannot cast ufunc 'greater_equal' input 0 from dtype(' 5\u001b[0m ts_ax \u001b[38;5;241m=\u001b[39m \u001b[43mts_display\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mplot\u001b[49m\u001b[43m(\u001b[49m\u001b[43mv\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43msubplot_index\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mi\u001b[49m\u001b[43m,\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mset_title\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mds\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mvariables\u001b[49m\u001b[43m[\u001b[49m\u001b[43mv\u001b[49m\u001b[43m]\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mattrs\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mlong_name\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 6\u001b[0m ts_ax\u001b[38;5;241m.\u001b[39mgrid()\n\u001b[1;32m 8\u001b[0m plt\u001b[38;5;241m.\u001b[39mshow()\n", - "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/act/plotting/timeseriesdisplay.py:588\u001b[0m, in \u001b[0;36mTimeSeriesDisplay.plot\u001b[0;34m(self, field, dsname, subplot_index, cmap, set_title, add_nan, 
day_night_background, invert_y_axis, abs_limits, time_rng, y_rng, use_var_for_y, set_shading, assessment_overplot, overplot_marker, overplot_behind, overplot_markersize, assessment_overplot_category, assessment_overplot_category_color, force_line_plot, labels, cbar_label, cbar_h_adjust, secondary_y, y_axis_flag_meanings, colorbar_labels, cb_friendly, **kwargs)\u001b[0m\n\u001b[1;32m 586\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124medgecolors\u001b[39m\u001b[38;5;124m'\u001b[39m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m kwargs\u001b[38;5;241m.\u001b[39mkeys():\n\u001b[1;32m 587\u001b[0m kwargs[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124medgecolors\u001b[39m\u001b[38;5;124m'\u001b[39m] \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mface\u001b[39m\u001b[38;5;124m'\u001b[39m\n\u001b[0;32m--> 588\u001b[0m mesh \u001b[38;5;241m=\u001b[39m \u001b[43max\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mpcolormesh\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 589\u001b[0m \u001b[43m \u001b[49m\u001b[43mnp\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43masarray\u001b[49m\u001b[43m(\u001b[49m\u001b[43mxdata\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 590\u001b[0m \u001b[43m \u001b[49m\u001b[43mydata\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 591\u001b[0m \u001b[43m \u001b[49m\u001b[43mdata\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mtranspose\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 592\u001b[0m \u001b[43m \u001b[49m\u001b[43mshading\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mset_shading\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 593\u001b[0m \u001b[43m \u001b[49m\u001b[43mcmap\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcmap\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 594\u001b[0m \u001b[43m 
\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 595\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 597\u001b[0m \u001b[38;5;66;03m# Set Title\u001b[39;00m\n\u001b[1;32m 598\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m set_title \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n", - "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/matplotlib/__init__.py:1442\u001b[0m, in \u001b[0;36m_preprocess_data..inner\u001b[0;34m(ax, data, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1439\u001b[0m \u001b[38;5;129m@functools\u001b[39m\u001b[38;5;241m.\u001b[39mwraps(func)\n\u001b[1;32m 1440\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21minner\u001b[39m(ax, \u001b[38;5;241m*\u001b[39margs, data\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs):\n\u001b[1;32m 1441\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m data \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m-> 1442\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[43max\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;28;43mmap\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43msanitize_sequence\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43margs\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1444\u001b[0m bound \u001b[38;5;241m=\u001b[39m new_sig\u001b[38;5;241m.\u001b[39mbind(ax, \u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n\u001b[1;32m 1445\u001b[0m auto_label \u001b[38;5;241m=\u001b[39m 
(bound\u001b[38;5;241m.\u001b[39marguments\u001b[38;5;241m.\u001b[39mget(label_namer)\n\u001b[1;32m 1446\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m bound\u001b[38;5;241m.\u001b[39mkwargs\u001b[38;5;241m.\u001b[39mget(label_namer))\n", - "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/matplotlib/axes/_axes.py:6220\u001b[0m, in \u001b[0;36mAxes.pcolormesh\u001b[0;34m(self, alpha, norm, cmap, vmin, vmax, shading, antialiased, *args, **kwargs)\u001b[0m\n\u001b[1;32m 6217\u001b[0m shading \u001b[38;5;241m=\u001b[39m shading\u001b[38;5;241m.\u001b[39mlower()\n\u001b[1;32m 6218\u001b[0m kwargs\u001b[38;5;241m.\u001b[39msetdefault(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124medgecolors\u001b[39m\u001b[38;5;124m'\u001b[39m, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mnone\u001b[39m\u001b[38;5;124m'\u001b[39m)\n\u001b[0;32m-> 6220\u001b[0m X, Y, C, shading \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_pcolorargs\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mpcolormesh\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 6221\u001b[0m \u001b[43m \u001b[49m\u001b[43mshading\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mshading\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mkwargs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 6222\u001b[0m coords \u001b[38;5;241m=\u001b[39m np\u001b[38;5;241m.\u001b[39mstack([X, Y], axis\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m-\u001b[39m\u001b[38;5;241m1\u001b[39m)\n\u001b[1;32m 6223\u001b[0m \u001b[38;5;66;03m# convert to one dimensional array, except for 3D RGB(A) arrays\u001b[39;00m\n", - "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/matplotlib/axes/_axes.py:5786\u001b[0m, in 
\u001b[0;36mAxes._pcolorargs\u001b[0;34m(self, funcname, shading, *args, **kwargs)\u001b[0m\n\u001b[1;32m 5783\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m X\n\u001b[1;32m 5785\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m ncols \u001b[38;5;241m==\u001b[39m Nx:\n\u001b[0;32m-> 5786\u001b[0m X \u001b[38;5;241m=\u001b[39m \u001b[43m_interp_grid\u001b[49m\u001b[43m(\u001b[49m\u001b[43mX\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 5787\u001b[0m Y \u001b[38;5;241m=\u001b[39m _interp_grid(Y)\n\u001b[1;32m 5788\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m nrows \u001b[38;5;241m==\u001b[39m Ny:\n", - "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/matplotlib/axes/_axes.py:5768\u001b[0m, in \u001b[0;36mAxes._pcolorargs.._interp_grid\u001b[0;34m(X)\u001b[0m\n\u001b[1;32m 5766\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m np\u001b[38;5;241m.\u001b[39mshape(X)[\u001b[38;5;241m1\u001b[39m] \u001b[38;5;241m>\u001b[39m \u001b[38;5;241m1\u001b[39m:\n\u001b[1;32m 5767\u001b[0m dX \u001b[38;5;241m=\u001b[39m np\u001b[38;5;241m.\u001b[39mdiff(X, axis\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m1\u001b[39m)\u001b[38;5;241m/\u001b[39m\u001b[38;5;241m2.\u001b[39m\n\u001b[0;32m-> 5768\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m (np\u001b[38;5;241m.\u001b[39mall(\u001b[43mdX\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m>\u001b[39;49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m \u001b[49m\u001b[38;5;241;43m0\u001b[39;49m) \u001b[38;5;129;01mor\u001b[39;00m np\u001b[38;5;241m.\u001b[39mall(dX \u001b[38;5;241m<\u001b[39m\u001b[38;5;241m=\u001b[39m \u001b[38;5;241m0\u001b[39m)):\n\u001b[1;32m 5769\u001b[0m _api\u001b[38;5;241m.\u001b[39mwarn_external(\n\u001b[1;32m 5770\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mThe input coordinates to \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mfuncname\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m are \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 
5771\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124minterpreted as cell centers, but are not \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 5774\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124medges, in which case, please supply \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 5775\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mexplicit cell edges to \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mfuncname\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m.\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 5776\u001b[0m X \u001b[38;5;241m=\u001b[39m np\u001b[38;5;241m.\u001b[39mhstack((X[:, [\u001b[38;5;241m0\u001b[39m]] \u001b[38;5;241m-\u001b[39m dX[:, [\u001b[38;5;241m0\u001b[39m]],\n\u001b[1;32m 5777\u001b[0m X[:, :\u001b[38;5;241m-\u001b[39m\u001b[38;5;241m1\u001b[39m] \u001b[38;5;241m+\u001b[39m dX,\n\u001b[1;32m 5778\u001b[0m X[:, [\u001b[38;5;241m-\u001b[39m\u001b[38;5;241m1\u001b[39m]] \u001b[38;5;241m+\u001b[39m dX[:, [\u001b[38;5;241m-\u001b[39m\u001b[38;5;241m1\u001b[39m]]))\n", - "\u001b[0;31mUFuncTypeError\u001b[0m: Cannot cast ufunc 'greater_equal' input 0 from dtype('\n", - "
\n", - " Figure\n", - "
\n", - " \n", - " \n", - " " - ], - "text/plain": [ - "Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "ts_display = act.plotting.TimeSeriesDisplay(ds)\n", - "ts_display.add_subplots((len(variables_to_plot),), figsize = (9.5,4*len(variables_to_plot)))\n", - "\n", - "for i,v in enumerate(variables_to_plot):\n", - " ts_ax = ts_display.plot(v, subplot_index=(i,), set_title=ds.variables[v].attrs['long_name'],)\n", - " ts_ax.grid()\n", - "\n", - "plt.show()\n" - ] - }, - { - "cell_type": "markdown", - "id": "194399aa-1907-452b-8ba9-bc31d7f60291", - "metadata": {}, - "source": [ - "## Quality check plots\n", - "#### Define variable for QC plot" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "9a0ef63d-3eeb-48fc-a24a-43258b6134b8", - "metadata": {}, - "outputs": [], - "source": [ - "qc_variable = 'RadarArtifacts'" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "532663a3-4dc0-4497-bda8-018c5f91e1c4", - "metadata": {}, - "outputs": [], - "source": [ - "# QC Plot\n", - "if ('qc_'+qc_variable) in ds.variables:\n", - "\n", - " # Plot\n", - " qc_display = act.plotting.TimeSeriesDisplay(ds)\n", - " qc_display.add_subplots((2,), figsize = (9.5,10))\n", - " qc_ax = qc_display.plot(qc_variable, subplot_index=(0,), set_title=\"QC results on field: \" + qc_variable,)\n", - " qc_ax.grid()\n", - " qc_display.qc_flag_block_plot(qc_variable, subplot_index=(1,))\n", - "\n", - " plt.show()\n", - "else:\n", - " print(f'QC not available for the selected field: {qc_variable}')\n" - ] - }, - { - "cell_type": "markdown", - "id": "2308db16-f362-4033-a11a-c3e5e75ad9ba", - "metadata": {}, - "source": [ - "## Field selection dropdown menu\n", - "Select variable to be plotted from a dropdown menu" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "d5792fa8-2253-441a-8ac6-1e07a0345031", 
- "metadata": {}, - "outputs": [], - "source": [ - "plt.ioff()\n", - "\n", - "# populate dropdown menu with available variables \n", - "available_variables = [v for v in ds.variables if not('time' in v or v.startswith('qc_') or v.startswith('source_')) and 'long_name' in ds.variables[v].attrs]\n", - "d_variable = 'Reflectivity'\n", - "dropdown = widgets.Dropdown(\n", - " options = [(ds.variables[v].attrs['long_name'], v) for v in available_variables],\n", - " value= d_variable,\n", - " description='Field:',\n", - " disabled=False,\n", - ")\n", - "dropdown.layout.margin = '0px 30% 0px 20%'\n", - "dropdown.layout.width = '50%'\n", - "\n", - "# set up display\n", - "i_display = act.plotting.TimeSeriesDisplay(ds)\n", - "i_display.add_subplots((1,), figsize = (9.5,5))\n", - "i_ax = i_display.plot(d_variable, subplot_index=(0,), set_title=ds.variables[d_variable].attrs['long_name'],)\n", - "i_ax.grid()\n", - "i_fig = i_display.fig\n", - "\n", - "# update plot callback function\n", - "def update_plot(change):\n", - " i_ax.cla()\n", - " i_ax_new = i_display.plot(change.new, subplot_index=(0,), set_title=ds.variables[change.new].attrs['long_name'],)\n", - " i_ax_new.grid()\n", - " i_fig.canvas.draw()\n", - " i_fig.canvas.flush_events()\n", - "\n", - "dropdown.observe(update_plot, names='value')\n", - "\n", - "widgets.AppLayout(\n", - " header=dropdown,\n", - " center=i_fig.canvas,\n", - " pane_heights=[1, 6,1]\n", - ")\n" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.16" - }, - "widgets": { - "application/vnd.jupyter.widget-state+json": { - "state": {}, - "version_major": 2, - "version_minor": 0 - } - } - }, - "nbformat": 4, - 
"nbformat_minor": 5 -} diff --git a/VAPs/quicklook/ARSCL/.ipynb_checkpoints/arsclbnd1cloth.c1-checkpoint.ipynb b/VAPs/quicklook/ARSCL/.ipynb_checkpoints/arsclbnd1cloth.c1-checkpoint.ipynb deleted file mode 100644 index b365a780..00000000 --- a/VAPs/quicklook/ARSCL/.ipynb_checkpoints/arsclbnd1cloth.c1-checkpoint.ipynb +++ /dev/null @@ -1,1937 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "70840257-70e4-45e2-b491-14bff5a257a3", - "metadata": {}, - "source": [ - "# ARSCLBND1CLOTH.C1 Plots\n", - "\n", - "[Click here](https://www.arm.gov/capabilities/vaps/arscl) for more information about this vap." - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "id": "460fd89f-e034-452c-b837-f65c5958264f", - "metadata": {}, - "outputs": [], - "source": [ - "%matplotlib widget\n", - "import ipywidgets as widgets\n", - "\n", - "import matplotlib.pyplot as plt\n", - "import ipywidgets as widgets\n", - "import numpy as np\n", - "import pandas as pd\n", - "import os\n", - "from datetime import datetime\n", - "\n", - "import act\n", - "import xarray as xr\n", - "\n", - "# Data archive directory\n", - "DATA_DIR = r'/data/archive/'\n", - "\n", - "# Datastream info\n", - "DATASTREAM_NAME = 'arsclbnd1cloth'\n", - "DATA_LEVEL = 'c1'\n", - "LOCATIONS = [{'end_date': '2011-03-23', 'facility': 'C1', 'site': 'nsa', 'start_date': '1998-03-25'}, {'end_date': '2011-01-04', 'facility': 'C1', 'site': 'sgp', 'start_date': '1996-11-08'}, {'end_date': '2011-03-07', 'facility': 'C1', 'site': 'twp', 'start_date': '1999-07-01'}, {'end_date': '2009-02-14', 'facility': 'C2', 'site': 'twp', 'start_date': '1998-11-01'}, {'end_date': '2011-02-28', 'facility': 'C3', 'site': 'twp', 'start_date': '2003-01-01'}]" - ] - }, - { - "cell_type": "markdown", - "id": "9faaf875", - "metadata": {}, - "source": [ - "## Define site, facility, and date range" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "ac6764f5", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - 
"output_type": "stream", - "text": [ - "The following locations and date ranges are available for this VAP:\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
sitefacilitystart_dateend_date
0nsaC11998-03-252011-03-23
1sgpC11996-11-082011-01-04
2twpC11999-07-012011-03-07
3twpC21998-11-012009-02-14
4twpC32003-01-012011-02-28
\n", - "
" - ], - "text/plain": [ - " site facility start_date end_date\n", - "0 nsa C1 1998-03-25 2011-03-23\n", - "1 sgp C1 1996-11-08 2011-01-04\n", - "2 twp C1 1999-07-01 2011-03-07\n", - "3 twp C2 1998-11-01 2009-02-14\n", - "4 twp C3 2003-01-01 2011-02-28" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "print(\"The following locations and date ranges are available for this VAP:\")\n", - "display(pd.DataFrame(LOCATIONS, columns=['site', 'facility', 'start_date', 'end_date']))" - ] - }, - { - "cell_type": "markdown", - "id": "8d132223", - "metadata": {}, - "source": [ - "#### Define site, facility, and date range (date format: YYYY-MM-DD) using the variables below:" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "e563983a", - "metadata": {}, - "outputs": [], - "source": [ - "site_facility = ( 'sgp', 'C1' )\n", - "\n", - "date_start = '2011-01-03'\n", - "date_end = '2011-01-04'" - ] - }, - { - "cell_type": "markdown", - "id": "bccd3dfe-2f99-49a5-bace-ea37e8dd8fc3", - "metadata": {}, - "source": [ - "## Load data files\n", - "Load data files from /data/archive/" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "cb4b9a26-c574-49c0-a521-658fa553e39e", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "'/data/archive/sgp/sgparsclbnd1clothC1.c1'" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Compile list of files\n", - "site, facility = site_facility\n", - "d_date_start = datetime.strptime(date_start, '%Y-%m-%d')\n", - "d_date_end = datetime.strptime(date_end, '%Y-%m-%d')\n", - "dir_path = os.path.join(DATA_DIR + site, site + DATASTREAM_NAME + facility + r'.' 
+ DATA_LEVEL )\n", - "dir_path\n" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "6be8f3dc", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "['20110103', '20110104']" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "from datetime import date, timedelta\n", - "import pandas as pd\n", - "\n", - "def get_ARM_formated_dates(start_date, end_date):\n", - " \"\"\"\n", - " Get a list of ARM conventional formated date lists, based on start_date and end_date(inclusive)\n", - " EXAMPLE:\n", - " get_ARM_formated_dates(start_date=\"20180219\", end_date=\"20180221\")\n", - " >> [\"20180219\", \"20180220\", \"20180221\"] \n", - " \"\"\"\n", - " \n", - " _start_date = pd.to_datetime(start_date)\n", - " _end_date = pd.to_datetime(end_date)\n", - " \n", - " delta = _end_date - _start_date # returns timedelta \n", - " dates = []\n", - "\n", - " for i in range(delta.days + 1):\n", - " day = _start_date + timedelta(days=i)\n", - " day_formated = day.strftime(format=\"%Y%m%d\")\n", - " dates.append(day_formated)\n", - " return dates\n", - "\n", - "\n", - "get_ARM_formated_dates(start_date=date_start, end_date=date_end)" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "51feea2e", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "['/data/archive/sgp/sgparsclbnd1clothC1.c1/sgparsclbnd1clothC1.c1.20110103.000000.cdf',\n", - " '/data/archive/sgp/sgparsclbnd1clothC1.c1/sgparsclbnd1clothC1.c1.20110104.000000.cdf']" - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Filter a list of files based on date pattern\n", - "import glob\n", - "dates = get_ARM_formated_dates(start_date=date_start, end_date=date_end)\n", - "files_filter = []\n", - "for date in dates:\n", - " files_filter += glob.glob(f'{dir_path}/*.{date}*.*')\n", - " files_filter\n", - "files_filter" - ] - }, - { - "cell_type": 
"code", - "execution_count": null, - "id": "604409ea", - "metadata": {}, - "outputs": [], - "source": [ - "# this datastream is a bit different. It has trouble merge the individual datasets." - ] - }, - { - "cell_type": "code", - "execution_count": 122, - "id": "ccbe501b", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "
<xarray.Dataset>\n",
-       "Dimensions:                         (time: 17280, numlayers: 10)\n",
-       "Coordinates:\n",
-       "  * time                            (time) datetime64[ns] 2011-01-01 ... 2011...\n",
-       "Dimensions without coordinates: numlayers\n",
-       "Data variables:\n",
-       "    base_time                       datetime64[ns] 2011-01-01\n",
-       "    time_offset                     (time) timedelta64[ns] 00:00:00 ... NaT\n",
-       "    CloudBaseBestEstimate           (time) float32 -1.0 -1.0 -1.0 ... nan nan\n",
-       "    CloudLayerBottomHeightMplZwang  (time, numlayers) float32 0.0 0.0 ... nan\n",
-       "    CloudLayerTopHeightMplZwang     (time, numlayers) float32 0.0 0.0 ... nan\n",
-       "    qc_CloudLayerTopHeightMplZwang  (time, numlayers) float32 0.0 0.0 ... nan\n",
-       "Attributes:\n",
-       "    Date:                      Wed Jul 13 16:27:17 GMT 2011\n",
-       "    Version:                   $State: Exp $\n",
-       "    Number_Input_Platforms:    3\n",
-       "    Input_Platforms:           sgp30smplcmask1zwangC1.c1,sgpvceil25kC1.b1,sgp...\n",
-       "    Input_Platforms_Versions:  ?????,10.2,1.16\n",
-       "    zeb_platform:              sgparsclbnd1clothC1.c1\n",
-       "    Command_Line:              arsc1/arscl2 -s YYYYMMDD -e YYYYMMDD SITE FACI...\n",
-       "    contact:                    \n",
-       "    comment:                   If all layer top heights are 0, then the first...
" - ], - "text/plain": [ - "\n", - "Dimensions: (time: 17280, numlayers: 10)\n", - "Coordinates:\n", - " * time (time) datetime64[ns] 2011-01-01 ... 2011...\n", - "Dimensions without coordinates: numlayers\n", - "Data variables:\n", - " base_time datetime64[ns] 2011-01-01\n", - " time_offset (time) timedelta64[ns] 00:00:00 ... NaT\n", - " CloudBaseBestEstimate (time) float32 -1.0 -1.0 -1.0 ... nan nan\n", - " CloudLayerBottomHeightMplZwang (time, numlayers) float32 0.0 0.0 ... nan\n", - " CloudLayerTopHeightMplZwang (time, numlayers) float32 0.0 0.0 ... nan\n", - " qc_CloudLayerTopHeightMplZwang (time, numlayers) float32 0.0 0.0 ... nan\n", - "Attributes:\n", - " Date: Wed Jul 13 16:27:17 GMT 2011\n", - " Version: $State: Exp $\n", - " Number_Input_Platforms: 3\n", - " Input_Platforms: sgp30smplcmask1zwangC1.c1,sgpvceil25kC1.b1,sgp...\n", - " Input_Platforms_Versions: ?????,10.2,1.16\n", - " zeb_platform: sgparsclbnd1clothC1.c1\n", - " Command_Line: arsc1/arscl2 -s YYYYMMDD -e YYYYMMDD SITE FACI...\n", - " contact: \n", - " comment: If all layer top heights are 0, then the first..." 
- ] - }, - "execution_count": 122, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# this datastream require some special treatment to merge the files\n", - "ds_single_1 = xr.load_dataset(\"/data/archive/sgp/sgparsclbnd1clothC1.c1/sgparsclbnd1clothC1.c1.20110101.000000.cdf\")\n", - "ds_single_1\n", - "\n", - "ds_single_2 = xr.load_dataset(\"/data/archive/sgp/sgparsclbnd1clothC1.c1/sgparsclbnd1clothC1.c1.20110102.000000.cdf\")\n", - "ds_single_2\n", - "\n", - "# ds_single_1.time.data = ds_single_1.base_time.data + ds_single_1.time.data\n", - "# ds_single_2.time.data = ds_single_2.base_time.data + ds_single_2.time.data\n", - "\n", - "ds_single_1['time'] = ds_single_1.base_time.data + ds_single_1.time.data * 10000000000\n", - "ds_single_2['time'] = ds_single_2.base_time.data + ds_single_2.time.data * 10000000000\n", - "\n", - "ds_single_1['base_time'] = pd.to_datetime(ds_single_1.base_time.data)\n", - "ds_single_2['base_time'] = pd.to_datetime(ds_single_2.base_time.data)\n", - "\n", - "ds = xr.merge([ds_single_1, ds_single_2], compat='override') \n", - "ds" - ] - }, - { - "cell_type": "code", - "execution_count": 123, - "id": "dde711c1", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "
<xarray.DataArray 'CloudBaseBestEstimate' (time: 8640)>\n",
-       "array([-1., -1., -1., ..., -1., -1., -1.], dtype=float32)\n",
-       "Coordinates:\n",
-       "  * time     (time) datetime64[ns] 2011-01-02 ... 2011-01-02T23:59:50\n",
-       "Attributes:\n",
-       "    long_name:  LASER Cloud Base Height Best Estimate\n",
-       "    units:      m AGL\n",
-       "    comment:    -3. Data do not exist, -2. Data exist but no retrieval, -1. C...
" - ], - "text/plain": [ - "\n", - "array([-1., -1., -1., ..., -1., -1., -1.], dtype=float32)\n", - "Coordinates:\n", - " * time (time) datetime64[ns] 2011-01-02 ... 2011-01-02T23:59:50\n", - "Attributes:\n", - " long_name: LASER Cloud Base Height Best Estimate\n", - " units: m AGL\n", - " comment: -3. Data do not exist, -2. Data exist but no retrieval, -1. C..." - ] - }, - "execution_count": 123, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "ds_single_2.CloudBaseBestEstimate" - ] - }, - { - "cell_type": "code", - "execution_count": 72, - "id": "7c1839ad", - "metadata": {}, - "outputs": [ - { - "ename": "MergeError", - "evalue": "conflicting values for variable 'base_time' on objects to be combined. You can skip this check by specifying compat='override'.", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", - "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/act/io/armfiles.py:143\u001b[0m, in \u001b[0;36mread_netcdf\u001b[0;34m(filenames, concat_dim, return_None, combine, decode_times, use_cftime, use_base_time, combine_attrs, cleanup_qc, keep_variables, **kwargs)\u001b[0m\n\u001b[1;32m 141\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m 142\u001b[0m \u001b[38;5;66;03m# Read data file with Xarray function\u001b[39;00m\n\u001b[0;32m--> 143\u001b[0m ds \u001b[38;5;241m=\u001b[39m \u001b[43mxr\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mopen_mfdataset\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfilenames\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 145\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m except_tuple \u001b[38;5;28;01mas\u001b[39;00m exception:\n\u001b[1;32m 146\u001b[0m \u001b[38;5;66;03m# If 
requested return None for File not found error\u001b[39;00m\n", - "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/xarray/backends/api.py:1026\u001b[0m, in \u001b[0;36mopen_mfdataset\u001b[0;34m(paths, chunks, concat_dim, compat, preprocess, engine, data_vars, coords, combine, parallel, join, attrs_file, combine_attrs, **kwargs)\u001b[0m\n\u001b[1;32m 1023\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m combine \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mby_coords\u001b[39m\u001b[38;5;124m\"\u001b[39m:\n\u001b[1;32m 1024\u001b[0m \u001b[38;5;66;03m# Redo ordering from coordinates, ignoring how they were ordered\u001b[39;00m\n\u001b[1;32m 1025\u001b[0m \u001b[38;5;66;03m# previously\u001b[39;00m\n\u001b[0;32m-> 1026\u001b[0m combined \u001b[38;5;241m=\u001b[39m \u001b[43mcombine_by_coords\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1027\u001b[0m \u001b[43m \u001b[49m\u001b[43mdatasets\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1028\u001b[0m \u001b[43m \u001b[49m\u001b[43mcompat\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcompat\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1029\u001b[0m \u001b[43m \u001b[49m\u001b[43mdata_vars\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdata_vars\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1030\u001b[0m \u001b[43m \u001b[49m\u001b[43mcoords\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcoords\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1031\u001b[0m \u001b[43m \u001b[49m\u001b[43mjoin\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mjoin\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1032\u001b[0m \u001b[43m \u001b[49m\u001b[43mcombine_attrs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcombine_attrs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1033\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1034\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n", - "File 
\u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/xarray/core/combine.py:982\u001b[0m, in \u001b[0;36mcombine_by_coords\u001b[0;34m(data_objects, compat, data_vars, coords, fill_value, join, combine_attrs, datasets)\u001b[0m\n\u001b[1;32m 981\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m \u001b[38;5;28mvars\u001b[39m, datasets_with_same_vars \u001b[38;5;129;01min\u001b[39;00m grouped_by_vars:\n\u001b[0;32m--> 982\u001b[0m concatenated \u001b[38;5;241m=\u001b[39m \u001b[43m_combine_single_variable_hypercube\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 983\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;28;43mlist\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mdatasets_with_same_vars\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 984\u001b[0m \u001b[43m \u001b[49m\u001b[43mfill_value\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mfill_value\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 985\u001b[0m \u001b[43m \u001b[49m\u001b[43mdata_vars\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdata_vars\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 986\u001b[0m \u001b[43m \u001b[49m\u001b[43mcoords\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcoords\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 987\u001b[0m \u001b[43m \u001b[49m\u001b[43mcompat\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcompat\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 988\u001b[0m \u001b[43m \u001b[49m\u001b[43mjoin\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mjoin\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 989\u001b[0m \u001b[43m \u001b[49m\u001b[43mcombine_attrs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcombine_attrs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 990\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 991\u001b[0m concatenated_grouped_by_data_vars\u001b[38;5;241m.\u001b[39mappend(concatenated)\n", - "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/xarray/core/combine.py:629\u001b[0m, in 
\u001b[0;36m_combine_single_variable_hypercube\u001b[0;34m(datasets, fill_value, data_vars, coords, compat, join, combine_attrs)\u001b[0m\n\u001b[1;32m 624\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[1;32m 625\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mAt least one Dataset is required to resolve variable names \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 626\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mfor combined hypercube.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 627\u001b[0m )\n\u001b[0;32m--> 629\u001b[0m combined_ids, concat_dims \u001b[38;5;241m=\u001b[39m \u001b[43m_infer_concat_order_from_coords\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mlist\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mdatasets\u001b[49m\u001b[43m)\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 631\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m fill_value \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 632\u001b[0m \u001b[38;5;66;03m# check that datasets form complete hypercube\u001b[39;00m\n", - "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/xarray/core/combine.py:149\u001b[0m, in \u001b[0;36m_infer_concat_order_from_coords\u001b[0;34m(datasets)\u001b[0m\n\u001b[1;32m 148\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(datasets) \u001b[38;5;241m>\u001b[39m \u001b[38;5;241m1\u001b[39m \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m concat_dims:\n\u001b[0;32m--> 149\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[1;32m 150\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mCould not find any dimension coordinates to use to \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 151\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124morder the datasets for concatenation\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 152\u001b[0m 
)\n\u001b[1;32m 154\u001b[0m combined_ids \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mdict\u001b[39m(\u001b[38;5;28mzip\u001b[39m(tile_ids, datasets))\n", - "\u001b[0;31mValueError\u001b[0m: Could not find any dimension coordinates to use to order the datasets for concatenation", - "\nDuring handling of the above exception, another exception occurred:\n", - "\u001b[0;31mMergeError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[72], line 5\u001b[0m\n\u001b[1;32m 1\u001b[0m files_filter \u001b[38;5;241m=\u001b[39m \\\n\u001b[1;32m 2\u001b[0m [\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m/data/archive/sgp/sgparsclbnd1clothC1.c1/sgparsclbnd1clothC1.c1.20110104.000000.cdf\u001b[39m\u001b[38;5;124m'\u001b[39m,\n\u001b[1;32m 3\u001b[0m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124m/data/archive/sgp/sgparsclbnd1clothC1.c1/sgparsclbnd1clothC1.c1.20110106.000000.cdf\u001b[39m\u001b[38;5;124m'\u001b[39m]\n\u001b[0;32m----> 5\u001b[0m ds_multi \u001b[38;5;241m=\u001b[39m \u001b[43mact\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mio\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43marmfiles\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mread_netcdf\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfiles_list\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 6\u001b[0m ds_multi\n", - "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/act/io/armfiles.py:164\u001b[0m, in \u001b[0;36mread_netcdf\u001b[0;34m(filenames, concat_dim, return_None, combine, decode_times, use_cftime, use_base_time, combine_attrs, cleanup_qc, keep_variables, **kwargs)\u001b[0m\n\u001b[1;32m 157\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m (\n\u001b[1;32m 158\u001b[0m kwargs[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mcombine\u001b[39m\u001b[38;5;124m'\u001b[39m] \u001b[38;5;241m!=\u001b[39m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mnested\u001b[39m\u001b[38;5;124m'\u001b[39m\n\u001b[1;32m 159\u001b[0m \u001b[38;5;129;01mand\u001b[39;00m 
\u001b[38;5;28mtype\u001b[39m(exception)\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__name__\u001b[39m \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mValueError\u001b[39m\u001b[38;5;124m'\u001b[39m\n\u001b[1;32m 160\u001b[0m \u001b[38;5;129;01mand\u001b[39;00m exception\u001b[38;5;241m.\u001b[39margs[\u001b[38;5;241m0\u001b[39m] \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mCould not find any dimension coordinates \u001b[39m\u001b[38;5;124m'\u001b[39m\n\u001b[1;32m 161\u001b[0m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mto use to order the datasets for concatenation\u001b[39m\u001b[38;5;124m'\u001b[39m\n\u001b[1;32m 162\u001b[0m ):\n\u001b[1;32m 163\u001b[0m kwargs[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mcombine\u001b[39m\u001b[38;5;124m'\u001b[39m] \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mnested\u001b[39m\u001b[38;5;124m'\u001b[39m\n\u001b[0;32m--> 164\u001b[0m ds \u001b[38;5;241m=\u001b[39m \u001b[43mxr\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mopen_mfdataset\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfilenames\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 166\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 167\u001b[0m \u001b[38;5;66;03m# When all else fails raise the orginal exception\u001b[39;00m\n\u001b[1;32m 168\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m exception\n", - "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/xarray/backends/api.py:1013\u001b[0m, in \u001b[0;36mopen_mfdataset\u001b[0;34m(paths, chunks, concat_dim, compat, preprocess, engine, data_vars, coords, combine, parallel, join, attrs_file, combine_attrs, **kwargs)\u001b[0m\n\u001b[1;32m 1009\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m 1010\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m combine \u001b[38;5;241m==\u001b[39m 
\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mnested\u001b[39m\u001b[38;5;124m\"\u001b[39m:\n\u001b[1;32m 1011\u001b[0m \u001b[38;5;66;03m# Combined nested list by successive concat and merge operations\u001b[39;00m\n\u001b[1;32m 1012\u001b[0m \u001b[38;5;66;03m# along each dimension, using structure given by \"ids\"\u001b[39;00m\n\u001b[0;32m-> 1013\u001b[0m combined \u001b[38;5;241m=\u001b[39m \u001b[43m_nested_combine\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1014\u001b[0m \u001b[43m \u001b[49m\u001b[43mdatasets\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1015\u001b[0m \u001b[43m \u001b[49m\u001b[43mconcat_dims\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mconcat_dim\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1016\u001b[0m \u001b[43m \u001b[49m\u001b[43mcompat\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcompat\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1017\u001b[0m \u001b[43m \u001b[49m\u001b[43mdata_vars\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdata_vars\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1018\u001b[0m \u001b[43m \u001b[49m\u001b[43mcoords\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcoords\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1019\u001b[0m \u001b[43m \u001b[49m\u001b[43mids\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mids\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1020\u001b[0m \u001b[43m \u001b[49m\u001b[43mjoin\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mjoin\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1021\u001b[0m \u001b[43m \u001b[49m\u001b[43mcombine_attrs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcombine_attrs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1022\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1023\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m combine \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mby_coords\u001b[39m\u001b[38;5;124m\"\u001b[39m:\n\u001b[1;32m 1024\u001b[0m \u001b[38;5;66;03m# Redo ordering from 
coordinates, ignoring how they were ordered\u001b[39;00m\n\u001b[1;32m 1025\u001b[0m \u001b[38;5;66;03m# previously\u001b[39;00m\n\u001b[1;32m 1026\u001b[0m combined \u001b[38;5;241m=\u001b[39m combine_by_coords(\n\u001b[1;32m 1027\u001b[0m datasets,\n\u001b[1;32m 1028\u001b[0m compat\u001b[38;5;241m=\u001b[39mcompat,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 1032\u001b[0m combine_attrs\u001b[38;5;241m=\u001b[39mcombine_attrs,\n\u001b[1;32m 1033\u001b[0m )\n", - "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/xarray/core/combine.py:365\u001b[0m, in \u001b[0;36m_nested_combine\u001b[0;34m(datasets, concat_dims, compat, data_vars, coords, ids, fill_value, join, combine_attrs)\u001b[0m\n\u001b[1;32m 362\u001b[0m _check_shape_tile_ids(combined_ids)\n\u001b[1;32m 364\u001b[0m \u001b[38;5;66;03m# Apply series of concatenate or merge operations along each dimension\u001b[39;00m\n\u001b[0;32m--> 365\u001b[0m combined \u001b[38;5;241m=\u001b[39m \u001b[43m_combine_nd\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 366\u001b[0m \u001b[43m \u001b[49m\u001b[43mcombined_ids\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 367\u001b[0m \u001b[43m \u001b[49m\u001b[43mconcat_dims\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 368\u001b[0m \u001b[43m \u001b[49m\u001b[43mcompat\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcompat\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 369\u001b[0m \u001b[43m \u001b[49m\u001b[43mdata_vars\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdata_vars\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 370\u001b[0m \u001b[43m \u001b[49m\u001b[43mcoords\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcoords\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 371\u001b[0m \u001b[43m \u001b[49m\u001b[43mfill_value\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mfill_value\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 372\u001b[0m \u001b[43m 
\u001b[49m\u001b[43mjoin\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mjoin\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 373\u001b[0m \u001b[43m \u001b[49m\u001b[43mcombine_attrs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcombine_attrs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 374\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 375\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m combined\n", - "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/xarray/core/combine.py:239\u001b[0m, in \u001b[0;36m_combine_nd\u001b[0;34m(combined_ids, concat_dims, data_vars, coords, compat, fill_value, join, combine_attrs)\u001b[0m\n\u001b[1;32m 235\u001b[0m \u001b[38;5;66;03m# Each iteration of this loop reduces the length of the tile_ids tuples\u001b[39;00m\n\u001b[1;32m 236\u001b[0m \u001b[38;5;66;03m# by one. It always combines along the first dimension, removing the first\u001b[39;00m\n\u001b[1;32m 237\u001b[0m \u001b[38;5;66;03m# element of the tuple\u001b[39;00m\n\u001b[1;32m 238\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m concat_dim \u001b[38;5;129;01min\u001b[39;00m concat_dims:\n\u001b[0;32m--> 239\u001b[0m combined_ids \u001b[38;5;241m=\u001b[39m \u001b[43m_combine_all_along_first_dim\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 240\u001b[0m \u001b[43m \u001b[49m\u001b[43mcombined_ids\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 241\u001b[0m \u001b[43m \u001b[49m\u001b[43mdim\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mconcat_dim\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 242\u001b[0m \u001b[43m \u001b[49m\u001b[43mdata_vars\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdata_vars\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 243\u001b[0m \u001b[43m \u001b[49m\u001b[43mcoords\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcoords\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 244\u001b[0m \u001b[43m 
\u001b[49m\u001b[43mcompat\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcompat\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 245\u001b[0m \u001b[43m \u001b[49m\u001b[43mfill_value\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mfill_value\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 246\u001b[0m \u001b[43m \u001b[49m\u001b[43mjoin\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mjoin\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 247\u001b[0m \u001b[43m \u001b[49m\u001b[43mcombine_attrs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcombine_attrs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 248\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 249\u001b[0m (combined_ds,) \u001b[38;5;241m=\u001b[39m combined_ids\u001b[38;5;241m.\u001b[39mvalues()\n\u001b[1;32m 250\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m combined_ds\n", - "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/xarray/core/combine.py:275\u001b[0m, in \u001b[0;36m_combine_all_along_first_dim\u001b[0;34m(combined_ids, dim, data_vars, coords, compat, fill_value, join, combine_attrs)\u001b[0m\n\u001b[1;32m 273\u001b[0m combined_ids \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mdict\u001b[39m(\u001b[38;5;28msorted\u001b[39m(group))\n\u001b[1;32m 274\u001b[0m datasets \u001b[38;5;241m=\u001b[39m combined_ids\u001b[38;5;241m.\u001b[39mvalues()\n\u001b[0;32m--> 275\u001b[0m new_combined_ids[new_id] \u001b[38;5;241m=\u001b[39m \u001b[43m_combine_1d\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 276\u001b[0m \u001b[43m \u001b[49m\u001b[43mdatasets\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdim\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcompat\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdata_vars\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcoords\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mfill_value\u001b[49m\u001b[43m,\u001b[49m\u001b[43m 
\u001b[49m\u001b[43mjoin\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcombine_attrs\u001b[49m\n\u001b[1;32m 277\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 278\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m new_combined_ids\n", - "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/xarray/core/combine.py:320\u001b[0m, in \u001b[0;36m_combine_1d\u001b[0;34m(datasets, concat_dim, compat, data_vars, coords, fill_value, join, combine_attrs)\u001b[0m\n\u001b[1;32m 318\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m\n\u001b[1;32m 319\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m--> 320\u001b[0m combined \u001b[38;5;241m=\u001b[39m \u001b[43mmerge\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 321\u001b[0m \u001b[43m \u001b[49m\u001b[43mdatasets\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 322\u001b[0m \u001b[43m \u001b[49m\u001b[43mcompat\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcompat\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 323\u001b[0m \u001b[43m \u001b[49m\u001b[43mfill_value\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mfill_value\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 324\u001b[0m \u001b[43m \u001b[49m\u001b[43mjoin\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mjoin\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 325\u001b[0m \u001b[43m \u001b[49m\u001b[43mcombine_attrs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcombine_attrs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 326\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 328\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m combined\n", - "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/xarray/core/merge.py:1025\u001b[0m, in \u001b[0;36mmerge\u001b[0;34m(objects, compat, join, fill_value, combine_attrs)\u001b[0m\n\u001b[1;32m 1022\u001b[0m obj \u001b[38;5;241m=\u001b[39m 
obj\u001b[38;5;241m.\u001b[39mto_dataset(promote_attrs\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m) \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(obj, DataArray) \u001b[38;5;28;01melse\u001b[39;00m obj\n\u001b[1;32m 1023\u001b[0m dict_like_objects\u001b[38;5;241m.\u001b[39mappend(obj)\n\u001b[0;32m-> 1025\u001b[0m merge_result \u001b[38;5;241m=\u001b[39m \u001b[43mmerge_core\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1026\u001b[0m \u001b[43m \u001b[49m\u001b[43mdict_like_objects\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1027\u001b[0m \u001b[43m \u001b[49m\u001b[43mcompat\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1028\u001b[0m \u001b[43m \u001b[49m\u001b[43mjoin\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1029\u001b[0m \u001b[43m \u001b[49m\u001b[43mcombine_attrs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcombine_attrs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1030\u001b[0m \u001b[43m \u001b[49m\u001b[43mfill_value\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mfill_value\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1031\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1032\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m Dataset\u001b[38;5;241m.\u001b[39m_construct_direct(\u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mmerge_result\u001b[38;5;241m.\u001b[39m_asdict())\n", - "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/xarray/core/merge.py:757\u001b[0m, in \u001b[0;36mmerge_core\u001b[0;34m(objects, compat, join, combine_attrs, priority_arg, explicit_coords, indexes, fill_value)\u001b[0m\n\u001b[1;32m 755\u001b[0m collected \u001b[38;5;241m=\u001b[39m collect_variables_and_indexes(aligned, indexes\u001b[38;5;241m=\u001b[39mindexes)\n\u001b[1;32m 756\u001b[0m prioritized \u001b[38;5;241m=\u001b[39m _get_priority_vars_and_indexes(aligned, priority_arg, compat\u001b[38;5;241m=\u001b[39mcompat)\n\u001b[0;32m--> 757\u001b[0m variables, out_indexes 
\u001b[38;5;241m=\u001b[39m \u001b[43mmerge_collected\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 758\u001b[0m \u001b[43m \u001b[49m\u001b[43mcollected\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mprioritized\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcompat\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcompat\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcombine_attrs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcombine_attrs\u001b[49m\n\u001b[1;32m 759\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 761\u001b[0m dims \u001b[38;5;241m=\u001b[39m calculate_dimensions(variables)\n\u001b[1;32m 763\u001b[0m coord_names, noncoord_names \u001b[38;5;241m=\u001b[39m determine_coords(coerced)\n", - "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/xarray/core/merge.py:302\u001b[0m, in \u001b[0;36mmerge_collected\u001b[0;34m(grouped, prioritized, compat, combine_attrs, equals)\u001b[0m\n\u001b[1;32m 300\u001b[0m variables \u001b[38;5;241m=\u001b[39m [variable \u001b[38;5;28;01mfor\u001b[39;00m variable, _ \u001b[38;5;129;01min\u001b[39;00m elements_list]\n\u001b[1;32m 301\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 302\u001b[0m merged_vars[name] \u001b[38;5;241m=\u001b[39m \u001b[43munique_variable\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 303\u001b[0m \u001b[43m \u001b[49m\u001b[43mname\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mvariables\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcompat\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mequals\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget\u001b[49m\u001b[43m(\u001b[49m\u001b[43mname\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m)\u001b[49m\n\u001b[1;32m 304\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 305\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m 
MergeError:\n\u001b[1;32m 306\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m compat \u001b[38;5;241m!=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mminimal\u001b[39m\u001b[38;5;124m\"\u001b[39m:\n\u001b[1;32m 307\u001b[0m \u001b[38;5;66;03m# we need more than \"minimal\" compatibility (for which\u001b[39;00m\n\u001b[1;32m 308\u001b[0m \u001b[38;5;66;03m# we drop conflicting coordinates)\u001b[39;00m\n", - "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/xarray/core/merge.py:156\u001b[0m, in \u001b[0;36munique_variable\u001b[0;34m(name, variables, compat, equals)\u001b[0m\n\u001b[1;32m 153\u001b[0m \u001b[38;5;28;01mbreak\u001b[39;00m\n\u001b[1;32m 155\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m equals:\n\u001b[0;32m--> 156\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m MergeError(\n\u001b[1;32m 157\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mconflicting values for variable \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mname\u001b[38;5;132;01m!r}\u001b[39;00m\u001b[38;5;124m on objects to be combined. \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 158\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mYou can skip this check by specifying compat=\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124moverride\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 159\u001b[0m )\n\u001b[1;32m 161\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m combine_method:\n\u001b[1;32m 162\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m var \u001b[38;5;129;01min\u001b[39;00m variables[\u001b[38;5;241m1\u001b[39m:]:\n", - "\u001b[0;31mMergeError\u001b[0m: conflicting values for variable 'base_time' on objects to be combined. You can skip this check by specifying compat='override'." 
- ] - } - ], - "source": [ - "files_filter = \\\n", - "['/data/archive/sgp/sgparsclbnd1clothC1.c1/sgparsclbnd1clothC1.c1.20110104.000000.cdf',\n", - " '/data/archive/sgp/sgparsclbnd1clothC1.c1/sgparsclbnd1clothC1.c1.20110106.000000.cdf']\n", - "\n", - "ds_multi = act.io.armfiles.read_netcdf(files_list)\n", - "ds_multi" - ] - }, - { - "cell_type": "code", - "execution_count": 101, - "id": "b0e5d0d7", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "2 files loaded\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "
<xarray.Dataset>\n",
-       "Dimensions:                         (time: 17280, numlayers: 10)\n",
-       "Coordinates:\n",
-       "  * time                            (time) datetime64[ns] 2011-01-03 ... 2011...\n",
-       "Dimensions without coordinates: numlayers\n",
-       "Data variables:\n",
-       "    base_time                       datetime64[ns] 2011-01-03\n",
-       "    time_offset                     (time) timedelta64[ns] 00:00:00 ... NaT\n",
-       "    CloudBaseBestEstimate           (time) float32 -1.0 -1.0 -1.0 ... nan nan\n",
-       "    CloudLayerBottomHeightMplZwang  (time, numlayers) float32 0.0 0.0 ... nan\n",
-       "    CloudLayerTopHeightMplZwang     (time, numlayers) float32 0.0 0.0 ... nan\n",
-       "    qc_CloudLayerTopHeightMplZwang  (time, numlayers) float32 0.0 0.0 ... nan\n",
-       "Attributes:\n",
-       "    Date:                      Wed Jul 13 16:28:57 GMT 2011\n",
-       "    Version:                   $State: Exp $\n",
-       "    Number_Input_Platforms:    3\n",
-       "    Input_Platforms:           sgp30smplcmask1zwangC1.c1,sgpvceil25kC1.b1,sgp...\n",
-       "    Input_Platforms_Versions:  ?????,10.2,1.16\n",
-       "    zeb_platform:              sgparsclbnd1clothC1.c1\n",
-       "    Command_Line:              arsc1/arscl2 -s YYYYMMDD -e YYYYMMDD SITE FACI...\n",
-       "    contact:                    \n",
-       "    comment:                   If all layer top heights are 0, then the first...
" - ], - "text/plain": [ - "\n", - "Dimensions: (time: 17280, numlayers: 10)\n", - "Coordinates:\n", - " * time (time) datetime64[ns] 2011-01-03 ... 2011...\n", - "Dimensions without coordinates: numlayers\n", - "Data variables:\n", - " base_time datetime64[ns] 2011-01-03\n", - " time_offset (time) timedelta64[ns] 00:00:00 ... NaT\n", - " CloudBaseBestEstimate (time) float32 -1.0 -1.0 -1.0 ... nan nan\n", - " CloudLayerBottomHeightMplZwang (time, numlayers) float32 0.0 0.0 ... nan\n", - " CloudLayerTopHeightMplZwang (time, numlayers) float32 0.0 0.0 ... nan\n", - " qc_CloudLayerTopHeightMplZwang (time, numlayers) float32 0.0 0.0 ... nan\n", - "Attributes:\n", - " Date: Wed Jul 13 16:28:57 GMT 2011\n", - " Version: $State: Exp $\n", - " Number_Input_Platforms: 3\n", - " Input_Platforms: sgp30smplcmask1zwangC1.c1,sgpvceil25kC1.b1,sgp...\n", - " Input_Platforms_Versions: ?????,10.2,1.16\n", - " zeb_platform: sgparsclbnd1clothC1.c1\n", - " Command_Line: arsc1/arscl2 -s YYYYMMDD -e YYYYMMDD SITE FACI...\n", - " contact: \n", - " comment: If all layer top heights are 0, then the first..." 
- ] - }, - "execution_count": 101, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Load files as a single dataset\n", - "files_list = files_filter \n", - "ds = ds_single_2\n", - "ds = data\n", - "# ds = act.io.armfiles.read_netcdf(files_list, compat='override')\n", - "ds.clean.cleanup()\n", - "print(f'{len(files_list)} files loaded')\n", - "ds\n" - ] - }, - { - "cell_type": "markdown", - "id": "4a551094-9ec0-4b64-b80a-9940573c2f50", - "metadata": {}, - "source": [ - "## Plot time series data\n", - "#### Define the list of variables to be plotted:" - ] - }, - { - "cell_type": "code", - "execution_count": 102, - "id": "d09b789e-84f1-4605-846b-a72c110c8048", - "metadata": {}, - "outputs": [], - "source": [ - "variables_to_plot = ['CloudBaseBestEstimate']" - ] - }, - { - "cell_type": "code", - "execution_count": 115, - "id": "3458fb08-035b-4898-9253-0a94e6f9c97b", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/home/kefeimo/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/act/plotting/plot.py:81: UserWarning: Could not discern datastreamname and dict or tuple were not provided. Using defaultname of act_datastream!\n", - " warnings.warn(\n" - ] - }, - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "14e3ab08f694414e84a6909356fbe15a", - "version_major": 2, - "version_minor": 0 - }, - "image/png": "", - "text/html": [ - "\n", - "
\n", - "
\n", - " Figure\n", - "
\n", - " \n", - "
\n", - " " - ], - "text/plain": [ - "Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "ts_display = act.plotting.TimeSeriesDisplay(ds)\n", - "ts_display.add_subplots((len(variables_to_plot),), figsize = (9.5,4*len(variables_to_plot)))\n", - "\n", - "for i,v in enumerate(variables_to_plot):\n", - " ts_ax = ts_display.plot(v, subplot_index=(i,), set_title=ds.variables[v].attrs['long_name'],)\n", - " ts_ax.grid()\n", - "\n", - "plt.show()\n" - ] - }, - { - "cell_type": "markdown", - "id": "194399aa-1907-452b-8ba9-bc31d7f60291", - "metadata": {}, - "source": [ - "## Quality check plots\n", - "#### Define variable for QC plot" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "id": "9a0ef63d-3eeb-48fc-a24a-43258b6134b8", - "metadata": {}, - "outputs": [], - "source": [ - "qc_variable = 'CloudLayerTopHeightMplCamp'" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "id": "532663a3-4dc0-4497-bda8-018c5f91e1c4", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "QC not available for the selected field: CloudLayerTopHeightMplCamp\n" - ] - } - ], - "source": [ - "# QC Plot\n", - "if ('qc_'+qc_variable) in ds.variables:\n", - "\n", - " # Plot\n", - " qc_display = act.plotting.TimeSeriesDisplay(ds)\n", - " qc_display.add_subplots((2,), figsize = (9.5,10))\n", - " qc_ax = qc_display.plot(qc_variable, subplot_index=(0,), set_title=\"QC results on field: \" + qc_variable,)\n", - " qc_ax.grid()\n", - " qc_display.qc_flag_block_plot(qc_variable, subplot_index=(1,))\n", - "\n", - " plt.show()\n", - "else:\n", - " print(f'QC not available for the selected field: {qc_variable}')\n" - ] - }, - { - "cell_type": "markdown", - "id": "2308db16-f362-4033-a11a-c3e5e75ad9ba", - "metadata": {}, - "source": [ - "## Field selection dropdown menu\n", - "Select variable to be 
plotted from a dropdown menu" - ] - }, - { - "cell_type": "code", - "execution_count": 116, - "id": "d5792fa8-2253-441a-8ac6-1e07a0345031", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/home/kefeimo/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/act/plotting/plot.py:81: UserWarning: Could not discern datastreamname and dict or tuple were not provided. Using defaultname of act_datastream!\n", - " warnings.warn(\n" - ] - }, - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "29438b2a84d24cb3b3d1d4be9f4dae0c", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "AppLayout(children=(Dropdown(description='Field:', layout=Layout(grid_area='header', margin='0px 30% 0px 20%',…" - ] - }, - "execution_count": 116, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "plt.ioff()\n", - "\n", - "# populate dropdown menu with available variables \n", - "available_variables = [v for v in ds.variables if not('time' in v or v.startswith('qc_') or v.startswith('source_')) and 'long_name' in ds.variables[v].attrs]\n", - "d_variable = 'CloudBaseBestEstimate'\n", - "dropdown = widgets.Dropdown(\n", - " options = [(ds.variables[v].attrs['long_name'], v) for v in available_variables],\n", - " value= d_variable,\n", - " description='Field:',\n", - " disabled=False,\n", - ")\n", - "dropdown.layout.margin = '0px 30% 0px 20%'\n", - "dropdown.layout.width = '50%'\n", - "\n", - "# set up display\n", - "i_display = act.plotting.TimeSeriesDisplay(ds)\n", - "i_display.add_subplots((1,), figsize = (9.5,5))\n", - "i_ax = i_display.plot(d_variable, subplot_index=(0,), set_title=ds.variables[d_variable].attrs['long_name'],)\n", - "i_ax.grid()\n", - "i_fig = i_display.fig\n", - "\n", - "# update plot callback function\n", - "def update_plot(change):\n", - " i_ax.cla()\n", - " i_ax_new = i_display.plot(change.new, subplot_index=(0,), 
set_title=ds.variables[change.new].attrs['long_name'],)\n", - " i_ax_new.grid()\n", - " i_fig.canvas.draw()\n", - " i_fig.canvas.flush_events()\n", - "\n", - "dropdown.observe(update_plot, names='value')\n", - "\n", - "widgets.AppLayout(\n", - " header=dropdown,\n", - " center=i_fig.canvas,\n", - " pane_heights=[1, 6,1]\n", - ")\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "1d7932f0", - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.16" - }, - "widgets": { - "application/vnd.jupyter.widget-state+json": { - "state": {}, - "version_major": 2, - "version_minor": 0 - } - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/VAPs/quicklook/BBHRP/.ipynb_checkpoints/bbhrpavg1mlawer.c1-checkpoint.ipynb b/VAPs/quicklook/BBHRP/.ipynb_checkpoints/bbhrpavg1mlawer.c1-checkpoint.ipynb deleted file mode 100644 index 3ad7672a..00000000 --- a/VAPs/quicklook/BBHRP/.ipynb_checkpoints/bbhrpavg1mlawer.c1-checkpoint.ipynb +++ /dev/null @@ -1,3768 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "70840257-70e4-45e2-b491-14bff5a257a3", - "metadata": {}, - "source": [ - "# BBHRPAVG1MLAWER.C1 Plots\n", - "\n", - "[Click here](https://www.arm.gov/capabilities/vaps/bbhrp) for more information about this vap." 
- ] - }, - { - "cell_type": "code", - "execution_count": 1, - "id": "460fd89f-e034-452c-b837-f65c5958264f", - "metadata": {}, - "outputs": [], - "source": [ - "%matplotlib widget\n", - "import ipywidgets as widgets\n", - "\n", - "import matplotlib.pyplot as plt\n", - "import ipywidgets as widgets\n", - "import numpy as np\n", - "import pandas as pd\n", - "import os\n", - "from datetime import datetime\n", - "\n", - "import act\n", - "import xarray as xr\n", - "\n", - "# Data archive directory\n", - "DATA_DIR = r'/data/archive/'\n", - "\n", - "# Datastream info\n", - "DATASTREAM_NAME = 'bbhrpavg1mlawer'\n", - "DATA_LEVEL = 'c1'\n", - "LOCATIONS = [{'end_date': '2006-02-27', 'facility': 'C1', 'site': 'sgp', 'start_date': '2000-03-01'}]" - ] - }, - { - "cell_type": "markdown", - "id": "9faaf875", - "metadata": {}, - "source": [ - "## Define site, facility, and date range" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "ac6764f5", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The following locations and date ranges are available for this VAP:\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
sitefacilitystart_dateend_date
0sgpC12000-03-012006-02-27
\n", - "
" - ], - "text/plain": [ - " site facility start_date end_date\n", - "0 sgp C1 2000-03-01 2006-02-27" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "print(\"The following locations and date ranges are available for this VAP:\")\n", - "display(pd.DataFrame(LOCATIONS, columns=['site', 'facility', 'start_date', 'end_date']))" - ] - }, - { - "cell_type": "markdown", - "id": "8d132223", - "metadata": {}, - "source": [ - "#### Define site, facility, and date range (date format: YYYY-MM-DD) using the variables below:" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "e563983a", - "metadata": {}, - "outputs": [], - "source": [ - "site_facility = ( 'sgp', 'C1' )\n", - "\n", - "date_start = '2006-02-25'\n", - "date_end = '2006-02-27'" - ] - }, - { - "cell_type": "markdown", - "id": "bccd3dfe-2f99-49a5-bace-ea37e8dd8fc3", - "metadata": {}, - "source": [ - "## Load data files\n", - "Load data files from /data/archive/" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "cb4b9a26-c574-49c0-a521-658fa553e39e", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "'/data/archive/sgp/sgpbbhrpavg1mlawerC1.c1'" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Compile list of files\n", - "site, facility = site_facility\n", - "d_date_start = datetime.strptime(date_start, '%Y-%m-%d')\n", - "d_date_end = datetime.strptime(date_end, '%Y-%m-%d')\n", - "dir_path = os.path.join(DATA_DIR + site, site + DATASTREAM_NAME + facility + r'.' 
+ DATA_LEVEL )\n", - "dir_path\n" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "6be8f3dc", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "['20060225', '20060226', '20060227']" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "from datetime import date, timedelta\n", - "import pandas as pd\n", - "\n", - "def get_ARM_formated_dates(start_date, end_date):\n", - " \"\"\"\n", - " Get a list of ARM conventional formated date lists, based on start_date and end_date(inclusive)\n", - " EXAMPLE:\n", - " get_ARM_formated_dates(start_date=\"20180219\", end_date=\"20180221\")\n", - " >> [\"20180219\", \"20180220\", \"20180221\"] \n", - " \"\"\"\n", - " \n", - " _start_date = pd.to_datetime(start_date)\n", - " _end_date = pd.to_datetime(end_date)\n", - " \n", - " delta = _end_date - _start_date # returns timedelta \n", - " dates = []\n", - "\n", - " for i in range(delta.days + 1):\n", - " day = _start_date + timedelta(days=i)\n", - " day_formated = day.strftime(format=\"%Y%m%d\")\n", - " dates.append(day_formated)\n", - " return dates\n", - "\n", - "\n", - "get_ARM_formated_dates(start_date=date_start, end_date=date_end)" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "51feea2e", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "['/data/archive/sgp/sgpbbhrpavg1mlawerC1.c1/sgpbbhrpavg1mlawerC1.c1.20060225.002000.cdf',\n", - " '/data/archive/sgp/sgpbbhrpavg1mlawerC1.c1/sgpbbhrpavg1mlawerC1.c1.20060227.002000.cdf']" - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Filter a list of files based on date pattern\n", - "import glob\n", - "dates = get_ARM_formated_dates(start_date=date_start, end_date=date_end)\n", - "files_filter = []\n", - "for date in dates:\n", - " files_filter += glob.glob(f'{dir_path}/*.{date}*.*')\n", - " files_filter\n", - "files_filter" - ] - }, - { - 
"cell_type": "code", - "execution_count": 7, - "id": "b0e5d0d7", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "2 files loaded\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "
<xarray.Dataset>\n",
-       "Dimensions:                               (time: 96, levels: 55, layers: 54)\n",
-       "Coordinates:\n",
-       "  * time                                  (time) datetime64[ns] 2006-02-25T00...\n",
-       "Dimensions without coordinates: levels, layers\n",
-       "Data variables: (12/52)\n",
-       "    base_time                             (time) datetime64[ns] 2006-02-25T00...\n",
-       "    time_offset                           (time) datetime64[ns] 2006-02-25T00...\n",
-       "    height                                (time, levels) float32 dask.array<chunksize=(48, 55), meta=np.ndarray>\n",
-       "    pressure                              (time, levels) float32 dask.array<chunksize=(48, 55), meta=np.ndarray>\n",
-       "    temperature                           (time, levels) float32 dask.array<chunksize=(48, 55), meta=np.ndarray>\n",
-       "    column_ozone                          (time) float32 dask.array<chunksize=(48,), meta=np.ndarray>\n",
-       "    ...                                    ...\n",
-       "    cloud_tot_lwp                         (time) float32 dask.array<chunksize=(48,), meta=np.ndarray>\n",
-       "    cloud_tot_iwp                         (time) float32 dask.array<chunksize=(48,), meta=np.ndarray>\n",
-       "    cloud_fraction                        (time) float32 dask.array<chunksize=(48,), meta=np.ndarray>\n",
-       "    lat                                   (time) float32 36.61 36.61 ... 36.61\n",
-       "    lon                                   (time) float32 -97.49 ... -97.49\n",
-       "    alt                                   (time) float32 315.0 315.0 ... 315.0\n",
-       "Attributes:\n",
-       "    Date:                 Thu Jun  4 22:13:53 2009\n",
-       "    Version:              Version: ver1.5\n",
-       "    missing_value:        -9999.0\n",
-       "    _file_dates:          ['20060225', '20060227']\n",
-       "    _file_times:          ['002000', '002000']\n",
-       "    datastream:           sgpbbhrpavg1mlawerC1.c1\n",
-       "    _datastream:          sgpbbhrpavg1mlawerC1.c1\n",
-       "    _arm_standards_flag:  1
" - ], - "text/plain": [ - "\n", - "Dimensions: (time: 96, levels: 55, layers: 54)\n", - "Coordinates:\n", - " * time (time) datetime64[ns] 2006-02-25T00...\n", - "Dimensions without coordinates: levels, layers\n", - "Data variables: (12/52)\n", - " base_time (time) datetime64[ns] 2006-02-25T00...\n", - " time_offset (time) datetime64[ns] 2006-02-25T00...\n", - " height (time, levels) float32 dask.array\n", - " pressure (time, levels) float32 dask.array\n", - " temperature (time, levels) float32 dask.array\n", - " column_ozone (time) float32 dask.array\n", - " ... ...\n", - " cloud_tot_lwp (time) float32 dask.array\n", - " cloud_tot_iwp (time) float32 dask.array\n", - " cloud_fraction (time) float32 dask.array\n", - " lat (time) float32 36.61 36.61 ... 36.61\n", - " lon (time) float32 -97.49 ... -97.49\n", - " alt (time) float32 315.0 315.0 ... 315.0\n", - "Attributes:\n", - " Date: Thu Jun 4 22:13:53 2009\n", - " Version: Version: ver1.5\n", - " missing_value: -9999.0\n", - " _file_dates: ['20060225', '20060227']\n", - " _file_times: ['002000', '002000']\n", - " datastream: sgpbbhrpavg1mlawerC1.c1\n", - " _datastream: sgpbbhrpavg1mlawerC1.c1\n", - " _arm_standards_flag: 1" - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Load files as a single dataset\n", - "files_list = files_filter \n", - "ds = act.io.armfiles.read_netcdf(files_list)\n", - "ds.clean.cleanup()\n", - "print(f'{len(files_list)} files loaded')\n", - "ds\n" - ] - }, - { - "cell_type": "markdown", - "id": "4a551094-9ec0-4b64-b80a-9940573c2f50", - "metadata": {}, - "source": [ - "## Plot time series data\n", - "#### Define the list of variables to be plotted:" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "id": "d09b789e-84f1-4605-846b-a72c110c8048", - "metadata": {}, - "outputs": [], - "source": [ - "variables_to_plot = ['pressure', 'temperature', 'column_ozone']" - ] - }, - { - "cell_type": "code", - "execution_count": 9, 
- "id": "3458fb08-035b-4898-9253-0a94e6f9c97b", - "metadata": {}, - "outputs": [ - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "901cca0c06a4463aa7bea7658c637e66", - "version_major": 2, - "version_minor": 0 - }, - "image/png": "", - "text/html": [ - "\n", - "
\n", - "
\n", - " Figure\n", - "
\n", - " \n", - "
\n", - " " - ], - "text/plain": [ - "Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "ts_display = act.plotting.TimeSeriesDisplay(ds)\n", - "ts_display.add_subplots((len(variables_to_plot),), figsize = (9.5,4*len(variables_to_plot)))\n", - "\n", - "for i,v in enumerate(variables_to_plot):\n", - " ts_ax = ts_display.plot(v, subplot_index=(i,), set_title=ds.variables[v].attrs['long_name'],)\n", - " ts_ax.grid()\n", - "\n", - "plt.show()\n" - ] - }, - { - "cell_type": "markdown", - "id": "194399aa-1907-452b-8ba9-bc31d7f60291", - "metadata": {}, - "source": [ - "## Quality check plots\n", - "#### Define variable for QC plot" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "id": "9a0ef63d-3eeb-48fc-a24a-43258b6134b8", - "metadata": {}, - "outputs": [], - "source": [ - "qc_variable = 'flux'" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "id": "532663a3-4dc0-4497-bda8-018c5f91e1c4", - "metadata": {}, - "outputs": [ - { - "ename": "KeyError", - "evalue": "'flux'", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)", - "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/xarray/core/dataset.py:1348\u001b[0m, in \u001b[0;36mDataset._construct_dataarray\u001b[0;34m(self, name)\u001b[0m\n\u001b[1;32m 1347\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m-> 1348\u001b[0m variable \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_variables\u001b[49m\u001b[43m[\u001b[49m\u001b[43mname\u001b[49m\u001b[43m]\u001b[49m\n\u001b[1;32m 1349\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m:\n", - "\u001b[0;31mKeyError\u001b[0m: 
'flux'", - "\nDuring handling of the above exception, another exception occurred:\n", - "\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[11], line 7\u001b[0m\n\u001b[1;32m 5\u001b[0m qc_display \u001b[38;5;241m=\u001b[39m act\u001b[38;5;241m.\u001b[39mplotting\u001b[38;5;241m.\u001b[39mTimeSeriesDisplay(ds)\n\u001b[1;32m 6\u001b[0m qc_display\u001b[38;5;241m.\u001b[39madd_subplots((\u001b[38;5;241m2\u001b[39m,), figsize \u001b[38;5;241m=\u001b[39m (\u001b[38;5;241m9.5\u001b[39m,\u001b[38;5;241m10\u001b[39m))\n\u001b[0;32m----> 7\u001b[0m qc_ax \u001b[38;5;241m=\u001b[39m \u001b[43mqc_display\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mplot\u001b[49m\u001b[43m(\u001b[49m\u001b[43mqc_variable\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43msubplot_index\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m0\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mset_title\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mQC results on field: \u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m \u001b[49m\u001b[38;5;241;43m+\u001b[39;49m\u001b[43m \u001b[49m\u001b[43mqc_variable\u001b[49m\u001b[43m,\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 8\u001b[0m qc_ax\u001b[38;5;241m.\u001b[39mgrid()\n\u001b[1;32m 9\u001b[0m qc_display\u001b[38;5;241m.\u001b[39mqc_flag_block_plot(qc_variable, subplot_index\u001b[38;5;241m=\u001b[39m(\u001b[38;5;241m1\u001b[39m,))\n", - "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/act/plotting/timeseriesdisplay.py:418\u001b[0m, in \u001b[0;36mTimeSeriesDisplay.plot\u001b[0;34m(self, field, dsname, subplot_index, cmap, set_title, add_nan, day_night_background, invert_y_axis, abs_limits, time_rng, y_rng, use_var_for_y, set_shading, assessment_overplot, overplot_marker, overplot_behind, overplot_markersize, 
assessment_overplot_category, assessment_overplot_category_color, force_line_plot, labels, cbar_label, cbar_h_adjust, secondary_y, y_axis_flag_meanings, colorbar_labels, cb_friendly, **kwargs)\u001b[0m\n\u001b[1;32m 415\u001b[0m assessment_overplot_category_color[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mAcceptable\u001b[39m\u001b[38;5;124m'\u001b[39m] \u001b[38;5;241m=\u001b[39m (\u001b[38;5;241m0.0\u001b[39m, \u001b[38;5;241m0.4240129715562796\u001b[39m, \u001b[38;5;241m0.4240129715562796\u001b[39m),\n\u001b[1;32m 417\u001b[0m \u001b[38;5;66;03m# Get data and dimensions\u001b[39;00m\n\u001b[0;32m--> 418\u001b[0m data \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_obj\u001b[49m\u001b[43m[\u001b[49m\u001b[43mdsname\u001b[49m\u001b[43m]\u001b[49m\u001b[43m[\u001b[49m\u001b[43mfield\u001b[49m\u001b[43m]\u001b[49m\n\u001b[1;32m 419\u001b[0m dim \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mlist\u001b[39m(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_obj[dsname][field]\u001b[38;5;241m.\u001b[39mdims)\n\u001b[1;32m 420\u001b[0m xdata \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_obj[dsname][dim[\u001b[38;5;241m0\u001b[39m]]\n", - "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/xarray/core/dataset.py:1439\u001b[0m, in \u001b[0;36mDataset.__getitem__\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m 1437\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39misel(\u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkey)\n\u001b[1;32m 1438\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m utils\u001b[38;5;241m.\u001b[39mhashable(key):\n\u001b[0;32m-> 1439\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_construct_dataarray\u001b[49m\u001b[43m(\u001b[49m\u001b[43mkey\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1440\u001b[0m 
\u001b[38;5;28;01mif\u001b[39;00m utils\u001b[38;5;241m.\u001b[39miterable_of_hashable(key):\n\u001b[1;32m 1441\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_copy_listed(key)\n", - "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/xarray/core/dataset.py:1350\u001b[0m, in \u001b[0;36mDataset._construct_dataarray\u001b[0;34m(self, name)\u001b[0m\n\u001b[1;32m 1348\u001b[0m variable \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_variables[name]\n\u001b[1;32m 1349\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m:\n\u001b[0;32m-> 1350\u001b[0m _, name, variable \u001b[38;5;241m=\u001b[39m \u001b[43m_get_virtual_variable\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_variables\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mname\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdims\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1352\u001b[0m needed_dims \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mset\u001b[39m(variable\u001b[38;5;241m.\u001b[39mdims)\n\u001b[1;32m 1354\u001b[0m coords: \u001b[38;5;28mdict\u001b[39m[Hashable, Variable] \u001b[38;5;241m=\u001b[39m {}\n", - "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/xarray/core/dataset.py:186\u001b[0m, in \u001b[0;36m_get_virtual_variable\u001b[0;34m(variables, key, dim_sizes)\u001b[0m\n\u001b[1;32m 184\u001b[0m split_key \u001b[38;5;241m=\u001b[39m key\u001b[38;5;241m.\u001b[39msplit(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m.\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;241m1\u001b[39m)\n\u001b[1;32m 185\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(split_key) \u001b[38;5;241m!=\u001b[39m \u001b[38;5;241m2\u001b[39m:\n\u001b[0;32m--> 186\u001b[0m 
\u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m(key)\n\u001b[1;32m 188\u001b[0m ref_name, var_name \u001b[38;5;241m=\u001b[39m split_key\n\u001b[1;32m 189\u001b[0m ref_var \u001b[38;5;241m=\u001b[39m variables[ref_name]\n", - "\u001b[0;31mKeyError\u001b[0m: 'flux'" - ] - }, - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "ae3815a803c64f629cc4a6feb7130f6b", - "version_major": 2, - "version_minor": 0 - }, - "image/png": "", - "text/html": [ - "\n", - "
\n", - "
\n", - " Figure\n", - "
\n", - " \n", - "
\n", - " " - ], - "text/plain": [ - "Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "# QC Plot\n", - "if ('qc_'+qc_variable) in ds.variables:\n", - "\n", - " # Plot\n", - " qc_display = act.plotting.TimeSeriesDisplay(ds)\n", - " qc_display.add_subplots((2,), figsize = (9.5,10))\n", - " qc_ax = qc_display.plot(qc_variable, subplot_index=(0,), set_title=\"QC results on field: \" + qc_variable,)\n", - " qc_ax.grid()\n", - " qc_display.qc_flag_block_plot(qc_variable, subplot_index=(1,))\n", - "\n", - " plt.show()\n", - "else:\n", - " print(f'QC not available for the selected field: {qc_variable}')\n" - ] - }, - { - "cell_type": "markdown", - "id": "2308db16-f362-4033-a11a-c3e5e75ad9ba", - "metadata": {}, - "source": [ - "## Field selection dropdown menu\n", - "Select variable to be plotted from a dropdown menu" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "d5792fa8-2253-441a-8ac6-1e07a0345031", - "metadata": {}, - "outputs": [], - "source": [ - "plt.ioff()\n", - "\n", - "# populate dropdown menu with available variables \n", - "available_variables = [v for v in ds.variables if not('time' in v or v.startswith('qc_') or v.startswith('source_')) and 'long_name' in ds.variables[v].attrs]\n", - "d_variable = 'pressure'\n", - "dropdown = widgets.Dropdown(\n", - " options = [(ds.variables[v].attrs['long_name'], v) for v in available_variables],\n", - " value= d_variable,\n", - " description='Field:',\n", - " disabled=False,\n", - ")\n", - "dropdown.layout.margin = '0px 30% 0px 20%'\n", - "dropdown.layout.width = '50%'\n", - "\n", - "# set up display\n", - "i_display = act.plotting.TimeSeriesDisplay(ds)\n", - "i_display.add_subplots((1,), figsize = (9.5,5))\n", - "i_ax = i_display.plot(d_variable, subplot_index=(0,), set_title=ds.variables[d_variable].attrs['long_name'],)\n", - "i_ax.grid()\n", - "i_fig 
= i_display.fig\n", - "\n", - "# update plot callback function\n", - "def update_plot(change):\n", - " i_ax.cla()\n", - " i_ax_new = i_display.plot(change.new, subplot_index=(0,), set_title=ds.variables[change.new].attrs['long_name'],)\n", - " i_ax_new.grid()\n", - " i_fig.canvas.draw()\n", - " i_fig.canvas.flush_events()\n", - "\n", - "dropdown.observe(update_plot, names='value')\n", - "\n", - "widgets.AppLayout(\n", - " header=dropdown,\n", - " center=i_fig.canvas,\n", - " pane_heights=[1, 6,1]\n", - ")\n" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.16" - }, - "widgets": { - "application/vnd.jupyter.widget-state+json": { - "state": {}, - "version_major": 2, - "version_minor": 0 - } - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/VAPs/quicklook/CCNPROF/.ipynb_checkpoints/rlccnprof1ghan.c1-checkpoint.ipynb b/VAPs/quicklook/CCNPROF/.ipynb_checkpoints/rlccnprof1ghan.c1-checkpoint.ipynb deleted file mode 100644 index 696f23b7..00000000 --- a/VAPs/quicklook/CCNPROF/.ipynb_checkpoints/rlccnprof1ghan.c1-checkpoint.ipynb +++ /dev/null @@ -1,4109 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "70840257-70e4-45e2-b491-14bff5a257a3", - "metadata": {}, - "source": [ - "# RLCCNPROF1GHAN.C1 Plots\n", - "\n", - "[Click here](https://www.arm.gov/capabilities/vaps/ccnprof) for more information about this vap." 
- ] - }, - { - "cell_type": "code", - "execution_count": 1, - "id": "460fd89f-e034-452c-b837-f65c5958264f", - "metadata": {}, - "outputs": [], - "source": [ - "%matplotlib widget\n", - "import ipywidgets as widgets\n", - "\n", - "import matplotlib.pyplot as plt\n", - "import ipywidgets as widgets\n", - "import numpy as np\n", - "import pandas as pd\n", - "import os\n", - "from datetime import datetime\n", - "\n", - "import act\n", - "import xarray as xr\n", - "\n", - "# Data archive directory\n", - "DATA_DIR = r'/data/archive/'\n", - "\n", - "# Datastream info\n", - "DATASTREAM_NAME = 'rlccnprof1ghan'\n", - "DATA_LEVEL = 'c1'\n", - "LOCATIONS = [{'end_date': '2014-06-24', 'facility': 'C1', 'site': 'sgp', 'start_date': '2006-09-15'}]" - ] - }, - { - "cell_type": "markdown", - "id": "9faaf875", - "metadata": {}, - "source": [ - "## Define site, facility, and date range" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "ac6764f5", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The following locations and date ranges are available for this VAP:\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
sitefacilitystart_dateend_date
0sgpC12006-09-152014-06-24
\n", - "
" - ], - "text/plain": [ - " site facility start_date end_date\n", - "0 sgp C1 2006-09-15 2014-06-24" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "print(\"The following locations and date ranges are available for this VAP:\")\n", - "display(pd.DataFrame(LOCATIONS, columns=['site', 'facility', 'start_date', 'end_date']))" - ] - }, - { - "cell_type": "markdown", - "id": "8d132223", - "metadata": {}, - "source": [ - "#### Define site, facility, and date range (date format: YYYY-MM-DD) using the variables below:" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "e563983a", - "metadata": {}, - "outputs": [], - "source": [ - "site_facility = ( 'sgp', 'C1' )\n", - "\n", - "date_start = '2014-06-22'\n", - "date_end = '2014-06-24'" - ] - }, - { - "cell_type": "markdown", - "id": "bccd3dfe-2f99-49a5-bace-ea37e8dd8fc3", - "metadata": {}, - "source": [ - "## Load data files\n", - "Load data files from /data/archive/" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "cb4b9a26-c574-49c0-a521-658fa553e39e", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "'/data/archive/sgp/sgprlccnprof1ghanC1.c1'" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Compile list of files\n", - "site, facility = site_facility\n", - "d_date_start = datetime.strptime(date_start, '%Y-%m-%d')\n", - "d_date_end = datetime.strptime(date_end, '%Y-%m-%d')\n", - "dir_path = os.path.join(DATA_DIR + site, site + DATASTREAM_NAME + facility + r'.' 
+ DATA_LEVEL )\n", - "dir_path\n" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "6be8f3dc", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "['20140622', '20140623', '20140624']" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "from datetime import date, timedelta\n", - "import pandas as pd\n", - "\n", - "def get_ARM_formated_dates(start_date, end_date):\n", - " \"\"\"\n", - " Get a list of ARM conventional formated date lists, based on start_date and end_date(inclusive)\n", - " EXAMPLE:\n", - " get_ARM_formated_dates(start_date=\"20180219\", end_date=\"20180221\")\n", - " >> [\"20180219\", \"20180220\", \"20180221\"] \n", - " \"\"\"\n", - " \n", - " _start_date = pd.to_datetime(start_date)\n", - " _end_date = pd.to_datetime(end_date)\n", - " \n", - " delta = _end_date - _start_date # returns timedelta \n", - " dates = []\n", - "\n", - " for i in range(delta.days + 1):\n", - " day = _start_date + timedelta(days=i)\n", - " day_formated = day.strftime(format=\"%Y%m%d\")\n", - " dates.append(day_formated)\n", - " return dates\n", - "\n", - "\n", - "get_ARM_formated_dates(start_date=date_start, end_date=date_end)" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "51feea2e", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "['/data/archive/sgp/sgprlccnprof1ghanC1.c1/sgprlccnprof1ghanC1.c1.20140624.000000.cdf']" - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Filter a list of files based on date pattern\n", - "import glob\n", - "dates = get_ARM_formated_dates(start_date=date_start, end_date=date_end)\n", - "files_filter = []\n", - "for date in dates:\n", - " files_filter += glob.glob(f'{dir_path}/*.{date}*.*')\n", - " files_filter\n", - "files_filter" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "id": "b0e5d0d7", - "metadata": {}, - "outputs": [ - { 
- "name": "stdout", - "output_type": "stream", - "text": [ - "1 files loaded\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "
<xarray.Dataset>\n",
-       "Dimensions:                           (time: 24, height: 52, ss_step: 7,\n",
-       "                                       param2: 2)\n",
-       "Coordinates:\n",
-       "  * time                              (time) datetime64[ns] 2014-06-24 ... 20...\n",
-       "  * height                            (height) float32 0.15 0.225 ... 3.9 3.975\n",
-       "  * ss_step                           (ss_step) float32 1.0 2.0 3.0 ... 6.0 7.0\n",
-       "Dimensions without coordinates: param2\n",
-       "Data variables: (12/62)\n",
-       "    base_time                         datetime64[ns] 2014-06-24\n",
-       "    time_offset                       (time) datetime64[ns] 2014-06-24 ... 20...\n",
-       "    qc_time                           (time) int32 dask.array<chunksize=(24,), meta=np.ndarray>\n",
-       "    rh_mean                           (time, height) float32 dask.array<chunksize=(24, 52), meta=np.ndarray>\n",
-       "    qc_rh_mean                        (time, height) int32 dask.array<chunksize=(24, 52), meta=np.ndarray>\n",
-       "    rh_std_dev                        (time, height) float32 dask.array<chunksize=(24, 52), meta=np.ndarray>\n",
-       "    ...                                ...\n",
-       "    qc_N_CCN_7                        (time) int32 dask.array<chunksize=(24,), meta=np.ndarray>\n",
-       "    temperature_second_deriv          (time, height) float32 dask.array<chunksize=(24, 52), meta=np.ndarray>\n",
-       "    cbh                               (time) float32 dask.array<chunksize=(24,), meta=np.ndarray>\n",
-       "    lat                               float32 ...\n",
-       "    lon                               float32 ...\n",
-       "    alt                               float32 ...\n",
-       "Attributes: (12/17)\n",
-       "    command_line:                   ccnprof -s sgp -f C1 -b 20140624 -e 20140...\n",
-       "    process_version:                v1.2\n",
-       "    dod_version:                    rlccnprof1ghan-c1-0.5\n",
-       "    site_id:                        sgp\n",
-       "    facility_id:                    C1: Lamont, Oklahoma\n",
-       "    input_datastreams:              sgpaosccn100C1.a1 : 12.9 : 20140624.00000...\n",
-       "    ...                             ...\n",
-       "    history:                        created by user dsmgr on machine iron at ...\n",
-       "    _file_dates:                    ['20140624']\n",
-       "    _file_times:                    ['000000']\n",
-       "    datastream:                     \n",
-       "    _datastream:                    \n",
-       "    _arm_standards_flag:            1
" - ], - "text/plain": [ - "\n", - "Dimensions: (time: 24, height: 52, ss_step: 7,\n", - " param2: 2)\n", - "Coordinates:\n", - " * time (time) datetime64[ns] 2014-06-24 ... 20...\n", - " * height (height) float32 0.15 0.225 ... 3.9 3.975\n", - " * ss_step (ss_step) float32 1.0 2.0 3.0 ... 6.0 7.0\n", - "Dimensions without coordinates: param2\n", - "Data variables: (12/62)\n", - " base_time datetime64[ns] 2014-06-24\n", - " time_offset (time) datetime64[ns] 2014-06-24 ... 20...\n", - " qc_time (time) int32 dask.array\n", - " rh_mean (time, height) float32 dask.array\n", - " qc_rh_mean (time, height) int32 dask.array\n", - " rh_std_dev (time, height) float32 dask.array\n", - " ... ...\n", - " qc_N_CCN_7 (time) int32 dask.array\n", - " temperature_second_deriv (time, height) float32 dask.array\n", - " cbh (time) float32 dask.array\n", - " lat float32 ...\n", - " lon float32 ...\n", - " alt float32 ...\n", - "Attributes: (12/17)\n", - " command_line: ccnprof -s sgp -f C1 -b 20140624 -e 20140...\n", - " process_version: v1.2\n", - " dod_version: rlccnprof1ghan-c1-0.5\n", - " site_id: sgp\n", - " facility_id: C1: Lamont, Oklahoma\n", - " input_datastreams: sgpaosccn100C1.a1 : 12.9 : 20140624.00000...\n", - " ... 
...\n", - " history: created by user dsmgr on machine iron at ...\n", - " _file_dates: ['20140624']\n", - " _file_times: ['000000']\n", - " datastream: \n", - " _datastream: \n", - " _arm_standards_flag: 1" - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Load files as a single dataset\n", - "files_list = files_filter \n", - "ds = act.io.armfiles.read_netcdf(files_list)\n", - "ds.clean.cleanup()\n", - "print(f'{len(files_list)} files loaded')\n", - "ds\n" - ] - }, - { - "cell_type": "markdown", - "id": "4a551094-9ec0-4b64-b80a-9940573c2f50", - "metadata": {}, - "source": [ - "## Plot time series data\n", - "#### Define the list of variables to be plotted:" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "id": "d09b789e-84f1-4605-846b-a72c110c8048", - "metadata": {}, - "outputs": [], - "source": [ - "variables_to_plot = ['nsteps', 'rh_mean', 'rh_std_dev']" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "id": "3458fb08-035b-4898-9253-0a94e6f9c97b", - "metadata": {}, - "outputs": [ - { - "ename": "KeyError", - "evalue": "'nsteps'", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[9], line 5\u001b[0m\n\u001b[1;32m 2\u001b[0m ts_display\u001b[38;5;241m.\u001b[39madd_subplots((\u001b[38;5;28mlen\u001b[39m(variables_to_plot),), figsize \u001b[38;5;241m=\u001b[39m (\u001b[38;5;241m9.5\u001b[39m,\u001b[38;5;241m4\u001b[39m\u001b[38;5;241m*\u001b[39m\u001b[38;5;28mlen\u001b[39m(variables_to_plot)))\n\u001b[1;32m 4\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m i,v \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28menumerate\u001b[39m(variables_to_plot):\n\u001b[0;32m----> 5\u001b[0m ts_ax \u001b[38;5;241m=\u001b[39m ts_display\u001b[38;5;241m.\u001b[39mplot(v, 
subplot_index\u001b[38;5;241m=\u001b[39m(i,), set_title\u001b[38;5;241m=\u001b[39m\u001b[43mds\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mvariables\u001b[49m\u001b[43m[\u001b[49m\u001b[43mv\u001b[49m\u001b[43m]\u001b[49m\u001b[38;5;241m.\u001b[39mattrs[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mlong_name\u001b[39m\u001b[38;5;124m'\u001b[39m],)\n\u001b[1;32m 6\u001b[0m ts_ax\u001b[38;5;241m.\u001b[39mgrid()\n\u001b[1;32m 8\u001b[0m plt\u001b[38;5;241m.\u001b[39mshow()\n", - "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/xarray/core/utils.py:453\u001b[0m, in \u001b[0;36mFrozen.__getitem__\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m 452\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m__getitem__\u001b[39m(\u001b[38;5;28mself\u001b[39m, key: K) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m V:\n\u001b[0;32m--> 453\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmapping\u001b[49m\u001b[43m[\u001b[49m\u001b[43mkey\u001b[49m\u001b[43m]\u001b[49m\n", - "\u001b[0;31mKeyError\u001b[0m: 'nsteps'" - ] - }, - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "b9705370d4b8484a87f6a3e53aec927d", - "version_major": 2, - "version_minor": 0 - }, - "image/png": "", - "text/html": [ - "\n", - "
\n", - "
\n", - " Figure\n", - "
\n", - " \n", - "
\n", - " " - ], - "text/plain": [ - "Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "ts_display = act.plotting.TimeSeriesDisplay(ds)\n", - "ts_display.add_subplots((len(variables_to_plot),), figsize = (9.5,4*len(variables_to_plot)))\n", - "\n", - "for i,v in enumerate(variables_to_plot):\n", - " ts_ax = ts_display.plot(v, subplot_index=(i,), set_title=ds.variables[v].attrs['long_name'],)\n", - " ts_ax.grid()\n", - "\n", - "plt.show()\n" - ] - }, - { - "cell_type": "markdown", - "id": "194399aa-1907-452b-8ba9-bc31d7f60291", - "metadata": {}, - "source": [ - "## Quality check plots\n", - "#### Define variable for QC plot" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "9a0ef63d-3eeb-48fc-a24a-43258b6134b8", - "metadata": {}, - "outputs": [], - "source": [ - "qc_variable = 'rh_mean'" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "532663a3-4dc0-4497-bda8-018c5f91e1c4", - "metadata": {}, - "outputs": [], - "source": [ - "# QC Plot\n", - "if ('qc_'+qc_variable) in ds.variables:\n", - "\n", - " # Plot\n", - " qc_display = act.plotting.TimeSeriesDisplay(ds)\n", - " qc_display.add_subplots((2,), figsize = (9.5,10))\n", - " qc_ax = qc_display.plot(qc_variable, subplot_index=(0,), set_title=\"QC results on field: \" + qc_variable,)\n", - " qc_ax.grid()\n", - " qc_display.qc_flag_block_plot(qc_variable, subplot_index=(1,))\n", - "\n", - " plt.show()\n", - "else:\n", - " print(f'QC not available for the selected field: {qc_variable}')\n" - ] - }, - { - "cell_type": "markdown", - "id": "2308db16-f362-4033-a11a-c3e5e75ad9ba", - "metadata": {}, - "source": [ - "## Field selection dropdown menu\n", - "Select variable to be plotted from a dropdown menu" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "d5792fa8-2253-441a-8ac6-1e07a0345031", - "metadata": {}, - 
"outputs": [], - "source": [ - "plt.ioff()\n", - "\n", - "# populate dropdown menu with available variables \n", - "available_variables = [v for v in ds.variables if not('time' in v or v.startswith('qc_') or v.startswith('source_')) and 'long_name' in ds.variables[v].attrs]\n", - "d_variable = 'nsteps'\n", - "dropdown = widgets.Dropdown(\n", - " options = [(ds.variables[v].attrs['long_name'], v) for v in available_variables],\n", - " value= d_variable,\n", - " description='Field:',\n", - " disabled=False,\n", - ")\n", - "dropdown.layout.margin = '0px 30% 0px 20%'\n", - "dropdown.layout.width = '50%'\n", - "\n", - "# set up display\n", - "i_display = act.plotting.TimeSeriesDisplay(ds)\n", - "i_display.add_subplots((1,), figsize = (9.5,5))\n", - "i_ax = i_display.plot(d_variable, subplot_index=(0,), set_title=ds.variables[d_variable].attrs['long_name'],)\n", - "i_ax.grid()\n", - "i_fig = i_display.fig\n", - "\n", - "# update plot callback function\n", - "def update_plot(change):\n", - " i_ax.cla()\n", - " i_ax_new = i_display.plot(change.new, subplot_index=(0,), set_title=ds.variables[change.new].attrs['long_name'],)\n", - " i_ax_new.grid()\n", - " i_fig.canvas.draw()\n", - " i_fig.canvas.flush_events()\n", - "\n", - "dropdown.observe(update_plot, names='value')\n", - "\n", - "widgets.AppLayout(\n", - " header=dropdown,\n", - " center=i_fig.canvas,\n", - " pane_heights=[1, 6,1]\n", - ")\n" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.16" - }, - "widgets": { - "application/vnd.jupyter.widget-state+json": { - "state": {}, - "version_major": 2, - "version_minor": 0 - } - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git 
a/VAPs/quicklook/CMAC2/.ipynb_checkpoints/cmac2.c1-checkpoint.ipynb b/VAPs/quicklook/CMAC2/.ipynb_checkpoints/cmac2.c1-checkpoint.ipynb deleted file mode 100644 index 425a9cbc..00000000 --- a/VAPs/quicklook/CMAC2/.ipynb_checkpoints/cmac2.c1-checkpoint.ipynb +++ /dev/null @@ -1,3537 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "70840257-70e4-45e2-b491-14bff5a257a3", - "metadata": {}, - "source": [ - "# CMAC2.C1 Plots\n", - "\n", - "[Click here](https://www.arm.gov/capabilities/vaps/cmac2) for more information about this vap." - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "id": "460fd89f-e034-452c-b837-f65c5958264f", - "metadata": {}, - "outputs": [], - "source": [ - "%matplotlib widget\n", - "import ipywidgets as widgets\n", - "\n", - "import matplotlib.pyplot as plt\n", - "import ipywidgets as widgets\n", - "import numpy as np\n", - "import pandas as pd\n", - "import os\n", - "from datetime import datetime\n", - "\n", - "import act\n", - "import xarray as xr\n", - "\n", - "# Data archive directory\n", - "DATA_DIR = r'/data/archive/'\n", - "\n", - "# Datastream info\n", - "DATASTREAM_NAME = 'cmac2'\n", - "DATA_LEVEL = 'c1'\n", - "LOCATIONS = [{'end_date': '2019-01-22', 'facility': 'I4', 'site': 'sgp', 'start_date': '2018-08-30'}, {'end_date': '2019-04-05', 'facility': 'I5', 'site': 'sgp', 'start_date': '2018-08-30'}, {'end_date': '2019-02-26', 'facility': 'I6', 'site': 'sgp', 'start_date': '2018-08-30'}]" - ] - }, - { - "cell_type": "markdown", - "id": "9faaf875", - "metadata": {}, - "source": [ - "## Define site, facility, and date range" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "ac6764f5", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The following locations and date ranges are available for this VAP:\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
sitefacilitystart_dateend_date
0sgpI42018-08-302019-01-22
1sgpI52018-08-302019-04-05
2sgpI62018-08-302019-02-26
\n", - "
" - ], - "text/plain": [ - " site facility start_date end_date\n", - "0 sgp I4 2018-08-30 2019-01-22\n", - "1 sgp I5 2018-08-30 2019-04-05\n", - "2 sgp I6 2018-08-30 2019-02-26" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "print(\"The following locations and date ranges are available for this VAP:\")\n", - "display(pd.DataFrame(LOCATIONS, columns=['site', 'facility', 'start_date', 'end_date']))" - ] - }, - { - "cell_type": "markdown", - "id": "8d132223", - "metadata": {}, - "source": [ - "#### Define site, facility, and date range (date format: YYYY-MM-DD) using the variables below:" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "e563983a", - "metadata": {}, - "outputs": [], - "source": [ - "site_facility = ( 'sgp', 'I4' )\n", - "\n", - "date_start = '2019-01-21'\n", - "date_end = '2019-01-22'" - ] - }, - { - "cell_type": "markdown", - "id": "bccd3dfe-2f99-49a5-bace-ea37e8dd8fc3", - "metadata": {}, - "source": [ - "## Load data files\n", - "Load data files from /data/archive/" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "cb4b9a26-c574-49c0-a521-658fa553e39e", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "'/data/archive/sgp/sgpcmac2I4.c1'" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Compile list of files\n", - "site, facility = site_facility\n", - "d_date_start = datetime.strptime(date_start, '%Y-%m-%d')\n", - "d_date_end = datetime.strptime(date_end, '%Y-%m-%d')\n", - "dir_path = os.path.join(DATA_DIR + site, site + DATASTREAM_NAME + facility + r'.' 
+ DATA_LEVEL )\n", - "dir_path\n" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "6be8f3dc", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "['20190121', '20190122']" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "from datetime import date, timedelta\n", - "import pandas as pd\n", - "\n", - "def get_ARM_formated_dates(start_date, end_date):\n", - " \"\"\"\n", - " Get a list of ARM conventional formated date lists, based on start_date and end_date(inclusive)\n", - " EXAMPLE:\n", - " get_ARM_formated_dates(start_date=\"20180219\", end_date=\"20180221\")\n", - " >> [\"20180219\", \"20180220\", \"20180221\"] \n", - " \"\"\"\n", - " \n", - " _start_date = pd.to_datetime(start_date)\n", - " _end_date = pd.to_datetime(end_date)\n", - " \n", - " delta = _end_date - _start_date # returns timedelta \n", - " dates = []\n", - "\n", - " for i in range(delta.days + 1):\n", - " day = _start_date + timedelta(days=i)\n", - " day_formated = day.strftime(format=\"%Y%m%d\")\n", - " dates.append(day_formated)\n", - " return dates\n", - "\n", - "\n", - "get_ARM_formated_dates(start_date=date_start, end_date=date_end)" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "51feea2e", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "['/data/archive/sgp/sgpcmac2I4.c1/sgpcmac2I4.c1.20190122.020009.nc',\n", - " '/data/archive/sgp/sgpcmac2I4.c1/sgpcmac2I4.c1.20190122.044928.nc',\n", - " '/data/archive/sgp/sgpcmac2I4.c1/sgpcmac2I4.c1.20190122.024236.nc',\n", - " '/data/archive/sgp/sgpcmac2I4.c1/sgpcmac2I4.c1.20190122.005559.nc',\n", - " '/data/archive/sgp/sgpcmac2I4.c1/sgpcmac2I4.c1.20190122.003457.nc',\n", - " '/data/archive/sgp/sgpcmac2I4.c1/sgpcmac2I4.c1.20190122.034552.nc',\n", - " '/data/archive/sgp/sgpcmac2I4.c1/sgpcmac2I4.c1.20190122.042813.nc',\n", - " '/data/archive/sgp/sgpcmac2I4.c1/sgpcmac2I4.c1.20190122.022111.nc',\n", - " 
'/data/archive/sgp/sgpcmac2I4.c1/sgpcmac2I4.c1.20190122.030339.nc',\n", - " '/data/archive/sgp/sgpcmac2I4.c1/sgpcmac2I4.c1.20190122.051031.nc',\n", - " '/data/archive/sgp/sgpcmac2I4.c1/sgpcmac2I4.c1.20190122.040656.nc',\n", - " '/data/archive/sgp/sgpcmac2I4.c1/sgpcmac2I4.c1.20190122.011706.nc',\n", - " '/data/archive/sgp/sgpcmac2I4.c1/sgpcmac2I4.c1.20190122.013821.nc',\n", - " '/data/archive/sgp/sgpcmac2I4.c1/sgpcmac2I4.c1.20190122.032444.nc',\n", - " '/data/archive/sgp/sgpcmac2I4.c1/sgpcmac2I4.c1.20190122.001346.nc']" - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Filter a list of files based on date pattern\n", - "import glob\n", - "dates = get_ARM_formated_dates(start_date=date_start, end_date=date_end)\n", - "files_filter = []\n", - "for date in dates:\n", - " files_filter += glob.glob(f'{dir_path}/*.{date}*.*')\n", - " files_filter\n", - "files_filter" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "id": "b0e5d0d7", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "15 files loaded\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/home/kefeimo/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/act/qc/clean.py:234: RuntimeWarning: invalid value encountered in cast\n", - " data = data.astype(dtype)\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "
<xarray.Dataset>\n",
-       "Dimensions:                                   (time: 97200, range: 1001,\n",
-       "                                               sweep: 18)\n",
-       "Coordinates:\n",
-       "  * time                                      (time) datetime64[ns] 2019-01-2...\n",
-       "  * range                                     (range) float32 0.0 ... 1e+05\n",
-       "    azimuth                                   (time) float32 dask.array<chunksize=(6480,), meta=np.ndarray>\n",
-       "    elevation                                 (time) float32 dask.array<chunksize=(6480,), meta=np.ndarray>\n",
-       "Dimensions without coordinates: sweep\n",
-       "Data variables: (12/49)\n",
-       "    base_time                                 (time) datetime64[ns] 2019-01-2...\n",
-       "    time_offset                               (time) datetime64[ns] 2019-01-2...\n",
-       "    reflectivity                              (time, range) float32 dask.array<chunksize=(6480, 1001), meta=np.ndarray>\n",
-       "    cross_correlation_ratio_hv                (time, range) float32 dask.array<chunksize=(6480, 1001), meta=np.ndarray>\n",
-       "    normalized_coherent_power                 (time, range) float32 dask.array<chunksize=(6480, 1001), meta=np.ndarray>\n",
-       "    mean_doppler_velocity                     (time, range) float32 dask.array<chunksize=(6480, 1001), meta=np.ndarray>\n",
-       "    ...                                        ...\n",
-       "    path_integrated_attenuation               (time, range) float32 dask.array<chunksize=(6480, 1001), meta=np.ndarray>\n",
-       "    corrected_differential_reflectivity       (time, range) float32 dask.array<chunksize=(6480, 1001), meta=np.ndarray>\n",
-       "    ground_clutter                            (time, range) float32 dask.array<chunksize=(6480, 1001), meta=np.ndarray>\n",
-       "    lat                                       (time) float32 36.58 ... 36.58\n",
-       "    lon                                       (time) float32 -97.36 ... -97.36\n",
-       "    alt                                       (time) float32 330.0 ... 330.0\n",
-       "Attributes: (12/26)\n",
-       "    Conventions:           ARM-1.0 CF/Radial instrument_parameters\n",
-       "    title:                 Atmospheric Radiation Measurement (ARM) program X-...\n",
-       "    institution:           United States Department of Energy - Atmospheric R...\n",
-       "    references:            See XSAPR Instrument Handbook\n",
-       "    source:                Atmospheric Radiation Measurement (ARM) program X-...\n",
-       "    comment:               Data in this file has not be calibrated, corrected...\n",
-       "    ...                    ...\n",
-       "    original_container:    sigmet\n",
-       "    history:               created by user rjackson on machine or-condo-c215....\n",
-       "    _file_dates:           ['20190122', '20190122', '20190122', '20190122', '...\n",
-       "    _file_times:           ['001346', '003457', '005559', '011706', '013821',...\n",
-       "    _datastream:           sgpadicmac2I4.c1\n",
-       "    _arm_standards_flag:   1
" - ], - "text/plain": [ - "\n", - "Dimensions: (time: 97200, range: 1001,\n", - " sweep: 18)\n", - "Coordinates:\n", - " * time (time) datetime64[ns] 2019-01-2...\n", - " * range (range) float32 0.0 ... 1e+05\n", - " azimuth (time) float32 dask.array\n", - " elevation (time) float32 dask.array\n", - "Dimensions without coordinates: sweep\n", - "Data variables: (12/49)\n", - " base_time (time) datetime64[ns] 2019-01-2...\n", - " time_offset (time) datetime64[ns] 2019-01-2...\n", - " reflectivity (time, range) float32 dask.array\n", - " cross_correlation_ratio_hv (time, range) float32 dask.array\n", - " normalized_coherent_power (time, range) float32 dask.array\n", - " mean_doppler_velocity (time, range) float32 dask.array\n", - " ... ...\n", - " path_integrated_attenuation (time, range) float32 dask.array\n", - " corrected_differential_reflectivity (time, range) float32 dask.array\n", - " ground_clutter (time, range) float32 dask.array\n", - " lat (time) float32 36.58 ... 36.58\n", - " lon (time) float32 -97.36 ... -97.36\n", - " alt (time) float32 330.0 ... 330.0\n", - "Attributes: (12/26)\n", - " Conventions: ARM-1.0 CF/Radial instrument_parameters\n", - " title: Atmospheric Radiation Measurement (ARM) program X-...\n", - " institution: United States Department of Energy - Atmospheric R...\n", - " references: See XSAPR Instrument Handbook\n", - " source: Atmospheric Radiation Measurement (ARM) program X-...\n", - " comment: Data in this file has not be calibrated, corrected...\n", - " ... 
...\n", - " original_container: sigmet\n", - " history: created by user rjackson on machine or-condo-c215....\n", - " _file_dates: ['20190122', '20190122', '20190122', '20190122', '...\n", - " _file_times: ['001346', '003457', '005559', '011706', '013821',...\n", - " _datastream: sgpadicmac2I4.c1\n", - " _arm_standards_flag: 1" - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Load files as a single dataset\n", - "files_list = files_filter \n", - "ds = act.io.armfiles.read_netcdf(files_list)\n", - "ds.clean.cleanup()\n", - "print(f'{len(files_list)} files loaded')\n", - "ds\n" - ] - }, - { - "cell_type": "markdown", - "id": "4a551094-9ec0-4b64-b80a-9940573c2f50", - "metadata": {}, - "source": [ - "## Plot time series data\n", - "#### Define the list of variables to be plotted:" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "id": "d09b789e-84f1-4605-846b-a72c110c8048", - "metadata": {}, - "outputs": [], - "source": [ - "variables_to_plot = ['reflectivity', 'cross_correlation_ratio_hv', 'normalized_coherent_power']" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "3458fb08-035b-4898-9253-0a94e6f9c97b", - "metadata": {}, - "outputs": [], - "source": [ - "ts_display = act.plotting.TimeSeriesDisplay(ds)\n", - "ts_display.add_subplots((len(variables_to_plot),), figsize = (9.5,4*len(variables_to_plot)))\n", - "\n", - "for i,v in enumerate(variables_to_plot):\n", - " ts_ax = ts_display.plot(v, subplot_index=(i,), set_title=ds.variables[v].attrs['long_name'],)\n", - " ts_ax.grid()\n", - "\n", - "plt.show()\n" - ] - }, - { - "cell_type": "markdown", - "id": "2308db16-f362-4033-a11a-c3e5e75ad9ba", - "metadata": {}, - "source": [ - "## Field selection dropdown menu\n", - "Select variable to be plotted from a dropdown menu" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "d5792fa8-2253-441a-8ac6-1e07a0345031", - "metadata": {}, - "outputs": [], - 
"source": [ - "plt.ioff()\n", - "\n", - "# populate dropdown menu with available variables \n", - "available_variables = [v for v in ds.variables if not('time' in v or v.startswith('qc_') or v.startswith('source_')) and 'long_name' in ds.variables[v].attrs]\n", - "d_variable = 'reflectivity'\n", - "dropdown = widgets.Dropdown(\n", - " options = [(ds.variables[v].attrs['long_name'], v) for v in available_variables],\n", - " value= d_variable,\n", - " description='Field:',\n", - " disabled=False,\n", - ")\n", - "dropdown.layout.margin = '0px 30% 0px 20%'\n", - "dropdown.layout.width = '50%'\n", - "\n", - "# set up display\n", - "i_display = act.plotting.TimeSeriesDisplay(ds)\n", - "i_display.add_subplots((1,), figsize = (9.5,5))\n", - "i_ax = i_display.plot(d_variable, subplot_index=(0,), set_title=ds.variables[d_variable].attrs['long_name'],)\n", - "i_ax.grid()\n", - "i_fig = i_display.fig\n", - "\n", - "# update plot callback function\n", - "def update_plot(change):\n", - " i_ax.cla()\n", - " i_ax_new = i_display.plot(change.new, subplot_index=(0,), set_title=ds.variables[change.new].attrs['long_name'],)\n", - " i_ax_new.grid()\n", - " i_fig.canvas.draw()\n", - " i_fig.canvas.flush_events()\n", - "\n", - "dropdown.observe(update_plot, names='value')\n", - "\n", - "widgets.AppLayout(\n", - " header=dropdown,\n", - " center=i_fig.canvas,\n", - " pane_heights=[1, 6,1]\n", - ")\n" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.16" - }, - "widgets": { - "application/vnd.jupyter.widget-state+json": { - "state": {}, - "version_major": 2, - "version_minor": 0 - } - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git 
a/VAPs/quicklook/KAZRCOR/.ipynb_checkpoints/kazrcorge.c1-checkpoint.ipynb b/VAPs/quicklook/KAZRCOR/.ipynb_checkpoints/kazrcorge.c1-checkpoint.ipynb deleted file mode 100644 index bf322ff2..00000000 --- a/VAPs/quicklook/KAZRCOR/.ipynb_checkpoints/kazrcorge.c1-checkpoint.ipynb +++ /dev/null @@ -1,445 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "70840257-70e4-45e2-b491-14bff5a257a3", - "metadata": {}, - "source": [ - "# KAZRCORGE.C1 Plots\n", - "\n", - "[Click here](https://www.arm.gov/capabilities/vaps/kazrcor) for more information about this vap." - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "id": "460fd89f-e034-452c-b837-f65c5958264f", - "metadata": {}, - "outputs": [], - "source": [ - "%matplotlib widget\n", - "import ipywidgets as widgets\n", - "\n", - "import matplotlib.pyplot as plt\n", - "import ipywidgets as widgets\n", - "import numpy as np\n", - "import pandas as pd\n", - "import os\n", - "from datetime import datetime\n", - "\n", - "import act\n", - "import xarray as xr\n", - "\n", - "# Data archive directory\n", - "DATA_DIR = r'/data/archive/'\n", - "\n", - "# Datastream info\n", - "DATASTREAM_NAME = 'kazrcorge'\n", - "DATA_LEVEL = 'c1'\n", - "LOCATIONS = [{'end_date': '2014-02-07', 'facility': 'C1', 'site': 'nsa', 'start_date': '2011-11-11'}, {'end_date': '2014-03-15', 'facility': 'C1', 'site': 'sgp', 'start_date': '2011-01-18'}, {'end_date': '2014-03-16', 'facility': 'C1', 'site': 'twp', 'start_date': '2011-03-12'}, {'end_date': '2014-05-03', 'facility': 'C3', 'site': 'twp', 'start_date': '2011-01-27'}]" - ] - }, - { - "cell_type": "markdown", - "id": "9faaf875", - "metadata": {}, - "source": [ - "## Define site, facility, and date range" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "ac6764f5", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The following locations and date ranges are available for this VAP:\n" - ] - }, - { - "data": { - 
"text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
sitefacilitystart_dateend_date
0nsaC12011-11-112014-02-07
1sgpC12011-01-182014-03-15
2twpC12011-03-122014-03-16
3twpC32011-01-272014-05-03
\n", - "
" - ], - "text/plain": [ - " site facility start_date end_date\n", - "0 nsa C1 2011-11-11 2014-02-07\n", - "1 sgp C1 2011-01-18 2014-03-15\n", - "2 twp C1 2011-03-12 2014-03-16\n", - "3 twp C3 2011-01-27 2014-05-03" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "print(\"The following locations and date ranges are available for this VAP:\")\n", - "display(pd.DataFrame(LOCATIONS, columns=['site', 'facility', 'start_date', 'end_date']))" - ] - }, - { - "cell_type": "markdown", - "id": "8d132223", - "metadata": {}, - "source": [ - "#### Define site, facility, and date range (date format: YYYY-MM-DD) using the variables below:" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "e563983a", - "metadata": {}, - "outputs": [], - "source": [ - "site_facility = ( 'sgp', 'C1' )\n", - "\n", - "date_start = '2014-03-14'\n", - "date_end = '2014-03-15'" - ] - }, - { - "cell_type": "markdown", - "id": "bccd3dfe-2f99-49a5-bace-ea37e8dd8fc3", - "metadata": {}, - "source": [ - "## Load data files\n", - "Load data files from /data/archive/" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "cb4b9a26-c574-49c0-a521-658fa553e39e", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "'/data/archive/sgp/sgpkazrcorgeC1.c1'" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Compile list of files\n", - "site, facility = site_facility\n", - "d_date_start = datetime.strptime(date_start, '%Y-%m-%d')\n", - "d_date_end = datetime.strptime(date_end, '%Y-%m-%d')\n", - "dir_path = os.path.join(DATA_DIR + site, site + DATASTREAM_NAME + facility + r'.' 
+ DATA_LEVEL )\n", - "dir_path\n" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "6be8f3dc", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "['20140314', '20140315']" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "from datetime import date, timedelta\n", - "import pandas as pd\n", - "\n", - "def get_ARM_formated_dates(start_date, end_date):\n", - " \"\"\"\n", - " Get a list of ARM conventional formated date lists, based on start_date and end_date(inclusive)\n", - " EXAMPLE:\n", - " get_ARM_formated_dates(start_date=\"20180219\", end_date=\"20180221\")\n", - " >> [\"20180219\", \"20180220\", \"20180221\"] \n", - " \"\"\"\n", - " \n", - " _start_date = pd.to_datetime(start_date)\n", - " _end_date = pd.to_datetime(end_date)\n", - " \n", - " delta = _end_date - _start_date # returns timedelta \n", - " dates = []\n", - "\n", - " for i in range(delta.days + 1):\n", - " day = _start_date + timedelta(days=i)\n", - " day_formated = day.strftime(format=\"%Y%m%d\")\n", - " dates.append(day_formated)\n", - " return dates\n", - "\n", - "\n", - "get_ARM_formated_dates(start_date=date_start, end_date=date_end)" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "51feea2e", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "['/data/archive/sgp/sgpkazrcorgeC1.c1/sgpkazrcorgeC1.c1.20140314.000001.nc',\n", - " '/data/archive/sgp/sgpkazrcorgeC1.c1/sgpkazrcorgeC1.c1.20140315.000002.nc']" - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Filter a list of files based on date pattern\n", - "import glob\n", - "dates = get_ARM_formated_dates(start_date=date_start, end_date=date_end)\n", - "files_filter = []\n", - "for date in dates:\n", - " files_filter += glob.glob(f'{dir_path}/*.{date}*.*')\n", - " files_filter\n", - "files_filter" - ] - }, - { - "cell_type": "code", - 
"execution_count": null, - "id": "b0e5d0d7", - "metadata": {}, - "outputs": [], - "source": [ - "# Load files as a single dataset\n", - "files_list = files_filter \n", - "ds = act.io.armfiles.read_netcdf(files_list)\n", - "ds.clean.cleanup()\n", - "print(f'{len(files_list)} files loaded')\n", - "ds\n" - ] - }, - { - "cell_type": "markdown", - "id": "4a551094-9ec0-4b64-b80a-9940573c2f50", - "metadata": {}, - "source": [ - "## Plot time series data\n", - "#### Define the list of variables to be plotted:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "d09b789e-84f1-4605-846b-a72c110c8048", - "metadata": {}, - "outputs": [], - "source": [ - "variables_to_plot = ['reflectivity', 'mean_doppler_velocity', 'spectral_width']" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "3458fb08-035b-4898-9253-0a94e6f9c97b", - "metadata": {}, - "outputs": [], - "source": [ - "ts_display = act.plotting.TimeSeriesDisplay(ds)\n", - "ts_display.add_subplots((len(variables_to_plot),), figsize = (9.5,4*len(variables_to_plot)))\n", - "\n", - "for i,v in enumerate(variables_to_plot):\n", - " ts_ax = ts_display.plot(v, subplot_index=(i,), set_title=ds.variables[v].attrs['long_name'],)\n", - " ts_ax.grid()\n", - "\n", - "plt.show()\n" - ] - }, - { - "cell_type": "markdown", - "id": "194399aa-1907-452b-8ba9-bc31d7f60291", - "metadata": {}, - "source": [ - "## Quality check plots\n", - "#### Define variable for QC plot" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "9a0ef63d-3eeb-48fc-a24a-43258b6134b8", - "metadata": {}, - "outputs": [], - "source": [ - "qc_variable = 'reflectivity'" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "532663a3-4dc0-4497-bda8-018c5f91e1c4", - "metadata": {}, - "outputs": [], - "source": [ - "# QC Plot\n", - "if ('qc_'+qc_variable) in ds.variables:\n", - "\n", - " # Plot\n", - " qc_display = act.plotting.TimeSeriesDisplay(ds)\n", - " qc_display.add_subplots((2,), figsize = 
(9.5,10))\n", - " qc_ax = qc_display.plot(qc_variable, subplot_index=(0,), set_title=\"QC results on field: \" + qc_variable,)\n", - " qc_ax.grid()\n", - " qc_display.qc_flag_block_plot(qc_variable, subplot_index=(1,))\n", - "\n", - " plt.show()\n", - "else:\n", - " print(f'QC not available for the selected field: {qc_variable}')\n" - ] - }, - { - "cell_type": "markdown", - "id": "2308db16-f362-4033-a11a-c3e5e75ad9ba", - "metadata": {}, - "source": [ - "## Field selection dropdown menu\n", - "Select variable to be plotted from a dropdown menu" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "d5792fa8-2253-441a-8ac6-1e07a0345031", - "metadata": {}, - "outputs": [], - "source": [ - "plt.ioff()\n", - "\n", - "# populate dropdown menu with available variables \n", - "available_variables = [v for v in ds.variables if not('time' in v or v.startswith('qc_') or v.startswith('source_')) and 'long_name' in ds.variables[v].attrs]\n", - "d_variable = 'reflectivity'\n", - "dropdown = widgets.Dropdown(\n", - " options = [(ds.variables[v].attrs['long_name'], v) for v in available_variables],\n", - " value= d_variable,\n", - " description='Field:',\n", - " disabled=False,\n", - ")\n", - "dropdown.layout.margin = '0px 30% 0px 20%'\n", - "dropdown.layout.width = '50%'\n", - "\n", - "# set up display\n", - "i_display = act.plotting.TimeSeriesDisplay(ds)\n", - "i_display.add_subplots((1,), figsize = (9.5,5))\n", - "i_ax = i_display.plot(d_variable, subplot_index=(0,), set_title=ds.variables[d_variable].attrs['long_name'],)\n", - "i_ax.grid()\n", - "i_fig = i_display.fig\n", - "\n", - "# update plot callback function\n", - "def update_plot(change):\n", - " i_ax.cla()\n", - " i_ax_new = i_display.plot(change.new, subplot_index=(0,), set_title=ds.variables[change.new].attrs['long_name'],)\n", - " i_ax_new.grid()\n", - " i_fig.canvas.draw()\n", - " i_fig.canvas.flush_events()\n", - "\n", - "dropdown.observe(update_plot, names='value')\n", - "\n", - 
"widgets.AppLayout(\n", - " header=dropdown,\n", - " center=i_fig.canvas,\n", - " pane_heights=[1, 6,1]\n", - ")\n" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.16" - }, - "widgets": { - "application/vnd.jupyter.widget-state+json": { - "state": {}, - "version_major": 2, - "version_minor": 0 - } - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/VAPs/quicklook/KAZRCOR/.ipynb_checkpoints/kazrcorhi.c1-checkpoint.ipynb b/VAPs/quicklook/KAZRCOR/.ipynb_checkpoints/kazrcorhi.c1-checkpoint.ipynb deleted file mode 100644 index 5595ed42..00000000 --- a/VAPs/quicklook/KAZRCOR/.ipynb_checkpoints/kazrcorhi.c1-checkpoint.ipynb +++ /dev/null @@ -1,1856 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "70840257-70e4-45e2-b491-14bff5a257a3", - "metadata": {}, - "source": [ - "# KAZRCORHI.C1 Plots\n", - "\n", - "[Click here](https://www.arm.gov/capabilities/vaps/kazrcor) for more information about this vap." 
- ] - }, - { - "cell_type": "code", - "execution_count": 1, - "id": "460fd89f-e034-452c-b837-f65c5958264f", - "metadata": {}, - "outputs": [], - "source": [ - "%matplotlib widget\n", - "import ipywidgets as widgets\n", - "\n", - "import matplotlib.pyplot as plt\n", - "import ipywidgets as widgets\n", - "import numpy as np\n", - "import pandas as pd\n", - "import os\n", - "from datetime import datetime\n", - "\n", - "import act\n", - "import xarray as xr\n", - "\n", - "# Data archive directory\n", - "DATA_DIR = r'/data/archive/'\n", - "\n", - "# Datastream info\n", - "DATASTREAM_NAME = 'kazrcorhi'\n", - "DATA_LEVEL = 'c1'\n", - "LOCATIONS = [{'end_date': '2014-03-16', 'facility': 'C1', 'site': 'twp', 'start_date': '2011-03-12'}, {'end_date': '2014-05-03', 'facility': 'C3', 'site': 'twp', 'start_date': '2011-01-27'}]" - ] - }, - { - "cell_type": "markdown", - "id": "9faaf875", - "metadata": {}, - "source": [ - "## Define site, facility, and date range" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "ac6764f5", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The following locations and date ranges are available for this VAP:\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
sitefacilitystart_dateend_date
0twpC12011-03-122014-03-16
1twpC32011-01-272014-05-03
\n", - "
" - ], - "text/plain": [ - " site facility start_date end_date\n", - "0 twp C1 2011-03-12 2014-03-16\n", - "1 twp C3 2011-01-27 2014-05-03" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "print(\"The following locations and date ranges are available for this VAP:\")\n", - "display(pd.DataFrame(LOCATIONS, columns=['site', 'facility', 'start_date', 'end_date']))" - ] - }, - { - "cell_type": "markdown", - "id": "8d132223", - "metadata": {}, - "source": [ - "#### Define site, facility, and date range (date format: YYYY-MM-DD) using the variables below:" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "e563983a", - "metadata": {}, - "outputs": [], - "source": [ - "site_facility = ( 'twp', 'C1' )\n", - "\n", - "date_start = '2014-03-15'\n", - "date_end = '2014-03-16'" - ] - }, - { - "cell_type": "markdown", - "id": "bccd3dfe-2f99-49a5-bace-ea37e8dd8fc3", - "metadata": {}, - "source": [ - "## Load data files\n", - "Load data files from /data/archive/" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "cb4b9a26-c574-49c0-a521-658fa553e39e", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "'/data/archive/twp/twpkazrcorhiC1.c1'" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Compile list of files\n", - "site, facility = site_facility\n", - "d_date_start = datetime.strptime(date_start, '%Y-%m-%d')\n", - "d_date_end = datetime.strptime(date_end, '%Y-%m-%d')\n", - "dir_path = os.path.join(DATA_DIR + site, site + DATASTREAM_NAME + facility + r'.' 
+ DATA_LEVEL )\n", - "dir_path\n" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "6be8f3dc", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "['20140315', '20140316']" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "from datetime import date, timedelta\n", - "import pandas as pd\n", - "\n", - "def get_ARM_formated_dates(start_date, end_date):\n", - " \"\"\"\n", - " Get a list of ARM conventional formated date lists, based on start_date and end_date(inclusive)\n", - " EXAMPLE:\n", - " get_ARM_formated_dates(start_date=\"20180219\", end_date=\"20180221\")\n", - " >> [\"20180219\", \"20180220\", \"20180221\"] \n", - " \"\"\"\n", - " \n", - " _start_date = pd.to_datetime(start_date)\n", - " _end_date = pd.to_datetime(end_date)\n", - " \n", - " delta = _end_date - _start_date # returns timedelta \n", - " dates = []\n", - "\n", - " for i in range(delta.days + 1):\n", - " day = _start_date + timedelta(days=i)\n", - " day_formated = day.strftime(format=\"%Y%m%d\")\n", - " dates.append(day_formated)\n", - " return dates\n", - "\n", - "\n", - "get_ARM_formated_dates(start_date=date_start, end_date=date_end)" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "51feea2e", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "['/data/archive/twp/twpkazrcorhiC1.c1/twpkazrcorhiC1.c1.20140315.000001.nc',\n", - " '/data/archive/twp/twpkazrcorhiC1.c1/twpkazrcorhiC1.c1.20140316.000001.nc']" - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Filter a list of files based on date pattern\n", - "import glob\n", - "dates = get_ARM_formated_dates(start_date=date_start, end_date=date_end)\n", - "files_filter = []\n", - "for date in dates:\n", - " files_filter += glob.glob(f'{dir_path}/*.{date}*.*')\n", - " files_filter\n", - "files_filter" - ] - }, - { - "cell_type": "code", - 
"execution_count": 7, - "id": "b0e5d0d7", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "2 files loaded\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "
<xarray.Dataset>\n",
-       "Dimensions:                                   (time: 89645, range: 516)\n",
-       "Coordinates:\n",
-       "  * time                                      (time) datetime64[ns] 2014-03-1...\n",
-       "  * range                                     (range) float32 2.007e+03 ... 1...\n",
-       "Data variables: (12/23)\n",
-       "    base_time                                 (time) datetime64[ns] 2014-03-1...\n",
-       "    time_offset                               (time) datetime64[ns] 2014-03-1...\n",
-       "    reflectivity_copol                        (time, range) float32 dask.array<chunksize=(46771, 516), meta=np.ndarray>\n",
-       "    qc_reflectivity_copol                     (time, range) int32 dask.array<chunksize=(46771, 516), meta=np.ndarray>\n",
-       "    gaseous_attenuation_correction_copol      (time, range) float32 dask.array<chunksize=(46771, 516), meta=np.ndarray>\n",
-       "    qc_gaseous_attenuation_correction_copol   (time, range) int32 dask.array<chunksize=(46771, 516), meta=np.ndarray>\n",
-       "    ...                                        ...\n",
-       "    qc_rh                                     (time, range) int32 dask.array<chunksize=(46771, 516), meta=np.ndarray>\n",
-       "    bar_pres                                  (time, range) float32 dask.array<chunksize=(46771, 516), meta=np.ndarray>\n",
-       "    qc_bar_pres                               (time, range) int32 dask.array<chunksize=(46771, 516), meta=np.ndarray>\n",
-       "    lat                                       (time) float32 -2.06 ... -2.06\n",
-       "    lon                                       (time) float32 147.4 ... 147.4\n",
-       "    alt                                       (time) float32 4.0 4.0 ... 4.0 4.0\n",
-       "Attributes: (12/32)\n",
-       "    command_line:                idl -R -n kazrcor -s twp -f C1 -b 20140315 -...\n",
-       "    Conventions:                 ARM-1.1\n",
-       "    process_version:             vap-kazrcor-1.6-0.el6\n",
-       "    input_datastreams:           twpkazrgeC1.b1 : 1.3 : 20140315.000001\\ntwpk...\n",
-       "    dod_version:                 kazrcorhi-c1-1.3\n",
-       "    site_id:                     twp\n",
-       "    ...                          ...\n",
-       "    doi:                         10.5439/1228772\n",
-       "    history:                     created by user ttoto on machine chalk at 20...\n",
-       "    _file_dates:                 ['20140315', '20140316']\n",
-       "    _file_times:                 ['000001', '000001']\n",
-       "    _datastream:                 twpkazrcorhiC1.c1\n",
-       "    _arm_standards_flag:         1
" - ], - "text/plain": [ - "\n", - "Dimensions: (time: 89645, range: 516)\n", - "Coordinates:\n", - " * time (time) datetime64[ns] 2014-03-1...\n", - " * range (range) float32 2.007e+03 ... 1...\n", - "Data variables: (12/23)\n", - " base_time (time) datetime64[ns] 2014-03-1...\n", - " time_offset (time) datetime64[ns] 2014-03-1...\n", - " reflectivity_copol (time, range) float32 dask.array\n", - " qc_reflectivity_copol (time, range) int32 dask.array\n", - " gaseous_attenuation_correction_copol (time, range) float32 dask.array\n", - " qc_gaseous_attenuation_correction_copol (time, range) int32 dask.array\n", - " ... ...\n", - " qc_rh (time, range) int32 dask.array\n", - " bar_pres (time, range) float32 dask.array\n", - " qc_bar_pres (time, range) int32 dask.array\n", - " lat (time) float32 -2.06 ... -2.06\n", - " lon (time) float32 147.4 ... 147.4\n", - " alt (time) float32 4.0 4.0 ... 4.0 4.0\n", - "Attributes: (12/32)\n", - " command_line: idl -R -n kazrcor -s twp -f C1 -b 20140315 -...\n", - " Conventions: ARM-1.1\n", - " process_version: vap-kazrcor-1.6-0.el6\n", - " input_datastreams: twpkazrgeC1.b1 : 1.3 : 20140315.000001\\ntwpk...\n", - " dod_version: kazrcorhi-c1-1.3\n", - " site_id: twp\n", - " ... 
...\n", - " doi: 10.5439/1228772\n", - " history: created by user ttoto on machine chalk at 20...\n", - " _file_dates: ['20140315', '20140316']\n", - " _file_times: ['000001', '000001']\n", - " _datastream: twpkazrcorhiC1.c1\n", - " _arm_standards_flag: 1" - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Load files as a single dataset\n", - "files_list = files_filter \n", - "ds = act.io.armfiles.read_netcdf(files_list)\n", - "ds.clean.cleanup()\n", - "print(f'{len(files_list)} files loaded')\n", - "ds\n" - ] - }, - { - "cell_type": "markdown", - "id": "4a551094-9ec0-4b64-b80a-9940573c2f50", - "metadata": {}, - "source": [ - "## Plot time series data\n", - "#### Define the list of variables to be plotted:" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "id": "d09b789e-84f1-4605-846b-a72c110c8048", - "metadata": {}, - "outputs": [], - "source": [ - "variables_to_plot = ['reflectivity', 'mean_doppler_velocity', 'spectral_width']" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "id": "3458fb08-035b-4898-9253-0a94e6f9c97b", - "metadata": {}, - "outputs": [ - { - "ename": "KeyError", - "evalue": "'reflectivity'", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[9], line 5\u001b[0m\n\u001b[1;32m 2\u001b[0m ts_display\u001b[38;5;241m.\u001b[39madd_subplots((\u001b[38;5;28mlen\u001b[39m(variables_to_plot),), figsize \u001b[38;5;241m=\u001b[39m (\u001b[38;5;241m9.5\u001b[39m,\u001b[38;5;241m4\u001b[39m\u001b[38;5;241m*\u001b[39m\u001b[38;5;28mlen\u001b[39m(variables_to_plot)))\n\u001b[1;32m 4\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m i,v \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28menumerate\u001b[39m(variables_to_plot):\n\u001b[0;32m----> 5\u001b[0m ts_ax \u001b[38;5;241m=\u001b[39m 
ts_display\u001b[38;5;241m.\u001b[39mplot(v, subplot_index\u001b[38;5;241m=\u001b[39m(i,), set_title\u001b[38;5;241m=\u001b[39m\u001b[43mds\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mvariables\u001b[49m\u001b[43m[\u001b[49m\u001b[43mv\u001b[49m\u001b[43m]\u001b[49m\u001b[38;5;241m.\u001b[39mattrs[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mlong_name\u001b[39m\u001b[38;5;124m'\u001b[39m],)\n\u001b[1;32m 6\u001b[0m ts_ax\u001b[38;5;241m.\u001b[39mgrid()\n\u001b[1;32m 8\u001b[0m plt\u001b[38;5;241m.\u001b[39mshow()\n", - "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/xarray/core/utils.py:453\u001b[0m, in \u001b[0;36mFrozen.__getitem__\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m 452\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m__getitem__\u001b[39m(\u001b[38;5;28mself\u001b[39m, key: K) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m V:\n\u001b[0;32m--> 453\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmapping\u001b[49m\u001b[43m[\u001b[49m\u001b[43mkey\u001b[49m\u001b[43m]\u001b[49m\n", - "\u001b[0;31mKeyError\u001b[0m: 'reflectivity'" - ] - }, - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "f666994b48ed49da969503777e133ba7", - "version_major": 2, - "version_minor": 0 - }, - "image/png": "", - "text/html": [ - "\n", - "
\n", - "
\n", - " Figure\n", - "
\n", - " \n", - "
\n", - " " - ], - "text/plain": [ - "Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "ts_display = act.plotting.TimeSeriesDisplay(ds)\n", - "ts_display.add_subplots((len(variables_to_plot),), figsize = (9.5,4*len(variables_to_plot)))\n", - "\n", - "for i,v in enumerate(variables_to_plot):\n", - " ts_ax = ts_display.plot(v, subplot_index=(i,), set_title=ds.variables[v].attrs['long_name'],)\n", - " ts_ax.grid()\n", - "\n", - "plt.show()\n" - ] - }, - { - "cell_type": "markdown", - "id": "194399aa-1907-452b-8ba9-bc31d7f60291", - "metadata": {}, - "source": [ - "## Quality check plots\n", - "#### Define variable for QC plot" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "9a0ef63d-3eeb-48fc-a24a-43258b6134b8", - "metadata": {}, - "outputs": [], - "source": [ - "qc_variable = 'reflectivity'" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "532663a3-4dc0-4497-bda8-018c5f91e1c4", - "metadata": {}, - "outputs": [], - "source": [ - "# QC Plot\n", - "if ('qc_'+qc_variable) in ds.variables:\n", - "\n", - " # Plot\n", - " qc_display = act.plotting.TimeSeriesDisplay(ds)\n", - " qc_display.add_subplots((2,), figsize = (9.5,10))\n", - " qc_ax = qc_display.plot(qc_variable, subplot_index=(0,), set_title=\"QC results on field: \" + qc_variable,)\n", - " qc_ax.grid()\n", - " qc_display.qc_flag_block_plot(qc_variable, subplot_index=(1,))\n", - "\n", - " plt.show()\n", - "else:\n", - " print(f'QC not available for the selected field: {qc_variable}')\n" - ] - }, - { - "cell_type": "markdown", - "id": "2308db16-f362-4033-a11a-c3e5e75ad9ba", - "metadata": {}, - "source": [ - "## Field selection dropdown menu\n", - "Select variable to be plotted from a dropdown menu" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "d5792fa8-2253-441a-8ac6-1e07a0345031", - "metadata": {}, - 
"outputs": [], - "source": [ - "plt.ioff()\n", - "\n", - "# populate dropdown menu with available variables \n", - "available_variables = [v for v in ds.variables if not('time' in v or v.startswith('qc_') or v.startswith('source_')) and 'long_name' in ds.variables[v].attrs]\n", - "d_variable = 'reflectivity'\n", - "dropdown = widgets.Dropdown(\n", - " options = [(ds.variables[v].attrs['long_name'], v) for v in available_variables],\n", - " value= d_variable,\n", - " description='Field:',\n", - " disabled=False,\n", - ")\n", - "dropdown.layout.margin = '0px 30% 0px 20%'\n", - "dropdown.layout.width = '50%'\n", - "\n", - "# set up display\n", - "i_display = act.plotting.TimeSeriesDisplay(ds)\n", - "i_display.add_subplots((1,), figsize = (9.5,5))\n", - "i_ax = i_display.plot(d_variable, subplot_index=(0,), set_title=ds.variables[d_variable].attrs['long_name'],)\n", - "i_ax.grid()\n", - "i_fig = i_display.fig\n", - "\n", - "# update plot callback function\n", - "def update_plot(change):\n", - " i_ax.cla()\n", - " i_ax_new = i_display.plot(change.new, subplot_index=(0,), set_title=ds.variables[change.new].attrs['long_name'],)\n", - " i_ax_new.grid()\n", - " i_fig.canvas.draw()\n", - " i_fig.canvas.flush_events()\n", - "\n", - "dropdown.observe(update_plot, names='value')\n", - "\n", - "widgets.AppLayout(\n", - " header=dropdown,\n", - " center=i_fig.canvas,\n", - " pane_heights=[1, 6,1]\n", - ")\n" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.16" - }, - "widgets": { - "application/vnd.jupyter.widget-state+json": { - "state": {}, - "version_major": 2, - "version_minor": 0 - } - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git 
a/VAPs/quicklook/KAZRCOR/.ipynb_checkpoints/kazrcormd.c1-checkpoint.ipynb b/VAPs/quicklook/KAZRCOR/.ipynb_checkpoints/kazrcormd.c1-checkpoint.ipynb deleted file mode 100644 index 284e0d7b..00000000 --- a/VAPs/quicklook/KAZRCOR/.ipynb_checkpoints/kazrcormd.c1-checkpoint.ipynb +++ /dev/null @@ -1,2667 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "70840257-70e4-45e2-b491-14bff5a257a3", - "metadata": {}, - "source": [ - "# KAZRCORMD.C1 Plots\n", - "\n", - "[Click here](https://www.arm.gov/capabilities/vaps/kazrcor) for more information about this vap." - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "id": "460fd89f-e034-452c-b837-f65c5958264f", - "metadata": {}, - "outputs": [], - "source": [ - "%matplotlib widget\n", - "import ipywidgets as widgets\n", - "\n", - "import matplotlib.pyplot as plt\n", - "import ipywidgets as widgets\n", - "import numpy as np\n", - "import pandas as pd\n", - "import os\n", - "from datetime import datetime\n", - "\n", - "import act\n", - "import xarray as xr\n", - "\n", - "# Data archive directory\n", - "DATA_DIR = r'/data/archive/'\n", - "\n", - "# Datastream info\n", - "DATASTREAM_NAME = 'kazrcormd'\n", - "DATA_LEVEL = 'c1'\n", - "LOCATIONS = [{'end_date': '2014-02-07', 'facility': 'C1', 'site': 'nsa', 'start_date': '2011-11-11'}, {'end_date': '2014-03-15', 'facility': 'C1', 'site': 'sgp', 'start_date': '2011-05-03'}]" - ] - }, - { - "cell_type": "markdown", - "id": "9faaf875", - "metadata": {}, - "source": [ - "## Define site, facility, and date range" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "ac6764f5", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The following locations and date ranges are available for this VAP:\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
sitefacilitystart_dateend_date
0nsaC12011-11-112014-02-07
1sgpC12011-05-032014-03-15
\n", - "
" - ], - "text/plain": [ - " site facility start_date end_date\n", - "0 nsa C1 2011-11-11 2014-02-07\n", - "1 sgp C1 2011-05-03 2014-03-15" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "print(\"The following locations and date ranges are available for this VAP:\")\n", - "display(pd.DataFrame(LOCATIONS, columns=['site', 'facility', 'start_date', 'end_date']))" - ] - }, - { - "cell_type": "markdown", - "id": "8d132223", - "metadata": {}, - "source": [ - "#### Define site, facility, and date range (date format: YYYY-MM-DD) using the variables below:" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "e563983a", - "metadata": {}, - "outputs": [], - "source": [ - "site_facility = ( 'sgp', 'C1' )\n", - "\n", - "date_start = '2014-03-14'\n", - "date_end = '2014-03-15'" - ] - }, - { - "cell_type": "markdown", - "id": "bccd3dfe-2f99-49a5-bace-ea37e8dd8fc3", - "metadata": {}, - "source": [ - "## Load data files\n", - "Load data files from /data/archive/" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "cb4b9a26-c574-49c0-a521-658fa553e39e", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "'/data/archive/sgp/sgpkazrcormdC1.c1'" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Compile list of files\n", - "site, facility = site_facility\n", - "d_date_start = datetime.strptime(date_start, '%Y-%m-%d')\n", - "d_date_end = datetime.strptime(date_end, '%Y-%m-%d')\n", - "dir_path = os.path.join(DATA_DIR + site, site + DATASTREAM_NAME + facility + r'.' 
+ DATA_LEVEL )\n", - "dir_path\n" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "6be8f3dc", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "['20140314', '20140315']" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "from datetime import date, timedelta\n", - "import pandas as pd\n", - "\n", - "def get_ARM_formated_dates(start_date, end_date):\n", - " \"\"\"\n", - " Get a list of ARM conventional formated date lists, based on start_date and end_date(inclusive)\n", - " EXAMPLE:\n", - " get_ARM_formated_dates(start_date=\"20180219\", end_date=\"20180221\")\n", - " >> [\"20180219\", \"20180220\", \"20180221\"] \n", - " \"\"\"\n", - " \n", - " _start_date = pd.to_datetime(start_date)\n", - " _end_date = pd.to_datetime(end_date)\n", - " \n", - " delta = _end_date - _start_date # returns timedelta \n", - " dates = []\n", - "\n", - " for i in range(delta.days + 1):\n", - " day = _start_date + timedelta(days=i)\n", - " day_formated = day.strftime(format=\"%Y%m%d\")\n", - " dates.append(day_formated)\n", - " return dates\n", - "\n", - "\n", - "get_ARM_formated_dates(start_date=date_start, end_date=date_end)" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "51feea2e", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "['/data/archive/sgp/sgpkazrcormdC1.c1/sgpkazrcormdC1.c1.20140314.000001.nc',\n", - " '/data/archive/sgp/sgpkazrcormdC1.c1/sgpkazrcormdC1.c1.20140315.000002.nc']" - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Filter a list of files based on date pattern\n", - "import glob\n", - "dates = get_ARM_formated_dates(start_date=date_start, end_date=date_end)\n", - "files_filter = []\n", - "for date in dates:\n", - " files_filter += glob.glob(f'{dir_path}/*.{date}*.*')\n", - " files_filter\n", - "files_filter" - ] - }, - { - "cell_type": "code", - 
"execution_count": 7, - "id": "b0e5d0d7", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "72 files loaded\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "
<xarray.Dataset>\n",
-       "Dimensions:                                   (time: 23386, range: 652)\n",
-       "Coordinates:\n",
-       "  * time                                      (time) datetime64[ns] 2014-03-1...\n",
-       "  * range                                     (range) float32 718.1 ... 2.023...\n",
-       "Data variables: (12/39)\n",
-       "    base_time                                 datetime64[ns] 2014-03-14\n",
-       "    time_offset                               (time) datetime64[ns] 2014-03-1...\n",
-       "    reflectivity_copol                        (time, range) float32 dask.array<chunksize=(23386, 652), meta=np.ndarray>\n",
-       "    qc_reflectivity_copol                     (time, range) int32 dask.array<chunksize=(23386, 652), meta=np.ndarray>\n",
-       "    gaseous_attenuation_correction_copol      (time, range) float32 dask.array<chunksize=(23386, 652), meta=np.ndarray>\n",
-       "    qc_gaseous_attenuation_correction_copol   (time, range) int32 dask.array<chunksize=(23386, 652), meta=np.ndarray>\n",
-       "    ...                                        ...\n",
-       "    qc_rh                                     (time, range) int32 dask.array<chunksize=(23386, 652), meta=np.ndarray>\n",
-       "    bar_pres                                  (time, range) float32 dask.array<chunksize=(23386, 652), meta=np.ndarray>\n",
-       "    qc_bar_pres                               (time, range) int32 dask.array<chunksize=(23386, 652), meta=np.ndarray>\n",
-       "    lat                                       float32 ...\n",
-       "    lon                                       float32 ...\n",
-       "    alt                                       float32 ...\n",
-       "Attributes: (12/33)\n",
-       "    command_line:                idl -R -n kazrcor -s sgp -f C1 -b 20140314 -...\n",
-       "    Conventions:                 ARM-1.1\n",
-       "    process_version:             vap-kazrcor-1.6-0.el6\n",
-       "    input_datastreams:           sgpkazrgeC1.b1 : 1.3 : 20140314.000001\\nsgpk...\n",
-       "    dod_version:                 kazrcormd-c1-2.0\n",
-       "    site_id:                     sgp\n",
-       "    ...                          ...\n",
-       "    doi:                         10.5439/1228771\n",
-       "    history:                     created by user ttoto on machine chalk at 20...\n",
-       "    _file_dates:                 ['20140314']\n",
-       "    _file_times:                 ['000001']\n",
-       "    _datastream:                 sgpkazrcormdC1.c1\n",
-       "    _arm_standards_flag:         1
" - ], - "text/plain": [ - "\n", - "Dimensions: (time: 23386, range: 652)\n", - "Coordinates:\n", - " * time (time) datetime64[ns] 2014-03-1...\n", - " * range (range) float32 718.1 ... 2.023...\n", - "Data variables: (12/39)\n", - " base_time datetime64[ns] 2014-03-14\n", - " time_offset (time) datetime64[ns] 2014-03-1...\n", - " reflectivity_copol (time, range) float32 dask.array\n", - " qc_reflectivity_copol (time, range) int32 dask.array\n", - " gaseous_attenuation_correction_copol (time, range) float32 dask.array\n", - " qc_gaseous_attenuation_correction_copol (time, range) int32 dask.array\n", - " ... ...\n", - " qc_rh (time, range) int32 dask.array\n", - " bar_pres (time, range) float32 dask.array\n", - " qc_bar_pres (time, range) int32 dask.array\n", - " lat float32 ...\n", - " lon float32 ...\n", - " alt float32 ...\n", - "Attributes: (12/33)\n", - " command_line: idl -R -n kazrcor -s sgp -f C1 -b 20140314 -...\n", - " Conventions: ARM-1.1\n", - " process_version: vap-kazrcor-1.6-0.el6\n", - " input_datastreams: sgpkazrgeC1.b1 : 1.3 : 20140314.000001\\nsgpk...\n", - " dod_version: kazrcormd-c1-2.0\n", - " site_id: sgp\n", - " ... 
...\n", - " doi: 10.5439/1228771\n", - " history: created by user ttoto on machine chalk at 20...\n", - " _file_dates: ['20140314']\n", - " _file_times: ['000001']\n", - " _datastream: sgpkazrcormdC1.c1\n", - " _arm_standards_flag: 1" - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Load files as a single dataset\n", - "files_list = files_filter [0]\n", - "ds = act.io.armfiles.read_netcdf(files_list)\n", - "ds.clean.cleanup()\n", - "print(f'{len(files_list)} files loaded')\n", - "ds\n" - ] - }, - { - "cell_type": "markdown", - "id": "4a551094-9ec0-4b64-b80a-9940573c2f50", - "metadata": {}, - "source": [ - "## Plot time series data\n", - "#### Define the list of variables to be plotted:" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "id": "d09b789e-84f1-4605-846b-a72c110c8048", - "metadata": {}, - "outputs": [], - "source": [ - "variables_to_plot = ['reflectivity', 'mean_doppler_velocity', 'spectral_width']" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "id": "3458fb08-035b-4898-9253-0a94e6f9c97b", - "metadata": {}, - "outputs": [ - { - "ename": "KeyError", - "evalue": "'reflectivity'", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[9], line 5\u001b[0m\n\u001b[1;32m 2\u001b[0m ts_display\u001b[38;5;241m.\u001b[39madd_subplots((\u001b[38;5;28mlen\u001b[39m(variables_to_plot),), figsize \u001b[38;5;241m=\u001b[39m (\u001b[38;5;241m9.5\u001b[39m,\u001b[38;5;241m4\u001b[39m\u001b[38;5;241m*\u001b[39m\u001b[38;5;28mlen\u001b[39m(variables_to_plot)))\n\u001b[1;32m 4\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m i,v \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28menumerate\u001b[39m(variables_to_plot):\n\u001b[0;32m----> 5\u001b[0m ts_ax \u001b[38;5;241m=\u001b[39m 
ts_display\u001b[38;5;241m.\u001b[39mplot(v, subplot_index\u001b[38;5;241m=\u001b[39m(i,), set_title\u001b[38;5;241m=\u001b[39m\u001b[43mds\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mvariables\u001b[49m\u001b[43m[\u001b[49m\u001b[43mv\u001b[49m\u001b[43m]\u001b[49m\u001b[38;5;241m.\u001b[39mattrs[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mlong_name\u001b[39m\u001b[38;5;124m'\u001b[39m],)\n\u001b[1;32m 6\u001b[0m ts_ax\u001b[38;5;241m.\u001b[39mgrid()\n\u001b[1;32m 8\u001b[0m plt\u001b[38;5;241m.\u001b[39mshow()\n", - "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/xarray/core/utils.py:453\u001b[0m, in \u001b[0;36mFrozen.__getitem__\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m 452\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m__getitem__\u001b[39m(\u001b[38;5;28mself\u001b[39m, key: K) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m V:\n\u001b[0;32m--> 453\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmapping\u001b[49m\u001b[43m[\u001b[49m\u001b[43mkey\u001b[49m\u001b[43m]\u001b[49m\n", - "\u001b[0;31mKeyError\u001b[0m: 'reflectivity'" - ] - }, - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "fe6a6740ed58487aa977d2bfc40e0bdc", - "version_major": 2, - "version_minor": 0 - }, - "image/png": "", - "text/html": [ - "\n", - "
\n", - "
\n", - " Figure\n", - "
\n", - " \n", - "
\n", - " " - ], - "text/plain": [ - "Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "ts_display = act.plotting.TimeSeriesDisplay(ds)\n", - "ts_display.add_subplots((len(variables_to_plot),), figsize = (9.5,4*len(variables_to_plot)))\n", - "\n", - "for i,v in enumerate(variables_to_plot):\n", - " ts_ax = ts_display.plot(v, subplot_index=(i,), set_title=ds.variables[v].attrs['long_name'],)\n", - " ts_ax.grid()\n", - "\n", - "plt.show()\n" - ] - }, - { - "cell_type": "markdown", - "id": "194399aa-1907-452b-8ba9-bc31d7f60291", - "metadata": {}, - "source": [ - "## Quality check plots\n", - "#### Define variable for QC plot" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "9a0ef63d-3eeb-48fc-a24a-43258b6134b8", - "metadata": {}, - "outputs": [], - "source": [ - "qc_variable = 'reflectivity'" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "532663a3-4dc0-4497-bda8-018c5f91e1c4", - "metadata": {}, - "outputs": [], - "source": [ - "# QC Plot\n", - "if ('qc_'+qc_variable) in ds.variables:\n", - "\n", - " # Plot\n", - " qc_display = act.plotting.TimeSeriesDisplay(ds)\n", - " qc_display.add_subplots((2,), figsize = (9.5,10))\n", - " qc_ax = qc_display.plot(qc_variable, subplot_index=(0,), set_title=\"QC results on field: \" + qc_variable,)\n", - " qc_ax.grid()\n", - " qc_display.qc_flag_block_plot(qc_variable, subplot_index=(1,))\n", - "\n", - " plt.show()\n", - "else:\n", - " print(f'QC not available for the selected field: {qc_variable}')\n" - ] - }, - { - "cell_type": "markdown", - "id": "2308db16-f362-4033-a11a-c3e5e75ad9ba", - "metadata": {}, - "source": [ - "## Field selection dropdown menu\n", - "Select variable to be plotted from a dropdown menu" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "d5792fa8-2253-441a-8ac6-1e07a0345031", - "metadata": {}, - 
"outputs": [], - "source": [ - "plt.ioff()\n", - "\n", - "# populate dropdown menu with available variables \n", - "available_variables = [v for v in ds.variables if not('time' in v or v.startswith('qc_') or v.startswith('source_')) and 'long_name' in ds.variables[v].attrs]\n", - "d_variable = 'reflectivity'\n", - "dropdown = widgets.Dropdown(\n", - " options = [(ds.variables[v].attrs['long_name'], v) for v in available_variables],\n", - " value= d_variable,\n", - " description='Field:',\n", - " disabled=False,\n", - ")\n", - "dropdown.layout.margin = '0px 30% 0px 20%'\n", - "dropdown.layout.width = '50%'\n", - "\n", - "# set up display\n", - "i_display = act.plotting.TimeSeriesDisplay(ds)\n", - "i_display.add_subplots((1,), figsize = (9.5,5))\n", - "i_ax = i_display.plot(d_variable, subplot_index=(0,), set_title=ds.variables[d_variable].attrs['long_name'],)\n", - "i_ax.grid()\n", - "i_fig = i_display.fig\n", - "\n", - "# update plot callback function\n", - "def update_plot(change):\n", - " i_ax.cla()\n", - " i_ax_new = i_display.plot(change.new, subplot_index=(0,), set_title=ds.variables[change.new].attrs['long_name'],)\n", - " i_ax_new.grid()\n", - " i_fig.canvas.draw()\n", - " i_fig.canvas.flush_events()\n", - "\n", - "dropdown.observe(update_plot, names='value')\n", - "\n", - "widgets.AppLayout(\n", - " header=dropdown,\n", - " center=i_fig.canvas,\n", - " pane_heights=[1, 6,1]\n", - ")\n" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.16" - }, - "widgets": { - "application/vnd.jupyter.widget-state+json": { - "state": {}, - "version_major": 2, - "version_minor": 0 - } - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git 
a/VAPs/quicklook/MFRSRCLDOD/.ipynb_checkpoints/mfrsrcldod1min.c1-checkpoint.ipynb b/VAPs/quicklook/MFRSRCLDOD/.ipynb_checkpoints/mfrsrcldod1min.c1-checkpoint.ipynb deleted file mode 100644 index 41b70061..00000000 --- a/VAPs/quicklook/MFRSRCLDOD/.ipynb_checkpoints/mfrsrcldod1min.c1-checkpoint.ipynb +++ /dev/null @@ -1,799 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "70840257-70e4-45e2-b491-14bff5a257a3", - "metadata": {}, - "source": [ - "# MFRSRCLDOD1MIN.C1 Plots\n", - "\n", - "[Click here](https://www.arm.gov/capabilities/vaps/mfrsrcldod) for more information about this vap." - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "id": "460fd89f-e034-452c-b837-f65c5958264f", - "metadata": {}, - "outputs": [], - "source": [ - "%matplotlib widget\n", - "import ipywidgets as widgets\n", - "\n", - "import matplotlib.pyplot as plt\n", - "import ipywidgets as widgets\n", - "import numpy as np\n", - "import pandas as pd\n", - "import os\n", - "from datetime import datetime\n", - "\n", - "import act\n", - "import xarray as xr\n", - "\n", - "# Data archive directory\n", - "DATA_DIR = r'/data/archive/'\n", - "\n", - "# Datastream info\n", - "DATASTREAM_NAME = 'mfrsrcldod1min'\n", - "DATA_LEVEL = 'c1'\n", - "LOCATIONS = [{'end_date': '2017-11-01', 'facility': 'M1', 'site': 'asi', 'start_date': '2016-05-02'}, {'end_date': '2019-04-30', 'facility': 'M1', 'site': 'cor', 'start_date': '2018-09-18'}, {'end_date': '2013-07-01', 'facility': 'M1', 'site': 'pvc', 'start_date': '2012-07-09'}, {'end_date': '2015-08-29', 'facility': 'M1', 'site': 'mao', 'start_date': '2015-04-17'}, {'end_date': '2012-04-01', 'facility': 'M1', 'site': 'pgh', 'start_date': '2011-07-19'}, {'end_date': '2012-02-05', 'facility': 'M1', 'site': 'gan', 'start_date': '2011-10-09'}, {'end_date': '2010-12-29', 'facility': 'M1', 'site': 'grw', 'start_date': '2009-05-04'}, {'end_date': '2018-03-13', 'facility': 'S1', 'site': 'mcq', 'start_date': '2016-04-01'}, {'end_date': '2007-11-15', 
'facility': 'M1', 'site': 'fkb', 'start_date': '2007-05-07'}, {'end_date': '2019-10-27', 'facility': 'C1', 'site': 'ena', 'start_date': '2014-06-01'}, {'end_date': '2005-09-15', 'facility': 'M1', 'site': 'pye', 'start_date': '2005-02-19'}, {'end_date': '2021-01-29', 'facility': 'C1', 'site': 'sgp', 'start_date': '1998-01-01'}, {'end_date': '2011-10-19', 'facility': 'E10', 'site': 'sgp', 'start_date': '1997-10-31'}, {'end_date': '2021-09-21', 'facility': 'E11', 'site': 'sgp', 'start_date': '1997-08-23'}, {'end_date': '2021-09-21', 'facility': 'E12', 'site': 'sgp', 'start_date': '2001-07-24'}, {'end_date': '2022-06-30', 'facility': 'E13', 'site': 'sgp', 'start_date': '1998-07-10'}, {'end_date': '2021-09-21', 'facility': 'E15', 'site': 'sgp', 'start_date': '1997-09-10'}, {'end_date': '2011-11-15', 'facility': 'E16', 'site': 'sgp', 'start_date': '1997-08-21'}, {'end_date': '2009-11-17', 'facility': 'E18', 'site': 'sgp', 'start_date': '1997-10-17'}, {'end_date': '2011-05-23', 'facility': 'E19', 'site': 'sgp', 'start_date': '1998-07-09'}, {'end_date': '2009-10-14', 'facility': 'E1', 'site': 'sgp', 'start_date': '1997-11-21'}, {'end_date': '2011-11-17', 'facility': 'E20', 'site': 'sgp', 'start_date': '1999-04-24'}, {'end_date': '2009-12-01', 'facility': 'E22', 'site': 'sgp', 'start_date': '1999-01-15'}, {'end_date': '2009-11-14', 'facility': 'E24', 'site': 'sgp', 'start_date': '1997-11-26'}, {'end_date': '2002-04-08', 'facility': 'E25', 'site': 'sgp', 'start_date': '1998-01-11'}, {'end_date': '2009-12-04', 'facility': 'E27', 'site': 'sgp', 'start_date': '2003-12-30'}, {'end_date': '2009-10-20', 'facility': 'E2', 'site': 'sgp', 'start_date': '1997-11-05'}, {'end_date': '2021-06-22', 'facility': 'E31', 'site': 'sgp', 'start_date': '2011-11-26'}, {'end_date': '2021-09-21', 'facility': 'E32', 'site': 'sgp', 'start_date': '2011-11-26'}, {'end_date': '2021-09-21', 'facility': 'E33', 'site': 'sgp', 'start_date': '2011-09-27'}, {'end_date': '2021-09-21', 'facility': 'E34', 
'site': 'sgp', 'start_date': '2011-09-28'}, {'end_date': '2021-09-21', 'facility': 'E35', 'site': 'sgp', 'start_date': '2011-09-28'}, {'end_date': '2021-09-21', 'facility': 'E36', 'site': 'sgp', 'start_date': '2011-10-18'}, {'end_date': '2021-09-21', 'facility': 'E37', 'site': 'sgp', 'start_date': '2011-11-02'}, {'end_date': '2017-10-15', 'facility': 'E38', 'site': 'sgp', 'start_date': '2011-12-15'}, {'end_date': '2009-10-28', 'facility': 'E3', 'site': 'sgp', 'start_date': '1998-07-24'}, {'end_date': '2011-09-26', 'facility': 'E4', 'site': 'sgp', 'start_date': '1997-12-20'}, {'end_date': '2009-11-02', 'facility': 'E5', 'site': 'sgp', 'start_date': '1998-03-22'}, {'end_date': '2011-10-18', 'facility': 'E6', 'site': 'sgp', 'start_date': '2003-12-19'}, {'end_date': '2011-11-14', 'facility': 'E7', 'site': 'sgp', 'start_date': '1999-07-12'}, {'end_date': '2009-11-10', 'facility': 'E8', 'site': 'sgp', 'start_date': '1997-09-03'}, {'end_date': '2021-09-21', 'facility': 'E9', 'site': 'sgp', 'start_date': '2008-03-25'}, {'end_date': '2014-06-04', 'facility': 'C1', 'site': 'twp', 'start_date': '1999-10-22'}, {'end_date': '2013-09-09', 'facility': 'C2', 'site': 'twp', 'start_date': '1999-09-08'}, {'end_date': '2014-10-05', 'facility': 'C3', 'site': 'twp', 'start_date': '2002-03-07'}]" - ] - }, - { - "cell_type": "markdown", - "id": "9faaf875", - "metadata": {}, - "source": [ - "## Define site, facility, and date range" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "ac6764f5", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The following locations and date ranges are available for this VAP:\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
sitefacilitystart_dateend_date
0asiM12016-05-022017-11-01
1corM12018-09-182019-04-30
2pvcM12012-07-092013-07-01
3maoM12015-04-172015-08-29
4pghM12011-07-192012-04-01
5ganM12011-10-092012-02-05
6grwM12009-05-042010-12-29
7mcqS12016-04-012018-03-13
8fkbM12007-05-072007-11-15
9enaC12014-06-012019-10-27
10pyeM12005-02-192005-09-15
11sgpC11998-01-012021-01-29
12sgpE101997-10-312011-10-19
13sgpE111997-08-232021-09-21
14sgpE122001-07-242021-09-21
15sgpE131998-07-102022-06-30
16sgpE151997-09-102021-09-21
17sgpE161997-08-212011-11-15
18sgpE181997-10-172009-11-17
19sgpE191998-07-092011-05-23
20sgpE11997-11-212009-10-14
21sgpE201999-04-242011-11-17
22sgpE221999-01-152009-12-01
23sgpE241997-11-262009-11-14
24sgpE251998-01-112002-04-08
25sgpE272003-12-302009-12-04
26sgpE21997-11-052009-10-20
27sgpE312011-11-262021-06-22
28sgpE322011-11-262021-09-21
29sgpE332011-09-272021-09-21
30sgpE342011-09-282021-09-21
31sgpE352011-09-282021-09-21
32sgpE362011-10-182021-09-21
33sgpE372011-11-022021-09-21
34sgpE382011-12-152017-10-15
35sgpE31998-07-242009-10-28
36sgpE41997-12-202011-09-26
37sgpE51998-03-222009-11-02
38sgpE62003-12-192011-10-18
39sgpE71999-07-122011-11-14
40sgpE81997-09-032009-11-10
41sgpE92008-03-252021-09-21
42twpC11999-10-222014-06-04
43twpC21999-09-082013-09-09
44twpC32002-03-072014-10-05
\n", - "
" - ], - "text/plain": [ - " site facility start_date end_date\n", - "0 asi M1 2016-05-02 2017-11-01\n", - "1 cor M1 2018-09-18 2019-04-30\n", - "2 pvc M1 2012-07-09 2013-07-01\n", - "3 mao M1 2015-04-17 2015-08-29\n", - "4 pgh M1 2011-07-19 2012-04-01\n", - "5 gan M1 2011-10-09 2012-02-05\n", - "6 grw M1 2009-05-04 2010-12-29\n", - "7 mcq S1 2016-04-01 2018-03-13\n", - "8 fkb M1 2007-05-07 2007-11-15\n", - "9 ena C1 2014-06-01 2019-10-27\n", - "10 pye M1 2005-02-19 2005-09-15\n", - "11 sgp C1 1998-01-01 2021-01-29\n", - "12 sgp E10 1997-10-31 2011-10-19\n", - "13 sgp E11 1997-08-23 2021-09-21\n", - "14 sgp E12 2001-07-24 2021-09-21\n", - "15 sgp E13 1998-07-10 2022-06-30\n", - "16 sgp E15 1997-09-10 2021-09-21\n", - "17 sgp E16 1997-08-21 2011-11-15\n", - "18 sgp E18 1997-10-17 2009-11-17\n", - "19 sgp E19 1998-07-09 2011-05-23\n", - "20 sgp E1 1997-11-21 2009-10-14\n", - "21 sgp E20 1999-04-24 2011-11-17\n", - "22 sgp E22 1999-01-15 2009-12-01\n", - "23 sgp E24 1997-11-26 2009-11-14\n", - "24 sgp E25 1998-01-11 2002-04-08\n", - "25 sgp E27 2003-12-30 2009-12-04\n", - "26 sgp E2 1997-11-05 2009-10-20\n", - "27 sgp E31 2011-11-26 2021-06-22\n", - "28 sgp E32 2011-11-26 2021-09-21\n", - "29 sgp E33 2011-09-27 2021-09-21\n", - "30 sgp E34 2011-09-28 2021-09-21\n", - "31 sgp E35 2011-09-28 2021-09-21\n", - "32 sgp E36 2011-10-18 2021-09-21\n", - "33 sgp E37 2011-11-02 2021-09-21\n", - "34 sgp E38 2011-12-15 2017-10-15\n", - "35 sgp E3 1998-07-24 2009-10-28\n", - "36 sgp E4 1997-12-20 2011-09-26\n", - "37 sgp E5 1998-03-22 2009-11-02\n", - "38 sgp E6 2003-12-19 2011-10-18\n", - "39 sgp E7 1999-07-12 2011-11-14\n", - "40 sgp E8 1997-09-03 2009-11-10\n", - "41 sgp E9 2008-03-25 2021-09-21\n", - "42 twp C1 1999-10-22 2014-06-04\n", - "43 twp C2 1999-09-08 2013-09-09\n", - "44 twp C3 2002-03-07 2014-10-05" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "print(\"The following locations and date ranges are available for this VAP:\")\n", - 
"display(pd.DataFrame(LOCATIONS, columns=['site', 'facility', 'start_date', 'end_date']))" - ] - }, - { - "cell_type": "markdown", - "id": "8d132223", - "metadata": {}, - "source": [ - "#### Define site, facility, and date range (date format: YYYY-MM-DD) using the variables below:" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "e563983a", - "metadata": {}, - "outputs": [], - "source": [ - "site_facility = ( 'sgp', 'C1' )\n", - "\n", - "date_start = '2021-01-27'\n", - "date_end = '2021-01-29'" - ] - }, - { - "cell_type": "markdown", - "id": "bccd3dfe-2f99-49a5-bace-ea37e8dd8fc3", - "metadata": {}, - "source": [ - "## Load data files\n", - "Load data files from /data/archive/" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "cb4b9a26-c574-49c0-a521-658fa553e39e", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "'/data/archive/sgp/sgpmfrsrcldod1minC1.c1'" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Compile list of files\n", - "site, facility = site_facility\n", - "d_date_start = datetime.strptime(date_start, '%Y-%m-%d')\n", - "d_date_end = datetime.strptime(date_end, '%Y-%m-%d')\n", - "dir_path = os.path.join(DATA_DIR + site, site + DATASTREAM_NAME + facility + r'.' 
+ DATA_LEVEL )\n", - "dir_path\n" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "6be8f3dc", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "['20210127', '20210128', '20210129']" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "from datetime import date, timedelta\n", - "import pandas as pd\n", - "\n", - "def get_ARM_formated_dates(start_date, end_date):\n", - " \"\"\"\n", - " Get a list of ARM conventional formated date lists, based on start_date and end_date(inclusive)\n", - " EXAMPLE:\n", - " get_ARM_formated_dates(start_date=\"20180219\", end_date=\"20180221\")\n", - " >> [\"20180219\", \"20180220\", \"20180221\"] \n", - " \"\"\"\n", - " \n", - " _start_date = pd.to_datetime(start_date)\n", - " _end_date = pd.to_datetime(end_date)\n", - " \n", - " delta = _end_date - _start_date # returns timedelta \n", - " dates = []\n", - "\n", - " for i in range(delta.days + 1):\n", - " day = _start_date + timedelta(days=i)\n", - " day_formated = day.strftime(format=\"%Y%m%d\")\n", - " dates.append(day_formated)\n", - " return dates\n", - "\n", - "\n", - "get_ARM_formated_dates(start_date=date_start, end_date=date_end)" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "51feea2e", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "['/data/archive/sgp/sgpmfrsrcldod1minC1.c1/sgpmfrsrcldod1minC1.c1.20210127.000000.cdf',\n", - " '/data/archive/sgp/sgpmfrsrcldod1minC1.c1/sgpmfrsrcldod1minC1.c1.20210128.000000.cdf',\n", - " '/data/archive/sgp/sgpmfrsrcldod1minC1.c1/sgpmfrsrcldod1minC1.c1.20210129.000000.cdf']" - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Filter a list of files based on date pattern\n", - "import glob\n", - "dates = get_ARM_formated_dates(start_date=date_start, end_date=date_end)\n", - "files_filter = []\n", - "for date in dates:\n", - " files_filter += 
glob.glob(f'{dir_path}/*.{date}*.*')\n", - " files_filter\n", - "files_filter" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "id": "b0e5d0d7", - "metadata": {}, - "outputs": [ - { - "ename": "ValueError", - "evalue": "cannot reindex or align along dimension 'n_Io' because of conflicting dimension sizes: {113, 114}", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[7], line 3\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;66;03m# Load files as a single dataset\u001b[39;00m\n\u001b[1;32m 2\u001b[0m files_list \u001b[38;5;241m=\u001b[39m files_filter \n\u001b[0;32m----> 3\u001b[0m ds \u001b[38;5;241m=\u001b[39m \u001b[43mact\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mio\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43marmfiles\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mread_netcdf\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfiles_list\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 4\u001b[0m ds\u001b[38;5;241m.\u001b[39mclean\u001b[38;5;241m.\u001b[39mcleanup()\n\u001b[1;32m 5\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mlen\u001b[39m(files_list)\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m files loaded\u001b[39m\u001b[38;5;124m'\u001b[39m)\n", - "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/act/io/armfiles.py:168\u001b[0m, in \u001b[0;36mread_netcdf\u001b[0;34m(filenames, concat_dim, return_None, combine, decode_times, use_cftime, use_base_time, combine_attrs, cleanup_qc, keep_variables, **kwargs)\u001b[0m\n\u001b[1;32m 164\u001b[0m ds \u001b[38;5;241m=\u001b[39m xr\u001b[38;5;241m.\u001b[39mopen_mfdataset(filenames, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n\u001b[1;32m 166\u001b[0m 
\u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 167\u001b[0m \u001b[38;5;66;03m# When all else fails raise the orginal exception\u001b[39;00m\n\u001b[0;32m--> 168\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m exception\n\u001b[1;32m 170\u001b[0m \u001b[38;5;66;03m# If requested use base_time and time_offset to derive time. Assumes that the units\u001b[39;00m\n\u001b[1;32m 171\u001b[0m \u001b[38;5;66;03m# of both are in seconds and that the value is number of seconds since epoch.\u001b[39;00m\n\u001b[1;32m 172\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m use_base_time:\n", - "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/act/io/armfiles.py:143\u001b[0m, in \u001b[0;36mread_netcdf\u001b[0;34m(filenames, concat_dim, return_None, combine, decode_times, use_cftime, use_base_time, combine_attrs, cleanup_qc, keep_variables, **kwargs)\u001b[0m\n\u001b[1;32m 139\u001b[0m except_tuple \u001b[38;5;241m=\u001b[39m except_tuple \u001b[38;5;241m+\u001b[39m (\u001b[38;5;167;01mFileNotFoundError\u001b[39;00m, \u001b[38;5;167;01mOSError\u001b[39;00m)\n\u001b[1;32m 141\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m 142\u001b[0m \u001b[38;5;66;03m# Read data file with Xarray function\u001b[39;00m\n\u001b[0;32m--> 143\u001b[0m ds \u001b[38;5;241m=\u001b[39m \u001b[43mxr\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mopen_mfdataset\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfilenames\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 145\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m except_tuple \u001b[38;5;28;01mas\u001b[39;00m exception:\n\u001b[1;32m 146\u001b[0m \u001b[38;5;66;03m# If requested return None for File not found error\u001b[39;00m\n\u001b[1;32m 147\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mtype\u001b[39m(exception)\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__name__\u001b[39m 
\u001b[38;5;241m==\u001b[39m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mFileNotFoundError\u001b[39m\u001b[38;5;124m'\u001b[39m:\n", - "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/xarray/backends/api.py:1026\u001b[0m, in \u001b[0;36mopen_mfdataset\u001b[0;34m(paths, chunks, concat_dim, compat, preprocess, engine, data_vars, coords, combine, parallel, join, attrs_file, combine_attrs, **kwargs)\u001b[0m\n\u001b[1;32m 1013\u001b[0m combined \u001b[38;5;241m=\u001b[39m _nested_combine(\n\u001b[1;32m 1014\u001b[0m datasets,\n\u001b[1;32m 1015\u001b[0m concat_dims\u001b[38;5;241m=\u001b[39mconcat_dim,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 1021\u001b[0m combine_attrs\u001b[38;5;241m=\u001b[39mcombine_attrs,\n\u001b[1;32m 1022\u001b[0m )\n\u001b[1;32m 1023\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m combine \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mby_coords\u001b[39m\u001b[38;5;124m\"\u001b[39m:\n\u001b[1;32m 1024\u001b[0m \u001b[38;5;66;03m# Redo ordering from coordinates, ignoring how they were ordered\u001b[39;00m\n\u001b[1;32m 1025\u001b[0m \u001b[38;5;66;03m# previously\u001b[39;00m\n\u001b[0;32m-> 1026\u001b[0m combined \u001b[38;5;241m=\u001b[39m \u001b[43mcombine_by_coords\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1027\u001b[0m \u001b[43m \u001b[49m\u001b[43mdatasets\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1028\u001b[0m \u001b[43m \u001b[49m\u001b[43mcompat\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcompat\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1029\u001b[0m \u001b[43m \u001b[49m\u001b[43mdata_vars\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdata_vars\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1030\u001b[0m \u001b[43m \u001b[49m\u001b[43mcoords\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcoords\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1031\u001b[0m \u001b[43m 
\u001b[49m\u001b[43mjoin\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mjoin\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1032\u001b[0m \u001b[43m \u001b[49m\u001b[43mcombine_attrs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcombine_attrs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1033\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1034\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 1035\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[1;32m 1036\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m{}\u001b[39;00m\u001b[38;5;124m is an invalid option for the keyword argument\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 1037\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m ``combine``\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;241m.\u001b[39mformat(combine)\n\u001b[1;32m 1038\u001b[0m )\n", - "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/xarray/core/combine.py:982\u001b[0m, in \u001b[0;36mcombine_by_coords\u001b[0;34m(data_objects, compat, data_vars, coords, fill_value, join, combine_attrs, datasets)\u001b[0m\n\u001b[1;32m 980\u001b[0m concatenated_grouped_by_data_vars \u001b[38;5;241m=\u001b[39m []\n\u001b[1;32m 981\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m \u001b[38;5;28mvars\u001b[39m, datasets_with_same_vars \u001b[38;5;129;01min\u001b[39;00m grouped_by_vars:\n\u001b[0;32m--> 982\u001b[0m concatenated \u001b[38;5;241m=\u001b[39m \u001b[43m_combine_single_variable_hypercube\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 983\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;28;43mlist\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mdatasets_with_same_vars\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 984\u001b[0m \u001b[43m \u001b[49m\u001b[43mfill_value\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mfill_value\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 985\u001b[0m \u001b[43m 
\u001b[49m\u001b[43mdata_vars\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdata_vars\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 986\u001b[0m \u001b[43m \u001b[49m\u001b[43mcoords\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcoords\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 987\u001b[0m \u001b[43m \u001b[49m\u001b[43mcompat\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcompat\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 988\u001b[0m \u001b[43m \u001b[49m\u001b[43mjoin\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mjoin\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 989\u001b[0m \u001b[43m \u001b[49m\u001b[43mcombine_attrs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcombine_attrs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 990\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 991\u001b[0m concatenated_grouped_by_data_vars\u001b[38;5;241m.\u001b[39mappend(concatenated)\n\u001b[1;32m 993\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m merge(\n\u001b[1;32m 994\u001b[0m concatenated_grouped_by_data_vars,\n\u001b[1;32m 995\u001b[0m compat\u001b[38;5;241m=\u001b[39mcompat,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 998\u001b[0m combine_attrs\u001b[38;5;241m=\u001b[39mcombine_attrs,\n\u001b[1;32m 999\u001b[0m )\n", - "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/xarray/core/combine.py:640\u001b[0m, in \u001b[0;36m_combine_single_variable_hypercube\u001b[0;34m(datasets, fill_value, data_vars, coords, compat, join, combine_attrs)\u001b[0m\n\u001b[1;32m 637\u001b[0m _check_dimension_depth_tile_ids(combined_ids)\n\u001b[1;32m 639\u001b[0m \u001b[38;5;66;03m# Concatenate along all of concat_dims one by one to create single ds\u001b[39;00m\n\u001b[0;32m--> 640\u001b[0m concatenated \u001b[38;5;241m=\u001b[39m \u001b[43m_combine_nd\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 641\u001b[0m \u001b[43m \u001b[49m\u001b[43mcombined_ids\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 642\u001b[0m \u001b[43m 
\u001b[49m\u001b[43mconcat_dims\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mconcat_dims\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 643\u001b[0m \u001b[43m \u001b[49m\u001b[43mdata_vars\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdata_vars\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 644\u001b[0m \u001b[43m \u001b[49m\u001b[43mcoords\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcoords\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 645\u001b[0m \u001b[43m \u001b[49m\u001b[43mcompat\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcompat\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 646\u001b[0m \u001b[43m \u001b[49m\u001b[43mfill_value\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mfill_value\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 647\u001b[0m \u001b[43m \u001b[49m\u001b[43mjoin\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mjoin\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 648\u001b[0m \u001b[43m \u001b[49m\u001b[43mcombine_attrs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcombine_attrs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 649\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 651\u001b[0m \u001b[38;5;66;03m# Check the overall coordinates are monotonically increasing\u001b[39;00m\n\u001b[1;32m 652\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m dim \u001b[38;5;129;01min\u001b[39;00m concat_dims:\n", - "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/xarray/core/combine.py:239\u001b[0m, in \u001b[0;36m_combine_nd\u001b[0;34m(combined_ids, concat_dims, data_vars, coords, compat, fill_value, join, combine_attrs)\u001b[0m\n\u001b[1;32m 235\u001b[0m \u001b[38;5;66;03m# Each iteration of this loop reduces the length of the tile_ids tuples\u001b[39;00m\n\u001b[1;32m 236\u001b[0m \u001b[38;5;66;03m# by one. 
It always combines along the first dimension, removing the first\u001b[39;00m\n\u001b[1;32m 237\u001b[0m \u001b[38;5;66;03m# element of the tuple\u001b[39;00m\n\u001b[1;32m 238\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m concat_dim \u001b[38;5;129;01min\u001b[39;00m concat_dims:\n\u001b[0;32m--> 239\u001b[0m combined_ids \u001b[38;5;241m=\u001b[39m \u001b[43m_combine_all_along_first_dim\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 240\u001b[0m \u001b[43m \u001b[49m\u001b[43mcombined_ids\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 241\u001b[0m \u001b[43m \u001b[49m\u001b[43mdim\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mconcat_dim\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 242\u001b[0m \u001b[43m \u001b[49m\u001b[43mdata_vars\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdata_vars\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 243\u001b[0m \u001b[43m \u001b[49m\u001b[43mcoords\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcoords\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 244\u001b[0m \u001b[43m \u001b[49m\u001b[43mcompat\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcompat\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 245\u001b[0m \u001b[43m \u001b[49m\u001b[43mfill_value\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mfill_value\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 246\u001b[0m \u001b[43m \u001b[49m\u001b[43mjoin\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mjoin\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 247\u001b[0m \u001b[43m \u001b[49m\u001b[43mcombine_attrs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcombine_attrs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 248\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 249\u001b[0m (combined_ds,) \u001b[38;5;241m=\u001b[39m combined_ids\u001b[38;5;241m.\u001b[39mvalues()\n\u001b[1;32m 250\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m combined_ds\n", - "File 
\u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/xarray/core/combine.py:275\u001b[0m, in \u001b[0;36m_combine_all_along_first_dim\u001b[0;34m(combined_ids, dim, data_vars, coords, compat, fill_value, join, combine_attrs)\u001b[0m\n\u001b[1;32m 273\u001b[0m combined_ids \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mdict\u001b[39m(\u001b[38;5;28msorted\u001b[39m(group))\n\u001b[1;32m 274\u001b[0m datasets \u001b[38;5;241m=\u001b[39m combined_ids\u001b[38;5;241m.\u001b[39mvalues()\n\u001b[0;32m--> 275\u001b[0m new_combined_ids[new_id] \u001b[38;5;241m=\u001b[39m \u001b[43m_combine_1d\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 276\u001b[0m \u001b[43m \u001b[49m\u001b[43mdatasets\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdim\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcompat\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdata_vars\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcoords\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mfill_value\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mjoin\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcombine_attrs\u001b[49m\n\u001b[1;32m 277\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 278\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m new_combined_ids\n", - "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/xarray/core/combine.py:298\u001b[0m, in \u001b[0;36m_combine_1d\u001b[0;34m(datasets, concat_dim, compat, data_vars, coords, fill_value, join, combine_attrs)\u001b[0m\n\u001b[1;32m 296\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m concat_dim \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 297\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 298\u001b[0m combined \u001b[38;5;241m=\u001b[39m \u001b[43mconcat\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 299\u001b[0m \u001b[43m 
\u001b[49m\u001b[43mdatasets\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 300\u001b[0m \u001b[43m \u001b[49m\u001b[43mdim\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mconcat_dim\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 301\u001b[0m \u001b[43m \u001b[49m\u001b[43mdata_vars\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdata_vars\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 302\u001b[0m \u001b[43m \u001b[49m\u001b[43mcoords\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcoords\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 303\u001b[0m \u001b[43m \u001b[49m\u001b[43mcompat\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcompat\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 304\u001b[0m \u001b[43m \u001b[49m\u001b[43mfill_value\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mfill_value\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 305\u001b[0m \u001b[43m \u001b[49m\u001b[43mjoin\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mjoin\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 306\u001b[0m \u001b[43m \u001b[49m\u001b[43mcombine_attrs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcombine_attrs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 307\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 308\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m err:\n\u001b[1;32m 309\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mencountered unexpected variable\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mstr\u001b[39m(err):\n", - "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/xarray/core/concat.py:248\u001b[0m, in \u001b[0;36mconcat\u001b[0;34m(objs, dim, data_vars, coords, compat, positions, fill_value, join, combine_attrs)\u001b[0m\n\u001b[1;32m 236\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m _dataarray_concat(\n\u001b[1;32m 237\u001b[0m 
objs,\n\u001b[1;32m 238\u001b[0m dim\u001b[38;5;241m=\u001b[39mdim,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 245\u001b[0m combine_attrs\u001b[38;5;241m=\u001b[39mcombine_attrs,\n\u001b[1;32m 246\u001b[0m )\n\u001b[1;32m 247\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(first_obj, Dataset):\n\u001b[0;32m--> 248\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43m_dataset_concat\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 249\u001b[0m \u001b[43m \u001b[49m\u001b[43mobjs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 250\u001b[0m \u001b[43m \u001b[49m\u001b[43mdim\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdim\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 251\u001b[0m \u001b[43m \u001b[49m\u001b[43mdata_vars\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdata_vars\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 252\u001b[0m \u001b[43m \u001b[49m\u001b[43mcoords\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcoords\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 253\u001b[0m \u001b[43m \u001b[49m\u001b[43mcompat\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcompat\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 254\u001b[0m \u001b[43m \u001b[49m\u001b[43mpositions\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mpositions\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 255\u001b[0m \u001b[43m \u001b[49m\u001b[43mfill_value\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mfill_value\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 256\u001b[0m \u001b[43m \u001b[49m\u001b[43mjoin\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mjoin\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 257\u001b[0m \u001b[43m \u001b[49m\u001b[43mcombine_attrs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcombine_attrs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 258\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 259\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 260\u001b[0m 
\u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mTypeError\u001b[39;00m(\n\u001b[1;32m 261\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcan only concatenate xarray Dataset and DataArray \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 262\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mobjects, got \u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mtype\u001b[39m(first_obj)\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 263\u001b[0m )\n", - "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/xarray/core/concat.py:471\u001b[0m, in \u001b[0;36m_dataset_concat\u001b[0;34m(datasets, dim, data_vars, coords, compat, positions, fill_value, join, combine_attrs)\u001b[0m\n\u001b[1;32m 468\u001b[0m \u001b[38;5;66;03m# Make sure we're working on a copy (we'll be loading variables)\u001b[39;00m\n\u001b[1;32m 469\u001b[0m datasets \u001b[38;5;241m=\u001b[39m [ds\u001b[38;5;241m.\u001b[39mcopy() \u001b[38;5;28;01mfor\u001b[39;00m ds \u001b[38;5;129;01min\u001b[39;00m datasets]\n\u001b[1;32m 470\u001b[0m datasets \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mlist\u001b[39m(\n\u001b[0;32m--> 471\u001b[0m \u001b[43malign\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mdatasets\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mjoin\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mjoin\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcopy\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mexclude\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m[\u001b[49m\u001b[43mdim\u001b[49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mfill_value\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mfill_value\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 472\u001b[0m )\n\u001b[1;32m 474\u001b[0m dim_coords, dims_sizes, coord_names, 
data_names \u001b[38;5;241m=\u001b[39m _parse_datasets(datasets)\n\u001b[1;32m 475\u001b[0m dim_names \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mset\u001b[39m(dim_coords)\n", - "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/xarray/core/alignment.py:797\u001b[0m, in \u001b[0;36malign\u001b[0;34m(join, copy, indexes, exclude, fill_value, *objects)\u001b[0m\n\u001b[1;32m 601\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 602\u001b[0m \u001b[38;5;124;03mGiven any number of Dataset and/or DataArray objects, returns new\u001b[39;00m\n\u001b[1;32m 603\u001b[0m \u001b[38;5;124;03mobjects with aligned indexes and dimension sizes.\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 787\u001b[0m \n\u001b[1;32m 788\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 789\u001b[0m aligner \u001b[38;5;241m=\u001b[39m Aligner(\n\u001b[1;32m 790\u001b[0m objects,\n\u001b[1;32m 791\u001b[0m join\u001b[38;5;241m=\u001b[39mjoin,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 795\u001b[0m fill_value\u001b[38;5;241m=\u001b[39mfill_value,\n\u001b[1;32m 796\u001b[0m )\n\u001b[0;32m--> 797\u001b[0m \u001b[43maligner\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43malign\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 798\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m aligner\u001b[38;5;241m.\u001b[39mresults\n", - "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/xarray/core/alignment.py:585\u001b[0m, in \u001b[0;36mAligner.align\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 583\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39massert_no_index_conflict()\n\u001b[1;32m 584\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39malign_indexes()\n\u001b[0;32m--> 585\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43massert_unindexed_dim_sizes_equal\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 
587\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mjoin \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124moverride\u001b[39m\u001b[38;5;124m\"\u001b[39m:\n\u001b[1;32m 588\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39moverride_indexes()\n", - "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/xarray/core/alignment.py:484\u001b[0m, in \u001b[0;36mAligner.assert_unindexed_dim_sizes_equal\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 482\u001b[0m add_err_msg \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 483\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(sizes) \u001b[38;5;241m>\u001b[39m \u001b[38;5;241m1\u001b[39m:\n\u001b[0;32m--> 484\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[1;32m 485\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcannot reindex or align along dimension \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mdim\u001b[38;5;132;01m!r}\u001b[39;00m\u001b[38;5;124m \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 486\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mbecause of conflicting dimension sizes: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00msizes\u001b[38;5;132;01m!r}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;241m+\u001b[39m add_err_msg\n\u001b[1;32m 487\u001b[0m )\n", - "\u001b[0;31mValueError\u001b[0m: cannot reindex or align along dimension 'n_Io' because of conflicting dimension sizes: {113, 114}" - ] - } - ], - "source": [ - "# Load files as a single dataset\n", - "files_list = files_filter \n", - "ds = act.io.armfiles.read_netcdf(files_list)\n", - "ds.clean.cleanup()\n", - "print(f'{len(files_list)} files loaded')\n", - "ds\n" - ] - }, - { - "cell_type": "markdown", - "id": "4a551094-9ec0-4b64-b80a-9940573c2f50", - "metadata": {}, - 
"source": [ - "## Plot time series data\n", - "#### Define the list of variables to be plotted:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "d09b789e-84f1-4605-846b-a72c110c8048", - "metadata": {}, - "outputs": [], - "source": [ - "variables_to_plot = ['optical_depth_instantaneous']" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "3458fb08-035b-4898-9253-0a94e6f9c97b", - "metadata": {}, - "outputs": [], - "source": [ - "ts_display = act.plotting.TimeSeriesDisplay(ds)\n", - "ts_display.add_subplots((len(variables_to_plot),), figsize = (9.5,4*len(variables_to_plot)))\n", - "\n", - "for i,v in enumerate(variables_to_plot):\n", - " ts_ax = ts_display.plot(v, subplot_index=(i,), set_title=ds.variables[v].attrs['long_name'],)\n", - " ts_ax.grid()\n", - "\n", - "plt.show()\n" - ] - }, - { - "cell_type": "markdown", - "id": "194399aa-1907-452b-8ba9-bc31d7f60291", - "metadata": {}, - "source": [ - "## Quality check plots\n", - "#### Define variable for QC plot" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "9a0ef63d-3eeb-48fc-a24a-43258b6134b8", - "metadata": {}, - "outputs": [], - "source": [ - "qc_variable = 'optical_depth_instantaneous'" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "532663a3-4dc0-4497-bda8-018c5f91e1c4", - "metadata": {}, - "outputs": [], - "source": [ - "# QC Plot\n", - "if ('qc_'+qc_variable) in ds.variables:\n", - "\n", - " # Plot\n", - " qc_display = act.plotting.TimeSeriesDisplay(ds)\n", - " qc_display.add_subplots((2,), figsize = (9.5,10))\n", - " qc_ax = qc_display.plot(qc_variable, subplot_index=(0,), set_title=\"QC results on field: \" + qc_variable,)\n", - " qc_ax.grid()\n", - " qc_display.qc_flag_block_plot(qc_variable, subplot_index=(1,))\n", - "\n", - " plt.show()\n", - "else:\n", - " print(f'QC not available for the selected field: {qc_variable}')\n" - ] - }, - { - "cell_type": "markdown", - "id": "2308db16-f362-4033-a11a-c3e5e75ad9ba", - 
"metadata": {}, - "source": [ - "## Field selection dropdown menu\n", - "Select variable to be plotted from a dropdown menu" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "d5792fa8-2253-441a-8ac6-1e07a0345031", - "metadata": {}, - "outputs": [], - "source": [ - "plt.ioff()\n", - "\n", - "# populate dropdown menu with available variables \n", - "available_variables = [v for v in ds.variables if not('time' in v or v.startswith('qc_') or v.startswith('source_')) and 'long_name' in ds.variables[v].attrs]\n", - "d_variable = 'optical_depth_instantaneous'\n", - "dropdown = widgets.Dropdown(\n", - " options = [(ds.variables[v].attrs['long_name'], v) for v in available_variables],\n", - " value= d_variable,\n", - " description='Field:',\n", - " disabled=False,\n", - ")\n", - "dropdown.layout.margin = '0px 30% 0px 20%'\n", - "dropdown.layout.width = '50%'\n", - "\n", - "# set up display\n", - "i_display = act.plotting.TimeSeriesDisplay(ds)\n", - "i_display.add_subplots((1,), figsize = (9.5,5))\n", - "i_ax = i_display.plot(d_variable, subplot_index=(0,), set_title=ds.variables[d_variable].attrs['long_name'],)\n", - "i_ax.grid()\n", - "i_fig = i_display.fig\n", - "\n", - "# update plot callback function\n", - "def update_plot(change):\n", - " i_ax.cla()\n", - " i_ax_new = i_display.plot(change.new, subplot_index=(0,), set_title=ds.variables[change.new].attrs['long_name'],)\n", - " i_ax_new.grid()\n", - " i_fig.canvas.draw()\n", - " i_fig.canvas.flush_events()\n", - "\n", - "dropdown.observe(update_plot, names='value')\n", - "\n", - "widgets.AppLayout(\n", - " header=dropdown,\n", - " center=i_fig.canvas,\n", - " pane_heights=[1, 6,1]\n", - ")\n" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - 
"nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.16" - }, - "widgets": { - "application/vnd.jupyter.widget-state+json": { - "state": {}, - "version_major": 2, - "version_minor": 0 - } - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/VAPs/quicklook/MICROBASE/.ipynb_checkpoints/microbasepi2.c1-checkpoint.ipynb b/VAPs/quicklook/MICROBASE/.ipynb_checkpoints/microbasepi2.c1-checkpoint.ipynb deleted file mode 100644 index 72c386b4..00000000 --- a/VAPs/quicklook/MICROBASE/.ipynb_checkpoints/microbasepi2.c1-checkpoint.ipynb +++ /dev/null @@ -1,468 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "70840257-70e4-45e2-b491-14bff5a257a3", - "metadata": {}, - "source": [ - "# MICROBASEPI2.C1 Plots\n", - "\n", - "[Click here](https://www.arm.gov/capabilities/vaps/microbase) for more information about this vap." - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "id": "460fd89f-e034-452c-b837-f65c5958264f", - "metadata": {}, - "outputs": [], - "source": [ - "%matplotlib widget\n", - "import ipywidgets as widgets\n", - "\n", - "import matplotlib.pyplot as plt\n", - "import ipywidgets as widgets\n", - "import numpy as np\n", - "import pandas as pd\n", - "import os\n", - "from datetime import datetime\n", - "\n", - "import act\n", - "import xarray as xr\n", - "\n", - "# Data archive directory\n", - "DATA_DIR = r'/data/archive/'\n", - "\n", - "# Datastream info\n", - "DATASTREAM_NAME = 'microbasepi2'\n", - "DATA_LEVEL = 'c1'\n", - "LOCATIONS = [{'end_date': '2011-03-22', 'facility': 'C1', 'site': 'nsa', 'start_date': '2002-01-01'}, {'end_date': '2010-12-30', 'facility': 'C1', 'site': 'sgp', 'start_date': '1996-11-08'}, {'end_date': '2011-02-25', 'facility': 'C1', 'site': 'twp', 'start_date': '1999-07-01'}, {'end_date': '2009-02-13', 'facility': 'C2', 'site': 'twp', 'start_date': '2002-01-01'}, {'end_date': '2011-02-27', 'facility': 'C3', 'site': 'twp', 'start_date': '2005-11-04'}]" - ] - }, - { - "cell_type": 
"markdown", - "id": "9faaf875", - "metadata": {}, - "source": [ - "## Define site, facility, and date range" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "ac6764f5", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The following locations and date ranges are available for this VAP:\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
sitefacilitystart_dateend_date
0nsaC12002-01-012011-03-22
1sgpC11996-11-082010-12-30
2twpC11999-07-012011-02-25
3twpC22002-01-012009-02-13
4twpC32005-11-042011-02-27
\n", - "
" - ], - "text/plain": [ - " site facility start_date end_date\n", - "0 nsa C1 2002-01-01 2011-03-22\n", - "1 sgp C1 1996-11-08 2010-12-30\n", - "2 twp C1 1999-07-01 2011-02-25\n", - "3 twp C2 2002-01-01 2009-02-13\n", - "4 twp C3 2005-11-04 2011-02-27" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "print(\"The following locations and date ranges are available for this VAP:\")\n", - "display(pd.DataFrame(LOCATIONS, columns=['site', 'facility', 'start_date', 'end_date']))" - ] - }, - { - "cell_type": "markdown", - "id": "8d132223", - "metadata": {}, - "source": [ - "#### Define site, facility, and date range (date format: YYYY-MM-DD) using the variables below:" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "e563983a", - "metadata": {}, - "outputs": [], - "source": [ - "site_facility = ( 'sgp', 'C1' )\n", - "\n", - "date_start = '2010-12-28'\n", - "date_end = '2010-12-30'" - ] - }, - { - "cell_type": "markdown", - "id": "bccd3dfe-2f99-49a5-bace-ea37e8dd8fc3", - "metadata": {}, - "source": [ - "## Load data files\n", - "Load data files from /data/archive/" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "cb4b9a26-c574-49c0-a521-658fa553e39e", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "'/data/archive/sgp/sgpmicrobasepi2C1.c1'" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Compile list of files\n", - "site, facility = site_facility\n", - "d_date_start = datetime.strptime(date_start, '%Y-%m-%d')\n", - "d_date_end = datetime.strptime(date_end, '%Y-%m-%d')\n", - "dir_path = os.path.join(DATA_DIR + site, site + DATASTREAM_NAME + facility + r'.' 
+ DATA_LEVEL )\n", - "dir_path\n" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "6be8f3dc", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "['20101228', '20101229', '20101230']" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "from datetime import date, timedelta\n", - "import pandas as pd\n", - "\n", - "def get_ARM_formated_dates(start_date, end_date):\n", - " \"\"\"\n", - " Get a list of ARM conventional formated date lists, based on start_date and end_date(inclusive)\n", - " EXAMPLE:\n", - " get_ARM_formated_dates(start_date=\"20180219\", end_date=\"20180221\")\n", - " >> [\"20180219\", \"20180220\", \"20180221\"] \n", - " \"\"\"\n", - " \n", - " _start_date = pd.to_datetime(start_date)\n", - " _end_date = pd.to_datetime(end_date)\n", - " \n", - " delta = _end_date - _start_date # returns timedelta \n", - " dates = []\n", - "\n", - " for i in range(delta.days + 1):\n", - " day = _start_date + timedelta(days=i)\n", - " day_formated = day.strftime(format=\"%Y%m%d\")\n", - " dates.append(day_formated)\n", - " return dates\n", - "\n", - "\n", - "get_ARM_formated_dates(start_date=date_start, end_date=date_end)" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "51feea2e", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "['/data/archive/sgp/sgpmicrobasepi2C1.c1/sgpmicrobasepi2C1.c1.20101228.000000.cdf',\n", - " '/data/archive/sgp/sgpmicrobasepi2C1.c1/sgpmicrobasepi2C1.c1.20101229.000000.cdf',\n", - " '/data/archive/sgp/sgpmicrobasepi2C1.c1/sgpmicrobasepi2C1.c1.20101230.000000.cdf']" - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Filter a list of files based on date pattern\n", - "import glob\n", - "dates = get_ARM_formated_dates(start_date=date_start, end_date=date_end)\n", - "files_filter = []\n", - "for date in dates:\n", - " files_filter += 
glob.glob(f'{dir_path}/*.{date}*.*')\n", - " files_filter\n", - "files_filter" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "id": "2d9b10cd", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Requirement already satisfied: cftime in /home/kefeimo/.conda/envs/jupyter-vaps/lib/python3.8/site-packages (1.6.2)\n", - "Requirement already satisfied: numpy>1.13.3 in /home/kefeimo/.conda/envs/jupyter-vaps/lib/python3.8/site-packages (from cftime) (1.24.2)\n" - ] - } - ], - "source": [ - "! pip install cftime " - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "id": "b0e5d0d7", - "metadata": {}, - "outputs": [ - { - "ename": "ValueError", - "evalue": "Failed to decode variable 'time_offset': unable to decode time units 'seconds since base_time' with 'the default calendar'. Try opening your dataset with decode_times=False or installing cftime if it is not installed.", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", - "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/xarray/coding/times.py:184\u001b[0m, in \u001b[0;36m_decode_cf_datetime_dtype\u001b[0;34m(data, units, calendar, use_cftime)\u001b[0m\n\u001b[1;32m 183\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 184\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[43mdecode_cf_datetime\u001b[49m\u001b[43m(\u001b[49m\u001b[43mexample_value\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43munits\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcalendar\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43muse_cftime\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 185\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m:\n", - "File 
\u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/xarray/coding/times.py:308\u001b[0m, in \u001b[0;36mdecode_cf_datetime\u001b[0;34m(num_dates, units, calendar, use_cftime)\u001b[0m\n\u001b[1;32m 307\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m use_cftime:\n\u001b[0;32m--> 308\u001b[0m dates \u001b[38;5;241m=\u001b[39m \u001b[43m_decode_datetime_with_cftime\u001b[49m\u001b[43m(\u001b[49m\u001b[43mflat_num_dates\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43munits\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcalendar\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 309\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n", - "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/xarray/coding/times.py:208\u001b[0m, in \u001b[0;36m_decode_datetime_with_cftime\u001b[0;34m(num_dates, units, calendar)\u001b[0m\n\u001b[1;32m 206\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m num_dates\u001b[38;5;241m.\u001b[39msize \u001b[38;5;241m>\u001b[39m \u001b[38;5;241m0\u001b[39m:\n\u001b[1;32m 207\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m np\u001b[38;5;241m.\u001b[39masarray(\n\u001b[0;32m--> 208\u001b[0m \u001b[43mcftime\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mnum2date\u001b[49m\u001b[43m(\u001b[49m\u001b[43mnum_dates\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43munits\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcalendar\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43monly_use_cftime_datetimes\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m)\u001b[49m\n\u001b[1;32m 209\u001b[0m )\n\u001b[1;32m 210\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n", - "File \u001b[0;32msrc/cftime/_cftime.pyx:580\u001b[0m, in \u001b[0;36mcftime._cftime.num2date\u001b[0;34m()\u001b[0m\n", - "File \u001b[0;32msrc/cftime/_cftime.pyx:110\u001b[0m, in \u001b[0;36mcftime._cftime._dateparse\u001b[0;34m()\u001b[0m\n", - "File 
\u001b[0;32msrc/cftime/_cftime.pyx:767\u001b[0m, in \u001b[0;36mcftime._cftime._parse_date\u001b[0;34m()\u001b[0m\n", - "\u001b[0;31mValueError\u001b[0m: Unable to parse date string 'base_time'", - "\nDuring handling of the above exception, another exception occurred:\n", - "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", - "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/xarray/conventions.py:551\u001b[0m, in \u001b[0;36mdecode_cf_variables\u001b[0;34m(variables, attributes, concat_characters, mask_and_scale, decode_times, decode_coords, drop_variables, use_cftime, decode_timedelta)\u001b[0m\n\u001b[1;32m 550\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 551\u001b[0m new_vars[k] \u001b[38;5;241m=\u001b[39m \u001b[43mdecode_cf_variable\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 552\u001b[0m \u001b[43m \u001b[49m\u001b[43mk\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 553\u001b[0m \u001b[43m \u001b[49m\u001b[43mv\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 554\u001b[0m \u001b[43m \u001b[49m\u001b[43mconcat_characters\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mconcat_characters\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 555\u001b[0m \u001b[43m \u001b[49m\u001b[43mmask_and_scale\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmask_and_scale\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 556\u001b[0m \u001b[43m \u001b[49m\u001b[43mdecode_times\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdecode_times\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 557\u001b[0m \u001b[43m \u001b[49m\u001b[43mstack_char_dim\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstack_char_dim\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 558\u001b[0m \u001b[43m \u001b[49m\u001b[43muse_cftime\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43muse_cftime\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 559\u001b[0m \u001b[43m 
\u001b[49m\u001b[43mdecode_timedelta\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdecode_timedelta\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 560\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 561\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n", - "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/xarray/conventions.py:397\u001b[0m, in \u001b[0;36mdecode_cf_variable\u001b[0;34m(name, var, concat_characters, mask_and_scale, decode_times, decode_endianness, stack_char_dim, use_cftime, decode_timedelta)\u001b[0m\n\u001b[1;32m 396\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m decode_times:\n\u001b[0;32m--> 397\u001b[0m var \u001b[38;5;241m=\u001b[39m \u001b[43mtimes\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mCFDatetimeCoder\u001b[49m\u001b[43m(\u001b[49m\u001b[43muse_cftime\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43muse_cftime\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdecode\u001b[49m\u001b[43m(\u001b[49m\u001b[43mvar\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mname\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mname\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 399\u001b[0m dimensions, data, attributes, encoding \u001b[38;5;241m=\u001b[39m variables\u001b[38;5;241m.\u001b[39munpack_for_decoding(var)\n", - "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/xarray/coding/times.py:716\u001b[0m, in \u001b[0;36mCFDatetimeCoder.decode\u001b[0;34m(self, variable, name)\u001b[0m\n\u001b[1;32m 715\u001b[0m calendar \u001b[38;5;241m=\u001b[39m pop_to(attrs, encoding, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcalendar\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m--> 716\u001b[0m dtype \u001b[38;5;241m=\u001b[39m \u001b[43m_decode_cf_datetime_dtype\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdata\u001b[49m\u001b[43m,\u001b[49m\u001b[43m 
\u001b[49m\u001b[43munits\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcalendar\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43muse_cftime\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 717\u001b[0m transform \u001b[38;5;241m=\u001b[39m partial(\n\u001b[1;32m 718\u001b[0m decode_cf_datetime,\n\u001b[1;32m 719\u001b[0m units\u001b[38;5;241m=\u001b[39munits,\n\u001b[1;32m 720\u001b[0m calendar\u001b[38;5;241m=\u001b[39mcalendar,\n\u001b[1;32m 721\u001b[0m use_cftime\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39muse_cftime,\n\u001b[1;32m 722\u001b[0m )\n", - "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/xarray/coding/times.py:194\u001b[0m, in \u001b[0;36m_decode_cf_datetime_dtype\u001b[0;34m(data, units, calendar, use_cftime)\u001b[0m\n\u001b[1;32m 189\u001b[0m msg \u001b[38;5;241m=\u001b[39m (\n\u001b[1;32m 190\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124munable to decode time units \u001b[39m\u001b[38;5;132;01m{\u001b[39;00munits\u001b[38;5;132;01m!r}\u001b[39;00m\u001b[38;5;124m with \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mcalendar_msg\u001b[38;5;132;01m!r}\u001b[39;00m\u001b[38;5;124m. Try \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 191\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mopening your dataset with decode_times=False or installing cftime \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 192\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mif it is not installed.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 193\u001b[0m )\n\u001b[0;32m--> 194\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(msg)\n\u001b[1;32m 195\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n", - "\u001b[0;31mValueError\u001b[0m: unable to decode time units 'seconds since base_time' with 'the default calendar'. 
Try opening your dataset with decode_times=False or installing cftime if it is not installed.", - "\nDuring handling of the above exception, another exception occurred:\n", - "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[7], line 3\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;66;03m# Load files as a single dataset\u001b[39;00m\n\u001b[1;32m 2\u001b[0m files_list \u001b[38;5;241m=\u001b[39m files_filter \n\u001b[0;32m----> 3\u001b[0m ds \u001b[38;5;241m=\u001b[39m \u001b[43mact\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mio\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43marmfiles\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mread_netcdf\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfiles_list\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 4\u001b[0m ds\u001b[38;5;241m.\u001b[39mclean\u001b[38;5;241m.\u001b[39mcleanup()\n\u001b[1;32m 5\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mlen\u001b[39m(files_list)\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m files loaded\u001b[39m\u001b[38;5;124m'\u001b[39m)\n", - "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/act/io/armfiles.py:168\u001b[0m, in \u001b[0;36mread_netcdf\u001b[0;34m(filenames, concat_dim, return_None, combine, decode_times, use_cftime, use_base_time, combine_attrs, cleanup_qc, keep_variables, **kwargs)\u001b[0m\n\u001b[1;32m 164\u001b[0m ds \u001b[38;5;241m=\u001b[39m xr\u001b[38;5;241m.\u001b[39mopen_mfdataset(filenames, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n\u001b[1;32m 166\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 167\u001b[0m \u001b[38;5;66;03m# When all else fails raise the orginal exception\u001b[39;00m\n\u001b[0;32m--> 168\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m exception\n\u001b[1;32m 170\u001b[0m \u001b[38;5;66;03m# If requested use base_time and time_offset to derive time. 
Assumes that the units\u001b[39;00m\n\u001b[1;32m 171\u001b[0m \u001b[38;5;66;03m# of both are in seconds and that the value is number of seconds since epoch.\u001b[39;00m\n\u001b[1;32m 172\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m use_base_time:\n", - "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/act/io/armfiles.py:143\u001b[0m, in \u001b[0;36mread_netcdf\u001b[0;34m(filenames, concat_dim, return_None, combine, decode_times, use_cftime, use_base_time, combine_attrs, cleanup_qc, keep_variables, **kwargs)\u001b[0m\n\u001b[1;32m 139\u001b[0m except_tuple \u001b[38;5;241m=\u001b[39m except_tuple \u001b[38;5;241m+\u001b[39m (\u001b[38;5;167;01mFileNotFoundError\u001b[39;00m, \u001b[38;5;167;01mOSError\u001b[39;00m)\n\u001b[1;32m 141\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m 142\u001b[0m \u001b[38;5;66;03m# Read data file with Xarray function\u001b[39;00m\n\u001b[0;32m--> 143\u001b[0m ds \u001b[38;5;241m=\u001b[39m \u001b[43mxr\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mopen_mfdataset\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfilenames\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 145\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m except_tuple \u001b[38;5;28;01mas\u001b[39;00m exception:\n\u001b[1;32m 146\u001b[0m \u001b[38;5;66;03m# If requested return None for File not found error\u001b[39;00m\n\u001b[1;32m 147\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mtype\u001b[39m(exception)\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__name__\u001b[39m \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mFileNotFoundError\u001b[39m\u001b[38;5;124m'\u001b[39m:\n", - "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/xarray/backends/api.py:998\u001b[0m, in \u001b[0;36mopen_mfdataset\u001b[0;34m(paths, chunks, concat_dim, compat, preprocess, engine, 
data_vars, coords, combine, parallel, join, attrs_file, combine_attrs, **kwargs)\u001b[0m\n\u001b[1;32m 995\u001b[0m open_ \u001b[38;5;241m=\u001b[39m open_dataset\n\u001b[1;32m 996\u001b[0m getattr_ \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mgetattr\u001b[39m\n\u001b[0;32m--> 998\u001b[0m datasets \u001b[38;5;241m=\u001b[39m [open_(p, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mopen_kwargs) \u001b[38;5;28;01mfor\u001b[39;00m p \u001b[38;5;129;01min\u001b[39;00m paths]\n\u001b[1;32m 999\u001b[0m closers \u001b[38;5;241m=\u001b[39m [getattr_(ds, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m_close\u001b[39m\u001b[38;5;124m\"\u001b[39m) \u001b[38;5;28;01mfor\u001b[39;00m ds \u001b[38;5;129;01min\u001b[39;00m datasets]\n\u001b[1;32m 1000\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m preprocess \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n", - "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/xarray/backends/api.py:998\u001b[0m, in \u001b[0;36m\u001b[0;34m(.0)\u001b[0m\n\u001b[1;32m 995\u001b[0m open_ \u001b[38;5;241m=\u001b[39m open_dataset\n\u001b[1;32m 996\u001b[0m getattr_ \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mgetattr\u001b[39m\n\u001b[0;32m--> 998\u001b[0m datasets \u001b[38;5;241m=\u001b[39m [\u001b[43mopen_\u001b[49m\u001b[43m(\u001b[49m\u001b[43mp\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mopen_kwargs\u001b[49m\u001b[43m)\u001b[49m \u001b[38;5;28;01mfor\u001b[39;00m p \u001b[38;5;129;01min\u001b[39;00m paths]\n\u001b[1;32m 999\u001b[0m closers \u001b[38;5;241m=\u001b[39m [getattr_(ds, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m_close\u001b[39m\u001b[38;5;124m\"\u001b[39m) \u001b[38;5;28;01mfor\u001b[39;00m ds \u001b[38;5;129;01min\u001b[39;00m datasets]\n\u001b[1;32m 1000\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m preprocess \u001b[38;5;129;01mis\u001b[39;00m 
\u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n", - "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/xarray/backends/api.py:541\u001b[0m, in \u001b[0;36mopen_dataset\u001b[0;34m(filename_or_obj, engine, chunks, cache, decode_cf, mask_and_scale, decode_times, decode_timedelta, use_cftime, concat_characters, decode_coords, drop_variables, inline_array, backend_kwargs, **kwargs)\u001b[0m\n\u001b[1;32m 529\u001b[0m decoders \u001b[38;5;241m=\u001b[39m _resolve_decoders_kwargs(\n\u001b[1;32m 530\u001b[0m decode_cf,\n\u001b[1;32m 531\u001b[0m open_backend_dataset_parameters\u001b[38;5;241m=\u001b[39mbackend\u001b[38;5;241m.\u001b[39mopen_dataset_parameters,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 537\u001b[0m decode_coords\u001b[38;5;241m=\u001b[39mdecode_coords,\n\u001b[1;32m 538\u001b[0m )\n\u001b[1;32m 540\u001b[0m overwrite_encoded_chunks \u001b[38;5;241m=\u001b[39m kwargs\u001b[38;5;241m.\u001b[39mpop(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124moverwrite_encoded_chunks\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;28;01mNone\u001b[39;00m)\n\u001b[0;32m--> 541\u001b[0m backend_ds \u001b[38;5;241m=\u001b[39m \u001b[43mbackend\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mopen_dataset\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 542\u001b[0m \u001b[43m \u001b[49m\u001b[43mfilename_or_obj\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 543\u001b[0m \u001b[43m \u001b[49m\u001b[43mdrop_variables\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdrop_variables\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 544\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mdecoders\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 545\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 546\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 547\u001b[0m ds 
\u001b[38;5;241m=\u001b[39m _dataset_from_backend_dataset(\n\u001b[1;32m 548\u001b[0m backend_ds,\n\u001b[1;32m 549\u001b[0m filename_or_obj,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 557\u001b[0m \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs,\n\u001b[1;32m 558\u001b[0m )\n\u001b[1;32m 559\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m ds\n", - "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/xarray/backends/netCDF4_.py:592\u001b[0m, in \u001b[0;36mNetCDF4BackendEntrypoint.open_dataset\u001b[0;34m(self, filename_or_obj, mask_and_scale, decode_times, concat_characters, decode_coords, drop_variables, use_cftime, decode_timedelta, group, mode, format, clobber, diskless, persist, lock, autoclose)\u001b[0m\n\u001b[1;32m 590\u001b[0m store_entrypoint \u001b[38;5;241m=\u001b[39m StoreBackendEntrypoint()\n\u001b[1;32m 591\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m close_on_error(store):\n\u001b[0;32m--> 592\u001b[0m ds \u001b[38;5;241m=\u001b[39m \u001b[43mstore_entrypoint\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mopen_dataset\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 593\u001b[0m \u001b[43m \u001b[49m\u001b[43mstore\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 594\u001b[0m \u001b[43m \u001b[49m\u001b[43mmask_and_scale\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmask_and_scale\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 595\u001b[0m \u001b[43m \u001b[49m\u001b[43mdecode_times\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdecode_times\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 596\u001b[0m \u001b[43m \u001b[49m\u001b[43mconcat_characters\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mconcat_characters\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 597\u001b[0m \u001b[43m \u001b[49m\u001b[43mdecode_coords\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdecode_coords\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 598\u001b[0m \u001b[43m 
\u001b[49m\u001b[43mdrop_variables\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdrop_variables\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 599\u001b[0m \u001b[43m \u001b[49m\u001b[43muse_cftime\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43muse_cftime\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 600\u001b[0m \u001b[43m \u001b[49m\u001b[43mdecode_timedelta\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdecode_timedelta\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 601\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 602\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m ds\n", - "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/xarray/backends/store.py:35\u001b[0m, in \u001b[0;36mStoreBackendEntrypoint.open_dataset\u001b[0;34m(self, store, mask_and_scale, decode_times, concat_characters, decode_coords, drop_variables, use_cftime, decode_timedelta)\u001b[0m\n\u001b[1;32m 32\u001b[0m \u001b[38;5;28mvars\u001b[39m, attrs \u001b[38;5;241m=\u001b[39m store\u001b[38;5;241m.\u001b[39mload()\n\u001b[1;32m 33\u001b[0m encoding \u001b[38;5;241m=\u001b[39m store\u001b[38;5;241m.\u001b[39mget_encoding()\n\u001b[0;32m---> 35\u001b[0m \u001b[38;5;28mvars\u001b[39m, attrs, coord_names \u001b[38;5;241m=\u001b[39m \u001b[43mconventions\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdecode_cf_variables\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 36\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;28;43mvars\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 37\u001b[0m \u001b[43m \u001b[49m\u001b[43mattrs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 38\u001b[0m \u001b[43m \u001b[49m\u001b[43mmask_and_scale\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmask_and_scale\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 39\u001b[0m \u001b[43m \u001b[49m\u001b[43mdecode_times\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdecode_times\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 40\u001b[0m \u001b[43m 
\u001b[49m\u001b[43mconcat_characters\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mconcat_characters\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 41\u001b[0m \u001b[43m \u001b[49m\u001b[43mdecode_coords\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdecode_coords\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 42\u001b[0m \u001b[43m \u001b[49m\u001b[43mdrop_variables\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdrop_variables\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 43\u001b[0m \u001b[43m \u001b[49m\u001b[43muse_cftime\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43muse_cftime\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 44\u001b[0m \u001b[43m \u001b[49m\u001b[43mdecode_timedelta\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdecode_timedelta\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 45\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 47\u001b[0m ds \u001b[38;5;241m=\u001b[39m Dataset(\u001b[38;5;28mvars\u001b[39m, attrs\u001b[38;5;241m=\u001b[39mattrs)\n\u001b[1;32m 48\u001b[0m ds \u001b[38;5;241m=\u001b[39m ds\u001b[38;5;241m.\u001b[39mset_coords(coord_names\u001b[38;5;241m.\u001b[39mintersection(\u001b[38;5;28mvars\u001b[39m))\n", - "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/xarray/conventions.py:562\u001b[0m, in \u001b[0;36mdecode_cf_variables\u001b[0;34m(variables, attributes, concat_characters, mask_and_scale, decode_times, decode_coords, drop_variables, use_cftime, decode_timedelta)\u001b[0m\n\u001b[1;32m 551\u001b[0m new_vars[k] \u001b[38;5;241m=\u001b[39m decode_cf_variable(\n\u001b[1;32m 552\u001b[0m k,\n\u001b[1;32m 553\u001b[0m v,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 559\u001b[0m decode_timedelta\u001b[38;5;241m=\u001b[39mdecode_timedelta,\n\u001b[1;32m 560\u001b[0m )\n\u001b[1;32m 561\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[0;32m--> 562\u001b[0m 
\u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;28mtype\u001b[39m(e)(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mFailed to decode variable \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mk\u001b[38;5;132;01m!r}\u001b[39;00m\u001b[38;5;124m: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00me\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 563\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m decode_coords \u001b[38;5;129;01min\u001b[39;00m [\u001b[38;5;28;01mTrue\u001b[39;00m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcoordinates\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mall\u001b[39m\u001b[38;5;124m\"\u001b[39m]:\n\u001b[1;32m 564\u001b[0m var_attrs \u001b[38;5;241m=\u001b[39m new_vars[k]\u001b[38;5;241m.\u001b[39mattrs\n", - "\u001b[0;31mValueError\u001b[0m: Failed to decode variable 'time_offset': unable to decode time units 'seconds since base_time' with 'the default calendar'. Try opening your dataset with decode_times=False or installing cftime if it is not installed." 
- ] - } - ], - "source": [ - "# Load files as a single dataset\n", - "files_list = files_filter \n", - "ds = act.io.armfiles.read_netcdf(files_list)\n", - "ds.clean.cleanup()\n", - "print(f'{len(files_list)} files loaded')\n", - "ds\n" - ] - }, - { - "cell_type": "markdown", - "id": "4a551094-9ec0-4b64-b80a-9940573c2f50", - "metadata": {}, - "source": [ - "## Plot time series data\n", - "#### Define the list of variables to be plotted:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "d09b789e-84f1-4605-846b-a72c110c8048", - "metadata": {}, - "outputs": [], - "source": [ - "variables_to_plot = ['mwr_scale_factor', 'liquid_water_content', 'aqc_liquid_water_content']" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "3458fb08-035b-4898-9253-0a94e6f9c97b", - "metadata": {}, - "outputs": [], - "source": [ - "ts_display = act.plotting.TimeSeriesDisplay(ds)\n", - "ts_display.add_subplots((len(variables_to_plot),), figsize = (9.5,4*len(variables_to_plot)))\n", - "\n", - "for i,v in enumerate(variables_to_plot):\n", - " ts_ax = ts_display.plot(v, subplot_index=(i,), set_title=ds.variables[v].attrs['long_name'],)\n", - " ts_ax.grid()\n", - "\n", - "plt.show()\n" - ] - }, - { - "cell_type": "markdown", - "id": "2308db16-f362-4033-a11a-c3e5e75ad9ba", - "metadata": {}, - "source": [ - "## Field selection dropdown menu\n", - "Select variable to be plotted from a dropdown menu" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "d5792fa8-2253-441a-8ac6-1e07a0345031", - "metadata": {}, - "outputs": [], - "source": [ - "plt.ioff()\n", - "\n", - "# populate dropdown menu with available variables \n", - "available_variables = [v for v in ds.variables if not('time' in v or v.startswith('qc_') or v.startswith('source_')) and 'long_name' in ds.variables[v].attrs]\n", - "d_variable = 'mwr_scale_factor'\n", - "dropdown = widgets.Dropdown(\n", - " options = [(ds.variables[v].attrs['long_name'], v) for v in 
available_variables],\n", - " value= d_variable,\n", - " description='Field:',\n", - " disabled=False,\n", - ")\n", - "dropdown.layout.margin = '0px 30% 0px 20%'\n", - "dropdown.layout.width = '50%'\n", - "\n", - "# set up display\n", - "i_display = act.plotting.TimeSeriesDisplay(ds)\n", - "i_display.add_subplots((1,), figsize = (9.5,5))\n", - "i_ax = i_display.plot(d_variable, subplot_index=(0,), set_title=ds.variables[d_variable].attrs['long_name'],)\n", - "i_ax.grid()\n", - "i_fig = i_display.fig\n", - "\n", - "# update plot callback function\n", - "def update_plot(change):\n", - " i_ax.cla()\n", - " i_ax_new = i_display.plot(change.new, subplot_index=(0,), set_title=ds.variables[change.new].attrs['long_name'],)\n", - " i_ax_new.grid()\n", - " i_fig.canvas.draw()\n", - " i_fig.canvas.flush_events()\n", - "\n", - "dropdown.observe(update_plot, names='value')\n", - "\n", - "widgets.AppLayout(\n", - " header=dropdown,\n", - " center=i_fig.canvas,\n", - " pane_heights=[1, 6,1]\n", - ")\n" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.16" - }, - "widgets": { - "application/vnd.jupyter.widget-state+json": { - "state": {}, - "version_major": 2, - "version_minor": 0 - } - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/VAPs/quicklook/MICROBASE/.ipynb_checkpoints/microbasepiavg.c1-checkpoint.ipynb b/VAPs/quicklook/MICROBASE/.ipynb_checkpoints/microbasepiavg.c1-checkpoint.ipynb deleted file mode 100644 index 6f4b3ae0..00000000 --- a/VAPs/quicklook/MICROBASE/.ipynb_checkpoints/microbasepiavg.c1-checkpoint.ipynb +++ /dev/null @@ -1,1757 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": 
"70840257-70e4-45e2-b491-14bff5a257a3", - "metadata": {}, - "source": [ - "# MICROBASEPIAVG.C1 Plots\n", - "\n", - "[Click here](https://www.arm.gov/capabilities/vaps/microbase) for more information about this vap." - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "id": "460fd89f-e034-452c-b837-f65c5958264f", - "metadata": {}, - "outputs": [], - "source": [ - "%matplotlib widget\n", - "import ipywidgets as widgets\n", - "\n", - "import matplotlib.pyplot as plt\n", - "import ipywidgets as widgets\n", - "import numpy as np\n", - "import pandas as pd\n", - "import os\n", - "from datetime import datetime\n", - "\n", - "import act\n", - "import xarray as xr\n", - "\n", - "# Data archive directory\n", - "DATA_DIR = r'/data/archive/'\n", - "\n", - "# Datastream info\n", - "DATASTREAM_NAME = 'microbasepiavg'\n", - "DATA_LEVEL = 'c1'\n", - "LOCATIONS = [{'end_date': '2011-03-22', 'facility': 'C1', 'site': 'nsa', 'start_date': '2002-01-01'}, {'end_date': '2010-12-30', 'facility': 'C1', 'site': 'sgp', 'start_date': '1996-11-08'}, {'end_date': '2011-02-25', 'facility': 'C1', 'site': 'twp', 'start_date': '1999-07-01'}, {'end_date': '2009-02-13', 'facility': 'C2', 'site': 'twp', 'start_date': '2002-01-01'}, {'end_date': '2011-02-27', 'facility': 'C3', 'site': 'twp', 'start_date': '2005-11-04'}]" - ] - }, - { - "cell_type": "markdown", - "id": "9faaf875", - "metadata": {}, - "source": [ - "## Define site, facility, and date range" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "ac6764f5", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The following locations and date ranges are available for this VAP:\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
sitefacilitystart_dateend_date
0nsaC12002-01-012011-03-22
1sgpC11996-11-082010-12-30
2twpC11999-07-012011-02-25
3twpC22002-01-012009-02-13
4twpC32005-11-042011-02-27
\n", - "
" - ], - "text/plain": [ - " site facility start_date end_date\n", - "0 nsa C1 2002-01-01 2011-03-22\n", - "1 sgp C1 1996-11-08 2010-12-30\n", - "2 twp C1 1999-07-01 2011-02-25\n", - "3 twp C2 2002-01-01 2009-02-13\n", - "4 twp C3 2005-11-04 2011-02-27" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "print(\"The following locations and date ranges are available for this VAP:\")\n", - "display(pd.DataFrame(LOCATIONS, columns=['site', 'facility', 'start_date', 'end_date']))" - ] - }, - { - "cell_type": "markdown", - "id": "8d132223", - "metadata": {}, - "source": [ - "#### Define site, facility, and date range (date format: YYYY-MM-DD) using the variables below:" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "e563983a", - "metadata": {}, - "outputs": [], - "source": [ - "site_facility = ( 'sgp', 'C1' )\n", - "\n", - "date_start = '2010-12-28'\n", - "date_end = '2010-12-30'" - ] - }, - { - "cell_type": "markdown", - "id": "bccd3dfe-2f99-49a5-bace-ea37e8dd8fc3", - "metadata": {}, - "source": [ - "## Load data files\n", - "Load data files from /data/archive/" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "cb4b9a26-c574-49c0-a521-658fa553e39e", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "'/data/archive/sgp/sgpmicrobasepiavgC1.c1'" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Compile list of files\n", - "site, facility = site_facility\n", - "d_date_start = datetime.strptime(date_start, '%Y-%m-%d')\n", - "d_date_end = datetime.strptime(date_end, '%Y-%m-%d')\n", - "dir_path = os.path.join(DATA_DIR + site, site + DATASTREAM_NAME + facility + r'.' 
+ DATA_LEVEL )\n", - "dir_path\n" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "6be8f3dc", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "['20101228', '20101229', '20101230']" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "from datetime import date, timedelta\n", - "import pandas as pd\n", - "\n", - "def get_ARM_formated_dates(start_date, end_date):\n", - " \"\"\"\n", - " Get a list of ARM conventional formated date lists, based on start_date and end_date(inclusive)\n", - " EXAMPLE:\n", - " get_ARM_formated_dates(start_date=\"20180219\", end_date=\"20180221\")\n", - " >> [\"20180219\", \"20180220\", \"20180221\"] \n", - " \"\"\"\n", - " \n", - " _start_date = pd.to_datetime(start_date)\n", - " _end_date = pd.to_datetime(end_date)\n", - " \n", - " delta = _end_date - _start_date # returns timedelta \n", - " dates = []\n", - "\n", - " for i in range(delta.days + 1):\n", - " day = _start_date + timedelta(days=i)\n", - " day_formated = day.strftime(format=\"%Y%m%d\")\n", - " dates.append(day_formated)\n", - " return dates\n", - "\n", - "\n", - "get_ARM_formated_dates(start_date=date_start, end_date=date_end)" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "51feea2e", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "['/data/archive/sgp/sgpmicrobasepiavgC1.c1/sgpmicrobasepiavgC1.c1.20101228.001000.cdf',\n", - " '/data/archive/sgp/sgpmicrobasepiavgC1.c1/sgpmicrobasepiavgC1.c1.20101229.001000.cdf',\n", - " '/data/archive/sgp/sgpmicrobasepiavgC1.c1/sgpmicrobasepiavgC1.c1.20101230.001000.cdf']" - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Filter a list of files based on date pattern\n", - "import glob\n", - "dates = get_ARM_formated_dates(start_date=date_start, end_date=date_end)\n", - "files_filter = []\n", - "for date in dates:\n", - " files_filter += 
glob.glob(f'{dir_path}/*.{date}*.*')\n", - " files_filter\n", - "files_filter" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "id": "b0e5d0d7", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "3 files loaded\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "
<xarray.Dataset>\n",
-       "Dimensions:                    (time: 216, nheights: 233)\n",
-       "Coordinates:\n",
-       "  * time                       (time) datetime64[ns] 2010-12-28T00:10:00 ... ...\n",
-       "Dimensions without coordinates: nheights\n",
-       "Data variables: (12/17)\n",
-       "    base_time                  (time) datetime64[ns] 2010-12-28T00:10:00 ... ...\n",
-       "    time_offset                (time) datetime64[ns] 2010-12-28T00:10:00 ... ...\n",
-       "    Heights                    (time, nheights) float32 dask.array<chunksize=(72, 233), meta=np.ndarray>\n",
-       "    Avg_Retrieved_LWC          (time, nheights) float32 dask.array<chunksize=(72, 233), meta=np.ndarray>\n",
-       "    Avg_Retrieved_IWC          (time, nheights) float32 dask.array<chunksize=(72, 233), meta=np.ndarray>\n",
-       "    Avg_LiqEffectiveRadius     (time, nheights) float32 dask.array<chunksize=(72, 233), meta=np.ndarray>\n",
-       "    ...                         ...\n",
-       "    Integrated_CloudFraction   (time) float32 dask.array<chunksize=(72,), meta=np.ndarray>\n",
-       "    aqc_CloudFraction          (time) float32 dask.array<chunksize=(72,), meta=np.ndarray>\n",
-       "    aqc_CloudMissing           (time) float32 dask.array<chunksize=(72,), meta=np.ndarray>\n",
-       "    lat                        (time) float32 36.61 36.61 36.61 ... 36.61 36.61\n",
-       "    lon                        (time) float32 -97.49 -97.49 ... -97.49 -97.49\n",
-       "    alt                        (time) float32 318.0 318.0 318.0 ... 318.0 318.0\n",
-       "Attributes: (12/13)\n",
-       "    process_version:                $State: vap-microbasepi-1.2-1.sol5_10 $\n",
-       "    command_line:                   microbasepi -d 20101228 -f sgpC1\n",
-       "    site_id:                        sgp\n",
-       "    facility_id:                    C1: Lamont, Oklahoma\n",
-       "    input_datastreams_description:  A string consisting of the datastream(s),...\n",
-       "    input_datastreams_num:          3\n",
-       "    ...                             ...\n",
-       "    history:                        created by user dsmgr on machine garnet a...\n",
-       "    _file_dates:                    ['20101228', '20101229', '20101230']\n",
-       "    _file_times:                    ['001000', '001000', '001000']\n",
-       "    datastream:                     sgpmicrobasepiavgC1.c1\n",
-       "    _datastream:                    sgpmicrobasepiavgC1.c1\n",
-       "    _arm_standards_flag:            1
" - ], - "text/plain": [ - "\n", - "Dimensions: (time: 216, nheights: 233)\n", - "Coordinates:\n", - " * time (time) datetime64[ns] 2010-12-28T00:10:00 ... ...\n", - "Dimensions without coordinates: nheights\n", - "Data variables: (12/17)\n", - " base_time (time) datetime64[ns] 2010-12-28T00:10:00 ... ...\n", - " time_offset (time) datetime64[ns] 2010-12-28T00:10:00 ... ...\n", - " Heights (time, nheights) float32 dask.array\n", - " Avg_Retrieved_LWC (time, nheights) float32 dask.array\n", - " Avg_Retrieved_IWC (time, nheights) float32 dask.array\n", - " Avg_LiqEffectiveRadius (time, nheights) float32 dask.array\n", - " ... ...\n", - " Integrated_CloudFraction (time) float32 dask.array\n", - " aqc_CloudFraction (time) float32 dask.array\n", - " aqc_CloudMissing (time) float32 dask.array\n", - " lat (time) float32 36.61 36.61 36.61 ... 36.61 36.61\n", - " lon (time) float32 -97.49 -97.49 ... -97.49 -97.49\n", - " alt (time) float32 318.0 318.0 318.0 ... 318.0 318.0\n", - "Attributes: (12/13)\n", - " process_version: $State: vap-microbasepi-1.2-1.sol5_10 $\n", - " command_line: microbasepi -d 20101228 -f sgpC1\n", - " site_id: sgp\n", - " facility_id: C1: Lamont, Oklahoma\n", - " input_datastreams_description: A string consisting of the datastream(s),...\n", - " input_datastreams_num: 3\n", - " ... 
...\n", - " history: created by user dsmgr on machine garnet a...\n", - " _file_dates: ['20101228', '20101229', '20101230']\n", - " _file_times: ['001000', '001000', '001000']\n", - " datastream: sgpmicrobasepiavgC1.c1\n", - " _datastream: sgpmicrobasepiavgC1.c1\n", - " _arm_standards_flag: 1" - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Load files as a single dataset\n", - "files_list = files_filter \n", - "ds = act.io.armfiles.read_netcdf(files_list)\n", - "ds.clean.cleanup()\n", - "print(f'{len(files_list)} files loaded')\n", - "ds\n" - ] - }, - { - "cell_type": "markdown", - "id": "4a551094-9ec0-4b64-b80a-9940573c2f50", - "metadata": {}, - "source": [ - "## Plot time series data\n", - "#### Define the list of variables to be plotted:" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "id": "d09b789e-84f1-4605-846b-a72c110c8048", - "metadata": {}, - "outputs": [], - "source": [ - "variables_to_plot = ['avg_retrieved_lwc', 'avg_retrieved_iwc', 'avg_liq_effective_radius']\n", - "variables_to_plot = ['avg_retrieved_iwc', 'avg_liq_effective_radius']" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "id": "3458fb08-035b-4898-9253-0a94e6f9c97b", - "metadata": {}, - "outputs": [ - { - "ename": "KeyError", - "evalue": "'avg_retrieved_iwc'", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[11], line 5\u001b[0m\n\u001b[1;32m 2\u001b[0m ts_display\u001b[38;5;241m.\u001b[39madd_subplots((\u001b[38;5;28mlen\u001b[39m(variables_to_plot),), figsize \u001b[38;5;241m=\u001b[39m (\u001b[38;5;241m9.5\u001b[39m,\u001b[38;5;241m4\u001b[39m\u001b[38;5;241m*\u001b[39m\u001b[38;5;28mlen\u001b[39m(variables_to_plot)))\n\u001b[1;32m 4\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m i,v 
\u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28menumerate\u001b[39m(variables_to_plot):\n\u001b[0;32m----> 5\u001b[0m ts_ax \u001b[38;5;241m=\u001b[39m ts_display\u001b[38;5;241m.\u001b[39mplot(v, subplot_index\u001b[38;5;241m=\u001b[39m(i,), set_title\u001b[38;5;241m=\u001b[39m\u001b[43mds\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mvariables\u001b[49m\u001b[43m[\u001b[49m\u001b[43mv\u001b[49m\u001b[43m]\u001b[49m\u001b[38;5;241m.\u001b[39mattrs[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mlong_name\u001b[39m\u001b[38;5;124m'\u001b[39m],)\n\u001b[1;32m 6\u001b[0m ts_ax\u001b[38;5;241m.\u001b[39mgrid()\n\u001b[1;32m 8\u001b[0m plt\u001b[38;5;241m.\u001b[39mshow()\n", - "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/xarray/core/utils.py:453\u001b[0m, in \u001b[0;36mFrozen.__getitem__\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m 452\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m__getitem__\u001b[39m(\u001b[38;5;28mself\u001b[39m, key: K) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m V:\n\u001b[0;32m--> 453\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmapping\u001b[49m\u001b[43m[\u001b[49m\u001b[43mkey\u001b[49m\u001b[43m]\u001b[49m\n", - "\u001b[0;31mKeyError\u001b[0m: 'avg_retrieved_iwc'" - ] - }, - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "fa59298dbac54ea38a2f036fff77b0e0", - "version_major": 2, - "version_minor": 0 - }, - "image/png": "", - "text/html": [ - "\n", - "
\n", - "
\n", - " Figure\n", - "
\n", - " \n", - "
\n", - " " - ], - "text/plain": [ - "Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "ts_display = act.plotting.TimeSeriesDisplay(ds)\n", - "ts_display.add_subplots((len(variables_to_plot),), figsize = (9.5,4*len(variables_to_plot)))\n", - "\n", - "for i,v in enumerate(variables_to_plot):\n", - " ts_ax = ts_display.plot(v, subplot_index=(i,), set_title=ds.variables[v].attrs['long_name'],)\n", - " ts_ax.grid()\n", - "\n", - "plt.show()\n" - ] - }, - { - "cell_type": "markdown", - "id": "2308db16-f362-4033-a11a-c3e5e75ad9ba", - "metadata": {}, - "source": [ - "## Field selection dropdown menu\n", - "Select variable to be plotted from a dropdown menu" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "d5792fa8-2253-441a-8ac6-1e07a0345031", - "metadata": {}, - "outputs": [], - "source": [ - "plt.ioff()\n", - "\n", - "# populate dropdown menu with available variables \n", - "available_variables = [v for v in ds.variables if not('time' in v or v.startswith('qc_') or v.startswith('source_')) and 'long_name' in ds.variables[v].attrs]\n", - "d_variable = 'avg_retrieved_lwc'\n", - "dropdown = widgets.Dropdown(\n", - " options = [(ds.variables[v].attrs['long_name'], v) for v in available_variables],\n", - " value= d_variable,\n", - " description='Field:',\n", - " disabled=False,\n", - ")\n", - "dropdown.layout.margin = '0px 30% 0px 20%'\n", - "dropdown.layout.width = '50%'\n", - "\n", - "# set up display\n", - "i_display = act.plotting.TimeSeriesDisplay(ds)\n", - "i_display.add_subplots((1,), figsize = (9.5,5))\n", - "i_ax = i_display.plot(d_variable, subplot_index=(0,), set_title=ds.variables[d_variable].attrs['long_name'],)\n", - "i_ax.grid()\n", - "i_fig = i_display.fig\n", - "\n", - "# update plot callback function\n", - "def update_plot(change):\n", - " i_ax.cla()\n", - " i_ax_new = 
i_display.plot(change.new, subplot_index=(0,), set_title=ds.variables[change.new].attrs['long_name'],)\n", - " i_ax_new.grid()\n", - " i_fig.canvas.draw()\n", - " i_fig.canvas.flush_events()\n", - "\n", - "dropdown.observe(update_plot, names='value')\n", - "\n", - "widgets.AppLayout(\n", - " header=dropdown,\n", - " center=i_fig.canvas,\n", - " pane_heights=[1, 6,1]\n", - ")\n" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.16" - }, - "widgets": { - "application/vnd.jupyter.widget-state+json": { - "state": {}, - "version_major": 2, - "version_minor": 0 - } - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/VAPs/quicklook/MPLNOR/.ipynb_checkpoints/mplnor1camp.c1-checkpoint.ipynb b/VAPs/quicklook/MPLNOR/.ipynb_checkpoints/mplnor1camp.c1-checkpoint.ipynb deleted file mode 100644 index d37d9027..00000000 --- a/VAPs/quicklook/MPLNOR/.ipynb_checkpoints/mplnor1camp.c1-checkpoint.ipynb +++ /dev/null @@ -1,1732 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "70840257-70e4-45e2-b491-14bff5a257a3", - "metadata": {}, - "source": [ - "# MPLNOR1CAMP.C1 Plots\n", - "\n", - "[Click here](https://www.arm.gov/capabilities/vaps/mplnor) for more information about this vap." 
- ] - }, - { - "cell_type": "code", - "execution_count": 1, - "id": "460fd89f-e034-452c-b837-f65c5958264f", - "metadata": {}, - "outputs": [], - "source": [ - "%matplotlib widget\n", - "import ipywidgets as widgets\n", - "\n", - "import matplotlib.pyplot as plt\n", - "import ipywidgets as widgets\n", - "import numpy as np\n", - "import pandas as pd\n", - "import os\n", - "from datetime import datetime\n", - "\n", - "import act\n", - "import xarray as xr\n", - "\n", - "# Data archive directory\n", - "DATA_DIR = r'/data/archive/'\n", - "\n", - "# Datastream info\n", - "DATASTREAM_NAME = 'mplnor1camp'\n", - "DATA_LEVEL = 'c1'\n", - "LOCATIONS = [{'end_date': '2004-05-11', 'facility': 'C1', 'site': 'sgp', 'start_date': '1996-05-01'}, {'end_date': '1999-11-18', 'facility': 'C2', 'site': 'twp', 'start_date': '1998-11-20'}]" - ] - }, - { - "cell_type": "markdown", - "id": "9faaf875", - "metadata": {}, - "source": [ - "## Define site, facility, and date range" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "ac6764f5", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The following locations and date ranges are available for this VAP:\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
sitefacilitystart_dateend_date
0sgpC11996-05-012004-05-11
1twpC21998-11-201999-11-18
\n", - "
" - ], - "text/plain": [ - " site facility start_date end_date\n", - "0 sgp C1 1996-05-01 2004-05-11\n", - "1 twp C2 1998-11-20 1999-11-18" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "print(\"The following locations and date ranges are available for this VAP:\")\n", - "display(pd.DataFrame(LOCATIONS, columns=['site', 'facility', 'start_date', 'end_date']))" - ] - }, - { - "cell_type": "markdown", - "id": "8d132223", - "metadata": {}, - "source": [ - "#### Define site, facility, and date range (date format: YYYY-MM-DD) using the variables below:" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "e563983a", - "metadata": {}, - "outputs": [], - "source": [ - "site_facility = ( 'sgp', 'C1' )\n", - "\n", - "date_start = '2004-05-10'\n", - "date_end = '2004-05-11'" - ] - }, - { - "cell_type": "markdown", - "id": "bccd3dfe-2f99-49a5-bace-ea37e8dd8fc3", - "metadata": {}, - "source": [ - "## Load data files\n", - "Load data files from /data/archive/" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "cb4b9a26-c574-49c0-a521-658fa553e39e", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "'/data/archive/sgp/sgpmplnor1campC1.c1'" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Compile list of files\n", - "site, facility = site_facility\n", - "d_date_start = datetime.strptime(date_start, '%Y-%m-%d')\n", - "d_date_end = datetime.strptime(date_end, '%Y-%m-%d')\n", - "dir_path = os.path.join(DATA_DIR + site, site + DATASTREAM_NAME + facility + r'.' 
+ DATA_LEVEL )\n", - "dir_path\n" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "6be8f3dc", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "['20040510', '20040511']" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "from datetime import date, timedelta\n", - "import pandas as pd\n", - "\n", - "def get_ARM_formated_dates(start_date, end_date):\n", - " \"\"\"\n", - " Get a list of ARM conventional formated date lists, based on start_date and end_date(inclusive)\n", - " EXAMPLE:\n", - " get_ARM_formated_dates(start_date=\"20180219\", end_date=\"20180221\")\n", - " >> [\"20180219\", \"20180220\", \"20180221\"] \n", - " \"\"\"\n", - " \n", - " _start_date = pd.to_datetime(start_date)\n", - " _end_date = pd.to_datetime(end_date)\n", - " \n", - " delta = _end_date - _start_date # returns timedelta \n", - " dates = []\n", - "\n", - " for i in range(delta.days + 1):\n", - " day = _start_date + timedelta(days=i)\n", - " day_formated = day.strftime(format=\"%Y%m%d\")\n", - " dates.append(day_formated)\n", - " return dates\n", - "\n", - "\n", - "get_ARM_formated_dates(start_date=date_start, end_date=date_end)" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "51feea2e", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "['/data/archive/sgp/sgpmplnor1campC1.c1/sgpmplnor1campC1.c1.20040510.000020.cdf',\n", - " '/data/archive/sgp/sgpmplnor1campC1.c1/sgpmplnor1campC1.c1.20040511.000011.cdf']" - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Filter a list of files based on date pattern\n", - "import glob\n", - "dates = get_ARM_formated_dates(start_date=date_start, end_date=date_end)\n", - "files_filter = []\n", - "for date in dates:\n", - " files_filter += glob.glob(f'{dir_path}/*.{date}*.*')\n", - " files_filter\n", - "files_filter" - ] - }, - { - "cell_type": "code", - 
"execution_count": 9, - "id": "b0e5d0d7", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "77 files loaded\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "
<xarray.Dataset>\n",
-       "Dimensions:                     (time: 1420, height: 445, nlayers: 5)\n",
-       "Coordinates:\n",
-       "  * height                      (height) float32 0.02998 0.1199 ... 39.88 39.97\n",
-       "  * time                        (time) datetime64[ns] 2004-05-10T00:00:20 ......\n",
-       "Dimensions without coordinates: nlayers\n",
-       "Data variables: (12/20)\n",
-       "    base_time                   datetime64[ns] 2004-05-10T00:00:20\n",
-       "    time_offset                 (time) datetime64[ns] 2004-05-10T00:00:20 ......\n",
-       "    backscatter                 (time, height) float32 dask.array<chunksize=(1420, 445), meta=np.ndarray>\n",
-       "    background_signal           (time) float32 dask.array<chunksize=(1420,), meta=np.ndarray>\n",
-       "    cloud_base_height           (time, nlayers) float32 dask.array<chunksize=(1420, 5), meta=np.ndarray>\n",
-       "    cloud_top_height            (time, nlayers) float32 dask.array<chunksize=(1420, 5), meta=np.ndarray>\n",
-       "    ...                          ...\n",
-       "    detector_temp               (time) float32 dask.array<chunksize=(1420,), meta=np.ndarray>\n",
-       "    instrument_temp             (time) float32 dask.array<chunksize=(1420,), meta=np.ndarray>\n",
-       "    laser_temp                  (time) float32 dask.array<chunksize=(1420,), meta=np.ndarray>\n",
-       "    lat                         float32 ...\n",
-       "    lon                         float32 ...\n",
-       "    alt                         float32 ...\n",
-       "Attributes: (12/24)\n",
-       "    Date:                        Tue May 11 19:11:20 2004\n",
-       "    Version:                     $State: process-vap-mplnor-2.9-0 $\n",
-       "    Command_Line:                mplnor -d 20040510\n",
-       "    Input_Platforms:             sgpmplC1.a1\n",
-       "    BW_Version:                  Working_4_1\n",
-       "    Comment:                     Pass-through VAP to improve the data quality\n",
-       "    ...                          ...\n",
-       "    history:                     created by user dsmgr on machine fore at 11-...\n",
-       "    _file_dates:                 ['20040510']\n",
-       "    _file_times:                 ['000020']\n",
-       "    datastream:                  sgpmplnor1campC1.c1\n",
-       "    _datastream:                 sgpmplnor1campC1.c1\n",
-       "    _arm_standards_flag:         1
" - ], - "text/plain": [ - "\n", - "Dimensions: (time: 1420, height: 445, nlayers: 5)\n", - "Coordinates:\n", - " * height (height) float32 0.02998 0.1199 ... 39.88 39.97\n", - " * time (time) datetime64[ns] 2004-05-10T00:00:20 ......\n", - "Dimensions without coordinates: nlayers\n", - "Data variables: (12/20)\n", - " base_time datetime64[ns] 2004-05-10T00:00:20\n", - " time_offset (time) datetime64[ns] 2004-05-10T00:00:20 ......\n", - " backscatter (time, height) float32 dask.array\n", - " background_signal (time) float32 dask.array\n", - " cloud_base_height (time, nlayers) float32 dask.array\n", - " cloud_top_height (time, nlayers) float32 dask.array\n", - " ... ...\n", - " detector_temp (time) float32 dask.array\n", - " instrument_temp (time) float32 dask.array\n", - " laser_temp (time) float32 dask.array\n", - " lat float32 ...\n", - " lon float32 ...\n", - " alt float32 ...\n", - "Attributes: (12/24)\n", - " Date: Tue May 11 19:11:20 2004\n", - " Version: $State: process-vap-mplnor-2.9-0 $\n", - " Command_Line: mplnor -d 20040510\n", - " Input_Platforms: sgpmplC1.a1\n", - " BW_Version: Working_4_1\n", - " Comment: Pass-through VAP to improve the data quality\n", - " ... 
...\n", - " history: created by user dsmgr on machine fore at 11-...\n", - " _file_dates: ['20040510']\n", - " _file_times: ['000020']\n", - " datastream: sgpmplnor1campC1.c1\n", - " _datastream: sgpmplnor1campC1.c1\n", - " _arm_standards_flag: 1" - ] - }, - "execution_count": 9, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Load files as a single dataset\n", - "files_list = files_filter[0]\n", - "ds = act.io.armfiles.read_netcdf(files_list)\n", - "ds.clean.cleanup()\n", - "print(f'{len(files_list)} files loaded')\n", - "ds\n" - ] - }, - { - "cell_type": "markdown", - "id": "4a551094-9ec0-4b64-b80a-9940573c2f50", - "metadata": {}, - "source": [ - "## Plot time series data\n", - "#### Define the list of variables to be plotted:" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "id": "d09b789e-84f1-4605-846b-a72c110c8048", - "metadata": {}, - "outputs": [], - "source": [ - "variables_to_plot = ['backscatter', 'cloud_base_height', 'cloud_top_height']" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "id": "3458fb08-035b-4898-9253-0a94e6f9c97b", - "metadata": {}, - "outputs": [ - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "b62b704e7cb241cea9f9f03bf8d662ea", - "version_major": 2, - "version_minor": 0 - }, - "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAA7YAAASwCAYAAADPBNYLAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/bCgiHAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOzdeXgT1foH8O80bdKFtuxdoFRARAHhoii7BQRkERFwAVwANxRcEBUFFYogiOJ6Fbx6leUqF/1dERWQRVnEC0LZFEG9qAWqUJC1bN2S8/sDJkwmM5NJmkyS8v08Tx6aWc45c5LSvHnPOSMJIQSIiIiIiIiIolRMuBtAREREREREVBEMbImIiIiIiCiqMbAlIiIiIiKiqMbAloiIiIiIiKIaA1siIiIiIiKKagxsiYiIiIiIKKoxsCUiIiIiIqKoxsCWiIiIiIiIohoDWyIiIiIiIopqDGyJiIiIiIgoqjGwJSIiIiIioqjGwJaIiIiIiIiiGgNbIiIiIiIiimoMbImIiIiIiCiqMbAlIiIiIiKiqMbAloiIiIiIiKIaA1siIiIiIiKKagxsiYiIiIiIKKoxsCUiIiIiIqKoxsCWiIiIiIiIohoDWyIiIiIiIopqDGyJiIiIiIgoqjGwJSIiIiIioqjGwJaIiIiIiIiiGgNbIiIiIiIiimoMbImIiIiIiCiqMbAlIiIiIiKiqMbAloiIiIiIiKIaA1siIiIiIiKKagxsiYiIiIiIKKoxsCUiIiIiIqKoxsCWiIiIiIiIohoDWyIiIiIiIopqDGyJiIiIiIgoqjGwJSIiIiIioqjGwJaIiIiIiIiiGgNbIiIiIiIiimoMbImIiIiIiCiqMbAlIiIiIiKiqMbAloiIiIiIiKIaA1siIiIiIiKKagxsiYiIiIiIKKoxsCUiIiIiIqKoxsCWiIiIiIiIohoDWyIiIiIiIopqDGyJiIiIiIgoqjGwJSIiIiIioqjGwJaIiIiIiIiiGgNbIiIiIiIiimoMbImIiIiIiCiqMbAlIiIiIiKiqMbAloiIiIiIiKIaA1siIiIiIiKKagxsiYiIiIiIKKoxsCUiIiIiIqKoxsCWiIiIiIiIohoDWyIiIiIiIopqDGyJiIiIiIgoqjGwJSIiIiIioqjGwJaIiIiIiIiiGgNbIiIiIiIiimoMbImIiIiIiCiqMbAlIiIiIiKiqMbAloiIiIiIiKIaA1siIiIiIiKKagxsiYiIiIiIKKoxsCUiIiIiIqKoxsCWiIiIiIiIohoDWyIiIiIiIopqDGyJiIiIiIgoqjGwJSIiIiIioqjGwJaIiIiIiIiiGgNbIiIiIiIiimoMbImIiIiIiCiqMbAlIiIiIiKiqMbAloiIiIiIiKIaA1siIiIiIiKKagxsiYiIiIiIKKoxsCUiIiIiIqKoxsCWiIiIiIiIohoDWyIiIiIiIopqDGyJiIiIiIgoqjGwJSIiIiIioqjGwJaIiIiIiIiiGgNbIiIiIiIiimoMbImIiIiIiCiqMbAlIiIiIiKiqMbAloiIiIiIiKIaA1siIiIiIiKKagxsiYiIiIiIKKoxsCUiIiIiIqKoxsCWiIiIiIiIohoDWyIiIiIiIopqDGyJiIiIiIgoqjGwJSIiIiIioqjGwJaIiIiIiIiiGgNbIiIiIiIiimoMbImIiIiIiCiqMbAlIiIiIiKiqMbAloiIiIiIiKIaA1siIiIiIiKKagxsiYiIiIiIKKoxsCUiIiIiIqKoxsCWiIiIiIiIohoDWyIiIiIiIopqDGyJiIiIiIgoqjGwJSIiIiIioqjGwJaIiIiIiIiiGgNbIiIiIiIiimoMbImIiIiIiCiqMbAlIiIiIiKiqMbAloiIiIiIiKIaA1siIiIiIiKKagxsiYiIiIiIKKoxsCUiIiIiIqKoxsCWiIiIiIiIohoDWyIiIiIiIopqDGyJiIiIiIgoqjGwJSIiIiIioqjGwJaIiIiIiIiiGgNbIiIiIiIiimoMbImIiIi
IiCiqMbAlIiIiIiKiqMbAlogqrdmzZ0OSJMTHx2PPnj1e+zt16oRmzZqFoWXBMXToUFx00UUe2y666CIMHTrU0nbs3r0bkiRh9uzZltYbbkuWLEFubm5Iyu7UqRM6deoUkrKVKvLa7du3D7m5udi2bZvXvtzcXEiSVPEG+kmu99ChQxdEvb4YvUenTJmChQsXWtoeIqJQYmBLRJVeSUkJnnnmmXA3wxKffvopnn322XA344KwZMkSTJw4MdzNCJt9+/Zh4sSJmoHtPffcg/Xr11vfKPJg9B5lYEtElQ0DWyKq9Hr06IF58+bh+++/D2k9Z86cCWn5ZrRs2RINGzYMdzPCwul0oqSkRHPf6dOnLW5NeAghIuJ9WLduXbRp0ybczSCLGf0OEhGFGgNbIqr0xowZgxo1auDJJ5/0eWxxcTHGjh2L+vXrw263o06dOhg5ciSOHTvmcdxFF12E66+/HgsWLEDLli0RHx+PiRMnYvXq1ZAkCfPmzcOTTz6JjIwMVKlSBX369MGBAwdw4sQJ3HfffahZsyZq1qyJYcOG4eTJkx5lv/XWW7jmmmtQu3ZtJCUl4fLLL8eLL76IsrIyn+1XD0Xu1KkTJEnSfCiHnxYWFmL48OGoW7cu7HY76tevj4kTJ6K8vNyj/H379uGWW25BcnIyUlNTceutt6KwsNBnu2R//vkn7rvvPmRlZcFutyMzMxM33XQTDhw44D5m7969uP3221G7dm04HA5cdtllePnll+FyudzHyENoX3zxRUyePBn169eHw+HAqlWr3MNCt2zZgptuugnVqlVzB/tCCMyYMQN/+9vfkJCQgGrVquGmm27C77//7tXWpUuX4tprr0VqaioSExNx2WWXYerUqQDODgN/6623AMCjT3fv3u1XPUIIvPjii8jOzkZ8fDyuuOIKfPnll6b7U5IkPPjgg3j77bdx2WWXweFwYM6cOQCAXbt2YfDgwR79KLfZyK+//ophw4ahUaNGSExMRJ06ddCnTx9s377dfczq1atx1VVXAQCGDRvmvn552Kt6KPKNN96I7Oxsj9dQ1rp1a1xxxRUefWL2NdJTUFCA/v37IyUlBampqbj99tvx119/eRzz0UcfoXv37sjIyEBCQgIuu+wyPPXUUzh16pRXeRs2bECfPn1Qo0YNxMfHo2HDhhg1apRhG37++Wc0aNAArVu3xsGDBwEAW7duxfXXX+9+TTIzM9G7d2/88ccf7vNcLhf+/ve/u6+/atWqaNOmDT7//HO/2m70HpUkCadOncKcOXPc25VD3838f2D0O0hEFA6x4W4AEVGoJScn45lnnsEjjzyClStXokuXLprHCSFw44034uuvv8bYsWPRsWNH/PDDD5gwYQLWr1+P9evXw+FwuI/fsmULfvrpJzzzzDOoX78+kpKS3B8sx40bh86dO2P27NnYvXs3Hn/8cQwaNAixsbFo0aIF/v3vf2Pr1q0YN24ckpOT8cYbb7jL/e233zB48GB3cP3999/j+eefx88//4z333/fr2ufMWMGioqKPLY9++yzWLVqFRo3bgzg7IfYq6++GjExMRg/fjwaNmyI9evXY/Lkydi9ezdmzZoF4GxGumvXrti3bx+mTp2KSy65BIsXL8att95qqi1//vknrrrqKpSVlWHcuHFo3rw5Dh8+jGXLluHo0aNIS0vDX3/9hXbt2qG0tBSTJk3CRRddhEWLFuHxxx/Hb7/9hhkzZniU+cYbb+CSSy7B9OnTkZKSgkaNGuG7774DAPTv3x8DBw7E/fff735dhg8fjtmzZ+Phhx/GtGnTcOTIETz33HNo164dvv/+e6SlpQEA3nvvPdx7773IycnB22+/jdq1a+N///sffvzxR3cfnjp1Cv/5z388htxmZGT4Vc/EiRMxceJE3H333bjppptQUFCAe++9F06n0/36+LJw4UKsXbsW48ePR3p6OmrXro2dO3eiXbt2qFevHl5++WWkp6dj2bJlePjhh3Ho0CF
MmDBBt7x9+/ahRo0aeOGFF1CrVi0cOXIEc+bMQevWrbF161Y0btwYV1xxBWbNmoVhw4bhmWeeQe/evQGczdRqueuuu9C3b1+sXLkSXbt2dW//+eefsXHjRo/3v9m+M9KvXz/ccsstuP/++7Fjxw48++yz2LlzJzZs2IC4uDgAZwP/Xr16YdSoUUhKSsLPP/+MadOmYePGjVi5cqW7rGXLlqFPnz647LLL8Morr6BevXrYvXs3li9frlv/mjVr0K9fP1xzzTWYN28eEhMTcerUKXTr1g3169fHW2+9hbS0NBQWFmLVqlU4ceKE+9yhQ4figw8+wN13343nnnsOdrsdW7ZscX9pYrbtRu/R9evXo0uXLujcubN76kJKSgoA8/8fyLR+B4mIwkIQEVVSs2bNEgBEXl6eKCkpEQ0aNBCtWrUSLpdLCCFETk6OaNq0qfv4pUuXCgDixRdf9Cjno48+EgDEO++8496WnZ0tbDab+OWXXzyOXbVqlQAg+vTp47F91KhRAoB4+OGHPbbfeOONonr16rrX4HQ6RVlZmZg7d66w2WziyJEj7n1DhgwR2dnZHsdnZ2eLIUOG6Jb30ksveV3L8OHDRZUqVcSePXs8jp0+fboAIHbs2CGEEGLmzJkCgPjss888jrv33nsFADFr1izdeoUQ4q677hJxcXFi586dusc89dRTAoDYsGGDx/YHHnhASJLk7u/8/HwBQDRs2FCUlpZ6HDthwgQBQIwfP95j+/r16wUA8fLLL3tsLygoEAkJCWLMmDFCCCFOnDghUlJSRIcOHdzvFS0jR44UWn9GzdZz9OhRER8fL/r16+dx3H//+18BQOTk5OjWLQMgUlNTPd4XQghx3XXXibp164rjx497bH/wwQdFfHy8+3i5H41eu/LyclFaWioaNWokHn30Uff2vLw83XPl10BWVlYm0tLSxODBgz2OGzNmjLDb7eLQoUNCCPN9p0euV9lOIYT48MMPBQDxwQcfaJ7ncrlEWVmZWLNmjQAgvv/+e/e+hg0bioYNG4ozZ874rPevv/4S//rXv4TdbhcPP/ywcDqd7mM2bdokAIiFCxfqlvPNN98IAOLpp582vE6zbdd7jwohRFJSkub/FWb/PzD6HSQiCgcORSaiC4LdbsfkyZOxadMmfPzxx5rHyJkO9arCN998M5KSkvD11197bG/evDkuueQSzbKuv/56j+eXXXYZALgzW8rtR44c8RiOvHXrVtxwww2oUaMGbDYb4uLicOedd8LpdOJ///uf74vV8e9//xtjxozBM888g3vvvde9fdGiRejcuTMyMzNRXl7ufvTs2RPA2ewTAKxatQrJycm44YYbPModPHiwqfq//PJLdO7c2d0XWlauXIkmTZrg6quv9tg+dOhQCCE8MmkAcMMNN7gzcGoDBgzweL5o0SJIkoTbb7/d4zrT09PRokULrF69GgCwbt06FBUVYcSIEQGt7Gu2nvXr16O4uBi33Xabx/nt2rVDdna26fq6dOmCatWquZ8XFxfj66+/Rr9+/ZCYmOjRhl69eqG4uNid1dZSXl6OKVOmoEmTJrDb7YiNjYXdbseuXbvw008/+dcZ58TGxuL222/HggULcPz4cQBn52P+61//Qt++fVGjRg0A5vvOF3Wf3nLLLYiNjfUYJvv7779j8ODBSE9Pd/+e5eTkAID7Ov/3v//ht99+w9133434+Hif9T7//PMYOnQoXnjhBbz++uuIiTn/Meviiy9GtWrV8OSTT+Ltt9/Gzp07vc6Xh6GPHDnSsB4zbQ+U2f8PZEa/g0REVmJgS0QXjIEDB+KKK67A008/rTlf9fDhw4iNjUWtWrU8tkuShPT0dBw+fNhjuzzsVEv16tU9ntvtdsPtxcXFAM7OL+3YsSP+/PNPvP7661i7di3y8vLcc+UCXRho1apVGDp0KO68805MmjTJY9+BAwfwxRdfIC4uzuPRtGlTAHDfwuTw4cOaw0DT09NNteGvv/7SHao
qO3z4sGa/ZmZmuvcrGb0G6n0HDhyAEAJpaWle1/rdd9+5r1Oei+mrrXrM1iNfi1b/me1Tres8fPgwysvL8fe//92r/l69egGA4W1pRo8ejWeffRY33ngjvvjiC2zYsAF5eXlo0aJFhRamuuuuu1BcXIz58+cDODvEd//+/Rg2bJj7GLN954u6/2JjY1GjRg13n588eRIdO3bEhg0bMHnyZKxevRp5eXlYsGABgPO/Z/6+Fz744APUqVMHAwcO9NqXmpqKNWvW4G9/+xvGjRuHpk2bIjMzExMmTHD/f/TXX3/BZrMZvv5m2x4os/8fyIx+B4mIrMQ5tkR0wZAkCdOmTUO3bt3wzjvveO2vUaMGysvL8ddff3kEt0IIFBYWuhfLUZYXbAsXLsSpU6ewYMECj6yd1i1VzPrhhx9w4403IicnB++++67X/po1a6J58+Z4/vnnNc+Xg8oaNWpg48aNXvvNLh5Vq1Ytj0VytNSoUQP79+/32r5v3z53W5WMXgP1vpo1a0KSJKxdu9ZjrrRM3ia/9r7aqsdsPXKWUqv/CgsLve5RrEd9ndWqVYPNZsMdd9yhm/mrX7++bnkffPAB7rzzTkyZMsVj+6FDh1C1alVTbdIiZ+JnzZqF4cOHY9asWcjMzET37t3dx5jtO18KCwtRp04d9/Py8nIcPnzY3ecrV67Evn37sHr1anemE4DXInH+vheWLl2KW2+9FR07dsTXX3/tlXm//PLLMX/+fAgh8MMPP2D27Nl47rnnkJCQgKeeegq1atWC0+lEYWGhbsBotu2BMvv/gSwc9ysmItLCjC0RXVC6du2Kbt264bnnnvNajfjaa68FcPaDvdInn3yCU6dOufeHkvwhUfkBXgihGZCasXfvXvTs2RMNGjTAJ598ojlk8Prrr8ePP/6Ihg0bolWrVl4P+YNs586dceLECY/VWQFg3rx5ptrSs2dPrFq1Cr/88ovuMddeey127tyJLVu2eGyfO3cuJElC586dTdWl5frrr4cQAn/++afmdV5++eUAzg4FTk1Nxdtvvw0hhG558mukzpCZradNmzaIj4/Hhx9+6HH+unXrsGfPnoCvMzExEZ07d8bWrVvRvHlzzTbIAZ4WSZK8AsjFixfjzz//NHX9RoYNG4YNGzbg22+/xRdffIEhQ4bAZrO595vtO1/Uffrxxx+jvLzcvfKv1u8ZAPzjH//weH7JJZegYcOGeP/9903dxiY7O9sdlHfs2BG7du3SPE6SJLRo0QKvvvoqqlat6n6/y8N9Z86cqVuH2bYrj9F6jRwOh+Z2s/8fEBFFGmZsieiCM23aNFx55ZU4ePCge3gdAHTr1g3XXXcdnnzySRQVFaF9+/buVZFbtmyJO+64I+Rt69atG+x2OwYNGoQxY8aguLgYM2fOxNGjRwMqr2fPnjh27BjefPNN7Nixw2Nfw4YNUatWLTz33HNYsWIF2rVrh4cffhiNGzdGcXExdu/ejSVLluDtt99G3bp1ceedd+LVV1/FnXfeieeffx6NGjXCkiVLsGzZMlNtee655/Dll1/immuuwbhx43D55Zfj2LFjWLp0KUaPHo1LL70Ujz76KObOnYvevXvjueeeQ3Z2NhYvXowZM2bggQce0J3TbEb79u1x3333YdiwYdi0aROuueYaJCUlYf/+/fj2229x+eWX44EHHkCVKlXw8ssv45577kHXrl1x7733Ii0tDb/++iu+//57vPnmmwDgDrKmTZuGnj17wmazoXnz5qbrqVatGh5//HFMnjwZ99xzD26++WYUFBQgNzfXr6HIWl5//XV06NABHTt2xAMPPICLLroIJ06cwK+//oovvvjCa66y0vXXX4/Zs2fj0ksvRfPmzbF582a89NJLXsNxGzZsiISEBHz44Ye47LLLUKVKFWRmZhoGPoMGDcLo0aMxaNAglJSUeM1nN9t3vixYsACxsbHo1q2be1XkFi1a4JZbbgFw9suLatWq4f7778eECRMQFxe
HDz/8UPNe12+99Rb69OmDNm3a4NFHH0W9evWwd+9eLFu2zCuABs4OzV2zZg2uu+46XHPNNVixYgWaNWuGRYsWYcaMGbjxxhvRoEEDCCGwYMECHDt2DN26dQMAdOzYEXfccQcmT56MAwcO4Prrr4fD4cDWrVuRmJiIhx56yK+2671H7XY7Lr/8cqxevRpffPEFMjIykJycjMaNG5v+/4CIKOKEZ80qIqLQU66KrDZ48GABwGNVZCGEOHPmjHjyySdFdna2iIuLExkZGeKBBx4QR48e9TguOztb9O7d26tceVXk//u//zPVFuVqqrIvvvhCtGjRQsTHx4s6deqIJ554Qnz55ZcCgFi1apX7ODOrIgPQfShXs/3rr7/Eww8/LOrXry/i4uJE9erVxZVXXimefvppcfLkSfdxf/zxhxgwYICoUqWKSE5OFgMGDBDr1q0ztSqyEGdXt73rrrtEenq6iIuLE5mZmeKWW24RBw4ccB+zZ88eMXjwYFGjRg0RFxcnGjduLF566SWPFWblFVlfeuklrzq0+lTp/fffF61btxZJSUkiISFBNGzYUNx5551i06ZNHsctWbJE5OTkiKSkJJGYmCiaNGkipk2b5t5fUlIi7rnnHlGrVi0hSZIAIPLz8/2qx+VyialTp4qsrCxht9tF8+bNxRdffCFycnJMr4o8cuRIzX35+fnirrvuEnXq1BFxcXGiVq1aol27dmLy5Mle/ah87Y4ePSruvvtuUbt2bZGYmCg6dOgg1q5dq9mmf//73+LSSy8VcXFxAoCYMGGCEMJ7VWQl+Xevffv2utdl9jVSk+vdvHmz6NOnj/t9OmjQII/3mBBCrFu3TrRt21YkJiaKWrVqiXvuuUds2bJF8728fv160bNnT5GamiocDodo2LChx8rLWu+5Y8eOifbt24vq1auLvLw88fPPP4tBgwaJhg0bioSEBJGamiquvvpqMXv2bI+6nE6nePXVV0WzZs2E3W4Xqampom3btuKLL77wu+1G79Ft27aJ9u3bi8TERK9VuM38f2D0O0hEFA6SEAbjrIiIiIiIiIgiHOfYEhERERERUVRjYEtERERERERRjYEtERERERERRTUGtmE2depUSJKEUaNGubcJIZCbm4vMzEwkJCSgU6dOXquZEhERERER0VkMbMMoLy8P77zzDpo3b+6x/cUXX8Qrr7yCN998E3l5eUhPT0e3bt1w4sSJMLWUiIiIiIgocjGwDZOTJ0/itttuw7vvvotq1aq5twsh8Nprr+Hpp59G//790axZM8yZMwenT5/GvHnzwthiIiIiIiKiyBQb7gZcqEaOHInevXuja9eumDx5snt7fn4+CgsL0b17d/c2h8OBnJwcrFu3DsOHD9csr6SkBCUlJe7nLpcLR44cQY0aNSBJUuguhIiIiIiIIITAiRMnkJmZiZgY4/xhcXExSktLDY+x2+2Ij48PZhMrNQa2YTB//nxs2bIFeXl5XvsKCwsBAGlpaR7b09LSsGfPHt0yp06diokTJwa3oURERERE5JeCggLUrVtXd39xcTHqZ1dB4UGnYTnp6enIz89ncGsSA1uLFRQU4JFHHsHy5csN36TqLKsQwjDzOnbsWIwePdr9/Pjx46hXrx66VL0FsVKc/w0VAmCm17S4+Fjc9vJ1+PCxZSgrLg93cyo99re12N/WYn9XkPz3y+TfsbiEONw2vXtg/a2qQ5SXQ7LZgJiYs/uM2iWfpz5OuU3ZfvlcjXp9tlFZlvpcvbL0jvP384HG8ZrvcfVxkgS4XGf7Ejj7s1x/TMzZ58rrMkPZB1p9rNdmo2P1jlc/13tPyNdidB3qvle+h5Rt03ovQaO/fZ3n6z2jPsbse8rsfl/Mni+/h4L5edbHe6FclGHlsY+RnJxsWExpaSkKDzqRvzkbKcnamd2iEy7Uv3IPSktLGdiaxMDWYps3b8bBgwdx5ZVXurc5nU588803ePPNN/HLL78AOJu5zcjIcB9z8OBBryy
uksPhgMPh8N5RIgFSgFOpXQKIYXBrSkwMEhMTgdIYoIRT10OO/W0t9re12N8V55IDBhN/x2KkivW3og4Jdojickg2ADabd8CgbpdRcOsS7va5SYrgyJ+/0eqy1OfqlaV5nMl+NSpH7z2uPk6KBZwuwBYDIObsz3L9tnP7lNdlhrLPtfpYry1Gx+odr36u956Qr8XoOjz6XvkeUrwnJI3gGdDub1/n+XrPeByj8Z7w9R6p6GdMs+dLJvo2kLoB7TLFuf8LTAbTSVXOPrQ4tb+nIAP8i2mxa6+9Ftu3b8e2bdvcj1atWuG2227Dtm3b0KBBA6Snp2PFihXuc0pLS7FmzRq0a9fO2sb6+weTiIjIX+q/IcH4myL//fIIkgL8YCu3R/5XLkc3e3cu2FIGCi7h+UE8RhFc6WWfYiTvv8O+gjG99quPjVFdg15ZWoGdkiR5blcfI5dvi/HepnW8ul3yNqfrXHZTVZ4txncb1duVr4vyfPW1aF2DzeBjs15Qp+xjdbZPrk8IwBZzNtuv127le0d9PVrBut71KM+T63C6tF+LGNXroHeMJHnvV35xo8VX0Kt+bvS+MSK/b4L9WTVIgbILwvBB/mHG1mLJyclo1qyZx7akpCTUqFHDvX3UqFGYMmUKGjVqhEaNGmHKlClITEzE4MGDK94A9X+2OsNW/MKsLhERBUov6DLLKHOj3Kf8e+fP3z+tYBTQzQJJdrtxOR4HS57DYs1ci7/9Y3S8OvhWfxEgVIGKHBzYYs63WzeLJ53PtsoBlxzcKY9R/6w8xqbqe3Ub5aDOndXVeU3VbZRfO+X1KdsqSWfrdrrOpoBcQjsVJNctB3Hl5WezsvJzvfeYe0i56+zx57YJp/F8S4/r0QrW5f6R2+vrvaL1fpOvSVmXmvra5D5T95GvLLBeG7Seq3/ftN53Vn0eDWI9LrjgMthH/mFgG4HGjBmDM2fOYMSIETh69Chat26N5cuX+xyvb4r6DwIREVE0M/qQqQyIlM/9/fun9aFZDoLMfMA2CszUx+kN+VQHgUZtlTOLymBAqw3Kn206w1g9hk0L44BNbqOyLPlfrTYpz1NflzLwlMuMgWf9ygBOCO9r0BtOrLVPWZ/6SxC9RK0yGBbibJDqFcALz9dVGYjrTRXTe430jpXbr87q+grA9F5z9e+NervmcGqNjK36erTaY2o4seo1MFO21usbgVPsnELAqfP7pLed9DGwjQCrV6/2eC5JEnJzc5GbmxuW9riZ/Q8gAv+jICKiKBSqvyfBHIqoDFw0SPEOCL1bePg7/FIZPBsFWOoyhCLY0WqDfA0e2VT5AlRZZHe54myGUfjIRiqDOr0gXjmsVBlceQWtknd/G2W3tb4o8KhT1S/qhY+0zjcKlNX71cG/VoAoFNcjXABs3kGvVqCvzg7L9cRA+71tKhjW2Kc5L9xHhlV5XVoq8jutly33VbavINroCyS/2xf49RkNOeZQZP9xji1pk+dLmMGgloiIgkHr74mZgNRonqdR2WbrMRoCqaIZ1Jq5BjnrpAxqlENClQGdrzKMjtManu0riyw/5GGzynbp1aM3H1WdiVXP9VQepy5HKyMoSYDT6b2istK5eqQ4VT5HGSw7NYZ9KuepGr3HVHNl3WXJ56tfF/eQbpv366G8Lq2AWN1uNX/ea0YkybvdWnNmzX5eVPef2S+afL2fzdRnViBfgFXwM7ALAk6dBwNb/zGwJW0c/kBERJHAzAdHrcAz2B9QTZanuQCQ0Qdm+UO+1uI48nnKxYZsMfpBljoYUgciWov8aAVuegGMVruNAjGjawbODcdVBPJ6GU6Z3n55tWG96zoXHHvMYfW1mJgy66oO8owyf1rzdtUBudb7VXkLKK05yOrXJ0by7Dt1eb5ovTZ6/S6Xq5WVlTPeRu9j4PwXFEbDktXUi4gZtV/ZT2aCbb0h6maD9CCNAOHiUcHFwJaIiIgqn4qOJtKbX6hHaxVaZdBjNGRSOXxXkrwDPvVCPcp
gSB1UaQ3flYMmp8v7HPXcQ/UwY/ma5JVzlYGJVt3qa9MKAJTzbbW+SJeDQb1sqbJ/1PVp9YOyDLluZaZYa7itus+Vx2rVqbVNzt5qZa+1VkbWun2R+v2jbJtextbXFxx6tLLKZgjh+T5Wl6dsqz+/l+oVw40CULmPNYeHw/h6/P2/wmxgTpZjYEtERESVR0UyKb4yN1qBlkz+cB+rGO5qZp6jVjmA93BfX3NKjbJbyqDJaLEg9X5ltlkdnMnt18vuGmXOjEaFyYGbMnhQ1y0HxOoMofpWTOpAVz2HVyt7qBEMe2Th1UOj1XUqX1e9Ptd6n+nN+dQLUPWCT2VmVRk0ax2nx1fgpvUFjl4AqTeE2ezvqV6grC5Db6h+IAG1P+2rIHnxKL0H+YeBLfnm65fbol9+IiKikNL68KsVMBrMO3SdOOF5vj9/I7XmWsrb5UDFV0CsbrO6DD16GVL5XHnhImUfuIR2AK51Ox3FMVKMzXu7si694ETer/xZK8DVCyTVw5WVWWq98gGIsnLP+uQylfWrKYcTqzOocn8afZGi7m/lNuXQaL1h6Vplmhl2rvUeUx+jHiKv7BdlnXI7teYvB2Pov1G23F9aw8At4PLxIP8wsCXf/JmnQEREFE56ixYFQj0kVU0dvLmE931sjYYgq+vS+lfJFuN9nlYQazREVW9YqtYcUrkvjebmKre7h6MaLC4VI0G4nOeP1wvCtdqqN/dUecsdmfJ85ZcCWl9OyEO0Ddrs3i4/lMOh9W4zpBz2rB4SrS5Xb598LcrsudGK1PLxRpTzftVBp7LdMq3gXe+LC/V5ynYB5pMlgYx2CJSyTy3+TKu3cJT8IP8wsL1QMKtKREQXgmAN31PPO/V1LOCdVTXKgqqpAy51+ersqN4taozKVwd/ynKUdcnUw6K1glj1duD8ar/KochGmWatgFmrrXoZOq3ASz5f3V71cfKXBXqLVBm1Wy5fXY/yGMAzY+uRgXV6vj/0vrSQM7Vax2gFjVpzpOVjlUO9ldvUr7v6fas8Tq9PjPap22v0BYu63XrHBEsYEzROYfwg/zCwvVAwq0pERJVdML/EVQ8BVddhFJAogwblojby8WayVlrDPdVBU6DXazQ30Wh4p7Jd6rKUAYtymK68zUxb1PVrzdfUumajVXDV/ahuj7of1cOS1dle5eugrFe94JfcXq17/sr1KedjG400UGeo1f1v1E++AmHldWkFwerX3mhIu9Y+vaHSesG3VjuNjjMrVAmeCpbLocjBxcCWiIiIKgd/vsT19YHUaBisUZ3q52ayjnp1ysGhVqZRHXRotVXrZ61AXKtdRqsOK4cQ65WlDK7UQ3HVAZRWMORrnrBymLTRqsVaZRgFVXrBuHposPJYOQuqHtarHKKsPF/+wkM5j1ZrHqo6i6zVV8qgVPn6mA0M1e1SXp/ecWbKVZ4nDwvXa5PZ39uKJGl8vb+1rsFM0FrBxJELEpw6DxeYlPIXA1siIiK68Pj6QKoVVCjpZdj0spNGWTR1ncrA0Ux7ze4zyrbptUWmdZ9a9X7ldr2hv+o26fWX3rxU+bl8Kxj166RetVgOnOTAUQ6ylK+Fr9dSnTHVCmCV/+oNE1dnitXZYPU16omRvF8PZVlmh87LbVLXpXXbISNa71d1PUaZcflYvS9kgklruDZg7sueEDXH6EH+YWBLRERE0SmYn/yMyjJaoEkvKFDzJ6iUy1fXa3RLH5kySPJ1exi9xaSUQZwySDPK6GkNt9XK0vnKaAPnA0StLwKUZWoFS74CFnlurDJY1WqD+h676uHlyr6RX3Pl7X1c4vxtguS+11sdWB3AawW6Tuf5a5TrUAfk6n7RG+at/FfOGleEVr+r2yM/V/e5MtuslT32xcywfvUXAGbOC5Sf5epla+UH+YeBLREREUUnXx9+9e6t6U9ZyoBEqxxVcCHZ43zXZZY6WFDfIkaL8jqUgZRWBlVrgSbAc16wMoBXZyLVgbEy1aQe8qtHGczJ/wq
hHdzJdWsF+C7h/WWDchisOlhWnqtsu7IP9NJmQnF96iHAirJFWbnnysm+Mpta5cjk+bhC43VQUg9H1nu/GI0e8HdIv6/7EhuVbXYost5we19DrvW+UApVNtbPcoMZ2J44cQKtW7dGlSpV8OOPPwIA/vjjD9xwww3o1KkTJk6cCADYuXMnOnTogLZt2+Krr77yq45IF+v7EIpqLgHYFD9bMKyCiIgoIgTjFh5GWSTJO+MlSsus/dBs9LddnuMpHyNJ2ikNdRnKwFXZh+p7uKqzfb6CFrm/tIbQKgMyoyysSwAQnufp9YF8rq8gSM7CKvtKzoiqbyHkdHnPj1WvXixJnhlVdZCvNYRab5/WNq1bC6mvCTgXTHtfsrtOvUyr8jrVr7lWOco6tTL0Ru9RM19Oqe/r7OvzrPL1MSOMn49dQoJLaNett11PQkICFi1ahCeeeMK97YknnsDMmTNRp04d97Zx48Zh1qxZSEtLQ48ePdC1a9fAGh+BmLGt7AIZ1gFwYD8REVUuZv6u+fu3T/2BG4AU7/CvPF/zIdXUGU6tQFDZPvkY5XOjLJrRvEh5v1ZmUS+jJv+rtbqz0VBjeZt6RWrlnFj5GPW8WqN2GQ1rVu9XB+HKTK3ymuRhy/JroWyzVhnK+b5qckZWncFWXovWvFjldrOf9/SCWo33tZtWvVpDiZWU71Gt+wXrvXf02ujr+vy95VcYkz5mMrZFRUUej5KSEs2yYmNjUatWLffzsrIy7N69G4899hi6dOmCdevWAQD279+PRo0aISUlBTVq1MChQ4dCf6EWYWBL2pjZJSKicAjGF6taZZj5u6a1iJCvoZwqoljxodNMnVofwo0+6GuVrZVtNWijz2yX1rlGqyArt6uPUwaOWvNf9bKzyqyjug51YGl0H1l1IKv8Vz3nVSsI1srSywGx3orHyvrlIcjK8+VrUmdxtdqt/BJAef3qDLcyy6puh971qa9L3Sb1UHZ1uWa/lFG33ahMrXN9bQ/k993fL5RCxIkYwwcAZGVlITU11f2YOnWqqbIPHTqEH374AdOnT8e8efMwatQoAIBQvAapqak4cuRI0K8rXDgUmYiIiCJHoF+s6mUStfbr8Sc7pLNdssdBlJfr16nXDmVgpjWcU4+Z4bbq8o2ogzVl5lQZiCqH4GoFQ1rXoRwSLVNnSNUBW4ziPGXfyfu0ygG8h1ErM596Q9R9DZtVX6v62gDv4b9ax2sNJdYaqqz1nlb3gbpMrSHL6uy+1vWphxyb/X1RH691XkWSJcovTPSGF2u99r6GIvub1ZUFediyMBiKLM5tLygoQEpKinu7w+HQPF6tatWquOSSS1C3bl0AZzO65eXliIk5/wY9duwYqlevHmjzIw4DWyIiIooOFZmr5++H0UA+wLrE2Tm2cn1acye1AhT5eKOMmt6QYKN5xIFOR9KqUx1wAvpfBLiEfoAjBx1a9agDI68g10eAL7fJKDBU79Mauq04TrLZIOQVibXqVM61lcsw+vIiRvmz5H1tWmVCdY4e9RBseY6s3vvAaBVpvfepVnuUc7eNXiuzX67oBd5mr8NoKLUes7/vQQxqARguEiVvT0lJ8QhszUpISEDVqlVx/PhxxMbGorS0FLGxsUhPT8euXbuQlpaGI0eOoGbNmhW6hkjCwJbO4+JSREQUyfz5G1XRv2nqIEHvA7tqn6u4BDF2u3d5RsNw1cGBr/Yoz9FbSEhrrqnZTJxenXI56udmMnTugFYAMTbv/UZBrN5zjyAV54cJq4N9dyDuAuRb8CgDUeUwXvk1OFeGO6hVv1Z6K0oD54cDK7OGykBT73VSBrXqYdnKIFjZfqMyfc2RNvPeMXrPaGXV1e9jvayzr7L9oTec359yfQXvIeIUMXAK7V98ZwBJ5V69emHbtm345ZdfMHz4cDz//PO4/vrrUVZWhkmTJgEApkyZgmHDhsHpdOK5556rSPMjDgPbC1Wwh4oQERFFkmD+TVN/MDfYZ0tJhigt9a9
cwDjTB2jvM7pnq97feOWKsUb1+bo3qV6bzJ5rtF3vudZQZOU29fBTRUDozrwaDZ/WWgFYa2EkvRWNleVpXYNWwK0uVx1wKxelUrdDq0yjn/Xaq96uXg1ajyQBNnUAblCumfb5EuzMqj9Z8BBwQYJL5xstF/yPbJcsWeK1be3atR7PmzRpgm+//dbvsqMBF4+6UDGIJSIiOs/PRaJ0j9EbtmqmXPXfZr25qGbLVC+Qox4ObVSm2TmIWtdhNK/UV39qZRhlynarr0GSDIM4d+ZVbxErrWBLudCTL5JGO9QZSvWxWvuUlPesNfrCQ++53gJJvl4D9ZBsvbqVKzyr+zAQZn7XfNXh76JQwfg8bKbdZAkGtlRx/IUmIqJwCObfH7PZRB/HCJfL4EA/y9VbNVeLUUbXaE6sv+T5s8D5YMaobf5+MWA2I6w8T/NetybbIV+P8rpkRsON1eTb+mgN/1VfkzI7rwx+9W7/41K9jsrj5f5Xt9/oNTEzxFx5XXr8HabrK9A2G7wbHaPX3lB+VtX7AsMEM7f7IfMY2FZ2VgSdzP4SEVE4+PP3xyizJQvGLUDkFZH16jCiF8AA+vMI5f3qe7mqg0W96/cnCFTeY1Z5jLptysBTmf2U26KuSx34Kevy1VZ1/fJcVq12awV+MZLnQ12P2aynOousF6jqZeWFSzs4V8+NVgaBctArZ+GVZTtd5oJyvfePXnuVx/j7+U9ur1Hg728bZL5uc1SB4NOQmXYakOfY6j3IP+yxyi7QoJNZWCIiCqVQ/p0xM8xX6++jPxlNddDiDsg07rtqli3GOxhUlqGuUxnMqOd9qoNFuWz5uXLOrZpeEKC+f6werayl1lBkdbCmbI/6epTHOl2ewao6uFBn7dS3pVEycy1KWtlFvWDNzPtJbptNtaCW1nBxraBdqw+MftYLKn3NG5Yk4yDO7JdCZocs+3uMUV8bDVevqAqWd3aOrf6D/MPA1mIzZ85E8+bN3Ut3t23bFl9++aV7/9ChQyFJksejTZs21jeUWVgiIgqlUP6d0coK+pOZBLyHu6ppfZD2N+OrHkKqXnRIneHyNSzUaLve3Fq5TDPZRWW71JlYdVZVL4DxtXCQXoCitWiSsi6tDKe6PVq39jFa5EqdddXL/Gl9kSD/q5dt1pvTq0dZh5njlF8GqLPTvoJ7rfeSr1vo6L1ugY6CMNMmX/S+XAmGIH0x50IMnDoPvUWlSB9XRbZY3bp18cILL+Diiy8GAMyZMwd9+/bF1q1b0bRpUwBAjx49MGvWLPc5dq3bBhAREZE5ZubP6n0I9jW0UVmW0wUpLvbsQkV6QzXVt6gxCvTUZRgFFmauUWtFYK2MnR6tbKW6n8zMkTVDa4iyUbZPXb/yX7nP5Vv6qDO4RmXqzYfVaptW/yjvJeurP7RWU1bTGq4sX6P69kJGZSnnJavr9fc1M6or0GBS71ZE/rRF70uIYAhSOca3++HoSX8xsLVYnz59PJ4///zzmDlzJr777jt3YOtwOJCenh6O5hEREUU/rQDDF6MA1myAGSNBlJWfvx+q1gdyrYBZ74N7IB+efQXUZs7Xu92Lv+3RCtSNjtNqu9Ytb9SZWq1rU5alDE7Vt1XSa6PR620UCKvvo6u+bY9eGyUJkG/vopVZ1jpH/QWDXpuVmX75PrPKYNbnFzCS93ZlW4IVLBr9HpgNboP1e2QRl0FmNpDb/VzoGNiGkdPpxP/93//h1KlTaNu2rXv76tWrUbt2bVStWhU5OTl4/vnnUbt2bcOySkpKUFJS4n5eVFQEAIiNj0VcDF/mUIuLj/X4l0KL/W0t9re12N86KpL9MxDs/pZibBAuk7f88TpZJ/hUDilVznMNZUZHq7/VQ3HNZgRVx+r2uVFg60879fYZtMknrddAuQ/wzJyavResRztsBvt8lCHXr/EaefS3P1/6GNUf7PefXl3q9obo/4GgcbmAM+YPdwoJTqF9PXrbSZ8kBPPcVtu+fTvatm2L4uJiVKl
SBfPmzUOvXr0AAB999BGqVKmC7Oxs5Ofn49lnn0V5eTk2b94Mh8OhW2Zubi4mTpzotX3evHlITEwM2bUQERERERFw+vRpDB48GMePH0dKSorucUVFRUhNTcXsrS2QmGzTPOb0CSeGtvzeZ1l0HgPbMCgtLcXevXtx7NgxfPLJJ/jnP/+JNWvWoEmTJl7H7t+/H9nZ2Zg/fz769++vW6ZWxjYrKwvdq92GuBjFHN1I/6YrSsXFx2LoW70xe+RilBWX+z6BKoT9bS32t7XY30Fi8u+dJf3tT3YrFJkwWTCHNvvap65flbH16vOKfj7Rmj9sVK56TqpRv/uTnTZzvFb5Whlerbm8gN+vh1d/m82CG2VRKzLcPRCBZtgtVuYqxfKjH5oObN/f0tIwsL3riq0MbP3AcU5hYLfb3YtHtWrVCnl5eXj99dfxj3/8w+vYjIwMZGdnY9euXYZlOhwOzYxueXG5560H/MVA2C9lxeUoO8MPolZhf1uL/W0t9re1gtXfkt0OUVpqfJA/Q0zN/g2Wgwp1wKZXnr9lA4rFhpxnP1uYCfa0gr5zAZq7z9WBlFaApB6GLc8b1Vu0yR++hl2r6zc6XisoNBXwqYavm/3SwI9rLisuR9mpsuB/rlPOxwV8lx/qYfRhVC78+z9EXgFZe1/l7KNQ4jrSEUAI4ZFtVTp8+DAKCgqQkZFhcavOYVBLREQXgorevuPc+aJY++85AM/brpip15/5lcrb1+iVH+jKt0J4BiI2m/aKs1p1qgMYo8WGlIs9SdL5h7zNpbpGM9ejd7sd5X5fCw65VNevDniV2/QWdlLSu5WN8l+919CfY7SYXdDLiD8riGudE6yg1kzb1fffjTAunJ9nq34Y3NyLdDBja7Fx48ahZ8+eyMrKwokTJzB//nysXr0aS5cuxcmTJ5Gbm4sBAwYgIyMDu3fvxrhx41CzZk3069cv3E0nIiKKDoGMNvInY2dwvvt2P1q0Mqn+DvlUZwR9XaeZfjCTQdNbkVirfWYXTlKWq6bM9iqvVet4oyy0MtA02i9TBtLK1Yz1MuFGQ5D1+lVrVWFJAmw6x/v7hYTZL07kY40yrWb6zExdvn6/zAhkxIGv++8GKkgjGo1XRWb+0V8MbC124MAB3HHHHdi/fz9SU1PRvHlzLF26FN26dcOZM2ewfft2zJ07F8eOHUNGRgY6d+6Mjz76CMnJydY0kEOPiYgo2gXj75hRAGMkNvbsUF1/AgJf25WBh1ZGUMnfeZEuoT9+T+vWMr7mVsZoDNk1CpyNXiutTKAcNGvdEsdXn5ga2qtRv/yzv9fiz2rOehlhX7SCUl9zZNX1G71fzAakVgxD1gvwK/rZNRhfhAXI+D62DGz9xcDWYu+9957uvoSEBCxbtszC1mhgUEtERJVRMD78yoyyRuXl3tsDrdtsQKZkFIiYDbZ9ZUjV5+otemS0X/mvUVvVgZCcNTWag6suy1cWVd3eQIMvo75Wl6k1lFuZ6dbqd/UXCOqy1PfLNXrfmH1t9barzzX7HjUzbFnN5GiJaEzOuCDBBe02620nffwqgIiIiCq/in7glQMfHx/mhUtn4Sa1YMxtNCpHa+6n2T4wukYz7TY7PNooyNYLim06H12Vc3CN2uBrqK9LnF+YSj1vVZ5rK/+sngesbqfevFKtdpr5wqHcYGEircBei5k+MjrfzLmBMKpP74sR9c+BDJGW/PziKMjzdeWMrd6D/MMeIyIiItISrA+xWh+cjQJGvWGd/sxTtGLVWTMLM/lblq/96gAW0F6Uy6hM+Xit/cpMtRxEy8GP+ksCcS4I1spCas3HVc9PVrdBqLa5VNeqd5cL5eJWvoYEK+s2opUtVv6rteCVVrvMbNOqz+yxZoJSM8PdzQpyQC+viqz3IP+wx4iIiIi0BPIhVivwMluXXoBmdI4Z6gBJvc8fWsM+jYIos6vSqrOm6rYpM7V6c1nVmVOt4b6SiaBIvjblStPq/VplSKq+Ufe7MourlUn
XCnz1hhzL/5oYReBFq82+qPtC2X69L1jMbpOp3ydmg/BwiNR2XeAY2BIREVHlZuZDaDA/qAaa1aloltWfbJjePq15r1r0hgSrz9W6/ZCvTK86YFYPAdZjpv/kLKsRdQCpbqtWwKxVv9bwb2W/GV2LMnOs7jNl+yv6vtXqC7Nl+soOB/qlSTQIUltdQjJ8kH8Y2BIREVHlVtHhin6QbDbfB5kJHgO5/6ZewKXc56s9ZoY/K+dzGg031atTDu58ZWm12qY3RFbreOV5Wu3yNYxWGfRp9a1W++XtWu2S7wesbIdeBll5jDLIjZHO95/8pYE/Q3612qoeBu/viAG9rG8wMq7RFOwGwGUwDJm3+/Efe4yIiIjISnrBo9FiQ3rBi9Z2s9lUrfp9DVtWDtFVnmtmiK+8z2gBJ62+8WeOpD/ZaTNDqfW2qYNiOUhV3gdXLwhWvj5Ol3ZgqDUPVy9Q1GunUUZcr23+BpLqLwCU282WZ3Y4dCXkEjGGD/IPe4yIiIguXMEYghzKRabk7XrBi1ZG1deQXKM5kUaBZEUX4TG7QJS/Q2H9OUZveLH6HGXArtUu9RxT4HxGVnmM+osC9esjB/ryMVpfEKgDT19Dqc0IVibU6L0JmHuNrFjoLEI5IRk+yD8MbImIiCi6VSSw9DXHNJAyKlKW8rxA2mbhsGtNvoYSG9EK5LTK8ycbqLfQlNH5yu3y6sZaC0KZaYfW4k7qQFdZhpkgz6h/Ik0g7zWzX4CYOTbQOiwSzIztiRMn0Lp1a1SpUgU//vije/vevXvhcDjc23bu3IkOHTqgbdu2+Oqrr4J6PeHGwJaIiIiiWzADtQqWJUpLfQ8dDbQNZtvma5ElM+cHeqx6WHOg/alcIVqZzVTON/W3PLlNRrf7kanbb3aFXzNBmd7Q30CG5IbiSwqjwNFM31WkfMD8FyDKn42G6/sqI4ycMMra+ichIQGLFi3CTTfd5LF92rRpaN++vfv5uHHjMGvWLCxbtgzjx4+v+EVEEAa2RERERHr8XZgnNrZicxaVzAyb1aI3PDQYQYlWXf60wdccXvlfrYWbKjJkVYizc1uVQ3l9DZfVCpr0hn+rzwG0FwAzyrbqDTHWa18gC4z5KhPQDxwB79v9VPRLFKuGRGuJgKxtMDO2sbGxqFWrlse2/Px8SJKEevXqubft378fGRkZmDt3Lv73v/+hevXqiI+Px8UXX4x77rkHeXl5Qbm2cGBgS0RERKTHzw/LorTs7A8Vmf8paQRywRh+qg5KfK1m629Qb4YyANEL+sxkZY3aqKS1IJfeIlJ65xu12SU870+rJK9abNTPWs/17ser3q6+lZJWu9Xb9OpWM8oeq8uJhOynr+y7loq0O0hBsVPEGD4AoKioyONRUlJiuvxp06bh8ccf99h2/PhxtGvXDnv37kXLli3xxRdf4MCBA1i8eDHat2+PsWPHYsCAAUG5PqvFhrsBRERERFFHZ5it5GulXSW9+Z/+ZCYrMtxXXrRIHTT7WkDKTJ1GxyiDAq0gXt6uTr8obzMkt0VZpi3m/JxYM+2RzzFapdnXdnnxJ62yzWaa1W3TO159iyU5aPbVfn/mJGu1wWj4tL/8fb+aPd7XPOdgC1KZAhJcOotEiXPbs7KyPLZPmDABubm5Psv+7bffAAAXXXSRx3a73Y4NGzYgISEB119/PRo3bozU1FSkpqaicePGGDZsGDZs2OD/xUQABrZERERUOekFbcrnvs7Ro3OM5HBAlJf72VCtgnQCFnXAp8wkBvJhuyLZS3/K1WqfHBQq98k/a127UYCoF2AaZX6V9UiSd1Cs16dm+tooyNZqh68vAowWoFIfKx8DnJ+XrNxnhtHQa39IqtfY3/erv3VX5IueMFBmZrX2AUBBQQFSUlLc2x0Oh6myv//+e+zYsQM9evTA9u3b8euvv2LVqlVo0KAB/vjjD6SlpeHIkSOoWbOm17mtW7cO4GrCj4EtERERVU6+gjZ/5+SZoReMKrNtZur
QO16dSQOMgw+9AFnJKHOpFGiQpz7Xn2DaV11mVyb2xWhYr6/ytNplNqiVzzfqf6NyfN3H18wq0GbqDCTrq64/EHK9Zt6fgHVBbZACaJeQ4BLa5cjbU1JSPAJbI7169cK2bdvwyy+/YPjw4Vi7di0AYOjQoXj88ccRFxeHKVOmYNiwYdizZw86d+6MiRMnapY1YcKEAK4ovBjYEhERUeVkJqjzh4kPs6Ks3Dj7aObDsLLdZgI2o3aZHQYbyPBnMxlNoyy5JHkfa5TpVGYhtfpHXZfW66+V0VTf0kerLL3rU7dBb5uvvvKn/5VZe6sylGYy+f6c7++XCMEIkIPZT0Eqy4kYOHWWPNLbbmTJkiWa22fPnu3+uUmTJvj2228xZswYAMCpU6cwZ84cDBkyxH3MnDlzGNgSERERRYxgfxg2M0w1GHMRjeY3GrUvEBUZvqwXvJnNzmplSYWJ4FIr8JWDPWUwqzWHWA5g5eOcrvPZwFDOIzXTV2bLi7YgL1TvYX/OjdDhyWYytqHy4osvun/++uuvPZ5H6/1tuSoyERERXVjMzjUMZHhrRW7BoizDnzZozbkM5J6oSoFcg6+snr/nKwNc+Xy9YEZv4S2tAFg5rNVsttSfFYXNUGaM5evztXqv1v5gzUHVWu05WCo6TBwIzrVHIBdiDB9WOnPmDABACIHS0lJL6w4WZmyJiIjowhKsD8QaQYJkt0P4cTsOzfICychVZI6nun6t8gDvhYCU28y0z58Mm1623Oh8M20JpG+1VmgG9IdkK7f5apPydZKHRctlaNWlfC4H6Hp1mMkca21XX5evYdRG12j2Nfd1XCUIYrU4hQSnTmZWb3so5OTkoF+/fujbty9WrlyJZs2aWVZ3MDFjS0RERBQIjQ/brpOnAs92aWUXA6W3+I8/56gpgy6t2/T4qqOiQa1yv1Zd/mZetZ5rHe9rnq5Wxlw9N9Sl+leSPI+Tj5WHJ+vdy1ZJfj2Eqj+0yvaHOpj1FQwHYx57uAPXYGao/apWMnxYZerUqWjXrh2+/PJLXHTRRfjHP/5hWd3BxIwtERERUZBINlvwhof6e/9T5XZ1Ni+Yw2f12qbO4vrKwvkaemtUjtnFmdT7/F3ESC8jq9dmmd5K2MpgUG9xq0Cz9jKjFbhDdZsdX+UEK3MLBLbKuC/hDqzDzOFwYPz48eFuRoUxsCUiIiIKFuECYAvOwja+hrEaZdK05pxatYKumRWdtfbrLZKkFYyaCTb9vV7l0FqjodwVGfKsbJPRlwPqY33RWk3aTABeEf4MSTcb4JppX6C3MIpAQsTApXMfW6GzPVgaNGgAYfCa5efnh7T+UGBgS0RERBQssbHaGdJAgkqz8ybNiqYgwJ+VdH2tUG32tZC3aQWIvgJSNb1bEmnNSTWzwrbyOK3jg5WV94evDL7eOYG8hwN931v1ZU6AnJDghM4cW53twbJo0SL3z8XFxfjwww+RmpqKm266KaT1hhLn2Fps5syZaN68uftmy23btsWXX37p3i+EQG5uLjIzM5GQkIBOnTphx44dYWwxERERmab3Ad/Mh2szc2or8iE9WPMI/S1H73j1vFPl3FJ/+ktrTqpRG8wOP1ZvU5fnUgWnyn9DMffY15zXYDIz99jssWpWBbWB1GVGEOfjnn0b6c2xDVo1mpo0aeJ+XHHFFXj55ZexbNky97ZoxMDWYnXr1sULL7yATZs2YdOmTejSpQv69u3rDl5ffPFFvPLKK3jzzTeRl5eH9PR0dOvWDSdOnAhzy4mIiKJEqD8RapWvDmz8PQ8IziI8ynJ9BWd6bdIrT/7X33mzegtZaS2+JC+IZFS2VubUV9/5E+Aog2S9W/HIizypy9eaa6schqvXVjOvg96tpELxfld/WWD0Xgp1oB1pGdcgtsd1biiy3sNKhw4dQmFhoaV1BhuHIlusT58+Hs+ff/55zJw5E9999x2aNGmC1157DU8//TT69+8PAJgzZw7S0tIwb948DB8+PBxNJiI
iii7h+KBtJpsWyjmPZuacmm2T0XH+zpv1dYxWkGtGIAsj+bptjUw5FLmir4/WfNBAF8OShxsH+lob0asvmHWQFxckuHSGHOttD5arr77aPcfW6XTi999/xxNPPBHSOkONgW0YOZ1O/N///R9OnTqFtm3bIj8/H4WFhejevbv7GIfDgZycHKxbt46BLRERUSTRuo9tbCxEeXnIytcV6Iqz4Z6D6M+quXrBnHxusIar+loQKRhzRANdRMnXvWUDbaPyWDOLY4X7fVNJhPM+ttOnT3f/HBsbi/r16yMjIyOkdYYaA9sw2L59O9q2bYvi4mJUqVIFn376KZo0aYJ169YBANLS0jyOT0tLw549ewzLLCkpQYnihvBFRUUAgNj4WMTF8GUOtbj4WI9/KbTY39Zif1uL/W2toPe3JAFxEfLaGWUoAesDk3PtCbjP/Q0O9cqQb4WkPD/QFZSD0cZgBokaZZnq70DbYCYIvpC4XMAZPw43GHIc6qHI11xzDQDg5MmTiImJQWJiYkjrs4IkjNZ5ppAoLS3F3r17cezYMXzyySf45z//iTVr1uDYsWNo37499u3b5/GNyb333ouCggIsXbpUt8zc3FxMnDjRa/u8efMqxRuViIiIiCiSnT59GoMHD8bx48eRkpKie1xRURFSU1Nxy9d3wJ5k1zym9FQpPr72Xz7LCtTBgwdx2223Ye3atXC5XOjSpYt7CmS0ipCvFC8sdrsdF198MQCgVatWyMvLw+uvv44nn3wSAFBYWOgR2B48eNDnm2zs2LEYPXq0+3lRURGysrIwe+RixMVo/8JQ8MTFx2LoW70xe+RilBUHaQga6WJ/W4v9bS32t7Us6W9/ho2GaohnsDKSQeB3n5vJghq1N9KzikYZdK1rt53L5KmvSacPdPs7WK+xUf9G4vD3ECpzlfp1vDCYYytCPMf20UcfRU5ODpYtW4ZWrVrhmWeewcMPP4yPPvoopPWGEgPbCCCEQElJCerXr4/09HSsWLECLVu2BHA2u7tmzRpMmzbNsAyHwwGHw+G1vby4HJC4+LVVyorLUXaGH0Stwv62FvvbWuxva5nq71DPPQzFB/5gLagUgvoDfo/LZYby9VAOWQ5m2f7cn9fXMUZfiGgEy2XF5Sg7Vea1XbMOX8dU5Hgz5UVp4Fsu/Hs/y7f20dsXStu3b8eHH34I4Gws0qFDBzz++OMhrTPUGNhabNy4cejZsyeysrJw4sQJzJ8/H6tXr8bSpUshSRJGjRqFKVOmoFGjRmjUqBGmTJmCxMREDB48ONxNJyIiurCZyPpJ8Q6IUv+yNh6C/aE+0FWMQxnUGt3X1p9Vlf3JwgYS4Kvn4eq1Q2ZmsSmzC0bpZam1FtAys8qy3nG+2mFGsIPQigT+USacc2zVnE4niouLLa0z2BjYWuzAgQO44447sH//fqSmpqJ58+ZYunQpunXrBgAYM2YMzpw5gxEjRuDo0aNo3bo1li9fjuTk5DC3nIiIiHypUFAbKRkvrfMDGc4bjqBZvT/QVaXVgWhFhjMHcr3qlY8DWUE5GO2INJUsuA1nxjY5ORn79+9HRkYGTp06heuvvx433nhjSOsMNQa2FnvvvfcM90uShNzcXOTm5lrTICIiIgqfYAWRShX94K91vvLeroD5AMOf4/w531dWz0wG0582mnk9nC7fdSlJkrlzAp0frT5PipJMqL+veRQL531s3377bbhcZ99/d999Ny699FL07ds3pHWGGgNbIiIiomCpyP1DZaFY6Kii8yD9CRTNHqcMZiua3VUGtP5+WRCsOcZmglNlucLkvWy15voaLaill+X15/r1WLEYlNUBbBgD+3BmbC+//HL3z/ICttGOqwoRERERBYkUyD1s9bKVwaTMYlpRn5pWnaEaeq33ZYGyDZKiL4zaps5yBtp3ZoYUa3GJs1lddb2BftFQUZGUSQ3W+ziMmWA5sNV7kH+YsSUiIiIKBpeAgNP/80L9wVor8LM6S+Vr6HBF2mN
miLQkeaZz5Oylv0OQA1nYqCIrTwcrmxzNKvHQ5HBmbCsjZmyJiIiIgiFGgmSzhaduZfbKV3ZPb5tMby5mKJjJZPqTmdMrp6LDu+U2SJL3NqN69fYp70drlmSir6wUisx/KDP7EYgZ2+BixpaIiIgoSIQzgIxtMPgaiguYmx/pEufTHqHKDKqDF1/1BGNOZ6ALMKnbIEwGs77ESN4Lcvnia56tvyp6T91QvDcqcRAbiT744AN8/vnnAIDevXtjyJAhYW5RxTCwJSIiItISQOAg2WzGwW1FF3GqCDOZWzMBsllmh5AGWo+Z1aMDDf7Uwb56m7/lG91r1mwZ6usN5hBuf/ZR0Ajor37sbz78xIkT6Nq1K3bs2IHvvvsO9evXR79+/VBcXAybzYZZs2bhoosuws6dO3HfffehoKAAVapUwdixYwEAM2fORGFhYVQvJMWhyOQpHAtKEBERRaIAPtz7zNhq3YrGDK0sZzgEY1hwRWllT30dG0hbzAzJ9jeoDWSorVGfh2MF4YrsN3vMBSKYQ5ETEhKwaNEi3HTTTQCA2NhYzJo1C9988w3Gjh2Ll156CQAwbtw4zJo1C0lJSahSpQpuv/123H777Vi+fDk+/PDDoF+jlZixJU/8ho6IiCjyBCPLWdHhuIHWG0ib1MGg1gJRyu162Vsz16x3jBBny7WpyvZVprpdyvO1hlVrbVeWI+8zGrqsdf2hGEoejOHJFX0P+rM/1CpYv5nFo4qKijy2OxwOOBwOr+NjY2NRq1Ytj+Pq1KkDAIiLi0Ns7Nmwb//+/WjUqBEcDgdq166NQ4cOoWbNmkhKSkJMTHTnPKO79URERESVUSiyWnpBlV7dVsyv1avDzJBo5faK3KPV6BghjFdGlq9HayEpl9A+X12WmevTq1fmdPk+P1RCmYENxzxff1SwfjMZ26ysLKSmprofU6dO9auOsrIyPPfcc3j44YcBAOLc+zEpKQkJCQk4cuQIAOD48eNITEys0PWEGzO2RERERJVNRRb/UW83M5fVrIouBFWROa3BOFZ9vN5QZzMZZ8B83/q6V6+/83WDKZT1hTsjG2JmMrYFBQVISUlxb9fK1hq57777cP/996Nhw4YA4M7KfvXVVxgwYACqV68OAEhJScHKlSv9voZIwsCWiIiIKFiCcV9WoOIf5oMZDAQa1IZiZV1fx8uBol6gGYrh1XKdyiA1RtJvi5lMs7rdykBYfZ5WoG2lUAaflTioBQAhJAidwFbenpKS4hHY+mPy5MmoX78+br31Vve29PR07Nq1C2lpafj9998xdepUSJKErl27okePHgHVEykY2BIREREFiy1GO5gJt8qU+TK6Fjng01pBWPmvv+UqqY+R61HPgTXzhYCvjK1WXeqh4uF+XcNdfzCE6ffDBUl3VWS97UZ69eqFbdu24ZdffkGfPn0wceJEtG/fHitXrkTbtm0xdepUTJkyBcOGDUNBQQFiY2NRs2ZNCCHw2GOP4YcffsCYMWMqellhw8CWiIiIKFj8vTepv4Jx+xqr2hCqObpmyjUKFs3ehsjXOcEIMOV2+rPIVbQHkqEYTl5RYepTM0OR/bFkyRKP5+PGjfM6pkmTJvj2229x+eWX47vvvkNSUhIA4KGHHkLbtm2jOrDl4lFERERE0cLKD+B6iwJZsaiU3r5g1F2RLwbUbQlGe/z9osDMbYjkciOR2T6z4r0e5j6ShyLrPUIpJibGHdQCQHJyMmw2W0jrDDUGtkRERETBEu3ZNCUrr8WornD2qdkg0gytFZSB8/NxzTI759mo34J5XdEszL+vwbyPrb+aN2+OESNGYNu2bdi2bRuGDx+O5s2bh7TOUGNgS0RERBQskZolC5dI7Q91u/Ta6c/9a32VZTRkWesetEZlV7Rfg7XKNVVIODO2M2bMQGxsLAYPHoxBgwbBbrdjxowZIa0z1DjHloiIiChY5MWjIkEkLBjla96qnoreYsjXtfu6R2xFbksUquHSVr6ekfDeuQA
Ig8xsqAPb5ORkvPHGGyGtw2oMbImIiIhCKVxBgq/bDgXarmBdj1EZgSz+ZLZs4GzgrLfQl78LS4Wa1qJRFb0fMEUEAf23eqi/HisrK8Mnn3yC3377DeXl5e7tEyZMCHHNocPAloiIiMgfRkFEeTmgXoAlnAFHsOeuhiKA8neVXLP3CTbKvEbiLZn0hGMF6mjpmyjnggQpiLf78cegQYPw119/oVWrVlG/aJSMgS0RERGRP4w+9EfaB8RgB6KhCHgCGeKrXKVY7/xg3NKHKISM5tKGeijyzp07sWPHDkiVaCExLh5FREREFCRSoIFtqD5cBpqV1fo5UGYXagqkPH+vz6hu9TzbcAtFGypREFMZhHNV5IsuughnzpwJaR1WY8aWiIiIKEiE0xngiREQSMmM5nKqBTLftaJZ0UCCWeWKxL7arNxvVRZXDmL96ftARNL7jMKqfv366Nq1KwYMGID4+Hj39pEjR4axVRXDwJaIiIgoWCq6mm+0icahu74WXqpoMB9IMByN/UgVJoTB4lEh/m/kzJkzuPTSS7Fjx47QVmQhBrYWmzp1KhYsWICff/4ZCQkJaNeuHaZNm4bGjRu7jxk6dCjmzJnjcV7r1q3x3XffWd1cIiIiCrULLRi2mq9b5/izwJaZoNUWo7/icqAief5vJLctwoVzju37778f0vLDgYGtxdasWYORI0fiqquuQnl5OZ5++ml0794dO3fuRFJSkvu4Hj16YNasWe7ndrs9HM0lIiIifwQS0AQrqA1mgBFJwUoo2hLofWLNtCOYKy5r3eon1PxdcTpS3idRKJyBrTqJJhsyZEhI6w0lBrYWW7p0qcfzWbNmoXbt2ti8eTOuueYa93aHw4H09HSrm0dERETRKtJXPw6UVW3xp55Q3B+4om0KVjvCcXuhC5RLSJB0AthQLx61ePFi98/FxcX49ttvcfXVVzOwpcAdP34cAFC9enWP7atXr0bt2rVRtWpV5OTk4Pnnn0ft2rXD0UQiIiIyyxZjzbBiddASSRnWSGVVwBmM90Aw2sr3Q8QL5xzbjz/+2OP5H3/8gTFjxoS20hBjYBtGQgiMHj0aHTp0QLNmzdzbe/bsiZtvvhnZ2dnIz8/Hs88+iy5dumDz5s1wOByaZZWUlKCkpMT9vKioCAAQGx+LuBi+zKEWFx/r8S+FFvvbWuxva7G/rRXs/hZl5ZDiovi1syBArjTvcbmvgtFnZssIYD52penvaOByAX7cQedsYKs3FDlIbTKpbt26+PHHH62tNMgkIbhaQbiMHDkSixcvxrfffou6devqHrd//35kZ2dj/vz56N+/v+Yxubm5mDhxotf2efPmITExMWhtJiIiIiIib6dPn8bgwYNx/PhxpKSk6B5XVFSE1NRUXPyvsbAlxmse4zxdjF/vmOqzrEAphyI7nU6sX78eX3/9NTZu3Bj0uqzCr27C5KGHHsLnn3+Ob775xjCoBYCMjAxkZ2dj165duseMHTsWo0ePdj8vKipCVlYWZo9cjLgYLjwVanHxsRj6Vm/MHrkYZcXl4W5Opcf+thb721rsb2uxv3WEMHMbtD6P9uHXFrU/qt/jUfYal7lK/TpenHvo7Qull19+2f1zbGwsGjZs6DU8OdowsLWYEAIPPfQQPv30U6xevRr169f3ec7hw4dRUFCAjIwM3WMcDofmMOXy4nJAiqlQm8m8suJylJ2Jsj8aUYz9bS32t7XY39YKWn9H2QfxcNLtcyv6MNyvk5XDl8/h/ymhVy78699wroq8cuXKkJYfDgxsTSopKcHGjRuxe/dunD59GrVq1ULLli1NBaZKI0eOxLx58/DZZ58hOTkZhYWFAIDU1FQkJCTg5MmTyM3NxYABA5CRkYHdu3dj3LhxqFmzJvr16xeKSyMiIqIgEU4nJK5tUTFWBJyBrIAczGA0GNfIL1CiXxhTtk6nEz/88IN7XR4AuP/++zFz5kzUr18f2dnZoW1ACPB/Xh/WrVuHv//971i4cCF
KS0tRtWpVJCQk4MiRIygpKUGDBg1w33334f7770dycrLP8mbOnAkA6NSpk8f2WbNmYejQobDZbNi+fTvmzp2LY8eOISMjA507d8ZHH31kqnwiIiIKH8keF5pVX8KdYays/Ll3bWUNRoP93vL3PrgXMoOMLUKcse3Xrx9++eUXpKamurcVFBTgiSeewH333Yd77703pPWHAgNbA3379kVeXh4GDx6MZcuWoVWrVh4LMf3+++9Yu3Yt/v3vf+OVV17B3Llz0a1bN8Myfa3VlZCQgGXLlgWl/URERGSxUK3JGawgIBzDfCM5iPGnnZF8HRUR7GvifXBNC+ftfnbv3o1ffvnFY9sVV1yBvLy80FYcQgxsDXTv3h3/93//B7tde/GlBg0aoEGDBhgyZAh27NiBffv2WdxCIiIiiiiRHvyYbVtFrkN9XiT0hz+Z2UD3h5qV761Ifx9XEuGcY9ugQQOvbfXq1QtpnaHGVYUMjBw5UjeolZWXl2Pv3r1o2rSpz2wtERERUVQIZoY4ElSGIM3Ka6gM/RUNhGT88MOJEyfQunVrVKlSxX0/2o8++ght27ZFly5dUFBQAADYuXMnOnTogN9//x2ffPIJgLN3U9m2bRs+/fTT4F6fxRjYVtCOHTv8XkCKiIiI6IJgJkCKlOA30rBfKj15KLLewx8JCQlYtGgRbrrpJgBAWVkZXnnlFaxZswaTJk3CpEmTAADjxo3DrFmzYLPZMG3aNJSVlaFVq1YYPHgwHnrooWBfoqUY2BIREREFCzNd/vN3heJgHBMN+F6q/ISPhx9iY2NRq1Yt9/Ndu3ahadOmsNvtaN++PbZv3w4A2L9/Pxo1agQhBNLS0vD555+jQ4cO2LlzZ9TfAohzbImIiIgouEIxR9NsmQwIqRJR3o4HABwOBxwOh8/zjh07hpSUFPdzp9MJ4PxCti6XC6mpqVixYoX7bi1myo1kzNgSERERBcu5D48XvFAEl3KZVmVkK0vmlyKWvHiU3gMAsrKykJqa6n5MnTrVVNnVqlXzCIptNhsAICbmbPjXrl07LFmyBJ999hl69+6NEydOIDY2unOe0d16C/zwww+G+9XLZBMREdEF7NyHRzIh0MAxVBlZdUY40HpCedugaF+tONrbHwo+fg0KCgo8Mq9ms6oXX3wxdu7cidLSUuTl5aF58+YAgPT0dOzatQtTp07FmjVrsHjxYtSoUQMA8N///jewa4gQDGx9+Nvf/gZJkjTvPytvlyT+ghIREREu7KDFX+G6Vr1+DlZ7QnnboGh/f0R7+4PMzO1+UlJSPAJbI7169cK2bdvwyy+/YPjw4Rg1ahRycnIQHx+PuXPnAgCmTJmCYcOGwel04o033vC47Y+vu8FEOga2PuTn54e7CURERBQtbDH+L2cq44d+a4Syny+0LyeoYowWiQrgv5ElS5Z4bRs4cKDH8yZNmuDbb7/1v/AowMDWh+zs7HA3gYiIiIiiAYNa8ot07qG3j/zBwNaEoqIi9xCAJUuWoLy83L3PZrOhd+/e4WoaERERRZJAs7VU+clzihn8kizIGduKOHLkCKpXr47Dhw+759xGG66K7MOiRYuQk5Pjfn7rrbfixhtvdD9uuOEG/Oc//wljC4mIiChiXOgr6V7o128kRmJQS56CeB/binr00UdRUlKCRx991NqKg4iBrQ/vvPMOHnzwQY9tv/76K1wuF1wuF6ZOnYr3338/TK0jIiKiiGK7wD9aMXCzHr9MiF5CMn5YZM+ePejTpw969uyJPn36YM+ePZbVHUwX+P++vv3www9o0aKF7v6ePXti06ZNFraIiIiIIpbTFe4WUKCiNUDklwlRSwjjh1XmzJmD//73v9iyZQvWrVuH2bNnW1d5EDGw9aGwsNBjnPmqVauQlZXlfl6lShUcP348HE0jIiKiSHOhZ2yVQhkoBlK2r3MYIJLVImQo8vjx4xEfH4+vv/4a8fHxmDBhgnWVBxH/9/WhevXq+O2339zPW7Vqhbi
4OPfzXbt2oXr16uFoGhEREUUaLh51XigDxUDKZuBKkSZChiIDQOvWrXHllVfiqquusrTeYGJg68M111yDN954Q3f/G2+8gWuuucbCFhERERERUbSThPHDSjfeeCMAoH///tZWHEQMbH148sknsXz5ctx8883Iy8vD8ePHcfz4cWzcuBEDBgzAV199hSeffDLczSQiIqJIIDEreEGI1vm4FFkiZChyZcH72PrQsmVLfPTRR7jnnnuwYMECj33VqlXD/PnzccUVV4SpdURERBRRnC4OeY0mLsFhzRUVaB+S8ZBji4ciVwYMbE3o27cvunXrhmXLlmHXrl0AgEaNGqF79+5ISkoKc+uIiIiILFLZghi9a6ls1xlK7KfAGWVmmbH1GwNbkxITE9GvXz+PbS6XC1988QXee+89LFy4MDwNIyIioshhi6ncC0hdKEHMhXKdFF5hDGwnTpxouD8aV0bmHNsA7Nq1C2PHjkXdunVxyy23hLs5REREFCmCFdRyDicRhdCpU6dw6tQp/Pjjj/j000/dz+VHNGLG1qQzZ87g448/xnvvvYfvvvsOTqcTr776Ku666y5UqVIl3M0jIiKiSBCsIazMGBJVfmHM2L744osoLCzEtddeC0mS0LNnT3Tu3Dm0lYYYM7Y+bNy4Effddx/S09Px5ptvYsCAASgoKEBMTAy6du3qd1A7depUXHXVVUhOTkbt2rVx44034pdffvE4RgiB3NxcZGZmIiEhAZ06dcKOHTuCeVlEREQUCjZ+tCIik8J4H9vDhw/juuuuw6RJk7B8+XI8/PDD2LlzZ0jrDDX+7+tDu3btkJSUhI0bNyIvLw+PPPII0tLSAi5vzZo1GDlyJL777jusWLEC5eXl6N69u0fK/8UXX8Qrr7yCN998E3l5eUhPT0e3bt1w4sSJYFwSERERhUplnl9Lxjh8nPwUzvvY9uzZE48//jj69++PzMxMzJs3DwMHDgxtpSHGocg+dOnSBe+99x4OHjyIO+64A9dddx2kCtyjbunSpR7PZ82ahdq1a2Pz5s245pprIITAa6+9hqefftp9g+Q5c+YgLS0N8+bNw/Dhwyt0PURERBRCXE33wsXXnfwVxqHId911F+644w7388svvxwvvfRSaCsNMWZsfVi+fDl27NiBxo0b44EHHkBGRgYeeeQRAKhQgCs7fvw4AKB69eoAgPz8fBQWFqJ79+7uYxwOB3JycrBu3boK10dEREQhxOCGlKzI4jJTTAG4//77vbZdd911YWhJ8DBja0JWVhbGjx+P8ePHY8WKFXj//fcRGxuLvn374qabbsJNN92EK664wu9yhRAYPXo0OnTogGbNmgEACgsLAcBruHNaWhr27NmjW1ZJSQlKSkrcz4uKigAAsfGxiIvhyxxqcfGxHv9SaLG/rcX+thb721rsb+tV6j6PwIx9pe7vSONyAWfMHy5Bf8hxqN9FNpsNQghIkgShmELhcrlCXHPoSEJwMkggjh49ig8++ADvv/8+fvjhBzidTr/LGDlyJBYvXoxvv/0WdevWBQCsW7cO7du3x759+5CRkeE+9t5770VBQYHXUGZZbm6u5v2o5s2bh8TERL/bRkRERERE5p0+fRqDBw/G8ePHkZKSontcUVERUlNTkf3C84iJj9c8xlVcjD1PPe2zrIq0VVZcXIyPP/4Yx44dw1NPPRX0uqzCwDYItmzZ4nfG9qGHHsLChQvxzTffoH79+u7tv//+Oxo2bIgtW7agZcuW7u19+/ZF1apVMWfOHM3ytDK2WVlZ6F7tNsTF2P28IvJXXHwshr7VG7NHLkZZcXm4m1Ppsb+txf62FvvbWkHv7wjM2EWaiHyPV+LXLSL7u5Iqc5Vi+dEPzQe2U30EtmNDF9hqad26NTZs2GBJXaHAMQlB4E9QK4TAQw89hE8//RSrV6/2CGoBoH79+khPT8eKFSvcgW1paSnWrFmDadOm6ZbrcDjgcDi8tpcXlwMSp1Jbpay4HGVn+Ef
DKuxva7G/rcX+tlbQ+rsSB0jBxve4tdjfoVcu/OzfMC4epZzi6HQ6sWXLFhw+fDi0lYYYA1uLjRw5EvPmzcNnn32G5ORk95za1NRUJCQkQJIkjBo1ClOmTEGjRo3QqFEjTJkyBYmJiRg8eHCYW09ERERERMFgdFufUN/u56qrrnLPsS0pKYHT6cRnn30W2kpDjIGtxWbOnAkA6NSpk8f2WbNmYejQoQCAMWPG4MyZMxgxYgSOHj2K1q1bY/ny5UhOTra4tUREROQXZmuJyKwgZWxdLheGDRuG33//HZIkYdasWdi0aRNee+01JCQkYM6cOcjKyvI45+DBgx7Ply5diq+//hrXXnutf9cQQRjYWszMlGZJkpCbm4vc3NzQN4iIiIiIiKwXpMB227ZtKCkpwdq1a7FixQq8+eabWLduHdauXYu8vDxMmjQJ77zzjmEZPXr0wLhx4zBlyhTzFUcYBrZEREREREQWMzMUWb6Fp0xrXR357ipCCBw7dgy1atVC06ZNYbfb0b59ezz++ONe5a9Zs8b9s9PpxObNm1FWVlaBqwk/BrYmHT58GOPHj8eqVatw8OBBr3s8HTlyJEwtIyIiIiKiqCOksw+9fYDXEOIJEyZ4jeqsWbMmYmJicNlll6GkpAQffvihx1BjrduSPvHEE+6fY2Nj0aBBA3z88ccBXkhkYGBr0u23347ffvsNd999N9LS0iBJnENDREREKpIE8E6KRGSGiaHIBQUFHrf70boLyrJly5CQkICff/4ZW7ZswbRp05CUlOTeb7PZvM7ZuHFjRVoekRjYmvTtt9/i22+/RYsWLcLdFCIiIopUDGqJyCQzQ5FTUlJM3ce2WrVqAICqVavi0KFD2LNnD0pLS5GXl4fmzZsHq8kRjYGtSZdeeinOnDkT7mYQEREREVFlEKTFo7p3745//etfyMnJQUlJCV555RXs3bsXOTk5iI+Px9y5c02V8+CDD+LNN980X3GEYWBr0owZM/DUU09h/PjxaNasGeLi4jz2m/kmhYiIiCo5DkUmIovZbDbMmzfPY1u7du0wcOBAv8qJjY3F/fffj7fffjuYzbNMTLgbEC2qVq2K48ePo0uXLqhduzaqVauGatWqoWrVqu7UPxEREV3gGNQSkVni/HBk9cOfjG2wvPbaazh69Cjuuece6ysPAmZsTbrttttgt9sxb948Lh5FREREREQVE6ShyME0ffp0NGrUCP/85z/D04AKYGBr0o8//oitW7eicePG4W4KERERERFFuwgLbE+fPo3+/fvj5Zdftr7yIOBQZJNatWqFgoKCcDeDiIiIiIgqAb1hyEarJYfSLbfcgrvvvhsjR460vvIgYMbWpIceegiPPPIInnjiCVx++eVei0ddKMtoExERERFR5XPjjTdG7fxagIGtabfeeisA4K677nJvkyQJQghIkgSn0xmuphERERERUbSJsKHI0RzUAgxsTcvPzw93E4iIiCjSuQQQwwUmicg3oyHHoR6K/Oeff6JOnTpe20tKSgAADocjtA0IAc6xNSk1NRXZ2dmaj7KysnA3j4iIiCIBg1oi8ofQeYRYVlYWXnnlFa/tq1at8vv+t5GCga1JvXr1QnFxsdf2X375BZ06dbK+QUREREREFL30gloLgtuGDRti3rx5mD59usf2Hj164Keffgpt5SHCwNakatWq4cYbb0R5ebl7208//YROnTphwIABYWwZERERRQze556ITArnqsjJycn4+uuv8fHHH2PKlCke+xITE0NbeYgwsDXpk08+walTpzB48GAIIfDjjz+iU6dOGDRoEF5//fVwN4+IiIgigQjDii9EFJ3CmLEVQiA1NRVff/01Fi9ejCFDhmDz5s146623kJ6eHtrKQ4SBrUnx8fFYtGgRdu3ahZtvvhnXXnst7rzzTs2x6URERHSBcjGwJSJzwpmxjY+PB3A+c1urVi0MGjQIn3/+Od5+++3QVh4iXBXZQFFRkcdzSZLw0UcfoWv
XrhgwYACeffZZ9zEpKSnhaCIRERFFEi4eRURmhfF2P+vXr3f/HB8fj+nTp3vNt402DGwNVK1aFZLGXBkhBN5++2384x//4H1siYiIiIjIfxF2H9tox8DWwKpVq8LdBCIiIoomksR5tkRkSjjvY1sZMbA1kJOTE+4mEBERUTRhUEtEZjFjG1RcPMrA3r17/Tr+zz//DFFLiIiIiIiIKm7Dhg3hbkJIMLA1cNVVV+Hee+/Fxo0bdY85fvw43n33XTRr1gwLFiwwVe4333yDPn36IDMzE5IkYeHChR77hw4dCkmSPB5t2rSpyKUQERGRFXgfWyIyK0y3+3n++efRsGFDjBgxAsuXL0d5eXnoKrMQhyIb+OmnnzBlyhT06NEDcXFxaNWqFTIzMxEfH4+jR49i586d2LFjB1q1aoWXXnoJPXv2NFXuqVOn0KJFCwwbNgwDBgzQPKZHjx6YNWuW+7ndbg/KNREREVEIcSgyEZkUrjm2n3/+OU6fPo2lS5fiX//6F+6//360bdsW/fr1Q8+ePZGUlBS6ykOIga2B6tWrY/r06Zg8eTKWLFmCtWvXYvfu3Thz5gxq1qyJ2267Dddddx2aNWvmV7k9e/b0GQQ7HI6ovTkyERERERH5EMY5tomJiejfvz/69+8Pp9OJVatWYeHChXjqqadw6aWXYtGiRaFtQAgwsDUhPj7e/cJbZfXq1ahduzaqVq2KnJwcPP/886hdu7Zl9RMRERERUehEyqrINpsNXbt2RdeuXQEAeXl51lUeRAxsI1DPnj1x8803Izs7G/n5+Xj22WfRpUsXbN68GQ6HQ/OckpISlJSUuJ8XFRUBAGLjYxEXw5c51OLiYz3+pdBif1uL/W0t9re12N/WY59bi/1tIZcLOOPH8RG6KvJVV10VvsorQBKCk0HCSZIkfPrpp7jxxht1j9m/fz+ys7Mxf/583axxbm4uJk6c6LV93rx5SExMDFZziYiIiIhIw+nTpzF48GAcP34cKSkpuscVFRUhNTUVl42YApsjXvMYZ0kxfpoxzmdZstWrV2PSpEkoLy/H6NGjUVxcjNdeew0JCQmYM2cOsrKyAr6uaMGvbqJARkYGsrOzsWvXLt1jxo4di9GjR7ufFxUVISsrC7NHLkZcDBeeCrW4+FgMfas3Zo9cjLLiyrGyXCRjf1uL/W0t9re12N/WY59bi/1tnTJXqV/HS+ceevvMKi4uxssvv4wvv/wSdrsdZWVl6NChA9auXYu8vDxMmjQJ77zzjl9ti0YMbKPA4cOHUVBQgIyMDN1jHA6H5jDl8uJyQOJdnaxSVlyOsjP8o2EV9re12N/WYn9bi/1tPfa5tdjfoVcu/OzfIA1FXrduHRISEtCnTx8kJibiiSeeQNOmTWG329G+fXs8/vjjuucWFRXh9OnTqFmzJmJjozs0ZMQTBidPnsS2bduwbds2AEB+fj62bduGvXv34uTJk3j88cexfv167N69G6tXr0afPn1Qs2ZN9OvXL7wNJyIiIiKioJAXj9J7AGcDT+VDuaaO7MCBA8jPz8cXX3yB++67D7m5uR7Dl51Op8fxe/fuxRNPPIEGDRogLS0Nl112GVJSUtC1a1f83//9X0ivOZQY2IbBpk2b0LJlS7Rs2RIAMHr0aLRs2RLjx4+HzWbD9u3b0bdvX1xyySUYMmQILrnkEqxfvx7JyclhbjkREREREQWF8PEAkJWVhdTUVPdj6tSpXsVUrVoVHTp0gN1uR5cuXbB161b3QrLA2VWPZQsWLMDNN9+M+vXrY+XKlThz5gyOHj2KI0eO4Omnn8ayZcvQoUOH0F1zCEV3vjlKderUCUZrdi1btszC1hARERERUVj4GHJcUFDgkX3Vmnp49dVX47XXXgMAbN26Fd27d8fOnTtRWlqKvLw8NG/e3H1smzZtsGHDBq8y4uPj0blzZ3Tu3BkFBQWBXUuYMbAlIiIiIiKymJn72KakpPhcFblGjRq44YYbcM011yAmJgbvv/8
+Nm7ciJycHMTHx2Pu3LnuYzMzM322K1pXUGZgS0RERBQskgTwTopEZEYQ72M7cuRIjBw50v28QYMGGDhwoO7x+fn5mDZtGn777TeUl59f9GrVqlX+VRxBGNgSERERERFZzEzGNlRuvfVWdO/eHb179/aYgxvNGNgSERERBQuztURkVhAztn5XLQQmT54c2kosxlWRiYiIiIiILGbmdj+h0rhxY+Tn54e2EosxY0tERERERHQBOXz4MK644gp06NAB8fHx7u3RfB9bBrZERERERERWC+NQ5EGDBmHQoEGhrcRiDGyJiIiIiIisFsbAduDAgYiNjUVMTOWZmVp5roSIiIiIiChKhHOObdeuXd1zbAcMGICqVavin//8Z2grDTEGtkRERERERFYTPh4hdOzYMTRs2BBbtmzBiRMn8PPPP+PVV18NbaUhxqHIREREkUiStLfxdjJERJWCJAQknf/T9bYHi3zv2tWrV+P6669Heno6HA5HSOsMNQa2RBR6Wh/QZfyQTnSe+ndFL7gF+LtDFAn4ZRNVRBjn2NauXRtjx47Ff/7zHyxZsgROpxNOpzO0lYYYhyITUehIEqTYWEg2m+cjJsb9gCQZB75EFxLlB2QhjB8UmVx8bS4oRr+LWl9U8e8dKYRzju2sWbNQVlaG559/Ho0aNUJJSUnUz7FlxpaIQkuI83/Izw17UX4QkM592y1cLnNlERFFshgGLheUYGRslWVUpDxmj6NPGDO2mZmZmD59uvt5YmIirrrqqtBWGmIMbIkodISAUA5rUfws2e1nf4iJAVyus9lbrSKUAa8cIF9I33grh51q/axF+cFGPWxV/uCj/Fd9jlFbzPQ9P1gR0YXC6P879T69Y9UjNeiCYZSZDXXGtjJiYEtE1lH8wRYlJQAAyWYDYmN1v62W5CyvHOCqgzatQCsY33xrUX+rbsRX8BlI3Vo/+zrW6HwzZQZyHWaPVwbYZs6trB/4zMyr1TuvsvYJEdGFIIwZ28qIgS0RhZVwuc6v/KcOuGKk8/PVbLazH+JdLkhSzLnDzs7T9RrG7E8Q6G6IRgCnFXD5E3QEmea1RrNgZuAr44JKWl/caH0ZQEQE8P+FKMSMbXBx8SgiCi8hIEpLzz9KSs4/zhS7t6O8/OzxcoZX/lljQSpfDwBez9UBhMfCVuf26Q2XNlOP+mdleVpt0iorbEGt1oJGwSzTzLFax6teH69tegFzOIeyB5qdBc73gfpfIiKKTmG8j63aqFGjAAAPPfSQtRUHETO2RBQ5tD6on9smnE73HF1hOxfgyYHeuXm67n991OEOWs+VbRSwmtnmUbxivrBWECsHp1pBrVYZ/gazvtrnd5nBWsXT7LxgM/duNRo2rXdOpDAz587XMcrrj8RrJCIi0yIlM9umTRuMHTsW7du3D3dTAsbAloiijzvYLYcoc54PVM0EbHLA4yMgEBoLWmlt09qnd66vupTH6B6vtfDTOWbapj7eV5DrdV6gQa6vTKWZcv2ZW+zPgi6RSu99Gi3tJyIiY0afRyz8v/6uu+5CeXk5PvzwQwwePBjLli3DrFmzLKs/WBjYElF0k1de1gr2VM+Ngk11kKfOoLrr8hGguoNuHwGjer/X8XpZOvW/ygW5DG6srtUeM5nbs8eEd9aKP18SnD9J/33g3kZERBRGkTLH9v3338eMGTPQoUMHlJeXY8SIEdZVHkScY0tElYNy3qHyG1DFc+F0ejzU+zSP1brdkLJcZd3KY5TtCOShdX1mjjPqn4q2x5/9QSRcLvcD8J6r7JPe8OYL6bZRREQUeSJoju2xY8dw33334fDhw9ZWHETM2BIRGfEVpHEhn7PMzB0NhCr41Bq2Hczyz1d0gb+eRER0QRk3bhwA4Nlnnw1zSwLHjG0YfPPNN+jTpw8yMzMhSRIWLlzosV8IgdzcXGRmZiIhIQGdOnXCjh07wtNYIqJw0sgOq7PrQc2Oy9vUqyvrPYiIiAIkuYwf5B8GtmFw6tQptGj
RAm+++abm/hdffBGvvPIK3nzzTeTl5SE9PR3dunXDiRMnLG4pEUW8SLylTbTRCnL9CYoZ5BIRUSAiaChyZcDANgx69uyJyZMno3///l77hBB47bXX8PTTT6N///5o1qwZ5syZg9OnT2PevHlhaC0RRTSthZFkDLasEYJ5xUREVPnJi0fpPfz173//G7Vq1QIAfPTRR2jbti26dOmCgoKCILc8MnGObYTJz89HYWEhunfv7t7mcDiQk5ODdevWYfjw4WFsHRFFJN4ShoiIKPoE8XY/LpcL//nPf5CVlYWysjK88sorWLt2LfLy8jBp0iS88847HsevWbNGs5ycnBy/6o0kDGwjTGFhIQAgLS3NY3taWhr27Nmje15JSQlKSkrcz4uKigAAsfGxiIvhyxxqcfGxHv9SaLG/rcX+thb721rsb+uxz63F/raQywWcMX+4mdv9yJ/pZQ6HAw6Hw+v4efPm4aabbsLLL7+MXbt2oWnTprDb7Wjfvj0ef/xxr+OfeOIJ98/FxcX45Zdf0KRJE2zdutX8BUQYvsMjlKReCVQIr21KU6dOxcSJE722D32rNxITE4PePtI29K3e4W7CBYX9bS32t7XY39Zif1uPfW4t9nfonT59GssHf2j+BKO5tOe2Z2VleWyeMGECcnNzPbY5nU58/PHHWLhwIV5++WUcO3YMKSkpHvvVNm7c6PF8+/btmDFjhvm2RyAGthEmPT0dwNnMbUZGhnv7wYMHvbK4SmPHjsXo0aPdz4uKipCVlYXZIxcjLsYeugYTgLPfgg59qzdmj1yMsuLycDen0mN/W4v9bS32t7XY39Zjn1uL/W2dMlepX8ebydgWFBR4BKla2doPPvgAt9xyC2LO3eO9WrVqHplem83msy2XX345/vvf//rR+sjDwDbC1K9fH+np6VixYgVatmwJACgtLcWaNWswbdo03fP0hiWUF5cDEtcIs0pZcTnKzvCPhlXY39Zif1uL/W0t9rf12OfWYn+HXrnws39NzLFNSUnxCGy17Ny5E1u3bsUHH3yAXbt24Z133sHOnTtRWlqKvLw8NG/e3OucOXPmuH92Op3YsmUL7PboToYxsA2DkydP4tdff3U/z8/Px7Zt21C9enXUq1cPo0aNwpQpU9CoUSM0atQIU6ZMQWJiIgYPHhzGVhMRERERUbCYydiaoUx+tWrVCq+++irmz5+PnJwcxMfHY+7cuV7nLF682P1zcXExtm3bhi+//NJ8pRGIgW0YbNq0CZ07d3Y/l4cQDxkyBLNnz8aYMWNw5swZjBgxAkePHkXr1q2xfPlyJCcnh6vJREREREQUTCbm2Ppr06ZNAICBAwdi4MCBusd9/PHHHs/379+PRx99FPPnzw+s4gjAwDYMOnXqBGGwhLckScjNzfWaGE5ERERERJVDsDK2wZCRkYHt27dbW2mQMbAlIiIiIiKymkucfejtCyHl3VScTic2b96MunXrhrTOUGNgS0REREREZLUQDEX2pbi4GPHx8Th16pR7W2xsLPr16xf16/kwsCUiIiIiIrKYBIOhyCGqs3Xr1vj+++/x4osvhqiG8GFgS0REREREZDUTt/sJtlq1auHBBx9Ey5YtERurHwoOGTIkJPWHEgNbIiIiIiIii4Vj8aj58+dj5syZWLNmDYqLizWPEUIwsCUiIiIiIqLIVLNmTTz77LPhbkZIxIS7AURERERERBcc4eMRAgcPHvR5zIEDB0JTeYgxsCUiIiIiIrKYJIThIxRWrlyJa665BnPnzvUIcsvLy7FhwwaMGjUKffv2DUndocbAloiIiIiIyGouH48QGDhwIP75z39i/fr1aN68OWrUqIE6deogNTUVjz32GFq2bIl169aFpvIQ4xxbIiIiIiIiixllZkOVsQWASy65BDNnzsTMmTOxf/9+nDlzBhkZGUhISAhZnVZgYEtERERERGQ1o7m0oYtrPWRkZFhTkQU4FJmIiIiIiMhq8n1s9R4h8M9//hP/+Mc
/cPr0ac39W7duxc033xySukONGVsiIiIiIiKLheM+trfddhteeOEFNGnSBE2aNMFll12G+Ph4FBYWYv369UhLS8PUqVNDU3mIMWNLRERERERktTBkbBMSEjBx4kT8/PPPePDBB1GzZk3YbDa0bt0aX3zxBVatWoU2bdqEpO5QY8aWiIiIiIjIYpLr7ENvXyjFx8ejV69e6NWrV2grshADWyIiIiIiIqsZZWZDuCpyZcXAloiIiIiIyGoRsCpyZcLAloiIiIiIyGLhuo9tZcXAloiIiIiIyGocihxUDGyJiIiIiIisJgDoLRLFuNZvDGyJiIiIiIgsxqHIwcX72BIREREREVlNwOA+tuaL2bx5Mzp27IicnBzccsstKCsrw0cffYS2bduiS5cuKCgoCNklRBIGtkRERERERFbTDWoN5t5qqFOnDpYtW4Y1a9bg4osvxsKFC/HKK69gzZo1mDRpEiZNmhTCi4gcDGwjUG5uLiRJ8nikp6eHu1lERERERBRh0tPTkZiYCACIi4vD//73PzRt2hR2ux3t27fH9u3bw9xCa3CObYRq2rQpvvrqK/dzm80WxtYQERERXSAkiSvSkjVcACSDfQCKioo8NjscDjgcDs1T9u7di6+++gpTpkzBX3/95d7udDqD0NjIx8A2QsXGxjJLS0RERBQq5wJYKTYWwul0B7NSXNzZ/S4XRHl5GBtIlZ2ZxaOysrI8tk+YMAG5ublexxcVFeGOO+7ArFmz4HQ6PQLiCyVBxsA2Qu3atQuZmZlwOBxo3bo1pkyZggYNGoS7WURERESVjhTvgCguAYSAKCs7uy2GM/YoxEzcx7agoAApKSnuzVrZWqfTidtuuw3jx4/HJZdcgrKyMuzcuROlpaXIy8tD8+bNQ9L8SMPANgK1bt0ac+fOxSWXXIIDBw5g8uTJaNeuHXbs2IEaNWponlNSUoKSkhL3c/lbmtj4WMTF8GUOtbj4WI9/KbTY39Zif1uL/W0t9rf1IrPPy4F4raxWJLUxMJHZ35WUywWc8eN4E4FtSkqKR2Cr5eOPP8a6detw4sQJTJo0CQ888ABGjRqFnJwcxMfHY+7cuX40KnpJQnASQaQ7deoUGjZsiDFjxmD06NGax+Tm5mLixIle2+fNm+eeTE5ERERERKFx+vRpDB48GMePHzcMRouKipCamoprL3sMsTbt+bLlzhJ8/dPLPsui8/jVTRRISkrC5Zdfjl27dukeM3bsWI+gt6ioCFlZWZg9cjHiYuxWNPOCFhcfi6Fv9cbskYtRVsz5OKHG/rYW+9ta7G9rsb+txz63FvvbOmWuUv9OMLF4FJnHwDYKlJSU4KeffkLHjh11j9FbIa28uByQOEfEKmXF5Sg7wz8aVmF/W4v9bS32t7XY39Zjn1uL/R165cK//jWzeBSZx4gnAj3++ONYs2YN8vPzsWHDBtx0000oKirCkCFDwt00IiIiIiIKBnmOrd6D/MKMbQT6448/MGjQIBw6dAi1atVCmzZt8N133yE7OzvcTSMiIiIiomBwCUDSCWBdDGz9xcA2As2fPz/cTSAiIiIiolAysSoymcfAloiIiIiIyHJGQ44Z2PqLgS0REREREZHVmLENKga2REREREREVnMJ6GZmOcfWbwxsiYiIiIiIrCZcZx96+8gvDGyJiIiIiIisxqHIQcXAloiIiIiIyGocihxUMeFuABEREREREVFFMGNLRERERERkNQ5FDioGtkRERERERFYTMAhsLW1JpcDAloiIiIiIyGrM2AYVA1siIiIiIiKruVwAdG7r4+LtfvzFwJaIiIiIiMhqzNgGFQNbIiIiIiIiqzGwDSoGtkRERERERFbjfWyDivexJSIiIiKKRJJ0/kebzXC/qeLMlqHa5j5PuV2SjOs/t0+zTgIACOEyfJB/GNgSEREREUUgn0Gh0XBVk0GvFBPjfayqXOF0erfHaBjtuXIZ1PogxNnMrNaDQ5H9xsCWiIiIiCgCifLy8z+fCy4
NyVlSu127PGUZ544VytV3fQTDyvZ4VOtwaNYlXC5z7b5QyV8O6D3ILwxsiYiIiIgqwo8hwSHNYgpxti1Op3dgpJeVVQZR/gZTcnBcUqLfHtLnchk//PD444+jY8eOuO2221BaWhqiBkc2BrZERERERIGSJECIswGriQDXbAbTVACsU5+7DuV+X0Gmn/N13WUGcl6grKzLCkHK2G7duhWFhYVYu3YtmjRpgv/85z8hbHTkYmBb2ckT+wN56J2vVQcRERFVHvzb7jetYDIo2Vm9z17qhZzcDTkfEEkxMYqfKzBfV68dUSJS5/oKl8vwYdb69evRvXt3AECPHj2wbt26UDU5ovF2P6RP7z8wo+BW/uZO/s9R62flNqso65ZxeAwREZEmKSaGcyMDpOw7f4IT9ecj+Vz1ayE5HEB5+fnjbTYIo6Gn8nBh4dL/DKbernVcJNxvVd1GeZtWexXbvN7L4fgsqkUY3O7nXPuKioo8NjscDjhUc5qPHTuGzMxMAEBqaiqOHDkS9KZGA2ZsKbjUmV6tn5XbrHoo61a21Vc2moiI6EIjSWcDgcr4d1Hrs4D8o83mNZzYK9MnL84UG3t+n9YKwsqgyyydY9VBmSgpcS/KJJxOiLIy/SJdLkiS6uO+1usqD6XGuYWnghH06Xy20upTd7+fe7jbotqu+ZnOYJvXbYqMPhdWVCDl6a2ILD8AZGVlITU11f2YOnWqVzHVqlVzB8DHjh1D9erVK3Qp0YoZW4puWplhJb3/ZCLhWzoiIqJIozeyySg75itzFmg7zAyN1fscoP77r7c4kuJ8d3ZVmTVVZ/rkDKDO6sABL8LkL2X5PuryuB+qwbHC5YJksxkGyh58ZEk161P2s3w8vDPbwuXyPlZ1jm6bVOVIsbEQTqf7X9NluQvRmEesNULRbHnqcqCT1T9XdkFBAVJSUtyb1dlaAGjTpg1efvll3HnnnVi2bBnat2/vXzsqCWZsI9iMGTNQv359xMfH48orr8TatWuDX4nVAV4w6tOaUG80PIbLpxMRUbTQmzdpZpvR6CONUUpe2Sxfzv39lOdtKjNv7rmcwQpqtZ77EzT4u9JvZf184Gc/CJer4v1QkaxoCF4H97DwQIfWB/o7FyQpKSkeD63AtmXLlkhPT0fHjh2xc+dODBgwIKRtilTM2Eaojz76CKNGjcKMGTPQvn17/OMf/0DPnj2xc+dO1KtXz3xB8th9o7ml0RjcBrMcIiIiPYEMKZWHQcbEnM0YnftXuc19qHyMzeb+4C3ZbJrnKYcIy/Mu3ceeCzLlDJW7zRoZODkIlbNicp3u8tXXrrz+c8fI9cpZK/c1KNrtca16o6rMZGZ9ZVv1yjfa7quuSBCOtlR06LQyu6m3Hzj/uutl+Y3mzWrVr96uN4ogwobXC5eAkHSGoPv5+k+fPj0YTYpqzNhGqFdeeQV333037rnnHlx22WV47bXXkJWVhZkzZwZWILOVRERUGfiYZ6eck6f+WfsRe+68WEh2+9ltsbHnHzExZ7fJ++SHvP9cHcqfAe2gFoBXoOc+RhFQyseoz/M6RjFUU7mKqnvupct1PqhVfA6Q9yt5lK8MJNWfHRTt9ghMztUrl6/81+eoKrPbKbSCmXgw+2WFUb1G7w2tQFidndb6QkRvJF8wH/4QLuMH+YWBbQQqLS3F5s2b3ct2y7p3737BLt9NRERRyo+hsXrDZT0WkAHcwSaAswGnHICey1TK++Usom5wCUC4zgdzoqzMc1EeOTg8t0CPXvCotYKwVnAq16P+oO0RJCqP8edDs96xZgLLQD6YawULROQX4RKGD/IPhyJHoEOHDsHpdCItLc1je1paGgoLCzXPKSkpQUlJifv58ePHz/7gEIDEb3xCzu7C6dOnAbsL8GdpfwoM+9ta7G9rsb89CHjPi1NuE3ACereotGkcq/45LhanT5+GiC0D7Obm4KnbJD/Xaitp4HvcWuxv6wgBnDE/jLhclOhmZsthchEvcpOEvwO
4KeT27duHOnXqYN26dWjbtq17+/PPP49//etf+Pnnn73Oyc3NxcSJE61sJhERERERqRQUFKBu3bq6+4uLi1G/fn3dhJUsPT0d+fn5iI+PD3YTKyVmbCNQzZo1YbPZvN7sBw8e9MriysaOHYvRo0e7n7tcLhw5cgQ1atSAFGET5SujoqIiZGVleS3JTqHB/rYW+9ta7G9rsb+txz63FvvbOkIInDhxApmZmYbHxcfHIz8/H6WlpYbH2e12BrV+YGAbgex2O6688kqsWLEC/fr1c29fsWIF+vbtq3mOw+HwWv67atWqoWwmaZCXYidrsL+txf62FvvbWuxv67HPrcX+tkZqaqqp4+Lj4xm0BhkD2wg1evRo3HHHHWjVqhXatm2Ld955B3v37sX9998f7qYRERERERFFFAa2EerWW2/F4cOH8dxzz2H//v1o1qwZlixZguzs7HA3jYiIiIiIKKIwsI1gI0aMwIgRI8LdDDLB4XBgwoQJXsPBKTTY39Zif1uL/W0t9rf12OfWYn/ThYKrIhMREREREVFUiwl3A4iIiIiIiIgqgoEtERERERERRTUGtkRERERERBTVGNgSAZgxYwbq16+P+Ph4XHnllVi7dq17X25uLi699FIkJSWhWrVq6Nq1KzZs2OCzzO3btyMnJwcJCQmoU6cOnnvuOaintK9ZswZXXnkl4uPj0aBBA7z99ttBv7ZIZNTfAPDTTz/hhhtuQGpqKpKTk9GmTRvs3bvXsEz2tz6j/j5w4ACGDh2KzMxMJCYmokePHti1a5fPMtnf2r755hv06dMHmZmZkCQJCxcudO8rKyvDk08+icsvvxxJSUnIzMzEnXfeiX379vksl/2tzai/AWDo0KGQJMnj0aZNG5/lsr+1+ervkydP4sEHH0TdunWRkJCAyy67DDNnzvRZLvtb29SpU3HVVVchOTkZtWvXxo033ohffvnF45gFCxbguuuuQ82aNSFJErZt22aqbPY5VUqC6AI3f/58ERcXJ959912xc+dO8cgjj4ikpCSxZ88eIYQQH374oVixYoX47bffxI8//ijuvvtukZKSIg4ePKhb5vHjx0VaWpoYOHCg2L59u/jkk09EcnKymD59uvuY33//XSQmJopHHnlE7Ny5U7z77rsiLi5O/Oc//wn5NYeTr/7+9ddfRfXq1cUTTzwhtmzZIn777TexaNEiceDAAd0y2d/6jPrb5XKJNm3aiI4dO4qNGzeKn3/+Wdx3332iXr164uTJk7plsr/1LVmyRDz99NPik08+EQDEp59+6t537Ngx0bVrV/HRRx+Jn3/+Waxfv160bt1aXHnllYZlsr/1GfW3EEIMGTJE9OjRQ+zfv9/9OHz4sGGZ7G99vvr7nnvuEQ0bNhSrVq0S+fn54h//+Iew2Wxi4cKFumWyv/Vdd911YtasWeLHH38U27ZtE7179/b6/3nu3Lli4sSJ4t133xUAxNatW32Wyz6nyoqBLV3wrr76anH//fd7bLv00kvFU089pXn88ePHBQDx1Vdf6ZY5Y8YMkZqaKoqLi93bpk6dKjIzM4XL5RJCCDFmzBhx6aWXepw3fPhw0aZNm0AvJSr46u9bb71V3H777X6Vyf7WZ9Tfv/zyiwAgfvzxR/e+8vJyUb16dfHuu+/qlsn+Nkfrg7/axo0bBQD3Fzta2N/m6AW2ffv29asc9rc5Wv3dtGlT8dxzz3lsu+KKK8QzzzyjWw7727yDBw8KAGLNmjVe+/Lz800Htuxzqqw4FJkuaKWlpdi8eTO6d+/usb179+5Yt26d5vHvvPMOUlNT0aJFC/f2oUOHolOnTu7n69evR05Ojsc946677jrs27cPu3fvdh+jrve6667Dpk2bUFZWFoSrizy++tvlcmHx4sW45JJLcN1116F27dpo3bq15vBC9rdvvvq7pKQEABAfH+/eZ7PZYLfb8e2337q3sb9D5/jx45AkCVWrVnVvY38H1+rVq1G7dm1ccskluPfee3H
w4EGP/ezv4OnQoQM+//xz/PnnnxBCYNWqVfjf//6H6667zn0M+ztwx48fBwBUr17dr/PY53ShYGBLF7RDhw7B6XQiLS3NY3taWhoKCwvdzxctWoQqVaogPj4er776KlasWIGaNWu692dkZKBevXru54WFhZplyvuMjikvL8ehQ4eCc4ERxld/Hzx4ECdPnsQLL7yAHj16YPny5ejXrx/69++PNWvWuI9nf5vjq78vvfRSZGdnY+zYsTh69ChKS0vxwgsvoLCwEPv373cfz/4OjeLiYjz11FMYPHgwUlJS3NvZ38HTs2dPfPjhh1i5ciVefvll5OXloUuXLu4vdQD2dzC98cYbaNKkCerWrQu73Y4ePXpgxowZ6NChg/sY9ndghBAYPXo0OnTogGbNmvl1LvucLhSx4W4AUSSQJMnjuRDCY1vnzp2xbds2HDp0CO+++y5uueUWbNiwAbVr1wZwdoEHM2Wqt5s5pjLS62+XywUA6Nu3Lx599FEAwN/+9jesW7cOb7/9NnJycgCwv/2l199xcXH45JNPcPfdd6N69eqw2Wzo2rUrevbs6XE8+zv4ysrKMHDgQLhcLsyYMcNjH/s7eG699Vb3z82aNUOrVq2QnZ2NxYsXo3///gDY38H0xhtv4LvvvsPnn3+O7OxsfPPNNxgxYgQyMjLQtWtXAOzvQD344IP44YcfPEbTmMU+pwsFA1u6oNWsWRM2m80jOwsABw8e9PimMikpCRdffDEuvvhitGnTBo0aNcJ7772HsWPHapabnp6uWSZw/ltRvWNiY2NRo0aNCl9bJPLV3zVr1kRsbCyaNGnisf+yyy4z/GPO/tZm5v195ZVXYtu2bTh+/DhKS0tRq1YttG7dGq1atdItl/1dMWVlZbjllluQn5+PlStXemRrtbC/gycjIwPZ2dmGK3+zvwNz5swZjBs3Dp9++il69+4NAGjevDm2bduG6dOnuwNbNfa3bw899BA+//xzfPPNN6hbt26Fy2OfU2XFoch0QbPb7bjyyiuxYsUKj+0rVqxAu3btdM8TQngMZVNr27YtvvnmG5SWlrq3LV++HJmZmbjooovcx6jrXb58OVq1aoW4uLgAriby+epvu92Oq666yut2Bv/73/+QnZ2tWy77W5s/7+/U1FTUqlULu3btwqZNm9C3b1/dctnfgZOD2l27duGrr74y9QGR/R08hw8fRkFBATIyMnSPYX8HpqysDGVlZYiJ8fxoabPZ3KNxtLC/9Qkh8OCDD2LBggVYuXIl6tevH5Ry2edUaVm6VBVRBJJvh/Lee++JnTt3ilGjRomkpCSxe/ducfLkSTF27Fixfv16sXv3brF582Zx9913C4fD4bGS7FNPPSXuuOMO9/Njx46JtLQ0MWjQILF9+3axYMECkZKSormU/qOPPip27twp3nvvvQtiKX2j/hZCiAULFoi4uDjxzjvviF27dom///3vwmazibVr17rLYH+b56u/P/74Y7Fq1Srx22+/iYULF4rs7GzRv39/jzLY3+adOHFCbN26VWzdulUAEK+88orYunWr2LNnjygrKxM33HCDqFu3rti2bZvHLWhKSkrcZbC/zTPq7xMnTojHHntMrFu3TuTn54tVq1aJtm3bijp16oiioiJ3Gexv84z6WwghcnJyRNOmTcWqVavE77//LmbNmiXi4+PFjBkz3GWwv8174IEHRGpqqli9erXH/xenT592H3P48GGxdetWsXjxYgFAzJ8/X2zdulXs37/ffQz7nC4UDGyJhBBvvfWWyM7OFna7XVxxxRXupfTPnDkj+vXrJzIzM4XdbhcZGRnihhtuEBs3bvQ4f8iQISInJ8dj2w8//CA6duwoHA6HSE9PF7m5ue5l9GWrV68WLVu2FHa7XVx00UVi5syZIb3OSKHX37L33ntPXHzxxSI+Pl60aNHC6x6I7G//GPX366+/LurWrSvi4uJEvXr1xDPPPOMRZAnB/vbHqlWrBACvx5AhQ9y349B
6rFq1yl0G+9s8o/4+ffq06N69u6hVq5b7/T1kyBCxd+9ejzLY3+YZ9bcQQuzfv18MHTpUZGZmivj4eNG4cWPx8ssve/Qd+9s8vf8vZs2a5T5m1qxZmsdMmDDBfQz7nC4UkhDnZoITERERERERRSHOsSUiIiIiIqKoxsCWiIiIiIiIohoDWyIiIiIiIopqDGyJiIiIiIgoqjGwJSIiIiIioqjGwJaIiIiIiIiiGgNbIiIiIiIiimoMbImIiIiIiCiqMbAlIiIiIiKiqMbAloiIiIiIiKIaA1siIiIiIiKKagxsiYiIiIiIKKoxsCUiIiIiIqKoxsCWiIgs9cMPP2DYsGGoX78+4uPjUaVKFVxxxRV48cUXceTIEfdxnTp1QqdOncLSxtzcXEiS5PO4Tp06oVmzZha0KHiGDh2KKlWqBLXMirxWZttz+vRp5ObmYvXq1QHVQ0RElVtsuBtAREQXjnfffRcjRoxA48aN8cQTT6BJkyYoKyvDpk2b8Pbbb2P9+vX49NNPw91M8tOMGTNCXsfp06cxceJEAAjbFx5ERBS5GNgSEZEl1q9fjwceeADdunXDwoUL4XA43Pu6deuGxx57DEuXLg1jCylQTZo0CXcTiIjoAsehyEREZIkpU6ZAkiS88847HkGtzG6344YbbjAs48iRIxgxYgTq1KkDu92OBg0a4Omnn0ZJSYn7mN27d0OSJMyePdvrfEmSkJub67Ft8eLF+Nvf/gaHw4H69etj+vTpfl/b2rVr0aZNGyQkJKBOnTp49tln4XQ6PY6ZOHEiWrdujerVqyMlJQVXXHEF3nvvPQghPI5buXIlOnXqhBo1aiAhIQH16tXDgAEDcPr0afcxpaWlmDx5Mi699FI4HA7UqlULw4YNw19//WW6zb/++it69eqFKlWqICsrC4899phHP/pTj9ZQ5D/++AM33XQTkpOTUbVqVdx2223Iy8vTfW2M2rN7927UqlXL3Y+SJEGSJAwdOtT09RIRUeXGjC0REYWc0+nEypUrceWVVyIrKyugMoqLi9G5c2f89ttvmDhxIpo3b461a9di6tSp2LZtGxYvXux3mV9//TX69u2Ltm3bYv78+XA6nXjxxRdx4MAB02UUFhZi4MCBeOqpp/Dcc89h8eLFmDx5Mo4ePYo333zTfdzu3bsxfPhw1KtXDwDw3Xff4aGHHsKff/6J8ePHu4/p3bs3OnbsiPfffx9Vq1bFn3/+iaVLl6K0tBSJiYlwuVzo27cv1q5dizFjxqBdu3bYs2cPJkyYgE6dOmHTpk1ISEgwbHNZWRluuOEG3H333XjsscfwzTffYNKkSUhNTXW3pSL1nDp1Cp07d8aRI0cwbdo0XHzxxVi6dCluvfXWgNqTkZGBpUuXokePHrj77rtxzz33AIA72CUiIoIgIiIKscLCQgFADBw40PQ5OTk5Iicnx/387bffFgDExx9/7HHctGnTBACxfPlyIYQQ+fn5AoCYNWuWV5kAxIQJE9zPW7duLTIzM8WZM2fc24qKikT16tWFmT+ROTk5AoD47LPPPLbfe++9IiYmRuzZs0fzPKfTKcrKysRzzz0natSoIVwulxBCiP/85z8CgNi2bZtunf/+978FAPHJJ594bM/LyxMAxIwZMwzbPGTIEM1+7NWrl2jcuHFA9ahfq7feeksAEF9++aXHucOHD/d6bcy256+//vJ6/YiIiGQcikxERFFh5cqVSEpKwk033eSxXR6O+vXXX/tV3qlTp5CXl4f+/fsjPj7evT05ORl9+vQxXU5ycrLXEOrBgwfD5XLhm2++8Wh/165dkZqaCpvNhri4OIwfPx6HDx/GwYMHAQB/+9vfYLfbcd9992HOnDn4/fffvepbtGgRqlatij59+qC8vNz9+Nvf/ob09HRTqwZLkuR1jc2bN8eePXuCUs+aNWuQnJyMHj16eGwfNGhQwO0hIiIywsCWiIhCrmbNmkhMTER+fn7AZRw+fBjp6elet+GpXbs2YmN
jcfjwYb/KO3r0KFwuF9LT0732aW3Tk5aWpnu+3KaNGzeie/fuAM6uDP3f//4XeXl5ePrppwEAZ86cAQA0bNgQX331FWrXro2RI0eiYcOGaNiwIV5//XV32QcOHMCxY8dgt9sRFxfn8SgsLMShQ4d8tjkxMdEjmAcAh8OB4uLioNRz+PBhzX7R2ma2PUREREY4x5aIiELOZrPh2muvxZdffok//vgDdevW9buMGjVqYMOGDRBCeAS3Bw8eRHl5OWrWrAkA7gBJvRCSOvCtVq0aJElCYWGhV11a2/RozceVz69RowYAYP78+YiLi8OiRYs8AriFCxd6nduxY0d07NgRTqcTmzZtwt///neMGjUKaWlpGDhwIGrWrIkaNWroriCdnJxsuu1GKlJPjRo1sHHjRq/t/vQrERGRP5ixJSIiS4wdOxZCCNx7770oLS312l9WVoYvvvhC9/xrr70WJ0+e9AoG586d694PnM0KxsfH44cffvA47rPPPvN4npSUhKuvvhoLFizwyAyeOHHCsB1qJ06cwOeff+6xbd68eYiJicE111wD4OxQ29jYWNhsNvcxZ86cwb/+9S/dcm02G1q3bo233noLALBlyxYAwPXXX4/Dhw/D6XSiVatWXo/GjRubbruRitSTk5ODEydO4Msvv/TYPn/+/IDbI6+kLWe3iYiIlJixJSIiS7Rt2xYzZ87EiBEjcOWVV+KBBx5A06ZNUVZWhq1bt+Kdd95Bs2bNdOe33nnnnXjrrbcwZMgQ7N69G5dffjm+/fZbTJkyBb169ULXrl0BnA0ib7/9drz//vto2LAhWrRogY0bN2LevHleZU6aNAk9evRw30fX6XRi2rRpSEpKwpEjR0xdV40aNfDAAw9g7969uOSSS7BkyRK8++67eOCBB9wrIPfu3RuvvPIKBg8ejPvuuw+HDx/G9OnTvW579Pbbb2PlypXo3bs36tWrh+LiYrz//vsA4L6+gQMH4sMPP0SvXr3wyCOP4Oqrr0ZcXBz++OMPrFq1Cn379kW/fv3MvSgGKlLPkCFD8Oqrr+L222/H5MmTcfHFF+PLL7/EsmXLAAAxMf5/r56cnIzs7Gx89tlnuPbaa1G9enXUrFkTF110UUUuk4iIKotwr15FREQXlm3btokhQ4aIevXqCbvdLpKSkkTLli3F+PHjxcGDB93HqVfaFUKIw4cPi/vvv19kZGSI2NhYkZ2dLcaOHSuKi4s9jjt+/Li45557RFpamkhKShJ9+vQRu3fv1lxV9/PPPxfNmzcXdrtd1KtXT7zwwgtiwoQJpldFbtq0qVi9erVo1aqVcDgcIiMjQ4wbN06UlZV5HPv++++Lxo0bC4fDIRo0aCCmTp0q3nvvPQFA5OfnCyGEWL9+vejXr5/Izs4WDodD1KhRQ+Tk5IjPP//co6yysjIxffp00aJFCxEfHy+qVKkiLr30UjF8+HCxa9cuwzYPGTJEJCUleW3Xumaz9Wi9Vnv37hX9+/cXVapUEcnJyWLAgAFiyZIlXqtI+9Oer776SrRs2VI4HA4BQAwZMsTwWomI6MIhCaG6MzwRERFRCEyZMgXPPPMM9u7dG9A8ayIiIj0cikxERERB9+abbwIALr30UpSVlWHlypV44403cPvttzOoJSKioGNgS0REREGXmJiIV199Fbt370ZJSQnq1auHJ598Es8880y4m0ZERJUQhyITERERERFRVOPtfoiIiIiIiCiqMbAlIiIiIiKKYqtXr8a1116LnJwcfPbZZ/joo4/Qtm1bdOnSBQUFBeFuniU4FJmIiIiIiChKFRcX4+abb8Ynn3wCu92OsrIydOjQAWvXrkVeXh7mzJmDd955J9zNDDkuHlVJuVwu7Nu3D8nJyZAkKdzNISIiIiKq1IQQOHHiBDIzMxETYzwwtri4GKWlpT7LU3+OdzgccDgcHtvWrVuHhIQE9OnTB4mJiXjiiSfQtGlT2O12tG/fHo8//nhgFxRlGNhWUvv27UNWVla
4m0FEREREdEEpKCgwvK1ZcXEx6mdXQeFBp2E5VapUwcmTJz22TZgwAbm5uR7bDhw4gPz8fPz3v//F119/jdzcXDRp0sS93+k0rqeyYGBbSSUnJwMAOqAXYhEX5tZUfrEJcbjrvf54/+7/Z+/O46Mq7/7/v885M5kkQFJFKyhxqagIirK4UISwK3Vfqmjrir37rdpbau1itRXk9ka7oPddK4oilt9dKta91VsFZHMPm4LUfSF3FXElLElm5pzr98dkJplkJsnAyZlJeD0fj/OYmetc5zqfc83kTD5zneURxWtj+Q6ny6O/g0V/B4v+Dpaf/R3q3UvxTzb5FFnXxWc8WPR3cOKK6Xk9lfo/PJtoNKpNm119sOoAlfXIPLJbs9XTQUM+UnV1tcrKylLlzUdrJekb3/iGTjjhBBUVFWnMmDG66KKL0hJrx3F2cos6FxLbLip52EJIYYUsEtuOFrbCKi0tVdgKSxz53eHo72DR38Giv4PlZ3+H7CKJ79w28RkPFv0doIYrF7X3NMBu3RNTJm5DW2VlZWmJbSbHHnusbr/9dknSmjVrNGHCBG3YsEHRaFRVVVUaOHBgu+Lp7EhsAQAAACBgnow8Zb6Ob7byTHr27KnTTjtNI0eOlG3buu+++/Tqq6+qsrJSxcXFmjdvnl8hFzQSWwAAAAAImCdPXivzcnHllVfqyiuvTL3+1re+pUmTJu1CdJ0PiS0AAAAABMw1Rm6WO69mK0d2JLYAAAAAEDC/DkVGQus3WAIAAAAAoMAxYgsAAAAAAfNk5DJi6xsSWwAAAAAIGIci+4vEFgAAAAACxsWj/EViCwAAAAAB8xqmbPOQGxJbAAAAAAiY28o5ttnKkR2JLQAAAAAEzDWJKds85IbEFgAAAAACxqHI/iKxBQAAAICAebLkyso6D7khsQUAAACAgHkmMWWbh9yQ2AIAAABAwNxWRmyzlSM7ElsAAAAACBiJrb9IbAEAAAAgYJ6x5Jks59hmKUd2JLYAAAAAEDBGbP1FYgsAAAAAAXNly5WdZR5yRWILAAAAAAEzrRyKbDgUOWeZfyJAQZkxY4Ysy9KUKVPyHQoAAAAAHyQPRc42ITcktgWuqqpKs2fP1sCBA/MdCgAAAAAUJBLbArZt2zZ973vf0z333KM99tgj3+EAAAAA8Ilr7FYn5IZzbAvYlVdeqZNPPlnjxo3Tf/zHf7Rat76+XvX19anXNTU1kqRQSVhhK9yhcUIKl4TSHtGx6O9g0d/Bor+D5Wd/h4odWSV857aFz3iw6O8AGUm17a/uyZKXZZzRk/Enpt2IZYyh1wrQAw88oJtvvllVVVUqLi7WqFGjdPTRR+v222/PWH/q1KmaNm1ai/L58+ertLS0g6MFAAAAdm87duzQBRdcoC1btqisrCxrvZqaGpWXl+uJ1w9Wtx5Oxjrbt7o6beB7bbaFRvx0U4Cqq6t19dVX69lnn1VxcXG7lrnuuut0zTXXpF7X1NSooqJC901+hBHbAIRLQrpsztm6b/LDitXG8x1Ol0d/B4v+Dhb9HSw/+zvUex/FP/nUp8i6Lj7jwaK/gxMzsZzqt3bIscvYY85IbAvQqlWrtHnzZg0ZMiRV5rquli9frjvuuEP19fVynPRfdyKRiCKRSIu24rUxcVG14MRq44rV5rZTw86jv4NFfweL/g6WH/1t6tzE9y7ahc94sOjvjhfPMbFNHIqc+R/1bOXIjsS2AI0dO1br1q1LK7v00kvVr18//eIXv2iR1AIAAADoXDzZcjnH1jcktgWoR48eOuKII9LKunXrpp49e7YoBwAAAND5cCiyv0hsAQAAACBgnmyuiuwjEttOYunSpfkOAQAAAIBPXGPJNZnPpc1WjuxIbAEAAAAgYG4r59i6jNjmjMQWAAAAAALmGVtelnNsPc6xzRmJLQAAAAAEjBFbf5HYAgAAAEDAPGU/l9YLNpQugcQWAAAAAALW+lWRM5cjOxJbAAAAAAhY6/exJbHNFYktAAA
AAATMkyVP2Q5F5nY/ueKnAAAAAABAp0ZiCwAAAAABSx6KnG1qrw8//FB77723Ro0apVGjRumzzz7TggULNGzYMI0ZM0bV1dUduBWFg0ORAQAAACBgrd/uJ7fxx8rKSj300EOSpFgsppkzZ2rFihWqqqrS9OnTNXv27F2Ot9AxYgsAAAAAAfOM1eokSTU1NWlTfX19xrZeeOEFjRgxQr/61a/09ttva8CAASoqKtLw4cO1bt26IDcrb0hsAQAAACBgXsOIbaYpebufiooKlZeXp6YZM2a0aKd379569913tXz5cm3evFmPP/64ysrKUvNd1w1sm/KJQ5EBAAAAIGCeseVlOZc2WV5dXZ2WpEYikRZ1I5FIqvzss8/Wfffdpx49eqTmO47jZ9gFixFbAAAAAAiYK6vVSZLKysrSpkyJ7datW1PPly9frlNOOUUbNmxQNBrVCy+8oIEDBwa2TfnEiC0AAAAABKw9I7bt8fzzz+uGG25QaWmpDjroIE2fPl2RSESVlZUqLi7WvHnz/Aq5oJHYAgAAAEDAXCk1MptpXntNnDhREydOTCubNGmSJk2atPPBdUIktgAAAAAQML9GbJFAYgsAAAAAAXONLTdLAputHNmR2AIAAABAwIwseVkORTZZypEdiS0AAAAABIwRW3+R2AIAAABAwDxjyTOZR2azlSM7fgoAAADwQfzjTxTat3e+wwDQSbiyW52QG3oMANDlxU48JrD1uKOHtGu9zefFxw+VJLmjB6U9Jssz1UVhcfbcQ/GPP8l3GAA6ieSIbbYJueFQZAAAAAAImCdbXpZxxmzlyI4eAwAAAAB0aozYAgAAAEDAXGPJzXLIcbZyZEdiCwAAAAAB46rI/iKxBQAAAICAGWPLy3K/WsN9bHNGYgsAAAAAAXNlyVWWQ5GzlCM7ElsAAAAACJhnsh9y7JmAg+kCSGwBAAAAIGBeK4ciZytHdiS2AAAAABAwT5a8LIccZytHdiS2AAAAABAwbvfjLxJbAAAAAAgYhyL7i8QWAAAAAALmqZX72HIocs5IbAEAAAAgYKaVc2wNiW3OSGwBAAAAIGCeaWXElnNsc0ZiCwAAAAAB4xxbf5HYAgAAAEDAGLH1F4ktAAAAAASM+9j6i8QWAAAAAALGiK2/SGwBAAAAIGAktv7irGQAAAAAQKdGYluAZs2apYEDB6qsrExlZWUaNmyY/vd//zffYQEAAADwSXLENtuE3JDYFqA+ffrolltu0cqVK7Vy5UqNGTNGp59+ut544418hwYAAADAByS2/uIc2wJ06qmnpr2++eabNWvWLL388ssaMGBAnqICAAAA4Bej7Fc/NsGG0iWQ2BY413X1t7/9Tdu3b9ewYcPyHQ4AAAAAH3DxKH+R2BaodevWadiwYaqrq1P37t316KOPqn///lnr19fXq76+PvW6pqZGkhQqCStshTs83t1duCSU9oiORX8Hqyv0txW2FSrp+H2hFbZlS7Ib1tXaepvPs8O2nJKw7HDiLKFw2JbX8Npp1kamMuwcPz/fTnEo9d4ju66wT+lM6O8AGUm17a9OYusvyxjDSHcBikaj2rhxo77++ms9/PDDuvfee7Vs2bKsye3UqVM1bdq0FuXz589XaWlpR4cLAAAA7NZ27NihCy64QFu2bFFZWVnWejU1NSovL9fIv1+hULdIxjrx7fVafuqdbbaFRiS2ncS4ceN08MEH6+677844P9OIbUVFhcaVnMOIbQDCJSFdNuds3Tf5YcVq4/kOp8ujv4PVFfo7PnawQotXB7IeO+bJXr62zfU2n+eOHiRnyRrZE4boB5f01T33vyvv2VWp8qYylWHn+Pn5dvb4htyvvvYnsC6sK+xTOhP6OzgxE9Oi2ofandie8MSVrSa2z5/2JxLbHHBMQidhjElLXJuLRCKKRFr+YcRrY8pyTjo6QKw2rlhtLN9h7Dbo72B15v6OxTyZAGKPxTzZUU9Ow7paW2/zefGYp1BtTHbMS833amO
p8qYylWHX+PH59kricnlf2q0z71M6I/q748VNbv1rjCWT5ZDjbOXIjsS2AP3qV7/SxIkTVVFRoa1bt+qBBx7Q0qVL9fTTT+c7NAAAAAA+8GRlvSpytnJkR2JbgD799FNdeOGF+uSTT1ReXq6BAwfq6aef1vjx4/MdGgAAAAAfcPEof9n5DgAtzZkzRx9++KHq6+u1efNmLVq0iKQWAAAA6EKShyJnm3L117/+VXvvvbckacGCBRo2bJjGjBmj6upqv0MvSCS2AAAAABCw5IhttimntjxPDz30kCoqKhSLxTRz5kwtW7ZM06dP1/Tp0ztoCwoLiS0AAAAABMzPEdv58+frnHPOkW3beueddzRgwAAVFRVp+PDhWrduXQdtQWEhsQUAAACAgJlWRmuTiW1NTU3alOkuKa7r6sEHH9R5550nSfr666/TbhHkum4wG5RnJLYAAAAAEDAjyZgsU0OdiooKlZeXp6YZM2a0aOd//ud/dO6558q2E6ndHnvsoZqamtR8x3EC2Jr846rIAAAAABAwT5asNm73U11dnTb6GolEWtTdsGGD1qxZo//5n//RO++8o9mzZ2vDhg2KRqOqqqrSwIEDO2YDCgyJLQAAAAAUoLKysrTENpNbb7019Xzo0KG67bbb9MADD6iyslLFxcWaN29eR4dZEEhsAQAAACBgrV0kamdu9yNJK1eulCRNmjRJkyZN2unYOiMSWwAAAAAImGcsWVkS2Fxv9wMSWwAAAAAIXPJCUdnmITcktgAAAAAQsI44FHl3RmILAAAAAAEjsfUXiS0AAAAABIxzbP1FYgsAAAAAAeMcW3+R2AIAAABAwBKJbbZDkQMOpgsgsQUAAACAgHGOrb9IbAEAAAAgYKZhyjYPuSGxBQAAAICAMWLrLxJbAAAAAAgaQ7a+IrEFAAAAgKC1MmIrRmxzRmILAAAAAAHjdj/+IrEFAAAAgIBxjq2/SGwBAAAAIGjGyn7IMYltzux8BwAAQEcLP1Ol2InHBLIer8iWO3pIm+ttPi+0cKXi44fKWbImVRYfPzRV3lSmMuSf++VXcvbcI99hAMBuiRFbAEBBSyZ3O7tc7MRjZHm5n6zkjk0kp87iVanXyedp9RqS2CSn3pW9dHVacpuJN2pwKrm1PCM75kmSzIijJEl23EvVa57IJrdnZ/sGHcMuLZX75Vf5DgPtEJswVOFnd4+/Hadbt5yXab6/80YNlr10tZ9hQZxj6zcSWwAAAAAIGrf78RWHIncw13W1du1affUVv+ACAAAASEhePCrbhNyQ2PpsypQpmjNnjqREUltZWanBgweroqJCS5cuzW9wAAAAAAqHyTIhZyS2PnvooYd01FGJ86P+/ve/64MPPtCbb76pKVOm6Prrr89zdAAAAAAKASO2/iKx9dnnn3+uXr16SZKeeuopffe739Whhx6qyZMna926dXmODgAAAEBByDZay6jtTiGx9dk+++yjDRs2yHVdPf300xo3bpwkaceOHXIcJ8/RAQAAACgMVhsTcsFVkX126aWX6txzz1Xv3r1lWZbGjx8vSXrllVfUr1+/PEcHAAAAoCBwVWRfkdj6bOrUqTryyCO1ceNGffe731UkEpEkOY6jX/7yl3mODgAAAEBBILH1FYmtj2KxmCZMmKC7775bZ599dtq8iy++OE9RAQAAACg4xkpM2eYhJyS2PgqHw1q/fr0siw8iAAAAgOyMSUzZ5iE3XDzKZxdddFHqPrYAAAAAkBFXRfYVI7Y+i0ajuvfee7Vw4UINHTpU3bp1S5s/c+bMPEUGAAAAoGBwKLKvSGx9tn79eg0ePFiS9Pbbb6fN4xBlAAAAAJJkmcSUbZ4ftm3bpvvvv18PP/ywXn/9dW3fvl19+vTRqFGj9MMf/lDHHHOMPysqACS2PluyZEm+QwAAAABQ6Dr4qsjPPfecpkyZopNOOkk33nij+vXrp5KSEm3atEkvvviirrvuOpWXl+vhhx/e9ZUVABLbDvLuu+/
qvffe08iRI1VSUiJjDCO2AAAAABI6+FDkXr166ZVXXlFJSUlaeXl5uQ477DBdeumleuWVV3Z5PYWCi0f57IsvvtDYsWN16KGH6jvf+Y4++eQTSdLll1+un/70p3mODgAAAMDuoH///i2S2uaOO+64gKLpeIzY+uwnP/mJwuGwNm7cqMMPPzxVft555+knP/mJ/vCHP+QxOgAAAAAFoYMPRU667LLLZFq5f9DcuXP9W1kekdj67Nlnn9UzzzyjPn36pJUfcsgh+uijj/IUFQAAAICCElBiO3ToUEnS6tWrtXbtWl122WX+NV5ASGx9tn37dpWWlrYo//zzzxWJRPIQEQAAAICCE1Bie8UVV+jNN9/U7bffrr322kt77rmnJk2a5N8KCgTn2Pps5MiRmjdvXuq1ZVnyPE+/+93vNHr06DxGBgAAAKBgJC8elW3yyfvvv6/TTjtNc+fO1TPPPKPf/e53WrFihW/tFwpGbH32u9/9TqNGjdLKlSsVjUb185//XG+88Ya+/PJLvfDCC+1qY8aMGXrkkUf05ptvqqSkRN/+9rd166236rDDDuvg6AEAAAAEIYj72ErSKaecojvvvFPDhw+XJD322GOaOHGi1q9f799KCgAjtj7r37+/Xn/9dR177LEaP368tm/frrPOOktr1qzRwQcf3K42li1bpiuvvFIvv/yyFi5cqHg8rgkTJmj79u0dHD0AAACAQJg2pnZav369hg8frsrKSp188snatm2bFixYoGHDhmnMmDH66U9/qnHjxqXqV1RUpB1h2lUwYtsBevXqpWnTpu308k8//XTa67lz5+qb3/ymVq1apZEjR+5qeAAAAAC6iMMOOyx1ZOi0adP06KOP6o477tCKFStUVVWlO+64Iy2xlaSePXvmI9QORWLrswMPPFCXXXaZLr30UlVUVPjS5pYtWyRJe+65Z9Y69fX1qq+vT72uqamRJIVKwgpbYV/iQHbhklDaIzoW/R2sfPe3HbbllOS+H0suZ4VtWZ6RsS2FcmjHCScOarIblnHCdup5Wr2i9IOfLGNklYRT5ZmWkSRTZMtqEp9tSV7IVrjhtKpw2JZnJNuSrJKw7HDjepLbI2mn+gaN/Px82yUheYb3oy353qdIkhW2c9ofdGbh4tz7u/n+Lrm/QhuMpNr2V7fUyqHIDY/J/+mTIpFIiwvShsON782OHTu0//77a8CAASoqKtLw4cM1atQoLVq0SJZlqb6+Xlu3blXPnj312WeftT/YTsAyrd3UCDn74x//qPvvv1+vvfaaRo8ercmTJ+vMM8/c6SsiG2N0+umn66uvvmr1JO+pU6dmHCWeP39+xqs0AwAAAPDPjh07dMEFF2jLli0qKyvLWq+mpkbl5eU64JabZRcXZ6zj1dXpo19e36L8xhtv1NSpU1uUL1y4UD//+c8VDod1++2368EHH9Ttt98uSTr22GP16quvpuo+/fTTeuGFFzR9+vTcNrDAkdh2kNdee0333Xef/vrXvyoej+uCCy7QZZddpsGDB+fUzpVXXqknn3xSzz//fIt74zaVacS2oqJC40rOYcQ2AOGSkC6bc7bum/ywYrXxfIfT5dHfwcp3f7ujB8lZsmanl4uPHdw4Yrt4dbuX9yqPliTZy9amXiefp9UbeXTaa8sYWSteS5Xby1suI0lmxFGyVryWis+Oe/JCtiKWNPnyQzXn3rdVbyQ77sla8Zrc0YMa19F0xHYn+gaN/Px826Ul8nbkMFyzm8r3PkWS4mMGKfTc7vG3U9yzXBf/8aSc+rv5/i65v0LrYiamRbUPtT+xndFGYnvd9aqurk5rK9OIbVO//e1v5bqu3nnnHd13332SpGHDhumll15Kqzd48GCtXt3+78TOgOP4OshRRx2l//qv/9Lvf/973XnnnfrFL36hWbNm6YgjjtDVV1+tSy+9VJbV+mW8f/zjH+uJJ57Q8uXLW01qpewf8nhtrPFYBnS4WG1csdpYvsPYbdDfwcpXf8d
jnkI7sd7kcrGYl0oETQ7tuDFPkuQ0LOPGvNTztHpRL+21ZYzs2liqPNMykuRFPdlN4rNjXurQY0mKxTxFjWTHEvXiscb1NE1sd6Zv0JIfn2/bCsvj/Wi3fO7DYzEvp/1BZ+bUJZLZXPq7+f4uub9C6+Imxz5qx31sy8rKWk2SpcQAVzIPKC8vVzQa1YYNGxSNRlVVVaW99tpLy5YtkyS5rqvVq1errq4ut1g7ARLbDhKLxfToo49q7ty5WrhwoY4//nhNnjxZH3/8sa6//notWrRI8+fPz7isMUY//vGP9eijj2rp0qU66KCDAo4eAAAAQEfy63Y/Cxcu1O9+9zvZtq29995b999/v/bee29VVlaquLhYX331lX72s59Jkurq6lRdXa1Fixb5sAWFhcTWZ6tXr9bcuXP117/+VY7j6MILL9Rtt92mfv36pepMmDCh1asbX3nllZo/f74ef/xx9ejRQ5s2bZKU+AWmpKSkw7cBAAAAQAdrx4hte5xyyik65ZRT0somTZqkSZMmZay/bt063XHHHbr77rvbv5JOgMTWZ8ccc4zGjx+vWbNm6Ywzzki7SllS//79s37QJGnWrFmSpFGjRqWVz507V5dccomf4QIAAADIB58S21wdeeSRevnllztuBXlCYuuz999/XwcccECrdbp166a5c+dmnc/1vAAAAICuza9Dkdvy5z//OfXcdV2tWrWqSx4FSmLrs7aSWgAAAACQsRJTtnk+efLJJ1PPQ6GQvvWtb+mxxx7zrf1CQWLrM9d1ddttt+nBBx/Uxo0bFY1G0+Z/+eWXeYoMAAAAQMEI6FDkBx980L/GCpid7wC6mmnTpmnmzJk699xztWXLFl1zzTU666yzZNt2xpspAwAAAIDf3n77bcVird+C6PXXXw8omo5HYuuzv/zlL7rnnnt07bXXKhQK6fzzz9e9996r3/zmN13yJG0AAAAAuUueY5tt2lXvv/++jj32WN1000169dVXtXXrVsViMVVXV2vBggU644wzdN111+36igoEia3PNm3apCOPPFKS1L17d23ZskVS4jLcTY9vBwAAALAbM21Mu+ikk07S4sWLFYlEdNVVV2nvvfdWcXGxjj32WP3973/XNddc06XyE86x9VmfPn30ySefaP/991ffvn317LPPavDgwaqqqlIkEsl3eAAAAAAKQWsjsz6dY7vnnnvqF7/4hX7xi19ISlwPyHEcfxovMIzY+uzMM8/U4sWLJUlXX321fv3rX+uQQw7RRRddpMsuuyzP0QEAAAAoCB08YptJV01qJUZsfXfLLbeknp9zzjnq06ePXnzxRfXt21ennXZaHiMDAAAAUDACuiry7oLEtoMdf/zxOv744/MdBgAAAIAC0tpFovy4eNTuhsTWB0888US76zJqCwAAAAD+IrH1wRlnnNGuepZlyXXdjg0GAAAAQOHjUGRfkdj6wPO8fIcAAAAAoBPhUGR/kdh2gMWLF2vx4sXavHlzWtJrWZbmzJmTx8gAAAAAFAwSWN+Q2Pps2rRpuummmzR06FD17t1blmXlOyQAAAAAhYZDkX1FYuuzu+66S/fff78uvPDCfIcCAAAAoEBxKLK/SGx9Fo1G9e1vfzvfYQAAAAAoZIzY+srOdwBdzeWXX6758+fnOwwAAAAABSw5YpttQm4YsfVZXV2dZs+erUWLFmngwIEKh8Np82fOnJmnyAAAAAAUDEZsfUVi67PXX39dRx99tCRp/fr1afO4kBQAAAAASSS2PiOx9dmSJUvyHQIAAAAA7FZIbAEAAAAgYFwV2V9cPAoAUNBCC1cqPn7oTi8XfqZKxrZkeUaxE49p9/LO4lWSJHfskNTr5PO0ektWpb02liVv1OBUuTu65TKSZC9dLW/U4FR8XtiWHfPkhRq/mu2YJy9syxs1WKGFKxvX0bA9knaqb9AxvB07ZJeW5jsMtEP42ZWKTdg9/nbc7dtzXqb5/i65v4LPTBsTcsKILdrFHtRf3po
N+Q4D2C15lbv+z4S9bHXLNo2RLCttnlc5WPay1al1Np1nhg+UF/Vatr18jbyRg7Kve/ma1PPWEsvwM1Wp5/HxQ1OJ6c4mcFa8cTk76smErLQY7IZt8YoaE8nwM1Vp/7xZrpFxLLljhyTa81omqpZnZJkm/4F4iX71KgfL8oyMbaWWSW6LrETbkhJJcL2bWM62ZNe7iToN8506V26xI40aLDuW3v+Wa2QvTX9vO4v4uKEKLVqZ9lqW0hL4zsYuLZW3Y0e+w9gldkmJvNra1KPU9nZZobBMPBZUiIqdeEza/qJdy0wYmpbMOnWuSt7cJB14gGQ8ybIV//AjhQ48QLWH7i3LtP+zGB83VEUvvJHqL3fskNQPY97IQfKKnMQ+wkv8EOaOHiLZiR/BQotWpuon9xNNfyxzuneXu21bot3RQ7LOyzQ/yS4pTtTv1k2x2q8TcY0anNp3ON27y7huYr+1Y4ec7t0VG3KonMWrGr8TRg5K1U9+f4TXvCt369aMfdI8tlx5IwfJXr5Gdmmp4sf0a/EdltwGmcbvqfj4oTJW4keLTJLfIcn3NbkPSn7fJOsEug/iHFtfkdgCAAAAQMA4FNlfJLYAAAAAEDRGbH1FYgsAAAAAAWPE1l8ktgAAAAAQNEZsfUViCwAAAABBI7H1FYktAAAAAATMapiyzUNuSGwBAAAAIGiM2PqKxBYAAAAAAsbFo/xFYgsAAAAAQWPE1lcktgAAAACQDySwviGxBQAAAICAcSiyv0hsAQAAACBoHIrsKzvfAQAAAADA7iY5Ypttaq9Vq1ZpxIgRqqys1LnnnqtYLKYFCxZo2LBhGjNmjKqrqztuIwoIiS0AAAAAdFL77befnnnmGS1btkx9+/bVY489ppkzZ2rZsmWaPn26pk+fnu8QA0FiCwAAAABBM21MkmpqatKm+vr6Fs306tVLpaWlkqRwOKy3335bAwYMUFFRkYYPH65169YFsjn5RmILAAAAAAFrz6HIFRUVKi8vT00zZszI2t7GjRu1aNEinXDCCSorK0uVu67b0ZtSELh4FAAAAAAErR0Xj6qurk5LUiORSMbqNTU1uvDCCzV37ly5rquamprUPMdxfAq4sJHYAgAAAEDQ2pHYlpWVpSW2mbiuq+9973v6zW9+o0MPPVSxWEwbNmxQNBpVVVWVBg4c6GvYhYrEFgAAAAAC5td9bB988EG9+OKL2rp1q6ZPn64f/ehHmjJliiorK1VcXKx58+b5E3CBI7EFAAAAgKD5dB/b888/X+eff36L8kmTJu1UWJ0VF48qUMuXL9epp56qfffdV5Zl6bHHHst3SAAAAAB8YhnT6oTckNgWqO3bt+uoo47SHXfcke9QAAAAAPitHbf7QftxKHKBmjhxoiZOnJjvMAAAAAB0AL/OsUUCiW0XUV9fn3bD5uQlvkMlYYWt8C63bxfZ8kp2vZ2uKlwSSntEx9rd+tsU7frBNVazv19TZEvGSJaVNs8U2bJKwql1WiXhxv4OZ46jaf221m1laUNK7K+S7LAtpyQsO2zL8oyMbbWydVnWaxsZx0p73rSdZCRek5hCGbYluYxlZ47D8ozUtNhL9Gvz5VN1pUR910iOlb6cbSncUD8ctiXHSK6RE7aVrQuav7edRfI9bvpaltLKguDn/sQuCckznfP9SLJLQvIUTj2mylrZLisUkom3fx272udW2E7bX+SyTHIf5HhG4Ygj2Y5kLMlK7PtCEUdu2JZl2v9ZtMO2wk36ywnbshuWNUW2vLCdOKzUSHZJWE5R4rNuLEtOSThV32nY99hN1usUh2S7De0W2VnnZZqfFC4OpR7dJnEl9x1OcUjGsyTLkmfCcopDspp8FzR9TC4rYxQqDsmOZ+6j5rHlKrk+uyQku8m6m9eRadwH2mFbxlLWz4adfO+b1E9+zzQv2/nAJdXmWN+Hc2yRYBnDAdyFzrIsPfroozrjjDOy1pk6daqmTZvWonz+/PkqLS3twOgAAAAA7NixQxd
ccIG2bNnS6i16ampqVF5ersHn3yynqDhjHTdap9V/vb7NttBo9xju2A1cd911uuaaa1Kva2pqVFFRofsmP+LPiO3Aw+S9/tYut9NVhUtCumzO2bpv8sOK1ebwszV2yu7W3+aEo3a5Dev511q2mRyxbTLPnHCUrOdfS63Tev61VH/Pmf2WYjGvZdsvvC4zPPs98qwXXk89j48dnLVeaPHq1HN39CA5S9bIHT1o50ds3SYjtm6GEduGbUkbsV28WmZEen+nRmzd4EZsJ//wMM25+y3FvMSIrRdxZMdb9r0kWStey1he6NxRg+QsXZP2WpbkLFmTfaEO4Of+xC4tkbcjl+GawmOXFMurrUs9Sm1vV2LEtv19t6t9Hh87OG1/0a5lxgxS6Lk1io8ZJEly6l2FP/q8YcTWkyxb8Y3VCu1fobq+eyVGbNv5WXRHDVL4lX+m+surPFr2srWSJDN8oLyw0zhiu3ytvJFHN47YLl2Tqu+NPFpSok6S062b3O3bE+2OPDrrvEzzkyJ7dNclfzpZf/7x06r7YksirhFHpfYdTrduMp6bGLHdUSunWzfFB/VNfRdYz78mM3xgal+e/P4Ivfa+3G3bMvZJ89hylVyfXVoid/ChLb7Dktsg0/j95o4elBixfS7z++aObnjvG97X5D4o+X2TrLMr+6CYieW2ACO2viKx7SIikYgikUiL8nhtLP0frp1kRz15tTn+se6GYrVxxeinwOwu/e1FMyc0ubCb9ZMX9VKJbdN5XtSTXRtLrbPpvFjMUzRDLE3rt7XuTIlxkmlSLx7zFKqNKR7zdj6xjRuZkJX2PC2xbYi5aUQmw7akkuO4v4lt08Q7tZxtpZLdWMxT1DWyXCPXtlKJeHPN39vOIvkeN30tS2llQfJjf2Jb4U7/XWkrJK82lnqU2t4uKySZeO7bvbN9Hot5afuLXJZJ7oOcqCfVu4lzEownWUbx2phMvatozJNl2v9ZjMc8qTae6iM35slpeO5FPXmyEomtJzm1MblRT7ITiW2oNpaq7zbse5ym+2QnLjfZbtTLOi/T/CS7JPHjQayusb+T+/pkO8ZtSGxrY/KcuGJNvguaPiaXlTEydenrb6p5bLlKrs+2woo3WXfzOjKN+8B4zJOxlPWzEW9470NN6ie/Z5qX7ax4jokt59j6i8QWAAAAAILGiK2vSGwL1LZt2/Tuu++mXn/wwQdau3at9txzT+2///55jAwAAACAHxiZ9Q+JbYFauXKlRo8enXqdPH/24osv1v3335+nqAAAAAD4wpjElG0eckJiW6BGjRolLlgNAAAAAG0jsQUAAACAgHHxKH+R2AIAAABA0Lh4lK9IbAEAAAAgYJaXmLLNQ25IbAEAAAAgaIzY+orEFgAAAAACxjm2/iKxBQAAAICgcbsfX5HYAgAAAEDAGLH1F4ktAAAAAASNc2x9RWILAAAAAAFjxNZfJLYAAAAAEDTOsfUViS0AAAAABIwRW3+R2AIAAABA0DjH1lcktgAAAAAQMEZs/UViCwAAAABB80xiyjYPOSGxBQAAAICgcSiyr0hsAQAAACBgllo5FDnQSLoGElsAAAAACBq3+/GVne8AAAAAAADYFYzYAgAAAEDAuCqyv0hsAQAAACBoXDzKVyS2AAAAABAwyxhZWc6lzVaO7EhsAQAAACBoXsOUbR5ywsWj0C7emg2yB/XPdxjAbsletnqX2/AqB7ds07IkY9Lm2ctWy6scnFpn8+Uytj1ykOzla1qdnxR+piprvdiJx6SehxauVHz8UIUWrpSxLVk7caN6E7JkxU3a86bteEWJr0A72vjfQ+zEY2QvTe9vy23SRoY4jG2lHzJmW2lXs2y6jLEbbuBgJONYqbZTy3lGchpv8mC5Rsax5NS58sKZv7K9UW2/R4UotGil4uOGpr2WkeLjh7ayVGHzduyQXVqa7zB2iVdbK7ukJPUotb1dJh6TFQoHFaLCz1Sl7S/atcyzKxWbMFThZ1dKktxiR9E
D95Y8V7JsyXgKHXiA4h9+pJK3P5Ox2v9ZDC1aqejwAan+chavkjt2iCTJXr5GdtSVsSzJltzRQ+QsWSV5iRG5+LihqfrOklWJ2EYPSbXtbtsmp3v3RLtLVmWdl2l+kldbl6i/fXuqvr10dWrf4W7bJstxJGNkl5bK3bZN4VVvp74LvMrBspevSe3Lk98fsUF95fTokbFPmseWq+T6vB07FKp6M+N3kb10tWQ1fk+FFq6UZaTYhMzvW2hh4r1Pvq/JfVDy+yZZJ8h9UHLENtuE3DBiizTOwMPlvv5POQMOlfvG25Ika+iRsutjctdsaFHfOm6gzCuvt9qmdeyRMq+uk6TEjt5TaueNrqvpP6zNpf0zn4PkTt40JCTmhKPkRXP8SdOYVELnu2RikmTt5F3okrE1LG+5nix313+6NSccnfa6sU2TNs+OedK3j048StJxRybqx43suJGxJcuTTJM8y5xwdNbk09iWzIhBMg3d4dTG0+NoWMxYiSTYMonnTp0rM2KQnDp3Zza3UTzL8ybseMO2GiMzYlDiM9rks2I1Wc5qFo6V7a0xzWZk+dxZ8ZbVEusxMpZk17vyIo7s+sSKk/1lPf9ak4Xa/zv1M//K/iNEcyfuNyitftPXJ+43KNti7RZavDot9tDihh8VsmyPZVsyDZ8zy/bnLo+WY6ceLcfZ5fZMfX32dhoSqNSj35L91uKzl+XzkYylyTKW40ieSW1DKmGNxxNlTWO3bD29caVOqhgsGS/rdhs3/Y/GCrX899MdO0TGshRatFJWKCwTjyk2YaiKnntN0TFHqei51xSvHChn8SrFxw9V0eK1ijX8+JWsn2wnvGKdvGhUdlGRvGg0lfhElq1XvHKwnDpXRR99Lu+TT6Vv7i1v0yepOOyiIklSydufyfvkU0WTSY6RilasV3TkESpavl6SFB1xhIpWrJccO/EoyY4UJ+JIPndseZLsaKIPQivfUnzkIBVVvaXY4ENkihx5Iwcp/PwbckcMkh335EYc2ZFiWeGQTCyu2NBDFV75duLxxTekhnVIkonFZZeUKH7s4Qqvelv2K/+Ukj9CuJ5kPNmRoobYIvIa9jdO9+7yjBI/WLieTDTa2AeRYsUHHSLLS+wPw6velikpkbP6HcUaktvw6ndkXFemIRmODz1MoZffSPT3yEEKvfyGTCwup3t3xQYfkmo79Mo/FT++fyJhbmgr9PIbiY/YsCMkYxR65Z+JuF9u2NaG76rkdoZeWp/63NovJf43jY0eIssziR8qrcb30Xgmta+IDztCdtST88I6xcYNTXzHNUmM4+OHyo56ieR23NC0H99CixqS4obypprXyQnn2PqKxBYAAAAAgsZ9bH1FYgsAAAAAAeN2P/7iHFsAAAAACFpyxDbblIOtW7fquOOOU/fu3bV+feLQ+AULFmjYsGEaM2aMqqurO2ILCgqJLQAAAAAEzPJan3JRUlKif/zjHzrnnHMkSbFYTDNnztSyZcs0ffp0TZ8+vQO2oLCQ2AIAAABA0HwcsQ2FQtp7771Tr9955x0NGDBARUVFGj58uNatW+d39AWHc2wBAAAAIGjtuCpyTU1NWnEkElEkEmmz6a+//lplZWWp1667i3cY6AQYsQUAAACAgLXnPrYVFRUqLy9PTTNmzGhX23vssUdaUuz4cDuzQseILQAAAAAErR23+6murk4beW3PaK0k9e3bVxs2bFA0GlVVVZUGDhy4y+EWOhJbAAAAAAiakZTtIlEN+W5ZWVlaYtua73znO1q7dq3eeust/fCHP9SUKVNUWVmp4uJizZs3z5eQCxmJLQAAAAAErOkhx5nm5eqpp55qUTZp0qSc2+msOMcWAAAAANCpMWILAAAAAEEzauUc20Aj6RJIbAEAAAAgaO24eBTaj8QWAAAAAILmSbJamYeckNgCAAAAQMD8vnjU7o7EFgAAAACCxqHIviKxBQAAAICgkdj6isQWAAAAAIJGYusrElsAAAAACBoXj/KVne8
AkN2dd96pgw46SMXFxRoyZIhWrFiR75AAAAAA+CB58ahsE3JDYlugFixYoClTpuj666/XmjVrNGLECE2cOFEbN27Md2gAAAAAdlXyUORsE3JCYlugZs6cqcmTJ+vyyy/X4Ycfrttvv10VFRWaNWtWvkMDAAAAsKs80/qEnJDYFqBoNKpVq1ZpwoQJaeUTJkzQiy++mKeoAAAAAPiGEVtfcfGoAvT555/LdV3ts88+aeX77LOPNm3alHGZ+vp61dfXp17X1NRIkkIlYYWtcLvX7URs2SXh1KMkWUW2bDW+bsoqsmUylGer44RtyShjW51ZuCSU9gjJDmf/3cxydm5nnTzfJNzQdriVdWRljGRZHfOFYVvpv7Ba2a4I0YZkbMnl3V0LK1dWswtWhIvstEdjJ+qY9na/bUlG2S+Qkewyq8lrq0l5kJKfj6bP2/isNO+vtLbSKrbvc9e0v41nEosV2Y2freTHYyf3N7F4pN11wyWhtPpNX+djf2fZlkxDP1j2Tv59NRMuDqU9dijLlozX+NgR7Ust27ay/LEmY2myjOU4kmXLxI2sopBMKNmEJeNa6bFbtmLxSJufBeOmrz/V502Wc8K2jGXJKQnLCoVk4pIVthN1Gh7tcOJ/ETtsK1QSkhW20+on2wmVhOQ5RnZR4tE0/E05JSHZRbZkjMIRR15JSHZx4rEpO+JIUqI8+T1jlBZLYgManju25Kb3udUQhxw7sc7kdjbEECoOySqy5YVt2TKNsTVsQ7gklNiukNLqO5n62rZSdUzT9971JOOlfcatuGS7YTlN22uo1xh8esyh4pCMZ8myHVkN5cmy5H7Sbmgr2d9OSUiybFmhUGqZpttvlYTT3hdJqffGKQmlfcaS85LLJttO6+8iW1ZyH2k1tmk8k9pXJLfJbvjs2JYa+j/xKCVG/KyGz5jT8JiIMfF/a7I8rfub1jGSalu+Rdm1lsCS2ObKMoafAwrNxx9/rP32208vvviihg0bliq/+eab9f/9f/+f3nzzzRbLTJ06VdOmTWtRPn/+fJWWlnZovAAAAMDubseOHbrgggu0ZcsWlZWVZa1XU1Oj8vJyjTvoxwrZmX9sjHv1WvTBH9tsC40YXipAe+21lxzHaTE6u3nz5hajuEnXXXedrrnmmtTrmpoaVVRU6L7Jj+Q2YnvEoXLXvy3n8IPl/vM9SZI1qL/saEzuG++0qG8NHSCz8o1W27SG9JdZtUGS5FUenRixXb623TF1BuGSkC6bc7bum/ywYrXxfIdTENxRg7LOs3byvJGmI7aTLz9Uc+55W7FYjqMenXLENthr/mcasb3sR/1036w3FYt6jNg20xEjtpddcbjum/Wmop6RFfNkIk7LEduX1rXZViaPvvl6u+ue2W9gWv2mr8/sN3Cn1r8rOmrE9tJ7ztTcHzyqWF0H77871YhtTFZRkUws3tCEJeO6LUZsH3ljjc7qf1SrYZlm+7Ci7sW6dPbpad+ZXuXRiRHbpWsaRmDjio8ZpPDydYqNPFLh5evkDj9C9rK1ckcPUmjFOsVHHClnSWP9ZDuhl96QF43JLgrLi8ZkTkjE51S9KfeYfokR241fyPt0s+y995L32edp8dm9eiXa+nSzYt/u37ARUvilDYp9u7/CLyb+p4kN66/wSxsyjtjGjz9coZf/KTm23EGHpMqdNe/IHXSIQmvfU/zog+WFHdkxV87Kt+QOOSyx3iJH4Zc2pLbLPbafQqvfVXxwXzmvthzckG3JHXKYQmvezTxiW1KkS2adovt/9A/F45K7fbucbt0UH9RXzuq3M47YJmORpNBr78l4rizbUfzogxvK3pfx3NR+0h10iJyqfyb6e/hAOVX/TI3YJpeRlNjOY/rJev41meEDG96XfybCPbZ/YsR25VstRmzdY/rJWZXoI+fVDS0+0/HjDk8fsX0l8R41HbF1j028l/bLbyh+wpGy415ixDbmNY7YxjxZz78md9QgOUvXpP6XcZauSbTRUN5U0zo
xE2v5/rTGM8r6Zcc5tjkjsS1ARUVFGjJkiBYuXKgzzzwzVb5w4UKdfvrpGZeJRCKKRFr+4hOvjWX/ZzIDr96TWxtLPUqSFfVkN3ndlBX1ZDKUZ6vjxjzJk5w2lumsYrVxxbrotuUq3krCabm7ltgmxWKeotGun9haeU5sk2JRb6cSW2NbsoxksnSHlczXrMbXxmosD1QBJLZJsainqGcS/3RZjZ+tZL9YO/kjWjhU33alZAy18bT6TV/n40e8jkhsk2J18Y7fpk6R2JqGxDYuy7XbTGzDofo2+8246edTWKHEv59NvzPdmCdjWQrVxmSFJBOPJX64rI0r2vAYj3lyamOKxzyZ2rhiMS+tfqqd2ngisXUtedGYvIbviVBtXPGol/g7rHfl1cZl1yUem7LrE/F6yXVLDYeYNsYiqfF5hsQ21hCHHDuxzgbJGExdXLGoJ0+W7KinUG1cbkM917Kk2rissGRi6fVDmfratlJ1jNcysU2+x7G6eCKxrY3Jc5q0lymxbRKzqYvLuK4sxyjWUJ4sS+4n4w1tJfs7VBtPJLZhpZZpuv12bfr7Iin13oRq4y0S22T78SZtp/V31EtLbJNtNk1sk++D0/DZsWOePEl21EvdMtZuiC3e8NlK/i8TavicJsubalonnmtia7zs+4KO2Ed0cSS2Beqaa67RhRdeqKFDh2rYsGGaPXu2Nm7cqP/3//5fvkMDAAAAgIJCYlugzjvvPH3xxRe66aab9Mknn+iII47QU089pQMOOCDfoQEAAADYVa1d/ZjLIOWMxLaAXXHFFbriiivyHQYAAAAAv3GOra9IbAEAAAAgaIzY+orEFgAAAACCZtRKYhtoJF0CiS0AAAAABI0RW1+R2AIAAABA0DxPUpbb+njc7idXJLYAAAAAEDRGbH1FYgsAAAAAQSOx9RWJLQAAAAAEjdv9+IrEFgAAAAACZownYzKfS5utHNmR2AIAAABA0IzJPjLLocg5I7EFAAAAgKCZVg5FJrHNGYktAAAAAATN8yQryyHHHIqcMxJbAAAAAAgaI7a+IrEFAAAAgIAZz5PJMmLLxaNyR2ILAAAAAEFjxNZXdr4DAAAAAABgVzBiCwAAAABB84xkMWLrFxJbAAAAAAiaMZKyXRWZxDZXHIoMAAAAAAEznml1ysW1116rESNG6Hvf+56i0WgHRVzYSGwBAAAAIGjGa31qpzVr1mjTpk1asWKF+vfvr4ceeqgDgy5cJLYAAAAAEDC/RmxfeuklTZgwQZJ00kkn6cUXX+yokAsa59h2UabhuPy4YlmvIp5xObderomlHiXJcutku/HU66aseJ1MhvJsddx4neSpzWU6HSPt2LFDMRNTvKtt206Kx+uyzrPcnTtvxGr4XNu2rR07diger1M8nuN93oyRLKtjzl2xrcSFIJIsa+faScbWsLzlBnsvuxa31Gvo71hDfxs7Uce086dRY1uyjGSydEfyuhnJ+cm62a6n0aGSn4+mz9v4rGS5BWHL5dr7uWva356RHffkOU7qs5XsF2sn9zU1W912142bWFr9pq/zsa+zjJX6frOyfaByZUyT/XfcnzazshtGYeycRmNyal/K0HaWP9ZkLE2WsYwnyZIx8cTfonEbyq2G501jt1Wz1W3zs5BsI8kyTovvTDdeJ2NZkok1rDemeLxOdrPHZLnV8Ni0frIdy8TkmZhsY8kzMXnJ7yMTbVjGyPbqE3UaHtN6y6uXJHnJ+pJkJLvhtW0Sh3mmnpuW72cixsS8tO/DhjashkfPdmTHXclEE/8jSXIdR7aJyjKejImn1ZfJcIipsVJ10u59arzEPif1P0pUrlHi/7ym7SXrNb5DqViSMRvjyjJOalushrLEclbqvUj1t4lJsmQZL+P222nvSyzVZ4l4o03isZrMi6bWkyxP6+/k96/V2KYxJrWvSMaR+mzFPXm2nXqUJDvupT5rSj62iDH989K0TlyN622PuKnPui9ItlVTU5NWHolEFIlE0sq
+/vpr7bvvvpKk8vJyffnll+1af1djmfb2PDqV//u//1NFRUW+wwAAAAB2K9XV1erTp0/W+XV1dTrooIO0adOmVtvp3r27tm3bllZ24403aurUqWlls2bNUrdu3XTRRRdp5cqVuv/++3XHHXfsdPydFSO2XdS+++6r6upq9ejRQ9bOjhqh3WpqalRRUaHq6mqVlZXlO5wuj/4OFv0dLPo7WPR38OjzYNHfwTHGaOvWranR02yKi4v1wQcftHmRJ2NMi//jm4/WStLxxx+vP/zhD7rooov0zDPPaPjw4bkH3wWQ2HZRtm23+ksROkZZWRlfGgGiv4NFfweL/g4W/R08+jxY9HcwysvL21WvuLhYxcXFvqxz0KBB6tWrl0aMGKH9999fP/vZz3xpt7MhsQUAAACATuz3v/99vkPIO66KDAAAAADo1EhsAR9EIhHdeOONGc97gP/o72DR38Giv4NFfwePPg8W/Y3dBVdFBgAAAAB0aozYAgAAAAA6NRJbAAAAAECnRmILAAAAAOjUSGwBAAAAAJ0aiS0g6c4779RBBx2k4uJiDRkyRCtWrEjNmzp1qvr166du3bppjz320Lhx4/TKK6+02ea6detUWVmpkpIS7bfffrrpppvU/Fpty5Yt05AhQ1RcXKxvfetbuuuuu3zftkLUWn9L0j//+U+ddtppKi8vV48ePXT88cdr48aNrbZJf2fXWn9/+umnuuSSS7TvvvuqtLRUJ510kt55550226S/M1u+fLlOPfVU7bvvvrIsS4899lhqXiwW0y9+8QsdeeSR6tatm/bdd19ddNFF+vjjj9tsl/7OrLX+lqRLLrlElmWlTccff3yb7dLfmbXV39u2bdNVV12lPn36qKSkRIcffrhmzZrVZrv0d2YzZszQMcccox49euib3/ymzjjjDL311ltpdR555BGdeOKJ2muvvWRZltauXduutulzdEkG2M098MADJhwOm3vuucds2LDBXH311aZbt27mo48+MsYY85e//MUsXLjQvPfee2b9+vVm8uTJpqyszGzevDlrm1u2bDH77LOPmTRpklm3bp15+OGHTY8ePczvf//7VJ3333/flJaWmquvvtps2LDB3HPPPSYcDpuHHnqow7c5n9rq73fffdfsueee5mc/+5lZvXq1ee+998w//vEP8+mnn2Ztk/7OrrX+9jzPHH/88WbEiBHm1VdfNW+++ab5t3/7N7P//vubbdu2ZW2T/s7uqaeeMtdff715+OGHjSTz6KOPpuZ9/fXXZty4cWbBggXmzTffNC+99JI57rjjzJAhQ1ptk/7OrrX+NsaYiy++2Jx00knmk08+SU1ffPFFq23S39m11d+XX365Ofjgg82SJUvMBx98YO6++27jOI557LHHsrZJf2d34oknmrlz55r169ebtWvXmpNPPrnF/nnevHlm2rRp5p577jGSzJo1a9pslz5HV0Vii93esccea/7f//t/aWX9+vUzv/zlLzPW37Jli5FkFi1alLXNO++805SXl5u6urpU2YwZM8y+++5rPM8zxhjz85//3PTr1y9tuR/+8Ifm+OOP39lN6RTa6u/zzjvPfP/738+pTfo7u9b6+6233jKSzPr161Pz4vG42XPPPc0999yTtU36u30y/ePf3KuvvmokpX7YyYT+bp9sie3pp5+eUzv0d/tk6u8BAwaYm266Ka1s8ODB5oYbbsjaDv3dfps3bzaSzLJly1rM++CDD9qd2NLn6Ko4FBm7tWg0qlWrVmnChAlp5RMmTNCLL76Ysf7s2bNVXl6uo446KlV+ySWXaNSoUanXL730kiorK9Nuhn7iiSfq448/1ocffpiq03y9J554olauXKlYLObD1hWetvrb8zw9+eSTOvTQQ3XiiSfqm9/8po477riMhxfS321rq7/r6+slScXFxal5juOoqKhIzz//fKqM/u44W7ZskWVZ+sY3vpEqo7/9tXTpUn3zm9/UoYceqh/84AfavHlz2nz
62z8nnHCCnnjiCf3rX/+SMUZLlizR22+/rRNPPDFVh/7eeVu2bJEk7bnnnjktR59jd0Fii93a559/Ltd1tc8++6SV77PPPtq0aVPq9T/+8Q91795dxcXFuu2227Rw4ULttddeqfm9e/fW/vvvn3q9adOmjG0m57VWJx6P6/PPP/dnAwtMW/29efNmbdu2TbfccotOOukkPfvsszrzzDN11llnadmyZan69Hf7tNXf/fr10wEHHKDrrrtOX331laLRqG655RZt2rRJn3zySao+/d0x6urq9Mtf/lIXXHCBysrKUuX0t38mTpyov/zlL3ruuef0hz/8QVVVVRozZkzqRx2J/vbTf//3f6t///7q06ePioqKdNJJJ+nOO+/UCSeckKpDf+8cY4yuueYanXDCCTriiCNyWpY+x+4ilO8AgEJgWVbaa2NMWtno0aO1du1aff7557rnnnt07rnn6pVXXtE3v/lNSYkLPLSnzebl7anTFWXrb8/zJEmnn366fvKTn0iSjj76aL344ou66667VFlZKYn+zlW2/g6Hw3r44Yc1efJk7bnnnnIcR+PGjdPEiRPT6tPf/ovFYpo0aZI8z9Odd96ZNo/+9s95552Xen7EEUdo6NChOuCAA/Tkk0/qrLPOkkR/++m///u/9fLLL+uJJ57QAQccoOXLl+uKK65Q7969NW7cOEn098666qqr9Prrr6cdTdNe9Dl2FyS22K3ttddechwnbXRWkjZv3pz2S2W3bt3Ut29f9e3bV8cff7wOOeQQzZkzR9ddd13Gdnv16pWxTanxV9FsdUKhkHr27LnL21aI2urvvfbaS6FQSP3790+bf/jhh7f6ZU5/Z9aez/eQIUO0du1abdmyRdFoVHvvvbeOO+44DR06NGu79PeuicViOvfcc/XBBx/oueeeSxutzYT+9k/v3r11wAEHtHrlb/p759TW1upXv/qVHn30UZ188smSpIEDB2rt2rX6/e9/n0psm6O/2/bjH/9YTzzxhJYvX64+ffrscnv0OboqDkXGbq2oqEhDhgzRwoUL08oXLlyob3/721mXM8akHcrW3LBhw7R8+XJFo9FU2bPPPqt9991XBx54YKpO8/U+++yzGjp0qMLh8E5sTeFrq7+Liop0zDHHtLidwdtvv60DDjgga7v0d2a5fL7Ly8u1995765133tHKlSt1+umnZ22X/t55yaT2nXfe0aJFi9r1DyL97Z8vvvhC1dXV6t27d9Y69PfOicViisVisu30fy0dx0kdjZMJ/Z2dMUZXXXWVHnnkET333HM66KCDfGmXPkeXFeilqoAClLwdypw5c8yGDRvMlClTTLdu3cyHH35otm3bZq677jrz0ksvmQ8//NCsWrXKTJ482UQikbQryf7yl780F154Yer1119/bfbZZx9z/vnnm3Xr1plHHnnElJWVZbyU/k9+8hOzYcMGM2fOnN3iUvqt9bcxxjzyyCMmHA6b2bNnm3feecf88Y9/NI7jmBUrVqTaoL/br63+fvDBB82SJUvMe++9Zx577DFzwAEHmLPOOiutDfq7/bZu3WrWrFlj1qxZYySZmTNnmjVr1piPPvrIxGIxc9ppp5k+ffqYtWvXpt2Cpr6+PtUG/d1+rfX31q1bzU9/+lPz4osvmg8++MAsWbLEDBs2zOy3336mpqYm1Qb93X6t9bcxxlRWVpoBAwaYJUuWmPfff9/MnTvXFBcXmzvvvDPVBv3dfj/60Y9MeXm5Wbp0adr+YseOHak6X3zxhVmzZo158sknjSTzwAMPmDVr1phPPvkkVYc+x+6CxBYwxvzpT38yBxxwgCkqKjKDBw9OXUq/trbWnHnmmWbfffc1RUVFpnfv3ua0004zr776atryF198samsrEwre/31182IESNMJBIxvXr1MlOnTk1dRj9p6dKlZtCgQaaoqMgceOCBZtasWR26nYUiW38nzZkzx/Tt29cUFxebo446qsU9EOnv3LTW3//1X/9l+vT
pY8LhsNl///3NDTfckJZkGUN/52LJkiVGUovp4osvTt2OI9O0ZMmSVBv0d/u11t87duwwEyZMMHvvvXfq833xxRebjRs3prVBf7dfa/1tjDGffPKJueSSS8y+++5riouLzWGHHWb+8Ic/pPUd/d1+2fYXc+fOTdWZO3duxjo33nhjqg59jt2FZUzDmeAAAAAAAHRCnGMLAAAAAOjUSGwBAAAAAJ0aiS0AAAAAoFMjsQUAAAAAdGoktgAAAACATo3EFgAAAADQqZHYAgAAAAA6NRJbAAAAAECnRmILAAAAAOjUSGwBAAAAAJ0aiS0AAAAAoFMjsQUAAAAAdGoktgCAgvH666/r0ksv1UEHHaTi4mJ1795dgwcP1m9/+1t9+eWXqXqjRo3SqFGj8hLj1KlTZVlWm/Xmz5+v22+/veMDyuLDDz+UZVn6/e9/71ubS5culWVZWrp0aYfG89RTT2nq1Km5BwgA2G2R2AIACsI999yjIUOGqKqqSj/72c/09NNP69FHH9V3v/td3XXXXZo8eXK+Q8xJvhPbjjB48GC99NJLGjx4cIeu56mnntK0adM6dB0AgK4llO8AAAB46aWX9KMf/Ujjx4/XY489pkgkkpo3fvx4/fSnP9XTTz+dxwghSWVlZTr++OPzHQYAAC0wYgsAyLv//M//lGVZmj17dlpSm1RUVKTTTjut1Ta+/PJLXXHFFdpvv/1UVFSkb33rW7r++utVX1+fqpM8HPb+++9vsbxlWS0Of33yySd19NFHKxKJ6KCDDmr3Yb2jRo3Sk08+qY8++kiWZaWmXGJNxnTVVVfp7rvv1qGHHqpIJKL+/fvrgQceaFccSTNnztRBBx2k7t27a9iwYXr55Zdb1Fm5cqVOO+007bnnniouLtagQYP04IMPptXJdijyPffckxbf/Pnzdckll+jAAw/MOZ5LLrlEf/rTn1Lbn5w+/PDDnLYZALB7YcQWAJBXruvqueee05AhQ1RRUbFTbdTV1Wn06NF67733NG3aNA0cOFArVqzQjBkztHbtWj355JM5t7l48WKdfvrpGjZsmB544AG5rqvf/va3+vTTT9tc9s4779S//du/6b333tOjjz66S7E+8cQTWrJkiW666SZ169ZNd955p84//3yFQiGdc845bcbypz/9Sf369UsdFv3rX/9a3/nOd/TBBx+ovLxckrRkyRKddNJJOu6443TXXXepvLxcDzzwgM477zzt2LFDl1xySdb2Z8+erR/+8Ic6++yzddttt2nLli2aNm1aiyS9vfH8+te/1vbt2/XQQw/ppZdeSi3Xu3fvNrcVALAbMwAA5NGmTZuMJDNp0qR2L1NZWWkqKytTr++66y4jyTz44INp9W699VYjyTz77LPGGGM++OADI8nMnTu3RZuSzI033ph6fdxxx5l9993X1NbWpspqamrMnnvuadrz9XnyySebAw44oEV5e2NNxlRSUmI2bdqUKovH46Zfv36mb9++ra4/ua1HHnmkicfjqfJXX33VSDJ//etfU2X9+vUzgwYNMrFYLK2NU045xfTu3du4rmuMMWbJkiVGklmyZIkxxhjXdU2vXr3Mcccdl7bcRx99ZMLhcNr25xLPlVde2a4+BgAgiUORAQCd3nPPPadu3bq1GMFMjjQuXrw4p/a2b9+uqqoqnXXWWSouLk6V9+jRQ6eeemqgsY4dO1b77LNP6rXjODrvvPP07rvv6v/+7//aXN/JJ58sx3FSrwcOHChJ+uijjyRJ7777rt58801973vfkyTF4/HU9J3vfEeffPKJ3nrrrYxtv/XWW9q0aZPOPffctPL9999fw4cP36l4AADYGSS2AIC82muvvVRaWqoPPvhgp9v44osv1KtXrxa34fnmN7+pUCikL774Iqf2vvrqK3mep169erWYl6msI2NtLYb2bFfPnj3TXifPYa6trZWk1KHV1157rcLhcNp0xRVXSJI+//zzrNsiKS3xTspU1p54AADYGZxjCwD
IK8dxNHbsWP3v//6v/u///k99+vTJuY2ePXvqlVdekTEmLWHcvHmz4vG49tprL0lKjb42P/+zeYK4xx57yLIsbdq0qcW6MpV1RKytrS9Z1jxJ3BnJ9V133XU666yzMtY57LDDMpYn15/pvONd7ScAAHLBiC0AIO+uu+46GWP0gx/8QNFotMX8WCymv//971mXHzt2rLZt26bHHnssrXzevHmp+VJiFLG4uFivv/56Wr3HH3887XW3bt107LHH6pFHHlFdXV2qfOvWra3G0VQkEsk4CtneWJMWL16clji6rqsFCxbo4IMP3qkfAZo77LDDdMghh+i1117T0KFDM049evTIumyvXr1aXD1548aNevHFF3c6JkZxAQC5YsQWAJB3w4YN06xZs3TFFVdoyJAh+tGPfqQBAwYoFotpzZo1mj17to444ois57dedNFF+tOf/qSLL75YH374oY488kg9//zz+s///E995zvf0bhx4yQlbh/z/e9/X/fdd58OPvhgHXXUUXr11Vc1f/78Fm1Onz5dJ510Uuo+uq7r6tZbb1W3bt305ZdftrlNRx55pB555BHNmjVLQ4YMkW3bGjp0aLtjTdprr700ZswY/frXv05dFfnNN9/M+ZY/rbn77rs1ceJEnXjiibrkkku033776csvv9Q///lPrV69Wn/7298yLmfbtqZNm6Yf/vCHOuecc3TZZZfp66+/1rRp09S7d2/Z9s79fn7kkUdKkm699VZNnDhRjuNo4MCBKioq2ultBAB0bSS2AICC8IMf/EDHHnusbrvtNt16663atGmTwuGwDj30UF1wwQW66qqrsi5bXFysJUuW6Prrr9fvfvc7ffbZZ9pvv/107bXX6sYbb0yr+4c//EGS9Nvf/lbbtm3TmDFj9I9//KPFPVfHjx+vxx57TDfccIPOO+889erVS1dccYVqa2s1bdq0Nrfn6quv1htvvKFf/epX2rJli4wxMsbkFKsknXbaaRowYIBuuOEGbdy4UQcffLD+8pe/6LzzzmtHr7bP6NGj9eqrr+rmm2/WlClT9NVXX6lnz57q379/iwtDNfdv//ZvsixLv/3tb3XmmWfqwAMP1C9/+Us9/vjj2rhx407Fc8EFF+iFF17QnXfeqZtuuknGGH3wwQdZ74sLAIBljDH5DgIAALRkWZauvPJK3XHHHfkOJSdff/21Dj30UJ1xxhmaPXt2vsMBAOwGGLEFAAA7bdOmTbr55ps1evRo9ezZUx999JFuu+02bd26VVdffXW+wwMA7CZIbAEAwE6LRCL68MMPdcUVV+jLL79UaWmpjj/+eN11110aMGBAvsMDAOwmOBQZAAAAANCpcbsfAAAAAECnRmILAAAAAJ3Y0qVLNXbsWFVWVurxxx/XggULNGzYMI0ZM0bV1dX5Di8QHIoMAAAAAJ1UXV2dvvvd7+rhhx9WUVGRYrGYTjjhBK1YsUJVVVX685//vFtcoZ6LR3VRnufp448/Vo8ePWRZVr7DAQAAALo0Y4y2bt2qfffdV7bd+oGxdXV1ikajbbbX/P/4SCSiSCSSVvbiiy+qpKREp556qkpLS/Wzn/1MAwYMUFFRkYYPH65rr7125zaokyGx7aI+/vhjVVRU5DsMAAAAYLdSXV2tPn36ZJ1fV1engw7ork2b3Vbb6d69u7Zt25ZWduONN2rq1KlpZZ9++qk++OADvfDCC1q8eLGmTp2q/v37p+a7buvr6SpIbLuoHj16SJJO0HcUUjjP0XR9oZKwLptzlu6b/IjitbF8h9Pl0d/Bor+DRX8Hy8/+Du3bS/GPN/kUWdfFZzxY9Hdw4orpeT2V+j88m2g0qk2bXX2w6gCV9cg8sluz1dNBQz5SdXW1ysrKUuXNR2sl6Rvf+IZOOOEEFRUVacyYMbrooovSEmvHcXZyizoXEtsuKnnYQkhhhSwS244WtsIqLS1V2ApLHPnd4ejvYNHfwaK/g+Vnf4fsiMR3bpv4jAeL/g5Qw5WL2ns
aYLfuiSkTt6GtsrKytMQ2k2OPPVa33367JGnNmjWaMGGCNmzYoGg0qqqqKg0cOLBd8XR2JLYAAAAAEDBPRp4yX8c3W3kmPXv21GmnnaaRI0fKtm3dd999evXVV1VZWani4mLNmzfPr5ALGoktAAAAAATMkyevlXm5uPLKK3XllVemXn/rW9/SpEmTdiG6zof72AIAAAAAOjVGbAEAAAAgYK4xck3mQ46zlSM7ElsAAAAACJhf59gigcQWAAAAAALmycglsfUNiS0AAAAABIwRW3+R2AIAAABAwDjH1l8ktgAAAAAQMK9hyjYPuSGxBQAAAICAua2cY5utHNmR2AIAAABAwFyTmLLNQ25IbAEAAAAgYByK7C8SWwAAAAAImCdLrqys85AbElsAAAAACJhnElO2ecgNiS0AAAAABMxtZcQ2WzmyI7EFAAAAgICR2PqLxBYAAAAAAuYZS57Jco5tlnJkR2ILAAAAAAFjxNZfJLYAAAAAEDBXtlzZWeYhV5l7EgAAAACAToLEthOYMWOGLMvSlClT8h0KAAAAAB+YhnNsM02Gc2xzxqHIBa6qqkqzZ8/WwIED8x0KAAAAAJ9wjq2/GLEtYNu2bdP3vvc93XPPPdpjjz3yHQ4AAAAAn7jGbnVCbhixLWBXXnmlTj75ZI0bN07/8R//0Wrd+vp61dfXp17X1NRIkkIlYYWtcIfGCSlcEkp7RMeiv4NFfweL/g6Wn/0dKnZklfCd2xY+48GivwNkJNW2v7onS16WcUZPxp+YdiOWMYZeK0APPPCAbr75ZlVVVam4uFijRo3S0Ucfrdtvvz1j/alTp2ratGktyufPn6/S0tIOjhYAAADYve3YsUMXXHCBtmzZorKysqz1ampqVF5erideP1jdejgZ62zf6uq0ge+12RYa8dNNAaqurtbVV1+tZ599VsXFxe1a5rrrrtM111yTel1TU6OKigrdN/kRRmwDEC4J6bI5Z+u+yQ8rVhvPdzhdHv0dLPo7WPR3sPzs79C+vRT/eJNPkXVdfMaDRX8HJ2ZiOdVv7ZBjl7HHnJHYFqBVq1Zp8+bNGjJkSKrMdV0tX75cd9xxh+rr6+U46b/uRCIRRSKRFm3Fa2Pi3PPgxGrjitXmtlPDzqO/g0V/B4v+DpYf/W3q3MT3LtqFz3iw6O+OF88xsU0cipz5H/Vs5ciOxLYAjR07VuvWrUsru/TSS9WvXz/94he/aJHUAgAAAOhcPNlyOcfWNyS2BahHjx464ogj0sq6deumnj17tigHAAAA0PlwKLK/SGwBAAAAIGCebK6K7CMS205i6dKl+Q4BAAAAgE9cY8k1mc+lzVaO7EhsAQAAACBgbivn2LqM2OaMxBYAAAAAAuYZW16Wc2w9zrHNGYktAAAAAASMEVt/kdgCAAAAQMA8ZT+X1gs2lC6BxBYAAAAAAtb6VZEzlyM7egwAAAAA0KkxYgsAAAAAAXONLTfLxaOylSM7ElsAAAAACJgnS56ynWPLfWxzxU8BAAAAABCw5Ihttqm9PvzwQ+29994aNWqURo0apc8++0wLFizQsGHDNGbMGFVXV3fgVhQORmwBAAAAIGCt3+4nt/HHyspKPfTQQ5KkWCymmTNnasWKFaqqqtL06dM1e/bsXY630DFiCwAAAAAB84zV6pSLF154QSNGjNCvfvUrvf322xowYICKioo0fPhwrVu3roO2oLAwYgsAAAAAAfNaGbFN3u6npqYmrTwSiSgSiaSV9e7dW++++65KS0v1gx/8QI8//rjKyspS813X9TnywsSILQAAAAAEzDN2q5MkVVRUqLy8PDXNmDGjRTuRSETdunWTZVk6++yztWbNmrSE2HGcwLYpnxixBQAAAICAubLkZrn6cbK8uro6bfS1+WitJG3dulU9evSQJC1fvlynnHKKZs2apWg0qqqqKg0cOLADoi88JLYAAAAAELCmI7OZ5klSWVlZWmKbyfPPP68bbrhBpaWlOuiggzR9+nRFIhF
VVlaquLhY8+bN8z32QkRiCwAAAAABc6VWRmzbb+LEiZo4cWJa2aRJkzRp0qSdD64TIrEFAAAAgIC1Z8QW7UdiCwAAAAABc40tN0sCm60c2ZHYAgAAAEDAjCx5WQ5FNlnKkR2JLQAAAAAEjBFbf5HYAgAAAEDAPGPJM5lHZrOVIzsSWwAAAAAImCtbrrKM2GYpR3b0GAAAgA/i//pYof32zXcYADqJ5Ihttgm5IbEFAHR50ZOOCWw97tgh7Vpv7MRjMr6Ojx2c9ti8XrYy5F+ody/F//VxvsMAgN0ShyIDAAAAQMA82fKyjDNmK0d2JLYAAAAAEDDXWHKzHHKcrRzZkdgCAAAAQMC4KrK/SGwBAAAAIGDG2PKy3K/WcB/bnJHYAgAAAEDAXFlyleVQ5CzlyI7EFgAAAAAC5pnshxx7JuBgugASWwAAAAAImNfKocjZypEdiS0AAAAABMyTJS/LIcfZypEdiS0AAAAABIzb/fiLxBYAAAAAAsahyP4isQUAAACAgHlq5T62HIqcMxJbAAAAAAiYaeUcW0NimzMSWwAAAAAImGdaGbHlHNuckdgCAAAAQMA4x9ZfJLYAAAAAEDBGbP1FYgsAAAAAAeM+tv5ijBsAAAAA0KkxYgsAAAAAAeNQZH+R2AIAAABAwEhs/cWhyAVo1qxZGjhwoMrKylRWVqZhw4bpf//3f/MdFgAAAACfJBPbbBNyQ2JbgPr06aNbbrlFK1eu1MqVKzVmzBidfvrpeuONN/IdGgAAAAAfkNj6i0ORC9Cpp56a9vrmm2/WrFmz9PLLL2vAgAF5igoAAACAX4yyX/3YBBtKl0BiW+Bc19Xf/vY3bd++XcOGDct3OAAAAAB8wDm2/iKxLVDr1q3TsGHDVFdXp+7du+vRRx9V//79s9avr69XfX196nVNTY0kKVQSVtgKd3i8u7twSSjtER2L/g5Wl+jvsK1wSQD7wrAtR5KdXFcr67XCtkJN5iVfW+HEWULhsC3T8DrUrI1MZdg5fn6+Q8WOLN6XNnWJfUonQn8HyEiqbX91Elt/WcYYRroLUDQa1caNG/X111/r4Ycf1r333qtly5ZlTW6nTp2qadOmtSifP3++SktLOzpcAAAAYLe2Y8cOXXDBBdqyZYvKysqy1qupqVF5eblG/v0KhbpFMtaJb6/X8lPvbLMtNCKx7STGjRungw8+WHfffXfG+ZlGbCsqKjSu5BxGbAMQLgnpsjln677JDytWG893OF0e/R2srtDfsXGDFV60OpD1ODFP9rK1ba43PnawQotXt3htnTRU/3bRwZo97z2Zp1e2qJdpWew8Pz/foV7fVHzTZp8i67q6wj6lM6G/gxMzMS2qfajdie0JT1zZamL7/Gl/IrHNAcckdBLGmLTEtblIJKJIpOUfRrw2piznpKMDxGrjitXG8h3GboP+DlZn7u9ozJMCiD0a8+TEPDkN62ptvbGYJ9NkXvK1FfPSXjevl2lZ7Do/Pt+mzk1876JdOvM+pTOivzte3OTWv8ZYMlkOOc5WjuxIbAvQr371K02cOFEVFRXaunWrHnjgAS1dulRPP/10vkMDAAAA4ANPVtarImcrR3YktgXo008/1YUXXqhPPvlE5eXlGjhwoJ5++mmNHz8+36EBAAAA8AEXj/KXne8A0NKcOXP04Ycfqr6+Xps3b9aiRYtIagEAAIAuJHkocrYpV3/961+19957S5IWLFigYcOGacyYMaqurvY79IJEYgsAAAAAAUuO2GabcmrL8/TQQw+poqJCsVhMM2fO1LJlyzR9+nRNnz69g7agsJDYAgAAAEDA/ByxnT9/vs455xzZtq133nlHAwYMUFFRkYYPH65169Z10BYUFhJbAAAAAAiYaWW0NpnY1tTUpE2Z7pLiuq4efPBBnXfeeZKkr7/+Ou0WQa7rBrNBeUZiCwAAAAAFqKKiQuXl5alpxowZLer8z//8j84991zZdiK122OPPVRTU5Oa7zhOYPHmE1dFBgA
AAICAGUnGZJ8nSdXV1Wmjr5FIpEXdDRs2aM2aNfqf//kfvfPOO5o9e7Y2bNigaDSqqqoqDRw40P/gCxCJLQAAAAAEzJMlq4372JaVlaUltpnceuutqedDhw7VbbfdpgceeECVlZUqLi7WvHnz/Au6gJHYAgAAAEDAWrtI1M7c7keSVq5cKUmaNGmSJk2atNOxdUYktgAAAAAQMM9YsrIksLne7gcktgAAAAAQOGNaOcc2SzmyI7EFAAAAgIB1xKHIuzMSWwAAAAAIGImtv0hsAQAAACBgnGPrLxJbAAAAAAgY59j6i8QWAAAAAAKWSGyzHYoccDBdAIktAAAAAASMc2z9RWILAAAAAAEzDVO2ecgNiS0AAAAABIwRW3+R2AIAAABA0Biy9RWJLQAAAAAErZURWzFimzMSWwAAAAAIGLf78Zed7wAAAAAAANgVjNgCAAAAQMC4eJS/GLEFAHR5RU9XKXrSMYGsxw3bcscOaXO94WeqFDvxmBavQ4tXp8piJx7Tol6mZVEY4p9sUqh3r3yHAaCzMFbrE3LCiC0AoKDFxw9VaOHKnV4uetIxst3cT1ZKJqfO4lWp18nnmeolhepc2UtXpyW32dpvmqBaDTG6owalv25WT5LsuCepMfFFYXB69FD8k035DgPtEJswVOFnc9+vdEZOt245LxMfN1ShRY39444eImdJy/0fdg3n2PqLEVsAAAAACJppY9pF27Zt0x133KHRo0erZ8+eKi4uVt++fXX55Zerqqrr/ShKYgsAAAAAAUueY5tt2hXPPfecvv3tb2vjxo268cYb9cYbb+jTTz/Vk08+qeHDh+u6667T2Wef7dOWFAYORe5grutq3bp1OuCAA7THHnvkOxwAAAAAhaKDDjnu1auXXnnlFZWUlKSVl5eX67DDDtOll16qV155pWNWnieM2PpsypQpmjNnjqREUltZWanBgweroqJCS5cuzW9wAAAAAApCR47Y9u/fv0VS29xxxx23S+soNIzY+uyhhx7S97//fUnS3//+d33wwQd68803NW/ePF1//fV64YUX8hwhAAAAgLxr7Vxan0ZyL7vsMplWrkQ1d+5cf1ZUAEhsffb555+rV6/Epf6feuopffe739Whhx6qyZMn67//+7/zHB0AAACAwmA1TNnm7bqhQ4dKklavXq21a9fqsssu86XdQkRi67N99tlHGzZsUO/evfX000/rzjvvlCTt2LFDjuPkOToAAAAABSGAEdsrrrhCb775pm6//Xbttdde2nPPPTVp0iR/Gi8wJLY+u/TSS3Xuueeqd+/esixL48ePlyS98sor6tevX56jAwAAAFAQAkhs33//fZ122mmaO3euBg4cqFGjRmm//fbTiBEj/FlBASGx9dnUqVN15JFHauPGjfrud7+rSCQiSXIcR7/85S/zHB0AAACAgmCsxJRtng9OOeUU3XnnnRo+fLgk6bHHHtPEiRO1fv16X9ovJCS2PorFYpowYYLuvvvuFveFuvjii/MUFQAAAIBCY0xiyjbPD7fccovGjRuXel1RUaF58+b503iBqi+JCQAAQTtJREFUIbH1UTgc1vr162VZ/vzCAgAAAKCLCuBQ5KOOOkofffRRWlnPnj39abzAkNj67KKLLtKcOXN0yy235DsUAAAAAIUqgEORjznmGBljZFmW6uvrtXXrVvXs2VOfffaZL+0XEhJbn0WjUd17771auHChhg4dqm7duqXNnzlzZp4iAwAAAFAoLJOYss3zw+bNm9NeP/3003rhhRf8abzAkNj6bP369Ro8eLAk6e23306bxyHKAAAAAPLlpJNO0q9+9StNnz4936H4jsTWZ0uWLMl3CAAAAAAKXQDn2C5btiz13HVdrV69WnV1df40XmBIbDvIu+++q/fee08jR45USUlJ6th2AAAAAAjiHNuf/exnqed1dXWqrq7WokWLfGm70JDY+uyLL77QueeeqyVLlsiyLL3zzjv61re+pcsvv1zf+MY
39Ic//CHfIQIAAADItwBGbF999dW01+vWrdMdd9yhu+++258VFBA73wF0NT/5yU8UDoe1ceNGlZaWpsrPO+88Pf3003mMDAAAAEDBMG1MHeDII4/Uyy+/3DGN5xkjtj579tln9cwzz6hPnz5p5YccckiLe0gBAAAA2E0FMGL75z//OfXcdV2tWrVKJSUl/jReYEhsfbZ9+/a0kdqkzz//XJFIJA8RAQAAACg4AZxj++STT6aeh0Ihfetb39Jjjz3mS9uFhsTWZyNHjtS8efNSl9C2LEue5+l3v/udRo8e3a42ZsyYoUceeURvvvmmSkpK9O1vf1u33nqrDjvssI4MHQAAAEBAgriP7YMPPuhPQ50A59j67He/+53uvvtuTZw4UdFoVD//+c91xBFHaPny5br11lvb1cayZct05ZVX6uWXX9bChQsVj8c1YcIEbd++vYOjBwAAABAIn86xXb9+vYYPH67KykqdfPLJ2rZtmxYsWKBhw4ZpzJgxqq6u7oDgCw8jtj7r37+/Xn/9dc2aNUuO42j79u0666yzdOWVV6p3797taqP5Rabmzp2rb37zm1q1apVGjhzZEWEDAAAA6IQOO+wwvfDCC5KkadOm6dFHH9Udd9yhFStWqKqqStOnT9fs2bPzHGXHI7HtAL169dK0adN8a2/Lli2SpD333DNrnfr6etXX16de19TUSJJCJWGFrbBvsSCzcEko7REdi/4OVr772w7bckpy34+llgvbsm0jz7EUzqEdJ5w4qMluWMYJ26nnmeolWY6RVRJusXym5eySsKyGepZtZBxLYTtxXlU4bMt4RraltHqSZFuSF0q8Du1E36CRn59vpzgkO8770ZZ871MkyQrbu83fTrg49/5uvt91ijLv/9CMkVTb/uqWWjkUueEx+T99UiQSaXHdnnC48b3ZsWOH9t9/fw0YMEBFRUUaPny4rr322vYH1YlZxpgOupj07unAAw/UZZddpksvvVQVFRW73J4xRqeffrq++uorrVixImu9qVOnZkym58+fn/FiVgAAAAD8s2PHDl1wwQXasmWLysrKstarqalReXm5DrjlZtnFxRnreHV1+uiX17cov/HGGzV16tQW5QsXLtTPf/5zhcNh3X777XrwwQd1++2364orrtCyZcv0xhtv7PR2dRYMd/jspz/9qe6//37ddNNNGj16tCZPnqwzzzxzp6+IfNVVV+n111/X888/32q96667Ttdcc03qdU1NjSoqKnTf5EcYsQ1AuCSky+acrfsmP6xYbTzf4XR59Hew8t3f7uhBcpas2enlYuMGy3YbRmwXrW738l7l0ZIke9na1Ovk80z1kizPyFrxWovlMy1nL1ur+NjBieXcxIhtkW3pB5f21T1z31XUM7LjXlo9SbLjXuOI7eL2bxNa8vPz7XTvLnfbNp8i67ryvU+RpPiYQQo9l/t+pTMq7lmui/94Uk797Y4aJGdpY/94I4+WvXxtxwTYhcRMLLcF2nG7n+rq6rQkOVtOMX78eK1Zs0a//e1vtWzZstRIb/fu3fXuu+9q0KBBuuyyy/T9739fe+yxR25xdhIktj778Y9/rB//+Md67bXXdN999+nf//3fdcUVV+iCCy7QZZddpsGDB7fdSJO2nnjiCS1fvrzFfXGby3RYgiTFa2ONxzKgw8Vq44rV5rhTw06jv4OVr/6OxzyFdmK9yeWiMS+R2HqWlEM7bsyTJDkNy7gxL/U8U70kyzWya2Mtls+0nFMbU6yhnuUaGc+S1XAocizmpRLbpvWkhsS24Z8ew9+AL/z4fHuhuFzej3bL5z48FvN2m78dpy6RzObS3833u2408/4P6eIdkNiWlZW1OvorJU5JTOYB5eXlikaj2rBhg6LRqE4//XRt2bJFf//73/Xyyy/rxhtv1IknnqjJkydr3LhxucVb4Lgqcgc56qij9F//9V/617/+pRtvvFH33nuvjjnmGB111FG677771NoR4MYYXXXVVXrkkUf
03HPP6aCDDgowcgAAAAAdLXm7n2xTey1cuFCVlZUaPXq0Fi9erMmTJ2vKlCmqrKzUDTfcoBtuuEHFxcX6y1/+og8//FAjR47UtddeqwMPPLDDti0fGLHtILFYTI8++qjmzp2rhQsX6vjjj9fkyZP18ccf6/rrr9eiRYs0f/78jMteeeWVmj9/vh5//HH16NFDmzZtkpT4BaakpCTIzQAAAADQEdoxYtsep5xyik455ZS0skmTJmnSpEmNzTUMqn344Yd666239PHHH+voo4/OLd4CR2Lrs9WrV2vu3Ln661//KsdxdOGFF+q2225Tv379UnUmTJjQ6m17Zs2aJUkaNWpUWvncuXN1ySWXdETYAAAAAILkU2Lblq+++krbtm3TMccco82bN+uSSy5RVVWVDjjgAP9WUgBIbH12zDHHaPz48Zo1a5bOOOOMtMtvJ/Xv3z/tF5TmuFA1AAAA0LW1dshxLocit+bss8/WwoULNXHiRE2ePFkTJkzwp+ECRGLrs/fff7/NXz+6deumuXPnBhQRAAAAgN3RiBEjNHv2bPXs2TPfoXQ4Lh7ls642pA8AAACgAxir9ckHU6ZM0b/+9S99//vf14ABAzRgwABdeOGFWrdunS/tFxISW5+5rqvf//73OvbYY9WrVy/tueeeaRMAAAAApM6xzTb5YOXKlRo3bpz2339/zZgxQ//5n/+piooKjRkzRitXrvRnJQWCQ5F9Nm3aNN1777265ppr9Otf/1rXX3+9PvzwQz322GP6zW9+k+/wAAAAABSAIM6xveGGGzRv3jyddNJJqbLTTz9dI0aM0G9+8xs99dRT/qyoADBi67O//OUvuueee3TttdcqFArp/PPP17333qvf/OY3evnll/MdHgAAAIBCEMCI7TvvvJOW1CZNnDhRb731lj8rKRAktj7btGmTjjzySElS9+7dtWXLFkmJ+0s9+eST+QwNAAAAQKEwjaO2zSe/Etu99tor9fznP/952ryudpokia3P+vTpo08++USS1LdvXz377LOSpKqqKkUikXyGBgAAAKBQBDBiW1RUpGg0KklauHBhqvzLL7+U53n+rKRAcI6tz84880wtXrxYxx13nK6++mqdf/75mjNnjjZu3Kif/OQn+Q4PAAAAQCFoLYH1KbE95ZRT1LdvXzmOo1gsliq/8cYbdfbZZ/uzkgJBYuuzW265JfX8nHPOUZ8+ffTiiy+qb9++Ou200/IYGQAAAIBCEcTFo6ZMmaJTTz1VkhQOh1Plf/zjH/1ZQQEhse1gxx9/vI4//vh8hwEAAABgNxOJRNS/f/98hxEIElsfPPHEE+2uy6gtAAAAgCAORR49enSr85csWeLPigoAia0PzjjjjHbVsyxLrut2bDAAAAAACl4QhyK/8cYbuv/++/1prMCR2Pqgq11RDAAAAEAAfEpgsykuLtZ3vvOdjl1JgSCx7QCLFy/W4sWLtXnz5rSk17IszZkzJ4+RAQAAACgIARyKbEwHZ84FhMTWZ9OmTdNNN92koUOHqnfv3rIsK98hAQAAACgwQRyKvDvlIiS2Prvrrrt0//3368ILL8x3KAAAAAAKVQAjtn/729/8aagTsPMdQFcTjUb17W9/O99hAAAAAChgyRHbbNOu+Nvf/qZnnnlGxx13XMb5n332ma699tpdW0mBYcTWZ5dffrnmz5+vX//61/kOBQAAAMBuaMSIEfrFL36hf//3f9fYsWN1+OGHq7i4WJs2bdJLL72k999/X1OnTs13mL4isfVZXV2dZs+erUWLFmngwIEKh8Np82fOnJmnyAAAAAAUjA48FLlXr17685//rH/96196/PHH9dprr6m2tlZ9+vTRT3/6U40dO3bXVlCASGx99vrrr+voo4+WJK1fvz5t3u508jYAAACAVgRwju1+++2nK664wp/GChyJrc+WLFmS7xAAAAAAFLggroq8O+HiUQCAghZauFLx8UN3ermip6vkOZZs1yh60jHtXt5ZvEq
S5I4dknqdfJ6pXpJxLHmjBrdYPtNy7tghCj9TlVrOco2Mkzi6xzJGlmvkhey0epLkhWzZ8cR90mMntn+b0LHcrVvl9OiR7zDQDuFnVyo2Iff9Smfkbt+e8zKhRSsVH9fYP86SVXJHZ96XYReYNibkhBFbtIs9qL+8NRvyHQawW2qe1NlRT15RDr9LmsQ/KWltjhsqWS3neaMGy166WvFxQ2XHPdlLVzc2c8JRimf4om3+D1AqzrgnL2Sntd80CTNW+i/STRO3+PihqcTU8kyqLBdWvHE5J+bJ2ImEMZncOrFEYuiGG/uy6Omq9H/ePEl2IjlNttc8UbXcRALauGGSCSWS22Simim5taOejGUpPm6oLNfIjrryihw5dfHGdo2RXevKLQlJY4fIqXcTiW9DXHbca5FYdxbJ97jpa0lpZZ2NXVoqd+vWfIexS+ySEnm1tbJLS+Xt2JEoa/I84zJFRfKi0aBCVOzEY9L2F7ksE5swVLIsOXWuSt7+TDrwgFSd+IcfKfStA1V7yN6S1O51xE48RpEVb6T6KDZhqMLPJj7H3shBcosd2a6R51iphNp2jYxtpfZzTX/Aa/o34HTvLnfbtkR8zf5mnB490j5vTdfblF1SnGorVvuVpMR+LLnvcHr0kHFdWbYtd9s2OT16KDr0UIUWrUzV8yoHy1nS8GPd6CGyXE/hNe9m/bw3jy1Xye8iu7RUseMOT627qeS+Ojkv+UNFpj6QWu5jmvZ787LABHAo8u6ExBYAAAAAAsahyP4isQUAAACAoDFi6ysSWwAAAAAIGCO2/iKxBQAAAICgMWLrKxJbAAAAAAgaia2vSGwBAAAAIGBWw5RtHnJDYgsAAAAAQWPE1lcktgAAAAAQMC4e5S8SWwAAAAAIGiO2viKxBQAAAIB8IIH1DYktAAAAAASMQ5H9Zec7AAAAAAAAdgWJLQAAAAAEzbQxtdOqVas0YsQIVVZW6txzz1UsFtOCBQs0bNgwjRkzRtXV1R0QfOEhsQUAAACAgCUPRc42tdd+++2nZ555RsuWLVPfvn312GOPaebMmVq2bJmmT5+u6dOnd9xGFBASWwAAAAAIWjtGbGtqatKm+vr6Fs306tVLpaWlkqRwOKy3335bAwYMUFFRkYYPH65169YFsjn5RmILAAAAAAFrz4htRUWFysvLU9OMGTOytrdx40YtWrRIJ5xwgsrKylLlrut29KYUBK6KDAAAAABBa8d9bKurq9OS1EgkkrF6TU2NLrzwQs2dO1eu66qmpiY1z3EcnwIubCS2AAAAABC0diS2ZWVlaYltJq7r6nvf+55+85vf6NBDD1UsFtOGDRsUjUZVVVWlgQMH+hp2oSKxBQAAAICA+XUf2wcffFAvvviitm7dqunTp+tHP/qRpkyZosrKShUXF2vevHn+BFzgSGwBAAAAIGjtGLFtj/PPP1/nn39+i/JJkybtVFidFRePKlDLly/Xqaeeqn333VeWZemxxx7Ld0gAAAAAfGIZ0+qE3JDYFqjt27frqKOO0h133JHvUAAAAAD4rR23+0H7cShygZo4caImTpyY7zAAAAAAdAC/zrFFAoltF1FfX592w+bkJb5DJWGFrfAut28X2fJKdr2dripcEkp7RMfa3frbDqcfXGNL8sI5HHBjJKfZ368dtiWr5TxTZMsqCcsO27JtySoJN/Z32JaX4YvWaajfIm5b8hw7rX2rST1jpX9xh5rUs8N2ql3LMzK21f7tTa7LNjJOYrlkG57T2E7y5gduk5jCJWE5Rc22xWrZXtpsp1mnGMk4Vqo8W+yWJKfJum0ZeWFb4Yaf6cNhW8YzsqxEjLaVWJexrcQv+Q3N2p1035x8j5u+llp+Vjuan/sTuyQkz3TO9yPJLgnJUzhtW9raLrsoJK/530ErdrXPrbCdtr/IZRkrbEuWJcczCkfSb4FilYQVijipfUJ712GFbYWb9FHT+EyRnfj7tY08x0rFYNuJv+Xkfq7pfrTp34BTHJL
tNrwPzf5mnOKQ7Hj6/jVTzOHiUOrRbZjvhO3UvsMpDsl4lizLlu2G5RSHpKLE/GS95HeDJDlFtixXCjVbf1PNY8tV6ruoJCSryM64n0vuq5PzrDbet+b927Tfm5ftfOCSanOs78M5tkiwjOEA7kJnWZYeffRRnXHGGVnrTJ06VdOmTWtRPn/+fJWWlnZgdAAAAAB27NihCy64QFu2bGn1Fj01NTUqLy/X4PNvllNUnLGOG63T6r9e32ZbaLR7DHfsBq677jpdc801qdc1NTWqqKjQfZMf8WfEduBh8l5/a5fb6arCJSFdNuds3Tf5YcVq4/kOp8vb3frbHT0o7bUd83IfsV26Jr3NUYMaR2ybzDMjjpK14jW5owbJdj1ZK15L9fece95WfaYR26VrEu01Y7teYsS2SfvxsYMb19V8xHbx6sb4Rg+Ss2SN3NGDdn7E1m1jxDbmJdbVdMR20Wp5I49u1lDL9tJmNx/GTo7Yem2M2MY8eUWNI0Z2zJUXdhSR0eR/O0xz7nlbUc/Iintyi0Oy415jXzQdsV22to2eKEzJ97jpa0lpZUHwc39il5bI25HLcE3hsUuK5dXWpW1LW9tlF4XlRWPtXseu9nl87OC0/UUuy8THDEqM2Na7Cm/8Mr3OxmqFDtxfdQfvJUntXkd87GAVvfTPVB/FxwxS6LnE59gMHyg34sh2G0Zsn1uj+JhBst2GEduG/VzyUUr/G3C6dZO7fbukln8zTvfucrdta4yjyXqbiuzRXZf86WT9+d+fUd3nX0uSvMqjU/sOp3t3Gc+VZdlyt2+X0727YoP7yl6+NlXPnHCUrOdfSyw78mhZrqfQa++nrb+p5rHlKvldZJeWKD70MNnL17aok9xXJ+fFxyT6L1MfSC33MU37vXnZzoqZ9v8dSGLE1mcktl1EJBJRJBJpUR6vjaX++dkVdtSTV5vjH+tuKFYbV4x+Cszu0t/xhgQsyY568rLUzchIoWb9FI95qcS26Twv6smujSke82THE8+TYjFP0QxftKGG+s3ZcU9eKL39WJN6zRNb06RePOal2t3pxDZuZELNElsvQ2LbdKHamNxos22xW7aXth43Q2IbslLlmZJhKfE+upaV9tqTJbvhv5lYzEsktjFPrpN4P1LJtdcYl9NJ/waS73HT11LLz2pQ/Nif2Fa4039X2grJq42lbUtb22W7Vk6JbdLO9nks5qXtL3JZJhbzEolt1JPq0/76Fa+NydS7ijZ8Ftu7jljMk1UbT/VR0/i8qCfXthKJrWelYkgmtsn9XNP9aNo+2YnLbXjd/G/GCzXOa61f7JLEjwexusb+dmNeat/hheIyrivLtuXWxuSF4opGE/OT9ZLfDZLkRj1ZridTl77+pprHlqvk+mwrrFjUy7ifS+6rnSb9LmV/35r3b9N+b162s+I5JracY+svElsAAAAACBojtr4isS1Q27Zt07vvvpt6/cEHH2jt2rXac889tf/+++cxMgAAAAAoLCS2BWrlypUaPXp06nXy/NmLL75Y999/f56iAgAAAOAXDjn2D4ltgRo1apS4YDUAAADQRRmTmLLNQ05IbAEAAAAgYFw8yl8ktgAAAAAQNC4e5SsSWwAAAAAImOUlpmzzkBsSWwAAAAAIGiO2viKxBQAAAICAcY6tv0hsAQAAACBoXBXZVyS2AAAAABAwRmz9RWILAAAAAEHjHFtfkdgCAAAAQMAYsfUXiS0AAAAABI1zbH1FYgsAAAAAAWPE1l8ktgAAAAAQNM6x9RWJLQAAAAAEjBFbf5HYAgAAAEDQPJOYss1DTkhsAQAAACBoHIrsKzvfAQAAAAAAsCsYsQUAAACAgFlq5RzbQCPpGkhsAQAAACBo3MfWVyS2AAAAABAwrorsLxJbAAAAAAgaF4/yFYktAAAAAATMMkZWlkOOs5UjOxJbAAAAAAia1zBlm4eccLsftIu3ZoPsQf3zHQawWwotXJn22iuyZUdz+MazpPi4oeltLlqZOMy
p2Tx76Wp5owYrtGilvJAtb9TgxvWGbNmxluuNjxuaaK8ZL2TLjntp7YefqWoMy0imyWUfYyce0xjfwpWKjx+q0MKVMrYlayduVG9Clqx4YrlkG7bb2I4bTnwFOk22KXrSMXKWrGq2IS3bS1uP0+zalZZkxU2q3HIzx+4V2XLq3SavHdlRV17YaWwqbmTCtpzauLyQLeNYifbsxrjcsUOyd0IBS77HTV9LSivrbLwdO2SXluY7jF3i1dbKLilJ25a2tsuLRmUXFQUVosLPVKXtL3JZJvzsSskYucWOogf0TKsTOvAAxd//UCXvfCZJ7V5H+Jkq1Y8YkOqj8LMrFZuQ+Bzby9fIqXPlOZZs1yg2YajCz66U5yT2Scn9XPJRSv8bcLdtk9O9eyK+Zn8z7tatcnr0aIyjyXqb8mrrGttqqO8sXpXad7hbt8pyHBnPk9O9u9ytW1W08m25o4ek6tnLVsurTHwfOEtWyTi2YoP6pq2/qeax5Sr5XeTt2KHwK/+UO7rlfi65r07OCz+b6L9MfSC13Mc07ffmZUFJjthmm5AbRmyRxj6qv7zXNsgZeLjc1/8pSbKOPVJWvStvzYYW9a3jBsq88nqrbTatk9yJOotXtbYIuoD4+KFpSUuS1XA+icnys1p7LpZgNyQk7qhBimdItDK3u/NfEMayZMc8eeH2/RZoGSNjtXKhfkuppLLNc2gsyamNy6lzE1dIbNKuU+e2smA641iJBNNqXNapi0uS3OJQ4h+IUCLR8kK2rBGD5NS7ciOO3FGDJEl21JXlmbRkTJLciCNzwtGp9loI2ank1jItl08EmHjwRg1uTAg9I69ycCqJt5p0lgm1cSMEL7G8sa1UMm4cS3bUS/9Ft6GZUCzeEJ/kjRwky232uWoSshVreB+MSfu8Giv982tFTeo9S0tum30Wnbp4qo4XsmTXN8QS8yRj5NR6ciNO4/ttTCJJX/GaLDv5hjpqjWnyw8Az/1rTat2mTtxvUFr9pq9P3G9Qav3GM42x5CD83Jq02MPPNayrje3JidXwjhuv8bVpfH+tUOJfIcuxZYXCafPSls3UTlKTMhONyXKclvOt3MYSLNtKe992RbKtTO9R03VYjpPaFisUlqTGhNX1ZIXCaXVk2frfD17RSRWDU3WT7TVdpxeNpscTavnvZ3IfEVq0UnZRkbxoVLEJQ1X03GuKjj1aRYvXKjbqKIUWJRKPyJLXFG1ISpL1k+0ULX9dXjSq0L69Ff/4k1RCFlm+XvHKwXLqPRV99IW8Tz6V9u4p77MvUnHYkWJJUsk7n8n71ybVT0juu6Si5esVHXmEipavlyTVVx6hyPOJ/40iS9clli8pkRr6wC4pkSxLnmUl/n4tS8WvvKXYyEEqfvFNxYYcKi9iyRs5SEXL1ys+cpDseldexJFdWppINuNx1Q/rp8jLbyk25BAVPf+GVFLS+P7F47JLS1V/Qn8Vv/SW7Oc3yOreXbIsGTfxvWFHnIZti8hr+PJ1uneXK8np0UMmGpOJxiTjyVi27EixYoP6ynK9RGyvvi1TUiJn9TuKVQ6WjFHR6ndlYjGZhrZigw9R6OU35EWjckcPUfjFNxLr6dFD0WMPTfRh3Cj08gbFvj1AzpJVjQlpQ934sMQASuilDZLxZL+0QYoUJz57xsguLVXsuMMT9W1L8ozshmVjDW3ZsSZ9n/xsO7YUjyv+7SNkxzyFXtqg+glD5TR8t3iVg2WsxP8tdtTLmvAmP1/Nf8ht+tnNGefY+orEFgAAAACCxu1+fEViCwAAAAAB43Y//uIcWwAAAAAIWnLENtuUg61bt+q4445T9+7dtX594nD5BQsWaNiwYRozZoyqq6s7YgsKCoktAAAAAATM8lqfclFSUqJ//OMfOueccyRJsVhMM2fO1LJlyzR9+nRNnz69A7agsJDYAgAAAEDQfByxDYVC2nvvvVOv33nnHQ0YMEBFRUUaPny41q1b53f0BYdzbAEAAAAgaO24KnJNTU1
acSQSUSQSabPpr7/+WmVlZanXrtv+Oyl0VozYAgAAAEDA2nMf24qKCpWXl6emGTNmtKvtPfbYIy0pdvy8jVqBYsQWAAAAAILWjtv9VFdXp428tme0VpL69u2rDRs2KBqNqqqqSgMHDtzlcAsdiS0AAAAAFKCysrK0xLY13/nOd7R27Vq99dZb+uEPf6gpU6aosrJSxcXFmjdvXgdHmn8ktgAAAAAQNCMp29WPd+I+tk899VSLskmTJuXeUCdFYgsAAAAAAWt6Lm2mecgNiS0AAAAABM2olXNsA42kSyCxBQAAAICgtePiUWg/ElsAAAAACJonyWplHnJCYgsAAAAAAeMcW3+R2AIAAABA0DgU2VcktgAAAAAQNBJbX5HYAgAAAEDQSGx9RWILAAAAAEHj4lG+svMdALK78847ddBBB6m4uFhDhgzRihUr8h0SAAAAAB8kLx6VbUJuSGwL1IIFCzRlyhRdf/31WrNmjUaMGKGJEydq48aN+Q4NAAAAwK5KHoqcbUJOSGwL1MyZMzV58mRdfvnlOvzww3X77beroqJCs2bNyndoAAAAAHaVZ1qfkBMS2wIUjUa1atUqTZgwIa18woQJevHFF/MUFQAAAADfMGLrKy4eVYA+//xzua6rffbZJ618n3320aZNmzIuU19fr/r6+tTrmpoaSVKoJKywFW73uu2ILa8kLCdiyy5JLGcV2bJk5JW0bMcqsmUylGer44QTv6XYbSzT2YRLQmmPkOywLZPhggiWkWQkk+VnNasd+/Fww+co+dgeu3KuirEs2Zbkhdq3PssYGSvb1SCUuFCEafLYamOS7dqSZSW+5FprtzWOJc+xUxepsL3GL00nbMu2Jc+xZRuTeN1kXlv97YRt2V72vnGaLGcZI8vJsNHJIksyduM2WnbmDjJOG/1gJMszMraVeu+NbbVsL9lM8pdxo+wX8khbruH9aNpcpvcz03uW6bNoWbJcIxOyFG7YtnCRLeMZWbaRU2TLSsWYeLRKQrLs9n0eTJNf/mPxSLuWkRL7tKb1m74ON1l/Is6d/Gx2NKvh82e8xtem8aos4eJQ46Plpc1LWzZTO0nNy5rWz/S6XWFbae/brki2lek9aroOy3ESsTqOTEiyikJSPNWIjGs11mkoi8Ujic+C46S113SdXrO/+VSfN/nOtBv2E05JWHZRSJ5jZIXtRJ2GRytsJ+Y3KW9aP9lOuCTxOlTsyCoJyxQl2w7JbngejjjySkKyixOPTdmRxLZ4JSGZhrgso7RYJMk0eS63oU9sS/JM4zzLSqyzYV8QKg7JKrJTj27YluOZtNi85PbajoyrxDob6juZ/s+wrMT6ikMyniXLSnxnGC+xnwoXOal+t4wjOxqWUxxKtWuS70+Tz2kyFkkKFYdkXMlyEjHImERZqOG9tuxUbJ6T2F+FSkKywg2xJvvQTmynVZT4H9NpWEeoYZvsJu/T/9/enQdJUd//H3/1zF5cu7Dc6+KCoiyHCi4KRPkiKQJYRkGSiEcUIjkIaqmoiRgrolVRK6WmQko0EsRKSktTgpSlVhRlOQzI5RLhxyEgCApISQQk7DLd0+/fHzM77OzO7IHLLA3PR9UWTPenP939nt6eec2nt6f270wo3r8T77u6zokS5CT/fmW1yjoxPxySvOT+LTuksGKv6yFJ0eyQHIuN+DnxY6z6WIstE3vfWj096Xip2cYkVdZ9itKrL8ASbJvKMePjgNPN3r17dc4552jFihUaNmxYYvof/vAH/eMf/9CWLVvqLDNz5kw9+uijdaa/8sorat269SndXgAAAOBsd+zYMd188806fPiw8vPz07Y7cuSICgoKNKrXXcoKpf6w0fOP6/2df2mwL5zA8NJpqFOnTgqHw3VGZw8cOFBnFLfajBkzNH369MTjI0eOqEePHnpxyoKmjdhe1Ef+hq0KD7hQ0Y2fSpKcsn5yIlH5G7bWae8M7i9b+//q7bNmG3/EwNh6lq5v9DYFQXarLN0+90d
6ccp8uZVewwucBaIjB53SEdtfTO6tOfO2y3Ubdz/87zxi6/ktN2Jb5TX/iO3xaOJTYj83S6GoHxuxjUTl54QVikRj83LCsXr/rLfm/nVrynr7OWGFjqc/7v3cEy81jtmJkcea0o3YRpt5xLZ2f6fpiO2UqaV68bktivgmxzP5ueG6I7YrNpzUiO0bWz5p1DKSdH3pxUntaz6+vvTiM2PEtlWOfvbCOM37xRtyjzNimxixdT05OdmS51V3IotG64zYzv9krSb0u6T+EduIm7TOnLZ5+tkL45JeM6NXDZIkhZdUKJSTLT/iyvv+IGUv2yB3xMXKXvqJvCsvUnhJhaIjByn73xvlXjFA4fIT7av7yV6xUX7EVVb3rvL2fSW78pJY3+u2KlrWR5KUvfu/8r86oFCnQvlf/zdp+0LdY++1/H1fKXJFv9j+mJS9YpPc7/VT9opNkqTIFf2U81F8sKHWiG3kin7KWblZchxFL73wxIhtxXZ5A89X1vod8gb1VjQnpPDxqMJrtyo6OLZtfk5Y2au2xEdsPblD+ypnzTZ5g85XeG3d92JyHEWGlip39TaZH60xYhtNjNhOfu6HeunXb8mzsKJHjijcpo3cwRcoe912metWHwyJ47R6WyQp6z+fyaKenHCWvIHnx0ZsN+ySeW589SF5A89XeM1m+RFX/v8NVNaqzXKyY+8/3bLesXZRU3jNFnlD+iq0bL38/xsY63/V5tg6Ly+NPU+rt9T5nYleXqrwuk/lDe4Ta19rxNYb0jepJFlrttQasfUUHdIv0X/kin4Ku7HX9ZDrK5oTH7F1fTkf/kfRkYMULo8da5IULq9IHF/hJRVJ66p57LqWfKwjswi2p6GcnByVlZVp0aJFuv766xPTFy1apHHjxqVcJjc3V7m5dT/x8Srdxr1Jiwsd9+VXuvKP+4pWxk9YEV9OfHptTsSXpZierk00/qY43MAyQeVWenLP0H1rKs/1T1mwrea6viKZCrau3+j7ODR3sA1H/O8cbC3syM9S4nwQjviJgBQN+fHgHpsedZzYfEnRGutzXV9upG69a7ZPJRo6Mc8xSx1WawbbGqHV8dIE26wG6uCnCLZhp25/tYKtY0p53NYRfz5qHq/mpDh+mxhsfd+R4vvmRnxFfFPIM0Udp06wDVV6JxVss7OO19MymVvpJbWv+ditsf4gB9vq+W6VJ7fqbA+2Fg+2Fgu2USdFsLWk/c3OOh47FuKXszYm2DpZsbefNV8zvfi5PKvSVSgaW8Z1fanSi53nKz25rq+sSjfWNj69ZvtEP5We/Igrq4rKq3TlR6r79uRVn6uOR+VXegpVxf6tKXQ8/sFe9boV/92usS2SFHF9OdXL1g621fMcJ7bO+LnAqjy5ET/xbzR+jq+5bb7jSPGamhdbpxNvn5Xqw3PHUcT1FaryYs9RKB5so/EPMOPPiVvlyTNTtNKVH47vS5Uni3jVB0PiOPVqnNOtypN5npys2HlJZrFprhd/rkOJbfMjrqIRX1YZP37idZJi5/Osyth+hCtj7STJ4vvk1Xieav/OePH+3XjftYNt7dcmq/TqBNua/UdcX+FI7HU9FH8enPj/Q/FjLHGsxY/L6uMrq9b7vJptvKYGWz/+pijtPDQFwfY0NX36dN16660aPHiwhg0bphdeeEG7d+/W1KlTW3rTAAAAAHxXVuvDtNrz0CQE29PUxIkTdfDgQT322GPat2+fBgwYoHfeeUclJSUtvWkAAAAAvqv67n7MbZCajGB7Gps2bZqmTZvW0psBAAAAoLlxKXKzItgCAAAAQKYxYtusCLYAAAAAkGmmeoJtRrfkjECwBQAAAIBMY8S2WRFsAQAAACDTfF9Smrsf+9wVuakItgAAAACQaYzYNiuCLQAAAABkGsG2WRFsAQAAACDT+LqfZkWwBQAAAIAMM/NllvpvadNNR3oEWwAAAADINLP0I7NcitxkBFsAAAAAyDSr51Jkgm2TEWwBAAAAINN8X3LSXHL
MpchNRrAFAAAAgExjxLZZhVp6AwAAAAAA+C4YsQUAAACADDPfl6W5FJm7IjcdwRYAAAAAMo1LkZsVwRYAAAAAMs03ySHYNheCLQAAAABkmpmkdHdFJtg2FcEWAAAAADLMfJOlGbE1gm2TcVdkAAAAAMg08+v/aYL7779fw4cP1y233KJIJHKKNvj0RrAFAAAAgAwz3+r9aayKigrt379fy5cvV79+/fT666+fwq0+fXEp8hmq+vIFT27am62lEooel2+uLHpcUXMlSY5XJSfqy48/rsnxqmQppqdrE/Wq4ttX/zKBY9KxY8fkmivvTNu3k+R5VTKn7nQnfgNAS/OxWrp7KNQUUkjHjh2T51bJ8xr3iabzHS7pMcdRyPPlO437LNAxkzkpdj7RQLHfy+p/6+1MMs+THCf29zb19VsPM0e+QrF1SjIvmvj7naiXFds/hWReVNFwODZfUjQcPlFvL3W9Y+29tOuOeideahwzOdEUO109yYlta6K9l+YSLTVQB19yfJOFnMRzb+bU7a+6m/gbCMeU8ritI/581DxezUlx/KZ6zlIdi44jJ2ry5SgkJ3Y+8ark+aaQZ4qGw3Kq3+TElw+ZK6dRG5t8SduRb6ONWkaSPHOT2td87NVYv5k1elsyL/57mxj9CCWPhCSdv2uPktRcNkU/iT789O1TPW4Ex5xmuxSxuq9Uz1HNdTgWPy9YVGbR2O+OVT//jsz8E23i0458G40fC35SfzXXWfv9g2PhOq+ZXvz9gcxVKL6M51UpVOtfpXgcqrGO6nm+uZIfkWeu/ETfkcR6Qv5x+RZJ/FtTyD8uSfJrtHdMCsUfh+LtPa9K4eplq48Bi50bTsxz4tsZOxc48T6q/42GQ/Hz8Yl1+eGwQhaRY2GZeYm+Yv2kGomLrcOzSPx5C8Wfr+rns7reEUUtrKi5shr7knhPZtUvTDWeD0mORWTmyTE/sS9OjeUcCyWeC99cReP7V33uTdTQs8R+Wrxddf/Jx0AkaVsS82rUrrrOSfOTjrHIifkWkiya1L/nVcnir+shz1c0HIo9x55f51iLLVPjOK11PNds48mNl7Jxv7ueHU87Mlvd15EjR5Km5+bmKjc3N2naypUrNXr0aEnS2LFjNW/ePN18882N2oYziWNcwH1G+uKLL9SjR4+W3gwAAADgrLJnzx4VFxennV9VVaVevXpp//799fbTtm1bHT16NGnaI488opkzZyZNe/zxx9WvXz+NHz9e27dv1+9//3u98sorJ739QcWI7RmqqKhIe/bsUbt27eSc5AgPGu/IkSPq0aOH9uzZo/z8/JbenDMe9c4s6p1Z1DuzqHfmUfPMot6ZY2b69ttvVVRUVG+7vLw87dy5s8G/hTWzOu/ja4/WSlKHDh0SI7uHDh1SYWFhE7f8zECwPUOFQqF6PynCqZGfn8+LRgZR78yi3plFvTOLemceNc8s6p0ZBQUFjWqXl5envLy8Zlnn0KFD9fTTT+u2227Tu+++qyuuuKJZ+g0abh4FAAAAAAE1aNAgdevWTcOHD9emTZv0ox/9qKU3qUUwYgsAAAAAAfbUU0+19Ca0OEZsgWaQm5urRx55JOXfPaD5Ue/Mot6ZRb0zi3pnHjXPLOqNswV3RQYAAAAABBojtgAAAACAQCPYAgAAAAACjWALAAAAAAg0gi0AAAAAINAItoCk2bNnq1evXsrLy1NZWZmWL1+emDdz5kyVlpaqTZs26tChg0aNGqVVq1Y12OeGDRs0YsQItWrVSuecc44ee+wx1b5X29KlS1VWVqa8vDydd955ev7555t9305H9dVbkjZv3qzrrrtOBQUFateunYYOHardu3fX2yf1Tq++en/11VeaPHmyioqK1Lp1a40dO1bbtm1rsE/qndqyZct07bXXqqioSI7jaOHChYl5ruvqt7/9rS666CK1adNGRUVFuu2227R3794G+6XeqdVXb0maPHmyHMdJ+hk6dGiD/VLv1Bqq99GjR3XnnXe
quLhYrVq1Ut++ffXcc8812C/1Tu2JJ57QZZddpnbt2qlLly4aP368tm7dmtRmwYIFGjNmjDp16iTHcbR+/fpG9U3NcUYy4Cz36quvWnZ2ts2ZM8c2bdpkd999t7Vp08Y+//xzMzN7+eWXbdGiRbZjxw7buHGjTZkyxfLz8+3AgQNp+zx8+LB17drVbrzxRtuwYYPNnz/f2rVrZ0899VSizWeffWatW7e2u+++2zZt2mRz5syx7Oxse/3110/5Prekhuq9fft2KywstAceeMA+/vhj27Fjh7311lv21Vdfpe2TeqdXX71937ehQ4fa8OHDbfXq1bZlyxb75S9/aeeee64dPXo0bZ/UO7133nnHfve739n8+fNNkr3xxhuJeYcOHbJRo0bZa6+9Zlu2bLGVK1fakCFDrKysrN4+qXd69dXbzGzSpEk2duxY27dvX+Ln4MGD9fZJvdNrqN4///nP7fzzz7fy8nLbuXOn/fWvf7VwOGwLFy5M2yf1Tm/MmDE2b94827hxo61fv96uueaaOufnv//97/boo4/anDlzTJJVVFQ02C81x5mKYIuz3uWXX25Tp05NmlZaWmoPPvhgyvaHDx82Sfb++++n7XP27NlWUFBgVVVViWlPPPGEFRUVme/7Zmb2m9/8xkpLS5OW+9WvfmVDhw492V0JhIbqPXHiRPvpT3/apD6pd3r11Xvr1q0myTZu3JiY53meFRYW2pw5c9L2Sb0bJ9Ub/9pWr15tkhIf7KRCvRsnXbAdN25ck/qh3o2Tqt79+/e3xx57LGnapZdeag8//HDafqh34x04cMAk2dKlS+vM27lzZ6ODLTXHmYpLkXFWi0QiWrdunUaPHp00ffTo0VqxYkXK9i+88IIKCgp0ySWXJKZPnjxZV111VeLxypUrNWLEiKQvQx8zZoz27t2rXbt2JdrUXu+YMWO0du1aua7bDHt3+mmo3r7v6+2339aFF16oMWPGqEuXLhoyZEjKywupd8Maqvfx48clSXl5eYl54XBYOTk5+vDDDxPTqPepc/jwYTmOo/bt2yemUe/mtWTJEnXp0kUXXnihfvGLX+jAgQNJ86l387nyyiv15ptv6ssvv5SZqby8XJ9++qnGjBmTaEO9T97hw4clSYWFhU1ajprjbEGwxVnt66+/VjQaVdeuXZOmd+3aVfv37088fuutt9S2bVvl5eXpT3/6kxYtWqROnTol5nfv3l3nnntu4vH+/ftT9lk9r742nufp66+/bp4dPM00VO8DBw7o6NGjevLJJzV27Fi99957uv766zVhwgQtXbo00Z56N05D9S4tLVVJSYlmzJihb775RpFIRE8++aT279+vffv2JdpT71OjqqpKDz74oG6++Wbl5+cnplPv5nP11Vfr5Zdf1uLFi/X0009rzZo1+v73v5/4UEei3s1p1qxZ6tevn4qLi5WTk6OxY8dq9uzZuvLKKxNtqPfJMTNNnz5dV155pQYMGNCkZak5zhZZLb0BwOnAcZykx2aWNG3kyJFav369vv76a82ZM0c33HCDVq1apS5dukiK3eChMX3Wnt6YNmeidPX2fV+SNG7cON17772SpIEDB2rFihV6/vnnNWLECEnUu6nS1Ts7O1vz58/XlClTVFhYqHA4rFGjRunqq69Oak+9m5/rurrxxhvl+75mz56dNI96N5+JEycm/j9gwAANHjxYJSUlevvttzVhwgRJ1Ls5zZo1Sx999JHefPNNlZSUaNmyZZo2bZq6d++uUaNGSaLeJ+vOO+/UJ598knQ1TWNRc5wtCLY4q3Xq1EnhcDhpdFaSDhw4kPRJZZs2bdS7d2/17t1bQ4cO1QUXXKC5c+dqxowZKfvt1q1byj6lE5+KpmuTlZWljh07fud9Ox01VO9OnTopKytL/fr1S5rft2/fel/MqXdqjTm+y8rKtH79eh0+fFiRSESdO3fWkCFDNHjw4LT9Uu/vxnVd3XDDDdq5c6cWL16cNFqbCvVuPt27d1d
JSUm9d/6m3iensrJSDz30kN544w1dc801kqSLL75Y69ev11NPPZUItrVR74bdddddevPNN7Vs2TIVFxd/5/6oOc5UXIqMs1pOTo7Kysq0aNGipOmLFi3S9773vbTLmVnSpWy1DRs2TMuWLVMkEklMe++991RUVKSePXsm2tRe73vvvafBgwcrOzv7JPbm9NdQvXNycnTZZZfV+TqDTz/9VCUlJWn7pd6pNeX4LigoUOfOnbVt2zatXbtW48aNS9sv9T551aF227Ztev/99xv1BpF6N5+DBw9qz5496t69e9o21PvkuK4r13UVCiW/tQyHw4mrcVKh3umZme68804tWLBAixcvVq9evZqlX2qOM1ZGb1UFnIaqvw5l7ty5tmnTJrvnnnusTZs2tmvXLjt69KjNmDHDVq5cabt27bJ169bZlClTLDc3N+lOsg8++KDdeuuticeHDh2yrl272k033WQbNmywBQsWWH5+fspb6d977722adMmmzt37llxK/366m1mtmDBAsvOzrYXXnjBtm3bZn/5y18sHA7b8uXLE31Q78ZrqN7//Oc/rby83Hbs2GELFy60kpISmzBhQlIf1Lvxvv32W6uoqLCKigqTZM8884xVVFTY559/bq7r2nXXXWfFxcW2fv36pK+gOX78eKIP6t149dX722+/tfvuu89WrFhhO3futPLychs2bJidc845duTIkUQf1Lvx6qu3mdmIESOsf//+Vl5ebp999pnNmzfP8vLybPbs2Yk+qHfj/frXv7aCggJbsmRJ0vni2LFjiTYHDx60iooKe/vtt02Svfrqq1ZRUWH79u1LtKHmOFsQbAEze/bZZ62kpMRycnLs0ksvTdxKv7Ky0q6//norKiqynJwc6969u1133XW2evXqpOUnTZpkI0aMSJr2ySef2PDhwy03N9e6detmM2fOTNxGv9qSJUts0KBBlpOTYz179rTnnnvulO7n6SJdvavNnTvXevfubXl5eXbJJZfU+Q5E6t009dX7z3/+sxUXF1t2drade+659vDDDyeFLDPq3RTl5eUmqc7PpEmTEl/HkeqnvLw80Qf1brz66n3s2DEbPXq0de7cOXF8T5o0yXbv3p3UB/VuvPrqbWa2b98+mzx5shUVFVleXp716dPHnn766aTaUe/GS3e+mDdvXqLNvHnzUrZ55JFHEm2oOc4Wjln8L8EBAAAAAAgg/sYWAAAAABBoBFsAAAAAQKARbAEAAAAAgUawBQAAAAAEGsEWAAAAABBoBFsAAAAAQKARbAEAAAAAgUawBQAgjZkzZ2rgwIEZX++SJUvkOI4cx9H48eMzvv5qPXv2TGzHoUOHWmw7AABoCMEWAHBWqg5s6X4mT56s+++/Xx988EGLbePWrVv10ksvJR5fddVVuueee+q0W7hwoRzHSbSpb7969uwpSdq/f7/uuusunXfeecrNzVWPHj107bXXJu3vmjVrNH/+/FO5iwAANIuslt4AAABawr59+xL/f+211/T73/9eW7duTUxr1aqV2rZtq7Zt27bE5kmSunTpovbt2zdpmQULFigSiUiS9uzZo8svv1zvv/+++vfvL0kKh8PatWuXrrjiCrVv315//OMfdfHFF8t1Xb377ru64447tGXLFklS586dVVhY2Kz7BADAqcCILQDgrNStW7fET0FBgRzHqTOt9qXIkydP1vjx4/X444+ra9euat++vR599FF5nqcHHnhAhYWFKi4u1osvvpi0ri+//FITJ05Uhw4d1LFjR40bN067du06JftVWFiY2IfOnTtLkjp27Jg0bdq0aXIcR6tXr9aPf/xjXXjhherfv7+mT5+ujz766JRsFwAApxLBFgCAJli8eLH27t2rZcuW6ZlnntHMmTP1wx/+UB06dNCqVas0depUTZ06VXv27JEkHTt2TCNHjlTbtm21bNkyffjhh2rbtq3Gjh2bGFnNpP/+97/617/+pTvuuENt2rSpM7+pI8QAAJwOCLY
AADRBYWGhZs2apT59+uj2229Xnz59dOzYMT300EO64IILNGPGDOXk5Ojf//63JOnVV19VKBTS3/72N1100UXq27ev5s2bp927d2vJkiUZ3/7t27fLzFRaWprxdQMAcKrwN7YAADRB//79FQqd+Fy4a9euGjBgQOJxOBxWx44ddeDAAUnSunXrtH37drVr1y6pn6qqKu3YsSMzG12DmUlS4mZTAACcCQi2AAA0QXZ2dtJjx3FSTvN9X5Lk+77Kysr08ssv1+mr+m9gGys/P1+HDx+uM/3QoUPKz89vVB8XXHCBHMfR5s2bW/SrhAAAaE5cigwAwCl06aWXatu2berSpYt69+6d9FNQUNCkvkpLS7V27do609esWaM+ffo0qo/CwkKNGTNGzz77rP73v//Vmc/31QIAgohgCwDAKXTLLbeoU6dOGjdunJYvX66dO3dq6dKluvvuu/XFF180qa9p06Zpx44duuOOO/Sf//xHn376qZ599lnNnTtXDzzwQKP7mT17tqLRqC6//HLNnz9f27Zt0+bNmzVr1iwNGzasqbsIAECLI9gCAHAKtW7dWsuWLdO5556rCRMmqG/fvrr99ttVWVnZ6MuHq/Xs2VPLly/Xjh07NHr0aF122WV66aWX9NJLL+knP/lJo/vp1auXPv74Y40cOVL33XefBgwYoB/84Af64IMP9NxzzzV1FwEAaHGOVd9FAgAAnBaWLFmikSNH6ptvvmnxr985nbYFAIB0GLEFAOA0VVxcrJtuuqnF1t+/f39dffXVLbZ+AAAaixFbAABOM5WVlfryyy8lSW3btlW3bt1aZDs+//xzua4rSTrvvPOSvuYIAIDTCcEWAAAAABBofPQKAAAAAAg0gi0AAAAAINAItgAAAACAQCPYAgAAAAACjWALAAAAAAg0gi0AAAAAINAItgAAAACAQCPYAgAAAAACjWALAAAAAAg0gi0AAAAAINAItgAAAACAQCPYAgAAAAACjWALAAAAAAg0gi0AAAAAINAItgAAAACAQCPYAgAAAAACjWALAAAAAAg0gi0AAAAAINAItgAAAACAQCPYAgAAAAACjWALAAAAAAg0gi0AAAAAINAItgAAAACAQCPYAgAAAAACjWALAAAAAAg0gi0AAAAAINAItgAAAACAQCPYAgAAAAACjWALAAAAAAg0gi0AAAAAINAItgAAAACAQCPYAgAAAAACjWALAAAAAAg0gi0AAAAAINAItgAAAACAQCPYAgAAAAACjWALAAAAAAg0gi0AAAAAINAItgAAAACAQCPYAgAAAAACjWALAAAAAAg0gi0AAAAAINAItgAAAACAQCPYAgAAAAACjWALAAAAAAg0gi0AAAAAINAItgAAAACAQCPYAgAAAAACjWALAAAAAAg0gi0AAAAAINAItgAAAACAQCPYAgAAAAACjWALAAAAAAg0gi0AAAAAINAItgAAAACAQCPYAgAAAAACjWALAAAAAAg0gi0AAAAAINAItgAAAACAQCPYAgAAAAACjWALAAAAAAg0gi0AAAAAINAItgAAAACAQCPYAgAAAAACjWALAAAAAAg0gi0AAAAAINAItgAAAACAQCPYAgAAAAACjWALAAAAAAg0gi0AAAAAINAItgAAAACAQCPYAgAAAAACjWALAAAAAAg0gi0AAAAAINAItgAAAACAQCPYAgAAAAACjWALAAAAAAg0gi0AAAAAINAItgAAAACAQCPYAgAAAAACjWALAAAAAAg0gi0AAAAAINAItgAAAACAQCPYAgAAAAACjWALAAAAAAg0gi0AAAAAINAItgAAAACAQCPYAgAAAAAC7f8DnpMhZJ59Xa0AAAAASUVORK5CYII=", - "text/html": [ - "\n", - "
\n", - "
\n", - " Figure\n", - "
\n", - " \n", - "
\n", - " " - ], - "text/plain": [ - "Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "ts_display = act.plotting.TimeSeriesDisplay(ds)\n", - "ts_display.add_subplots((len(variables_to_plot),), figsize = (9.5,4*len(variables_to_plot)))\n", - "\n", - "for i,v in enumerate(variables_to_plot):\n", - " ts_ax = ts_display.plot(v, subplot_index=(i,), set_title=ds.variables[v].attrs['long_name'],)\n", - " ts_ax.grid()\n", - "\n", - "plt.show()\n" - ] - }, - { - "cell_type": "markdown", - "id": "2308db16-f362-4033-a11a-c3e5e75ad9ba", - "metadata": {}, - "source": [ - "## Field selection dropdown menu\n", - "Select variable to be plotted from a dropdown menu" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "id": "d5792fa8-2253-441a-8ac6-1e07a0345031", - "metadata": {}, - "outputs": [ - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "ec47d67b4aed4584b5a963f8c19161df", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "AppLayout(children=(Dropdown(description='Field:', index=1, layout=Layout(grid_area='header', margin='0px 30% …" - ] - }, - "execution_count": 12, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "plt.ioff()\n", - "\n", - "# populate dropdown menu with available variables \n", - "available_variables = [v for v in ds.variables if not('time' in v or v.startswith('qc_') or v.startswith('source_')) and 'long_name' in ds.variables[v].attrs]\n", - "d_variable = 'backscatter'\n", - "dropdown = widgets.Dropdown(\n", - " options = [(ds.variables[v].attrs['long_name'], v) for v in available_variables],\n", - " value= d_variable,\n", - " description='Field:',\n", - " disabled=False,\n", - ")\n", - "dropdown.layout.margin = '0px 30% 0px 20%'\n", - "dropdown.layout.width = '50%'\n", - "\n", - "# set up display\n", - "i_display = 
act.plotting.TimeSeriesDisplay(ds)\n", - "i_display.add_subplots((1,), figsize = (9.5,5))\n", - "i_ax = i_display.plot(d_variable, subplot_index=(0,), set_title=ds.variables[d_variable].attrs['long_name'],)\n", - "i_ax.grid()\n", - "i_fig = i_display.fig\n", - "\n", - "# update plot callback function\n", - "def update_plot(change):\n", - " i_ax.cla()\n", - " i_ax_new = i_display.plot(change.new, subplot_index=(0,), set_title=ds.variables[change.new].attrs['long_name'],)\n", - " i_ax_new.grid()\n", - " i_fig.canvas.draw()\n", - " i_fig.canvas.flush_events()\n", - "\n", - "dropdown.observe(update_plot, names='value')\n", - "\n", - "widgets.AppLayout(\n", - " header=dropdown,\n", - " center=i_fig.canvas,\n", - " pane_heights=[1, 6,1]\n", - ")\n" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.16" - }, - "widgets": { - "application/vnd.jupyter.widget-state+json": { - "state": {}, - "version_major": 2, - "version_minor": 0 - } - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/VAPs/quicklook/OKMSOIL/.ipynb_checkpoints/okmsoil.c1-checkpoint.ipynb b/VAPs/quicklook/OKMSOIL/.ipynb_checkpoints/okmsoil.c1-checkpoint.ipynb deleted file mode 100644 index b2aa8667..00000000 --- a/VAPs/quicklook/OKMSOIL/.ipynb_checkpoints/okmsoil.c1-checkpoint.ipynb +++ /dev/null @@ -1,2048 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "70840257-70e4-45e2-b491-14bff5a257a3", - "metadata": {}, - "source": [ - "# OKMSOIL.C1 Plots\n", - "\n", - "[Click here](https://www.arm.gov/capabilities/vaps/okmsoil) for more information about this vap." 
- ] - }, - { - "cell_type": "code", - "execution_count": 1, - "id": "460fd89f-e034-452c-b837-f65c5958264f", - "metadata": {}, - "outputs": [], - "source": [ - "%matplotlib widget\n", - "import ipywidgets as widgets\n", - "\n", - "import matplotlib.pyplot as plt\n", - "import ipywidgets as widgets\n", - "import numpy as np\n", - "import pandas as pd\n", - "import os\n", - "from datetime import datetime\n", - "\n", - "import act\n", - "import xarray as xr\n", - "\n", - "# Data archive directory\n", - "DATA_DIR = r'/data/archive/'\n", - "\n", - "# Datastream info\n", - "DATASTREAM_NAME = 'okmsoil'\n", - "DATA_LEVEL = 'c1'\n", - "LOCATIONS = [{'end_date': '2020-10-22', 'facility': 'X1', 'site': 'sgp', 'start_date': '1998-01-01'}]" - ] - }, - { - "cell_type": "markdown", - "id": "9faaf875", - "metadata": {}, - "source": [ - "## Define site, facility, and date range" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "ac6764f5", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The following locations and date ranges are available for this VAP:\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
sitefacilitystart_dateend_date
0sgpX11998-01-012020-10-22
\n", - "
" - ], - "text/plain": [ - " site facility start_date end_date\n", - "0 sgp X1 1998-01-01 2020-10-22" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "print(\"The following locations and date ranges are available for this VAP:\")\n", - "display(pd.DataFrame(LOCATIONS, columns=['site', 'facility', 'start_date', 'end_date']))" - ] - }, - { - "cell_type": "markdown", - "id": "8d132223", - "metadata": {}, - "source": [ - "#### Define site, facility, and date range (date format: YYYY-MM-DD) using the variables below:" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "e563983a", - "metadata": {}, - "outputs": [], - "source": [ - "site_facility = ( 'sgp', 'X1' )\n", - "\n", - "date_start = '2020-10-20'\n", - "date_end = '2020-10-22'" - ] - }, - { - "cell_type": "markdown", - "id": "bccd3dfe-2f99-49a5-bace-ea37e8dd8fc3", - "metadata": {}, - "source": [ - "## Load data files\n", - "Load data files from /data/archive/" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "cb4b9a26-c574-49c0-a521-658fa553e39e", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "'/data/archive/sgp/sgpokmsoilX1.c1'" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Compile list of files\n", - "site, facility = site_facility\n", - "d_date_start = datetime.strptime(date_start, '%Y-%m-%d')\n", - "d_date_end = datetime.strptime(date_end, '%Y-%m-%d')\n", - "dir_path = os.path.join(DATA_DIR + site, site + DATASTREAM_NAME + facility + r'.' 
+ DATA_LEVEL )\n", - "dir_path\n" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "6be8f3dc", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "['20201020', '20201021', '20201022']" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "from datetime import date, timedelta\n", - "import pandas as pd\n", - "\n", - "def get_ARM_formated_dates(start_date, end_date):\n", - " \"\"\"\n", - " Get a list of ARM conventional formated date lists, based on start_date and end_date(inclusive)\n", - " EXAMPLE:\n", - " get_ARM_formated_dates(start_date=\"20180219\", end_date=\"20180221\")\n", - " >> [\"20180219\", \"20180220\", \"20180221\"] \n", - " \"\"\"\n", - " \n", - " _start_date = pd.to_datetime(start_date)\n", - " _end_date = pd.to_datetime(end_date)\n", - " \n", - " delta = _end_date - _start_date # returns timedelta \n", - " dates = []\n", - "\n", - " for i in range(delta.days + 1):\n", - " day = _start_date + timedelta(days=i)\n", - " day_formated = day.strftime(format=\"%Y%m%d\")\n", - " dates.append(day_formated)\n", - " return dates\n", - "\n", - "\n", - "get_ARM_formated_dates(start_date=date_start, end_date=date_end)" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "51feea2e", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "['/data/archive/sgp/sgpokmsoilX1.c1/sgpokmsoilX1.c1.20201020.000000.nc',\n", - " '/data/archive/sgp/sgpokmsoilX1.c1/sgpokmsoilX1.c1.20201021.000000.nc',\n", - " '/data/archive/sgp/sgpokmsoilX1.c1/sgpokmsoilX1.c1.20201022.000000.nc']" - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Filter a list of files based on date pattern\n", - "import glob\n", - "dates = get_ARM_formated_dates(start_date=date_start, end_date=date_end)\n", - "files_filter = []\n", - "for date in dates:\n", - " files_filter += glob.glob(f'{dir_path}/*.{date}*.*')\n", - " 
files_filter\n", - "files_filter" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "id": "b0e5d0d7", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "3 files loaded\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "
<xarray.Dataset>\n",
-       "Dimensions:                      (time: 144, bound: 2, station_number: 133,\n",
-       "                                  depth: 3)\n",
-       "Coordinates:\n",
-       "  * time                         (time) datetime64[ns] 2020-10-20 ... 2020-10...\n",
-       "  * station_number               (station_number) float32 110.0 1.0 ... 108.0\n",
-       "  * depth                        (depth) int32 5 25 60\n",
-       "Dimensions without coordinates: bound\n",
-       "Data variables: (12/15)\n",
-       "    base_time                    (time) datetime64[ns] 2020-10-20 ... 2020-10-22\n",
-       "    time_offset                  (time) datetime64[ns] 2020-10-20 ... 2020-10...\n",
-       "    time_bounds                  (time, bound) object dask.array<chunksize=(48, 2), meta=np.ndarray>\n",
-       "    sensor_temperature_rise      (time, depth, station_number) float32 dask.array<chunksize=(48, 3, 133), meta=np.ndarray>\n",
-       "    qc_sensor_temperature_rise   (time, depth, station_number) int32 dask.array<chunksize=(48, 3, 133), meta=np.ndarray>\n",
-       "    matric_potential             (time, depth, station_number) float32 dask.array<chunksize=(48, 3, 133), meta=np.ndarray>\n",
-       "    ...                           ...\n",
-       "    fractional_water_index       (time, depth, station_number) float32 dask.array<chunksize=(48, 3, 133), meta=np.ndarray>\n",
-       "    qc_fractional_water_index    (time, depth, station_number) int32 dask.array<chunksize=(48, 3, 133), meta=np.ndarray>\n",
-       "    station                      (time, station_number) |S20 dask.array<chunksize=(48, 133), meta=np.ndarray>\n",
-       "    lat                          (time, station_number) float32 dask.array<chunksize=(48, 133), meta=np.ndarray>\n",
-       "    lon                          (time, station_number) float32 dask.array<chunksize=(48, 133), meta=np.ndarray>\n",
-       "    alt                          (time, station_number) float32 dask.array<chunksize=(48, 133), meta=np.ndarray>\n",
-       "Attributes: (12/53)\n",
-       "    Version:                          $State: xdc-sgp30okm-9.0-1.el5 $\n",
-       "    command_line:                     idl -R -n okmsoil -s sgp -f X1 -b 20201...\n",
-       "    dod_version:                      okmsoil-c1-1.0\n",
-       "    date:                             \n",
-       "    process_version:                  vap-okmsoil-1.0-1.el7\n",
-       "    idl_version:                      \n",
-       "    ...                               ...\n",
-       "    doi:                              10.5439/1432043\n",
-       "    history:                          created by user dsmgr on machine flint ...\n",
-       "    _file_dates:                      ['20201020', '20201021', '20201022']\n",
-       "    _file_times:                      ['000000', '000000', '000000']\n",
-       "    _datastream:                      sgpokmsoilX1.c1\n",
-       "    _arm_standards_flag:              1
" - ], - "text/plain": [ - "\n", - "Dimensions: (time: 144, bound: 2, station_number: 133,\n", - " depth: 3)\n", - "Coordinates:\n", - " * time (time) datetime64[ns] 2020-10-20 ... 2020-10...\n", - " * station_number (station_number) float32 110.0 1.0 ... 108.0\n", - " * depth (depth) int32 5 25 60\n", - "Dimensions without coordinates: bound\n", - "Data variables: (12/15)\n", - " base_time (time) datetime64[ns] 2020-10-20 ... 2020-10-22\n", - " time_offset (time) datetime64[ns] 2020-10-20 ... 2020-10...\n", - " time_bounds (time, bound) object dask.array\n", - " sensor_temperature_rise (time, depth, station_number) float32 dask.array\n", - " qc_sensor_temperature_rise (time, depth, station_number) int32 dask.array\n", - " matric_potential (time, depth, station_number) float32 dask.array\n", - " ... ...\n", - " fractional_water_index (time, depth, station_number) float32 dask.array\n", - " qc_fractional_water_index (time, depth, station_number) int32 dask.array\n", - " station (time, station_number) |S20 dask.array\n", - " lat (time, station_number) float32 dask.array\n", - " lon (time, station_number) float32 dask.array\n", - " alt (time, station_number) float32 dask.array\n", - "Attributes: (12/53)\n", - " Version: $State: xdc-sgp30okm-9.0-1.el5 $\n", - " command_line: idl -R -n okmsoil -s sgp -f X1 -b 20201...\n", - " dod_version: okmsoil-c1-1.0\n", - " date: \n", - " process_version: vap-okmsoil-1.0-1.el7\n", - " idl_version: \n", - " ... 
...\n", - " doi: 10.5439/1432043\n", - " history: created by user dsmgr on machine flint ...\n", - " _file_dates: ['20201020', '20201021', '20201022']\n", - " _file_times: ['000000', '000000', '000000']\n", - " _datastream: sgpokmsoilX1.c1\n", - " _arm_standards_flag: 1" - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Load files as a single dataset\n", - "files_list = files_filter \n", - "ds = act.io.armfiles.read_netcdf(files_list)\n", - "ds.clean.cleanup()\n", - "print(f'{len(files_list)} files loaded')\n", - "ds\n" - ] - }, - { - "cell_type": "markdown", - "id": "4a551094-9ec0-4b64-b80a-9940573c2f50", - "metadata": {}, - "source": [ - "## Plot time series data\n", - "#### Define the list of variables to be plotted:" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "id": "d09b789e-84f1-4605-846b-a72c110c8048", - "metadata": {}, - "outputs": [], - "source": [ - "variables_to_plot = ['sensor_temperature_rise', 'matric_potential', 'volumetric_water_content']" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "id": "3458fb08-035b-4898-9253-0a94e6f9c97b", - "metadata": {}, - "outputs": [ - { - "ename": "TypeError", - "evalue": "Dimensions of C (133, 3, 144) should be one smaller than X(144) and Y(3) while using shading='flat' see help(pcolormesh)", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[9], line 5\u001b[0m\n\u001b[1;32m 2\u001b[0m ts_display\u001b[38;5;241m.\u001b[39madd_subplots((\u001b[38;5;28mlen\u001b[39m(variables_to_plot),), figsize \u001b[38;5;241m=\u001b[39m (\u001b[38;5;241m9.5\u001b[39m,\u001b[38;5;241m4\u001b[39m\u001b[38;5;241m*\u001b[39m\u001b[38;5;28mlen\u001b[39m(variables_to_plot)))\n\u001b[1;32m 4\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m i,v 
\u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28menumerate\u001b[39m(variables_to_plot):\n\u001b[0;32m----> 5\u001b[0m ts_ax \u001b[38;5;241m=\u001b[39m \u001b[43mts_display\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mplot\u001b[49m\u001b[43m(\u001b[49m\u001b[43mv\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43msubplot_index\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mi\u001b[49m\u001b[43m,\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mset_title\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mds\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mvariables\u001b[49m\u001b[43m[\u001b[49m\u001b[43mv\u001b[49m\u001b[43m]\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mattrs\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mlong_name\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 6\u001b[0m ts_ax\u001b[38;5;241m.\u001b[39mgrid()\n\u001b[1;32m 8\u001b[0m plt\u001b[38;5;241m.\u001b[39mshow()\n", - "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/act/plotting/timeseriesdisplay.py:588\u001b[0m, in \u001b[0;36mTimeSeriesDisplay.plot\u001b[0;34m(self, field, dsname, subplot_index, cmap, set_title, add_nan, day_night_background, invert_y_axis, abs_limits, time_rng, y_rng, use_var_for_y, set_shading, assessment_overplot, overplot_marker, overplot_behind, overplot_markersize, assessment_overplot_category, assessment_overplot_category_color, force_line_plot, labels, cbar_label, cbar_h_adjust, secondary_y, y_axis_flag_meanings, colorbar_labels, cb_friendly, **kwargs)\u001b[0m\n\u001b[1;32m 586\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124medgecolors\u001b[39m\u001b[38;5;124m'\u001b[39m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m kwargs\u001b[38;5;241m.\u001b[39mkeys():\n\u001b[1;32m 
587\u001b[0m kwargs[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124medgecolors\u001b[39m\u001b[38;5;124m'\u001b[39m] \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mface\u001b[39m\u001b[38;5;124m'\u001b[39m\n\u001b[0;32m--> 588\u001b[0m mesh \u001b[38;5;241m=\u001b[39m \u001b[43max\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mpcolormesh\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 589\u001b[0m \u001b[43m \u001b[49m\u001b[43mnp\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43masarray\u001b[49m\u001b[43m(\u001b[49m\u001b[43mxdata\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 590\u001b[0m \u001b[43m \u001b[49m\u001b[43mydata\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 591\u001b[0m \u001b[43m \u001b[49m\u001b[43mdata\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mtranspose\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 592\u001b[0m \u001b[43m \u001b[49m\u001b[43mshading\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mset_shading\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 593\u001b[0m \u001b[43m \u001b[49m\u001b[43mcmap\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcmap\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 594\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 595\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 597\u001b[0m \u001b[38;5;66;03m# Set Title\u001b[39;00m\n\u001b[1;32m 598\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m set_title \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n", - "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/matplotlib/__init__.py:1442\u001b[0m, in \u001b[0;36m_preprocess_data..inner\u001b[0;34m(ax, data, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1439\u001b[0m \u001b[38;5;129m@functools\u001b[39m\u001b[38;5;241m.\u001b[39mwraps(func)\n\u001b[1;32m 1440\u001b[0m 
\u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21minner\u001b[39m(ax, \u001b[38;5;241m*\u001b[39margs, data\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs):\n\u001b[1;32m 1441\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m data \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m-> 1442\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[43max\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;28;43mmap\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43msanitize_sequence\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43margs\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1444\u001b[0m bound \u001b[38;5;241m=\u001b[39m new_sig\u001b[38;5;241m.\u001b[39mbind(ax, \u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n\u001b[1;32m 1445\u001b[0m auto_label \u001b[38;5;241m=\u001b[39m (bound\u001b[38;5;241m.\u001b[39marguments\u001b[38;5;241m.\u001b[39mget(label_namer)\n\u001b[1;32m 1446\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m bound\u001b[38;5;241m.\u001b[39mkwargs\u001b[38;5;241m.\u001b[39mget(label_namer))\n", - "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/matplotlib/axes/_axes.py:6220\u001b[0m, in \u001b[0;36mAxes.pcolormesh\u001b[0;34m(self, alpha, norm, cmap, vmin, vmax, shading, antialiased, *args, **kwargs)\u001b[0m\n\u001b[1;32m 6217\u001b[0m shading \u001b[38;5;241m=\u001b[39m shading\u001b[38;5;241m.\u001b[39mlower()\n\u001b[1;32m 6218\u001b[0m kwargs\u001b[38;5;241m.\u001b[39msetdefault(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124medgecolors\u001b[39m\u001b[38;5;124m'\u001b[39m, 
\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mnone\u001b[39m\u001b[38;5;124m'\u001b[39m)\n\u001b[0;32m-> 6220\u001b[0m X, Y, C, shading \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_pcolorargs\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mpcolormesh\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 6221\u001b[0m \u001b[43m \u001b[49m\u001b[43mshading\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mshading\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mkwargs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 6222\u001b[0m coords \u001b[38;5;241m=\u001b[39m np\u001b[38;5;241m.\u001b[39mstack([X, Y], axis\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m-\u001b[39m\u001b[38;5;241m1\u001b[39m)\n\u001b[1;32m 6223\u001b[0m \u001b[38;5;66;03m# convert to one dimensional array, except for 3D RGB(A) arrays\u001b[39;00m\n", - "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/matplotlib/axes/_axes.py:5751\u001b[0m, in \u001b[0;36mAxes._pcolorargs\u001b[0;34m(self, funcname, shading, *args, **kwargs)\u001b[0m\n\u001b[1;32m 5749\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m shading \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mflat\u001b[39m\u001b[38;5;124m'\u001b[39m:\n\u001b[1;32m 5750\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m (Nx, Ny) \u001b[38;5;241m!=\u001b[39m (ncols \u001b[38;5;241m+\u001b[39m \u001b[38;5;241m1\u001b[39m, nrows \u001b[38;5;241m+\u001b[39m \u001b[38;5;241m1\u001b[39m):\n\u001b[0;32m-> 5751\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mTypeError\u001b[39;00m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mDimensions of C 
\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mC\u001b[38;5;241m.\u001b[39mshape\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m should\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 5752\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m be one smaller than X(\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mNx\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m) and Y(\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mNy\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m)\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 5753\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m while using shading=\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mflat\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 5754\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m see help(\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mfuncname\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m)\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 5755\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m: \u001b[38;5;66;03m# ['nearest', 'gouraud']:\u001b[39;00m\n\u001b[1;32m 5756\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m (Nx, Ny) \u001b[38;5;241m!=\u001b[39m (ncols, nrows):\n", - "\u001b[0;31mTypeError\u001b[0m: Dimensions of C (133, 3, 144) should be one smaller than X(144) and Y(3) while using shading='flat' see help(pcolormesh)" - ] - }, - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "dc399e8005384c0e9f15675a731f43f7", - "version_major": 2, - "version_minor": 0 - }, - "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAA7YAAASwCAYAAADPBNYLAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/bCgiHAAAACXBIWXMAAA9hAAAPYQGoP6dpAABi0UlEQVR4nO3df3RV5Z3o/08gkKA1sUAJIIjRaqXlqkO4IrF8Ha3GQceWGeeKY5eog/c2Vy0DqV5FZvmD5axMO6tO6w9Qr6DjXWgz/hxmbkbNzFhFwRlJg+MIrb1CDWgiTRwT1DYI7O8fLjJNE5Rf5yQPvF5rnT/Ow7OTZ283cb/Z5+QUZFmWBQAAACRqUH8vAAAAAPaHsAUAACBpwhYAAICkCVsAAACSJmwBAABImrAFAAAgacIWAACApAlbAAAAkiZsAQAASJqwBQAAIGnCFgAAgKQJWwAAAJImbAEAAEiasAUAACBpwhYAAICkCVsAAACSJmwBAABImrAFAAAgacIWAACApAlbAAAAkiZsAQAASJqwBQAAIGnCFgAAgKQJWwAAAJImbAEAAEiasAUAACBpwhYAAICkCVsAAACSJmwBAABImrAFAAAgacIWAACApAlbAAAAkiZsAQAASJqwBQAAIGnCFgAAgKQJWwAAAJImbAEAAEiasAUAACBpwhYAAICkCVsAAACSJmwBAABImrAFAAAgacIWAACApAlbAAAAkiZsAQAASJqwBQAAIGnCFgAAgKQJWwAAAJImbAEAAEiasAUAACBpwhYAAICkCVsAAACSJmwBAABImrAFAAAgacIWAACApAlbAAAAkiZsAQAASJqwBQAAIGnCFgAAgKQJWwAAAJImbAEAAEiasAUAACBpwhYAAICkCVsAAACSJmwBAABImrAFAAAgacIWAACApAlbAAAAkiZsAQAASJqwBQAAIGnCFgAAgKQJWwAAAJImbAEAAEiasAUAACBpwhYAAICkCVsAAACSJmwBAABImrAFAAAgacIWAACApAlbAAAAkiZsAQAASJqwBQAAIGnCFgAAgKQJWwAAAJImbAEAAEiasAUAACBpwhYAAICkCVsAAACSJmwBAABImrAFAAAgacIWAACApAlbAAAAkiZsAQAASJqwBQAAIGnCFgAAgKQJWwAAAJImbAEAAEiasAUAACBpwhYAAICkCVsAAACSJmwBAABImrAFAAAgacIWAACApAlbAAAAkiZsAQAASJqwBQAAIGnCFgAAgKQJWwAAAJImbAEAAEiasAUAACBpwhYAAICkCVsAAACSJmwBAABImrAFAAAgacIWAACApAlbAAAAkiZsAQAASJqwBQAAIGnCFgAAgKQJWwAAAJImbAEAAEiasAUAACBpwhYAAICkCVsAAACSJmwBAABImrAFAAAgacIWAACApAlbAAAAkiZsAQAASJqwBQAAIGnCFgAAgKQJWwAAAJImbAEAAEiasAUAACBpwhYAAICkCVsAAACSJmwBAABImrAFAAAgacIWAACApAnbHHvhhRfiggsuiLFjx0ZBQUE89dRTn7nN888/HxUVFVFcXBzHHnts3HPPPblfKAAAQKKEbY59+OGHcfLJJ8ddd921R/M3btwY5513XkyfPj2amprixhtvjLlz58bjjz+e45UCAACkqSDLsqy/F3GoKCgoiCeffDJmzpy52znXX399rFixItavX989Vl1dHa+++mqsXr06D6sEAABIS2F/L4CeVq9eHVVVVT3Gzj333Fi6dGl8/PHHMWTIkD636+rqiq6uru7nO3fujPfeey9GjBgRBQUFOV0zAAAc6rIsi61bt8bYsWNj0CAvjM03YTvAtLa2RllZWY+xsrKy2L59e7S1tcWYMWP63K62tjZuvfXWfCwRAADYjU2bNsW4ceP6exmHHGE7AP32HdZdrxb/tDuvCxYsiJqamu7nHR0dcfTRR8emTZuipKQkNwsFAAAiIqKzszPGjx8fRxxxRH8v5ZAkbAeY0aN
HR2tra4+xLVu2RGFhYYwYMWK32xUVFUVRUVGv8ZKSEmELAAB54m2A/cOLvweYadOmRUNDQ4+xZ599NqZMmbLb99cCAAAcyoRtjn3wwQexdu3aWLt2bUR88nE+a9eujebm5oj45CXEs2fP7p5fXV0db731VtTU1MT69etj2bJlsXTp0rj22mv7Y/kAAAADnpci59iaNWvizDPP7H6+632wl112WTz44IPR0tLSHbkREeXl5VFfXx/z58+Pu+++O8aOHRt33HFHXHjhhXlfOwAAQAp8ju1BqrOzM0pLS6Ojo8N7bAEAIMdcf/cvL0UGAAAgacIWAACApAlbAAAAkiZsAQAASJqwBQAAIGnCFgAAgKQJWwAAAJImbAEAAEiasAUAACBpwhYAAICkCVsAAACSJmwBAABImrAFAAAgacIWAACApAlbAAAAkiZsAQAASJqwBQAAIGnCFgAAgKQJWwAAAJImbAEAAEiasAUAACBpwhYAAICkCVsAAACSJmwBAABImrAFAAAgacIWAACApAlbAAAAkiZsAQAASJqwBQAAIGnCFgAAgKQJWwAAAJImbAEAAEiasAUAACBpwhYAAICkCVsAAACSJmwBAABImrAFAAAgacIWAACApAlbAAAAkiZsAQAASJqwBQAAIGnCFgAAgKQJWwAAAJImbPNk8eLFUV5eHsXFxVFRURErV6781PnLly+Pk08+OQ477LAYM2ZMXHHFFdHe3p6n1QIAAKRD2OZBXV1dzJs3LxYuXBhNTU0xffr0mDFjRjQ3N/c5/8UXX4zZs2fHnDlz4vXXX49HH300XnnllbjyyivzvHIAAICBT9jmwe233x5z5syJK6+8MiZOnBg/+MEPYvz48bFkyZI+57/88stxzDHHxNy5c6O8vDy++tWvxre+9a1Ys2ZNnlcOAAAw8AnbHNu2bVs0NjZGVVVVj/GqqqpYtWpVn9tUVlbG5s2bo76+PrIsi3fffTcee+yxOP/883f7fbq6uqKzs7PHAwAA4FAgbHOsra0tduzYEWVlZT3Gy8rKorW1tc9tKisrY/ny5TFr1qwYOnRojB49Oo488si48847d/t9amtro7S0tPsxfvz4A7ofAAAAA5WwzZOCgoIez7Ms6zW2y7p162Lu3Llx0003RWNjYzz99NOxcePGqK6u3u3XX7BgQXR0dHQ/Nm3adEDXDwAAMFAV9vcCDnYjR46MwYMH97o7u2XLll53cXepra2N008/Pa677rqIiDjppJPi8MMPj+nTp8dtt90WY8aM6bVNUVFRFBUVHfgdAAAAGODcsc2xoUOHRkVFRTQ0NPQYb2hoiMrKyj63+eijj2LQoJ7/aQYPHhwRn9zpBQAA4D8J2zyoqamJ+++/P5YtWxbr16+P+fPnR3Nzc/dLixcsWBCzZ8/unn/BBRfEE088EUuWLIkNGzbESy+9FHPnzo1TTz01xo4d21+7AQAAMCB5KXIezJo1K9rb22PRokXR0tISkyZNivr6+pgwYUJERLS0tPT4TNvLL788tm7dGnfddVd85zvfiSOPPDLOOuus+O53v9tfuwAAADBgFWRe23pQ6uzsjNLS0ujo6IiSkpL+Xg4AABzUXH/3Ly9FBgAAIGnCFgAAgKQJWwAAAJImbAEAAEiasAUAACBpwhYAAICkCVsAAACSJmwBAABImrAFAAAgacIWAACApAlbAAAAkiZsAQAASJqwBQAAIGnCFgAAgKQJWwAAAJImbAEAAEiasAUAACBpwhYAAICkCVsAAACSJmwBAABImrAFAAAgacIWAACApAlbAAAAkiZsAQAASJqwBQAAIGnCFgAAgKQJWwAAAJImbAEAAEiasAUAACBpwhYAAICkCVsAAACSJmwBAABImrAFAAAgacIWAACApAlbAAAAkiZsAQAASJqwBQAAIGnCFgAAgKQJWwAAAJImbAEAAEiasAUAACBpwhYAAICkCVsAAACSJmzzZPHixVFeXh7FxcVRUVERK1eu/NT
5XV1dsXDhwpgwYUIUFRXFcccdF8uWLcvTagEAANJR2N8LOBTU1dXFvHnzYvHixXH66afHvffeGzNmzIh169bF0Ucf3ec2F110Ubz77ruxdOnS+OIXvxhbtmyJ7du353nlAAAAA19BlmVZfy/iYDd16tSYPHlyLFmypHts4sSJMXPmzKitre01/+mnn46LL744NmzYEMOHD9+n79nZ2RmlpaXR0dERJSUl+7x2AADgs7n+7l9eipxj27Zti8bGxqiqquoxXlVVFatWrepzmxUrVsSUKVPie9/7Xhx11FFxwgknxLXXXhu/+tWv8rFkAACApHgpco61tbXFjh07oqysrMd4WVlZtLa29rnNhg0b4sUXX4zi4uJ48skno62tLa666qp47733dvs+266urujq6up+3tnZeeB2AgAAYABzxzZPCgoKejzPsqzX2C47d+6MgoKCWL58eZx66qlx3nnnxe233x4PPvjgbu/a1tbWRmlpafdj/PjxB3wfAAAABiJhm2MjR46MwYMH97o7u2XLll53cXcZM2ZMHHXUUVFaWto9NnHixMiyLDZv3tznNgsWLIiOjo7ux6ZNmw7cTgAAAAxgwjbHhg4dGhUVFdHQ0NBjvKGhISorK/vc5vTTT4933nknPvjgg+6xN954IwYNGhTjxo3rc5uioqIoKSnp8QAAADgUCNs8qKmpifvvvz+WLVsW69evj/nz50dzc3NUV1dHxCd3W2fPnt09/5JLLokRI0bEFVdcEevWrYsXXnghrrvuuviTP/mTGDZsWH/tBgAAwIDkl0flwaxZs6K9vT0WLVoULS0tMWnSpKivr48JEyZERERLS0s0Nzd3z//c5z4XDQ0N8e1vfzumTJkSI0aMiIsuuihuu+22/toFAACAAcvn2B6kfI4WAADkj+vv/uWlyAAAACRN2AIAAJA0YQsAAEDShC0AAABJE7YAAAAkTdgCAACQNGELAABA0oQtAAAASRO2AAAAJE3YAgAAkDRhCwAAQNKELQAAAEkTtgAAACRN2AIAAJA0YQsAAEDShC0AAABJE7YAAAAkTdgCAACQNGELAABA0oQtAAAASRO2AAAAJE3YAgAAkDRhCwAAQNKELQAAAEkTtgAAACRN2AIAAJA0YQsAAEDShC0AAABJE7YAAAAkTdgCAACQNGELAABA0oQtAAAASRO2AAAAJE3YAgAAkDRhCwAAQNKELQAAAEkTtgAAACRN2AIAAJA0YQsAAEDShC0AAABJE7YAAAAkTdgCAACQNGELAABA0oQtAAAASRO2ebJ48eIoLy+P4uLiqKioiJUrV+7Rdi+99FIUFhbGKaecktsFAgAAJErY5kFdXV3MmzcvFi5cGE1NTTF9+vSYMWNGNDc3f+p2HR0dMXv27Pja176Wp5UCAACkpyDLsqy/F3Gwmzp1akyePDmWLFnSPTZx4sSYOXNm1NbW7na7iy++OI4//vgYPHhwPPXUU7F27do9/p6dnZ1RWloaHR0dUVJSsj/LBwAAPoPr7/7ljm2Obdu2LRobG6OqqqrHeFVVVaxatWq32z3wwAPx5ptvxs0337xH36erqys6Ozt7PAAAAA4FwjbH2traYseOHVFWVtZjvKysLFpbW/vc5uc//3nccMMNsXz58igsLNyj71NbWxulpaXdj/Hjx+/32gEAAFIgbPOkoKCgx/Msy3qNRUTs2LEjLrnkkrj11lvjhBNO2OOvv2DBgujo6Oh+bNq0ab/XDAAAkII9ux3IPhs5cmQMHjy4193ZLVu29LqLGxGxdevWWLNmTTQ1NcU111wTERE7d+6MLMuisLAwnn322TjrrLN6bVdUVBRFRUW52QkAAIABzB3bHBs6dGhUVFREQ0NDj/GGhoaorKzsNb+kpCRee+21WLt2bfejuro6vvSlL8XatWtj6tSp+Vo6AABAEtyxzYOampq49NJLY8qUKTFt2rS47777orm5OaqrqyPik5cRv/322/HQQw/FoEGDYtKkST2
2HzVqVBQXF/caBwAAQNjmxaxZs6K9vT0WLVoULS0tMWnSpKivr48JEyZERERLS8tnfqYtAAAAffM5tgcpn6MFAAD54/q7f3mPLQAAAEkTtgAAACRN2AIAAJA0YQsAAEDShC0AAABJE7YAAAAkTdgCAACQNGELAABA0oQtAAAASRO2AAAAJE3YAgAAkDRhCwAAQNKELQAAAEkTtgAAACRN2AIAAJA0YQsAAEDShC0AAABJE7YAAAAkTdgCAACQNGELAABA0oQtAAAASRO2AAAAJE3YAgAAkDRhCwAAQNKELQAAAEkTtgAAACRN2AIAAJA0YQsAAEDShC0AAABJE7YAAAAkTdgCAACQNGELAABA0oQtAAAASRO2AAAAJE3YAgAAkDRhCwAAQNKELQAAAEkTtgAAACRN2AIAAJA0YQsAAEDShC0AAABJE7YAAAAkTdgCAACQNGGbJ4sXL47y8vIoLi6OioqKWLly5W7nPvHEE3HOOefEF77whSgpKYlp06bFM888k8fVAgAApEPY5kFdXV3MmzcvFi5cGE1NTTF9+vSYMWNGNDc39zn/hRdeiHPOOSfq6+ujsbExzjzzzLjggguiqakpzysHAAAY+AqyLMv6exEHu6lTp8bkyZNjyZIl3WMTJ06MmTNnRm1t7R59ja985Ssxa9asuOmmm/ZofmdnZ5SWlkZHR0eUlJTs07oBAIA94/q7f7ljm2Pbtm2LxsbGqKqq6jFeVVUVq1at2qOvsXPnzti6dWsMHz58t3O6urqis7OzxwMAAOBQIGxzrK2tLXbs2BFlZWU9xsvKyqK1tXWPvsb3v//9+PDDD+Oiiy7a7Zza2tooLS3tfowfP36/1g0AAJAKYZsnBQUFPZ5nWdZrrC+PPPJI3HLLLVFXVxejRo3a7bwFCxZER0dH92PTpk37vWYAAIAUFPb3Ag52I0eOjMGDB/e6O7tly5Zed3F/W11dXcyZMyceffTROPvssz91blFRURQVFe33egEAAFLjjm2ODR06NCoqKqKhoaHHeENDQ1RWVu52u0ceeSQuv/zyePjhh+P888/P9TIBAACS5Y5tHtTU1MSll14aU6ZMiWnTpsV9990Xzc3NUV1dHRGfvIz47bffjoceeigiPona2bNnxw9/+MM47bTTuu/2Dhs2LEpLS/ttPwAAAAYiYZsHs2bNivb29li0aFG0tLTEpEmTor6+PiZMmBARES0tLT0+0/bee++N7du3x9VXXx1XX3119/hll10WDz74YL6XDwAAMKD5HNuDlM/RAgCA/HH93b+8xxYAAICkCVsAAACSJmwBAABImrAFAAAgacIWAACApAlbAAAAkiZsAQAASJqwBQAAIGnCFgAAgKQJWwAAAJImbAEAAEiasAUAACBpwhYAAICkCVsAAACSJmwBAABImrAFAAAgacIWAACApAlbAAAAkiZsAQAASJqwBQAAIGnCFgAAgKQJWwAAAJImbAEAAEiasAUAACBpwhYAAICkCVsAAACSJmwBAABImrAFAAAgacIWAACApAlbAAAAkiZsAQAASJqwBQAAIGnCFgAAgKQJWwAAAJImbAEAAEiasAUAACBpwhYAAICkCVsAAACSJmwBAABImrAFAAAgacIWAACApAlbAAAAkiZsAQAASJqwzZPFixdHeXl5FBcXR0VFRaxcufJT5z///PNRUVERxcXFceyxx8Y999yTp5UCAACkRdjmQV1dXcybNy8WLlwYTU1NMX369JgxY0Y0Nzf3OX/jxo1x3nnnxfTp06OpqSluvPHGmDt3bjz++ON5XjkAAMDAV5BlWdbfizjYTZ06NSZPnhxLlizpHps4cWLMnDkzamtre82//vrrY8WKFbF+/fruserq6nj11Vdj9erVe/Q9Ozs7o7S0NDo6OqKkpGT/dwIAANgt19/9yx3bHNu2bVs0NjZGVVVVj/GqqqpYtWpVn9usXr261/xzzz031qxZEx9//HHO1goAAJCiwv5ewMGura0
tduzYEWVlZT3Gy8rKorW1tc9tWltb+5y/ffv2aGtrizFjxvTapqurK7q6urqfd3R0RMQn/3IEAADk1q7rbi+I7R/CNk8KCgp6PM+yrNfYZ83va3yX2trauPXWW3uNjx8/fm+XCgAA7KP29vYoLS3t72UccoRtjo0cOTIGDx7c6+7sli1bet2V3WX06NF9zi8sLIwRI0b0uc2CBQuipqam+/n7778fEyZMiObmZn+xPkVnZ2eMHz8+Nm3a5L0Qu+EY7RnH6bM5RnvGcdozjtNnc4z2jOP02RyjPdPR0RFHH310DB8+vL+XckgStjk2dOjQqKioiIaGhviDP/iD7vGGhob4xje+0ec206ZNi7/7u7/rMfbss8/GlClTYsiQIX1uU1RUFEVFRb3GS0tL/QDaAyUlJY7TZ3CM9ozj9Nkcoz3jOO0Zx+mzOUZ7xnH6bI7Rnhk0yK8x6g+Oeh7U1NTE/fffH8uWLYv169fH/Pnzo7m5OaqrqyPik7uts2fP7p5fXV0db731VtTU1MT69etj2bJlsXTp0rj22mv7axcAAAAGLHds82DWrFnR3t4eixYtipaWlpg0aVLU19fHhAkTIiKipaWlx2falpeXR319fcyfPz/uvvvuGDt2bNxxxx1x4YUX9tcuAAAADFjCNk+uuuqquOqqq/r8swcffLDX2BlnnBE/+clP9vn7FRUVxc0339zny5P5T47TZ3OM9ozj9Nkcoz3jOO0Zx+mzOUZ7xnH6bI7RnnGc+ldB5vdRAwAAkDDvsQUAACBpwhYAAICkCVsAAACSJmwHqNra2viv//W/xhFHHBGjRo2KmTNnxs9+9rMec7Isi1tuuSXGjh0bw4YNi9/93d+N119/vcecrq6u+Pa3vx0jR46Mww8/PL7+9a/H5s2be8z5j//4j7j00kujtLQ0SktL49JLL433338/17t4QOTzOP35n/95VFZWxmGHHRZHHnlkrnftgMnXMfrFL34Rc+bMifLy8hg2bFgcd9xxcfPNN8e2bdvysp/7K5/n0te//vU4+uijo7i4OMaMGROXXnppvPPOOznfx/2Vz2P0m3NPOeWUKCgoiLVr1+Zq1w6ofB6nY445JgoKCno8brjhhpzv44GQ7/Pp//7f/xtTp06NYcOGxciRI+MP//APc7p/B0K+jtGPf/zjXufRrscrr7ySl33dH/k8l9544434xje+ESNHjoySkpI4/fTT47nnnsv5Ph4I+TxOP/nJT+Kcc86JI488MkaMGBH/43/8j/jggw9yvo/760Ado/vuuy9+93d/N0pKSqKgoKDP6+qUr78HrIwB6dxzz80eeOCB7N///d+ztWvXZueff3529NFHZx988EH3nL/4i7/IjjjiiOzxxx/PXnvttWzWrFnZmDFjss7Ozu451dXV2VFHHZU1NDRkP/nJT7IzzzwzO/nkk7Pt27d3z/m93/u9bNKkSdmqVauyVatWZZMmTcp+//d/P6/7u6/yeZxuuumm7Pbbb89qamqy0tLSfO7mfsnXMfqHf/iH7PLLL8+eeeaZ7M0338z+9m//Nhs1alT2ne98J+/7vC/yeS7dfvvt2erVq7Nf/OIX2UsvvZRNmzYtmzZtWl73d1/k8xjtMnfu3GzGjBlZRGRNTU352M39ls/jNGHChGzRokVZS0tL92Pr1q153d99lc/j9Nhjj2Wf//znsyVLlmQ/+9nPsp/+9KfZo48+mtf93Rf5OkZdXV09zqGWlpbsyiuvzI455phs586ded/vvZXPc+mLX/xidt5552Wvvvpq9sYbb2RXXXVVdthhh2UtLS153ed9ka/j9Pbbb2ef//zns+rq6uynP/1p9q//+q9ZZWVlduGFF+Z9n/fWgTpGf/VXf5XV1tZmtbW1WURk//Ef/9Hre6V8/T1QCdtEbNmyJYuI7Pnnn8+yLMt27tyZjR49OvuLv/iL7jm//vWvs9LS0uyee+7JsizL3n///WzIkCHZj370o+45b7/9djZo0KD
s6aefzrIsy9atW5dFRPbyyy93z1m9enUWEdlPf/rTfOzaAZWr4/SbHnjggaTC9rfl4xjt8r3vfS8rLy/P0Z7kVj6P09/+7d9mBQUF2bZt23K0N7mR62NUX1+fnXjiidnrr7+eVNj+tlwepwkTJmR/9Vd/lZ8dybFcHaePP/44O+qoo7L7778/j3uTG/n6ubRt27Zs1KhR2aJFi3K4N7mTq+P0y1/+MouI7IUXXuie09nZmUVE9o//+I/52LUDKlfH6d57781GjRqV7dixo3tOU1NTFhHZz3/+83zs2gGzL8foNz333HN9hu3Bdv09UHgpciI6OjoiImL48OEREbFx48ZobW2Nqqqq7jlFRUVxxhlnxKpVqyIiorGxMT7++OMec8aOHRuTJk3qnrN69eooLS2NqVOnds857bTTorS0tHtOSnJ1nA4m+TxGHR0d3d8nNfk6Tu+9914sX748KisrY8iQIbnanZzI5TF6991347//9/8e/+f//J847LDD8rE7OZPrc+m73/1ujBgxIk455ZT48z//82Re/v/bcnWcfvKTn8Tbb78dgwYNit/5nd+JMWPGxIwZM3q9dDAF+fq5tGLFimhra4vLL788R3uSW7k6TiNGjIiJEyfGQw89FB9++GFs37497r333igrK4uKiop87d4Bk6vj1NXVFUOHDo1Bg/4zM4YNGxYRES+++GJud+oA25djtCcOtuvvgULYJiDLsqipqYmvfvWrMWnSpIiIaG1tjYiIsrKyHnPLysq6/6y1tTWGDh0an//85z91zqhRo3p9z1GjRnXPSUUuj9PBIp/H6M0334w777wzqqurD/Ru5Fw+jtP1118fhx9+eIwYMSKam5vjb//2b3O1OzmRy2OUZVlcfvnlUV1dHVOmTMn1ruRUrs+lP/3TP40f/ehH8dxzz8U111wTP/jBD+Kqq67K5S7lRC6P04YNGyIi4pZbbok/+7M/i7//+7+Pz3/+83HGGWfEe++9l9P9OpDy+fN76dKlce6558b48eMP9G7kXC6PU0FBQTQ0NERTU1McccQRUVxcHH/1V38VTz/9dFK/eyMit8fprLPOitbW1vjLv/zL2LZtW/zHf/xH3HjjjRER0dLSktP9OpD29RjtiYPp+nsgEbYJuOaaa+Lf/u3f4pFHHun1ZwUFBT2eZ1nWa+y3/facvubvydcZaHJ9nA4G+TpG77zzTvze7/1e/Lf/9t/iyiuv3L9F94N8HKfrrrsumpqa4tlnn43BgwfH7NmzI8uy/V98nuTyGN15553R2dkZCxYsOHAL7ie5Ppfmz58fZ5xxRpx00klx5ZVXxj333BNLly6N9vb2A7MDeZLL47Rz586IiFi4cGFceOGFUVFREQ888EAUFBTEo48+eoD2IPfy9fN78+bN8cwzz8ScOXP2b8H9JJfHKcuyuOqqq2LUqFGxcuXK+Nd//df4xje+Eb//+7+fVLBF5PY4feUrX4m//uu/ju9///tx2GGHxejRo+PYY4+NsrKyGDx48IHbiRw70Mfos77Gvn4d/pOwHeC+/e1vx4oVK+K5556LcePGdY+PHj06IqLXv+ps2bKl+1+RRo8e3f0vZZ8259133+31fX/5y1/2+teogSzXx+lgkK9j9M4778SZZ54Z06ZNi/vuuy8Xu5JT+TpOI0eOjBNOOCHOOeec+NGPfhT19fXx8ssv52KXDrhcH6N//ud/jpdffjmKioqisLAwvvjFL0ZExJQpU+Kyyy7L2X4daP3xc+m0006LiIj/9//+3wHZh3zI9XEaM2ZMRER8+ctf7v7zoqKiOPbYY6O5ufnA71AO5PNceuCBB2LEiBHx9a9//UDvRs7l42fT3//938ePfvSjOP3002Py5MmxePHiGDZsWPz1X/91LnftgMrH+XTJJZdEa2trvP3229He3h633HJL/PKXv4zy8vJc7dYBtT/HaE8cLNffA05O38HLPtu5c2d29dVXZ2PHjs3eeOONPv989OjR2Xe
/+93usa6urj7f4F9XV9c955133unzl0f9y7/8S/ecl19+OZk3r+frOP2m1H55VD6P0ebNm7Pjjz8+u/jii/v8DbcDWX+cS7s0NzdnEZE999xzB26HciBfx+itt97KXnvtte7HM888k0VE9thjj2WbNm3K8V7uv/48l/7u7/4ui4jsrbfeOoB7lBv5Ok4dHR1ZUVFRj18eteuXI91777252r0DIt/n0s6dO7Py8vJkfpv9Lvk6TitWrMgGDRrU6zePn3DCCdmf//mf52LXDqj+/Nm0dOnS7LDDDuvztwMPJAfiGP2mz/rlUalefw9UwnaA+p//839mpaWl2Y9//OMev37/o48+6p7zF3/xF1lpaWn2xBNPZK+99lr2x3/8x33+SvZx48Zl//iP/5j95Cc/yc4666w+P+7npJNOylavXp2tXr06+y//5b8k8+vG83mc3nrrraypqSm79dZbs8997nNZU1NT1tTUNOA/WiNfx+jtt9/OvvjFL2ZnnXVWtnnz5h7fKwX5Ok7/8i//kt15551ZU1NT9otf/CL753/+5+yrX/1qdtxxx2W//vWv877feyOff99+08aNG5P6rcj5Ok6rVq3Kbr/99qypqSnbsGFDVldXl40dOzb7+te/nvd93hf5PJ/+9E//NDvqqKOyZ555JvvpT3+azZkzJxs1alT23nvv5XWf91a+/8794z/+YxYR2bp16/K2jwdCvo7TL3/5y2zEiBHZH/7hH2Zr167Nfvazn2XXXnttNmTIkGzt2rV53++9lc/z6c4778waGxuzn/3sZ9ldd92VDRs2LPvhD3+Y1/3dFwfqGLW0tGRNTU3Z//7f/7v7N2k3NTVl7e3t3XNSvv4eqITtABURfT4eeOCB7jk7d+7Mbr755mz06NFZUVFR9v/9f/9f9tprr/X4Or/61a+ya665Jhs+fHg2bNiw7Pd///ez5ubmHnPa29uzb37zm9kRRxyRHXHEEdk3v/nNAf8varvk8zhddtllfX6vgX6XLV/H6IEHHtjt90pBvo7Tv/3bv2VnnnlmNnz48KyoqCg75phjsurq6mzz5s352tV9ls+/b78ptbDN13FqbGzMpk6dmpWWlmbFxcXZl770pezmm2/OPvzww3zt6n7J5/m0bdu27Dvf+U42atSo7IgjjsjOPvvs7N///d/zsZv7Jd9/5/74j/84q6yszPVuHXD5PE6vvPJKVlVVlQ0fPjw74ogjstNOOy2rr6/Px27ut3wep0svvTQbPnx4NnTo0Oykk07KHnrooXzs4n47UMfo5ptv/syvk/L190BVkGUJ/bYSAAAA+C1+eRQAAABJE7YAAAAkTdgCAACQNGELAABA0oQtAAAASRO2AAAAJE3YAgAAkDRhCwAAQNKELQAAAEkTtgAAACRN2AIAAJA0YQsAAEDShC0AAABJE7YAAAAkTdgCAACQNGELAABA0oQtAAAASRO2AAAAJE3YAgAAkDRhCwAAQNKELQAAAEkTtgAAACRN2AIAAJA0YQsAAEDShC0AAABJE7YAAAAkTdgCAACQNGELAABA0oQtAAAASRO2OfbCCy/EBRdcEGPHjo2CgoJ46qmnPnOb559/PioqKqK4uDiOPfbYuOeee3K/UAAAgEQJ2xz78MMP4+STT4677rprj+Zv3LgxzjvvvJg+fXo0NTXFjTfeGHPnzo3HH388xysFAABIU0GWZVl/L+JQUVBQEE8++WTMnDlzt3Ouv/76WLFiRaxfv757rLq6Ol599dVYvXp1HlYJAACQlsL+XgA9rV69OqqqqnqMnXvuubF06dL4+OOPY8iQIX1u19XVFV1dXd3Pd+7cGe+9916MGDEiCgoKcrpmAAA41GVZFlu3bo2xY8fGoEFeGJtvwnaAaW1tjbKysh5jZWVlsX379mhra4sxY8b0uV1tbW3ceuut+VgiAACwG5s2bYpx48b19zIOOcJ2APrtO6y7Xi3+aXdeFyxYEDU1Nd3POzo64uijj45NmzZ
FSUlJbhYKAABERERnZ2eMHz8+jjjiiP5eyiFJ2A4wo0ePjtbW1h5jW7ZsicLCwhgxYsRutysqKoqioqJe4yUlJcIWAADyxNsA+4cXfw8w06ZNi4aGhh5jzz77bEyZMmW3768FAAA4lAnbHPvggw9i7dq1sXbt2oj45ON81q5dG83NzRHxyUuIZ8+e3T2/uro63nrrraipqYn169fHsmXLYunSpXHttdf2x/IBAAAGPC9FzrE1a9bEmWee2f181/tgL7vssnjwwQejpaWlO3IjIsrLy6O+vj7mz58fd999d4wdOzbuuOOOuPDCC/O+dgAAgBT4HNuDVGdnZ5SWlkZHR4f32AIAQI65/u5fXooMAABA0oQtAAAASRO2AAAAJE3YAgAAkDRhCwAAQNKELQAAAEkTtgAAACRN2AIAAJA0YQsAAEDShC0AAABJE7YAAAAkTdgCAACQNGELAABA0oQtAAAASRO2AAAAJE3YAgAAkDRhCwAAQNKELQAAAEkTtgAAACRN2AIAAJA0YQsAAEDShC0AAABJE7YAAAAkTdgCAACQNGELAABA0oQtAAAASRO2AAAAJE3YAgAAkDRhCwAAQNKELQAAAEkTtgAAACRN2AIAAJA0YQsAAEDShC0AAABJE7YAAAAkTdgCAACQNGELAABA0oQtAAAASRO2AAAAJE3YAgAAkDRhCwAAQNKELQAAAEkTtgAAACRN2ObJ4sWLo7y8PIqLi6OioiJWrlz5qfOXL18eJ598chx22GExZsyYuOKKK6K9vT1PqwUAAEiHsM2Durq6mDdvXixcuDCamppi+vTpMWPGjGhubu5z/osvvhizZ8+OOXPmxOuvvx6PPvpovPLKK3HllVfmeeUAAAADn7DNg9tvvz3mzJkTV155ZUycODF+8IMfxPjx42PJkiV9zn/55ZfjmGOOiblz50Z5eXl89atfjW9961uxZs2aPK8cAABg4BO2ObZt27ZobGyMqqqqHuNVVVWxatWqPreprKyMzZs3R319fWRZFu+++2489thjcf755+/2+3R1dUVnZ2ePBwAAwKFA2OZYW1tb7NixI8rKynqMl5WVRWtra5/bVFZWxvLly2PWrFkxdOjQGD16dBx55JFx55137vb71NbWRmlpafdj/PjxB3Q/AAAABiphmycFBQU9nmdZ1mtsl3Xr1sXcuXPjpptuisbGxnj66adj48aNUV1dvduvv2DBgujo6Oh+bNq06YCuHwAAYKAq7O8FHOxGjhwZgwcP7nV3dsuWLb3u4u5SW1sbp59+elx33XUREXHSSSfF4YcfHtOnT4/bbrstxowZ02uboqKiKCoqOvA7AAAAMMC5Y5tjQ4cOjYqKimhoaOgx3tDQEJWVlX1u89FHH8WgQT3/0wwePDgiPrnTCwAAwH8StnlQU1MT999/fyxbtizWr18f8+fPj+bm5u6XFi9YsCBmz57dPf+CCy6IJ554IpYsWRIbNmyIl156KebOnRunnnpqjB07tr92AwAAYEDyUuQ8mDVrVrS3t8eiRYuipaUlJk2aFPX19TFhwoSIiGhpaenxmbaXX355bN26Ne666674zne+E0ceeWScddZZ8d3vfre/dgEAAGDAKsi8tvWg1NnZGaWlpdHR0RElJSX9vRwAADiouf7uX16KDAAAQNKELQAAAEkTtgAAACRN2AIAAJA0YQsAAEDShC0AAABJE7YAAAAkTdgCAACQNGELAABA0oQtAAAASRO2AAAAJE3YAgAAkDRhCwAAQNKELQAAAEkTtgAAACRN2AIAAJA0YQsAAEDShC0AAABJE7YAAAAkTdgCAACQNGELAABA0oQtAAAASRO2AAAAJE3YAgAAkDRhCwAAQNKELQAAAEkTtgAAACRN2AIAAJA0YQsAAEDShC0AAABJE7YAAAAkTdgCAACQNGELAABA0oQtAAAASRO2AAAAJE3YAgAAkDRhCwAAQNKELQAAAEkTtgAAACRN2AIAAJA0YQsAAEDShC0
AAABJE7YAAAAkTdjmyeLFi6O8vDyKi4ujoqIiVq5c+anzu7q6YuHChTFhwoQoKiqK4447LpYtW5an1QIAAKSjsL8XcCioq6uLefPmxeLFi+P000+Pe++9N2bMmBHr1q2Lo48+us9tLrroonj33Xdj6dKl8cUvfjG2bNkS27dvz/PKAQAABr6CLMuy/l7EwW7q1KkxefLkWLJkSffYxIkTY+bMmVFbW9tr/tNPPx0XX3xxbNiwIYYPH75P37OzszNKS0ujo6MjSkpK9nntAADAZ3P93b+8FDnHtm3bFo2NjVFVVdVjvKqqKlatWtXnNitWrIgpU6bE9773vTjqqKPihBNOiGuvvTZ+9atf5WPJAAAASfFS5Bxra2uLHTt2RFlZWY/xsrKyaG1t7XObDRs2xIsvvhjFxcXx5JNPRltbW1x11VXx3nvv7fZ9tl1dXdHV1dX9vLOz88DtBAAAwADmjm2eFBQU9HieZVmvsV127twZBQUFsXz58jj11FPjvPPOi9tvvz0efPDB3d61ra2tjdLS0u7H+PHjD/g+AAAADETCNsdGjhwZgwcP7nV3dsuWLb3u4u4yZsyYOOqoo6K0tLR7bOLEiZFlWWzevLnPbRYsWBAdHR3dj02bNh24nQAAABjAhG2ODR06NCoqKqKhoaHHeENDQ1RWVva5zemnnx7vvPNOfPDBB91jb7zxRgwaNCjGjRvX5zZFRUVRUlLS4wEAAHAoELZ5UFNTE/fff38sW7Ys1q9fH/Pnz4/m5uaorq6OiE/uts6ePbt7/iWXXBIjRoyIK664ItatWxcvvPBCXHfddfEnf/InMWzYsP7aDQAAgAHJL4/Kg1mzZkV7e3ssWrQoWlpaYtKkSVFfXx8TJkyIiIiWlpZobm7unv+5z30uGhoa4tvf/nZMmTIlRowYERdddFHcdttt/bULAAAAA5bPsT1I+RwtAADIH9ff/ctLkQEAAEiasAUAACBpwhYAAICkCVsAAACSJmwBAABImrAFAAAgacIWAACApAlbAAAAkiZsAQAASJqwBQAAIGnCFgAAgKQJWwAAAJImbAEAAEiasAUAACBpwhYAAICkCVsAAACSJmwBAABImrAFAAAgacIWAACApAlbAAAAkiZsAQAASJqwBQAAIGnCFgAAgKQJWwAAAJImbAEAAEiasAUAACBpwhYAAICkCVsAAACSJmwBAABImrAFAAAgacIWAACApAlbAAAAkiZsAQAASJqwBQAAIGnCFgAAgKQJWwAAAJImbAEAAEiasAUAACBpwhYAAICkCVsAAACSJmwBAABImrAFAAAgacIWAACApAlbAAAAkiZs82Tx4sVRXl4excXFUVFREStXrtyj7V566aUoLCyMU045JbcLBAAASJSwzYO6urqYN29eLFy4MJqammL69OkxY8aMaG5u/tTtOjo6Yvbs2fG1r30tTysFAABIT0GWZVl/L+JgN3Xq1Jg8eXIsWbKke2zixIkxc+bMqK2t3e12F198cRx//PExePDgeOqpp2Lt2rV7/D07OzujtLQ0Ojo6oqSkZH+WDwAAfAbX3/3LHdsc27ZtWzQ2NkZVVVWP8aqqqli1atVut3vggQfizTffjJtvvnmPvk9XV1d0dnb2eAAAABwKhG2OtbW1xY4dO6KsrKzHeFlZWbS2tva5zc9//vO44YYbYvny5VFYWLhH36e2tjZKS0u7H+PHj9/vtQMAAKRA2OZJQUFBj+dZlvUai4jYsWNHXHLJJXHrrbfGCSecsMdff8GCBdHR0dH92LRp036vGQAAIAV7djuQfTZy5MgYPHhwr7uzW7Zs6XUXNyJi69atsWbNmmhqaoprrrkmIiJ27twZWZZFYWFhPPvss3HWWWf12q6oqCiKiopysxMAAAADmDu2OTZ06NCoqKiIhoaGHuMNDQ1RWVnZa35JSUm89tprsXbt2u5HdXV1fOlLX4q1a9fG1KlT87V0AACAJLhjmwc1NTVx6aWXxpQpU2LatGl
x3333RXNzc1RXV0fEJy8jfvvtt+Ohhx6KQYMGxaRJk3psP2rUqCguLu41DgAAgLDNi1mzZkV7e3ssWrQoWlpaYtKkSVFfXx8TJkyIiIiWlpbP/ExbAAAA+uZzbA9SPkcLAADyx/V3//IeWwAAAJImbAEAAEiasAUAACBpwhYAAICkCVsAAACSJmwBAABImrAFAAAgacIWAACApAlbAAAAkiZsAQAASJqwBQAAIGnCFgAAgKQJWwAAAJImbAEAAEiasAUAACBpwhYAAICkCVsAAACSJmwBAABImrAFAAAgacIWAACApAlbAAAAkiZsAQAASJqwBQAAIGnCFgAAgKQJWwAAAJImbAEAAEiasAUAACBpwhYAAICkCVsAAACSJmwBAABImrAFAAAgacIWAACApAlbAAAAkiZsAQAASJqwBQAAIGnCFgAAgKQJWwAAAJImbAEAAEiasAUAACBpwhYAAICkCVsAAACSJmwBAABImrAFAAAgacI2TxYvXhzl5eVRXFwcFRUVsXLlyt3OfeKJJ+Kcc86JL3zhC1FSUhLTpk2LZ555Jo+rBQAASIewzYO6urqYN29eLFy4MJqammL69OkxY8aMaG5u7nP+Cy+8EOecc07U19dHY2NjnHnmmXHBBRdEU1NTnlcOAAAw8BVkWZb19yIOdlOnTo3JkyfHkiVLuscmTpwYM2fOjNra2j36Gl/5yldi1qxZcdNNN+3R/M7OzigtLY2Ojo4oKSnZp3UDAAB7xvV3/3LHNse2bdsWjY2NUVVV1WO8qqoqVq1atUdfY+fOnbF169YYPnx4LpYIAACQtML+XsDBrq2tLXbs2BFlZWU9xsvKyqK1tXWPvsb3v//9+PDDD+Oiiy7a7Zyurq7o6urqft7Z2blvCwYAAEiMO7Z5UlBQ0ON5lmW9xvryyCOPxC233BJ1dXUxatSo3c6rra2N0tLS7sf48eP3e80AAAApELY5NnLkyBg8eHCvu7NbtmzpdRf3t9XV1cWcOXPib/7mb+Lss8/+1LkLFiyIjo6O7semTZv2e+0AAAApELY5NnTo0KioqIiGhoYe4w0NDVFZWbnb7R555JG4/PLL4+GHH47zzz//M79PUVFRlJSU9HgAAAAcCrzHNg9qamri0ksvjSlTpsS0adPivvvui+bm5qiuro6IT+62vv322/HQQw9FxCdRO3v27PjhD38Yp512Wvfd3mHDhkVpaWm/7QcAAMBAJGzzYNasWdHe3h6LFi2KlpaWmDRpUtTX18eECRMiIqKlpaXHZ9ree++9sX379rj66qvj6quv7h6/7LLL4sEHH8z38gEAAAY0n2N7kPI5WgAAkD+uv/uX99gCAACQNGELAABA0oQtAAAASRO2AAAAJE3YAgAAkDRhCwAAQNKELQAAAEkTtgAAACRN2AIAAJA0YQsAAEDShC0AAABJE7YAAAAkTdgCAACQNGELAABA0oQtAAAASRO2AAAAJE3YAgAAkDRhCwAAQNKELQAAAEkTtgAAACRN2AIAAJA0YQsAAEDShC0AAABJE7YAAAAkTdgCAACQNGELAABA0oQtAAAASRO2AAAAJE3YAgAAkDRhCwAAQNKELQAAAEkTtgAAACRN2AIAAJA0YQsAAEDShC0AAABJE7YAAAAkTdgCAACQNGELAABA0oQtAAAASRO2AAAAJE3YAgAAkDRhCwAAQNKELQAAAEkTtnmyePHiKC8vj+Li4qioqIiVK1d+6vznn38+Kioqori4OI499ti455578rRSAACAtAjbPKirq4t58+bFwoULo6mpKaZPnx4zZsyI5ubmPudv3LgxzjvvvJg+fXo0NTXFjTfeGHPnzo3HH388zysHAAAY+AqyLMv6exEHu6lTp8bkyZNjyZIl3WMTJ06MmTNnRm1tba/5119/faxYsSLWr1/fPVZdXR2vvvpqrF69eo++Z2dnZ5SWlkZHR0eUlJTs/04AAAC75fq7f7ljm2Pbtm2LxsbGqKqq6jFeVVU
Vq1at6nOb1atX95p/7rnnxpo1a+Ljjz/O2VoBAABSVNjfCzjYtbW1xY4dO6KsrKzHeFlZWbS2tva5TWtra5/zt2/fHm1tbTFmzJhe23R1dUVXV1f3846Ojoj45F+OAACA3Np13e0Fsf1D2OZJQUFBj+dZlvUa+6z5fY3vUltbG7feemuv8fHjx+/tUgEAgH3U3t4epaWl/b2MQ46wzbGRI0fG4MGDe92d3bJlS6+7sruMHj26z/mFhYUxYsSIPrdZsGBB1NTUdD9///33Y8KECdHc3OwvFvuls7Mzxo8fH5s2bfJ+EfaLc4kDyfnEgeJc4kDp6OiIo48+OoYPH97fSzkkCdscGzp0aFRUVERDQ0P8wR/8Qfd4Q0NDfOMb3+hzm2nTpsXf/d3f9Rh79tlnY8qUKTFkyJA+tykqKoqioqJe46WlpX5Ic0CUlJQ4lzggnEscSM4nDhTnEgfKoEF+jVF/cNTzoKamJu6///5YtmxZrF+/PubPnx/Nzc1RXV0dEZ/cbZ09e3b3/Orq6njrrbeipqYm1q9fH8uWLYulS5fGtdde21+7AAAAMGC5Y5sHs2bNivb29li0aFG0tLTEpEmTor6+PiZMmBARES0tLT0+07a8vDzq6+tj/vz5cffdd8fYsWPjjjvuiAsvvLC/dgEAAGDAErZ5ctVVV8VVV13V5589+OCDvcbOOOOM+MlPfrLP36+oqChuvvnmPl+eDHvDucSB4lziQHI+caA4lzhQnEv9qyDz+6gBAABImPfYAgAAkDRhCwAAQNKELQAAAEkTtgAAACRN2CZs8eLFUV5eHsXFxVFRURErV6781PnPP/98VFRURHFxcRx77LFxzz335GmlDHR7cy498cQTcc4558QXvvCFKCkpiWnTpsUzzzyTx9UykO3tz6VdXnrppSgsLIxTTjkltwskGXt7LnV1dcXChQtjwoQJUVRUFMcdd1wsW7YsT6tloNvb82n58uVx8sknx2GHHRZjxoyJK664Itrb2/O0WgaqF154IS644IIYO3ZsFBQUxFNPPfWZ27j+zh9hm6i6urqYN29eLFy4MJqammL69OkxY8aMHp+H+5s2btwY5513XkyfPj2amprixhtvjLlz58bjjz+e55Uz0OztufTCCy/EOeecE/X19dHY2BhnnnlmXHDBBdHU1JTnlTPQ7O25tEtHR0fMnj07vva1r+VppQx0+3IuXXTRRfFP//RPsXTp0vjZz34WjzzySJx44ol5XDUD1d6eTy+++GLMnj075syZE6+//no8+uij8corr8SVV16Z55Uz0Hz44Ydx8sknx1133bVH811/51lGkk499dSsurq6x9iJJ56Y3XDDDX3O/1//639lJ554Yo+xb33rW9lpp52WszWShr09l/ry5S9/Obv11lsP9NJIzL6eS7Nmzcr+7M/+LLv55puzk08+OYcrJBV7ey79wz/8Q1ZaWpq1t7fnY3kkZm/Pp7/8y7/Mjj322B5jd9xxRzZu3LicrZH0RET25JNPfuoc19/55Y5tgrZt2xaNjY1RVVXVY7yqqipWrVrV5zarV6/uNf/cc8+NNWvWxMcff5yztTKw7cu59Nt27twZW7dujeHDh+diiSRiX8+lBx54IN588824+eabc71EErEv59KKFStiypQp8b3vfS+OOuqoOOGEE+Laa6+NX/3qV/lYMgPYvpxPlZWVsXnz5qivr48sy+Ldd9+Nxx57LM4///x8LJmDiOvv/Crs7wWw99ra2mLHjh1RVlbWY7ysrCxaW1v73Ka1tbXP+du3b4+2trYYM2ZMztbLwLUv59Jv+/73vx8ffvhhXHTRRblYIonYl3Pp5z//edxwww2xcuXKKCz0vyM+sS/n0oYNG+LFF1+M4uLiePLJJ6OtrS2uuuqqeO+997zP9hC3L+dTZWVlLF++PGbNmhW//vWvY/v27fH1r3897rzzznwsmYOI6+/8csc2YQUFBT2eZ1nWa+yz5vc1zqFnb8+lXR5
55JG45ZZboq6uLkaNGpWr5ZGQPT2XduzYEZdccknceuutccIJJ+RreSRkb34u7dy5MwoKCmL58uVx6qmnxnnnnRe33357PPjgg+7aEhF7dz6tW7cu5s6dGzfddFM0NjbG008/HRs3bozq6up8LJWDjOvv/PFP5AkaOXJkDB48uNe/NG7ZsqXXvwrtMnr06D7nFxYWxogRI3K2Vga2fTmXdqmrq4s5c+bEo48+GmeffXYul0kC9vZc2rp1a6xZsyaamprimmuuiYhP4iTLsigsLIxnn302zjrrrLysnYFlX34ujRkzJo466qgoLS3tHps4cWJkWRabN2+O448/PqdrZuDal/OptrY2Tj/99LjuuusiIuKkk06Kww8/PKZPnx633Xabu2zsMdff+eWObYKGDh0aFRUV0dDQ0GO8oaEhKisr+9xm2rRpveY/++yzMWXKlBgyZEjO1srAti/nUsQnd2ovv/zyePjhh73niIjY+3OppKQkXnvttVi7dm33o7q6Or70pS/F2rVrY+rUqflaOgPMvvxcOv300+Odd96JDz74oHvsjTfeiEGDBsW4ceNyul4Gtn05nz766KMYNKjnJfLgwYMj4j/vtsGecP2dZ/30S6vYTz/60Y+yIUOGZEuXLs3WrVuXzZs3Lzv88MOzX/ziF1mWZdkNN9yQXXrppd3zN2zYkB122GHZ/Pnzs3Xr1mVLly7NhgwZkj322GP9tQsMEHt7Lj388MNZYWFhdvfdd2ctLS3dj/fff7+/doEBYm/Ppd/mtyKzy96eS1u3bs3GjRuX/dEf/VH2+uuvZ88//3x2/PHHZ1deeWV/7QIDyN6eTw888EBWWFiYLV68OHvzzTezF198MZsyZUp26qmn9tcuMEBs3bo1a2pqypqamrKIyG6//fasqakpe+utt7Isc/3d34Rtwu6+++5swoQJ2dChQ7PJkydnzz//fPefXXbZZdkZZ5zRY/6Pf/zj7Hd+53eyoUOHZsccc0y2ZMmSPK+YgWpvzqUzzjgji4hej8suuyz/C2fA2dufS79J2PKb9vZcWr9+fXb22Wdnw4YNy8aNG5fV1NRkH330UZ5XzUC1t+fTHXfckX35y1/Ohg0blo0ZMyb75je/mW3evDnPq2agee655z71Gsj1d/8qyDKvqQAAACBd3mMLAABA0oQtAAAASRO2AAAAJE3YAgAAkDRhCwAAQNKELQAAAEkTtgAAACRN2AIAAJA0YQsAAEDShC0AAABJE7YAAAAkTdgCAACQNGELAABA0oQtAAAASRO2AAAAJE3YAgAAkDRhCwAAQNKELQAAAEkTtgAAACRN2AIAAJA0YQsAAEDShC0AAABJE7YAAAAkTdgCAACQNGELAABA0oQtAAAASRO2AAAAJE3YAgAAkDRhm2MvvPBCXHDBBTF27NgoKCiIp5566jO3ef7556OioiKKi4vj2GOPjXvuuSf3CwUAAEiUsM2xDz/8ME4++eS466679mj+xo0b47zzzovp06dHU1NT3HjjjTF37tx4/PHHc7xSAACANBVkWZb19yIOFQUFBfHkk0/GzJkzdzvn+uuvjxUrVsT69eu7x6qrq+PVV1+N1atX52GVAAAAaSns7wXQ0+rVq6OqqqrH2LnnnhtLly6Njz/+OIYMGdLndl1dXdHV1dX9fOfOnfHee+/FiBEjoqCgIKdrBgCAQ12WZbF169YYO3ZsDBrkhbH5JmwHmNbW1igrK+sxVlZWFtu3b4+2trYYM2ZMn9vV1tbGrbfemo8lAgAAu7Fp06YYN25cfy/jkCNsB6DfvsO669Xin3bndcGCBVFTU9P9vKOjI44++ujYtGlTlJSU5GahAABARER0dnbG+PHj44gjjujvpRyShO0AM3r06Ghtbe0xtmXLligsLIwRI0bsdruioqIoKirqNV5SUiJsAQAgT7wNsH948fcAM23atGhoaOgx9uyzz8aUKVN2+/5aAACAQ5mwzbEPPvgg1q5dG2vXro2ITz7
OZ+3atdHc3BwRn7yEePbs2d3zq6ur46233oqamppYv359LFu2LJYuXRrXXnttfywfAABgwPNS5Bxbs2ZNnHnmmd3Pd70P9rLLLosHH3wwWlpauiM3IqK8vDzq6+tj/vz5cffdd8fYsWPjjjvuiAsvvDDvawcAAEiBz7E9SHV2dkZpaWl0dHR4jy0AAOSY6+/+5aXIAAAAJE3YAgAAkDRhCwAAQNKELQAAAEkTtgAAACRN2AIAAJA0YQsAAEDShC0AAABJE7YAAAAkTdgCAACQNGELAABA0oQtAAAASRO2AAAAJE3YAgAAkDRhCwAAQNKELQAAAEkTtgAAACRN2AIAAJA0YQsAAEDShC0AAABJE7YAAAAkTdgCAACQNGELAABA0oQtAAAASRO2AAAAJE3YAgAAkDRhCwAAQNKELQAAAEkTtgAAACRN2AIAAJA0YQsAAEDShC0AAABJE7YAAAAkTdgCAACQNGELAABA0oQtAAAASRO2AAAAJE3YAgAAkDRhCwAAQNKELQAAAEkTtgAAACRN2AIAAJA0YQsAAEDShG2eLF68OMrLy6O4uDgqKipi5cqVnzp/+fLlcfLJJ8dhhx0WY8aMiSuuuCLa29vztFoAAIB0CNs8qKuri3nz5sXChQujqakppk+fHjNmzIjm5uY+57/44osxe/bsmDNnTrz++uvx6KOPxiuvvBJXXnllnlcOAAAw8AnbPLj99ttjzpw5ceWVV8bEiRPjBz/4QYwfPz6WLFnS5/yXX345jjnmmJg7d26Ul5fHV7/61fjWt74Va9asyfPKAQAABj5hm2Pbtm2LxsbGqKqq6jFeVVUVq1at6nObysrK2Lx5c9TX10eWZfHuu+/GY489Fueff34+lgwAAJAUYZtjbW1tsWPHjigrK+sxXlZWFq2trX1uU1lZGcuXL49Zs2bF0KFDY/To0XHkkUfGnXfeudvv09XVFZ2dnT0eAAAAhwJhmycFBQU9nmdZ1mtsl3Xr1sXcuXPjpptuisbGxnj66adj48aNUV1dvduvX1tbG6Wlpd2P8ePHH9D1AwAADFQFWZZl/b2Ig9m2bdvisMMOi0cffTT+4A/+oHv8T//0T2Pt2rXx/PPP99rm0ksvjV//+tfx6KOPdo+9+OKLMX369HjnnXdizJgxvbbp6uqKrq6u7uednZ0xfvz46OjoiJKSkgO8VwAAwG/q7OyM0tJS19/9xB3bHBs6dGhUVFREQ0NDj/GGhoaorKzsc5uPPvooBg3q+Z9m8ODBEfHJnd6+FBUVRUlJSY8HAADAoUDY5kFNTU3cf//9sWzZsli/fn3Mnz8/mpubu19avGDBgpg9e3b3/AsuuCCeeOKJWLJkSWzYsCFeeumlmDt3bpx66qkxduzY/toNAACAAamwvxdwKJg1a1a0t7fHokWLoqWlJSZNmhT19fUxYcKEiIhoaWnp8Zm2l19+eWzdujXuuuuu+M53vhNHHnlknHXWWfHd7363v3YBAABgwPIe24OU1/gDAED+uP7uX16KDAAAQNKELQAAAEkTtgAAACRN2AIAAJA0YQsAAEDShC0AAABJE7YAAAAkTdgCAACQNGELAABA0oQtAAAASRO2AAAAJE3YAgAAkDRhCwAAQNKELQAAAEkTtgAAACRN2AIAAJA0YQsAAEDShC0AAABJE7YAAAAkTdgCAACQNGELAABA0oQtAAAASRO2AAAAJE3YAgAAkDRhCwAAQNKELQAAAEkTtgAAACRN2AIAAJA0YQsAAEDShC0AAABJE7YAAAAkTdgCAACQNGELAABA0oQtAAAASRO2AAAAJE3YAgAAkDRhCwAAQNKELQAAAEkTtgAAACRN2AIAAJA0YQsAAEDShC0AAABJE7YAAAAkTdjmyeLFi6O8vDyKi4ujoqIiVq5c+anzu7q6YuHChTFhwoQoKiqK4447LpYtW5an1QIAAKSjsL8XcCioq6uLefPmxeLFi+P000+Pe++9N2bMmBHr1q2Lo48+us9
tLrroonj33Xdj6dKl8cUvfjG2bNkS27dvz/PKAQAABr6CLMuy/l7EwW7q1KkxefLkWLJkSffYxIkTY+bMmVFbW9tr/tNPPx0XX3xxbNiwIYYPH75P37OzszNKS0ujo6MjSkpK9nntAADAZ3P93b+8FDnHtm3bFo2NjVFVVdVjvKqqKlatWtXnNitWrIgpU6bE9773vTjqqKPihBNOiGuvvTZ+9atf5WPJAAAASfFS5Bxra2uLHTt2RFlZWY/xsrKyaG1t7XObDRs2xIsvvhjFxcXx5JNPRltbW1x11VXx3nvv7fZ9tl1dXdHV1dX9vLOz88DtBAAAwADmjm2eFBQU9HieZVmvsV127twZBQUFsXz58jj11FPjvPPOi9tvvz0efPDB3d61ra2tjdLS0u7H+PHjD/g+AAAADETCNsdGjhwZgwcP7nV3dsuWLb3u4u4yZsyYOOqoo6K0tLR7bOLEiZFlWWzevLnPbRYsWBAdHR3dj02bNh24nQAAABjAhG2ODR06NCoqKqKhoaHHeENDQ1RWVva5zemnnx7vvPNOfPDBB91jb7zxRgwaNCjGjRvX5zZFRUVRUlLS4wEAAHAoELZ5UFNTE/fff38sW7Ys1q9fH/Pnz4/m5uaorq6OiE/uts6ePbt7/iWXXBIjRoyIK664ItatWxcvvPBCXHfddfEnf/InMWzYsP7aDQAAgAHJL4/Kg1mzZkV7e3ssWrQoWlpaYtKkSVFfXx8TJkyIiIiWlpZobm7unv+5z30uGhoa4tvf/nZMmTIlRowYERdddFHcdttt/bULAAAAA5bPsT1I+RwtAADIH9ff/ctLkQEAAEiasAUAACBpwhYAAICkCVsAAACSJmwBAABImrAFAAAgacIWAACApAlbAAAAkiZsAQAASJqwBQAAIGnCFgAAgKQJWwAAAJImbAEAAEiasAUAACBpwhYAAICkCVsAAACSJmwBAABImrAFAAAgacIWAACApAlbAAAAkiZsAQAASJqwBQAAIGnCFgAAgKQJWwAAAJImbAEAAEiasAUAACBpwhYAAICkCVsAAACSJmwBAABImrAFAAAgacIWAACApAlbAAAAkiZsAQAASJqwBQAAIGnCFgAAgKQJWwAAAJImbAEAAEiasAUAACBpwhYAAICkCVsAAACSJmwBAABImrAFAAAgacIWAACApAlbAAAAkiZs82Tx4sVRXl4excXFUVFREStXrtyj7V566aUoLCyMU045JbcLBAAASJSwzYO6urqYN29eLFy4MJqammL69OkxY8aMaG5u/tTtOjo6Yvbs2fG1r30tTysFAABIT0GWZVl/L+JgN3Xq1Jg8eXIsWbKke2zixIkxc+bMqK2t3e12F198cRx//PExePDgeOqpp2Lt2rV7/D07OzujtLQ0Ojo6oqSkZH+WDwAAfAbX3/3LHdsc27ZtWzQ2NkZVVVWP8aqqqli1atVut3vggQfizTffjJtvvnmPvk9XV1d0dnb2eAAAABwKhG2OtbW1xY4dO6KsrKzHeFlZWbS2tva5zc9//vO44YYbYvny5VFYWLhH36e2tjZKS0u7H+PHj9/vtQMAAKRA2OZJQUFBj+dZlvUai4jYsWNHXHLJJXHrrbfGCSecsMdff8GCBdHR0dH92LRp036vGQAAIAV7djuQfTZy5MgYPHhwr7uzW7Zs6XUXNyJi69atsWbNmmhqaoprrrkmIiJ27twZWZZFYWFhPPvss3HWWWf12q6oqCiKiopysxMAAAADmDu2OTZ06NCoqKiIhoaGHuMNDQ1RWVnZa35JSUm89tprsXbt2u5HdXV1fOlLX4q1a9fG1KlT87V0AACAJLhjmwc1NTVx6aWXxpQpU2LatGlx3333RXNzc1RXV0fEJy8jfvvtt+Ohhx6KQYMGxaRJk3psP2rUqCguLu41DgAAgLDNi1mzZkV7e3ssWrQoWlpaYtKkSVFfXx8TJkyIiIiWlpbP/ExbAAAA+uZzbA9SPkcLAADyx/V
3//IeWwAAAJImbAEAAEiasAUAACBpwhYAAICkCVsAAACSJmwBAABImrAFAAAgacIWAACApAlbAAAAkiZsAQAASJqwBQAAIGnCFgAAgKQJWwAAAJImbAEAAEiasAUAACBpwhYAAICkCVsAAACSJmwBAABImrAFAAAgacIWAACApAlbAAAAkiZsAQAASJqwBQAAIGnCFgAAgKQJWwAAAJImbAEAAEiasAUAACBpwhYAAICkCVsAAACSJmwBAABImrAFAAAgacIWAACApAlbAAAAkiZsAQAASJqwBQAAIGnCFgAAgKQJWwAAAJImbAEAAEiasAUAACBpwhYAAICkCVsAAACSJmwBAABImrAFAAAgacI2TxYvXhzl5eVRXFwcFRUVsXLlyt3OfeKJJ+Kcc86JL3zhC1FSUhLTpk2LZ555Jo+rBQAASIewzYO6urqYN29eLFy4MJqammL69OkxY8aMaG5u7nP+Cy+8EOecc07U19dHY2NjnHnmmXHBBRdEU1NTnlcOAAAw8BVkWZb19yIOdlOnTo3JkyfHkiVLuscmTpwYM2fOjNra2j36Gl/5yldi1qxZcdNNN+3R/M7OzigtLY2Ojo4oKSnZp3UDAAB7xvV3/3LHNse2bdsWjY2NUVVV1WO8qqoqVq1atUdfY+fOnbF169YYPnx4LpYIAACQtML+XsDBrq2tLXbs2BFlZWU9xsvKyqK1tXWPvsb3v//9+PDDD+Oiiy7a7Zyurq7o6urqft7Z2blvCwYAAEiMO7Z5UlBQ0ON5lmW9xvryyCOPxC233BJ1dXUxatSo3c6rra2N0tLS7sf48eP3e80AAAApELY5NnLkyBg8eHCvu7NbtmzpdRf3t9XV1cWcOXPib/7mb+Lss8/+1LkLFiyIjo6O7semTZv2e+0AAAApELY5NnTo0KioqIiGhoYe4w0NDVFZWbnb7R555JG4/PLL4+GHH47zzz//M79PUVFRlJSU9HgAAAAcCrzHNg9qamri0ksvjSlTpsS0adPivvvui+bm5qiuro6IT+62vv322/HQQw9FxCdRO3v27PjhD38Yp512Wvfd3mHDhkVpaWm/7QcAAMBAJGzzYNasWdHe3h6LFi2KlpaWmDRpUtTX18eECRMiIqKlpaXHZ9ree++9sX379rj66qvj6quv7h6/7LLL4sEHH8z38gEAAAY0n2N7kPI5WgAAkD+uv/uX99gCAACQNGELAABA0oQtAAAASRO2AAAAJE3YAgAAkDRhCwAAQNKELQAAAEkTtgAAACRN2AIAAJA0YQsAAEDShC0AAABJE7YAAAAkTdgCAACQNGELAABA0oQtAAAASRO2AAAAJE3YAgAAkDRhCwAAQNKELQAAAEkTtgAAACRN2AIAAJA0YQsAAEDShC0AAABJE7YAAAAkTdgCAACQNGELAABA0oQtAAAASRO2AAAAJE3YAgAAkDRhCwAAQNKELQAAAEkTtgAAACRN2AIAAJA0YQsAAEDShC0AAABJE7YAAAAkTdgCAACQNGELAABA0oQtAAAASRO2AAAAJE3YAgAAkDRhCwAAQNKELQAAAEkTtgAAACRN2ObJ4sWLo7y8PIqLi6OioiJWrlz5qfOff/75qKioiOLi4jj22GPjnnvuydNKAQAA0iJs86Curi7mzZsXCxcujKamppg+fXrMmDEjmpub+5y/cePGOO+882L69OnR1NQUN954Y8ydOzcef/zxPK8cAABg4CvIsizr70Uc7KZOnRqTJ0+OJUuWdI9NnDgxZs6cGbW1tb3mX3/99bFixYpYv35991h1dXW8+uqrsXr16j36np2dnVFaWhodHR1RUlKy/zsBAADsluvv/lXY3ws42G3bti0aGxvjhhtu6DFeVVUVq1at6nOb1atXR1VVVY+xc889N5YuXRoff/xxDBkypNc2XV1d0dXV1f28o6MjIj75CwYAAOTWrutu9w37h7DNsba2ttixY0eUlZX1GC8rK4vW1tY+t2ltbe1z/vb
t26OtrS3GjBnTa5va2tq49dZbe42PHz9+P1YPAADsjfb29igtLe3vZRxyhG2eFBQU9HieZVmvsc+a39f4LgsWLIiampru5++//35MmDAhmpub/cViv3R2dsb48eNj06ZNXlbDfnEucSA5nzhQnEscKB0dHXH00UfH8OHD+3sphyRhm2MjR46MwYMH97o7u2XLll53ZXcZPXp0n/MLCwtjxIgRfW5TVFQURUVFvcZLS0v9kOaAKCkpcS5xQDiXOJCcTxwoziUOlEGD/H7e/uCo59jQoUOjoqIiGhoaeow3NDREZWVln9tMmzat1/xnn302pkyZ0uf7awEAAA5lwjYPampq4v77749ly5bF+vXrY/78+dHc3BzV1dUR8cnLiGfPnt09v7q6Ot56662oqamJ9evXx7Jly2Lp0qVx7bXX9tcuAAAADFheipwHs2bNivb29li0aFG0tLTEpEmTor6+PiZMmBARES0tLT0+07a8vDzq6+tj/vz5cffdd8fYsWPjjjvuiAsvvHCPv2dRUVHcfPPNfb48GfaGc4kDxbnEgeR84kBxLnGgOJf6l8+xBQAAIGleigwAAEDShC0AAABJE7YAAAAkTdgCAACQNGGbsMWLF0d5eXkUFxdHRUVFrFy58lPnP//881FRURHFxcVx7LHHxj333JOnlTLQ7c259MQTT8Q555wTX/jCF6KkpCSmTZsWzzzzTB5Xy0C2tz+XdnnppZeisLAwTjnllNwukGTs7bnU1dUVCxcujAkTJkRRUVEcd9xxsWzZsjytloFub8+n5cuXx8knnxyHHXZYjBkzJq644opob2/P02oZqF544YW44IILYuzYsVFQUBBPPfXUZ27j+jt/hG2i6urqYt68ebFw4cJoamqK6dOnx4wZM3p8bNBv2rhxY5x33nkxffr0aGpqihtvvDHmzp0bjz/+eJ5XzkCzt+fSCy+8EOecc07U19dHY2NjnHnmmXHBBRdEU1NTnlfOQLO359IuHR0dMXv27Pja176Wp5Uy0O3LuXTRRRfFP/3TP8XSpUvjZz/7WTzyyCNx4okn5nHVDFR7ez69+OKLMXv27JgzZ068/vrr8eijj8Yrr7wSV155ZZ5XzkDz4Ycfxsknnxx33XXXHs13/Z1nGUk69dRTs+rq6h5jJ554YnbDDTf0Of9//a//lZ144ok9xr71rW9lp512Ws7WSBr29lzqy5e//OXs1ltvPdBLIzH7ei7NmjUr+7M/+7Ps5ptvzk4++eQcrpBU7O259A//8A9ZaWlp1t7eno/lkZi9PZ/+8i//Mjv22GN7jN1xxx3ZuHHjcrZG0hMR2ZNPPvmpc1x/55c7tgnatm1bNDY2RlVVVY/xqqqqWLVqVZ/brF69utf8c889N9asWRMff/xxztbKwLYv59Jv27lzZ2zdujWGDx+eiyWSiH09lx544IF488034+abb871EknEvpxLK1asiClTpsT3vve9OOqoo+KEE06Ia6+9Nn71q1/lY8kMYPtyPlVWVsbmzZujvr4+siyLd999Nx577LE4//zz87FkDiKuv/OrsL8XwN5ra2uLHTt2RFlZWY/xsrKyaG1t7XOb1tbWPudv37492traYsyYMTlbLwPXvpxLv+373/9+fPjhh3HRRRflYokkYl/OpZ///Odxww03xMqVK6Ow0P+O+MS+nEsbNmyIF198MYqLi+PJJ5+Mtra2uOqqq+K9997zPttD3L6cT5WVlbF8+fKYNWtW/PrXv47t27fH17/+9bjzzjvzsWQOIq6/88sd24QVFBT0eJ5lWa+xz5rf1ziHnr09l3Z55JFH4pZbbom6uroYNWpUrpZHQvb0XNqxY0dccsklceutt8YJJ5yQr+WRkL35ubRz584oKCiI5cuXx6mnnhrnnXde3H777fHggw+6a0tE7N35tG7dupg7d27cdNNN0djYGE8//XRs3Lgxqqur87FUDjKuv/PHP5EnaOTIkTF48OBe/9K4ZcuWXv8qtMv
o0aP7nF9YWBgjRozI2VoZ2PblXNqlrq4u5syZE48++micffbZuVwmCdjbc2nr1q2xZs2aaGpqimuuuSYiPomTLMuisLAwnn322TjrrLPysnYGln35uTRmzJg46qijorS0tHts4sSJkWVZbN68OY4//vicrpmBa1/Op9ra2jj99NPjuuuui4iIk046KQ4//PCYPn163Hbbbe6yscdcf+eXO7YJGjp0aFRUVERDQ0OP8YaGhqisrOxzm2nTpvWa/+yzz8aUKVNiyJAhOVsrA9u+nEsRn9ypvfzyy+Phhx/2niMiYu/PpZKSknjttddi7dq13Y/q6ur40pe+FGvXro2pU6fma+kMMPvyc+n000+Pd955Jz744IPusTfeeCMGDRoU48aNy+l6Gdj25Xz66KOPYtCgnpfIgwcPjoj/vNsGe8L1d5710y+tYj/96Ec/yoYMGZItXbo0W7duXTZv3rzs8MMPz37xi19kWZZlN9xwQ3bppZd2z9+wYUN22GGHZfPnz8/WrVuXLV26NBsyZEj22GOP9dcuMEDs7bn08MMPZ4WFhdndd9+dtbS0dD/ef//9/toFBoi9PZd+m9+KzC57ey5t3bo1GzduXPZHf/RH2euvv549//zz2fHHH59deeWV/bULDCB7ez498MADWWFhYbZ48eLszTffzF588cVsypQp2amnntpfu8AAsXXr1qypqSlramrKIiK7/fbbs6ampuytt97Kssz1d38Ttgm7++67swkTJmRDhw7NJk+enD3//PPdf3bZZZdlZ5xxRo/5P/7xj7Pf+Z3fyYYOHZodc8wx2ZIlS/K8YgaqvTmXzjjjjCwiej0uu+yy/C+cAWdvfy79JmHLb9rbc2n9+vXZ2WefnQ0bNiwbN25cVlNTk3300Ud5XjUD1d6eT3fccUf25S9/ORs2bFg2ZsyY7Jvf/Ga2efPmPK+agea555771Gsg19/9qyDLvKYCAACAdHmPLQAAAEkTtgAAACRN2AIAAJA0YQsAAEDShC0AAABJE7YAAAAkTdgCAACQNGELAABA0oQtAAAASRO2AAAAJE3YAgAAkDRhCwAAQNKELQAAAEkTtgAAACRN2AIAAJA0YQsAAEDShC0AAABJE7YAAAAkTdgCAACQNGELAABA0oQtAAAASRO2AAAAJE3YAgAAkDRhCwAAQNKELQAAAEkTtgAAACRN2AIAAJA0YQsAAEDShC0AAABJE7YAAAAkTdgCAACQNGELAABA0oQtAAAASRO2AAAAJE3YAgAAkDRhCwAAQNKELQAAAEkTtgAAACRN2AIAAJA0YQsAAEDShC0AAABJE7YAAAAkTdgCAACQNGELAABA0oQtAAAASRO2AAAAJE3YAgAAkDRhCwAAQNKELQAAAEkTtgAAACRN2AIAAJA0YQsAAEDShC0AAABJE7YAAAAkTdgCAACQNGELAABA0oQtAAAASRO2AAAAJE3YAgAAkDRhCwAAQNKELQAAAEkTtgAAACRN2AIAAJA0YQsAAEDShC0AAABJE7YAAAAkTdgCAACQNGELAABA0oQtAAAASRO2AAAAJE3YAgAAkDRhCwAAQNKELQAAAEkTtgAAACRN2AIAAJA0YQsAAEDShC0AAABJE7YAAAAkTdgCAACQNGELAABA0oQtAAAASRO2AAAAJE3YAgAAkDRhCwAAQNKELQAAAEkTtgAAACRN2AIAAJA0YQsAAEDShC0AAABJE7YAAAAkTdgCAACQNGELAABA0oQtAAAASRO2AAAAJE3YAgAAkDRhCwAAQNKELQAAAEkTtgAAACRN2AIAAJA0YQsAAEDShC0AAABJE7YAAAAkTdgCAACQNGELAABA0oQtAAAASRO2AAAAJE3YAgAAkDRhCwAAQNKELQAAAEkTtgAAACTt/wcLAF7/XlacegAAAABJRU5ErkJggg==", - "text/html": [ - "\n", - "
\n", - "
\n", - " Figure\n", - "
\n", - " \n", - "
\n", - " " - ], - "text/plain": [ - "Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "ts_display = act.plotting.TimeSeriesDisplay(ds)\n", - "ts_display.add_subplots((len(variables_to_plot),), figsize = (9.5,4*len(variables_to_plot)))\n", - "\n", - "for i,v in enumerate(variables_to_plot):\n", - " ts_ax = ts_display.plot(v, subplot_index=(i,), set_title=ds.variables[v].attrs['long_name'],)\n", - " ts_ax.grid()\n", - "\n", - "plt.show()\n" - ] - }, - { - "cell_type": "markdown", - "id": "194399aa-1907-452b-8ba9-bc31d7f60291", - "metadata": {}, - "source": [ - "## Quality check plots\n", - "#### Define variable for QC plot" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "9a0ef63d-3eeb-48fc-a24a-43258b6134b8", - "metadata": {}, - "outputs": [], - "source": [ - "qc_variable = 'sensor_temperature_rise'" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "532663a3-4dc0-4497-bda8-018c5f91e1c4", - "metadata": {}, - "outputs": [], - "source": [ - "# QC Plot\n", - "if ('qc_'+qc_variable) in ds.variables:\n", - "\n", - " # Plot\n", - " qc_display = act.plotting.TimeSeriesDisplay(ds)\n", - " qc_display.add_subplots((2,), figsize = (9.5,10))\n", - " qc_ax = qc_display.plot(qc_variable, subplot_index=(0,), set_title=\"QC results on field: \" + qc_variable,)\n", - " qc_ax.grid()\n", - " qc_display.qc_flag_block_plot(qc_variable, subplot_index=(1,))\n", - "\n", - " plt.show()\n", - "else:\n", - " print(f'QC not available for the selected field: {qc_variable}')\n" - ] - }, - { - "cell_type": "markdown", - "id": "2308db16-f362-4033-a11a-c3e5e75ad9ba", - "metadata": {}, - "source": [ - "## Field selection dropdown menu\n", - "Select variable to be plotted from a dropdown menu" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "d5792fa8-2253-441a-8ac6-1e07a0345031", - 
"metadata": {}, - "outputs": [], - "source": [ - "plt.ioff()\n", - "\n", - "# populate dropdown menu with available variables \n", - "available_variables = [v for v in ds.variables if not('time' in v or v.startswith('qc_') or v.startswith('source_')) and 'long_name' in ds.variables[v].attrs]\n", - "d_variable = 'sensor_temperature_rise'\n", - "dropdown = widgets.Dropdown(\n", - " options = [(ds.variables[v].attrs['long_name'], v) for v in available_variables],\n", - " value= d_variable,\n", - " description='Field:',\n", - " disabled=False,\n", - ")\n", - "dropdown.layout.margin = '0px 30% 0px 20%'\n", - "dropdown.layout.width = '50%'\n", - "\n", - "# set up display\n", - "i_display = act.plotting.TimeSeriesDisplay(ds)\n", - "i_display.add_subplots((1,), figsize = (9.5,5))\n", - "i_ax = i_display.plot(d_variable, subplot_index=(0,), set_title=ds.variables[d_variable].attrs['long_name'],)\n", - "i_ax.grid()\n", - "i_fig = i_display.fig\n", - "\n", - "# update plot callback function\n", - "def update_plot(change):\n", - " i_ax.cla()\n", - " i_ax_new = i_display.plot(change.new, subplot_index=(0,), set_title=ds.variables[change.new].attrs['long_name'],)\n", - " i_ax_new.grid()\n", - " i_fig.canvas.draw()\n", - " i_fig.canvas.flush_events()\n", - "\n", - "dropdown.observe(update_plot, names='value')\n", - "\n", - "widgets.AppLayout(\n", - " header=dropdown,\n", - " center=i_fig.canvas,\n", - " pane_heights=[1, 6,1]\n", - ")\n" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.16" - }, - "widgets": { - "application/vnd.jupyter.widget-state+json": { - "state": {}, - "version_major": 2, - "version_minor": 0 - } - } - }, - "nbformat": 4, - 
"nbformat_minor": 5 -} diff --git a/VAPs/quicklook/PBLHT/.ipynb_checkpoints/pblhtsonde1mcfarl.c1-checkpoint.ipynb b/VAPs/quicklook/PBLHT/.ipynb_checkpoints/pblhtsonde1mcfarl.c1-checkpoint.ipynb deleted file mode 100644 index b0b96878..00000000 --- a/VAPs/quicklook/PBLHT/.ipynb_checkpoints/pblhtsonde1mcfarl.c1-checkpoint.ipynb +++ /dev/null @@ -1,679 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "70840257-70e4-45e2-b491-14bff5a257a3", - "metadata": {}, - "source": [ - "# PBLHTSONDE1MCFARL.C1 Plots\n", - "\n", - "[Click here](https://www.arm.gov/capabilities/vaps/pblht) for more information about this vap." - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "id": "460fd89f-e034-452c-b837-f65c5958264f", - "metadata": {}, - "outputs": [], - "source": [ - "%matplotlib widget\n", - "import ipywidgets as widgets\n", - "\n", - "import matplotlib.pyplot as plt\n", - "import ipywidgets as widgets\n", - "import numpy as np\n", - "import pandas as pd\n", - "import os\n", - "from datetime import datetime\n", - "\n", - "import act\n", - "import xarray as xr\n", - "\n", - "# Data archive directory\n", - "DATA_DIR = r'/data/archive/'\n", - "\n", - "# Datastream info\n", - "DATASTREAM_NAME = 'pblhtsonde1mcfarl'\n", - "DATA_LEVEL = 'c1'\n", - "LOCATIONS = [{'end_date': '2017-01-02', 'facility': 'M1', 'site': 'awr', 'start_date': '2015-11-30'}, {'end_date': '2016-01-18', 'facility': 'S1', 'site': 'awr', 'start_date': '2015-12-01'}, {'end_date': '2017-10-31', 'facility': 'S1', 'site': 'asi', 'start_date': '2016-04-29'}, {'end_date': '2015-02-09', 'facility': 'M1', 'site': 'acx', 'start_date': '2015-01-12'}, {'end_date': '2020-05-31', 'facility': 'M1', 'site': 'anx', 'start_date': '2019-12-02'}, {'end_date': '2023-12-12', 'facility': 'C1', 'site': 'nsa', 'start_date': '2002-04-28'}, {'end_date': '2012-03-31', 'facility': 'M1', 'site': 'pgh', 'start_date': '2011-06-15'}, {'end_date': '2013-06-29', 'facility': 'M1', 'site': 'pvc', 'start_date': '2012-06-25'}, 
{'end_date': '2021-06-14', 'facility': 'M1', 'site': 'oli', 'start_date': '2013-10-03'}, {'end_date': '2020-10-01', 'facility': 'M1', 'site': 'mos', 'start_date': '2019-10-11'}, {'end_date': '2015-12-01', 'facility': 'M1', 'site': 'mao', 'start_date': '2014-01-01'}, {'end_date': '2012-04-08', 'facility': 'M1', 'site': 'gan', 'start_date': '2011-09-15'}, {'end_date': '2011-01-05', 'facility': 'M1', 'site': 'grw', 'start_date': '2009-04-16'}, {'end_date': '2013-10-03', 'facility': 'M1', 'site': 'mag', 'start_date': '2012-10-01'}, {'end_date': '2018-03-24', 'facility': 'M1', 'site': 'mar', 'start_date': '2017-10-31'}, {'end_date': '2023-06-15', 'facility': 'M1', 'site': 'guc', 'start_date': '2021-09-01'}, {'end_date': '2023-12-12', 'facility': 'M1', 'site': 'epc', 'start_date': '2023-02-06'}, {'end_date': '2008-12-28', 'facility': 'M1', 'site': 'hfe', 'start_date': '2008-05-14'}, {'end_date': '2019-04-29', 'facility': 'M1', 'site': 'cor', 'start_date': '2018-09-27'}, {'end_date': '2022-10-01', 'facility': 'M1', 'site': 'hou', 'start_date': '2021-09-18'}, {'end_date': '2022-09-25', 'facility': 'S1', 'site': 'hou', 'start_date': '2021-08-28'}, {'end_date': '2023-12-13', 'facility': 'C1', 'site': 'ena', 'start_date': '2013-09-28'}, {'end_date': '2008-01-01', 'facility': 'M1', 'site': 'fkb', 'start_date': '2007-03-24'}, {'end_date': '2007-01-08', 'facility': 'M1', 'site': 'nim', 'start_date': '2006-01-07'}, {'end_date': '2005-09-15', 'facility': 'M1', 'site': 'pye', 'start_date': '2005-02-25'}, {'end_date': '2011-04-24', 'facility': 'M1', 'site': 'sbs', 'start_date': '2010-11-08'}, {'end_date': '2007-06-13', 'facility': 'B1', 'site': 'sgp', 'start_date': '2002-05-13'}, {'end_date': '2007-06-29', 'facility': 'B4', 'site': 'sgp', 'start_date': '2002-05-20'}, {'end_date': '2007-06-22', 'facility': 'B5', 'site': 'sgp', 'start_date': '2002-05-20'}, {'end_date': '2002-11-26', 'facility': 'B6', 'site': 'sgp', 'start_date': '2001-06-20'}, {'end_date': '2023-12-11', 'facility': 
'C1', 'site': 'sgp', 'start_date': '2001-04-01'}, {'end_date': '2014-09-12', 'facility': 'M1', 'site': 'tmp', 'start_date': '2014-02-01'}, {'end_date': '2014-07-07', 'facility': 'C1', 'site': 'twp', 'start_date': '2001-04-03'}, {'end_date': '2013-08-25', 'facility': 'C2', 'site': 'twp', 'start_date': '2001-04-01'}, {'end_date': '2015-01-14', 'facility': 'C3', 'site': 'twp', 'start_date': '2002-04-28'}]" - ] - }, - { - "cell_type": "markdown", - "id": "9faaf875", - "metadata": {}, - "source": [ - "## Define site, facility, and date range" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "ac6764f5", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The following locations and date ranges are available for this VAP:\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
sitefacilitystart_dateend_date
0awrM12015-11-302017-01-02
1awrS12015-12-012016-01-18
2asiS12016-04-292017-10-31
3acxM12015-01-122015-02-09
4anxM12019-12-022020-05-31
5nsaC12002-04-282023-12-12
6pghM12011-06-152012-03-31
7pvcM12012-06-252013-06-29
8oliM12013-10-032021-06-14
9mosM12019-10-112020-10-01
10maoM12014-01-012015-12-01
11ganM12011-09-152012-04-08
12grwM12009-04-162011-01-05
13magM12012-10-012013-10-03
14marM12017-10-312018-03-24
15gucM12021-09-012023-06-15
16epcM12023-02-062023-12-12
17hfeM12008-05-142008-12-28
18corM12018-09-272019-04-29
19houM12021-09-182022-10-01
20houS12021-08-282022-09-25
21enaC12013-09-282023-12-13
22fkbM12007-03-242008-01-01
23nimM12006-01-072007-01-08
24pyeM12005-02-252005-09-15
25sbsM12010-11-082011-04-24
26sgpB12002-05-132007-06-13
27sgpB42002-05-202007-06-29
28sgpB52002-05-202007-06-22
29sgpB62001-06-202002-11-26
30sgpC12001-04-012023-12-11
31tmpM12014-02-012014-09-12
32twpC12001-04-032014-07-07
33twpC22001-04-012013-08-25
34twpC32002-04-282015-01-14
\n", - "
" - ], - "text/plain": [ - " site facility start_date end_date\n", - "0 awr M1 2015-11-30 2017-01-02\n", - "1 awr S1 2015-12-01 2016-01-18\n", - "2 asi S1 2016-04-29 2017-10-31\n", - "3 acx M1 2015-01-12 2015-02-09\n", - "4 anx M1 2019-12-02 2020-05-31\n", - "5 nsa C1 2002-04-28 2023-12-12\n", - "6 pgh M1 2011-06-15 2012-03-31\n", - "7 pvc M1 2012-06-25 2013-06-29\n", - "8 oli M1 2013-10-03 2021-06-14\n", - "9 mos M1 2019-10-11 2020-10-01\n", - "10 mao M1 2014-01-01 2015-12-01\n", - "11 gan M1 2011-09-15 2012-04-08\n", - "12 grw M1 2009-04-16 2011-01-05\n", - "13 mag M1 2012-10-01 2013-10-03\n", - "14 mar M1 2017-10-31 2018-03-24\n", - "15 guc M1 2021-09-01 2023-06-15\n", - "16 epc M1 2023-02-06 2023-12-12\n", - "17 hfe M1 2008-05-14 2008-12-28\n", - "18 cor M1 2018-09-27 2019-04-29\n", - "19 hou M1 2021-09-18 2022-10-01\n", - "20 hou S1 2021-08-28 2022-09-25\n", - "21 ena C1 2013-09-28 2023-12-13\n", - "22 fkb M1 2007-03-24 2008-01-01\n", - "23 nim M1 2006-01-07 2007-01-08\n", - "24 pye M1 2005-02-25 2005-09-15\n", - "25 sbs M1 2010-11-08 2011-04-24\n", - "26 sgp B1 2002-05-13 2007-06-13\n", - "27 sgp B4 2002-05-20 2007-06-29\n", - "28 sgp B5 2002-05-20 2007-06-22\n", - "29 sgp B6 2001-06-20 2002-11-26\n", - "30 sgp C1 2001-04-01 2023-12-11\n", - "31 tmp M1 2014-02-01 2014-09-12\n", - "32 twp C1 2001-04-03 2014-07-07\n", - "33 twp C2 2001-04-01 2013-08-25\n", - "34 twp C3 2002-04-28 2015-01-14" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "print(\"The following locations and date ranges are available for this VAP:\")\n", - "display(pd.DataFrame(LOCATIONS, columns=['site', 'facility', 'start_date', 'end_date']))" - ] - }, - { - "cell_type": "markdown", - "id": "8d132223", - "metadata": {}, - "source": [ - "#### Define site, facility, and date range (date format: YYYY-MM-DD) using the variables below:" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "e563983a", - "metadata": {}, - "outputs": [], - "source": 
[ - "site_facility = ( 'sgp', 'B1' )\n", - "\n", - "date_start = '2007-06-10'\n", - "date_end = '2007-06-12'" - ] - }, - { - "cell_type": "markdown", - "id": "bccd3dfe-2f99-49a5-bace-ea37e8dd8fc3", - "metadata": {}, - "source": [ - "## Load data files\n", - "Load data files from /data/archive/" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "cb4b9a26-c574-49c0-a521-658fa553e39e", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "'/data/archive/sgp/sgppblhtsonde1mcfarlB1.c1'" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Compile list of files\n", - "site, facility = site_facility\n", - "d_date_start = datetime.strptime(date_start, '%Y-%m-%d')\n", - "d_date_end = datetime.strptime(date_end, '%Y-%m-%d')\n", - "dir_path = os.path.join(DATA_DIR + site, site + DATASTREAM_NAME + facility + r'.' + DATA_LEVEL )\n", - "dir_path\n" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "6be8f3dc", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "['20070610', '20070611', '20070612']" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "from datetime import date, timedelta\n", - "import pandas as pd\n", - "\n", - "def get_ARM_formated_dates(start_date, end_date):\n", - " \"\"\"\n", - " Get a list of ARM conventional formated date lists, based on start_date and end_date(inclusive)\n", - " EXAMPLE:\n", - " get_ARM_formated_dates(start_date=\"20180219\", end_date=\"20180221\")\n", - " >> [\"20180219\", \"20180220\", \"20180221\"] \n", - " \"\"\"\n", - " \n", - " _start_date = pd.to_datetime(start_date)\n", - " _end_date = pd.to_datetime(end_date)\n", - " \n", - " delta = _end_date - _start_date # returns timedelta \n", - " dates = []\n", - "\n", - " for i in range(delta.days + 1):\n", - " day = _start_date + timedelta(days=i)\n", - " day_formated = day.strftime(format=\"%Y%m%d\")\n", 
- " dates.append(day_formated)\n", - " return dates\n", - "\n", - "\n", - "get_ARM_formated_dates(start_date=date_start, end_date=date_end)" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "51feea2e", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "['/data/archive/sgp/sgppblhtsonde1mcfarlB1.c1/sgppblhtsonde1mcfarlB1.c1.20070611.150200.cdf',\n", - " '/data/archive/sgp/sgppblhtsonde1mcfarlB1.c1/sgppblhtsonde1mcfarlB1.c1.20070611.173000.cdf',\n", - " '/data/archive/sgp/sgppblhtsonde1mcfarlB1.c1/sgppblhtsonde1mcfarlB1.c1.20070611.202900.cdf',\n", - " '/data/archive/sgp/sgppblhtsonde1mcfarlB1.c1/sgppblhtsonde1mcfarlB1.c1.20070611.232800.cdf',\n", - " '/data/archive/sgp/sgppblhtsonde1mcfarlB1.c1/sgppblhtsonde1mcfarlB1.c1.20070612.052900.cdf',\n", - " '/data/archive/sgp/sgppblhtsonde1mcfarlB1.c1/sgppblhtsonde1mcfarlB1.c1.20070612.112900.cdf',\n", - " '/data/archive/sgp/sgppblhtsonde1mcfarlB1.c1/sgppblhtsonde1mcfarlB1.c1.20070612.143100.cdf',\n", - " '/data/archive/sgp/sgppblhtsonde1mcfarlB1.c1/sgppblhtsonde1mcfarlB1.c1.20070612.082900.cdf',\n", - " '/data/archive/sgp/sgppblhtsonde1mcfarlB1.c1/sgppblhtsonde1mcfarlB1.c1.20070612.233900.cdf',\n", - " '/data/archive/sgp/sgppblhtsonde1mcfarlB1.c1/sgppblhtsonde1mcfarlB1.c1.20070612.173100.cdf',\n", - " '/data/archive/sgp/sgppblhtsonde1mcfarlB1.c1/sgppblhtsonde1mcfarlB1.c1.20070612.023100.cdf']" - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Filter a list of files based on date pattern\n", - "import glob\n", - "dates = get_ARM_formated_dates(start_date=date_start, end_date=date_end)\n", - "files_filter = []\n", - "for date in dates:\n", - " files_filter += glob.glob(f'{dir_path}/*.{date}*.*')\n", - " files_filter\n", - "files_filter" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "id": "b0e5d0d7", - "metadata": {}, - "outputs": [ - { - "ename": "ValueError", - "evalue": "Coordinate variable height_ss is 
neither monotonically increasing nor monotonically decreasing on all datasets", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[7], line 3\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;66;03m# Load files as a single dataset\u001b[39;00m\n\u001b[1;32m 2\u001b[0m files_list \u001b[38;5;241m=\u001b[39m files_filter \n\u001b[0;32m----> 3\u001b[0m ds \u001b[38;5;241m=\u001b[39m \u001b[43mact\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mio\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43marmfiles\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mread_netcdf\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfiles_list\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 4\u001b[0m ds\u001b[38;5;241m.\u001b[39mclean\u001b[38;5;241m.\u001b[39mcleanup()\n\u001b[1;32m 5\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mlen\u001b[39m(files_list)\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m files loaded\u001b[39m\u001b[38;5;124m'\u001b[39m)\n", - "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/act/io/armfiles.py:168\u001b[0m, in \u001b[0;36mread_netcdf\u001b[0;34m(filenames, concat_dim, return_None, combine, decode_times, use_cftime, use_base_time, combine_attrs, cleanup_qc, keep_variables, **kwargs)\u001b[0m\n\u001b[1;32m 164\u001b[0m ds \u001b[38;5;241m=\u001b[39m xr\u001b[38;5;241m.\u001b[39mopen_mfdataset(filenames, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n\u001b[1;32m 166\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 167\u001b[0m \u001b[38;5;66;03m# When all else fails raise the orginal exception\u001b[39;00m\n\u001b[0;32m--> 168\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m exception\n\u001b[1;32m 170\u001b[0m \u001b[38;5;66;03m# If 
requested use base_time and time_offset to derive time. Assumes that the units\u001b[39;00m\n\u001b[1;32m 171\u001b[0m \u001b[38;5;66;03m# of both are in seconds and that the value is number of seconds since epoch.\u001b[39;00m\n\u001b[1;32m 172\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m use_base_time:\n", - "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/act/io/armfiles.py:143\u001b[0m, in \u001b[0;36mread_netcdf\u001b[0;34m(filenames, concat_dim, return_None, combine, decode_times, use_cftime, use_base_time, combine_attrs, cleanup_qc, keep_variables, **kwargs)\u001b[0m\n\u001b[1;32m 139\u001b[0m except_tuple \u001b[38;5;241m=\u001b[39m except_tuple \u001b[38;5;241m+\u001b[39m (\u001b[38;5;167;01mFileNotFoundError\u001b[39;00m, \u001b[38;5;167;01mOSError\u001b[39;00m)\n\u001b[1;32m 141\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m 142\u001b[0m \u001b[38;5;66;03m# Read data file with Xarray function\u001b[39;00m\n\u001b[0;32m--> 143\u001b[0m ds \u001b[38;5;241m=\u001b[39m \u001b[43mxr\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mopen_mfdataset\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfilenames\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 145\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m except_tuple \u001b[38;5;28;01mas\u001b[39;00m exception:\n\u001b[1;32m 146\u001b[0m \u001b[38;5;66;03m# If requested return None for File not found error\u001b[39;00m\n\u001b[1;32m 147\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mtype\u001b[39m(exception)\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__name__\u001b[39m \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mFileNotFoundError\u001b[39m\u001b[38;5;124m'\u001b[39m:\n", - "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/xarray/backends/api.py:1026\u001b[0m, in 
\u001b[0;36mopen_mfdataset\u001b[0;34m(paths, chunks, concat_dim, compat, preprocess, engine, data_vars, coords, combine, parallel, join, attrs_file, combine_attrs, **kwargs)\u001b[0m\n\u001b[1;32m 1013\u001b[0m combined \u001b[38;5;241m=\u001b[39m _nested_combine(\n\u001b[1;32m 1014\u001b[0m datasets,\n\u001b[1;32m 1015\u001b[0m concat_dims\u001b[38;5;241m=\u001b[39mconcat_dim,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 1021\u001b[0m combine_attrs\u001b[38;5;241m=\u001b[39mcombine_attrs,\n\u001b[1;32m 1022\u001b[0m )\n\u001b[1;32m 1023\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m combine \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mby_coords\u001b[39m\u001b[38;5;124m\"\u001b[39m:\n\u001b[1;32m 1024\u001b[0m \u001b[38;5;66;03m# Redo ordering from coordinates, ignoring how they were ordered\u001b[39;00m\n\u001b[1;32m 1025\u001b[0m \u001b[38;5;66;03m# previously\u001b[39;00m\n\u001b[0;32m-> 1026\u001b[0m combined \u001b[38;5;241m=\u001b[39m \u001b[43mcombine_by_coords\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1027\u001b[0m \u001b[43m \u001b[49m\u001b[43mdatasets\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1028\u001b[0m \u001b[43m \u001b[49m\u001b[43mcompat\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcompat\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1029\u001b[0m \u001b[43m \u001b[49m\u001b[43mdata_vars\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdata_vars\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1030\u001b[0m \u001b[43m \u001b[49m\u001b[43mcoords\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcoords\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1031\u001b[0m \u001b[43m \u001b[49m\u001b[43mjoin\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mjoin\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1032\u001b[0m \u001b[43m \u001b[49m\u001b[43mcombine_attrs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcombine_attrs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1033\u001b[0m \u001b[43m 
\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1034\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 1035\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[1;32m 1036\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m{}\u001b[39;00m\u001b[38;5;124m is an invalid option for the keyword argument\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 1037\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m ``combine``\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;241m.\u001b[39mformat(combine)\n\u001b[1;32m 1038\u001b[0m )\n", - "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/xarray/core/combine.py:982\u001b[0m, in \u001b[0;36mcombine_by_coords\u001b[0;34m(data_objects, compat, data_vars, coords, fill_value, join, combine_attrs, datasets)\u001b[0m\n\u001b[1;32m 980\u001b[0m concatenated_grouped_by_data_vars \u001b[38;5;241m=\u001b[39m []\n\u001b[1;32m 981\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m \u001b[38;5;28mvars\u001b[39m, datasets_with_same_vars \u001b[38;5;129;01min\u001b[39;00m grouped_by_vars:\n\u001b[0;32m--> 982\u001b[0m concatenated \u001b[38;5;241m=\u001b[39m \u001b[43m_combine_single_variable_hypercube\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 983\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;28;43mlist\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mdatasets_with_same_vars\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 984\u001b[0m \u001b[43m \u001b[49m\u001b[43mfill_value\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mfill_value\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 985\u001b[0m \u001b[43m \u001b[49m\u001b[43mdata_vars\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdata_vars\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 986\u001b[0m \u001b[43m \u001b[49m\u001b[43mcoords\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcoords\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 987\u001b[0m \u001b[43m 
\u001b[49m\u001b[43mcompat\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcompat\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 988\u001b[0m \u001b[43m \u001b[49m\u001b[43mjoin\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mjoin\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 989\u001b[0m \u001b[43m \u001b[49m\u001b[43mcombine_attrs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcombine_attrs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 990\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 991\u001b[0m concatenated_grouped_by_data_vars\u001b[38;5;241m.\u001b[39mappend(concatenated)\n\u001b[1;32m 993\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m merge(\n\u001b[1;32m 994\u001b[0m concatenated_grouped_by_data_vars,\n\u001b[1;32m 995\u001b[0m compat\u001b[38;5;241m=\u001b[39mcompat,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 998\u001b[0m combine_attrs\u001b[38;5;241m=\u001b[39mcombine_attrs,\n\u001b[1;32m 999\u001b[0m )\n", - "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/xarray/core/combine.py:629\u001b[0m, in \u001b[0;36m_combine_single_variable_hypercube\u001b[0;34m(datasets, fill_value, data_vars, coords, compat, join, combine_attrs)\u001b[0m\n\u001b[1;32m 623\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(datasets) \u001b[38;5;241m==\u001b[39m \u001b[38;5;241m0\u001b[39m:\n\u001b[1;32m 624\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[1;32m 625\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mAt least one Dataset is required to resolve variable names \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 626\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mfor combined hypercube.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 627\u001b[0m )\n\u001b[0;32m--> 629\u001b[0m combined_ids, concat_dims \u001b[38;5;241m=\u001b[39m 
\u001b[43m_infer_concat_order_from_coords\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mlist\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mdatasets\u001b[49m\u001b[43m)\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 631\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m fill_value \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 632\u001b[0m \u001b[38;5;66;03m# check that datasets form complete hypercube\u001b[39;00m\n\u001b[1;32m 633\u001b[0m _check_shape_tile_ids(combined_ids)\n", - "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/xarray/core/combine.py:116\u001b[0m, in \u001b[0;36m_infer_concat_order_from_coords\u001b[0;34m(datasets)\u001b[0m\n\u001b[1;32m 114\u001b[0m ascending \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mFalse\u001b[39;00m\n\u001b[1;32m 115\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m--> 116\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[1;32m 117\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mCoordinate variable \u001b[39m\u001b[38;5;132;01m{}\u001b[39;00m\u001b[38;5;124m is neither \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 118\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mmonotonically increasing nor \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 119\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mmonotonically decreasing on all datasets\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;241m.\u001b[39mformat(dim)\n\u001b[1;32m 120\u001b[0m )\n\u001b[1;32m 122\u001b[0m \u001b[38;5;66;03m# Assume that any two datasets whose coord along dim starts\u001b[39;00m\n\u001b[1;32m 123\u001b[0m \u001b[38;5;66;03m# with the same value have the same coord values throughout.\u001b[39;00m\n\u001b[1;32m 124\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28many\u001b[39m(index\u001b[38;5;241m.\u001b[39msize \u001b[38;5;241m==\u001b[39m \u001b[38;5;241m0\u001b[39m \u001b[38;5;28;01mfor\u001b[39;00m 
index \u001b[38;5;129;01min\u001b[39;00m indexes):\n", - "\u001b[0;31mValueError\u001b[0m: Coordinate variable height_ss is neither monotonically increasing nor monotonically decreasing on all datasets" - ] - } - ], - "source": [ - "# Load files as a single dataset\n", - "files_list = files_filter \n", - "ds = act.io.armfiles.read_netcdf(files_list)\n", - "ds.clean.cleanup()\n", - "print(f'{len(files_list)} files loaded')\n", - "ds\n" - ] - }, - { - "cell_type": "markdown", - "id": "4a551094-9ec0-4b64-b80a-9940573c2f50", - "metadata": {}, - "source": [ - "## Plot time series data\n", - "#### Define the list of variables to be plotted:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "d09b789e-84f1-4605-846b-a72c110c8048", - "metadata": {}, - "outputs": [], - "source": [ - "variables_to_plot = ['pbl_height_heffter', 'pbl_height_liu_liang', 'pbl_height_bulk_richardson_pt25']" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "3458fb08-035b-4898-9253-0a94e6f9c97b", - "metadata": {}, - "outputs": [], - "source": [ - "ts_display = act.plotting.TimeSeriesDisplay(ds)\n", - "ts_display.add_subplots((len(variables_to_plot),), figsize = (9.5,4*len(variables_to_plot)))\n", - "\n", - "for i,v in enumerate(variables_to_plot):\n", - " ts_ax = ts_display.plot(v, subplot_index=(i,), set_title=ds.variables[v].attrs['long_name'],)\n", - " ts_ax.grid()\n", - "\n", - "plt.show()\n" - ] - }, - { - "cell_type": "markdown", - "id": "2308db16-f362-4033-a11a-c3e5e75ad9ba", - "metadata": {}, - "source": [ - "## Field selection dropdown menu\n", - "Select variable to be plotted from a dropdown menu" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "d5792fa8-2253-441a-8ac6-1e07a0345031", - "metadata": {}, - "outputs": [], - "source": [ - "plt.ioff()\n", - "\n", - "# populate dropdown menu with available variables \n", - "available_variables = [v for v in ds.variables if not('time' in v or v.startswith('qc_') or 
v.startswith('source_')) and 'long_name' in ds.variables[v].attrs]\n", - "d_variable = 'pbl_height_heffter'\n", - "dropdown = widgets.Dropdown(\n", - " options = [(ds.variables[v].attrs['long_name'], v) for v in available_variables],\n", - " value= d_variable,\n", - " description='Field:',\n", - " disabled=False,\n", - ")\n", - "dropdown.layout.margin = '0px 30% 0px 20%'\n", - "dropdown.layout.width = '50%'\n", - "\n", - "# set up display\n", - "i_display = act.plotting.TimeSeriesDisplay(ds)\n", - "i_display.add_subplots((1,), figsize = (9.5,5))\n", - "i_ax = i_display.plot(d_variable, subplot_index=(0,), set_title=ds.variables[d_variable].attrs['long_name'],)\n", - "i_ax.grid()\n", - "i_fig = i_display.fig\n", - "\n", - "# update plot callback function\n", - "def update_plot(change):\n", - " i_ax.cla()\n", - " i_ax_new = i_display.plot(change.new, subplot_index=(0,), set_title=ds.variables[change.new].attrs['long_name'],)\n", - " i_ax_new.grid()\n", - " i_fig.canvas.draw()\n", - " i_fig.canvas.flush_events()\n", - "\n", - "dropdown.observe(update_plot, names='value')\n", - "\n", - "widgets.AppLayout(\n", - " header=dropdown,\n", - " center=i_fig.canvas,\n", - " pane_heights=[1, 6,1]\n", - ")\n" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.16" - }, - "widgets": { - "application/vnd.jupyter.widget-state+json": { - "state": {}, - "version_major": 2, - "version_minor": 0 - } - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/VAPs/quicklook/PSAP/.ipynb_checkpoints/aospsap3w.c1-checkpoint.ipynb b/VAPs/quicklook/PSAP/.ipynb_checkpoints/aospsap3w.c1-checkpoint.ipynb deleted file mode 100644 index 73feaecb..00000000 --- 
a/VAPs/quicklook/PSAP/.ipynb_checkpoints/aospsap3w.c1-checkpoint.ipynb +++ /dev/null @@ -1,1841 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "70840257-70e4-45e2-b491-14bff5a257a3", - "metadata": {}, - "source": [ - "# AOSPSAP3W.C1 Plots\n", - "\n", - "[Click here](https://www.arm.gov/capabilities/vaps/psap) for more information about this vap." - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "id": "460fd89f-e034-452c-b837-f65c5958264f", - "metadata": {}, - "outputs": [], - "source": [ - "%matplotlib widget\n", - "import ipywidgets as widgets\n", - "\n", - "import matplotlib.pyplot as plt\n", - "import ipywidgets as widgets\n", - "import numpy as np\n", - "import pandas as pd\n", - "import os\n", - "from datetime import datetime\n", - "\n", - "import act\n", - "import xarray as xr\n", - "\n", - "# Data archive directory\n", - "DATA_DIR = r'/data/archive/'\n", - "\n", - "# Datastream info\n", - "DATASTREAM_NAME = 'aospsap3w'\n", - "DATA_LEVEL = 'c1'\n", - "LOCATIONS = [{'end_date': '2013-06-24', 'facility': 'M1', 'site': 'pvc', 'start_date': '2012-07-16'}]" - ] - }, - { - "cell_type": "markdown", - "id": "9faaf875", - "metadata": {}, - "source": [ - "## Define site, facility, and date range" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "ac6764f5", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The following locations and date ranges are available for this VAP:\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
sitefacilitystart_dateend_date
0pvcM12012-07-162013-06-24
\n", - "
" - ], - "text/plain": [ - " site facility start_date end_date\n", - "0 pvc M1 2012-07-16 2013-06-24" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "print(\"The following locations and date ranges are available for this VAP:\")\n", - "display(pd.DataFrame(LOCATIONS, columns=['site', 'facility', 'start_date', 'end_date']))" - ] - }, - { - "cell_type": "markdown", - "id": "8d132223", - "metadata": {}, - "source": [ - "#### Define site, facility, and date range (date format: YYYY-MM-DD) using the variables below:" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "e563983a", - "metadata": {}, - "outputs": [], - "source": [ - "site_facility = ( 'pvc', 'M1' )\n", - "\n", - "date_start = '2013-06-22'\n", - "date_end = '2013-06-24'" - ] - }, - { - "cell_type": "markdown", - "id": "bccd3dfe-2f99-49a5-bace-ea37e8dd8fc3", - "metadata": {}, - "source": [ - "## Load data files\n", - "Load data files from /data/archive/" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "cb4b9a26-c574-49c0-a521-658fa553e39e", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "'/data/archive/pvc/pvcaospsap3wM1.c1'" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Compile list of files\n", - "site, facility = site_facility\n", - "d_date_start = datetime.strptime(date_start, '%Y-%m-%d')\n", - "d_date_end = datetime.strptime(date_end, '%Y-%m-%d')\n", - "dir_path = os.path.join(DATA_DIR + site, site + DATASTREAM_NAME + facility + r'.' 
+ DATA_LEVEL )\n", - "dir_path\n" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "6be8f3dc", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "['20130622', '20130623', '20130624']" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "from datetime import date, timedelta\n", - "import pandas as pd\n", - "\n", - "def get_ARM_formated_dates(start_date, end_date):\n", - " \"\"\"\n", - " Get a list of ARM conventional formated date lists, based on start_date and end_date(inclusive)\n", - " EXAMPLE:\n", - " get_ARM_formated_dates(start_date=\"20180219\", end_date=\"20180221\")\n", - " >> [\"20180219\", \"20180220\", \"20180221\"] \n", - " \"\"\"\n", - " \n", - " _start_date = pd.to_datetime(start_date)\n", - " _end_date = pd.to_datetime(end_date)\n", - " \n", - " delta = _end_date - _start_date # returns timedelta \n", - " dates = []\n", - "\n", - " for i in range(delta.days + 1):\n", - " day = _start_date + timedelta(days=i)\n", - " day_formated = day.strftime(format=\"%Y%m%d\")\n", - " dates.append(day_formated)\n", - " return dates\n", - "\n", - "\n", - "get_ARM_formated_dates(start_date=date_start, end_date=date_end)" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "51feea2e", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "['/data/archive/pvc/pvcaospsap3wM1.c1/pvcaospsap3wM1.c1.20130622.000000.cdf',\n", - " '/data/archive/pvc/pvcaospsap3wM1.c1/pvcaospsap3wM1.c1.20130623.000000.cdf',\n", - " '/data/archive/pvc/pvcaospsap3wM1.c1/pvcaospsap3wM1.c1.20130624.000000.cdf']" - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Filter a list of files based on date pattern\n", - "import glob\n", - "dates = get_ARM_formated_dates(start_date=date_start, end_date=date_end)\n", - "files_filter = []\n", - "for date in dates:\n", - " files_filter += 
glob.glob(f'{dir_path}/*.{date}*.*')\n", - " files_filter\n", - "files_filter" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "id": "b0e5d0d7", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "3 files loaded\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "
<xarray.Dataset>\n",
-       "Dimensions:              (time: 3923)\n",
-       "Coordinates:\n",
-       "  * time                 (time) datetime64[ns] 2013-06-22 ... 2013-06-24T18:0...\n",
-       "Data variables: (12/21)\n",
-       "    base_time            (time) datetime64[ns] 2013-06-22 ... 2013-06-24\n",
-       "    time_offset          (time) datetime64[ns] 2013-06-22 ... 2013-06-24T18:0...\n",
-       "    Ba_B_PSAP3W          (time) float32 dask.array<chunksize=(1440,), meta=np.ndarray>\n",
-       "    qc_Ba_B_PSAP3W       (time) int32 dask.array<chunksize=(1440,), meta=np.ndarray>\n",
-       "    Ba_G_PSAP3W          (time) float32 dask.array<chunksize=(1440,), meta=np.ndarray>\n",
-       "    qc_Ba_G_PSAP3W       (time) int32 dask.array<chunksize=(1440,), meta=np.ndarray>\n",
-       "    ...                   ...\n",
-       "    qc_sample_length     (time) int32 dask.array<chunksize=(1440,), meta=np.ndarray>\n",
-       "    impactor_setting     (time) float32 dask.array<chunksize=(1440,), meta=np.ndarray>\n",
-       "    qc_impactor_setting  (time) int32 dask.array<chunksize=(1440,), meta=np.ndarray>\n",
-       "    lat                  (time) float32 42.03 42.03 42.03 ... 42.03 42.03 42.03\n",
-       "    lon                  (time) float32 -70.05 -70.05 -70.05 ... -70.05 -70.05\n",
-       "    alt                  (time) float32 43.0 43.0 43.0 43.0 ... 43.0 43.0 43.0\n",
-       "Attributes: (12/21)\n",
-       "    command_line:             aosmqc_ingest -s pvc -f M1 -n aosmqc -R -D\n",
-       "    process_version:          ingest-aosmqc-1.2-0.el6\n",
-       "    dod_version:              aospsap3w-c1-1.3\n",
-       "    site_id:                  pvc\n",
-       "    facility_id:              M1: Cape Cod, Massachusetts\n",
-       "    data_level:               c1\n",
-       "    ...                       ...\n",
-       "    datastream:               pvcaospsap3wM1.c1\n",
-       "    history:                  created by user dsmgr on machine tin at 2014-06...\n",
-       "    _file_dates:              ['20130622', '20130623', '20130624']\n",
-       "    _file_times:              ['000000', '000000', '000000']\n",
-       "    _datastream:              pvcaospsap3wM1.c1\n",
-       "    _arm_standards_flag:      1
" - ], - "text/plain": [ - "\n", - "Dimensions: (time: 3923)\n", - "Coordinates:\n", - " * time (time) datetime64[ns] 2013-06-22 ... 2013-06-24T18:0...\n", - "Data variables: (12/21)\n", - " base_time (time) datetime64[ns] 2013-06-22 ... 2013-06-24\n", - " time_offset (time) datetime64[ns] 2013-06-22 ... 2013-06-24T18:0...\n", - " Ba_B_PSAP3W (time) float32 dask.array\n", - " qc_Ba_B_PSAP3W (time) int32 dask.array\n", - " Ba_G_PSAP3W (time) float32 dask.array\n", - " qc_Ba_G_PSAP3W (time) int32 dask.array\n", - " ... ...\n", - " qc_sample_length (time) int32 dask.array\n", - " impactor_setting (time) float32 dask.array\n", - " qc_impactor_setting (time) int32 dask.array\n", - " lat (time) float32 42.03 42.03 42.03 ... 42.03 42.03 42.03\n", - " lon (time) float32 -70.05 -70.05 -70.05 ... -70.05 -70.05\n", - " alt (time) float32 43.0 43.0 43.0 43.0 ... 43.0 43.0 43.0\n", - "Attributes: (12/21)\n", - " command_line: aosmqc_ingest -s pvc -f M1 -n aosmqc -R -D\n", - " process_version: ingest-aosmqc-1.2-0.el6\n", - " dod_version: aospsap3w-c1-1.3\n", - " site_id: pvc\n", - " facility_id: M1: Cape Cod, Massachusetts\n", - " data_level: c1\n", - " ... 
...\n", - " datastream: pvcaospsap3wM1.c1\n", - " history: created by user dsmgr on machine tin at 2014-06...\n", - " _file_dates: ['20130622', '20130623', '20130624']\n", - " _file_times: ['000000', '000000', '000000']\n", - " _datastream: pvcaospsap3wM1.c1\n", - " _arm_standards_flag: 1" - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Load files as a single dataset\n", - "files_list = files_filter \n", - "ds = act.io.armfiles.read_netcdf(files_list)\n", - "ds.clean.cleanup()\n", - "print(f'{len(files_list)} files loaded')\n", - "ds\n" - ] - }, - { - "cell_type": "markdown", - "id": "4a551094-9ec0-4b64-b80a-9940573c2f50", - "metadata": {}, - "source": [ - "## Plot time series data\n", - "#### Define the list of variables to be plotted:" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "id": "d09b789e-84f1-4605-846b-a72c110c8048", - "metadata": {}, - "outputs": [], - "source": [ - "variables_to_plot = ['transmittance_blue', 'dqrvar_transmittance_blue', 'transmittance_green']" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "id": "3458fb08-035b-4898-9253-0a94e6f9c97b", - "metadata": {}, - "outputs": [ - { - "ename": "KeyError", - "evalue": "'transmittance_blue'", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[9], line 5\u001b[0m\n\u001b[1;32m 2\u001b[0m ts_display\u001b[38;5;241m.\u001b[39madd_subplots((\u001b[38;5;28mlen\u001b[39m(variables_to_plot),), figsize \u001b[38;5;241m=\u001b[39m (\u001b[38;5;241m9.5\u001b[39m,\u001b[38;5;241m4\u001b[39m\u001b[38;5;241m*\u001b[39m\u001b[38;5;28mlen\u001b[39m(variables_to_plot)))\n\u001b[1;32m 4\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m i,v \u001b[38;5;129;01min\u001b[39;00m 
\u001b[38;5;28menumerate\u001b[39m(variables_to_plot):\n\u001b[0;32m----> 5\u001b[0m ts_ax \u001b[38;5;241m=\u001b[39m ts_display\u001b[38;5;241m.\u001b[39mplot(v, subplot_index\u001b[38;5;241m=\u001b[39m(i,), set_title\u001b[38;5;241m=\u001b[39m\u001b[43mds\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mvariables\u001b[49m\u001b[43m[\u001b[49m\u001b[43mv\u001b[49m\u001b[43m]\u001b[49m\u001b[38;5;241m.\u001b[39mattrs[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mlong_name\u001b[39m\u001b[38;5;124m'\u001b[39m],)\n\u001b[1;32m 6\u001b[0m ts_ax\u001b[38;5;241m.\u001b[39mgrid()\n\u001b[1;32m 8\u001b[0m plt\u001b[38;5;241m.\u001b[39mshow()\n", - "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/xarray/core/utils.py:453\u001b[0m, in \u001b[0;36mFrozen.__getitem__\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m 452\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m__getitem__\u001b[39m(\u001b[38;5;28mself\u001b[39m, key: K) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m V:\n\u001b[0;32m--> 453\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmapping\u001b[49m\u001b[43m[\u001b[49m\u001b[43mkey\u001b[49m\u001b[43m]\u001b[49m\n", - "\u001b[0;31mKeyError\u001b[0m: 'transmittance_blue'" - ] - }, - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "f164bc0d8b484beaa248597fc1245960", - "version_major": 2, - "version_minor": 0 - }, - "image/png": "", - "text/html": [ - "\n", - "
\n", - "
\n", - " Figure\n", - "
\n", - " \n", - "
\n", - " " - ], - "text/plain": [ - "Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "ts_display = act.plotting.TimeSeriesDisplay(ds)\n", - "ts_display.add_subplots((len(variables_to_plot),), figsize = (9.5,4*len(variables_to_plot)))\n", - "\n", - "for i,v in enumerate(variables_to_plot):\n", - " ts_ax = ts_display.plot(v, subplot_index=(i,), set_title=ds.variables[v].attrs['long_name'],)\n", - " ts_ax.grid()\n", - "\n", - "plt.show()\n" - ] - }, - { - "cell_type": "markdown", - "id": "194399aa-1907-452b-8ba9-bc31d7f60291", - "metadata": {}, - "source": [ - "## Quality check plots\n", - "#### Define variable for QC plot" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "9a0ef63d-3eeb-48fc-a24a-43258b6134b8", - "metadata": {}, - "outputs": [], - "source": [ - "qc_variable = 'transmittance_blue'" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "532663a3-4dc0-4497-bda8-018c5f91e1c4", - "metadata": {}, - "outputs": [], - "source": [ - "# QC Plot\n", - "if ('qc_'+qc_variable) in ds.variables:\n", - "\n", - " # Plot\n", - " qc_display = act.plotting.TimeSeriesDisplay(ds)\n", - " qc_display.add_subplots((2,), figsize = (9.5,10))\n", - " qc_ax = qc_display.plot(qc_variable, subplot_index=(0,), set_title=\"QC results on field: \" + qc_variable,)\n", - " qc_ax.grid()\n", - " qc_display.qc_flag_block_plot(qc_variable, subplot_index=(1,))\n", - "\n", - " plt.show()\n", - "else:\n", - " print(f'QC not available for the selected field: {qc_variable}')\n" - ] - }, - { - "cell_type": "markdown", - "id": "2308db16-f362-4033-a11a-c3e5e75ad9ba", - "metadata": {}, - "source": [ - "## Field selection dropdown menu\n", - "Select variable to be plotted from a dropdown menu" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "d5792fa8-2253-441a-8ac6-1e07a0345031", - "metadata": 
{}, - "outputs": [], - "source": [ - "plt.ioff()\n", - "\n", - "# populate dropdown menu with available variables \n", - "available_variables = [v for v in ds.variables if not('time' in v or v.startswith('qc_') or v.startswith('source_')) and 'long_name' in ds.variables[v].attrs]\n", - "d_variable = 'transmittance_blue'\n", - "dropdown = widgets.Dropdown(\n", - " options = [(ds.variables[v].attrs['long_name'], v) for v in available_variables],\n", - " value= d_variable,\n", - " description='Field:',\n", - " disabled=False,\n", - ")\n", - "dropdown.layout.margin = '0px 30% 0px 20%'\n", - "dropdown.layout.width = '50%'\n", - "\n", - "# set up display\n", - "i_display = act.plotting.TimeSeriesDisplay(ds)\n", - "i_display.add_subplots((1,), figsize = (9.5,5))\n", - "i_ax = i_display.plot(d_variable, subplot_index=(0,), set_title=ds.variables[d_variable].attrs['long_name'],)\n", - "i_ax.grid()\n", - "i_fig = i_display.fig\n", - "\n", - "# update plot callback function\n", - "def update_plot(change):\n", - " i_ax.cla()\n", - " i_ax_new = i_display.plot(change.new, subplot_index=(0,), set_title=ds.variables[change.new].attrs['long_name'],)\n", - " i_ax_new.grid()\n", - " i_fig.canvas.draw()\n", - " i_fig.canvas.flush_events()\n", - "\n", - "dropdown.observe(update_plot, names='value')\n", - "\n", - "widgets.AppLayout(\n", - " header=dropdown,\n", - " center=i_fig.canvas,\n", - " pane_heights=[1, 6,1]\n", - ")\n" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.16" - }, - "widgets": { - "application/vnd.jupyter.widget-state+json": { - "state": {}, - "version_major": 2, - "version_minor": 0 - } - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} 
diff --git a/VAPs/quicklook/RADFLUXANAL/.ipynb_checkpoints/radflux1long.c1-checkpoint.ipynb b/VAPs/quicklook/RADFLUXANAL/.ipynb_checkpoints/radflux1long.c1-checkpoint.ipynb deleted file mode 100644 index 771298d9..00000000 --- a/VAPs/quicklook/RADFLUXANAL/.ipynb_checkpoints/radflux1long.c1-checkpoint.ipynb +++ /dev/null @@ -1,3763 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "70840257-70e4-45e2-b491-14bff5a257a3", - "metadata": {}, - "source": [ - "# RADFLUX1LONG.C1 Plots\n", - "\n", - "[Click here](https://www.arm.gov/capabilities/vaps/radfluxanal) for more information about this vap." - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "id": "460fd89f-e034-452c-b837-f65c5958264f", - "metadata": {}, - "outputs": [], - "source": [ - "%matplotlib widget\n", - "import ipywidgets as widgets\n", - "\n", - "import matplotlib.pyplot as plt\n", - "import ipywidgets as widgets\n", - "import numpy as np\n", - "import pandas as pd\n", - "import os\n", - "from datetime import datetime\n", - "\n", - "import act\n", - "import xarray as xr\n", - "\n", - "# Data archive directory\n", - "DATA_DIR = r'/data/archive/'\n", - "\n", - "# Datastream info\n", - "DATASTREAM_NAME = 'radflux1long'\n", - "DATA_LEVEL = 'c1'\n", - "LOCATIONS = [{'end_date': '2023-09-14', 'facility': 'C1', 'site': 'nsa', 'start_date': '2021-07-04'}, {'end_date': '2021-06-12', 'facility': 'M1', 'site': 'oli', 'start_date': '2020-10-16'}, {'end_date': '2023-09-28', 'facility': 'E11', 'site': 'sgp', 'start_date': '2020-06-08'}, {'end_date': '2023-10-30', 'facility': 'E12', 'site': 'sgp', 'start_date': '2020-06-07'}, {'end_date': '2023-10-26', 'facility': 'E13', 'site': 'sgp', 'start_date': '2020-06-03'}, {'end_date': '2023-09-28', 'facility': 'E15', 'site': 'sgp', 'start_date': '2021-07-12'}, {'end_date': '2023-09-28', 'facility': 'E9', 'site': 'sgp', 'start_date': '2021-07-04'}, {'end_date': '2021-09-20', 'facility': 'E31', 'site': 'sgp', 'start_date': '2020-06-08'}, {'end_date': 
'2023-10-31', 'facility': 'E32', 'site': 'sgp', 'start_date': '2021-07-03'}, {'end_date': '2023-10-31', 'facility': 'E33', 'site': 'sgp', 'start_date': '2020-06-11'}, {'end_date': '2023-09-28', 'facility': 'E34', 'site': 'sgp', 'start_date': '2020-06-18'}, {'end_date': '2023-09-28', 'facility': 'E35', 'site': 'sgp', 'start_date': '2019-06-24'}, {'end_date': '2023-09-28', 'facility': 'E36', 'site': 'sgp', 'start_date': '2020-06-26'}, {'end_date': '2023-10-31', 'facility': 'E37', 'site': 'sgp', 'start_date': '2020-07-01'}, {'end_date': '2021-05-29', 'facility': 'E38', 'site': 'sgp', 'start_date': '2020-06-25'}, {'end_date': '2023-10-30', 'facility': 'E39', 'site': 'sgp', 'start_date': '2020-09-07'}, {'end_date': '2023-09-28', 'facility': 'E40', 'site': 'sgp', 'start_date': '2020-09-07'}, {'end_date': '2023-08-02', 'facility': 'E41', 'site': 'sgp', 'start_date': '2021-07-31'}]" - ] - }, - { - "cell_type": "markdown", - "id": "9faaf875", - "metadata": {}, - "source": [ - "## Define site, facility, and date range" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "ac6764f5", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The following locations and date ranges are available for this VAP:\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
sitefacilitystart_dateend_date
0nsaC12021-07-042023-09-14
1oliM12020-10-162021-06-12
2sgpE112020-06-082023-09-28
3sgpE122020-06-072023-10-30
4sgpE132020-06-032023-10-26
5sgpE152021-07-122023-09-28
6sgpE92021-07-042023-09-28
7sgpE312020-06-082021-09-20
8sgpE322021-07-032023-10-31
9sgpE332020-06-112023-10-31
10sgpE342020-06-182023-09-28
11sgpE352019-06-242023-09-28
12sgpE362020-06-262023-09-28
13sgpE372020-07-012023-10-31
14sgpE382020-06-252021-05-29
15sgpE392020-09-072023-10-30
16sgpE402020-09-072023-09-28
17sgpE412021-07-312023-08-02
\n", - "
" - ], - "text/plain": [ - " site facility start_date end_date\n", - "0 nsa C1 2021-07-04 2023-09-14\n", - "1 oli M1 2020-10-16 2021-06-12\n", - "2 sgp E11 2020-06-08 2023-09-28\n", - "3 sgp E12 2020-06-07 2023-10-30\n", - "4 sgp E13 2020-06-03 2023-10-26\n", - "5 sgp E15 2021-07-12 2023-09-28\n", - "6 sgp E9 2021-07-04 2023-09-28\n", - "7 sgp E31 2020-06-08 2021-09-20\n", - "8 sgp E32 2021-07-03 2023-10-31\n", - "9 sgp E33 2020-06-11 2023-10-31\n", - "10 sgp E34 2020-06-18 2023-09-28\n", - "11 sgp E35 2019-06-24 2023-09-28\n", - "12 sgp E36 2020-06-26 2023-09-28\n", - "13 sgp E37 2020-07-01 2023-10-31\n", - "14 sgp E38 2020-06-25 2021-05-29\n", - "15 sgp E39 2020-09-07 2023-10-30\n", - "16 sgp E40 2020-09-07 2023-09-28\n", - "17 sgp E41 2021-07-31 2023-08-02" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "print(\"The following locations and date ranges are available for this VAP:\")\n", - "display(pd.DataFrame(LOCATIONS, columns=['site', 'facility', 'start_date', 'end_date']))" - ] - }, - { - "cell_type": "markdown", - "id": "8d132223", - "metadata": {}, - "source": [ - "#### Define site, facility, and date range (date format: YYYY-MM-DD) using the variables below:" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "e563983a", - "metadata": {}, - "outputs": [], - "source": [ - "site_facility = ( 'sgp', 'E11' )\n", - "\n", - "date_start = '2023-09-25'\n", - "date_end = '2023-09-27'" - ] - }, - { - "cell_type": "markdown", - "id": "bccd3dfe-2f99-49a5-bace-ea37e8dd8fc3", - "metadata": {}, - "source": [ - "## Load data files\n", - "Load data files from /data/archive/" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "cb4b9a26-c574-49c0-a521-658fa553e39e", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "'/data/archive/sgp/sgpradflux1longE11.c1'" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Compile list of 
files\n", - "site, facility = site_facility\n", - "d_date_start = datetime.strptime(date_start, '%Y-%m-%d')\n", - "d_date_end = datetime.strptime(date_end, '%Y-%m-%d')\n", - "dir_path = os.path.join(DATA_DIR + site, site + DATASTREAM_NAME + facility + r'.' + DATA_LEVEL )\n", - "dir_path\n" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "6be8f3dc", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "['20230925', '20230926', '20230927']" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "from datetime import date, timedelta\n", - "import pandas as pd\n", - "\n", - "def get_ARM_formated_dates(start_date, end_date):\n", - " \"\"\"\n", - " Get a list of ARM conventional formated date lists, based on start_date and end_date(inclusive)\n", - " EXAMPLE:\n", - " get_ARM_formated_dates(start_date=\"20180219\", end_date=\"20180221\")\n", - " >> [\"20180219\", \"20180220\", \"20180221\"] \n", - " \"\"\"\n", - " \n", - " _start_date = pd.to_datetime(start_date)\n", - " _end_date = pd.to_datetime(end_date)\n", - " \n", - " delta = _end_date - _start_date # returns timedelta \n", - " dates = []\n", - "\n", - " for i in range(delta.days + 1):\n", - " day = _start_date + timedelta(days=i)\n", - " day_formated = day.strftime(format=\"%Y%m%d\")\n", - " dates.append(day_formated)\n", - " return dates\n", - "\n", - "\n", - "get_ARM_formated_dates(start_date=date_start, end_date=date_end)" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "51feea2e", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "['/data/archive/sgp/sgpradflux1longE11.c1/sgpradflux1longE11.c1.20230925.070000.nc',\n", - " '/data/archive/sgp/sgpradflux1longE11.c1/sgpradflux1longE11.c1.20230926.070000.nc',\n", - " '/data/archive/sgp/sgpradflux1longE11.c1/sgpradflux1longE11.c1.20230927.070000.nc']" - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } 
- ], - "source": [ - "# Filter a list of files based on date pattern\n", - "import glob\n", - "dates = get_ARM_formated_dates(start_date=date_start, end_date=date_end)\n", - "files_filter = []\n", - "for date in dates:\n", - " files_filter += glob.glob(f'{dir_path}/*.{date}*.*')\n", - " files_filter\n", - "files_filter" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "id": "b0e5d0d7", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "3 files loaded\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "
<xarray.Dataset>\n",
-       "Dimensions:                                        (time: 4320, bound: 2)\n",
-       "Coordinates:\n",
-       "  * time                                           (time) datetime64[ns] 2023...\n",
-       "Dimensions without coordinates: bound\n",
-       "Data variables: (12/54)\n",
-       "    base_time                                      (time) datetime64[ns] 2023...\n",
-       "    time_offset                                    (time) datetime64[ns] 2023...\n",
-       "    time_bounds                                    (time, bound) object dask.array<chunksize=(1440, 2), meta=np.ndarray>\n",
-       "    downwelling_shortwave                          (time) float32 dask.array<chunksize=(1440,), meta=np.ndarray>\n",
-       "    source_downwelling_shortwave                   (time) int32 dask.array<chunksize=(1440,), meta=np.ndarray>\n",
-       "    qc_downwelling_shortwave                       (time) int32 dask.array<chunksize=(1440,), meta=np.ndarray>\n",
-       "    ...                                             ...\n",
-       "    qc_pressure                                    (time) int32 dask.array<chunksize=(1440,), meta=np.ndarray>\n",
-       "    precipitation                                  (time) float32 dask.array<chunksize=(1440,), meta=np.ndarray>\n",
-       "    qc_precipitation                               (time) int32 dask.array<chunksize=(1440,), meta=np.ndarray>\n",
-       "    lat                                            (time) float32 36.88 ... 3...\n",
-       "    lon                                            (time) float32 -98.29 ... ...\n",
-       "    alt                                            (time) float32 360.0 ... 3...\n",
-       "Attributes: (12/21)\n",
-       "    command_line:            radflux1long -s sgp -f E11 -b 20230901 -e 202310...\n",
-       "    Conventions:             ARM-1.3\n",
-       "    process_version:         radflux1long-3.16.0\n",
-       "    dod_version:             radflux1long-c1-1.6\n",
-       "    input_datastreams:       sgpqcrad1longE11.c1 : 6.6 : 20230629.000000-2023...\n",
-       "    site_id:                 sgp\n",
-       "    ...                      ...\n",
-       "    fitmode_comment:         01 = daily_fit 00 =  1_fit\n",
-       "    history:                 created by user dsmgr on machine prod-proc2.adc....\n",
-       "    _file_dates:             ['20230925', '20230926', '20230927']\n",
-       "    _file_times:             ['070000', '070000', '070000']\n",
-       "    _datastream:             sgpradflux1longE11.c1\n",
-       "    _arm_standards_flag:     1
" - ], - "text/plain": [ - "\n", - "Dimensions: (time: 4320, bound: 2)\n", - "Coordinates:\n", - " * time (time) datetime64[ns] 2023...\n", - "Dimensions without coordinates: bound\n", - "Data variables: (12/54)\n", - " base_time (time) datetime64[ns] 2023...\n", - " time_offset (time) datetime64[ns] 2023...\n", - " time_bounds (time, bound) object dask.array\n", - " downwelling_shortwave (time) float32 dask.array\n", - " source_downwelling_shortwave (time) int32 dask.array\n", - " qc_downwelling_shortwave (time) int32 dask.array\n", - " ... ...\n", - " qc_pressure (time) int32 dask.array\n", - " precipitation (time) float32 dask.array\n", - " qc_precipitation (time) int32 dask.array\n", - " lat (time) float32 36.88 ... 3...\n", - " lon (time) float32 -98.29 ... ...\n", - " alt (time) float32 360.0 ... 3...\n", - "Attributes: (12/21)\n", - " command_line: radflux1long -s sgp -f E11 -b 20230901 -e 202310...\n", - " Conventions: ARM-1.3\n", - " process_version: radflux1long-3.16.0\n", - " dod_version: radflux1long-c1-1.6\n", - " input_datastreams: sgpqcrad1longE11.c1 : 6.6 : 20230629.000000-2023...\n", - " site_id: sgp\n", - " ... 
...\n", - " fitmode_comment: 01 = daily_fit 00 = 1_fit\n", - " history: created by user dsmgr on machine prod-proc2.adc....\n", - " _file_dates: ['20230925', '20230926', '20230927']\n", - " _file_times: ['070000', '070000', '070000']\n", - " _datastream: sgpradflux1longE11.c1\n", - " _arm_standards_flag: 1" - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Load files as a single dataset\n", - "files_list = files_filter \n", - "ds = act.io.armfiles.read_netcdf(files_list)\n", - "ds.clean.cleanup()\n", - "print(f'{len(files_list)} files loaded')\n", - "ds\n" - ] - }, - { - "cell_type": "markdown", - "id": "4a551094-9ec0-4b64-b80a-9940573c2f50", - "metadata": {}, - "source": [ - "## Plot time series data\n", - "#### Define the list of variables to be plotted:" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "id": "d09b789e-84f1-4605-846b-a72c110c8048", - "metadata": {}, - "outputs": [], - "source": [ - "variables_to_plot = ['downwelling_shortwave', 'clearsky_downwelling_shortwave', 'downwelling_longwave']" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "id": "3458fb08-035b-4898-9253-0a94e6f9c97b", - "metadata": {}, - "outputs": [], - "source": [ - "ts_display = act.plotting.TimeSeriesDisplay(ds)\n", - "ts_display.add_subplots((len(variables_to_plot),), figsize = (9.5,4*len(variables_to_plot)))\n", - "\n", - "for i,v in enumerate(variables_to_plot):\n", - " ts_ax = ts_display.plot(v, subplot_index=(i,), set_title=ds.variables[v].attrs['long_name'],)\n", - " ts_ax.grid()\n", - "\n", - "plt.show()\n" - ] - }, - { - "cell_type": "markdown", - "id": "194399aa-1907-452b-8ba9-bc31d7f60291", - "metadata": {}, - "source": [ - "## Quality check plots\n", - "#### Define variable for QC plot" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "9a0ef63d-3eeb-48fc-a24a-43258b6134b8", - "metadata": {}, - "outputs": [], - "source": [ - "qc_variable = 
'downwelling_shortwave'" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "532663a3-4dc0-4497-bda8-018c5f91e1c4", - "metadata": {}, - "outputs": [], - "source": [ - "# QC Plot\n", - "if ('qc_'+qc_variable) in ds.variables:\n", - "\n", - " # Plot\n", - " qc_display = act.plotting.TimeSeriesDisplay(ds)\n", - " qc_display.add_subplots((2,), figsize = (9.5,10))\n", - " qc_ax = qc_display.plot(qc_variable, subplot_index=(0,), set_title=\"QC results on field: \" + qc_variable,)\n", - " qc_ax.grid()\n", - " qc_display.qc_flag_block_plot(qc_variable, subplot_index=(1,))\n", - "\n", - " plt.show()\n", - "else:\n", - " print(f'QC not available for the selected field: {qc_variable}')\n" - ] - }, - { - "cell_type": "markdown", - "id": "2308db16-f362-4033-a11a-c3e5e75ad9ba", - "metadata": {}, - "source": [ - "## Field selection dropdown menu\n", - "Select variable to be plotted from a dropdown menu" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "d5792fa8-2253-441a-8ac6-1e07a0345031", - "metadata": {}, - "outputs": [], - "source": [ - "plt.ioff()\n", - "\n", - "# populate dropdown menu with available variables \n", - "available_variables = [v for v in ds.variables if not('time' in v or v.startswith('qc_') or v.startswith('source_')) and 'long_name' in ds.variables[v].attrs]\n", - "d_variable = 'downwelling_shortwave'\n", - "dropdown = widgets.Dropdown(\n", - " options = [(ds.variables[v].attrs['long_name'], v) for v in available_variables],\n", - " value= d_variable,\n", - " description='Field:',\n", - " disabled=False,\n", - ")\n", - "dropdown.layout.margin = '0px 30% 0px 20%'\n", - "dropdown.layout.width = '50%'\n", - "\n", - "# set up display\n", - "i_display = act.plotting.TimeSeriesDisplay(ds)\n", - "i_display.add_subplots((1,), figsize = (9.5,5))\n", - "i_ax = i_display.plot(d_variable, subplot_index=(0,), set_title=ds.variables[d_variable].attrs['long_name'],)\n", - "i_ax.grid()\n", - "i_fig = i_display.fig\n", - "\n", - "# 
update plot callback function\n", - "def update_plot(change):\n", - " i_ax.cla()\n", - " i_ax_new = i_display.plot(change.new, subplot_index=(0,), set_title=ds.variables[change.new].attrs['long_name'],)\n", - " i_ax_new.grid()\n", - " i_fig.canvas.draw()\n", - " i_fig.canvas.flush_events()\n", - "\n", - "dropdown.observe(update_plot, names='value')\n", - "\n", - "widgets.AppLayout(\n", - " header=dropdown,\n", - " center=i_fig.canvas,\n", - " pane_heights=[1, 6,1]\n", - ")\n" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.9.12" - }, - "widgets": { - "application/vnd.jupyter.widget-state+json": { - "state": {}, - "version_major": 2, - "version_minor": 0 - } - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/VAPs/quicklook/SACRADV3D3C/.ipynb_checkpoints/kasacradv3d3c.c1-checkpoint.ipynb b/VAPs/quicklook/SACRADV3D3C/.ipynb_checkpoints/kasacradv3d3c.c1-checkpoint.ipynb deleted file mode 100644 index 8d364ba7..00000000 --- a/VAPs/quicklook/SACRADV3D3C/.ipynb_checkpoints/kasacradv3d3c.c1-checkpoint.ipynb +++ /dev/null @@ -1,2574 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "70840257-70e4-45e2-b491-14bff5a257a3", - "metadata": {}, - "source": [ - "# KASACRADV3D3C.C1 Plots\n", - "\n", - "[Click here](https://www.arm.gov/capabilities/vaps/sacradv3d3c) for more information about this vap." 
- ] - }, - { - "cell_type": "code", - "execution_count": 1, - "id": "460fd89f-e034-452c-b837-f65c5958264f", - "metadata": {}, - "outputs": [], - "source": [ - "%matplotlib widget\n", - "import ipywidgets as widgets\n", - "\n", - "import matplotlib.pyplot as plt\n", - "import ipywidgets as widgets\n", - "import numpy as np\n", - "import pandas as pd\n", - "import os\n", - "from datetime import datetime\n", - "\n", - "import act\n", - "import xarray as xr\n", - "\n", - "# Data archive directory\n", - "DATA_DIR = r'/data/archive/'\n", - "\n", - "# Datastream info\n", - "DATASTREAM_NAME = 'kasacradv3d3c'\n", - "DATA_LEVEL = 'c1'\n", - "LOCATIONS = [{'end_date': '2012-08-31', 'facility': 'C1', 'site': 'sgp', 'start_date': '2012-08-01'}]" - ] - }, - { - "cell_type": "markdown", - "id": "9faaf875", - "metadata": {}, - "source": [ - "## Define site, facility, and date range" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "ac6764f5", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The following locations and date ranges are available for this VAP:\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
sitefacilitystart_dateend_date
0sgpC12012-08-012012-08-31
\n", - "
" - ], - "text/plain": [ - " site facility start_date end_date\n", - "0 sgp C1 2012-08-01 2012-08-31" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "print(\"The following locations and date ranges are available for this VAP:\")\n", - "display(pd.DataFrame(LOCATIONS, columns=['site', 'facility', 'start_date', 'end_date']))" - ] - }, - { - "cell_type": "markdown", - "id": "8d132223", - "metadata": {}, - "source": [ - "#### Define site, facility, and date range (date format: YYYY-MM-DD) using the variables below:" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "e563983a", - "metadata": {}, - "outputs": [], - "source": [ - "site_facility = ( 'sgp', 'C1' )\n", - "\n", - "date_start = '2012-08-29'\n", - "date_end = '2012-08-31'" - ] - }, - { - "cell_type": "markdown", - "id": "bccd3dfe-2f99-49a5-bace-ea37e8dd8fc3", - "metadata": {}, - "source": [ - "## Load data files\n", - "Load data files from /data/archive/" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "cb4b9a26-c574-49c0-a521-658fa553e39e", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "'/data/archive/sgp/sgpkasacradv3d3cC1.c1'" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Compile list of files\n", - "site, facility = site_facility\n", - "d_date_start = datetime.strptime(date_start, '%Y-%m-%d')\n", - "d_date_end = datetime.strptime(date_end, '%Y-%m-%d')\n", - "dir_path = os.path.join(DATA_DIR + site, site + DATASTREAM_NAME + facility + r'.' 
+ DATA_LEVEL )\n", - "dir_path\n" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "6be8f3dc", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "['20120829', '20120830', '20120831']" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "from datetime import date, timedelta\n", - "import pandas as pd\n", - "\n", - "def get_ARM_formated_dates(start_date, end_date):\n", - " \"\"\"\n", - " Get a list of ARM conventional formated date lists, based on start_date and end_date(inclusive)\n", - " EXAMPLE:\n", - " get_ARM_formated_dates(start_date=\"20180219\", end_date=\"20180221\")\n", - " >> [\"20180219\", \"20180220\", \"20180221\"] \n", - " \"\"\"\n", - " \n", - " _start_date = pd.to_datetime(start_date)\n", - " _end_date = pd.to_datetime(end_date)\n", - " \n", - " delta = _end_date - _start_date # returns timedelta \n", - " dates = []\n", - "\n", - " for i in range(delta.days + 1):\n", - " day = _start_date + timedelta(days=i)\n", - " day_formated = day.strftime(format=\"%Y%m%d\")\n", - " dates.append(day_formated)\n", - " return dates\n", - "\n", - "\n", - "get_ARM_formated_dates(start_date=date_start, end_date=date_end)" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "51feea2e", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "['/data/archive/sgp/sgpkasacradv3d3cC1.c1/sgpkasacradv3d3cC1.c1.20120829.025008.nc',\n", - " '/data/archive/sgp/sgpkasacradv3d3cC1.c1/sgpkasacradv3d3cC1.c1.20120829.153009.nc',\n", - " '/data/archive/sgp/sgpkasacradv3d3cC1.c1/sgpkasacradv3d3cC1.c1.20120829.201628.nc',\n", - " '/data/archive/sgp/sgpkasacradv3d3cC1.c1/sgpkasacradv3d3cC1.c1.20120829.002402.nc',\n", - " '/data/archive/sgp/sgpkasacradv3d3cC1.c1/sgpkasacradv3d3cC1.c1.20120829.175243.nc',\n", - " '/data/archive/sgp/sgpkasacradv3d3cC1.c1/sgpkasacradv3d3cC1.c1.20120829.195648.nc',\n", - " 
'/data/archive/sgp/sgpkasacradv3d3cC1.c1/sgpkasacradv3d3cC1.c1.20120829.130446.nc',\n", - " '/data/archive/sgp/sgpkasacradv3d3cC1.c1/sgpkasacradv3d3cC1.c1.20120829.081435.nc',\n", - " '/data/archive/sgp/sgpkasacradv3d3cC1.c1/sgpkasacradv3d3cC1.c1.20120829.151031.nc',\n", - " '/data/archive/sgp/sgpkasacradv3d3cC1.c1/sgpkasacradv3d3cC1.c1.20120829.173303.nc',\n", - " '/data/archive/sgp/sgpkasacradv3d3cC1.c1/sgpkasacradv3d3cC1.c1.20120829.030947.nc',\n", - " '/data/archive/sgp/sgpkasacradv3d3cC1.c1/sgpkasacradv3d3cC1.c1.20120829.054747.nc',\n", - " '/data/archive/sgp/sgpkasacradv3d3cC1.c1/sgpkasacradv3d3cC1.c1.20120829.124506.nc',\n", - " '/data/archive/sgp/sgpkasacradv3d3cC1.c1/sgpkasacradv3d3cC1.c1.20120829.104113.nc',\n", - " '/data/archive/sgp/sgpkasacradv3d3cC1.c1/sgpkasacradv3d3cC1.c1.20120829.075456.nc',\n", - " '/data/archive/sgp/sgpkasacradv3d3cC1.c1/sgpkasacradv3d3cC1.c1.20120829.004340.nc',\n", - " '/data/archive/sgp/sgpkasacradv3d3cC1.c1/sgpkasacradv3d3cC1.c1.20120829.102134.nc',\n", - " '/data/archive/sgp/sgpkasacradv3d3cC1.c1/sgpkasacradv3d3cC1.c1.20120829.052807.nc',\n", - " '/data/archive/sgp/sgpkasacradv3d3cC1.c1/sgpkasacradv3d3cC1.c1.20120829.221741.nc',\n", - " '/data/archive/sgp/sgpkasacradv3d3cC1.c1/sgpkasacradv3d3cC1.c1.20120829.051559.nc',\n", - " '/data/archive/sgp/sgpkasacradv3d3cC1.c1/sgpkasacradv3d3cC1.c1.20120829.223721.nc',\n", - " '/data/archive/sgp/sgpkasacradv3d3cC1.c1/sgpkasacradv3d3cC1.c1.20120830.202247.nc',\n", - " '/data/archive/sgp/sgpkasacradv3d3cC1.c1/sgpkasacradv3d3cC1.c1.20120830.104350.nc',\n", - " '/data/archive/sgp/sgpkasacradv3d3cC1.c1/sgpkasacradv3d3cC1.c1.20120830.131114.nc',\n", - " '/data/archive/sgp/sgpkasacradv3d3cC1.c1/sgpkasacradv3d3cC1.c1.20120830.030911.nc',\n", - " '/data/archive/sgp/sgpkasacradv3d3cC1.c1/sgpkasacradv3d3cC1.c1.20120830.010234.nc',\n", - " '/data/archive/sgp/sgpkasacradv3d3cC1.c1/sgpkasacradv3d3cC1.c1.20120830.173849.nc',\n", - " 
'/data/archive/sgp/sgpkasacradv3d3cC1.c1/sgpkasacradv3d3cC1.c1.20120830.224833.nc',\n", - " '/data/archive/sgp/sgpkasacradv3d3cC1.c1/sgpkasacradv3d3cC1.c1.20120830.004254.nc',\n", - " '/data/archive/sgp/sgpkasacradv3d3cC1.c1/sgpkasacradv3d3cC1.c1.20120830.075837.nc',\n", - " '/data/archive/sgp/sgpkasacradv3d3cC1.c1/sgpkasacradv3d3cC1.c1.20120830.081817.nc',\n", - " '/data/archive/sgp/sgpkasacradv3d3cC1.c1/sgpkasacradv3d3cC1.c1.20120830.053252.nc',\n", - " '/data/archive/sgp/sgpkasacradv3d3cC1.c1/sgpkasacradv3d3cC1.c1.20120830.125133.nc',\n", - " '/data/archive/sgp/sgpkasacradv3d3cC1.c1/sgpkasacradv3d3cC1.c1.20120830.102411.nc',\n", - " '/data/archive/sgp/sgpkasacradv3d3cC1.c1/sgpkasacradv3d3cC1.c1.20120830.153540.nc',\n", - " '/data/archive/sgp/sgpkasacradv3d3cC1.c1/sgpkasacradv3d3cC1.c1.20120830.055231.nc',\n", - " '/data/archive/sgp/sgpkasacradv3d3cC1.c1/sgpkasacradv3d3cC1.c1.20120830.151600.nc',\n", - " '/data/archive/sgp/sgpkasacradv3d3cC1.c1/sgpkasacradv3d3cC1.c1.20120830.222853.nc',\n", - " '/data/archive/sgp/sgpkasacradv3d3cC1.c1/sgpkasacradv3d3cC1.c1.20120830.175829.nc',\n", - " '/data/archive/sgp/sgpkasacradv3d3cC1.c1/sgpkasacradv3d3cC1.c1.20120830.200306.nc',\n", - " '/data/archive/sgp/sgpkasacradv3d3cC1.c1/sgpkasacradv3d3cC1.c1.20120830.032851.nc',\n", - " '/data/archive/sgp/sgpkasacradv3d3cC1.c1/sgpkasacradv3d3cC1.c1.20120831.060543.nc',\n", - " '/data/archive/sgp/sgpkasacradv3d3cC1.c1/sgpkasacradv3d3cC1.c1.20120831.054603.nc',\n", - " '/data/archive/sgp/sgpkasacradv3d3cC1.c1/sgpkasacradv3d3cC1.c1.20120831.230808.nc',\n", - " '/data/archive/sgp/sgpkasacradv3d3cC1.c1/sgpkasacradv3d3cC1.c1.20120831.202355.nc',\n", - " '/data/archive/sgp/sgpkasacradv3d3cC1.c1/sgpkasacradv3d3cC1.c1.20120831.103937.nc',\n", - " '/data/archive/sgp/sgpkasacradv3d3cC1.c1/sgpkasacradv3d3cC1.c1.20120831.130632.nc',\n", - " '/data/archive/sgp/sgpkasacradv3d3cC1.c1/sgpkasacradv3d3cC1.c1.20120831.105916.nc',\n", - " 
'/data/archive/sgp/sgpkasacradv3d3cC1.c1/sgpkasacradv3d3cC1.c1.20120831.224825.nc',\n", - " '/data/archive/sgp/sgpkasacradv3d3cC1.c1/sgpkasacradv3d3cC1.c1.20120831.011234.nc',\n", - " '/data/archive/sgp/sgpkasacradv3d3cC1.c1/sgpkasacradv3d3cC1.c1.20120831.175952.nc',\n", - " '/data/archive/sgp/sgpkasacradv3d3cC1.c1/sgpkasacradv3d3cC1.c1.20120831.031811.nc',\n", - " '/data/archive/sgp/sgpkasacradv3d3cC1.c1/sgpkasacradv3d3cC1.c1.20120831.033753.nc',\n", - " '/data/archive/sgp/sgpkasacradv3d3cC1.c1/sgpkasacradv3d3cC1.c1.20120831.005255.nc',\n", - " '/data/archive/sgp/sgpkasacradv3d3cC1.c1/sgpkasacradv3d3cC1.c1.20120831.132611.nc',\n", - " '/data/archive/sgp/sgpkasacradv3d3cC1.c1/sgpkasacradv3d3cC1.c1.20120831.181932.nc',\n", - " '/data/archive/sgp/sgpkasacradv3d3cC1.c1/sgpkasacradv3d3cC1.c1.20120831.153200.nc',\n", - " '/data/archive/sgp/sgpkasacradv3d3cC1.c1/sgpkasacradv3d3cC1.c1.20120831.204334.nc',\n", - " '/data/archive/sgp/sgpkasacradv3d3cC1.c1/sgpkasacradv3d3cC1.c1.20120831.083147.nc',\n", - " '/data/archive/sgp/sgpkasacradv3d3cC1.c1/sgpkasacradv3d3cC1.c1.20120831.155139.nc',\n", - " '/data/archive/sgp/sgpkasacradv3d3cC1.c1/sgpkasacradv3d3cC1.c1.20120831.081207.nc']" - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Filter a list of files based on date pattern\n", - "import glob\n", - "dates = get_ARM_formated_dates(start_date=date_start, end_date=date_end)\n", - "files_filter = []\n", - "for date in dates:\n", - " files_filter += glob.glob(f'{dir_path}/*.{date}*.*')\n", - " files_filter\n", - "files_filter" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "id": "b0e5d0d7", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "61 files loaded\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "
<xarray.Dataset>\n",
-       "Dimensions:                                       (time: 23687, bound: 2,\n",
-       "                                                   height: 201, bin: 28,\n",
-       "                                                   isoline: 4, h_distance: 401,\n",
-       "                                                   frequency: 1)\n",
-       "Coordinates:\n",
-       "  * time                                          (time) datetime64[ns] 2012-...\n",
-       "  * height                                        (height) float32 0.0 ... 10.0\n",
-       "  * bin                                           (bin) float32 -47.5 ... 20.0\n",
-       "  * isoline                                       (isoline) float32 5.0 ... 20.0\n",
-       "  * h_distance                                    (h_distance) float32 -1e+04...\n",
-       "  * frequency                                     (frequency) float32 3.529e+10\n",
-       "Dimensions without coordinates: bound\n",
-       "Data variables: (12/20)\n",
-       "    base_time                                     (time) datetime64[ns] 2012-...\n",
-       "    time_offset                                   (time) datetime64[ns] 2012-...\n",
-       "    time_bounds                                   (time, bound) object dask.array<chunksize=(392, 2), meta=np.ndarray>\n",
-       "    height_bounds                                 (time, height, bound) float32 dask.array<chunksize=(392, 201, 2), meta=np.ndarray>\n",
-       "    bin_bounds                                    (time, bin, bound) float32 dask.array<chunksize=(392, 28, 2), meta=np.ndarray>\n",
-       "    isoline_bounds                                (time, isoline, bound) float32 dask.array<chunksize=(392, 4, 2), meta=np.ndarray>\n",
-       "    ...                                            ...\n",
-       "    cloud_fraction                                (time, isoline, height) float32 dask.array<chunksize=(392, 4, 201), meta=np.ndarray>\n",
-       "    cloud_fraction_std                            (time, isoline, height) float32 dask.array<chunksize=(392, 4, 201), meta=np.ndarray>\n",
-       "    cfad                                          (time, bin, height) float32 dask.array<chunksize=(392, 28, 201), meta=np.ndarray>\n",
-       "    lat                                           (time) float32 36.6 ... 36.6\n",
-       "    lon                                           (time) float32 -97.49 ... -...\n",
-       "    alt                                           (time) float32 318.0 ... 318.0\n",
-       "Attributes: (12/20)\n",
-       "    command_line:          sacradv3d3c -s sgp -f C1 -b 20120829 -n sacradv3d3...\n",
-       "    process_version:       vap-sacradv3d3c-1.1-0.el6\n",
-       "    dod_version:           kasacradv3d3c-c1-1.2\n",
-       "    input_datastreams:     sgpkasacrcorcwrhiC1.c1 : 1.0 : 20120829.002403-201...\n",
-       "    site_id:               sgp\n",
-       "    platform_id:           kasacradv3d3c\n",
-       "    ...                    ...\n",
-       "    radar_beam_width_h:    0.311\n",
-       "    history:               created by user singh on machine amber at 2018-12-...\n",
-       "    _file_dates:           ['20120829', '20120829', '20120829', '20120829', '...\n",
-       "    _file_times:           ['002402', '004340', '025008', '030947', '051559',...\n",
-       "    _datastream:           sgpkasacradv3d3cC1.c1\n",
-       "    _arm_standards_flag:   1
" - ], - "text/plain": [ - "\n", - "Dimensions: (time: 23687, bound: 2,\n", - " height: 201, bin: 28,\n", - " isoline: 4, h_distance: 401,\n", - " frequency: 1)\n", - "Coordinates:\n", - " * time (time) datetime64[ns] 2012-...\n", - " * height (height) float32 0.0 ... 10.0\n", - " * bin (bin) float32 -47.5 ... 20.0\n", - " * isoline (isoline) float32 5.0 ... 20.0\n", - " * h_distance (h_distance) float32 -1e+04...\n", - " * frequency (frequency) float32 3.529e+10\n", - "Dimensions without coordinates: bound\n", - "Data variables: (12/20)\n", - " base_time (time) datetime64[ns] 2012-...\n", - " time_offset (time) datetime64[ns] 2012-...\n", - " time_bounds (time, bound) object dask.array\n", - " height_bounds (time, height, bound) float32 dask.array\n", - " bin_bounds (time, bin, bound) float32 dask.array\n", - " isoline_bounds (time, isoline, bound) float32 dask.array\n", - " ... ...\n", - " cloud_fraction (time, isoline, height) float32 dask.array\n", - " cloud_fraction_std (time, isoline, height) float32 dask.array\n", - " cfad (time, bin, height) float32 dask.array\n", - " lat (time) float32 36.6 ... 36.6\n", - " lon (time) float32 -97.49 ... -...\n", - " alt (time) float32 318.0 ... 318.0\n", - "Attributes: (12/20)\n", - " command_line: sacradv3d3c -s sgp -f C1 -b 20120829 -n sacradv3d3...\n", - " process_version: vap-sacradv3d3c-1.1-0.el6\n", - " dod_version: kasacradv3d3c-c1-1.2\n", - " input_datastreams: sgpkasacrcorcwrhiC1.c1 : 1.0 : 20120829.002403-201...\n", - " site_id: sgp\n", - " platform_id: kasacradv3d3c\n", - " ... 
...\n", - " radar_beam_width_h: 0.311\n", - " history: created by user singh on machine amber at 2018-12-...\n", - " _file_dates: ['20120829', '20120829', '20120829', '20120829', '...\n", - " _file_times: ['002402', '004340', '025008', '030947', '051559',...\n", - " _datastream: sgpkasacradv3d3cC1.c1\n", - " _arm_standards_flag: 1" - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Load files as a single dataset\n", - "files_list = files_filter \n", - "ds = act.io.armfiles.read_netcdf(files_list)\n", - "ds.clean.cleanup()\n", - "print(f'{len(files_list)} files loaded')\n", - "ds\n" - ] - }, - { - "cell_type": "markdown", - "id": "4a551094-9ec0-4b64-b80a-9940573c2f50", - "metadata": {}, - "source": [ - "## Plot time series data\n", - "#### Define the list of variables to be plotted:" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "id": "d09b789e-84f1-4605-846b-a72c110c8048", - "metadata": {}, - "outputs": [], - "source": [ - "variables_to_plot = ['wind_speed', 'wind_direction', 'cloud_fraction']" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "id": "3458fb08-035b-4898-9253-0a94e6f9c97b", - "metadata": {}, - "outputs": [ - { - "ename": "TypeError", - "evalue": "Dimensions of C (201, 4, 23687) should be one smaller than X(23687) and Y(4) while using shading='flat' see help(pcolormesh)", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[9], line 5\u001b[0m\n\u001b[1;32m 2\u001b[0m ts_display\u001b[38;5;241m.\u001b[39madd_subplots((\u001b[38;5;28mlen\u001b[39m(variables_to_plot),), figsize \u001b[38;5;241m=\u001b[39m (\u001b[38;5;241m9.5\u001b[39m,\u001b[38;5;241m4\u001b[39m\u001b[38;5;241m*\u001b[39m\u001b[38;5;28mlen\u001b[39m(variables_to_plot)))\n\u001b[1;32m 4\u001b[0m 
\u001b[38;5;28;01mfor\u001b[39;00m i,v \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28menumerate\u001b[39m(variables_to_plot):\n\u001b[0;32m----> 5\u001b[0m ts_ax \u001b[38;5;241m=\u001b[39m \u001b[43mts_display\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mplot\u001b[49m\u001b[43m(\u001b[49m\u001b[43mv\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43msubplot_index\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mi\u001b[49m\u001b[43m,\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mset_title\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mds\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mvariables\u001b[49m\u001b[43m[\u001b[49m\u001b[43mv\u001b[49m\u001b[43m]\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mattrs\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mlong_name\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 6\u001b[0m ts_ax\u001b[38;5;241m.\u001b[39mgrid()\n\u001b[1;32m 8\u001b[0m plt\u001b[38;5;241m.\u001b[39mshow()\n", - "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/act/plotting/timeseriesdisplay.py:588\u001b[0m, in \u001b[0;36mTimeSeriesDisplay.plot\u001b[0;34m(self, field, dsname, subplot_index, cmap, set_title, add_nan, day_night_background, invert_y_axis, abs_limits, time_rng, y_rng, use_var_for_y, set_shading, assessment_overplot, overplot_marker, overplot_behind, overplot_markersize, assessment_overplot_category, assessment_overplot_category_color, force_line_plot, labels, cbar_label, cbar_h_adjust, secondary_y, y_axis_flag_meanings, colorbar_labels, cb_friendly, **kwargs)\u001b[0m\n\u001b[1;32m 586\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124medgecolors\u001b[39m\u001b[38;5;124m'\u001b[39m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m 
kwargs\u001b[38;5;241m.\u001b[39mkeys():\n\u001b[1;32m 587\u001b[0m kwargs[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124medgecolors\u001b[39m\u001b[38;5;124m'\u001b[39m] \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mface\u001b[39m\u001b[38;5;124m'\u001b[39m\n\u001b[0;32m--> 588\u001b[0m mesh \u001b[38;5;241m=\u001b[39m \u001b[43max\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mpcolormesh\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 589\u001b[0m \u001b[43m \u001b[49m\u001b[43mnp\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43masarray\u001b[49m\u001b[43m(\u001b[49m\u001b[43mxdata\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 590\u001b[0m \u001b[43m \u001b[49m\u001b[43mydata\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 591\u001b[0m \u001b[43m \u001b[49m\u001b[43mdata\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mtranspose\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 592\u001b[0m \u001b[43m \u001b[49m\u001b[43mshading\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mset_shading\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 593\u001b[0m \u001b[43m \u001b[49m\u001b[43mcmap\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcmap\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 594\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 595\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 597\u001b[0m \u001b[38;5;66;03m# Set Title\u001b[39;00m\n\u001b[1;32m 598\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m set_title \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n", - "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/matplotlib/__init__.py:1442\u001b[0m, in \u001b[0;36m_preprocess_data..inner\u001b[0;34m(ax, data, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1439\u001b[0m 
\u001b[38;5;129m@functools\u001b[39m\u001b[38;5;241m.\u001b[39mwraps(func)\n\u001b[1;32m 1440\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21minner\u001b[39m(ax, \u001b[38;5;241m*\u001b[39margs, data\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs):\n\u001b[1;32m 1441\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m data \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m-> 1442\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[43max\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;28;43mmap\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43msanitize_sequence\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43margs\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1444\u001b[0m bound \u001b[38;5;241m=\u001b[39m new_sig\u001b[38;5;241m.\u001b[39mbind(ax, \u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n\u001b[1;32m 1445\u001b[0m auto_label \u001b[38;5;241m=\u001b[39m (bound\u001b[38;5;241m.\u001b[39marguments\u001b[38;5;241m.\u001b[39mget(label_namer)\n\u001b[1;32m 1446\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m bound\u001b[38;5;241m.\u001b[39mkwargs\u001b[38;5;241m.\u001b[39mget(label_namer))\n", - "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/matplotlib/axes/_axes.py:6220\u001b[0m, in \u001b[0;36mAxes.pcolormesh\u001b[0;34m(self, alpha, norm, cmap, vmin, vmax, shading, antialiased, *args, **kwargs)\u001b[0m\n\u001b[1;32m 6217\u001b[0m shading \u001b[38;5;241m=\u001b[39m shading\u001b[38;5;241m.\u001b[39mlower()\n\u001b[1;32m 6218\u001b[0m 
kwargs\u001b[38;5;241m.\u001b[39msetdefault(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124medgecolors\u001b[39m\u001b[38;5;124m'\u001b[39m, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mnone\u001b[39m\u001b[38;5;124m'\u001b[39m)\n\u001b[0;32m-> 6220\u001b[0m X, Y, C, shading \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_pcolorargs\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mpcolormesh\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 6221\u001b[0m \u001b[43m \u001b[49m\u001b[43mshading\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mshading\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mkwargs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 6222\u001b[0m coords \u001b[38;5;241m=\u001b[39m np\u001b[38;5;241m.\u001b[39mstack([X, Y], axis\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m-\u001b[39m\u001b[38;5;241m1\u001b[39m)\n\u001b[1;32m 6223\u001b[0m \u001b[38;5;66;03m# convert to one dimensional array, except for 3D RGB(A) arrays\u001b[39;00m\n", - "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/matplotlib/axes/_axes.py:5751\u001b[0m, in \u001b[0;36mAxes._pcolorargs\u001b[0;34m(self, funcname, shading, *args, **kwargs)\u001b[0m\n\u001b[1;32m 5749\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m shading \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mflat\u001b[39m\u001b[38;5;124m'\u001b[39m:\n\u001b[1;32m 5750\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m (Nx, Ny) \u001b[38;5;241m!=\u001b[39m (ncols \u001b[38;5;241m+\u001b[39m \u001b[38;5;241m1\u001b[39m, nrows \u001b[38;5;241m+\u001b[39m \u001b[38;5;241m1\u001b[39m):\n\u001b[0;32m-> 5751\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m 
\u001b[38;5;167;01mTypeError\u001b[39;00m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mDimensions of C \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mC\u001b[38;5;241m.\u001b[39mshape\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m should\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 5752\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m be one smaller than X(\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mNx\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m) and Y(\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mNy\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m)\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 5753\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m while using shading=\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mflat\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 5754\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m see help(\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mfuncname\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m)\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 5755\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m: \u001b[38;5;66;03m# ['nearest', 'gouraud']:\u001b[39;00m\n\u001b[1;32m 5756\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m (Nx, Ny) \u001b[38;5;241m!=\u001b[39m (ncols, nrows):\n", - "\u001b[0;31mTypeError\u001b[0m: Dimensions of C (201, 4, 23687) should be one smaller than X(23687) and Y(4) while using shading='flat' see help(pcolormesh)" - ] - }, - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "8189dde8d6e0443cb4732d2453c2a30b", - "version_major": 2, - "version_minor": 0 - }, - "image/png": "", - "text/html": [ - "\n", - "
\n", - "
\n", - " Figure\n", - "
\n", - " \n", - "
\n", - " " - ], - "text/plain": [ - "Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "ts_display = act.plotting.TimeSeriesDisplay(ds)\n", - "ts_display.add_subplots((len(variables_to_plot),), figsize = (9.5,4*len(variables_to_plot)))\n", - "\n", - "for i,v in enumerate(variables_to_plot):\n", - " ts_ax = ts_display.plot(v, subplot_index=(i,), set_title=ds.variables[v].attrs['long_name'],)\n", - " ts_ax.grid()\n", - "\n", - "plt.show()\n" - ] - }, - { - "cell_type": "markdown", - "id": "2308db16-f362-4033-a11a-c3e5e75ad9ba", - "metadata": {}, - "source": [ - "## Field selection dropdown menu\n", - "Select variable to be plotted from a dropdown menu" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "d5792fa8-2253-441a-8ac6-1e07a0345031", - "metadata": {}, - "outputs": [], - "source": [ - "plt.ioff()\n", - "\n", - "# populate dropdown menu with available variables \n", - "available_variables = [v for v in ds.variables if not('time' in v or v.startswith('qc_') or v.startswith('source_')) and 'long_name' in ds.variables[v].attrs]\n", - "d_variable = 'wind_speed'\n", - "dropdown = widgets.Dropdown(\n", - " options = [(ds.variables[v].attrs['long_name'], v) for v in available_variables],\n", - " value= d_variable,\n", - " description='Field:',\n", - " disabled=False,\n", - ")\n", - "dropdown.layout.margin = '0px 30% 0px 20%'\n", - "dropdown.layout.width = '50%'\n", - "\n", - "# set up display\n", - "i_display = act.plotting.TimeSeriesDisplay(ds)\n", - "i_display.add_subplots((1,), figsize = (9.5,5))\n", - "i_ax = i_display.plot(d_variable, subplot_index=(0,), set_title=ds.variables[d_variable].attrs['long_name'],)\n", - "i_ax.grid()\n", - "i_fig = i_display.fig\n", - "\n", - "# update plot callback function\n", - "def update_plot(change):\n", - " i_ax.cla()\n", - " i_ax_new = i_display.plot(change.new, 
subplot_index=(0,), set_title=ds.variables[change.new].attrs['long_name'],)\n", - " i_ax_new.grid()\n", - " i_fig.canvas.draw()\n", - " i_fig.canvas.flush_events()\n", - "\n", - "dropdown.observe(update_plot, names='value')\n", - "\n", - "widgets.AppLayout(\n", - " header=dropdown,\n", - " center=i_fig.canvas,\n", - " pane_heights=[1, 6,1]\n", - ")\n" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.16" - }, - "widgets": { - "application/vnd.jupyter.widget-state+json": { - "state": {}, - "version_major": 2, - "version_minor": 0 - } - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/VAPs/quicklook/SFCCLDGRID/.ipynb_checkpoints/15swfcldgrid1long.c1-checkpoint.ipynb b/VAPs/quicklook/SFCCLDGRID/.ipynb_checkpoints/15swfcldgrid1long.c1-checkpoint.ipynb deleted file mode 100644 index 0ed9cccf..00000000 --- a/VAPs/quicklook/SFCCLDGRID/.ipynb_checkpoints/15swfcldgrid1long.c1-checkpoint.ipynb +++ /dev/null @@ -1,2384 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "70840257-70e4-45e2-b491-14bff5a257a3", - "metadata": {}, - "source": [ - "# 15SWFCLDGRID1LONG.C1 Plots\n", - "\n", - "[Click here](https://www.arm.gov/capabilities/vaps/sfccldgrid) for more information about this vap." 
- ] - }, - { - "cell_type": "code", - "execution_count": 1, - "id": "460fd89f-e034-452c-b837-f65c5958264f", - "metadata": {}, - "outputs": [], - "source": [ - "%matplotlib widget\n", - "import ipywidgets as widgets\n", - "\n", - "import matplotlib.pyplot as plt\n", - "import ipywidgets as widgets\n", - "import numpy as np\n", - "import pandas as pd\n", - "import os\n", - "from datetime import datetime\n", - "\n", - "import act\n", - "import xarray as xr\n", - "\n", - "# Data archive directory\n", - "DATA_DIR = r'/data/archive/'\n", - "\n", - "# Datastream info\n", - "DATASTREAM_NAME = '15swfcldgrid1long'\n", - "DATA_LEVEL = 'c1'\n", - "LOCATIONS = [{'end_date': '2009-11-25', 'facility': 'N1', 'site': 'sgp', 'start_date': '1997-01-01'}]" - ] - }, - { - "cell_type": "markdown", - "id": "9faaf875", - "metadata": {}, - "source": [ - "## Define site, facility, and date range" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "ac6764f5", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The following locations and date ranges are available for this VAP:\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
sitefacilitystart_dateend_date
0sgpN11997-01-012009-11-25
\n", - "
" - ], - "text/plain": [ - " site facility start_date end_date\n", - "0 sgp N1 1997-01-01 2009-11-25" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "print(\"The following locations and date ranges are available for this VAP:\")\n", - "display(pd.DataFrame(LOCATIONS, columns=['site', 'facility', 'start_date', 'end_date']))" - ] - }, - { - "cell_type": "markdown", - "id": "8d132223", - "metadata": {}, - "source": [ - "#### Define site, facility, and date range (date format: YYYY-MM-DD) using the variables below:" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "e563983a", - "metadata": {}, - "outputs": [], - "source": [ - "site_facility = ( 'sgp', 'N1' )\n", - "\n", - "date_start = '2009-11-23'\n", - "date_end = '2009-11-25'" - ] - }, - { - "cell_type": "markdown", - "id": "bccd3dfe-2f99-49a5-bace-ea37e8dd8fc3", - "metadata": {}, - "source": [ - "## Load data files\n", - "Load data files from /data/archive/" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "cb4b9a26-c574-49c0-a521-658fa553e39e", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "'/data/archive/sgp/sgp15swfcldgrid1longN1.c1'" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Compile list of files\n", - "site, facility = site_facility\n", - "d_date_start = datetime.strptime(date_start, '%Y-%m-%d')\n", - "d_date_end = datetime.strptime(date_end, '%Y-%m-%d')\n", - "dir_path = os.path.join(DATA_DIR + site, site + DATASTREAM_NAME + facility + r'.' 
+ DATA_LEVEL )\n", - "dir_path\n" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "6be8f3dc", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "['20091123', '20091124', '20091125']" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "from datetime import date, timedelta\n", - "import pandas as pd\n", - "\n", - "def get_ARM_formated_dates(start_date, end_date):\n", - " \"\"\"\n", - " Get a list of ARM conventional formated date lists, based on start_date and end_date(inclusive)\n", - " EXAMPLE:\n", - " get_ARM_formated_dates(start_date=\"20180219\", end_date=\"20180221\")\n", - " >> [\"20180219\", \"20180220\", \"20180221\"] \n", - " \"\"\"\n", - " \n", - " _start_date = pd.to_datetime(start_date)\n", - " _end_date = pd.to_datetime(end_date)\n", - " \n", - " delta = _end_date - _start_date # returns timedelta \n", - " dates = []\n", - "\n", - " for i in range(delta.days + 1):\n", - " day = _start_date + timedelta(days=i)\n", - " day_formated = day.strftime(format=\"%Y%m%d\")\n", - " dates.append(day_formated)\n", - " return dates\n", - "\n", - "\n", - "get_ARM_formated_dates(start_date=date_start, end_date=date_end)" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "51feea2e", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "['/data/archive/sgp/sgp15swfcldgrid1longN1.c1/sgp15swfcldgrid1longN1.c1.20091123.180000.cdf',\n", - " '/data/archive/sgp/sgp15swfcldgrid1longN1.c1/sgp15swfcldgrid1longN1.c1.20091124.144500.cdf',\n", - " '/data/archive/sgp/sgp15swfcldgrid1longN1.c1/sgp15swfcldgrid1longN1.c1.20091125.144500.cdf']" - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Filter a list of files based on date pattern\n", - "import glob\n", - "dates = get_ARM_formated_dates(start_date=date_start, end_date=date_end)\n", - "files_filter = []\n", - "for date in dates:\n", - " 
files_filter += glob.glob(f'{dir_path}/*.{date}*.*')\n", - " files_filter\n", - "files_filter" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "id": "b0e5d0d7", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "3 files loaded\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "
<xarray.Dataset>\n",
-       "Dimensions:           (time: 33, lat: 15, lon: 17)\n",
-       "Coordinates:\n",
-       "  * time              (time) datetime64[ns] 2009-11-23T18:00:00 ... 2009-11-2...\n",
-       "  * lat               (lat) float32 38.5 38.25 38.0 37.75 ... 35.5 35.25 35.0\n",
-       "  * lon               (lon) float32 99.5 99.25 99.0 98.75 ... 96.0 95.75 95.5\n",
-       "Data variables: (12/22)\n",
-       "    base_time         (time) datetime64[ns] 2009-11-23T18:00:00 ... 2009-11-2...\n",
-       "    time_offset       (time) datetime64[ns] 2009-11-23T18:00:00 ... 2009-11-2...\n",
-       "    cloudfraction     (time, lat, lon) float32 dask.array<chunksize=(2, 15, 17), meta=np.ndarray>\n",
-       "    qc_cloudfraction  (time, lat, lon) int32 dask.array<chunksize=(2, 15, 17), meta=np.ndarray>\n",
-       "    cf_cloudfraction  (time) int32 dask.array<chunksize=(2,), meta=np.ndarray>\n",
-       "    tswfluxdn         (time, lat, lon) float32 dask.array<chunksize=(2, 15, 17), meta=np.ndarray>\n",
-       "    ...                ...\n",
-       "    cf_clrfluxdn      (time) int32 dask.array<chunksize=(2,), meta=np.ndarray>\n",
-       "    cdirfluxdn        (time, lat, lon) float32 dask.array<chunksize=(2, 15, 17), meta=np.ndarray>\n",
-       "    qc_cdirfluxdn     (time, lat, lon) int32 dask.array<chunksize=(2, 15, 17), meta=np.ndarray>\n",
-       "    cf_cdirfluxdn     (time) int32 dask.array<chunksize=(2,), meta=np.ndarray>\n",
-       "    azimuth           (time) float32 dask.array<chunksize=(2,), meta=np.ndarray>\n",
-       "    alt               (time) float32 318.0 318.0 318.0 ... 318.0 318.0 318.0\n",
-       "Attributes: (12/18)\n",
-       "    Date:                           Wed Jun 16 21:58:59 2010\n",
-       "    Version:                        $State: process-vap-sfccldgrid1long-2.0-0 $\n",
-       "    Command_Line:                   sfccldgrid1long -d 20091123\n",
-       "    Input_Platforms:                sgp15swfanalbrs1longC1.c1, sgp15swfanalsi...\n",
-       "    BW_Version:                     $State: ds-dsutil-bw-4.3-0 $\n",
-       "    qc_format_version:              0.1\n",
-       "    ...                             ...\n",
-       "    history:                        created by user dsmgr on machine zinc at ...\n",
-       "    _file_dates:                    ['20091123', '20091124', '20091125']\n",
-       "    _file_times:                    ['180000', '144500', '144500']\n",
-       "    datastream:                     sgp15swfcldgrid1longN1.c1\n",
-       "    _datastream:                    sgp15swfcldgrid1longN1.c1\n",
-       "    _arm_standards_flag:            1
" - ], - "text/plain": [ - "\n", - "Dimensions: (time: 33, lat: 15, lon: 17)\n", - "Coordinates:\n", - " * time (time) datetime64[ns] 2009-11-23T18:00:00 ... 2009-11-2...\n", - " * lat (lat) float32 38.5 38.25 38.0 37.75 ... 35.5 35.25 35.0\n", - " * lon (lon) float32 99.5 99.25 99.0 98.75 ... 96.0 95.75 95.5\n", - "Data variables: (12/22)\n", - " base_time (time) datetime64[ns] 2009-11-23T18:00:00 ... 2009-11-2...\n", - " time_offset (time) datetime64[ns] 2009-11-23T18:00:00 ... 2009-11-2...\n", - " cloudfraction (time, lat, lon) float32 dask.array\n", - " qc_cloudfraction (time, lat, lon) int32 dask.array\n", - " cf_cloudfraction (time) int32 dask.array\n", - " tswfluxdn (time, lat, lon) float32 dask.array\n", - " ... ...\n", - " cf_clrfluxdn (time) int32 dask.array\n", - " cdirfluxdn (time, lat, lon) float32 dask.array\n", - " qc_cdirfluxdn (time, lat, lon) int32 dask.array\n", - " cf_cdirfluxdn (time) int32 dask.array\n", - " azimuth (time) float32 dask.array\n", - " alt (time) float32 318.0 318.0 318.0 ... 318.0 318.0 318.0\n", - "Attributes: (12/18)\n", - " Date: Wed Jun 16 21:58:59 2010\n", - " Version: $State: process-vap-sfccldgrid1long-2.0-0 $\n", - " Command_Line: sfccldgrid1long -d 20091123\n", - " Input_Platforms: sgp15swfanalbrs1longC1.c1, sgp15swfanalsi...\n", - " BW_Version: $State: ds-dsutil-bw-4.3-0 $\n", - " qc_format_version: 0.1\n", - " ... 
...\n", - " history: created by user dsmgr on machine zinc at ...\n", - " _file_dates: ['20091123', '20091124', '20091125']\n", - " _file_times: ['180000', '144500', '144500']\n", - " datastream: sgp15swfcldgrid1longN1.c1\n", - " _datastream: sgp15swfcldgrid1longN1.c1\n", - " _arm_standards_flag: 1" - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Load files as a single dataset\n", - "files_list = files_filter \n", - "ds = act.io.armfiles.read_netcdf(files_list)\n", - "ds.clean.cleanup()\n", - "print(f'{len(files_list)} files loaded')\n", - "ds\n" - ] - }, - { - "cell_type": "markdown", - "id": "4a551094-9ec0-4b64-b80a-9940573c2f50", - "metadata": {}, - "source": [ - "## Plot time series data\n", - "#### Define the list of variables to be plotted:" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "id": "d09b789e-84f1-4605-846b-a72c110c8048", - "metadata": {}, - "outputs": [], - "source": [ - "variables_to_plot = ['cloudfraction', 'cf_cloudfraction', 'tswfluxdn']" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "id": "3458fb08-035b-4898-9253-0a94e6f9c97b", - "metadata": {}, - "outputs": [ - { - "ename": "TypeError", - "evalue": "Dimensions of C (17, 15, 33) should be one smaller than X(33) and Y(15) while using shading='flat' see help(pcolormesh)", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[9], line 5\u001b[0m\n\u001b[1;32m 2\u001b[0m ts_display\u001b[38;5;241m.\u001b[39madd_subplots((\u001b[38;5;28mlen\u001b[39m(variables_to_plot),), figsize \u001b[38;5;241m=\u001b[39m (\u001b[38;5;241m9.5\u001b[39m,\u001b[38;5;241m4\u001b[39m\u001b[38;5;241m*\u001b[39m\u001b[38;5;28mlen\u001b[39m(variables_to_plot)))\n\u001b[1;32m 4\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m i,v 
\u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28menumerate\u001b[39m(variables_to_plot):\n\u001b[0;32m----> 5\u001b[0m ts_ax \u001b[38;5;241m=\u001b[39m \u001b[43mts_display\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mplot\u001b[49m\u001b[43m(\u001b[49m\u001b[43mv\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43msubplot_index\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mi\u001b[49m\u001b[43m,\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mset_title\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mds\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mvariables\u001b[49m\u001b[43m[\u001b[49m\u001b[43mv\u001b[49m\u001b[43m]\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mattrs\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mlong_name\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 6\u001b[0m ts_ax\u001b[38;5;241m.\u001b[39mgrid()\n\u001b[1;32m 8\u001b[0m plt\u001b[38;5;241m.\u001b[39mshow()\n", - "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/act/plotting/timeseriesdisplay.py:588\u001b[0m, in \u001b[0;36mTimeSeriesDisplay.plot\u001b[0;34m(self, field, dsname, subplot_index, cmap, set_title, add_nan, day_night_background, invert_y_axis, abs_limits, time_rng, y_rng, use_var_for_y, set_shading, assessment_overplot, overplot_marker, overplot_behind, overplot_markersize, assessment_overplot_category, assessment_overplot_category_color, force_line_plot, labels, cbar_label, cbar_h_adjust, secondary_y, y_axis_flag_meanings, colorbar_labels, cb_friendly, **kwargs)\u001b[0m\n\u001b[1;32m 586\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124medgecolors\u001b[39m\u001b[38;5;124m'\u001b[39m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m kwargs\u001b[38;5;241m.\u001b[39mkeys():\n\u001b[1;32m 
587\u001b[0m kwargs[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124medgecolors\u001b[39m\u001b[38;5;124m'\u001b[39m] \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mface\u001b[39m\u001b[38;5;124m'\u001b[39m\n\u001b[0;32m--> 588\u001b[0m mesh \u001b[38;5;241m=\u001b[39m \u001b[43max\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mpcolormesh\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 589\u001b[0m \u001b[43m \u001b[49m\u001b[43mnp\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43masarray\u001b[49m\u001b[43m(\u001b[49m\u001b[43mxdata\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 590\u001b[0m \u001b[43m \u001b[49m\u001b[43mydata\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 591\u001b[0m \u001b[43m \u001b[49m\u001b[43mdata\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mtranspose\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 592\u001b[0m \u001b[43m \u001b[49m\u001b[43mshading\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mset_shading\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 593\u001b[0m \u001b[43m \u001b[49m\u001b[43mcmap\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcmap\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 594\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 595\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 597\u001b[0m \u001b[38;5;66;03m# Set Title\u001b[39;00m\n\u001b[1;32m 598\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m set_title \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n", - "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/matplotlib/__init__.py:1442\u001b[0m, in \u001b[0;36m_preprocess_data..inner\u001b[0;34m(ax, data, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1439\u001b[0m \u001b[38;5;129m@functools\u001b[39m\u001b[38;5;241m.\u001b[39mwraps(func)\n\u001b[1;32m 1440\u001b[0m 
\u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21minner\u001b[39m(ax, \u001b[38;5;241m*\u001b[39margs, data\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs):\n\u001b[1;32m 1441\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m data \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m-> 1442\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[43max\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;28;43mmap\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43msanitize_sequence\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43margs\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1444\u001b[0m bound \u001b[38;5;241m=\u001b[39m new_sig\u001b[38;5;241m.\u001b[39mbind(ax, \u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n\u001b[1;32m 1445\u001b[0m auto_label \u001b[38;5;241m=\u001b[39m (bound\u001b[38;5;241m.\u001b[39marguments\u001b[38;5;241m.\u001b[39mget(label_namer)\n\u001b[1;32m 1446\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m bound\u001b[38;5;241m.\u001b[39mkwargs\u001b[38;5;241m.\u001b[39mget(label_namer))\n", - "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/matplotlib/axes/_axes.py:6220\u001b[0m, in \u001b[0;36mAxes.pcolormesh\u001b[0;34m(self, alpha, norm, cmap, vmin, vmax, shading, antialiased, *args, **kwargs)\u001b[0m\n\u001b[1;32m 6217\u001b[0m shading \u001b[38;5;241m=\u001b[39m shading\u001b[38;5;241m.\u001b[39mlower()\n\u001b[1;32m 6218\u001b[0m kwargs\u001b[38;5;241m.\u001b[39msetdefault(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124medgecolors\u001b[39m\u001b[38;5;124m'\u001b[39m, 
\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mnone\u001b[39m\u001b[38;5;124m'\u001b[39m)\n\u001b[0;32m-> 6220\u001b[0m X, Y, C, shading \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_pcolorargs\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mpcolormesh\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 6221\u001b[0m \u001b[43m \u001b[49m\u001b[43mshading\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mshading\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mkwargs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 6222\u001b[0m coords \u001b[38;5;241m=\u001b[39m np\u001b[38;5;241m.\u001b[39mstack([X, Y], axis\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m-\u001b[39m\u001b[38;5;241m1\u001b[39m)\n\u001b[1;32m 6223\u001b[0m \u001b[38;5;66;03m# convert to one dimensional array, except for 3D RGB(A) arrays\u001b[39;00m\n", - "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/matplotlib/axes/_axes.py:5751\u001b[0m, in \u001b[0;36mAxes._pcolorargs\u001b[0;34m(self, funcname, shading, *args, **kwargs)\u001b[0m\n\u001b[1;32m 5749\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m shading \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mflat\u001b[39m\u001b[38;5;124m'\u001b[39m:\n\u001b[1;32m 5750\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m (Nx, Ny) \u001b[38;5;241m!=\u001b[39m (ncols \u001b[38;5;241m+\u001b[39m \u001b[38;5;241m1\u001b[39m, nrows \u001b[38;5;241m+\u001b[39m \u001b[38;5;241m1\u001b[39m):\n\u001b[0;32m-> 5751\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mTypeError\u001b[39;00m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mDimensions of C 
\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mC\u001b[38;5;241m.\u001b[39mshape\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m should\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 5752\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m be one smaller than X(\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mNx\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m) and Y(\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mNy\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m)\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 5753\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m while using shading=\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mflat\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 5754\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m see help(\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mfuncname\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m)\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 5755\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m: \u001b[38;5;66;03m# ['nearest', 'gouraud']:\u001b[39;00m\n\u001b[1;32m 5756\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m (Nx, Ny) \u001b[38;5;241m!=\u001b[39m (ncols, nrows):\n", - "\u001b[0;31mTypeError\u001b[0m: Dimensions of C (17, 15, 33) should be one smaller than X(33) and Y(15) while using shading='flat' see help(pcolormesh)" - ] - }, - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "5ec59307f5704277961c72d61c88ebc6", - "version_major": 2, - "version_minor": 0 - }, - "image/png": "", - "text/html": [ - "\n", - "
\n", - "
\n", - " Figure\n", - "
\n", - " \n", - "
\n", - " " - ], - "text/plain": [ - "Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "ts_display = act.plotting.TimeSeriesDisplay(ds)\n", - "ts_display.add_subplots((len(variables_to_plot),), figsize = (9.5,4*len(variables_to_plot)))\n", - "\n", - "for i,v in enumerate(variables_to_plot):\n", - " ts_ax = ts_display.plot(v, subplot_index=(i,), set_title=ds.variables[v].attrs['long_name'],)\n", - " ts_ax.grid()\n", - "\n", - "plt.show()\n" - ] - }, - { - "cell_type": "markdown", - "id": "194399aa-1907-452b-8ba9-bc31d7f60291", - "metadata": {}, - "source": [ - "## Quality check plots\n", - "#### Define variable for QC plot" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "9a0ef63d-3eeb-48fc-a24a-43258b6134b8", - "metadata": {}, - "outputs": [], - "source": [ - "qc_variable = 'cloudfraction'" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "532663a3-4dc0-4497-bda8-018c5f91e1c4", - "metadata": {}, - "outputs": [], - "source": [ - "# QC Plot\n", - "if ('qc_'+qc_variable) in ds.variables:\n", - "\n", - " # Plot\n", - " qc_display = act.plotting.TimeSeriesDisplay(ds)\n", - " qc_display.add_subplots((2,), figsize = (9.5,10))\n", - " qc_ax = qc_display.plot(qc_variable, subplot_index=(0,), set_title=\"QC results on field: \" + qc_variable,)\n", - " qc_ax.grid()\n", - " qc_display.qc_flag_block_plot(qc_variable, subplot_index=(1,))\n", - "\n", - " plt.show()\n", - "else:\n", - " print(f'QC not available for the selected field: {qc_variable}')\n" - ] - }, - { - "cell_type": "markdown", - "id": "2308db16-f362-4033-a11a-c3e5e75ad9ba", - "metadata": {}, - "source": [ - "## Field selection dropdown menu\n", - "Select variable to be plotted from a dropdown menu" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "d5792fa8-2253-441a-8ac6-1e07a0345031", - "metadata": {}, - 
"outputs": [], - "source": [ - "plt.ioff()\n", - "\n", - "# populate dropdown menu with available variables \n", - "available_variables = [v for v in ds.variables if not('time' in v or v.startswith('qc_') or v.startswith('source_')) and 'long_name' in ds.variables[v].attrs]\n", - "d_variable = 'cloudfraction'\n", - "dropdown = widgets.Dropdown(\n", - " options = [(ds.variables[v].attrs['long_name'], v) for v in available_variables],\n", - " value= d_variable,\n", - " description='Field:',\n", - " disabled=False,\n", - ")\n", - "dropdown.layout.margin = '0px 30% 0px 20%'\n", - "dropdown.layout.width = '50%'\n", - "\n", - "# set up display\n", - "i_display = act.plotting.TimeSeriesDisplay(ds)\n", - "i_display.add_subplots((1,), figsize = (9.5,5))\n", - "i_ax = i_display.plot(d_variable, subplot_index=(0,), set_title=ds.variables[d_variable].attrs['long_name'],)\n", - "i_ax.grid()\n", - "i_fig = i_display.fig\n", - "\n", - "# update plot callback function\n", - "def update_plot(change):\n", - " i_ax.cla()\n", - " i_ax_new = i_display.plot(change.new, subplot_index=(0,), set_title=ds.variables[change.new].attrs['long_name'],)\n", - " i_ax_new.grid()\n", - " i_fig.canvas.draw()\n", - " i_fig.canvas.flush_events()\n", - "\n", - "dropdown.observe(update_plot, names='value')\n", - "\n", - "widgets.AppLayout(\n", - " header=dropdown,\n", - " center=i_fig.canvas,\n", - " pane_heights=[1, 6,1]\n", - ")\n" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.16" - }, - "widgets": { - "application/vnd.jupyter.widget-state+json": { - "state": {}, - "version_major": 2, - "version_minor": 0 - } - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git 
a/VAPs/quicklook/SFCCLDGRID/.ipynb_checkpoints/sfccldgrid2longcaracena.c1-checkpoint.ipynb b/VAPs/quicklook/SFCCLDGRID/.ipynb_checkpoints/sfccldgrid2longcaracena.c1-checkpoint.ipynb deleted file mode 100644 index d7d3c877..00000000 --- a/VAPs/quicklook/SFCCLDGRID/.ipynb_checkpoints/sfccldgrid2longcaracena.c1-checkpoint.ipynb +++ /dev/null @@ -1,5150 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "70840257-70e4-45e2-b491-14bff5a257a3", - "metadata": {}, - "source": [ - "# SFCCLDGRID2LONGCARACENA.C1 Plots\n", - "\n", - "[Click here](https://www.arm.gov/capabilities/vaps/sfccldgrid) for more information about this vap." - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "id": "460fd89f-e034-452c-b837-f65c5958264f", - "metadata": {}, - "outputs": [], - "source": [ - "%matplotlib widget\n", - "import ipywidgets as widgets\n", - "\n", - "import matplotlib.pyplot as plt\n", - "import ipywidgets as widgets\n", - "import numpy as np\n", - "import pandas as pd\n", - "import os\n", - "from datetime import datetime\n", - "\n", - "import act\n", - "import xarray as xr\n", - "\n", - "# Data archive directory\n", - "DATA_DIR = r'/data/archive/'\n", - "\n", - "# Datastream info\n", - "DATASTREAM_NAME = 'sfccldgrid2longcaracena'\n", - "DATA_LEVEL = 'c1'\n", - "LOCATIONS = [{'end_date': '2020-06-01', 'facility': 'N1', 'site': 'sgp', 'start_date': '2011-10-21'}]" - ] - }, - { - "cell_type": "markdown", - "id": "9faaf875", - "metadata": {}, - "source": [ - "## Define site, facility, and date range" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "ac6764f5", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The following locations and date ranges are available for this VAP:\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
sitefacilitystart_dateend_date
0sgpN12011-10-212020-06-01
\n", - "
" - ], - "text/plain": [ - " site facility start_date end_date\n", - "0 sgp N1 2011-10-21 2020-06-01" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "print(\"The following locations and date ranges are available for this VAP:\")\n", - "display(pd.DataFrame(LOCATIONS, columns=['site', 'facility', 'start_date', 'end_date']))" - ] - }, - { - "cell_type": "markdown", - "id": "8d132223", - "metadata": {}, - "source": [ - "#### Define site, facility, and date range (date format: YYYY-MM-DD) using the variables below:" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "e563983a", - "metadata": {}, - "outputs": [], - "source": [ - "site_facility = ( 'sgp', 'N1' )\n", - "\n", - "date_start = '2020-05-29'\n", - "date_end = '2020-05-31'" - ] - }, - { - "cell_type": "markdown", - "id": "bccd3dfe-2f99-49a5-bace-ea37e8dd8fc3", - "metadata": {}, - "source": [ - "## Load data files\n", - "Load data files from /data/archive/" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "cb4b9a26-c574-49c0-a521-658fa553e39e", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "'/data/archive/sgp/sgpsfccldgrid2longcaracenaN1.c1'" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Compile list of files\n", - "site, facility = site_facility\n", - "d_date_start = datetime.strptime(date_start, '%Y-%m-%d')\n", - "d_date_end = datetime.strptime(date_end, '%Y-%m-%d')\n", - "dir_path = os.path.join(DATA_DIR + site, site + DATASTREAM_NAME + facility + r'.' 
+ DATA_LEVEL )\n", - "dir_path\n" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "6be8f3dc", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "['20200529', '20200530', '20200531']" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "from datetime import date, timedelta\n", - "import pandas as pd\n", - "\n", - "def get_ARM_formated_dates(start_date, end_date):\n", - " \"\"\"\n", - " Get a list of ARM conventional formated date lists, based on start_date and end_date(inclusive)\n", - " EXAMPLE:\n", - " get_ARM_formated_dates(start_date=\"20180219\", end_date=\"20180221\")\n", - " >> [\"20180219\", \"20180220\", \"20180221\"] \n", - " \"\"\"\n", - " \n", - " _start_date = pd.to_datetime(start_date)\n", - " _end_date = pd.to_datetime(end_date)\n", - " \n", - " delta = _end_date - _start_date # returns timedelta \n", - " dates = []\n", - "\n", - " for i in range(delta.days + 1):\n", - " day = _start_date + timedelta(days=i)\n", - " day_formated = day.strftime(format=\"%Y%m%d\")\n", - " dates.append(day_formated)\n", - " return dates\n", - "\n", - "\n", - "get_ARM_formated_dates(start_date=date_start, end_date=date_end)" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "51feea2e", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "['/data/archive/sgp/sgpsfccldgrid2longcaracenaN1.c1/sgpsfccldgrid2longcaracenaN1.c1.20200529.060000.nc',\n", - " '/data/archive/sgp/sgpsfccldgrid2longcaracenaN1.c1/sgpsfccldgrid2longcaracenaN1.c1.20200530.060000.nc',\n", - " '/data/archive/sgp/sgpsfccldgrid2longcaracenaN1.c1/sgpsfccldgrid2longcaracenaN1.c1.20200531.060000.nc']" - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Filter a list of files based on date pattern\n", - "import glob\n", - "dates = get_ARM_formated_dates(start_date=date_start, end_date=date_end)\n", - "files_filter = []\n", 
- "for date in dates:\n", - " files_filter += glob.glob(f'{dir_path}/*.{date}*.*')\n", - " files_filter\n", - "files_filter" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "id": "b0e5d0d7", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "3 files loaded\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "
<xarray.Dataset>\n",
-       "Dimensions:                                                         (time: 288,\n",
-       "                                                                     bound: 2,\n",
-       "                                                                     lat: 8,\n",
-       "                                                                     lon: 11)\n",
-       "Coordinates:\n",
-       "  * time                                                            (time) datetime64[ns] ...\n",
-       "  * lat                                                             (lat) float32 ...\n",
-       "  * lon                                                             (lon) float32 ...\n",
-       "Dimensions without coordinates: bound\n",
-       "Data variables: (12/59)\n",
-       "    base_time                                                       (time) datetime64[ns] ...\n",
-       "    time_offset                                                     (time) datetime64[ns] ...\n",
-       "    time_bounds                                                     (time, bound) object dask.array<chunksize=(96, 2), meta=np.ndarray>\n",
-       "    downwelling_shortwave                                           (time, lat, lon) float32 dask.array<chunksize=(96, 8, 11), meta=np.ndarray>\n",
-       "    source_central_facility_downwelling_shortwave                   (time) int32 dask.array<chunksize=(96,), meta=np.ndarray>\n",
-       "    qc_downwelling_shortwave                                        (time, lat, lon) int32 dask.array<chunksize=(96, 8, 11), meta=np.ndarray>\n",
-       "    ...                                                              ...\n",
-       "    qc_visible_cloud_optical_depth                                  (time, lat, lon) int32 dask.array<chunksize=(96, 8, 11), meta=np.ndarray>\n",
-       "    cloud_radiating_temperature                                     (time, lat, lon) float32 dask.array<chunksize=(96, 8, 11), meta=np.ndarray>\n",
-       "    source_central_facility_cloud_radiating_temperature             (time) int32 dask.array<chunksize=(96,), meta=np.ndarray>\n",
-       "    qc_cloud_radiating_temperature                                  (time, lat, lon) int32 dask.array<chunksize=(96, 8, 11), meta=np.ndarray>\n",
-       "    azimuth                                                         (time) float32 dask.array<chunksize=(96,), meta=np.ndarray>\n",
-       "    alt                                                             (time, lat, lon) float32 dask.array<chunksize=(96, 8, 11), meta=np.ndarray>\n",
-       "Attributes: (12/17)\n",
-       "    command_line:          sfccldgrid2long_caracena -s sgp -f N1 -b 20171001 ...\n",
-       "    Conventions:           ARM-1.3\n",
-       "    process_version:       vap-sfccldgrid2long_caracena-1.4-0.el7\n",
-       "    dod_version:           sfccldgrid2longcaracena-c1-1.2\n",
-       "    input_datastreams:     sgpsfccldgrid2longstationN1.c1 : 1.4 : 20200529.06...\n",
-       "    site_id:               sgp\n",
-       "    ...                    ...\n",
-       "    doi:                   10.5439/1393588\n",
-       "    history:               created by user gaustad on machine agate at 2022-0...\n",
-       "    _file_dates:           ['20200529', '20200530', '20200531']\n",
-       "    _file_times:           ['060000', '060000', '060000']\n",
-       "    _datastream:           sgpsfccldgrid2longcaracenaN1.c1\n",
-       "    _arm_standards_flag:   1
" - ], - "text/plain": [ - "\n", - "Dimensions: (time: 288,\n", - " bound: 2,\n", - " lat: 8,\n", - " lon: 11)\n", - "Coordinates:\n", - " * time (time) datetime64[ns] ...\n", - " * lat (lat) float32 ...\n", - " * lon (lon) float32 ...\n", - "Dimensions without coordinates: bound\n", - "Data variables: (12/59)\n", - " base_time (time) datetime64[ns] ...\n", - " time_offset (time) datetime64[ns] ...\n", - " time_bounds (time, bound) object dask.array\n", - " downwelling_shortwave (time, lat, lon) float32 dask.array\n", - " source_central_facility_downwelling_shortwave (time) int32 dask.array\n", - " qc_downwelling_shortwave (time, lat, lon) int32 dask.array\n", - " ... ...\n", - " qc_visible_cloud_optical_depth (time, lat, lon) int32 dask.array\n", - " cloud_radiating_temperature (time, lat, lon) float32 dask.array\n", - " source_central_facility_cloud_radiating_temperature (time) int32 dask.array\n", - " qc_cloud_radiating_temperature (time, lat, lon) int32 dask.array\n", - " azimuth (time) float32 dask.array\n", - " alt (time, lat, lon) float32 dask.array\n", - "Attributes: (12/17)\n", - " command_line: sfccldgrid2long_caracena -s sgp -f N1 -b 20171001 ...\n", - " Conventions: ARM-1.3\n", - " process_version: vap-sfccldgrid2long_caracena-1.4-0.el7\n", - " dod_version: sfccldgrid2longcaracena-c1-1.2\n", - " input_datastreams: sgpsfccldgrid2longstationN1.c1 : 1.4 : 20200529.06...\n", - " site_id: sgp\n", - " ... 
...\n", - " doi: 10.5439/1393588\n", - " history: created by user gaustad on machine agate at 2022-0...\n", - " _file_dates: ['20200529', '20200530', '20200531']\n", - " _file_times: ['060000', '060000', '060000']\n", - " _datastream: sgpsfccldgrid2longcaracenaN1.c1\n", - " _arm_standards_flag: 1" - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Load files as a single dataset\n", - "files_list = files_filter \n", - "ds = act.io.armfiles.read_netcdf(files_list)\n", - "ds.clean.cleanup()\n", - "print(f'{len(files_list)} files loaded')\n", - "ds\n" - ] - }, - { - "cell_type": "markdown", - "id": "4a551094-9ec0-4b64-b80a-9940573c2f50", - "metadata": {}, - "source": [ - "## Plot time series data\n", - "#### Define the list of variables to be plotted:" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "id": "d09b789e-84f1-4605-846b-a72c110c8048", - "metadata": {}, - "outputs": [], - "source": [ - "variables_to_plot = ['downwelling_shortwave', 'source_central_facility_downwelling_shortwave', 'clearsky_downwelling_shortwave']" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "id": "3458fb08-035b-4898-9253-0a94e6f9c97b", - "metadata": {}, - "outputs": [ - { - "ename": "TypeError", - "evalue": "Dimensions of C (11, 8, 288) should be one smaller than X(288) and Y(8) while using shading='flat' see help(pcolormesh)", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[9], line 5\u001b[0m\n\u001b[1;32m 2\u001b[0m ts_display\u001b[38;5;241m.\u001b[39madd_subplots((\u001b[38;5;28mlen\u001b[39m(variables_to_plot),), figsize \u001b[38;5;241m=\u001b[39m (\u001b[38;5;241m9.5\u001b[39m,\u001b[38;5;241m4\u001b[39m\u001b[38;5;241m*\u001b[39m\u001b[38;5;28mlen\u001b[39m(variables_to_plot)))\n\u001b[1;32m 4\u001b[0m 
\u001b[38;5;28;01mfor\u001b[39;00m i,v \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28menumerate\u001b[39m(variables_to_plot):\n\u001b[0;32m----> 5\u001b[0m ts_ax \u001b[38;5;241m=\u001b[39m \u001b[43mts_display\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mplot\u001b[49m\u001b[43m(\u001b[49m\u001b[43mv\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43msubplot_index\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mi\u001b[49m\u001b[43m,\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mset_title\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mds\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mvariables\u001b[49m\u001b[43m[\u001b[49m\u001b[43mv\u001b[49m\u001b[43m]\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mattrs\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mlong_name\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 6\u001b[0m ts_ax\u001b[38;5;241m.\u001b[39mgrid()\n\u001b[1;32m 8\u001b[0m plt\u001b[38;5;241m.\u001b[39mshow()\n", - "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/act/plotting/timeseriesdisplay.py:588\u001b[0m, in \u001b[0;36mTimeSeriesDisplay.plot\u001b[0;34m(self, field, dsname, subplot_index, cmap, set_title, add_nan, day_night_background, invert_y_axis, abs_limits, time_rng, y_rng, use_var_for_y, set_shading, assessment_overplot, overplot_marker, overplot_behind, overplot_markersize, assessment_overplot_category, assessment_overplot_category_color, force_line_plot, labels, cbar_label, cbar_h_adjust, secondary_y, y_axis_flag_meanings, colorbar_labels, cb_friendly, **kwargs)\u001b[0m\n\u001b[1;32m 586\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124medgecolors\u001b[39m\u001b[38;5;124m'\u001b[39m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m 
kwargs\u001b[38;5;241m.\u001b[39mkeys():\n\u001b[1;32m 587\u001b[0m kwargs[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124medgecolors\u001b[39m\u001b[38;5;124m'\u001b[39m] \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mface\u001b[39m\u001b[38;5;124m'\u001b[39m\n\u001b[0;32m--> 588\u001b[0m mesh \u001b[38;5;241m=\u001b[39m \u001b[43max\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mpcolormesh\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 589\u001b[0m \u001b[43m \u001b[49m\u001b[43mnp\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43masarray\u001b[49m\u001b[43m(\u001b[49m\u001b[43mxdata\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 590\u001b[0m \u001b[43m \u001b[49m\u001b[43mydata\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 591\u001b[0m \u001b[43m \u001b[49m\u001b[43mdata\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mtranspose\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 592\u001b[0m \u001b[43m \u001b[49m\u001b[43mshading\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mset_shading\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 593\u001b[0m \u001b[43m \u001b[49m\u001b[43mcmap\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcmap\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 594\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 595\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 597\u001b[0m \u001b[38;5;66;03m# Set Title\u001b[39;00m\n\u001b[1;32m 598\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m set_title \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n", - "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/matplotlib/__init__.py:1442\u001b[0m, in \u001b[0;36m_preprocess_data..inner\u001b[0;34m(ax, data, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1439\u001b[0m 
\u001b[38;5;129m@functools\u001b[39m\u001b[38;5;241m.\u001b[39mwraps(func)\n\u001b[1;32m 1440\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21minner\u001b[39m(ax, \u001b[38;5;241m*\u001b[39margs, data\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs):\n\u001b[1;32m 1441\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m data \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m-> 1442\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[43max\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;28;43mmap\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43msanitize_sequence\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43margs\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1444\u001b[0m bound \u001b[38;5;241m=\u001b[39m new_sig\u001b[38;5;241m.\u001b[39mbind(ax, \u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n\u001b[1;32m 1445\u001b[0m auto_label \u001b[38;5;241m=\u001b[39m (bound\u001b[38;5;241m.\u001b[39marguments\u001b[38;5;241m.\u001b[39mget(label_namer)\n\u001b[1;32m 1446\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m bound\u001b[38;5;241m.\u001b[39mkwargs\u001b[38;5;241m.\u001b[39mget(label_namer))\n", - "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/matplotlib/axes/_axes.py:6220\u001b[0m, in \u001b[0;36mAxes.pcolormesh\u001b[0;34m(self, alpha, norm, cmap, vmin, vmax, shading, antialiased, *args, **kwargs)\u001b[0m\n\u001b[1;32m 6217\u001b[0m shading \u001b[38;5;241m=\u001b[39m shading\u001b[38;5;241m.\u001b[39mlower()\n\u001b[1;32m 6218\u001b[0m 
kwargs\u001b[38;5;241m.\u001b[39msetdefault(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124medgecolors\u001b[39m\u001b[38;5;124m'\u001b[39m, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mnone\u001b[39m\u001b[38;5;124m'\u001b[39m)\n\u001b[0;32m-> 6220\u001b[0m X, Y, C, shading \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_pcolorargs\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mpcolormesh\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 6221\u001b[0m \u001b[43m \u001b[49m\u001b[43mshading\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mshading\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mkwargs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 6222\u001b[0m coords \u001b[38;5;241m=\u001b[39m np\u001b[38;5;241m.\u001b[39mstack([X, Y], axis\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m-\u001b[39m\u001b[38;5;241m1\u001b[39m)\n\u001b[1;32m 6223\u001b[0m \u001b[38;5;66;03m# convert to one dimensional array, except for 3D RGB(A) arrays\u001b[39;00m\n", - "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/matplotlib/axes/_axes.py:5751\u001b[0m, in \u001b[0;36mAxes._pcolorargs\u001b[0;34m(self, funcname, shading, *args, **kwargs)\u001b[0m\n\u001b[1;32m 5749\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m shading \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mflat\u001b[39m\u001b[38;5;124m'\u001b[39m:\n\u001b[1;32m 5750\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m (Nx, Ny) \u001b[38;5;241m!=\u001b[39m (ncols \u001b[38;5;241m+\u001b[39m \u001b[38;5;241m1\u001b[39m, nrows \u001b[38;5;241m+\u001b[39m \u001b[38;5;241m1\u001b[39m):\n\u001b[0;32m-> 5751\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m 
\u001b[38;5;167;01mTypeError\u001b[39;00m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mDimensions of C \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mC\u001b[38;5;241m.\u001b[39mshape\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m should\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 5752\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m be one smaller than X(\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mNx\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m) and Y(\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mNy\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m)\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 5753\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m while using shading=\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mflat\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 5754\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m see help(\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mfuncname\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m)\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 5755\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m: \u001b[38;5;66;03m# ['nearest', 'gouraud']:\u001b[39;00m\n\u001b[1;32m 5756\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m (Nx, Ny) \u001b[38;5;241m!=\u001b[39m (ncols, nrows):\n", - "\u001b[0;31mTypeError\u001b[0m: Dimensions of C (11, 8, 288) should be one smaller than X(288) and Y(8) while using shading='flat' see help(pcolormesh)" - ] - }, - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "dd49c62706534d8988ce7d6a5c25f646", - "version_major": 2, - "version_minor": 0 - }, - "image/png": "", - "text/html": [ - "\n", - "
\n", - "
\n", - " Figure\n", - "
\n", - " \n", - "
\n", - " " - ], - "text/plain": [ - "Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "ts_display = act.plotting.TimeSeriesDisplay(ds)\n", - "ts_display.add_subplots((len(variables_to_plot),), figsize = (9.5,4*len(variables_to_plot)))\n", - "\n", - "for i,v in enumerate(variables_to_plot):\n", - " ts_ax = ts_display.plot(v, subplot_index=(i,), set_title=ds.variables[v].attrs['long_name'],)\n", - " ts_ax.grid()\n", - "\n", - "plt.show()\n" - ] - }, - { - "cell_type": "markdown", - "id": "194399aa-1907-452b-8ba9-bc31d7f60291", - "metadata": {}, - "source": [ - "## Quality check plots\n", - "#### Define variable for QC plot" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "9a0ef63d-3eeb-48fc-a24a-43258b6134b8", - "metadata": {}, - "outputs": [], - "source": [ - "qc_variable = 'downwelling_shortwave'" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "532663a3-4dc0-4497-bda8-018c5f91e1c4", - "metadata": {}, - "outputs": [], - "source": [ - "# QC Plot\n", - "if ('qc_'+qc_variable) in ds.variables:\n", - "\n", - " # Plot\n", - " qc_display = act.plotting.TimeSeriesDisplay(ds)\n", - " qc_display.add_subplots((2,), figsize = (9.5,10))\n", - " qc_ax = qc_display.plot(qc_variable, subplot_index=(0,), set_title=\"QC results on field: \" + qc_variable,)\n", - " qc_ax.grid()\n", - " qc_display.qc_flag_block_plot(qc_variable, subplot_index=(1,))\n", - "\n", - " plt.show()\n", - "else:\n", - " print(f'QC not available for the selected field: {qc_variable}')\n" - ] - }, - { - "cell_type": "markdown", - "id": "2308db16-f362-4033-a11a-c3e5e75ad9ba", - "metadata": {}, - "source": [ - "## Field selection dropdown menu\n", - "Select variable to be plotted from a dropdown menu" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "d5792fa8-2253-441a-8ac6-1e07a0345031", - 
"metadata": {}, - "outputs": [], - "source": [ - "plt.ioff()\n", - "\n", - "# populate dropdown menu with available variables \n", - "available_variables = [v for v in ds.variables if not('time' in v or v.startswith('qc_') or v.startswith('source_')) and 'long_name' in ds.variables[v].attrs]\n", - "d_variable = 'downwelling_shortwave'\n", - "dropdown = widgets.Dropdown(\n", - " options = [(ds.variables[v].attrs['long_name'], v) for v in available_variables],\n", - " value= d_variable,\n", - " description='Field:',\n", - " disabled=False,\n", - ")\n", - "dropdown.layout.margin = '0px 30% 0px 20%'\n", - "dropdown.layout.width = '50%'\n", - "\n", - "# set up display\n", - "i_display = act.plotting.TimeSeriesDisplay(ds)\n", - "i_display.add_subplots((1,), figsize = (9.5,5))\n", - "i_ax = i_display.plot(d_variable, subplot_index=(0,), set_title=ds.variables[d_variable].attrs['long_name'],)\n", - "i_ax.grid()\n", - "i_fig = i_display.fig\n", - "\n", - "# update plot callback function\n", - "def update_plot(change):\n", - " i_ax.cla()\n", - " i_ax_new = i_display.plot(change.new, subplot_index=(0,), set_title=ds.variables[change.new].attrs['long_name'],)\n", - " i_ax_new.grid()\n", - " i_fig.canvas.draw()\n", - " i_fig.canvas.flush_events()\n", - "\n", - "dropdown.observe(update_plot, names='value')\n", - "\n", - "widgets.AppLayout(\n", - " header=dropdown,\n", - " center=i_fig.canvas,\n", - " pane_heights=[1, 6,1]\n", - ")\n" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.16" - }, - "widgets": { - "application/vnd.jupyter.widget-state+json": { - "state": {}, - "version_major": 2, - "version_minor": 0 - } - } - }, - "nbformat": 4, - 
"nbformat_minor": 5 -} diff --git a/VAPs/quicklook/SONDEADJUST/.ipynb_checkpoints/SONDEADJUST_tutorial-checkpoint.ipynb b/VAPs/quicklook/SONDEADJUST/.ipynb_checkpoints/SONDEADJUST_tutorial-checkpoint.ipynb deleted file mode 100644 index 698be192..00000000 --- a/VAPs/quicklook/SONDEADJUST/.ipynb_checkpoints/SONDEADJUST_tutorial-checkpoint.ipynb +++ /dev/null @@ -1,4058 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "70840257-70e4-45e2-b491-14bff5a257a3", - "metadata": {}, - "source": [ - "# SONDEADJUST.C1 Notebook\n", - "\n", - "[Click here](https://www.arm.gov/capabilities/vaps/sondeadjust) for more information about this vap." - ] - }, - { - "cell_type": "markdown", - "id": "97097763", - "metadata": {}, - "source": [ - "In this notebook, we demonstrate the workflow to explore ARM vap data (using sondeadjust as an example.) Value-added products (VAPs) are higher-order data products that have been analyzed and processed to ease scientists’ use of ARM data in atmospheric research and global climate models. \n", - "Here is the main content we will cover." 
- ] - }, - { - "cell_type": "markdown", - "id": "eddec40f", - "metadata": {}, - "source": [ - "# Table of Content\n", - "## Access the data\n", - "* How to retrieve the data\n", - "* Data path and file name conventions\n", - "* Load data\n", - "## Explore the data\n", - "* NetCDF Data structure\n", - "* Xarray essentials\n", - "* Xarray Variable\n", - "## Plot the data" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "id": "460fd89f-e034-452c-b837-f65c5958264f", - "metadata": {}, - "outputs": [], - "source": [ - "import ipywidgets as widgets\n", - "\n", - "import matplotlib.pyplot as plt\n", - "import numpy as np\n", - "import pandas as pd\n", - "import os\n", - "from datetime import datetime\n", - "\n", - "import act\n", - "import xarray as xr\n", - "\n", - "import random\n", - "\n", - "import glob\n", - "\n" - ] - }, - { - "cell_type": "markdown", - "id": "2372d5be", - "metadata": {}, - "source": [ - "## Access the data" - ] - }, - { - "cell_type": "markdown", - "id": "5b0d9684", - "metadata": {}, - "source": [ - "### How to retrieve the data\n", - "We assume the path \"/data/archive\" is available where you are running this notebook. You can use `os.path.exists(\"/data/archive\")` to verify if the path exists at your machine.\n", - "\n", - "\n", - "### Data path and file name conventions\n", - "There several common terminologies regarding ARM data, for example, data-stream-name, data-level, site, facility, instrument, etc. (For more details, please see [ARM Data File Standards Version 1.2](https://www.google.com/search?q=arm+datastream+facility+cite+definition&rlz=1C1GCEJ_enUS1029US1029&ei=hb41ZICrBPukqtsPvMODIA&ved=0ahUKEwjAgeiS0aL-AhV7kmoFHbzhAAQQ4dUDCBA&uact=5&oq=arm+datastream+facility+cite+definition&gs_lcp=Cgxnd3Mtd2l6LXNlcnAQAzIFCAAQogQyBQgAEKIEMgUIABCiBDIFCAAQogQyBQgAEKIEOgoIABBHENYEELADOgoIIRCgARDDBBAKSgQIQRgAUJ4DWIoOYO0PaAFwAXgAgAGMAYgBuQmSAQMyLjmYAQCgAQHIAQjAAQE&sclient=gws-wiz-serp).) 
\n", - "\n", - "For example, this notebook is called `sondeadjust.c1`, where `sondeadjust` is the \"datastream name\", and the `{process.ds_class_level}}` is the \"data level\".\n", - "\n", - "This datastream also contains site `nsa` and facility `C1`. (Note: individual datastream might have multiple site-facility pairs.)\n", - "In such a case, the data of this data-stream is stored at `/data/archive/nsa/nsasondeadjustC1.c1`, which is in the format of `//.`. We can use the following method to assign the data-stream directory `datastream_dir = os.path.join(DATA_DIR, site, site + DATASTREAM_NAME + facility + '.' + DATA_LEVEL )`\n", - "\n", - "The data files under datastream_dir also follows naming conventions. But once reach the datastream_dir level, the most import file naming convention to differentiate the files is \"yyyyMMdd.hhmmss\", which comes handy to filter out files based on datetime. For example, we can use `glob.glob(f'{datastream_dir}/*.200709*.*')` to filter files in 2007 September.\n", - "\n", - "(Note: refer to the https://adc.arm.gov/discovery/#/ and https://adc.arm.gov/solr8/metadata/select API to explore ARM datastream and assoicated available sites and facilities\n", - "\n", - "Please see the following examples in action" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "d7e9eb85", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "True" - ] - }, - "execution_count": 2, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Verify if DATA_DIR path exists\n", - "DATA_DIR = \"/data/archive\"\n", - "os.path.exists(DATA_DIR)" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "586993fd", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "/data/archive/nsa/nsasondeadjustC1.c1\n", - "True\n" - ] - } - ], - "source": [ - "# Speicify datastream_dir following the path conventions and check its existence\n", - "DATASTREAM_NAME = 
\"sondeadjust\"\n", - "DATA_LEVEL = \"c1\"\n", - "site = \"nsa\"\n", - "facility = \"C1\"\n", - "datastream_dir = os.path.join(DATA_DIR, site, site + DATASTREAM_NAME + facility + '.' + DATA_LEVEL )\n", - "print(datastream_dir)\n", - "print(os.path.exists(datastream_dir))\n" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "0742f7c1", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "['nsasondeadjustC1.c1.20110710.054600.cdf',\n", - " 'nsasondeadjustC1.c1.20090319.051600.cdf',\n", - " 'nsasondeadjustC1.c1.20080508.052600.cdf',\n", - " 'nsasondeadjustC1.c1.20070816.181100.cdf',\n", - " 'nsasondeadjustC1.c1.20110923.174200.cdf']" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# bonus: list 5 (random) files under datastream_dir\n", - "files = os.listdir(datastream_dir)\n", - "files[:5]" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "39b98a36", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "'/data/archive/nsa/nsasondeadjustC1.c1/nsasondeadjustC1.c1.20080601.053500.cdf'" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# bonus: get most recent file\n", - "list_of_files = glob.glob(f\"{datastream_dir}/*\") # * means all if need specific format then *.csv\n", - "latest_file = max(list_of_files, key=os.path.getctime)\n", - "latest_file" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "902d514e", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "['nsasondeadjustC1.c1.20020428.184800.cdf', 'nsasondeadjustC1.c1.20020428.220500.cdf', 'nsasondeadjustC1.c1.20020428.235900.cdf', 'nsasondeadjustC1.c1.20020429.013100.cdf', 'nsasondeadjustC1.c1.20020429.182500.cdf']\n", - "['nsasondeadjustC1.c1.20120716.173000.cdf', 'nsasondeadjustC1.c1.20120716.053000.cdf', 'nsasondeadjustC1.c1.20120715.214900.cdf', 
'nsasondeadjustC1.c1.20120715.173000.cdf', 'nsasondeadjustC1.c1.20120715.053000.cdf']\n" - ] - } - ], - "source": [ - "# bonus sort datastream files based on datetime\n", - "files = os.listdir(datastream_dir)\n", - "file_sorted = files.copy()\n", - "file_sorted.sort() \n", - "print(file_sorted[:5])\n", - "\n", - "# to reverse\n", - "file_sorted_reverse = files.copy() \n", - "file_sorted_reverse.sort(reverse=True)\n", - "print(file_sorted_reverse[:5])" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "id": "ec5923b2", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "['/data/archive/nsa/nsasondeadjustC1.c1/nsasondeadjustC1.c1.20070921.173300.cdf',\n", - " '/data/archive/nsa/nsasondeadjustC1.c1/nsasondeadjustC1.c1.20070929.190700.cdf',\n", - " '/data/archive/nsa/nsasondeadjustC1.c1/nsasondeadjustC1.c1.20070925.201400.cdf',\n", - " '/data/archive/nsa/nsasondeadjustC1.c1/nsasondeadjustC1.c1.20070928.174800.cdf',\n", - " '/data/archive/nsa/nsasondeadjustC1.c1/nsasondeadjustC1.c1.20070930.172800.cdf',\n", - " '/data/archive/nsa/nsasondeadjustC1.c1/nsasondeadjustC1.c1.20070905.052400.cdf',\n", - " '/data/archive/nsa/nsasondeadjustC1.c1/nsasondeadjustC1.c1.20070927.052700.cdf',\n", - " '/data/archive/nsa/nsasondeadjustC1.c1/nsasondeadjustC1.c1.20070920.165900.cdf',\n", - " '/data/archive/nsa/nsasondeadjustC1.c1/nsasondeadjustC1.c1.20070929.053300.cdf',\n", - " '/data/archive/nsa/nsasondeadjustC1.c1/nsasondeadjustC1.c1.20070909.053200.cdf',\n", - " '/data/archive/nsa/nsasondeadjustC1.c1/nsasondeadjustC1.c1.20070904.204700.cdf',\n", - " '/data/archive/nsa/nsasondeadjustC1.c1/nsasondeadjustC1.c1.20070913.210100.cdf',\n", - " '/data/archive/nsa/nsasondeadjustC1.c1/nsasondeadjustC1.c1.20070925.052400.cdf',\n", - " '/data/archive/nsa/nsasondeadjustC1.c1/nsasondeadjustC1.c1.20070907.173500.cdf',\n", - " '/data/archive/nsa/nsasondeadjustC1.c1/nsasondeadjustC1.c1.20070919.053400.cdf',\n", - " 
'/data/archive/nsa/nsasondeadjustC1.c1/nsasondeadjustC1.c1.20070901.172500.cdf',\n", - " '/data/archive/nsa/nsasondeadjustC1.c1/nsasondeadjustC1.c1.20070919.174900.cdf',\n", - " '/data/archive/nsa/nsasondeadjustC1.c1/nsasondeadjustC1.c1.20070903.210800.cdf',\n", - " '/data/archive/nsa/nsasondeadjustC1.c1/nsasondeadjustC1.c1.20070913.180600.cdf',\n", - " '/data/archive/nsa/nsasondeadjustC1.c1/nsasondeadjustC1.c1.20070914.204200.cdf',\n", - " '/data/archive/nsa/nsasondeadjustC1.c1/nsasondeadjustC1.c1.20070916.052500.cdf',\n", - " '/data/archive/nsa/nsasondeadjustC1.c1/nsasondeadjustC1.c1.20070917.213100.cdf',\n", - " '/data/archive/nsa/nsasondeadjustC1.c1/nsasondeadjustC1.c1.20070904.052100.cdf',\n", - " '/data/archive/nsa/nsasondeadjustC1.c1/nsasondeadjustC1.c1.20070915.172400.cdf',\n", - " '/data/archive/nsa/nsasondeadjustC1.c1/nsasondeadjustC1.c1.20070923.172500.cdf',\n", - " '/data/archive/nsa/nsasondeadjustC1.c1/nsasondeadjustC1.c1.20070903.173300.cdf',\n", - " '/data/archive/nsa/nsasondeadjustC1.c1/nsasondeadjustC1.c1.20070928.205300.cdf',\n", - " '/data/archive/nsa/nsasondeadjustC1.c1/nsasondeadjustC1.c1.20070905.174000.cdf',\n", - " '/data/archive/nsa/nsasondeadjustC1.c1/nsasondeadjustC1.c1.20070926.213000.cdf',\n", - " '/data/archive/nsa/nsasondeadjustC1.c1/nsasondeadjustC1.c1.20070918.052700.cdf',\n", - " '/data/archive/nsa/nsasondeadjustC1.c1/nsasondeadjustC1.c1.20070914.173800.cdf',\n", - " '/data/archive/nsa/nsasondeadjustC1.c1/nsasondeadjustC1.c1.20070914.053000.cdf',\n", - " '/data/archive/nsa/nsasondeadjustC1.c1/nsasondeadjustC1.c1.20070918.205800.cdf',\n", - " '/data/archive/nsa/nsasondeadjustC1.c1/nsasondeadjustC1.c1.20070911.052500.cdf',\n", - " '/data/archive/nsa/nsasondeadjustC1.c1/nsasondeadjustC1.c1.20070917.180300.cdf',\n", - " '/data/archive/nsa/nsasondeadjustC1.c1/nsasondeadjustC1.c1.20070904.174000.cdf',\n", - " '/data/archive/nsa/nsasondeadjustC1.c1/nsasondeadjustC1.c1.20070901.052400.cdf',\n", - " 
'/data/archive/nsa/nsasondeadjustC1.c1/nsasondeadjustC1.c1.20070918.181600.cdf',\n", - " '/data/archive/nsa/nsasondeadjustC1.c1/nsasondeadjustC1.c1.20070911.174500.cdf',\n", - " '/data/archive/nsa/nsasondeadjustC1.c1/nsasondeadjustC1.c1.20070907.053800.cdf',\n", - " '/data/archive/nsa/nsasondeadjustC1.c1/nsasondeadjustC1.c1.20070910.213600.cdf',\n", - " '/data/archive/nsa/nsasondeadjustC1.c1/nsasondeadjustC1.c1.20070902.172900.cdf',\n", - " '/data/archive/nsa/nsasondeadjustC1.c1/nsasondeadjustC1.c1.20070924.173400.cdf',\n", - " '/data/archive/nsa/nsasondeadjustC1.c1/nsasondeadjustC1.c1.20070910.052900.cdf',\n", - " '/data/archive/nsa/nsasondeadjustC1.c1/nsasondeadjustC1.c1.20070906.220400.cdf',\n", - " '/data/archive/nsa/nsasondeadjustC1.c1/nsasondeadjustC1.c1.20070920.053400.cdf',\n", - " '/data/archive/nsa/nsasondeadjustC1.c1/nsasondeadjustC1.c1.20070926.173600.cdf',\n", - " '/data/archive/nsa/nsasondeadjustC1.c1/nsasondeadjustC1.c1.20070906.053200.cdf',\n", - " '/data/archive/nsa/nsasondeadjustC1.c1/nsasondeadjustC1.c1.20070923.052600.cdf',\n", - " '/data/archive/nsa/nsasondeadjustC1.c1/nsasondeadjustC1.c1.20070911.221800.cdf',\n", - " '/data/archive/nsa/nsasondeadjustC1.c1/nsasondeadjustC1.c1.20070917.052500.cdf',\n", - " '/data/archive/nsa/nsasondeadjustC1.c1/nsasondeadjustC1.c1.20070927.180200.cdf',\n", - " '/data/archive/nsa/nsasondeadjustC1.c1/nsasondeadjustC1.c1.20070907.213000.cdf',\n", - " '/data/archive/nsa/nsasondeadjustC1.c1/nsasondeadjustC1.c1.20070910.173600.cdf',\n", - " '/data/archive/nsa/nsasondeadjustC1.c1/nsasondeadjustC1.c1.20070912.212300.cdf',\n", - " '/data/archive/nsa/nsasondeadjustC1.c1/nsasondeadjustC1.c1.20070912.052600.cdf',\n", - " '/data/archive/nsa/nsasondeadjustC1.c1/nsasondeadjustC1.c1.20070922.172900.cdf',\n", - " '/data/archive/nsa/nsasondeadjustC1.c1/nsasondeadjustC1.c1.20070925.180300.cdf',\n", - " '/data/archive/nsa/nsasondeadjustC1.c1/nsasondeadjustC1.c1.20070909.172700.cdf',\n", - " 
'/data/archive/nsa/nsasondeadjustC1.c1/nsasondeadjustC1.c1.20070921.052700.cdf',\n", - " '/data/archive/nsa/nsasondeadjustC1.c1/nsasondeadjustC1.c1.20070928.053300.cdf',\n", - " '/data/archive/nsa/nsasondeadjustC1.c1/nsasondeadjustC1.c1.20070903.052400.cdf',\n", - " '/data/archive/nsa/nsasondeadjustC1.c1/nsasondeadjustC1.c1.20070902.052300.cdf',\n", - " '/data/archive/nsa/nsasondeadjustC1.c1/nsasondeadjustC1.c1.20070930.052600.cdf',\n", - " '/data/archive/nsa/nsasondeadjustC1.c1/nsasondeadjustC1.c1.20070912.173700.cdf',\n", - " '/data/archive/nsa/nsasondeadjustC1.c1/nsasondeadjustC1.c1.20070915.052900.cdf',\n", - " '/data/archive/nsa/nsasondeadjustC1.c1/nsasondeadjustC1.c1.20070927.211200.cdf',\n", - " '/data/archive/nsa/nsasondeadjustC1.c1/nsasondeadjustC1.c1.20070916.173200.cdf',\n", - " '/data/archive/nsa/nsasondeadjustC1.c1/nsasondeadjustC1.c1.20070913.053500.cdf',\n", - " '/data/archive/nsa/nsasondeadjustC1.c1/nsasondeadjustC1.c1.20070922.061100.cdf',\n", - " '/data/archive/nsa/nsasondeadjustC1.c1/nsasondeadjustC1.c1.20070908.052500.cdf',\n", - " '/data/archive/nsa/nsasondeadjustC1.c1/nsasondeadjustC1.c1.20070905.210700.cdf',\n", - " '/data/archive/nsa/nsasondeadjustC1.c1/nsasondeadjustC1.c1.20070906.175700.cdf',\n", - " '/data/archive/nsa/nsasondeadjustC1.c1/nsasondeadjustC1.c1.20070908.175400.cdf',\n", - " '/data/archive/nsa/nsasondeadjustC1.c1/nsasondeadjustC1.c1.20070924.052900.cdf',\n", - " '/data/archive/nsa/nsasondeadjustC1.c1/nsasondeadjustC1.c1.20070922.053900.cdf']" - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# bonus: pattern matching\n", - "# filter the 200709** files under datastream_dir\n", - "files_filter = glob.glob(f'{datastream_dir}/*.200709*.*')\n", - "files_filter" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "id": "c4a4aa18", - "metadata": {}, - "outputs": [], - "source": [ - "# bonus: use armnotebook_utils.file_filter (TODO) to filter files based on 
datastream info \n" - ] - }, - { - "cell_type": "markdown", - "id": "cfeb9efc", - "metadata": {}, - "source": [ - "### Load data\n", - "The arm data is stored in [NetCDF](https://en.wikipedia.org/wiki/NetCDF#:~:text=%22NetCDF%20(network%20Common%20Data%20Form,format%20for%20representing%20scientific%20data.) format. We can use xarray's `open_dataset` method to load single file, or `open_mfdataset` to open multiple files. (Note: the latter will still return a single xarray dataset object by combining multiple files.)\n", - "\n", - "Note: open_dataset keeps the file handle open and lazy loads its contents. All parameters are passed directly to open_dataset. It is a preferable method over load_dataset for memory-effiency. (more details, please see [xarray.open_dataset](https://docs.xarray.dev/en/stable/generated/xarray.open_dataset.html))\n", - "\n", - "See the following example in action" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "id": "a440a329", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "/data/archive/nsa/nsasondeadjustC1.c1/nsasondeadjustC1.c1.20080601.053500.cdf\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "
<xarray.Dataset>\n",
-       "Dimensions:       (time: 2975)\n",
-       "Coordinates:\n",
-       "  * time          (time) datetime64[ns] 2008-06-01T05:35:00 ... 2008-06-01T07...\n",
-       "Data variables: (12/35)\n",
-       "    base_time     datetime64[ns] ...\n",
-       "    time_offset   (time) datetime64[ns] ...\n",
-       "    qc_time       (time) int32 ...\n",
-       "    pres          (time) float32 ...\n",
-       "    qc_pres       (time) int32 ...\n",
-       "    tdry          (time) float32 ...\n",
-       "    ...            ...\n",
-       "    qc_rh_scaled  (time) int32 ...\n",
-       "    dp_scaled     (time) float32 ...\n",
-       "    qc_dp_scaled  (time) int32 ...\n",
-       "    lat           (time) float32 ...\n",
-       "    lon           (time) float32 ...\n",
-       "    alt           (time) float32 ...\n",
-       "Attributes: (12/16)\n",
-       "    process_version:                $State: vap-sonde_adjust-8.0-0.sol5_10$\n",
-       "    command_line:                   sonde_adjust -d 20080601 -f nsaC1 -a 0\n",
-       "    site_id:                        nsa\n",
-       "    facility_id:                    C1: Barrow, Alaska\n",
-       "    reference1:                     Wang et.al. 2002. "Corrections of Humidit...\n",
-       "    reference2:                     Miloshevich et.al. 2004. "Development and...\n",
-       "    ...                             ...\n",
-       "    station_elevation:              8 m MSL\n",
-       "    input_datastreams_description:  A string consisting of the datastream(s),...\n",
-       "    input_datastreams_num:          6\n",
-       "    input_datastreams:              nsasondewnpnC1.b1 : 5.190000 : 20080601.0...\n",
-       "    zeb_platform:                   nsasondeadjustC1.c1\n",
-       "    history:                        created by user gervais on machine emeral...
" - ], - "text/plain": [ - "\n", - "Dimensions: (time: 2975)\n", - "Coordinates:\n", - " * time (time) datetime64[ns] 2008-06-01T05:35:00 ... 2008-06-01T07...\n", - "Data variables: (12/35)\n", - " base_time datetime64[ns] ...\n", - " time_offset (time) datetime64[ns] ...\n", - " qc_time (time) int32 ...\n", - " pres (time) float32 ...\n", - " qc_pres (time) int32 ...\n", - " tdry (time) float32 ...\n", - " ... ...\n", - " qc_rh_scaled (time) int32 ...\n", - " dp_scaled (time) float32 ...\n", - " qc_dp_scaled (time) int32 ...\n", - " lat (time) float32 ...\n", - " lon (time) float32 ...\n", - " alt (time) float32 ...\n", - "Attributes: (12/16)\n", - " process_version: $State: vap-sonde_adjust-8.0-0.sol5_10$\n", - " command_line: sonde_adjust -d 20080601 -f nsaC1 -a 0\n", - " site_id: nsa\n", - " facility_id: C1: Barrow, Alaska\n", - " reference1: Wang et.al. 2002. \"Corrections of Humidit...\n", - " reference2: Miloshevich et.al. 2004. \"Development and...\n", - " ... ...\n", - " station_elevation: 8 m MSL\n", - " input_datastreams_description: A string consisting of the datastream(s),...\n", - " input_datastreams_num: 6\n", - " input_datastreams: nsasondewnpnC1.b1 : 5.190000 : 20080601.0...\n", - " zeb_platform: nsasondeadjustC1.c1\n", - " history: created by user gervais on machine emeral..." - ] - }, - "execution_count": 9, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# open single file\n", - "full_path = latest_file\n", - "print(full_path)\n", - "ds_single = xr.open_dataset(full_path)\n", - "ds_single" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "id": "b0143a3d", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "Dimensions: (time: 2975)\n", - "Coordinates:\n", - " * time (time) datetime64[ns] 2008-06-01T05:35:00 ... 
2008-06-01T07...\n", - "Data variables: (12/35)\n", - " base_time datetime64[ns] ...\n", - " time_offset (time) datetime64[ns] ...\n", - " qc_time (time) int32 ...\n", - " pres (time) float32 ...\n", - " qc_pres (time) int32 ...\n", - " tdry (time) float32 ...\n", - " ... ...\n", - " qc_rh_scaled (time) int32 ...\n", - " dp_scaled (time) float32 ...\n", - " qc_dp_scaled (time) int32 ...\n", - " lat (time) float32 ...\n", - " lon (time) float32 ...\n", - " alt (time) float32 ...\n", - "Attributes: (12/16)\n", - " process_version: $State: vap-sonde_adjust-8.0-0.sol5_10$\n", - " command_line: sonde_adjust -d 20080601 -f nsaC1 -a 0\n", - " site_id: nsa\n", - " facility_id: C1: Barrow, Alaska\n", - " reference1: Wang et.al. 2002. \"Corrections of Humidit...\n", - " reference2: Miloshevich et.al. 2004. \"Development and...\n", - " ... ...\n", - " station_elevation: 8 m MSL\n", - " input_datastreams_description: A string consisting of the datastream(s),...\n", - " input_datastreams_num: 6\n", - " input_datastreams: nsasondewnpnC1.b1 : 5.190000 : 20080601.0...\n", - " zeb_platform: nsasondeadjustC1.c1\n", - " history: created by user gervais on machine emeral...\n", - "\n" - ] - } - ], - "source": [ - "print(ds_single)\n", - "print(type(ds_single))" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "id": "1c0f8939", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "['/data/archive/nsa/nsasondeadjustC1.c1/nsasondeadjustC1.c1.20120716.173000.cdf', '/data/archive/nsa/nsasondeadjustC1.c1/nsasondeadjustC1.c1.20120716.053000.cdf', '/data/archive/nsa/nsasondeadjustC1.c1/nsasondeadjustC1.c1.20120715.214900.cdf']\n", - "\n", - "\n", - "Dimensions: (time: 7872)\n", - "Coordinates:\n", - " * time (time) datetime64[ns] 2012-07-15T21:49:00 ... 2012-07-16T18...\n", - "Data variables: (12/35)\n", - " base_time (time) datetime64[ns] 2012-07-15T21:49:00 ... 
2012-07-16T17...\n", - " time_offset (time) datetime64[ns] dask.array\n", - " qc_time (time) int32 dask.array\n", - " pres (time) float32 dask.array\n", - " qc_pres (time) int32 dask.array\n", - " tdry (time) float32 dask.array\n", - " ... ...\n", - " qc_rh_scaled (time) int32 dask.array\n", - " dp_scaled (time) float32 dask.array\n", - " qc_dp_scaled (time) int32 dask.array\n", - " lat (time) float32 dask.array\n", - " lon (time) float32 dask.array\n", - " alt (time) float32 dask.array\n", - "Attributes: (12/16)\n", - " process_version: $State: vap-sonde_adjust-8.0-0.sol5_10$\n", - " command_line: sonde_adjust -d 20120715 -f nsaC1 -a 0\n", - " site_id: nsa\n", - " facility_id: C1: Barrow, Alaska\n", - " reference1: Wang et.al. 2002. \"Corrections of Humidit...\n", - " reference2: Miloshevich et.al. 2004. \"Development and...\n", - " ... ...\n", - " station_elevation: 8 m MSL\n", - " input_datastreams_description: A string consisting of the datastream(s),...\n", - " input_datastreams_num: 6\n", - " input_datastreams: nsasondewnpnC1.b1 : 10.800000 : 20120715....\n", - " zeb_platform: nsasondeadjustC1.c1\n", - " history: created by user gervais on machine emeral...\n" - ] - } - ], - "source": [ - "# open multiple files\n", - "n_files = 3\n", - "full_paths = [os.path.join(datastream_dir, f_path) for f_path in file_sorted_reverse[:n_files]]\n", - "print(full_paths)\n", - "try: # Note: sometimes multiple files cannot be merged, so we used try except here.\n", - " ds_mutiple = xr.open_mfdataset(full_paths)\n", - " # ds_mutiple\n", - " print(type(ds_mutiple))\n", - " print(ds_mutiple)\n", - "except Exception as e:\n", - " print(e)" - ] - }, - { - "cell_type": "markdown", - "id": "3fa59943", - "metadata": {}, - "source": [ - "## Explore the data" - ] - }, - { - "cell_type": "markdown", - "id": "571f69b9", - "metadata": {}, - "source": [ - "### NetCDF Data structure \n", - "\n", - "\n", - "(If you are confident with NetCDF basics and xarray essentials, feel free to skip 
this session.)\n", - "Before we dive into data exploration, there are some eseential concpets we should be familiar with (shown below): \n", - "* Dataset\n", - "* Data array\n", - "* Variable\n", - "* Dimenssion\n", - "* Coordinate\n", - "* Data Type\n", - "* Meta Data (Attributes)\n", - "\n", - "We will not go into details about NetCDF basics and here are some references you might find helpful\n", - "* [Components of a NetCDF Dataset](https://iprc.soest.hawaii.edu/users/xfu/tool/guidef-7.html#)\n", - "* [Network Common Data Form (NetCDF)](https://www.unidata.ucar.edu/software/netcdf/) and [A Brief History of (netCDF) Time](https://www.unidata.ucar.edu/software/netcdf/time/recs.html)\n", - "\n", - "\n", - "\n", - "### Xarray essentials\n", - "Earlier we introduced the xarray pacakge and used `open_dataset` and `open_mfdataset` to retrieve NetCDF data file as an xarray Dataset object. Recall that you can review the data in a notebook by using `print(ds)` or `ds`. \n", - "\n", - "Using xarray to retrieve the aforementioned NetCDF basics is straightforwared. In general,\n", - "* Dataset: `ds`\n", - "* Data array: `ds.variables`\n", - "* Variable: `ds.ds.variables`\n", - "* Dimenssion: `ds.dims`\n", - "* Coordinate: `ds.coords`\n", - "* Data Type: `type`\n", - "* Meta Data (Attributes)\n", - "\n", - "Also, here are some references if you are new to xarray\n", - "* [Xarray in 45 minutes](https://tutorial.xarray.dev/overview/xarray-in-45-min.html)\n", - "* [Handling NetCDF Files using XArray for Absolute Beginners](https://towardsdatascience.com/handling-netcdf-files-using-xarray-for-absolute-beginners-111a8ab4463f)\n", - "\n", - "Try the following commands in action\n", - "\n" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "id": "b3eac323", - "metadata": {}, - "outputs": [], - "source": [ - "ds = ds_single" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "id": "77ecf85d", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "
<xarray.Dataset>\n",
-       "Dimensions:       (time: 2975)\n",
-       "Coordinates:\n",
-       "  * time          (time) datetime64[ns] 2008-06-01T05:35:00 ... 2008-06-01T07...\n",
-       "Data variables: (12/35)\n",
-       "    base_time     datetime64[ns] ...\n",
-       "    time_offset   (time) datetime64[ns] ...\n",
-       "    qc_time       (time) int32 ...\n",
-       "    pres          (time) float32 ...\n",
-       "    qc_pres       (time) int32 ...\n",
-       "    tdry          (time) float32 ...\n",
-       "    ...            ...\n",
-       "    qc_rh_scaled  (time) int32 ...\n",
-       "    dp_scaled     (time) float32 ...\n",
-       "    qc_dp_scaled  (time) int32 ...\n",
-       "    lat           (time) float32 ...\n",
-       "    lon           (time) float32 ...\n",
-       "    alt           (time) float32 ...\n",
-       "Attributes: (12/16)\n",
-       "    process_version:                $State: vap-sonde_adjust-8.0-0.sol5_10$\n",
-       "    command_line:                   sonde_adjust -d 20080601 -f nsaC1 -a 0\n",
-       "    site_id:                        nsa\n",
-       "    facility_id:                    C1: Barrow, Alaska\n",
-       "    reference1:                     Wang et.al. 2002. "Corrections of Humidit...\n",
-       "    reference2:                     Miloshevich et.al. 2004. "Development and...\n",
-       "    ...                             ...\n",
-       "    station_elevation:              8 m MSL\n",
-       "    input_datastreams_description:  A string consisting of the datastream(s),...\n",
-       "    input_datastreams_num:          6\n",
-       "    input_datastreams:              nsasondewnpnC1.b1 : 5.190000 : 20080601.0...\n",
-       "    zeb_platform:                   nsasondeadjustC1.c1\n",
-       "    history:                        created by user gervais on machine emeral...
" - ], - "text/plain": [ - "\n", - "Dimensions: (time: 2975)\n", - "Coordinates:\n", - " * time (time) datetime64[ns] 2008-06-01T05:35:00 ... 2008-06-01T07...\n", - "Data variables: (12/35)\n", - " base_time datetime64[ns] ...\n", - " time_offset (time) datetime64[ns] ...\n", - " qc_time (time) int32 ...\n", - " pres (time) float32 ...\n", - " qc_pres (time) int32 ...\n", - " tdry (time) float32 ...\n", - " ... ...\n", - " qc_rh_scaled (time) int32 ...\n", - " dp_scaled (time) float32 ...\n", - " qc_dp_scaled (time) int32 ...\n", - " lat (time) float32 ...\n", - " lon (time) float32 ...\n", - " alt (time) float32 ...\n", - "Attributes: (12/16)\n", - " process_version: $State: vap-sonde_adjust-8.0-0.sol5_10$\n", - " command_line: sonde_adjust -d 20080601 -f nsaC1 -a 0\n", - " site_id: nsa\n", - " facility_id: C1: Barrow, Alaska\n", - " reference1: Wang et.al. 2002. \"Corrections of Humidit...\n", - " reference2: Miloshevich et.al. 2004. \"Development and...\n", - " ... ...\n", - " station_elevation: 8 m MSL\n", - " input_datastreams_description: A string consisting of the datastream(s),...\n", - " input_datastreams_num: 6\n", - " input_datastreams: nsasondewnpnC1.b1 : 5.190000 : 20080601.0...\n", - " zeb_platform: nsasondeadjustC1.c1\n", - " history: created by user gervais on machine emeral..." 
- ] - }, - "execution_count": 13, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Dataset \n", - "ds" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "id": "25e7de09", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Frozen({'base_time': \n", - "[1 values with dtype=datetime64[ns]]\n", - "Attributes:\n", - " string: 1-Jun-2008,5:35:00 GMT\n", - " long_name: Base time in Epoch, 'time_offset': \n", - "[2975 values with dtype=datetime64[ns]]\n", - "Attributes:\n", - " long_name: Time offset from base_time, 'time': \n", - "array(['2008-06-01T05:35:00.000000000', '2008-06-01T05:35:02.000000000',\n", - " '2008-06-01T05:35:04.000000000', ..., '2008-06-01T07:14:04.000000000',\n", - " '2008-06-01T07:14:06.000000000', '2008-06-01T07:14:08.000000000'],\n", - " dtype='datetime64[ns]')\n", - "Attributes:\n", - " long_name: Time offset from midnight, 'qc_time': \n", - "[2975 values with dtype=int32]\n", - "Attributes: (12/13)\n", - " long_name: Quality check results on field: Time offset from mi...\n", - " units: unitless\n", - " description: This field contains bit packed values which should ...\n", - " bit_1_description: Delta time between current and previous samples is ...\n", - " bit_1_assessment: Bad\n", - " bit_2_description: Delta time between current and previous samples is ...\n", - " ... 
...\n", - " bit_3_description: Delta time between current and previous samples is ...\n", - " bit_3_assessment: Bad\n", - " delta_t_lower_limit: 20.0\n", - " delta_t_upper_limit: 20.0\n", - " prior_sample_flag: 1\n", - " comment: If the 'prior_sample_flag' is set the first sample ..., 'pres': \n", - "[2975 values with dtype=float32]\n", - "Attributes:\n", - " long_name: Barometric pressure\n", - " units: hPa\n", - " valid_min: 0.0\n", - " valid_max: 1100.0, 'qc_pres': \n", - "[2975 values with dtype=int32]\n", - "Attributes:\n", - " long_name: Quality check results on field: Barometric pressure\n", - " units: unitless\n", - " description: This field contains bit packed values which should be...\n", - " bit_1_description: Value is less than the valid_min.\n", - " bit_1_assessment: Indeterminate\n", - " bit_2_description: Value is greater than the valid_max.\n", - " bit_2_assessment: Indeterminate\n", - " bit_3_description: Data value not available in input file, data value se...\n", - " bit_3_assessment: Bad, 'tdry': \n", - "[2975 values with dtype=float32]\n", - "Attributes:\n", - " long_name: Dry bulb temperature\n", - " units: C\n", - " valid_min: -80.0\n", - " valid_max: 50.0, 'qc_tdry': \n", - "[2975 values with dtype=int32]\n", - "Attributes:\n", - " long_name: Quality check results on field: Dry bulb temperature\n", - " units: unitless\n", - " description: This field contains bit packed values which should be...\n", - " bit_1_description: Value is less than the valid_min.\n", - " bit_1_assessment: Indeterminate\n", - " bit_2_description: Value is greater than the valid_max.\n", - " bit_2_assessment: Indeterminate\n", - " bit_3_description: Data value not available in input file, data value se...\n", - " bit_3_assessment: Bad, 'dp': \n", - "[2975 values with dtype=float32]\n", - "Attributes:\n", - " long_name: Dewpoint temperature\n", - " units: C\n", - " valid_min: -110.0\n", - " valid_max: 50.0, 'qc_dp': \n", - "[2975 values with dtype=int32]\n", - 
"Attributes:\n", - " long_name: Quality check results on field: Dewpoint temperature\n", - " units: unitless\n", - " description: This field contains bit packed values which should be...\n", - " bit_1_description: Value is less than the valid_min.\n", - " bit_1_assessment: Indeterminate\n", - " bit_2_description: Value is greater than the valid_max.\n", - " bit_2_assessment: Indeterminate\n", - " bit_3_description: Data value not available in input file, data value se...\n", - " bit_3_assessment: Bad, 'wspd': \n", - "[2975 values with dtype=float32]\n", - "Attributes:\n", - " long_name: Wind speed\n", - " units: m/s\n", - " valid_min: 0.0\n", - " valid_max: 100.0, 'qc_wspd': \n", - "[2975 values with dtype=int32]\n", - "Attributes:\n", - " long_name: Quality check results on field: Wind speed\n", - " units: unitless\n", - " description: This field contains bit packed values which should be...\n", - " bit_1_description: Value is less than the valid_min.\n", - " bit_1_assessment: Indeterminate\n", - " bit_2_description: Value is greater than the valid_max.\n", - " bit_2_assessment: Indeterminate\n", - " bit_3_description: Data value not available in input file, data value se...\n", - " bit_3_assessment: Bad, 'deg': \n", - "[2975 values with dtype=float32]\n", - "Attributes:\n", - " long_name: Wind direction\n", - " units: deg\n", - " valid_min: 0.0\n", - " valid_max: 360.0, 'qc_deg': \n", - "[2975 values with dtype=int32]\n", - "Attributes:\n", - " long_name: Quality check results on field: Wind direction\n", - " units: unitless\n", - " description: This field contains bit packed values which should be...\n", - " bit_1_description: Value is less than the valid_min.\n", - " bit_1_assessment: Indeterminate\n", - " bit_2_description: Value is greater than the valid_max.\n", - " bit_2_assessment: Indeterminate\n", - " bit_3_description: Data value not available in input file, data value se...\n", - " bit_3_assessment: Bad, 'rh': \n", - "[2975 values with 
dtype=float32]\n", - "Attributes:\n", - " long_name: Relative humidity\n", - " units: %\n", - " valid_min: 0.0\n", - " valid_max: 105.0, 'qc_rh': \n", - "[2975 values with dtype=int32]\n", - "Attributes:\n", - " long_name: Quality check results on field: Relative humidity\n", - " units: unitless\n", - " description: This field contains bit packed values which should be...\n", - " bit_1_description: Value is less than the valid_min.\n", - " bit_1_assessment: Indeterminate\n", - " bit_2_description: Value is greater than the valid_max.\n", - " bit_2_assessment: Indeterminate\n", - " bit_3_description: Data value not available in input file, data value se...\n", - " bit_3_assessment: Bad, 'u_wind': \n", - "[2975 values with dtype=float32]\n", - "Attributes:\n", - " long_name: Eastward wind component\n", - " units: m/s\n", - " calc: -1 * sin( deg ) * wspd\n", - " valid_min: -75.0\n", - " valid_max: 75.0, 'qc_u_wind': \n", - "[2975 values with dtype=int32]\n", - "Attributes:\n", - " long_name: Quality check results on field: Eastward wind component\n", - " units: unitless\n", - " description: This field contains bit packed values which should be...\n", - " bit_1_description: Value is less than the valid_min.\n", - " bit_1_assessment: Indeterminate\n", - " bit_2_description: Value is greater than the valid_max.\n", - " bit_2_assessment: Indeterminate\n", - " bit_3_description: Data value not available in input file, data value se...\n", - " bit_3_assessment: Bad, 'v_wind': \n", - "[2975 values with dtype=float32]\n", - "Attributes:\n", - " long_name: Northward wind component\n", - " units: m/s\n", - " calc: -1 * cos( deg ) * wspd\n", - " valid_min: -75.0\n", - " valid_max: 75.0, 'qc_v_wind': \n", - "[2975 values with dtype=int32]\n", - "Attributes:\n", - " long_name: Quality check results on field: Northward wind component\n", - " units: unitless\n", - " description: This field contains bit packed values which should be...\n", - " bit_1_description: Value is less than 
the valid_min.\n", - " bit_1_assessment: Indeterminate\n", - " bit_2_description: Value is greater than the valid_max.\n", - " bit_2_assessment: Indeterminate\n", - " bit_3_description: Data value not available in input file, data value se...\n", - " bit_3_assessment: Bad, 'wstat': \n", - "[2975 values with dtype=float32]\n", - "Attributes:\n", - " long_name: Wind status\n", - " units: unitless, 'asc': \n", - "[2975 values with dtype=float32]\n", - "Attributes:\n", - " long_name: Ascent rate\n", - " units: m/s\n", - " valid_min: -10.0\n", - " valid_max: 20.0, 'qc_asc': \n", - "[2975 values with dtype=int32]\n", - "Attributes:\n", - " long_name: Quality check results on field: Ascent rate\n", - " units: unitless\n", - " description: This field contains bit packed values which should be...\n", - " bit_1_description: Value is less than the valid_min.\n", - " bit_1_assessment: Indeterminate\n", - " bit_2_description: Value is greater than the valid_max.\n", - " bit_2_assessment: Indeterminate\n", - " bit_3_description: Data value not available in input file, data value se...\n", - " bit_3_assessment: Bad, 'rh_smooth': \n", - "[2975 values with dtype=float32]\n", - "Attributes:\n", - " long_name: Smoothed original relative humidity\n", - " units: %\n", - " valid_min: 0.0\n", - " valid_max: 100.0\n", - " note: Intermediate RH profile created by smoothing original RH sond..., 'qc_rh_smooth': \n", - "[2975 values with dtype=int32]\n", - "Attributes:\n", - " long_name: Quality check results on field: Smoothed original rel...\n", - " units: unitless\n", - " description: This field contains bit packed values which should be...\n", - " bit_1_description: Value is less than the valid_min.\n", - " bit_1_assessment: Indeterminate\n", - " bit_2_description: Value is greater than the valid_max.\n", - " bit_2_assessment: Indeterminate\n", - " bit_3_description: Data value not available in input file, data value se...\n", - " bit_3_assessment: Bad, 'rh_biased': \n", - "[2975 values 
with dtype=float32]\n", - "Attributes:\n", - " long_name: Dry bias corrected relative humidity\n", - " units: %\n", - " valid_min: 0.0\n", - " valid_max: 100.0\n", - " note1: Eliminates the dry bias as described in Wang 2002\n", - " note2: This field differs from the rh_smooth field for only the RS-8..., 'qc_rh_biased': \n", - "[2975 values with dtype=int32]\n", - "Attributes:\n", - " long_name: Quality check results on field: Dry bias corrected re...\n", - " units: unitless\n", - " description: This field contains bit packed values which should be...\n", - " bit_1_description: Value is less than the valid_min.\n", - " bit_1_assessment: Indeterminate\n", - " bit_2_description: Value is greater than the valid_max.\n", - " bit_2_assessment: Indeterminate\n", - " bit_3_description: Data value not available in input file, data value se...\n", - " bit_3_assessment: Bad, 'rh_adjust': \n", - "[2975 values with dtype=float32]\n", - "Attributes:\n", - " long_name: Final corrected ambient relative humidity\n", - " units: %\n", - " valid_min: 0.0\n", - " valid_max: 100.0\n", - " note: corrects for sensor time-lag (RS-80 sondes) and the solar war..., 'qc_rh_adjust': \n", - "[2975 values with dtype=int32]\n", - "Attributes:\n", - " long_name: Quality check results on field: Final corrected ambie...\n", - " units: unitless\n", - " description: This field contains bit packed values which should be...\n", - " bit_1_description: Value is less than the valid_min.\n", - " bit_1_assessment: Indeterminate\n", - " bit_2_description: Value is greater than the valid_max.\n", - " bit_2_assessment: Indeterminate\n", - " bit_3_description: Data value not available in input file, data value se...\n", - " bit_3_assessment: Bad, 'rh_scaled': \n", - "[2975 values with dtype=float32]\n", - "Attributes:\n", - " long_name: Scaled final corrected ambient relative humidity\n", - " units: %\n", - " valid_min: 0.0\n", - " valid_max: 100.0\n", - " note1: scale factor is the be_pwv from mwrret1liljclou 
datasteam\n", - " note2: when there is no mwr or when pwv < 0.8, values are -9999, 'qc_rh_scaled': \n", - "[2975 values with dtype=int32]\n", - "Attributes:\n", - " long_name: Quality check results on field: Scaled final correcte...\n", - " units: unitless\n", - " description: This field contains bit packed values which should be...\n", - " bit_1_description: Value is less than the valid_min.\n", - " bit_1_assessment: Indeterminate\n", - " bit_2_description: Value is greater than the valid_max.\n", - " bit_2_assessment: Indeterminate\n", - " bit_3_description: Data value not available in input file, data value se...\n", - " bit_3_assessment: Bad\n", - " bit_4_description: The value of the pwv from the mwr file used to scale ...\n", - " bit_4_assessment: Bad, 'dp_scaled': \n", - "[2975 values with dtype=float32]\n", - "Attributes:\n", - " long_name: Scaled dewpoint temperature\n", - " units: C\n", - " valid_min: -110.0\n", - " valid_max: 50.0\n", - " note1: scale factor is the be_pwv from mwrret1liljclou datastream\n", - " note2: when there is no mwr or when pwv < 0.8, values are -9999, 'qc_dp_scaled': \n", - "[2975 values with dtype=int32]\n", - "Attributes:\n", - " long_name: Quality check results on field: Scaled dewpoint tempe...\n", - " units: unitless\n", - " description: This field contains bit packed values which should be...\n", - " bit_1_description: Value is less than the valid_min.\n", - " bit_1_assessment: Indeterminate\n", - " bit_2_description: Value is greater than the valid_max.\n", - " bit_2_assessment: Indeterminate\n", - " bit_3_description: Data value not available in input file, data value se...\n", - " bit_3_assessment: Bad\n", - " bit_4_description: The value of the pwv from the mwr file used to scale ...\n", - " bit_4_assessment: Bad, 'lat': \n", - "[2975 values with dtype=float32]\n", - "Attributes:\n", - " long_name: North latitude\n", - " units: degree_N\n", - " valid_min: -90.0\n", - " valid_max: 90.0, 'lon': \n", - "[2975 values with 
dtype=float32]\n", - "Attributes:\n", - " long_name: East longitude\n", - " units: degree_E\n", - " valid_min: -180.0\n", - " valid_max: 180.0, 'alt': \n", - "[2975 values with dtype=float32]\n", - "Attributes:\n", - " long_name: Altitude above mean sea level\n", - " units: m})" - ] - }, - "execution_count": 14, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Data array, Variable\n", - "# Note: the info can be overwhelming. Do not attempt to grasp everything at the first glance.\n", - "ds.variables" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "id": "8c41b67e", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Frozen({'time': 2975})" - ] - }, - "execution_count": 15, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# dimenssions\n", - "ds.dims" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "id": "156f1dfc", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Coordinates:\n", - " * time (time) datetime64[ns] 2008-06-01T05:35:00 ... 2008-06-01T07:14:08" - ] - }, - "execution_count": 16, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# coordinates\n", - "ds.coords" - ] - }, - { - "cell_type": "code", - "execution_count": 17, - "id": "277d6064", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "{'process_version': '$State: vap-sonde_adjust-8.0-0.sol5_10$',\n", - " 'command_line': 'sonde_adjust -d 20080601 -f nsaC1 -a 0',\n", - " 'site_id': 'nsa',\n", - " 'facility_id': 'C1: Barrow, Alaska',\n", - " 'reference1': 'Wang et.al. 2002. \"Corrections of Humidity Measurement Errors from the Vaisala RS80-Radiosonde -- Application to TOGA COARE Data.\" Journal of Atmospheric and Oceanic Technology',\n", - " 'reference2': 'Miloshevich et.al. 2004. 
\"Development and Validation of a Time-Lag Correction for Vaisala Radiosonde Humidity Measurement.\" Journal of Atmospheric and Oceanic Technology',\n", - " 'reference3': 'Miloshevich et.al. 2009. \"Accuracy Assessment and Correction of Vaisala RS92 Radiosonde Water Vapor Measurements.\" Journal of Geophysical Research--Atmospheres',\n", - " 'qc_standards_version': '1.0',\n", - " 'dod_version': '5.0',\n", - " 'sonde_serial_number': 'C3526394',\n", - " 'station_elevation': '8 m MSL',\n", - " 'input_datastreams_description': 'A string consisting of the datastream(s), datastream version(s), and datastream date (range).',\n", - " 'input_datastreams_num': 6,\n", - " 'input_datastreams': 'nsasondewnpnC1.b1 : 5.190000 : 20080601.053500-20080601.175900 ;\\nnsametC1.b1 : 4.100000 : 20080531.234500-20080602.000000 ;',\n", - " 'zeb_platform': 'nsasondeadjustC1.c1',\n", - " 'history': 'created by user gervais on machine emerald at 2-Sep-2014,22:07:00, using $State: zebra-zeblib-4.23-0.el5 $'}" - ] - }, - "execution_count": 17, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Meta Data (Attributes)\n", - "ds.attrs" - ] - }, - { - "cell_type": "code", - "execution_count": 18, - "id": "d334681f", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "\n", - "\n", - "\n", - "\n" - ] - } - ], - "source": [ - "# type\n", - "print(type(ds))\n", - "print(type(ds.variables))\n", - "print(type(ds.dims))\n", - "print(type(ds.coords))\n", - "print(type(ds.attrs))" - ] - }, - { - "cell_type": "code", - "execution_count": 19, - "id": "643399d6", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "['base_time',\n", - " 'time_offset',\n", - " 'time',\n", - " 'qc_time',\n", - " 'pres',\n", - " 'qc_pres',\n", - " 'tdry',\n", - " 'qc_tdry',\n", - " 'dp',\n", - " 'qc_dp',\n", - " 'wspd',\n", - " 'qc_wspd',\n", - " 'deg',\n", - " 'qc_deg',\n", - " 'rh',\n", - " 'qc_rh',\n", - " 'u_wind',\n", - " 
'qc_u_wind',\n", - " 'v_wind',\n", - " 'qc_v_wind',\n", - " 'wstat',\n", - " 'asc',\n", - " 'qc_asc',\n", - " 'rh_smooth',\n", - " 'qc_rh_smooth',\n", - " 'rh_biased',\n", - " 'qc_rh_biased',\n", - " 'rh_adjust',\n", - " 'qc_rh_adjust',\n", - " 'rh_scaled',\n", - " 'qc_rh_scaled',\n", - " 'dp_scaled',\n", - " 'qc_dp_scaled',\n", - " 'lat',\n", - " 'lon',\n", - " 'alt']" - ] - }, - "execution_count": 19, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# bonus: retrieve variable names only\n", - "list(ds.variables)" - ] - }, - { - "cell_type": "markdown", - "id": "1cf36878", - "metadata": {}, - "source": [ - "#### Discussion: variable vs. coordinates vs. dimenssions. \n", - "From [Components of a NetCDF Dataset](https://iprc.soest.hawaii.edu/users/xfu/tool/guidef-7.html#):\n", - "\n", - "* Dimession: A dimension may be used to represent a real physical dimension, for example, time, latitude, longitude, or height. A dimension might also be used to index other quantities, for example station or model-run-number.\n", - "A netCDF dimension has both a name and a length. A dimension length is an arbitrary positive integer, except that one dimension in a netCDF dataset can have the length UNLIMITED.\n", - "\n", - "* Variables: Variables are used to store the bulk of the data in a netCDF dataset. A variable represents an array of values of the same type. A scalar value is treated as a 0-dimensional array. A variable has a name, a data type, and a shape described by its list of dimensions specified when the variable is created. A variable may also have associated attributes, which may be added, deleted or changed after the variable is created.\n", - "\n", - "* Coordinate (Variables): It is legal for a variable to have the same name as a dimension. Such variables have no special meaning to the netCDF library. 
However there is a convention that such variables should be treated in a special way by software using this library.\n", - "A variable with the same name as a dimension is called a coordinate variable. It typically defines a physical coordinate corresponding to that dimension.\n", - "\n", - "\n", - "Tips: By definition, coordinates and dimenssions are also variable. For example ds.time is a coordinate but it is also a special variable. But in practice, when we talk about variable, it implies regular/non-coordinate variable. We will use this convention for the remaining notebook. " - ] - }, - { - "cell_type": "markdown", - "id": "d47120f9", - "metadata": {}, - "source": [ - "### Xarray Variable\n", - "\n", - "\n", - "We can use `ds.variables` to access varialbes of a dataset. But many times viewing print-out the whole datasets can be overwhemling. Instead, we would work on the individual variable (also called Xarray data array.) using the `ds[var_name]` syntax, where `var_name` is the name of the variable. See the following examples in action.\n" - ] - }, - { - "cell_type": "code", - "execution_count": 20, - "id": "5a57e136", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "
<xarray.DataArray 'time' (time: 2975)>\n",
-       "array(['2008-06-01T05:35:00.000000000', '2008-06-01T05:35:02.000000000',\n",
-       "       '2008-06-01T05:35:04.000000000', ..., '2008-06-01T07:14:04.000000000',\n",
-       "       '2008-06-01T07:14:06.000000000', '2008-06-01T07:14:08.000000000'],\n",
-       "      dtype='datetime64[ns]')\n",
-       "Coordinates:\n",
-       "  * time     (time) datetime64[ns] 2008-06-01T05:35:00 ... 2008-06-01T07:14:08\n",
-       "Attributes:\n",
-       "    long_name:  Time offset from midnight
" - ], - "text/plain": [ - "\n", - "array(['2008-06-01T05:35:00.000000000', '2008-06-01T05:35:02.000000000',\n", - " '2008-06-01T05:35:04.000000000', ..., '2008-06-01T07:14:04.000000000',\n", - " '2008-06-01T07:14:06.000000000', '2008-06-01T07:14:08.000000000'],\n", - " dtype='datetime64[ns]')\n", - "Coordinates:\n", - " * time (time) datetime64[ns] 2008-06-01T05:35:00 ... 2008-06-01T07:14:08\n", - "Attributes:\n", - " long_name: Time offset from midnight" - ] - }, - "execution_count": 20, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "print(type(ds[\"time\"]))\n", - "ds[\"time\"]" - ] - }, - { - "cell_type": "markdown", - "id": "51ec643e", - "metadata": {}, - "source": [ - "#### Variable properties\n", - "For individual varible, the following properties are mostly used with the assocaited xarray method to retrieve them. (assuming var = ds[var_name])\n", - "* name: `var.name`\n", - "* data content: `var.data`\n", - "* attributes: `var.attrs`\n", - "* dimenstions: `var.dims`\n", - "* data type: `var.data.dtype`" - ] - }, - { - "cell_type": "code", - "execution_count": 21, - "id": "a632a525", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "var.name: \n", - " time \n", - "\n", - "var.data: \n", - " ['2008-06-01T05:35:00.000000000' '2008-06-01T05:35:02.000000000'\n", - " '2008-06-01T05:35:04.000000000' ... 
'2008-06-01T07:14:04.000000000'\n", - " '2008-06-01T07:14:06.000000000' '2008-06-01T07:14:08.000000000'] \n", - "\n", - "var.attrs: \n", - " {'long_name': 'Time offset from midnight'} \n", - "\n", - "var.dims: \n", - " ('time',) \n", - "\n", - "var.data.dtype: \n", - " datetime64[ns] \n", - "\n" - ] - } - ], - "source": [ - "var_name = \"time\"\n", - "var = ds[var_name]\n", - "\n", - "print(\"var.name: \\n\", var.name, \"\\n\")\n", - "print(\"var.data: \\n\", var.data, \"\\n\")\n", - "print(\"var.attrs: \\n\", var.attrs, \"\\n\")\n", - "print(\"var.dims: \\n\", var.dims, \"\\n\")\n", - "print(\"var.data.dtype: \\n\", var.data.dtype, \"\\n\")\n" - ] - }, - { - "cell_type": "code", - "execution_count": 22, - "id": "b256b07f", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
var_namedimsis_dimn_dimattrsdtype
0base_time()False0{'string': '1-Jun-2008,5:35:00 GMT', 'long_nam...datetime64[ns]
1time_offset(time,)False1{'long_name': 'Time offset from base_time'}datetime64[ns]
2time(time,)True1{'long_name': 'Time offset from midnight'}datetime64[ns]
3qc_time(time,)False1{'long_name': 'Quality check results on field:...int32
4pres(time,)False1{'long_name': 'Barometric pressure', 'units': ...float32
5qc_pres(time,)False1{'long_name': 'Quality check results on field:...int32
6tdry(time,)False1{'long_name': 'Dry bulb temperature', 'units':...float32
7qc_tdry(time,)False1{'long_name': 'Quality check results on field:...int32
8dp(time,)False1{'long_name': 'Dewpoint temperature', 'units':...float32
9qc_dp(time,)False1{'long_name': 'Quality check results on field:...int32
10wspd(time,)False1{'long_name': 'Wind speed', 'units': 'm/s', 'v...float32
11qc_wspd(time,)False1{'long_name': 'Quality check results on field:...int32
12deg(time,)False1{'long_name': 'Wind direction', 'units': 'deg'...float32
13qc_deg(time,)False1{'long_name': 'Quality check results on field:...int32
14rh(time,)False1{'long_name': 'Relative humidity', 'units': '%...float32
15qc_rh(time,)False1{'long_name': 'Quality check results on field:...int32
16u_wind(time,)False1{'long_name': 'Eastward wind component', 'unit...float32
17qc_u_wind(time,)False1{'long_name': 'Quality check results on field:...int32
18v_wind(time,)False1{'long_name': 'Northward wind component', 'uni...float32
19qc_v_wind(time,)False1{'long_name': 'Quality check results on field:...int32
20wstat(time,)False1{'long_name': 'Wind status', 'units': 'unitless'}float32
21asc(time,)False1{'long_name': 'Ascent rate', 'units': 'm/s', '...float32
22qc_asc(time,)False1{'long_name': 'Quality check results on field:...int32
23rh_smooth(time,)False1{'long_name': 'Smoothed original relative humi...float32
24qc_rh_smooth(time,)False1{'long_name': 'Quality check results on field:...int32
25rh_biased(time,)False1{'long_name': 'Dry bias corrected relative hum...float32
26qc_rh_biased(time,)False1{'long_name': 'Quality check results on field:...int32
27rh_adjust(time,)False1{'long_name': 'Final corrected ambient relativ...float32
28qc_rh_adjust(time,)False1{'long_name': 'Quality check results on field:...int32
29rh_scaled(time,)False1{'long_name': 'Scaled final corrected ambient ...float32
30qc_rh_scaled(time,)False1{'long_name': 'Quality check results on field:...int32
31dp_scaled(time,)False1{'long_name': 'Scaled dewpoint temperature', '...float32
32qc_dp_scaled(time,)False1{'long_name': 'Quality check results on field:...int32
33lat(time,)False1{'long_name': 'North latitude', 'units': 'degr...float32
34lon(time,)False1{'long_name': 'East longitude', 'units': 'degr...float32
35alt(time,)False1{'long_name': 'Altitude above mean sea level',...float32
\n", - "
" - ], - "text/plain": [ - " var_name dims is_dim n_dim \\\n", - "0 base_time () False 0 \n", - "1 time_offset (time,) False 1 \n", - "2 time (time,) True 1 \n", - "3 qc_time (time,) False 1 \n", - "4 pres (time,) False 1 \n", - "5 qc_pres (time,) False 1 \n", - "6 tdry (time,) False 1 \n", - "7 qc_tdry (time,) False 1 \n", - "8 dp (time,) False 1 \n", - "9 qc_dp (time,) False 1 \n", - "10 wspd (time,) False 1 \n", - "11 qc_wspd (time,) False 1 \n", - "12 deg (time,) False 1 \n", - "13 qc_deg (time,) False 1 \n", - "14 rh (time,) False 1 \n", - "15 qc_rh (time,) False 1 \n", - "16 u_wind (time,) False 1 \n", - "17 qc_u_wind (time,) False 1 \n", - "18 v_wind (time,) False 1 \n", - "19 qc_v_wind (time,) False 1 \n", - "20 wstat (time,) False 1 \n", - "21 asc (time,) False 1 \n", - "22 qc_asc (time,) False 1 \n", - "23 rh_smooth (time,) False 1 \n", - "24 qc_rh_smooth (time,) False 1 \n", - "25 rh_biased (time,) False 1 \n", - "26 qc_rh_biased (time,) False 1 \n", - "27 rh_adjust (time,) False 1 \n", - "28 qc_rh_adjust (time,) False 1 \n", - "29 rh_scaled (time,) False 1 \n", - "30 qc_rh_scaled (time,) False 1 \n", - "31 dp_scaled (time,) False 1 \n", - "32 qc_dp_scaled (time,) False 1 \n", - "33 lat (time,) False 1 \n", - "34 lon (time,) False 1 \n", - "35 alt (time,) False 1 \n", - "\n", - " attrs dtype \n", - "0 {'string': '1-Jun-2008,5:35:00 GMT', 'long_nam... datetime64[ns] \n", - "1 {'long_name': 'Time offset from base_time'} datetime64[ns] \n", - "2 {'long_name': 'Time offset from midnight'} datetime64[ns] \n", - "3 {'long_name': 'Quality check results on field:... int32 \n", - "4 {'long_name': 'Barometric pressure', 'units': ... float32 \n", - "5 {'long_name': 'Quality check results on field:... int32 \n", - "6 {'long_name': 'Dry bulb temperature', 'units':... float32 \n", - "7 {'long_name': 'Quality check results on field:... int32 \n", - "8 {'long_name': 'Dewpoint temperature', 'units':... float32 \n", - "9 {'long_name': 'Quality check results on field:... 
int32 \n", - "10 {'long_name': 'Wind speed', 'units': 'm/s', 'v... float32 \n", - "11 {'long_name': 'Quality check results on field:... int32 \n", - "12 {'long_name': 'Wind direction', 'units': 'deg'... float32 \n", - "13 {'long_name': 'Quality check results on field:... int32 \n", - "14 {'long_name': 'Relative humidity', 'units': '%... float32 \n", - "15 {'long_name': 'Quality check results on field:... int32 \n", - "16 {'long_name': 'Eastward wind component', 'unit... float32 \n", - "17 {'long_name': 'Quality check results on field:... int32 \n", - "18 {'long_name': 'Northward wind component', 'uni... float32 \n", - "19 {'long_name': 'Quality check results on field:... int32 \n", - "20 {'long_name': 'Wind status', 'units': 'unitless'} float32 \n", - "21 {'long_name': 'Ascent rate', 'units': 'm/s', '... float32 \n", - "22 {'long_name': 'Quality check results on field:... int32 \n", - "23 {'long_name': 'Smoothed original relative humi... float32 \n", - "24 {'long_name': 'Quality check results on field:... int32 \n", - "25 {'long_name': 'Dry bias corrected relative hum... float32 \n", - "26 {'long_name': 'Quality check results on field:... int32 \n", - "27 {'long_name': 'Final corrected ambient relativ... float32 \n", - "28 {'long_name': 'Quality check results on field:... int32 \n", - "29 {'long_name': 'Scaled final corrected ambient ... float32 \n", - "30 {'long_name': 'Quality check results on field:... int32 \n", - "31 {'long_name': 'Scaled dewpoint temperature', '... float32 \n", - "32 {'long_name': 'Quality check results on field:... int32 \n", - "33 {'long_name': 'North latitude', 'units': 'degr... float32 \n", - "34 {'long_name': 'East longitude', 'units': 'degr... float32 \n", - "35 {'long_name': 'Altitude above mean sea level',... 
float32 " - ] - }, - "execution_count": 22, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# bonus: indivdual variable info table (collect and display variable info as a tabular format)\n", - "df_info = pd.DataFrame()\n", - "df_info[\"var_name\"] = list(ds.variables)\n", - "df_info[\"dims\"] = df_info.var_name.apply(lambda x: ds[x].dims)\n", - "df_info[\"is_dim\"] = df_info.var_name.apply(lambda x: x in ds.dims)\n", - "df_info[\"n_dim\"] = df_info.var_name.apply(lambda x: len(ds[x].dims))\n", - "df_info[\"attrs\"] = df_info.var_name.apply(lambda x: ds[x].attrs)\n", - "df_info[\"dtype\"] = df_info.var_name.apply(lambda x: ds[x].data.dtype)\n", - "df_info" - ] - }, - { - "cell_type": "markdown", - "id": "e6a36b96", - "metadata": {}, - "source": [ - "### Data cleaning/Preprocessing (skipped)\n", - "Data cleaning and preprocessing is an important stage in the the data analysis pipeline. However it will be out of the scope of this notebook. For more information about data cleaning and preprocessing basics, here are some references. \n", - "* [Xarray Fundamentals](https://earth-env-data-science.github.io/lectures/xarray/xarray.html)\n", - "* [Xarray Tutorial — Pangeo Gallery documentation](http://gallery.pangeo.io/repos/pangeo-data/pangeo-tutorial-gallery/xarray.html)\n", - "* [Pythonic Data Cleaning With pandas and NumPy](https://realpython.com/python-data-cleaning-numpy-pandas/)\n", - "* [Pandas - Cleaning Data](https://www.w3schools.com/python/pandas/pandas_cleaning.asp)" - ] - }, - { - "cell_type": "markdown", - "id": "ae6e1a69", - "metadata": {}, - "source": [ - "### Plotting\n", - "Data visualization (or plotting) is another important data analysis topic and deserves its own discussion. 
Here in this notebook we will only demonstrate basic tool for simple data visualization tasks.\n", - "\n", - "Here are some reference you might find useful:\n", - "* xarray plotting: https://docs.xarray.dev/en/stable/user-guide/plotting.html\n", - "* Atmospheric Community Toolkit (ACT): https://arm-doe.github.io/ACT/index.html\n", - "\n", - "Note: this notebook is auto-generated using a template. It uses a general idea to select variable(s) to plot and is not customized to each indivdual notebook. Feel free to change the variables in intrrests, especially certain figure is failed to plot." - ] - }, - { - "cell_type": "markdown", - "id": "7e417fb2", - "metadata": {}, - "source": [ - "#### 1-dimenssional basic time series plot\n", - "\n", - "For the following plot we would like to find variables such that\n", - "* it has one and only one dimession\n", - "* \"time\" is its coordinate variable\n", - "* it is not a dimenssion itself,\n", - "* it is not a special variable with substrings within [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", - "\n", - "Please see the following example in action. (For more details about pandas filtering, please see the following references.)\n", - "* [pandas: multiple conditions while indexing data frame](https://stackoverflow.com/questions/22591174/pandas-multiple-conditions-while-indexing-data-frame-unexpected-behavior)\n", - "* [pandas.DataFrame.apply](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.apply.html)\n", - "* [How to test if a string contains one of the substrings in a list...](https://stackoverflow.com/questions/26577516/how-to-test-if-a-string-contains-one-of-the-substrings-in-a-list-in-pandas" - ] - }, - { - "cell_type": "code", - "execution_count": 23, - "id": "30edac2d", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
var_namedimsis_dimn_dimattrsdtype
4pres(time,)False1{'long_name': 'Barometric pressure', 'units': ...float32
6tdry(time,)False1{'long_name': 'Dry bulb temperature', 'units':...float32
8dp(time,)False1{'long_name': 'Dewpoint temperature', 'units':...float32
10wspd(time,)False1{'long_name': 'Wind speed', 'units': 'm/s', 'v...float32
12deg(time,)False1{'long_name': 'Wind direction', 'units': 'deg'...float32
14rh(time,)False1{'long_name': 'Relative humidity', 'units': '%...float32
16u_wind(time,)False1{'long_name': 'Eastward wind component', 'unit...float32
18v_wind(time,)False1{'long_name': 'Northward wind component', 'uni...float32
20wstat(time,)False1{'long_name': 'Wind status', 'units': 'unitless'}float32
21asc(time,)False1{'long_name': 'Ascent rate', 'units': 'm/s', '...float32
23rh_smooth(time,)False1{'long_name': 'Smoothed original relative humi...float32
25rh_biased(time,)False1{'long_name': 'Dry bias corrected relative hum...float32
27rh_adjust(time,)False1{'long_name': 'Final corrected ambient relativ...float32
29rh_scaled(time,)False1{'long_name': 'Scaled final corrected ambient ...float32
31dp_scaled(time,)False1{'long_name': 'Scaled dewpoint temperature', '...float32
\n", - "
" - ], - "text/plain": [ - " var_name dims is_dim n_dim \\\n", - "4 pres (time,) False 1 \n", - "6 tdry (time,) False 1 \n", - "8 dp (time,) False 1 \n", - "10 wspd (time,) False 1 \n", - "12 deg (time,) False 1 \n", - "14 rh (time,) False 1 \n", - "16 u_wind (time,) False 1 \n", - "18 v_wind (time,) False 1 \n", - "20 wstat (time,) False 1 \n", - "21 asc (time,) False 1 \n", - "23 rh_smooth (time,) False 1 \n", - "25 rh_biased (time,) False 1 \n", - "27 rh_adjust (time,) False 1 \n", - "29 rh_scaled (time,) False 1 \n", - "31 dp_scaled (time,) False 1 \n", - "\n", - " attrs dtype \n", - "4 {'long_name': 'Barometric pressure', 'units': ... float32 \n", - "6 {'long_name': 'Dry bulb temperature', 'units':... float32 \n", - "8 {'long_name': 'Dewpoint temperature', 'units':... float32 \n", - "10 {'long_name': 'Wind speed', 'units': 'm/s', 'v... float32 \n", - "12 {'long_name': 'Wind direction', 'units': 'deg'... float32 \n", - "14 {'long_name': 'Relative humidity', 'units': '%... float32 \n", - "16 {'long_name': 'Eastward wind component', 'unit... float32 \n", - "18 {'long_name': 'Northward wind component', 'uni... float32 \n", - "20 {'long_name': 'Wind status', 'units': 'unitless'} float32 \n", - "21 {'long_name': 'Ascent rate', 'units': 'm/s', '... float32 \n", - "23 {'long_name': 'Smoothed original relative humi... float32 \n", - "25 {'long_name': 'Dry bias corrected relative hum... float32 \n", - "27 {'long_name': 'Final corrected ambient relativ... float32 \n", - "29 {'long_name': 'Scaled final corrected ambient ... float32 \n", - "31 {'long_name': 'Scaled dewpoint temperature', '... 
float32 " - ] - }, - "execution_count": 23, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# (pandas query) filter n_dim==1, non-dimenssional, and not contains the following substrings\n", - "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", - "df_filter = df_info[(df_info.n_dim == 1) &\n", - " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", - " (df_info.is_dim==False) &\n", - " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", - " ]\n", - "df_filter" - ] - }, - { - "cell_type": "code", - "execution_count": 24, - "id": "4777a659", - "metadata": {}, - "outputs": [], - "source": [ - "try:\n", - " var_1d = df_filter.var_name.values[0]\n", - " var_1d\n", - "except Exception as e:\n", - " print(e) " - ] - }, - { - "cell_type": "code", - "execution_count": 25, - "id": "fb5ae985", - "metadata": {}, - "outputs": [ - { - "data": { - "image/png": "", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "# Note: if failed, change to another variable to plot.\n", - "try:\n", - " ds[var_1d].plot()\n", - " plt.show()\n", - "except Exception as e:\n", - " print(e) " - ] - }, - { - "cell_type": "markdown", - "id": "9e41066e", - "metadata": {}, - "source": [ - "#### 2-dimenssional basic plot" - ] - }, - { - "cell_type": "code", - "execution_count": 26, - "id": "09bd4adc", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
var_namedimsis_dimn_dimattrsdtype
\n", - "
" - ], - "text/plain": [ - "Empty DataFrame\n", - "Columns: [var_name, dims, is_dim, n_dim, attrs, dtype]\n", - "Index: []" - ] - }, - "execution_count": 26, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# (pandas query) filter n_dim==2, non-dimenssional, and not contains the following substrings\n", - "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", - "df_filter_2 = df_info[(df_info.n_dim == 2) &\n", - " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", - " (df_info.is_dim==False) &\n", - " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", - " ]\n", - "df_filter_2" - ] - }, - { - "cell_type": "code", - "execution_count": 27, - "id": "2cdea1a9", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "index 0 is out of bounds for axis 0 with size 0\n" - ] - } - ], - "source": [ - "try:\n", - " var_2d = df_filter_2.var_name.values[0]\n", - " var_2d\n", - "except Exception as e:\n", - " print(e) " - ] - }, - { - "cell_type": "code", - "execution_count": 28, - "id": "f5f48879", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "name 'var_2d' is not defined\n" - ] - } - ], - "source": [ - "# Note: if failed, change to another variable to plot.\n", - "try:\n", - " print(ds[var_2d].dims)\n", - " # ds[var_2d].plot()\n", - "\n", - " # conventionally, use \"time\" as x-axis\n", - " ds[var_2d].plot(x=\"time\")\n", - " plt.show()\n", - "except Exception as e:\n", - " print(e) " - ] - }, - { - "cell_type": "markdown", - "id": "7554b2a6", - "metadata": {}, - "source": [ - "#### qc-plotting (optional)\n", - "\n", - "Note: act qc plotting has more strict requirement, one of them is the associated qc variable need to have \"flag_masks\" attributes. 
Which is added by using ds = act.io.armfiles.read_netcdf(files_list), then ds.clean.cleanup()" - ] - }, - { - "cell_type": "code", - "execution_count": 29, - "id": "71c2096f", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n" - ] - } - ], - "source": [ - "try:\n", - " ds_act = act.io.armfiles.read_netcdf(full_path)\n", - " print(type(ds_act))\n", - " ds_act.clean.cleanup()\n", - "\n", - " # or \n", - " # ds.clean.cleanup()\n", - "except Exception as e:\n", - " print(\"ERROR\", e)\n", - " ds_act = ds" - ] - }, - { - "cell_type": "code", - "execution_count": 30, - "id": "844f8505", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
var_namedimsis_dimn_dimattrsdtype
4pres(time,)False1{'long_name': 'Barometric pressure', 'units': ...float32
6tdry(time,)False1{'long_name': 'Dry bulb temperature', 'units':...float32
8dp(time,)False1{'long_name': 'Dewpoint temperature', 'units':...float32
10wspd(time,)False1{'long_name': 'Wind speed', 'units': 'm/s', 'v...float32
12deg(time,)False1{'long_name': 'Wind direction', 'units': 'deg'...float32
14rh(time,)False1{'long_name': 'Relative humidity', 'units': '%...float32
16u_wind(time,)False1{'long_name': 'Eastward wind component', 'unit...float32
18v_wind(time,)False1{'long_name': 'Northward wind component', 'uni...float32
21asc(time,)False1{'long_name': 'Ascent rate', 'units': 'm/s', '...float32
23rh_smooth(time,)False1{'long_name': 'Smoothed original relative humi...float32
25rh_biased(time,)False1{'long_name': 'Dry bias corrected relative hum...float32
27rh_adjust(time,)False1{'long_name': 'Final corrected ambient relativ...float32
29rh_scaled(time,)False1{'long_name': 'Scaled final corrected ambient ...float32
31dp_scaled(time,)False1{'long_name': 'Scaled dewpoint temperature', '...float32
\n", - "
" - ], - "text/plain": [ - " var_name dims is_dim n_dim \\\n", - "4 pres (time,) False 1 \n", - "6 tdry (time,) False 1 \n", - "8 dp (time,) False 1 \n", - "10 wspd (time,) False 1 \n", - "12 deg (time,) False 1 \n", - "14 rh (time,) False 1 \n", - "16 u_wind (time,) False 1 \n", - "18 v_wind (time,) False 1 \n", - "21 asc (time,) False 1 \n", - "23 rh_smooth (time,) False 1 \n", - "25 rh_biased (time,) False 1 \n", - "27 rh_adjust (time,) False 1 \n", - "29 rh_scaled (time,) False 1 \n", - "31 dp_scaled (time,) False 1 \n", - "\n", - " attrs dtype \n", - "4 {'long_name': 'Barometric pressure', 'units': ... float32 \n", - "6 {'long_name': 'Dry bulb temperature', 'units':... float32 \n", - "8 {'long_name': 'Dewpoint temperature', 'units':... float32 \n", - "10 {'long_name': 'Wind speed', 'units': 'm/s', 'v... float32 \n", - "12 {'long_name': 'Wind direction', 'units': 'deg'... float32 \n", - "14 {'long_name': 'Relative humidity', 'units': '%... float32 \n", - "16 {'long_name': 'Eastward wind component', 'unit... float32 \n", - "18 {'long_name': 'Northward wind component', 'uni... float32 \n", - "21 {'long_name': 'Ascent rate', 'units': 'm/s', '... float32 \n", - "23 {'long_name': 'Smoothed original relative humi... float32 \n", - "25 {'long_name': 'Dry bias corrected relative hum... float32 \n", - "27 {'long_name': 'Final corrected ambient relativ... float32 \n", - "29 {'long_name': 'Scaled final corrected ambient ... float32 \n", - "31 {'long_name': 'Scaled dewpoint temperature', '... 
float32 " - ] - }, - "execution_count": 30, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Filter valid variables for ACT qc plotting\n", - "condition = (\"qc_\" + df_info.var_name).apply(lambda x: ds_act[x].attrs.get(\"flag_masks\") is not None \n", - " if x in list(ds.variables) else False)\n", - "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", - "df_filter_3 = df_info[(df_info.is_dim==False) &\n", - " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", - " (~df_info.var_name.str.contains('|'.join(exclude_substrings))) &\n", - " condition\n", - " ]\n", - "df_filter_3" - ] - }, - { - "cell_type": "code", - "execution_count": 31, - "id": "ecd38cc6", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "pres\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/home/kefeimo/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/act/utils/datetime_utils.py:136: FutureWarning: Unlike other reduction functions (e.g. `skew`, `kurtosis`), the default behavior of `mode` typically preserves the axis it acts along. In SciPy 1.11.0, this behavior will change: the default value of `keepdims` will become False, the `axis` over which the statistic is taken will be eliminated, and the value None will no longer be accepted. Set `keepdims` to True or False to avoid this warning.\n", - " mode = stats.mode(np.diff(time))\n" - ] - }, - { - "data": { - "image/png": "", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "# QC Plot\n", - "try:\n", - " qc_variable = df_filter_3.var_name.values[0]\n", - " print(qc_variable)\n", - "except Exception as e:\n", - " print(e)\n", - " \n", - "try:\n", - "\n", - " # Plot\n", - " qc_display = act.plotting.TimeSeriesDisplay(ds_act)\n", - " qc_display.add_subplots((2,), figsize = (9.5,10))\n", - " qc_ax = qc_display.plot(qc_variable, subplot_index=(0,), set_title=\"QC results on field: \" + qc_variable,)\n", - " qc_ax.grid()\n", - " qc_display.qc_flag_block_plot(qc_variable, subplot_index=(1,))\n", - " qc_ax.set_xlabel(f\"UTC Time starts at {ds.time.data[0]}\")\n", - "\n", - " plt.show()\n", - "except Exception as e:\n", - " print(e)" - ] - }, - { - "cell_type": "markdown", - "id": "d76e4d27", - "metadata": {}, - "source": [ - "#### bonus: choose variables to plot from a dropdown menu " - ] - }, - { - "cell_type": "code", - "execution_count": 32, - "id": "bb733804", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
var_namedimsis_dimn_dimattrsdtype
4pres(time,)False1{'long_name': 'Barometric pressure', 'units': ...float32
6tdry(time,)False1{'long_name': 'Dry bulb temperature', 'units':...float32
8dp(time,)False1{'long_name': 'Dewpoint temperature', 'units':...float32
10wspd(time,)False1{'long_name': 'Wind speed', 'units': 'm/s', 'v...float32
12deg(time,)False1{'long_name': 'Wind direction', 'units': 'deg'...float32
14rh(time,)False1{'long_name': 'Relative humidity', 'units': '%...float32
16u_wind(time,)False1{'long_name': 'Eastward wind component', 'unit...float32
18v_wind(time,)False1{'long_name': 'Northward wind component', 'uni...float32
20wstat(time,)False1{'long_name': 'Wind status', 'units': 'unitless'}float32
21asc(time,)False1{'long_name': 'Ascent rate', 'units': 'm/s', '...float32
23rh_smooth(time,)False1{'long_name': 'Smoothed original relative humi...float32
25rh_biased(time,)False1{'long_name': 'Dry bias corrected relative hum...float32
27rh_adjust(time,)False1{'long_name': 'Final corrected ambient relativ...float32
29rh_scaled(time,)False1{'long_name': 'Scaled final corrected ambient ...float32
31dp_scaled(time,)False1{'long_name': 'Scaled dewpoint temperature', '...float32
\n", - "
" - ], - "text/plain": [ - " var_name dims is_dim n_dim \\\n", - "4 pres (time,) False 1 \n", - "6 tdry (time,) False 1 \n", - "8 dp (time,) False 1 \n", - "10 wspd (time,) False 1 \n", - "12 deg (time,) False 1 \n", - "14 rh (time,) False 1 \n", - "16 u_wind (time,) False 1 \n", - "18 v_wind (time,) False 1 \n", - "20 wstat (time,) False 1 \n", - "21 asc (time,) False 1 \n", - "23 rh_smooth (time,) False 1 \n", - "25 rh_biased (time,) False 1 \n", - "27 rh_adjust (time,) False 1 \n", - "29 rh_scaled (time,) False 1 \n", - "31 dp_scaled (time,) False 1 \n", - "\n", - " attrs dtype \n", - "4 {'long_name': 'Barometric pressure', 'units': ... float32 \n", - "6 {'long_name': 'Dry bulb temperature', 'units':... float32 \n", - "8 {'long_name': 'Dewpoint temperature', 'units':... float32 \n", - "10 {'long_name': 'Wind speed', 'units': 'm/s', 'v... float32 \n", - "12 {'long_name': 'Wind direction', 'units': 'deg'... float32 \n", - "14 {'long_name': 'Relative humidity', 'units': '%... float32 \n", - "16 {'long_name': 'Eastward wind component', 'unit... float32 \n", - "18 {'long_name': 'Northward wind component', 'uni... float32 \n", - "20 {'long_name': 'Wind status', 'units': 'unitless'} float32 \n", - "21 {'long_name': 'Ascent rate', 'units': 'm/s', '... float32 \n", - "23 {'long_name': 'Smoothed original relative humi... float32 \n", - "25 {'long_name': 'Dry bias corrected relative hum... float32 \n", - "27 {'long_name': 'Final corrected ambient relativ... float32 \n", - "29 {'long_name': 'Scaled final corrected ambient ... float32 \n", - "31 {'long_name': 'Scaled dewpoint temperature', '... 
float32 " - ] - }, - "execution_count": 32, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Valid variables filtering\n", - "exclude_substrings = [\"time\", \"lat\", \"lon\", \"alt\", \"qc\"]\n", - "df_filter_4 = df_info[(df_info.is_dim==False) &\n", - " (df_info.dims.apply(lambda x: \"time\" in x)) &\n", - " (~df_info.var_name.str.contains('|'.join(exclude_substrings)))\n", - " ]\n", - "df_filter_4" - ] - }, - { - "cell_type": "code", - "execution_count": 33, - "id": "82d5c20d", - "metadata": {}, - "outputs": [], - "source": [ - "# example 1: using xarray plot\n", - "\n", - "# Uncomment the following cell to try the interactive plot (ctrl + /)" - ] - }, - { - "cell_type": "code", - "execution_count": 34, - "id": "2fc9aaa4", - "metadata": {}, - "outputs": [], - "source": [ - "# %matplotlib widget\n", - "# plt.clf()\n", - "\n", - "# fig, ax = plt.subplots(figsize=(10, 4))\n", - "\n", - "# available_variables = df_filter_4.var_name.values\n", - "# @widgets.interact(var=available_variables)\n", - "# def update(var = available_variables[0]):\n", - "# fig.clear() # Remove old lines from plot and plot new one\n", - "# if len(ds[var].dims)==2:\n", - "# ds[var].plot(x=\"time\", add_colorbar=False)\n", - "# else:\n", - "# ds[var].plot()\n", - "# plt.grid()\n", - "# plt.show()" - ] - }, - { - "cell_type": "code", - "execution_count": 35, - "id": "fabdd802", - "metadata": {}, - "outputs": [], - "source": [ - "# example 2: using act plot\n", - "\n", - "# Uncomment the following cell to try the interactive plot (ctrl + /)" - ] - }, - { - "cell_type": "code", - "execution_count": 36, - "id": "962b4186", - "metadata": {}, - "outputs": [], - "source": [ - "# %matplotlib widget\n", - "# plt.clf()\n", - "\n", - "# available_variables = df_filter_4.var_name.values\n", - "\n", - "\n", - "# @widgets.interact(var=available_variables)\n", - "# def update(var = available_variables[0]):\n", - "\n", - "# i_display = act.plotting.TimeSeriesDisplay(ds_act)\n", 
- "# i_display.add_subplots((1,), figsize=(10, 4))\n", - "# ax = i_display.plot(var, subplot_index=(0,), set_title=f\"{var} ({ds_act[var].attrs['long_name']})\",)\n", - "\n", - "# ax.set_xlabel(f\"UTC Time starts at {ds.time.data[0]}\")\n", - "# ax.grid()\n", - "# plt.show()\n" - ] - }, - { - "cell_type": "markdown", - "id": "ba714aa6", - "metadata": {}, - "source": [ - "## Skew-T Plot" - ] - }, - { - "cell_type": "code", - "execution_count": 37, - "id": "606dc551", - "metadata": {}, - "outputs": [ - { - "ename": "NameError", - "evalue": "name 'files_list' is not defined", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[37], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m launch_times \u001b[38;5;241m=\u001b[39m [\u001b[38;5;28mstr\u001b[39m(datetime\u001b[38;5;241m.\u001b[39mstrptime(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;241m.\u001b[39mjoin(f\u001b[38;5;241m.\u001b[39msplit(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m.\u001b[39m\u001b[38;5;124m'\u001b[39m)[\u001b[38;5;241m-\u001b[39m\u001b[38;5;241m3\u001b[39m:\u001b[38;5;241m-\u001b[39m\u001b[38;5;241m1\u001b[39m]), \u001b[38;5;124m'\u001b[39m\u001b[38;5;124m%\u001b[39m\u001b[38;5;124mY\u001b[39m\u001b[38;5;124m%\u001b[39m\u001b[38;5;124mm\u001b[39m\u001b[38;5;132;01m%d\u001b[39;00m\u001b[38;5;124m%\u001b[39m\u001b[38;5;124mH\u001b[39m\u001b[38;5;124m%\u001b[39m\u001b[38;5;124mM\u001b[39m\u001b[38;5;124m%\u001b[39m\u001b[38;5;124mS\u001b[39m\u001b[38;5;124m'\u001b[39m)) \u001b[38;5;28;01mfor\u001b[39;00m f \u001b[38;5;129;01min\u001b[39;00m \u001b[43mfiles_list\u001b[49m]\n\u001b[1;32m 2\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mAvailable sonde launch times:\u001b[39m\u001b[38;5;124m'\u001b[39m)\n\u001b[1;32m 3\u001b[0m 
display(pd\u001b[38;5;241m.\u001b[39mDataFrame(launch_times, columns\u001b[38;5;241m=\u001b[39m[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mLaunch Time\u001b[39m\u001b[38;5;124m'\u001b[39m]))\n", - "\u001b[0;31mNameError\u001b[0m: name 'files_list' is not defined" - ] - } - ], - "source": [ - "launch_times = [str(datetime.strptime(''.join(f.split('.')[-3:-1]), '%Y%m%d%H%M%S')) for f in files_list]\n", - "print('Available sonde launch times:')\n", - "display(pd.DataFrame(launch_times, columns=['Launch Time']))\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "d85d73a9", - "metadata": {}, - "outputs": [], - "source": [ - "# select sonde launch time from the list\n", - "launch_time_index = 0\n", - "sonde_file = files_list[launch_time_index]\n", - "sonde_ds = act.io.armfiles.read_netcdf(sonde_file)\n", - "\n", - "# Calculate stability indicies\n", - "sonde_ds = act.retrievals.calculate_stability_indicies(\n", - " sonde_ds, temp_name='tdry', td_name='dp', p_name='pres', rh_name='rh'\n", - ")\n", - "\n", - "# Set up plot\n", - "skewt = act.plotting.SkewTDisplay(sonde_ds, figsize=(7, 10))\n", - "\n", - "# Add data\n", - "skewt.plot_from_u_and_v('u_wind', 'v_wind', 'pres', 'tdry', 'dp', set_title=f'Skew-T Plot for {launch_times[launch_time_index]}')\n", - "sonde_ds.close()\n", - "plt.show()" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.16" - }, - "widgets": { - "application/vnd.jupyter.widget-state+json": { - "state": {}, - "version_major": 2, - "version_minor": 0 - } - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/VAPs/quicklook/SWFLUXANAL/.ipynb_checkpoints/1swfanalsirs1long.c1-checkpoint.ipynb 
b/VAPs/quicklook/SWFLUXANAL/.ipynb_checkpoints/1swfanalsirs1long.c1-checkpoint.ipynb deleted file mode 100644 index 108d3647..00000000 --- a/VAPs/quicklook/SWFLUXANAL/.ipynb_checkpoints/1swfanalsirs1long.c1-checkpoint.ipynb +++ /dev/null @@ -1,2654 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "70840257-70e4-45e2-b491-14bff5a257a3", - "metadata": {}, - "source": [ - "# 1SWFANALSIRS1LONG.C1 Plots\n", - "\n", - "[Click here](https://www.arm.gov/capabilities/vaps/swfluxanal) for more information about this vap." - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "id": "460fd89f-e034-452c-b837-f65c5958264f", - "metadata": {}, - "outputs": [], - "source": [ - "%matplotlib widget\n", - "import ipywidgets as widgets\n", - "\n", - "import matplotlib.pyplot as plt\n", - "import ipywidgets as widgets\n", - "import numpy as np\n", - "import pandas as pd\n", - "import os\n", - "from datetime import datetime\n", - "\n", - "import act\n", - "import xarray as xr\n", - "\n", - "# Data archive directory\n", - "DATA_DIR = r'/data/archive/'\n", - "\n", - "# Datastream info\n", - "DATASTREAM_NAME = '1swfanalsirs1long'\n", - "DATA_LEVEL = 'c1'\n", - "LOCATIONS = [{'end_date': '2015-05-18', 'facility': 'C1', 'site': 'sgp', 'start_date': '1997-03-25'}, {'end_date': '2011-10-15', 'facility': 'E10', 'site': 'sgp', 'start_date': '1997-02-16'}, {'end_date': '2015-05-18', 'facility': 'E11', 'site': 'sgp', 'start_date': '1995-09-26'}, {'end_date': '2015-05-26', 'facility': 'E12', 'site': 'sgp', 'start_date': '1996-01-21'}, {'end_date': '2015-05-18', 'facility': 'E13', 'site': 'sgp', 'start_date': '1994-01-07'}, {'end_date': '2015-05-18', 'facility': 'E15', 'site': 'sgp', 'start_date': '1994-03-31'}, {'end_date': '2011-11-09', 'facility': 'E16', 'site': 'sgp', 'start_date': '1995-09-22'}, {'end_date': '2009-11-06', 'facility': 'E18', 'site': 'sgp', 'start_date': '1996-06-20'}, {'end_date': '2011-05-21', 'facility': 'E19', 'site': 'sgp', 'start_date': '1998-07-21'}, 
{'end_date': '2009-05-07', 'facility': 'E1', 'site': 'sgp', 'start_date': '1995-11-16'}, {'end_date': '2011-11-14', 'facility': 'E20', 'site': 'sgp', 'start_date': '1995-04-02'}, {'end_date': '2015-05-11', 'facility': 'E21', 'site': 'sgp', 'start_date': '1999-09-13'}, {'end_date': '2009-11-29', 'facility': 'E22', 'site': 'sgp', 'start_date': '1995-11-09'}, {'end_date': '2009-11-06', 'facility': 'E24', 'site': 'sgp', 'start_date': '1995-11-08'}, {'end_date': '2002-04-03', 'facility': 'E25', 'site': 'sgp', 'start_date': '1997-11-19'}, {'end_date': '2009-07-15', 'facility': 'E27', 'site': 'sgp', 'start_date': '2003-05-16'}, {'end_date': '2009-10-18', 'facility': 'E2', 'site': 'sgp', 'start_date': '1996-04-02'}, {'end_date': '2015-05-12', 'facility': 'E31', 'site': 'sgp', 'start_date': '2011-10-13'}, {'end_date': '2015-05-18', 'facility': 'E32', 'site': 'sgp', 'start_date': '2012-02-05'}, {'end_date': '2015-05-18', 'facility': 'E33', 'site': 'sgp', 'start_date': '2011-08-26'}, {'end_date': '2015-05-26', 'facility': 'E34', 'site': 'sgp', 'start_date': '2011-09-04'}, {'end_date': '2015-05-18', 'facility': 'E35', 'site': 'sgp', 'start_date': '2011-10-06'}, {'end_date': '2015-05-18', 'facility': 'E36', 'site': 'sgp', 'start_date': '2011-09-29'}, {'end_date': '2015-05-18', 'facility': 'E37', 'site': 'sgp', 'start_date': '2011-09-30'}, {'end_date': '2015-05-27', 'facility': 'E38', 'site': 'sgp', 'start_date': '2011-09-05'}, {'end_date': '2009-08-30', 'facility': 'E3', 'site': 'sgp', 'start_date': '1996-03-07'}, {'end_date': '2011-09-25', 'facility': 'E4', 'site': 'sgp', 'start_date': '1996-03-07'}, {'end_date': '2009-10-31', 'facility': 'E5', 'site': 'sgp', 'start_date': '1996-06-17'}, {'end_date': '2011-10-15', 'facility': 'E6', 'site': 'sgp', 'start_date': '1996-03-07'}, {'end_date': '2011-11-12', 'facility': 'E7', 'site': 'sgp', 'start_date': '1995-10-20'}, {'end_date': '2009-11-04', 'facility': 'E8', 'site': 'sgp', 'start_date': '1995-09-29'}, {'end_date': '2015-05-26', 
'facility': 'E9', 'site': 'sgp', 'start_date': '1994-01-19'}]" - ] - }, - { - "cell_type": "markdown", - "id": "9faaf875", - "metadata": {}, - "source": [ - "## Define site, facility, and date range" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "ac6764f5", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The following locations and date ranges are available for this VAP:\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
sitefacilitystart_dateend_date
0sgpC11997-03-252015-05-18
1sgpE101997-02-162011-10-15
2sgpE111995-09-262015-05-18
3sgpE121996-01-212015-05-26
4sgpE131994-01-072015-05-18
5sgpE151994-03-312015-05-18
6sgpE161995-09-222011-11-09
7sgpE181996-06-202009-11-06
8sgpE191998-07-212011-05-21
9sgpE11995-11-162009-05-07
10sgpE201995-04-022011-11-14
11sgpE211999-09-132015-05-11
12sgpE221995-11-092009-11-29
13sgpE241995-11-082009-11-06
14sgpE251997-11-192002-04-03
15sgpE272003-05-162009-07-15
16sgpE21996-04-022009-10-18
17sgpE312011-10-132015-05-12
18sgpE322012-02-052015-05-18
19sgpE332011-08-262015-05-18
20sgpE342011-09-042015-05-26
21sgpE352011-10-062015-05-18
22sgpE362011-09-292015-05-18
23sgpE372011-09-302015-05-18
24sgpE382011-09-052015-05-27
25sgpE31996-03-072009-08-30
26sgpE41996-03-072011-09-25
27sgpE51996-06-172009-10-31
28sgpE61996-03-072011-10-15
29sgpE71995-10-202011-11-12
30sgpE81995-09-292009-11-04
31sgpE91994-01-192015-05-26
\n", - "
" - ], - "text/plain": [ - " site facility start_date end_date\n", - "0 sgp C1 1997-03-25 2015-05-18\n", - "1 sgp E10 1997-02-16 2011-10-15\n", - "2 sgp E11 1995-09-26 2015-05-18\n", - "3 sgp E12 1996-01-21 2015-05-26\n", - "4 sgp E13 1994-01-07 2015-05-18\n", - "5 sgp E15 1994-03-31 2015-05-18\n", - "6 sgp E16 1995-09-22 2011-11-09\n", - "7 sgp E18 1996-06-20 2009-11-06\n", - "8 sgp E19 1998-07-21 2011-05-21\n", - "9 sgp E1 1995-11-16 2009-05-07\n", - "10 sgp E20 1995-04-02 2011-11-14\n", - "11 sgp E21 1999-09-13 2015-05-11\n", - "12 sgp E22 1995-11-09 2009-11-29\n", - "13 sgp E24 1995-11-08 2009-11-06\n", - "14 sgp E25 1997-11-19 2002-04-03\n", - "15 sgp E27 2003-05-16 2009-07-15\n", - "16 sgp E2 1996-04-02 2009-10-18\n", - "17 sgp E31 2011-10-13 2015-05-12\n", - "18 sgp E32 2012-02-05 2015-05-18\n", - "19 sgp E33 2011-08-26 2015-05-18\n", - "20 sgp E34 2011-09-04 2015-05-26\n", - "21 sgp E35 2011-10-06 2015-05-18\n", - "22 sgp E36 2011-09-29 2015-05-18\n", - "23 sgp E37 2011-09-30 2015-05-18\n", - "24 sgp E38 2011-09-05 2015-05-27\n", - "25 sgp E3 1996-03-07 2009-08-30\n", - "26 sgp E4 1996-03-07 2011-09-25\n", - "27 sgp E5 1996-06-17 2009-10-31\n", - "28 sgp E6 1996-03-07 2011-10-15\n", - "29 sgp E7 1995-10-20 2011-11-12\n", - "30 sgp E8 1995-09-29 2009-11-04\n", - "31 sgp E9 1994-01-19 2015-05-26" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "print(\"The following locations and date ranges are available for this VAP:\")\n", - "display(pd.DataFrame(LOCATIONS, columns=['site', 'facility', 'start_date', 'end_date']))" - ] - }, - { - "cell_type": "markdown", - "id": "8d132223", - "metadata": {}, - "source": [ - "#### Define site, facility, and date range (date format: YYYY-MM-DD) using the variables below:" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "e563983a", - "metadata": {}, - "outputs": [], - "source": [ - "site_facility = ( 'sgp', 'C1' )\n", - "\n", - "date_start = '2015-05-15'\n", - "date_end = 
'2015-05-17'" - ] - }, - { - "cell_type": "markdown", - "id": "bccd3dfe-2f99-49a5-bace-ea37e8dd8fc3", - "metadata": {}, - "source": [ - "## Load data files\n", - "Load data files from /data/archive/" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "cb4b9a26-c574-49c0-a521-658fa553e39e", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "'/data/archive/sgp/sgp1swfanalsirs1longC1.c1'" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Compile list of files\n", - "site, facility = site_facility\n", - "d_date_start = datetime.strptime(date_start, '%Y-%m-%d')\n", - "d_date_end = datetime.strptime(date_end, '%Y-%m-%d')\n", - "dir_path = os.path.join(DATA_DIR + site, site + DATASTREAM_NAME + facility + r'.' + DATA_LEVEL )\n", - "dir_path\n" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "6be8f3dc", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "['20150515', '20150516', '20150517']" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "from datetime import date, timedelta\n", - "import pandas as pd\n", - "\n", - "def get_ARM_formated_dates(start_date, end_date):\n", - " \"\"\"\n", - " Get a list of ARM conventional formated date lists, based on start_date and end_date(inclusive)\n", - " EXAMPLE:\n", - " get_ARM_formated_dates(start_date=\"20180219\", end_date=\"20180221\")\n", - " >> [\"20180219\", \"20180220\", \"20180221\"] \n", - " \"\"\"\n", - " \n", - " _start_date = pd.to_datetime(start_date)\n", - " _end_date = pd.to_datetime(end_date)\n", - " \n", - " delta = _end_date - _start_date # returns timedelta \n", - " dates = []\n", - "\n", - " for i in range(delta.days + 1):\n", - " day = _start_date + timedelta(days=i)\n", - " day_formated = day.strftime(format=\"%Y%m%d\")\n", - " dates.append(day_formated)\n", - " return dates\n", - "\n", - "\n", - 
"get_ARM_formated_dates(start_date=date_start, end_date=date_end)" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "51feea2e", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "['/data/archive/sgp/sgp1swfanalsirs1longC1.c1/sgp1swfanalsirs1longC1.c1.20150515.112900.cdf',\n", - " '/data/archive/sgp/sgp1swfanalsirs1longC1.c1/sgp1swfanalsirs1longC1.c1.20150516.112800.cdf',\n", - " '/data/archive/sgp/sgp1swfanalsirs1longC1.c1/sgp1swfanalsirs1longC1.c1.20150517.112700.cdf']" - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Filter a list of files based on date pattern\n", - "import glob\n", - "dates = get_ARM_formated_dates(start_date=date_start, end_date=date_end)\n", - "files_filter = []\n", - "for date in dates:\n", - " files_filter += glob.glob(f'{dir_path}/*.{date}*.*')\n", - " files_filter\n", - "files_filter" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "id": "b0e5d0d7", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "89 files loaded\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "
<xarray.Dataset>\n",
-       "Dimensions:                          (time: 838)\n",
-       "Coordinates:\n",
-       "  * time                             (time) timedelta64[ns] 00:00:00 ... 13:5...\n",
-       "Data variables: (12/47)\n",
-       "    base_time                        object ...\n",
-       "    time_offset                      (time) timedelta64[ns] dask.array<chunksize=(838,), meta=np.ndarray>\n",
-       "    base_time_LST                    object ...\n",
-       "    time_offset_LST                  (time) timedelta64[ns] dask.array<chunksize=(838,), meta=np.ndarray>\n",
-       "    site                             |S64 ...\n",
-       "    coef_date                        float64 ...\n",
-       "    ...                               ...\n",
-       "    qc_difswfluxdn                   (time) int16 dask.array<chunksize=(838,), meta=np.ndarray>\n",
-       "    qc_dirswfluxdn                   (time) int16 dask.array<chunksize=(838,), meta=np.ndarray>\n",
-       "    qc_sswfluxdn                     (time) int16 dask.array<chunksize=(838,), meta=np.ndarray>\n",
-       "    lat                              float32 ...\n",
-       "    lon                              float32 ...\n",
-       "    alt                              float32 ...\n",
-       "Attributes: (12/14)\n",
-       "    Date:                      Sat Jun 20 17:08:21 GMT 2015\n",
-       "    Fitmode:                   01\n",
-       "    Version:                   $State: vap-swfanal1long-3.12-0.sol5_10$\n",
-       "    Number_Input_Platforms:    1\n",
-       "    Input_Platforms:           sgpsirsC1.b1\n",
-       "    Input_Platforms_Versions:  /usr/lib/ld.so.1\n",
-       "    ...                        ...\n",
-       "    comment:                   fitmode=01 indicates a daily fit, fitmode=00 i...\n",
-       "    _file_dates:               ['20150515']\n",
-       "    _file_times:               ['112900']\n",
-       "    datastream:                sgp1swfanalsirs1longC1.c1\n",
-       "    _datastream:               sgp1swfanalsirs1longC1.c1\n",
-       "    _arm_standards_flag:       1
" - ], - "text/plain": [ - "\n", - "Dimensions: (time: 838)\n", - "Coordinates:\n", - " * time (time) timedelta64[ns] 00:00:00 ... 13:5...\n", - "Data variables: (12/47)\n", - " base_time object ...\n", - " time_offset (time) timedelta64[ns] dask.array\n", - " base_time_LST object ...\n", - " time_offset_LST (time) timedelta64[ns] dask.array\n", - " site |S64 ...\n", - " coef_date float64 ...\n", - " ... ...\n", - " qc_difswfluxdn (time) int16 dask.array\n", - " qc_dirswfluxdn (time) int16 dask.array\n", - " qc_sswfluxdn (time) int16 dask.array\n", - " lat float32 ...\n", - " lon float32 ...\n", - " alt float32 ...\n", - "Attributes: (12/14)\n", - " Date: Sat Jun 20 17:08:21 GMT 2015\n", - " Fitmode: 01\n", - " Version: $State: vap-swfanal1long-3.12-0.sol5_10$\n", - " Number_Input_Platforms: 1\n", - " Input_Platforms: sgpsirsC1.b1\n", - " Input_Platforms_Versions: /usr/lib/ld.so.1\n", - " ... ...\n", - " comment: fitmode=01 indicates a daily fit, fitmode=00 i...\n", - " _file_dates: ['20150515']\n", - " _file_times: ['112900']\n", - " datastream: sgp1swfanalsirs1longC1.c1\n", - " _datastream: sgp1swfanalsirs1longC1.c1\n", - " _arm_standards_flag: 1" - ] - }, - "execution_count": 13, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Load files as a single dataset\n", - "files_list = files_filter [0]\n", - "ds = act.io.armfiles.read_netcdf(files_list)\n", - "ds.clean.cleanup()\n", - "print(f'{len(files_list)} files loaded')\n", - "ds\n" - ] - }, - { - "cell_type": "markdown", - "id": "4a551094-9ec0-4b64-b80a-9940573c2f50", - "metadata": {}, - "source": [ - "## Plot time series data\n", - "#### Define the list of variables to be plotted:" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "id": "d09b789e-84f1-4605-846b-a72c110c8048", - "metadata": {}, - "outputs": [], - "source": [ - "variables_to_plot = ['gswfluxdn_measured', 'gswfluxdn_clearskyfit', 'difswfluxdn_measured']" - ] - }, - { - "cell_type": "code", - 
"execution_count": 15, - "id": "3458fb08-035b-4898-9253-0a94e6f9c97b", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/home/kefeimo/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/xarray/coding/variables.py:147: FutureWarning: elementwise comparison failed; returning scalar instead, but in the future will perform elementwise comparison\n", - " condition |= data == fv\n" - ] - }, - { - "ename": "OverflowError", - "evalue": "int too big to convert", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mOverflowError\u001b[0m Traceback (most recent call last)", - "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/IPython/core/formatters.py:972\u001b[0m, in \u001b[0;36mMimeBundleFormatter.__call__\u001b[0;34m(self, obj, include, exclude)\u001b[0m\n\u001b[1;32m 969\u001b[0m method \u001b[38;5;241m=\u001b[39m get_real_method(obj, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mprint_method)\n\u001b[1;32m 971\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m method \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m--> 972\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mmethod\u001b[49m\u001b[43m(\u001b[49m\u001b[43minclude\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43minclude\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mexclude\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mexclude\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 973\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m 974\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n", - "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/ipympl/backend_nbagg.py:336\u001b[0m, in \u001b[0;36mCanvas._repr_mimebundle_\u001b[0;34m(self, **kwargs)\u001b[0m\n\u001b[1;32m 
333\u001b[0m plaintext \u001b[38;5;241m=\u001b[39m plaintext[:\u001b[38;5;241m110\u001b[39m] \u001b[38;5;241m+\u001b[39m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124m…\u001b[39m\u001b[38;5;124m'\u001b[39m\n\u001b[1;32m 335\u001b[0m buf \u001b[38;5;241m=\u001b[39m io\u001b[38;5;241m.\u001b[39mBytesIO()\n\u001b[0;32m--> 336\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfigure\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msavefig\u001b[49m\u001b[43m(\u001b[49m\u001b[43mbuf\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mformat\u001b[39;49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mpng\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdpi\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mfigure\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[1;32m 338\u001b[0m base64_image \u001b[38;5;241m=\u001b[39m b64encode(buf\u001b[38;5;241m.\u001b[39mgetvalue())\u001b[38;5;241m.\u001b[39mdecode(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mutf-8\u001b[39m\u001b[38;5;124m'\u001b[39m)\n\u001b[1;32m 339\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_data_url \u001b[38;5;241m=\u001b[39m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mdata:image/png;base64,\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mbase64_image\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m'\u001b[39m\n", - "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/matplotlib/figure.py:3343\u001b[0m, in \u001b[0;36mFigure.savefig\u001b[0;34m(self, fname, transparent, **kwargs)\u001b[0m\n\u001b[1;32m 3339\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m ax \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39maxes:\n\u001b[1;32m 3340\u001b[0m stack\u001b[38;5;241m.\u001b[39menter_context(\n\u001b[1;32m 
3341\u001b[0m ax\u001b[38;5;241m.\u001b[39mpatch\u001b[38;5;241m.\u001b[39m_cm_set(facecolor\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mnone\u001b[39m\u001b[38;5;124m'\u001b[39m, edgecolor\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mnone\u001b[39m\u001b[38;5;124m'\u001b[39m))\n\u001b[0;32m-> 3343\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcanvas\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mprint_figure\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfname\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/matplotlib/backend_bases.py:2366\u001b[0m, in \u001b[0;36mFigureCanvasBase.print_figure\u001b[0;34m(self, filename, dpi, facecolor, edgecolor, orientation, format, bbox_inches, pad_inches, bbox_extra_artists, backend, **kwargs)\u001b[0m\n\u001b[1;32m 2362\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m 2363\u001b[0m \u001b[38;5;66;03m# _get_renderer may change the figure dpi (as vector formats\u001b[39;00m\n\u001b[1;32m 2364\u001b[0m \u001b[38;5;66;03m# force the figure dpi to 72), so we need to set it again here.\u001b[39;00m\n\u001b[1;32m 2365\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m cbook\u001b[38;5;241m.\u001b[39m_setattr_cm(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mfigure, dpi\u001b[38;5;241m=\u001b[39mdpi):\n\u001b[0;32m-> 2366\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[43mprint_method\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 2367\u001b[0m \u001b[43m \u001b[49m\u001b[43mfilename\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 2368\u001b[0m \u001b[43m \u001b[49m\u001b[43mfacecolor\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mfacecolor\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 2369\u001b[0m \u001b[43m 
\u001b[49m\u001b[43medgecolor\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43medgecolor\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 2370\u001b[0m \u001b[43m \u001b[49m\u001b[43morientation\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43morientation\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 2371\u001b[0m \u001b[43m \u001b[49m\u001b[43mbbox_inches_restore\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m_bbox_inches_restore\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 2372\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 2373\u001b[0m \u001b[38;5;28;01mfinally\u001b[39;00m:\n\u001b[1;32m 2374\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m bbox_inches \u001b[38;5;129;01mand\u001b[39;00m restore_bbox:\n", - "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/matplotlib/backend_bases.py:2232\u001b[0m, in \u001b[0;36mFigureCanvasBase._switch_canvas_and_return_print_method..\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 2228\u001b[0m optional_kws \u001b[38;5;241m=\u001b[39m { \u001b[38;5;66;03m# Passed by print_figure for other renderers.\u001b[39;00m\n\u001b[1;32m 2229\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mdpi\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mfacecolor\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124medgecolor\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124morientation\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m 2230\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mbbox_inches_restore\u001b[39m\u001b[38;5;124m\"\u001b[39m}\n\u001b[1;32m 2231\u001b[0m skip \u001b[38;5;241m=\u001b[39m optional_kws \u001b[38;5;241m-\u001b[39m {\u001b[38;5;241m*\u001b[39minspect\u001b[38;5;241m.\u001b[39msignature(meth)\u001b[38;5;241m.\u001b[39mparameters}\n\u001b[0;32m-> 2232\u001b[0m print_method 
\u001b[38;5;241m=\u001b[39m functools\u001b[38;5;241m.\u001b[39mwraps(meth)(\u001b[38;5;28;01mlambda\u001b[39;00m \u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs: \u001b[43mmeth\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 2233\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43m{\u001b[49m\u001b[43mk\u001b[49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mv\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mfor\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mk\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mv\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;129;43;01min\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mkwargs\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mitems\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mif\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mk\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;129;43;01mnot\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;129;43;01min\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mskip\u001b[49m\u001b[43m}\u001b[49m\u001b[43m)\u001b[49m)\n\u001b[1;32m 2234\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m: \u001b[38;5;66;03m# Let third-parties do as they see fit.\u001b[39;00m\n\u001b[1;32m 2235\u001b[0m print_method \u001b[38;5;241m=\u001b[39m meth\n", - "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/matplotlib/backends/backend_agg.py:509\u001b[0m, in \u001b[0;36mFigureCanvasAgg.print_png\u001b[0;34m(self, filename_or_obj, metadata, pil_kwargs)\u001b[0m\n\u001b[1;32m 462\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mprint_png\u001b[39m(\u001b[38;5;28mself\u001b[39m, filename_or_obj, \u001b[38;5;241m*\u001b[39m, metadata\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m, 
pil_kwargs\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m):\n\u001b[1;32m 463\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 464\u001b[0m \u001b[38;5;124;03m Write the figure to a PNG file.\u001b[39;00m\n\u001b[1;32m 465\u001b[0m \n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 507\u001b[0m \u001b[38;5;124;03m *metadata*, including the default 'Software' key.\u001b[39;00m\n\u001b[1;32m 508\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[0;32m--> 509\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_print_pil\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfilename_or_obj\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mpng\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mpil_kwargs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmetadata\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/matplotlib/backends/backend_agg.py:457\u001b[0m, in \u001b[0;36mFigureCanvasAgg._print_pil\u001b[0;34m(self, filename_or_obj, fmt, pil_kwargs, metadata)\u001b[0m\n\u001b[1;32m 452\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m_print_pil\u001b[39m(\u001b[38;5;28mself\u001b[39m, filename_or_obj, fmt, pil_kwargs, metadata\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m):\n\u001b[1;32m 453\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 454\u001b[0m \u001b[38;5;124;03m Draw the canvas, then save it using `.image.imsave` (to which\u001b[39;00m\n\u001b[1;32m 455\u001b[0m \u001b[38;5;124;03m *pil_kwargs* and *metadata* are forwarded).\u001b[39;00m\n\u001b[1;32m 456\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[0;32m--> 457\u001b[0m 
\u001b[43mFigureCanvasAgg\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdraw\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[1;32m 458\u001b[0m mpl\u001b[38;5;241m.\u001b[39mimage\u001b[38;5;241m.\u001b[39mimsave(\n\u001b[1;32m 459\u001b[0m filename_or_obj, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mbuffer_rgba(), \u001b[38;5;28mformat\u001b[39m\u001b[38;5;241m=\u001b[39mfmt, origin\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mupper\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m 460\u001b[0m dpi\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mfigure\u001b[38;5;241m.\u001b[39mdpi, metadata\u001b[38;5;241m=\u001b[39mmetadata, pil_kwargs\u001b[38;5;241m=\u001b[39mpil_kwargs)\n", - "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/matplotlib/backends/backend_agg.py:400\u001b[0m, in \u001b[0;36mFigureCanvasAgg.draw\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 396\u001b[0m \u001b[38;5;66;03m# Acquire a lock on the shared font cache.\u001b[39;00m\n\u001b[1;32m 397\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m RendererAgg\u001b[38;5;241m.\u001b[39mlock, \\\n\u001b[1;32m 398\u001b[0m (\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mtoolbar\u001b[38;5;241m.\u001b[39m_wait_cursor_for_draw_cm() \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mtoolbar\n\u001b[1;32m 399\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m nullcontext()):\n\u001b[0;32m--> 400\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfigure\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdraw\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrenderer\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 401\u001b[0m \u001b[38;5;66;03m# A GUI class may be need to update a window using this draw, so\u001b[39;00m\n\u001b[1;32m 
402\u001b[0m \u001b[38;5;66;03m# don't forget to call the superclass.\u001b[39;00m\n\u001b[1;32m 403\u001b[0m \u001b[38;5;28msuper\u001b[39m()\u001b[38;5;241m.\u001b[39mdraw()\n", - "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/matplotlib/artist.py:95\u001b[0m, in \u001b[0;36m_finalize_rasterization..draw_wrapper\u001b[0;34m(artist, renderer, *args, **kwargs)\u001b[0m\n\u001b[1;32m 93\u001b[0m \u001b[38;5;129m@wraps\u001b[39m(draw)\n\u001b[1;32m 94\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mdraw_wrapper\u001b[39m(artist, renderer, \u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs):\n\u001b[0;32m---> 95\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[43mdraw\u001b[49m\u001b[43m(\u001b[49m\u001b[43martist\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mrenderer\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 96\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m renderer\u001b[38;5;241m.\u001b[39m_rasterizing:\n\u001b[1;32m 97\u001b[0m renderer\u001b[38;5;241m.\u001b[39mstop_rasterizing()\n", - "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/matplotlib/artist.py:72\u001b[0m, in \u001b[0;36mallow_rasterization..draw_wrapper\u001b[0;34m(artist, renderer)\u001b[0m\n\u001b[1;32m 69\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m artist\u001b[38;5;241m.\u001b[39mget_agg_filter() \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 70\u001b[0m renderer\u001b[38;5;241m.\u001b[39mstart_filter()\n\u001b[0;32m---> 72\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mdraw\u001b[49m\u001b[43m(\u001b[49m\u001b[43martist\u001b[49m\u001b[43m,\u001b[49m\u001b[43m 
\u001b[49m\u001b[43mrenderer\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 73\u001b[0m \u001b[38;5;28;01mfinally\u001b[39;00m:\n\u001b[1;32m 74\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m artist\u001b[38;5;241m.\u001b[39mget_agg_filter() \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n", - "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/matplotlib/figure.py:3140\u001b[0m, in \u001b[0;36mFigure.draw\u001b[0;34m(self, renderer)\u001b[0m\n\u001b[1;32m 3137\u001b[0m \u001b[38;5;66;03m# ValueError can occur when resizing a window.\u001b[39;00m\n\u001b[1;32m 3139\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mpatch\u001b[38;5;241m.\u001b[39mdraw(renderer)\n\u001b[0;32m-> 3140\u001b[0m \u001b[43mmimage\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_draw_list_compositing_images\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 3141\u001b[0m \u001b[43m \u001b[49m\u001b[43mrenderer\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43martists\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msuppressComposite\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 3143\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m sfig \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39msubfigs:\n\u001b[1;32m 3144\u001b[0m sfig\u001b[38;5;241m.\u001b[39mdraw(renderer)\n", - "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/matplotlib/image.py:131\u001b[0m, in \u001b[0;36m_draw_list_compositing_images\u001b[0;34m(renderer, parent, artists, suppress_composite)\u001b[0m\n\u001b[1;32m 129\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m not_composite \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m has_images:\n\u001b[1;32m 130\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m 
a \u001b[38;5;129;01min\u001b[39;00m artists:\n\u001b[0;32m--> 131\u001b[0m \u001b[43ma\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdraw\u001b[49m\u001b[43m(\u001b[49m\u001b[43mrenderer\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 132\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 133\u001b[0m \u001b[38;5;66;03m# Composite any adjacent images together\u001b[39;00m\n\u001b[1;32m 134\u001b[0m image_group \u001b[38;5;241m=\u001b[39m []\n", - "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/matplotlib/artist.py:72\u001b[0m, in \u001b[0;36mallow_rasterization..draw_wrapper\u001b[0;34m(artist, renderer)\u001b[0m\n\u001b[1;32m 69\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m artist\u001b[38;5;241m.\u001b[39mget_agg_filter() \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 70\u001b[0m renderer\u001b[38;5;241m.\u001b[39mstart_filter()\n\u001b[0;32m---> 72\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mdraw\u001b[49m\u001b[43m(\u001b[49m\u001b[43martist\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mrenderer\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 73\u001b[0m \u001b[38;5;28;01mfinally\u001b[39;00m:\n\u001b[1;32m 74\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m artist\u001b[38;5;241m.\u001b[39mget_agg_filter() \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n", - "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/matplotlib/axes/_base.py:3064\u001b[0m, in \u001b[0;36m_AxesBase.draw\u001b[0;34m(self, renderer)\u001b[0m\n\u001b[1;32m 3061\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m artists_rasterized:\n\u001b[1;32m 3062\u001b[0m _draw_rasterized(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mfigure, artists_rasterized, renderer)\n\u001b[0;32m-> 3064\u001b[0m 
\u001b[43mmimage\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_draw_list_compositing_images\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 3065\u001b[0m \u001b[43m \u001b[49m\u001b[43mrenderer\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43martists\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfigure\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msuppressComposite\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 3067\u001b[0m renderer\u001b[38;5;241m.\u001b[39mclose_group(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124maxes\u001b[39m\u001b[38;5;124m'\u001b[39m)\n\u001b[1;32m 3068\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mstale \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mFalse\u001b[39;00m\n", - "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/matplotlib/image.py:131\u001b[0m, in \u001b[0;36m_draw_list_compositing_images\u001b[0;34m(renderer, parent, artists, suppress_composite)\u001b[0m\n\u001b[1;32m 129\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m not_composite \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m has_images:\n\u001b[1;32m 130\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m a \u001b[38;5;129;01min\u001b[39;00m artists:\n\u001b[0;32m--> 131\u001b[0m \u001b[43ma\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdraw\u001b[49m\u001b[43m(\u001b[49m\u001b[43mrenderer\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 132\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 133\u001b[0m \u001b[38;5;66;03m# Composite any adjacent images together\u001b[39;00m\n\u001b[1;32m 134\u001b[0m image_group \u001b[38;5;241m=\u001b[39m []\n", - "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/matplotlib/artist.py:72\u001b[0m, in \u001b[0;36mallow_rasterization..draw_wrapper\u001b[0;34m(artist, 
renderer)\u001b[0m\n\u001b[1;32m 69\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m artist\u001b[38;5;241m.\u001b[39mget_agg_filter() \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 70\u001b[0m renderer\u001b[38;5;241m.\u001b[39mstart_filter()\n\u001b[0;32m---> 72\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mdraw\u001b[49m\u001b[43m(\u001b[49m\u001b[43martist\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mrenderer\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 73\u001b[0m \u001b[38;5;28;01mfinally\u001b[39;00m:\n\u001b[1;32m 74\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m artist\u001b[38;5;241m.\u001b[39mget_agg_filter() \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n", - "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/matplotlib/axis.py:1376\u001b[0m, in \u001b[0;36mAxis.draw\u001b[0;34m(self, renderer, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1373\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m\n\u001b[1;32m 1374\u001b[0m renderer\u001b[38;5;241m.\u001b[39mopen_group(\u001b[38;5;18m__name__\u001b[39m, gid\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mget_gid())\n\u001b[0;32m-> 1376\u001b[0m ticks_to_draw \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_update_ticks\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1377\u001b[0m tlb1, tlb2 \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_get_ticklabel_bboxes(ticks_to_draw, renderer)\n\u001b[1;32m 1379\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m tick \u001b[38;5;129;01min\u001b[39;00m ticks_to_draw:\n", - "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/matplotlib/axis.py:1263\u001b[0m, in \u001b[0;36mAxis._update_ticks\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 1258\u001b[0m 
\u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 1259\u001b[0m \u001b[38;5;124;03mUpdate ticks (position and labels) using the current data interval of\u001b[39;00m\n\u001b[1;32m 1260\u001b[0m \u001b[38;5;124;03mthe axes. Return the list of ticks that will be drawn.\u001b[39;00m\n\u001b[1;32m 1261\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 1262\u001b[0m major_locs \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mget_majorticklocs()\n\u001b[0;32m-> 1263\u001b[0m major_labels \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmajor\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mformatter\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mformat_ticks\u001b[49m\u001b[43m(\u001b[49m\u001b[43mmajor_locs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1264\u001b[0m major_ticks \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mget_major_ticks(\u001b[38;5;28mlen\u001b[39m(major_locs))\n\u001b[1;32m 1265\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mmajor\u001b[38;5;241m.\u001b[39mformatter\u001b[38;5;241m.\u001b[39mset_locs(major_locs)\n", - "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/matplotlib/ticker.py:218\u001b[0m, in \u001b[0;36mFormatter.format_ticks\u001b[0;34m(self, values)\u001b[0m\n\u001b[1;32m 216\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"Return the tick labels for all the ticks at once.\"\"\"\u001b[39;00m\n\u001b[1;32m 217\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mset_locs(values)\n\u001b[0;32m--> 218\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m [\u001b[38;5;28mself\u001b[39m(value, i) \u001b[38;5;28;01mfor\u001b[39;00m i, value \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28menumerate\u001b[39m(values)]\n", - "File 
\u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/matplotlib/ticker.py:218\u001b[0m, in \u001b[0;36m\u001b[0;34m(.0)\u001b[0m\n\u001b[1;32m 216\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"Return the tick labels for all the ticks at once.\"\"\"\u001b[39;00m\n\u001b[1;32m 217\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mset_locs(values)\n\u001b[0;32m--> 218\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m [\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mvalue\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mi\u001b[49m\u001b[43m)\u001b[49m \u001b[38;5;28;01mfor\u001b[39;00m i, value \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28menumerate\u001b[39m(values)]\n", - "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/matplotlib/dates.py:651\u001b[0m, in \u001b[0;36mDateFormatter.__call__\u001b[0;34m(self, x, pos)\u001b[0m\n\u001b[1;32m 650\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m__call__\u001b[39m(\u001b[38;5;28mself\u001b[39m, x, pos\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m0\u001b[39m):\n\u001b[0;32m--> 651\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[43mnum2date\u001b[49m\u001b[43m(\u001b[49m\u001b[43mx\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mtz\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241m.\u001b[39mstrftime(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mfmt)\n\u001b[1;32m 652\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m _wrap_in_tex(result) \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_usetex \u001b[38;5;28;01melse\u001b[39;00m result\n", - "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/matplotlib/dates.py:544\u001b[0m, in \u001b[0;36mnum2date\u001b[0;34m(x, tz)\u001b[0m\n\u001b[1;32m 518\u001b[0m 
\u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 519\u001b[0m \u001b[38;5;124;03mConvert Matplotlib dates to `~datetime.datetime` objects.\u001b[39;00m\n\u001b[1;32m 520\u001b[0m \n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 541\u001b[0m \u001b[38;5;124;03mFor details, see the module docstring.\u001b[39;00m\n\u001b[1;32m 542\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 543\u001b[0m tz \u001b[38;5;241m=\u001b[39m _get_tzinfo(tz)\n\u001b[0;32m--> 544\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43m_from_ordinalf_np_vectorized\u001b[49m\u001b[43m(\u001b[49m\u001b[43mx\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtz\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241m.\u001b[39mtolist()\n", - "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/numpy/lib/function_base.py:2329\u001b[0m, in \u001b[0;36mvectorize.__call__\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 2326\u001b[0m vargs \u001b[38;5;241m=\u001b[39m [args[_i] \u001b[38;5;28;01mfor\u001b[39;00m _i \u001b[38;5;129;01min\u001b[39;00m inds]\n\u001b[1;32m 2327\u001b[0m vargs\u001b[38;5;241m.\u001b[39mextend([kwargs[_n] \u001b[38;5;28;01mfor\u001b[39;00m _n \u001b[38;5;129;01min\u001b[39;00m names])\n\u001b[0;32m-> 2329\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_vectorize_call\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfunc\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mfunc\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43margs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mvargs\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/numpy/lib/function_base.py:2412\u001b[0m, in \u001b[0;36mvectorize._vectorize_call\u001b[0;34m(self, func, args)\u001b[0m\n\u001b[1;32m 2409\u001b[0m \u001b[38;5;66;03m# Convert args to object arrays first\u001b[39;00m\n\u001b[1;32m 
2410\u001b[0m inputs \u001b[38;5;241m=\u001b[39m [asanyarray(a, dtype\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mobject\u001b[39m) \u001b[38;5;28;01mfor\u001b[39;00m a \u001b[38;5;129;01min\u001b[39;00m args]\n\u001b[0;32m-> 2412\u001b[0m outputs \u001b[38;5;241m=\u001b[39m \u001b[43mufunc\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43minputs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 2414\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m ufunc\u001b[38;5;241m.\u001b[39mnout \u001b[38;5;241m==\u001b[39m \u001b[38;5;241m1\u001b[39m:\n\u001b[1;32m 2415\u001b[0m res \u001b[38;5;241m=\u001b[39m asanyarray(outputs, dtype\u001b[38;5;241m=\u001b[39motypes[\u001b[38;5;241m0\u001b[39m])\n", - "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/matplotlib/dates.py:359\u001b[0m, in \u001b[0;36m_from_ordinalf\u001b[0;34m(x, tz)\u001b[0m\n\u001b[1;32m 346\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 347\u001b[0m \u001b[38;5;124;03mConvert Gregorian float of the date, preserving hours, minutes,\u001b[39;00m\n\u001b[1;32m 348\u001b[0m \u001b[38;5;124;03mseconds and microseconds. 
Return value is a `.datetime`.\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 353\u001b[0m \u001b[38;5;124;03m:rc:`timezone`.\u001b[39;00m\n\u001b[1;32m 354\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 356\u001b[0m tz \u001b[38;5;241m=\u001b[39m _get_tzinfo(tz)\n\u001b[1;32m 358\u001b[0m dt \u001b[38;5;241m=\u001b[39m (np\u001b[38;5;241m.\u001b[39mdatetime64(get_epoch()) \u001b[38;5;241m+\u001b[39m\n\u001b[0;32m--> 359\u001b[0m \u001b[43mnp\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mtimedelta64\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mint\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mnp\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mround\u001b[49m\u001b[43m(\u001b[49m\u001b[43mx\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43m \u001b[49m\u001b[43mMUSECONDS_PER_DAY\u001b[49m\u001b[43m)\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mus\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m)\u001b[49m)\n\u001b[1;32m 360\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m dt \u001b[38;5;241m<\u001b[39m np\u001b[38;5;241m.\u001b[39mdatetime64(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m0001-01-01\u001b[39m\u001b[38;5;124m'\u001b[39m) \u001b[38;5;129;01mor\u001b[39;00m dt \u001b[38;5;241m>\u001b[39m\u001b[38;5;241m=\u001b[39m np\u001b[38;5;241m.\u001b[39mdatetime64(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m10000-01-01\u001b[39m\u001b[38;5;124m'\u001b[39m):\n\u001b[1;32m 361\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mDate ordinal \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mx\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m converts to \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mdt\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m (using \u001b[39m\u001b[38;5;124m'\u001b[39m\n\u001b[1;32m 362\u001b[0m 
\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mepoch \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mget_epoch()\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m), but Matplotlib dates must be \u001b[39m\u001b[38;5;124m'\u001b[39m\n\u001b[1;32m 363\u001b[0m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mbetween year 0001 and 9999.\u001b[39m\u001b[38;5;124m'\u001b[39m)\n", - "\u001b[0;31mOverflowError\u001b[0m: int too big to convert" - ] - }, - { - "data": { - "text/plain": [ - "Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous view', 'arrow-left', 'back'), ('Forward', 'Forward to next view', 'arrow-right', 'forward'), ('Pan', 'Left button pans, Right button zooms\\nx/y fixes axis, CTRL fixes aspect', 'arrows', 'pan'), ('Zoom', 'Zoom to rectangle\\nx/y fixes axis', 'square-o', 'zoom'), ('Download', 'Download plot', 'floppy-o', 'save_figure')]))" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "ts_display = act.plotting.TimeSeriesDisplay(ds)\n", - "ts_display.add_subplots((len(variables_to_plot),), figsize = (9.5,4*len(variables_to_plot)))\n", - "\n", - "for i,v in enumerate(variables_to_plot):\n", - " ts_ax = ts_display.plot(v, subplot_index=(i,), set_title=ds.variables[v].attrs['long_name'],)\n", - " ts_ax.grid()\n", - "\n", - "plt.show()\n" - ] - }, - { - "cell_type": "markdown", - "id": "194399aa-1907-452b-8ba9-bc31d7f60291", - "metadata": {}, - "source": [ - "## Quality check plots\n", - "#### Define variable for QC plot" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "id": "9a0ef63d-3eeb-48fc-a24a-43258b6134b8", - "metadata": {}, - "outputs": [], - "source": [ - "qc_variable = 'gswfluxdn'" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "id": "532663a3-4dc0-4497-bda8-018c5f91e1c4", - "metadata": {}, - "outputs": [ - { - "ename": "KeyError", - "evalue": "'gswfluxdn'", - "output_type": "error", - "traceback": [ - 
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)", - "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/xarray/core/dataset.py:1348\u001b[0m, in \u001b[0;36mDataset._construct_dataarray\u001b[0;34m(self, name)\u001b[0m\n\u001b[1;32m 1347\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m-> 1348\u001b[0m variable \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_variables\u001b[49m\u001b[43m[\u001b[49m\u001b[43mname\u001b[49m\u001b[43m]\u001b[49m\n\u001b[1;32m 1349\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m:\n", - "\u001b[0;31mKeyError\u001b[0m: 'gswfluxdn'", - "\nDuring handling of the above exception, another exception occurred:\n", - "\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[12], line 7\u001b[0m\n\u001b[1;32m 5\u001b[0m qc_display \u001b[38;5;241m=\u001b[39m act\u001b[38;5;241m.\u001b[39mplotting\u001b[38;5;241m.\u001b[39mTimeSeriesDisplay(ds)\n\u001b[1;32m 6\u001b[0m qc_display\u001b[38;5;241m.\u001b[39madd_subplots((\u001b[38;5;241m2\u001b[39m,), figsize \u001b[38;5;241m=\u001b[39m (\u001b[38;5;241m9.5\u001b[39m,\u001b[38;5;241m10\u001b[39m))\n\u001b[0;32m----> 7\u001b[0m qc_ax \u001b[38;5;241m=\u001b[39m \u001b[43mqc_display\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mplot\u001b[49m\u001b[43m(\u001b[49m\u001b[43mqc_variable\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43msubplot_index\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m0\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mset_title\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mQC results on field: 
\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m \u001b[49m\u001b[38;5;241;43m+\u001b[39;49m\u001b[43m \u001b[49m\u001b[43mqc_variable\u001b[49m\u001b[43m,\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 8\u001b[0m qc_ax\u001b[38;5;241m.\u001b[39mgrid()\n\u001b[1;32m 9\u001b[0m qc_display\u001b[38;5;241m.\u001b[39mqc_flag_block_plot(qc_variable, subplot_index\u001b[38;5;241m=\u001b[39m(\u001b[38;5;241m1\u001b[39m,))\n", - "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/act/plotting/timeseriesdisplay.py:418\u001b[0m, in \u001b[0;36mTimeSeriesDisplay.plot\u001b[0;34m(self, field, dsname, subplot_index, cmap, set_title, add_nan, day_night_background, invert_y_axis, abs_limits, time_rng, y_rng, use_var_for_y, set_shading, assessment_overplot, overplot_marker, overplot_behind, overplot_markersize, assessment_overplot_category, assessment_overplot_category_color, force_line_plot, labels, cbar_label, cbar_h_adjust, secondary_y, y_axis_flag_meanings, colorbar_labels, cb_friendly, **kwargs)\u001b[0m\n\u001b[1;32m 415\u001b[0m assessment_overplot_category_color[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mAcceptable\u001b[39m\u001b[38;5;124m'\u001b[39m] \u001b[38;5;241m=\u001b[39m (\u001b[38;5;241m0.0\u001b[39m, \u001b[38;5;241m0.4240129715562796\u001b[39m, \u001b[38;5;241m0.4240129715562796\u001b[39m),\n\u001b[1;32m 417\u001b[0m \u001b[38;5;66;03m# Get data and dimensions\u001b[39;00m\n\u001b[0;32m--> 418\u001b[0m data \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_obj\u001b[49m\u001b[43m[\u001b[49m\u001b[43mdsname\u001b[49m\u001b[43m]\u001b[49m\u001b[43m[\u001b[49m\u001b[43mfield\u001b[49m\u001b[43m]\u001b[49m\n\u001b[1;32m 419\u001b[0m dim \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mlist\u001b[39m(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_obj[dsname][field]\u001b[38;5;241m.\u001b[39mdims)\n\u001b[1;32m 420\u001b[0m xdata \u001b[38;5;241m=\u001b[39m 
\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_obj[dsname][dim[\u001b[38;5;241m0\u001b[39m]]\n", - "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/xarray/core/dataset.py:1439\u001b[0m, in \u001b[0;36mDataset.__getitem__\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m 1437\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39misel(\u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkey)\n\u001b[1;32m 1438\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m utils\u001b[38;5;241m.\u001b[39mhashable(key):\n\u001b[0;32m-> 1439\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_construct_dataarray\u001b[49m\u001b[43m(\u001b[49m\u001b[43mkey\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1440\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m utils\u001b[38;5;241m.\u001b[39miterable_of_hashable(key):\n\u001b[1;32m 1441\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_copy_listed(key)\n", - "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/xarray/core/dataset.py:1350\u001b[0m, in \u001b[0;36mDataset._construct_dataarray\u001b[0;34m(self, name)\u001b[0m\n\u001b[1;32m 1348\u001b[0m variable \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_variables[name]\n\u001b[1;32m 1349\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m:\n\u001b[0;32m-> 1350\u001b[0m _, name, variable \u001b[38;5;241m=\u001b[39m \u001b[43m_get_virtual_variable\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_variables\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mname\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdims\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 
1352\u001b[0m needed_dims \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mset\u001b[39m(variable\u001b[38;5;241m.\u001b[39mdims)\n\u001b[1;32m 1354\u001b[0m coords: \u001b[38;5;28mdict\u001b[39m[Hashable, Variable] \u001b[38;5;241m=\u001b[39m {}\n", - "File \u001b[0;32m~/.conda/envs/jupyter-vaps/lib/python3.8/site-packages/xarray/core/dataset.py:186\u001b[0m, in \u001b[0;36m_get_virtual_variable\u001b[0;34m(variables, key, dim_sizes)\u001b[0m\n\u001b[1;32m 184\u001b[0m split_key \u001b[38;5;241m=\u001b[39m key\u001b[38;5;241m.\u001b[39msplit(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m.\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;241m1\u001b[39m)\n\u001b[1;32m 185\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(split_key) \u001b[38;5;241m!=\u001b[39m \u001b[38;5;241m2\u001b[39m:\n\u001b[0;32m--> 186\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m(key)\n\u001b[1;32m 188\u001b[0m ref_name, var_name \u001b[38;5;241m=\u001b[39m split_key\n\u001b[1;32m 189\u001b[0m ref_var \u001b[38;5;241m=\u001b[39m variables[ref_name]\n", - "\u001b[0;31mKeyError\u001b[0m: 'gswfluxdn'" - ] - }, - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "fd79e0ba342048999f2bf386b4218b60", - "version_major": 2, - "version_minor": 0 - }, - "image/png": "", - "text/html": [ - "\n", - "
\n", - "
\n", - " Figure\n", - "
\n", - " \n", - "
\n", - " " - ], - "text/plain": [ - "Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "# QC Plot\n", - "if ('qc_'+qc_variable) in ds.variables:\n", - "\n", - " # Plot\n", - " qc_display = act.plotting.TimeSeriesDisplay(ds)\n", - " qc_display.add_subplots((2,), figsize = (9.5,10))\n", - " qc_ax = qc_display.plot(qc_variable, subplot_index=(0,), set_title=\"QC results on field: \" + qc_variable,)\n", - " qc_ax.grid()\n", - " qc_display.qc_flag_block_plot(qc_variable, subplot_index=(1,))\n", - "\n", - " plt.show()\n", - "else:\n", - " print(f'QC not available for the selected field: {qc_variable}')\n" - ] - }, - { - "cell_type": "markdown", - "id": "2308db16-f362-4033-a11a-c3e5e75ad9ba", - "metadata": {}, - "source": [ - "## Field selection dropdown menu\n", - "Select variable to be plotted from a dropdown menu" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "d5792fa8-2253-441a-8ac6-1e07a0345031", - "metadata": {}, - "outputs": [], - "source": [ - "plt.ioff()\n", - "\n", - "# populate dropdown menu with available variables \n", - "available_variables = [v for v in ds.variables if not('time' in v or v.startswith('qc_') or v.startswith('source_')) and 'long_name' in ds.variables[v].attrs]\n", - "d_variable = 'gswfluxdn_measured'\n", - "dropdown = widgets.Dropdown(\n", - " options = [(ds.variables[v].attrs['long_name'], v) for v in available_variables],\n", - " value= d_variable,\n", - " description='Field:',\n", - " disabled=False,\n", - ")\n", - "dropdown.layout.margin = '0px 30% 0px 20%'\n", - "dropdown.layout.width = '50%'\n", - "\n", - "# set up display\n", - "i_display = act.plotting.TimeSeriesDisplay(ds)\n", - "i_display.add_subplots((1,), figsize = (9.5,5))\n", - "i_ax = i_display.plot(d_variable, subplot_index=(0,), set_title=ds.variables[d_variable].attrs['long_name'],)\n", - "i_ax.grid()\n", 
- "i_fig = i_display.fig\n", - "\n", - "# update plot callback function\n", - "def update_plot(change):\n", - " i_ax.cla()\n", - " i_ax_new = i_display.plot(change.new, subplot_index=(0,), set_title=ds.variables[change.new].attrs['long_name'],)\n", - " i_ax_new.grid()\n", - " i_fig.canvas.draw()\n", - " i_fig.canvas.flush_events()\n", - "\n", - "dropdown.observe(update_plot, names='value')\n", - "\n", - "widgets.AppLayout(\n", - " header=dropdown,\n", - " center=i_fig.canvas,\n", - " pane_heights=[1, 6,1]\n", - ")\n" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.16" - }, - "widgets": { - "application/vnd.jupyter.widget-state+json": { - "state": {}, - "version_major": 2, - "version_minor": 0 - } - } - }, - "nbformat": 4, - "nbformat_minor": 5 -}