-
Notifications
You must be signed in to change notification settings - Fork 18
/
Copy pathgeo_process_Landsat8.py
632 lines (487 loc) · 37 KB
/
geo_process_Landsat8.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
# Process: Landsat 8 multi-spectral imagery
# Import required packages
import os, sys, subprocess
import shutil
import requests
import gzip
import pandas as pd
import datetime
import numpy as np
from math import floor, ceil
import matplotlib.pyplot as plt
# Import helper functions relevant to this script
sys.path.append('E:/mdm123/D/scripts/geo/')
from geo_helpers import extract_projection_info, get_geotiff_props, get_geotiff_projection, get_geotiff_nodatavalue, geotiff_to_array, array_to_geotiff, ls_subfolder_valid, ls8_spectral_index, create_bounded_geotiff
# List paths to GDAL scripts
gdal_warp = 'C:/Anaconda3/envs/geo/Library/bin/gdalwarp.exe'
gdal_calc = 'C:/Anaconda3/envs/geo/Scripts/gdal_calc.py'
gdal_merge = 'C:/Anaconda3/envs/geo/Scripts/gdal_merge.py'
gdal_buildvrt = 'C:/Anaconda3/envs/geo/Library/bin/gdalbuildvrt.exe'
# Define path to relevant folders
folder_srtm = 'E:/mdm123/D/data/DSM/SRTM/'
folder_dtm = 'E:/mdm123/D/data/DTM/proc/'
folder_ls8 = 'E:/mdm123/D/data/Landsat8/'
folder_archive = 'V:/mdm123/Landsat8/'
folder_fig = 'E:/mdm123/D/figures/'
# Define list of zones to be processed (separate LiDAR coverage areas)
zones = ['MRL18_WPE', 'MRL18_WVL', 'MRL18_WKW', 'MRL18_FGA', 'TSM17_STA', 'TSM17_LDM', 'TSM17_GLB', 'TSM16_ATG']
# Define dictionary to hold information relating to each zone covered by the Marlborough (2018) survey
dtm_dict = {'MRL18_WPE':{'label':'Wairau Plains East (Marlborough 2018)',
'wrs_prs':['073089'],
'survey_start':datetime.date(2018,7,28),
'survey_end':datetime.date(2018,8,9)},
'MRL18_WVL':{'label':'Wairau Valley (Marlborough 2018)',
'wrs_prs':['073089','074089'],
'survey_start':datetime.date(2018,7,28),
'survey_end':datetime.date(2018,8,9)},
'MRL18_WKW':{'label':'Picton - Waikawa (Marlborough 2018)',
'wrs_prs':['073089'],
'survey_start':datetime.date(2018,5,26),
'survey_end':datetime.date(2018,9,12)},
'MRL18_FGA':{'label':'Flaxbourne, Grassmere & Lower Awatere (Marlborough 2018)',
'wrs_prs':['073089'],
'survey_start':datetime.date(2018,5,26),
'survey_end':datetime.date(2018,9,12)},
'TSM17_STA':{'label':'St Arnaud (Tasman 2017)',
'wrs_prs':['073089','074089'],
'survey_start':datetime.date(2017,11,24),
'survey_end':datetime.date(2017,12,16)},
'TSM17_LDM':{'label':'Lee Dam (Tasman 2017)',
'wrs_prs':['073089','074089'],
'survey_start':datetime.date(2017,11,24),
'survey_end':datetime.date(2017,12,16)},
'TSM17_GLB':{'label':'Golden Bay & Farewell Spit (Tasman 2017)',
'wrs_prs':['074088'],
'survey_start':datetime.date(2017,11,24),
'survey_end':datetime.date(2017,12,16)},
'TSM16_ATG':{'label':'Abel Tasman & Golden Bay (Tasman 2016)',
'wrs_prs':['074088','074089'],
'survey_start':datetime.date(2016,12,13),
'survey_end':datetime.date(2016,12,14)}}
# Define the range of survey windows to be tested (the duration of the period for which Ls8 data should be processed, centred on the SRTM survey period)
query_windows = [60, 90, 120, 150, 180] # Number of days
# Define the number of cells of padding to add along each raster boundary
pad = 44
# Define the no_data value to be used for GeoTIFF creation
no_data_value = -9999
###############################################################################
# 1. Archive to Public folder any scenes for which cloud cover (land) > 85% #
###############################################################################
# Define a threshold for the maximum cloud coverage (over land) to be allowed
cloud_cover_max = 85
# Download the up-to-date Landsat 8 metadata from USGS
ls8_metadata_url = 'https://landsat.usgs.gov/landsat/metadata_service/bulk_metadata_files/LANDSAT_8_C1.csv.gz'
ls8_metadata_read = requests.get(ls8_metadata_url)
ls8_metadata_path = '{}metadata/{}'.format(folder_ls8, ls8_metadata_url.split('/')[-1])
with open(ls8_metadata_path, 'wb') as ls8_metadata_write:
ls8_metadata_write.write(ls8_metadata_read.content)
# Extract the compressed file (.gz) & read the CSV inside into a pandas dataframe
with gzip.open(ls8_metadata_path) as ls8_metadata_csv:
ls8_metadata = pd.read_csv(ls8_metadata_csv)
# Initialise a count of valid & invalid scenes (wrt cloud cover over land)
valid_scenes = 0
invalid_scenes = 0
# Loop through each subfolder in the Level-2 folder, checking its cloud cover over land
print('Archiving scenes for which cloud cover (over land) > {}%:'.format(cloud_cover_max))
for local_subfolder in os.listdir('{}raw/Level2'.format(folder_ls8)):
# Process further if path is to a folder (rather than a file)
if os.path.isdir('{}raw/Level2/{}'.format(folder_ls8, local_subfolder)):
# Check the cloud cover (over land) associated with that scene
cloud_cover = ls8_metadata.loc[ls8_metadata['LANDSAT_PRODUCT_ID']==local_subfolder, 'CLOUD_COVER_LAND'].values[0]
# Update scene counts based on whether cloud cover exceeds threshold or not
if cloud_cover > cloud_cover_max:
print(' - {} ({:.1f}%):'.format(local_subfolder, cloud_cover), end=' ')
# Update count of invalid scenes
invalid_scenes += 1
# Level-2 subfolder: move from Local to Public archive folder
l2_local = '{}raw/Level2/{}'.format(folder_ls8, local_subfolder)
l2_archive = '{}raw/Level2/{}'.format(folder_archive, local_subfolder)
shutil.move(l2_local, l2_archive)
print('Level-2', end=' ')
# Level-1: move the corresponding subfolder from Local to Public archive folder too
l1_local = '{}raw/Level1/{}'.format(folder_ls8, local_subfolder)
l1_archive = '{}raw/Level1/{}'.format(folder_archive, local_subfolder)
shutil.move(l1_local, l1_archive)
print('Level-1')
else:
valid_scenes += 1
print('\n{} scenes retained, with {} scenes rejected (cloud cover over land > {}%) & archived'.format(valid_scenes, invalid_scenes, cloud_cover_max))
###############################################################################
# 2. Archive redundant files to Public folder (to save space on local drive) #
###############################################################################
# Bands 1-7 are available as Level-2 products (surface reflectance) so aren't required in Level-1 folders
for local_subfolder in os.listdir('{}raw/Level1'.format(folder_ls8)):
# Process further if path is to a folder (rather than a file)
if os.path.isdir('{}raw/Level1/{}'.format(folder_ls8, local_subfolder)):
# If a corresponding subfolder doesn't exist in the Public folder, create one
archive_path = '{}raw/Level1/{}'.format(folder_archive, local_subfolder)
if not os.path.exists(archive_path): os.mkdir(archive_path)
# Move rasters corresponding to Bands 1-7
for f in os.listdir('{}raw/Level1/{}'.format(folder_ls8, local_subfolder)):
if f.lower().endswith('b1.tif') or f.lower().endswith('b2.tif') or f.lower().endswith('b3.tif') or f.lower().endswith('b4.tif') or f.lower().endswith('b5.tif') or f.lower().endswith('b6.tif') or f.lower().endswith('b7.tif'):
shutil.move('{}raw/Level1/{}/{}'.format(folder_ls8, local_subfolder, f), '{}/{}'.format(archive_path, f))
###############################################################################
# 3. Develop cloud-free composites (original grids) for all temporal windows #
###############################################################################
# Define the QA pixel values considered acceptable (relevant to SR Bands 1-7 of Level 2 products). See https://www.usgs.gov/media/files/land-surface-reflectance-code-lasrc-product-guide
pqa_acceptable_values_hq = [322, 386, 834, 898, 1346, 324, 388, 836, 900, 1348] # High quality filter: only entries for "Clear" & "Water"
pqa_acceptable_values_lq = pqa_acceptable_values_hq + [328, 352, 416, 480] # Low quality filter: also allow low-confidence cloud codes
# Define the BQA pixel values considered acceptable (relevant to Bands 8-11 of Level 1 products). See https://www.usgs.gov/land-resources/nli/landsat/landsat-collection-1-level-1-quality-assessment-band?qt-science_support_page_related_con=0#qt-science_support_page_related_con
bqa_acceptable_values_hq = [2720, 2724, 2728, 2732] # High quality filter: only entries for "Clear"
bqa_acceptable_values_lq = bqa_acceptable_values_hq + [2752, 2976, 3008, 3744, 3776] # Low quality filter: also allow low/medium cloud/shadow/cirrus codes (with no radiation saturation)
# Loop through each LiDAR survey zone
for zone in zones:
print('\nProcessing Landsat 8 imagery for {}...'.format(dtm_dict[zone]['label']))
# Create necessary result folder if it doesn't yet exist
folder_original_zone = '{}proc/cloudfree/Original/{}/'.format(folder_ls8, zone)
if not os.path.exists(folder_original_zone): os.mkdir(folder_original_zone)
# Define path to the padded GeoTIFF describing that zone's SRTM coverage
srtm_tif = '{}proc/{}/SRTM_{}_Z_Pad44.tif'.format(folder_srtm, zone, zone)
srtm_proj = get_geotiff_projection(srtm_tif)
# Define which Ls8 WRS path-rows are relevant for that zone
zone_wrs_prs = dtm_dict[zone]['wrs_prs']
# Loop through the list of possible query windows
for query_window in query_windows:
print(' - Using a {} day search window'.format(query_window))
# Define the period over which Landsat 8 data should be sought (centred on each survey's collection period)
query_window = datetime.timedelta(days=query_window)
# Determine start & end dates of that zone's LiDAR survey period
survey_start = dtm_dict[zone]['survey_start']
survey_end = dtm_dict[zone]['survey_end']
survey_window = survey_end - survey_start
# Determine search window for tiles, by buffering the start & end dates retrieved above
query_start = survey_start - (query_window - survey_window)/2
query_end = survey_end + (query_window - survey_window)/2
# Loop through all bands, setting parameters as appropriate (based on whether band is from Level-1 or Level-2 collection)
for band in range(1,12):
# Define collection level & folder containing raw data for processing & lists of QA codes for processing
level = 2 if band <= 7 else 1
folder = '{}raw/Level{}/'.format(folder_ls8, level)
print(' - Band {} (Level-{} data):'.format(band, level), end=' ')
# Assign other raster processing parameters based on collection level
if level == 2:
# Level-2 collection
band_prefix = 'sr_band'
qa_suffix = 'pixel_qa'
qa_acceptable_values_hq, qa_acceptable_values_lq = pqa_acceptable_values_hq, pqa_acceptable_values_lq
valid_from = 0.
valid_to = 10000.
elif level == 1:
# Level-1 collection
band_prefix = 'B'
qa_suffix = 'BQA'
qa_acceptable_values_hq, qa_acceptable_values_lq = bqa_acceptable_values_hq, bqa_acceptable_values_lq
valid_from = 0.
valid_to = 65535.
# Get list of rasters available in selected folder
subfolders = os.listdir(folder)
# Loop through subfolders, clipping each based on extent info extracted above, masking based on QA raster, and adding to a list of masked arrays
# Initialise lists to hold masked rasters relating to that band
band_masked_arrays_hq = []
band_masked_arrays_lq = []
# Initialise a count of the number of valid layers available
n_valid_layers = 0
# Loop through each available subfolder, extracting relevant data if it's within search window
for subfolder in subfolders:
# Process data if capture date is within search window
if ls_subfolder_valid(subfolder, zone_wrs_prs, query_start, query_end):
# Update the count of valid layers
n_valid_layers += 1
# Define paths to the band raster & its associated QA raster
band_path = '{}{}/{}_{}{}.tif'.format(folder, subfolder, subfolder, band_prefix, band)
qa_path = '{}{}/{}_{}.tif'.format(folder, subfolder, subfolder, qa_suffix)
# Get the projection properties of the band raster
band_proj = get_geotiff_projection(band_path)
band_props = get_geotiff_props(band_path)
# Project the padded GeoTIFF describing that zone's SRTM coverage into the band's CRS
srtm_projected = '{}proc/{}/SRTM_{}_Z_Pad44_Projected.tif'.format(folder_srtm, zone, zone)
if os.path.exists(srtm_projected): os.remove(srtm_projected)
project_command = [gdal_warp, '-s_srs', srtm_proj, '-t_srs', band_proj, srtm_tif, srtm_projected]
project_result = subprocess.run(project_command, stdout=subprocess.PIPE)
if project_result.returncode != 0:
print('\nProcess failed, with error message: {}\n'.format(project_result.stdout))
break
# Get the projected GeoTIFF's extent (in the CRS used by the input band)
srtm_projected_props = get_geotiff_props(srtm_projected)
os.remove(srtm_projected)
AOI_x_min = srtm_projected_props['x_min']
AOI_x_max = srtm_projected_props['x_max']
AOI_y_min = srtm_projected_props['y_min']
AOI_y_max = srtm_projected_props['y_max']
# Snap these extents to match the band raster's grid, adding a buffer of 50 cells along each edge to be safe
buffer = 50
clip_x_min = band_props['x_min'] + ceil((AOI_x_min - band_props['x_min'])/band_props['res_x'])*band_props['res_x'] - buffer*band_props['res_x']
clip_x_max = band_props['x_max'] + floor((AOI_x_max - band_props['x_max'])/band_props['res_x'])*band_props['res_x'] + buffer*band_props['res_x']
clip_y_min = band_props['y_min'] + floor((AOI_y_min - band_props['y_min'])/-band_props['res_y'])*-band_props['res_y'] - buffer*-band_props['res_y']
clip_y_max = band_props['y_max'] + ceil((AOI_y_max - band_props['y_max'])/-band_props['res_y'])*-band_props['res_y'] + buffer*-band_props['res_y']
# Clip the band raster such that its extent matches AOI but its grid alignment matches original band raster
band_clip = '{}{}/{}_{}{}_{}.tif'.format(folder, subfolder, subfolder, band_prefix, band, zone)
warp_band_command = [gdal_warp, '-overwrite', band_path, band_clip, '-s_srs', band_props['proj'], '-t_srs', band_props['proj'], '-tr', str(band_props['res_x']), str(-band_props['res_y']), '-te', str(clip_x_min), str(clip_y_min), str(clip_x_max), str(clip_y_max), '-te_srs', band_props['proj'], '-r', 'near', '-dstnodata', '-9999']
subprocess.call(warp_band_command)
# Clip the QA raster too, to be used as the mask
qa_clip = '{}{}/{}_{}_{}.tif'.format(folder, subfolder, subfolder, qa_suffix, zone)
warp_qa_command = [gdal_warp, '-overwrite', qa_path, qa_clip, '-s_srs', band_props['proj'], '-t_srs', band_props['proj'], '-tr', str(band_props['res_x']), str(-band_props['res_y']), '-te', str(clip_x_min), str(clip_y_min), str(clip_x_max), str(clip_y_max), '-te_srs', band_props['proj'], '-r', 'near', '-dstnodata', '-9999']
subprocess.call(warp_qa_command)
# Read in arrays of the clipped band raster & the clipped QA raster (to use as a mask)
band_array = geotiff_to_array(band_clip)
qa_array = geotiff_to_array(qa_clip)
# Check the no_data_value used by the input band raster
band_nodatavalue = get_geotiff_nodatavalue(band_clip)
# Mask the band array wherever the band_nodatavalue is present
band_masked_array = np.ma.masked_where(band_array==band_nodatavalue, band_array)
# Restrict to valid range & rescale
# Replace values below lower bound with no_data_value
band_masked_array[band_masked_array < valid_from] = no_data_value
# Replace values above upper bound with no_data_value
band_masked_array[band_masked_array > valid_to] = no_data_value
# Re-mask, using the newly applied no_data_value
band_masked_array = np.ma.masked_where(band_masked_array==no_data_value, band_masked_array)
# Rescale values by appropriate factor
band_masked_array = band_masked_array / valid_to
# Mask the band array using the HQ version of the QA array
band_masked_array_hq = np.ma.masked_where(~np.isin(qa_array, qa_acceptable_values_hq), band_masked_array)
band_masked_arrays_hq.append(band_masked_array_hq)
del band_masked_array_hq
# Set up a second masked band array, using the LQ version of the QA array (for gap-filling later)
band_masked_array_lq = np.ma.masked_where(~np.isin(qa_array, qa_acceptable_values_lq), band_masked_array)
band_masked_arrays_lq.append(band_masked_array_lq)
del band_masked_array_lq
# Clean up arrays no longer needed
del band_array, qa_array, band_masked_array
print('{} images available'.format(n_valid_layers), end=' ')
# Calculate median of the HQ array list representing data for the selected band
band_masked_arrays_hq_stack = np.ma.array(band_masked_arrays_hq)
band_masked_median_hq = np.ma.median(band_masked_arrays_hq_stack, axis=0, overwrite_input=True)
del band_masked_arrays_hq, band_masked_arrays_hq_stack
# Calculate median of the LQ array list representing data for the selected band
band_masked_arrays_lq_stack = np.ma.array(band_masked_arrays_lq)
band_masked_median_lq = np.ma.median(band_masked_arrays_lq_stack, axis=0)
del band_masked_arrays_lq, band_masked_arrays_lq_stack
# Save results in which the LQ results are used to gap-fill the HQ results
# Retrieve the mask derived from the HQ filter
mask_hq = np.ma.getmask(band_masked_median_hq)
# Make copies of the HQ median results
band_masked_median_gapfill = np.ma.copy(band_masked_median_hq)
# Try to fill in masked values using results from the LQ results, where possible
band_masked_median_gapfill[mask_hq] = band_masked_median_lq[mask_hq]
del band_masked_median_hq, band_masked_median_lq, mask_hq
# Fill remaining gaps with the same no_data value used previously
band_masked_median_gapfill = band_masked_median_gapfill.filled(fill_value=no_data_value)
# Update the dictionary describing band raster properties
band_props['x_min'] = clip_x_min
band_props['x_max'] = clip_x_max
band_props['y_min'] = clip_y_min
band_props['y_max'] = clip_y_max
band_props['width'] = int((clip_x_max - clip_x_min)/band_props['res_x'])
band_props['height'] = int((clip_y_max - clip_y_min)/-band_props['res_y'])
# Save these new arrays to TIF files
tif_median_path_gapfill = '{}LS8_L{}_B{}_{}d_GapFill_Median_{}.tif'.format(folder_original_zone, level, band, str(query_window.days).zfill(3), zone)
array_to_geotiff(band_masked_median_gapfill, tif_median_path_gapfill, no_data_value, band_props)
del band_masked_median_gapfill, band_props
print('DONE')
###############################################################################
# 4. Calculate spectral index products, using original composite rasters #
###############################################################################
# Define the list of spectral index products to be generated
spectral_index_products = ['NDVI','EVI','AVI','SAVI','MSAVI','SI','BSI','NDMI','MNDWI','AWEInsh','AWEIsh','NDBI']
# Loop through all available survey zones
for zone in zones:
print('\nCalculating Landsat 8 spectral indices for {}...'.format(dtm_dict[zone]['label']))
folder_ls8_zone = '{}proc/cloudfree/Original/{}'.format(folder_ls8, zone)
# Loop through all considered query windows
for query_window in query_windows:
print(' - Indices based on {}d window:'.format(query_window), end=' ')
# Define paths to relevant band rasters
band_2_path = '{}/LS8_L2_B2_{}d_GapFill_Median_{}.tif'.format(folder_ls8_zone, str(query_window).zfill(3), zone)
band_3_path = '{}/LS8_L2_B3_{}d_GapFill_Median_{}.tif'.format(folder_ls8_zone, str(query_window).zfill(3), zone)
band_4_path = '{}/LS8_L2_B4_{}d_GapFill_Median_{}.tif'.format(folder_ls8_zone, str(query_window).zfill(3), zone)
band_5_path = '{}/LS8_L2_B5_{}d_GapFill_Median_{}.tif'.format(folder_ls8_zone, str(query_window).zfill(3), zone)
band_6_path = '{}/LS8_L2_B6_{}d_GapFill_Median_{}.tif'.format(folder_ls8_zone, str(query_window).zfill(3), zone)
band_7_path = '{}/LS8_L2_B7_{}d_GapFill_Median_{}.tif'.format(folder_ls8_zone, str(query_window).zfill(3), zone)
# Read data arrays for each into memory
b2, b3, b4, b5, b6, b7 = [geotiff_to_array(band_path) for band_path in [band_2_path, band_3_path, band_4_path, band_5_path, band_6_path, band_7_path]]
b_props = get_geotiff_props(band_2_path)
# Loop through all spectral index products
for product in spectral_index_products:
# Define output path
product_path = '{}/LS8_{}_{}d_{}.tif'.format(folder_ls8_zone, product, str(query_window).zfill(3), zone)
# Use Landsat 8 function to calculate the spectral index (including gap-filling)
with np.errstate(divide='ignore'): # Divide by zero issues are handled already - suppressing warnings
ls8_spectral_index(b2, b3, b4, b5, b6, b7, product, b_props, product_path, no_data_value)
print(product, end=' ')
# Build a VRT (Virtual Dataset) containing the R,G,B bands - for visualisation as a True Colour Image (TCI)
vrt_path = '{}/LS8_TCI_{}d_{}.vrt'.format(folder_ls8_zone, str(query_window).zfill(3), zone)
vrt_command = [gdal_buildvrt, '-separate', vrt_path] + ['{}/LS8_L2_B{}_{}d_GapFill_Median_{}.tif'.format(folder_ls8_zone, band, str(query_window).zfill(3), zone) for band in ['4','3','2']]
vrt_result = subprocess.run(vrt_command, stdout=subprocess.PIPE)
if vrt_result.returncode != 0:
print(vrt_result.stdout)
break
print('TCI')
###############################################################################
# 5. Resample cloud-free composites (all time windows) to match SRTM grids #
###############################################################################
# The 'cubicspline' resampling method was found to provide the best results based on visual comparisons
resampling = 'cubicspline'
# Loop through all zones, resampling all available Landsat 8 imagery to arrays coincident with the SRTM arrays
for zone in zones:
print('\nProcessing Landsat 8 data for {}...'.format(dtm_dict[zone]['label']))
# 5a. Read the appropriate SRTM DSM raster into memory & retrieve its properties
print(' - Analysing zonal SRTM raster to align grids...')
srtm_filename = '{}proc/{}/SRTM_{}_Z.tif'.format(folder_srtm, zone, zone)
srtm_proj, srtm_res_x, srtm_res_y, srtm_x_min, srtm_x_max, srtm_y_min, srtm_y_max, srtm_width, srtm_height = extract_projection_info(srtm_filename)
# Define a new bounding box, including the padding required for the 2D convnet data pre-processing
pad_x_min = srtm_x_min - pad*srtm_res_x
pad_x_max = srtm_x_max + pad*srtm_res_x
pad_y_min = srtm_y_min - pad*-srtm_res_y
pad_y_max = srtm_y_max + pad*-srtm_res_y
pad_width = srtm_width + 2*pad
pad_height = srtm_height + 2*pad
# Create new folder for resampled rasters, if it doesn't exist already
folder_ls8_zone = '{}proc/cloudfree/Resampled/{}'.format(folder_ls8, zone)
if not os.path.exists(folder_ls8_zone):
os.makedirs(folder_ls8_zone)
# 5b. Resample each band, setting parameters as appropriate (based on whether band is from Level-1 or Level-2 collection)
for band in range(1,12):
# Define collection level
level = 2 if band <= 7 else 1
print(' - Band {} (Level-{} data):'.format(band, level), end=' ')
# Loop through the processed results, for the various time windows trialled
for query_window in query_windows:
# Open band raster and extract its coordinate reference system (CRS)
ls8_tile_path = '{}proc/cloudfree/Original/{}/LS8_L{}_B{}_{}d_GapFill_Median_{}.tif'.format(folder_ls8, zone, level, band, str(query_window).zfill(3), zone)
ls8_proj = get_geotiff_projection(ls8_tile_path)
# Warp band raster to WGS84, aligning with padded SRTM raster
ls8_resample_pad = '{}/LS8_B{}_{}d_{}_Pad44.tif'.format(folder_ls8_zone, band, str(query_window).zfill(3), zone)
warp_command_pad = [gdal_warp, '-overwrite', ls8_tile_path, ls8_resample_pad, '-s_srs', ls8_proj, '-t_srs', srtm_proj, '-tr', str(srtm_res_x), str(-srtm_res_y), '-te', str(pad_x_min), str(pad_y_min), str(pad_x_max), str(pad_y_max), '-te_srs', srtm_proj, '-r', resampling, '-dstnodata', str(no_data_value)]
warp_result_pad = subprocess.run(warp_command_pad, stdout=subprocess.PIPE)
if warp_result_pad.returncode != 0:
print(warp_result_pad.stdout)
break
# Now clip that padded raster to the unpadded extent (i.e. same as SRTM/DTM zone raster)
ls8_resample = '{}/LS8_B{}_{}d_{}.tif'.format(folder_ls8_zone, band, str(query_window).zfill(3), zone)
warp_command = [gdal_warp, '-overwrite', ls8_resample_pad, ls8_resample, '-s_srs', srtm_proj, '-t_srs', srtm_proj, '-tr', str(srtm_res_x), str(-srtm_res_y), '-te', str(srtm_x_min), str(srtm_y_min), str(srtm_x_max), str(srtm_y_max), '-te_srs', srtm_proj, '-r', 'near', '-dstnodata', str(no_data_value)]
warp_result = subprocess.run(warp_command, stdout=subprocess.PIPE)
if warp_result.returncode != 0:
print(warp_result.stdout)
break
print('DONE')
# 5c. Resample each index product
for product in spectral_index_products:
print(' - {} spectral index:'.format(product), end=' ')
# Loop through the processed results, for the three time windows trialled
for query_window in query_windows:
# Open product raster and extract its coordinate reference system (CRS)
ls8_tile_path = '{}proc/cloudfree/Original/{}/LS8_{}_{}d_{}.tif'.format(folder_ls8, zone, product, str(query_window).zfill(3), zone)
ls8_proj = get_geotiff_projection(ls8_tile_path)
# Warp band raster to WGS84, aligning with padded SRTM raster
ls8_resample_pad = '{}/LS8_{}_{}d_{}_Pad44.tif'.format(folder_ls8_zone, product, str(query_window).zfill(3), zone)
warp_command_pad = [gdal_warp, '-overwrite', ls8_tile_path, ls8_resample_pad, '-s_srs', ls8_proj, '-t_srs', srtm_proj, '-tr', str(srtm_res_x), str(-srtm_res_y), '-te', str(pad_x_min), str(pad_y_min), str(pad_x_max), str(pad_y_max), '-te_srs', srtm_proj, '-r', resampling, '-dstnodata', str(no_data_value)]
warp_result_pad = subprocess.run(warp_command_pad, stdout=subprocess.PIPE)
if warp_result_pad.returncode != 0:
print(warp_result_pad.stdout)
break
# Now clip that padded raster to the unpadded extent (i.e. same as SRTM/DTM zone raster)
ls8_resample = '{}/LS8_{}_{}d_{}.tif'.format(folder_ls8_zone, product, str(query_window).zfill(3), zone)
warp_command = [gdal_warp, '-overwrite', ls8_resample_pad, ls8_resample, '-s_srs', srtm_proj, '-t_srs', srtm_proj, '-tr', str(srtm_res_x), str(-srtm_res_y), '-te', str(srtm_x_min), str(srtm_y_min), str(srtm_x_max), str(srtm_y_max), '-te_srs', srtm_proj, '-r', 'near', '-dstnodata', str(no_data_value)]
warp_result = subprocess.run(warp_command, stdout=subprocess.PIPE)
if warp_result.returncode != 0:
print(warp_result.stdout)
break
print('DONE')
###############################################################################
# 6. Compare band/product value distribution across all available zones #
###############################################################################
# Loop through all available resampled rasters (bands & products)
for raster in ['B{}'.format(b) for b in range(1,12)] + spectral_index_products:
print('\nProcessing {} raster...'.format(raster), end=' ')
# Loop through all available temporary search windows
for query_window in query_windows:
# Initialise a figure to show that raster's data distribution for all zones
fig, axes = plt.subplots(nrows=len(zones), sharex=True, figsize=(9,15))
# Loop through all available zones
for i, zone in enumerate(zones):
# Define path to appropriate raster & retrieve its values
raster_path = '{}proc/cloudfree/Resampled/{}/LS8_{}_{}d_{}.tif'.format(folder_ls8, zone, raster, str(query_window).zfill(3), zone)
raster_array = geotiff_to_array(raster_path)
raster_values = [val for val in raster_array.flatten() if val != no_data_value]
# Show that zone's data distribution using a histogram & label figure with zone name
axes[i].hist(raster_values, bins=100)
axes[i].annotate(zone, xy=(0.99, 0.96), xycoords='axes fraction', ha='right', va='top')
# Show extreme values using dashed red lines
axes[i].axvline(x=min(raster_values), color='red', linestyle='dashed', alpha=0.3)
axes[i].axvline(x=max(raster_values), color='red', linestyle='dashed', alpha=0.3)
# Finalise figure
fig.suptitle('Distribution of {} values across all zones ({} day window)'.format(raster, query_window), fontweight='bold')
fig.tight_layout()
fig.subplots_adjust(top=0.96)
fig.savefig('{}All/Distributions/Landsat8/{}_{}d.png'.format(folder_fig, raster, str(query_window).zfill(3)), dpi=300)
plt.close()
print('{}d'.format(query_window), end=' ')
###############################################################################
# 7. Bound all bands & appropriate spectral indices, to truncate artefacts #
###############################################################################
# Define a dictionary of bounds for each spectral index product
bounds_dict = {'NDVI':(-1., 1.), # Source: https://www.usgs.gov/land-resources/nli/landsat/landsat-normalized-difference-vegetation-index
'EVI':(-1., 1.), # Source: https://www.usgs.gov/land-resources/nli/landsat/landsat-enhanced-vegetation-index
'AVI':(-1., 1.),
'SAVI':(-1., 1.), # Source: https://www.usgs.gov/land-resources/nli/landsat/landsat-soil-adjusted-vegetation-index
'MSAVI':(-1., 1.), # Source: https://www.usgs.gov/land-resources/nli/landsat/landsat-modified-soil-adjusted-vegetation-index
'SI':(0., 1.), # Source: based on equation used
'BSI':(-1., 1.), # Source: based on equation used
'NDMI':(-1., 1.), # Source: https://www.usgs.gov/land-resources/nli/landsat/normalized-difference-moisture-index
'MNDWI':(-1., 1.), # Source: Xu 2006
'AWEInsh':(-7., 4.), # Source: based on equation used
'AWEIsh':(-3.25, 3.5), # Source: based on equation used
'NDBI':(-1., 1.)} # Source: based on equation used
# Add bounds for all bands too
for band in range(1,12):
bounds_dict['B{}'.format(band)] = (0., 1.)
# Loop through all bands & spectral index products
for raster in ['B{}'.format(b) for b in range(1,12)] + spectral_index_products:
print('\nBounding {} raster...'.format(raster), end=' ')
# Retrieve the bounds defined for that raster (if any)
bounds = bounds_dict[raster]
lower_bound = bounds[0]
upper_bound = bounds[1]
# Loop through all available temporary search windows
for query_window in query_windows:
# Loop through all available zones
for i, zone in enumerate(zones):
# Loop through both versions of each raster (padded & unpadded)
for padding in ['', '_Pad44']:
# Define path to that raster GeoTIFF
raster_unbounded = '{}proc/cloudfree/Resampled/{}/LS8_{}_{}d_{}{}.tif'.format(folder_ls8, zone, raster, str(query_window).zfill(3), zone, padding)
raster_bounded = '{}proc/cloudfree/Resampled/{}/LS8_{}_{}d_{}{}_Bounded.tif'.format(folder_ls8, zone, raster, str(query_window).zfill(3), zone, padding)
# Create a bounded GeoTIFF, using the helper function defined
with np.errstate(invalid='ignore'):
create_bounded_geotiff(raster_unbounded, raster_bounded, lower_bound, upper_bound, no_data_value)
print('{}d'.format(query_window), end=' ')
###############################################################################
# 8. Compare BOUNDED band/product value distribution for all available zones #
###############################################################################
# Loop through all available resampled rasters (bands & products)
for raster in ['B{}'.format(b) for b in range(1,12)] + spectral_index_products:
print('\nProcessing {} raster...'.format(raster), end=' ')
# Loop through all available temporary search windows
for query_window in query_windows:
# Initialise a figure to show that raster's data distribution for all zones
fig, axes = plt.subplots(nrows=len(zones), sharex=True, figsize=(9,15))
# Loop through all available zones
for i, zone in enumerate(zones):
# Define path to appropriate raster & retrieve its values
raster_path = '{}proc/cloudfree/Resampled/{}/LS8_{}_{}d_{}_Bounded.tif'.format(folder_ls8, zone, raster, str(query_window).zfill(3), zone)
raster_array = geotiff_to_array(raster_path)
raster_values = raster_array[raster_array != no_data_value].flatten()
# Show that zone's data distribution using a histogram & label figure with zone name
axes[i].hist(raster_values, bins=100, color='green', alpha=0.5)
axes[i].annotate(zone, xy=(0.99, 0.96), xycoords='axes fraction', ha='right', va='top')
# Show extreme values using dashed red lines
axes[i].axvline(x=min(raster_values), color='red', linestyle='dashed', alpha=0.3)
axes[i].axvline(x=max(raster_values), color='red', linestyle='dashed', alpha=0.3)
# Finalise figure
fig.suptitle('Distribution of {} values (bounded) across all zones ({} day window)'.format(raster, query_window), fontweight='bold')
fig.tight_layout()
fig.subplots_adjust(top=0.96)
fig.savefig('{}All/Distributions/Landsat8/{}_{}d_Bounded.png'.format(folder_fig, raster, str(query_window).zfill(3)), dpi=300)
plt.close()
print('{}d'.format(query_window), end=' ')