-
Notifications
You must be signed in to change notification settings - Fork 0
/
SICE_processing.py
253 lines (183 loc) · 8.59 KB
/
SICE_processing.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
# -*- coding: utf-8 -*-
"""
@author: Adrien Wehrlé, Jason E. Box, GEUS (Geological Survey of Denmark and Greenland)
Computation of an empirical Broadband Albedo (BBA) from Top of Atmosphere
reflectances (r_TOA), further combined with planar shortwave broadband albedo
values when the latter are below bare ice albedo (0.565).
Application of a temporal filtering based on outlier detection modified after
Box et al, 2017 (OD='B') or Wehrle et al, 2020 (OD='W').
Production of a daily cumulative ("gapless") product (updating pixel values
when an area is considered cloud free).
"""
import glob
import rasterio
import numpy as np
import os
import pickle
import time
from multiprocessing import Pool, freeze_support
import warnings
warnings.filterwarnings("ignore")
# folder containing SICE outputs
SICE_path = "H:/SICE/"
# folder to store outputs
output_path = "H:/SICE_PP/"

# list SICE folders of a given year
year = 2019
SICE_folders = list(sorted(glob.glob(SICE_path + str(year) + "*/")))

# keep only folders where the needed r_TOA bands and the planar BBA are available
_required_files = [
    "albedo_bb_planar_sw.tif",
    "r_TOA_01.tif",
    "r_TOA_06.tif",
    "r_TOA_17.tif",
    "r_TOA_21.tif",
]
SICE_folders_av = [
    folder
    for folder in SICE_folders
    if all(os.path.isfile(folder + name) for name in _required_files)
]

# list SICE planar broadband albedo rasters
planar_BBA_files = [path + "albedo_bb_planar_sw.tif" for path in SICE_folders_av]

# load an example raster (grid template, `ex`) and the profile used to save
# outputs; open the dataset once in a context manager instead of leaking two
# unclosed rasterio handles as before
with rasterio.open(planar_BBA_files[0]) as src:
    ex = src.read(1)
    profile = src.profile

# parameters for temporal filtering
deviation_threshold = (
    0.15  # compute temporal average for deviations below deviation_threshold
)
rolling_window = 10  # center value +-rolling_window days (11 days)
limit_valid_days = (
    4  # need at least limit_valid_days valid days to compute temporal average
)
def SICE_processing(k, OD="W"):
    """
    Compute an empirical Broadband Albedo (empirical_BBA), combine it with
    the SICE planar shortwave broadband albedo (planar_BBA) when the latter
    is below bare ice albedo (0.565), and apply a temporal filtering based
    on outlier detection.

    INPUTS:
        k: iterator from zero to the number of available SICE folders [int]
        OD: outlier detection after Box et al, 2017 ("B") or
            Wehrle et al, 2020 ("W") [str]

    OUTPUTS:
        filtered_BBA: filtered broadband albedo combination at the date
                      associated with k, or None near the time-series
                      boundaries [array or None]
        date: folder name (date) associated with k, or None [str or None]
    """

    def load_rasters(files):
        # load several rasters into a list of 2D arrays
        return [rasterio.open(file).read(1) for file in files]

    # do not compute within half a rolling window of the time-series boundaries
    if k > rolling_window / 2 and k < len(planar_BBA_files) - rolling_window / 2:
        # initialize 3D matrix: rows x cols x (rolling_window + 1) days
        BBAs_window = np.full(
            (np.shape(ex)[0], np.shape(ex)[1], rolling_window + 1), np.nan
        )
        # compute empirical albedo rasters, combine with planar albedo and
        # stack resulting rasters within rolling_window in the 3D matrix
        for w, j in enumerate(
            range(k - int(rolling_window / 2), k + int(rolling_window / 2 + 1))
        ):
            r_TOA_files = [
                SICE_folders_av[j] + var
                for var in [
                    "r_TOA_01.tif",
                    "r_TOA_06.tif",
                    "r_TOA_17.tif",
                    "r_TOA_21.tif",
                ]
            ]
            r_TOAs = load_rasters(r_TOA_files)
            r_TOAs_combination = np.nanmean(r_TOAs, axis=0) / 4
            # empirical broadband albedo: linear fit on the r_TOA combination
            empirical_BBA = 0.901 * r_TOAs_combination + 0.124
            planar_BBA = rasterio.open(planar_BBA_files[j]).read(1)
            # BUGFIX: mask values OUTSIDE [0, 1]; the original condition
            # (planar_BBA < 0) & (planar_BBA > 1) is always False and
            # therefore never masked anything
            planar_BBA[(planar_BBA < 0) | (planar_BBA > 1)] = np.nan
            # integrate empirical_BBA where planar_BBA is below bare ice albedo
            below_bare_ice = planar_BBA <= 0.565
            planar_BBA[below_bare_ice] = empirical_BBA[below_bare_ice]
            BBAs_window[:, :, w] = planar_BBA
        if OD == "B":
            # compute median for each pixel along rolling_window
            median_window = np.nanmedian(BBAs_window, 2, keepdims=True)
            # compute deviations from median for each pixel along rolling_window
            deviations = np.abs((BBAs_window - median_window) / median_window)
            # count valid days for each pixel along rolling_window
            nb_valid_days = np.sum(deviations < deviation_threshold, axis=2)
            # exclude invalid cases
            BBAs_window[deviations > deviation_threshold] = np.nan
        elif OD == "W":
            # load albedo raster at the center of rolling_window
            BBA_center = BBAs_window[:, :, int(rolling_window / 2)]
            # compute median for each pixel time series
            median_window = np.nanmedian(BBAs_window, axis=2)
            # per-pixel deviation of the center day from the window median
            deviations = np.abs((BBA_center - median_window) / median_window)
        # folder name (date) associated with iterator k
        date = planar_BBA_files[k].split(os.sep)[-2]
        filtered_BBA = np.full((np.shape(ex)[0], np.shape(ex)[1]), np.nan)
        if OD == "B":
            # keep pixel only if nb_valid_days is above limit_valid_days
            filtered_BBA[nb_valid_days > limit_valid_days] = np.nanmean(
                BBAs_window, axis=2
            )[nb_valid_days > limit_valid_days]
        if OD == "W":
            # keep pixel only if median deviation is below deviation_threshold
            filtered_BBA[deviations < deviation_threshold] = np.nanmean(
                BBAs_window, axis=2
            )[deviations < deviation_threshold]
    else:
        # boundary case: nothing to compute
        filtered_BBA = None
        date = None
    print(k, "/", len(planar_BBA_files))
    return filtered_BBA, date
if __name__ == "__main__":
    # dictionary thereafter containing dates as keys
    # and daily filtered BBAs as values
    filtered_BBAs = {}
    # computer cores to use for multiprocessing
    nb_cores = 5
    # save start time to compute processing time
    start_time = time.time()
    start_local_time = time.ctime(start_time)
    print("SICE_processing...")
    freeze_support()
    with Pool(nb_cores) as p:
        for f_BBA, date in p.map(SICE_processing, range(0, len(SICE_folders_av))):
            if f_BBA is not None:
                filtered_BBAs[date] = f_BBA
    print(list(filtered_BBAs.keys()))
    end_time = time.time()
    end_local_time = time.ctime(end_time)
    processing_time = (end_time - start_time) / 60
    print("--- Processing time: %s minutes ---" % processing_time)
    print("--- Start time: %s ---" % start_local_time)
    print("--- End time: %s ---" % end_local_time)
    # save filtered_BBAs dictionary in a pickle file (context manager closes
    # the file even if dumping fails)
    file_name = "H:/SICE_filtered_BBA_4_02_10rm" + str(year) + ".pkl"
    with open(file_name, "wb") as f:
        pickle.dump(filtered_BBAs, f)
    # how to load a pkl file
    # with open(file_name, 'rb') as f:
    #     filtered_BBAs = pickle.load(f)

    # use first month average as initialization of the gapless product
    # (keys are presumably "YYYY-MM-DD" folder names — split("-")[1] = month)
    fm = list(filtered_BBAs.keys())[0].split("-")[1]
    fm_dates = [key for key in list(filtered_BBAs.keys()) if key.split("-")[1] == fm]
    fm_BBA = np.zeros((np.shape(ex)[0], np.shape(ex)[1], len(fm_dates)))
    for op, date in enumerate(fm_dates):
        fm_BBA[:, :, op] = filtered_BBAs[date]
    BBA_initialization = np.nanmean(fm_BBA, axis=2)
    # dictionary thereafter containing dates as keys
    # and daily cumulative filtered BBAs as values
    filtered_BBAs_cumulative = {}
    # forward gap-filling: carry the last cloud-free albedo value forward,
    # updating pixels only where the daily value is a valid albedo
    cuml = None
    for i, key in enumerate(filtered_BBAs):
        # BUGFIX: use the boolean mask directly; the original wrapped it in a
        # one-element list ([mask]), which is deprecated/invalid fancy indexing
        valid = (filtered_BBAs[key] > 0) & (filtered_BBAs[key] < 1)
        if i == 0:
            # copy so the first-month initialization raster is not mutated
            cuml = BBA_initialization.copy()
        cuml[valid] = filtered_BBAs[key][valid]
        # BUGFIX: store a per-date copy — the original stored the same array
        # object for every date, so all dict entries aliased the final state
        filtered_BBAs_cumulative[key] = cuml.copy()
        with rasterio.open(output_path + key + ".tif", "w", **profile) as dst:
            dst.write(filtered_BBAs_cumulative[key].astype(np.float32), 1)