-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathcreate_data_description_tables.py
244 lines (159 loc) · 6.98 KB
/
create_data_description_tables.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
# Copyright (C) 2020 by
# Dominik Traxl <[email protected]>
# All rights reserved.
# MIT license.
import os
import pandas as pd
# file system: current working directory at start-up; the generated markdown
# files are written into this directory
cwd = os.getcwd()
def convert_to_different_formats(df, name):
    """Write a data description table to ``<name>.md`` inside ``cwd``.

    Parameters
    ----------
    df : dict or pandas.DataFrame
        Table data, passed to ``pd.DataFrame(data=df)``. It must contain a
        'Name' column (used as the index of the written table) and every
        column must hold strings, because the ``.str`` accessor is applied
        to each of them.
    name : str
        File name without extension; the table is stored as
        ``os.path.join(cwd, name + '.md')``.

    Returns
    -------
    None
        The only effect is the markdown file written to disk.
    """
    # create dataframe
    df = pd.DataFrame(data=df)

    # to markdown: escape literal asterisks (e.g. 'MW*10', '[0, 36*1200-1]')
    # so that markdown renderers do not treat them as emphasis markers.
    # regex=False is required: a bare '*' is not a valid regular expression.
    df_md = df.copy()
    for col in df_md.columns:
        df_md[col] = df_md[col].str.replace('*', '\\*', regex=False)

    # to other
    # ..

    # set index
    df_md = df_md.set_index('Name')

    # store as markdown file (explicit encoding keeps the output identical
    # across platforms)
    with open(os.path.join(cwd, name + '.md'), 'w', encoding='utf-8') as f:
        df_md.to_markdown(f)
# ----------------------------------------------------------------------------
# v.h5
# Column-oriented description of the variables stored in v.h5: entry k of
# every list describes the variable named by entry k of 'Name'.
v = {
    'Name': [
        'lat', 'lon', 'x', 'y', 'H', 'V', 'i', 'j', 'dtime', 'conf',
        'maxFRP', 'satellite', 'neigh', 't', 'country', 'continent',
        'neigh_int', 'gl', 'cp',
    ],
    'Description': [
        'location latitude',
        'location longitude',
        'x-coordinate on global sinusoidal MODIS grid',
        'y-coordinate on global sinusoidal MODIS grid',
        'horizontal MODIS tile coordinate',
        'vertical MODIS tile coordinate',
        'row coordinate of the grid cell within MODIS tile (H, V)',
        'column coordinate of the grid cell within MODIS tile (H, V)',
        'date (YYYY-MM-DD)',
        'detection confidence [7: low, 8: nominal, 9: high]',
        'maximum fire radiative power',
        'which satellite detected the fire [MOD, MYD, both]',
        'string representation of "neigh_int"',
        'days since 2002-01-01',
        'country of occurrence',
        'continent of occurrence',
        'minimum of fire pixel classes of neighboring grid cells',
        'location ID on the global sinusoidal MODIS grid',
        'component membership label',
    ],
    'Unit': [
        'degrees', 'degrees', '-', '-', '-', '-', '-', '-', '-', '-',
        'MW*10', '-', '-', 'days since 2002-01-01', '-', '-', '-', '-', '-',
    ],
    # latitude spans [-90, 90] and longitude spans [-180, 180]
    'Valid Range': [
        '[-90, 90]', '[-180, 180]', '[0, 36*1200-1]', '[0, 18*1200-1]',
        '[0, 35]', '[0, 17]', '[0, 1199]', '[0, 1199]', '>= 2002-01-01',
        '[7, 9]', '>= 0', '-', '-', '>= 0', '-', '-', '[0, 9]',
        '[0, 36*1200*18*1200-1]', '>= 0',
    ],
    'Data Type': [
        'float64', 'float64', 'uint16', 'uint16', 'uint8', 'uint8',
        'uint16', 'uint16', 'datetime64', 'uint8', 'int32', 'string',
        'string', 'uint16', 'string', 'string', 'uint8', 'uint32', 'int64',
    ],
}
# ----------------------------------------------------------------------------
# v_lc.h5
# Variables stored in v_lc.h5, written one variable per row and transposed
# into the column-oriented layout {column header: list of values}.
v_lc = dict(zip(
    ('Name', 'Description', 'Unit', 'Valid Range', 'Data Type'),
    map(list, zip(
        ('lc1', 'land cover type of subpixel 1 (numerical)',
         '-', '[0, 255]', 'uint8'),
        ('lc2', 'land cover type of subpixel 2 (numerical)',
         '-', '[0, 255]', 'uint8'),
        ('lc3', 'land cover type of subpixel 3 (numerical)',
         '-', '[0, 255]', 'uint8'),
        ('lc4', 'land cover type of subpixel 4 (numerical)',
         '-', '[0, 255]', 'uint8'),
        ('dtime', 'date (YYYY-MM-DD)',
         '-', '>= 2002-01-01', 'datetime64'),
    )),
))
# ----------------------------------------------------------------------------
# cp.h5
# Column-oriented description of the variables stored in cp.h5: entry k of
# every list describes the variable named by entry k of 'Name'.
cp = {
    'Name': [
        'cp', 'n_nodes', 't_min', 't_max', 'dtime_min', 'dtime_max',
        'lat_mean', 'lon_mean', 'maxFRP_mean', 'maxFRP_sum',
        'neigh_int_min', 'neigh_min', 'duration', 'unique_gls', 'area',
        'expansion', 'country', 'continent',
    ],
    'Description': [
        'component index',
        'number of constituent fire events',
        'ignition date (days since 2002-01-01)',
        'extinction date (days since 2002-01-01)',
        'ignition date (YYYY-MM-DD)',
        'extinction date (YYYY-MM-DD)',
        'mean location latitude',
        'mean location longitude',
        'mean maximum fire radiative power',
        'sum of maximum fire radiative powers',
        'minimum of "neigh_int" values of constituent fire events',
        'string representation of "neigh_int_min"',
        'fire duration',
        'number of grid locations burnt',
        'total area burnt',
        'average daily fire expansion',
        'country of occurrence',
        'continent of occurrence',
    ],
    'Unit': [
        '-', '-', 'days since 2002-01-01', 'days since 2002-01-01', '-',
        '-', 'degrees', 'degrees', 'MW*10', 'MW*10', '-', '-', 'days', '-',
        'km^2', 'km^2 day^-1', '-', '-',
    ],
    # latitude spans [-90, 90] and longitude spans [-180, 180]
    'Valid Range': [
        '>= 0', '>= 1', '>= 0', '>= 0', '>= 2002-01-01', '>= 2002-01-01',
        '[-90, 90]', '[-180, 180]', '>= 0', '>= 0', '[0, 9]', '-', '>= 1',
        '>= 1', '>= 0.86 (1 MODIS pixel)', '> 0', '-', '-',
    ],
    'Data Type': [
        'int64', 'int64', 'uint16', 'uint16', 'datetime64', 'datetime64',
        'float64', 'float64', 'float64', 'float64', 'uint8', 'string',
        'uint16', 'uint32', 'float64', 'float64', 'string', 'string',
    ],
}
# ----------------------------------------------------------------------------
# cp_lc.h5
# Variables stored in cp_lc.h5, written one variable per row and transposed
# into the column-oriented layout {column header: list of values}.
cp_lc = dict(zip(
    ('Name', 'Description', 'Unit', 'Valid Range', 'Data Type'),
    map(list, zip(
        ('cp', 'component index',
         '-', '>= 0', 'int64'),
        ('dlc', 'dominant land cover type*',
         '-', '-', 'string'),
        ('lc_X', 'number of subpixels burnt belonging to land cover X',
         '-', '>= 0', 'int64'),
        ('plc_X', 'proportion of subpixels burnt belonging to land cover X',
         '-', '[0, 1]', 'float64'),
        ('flc_X', 'number of ignition subpixels belonging to land cover X',
         '-', '>= 0', 'int64'),
        ('dtime_min', 'ignition date (YYYY-MM-DD)',
         '-', '>= 2002-01-01', 'datetime64'),
    )),
))
# ----------------------------------------------------------------------------
# cp_poly
# Variables of the cp_poly table, written one variable per row and
# transposed into the column-oriented layout {column header: list of values}.
cp_poly = dict(zip(
    ('Name', 'Description', 'Unit', 'Valid Range', 'Data Type'),
    map(list, zip(
        ('cp', 'component index',
         '-', '>= 0', 'int64'),
        ('area', 'total area burnt',
         'km^2', '>= 0.86 (1 MODIS pixel)', 'float64'),
        ('perimeter', 'final perimeter',
         'km', '>= 3.71 (1 MODIS pixel)', 'float64'),
        ('geometry',
         '(Multi)Polygon vector data of spatiotemporal fire component',
         '-', '-', 'GeometryDtype'),
    )),
))
# ----------------------------------------------------------------------------
# cpt_poly
# Variables of the cpt_poly table, written one variable per row and
# transposed into the column-oriented layout {column header: list of values}.
cpt_poly = dict(zip(
    ('Name', 'Description', 'Unit', 'Valid Range', 'Data Type'),
    map(list, zip(
        ('cp', 'component index',
         '-', '>= 0', 'int64'),
        ('t', 'days since 2002-01-01',
         'days since 2002-01-01', '>= 0', 'int64'),
        ('area', 'total area burnt',
         'km^2', '>= 0.86 (1 MODIS pixel)', 'float64'),
        ('perimeter', 'perimeter at given day',
         'km', '>= 3.71 (1 MODIS pixel)', 'float64'),
        ('geometry',
         '(Multi)Polygon vector data of spatiotemporal fire component',
         '-', '-', 'GeometryDtype'),
    )),
))
# ----------------------------------------------------------------------------
# convert
# Two parallel lists, kept in the same order: the base name of each output
# file and the table written to it.
names = ['v', 'v_lc', 'cp', 'cp_lc', 'cp_poly', 'cpt_poly']
dfs = [v, v_lc, cp, cp_lc, cp_poly, cpt_poly]
for name, df in zip(names, dfs):
    convert_to_different_formats(df=df, name=name)