diff --git a/docs/source/notebooks/TutorialNotebook.ipynb b/docs/source/notebooks/TutorialNotebook.ipynb index 1cb3163..238726f 100644 --- a/docs/source/notebooks/TutorialNotebook.ipynb +++ b/docs/source/notebooks/TutorialNotebook.ipynb @@ -25,9 +25,18 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 37, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The autoreload extension is already loaded. To reload it, use:\n", + " %reload_ext autoreload\n" + ] + } + ], "source": [ "# Add ldcpy root to system path\n", "import sys\n", @@ -125,7 +134,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 38, "metadata": {}, "outputs": [ { @@ -205,7 +214,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 39, "metadata": {}, "outputs": [ { @@ -604,7 +613,7 @@ " cell_measures: area: cell_area\n", " data_type: cam-fv\n", " file_size: {'orig': 10622798, 'comp': 2994433}\n", - " weighted: True
array([-90. , -89.057592, -88.115183, -87.172775, -86.230366, -85.287958,\n", + " weighted: True
array([-90. , -89.057592, -88.115183, -87.172775, -86.230366, -85.287958,\n", " -84.34555 , -83.403141, -82.460733, -81.518325, -80.575916, -79.633508,\n", " -78.691099, -77.748691, -76.806283, -75.863874, -74.921466, -73.979058,\n", " -73.036649, -72.094241, -71.151832, -70.209424, -69.267016, -68.324607,\n", @@ -635,14 +644,14 @@ " 68.324607, 69.267016, 70.209424, 71.151832, 72.094241, 73.036649,\n", " 73.979058, 74.921466, 75.863874, 76.806283, 77.748691, 78.691099,\n", " 79.633508, 80.575916, 81.518325, 82.460733, 83.403141, 84.34555 ,\n", - " 85.287958, 86.230366, 87.172775, 88.115183, 89.057592, 90. ])
array([ 3.643466, 7.59482 , 14.356632, 24.61222 , 38.2683 , 54.59548 ,\n", + " 85.287958, 86.230366, 87.172775, 88.115183, 89.057592, 90. ])
array([ 3.643466, 7.59482 , 14.356632, 24.61222 , 38.2683 , 54.59548 ,\n", " 72.012451, 87.82123 , 103.317127, 121.547241, 142.994039, 168.22508 ,\n", " 197.908087, 232.828619, 273.910817, 322.241902, 379.100904, 445.992574,\n", " 524.687175, 609.778695, 691.38943 , 763.404481, 820.858369, 859.534767,\n", - " 887.020249, 912.644547, 936.198398, 957.48548 , 976.325407, 992.556095])
array([ 0. , 1.25, 2.5 , ..., 356.25, 357.5 , 358.75])
array([cftime.DatetimeNoLeap(1920, 2, 1, 0, 0, 0, 0, has_year_zero=True),\n", + " 887.020249, 912.644547, 936.198398, 957.48548 , 976.325407, 992.556095])
array([ 0. , 1.25, 2.5 , ..., 356.25, 357.5 , 358.75])
array([cftime.DatetimeNoLeap(1920, 2, 1, 0, 0, 0, 0, has_year_zero=True),\n", " cftime.DatetimeNoLeap(1920, 3, 1, 0, 0, 0, 0, has_year_zero=True),\n", " cftime.DatetimeNoLeap(1920, 4, 1, 0, 0, 0, 0, has_year_zero=True)],\n", - " dtype=object)
\n",
" collection (collection) <U4 'orig' 'comp' array(['orig', 'comp'], dtype='<U4')
time PandasIndex PandasIndex(CFTimeIndex([1920-02-01 00:00:00, 1920-03-01 00:00:00, 1920-04-01 00:00:00],\n", + " dtype='object', length=3, calendar='noleap', freq='MS')) collection PandasIndex PandasIndex(Index(['orig', 'comp'], dtype='object', name='collection'))
| ||||||
99% real information cutoff bit | \n", - "1 | \n", - "1 | \n", - "1 | \n", + "18 | \n", + "25 | \n", + "29 | \n", "
---|---|---|---|---|---|---|
spatial autocorr - latitude | \n", @@ -1056,7 +1065,7 @@ "max value 315.584 315.57 315.576\n", "probability positive 1 1 1\n", "number of zeros 0 0 0\n", - "99% real information cutoff bit 1 1 1\n", + "99% real information cutoff bit 18 25 29\n", "spatial autocorr - latitude 0.993918 0.993911 0.993918\n", "spatial autocorr - longitude 0.996801 0.996791 0.996801\n", "entropy estimate 0.414723 0.247491 0.347534" @@ -1216,7 +1225,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 41, "metadata": {}, "outputs": [ { @@ -1320,9 +1329,9 @@ "||||||
99% real information cutoff bit | \n", - "1 | \n", - "1 | \n", - "1 | \n", + "18 | \n", + "25 | \n", + "29 | \n", "
spatial autocorr - latitude | \n", @@ -1356,7 +1365,7 @@ "max value 315.584 315.57 315.576\n", "probability positive 1 1 1\n", "number of zeros 0 0 0\n", - "99% real information cutoff bit 1 1 1\n", + "99% real information cutoff bit 18 25 29\n", "spatial autocorr - latitude 0.993918 0.993911 0.993918\n", "spatial autocorr - longitude 0.996801 0.996791 0.996801\n", "entropy estimate 0.414723 0.247491 0.347534" diff --git a/ldcpy/calcs.py b/ldcpy/calcs.py index e2c5254..9d37581 100644 --- a/ldcpy/calcs.py +++ b/ldcpy/calcs.py @@ -1048,34 +1048,41 @@ def dec_to_binary_3(self, num): print(b) return '{:032b}'.format(int32bits) - def get_adj_bit(self, bit_pos): - return [bit_pos[0] - 1, bit_pos[1]] + # def get_adj_bit(self, bit_pos): + # return [bit_pos[0] - 1, bit_pos[1]] - def get_dict_list(self, data_array, x_index): + def get_dict_list(self, data_array): dict_list_H = [] + # To do: should make this work for 64 also N_BITS = 32 + + # sz = data_array.size + # Intialize for i in range(N_BITS - 1): new_dict = {'00': 0, '01': 0, '10': 0, '11': 0} dict_list_H.append(new_dict) - b = np.empty(data_array[x_index].size, dtype=object) + # convert data array values to bits (so len(b) = sz) + b = np.empty(data_array.size, dtype=object) i = 0 - for y in np.nditer(np.asarray(data_array[x_index], dtype=np.float32).view(np.int32)): + for y in np.nditer(np.asarray(data_array, dtype=np.float32).view(np.int32)): b[i] = '{:032b}'.format(y) i += 1 + # go through each bit position (i) (except the last) for i in range(N_BITS - 1): count = 0 - for y in range(1, len(b) - 1): - if b[y][i] in ('0', '1') and b[y + 1][i] in ('0', '1'): + for y in range(1, len(b)): # go through each data point + + if b[y - 1][i] in ('0', '1') and b[y][i] in ('0', '1'): count += 1 - p00 = p01 = p10 = p11 = 0 - dict_list_H[i][b[y][i] + b[y + 1][i]] += 1 - dict_list_H[i]['00'] += p00 - dict_list_H[i]['01'] += p01 - dict_list_H[i]['10'] += p10 - dict_list_H[i]['11'] += p11 + dict_list_H[i][b[y - 1][i] + b[y][i]] += 1 + # dict_list_H[i]['00'] + # dict_list_H[i]['01'] + # dict_list_H[i]['10'] + # dict_list_H[i]['11'] + # turn to fractions (probabilities) if count == 0: dict_list_H[i]['00'] = 0 dict_list_H[i]['01'] = 0 @@ -1086,6 +1093,7 @@ def get_dict_list(self, data_array, x_index): dict_list_H[i]['01'] /= count dict_list_H[i]['10'] /= count dict_list_H[i]['11'] /= count + return dict_list_H def get_mutual_info(self, p00, p01, p10, p11): @@ -1107,15 +1115,17 @@ def get_mutual_info(self, p00, p01, p10, p11): return mutual_info - def get_real_info(self, x_index): + def get_real_info(self): # first, flatten the data array by stacking all the dimensions and removing the coordinates # like this: flattened_ds = self._ds.stack(flattened=['lat', 'lon', 'time']).reset_index('flattened') # but over any number of dimensions flattened_ds = self._ds.stack(flattened=self._ds.dims).reset_index('flattened') - dict_list_H = self.get_dict_list(flattened_ds, x_index) + dict_list_H = self.get_dict_list(flattened_ds) + # print(dict_list_H) mutual_info_array = [] + # here we go thru each bit position for bit_pos_dict in dict_list_H: p00 = bit_pos_dict['00'] p01 = bit_pos_dict['01'] @@ -1127,14 +1137,15 @@ def get_real_info(self, x_index): mutual_info_array.append(mutual_info) mutual_info_array = np.array(mutual_info_array) - # print(mutual_info_array) + # print("MIA =", mutual_info_array) + return xr.DataArray(mutual_info_array) @property def real_information(self) -> xr.DataArray: if not self._is_memoized('_real_information'): - # get the first binary digit - self._real_information = self.get_real_info(0) + + self._real_information = self.get_real_info() if hasattr(self._ds, 'units'): self._real_information.attrs['units'] = f'{self._ds.units}' @@ -1146,9 +1157,8 @@ def real_information_cutoff(self) -> xr.DataArray: # get the first binary digit self._real_information_cutoff = 0 self._captured_information = 0 - # if self.real_information.sum() > 0: - normalized_information = self.real_information / self.real_information.sum() # print(self.real_information) + normalized_information = self.real_information / self.real_information.sum() while self._captured_information < 0.99: self._captured_information += normalized_information[self._real_information_cutoff] self._real_information_cutoff += 1