From 02a2af8daa750d67000f1b239280b9dc7b869b12 Mon Sep 17 00:00:00 2001
From: mariavictoriadiaz <diaz.maria@correounivalle.edu.co>
Date: Tue, 21 Nov 2023 13:35:22 -0500
Subject: [PATCH 1/6] calc prec accum for making summary

---
 .../aclimate_run_resampling.py                |   4 +-
 src/aclimate_resampling/resampling.py         | 197 +++++++-----------
 2 files changed, 82 insertions(+), 119 deletions(-)

diff --git a/src/aclimate_resampling/aclimate_run_resampling.py b/src/aclimate_resampling/aclimate_run_resampling.py
index 311e9df..343cde2 100644
--- a/src/aclimate_resampling/aclimate_run_resampling.py
+++ b/src/aclimate_resampling/aclimate_run_resampling.py
@@ -23,6 +23,8 @@ def main():
     parser.add_argument("-m", "--prev-months", type=int, help="Previous months", required=True)
     parser.add_argument("-c", "--cores", type=int, help="Number of cores", required=True)
     parser.add_argument("-y", "--forecast-year", type=int, help="Forecast year", required=True)
+    parser.add_argument("-y", "--actual-month", type=int, help="Actual month", required=True)
+
 
     args = parser.parse_args()
 
@@ -38,7 +40,7 @@ def main():
     start_date = (datetime.date.today() - pd.DateOffset(months=months_previous)).replace(day=1)
     cores = args.cores
     
-    ar = Resampling(path, country, year_forecast = args.forecast_year)
+    ar = Resampling(path, country, year_forecast = args.forecast_year, current_month= args.actual_month)
     ar.resampling()
     dd = CompleteData(start_date,country,path,cores=cores)
     dd.run()
diff --git a/src/aclimate_resampling/resampling.py b/src/aclimate_resampling/resampling.py
index 4c9a7b7..c630c84 100644
--- a/src/aclimate_resampling/resampling.py
+++ b/src/aclimate_resampling/resampling.py
@@ -15,7 +15,7 @@
 
 class Resampling():
 
-  def __init__(self,path,country, year_forecast):
+  def __init__(self,path,country, year_forecast, current_month):
      self.path = path
      self.country = country
      self.path_inputs = os.path.join(self.path,self.country,"inputs")
@@ -27,6 +27,7 @@ def __init__(self,path,country, year_forecast):
      self.path_outputs_prob = os.path.join(self.path_outputs_pred,"probForecast")
 
      self.year_forecast = year_forecast
+     self.current_month = current_month
      self.npartitions = 10 #int(round(cores/3)) 
 
      pass
@@ -184,8 +185,43 @@ def preprocessing(self,prob_root,  ids):
 
     #Return probability DataFrame
     return prob, forecast_period
+  
 
+  def gen_muestras(self, new_data, prob_type):
+    
+         
+    subset = new_data.loc[new_data['condition'] == prob_type]
+    m = subset.sample(1)
 
+    if any(m['year'] == max(new_data['year'])):
+      m = subset[subset['year'] != max(new_data['year'])].sample(1)
+    else:
+       m = m
+    
+    return m['year']
+  
+
+  def process_escenario(self, data, season, year,index):
+
+      if season == 'Nov-Dec-Jan':
+              m1 = data[(data['month'].isin([11,12])) & (data['year']== year)]
+              m2 = pd.concat([m1, data[(data['month'] == 1) & (data['year'] == year+1)]])
+              m2['index'] = index
+      else:
+              if season == 'Dec-Jan-Feb':
+                m1 = data[(data['month'] == 12) & (data['year'] == year)]
+                m2 = pd.concat([m1,data[(data['month'].isin([1,2])) & (data['year'] == year+1)]])
+                m2['index'] = index
+                
+              else:
+                    if season == 'Dec-Jan':
+                        m1 = data[(data['month'] == 12) & (data['year'] == year)]
+                        m2 = pd.concat([m1,data[(data['month'] == 1) & (data['year'] == year + 1)]])
+                        m2.loc['index'] = index
+                    else:
+                        m2 = data[data['year'] == year]
+                        m2['index'] = index
+      return m2
 
   def forecast_station(self, station, prob, daily_data_root, output_root, year_forecast, forecast_period):
     
@@ -312,128 +348,32 @@ def forecast_station(self, station, prob, daily_data_root, output_root, year_for
         muestras = x[['Start', 'End', 'Type', 'Prob']].sample(100, replace = True, weights=x['Prob'])
         muestras = muestras.set_index(pd.Index(list(range(0,100))))
 
-      # Randomly get one year from the total precipitation data based on precipitation conditions selected in the 100 data sample.
         muestras_by_type = []
-        for i in muestras.index:
-          m = new_data.loc[new_data['condition'] == muestras['Type'].iloc[i]].sample(1)
-
-          if any(m['year'] == max(new_data['year'])):
-            b = new_data.loc[new_data['condition'] == muestras['Type'].iloc[i]]
-            m = b[b['year'] != max(new_data['year'])].sample(1)
-          else:
-            m = m
+        for i in range(len(muestras)):
+              m = self.gen_muestras(new_data, muestras.iloc[i]['Type'])
+              muestras_by_type.append(m)           
 
-          muestras_by_type.append(m)
 
         # Join the 100 samples and add sample id
         muestras_by_type = pd.concat(muestras_by_type).reset_index()
         muestras_by_type['index'] = muestras.index
-        #muestras_by_type = muestras_by_type.set_index(pd.Index(list(range(0,100))))
-
 
         # Rename year column with season name
-        muestras_by_type = muestras_by_type.rename(columns = {'year':season})
+        muestras_by_type = muestras_by_type.rename(columns={'year': season})
 
-        #Set the sample years as list and sort
+        # Set the sample years as a list and sort
         years = list(muestras_by_type[season])
-        years.sort()
-
-
-        if season == 'Nov-Dec-Jan':
-          # If season is November-December-January
-
-          # Calculate the next year of the year sample and assign the same sample id
-          muestras_by_type['plus'] = list(map(lambda x: x + 1, muestras_by_type[season]))
-
-
-          years_plus = list(map(lambda x: x + 1, years))
-          years_plus.sort()
-
-          # Filter the climate data of the last two months of the years in the sample and get the sample id
-          merge_a =  data[data['year'].isin(years)]
-          merge_a = merge_a[merge_a['month'].isin([11,12])]
-          merge_a = pd.merge(merge_a, muestras_by_type[['index', season]], left_on = 'year', right_on = season)
-          merge_a.drop(season, axis = 1,inplace = True)
-
-          # Filter the climate data of the first month in the next year of the years in sample and get the sample id
-          merge_b = data[data['year'].isin(years_plus)]
-          merge_b = merge_b[merge_b['month'] == 1]
-          merge_b = pd.merge(merge_b, muestras_by_type[['index', 'plus']], left_on = 'year', right_on = 'plus')
-          merge_b.drop('plus', axis = 1,inplace = True)
-
-          # Merge the climate data filtered
-          merge = pd.concat([merge_a, merge_b])
-
-
-        else:
-          if season == 'Dec-Jan-Feb':
-            # If season is December-January-February
-
-
-            # Calculate the next year of the year sample and assign the same sample id
-            muestras_by_type['plus'] = list(map(lambda x: x + 1, muestras_by_type[season]))
-
-            years_plus = list(map(lambda x: x + 1, years))
-            years_plus.sort()
-
-            # Filter the climate data of the last month of the years in the sample and get the sample id
-
-            merge_a = data[data['year'].isin(years)]
-            merge_a = merge_a[merge_a['month'] == 12]
-            merge_a = pd.merge(merge_a, muestras_by_type[['index', season]], left_on = 'year', right_on = season)
-            merge_a = merge_a.drop(columns = [season])
-
-            # Filter the climate data of the first two months in the next year of the years in sample and get the sample id
-
-            merge_b = data[data['year'].isin(years_plus)]
-            merge_b = merge_b[merge_b['month'].isin([1,2])]
-            merge_b = pd.merge(merge_b, muestras_by_type[['index', 'plus']], left_on = 'year', right_on = 'plus')
-            merge_b = merge_b.drop(columns = ['plus'])
-
-            # Merge filtered data
-            merge = pd.concat([merge_a, merge_b])
-
-
-          else:
-            if season == 'Dec-Jan':
-
-                    # Calculate the next year of the year sample and assign the same sample id
-                    muestras_by_type['plus'] = list(map(lambda x: x + 1, muestras_by_type[season]))
-
-                    years_plus = list(map(lambda x: x + 1, years))
-                    years_plus.sort()
-
-                    # Filter the climate data of the last month of the years in the sample and get the sample id
-
-                    merge_a = data[data['year'].isin(years)]
-                    merge_a = merge_a[merge_a['month'] == 12]
-                    merge_a = pd.merge(merge_a, muestras_by_type[['index', season]], left_on = 'year', right_on = season)
-                    merge_a = merge_a.drop(columns = [season])
-
-                    # Filter the climate data of the first two months in the next year of the years in sample and get the sample id
-
-                    merge_b = data[data['year'].isin(years_plus)]
-                    merge_b = merge_b[merge_b['month'] == 1]
-                    merge_b = pd.merge(merge_b, muestras_by_type[['index', 'plus']], left_on = 'year', right_on = 'plus')
-                    merge_b = merge_b.drop(columns = ['plus'])
-
-                    # Merge filtered data
-                    merge = pd.concat([merge_a, merge_b])
-
-            else:
-                    # If season is another, filter climate data of the years in sample and get the sample id
-
-                    merge = data.loc[data['year'].isin(years)]
-                    merge = merge.loc[(merge['month'] >= x['Start'].iloc[0]) & (merge['month'] <= x['End'].iloc[0])]
-                    merge = pd.merge(merge,muestras_by_type[['index',season]],left_on = 'year', right_on = season)
-                    merge = merge.drop(columns = [season])
 
+        p = pd.DataFrame()
+        for x in range(len(years)):
+            p1 = self.process_escenario(data=data, season=season, year=years[x], index=muestras_by_type.iloc[x]['index'])
+            p = pd.concat([p, p1], ignore_index=True)
 
         # Join seasons samples by column by sample id
         base_years = pd.concat([base_years, muestras_by_type[['index',season]]], axis = 1,ignore_index=True)
 
         # Join climate data filtered for the seasons
-        seasons_range = pd.concat([seasons_range, merge])
+        seasons_range = pd.concat([seasons_range, p])
 
 
       seasons_range = seasons_range.rename(columns = {'index': 'id'})
@@ -494,9 +434,9 @@ def forecast_station(self, station, prob, daily_data_root, output_root, year_for
             return base_years, seasons_range, problem
 
 
-  def add_year(self, year_forecast, m):
+  def add_year(self, year_forecast, observed_month, current_month):
   
-    if m < datetime.today().month:
+    if observed_month < current_month:
      a = year_forecast + 1
     else:
       a = year_forecast
@@ -505,7 +445,7 @@ def add_year(self, year_forecast, m):
 
 
 
-  def save_forecast(self, station, output_root, year_forecast, seasons_range, base_years):
+  def save_forecast(self, station, output_root, year_forecast, seasons_range, base_years, current_month):
 
 
     if isinstance(base_years, pd.DataFrame):
@@ -529,7 +469,7 @@ def save_forecast(self, station, output_root, year_forecast, seasons_range, base
           df = df.drop(columns = ['year'])
 
           for j in list(range(len(df))):          
-              df.loc[j, 'year'] = self.add_year(year_forecast = year_forecast, m = df.loc[j, 'month'])
+              df.loc[j, 'year'] = self.add_year(year_forecast = year_forecast, observed_month= df.loc[j, 'month'], current_month= current_month)
 
           df = df.drop(['index','id', 'season'], axis = 1)
           df['year'] = df['year'].astype('int')
@@ -556,12 +496,31 @@ def save_forecast(self, station, output_root, year_forecast, seasons_range, base
       vars = [item for item in vars if item != "month"]
       vars = [item for item in vars if item != "day"]
 
+      accum = df.groupby(['id', 'month'])['prec'].sum().reset_index().rename(columns = {'id': 'escenario_id'})#.sort_values(['id', 'month'], ascending = True).reset_index()#
+      prom = df.groupby(['id', 'month'])[vars].mean().rename(columns = {'id': 'escenario_id'})#.reset_index()#.sort_values(['id', 'month'], ascending = True).reset_index()#.rename(columns = {vars[i]: 'max'})
+
+      summary = pd.merge(accum, prom, on=["escenario_id", "month"])
+
+      summary_min = summary.groupby(['month']).min().reset_index().drop(['escenario_id'], axis = 1)#.sort_values(['id', 'month'], ascending = True).reset_index()#.rename(columns = {vars[i]: 'max'})
+      summary_min = self.add_year(summary_min, year_forecast, current_month=current_month)
+
+      summary_max = summary.groupby(['month']).max().reset_index().drop(['escenario_id'], axis = 1)
+      summary_max = self.add_year(summary_max, year_forecast, current_month=current_month)
+
+      
+      summary_avg = summary.groupby(['month']).mean().reset_index().drop(['escenario_id'], axis = 1)
+      summary_avg = self.add_year(summary_avg, year_forecast, current_month=current_month)
+
+      vars = [item for item in vars if item != "id"]
+      vars.append('prec')
+
       for i in range(len(vars)):
-         print(df.groupby(['year', 'month'])[vars[i]].mean().reset_index().rename(columns = {vars[i]: 'avg'}).sort_values(['year', 'month'], ascending = True))
+         
+
+         summary_min[['year','month',vars[i]]].sort_values(['year', 'month'], ascending = True).to_csv(os.path.join(output_summary, f"{station}_{vars[i]}_min.csv"), index=False)
+         summary_max[['year','month',vars[i]]].sort_values(['year', 'month'], ascending = True).to_csv(os.path.join(output_summary, f"{station}_{vars[i]}_max.csv"), index=False)
+         summary_avg[['year','month',vars[i]]].sort_values(['year', 'month'], ascending = True).to_csv(os.path.join(output_summary, f"{station}_{vars[i]}_avg.csv"), index=False)
 
-         df.groupby(['year', 'month'])[vars[i]].max().reset_index().rename(columns = {vars[i]: 'max'}).sort_values(['year', 'month'], ascending = True).to_csv(os.path.join(output_summary, f"{station}_{vars[i]}_max.csv"), index=False)
-         df.groupby(['year', 'month'])[vars[i]].min().reset_index().rename(columns = {vars[i]: 'min'}).sort_values(['year', 'month'], ascending = True).to_csv(os.path.join(output_summary, f"{station}_{vars[i]}_min.csv"), index=False)
-         df.groupby(['year', 'month'])[vars[i]].mean().reset_index().rename(columns = {vars[i]: 'avg'}).sort_values(['year', 'month'], ascending = True).to_csv(os.path.join(output_summary, f"{station}_{vars[i]}_avg.csv"), index=False)
 
 
       print("Minimum, Maximum and Average of variables by escenary is saved in {}".format(output_summary))
@@ -574,7 +533,7 @@ def save_forecast(self, station, output_root, year_forecast, seasons_range, base
     
     
 
-  def master_processing(self,station, input_root, climate_data_root, verifica ,output_root,  year_forecast):
+  def master_processing(self,station, input_root, climate_data_root, verifica ,output_root,  year_forecast, current_month):
 
 
     if not os.path.exists(output_root):
@@ -599,7 +558,8 @@ def master_processing(self,station, input_root, climate_data_root, verifica ,out
                   output_root = output_root,
                   year_forecast = year_forecast,
                   base_years = resampling_forecast[0],
-                  seasons_range = resampling_forecast[1])
+                  seasons_range = resampling_forecast[1],
+                  current_month= current_month)
 
     if len(resampling_forecast) == 3:
         oth =os.path.join(output_root, "issues.csv")
@@ -638,7 +598,8 @@ def resampling(self):
                                                climate_data_root = self.path_inputs_daily,
                                                output_root = self.path_outputs_res,
                                                verifica = verifica,
-                                               year_forecast = self.year_forecast)
+                                               year_forecast = self.year_forecast,
+                                               current_month= self.current_month)
                                                   ), meta=_col
                                                   ).compute(scheduler='processes')
     return sample

From 971bcc19c2a90dc71b4e2cafe7905f500d37372f Mon Sep 17 00:00:00 2001
From: mariavictoriadiaz <diaz.maria@correounivalle.edu.co>
Date: Tue, 21 Nov 2023 16:13:22 -0500
Subject: [PATCH 2/6] fix months issue

---
 src/aclimate_resampling/aclimate_run_resampling.py |  2 +-
 src/aclimate_resampling/resampling.py              | 10 ++++++----
 2 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/src/aclimate_resampling/aclimate_run_resampling.py b/src/aclimate_resampling/aclimate_run_resampling.py
index 343cde2..34bea42 100644
--- a/src/aclimate_resampling/aclimate_run_resampling.py
+++ b/src/aclimate_resampling/aclimate_run_resampling.py
@@ -23,7 +23,7 @@ def main():
     parser.add_argument("-m", "--prev-months", type=int, help="Previous months", required=True)
     parser.add_argument("-c", "--cores", type=int, help="Number of cores", required=True)
     parser.add_argument("-y", "--forecast-year", type=int, help="Forecast year", required=True)
-    parser.add_argument("-y", "--actual-month", type=int, help="Actual month", required=True)
+    parser.add_argument("-a", "--actual-month", type=int, help="Actual month", required=True)
 
 
     args = parser.parse_args()
diff --git a/src/aclimate_resampling/resampling.py b/src/aclimate_resampling/resampling.py
index c630c84..935e273 100644
--- a/src/aclimate_resampling/resampling.py
+++ b/src/aclimate_resampling/resampling.py
@@ -201,8 +201,8 @@ def gen_muestras(self, new_data, prob_type):
     return m['year']
   
 
-  def process_escenario(self, data, season, year,index):
-
+  def process_escenario(self,data, season, month_start, month_end, year,index):
+          #data = s
       if season == 'Nov-Dec-Jan':
               m1 = data[(data['month'].isin([11,12])) & (data['year']== year)]
               m2 = pd.concat([m1, data[(data['month'] == 1) & (data['year'] == year+1)]])
@@ -219,8 +219,10 @@ def process_escenario(self, data, season, year,index):
                         m2 = pd.concat([m1,data[(data['month'] == 1) & (data['year'] == year + 1)]])
                         m2.loc['index'] = index
                     else:
-                        m2 = data[data['year'] == year]
+                        m2 = data[(data['year'] == year)]
+                        m2 = m2[(m2['month']  >= month_start) & (m2['month'] <= month_end)]
                         m2['index'] = index
+
       return m2
 
   def forecast_station(self, station, prob, daily_data_root, output_root, year_forecast, forecast_period):
@@ -366,7 +368,7 @@ def forecast_station(self, station, prob, daily_data_root, output_root, year_for
 
         p = pd.DataFrame()
         for x in range(len(years)):
-            p1 = self.process_escenario(data=data, season=season, year=years[x], index=muestras_by_type.iloc[x]['index'])
+            p1 = self.process_escenario(data=data, season=season, month_start= x['Start'].iloc[0], month_end = x['End'].iloc[0],year=years[j], index=muestras_by_type.iloc[j]['index'])
             p = pd.concat([p, p1], ignore_index=True)
 
         # Join seasons samples by column by sample id

From a4d02e572da47c3d77a876486d475724f9965c9e Mon Sep 17 00:00:00 2001
From: mariavictoriadiaz <diaz.maria@correounivalle.edu.co>
Date: Tue, 21 Nov 2023 16:20:14 -0500
Subject: [PATCH 3/6] fix months issue

---
 src/aclimate_resampling/resampling.py | 126 +++++++++++++-------------
 1 file changed, 65 insertions(+), 61 deletions(-)

diff --git a/src/aclimate_resampling/resampling.py b/src/aclimate_resampling/resampling.py
index 935e273..ec35b24 100644
--- a/src/aclimate_resampling/resampling.py
+++ b/src/aclimate_resampling/resampling.py
@@ -321,38 +321,42 @@ def forecast_station(self, station, prob, daily_data_root, output_root, year_for
 
       # Start the resampling process for every season of analysis in CPT probabilities file
 
-      base_years =  pd.DataFrame() # List to store years of sample for each season
-      seasons_range = pd.DataFrame() # List to store climate data in the years of sample for each season
+      # Create empty DataFrames to store the results
+      base_years = pd.DataFrame()
+      seasons_range = pd.DataFrame()
 
-      for season in  list(np.unique(cpt_prob['season'])):
+    # Iterate over seasons
+      for season in season:
+        print(season)
 
         # Select the probabilities for the season
         x = cpt_prob[cpt_prob['season'] == season]
 
-
         predictand = cpt_prob['predictand'].iloc[0]
 
-
-      # Compute total precipitation for each year in the climate data range selected
-        new_data = data[['year',predictand]].groupby(['year']).sum().reset_index()
-
+        # Compute total precipitation for each year in the climate data range selected
+        new_data = data[['year', predictand]].groupby(['year']).sum().reset_index()
         data['season'] = season
 
+        # Calculate quantiles to determine precipitation conditions for every year in climate data selected
+        cuantiles = list(np.quantile(new_data['prec'], [0.33, 0.66]))
+        new_data['condition'] = 'NA'
+        new_data.loc[new_data[predictand] <= cuantiles[0], 'condition'] = 'below'
+        new_data.loc[new_data[predictand] >= cuantiles[1], 'condition'] = 'above'
+        new_data.loc[
+            (new_data[predictand] > cuantiles[0]) & (new_data[predictand] < cuantiles[1]),
+            'condition'
+        ] = 'normal'
 
-      # Calculate quantiles to determine precipitation conditions for every year in climate data selected
-        cuantiles = list(np.quantile(new_data['prec'], [.33,.66]))
-        new_data['condition'] =  'NA'
-        new_data.loc[new_data[predictand]<= cuantiles[0], 'condition'] = 'below'
-        new_data.loc[new_data[predictand]>= cuantiles[1], 'condition'] =  'above'
-        new_data.loc[(new_data[predictand]> cuantiles[0]) & (new_data[predictand]< cuantiles[1]), 'condition'] =  'normal'
+        # Sample 100 records in probability file of season based on probability from CPT as weights
+        muestras = x[['Start', 'End', 'Type', 'Prob']].sample(100, replace=True, weights=x['Prob'])
+        muestras = muestras.set_index(pd.Index(list(range(0, 100))))
 
-      # Sample 100 records in probability file of season based on probability from CPT as weights
-        muestras = x[['Start', 'End', 'Type', 'Prob']].sample(100, replace = True, weights=x['Prob'])
-        muestras = muestras.set_index(pd.Index(list(range(0,100))))
+       
 
         muestras_by_type = []
         for i in range(len(muestras)):
-              m = self.gen_muestras(new_data, muestras.iloc[i]['Type'])
+              m = gen_muestras(new_data, muestras.iloc[i]['Type'])
               muestras_by_type.append(m)           
 
 
@@ -367,73 +371,73 @@ def forecast_station(self, station, prob, daily_data_root, output_root, year_for
         years = list(muestras_by_type[season])
 
         p = pd.DataFrame()
-        for x in range(len(years)):
-            p1 = self.process_escenario(data=data, season=season, month_start= x['Start'].iloc[0], month_end = x['End'].iloc[0],year=years[j], index=muestras_by_type.iloc[j]['index'])
+        for j in range(len(years)):
+            p1 = process_escenario(data=data, season=season, month_start= x['Start'].iloc[0], month_end = x['End'].iloc[0],year=years[j], index=muestras_by_type.iloc[j]['index'])
             p = pd.concat([p, p1], ignore_index=True)
-
         # Join seasons samples by column by sample id
-        base_years = pd.concat([base_years, muestras_by_type[['index',season]]], axis = 1,ignore_index=True)
+        base_years = pd.concat([base_years, muestras_by_type[['index', season]]], axis=1, ignore_index=True)
 
         # Join climate data filtered for the seasons
-        seasons_range = pd.concat([seasons_range, p])
-
+        seasons_range = pd.concat([seasons_range, p], ignore_index=True)
+          
 
       seasons_range = seasons_range.rename(columns = {'index': 'id'})
 
       if (forecast_period == 'tri') and (len(list(np.unique(cpt_prob['season']))) == 2):
 
-            s = list(np.unique(cpt_prob['season']))
-            base_years = base_years.iloc[:,[0,1,3] ]
-            base_years = base_years.rename(columns={0: 'id',1: s[0], 3: s[1]})
-            base_years['id'] = base_years['id'] + 1
-            seasons_range['id'] = seasons_range['id']+1
-            seasons_range = seasons_range.sort_values(by=['year', 'month'], ascending=True)
-            base_years.to_csv(os.path.join(val_root,  f"{station}_Escenario_A.csv"), index = False)
+              s = list(np.unique(cpt_prob['season']))
+              base_years = base_years.iloc[:,[0,1,3] ]
+              base_years = base_years.rename(columns={0: 'id',1: s[0], 3: s[1]})
+              base_years['id'] = base_years['id'] + 1
+              seasons_range['id'] = seasons_range['id']+1
+              seasons_range = seasons_range.sort_values(by=['year', 'month'], ascending=True)
+              base_years.to_csv(os.path.join(val_root,  f"{station}_Escenario_A.csv"), index = False)
 
 
-            #Return climate data filtered with sample id
-            return base_years, seasons_range
+              #Return climate data filtered with sample id
+              return base_years, seasons_range
 
       else:
-          if (forecast_period == 'bi') and (len(list(np.unique(cpt_prob['season']))) == 3) :
+            if (forecast_period == 'bi') and (len(list(np.unique(cpt_prob['season']))) == 3) :
 
-            s = list(np.unique(cpt_prob['season']))
-            base_years = base_years.iloc[:,[0,1,3,5] ]
-            base_years = base_years.rename(columns={0: 'id',1: s[0], 3: s[1], 5: s[2]})
-            base_years['id'] = base_years['id'] + 1
-            seasons_range['id'] = seasons_range['id']+1
-            seasons_range = seasons_range.sort_values(by=['year', 'month'], ascending=True)
-            base_years.to_csv(os.path.join(val_root,  f"{station}_Escenario_A.csv"), index = False)
+              s = list(np.unique(cpt_prob['season']))
+              base_years = base_years.iloc[:,[0,1,3,5] ]
+              base_years = base_years.rename(columns={0: 'id',1: s[0], 3: s[1], 5: s[2]})
+              base_years['id'] = base_years['id'] + 1
+              seasons_range['id'] = seasons_range['id']+1
+              seasons_range = seasons_range.sort_values(by=['year', 'month'], ascending=True)
+              base_years.to_csv(os.path.join(val_root,  f"{station}_Escenario_A.csv"), index = False)
 
 
-            #Return climate data filtered with sample id
-            return base_years, seasons_range
+              #Return climate data filtered with sample id
+              return base_years, seasons_range
 
-          else:
+            else:
 
-            print('Station does not have all the seasons availables')
+              print('Station does not have all the seasons availables')
 
-            s = list(np.unique(cpt_prob['season']))
-            if len(base_years.columns) == 2:
-              base_years = base_years.iloc[:,[0,1] ]
-              base_years = base_years.rename(columns={0: 'id',1: s[0]})
-            else:
-              if len(base_years.columns == 4):
-                base_years = base_years.rename(columns={0: 'id',1: s[0], 3: s[1]})
-              else:
+              s = list(np.unique(cpt_prob['season']))
+              if len(base_years.columns) == 2:
+                base_years = base_years.iloc[:,[0,1] ]
                 base_years = base_years.rename(columns={0: 'id',1: s[0]})
+              else:
+                if len(base_years.columns == 4):
+                  base_years = base_years.rename(columns={0: 'id',1: s[0], 3: s[1]})
+                else:
+                  base_years = base_years.rename(columns={0: 'id',1: s[0]})
 
-            base_years['id'] = base_years['id'] + 1
-            seasons_range['id'] = seasons_range['id']+1
+              base_years['id'] = base_years['id'] + 1
+              seasons_range['id'] = seasons_range['id']+1
 
 
-            p = {'id': [station],'issue': ['Station does not have all the seasons availables'], 'Seasons available': ", ".join([str(item) for item in s])}
-            problem = pd.DataFrame(p)
-            print(problem)
-            base_years.to_csv(os.path.join(val_root, f"{station}_Escenario_A.csv"), index = False)
+              p = {'id': [station],'issue': ['Station does not have all the seasons availables'], 'Seasons available': ", ".join([str(item) for item in s])}
+              problem = pd.DataFrame(p)
+              print(problem)
+              base_years.to_csv(os.path.join(val_root, f"{station}_Escenario_A.csv"), index = False)
 
-            #Return climate data filtered with sample id
-            return base_years, seasons_range, problem
+              #Return climate data filtered with sample id
+              return base_years, seasons_range, problem
+            
 
 
   def add_year(self, year_forecast, observed_month, current_month):

From 68e568493abb9c6303e1f8390d77ee15d55435f0 Mon Sep 17 00:00:00 2001
From: mariavictoriadiaz <diaz.maria@correounivalle.edu.co>
Date: Tue, 21 Nov 2023 16:26:15 -0500
Subject: [PATCH 4/6] fix

---
 src/aclimate_resampling/resampling.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/aclimate_resampling/resampling.py b/src/aclimate_resampling/resampling.py
index ec35b24..84b90ab 100644
--- a/src/aclimate_resampling/resampling.py
+++ b/src/aclimate_resampling/resampling.py
@@ -356,7 +356,7 @@ def forecast_station(self, station, prob, daily_data_root, output_root, year_for
 
         muestras_by_type = []
         for i in range(len(muestras)):
-              m = gen_muestras(new_data, muestras.iloc[i]['Type'])
+              m = self.gen_muestras(new_data, muestras.iloc[i]['Type'])
               muestras_by_type.append(m)           
 
 

From 899bcd4566e521e585d26c6f24a039fa30953ee8 Mon Sep 17 00:00:00 2001
From: mariavictoriadiaz <diaz.maria@correounivalle.edu.co>
Date: Wed, 22 Nov 2023 08:37:56 -0500
Subject: [PATCH 5/6] fix

---
 src/aclimate_resampling/resampling.py | 19 +++++++++++--------
 1 file changed, 11 insertions(+), 8 deletions(-)

diff --git a/src/aclimate_resampling/resampling.py b/src/aclimate_resampling/resampling.py
index 84b90ab..c6028a1 100644
--- a/src/aclimate_resampling/resampling.py
+++ b/src/aclimate_resampling/resampling.py
@@ -372,7 +372,7 @@ def forecast_station(self, station, prob, daily_data_root, output_root, year_for
 
         p = pd.DataFrame()
         for j in range(len(years)):
-            p1 = process_escenario(data=data, season=season, month_start= x['Start'].iloc[0], month_end = x['End'].iloc[0],year=years[j], index=muestras_by_type.iloc[j]['index'])
+            p1 = self.process_escenario(data=data, season=season, month_start= x['Start'].iloc[0], month_end = x['End'].iloc[0],year=years[j], index=muestras_by_type.iloc[j]['index'])
             p = pd.concat([p, p1], ignore_index=True)
         # Join seasons samples by column by sample id
         base_years = pd.concat([base_years, muestras_by_type[['index', season]]], axis=1, ignore_index=True)
@@ -451,7 +451,7 @@ def add_year(self, year_forecast, observed_month, current_month):
 
 
 
-  def save_forecast(self, station, output_root, year_forecast, seasons_range, base_years, current_month):
+  def save_forecast(self, station, output_root, year_forecast, current_month, seasons_range, base_years):
 
 
     if isinstance(base_years, pd.DataFrame):
@@ -470,18 +470,19 @@ def save_forecast(self, station, output_root, year_forecast, seasons_range, base
       year_forecast = int(year_forecast)
       
       for i in range(len(IDs)):
+
           df = seasons_range[(seasons_range['id'] == IDs[i])]
+
           df = df.reset_index()
           df = df.drop(columns = ['year'])
 
-          for j in list(range(len(df))):          
-              df.loc[j, 'year'] = self.add_year(year_forecast = year_forecast, observed_month= df.loc[j, 'month'], current_month= current_month)
-
-          df = df.drop(['index','id', 'season'], axis = 1)
-          df['year'] = df['year'].astype('int')
+          df = self.add_year(df = df, year_forecast = year_forecast, current_month=current_month)
 
+          df = df.drop(['index', 'season'], axis = 1)
+          df1 = df.copy()
+          
           escenarios.append(df)
-          df.to_csv(os.path.join(output_estacion ,f"{station}_escenario_{str(i+1)}.csv"), index=False)
+          df1.drop(['id'], axis = 1).to_csv(os.path.join(output_estacion ,f"{station}_escenario_{str(i+1)}.csv"), index=False)
 
       print("Escenaries saved in {}".format(output_estacion))
 
@@ -501,6 +502,8 @@ def save_forecast(self, station, output_root, year_forecast, seasons_range, base
       vars = [item for item in vars if item != "year"]
       vars = [item for item in vars if item != "month"]
       vars = [item for item in vars if item != "day"]
+      vars = [item for item in vars if item != "prec"]
+
 
       accum = df.groupby(['id', 'month'])['prec'].sum().reset_index().rename(columns = {'id': 'escenario_id'})#.sort_values(['id', 'month'], ascending = True).reset_index()#
       prom = df.groupby(['id', 'month'])[vars].mean().rename(columns = {'id': 'escenario_id'})#.reset_index()#.sort_values(['id', 'month'], ascending = True).reset_index()#.rename(columns = {vars[i]: 'max'})

From a28d033a064b2eb68c70b5b72a267b2f2b746cdb Mon Sep 17 00:00:00 2001
From: mariavictoriadiaz <diaz.maria@correounivalle.edu.co>
Date: Wed, 22 Nov 2023 09:12:10 -0500
Subject: [PATCH 6/6] fix

---
 src/aclimate_resampling/resampling.py | 25 +++++++++++++++----------
 1 file changed, 15 insertions(+), 10 deletions(-)

diff --git a/src/aclimate_resampling/resampling.py b/src/aclimate_resampling/resampling.py
index c6028a1..7ec385e 100644
--- a/src/aclimate_resampling/resampling.py
+++ b/src/aclimate_resampling/resampling.py
@@ -440,14 +440,19 @@ def forecast_station(self, station, prob, daily_data_root, output_root, year_for
             
 
 
-  def add_year(self, year_forecast, observed_month, current_month):
+  def add_year(self,df, year_forecast, current_month):
   
-    if observed_month < current_month:
-     a = year_forecast + 1
-    else:
-      a = year_forecast
+    month = df['month']
+    for j in list(range(len(df))):          
+
+      if month[j] < current_month:
+        df.loc[j, 'year'] = year_forecast + 1
+      else:
+        df.loc[j, 'year'] = year_forecast
+
+    df['year'] = df['year'].astype('int')
 
-    return a
+    return df
 
 
 
@@ -476,7 +481,7 @@ def save_forecast(self, station, output_root, year_forecast, current_month, seas
           df = df.reset_index()
           df = df.drop(columns = ['year'])
 
-          df = self.add_year(df = df, year_forecast = year_forecast, current_month=current_month)
+          df = self.add_year(df, year_forecast, current_month)
 
           df = df.drop(['index', 'season'], axis = 1)
           df1 = df.copy()
@@ -511,14 +516,14 @@ def save_forecast(self, station, output_root, year_forecast, current_month, seas
       summary = pd.merge(accum, prom, on=["escenario_id", "month"])
 
       summary_min = summary.groupby(['month']).min().reset_index().drop(['escenario_id'], axis = 1)#.sort_values(['id', 'month'], ascending = True).reset_index()#.rename(columns = {vars[i]: 'max'})
-      summary_min = self.add_year(summary_min, year_forecast, current_month=current_month)
+      summary_min = self.add_year(summary_min, year_forecast, current_month)
 
       summary_max = summary.groupby(['month']).max().reset_index().drop(['escenario_id'], axis = 1)
-      summary_max = self.add_year(summary_max, year_forecast, current_month=current_month)
+      summary_max = self.add_year(summary_max, year_forecast, current_month)
 
       
       summary_avg = summary.groupby(['month']).mean().reset_index().drop(['escenario_id'], axis = 1)
-      summary_avg = self.add_year(summary_avg, year_forecast, current_month=current_month)
+      summary_avg = self.add_year(summary_avg, year_forecast,current_month)
 
       vars = [item for item in vars if item != "id"]
       vars.append('prec')