UDST · mxndrwgrdnr · Feb 9, 2018 · Feb 9, 2018 · Feb 9, 2018 · Mar 22, 2019
diff --git a/.gitignore b/.gitignore
@@ -37,3 +37,4 @@ data/2015_12_21_zoning_parcels.csv
 data/simple_parcels.*
 data/02_01_2016_parcels_geography.csv
 data/2015_12_21_zoning_parcels.csv
+data/
diff --git a/baus.py b/baus.py
@@ -31,8 +31,10 @@
 COMPARE_TO_NO_PROJECT = True
 NO_PROJECT = 611
 EARTHQUAKE = False
+# output file passed to orca.run() as data_out
+DATA_OUT = './output/model_data_output.h5'
+# table names passed to orca.run() as out_run_tables
+OUT_TABLES = ['jobs', 'households', 'buildings', 'parcels']
 
-IN_YEAR, OUT_YEAR = 2010, 2050
+IN_YEAR, OUT_YEAR = 2010, 2025
 COMPARE_AGAINST_LAST_KNOWN_GOOD = False
 
 LAST_KNOWN_GOOD_RUNS = {
@@ -97,11 +99,6 @@
 
 SCENARIO = orca.get_injectable("scenario")
 
-if INTERACT:
-    import code
-    code.interact(local=locals())
-    sys.exit()
-
 run_num = orca.get_injectable("run_number")
 
 if LOGS:
@@ -198,16 +195,16 @@ def get_simulation_models(SCENARIO):
 
         # save_intermediate_tables", # saves output for visualization
 
-        "topsheet",
+        # "topsheet",
         "simulation_validation",
-        "parcel_summary",
-        "building_summary",
+        # "parcel_summary",
+        # "building_summary",
         "diagnostic_output",
-        "geographic_summary",
-        "travel_model_output",
+        # "geographic_summary",
+        # "travel_model_output",
         # "travel_model_2_output",
-        "hazards_slr_summary",
-        "hazards_eq_summary"
+        # "hazards_slr_summary",
+        # "hazards_eq_summary"
 
     ]
 
@@ -239,7 +236,7 @@ def run_models(MODE, SCENARIO):
             "preproc_jobs",
             "preproc_households",
             "preproc_buildings",
-            "initialize_residential_units"
+            # "initialize_residential_units"     # ual already has a static units table
         ])
 
     elif MODE == "fetch_data":
@@ -289,25 +286,34 @@ def run_models(MODE, SCENARIO):
 
                 "price_vars",
 
-                "topsheet",
+                # "topsheet",
                 "simulation_validation",
-                "parcel_summary",
-                "building_summary",
-                "geographic_summary",
-                "travel_model_output",
+                # "parcel_summary",
+                # "building_summary",
+                # "geographic_summary",
+                # "travel_model_output",
                 # "travel_model_2_output",
-                "hazards_slr_summary",
-                "hazards_eq_summary",
+                # "hazards_slr_summary",
+                # "hazards_eq_summary",
                 "diagnostic_output"
-
-            ], iter_vars=[IN_YEAR])
+            ],
+                iter_vars=[IN_YEAR],
+                data_out=DATA_OUT,
+                out_base_tables=[],
+                out_run_tables=OUT_TABLES
+                )
 
         # start the simulation in the next round - only the models above run
         # for the IN_YEAR
         years_to_run = range(IN_YEAR+EVERY_NTH_YEAR, OUT_YEAR+1,
                              EVERY_NTH_YEAR)
         models = get_simulation_models(SCENARIO)
-        orca.run(models, iter_vars=years_to_run)
+        orca.run(
+            models, iter_vars=years_to_run,
+            data_out=DATA_OUT,  # same output settings as the IN_YEAR run
+            out_base_tables=[],
+            out_run_tables=OUT_TABLES
+            )
 
     elif MODE == "estimation":
 
@@ -364,6 +370,11 @@ def run_models(MODE, SCENARIO):
         raise "Invalid mode"
 
 
+# open an interactive console with this module's names in scope, then exit
+# without running anything further; presumably placed after the definitions
+# above so they are available in the session - TODO confirm
+if INTERACT:
+    import code
+    code.interact(local=locals())
+    sys.exit()
+
 print "Started", time.ctime()
 print "Current Branch : ", BRANCH.rstrip()
 print "Current Commit : ", CURRENT_COMMIT.rstrip()

diff --git a/baus/datasources.py b/baus/datasources.py
@@ -374,7 +374,9 @@ def add_drop_helper(col, val):
 
 @orca.table(cache=True)
 def parcels(store):
-    return store['parcels']
+    # label the index 'parcel_id' so it can be referred to by name
+    df = store['parcels']
+    df.index.name = 'parcel_id'
+    return df
 
 
 @orca.table(cache=True)
@@ -387,6 +389,48 @@ def taz(zones):
     return zones
 
 
+@orca.table()
+def skims(store):
+    # AM-period car rows of the 'beam_skims' store table, indexed by
+    # (from_zone_id, to_zone_id)
+    beam = store['beam_skims']
+    am_car = (beam['period'] == 'AM') & (beam['mode'] == 'CAR')
+    od_skims = beam[am_car]
+    # 2114116 == 1454 ** 2; presumably one row per zone pair - TODO confirm
+    assert len(od_skims) == 2114116
+    zone_cols = {'origTaz': 'from_zone_id', 'destTaz': 'to_zone_id'}
+    return od_skims.rename(columns=zone_cols).set_index(
+        ['from_zone_id', 'to_zone_id'])
+
+
+def register_aggregation_table(table_name, table_id):
+    """
+    Register a cached orca table for one aggregate geography.
+
+    The table is named `table_name`, has no columns, and is indexed by the
+    distinct non-null values of the parcels column `table_id` (the index
+    is also named `table_id`). The registered function is returned.
+    """
+
+    @orca.table(table_name, cache=True)
+    def func(parcels):
+        # value_counts() drops nulls and yields each id once
+        id_counts = parcels[table_id].value_counts()
+        geog_index = pd.Index(id_counts.index.values, name=table_id)
+        return pd.DataFrame(index=geog_index)
+
+    return func
+
+
+# orca table name -> parcels column holding that geography's id
+# NOTE(review): taz() takes a `zones` table, so a table named 'zones' may
+# already be registered elsewhere in this file; registering it again below
+# would replace it - confirm this is intended
+aggregate_geos = {
+    # 'zonings': 'zoning_id',
+    # 'locations': 'location_id',
+    # 'block_groups': 'block_group_id',
+    'blocks': 'block_id',
+    'zones': 'zone_id',
+    # 'plans': 'plan_id',
+    # 'zone_districts': 'zone_district_id',
+    # 'zone_subdistricts': 'zone_subdistrict_id'
+}
+orca.add_injectable('aggregate_geos', aggregate_geos)
+
+# one aggregation table per enabled geography
+for geog_table, geog_column in aggregate_geos.items():
+    register_aggregation_table(geog_table, geog_column)
+
+
 @orca.table(cache=True)
 def parcel_rejections():
     url = "https://forecast-feedback.firebaseio.com/parcelResults.json"
@@ -640,7 +684,12 @@ def buildings(store):
 
 @orca.table(cache=True)
 def residential_units(store):
-    return print_error_if_not_available(store, 'residential_units_preproc')
+    # read the 'units' table from the store directly instead of the
+    # preprocessed table used before:
+    # return print_error_if_not_available(store, 'residential_units_preproc')
+
+    df = store['units']
+    # tenure may be stored as an integer code of any width, not only int64;
+    # recode 1/2 to 'own'/'rent' whenever the column is integer-typed
+    if df['tenure'].dtype.kind in 'iu':
+        df['tenure'] = df['tenure'].map({1: 'own', 2: 'rent'})
+    return df
 
 
 @orca.table(cache=True)
@@ -834,3 +883,10 @@ def tracts_earthquake():
 orca.broadcast('tmnodes', 'buildings', cast_index=True, onto_on='tmnode_id')
 orca.broadcast('taz_geography', 'parcels', cast_index=True,
                onto_on='zone_id')
+# merge rules onto the homesales and costar tables; neither table nor
+# logsums is defined in the visible part of this file - TODO confirm they
+# are registered before these relationships are used
+orca.broadcast('parcels', 'homesales', cast_index=True, onto_on='parcel_id')
+orca.broadcast('nodes', 'homesales', cast_index=True, onto_on='node_id')
+orca.broadcast('tmnodes', 'homesales', cast_index=True, onto_on='tmnode_id')
+orca.broadcast('nodes', 'costar', cast_index=True, onto_on='node_id')
+orca.broadcast('tmnodes', 'costar', cast_index=True, onto_on='tmnode_id')
+orca.broadcast('logsums', 'homesales', cast_index=True, onto_on='zone_id')
+orca.broadcast('logsums', 'costar', cast_index=True, onto_on='zone_id')
diff --git a/baus/preprocessing.py b/baus/preprocessing.py
@@ -90,9 +90,18 @@ def move_jobs_from_portola_to_san_mateo_county(parcels, buildings, jobs_df):
 @orca.step()
 def preproc_jobs(store, baseyear_taz_controls, settings, parcels):
     buildings = store['buildings']
+    jobs = store['jobs']
 
-    jobs = allocate_jobs(baseyear_taz_controls, settings, buildings, parcels)
-    jobs = move_jobs_from_portola_to_san_mateo_county(parcels, buildings, jobs)
+    # the UAL data already provides a jobs table, so no allocation is needed
+    # NOTE(review): buildings, baseyear_taz_controls and parcels are no
+    # longer used by the live code in this step
+    # jobs = allocate_jobs(baseyear_taz_controls, settings, buildings, parcels)
+
+    # this step is taken from the jobs allocation so the table still gets
+    # the empsix column: sector_id values are replaced via the
+    # naics_to_empsix setting
+    sector_map = settings["naics_to_empsix"]
+    jobs['empsix'] = jobs['sector_id'].replace(sector_map)
+
+    # disabled only because it breaks when using the UAL jobs table
+    # jobs = move_jobs_from_portola_to_san_mateo_county(
+    #     parcels, buildings, jobs)
     store['jobs_preproc'] = jobs
 
 
@@ -101,7 +110,12 @@ def preproc_households(store):
 
     df = store['households']
 
-    df['tenure'] = df.hownrent.map({1: 'own', 2: 'rent'})
+    df['tenure'] = df.tenure.map({1: 'own', 2: 'rent'})
+
+    # drop the county column because there is a merge in
+    # proportional_elcm() that breaks when there are two
+    # county columns
+    df = df.drop(['county'], axis=1)
 
     # need to keep track of base year income quartiles for use in the
     # transition model - even caching doesn't work because when you add
@@ -112,18 +126,20 @@ def preproc_households(store):
     df["base_income_octile"] = pd.Series(pd.qcut(df.income, 8, labels=False),
                                          index=df.index).add(1)
 
+    # NOTE: the two steps below are commented out because they break
+
     # there are some overrides where we move households around in order
     # to match the city totals - in the future we will resynthesize and this
     # can go away - this csv is generated by scripts/match_city_totals.py
-    overrides = pd.read_csv("data/household_building_id_overrides.csv",
-                            index_col="household_id").building_id
-    df.loc[overrides.index, "building_id"] = overrides.values
+    # overrides = pd.read_csv("data/household_building_id_overrides.csv",
+    #                         index_col="household_id").building_id
+    # df.loc[overrides.index, "building_id"] = overrides.values
 
-    # turns out we need 4 more households
-    new_households = df.loc[[1132542, 1306618, 950630, 886585]].reset_index()
-    # keep unique index
-    new_households.index += pd.Series(df.index).max() + 1
-    df = df.append(new_households)
+    # # turns out we need 4 more households
+    # new_households = df.loc[[1132542, 1306618, 950630, 886585]].reset_index()
+    # # keep unique index
+    # new_households.index += pd.Series(df.index).max() + 1
+    # df = df.append(new_households)
 
     store['households_preproc'] = df
 
@@ -281,7 +297,7 @@ def preproc_buildings(store, parcels, manual_edits):
                   'sqft_per_unit', 'nonres_rent_per_sqft',
                   'res_price_per_sqft',
                   'redfin_home_type', 'costar_property_type',
-                  'costar_rent'], axis=1)
+                  'costar_rent', 'res_sqft_per_unit'], axis=1)
 
     # apply manual edits
     edits = manual_edits.local

diff --git a/baus/ual.py b/baus/ual.py
@@ -646,7 +646,8 @@ def rsh_simulate(residential_units, aggregations, settings, rsh_config):
     utils.hedonic_simulate(cfg=rsh_config,
                            tbl=residential_units,
                            join_tbls=aggregations,
-                           out_fname='unit_residential_price')
+                           out_fname='unit_residential_price',
+                           cast=True)
 
     _mtc_clip(residential_units, 'unit_residential_price', settings)
     return
@@ -665,10 +666,11 @@ def rrh_simulate(residential_units, aggregations, settings, rrh_config):
     utils.hedonic_simulate(cfg=rrh_config,
                            tbl=residential_units,
                            join_tbls=aggregations,
-                           out_fname='unit_residential_rent')
+                           out_fname='unit_residential_rent',
+                           cast=True)
 
     _mtc_clip(residential_units, 'unit_residential_rent',
-              settings, price_scale=0.05/12)
+              settings, price_scale=0.05 / 12)
     return
 
 

diff --git a/baus/validation.py b/baus/validation.py
@@ -117,7 +117,7 @@ def simulation_validation(
 
     check_household_controls(households, household_controls, year)
 
-    check_residential_units(residential_units, buildings)
+    # check_residential_units(residential_units, buildings)
 
 #    check_no_unplaced_households(households, year)