Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix estimation #99

Closed
wants to merge 10 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -37,3 +37,4 @@ data/2015_12_21_zoning_parcels.csv
data/simple_parcels.*
data/02_01_2016_parcels_geography.csv
data/2015_12_21_zoning_parcels.csv
data/
59 changes: 35 additions & 24 deletions baus.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,10 @@
COMPARE_TO_NO_PROJECT = True
NO_PROJECT = 611
EARTHQUAKE = False
DATA_OUT = './output/model_data_output.h5'
OUT_TABLES = ['jobs', 'households', 'buildings', 'parcels']

IN_YEAR, OUT_YEAR = 2010, 2050
IN_YEAR, OUT_YEAR = 2010, 2025
COMPARE_AGAINST_LAST_KNOWN_GOOD = False

LAST_KNOWN_GOOD_RUNS = {
Expand Down Expand Up @@ -97,11 +99,6 @@

SCENARIO = orca.get_injectable("scenario")

if INTERACT:
import code
code.interact(local=locals())
sys.exit()

run_num = orca.get_injectable("run_number")

if LOGS:
Expand Down Expand Up @@ -198,16 +195,16 @@ def get_simulation_models(SCENARIO):

# save_intermediate_tables", # saves output for visualization

"topsheet",
# "topsheet",
"simulation_validation",
"parcel_summary",
"building_summary",
# "parcel_summary",
# "building_summary",
"diagnostic_output",
"geographic_summary",
"travel_model_output",
# "geographic_summary",
# "travel_model_output",
# "travel_model_2_output",
"hazards_slr_summary",
"hazards_eq_summary"
# "hazards_slr_summary",
# "hazards_eq_summary"

]

Expand Down Expand Up @@ -239,7 +236,7 @@ def run_models(MODE, SCENARIO):
"preproc_jobs",
"preproc_households",
"preproc_buildings",
"initialize_residential_units"
# "initialize_residential_units" # ual already has a static units table
])

elif MODE == "fetch_data":
Expand Down Expand Up @@ -289,25 +286,34 @@ def run_models(MODE, SCENARIO):

"price_vars",

"topsheet",
# "topsheet",
"simulation_validation",
"parcel_summary",
"building_summary",
"geographic_summary",
"travel_model_output",
# "parcel_summary",
# "building_summary",
# "geographic_summary",
# "travel_model_output",
# "travel_model_2_output",
"hazards_slr_summary",
"hazards_eq_summary",
# "hazards_slr_summary",
# "hazards_eq_summary",
"diagnostic_output"

], iter_vars=[IN_YEAR])
],
iter_vars=[IN_YEAR],
data_out=DATA_OUT,
out_base_tables=[],
out_run_tables=OUT_TABLES
)

# start the simulation in the next round - only the models above run
# for the IN_YEAR
years_to_run = range(IN_YEAR+EVERY_NTH_YEAR, OUT_YEAR+1,
EVERY_NTH_YEAR)
models = get_simulation_models(SCENARIO)
orca.run(models, iter_vars=years_to_run)
orca.run(
models, iter_vars=years_to_run,
data_out='./output/model_data_output.h5',
out_base_tables=[],
out_run_tables=['jobs', 'buildings', 'households', 'parcels']
)

elif MODE == "estimation":

Expand Down Expand Up @@ -364,6 +370,11 @@ def run_models(MODE, SCENARIO):
raise "Invalid mode"


if INTERACT:
import code
code.interact(local=locals())
sys.exit()

print "Started", time.ctime()
print "Current Branch : ", BRANCH.rstrip()
print "Current Commit : ", CURRENT_COMMIT.rstrip()
Expand Down
60 changes: 58 additions & 2 deletions baus/datasources.py
Original file line number Diff line number Diff line change
Expand Up @@ -374,7 +374,9 @@ def add_drop_helper(col, val):

@orca.table(cache=True)
def parcels(store):
return store['parcels']
df = store['parcels']
df.index.name = 'parcel_id'
return df


@orca.table(cache=True)
Expand All @@ -387,6 +389,48 @@ def taz(zones):
return zones


@orca.table()
def skims(store):
    """
    Return the AM-period car skims indexed by origin/destination zone.

    Reads ``store['beam_skims']``, keeps only the rows whose ``period`` is
    ``'AM'`` and whose ``mode`` is ``'CAR'``, renames ``origTaz`` and
    ``destTaz`` to ``from_zone_id`` and ``to_zone_id``, and uses that pair
    of columns as the index of the returned DataFrame.

    Raises AssertionError if the filtered table does not contain exactly
    2114116 rows.
    """
    # 2114116 == 1454 ** 2 -- presumably one row per ordered pair of zones;
    # TODO confirm against the zone system the skims were produced for.
    expected_rows = 2114116

    df = store['beam_skims']
    df = df[(df['period'] == 'AM') & (df['mode'] == 'CAR')]

    # Raise explicitly instead of using a bare ``assert`` so the check is
    # not stripped under ``python -O`` and the failure reports the counts.
    # AssertionError is kept so the exception type seen by callers is the
    # same as before.
    if len(df) != expected_rows:
        raise AssertionError(
            "expected {} AM/CAR skim rows, found {}".format(
                expected_rows, len(df)))

    df = df.rename(
        columns={'origTaz': 'from_zone_id', 'destTaz': 'to_zone_id'})
    return df.set_index(['from_zone_id', 'to_zone_id'])


def register_aggregation_table(table_name, table_id):
    """
    Register a cached orca table for one aggregate geography.

    The table is registered under ``table_name``.  It has no columns; its
    index is named ``table_id`` and holds the ids that
    ``parcels[table_id].value_counts()`` reports.  Returns the result of
    applying the ``orca.table`` decorator to the table function.
    """

    def build_table(parcels):
        # value_counts() is used only for its index of ids; the counts
        # themselves are discarded.
        geography_ids = parcels[table_id].value_counts().index.values
        return pd.DataFrame(index=pd.Index(geography_ids, name=table_id))

    return orca.table(table_name, cache=True)(build_table)


# Aggregate-geography tables to register: each key is an orca table name and
# each value is the parcels column whose ids index that table.  Geographies
# that are not registered are kept below as comments.
aggregate_geos = {
    # 'zonings': 'zoning_id',
    # 'locations': 'location_id',
    # 'block_groups': 'block_group_id',
    'blocks': 'block_id',
    'zones': 'zone_id',
    # 'plans': 'plan_id',
    # 'zone_districts': 'zone_district_id',
    # 'zone_subdistricts': 'zone_subdistrict_id'
}
orca.add_injectable('aggregate_geos', aggregate_geos)

# Each item is a (table_name, table_id) pair, unpacked into the call.
for geog in aggregate_geos.items():
    register_aggregation_table(*geog)


@orca.table(cache=True)
def parcel_rejections():
url = "https://forecast-feedback.firebaseio.com/parcelResults.json"
Expand Down Expand Up @@ -640,7 +684,12 @@ def buildings(store):

@orca.table(cache=True)
def residential_units(store):
return print_error_if_not_available(store, 'residential_units_preproc')
# return print_error_if_not_available(store, 'residential_units_preproc')

df = store['units']
if df.dtypes['tenure'] == np.int64:
df['tenure'] = df.tenure.map({1: 'own', 2: 'rent'})
return df


@orca.table(cache=True)
Expand Down Expand Up @@ -834,3 +883,10 @@ def tracts_earthquake():
orca.broadcast('tmnodes', 'buildings', cast_index=True, onto_on='tmnode_id')
orca.broadcast('taz_geography', 'parcels', cast_index=True,
onto_on='zone_id')
orca.broadcast('parcels', 'homesales', cast_index=True, onto_on='parcel_id')
orca.broadcast('nodes', 'homesales', cast_index=True, onto_on='node_id')
orca.broadcast('tmnodes', 'homesales', cast_index=True, onto_on='tmnode_id')
orca.broadcast('nodes', 'costar', cast_index=True, onto_on='node_id')
orca.broadcast('tmnodes', 'costar', cast_index=True, onto_on='tmnode_id')
orca.broadcast('logsums', 'homesales', cast_index=True, onto_on='zone_id')
orca.broadcast('logsums', 'costar', cast_index=True, onto_on='zone_id')
40 changes: 28 additions & 12 deletions baus/preprocessing.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,9 +90,18 @@ def move_jobs_from_portola_to_san_mateo_county(parcels, buildings, jobs_df):
@orca.step()
def preproc_jobs(store, baseyear_taz_controls, settings, parcels):
buildings = store['buildings']
jobs = store['jobs']

jobs = allocate_jobs(baseyear_taz_controls, settings, buildings, parcels)
jobs = move_jobs_from_portola_to_san_mateo_county(parcels, buildings, jobs)
# jobs table already created if using UAL data so no need to re-allocate
# jobs = allocate_jobs(baseyear_taz_controls, settings, buildings, parcels)

# have to run this step from jobs allocations to get right columns
sector_map = settings["naics_to_empsix"]
jobs['empsix'] = jobs['sector_id'].replace(sector_map)

# this call is commented out only because it breaks when using the UAL
# jobs table
# jobs = move_jobs_from_portola_to_san_mateo_county(
# parcels, buildings, jobs)
store['jobs_preproc'] = jobs


Expand All @@ -101,7 +110,12 @@ def preproc_households(store):

df = store['households']

df['tenure'] = df.hownrent.map({1: 'own', 2: 'rent'})
df['tenure'] = df.tenure.map({1: 'own', 2: 'rent'})

# drop the county column because a merge in proportional_elcm()
# breaks when there are two county columns
df = df.drop(['county'], axis=1)

# need to keep track of base year income quartiles for use in the
# transition model - even caching doesn't work because when you add
Expand All @@ -112,18 +126,20 @@ def preproc_households(store):
df["base_income_octile"] = pd.Series(pd.qcut(df.income, 8, labels=False),
index=df.index).add(1)

# had to comment this stuff out bc it breaks

# there are some overrides where we move households around in order
# to match the city totals - in the future we will resynthesize and this
# can go away - this csv is generated by scripts/match_city_totals.py
overrides = pd.read_csv("data/household_building_id_overrides.csv",
index_col="household_id").building_id
df.loc[overrides.index, "building_id"] = overrides.values
# overrides = pd.read_csv("data/household_building_id_overrides.csv",
# index_col="household_id").building_id
# df.loc[overrides.index, "building_id"] = overrides.values

# turns out we need 4 more households
new_households = df.loc[[1132542, 1306618, 950630, 886585]].reset_index()
# keep unique index
new_households.index += pd.Series(df.index).max() + 1
df = df.append(new_households)
# # turns out we need 4 more households
# new_households = df.loc[[1132542, 1306618, 950630, 886585]].reset_index()
# # keep unique index
# new_households.index += pd.Series(df.index).max() + 1
# df = df.append(new_households)

store['households_preproc'] = df

Expand Down Expand Up @@ -281,7 +297,7 @@ def preproc_buildings(store, parcels, manual_edits):
'sqft_per_unit', 'nonres_rent_per_sqft',
'res_price_per_sqft',
'redfin_home_type', 'costar_property_type',
'costar_rent'], axis=1)
'costar_rent', 'res_sqft_per_unit'], axis=1)

# apply manual edits
edits = manual_edits.local
Expand Down
8 changes: 5 additions & 3 deletions baus/ual.py
Original file line number Diff line number Diff line change
Expand Up @@ -646,7 +646,8 @@ def rsh_simulate(residential_units, aggregations, settings, rsh_config):
utils.hedonic_simulate(cfg=rsh_config,
tbl=residential_units,
join_tbls=aggregations,
out_fname='unit_residential_price')
out_fname='unit_residential_price',
cast=True)

_mtc_clip(residential_units, 'unit_residential_price', settings)
return
Expand All @@ -665,10 +666,11 @@ def rrh_simulate(residential_units, aggregations, settings, rrh_config):
utils.hedonic_simulate(cfg=rrh_config,
tbl=residential_units,
join_tbls=aggregations,
out_fname='unit_residential_rent')
out_fname='unit_residential_rent',
cast=True)

_mtc_clip(residential_units, 'unit_residential_rent',
settings, price_scale=0.05/12)
settings, price_scale=0.05 / 12)
return


Expand Down
2 changes: 1 addition & 1 deletion baus/validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@ def simulation_validation(

check_household_controls(households, household_controls, year)

check_residential_units(residential_units, buildings)
# check_residential_units(residential_units, buildings)

# check_no_unplaced_households(households, year)

Expand Down
Loading