Showing 21 changed files with 404,640 additions and 8 deletions.
```sql
DROP TABLE IF EXISTS data_coverage;

CREATE TEMP TABLE data_coverage (
    locode varchar(16) NOT NULL,
    gpc_reference_number varchar(8) NOT NULL,
    publisher_id varchar(32) NOT NULL,
    year int NOT NULL,
    total bigint NULL,
    PRIMARY KEY (locode, gpc_reference_number, publisher_id, year)
);

/* Most city-wide emissions */
INSERT INTO data_coverage (locode, gpc_reference_number, publisher_id, year, total)
SELECT locode, "GPC_refno" AS gpc_reference_number, source_name AS publisher_id, year,
       SUM(emissions_value) AS total
FROM citywide_emissions
GROUP BY locode, "GPC_refno", source_name, year;

/* ClimateTrace */
INSERT INTO data_coverage (locode, year, gpc_reference_number, publisher_id, total)
SELECT locode, DATE_PART('year', start_time) AS year, reference_number AS gpc_reference_number,
       'ClimateTrace' AS publisher_id, SUM(emissions_quantity) AS total
FROM asset
WHERE locode IS NOT NULL
GROUP BY locode, DATE_PART('year', start_time), reference_number;

/* EPA */
INSERT INTO data_coverage (locode, year, gpc_reference_number, publisher_id, total)
SELECT locode, CAST(year AS INT), "GPC_ref_no" AS gpc_reference_number,
       'EPA' AS publisher_id, SUM(emissions_quantity) AS total
FROM ghgrp_epa
GROUP BY locode, year, "GPC_ref_no";

/* EDGAR */
INSERT INTO data_coverage (locode, year, gpc_reference_number, publisher_id, total)
SELECT locode, year, reference_number AS gpc_reference_number, 'EDGAR' AS publisher_id,
       SUM(emissions_quantity * fraction_in_city) AS total
FROM "CityCellOverlapEdgar" ccoe
JOIN "GridCellEmissionsEdgar" gcee
  ON ccoe.cell_lat = gcee.cell_lat AND ccoe.cell_lon = gcee.cell_lon
GROUP BY locode, year, reference_number;

/* Scaled by country (e.g. IEA) */
INSERT INTO data_coverage (locode, year, gpc_reference_number, publisher_id, total)
SELECT locode, country_code.year AS year, "GPC_refno" AS gpc_reference_number,
       source_name AS publisher_id,
       ROUND((CAST(p1.population AS float) / CAST(p2.population AS float)) * CAST(emissions_value AS float)) AS total
FROM geography, population p1, population p2, country_code
WHERE geography.locode = p1.actor_id
  AND geography.country = p2.actor_id
  AND p1.year = p2.year
  AND p2.year = country_code.year
  AND geography.country = country_code.country_code;

/* Scaled by region (e.g. Argentina) */
INSERT INTO data_coverage (locode, year, gpc_reference_number, publisher_id, total)
SELECT locode, regionwide_emissions.year AS year, "GPC_refno" AS gpc_reference_number,
       source_name AS publisher_id,
       ROUND((CAST(p1.population AS float) / CAST(p2.population AS float)) * CAST(emissions_value AS float)) AS total
FROM geography, population p1, population p2, regionwide_emissions
WHERE geography.locode = p1.actor_id
  AND geography.region = p2.actor_id
  AND p1.year = p2.year
  AND p2.year = regionwide_emissions.year
  AND geography.region = regionwide_emissions.region_code;

\copy data_coverage (locode, gpc_reference_number, publisher_id, year, total) TO 'data_coverage.csv' WITH CSV HEADER;

DROP TABLE data_coverage;
```
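Because the script uses the `\copy` meta-command, it is meant to be run through `psql`. A minimal sketch, assuming the script above is saved as `data_coverage.sql` (the commit page does not show the actual file name) and `DB_URI` points at the target database:

```bash
# data_coverage.sql is an assumed file name; \copy writes data_coverage.csv
# to the directory psql is invoked from.
psql "$DB_URI" -f data_coverage.sql
```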
# Custom polygons from cities

Cities may have their own boundaries that should overwrite the OpenStreetMap ones. This importer must run after the OSM importer, so that it updates those boundaries, and before any joins to data sources.

1. Save the database URI to an environment variable named `DB_URI`

2. Run the script

```bash
python ./custom_polygons/custom_polygon_importer.py
```
Use the following to create an environment variable in `zsh`:

```sh
export DB_URI="postgresql://ccglobal:@localhost/ccglobal"
```
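For context on where this step sits in the pipeline, here is a hedged sketch of the ordering described above. Only `custom_polygon_importer.py` is confirmed by this commit; the surrounding importer paths are hypothetical:

```bash
# 1. Load OpenStreetMap boundaries first (importer path assumed, not shown in this commit)
python ./osm/osm_importer.py
# 2. Overwrite them with city-supplied polygons
python ./custom_polygons/custom_polygon_importer.py
# 3. Only then run importers that join data sources against boundaries
```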
### Directory tree

```sh
├── README.md                    # top level readme
├── custom_polygon_importer.py   # importer for crosswalk city data
└── utils.py                     # utility functions used by the importer scripts
```
global-api/importer/custom_polygons/custom_polygon_importer.py (146 additions, 0 deletions)
```python
import argparse
import os
import zipfile

import geopandas as gpd
import osmnx as ox
import pandas as pd
from shapely import wkt
from shapely.geometry import Polygon
from sqlalchemy import create_engine
from sqlalchemy.sql import text


def unzip_file(zip_file_path, extract_to_path='./'):
    """
    Unzips a file to a specified directory. If no extraction directory is
    provided, it defaults to the current directory.

    Args:
        zip_file_path (str): Path to the ZIP file.
        extract_to_path (str, optional): Directory where the contents of the
            ZIP file will be extracted. Defaults to './'.

    Returns:
        None
    """
    # Open the ZIP file for reading
    with zipfile.ZipFile(zip_file_path, 'r') as zip_ref:
        # Extract all the contents of the ZIP file to the specified directory
        zip_ref.extractall(extract_to_path)


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--database_uri",
        help="database URI (e.g. postgresql://ccglobal:@localhost/ccglobal)",
        default=os.environ.get("DB_URI"),
    )
    parser.add_argument(
        "--zip_file_path",
        help="Path to the ZIP file",
        default="./Limites Ciudad-001.zip",
    )
    parser.add_argument(
        "--extract_to_path",
        help="Directory where the contents of the ZIP file will be extracted",
        default="./",
    )
    args = parser.parse_args()

    unzip_file(args.zip_file_path, args.extract_to_path)
    gdf = gpd.read_file(os.path.join(args.extract_to_path, './Limites Ciudad/Limites Ciudad.shp'))

    # Set the source CRS to EPSG:22192
    gdf.crs = "EPSG:22192"

    # Reproject to EPSG:4326 (WGS84)
    gdf = gdf.to_crs("EPSG:4326")

    linestring = gdf['geometry'].iloc[0]

    # Convert the boundary LineString to a Polygon
    polygon = Polygon(linestring)

    # Convert the Polygon to its well-known text (WKT) representation
    polygon_wkt = wkt.dumps(polygon)

    # Get the bounding box coordinates
    bbox = linestring.bounds

    # Extract individual bounding box coordinates
    bbox_north = bbox[3]
    bbox_south = bbox[1]
    bbox_east = bbox[2]
    bbox_west = bbox[0]

    # Add the locode for Mendoza
    locode = 'AR MDZ'

    # Extract center point coordinates from the boundary
    center_point = linestring.centroid
    lat = center_point.y
    lon = center_point.x

    # Retrieve the GeoDataFrame with the place boundary from OpenStreetMap
    place_gdf = ox.geocode_to_gdf('R4206710', by_osmid=True)

    # Extract required attributes
    data = {
        'geometry': [polygon_wkt],
        'bbox_north': [bbox_north],
        'bbox_south': [bbox_south],
        'bbox_east': [bbox_east],
        'bbox_west': [bbox_west],
        'locode': [locode],
        'lat': [lat],
        'lon': [lon],
        'type': ['custom']
    }

    # Merge with attributes from place_gdf
    data = {**data, **place_gdf.iloc[0].drop(['geometry', 'bbox_north', 'bbox_south', 'bbox_east', 'bbox_west', 'lat', 'lon', 'type']).to_dict()}

    # Create a DataFrame with the data to be inserted into the database
    df = pd.DataFrame(data)

    # Create a SQLAlchemy engine
    engine = create_engine(args.database_uri)

    # Write the DataFrame to a staging table
    df.to_sql('osm_staging', engine, if_exists='replace', index=False)

    # Define the UPSERT query using the text() construct
    upsert_query = """
    INSERT INTO osm (geometry, bbox_north, bbox_south, bbox_east, bbox_west, place_id, osm_type, osm_id, lat, lon, "class", "type", place_rank, importance, addresstype, name, display_name, locode)
    SELECT geometry, bbox_north, bbox_south, bbox_east, bbox_west, place_id, osm_type, osm_id, lat, lon, "class", "type", place_rank, importance, addresstype, name, display_name, locode
    FROM osm_staging
    ON CONFLICT (locode)
    DO UPDATE SET
        geometry = EXCLUDED.geometry,
        bbox_north = EXCLUDED.bbox_north,
        bbox_south = EXCLUDED.bbox_south,
        bbox_east = EXCLUDED.bbox_east,
        bbox_west = EXCLUDED.bbox_west,
        place_id = EXCLUDED.place_id,
        osm_type = EXCLUDED.osm_type,
        osm_id = EXCLUDED.osm_id,
        lat = EXCLUDED.lat,
        lon = EXCLUDED.lon,
        "class" = EXCLUDED."class",
        "type" = EXCLUDED."type",
        place_rank = EXCLUDED.place_rank,
        importance = EXCLUDED.importance,
        addresstype = EXCLUDED.addresstype,
        name = EXCLUDED.name,
        display_name = EXCLUDED.display_name;
    DROP TABLE osm_staging;
    """

    with engine.connect() as connection:
        try:
            connection.execute(text(upsert_query))
            connection.commit()
            print("Query completed successfully.")
        except Exception as e:
            print("Error updating osm table:", e)
```