Showing 21 changed files with 404,640 additions and 8 deletions.
```sql
DROP TABLE IF EXISTS data_coverage;

CREATE TEMP TABLE data_coverage (
    locode varchar(16) NOT NULL,
    gpc_reference_number varchar(8) NOT NULL,
    publisher_id varchar(32) NOT NULL,
    year int NOT NULL,
    total bigint NULL,
    PRIMARY KEY (locode, gpc_reference_number, publisher_id, year)
);

/* Most city-wide emissions */
INSERT INTO data_coverage (locode, gpc_reference_number, publisher_id, year, total)
SELECT locode, "GPC_refno" AS gpc_reference_number, source_name AS publisher_id, year,
       SUM(emissions_value) AS total
FROM citywide_emissions
GROUP BY locode, "GPC_refno", source_name, year;

/* ClimateTrace */
INSERT INTO data_coverage (locode, year, gpc_reference_number, publisher_id, total)
SELECT locode, DATE_PART('year', start_time) AS year, reference_number AS gpc_reference_number,
       'ClimateTrace' AS publisher_id, SUM(emissions_quantity) AS total
FROM asset
WHERE locode IS NOT NULL
GROUP BY locode, DATE_PART('year', start_time), reference_number;

/* EPA */
INSERT INTO data_coverage (locode, year, gpc_reference_number, publisher_id, total)
SELECT locode, CAST(year AS INT), "GPC_ref_no" AS gpc_reference_number,
       'EPA' AS publisher_id, SUM(emissions_quantity) AS total
FROM ghgrp_epa
GROUP BY locode, year, "GPC_ref_no";

/* EDGAR */
INSERT INTO data_coverage (locode, year, gpc_reference_number, publisher_id, total)
SELECT locode, year, reference_number AS gpc_reference_number, 'EDGAR' AS publisher_id,
       SUM(emissions_quantity * fraction_in_city) AS total
FROM "CityCellOverlapEdgar" ccoe
JOIN "GridCellEmissionsEdgar" gcee
  ON ccoe.cell_lat = gcee.cell_lat AND ccoe.cell_lon = gcee.cell_lon
GROUP BY locode, year, reference_number;

/* Scaled by country (e.g. IEA) */
INSERT INTO data_coverage (locode, year, gpc_reference_number, publisher_id, total)
SELECT locode, country_code.year AS year, "GPC_refno" AS gpc_reference_number,
       source_name AS publisher_id,
       ROUND((CAST(p1.population AS float) / CAST(p2.population AS float)) * CAST(emissions_value AS float)) AS total
FROM geography, population p1, population p2, country_code
WHERE geography.locode = p1.actor_id
  AND geography.country = p2.actor_id
  AND p1.year = p2.year
  AND p2.year = country_code.year
  AND geography.country = country_code.country_code;

/* Scaled by region (e.g. Argentina) */
INSERT INTO data_coverage (locode, year, gpc_reference_number, publisher_id, total)
SELECT locode, regionwide_emissions.year AS year, "GPC_refno" AS gpc_reference_number,
       source_name AS publisher_id,
       ROUND((CAST(p1.population AS float) / CAST(p2.population AS float)) * CAST(emissions_value AS float)) AS total
FROM geography, population p1, population p2, regionwide_emissions
WHERE geography.locode = p1.actor_id
  AND geography.region = p2.actor_id
  AND p1.year = p2.year
  AND p2.year = regionwide_emissions.year
  AND geography.region = regionwide_emissions.region_code;

\copy data_coverage (locode, gpc_reference_number, publisher_id, year, total) TO 'data_coverage.csv' WITH CSV HEADER;

DROP TABLE data_coverage;
```
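Because the script uses the `\copy` meta-command, it is meant to be run through `psql`. A minimal sketch, assuming the script above is saved as `data_coverage.sql` (the commit page does not show the actual file name) and `DB_URI` points at the target database:

```bash
# data_coverage.sql is an assumed file name; \copy writes data_coverage.csv
# to the directory psql is invoked from.
psql "$DB_URI" -f data_coverage.sql
```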
# Custom polygons from cities

Cities may have their own boundaries that should overwrite the OpenStreetMap ones. This importer must run after the OSM importer, so that it updates those boundaries, and before any joins to data sources.

1. Save the database URI to an environment variable named `DB_URI`

2. Run the script

```bash
python ./custom_polygons/custom_polygon_importer.py
```
Use the following to create an environment variable in `zsh`:

```sh
export DB_URI="postgresql://ccglobal:@localhost/ccglobal"
```
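For context on where this step sits in the pipeline, here is a hedged sketch of the ordering described above. Only `custom_polygon_importer.py` is confirmed by this commit; the surrounding importer paths are hypothetical:

```bash
# 1. Load OpenStreetMap boundaries first (importer path assumed, not shown in this commit)
python ./osm/osm_importer.py
# 2. Overwrite them with city-supplied polygons
python ./custom_polygons/custom_polygon_importer.py
# 3. Only then run importers that join data sources against boundaries
```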
### Directory tree

```sh
├── README.md                    # top level readme
├── custom_polygon_importer.py   # importer for crosswalk city data
└── utils.py                     # utility functions used by the importer scripts
```
global-api/importer/custom_polygons/custom_polygon_importer.py (146 additions, 0 deletions)
```python
import argparse
import os
import zipfile

import geopandas as gpd
import osmnx as ox
import pandas as pd
from shapely import wkt
from shapely.geometry import Polygon
from sqlalchemy import create_engine
from sqlalchemy.sql import text


def unzip_file(zip_file_path, extract_to_path='./'):
    """
    Unzips a file to a specified directory. If no extraction directory is
    provided, it defaults to the current directory.

    Args:
        zip_file_path (str): Path to the ZIP file.
        extract_to_path (str, optional): Directory where the contents of the
            ZIP file will be extracted. Defaults to './'.

    Returns:
        None
    """
    # Open the ZIP file for reading
    with zipfile.ZipFile(zip_file_path, 'r') as zip_ref:
        # Extract all the contents of the ZIP file to the specified directory
        zip_ref.extractall(extract_to_path)


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--database_uri",
        help="database URI (e.g. postgresql://ccglobal:@localhost/ccglobal)",
        default=os.environ.get("DB_URI"),
    )
    parser.add_argument(
        "--zip_file_path",
        help="Path to the ZIP file",
        default="./Limites Ciudad-001.zip",
    )
    parser.add_argument(
        "--extract_to_path",
        help="Directory where the contents of the ZIP file will be extracted",
        default="./",
    )
    args = parser.parse_args()

    unzip_file(args.zip_file_path, args.extract_to_path)
    gdf = gpd.read_file(os.path.join(args.extract_to_path, './Limites Ciudad/Limites Ciudad.shp'))

    # Set the source CRS to EPSG:22192
    gdf.crs = "EPSG:22192"

    # Reproject to EPSG:4326 (WGS84)
    gdf = gdf.to_crs("EPSG:4326")

    linestring = gdf['geometry'].iloc[0]

    # Convert the boundary LineString to a Polygon
    polygon = Polygon(linestring)

    # Convert the Polygon to its well-known text (WKT) representation
    polygon_wkt = wkt.dumps(polygon)

    # Get the bounding box coordinates
    bbox = linestring.bounds

    # Extract individual bounding box coordinates
    bbox_north = bbox[3]
    bbox_south = bbox[1]
    bbox_east = bbox[2]
    bbox_west = bbox[0]

    # Add the locode for Mendoza
    locode = 'AR MDZ'

    # Extract center point coordinates from the boundary
    center_point = linestring.centroid
    lat = center_point.y
    lon = center_point.x

    # Retrieve the GeoDataFrame with the place boundary from OpenStreetMap
    place_gdf = ox.geocode_to_gdf('R4206710', by_osmid=True)

    # Extract required attributes
    data = {
        'geometry': [polygon_wkt],
        'bbox_north': [bbox_north],
        'bbox_south': [bbox_south],
        'bbox_east': [bbox_east],
        'bbox_west': [bbox_west],
        'locode': [locode],
        'lat': [lat],
        'lon': [lon],
        'type': ['custom']
    }

    # Merge with attributes from place_gdf
    data = {**data, **place_gdf.iloc[0].drop(['geometry', 'bbox_north', 'bbox_south', 'bbox_east', 'bbox_west', 'lat', 'lon', 'type']).to_dict()}

    # Create a DataFrame with the data to be inserted into the database
    df = pd.DataFrame(data)

    # Create a SQLAlchemy engine
    engine = create_engine(args.database_uri)

    # Write the DataFrame to a staging table
    df.to_sql('osm_staging', engine, if_exists='replace', index=False)

    # Define the UPSERT query using the text() construct
    upsert_query = """
    INSERT INTO osm (geometry, bbox_north, bbox_south, bbox_east, bbox_west, place_id, osm_type, osm_id, lat, lon, "class", "type", place_rank, importance, addresstype, name, display_name, locode)
    SELECT geometry, bbox_north, bbox_south, bbox_east, bbox_west, place_id, osm_type, osm_id, lat, lon, "class", "type", place_rank, importance, addresstype, name, display_name, locode
    FROM osm_staging
    ON CONFLICT (locode)
    DO UPDATE SET
        geometry = EXCLUDED.geometry,
        bbox_north = EXCLUDED.bbox_north,
        bbox_south = EXCLUDED.bbox_south,
        bbox_east = EXCLUDED.bbox_east,
        bbox_west = EXCLUDED.bbox_west,
        place_id = EXCLUDED.place_id,
        osm_type = EXCLUDED.osm_type,
        osm_id = EXCLUDED.osm_id,
        lat = EXCLUDED.lat,
        lon = EXCLUDED.lon,
        "class" = EXCLUDED."class",
        "type" = EXCLUDED."type",
        place_rank = EXCLUDED.place_rank,
        importance = EXCLUDED.importance,
        addresstype = EXCLUDED.addresstype,
        name = EXCLUDED.name,
        display_name = EXCLUDED.display_name;
    DROP TABLE osm_staging;
    """

    with engine.connect() as connection:
        try:
            connection.execute(text(upsert_query))
            connection.commit()
            print("Query completed successfully.")
        except Exception as e:
            print("Error updating osm table:", e)
```