From 967855155621ab4d0649b0bff2d56dfa60a72369 Mon Sep 17 00:00:00 2001 From: Chenglim Ear Date: Wed, 1 Nov 2023 17:13:05 -0700 Subject: [PATCH] upgrade csvkit to 1.3.0 and upgraded its dependencies where needed --- .github/workflows/main.yml | 2 +- Makefile | 12 ++++++------ bin/import-file | 16 ++++++++++------ requirements.txt | 10 +++++----- 4 files changed, 22 insertions(+), 18 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 41e456450..4ce2b7efb 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -81,7 +81,7 @@ jobs: echo "c1,c2" > test.csv echo "a,b" >> test.csv cat test.csv - csvsql -v --doublequote --db postgresql:///disclosure-backend --insert < test.csv + csvsql -v --db postgresql:///disclosure-backend --insert test.csv echo "List tables" psql -c "SELECT * FROM pg_catalog.pg_tables WHERE schemaname != 'pg_catalog' AND schemaname != 'information_schema';" diff --git a/Makefile b/Makefile index 8ea3bcf4e..1372f7cc1 100644 --- a/Makefile +++ b/Makefile @@ -74,22 +74,22 @@ prep-import-spreadsheets: do-import-spreadsheets: echo 'DROP TABLE IF EXISTS candidates;' | psql $(DATABASE_NAME) - csvsql --doublequote --db postgresql:///$(DATABASE_NAME) --insert $(CSV_PATH)/candidates.csv + csvsql --db postgresql:///$(DATABASE_NAME) --insert $(CSV_PATH)/candidates.csv echo 'ALTER TABLE "candidates" ADD COLUMN id SERIAL PRIMARY KEY;' | psql $(DATABASE_NAME) echo 'DROP TABLE IF EXISTS referendums;' | psql $(DATABASE_NAME) - csvsql --doublequote --db postgresql:///$(DATABASE_NAME) --insert $(CSV_PATH)/referendums.csv + csvsql --db postgresql:///$(DATABASE_NAME) --insert $(CSV_PATH)/referendums.csv echo 'ALTER TABLE "referendums" ADD COLUMN id SERIAL PRIMARY KEY;' | psql $(DATABASE_NAME) echo 'DROP TABLE IF EXISTS name_to_number;' | psql $(DATABASE_NAME) - csvsql --doublequote --db postgresql:///$(DATABASE_NAME) --insert $(CSV_PATH)/name_to_number.csv + csvsql --db postgresql:///$(DATABASE_NAME) --insert $(CSV_PATH)/name_to_number.csv echo 'DROP TABLE IF EXISTS committees;' | psql $(DATABASE_NAME) - csvsql --doublequote --db postgresql:///$(DATABASE_NAME) --insert $(CSV_PATH)/committees.csv + csvsql --db postgresql:///$(DATABASE_NAME) --insert $(CSV_PATH)/committees.csv echo 'ALTER TABLE "committees" ADD COLUMN id SERIAL PRIMARY KEY;' | psql $(DATABASE_NAME) echo 'DROP TABLE IF EXISTS office_elections;' | psql $(DATABASE_NAME) - csvsql --doublequote --db postgresql:///$(DATABASE_NAME) --insert downloads/csv/office_elections.csv + csvsql --db postgresql:///$(DATABASE_NAME) --insert downloads/csv/office_elections.csv echo 'ALTER TABLE "office_elections" ALTER COLUMN title TYPE varchar(50);' | psql $(DATABASE_NAME) echo 'ALTER TABLE "office_elections" ADD COLUMN id SERIAL PRIMARY KEY;' | psql $(DATABASE_NAME) echo 'DROP TABLE IF EXISTS elections;' | psql $(DATABASE_NAME) - csvsql --doublequote --db postgresql:///$(DATABASE_NAME) --insert downloads/csv/elections.csv + csvsql --db postgresql:///$(DATABASE_NAME) --insert downloads/csv/elections.csv echo 'ALTER TABLE "elections" ADD COLUMN id SERIAL PRIMARY KEY;' | psql $(DATABASE_NAME) import-data: 496 497 A-Contributions B1-Loans B2-Loans C-Contributions \ diff --git a/bin/import-file b/bin/import-file index 3fb817750..f04c997c1 100755 --- a/bin/import-file +++ b/bin/import-file @@ -27,13 +27,17 @@ if psql disclosure-backend -c '\d "'${table_name}'"' >/dev/null 2>&1; then fi if ls $filename_glob 2>/dev/null >/dev/null; then - csvstack $filename_glob /dev/null | \ + # the non-beta version of csvsql will not create a table from an empty csv + csvstack $filename_glob 2> /dev/null | \ csvsql --db postgresql:///$DATABASE_NAME --tables $table_name --insert ${table_exists:+--no-create} - echo -n ' Removing empty Tran_Date... ' - ./bin/clean "$DATABASE_NAME" "$table_name" - echo - echo -n ' Fixing pending Filer_IDs... ' - ./bin/fix-pending "$DATABASE_NAME" "$table_name" + if psql disclosure-backend -c '\d "'${table_name}'"' >/dev/null 2>&1; then + # table exists + echo -n ' Removing empty Tran_Date... ' + ./bin/clean "$DATABASE_NAME" "$table_name" + echo + echo -n ' Fixing pending Filer_IDs... ' + ./bin/fix-pending "$DATABASE_NAME" "$table_name" + fi else echo 'Found no files to import' fi diff --git a/requirements.txt b/requirements.txt index dbd61807b..7196892cb 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,11 +1,11 @@ -agate==1.5.5 -agate-dbf==0.2.0 -agate-excel==0.2.0 -agate-sql==0.5.0 +agate==1.6.1 +agate-dbf==0.2.2 +agate-excel==0.2.2 +agate-sql==0.5.3 awesome-slugify==1.6.5 awscli>=1.16.89 Babel==2.9.1 -csvkit==0.9.0 +csvkit>=1.3.0 dbfread==2.0.7 et-xmlfile==1.0.1 isodate==0.5.4; python_version < '3.0'