Skip to content

Commit

Permalink
Merge branch 'check-using-digest' into upgrade-csvkit
Browse files Browse the repository at this point in the history
  • Loading branch information
ChenglimEar committed Nov 9, 2023
2 parents 929372a + e02407a commit 8f42b3d
Show file tree
Hide file tree
Showing 27 changed files with 6,834 additions and 105 deletions.
1 change: 1 addition & 0 deletions .devcontainer/devcontainer.json
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
"workspaceFolder": "/workspace",
"remoteUser": "vscode",
"postCreateCommand": "bash ./.devcontainer/post-create-command.sh",
"postStartCommand": "git config --global --add safe.directory ${containerWorkspaceFolder}",
"forwardPorts": [4567, 5432],
"extensions": [
"ms-python.python",
Expand Down
1 change: 1 addition & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ process: process.rb
# todo: remove RUBYOPT variable when activerecord fixes deprecation warnings
echo 'delete from calculations;'| psql $(DATABASE_NAME)
rm -rf build && RUBYOPT="-W:no-deprecated -W:no-experimental" bundle exec ruby process.rb
python bin/create-digests.py

download-spreadsheets: downloads/csv/candidates.csv downloads/csv/committees.csv \
downloads/csv/referendums.csv downloads/csv/name_to_number.csv \
Expand Down
114 changes: 114 additions & 0 deletions bin/create-digests.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
import os
import json
import hashlib
import logging

# Emit INFO-level records so the per-file progress messages logged by
# collect_digests are shown.
# NOTE(review): the `encoding` argument to basicConfig was added in Python 3.9 —
# confirm the interpreter used by the Makefile is 3.9 or newer.
logging.basicConfig(encoding='utf-8', level=logging.INFO)

def round_floats(data):
    """Round every float inside *data* to two decimal places, in place.

    *data* is a structure as produced by ``json.load``: dicts and lists nested
    to any depth, with scalar leaves. Floats are rounded wherever they occur,
    both as dict values and as direct list elements. Any other scalar (str,
    int, bool, None) is left untouched, so lists of plain values no longer
    raise ``TypeError``.

    Returns None; the structure is modified in place.
    """
    if isinstance(data, list):
        for index, item in enumerate(data):
            if isinstance(item, float):
                data[index] = round(item, 2)
            else:
                round_floats(item)
    elif isinstance(data, dict):
        # Re-assigning an existing key does not resize the dict, so it is
        # safe to do while iterating over items().
        for key, value in data.items():
            if isinstance(value, float):
                data[key] = round(value, 2)
            else:
                round_floats(value)
    # Anything else is a scalar leaf: nothing to round.

def sort_arrays(data):
    """Put every list inside *data* into a deterministic order, in place.

    *data* is a structure as produced by ``json.load``. Children are
    canonicalized before their parent list is sorted, so that:

    * lists nested inside list elements are sorted too (previously only lists
      reachable through dict values were visited), and
    * the sort key of a dict element is computed from already-sorted children.

    Lists made up entirely of dicts are ordered by the string form of their
    values, taken in sorted-key order so the result does not depend on the
    key order in the source file. Other lists use natural ordering, falling
    back to a (type name, string form) key when the elements cannot be
    compared with each other. Scalars are ignored.

    Returns None; the structure is modified in place.
    """
    if isinstance(data, dict):
        for value in data.values():
            sort_arrays(value)
    elif isinstance(data, list):
        for item in data:
            sort_arrays(item)
        if not data:
            return
        if all(isinstance(item, dict) for item in data):
            data.sort(key=lambda item: tuple(str(item[k]) for k in sorted(item)))
        else:
            try:
                data.sort()
            except TypeError:
                # Mixed or unorderable element types (e.g. None next to
                # strings): impose a total order that is still deterministic.
                data.sort(key=lambda value: (type(value).__name__, str(value)))

def redact(data):
    """Mask values in *data* that are expected to differ between runs, in place.

    Only dicts are processed; any other argument is ignored.

    * A dict that has a ``date_processed`` key gets that value replaced with
      ``'***'`` and is not inspected any further.
    * Otherwise, for every key starting with ``top_`` the value is treated as
      a ranked list of items. Items with equal amounts have no defined
      relative order, so the ``name`` of each item whose amount equals that of
      the previously considered item is masked, together with that previous
      item's name. The last considered item is always masked too, because the
      item that would have followed it may have had the same amount.
    * Every other list value has ``redact`` applied to each element, and every
      remaining value is passed to ``redact`` directly.
    """
    if type(data) != dict:
        return

    if 'date_processed' in data:
        data['date_processed'] = '***'
        return

    for field in data.keys():
        value = data[field]

        if field.startswith('top_'):
            previous = None
            for entry in value:
                # Only items that carry both a name and an amount take part.
                if 'name' not in entry:
                    continue
                if 'total_contributions' in entry:
                    amount_field = 'total_contributions'
                elif 'total_spending' in entry:
                    amount_field = 'total_spending'
                else:
                    continue

                if previous is not None and entry[amount_field] == previous[amount_field]:
                    # Tie: which of the two comes first is undefined.
                    previous['name'] = '***'
                    entry['name'] = '***'
                previous = entry

            # The final item may have tied with one that was cut off the list.
            if previous is not None and 'name' in previous:
                previous['name'] = '***'
        elif type(value) == list:
            for entry in value:
                redact(entry)
        else:
            redact(value)

def _json_md5(value):
    """Return the MD5 hex digest of *value* serialized as key-sorted JSON."""
    # MD5 serves here as a change-detection fingerprint, not for security.
    encoded = json.dumps(value, sort_keys=True).encode('utf-8')
    return hashlib.md5(encoded).hexdigest()


def collect_digests(digests, subdir, exclude=None):
    """Walk *subdir* recursively and record a digest for each ``.json`` file.

    Each file is loaded, cleaned with ``redact``, ``round_floats`` and
    ``sort_arrays``, and then fingerprinted:

    * a file whose top level is an object contributes one entry per top-level
      key, stored under ``'<filepath>:<key>'``;
    * any other file (e.g. a top-level array) contributes a single entry
      stored under ``'<filepath>'``.

    Args:
        digests: dict that receives the results; updated in place.
        subdir: directory to scan.
        exclude: optional container of file or directory paths to skip,
            written the same way the paths are built here
            (``f'{subdir}/{filename}'``). Applied at every depth.

    Returns None.
    """
    # Avoid a shared mutable default; None means "exclude nothing".
    exclude = () if exclude is None else exclude

    for filename in os.listdir(subdir):
        filepath = f'{subdir}/{filename}'
        if filepath in exclude:
            logging.info(f'Skipping {filepath}')
        elif os.path.isdir(filepath):
            # Forward `exclude`, otherwise it is silently ignored below the
            # top-level directory.
            collect_digests(digests, filepath, exclude)
        elif filename.endswith('.json'):
            with open(filepath, 'r', encoding='utf-8') as fp:
                logging.info(filepath)
                data = json.load(fp)

            # Clean data before generating digests.
            redact(data)
            round_floats(data)
            sort_arrays(data)

            if isinstance(data, dict):
                for key, sub_data in data.items():
                    digests[f'{filepath}:{key}'] = _json_md5(sub_data)
            else:
                # hexdigest() already yields a str; calling it a second time
                # on that str raised AttributeError for non-object files.
                digests[filepath] = _json_md5(data)

def main():
    """Generate digests for the JSON files under ``build`` and save them.

    The result is written to ``build/digests.json``; that path is passed as
    an exclusion so the digest file is not itself included in the scan.
    """
    output_root = 'build'
    digest_path = f'{output_root}/digests.json'

    collected = {}
    collect_digests(collected, output_root, exclude=[digest_path])

    print(f'Saving {digest_path}')
    with open(digest_path, 'w') as out:
        json.dump(collected, out, indent=1, sort_keys=True)

# Run the digest generation only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()
Original file line number Diff line number Diff line change
Expand Up @@ -25,16 +25,16 @@
"total_contributions": 17530.0,
"total_expenditures": 9718.34,
"total_loans_received": 0.0,
"total_supporting_independent": 50172.51,
"total_supporting_independent": 71066.51,
"support_list": [
{
"Total": 15800.0,
"Total": 23700.0,
"Cand_ID": 1460829,
"Filer_ID": "1433122",
"Filer_NamL": "California Workers' Justice Coalition sponsored by Service Employees International Union Local 1021"
},
{
"Total": 30192.57,
"Total": 43186.57,
"Cand_ID": 1460829,
"Filer_ID": "1345259",
"Filer_NamL": "Oakland Education Association Political Action Committee"
Expand Down
4 changes: 2 additions & 2 deletions build/_data/committees/1410941.json
Original file line number Diff line number Diff line change
Expand Up @@ -813,7 +813,7 @@
"Tran_Date": "2018-10-19",
"Tran_NamF": "Garrett",
"Tran_NamL": "Riegg",
"Tran_Zip4": "94606"
"Tran_Zip4": "94602"
},
{
"Filer_ID": "1410941",
Expand All @@ -824,7 +824,7 @@
"Tran_Date": "2018-10-19",
"Tran_NamF": "Garrett",
"Tran_NamL": "Riegg",
"Tran_Zip4": "94602"
"Tran_Zip4": "94606"
},
{
"Filer_ID": "1410941",
Expand Down
4 changes: 2 additions & 2 deletions build/_data/committees/1421001.json
Original file line number Diff line number Diff line change
Expand Up @@ -1198,7 +1198,7 @@
"Tran_Date": "2020-07-15",
"Tran_NamF": "Jonathan",
"Tran_NamL": "Williams",
"Tran_Zip4": "94602"
"Tran_Zip4": "94603"
},
{
"Filer_ID": "1421001",
Expand All @@ -1209,7 +1209,7 @@
"Tran_Date": "2020-07-15",
"Tran_NamF": "Jonathan",
"Tran_NamL": "Williams",
"Tran_Zip4": "94603"
"Tran_Zip4": "94602"
},
{
"Filer_ID": "1421001",
Expand Down
2 changes: 1 addition & 1 deletion build/_data/elections/oakland/2014-11-04.json
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@
{
"name": "Families and Educators for Public Education, Sponsored by Go Public Schools Advocates",
"election_name": "oakland-2014",
"total_spending": 107576.9
"total_spending": 107576.90000000001
},
{
"name": "Unity PAC, a Sponsored Committee of the Alameda Labor Council, AFL-CIO",
Expand Down
6 changes: 3 additions & 3 deletions build/_data/elections/oakland/2018-06-05.json
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@
"total_contributions": 15000.0
},
{
"name": "Oakland Athletics Baseball Company",
"name": "Service Employees International Union Local 1021 Issues PAC",
"election_name": "oakland-june-2018",
"total_contributions": 10000.0
}
Expand Down Expand Up @@ -77,7 +77,7 @@
"total_spending": 15000.0
},
{
"name": "Oakland Athletics Baseball Company",
"name": "Service Employees International Union Local 1021 Issues PAC",
"type": "Measure",
"election_name": "oakland-june-2018",
"total_spending": 10000.0
Expand All @@ -100,7 +100,7 @@
"total_spending": 15000.0
},
{
"name": "Oakland Athletics Baseball Company",
"name": "Service Employees International Union Local 1021 Issues PAC",
"type": "Measure",
"election_name": "oakland-june-2018",
"total_spending": 10000.0
Expand Down
8 changes: 4 additions & 4 deletions build/_data/elections/oakland/2018-11-06.json
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
"Committee": 954896.17,
"Individual": 2649041.44,
"Unitemized": 157811.78,
"Self Funding": 88464.97,
"Self Funding": 88464.97000000002,
"Other (includes Businesses)": 2739121.2600000002
},
"most_expensive_races": [
Expand Down Expand Up @@ -72,7 +72,7 @@
{
"name": "Oaklanders for Responsible Leadership, Opposing Desley Brooks for Oakland City Council 2018",
"election_name": "oakland-2018",
"total_spending": 129086.79
"total_spending": 129086.79000000001
}
],
"top_contributors": [
Expand All @@ -97,7 +97,7 @@
"name": "Brenda Roberts",
"type": "Office",
"election_name": "oakland-2018",
"total_contributions": 53757.13
"total_contributions": 53757.130000000005
},
{
"name": "Charlie Michelson",
Expand Down Expand Up @@ -157,7 +157,7 @@
"name": "Brenda Roberts",
"type": "Office",
"election_name": "oakland-2018",
"total_spending": 53757.13
"total_spending": 53757.130000000005
},
{
"name": "Charlie Michelson",
Expand Down
6 changes: 3 additions & 3 deletions build/_data/elections/oakland/2020-11-03.json
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@
"type": "office",
"title": "Oakland November 3rd, 2020 General Election",
"candidate": "Tri Ngo",
"proportion": 0.3032000954178602,
"proportion": 0.30320009541786014,
"office_title": "City Council District 1"
},
{
Expand All @@ -62,12 +62,12 @@
{
"name": "Oakland 2020 Committee to Replace Lynette Gibson McElhaney And Elect Carroll Fife and Rebecca Kaplan to the Oakland City Council, sponsored by Alameda Labor Council, AFL-CIO",
"election_name": "oakland-2020",
"total_spending": 395215.85
"total_spending": 395215.8500000001
},
{
"name": "Families and Educators for Public Education, Sponsored by Go Public Schools Advocates",
"election_name": "oakland-2020",
"total_spending": 340009.22
"total_spending": 340009.22000000003
},
{
"name": "Californians for Independent Work, Sponsored by Lyft, Inc.",
Expand Down
2 changes: 1 addition & 1 deletion build/_data/elections/oakland/2022-06-07.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
"total_contributions": 104075.02,
"total_contributions_by_source": {
"Out of State": 6400.0,
"Within Oakland": 64425.02,
"Within Oakland": 64425.020000000004,
"Within California": 33250.0
},
"contributions_by_type": {
Expand Down
16 changes: 8 additions & 8 deletions build/_data/elections/oakland/2022-11-08.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,13 @@
"total_contributions": 6476065.89,
"total_contributions_by_source": {
"Out of State": 521147.35,
"Within Oakland": 2632398.1800000006,
"Within Oakland": 2632398.180000001,
"Within California": 3006531.1199999996
},
"contributions_by_type": {
"PTY": 15900.0,
"Committee": 995101.14,
"Individual": 2507910.870000002,
"Individual": 2507910.869999998,
"Unitemized": 135828.3,
"Self Funding": 2711.0,
"Other (includes Businesses)": 2638453.6399999997
Expand Down Expand Up @@ -95,19 +95,19 @@
],
"top_contributors_for_offices": [
{
"name": "Riaz Taplin",
"name": "Griffin Tischler",
"type": "Office",
"election_name": "oakland-2022",
"total_contributions": 6300.0
},
{
"name": "Griffin Tischler",
"name": "Riaz Taplin",
"type": "Office",
"election_name": "oakland-2022",
"total_contributions": 6300.0
},
{
"name": "Russ Taplin",
"name": "Goolshan Chinoy",
"type": "Office",
"election_name": "oakland-2022",
"total_contributions": 5400.0
Expand Down Expand Up @@ -155,19 +155,19 @@
],
"top_spenders_for_offices": [
{
"name": "Riaz Taplin",
"name": "Griffin Tischler",
"type": "Office",
"election_name": "oakland-2022",
"total_spending": 6300.0
},
{
"name": "Griffin Tischler",
"name": "Riaz Taplin",
"type": "Office",
"election_name": "oakland-2022",
"total_spending": 6300.0
},
{
"name": "John Protopappas",
"name": "Goolshan Chinoy",
"type": "Office",
"election_name": "oakland-2022",
"total_spending": 5400.0
Expand Down
Loading

0 comments on commit 8f42b3d

Please sign in to comment.