Skip to content

Commit

Permalink
Refactor reports and add partial _data/all* reports
Browse files Browse the repository at this point in the history
  • Loading branch information
ShaneCurcuru committed Feb 9, 2024
1 parent 364276b commit 8751596
Show file tree
Hide file tree
Showing 13 changed files with 9,613 additions and 109 deletions.
2,351 changes: 2,351 additions & 0 deletions _data/allsponsorfunds.json

Large diffs are not rendered by default.

4,669 changes: 4,669 additions & 0 deletions _data/allsponsorreports.json

Large diffs are not rendered by default.

2,511 changes: 2,511 additions & 0 deletions _data/allsponsorships.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion _foundations/asf.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ budgetYear:
budgeturl: https://whimsy.apache.org/board/minutes/Budget.html
sponsorurl: https://www.apache.org/foundation/thanks.html
sponsorList: Cloudera, Comcast, Facebook, Google, Leaseweb, Microsoft, Pivotal, Yahoo!
sponsorship: asf.json
sponsorship: asf
licenses: Apache-2.0
claPolicy: CLA
ethicsPolicy:
Expand Down
2 changes: 1 addition & 1 deletion _foundations/drupal.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ budgetYear:
budgeturl:
sponsorurl: https://www.drupal.org/association/supporters/partners
sponsorList:
sponsorship: drupal.json
sponsorship: drupal
licenses: GPL-2.0-or-later
claPolicy:
ethicsPolicy:
Expand Down
2 changes: 1 addition & 1 deletion _foundations/freebsd.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ budgetYear:
budgeturl:
sponsorurl: https://www.freebsdfoundation.org/donors/
sponsorList:
sponsorship: freebsd.json
sponsorship: freebsd
licenses: BSD-2-Clause
claPolicy:
ethicsPolicy:
Expand Down
2 changes: 1 addition & 1 deletion _foundations/lf.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ budgetYear: 2019
budgeturl: https://projects.propublica.org/nonprofits/organizations/460503801
sponsorurl: https://www.linuxfoundation.org/members
sponsorList:
sponsorship: lf.json
sponsorship: lf
licenses: various
claPolicy:
ethicsPolicy:
Expand Down
2 changes: 1 addition & 1 deletion _foundations/numfocus.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ budgetYear:
budgeturl: https://numfocus.org/legal
sponsorurl:
sponsorList: https://numfocus.org/sponsors
sponsorship: numfocus.json
sponsorship: numfocus
licenses: various
claPolicy:
ethicsPolicy:
Expand Down
2 changes: 1 addition & 1 deletion _foundations/osgeo.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ budgetYear:
budgeturl:
sponsorurl:
sponsorList:
sponsorship: osgeo.json
sponsorship: osgeo
licenses:
claPolicy:
ethicsPolicy:
Expand Down
2 changes: 1 addition & 1 deletion _foundations/owasp.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ budgeturl: https://www.owasp.org/index.php/About_OWASP/Financial_Transparency
sponsorurl: https://www.owasp.org/index.php/Acknowledgements
sponsorList: Adobe, CME Group, Fortify, Qualys, Salesforce, Security Innovation, Signal
Sciences, waratek
sponsorship: owasp.json
sponsorship: owasp
licenses: various
claPolicy: CLA
ethicsPolicy:
Expand Down
2 changes: 1 addition & 1 deletion _foundations/python.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ budgetYear:
budgeturl:
sponsorurl:
sponsorList:
sponsorship: python.json
sponsorship: python
licenses:
claPolicy:
ethicsPolicy:
Expand Down
81 changes: 61 additions & 20 deletions assets/ruby/sponsor_reports.rb
Original file line number Diff line number Diff line change
Expand Up @@ -8,38 +8,45 @@ module SponsorReports
require 'csv'
require 'yaml'
require 'json'
require_relative 'sponsor_utils'

INKIND_DISCOUNT = 0.5 # Discount value from sponsor of in-kind levels

# Report total (approx) cash outlay by sponsors across all orgs
# in-kind donations are counted at INKIND_DISCOUNT of value (arbitrary estimate)
# @param orglist output of sponsor_utils listing org sponsors scraped
# @param levels definition of sponsorship levels by org
def sponsor_totals(orglist, levels)
# @return hash of estimated funding levels by org or sponsor
def report_funding(allsponsors)
report = {}
report['orgtotal'] = {}
report['sponsortotal'] = Hash.new(0)
orglist.each do | org, sponsors |
allsponsors.each do | org, sponsors |
orgtotal = 0
report['orgtotal'][org] = {}
orglevels = SponsorUtils.get_current_sponsorship(SponsorUtils.get_sponsorship_file(org))
orglevels = orglevels['levels']
sponsors.each do | lvl, ary |
lvlamt = levels[org][0]['levels'][lvl][1].to_i
next unless ary.is_a?(Array)
lvlamt = orglevels[lvl].fetch('amount', 0).to_i
numlvl = ary.size
amtlvl = lvlamt * numlvl
# For the organization's report, count full value for all
report['orgtotal'][org][lvl] = amtlvl
orgtotal += amtlvl
# For the sponsor's report, discount inkind levels
ary.each do | sponsorurl |
# TODO map any non-hostnames intelligently
# TODO use /inkind/ INKIND_DISCOUNT
# report['sponsortotal'][sponsorurl] += lvlamt # HACK this line randomly throws: undefined method `+' for nil:NilClass
# HACK sum up values the hard way
val = report['sponsortotal'].fetch(sponsorurl, nil)
if val
report['sponsortotal'][sponsorurl] += lvlamt
else
report['sponsortotal'][sponsorurl] = lvlamt
if ary.is_a?(Array) # Ignore dates or errors
ary.each do | sponsorurl |
# TODO somehow mark amountvaries levels
# TODO map any non-hostnames intelligently
lvlamt = (lvlamt * INKIND_DISCOUNT).round(0) if /inkind/.match(lvl)
# report['sponsortotal'][sponsorurl] += lvlamt # HACK1 this line randomly throws: undefined method `+' for nil:NilClass
# HACK1 sum up values the hard way
val = report['sponsortotal'].fetch(sponsorurl, nil)
if val
report['sponsortotal'][sponsorurl] += lvlamt
else
report['sponsortotal'][sponsorurl] = lvlamt
end
end
end
end
Expand All @@ -48,16 +55,50 @@ def sponsor_totals(orglist, levels)
return report
end

# Rough count of number of times different urls appear at levels
# Each url is tallied once in 'all' and once in the bucket for the level
# it was listed under; every bucket is ordered by descending count.
# Org entries that are not a Hash, and level entries that are not an
# Array (dates, error strings), are ignored.
# Levels not listed in SponsorUtils::SPONSOR_METALEVELS get their own
# bucket created on demand instead of raising on a missing bucket.
# @param sponsors hash returned from scrape_bycss or parse_landscape
# @return hash of counts of how often domain names appear:
#   'orgs' => array of org ids seen, 'all' => { url => count },
#   plus one { url => count } hash per level name
def report_counts(sponsors)
  counts = { 'orgs' => [], 'all' => Hash.new(0) }
  SponsorUtils::SPONSOR_METALEVELS.each do |lvl|
    counts[lvl] = Hash.new(0)
  end
  sponsors.each do |org, sponsorhash|
    counts['orgs'] << org
    next unless sponsorhash.is_a?(Hash) # Ignore dates or possible error entries

    sponsorhash.each do |level, ary|
      next unless ary.is_a?(Array) # Ignore dates or possible error entries

      # Unknown level names must not crash the whole report
      bucket = (counts[level] ||= Hash.new(0))
      ary.each do |url|
        counts['all'][url] += 1
        bucket[url] += 1
      end
    end
  end
  # Sort every tally (not only the predefined ones) most-frequent first;
  # 'orgs' is an Array and is left untouched.
  counts.keys.each do |key|
    tally = counts[key]
    counts[key] = tally.sort_by { |_url, seen| -seen }.to_h if tally.is_a?(Hash)
  end
  counts
end

# ### #### ##### ######
# Main method for command line use
if __FILE__ == $PROGRAM_NAME
# TODO: default dir? Command line params? Load each sponsor level by org?
levelfile = 'sponsor_levels.json'
orgfile = 'sponsor_utils.json'
levels = JSON.parse(File.read(levelfile))
orglist = JSON.parse(File.read(orgfile))
report = sponsor_totals(orglist, levels)
File.open('sponsor_report.json', "w") do |f|
infile = '_data/allsponsorships.json'
levelfile = '_data/allsponsorreports.json'
fundfile = '_data/allsponsorfunds.json'
sponsors = JSON.parse(File.read(infile))
report = report_counts(sponsors)
File.open(levelfile, "w") do |f|
f.write(JSON.pretty_generate(report))
end
report = report_funding(sponsors)
File.open(fundfile, "w") do |f|
f.write(JSON.pretty_generate(report))
end
end
Expand Down
94 changes: 13 additions & 81 deletions assets/ruby/sponsor_utils.rb
Original file line number Diff line number Diff line change
Expand Up @@ -20,10 +20,12 @@ module SponsorUtils

# Map all sponsorships to common-ish levels
# - Ordinals are cash sponsorships in order
# - inkind is services donations (primarily services, not cash)
# - 'inkind'* is services donations (primarily services, not cash)
# - community is widely used as a separate level
# - grants covers any sort of government/institution grants
SPONSOR_METALEVELS = %w[ first second third fourth fifth sixth seventh eighth community firstinkind secondinkind thirdinkind fourthinkind startuppartners grants ]
# TODO: Define a more rigorous and smaller set of categories,
# to map some unusual ones (cncf:enduser, etc.) to simpler ones
SPONSOR_METALEVELS = %w[ first second third fourth fifth sixth seventh eighth community firstinkind secondinkind thirdinkind fourthinkind startuppartners academic enduser grants ]
CURRENT_SPONSORSHIP = '20240101' # HACK: select current one TODO allow different dates/versions

# Return a normalized domain name for mapping to a single sponsor org
Expand Down Expand Up @@ -138,35 +140,13 @@ def cleanup_drupal(links)
sponsors[level] << itm
end
rescue StandardError => e
puts "ERROR: cleanup_drupal(...#{itm}): #{e.message}\n\n#{e.backtrace.join("\n\t")}"
sponsors[level] << itm # HACK: leave as-is, will be obvious to reader
sponsors[level] << "ERROR: cleanup_drupal(...#{itm}): #{e.message}\n\n#{e.backtrace.join("\n\t")}"
end
end
end
return sponsors
end

# Tally how often each sponsor url shows up, overall and per level
# @param sponsors hash of org => { level => [urls] }, as returned from
#   scrape_bycss or cleanup
# @return hash with one { url => count } tally per SPONSOR_METALEVELS
#   entry, plus an 'all' tally ordered by descending count
def report_counts(sponsors)
  tally = SPONSOR_METALEVELS.each_with_object({}) do |name, acc|
    acc[name] = Hash.new(0)
  end
  tally['all'] = Hash.new(0)
  sponsors.each do |_org, bylevel|
    bylevel.each do |level, urls|
      urls.each do |url|
        tally['all'][url] += 1
        tally[level][url] += 1
      end
    end
  end
  # Only the overall tally is re-ordered, most frequent first
  tally['all'] = tally['all'].sort_by { |_url, seen| -seen }.to_h
  tally
end

# Future use: allow parsing historical sponsorships
def get_current_sponsorship(sponsorship)
return sponsorship[CURRENT_SPONSORSHIP]
Expand Down Expand Up @@ -226,74 +206,26 @@ def parse_all_sponsorships()
foundations.each do | org, foundation |
sponsorship = foundation.fetch('sponsorship', nil)
if sponsorship
sponsorships[org] = JSON.parse(File.read("_sponsorships/#{sponsorship}"))
sponsorships[org] = get_sponsorship_file(sponsorship)
end
end
sponsorships = sponsorships.select{ | k, v | ('asf'.eql?(k) || 'cncf'.eql?(k))} # HACK
sponsorships['cncf'] = get_sponsorship_file('cncf') # HACK: Add in cncf as a test subject, since it's not a separate org
all_sponsors = parse_sponsorships(sponsorships)
return all_sponsors
end

# Load and parse the sponsorship definition for a single org
# @param org identifier used as the file basename, read from
#   _sponsorships/<org>.json relative to the current directory
# @return parsed JSON content of that file
def get_sponsorship_file(org)
  path = "_sponsorships/#{org}.json"
  raw = File.read(path)
  JSON.parse(raw)
end

# ### #### ##### ######
# Main method for command line use
if __FILE__ == $PROGRAM_NAME
# TODO: default dir? Command line params? Load each sponsor level by org?
sponsorship = JSON.parse(File.read('_sponsorships/cncf.json'))
sponsorships = { 'cncf' => sponsorship }
sponsors = parse_sponsorships(sponsorships)
File.open('parsecncf.json', "w") do |f|
f.write(JSON.pretty_generate(sponsors))
end
puts "DEBUG - done testing parse just cncf list"
exit 1

# TODO: default dir? Command line params?
alldata = parse_all_sponsorships()
File.open('parseall.json', "w") do |f|
File.open('_data/allsponsorships.json', "w") do |f|
f.write(JSON.pretty_generate(alldata))
end
puts "DEBUG - done testing parse_all_sponsorships"
exit 1


infile = 'sponsor_levels.json'
outfile = 'sponsor_utils.json'
io = nil
sponsors = {}
maps = JSON.parse(File.read(infile))
maps.each do | org, map |
map = map[0] # HACK: just use first map on list; by date for future use historical scans
if true
filename = "../../../sponsors-#{org}.html"
baseurl = ''
io = File.open(filename)
else
sponsorurl = map['sponsorurl']
begin
io = URI.open(sponsorurl).read
rescue StandardError => e
puts "ERROR: #{sponsorurl}: #{e.message}\n\n#{e.backtrace.join("\n\t")}"
next
end
end
sponsors[org] = SponsorUtils.scrape_bycss(io, map)
case org
when 'python'
sponsors[org] = cleanup_with_map(sponsors[org], 'python_map.json')
when 'drupal'
sponsors[org] = cleanup_drupal(sponsors[org])
when 'lf'
sponsors[org] = cleanup_with_map(sponsors[org], 'lf_map.json')
else
# No-op
end
end
File.open(outfile, "w") do |f|
f.write(JSON.pretty_generate(sponsors))
end
counts = report_counts(sponsors)
File.open('sponsor_metacount.json', "w") do |f|
f.write(JSON.pretty_generate(counts))
end
end
end

0 comments on commit 8751596

Please sign in to comment.