-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmakestat.rb
69 lines (58 loc) · 2.38 KB
/
makestat.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
###############################################################################
## ZipStats ##
## Reads in Sales info and outputs stats ##
## Using Ruby 1.8.7 ##
## ##
## Output Format: ##
## zipcode,average,median,totalsold,month-value ##
###############################################################################
require 'rubygems'
require 'FasterCSV'
# Validate the command line: exactly two arguments are required (input path,
# then output path), and the input path must already exist.
raise "Please specify both the input and output files." unless ARGV.length == 2
# File.exist? is used instead of File.exists?: the plural form was deprecated
# and then removed in Ruby 3.2, while File.exist? works on old and new Rubies.
raise "Please specify a legit input file" unless File.exist?(ARGV[0])
inPath = ARGV[0]
outPath = ARGV[1]
# Per-zip accumulators: zip code => list of prices / list of month numbers.
priceData = {}
monthData = {}
# Walk the CSV (its first line is treated as a header row) and bucket every
# row under the zip code taken from column 4.
FasterCSV.foreach(inPath, :headers => true) do |sale|
  zip = sale[4][0, 5]              # first five characters of column 4
  next if zip.size == 4            # rows whose zip is exactly four characters are skipped
  month = sale[1].split("-")[1]    # second "-"-separated piece of column 1
  price = sale[3]
  (priceData[zip] ||= []) << price.to_i
  (monthData[zip] ||= []) << month.to_i
end
# Build the leading part of each output line: "zip,average,median,count,".
# Month frequencies are appended to these strings later in the script.
output_file = {}
priceData.each do |zipc, prices|
  count = prices.size
  mid = count / 2
  sorted = prices.sort   # sort once and reuse for every median lookup
  average = prices.inject(&:+) / count.to_f
  # Odd count: the middle element. Even count: mean of the two middle elements.
  median = count % 2 != 0 ? sorted[mid] : (sorted[mid] + sorted[mid - 1]) / 2.0
  # Store a plain String. Wrapping it in an Array only worked on Ruby 1.8,
  # where Array#to_s joined the elements; from 1.9 on, interpolating the Array
  # yields its inspect form (["..."]) and corrupts the output line.
  # Average and median are rounded to two decimal places.
  output_file[zipc] = "#{zipc},#{(average * 100).round / 100.0},#{(median * 100).round / 100.0},#{count},"
end
# Calculate month frequencies and append them to each zip's output string as
# comma-separated "month-count" pairs, ordered by month number.
monthFreq = {}
monthData.each do |zipc, months|
  # Count how many entries were recorded for each month number.
  counts = months.inject(Hash.new(0)) do |tally, month|
    tally[month] += 1
    tally
  end
  monthFreq[zipc] = counts.sort   # array of [month, count] pairs, ascending by month
  # Format the pairs directly rather than regex-rewriting Array#inspect output:
  # that approach depended on the exact inspect layout, and gsub! returns nil
  # when nothing matches, which would silently drop the month data.
  month_values = monthFreq[zipc].map { |month, count| "#{month}-#{count}" }.join(",")
  # The stats prefix may have been stored as a one-element Array; join it
  # explicitly, because interpolating an Array on Ruby 1.9+ emits its inspect
  # form (brackets and quotes) instead of the bare string.
  prefix = output_file[zipc]
  prefix = prefix.join if prefix.respond_to?(:join)
  output_file[zipc] = "#{prefix}#{month_values}"
end
# Write the header row, then one statistics line per zip code, to the output
# file; the block form of File.open closes the file when the block returns.
File.open(outPath, "w") do |out|
  out.puts('Zipcode,Average,Median,Qty,Month-Value')
  output_file.each_value { |line| out.puts(line) }
end
puts "Done! Please check out #{ARGV[1]} for your statistics"