Skip to content

Commit

Permalink
Added new profiling scripts
Browse files Browse the repository at this point in the history
  • Loading branch information
alexdowad committed Sep 17, 2012
1 parent c5c1fe8 commit aac827a
Show file tree
Hide file tree
Showing 2 changed files with 82 additions and 15 deletions.
77 changes: 62 additions & 15 deletions tools/bench.rb
Original file line number Diff line number Diff line change
@@ -1,23 +1,70 @@
# coding: utf-8

# a quick script for profiling performance with perftools.
#
# USAGE
#
# ruby tools/bench.rb
# evince bench.pdf

$:.unshift "../lib"
require 'pdf-reader'
require 'perftools'

PerfTools::CpuProfiler.start("/tmp/restart_profile") do
PDF::Reader.open("restart.pdf") do |reader|
# a script for measuring text extraction performance

# TO BENCHMARK: ruby tools/bench.rb [runs]   (runs is optional and defaults to 10)
# TO PROFILE: ruby tools/bench.rb perftools
# OR: ruby-prof tools/bench.rb <runs>
# FOR OBJECT ALLOCATION STATS: ruby tools/bench.rb memprof
# TO COUNT GC RUNS: ruby tools/bench.rb gc

$project_root = File.expand_path(File.join(File.dirname(__FILE__), ".."))
require 'rubygems' # for Ruby 1.8
$:.unshift "#{$project_root}/lib"
require 'pdf/reader'

# Extract all the text from a large PDF

# Opens the sample PDF under spec/data and asks every page for its text.
# The extracted text is thrown away; only the work of extracting it matters
# to the benchmark and profiling modes that call this.
def extract_text
  sample_pdf = "#{$project_root}/spec/data/no_text_spaces.pdf"
  PDF::Reader.open(sample_pdf) do |reader|
    reader.pages.each { |page| page.text }
  end
end

`pprof.rb --text /tmp/restart_profile > bench.txt`
`pprof.rb --pdf /tmp/restart_profile > bench.pdf`
# Pick the measurement mode from the first command-line argument:
# "memprof", "perftools" or "gc" select a profiling mode; anything else is
# treated as the number of benchmark runs (10 when no argument is given).
case ARGV[0]
when "memprof"
  # Measure object allocation with memprof
  require 'memprof'
  # NOTE(review): GC is switched off before tracking, presumably so that
  # collected objects still show up in the allocation stats -- confirm.
  GC.disable
  Memprof.track { extract_text }

when "perftools"
  # Profile with perftools.rb
  # (The best thing about perftools.rb is that it shows you time spent on
  # garbage collection)
  require 'fileutils'
  require 'perftools'
  profiles_dir = "#{$project_root}/tools/profiles"
  # The shell redirections below cannot create a missing directory, so make
  # sure it exists even when this script is run without tools/profile.rb.
  FileUtils.mkdir_p(profiles_dir)
  PerfTools::CpuProfiler.start("/tmp/perftools_data") do
    extract_text
  end
  `pprof.rb --text /tmp/perftools_data > #{profiles_dir}/perftools.txt`
  `pprof.rb --pdf /tmp/perftools_data > #{profiles_dir}/perftools.pdf`

when "gc"
  # Count how many GC runs a single extraction triggers
  before = GC.count
  extract_text
  puts "GC ran #{GC.count - before} times"

else
  # Benchmark
  # Average the results over multiple runs
  # Throw out the best and worst results, and average what remains
  # With 10 runs, the results seem to fluctuate by as much as 6-7%
  # I'd like that to be 1-2%, but that requires a VERY high number of runs

  runs = (ARGV[0] || 10).to_i
  # String#to_i turns an unrecognised argument into 0. Without this guard a
  # zero count prints "NaN seconds" and a negative count crashes in #drop.
  abort "usage: ruby tools/bench.rb [<runs>|perftools|memprof|gc]" if runs < 1

  times = []
  runs.times do
    start = Time.new
    extract_text
    times << (Time.new - start)
    sleep(0.1) # results seem more consistent this way
  end

  times.sort!
  # Discard the fastest runs/5 samples, keep the next runs - (runs * 2 / 3),
  # and ignore everything slower. For runs >= 1 at least one sample remains.
  times = times.drop(runs / 5).take(runs - (runs * 2 / 3))
  average = times.reduce(0, &:+).to_f / times.size
  puts "#{"%0.3f" % average} seconds"
end
20 changes: 20 additions & 0 deletions tools/profile.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# Driver to run a bunch of profiling scripts in parallel,
# leaving all the results in tools/profiles
# Assumes "ruby" is Ruby 1.9, and "ruby1.8" is Ruby 1.8.7
# Also assumes that all needed gems are installed
# This script itself should be run under Ruby 1.9

require 'fileutils'

# Locate the project root (the parent of this script's directory) and make
# sure the output directory for the profiling reports exists.
project_root = File.expand_path(File.join(File.dirname(__FILE__), ".."))
profiles_dir = "#{project_root}/tools/profiles"
FileUtils.mkdir(profiles_dir) unless File.exist?(profiles_dir)

# One shell command per profiling job; each is run in its own forked child
# so that all of them execute concurrently.
bench_script = "#{project_root}/tools/bench.rb"
commands = [
  "ruby-prof #{bench_script} 1 --file=#{profiles_dir}/rubyprof.txt",
  "ruby-prof #{bench_script} 1 --file=#{profiles_dir}/rubyprof-graph.htm --printer=graph_html",
  "ruby-prof #{bench_script} 1 --file=#{profiles_dir}/rubyprof-stack.htm --printer=call_stack",
  "ruby1.8 #{bench_script} memprof > #{profiles_dir}/memprof.txt",
  "ruby #{bench_script} perftools"
]

child_pids = commands.map do |command|
  fork { `#{command}` }
end

# Block until every child has finished before exiting.
child_pids.each do |child_pid|
  Process.wait(child_pid)
end

0 comments on commit aac827a

Please sign in to comment.