forked from yob/pdf-reader
-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
2 changed files
with
82 additions
and
15 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,23 +1,70 @@ | ||
# coding: utf-8 | ||
|
||
# a quick script for profiling performance with perftools. | ||
# | ||
# USAGE | ||
# | ||
# ruby tools/bench.rb | ||
# evince bench.pdf | ||
|
||
$:.unshift "../lib" | ||
require 'pdf-reader' | ||
require 'perftools' | ||
|
||
PerfTools::CpuProfiler.start("/tmp/restart_profile") do | ||
PDF::Reader.open("restart.pdf") do |reader| | ||
# a script for measuring text extraction performance | ||
|
||
# TO BENCHMARK: ruby tools/bench.rb <runs> | ||
# TO PROFILE: ruby tools/bench.rb perftools | ||
# OR: ruby-prof tools/bench.rb <runs> | ||
# FOR OBJECT ALLOCATION STATS: ruby tools/bench.rb memprof | ||
# TO COUNT GC RUNS: ruby tools/bench.rb gc | ||
|
||
$project_root = File.expand_path(File.join(File.dirname(__FILE__), "..")) | ||
require 'rubygems' # for Ruby 1.8 | ||
$:.unshift "#{$project_root}/lib" | ||
require 'pdf/reader' | ||
|
||
# Extract all the text from a large PDF | ||
|
||
# Opens the sample PDF under spec/data and asks every page for its text.
# The extracted strings are not kept; this method exists purely to exercise
# the text-extraction code path for the measurements below.
def extract_text
  sample_pdf = "#{$project_root}/spec/data/no_text_spaces.pdf"
  PDF::Reader.open(sample_pdf) do |reader|
    reader.pages.each(&:text)
  end
end
|
||
`pprof.rb --text /tmp/restart_profile > bench.txt` | ||
`pprof.rb --pdf /tmp/restart_profile > bench.pdf` | ||
# Dispatch on the first command-line argument. "memprof", "perftools" and "gc"
# select a measurement mode; anything else is interpreted as the number of
# benchmark runs (10 when no argument is given).
case ARGV[0]
when "memprof"
  # Measure object allocation with memprof
  # NOTE(review): GC is disabled before tracking, presumably so collections
  # do not disturb the allocation counts -- confirm.
  require 'memprof'
  GC.disable
  Memprof.track { extract_text }

when "perftools"
  # Profile with perftools.rb
  # (The best thing about perftools.rb is that it shows you time spent on
  # garbage collection)
  require 'perftools'
  require 'fileutils'

  # The pprof reports are redirected into tools/profiles. Create the directory
  # first so the shell redirections below do not fail when this script is run
  # directly and the directory does not exist yet.
  profile_dir = "#{$project_root}/tools/profiles"
  FileUtils.mkdir_p(profile_dir)

  PerfTools::CpuProfiler.start("/tmp/perftools_data") do
    extract_text
  end
  `pprof.rb --text /tmp/perftools_data > #{profile_dir}/perftools.txt`
  `pprof.rb --pdf /tmp/perftools_data > #{profile_dir}/perftools.pdf`

when "gc"
  # Report how many times the garbage collector ran during one extraction
  before = GC.count
  extract_text
  puts "GC ran #{GC.count - before} times"

else
  # Benchmark
  # Average the results over multiple runs
  # Throw out the best and worst results, and average what remains
  # With 10 runs, the results seem to fluctuate by as much as 6-7%
  # I'd like that to be 1-2%, but that requires a VERY high number of runs

  runs = (ARGV[0] || 10).to_i
  # A non-numeric argument (e.g. a mistyped mode name) converts to 0, which
  # would time nothing and print "NaN seconds"; fail loudly instead.
  abort "usage: ruby tools/bench.rb [<runs>|perftools|memprof|gc]" if runs < 1

  times = []

  runs.times do
    start = Time.new
    extract_text
    times << (Time.new - start)
    sleep(0.1) # results seem more consistent this way
  end

  # Discard the fastest runs/5 samples, then keep the next
  # runs - (runs * 2 / 3) samples; everything slower is discarded too.
  # For any runs >= 1 at least one sample survives.
  times.sort!
  times = times.drop(runs / 5).take(runs - (runs * 2 / 3))
  average = times.reduce(0,&:+).to_f / times.size
  puts "#{"%0.3f" % average} seconds"
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
# Driver to run a bunch of profiling scripts in parallel, | ||
# leaving all the results in tools/profiles | ||
# Assumes "ruby" is Ruby 1.9, and "ruby1.8" is Ruby 1.8.7 | ||
# Also assumes that all needed gems are installed | ||
# This script itself should be run under Ruby 1.9 | ||
|
||
require 'fileutils' | ||
|
||
# Locate the repository root (the parent of this script's directory) and make
# sure the directory that receives the profiling output exists.
project_root = File.expand_path("..", File.dirname(__FILE__))
dir = "#{project_root}/tools/profiles"
File.exist?(dir) || FileUtils.mkdir(dir)

# One shell command per profiling job. The ruby-prof jobs run the benchmark
# once with different printers, memprof runs under ruby1.8 with its stdout
# redirected into the profiles directory, and perftools runs under "ruby".
bench = "#{project_root}/tools/bench.rb"
commands = [
  "ruby-prof #{bench} 1 --file=#{dir}/rubyprof.txt",
  "ruby-prof #{bench} 1 --file=#{dir}/rubyprof-graph.htm --printer=graph_html",
  "ruby-prof #{bench} 1 --file=#{dir}/rubyprof-stack.htm --printer=call_stack",
  "ruby1.8 #{bench} memprof > #{dir}/memprof.txt",
  "ruby #{bench} perftools"
]

# Start every job in its own child process, then block until all have exited.
pids = commands.map { |command| fork { `#{command}` } }
pids.each { |pid| Process.wait(pid) }