add a handy tool for benchmarking the parsing of individual pages

babymastodon · Aug 5, 2012 · 8468d71 · 8468d71
1 parent 41f52ec
commit 8468d71
Showing 1 changed file with 21 additions and 0 deletions.
diff --git a/tools/page_bench b/tools/page_bench
@@ -0,0 +1,21 @@
+#!/usr/bin/env ruby
+
+# text extraction is a handy benchmark of parsing/lexing performance, as 
+# the full content stream of each page is processed.
+#
+# run like so:
+#
+#     ruby -Ilib tools/page_bench foo.pdf
+
+require 'rubygems'
+require 'pdf/reader'
+
+reader = PDF::Reader.new(ARGV[0])
+
+require "benchmark"
+
+Benchmark.bm(1) do |x|
+  reader.pages.each do |page|
+    x.report(page.number) { page.text }
+  end
+end