-
-
Notifications
You must be signed in to change notification settings - Fork 196
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge branch 'excel-analyzer' into develop
- Loading branch information
Showing
18 changed files
with
304 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
.rspec_status |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
# frozen_string_literal: true | ||
|
||
source "https://rubygems.org" | ||
|
||
# Specify your gem's dependencies in excel_analyzer.gemspec | ||
gemspec |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
# ExcelAnalyzer | ||
|
||
This gem packages an analyzer that inspects `XLSX` files (uploaded and analyzed as | ||
`ActiveStorage::Blob` objects) for hidden data, adding the results to the blob | ||
metadata. | ||
|
||
## Development | ||
|
||
After checking out the repo, run `bin/setup` to install dependencies. Then, run | ||
`rake spec` to run the tests. You can also run `bin/console` for an interactive | ||
prompt that will allow you to experiment. | ||
|
||
## Contributing | ||
|
||
Bug reports and pull requests are welcome on GitHub at | ||
https://github.com/mysociety/alaveteli |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
# frozen_string_literal: true | ||
|
||
require "bundler/gem_tasks" | ||
require "rspec/core/rake_task" | ||
|
||
RSpec::Core::RakeTask.new(:spec) | ||
|
||
task default: :spec |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
#!/usr/bin/env ruby | ||
# frozen_string_literal: true | ||
|
||
require "bundler/setup" | ||
require "excel_analyzer" | ||
|
||
# You can add fixtures and/or initialization code here to make experimenting | ||
# with your gem easier. You can also use a different console, if you like. | ||
|
||
# (If you use this, don't forget to add pry to your Gemfile!) | ||
# require "pry" | ||
# Pry.start | ||
|
||
require "irb" | ||
IRB.start(__FILE__) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
#!/usr/bin/env bash | ||
set -euo pipefail | ||
IFS=$'\n\t' | ||
set -vx | ||
|
||
bundle install | ||
|
||
# Do any other automated setup that you need to do here |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,34 @@ | ||
# frozen_string_literal: true | ||
|
||
lib = File.expand_path('lib', __dir__) | ||
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib) | ||
|
||
# Gem specification for excel_analyzer: identity, packaged files and
# runtime/development dependencies.
Gem::Specification.new do |gem|
  # Identity
  gem.name = "excel_analyzer"
  gem.version = "0.0.1"
  gem.authors = ["mySociety"]
  gem.email = ["[email protected]"]

  # Description and requirements
  gem.summary = "File analysers for ActiveStorage"
  gem.description = "Extra ActiveStorage Analysers for Alaveteli"
  gem.homepage = "https://alaveteli.org"
  gem.required_ruby_version = ">= 3.0.0"

  gem.metadata["homepage_uri"] = gem.homepage

  # Package every git-tracked file except this gemspec itself and anything
  # whose path starts with one of the excluded prefixes.
  excluded_prefixes = ["bin/", "spec/", ".git"]
  gem.files = Dir.chdir(__dir__) do
    tracked = `git ls-files -z`.split("\x0")
    tracked.reject do |path|
      File.expand_path(path) == __FILE__ || path.start_with?(*excluded_prefixes)
    end
  end
  gem.require_paths = ["lib"]

  # Runtime dependencies
  %w[activestorage nokogiri rubyzip].each do |name|
    gem.add_dependency(name)
  end

  # Development-only dependencies
  %w[bundler pry rake rspec].each do |name|
    gem.add_development_dependency(name)
  end
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
require "excel_analyzer/analyzer" | ||
require "excel_analyzer/railtie" if defined?(Rails) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,80 @@ | ||
require "active_storage" | ||
require "active_storage/analyzer" | ||
require "nokogiri" | ||
require "zip" | ||
|
||
module ExcelAnalyzer
  ##
  # The Analyzer class is responsible for analyzing Excel files uploaded through
  # Active Storage. It checks for various features within the Excel file such as
  # hidden rows, columns, sheets, pivot caches, and external links.
  #
  # The class uses rubyzip and Nokogiri for reading and parsing the contents of
  # the Excel (.xlsx) files.
  #
  class Analyzer < ActiveStorage::Analyzer
    XLSX_CONTENT_TYPE =
      "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"

    # Prefix => URI mapping used for every XPath query against the workbook
    # and worksheet parts.
    NAMESPACE = {
      "ns" => "http://schemas.openxmlformats.org/spreadsheetml/2006/main"
    }.freeze

    # Values of the boolean `hidden` attribute (rows, columns) that mean the
    # element is hidden.
    HIDDEN_FLAGS = %w[true 1].freeze

    # Values of the `state` attribute (sheets) that mean the sheet is not
    # visible. "veryHidden" sheets cannot be unhidden from the Excel UI, so
    # they must be reported too.
    HIDDEN_STATES = %w[hidden veryHidden].freeze

    # Returns true when the blob's content type is the XLSX content type.
    def self.accept?(blob)
      blob.content_type == XLSX_CONTENT_TYPE
    end

    # Returns the analysis result nested under the :excel key. The nested
    # hash holds either the five boolean flags built by #probe or a single
    # :error entry when probing raised.
    def metadata
      { excel: excel_metadata }
    end

    private

    # Downloads the blob to a tempfile and probes it.
    def excel_metadata
      download_blob_to_tempfile { |tempfile| probe(tempfile) }
    end

    # Opens the tempfile as a zip archive and builds the flag hash.
    #
    # Any StandardError (for example the file not being a zip archive) is
    # deliberately converted into `{ error: message }` rather than raised, so
    # a bad upload yields error metadata instead of a failed analysis.
    def probe(tempfile)
      Zip::File.open(tempfile.path) do |zip_file|
        {
          pivot_cache: zip_file.glob("xl/pivotCache/*").any?,
          external_links: zip_file.glob("xl/externalLinks/*").any?,
          hidden_rows: hidden_rows?(zip_file),
          hidden_columns: hidden_columns?(zip_file),
          hidden_sheets: hidden_sheets?(zip_file)
        }
      end
    rescue StandardError => ex
      { error: ex.message }
    end

    # Namespace mapping passed to Nokogiri XPath queries.
    def namespace
      NAMESPACE
    end

    # True when the XML node is flagged as hidden, either through its
    # `hidden` attribute or through a hidden/veryHidden `state` attribute.
    def hidden?(object)
      HIDDEN_FLAGS.include?(object.attr("hidden")) ||
        HIDDEN_STATES.include?(object.attr("state"))
    end

    # True when any worksheet part contains a hidden <row>.
    def hidden_rows?(zip_file)
      worksheet_nodes_hidden?(zip_file, "//ns:row")
    end

    # True when any worksheet part contains a hidden <col>.
    def hidden_columns?(zip_file)
      worksheet_nodes_hidden?(zip_file, "//ns:col")
    end

    # True when the workbook part lists a hidden or veryHidden <sheet>.
    # Returns false when the archive has no xl/workbook.xml entry.
    def hidden_sheets?(zip_file)
      workbook_file = zip_file.glob("xl/workbook.xml").first
      return false unless workbook_file

      doc = Nokogiri::XML(workbook_file.get_input_stream.read)
      doc.xpath("//ns:sheet", namespace).any? { |node| hidden?(node) }
    end

    # Scans every xl/worksheets/*.xml entry and reports whether any node
    # matched by +xpath+ is hidden. Stops at the first worksheet that matches.
    def worksheet_nodes_hidden?(zip_file, xpath)
      zip_file.glob("xl/worksheets/*.xml").any? do |worksheet_file|
        doc = Nokogiri::XML(worksheet_file.get_input_stream.read)
        doc.xpath(xpath, namespace).any? { |node| hidden?(node) }
      end
    end
  end
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
require "rails" | ||
require "active_storage" | ||
|
||
module ExcelAnalyzer
  ##
  # Rails integration hook. When loaded, it places ExcelAnalyzer::Analyzer at
  # the front of the analyzers list in the Active Storage configuration.
  #
  class Railtie < Rails::Railtie
    analyzers = config.active_storage.analyzers
    analyzers.prepend(ExcelAnalyzer::Analyzer)
  end
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,91 @@ | ||
# frozen_string_literal: true | ||
|
||
require "spec_helper" | ||
|
||
# Specs for ExcelAnalyzer::Analyzer: content-type acceptance and the metadata
# produced for suspect, clean and non-Excel fixture files.
RSpec.describe ExcelAnalyzer::Analyzer do
  describe ".accept?" do
    subject { described_class.accept?(blob) }

    context "when the blob is an Excel file" do
      let(:blob) do
        fake_blob(content_type: described_class::XLSX_CONTENT_TYPE)
      end

      it { is_expected.to eq true }
    end

    context "when the blob is not an Excel file" do
      let(:blob) { fake_blob(content_type: "text/plain") }

      it { is_expected.to eq false }
    end
  end

  describe "#metadata" do
    let(:metadata) { described_class.new(blob).metadata }
    let(:content_type) { described_class::XLSX_CONTENT_TYPE }
    # Each context names its fixture; the handle is opened here so that it
    # can be closed again once the example has finished.
    let(:io) { File.open(File.join(__dir__, "../fixtures", fixture)) }
    let(:blob) { fake_blob(io: io, content_type: content_type) }

    # Close the fixture handle so examples do not leak open files.
    after { io.close }

    context "when the blob is an Excel file with hidden data" do
      let(:fixture) { "suspect.xlsx" }

      it "detects pivot cache" do
        expect(metadata[:excel][:pivot_cache]).to eq true
      end

      it "detects external links" do
        expect(metadata[:excel][:external_links]).to eq true
      end

      it "detects hidden rows" do
        expect(metadata[:excel][:hidden_rows]).to eq true
      end

      it "detects hidden columns" do
        expect(metadata[:excel][:hidden_columns]).to eq true
      end

      it "detects hidden sheets" do
        expect(metadata[:excel][:hidden_sheets]).to eq true
      end
    end

    context "when the blob is an Excel file without hidden data" do
      let(:fixture) { "data.xlsx" }

      it "does not detect hidden data" do
        expect(metadata[:excel]).to eq(
          pivot_cache: false,
          external_links: false,
          hidden_rows: false,
          hidden_columns: false,
          hidden_sheets: false
        )
      end
    end

    context "when the blob is not an Excel file" do
      let(:fixture) { "plain.txt" }
      let(:content_type) { "text/plain" }

      it "returns an error metadata" do
        expect(metadata[:excel]).to eq(
          error: "Zip end of central directory signature not found"
        )
      end
    end
  end

  private

  # Builds a stand-in for a blob: it reports the given content type and, when
  # asked to `open`, yields the supplied IO object.
  def fake_blob(content_type:, io: nil)
    dbl = double(content_type: content_type)
    allow(dbl).to receive(:open).and_yield(io)
    dbl
  end
end
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
This is a text file |
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
# frozen_string_literal: true | ||
|
||
require "bundler/setup" | ||
require "excel_analyzer/analyzer" | ||
|
||
# Global RSpec settings shared by every spec in this gem.
RSpec.configure do |rspec|
  # Record example results in this file, which enables flags such as
  # --only-failures and --next-failure.
  rspec.example_status_persistence_file_path = ".rspec_status"

  # Keep RSpec from exposing its methods globally on `Module` and `main`.
  rspec.disable_monkey_patching!

  # Use the `expect` syntax for expectations.
  rspec.expect_with(:rspec) do |expectations|
    expectations.syntax = :expect
  end
end