Skip to content

Commit

Permalink
Merge pull request #113 from samvera-labs/filename_sanitization
Browse files Browse the repository at this point in the history
Add filename sanitizing to adapters
  • Loading branch information
cjcolvar authored Nov 9, 2022
2 parents d4474f4 + cefb01d commit 34c0828
Show file tree
Hide file tree
Showing 16 changed files with 336 additions and 21 deletions.
5 changes: 5 additions & 0 deletions lib/active_encode.rb
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,8 @@
require 'active_encode/version'
require 'active_encode/base'
require 'active_encode/engine'
require 'active_encode/filename_sanitizer'

module ActiveEncode
extend ActiveEncode::FilenameSanitizer
end
Original file line number Diff line number Diff line change
Expand Up @@ -140,9 +140,10 @@ def check_s3_bucket(input_url, source_bucket)
# logger.info("Already in bucket `#{source_bucket}'")
s3_object.key
else
s3_key = File.join(SecureRandom.uuid, s3_object.key)
cleaned_url = ActiveEncode.sanitize_filename input_url
s3_key = File.join(SecureRandom.uuid, File.basename(cleaned_url))
# logger.info("Copying to `#{source_bucket}/#{input_url}'")
target = Aws::S3::Object.new(bucket_name: source_bucket, key: input_url)
target = Aws::S3::Object.new(bucket_name: source_bucket, key: s3_key)
target.copy_from(s3_object, multipart_copy: s3_object.size > 15_728_640) # 15.megabytes
s3_key
end
Expand All @@ -152,7 +153,8 @@ def upload_to_s3(input_url, source_bucket)
# original_input = input_url
bucket = Aws::S3::Resource.new(client: s3client).bucket(source_bucket)
filename = FileLocator.new(input_url).location
s3_key = File.join(SecureRandom.uuid, File.basename(filename))
cleaned_url = ActiveEncode.sanitize_filename input_url
s3_key = File.join(SecureRandom.uuid, File.basename(cleaned_url))
# logger.info("Copying `#{original_input}' to `#{source_bucket}/#{input_url}'")
obj = bucket.object(s3_key)
obj.upload_file filename
Expand Down
13 changes: 3 additions & 10 deletions lib/active_encode/engine_adapters/ffmpeg_adapter.rb
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ def create(input_url, options = {})
case input_url
when /^file\:\/\/\//
input_url = Addressable::URI.unencode(input_url)
input_url = ActiveEncode.sanitize_input(input_url)
when /^s3\:\/\//
require 'file_locator'

Expand Down Expand Up @@ -200,7 +201,7 @@ def build_outputs(encode)
Dir["#{File.absolute_path(working_path('outputs', id))}/*"].each do |file_path|
output = ActiveEncode::Output.new
output.url = "file://#{file_path}"
sanitized_filename = sanitize_base encode.input.url
sanitized_filename = ActiveEncode.sanitize_base encode.input.url
output.label = file_path[/#{Regexp.quote(sanitized_filename)}\-(.*?)#{Regexp.quote(File.extname(file_path))}$/, 1]
output.id = "#{encode.input.id}-#{output.label}"
output.created_at = encode.created_at
Expand All @@ -219,7 +220,7 @@ def build_outputs(encode)

def ffmpeg_command(input_url, id, opts)
output_opt = opts[:outputs].collect do |output|
sanitized_filename = sanitize_base input_url
sanitized_filename = ActiveEncode.sanitize_base input_url
file_name = "outputs/#{sanitized_filename}-#{output[:label]}.#{output[:extension]}"
" #{output[:ffmpeg_opt]} #{working_path(file_name, id)}"
end.join(" ")
Expand All @@ -230,14 +231,6 @@ def ffmpeg_command(input_url, id, opts)
"#{FFMPEG_PATH} #{header_opt} -y -loglevel level+fatal -progress #{working_path('progress', id)} -i \"#{input_url}\" #{output_opt}"
end

def sanitize_base(input_url)
if input_url.is_a? URI::HTTP
File.basename(input_url.path, File.extname(input_url.path))
else
File.basename(input_url, File.extname(input_url)).gsub(/[^0-9A-Za-z.\-]/, '_')
end
end

def get_pid(id)
File.read(working_path("pid", id)).remove("\n") if File.file? working_path("pid", id)
end
Expand Down
12 changes: 9 additions & 3 deletions lib/active_encode/engine_adapters/media_convert_adapter.rb
Original file line number Diff line number Diff line change
Expand Up @@ -482,6 +482,10 @@ def mediaconvert
end
end

def s3client
Aws::S3::Client.new
end

def s3_uri(url, options = {})
bucket = options[:masterfile_bucket]

Expand All @@ -503,9 +507,10 @@ def check_s3_bucket(input_url, source_bucket)
# logger.info("Already in bucket `#{source_bucket}'")
s3_object.key
else
s3_key = File.join(SecureRandom.uuid, s3_object.key)
cleaned_url = ActiveEncode.sanitize_filename input_url
s3_key = File.join(SecureRandom.uuid, File.basename(cleaned_url))
# logger.info("Copying to `#{source_bucket}/#{input_url}'")
target = Aws::S3::Object.new(bucket_name: source_bucket, key: input_url)
target = Aws::S3::Object.new(bucket_name: source_bucket, key: s3_key)
target.copy_from(s3_object, multipart_copy: s3_object.size > 15_728_640) # 15.megabytes
s3_key
end
Expand All @@ -515,7 +520,8 @@ def upload_to_s3(input_url, source_bucket)
# original_input = input_url
bucket = Aws::S3::Resource.new(client: s3client).bucket(source_bucket)
filename = FileLocator.new(input_url).location
s3_key = File.join(SecureRandom.uuid, File.basename(filename))
cleaned_url = ActiveEncode.sanitize_filename input_url
s3_key = File.join(SecureRandom.uuid, File.basename(cleaned_url))
# logger.info("Copying `#{original_input}' to `#{source_bucket}/#{input_url}'")
obj = bucket.object(s3_key)
obj.upload_file filename
Expand Down
6 changes: 1 addition & 5 deletions lib/active_encode/engine_adapters/pass_through_adapter.rb
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ def create(input_url, options = {})
# Copy derivatives to work directory
options[:outputs].each do |opt|
url = opt[:url]
output_path = working_path("outputs/#{sanitize_base opt[:url]}#{File.extname opt[:url]}", new_encode.id)
output_path = working_path("outputs/#{ActiveEncode.sanitize_base opt[:url]}#{File.extname opt[:url]}", new_encode.id)
FileUtils.cp FileLocator.new(url).location, output_path
filename_label_hash[output_path] = opt[:label]
end
Expand Down Expand Up @@ -206,10 +206,6 @@ def build_outputs(encode)
outputs
end

def sanitize_base(input_url)
File.basename(input_url, File.extname(input_url)).gsub(/[^0-9A-Za-z.\-]/, '_')
end

def working_path(path, id)
File.join(WORK_DIR, id, path)
end
Expand Down
22 changes: 22 additions & 0 deletions lib/active_encode/filename_sanitizer.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# frozen_string_literal: true
module ActiveEncode
module FilenameSanitizer
# ffmpeg has trouble with double quotes in file names. Escape them to get ffmpeg to find the file.
def sanitize_input(input_url)
input_url.gsub(/["]/, '\\\\\0')
end

def sanitize_base(input_url)
filepath = input_url.is_a?(URI::HTTP) ? input_url.path : input_url
# Replace special characters with underscores and remove excess periods.
# This removes the extension before processing so it is safe to delete all detected periods.
File.basename(filepath, File.extname(filepath)).gsub(/[^0-9A-Za-z.\-\/]/, '_').delete('.')
end

def sanitize_filename(input_url)
filepath = input_url.is_a?(URI::HTTP) ? input_url.path : input_url
# Replace special characters with underscores and remove excess periods.
File.basename(filepath).gsub(/[^0-9A-Za-z.\-\/]/, '_').gsub(/\.(?=.*\.)/, '')
end
end
end
Binary file added spec/fixtures/"file_with_double_quote".low.mp4
Binary file not shown.
Binary file added spec/fixtures/"file_with_double_quote".mp4
Binary file not shown.
Binary file added spec/fixtures/'file_with_single_quote'.low.mp4
Binary file not shown.
Binary file added spec/fixtures/'file_with_single_quote'.mp4
Binary file not shown.
Binary file added spec/fixtures/file.with :=+%sp3c!l-ch4cts().mp4
Binary file not shown.
Binary file added spec/fixtures/file.with...periods.mp4
Binary file not shown.
48 changes: 48 additions & 0 deletions spec/integration/elastic_transcoder_adapter_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,54 @@
end
end

describe "#copy_to_input_bucket" do
context "when filename has no special characters" do
context "non-s3 file" do
let(:input_url) { "spec/fixtures/fireworks.mp4" }
let(:source_bucket) { "bucket1" }

it "calls the #upload_to_s3 method" do
allow(SecureRandom).to receive(:uuid).and_return("randomstring")
expect(described_class.new.send(:copy_to_input_bucket, input_url, source_bucket)).to eq "randomstring/fireworks.mp4"
end
end
context "s3 file" do
let(:input_url) { "s3://bucket1/file.mp4" }
let(:source_bucket) { "bucket1" }

it "calls the #check_s3_bucket method" do
expect(described_class.new.send(:copy_to_input_bucket, input_url, source_bucket)).to eq "file.mp4"
end
end
end
context "when filename has special characters" do
context "non-s3 file" do
let(:input) { ["'file_with_single_quote'.mp4", '"file_with_double_quote".mp4', "file with space.mp4", "file.with...periods.mp4", "file.with :=+%sp3c!l-ch4cts().mp4"] }
let(:clean) { ["_file_with_single_quote_.mp4", "_file_with_double_quote_.mp4", "file_with_space.mp4", "filewithperiods.mp4", "filewith_____sp3c_l-ch4cts__.mp4"] }
let(:source_bucket) { "bucket1" }

it "calls the #upload_to_s3 method" do
allow(SecureRandom).to receive(:uuid).and_return("randomstring")
input.each_with_index do |url, index|
expect(described_class.new.send(:copy_to_input_bucket, "spec/fixtures/#{url}", source_bucket)).to eq "randomstring/#{clean[index]}"
end
end
end
context "s3 file" do
let(:input_urls) { ["s3://bucket1/'file_with_single_quote'.mp4", 's3://bucket1/"file_with_double_quote".mp4', "s3://bucket1/file with space.mp4", "s3://bucket1/file.with...periods.mp4", "s3://bucket1/file.with :=+%sp3c!l-ch4cts().mp4"] }
let(:clean) { ["_file_with_single_quote_.mp4", "_file_with_double_quote_.mp4", "file_with_space.mp4", "filewithperiods.mp4", "filewith_____sp3c_l-ch4cts__.mp4"] }
let(:source_bucket) { "bucket2" }

it "calls the #check_s3_bucket method" do
allow(SecureRandom).to receive(:uuid).and_return("randomstring")
input_urls.each_with_index do |url, index|
expect(described_class.new.send(:copy_to_input_bucket, url, source_bucket)).to eq "randomstring/#{clean[index]}"
end
end
end
end
end

describe "#check_s3_bucket" do
context "when file exists in masterfile_bucket" do
let(:input_url) { "s3://bucket1/file.mp4" }
Expand Down
108 changes: 108 additions & 0 deletions spec/integration/ffmpeg_adapter_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,114 @@ def find_encode(id)
end
end

context "input filename with single quotes" do
let(:file_with_single_quote) { "file://" + Rails.root.join('..', 'spec', 'fixtures', "'file_with_single_quote'.mp4").to_s }
let!(:create_single_quote_job) { ActiveEncode::Base.create(file_with_single_quote, outputs: [{ label: "low", ffmpeg_opt: "-s 640x480", extension: 'mp4' }]) }
let(:find_single_quote_job) { ActiveEncode::Base.find create_single_quote_job.id }

it "does not have errors" do
sleep 2
expect(find_single_quote_job.errors).to be_empty
end

it "has the input technical metadata in a file" do
expect(File.read("#{work_dir}/#{create_single_quote_job.id}/input_metadata")).not_to be_empty
end

it "has the pid in a file" do
expect(File.read("#{work_dir}/#{create_single_quote_job.id}/pid")).not_to be_empty
end

context 'when uri encoded' do
let(:file_with_single_quote) { Addressable::URI.encode("file://" + Rails.root.join('..', 'spec', 'fixtures', "'file_with_single_quote'.mp4").to_s) }

it "does not have errors" do
sleep 2
expect(find_single_quote_job.errors).to be_empty
end

it "has the input technical metadata in a file" do
expect(File.read("#{work_dir}/#{create_single_quote_job.id}/input_metadata")).not_to be_empty
end

it "has the pid in a file" do
expect(File.read("#{work_dir}/#{create_single_quote_job.id}/pid")).not_to be_empty
end
end
end

context "input filename with double quotes" do
let(:file_with_double_quote) { "file://" + Rails.root.join('..', 'spec', 'fixtures', '"file_with_double_quote".mp4').to_s }
let!(:create_double_quote_job) { ActiveEncode::Base.create(file_with_double_quote, outputs: [{ label: "low", ffmpeg_opt: "-s 640x480", extension: 'mp4' }]) }
let(:find_double_quote_job) { ActiveEncode::Base.find create_double_quote_job.id }

it "does not have errors" do
sleep 2
expect(find_double_quote_job.errors).to be_empty
end

it "has the input technical metadata in a file" do
expect(File.read("#{work_dir}/#{create_double_quote_job.id}/input_metadata")).not_to be_empty
end

it "has the pid in a file" do
expect(File.read("#{work_dir}/#{create_double_quote_job.id}/pid")).not_to be_empty
end

context 'when uri encoded' do
let(:file_with_double_quote) { Addressable::URI.encode("file://" + Rails.root.join('..', 'spec', 'fixtures', '"file_with_double_quote".mp4').to_s) }

it "does not have errors" do
sleep 2
expect(find_double_quote_job.errors).to be_empty
end

it "has the input technical metadata in a file" do
expect(File.read("#{work_dir}/#{create_double_quote_job.id}/input_metadata")).not_to be_empty
end

it "has the pid in a file" do
expect(File.read("#{work_dir}/#{create_double_quote_job.id}/pid")).not_to be_empty
end
end
end

context "input filename with other special characters" do
let(:file_with_special_characters) { "file://" + Rails.root.join('..', 'spec', 'fixtures', 'file.with :=+%sp3c!l-ch4cts().mp4').to_s }
let!(:create_special_characters_job) { ActiveEncode::Base.create(file_with_special_characters, outputs: [{ label: "low", ffmpeg_opt: "-s 640x480", extension: 'mp4' }]) }
let(:find_special_characters_job) { ActiveEncode::Base.find create_special_characters_job.id }

it "does not have errors" do
sleep 2
expect(find_special_characters_job.errors).to be_empty
end

it "has the input technical metadata in a file" do
expect(File.read("#{work_dir}/#{create_special_characters_job.id}/input_metadata")).not_to be_empty
end

it "has the pid in a file" do
expect(File.read("#{work_dir}/#{create_special_characters_job.id}/pid")).not_to be_empty
end

context 'when uri encoded' do
let(:file_with_special_characters) { Addressable::URI.encode("file://" + Rails.root.join('..', 'spec', 'fixtures', 'file.with :=+%sp3c!l-ch4cts().mp4').to_s) }

it "does not have errors" do
sleep 2
expect(find_special_characters_job.errors).to be_empty
end

it "has the input technical metadata in a file" do
expect(File.read("#{work_dir}/#{create_special_characters_job.id}/input_metadata")).not_to be_empty
end

it "has the pid in a file" do
expect(File.read("#{work_dir}/#{create_special_characters_job.id}/pid")).not_to be_empty
end
end
end

context 'when failed' do
subject { created_job }

Expand Down
51 changes: 51 additions & 0 deletions spec/integration/media_convert_adapter_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -40,12 +40,15 @@ def reconstitute_response(fixture_path)
let(:cloudwatch_events) { Aws::CloudWatchEvents::Client.new(stub_responses: true) }
let(:cloudwatch_logs) { Aws::CloudWatchLogs::Client.new(stub_responses: true) }

let(:s3client) { Aws::S3::Client.new(stub_responses: true) }

before do
mediaconvert.stub_responses(:describe_endpoints, reconstitute_response("media_convert/endpoints.json"))

allow(Aws::MediaConvert::Client).to receive(:new).and_return(mediaconvert)
allow(Aws::CloudWatchEvents::Client).to receive(:new).and_return(cloudwatch_events)
allow(Aws::CloudWatchLogs::Client).to receive(:new).and_return(cloudwatch_logs)
allow(Aws::S3::Client).to receive(:new).and_return(s3client)
end

let(:created_job) do
Expand Down Expand Up @@ -267,4 +270,52 @@ def reconstitute_response(fixture_path)
completed_job
end
end

describe "#s3_uri" do
context "when filename has no special characters" do
context "non-s3 file" do
let(:input_url) { "spec/fixtures/fireworks.mp4" }
let(:source_bucket) { "bucket1" }

it "calls the #upload_to_s3 method" do
allow(SecureRandom).to receive(:uuid).and_return("randomstring")
expect(described_class.new.send(:s3_uri, input_url, { masterfile_bucket: source_bucket })).to eq "randomstring/fireworks.mp4"
end
end
context "s3 file" do
let(:input_url) { "s3://bucket1/file.mp4" }
let(:source_bucket) { "bucket1" }

it "calls the #check_s3_bucket method" do
expect(described_class.new.send(:s3_uri, input_url, { masterfile_bucket: source_bucket })).to eq "file.mp4"
end
end
end
context "when filename has special characters" do
context "non-s3 file" do
let(:input) { ["'file_with_single_quote'.mp4", '"file_with_double_quote".mp4', "file with space.mp4", "file.with...periods.mp4", "file.with :=+%sp3c!l-ch4cts().mp4"] }
let(:clean) { ["_file_with_single_quote_.mp4", "_file_with_double_quote_.mp4", "file_with_space.mp4", "filewithperiods.mp4", "filewith_____sp3c_l-ch4cts__.mp4"] }
let(:source_bucket) { "bucket1" }

it "calls the #upload_to_s3 method" do
allow(SecureRandom).to receive(:uuid).and_return("randomstring")
input.each_with_index do |url, index|
expect(described_class.new.send(:s3_uri, "spec/fixtures/#{url}", { masterfile_bucket: source_bucket })).to eq "randomstring/#{clean[index]}"
end
end
end
context "s3 file" do
let(:input_urls) { ["s3://bucket1/'file_with_single_quote'.mp4", 's3://bucket1/"file_with_double_quote".mp4', "s3://bucket1/file with space.mp4", "s3://bucket1/file.with...periods.mp4", "s3://bucket1/file.with :=+%sp3c!l-ch4cts().mp4"] }
let(:clean) { ["_file_with_single_quote_.mp4", "_file_with_double_quote_.mp4", "file_with_space.mp4", "filewithperiods.mp4", "filewith_____sp3c_l-ch4cts__.mp4"] }
let(:source_bucket) { "bucket2" }

it "calls the #check_s3_bucket method" do
allow(SecureRandom).to receive(:uuid).and_return("randomstring")
input_urls.each_with_index do |url, index|
expect(described_class.new.send(:s3_uri, url, { masterfile_bucket: source_bucket })).to eq "randomstring/#{clean[index]}"
end
end
end
end
end
end
Loading

0 comments on commit 34c0828

Please sign in to comment.