diff --git a/WORKSPACE b/WORKSPACE index 6b0a9f9318..644a7fb216 100644 --- a/WORKSPACE +++ b/WORKSPACE @@ -26,6 +26,7 @@ # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. load("@bazel_tools//tools/build_defs/repo:git.bzl", "git_repository") +load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive") # BoringSSL, see # https://boringssl.googlesource.com/boringssl/+/master/INCORPORATING.md#bazel @@ -35,3 +36,30 @@ git_repository( remote = "https://boringssl.googlesource.com/boringssl", shallow_since = "1559759280 +0000", ) + +# Sandboxed API +git_repository( + name = "com_google_sandboxed_api", + commit = "2301e05097818734f59b881d7fbe1624c17fc840", # 2019-07-08 + remote = "https://github.com/google/sandboxed-api.git", + shallow_since = "1562590596 -0700", +) + +load( + "@com_google_sandboxed_api//sandboxed_api/bazel:sapi_deps.bzl", + "sapi_deps", +) + +sapi_deps() + +load("@com_google_protobuf//:protobuf_deps.bzl", "protobuf_deps") + +protobuf_deps() + +# GoogleTest/GoogleMock for testing the sandbox +http_archive( + name = "com_google_googletest", + sha256 = "baed63b97595c32667694de0a434f8f23da59609c4a44f3360ba94b0abd5c583", + strip_prefix = "googletest-8ffb7e5c88b20a297a2e786c480556467496463b", + urls = ["https://github.com/google/googletest/archive/8ffb7e5c88b20a297a2e786c480556467496463b.zip"], # 2019-05-30 +) diff --git a/sandbox/BUILD.bazel b/sandbox/BUILD.bazel new file mode 100644 index 0000000000..7fa7947265 --- /dev/null +++ b/sandbox/BUILD.bazel @@ -0,0 +1,125 @@ +# Copyright (c) 2019. The YARA Authors. All Rights Reserved. +# +# Redistribution and use in source and binary forms, with or without modification, +# are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# 2. 
Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation and/or +# other materials provided with the distribution. +# +# 3. Neither the name of the copyright holder nor the names of its contributors +# may be used to endorse or promote products derived from this software without +# specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR +# ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON +# ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +load( + "@com_google_sandboxed_api//sandboxed_api/bazel:sapi.bzl", + "sapi_library", +) + +# Proto message that stores YARA matches. Used to communicate matches from +# the sandboxee to the host code. +proto_library( + name = "yara_matches", + srcs = ["yara_matches.proto"], +) + +cc_proto_library( + name = "yara_matches_cc_proto", + deps = [":yara_matches"], +) + +# Library with a callback function to collect YARA matches into a YaraMatches +# proto +cc_library( + name = "collect_matches", + srcs = ["collect_matches.cc"], + hdrs = ["collect_matches.h"], + visibility = ["//visibility:public"], + deps = [ + ":yara_matches_cc_proto", + "//:yara", + ], +) + +# The sandboxee side of the YARA sandbox. This implements a dispatch queue +# shared by multiple worker threads. 
YARA rules are shared across all threads +# to keep memory usage down. +cc_library( + name = "yara_entry_points", + srcs = ["yara_entry_points.cc"], + deps = [ + ":collect_matches", + ":yara_matches_cc_proto", + "//:libyara", + "@com_google_absl//absl/base:core_headers", + "@com_google_absl//absl/container:node_hash_map", + "@com_google_absl//absl/strings", + "@com_google_absl//absl/synchronization", + ], + alwayslink = 1, +) + +# Sandboxed API for YARA. This is what clients of this library should use. The +# API is intentionally minimal and may be extended in the future. +# See the "sandboxed_yara" target for an example of how to use this from code. +sapi_library( + name = "yara_sapi", + srcs = ["yara_transaction.cc"], + hdrs = ["yara_transaction.h"], + embed = True, + functions = [ + "YaraAsyncScanFd", + "YaraGetScanResult", + "YaraInitWorkers", + "YaraLoadRules", + ], + input_files = ["yara_entry_points.cc"], + lib = ":yara_entry_points", + lib_name = "Yara", + namespace = "yara::sandbox", + visibility = ["//visibility:public"], + deps = [ + ":yara_matches_cc_proto", + "//:yara_errors", + "@com_google_absl//absl/memory", + "@com_google_absl//absl/synchronization", + "@com_google_absl//absl/time", + "@com_google_sandboxed_api//sandboxed_api/sandbox2/util:bpf_helper", + "@com_google_sandboxed_api//sandboxed_api/util:status", + ], +) + +cc_test( + name = "yara_transaction_test", + srcs = ["yara_transaction_test.cc"], + deps = [ + ":yara_sapi", + "@com_google_googletest//:gtest_main", + "@com_google_sandboxed_api//sandboxed_api/util:status_matchers", + ], +) + +# Sandboxed command-line executable demonstrating how to use the YARA SAPI. 
+cc_binary( + name = "sandboxed_yara", + srcs = ["sandboxed_yara.cc"], + deps = [ + ":yara_sapi", + "@com_google_absl//absl/flags:parse", + "@com_google_absl//absl/strings", + ], +) diff --git a/sandbox/collect_matches.cc b/sandbox/collect_matches.cc new file mode 100644 index 0000000000..b827f7da66 --- /dev/null +++ b/sandbox/collect_matches.cc @@ -0,0 +1,68 @@ +/* +Copyright (c) 2019. The YARA Authors. All Rights Reserved. + +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, this +list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright notice, +this list of conditions and the following disclaimer in the documentation and/or +other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its contributors +may be used to endorse or promote products derived from this software without +specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR +ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON +ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*/ + +#include "sandbox/collect_matches.h" + +#include "libyara/include/yara.h" +#include "sandbox/yara_matches.pb.h" + +namespace yara { + +int CollectMatches(int message, void* message_data, void* user_data) { + if (message != CALLBACK_MSG_RULE_MATCHING) { + return ERROR_SUCCESS; // Not a rule match notification, nothing to collect + } + + auto* rule = static_cast<YR_RULE*>(message_data); + YR_META* rule_meta = rule->metas; + + auto* match = static_cast<YaraMatches*>(user_data)->add_match(); + if (rule->ns != nullptr && rule->ns->name != nullptr) { + match->mutable_id()->set_rule_namespace(rule->ns->name); + } + match->mutable_id()->set_rule_name(rule->identifier); + while (!META_IS_NULL(rule_meta)) { + auto* meta = match->add_meta(); + meta->set_identifier(rule_meta->identifier); + switch (rule_meta->type) { + case META_TYPE_BOOLEAN: + case META_TYPE_INTEGER: + meta->set_int_value(rule_meta->integer); + break; + case META_TYPE_STRING: + meta->set_bytes_value(rule_meta->string); + break; + } + ++rule_meta; + } + + return ERROR_SUCCESS; +} + +} // namespace yara diff --git a/sandbox/collect_matches.h b/sandbox/collect_matches.h new file mode 100644 index 0000000000..55b887ae6b --- /dev/null +++ b/sandbox/collect_matches.h @@ -0,0 +1,41 @@ +/* +Copyright (c) 2019. The YARA Authors. All Rights Reserved. + +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, this +list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright notice, +this list of conditions and the following disclaimer in the documentation and/or +other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its contributors +may be used to endorse or promote products derived from this software without +specific prior written permission. 
+ +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR +ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON +ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifndef SANDBOX_COLLECT_MATCHES_H_ +#define SANDBOX_COLLECT_MATCHES_H_ + +namespace yara { + +// Callback function for yr_rules_scan_fd() that collects YARA matches in a +// YaraMatches proto given in user_data. +int CollectMatches(int message, void* message_data, void* user_data); + +} // namespace yara + +#endif // SANDBOX_COLLECT_MATCHES_H_ diff --git a/sandbox/sandboxed_yara.cc b/sandbox/sandboxed_yara.cc new file mode 100644 index 0000000000..c02eb2d90c --- /dev/null +++ b/sandbox/sandboxed_yara.cc @@ -0,0 +1,139 @@ +/* +Copyright (c) 2019. The YARA Authors. All Rights Reserved. + +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, this +list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright notice, +this list of conditions and the following disclaimer in the documentation and/or +other materials provided with the distribution. + +3. 
Neither the name of the copyright holder nor the names of its contributors +may be used to endorse or promote products derived from this software without +specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR +ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON +ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#include +#include + +#include +#include +#include +#include +#include +#include + +#include "sandbox/yara_transaction.h" +#include "sandboxed_api/util/canonical_errors.h" +#include "sandboxed_api/util/statusor.h" +// TODO(cblichmann): SAPI leaks these symbols currently. 
+#undef ABSL_FLAG +#undef ABSL_DECLARE_FLAG +#undef ABSL_RETIRED_FLAG + +#include "absl/flags/flag.h" +#include "absl/flags/internal/usage.h" +#include "absl/flags/parse.h" +#include "absl/time/time.h" +#include "absl/strings/str_cat.h" + +ABSL_FLAG(std::string, identifier, "", "print only rules with this name"); +ABSL_FLAG(int, timeout, 5, "abort scanning after the given number of seconds"); + +namespace yara { +namespace { + +::sapi::StatusOr ReadFileToString(absl::string_view filename) { + std::ifstream input(std::string(filename), std::ios::in | std::ios::binary); + std::ostringstream output; + output << input.rdbuf(); + if (!input) { + return ::sapi::UnknownError(absl::StrCat("Cannot read file '", filename, "'")); + } + return output.str(); +} + +} // namespace + +// Implements a subset of the YARA command line scanner, but runs the actual +// scan inside of a sandbox. +::sapi::Status YaraMain(const std::vector& args) { + if (args.size() < 3) { + return ::sapi::InvalidArgumentError("Missing operand. Try '--help'."); + } + + // Get file to scan and concatenate all the YARA rules from the specified + // files. 
+ std::string scan_filename = args.back(); + std::string all_rules; + for (size_t i = 1; i != args.size() - 1; ++i) { + SAPI_ASSIGN_OR_RETURN(std::string rules, ReadFileToString(args[i])); + absl::StrAppend(&all_rules, rules, "\n"); + } + + SAPI_ASSIGN_OR_RETURN( + auto transaction, + YaraTransaction::Create( + YaraTransaction::Options() + .set_scan_timeout(absl::Seconds(absl::GetFlag(FLAGS_timeout))) + .set_num_workers(1))); + SAPI_ASSIGN_OR_RETURN(int num_rules ABSL_ATTRIBUTE_UNUSED, + transaction->LoadRules(all_rules)); + + struct FDCloser { + ~FDCloser() { close(fd); } + int fd; + } fd_closer{open(scan_filename.c_str(), O_RDONLY)}; + if (fd_closer.fd == -1) { + return ::sapi::UnknownError(absl::StrCat( + "Cannot open file '", scan_filename, "': ", strerror(errno))); + } + + SAPI_ASSIGN_OR_RETURN(YaraMatches matches, transaction->ScanFd(fd_closer.fd)); + for (const auto& match : matches.match()) { + const std::string& rule_name = match.id().rule_name(); + if (absl::GetFlag(FLAGS_identifier).empty() || + (absl::GetFlag(FLAGS_identifier) == rule_name)) { + absl::PrintF("%s %s\n", rule_name, scan_filename); + } + } + + return ::sapi::OkStatus(); +} + +} // namespace yara + +int main(int argc, char* argv[]) { + absl::string_view argv0 = argv[0]; + { + auto last_slash_pos = argv0.find_last_of("/\\"); + if (last_slash_pos != absl::string_view::npos) { + argv0 = argv0.substr(last_slash_pos + 1); + } + } + // TODO(cblichmann): Use public API once available from Bazel builds. + absl::flags_internal::SetProgramUsageMessage( + absl::StrCat("YARA, the pattern matching swiss army knife.\n", + "Usage: ", argv0, " [OPTION] RULES_FILE... 
FILE")); + + ::sapi::Status status = ::yara::YaraMain(absl::ParseCommandLine(argc, argv)); + if (!status.ok()) { + absl::FPrintF(stderr, "ERROR: %s\n", status.message()); + return EXIT_FAILURE; + } + return EXIT_SUCCESS; +} diff --git a/sandbox/yara_entry_points.cc b/sandbox/yara_entry_points.cc new file mode 100644 index 0000000000..8e3a147b4a --- /dev/null +++ b/sandbox/yara_entry_points.cc @@ -0,0 +1,230 @@ +/* +Copyright (c) 2019. The YARA Authors. All Rights Reserved. + +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, this +list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright notice, +this list of conditions and the following disclaimer in the documentation and/or +other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its contributors +may be used to endorse or promote products derived from this software without +specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR +ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON +ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*/ + +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include "absl/base/attributes.h" +#include "absl/container/node_hash_map.h" +#include "absl/strings/str_cat.h" +#include "absl/strings/string_view.h" +#include "absl/synchronization/mutex.h" +#include "libyara/include/yara.h" +#include "sandbox/collect_matches.h" +#include "sandbox/yara_matches.pb.h" + +namespace yara { +namespace { + +struct ScanTask { + // Key into the g_results map, used by YaraGetScanResult() + uint64_t result_id; + + // File descriptor containing the data to scan + int data_fd; + + // File descriptor used to signal the host code on scan completion + int event_fd; + + // Scan timeout. YARA only supports second granularity. + absl::Duration timeout; +}; + +struct ScanResult { + int code; + YaraMatches matches; +}; + +static const bool g_init_done ABSL_ATTRIBUTE_UNUSED = []() { + // Disable output buffering + setbuf(stdout, nullptr); + setbuf(stderr, nullptr); + + // Increase stack size + struct rlimit stack_limit; + stack_limit.rlim_cur = 1 << 20 /* 1 MiB */; + stack_limit.rlim_max = stack_limit.rlim_cur; + ABSL_RAW_CHECK(setrlimit(RLIMIT_STACK, &stack_limit) == 0, strerror(errno)); + + // Initialize YARA. Note that the sandboxed code never calls yr_finalize(). + // Instead, the OS will clean up on process exit. + const int err = yr_initialize(); + ABSL_RAW_CHECK( + err == ERROR_SUCCESS, + absl::StrCat("yr_initialize() failed with code: ", err).c_str()); + return true; +}(); + +// Global dispatch queue used to schedule new scan tasks +ABSL_CONST_INIT static absl::Mutex g_queue_mutex(absl::kConstInit); +static auto* g_queue GUARDED_BY(g_queue_mutex) = new std::queue(); + +static uint64_t g_result_id GUARDED_BY(g_queue_mutex) = 0; + +// This map tracks scan results. It relies on pointers staying stable, so this +// uses a node_hash_map<> instead of a flat_hash_map<>. 
+ABSL_CONST_INIT static absl::Mutex g_results_mutex(absl::kConstInit); +static auto* g_results GUARDED_BY(g_results_mutex) = + new absl::node_hash_map(); + +ABSL_CONST_INIT static absl::Mutex g_rules_mutex(absl::kConstInit); +static YR_RULES* g_rules GUARDED_BY(g_rules_mutex) = nullptr; + +void ScanWorker() { + while (true) { + // Wait for and retrieve a new ScanTask from the queue. + g_queue_mutex.LockWhen(absl::Condition( + +[](std::queue* queue) { return !queue->empty(); }, g_queue)); + const ScanTask task = std::move(g_queue->front()); + g_queue->pop(); + g_queue_mutex.Unlock(); + + ScanResult result; + { + absl::ReaderMutexLock lock(&g_rules_mutex); + result.code = yr_rules_scan_fd( + g_rules, task.data_fd, + // Disable SIGSEGV handler, allowing YARA to crash/coredump. + SCAN_FLAGS_NO_TRYCATCH, CollectMatches, + /*user_data=*/reinterpret_cast(&result.matches), + absl::ToInt64Seconds(task.timeout)); + } + { + absl::MutexLock lock(&g_results_mutex); + (*g_results)[task.result_id] = std::move(result); + } + + // Unblock any waiting clients on the host side. This should always succeed + // writing 8 bytes, as long as the event_fd stays open in this function, + // hence the CHECK. + uint64_t unblock_value = 1; + ABSL_RAW_CHECK(write(task.event_fd, &unblock_value, + sizeof(unblock_value)) == sizeof(unblock_value), + strerror(errno)); + + close(task.event_fd); + close(task.data_fd); + } +} + +} // namespace + +extern "C" void YaraInitWorkers(int num_workers) { + // hardware_concurrency() may be 0 (unknown); always start >= 1 worker. + const int hw = static_cast<int>(std::thread::hardware_concurrency()); + const int num_threads = std::max(1, std::min({num_workers, YR_MAX_THREADS, hw})); + static auto* workers = new std::vector<std::thread>(); + workers->reserve(num_threads); + for (int i = 0; i < num_threads; ++i) { + workers->emplace_back(ScanWorker); + } +} + +// Initializes the global YARA rules set from a string. Returns the number of +// rules loaded. Extended error information can be found in status if it is not +// nullptr. 
+extern "C" int YaraLoadRules(const char* rule_string, YaraStatus* error_status) { + _YR_COMPILER* compiler; + int error = yr_compiler_create(&compiler); + if (error != ERROR_SUCCESS) { + if (error_status) { + error_status->set_code(error); + } + return 0; + } + std::unique_ptr<_YR_COMPILER, void (*)(_YR_COMPILER*)> compiler_cleanup( + compiler, yr_compiler_destroy); + + if (yr_compiler_add_string(compiler, rule_string, /*namespace_=*/nullptr) != + 0) { + if (error_status) { + error_status->set_code(compiler->last_error); + + char message[512] = {'\0'}; + yr_compiler_get_error_message(compiler, message, sizeof(message)); + error_status->set_message(message); + } + return 0; + } + + YR_RULES* rules = nullptr; + error = yr_compiler_get_rules(compiler, &rules); + if (error != ERROR_SUCCESS) { + if (error_status) { + error_status->set_code(error); + } + return 0; + } + + int num_rules = 0; + YR_RULE* rule; + yr_rules_foreach(rules, rule) { ++num_rules; } + + absl::MutexLock lock(&g_rules_mutex); + if (g_rules) { + yr_rules_destroy(g_rules); + } + g_rules = rules; + + return num_rules; +} + +// Schedules a new asynchronous YARA scan task on the data in the specified file +// descriptor. Notifies host code via writing to the event_fd file descriptor. +// Returns a unique identifier that can be used to retrieve the results. 
+extern "C" uint64_t YaraAsyncScanFd(int data_fd, int event_fd, + int timeout_secs) { + absl::MutexLock queue_lock(&g_queue_mutex); + ++g_result_id; + g_queue->push({g_result_id, data_fd, event_fd, absl::Seconds(timeout_secs)}); + return g_result_id; +} + +extern "C" int YaraGetScanResult(uint64_t result_id, YaraMatches* matches) { + absl::MutexLock lock(&g_results_mutex); + auto result = g_results->find(result_id); + if (result == g_results->end()) { + return -1; + } + + int code = result->second.code; + *matches = std::move(result->second.matches); + g_results->erase(result); + return code; +} + +} // namespace yara diff --git a/sandbox/yara_matches.proto b/sandbox/yara_matches.proto new file mode 100644 index 0000000000..a4c3e9b391 --- /dev/null +++ b/sandbox/yara_matches.proto @@ -0,0 +1,63 @@ +/* +Copyright (c) 2019. The YARA Authors. All Rights Reserved. + +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, this +list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright notice, +this list of conditions and the following disclaimer in the documentation and/or +other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its contributors +may be used to endorse or promote products derived from this software without +specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR +ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON +ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +syntax = "proto3"; + +package yara; + +// Identifies a single rule inside a namespace +message YaraRuleId { + string rule_namespace = 1; // Currently unused by yara_entry_points.cc + string rule_name = 2; +} + +// Holds N mappings for the matches. +message YaraMatches { + // Holds one mapping from (namespace, name) --> N key-value entries. + message Match { + message Meta { + string identifier = 1; + oneof value { + bytes bytes_value = 2; + int64 int_value = 3; + } + } + + YaraRuleId id = 1; + repeated Meta meta = 2; + } + + repeated Match match = 1; +} + +message YaraStatus { + int64 code = 1; + int64 line_number = 2; + string message = 3; +} diff --git a/sandbox/yara_transaction.cc b/sandbox/yara_transaction.cc new file mode 100644 index 0000000000..8e8f923ec4 --- /dev/null +++ b/sandbox/yara_transaction.cc @@ -0,0 +1,161 @@ +/* +Copyright (c) 2019. The YARA Authors. All Rights Reserved. + +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, this +list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright notice, +this list of conditions and the following disclaimer in the documentation and/or +other materials provided with the distribution. + +3. 
Neither the name of the copyright holder nor the names of its contributors +may be used to endorse or promote products derived from this software without +specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR +ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON +ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#include "sandbox/yara_transaction.h" + +#include +#include +#include + +#include "absl/strings/str_cat.h" +#include "libyara/include/yara/error.h" +#include "sandboxed_api/util/canonical_errors.h" +#include "sandboxed_api/util/status_macros.h" + +namespace yara { + +absl::Mutex YaraTransaction::mutex_(absl::kConstInit); + +::sapi::StatusOr> YaraTransaction::Create( + Options options) { + auto transaction = + absl::WrapUnique(new YaraTransaction(options.scan_timeout)); + // "Run" the transaction in order to initialize the underlying sandbox. + SAPI_RETURN_IF_ERROR(transaction->Run()); + + sandbox::YaraApi api(transaction->GetSandbox()); + SAPI_RETURN_IF_ERROR( + api.YaraInitWorkers(options.num_workers >= 1 ? 
options.num_workers : 1)); + + return transaction; +} + +::sapi::StatusOr YaraTransaction::LoadRules( + const std::string& rule_string) { + absl::MutexLock lock(&mutex_); + sandbox::YaraApi api(GetSandbox()); + + ::sapi::v::ConstCStr rule_string_sapi(rule_string.c_str()); + YaraStatus error_status; + ::sapi::v::Proto error_status_sapi(error_status); + SAPI_ASSIGN_OR_RETURN(int num_rules, + api.YaraLoadRules(rule_string_sapi.PtrBefore(), + error_status_sapi.PtrBoth())); + if (num_rules <= 0) { + auto error_status_copy = error_status_sapi.GetProtoCopy(); + if (!error_status_copy) { + return ::sapi::UnknownError("Deserialization of response failed"); + } + return ::sapi::InvalidArgumentError(error_status_copy->message()); + } + return num_rules; +} + +::sapi::StatusOr YaraTransaction::ScanFd(int fd) { + int local_event_fd = eventfd(0 /* initval */, 0 /* flags */); + if (local_event_fd == -1) { + return ::sapi::InternalError( + absl::StrCat("eventfd() error: ", strerror(errno))); + } + struct FDCloser { + ~FDCloser() { close(event_fd); } + int event_fd; + } event_fd_closer = {local_event_fd}; + + auto* sandbox = GetSandbox(); + sandbox::YaraApi api(sandbox); + uint64_t result_id; + { + absl::MutexLock lock(&mutex_); + + // Note: These SAPI Fd objects use the underlying sandbox comms to + // synchronize. Hence they must live within this locked scope. 
+ ::sapi::v::Fd event_fd{local_event_fd}; + SAPI_RETURN_IF_ERROR(sandbox->TransferToSandboxee(&event_fd)); + event_fd.OwnLocalFd(false); // Needs to be valid during poll() + event_fd.OwnRemoteFd(false); // Sandboxee will close + + ::sapi::v::Fd data_fd{fd}; + SAPI_RETURN_IF_ERROR(sandbox->TransferToSandboxee(&data_fd)); + data_fd.OwnLocalFd(false); // To be closed by caller + data_fd.OwnRemoteFd(false); // Sandboxee will close + + SAPI_ASSIGN_OR_RETURN( + result_id, + api.YaraAsyncScanFd(data_fd.GetRemoteFd(), event_fd.GetRemoteFd(), + absl::ToInt64Seconds(scan_timeout_))); + } + + pollfd poll_events{local_event_fd, POLLIN}; + int poll_result; + + // TEMP_FAILURE_RETRY is a GNU extension that retries if the call returns + // EINTR. + poll_result = TEMP_FAILURE_RETRY( + poll(&poll_events, 1 /* nfds */, + // Add extra time to allow code inside the sandbox to time out first. + absl::ToInt64Milliseconds(scan_timeout_ + absl::Seconds(10)))); + if (poll_result == 0) { + return ::sapi::DeadlineExceededError("Scan timeout during poll()"); + } + + if (poll_result == -1) { + return ::sapi::InternalError( + absl::StrCat("poll() error: ", strerror(errno))); + } + if (poll_events.revents & POLLHUP || poll_events.revents & POLLERR || + poll_events.revents & POLLNVAL) { + return ::sapi::InternalError( + absl::StrCat("poll() error, revents: ", poll_events.revents)); + } + + absl::MutexLock lock(&mutex_); + YaraMatches matches; + ::sapi::v::Proto matches_sapi(matches); + SAPI_ASSIGN_OR_RETURN( + int scan_result, + api.YaraGetScanResult(result_id, matches_sapi.PtrBoth())); + switch (scan_result) { + case ERROR_SUCCESS: + case ERROR_TOO_MANY_MATCHES: { + auto matches_copy = matches_sapi.GetProtoCopy(); + if (!matches_copy) { + return ::sapi::UnknownError("Deserialization of response failed"); + } + return *matches_copy; + } + + case ERROR_SCAN_TIMEOUT: + return ::sapi::DeadlineExceededError("Scan timeout"); + } + return ::sapi::InternalError( + absl::StrCat("Error during scan: ", 
scan_result)); +} + +} // namespace yara diff --git a/sandbox/yara_transaction.h b/sandbox/yara_transaction.h new file mode 100644 index 0000000000..bffa346593 --- /dev/null +++ b/sandbox/yara_transaction.h @@ -0,0 +1,128 @@ +/* +Copyright (c) 2019. The YARA Authors. All Rights Reserved. + +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, this +list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright notice, +this list of conditions and the following disclaimer in the documentation and/or +other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its contributors +may be used to endorse or promote products derived from this software without +specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR +ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON +ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*/
+
+#ifndef SANDBOX_TRANSACTION_H_
+#define SANDBOX_TRANSACTION_H_
+
+#include
+#include
+#include
+
+#include "absl/memory/memory.h"
+#include "absl/synchronization/mutex.h"
+#include "absl/time/time.h"
+#include "sandbox/yara_matches.pb.h"
+#include "sandbox/yara_sapi.sapi.h"
+#include "sandboxed_api/sandbox.h"
+#include "sandboxed_api/sandbox2/executor.h"
+#include "sandboxed_api/sandbox2/policy.h"
+#include "sandboxed_api/sandbox2/util.h"
+#include "sandboxed_api/sandbox2/util/bpf_helper.h"
+#include "sandboxed_api/transaction.h"
+
+namespace yara {
+
+// Sandbox definition used by YaraTransaction. Extends sandbox::YaraSandbox
+// with the syscall policy and the resource limits applied to the sandboxee.
+class YaraSandbox : public sandbox::YaraSandbox {
+ public:
+  // Completes the policy on top of `builder`: static startup, mmap and fork
+  // are allowed, plus the individual syscalls listed below.
+  std::unique_ptr<sandbox2::Policy> ModifyPolicy(
+      sandbox2::PolicyBuilder* builder) override {
+    return (*builder)
+        .AllowStaticStartup()
+        .AllowMmap()
+        .AllowFork()  // Thread creation
+        .AllowSyscalls({
+            __NR_madvise,
+            __NR_mprotect,
+            __NR_munlock,
+            __NR_poll,
+            __NR_sched_getparam,
+            __NR_sched_getscheduler,
+            __NR_sched_yield,
+        })
+        .BuildOrDie();
+  }
+
+  // Lifts the file-size and CPU-time rlimits on the executor.
+  void ModifyExecutor(sandbox2::Executor* executor) override {
+    (*executor->limits())
+        // Remove limit on file descriptor bytes.
+        .set_rlimit_fsize(RLIM64_INFINITY)
+        // Wall-time limit per call will be enforced by the Transaction.
+        .set_rlimit_cpu(RLIM64_INFINITY);
+  }
+};
+
+// Transaction class to run sandboxed Yara scans of the contents of file
+// descriptors. This class is thread-safe and access to the sandboxee is
+// multiplexed so that multiple threads can share the transaction.
+class YaraTransaction : public ::sapi::Transaction {
+ public:
+  // Settings accepted by Create(). The setters return *this so that calls can
+  // be chained on a temporary, e.g. Options{}.set_scan_timeout(...).
+  struct Options {
+    // Time limit for a scan.
+    absl::Duration scan_timeout;
+    // NOTE(review): presumably the number of sandboxee worker threads; the
+    // consumer of this value is not in this header - confirm in Create().
+    int num_workers;
+
+    Options& set_scan_timeout(absl::Duration value) {
+      scan_timeout = value;
+      return *this;
+    }
+
+    Options& set_num_workers(int value) {
+      num_workers = value;
+      return *this;
+    }
+  };
+
+  // Creates and initializes an instance of this transaction class with the
+  // specified options.
+  static ::sapi::StatusOr<std::unique_ptr<YaraTransaction>> Create(
+      Options options = {});
+
+  // Loads new Yara rules into the sandboxee. Returns the number of rules
+  // loaded. Only one set of rules can be active at any given time. This method
+  // blocks until all concurrent YARA scans are completed before updating the
+  // rules.
+  ::sapi::StatusOr<int> LoadRules(const std::string& rule_string)
+      LOCKS_EXCLUDED(mutex_);
+
+  // Scans the contents of the specified file descriptor.
+  // Returns DeadlineExceededError if the scan timed out.
+  ::sapi::StatusOr<YaraMatches> ScanFd(int fd) LOCKS_EXCLUDED(mutex_);
+
+ private:
+  // Builds the transaction around a new YaraSandbox and records the scan
+  // timeout. The timeout has to be stored here: without the initializer
+  // scan_timeout_ stays a zero duration.
+  explicit YaraTransaction(absl::Duration scan_timeout)
+      : ::sapi::Transaction(absl::make_unique<YaraSandbox>()),
+        scan_timeout_(scan_timeout) {}
+
+  // Mutex to guard communication with the sandboxee
+  // NOTE(review): declared static, so every YaraTransaction instance shares
+  // this single lock - confirm that this is intended.
+  static absl::Mutex mutex_;
+
+  // Scan time limit supplied at construction.
+  absl::Duration scan_timeout_;
+};
+
+}  // namespace yara
+
+#endif  // SANDBOX_TRANSACTION_H_
diff --git a/sandbox/yara_transaction_test.cc b/sandbox/yara_transaction_test.cc
new file mode 100644
index 0000000000..aa6cabce8e
--- /dev/null
+++ b/sandbox/yara_transaction_test.cc
@@ -0,0 +1,183 @@
+/*
+Copyright (c) 2019. The YARA Authors. All Rights Reserved.
+
+Redistribution and use in source and binary forms, with or without modification,
+are permitted provided that the following conditions are met:
+
+1. Redistributions of source code must retain the above copyright notice, this
+list of conditions and the following disclaimer.
+
+2. Redistributions in binary form must reproduce the above copyright notice,
+this list of conditions and the following disclaimer in the documentation and/or
+other materials provided with the distribution.
+
+3. Neither the name of the copyright holder nor the names of its contributors
+may be used to endorse or promote products derived from this software without
+specific prior written permission.
+ +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR +ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON +ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#include "sandbox/yara_transaction.h" + +#include // __NR_memfd_create +#include + +#include +#include +#include +#include + +#include "absl/strings/str_cat.h" +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "sandbox/yara_matches.pb.h" +#include "sandboxed_api/util/status_matchers.h" +#include "sandboxed_api/util/statusor.h" + +using ::sapi::IsOk; +using ::testing::Eq; +using ::testing::StrEq; + +namespace yara { +namespace { + +// Wraps an in-memory file descriptor created by memfd_create(). 
+class MemoryFD {
+ public:
+  // Creates an in-memory file through the memfd_create syscall, sizes it with
+  // ftruncate() and writes `content` into it. Returns UnknownError carrying
+  // the strerror() text if any of those steps fails; a descriptor that was
+  // already opened is closed when the local MemoryFD is destroyed.
+  static ::sapi::StatusOr<MemoryFD> CreateWithContent(
+      absl::string_view content) {
+    MemoryFD mem_fd;
+    // Avoid dependency on UAPI headers. The constant is deliberately not
+    // spelled MFD_CLOEXEC: that name is a macro whenever <sys/mman.h> or
+    // <linux/memfd.h> is in scope and would break this declaration.
+    constexpr uintptr_t kMfdCloexec = 0x0001U;
+    constexpr const char* kName = "memfd";
+    mem_fd.fd_ = syscall(__NR_memfd_create, reinterpret_cast<uintptr_t>(kName),
+                         kMfdCloexec);
+    if (mem_fd.fd_ == -1) {
+      return ::sapi::UnknownError(absl::StrCat("memfd(): ", strerror(errno)));
+    }
+    if (ftruncate(mem_fd.fd_, content.size()) == -1) {
+      return ::sapi::UnknownError(
+          absl::StrCat("ftruncate(): ", strerror(errno)));
+    }
+    // write() may accept fewer bytes than requested, so loop until the whole
+    // buffer has been consumed.
+    while (!content.empty()) {
+      ssize_t written =
+          TEMP_FAILURE_RETRY(write(mem_fd.fd_, content.data(), content.size()));
+      if (written <= 0) {
+        return ::sapi::UnknownError(absl::StrCat("write(): ", strerror(errno)));
+      }
+      content.remove_prefix(written);
+    }
+    return mem_fd;
+  }
+
+  // Takes over the descriptor of `other`, which is left empty (-1).
+  MemoryFD(MemoryFD&& other) noexcept : fd_(other.fd_) { other.fd_ = -1; }
+
+  // Closes the descriptor currently held (if any) before taking over the one
+  // from `other`. Self-assignment is a no-op.
+  MemoryFD& operator=(MemoryFD&& other) noexcept {
+    if (this != &other) {
+      Reset();
+      fd_ = other.fd_;
+      other.fd_ = -1;
+    }
+    return *this;
+  }
+
+  ~MemoryFD() { Reset(); }
+
+  // Returns the wrapped descriptor, or -1 if this object holds none.
+  int fd() const { return fd_; }
+
+ private:
+  MemoryFD() = default;
+
+  // Closes the descriptor if one is held and marks this object empty.
+  void Reset() {
+    if (fd_ != -1) {
+      close(fd_);
+      fd_ = -1;
+    }
+  }
+
+  // -1 means "no descriptor"; 0 cannot be the sentinel because it is a valid
+  // descriptor number.
+  int fd_ = -1;
+};
+
+// Fixture that creates a fresh YaraTransaction before each test.
+class TransactionTest : public ::testing::Test {
+ protected:
+  void SetUp() override {
+    SAPI_ASSERT_OK_AND_ASSIGN(
+        transaction_,
+        YaraTransaction::Create(YaraTransaction::Options{}
+                                    .set_scan_timeout(absl::Minutes(1))
+                                    .set_num_workers(16)));
+  }
+
+  // Copies `content` into an in-memory file and scans that file. The file is
+  // closed again when this function returns.
+  ::sapi::StatusOr<YaraMatches> ScanString(absl::string_view content) {
+    SAPI_ASSIGN_OR_RETURN(MemoryFD mem_fd,
+                          MemoryFD::CreateWithContent(content));
+    return transaction_->ScanFd(mem_fd.fd());
+  }
+
+  std::unique_ptr<YaraTransaction> transaction_;
+};
+
+TEST_F(TransactionTest, BasicFunctionality) {
+  // A failed load is reported as a test failure instead of aborting the test
+  // binary.
+  SAPI_ASSERT_OK_AND_ASSIGN(int num_rules, transaction_->LoadRules(R"(
+        rule Number {
+          strings: $ = "123"
+          condition: all of them
+        }
+        rule Color {
+          strings: $ = "green"
+          condition: all of them
+        }
+        rule Keyboard {
+          strings: $ = "dvorak"
+          condition: all of them
+        })"));
+  ASSERT_THAT(num_rules, Eq(3));
+
+  SAPI_ASSERT_OK_AND_ASSIGN(YaraMatches matches, ScanString("qwerty 123"));
+
+  // The size checks are fatal: the indexed match() calls below must not run
+  // when the count is wrong.
+  ASSERT_THAT(matches.match_size(), Eq(1));
+  EXPECT_THAT(matches.match(0).id().rule_name(), StrEq("Number"));
+
+  SAPI_ASSERT_OK_AND_ASSIGN(matches, ScanString("green dvorak 456"));
+  ASSERT_THAT(matches.match_size(), Eq(2));
+  EXPECT_THAT(matches.match(0).id().rule_name(), StrEq("Color"));
+  EXPECT_THAT(matches.match(1).id().rule_name(), StrEq("Keyboard"));
+}
+
+TEST_F(TransactionTest, ConcurrentScanStressTest) {
+  SAPI_ASSERT_OK_AND_ASSIGN(int num_rules, transaction_->LoadRules(R"(
+        rule Simple {
+          strings: $ = "A"
+          condition: all of them
+        })"));
+  ASSERT_THAT(num_rules, Eq(1));
+
+  // Large number of threads during testing to increase likelihood of exposing
+  // race conditions in threading code.
+  constexpr int kThreads = 64;
+
+  std::vector<std::thread> bundle;
+  bundle.reserve(kThreads);
+  for (int i = 0; i < kThreads; ++i) {
+    bundle.emplace_back([this, i]() {
+      // Each thread scans a buffer of a different size.
+      std::string buf((i + 1) * 102400, 'B');
+      buf.append("A");  // Force the match to be at the very end
+      SAPI_ASSERT_OK_AND_ASSIGN(YaraMatches matches, ScanString(buf));
+      ASSERT_THAT(matches.match_size(), Eq(1));
+      EXPECT_THAT(matches.match(0).id().rule_name(), StrEq("Simple"));
+    });
+  }
+  for (auto& thread : bundle) {
+    thread.join();
+  }
+}
+
+}  // namespace
+}  // namespace yara