diff --git a/compiler/jni/jni_compiler_test.cc b/compiler/jni/jni_compiler_test.cc index fb9704a69b..405c9ec689 100644 --- a/compiler/jni/jni_compiler_test.cc +++ b/compiler/jni/jni_compiler_test.cc @@ -45,10 +45,25 @@ extern "C" JNIEXPORT jint JNICALL Java_MyClassNatives_bar(JNIEnv*, jobject, jint return count + 1; } +// Note: JNI name mangling "_" -> "_1". +extern "C" JNIEXPORT jint JNICALL Java_MyClassNatives_bar_1Fast(JNIEnv*, jobject, jint count) { + return count + 1; +} + extern "C" JNIEXPORT jint JNICALL Java_MyClassNatives_sbar(JNIEnv*, jclass, jint count) { return count + 1; } +// Note: JNI name mangling "_" -> "_1". +extern "C" JNIEXPORT jint JNICALL Java_MyClassNatives_sbar_1Fast(JNIEnv*, jclass, jint count) { + return count + 1; +} + +// Note: JNI name mangling "_" -> "_1". +extern "C" JNIEXPORT jint JNICALL Java_MyClassNatives_sbar_1Critical(jint count) { + return count + 1; +} + // TODO: In the Baker read barrier configuration, add checks to ensure // the Marking Register's value is correct. @@ -71,6 +86,11 @@ static bool IsCurrentJniCritical() { return gCurrentJni == static_cast(JniKind::kCritical); } +// Is the current native method under test @FastNative? +static bool IsCurrentJniFast() { + return gCurrentJni == static_cast(JniKind::kFast); +} + // Is the current native method a plain-old non-annotated native? static bool IsCurrentJniNormal() { return gCurrentJni == static_cast(JniKind::kNormal); @@ -352,6 +372,7 @@ class JniCompilerTest : public CommonCompilerTest { void MaxParamNumberImpl(); void WithoutImplementationImpl(); void WithoutImplementationRefReturnImpl(); + void StaticWithoutImplementationImpl(); void StackArgsIntsFirstImpl(); void StackArgsFloatsFirstImpl(); void StackArgsMixedImpl(); @@ -373,9 +394,7 @@ jobject JniCompilerTest::class_loader_; // Test the normal compiler and normal generic JNI only. // The following features are unsupported in @FastNative: -// 1) JNI stubs (lookup via dlsym) when methods aren't explicitly registered -// 2) synchronized keyword -// -- TODO: We can support (1) if we remove the mutator lock assert during stub lookup. +// 1) synchronized keyword # define JNI_TEST_NORMAL_ONLY(TestName) \ TEST_F(JniCompilerTest, TestName ## NormalCompiler) { \ ScopedCheckHandleScope top_handle_scope_check; \ @@ -612,8 +631,8 @@ struct make_jni_test_decorator { #define NORMAL_JNI_ONLY_NOWRAP(func) \ ({ ASSERT_TRUE(IsCurrentJniNormal()); reinterpret_cast(&(func)); }) // Same as above, but with nullptr. When we want to test the stub functionality. -#define NORMAL_JNI_ONLY_NULLPTR \ - ({ ASSERT_TRUE(IsCurrentJniNormal()); nullptr; }) +#define NORMAL_OR_FAST_JNI_ONLY_NULLPTR \ + ({ ASSERT_TRUE(IsCurrentJniNormal() || IsCurrentJniFast()); nullptr; }) int gJava_MyClassNatives_foo_calls[kJniKindCount] = {}; @@ -636,8 +655,8 @@ void JniCompilerTest::CompileAndRunNoArgMethodImpl() { JNI_TEST(CompileAndRunNoArgMethod) void JniCompilerTest::CompileAndRunIntMethodThroughStubImpl() { - SetUpForTest(false, "bar", "(I)I", NORMAL_JNI_ONLY_NULLPTR); - // calling through stub will link with &Java_MyClassNatives_bar + SetUpForTest(false, "bar", "(I)I", NORMAL_OR_FAST_JNI_ONLY_NULLPTR); + // calling through stub will link with &Java_MyClassNatives_bar{,_1Fast} std::string reason; ASSERT_TRUE(Runtime::Current()->GetJavaVM()-> @@ -648,12 +667,12 @@ void JniCompilerTest::CompileAndRunIntMethodThroughStubImpl() { EXPECT_EQ(25, result); } -// TODO: Support @FastNative and @CriticalNative through stubs. 
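A note on the `_1` escapes in the new test natives above: JNI short-name mangling turns package separators into `_` and escapes a literal underscore in the Java name as `_1`, which is why `bar_Fast` links against `Java_MyClassNatives_bar_1Fast`. A minimal sketch of the rule (it ignores the `_0xxxx` Unicode escape and the long form with the `__` signature suffix):

```cpp
#include <string>

// Short-name mangling: '.' and '/' separators become '_', a literal '_' in
// the Java name becomes "_1". Unicode escapes and overload (long-form)
// suffixes are omitted for brevity.
std::string MangleJniShortName(const std::string& class_name,
                               const std::string& method_name) {
  std::string result = "Java_";
  for (char c : class_name + "." + method_name) {
    if (c == '.' || c == '/') {
      result += '_';
    } else if (c == '_') {
      result += "_1";
    } else {
      result += c;
    }
  }
  return result;
}

// MangleJniShortName("MyClassNatives", "sbar_Critical")
//     == "Java_MyClassNatives_sbar_1Critical"
```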
-JNI_TEST_NORMAL_ONLY(CompileAndRunIntMethodThroughStub) +// Note: @CriticalNative is only for static methods. +JNI_TEST(CompileAndRunIntMethodThroughStub) void JniCompilerTest::CompileAndRunStaticIntMethodThroughStubImpl() { - SetUpForTest(true, "sbar", "(I)I", NORMAL_JNI_ONLY_NULLPTR); - // calling through stub will link with &Java_MyClassNatives_sbar + SetUpForTest(true, "sbar", "(I)I", nullptr); + // calling through stub will link with &Java_MyClassNatives_sbar{,_1Fast,_1Critical} std::string reason; ASSERT_TRUE(Runtime::Current()->GetJavaVM()-> @@ -664,8 +683,7 @@ void JniCompilerTest::CompileAndRunStaticIntMethodThroughStubImpl() { EXPECT_EQ(43, result); } -// TODO: Support @FastNative and @CriticalNative through stubs. -JNI_TEST_NORMAL_ONLY(CompileAndRunStaticIntMethodThroughStub) +JNI_TEST_CRITICAL(CompileAndRunStaticIntMethodThroughStub) int gJava_MyClassNatives_fooI_calls[kJniKindCount] = {}; jint Java_MyClassNatives_fooI(JNIEnv*, jobject, jint x) { @@ -1894,7 +1912,7 @@ void JniCompilerTest::WithoutImplementationImpl() { // This will lead to error messages in the log. ScopedLogSeverity sls(LogSeverity::FATAL); - SetUpForTest(false, "withoutImplementation", "()V", NORMAL_JNI_ONLY_NULLPTR); + SetUpForTest(false, "withoutImplementation", "()V", NORMAL_OR_FAST_JNI_ONLY_NULLPTR); env_->CallVoidMethod(jobj_, jmethod_); @@ -1902,9 +1920,7 @@ void JniCompilerTest::WithoutImplementationImpl() { EXPECT_TRUE(env_->ExceptionCheck() == JNI_TRUE); } -// TODO: Don't test @FastNative here since it goes through a stub lookup (unsupported) which would -// normally fail with an exception, but fails with an assert. -JNI_TEST_NORMAL_ONLY(WithoutImplementation) +JNI_TEST(WithoutImplementation) void JniCompilerTest::WithoutImplementationRefReturnImpl() { // This will lead to error messages in the log. @@ -1913,7 +1929,7 @@ void JniCompilerTest::WithoutImplementationRefReturnImpl() { SetUpForTest(false, "withoutImplementationRefReturn", "()Ljava/lang/Object;", - NORMAL_JNI_ONLY_NULLPTR); + NORMAL_OR_FAST_JNI_ONLY_NULLPTR); env_->CallObjectMethod(jobj_, jmethod_); @@ -1921,8 +1937,21 @@ void JniCompilerTest::WithoutImplementationRefReturnImpl() { EXPECT_TRUE(env_->ExceptionCheck() == JNI_TRUE); } -// TODO: Should work for @FastNative too. -JNI_TEST_NORMAL_ONLY(WithoutImplementationRefReturn) +JNI_TEST(WithoutImplementationRefReturn) + +void JniCompilerTest::StaticWithoutImplementationImpl() { + // This will lead to error messages in the log. 
+ ScopedLogSeverity sls(LogSeverity::FATAL); + + SetUpForTest(true, "staticWithoutImplementation", "()V", nullptr); + + env_->CallStaticVoidMethod(jklass_, jmethod_); + + EXPECT_TRUE(Thread::Current()->IsExceptionPending()); + EXPECT_TRUE(env_->ExceptionCheck() == JNI_TRUE); +} + +JNI_TEST_CRITICAL(StaticWithoutImplementation) void Java_MyClassNatives_stackArgsIntsFirst(JNIEnv*, jclass, jint i1, jint i2, jint i3, jint i4, jint i5, jint i6, jint i7, jint i8, jint i9, diff --git a/compiler/jni/quick/arm/calling_convention_arm.cc b/compiler/jni/quick/arm/calling_convention_arm.cc index 2072302dd8..e06c91419b 100644 --- a/compiler/jni/quick/arm/calling_convention_arm.cc +++ b/compiler/jni/quick/arm/calling_convention_arm.cc @@ -18,6 +18,7 @@ #include +#include "arch/arm/jni_frame_arm.h" #include "arch/instruction_set.h" #include "base/macros.h" #include "handle_scope-inl.h" @@ -38,7 +39,7 @@ static const Register kJniArgumentRegisters[] = { R0, R1, R2, R3 }; -static const size_t kJniArgumentRegisterCount = arraysize(kJniArgumentRegisters); +static_assert(kJniArgumentRegisterCount == arraysize(kJniArgumentRegisters)); // // Managed calling convention constants. @@ -121,10 +122,6 @@ static constexpr uint32_t CalculateFpCalleeSpillMask(const ManagedRegister (&cal static constexpr uint32_t kCoreCalleeSpillMask = CalculateCoreCalleeSpillMask(kCalleeSaveRegisters); static constexpr uint32_t kFpCalleeSpillMask = CalculateFpCalleeSpillMask(kCalleeSaveRegisters); -// The AAPCS requires 8-byte alignement. This is not as strict as the Managed ABI stack alignment. -static constexpr size_t kAapcsStackAlignment = 8u; -static_assert(kAapcsStackAlignment < kStackAlignment); - static constexpr ManagedRegister kAapcsCalleeSaveRegisters[] = { // Core registers. ArmManagedRegister::FromCoreRegister(R4), @@ -448,7 +445,11 @@ size_t ArmJniCallingConvention::OutArgSize() const { if (is_critical_native_ && (size != 0u || GetShorty()[0] == 'F' || GetShorty()[0] == 'D')) { size += kFramePointerSize; // We need to spill LR with the args. 
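The `static_assert` replacing the local `kJniArgumentRegisterCount` definition above reflects a pattern this change applies on all four architectures: the constant moves into the new shared `jni_frame_*.h` header (included at the top of the hunk) and the compiler-side register table merely asserts agreement, so the JNI compiler and the runtime stub cannot silently drift apart. A self-contained sketch of the pattern, with stand-in types:

```cpp
#include <cstddef>

// Stand-ins for the ART types; the real code uses arm::Register and arraysize().
enum Register { R0, R1, R2, R3 };

// The shared header (here standing in for arch/arm/jni_frame_arm.h) owns the
// constant that both the compiler and the runtime stub depend on...
constexpr size_t kJniArgumentRegisterCount = 4u;

// ...and the calling-convention code asserts its register table still matches.
constexpr Register kJniArgumentRegisters[] = { R0, R1, R2, R3 };
static_assert(kJniArgumentRegisterCount ==
                  sizeof(kJniArgumentRegisters) / sizeof(kJniArgumentRegisters[0]),
              "shared constant must match the argument register table");
```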
} - return RoundUp(size, kAapcsStackAlignment); + size_t out_args_size = RoundUp(size, kAapcsStackAlignment); + if (UNLIKELY(IsCriticalNative())) { + DCHECK_EQ(out_args_size, GetCriticalNativeOutArgsSize(GetShorty(), NumArgs() + 1u)); + } + return out_args_size; } ArrayRef ArmJniCallingConvention::CalleeSaveRegisters() const { diff --git a/compiler/jni/quick/arm/calling_convention_arm.h b/compiler/jni/quick/arm/calling_convention_arm.h index 04ad00b0b6..e4b86fa657 100644 --- a/compiler/jni/quick/arm/calling_convention_arm.h +++ b/compiler/jni/quick/arm/calling_convention_arm.h @@ -23,8 +23,6 @@ namespace art { namespace arm { -constexpr size_t kFramePointerSize = static_cast(PointerSize::k32); - class ArmManagedRuntimeCallingConvention final : public ManagedRuntimeCallingConvention { public: ArmManagedRuntimeCallingConvention(bool is_static, bool is_synchronized, const char* shorty) diff --git a/compiler/jni/quick/arm64/calling_convention_arm64.cc b/compiler/jni/quick/arm64/calling_convention_arm64.cc index 44218efde4..231e1400f8 100644 --- a/compiler/jni/quick/arm64/calling_convention_arm64.cc +++ b/compiler/jni/quick/arm64/calling_convention_arm64.cc @@ -18,6 +18,7 @@ #include +#include "arch/arm64/jni_frame_arm64.h" #include "arch/instruction_set.h" #include "handle_scope-inl.h" #include "utils/arm64/managed_register_arm64.h" @@ -27,28 +28,25 @@ namespace arm64 { static_assert(kArm64PointerSize == PointerSize::k64, "Unexpected ARM64 pointer size"); -// Up to how many float-like (float, double) args can be enregistered. -// The rest of the args must go on the stack. -constexpr size_t kMaxFloatOrDoubleRegisterArguments = 8u; -// Up to how many integer-like (pointers, objects, longs, int, short, bool, etc) args can be -// enregistered. The rest of the args must go on the stack. -constexpr size_t kMaxIntLikeRegisterArguments = 8u; - static const XRegister kXArgumentRegisters[] = { X0, X1, X2, X3, X4, X5, X6, X7 }; +static_assert(kMaxIntLikeRegisterArguments == arraysize(kXArgumentRegisters)); static const WRegister kWArgumentRegisters[] = { W0, W1, W2, W3, W4, W5, W6, W7 }; +static_assert(kMaxIntLikeRegisterArguments == arraysize(kWArgumentRegisters)); static const DRegister kDArgumentRegisters[] = { D0, D1, D2, D3, D4, D5, D6, D7 }; +static_assert(kMaxFloatOrDoubleRegisterArguments == arraysize(kDArgumentRegisters)); static const SRegister kSArgumentRegisters[] = { S0, S1, S2, S3, S4, S5, S6, S7 }; +static_assert(kMaxFloatOrDoubleRegisterArguments == arraysize(kSArgumentRegisters)); static constexpr ManagedRegister kCalleeSaveRegisters[] = { // Core registers. @@ -114,10 +112,6 @@ static constexpr uint32_t CalculateFpCalleeSpillMask(const ManagedRegister (&cal static constexpr uint32_t kCoreCalleeSpillMask = CalculateCoreCalleeSpillMask(kCalleeSaveRegisters); static constexpr uint32_t kFpCalleeSpillMask = CalculateFpCalleeSpillMask(kCalleeSaveRegisters); -// The AAPCS64 requires 16-byte alignement. This is the same as the Managed ABI stack alignment. -static constexpr size_t kAapcs64StackAlignment = 16u; -static_assert(kAapcs64StackAlignment == kStackAlignment); - static constexpr ManagedRegister kAapcs64CalleeSaveRegisters[] = { // Core registers. Arm64ManagedRegister::FromXRegister(X19), @@ -334,7 +328,11 @@ size_t Arm64JniCallingConvention::OutArgSize() const { if (is_critical_native_ && (size != 0u || RequiresSmallResultTypeExtension())) { size += kFramePointerSize; // We need to spill LR with the args. 
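Both the DCHECK just above and its arm64 twin below pass `NumArgs() + 1u` as the shorty length; the `+ 1` accounts for the return-type character stored at `shorty[0]`. A tiny worked example:

```cpp
#include <cassert>
#include <cstring>

// The shorty lists the return type first, then each argument type, so its
// length is always NumArgs() + 1, which is exactly what the DCHECKs pass to
// GetCriticalNativeOutArgsSize(). Example: for
//   static native int sum(long a, int b);
// the shorty is "IJI" ('I' return, then 'J' and 'I' args).
void ShortyLengthExample() {
  const char* shorty = "IJI";
  size_t num_args = 2u;  // What NumArgs() reports for sum().
  assert(std::strlen(shorty) == num_args + 1u);
}
```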
} - return RoundUp(size, kStackAlignment); + size_t out_args_size = RoundUp(size, kAapcs64StackAlignment); + if (UNLIKELY(IsCriticalNative())) { + DCHECK_EQ(out_args_size, GetCriticalNativeOutArgsSize(GetShorty(), NumArgs() + 1u)); + } + return out_args_size; } ArrayRef Arm64JniCallingConvention::CalleeSaveRegisters() const { diff --git a/compiler/jni/quick/arm64/calling_convention_arm64.h b/compiler/jni/quick/arm64/calling_convention_arm64.h index f4148c7d73..64b29f1857 100644 --- a/compiler/jni/quick/arm64/calling_convention_arm64.h +++ b/compiler/jni/quick/arm64/calling_convention_arm64.h @@ -23,8 +23,6 @@ namespace art { namespace arm64 { -constexpr size_t kFramePointerSize = static_cast(PointerSize::k64); - class Arm64ManagedRuntimeCallingConvention final : public ManagedRuntimeCallingConvention { public: Arm64ManagedRuntimeCallingConvention(bool is_static, bool is_synchronized, const char* shorty) diff --git a/compiler/jni/quick/x86/calling_convention_x86.cc b/compiler/jni/quick/x86/calling_convention_x86.cc index d12eb9be32..4e643ba34a 100644 --- a/compiler/jni/quick/x86/calling_convention_x86.cc +++ b/compiler/jni/quick/x86/calling_convention_x86.cc @@ -19,6 +19,7 @@ #include #include "arch/instruction_set.h" +#include "arch/x86/jni_frame_x86.h" #include "handle_scope-inl.h" #include "utils/x86/managed_register_x86.h" @@ -51,9 +52,6 @@ static constexpr uint32_t CalculateCoreCalleeSpillMask( static constexpr uint32_t kCoreCalleeSpillMask = CalculateCoreCalleeSpillMask(kCalleeSaveRegisters); static constexpr uint32_t kFpCalleeSpillMask = 0u; -static constexpr size_t kNativeStackAlignment = 16; // IA-32 cdecl requires 16 byte alignment. -static_assert(kNativeStackAlignment == kStackAlignment); - static constexpr ManagedRegister kNativeCalleeSaveRegisters[] = { // Core registers. X86ManagedRegister::FromCpuRegister(EBX), @@ -268,8 +266,8 @@ size_t X86JniCallingConvention::OutArgSize() const { static_assert((kCoreCalleeSpillMask & ~kNativeCoreCalleeSpillMask) == 0u); static_assert((kFpCalleeSpillMask & ~kNativeFpCalleeSpillMask) == 0u); - if (is_critical_native_) { - // Add return address size for @CriticalNative + if (UNLIKELY(IsCriticalNative())) { + // Add return address size for @CriticalNative. // For normal native the return PC is part of the managed stack frame instead of out args. size += kFramePointerSize; // For @CriticalNative, we can make a tail call if there are no stack args @@ -281,13 +279,17 @@ size_t X86JniCallingConvention::OutArgSize() const { GetShorty()[0] != 'F' && GetShorty()[0] != 'D' && !RequiresSmallResultTypeExtension()); if (return_type_ok && size == kFramePointerSize) { // Note: This is not aligned to kNativeStackAlignment but that's OK for tail call. 
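The x86 hunk in progress here lets a @CriticalNative call be compiled as a tail call when nothing remains to do after the native code returns. A condensed sketch of the predicate (the `is_static`/`!is_synchronized` parts are implied, since @CriticalNative methods are always static and never synchronized):

```cpp
// Sketch of the x86 tail-call condition: with no stack args, no FP return
// (which would need a fix-up move after the native call returns), and no
// small integer return (which would need sign/zero extension), the stub can
// jump straight to the native code and reuse the caller's return address.
bool CanTailCallCriticalNative(const char* shorty, size_t stack_arg_bytes) {
  char ret = shorty[0];
  bool fp_return = (ret == 'F' || ret == 'D');
  bool small_return = (ret == 'B' || ret == 'C' || ret == 'S' || ret == 'Z');
  return stack_arg_bytes == 0u && !fp_return && !small_return;
}
```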
- DCHECK_EQ(size, kFramePointerSize); static_assert(kFramePointerSize < kNativeStackAlignment); + DCHECK_EQ(kFramePointerSize, GetCriticalNativeOutArgsSize(GetShorty(), NumArgs() + 1u)); return kFramePointerSize; } } - return RoundUp(size, kNativeStackAlignment); + size_t out_args_size = RoundUp(size, kNativeStackAlignment); + if (UNLIKELY(IsCriticalNative())) { + DCHECK_EQ(out_args_size, GetCriticalNativeOutArgsSize(GetShorty(), NumArgs() + 1u)); + } + return out_args_size; } ArrayRef X86JniCallingConvention::CalleeSaveRegisters() const { diff --git a/compiler/jni/quick/x86/calling_convention_x86.h b/compiler/jni/quick/x86/calling_convention_x86.h index 4d65fc33fc..1273e8d256 100644 --- a/compiler/jni/quick/x86/calling_convention_x86.h +++ b/compiler/jni/quick/x86/calling_convention_x86.h @@ -23,8 +23,6 @@ namespace art { namespace x86 { -constexpr size_t kFramePointerSize = static_cast(PointerSize::k32); - class X86ManagedRuntimeCallingConvention final : public ManagedRuntimeCallingConvention { public: X86ManagedRuntimeCallingConvention(bool is_static, bool is_synchronized, const char* shorty) diff --git a/compiler/jni/quick/x86_64/calling_convention_x86_64.cc b/compiler/jni/quick/x86_64/calling_convention_x86_64.cc index b15d904bf1..9013b020fb 100644 --- a/compiler/jni/quick/x86_64/calling_convention_x86_64.cc +++ b/compiler/jni/quick/x86_64/calling_convention_x86_64.cc @@ -19,6 +19,7 @@ #include #include "arch/instruction_set.h" +#include "arch/x86_64/jni_frame_x86_64.h" #include "base/bit_utils.h" #include "handle_scope-inl.h" #include "utils/x86_64/managed_register_x86_64.h" @@ -26,19 +27,6 @@ namespace art { namespace x86_64 { -constexpr size_t kFramePointerSize = static_cast(PointerSize::k64); -static_assert(kX86_64PointerSize == PointerSize::k64, "Unexpected x86_64 pointer size"); - -constexpr size_t kMmxSpillSize = 8u; - -// XMM0..XMM7 can be used to pass the first 8 floating args. The rest must go on the stack. -// -- Managed and JNI calling conventions. -constexpr size_t kMaxFloatOrDoubleRegisterArguments = 8u; -// Up to how many integer-like (pointers, objects, longs, int, short, bool, etc) args can be -// enregistered. The rest of the args must go on the stack. -// -- JNI calling convention only (Managed excludes RDI, so it's actually 5). -constexpr size_t kMaxIntLikeRegisterArguments = 6u; - static constexpr ManagedRegister kCalleeSaveRegisters[] = { // Core registers. X86_64ManagedRegister::FromCpuRegister(RBX), @@ -81,9 +69,6 @@ static constexpr uint32_t CalculateFpCalleeSpillMask(const ManagedRegister (&cal static constexpr uint32_t kCoreCalleeSpillMask = CalculateCoreCalleeSpillMask(kCalleeSaveRegisters); static constexpr uint32_t kFpCalleeSpillMask = CalculateFpCalleeSpillMask(kCalleeSaveRegisters); -static constexpr size_t kNativeStackAlignment = 16; -static_assert(kNativeStackAlignment == kStackAlignment); - static constexpr ManagedRegister kNativeCalleeSaveRegisters[] = { // Core registers. X86_64ManagedRegister::FromCpuRegister(RBX), @@ -268,13 +253,19 @@ size_t X86_64JniCallingConvention::OutArgSize() const { // but not native callee-saves. 
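On x86-64, the @CriticalNative out-args area must also hold the managed-ABI FP callee-saves that the native ABI does not preserve; the `kAlwaysSpilledMmxRegisters` constant introduced above captures that count. Worked arithmetic, under the assumption that the managed FP callee-saves are XMM12-XMM15 and the native SysV ABI preserves no XMM registers at all:

```cpp
#include <cstddef>

// With kNativeFpCalleeSpillMask == 0, every managed FP callee-save must be
// spilled with the out args: four registers of 8 bytes each.
constexpr size_t kMmxSpillSize = 8u;               // One XMM spill slot.
constexpr size_t kAlwaysSpilledMmxRegisters = 4u;  // XMM12..XMM15 (assumed).
constexpr size_t kFpSpillBytes = kAlwaysSpilledMmxRegisters * kMmxSpillSize;
static_assert(kFpSpillBytes == 32u, "4 registers * 8 bytes each");
```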
static_assert((kCoreCalleeSpillMask & ~kNativeCoreCalleeSpillMask) == 0u); static_assert((kFpCalleeSpillMask & ~kNativeFpCalleeSpillMask) != 0u); - size += POPCOUNT(kFpCalleeSpillMask & ~kNativeFpCalleeSpillMask) * kMmxSpillSize; + static_assert( + kAlwaysSpilledMmxRegisters == POPCOUNT(kFpCalleeSpillMask & ~kNativeFpCalleeSpillMask)); + size += kAlwaysSpilledMmxRegisters * kMmxSpillSize; // Add return address size for @CriticalNative // For normal native the return PC is part of the managed stack frame instead of out args. size += kFramePointerSize; } - return RoundUp(size, kNativeStackAlignment); + size_t out_args_size = RoundUp(size, kNativeStackAlignment); + if (UNLIKELY(IsCriticalNative())) { + DCHECK_EQ(out_args_size, GetCriticalNativeOutArgsSize(GetShorty(), NumArgs() + 1u)); + } + return out_args_size; } ArrayRef X86_64JniCallingConvention::CalleeSaveRegisters() const { diff --git a/compiler/utils/assembler_thumb_test_expected.cc.inc b/compiler/utils/assembler_thumb_test_expected.cc.inc index cc0b5d5189..6475607076 100644 --- a/compiler/utils/assembler_thumb_test_expected.cc.inc +++ b/compiler/utils/assembler_thumb_test_expected.cc.inc @@ -153,7 +153,7 @@ const char* const VixlJniHelpersResults[] = { " 21c: f8d9 8034 ldr.w r8, [r9, #52] ; 0x34\n", " 220: 4770 bx lr\n", " 222: 4660 mov r0, ip\n", - " 224: f8d9 c2e4 ldr.w ip, [r9, #740] ; 0x2e4\n", + " 224: f8d9 c2e8 ldr.w ip, [r9, #744] ; 0x2e8\n", " 228: 47e0 blx ip\n", nullptr }; diff --git a/dex2oat/driver/compiler_driver.cc b/dex2oat/driver/compiler_driver.cc index e7c0c1e0a0..525cfab231 100644 --- a/dex2oat/driver/compiler_driver.cc +++ b/dex2oat/driver/compiler_driver.cc @@ -298,6 +298,11 @@ std::unique_ptr> CompilerDriver::CreateJniDlsymLookup CREATE_TRAMPOLINE(JNI, kJniAbi, pDlsymLookup) } +std::unique_ptr> +CompilerDriver::CreateJniDlsymLookupCriticalTrampoline() const { + CREATE_TRAMPOLINE(JNI, kJniAbi, pDlsymLookupCritical) +} + std::unique_ptr> CompilerDriver::CreateQuickGenericJniTrampoline() const { CREATE_TRAMPOLINE(QUICK, kQuickAbi, pQuickGenericJniTrampoline) diff --git a/dex2oat/driver/compiler_driver.h b/dex2oat/driver/compiler_driver.h index 4aeb34de90..e14b2793a0 100644 --- a/dex2oat/driver/compiler_driver.h +++ b/dex2oat/driver/compiler_driver.h @@ -122,6 +122,7 @@ class CompilerDriver { // Generate the trampolines that are invoked by unresolved direct methods. std::unique_ptr> CreateJniDlsymLookupTrampoline() const; + std::unique_ptr> CreateJniDlsymLookupCriticalTrampoline() const; std::unique_ptr> CreateQuickGenericJniTrampoline() const; std::unique_ptr> CreateQuickImtConflictTrampoline() const; std::unique_ptr> CreateQuickResolutionTrampoline() const; diff --git a/dex2oat/linker/image_writer.cc b/dex2oat/linker/image_writer.cc index e6ffbc1e44..a5605f9396 100644 --- a/dex2oat/linker/image_writer.cc +++ b/dex2oat/linker/image_writer.cc @@ -3373,6 +3373,8 @@ const uint8_t* ImageWriter::GetOatAddress(StubType type) const { return static_cast(header.GetQuickGenericJniTrampoline()); case StubType::kJNIDlsymLookupTrampoline: return static_cast(header.GetJniDlsymLookupTrampoline()); + case StubType::kJNIDlsymLookupCriticalTrampoline: + return static_cast(header.GetJniDlsymLookupCriticalTrampoline()); case StubType::kQuickIMTConflictTrampoline: return static_cast(header.GetQuickImtConflictTrampoline()); case StubType::kQuickResolutionTrampoline: @@ -3486,8 +3488,9 @@ void ImageWriter::CopyAndFixupMethod(ArtMethod* orig, if (orig->IsNative()) { // The native method's pointer is set to a stub to lookup via dlsym. 
// Note this is not the code_ pointer, that is handled above. - copy->SetEntryPointFromJniPtrSize( - GetOatAddress(StubType::kJNIDlsymLookupTrampoline), target_ptr_size_); + StubType stub_type = orig->IsCriticalNative() ? StubType::kJNIDlsymLookupCriticalTrampoline + : StubType::kJNIDlsymLookupTrampoline; + copy->SetEntryPointFromJniPtrSize(GetOatAddress(stub_type), target_ptr_size_); } else { CHECK(copy->GetDataPtrSize(target_ptr_size_) == nullptr); } @@ -3624,6 +3627,8 @@ void ImageWriter::UpdateOatFileHeader(size_t oat_index, const OatHeader& oat_hea // Primary oat file, read the trampolines. cur_image_info.SetStubOffset(StubType::kJNIDlsymLookupTrampoline, oat_header.GetJniDlsymLookupTrampolineOffset()); + cur_image_info.SetStubOffset(StubType::kJNIDlsymLookupCriticalTrampoline, + oat_header.GetJniDlsymLookupCriticalTrampolineOffset()); cur_image_info.SetStubOffset(StubType::kQuickGenericJNITrampoline, oat_header.GetQuickGenericJniTrampolineOffset()); cur_image_info.SetStubOffset(StubType::kQuickIMTConflictTrampoline, diff --git a/dex2oat/linker/image_writer.h b/dex2oat/linker/image_writer.h index 811b5c3cb2..8cd306f67f 100644 --- a/dex2oat/linker/image_writer.h +++ b/dex2oat/linker/image_writer.h @@ -234,6 +234,7 @@ class ImageWriter final { enum class StubType { kJNIDlsymLookupTrampoline, + kJNIDlsymLookupCriticalTrampoline, kQuickGenericJNITrampoline, kQuickIMTConflictTrampoline, kQuickResolutionTrampoline, diff --git a/dex2oat/linker/oat_writer.cc b/dex2oat/linker/oat_writer.cc index d75f427639..16e75d05bc 100644 --- a/dex2oat/linker/oat_writer.cc +++ b/dex2oat/linker/oat_writer.cc @@ -417,6 +417,7 @@ OatWriter::OatWriter(const CompilerOptions& compiler_options, size_interpreter_to_interpreter_bridge_(0), size_interpreter_to_compiled_code_bridge_(0), size_jni_dlsym_lookup_trampoline_(0), + size_jni_dlsym_lookup_critical_trampoline_(0), size_quick_generic_jni_trampoline_(0), size_quick_imt_conflict_trampoline_(0), size_quick_resolution_trampoline_(0), @@ -2208,6 +2209,7 @@ size_t OatWriter::InitOatCode(size_t offset) { offset += (field)->size(); DO_TRAMPOLINE(jni_dlsym_lookup_trampoline_, JniDlsymLookupTrampoline); + DO_TRAMPOLINE(jni_dlsym_lookup_critical_trampoline_, JniDlsymLookupCriticalTrampoline); DO_TRAMPOLINE(quick_generic_jni_trampoline_, QuickGenericJniTrampoline); DO_TRAMPOLINE(quick_imt_conflict_trampoline_, QuickImtConflictTrampoline); DO_TRAMPOLINE(quick_resolution_trampoline_, QuickResolutionTrampoline); @@ -2216,6 +2218,7 @@ size_t OatWriter::InitOatCode(size_t offset) { #undef DO_TRAMPOLINE } else { oat_header_->SetJniDlsymLookupTrampolineOffset(0); + oat_header_->SetJniDlsymLookupCriticalTrampolineOffset(0); oat_header_->SetQuickGenericJniTrampolineOffset(0); oat_header_->SetQuickImtConflictTrampolineOffset(0); oat_header_->SetQuickResolutionTrampolineOffset(0); @@ -2755,6 +2758,7 @@ bool OatWriter::CheckOatSize(OutputStream* out, size_t file_offset, size_t relat DO_STAT(size_interpreter_to_interpreter_bridge_); DO_STAT(size_interpreter_to_compiled_code_bridge_); DO_STAT(size_jni_dlsym_lookup_trampoline_); + DO_STAT(size_jni_dlsym_lookup_critical_trampoline_); DO_STAT(size_quick_generic_jni_trampoline_); DO_STAT(size_quick_imt_conflict_trampoline_); DO_STAT(size_quick_resolution_trampoline_); @@ -3086,6 +3090,7 @@ size_t OatWriter::WriteCode(OutputStream* out, size_t file_offset, size_t relati } while (false) DO_TRAMPOLINE(jni_dlsym_lookup_trampoline_); + DO_TRAMPOLINE(jni_dlsym_lookup_critical_trampoline_); DO_TRAMPOLINE(quick_generic_jni_trampoline_); 
DO_TRAMPOLINE(quick_imt_conflict_trampoline_); DO_TRAMPOLINE(quick_resolution_trampoline_); diff --git a/dex2oat/linker/oat_writer.h b/dex2oat/linker/oat_writer.h index 5015ec3156..dd11b872c7 100644 --- a/dex2oat/linker/oat_writer.h +++ b/dex2oat/linker/oat_writer.h @@ -475,6 +475,7 @@ class OatWriter { dchecked_vector oat_class_headers_; dchecked_vector oat_classes_; std::unique_ptr> jni_dlsym_lookup_trampoline_; + std::unique_ptr> jni_dlsym_lookup_critical_trampoline_; std::unique_ptr> quick_generic_jni_trampoline_; std::unique_ptr> quick_imt_conflict_trampoline_; std::unique_ptr> quick_resolution_trampoline_; @@ -495,6 +496,7 @@ class OatWriter { uint32_t size_interpreter_to_interpreter_bridge_; uint32_t size_interpreter_to_compiled_code_bridge_; uint32_t size_jni_dlsym_lookup_trampoline_; + uint32_t size_jni_dlsym_lookup_critical_trampoline_; uint32_t size_quick_generic_jni_trampoline_; uint32_t size_quick_imt_conflict_trampoline_; uint32_t size_quick_resolution_trampoline_; diff --git a/dex2oat/linker/oat_writer_test.cc b/dex2oat/linker/oat_writer_test.cc index 20d87fafde..6d6d11489f 100644 --- a/dex2oat/linker/oat_writer_test.cc +++ b/dex2oat/linker/oat_writer_test.cc @@ -466,7 +466,7 @@ TEST_F(OatTest, WriteRead) { TEST_F(OatTest, OatHeaderSizeCheck) { // If this test is failing and you have to update these constants, // it is time to update OatHeader::kOatVersion - EXPECT_EQ(56U, sizeof(OatHeader)); + EXPECT_EQ(60U, sizeof(OatHeader)); EXPECT_EQ(4U, sizeof(OatMethodOffsets)); EXPECT_EQ(8U, sizeof(OatQuickMethodHeader)); EXPECT_EQ(169 * static_cast(GetInstructionSetPointerSize(kRuntimeISA)), diff --git a/imgdiag/imgdiag.cc b/imgdiag/imgdiag.cc index 10f4015015..421e7d798e 100644 --- a/imgdiag/imgdiag.cc +++ b/imgdiag/imgdiag.cc @@ -853,6 +853,10 @@ class RegionSpecializedBase : public RegionCommon { if (jdl != nullptr) { entry_point_names_[jdl] = "JniDlsymLookupTrampoline (from boot oat file)"; } + const void* jdlc = oat_header.GetJniDlsymLookupCriticalTrampoline(); + if (jdlc != nullptr) { + entry_point_names_[jdlc] = "JniDlsymLookupCriticalTrampoline (from boot oat file)"; + } const void* qgjt = oat_header.GetQuickGenericJniTrampoline(); if (qgjt != nullptr) { entry_point_names_[qgjt] = "QuickGenericJniTrampoline (from boot oat file)"; @@ -897,6 +901,8 @@ class RegionSpecializedBase : public RegionCommon { return "QuickResolutionStub"; } else if (class_linker_->IsJniDlsymLookupStub(addr)) { return "JniDlsymLookupStub"; + } else if (class_linker_->IsJniDlsymLookupCriticalStub(addr)) { + return "JniDlsymLookupCriticalStub"; } // Match the address against those that we saved from the boot OAT files. 
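Stepping back to the `OatHeaderSizeCheck` update earlier in this patch: `sizeof(OatHeader)` grows from 56 to 60 bytes because the header gains exactly one `uint32_t` offset for the new trampoline. An illustrative (not authoritative) layout sketch:

```cpp
#include <cstdint>

// Simplified view of the trampoline-offset block inside OatHeader; the real
// header has more fields and this field's position is illustrative only.
struct OatTrampolineOffsets {
  uint32_t jni_dlsym_lookup_trampoline_offset_;
  uint32_t jni_dlsym_lookup_critical_trampoline_offset_;  // New: +4 bytes.
  uint32_t quick_generic_jni_trampoline_offset_;
  uint32_t quick_imt_conflict_trampoline_offset_;
  uint32_t quick_resolution_trampoline_offset_;
  uint32_t quick_to_interpreter_bridge_offset_;
};
static_assert(56u + sizeof(uint32_t) == 60u, "one new offset field");
```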
if (entry_point_names_.find(addr) != entry_point_names_.end()) { diff --git a/oatdump/oatdump.cc b/oatdump/oatdump.cc index 86b52cb9d5..8d339706d0 100644 --- a/oatdump/oatdump.cc +++ b/oatdump/oatdump.cc @@ -196,6 +196,7 @@ class OatSymbolizer final { method_debug_infos_.push_back(std::move(info)); \ } DO_TRAMPOLINE(JniDlsymLookupTrampoline); + DO_TRAMPOLINE(JniDlsymLookupCriticalTrampoline); DO_TRAMPOLINE(QuickGenericJniTrampoline); DO_TRAMPOLINE(QuickImtConflictTrampoline); DO_TRAMPOLINE(QuickResolutionTrampoline); @@ -447,6 +448,8 @@ class OatDumper { DUMP_OAT_HEADER_OFFSET("EXECUTABLE", GetExecutableOffset); DUMP_OAT_HEADER_OFFSET("JNI DLSYM LOOKUP TRAMPOLINE", GetJniDlsymLookupTrampolineOffset); + DUMP_OAT_HEADER_OFFSET("JNI DLSYM LOOKUP CRITICAL TRAMPOLINE", + GetJniDlsymLookupCriticalTrampolineOffset); DUMP_OAT_HEADER_OFFSET("QUICK GENERIC JNI TRAMPOLINE", GetQuickGenericJniTrampolineOffset); DUMP_OAT_HEADER_OFFSET("QUICK IMT CONFLICT TRAMPOLINE", @@ -2136,7 +2139,8 @@ class ImageDumper { if (class_linker->IsQuickResolutionStub(quick_code) || class_linker->IsQuickToInterpreterBridge(quick_code) || class_linker->IsQuickGenericJniStub(quick_code) || - class_linker->IsJniDlsymLookupStub(quick_code)) { + class_linker->IsJniDlsymLookupStub(quick_code) || + class_linker->IsJniDlsymLookupCriticalStub(quick_code)) { quick_code = oat_dumper_->GetQuickOatCode(m); } if (oat_dumper_->GetInstructionSet() == InstructionSet::kThumb2) { diff --git a/runtime/arch/arm/asm_support_arm.S b/runtime/arch/arm/asm_support_arm.S index 633591d6e6..5b51e51f25 100644 --- a/runtime/arch/arm/asm_support_arm.S +++ b/runtime/arch/arm/asm_support_arm.S @@ -149,4 +149,124 @@ #endif // USE_HEAP_POISONING .endm +// Macro to refresh the Marking Register (R8). +// +// This macro must be called at the end of functions implementing +// entrypoints that possibly (directly or indirectly) perform a +// suspend check (before they return). +.macro REFRESH_MARKING_REGISTER +#if defined(USE_READ_BARRIER) && defined(USE_BAKER_READ_BARRIER) + ldr rMR, [rSELF, #THREAD_IS_GC_MARKING_OFFSET] +#endif +.endm + + /* + * Macro that sets up the callee save frame to conform with + * Runtime::CreateCalleeSaveMethod(kSaveRefsAndArgs), except for storing the method. + */ +.macro SETUP_SAVE_REFS_AND_ARGS_FRAME_REGISTERS_ONLY + // Note: We could avoid saving R8 in the case of Baker read + // barriers, as it is overwritten by REFRESH_MARKING_REGISTER + // later; but it's not worth handling this special case. + push {r1-r3, r5-r8, r10-r11, lr} @ 10 words of callee saves and args. + .cfi_adjust_cfa_offset 40 + .cfi_rel_offset r1, 0 + .cfi_rel_offset r2, 4 + .cfi_rel_offset r3, 8 + .cfi_rel_offset r5, 12 + .cfi_rel_offset r6, 16 + .cfi_rel_offset r7, 20 + .cfi_rel_offset r8, 24 + .cfi_rel_offset r10, 28 + .cfi_rel_offset r11, 32 + .cfi_rel_offset lr, 36 + vpush {s0-s15} @ 16 words of float args. + .cfi_adjust_cfa_offset 64 + sub sp, #8 @ 2 words of space, alignment padding and Method* + .cfi_adjust_cfa_offset 8 + // Ugly compile-time check, but we only have the preprocessor. +#if (FRAME_SIZE_SAVE_REFS_AND_ARGS != 40 + 64 + 8) +#error "FRAME_SIZE_SAVE_REFS_AND_ARGS(ARM) size not as expected." +#endif +.endm + +.macro RESTORE_SAVE_REFS_AND_ARGS_FRAME + add sp, #8 @ rewind sp + .cfi_adjust_cfa_offset -8 + vpop {s0-s15} + .cfi_adjust_cfa_offset -64 + // Note: Likewise, we could avoid restoring R8 in the case of Baker + // read barriers, as it is overwritten by REFRESH_MARKING_REGISTER + // later; but it's not worth handling this special case. 
+ pop {r1-r3, r5-r8, r10-r11, lr} @ 10 words of callee saves and args.
+ .cfi_restore r1
+ .cfi_restore r2
+ .cfi_restore r3
+ .cfi_restore r5
+ .cfi_restore r6
+ .cfi_restore r7
+ .cfi_restore r8
+ .cfi_restore r10
+ .cfi_restore r11
+ .cfi_restore lr
+ .cfi_adjust_cfa_offset -40
+.endm
+
+ /*
+ * Macro to spill the GPRs.
+ */
+.macro SPILL_ALL_CALLEE_SAVE_GPRS
+ push {r4-r11, lr} @ 9 words (36 bytes) of callee saves.
+ .cfi_adjust_cfa_offset 36
+ .cfi_rel_offset r4, 0
+ .cfi_rel_offset r5, 4
+ .cfi_rel_offset r6, 8
+ .cfi_rel_offset r7, 12
+ .cfi_rel_offset r8, 16
+ .cfi_rel_offset r9, 20
+ .cfi_rel_offset r10, 24
+ .cfi_rel_offset r11, 28
+ .cfi_rel_offset lr, 32
+.endm
+
+ /*
+ * Macro that sets up the callee save frame to conform with
+ * Runtime::CreateCalleeSaveMethod(kSaveAllCalleeSaves)
+ */
+.macro SETUP_SAVE_ALL_CALLEE_SAVES_FRAME rTemp
+ SPILL_ALL_CALLEE_SAVE_GPRS @ 9 words (36 bytes) of callee saves.
+ vpush {s16-s31} @ 16 words (64 bytes) of floats.
+ .cfi_adjust_cfa_offset 64
+ sub sp, #12 @ 3 words of space, bottom word will hold Method*
+ .cfi_adjust_cfa_offset 12
+ RUNTIME_CURRENT1 \rTemp @ Load Runtime::Current into rTemp.
+ @ Load kSaveAllCalleeSaves Method* into rTemp.
+ ldr \rTemp, [\rTemp, #RUNTIME_SAVE_ALL_CALLEE_SAVES_METHOD_OFFSET]
+ str \rTemp, [sp, #0] @ Place Method* at bottom of stack.
+ str sp, [rSELF, #THREAD_TOP_QUICK_FRAME_OFFSET] @ Place sp in Thread::Current()->top_quick_frame.
+
+ // Ugly compile-time check, but we only have the preprocessor.
+#if (FRAME_SIZE_SAVE_ALL_CALLEE_SAVES != 36 + 64 + 12)
+#error "FRAME_SIZE_SAVE_ALL_CALLEE_SAVES(ARM) size not as expected."
+#endif
+.endm
+
+ /*
+ * Macro that calls through to artDeliverPendingExceptionFromCode, where the pending
+ * exception is Thread::Current()->exception_ when the runtime method frame is ready.
+ */
+.macro DELIVER_PENDING_EXCEPTION_FRAME_READY
+ mov r0, rSELF @ pass Thread::Current
+ bl artDeliverPendingExceptionFromCode @ artDeliverPendingExceptionFromCode(Thread*)
+.endm
+
+ /*
+ * Macro that calls through to artDeliverPendingExceptionFromCode, where the pending
+ * exception is Thread::Current()->exception_.
+ */
+.macro DELIVER_PENDING_EXCEPTION
+ SETUP_SAVE_ALL_CALLEE_SAVES_FRAME r0 @ save callee saves for throw
+ DELIVER_PENDING_EXCEPTION_FRAME_READY
+.endm
+
 #endif // ART_RUNTIME_ARCH_ARM_ASM_SUPPORT_ARM_S_
diff --git a/runtime/arch/arm/jni_entrypoints_arm.S b/runtime/arch/arm/jni_entrypoints_arm.S
index a0f93cc416..ceef7720f5 100644
--- a/runtime/arch/arm/jni_entrypoints_arm.S
+++ b/runtime/arch/arm/jni_entrypoints_arm.S
@@ -33,12 +33,13 @@ ENTRY art_jni_dlsym_lookup_stub
 .cfi_adjust_cfa_offset 12
 mov r0, rSELF @ pass Thread::Current()
- // Call artFindNativeMethod() for normal native and artFindNativeMethodRunnable for @FastNative.
+ // Call artFindNativeMethod() for normal native and artFindNativeMethodRunnable()
+ // for @FastNative or @CriticalNative.
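In C++ terms, the dispatch that the surrounding assembly implements looks roughly like the sketch below; the flag values mirror ART's `modifiers.h` at the time of this change and should be treated as assumptions here:

```cpp
#include <cstdint>

// Assumed flag values (see modifiers.h); the assembly tests the same bits via
// ACCESS_FLAGS_METHOD_IS_FAST_NATIVE / ACCESS_FLAGS_METHOD_IS_CRITICAL_NATIVE.
constexpr uint32_t kAccFastNative = 0x00080000u;
constexpr uint32_t kAccCriticalNative = 0x00200000u;

extern "C" const void* artFindNativeMethod(void* self);
extern "C" const void* artFindNativeMethodRunnable(void* self);

const void* FindNativeCode(uint32_t access_flags, void* self) {
  // Normal natives do the lookup in a suspended state; @FastNative and
  // @CriticalNative methods stay runnable, hence the second entrypoint.
  if ((access_flags & (kAccFastNative | kAccCriticalNative)) != 0u) {
    return artFindNativeMethodRunnable(self);
  }
  return artFindNativeMethod(self);
}
```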
ldr ip, [r0, #THREAD_TOP_QUICK_FRAME_OFFSET] // uintptr_t tagged_quick_frame bic ip, #1 // ArtMethod** sp ldr ip, [ip] // ArtMethod* method ldr ip, [ip, #ART_METHOD_ACCESS_FLAGS_OFFSET] // uint32_t access_flags - tst ip, #ACCESS_FLAGS_METHOD_IS_FAST_NATIVE + tst ip, #(ACCESS_FLAGS_METHOD_IS_FAST_NATIVE | ACCESS_FLAGS_METHOD_IS_CRITICAL_NATIVE) bne .Llookup_stub_fast_native blx artFindNativeMethod b .Llookup_stub_continue @@ -61,3 +62,188 @@ ENTRY art_jni_dlsym_lookup_stub 1: pop {r0, r1, r2, r3, pc} @ restore regs and return to caller to handle exception END art_jni_dlsym_lookup_stub + +ENTRY art_jni_dlsym_lookup_critical_stub + // The hidden arg holding the tagged method (bit 0 set means GenericJNI) is r4. + // For Generic JNI we already have a managed frame, so we reuse the art_jni_dlsym_lookup_stub. + tst r4, #1 + bne art_jni_dlsym_lookup_stub + + // We need to create a GenericJNI managed frame above the stack args. + + // GenericJNI frame is similar to SaveRegsAndArgs frame with the native method + // instead of runtime method saved at the bottom. Note that the runtime shall + // not examine the args here, otherwise we would have to move them in registers + // and stack to account for the difference between managed and native ABIs. + SETUP_SAVE_REFS_AND_ARGS_FRAME_REGISTERS_ONLY + // Save the hidden arg as method pointer, r0 in the padding. + // (x0 is an arg in native ABI but not considered an arg in managed ABI.) + strd r4, r0, [sp] + + // Call artCriticalNativeOutArgsSize(method) + mov r0, r4 // r0 := method (from hidden arg) + bl artCriticalNativeOutArgsSize + + // Check if we have any stack args. + cbnz r0, .Lcritical_has_stack_args + + // Without stack args, the frame is fully constructed. + // Place tagged managed sp in Thread::Current()->top_quick_frame. + mov ip, sp + orr ip, #1 // Tag as GenericJNI frame. + str ip, [rSELF, #THREAD_TOP_QUICK_FRAME_OFFSET] + + // Call artFindNativeMethodRunnable() + mov r0, rSELF // pass Thread::Current() + bl artFindNativeMethodRunnable + + // Store result in scratch reg. + mov ip, r0 + + // Restore frame. + .cfi_remember_state + ldrd r4, r0, [sp] + RESTORE_SAVE_REFS_AND_ARGS_FRAME + REFRESH_MARKING_REGISTER + + // Check for exception. + cmp ip, #0 + beq .Lcritical_deliver_exception + + // Do the tail call. + bx ip + .cfi_restore_state + .cfi_def_cfa_offset FRAME_SIZE_SAVE_REFS_AND_ARGS + +.Lcritical_has_stack_args: + // Move the out args size to a scratch register. + mov ip, r0 + + // Restore register args as we're about to move stack args. + ldrd r4, r0, [sp] + RESTORE_SAVE_REFS_AND_ARGS_FRAME + + // Reserve space for SaveRefsAndArgs frame. + sub sp, #FRAME_SIZE_SAVE_REFS_AND_ARGS + .cfi_adjust_cfa_offset FRAME_SIZE_SAVE_REFS_AND_ARGS + + // Save arg regs so that we can use them as temporaries. + push {r0-r3} + .cfi_adjust_cfa_offset 16 + + // Move out args. For simplicity include the return address at the end. + add r0, sp, #16 // Destination. + add ip, r0, ip // Destination end. +1: + ldrd r2, r3, [r0, #FRAME_SIZE_SAVE_REFS_AND_ARGS] + strd r2, r3, [r0], #8 + cmp r0, ip + bne 1b + + // Save our LR, load caller's LR and redefine CFI to take ownership of the JNI stub frame. + str lr, [ip, #-__SIZEOF_POINTER__] + mov lr, r3 // The last moved value from the loop above. + .cfi_def_cfa ip, FRAME_SIZE_SAVE_REFS_AND_ARGS + + // Restore arg regs. + pop {r0-r3} // No `.cfi_adjust_cfa_offset`, CFA register is currently ip, not sp. + + // Re-create the SaveRefsAndArgs frame above the args. + strd r4, r0, [ip] // r0 in the padding as before. 
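A note on the bit-0 tags used throughout this stub: both the hidden method argument (tagged when coming from Generic JNI) and the managed `sp` stored into `top_quick_frame` (tagged as a GenericJNI frame) rely on the pointee being at least 2-byte aligned, so the low bit is free. A minimal sketch of the tag/untag steps mirrored by `orr ip, #1` and `bic ip, #1` above:

```cpp
#include <cassert>
#include <cstdint>

inline uintptr_t TagGenericJni(void* ptr) {
  uintptr_t value = reinterpret_cast<uintptr_t>(ptr);
  assert((value & 1u) == 0u);  // Requires at least 2-byte alignment.
  return value | 1u;           // Matches `orr ip, #1` in the assembly.
}

inline bool IsGenericJniTagged(uintptr_t value) {
  return (value & 1u) != 0u;   // Matches `tst r4, #1`.
}

inline void* UntagGenericJni(uintptr_t value) {
  return reinterpret_cast<void*>(value & ~static_cast<uintptr_t>(1u));
}
```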
+ add r4, ip, FRAME_SIZE_SAVE_REFS_AND_ARGS - 40
+ stmia r4, {r1-r3, r5-r8, r10-r11, lr} @ 10 words of callee saves and args.
+ .cfi_rel_offset r1, FRAME_SIZE_SAVE_REFS_AND_ARGS - 40 + 0
+ .cfi_rel_offset r2, FRAME_SIZE_SAVE_REFS_AND_ARGS - 40 + 4
+ .cfi_rel_offset r3, FRAME_SIZE_SAVE_REFS_AND_ARGS - 40 + 8
+ .cfi_rel_offset r5, FRAME_SIZE_SAVE_REFS_AND_ARGS - 40 + 12
+ .cfi_rel_offset r6, FRAME_SIZE_SAVE_REFS_AND_ARGS - 40 + 16
+ .cfi_rel_offset r7, FRAME_SIZE_SAVE_REFS_AND_ARGS - 40 + 20
+ .cfi_rel_offset r8, FRAME_SIZE_SAVE_REFS_AND_ARGS - 40 + 24
+ .cfi_rel_offset r10, FRAME_SIZE_SAVE_REFS_AND_ARGS - 40 + 28
+ .cfi_rel_offset r11, FRAME_SIZE_SAVE_REFS_AND_ARGS - 40 + 32
+ .cfi_rel_offset lr, FRAME_SIZE_SAVE_REFS_AND_ARGS - 40 + 36
+ vstmdb r4!, {s0-s15} @ 16 words of float args.
+
+ // Move the frame register to a callee-save register.
+ mov r11, ip
+ .cfi_def_cfa_register r11
+
+ // Place tagged managed sp in Thread::Current()->top_quick_frame.
+ orr ip, r11, #1 // Tag as GenericJNI frame.
+ str ip, [rSELF, #THREAD_TOP_QUICK_FRAME_OFFSET]
+
+ // Call artFindNativeMethodRunnable()
+ mov r0, rSELF // pass Thread::Current()
+ bl artFindNativeMethodRunnable
+
+ // Store result in scratch reg.
+ mov ip, r0
+
+ // Restore the frame. We shall not need the method anymore, so use r4 as scratch register.
+ mov r4, r11
+ .cfi_def_cfa_register r4
+ ldr r0, [r4, #4]
+ add r11, r4, #(FRAME_SIZE_SAVE_REFS_AND_ARGS - 40 - 64)
+ vldmia r11!, {s0-s15} @ 16 words of float args.
+ ldmia r11, {r1-r3, r5-r8, r10-r11, lr} @ 10 words of callee saves and args.
+ .cfi_restore r1
+ .cfi_restore r2
+ .cfi_restore r3
+ .cfi_restore r5
+ .cfi_restore r6
+ .cfi_restore r7
+ .cfi_restore r8
+ .cfi_restore r10
+ .cfi_restore r11
+ .cfi_restore lr
+ REFRESH_MARKING_REGISTER
+
+ // Check for exception.
+ cmp ip, #0
+ beq 3f
+
+ // Save arg regs so that we can use them as temporaries.
+ push {r0-r3} // No `.cfi_adjust_cfa_offset`, CFA register is currently r4, not sp.
+
+ // Move stack args to their original place.
+ mov r0, r4
+ add r1, sp, #16
+2:
+ ldrd r2, r3, [r0, #-8]!
+ strd r2, r3, [r0, #FRAME_SIZE_SAVE_REFS_AND_ARGS]
+ cmp r1, r0
+ bne 2b
+
+ // Replace original return address with caller's return address.
+ ldr r1, [r4, #(FRAME_SIZE_SAVE_REFS_AND_ARGS - __SIZEOF_POINTER__)]
+ str lr, [r4, #(FRAME_SIZE_SAVE_REFS_AND_ARGS - __SIZEOF_POINTER__)]
+
+ // Restore LR and redefine CFI to release ownership of the JNI stub frame.
+ .cfi_remember_state
+ mov lr, r1
+ .cfi_def_cfa sp, FRAME_SIZE_SAVE_REFS_AND_ARGS + 16
+
+ // Restore args
+ pop {r0-r3}
+ .cfi_adjust_cfa_offset -16
+
+ // Remove the frame reservation.
+ add sp, #FRAME_SIZE_SAVE_REFS_AND_ARGS
+ .cfi_adjust_cfa_offset -FRAME_SIZE_SAVE_REFS_AND_ARGS
+
+ // Do the tail call.
+ bx ip
+ .cfi_restore_state
+ .cfi_def_cfa r4, FRAME_SIZE_SAVE_REFS_AND_ARGS
+
+3:
+ // Drop stack args and the SaveRefsAndArgs reservation.
+ mov sp, r4
+ add sp, #FRAME_SIZE_SAVE_REFS_AND_ARGS
+ .cfi_def_cfa sp, 0
+
+.Lcritical_deliver_exception:
+ // When delivering the exception, we check that rSELF was saved but the SaveRefsAndArgs frame
+ // does not save it, so we cannot use DELIVER_PENDING_EXCEPTION_FRAME_READY with the above frames.
+ DELIVER_PENDING_EXCEPTION +END art_jni_dlsym_lookup_critical_stub diff --git a/runtime/arch/arm/jni_frame_arm.h b/runtime/arch/arm/jni_frame_arm.h new file mode 100644 index 0000000000..5203eafe16 --- /dev/null +++ b/runtime/arch/arm/jni_frame_arm.h @@ -0,0 +1,72 @@ +/* + * Copyright (C) 2020 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_RUNTIME_ARCH_ARM_JNI_FRAME_ARM_H_ +#define ART_RUNTIME_ARCH_ARM_JNI_FRAME_ARM_H_ + +#include + +#include "arch/instruction_set.h" +#include "base/bit_utils.h" +#include "base/globals.h" +#include "base/logging.h" + +namespace art { +namespace arm { + +constexpr size_t kFramePointerSize = static_cast(PointerSize::k32); +static_assert(kArmPointerSize == PointerSize::k32, "Unexpected ARM pointer size"); + +// The AAPCS requires 8-byte alignement. This is not as strict as the Managed ABI stack alignment. +static constexpr size_t kAapcsStackAlignment = 8u; +static_assert(kAapcsStackAlignment < kStackAlignment); + +// How many registers can be used for passing arguments. +// Note: AAPCS is soft-float, so these are all core registers. +constexpr size_t kJniArgumentRegisterCount = 4u; + +// Get the size of "out args" for @CriticalNative method stub. +// This must match the size of the frame emitted by the JNI compiler at the native call site. +inline size_t GetCriticalNativeOutArgsSize(const char* shorty, uint32_t shorty_len) { + DCHECK_EQ(shorty_len, strlen(shorty)); + + size_t reg = 0; // Register for the current argument; if reg >= 4, we shall use stack. + for (size_t i = 1; i != shorty_len; ++i) { + if (shorty[i] == 'J' || shorty[i] == 'D') { + // 8-byte args need to start in even-numbered register or at aligned stack position. + reg += (reg & 1); + // Count first word and let the common path count the second. + reg += 1u; + } + reg += 1u; + } + size_t stack_args = std::max(reg, kJniArgumentRegisterCount) - kJniArgumentRegisterCount; + size_t size = kFramePointerSize * stack_args; + + // Check if this is a tail call, i.e. there are no stack args and the return type + // is not an FP type (otherwise we need to move the result to FP register). + // No need to sign/zero extend small return types thanks to AAPCS. + if (size != 0u || shorty[0] == 'F' || shorty[0] == 'D') { + size += kFramePointerSize; // We need to spill LR with the args. + } + return RoundUp(size, kAapcsStackAlignment); +} + +} // namespace arm +} // namespace art + +#endif // ART_RUNTIME_ARCH_ARM_JNI_FRAME_ARM_H_ + diff --git a/runtime/arch/arm/quick_entrypoints_arm.S b/runtime/arch/arm/quick_entrypoints_arm.S index 9eee3459f9..f94694d393 100644 --- a/runtime/arch/arm/quick_entrypoints_arm.S +++ b/runtime/arch/arm/quick_entrypoints_arm.S @@ -24,45 +24,6 @@ /* Deliver an exception pending on a thread */ .extern artDeliverPendingException - /* - * Macro to spill the GPRs. - */ -.macro SPILL_ALL_CALLEE_SAVE_GPRS - push {r4-r11, lr} @ 9 words (36 bytes) of callee saves. 
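The new `arm::GetCriticalNativeOutArgsSize()` above walks the shorty with a virtual argument-register index, bumping `long`/`double` to an even register as AAPCS requires. Two hand-checked examples of the values the JNI compiler's DCHECKs compare against (a sketch, assuming the header added above):

```cpp
#include <cassert>

#include "arch/arm/jni_frame_arm.h"  // Added by this change.

void CheckArmCriticalNativeOutArgsSize() {
  // int f(long, int): 'J' fills r0/r1, 'I' goes to r2, so no stack args;
  // a non-FP return allows a tail call, hence 0 bytes of out args.
  assert(arm::GetCriticalNativeOutArgsSize("IJI", 3u) == 0u);

  // double f(int, long, int): 'I' in r0, 'J' skips r1 and fills r2/r3, the
  // final 'I' spills to the stack (4 bytes). Stack args (and the FP return)
  // rule out a tail call, so LR is spilled too: 4 + 4 = 8 bytes.
  assert(arm::GetCriticalNativeOutArgsSize("DIJI", 4u) == 8u);
}
```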
- .cfi_adjust_cfa_offset 36 - .cfi_rel_offset r4, 0 - .cfi_rel_offset r5, 4 - .cfi_rel_offset r6, 8 - .cfi_rel_offset r7, 12 - .cfi_rel_offset r8, 16 - .cfi_rel_offset r9, 20 - .cfi_rel_offset r10, 24 - .cfi_rel_offset r11, 28 - .cfi_rel_offset lr, 32 -.endm - - /* - * Macro that sets up the callee save frame to conform with - * Runtime::CreateCalleeSaveMethod(kSaveAllCalleeSaves) - */ -.macro SETUP_SAVE_ALL_CALLEE_SAVES_FRAME rTemp - SPILL_ALL_CALLEE_SAVE_GPRS @ 9 words (36 bytes) of callee saves. - vpush {s16-s31} @ 16 words (64 bytes) of floats. - .cfi_adjust_cfa_offset 64 - sub sp, #12 @ 3 words of space, bottom word will hold Method* - .cfi_adjust_cfa_offset 12 - RUNTIME_CURRENT1 \rTemp @ Load Runtime::Current into rTemp. - @ Load kSaveAllCalleeSaves Method* into rTemp. - ldr \rTemp, [\rTemp, #RUNTIME_SAVE_ALL_CALLEE_SAVES_METHOD_OFFSET] - str \rTemp, [sp, #0] @ Place Method* at bottom of stack. - str sp, [rSELF, #THREAD_TOP_QUICK_FRAME_OFFSET] @ Place sp in Thread::Current()->top_quick_frame. - - // Ugly compile-time check, but we only have the preprocessor. -#if (FRAME_SIZE_SAVE_ALL_CALLEE_SAVES != 36 + 64 + 12) -#error "FRAME_SIZE_SAVE_ALL_CALLEE_SAVES(ARM) size not as expected." -#endif -.endm - /* * Macro that sets up the callee save frame to conform with * Runtime::CreateCalleeSaveMethod(kSaveRefsOnly). @@ -111,36 +72,6 @@ .cfi_adjust_cfa_offset -28 .endm - /* - * Macro that sets up the callee save frame to conform with - * Runtime::CreateCalleeSaveMethod(kSaveRefsAndArgs). - */ -.macro SETUP_SAVE_REFS_AND_ARGS_FRAME_REGISTERS_ONLY - // Note: We could avoid saving R8 in the case of Baker read - // barriers, as it is overwritten by REFRESH_MARKING_REGISTER - // later; but it's not worth handling this special case. - push {r1-r3, r5-r8, r10-r11, lr} @ 10 words of callee saves and args. - .cfi_adjust_cfa_offset 40 - .cfi_rel_offset r1, 0 - .cfi_rel_offset r2, 4 - .cfi_rel_offset r3, 8 - .cfi_rel_offset r5, 12 - .cfi_rel_offset r6, 16 - .cfi_rel_offset r7, 20 - .cfi_rel_offset r8, 24 - .cfi_rel_offset r10, 28 - .cfi_rel_offset r11, 32 - .cfi_rel_offset lr, 36 - vpush {s0-s15} @ 16 words of float args. - .cfi_adjust_cfa_offset 64 - sub sp, #8 @ 2 words of space, alignment padding and Method* - .cfi_adjust_cfa_offset 8 - // Ugly compile-time check, but we only have the preprocessor. -#if (FRAME_SIZE_SAVE_REFS_AND_ARGS != 40 + 64 + 8) -#error "FRAME_SIZE_SAVE_REFS_AND_ARGS(ARM) size not as expected." -#endif -.endm - .macro SETUP_SAVE_REFS_AND_ARGS_FRAME rTemp SETUP_SAVE_REFS_AND_ARGS_FRAME_REGISTERS_ONLY RUNTIME_CURRENT3 \rTemp @ Load Runtime::Current into rTemp. @@ -156,28 +87,6 @@ str sp, [rSELF, #THREAD_TOP_QUICK_FRAME_OFFSET] @ Place sp in Thread::Current()->top_quick_frame. .endm -.macro RESTORE_SAVE_REFS_AND_ARGS_FRAME - add sp, #8 @ rewind sp - .cfi_adjust_cfa_offset -8 - vpop {s0-s15} - .cfi_adjust_cfa_offset -64 - // Note: Likewise, we could avoid restoring X20 in the case of Baker - // read barriers, as it is overwritten by REFRESH_MARKING_REGISTER - // later; but it's not worth handling this special case. 
- pop {r1-r3, r5-r8, r10-r11, lr} @ 10 words of callee saves - .cfi_restore r1 - .cfi_restore r2 - .cfi_restore r3 - .cfi_restore r5 - .cfi_restore r6 - .cfi_restore r7 - .cfi_restore r8 - .cfi_restore r10 - .cfi_restore r11 - .cfi_restore lr - .cfi_adjust_cfa_offset -40 -.endm - /* * Macro that sets up the callee save frame to conform with * Runtime::CreateCalleeSaveMethod(kSaveEverything) @@ -273,17 +182,6 @@ .cfi_adjust_cfa_offset -52 .endm -// Macro to refresh the Marking Register (R8). -// -// This macro must be called at the end of functions implementing -// entrypoints that possibly (directly or indirectly) perform a -// suspend check (before they return). -.macro REFRESH_MARKING_REGISTER -#if defined(USE_READ_BARRIER) && defined(USE_BAKER_READ_BARRIER) - ldr rMR, [rSELF, #THREAD_IS_GC_MARKING_OFFSET] -#endif -.endm - .macro RETURN_IF_RESULT_IS_ZERO cbnz r0, 1f @ result non-zero branch over bx lr @ return @@ -296,24 +194,6 @@ 1: .endm - /* - * Macro that calls through to artDeliverPendingExceptionFromCode, where the pending - * exception is Thread::Current()->exception_ when the runtime method frame is ready. - */ -.macro DELIVER_PENDING_EXCEPTION_FRAME_READY - mov r0, rSELF @ pass Thread::Current - bl artDeliverPendingExceptionFromCode @ artDeliverPendingExceptionFromCode(Thread*) -.endm - - /* - * Macro that calls through to artDeliverPendingExceptionFromCode, where the pending - * exception is Thread::Current()->exception_. - */ -.macro DELIVER_PENDING_EXCEPTION - SETUP_SAVE_ALL_CALLEE_SAVES_FRAME r0 @ save callee saves for throw - DELIVER_PENDING_EXCEPTION_FRAME_READY -.endm - .macro NO_ARG_RUNTIME_EXCEPTION c_name, cxx_name .extern \cxx_name ENTRY \c_name diff --git a/runtime/arch/arm64/asm_support_arm64.S b/runtime/arch/arm64/asm_support_arm64.S index a3cf6f08b3..b1e5c864ce 100644 --- a/runtime/arch/arm64/asm_support_arm64.S +++ b/runtime/arch/arm64/asm_support_arm64.S @@ -103,18 +103,26 @@ .cfi_restore \reg .endm -.macro SAVE_TWO_REGS reg1, reg2, offset - stp \reg1, \reg2, [sp, #(\offset)] +.macro SAVE_TWO_REGS_BASE base, reg1, reg2, offset + stp \reg1, \reg2, [\base, #(\offset)] .cfi_rel_offset \reg1, (\offset) .cfi_rel_offset \reg2, (\offset) + 8 .endm -.macro RESTORE_TWO_REGS reg1, reg2, offset - ldp \reg1, \reg2, [sp, #(\offset)] +.macro SAVE_TWO_REGS reg1, reg2, offset + SAVE_TWO_REGS_BASE sp, \reg1, \reg2, \offset +.endm + +.macro RESTORE_TWO_REGS_BASE base, reg1, reg2, offset + ldp \reg1, \reg2, [\base, #(\offset)] .cfi_restore \reg1 .cfi_restore \reg2 .endm +.macro RESTORE_TWO_REGS reg1, reg2, offset + RESTORE_TWO_REGS_BASE sp, \reg1, \reg2, \offset +.endm + .macro LOAD_RUNTIME_INSTANCE reg #if __has_feature(hwaddress_sanitizer) && __clang_major__ >= 10 adrp xIP0, :pg_hi21_nc:_ZN3art7Runtime9instance_E @@ -190,6 +198,71 @@ DECREASE_FRAME 96 .endm +.macro SETUP_SAVE_REFS_AND_ARGS_FRAME_INTERNAL base + // Ugly compile-time check, but we only have the preprocessor. +#if (FRAME_SIZE_SAVE_REFS_AND_ARGS != 224) +#error "FRAME_SIZE_SAVE_REFS_AND_ARGS(ARM64) size not as expected." +#endif + + // Stack alignment filler [\base, #8]. + // FP args. + stp d0, d1, [\base, #16] + stp d2, d3, [\base, #32] + stp d4, d5, [\base, #48] + stp d6, d7, [\base, #64] + + // Core args. + SAVE_TWO_REGS_BASE \base, x1, x2, 80 + SAVE_TWO_REGS_BASE \base, x3, x4, 96 + SAVE_TWO_REGS_BASE \base, x5, x6, 112 + + // x7, Callee-saves. 
+ // Note: We could avoid saving X20 in the case of Baker read + // barriers, as it is overwritten by REFRESH_MARKING_REGISTER + // later; but it's not worth handling this special case. + SAVE_TWO_REGS_BASE \base, x7, x20, 128 + SAVE_TWO_REGS_BASE \base, x21, x22, 144 + SAVE_TWO_REGS_BASE \base, x23, x24, 160 + SAVE_TWO_REGS_BASE \base, x25, x26, 176 + SAVE_TWO_REGS_BASE \base, x27, x28, 192 + + // x29(callee-save) and LR. + SAVE_TWO_REGS_BASE \base, x29, xLR, 208 +.endm + +// TODO: Probably no need to restore registers preserved by aapcs64. (That would require +// auditing all users to make sure they restore aapcs64 callee-save registers they clobber.) +.macro RESTORE_SAVE_REFS_AND_ARGS_FRAME_INTERNAL base + // FP args. + ldp d0, d1, [\base, #16] + ldp d2, d3, [\base, #32] + ldp d4, d5, [\base, #48] + ldp d6, d7, [\base, #64] + + // Core args. + RESTORE_TWO_REGS_BASE \base, x1, x2, 80 + RESTORE_TWO_REGS_BASE \base, x3, x4, 96 + RESTORE_TWO_REGS_BASE \base, x5, x6, 112 + + // x7, Callee-saves. + // Note: Likewise, we could avoid restoring X20 in the case of Baker + // read barriers, as it is overwritten by REFRESH_MARKING_REGISTER + // later; but it's not worth handling this special case. + RESTORE_TWO_REGS_BASE \base, x7, x20, 128 + RESTORE_TWO_REGS_BASE \base, x21, x22, 144 + RESTORE_TWO_REGS_BASE \base, x23, x24, 160 + RESTORE_TWO_REGS_BASE \base, x25, x26, 176 + RESTORE_TWO_REGS_BASE \base, x27, x28, 192 + + // x29(callee-save) and LR. + RESTORE_TWO_REGS_BASE \base, x29, xLR, 208 +.endm + +.macro RESTORE_SAVE_REFS_AND_ARGS_FRAME + RESTORE_SAVE_REFS_AND_ARGS_FRAME_INTERNAL sp + DECREASE_FRAME FRAME_SIZE_SAVE_REFS_AND_ARGS +.endm + /* * Macro that sets up the callee save frame to conform with * Runtime::CreateCalleeSaveMethod(kSaveAllCalleeSaves) diff --git a/runtime/arch/arm64/jni_entrypoints_arm64.S b/runtime/arch/arm64/jni_entrypoints_arm64.S index e645799c41..8a34662645 100644 --- a/runtime/arch/arm64/jni_entrypoints_arm64.S +++ b/runtime/arch/arm64/jni_entrypoints_arm64.S @@ -23,69 +23,210 @@ .extern artFindNativeMethodRunnable ENTRY art_jni_dlsym_lookup_stub - // spill regs. - stp x29, x30, [sp, #-16]! - .cfi_adjust_cfa_offset 16 - .cfi_rel_offset x29, 0 - .cfi_rel_offset x30, 8 - mov x29, sp - stp d6, d7, [sp, #-16]! - .cfi_adjust_cfa_offset 16 - stp d4, d5, [sp, #-16]! - .cfi_adjust_cfa_offset 16 - stp d2, d3, [sp, #-16]! - .cfi_adjust_cfa_offset 16 - stp d0, d1, [sp, #-16]! - .cfi_adjust_cfa_offset 16 - stp x6, x7, [sp, #-16]! - .cfi_adjust_cfa_offset 16 - stp x4, x5, [sp, #-16]! - .cfi_adjust_cfa_offset 16 - stp x2, x3, [sp, #-16]! - .cfi_adjust_cfa_offset 16 - stp x0, x1, [sp, #-16]! - .cfi_adjust_cfa_offset 16 - - mov x0, xSELF // pass Thread::Current() - // Call artFindNativeMethod() for normal native and artFindNativeMethodRunnable for @FastNative. - ldr xIP0, [x0, #THREAD_TOP_QUICK_FRAME_OFFSET] // uintptr_t tagged_quick_frame - bic xIP0, xIP0, #1 // ArtMethod** sp - ldr xIP0, [xIP0] // ArtMethod* method - ldr xIP0, [xIP0, #ART_METHOD_ACCESS_FLAGS_OFFSET] // uint32_t access_flags - tst xIP0, #ACCESS_FLAGS_METHOD_IS_FAST_NATIVE - b.ne .Llookup_stub_fast_native - bl artFindNativeMethod - b .Llookup_stub_continue -.Llookup_stub_fast_native: - bl artFindNativeMethodRunnable + // spill regs. + stp x29, x30, [sp, #-16]! + .cfi_adjust_cfa_offset 16 + .cfi_rel_offset x29, 0 + .cfi_rel_offset x30, 8 + mov x29, sp + stp d6, d7, [sp, #-16]! + .cfi_adjust_cfa_offset 16 + stp d4, d5, [sp, #-16]! + .cfi_adjust_cfa_offset 16 + stp d2, d3, [sp, #-16]! 
+ .cfi_adjust_cfa_offset 16 + stp d0, d1, [sp, #-16]! + .cfi_adjust_cfa_offset 16 + stp x6, x7, [sp, #-16]! + .cfi_adjust_cfa_offset 16 + stp x4, x5, [sp, #-16]! + .cfi_adjust_cfa_offset 16 + stp x2, x3, [sp, #-16]! + .cfi_adjust_cfa_offset 16 + stp x0, x1, [sp, #-16]! + .cfi_adjust_cfa_offset 16 + + mov x0, xSELF // pass Thread::Current() + // Call artFindNativeMethod() for normal native and artFindNativeMethodRunnable() + // for @FastNative or @CriticalNative. + ldr xIP0, [x0, #THREAD_TOP_QUICK_FRAME_OFFSET] // uintptr_t tagged_quick_frame + bic xIP0, xIP0, #1 // ArtMethod** sp + ldr xIP0, [xIP0] // ArtMethod* method + ldr xIP0, [xIP0, #ART_METHOD_ACCESS_FLAGS_OFFSET] // uint32_t access_flags + mov xIP1, #(ACCESS_FLAGS_METHOD_IS_FAST_NATIVE | ACCESS_FLAGS_METHOD_IS_CRITICAL_NATIVE) + tst xIP0, xIP1 + b.ne .Llookup_stub_fast_native + bl artFindNativeMethod + b .Llookup_stub_continue + .Llookup_stub_fast_native: + bl artFindNativeMethodRunnable .Llookup_stub_continue: - mov x17, x0 // store result in scratch reg. - - // load spill regs. - ldp x0, x1, [sp], #16 - .cfi_adjust_cfa_offset -16 - ldp x2, x3, [sp], #16 - .cfi_adjust_cfa_offset -16 - ldp x4, x5, [sp], #16 - .cfi_adjust_cfa_offset -16 - ldp x6, x7, [sp], #16 - .cfi_adjust_cfa_offset -16 - ldp d0, d1, [sp], #16 - .cfi_adjust_cfa_offset -16 - ldp d2, d3, [sp], #16 - .cfi_adjust_cfa_offset -16 - ldp d4, d5, [sp], #16 - .cfi_adjust_cfa_offset -16 - ldp d6, d7, [sp], #16 - .cfi_adjust_cfa_offset -16 - ldp x29, x30, [sp], #16 - .cfi_adjust_cfa_offset -16 - .cfi_restore x29 - .cfi_restore x30 - - cbz x17, 1f // is method code null ? - br x17 // if non-null, tail call to method's code. + mov x17, x0 // store result in scratch reg. + + // load spill regs. + ldp x0, x1, [sp], #16 + .cfi_adjust_cfa_offset -16 + ldp x2, x3, [sp], #16 + .cfi_adjust_cfa_offset -16 + ldp x4, x5, [sp], #16 + .cfi_adjust_cfa_offset -16 + ldp x6, x7, [sp], #16 + .cfi_adjust_cfa_offset -16 + ldp d0, d1, [sp], #16 + .cfi_adjust_cfa_offset -16 + ldp d2, d3, [sp], #16 + .cfi_adjust_cfa_offset -16 + ldp d4, d5, [sp], #16 + .cfi_adjust_cfa_offset -16 + ldp d6, d7, [sp], #16 + .cfi_adjust_cfa_offset -16 + ldp x29, x30, [sp], #16 + .cfi_adjust_cfa_offset -16 + .cfi_restore x29 + .cfi_restore x30 + + cbz x17, 1f // is method code null ? + br x17 // if non-null, tail call to method's code. 1: - ret // restore regs and return to caller to handle exception. + ret // restore regs and return to caller to handle exception. END art_jni_dlsym_lookup_stub + +ENTRY art_jni_dlsym_lookup_critical_stub + // The hidden arg holding the tagged method (bit 0 set means GenericJNI) is x15. + // For Generic JNI we already have a managed frame, so we reuse the art_jni_dlsym_lookup_stub. + tbnz x15, #0, art_jni_dlsym_lookup_stub + + // We need to create a GenericJNI managed frame above the stack args. + + // GenericJNI frame is similar to SaveRegsAndArgs frame with the native method + // instead of runtime method saved at the bottom. Note that the runtime shall + // not examine the args here, otherwise we would have to move them in registers + // and stack to account for the difference between managed and native ABIs. + INCREASE_FRAME FRAME_SIZE_SAVE_REFS_AND_ARGS + SETUP_SAVE_REFS_AND_ARGS_FRAME_INTERNAL sp + // Save the hidden arg as method pointer, x0 in the padding. + // (x0 is an arg in native ABI but not considered an arg in managed ABI.) 
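`bl artCriticalNativeOutArgsSize` below calls into the runtime with the raw `ArtMethod*`. The entrypoint itself is not shown in this patch; presumably it just fetches the shorty and bridges to the per-architecture helper, along the lines of this hedged sketch:

```cpp
#include "arch/arm/jni_frame_arm.h"
#include "arch/arm64/jni_frame_arm64.h"
#include "art_method-inl.h"

namespace art {

// Sketch only: the real implementation lives elsewhere in the runtime and
// may differ in details (error handling, other architectures).
extern "C" size_t artCriticalNativeOutArgsSize(ArtMethod* method)
    REQUIRES_SHARED(Locks::mutator_lock_) {
  uint32_t shorty_len;
  const char* shorty = method->GetShorty(&shorty_len);
  switch (kRuntimeISA) {
    case InstructionSet::kThumb2:
      return arm::GetCriticalNativeOutArgsSize(shorty, shorty_len);
    case InstructionSet::kArm64:
      return arm64::GetCriticalNativeOutArgsSize(shorty, shorty_len);
    default:  // x86/x86-64 would dispatch to their jni_frame_*.h twins.
      UNIMPLEMENTED(FATAL);
      return 0u;
  }
}

}  // namespace art
```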
+ SAVE_TWO_REGS x15, x0, 0 + + // Call artCriticalNativeOutArgsSize(method) + mov x0, x15 // x0 := method (from hidden arg) + bl artCriticalNativeOutArgsSize + + // Check if we have any stack args. + cbnz x0, .Lcritical_has_stack_args + + // Without stack args, the frame is fully constructed. + // Place tagged managed sp in Thread::Current()->top_quick_frame. + mov xIP0, sp + orr xIP0, xIP0, #1 // Tag as GenericJNI frame. + str xIP0, [xSELF, #THREAD_TOP_QUICK_FRAME_OFFSET] + + // Call artFindNativeMethodRunnable() + mov x0, xSELF // pass Thread::Current() + bl artFindNativeMethodRunnable + + // Store result in scratch reg. + mov xIP0, x0 + + // Restore frame. + .cfi_remember_state + RESTORE_TWO_REGS x15, x0, 0 + RESTORE_SAVE_REFS_AND_ARGS_FRAME + REFRESH_MARKING_REGISTER + + // Check for exception. + cbz xIP0, .Lcritical_deliver_exception + + // Do the tail call + br xIP0 + .cfi_restore_state + .cfi_def_cfa_offset FRAME_SIZE_SAVE_REFS_AND_ARGS + +.Lcritical_has_stack_args: + // Move the out args size to a scratch register. + mov xIP0, x0 + + // Restore register args as we're about to move stack args. + RESTORE_TWO_REGS x15, x0, 0 + RESTORE_SAVE_REFS_AND_ARGS_FRAME_INTERNAL sp + + // Move out args. For simplicity include the return address at the end. + mov x8, sp // Destination. + add x9, sp, xIP0 // Destination end. +1: + ldp x10, x11, [x8, #FRAME_SIZE_SAVE_REFS_AND_ARGS] + stp x10, x11, [x8], #16 + cmp x8, x9 + bne 1b + + // Save our LR, load caller's LR and redefine CFI to take ownership of the JNI stub frame. + str xLR, [x9, #-__SIZEOF_POINTER__] + mov xLR, x11 // The last moved value from the loop above. + .cfi_def_cfa x9, FRAME_SIZE_SAVE_REFS_AND_ARGS + + // Re-create the SaveRefsAndArgs frame above the args. + SETUP_SAVE_REFS_AND_ARGS_FRAME_INTERNAL x9 + SAVE_TWO_REGS_BASE x9, x15, x0, 0 + + // Move the frame register to a callee-save register. + mov x29, x9 + .cfi_def_cfa_register x29 + + // Place tagged managed sp in Thread::Current()->top_quick_frame. + orr xIP0, x29, #1 // Tag as GenericJNI frame. + str xIP0, [xSELF, #THREAD_TOP_QUICK_FRAME_OFFSET] + + // Call artFindNativeMethodRunnable() + mov x0, xSELF // pass Thread::Current() + bl artFindNativeMethodRunnable + + // Store result in scratch reg. + mov xIP0, x0 + + // Restore the frame. + mov x9, x29 + .cfi_def_cfa_register x9 + RESTORE_TWO_REGS_BASE x9, x15, x0, 0 + RESTORE_SAVE_REFS_AND_ARGS_FRAME_INTERNAL x9 + REFRESH_MARKING_REGISTER + + // Check for exception. + cbz xIP0, 3f + + // Move stack args to their original place. + mov x8, x9 +2: + ldp x10, x11, [x8, #-16]! + stp x10, x11, [x8, #FRAME_SIZE_SAVE_REFS_AND_ARGS] + cmp sp, x8 + bne 2b + + // Replace original return address with caller's return address. + ldr xIP1, [x9, #(FRAME_SIZE_SAVE_REFS_AND_ARGS - __SIZEOF_POINTER__)] + str xLR, [x9, #(FRAME_SIZE_SAVE_REFS_AND_ARGS - __SIZEOF_POINTER__)] + + // Restore LR and redefine CFI to release ownership of the JNI stub frame. + .cfi_remember_state + mov xLR, xIP1 + .cfi_def_cfa sp, FRAME_SIZE_SAVE_REFS_AND_ARGS + + // Remove the frame reservation. + DECREASE_FRAME FRAME_SIZE_SAVE_REFS_AND_ARGS + + // Do the tail call. + br xIP0 + .cfi_restore_state + .cfi_def_cfa x9, FRAME_SIZE_SAVE_REFS_AND_ARGS + +3: + // Drop stack args and the SaveRefsAndArgs reservation. 
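+    // (The moved stack args sit between sp and x9, and the frame between x9 and
+    // x9 + FRAME_SIZE_SAVE_REFS_AND_ARGS, so this drops both at once.)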
+    mov   sp, x9
+    add   sp, sp, #FRAME_SIZE_SAVE_REFS_AND_ARGS
+    .cfi_def_cfa sp, 0
+
+.Lcritical_deliver_exception:
+    // When delivering exception, we check that xSELF was saved but the SaveRefsAndArgs frame does
+    // not save it, so we cannot use DELIVER_PENDING_EXCEPTION_FRAME_READY with the above frames.
+    DELIVER_PENDING_EXCEPTION
+END art_jni_dlsym_lookup_critical_stub
diff --git a/runtime/arch/arm64/jni_frame_arm64.h b/runtime/arch/arm64/jni_frame_arm64.h
new file mode 100644
index 0000000000..fa4d43c171
--- /dev/null
+++ b/runtime/arch/arm64/jni_frame_arm64.h
@@ -0,0 +1,79 @@
+/*
+ * Copyright (C) 2020 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_ARCH_ARM64_JNI_FRAME_ARM64_H_
+#define ART_RUNTIME_ARCH_ARM64_JNI_FRAME_ARM64_H_
+
+#include <string.h>
+
+#include "arch/instruction_set.h"
+#include "base/bit_utils.h"
+#include "base/globals.h"
+#include "base/logging.h"
+
+namespace art {
+namespace arm64 {
+
+constexpr size_t kFramePointerSize = static_cast<size_t>(PointerSize::k64);
+static_assert(kArm64PointerSize == PointerSize::k64, "Unexpected ARM64 pointer size");
+
+// The AAPCS64 requires 16-byte alignment. This is the same as the Managed ABI stack alignment.
+static constexpr size_t kAapcs64StackAlignment = 16u;
+static_assert(kAapcs64StackAlignment == kStackAlignment);
+
+// Up to how many float-like (float, double) args can be in registers.
+// The rest of the args must go on the stack.
+constexpr size_t kMaxFloatOrDoubleRegisterArguments = 8u;
+// Up to how many integer-like (pointers, objects, longs, int, short, bool, etc) args can be
+// in registers. The rest of the args must go on the stack.
+constexpr size_t kMaxIntLikeRegisterArguments = 8u;
+
+// Get the size of "out args" for @CriticalNative method stub.
+// This must match the size of the frame emitted by the JNI compiler at the native call site.
+inline size_t GetCriticalNativeOutArgsSize(const char* shorty, uint32_t shorty_len) {
+  DCHECK_EQ(shorty_len, strlen(shorty));
+
+  size_t num_fp_args = 0u;
+  for (size_t i = 1; i != shorty_len; ++i) {
+    if (shorty[i] == 'F' || shorty[i] == 'D') {
+      num_fp_args += 1u;
+    }
+  }
+  size_t num_non_fp_args = shorty_len - 1u - num_fp_args;
+
+  // Account for FP arguments passed through v0-v7.
+  size_t num_stack_fp_args =
+      num_fp_args - std::min(kMaxFloatOrDoubleRegisterArguments, num_fp_args);
+  // Account for other (integer and pointer) arguments passed through GPR (x0-x7).
+  size_t num_stack_non_fp_args =
+      num_non_fp_args - std::min(kMaxIntLikeRegisterArguments, num_non_fp_args);
+  // The size of outgoing arguments.
+  size_t size =
+      (num_stack_fp_args + num_stack_non_fp_args) * static_cast<size_t>(kArm64PointerSize);
+
+  // We can make a tail call if there are no stack args and we do not need
+  // to extend the result. Otherwise, add space for return PC.
+  if (size != 0u || shorty[0] == 'B' || shorty[0] == 'C' || shorty[0] == 'S' || shorty[0] == 'Z') {
+    size += kFramePointerSize;  // We need to spill LR with the args.
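+    // Example: shorty "BII" (jbyte return, two jint args) has no stack args, but
+    // the byte result must be sign-extended after the call, so the stub cannot
+    // tail-call; size becomes 8 here and rounds up to 16 below.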
+ } + return RoundUp(size, kAapcs64StackAlignment); +} + +} // namespace arm64 +} // namespace art + +#endif // ART_RUNTIME_ARCH_ARM64_JNI_FRAME_ARM64_H_ + diff --git a/runtime/arch/arm64/quick_entrypoints_arm64.S b/runtime/arch/arm64/quick_entrypoints_arm64.S index 7260700b86..634c762040 100644 --- a/runtime/arch/arm64/quick_entrypoints_arm64.S +++ b/runtime/arch/arm64/quick_entrypoints_arm64.S @@ -49,42 +49,6 @@ DECREASE_FRAME 96 .endm - -.macro SETUP_SAVE_REFS_AND_ARGS_FRAME_INTERNAL - INCREASE_FRAME 224 - - // Ugly compile-time check, but we only have the preprocessor. -#if (FRAME_SIZE_SAVE_REFS_AND_ARGS != 224) -#error "FRAME_SIZE_SAVE_REFS_AND_ARGS(ARM64) size not as expected." -#endif - - // Stack alignment filler [sp, #8]. - // FP args. - stp d0, d1, [sp, #16] - stp d2, d3, [sp, #32] - stp d4, d5, [sp, #48] - stp d6, d7, [sp, #64] - - // Core args. - SAVE_TWO_REGS x1, x2, 80 - SAVE_TWO_REGS x3, x4, 96 - SAVE_TWO_REGS x5, x6, 112 - - // x7, Callee-saves. - // Note: We could avoid saving X20 in the case of Baker read - // barriers, as it is overwritten by REFRESH_MARKING_REGISTER - // later; but it's not worth handling this special case. - SAVE_TWO_REGS x7, x20, 128 - SAVE_TWO_REGS x21, x22, 144 - SAVE_TWO_REGS x23, x24, 160 - SAVE_TWO_REGS x25, x26, 176 - SAVE_TWO_REGS x27, x28, 192 - - // x29(callee-save) and LR. - SAVE_TWO_REGS x29, xLR, 208 - -.endm - /* * Macro that sets up the callee save frame to conform with * Runtime::CreateCalleeSaveMethod(kSaveRefsAndArgs). @@ -99,7 +63,8 @@ // ArtMethod* xIP0 = Runtime::instance_->callee_save_methods_[kSaveRefAndArgs]; ldr xIP0, [xIP0, RUNTIME_SAVE_REFS_AND_ARGS_METHOD_OFFSET] - SETUP_SAVE_REFS_AND_ARGS_FRAME_INTERNAL + INCREASE_FRAME FRAME_SIZE_SAVE_REFS_AND_ARGS + SETUP_SAVE_REFS_AND_ARGS_FRAME_INTERNAL sp str xIP0, [sp] // Store ArtMethod* Runtime::callee_save_methods_[kSaveRefsAndArgs]. // Place sp in Thread::Current()->top_quick_frame. @@ -108,42 +73,14 @@ .endm .macro SETUP_SAVE_REFS_AND_ARGS_FRAME_WITH_METHOD_IN_X0 - SETUP_SAVE_REFS_AND_ARGS_FRAME_INTERNAL + INCREASE_FRAME FRAME_SIZE_SAVE_REFS_AND_ARGS + SETUP_SAVE_REFS_AND_ARGS_FRAME_INTERNAL sp str x0, [sp, #0] // Store ArtMethod* to bottom of stack. // Place sp in Thread::Current()->top_quick_frame. mov xIP0, sp str xIP0, [xSELF, # THREAD_TOP_QUICK_FRAME_OFFSET] .endm -// TODO: Probably no need to restore registers preserved by aapcs64. -.macro RESTORE_SAVE_REFS_AND_ARGS_FRAME - // FP args. - ldp d0, d1, [sp, #16] - ldp d2, d3, [sp, #32] - ldp d4, d5, [sp, #48] - ldp d6, d7, [sp, #64] - - // Core args. - RESTORE_TWO_REGS x1, x2, 80 - RESTORE_TWO_REGS x3, x4, 96 - RESTORE_TWO_REGS x5, x6, 112 - - // x7, Callee-saves. - // Note: Likewise, we could avoid restoring X20 in the case of Baker - // read barriers, as it is overwritten by REFRESH_MARKING_REGISTER - // later; but it's not worth handling this special case. - RESTORE_TWO_REGS x7, x20, 128 - RESTORE_TWO_REGS x21, x22, 144 - RESTORE_TWO_REGS x23, x24, 160 - RESTORE_TWO_REGS x25, x26, 176 - RESTORE_TWO_REGS x27, x28, 192 - - // x29(callee-save) and LR. 
- RESTORE_TWO_REGS x29, xLR, 208 - - DECREASE_FRAME 224 -.endm - /* * Macro that sets up the callee save frame to conform with * Runtime::CreateCalleeSaveMethod(kSaveEverything) diff --git a/runtime/arch/x86/asm_support_x86.S b/runtime/arch/x86/asm_support_x86.S index 8f43cc8da7..8938d8b640 100644 --- a/runtime/arch/x86/asm_support_x86.S +++ b/runtime/arch/x86/asm_support_x86.S @@ -94,7 +94,7 @@ #define CFI_RESTORE(reg) #define CFI_REL_OFFSET(reg,size) #define CFI_REMEMBER_STATE - #define CFI_RESTORE_STATE_AND_DEF_CFA(off) + #define CFI_RESTORE_STATE_AND_DEF_CFA(reg,off) #define CFI_ESCAPE(...) #endif @@ -156,6 +156,18 @@ MACRO1(POP, reg) CFI_RESTORE(REG_VAR(reg)) END_MACRO +// Arguments do not need .cfi_rel_offset as they are caller-saved and +// therefore cannot hold caller's variables or unwinding data. +MACRO1(PUSH_ARG, reg) + pushl REG_VAR(reg) + CFI_ADJUST_CFA_OFFSET(4) +END_MACRO + +MACRO1(POP_ARG, reg) + popl REG_VAR(reg) + CFI_ADJUST_CFA_OFFSET(-4) +END_MACRO + MACRO1(CFI_RESTORE_REG, reg) CFI_RESTORE(REG_VAR(reg)) END_MACRO @@ -199,5 +211,64 @@ MACRO1(UNPOISON_HEAP_REF, rRef) #endif // USE_HEAP_POISONING END_MACRO + /* + * Macro that sets up the callee save frame to conform with + * Runtime::CreateCalleeSaveMethod(kSaveRefsAndArgs), except for pushing the method + */ +MACRO0(SETUP_SAVE_REFS_AND_ARGS_FRAME_REGISTERS_ONLY) + PUSH edi // Save callee saves + PUSH esi + PUSH ebp + PUSH_ARG ebx // Save args. + PUSH_ARG edx + PUSH_ARG ecx + // Create space for FPR args. + subl MACRO_LITERAL(4 * 8), %esp + CFI_ADJUST_CFA_OFFSET(4 * 8) + // Save FPRs. + movsd %xmm0, 0(%esp) + movsd %xmm1, 8(%esp) + movsd %xmm2, 16(%esp) + movsd %xmm3, 24(%esp) + + // Ugly compile-time check, but we only have the preprocessor. + // First +4: implicit return address pushed on stack when caller made call. + // Last +4: we're not pushing the method on the stack here. +#if (FRAME_SIZE_SAVE_REFS_AND_ARGS != 4 + 6*4 + 4*8 + 4) +#error "FRAME_SIZE_SAVE_REFS_AND_ARGS(X86) size not as expected." +#endif +END_MACRO + +MACRO0(RESTORE_SAVE_REFS_AND_ARGS_FRAME) + // Restore FPRs. EAX is still on the stack. + movsd 4(%esp), %xmm0 + movsd 12(%esp), %xmm1 + movsd 20(%esp), %xmm2 + movsd 28(%esp), %xmm3 + + addl MACRO_LITERAL(36), %esp // Remove FPRs and method pointer. + CFI_ADJUST_CFA_OFFSET(-36) + + POP_ARG ecx // Restore args + POP_ARG edx + POP_ARG ebx + POP ebp // Restore callee saves + POP esi + POP edi +END_MACRO + + /* + * Macro that calls through to artDeliverPendingExceptionFromCode, where the pending + * exception is Thread::Current()->exception_ when the runtime method frame is ready. + */ +MACRO0(DELIVER_PENDING_EXCEPTION_FRAME_READY) + // Outgoing argument set up + subl MACRO_LITERAL(12), %esp // alignment padding + CFI_ADJUST_CFA_OFFSET(12) + pushl %fs:THREAD_SELF_OFFSET // pass Thread::Current() + CFI_ADJUST_CFA_OFFSET(4) + call SYMBOL(artDeliverPendingExceptionFromCode) // artDeliverPendingExceptionFromCode(Thread*) + UNREACHABLE +END_MACRO #endif // ART_RUNTIME_ARCH_X86_ASM_SUPPORT_X86_S_ diff --git a/runtime/arch/x86/jni_entrypoints_x86.S b/runtime/arch/x86/jni_entrypoints_x86.S index 4862e9936d..086e96f3cd 100644 --- a/runtime/arch/x86/jni_entrypoints_x86.S +++ b/runtime/arch/x86/jni_entrypoints_x86.S @@ -24,12 +24,14 @@ DEFINE_FUNCTION art_jni_dlsym_lookup_stub CFI_ADJUST_CFA_OFFSET(8) pushl %fs:THREAD_SELF_OFFSET // pass Thread::Current() CFI_ADJUST_CFA_OFFSET(4) - // Call artFindNativeMethod() for normal native and artFindNativeMethodRunnable for @FastNative. 
+ // Call artFindNativeMethod() for normal native and artFindNativeMethodRunnable() + // for @FastNative or @CriticalNative. movl (%esp), %eax // Thread* self movl THREAD_TOP_QUICK_FRAME_OFFSET(%eax), %eax // uintptr_t tagged_quick_frame andl LITERAL(0xfffffffe), %eax // ArtMethod** sp movl (%eax), %eax // ArtMethod* method - testl LITERAL(ACCESS_FLAGS_METHOD_IS_FAST_NATIVE), ART_METHOD_ACCESS_FLAGS_OFFSET(%eax) + testl LITERAL(ACCESS_FLAGS_METHOD_IS_FAST_NATIVE | ACCESS_FLAGS_METHOD_IS_CRITICAL_NATIVE), \ + ART_METHOD_ACCESS_FLAGS_OFFSET(%eax) jne .Llookup_stub_fast_native call SYMBOL(artFindNativeMethod) // (Thread*) jmp .Llookup_stub_continue @@ -44,3 +46,178 @@ DEFINE_FUNCTION art_jni_dlsym_lookup_stub .Lno_native_code_found: ret END_FUNCTION art_jni_dlsym_lookup_stub + +DEFINE_FUNCTION art_jni_dlsym_lookup_critical_stub + // The hidden arg holding the tagged method (bit 0 set means GenericJNI) is eax. + // For Generic JNI we already have a managed frame, so we reuse the art_jni_dlsym_lookup_stub. + testl LITERAL(1), %eax + jnz art_jni_dlsym_lookup_stub + + // We need to create a GenericJNI managed frame above the stack args. + + // GenericJNI frame is similar to SaveRegsAndArgs frame with the native method + // instead of runtime method saved at the bottom. Note that the runtime shall + // not examine the args here, otherwise we would have to reload them from stack + // to account for the difference between managed and native ABIs. + SETUP_SAVE_REFS_AND_ARGS_FRAME_REGISTERS_ONLY + pushl %eax // Save the hidden arg as method pointer at the bottom of the stack. + CFI_ADJUST_CFA_OFFSET(4) + + // Call artCriticalNativeOutArgsSize(method); method is conveniently at the bottom of the stack. + call SYMBOL(artCriticalNativeOutArgsSize) + + // Check if we have any stack args other than return PC. + cmp LITERAL(__SIZEOF_POINTER__), %eax + jnz .Lcritical_has_stack_args + + // Without stack args, the frame is fully constructed. + // Place tagged managed sp in Thread::Current()->top_quick_frame. + leal 1(%esp), %eax // Tag as GenericJNI frame. + mov %eax, %fs:THREAD_TOP_QUICK_FRAME_OFFSET + + // Call artFindNativeMethodRunnable() + subl LITERAL(12), %esp // align stack + CFI_ADJUST_CFA_OFFSET(12) + pushl %fs:THREAD_SELF_OFFSET // pass Thread::Current() + CFI_ADJUST_CFA_OFFSET(4) + call SYMBOL(artFindNativeMethodRunnable) // (Thread*) + addl LITERAL(16), %esp + CFI_ADJUST_CFA_OFFSET(-16) + + // Check for exception. + test %eax, %eax + jz 1f + + // Restore frame and do the tail call. + CFI_REMEMBER_STATE + RESTORE_SAVE_REFS_AND_ARGS_FRAME + jmp *%eax + CFI_RESTORE_STATE_AND_DEF_CFA(%esp, FRAME_SIZE_SAVE_REFS_AND_ARGS) + +1: + DELIVER_PENDING_EXCEPTION_FRAME_READY + +.Lcritical_has_stack_args: + // As mentioned above, the runtime shall not examine the args in the managed frame + // and since all args for the native call are on the stack, we can use the managed + // args registers as scratch registers. So, EBX, EDX and ECX are available and we + // do not need to restore xmm0-xmm3 either. + + // Restore registers as we're about to move stack args over the current SaveRefsAndArgs frame. + movl (%esp), %edx // Remember the method in EDX. + movl 48(%esp), %ebp + CFI_RESTORE(%ebp) + movl 52(%esp), %esi + CFI_RESTORE(%esi) + movl 56(%esp), %edi + CFI_RESTORE(%edi) + + // Calculate the address of the end of the move destination and redefine CFI to take + // ownership of the JNI stub frame. EBX is conveniently callee-save in native ABI. 
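+    // (EAX holds the out-args size in bytes, including the return PC slot, so
+    // ESP + EAX is where the moved args end and the managed frame will start.)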
+    leal 0(%esp, %eax, 1), %ebx
+    CFI_DEF_CFA(%ebx, FRAME_SIZE_SAVE_REFS_AND_ARGS)
+
+    // Calculate the number of DWORDs to move.
+    shrl LITERAL(2), %eax
+    leal -1(%eax), %ecx  // Do not move the return PC.
+
+    // Load our return PC to EAX.
+    movl FRAME_SIZE_SAVE_REFS_AND_ARGS - __SIZEOF_POINTER__(%esp), %eax
+
+    // Save EDI, ESI so that we can use them for moving stack args.
+    pushl %edi  // No `CFI_ADJUST_CFA_OFFSET`, CFA register is currently EBX, not ESP.
+    pushl %esi  // ditto
+
+    // Mov the stack args.
+    leal 2 * __SIZEOF_POINTER__(%esp), %edi
+    leal FRAME_SIZE_SAVE_REFS_AND_ARGS(%edi), %esi
+    rep movsd
+
+    // Save our return PC.
+    movl %eax, (%edi)
+
+    // Restore EDI, ESI.
+    popl %esi  // No `CFI_ADJUST_CFA_OFFSET`, CFA register is currently EBX, not ESP.
+    popl %edi  // ditto
+
+    // Re-create the SaveRefsAndArgs frame above the args.
+    movl %edi, 56(%ebx)
+    CFI_REL_OFFSET(%edi, 56)
+    movl %esi, 52(%ebx)
+    CFI_REL_OFFSET(%esi, 52)
+    movl %ebp, 48(%ebx)
+    CFI_REL_OFFSET(%ebp, 48)
+    // Skip managed ABI args EBX, EDX, ECX and FPRs, see above.
+    // (We have already clobbered EBX, EDX, ECX anyway).
+    movl %edx, (%ebx)  // Save method pointer.
+
+    // Place tagged managed sp in Thread::Current()->top_quick_frame.
+    leal 1(%ebx), %eax  // Tag as GenericJNI frame.
+    movl %eax, %fs:THREAD_TOP_QUICK_FRAME_OFFSET
+
+    // Call artFindNativeMethodRunnable()
+    subl LITERAL(12), %esp        // align stack, no `CFI_ADJUST_CFA_OFFSET`.
+    pushl %fs:THREAD_SELF_OFFSET  // pass Thread::Current()
+    call SYMBOL(artFindNativeMethodRunnable)  // (Thread*)
+    addl LITERAL(16), %esp        // Pop args, no `CFI_ADJUST_CFA_OFFSET`.
+
+    // Check for exception.
+    test %eax, %eax
+    jz 2f
+
+    // Restore the frame. We shall not need the method anymore.
+    CFI_REMEMBER_STATE
+    movl 48(%ebx), %ebp
+    CFI_RESTORE(%ebp)
+    movl 52(%ebx), %esi
+    CFI_RESTORE(%esi)
+    movl 56(%ebx), %edi
+    CFI_RESTORE(%edi)
+
+    // Remember our return PC in EDX.
+    movl -__SIZEOF_POINTER__(%ebx), %edx
+
+    // Calculate the number of DWORDs to move.
+    leal -__SIZEOF_POINTER__(%ebx), %ecx  // Do not move return PC.
+    subl %esp, %ecx
+    shrl LITERAL(2), %ecx
+
+    // Save EDI, ESI so that we can use them for moving stack args.
+    pushl %edi  // No `CFI_ADJUST_CFA_OFFSET`, CFA register is currently EBX, not ESP.
+    pushl %esi  // ditto
+
+    // Mov stack args to their original place.
+    leal -2 * __SIZEOF_POINTER__(%ebx), %esi
+    leal FRAME_SIZE_SAVE_REFS_AND_ARGS - 2 * __SIZEOF_POINTER__(%ebx), %edi
+    std
+    rep movsd
+    cld
+
+    // Store our return PC.
+    movl %edx, (%edi)
+
+    // Restore EDI, ESI.
+    popl %esi  // No `CFI_ADJUST_CFA_OFFSET`, CFA register is currently EBX, not ESP.
+    popl %edi  // ditto
+
+    // Redefine CFI to release ownership of the JNI stub frame.
+    CFI_DEF_CFA(%esp, FRAME_SIZE_SAVE_REFS_AND_ARGS)
+
+    // Remove the frame reservation.
+    addl LITERAL(FRAME_SIZE_SAVE_REFS_AND_ARGS - __SIZEOF_POINTER__), %esp
+    CFI_ADJUST_CFA_OFFSET(-(FRAME_SIZE_SAVE_REFS_AND_ARGS - __SIZEOF_POINTER__))
+
+    // Do the tail call.
+    jmp *%eax
+    CFI_RESTORE_STATE_AND_DEF_CFA(%ebx, FRAME_SIZE_SAVE_REFS_AND_ARGS)
+
+2:
+    // Replicate DELIVER_PENDING_EXCEPTION_FRAME_READY without CFI_ADJUST_CFA_OFFSET,
+    // CFA register is currently EBX, not ESP.
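+    // (The CFA is still tracked via EBX here, so unwinding through this call
+    // remains correct even though ESP moves.)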
+ + // Outgoing argument set up + subl MACRO_LITERAL(12), %esp // alignment padding + pushl %fs:THREAD_SELF_OFFSET // pass Thread::Current() + call SYMBOL(artDeliverPendingExceptionFromCode) // artDeliverPendingExceptionFromCode(Thread*) + UNREACHABLE +END_FUNCTION art_jni_dlsym_lookup_critical_stub diff --git a/runtime/arch/x86/jni_frame_x86.h b/runtime/arch/x86/jni_frame_x86.h new file mode 100644 index 0000000000..e710179e28 --- /dev/null +++ b/runtime/arch/x86/jni_frame_x86.h @@ -0,0 +1,68 @@ +/* + * Copyright (C) 2020 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_RUNTIME_ARCH_X86_JNI_FRAME_X86_H_ +#define ART_RUNTIME_ARCH_X86_JNI_FRAME_X86_H_ + +#include + +#include "arch/instruction_set.h" +#include "base/bit_utils.h" +#include "base/globals.h" +#include "base/logging.h" + +namespace art { +namespace x86 { + +constexpr size_t kFramePointerSize = static_cast(PointerSize::k32); +static_assert(kX86PointerSize == PointerSize::k32, "Unexpected x86 pointer size"); + +static constexpr size_t kNativeStackAlignment = 16; // IA-32 cdecl requires 16 byte alignment. +static_assert(kNativeStackAlignment == kStackAlignment); + +// Get the size of "out args" for @CriticalNative method stub. +// This must match the size of the frame emitted by the JNI compiler at the native call site. +inline size_t GetCriticalNativeOutArgsSize(const char* shorty, uint32_t shorty_len) { + DCHECK_EQ(shorty_len, strlen(shorty)); + + size_t num_long_or_double_args = 0u; + for (size_t i = 1; i != shorty_len; ++i) { + if (shorty[i] == 'J' || shorty[i] == 'D') { + num_long_or_double_args += 1u; + } + } + size_t num_arg_words = shorty_len - 1u + num_long_or_double_args; + + // The size of outgoing arguments. + size_t size = num_arg_words * static_cast(kX86PointerSize); + + // Add return address size. + size += kFramePointerSize; + // We can make a tail call if there are no stack args and the return type is not + // FP type (needs moving from ST0 to MMX0) and we do not need to extend the result. + bool return_type_ok = shorty[0] == 'I' || shorty[0] == 'J' || shorty[0] == 'V'; + if (return_type_ok && size == kFramePointerSize) { + return kFramePointerSize; + } + + return RoundUp(size, kNativeStackAlignment); +} + +} // namespace x86 +} // namespace art + +#endif // ART_RUNTIME_ARCH_X86_JNI_FRAME_X86_H_ + diff --git a/runtime/arch/x86/quick_entrypoints_x86.S b/runtime/arch/x86/quick_entrypoints_x86.S index 7d2a7e6815..4abdf70f1d 100644 --- a/runtime/arch/x86/quick_entrypoints_x86.S +++ b/runtime/arch/x86/quick_entrypoints_x86.S @@ -120,20 +120,7 @@ END_MACRO * Runtime::CreateCalleeSaveMethod(kSaveRefsAndArgs) */ MACRO2(SETUP_SAVE_REFS_AND_ARGS_FRAME, got_reg, temp_reg) - PUSH edi // Save callee saves - PUSH esi - PUSH ebp - PUSH ebx // Save args - PUSH edx - PUSH ecx - // Create space for FPR args. - subl MACRO_LITERAL(4 * 8), %esp - CFI_ADJUST_CFA_OFFSET(4 * 8) - // Save FPRs. 
- movsd %xmm0, 0(%esp) - movsd %xmm1, 8(%esp) - movsd %xmm2, 16(%esp) - movsd %xmm3, 24(%esp) + SETUP_SAVE_REFS_AND_ARGS_FRAME_REGISTERS_ONLY SETUP_GOT_NOSAVE RAW_VAR(got_reg) // Load Runtime::instance_ from GOT. @@ -144,12 +131,6 @@ MACRO2(SETUP_SAVE_REFS_AND_ARGS_FRAME, got_reg, temp_reg) CFI_ADJUST_CFA_OFFSET(4) // Store esp as the stop quick frame. movl %esp, %fs:THREAD_TOP_QUICK_FRAME_OFFSET - - // Ugly compile-time check, but we only have the preprocessor. - // Last +4: implicit return address pushed on stack when caller made call. -#if (FRAME_SIZE_SAVE_REFS_AND_ARGS != 7*4 + 4*8 + 4) -#error "FRAME_SIZE_SAVE_REFS_AND_ARGS(X86) size not as expected." -#endif END_MACRO /* @@ -157,47 +138,14 @@ END_MACRO * Runtime::CreateCalleeSaveMethod(kSaveRefsAndArgs) where the method is passed in EAX. */ MACRO0(SETUP_SAVE_REFS_AND_ARGS_FRAME_WITH_METHOD_IN_EAX) - // Save callee and GPR args, mixed together to agree with core spills bitmap. - PUSH edi // Save callee saves - PUSH esi - PUSH ebp - PUSH ebx // Save args - PUSH edx - PUSH ecx - - // Create space for FPR args. - subl MACRO_LITERAL(32), %esp - CFI_ADJUST_CFA_OFFSET(32) + SETUP_SAVE_REFS_AND_ARGS_FRAME_REGISTERS_ONLY - // Save FPRs. - movsd %xmm0, 0(%esp) - movsd %xmm1, 8(%esp) - movsd %xmm2, 16(%esp) - movsd %xmm3, 24(%esp) - - PUSH eax // Store the ArtMethod reference at the bottom of the stack. + pushl %eax // Store the ArtMethod reference at the bottom of the stack. + CFI_ADJUST_CFA_OFFSET(4) // Store esp as the stop quick frame. movl %esp, %fs:THREAD_TOP_QUICK_FRAME_OFFSET END_MACRO -MACRO0(RESTORE_SAVE_REFS_AND_ARGS_FRAME) - // Restore FPRs. EAX is still on the stack. - movsd 4(%esp), %xmm0 - movsd 12(%esp), %xmm1 - movsd 20(%esp), %xmm2 - movsd 28(%esp), %xmm3 - - addl MACRO_LITERAL(36), %esp // Remove FPRs and EAX. - CFI_ADJUST_CFA_OFFSET(-36) - - POP ecx // Restore args except eax - POP edx - POP ebx - POP ebp // Restore callee saves - POP esi - POP edi -END_MACRO - // Restore register and jump to routine // Inputs: EDI contains pointer to code. // Notes: Need to pop EAX too (restores Method*) @@ -329,20 +277,6 @@ MACRO0(RESTORE_SAVE_EVERYTHING_FRAME_KEEP_EAX) RESTORE_SAVE_EVERYTHING_FRAME_GPRS_EXCEPT_EAX END_MACRO - /* - * Macro that calls through to artDeliverPendingExceptionFromCode, where the pending - * exception is Thread::Current()->exception_ when the runtime method frame is ready. - */ -MACRO0(DELIVER_PENDING_EXCEPTION_FRAME_READY) - // Outgoing argument set up - subl MACRO_LITERAL(12), %esp // alignment padding - CFI_ADJUST_CFA_OFFSET(12) - pushl %fs:THREAD_SELF_OFFSET // pass Thread::Current() - CFI_ADJUST_CFA_OFFSET(4) - call SYMBOL(artDeliverPendingExceptionFromCode) // artDeliverPendingExceptionFromCode(Thread*) - UNREACHABLE -END_MACRO - /* * Macro that calls through to artDeliverPendingExceptionFromCode, where the pending * exception is Thread::Current()->exception_. diff --git a/runtime/arch/x86_64/asm_support_x86_64.S b/runtime/arch/x86_64/asm_support_x86_64.S index 2b50cdb77d..6a60a98e51 100644 --- a/runtime/arch/x86_64/asm_support_x86_64.S +++ b/runtime/arch/x86_64/asm_support_x86_64.S @@ -156,6 +156,28 @@ MACRO1(POP, reg) CFI_RESTORE(REG_VAR(reg)) END_MACRO +// Arguments do not need .cfi_rel_offset as they are caller-saved and +// therefore cannot hold caller's variables or unwinding data. 
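+// The CFA offset still changes, though, so CFI_ADJUST_CFA_OFFSET is still emitted below.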
+MACRO1(PUSH_ARG, reg) + pushq REG_VAR(reg) + CFI_ADJUST_CFA_OFFSET(8) +END_MACRO + +MACRO1(POP_ARG, reg) + popq REG_VAR(reg) + CFI_ADJUST_CFA_OFFSET(-8) +END_MACRO + +MACRO3(SAVE_REG_BASE, base, reg, offset) + movq REG_VAR(reg), RAW_VAR(offset)(REG_VAR(base)) + CFI_REL_OFFSET(REG_VAR(reg), RAW_VAR(offset)) +END_MACRO + +MACRO3(RESTORE_REG_BASE, base, reg, offset) + movq RAW_VAR(offset)(REG_VAR(base)), REG_VAR(reg) + CFI_RESTORE(REG_VAR(reg)) +END_MACRO + MACRO1(UNIMPLEMENTED,name) FUNCTION_TYPE(SYMBOL(\name)) ASM_HIDDEN VAR(name) @@ -249,6 +271,77 @@ MACRO0(RESTORE_SAVE_REFS_ONLY_FRAME) POP r15 END_MACRO + /* + * Macro that sets up the callee save frame to conform with + * Runtime::CreateCalleeSaveMethod(kSaveRefsAndArgs), except for storing the method. + */ +MACRO0(SETUP_SAVE_REFS_AND_ARGS_FRAME_REGISTERS_ONLY) + // Save callee and GPR args, mixed together to agree with core spills bitmap. + PUSH r15 // Callee save. + PUSH r14 // Callee save. + PUSH r13 // Callee save. + PUSH r12 // Callee save. + PUSH_ARG r9 // Quick arg 5. + PUSH_ARG r8 // Quick arg 4. + PUSH_ARG rsi // Quick arg 1. + PUSH rbp // Callee save. + PUSH rbx // Callee save. + PUSH_ARG rdx // Quick arg 2. + PUSH_ARG rcx // Quick arg 3. + // Create space for FPR args and create 2 slots for ArtMethod*. + subq MACRO_LITERAL(16 + 12 * 8), %rsp + CFI_ADJUST_CFA_OFFSET(16 + 12 * 8) + // Save FPRs. + movq %xmm0, 16(%rsp) + movq %xmm1, 24(%rsp) + movq %xmm2, 32(%rsp) + movq %xmm3, 40(%rsp) + movq %xmm4, 48(%rsp) + movq %xmm5, 56(%rsp) + movq %xmm6, 64(%rsp) + movq %xmm7, 72(%rsp) + movq %xmm12, 80(%rsp) + movq %xmm13, 88(%rsp) + movq %xmm14, 96(%rsp) + movq %xmm15, 104(%rsp) + + // Ugly compile-time check, but we only have the preprocessor. + // Last +8: implicit return address pushed on stack when caller made call. +#if (FRAME_SIZE_SAVE_REFS_AND_ARGS != 11 * 8 + 12 * 8 + 16 + 8) +#error "FRAME_SIZE_SAVE_REFS_AND_ARGS(X86_64) size not as expected." +#endif +END_MACRO + +MACRO0(RESTORE_SAVE_REFS_AND_ARGS_FRAME) + // Restore FPRs. + movq 16(%rsp), %xmm0 + movq 24(%rsp), %xmm1 + movq 32(%rsp), %xmm2 + movq 40(%rsp), %xmm3 + movq 48(%rsp), %xmm4 + movq 56(%rsp), %xmm5 + movq 64(%rsp), %xmm6 + movq 72(%rsp), %xmm7 + movq 80(%rsp), %xmm12 + movq 88(%rsp), %xmm13 + movq 96(%rsp), %xmm14 + movq 104(%rsp), %xmm15 + addq MACRO_LITERAL(80 + 4 * 8), %rsp + CFI_ADJUST_CFA_OFFSET(-(80 + 4 * 8)) + // Restore callee and GPR args, mixed together to agree with core spills bitmap. + POP_ARG rcx + POP_ARG rdx + POP rbx + POP rbp + POP_ARG rsi + POP_ARG r8 + POP_ARG r9 + POP r12 + POP r13 + POP r14 + POP r15 +END_MACRO + /* * Macro that sets up the callee save frame to conform with * Runtime::CreateCalleeSaveMethod(kSaveAllCalleeSaves) diff --git a/runtime/arch/x86_64/jni_entrypoints_x86_64.S b/runtime/arch/x86_64/jni_entrypoints_x86_64.S index 3860c37d14..e1b8e5294c 100644 --- a/runtime/arch/x86_64/jni_entrypoints_x86_64.S +++ b/runtime/arch/x86_64/jni_entrypoints_x86_64.S @@ -20,16 +20,16 @@ * Jni dlsym lookup stub. */ DEFINE_FUNCTION art_jni_dlsym_lookup_stub - // Save callee and GPR args, mixed together to agree with core spills bitmap. - PUSH r9 // Arg. - PUSH r8 // Arg. - PUSH rdi // JniEnv. - PUSH rsi // Arg. - PUSH rdx // Arg. - PUSH rcx // Arg. + // Save callee and GPR args. + PUSH_ARG r9 // Arg. + PUSH_ARG r8 // Arg. + PUSH_ARG rdi // Arg. (JniEnv for normal and @FastNative) + PUSH_ARG rsi // Arg. + PUSH_ARG rdx // Arg. + PUSH_ARG rcx // Arg. 
// Create space for FPR args, plus padding for alignment - subq LITERAL(72 + 4 * 8), %rsp - CFI_ADJUST_CFA_OFFSET(72 + 4 * 8) + subq LITERAL(72), %rsp + CFI_ADJUST_CFA_OFFSET(72) // Save FPRs. movq %xmm0, 0(%rsp) movq %xmm1, 8(%rsp) @@ -39,17 +39,15 @@ DEFINE_FUNCTION art_jni_dlsym_lookup_stub movq %xmm5, 40(%rsp) movq %xmm6, 48(%rsp) movq %xmm7, 56(%rsp) - movq %xmm12, 64(%rsp) - movq %xmm13, 72(%rsp) - movq %xmm14, 80(%rsp) - movq %xmm15, 88(%rsp) // prepare call movq %gs:THREAD_SELF_OFFSET, %rdi // RDI := Thread::Current() - // Call artFindNativeMethod() for normal native and artFindNativeMethodRunnable for @FastNative. + // Call artFindNativeMethod() for normal native and artFindNativeMethodRunnable() + // for @FastNative or @CriticalNative. movq THREAD_TOP_QUICK_FRAME_OFFSET(%rdi), %rax // uintptr_t tagged_quick_frame andq LITERAL(0xfffffffffffffffe), %rax // ArtMethod** sp movq (%rax), %rax // ArtMethod* method - testl LITERAL(ACCESS_FLAGS_METHOD_IS_FAST_NATIVE), ART_METHOD_ACCESS_FLAGS_OFFSET(%rax) + testl LITERAL(ACCESS_FLAGS_METHOD_IS_FAST_NATIVE | ACCESS_FLAGS_METHOD_IS_CRITICAL_NATIVE), \ + ART_METHOD_ACCESS_FLAGS_OFFSET(%rax) jne .Llookup_stub_fast_native call SYMBOL(artFindNativeMethod) // (Thread*) jmp .Llookup_stub_continue @@ -65,21 +63,200 @@ DEFINE_FUNCTION art_jni_dlsym_lookup_stub movq 40(%rsp), %xmm5 movq 48(%rsp), %xmm6 movq 56(%rsp), %xmm7 - movq 64(%rsp), %xmm12 - movq 72(%rsp), %xmm13 - movq 80(%rsp), %xmm14 - movq 88(%rsp), %xmm15 - addq LITERAL(72 + 4 * 8), %rsp - CFI_ADJUST_CFA_OFFSET(-72 - 4 * 8) - POP rcx // Arg. - POP rdx // Arg. - POP rsi // Arg. - POP rdi // JniEnv. - POP r8 // Arg. - POP r9 // Arg. - testq %rax, %rax // check if returned method code is null + addq LITERAL(72), %rsp + CFI_ADJUST_CFA_OFFSET(-72) + POP_ARG rcx // Arg. + POP_ARG rdx // Arg. + POP_ARG rsi // Arg. + POP_ARG rdi // Arg. (JniEnv for normal and @FastNative) + POP_ARG r8 // Arg. + POP_ARG r9 // Arg. + testq %rax, %rax // check if returned method code is null jz .Lno_native_code_found // if null, jump to return to handle jmp *%rax // otherwise, tail call to intended method .Lno_native_code_found: ret END_FUNCTION art_jni_dlsym_lookup_stub + +DEFINE_FUNCTION art_jni_dlsym_lookup_critical_stub + // The hidden arg holding the tagged method (bit 0 set means GenericJNI) is r11. + // For Generic JNI we already have a managed frame, so we reuse the art_jni_dlsym_lookup_stub. + testq LITERAL(1), %r11 + jnz art_jni_dlsym_lookup_stub + + // We need to create a GenericJNI managed frame above the stack args. + + // GenericJNI frame is similar to SaveRegsAndArgs frame with the native method + // instead of runtime method saved at the bottom. + + // As we always have "stack args" on x86-64 (due to xmm12-xmm15 being callee-save + // in managed ABI but caller-save in native ABI), do not create a proper frame yet + // as we do on other architectures where it's useful for no stack args case. + + // Reserve space for the frame (return PC is on stack). + subq MACRO_LITERAL(FRAME_SIZE_SAVE_REFS_AND_ARGS - __SIZEOF_POINTER__), %rsp + CFI_ADJUST_CFA_OFFSET(FRAME_SIZE_SAVE_REFS_AND_ARGS - __SIZEOF_POINTER__) + + // Save GPR args. + PUSH_ARG r9 + PUSH_ARG r8 + PUSH_ARG rdi + PUSH_ARG rsi + PUSH_ARG rdx + PUSH_ARG rcx + // Create space for FPR args. + subq LITERAL(64), %rsp + CFI_ADJUST_CFA_OFFSET(64) + // Save FPRs. 
+ movq %xmm0, 0(%rsp) + movq %xmm1, 8(%rsp) + movq %xmm2, 16(%rsp) + movq %xmm3, 24(%rsp) + movq %xmm4, 32(%rsp) + movq %xmm5, 40(%rsp) + movq %xmm6, 48(%rsp) + movq %xmm7, 56(%rsp) + + // Add alignment padding. + subq MACRO_LITERAL(__SIZEOF_POINTER__), %rsp + CFI_ADJUST_CFA_OFFSET(__SIZEOF_POINTER__) + // Save hidden arg. + PUSH_ARG r11 + + // Call artCriticalNativeOutArgsSize(method). + movq %r11, %rdi // Pass the method from hidden arg. + call SYMBOL(artCriticalNativeOutArgsSize) + + // Calculate the address of the end of the move destination and redefine CFI to take + // ownership of the JNI stub frame. + leaq 16 * __SIZEOF_POINTER__(%rsp, %rax, 1), %r10 // 16 QWORDs of registers saved above. + CFI_DEF_CFA(%r10, FRAME_SIZE_SAVE_REFS_AND_ARGS) + + // Calculate the number of QWORDs to move. + shrq LITERAL(3), %rax + leaq -1(%rax), %rcx // Do not move the return PC. + + // Load our return PC to EAX. + movq FRAME_SIZE_SAVE_REFS_AND_ARGS + (16 - 1) * __SIZEOF_POINTER__(%rsp), %rax + + // Mov the stack args. + leaq 16 * __SIZEOF_POINTER__(%rsp), %rdi + leaq FRAME_SIZE_SAVE_REFS_AND_ARGS(%rdi), %rsi + rep movsq + + // Save our return PC. + movq %rax, (%rdi) + + // Pop the hidden arg and alignment padding. + popq %r11 // No `.cfi_adjust_cfa_offset`, CFA register is currently R10, not RSP. + addq MACRO_LITERAL(__SIZEOF_POINTER__), %rsp // ditto + + // Fill the SaveRefsAndArgs frame above the args, without actual args. Note that + // the runtime shall not examine the args here, otherwise we would have to move them in + // registers and stack to account for the difference between managed and native ABIs. + SAVE_REG_BASE r10, r15, 192 + SAVE_REG_BASE r10, r14, 184 + SAVE_REG_BASE r10, r13, 176 + SAVE_REG_BASE r10, r12, 168 + // Skip args r9, r8, rsi. + SAVE_REG_BASE r10, rbp, 136 + SAVE_REG_BASE r10, rbx, 128 + // Skip args rdx, rcx. + // Skip args xmm0-xmm7. + // Copy managed callee-saves xmm12-xmm15 from out args to the managed frame as they + // may theoretically store variables or unwinding data. (The compiled stub preserves + // them but the artCriticalNativeOutArgsSize() call above may clobber them.) + movq -5 * __SIZEOF_POINTER__(%r10), %xmm12 + movq -4 * __SIZEOF_POINTER__(%r10), %xmm13 + movq -3 * __SIZEOF_POINTER__(%r10), %xmm14 + movq -2 * __SIZEOF_POINTER__(%r10), %xmm15 + movq %xmm12, 80(%r10) + movq %xmm13, 88(%r10) + movq %xmm14, 96(%r10) + movq %xmm15, 104(%r10) + // Save the hidden arg as method pointer at the bottom of the stack. + movq %r11, (%r10) + + // Move the frame register to a callee-save register. + movq %r10, %rbp + CFI_DEF_CFA_REGISTER(%rbp) + + // Place tagged managed sp in Thread::Current()->top_quick_frame. + leaq 1(%rbp), %rax // Tag as GenericJNI frame. + movq %rax, %gs:THREAD_TOP_QUICK_FRAME_OFFSET + + // Call artFindNativeMethodRunnable() + movq %gs:THREAD_SELF_OFFSET, %rdi // pass Thread::Current() + call SYMBOL(artFindNativeMethodRunnable) // (Thread*) + + // Check for exception. + test %rax, %rax + jz 2f + + // Restore the frame. We shall not need the method anymore. + .cfi_remember_state + movq %rbp, %r10 + CFI_DEF_CFA_REGISTER(%r10) + // Skip args xmm0-xmm7 and managed callee-saves xmm12-xmm15 (not needed for native call). + // Skip args rdx, rcx. + RESTORE_REG_BASE r10, rbx, 128 + RESTORE_REG_BASE r10, rbp, 136 + // Skip args r9, r8, rsi. + RESTORE_REG_BASE r10, r12, 168 + RESTORE_REG_BASE r10, r13, 176 + RESTORE_REG_BASE r10, r14, 184 + RESTORE_REG_BASE r10, r15, 192 + + // Remember our return PC in R11. 
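+    // (The arg-moving loop above left it at the destination end, one slot below
+    // the frame register R10.)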
+ movq -__SIZEOF_POINTER__(%r10), %r11 + + // Calculate the number of DWORDs to move. + leaq -(1 + 14) * __SIZEOF_POINTER__(%r10), %rcx // Do not move return PC, 14 arg regs saved. + subq %rsp, %rcx + shrq LITERAL(3), %rcx + + // Mov stack args to their original place. + leaq -2 * __SIZEOF_POINTER__(%r10), %rsi + leaq FRAME_SIZE_SAVE_REFS_AND_ARGS - 2 * __SIZEOF_POINTER__(%r10), %rdi + std + rep movsq + cld + + // Store our return PC. + movq %r11, (%rdi) + + // Redefine CFI to release ownership of the JNI stub frame. + CFI_DEF_CFA(%rsp, FRAME_SIZE_SAVE_REFS_AND_ARGS + 14 * __SIZEOF_POINTER__) + + // Restore args. + movq 0(%rsp), %xmm0 + movq 8(%rsp), %xmm1 + movq 16(%rsp), %xmm2 + movq 24(%rsp), %xmm3 + movq 32(%rsp), %xmm4 + movq 40(%rsp), %xmm5 + movq 48(%rsp), %xmm6 + movq 56(%rsp), %xmm7 + addq LITERAL(64), %rsp + CFI_ADJUST_CFA_OFFSET(-64) + POP_ARG rcx + POP_ARG rdx + POP_ARG rsi + POP_ARG rdi + POP_ARG r8 + POP_ARG r9 + + // Remove the frame reservation. + addq LITERAL(FRAME_SIZE_SAVE_REFS_AND_ARGS - __SIZEOF_POINTER__), %rsp + CFI_ADJUST_CFA_OFFSET(-(FRAME_SIZE_SAVE_REFS_AND_ARGS - __SIZEOF_POINTER__)) + + // Do the tail call. + jmp *%rax + CFI_RESTORE_STATE_AND_DEF_CFA(%rbp, FRAME_SIZE_SAVE_REFS_AND_ARGS) + +2: + // Drop the args from the stack (the r11 and padding was already removed). + addq LITERAL(14 * __SIZEOF_POINTER__), %rsp + + DELIVER_PENDING_EXCEPTION_FRAME_READY +END_FUNCTION art_jni_dlsym_lookup_critical_stub diff --git a/runtime/arch/x86_64/jni_frame_x86_64.h b/runtime/arch/x86_64/jni_frame_x86_64.h new file mode 100644 index 0000000000..65736fe756 --- /dev/null +++ b/runtime/arch/x86_64/jni_frame_x86_64.h @@ -0,0 +1,85 @@ +/* + * Copyright (C) 2020 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_RUNTIME_ARCH_X86_64_JNI_FRAME_X86_64_H_ +#define ART_RUNTIME_ARCH_X86_64_JNI_FRAME_X86_64_H_ + +#include + +#include "arch/instruction_set.h" +#include "base/bit_utils.h" +#include "base/globals.h" +#include "base/logging.h" + +namespace art { +namespace x86_64 { + +constexpr size_t kFramePointerSize = static_cast(PointerSize::k64); +static_assert(kX86_64PointerSize == PointerSize::k64, "Unexpected x86_64 pointer size"); + +static constexpr size_t kNativeStackAlignment = 16; +static_assert(kNativeStackAlignment == kStackAlignment); + +// We always have to spill registers xmm12-xmm15 which are callee-save +// in managed ABI but caller-save in native ABI. +constexpr size_t kMmxSpillSize = 8u; +constexpr size_t kAlwaysSpilledMmxRegisters = 4; + +// XMM0..XMM7 can be used to pass the first 8 floating args. The rest must go on the stack. +// -- Managed and JNI calling conventions. +constexpr size_t kMaxFloatOrDoubleRegisterArguments = 8u; +// Up to how many integer-like (pointers, objects, longs, int, short, bool, etc) args can be +// enregistered. The rest of the args must go on the stack. +// -- JNI calling convention only (Managed excludes RDI, so it's actually 5). 
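+// (In the managed ABI, RDI carries the ArtMethod*, leaving only 5 GPRs for arguments.)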
+constexpr size_t kMaxIntLikeRegisterArguments = 6u; + +// Get the size of "out args" for @CriticalNative method stub. +// This must match the size of the frame emitted by the JNI compiler at the native call site. +inline size_t GetCriticalNativeOutArgsSize(const char* shorty, uint32_t shorty_len) { + DCHECK_EQ(shorty_len, strlen(shorty)); + + size_t num_fp_args = 0u; + for (size_t i = 1; i != shorty_len; ++i) { + if (shorty[i] == 'F' || shorty[i] == 'D') { + num_fp_args += 1u; + } + } + size_t num_non_fp_args = shorty_len - 1u - num_fp_args; + + // Account for FP arguments passed through Xmm0..Xmm7. + size_t num_stack_fp_args = + num_fp_args - std::min(kMaxFloatOrDoubleRegisterArguments, num_fp_args); + // Account for other (integer) arguments passed through GPR (RDI, RSI, RDX, RCX, R8, R9). + size_t num_stack_non_fp_args = + num_non_fp_args - std::min(kMaxIntLikeRegisterArguments, num_non_fp_args); + // The size of outgoing arguments. + static_assert(kFramePointerSize == kMmxSpillSize); + size_t size = (num_stack_fp_args + num_stack_non_fp_args) * kFramePointerSize; + + // We always need to spill xmm12-xmm15 as they are managed callee-saves + // but not native callee-saves. + size += kAlwaysSpilledMmxRegisters * kMmxSpillSize; + // Add return address size. + size += kFramePointerSize; + + return RoundUp(size, kNativeStackAlignment); +} + +} // namespace x86_64 +} // namespace art + +#endif // ART_RUNTIME_ARCH_X86_64_JNI_FRAME_X86_64_H_ + diff --git a/runtime/arch/x86_64/quick_entrypoints_x86_64.S b/runtime/arch/x86_64/quick_entrypoints_x86_64.S index c2f87b2479..abc3a8a9d4 100644 --- a/runtime/arch/x86_64/quick_entrypoints_x86_64.S +++ b/runtime/arch/x86_64/quick_entrypoints_x86_64.S @@ -38,117 +38,27 @@ MACRO0(SETUP_SAVE_REFS_AND_ARGS_FRAME) int3 int3 #else + SETUP_SAVE_REFS_AND_ARGS_FRAME_REGISTERS_ONLY // R10 := Runtime::Current() movq _ZN3art7Runtime9instance_E@GOTPCREL(%rip), %r10 movq (%r10), %r10 - // Save callee and GPR args, mixed together to agree with core spills bitmap. - PUSH r15 // Callee save. - PUSH r14 // Callee save. - PUSH r13 // Callee save. - PUSH r12 // Callee save. - PUSH r9 // Quick arg 5. - PUSH r8 // Quick arg 4. - PUSH rsi // Quick arg 1. - PUSH rbp // Callee save. - PUSH rbx // Callee save. - PUSH rdx // Quick arg 2. - PUSH rcx // Quick arg 3. - // Create space for FPR args and create 2 slots for ArtMethod*. - subq MACRO_LITERAL(16 + 12 * 8), %rsp - CFI_ADJUST_CFA_OFFSET(16 + 12 * 8) // R10 := ArtMethod* for ref and args callee save frame method. movq RUNTIME_SAVE_REFS_AND_ARGS_METHOD_OFFSET(%r10), %r10 - // Save FPRs. - movq %xmm0, 16(%rsp) - movq %xmm1, 24(%rsp) - movq %xmm2, 32(%rsp) - movq %xmm3, 40(%rsp) - movq %xmm4, 48(%rsp) - movq %xmm5, 56(%rsp) - movq %xmm6, 64(%rsp) - movq %xmm7, 72(%rsp) - movq %xmm12, 80(%rsp) - movq %xmm13, 88(%rsp) - movq %xmm14, 96(%rsp) - movq %xmm15, 104(%rsp) // Store ArtMethod* to bottom of stack. movq %r10, 0(%rsp) // Store rsp as the top quick frame. movq %rsp, %gs:THREAD_TOP_QUICK_FRAME_OFFSET - - // Ugly compile-time check, but we only have the preprocessor. - // Last +8: implicit return address pushed on stack when caller made call. -#if (FRAME_SIZE_SAVE_REFS_AND_ARGS != 11 * 8 + 12 * 8 + 16 + 8) -#error "FRAME_SIZE_SAVE_REFS_AND_ARGS(X86_64) size not as expected." -#endif #endif // __APPLE__ END_MACRO MACRO0(SETUP_SAVE_REFS_AND_ARGS_FRAME_WITH_METHOD_IN_RDI) - // Save callee and GPR args, mixed together to agree with core spills bitmap. - PUSH r15 // Callee save. - PUSH r14 // Callee save. - PUSH r13 // Callee save. 
- PUSH r12 // Callee save. - PUSH r9 // Quick arg 5. - PUSH r8 // Quick arg 4. - PUSH rsi // Quick arg 1. - PUSH rbp // Callee save. - PUSH rbx // Callee save. - PUSH rdx // Quick arg 2. - PUSH rcx // Quick arg 3. - // Create space for FPR args and create 2 slots for ArtMethod*. - subq LITERAL(80 + 4 * 8), %rsp - CFI_ADJUST_CFA_OFFSET(80 + 4 * 8) - // Save FPRs. - movq %xmm0, 16(%rsp) - movq %xmm1, 24(%rsp) - movq %xmm2, 32(%rsp) - movq %xmm3, 40(%rsp) - movq %xmm4, 48(%rsp) - movq %xmm5, 56(%rsp) - movq %xmm6, 64(%rsp) - movq %xmm7, 72(%rsp) - movq %xmm12, 80(%rsp) - movq %xmm13, 88(%rsp) - movq %xmm14, 96(%rsp) - movq %xmm15, 104(%rsp) + SETUP_SAVE_REFS_AND_ARGS_FRAME_REGISTERS_ONLY // Store ArtMethod to bottom of stack. movq %rdi, 0(%rsp) // Store rsp as the stop quick frame. movq %rsp, %gs:THREAD_TOP_QUICK_FRAME_OFFSET END_MACRO -MACRO0(RESTORE_SAVE_REFS_AND_ARGS_FRAME) - // Restore FPRs. - movq 16(%rsp), %xmm0 - movq 24(%rsp), %xmm1 - movq 32(%rsp), %xmm2 - movq 40(%rsp), %xmm3 - movq 48(%rsp), %xmm4 - movq 56(%rsp), %xmm5 - movq 64(%rsp), %xmm6 - movq 72(%rsp), %xmm7 - movq 80(%rsp), %xmm12 - movq 88(%rsp), %xmm13 - movq 96(%rsp), %xmm14 - movq 104(%rsp), %xmm15 - addq MACRO_LITERAL(80 + 4 * 8), %rsp - CFI_ADJUST_CFA_OFFSET(-(80 + 4 * 8)) - // Restore callee and GPR args, mixed together to agree with core spills bitmap. - POP rcx - POP rdx - POP rbx - POP rbp - POP rsi - POP r8 - POP r9 - POP r12 - POP r13 - POP r14 - POP r15 -END_MACRO - /* * Macro that sets up the callee save frame to conform with * Runtime::CreateCalleeSaveMethod(kSaveEverything) diff --git a/runtime/art_method.cc b/runtime/art_method.cc index a74d1ad712..d0b6fde98e 100644 --- a/runtime/art_method.cc +++ b/runtime/art_method.cc @@ -406,7 +406,8 @@ const void* ArtMethod::RegisterNative(const void* native_method) { void ArtMethod::UnregisterNative() { CHECK(IsNative()) << PrettyMethod(); // restore stub to lookup native pointer via dlsym - SetEntryPointFromJni(GetJniDlsymLookupStub()); + SetEntryPointFromJni( + IsCriticalNative() ? GetJniDlsymLookupCriticalStub() : GetJniDlsymLookupStub()); } bool ArtMethod::IsOverridableByDefaultMethod() { diff --git a/runtime/class_linker.cc b/runtime/class_linker.cc index 4a4171e283..8f3e1cb45f 100644 --- a/runtime/class_linker.cc +++ b/runtime/class_linker.cc @@ -627,6 +627,7 @@ ClassLinker::ClassLinker(InternTable* intern_table, bool fast_class_not_found_ex intern_table_(intern_table), fast_class_not_found_exceptions_(fast_class_not_found_exceptions), jni_dlsym_lookup_trampoline_(nullptr), + jni_dlsym_lookup_critical_trampoline_(nullptr), quick_resolution_trampoline_(nullptr), quick_imt_conflict_trampoline_(nullptr), quick_generic_jni_trampoline_(nullptr), @@ -851,6 +852,7 @@ bool ClassLinker::InitWithoutImage(std::vector> b if (!runtime->IsAotCompiler()) { // We need to set up the generic trampolines since we don't have an image. 
jni_dlsym_lookup_trampoline_ = GetJniDlsymLookupStub(); + jni_dlsym_lookup_critical_trampoline_ = GetJniDlsymLookupCriticalStub(); quick_resolution_trampoline_ = GetQuickResolutionStub(); quick_imt_conflict_trampoline_ = GetQuickImtConflictStub(); quick_generic_jni_trampoline_ = GetQuickGenericJniStub(); @@ -1202,6 +1204,7 @@ bool ClassLinker::InitFromBootImage(std::string* error_msg) { DCHECK(!oat_files.empty()); const OatHeader& default_oat_header = oat_files[0]->GetOatHeader(); jni_dlsym_lookup_trampoline_ = default_oat_header.GetJniDlsymLookupTrampoline(); + jni_dlsym_lookup_critical_trampoline_ = default_oat_header.GetJniDlsymLookupCriticalTrampoline(); quick_resolution_trampoline_ = default_oat_header.GetQuickResolutionTrampoline(); quick_imt_conflict_trampoline_ = default_oat_header.GetQuickImtConflictTrampoline(); quick_generic_jni_trampoline_ = default_oat_header.GetQuickGenericJniTrampoline(); @@ -1212,6 +1215,8 @@ bool ClassLinker::InitFromBootImage(std::string* error_msg) { const OatHeader& ith_oat_header = oat_files[i]->GetOatHeader(); const void* ith_jni_dlsym_lookup_trampoline_ = ith_oat_header.GetJniDlsymLookupTrampoline(); + const void* ith_jni_dlsym_lookup_critical_trampoline_ = + ith_oat_header.GetJniDlsymLookupCriticalTrampoline(); const void* ith_quick_resolution_trampoline = ith_oat_header.GetQuickResolutionTrampoline(); const void* ith_quick_imt_conflict_trampoline = @@ -1221,6 +1226,7 @@ bool ClassLinker::InitFromBootImage(std::string* error_msg) { const void* ith_quick_to_interpreter_bridge_trampoline = ith_oat_header.GetQuickToInterpreterBridge(); if (ith_jni_dlsym_lookup_trampoline_ != jni_dlsym_lookup_trampoline_ || + ith_jni_dlsym_lookup_critical_trampoline_ != jni_dlsym_lookup_critical_trampoline_ || ith_quick_resolution_trampoline != quick_resolution_trampoline_ || ith_quick_imt_conflict_trampoline != quick_imt_conflict_trampoline_ || ith_quick_generic_jni_trampoline != quick_generic_jni_trampoline_ || @@ -9349,6 +9355,11 @@ bool ClassLinker::IsJniDlsymLookupStub(const void* entry_point) const { (jni_dlsym_lookup_trampoline_ == entry_point); } +bool ClassLinker::IsJniDlsymLookupCriticalStub(const void* entry_point) const { + return entry_point == GetJniDlsymLookupCriticalStub() || + (jni_dlsym_lookup_critical_trampoline_ == entry_point); +} + const void* ClassLinker::GetRuntimeQuickGenericJniStub() const { return GetQuickGenericJniStub(); } diff --git a/runtime/class_linker.h b/runtime/class_linker.h index 26621af509..f82a7c7bc4 100644 --- a/runtime/class_linker.h +++ b/runtime/class_linker.h @@ -594,6 +594,9 @@ class ClassLinker { // Is the given entry point the JNI dlsym lookup stub? bool IsJniDlsymLookupStub(const void* entry_point) const; + // Is the given entry point the JNI dlsym lookup critical stub? + bool IsJniDlsymLookupCriticalStub(const void* entry_point) const; + const void* GetQuickToInterpreterBridgeTrampoline() const { return quick_to_interpreter_bridge_trampoline_; } @@ -1426,6 +1429,7 @@ class ClassLinker { // Trampolines within the image the bounce to runtime entrypoints. Done so that there is a single // patch point within the image. TODO: make these proper relocations. 
const void* jni_dlsym_lookup_trampoline_; + const void* jni_dlsym_lookup_critical_trampoline_; const void* quick_resolution_trampoline_; const void* quick_imt_conflict_trampoline_; const void* quick_generic_jni_trampoline_; diff --git a/runtime/entrypoints/jni/jni_entrypoints.cc b/runtime/entrypoints/jni/jni_entrypoints.cc index d008e1a686..f1e577243b 100644 --- a/runtime/entrypoints/jni/jni_entrypoints.cc +++ b/runtime/entrypoints/jni/jni_entrypoints.cc @@ -16,6 +16,11 @@ #include +#include "arch/arm/jni_frame_arm.h" +#include "arch/arm64/jni_frame_arm64.h" +#include "arch/instruction_set.h" +#include "arch/x86/jni_frame_x86.h" +#include "arch/x86_64/jni_frame_x86_64.h" #include "art_method-inl.h" #include "entrypoints/entrypoint_utils.h" #include "jni/java_vm_ext.h" @@ -52,4 +57,24 @@ extern "C" const void* artFindNativeMethod(Thread* self) { return artFindNativeMethodRunnable(self); } +extern "C" size_t artCriticalNativeOutArgsSize(ArtMethod* method) + REQUIRES_SHARED(Locks::mutator_lock_) { + uint32_t shorty_len; + const char* shorty = method->GetShorty(&shorty_len); + switch (kRuntimeISA) { + case InstructionSet::kArm: + case InstructionSet::kThumb2: + return arm::GetCriticalNativeOutArgsSize(shorty, shorty_len); + case InstructionSet::kArm64: + return arm64::GetCriticalNativeOutArgsSize(shorty, shorty_len); + case InstructionSet::kX86: + return x86::GetCriticalNativeOutArgsSize(shorty, shorty_len); + case InstructionSet::kX86_64: + return x86_64::GetCriticalNativeOutArgsSize(shorty, shorty_len); + default: + UNIMPLEMENTED(FATAL) << kRuntimeISA; + UNREACHABLE(); + } +} + } // namespace art diff --git a/runtime/entrypoints/jni/jni_entrypoints.h b/runtime/entrypoints/jni/jni_entrypoints.h index 9c1b0dc62e..0aabed045d 100644 --- a/runtime/entrypoints/jni/jni_entrypoints.h +++ b/runtime/entrypoints/jni/jni_entrypoints.h @@ -29,8 +29,10 @@ namespace art { // Pointers to functions that are called by JNI trampolines via thread-local storage. struct PACKED(4) JniEntryPoints { - // Called when the JNI method isn't registered. + // Called when the JNI method isn't registered for normal native and @FastNative methods. void* (*pDlsymLookup)(JNIEnv* env, jobject); + // Called when the JNI method isn't registered for @CriticalNative methods. 
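+  // (Such methods take no JNIEnv*/jclass; the stubs above receive the target
+  // ArtMethod* through a hidden argument instead.)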
+ void* (*pDlsymLookupCritical)(JNIEnv* env, jobject); }; } // namespace art diff --git a/runtime/entrypoints/quick/quick_default_init_entrypoints.h b/runtime/entrypoints/quick/quick_default_init_entrypoints.h index 048deb4803..a77bb85740 100644 --- a/runtime/entrypoints/quick/quick_default_init_entrypoints.h +++ b/runtime/entrypoints/quick/quick_default_init_entrypoints.h @@ -29,6 +29,7 @@ namespace art { static void DefaultInitEntryPoints(JniEntryPoints* jpoints, QuickEntryPoints* qpoints) { // JNI jpoints->pDlsymLookup = art_jni_dlsym_lookup_stub; + jpoints->pDlsymLookupCritical = art_jni_dlsym_lookup_critical_stub; // Alloc ResetQuickAllocEntryPoints(qpoints, /* is_marking= */ true); diff --git a/runtime/entrypoints/runtime_asm_entrypoints.h b/runtime/entrypoints/runtime_asm_entrypoints.h index a43358fe04..9f470345b7 100644 --- a/runtime/entrypoints/runtime_asm_entrypoints.h +++ b/runtime/entrypoints/runtime_asm_entrypoints.h @@ -35,6 +35,11 @@ static inline const void* GetJniDlsymLookupStub() { return reinterpret_cast(art_jni_dlsym_lookup_stub); } +extern "C" void* art_jni_dlsym_lookup_critical_stub(JNIEnv*, jobject); +static inline const void* GetJniDlsymLookupCriticalStub() { + return reinterpret_cast(art_jni_dlsym_lookup_critical_stub); +} + // Return the address of quick stub code for handling IMT conflicts. extern "C" void art_quick_imt_conflict_trampoline(ArtMethod*); static inline const void* GetQuickImtConflictStub() { diff --git a/runtime/entrypoints_order_test.cc b/runtime/entrypoints_order_test.cc index d88584d727..52c4142712 100644 --- a/runtime/entrypoints_order_test.cc +++ b/runtime/entrypoints_order_test.cc @@ -147,8 +147,12 @@ class EntrypointsOrderTest : public CommonRuntimeTest { void CheckJniEntryPoints() { CHECKED(OFFSETOF_MEMBER(JniEntryPoints, pDlsymLookup) == 0, JniEntryPoints_start_with_dlsymlookup); - CHECKED(OFFSETOF_MEMBER(JniEntryPoints, pDlsymLookup) - + sizeof(void*) == sizeof(JniEntryPoints), JniEntryPoints_all); + CHECKED(OFFSETOF_MEMBER(JniEntryPoints, pDlsymLookup) + sizeof(void*) == + OFFSETOF_MEMBER(JniEntryPoints, pDlsymLookupCritical), + JniEntryPoints_dlsymlookup_critical); + CHECKED(OFFSETOF_MEMBER(JniEntryPoints, pDlsymLookupCritical) + sizeof(void*) == + sizeof(JniEntryPoints), + JniEntryPoints_all); } void CheckQuickEntryPoints() { diff --git a/runtime/oat.cc b/runtime/oat.cc index 7b13430765..17c797a780 100644 --- a/runtime/oat.cc +++ b/runtime/oat.cc @@ -73,6 +73,7 @@ OatHeader::OatHeader(InstructionSet instruction_set, oat_dex_files_offset_(0), executable_offset_(0), jni_dlsym_lookup_trampoline_offset_(0), + jni_dlsym_lookup_critical_trampoline_offset_(0), quick_generic_jni_trampoline_offset_(0), quick_imt_conflict_trampoline_offset_(0), quick_resolution_trampoline_offset_(0), @@ -217,6 +218,22 @@ void OatHeader::SetJniDlsymLookupTrampolineOffset(uint32_t offset) { jni_dlsym_lookup_trampoline_offset_ = offset; } +const void* OatHeader::GetJniDlsymLookupCriticalTrampoline() const { + return GetTrampoline(*this, GetJniDlsymLookupCriticalTrampolineOffset()); +} + +uint32_t OatHeader::GetJniDlsymLookupCriticalTrampolineOffset() const { + DCHECK(IsValid()); + return jni_dlsym_lookup_critical_trampoline_offset_; +} + +void OatHeader::SetJniDlsymLookupCriticalTrampolineOffset(uint32_t offset) { + DCHECK(IsValid()); + DCHECK_EQ(jni_dlsym_lookup_critical_trampoline_offset_, 0U) << offset; + + jni_dlsym_lookup_critical_trampoline_offset_ = offset; +} + const void* OatHeader::GetQuickGenericJniTrampoline() const { return GetTrampoline(*this, 
GetQuickGenericJniTrampolineOffset());
}
diff --git a/runtime/oat.h b/runtime/oat.h
index 206f8afa7f..8c81844728 100644
--- a/runtime/oat.h
+++ b/runtime/oat.h
@@ -32,8 +32,8 @@ class InstructionSetFeatures;
 class PACKED(4) OatHeader {
  public:
   static constexpr std::array<uint8_t, 4> kOatMagic { { 'o', 'a', 't', '\n' } };
-  // Last oat version changed reason: invokeinterface on j.l.Object do a vtable call.
-  static constexpr std::array<uint8_t, 4> kOatVersion { { '1', '7', '9', '\0' } };
+  // Last oat version changed reason: Allow late lookup for @CriticalNative.
+  static constexpr std::array<uint8_t, 4> kOatVersion { { '1', '8', '0', '\0' } };
 
   static constexpr const char* kDex2OatCmdLineKey = "dex2oat-cmdline";
   static constexpr const char* kDebuggableKey = "debuggable";
@@ -72,6 +72,9 @@ class PACKED(4) OatHeader {
   const void* GetJniDlsymLookupTrampoline() const;
   uint32_t GetJniDlsymLookupTrampolineOffset() const;
   void SetJniDlsymLookupTrampolineOffset(uint32_t offset);
+  const void* GetJniDlsymLookupCriticalTrampoline() const;
+  uint32_t GetJniDlsymLookupCriticalTrampolineOffset() const;
+  void SetJniDlsymLookupCriticalTrampolineOffset(uint32_t offset);
 
   const void* GetQuickGenericJniTrampoline() const;
   uint32_t GetQuickGenericJniTrampolineOffset() const;
@@ -123,6 +126,7 @@ class PACKED(4) OatHeader {
   uint32_t oat_dex_files_offset_;
   uint32_t executable_offset_;
   uint32_t jni_dlsym_lookup_trampoline_offset_;
+  uint32_t jni_dlsym_lookup_critical_trampoline_offset_;
   uint32_t quick_generic_jni_trampoline_offset_;
   uint32_t quick_imt_conflict_trampoline_offset_;
   uint32_t quick_resolution_trampoline_offset_;
diff --git a/runtime/stack.cc b/runtime/stack.cc
index 8916618f1a..58a73cc4f2 100644
--- a/runtime/stack.cc
+++ b/runtime/stack.cc
@@ -805,10 +805,12 @@ QuickMethodFrameInfo StackVisitor::GetCurrentQuickFrameInfo() const {
     return RuntimeCalleeSaveFrame::GetMethodFrameInfo(CalleeSaveType::kSaveRefsAndArgs);
   }
 
-  // The only remaining case is if the method is native and uses the generic JNI stub,
-  // called either directly or through some (resolution, instrumentation) trampoline.
+  // The only remaining cases are for native methods that either
+  //   - use the Generic JNI stub, called either directly or through some
+  //     (resolution, instrumentation) trampoline; or
+  //   - fake a Generic JNI frame in art_jni_dlsym_lookup_critical_stub.
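+  // (For the latter, the method's entrypoint is the critical lookup stub rather
+  //  than the Generic JNI trampoline, which is why the debug-build entrypoint
+  //  check below now skips @CriticalNative methods.)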
diff --git a/runtime/thread.cc b/runtime/thread.cc
index 3add372fd6..77b9f4f24c 100644
--- a/runtime/thread.cc
+++ b/runtime/thread.cc
@@ -3371,6 +3371,7 @@ void Thread::DumpThreadOffset(std::ostream& os, uint32_t offset) {
     return; \
   }
   JNI_ENTRY_POINT_INFO(pDlsymLookup)
+  JNI_ENTRY_POINT_INFO(pDlsymLookupCritical)
 #undef JNI_ENTRY_POINT_INFO

 #define QUICK_ENTRY_POINT_INFO(x) \
diff --git a/test/178-app-image-native-method/expected.txt b/test/178-app-image-native-method/expected.txt
index 6327f97968..30cc3360d9 100644
--- a/test/178-app-image-native-method/expected.txt
+++ b/test/178-app-image-native-method/expected.txt
@@ -1,10 +1,14 @@
 JNI_OnLoad called
 test
 testFast
+testCritical
 testMissing
 testMissingFast
+testMissingCritical
 JNI_OnLoad called
 test
 testFast
+testCritical
 testMissing
 testMissingFast
+testMissingCritical
diff --git a/test/178-app-image-native-method/native_methods.cc b/test/178-app-image-native-method/native_methods.cc
index 5c4fb3ee82..794a78a3c1 100644
--- a/test/178-app-image-native-method/native_methods.cc
+++ b/test/178-app-image-native-method/native_methods.cc
@@ -38,6 +38,10 @@ static inline bool VerifyManyParameters(
       (i8 == 81) && (l8 == 82) && (f8 == 83.0) && (d8 == 84.0);
 }

+extern "C" JNIEXPORT jint JNICALL Java_Test_nativeMethodVoid(JNIEnv*, jclass) {
+  return 42;
+}
+
 extern "C" JNIEXPORT jint JNICALL Java_Test_nativeMethod(JNIEnv*, jclass, jint i) {
   return i;
 }
@@ -64,6 +68,10 @@ extern "C" JNIEXPORT jint JNICALL Java_Test_nativeMethodWithManyParameters(
   return ok ? 42 : -1;
 }

+extern "C" JNIEXPORT jint JNICALL Java_TestFast_nativeMethodVoid(JNIEnv*, jclass) {
+  return 42;
+}
+
 extern "C" JNIEXPORT jint JNICALL Java_TestFast_nativeMethod(JNIEnv*, jclass, jint i) {
   return i;
 }
@@ -90,6 +98,10 @@ extern "C" JNIEXPORT jint JNICALL Java_TestFast_nativeMethodWithManyParameters(
   return ok ? 42 : -1;
 }

+extern "C" JNIEXPORT jint JNICALL Java_TestCritical_nativeMethodVoid() {
+  return 42;
+}
+
 extern "C" JNIEXPORT jint JNICALL Java_TestCritical_nativeMethod(jint i) {
   return i;
 }
diff --git a/test/178-app-image-native-method/run b/test/178-app-image-native-method/run
index 3cb4d09d0d..f4b07f022d 100644
--- a/test/178-app-image-native-method/run
+++ b/test/178-app-image-native-method/run
@@ -22,4 +22,4 @@
 return_status1=$?
 ${RUN} ${@} --profile -Xcompiler-option --compiler-filter=verify
 return_status2=$?
-(exit ${return_status1}) # && (exit ${return_status2})
+(exit ${return_status1}) && (exit ${return_status2})
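The new nativeMethodVoid implementations above make the calling-convention split visible: normal and @FastNative methods keep the standard JNI prototype, while @CriticalNative drops both JNIEnv* and jclass, so its no-argument variant takes no parameters at all. A compile-only sketch of the two shapes (class and method names are hypothetical, mangled per the usual Java_<class>_<method> scheme):

#include <jni.h>

// Normal native and @FastNative share this prototype; only the
// runtime-side dispatch differs between the two annotations.
extern "C" JNIEXPORT jint JNICALL Java_Demo_plainOrFast(JNIEnv*, jclass) {
  return 42;
}

// @CriticalNative: static methods only, no JNIEnv* and no jclass, so a
// Java method with no arguments maps to a parameterless C function.
extern "C" JNIEXPORT jint JNICALL Java_Demo_critical() {
  return 42;
}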
diff --git a/test/178-app-image-native-method/src/Main.java b/test/178-app-image-native-method/src/Main.java
index bec774009b..07990cb498 100644
--- a/test/178-app-image-native-method/src/Main.java
+++ b/test/178-app-image-native-method/src/Main.java
@@ -31,17 +31,17 @@ public static void main(String[] args) throws Exception {
     new TestMissingCritical();
     makeVisiblyInitialized();  // Make sure they are visibly initialized.

-    // FIXME: @FastNative and @CriticalNative fail a state check in artFindNativeMethod().
     test();
     testFast();
-    // testCritical();
+    testCritical();
     testMissing();
     testMissingFast();
-    // testMissingCritical();
+    testMissingCritical();
   }

   static void test() {
     System.out.println("test");
+    assertEquals(42, Test.nativeMethodVoid());
     assertEquals(42, Test.nativeMethod(42));
     assertEquals(42, Test.nativeMethodWithManyParameters(
         11, 12L, 13.0f, 14.0d,
@@ -56,6 +56,7 @@ static void test() {

   static void testFast() {
     System.out.println("testFast");
+    assertEquals(42, TestFast.nativeMethodVoid());
     assertEquals(42, TestFast.nativeMethod(42));
     assertEquals(42, TestFast.nativeMethodWithManyParameters(
         11, 12L, 13.0f, 14.0d,
@@ -70,6 +71,7 @@ static void testFast() {

   static void testCritical() {
     System.out.println("testCritical");
+    assertEquals(42, TestCritical.nativeMethodVoid());
     assertEquals(42, TestCritical.nativeMethod(42));
     assertEquals(42, TestCritical.nativeMethodWithManyParameters(
         11, 12L, 13.0f, 14.0d,
@@ -85,6 +87,11 @@ static void testCritical() {

   static void testMissing() {
     System.out.println("testMissing");
+    try {
+      TestMissing.nativeMethodVoid();
+      throw new Error("UNREACHABLE");
+    } catch (LinkageError expected) {}
+
     try {
       TestMissing.nativeMethod(42);
       throw new Error("UNREACHABLE");
@@ -107,6 +114,11 @@ static void testMissing() {

   static void testMissingFast() {
     System.out.println("testMissingFast");
+    try {
+      TestMissingFast.nativeMethodVoid();
+      throw new Error("UNREACHABLE");
+    } catch (LinkageError expected) {}
+
     try {
       TestMissingFast.nativeMethod(42);
       throw new Error("UNREACHABLE");
@@ -129,6 +141,11 @@ static void testMissingFast() {

   static void testMissingCritical() {
     System.out.println("testMissingCritical");
+    try {
+      TestMissingCritical.nativeMethodVoid();
+      throw new Error("UNREACHABLE");
+    } catch (LinkageError expected) {}
+
     try {
       TestMissingCritical.nativeMethod(42);
       throw new Error("UNREACHABLE");
@@ -158,6 +175,8 @@ static void assertEquals(int expected, int actual) {
 }

 class Test {
+  public static native int nativeMethodVoid();
+
   public static native int nativeMethod(int i);

   public static native int nativeMethodWithManyParameters(
@@ -172,6 +191,9 @@ public static native int nativeMethodWithManyParameters(
 }

 class TestFast {
+  @FastNative
+  public static native int nativeMethodVoid();
+
   @FastNative
   public static native int nativeMethod(int i);

@@ -188,6 +210,9 @@ public static native int nativeMethodWithManyParameters(
 }

 class TestCritical {
+  @CriticalNative
+  public static native int nativeMethodVoid();
+
   @CriticalNative
   public static native int nativeMethod(int i);

@@ -204,6 +229,8 @@ public static native int nativeMethodWithManyParameters(
 }

 class TestMissing {
+  public static native int nativeMethodVoid();
+
   public static native int nativeMethod(int i);

   public static native int nativeMethodWithManyParameters(
@@ -218,6 +245,9 @@ public static native int nativeMethodWithManyParameters(
 }

 class TestMissingFast {
+  @FastNative
+  public static native int nativeMethodVoid();
+
   @FastNative
   public static native int nativeMethod(int i);

@@ -234,6 +264,9 @@ public static native int nativeMethodWithManyParameters(
 }

 class TestMissingCritical {
+  @CriticalNative
+  public static native int nativeMethodVoid();
+
   @CriticalNative
   public static native int nativeMethod(int i);
diff --git a/test/MyClassNatives/MyClassNatives.java b/test/MyClassNatives/MyClassNatives.java
index c601e3e9bc..7935eb3041 100644
--- a/test/MyClassNatives/MyClassNatives.java
+++ b/test/MyClassNatives/MyClassNatives.java
@@ -122,6 +122,8 @@ native void maxParamNumber(Object o0, Object o1, Object o2, Object o3, Object o4
   native void withoutImplementation();
   // Normal native
   native Object withoutImplementationRefReturn();
+  // Normal native
+  native static void staticWithoutImplementation();

   // Normal native
   native static void stackArgsIntsFirst(int i1, int i2, int i3, int i4, int i5, int i6, int i7,
@@ -256,6 +258,8 @@ native void maxParamNumber_Fast(Object o0, Object o1, Object o2, Object o3, Obje
   native void withoutImplementation_Fast();
   @FastNative
   native Object withoutImplementationRefReturn_Fast();
+  @FastNative
+  native static void staticWithoutImplementation_Fast();

   @FastNative
   native static void stackArgsIntsFirst_Fast(int i1, int i2, int i3, int i4, int i5, int i6, int i7,
@@ -300,6 +304,9 @@ native static long getStackArgSignExtendedMips64_Fast(int i1, int i2, int i3, in
   @CriticalNative
   static native double fooSDD_Critical(double x, double y);

+  @CriticalNative
+  native static void staticWithoutImplementation_Critical();
+
   @CriticalNative
   native static void stackArgsIntsFirst_Critical(int i1, int i2, int i3, int i4, int i5, int i6, int i7,
       int i8, int i9, int i10, float f1, float f2, float f3, float f4, float f5, float f6,
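staticWithoutImplementation and its _Fast/_Critical siblings deliberately have no native counterpart: the dlsym-based lookup stub must fail cleanly and surface a LinkageError to Java code, now for @CriticalNative as well as for the other two flavors. Roughly what the failing half of that lookup amounts to, sketched with plain dlsym (helper names are made up; ART's real path goes through artFindNativeMethod, mentioned in the removed FIXME above):

#include <dlfcn.h>  // RTLD_DEFAULT may need _GNU_SOURCE on glibc; bionic has it by default

// Search every loaded library for the mangled symbol, much as the lookup
// stub can only succeed once the implementing .so has been loaded;
// nullptr means the method has no implementation anywhere.
static void* FindNativeImpl(const char* mangled_name) {
  return dlsym(RTLD_DEFAULT, mangled_name);
}

// When this returns true, the runtime throws instead of calling through a
// null code pointer, which is what testMissingCritical() now verifies.
static bool WouldThrowLinkageError(const char* mangled_name) {
  return FindNativeImpl(mangled_name) == nullptr;
}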
diff --git a/tools/cpp-define-generator/globals.def b/tools/cpp-define-generator/globals.def
index ca0c8ba363..6c9b2b055f 100644
--- a/tools/cpp-define-generator/globals.def
+++ b/tools/cpp-define-generator/globals.def
@@ -32,6 +32,8 @@
 ASM_DEFINE(ACCESS_FLAGS_METHOD_IS_FAST_NATIVE,
            art::kAccFastNative)
+ASM_DEFINE(ACCESS_FLAGS_METHOD_IS_CRITICAL_NATIVE,
+           art::kAccCriticalNative)
 ASM_DEFINE(ACCESS_FLAGS_CLASS_IS_FINALIZABLE,
            art::kAccClassIsFinalizable)
 ASM_DEFINE(ACCESS_FLAGS_CLASS_IS_FINALIZABLE_BIT,