From 1030297d77ae5110b7873530f645aeec3a4264ba Mon Sep 17 00:00:00 2001
From: Yuanfang Chen <yuanfang.chen@sony.com>
Date: Mon, 27 Jun 2022 11:33:45 -0700
Subject: [PATCH] [ubsan] Using metadata instead of prologue data for function
 sanitizer

Information in the function `Prologue Data` is intentionally opaque.
When a function with `Prologue Data` is duplicated. The self (global
value) references inside `Prologue Data` is still pointing to the
original function. This may cause errors like `fatal error: error in backend: Cannot represent a difference across sections`.

This patch detaches the information from function `Prologue Data`
and attaches it to a function metadata node.

This and D116130 fix https://github.com/llvm/llvm-project/issues/49689.

Reviewed By: pcc

Differential Revision: https://reviews.llvm.org/D115844

(cherry picked from commit 6678f8e505b19069a9dbdc3e3ee088d543752412)
---
 clang/lib/CodeGen/CodeGenFunction.cpp         | 36 ++++--------------
 clang/lib/CodeGen/CodeGenFunction.h           |  4 --
 clang/lib/CodeGen/CodeGenModule.cpp           | 16 ++++++++
 clang/lib/CodeGen/CodeGenModule.h             |  5 +++
 clang/lib/Driver/SanitizerArgs.cpp            | 13 +++++++
 clang/test/CodeGen/ubsan-function.cpp         |  5 ++-
 .../test/CodeGenCXX/catch-undef-behavior.cpp  | 37 +++++++++----------
 .../CodeGenCXX/ubsan-function-noexcept.cpp    |  6 ++-
 clang/test/Driver/fsanitize.c                 |  3 ++
 9 files changed, 69 insertions(+), 56 deletions(-)

diff --git a/clang/lib/CodeGen/CodeGenFunction.cpp b/clang/lib/CodeGen/CodeGenFunction.cpp
index 50e1638924d1..d7393526bb2c 100644
--- a/clang/lib/CodeGen/CodeGenFunction.cpp
+++ b/clang/lib/CodeGen/CodeGenFunction.cpp
@@ -560,29 +560,6 @@ bool CodeGenFunction::AlwaysEmitXRayTypedEvents() const {
               XRayInstrKind::Typed);
 }
 
-llvm::Constant *
-CodeGenFunction::EncodeAddrForUseInPrologue(llvm::Function *F,
-                                            llvm::Constant *Addr) {
-  // Addresses stored in prologue data can't require run-time fixups and must
-  // be PC-relative. Run-time fixups are undesirable because they necessitate
-  // writable text segments, which are unsafe. And absolute addresses are
-  // undesirable because they break PIE mode.
-
-  // Add a layer of indirection through a private global. Taking its address
-  // won't result in a run-time fixup, even if Addr has linkonce_odr linkage.
-  auto *GV = new llvm::GlobalVariable(CGM.getModule(), Addr->getType(),
-                                      /*isConstant=*/true,
-                                      llvm::GlobalValue::PrivateLinkage, Addr);
-
-  // Create a PC-relative address.
-  auto *GOTAsInt = llvm::ConstantExpr::getPtrToInt(GV, IntPtrTy);
-  auto *FuncAsInt = llvm::ConstantExpr::getPtrToInt(F, IntPtrTy);
-  auto *PCRelAsInt = llvm::ConstantExpr::getSub(GOTAsInt, FuncAsInt);
-  return (IntPtrTy == Int32Ty)
-             ? PCRelAsInt
-             : llvm::ConstantExpr::getTrunc(PCRelAsInt, Int32Ty);
-}
-
 llvm::Value *
 CodeGenFunction::DecodeAddrUsedInPrologue(llvm::Value *F,
                                           llvm::Value *EncodedAddr) {
@@ -926,12 +903,13 @@ void CodeGenFunction::StartFunction(GlobalDecl GD, QualType RetTy,
           FD->getType(), EST_None);
       llvm::Constant *FTRTTIConst =
           CGM.GetAddrOfRTTIDescriptor(ProtoTy, /*ForEH=*/true);
-      llvm::Constant *FTRTTIConstEncoded =
-          EncodeAddrForUseInPrologue(Fn, FTRTTIConst);
-      llvm::Constant *PrologueStructElems[] = {PrologueSig, FTRTTIConstEncoded};
-      llvm::Constant *PrologueStructConst =
-          llvm::ConstantStruct::getAnon(PrologueStructElems, /*Packed=*/true);
-      Fn->setPrologueData(PrologueStructConst);
+      llvm::GlobalVariable *FTRTTIProxy =
+          CGM.GetOrCreateRTTIProxyGlobalVariable(FTRTTIConst);
+      llvm::LLVMContext &Ctx = Fn->getContext();
+      llvm::MDBuilder MDB(Ctx);
+      Fn->setMetadata(llvm::LLVMContext::MD_func_sanitize,
+                      MDB.createRTTIPointerPrologue(PrologueSig, FTRTTIProxy));
+      CGM.addCompilerUsedGlobal(FTRTTIProxy);
     }
   }
 
diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h
index df99cd9a1b79..046b249b1aac 100644
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/CodeGen/CodeGenFunction.h
@@ -2351,10 +2351,6 @@ public:
   /// XRay typed event handling calls.
   bool AlwaysEmitXRayTypedEvents() const;
 
-  /// Encode an address into a form suitable for use in a function prologue.
-  llvm::Constant *EncodeAddrForUseInPrologue(llvm::Function *F,
-                                             llvm::Constant *Addr);
-
   /// Decode an address used in a function prologue, encoded by \c
   /// EncodeAddrForUseInPrologue.
   llvm::Value *DecodeAddrUsedInPrologue(llvm::Value *F,
diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp
index 2777fc22600d..58eef1b0fdca 100644
--- a/clang/lib/CodeGen/CodeGenModule.cpp
+++ b/clang/lib/CodeGen/CodeGenModule.cpp
@@ -1826,6 +1826,22 @@ CodeGenModule::getMostBaseClasses(const CXXRecordDecl *RD) {
   return MostBases.takeVector();
 }
 
+llvm::GlobalVariable *
+CodeGenModule::GetOrCreateRTTIProxyGlobalVariable(llvm::Constant *Addr) {
+  auto It = RTTIProxyMap.find(Addr);
+  if (It != RTTIProxyMap.end())
+    return It->second;
+
+  auto *FTRTTIProxy = new llvm::GlobalVariable(
+      TheModule, Addr->getType(),
+      /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage, Addr,
+      "__llvm_rtti_proxy");
+  FTRTTIProxy->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
+
+  RTTIProxyMap[Addr] = FTRTTIProxy;
+  return FTRTTIProxy;
+}
+
 void CodeGenModule::SetLLVMFunctionAttributesForDefinition(const Decl *D,
                                                            llvm::Function *F) {
   llvm::AttrBuilder B(F->getContext());
diff --git a/clang/lib/CodeGen/CodeGenModule.h b/clang/lib/CodeGen/CodeGenModule.h
index a8a63c8da57f..3a9d542eb2e0 100644
--- a/clang/lib/CodeGen/CodeGenModule.h
+++ b/clang/lib/CodeGen/CodeGenModule.h
@@ -561,6 +561,8 @@ private:
   MetadataTypeMap VirtualMetadataIdMap;
   MetadataTypeMap GeneralizedMetadataIdMap;
 
+  llvm::DenseMap<const llvm::Constant *, llvm::GlobalVariable *> RTTIProxyMap;
+
 public:
   CodeGenModule(ASTContext &C, const HeaderSearchOptions &headersearchopts,
                 const PreprocessorOptions &ppopts,
@@ -1411,6 +1413,9 @@ public:
   std::vector<const CXXRecordDecl *>
   getMostBaseClasses(const CXXRecordDecl *RD);
 
+  llvm::GlobalVariable *
+  GetOrCreateRTTIProxyGlobalVariable(llvm::Constant *Addr);
+
   /// Get the declaration of std::terminate for the platform.
   llvm::FunctionCallee getTerminateFn();
 
diff --git a/clang/lib/Driver/SanitizerArgs.cpp b/clang/lib/Driver/SanitizerArgs.cpp
index 403fac76f060..96cef9eb80b8 100644
--- a/clang/lib/Driver/SanitizerArgs.cpp
+++ b/clang/lib/Driver/SanitizerArgs.cpp
@@ -367,6 +367,19 @@ SanitizerArgs::SanitizerArgs(const ToolChain &TC,
         Add &= ~NotAllowedWithMinimalRuntime;
       }
 
+      if (llvm::opt::Arg *A = Args.getLastArg(options::OPT_mcmodel_EQ)) {
+        StringRef CM = A->getValue();
+        if (CM != "small" &&
+            (Add & SanitizerKind::Function & ~DiagnosedKinds)) {
+          if (DiagnoseErrors)
+            D.Diag(diag::err_drv_argument_only_allowed_with)
+                << "-fsanitize=function"
+                << "-mcmodel=small";
+          Add &= ~SanitizerKind::Function;
+          DiagnosedKinds |= SanitizerKind::Function;
+        }
+      }
+
       // FIXME: Make CFI on member function calls compatible with cross-DSO CFI.
       // There are currently two problems:
       // - Virtual function call checks need to pass a pointer to the function
diff --git a/clang/test/CodeGen/ubsan-function.cpp b/clang/test/CodeGen/ubsan-function.cpp
index 2466d8a2645d..8a16dfdf5da1 100644
--- a/clang/test/CodeGen/ubsan-function.cpp
+++ b/clang/test/CodeGen/ubsan-function.cpp
@@ -1,6 +1,7 @@
 // RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm -o - %s -fsanitize=function -fno-sanitize-recover=all | FileCheck %s
 
-// CHECK-LABEL: define{{.*}} void @_Z3funv() #0 prologue <{ i32, i32 }> <{ i32 846595819, i32 trunc (i64 sub (i64 ptrtoint (i8** @0 to i64), i64 ptrtoint (void ()* @_Z3funv to i64)) to i32) }> {
+// CHECK: @[[PROXY:.*]] = private unnamed_addr constant i8* bitcast ({ i8*, i8* }* @_ZTIFvvE to i8*)
+// CHECK: define{{.*}} void @_Z3funv() #0 !func_sanitize ![[FUNCSAN:.*]] {
 void fun() {}
 
 // CHECK-LABEL: define{{.*}} void @_Z6callerPFvvE(void ()* noundef %f)
@@ -20,3 +21,5 @@ void fun() {}
 // CHECK: [[LABEL3]]:
 // CHECK: br label %[[LABEL4]], !nosanitize
 void caller(void (*f)()) { f(); }
+
+// CHECK: ![[FUNCSAN]] = !{i32 846595819, i8** @[[PROXY]]}
diff --git a/clang/test/CodeGenCXX/catch-undef-behavior.cpp b/clang/test/CodeGenCXX/catch-undef-behavior.cpp
index d6b094cb5b82..ade29797f5a0 100644
--- a/clang/test/CodeGenCXX/catch-undef-behavior.cpp
+++ b/clang/test/CodeGenCXX/catch-undef-behavior.cpp
@@ -1,8 +1,8 @@
-// RUN: %clang_cc1 -disable-noundef-analysis -std=c++11 -fsanitize=signed-integer-overflow,integer-divide-by-zero,float-divide-by-zero,shift-base,shift-exponent,unreachable,return,vla-bound,alignment,null,vptr,object-size,float-cast-overflow,bool,enum,array-bounds,function -fsanitize-recover=signed-integer-overflow,integer-divide-by-zero,float-divide-by-zero,shift-base,shift-exponent,vla-bound,alignment,null,vptr,object-size,float-cast-overflow,bool,enum,array-bounds,function -emit-llvm %s -o - -triple x86_64-linux-gnu | opt -instnamer -S | FileCheck %s
+// RUN: %clang_cc1 -disable-noundef-analysis -std=c++11 -fsanitize=signed-integer-overflow,integer-divide-by-zero,float-divide-by-zero,shift-base,shift-exponent,unreachable,return,vla-bound,alignment,null,vptr,object-size,float-cast-overflow,bool,enum,array-bounds,function -fsanitize-recover=signed-integer-overflow,integer-divide-by-zero,float-divide-by-zero,shift-base,shift-exponent,vla-bound,alignment,null,vptr,object-size,float-cast-overflow,bool,enum,array-bounds,function -emit-llvm %s -o - -triple x86_64-linux-gnu | opt -instnamer -S | FileCheck %s --check-prefixes=CHECK,CHECK-FUNCSAN
 // RUN: %clang_cc1 -disable-noundef-analysis -std=c++11 -fsanitize=vptr,address -fsanitize-recover=vptr,address -emit-llvm %s -o - -triple x86_64-linux-gnu | FileCheck %s --check-prefix=CHECK-ASAN
 // RUN: %clang_cc1 -disable-noundef-analysis -std=c++11 -fsanitize=vptr -fsanitize-recover=vptr -emit-llvm %s -o - -triple x86_64-linux-gnu | FileCheck %s --check-prefix=DOWNCAST-NULL
-// RUN: %clang_cc1 -disable-noundef-analysis -std=c++11 -fsanitize=function -emit-llvm %s -o - -triple x86_64-linux-gnux32 | FileCheck %s --check-prefix=CHECK-X32
-// RUN: %clang_cc1 -disable-noundef-analysis -std=c++11 -fsanitize=function -emit-llvm %s -o - -triple i386-linux-gnu | FileCheck %s --check-prefix=CHECK-X86
+// RUN: %clang_cc1 -disable-noundef-analysis -std=c++11 -fsanitize=function -emit-llvm %s -o - -triple x86_64-linux-gnux32 | FileCheck %s --check-prefix=CHECK-FUNCSAN
+// RUN: %clang_cc1 -disable-noundef-analysis -std=c++11 -fsanitize=function -emit-llvm %s -o - -triple i386-linux-gnu | FileCheck %s --check-prefix=CHECK-FUNCSAN
 
 struct S {
   double d;
@@ -16,9 +16,7 @@ struct S {
 // Check that type mismatch handler is not modified by ASan.
 // CHECK-ASAN: private unnamed_addr global { { [{{.*}} x i8]*, i32, i32 }, { i16, i16, [4 x i8] }*, i8*, i8 } { {{.*}}, { i16, i16, [4 x i8] }* [[TYPE_DESCR]], {{.*}} }
 
-// CHECK: [[IndirectRTTI_ZTIFvPFviEE:@.+]] = private constant i8* bitcast ({ i8*, i8* }* @_ZTIFvPFviEE to i8*)
-// CHECK-X86: [[IndirectRTTI_ZTIFvPFviEE:@.+]] = private constant i8* bitcast ({ i8*, i8* }* @_ZTIFvPFviEE to i8*)
-// CHECK-X32: [[IndirectRTTI_ZTIFvPFviEE:@.+]] = private constant i8* bitcast ({ i8*, i8* }* @_ZTIFvPFviEE to i8*)
+// CHECK-FUNCSAN: [[PROXY:@.+]] = private unnamed_addr constant i8* bitcast ({ i8*, i8* }* @_ZTIFvPFviEE to i8*)
 
 struct T : S {};
 
@@ -399,10 +397,7 @@ void downcast_reference(B &b) {
   // CHECK-NEXT: br i1 [[AND]]
 }
 
-//
-// CHECK-LABEL: @_Z22indirect_function_callPFviE({{.*}} prologue <{ i32, i32 }> <{ i32 846595819, i32 trunc (i64 sub (i64 ptrtoint (i8** {{.*}} to i64), i64 ptrtoint (void (void (i32)*)* @_Z22indirect_function_callPFviE to i64)) to i32) }>
-// CHECK-X32: @_Z22indirect_function_callPFviE({{.*}} prologue <{ i32, i32 }> <{ i32 846595819, i32 sub (i32 ptrtoint (i8** [[IndirectRTTI_ZTIFvPFviEE]] to i32), i32 ptrtoint (void (void (i32)*)* @_Z22indirect_function_callPFviE to i32)) }>
-// CHECK-X86: @_Z22indirect_function_callPFviE({{.*}} prologue <{ i32, i32 }> <{ i32 846595819, i32 sub (i32 ptrtoint (i8** [[IndirectRTTI_ZTIFvPFviEE]] to i32), i32 ptrtoint (void (void (i32)*)* @_Z22indirect_function_callPFviE to i32)) }>
+// CHECK-FUNCSAN: @_Z22indirect_function_callPFviE({{.*}} !func_sanitize ![[FUNCSAN:.*]] {
 void indirect_function_call(void (*p)(int)) {
   // CHECK: [[PTR:%.+]] = bitcast void (i32)* {{.*}} to <{ i32, i32 }>*
 
@@ -483,34 +478,34 @@ void force_irgen() {
 }
 
 // CHECK-LABEL: define{{.*}} void @_ZN29FunctionSanitizerVirtualCalls1B1fEv
-// CHECK-NOT: prologue
+// CHECK-NOT: !func_sanitize
 //
 // CHECK-LABEL: define{{.*}} void @_ZTv0_n24_N29FunctionSanitizerVirtualCalls1B1fEv
-// CHECK-NOT: prologue
+// CHECK-NOT: !func_sanitize
 //
 // CHECK-LABEL: define{{.*}} void @_ZN29FunctionSanitizerVirtualCalls11force_irgenEv()
-// CHECK: prologue
+// CHECK: !func_sanitize
 //
 // CHECK-LABEL: define linkonce_odr void @_ZN29FunctionSanitizerVirtualCalls1AC1Ev
-// CHECK-NOT: prologue
+// CHECK-NOT: !func_sanitize
 //
 // CHECK-LABEL: define linkonce_odr void @_ZN29FunctionSanitizerVirtualCalls1A1gEv
-// CHECK-NOT: prologue
+// CHECK-NOT: !func_sanitize
 //
 // CHECK-LABEL: define linkonce_odr void @_ZN29FunctionSanitizerVirtualCalls1A1hEv
-// CHECK-NOT: prologue
+// CHECK-NOT: !func_sanitize
 //
 // CHECK-LABEL: define linkonce_odr void @_ZN29FunctionSanitizerVirtualCalls1BC1Ev
-// CHECK-NOT: prologue
+// CHECK-NOT: !func_sanitize
 //
 // CHECK-LABEL: define linkonce_odr void @_ZN29FunctionSanitizerVirtualCalls1B1bEv
-// CHECK-NOT: prologue
+// CHECK-NOT: !func_sanitize
 //
 // CHECK-LABEL: define linkonce_odr void @_ZN29FunctionSanitizerVirtualCalls1B1gEv
-// CHECK-NOT: prologue
+// CHECK-NOT: !func_sanitize
 //
 // CHECK-LABEL: define linkonce_odr void @_ZN29FunctionSanitizerVirtualCalls1B1qEv
-// CHECK: prologue
+// CHECK: !func_sanitize
 
 }
 
@@ -754,3 +749,5 @@ void ThisAlign::this_align_lambda_2() {
 }
 
 // CHECK: attributes [[NR_NUW]] = { noreturn nounwind }
+
+// CHECK-FUNCSAN: ![[FUNCSAN]] = !{i32 846595819, i8** [[PROXY]]}
diff --git a/clang/test/CodeGenCXX/ubsan-function-noexcept.cpp b/clang/test/CodeGenCXX/ubsan-function-noexcept.cpp
index 3c0c0e8be91f..9d5eb1edefe5 100644
--- a/clang/test/CodeGenCXX/ubsan-function-noexcept.cpp
+++ b/clang/test/CodeGenCXX/ubsan-function-noexcept.cpp
@@ -2,8 +2,8 @@
 
 // Check that typeinfo recorded in function prolog doesn't have "Do" noexcept
 // qualifier in its mangled name.
-// CHECK: @[[RTTI:[0-9]+]] = private constant i8* bitcast ({ i8*, i8* }* @_ZTIFvvE to i8*)
-// CHECK: define{{.*}} void @_Z1fv() #{{.*}} prologue <{ i32, i32 }> <{ i32 {{.*}}, i32 trunc (i64 sub (i64 ptrtoint (i8** @[[RTTI]] to i64), i64 ptrtoint (void ()* @_Z1fv to i64)) to i32) }>
+// CHECK: [[PROXY:@.*]] = private unnamed_addr constant i8* bitcast ({ i8*, i8* }* @_ZTIFvvE to i8*)
+// CHECK: define{{.*}} void @_Z1fv() #{{.*}} !func_sanitize ![[FUNCSAN:.*]] {
 void f() noexcept {}
 
 // CHECK: define{{.*}} void @_Z1gPDoFvvE
@@ -13,3 +13,5 @@ void g(void (*p)() noexcept) {
   // CHECK: icmp eq i8* %{{.*}}, bitcast ({ i8*, i8* }* @_ZTIFvvE to i8*), !nosanitize
   p();
 }
+
+// CHECK: ![[FUNCSAN]] = !{i32 846595819, i8** [[PROXY]]}
diff --git a/clang/test/Driver/fsanitize.c b/clang/test/Driver/fsanitize.c
index 17fce1981eea..624dc98478ec 100644
--- a/clang/test/Driver/fsanitize.c
+++ b/clang/test/Driver/fsanitize.c
@@ -915,3 +915,6 @@
 
 // RUN: %clang -fsanitize=undefined,float-divide-by-zero %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-DIVBYZERO-UBSAN
 // CHECK-DIVBYZERO-UBSAN: "-fsanitize={{.*}},float-divide-by-zero,{{.*}}"
+
+// RUN: %clang -target x86_64-linux-gnu -fsanitize=undefined,function -mcmodel=large %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-UBSAN-FUNCTION-CODE-MODEL
+// CHECK-UBSAN-FUNCTION-CODE-MODEL: error: invalid argument '-fsanitize=function' only allowed with '-mcmodel=small'